1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2014 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Jason Ekstrand (jason@jlekstrand.net) 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci */ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include "nir.h" 29bf215546Sopenharmony_ci#include "nir_builder.h" 30bf215546Sopenharmony_ci#include "nir_vla.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci/* 33bf215546Sopenharmony_ci * This file implements an out-of-SSA pass as described in "Revisiting 34bf215546Sopenharmony_ci * Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by 35bf215546Sopenharmony_ci * Boissinot et al. 36bf215546Sopenharmony_ci */ 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_cistruct from_ssa_state { 39bf215546Sopenharmony_ci nir_builder builder; 40bf215546Sopenharmony_ci void *dead_ctx; 41bf215546Sopenharmony_ci struct exec_list dead_instrs; 42bf215546Sopenharmony_ci bool phi_webs_only; 43bf215546Sopenharmony_ci struct hash_table *merge_node_table; 44bf215546Sopenharmony_ci nir_instr *instr; 45bf215546Sopenharmony_ci bool progress; 46bf215546Sopenharmony_ci}; 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci/* Returns if def @a comes after def @b. 49bf215546Sopenharmony_ci * 50bf215546Sopenharmony_ci * The core observation that makes the Boissinot algorithm efficient 51bf215546Sopenharmony_ci * is that, given two properly sorted sets, we can check for 52bf215546Sopenharmony_ci * interference in these sets via a linear walk. This is accomplished 53bf215546Sopenharmony_ci * by doing single combined walk over union of the two sets in DFS 54bf215546Sopenharmony_ci * order. It doesn't matter what DFS we do so long as we're 55bf215546Sopenharmony_ci * consistent. Fortunately, the dominance algorithm we ran prior to 56bf215546Sopenharmony_ci * this pass did such a walk and recorded the pre- and post-indices in 57bf215546Sopenharmony_ci * the blocks. 58bf215546Sopenharmony_ci * 59bf215546Sopenharmony_ci * We treat SSA undefs as always coming before other instruction types. 60bf215546Sopenharmony_ci */ 61bf215546Sopenharmony_cistatic bool 62bf215546Sopenharmony_cidef_after(nir_ssa_def *a, nir_ssa_def *b) 63bf215546Sopenharmony_ci{ 64bf215546Sopenharmony_ci if (a->parent_instr->type == nir_instr_type_ssa_undef) 65bf215546Sopenharmony_ci return false; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci if (b->parent_instr->type == nir_instr_type_ssa_undef) 68bf215546Sopenharmony_ci return true; 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci /* If they're in the same block, we can rely on whichever instruction 71bf215546Sopenharmony_ci * comes first in the block. 72bf215546Sopenharmony_ci */ 73bf215546Sopenharmony_ci if (a->parent_instr->block == b->parent_instr->block) 74bf215546Sopenharmony_ci return a->parent_instr->index > b->parent_instr->index; 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci /* Otherwise, if blocks are distinct, we sort them in DFS pre-order */ 77bf215546Sopenharmony_ci return a->parent_instr->block->dom_pre_index > 78bf215546Sopenharmony_ci b->parent_instr->block->dom_pre_index; 79bf215546Sopenharmony_ci} 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci/* Returns true if a dominates b */ 82bf215546Sopenharmony_cistatic bool 83bf215546Sopenharmony_cissa_def_dominates(nir_ssa_def *a, nir_ssa_def *b) 84bf215546Sopenharmony_ci{ 85bf215546Sopenharmony_ci if (a->parent_instr->type == nir_instr_type_ssa_undef) { 86bf215546Sopenharmony_ci /* SSA undefs always dominate */ 87bf215546Sopenharmony_ci return true; 88bf215546Sopenharmony_ci } if (def_after(a, b)) { 89bf215546Sopenharmony_ci return false; 90bf215546Sopenharmony_ci } else if (a->parent_instr->block == b->parent_instr->block) { 91bf215546Sopenharmony_ci return def_after(b, a); 92bf215546Sopenharmony_ci } else { 93bf215546Sopenharmony_ci return nir_block_dominates(a->parent_instr->block, 94bf215546Sopenharmony_ci b->parent_instr->block); 95bf215546Sopenharmony_ci } 96bf215546Sopenharmony_ci} 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci/* The following data structure, which I have named merge_set is a way of 100bf215546Sopenharmony_ci * representing a set registers of non-interfering registers. This is 101bf215546Sopenharmony_ci * based on the concept of a "dominance forest" presented in "Fast Copy 102bf215546Sopenharmony_ci * Coalescing and Live-Range Identification" by Budimlic et al. but the 103bf215546Sopenharmony_ci * implementation concept is taken from "Revisiting Out-of-SSA Translation 104bf215546Sopenharmony_ci * for Correctness, Code Quality, and Efficiency" by Boissinot et al. 105bf215546Sopenharmony_ci * 106bf215546Sopenharmony_ci * Each SSA definition is associated with a merge_node and the association 107bf215546Sopenharmony_ci * is represented by a combination of a hash table and the "def" parameter 108bf215546Sopenharmony_ci * in the merge_node structure. The merge_set stores a linked list of 109bf215546Sopenharmony_ci * merge_nodes, ordered by a pre-order DFS walk of the dominance tree. (Since 110bf215546Sopenharmony_ci * the liveness analysis pass indexes the SSA values in dominance order for 111bf215546Sopenharmony_ci * us, this is an easy thing to keep up.) It is assumed that no pair of the 112bf215546Sopenharmony_ci * nodes in a given set interfere. Merging two sets or checking for 113bf215546Sopenharmony_ci * interference can be done in a single linear-time merge-sort walk of the 114bf215546Sopenharmony_ci * two lists of nodes. 115bf215546Sopenharmony_ci */ 116bf215546Sopenharmony_cistruct merge_set; 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_citypedef struct { 119bf215546Sopenharmony_ci struct exec_node node; 120bf215546Sopenharmony_ci struct merge_set *set; 121bf215546Sopenharmony_ci nir_ssa_def *def; 122bf215546Sopenharmony_ci} merge_node; 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_citypedef struct merge_set { 125bf215546Sopenharmony_ci struct exec_list nodes; 126bf215546Sopenharmony_ci unsigned size; 127bf215546Sopenharmony_ci bool divergent; 128bf215546Sopenharmony_ci nir_register *reg; 129bf215546Sopenharmony_ci} merge_set; 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci#if 0 132bf215546Sopenharmony_cistatic void 133bf215546Sopenharmony_cimerge_set_dump(merge_set *set, FILE *fp) 134bf215546Sopenharmony_ci{ 135bf215546Sopenharmony_ci nir_ssa_def *dom[set->size]; 136bf215546Sopenharmony_ci int dom_idx = -1; 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci foreach_list_typed(merge_node, node, node, &set->nodes) { 139bf215546Sopenharmony_ci while (dom_idx >= 0 && !ssa_def_dominates(dom[dom_idx], node->def)) 140bf215546Sopenharmony_ci dom_idx--; 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci for (int i = 0; i <= dom_idx; i++) 143bf215546Sopenharmony_ci fprintf(fp, " "); 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci fprintf(fp, "ssa_%d\n", node->def->index); 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci dom[++dom_idx] = node->def; 148bf215546Sopenharmony_ci } 149bf215546Sopenharmony_ci} 150bf215546Sopenharmony_ci#endif 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_cistatic merge_node * 153bf215546Sopenharmony_ciget_merge_node(nir_ssa_def *def, struct from_ssa_state *state) 154bf215546Sopenharmony_ci{ 155bf215546Sopenharmony_ci struct hash_entry *entry = 156bf215546Sopenharmony_ci _mesa_hash_table_search(state->merge_node_table, def); 157bf215546Sopenharmony_ci if (entry) 158bf215546Sopenharmony_ci return entry->data; 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci merge_set *set = ralloc(state->dead_ctx, merge_set); 161bf215546Sopenharmony_ci exec_list_make_empty(&set->nodes); 162bf215546Sopenharmony_ci set->size = 1; 163bf215546Sopenharmony_ci set->divergent = def->divergent; 164bf215546Sopenharmony_ci set->reg = NULL; 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci merge_node *node = ralloc(state->dead_ctx, merge_node); 167bf215546Sopenharmony_ci node->set = set; 168bf215546Sopenharmony_ci node->def = def; 169bf215546Sopenharmony_ci exec_list_push_head(&set->nodes, &node->node); 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci _mesa_hash_table_insert(state->merge_node_table, def, node); 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci return node; 174bf215546Sopenharmony_ci} 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_cistatic bool 177bf215546Sopenharmony_cimerge_nodes_interfere(merge_node *a, merge_node *b) 178bf215546Sopenharmony_ci{ 179bf215546Sopenharmony_ci /* There's no need to check for interference within the same set, 180bf215546Sopenharmony_ci * because we assume, that sets themselves are already 181bf215546Sopenharmony_ci * interference-free. 182bf215546Sopenharmony_ci */ 183bf215546Sopenharmony_ci if (a->set == b->set) 184bf215546Sopenharmony_ci return false; 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci return nir_ssa_defs_interfere(a->def, b->def); 187bf215546Sopenharmony_ci} 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci/* Merges b into a 190bf215546Sopenharmony_ci * 191bf215546Sopenharmony_ci * This algorithm uses def_after to ensure that the sets always stay in the 192bf215546Sopenharmony_ci * same order as the pre-order DFS done by the liveness algorithm. 193bf215546Sopenharmony_ci */ 194bf215546Sopenharmony_cistatic merge_set * 195bf215546Sopenharmony_cimerge_merge_sets(merge_set *a, merge_set *b) 196bf215546Sopenharmony_ci{ 197bf215546Sopenharmony_ci struct exec_node *an = exec_list_get_head(&a->nodes); 198bf215546Sopenharmony_ci struct exec_node *bn = exec_list_get_head(&b->nodes); 199bf215546Sopenharmony_ci while (!exec_node_is_tail_sentinel(bn)) { 200bf215546Sopenharmony_ci merge_node *a_node = exec_node_data(merge_node, an, node); 201bf215546Sopenharmony_ci merge_node *b_node = exec_node_data(merge_node, bn, node); 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci if (exec_node_is_tail_sentinel(an) || 204bf215546Sopenharmony_ci def_after(a_node->def, b_node->def)) { 205bf215546Sopenharmony_ci struct exec_node *next = bn->next; 206bf215546Sopenharmony_ci exec_node_remove(bn); 207bf215546Sopenharmony_ci exec_node_insert_node_before(an, bn); 208bf215546Sopenharmony_ci exec_node_data(merge_node, bn, node)->set = a; 209bf215546Sopenharmony_ci bn = next; 210bf215546Sopenharmony_ci } else { 211bf215546Sopenharmony_ci an = an->next; 212bf215546Sopenharmony_ci } 213bf215546Sopenharmony_ci } 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci a->size += b->size; 216bf215546Sopenharmony_ci b->size = 0; 217bf215546Sopenharmony_ci a->divergent |= b->divergent; 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci return a; 220bf215546Sopenharmony_ci} 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ci/* Checks for any interference between two merge sets 223bf215546Sopenharmony_ci * 224bf215546Sopenharmony_ci * This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA 225bf215546Sopenharmony_ci * Translation for Correctness, Code Quality, and Efficiency" by 226bf215546Sopenharmony_ci * Boissinot et al. 227bf215546Sopenharmony_ci */ 228bf215546Sopenharmony_cistatic bool 229bf215546Sopenharmony_cimerge_sets_interfere(merge_set *a, merge_set *b) 230bf215546Sopenharmony_ci{ 231bf215546Sopenharmony_ci /* List of all the nodes which dominate the current node, in dominance 232bf215546Sopenharmony_ci * order. 233bf215546Sopenharmony_ci */ 234bf215546Sopenharmony_ci NIR_VLA(merge_node *, dom, a->size + b->size); 235bf215546Sopenharmony_ci int dom_idx = -1; 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci struct exec_node *an = exec_list_get_head(&a->nodes); 238bf215546Sopenharmony_ci struct exec_node *bn = exec_list_get_head(&b->nodes); 239bf215546Sopenharmony_ci while (!exec_node_is_tail_sentinel(an) || 240bf215546Sopenharmony_ci !exec_node_is_tail_sentinel(bn)) { 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci /* We walk the union of the two sets in the same order as the pre-order 243bf215546Sopenharmony_ci * DFS done by liveness analysis. 244bf215546Sopenharmony_ci */ 245bf215546Sopenharmony_ci merge_node *current; 246bf215546Sopenharmony_ci if (exec_node_is_tail_sentinel(an)) { 247bf215546Sopenharmony_ci current = exec_node_data(merge_node, bn, node); 248bf215546Sopenharmony_ci bn = bn->next; 249bf215546Sopenharmony_ci } else if (exec_node_is_tail_sentinel(bn)) { 250bf215546Sopenharmony_ci current = exec_node_data(merge_node, an, node); 251bf215546Sopenharmony_ci an = an->next; 252bf215546Sopenharmony_ci } else { 253bf215546Sopenharmony_ci merge_node *a_node = exec_node_data(merge_node, an, node); 254bf215546Sopenharmony_ci merge_node *b_node = exec_node_data(merge_node, bn, node); 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci if (def_after(b_node->def, a_node->def)) { 257bf215546Sopenharmony_ci current = a_node; 258bf215546Sopenharmony_ci an = an->next; 259bf215546Sopenharmony_ci } else { 260bf215546Sopenharmony_ci current = b_node; 261bf215546Sopenharmony_ci bn = bn->next; 262bf215546Sopenharmony_ci } 263bf215546Sopenharmony_ci } 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci /* Because our walk is a pre-order DFS, we can maintain the list of 266bf215546Sopenharmony_ci * dominating nodes as a simple stack, pushing every node onto the list 267bf215546Sopenharmony_ci * after we visit it and popping any non-dominating nodes off before we 268bf215546Sopenharmony_ci * visit the current node. 269bf215546Sopenharmony_ci */ 270bf215546Sopenharmony_ci while (dom_idx >= 0 && 271bf215546Sopenharmony_ci !ssa_def_dominates(dom[dom_idx]->def, current->def)) 272bf215546Sopenharmony_ci dom_idx--; 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci /* There are three invariants of this algorithm that are important here: 275bf215546Sopenharmony_ci * 276bf215546Sopenharmony_ci * 1. There is no interference within either set a or set b. 277bf215546Sopenharmony_ci * 2. None of the nodes processed up until this point interfere. 278bf215546Sopenharmony_ci * 3. All the dominators of `current` have been processed 279bf215546Sopenharmony_ci * 280bf215546Sopenharmony_ci * Because of these invariants, we only need to check the current node 281bf215546Sopenharmony_ci * against its minimal dominator. If any other node N in the union 282bf215546Sopenharmony_ci * interferes with current, then N must dominate current because we are 283bf215546Sopenharmony_ci * in SSA form. If N dominates current then it must also dominate our 284bf215546Sopenharmony_ci * minimal dominator dom[dom_idx]. Since N is live at current it must 285bf215546Sopenharmony_ci * also be live at the minimal dominator which means N interferes with 286bf215546Sopenharmony_ci * the minimal dominator dom[dom_idx] and, by invariants 2 and 3 above, 287bf215546Sopenharmony_ci * the algorithm would have already terminated. Therefore, if we got 288bf215546Sopenharmony_ci * here, the only node that can possibly interfere with current is the 289bf215546Sopenharmony_ci * minimal dominator dom[dom_idx]. 290bf215546Sopenharmony_ci * 291bf215546Sopenharmony_ci * This is what allows us to do a interference check of the union of the 292bf215546Sopenharmony_ci * two sets with a single linear-time walk. 293bf215546Sopenharmony_ci */ 294bf215546Sopenharmony_ci if (dom_idx >= 0 && merge_nodes_interfere(current, dom[dom_idx])) 295bf215546Sopenharmony_ci return true; 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci dom[++dom_idx] = current; 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci return false; 301bf215546Sopenharmony_ci} 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_cistatic bool 304bf215546Sopenharmony_ciadd_parallel_copy_to_end_of_block(nir_shader *shader, nir_block *block, void *dead_ctx) 305bf215546Sopenharmony_ci{ 306bf215546Sopenharmony_ci bool need_end_copy = false; 307bf215546Sopenharmony_ci if (block->successors[0]) { 308bf215546Sopenharmony_ci nir_instr *instr = nir_block_first_instr(block->successors[0]); 309bf215546Sopenharmony_ci if (instr && instr->type == nir_instr_type_phi) 310bf215546Sopenharmony_ci need_end_copy = true; 311bf215546Sopenharmony_ci } 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ci if (block->successors[1]) { 314bf215546Sopenharmony_ci nir_instr *instr = nir_block_first_instr(block->successors[1]); 315bf215546Sopenharmony_ci if (instr && instr->type == nir_instr_type_phi) 316bf215546Sopenharmony_ci need_end_copy = true; 317bf215546Sopenharmony_ci } 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci if (need_end_copy) { 320bf215546Sopenharmony_ci /* If one of our successors has at least one phi node, we need to 321bf215546Sopenharmony_ci * create a parallel copy at the end of the block but before the jump 322bf215546Sopenharmony_ci * (if there is one). 323bf215546Sopenharmony_ci */ 324bf215546Sopenharmony_ci nir_parallel_copy_instr *pcopy = 325bf215546Sopenharmony_ci nir_parallel_copy_instr_create(shader); 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci nir_instr_insert(nir_after_block_before_jump(block), &pcopy->instr); 328bf215546Sopenharmony_ci } 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci return true; 331bf215546Sopenharmony_ci} 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_cistatic nir_parallel_copy_instr * 334bf215546Sopenharmony_ciget_parallel_copy_at_end_of_block(nir_block *block) 335bf215546Sopenharmony_ci{ 336bf215546Sopenharmony_ci nir_instr *last_instr = nir_block_last_instr(block); 337bf215546Sopenharmony_ci if (last_instr == NULL) 338bf215546Sopenharmony_ci return NULL; 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci /* The last instruction may be a jump in which case the parallel copy is 341bf215546Sopenharmony_ci * right before it. 342bf215546Sopenharmony_ci */ 343bf215546Sopenharmony_ci if (last_instr->type == nir_instr_type_jump) 344bf215546Sopenharmony_ci last_instr = nir_instr_prev(last_instr); 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_ci if (last_instr && last_instr->type == nir_instr_type_parallel_copy) 347bf215546Sopenharmony_ci return nir_instr_as_parallel_copy(last_instr); 348bf215546Sopenharmony_ci else 349bf215546Sopenharmony_ci return NULL; 350bf215546Sopenharmony_ci} 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_ci/** Isolate phi nodes with parallel copies 353bf215546Sopenharmony_ci * 354bf215546Sopenharmony_ci * In order to solve the dependency problems with the sources and 355bf215546Sopenharmony_ci * destinations of phi nodes, we first isolate them by adding parallel 356bf215546Sopenharmony_ci * copies to the beginnings and ends of basic blocks. For every block with 357bf215546Sopenharmony_ci * phi nodes, we add a parallel copy immediately following the last phi 358bf215546Sopenharmony_ci * node that copies the destinations of all of the phi nodes to new SSA 359bf215546Sopenharmony_ci * values. We also add a parallel copy to the end of every block that has 360bf215546Sopenharmony_ci * a successor with phi nodes that, for each phi node in each successor, 361bf215546Sopenharmony_ci * copies the corresponding sorce of the phi node and adjust the phi to 362bf215546Sopenharmony_ci * used the destination of the parallel copy. 363bf215546Sopenharmony_ci * 364bf215546Sopenharmony_ci * In SSA form, each value has exactly one definition. What this does is 365bf215546Sopenharmony_ci * ensure that each value used in a phi also has exactly one use. The 366bf215546Sopenharmony_ci * destinations of phis are only used by the parallel copy immediately 367bf215546Sopenharmony_ci * following the phi nodes and. Thanks to the parallel copy at the end of 368bf215546Sopenharmony_ci * the predecessor block, the sources of phi nodes are are the only use of 369bf215546Sopenharmony_ci * that value. This allows us to immediately assign all the sources and 370bf215546Sopenharmony_ci * destinations of any given phi node to the same register without worrying 371bf215546Sopenharmony_ci * about interference at all. We do coalescing to get rid of the parallel 372bf215546Sopenharmony_ci * copies where possible. 373bf215546Sopenharmony_ci * 374bf215546Sopenharmony_ci * Before this pass can be run, we have to iterate over the blocks with 375bf215546Sopenharmony_ci * add_parallel_copy_to_end_of_block to ensure that the parallel copies at 376bf215546Sopenharmony_ci * the ends of blocks exist. We can create the ones at the beginnings as 377bf215546Sopenharmony_ci * we go, but the ones at the ends of blocks need to be created ahead of 378bf215546Sopenharmony_ci * time because of potential back-edges in the CFG. 379bf215546Sopenharmony_ci */ 380bf215546Sopenharmony_cistatic bool 381bf215546Sopenharmony_ciisolate_phi_nodes_block(nir_shader *shader, nir_block *block, void *dead_ctx) 382bf215546Sopenharmony_ci{ 383bf215546Sopenharmony_ci nir_instr *last_phi_instr = NULL; 384bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 385bf215546Sopenharmony_ci /* Phi nodes only ever come at the start of a block */ 386bf215546Sopenharmony_ci if (instr->type != nir_instr_type_phi) 387bf215546Sopenharmony_ci break; 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci last_phi_instr = instr; 390bf215546Sopenharmony_ci } 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci /* If we don't have any phis, then there's nothing for us to do. */ 393bf215546Sopenharmony_ci if (last_phi_instr == NULL) 394bf215546Sopenharmony_ci return true; 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_ci /* If we have phi nodes, we need to create a parallel copy at the 397bf215546Sopenharmony_ci * start of this block but after the phi nodes. 398bf215546Sopenharmony_ci */ 399bf215546Sopenharmony_ci nir_parallel_copy_instr *block_pcopy = 400bf215546Sopenharmony_ci nir_parallel_copy_instr_create(shader); 401bf215546Sopenharmony_ci nir_instr_insert_after(last_phi_instr, &block_pcopy->instr); 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 404bf215546Sopenharmony_ci /* Phi nodes only ever come at the start of a block */ 405bf215546Sopenharmony_ci if (instr->type != nir_instr_type_phi) 406bf215546Sopenharmony_ci break; 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci nir_phi_instr *phi = nir_instr_as_phi(instr); 409bf215546Sopenharmony_ci assert(phi->dest.is_ssa); 410bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 411bf215546Sopenharmony_ci if (nir_src_is_undef(src->src)) 412bf215546Sopenharmony_ci continue; 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci nir_parallel_copy_instr *pcopy = 415bf215546Sopenharmony_ci get_parallel_copy_at_end_of_block(src->pred); 416bf215546Sopenharmony_ci assert(pcopy); 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci nir_parallel_copy_entry *entry = rzalloc(dead_ctx, 419bf215546Sopenharmony_ci nir_parallel_copy_entry); 420bf215546Sopenharmony_ci nir_ssa_dest_init(&pcopy->instr, &entry->dest, 421bf215546Sopenharmony_ci phi->dest.ssa.num_components, 422bf215546Sopenharmony_ci phi->dest.ssa.bit_size, NULL); 423bf215546Sopenharmony_ci entry->dest.ssa.divergent = nir_src_is_divergent(src->src); 424bf215546Sopenharmony_ci exec_list_push_tail(&pcopy->entries, &entry->node); 425bf215546Sopenharmony_ci 426bf215546Sopenharmony_ci assert(src->src.is_ssa); 427bf215546Sopenharmony_ci nir_instr_rewrite_src(&pcopy->instr, &entry->src, src->src); 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci nir_instr_rewrite_src(&phi->instr, &src->src, 430bf215546Sopenharmony_ci nir_src_for_ssa(&entry->dest.ssa)); 431bf215546Sopenharmony_ci } 432bf215546Sopenharmony_ci 433bf215546Sopenharmony_ci nir_parallel_copy_entry *entry = rzalloc(dead_ctx, 434bf215546Sopenharmony_ci nir_parallel_copy_entry); 435bf215546Sopenharmony_ci nir_ssa_dest_init(&block_pcopy->instr, &entry->dest, 436bf215546Sopenharmony_ci phi->dest.ssa.num_components, phi->dest.ssa.bit_size, 437bf215546Sopenharmony_ci NULL); 438bf215546Sopenharmony_ci entry->dest.ssa.divergent = phi->dest.ssa.divergent; 439bf215546Sopenharmony_ci exec_list_push_tail(&block_pcopy->entries, &entry->node); 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&phi->dest.ssa, 442bf215546Sopenharmony_ci &entry->dest.ssa); 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci nir_instr_rewrite_src(&block_pcopy->instr, &entry->src, 445bf215546Sopenharmony_ci nir_src_for_ssa(&phi->dest.ssa)); 446bf215546Sopenharmony_ci } 447bf215546Sopenharmony_ci 448bf215546Sopenharmony_ci return true; 449bf215546Sopenharmony_ci} 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_cistatic bool 452bf215546Sopenharmony_cicoalesce_phi_nodes_block(nir_block *block, struct from_ssa_state *state) 453bf215546Sopenharmony_ci{ 454bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 455bf215546Sopenharmony_ci /* Phi nodes only ever come at the start of a block */ 456bf215546Sopenharmony_ci if (instr->type != nir_instr_type_phi) 457bf215546Sopenharmony_ci break; 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci nir_phi_instr *phi = nir_instr_as_phi(instr); 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci assert(phi->dest.is_ssa); 462bf215546Sopenharmony_ci merge_node *dest_node = get_merge_node(&phi->dest.ssa, state); 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 465bf215546Sopenharmony_ci assert(src->src.is_ssa); 466bf215546Sopenharmony_ci if (nir_src_is_undef(src->src)) 467bf215546Sopenharmony_ci continue; 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci merge_node *src_node = get_merge_node(src->src.ssa, state); 470bf215546Sopenharmony_ci if (src_node->set != dest_node->set) 471bf215546Sopenharmony_ci merge_merge_sets(dest_node->set, src_node->set); 472bf215546Sopenharmony_ci } 473bf215546Sopenharmony_ci } 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci return true; 476bf215546Sopenharmony_ci} 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_cistatic void 479bf215546Sopenharmony_ciaggressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy, 480bf215546Sopenharmony_ci struct from_ssa_state *state) 481bf215546Sopenharmony_ci{ 482bf215546Sopenharmony_ci nir_foreach_parallel_copy_entry(entry, pcopy) { 483bf215546Sopenharmony_ci if (!entry->src.is_ssa) 484bf215546Sopenharmony_ci continue; 485bf215546Sopenharmony_ci 486bf215546Sopenharmony_ci /* Since load_const instructions are SSA only, we can't replace their 487bf215546Sopenharmony_ci * destinations with registers and, therefore, can't coalesce them. 488bf215546Sopenharmony_ci */ 489bf215546Sopenharmony_ci if (entry->src.ssa->parent_instr->type == nir_instr_type_load_const) 490bf215546Sopenharmony_ci continue; 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci /* Don't try and coalesce these */ 493bf215546Sopenharmony_ci if (entry->dest.ssa.num_components != entry->src.ssa->num_components) 494bf215546Sopenharmony_ci continue; 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci merge_node *src_node = get_merge_node(entry->src.ssa, state); 497bf215546Sopenharmony_ci merge_node *dest_node = get_merge_node(&entry->dest.ssa, state); 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci if (src_node->set == dest_node->set) 500bf215546Sopenharmony_ci continue; 501bf215546Sopenharmony_ci 502bf215546Sopenharmony_ci /* TODO: We can probably do better here but for now we should be safe if 503bf215546Sopenharmony_ci * we just don't coalesce things with different divergence. 504bf215546Sopenharmony_ci */ 505bf215546Sopenharmony_ci if (dest_node->set->divergent != src_node->set->divergent) 506bf215546Sopenharmony_ci continue; 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci if (!merge_sets_interfere(src_node->set, dest_node->set)) 509bf215546Sopenharmony_ci merge_merge_sets(src_node->set, dest_node->set); 510bf215546Sopenharmony_ci } 511bf215546Sopenharmony_ci} 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_cistatic bool 514bf215546Sopenharmony_ciaggressive_coalesce_block(nir_block *block, struct from_ssa_state *state) 515bf215546Sopenharmony_ci{ 516bf215546Sopenharmony_ci nir_parallel_copy_instr *start_pcopy = NULL; 517bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 518bf215546Sopenharmony_ci /* Phi nodes only ever come at the start of a block */ 519bf215546Sopenharmony_ci if (instr->type != nir_instr_type_phi) { 520bf215546Sopenharmony_ci if (instr->type != nir_instr_type_parallel_copy) 521bf215546Sopenharmony_ci break; /* The parallel copy must be right after the phis */ 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci start_pcopy = nir_instr_as_parallel_copy(instr); 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ci aggressive_coalesce_parallel_copy(start_pcopy, state); 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_ci break; 528bf215546Sopenharmony_ci } 529bf215546Sopenharmony_ci } 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci nir_parallel_copy_instr *end_pcopy = 532bf215546Sopenharmony_ci get_parallel_copy_at_end_of_block(block); 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci if (end_pcopy && end_pcopy != start_pcopy) 535bf215546Sopenharmony_ci aggressive_coalesce_parallel_copy(end_pcopy, state); 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci return true; 538bf215546Sopenharmony_ci} 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_cistatic nir_register * 541bf215546Sopenharmony_cicreate_reg_for_ssa_def(nir_ssa_def *def, nir_function_impl *impl) 542bf215546Sopenharmony_ci{ 543bf215546Sopenharmony_ci nir_register *reg = nir_local_reg_create(impl); 544bf215546Sopenharmony_ci 545bf215546Sopenharmony_ci reg->num_components = def->num_components; 546bf215546Sopenharmony_ci reg->bit_size = def->bit_size; 547bf215546Sopenharmony_ci reg->num_array_elems = 0; 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_ci return reg; 550bf215546Sopenharmony_ci} 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_cistatic bool 553bf215546Sopenharmony_cirewrite_ssa_def(nir_ssa_def *def, void *void_state) 554bf215546Sopenharmony_ci{ 555bf215546Sopenharmony_ci struct from_ssa_state *state = void_state; 556bf215546Sopenharmony_ci nir_register *reg; 557bf215546Sopenharmony_ci 558bf215546Sopenharmony_ci struct hash_entry *entry = 559bf215546Sopenharmony_ci _mesa_hash_table_search(state->merge_node_table, def); 560bf215546Sopenharmony_ci if (entry) { 561bf215546Sopenharmony_ci /* In this case, we're part of a phi web. Use the web's register. */ 562bf215546Sopenharmony_ci merge_node *node = (merge_node *)entry->data; 563bf215546Sopenharmony_ci 564bf215546Sopenharmony_ci /* If it doesn't have a register yet, create one. Note that all of 565bf215546Sopenharmony_ci * the things in the merge set should be the same so it doesn't 566bf215546Sopenharmony_ci * matter which node's definition we use. 567bf215546Sopenharmony_ci */ 568bf215546Sopenharmony_ci if (node->set->reg == NULL) { 569bf215546Sopenharmony_ci node->set->reg = create_reg_for_ssa_def(def, state->builder.impl); 570bf215546Sopenharmony_ci node->set->reg->divergent = node->set->divergent; 571bf215546Sopenharmony_ci } 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_ci reg = node->set->reg; 574bf215546Sopenharmony_ci } else { 575bf215546Sopenharmony_ci if (state->phi_webs_only) 576bf215546Sopenharmony_ci return true; 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci /* We leave load_const SSA values alone. They act as immediates to 579bf215546Sopenharmony_ci * the backend. If it got coalesced into a phi, that's ok. 580bf215546Sopenharmony_ci */ 581bf215546Sopenharmony_ci if (def->parent_instr->type == nir_instr_type_load_const) 582bf215546Sopenharmony_ci return true; 583bf215546Sopenharmony_ci 584bf215546Sopenharmony_ci reg = create_reg_for_ssa_def(def, state->builder.impl); 585bf215546Sopenharmony_ci } 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_src(def, nir_src_for_reg(reg)); 588bf215546Sopenharmony_ci assert(nir_ssa_def_is_unused(def)); 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_ci if (def->parent_instr->type == nir_instr_type_ssa_undef) { 591bf215546Sopenharmony_ci /* If it's an ssa_undef instruction, remove it since we know we just got 592bf215546Sopenharmony_ci * rid of all its uses. 593bf215546Sopenharmony_ci */ 594bf215546Sopenharmony_ci nir_instr *parent_instr = def->parent_instr; 595bf215546Sopenharmony_ci nir_instr_remove(parent_instr); 596bf215546Sopenharmony_ci exec_list_push_tail(&state->dead_instrs, &parent_instr->node); 597bf215546Sopenharmony_ci state->progress = true; 598bf215546Sopenharmony_ci return true; 599bf215546Sopenharmony_ci } 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci assert(def->parent_instr->type != nir_instr_type_load_const); 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci /* At this point we know a priori that this SSA def is part of a 604bf215546Sopenharmony_ci * nir_dest. We can use exec_node_data to get the dest pointer. 605bf215546Sopenharmony_ci */ 606bf215546Sopenharmony_ci nir_dest *dest = exec_node_data(nir_dest, def, ssa); 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci nir_instr_rewrite_dest(state->instr, dest, nir_dest_for_reg(reg)); 609bf215546Sopenharmony_ci state->progress = true; 610bf215546Sopenharmony_ci return true; 611bf215546Sopenharmony_ci} 612bf215546Sopenharmony_ci 613bf215546Sopenharmony_ci/* Resolves ssa definitions to registers. While we're at it, we also 614bf215546Sopenharmony_ci * remove phi nodes. 615bf215546Sopenharmony_ci */ 616bf215546Sopenharmony_cistatic void 617bf215546Sopenharmony_ciresolve_registers_block(nir_block *block, struct from_ssa_state *state) 618bf215546Sopenharmony_ci{ 619bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 620bf215546Sopenharmony_ci state->instr = instr; 621bf215546Sopenharmony_ci nir_foreach_ssa_def(instr, rewrite_ssa_def, state); 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci if (instr->type == nir_instr_type_phi) { 624bf215546Sopenharmony_ci nir_instr_remove(instr); 625bf215546Sopenharmony_ci exec_list_push_tail(&state->dead_instrs, &instr->node); 626bf215546Sopenharmony_ci state->progress = true; 627bf215546Sopenharmony_ci } 628bf215546Sopenharmony_ci } 629bf215546Sopenharmony_ci state->instr = NULL; 630bf215546Sopenharmony_ci} 631bf215546Sopenharmony_ci 632bf215546Sopenharmony_cistatic void 633bf215546Sopenharmony_ciemit_copy(nir_builder *b, nir_src src, nir_src dest_src) 634bf215546Sopenharmony_ci{ 635bf215546Sopenharmony_ci assert(!dest_src.is_ssa && 636bf215546Sopenharmony_ci dest_src.reg.indirect == NULL && 637bf215546Sopenharmony_ci dest_src.reg.base_offset == 0); 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_ci assert(!nir_src_is_divergent(src) || nir_src_is_divergent(dest_src)); 640bf215546Sopenharmony_ci 641bf215546Sopenharmony_ci if (src.is_ssa) 642bf215546Sopenharmony_ci assert(src.ssa->num_components >= dest_src.reg.reg->num_components); 643bf215546Sopenharmony_ci else 644bf215546Sopenharmony_ci assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components); 645bf215546Sopenharmony_ci 646bf215546Sopenharmony_ci nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov); 647bf215546Sopenharmony_ci nir_src_copy(&mov->src[0].src, &src); 648bf215546Sopenharmony_ci mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg); 649bf215546Sopenharmony_ci mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1; 650bf215546Sopenharmony_ci 651bf215546Sopenharmony_ci nir_builder_instr_insert(b, &mov->instr); 652bf215546Sopenharmony_ci} 653bf215546Sopenharmony_ci 654bf215546Sopenharmony_ci/* Resolves a single parallel copy operation into a sequence of movs 655bf215546Sopenharmony_ci * 656bf215546Sopenharmony_ci * This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for 657bf215546Sopenharmony_ci * Correctness, Code Quality, and Efficiency" by Boissinot et al. 658bf215546Sopenharmony_ci * However, I never got the algorithm to work as written, so this version 659bf215546Sopenharmony_ci * is slightly modified. 660bf215546Sopenharmony_ci * 661bf215546Sopenharmony_ci * The algorithm works by playing this little shell game with the values. 662bf215546Sopenharmony_ci * We start by recording where every source value is and which source value 663bf215546Sopenharmony_ci * each destination value should receive. We then grab any copy whose 664bf215546Sopenharmony_ci * destination is "empty", i.e. not used as a source, and do the following: 665bf215546Sopenharmony_ci * - Find where its source value currently lives 666bf215546Sopenharmony_ci * - Emit the move instruction 667bf215546Sopenharmony_ci * - Set the location of the source value to the destination 668bf215546Sopenharmony_ci * - Mark the location containing the source value 669bf215546Sopenharmony_ci * - Mark the destination as no longer needing to be copied 670bf215546Sopenharmony_ci * 671bf215546Sopenharmony_ci * When we run out of "empty" destinations, we have a cycle and so we 672bf215546Sopenharmony_ci * create a temporary register, copy to that register, and mark the value 673bf215546Sopenharmony_ci * we copied as living in that temporary. Now, the cycle is broken, so we 674bf215546Sopenharmony_ci * can continue with the above steps. 675bf215546Sopenharmony_ci */ 676bf215546Sopenharmony_cistatic void 677bf215546Sopenharmony_ciresolve_parallel_copy(nir_parallel_copy_instr *pcopy, 678bf215546Sopenharmony_ci struct from_ssa_state *state) 679bf215546Sopenharmony_ci{ 680bf215546Sopenharmony_ci unsigned num_copies = 0; 681bf215546Sopenharmony_ci nir_foreach_parallel_copy_entry(entry, pcopy) { 682bf215546Sopenharmony_ci /* Sources may be SSA */ 683bf215546Sopenharmony_ci if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg) 684bf215546Sopenharmony_ci continue; 685bf215546Sopenharmony_ci 686bf215546Sopenharmony_ci num_copies++; 687bf215546Sopenharmony_ci } 688bf215546Sopenharmony_ci 689bf215546Sopenharmony_ci if (num_copies == 0) { 690bf215546Sopenharmony_ci /* Hooray, we don't need any copies! */ 691bf215546Sopenharmony_ci nir_instr_remove(&pcopy->instr); 692bf215546Sopenharmony_ci exec_list_push_tail(&state->dead_instrs, &pcopy->instr.node); 693bf215546Sopenharmony_ci return; 694bf215546Sopenharmony_ci } 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_ci /* The register/source corresponding to the given index */ 697bf215546Sopenharmony_ci NIR_VLA_ZERO(nir_src, values, num_copies * 2); 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci /* The current location of a given piece of data. We will use -1 for "null" */ 700bf215546Sopenharmony_ci NIR_VLA_FILL(int, loc, num_copies * 2, -1); 701bf215546Sopenharmony_ci 702bf215546Sopenharmony_ci /* The piece of data that the given piece of data is to be copied from. We will use -1 for "null" */ 703bf215546Sopenharmony_ci NIR_VLA_FILL(int, pred, num_copies * 2, -1); 704bf215546Sopenharmony_ci 705bf215546Sopenharmony_ci /* The destinations we have yet to properly fill */ 706bf215546Sopenharmony_ci NIR_VLA(int, to_do, num_copies * 2); 707bf215546Sopenharmony_ci int to_do_idx = -1; 708bf215546Sopenharmony_ci 709bf215546Sopenharmony_ci state->builder.cursor = nir_before_instr(&pcopy->instr); 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_ci /* Now we set everything up: 712bf215546Sopenharmony_ci * - All values get assigned a temporary index 713bf215546Sopenharmony_ci * - Current locations are set from sources 714bf215546Sopenharmony_ci * - Predicessors are recorded from sources and destinations 715bf215546Sopenharmony_ci */ 716bf215546Sopenharmony_ci int num_vals = 0; 717bf215546Sopenharmony_ci nir_foreach_parallel_copy_entry(entry, pcopy) { 718bf215546Sopenharmony_ci /* Sources may be SSA */ 719bf215546Sopenharmony_ci if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg) 720bf215546Sopenharmony_ci continue; 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_ci int src_idx = -1; 723bf215546Sopenharmony_ci for (int i = 0; i < num_vals; ++i) { 724bf215546Sopenharmony_ci if (nir_srcs_equal(values[i], entry->src)) 725bf215546Sopenharmony_ci src_idx = i; 726bf215546Sopenharmony_ci } 727bf215546Sopenharmony_ci if (src_idx < 0) { 728bf215546Sopenharmony_ci src_idx = num_vals++; 729bf215546Sopenharmony_ci values[src_idx] = entry->src; 730bf215546Sopenharmony_ci } 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci nir_src dest_src = nir_src_for_reg(entry->dest.reg.reg); 733bf215546Sopenharmony_ci 734bf215546Sopenharmony_ci int dest_idx = -1; 735bf215546Sopenharmony_ci for (int i = 0; i < num_vals; ++i) { 736bf215546Sopenharmony_ci if (nir_srcs_equal(values[i], dest_src)) { 737bf215546Sopenharmony_ci /* Each destination of a parallel copy instruction should be 738bf215546Sopenharmony_ci * unique. A destination may get used as a source, so we still 739bf215546Sopenharmony_ci * have to walk the list. However, the predecessor should not, 740bf215546Sopenharmony_ci * at this point, be set yet, so we should have -1 here. 741bf215546Sopenharmony_ci */ 742bf215546Sopenharmony_ci assert(pred[i] == -1); 743bf215546Sopenharmony_ci dest_idx = i; 744bf215546Sopenharmony_ci } 745bf215546Sopenharmony_ci } 746bf215546Sopenharmony_ci if (dest_idx < 0) { 747bf215546Sopenharmony_ci dest_idx = num_vals++; 748bf215546Sopenharmony_ci values[dest_idx] = dest_src; 749bf215546Sopenharmony_ci } 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_ci loc[src_idx] = src_idx; 752bf215546Sopenharmony_ci pred[dest_idx] = src_idx; 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ci to_do[++to_do_idx] = dest_idx; 755bf215546Sopenharmony_ci } 756bf215546Sopenharmony_ci 757bf215546Sopenharmony_ci /* Currently empty destinations we can go ahead and fill */ 758bf215546Sopenharmony_ci NIR_VLA(int, ready, num_copies * 2); 759bf215546Sopenharmony_ci int ready_idx = -1; 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_ci /* Mark the ones that are ready for copying. We know an index is a 762bf215546Sopenharmony_ci * destination if it has a predecessor and it's ready for copying if 763bf215546Sopenharmony_ci * it's not marked as containing data. 764bf215546Sopenharmony_ci */ 765bf215546Sopenharmony_ci for (int i = 0; i < num_vals; i++) { 766bf215546Sopenharmony_ci if (pred[i] != -1 && loc[i] == -1) 767bf215546Sopenharmony_ci ready[++ready_idx] = i; 768bf215546Sopenharmony_ci } 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_ci while (to_do_idx >= 0) { 771bf215546Sopenharmony_ci while (ready_idx >= 0) { 772bf215546Sopenharmony_ci int b = ready[ready_idx--]; 773bf215546Sopenharmony_ci int a = pred[b]; 774bf215546Sopenharmony_ci emit_copy(&state->builder, values[loc[a]], values[b]); 775bf215546Sopenharmony_ci 776bf215546Sopenharmony_ci /* b has been filled, mark it as not needing to be copied */ 777bf215546Sopenharmony_ci pred[b] = -1; 778bf215546Sopenharmony_ci 779bf215546Sopenharmony_ci /* The next bit only applies if the source and destination have the 780bf215546Sopenharmony_ci * same divergence. If they differ (it must be convergent -> 781bf215546Sopenharmony_ci * divergent), then we can't guarantee we won't need the convergent 782bf215546Sopenharmony_ci * version of again. 783bf215546Sopenharmony_ci */ 784bf215546Sopenharmony_ci if (nir_src_is_divergent(values[a]) == 785bf215546Sopenharmony_ci nir_src_is_divergent(values[b])) { 786bf215546Sopenharmony_ci /* If any other copies want a they can find it at b but only if the 787bf215546Sopenharmony_ci * two have the same divergence. 788bf215546Sopenharmony_ci */ 789bf215546Sopenharmony_ci loc[a] = b; 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci /* If a needs to be filled... */ 792bf215546Sopenharmony_ci if (pred[a] != -1) { 793bf215546Sopenharmony_ci /* If any other copies want a they can find it at b */ 794bf215546Sopenharmony_ci loc[a] = b; 795bf215546Sopenharmony_ci 796bf215546Sopenharmony_ci /* It's ready for copying now */ 797bf215546Sopenharmony_ci ready[++ready_idx] = a; 798bf215546Sopenharmony_ci } 799bf215546Sopenharmony_ci } 800bf215546Sopenharmony_ci } 801bf215546Sopenharmony_ci int b = to_do[to_do_idx--]; 802bf215546Sopenharmony_ci if (pred[b] == -1) 803bf215546Sopenharmony_ci continue; 804bf215546Sopenharmony_ci 805bf215546Sopenharmony_ci /* If we got here, then we don't have any more trivial copies that we 806bf215546Sopenharmony_ci * can do. We have to break a cycle, so we create a new temporary 807bf215546Sopenharmony_ci * register for that purpose. Normally, if going out of SSA after 808bf215546Sopenharmony_ci * register allocation, you would want to avoid creating temporary 809bf215546Sopenharmony_ci * registers. However, we are going out of SSA before register 810bf215546Sopenharmony_ci * allocation, so we would rather not create extra register 811bf215546Sopenharmony_ci * dependencies for the backend to deal with. If it wants, the 812bf215546Sopenharmony_ci * backend can coalesce the (possibly multiple) temporaries. 813bf215546Sopenharmony_ci */ 814bf215546Sopenharmony_ci assert(num_vals < num_copies * 2); 815bf215546Sopenharmony_ci nir_register *reg = nir_local_reg_create(state->builder.impl); 816bf215546Sopenharmony_ci reg->num_array_elems = 0; 817bf215546Sopenharmony_ci if (values[b].is_ssa) { 818bf215546Sopenharmony_ci reg->num_components = values[b].ssa->num_components; 819bf215546Sopenharmony_ci reg->bit_size = values[b].ssa->bit_size; 820bf215546Sopenharmony_ci } else { 821bf215546Sopenharmony_ci reg->num_components = values[b].reg.reg->num_components; 822bf215546Sopenharmony_ci reg->bit_size = values[b].reg.reg->bit_size; 823bf215546Sopenharmony_ci } 824bf215546Sopenharmony_ci reg->divergent = nir_src_is_divergent(values[b]); 825bf215546Sopenharmony_ci values[num_vals].is_ssa = false; 826bf215546Sopenharmony_ci values[num_vals].reg.reg = reg; 827bf215546Sopenharmony_ci 828bf215546Sopenharmony_ci emit_copy(&state->builder, values[b], values[num_vals]); 829bf215546Sopenharmony_ci loc[b] = num_vals; 830bf215546Sopenharmony_ci ready[++ready_idx] = b; 831bf215546Sopenharmony_ci num_vals++; 832bf215546Sopenharmony_ci } 833bf215546Sopenharmony_ci 834bf215546Sopenharmony_ci nir_instr_remove(&pcopy->instr); 835bf215546Sopenharmony_ci exec_list_push_tail(&state->dead_instrs, &pcopy->instr.node); 836bf215546Sopenharmony_ci} 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_ci/* Resolves the parallel copies in a block. Each block can have at most 839bf215546Sopenharmony_ci * two: One at the beginning, right after all the phi noces, and one at 840bf215546Sopenharmony_ci * the end (or right before the final jump if it exists). 841bf215546Sopenharmony_ci */ 842bf215546Sopenharmony_cistatic bool 843bf215546Sopenharmony_ciresolve_parallel_copies_block(nir_block *block, struct from_ssa_state *state) 844bf215546Sopenharmony_ci{ 845bf215546Sopenharmony_ci /* At this point, we have removed all of the phi nodes. If a parallel 846bf215546Sopenharmony_ci * copy existed right after the phi nodes in this block, it is now the 847bf215546Sopenharmony_ci * first instruction. 848bf215546Sopenharmony_ci */ 849bf215546Sopenharmony_ci nir_instr *first_instr = nir_block_first_instr(block); 850bf215546Sopenharmony_ci if (first_instr == NULL) 851bf215546Sopenharmony_ci return true; /* Empty, nothing to do. */ 852bf215546Sopenharmony_ci 853bf215546Sopenharmony_ci if (first_instr->type == nir_instr_type_parallel_copy) { 854bf215546Sopenharmony_ci nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(first_instr); 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci resolve_parallel_copy(pcopy, state); 857bf215546Sopenharmony_ci } 858bf215546Sopenharmony_ci 859bf215546Sopenharmony_ci /* It's possible that the above code already cleaned up the end parallel 860bf215546Sopenharmony_ci * copy. However, doing so removed it form the instructions list so we 861bf215546Sopenharmony_ci * won't find it here. Therefore, it's safe to go ahead and just look 862bf215546Sopenharmony_ci * for one and clean it up if it exists. 863bf215546Sopenharmony_ci */ 864bf215546Sopenharmony_ci nir_parallel_copy_instr *end_pcopy = 865bf215546Sopenharmony_ci get_parallel_copy_at_end_of_block(block); 866bf215546Sopenharmony_ci if (end_pcopy) 867bf215546Sopenharmony_ci resolve_parallel_copy(end_pcopy, state); 868bf215546Sopenharmony_ci 869bf215546Sopenharmony_ci return true; 870bf215546Sopenharmony_ci} 871bf215546Sopenharmony_ci 872bf215546Sopenharmony_cistatic bool 873bf215546Sopenharmony_cinir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only) 874bf215546Sopenharmony_ci{ 875bf215546Sopenharmony_ci nir_shader *shader = impl->function->shader; 876bf215546Sopenharmony_ci 877bf215546Sopenharmony_ci struct from_ssa_state state; 878bf215546Sopenharmony_ci 879bf215546Sopenharmony_ci nir_builder_init(&state.builder, impl); 880bf215546Sopenharmony_ci state.dead_ctx = ralloc_context(NULL); 881bf215546Sopenharmony_ci state.phi_webs_only = phi_webs_only; 882bf215546Sopenharmony_ci state.merge_node_table = _mesa_pointer_hash_table_create(NULL); 883bf215546Sopenharmony_ci state.progress = false; 884bf215546Sopenharmony_ci exec_list_make_empty(&state.dead_instrs); 885bf215546Sopenharmony_ci 886bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 887bf215546Sopenharmony_ci add_parallel_copy_to_end_of_block(shader, block, state.dead_ctx); 888bf215546Sopenharmony_ci } 889bf215546Sopenharmony_ci 890bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 891bf215546Sopenharmony_ci isolate_phi_nodes_block(shader, block, state.dead_ctx); 892bf215546Sopenharmony_ci } 893bf215546Sopenharmony_ci 894bf215546Sopenharmony_ci /* Mark metadata as dirty before we ask for liveness analysis */ 895bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | 896bf215546Sopenharmony_ci nir_metadata_dominance); 897bf215546Sopenharmony_ci 898bf215546Sopenharmony_ci nir_metadata_require(impl, nir_metadata_instr_index | 899bf215546Sopenharmony_ci nir_metadata_live_ssa_defs | 900bf215546Sopenharmony_ci nir_metadata_dominance); 901bf215546Sopenharmony_ci 902bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 903bf215546Sopenharmony_ci coalesce_phi_nodes_block(block, &state); 904bf215546Sopenharmony_ci } 905bf215546Sopenharmony_ci 906bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 907bf215546Sopenharmony_ci aggressive_coalesce_block(block, &state); 908bf215546Sopenharmony_ci } 909bf215546Sopenharmony_ci 910bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 911bf215546Sopenharmony_ci resolve_registers_block(block, &state); 912bf215546Sopenharmony_ci } 913bf215546Sopenharmony_ci 914bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 915bf215546Sopenharmony_ci resolve_parallel_copies_block(block, &state); 916bf215546Sopenharmony_ci } 917bf215546Sopenharmony_ci 918bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | 919bf215546Sopenharmony_ci nir_metadata_dominance); 920bf215546Sopenharmony_ci 921bf215546Sopenharmony_ci /* Clean up dead instructions and the hash tables */ 922bf215546Sopenharmony_ci nir_instr_free_list(&state.dead_instrs); 923bf215546Sopenharmony_ci _mesa_hash_table_destroy(state.merge_node_table, NULL); 924bf215546Sopenharmony_ci ralloc_free(state.dead_ctx); 925bf215546Sopenharmony_ci return state.progress; 926bf215546Sopenharmony_ci} 927bf215546Sopenharmony_ci 928bf215546Sopenharmony_cibool 929bf215546Sopenharmony_cinir_convert_from_ssa(nir_shader *shader, bool phi_webs_only) 930bf215546Sopenharmony_ci{ 931bf215546Sopenharmony_ci bool progress = false; 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 934bf215546Sopenharmony_ci if (function->impl) 935bf215546Sopenharmony_ci progress |= nir_convert_from_ssa_impl(function->impl, phi_webs_only); 936bf215546Sopenharmony_ci } 937bf215546Sopenharmony_ci 938bf215546Sopenharmony_ci return progress; 939bf215546Sopenharmony_ci} 940bf215546Sopenharmony_ci 941bf215546Sopenharmony_ci 942bf215546Sopenharmony_cistatic void 943bf215546Sopenharmony_ciplace_phi_read(nir_builder *b, nir_register *reg, 944bf215546Sopenharmony_ci nir_ssa_def *def, nir_block *block, struct set *visited_blocks) 945bf215546Sopenharmony_ci{ 946bf215546Sopenharmony_ci /* Search already visited blocks to avoid back edges in tree */ 947bf215546Sopenharmony_ci if (_mesa_set_search(visited_blocks, block) == NULL) { 948bf215546Sopenharmony_ci /* Try to go up the single-successor tree */ 949bf215546Sopenharmony_ci bool all_single_successors = true; 950bf215546Sopenharmony_ci set_foreach(block->predecessors, entry) { 951bf215546Sopenharmony_ci nir_block *pred = (nir_block *)entry->key; 952bf215546Sopenharmony_ci if (pred->successors[0] && pred->successors[1]) { 953bf215546Sopenharmony_ci all_single_successors = false; 954bf215546Sopenharmony_ci break; 955bf215546Sopenharmony_ci } 956bf215546Sopenharmony_ci } 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_ci if (all_single_successors) { 959bf215546Sopenharmony_ci /* All predecessors of this block have exactly one successor and it 960bf215546Sopenharmony_ci * is this block so they must eventually lead here without 961bf215546Sopenharmony_ci * intersecting each other. Place the reads in the predecessors 962bf215546Sopenharmony_ci * instead of this block. 963bf215546Sopenharmony_ci */ 964bf215546Sopenharmony_ci _mesa_set_add(visited_blocks, block); 965bf215546Sopenharmony_ci 966bf215546Sopenharmony_ci set_foreach(block->predecessors, entry) { 967bf215546Sopenharmony_ci place_phi_read(b, reg, def, (nir_block *)entry->key, visited_blocks); 968bf215546Sopenharmony_ci } 969bf215546Sopenharmony_ci return; 970bf215546Sopenharmony_ci } 971bf215546Sopenharmony_ci } 972bf215546Sopenharmony_ci 973bf215546Sopenharmony_ci b->cursor = nir_after_block_before_jump(block); 974bf215546Sopenharmony_ci nir_store_reg(b, reg, def, ~0); 975bf215546Sopenharmony_ci} 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_ci/** Lower all of the phi nodes in a block to movs to and from a register 978bf215546Sopenharmony_ci * 979bf215546Sopenharmony_ci * This provides a very quick-and-dirty out-of-SSA pass that you can run on a 980bf215546Sopenharmony_ci * single block to convert all of its phis to a register and some movs. 981bf215546Sopenharmony_ci * The code that is generated, while not optimal for actual codegen in a 982bf215546Sopenharmony_ci * back-end, is easy to generate, correct, and will turn into the same set of 983bf215546Sopenharmony_ci * phis after you call regs_to_ssa and do some copy propagation. For each phi 984bf215546Sopenharmony_ci * node we do the following: 985bf215546Sopenharmony_ci * 986bf215546Sopenharmony_ci * 1. For each phi instruction in the block, create a new nir_register 987bf215546Sopenharmony_ci * 988bf215546Sopenharmony_ci * 2. Insert movs at the top of the destination block for each phi and 989bf215546Sopenharmony_ci * rewrite all uses of the phi to use the mov. 990bf215546Sopenharmony_ci * 991bf215546Sopenharmony_ci * 3. For each phi source, insert movs in the predecessor block from the phi 992bf215546Sopenharmony_ci * source to the register associated with the phi. 993bf215546Sopenharmony_ci * 994bf215546Sopenharmony_ci * Correctness is guaranteed by the fact that we create a new register for 995bf215546Sopenharmony_ci * each phi and emit movs on both sides of the control-flow edge. Because all 996bf215546Sopenharmony_ci * the phis have SSA destinations (we assert this) and there is a separate 997bf215546Sopenharmony_ci * temporary for each phi, all movs inserted in any particular block have 998bf215546Sopenharmony_ci * unique destinations so the order of operations does not matter. 999bf215546Sopenharmony_ci * 1000bf215546Sopenharmony_ci * The one intelligent thing this pass does is that it places the moves from 1001bf215546Sopenharmony_ci * the phi sources as high up the predecessor tree as possible instead of in 1002bf215546Sopenharmony_ci * the exact predecessor. This means that, in particular, it will crawl into 1003bf215546Sopenharmony_ci * the deepest nesting of any if-ladders. In order to ensure that doing so is 1004bf215546Sopenharmony_ci * safe, it stops as soon as one of the predecessors has multiple successors. 1005bf215546Sopenharmony_ci */ 1006bf215546Sopenharmony_cibool 1007bf215546Sopenharmony_cinir_lower_phis_to_regs_block(nir_block *block) 1008bf215546Sopenharmony_ci{ 1009bf215546Sopenharmony_ci nir_builder b; 1010bf215546Sopenharmony_ci nir_builder_init(&b, nir_cf_node_get_function(&block->cf_node)); 1011bf215546Sopenharmony_ci struct set *visited_blocks = _mesa_set_create(NULL, _mesa_hash_pointer, 1012bf215546Sopenharmony_ci _mesa_key_pointer_equal); 1013bf215546Sopenharmony_ci 1014bf215546Sopenharmony_ci bool progress = false; 1015bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 1016bf215546Sopenharmony_ci if (instr->type != nir_instr_type_phi) 1017bf215546Sopenharmony_ci break; 1018bf215546Sopenharmony_ci 1019bf215546Sopenharmony_ci nir_phi_instr *phi = nir_instr_as_phi(instr); 1020bf215546Sopenharmony_ci assert(phi->dest.is_ssa); 1021bf215546Sopenharmony_ci 1022bf215546Sopenharmony_ci nir_register *reg = create_reg_for_ssa_def(&phi->dest.ssa, b.impl); 1023bf215546Sopenharmony_ci 1024bf215546Sopenharmony_ci b.cursor = nir_after_instr(&phi->instr); 1025bf215546Sopenharmony_ci nir_ssa_def *def = nir_load_reg(&b, reg); 1026bf215546Sopenharmony_ci 1027bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&phi->dest.ssa, def); 1028bf215546Sopenharmony_ci 1029bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 1030bf215546Sopenharmony_ci if (src->src.is_ssa) { 1031bf215546Sopenharmony_ci _mesa_set_add(visited_blocks, src->src.ssa->parent_instr->block); 1032bf215546Sopenharmony_ci place_phi_read(&b, reg, src->src.ssa, src->pred, visited_blocks); 1033bf215546Sopenharmony_ci _mesa_set_clear(visited_blocks, NULL); 1034bf215546Sopenharmony_ci } else { 1035bf215546Sopenharmony_ci b.cursor = nir_after_block_before_jump(src->pred); 1036bf215546Sopenharmony_ci nir_ssa_def *src_ssa = 1037bf215546Sopenharmony_ci nir_ssa_for_src(&b, src->src, phi->dest.ssa.num_components); 1038bf215546Sopenharmony_ci nir_store_reg(&b, reg, src_ssa, ~0); 1039bf215546Sopenharmony_ci } 1040bf215546Sopenharmony_ci } 1041bf215546Sopenharmony_ci 1042bf215546Sopenharmony_ci nir_instr_remove(&phi->instr); 1043bf215546Sopenharmony_ci 1044bf215546Sopenharmony_ci progress = true; 1045bf215546Sopenharmony_ci } 1046bf215546Sopenharmony_ci 1047bf215546Sopenharmony_ci _mesa_set_destroy(visited_blocks, NULL); 1048bf215546Sopenharmony_ci 1049bf215546Sopenharmony_ci return progress; 1050bf215546Sopenharmony_ci} 1051bf215546Sopenharmony_ci 1052bf215546Sopenharmony_cistruct ssa_def_to_reg_state { 1053bf215546Sopenharmony_ci nir_function_impl *impl; 1054bf215546Sopenharmony_ci bool progress; 1055bf215546Sopenharmony_ci}; 1056bf215546Sopenharmony_ci 1057bf215546Sopenharmony_cistatic bool 1058bf215546Sopenharmony_cidest_replace_ssa_with_reg(nir_dest *dest, void *void_state) 1059bf215546Sopenharmony_ci{ 1060bf215546Sopenharmony_ci struct ssa_def_to_reg_state *state = void_state; 1061bf215546Sopenharmony_ci 1062bf215546Sopenharmony_ci if (!dest->is_ssa) 1063bf215546Sopenharmony_ci return true; 1064bf215546Sopenharmony_ci 1065bf215546Sopenharmony_ci nir_register *reg = create_reg_for_ssa_def(&dest->ssa, state->impl); 1066bf215546Sopenharmony_ci 1067bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_src(&dest->ssa, nir_src_for_reg(reg)); 1068bf215546Sopenharmony_ci 1069bf215546Sopenharmony_ci nir_instr *instr = dest->ssa.parent_instr; 1070bf215546Sopenharmony_ci *dest = nir_dest_for_reg(reg); 1071bf215546Sopenharmony_ci dest->reg.parent_instr = instr; 1072bf215546Sopenharmony_ci list_addtail(&dest->reg.def_link, ®->defs); 1073bf215546Sopenharmony_ci 1074bf215546Sopenharmony_ci state->progress = true; 1075bf215546Sopenharmony_ci 1076bf215546Sopenharmony_ci return true; 1077bf215546Sopenharmony_ci} 1078bf215546Sopenharmony_ci 1079bf215546Sopenharmony_cistatic bool 1080bf215546Sopenharmony_cissa_def_is_local_to_block(nir_ssa_def *def, UNUSED void *state) 1081bf215546Sopenharmony_ci{ 1082bf215546Sopenharmony_ci nir_block *block = def->parent_instr->block; 1083bf215546Sopenharmony_ci nir_foreach_use(use_src, def) { 1084bf215546Sopenharmony_ci if (use_src->parent_instr->block != block || 1085bf215546Sopenharmony_ci use_src->parent_instr->type == nir_instr_type_phi) { 1086bf215546Sopenharmony_ci return false; 1087bf215546Sopenharmony_ci } 1088bf215546Sopenharmony_ci } 1089bf215546Sopenharmony_ci 1090bf215546Sopenharmony_ci if (!list_is_empty(&def->if_uses)) 1091bf215546Sopenharmony_ci return false; 1092bf215546Sopenharmony_ci 1093bf215546Sopenharmony_ci return true; 1094bf215546Sopenharmony_ci} 1095bf215546Sopenharmony_ci 1096bf215546Sopenharmony_ci/** Lower all of the SSA defs in a block to registers 1097bf215546Sopenharmony_ci * 1098bf215546Sopenharmony_ci * This performs the very simple operation of blindly replacing all of the SSA 1099bf215546Sopenharmony_ci * defs in the given block with registers. If not used carefully, this may 1100bf215546Sopenharmony_ci * result in phi nodes with register sources which is technically invalid. 1101bf215546Sopenharmony_ci * Fortunately, the register-based into-SSA pass handles them anyway. 1102bf215546Sopenharmony_ci */ 1103bf215546Sopenharmony_cibool 1104bf215546Sopenharmony_cinir_lower_ssa_defs_to_regs_block(nir_block *block) 1105bf215546Sopenharmony_ci{ 1106bf215546Sopenharmony_ci nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); 1107bf215546Sopenharmony_ci nir_shader *shader = impl->function->shader; 1108bf215546Sopenharmony_ci 1109bf215546Sopenharmony_ci struct ssa_def_to_reg_state state = { 1110bf215546Sopenharmony_ci .impl = impl, 1111bf215546Sopenharmony_ci .progress = false, 1112bf215546Sopenharmony_ci }; 1113bf215546Sopenharmony_ci 1114bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 1115bf215546Sopenharmony_ci if (instr->type == nir_instr_type_ssa_undef) { 1116bf215546Sopenharmony_ci /* Undefs are just a read of something never written. */ 1117bf215546Sopenharmony_ci nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(instr); 1118bf215546Sopenharmony_ci nir_register *reg = create_reg_for_ssa_def(&undef->def, state.impl); 1119bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_src(&undef->def, nir_src_for_reg(reg)); 1120bf215546Sopenharmony_ci } else if (instr->type == nir_instr_type_load_const) { 1121bf215546Sopenharmony_ci /* Constant loads are SSA-only, we need to insert a move */ 1122bf215546Sopenharmony_ci nir_load_const_instr *load = nir_instr_as_load_const(instr); 1123bf215546Sopenharmony_ci nir_register *reg = create_reg_for_ssa_def(&load->def, state.impl); 1124bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_src(&load->def, nir_src_for_reg(reg)); 1125bf215546Sopenharmony_ci 1126bf215546Sopenharmony_ci nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov); 1127bf215546Sopenharmony_ci mov->src[0].src = nir_src_for_ssa(&load->def); 1128bf215546Sopenharmony_ci mov->dest.dest = nir_dest_for_reg(reg); 1129bf215546Sopenharmony_ci mov->dest.write_mask = (1 << reg->num_components) - 1; 1130bf215546Sopenharmony_ci nir_instr_insert(nir_after_instr(&load->instr), &mov->instr); 1131bf215546Sopenharmony_ci } else if (nir_foreach_ssa_def(instr, ssa_def_is_local_to_block, NULL)) { 1132bf215546Sopenharmony_ci /* If the SSA def produced by this instruction is only in the block 1133bf215546Sopenharmony_ci * in which it is defined and is not used by ifs or phis, then we 1134bf215546Sopenharmony_ci * don't have a reason to convert it to a register. 1135bf215546Sopenharmony_ci */ 1136bf215546Sopenharmony_ci } else { 1137bf215546Sopenharmony_ci nir_foreach_dest(instr, dest_replace_ssa_with_reg, &state); 1138bf215546Sopenharmony_ci } 1139bf215546Sopenharmony_ci } 1140bf215546Sopenharmony_ci 1141bf215546Sopenharmony_ci return state.progress; 1142bf215546Sopenharmony_ci} 1143