1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Red Hat 3bf215546Sopenharmony_ci * Copyright © 2019 Valve Corporation 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22bf215546Sopenharmony_ci * IN THE SOFTWARE. 23bf215546Sopenharmony_ci * 24bf215546Sopenharmony_ci * Authors: 25bf215546Sopenharmony_ci * Rob Clark (robdclark@gmail.com> 26bf215546Sopenharmony_ci * Daniel Schürmann (daniel.schuermann@campus.tu-berlin.de) 27bf215546Sopenharmony_ci * Rhys Perry (pendingchaos02@gmail.com) 28bf215546Sopenharmony_ci * 29bf215546Sopenharmony_ci */ 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "nir.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci/* 35bf215546Sopenharmony_ci * A simple pass that moves some instructions into the least common 36bf215546Sopenharmony_ci * anscestor of consuming instructions. 37bf215546Sopenharmony_ci */ 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_cibool 40bf215546Sopenharmony_cinir_can_move_instr(nir_instr *instr, nir_move_options options) 41bf215546Sopenharmony_ci{ 42bf215546Sopenharmony_ci switch (instr->type) { 43bf215546Sopenharmony_ci case nir_instr_type_load_const: 44bf215546Sopenharmony_ci case nir_instr_type_ssa_undef: { 45bf215546Sopenharmony_ci return options & nir_move_const_undef; 46bf215546Sopenharmony_ci } 47bf215546Sopenharmony_ci case nir_instr_type_alu: { 48bf215546Sopenharmony_ci if (nir_op_is_vec(nir_instr_as_alu(instr)->op) || 49bf215546Sopenharmony_ci nir_instr_as_alu(instr)->op == nir_op_b2i32) 50bf215546Sopenharmony_ci return options & nir_move_copies; 51bf215546Sopenharmony_ci if (nir_alu_instr_is_comparison(nir_instr_as_alu(instr))) 52bf215546Sopenharmony_ci return options & nir_move_comparisons; 53bf215546Sopenharmony_ci return false; 54bf215546Sopenharmony_ci } 55bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 56bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 57bf215546Sopenharmony_ci switch (intrin->intrinsic) { 58bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 59bf215546Sopenharmony_ci case nir_intrinsic_load_ubo_vec4: 60bf215546Sopenharmony_ci return options & nir_move_load_ubo; 61bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo: 62bf215546Sopenharmony_ci return (options & nir_move_load_ssbo) && nir_intrinsic_can_reorder(intrin); 63bf215546Sopenharmony_ci case nir_intrinsic_load_input: 64bf215546Sopenharmony_ci case nir_intrinsic_load_interpolated_input: 65bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_input: 66bf215546Sopenharmony_ci return options & nir_move_load_input; 67bf215546Sopenharmony_ci case nir_intrinsic_load_uniform: 68bf215546Sopenharmony_ci return options & nir_move_load_uniform; 69bf215546Sopenharmony_ci default: 70bf215546Sopenharmony_ci return false; 71bf215546Sopenharmony_ci } 72bf215546Sopenharmony_ci } 73bf215546Sopenharmony_ci default: 74bf215546Sopenharmony_ci return false; 75bf215546Sopenharmony_ci } 76bf215546Sopenharmony_ci} 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_cistatic nir_loop * 79bf215546Sopenharmony_ciget_innermost_loop(nir_cf_node *node) 80bf215546Sopenharmony_ci{ 81bf215546Sopenharmony_ci for (; node != NULL; node = node->parent) { 82bf215546Sopenharmony_ci if (node->type == nir_cf_node_loop) 83bf215546Sopenharmony_ci return (nir_loop*)node; 84bf215546Sopenharmony_ci } 85bf215546Sopenharmony_ci return NULL; 86bf215546Sopenharmony_ci} 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_cistatic bool 89bf215546Sopenharmony_ciloop_contains_block(nir_loop *loop, nir_block *block) 90bf215546Sopenharmony_ci{ 91bf215546Sopenharmony_ci nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); 92bf215546Sopenharmony_ci nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci return block->index > before->index && block->index < after->index; 95bf215546Sopenharmony_ci} 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci/* Given the LCA of all uses and the definition, find a block on the path 98bf215546Sopenharmony_ci * between them in the dominance tree that is outside of as many loops as 99bf215546Sopenharmony_ci * possible. If "sink_out_of_loops" is false, then we disallow sinking the 100bf215546Sopenharmony_ci * definition outside of the loop it's defined in (if any). 101bf215546Sopenharmony_ci */ 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_cistatic nir_block * 104bf215546Sopenharmony_ciadjust_block_for_loops(nir_block *use_block, nir_block *def_block, 105bf215546Sopenharmony_ci bool sink_out_of_loops) 106bf215546Sopenharmony_ci{ 107bf215546Sopenharmony_ci nir_loop *def_loop = NULL; 108bf215546Sopenharmony_ci if (!sink_out_of_loops) 109bf215546Sopenharmony_ci def_loop = get_innermost_loop(&def_block->cf_node); 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci for (nir_block *cur_block = use_block; cur_block != def_block->imm_dom; 112bf215546Sopenharmony_ci cur_block = cur_block->imm_dom) { 113bf215546Sopenharmony_ci if (!sink_out_of_loops && def_loop && 114bf215546Sopenharmony_ci !loop_contains_block(def_loop, use_block)) { 115bf215546Sopenharmony_ci use_block = cur_block; 116bf215546Sopenharmony_ci continue; 117bf215546Sopenharmony_ci } 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci nir_cf_node *next = nir_cf_node_next(&cur_block->cf_node); 120bf215546Sopenharmony_ci if (next && next->type == nir_cf_node_loop) { 121bf215546Sopenharmony_ci nir_loop *following_loop = nir_cf_node_as_loop(next); 122bf215546Sopenharmony_ci if (loop_contains_block(following_loop, use_block)) { 123bf215546Sopenharmony_ci use_block = cur_block; 124bf215546Sopenharmony_ci continue; 125bf215546Sopenharmony_ci } 126bf215546Sopenharmony_ci } 127bf215546Sopenharmony_ci } 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci return use_block; 130bf215546Sopenharmony_ci} 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci/* iterate a ssa def's use's and try to find a more optimal block to 133bf215546Sopenharmony_ci * move it to, using the dominance tree. In short, if all of the uses 134bf215546Sopenharmony_ci * are contained in a single block, the load will be moved there, 135bf215546Sopenharmony_ci * otherwise it will be move to the least common ancestor block of all 136bf215546Sopenharmony_ci * the uses 137bf215546Sopenharmony_ci */ 138bf215546Sopenharmony_cistatic nir_block * 139bf215546Sopenharmony_ciget_preferred_block(nir_ssa_def *def, bool sink_out_of_loops) 140bf215546Sopenharmony_ci{ 141bf215546Sopenharmony_ci nir_block *lca = NULL; 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci nir_foreach_use(use, def) { 144bf215546Sopenharmony_ci nir_instr *instr = use->parent_instr; 145bf215546Sopenharmony_ci nir_block *use_block = instr->block; 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci /* 148bf215546Sopenharmony_ci * Kind of an ugly special-case, but phi instructions 149bf215546Sopenharmony_ci * need to appear first in the block, so by definition 150bf215546Sopenharmony_ci * we can't move an instruction into a block where it is 151bf215546Sopenharmony_ci * consumed by a phi instruction. We could conceivably 152bf215546Sopenharmony_ci * move it into a dominator block. 153bf215546Sopenharmony_ci */ 154bf215546Sopenharmony_ci if (instr->type == nir_instr_type_phi) { 155bf215546Sopenharmony_ci nir_phi_instr *phi = nir_instr_as_phi(instr); 156bf215546Sopenharmony_ci nir_block *phi_lca = NULL; 157bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 158bf215546Sopenharmony_ci if (&src->src == use) 159bf215546Sopenharmony_ci phi_lca = nir_dominance_lca(phi_lca, src->pred); 160bf215546Sopenharmony_ci } 161bf215546Sopenharmony_ci use_block = phi_lca; 162bf215546Sopenharmony_ci } 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci lca = nir_dominance_lca(lca, use_block); 165bf215546Sopenharmony_ci } 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci nir_foreach_if_use(use, def) { 168bf215546Sopenharmony_ci nir_block *use_block = 169bf215546Sopenharmony_ci nir_cf_node_as_block(nir_cf_node_prev(&use->parent_if->cf_node)); 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci lca = nir_dominance_lca(lca, use_block); 172bf215546Sopenharmony_ci } 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci /* return in case, we didn't find a reachable user */ 175bf215546Sopenharmony_ci if (!lca) 176bf215546Sopenharmony_ci return NULL; 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci /* We don't sink any instructions into loops to avoid repeated executions 179bf215546Sopenharmony_ci * This might occasionally increase register pressure, but seems overall 180bf215546Sopenharmony_ci * the better choice. 181bf215546Sopenharmony_ci */ 182bf215546Sopenharmony_ci lca = adjust_block_for_loops(lca, def->parent_instr->block, 183bf215546Sopenharmony_ci sink_out_of_loops); 184bf215546Sopenharmony_ci assert(nir_block_dominates(def->parent_instr->block, lca)); 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci return lca; 187bf215546Sopenharmony_ci} 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_cistatic bool 190bf215546Sopenharmony_cican_sink_out_of_loop(nir_intrinsic_instr *intrin) 191bf215546Sopenharmony_ci{ 192bf215546Sopenharmony_ci /* Don't sink buffer loads out of loops because that can make its 193bf215546Sopenharmony_ci * resource divergent and break code like that which is generated 194bf215546Sopenharmony_ci * by nir_lower_non_uniform_access. 195bf215546Sopenharmony_ci */ 196bf215546Sopenharmony_ci return intrin->intrinsic != nir_intrinsic_load_ubo && 197bf215546Sopenharmony_ci intrin->intrinsic != nir_intrinsic_load_ssbo; 198bf215546Sopenharmony_ci} 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_cibool 201bf215546Sopenharmony_cinir_opt_sink(nir_shader *shader, nir_move_options options) 202bf215546Sopenharmony_ci{ 203bf215546Sopenharmony_ci bool progress = false; 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 206bf215546Sopenharmony_ci if (!function->impl) 207bf215546Sopenharmony_ci continue; 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci nir_metadata_require(function->impl, 210bf215546Sopenharmony_ci nir_metadata_block_index | nir_metadata_dominance); 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci nir_foreach_block_reverse(block, function->impl) { 213bf215546Sopenharmony_ci nir_foreach_instr_reverse_safe(instr, block) { 214bf215546Sopenharmony_ci if (!nir_can_move_instr(instr, options)) 215bf215546Sopenharmony_ci continue; 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci nir_ssa_def *def = nir_instr_ssa_def(instr); 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci bool sink_out_of_loops = 220bf215546Sopenharmony_ci instr->type != nir_instr_type_intrinsic || 221bf215546Sopenharmony_ci can_sink_out_of_loop(nir_instr_as_intrinsic(instr)); 222bf215546Sopenharmony_ci nir_block *use_block = 223bf215546Sopenharmony_ci get_preferred_block(def, sink_out_of_loops); 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci if (!use_block || use_block == instr->block) 226bf215546Sopenharmony_ci continue; 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci nir_instr_remove(instr); 229bf215546Sopenharmony_ci nir_instr_insert(nir_after_phis(use_block), instr); 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci progress = true; 232bf215546Sopenharmony_ci } 233bf215546Sopenharmony_ci } 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci nir_metadata_preserve(function->impl, 236bf215546Sopenharmony_ci nir_metadata_block_index | nir_metadata_dominance); 237bf215546Sopenharmony_ci } 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci return progress; 240bf215546Sopenharmony_ci} 241