/*
 * Copyright © 2018 Red Hat
 * Copyright © 2019 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Rob Clark (robdclark@gmail.com)
 *    Daniel Schürmann (daniel.schuermann@campus.tu-berlin.de)
 *    Rhys Perry (pendingchaos02@gmail.com)
 *
 */

#include "nir.h"


/*
 * A simple pass that moves some instructions into the least common
 * ancestor of the consuming instructions.
 */

bool
nir_can_move_instr(nir_instr *instr, nir_move_options options)
{
   switch (instr->type) {
   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef: {
      return options & nir_move_const_undef;
   }
   case nir_instr_type_alu: {
      if (nir_op_is_vec(nir_instr_as_alu(instr)->op) ||
          nir_instr_as_alu(instr)->op == nir_op_b2i32)
         return options & nir_move_copies;
      if (nir_alu_instr_is_comparison(nir_instr_as_alu(instr)))
         return options & nir_move_comparisons;
      return false;
   }
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_load_ubo:
      case nir_intrinsic_load_ubo_vec4:
         return options & nir_move_load_ubo;
      case nir_intrinsic_load_ssbo:
         return (options & nir_move_load_ssbo) &&
                nir_intrinsic_can_reorder(intrin);
      case nir_intrinsic_load_input:
      case nir_intrinsic_load_interpolated_input:
      case nir_intrinsic_load_per_vertex_input:
         return options & nir_move_load_input;
      case nir_intrinsic_load_uniform:
         return options & nir_move_load_uniform;
      default:
         return false;
      }
   }
   default:
      return false;
   }
}

static nir_loop *
get_innermost_loop(nir_cf_node *node)
{
   for (; node != NULL; node = node->parent) {
      if (node->type == nir_cf_node_loop)
         return (nir_loop *)node;
   }
   return NULL;
}

static bool
loop_contains_block(nir_loop *loop, nir_block *block)
{
   nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
   nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));

   return block->index > before->index && block->index < after->index;
}
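
/* Illustrative sketch (not part of the pass) of why the index comparison
 * above works: block indices are assigned in source order, so with NIR's
 * structured control flow a loop's body blocks all fall strictly between
 * the block just before the loop and the block just after it:
 *
 *    block_0           <- "before", index 0
 *    loop {
 *       block_1 ...    <- body, indices 1 .. n-1
 *    }
 *    block_n           <- "after", index n
 *
 * so "before->index < block->index < after->index" holds exactly for the
 * body blocks. This relies on valid nir_metadata_block_index, which
 * nir_opt_sink() requires below.
 */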

/* Given the LCA of all uses and the definition, find a block on the path
 * between them in the dominance tree that is outside of as many loops as
 * possible. If "sink_out_of_loops" is false, then we disallow sinking the
 * definition outside of the loop it's defined in (if any).
 */
static nir_block *
adjust_block_for_loops(nir_block *use_block, nir_block *def_block,
                       bool sink_out_of_loops)
{
   nir_loop *def_loop = NULL;
   if (!sink_out_of_loops)
      def_loop = get_innermost_loop(&def_block->cf_node);

   for (nir_block *cur_block = use_block; cur_block != def_block->imm_dom;
        cur_block = cur_block->imm_dom) {
      if (!sink_out_of_loops && def_loop &&
          !loop_contains_block(def_loop, use_block)) {
         use_block = cur_block;
         continue;
      }

      nir_cf_node *next = nir_cf_node_next(&cur_block->cf_node);
      if (next && next->type == nir_cf_node_loop) {
         nir_loop *following_loop = nir_cf_node_as_loop(next);
         if (loop_contains_block(following_loop, use_block)) {
            use_block = cur_block;
            continue;
         }
      }
   }

   return use_block;
}

/* Iterate over an SSA def's uses and try to find a more optimal block to
 * move it to, using the dominance tree. In short, if all of the uses are
 * contained in a single block, the load will be moved there; otherwise it
 * will be moved to the least common ancestor block of all the uses.
 */
static nir_block *
get_preferred_block(nir_ssa_def *def, bool sink_out_of_loops)
{
   nir_block *lca = NULL;

   nir_foreach_use(use, def) {
      nir_instr *instr = use->parent_instr;
      nir_block *use_block = instr->block;

      /*
       * Kind of an ugly special case, but phi instructions need to appear
       * first in the block, so by definition we can't move an instruction
       * into a block where it is consumed by a phi instruction. We could
       * conceivably move it into a dominator block.
       */
      if (instr->type == nir_instr_type_phi) {
         nir_phi_instr *phi = nir_instr_as_phi(instr);
         nir_block *phi_lca = NULL;
         nir_foreach_phi_src(src, phi) {
            if (&src->src == use)
               phi_lca = nir_dominance_lca(phi_lca, src->pred);
         }
         use_block = phi_lca;
      }

      lca = nir_dominance_lca(lca, use_block);
   }

   nir_foreach_if_use(use, def) {
      nir_block *use_block =
         nir_cf_node_as_block(nir_cf_node_prev(&use->parent_if->cf_node));

      lca = nir_dominance_lca(lca, use_block);
   }

   /* Return early if we didn't find any reachable user. */
   if (!lca)
      return NULL;

   /* We don't sink any instructions into loops to avoid repeated executions.
    * This might occasionally increase register pressure, but seems overall
    * the better choice.
    */
   lca = adjust_block_for_loops(lca, def->parent_instr->block,
                                sink_out_of_loops);
   assert(nir_block_dominates(def->parent_instr->block, lca));

   return lca;
}
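
/* A small illustration of the LCA choice above (hypothetical NIR-style
 * pseudocode, not output of this file): given
 *
 *    block_0:
 *       ssa_1 = load_ubo ...
 *       if ... {
 *    block_1:
 *          use ssa_1
 *       } else {
 *    block_2:
 *          use ssa_1
 *       }
 *
 * the uses sit in block_1 and block_2, whose dominance LCA is block_0, so
 * the load cannot be sunk past the if. If both uses were in block_1 instead,
 * get_preferred_block() would return block_1 and the load would no longer
 * execute on the block_2 path.
 */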

static bool
can_sink_out_of_loop(nir_intrinsic_instr *intrin)
{
   /* Don't sink buffer loads out of loops because that can make their
    * resource divergent and break code such as that generated by
    * nir_lower_non_uniform_access.
    */
   return intrin->intrinsic != nir_intrinsic_load_ubo &&
          intrin->intrinsic != nir_intrinsic_load_ssbo;
}

bool
nir_opt_sink(nir_shader *shader, nir_move_options options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_metadata_require(function->impl,
                           nir_metadata_block_index | nir_metadata_dominance);

      nir_foreach_block_reverse(block, function->impl) {
         nir_foreach_instr_reverse_safe(instr, block) {
            if (!nir_can_move_instr(instr, options))
               continue;

            nir_ssa_def *def = nir_instr_ssa_def(instr);

            bool sink_out_of_loops =
               instr->type != nir_instr_type_intrinsic ||
               can_sink_out_of_loop(nir_instr_as_intrinsic(instr));
            nir_block *use_block =
               get_preferred_block(def, sink_out_of_loops);

            if (!use_block || use_block == instr->block)
               continue;

            nir_instr_remove(instr);
            nir_instr_insert(nir_after_phis(use_block), instr);

            progress = true;
         }
      }

      nir_metadata_preserve(function->impl,
                            nir_metadata_block_index | nir_metadata_dominance);
   }

   return progress;
}
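
/* Example call sites (illustrative; the option sets chosen here are
 * hypothetical and backend-specific):
 *
 *    bool progress = nir_opt_sink(shader,
 *                                 nir_move_const_undef |
 *                                 nir_move_comparisons |
 *                                 nir_move_load_ubo);
 *
 * or, in a driver's optimization loop, via the NIR_PASS macro:
 *
 *    NIR_PASS(progress, shader, nir_opt_sink, nir_move_load_input);
 */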