1/* 2 * Copyright © 2019 Google, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "nir.h" 25#include "nir_vla.h" 26 27/* Lowering for amul instructions, for drivers that support imul24. 28 * This pass will analyze indirect derefs, and convert corresponding 29 * amul instructions to either imul or imul24, depending on the 30 * required range. 31 * 32 * 1) Analyze the uniform variables and build a table of UBOs and SSBOs 33 * that are either too large, or might be too large (unknown size) 34 * for imul24 35 * 36 * 2) Loop thru looking at all the intrinsics, finding dereferences of 37 * large variables, and recursively replacing all amul instructions 38 * used with imul 39 * 40 * 3) Finally loop again thru all instructions replacing any remaining 41 * amul with imul24. At this point any remaining amul instructions 42 * are not involved in calculating an offset into a large variable, 43 * thanks to the 2nd step, so they can be safely replace with imul24. 44 * 45 * Using two passes over all the instructions lets us handle the case 46 * where, due to CSE, an amul is used to calculate an offset into both 47 * a large and small variable. 48 */ 49 50typedef struct { 51 nir_shader *shader; 52 53 int (*type_size)(const struct glsl_type *, bool); 54 55 /* Tables of UBOs and SSBOs mapping driver_location/base whether 56 * they are too large to use imul24: 57 */ 58 bool *large_ubos; 59 bool *large_ssbos; 60 61 /* for cases that we cannot determine UBO/SSBO index, track if *any* 62 * UBO/SSBO is too large for imul24: 63 */ 64 bool has_large_ubo; 65 bool has_large_ssbo; 66 67 unsigned max_slot; 68 69 bool progress; 70} lower_state; 71 72/* Lower 'amul's in offset src of large variables to 'imul': */ 73static bool 74lower_large_src(nir_src *src, void *s) 75{ 76 lower_state *state = s; 77 78 assert(src->is_ssa); 79 80 nir_instr *parent = src->ssa->parent_instr; 81 82 /* No need to visit instructions we've already visited.. this also 83 * avoids infinite recursion when phi's are involved: 84 */ 85 if (parent->pass_flags) 86 return false; 87 88 nir_foreach_src(parent, lower_large_src, state); 89 90 if (parent->type == nir_instr_type_alu) { 91 nir_alu_instr *alu = nir_instr_as_alu(parent); 92 if (alu->op == nir_op_amul) { 93 alu->op = nir_op_imul; 94 state->progress = true; 95 } 96 } 97 98 parent->pass_flags = 1; 99 100 return true; 101} 102 103static bool 104large_ubo(lower_state *state, nir_src src) 105{ 106 if (!nir_src_is_const(src)) 107 return state->has_large_ubo; 108 unsigned idx = nir_src_as_uint(src); 109 assert(idx < state->shader->info.num_ubos); 110 return state->large_ubos[idx]; 111} 112 113static bool 114large_ssbo(lower_state *state, nir_src src) 115{ 116 if (!nir_src_is_const(src)) 117 return state->has_large_ssbo; 118 unsigned idx = nir_src_as_uint(src); 119 assert(idx < state->shader->info.num_ssbos); 120 return state->large_ssbos[idx]; 121} 122 123static void 124lower_intrinsic(lower_state *state, nir_intrinsic_instr *intr) 125{ 126 switch (intr->intrinsic) { 127 case nir_intrinsic_load_ubo: 128 //# src[] = { buffer_index, offset }. 129 if (large_ubo(state, intr->src[0])) 130 lower_large_src(&intr->src[1], state); 131 return; 132 133 case nir_intrinsic_load_ssbo: 134 //# src[] = { buffer_index, offset }. 135 if (large_ssbo(state, intr->src[0])) 136 lower_large_src(&intr->src[1], state); 137 return; 138 139 case nir_intrinsic_store_ssbo: 140 //# src[] = { value, block_index, offset } 141 if (large_ssbo(state, intr->src[1])) 142 lower_large_src(&intr->src[2], state); 143 return; 144 145 case nir_intrinsic_ssbo_atomic_add: 146 case nir_intrinsic_ssbo_atomic_imin: 147 case nir_intrinsic_ssbo_atomic_umin: 148 case nir_intrinsic_ssbo_atomic_imax: 149 case nir_intrinsic_ssbo_atomic_umax: 150 case nir_intrinsic_ssbo_atomic_and: 151 case nir_intrinsic_ssbo_atomic_or: 152 case nir_intrinsic_ssbo_atomic_xor: 153 case nir_intrinsic_ssbo_atomic_exchange: 154 case nir_intrinsic_ssbo_atomic_comp_swap: 155 case nir_intrinsic_ssbo_atomic_fadd: 156 case nir_intrinsic_ssbo_atomic_fmin: 157 case nir_intrinsic_ssbo_atomic_fmax: 158 case nir_intrinsic_ssbo_atomic_fcomp_swap: 159 /* 0: SSBO index 160 * 1: offset 161 */ 162 if (large_ssbo(state, intr->src[0])) 163 lower_large_src(&intr->src[1], state); 164 return; 165 166 case nir_intrinsic_global_atomic_add: 167 case nir_intrinsic_global_atomic_imin: 168 case nir_intrinsic_global_atomic_umin: 169 case nir_intrinsic_global_atomic_imax: 170 case nir_intrinsic_global_atomic_umax: 171 case nir_intrinsic_global_atomic_and: 172 case nir_intrinsic_global_atomic_or: 173 case nir_intrinsic_global_atomic_xor: 174 case nir_intrinsic_global_atomic_exchange: 175 case nir_intrinsic_global_atomic_comp_swap: 176 case nir_intrinsic_global_atomic_fadd: 177 case nir_intrinsic_global_atomic_fmin: 178 case nir_intrinsic_global_atomic_fmax: 179 case nir_intrinsic_global_atomic_fcomp_swap: 180 case nir_intrinsic_load_global_constant: 181 case nir_intrinsic_load_global: 182 /* just assume we that 24b is not sufficient: */ 183 lower_large_src(&intr->src[0], state); 184 return; 185 186 case nir_intrinsic_store_global: 187 /* just assume we that 24b is not sufficient: */ 188 lower_large_src(&intr->src[1], state); 189 return; 190 191 /* These should all be small enough to unconditionally use imul24: */ 192 case nir_intrinsic_shared_atomic_add: 193 case nir_intrinsic_shared_atomic_imin: 194 case nir_intrinsic_shared_atomic_umin: 195 case nir_intrinsic_shared_atomic_imax: 196 case nir_intrinsic_shared_atomic_umax: 197 case nir_intrinsic_shared_atomic_and: 198 case nir_intrinsic_shared_atomic_or: 199 case nir_intrinsic_shared_atomic_xor: 200 case nir_intrinsic_shared_atomic_exchange: 201 case nir_intrinsic_shared_atomic_comp_swap: 202 case nir_intrinsic_shared_atomic_fadd: 203 case nir_intrinsic_shared_atomic_fmin: 204 case nir_intrinsic_shared_atomic_fmax: 205 case nir_intrinsic_shared_atomic_fcomp_swap: 206 case nir_intrinsic_load_uniform: 207 case nir_intrinsic_load_input: 208 case nir_intrinsic_load_output: 209 case nir_intrinsic_store_output: 210 default: 211 return; 212 } 213} 214 215static void 216lower_instr(lower_state *state, nir_instr *instr) 217{ 218 if (instr->type == nir_instr_type_intrinsic) { 219 lower_intrinsic(state, nir_instr_as_intrinsic(instr)); 220 } 221} 222 223static bool 224is_large(lower_state *state, nir_variable *var) 225{ 226 const struct glsl_type *type = glsl_without_array(var->type); 227 unsigned size = state->type_size(type, false); 228 229 /* if size is not known (ie. VLA) then assume the worst: */ 230 if (!size) 231 return true; 232 233 return size >= (1 << 23); 234} 235 236bool 237nir_lower_amul(nir_shader *shader, 238 int (*type_size)(const struct glsl_type *, bool)) 239{ 240 assert(shader->options->has_imul24); 241 assert(type_size); 242 243 NIR_VLA_FILL(bool, large_ubos, shader->info.num_ubos, 0); 244 NIR_VLA_FILL(bool, large_ssbos, shader->info.num_ssbos, 0); 245 246 lower_state state = { 247 .shader = shader, 248 .type_size = type_size, 249 .large_ubos = large_ubos, 250 .large_ssbos = large_ssbos, 251 }; 252 253 /* Figure out which UBOs or SSBOs are large enough to be 254 * disqualified from imul24: 255 */ 256 nir_foreach_variable_in_shader (var, shader) { 257 if (var->data.mode == nir_var_mem_ubo) { 258 if (is_large(&state, var)) { 259 state.has_large_ubo = true; 260 unsigned size = MAX2(1, glsl_array_size(var->type)); 261 for (unsigned i = 0; i < size; i++) 262 state.large_ubos[var->data.binding + i] = true; 263 } 264 } else if (var->data.mode == nir_var_mem_ssbo) { 265 if (is_large(&state, var)) { 266 state.has_large_ssbo = true; 267 unsigned size = MAX2(1, glsl_array_size(var->type)); 268 for (unsigned i = 0; i < size; i++) 269 state.large_ssbos[var->data.binding + i] = true; 270 } 271 } 272 } 273 274 /* clear pass flags: */ 275 nir_foreach_function(function, shader) { 276 nir_function_impl *impl = function->impl; 277 if (!impl) 278 continue; 279 280 nir_foreach_block(block, impl) { 281 nir_foreach_instr(instr, block) { 282 instr->pass_flags = 0; 283 } 284 } 285 } 286 287 nir_foreach_function(function, shader) { 288 nir_function_impl *impl = function->impl; 289 290 if (!impl) 291 continue; 292 293 nir_foreach_block(block, impl) { 294 nir_foreach_instr(instr, block) { 295 lower_instr(&state, instr); 296 } 297 } 298 } 299 300 /* At this point, all 'amul's used in calculating an offset into 301 * a large variable have been replaced with 'imul'. So remaining 302 * 'amul's can be replaced with 'imul24': 303 * 304 * Note the exception for 64b (such as load/store_global where 305 * address size is 64b) as imul24 cannot have 64b bitsize 306 */ 307 nir_foreach_function(function, shader) { 308 nir_function_impl *impl = function->impl; 309 310 if (!impl) 311 continue; 312 313 nir_foreach_block(block, impl) { 314 nir_foreach_instr(instr, block) { 315 if (instr->type != nir_instr_type_alu) 316 continue; 317 318 nir_alu_instr *alu = nir_instr_as_alu(instr); 319 if (alu->op != nir_op_amul) 320 continue; 321 322 if (nir_dest_bit_size(alu->dest.dest) <= 32) 323 alu->op = nir_op_imul24; 324 else 325 alu->op = nir_op_imul; 326 327 state.progress |= true; 328 } 329 } 330 331 nir_metadata_preserve(impl, nir_metadata_block_index | 332 nir_metadata_dominance); 333 334 } 335 336 return state.progress; 337} 338