/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_constant_expressions.h"
#include "nir_deref.h"
#include <math.h>

/*
 * Implements SSA-based constant folding.
 */

struct constant_fold_state {
   bool has_load_constant;
   bool has_indirect_load_const;
};

static bool
try_fold_alu(nir_builder *b, nir_alu_instr *alu)
{
   nir_const_value src[NIR_MAX_VEC_COMPONENTS][NIR_MAX_VEC_COMPONENTS];

   if (!alu->dest.dest.is_ssa)
      return false;

   /* If any outputs/inputs have unsized types, then we need to guess the
    * bit-size. In this case, the validator ensures that all bit-sizes match
    * so we can just take the bit-size from the first output/input with an
    * unsized type. If all the outputs/inputs are sized then we don't need to
    * guess the bit-size at all because the code we generate for constant
    * opcodes in this case already knows the sizes of the types involved and
    * does not need the provided bit-size for anything (although it still
    * requires a valid bit-size to be passed in).
    */
   unsigned bit_size = 0;
   if (!nir_alu_type_get_type_size(nir_op_infos[alu->op].output_type))
      bit_size = alu->dest.dest.ssa.bit_size;

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      if (!alu->src[i].src.is_ssa)
         return false;

      if (bit_size == 0 &&
          !nir_alu_type_get_type_size(nir_op_infos[alu->op].input_types[i]))
         bit_size = alu->src[i].src.ssa->bit_size;

      nir_instr *src_instr = alu->src[i].src.ssa->parent_instr;

      if (src_instr->type != nir_instr_type_load_const)
         return false;
      nir_load_const_instr *load_const = nir_instr_as_load_const(src_instr);

      for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i);
           j++) {
         src[i][j] = load_const->value[alu->src[i].swizzle[j]];
      }

      /* We shouldn't have any source modifiers in the optimization loop. */
      assert(!alu->src[i].abs && !alu->src[i].negate);
   }

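   /* If bit_size is still zero here, every type involved was sized, so
    * nir_eval_const_opcode() will not actually use the value; 32 is just an
    * arbitrary valid choice (see the comment above the loop).
    */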
   if (bit_size == 0)
      bit_size = 32;

   /* We shouldn't have any saturate modifiers in the optimization loop. */
   assert(!alu->dest.saturate);

   nir_const_value dest[NIR_MAX_VEC_COMPONENTS];
   nir_const_value *srcs[NIR_MAX_VEC_COMPONENTS];
   memset(dest, 0, sizeof(dest));
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; ++i)
      srcs[i] = src[i];
   nir_eval_const_opcode(alu->op, dest, alu->dest.dest.ssa.num_components,
                         bit_size, srcs,
                         b->shader->info.float_controls_execution_mode);

   b->cursor = nir_before_instr(&alu->instr);
   nir_ssa_def *imm = nir_build_imm(b, alu->dest.dest.ssa.num_components,
                                    alu->dest.dest.ssa.bit_size,
                                    dest);
   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, imm);
   nir_instr_remove(&alu->instr);
   nir_instr_free(&alu->instr);

   return true;
}

static nir_const_value *
const_value_for_deref(nir_deref_instr *deref)
{
   if (!nir_deref_mode_is(deref, nir_var_mem_constant))
      return NULL;

   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);
   if (path.path[0]->deref_type != nir_deref_type_var)
      goto fail;

   nir_variable *var = path.path[0]->var;
   assert(var->data.mode == nir_var_mem_constant);
   if (var->constant_initializer == NULL)
      goto fail;

   nir_constant *c = var->constant_initializer;
   nir_const_value *v = NULL; /* Vector value for array-deref-of-vec */

   for (unsigned i = 1; path.path[i] != NULL; i++) {
      nir_deref_instr *p = path.path[i];
      switch (p->deref_type) {
      case nir_deref_type_var:
         unreachable("Deref paths can only start with a var deref");

      case nir_deref_type_array: {
         assert(v == NULL);
         if (!nir_src_is_const(p->arr.index))
            goto fail;

         uint64_t idx = nir_src_as_uint(p->arr.index);
         if (c->num_elements > 0) {
            assert(glsl_type_is_array(path.path[i-1]->type));
            if (idx >= c->num_elements)
               goto fail;
            c = c->elements[idx];
         } else {
            assert(glsl_type_is_vector(path.path[i-1]->type));
            assert(glsl_type_is_scalar(p->type));
            if (idx >= NIR_MAX_VEC_COMPONENTS)
               goto fail;
            v = &c->values[idx];
         }
         break;
      }

      case nir_deref_type_struct:
         assert(glsl_type_is_struct(path.path[i-1]->type));
         assert(v == NULL && c->num_elements > 0);
         if (p->strct.index >= c->num_elements)
            goto fail;
         c = c->elements[p->strct.index];
         break;

      default:
         goto fail;
      }
   }

   /* We have to have ended at a vector */
   assert(c->num_elements == 0);
   /* Free any heap allocation made for a long deref path before returning. */
   nir_deref_path_finish(&path);
   return v ? v : c->values;

fail:
   nir_deref_path_finish(&path);
   return NULL;
}

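/* Fold intrinsics whose result or side effect is fully determined by
 * constant sources: demote/discard/terminate with a constant condition,
 * loads through derefs of constant-initialized variables, load_constant
 * from the shader's constant data, and subgroup operations whose data
 * source is constant.
 */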
static bool
try_fold_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                   struct constant_fold_state *state)
{
   switch (intrin->intrinsic) {
   case nir_intrinsic_demote_if:
   case nir_intrinsic_discard_if:
   case nir_intrinsic_terminate_if:
      if (nir_src_is_const(intrin->src[0])) {
         if (nir_src_as_bool(intrin->src[0])) {
            b->cursor = nir_before_instr(&intrin->instr);
            nir_intrinsic_op op;
            switch (intrin->intrinsic) {
            case nir_intrinsic_discard_if:
               op = nir_intrinsic_discard;
               break;
            case nir_intrinsic_demote_if:
               op = nir_intrinsic_demote;
               break;
            case nir_intrinsic_terminate_if:
               op = nir_intrinsic_terminate;
               break;
            default:
               unreachable("invalid intrinsic");
            }
            nir_intrinsic_instr *new_instr =
               nir_intrinsic_instr_create(b->shader, op);
            nir_builder_instr_insert(b, &new_instr->instr);
         }
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;

   case nir_intrinsic_load_deref: {
      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      nir_const_value *v = const_value_for_deref(deref);
      if (v) {
         b->cursor = nir_before_instr(&intrin->instr);
         nir_ssa_def *val = nir_build_imm(b, intrin->dest.ssa.num_components,
                                          intrin->dest.ssa.bit_size, v);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, val);
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;
   }

   case nir_intrinsic_load_constant: {
      state->has_load_constant = true;

      if (!nir_src_is_const(intrin->src[0])) {
         state->has_indirect_load_const = true;
         return false;
      }

      unsigned offset = nir_src_as_uint(intrin->src[0]);
      unsigned base = nir_intrinsic_base(intrin);
      unsigned range = nir_intrinsic_range(intrin);
      assert(base + range <= b->shader->constant_data_size);

      b->cursor = nir_before_instr(&intrin->instr);
      nir_ssa_def *val;
      if (offset >= range) {
         val = nir_ssa_undef(b, intrin->dest.ssa.num_components,
                             intrin->dest.ssa.bit_size);
      } else {
         nir_const_value imm[NIR_MAX_VEC_COMPONENTS];
         memset(imm, 0, sizeof(imm));
         uint8_t *data = (uint8_t *)b->shader->constant_data + base;
         for (unsigned i = 0; i < intrin->num_components; i++) {
            unsigned bytes = intrin->dest.ssa.bit_size / 8;
            bytes = MIN2(bytes, range - offset);

            memcpy(&imm[i].u64, data + offset, bytes);
            offset += bytes;
         }
         val = nir_build_imm(b, intrin->dest.ssa.num_components,
                             intrin->dest.ssa.bit_size, imm);
      }
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, val);
      nir_instr_remove(&intrin->instr);
      return true;
   }

   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_shuffle:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
      /* All of these have the data payload in the first source. They may
       * have a second source with a shuffle index but that doesn't matter if
       * the data is constant.
       */
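      /* A compile-time constant is necessarily uniform across the subgroup,
       * so replacing the result with the first source is correct for the
       * vote and shuffle variants as well.
       */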
      if (nir_src_is_const(intrin->src[0])) {
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  intrin->src[0].ssa);
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;

   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
      if (nir_src_is_const(intrin->src[0])) {
         b->cursor = nir_before_instr(&intrin->instr);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  nir_imm_true(b));
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;

   default:
      return false;
   }
}

static bool
try_fold_txb_to_tex(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txb);

   const int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);

   /* nir_to_tgsi_lower_tex mangles many kinds of texture instructions,
    * including txb, into invalid states. It removes the special
    * parameters and appends the values to the texture coordinate.
    */
   if (bias_idx < 0)
      return false;

   if (nir_src_is_const(tex->src[bias_idx].src) &&
       nir_src_as_float(tex->src[bias_idx].src) == 0.0) {
      nir_tex_instr_remove_src(tex, bias_idx);
      tex->op = nir_texop_tex;
      return true;
   }

   return false;
}

static bool
try_fold_tex_offset(nir_tex_instr *tex, unsigned *index,
                    nir_tex_src_type src_type)
{
   const int src_idx = nir_tex_instr_src_index(tex, src_type);
   if (src_idx < 0)
      return false;

   if (!nir_src_is_const(tex->src[src_idx].src))
      return false;

   *index += nir_src_as_uint(tex->src[src_idx].src);
   nir_tex_instr_remove_src(tex, src_idx);

   return true;
}

static bool
try_fold_tex(nir_builder *b, nir_tex_instr *tex)
{
   bool progress = false;

   progress |= try_fold_tex_offset(tex, &tex->texture_index,
                                   nir_tex_src_texture_offset);
   progress |= try_fold_tex_offset(tex, &tex->sampler_index,
                                   nir_tex_src_sampler_offset);

   /* txb with a bias of constant zero is just tex. */
   if (tex->op == nir_texop_txb)
      progress |= try_fold_txb_to_tex(b, tex);

   return progress;
}

static bool
try_fold_instr(nir_builder *b, nir_instr *instr, void *_state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return try_fold_alu(b, nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return try_fold_intrinsic(b, nir_instr_as_intrinsic(instr), _state);
   case nir_instr_type_tex:
      return try_fold_tex(b, nir_instr_as_tex(instr));
   default:
      /* Don't know how to constant fold */
      return false;
   }
}

bool
nir_opt_constant_folding(nir_shader *shader)
{
   struct constant_fold_state state;
   state.has_load_constant = false;
   state.has_indirect_load_const = false;

   bool progress = nir_shader_instructions_pass(shader, try_fold_instr,
                                                nir_metadata_block_index |
                                                nir_metadata_dominance,
                                                &state);

   /* This doesn't free the constant data if there are no constant loads,
    * because the data might still be used: the loads may have been lowered
    * to load_ubo.
    */
   if (state.has_load_constant && !state.has_indirect_load_const &&
       shader->constant_data_size) {
      ralloc_free(shader->constant_data);
      shader->constant_data = NULL;
      shader->constant_data_size = 0;
   }

   return progress;
}