1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2014 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Jason Ekstrand (jason@jlekstrand.net) 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci */ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include "nir.h" 29bf215546Sopenharmony_ci#include "nir_builder.h" 30bf215546Sopenharmony_ci#include "nir_constant_expressions.h" 31bf215546Sopenharmony_ci#include "nir_deref.h" 32bf215546Sopenharmony_ci#include <math.h> 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci/* 35bf215546Sopenharmony_ci * Implements SSA-based constant folding. 36bf215546Sopenharmony_ci */ 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_cistruct constant_fold_state { 39bf215546Sopenharmony_ci bool has_load_constant; 40bf215546Sopenharmony_ci bool has_indirect_load_const; 41bf215546Sopenharmony_ci}; 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_cistatic bool 44bf215546Sopenharmony_citry_fold_alu(nir_builder *b, nir_alu_instr *alu) 45bf215546Sopenharmony_ci{ 46bf215546Sopenharmony_ci nir_const_value src[NIR_MAX_VEC_COMPONENTS][NIR_MAX_VEC_COMPONENTS]; 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci if (!alu->dest.dest.is_ssa) 49bf215546Sopenharmony_ci return false; 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_ci /* In the case that any outputs/inputs have unsized types, then we need to 52bf215546Sopenharmony_ci * guess the bit-size. In this case, the validator ensures that all 53bf215546Sopenharmony_ci * bit-sizes match so we can just take the bit-size from first 54bf215546Sopenharmony_ci * output/input with an unsized type. If all the outputs/inputs are sized 55bf215546Sopenharmony_ci * then we don't need to guess the bit-size at all because the code we 56bf215546Sopenharmony_ci * generate for constant opcodes in this case already knows the sizes of 57bf215546Sopenharmony_ci * the types involved and does not need the provided bit-size for anything 58bf215546Sopenharmony_ci * (although it still requires to receive a valid bit-size). 59bf215546Sopenharmony_ci */ 60bf215546Sopenharmony_ci unsigned bit_size = 0; 61bf215546Sopenharmony_ci if (!nir_alu_type_get_type_size(nir_op_infos[alu->op].output_type)) 62bf215546Sopenharmony_ci bit_size = alu->dest.dest.ssa.bit_size; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 65bf215546Sopenharmony_ci if (!alu->src[i].src.is_ssa) 66bf215546Sopenharmony_ci return false; 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci if (bit_size == 0 && 69bf215546Sopenharmony_ci !nir_alu_type_get_type_size(nir_op_infos[alu->op].input_types[i])) 70bf215546Sopenharmony_ci bit_size = alu->src[i].src.ssa->bit_size; 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci nir_instr *src_instr = alu->src[i].src.ssa->parent_instr; 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci if (src_instr->type != nir_instr_type_load_const) 75bf215546Sopenharmony_ci return false; 76bf215546Sopenharmony_ci nir_load_const_instr* load_const = nir_instr_as_load_const(src_instr); 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i); 79bf215546Sopenharmony_ci j++) { 80bf215546Sopenharmony_ci src[i][j] = load_const->value[alu->src[i].swizzle[j]]; 81bf215546Sopenharmony_ci } 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci /* We shouldn't have any source modifiers in the optimization loop. */ 84bf215546Sopenharmony_ci assert(!alu->src[i].abs && !alu->src[i].negate); 85bf215546Sopenharmony_ci } 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci if (bit_size == 0) 88bf215546Sopenharmony_ci bit_size = 32; 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci /* We shouldn't have any saturate modifiers in the optimization loop. */ 91bf215546Sopenharmony_ci assert(!alu->dest.saturate); 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci nir_const_value dest[NIR_MAX_VEC_COMPONENTS]; 94bf215546Sopenharmony_ci nir_const_value *srcs[NIR_MAX_VEC_COMPONENTS]; 95bf215546Sopenharmony_ci memset(dest, 0, sizeof(dest)); 96bf215546Sopenharmony_ci for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; ++i) 97bf215546Sopenharmony_ci srcs[i] = src[i]; 98bf215546Sopenharmony_ci nir_eval_const_opcode(alu->op, dest, alu->dest.dest.ssa.num_components, 99bf215546Sopenharmony_ci bit_size, srcs, 100bf215546Sopenharmony_ci b->shader->info.float_controls_execution_mode); 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci b->cursor = nir_before_instr(&alu->instr); 103bf215546Sopenharmony_ci nir_ssa_def *imm = nir_build_imm(b, alu->dest.dest.ssa.num_components, 104bf215546Sopenharmony_ci alu->dest.dest.ssa.bit_size, 105bf215546Sopenharmony_ci dest); 106bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, imm); 107bf215546Sopenharmony_ci nir_instr_remove(&alu->instr); 108bf215546Sopenharmony_ci nir_instr_free(&alu->instr); 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci return true; 111bf215546Sopenharmony_ci} 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_cistatic nir_const_value * 114bf215546Sopenharmony_ciconst_value_for_deref(nir_deref_instr *deref) 115bf215546Sopenharmony_ci{ 116bf215546Sopenharmony_ci if (!nir_deref_mode_is(deref, nir_var_mem_constant)) 117bf215546Sopenharmony_ci return NULL; 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci nir_deref_path path; 120bf215546Sopenharmony_ci nir_deref_path_init(&path, deref, NULL); 121bf215546Sopenharmony_ci if (path.path[0]->deref_type != nir_deref_type_var) 122bf215546Sopenharmony_ci goto fail; 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci nir_variable *var = path.path[0]->var; 125bf215546Sopenharmony_ci assert(var->data.mode == nir_var_mem_constant); 126bf215546Sopenharmony_ci if (var->constant_initializer == NULL) 127bf215546Sopenharmony_ci goto fail; 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci nir_constant *c = var->constant_initializer; 130bf215546Sopenharmony_ci nir_const_value *v = NULL; /* Vector value for array-deref-of-vec */ 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci for (unsigned i = 1; path.path[i] != NULL; i++) { 133bf215546Sopenharmony_ci nir_deref_instr *p = path.path[i]; 134bf215546Sopenharmony_ci switch (p->deref_type) { 135bf215546Sopenharmony_ci case nir_deref_type_var: 136bf215546Sopenharmony_ci unreachable("Deref paths can only start with a var deref"); 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci case nir_deref_type_array: { 139bf215546Sopenharmony_ci assert(v == NULL); 140bf215546Sopenharmony_ci if (!nir_src_is_const(p->arr.index)) 141bf215546Sopenharmony_ci goto fail; 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci uint64_t idx = nir_src_as_uint(p->arr.index); 144bf215546Sopenharmony_ci if (c->num_elements > 0) { 145bf215546Sopenharmony_ci assert(glsl_type_is_array(path.path[i-1]->type)); 146bf215546Sopenharmony_ci if (idx >= c->num_elements) 147bf215546Sopenharmony_ci goto fail; 148bf215546Sopenharmony_ci c = c->elements[idx]; 149bf215546Sopenharmony_ci } else { 150bf215546Sopenharmony_ci assert(glsl_type_is_vector(path.path[i-1]->type)); 151bf215546Sopenharmony_ci assert(glsl_type_is_scalar(p->type)); 152bf215546Sopenharmony_ci if (idx >= NIR_MAX_VEC_COMPONENTS) 153bf215546Sopenharmony_ci goto fail; 154bf215546Sopenharmony_ci v = &c->values[idx]; 155bf215546Sopenharmony_ci } 156bf215546Sopenharmony_ci break; 157bf215546Sopenharmony_ci } 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci case nir_deref_type_struct: 160bf215546Sopenharmony_ci assert(glsl_type_is_struct(path.path[i-1]->type)); 161bf215546Sopenharmony_ci assert(v == NULL && c->num_elements > 0); 162bf215546Sopenharmony_ci if (p->strct.index >= c->num_elements) 163bf215546Sopenharmony_ci goto fail; 164bf215546Sopenharmony_ci c = c->elements[p->strct.index]; 165bf215546Sopenharmony_ci break; 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci default: 168bf215546Sopenharmony_ci goto fail; 169bf215546Sopenharmony_ci } 170bf215546Sopenharmony_ci } 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci /* We have to have ended at a vector */ 173bf215546Sopenharmony_ci assert(c->num_elements == 0); 174bf215546Sopenharmony_ci return v ? v : c->values; 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_cifail: 177bf215546Sopenharmony_ci nir_deref_path_finish(&path); 178bf215546Sopenharmony_ci return NULL; 179bf215546Sopenharmony_ci} 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_cistatic bool 182bf215546Sopenharmony_citry_fold_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, 183bf215546Sopenharmony_ci struct constant_fold_state *state) 184bf215546Sopenharmony_ci{ 185bf215546Sopenharmony_ci switch (intrin->intrinsic) { 186bf215546Sopenharmony_ci case nir_intrinsic_demote_if: 187bf215546Sopenharmony_ci case nir_intrinsic_discard_if: 188bf215546Sopenharmony_ci case nir_intrinsic_terminate_if: 189bf215546Sopenharmony_ci if (nir_src_is_const(intrin->src[0])) { 190bf215546Sopenharmony_ci if (nir_src_as_bool(intrin->src[0])) { 191bf215546Sopenharmony_ci b->cursor = nir_before_instr(&intrin->instr); 192bf215546Sopenharmony_ci nir_intrinsic_op op; 193bf215546Sopenharmony_ci switch (intrin->intrinsic) { 194bf215546Sopenharmony_ci case nir_intrinsic_discard_if: 195bf215546Sopenharmony_ci op = nir_intrinsic_discard; 196bf215546Sopenharmony_ci break; 197bf215546Sopenharmony_ci case nir_intrinsic_demote_if: 198bf215546Sopenharmony_ci op = nir_intrinsic_demote; 199bf215546Sopenharmony_ci break; 200bf215546Sopenharmony_ci case nir_intrinsic_terminate_if: 201bf215546Sopenharmony_ci op = nir_intrinsic_terminate; 202bf215546Sopenharmony_ci break; 203bf215546Sopenharmony_ci default: 204bf215546Sopenharmony_ci unreachable("invalid intrinsic"); 205bf215546Sopenharmony_ci } 206bf215546Sopenharmony_ci nir_intrinsic_instr *new_instr = 207bf215546Sopenharmony_ci nir_intrinsic_instr_create(b->shader, op); 208bf215546Sopenharmony_ci nir_builder_instr_insert(b, &new_instr->instr); 209bf215546Sopenharmony_ci } 210bf215546Sopenharmony_ci nir_instr_remove(&intrin->instr); 211bf215546Sopenharmony_ci return true; 212bf215546Sopenharmony_ci } 213bf215546Sopenharmony_ci return false; 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci case nir_intrinsic_load_deref: { 216bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 217bf215546Sopenharmony_ci nir_const_value *v = const_value_for_deref(deref); 218bf215546Sopenharmony_ci if (v) { 219bf215546Sopenharmony_ci b->cursor = nir_before_instr(&intrin->instr); 220bf215546Sopenharmony_ci nir_ssa_def *val = nir_build_imm(b, intrin->dest.ssa.num_components, 221bf215546Sopenharmony_ci intrin->dest.ssa.bit_size, v); 222bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, val); 223bf215546Sopenharmony_ci nir_instr_remove(&intrin->instr); 224bf215546Sopenharmony_ci return true; 225bf215546Sopenharmony_ci } 226bf215546Sopenharmony_ci return false; 227bf215546Sopenharmony_ci } 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci case nir_intrinsic_load_constant: { 230bf215546Sopenharmony_ci state->has_load_constant = true; 231bf215546Sopenharmony_ci 232bf215546Sopenharmony_ci if (!nir_src_is_const(intrin->src[0])) { 233bf215546Sopenharmony_ci state->has_indirect_load_const = true; 234bf215546Sopenharmony_ci return false; 235bf215546Sopenharmony_ci } 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci unsigned offset = nir_src_as_uint(intrin->src[0]); 238bf215546Sopenharmony_ci unsigned base = nir_intrinsic_base(intrin); 239bf215546Sopenharmony_ci unsigned range = nir_intrinsic_range(intrin); 240bf215546Sopenharmony_ci assert(base + range <= b->shader->constant_data_size); 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci b->cursor = nir_before_instr(&intrin->instr); 243bf215546Sopenharmony_ci nir_ssa_def *val; 244bf215546Sopenharmony_ci if (offset >= range) { 245bf215546Sopenharmony_ci val = nir_ssa_undef(b, intrin->dest.ssa.num_components, 246bf215546Sopenharmony_ci intrin->dest.ssa.bit_size); 247bf215546Sopenharmony_ci } else { 248bf215546Sopenharmony_ci nir_const_value imm[NIR_MAX_VEC_COMPONENTS]; 249bf215546Sopenharmony_ci memset(imm, 0, sizeof(imm)); 250bf215546Sopenharmony_ci uint8_t *data = (uint8_t*)b->shader->constant_data + base; 251bf215546Sopenharmony_ci for (unsigned i = 0; i < intrin->num_components; i++) { 252bf215546Sopenharmony_ci unsigned bytes = intrin->dest.ssa.bit_size / 8; 253bf215546Sopenharmony_ci bytes = MIN2(bytes, range - offset); 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci memcpy(&imm[i].u64, data + offset, bytes); 256bf215546Sopenharmony_ci offset += bytes; 257bf215546Sopenharmony_ci } 258bf215546Sopenharmony_ci val = nir_build_imm(b, intrin->dest.ssa.num_components, 259bf215546Sopenharmony_ci intrin->dest.ssa.bit_size, imm); 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, val); 262bf215546Sopenharmony_ci nir_instr_remove(&intrin->instr); 263bf215546Sopenharmony_ci return true; 264bf215546Sopenharmony_ci } 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci case nir_intrinsic_vote_any: 267bf215546Sopenharmony_ci case nir_intrinsic_vote_all: 268bf215546Sopenharmony_ci case nir_intrinsic_read_invocation: 269bf215546Sopenharmony_ci case nir_intrinsic_read_first_invocation: 270bf215546Sopenharmony_ci case nir_intrinsic_shuffle: 271bf215546Sopenharmony_ci case nir_intrinsic_shuffle_xor: 272bf215546Sopenharmony_ci case nir_intrinsic_shuffle_up: 273bf215546Sopenharmony_ci case nir_intrinsic_shuffle_down: 274bf215546Sopenharmony_ci case nir_intrinsic_quad_broadcast: 275bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_horizontal: 276bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_vertical: 277bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_diagonal: 278bf215546Sopenharmony_ci case nir_intrinsic_quad_swizzle_amd: 279bf215546Sopenharmony_ci case nir_intrinsic_masked_swizzle_amd: 280bf215546Sopenharmony_ci /* All of these have the data payload in the first source. They may 281bf215546Sopenharmony_ci * have a second source with a shuffle index but that doesn't matter if 282bf215546Sopenharmony_ci * the data is constant. 283bf215546Sopenharmony_ci */ 284bf215546Sopenharmony_ci if (nir_src_is_const(intrin->src[0])) { 285bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 286bf215546Sopenharmony_ci intrin->src[0].ssa); 287bf215546Sopenharmony_ci nir_instr_remove(&intrin->instr); 288bf215546Sopenharmony_ci return true; 289bf215546Sopenharmony_ci } 290bf215546Sopenharmony_ci return false; 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci case nir_intrinsic_vote_feq: 293bf215546Sopenharmony_ci case nir_intrinsic_vote_ieq: 294bf215546Sopenharmony_ci if (nir_src_is_const(intrin->src[0])) { 295bf215546Sopenharmony_ci b->cursor = nir_before_instr(&intrin->instr); 296bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 297bf215546Sopenharmony_ci nir_imm_true(b)); 298bf215546Sopenharmony_ci nir_instr_remove(&intrin->instr); 299bf215546Sopenharmony_ci return true; 300bf215546Sopenharmony_ci } 301bf215546Sopenharmony_ci return false; 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci default: 304bf215546Sopenharmony_ci return false; 305bf215546Sopenharmony_ci } 306bf215546Sopenharmony_ci} 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_cistatic bool 309bf215546Sopenharmony_citry_fold_txb_to_tex(nir_builder *b, nir_tex_instr *tex) 310bf215546Sopenharmony_ci{ 311bf215546Sopenharmony_ci assert(tex->op == nir_texop_txb); 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ci const int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias); 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci /* nir_to_tgsi_lower_tex mangles many kinds of texture instructions, 316bf215546Sopenharmony_ci * including txb, into invalid states. It removes the special 317bf215546Sopenharmony_ci * parameters and appends the values to the texture coordinate. 318bf215546Sopenharmony_ci */ 319bf215546Sopenharmony_ci if (bias_idx < 0) 320bf215546Sopenharmony_ci return false; 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci if (nir_src_is_const(tex->src[bias_idx].src) && 323bf215546Sopenharmony_ci nir_src_as_float(tex->src[bias_idx].src) == 0.0) { 324bf215546Sopenharmony_ci nir_tex_instr_remove_src(tex, bias_idx); 325bf215546Sopenharmony_ci tex->op = nir_texop_tex; 326bf215546Sopenharmony_ci return true; 327bf215546Sopenharmony_ci } 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci return false; 330bf215546Sopenharmony_ci} 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_cistatic bool 333bf215546Sopenharmony_citry_fold_tex_offset(nir_tex_instr *tex, unsigned *index, 334bf215546Sopenharmony_ci nir_tex_src_type src_type) 335bf215546Sopenharmony_ci{ 336bf215546Sopenharmony_ci const int src_idx = nir_tex_instr_src_index(tex, src_type); 337bf215546Sopenharmony_ci if (src_idx < 0) 338bf215546Sopenharmony_ci return false; 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci if (!nir_src_is_const(tex->src[src_idx].src)) 341bf215546Sopenharmony_ci return false; 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci *index += nir_src_as_uint(tex->src[src_idx].src); 344bf215546Sopenharmony_ci nir_tex_instr_remove_src(tex, src_idx); 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_ci return true; 347bf215546Sopenharmony_ci} 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_cistatic bool 350bf215546Sopenharmony_citry_fold_tex(nir_builder *b, nir_tex_instr *tex) 351bf215546Sopenharmony_ci{ 352bf215546Sopenharmony_ci bool progress = false; 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci progress |= try_fold_tex_offset(tex, &tex->texture_index, 355bf215546Sopenharmony_ci nir_tex_src_texture_offset); 356bf215546Sopenharmony_ci progress |= try_fold_tex_offset(tex, &tex->sampler_index, 357bf215546Sopenharmony_ci nir_tex_src_sampler_offset); 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci /* txb with a bias of constant zero is just tex. */ 360bf215546Sopenharmony_ci if (tex->op == nir_texop_txb) 361bf215546Sopenharmony_ci progress |= try_fold_txb_to_tex(b, tex); 362bf215546Sopenharmony_ci 363bf215546Sopenharmony_ci return progress; 364bf215546Sopenharmony_ci} 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_cistatic bool 367bf215546Sopenharmony_citry_fold_instr(nir_builder *b, nir_instr *instr, void *_state) 368bf215546Sopenharmony_ci{ 369bf215546Sopenharmony_ci switch (instr->type) { 370bf215546Sopenharmony_ci case nir_instr_type_alu: 371bf215546Sopenharmony_ci return try_fold_alu(b, nir_instr_as_alu(instr)); 372bf215546Sopenharmony_ci case nir_instr_type_intrinsic: 373bf215546Sopenharmony_ci return try_fold_intrinsic(b, nir_instr_as_intrinsic(instr), _state); 374bf215546Sopenharmony_ci case nir_instr_type_tex: 375bf215546Sopenharmony_ci return try_fold_tex(b, nir_instr_as_tex(instr)); 376bf215546Sopenharmony_ci default: 377bf215546Sopenharmony_ci /* Don't know how to constant fold */ 378bf215546Sopenharmony_ci return false; 379bf215546Sopenharmony_ci } 380bf215546Sopenharmony_ci} 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_cibool 383bf215546Sopenharmony_cinir_opt_constant_folding(nir_shader *shader) 384bf215546Sopenharmony_ci{ 385bf215546Sopenharmony_ci struct constant_fold_state state; 386bf215546Sopenharmony_ci state.has_load_constant = false; 387bf215546Sopenharmony_ci state.has_indirect_load_const = false; 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci bool progress = nir_shader_instructions_pass(shader, try_fold_instr, 390bf215546Sopenharmony_ci nir_metadata_block_index | 391bf215546Sopenharmony_ci nir_metadata_dominance, 392bf215546Sopenharmony_ci &state); 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci /* This doesn't free the constant data if there are no constant loads because 395bf215546Sopenharmony_ci * the data might still be used but the loads have been lowered to load_ubo 396bf215546Sopenharmony_ci */ 397bf215546Sopenharmony_ci if (state.has_load_constant && !state.has_indirect_load_const && 398bf215546Sopenharmony_ci shader->constant_data_size) { 399bf215546Sopenharmony_ci ralloc_free(shader->constant_data); 400bf215546Sopenharmony_ci shader->constant_data = NULL; 401bf215546Sopenharmony_ci shader->constant_data_size = 0; 402bf215546Sopenharmony_ci } 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_ci return progress; 405bf215546Sopenharmony_ci} 406