1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2015 Thomas Helland 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir.h" 25bf215546Sopenharmony_ci#include "nir_constant_expressions.h" 26bf215546Sopenharmony_ci#include "nir_loop_analyze.h" 27bf215546Sopenharmony_ci#include "util/bitset.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_citypedef enum { 30bf215546Sopenharmony_ci undefined, 31bf215546Sopenharmony_ci invariant, 32bf215546Sopenharmony_ci not_invariant, 33bf215546Sopenharmony_ci basic_induction 34bf215546Sopenharmony_ci} nir_loop_variable_type; 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_citypedef struct nir_basic_induction_var { 37bf215546Sopenharmony_ci nir_alu_instr *alu; /* The def of the alu-operation */ 38bf215546Sopenharmony_ci nir_ssa_def *def_outside_loop; /* The phi-src outside the loop */ 39bf215546Sopenharmony_ci} nir_basic_induction_var; 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_citypedef struct { 42bf215546Sopenharmony_ci /* A link for the work list */ 43bf215546Sopenharmony_ci struct list_head process_link; 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci bool in_loop; 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ci /* The ssa_def associated with this info */ 48bf215546Sopenharmony_ci nir_ssa_def *def; 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci /* The type of this ssa_def */ 51bf215546Sopenharmony_ci nir_loop_variable_type type; 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci /* If this is of type basic_induction */ 54bf215546Sopenharmony_ci struct nir_basic_induction_var *ind; 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci /* True if variable is in an if branch */ 57bf215546Sopenharmony_ci bool in_if_branch; 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_ci /* True if variable is in a nested loop */ 60bf215546Sopenharmony_ci bool in_nested_loop; 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci /* Could be a basic_induction if following uniforms are inlined */ 63bf215546Sopenharmony_ci nir_src *init_src; 64bf215546Sopenharmony_ci nir_alu_src *update_src; 65bf215546Sopenharmony_ci} nir_loop_variable; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_citypedef struct { 68bf215546Sopenharmony_ci /* The loop we store information for */ 69bf215546Sopenharmony_ci nir_loop *loop; 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci /* Loop_variable for all ssa_defs in function */ 72bf215546Sopenharmony_ci nir_loop_variable *loop_vars; 73bf215546Sopenharmony_ci BITSET_WORD *loop_vars_init; 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci /* A list of the loop_vars to analyze */ 76bf215546Sopenharmony_ci struct list_head process_list; 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci nir_variable_mode indirect_mask; 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci bool force_unroll_sampler_indirect; 81bf215546Sopenharmony_ci} loop_info_state; 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_cistatic nir_loop_variable * 84bf215546Sopenharmony_ciget_loop_var(nir_ssa_def *value, loop_info_state *state) 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci nir_loop_variable *var = &(state->loop_vars[value->index]); 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci if (!BITSET_TEST(state->loop_vars_init, value->index)) { 89bf215546Sopenharmony_ci var->in_loop = false; 90bf215546Sopenharmony_ci var->def = value; 91bf215546Sopenharmony_ci var->in_if_branch = false; 92bf215546Sopenharmony_ci var->in_nested_loop = false; 93bf215546Sopenharmony_ci var->init_src = NULL; 94bf215546Sopenharmony_ci var->update_src = NULL; 95bf215546Sopenharmony_ci if (value->parent_instr->type == nir_instr_type_load_const) 96bf215546Sopenharmony_ci var->type = invariant; 97bf215546Sopenharmony_ci else 98bf215546Sopenharmony_ci var->type = undefined; 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci BITSET_SET(state->loop_vars_init, value->index); 101bf215546Sopenharmony_ci } 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci return var; 104bf215546Sopenharmony_ci} 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_citypedef struct { 107bf215546Sopenharmony_ci loop_info_state *state; 108bf215546Sopenharmony_ci bool in_if_branch; 109bf215546Sopenharmony_ci bool in_nested_loop; 110bf215546Sopenharmony_ci} init_loop_state; 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_cistatic bool 113bf215546Sopenharmony_ciinit_loop_def(nir_ssa_def *def, void *void_init_loop_state) 114bf215546Sopenharmony_ci{ 115bf215546Sopenharmony_ci init_loop_state *loop_init_state = void_init_loop_state; 116bf215546Sopenharmony_ci nir_loop_variable *var = get_loop_var(def, loop_init_state->state); 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci if (loop_init_state->in_nested_loop) { 119bf215546Sopenharmony_ci var->in_nested_loop = true; 120bf215546Sopenharmony_ci } else if (loop_init_state->in_if_branch) { 121bf215546Sopenharmony_ci var->in_if_branch = true; 122bf215546Sopenharmony_ci } else { 123bf215546Sopenharmony_ci /* Add to the tail of the list. That way we start at the beginning of 124bf215546Sopenharmony_ci * the defs in the loop instead of the end when walking the list. This 125bf215546Sopenharmony_ci * means less recursive calls. Only add defs that are not in nested 126bf215546Sopenharmony_ci * loops or conditional blocks. 127bf215546Sopenharmony_ci */ 128bf215546Sopenharmony_ci list_addtail(&var->process_link, &loop_init_state->state->process_list); 129bf215546Sopenharmony_ci } 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci var->in_loop = true; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci return true; 134bf215546Sopenharmony_ci} 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci/** Calculate an estimated cost in number of instructions 137bf215546Sopenharmony_ci * 138bf215546Sopenharmony_ci * We do this so that we don't unroll loops which will later get massively 139bf215546Sopenharmony_ci * inflated due to int64 or fp64 lowering. The estimates provided here don't 140bf215546Sopenharmony_ci * have to be massively accurate; they just have to be good enough that loop 141bf215546Sopenharmony_ci * unrolling doesn't cause things to blow up too much. 142bf215546Sopenharmony_ci */ 143bf215546Sopenharmony_cistatic unsigned 144bf215546Sopenharmony_ciinstr_cost(nir_instr *instr, const nir_shader_compiler_options *options) 145bf215546Sopenharmony_ci{ 146bf215546Sopenharmony_ci if (instr->type == nir_instr_type_intrinsic || 147bf215546Sopenharmony_ci instr->type == nir_instr_type_tex) 148bf215546Sopenharmony_ci return 1; 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci if (instr->type != nir_instr_type_alu) 151bf215546Sopenharmony_ci return 0; 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(instr); 154bf215546Sopenharmony_ci const nir_op_info *info = &nir_op_infos[alu->op]; 155bf215546Sopenharmony_ci unsigned cost = 1; 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci if (alu->op == nir_op_flrp) { 158bf215546Sopenharmony_ci if ((options->lower_flrp16 && nir_dest_bit_size(alu->dest.dest) == 16) || 159bf215546Sopenharmony_ci (options->lower_flrp32 && nir_dest_bit_size(alu->dest.dest) == 32) || 160bf215546Sopenharmony_ci (options->lower_flrp64 && nir_dest_bit_size(alu->dest.dest) == 64)) 161bf215546Sopenharmony_ci cost *= 3; 162bf215546Sopenharmony_ci } 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci /* Assume everything 16 or 32-bit is cheap. 165bf215546Sopenharmony_ci * 166bf215546Sopenharmony_ci * There are no 64-bit ops that don't have a 64-bit thing as their 167bf215546Sopenharmony_ci * destination or first source. 168bf215546Sopenharmony_ci */ 169bf215546Sopenharmony_ci if (nir_dest_bit_size(alu->dest.dest) < 64 && 170bf215546Sopenharmony_ci nir_src_bit_size(alu->src[0].src) < 64) 171bf215546Sopenharmony_ci return cost; 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci bool is_fp64 = nir_dest_bit_size(alu->dest.dest) == 64 && 174bf215546Sopenharmony_ci nir_alu_type_get_base_type(info->output_type) == nir_type_float; 175bf215546Sopenharmony_ci for (unsigned i = 0; i < info->num_inputs; i++) { 176bf215546Sopenharmony_ci if (nir_src_bit_size(alu->src[i].src) == 64 && 177bf215546Sopenharmony_ci nir_alu_type_get_base_type(info->input_types[i]) == nir_type_float) 178bf215546Sopenharmony_ci is_fp64 = true; 179bf215546Sopenharmony_ci } 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci if (is_fp64) { 182bf215546Sopenharmony_ci /* If it's something lowered normally, it's expensive. */ 183bf215546Sopenharmony_ci if (options->lower_doubles_options & 184bf215546Sopenharmony_ci nir_lower_doubles_op_to_options_mask(alu->op)) 185bf215546Sopenharmony_ci cost *= 20; 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci /* If it's full software, it's even more expensive */ 188bf215546Sopenharmony_ci if (options->lower_doubles_options & nir_lower_fp64_full_software) 189bf215546Sopenharmony_ci cost *= 100; 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci return cost; 192bf215546Sopenharmony_ci } else { 193bf215546Sopenharmony_ci if (options->lower_int64_options & 194bf215546Sopenharmony_ci nir_lower_int64_op_to_options_mask(alu->op)) { 195bf215546Sopenharmony_ci /* These require a doing the division algorithm. */ 196bf215546Sopenharmony_ci if (alu->op == nir_op_idiv || alu->op == nir_op_udiv || 197bf215546Sopenharmony_ci alu->op == nir_op_imod || alu->op == nir_op_umod || 198bf215546Sopenharmony_ci alu->op == nir_op_irem) 199bf215546Sopenharmony_ci return cost * 100; 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci /* Other int64 lowering isn't usually all that expensive */ 202bf215546Sopenharmony_ci return cost * 5; 203bf215546Sopenharmony_ci } 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci return cost; 206bf215546Sopenharmony_ci } 207bf215546Sopenharmony_ci} 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_cistatic bool 210bf215546Sopenharmony_ciinit_loop_block(nir_block *block, loop_info_state *state, 211bf215546Sopenharmony_ci bool in_if_branch, bool in_nested_loop, 212bf215546Sopenharmony_ci const nir_shader_compiler_options *options) 213bf215546Sopenharmony_ci{ 214bf215546Sopenharmony_ci init_loop_state init_state = {.in_if_branch = in_if_branch, 215bf215546Sopenharmony_ci .in_nested_loop = in_nested_loop, 216bf215546Sopenharmony_ci .state = state }; 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 219bf215546Sopenharmony_ci state->loop->info->instr_cost += instr_cost(instr, options); 220bf215546Sopenharmony_ci nir_foreach_ssa_def(instr, init_loop_def, &init_state); 221bf215546Sopenharmony_ci } 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci return true; 224bf215546Sopenharmony_ci} 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_cistatic inline bool 227bf215546Sopenharmony_ciis_var_alu(nir_loop_variable *var) 228bf215546Sopenharmony_ci{ 229bf215546Sopenharmony_ci return var->def->parent_instr->type == nir_instr_type_alu; 230bf215546Sopenharmony_ci} 231bf215546Sopenharmony_ci 232bf215546Sopenharmony_cistatic inline bool 233bf215546Sopenharmony_ciis_var_phi(nir_loop_variable *var) 234bf215546Sopenharmony_ci{ 235bf215546Sopenharmony_ci return var->def->parent_instr->type == nir_instr_type_phi; 236bf215546Sopenharmony_ci} 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_cistatic inline bool 239bf215546Sopenharmony_cimark_invariant(nir_ssa_def *def, loop_info_state *state) 240bf215546Sopenharmony_ci{ 241bf215546Sopenharmony_ci nir_loop_variable *var = get_loop_var(def, state); 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci if (var->type == invariant) 244bf215546Sopenharmony_ci return true; 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci if (!var->in_loop) { 247bf215546Sopenharmony_ci var->type = invariant; 248bf215546Sopenharmony_ci return true; 249bf215546Sopenharmony_ci } 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci if (var->type == not_invariant) 252bf215546Sopenharmony_ci return false; 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci if (is_var_alu(var)) { 255bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(def->parent_instr); 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 258bf215546Sopenharmony_ci if (!mark_invariant(alu->src[i].src.ssa, state)) { 259bf215546Sopenharmony_ci var->type = not_invariant; 260bf215546Sopenharmony_ci return false; 261bf215546Sopenharmony_ci } 262bf215546Sopenharmony_ci } 263bf215546Sopenharmony_ci var->type = invariant; 264bf215546Sopenharmony_ci return true; 265bf215546Sopenharmony_ci } 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci /* Phis shouldn't be invariant except if one operand is invariant, and the 268bf215546Sopenharmony_ci * other is the phi itself. These should be removed by opt_remove_phis. 269bf215546Sopenharmony_ci * load_consts are already set to invariant and constant during init, 270bf215546Sopenharmony_ci * and so should return earlier. Remaining op_codes are set undefined. 271bf215546Sopenharmony_ci */ 272bf215546Sopenharmony_ci var->type = not_invariant; 273bf215546Sopenharmony_ci return false; 274bf215546Sopenharmony_ci} 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_cistatic void 277bf215546Sopenharmony_cicompute_invariance_information(loop_info_state *state) 278bf215546Sopenharmony_ci{ 279bf215546Sopenharmony_ci /* An expression is invariant in a loop L if: 280bf215546Sopenharmony_ci * (base cases) 281bf215546Sopenharmony_ci * – it’s a constant 282bf215546Sopenharmony_ci * – it’s a variable use, all of whose single defs are outside of L 283bf215546Sopenharmony_ci * (inductive cases) 284bf215546Sopenharmony_ci * – it’s a pure computation all of whose args are loop invariant 285bf215546Sopenharmony_ci * – it’s a variable use whose single reaching def, and the 286bf215546Sopenharmony_ci * rhs of that def is loop-invariant 287bf215546Sopenharmony_ci */ 288bf215546Sopenharmony_ci list_for_each_entry_safe(nir_loop_variable, var, &state->process_list, 289bf215546Sopenharmony_ci process_link) { 290bf215546Sopenharmony_ci assert(!var->in_if_branch && !var->in_nested_loop); 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci if (mark_invariant(var->def, state)) 293bf215546Sopenharmony_ci list_del(&var->process_link); 294bf215546Sopenharmony_ci } 295bf215546Sopenharmony_ci} 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci/* If all of the instruction sources point to identical ALU instructions (as 298bf215546Sopenharmony_ci * per nir_instrs_equal), return one of the ALU instructions. Otherwise, 299bf215546Sopenharmony_ci * return NULL. 300bf215546Sopenharmony_ci */ 301bf215546Sopenharmony_cistatic nir_alu_instr * 302bf215546Sopenharmony_ciphi_instr_as_alu(nir_phi_instr *phi) 303bf215546Sopenharmony_ci{ 304bf215546Sopenharmony_ci nir_alu_instr *first = NULL; 305bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 306bf215546Sopenharmony_ci assert(src->src.is_ssa); 307bf215546Sopenharmony_ci if (src->src.ssa->parent_instr->type != nir_instr_type_alu) 308bf215546Sopenharmony_ci return NULL; 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(src->src.ssa->parent_instr); 311bf215546Sopenharmony_ci if (first == NULL) { 312bf215546Sopenharmony_ci first = alu; 313bf215546Sopenharmony_ci } else { 314bf215546Sopenharmony_ci if (!nir_instrs_equal(&first->instr, &alu->instr)) 315bf215546Sopenharmony_ci return NULL; 316bf215546Sopenharmony_ci } 317bf215546Sopenharmony_ci } 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci return first; 320bf215546Sopenharmony_ci} 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_cistatic bool 323bf215546Sopenharmony_cialu_src_has_identity_swizzle(nir_alu_instr *alu, unsigned src_idx) 324bf215546Sopenharmony_ci{ 325bf215546Sopenharmony_ci assert(nir_op_infos[alu->op].input_sizes[src_idx] == 0); 326bf215546Sopenharmony_ci assert(alu->dest.dest.is_ssa); 327bf215546Sopenharmony_ci for (unsigned i = 0; i < alu->dest.dest.ssa.num_components; i++) { 328bf215546Sopenharmony_ci if (alu->src[src_idx].swizzle[i] != i) 329bf215546Sopenharmony_ci return false; 330bf215546Sopenharmony_ci } 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci return true; 333bf215546Sopenharmony_ci} 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_cistatic bool 336bf215546Sopenharmony_ciis_only_uniform_src(nir_src *src) 337bf215546Sopenharmony_ci{ 338bf215546Sopenharmony_ci if (!src->is_ssa) 339bf215546Sopenharmony_ci return false; 340bf215546Sopenharmony_ci 341bf215546Sopenharmony_ci nir_instr *instr = src->ssa->parent_instr; 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci switch (instr->type) { 344bf215546Sopenharmony_ci case nir_instr_type_alu: { 345bf215546Sopenharmony_ci /* Return true if all sources return true. */ 346bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(instr); 347bf215546Sopenharmony_ci for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 348bf215546Sopenharmony_ci if (!is_only_uniform_src(&alu->src[i].src)) 349bf215546Sopenharmony_ci return false; 350bf215546Sopenharmony_ci } 351bf215546Sopenharmony_ci return true; 352bf215546Sopenharmony_ci } 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 355bf215546Sopenharmony_ci nir_intrinsic_instr *inst = nir_instr_as_intrinsic(instr); 356bf215546Sopenharmony_ci /* current uniform inline only support load ubo */ 357bf215546Sopenharmony_ci return inst->intrinsic == nir_intrinsic_load_ubo; 358bf215546Sopenharmony_ci } 359bf215546Sopenharmony_ci 360bf215546Sopenharmony_ci case nir_instr_type_load_const: 361bf215546Sopenharmony_ci /* Always return true for constants. */ 362bf215546Sopenharmony_ci return true; 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci default: 365bf215546Sopenharmony_ci return false; 366bf215546Sopenharmony_ci } 367bf215546Sopenharmony_ci} 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_cistatic bool 370bf215546Sopenharmony_cicompute_induction_information(loop_info_state *state) 371bf215546Sopenharmony_ci{ 372bf215546Sopenharmony_ci bool found_induction_var = false; 373bf215546Sopenharmony_ci unsigned num_induction_vars = 0; 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci list_for_each_entry_safe(nir_loop_variable, var, &state->process_list, 376bf215546Sopenharmony_ci process_link) { 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_ci /* It can't be an induction variable if it is invariant. Invariants and 379bf215546Sopenharmony_ci * things in nested loops or conditionals should have been removed from 380bf215546Sopenharmony_ci * the list by compute_invariance_information(). 381bf215546Sopenharmony_ci */ 382bf215546Sopenharmony_ci assert(!var->in_if_branch && !var->in_nested_loop && 383bf215546Sopenharmony_ci var->type != invariant); 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_ci /* We are only interested in checking phis for the basic induction 386bf215546Sopenharmony_ci * variable case as its simple to detect. All basic induction variables 387bf215546Sopenharmony_ci * have a phi node 388bf215546Sopenharmony_ci */ 389bf215546Sopenharmony_ci if (!is_var_phi(var)) 390bf215546Sopenharmony_ci continue; 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci nir_phi_instr *phi = nir_instr_as_phi(var->def->parent_instr); 393bf215546Sopenharmony_ci nir_basic_induction_var *biv = rzalloc(state, nir_basic_induction_var); 394bf215546Sopenharmony_ci 395bf215546Sopenharmony_ci nir_src *init_src = NULL; 396bf215546Sopenharmony_ci nir_loop_variable *alu_src_var = NULL; 397bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 398bf215546Sopenharmony_ci nir_loop_variable *src_var = get_loop_var(src->src.ssa, state); 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_ci /* If one of the sources is in an if branch or nested loop then don't 401bf215546Sopenharmony_ci * attempt to go any further. 402bf215546Sopenharmony_ci */ 403bf215546Sopenharmony_ci if (src_var->in_if_branch || src_var->in_nested_loop) 404bf215546Sopenharmony_ci break; 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_ci /* Detect inductions variables that are incremented in both branches 407bf215546Sopenharmony_ci * of an unnested if rather than in a loop block. 408bf215546Sopenharmony_ci */ 409bf215546Sopenharmony_ci if (is_var_phi(src_var)) { 410bf215546Sopenharmony_ci nir_phi_instr *src_phi = 411bf215546Sopenharmony_ci nir_instr_as_phi(src_var->def->parent_instr); 412bf215546Sopenharmony_ci nir_alu_instr *src_phi_alu = phi_instr_as_alu(src_phi); 413bf215546Sopenharmony_ci if (src_phi_alu) { 414bf215546Sopenharmony_ci src_var = get_loop_var(&src_phi_alu->dest.dest.ssa, state); 415bf215546Sopenharmony_ci if (!src_var->in_if_branch) 416bf215546Sopenharmony_ci break; 417bf215546Sopenharmony_ci } 418bf215546Sopenharmony_ci } 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci if (!src_var->in_loop && !biv->def_outside_loop) { 421bf215546Sopenharmony_ci biv->def_outside_loop = src_var->def; 422bf215546Sopenharmony_ci init_src = &src->src; 423bf215546Sopenharmony_ci } else if (is_var_alu(src_var) && !biv->alu) { 424bf215546Sopenharmony_ci alu_src_var = src_var; 425bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(src_var->def->parent_instr); 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_ci /* Check for unsupported alu operations */ 428bf215546Sopenharmony_ci if (alu->op != nir_op_iadd && alu->op != nir_op_fadd) 429bf215546Sopenharmony_ci break; 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci if (nir_op_infos[alu->op].num_inputs == 2) { 432bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 433bf215546Sopenharmony_ci /* Is one of the operands const or uniform, and the other the phi. 434bf215546Sopenharmony_ci * The phi source can't be swizzled in any way. 435bf215546Sopenharmony_ci */ 436bf215546Sopenharmony_ci if (alu->src[1-i].src.ssa == &phi->dest.ssa && 437bf215546Sopenharmony_ci alu_src_has_identity_swizzle(alu, 1 - i)) { 438bf215546Sopenharmony_ci nir_src *src = &alu->src[i].src; 439bf215546Sopenharmony_ci if (nir_src_is_const(*src)) 440bf215546Sopenharmony_ci biv->alu = alu; 441bf215546Sopenharmony_ci else if (is_only_uniform_src(src)) { 442bf215546Sopenharmony_ci /* Update value of induction variable is a statement 443bf215546Sopenharmony_ci * contains only uniform and constant 444bf215546Sopenharmony_ci */ 445bf215546Sopenharmony_ci var->update_src = alu->src + i; 446bf215546Sopenharmony_ci biv->alu = alu; 447bf215546Sopenharmony_ci } 448bf215546Sopenharmony_ci } 449bf215546Sopenharmony_ci } 450bf215546Sopenharmony_ci } 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_ci if (!biv->alu) 453bf215546Sopenharmony_ci break; 454bf215546Sopenharmony_ci } else { 455bf215546Sopenharmony_ci biv->alu = NULL; 456bf215546Sopenharmony_ci break; 457bf215546Sopenharmony_ci } 458bf215546Sopenharmony_ci } 459bf215546Sopenharmony_ci 460bf215546Sopenharmony_ci if (biv->alu && biv->def_outside_loop) { 461bf215546Sopenharmony_ci nir_instr *inst = biv->def_outside_loop->parent_instr; 462bf215546Sopenharmony_ci if (inst->type == nir_instr_type_load_const) { 463bf215546Sopenharmony_ci /* Initial value of induction variable is a constant */ 464bf215546Sopenharmony_ci if (var->update_src) { 465bf215546Sopenharmony_ci alu_src_var->update_src = var->update_src; 466bf215546Sopenharmony_ci ralloc_free(biv); 467bf215546Sopenharmony_ci } else { 468bf215546Sopenharmony_ci alu_src_var->type = basic_induction; 469bf215546Sopenharmony_ci alu_src_var->ind = biv; 470bf215546Sopenharmony_ci var->type = basic_induction; 471bf215546Sopenharmony_ci var->ind = biv; 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci found_induction_var = true; 474bf215546Sopenharmony_ci } 475bf215546Sopenharmony_ci num_induction_vars += 2; 476bf215546Sopenharmony_ci } else if (is_only_uniform_src(init_src)) { 477bf215546Sopenharmony_ci /* Initial value of induction variable is a uniform */ 478bf215546Sopenharmony_ci var->init_src = init_src; 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci alu_src_var->init_src = var->init_src; 481bf215546Sopenharmony_ci alu_src_var->update_src = var->update_src; 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_ci num_induction_vars += 2; 484bf215546Sopenharmony_ci ralloc_free(biv); 485bf215546Sopenharmony_ci } else { 486bf215546Sopenharmony_ci var->update_src = NULL; 487bf215546Sopenharmony_ci ralloc_free(biv); 488bf215546Sopenharmony_ci } 489bf215546Sopenharmony_ci } else { 490bf215546Sopenharmony_ci var->update_src = NULL; 491bf215546Sopenharmony_ci ralloc_free(biv); 492bf215546Sopenharmony_ci } 493bf215546Sopenharmony_ci } 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci nir_loop_info *info = state->loop->info; 496bf215546Sopenharmony_ci ralloc_free(info->induction_vars); 497bf215546Sopenharmony_ci info->num_induction_vars = 0; 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci /* record induction variables into nir_loop_info */ 500bf215546Sopenharmony_ci if (num_induction_vars) { 501bf215546Sopenharmony_ci info->induction_vars = ralloc_array(info, nir_loop_induction_variable, 502bf215546Sopenharmony_ci num_induction_vars); 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_ci list_for_each_entry(nir_loop_variable, var, &state->process_list, 505bf215546Sopenharmony_ci process_link) { 506bf215546Sopenharmony_ci if (var->type == basic_induction || var->init_src || var->update_src) { 507bf215546Sopenharmony_ci nir_loop_induction_variable *ivar = 508bf215546Sopenharmony_ci &info->induction_vars[info->num_induction_vars++]; 509bf215546Sopenharmony_ci ivar->def = var->def; 510bf215546Sopenharmony_ci ivar->init_src = var->init_src; 511bf215546Sopenharmony_ci ivar->update_src = var->update_src; 512bf215546Sopenharmony_ci } 513bf215546Sopenharmony_ci } 514bf215546Sopenharmony_ci /* don't overflow */ 515bf215546Sopenharmony_ci assert(info->num_induction_vars <= num_induction_vars); 516bf215546Sopenharmony_ci } 517bf215546Sopenharmony_ci 518bf215546Sopenharmony_ci return found_induction_var; 519bf215546Sopenharmony_ci} 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_cistatic bool 522bf215546Sopenharmony_cifind_loop_terminators(loop_info_state *state) 523bf215546Sopenharmony_ci{ 524bf215546Sopenharmony_ci bool success = false; 525bf215546Sopenharmony_ci foreach_list_typed_safe(nir_cf_node, node, node, &state->loop->body) { 526bf215546Sopenharmony_ci if (node->type == nir_cf_node_if) { 527bf215546Sopenharmony_ci nir_if *nif = nir_cf_node_as_if(node); 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_ci nir_block *break_blk = NULL; 530bf215546Sopenharmony_ci nir_block *continue_from_blk = NULL; 531bf215546Sopenharmony_ci bool continue_from_then = true; 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_ci nir_block *last_then = nir_if_last_then_block(nif); 534bf215546Sopenharmony_ci nir_block *last_else = nir_if_last_else_block(nif); 535bf215546Sopenharmony_ci if (nir_block_ends_in_break(last_then)) { 536bf215546Sopenharmony_ci break_blk = last_then; 537bf215546Sopenharmony_ci continue_from_blk = last_else; 538bf215546Sopenharmony_ci continue_from_then = false; 539bf215546Sopenharmony_ci } else if (nir_block_ends_in_break(last_else)) { 540bf215546Sopenharmony_ci break_blk = last_else; 541bf215546Sopenharmony_ci continue_from_blk = last_then; 542bf215546Sopenharmony_ci } 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci /* If there is a break then we should find a terminator. If we can 545bf215546Sopenharmony_ci * not find a loop terminator, but there is a break-statement then 546bf215546Sopenharmony_ci * we should return false so that we do not try to find trip-count 547bf215546Sopenharmony_ci */ 548bf215546Sopenharmony_ci if (!nir_is_trivial_loop_if(nif, break_blk)) { 549bf215546Sopenharmony_ci state->loop->info->complex_loop = true; 550bf215546Sopenharmony_ci return false; 551bf215546Sopenharmony_ci } 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci /* Continue if the if contained no jumps at all */ 554bf215546Sopenharmony_ci if (!break_blk) 555bf215546Sopenharmony_ci continue; 556bf215546Sopenharmony_ci 557bf215546Sopenharmony_ci if (nif->condition.ssa->parent_instr->type == nir_instr_type_phi) { 558bf215546Sopenharmony_ci state->loop->info->complex_loop = true; 559bf215546Sopenharmony_ci return false; 560bf215546Sopenharmony_ci } 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci nir_loop_terminator *terminator = 563bf215546Sopenharmony_ci rzalloc(state->loop->info, nir_loop_terminator); 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_ci list_addtail(&terminator->loop_terminator_link, 566bf215546Sopenharmony_ci &state->loop->info->loop_terminator_list); 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_ci terminator->nif = nif; 569bf215546Sopenharmony_ci terminator->break_block = break_blk; 570bf215546Sopenharmony_ci terminator->continue_from_block = continue_from_blk; 571bf215546Sopenharmony_ci terminator->continue_from_then = continue_from_then; 572bf215546Sopenharmony_ci terminator->conditional_instr = nif->condition.ssa->parent_instr; 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_ci success = true; 575bf215546Sopenharmony_ci } 576bf215546Sopenharmony_ci } 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci return success; 579bf215546Sopenharmony_ci} 580bf215546Sopenharmony_ci 581bf215546Sopenharmony_ci/* This function looks for an array access within a loop that uses an 582bf215546Sopenharmony_ci * induction variable for the array index. If found it returns the size of the 583bf215546Sopenharmony_ci * array, otherwise 0 is returned. If we find an induction var we pass it back 584bf215546Sopenharmony_ci * to the caller via array_index_out. 585bf215546Sopenharmony_ci */ 586bf215546Sopenharmony_cistatic unsigned 587bf215546Sopenharmony_cifind_array_access_via_induction(loop_info_state *state, 588bf215546Sopenharmony_ci nir_deref_instr *deref, 589bf215546Sopenharmony_ci nir_loop_variable **array_index_out) 590bf215546Sopenharmony_ci{ 591bf215546Sopenharmony_ci for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) { 592bf215546Sopenharmony_ci if (d->deref_type != nir_deref_type_array) 593bf215546Sopenharmony_ci continue; 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_ci assert(d->arr.index.is_ssa); 596bf215546Sopenharmony_ci nir_loop_variable *array_index = get_loop_var(d->arr.index.ssa, state); 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci if (array_index->type != basic_induction) 599bf215546Sopenharmony_ci continue; 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci if (array_index_out) 602bf215546Sopenharmony_ci *array_index_out = array_index; 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_ci nir_deref_instr *parent = nir_deref_instr_parent(d); 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_ci if (glsl_type_is_array_or_matrix(parent->type)) { 607bf215546Sopenharmony_ci return glsl_get_length(parent->type); 608bf215546Sopenharmony_ci } else { 609bf215546Sopenharmony_ci assert(glsl_type_is_vector(parent->type)); 610bf215546Sopenharmony_ci return glsl_get_vector_elements(parent->type); 611bf215546Sopenharmony_ci } 612bf215546Sopenharmony_ci } 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci return 0; 615bf215546Sopenharmony_ci} 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_cistatic bool 618bf215546Sopenharmony_ciguess_loop_limit(loop_info_state *state, nir_const_value *limit_val, 619bf215546Sopenharmony_ci nir_ssa_scalar basic_ind) 620bf215546Sopenharmony_ci{ 621bf215546Sopenharmony_ci unsigned min_array_size = 0; 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci nir_foreach_block_in_cf_node(block, &state->loop->cf_node) { 624bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 625bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 626bf215546Sopenharmony_ci continue; 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci /* Check for arrays variably-indexed by a loop induction variable. */ 631bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_load_deref || 632bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_store_deref || 633bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_copy_deref) { 634bf215546Sopenharmony_ci 635bf215546Sopenharmony_ci nir_loop_variable *array_idx = NULL; 636bf215546Sopenharmony_ci unsigned array_size = 637bf215546Sopenharmony_ci find_array_access_via_induction(state, 638bf215546Sopenharmony_ci nir_src_as_deref(intrin->src[0]), 639bf215546Sopenharmony_ci &array_idx); 640bf215546Sopenharmony_ci if (array_idx && basic_ind.def == array_idx->def && 641bf215546Sopenharmony_ci (min_array_size == 0 || min_array_size > array_size)) { 642bf215546Sopenharmony_ci /* Array indices are scalars */ 643bf215546Sopenharmony_ci assert(basic_ind.def->num_components == 1); 644bf215546Sopenharmony_ci min_array_size = array_size; 645bf215546Sopenharmony_ci } 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_ci if (intrin->intrinsic != nir_intrinsic_copy_deref) 648bf215546Sopenharmony_ci continue; 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci array_size = 651bf215546Sopenharmony_ci find_array_access_via_induction(state, 652bf215546Sopenharmony_ci nir_src_as_deref(intrin->src[1]), 653bf215546Sopenharmony_ci &array_idx); 654bf215546Sopenharmony_ci if (array_idx && basic_ind.def == array_idx->def && 655bf215546Sopenharmony_ci (min_array_size == 0 || min_array_size > array_size)) { 656bf215546Sopenharmony_ci /* Array indices are scalars */ 657bf215546Sopenharmony_ci assert(basic_ind.def->num_components == 1); 658bf215546Sopenharmony_ci min_array_size = array_size; 659bf215546Sopenharmony_ci } 660bf215546Sopenharmony_ci } 661bf215546Sopenharmony_ci } 662bf215546Sopenharmony_ci } 663bf215546Sopenharmony_ci 664bf215546Sopenharmony_ci if (min_array_size) { 665bf215546Sopenharmony_ci *limit_val = nir_const_value_for_uint(min_array_size, 666bf215546Sopenharmony_ci basic_ind.def->bit_size); 667bf215546Sopenharmony_ci return true; 668bf215546Sopenharmony_ci } 669bf215546Sopenharmony_ci 670bf215546Sopenharmony_ci return false; 671bf215546Sopenharmony_ci} 672bf215546Sopenharmony_ci 673bf215546Sopenharmony_cistatic bool 674bf215546Sopenharmony_citry_find_limit_of_alu(nir_ssa_scalar limit, nir_const_value *limit_val, 675bf215546Sopenharmony_ci nir_loop_terminator *terminator, loop_info_state *state) 676bf215546Sopenharmony_ci{ 677bf215546Sopenharmony_ci if (!nir_ssa_scalar_is_alu(limit)) 678bf215546Sopenharmony_ci return false; 679bf215546Sopenharmony_ci 680bf215546Sopenharmony_ci nir_op limit_op = nir_ssa_scalar_alu_op(limit); 681bf215546Sopenharmony_ci if (limit_op == nir_op_imin || limit_op == nir_op_fmin) { 682bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 683bf215546Sopenharmony_ci nir_ssa_scalar src = nir_ssa_scalar_chase_alu_src(limit, i); 684bf215546Sopenharmony_ci if (nir_ssa_scalar_is_const(src)) { 685bf215546Sopenharmony_ci *limit_val = nir_ssa_scalar_as_const_value(src); 686bf215546Sopenharmony_ci terminator->exact_trip_count_unknown = true; 687bf215546Sopenharmony_ci return true; 688bf215546Sopenharmony_ci } 689bf215546Sopenharmony_ci } 690bf215546Sopenharmony_ci } 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_ci return false; 693bf215546Sopenharmony_ci} 694bf215546Sopenharmony_ci 695bf215546Sopenharmony_cistatic nir_const_value 696bf215546Sopenharmony_cieval_const_unop(nir_op op, unsigned bit_size, nir_const_value src0, 697bf215546Sopenharmony_ci unsigned execution_mode) 698bf215546Sopenharmony_ci{ 699bf215546Sopenharmony_ci assert(nir_op_infos[op].num_inputs == 1); 700bf215546Sopenharmony_ci nir_const_value dest; 701bf215546Sopenharmony_ci nir_const_value *src[1] = { &src0 }; 702bf215546Sopenharmony_ci nir_eval_const_opcode(op, &dest, 1, bit_size, src, execution_mode); 703bf215546Sopenharmony_ci return dest; 704bf215546Sopenharmony_ci} 705bf215546Sopenharmony_ci 706bf215546Sopenharmony_cistatic nir_const_value 707bf215546Sopenharmony_cieval_const_binop(nir_op op, unsigned bit_size, 708bf215546Sopenharmony_ci nir_const_value src0, nir_const_value src1, 709bf215546Sopenharmony_ci unsigned execution_mode) 710bf215546Sopenharmony_ci{ 711bf215546Sopenharmony_ci assert(nir_op_infos[op].num_inputs == 2); 712bf215546Sopenharmony_ci nir_const_value dest; 713bf215546Sopenharmony_ci nir_const_value *src[2] = { &src0, &src1 }; 714bf215546Sopenharmony_ci nir_eval_const_opcode(op, &dest, 1, bit_size, src, execution_mode); 715bf215546Sopenharmony_ci return dest; 716bf215546Sopenharmony_ci} 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_cistatic int32_t 719bf215546Sopenharmony_ciget_iteration(nir_op cond_op, nir_const_value initial, nir_const_value step, 720bf215546Sopenharmony_ci nir_const_value limit, unsigned bit_size, 721bf215546Sopenharmony_ci unsigned execution_mode) 722bf215546Sopenharmony_ci{ 723bf215546Sopenharmony_ci nir_const_value span, iter; 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci switch (cond_op) { 726bf215546Sopenharmony_ci case nir_op_ige: 727bf215546Sopenharmony_ci case nir_op_ilt: 728bf215546Sopenharmony_ci case nir_op_ieq: 729bf215546Sopenharmony_ci case nir_op_ine: 730bf215546Sopenharmony_ci span = eval_const_binop(nir_op_isub, bit_size, limit, initial, 731bf215546Sopenharmony_ci execution_mode); 732bf215546Sopenharmony_ci iter = eval_const_binop(nir_op_idiv, bit_size, span, step, 733bf215546Sopenharmony_ci execution_mode); 734bf215546Sopenharmony_ci break; 735bf215546Sopenharmony_ci 736bf215546Sopenharmony_ci case nir_op_uge: 737bf215546Sopenharmony_ci case nir_op_ult: 738bf215546Sopenharmony_ci span = eval_const_binop(nir_op_isub, bit_size, limit, initial, 739bf215546Sopenharmony_ci execution_mode); 740bf215546Sopenharmony_ci iter = eval_const_binop(nir_op_udiv, bit_size, span, step, 741bf215546Sopenharmony_ci execution_mode); 742bf215546Sopenharmony_ci break; 743bf215546Sopenharmony_ci 744bf215546Sopenharmony_ci case nir_op_fge: 745bf215546Sopenharmony_ci case nir_op_flt: 746bf215546Sopenharmony_ci case nir_op_feq: 747bf215546Sopenharmony_ci case nir_op_fneu: 748bf215546Sopenharmony_ci span = eval_const_binop(nir_op_fsub, bit_size, limit, initial, 749bf215546Sopenharmony_ci execution_mode); 750bf215546Sopenharmony_ci iter = eval_const_binop(nir_op_fdiv, bit_size, span, 751bf215546Sopenharmony_ci step, execution_mode); 752bf215546Sopenharmony_ci iter = eval_const_unop(nir_op_f2i64, bit_size, iter, execution_mode); 753bf215546Sopenharmony_ci break; 754bf215546Sopenharmony_ci 755bf215546Sopenharmony_ci default: 756bf215546Sopenharmony_ci return -1; 757bf215546Sopenharmony_ci } 758bf215546Sopenharmony_ci 759bf215546Sopenharmony_ci uint64_t iter_u64 = nir_const_value_as_uint(iter, bit_size); 760bf215546Sopenharmony_ci return iter_u64 > INT_MAX ? -1 : (int)iter_u64; 761bf215546Sopenharmony_ci} 762bf215546Sopenharmony_ci 763bf215546Sopenharmony_cistatic bool 764bf215546Sopenharmony_ciwill_break_on_first_iteration(nir_const_value step, 765bf215546Sopenharmony_ci nir_alu_type induction_base_type, 766bf215546Sopenharmony_ci unsigned trip_offset, 767bf215546Sopenharmony_ci nir_op cond_op, unsigned bit_size, 768bf215546Sopenharmony_ci nir_const_value initial, 769bf215546Sopenharmony_ci nir_const_value limit, 770bf215546Sopenharmony_ci bool limit_rhs, bool invert_cond, 771bf215546Sopenharmony_ci unsigned execution_mode) 772bf215546Sopenharmony_ci{ 773bf215546Sopenharmony_ci if (trip_offset == 1) { 774bf215546Sopenharmony_ci nir_op add_op; 775bf215546Sopenharmony_ci switch (induction_base_type) { 776bf215546Sopenharmony_ci case nir_type_float: 777bf215546Sopenharmony_ci add_op = nir_op_fadd; 778bf215546Sopenharmony_ci break; 779bf215546Sopenharmony_ci case nir_type_int: 780bf215546Sopenharmony_ci case nir_type_uint: 781bf215546Sopenharmony_ci add_op = nir_op_iadd; 782bf215546Sopenharmony_ci break; 783bf215546Sopenharmony_ci default: 784bf215546Sopenharmony_ci unreachable("Unhandled induction variable base type!"); 785bf215546Sopenharmony_ci } 786bf215546Sopenharmony_ci 787bf215546Sopenharmony_ci initial = eval_const_binop(add_op, bit_size, initial, step, 788bf215546Sopenharmony_ci execution_mode); 789bf215546Sopenharmony_ci } 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci nir_const_value *src[2]; 792bf215546Sopenharmony_ci src[limit_rhs ? 0 : 1] = &initial; 793bf215546Sopenharmony_ci src[limit_rhs ? 1 : 0] = &limit; 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_ci /* Evaluate the loop exit condition */ 796bf215546Sopenharmony_ci nir_const_value result; 797bf215546Sopenharmony_ci nir_eval_const_opcode(cond_op, &result, 1, bit_size, src, execution_mode); 798bf215546Sopenharmony_ci 799bf215546Sopenharmony_ci return invert_cond ? !result.b : result.b; 800bf215546Sopenharmony_ci} 801bf215546Sopenharmony_ci 802bf215546Sopenharmony_cistatic bool 803bf215546Sopenharmony_citest_iterations(int32_t iter_int, nir_const_value step, 804bf215546Sopenharmony_ci nir_const_value limit, nir_op cond_op, unsigned bit_size, 805bf215546Sopenharmony_ci nir_alu_type induction_base_type, 806bf215546Sopenharmony_ci nir_const_value initial, bool limit_rhs, bool invert_cond, 807bf215546Sopenharmony_ci unsigned execution_mode) 808bf215546Sopenharmony_ci{ 809bf215546Sopenharmony_ci assert(nir_op_infos[cond_op].num_inputs == 2); 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_ci nir_const_value iter_src; 812bf215546Sopenharmony_ci nir_op mul_op; 813bf215546Sopenharmony_ci nir_op add_op; 814bf215546Sopenharmony_ci switch (induction_base_type) { 815bf215546Sopenharmony_ci case nir_type_float: 816bf215546Sopenharmony_ci iter_src = nir_const_value_for_float(iter_int, bit_size); 817bf215546Sopenharmony_ci mul_op = nir_op_fmul; 818bf215546Sopenharmony_ci add_op = nir_op_fadd; 819bf215546Sopenharmony_ci break; 820bf215546Sopenharmony_ci case nir_type_int: 821bf215546Sopenharmony_ci case nir_type_uint: 822bf215546Sopenharmony_ci iter_src = nir_const_value_for_int(iter_int, bit_size); 823bf215546Sopenharmony_ci mul_op = nir_op_imul; 824bf215546Sopenharmony_ci add_op = nir_op_iadd; 825bf215546Sopenharmony_ci break; 826bf215546Sopenharmony_ci default: 827bf215546Sopenharmony_ci unreachable("Unhandled induction variable base type!"); 828bf215546Sopenharmony_ci } 829bf215546Sopenharmony_ci 830bf215546Sopenharmony_ci /* Multiple the iteration count we are testing by the number of times we 831bf215546Sopenharmony_ci * step the induction variable each iteration. 832bf215546Sopenharmony_ci */ 833bf215546Sopenharmony_ci nir_const_value mul_result = 834bf215546Sopenharmony_ci eval_const_binop(mul_op, bit_size, iter_src, step, execution_mode); 835bf215546Sopenharmony_ci 836bf215546Sopenharmony_ci /* Add the initial value to the accumulated induction variable total */ 837bf215546Sopenharmony_ci nir_const_value add_result = 838bf215546Sopenharmony_ci eval_const_binop(add_op, bit_size, mul_result, initial, execution_mode); 839bf215546Sopenharmony_ci 840bf215546Sopenharmony_ci nir_const_value *src[2]; 841bf215546Sopenharmony_ci src[limit_rhs ? 0 : 1] = &add_result; 842bf215546Sopenharmony_ci src[limit_rhs ? 1 : 0] = &limit; 843bf215546Sopenharmony_ci 844bf215546Sopenharmony_ci /* Evaluate the loop exit condition */ 845bf215546Sopenharmony_ci nir_const_value result; 846bf215546Sopenharmony_ci nir_eval_const_opcode(cond_op, &result, 1, bit_size, src, execution_mode); 847bf215546Sopenharmony_ci 848bf215546Sopenharmony_ci return invert_cond ? !result.b : result.b; 849bf215546Sopenharmony_ci} 850bf215546Sopenharmony_ci 851bf215546Sopenharmony_cistatic int 852bf215546Sopenharmony_cicalculate_iterations(nir_const_value initial, nir_const_value step, 853bf215546Sopenharmony_ci nir_const_value limit, nir_alu_instr *alu, 854bf215546Sopenharmony_ci nir_ssa_scalar cond, nir_op alu_op, bool limit_rhs, 855bf215546Sopenharmony_ci bool invert_cond, unsigned execution_mode) 856bf215546Sopenharmony_ci{ 857bf215546Sopenharmony_ci /* nir_op_isub should have been lowered away by this point */ 858bf215546Sopenharmony_ci assert(alu->op != nir_op_isub); 859bf215546Sopenharmony_ci 860bf215546Sopenharmony_ci /* Make sure the alu type for our induction variable is compatible with the 861bf215546Sopenharmony_ci * conditional alus input type. If its not something has gone really wrong. 862bf215546Sopenharmony_ci */ 863bf215546Sopenharmony_ci nir_alu_type induction_base_type = 864bf215546Sopenharmony_ci nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type); 865bf215546Sopenharmony_ci if (induction_base_type == nir_type_int || induction_base_type == nir_type_uint) { 866bf215546Sopenharmony_ci assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) == nir_type_int || 867bf215546Sopenharmony_ci nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) == nir_type_uint); 868bf215546Sopenharmony_ci } else { 869bf215546Sopenharmony_ci assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[0]) == 870bf215546Sopenharmony_ci induction_base_type); 871bf215546Sopenharmony_ci } 872bf215546Sopenharmony_ci 873bf215546Sopenharmony_ci /* Only variable with these update ops were marked as induction. */ 874bf215546Sopenharmony_ci assert(alu->op == nir_op_iadd || alu->op == nir_op_fadd); 875bf215546Sopenharmony_ci 876bf215546Sopenharmony_ci /* do-while loops can increment the starting value before the condition is 877bf215546Sopenharmony_ci * checked. e.g. 878bf215546Sopenharmony_ci * 879bf215546Sopenharmony_ci * do { 880bf215546Sopenharmony_ci * ndx++; 881bf215546Sopenharmony_ci * } while (ndx < 3); 882bf215546Sopenharmony_ci * 883bf215546Sopenharmony_ci * Here we check if the induction variable is used directly by the loop 884bf215546Sopenharmony_ci * condition and if so we assume we need to step the initial value. 885bf215546Sopenharmony_ci */ 886bf215546Sopenharmony_ci unsigned trip_offset = 0; 887bf215546Sopenharmony_ci nir_alu_instr *cond_alu = nir_instr_as_alu(cond.def->parent_instr); 888bf215546Sopenharmony_ci if (cond_alu->src[0].src.ssa == &alu->dest.dest.ssa || 889bf215546Sopenharmony_ci cond_alu->src[1].src.ssa == &alu->dest.dest.ssa) { 890bf215546Sopenharmony_ci trip_offset = 1; 891bf215546Sopenharmony_ci } 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_ci assert(nir_src_bit_size(alu->src[0].src) == 894bf215546Sopenharmony_ci nir_src_bit_size(alu->src[1].src)); 895bf215546Sopenharmony_ci unsigned bit_size = nir_src_bit_size(alu->src[0].src); 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci /* get_iteration works under assumption that iterator will be 898bf215546Sopenharmony_ci * incremented or decremented until it hits the limit, 899bf215546Sopenharmony_ci * however if the loop condition is false on the first iteration 900bf215546Sopenharmony_ci * get_iteration's assumption is broken. Handle such loops first. 901bf215546Sopenharmony_ci */ 902bf215546Sopenharmony_ci if (will_break_on_first_iteration(step, induction_base_type, trip_offset, 903bf215546Sopenharmony_ci alu_op, bit_size, initial, 904bf215546Sopenharmony_ci limit, limit_rhs, invert_cond, 905bf215546Sopenharmony_ci execution_mode)) { 906bf215546Sopenharmony_ci return 0; 907bf215546Sopenharmony_ci } 908bf215546Sopenharmony_ci 909bf215546Sopenharmony_ci int iter_int = get_iteration(alu_op, initial, step, limit, bit_size, 910bf215546Sopenharmony_ci execution_mode); 911bf215546Sopenharmony_ci 912bf215546Sopenharmony_ci /* If iter_int is negative the loop is ill-formed or is the conditional is 913bf215546Sopenharmony_ci * unsigned with a huge iteration count so don't bother going any further. 914bf215546Sopenharmony_ci */ 915bf215546Sopenharmony_ci if (iter_int < 0) 916bf215546Sopenharmony_ci return -1; 917bf215546Sopenharmony_ci 918bf215546Sopenharmony_ci /* An explanation from the GLSL unrolling pass: 919bf215546Sopenharmony_ci * 920bf215546Sopenharmony_ci * Make sure that the calculated number of iterations satisfies the exit 921bf215546Sopenharmony_ci * condition. This is needed to catch off-by-one errors and some types of 922bf215546Sopenharmony_ci * ill-formed loops. For example, we need to detect that the following 923bf215546Sopenharmony_ci * loop does not have a maximum iteration count. 924bf215546Sopenharmony_ci * 925bf215546Sopenharmony_ci * for (float x = 0.0; x != 0.9; x += 0.2); 926bf215546Sopenharmony_ci */ 927bf215546Sopenharmony_ci for (int bias = -1; bias <= 1; bias++) { 928bf215546Sopenharmony_ci const int iter_bias = iter_int + bias; 929bf215546Sopenharmony_ci 930bf215546Sopenharmony_ci if (test_iterations(iter_bias, step, limit, alu_op, bit_size, 931bf215546Sopenharmony_ci induction_base_type, initial, 932bf215546Sopenharmony_ci limit_rhs, invert_cond, execution_mode)) { 933bf215546Sopenharmony_ci return iter_bias > 0 ? iter_bias - trip_offset : iter_bias; 934bf215546Sopenharmony_ci } 935bf215546Sopenharmony_ci } 936bf215546Sopenharmony_ci 937bf215546Sopenharmony_ci return -1; 938bf215546Sopenharmony_ci} 939bf215546Sopenharmony_ci 940bf215546Sopenharmony_cistatic nir_op 941bf215546Sopenharmony_ciinverse_comparison(nir_op alu_op) 942bf215546Sopenharmony_ci{ 943bf215546Sopenharmony_ci switch (alu_op) { 944bf215546Sopenharmony_ci case nir_op_fge: 945bf215546Sopenharmony_ci return nir_op_flt; 946bf215546Sopenharmony_ci case nir_op_ige: 947bf215546Sopenharmony_ci return nir_op_ilt; 948bf215546Sopenharmony_ci case nir_op_uge: 949bf215546Sopenharmony_ci return nir_op_ult; 950bf215546Sopenharmony_ci case nir_op_flt: 951bf215546Sopenharmony_ci return nir_op_fge; 952bf215546Sopenharmony_ci case nir_op_ilt: 953bf215546Sopenharmony_ci return nir_op_ige; 954bf215546Sopenharmony_ci case nir_op_ult: 955bf215546Sopenharmony_ci return nir_op_uge; 956bf215546Sopenharmony_ci case nir_op_feq: 957bf215546Sopenharmony_ci return nir_op_fneu; 958bf215546Sopenharmony_ci case nir_op_ieq: 959bf215546Sopenharmony_ci return nir_op_ine; 960bf215546Sopenharmony_ci case nir_op_fneu: 961bf215546Sopenharmony_ci return nir_op_feq; 962bf215546Sopenharmony_ci case nir_op_ine: 963bf215546Sopenharmony_ci return nir_op_ieq; 964bf215546Sopenharmony_ci default: 965bf215546Sopenharmony_ci unreachable("Unsuported comparison!"); 966bf215546Sopenharmony_ci } 967bf215546Sopenharmony_ci} 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_cistatic bool 970bf215546Sopenharmony_ciget_induction_and_limit_vars(nir_ssa_scalar cond, 971bf215546Sopenharmony_ci nir_ssa_scalar *ind, 972bf215546Sopenharmony_ci nir_ssa_scalar *limit, 973bf215546Sopenharmony_ci bool *limit_rhs, 974bf215546Sopenharmony_ci loop_info_state *state) 975bf215546Sopenharmony_ci{ 976bf215546Sopenharmony_ci nir_ssa_scalar rhs, lhs; 977bf215546Sopenharmony_ci lhs = nir_ssa_scalar_chase_alu_src(cond, 0); 978bf215546Sopenharmony_ci rhs = nir_ssa_scalar_chase_alu_src(cond, 1); 979bf215546Sopenharmony_ci 980bf215546Sopenharmony_ci if (get_loop_var(lhs.def, state)->type == basic_induction) { 981bf215546Sopenharmony_ci *ind = lhs; 982bf215546Sopenharmony_ci *limit = rhs; 983bf215546Sopenharmony_ci *limit_rhs = true; 984bf215546Sopenharmony_ci return true; 985bf215546Sopenharmony_ci } else if (get_loop_var(rhs.def, state)->type == basic_induction) { 986bf215546Sopenharmony_ci *ind = rhs; 987bf215546Sopenharmony_ci *limit = lhs; 988bf215546Sopenharmony_ci *limit_rhs = false; 989bf215546Sopenharmony_ci return true; 990bf215546Sopenharmony_ci } else { 991bf215546Sopenharmony_ci return false; 992bf215546Sopenharmony_ci } 993bf215546Sopenharmony_ci} 994bf215546Sopenharmony_ci 995bf215546Sopenharmony_cistatic bool 996bf215546Sopenharmony_citry_find_trip_count_vars_in_iand(nir_ssa_scalar *cond, 997bf215546Sopenharmony_ci nir_ssa_scalar *ind, 998bf215546Sopenharmony_ci nir_ssa_scalar *limit, 999bf215546Sopenharmony_ci bool *limit_rhs, 1000bf215546Sopenharmony_ci loop_info_state *state) 1001bf215546Sopenharmony_ci{ 1002bf215546Sopenharmony_ci const nir_op alu_op = nir_ssa_scalar_alu_op(*cond); 1003bf215546Sopenharmony_ci assert(alu_op == nir_op_ieq || alu_op == nir_op_inot); 1004bf215546Sopenharmony_ci 1005bf215546Sopenharmony_ci nir_ssa_scalar iand = nir_ssa_scalar_chase_alu_src(*cond, 0); 1006bf215546Sopenharmony_ci 1007bf215546Sopenharmony_ci if (alu_op == nir_op_ieq) { 1008bf215546Sopenharmony_ci nir_ssa_scalar zero = nir_ssa_scalar_chase_alu_src(*cond, 1); 1009bf215546Sopenharmony_ci 1010bf215546Sopenharmony_ci if (!nir_ssa_scalar_is_alu(iand) || !nir_ssa_scalar_is_const(zero)) { 1011bf215546Sopenharmony_ci /* Maybe we had it the wrong way, flip things around */ 1012bf215546Sopenharmony_ci nir_ssa_scalar tmp = zero; 1013bf215546Sopenharmony_ci zero = iand; 1014bf215546Sopenharmony_ci iand = tmp; 1015bf215546Sopenharmony_ci 1016bf215546Sopenharmony_ci /* If we still didn't find what we need then return */ 1017bf215546Sopenharmony_ci if (!nir_ssa_scalar_is_const(zero)) 1018bf215546Sopenharmony_ci return false; 1019bf215546Sopenharmony_ci } 1020bf215546Sopenharmony_ci 1021bf215546Sopenharmony_ci /* If the loop is not breaking on (x && y) == 0 then return */ 1022bf215546Sopenharmony_ci if (nir_ssa_scalar_as_uint(zero) != 0) 1023bf215546Sopenharmony_ci return false; 1024bf215546Sopenharmony_ci } 1025bf215546Sopenharmony_ci 1026bf215546Sopenharmony_ci if (!nir_ssa_scalar_is_alu(iand)) 1027bf215546Sopenharmony_ci return false; 1028bf215546Sopenharmony_ci 1029bf215546Sopenharmony_ci if (nir_ssa_scalar_alu_op(iand) != nir_op_iand) 1030bf215546Sopenharmony_ci return false; 1031bf215546Sopenharmony_ci 1032bf215546Sopenharmony_ci /* Check if iand src is a terminator condition and try get induction var 1033bf215546Sopenharmony_ci * and trip limit var. 1034bf215546Sopenharmony_ci */ 1035bf215546Sopenharmony_ci bool found_induction_var = false; 1036bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 1037bf215546Sopenharmony_ci nir_ssa_scalar src = nir_ssa_scalar_chase_alu_src(iand, i); 1038bf215546Sopenharmony_ci if (nir_is_supported_terminator_condition(src) && 1039bf215546Sopenharmony_ci get_induction_and_limit_vars(src, ind, limit, limit_rhs, state)) { 1040bf215546Sopenharmony_ci *cond = src; 1041bf215546Sopenharmony_ci found_induction_var = true; 1042bf215546Sopenharmony_ci 1043bf215546Sopenharmony_ci /* If we've found one with a constant limit, stop. */ 1044bf215546Sopenharmony_ci if (nir_ssa_scalar_is_const(*limit)) 1045bf215546Sopenharmony_ci return true; 1046bf215546Sopenharmony_ci } 1047bf215546Sopenharmony_ci } 1048bf215546Sopenharmony_ci 1049bf215546Sopenharmony_ci return found_induction_var; 1050bf215546Sopenharmony_ci} 1051bf215546Sopenharmony_ci 1052bf215546Sopenharmony_ci/* Run through each of the terminators of the loop and try to infer a possible 1053bf215546Sopenharmony_ci * trip-count. We need to check them all, and set the lowest trip-count as the 1054bf215546Sopenharmony_ci * trip-count of our loop. If one of the terminators has an undecidable 1055bf215546Sopenharmony_ci * trip-count we can not safely assume anything about the duration of the 1056bf215546Sopenharmony_ci * loop. 1057bf215546Sopenharmony_ci */ 1058bf215546Sopenharmony_cistatic void 1059bf215546Sopenharmony_cifind_trip_count(loop_info_state *state, unsigned execution_mode) 1060bf215546Sopenharmony_ci{ 1061bf215546Sopenharmony_ci bool trip_count_known = true; 1062bf215546Sopenharmony_ci bool guessed_trip_count = false; 1063bf215546Sopenharmony_ci nir_loop_terminator *limiting_terminator = NULL; 1064bf215546Sopenharmony_ci int max_trip_count = -1; 1065bf215546Sopenharmony_ci 1066bf215546Sopenharmony_ci list_for_each_entry(nir_loop_terminator, terminator, 1067bf215546Sopenharmony_ci &state->loop->info->loop_terminator_list, 1068bf215546Sopenharmony_ci loop_terminator_link) { 1069bf215546Sopenharmony_ci assert(terminator->nif->condition.is_ssa); 1070bf215546Sopenharmony_ci nir_ssa_scalar cond = { terminator->nif->condition.ssa, 0 }; 1071bf215546Sopenharmony_ci 1072bf215546Sopenharmony_ci if (!nir_ssa_scalar_is_alu(cond)) { 1073bf215546Sopenharmony_ci /* If we get here the loop is dead and will get cleaned up by the 1074bf215546Sopenharmony_ci * nir_opt_dead_cf pass. 1075bf215546Sopenharmony_ci */ 1076bf215546Sopenharmony_ci trip_count_known = false; 1077bf215546Sopenharmony_ci terminator->exact_trip_count_unknown = true; 1078bf215546Sopenharmony_ci continue; 1079bf215546Sopenharmony_ci } 1080bf215546Sopenharmony_ci 1081bf215546Sopenharmony_ci nir_op alu_op = nir_ssa_scalar_alu_op(cond); 1082bf215546Sopenharmony_ci 1083bf215546Sopenharmony_ci bool limit_rhs; 1084bf215546Sopenharmony_ci nir_ssa_scalar basic_ind = { NULL, 0 }; 1085bf215546Sopenharmony_ci nir_ssa_scalar limit; 1086bf215546Sopenharmony_ci if ((alu_op == nir_op_inot || alu_op == nir_op_ieq) && 1087bf215546Sopenharmony_ci try_find_trip_count_vars_in_iand(&cond, &basic_ind, &limit, 1088bf215546Sopenharmony_ci &limit_rhs, state)) { 1089bf215546Sopenharmony_ci 1090bf215546Sopenharmony_ci /* The loop is exiting on (x && y) == 0 so we need to get the 1091bf215546Sopenharmony_ci * inverse of x or y (i.e. which ever contained the induction var) in 1092bf215546Sopenharmony_ci * order to compute the trip count. 1093bf215546Sopenharmony_ci */ 1094bf215546Sopenharmony_ci alu_op = inverse_comparison(nir_ssa_scalar_alu_op(cond)); 1095bf215546Sopenharmony_ci trip_count_known = false; 1096bf215546Sopenharmony_ci terminator->exact_trip_count_unknown = true; 1097bf215546Sopenharmony_ci } 1098bf215546Sopenharmony_ci 1099bf215546Sopenharmony_ci if (!basic_ind.def) { 1100bf215546Sopenharmony_ci if (nir_is_supported_terminator_condition(cond)) { 1101bf215546Sopenharmony_ci get_induction_and_limit_vars(cond, &basic_ind, 1102bf215546Sopenharmony_ci &limit, &limit_rhs, state); 1103bf215546Sopenharmony_ci } 1104bf215546Sopenharmony_ci } 1105bf215546Sopenharmony_ci 1106bf215546Sopenharmony_ci /* The comparison has to have a basic induction variable for us to be 1107bf215546Sopenharmony_ci * able to find trip counts. 1108bf215546Sopenharmony_ci */ 1109bf215546Sopenharmony_ci if (!basic_ind.def) { 1110bf215546Sopenharmony_ci trip_count_known = false; 1111bf215546Sopenharmony_ci terminator->exact_trip_count_unknown = true; 1112bf215546Sopenharmony_ci continue; 1113bf215546Sopenharmony_ci } 1114bf215546Sopenharmony_ci 1115bf215546Sopenharmony_ci terminator->induction_rhs = !limit_rhs; 1116bf215546Sopenharmony_ci 1117bf215546Sopenharmony_ci /* Attempt to find a constant limit for the loop */ 1118bf215546Sopenharmony_ci nir_const_value limit_val; 1119bf215546Sopenharmony_ci if (nir_ssa_scalar_is_const(limit)) { 1120bf215546Sopenharmony_ci limit_val = nir_ssa_scalar_as_const_value(limit); 1121bf215546Sopenharmony_ci } else { 1122bf215546Sopenharmony_ci trip_count_known = false; 1123bf215546Sopenharmony_ci 1124bf215546Sopenharmony_ci if (!try_find_limit_of_alu(limit, &limit_val, terminator, state)) { 1125bf215546Sopenharmony_ci /* Guess loop limit based on array access */ 1126bf215546Sopenharmony_ci if (!guess_loop_limit(state, &limit_val, basic_ind)) { 1127bf215546Sopenharmony_ci terminator->exact_trip_count_unknown = true; 1128bf215546Sopenharmony_ci continue; 1129bf215546Sopenharmony_ci } 1130bf215546Sopenharmony_ci 1131bf215546Sopenharmony_ci guessed_trip_count = true; 1132bf215546Sopenharmony_ci } 1133bf215546Sopenharmony_ci } 1134bf215546Sopenharmony_ci 1135bf215546Sopenharmony_ci /* We have determined that we have the following constants: 1136bf215546Sopenharmony_ci * (With the typical int i = 0; i < x; i++; as an example) 1137bf215546Sopenharmony_ci * - Upper limit. 1138bf215546Sopenharmony_ci * - Starting value 1139bf215546Sopenharmony_ci * - Step / iteration size 1140bf215546Sopenharmony_ci * Thats all thats needed to calculate the trip-count 1141bf215546Sopenharmony_ci */ 1142bf215546Sopenharmony_ci 1143bf215546Sopenharmony_ci nir_basic_induction_var *ind_var = 1144bf215546Sopenharmony_ci get_loop_var(basic_ind.def, state)->ind; 1145bf215546Sopenharmony_ci 1146bf215546Sopenharmony_ci /* The basic induction var might be a vector but, because we guarantee 1147bf215546Sopenharmony_ci * earlier that the phi source has a scalar swizzle, we can take the 1148bf215546Sopenharmony_ci * component from basic_ind. 1149bf215546Sopenharmony_ci */ 1150bf215546Sopenharmony_ci nir_ssa_scalar initial_s = { ind_var->def_outside_loop, basic_ind.comp }; 1151bf215546Sopenharmony_ci nir_ssa_scalar alu_s = { &ind_var->alu->dest.dest.ssa, basic_ind.comp }; 1152bf215546Sopenharmony_ci 1153bf215546Sopenharmony_ci nir_const_value initial_val = nir_ssa_scalar_as_const_value(initial_s); 1154bf215546Sopenharmony_ci 1155bf215546Sopenharmony_ci /* We are guaranteed by earlier code that at least one of these sources 1156bf215546Sopenharmony_ci * is a constant but we don't know which. 1157bf215546Sopenharmony_ci */ 1158bf215546Sopenharmony_ci nir_const_value step_val; 1159bf215546Sopenharmony_ci memset(&step_val, 0, sizeof(step_val)); 1160bf215546Sopenharmony_ci UNUSED bool found_step_value = false; 1161bf215546Sopenharmony_ci assert(nir_op_infos[ind_var->alu->op].num_inputs == 2); 1162bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 1163bf215546Sopenharmony_ci nir_ssa_scalar alu_src = nir_ssa_scalar_chase_alu_src(alu_s, i); 1164bf215546Sopenharmony_ci if (nir_ssa_scalar_is_const(alu_src)) { 1165bf215546Sopenharmony_ci found_step_value = true; 1166bf215546Sopenharmony_ci step_val = nir_ssa_scalar_as_const_value(alu_src); 1167bf215546Sopenharmony_ci break; 1168bf215546Sopenharmony_ci } 1169bf215546Sopenharmony_ci } 1170bf215546Sopenharmony_ci assert(found_step_value); 1171bf215546Sopenharmony_ci 1172bf215546Sopenharmony_ci int iterations = calculate_iterations(initial_val, step_val, limit_val, 1173bf215546Sopenharmony_ci ind_var->alu, cond, 1174bf215546Sopenharmony_ci alu_op, limit_rhs, 1175bf215546Sopenharmony_ci terminator->continue_from_then, 1176bf215546Sopenharmony_ci execution_mode); 1177bf215546Sopenharmony_ci 1178bf215546Sopenharmony_ci /* Where we not able to calculate the iteration count */ 1179bf215546Sopenharmony_ci if (iterations == -1) { 1180bf215546Sopenharmony_ci trip_count_known = false; 1181bf215546Sopenharmony_ci guessed_trip_count = false; 1182bf215546Sopenharmony_ci terminator->exact_trip_count_unknown = true; 1183bf215546Sopenharmony_ci continue; 1184bf215546Sopenharmony_ci } 1185bf215546Sopenharmony_ci 1186bf215546Sopenharmony_ci if (guessed_trip_count) { 1187bf215546Sopenharmony_ci guessed_trip_count = false; 1188bf215546Sopenharmony_ci terminator->exact_trip_count_unknown = true; 1189bf215546Sopenharmony_ci if (state->loop->info->guessed_trip_count == 0 || 1190bf215546Sopenharmony_ci state->loop->info->guessed_trip_count > iterations) 1191bf215546Sopenharmony_ci state->loop->info->guessed_trip_count = iterations; 1192bf215546Sopenharmony_ci 1193bf215546Sopenharmony_ci continue; 1194bf215546Sopenharmony_ci } 1195bf215546Sopenharmony_ci 1196bf215546Sopenharmony_ci /* If this is the first run or we have found a smaller amount of 1197bf215546Sopenharmony_ci * iterations than previously (we have identified a more limiting 1198bf215546Sopenharmony_ci * terminator) set the trip count and limiting terminator. 1199bf215546Sopenharmony_ci */ 1200bf215546Sopenharmony_ci if (max_trip_count == -1 || iterations < max_trip_count) { 1201bf215546Sopenharmony_ci max_trip_count = iterations; 1202bf215546Sopenharmony_ci limiting_terminator = terminator; 1203bf215546Sopenharmony_ci } 1204bf215546Sopenharmony_ci } 1205bf215546Sopenharmony_ci 1206bf215546Sopenharmony_ci state->loop->info->exact_trip_count_known = trip_count_known; 1207bf215546Sopenharmony_ci if (max_trip_count > -1) 1208bf215546Sopenharmony_ci state->loop->info->max_trip_count = max_trip_count; 1209bf215546Sopenharmony_ci state->loop->info->limiting_terminator = limiting_terminator; 1210bf215546Sopenharmony_ci} 1211bf215546Sopenharmony_ci 1212bf215546Sopenharmony_cistatic bool 1213bf215546Sopenharmony_ciforce_unroll_array_access(loop_info_state *state, nir_deref_instr *deref, 1214bf215546Sopenharmony_ci bool contains_sampler) 1215bf215546Sopenharmony_ci{ 1216bf215546Sopenharmony_ci unsigned array_size = find_array_access_via_induction(state, deref, NULL); 1217bf215546Sopenharmony_ci if (array_size) { 1218bf215546Sopenharmony_ci if ((array_size == state->loop->info->max_trip_count) && 1219bf215546Sopenharmony_ci nir_deref_mode_must_be(deref, nir_var_shader_in | 1220bf215546Sopenharmony_ci nir_var_shader_out | 1221bf215546Sopenharmony_ci nir_var_shader_temp | 1222bf215546Sopenharmony_ci nir_var_function_temp)) 1223bf215546Sopenharmony_ci return true; 1224bf215546Sopenharmony_ci 1225bf215546Sopenharmony_ci if (nir_deref_mode_must_be(deref, state->indirect_mask)) 1226bf215546Sopenharmony_ci return true; 1227bf215546Sopenharmony_ci 1228bf215546Sopenharmony_ci if (contains_sampler && state->force_unroll_sampler_indirect) 1229bf215546Sopenharmony_ci return true; 1230bf215546Sopenharmony_ci } 1231bf215546Sopenharmony_ci 1232bf215546Sopenharmony_ci return false; 1233bf215546Sopenharmony_ci} 1234bf215546Sopenharmony_ci 1235bf215546Sopenharmony_cistatic bool 1236bf215546Sopenharmony_ciforce_unroll_heuristics(loop_info_state *state, nir_block *block) 1237bf215546Sopenharmony_ci{ 1238bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 1239bf215546Sopenharmony_ci if (instr->type == nir_instr_type_tex) { 1240bf215546Sopenharmony_ci nir_tex_instr *tex_instr = nir_instr_as_tex(instr); 1241bf215546Sopenharmony_ci int sampler_idx = 1242bf215546Sopenharmony_ci nir_tex_instr_src_index(tex_instr, 1243bf215546Sopenharmony_ci nir_tex_src_sampler_deref); 1244bf215546Sopenharmony_ci 1245bf215546Sopenharmony_ci 1246bf215546Sopenharmony_ci if (sampler_idx >= 0) { 1247bf215546Sopenharmony_ci nir_deref_instr *deref = 1248bf215546Sopenharmony_ci nir_instr_as_deref(tex_instr->src[sampler_idx].src.ssa->parent_instr); 1249bf215546Sopenharmony_ci if (force_unroll_array_access(state, deref, true)) 1250bf215546Sopenharmony_ci return true; 1251bf215546Sopenharmony_ci } 1252bf215546Sopenharmony_ci } 1253bf215546Sopenharmony_ci 1254bf215546Sopenharmony_ci 1255bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 1256bf215546Sopenharmony_ci continue; 1257bf215546Sopenharmony_ci 1258bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 1259bf215546Sopenharmony_ci 1260bf215546Sopenharmony_ci /* Check for arrays variably-indexed by a loop induction variable. 1261bf215546Sopenharmony_ci * Unrolling the loop may convert that access into constant-indexing. 1262bf215546Sopenharmony_ci */ 1263bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_load_deref || 1264bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_store_deref || 1265bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_copy_deref) { 1266bf215546Sopenharmony_ci if (force_unroll_array_access(state, 1267bf215546Sopenharmony_ci nir_src_as_deref(intrin->src[0]), 1268bf215546Sopenharmony_ci false)) 1269bf215546Sopenharmony_ci return true; 1270bf215546Sopenharmony_ci 1271bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_copy_deref && 1272bf215546Sopenharmony_ci force_unroll_array_access(state, 1273bf215546Sopenharmony_ci nir_src_as_deref(intrin->src[1]), 1274bf215546Sopenharmony_ci false)) 1275bf215546Sopenharmony_ci return true; 1276bf215546Sopenharmony_ci } 1277bf215546Sopenharmony_ci } 1278bf215546Sopenharmony_ci 1279bf215546Sopenharmony_ci return false; 1280bf215546Sopenharmony_ci} 1281bf215546Sopenharmony_ci 1282bf215546Sopenharmony_cistatic void 1283bf215546Sopenharmony_ciget_loop_info(loop_info_state *state, nir_function_impl *impl) 1284bf215546Sopenharmony_ci{ 1285bf215546Sopenharmony_ci nir_shader *shader = impl->function->shader; 1286bf215546Sopenharmony_ci const nir_shader_compiler_options *options = shader->options; 1287bf215546Sopenharmony_ci 1288bf215546Sopenharmony_ci /* Add all entries in the outermost part of the loop to the processing list 1289bf215546Sopenharmony_ci * Mark the entries in conditionals or in nested loops accordingly 1290bf215546Sopenharmony_ci */ 1291bf215546Sopenharmony_ci foreach_list_typed_safe(nir_cf_node, node, node, &state->loop->body) { 1292bf215546Sopenharmony_ci switch (node->type) { 1293bf215546Sopenharmony_ci 1294bf215546Sopenharmony_ci case nir_cf_node_block: 1295bf215546Sopenharmony_ci init_loop_block(nir_cf_node_as_block(node), state, 1296bf215546Sopenharmony_ci false, false, options); 1297bf215546Sopenharmony_ci break; 1298bf215546Sopenharmony_ci 1299bf215546Sopenharmony_ci case nir_cf_node_if: 1300bf215546Sopenharmony_ci nir_foreach_block_in_cf_node(block, node) 1301bf215546Sopenharmony_ci init_loop_block(block, state, true, false, options); 1302bf215546Sopenharmony_ci break; 1303bf215546Sopenharmony_ci 1304bf215546Sopenharmony_ci case nir_cf_node_loop: 1305bf215546Sopenharmony_ci nir_foreach_block_in_cf_node(block, node) { 1306bf215546Sopenharmony_ci init_loop_block(block, state, false, true, options); 1307bf215546Sopenharmony_ci } 1308bf215546Sopenharmony_ci break; 1309bf215546Sopenharmony_ci 1310bf215546Sopenharmony_ci case nir_cf_node_function: 1311bf215546Sopenharmony_ci break; 1312bf215546Sopenharmony_ci } 1313bf215546Sopenharmony_ci } 1314bf215546Sopenharmony_ci 1315bf215546Sopenharmony_ci /* Try to find all simple terminators of the loop. If we can't find any, 1316bf215546Sopenharmony_ci * or we find possible terminators that have side effects then bail. 1317bf215546Sopenharmony_ci */ 1318bf215546Sopenharmony_ci if (!find_loop_terminators(state)) { 1319bf215546Sopenharmony_ci list_for_each_entry_safe(nir_loop_terminator, terminator, 1320bf215546Sopenharmony_ci &state->loop->info->loop_terminator_list, 1321bf215546Sopenharmony_ci loop_terminator_link) { 1322bf215546Sopenharmony_ci list_del(&terminator->loop_terminator_link); 1323bf215546Sopenharmony_ci ralloc_free(terminator); 1324bf215546Sopenharmony_ci } 1325bf215546Sopenharmony_ci return; 1326bf215546Sopenharmony_ci } 1327bf215546Sopenharmony_ci 1328bf215546Sopenharmony_ci /* Induction analysis needs invariance information so get that first */ 1329bf215546Sopenharmony_ci compute_invariance_information(state); 1330bf215546Sopenharmony_ci 1331bf215546Sopenharmony_ci /* We have invariance information so try to find induction variables */ 1332bf215546Sopenharmony_ci if (!compute_induction_information(state)) 1333bf215546Sopenharmony_ci return; 1334bf215546Sopenharmony_ci 1335bf215546Sopenharmony_ci /* Run through each of the terminators and try to compute a trip-count */ 1336bf215546Sopenharmony_ci find_trip_count(state, impl->function->shader->info.float_controls_execution_mode); 1337bf215546Sopenharmony_ci 1338bf215546Sopenharmony_ci nir_foreach_block_in_cf_node(block, &state->loop->cf_node) { 1339bf215546Sopenharmony_ci if (force_unroll_heuristics(state, block)) { 1340bf215546Sopenharmony_ci state->loop->info->force_unroll = true; 1341bf215546Sopenharmony_ci break; 1342bf215546Sopenharmony_ci } 1343bf215546Sopenharmony_ci } 1344bf215546Sopenharmony_ci} 1345bf215546Sopenharmony_ci 1346bf215546Sopenharmony_cistatic loop_info_state * 1347bf215546Sopenharmony_ciinitialize_loop_info_state(nir_loop *loop, void *mem_ctx, 1348bf215546Sopenharmony_ci nir_function_impl *impl) 1349bf215546Sopenharmony_ci{ 1350bf215546Sopenharmony_ci loop_info_state *state = rzalloc(mem_ctx, loop_info_state); 1351bf215546Sopenharmony_ci state->loop_vars = ralloc_array(mem_ctx, nir_loop_variable, 1352bf215546Sopenharmony_ci impl->ssa_alloc); 1353bf215546Sopenharmony_ci state->loop_vars_init = rzalloc_array(mem_ctx, BITSET_WORD, 1354bf215546Sopenharmony_ci BITSET_WORDS(impl->ssa_alloc)); 1355bf215546Sopenharmony_ci state->loop = loop; 1356bf215546Sopenharmony_ci 1357bf215546Sopenharmony_ci list_inithead(&state->process_list); 1358bf215546Sopenharmony_ci 1359bf215546Sopenharmony_ci if (loop->info) 1360bf215546Sopenharmony_ci ralloc_free(loop->info); 1361bf215546Sopenharmony_ci 1362bf215546Sopenharmony_ci loop->info = rzalloc(loop, nir_loop_info); 1363bf215546Sopenharmony_ci 1364bf215546Sopenharmony_ci list_inithead(&loop->info->loop_terminator_list); 1365bf215546Sopenharmony_ci 1366bf215546Sopenharmony_ci return state; 1367bf215546Sopenharmony_ci} 1368bf215546Sopenharmony_ci 1369bf215546Sopenharmony_cistatic void 1370bf215546Sopenharmony_ciprocess_loops(nir_cf_node *cf_node, nir_variable_mode indirect_mask, 1371bf215546Sopenharmony_ci bool force_unroll_sampler_indirect) 1372bf215546Sopenharmony_ci{ 1373bf215546Sopenharmony_ci switch (cf_node->type) { 1374bf215546Sopenharmony_ci case nir_cf_node_block: 1375bf215546Sopenharmony_ci return; 1376bf215546Sopenharmony_ci case nir_cf_node_if: { 1377bf215546Sopenharmony_ci nir_if *if_stmt = nir_cf_node_as_if(cf_node); 1378bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) 1379bf215546Sopenharmony_ci process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect); 1380bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) 1381bf215546Sopenharmony_ci process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect); 1382bf215546Sopenharmony_ci return; 1383bf215546Sopenharmony_ci } 1384bf215546Sopenharmony_ci case nir_cf_node_loop: { 1385bf215546Sopenharmony_ci nir_loop *loop = nir_cf_node_as_loop(cf_node); 1386bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) 1387bf215546Sopenharmony_ci process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect); 1388bf215546Sopenharmony_ci break; 1389bf215546Sopenharmony_ci } 1390bf215546Sopenharmony_ci default: 1391bf215546Sopenharmony_ci unreachable("unknown cf node type"); 1392bf215546Sopenharmony_ci } 1393bf215546Sopenharmony_ci 1394bf215546Sopenharmony_ci nir_loop *loop = nir_cf_node_as_loop(cf_node); 1395bf215546Sopenharmony_ci nir_function_impl *impl = nir_cf_node_get_function(cf_node); 1396bf215546Sopenharmony_ci void *mem_ctx = ralloc_context(NULL); 1397bf215546Sopenharmony_ci 1398bf215546Sopenharmony_ci loop_info_state *state = initialize_loop_info_state(loop, mem_ctx, impl); 1399bf215546Sopenharmony_ci state->indirect_mask = indirect_mask; 1400bf215546Sopenharmony_ci state->force_unroll_sampler_indirect = force_unroll_sampler_indirect; 1401bf215546Sopenharmony_ci 1402bf215546Sopenharmony_ci get_loop_info(state, impl); 1403bf215546Sopenharmony_ci 1404bf215546Sopenharmony_ci ralloc_free(mem_ctx); 1405bf215546Sopenharmony_ci} 1406bf215546Sopenharmony_ci 1407bf215546Sopenharmony_civoid 1408bf215546Sopenharmony_cinir_loop_analyze_impl(nir_function_impl *impl, 1409bf215546Sopenharmony_ci nir_variable_mode indirect_mask, 1410bf215546Sopenharmony_ci bool force_unroll_sampler_indirect) 1411bf215546Sopenharmony_ci{ 1412bf215546Sopenharmony_ci nir_index_ssa_defs(impl); 1413bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, node, node, &impl->body) 1414bf215546Sopenharmony_ci process_loops(node, indirect_mask, force_unroll_sampler_indirect); 1415bf215546Sopenharmony_ci} 1416