1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2015 Connor Abbott 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci/** 26bf215546Sopenharmony_ci * nir_opt_vectorize() aims to vectorize ALU instructions. 27bf215546Sopenharmony_ci * 28bf215546Sopenharmony_ci * The default vectorization width is 4. 29bf215546Sopenharmony_ci * If desired, a callback function which returns the max vectorization width 30bf215546Sopenharmony_ci * per instruction can be provided. 31bf215546Sopenharmony_ci * 32bf215546Sopenharmony_ci * The max vectorization width must be a power of 2. 33bf215546Sopenharmony_ci */ 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_ci#include "nir.h" 36bf215546Sopenharmony_ci#include "nir_vla.h" 37bf215546Sopenharmony_ci#include "nir_builder.h" 38bf215546Sopenharmony_ci#include "util/u_dynarray.h" 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci#define HASH(hash, data) XXH32(&data, sizeof(data), hash) 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_cistatic uint32_t 43bf215546Sopenharmony_cihash_src(uint32_t hash, const nir_src *src) 44bf215546Sopenharmony_ci{ 45bf215546Sopenharmony_ci assert(src->is_ssa); 46bf215546Sopenharmony_ci void *hash_data = nir_src_is_const(*src) ? NULL : src->ssa; 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci return HASH(hash, hash_data); 49bf215546Sopenharmony_ci} 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_cistatic uint32_t 52bf215546Sopenharmony_cihash_alu_src(uint32_t hash, const nir_alu_src *src, 53bf215546Sopenharmony_ci uint32_t num_components, uint32_t max_vec) 54bf215546Sopenharmony_ci{ 55bf215546Sopenharmony_ci assert(!src->abs && !src->negate); 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci /* hash whether a swizzle accesses elements beyond the maximum 58bf215546Sopenharmony_ci * vectorization factor: 59bf215546Sopenharmony_ci * For example accesses to .x and .y are considered different variables 60bf215546Sopenharmony_ci * compared to accesses to .z and .w for 16-bit vec2. 61bf215546Sopenharmony_ci */ 62bf215546Sopenharmony_ci uint32_t swizzle = (src->swizzle[0] & ~(max_vec - 1)); 63bf215546Sopenharmony_ci hash = HASH(hash, swizzle); 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci return hash_src(hash, &src->src); 66bf215546Sopenharmony_ci} 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_cistatic uint32_t 69bf215546Sopenharmony_cihash_instr(const void *data) 70bf215546Sopenharmony_ci{ 71bf215546Sopenharmony_ci const nir_instr *instr = (nir_instr *) data; 72bf215546Sopenharmony_ci assert(instr->type == nir_instr_type_alu); 73bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(instr); 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci uint32_t hash = HASH(0, alu->op); 76bf215546Sopenharmony_ci hash = HASH(hash, alu->dest.dest.ssa.bit_size); 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) 79bf215546Sopenharmony_ci hash = hash_alu_src(hash, &alu->src[i], 80bf215546Sopenharmony_ci alu->dest.dest.ssa.num_components, 81bf215546Sopenharmony_ci instr->pass_flags); 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci return hash; 84bf215546Sopenharmony_ci} 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_cistatic bool 87bf215546Sopenharmony_cisrcs_equal(const nir_src *src1, const nir_src *src2) 88bf215546Sopenharmony_ci{ 89bf215546Sopenharmony_ci assert(src1->is_ssa); 90bf215546Sopenharmony_ci assert(src2->is_ssa); 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci return src1->ssa == src2->ssa || 93bf215546Sopenharmony_ci (nir_src_is_const(*src1) && nir_src_is_const(*src2)); 94bf215546Sopenharmony_ci} 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_cistatic bool 97bf215546Sopenharmony_cialu_srcs_equal(const nir_alu_src *src1, const nir_alu_src *src2, 98bf215546Sopenharmony_ci uint32_t max_vec) 99bf215546Sopenharmony_ci{ 100bf215546Sopenharmony_ci assert(!src1->abs); 101bf215546Sopenharmony_ci assert(!src1->negate); 102bf215546Sopenharmony_ci assert(!src2->abs); 103bf215546Sopenharmony_ci assert(!src2->negate); 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci uint32_t mask = ~(max_vec - 1); 106bf215546Sopenharmony_ci if ((src1->swizzle[0] & mask) != (src2->swizzle[0] & mask)) 107bf215546Sopenharmony_ci return false; 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci return srcs_equal(&src1->src, &src2->src); 110bf215546Sopenharmony_ci} 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_cistatic bool 113bf215546Sopenharmony_ciinstrs_equal(const void *data1, const void *data2) 114bf215546Sopenharmony_ci{ 115bf215546Sopenharmony_ci const nir_instr *instr1 = (nir_instr *) data1; 116bf215546Sopenharmony_ci const nir_instr *instr2 = (nir_instr *) data2; 117bf215546Sopenharmony_ci assert(instr1->type == nir_instr_type_alu); 118bf215546Sopenharmony_ci assert(instr2->type == nir_instr_type_alu); 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci nir_alu_instr *alu1 = nir_instr_as_alu(instr1); 121bf215546Sopenharmony_ci nir_alu_instr *alu2 = nir_instr_as_alu(instr2); 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_ci if (alu1->op != alu2->op) 124bf215546Sopenharmony_ci return false; 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci if (alu1->dest.dest.ssa.bit_size != alu2->dest.dest.ssa.bit_size) 127bf215546Sopenharmony_ci return false; 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { 130bf215546Sopenharmony_ci if (!alu_srcs_equal(&alu1->src[i], &alu2->src[i], instr1->pass_flags)) 131bf215546Sopenharmony_ci return false; 132bf215546Sopenharmony_ci } 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci return true; 135bf215546Sopenharmony_ci} 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_cistatic bool 138bf215546Sopenharmony_ciinstr_can_rewrite(nir_instr *instr) 139bf215546Sopenharmony_ci{ 140bf215546Sopenharmony_ci switch (instr->type) { 141bf215546Sopenharmony_ci case nir_instr_type_alu: { 142bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(instr); 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci /* Don't try and vectorize mov's. Either they'll be handled by copy 145bf215546Sopenharmony_ci * prop, or they're actually necessary and trying to vectorize them 146bf215546Sopenharmony_ci * would result in fighting with copy prop. 147bf215546Sopenharmony_ci */ 148bf215546Sopenharmony_ci if (alu->op == nir_op_mov) 149bf215546Sopenharmony_ci return false; 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci /* no need to hash instructions which are already vectorized */ 152bf215546Sopenharmony_ci if (alu->dest.dest.ssa.num_components >= instr->pass_flags) 153bf215546Sopenharmony_ci return false; 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci if (nir_op_infos[alu->op].output_size != 0) 156bf215546Sopenharmony_ci return false; 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 159bf215546Sopenharmony_ci if (nir_op_infos[alu->op].input_sizes[i] != 0) 160bf215546Sopenharmony_ci return false; 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci /* don't hash instructions which are already swizzled 163bf215546Sopenharmony_ci * outside of max_components: these should better be scalarized */ 164bf215546Sopenharmony_ci uint32_t mask = ~(instr->pass_flags - 1); 165bf215546Sopenharmony_ci for (unsigned j = 1; j < alu->dest.dest.ssa.num_components; j++) { 166bf215546Sopenharmony_ci if ((alu->src[i].swizzle[0] & mask) != (alu->src[i].swizzle[j] & mask)) 167bf215546Sopenharmony_ci return false; 168bf215546Sopenharmony_ci } 169bf215546Sopenharmony_ci } 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci return true; 172bf215546Sopenharmony_ci } 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci /* TODO support phi nodes */ 175bf215546Sopenharmony_ci default: 176bf215546Sopenharmony_ci break; 177bf215546Sopenharmony_ci } 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci return false; 180bf215546Sopenharmony_ci} 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci/* 183bf215546Sopenharmony_ci * Tries to combine two instructions whose sources are different components of 184bf215546Sopenharmony_ci * the same instructions into one vectorized instruction. Note that instr1 185bf215546Sopenharmony_ci * should dominate instr2. 186bf215546Sopenharmony_ci */ 187bf215546Sopenharmony_cistatic nir_instr * 188bf215546Sopenharmony_ciinstr_try_combine(struct set *instr_set, nir_instr *instr1, nir_instr *instr2) 189bf215546Sopenharmony_ci{ 190bf215546Sopenharmony_ci assert(instr1->type == nir_instr_type_alu); 191bf215546Sopenharmony_ci assert(instr2->type == nir_instr_type_alu); 192bf215546Sopenharmony_ci nir_alu_instr *alu1 = nir_instr_as_alu(instr1); 193bf215546Sopenharmony_ci nir_alu_instr *alu2 = nir_instr_as_alu(instr2); 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci assert(alu1->dest.dest.ssa.bit_size == alu2->dest.dest.ssa.bit_size); 196bf215546Sopenharmony_ci unsigned alu1_components = alu1->dest.dest.ssa.num_components; 197bf215546Sopenharmony_ci unsigned alu2_components = alu2->dest.dest.ssa.num_components; 198bf215546Sopenharmony_ci unsigned total_components = alu1_components + alu2_components; 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci assert(instr1->pass_flags == instr2->pass_flags); 201bf215546Sopenharmony_ci if (total_components > instr1->pass_flags) 202bf215546Sopenharmony_ci return NULL; 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci nir_builder b; 205bf215546Sopenharmony_ci nir_builder_init(&b, nir_cf_node_get_function(&instr1->block->cf_node)); 206bf215546Sopenharmony_ci b.cursor = nir_after_instr(instr1); 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci nir_alu_instr *new_alu = nir_alu_instr_create(b.shader, alu1->op); 209bf215546Sopenharmony_ci nir_ssa_dest_init(&new_alu->instr, &new_alu->dest.dest, 210bf215546Sopenharmony_ci total_components, alu1->dest.dest.ssa.bit_size, NULL); 211bf215546Sopenharmony_ci new_alu->dest.write_mask = (1 << total_components) - 1; 212bf215546Sopenharmony_ci new_alu->instr.pass_flags = alu1->instr.pass_flags; 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci /* If either channel is exact, we have to preserve it even if it's 215bf215546Sopenharmony_ci * not optimal for other channels. 216bf215546Sopenharmony_ci */ 217bf215546Sopenharmony_ci new_alu->exact = alu1->exact || alu2->exact; 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci /* If all channels don't wrap, we can say that the whole vector doesn't 220bf215546Sopenharmony_ci * wrap. 221bf215546Sopenharmony_ci */ 222bf215546Sopenharmony_ci new_alu->no_signed_wrap = alu1->no_signed_wrap && alu2->no_signed_wrap; 223bf215546Sopenharmony_ci new_alu->no_unsigned_wrap = alu1->no_unsigned_wrap && alu2->no_unsigned_wrap; 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { 226bf215546Sopenharmony_ci /* handle constant merging case */ 227bf215546Sopenharmony_ci if (alu1->src[i].src.ssa != alu2->src[i].src.ssa) { 228bf215546Sopenharmony_ci nir_const_value *c1 = nir_src_as_const_value(alu1->src[i].src); 229bf215546Sopenharmony_ci nir_const_value *c2 = nir_src_as_const_value(alu2->src[i].src); 230bf215546Sopenharmony_ci assert(c1 && c2); 231bf215546Sopenharmony_ci nir_const_value value[NIR_MAX_VEC_COMPONENTS]; 232bf215546Sopenharmony_ci unsigned bit_size = alu1->src[i].src.ssa->bit_size; 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci for (unsigned j = 0; j < total_components; j++) { 235bf215546Sopenharmony_ci value[j].u64 = j < alu1_components ? 236bf215546Sopenharmony_ci c1[alu1->src[i].swizzle[j]].u64 : 237bf215546Sopenharmony_ci c2[alu2->src[i].swizzle[j - alu1_components]].u64; 238bf215546Sopenharmony_ci } 239bf215546Sopenharmony_ci nir_ssa_def *def = nir_build_imm(&b, total_components, bit_size, value); 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci new_alu->src[i].src = nir_src_for_ssa(def); 242bf215546Sopenharmony_ci for (unsigned j = 0; j < total_components; j++) 243bf215546Sopenharmony_ci new_alu->src[i].swizzle[j] = j; 244bf215546Sopenharmony_ci continue; 245bf215546Sopenharmony_ci } 246bf215546Sopenharmony_ci 247bf215546Sopenharmony_ci new_alu->src[i].src = alu1->src[i].src; 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci for (unsigned j = 0; j < alu1_components; j++) 250bf215546Sopenharmony_ci new_alu->src[i].swizzle[j] = alu1->src[i].swizzle[j]; 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci for (unsigned j = 0; j < alu2_components; j++) { 253bf215546Sopenharmony_ci new_alu->src[i].swizzle[j + alu1_components] = 254bf215546Sopenharmony_ci alu2->src[i].swizzle[j]; 255bf215546Sopenharmony_ci } 256bf215546Sopenharmony_ci } 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci nir_builder_instr_insert(&b, &new_alu->instr); 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci /* update all ALU uses */ 261bf215546Sopenharmony_ci nir_foreach_use_safe(src, &alu1->dest.dest.ssa) { 262bf215546Sopenharmony_ci nir_instr *user_instr = src->parent_instr; 263bf215546Sopenharmony_ci if (user_instr->type == nir_instr_type_alu) { 264bf215546Sopenharmony_ci /* Check if user is found in the hashset */ 265bf215546Sopenharmony_ci struct set_entry *entry = _mesa_set_search(instr_set, user_instr); 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci /* For ALU instructions, rewrite the source directly to avoid a 268bf215546Sopenharmony_ci * round-trip through copy propagation. 269bf215546Sopenharmony_ci */ 270bf215546Sopenharmony_ci nir_instr_rewrite_src(user_instr, src, 271bf215546Sopenharmony_ci nir_src_for_ssa(&new_alu->dest.dest.ssa)); 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ci /* Rehash user if it was found in the hashset */ 274bf215546Sopenharmony_ci if (entry && entry->key == user_instr) { 275bf215546Sopenharmony_ci _mesa_set_remove(instr_set, entry); 276bf215546Sopenharmony_ci _mesa_set_add(instr_set, user_instr); 277bf215546Sopenharmony_ci } 278bf215546Sopenharmony_ci } 279bf215546Sopenharmony_ci } 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci nir_foreach_use_safe(src, &alu2->dest.dest.ssa) { 282bf215546Sopenharmony_ci if (src->parent_instr->type == nir_instr_type_alu) { 283bf215546Sopenharmony_ci /* For ALU instructions, rewrite the source directly to avoid a 284bf215546Sopenharmony_ci * round-trip through copy propagation. 285bf215546Sopenharmony_ci */ 286bf215546Sopenharmony_ci nir_instr_rewrite_src(src->parent_instr, src, 287bf215546Sopenharmony_ci nir_src_for_ssa(&new_alu->dest.dest.ssa)); 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci nir_alu_src *alu_src = container_of(src, nir_alu_src, src); 290bf215546Sopenharmony_ci nir_alu_instr *use = nir_instr_as_alu(src->parent_instr); 291bf215546Sopenharmony_ci unsigned components = nir_ssa_alu_instr_src_components(use, alu_src - use->src); 292bf215546Sopenharmony_ci for (unsigned i = 0; i < components; i++) 293bf215546Sopenharmony_ci alu_src->swizzle[i] += alu1_components; 294bf215546Sopenharmony_ci } 295bf215546Sopenharmony_ci } 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci /* update all other uses if there are any */ 298bf215546Sopenharmony_ci unsigned swiz[NIR_MAX_VEC_COMPONENTS]; 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci if (!nir_ssa_def_is_unused(&alu1->dest.dest.ssa)) { 301bf215546Sopenharmony_ci for (unsigned i = 0; i < alu1_components; i++) 302bf215546Sopenharmony_ci swiz[i] = i; 303bf215546Sopenharmony_ci nir_ssa_def *new_alu1 = nir_swizzle(&b, &new_alu->dest.dest.ssa, swiz, 304bf215546Sopenharmony_ci alu1_components); 305bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&alu1->dest.dest.ssa, new_alu1); 306bf215546Sopenharmony_ci } 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_ci if (!nir_ssa_def_is_unused(&alu2->dest.dest.ssa)) { 309bf215546Sopenharmony_ci for (unsigned i = 0; i < alu2_components; i++) 310bf215546Sopenharmony_ci swiz[i] = i + alu1_components; 311bf215546Sopenharmony_ci nir_ssa_def *new_alu2 = nir_swizzle(&b, &new_alu->dest.dest.ssa, swiz, 312bf215546Sopenharmony_ci alu2_components); 313bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&alu2->dest.dest.ssa, new_alu2); 314bf215546Sopenharmony_ci } 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci nir_instr_remove(instr1); 317bf215546Sopenharmony_ci nir_instr_remove(instr2); 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci return &new_alu->instr; 320bf215546Sopenharmony_ci} 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_cistatic struct set * 323bf215546Sopenharmony_civec_instr_set_create(void) 324bf215546Sopenharmony_ci{ 325bf215546Sopenharmony_ci return _mesa_set_create(NULL, hash_instr, instrs_equal); 326bf215546Sopenharmony_ci} 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_cistatic void 329bf215546Sopenharmony_civec_instr_set_destroy(struct set *instr_set) 330bf215546Sopenharmony_ci{ 331bf215546Sopenharmony_ci _mesa_set_destroy(instr_set, NULL); 332bf215546Sopenharmony_ci} 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_cistatic bool 335bf215546Sopenharmony_civec_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr, 336bf215546Sopenharmony_ci nir_vectorize_cb filter, void *data) 337bf215546Sopenharmony_ci{ 338bf215546Sopenharmony_ci /* set max vector to instr pass flags: this is used to hash swizzles */ 339bf215546Sopenharmony_ci instr->pass_flags = filter ? filter(instr, data) : 4; 340bf215546Sopenharmony_ci assert(util_is_power_of_two_or_zero(instr->pass_flags)); 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci if (!instr_can_rewrite(instr)) 343bf215546Sopenharmony_ci return false; 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci struct set_entry *entry = _mesa_set_search(instr_set, instr); 346bf215546Sopenharmony_ci if (entry) { 347bf215546Sopenharmony_ci nir_instr *old_instr = (nir_instr *) entry->key; 348bf215546Sopenharmony_ci _mesa_set_remove(instr_set, entry); 349bf215546Sopenharmony_ci nir_instr *new_instr = instr_try_combine(instr_set, old_instr, instr); 350bf215546Sopenharmony_ci if (new_instr) { 351bf215546Sopenharmony_ci if (instr_can_rewrite(new_instr)) 352bf215546Sopenharmony_ci _mesa_set_add(instr_set, new_instr); 353bf215546Sopenharmony_ci return true; 354bf215546Sopenharmony_ci } 355bf215546Sopenharmony_ci } 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci _mesa_set_add(instr_set, instr); 358bf215546Sopenharmony_ci return false; 359bf215546Sopenharmony_ci} 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_cistatic bool 362bf215546Sopenharmony_civectorize_block(nir_block *block, struct set *instr_set, 363bf215546Sopenharmony_ci nir_vectorize_cb filter, void *data) 364bf215546Sopenharmony_ci{ 365bf215546Sopenharmony_ci bool progress = false; 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 368bf215546Sopenharmony_ci if (vec_instr_set_add_or_rewrite(instr_set, instr, filter, data)) 369bf215546Sopenharmony_ci progress = true; 370bf215546Sopenharmony_ci } 371bf215546Sopenharmony_ci 372bf215546Sopenharmony_ci for (unsigned i = 0; i < block->num_dom_children; i++) { 373bf215546Sopenharmony_ci nir_block *child = block->dom_children[i]; 374bf215546Sopenharmony_ci progress |= vectorize_block(child, instr_set, filter, data); 375bf215546Sopenharmony_ci } 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci nir_foreach_instr_reverse(instr, block) { 378bf215546Sopenharmony_ci if (instr_can_rewrite(instr)) 379bf215546Sopenharmony_ci _mesa_set_remove_key(instr_set, instr); 380bf215546Sopenharmony_ci } 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci return progress; 383bf215546Sopenharmony_ci} 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_cistatic bool 386bf215546Sopenharmony_cinir_opt_vectorize_impl(nir_function_impl *impl, 387bf215546Sopenharmony_ci nir_vectorize_cb filter, void *data) 388bf215546Sopenharmony_ci{ 389bf215546Sopenharmony_ci struct set *instr_set = vec_instr_set_create(); 390bf215546Sopenharmony_ci 391bf215546Sopenharmony_ci nir_metadata_require(impl, nir_metadata_dominance); 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci bool progress = vectorize_block(nir_start_block(impl), instr_set, 394bf215546Sopenharmony_ci filter, data); 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_ci if (progress) { 397bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | 398bf215546Sopenharmony_ci nir_metadata_dominance); 399bf215546Sopenharmony_ci } else { 400bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 401bf215546Sopenharmony_ci } 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci vec_instr_set_destroy(instr_set); 404bf215546Sopenharmony_ci return progress; 405bf215546Sopenharmony_ci} 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_cibool 408bf215546Sopenharmony_cinir_opt_vectorize(nir_shader *shader, nir_vectorize_cb filter, 409bf215546Sopenharmony_ci void *data) 410bf215546Sopenharmony_ci{ 411bf215546Sopenharmony_ci bool progress = false; 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 414bf215546Sopenharmony_ci if (function->impl) 415bf215546Sopenharmony_ci progress |= nir_opt_vectorize_impl(function->impl, filter, data); 416bf215546Sopenharmony_ci } 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci return progress; 419bf215546Sopenharmony_ci} 420