1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir_instr_set.h" 25bf215546Sopenharmony_ci#include "nir_search_helpers.h" 26bf215546Sopenharmony_ci#include "nir_builder.h" 27bf215546Sopenharmony_ci#include "util/u_vector.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci/* Partial redundancy elimination of compares 30bf215546Sopenharmony_ci * 31bf215546Sopenharmony_ci * Seaches for comparisons of the form 'a cmp b' that dominate arithmetic 32bf215546Sopenharmony_ci * instructions like 'b - a'. The comparison is replaced by the arithmetic 33bf215546Sopenharmony_ci * instruction, and the result is compared with zero. For example, 34bf215546Sopenharmony_ci * 35bf215546Sopenharmony_ci * vec1 32 ssa_111 = flt 0.37, ssa_110.w 36bf215546Sopenharmony_ci * if ssa_111 { 37bf215546Sopenharmony_ci * block block_1: 38bf215546Sopenharmony_ci * vec1 32 ssa_112 = fadd ssa_110.w, -0.37 39bf215546Sopenharmony_ci * ... 40bf215546Sopenharmony_ci * 41bf215546Sopenharmony_ci * becomes 42bf215546Sopenharmony_ci * 43bf215546Sopenharmony_ci * vec1 32 ssa_111 = fadd ssa_110.w, -0.37 44bf215546Sopenharmony_ci * vec1 32 ssa_112 = flt 0.0, ssa_111 45bf215546Sopenharmony_ci * if ssa_112 { 46bf215546Sopenharmony_ci * block block_1: 47bf215546Sopenharmony_ci * ... 48bf215546Sopenharmony_ci */ 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_cistruct block_queue { 51bf215546Sopenharmony_ci /** 52bf215546Sopenharmony_ci * Stack of blocks from the current location in the CFG to the entry point 53bf215546Sopenharmony_ci * of the function. 54bf215546Sopenharmony_ci * 55bf215546Sopenharmony_ci * This is sort of a poor man's dominator tree. 56bf215546Sopenharmony_ci */ 57bf215546Sopenharmony_ci struct exec_list blocks; 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_ci /** List of freed block_instructions structures that can be reused. */ 60bf215546Sopenharmony_ci struct exec_list reusable_blocks; 61bf215546Sopenharmony_ci}; 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_cistruct block_instructions { 64bf215546Sopenharmony_ci struct exec_node node; 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci /** 67bf215546Sopenharmony_ci * Set of comparison instructions from the block that are candidates for 68bf215546Sopenharmony_ci * being replaced by add instructions. 69bf215546Sopenharmony_ci */ 70bf215546Sopenharmony_ci struct u_vector instructions; 71bf215546Sopenharmony_ci}; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_cistatic void 74bf215546Sopenharmony_ciblock_queue_init(struct block_queue *bq) 75bf215546Sopenharmony_ci{ 76bf215546Sopenharmony_ci exec_list_make_empty(&bq->blocks); 77bf215546Sopenharmony_ci exec_list_make_empty(&bq->reusable_blocks); 78bf215546Sopenharmony_ci} 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_cistatic void 81bf215546Sopenharmony_ciblock_queue_finish(struct block_queue *bq) 82bf215546Sopenharmony_ci{ 83bf215546Sopenharmony_ci struct block_instructions *n; 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci while ((n = (struct block_instructions *) exec_list_pop_head(&bq->blocks)) != NULL) { 86bf215546Sopenharmony_ci u_vector_finish(&n->instructions); 87bf215546Sopenharmony_ci free(n); 88bf215546Sopenharmony_ci } 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci while ((n = (struct block_instructions *) exec_list_pop_head(&bq->reusable_blocks)) != NULL) { 91bf215546Sopenharmony_ci free(n); 92bf215546Sopenharmony_ci } 93bf215546Sopenharmony_ci} 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_cistatic struct block_instructions * 96bf215546Sopenharmony_cipush_block(struct block_queue *bq) 97bf215546Sopenharmony_ci{ 98bf215546Sopenharmony_ci struct block_instructions *bi = 99bf215546Sopenharmony_ci (struct block_instructions *) exec_list_pop_head(&bq->reusable_blocks); 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci if (bi == NULL) { 102bf215546Sopenharmony_ci bi = calloc(1, sizeof(struct block_instructions)); 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci if (bi == NULL) 105bf215546Sopenharmony_ci return NULL; 106bf215546Sopenharmony_ci } 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci if (!u_vector_init_pow2(&bi->instructions, 8, sizeof(nir_alu_instr *))) { 109bf215546Sopenharmony_ci free(bi); 110bf215546Sopenharmony_ci return NULL; 111bf215546Sopenharmony_ci } 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci exec_list_push_tail(&bq->blocks, &bi->node); 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci return bi; 116bf215546Sopenharmony_ci} 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_cistatic void 119bf215546Sopenharmony_cipop_block(struct block_queue *bq, struct block_instructions *bi) 120bf215546Sopenharmony_ci{ 121bf215546Sopenharmony_ci u_vector_finish(&bi->instructions); 122bf215546Sopenharmony_ci exec_node_remove(&bi->node); 123bf215546Sopenharmony_ci exec_list_push_head(&bq->reusable_blocks, &bi->node); 124bf215546Sopenharmony_ci} 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_cistatic void 127bf215546Sopenharmony_ciadd_instruction_for_block(struct block_instructions *bi, 128bf215546Sopenharmony_ci nir_alu_instr *alu) 129bf215546Sopenharmony_ci{ 130bf215546Sopenharmony_ci nir_alu_instr **data = 131bf215546Sopenharmony_ci u_vector_add(&bi->instructions); 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci *data = alu; 134bf215546Sopenharmony_ci} 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_cistatic void 137bf215546Sopenharmony_cirewrite_compare_instruction(nir_builder *bld, nir_alu_instr *orig_cmp, 138bf215546Sopenharmony_ci nir_alu_instr *orig_add, bool zero_on_left) 139bf215546Sopenharmony_ci{ 140bf215546Sopenharmony_ci bld->cursor = nir_before_instr(&orig_cmp->instr); 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci /* This is somewhat tricky. The compare instruction may be something like 143bf215546Sopenharmony_ci * (fcmp, a, b) while the add instruction is something like (fadd, fneg(a), 144bf215546Sopenharmony_ci * b). This is problematic because the SSA value for the fneg(a) may not 145bf215546Sopenharmony_ci * exist yet at the compare instruction. 146bf215546Sopenharmony_ci * 147bf215546Sopenharmony_ci * We fabricate the operands of the new add. This is done using 148bf215546Sopenharmony_ci * information provided by zero_on_left. If zero_on_left is true, we know 149bf215546Sopenharmony_ci * the resulting compare instruction is (fcmp, 0.0, (fadd, x, y)). If the 150bf215546Sopenharmony_ci * original compare instruction was (fcmp, a, b), x = b and y = -a. If 151bf215546Sopenharmony_ci * zero_on_left is false, the resulting compare instruction is (fcmp, 152bf215546Sopenharmony_ci * (fadd, x, y), 0.0) and x = a and y = -b. 153bf215546Sopenharmony_ci */ 154bf215546Sopenharmony_ci nir_ssa_def *const a = nir_ssa_for_alu_src(bld, orig_cmp, 0); 155bf215546Sopenharmony_ci nir_ssa_def *const b = nir_ssa_for_alu_src(bld, orig_cmp, 1); 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci nir_ssa_def *const fadd = zero_on_left 158bf215546Sopenharmony_ci ? nir_fadd(bld, b, nir_fneg(bld, a)) 159bf215546Sopenharmony_ci : nir_fadd(bld, a, nir_fneg(bld, b)); 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci nir_ssa_def *const zero = 162bf215546Sopenharmony_ci nir_imm_floatN_t(bld, 0.0, orig_add->dest.dest.ssa.bit_size); 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci nir_ssa_def *const cmp = zero_on_left 165bf215546Sopenharmony_ci ? nir_build_alu(bld, orig_cmp->op, zero, fadd, NULL, NULL) 166bf215546Sopenharmony_ci : nir_build_alu(bld, orig_cmp->op, fadd, zero, NULL, NULL); 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci /* Generating extra moves of the results is the easy way to make sure the 169bf215546Sopenharmony_ci * writemasks match the original instructions. Later optimization passes 170bf215546Sopenharmony_ci * will clean these up. This is similar to nir_replace_instr (in 171bf215546Sopenharmony_ci * nir_search.c). 172bf215546Sopenharmony_ci */ 173bf215546Sopenharmony_ci nir_alu_instr *mov_add = nir_alu_instr_create(bld->shader, nir_op_mov); 174bf215546Sopenharmony_ci mov_add->dest.write_mask = orig_add->dest.write_mask; 175bf215546Sopenharmony_ci nir_ssa_dest_init(&mov_add->instr, &mov_add->dest.dest, 176bf215546Sopenharmony_ci orig_add->dest.dest.ssa.num_components, 177bf215546Sopenharmony_ci orig_add->dest.dest.ssa.bit_size, NULL); 178bf215546Sopenharmony_ci mov_add->src[0].src = nir_src_for_ssa(fadd); 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci nir_builder_instr_insert(bld, &mov_add->instr); 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci nir_alu_instr *mov_cmp = nir_alu_instr_create(bld->shader, nir_op_mov); 183bf215546Sopenharmony_ci mov_cmp->dest.write_mask = orig_cmp->dest.write_mask; 184bf215546Sopenharmony_ci nir_ssa_dest_init(&mov_cmp->instr, &mov_cmp->dest.dest, 185bf215546Sopenharmony_ci orig_cmp->dest.dest.ssa.num_components, 186bf215546Sopenharmony_ci orig_cmp->dest.dest.ssa.bit_size, NULL); 187bf215546Sopenharmony_ci mov_cmp->src[0].src = nir_src_for_ssa(cmp); 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci nir_builder_instr_insert(bld, &mov_cmp->instr); 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&orig_cmp->dest.dest.ssa, 192bf215546Sopenharmony_ci &mov_cmp->dest.dest.ssa); 193bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&orig_add->dest.dest.ssa, 194bf215546Sopenharmony_ci &mov_add->dest.dest.ssa); 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci /* We know these have no more uses because we just rewrote them all, so we 197bf215546Sopenharmony_ci * can remove them. 198bf215546Sopenharmony_ci */ 199bf215546Sopenharmony_ci nir_instr_remove(&orig_cmp->instr); 200bf215546Sopenharmony_ci nir_instr_remove(&orig_add->instr); 201bf215546Sopenharmony_ci} 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_cistatic bool 204bf215546Sopenharmony_cicomparison_pre_block(nir_block *block, struct block_queue *bq, nir_builder *bld) 205bf215546Sopenharmony_ci{ 206bf215546Sopenharmony_ci bool progress = false; 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci struct block_instructions *bi = push_block(bq); 209bf215546Sopenharmony_ci if (bi == NULL) 210bf215546Sopenharmony_ci return false; 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci /* Starting with the current block, examine each instruction. If the 213bf215546Sopenharmony_ci * instruction is a comparison that matches the '±a cmp ±b' pattern, add it 214bf215546Sopenharmony_ci * to the block_instructions::instructions set. If the instruction is an 215bf215546Sopenharmony_ci * add instruction, walk up the block queue looking at the stored 216bf215546Sopenharmony_ci * instructions. If a matching comparison is found, move the addition and 217bf215546Sopenharmony_ci * replace the comparison with a different comparison based on the result 218bf215546Sopenharmony_ci * of the addition. All of the blocks in the queue are guaranteed to be 219bf215546Sopenharmony_ci * dominators of the current block. 220bf215546Sopenharmony_ci * 221bf215546Sopenharmony_ci * After processing the current block, recurse into the blocks dominated by 222bf215546Sopenharmony_ci * the current block. 223bf215546Sopenharmony_ci */ 224bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 225bf215546Sopenharmony_ci if (instr->type != nir_instr_type_alu) 226bf215546Sopenharmony_ci continue; 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci nir_alu_instr *const alu = nir_instr_as_alu(instr); 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci if (alu->dest.dest.ssa.num_components != 1) 231bf215546Sopenharmony_ci continue; 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci if (alu->dest.saturate) 234bf215546Sopenharmony_ci continue; 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci static const uint8_t swizzle[NIR_MAX_VEC_COMPONENTS] = {0}; 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci switch (alu->op) { 239bf215546Sopenharmony_ci case nir_op_fadd: { 240bf215546Sopenharmony_ci /* If the instruction is fadd, check it against comparison 241bf215546Sopenharmony_ci * instructions that dominate it. 242bf215546Sopenharmony_ci */ 243bf215546Sopenharmony_ci struct block_instructions *b = 244bf215546Sopenharmony_ci (struct block_instructions *) exec_list_get_head_raw(&bq->blocks); 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci while (b->node.next != NULL) { 247bf215546Sopenharmony_ci nir_alu_instr **a; 248bf215546Sopenharmony_ci bool rewrote_compare = false; 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci u_vector_foreach(a, &b->instructions) { 251bf215546Sopenharmony_ci nir_alu_instr *const cmp = *a; 252bf215546Sopenharmony_ci 253bf215546Sopenharmony_ci if (cmp == NULL) 254bf215546Sopenharmony_ci continue; 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci /* The operands of both instructions are, with some liberty, 257bf215546Sopenharmony_ci * commutative. Check all four permutations. The third and 258bf215546Sopenharmony_ci * fourth permutations are negations of the first two. 259bf215546Sopenharmony_ci */ 260bf215546Sopenharmony_ci if ((nir_alu_srcs_equal(cmp, alu, 0, 0) && 261bf215546Sopenharmony_ci nir_alu_srcs_negative_equal(cmp, alu, 1, 1)) || 262bf215546Sopenharmony_ci (nir_alu_srcs_equal(cmp, alu, 0, 1) && 263bf215546Sopenharmony_ci nir_alu_srcs_negative_equal(cmp, alu, 1, 0))) { 264bf215546Sopenharmony_ci /* These are the cases where (A cmp B) matches either (A + 265bf215546Sopenharmony_ci * -B) or (-B + A) 266bf215546Sopenharmony_ci * 267bf215546Sopenharmony_ci * A cmp B <=> A + -B cmp 0 268bf215546Sopenharmony_ci */ 269bf215546Sopenharmony_ci rewrite_compare_instruction(bld, cmp, alu, false); 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci *a = NULL; 272bf215546Sopenharmony_ci rewrote_compare = true; 273bf215546Sopenharmony_ci break; 274bf215546Sopenharmony_ci } else if ((nir_alu_srcs_equal(cmp, alu, 1, 0) && 275bf215546Sopenharmony_ci nir_alu_srcs_negative_equal(cmp, alu, 0, 1)) || 276bf215546Sopenharmony_ci (nir_alu_srcs_equal(cmp, alu, 1, 1) && 277bf215546Sopenharmony_ci nir_alu_srcs_negative_equal(cmp, alu, 0, 0))) { 278bf215546Sopenharmony_ci /* This is the case where (A cmp B) matches (B + -A) or (-A 279bf215546Sopenharmony_ci * + B). 280bf215546Sopenharmony_ci * 281bf215546Sopenharmony_ci * A cmp B <=> 0 cmp B + -A 282bf215546Sopenharmony_ci */ 283bf215546Sopenharmony_ci rewrite_compare_instruction(bld, cmp, alu, true); 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci *a = NULL; 286bf215546Sopenharmony_ci rewrote_compare = true; 287bf215546Sopenharmony_ci break; 288bf215546Sopenharmony_ci } 289bf215546Sopenharmony_ci } 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci /* Bail after a compare in the most dominating block is found. 292bf215546Sopenharmony_ci * This is necessary because 'alu' has been removed from the 293bf215546Sopenharmony_ci * instruction stream. Should there be a matching compare in 294bf215546Sopenharmony_ci * another block, calling rewrite_compare_instruction again will 295bf215546Sopenharmony_ci * try to operate on a node that is not in the list as if it were 296bf215546Sopenharmony_ci * in the list. 297bf215546Sopenharmony_ci * 298bf215546Sopenharmony_ci * FINISHME: There may be opportunity for additional optimization 299bf215546Sopenharmony_ci * here. I discovered this problem due to a shader in Guacamelee. 300bf215546Sopenharmony_ci * It may be possible to rewrite the matching compares that are 301bf215546Sopenharmony_ci * encountered later to reuse the result from the compare that was 302bf215546Sopenharmony_ci * first rewritten. It's also possible that this is just taken 303bf215546Sopenharmony_ci * care of by calling the optimization pass repeatedly. 304bf215546Sopenharmony_ci */ 305bf215546Sopenharmony_ci if (rewrote_compare) { 306bf215546Sopenharmony_ci progress = true; 307bf215546Sopenharmony_ci break; 308bf215546Sopenharmony_ci } 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci b = (struct block_instructions *) b->node.next; 311bf215546Sopenharmony_ci } 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ci break; 314bf215546Sopenharmony_ci } 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci case nir_op_flt: 317bf215546Sopenharmony_ci case nir_op_fge: 318bf215546Sopenharmony_ci case nir_op_fneu: 319bf215546Sopenharmony_ci case nir_op_feq: 320bf215546Sopenharmony_ci /* If the instruction is a comparison that is used by an if-statement 321bf215546Sopenharmony_ci * and neither operand is immediate value 0, add it to the set. 322bf215546Sopenharmony_ci */ 323bf215546Sopenharmony_ci if (is_used_by_if(alu) && 324bf215546Sopenharmony_ci is_not_const_zero(NULL, alu, 0, 1, swizzle) && 325bf215546Sopenharmony_ci is_not_const_zero(NULL, alu, 1, 1, swizzle)) 326bf215546Sopenharmony_ci add_instruction_for_block(bi, alu); 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci break; 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci default: 331bf215546Sopenharmony_ci break; 332bf215546Sopenharmony_ci } 333bf215546Sopenharmony_ci } 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci for (unsigned i = 0; i < block->num_dom_children; i++) { 336bf215546Sopenharmony_ci nir_block *child = block->dom_children[i]; 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci if (comparison_pre_block(child, bq, bld)) 339bf215546Sopenharmony_ci progress = true; 340bf215546Sopenharmony_ci } 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci pop_block(bq, bi); 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci return progress; 345bf215546Sopenharmony_ci} 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_cibool 348bf215546Sopenharmony_cinir_opt_comparison_pre_impl(nir_function_impl *impl) 349bf215546Sopenharmony_ci{ 350bf215546Sopenharmony_ci struct block_queue bq; 351bf215546Sopenharmony_ci nir_builder bld; 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci block_queue_init(&bq); 354bf215546Sopenharmony_ci nir_builder_init(&bld, impl); 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci nir_metadata_require(impl, nir_metadata_dominance); 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_ci const bool progress = 359bf215546Sopenharmony_ci comparison_pre_block(nir_start_block(impl), &bq, &bld); 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci block_queue_finish(&bq); 362bf215546Sopenharmony_ci 363bf215546Sopenharmony_ci if (progress) { 364bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | 365bf215546Sopenharmony_ci nir_metadata_dominance); 366bf215546Sopenharmony_ci } else { 367bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 368bf215546Sopenharmony_ci } 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci return progress; 371bf215546Sopenharmony_ci} 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_cibool 374bf215546Sopenharmony_cinir_opt_comparison_pre(nir_shader *shader) 375bf215546Sopenharmony_ci{ 376bf215546Sopenharmony_ci bool progress = false; 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 379bf215546Sopenharmony_ci if (function->impl) 380bf215546Sopenharmony_ci progress |= nir_opt_comparison_pre_impl(function->impl); 381bf215546Sopenharmony_ci } 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci return progress; 384bf215546Sopenharmony_ci} 385