1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2019 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir.h" 25bf215546Sopenharmony_ci#include "nir/nir_builder.h" 26bf215546Sopenharmony_ci#include "nir_constant_expressions.h" 27bf215546Sopenharmony_ci#include "nir_control_flow.h" 28bf215546Sopenharmony_ci#include "nir_loop_analyze.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_cistatic bool 31bf215546Sopenharmony_ciis_two_src_comparison(const nir_alu_instr *instr) 32bf215546Sopenharmony_ci{ 33bf215546Sopenharmony_ci switch (instr->op) { 34bf215546Sopenharmony_ci case nir_op_flt: 35bf215546Sopenharmony_ci case nir_op_flt32: 36bf215546Sopenharmony_ci case nir_op_fge: 37bf215546Sopenharmony_ci case nir_op_fge32: 38bf215546Sopenharmony_ci case nir_op_feq: 39bf215546Sopenharmony_ci case nir_op_feq32: 40bf215546Sopenharmony_ci case nir_op_fneu: 41bf215546Sopenharmony_ci case nir_op_fneu32: 42bf215546Sopenharmony_ci case nir_op_ilt: 43bf215546Sopenharmony_ci case nir_op_ilt32: 44bf215546Sopenharmony_ci case nir_op_ult: 45bf215546Sopenharmony_ci case nir_op_ult32: 46bf215546Sopenharmony_ci case nir_op_ige: 47bf215546Sopenharmony_ci case nir_op_ige32: 48bf215546Sopenharmony_ci case nir_op_uge: 49bf215546Sopenharmony_ci case nir_op_uge32: 50bf215546Sopenharmony_ci case nir_op_ieq: 51bf215546Sopenharmony_ci case nir_op_ieq32: 52bf215546Sopenharmony_ci case nir_op_ine: 53bf215546Sopenharmony_ci case nir_op_ine32: 54bf215546Sopenharmony_ci return true; 55bf215546Sopenharmony_ci default: 56bf215546Sopenharmony_ci return false; 57bf215546Sopenharmony_ci } 58bf215546Sopenharmony_ci} 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_cistatic bool 61bf215546Sopenharmony_ciall_srcs_are_ssa(const nir_alu_instr *instr) 62bf215546Sopenharmony_ci{ 63bf215546Sopenharmony_ci for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { 64bf215546Sopenharmony_ci if (!instr->src[i].src.is_ssa) 65bf215546Sopenharmony_ci return false; 66bf215546Sopenharmony_ci } 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci return true; 69bf215546Sopenharmony_ci} 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_cistatic bool 73bf215546Sopenharmony_ciall_uses_are_bcsel(const nir_alu_instr *instr) 74bf215546Sopenharmony_ci{ 75bf215546Sopenharmony_ci if (!instr->dest.dest.is_ssa) 76bf215546Sopenharmony_ci return false; 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci nir_foreach_use(use, &instr->dest.dest.ssa) { 79bf215546Sopenharmony_ci if (use->parent_instr->type != nir_instr_type_alu) 80bf215546Sopenharmony_ci return false; 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ci nir_alu_instr *const alu = nir_instr_as_alu(use->parent_instr); 83bf215546Sopenharmony_ci if (alu->op != nir_op_bcsel && 84bf215546Sopenharmony_ci alu->op != nir_op_b32csel) 85bf215546Sopenharmony_ci return false; 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci /* Not only must the result be used by a bcsel, but it must be used as 88bf215546Sopenharmony_ci * the first source (the condition). 89bf215546Sopenharmony_ci */ 90bf215546Sopenharmony_ci if (alu->src[0].src.ssa != &instr->dest.dest.ssa) 91bf215546Sopenharmony_ci return false; 92bf215546Sopenharmony_ci } 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci return true; 95bf215546Sopenharmony_ci} 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_cistatic bool 98bf215546Sopenharmony_cinir_opt_rematerialize_compares_impl(nir_shader *shader, nir_function_impl *impl) 99bf215546Sopenharmony_ci{ 100bf215546Sopenharmony_ci bool progress = false; 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 103bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 104bf215546Sopenharmony_ci if (instr->type != nir_instr_type_alu) 105bf215546Sopenharmony_ci continue; 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_ci nir_alu_instr *const alu = nir_instr_as_alu(instr); 108bf215546Sopenharmony_ci if (!is_two_src_comparison(alu)) 109bf215546Sopenharmony_ci continue; 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci if (!all_srcs_are_ssa(alu)) 112bf215546Sopenharmony_ci continue; 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci if (!all_uses_are_bcsel(alu)) 115bf215546Sopenharmony_ci continue; 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci /* At this point it is known that alu is a comparison instruction 118bf215546Sopenharmony_ci * that is only used by nir_op_bcsel and possibly by if-statements 119bf215546Sopenharmony_ci * (though the latter has not been explicitly checked). 120bf215546Sopenharmony_ci * 121bf215546Sopenharmony_ci * Iterate through each use of the comparison. For every use (or use 122bf215546Sopenharmony_ci * by an if-statement) that is in a different block, emit a copy of 123bf215546Sopenharmony_ci * the comparison. Care must be taken here. The original 124bf215546Sopenharmony_ci * instruction must be duplicated only once in each block because CSE 125bf215546Sopenharmony_ci * cannot be run after this pass. 126bf215546Sopenharmony_ci */ 127bf215546Sopenharmony_ci nir_foreach_use_safe(use, &alu->dest.dest.ssa) { 128bf215546Sopenharmony_ci nir_instr *const use_instr = use->parent_instr; 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci /* If the use is in the same block as the def, don't 131bf215546Sopenharmony_ci * rematerialize. 132bf215546Sopenharmony_ci */ 133bf215546Sopenharmony_ci if (use_instr->block == alu->instr.block) 134bf215546Sopenharmony_ci continue; 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci nir_alu_instr *clone = nir_alu_instr_clone(shader, alu); 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci nir_instr_insert_before(use_instr, &clone->instr); 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci nir_alu_instr *const use_alu = nir_instr_as_alu(use_instr); 141bf215546Sopenharmony_ci for (unsigned i = 0; i < nir_op_infos[use_alu->op].num_inputs; i++) { 142bf215546Sopenharmony_ci if (use_alu->src[i].src.ssa == &alu->dest.dest.ssa) { 143bf215546Sopenharmony_ci nir_instr_rewrite_src(&use_alu->instr, 144bf215546Sopenharmony_ci &use_alu->src[i].src, 145bf215546Sopenharmony_ci nir_src_for_ssa(&clone->dest.dest.ssa)); 146bf215546Sopenharmony_ci progress = true; 147bf215546Sopenharmony_ci } 148bf215546Sopenharmony_ci } 149bf215546Sopenharmony_ci } 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci nir_foreach_if_use_safe(use, &alu->dest.dest.ssa) { 152bf215546Sopenharmony_ci nir_if *const if_stmt = use->parent_if; 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci nir_block *const prev_block = 155bf215546Sopenharmony_ci nir_cf_node_as_block(nir_cf_node_prev(&if_stmt->cf_node)); 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci /* If the compare is from the previous block, don't 158bf215546Sopenharmony_ci * rematerialize. 159bf215546Sopenharmony_ci */ 160bf215546Sopenharmony_ci if (prev_block == alu->instr.block) 161bf215546Sopenharmony_ci continue; 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci nir_alu_instr *clone = nir_alu_instr_clone(shader, alu); 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci nir_instr_insert_after_block(prev_block, &clone->instr); 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci nir_if_rewrite_condition(if_stmt, 168bf215546Sopenharmony_ci nir_src_for_ssa(&clone->dest.dest.ssa)); 169bf215546Sopenharmony_ci progress = true; 170bf215546Sopenharmony_ci } 171bf215546Sopenharmony_ci } 172bf215546Sopenharmony_ci } 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci if (progress) { 175bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | 176bf215546Sopenharmony_ci nir_metadata_dominance); 177bf215546Sopenharmony_ci } else { 178bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 179bf215546Sopenharmony_ci } 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci return progress; 182bf215546Sopenharmony_ci} 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_cibool 185bf215546Sopenharmony_cinir_opt_rematerialize_compares(nir_shader *shader) 186bf215546Sopenharmony_ci{ 187bf215546Sopenharmony_ci bool progress = false; 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 190bf215546Sopenharmony_ci if (function->impl == NULL) 191bf215546Sopenharmony_ci continue; 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci progress = nir_opt_rematerialize_compares_impl(shader, function->impl) 194bf215546Sopenharmony_ci || progress; 195bf215546Sopenharmony_ci } 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci return progress; 198bf215546Sopenharmony_ci} 199