1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2017 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir.h" 25bf215546Sopenharmony_ci#include "nir_builder.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci/** 28bf215546Sopenharmony_ci * \file nir_opt_intrinsics.c 29bf215546Sopenharmony_ci */ 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_cistatic bool 32bf215546Sopenharmony_cisrc_is_single_use_shuffle(nir_src src, nir_ssa_def **data, nir_ssa_def **index) 33bf215546Sopenharmony_ci{ 34bf215546Sopenharmony_ci nir_intrinsic_instr *shuffle = nir_src_as_intrinsic(src); 35bf215546Sopenharmony_ci if (shuffle == NULL || shuffle->intrinsic != nir_intrinsic_shuffle) 36bf215546Sopenharmony_ci return false; 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci /* This is only called when src is part of an ALU op so requiring no if 39bf215546Sopenharmony_ci * uses is reasonable. If we ever want to use this from an if statement, 40bf215546Sopenharmony_ci * we can change it then. 41bf215546Sopenharmony_ci */ 42bf215546Sopenharmony_ci if (!list_is_empty(&shuffle->dest.ssa.if_uses) || 43bf215546Sopenharmony_ci !list_is_singular(&shuffle->dest.ssa.uses)) 44bf215546Sopenharmony_ci return false; 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci assert(shuffle->src[0].is_ssa); 47bf215546Sopenharmony_ci assert(shuffle->src[1].is_ssa); 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ci *data = shuffle->src[0].ssa; 50bf215546Sopenharmony_ci *index = shuffle->src[1].ssa; 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci return true; 53bf215546Sopenharmony_ci} 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_cistatic nir_ssa_def * 56bf215546Sopenharmony_citry_opt_bcsel_of_shuffle(nir_builder *b, nir_alu_instr *alu, 57bf215546Sopenharmony_ci bool block_has_discard) 58bf215546Sopenharmony_ci{ 59bf215546Sopenharmony_ci assert(alu->op == nir_op_bcsel); 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci /* If we've seen a discard in this block, don't do the optimization. We 62bf215546Sopenharmony_ci * could try to do something fancy where we check if the shuffle is on our 63bf215546Sopenharmony_ci * side of the discard or not but this is good enough for correctness for 64bf215546Sopenharmony_ci * now and subgroup ops in the presence of discard aren't common. 65bf215546Sopenharmony_ci */ 66bf215546Sopenharmony_ci if (block_has_discard) 67bf215546Sopenharmony_ci return false; 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci if (!nir_alu_src_is_trivial_ssa(alu, 0)) 70bf215546Sopenharmony_ci return NULL; 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci nir_ssa_def *data1, *index1; 73bf215546Sopenharmony_ci if (!nir_alu_src_is_trivial_ssa(alu, 1) || 74bf215546Sopenharmony_ci alu->src[1].src.ssa->parent_instr->block != alu->instr.block || 75bf215546Sopenharmony_ci !src_is_single_use_shuffle(alu->src[1].src, &data1, &index1)) 76bf215546Sopenharmony_ci return NULL; 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci nir_ssa_def *data2, *index2; 79bf215546Sopenharmony_ci if (!nir_alu_src_is_trivial_ssa(alu, 2) || 80bf215546Sopenharmony_ci alu->src[2].src.ssa->parent_instr->block != alu->instr.block || 81bf215546Sopenharmony_ci !src_is_single_use_shuffle(alu->src[2].src, &data2, &index2)) 82bf215546Sopenharmony_ci return NULL; 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci if (data1 != data2) 85bf215546Sopenharmony_ci return NULL; 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci nir_ssa_def *index = nir_bcsel(b, alu->src[0].src.ssa, index1, index2); 88bf215546Sopenharmony_ci nir_ssa_def *shuffle = nir_shuffle(b, data1, index); 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci return shuffle; 91bf215546Sopenharmony_ci} 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_cistatic bool 94bf215546Sopenharmony_ciopt_intrinsics_alu(nir_builder *b, nir_alu_instr *alu, 95bf215546Sopenharmony_ci bool block_has_discard) 96bf215546Sopenharmony_ci{ 97bf215546Sopenharmony_ci nir_ssa_def *replacement = NULL; 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci switch (alu->op) { 100bf215546Sopenharmony_ci case nir_op_bcsel: 101bf215546Sopenharmony_ci replacement = try_opt_bcsel_of_shuffle(b, alu, block_has_discard); 102bf215546Sopenharmony_ci break; 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci default: 105bf215546Sopenharmony_ci break; 106bf215546Sopenharmony_ci } 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci if (replacement) { 109bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, 110bf215546Sopenharmony_ci replacement); 111bf215546Sopenharmony_ci nir_instr_remove(&alu->instr); 112bf215546Sopenharmony_ci return true; 113bf215546Sopenharmony_ci } else { 114bf215546Sopenharmony_ci return false; 115bf215546Sopenharmony_ci } 116bf215546Sopenharmony_ci} 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_cistatic bool 119bf215546Sopenharmony_ciopt_intrinsics_intrin(nir_builder *b, nir_intrinsic_instr *intrin, 120bf215546Sopenharmony_ci const struct nir_shader_compiler_options *options) 121bf215546Sopenharmony_ci{ 122bf215546Sopenharmony_ci switch (intrin->intrinsic) { 123bf215546Sopenharmony_ci case nir_intrinsic_load_sample_mask_in: { 124bf215546Sopenharmony_ci /* Transform: 125bf215546Sopenharmony_ci * gl_SampleMaskIn == 0 ---> gl_HelperInvocation 126bf215546Sopenharmony_ci * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation 127bf215546Sopenharmony_ci */ 128bf215546Sopenharmony_ci if (!options->optimize_sample_mask_in) 129bf215546Sopenharmony_ci return false; 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci bool progress = false; 132bf215546Sopenharmony_ci nir_foreach_use_safe(use_src, &intrin->dest.ssa) { 133bf215546Sopenharmony_ci if (use_src->parent_instr->type == nir_instr_type_alu) { 134bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(use_src->parent_instr); 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci if (alu->op == nir_op_ieq || 137bf215546Sopenharmony_ci alu->op == nir_op_ine) { 138bf215546Sopenharmony_ci /* Check for 0 in either operand. */ 139bf215546Sopenharmony_ci nir_const_value *const_val = 140bf215546Sopenharmony_ci nir_src_as_const_value(alu->src[0].src); 141bf215546Sopenharmony_ci if (!const_val) 142bf215546Sopenharmony_ci const_val = nir_src_as_const_value(alu->src[1].src); 143bf215546Sopenharmony_ci if (!const_val || const_val->i32 != 0) 144bf215546Sopenharmony_ci continue; 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci nir_ssa_def *new_expr = nir_load_helper_invocation(b, 1); 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci if (alu->op == nir_op_ine) 149bf215546Sopenharmony_ci new_expr = nir_inot(b, new_expr); 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, 152bf215546Sopenharmony_ci new_expr); 153bf215546Sopenharmony_ci nir_instr_remove(&alu->instr); 154bf215546Sopenharmony_ci progress = true; 155bf215546Sopenharmony_ci } 156bf215546Sopenharmony_ci } 157bf215546Sopenharmony_ci } 158bf215546Sopenharmony_ci return progress; 159bf215546Sopenharmony_ci } 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci default: 162bf215546Sopenharmony_ci return false; 163bf215546Sopenharmony_ci } 164bf215546Sopenharmony_ci} 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_cistatic bool 167bf215546Sopenharmony_ciopt_intrinsics_impl(nir_function_impl *impl, 168bf215546Sopenharmony_ci const struct nir_shader_compiler_options *options) 169bf215546Sopenharmony_ci{ 170bf215546Sopenharmony_ci nir_builder b; 171bf215546Sopenharmony_ci nir_builder_init(&b, impl); 172bf215546Sopenharmony_ci bool progress = false; 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 175bf215546Sopenharmony_ci bool block_has_discard = false; 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 178bf215546Sopenharmony_ci b.cursor = nir_before_instr(instr); 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci switch (instr->type) { 181bf215546Sopenharmony_ci case nir_instr_type_alu: 182bf215546Sopenharmony_ci if (opt_intrinsics_alu(&b, nir_instr_as_alu(instr), 183bf215546Sopenharmony_ci block_has_discard)) 184bf215546Sopenharmony_ci progress = true; 185bf215546Sopenharmony_ci break; 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 188bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 189bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_discard || 190bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_discard_if || 191bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_demote || 192bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_demote_if || 193bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_terminate || 194bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_terminate_if) 195bf215546Sopenharmony_ci block_has_discard = true; 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci if (opt_intrinsics_intrin(&b, intrin, options)) 198bf215546Sopenharmony_ci progress = true; 199bf215546Sopenharmony_ci break; 200bf215546Sopenharmony_ci } 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci default: 203bf215546Sopenharmony_ci break; 204bf215546Sopenharmony_ci } 205bf215546Sopenharmony_ci } 206bf215546Sopenharmony_ci } 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci return progress; 209bf215546Sopenharmony_ci} 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_cibool 212bf215546Sopenharmony_cinir_opt_intrinsics(nir_shader *shader) 213bf215546Sopenharmony_ci{ 214bf215546Sopenharmony_ci bool progress = false; 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 217bf215546Sopenharmony_ci if (!function->impl) 218bf215546Sopenharmony_ci continue; 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci if (opt_intrinsics_impl(function->impl, shader->options)) { 221bf215546Sopenharmony_ci progress = true; 222bf215546Sopenharmony_ci nir_metadata_preserve(function->impl, nir_metadata_block_index | 223bf215546Sopenharmony_ci nir_metadata_dominance); 224bf215546Sopenharmony_ci } else { 225bf215546Sopenharmony_ci nir_metadata_preserve(function->impl, nir_metadata_all); 226bf215546Sopenharmony_ci } 227bf215546Sopenharmony_ci } 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci return progress; 230bf215546Sopenharmony_ci} 231