1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Advanced Micro Devices, Inc. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci/* This helps separate shaders because the next shader doesn't have to be known. 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci * It optimizes VS and TES outputs before FS as follows: 27bf215546Sopenharmony_ci * - Eliminate and merge equal outputs, and treat undef as equal to everything, e.g. 28bf215546Sopenharmony_ci * (x,y,undef,undef) == (undef,y,z,undef) --> (x,y,z,undef) regardless of the interpolation 29bf215546Sopenharmony_ci * qualifier (AMD can map 1 output to multiple PS inputs and interpolate each differently). 30bf215546Sopenharmony_ci * - Remove constant outputs that match AMD DEFAULT_VAL options, e.g. (0,0,0,1), 31bf215546Sopenharmony_ci * treat undef as whatever. 32bf215546Sopenharmony_ci * 33bf215546Sopenharmony_ci * It requires that there is no indirect indexing and all output stores must be scalar. 34bf215546Sopenharmony_ci */ 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci#include "ac_nir.h" 37bf215546Sopenharmony_ci#include "nir_builder.h" 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_cistruct ac_chan_info { 40bf215546Sopenharmony_ci nir_instr *value; 41bf215546Sopenharmony_ci nir_intrinsic_instr *store_intr; /* The intrinsic writing the value. */ 42bf215546Sopenharmony_ci}; 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_cistruct ac_out_info { 45bf215546Sopenharmony_ci unsigned base; /* nir_intrinsic_base */ 46bf215546Sopenharmony_ci nir_alu_type types; 47bf215546Sopenharmony_ci bool duplicated; 48bf215546Sopenharmony_ci bool constant; 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci /* Channels 0-3 are 32-bit channels or low bits of 16-bit channels. 51bf215546Sopenharmony_ci * Channels 4-7 are high bits of 16-bit channels. 52bf215546Sopenharmony_ci */ 53bf215546Sopenharmony_ci struct ac_chan_info chan[8]; 54bf215546Sopenharmony_ci}; 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_cistatic void ac_remove_varying(struct ac_out_info *out) 57bf215546Sopenharmony_ci{ 58bf215546Sopenharmony_ci /* Remove the output. (all channels) */ 59bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(out->chan); i++) { 60bf215546Sopenharmony_ci if (out->chan[i].store_intr) { 61bf215546Sopenharmony_ci nir_remove_varying(out->chan[i].store_intr); 62bf215546Sopenharmony_ci out->chan[i].store_intr = NULL; 63bf215546Sopenharmony_ci out->chan[i].value = NULL; 64bf215546Sopenharmony_ci } 65bf215546Sopenharmony_ci } 66bf215546Sopenharmony_ci} 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci/* Return true if the output matches DEFAULT_VAL and has been eliminated. */ 69bf215546Sopenharmony_cistatic bool ac_eliminate_const_output(struct ac_out_info *out, 70bf215546Sopenharmony_ci gl_varying_slot semantic, 71bf215546Sopenharmony_ci uint8_t *param_export_index) 72bf215546Sopenharmony_ci{ 73bf215546Sopenharmony_ci if (!(out->types & 32)) 74bf215546Sopenharmony_ci return false; 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci bool is_zero[4] = {0}, is_one[4] = {0}; 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 79bf215546Sopenharmony_ci /* NULL means undef. */ 80bf215546Sopenharmony_ci if (!out->chan[i].value) { 81bf215546Sopenharmony_ci is_zero[i] = true; 82bf215546Sopenharmony_ci is_one[i] = true; 83bf215546Sopenharmony_ci } else if (out->chan[i].value->type == nir_instr_type_load_const) { 84bf215546Sopenharmony_ci if (nir_instr_as_load_const(out->chan[i].value)->value[0].f32 == 0) 85bf215546Sopenharmony_ci is_zero[i] = true; 86bf215546Sopenharmony_ci else if (nir_instr_as_load_const(out->chan[i].value)->value[0].f32 == 1) 87bf215546Sopenharmony_ci is_one[i] = true; 88bf215546Sopenharmony_ci else 89bf215546Sopenharmony_ci return false; /* other constant */ 90bf215546Sopenharmony_ci } else 91bf215546Sopenharmony_ci return false; 92bf215546Sopenharmony_ci } 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci /* Only certain combinations of 0 and 1 are supported. */ 95bf215546Sopenharmony_ci unsigned default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci if (is_zero[0] && is_zero[1] && is_zero[2]) { 98bf215546Sopenharmony_ci if (is_zero[3]) 99bf215546Sopenharmony_ci default_val = AC_EXP_PARAM_DEFAULT_VAL_0000; 100bf215546Sopenharmony_ci else if (is_one[3]) 101bf215546Sopenharmony_ci default_val = AC_EXP_PARAM_DEFAULT_VAL_0001; 102bf215546Sopenharmony_ci else 103bf215546Sopenharmony_ci return false; 104bf215546Sopenharmony_ci } else if (is_one[0] && is_one[1] && is_one[2]) { 105bf215546Sopenharmony_ci if (is_zero[3]) 106bf215546Sopenharmony_ci default_val = AC_EXP_PARAM_DEFAULT_VAL_1110; 107bf215546Sopenharmony_ci else if (is_one[3]) 108bf215546Sopenharmony_ci default_val = AC_EXP_PARAM_DEFAULT_VAL_1111; 109bf215546Sopenharmony_ci else 110bf215546Sopenharmony_ci return false; 111bf215546Sopenharmony_ci } else { 112bf215546Sopenharmony_ci return false; 113bf215546Sopenharmony_ci } 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci /* Change OFFSET to DEFAULT_VAL. */ 116bf215546Sopenharmony_ci param_export_index[semantic] = default_val; 117bf215546Sopenharmony_ci out->constant = true; 118bf215546Sopenharmony_ci ac_remove_varying(out); 119bf215546Sopenharmony_ci return true; 120bf215546Sopenharmony_ci} 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_cistatic bool ac_eliminate_duplicated_output(struct ac_out_info *outputs, 123bf215546Sopenharmony_ci BITSET_DECLARE(outputs_optimized, NUM_TOTAL_VARYING_SLOTS), 124bf215546Sopenharmony_ci gl_varying_slot current, struct nir_builder *b, 125bf215546Sopenharmony_ci int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS]) 126bf215546Sopenharmony_ci{ 127bf215546Sopenharmony_ci struct ac_out_info *cur = &outputs[current]; 128bf215546Sopenharmony_ci unsigned p, copy_back_channels = 0; 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci /* Check all outputs before current. */ 131bf215546Sopenharmony_ci BITSET_FOREACH_SET(p, outputs_optimized, current) { 132bf215546Sopenharmony_ci struct ac_out_info *prev = &outputs[p]; 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci /* Only compare with real outputs. */ 135bf215546Sopenharmony_ci if (prev->constant || prev->duplicated) 136bf215546Sopenharmony_ci continue; 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci /* The types must match (only 16-bit and 32-bit types are allowed). */ 139bf215546Sopenharmony_ci if ((prev->types & 16) != (cur->types & 16)) 140bf215546Sopenharmony_ci continue; 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci bool different = false; 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci /* Iterate over all channels, including 16-bit channels in chan_hi. */ 145bf215546Sopenharmony_ci for (unsigned j = 0; j < 8; j++) { 146bf215546Sopenharmony_ci nir_instr *prev_chan = prev->chan[j].value; 147bf215546Sopenharmony_ci nir_instr *cur_chan = cur->chan[j].value; 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci /* Treat undef as a match. */ 150bf215546Sopenharmony_ci if (!cur_chan) 151bf215546Sopenharmony_ci continue; 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci /* If prev is undef but cur isn't, we can merge the outputs 154bf215546Sopenharmony_ci * and consider the output duplicated. 155bf215546Sopenharmony_ci */ 156bf215546Sopenharmony_ci if (!prev_chan) { 157bf215546Sopenharmony_ci copy_back_channels |= 1 << j; 158bf215546Sopenharmony_ci continue; 159bf215546Sopenharmony_ci } 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci /* Test whether the values are different. */ 162bf215546Sopenharmony_ci if (prev_chan != cur_chan && 163bf215546Sopenharmony_ci (prev_chan->type != nir_instr_type_load_const || 164bf215546Sopenharmony_ci cur_chan->type != nir_instr_type_load_const || 165bf215546Sopenharmony_ci nir_instr_as_load_const(prev_chan)->value[0].u32 != 166bf215546Sopenharmony_ci nir_instr_as_load_const(cur_chan)->value[0].u32)) { 167bf215546Sopenharmony_ci different = true; 168bf215546Sopenharmony_ci break; 169bf215546Sopenharmony_ci } 170bf215546Sopenharmony_ci } 171bf215546Sopenharmony_ci if (!different) 172bf215546Sopenharmony_ci break; 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci copy_back_channels = 0; 175bf215546Sopenharmony_ci } 176bf215546Sopenharmony_ci if (p == current) 177bf215546Sopenharmony_ci return false; 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci /* An equal output already exists. Make FS use the existing one instead. 180bf215546Sopenharmony_ci * This effectively disables the current output and the param export shouldn't 181bf215546Sopenharmony_ci * be generated. 182bf215546Sopenharmony_ci */ 183bf215546Sopenharmony_ci cur->duplicated = true; 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci /* p is gl_varying_slot in addition to being an index into outputs. */ 186bf215546Sopenharmony_ci slot_remap[current] = p; 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci /* If the matching preceding output has undef where the current one has a proper value, 189bf215546Sopenharmony_ci * move the value to the preceding output. 190bf215546Sopenharmony_ci */ 191bf215546Sopenharmony_ci struct ac_out_info *prev = &outputs[p]; 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci while (copy_back_channels) { 194bf215546Sopenharmony_ci unsigned i = u_bit_scan(©_back_channels); 195bf215546Sopenharmony_ci struct ac_chan_info *prev_chan = &prev->chan[i]; 196bf215546Sopenharmony_ci struct ac_chan_info *cur_chan = &cur->chan[i]; 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci b->cursor = nir_after_instr(&cur_chan->store_intr->instr); 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci /* The store intrinsic doesn't exist for this channel. Create a new one. */ 201bf215546Sopenharmony_ci nir_alu_type src_type = nir_intrinsic_src_type(cur_chan->store_intr); 202bf215546Sopenharmony_ci struct nir_io_semantics sem = nir_intrinsic_io_semantics(cur_chan->store_intr); 203bf215546Sopenharmony_ci struct nir_io_xfb xfb = nir_intrinsic_io_xfb(cur_chan->store_intr); 204bf215546Sopenharmony_ci struct nir_io_xfb xfb2 = nir_intrinsic_io_xfb2(cur_chan->store_intr); 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci /* p is gl_varying_slot in addition to being an index into outputs. */ 207bf215546Sopenharmony_ci sem.location = p; 208bf215546Sopenharmony_ci assert(sem.high_16bits == i / 4); 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci /* If it's a sysval output (such as CLIPDIST), we move the varying portion but keep 211bf215546Sopenharmony_ci * the system value output. This is just the varying portion. 212bf215546Sopenharmony_ci */ 213bf215546Sopenharmony_ci sem.no_sysval_output = 1; 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci /* Write just one component. */ 216bf215546Sopenharmony_ci prev_chan->store_intr = nir_store_output(b, nir_instr_ssa_def(cur_chan->value), 217bf215546Sopenharmony_ci nir_imm_int(b, 0), 218bf215546Sopenharmony_ci .base = prev->base, 219bf215546Sopenharmony_ci .component = i % 4, 220bf215546Sopenharmony_ci .io_semantics = sem, 221bf215546Sopenharmony_ci .src_type = src_type, 222bf215546Sopenharmony_ci .write_mask = 0x1, 223bf215546Sopenharmony_ci .io_xfb = xfb, 224bf215546Sopenharmony_ci .io_xfb2 = xfb2); 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci /* Update the undef channels in the output info. */ 227bf215546Sopenharmony_ci assert(!prev_chan->value); 228bf215546Sopenharmony_ci prev_chan->value = cur_chan->value; 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci /* Remove transform feedback info from the current instruction because 231bf215546Sopenharmony_ci * we moved it too. The instruction might not be removed if it's a system 232bf215546Sopenharmony_ci * value output. 233bf215546Sopenharmony_ci */ 234bf215546Sopenharmony_ci static struct nir_io_xfb zero_xfb; 235bf215546Sopenharmony_ci nir_intrinsic_set_io_xfb(cur->chan[i].store_intr, zero_xfb); 236bf215546Sopenharmony_ci nir_intrinsic_set_io_xfb2(cur->chan[i].store_intr, zero_xfb); 237bf215546Sopenharmony_ci } 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci ac_remove_varying(cur); 240bf215546Sopenharmony_ci return true; 241bf215546Sopenharmony_ci} 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_cibool ac_nir_optimize_outputs(nir_shader *nir, bool sprite_tex_disallowed, 244bf215546Sopenharmony_ci int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS], 245bf215546Sopenharmony_ci uint8_t param_export_index[NUM_TOTAL_VARYING_SLOTS]) 246bf215546Sopenharmony_ci{ 247bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(nir); 248bf215546Sopenharmony_ci assert(impl); 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci if (nir->info.stage != MESA_SHADER_VERTEX && 251bf215546Sopenharmony_ci nir->info.stage != MESA_SHADER_TESS_EVAL) { 252bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 253bf215546Sopenharmony_ci return false; 254bf215546Sopenharmony_ci } 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci struct ac_out_info outputs[NUM_TOTAL_VARYING_SLOTS] = { 0 }; 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci BITSET_DECLARE(outputs_optimized, NUM_TOTAL_VARYING_SLOTS); 259bf215546Sopenharmony_ci BITSET_ZERO(outputs_optimized); 260bf215546Sopenharmony_ci 261bf215546Sopenharmony_ci /* Gather outputs. */ 262bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 263bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 264bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 265bf215546Sopenharmony_ci continue; 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 268bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_store_output) 269bf215546Sopenharmony_ci continue; 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci nir_io_semantics sem = nir_intrinsic_io_semantics(intr); 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ci /* Only process varyings that appear as param exports. */ 274bf215546Sopenharmony_ci if (!nir_slot_is_varying(sem.location) || sem.no_varying) 275bf215546Sopenharmony_ci continue; 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci /* We can't optimize texture coordinates if sprite_coord_enable can override them. */ 278bf215546Sopenharmony_ci if (sem.location >= VARYING_SLOT_TEX0 && sem.location <= VARYING_SLOT_TEX7 && 279bf215546Sopenharmony_ci !sprite_tex_disallowed) 280bf215546Sopenharmony_ci continue; 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ci BITSET_SET(outputs_optimized, sem.location); 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci /* No indirect indexing allowed. */ 285bf215546Sopenharmony_ci ASSERTED nir_src offset = *nir_get_io_offset_src(intr); 286bf215546Sopenharmony_ci assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0); 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci /* nir_lower_io_to_scalar is required before this */ 289bf215546Sopenharmony_ci assert(intr->src[0].ssa->num_components == 1); 290bf215546Sopenharmony_ci /* No intrinsic should store undef. */ 291bf215546Sopenharmony_ci assert(intr->src[0].ssa->parent_instr->type != nir_instr_type_ssa_undef); 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci /* Gather the output. */ 294bf215546Sopenharmony_ci struct ac_out_info *out_info = &outputs[sem.location]; 295bf215546Sopenharmony_ci if (!out_info->types) 296bf215546Sopenharmony_ci out_info->base = nir_intrinsic_base(intr); 297bf215546Sopenharmony_ci else 298bf215546Sopenharmony_ci assert(out_info->base == nir_intrinsic_base(intr)); 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci out_info->types |= nir_intrinsic_src_type(intr); 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci unsigned chan = sem.high_16bits * 4 + nir_intrinsic_component(intr); 303bf215546Sopenharmony_ci out_info->chan[chan].store_intr = intr; 304bf215546Sopenharmony_ci out_info->chan[chan].value = intr->src[0].ssa->parent_instr; 305bf215546Sopenharmony_ci } 306bf215546Sopenharmony_ci } 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_ci unsigned i; 309bf215546Sopenharmony_ci bool progress = false; 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci struct nir_builder b; 312bf215546Sopenharmony_ci nir_builder_init(&b, impl); 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci /* Optimize outputs. */ 315bf215546Sopenharmony_ci BITSET_FOREACH_SET(i, outputs_optimized, NUM_TOTAL_VARYING_SLOTS) { 316bf215546Sopenharmony_ci progress |= 317bf215546Sopenharmony_ci ac_eliminate_const_output(&outputs[i], i, param_export_index) || 318bf215546Sopenharmony_ci ac_eliminate_duplicated_output(outputs, outputs_optimized, i, &b, slot_remap); 319bf215546Sopenharmony_ci } 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci if (progress) { 322bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_dominance | 323bf215546Sopenharmony_ci nir_metadata_block_index); 324bf215546Sopenharmony_ci } else { 325bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 326bf215546Sopenharmony_ci } 327bf215546Sopenharmony_ci return progress; 328bf215546Sopenharmony_ci} 329