1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2018 Collabora Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 8bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 9bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir_opcodes.h" 25bf215546Sopenharmony_ci#include "zink_context.h" 26bf215546Sopenharmony_ci#include "zink_compiler.h" 27bf215546Sopenharmony_ci#include "zink_program.h" 28bf215546Sopenharmony_ci#include "zink_screen.h" 29bf215546Sopenharmony_ci#include "nir_to_spirv/nir_to_spirv.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "pipe/p_state.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci#include "nir.h" 34bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h" 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci#include "nir/tgsi_to_nir.h" 37bf215546Sopenharmony_ci#include "tgsi/tgsi_dump.h" 38bf215546Sopenharmony_ci#include "tgsi/tgsi_from_mesa.h" 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci#include "util/u_memory.h" 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci#include "compiler/spirv/nir_spirv.h" 43bf215546Sopenharmony_ci#include "vulkan/util/vk_util.h" 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_cibool 46bf215546Sopenharmony_cizink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask); 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_cistatic void 49bf215546Sopenharmony_cicreate_vs_pushconst(nir_shader *nir) 50bf215546Sopenharmony_ci{ 51bf215546Sopenharmony_ci nir_variable *vs_pushconst; 52bf215546Sopenharmony_ci /* create compatible layout for the ntv push constant loader */ 53bf215546Sopenharmony_ci struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 2); 54bf215546Sopenharmony_ci fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0); 55bf215546Sopenharmony_ci fields[0].name = ralloc_asprintf(nir, "draw_mode_is_indexed"); 56bf215546Sopenharmony_ci fields[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed); 57bf215546Sopenharmony_ci fields[1].type = glsl_array_type(glsl_uint_type(), 1, 0); 58bf215546Sopenharmony_ci fields[1].name = ralloc_asprintf(nir, "draw_id"); 59bf215546Sopenharmony_ci fields[1].offset = offsetof(struct zink_gfx_push_constant, draw_id); 60bf215546Sopenharmony_ci vs_pushconst = nir_variable_create(nir, nir_var_mem_push_const, 61bf215546Sopenharmony_ci glsl_struct_type(fields, 2, "struct", false), "vs_pushconst"); 62bf215546Sopenharmony_ci vs_pushconst->data.location = INT_MAX; //doesn't really matter 63bf215546Sopenharmony_ci} 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_cistatic void 66bf215546Sopenharmony_cicreate_cs_pushconst(nir_shader *nir) 67bf215546Sopenharmony_ci{ 68bf215546Sopenharmony_ci nir_variable *cs_pushconst; 69bf215546Sopenharmony_ci /* create compatible layout for the ntv push constant loader */ 70bf215546Sopenharmony_ci struct glsl_struct_field *fields = rzalloc_size(nir, 1 * sizeof(struct glsl_struct_field)); 71bf215546Sopenharmony_ci fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0); 72bf215546Sopenharmony_ci fields[0].name = ralloc_asprintf(nir, "work_dim"); 73bf215546Sopenharmony_ci fields[0].offset = 0; 74bf215546Sopenharmony_ci cs_pushconst = nir_variable_create(nir, nir_var_mem_push_const, 75bf215546Sopenharmony_ci glsl_struct_type(fields, 1, "struct", false), "cs_pushconst"); 76bf215546Sopenharmony_ci cs_pushconst->data.location = INT_MAX; //doesn't really matter 77bf215546Sopenharmony_ci} 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_cistatic bool 80bf215546Sopenharmony_cireads_work_dim(nir_shader *shader) 81bf215546Sopenharmony_ci{ 82bf215546Sopenharmony_ci return BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORK_DIM); 83bf215546Sopenharmony_ci} 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_cistatic bool 86bf215546Sopenharmony_cilower_work_dim_instr(nir_builder *b, nir_instr *in, void *data) 87bf215546Sopenharmony_ci{ 88bf215546Sopenharmony_ci if (in->type != nir_instr_type_intrinsic) 89bf215546Sopenharmony_ci return false; 90bf215546Sopenharmony_ci nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in); 91bf215546Sopenharmony_ci if (instr->intrinsic != nir_intrinsic_load_work_dim) 92bf215546Sopenharmony_ci return false; 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci if (instr->intrinsic == nir_intrinsic_load_work_dim) { 95bf215546Sopenharmony_ci b->cursor = nir_after_instr(&instr->instr); 96bf215546Sopenharmony_ci nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant); 97bf215546Sopenharmony_ci load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); 98bf215546Sopenharmony_ci nir_intrinsic_set_range(load, 3 * sizeof(uint32_t)); 99bf215546Sopenharmony_ci load->num_components = 1; 100bf215546Sopenharmony_ci nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "work_dim"); 101bf215546Sopenharmony_ci nir_builder_instr_insert(b, &load->instr); 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa); 104bf215546Sopenharmony_ci } 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci return true; 107bf215546Sopenharmony_ci} 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_cistatic bool 110bf215546Sopenharmony_cilower_work_dim(nir_shader *shader) 111bf215546Sopenharmony_ci{ 112bf215546Sopenharmony_ci if (shader->info.stage != MESA_SHADER_KERNEL) 113bf215546Sopenharmony_ci return false; 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci if (!reads_work_dim(shader)) 116bf215546Sopenharmony_ci return false; 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci return nir_shader_instructions_pass(shader, lower_work_dim_instr, nir_metadata_dominance, NULL); 119bf215546Sopenharmony_ci} 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_cistatic bool 122bf215546Sopenharmony_cilower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data) 123bf215546Sopenharmony_ci{ 124bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 125bf215546Sopenharmony_ci return false; 126bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 127bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_load_deref) 128bf215546Sopenharmony_ci return false; 129bf215546Sopenharmony_ci nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr)); 130bf215546Sopenharmony_ci if (var->data.mode != nir_var_shader_in) 131bf215546Sopenharmony_ci return false; 132bf215546Sopenharmony_ci if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3) 133bf215546Sopenharmony_ci return false; 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci /* create second variable for the split */ 136bf215546Sopenharmony_ci nir_variable *var2 = nir_variable_clone(var, b->shader); 137bf215546Sopenharmony_ci /* split new variable into second slot */ 138bf215546Sopenharmony_ci var2->data.driver_location++; 139bf215546Sopenharmony_ci nir_shader_add_variable(b->shader, var2); 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci unsigned total_num_components = glsl_get_vector_elements(var->type); 142bf215546Sopenharmony_ci /* new variable is the second half of the dvec */ 143bf215546Sopenharmony_ci var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2); 144bf215546Sopenharmony_ci /* clamp original variable to a dvec2 */ 145bf215546Sopenharmony_ci var->type = glsl_vector_type(glsl_get_base_type(var->type), 2); 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci b->cursor = nir_after_instr(instr); 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci /* this is the first load instruction for the first half of the dvec3/4 components */ 150bf215546Sopenharmony_ci nir_ssa_def *load = nir_load_var(b, var); 151bf215546Sopenharmony_ci /* this is the second load instruction for the second half of the dvec3/4 components */ 152bf215546Sopenharmony_ci nir_ssa_def *load2 = nir_load_var(b, var2); 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci nir_ssa_def *def[4]; 155bf215546Sopenharmony_ci /* create a new dvec3/4 comprised of all the loaded components from both variables */ 156bf215546Sopenharmony_ci def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0)); 157bf215546Sopenharmony_ci def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1)); 158bf215546Sopenharmony_ci def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0)); 159bf215546Sopenharmony_ci if (total_num_components == 4) 160bf215546Sopenharmony_ci def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1)); 161bf215546Sopenharmony_ci nir_ssa_def *new_vec = nir_vec(b, def, total_num_components); 162bf215546Sopenharmony_ci /* use the assembled dvec3/4 for all other uses of the load */ 163bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec, 164bf215546Sopenharmony_ci new_vec->parent_instr); 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci /* remove the original instr and its deref chain */ 167bf215546Sopenharmony_ci nir_instr *parent = intr->src[0].ssa->parent_instr; 168bf215546Sopenharmony_ci nir_instr_remove(instr); 169bf215546Sopenharmony_ci nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent)); 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci return true; 172bf215546Sopenharmony_ci} 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci/* mesa/gallium always provides UINT versions of 64bit formats: 175bf215546Sopenharmony_ci * - rewrite loads as 32bit vec loads 176bf215546Sopenharmony_ci * - cast back to 64bit 177bf215546Sopenharmony_ci */ 178bf215546Sopenharmony_cistatic bool 179bf215546Sopenharmony_cilower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data) 180bf215546Sopenharmony_ci{ 181bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 182bf215546Sopenharmony_ci return false; 183bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 184bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_load_deref) 185bf215546Sopenharmony_ci return false; 186bf215546Sopenharmony_ci nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr)); 187bf215546Sopenharmony_ci if (var->data.mode != nir_var_shader_in) 188bf215546Sopenharmony_ci return false; 189bf215546Sopenharmony_ci if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER) 190bf215546Sopenharmony_ci return false; 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci unsigned num_components = glsl_get_vector_elements(var->type); 193bf215546Sopenharmony_ci enum glsl_base_type base_type; 194bf215546Sopenharmony_ci switch (glsl_get_base_type(var->type)) { 195bf215546Sopenharmony_ci case GLSL_TYPE_UINT64: 196bf215546Sopenharmony_ci base_type = GLSL_TYPE_UINT; 197bf215546Sopenharmony_ci break; 198bf215546Sopenharmony_ci case GLSL_TYPE_INT64: 199bf215546Sopenharmony_ci base_type = GLSL_TYPE_INT; 200bf215546Sopenharmony_ci break; 201bf215546Sopenharmony_ci case GLSL_TYPE_DOUBLE: 202bf215546Sopenharmony_ci base_type = GLSL_TYPE_FLOAT; 203bf215546Sopenharmony_ci break; 204bf215546Sopenharmony_ci default: 205bf215546Sopenharmony_ci unreachable("unknown 64-bit vertex attribute format!"); 206bf215546Sopenharmony_ci } 207bf215546Sopenharmony_ci var->type = glsl_vector_type(base_type, num_components * 2); 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci b->cursor = nir_after_instr(instr); 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_ci nir_ssa_def *load = nir_load_var(b, var); 212bf215546Sopenharmony_ci nir_ssa_def *casted[2]; 213bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; i++) 214bf215546Sopenharmony_ci casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2))); 215bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components)); 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci /* remove the original instr and its deref chain */ 218bf215546Sopenharmony_ci nir_instr *parent = intr->src[0].ssa->parent_instr; 219bf215546Sopenharmony_ci nir_instr_remove(instr); 220bf215546Sopenharmony_ci nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent)); 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ci return true; 223bf215546Sopenharmony_ci} 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci/* "64-bit three- and four-component vectors consume two consecutive locations." 226bf215546Sopenharmony_ci * - 14.1.4. Location Assignment 227bf215546Sopenharmony_ci * 228bf215546Sopenharmony_ci * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which 229bf215546Sopenharmony_ci * are assigned to consecutive locations, loaded separately, and then assembled back into a 230bf215546Sopenharmony_ci * composite value that's used in place of the original loaded ssa src 231bf215546Sopenharmony_ci */ 232bf215546Sopenharmony_cistatic bool 233bf215546Sopenharmony_cilower_64bit_vertex_attribs(nir_shader *shader) 234bf215546Sopenharmony_ci{ 235bf215546Sopenharmony_ci if (shader->info.stage != MESA_SHADER_VERTEX) 236bf215546Sopenharmony_ci return false; 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL); 239bf215546Sopenharmony_ci progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL); 240bf215546Sopenharmony_ci return progress; 241bf215546Sopenharmony_ci} 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_cistatic bool 244bf215546Sopenharmony_cilower_basevertex_instr(nir_builder *b, nir_instr *in, void *data) 245bf215546Sopenharmony_ci{ 246bf215546Sopenharmony_ci if (in->type != nir_instr_type_intrinsic) 247bf215546Sopenharmony_ci return false; 248bf215546Sopenharmony_ci nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in); 249bf215546Sopenharmony_ci if (instr->intrinsic != nir_intrinsic_load_base_vertex) 250bf215546Sopenharmony_ci return false; 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci b->cursor = nir_after_instr(&instr->instr); 253bf215546Sopenharmony_ci nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant); 254bf215546Sopenharmony_ci load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); 255bf215546Sopenharmony_ci nir_intrinsic_set_range(load, 4); 256bf215546Sopenharmony_ci load->num_components = 1; 257bf215546Sopenharmony_ci nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed"); 258bf215546Sopenharmony_ci nir_builder_instr_insert(b, &load->instr); 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel, 261bf215546Sopenharmony_ci nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL), 262bf215546Sopenharmony_ci &instr->dest.ssa, 263bf215546Sopenharmony_ci nir_imm_int(b, 0), 264bf215546Sopenharmony_ci NULL); 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite, 267bf215546Sopenharmony_ci composite->parent_instr); 268bf215546Sopenharmony_ci return true; 269bf215546Sopenharmony_ci} 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_cistatic bool 272bf215546Sopenharmony_cilower_basevertex(nir_shader *shader) 273bf215546Sopenharmony_ci{ 274bf215546Sopenharmony_ci if (shader->info.stage != MESA_SHADER_VERTEX) 275bf215546Sopenharmony_ci return false; 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX)) 278bf215546Sopenharmony_ci return false; 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ci return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL); 281bf215546Sopenharmony_ci} 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_cistatic bool 285bf215546Sopenharmony_cilower_drawid_instr(nir_builder *b, nir_instr *in, void *data) 286bf215546Sopenharmony_ci{ 287bf215546Sopenharmony_ci if (in->type != nir_instr_type_intrinsic) 288bf215546Sopenharmony_ci return false; 289bf215546Sopenharmony_ci nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in); 290bf215546Sopenharmony_ci if (instr->intrinsic != nir_intrinsic_load_draw_id) 291bf215546Sopenharmony_ci return false; 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci b->cursor = nir_before_instr(&instr->instr); 294bf215546Sopenharmony_ci nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant); 295bf215546Sopenharmony_ci load->src[0] = nir_src_for_ssa(nir_imm_int(b, 1)); 296bf215546Sopenharmony_ci nir_intrinsic_set_range(load, 4); 297bf215546Sopenharmony_ci load->num_components = 1; 298bf215546Sopenharmony_ci nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id"); 299bf215546Sopenharmony_ci nir_builder_instr_insert(b, &load->instr); 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa); 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci return true; 304bf215546Sopenharmony_ci} 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_cistatic bool 307bf215546Sopenharmony_cilower_drawid(nir_shader *shader) 308bf215546Sopenharmony_ci{ 309bf215546Sopenharmony_ci if (shader->info.stage != MESA_SHADER_VERTEX) 310bf215546Sopenharmony_ci return false; 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID)) 313bf215546Sopenharmony_ci return false; 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL); 316bf215546Sopenharmony_ci} 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_cistatic bool 319bf215546Sopenharmony_cilower_dual_blend(nir_shader *shader) 320bf215546Sopenharmony_ci{ 321bf215546Sopenharmony_ci bool progress = false; 322bf215546Sopenharmony_ci nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1); 323bf215546Sopenharmony_ci if (var) { 324bf215546Sopenharmony_ci var->data.location = FRAG_RESULT_DATA0; 325bf215546Sopenharmony_ci var->data.index = 1; 326bf215546Sopenharmony_ci progress = true; 327bf215546Sopenharmony_ci } 328bf215546Sopenharmony_ci nir_shader_preserve_all_metadata(shader); 329bf215546Sopenharmony_ci return progress; 330bf215546Sopenharmony_ci} 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_civoid 333bf215546Sopenharmony_cizink_screen_init_compiler(struct zink_screen *screen) 334bf215546Sopenharmony_ci{ 335bf215546Sopenharmony_ci static const struct nir_shader_compiler_options 336bf215546Sopenharmony_ci default_options = { 337bf215546Sopenharmony_ci .lower_ffma16 = true, 338bf215546Sopenharmony_ci .lower_ffma32 = true, 339bf215546Sopenharmony_ci .lower_ffma64 = true, 340bf215546Sopenharmony_ci .lower_scmp = true, 341bf215546Sopenharmony_ci .lower_fdph = true, 342bf215546Sopenharmony_ci .lower_flrp32 = true, 343bf215546Sopenharmony_ci .lower_fpow = true, 344bf215546Sopenharmony_ci .lower_fsat = true, 345bf215546Sopenharmony_ci .lower_extract_byte = true, 346bf215546Sopenharmony_ci .lower_extract_word = true, 347bf215546Sopenharmony_ci .lower_insert_byte = true, 348bf215546Sopenharmony_ci .lower_insert_word = true, 349bf215546Sopenharmony_ci .lower_mul_high = true, 350bf215546Sopenharmony_ci .lower_rotate = true, 351bf215546Sopenharmony_ci .lower_uadd_carry = true, 352bf215546Sopenharmony_ci .lower_uadd_sat = true, 353bf215546Sopenharmony_ci .lower_usub_sat = true, 354bf215546Sopenharmony_ci .lower_vector_cmp = true, 355bf215546Sopenharmony_ci .lower_int64_options = 0, 356bf215546Sopenharmony_ci .lower_doubles_options = 0, 357bf215546Sopenharmony_ci .lower_uniforms_to_ubo = true, 358bf215546Sopenharmony_ci .has_fsub = true, 359bf215546Sopenharmony_ci .has_isub = true, 360bf215546Sopenharmony_ci .has_txs = true, 361bf215546Sopenharmony_ci .lower_mul_2x32_64 = true, 362bf215546Sopenharmony_ci .support_16bit_alu = true, /* not quite what it sounds like */ 363bf215546Sopenharmony_ci }; 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_ci screen->nir_options = default_options; 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_ci if (!screen->info.feats.features.shaderInt64) 368bf215546Sopenharmony_ci screen->nir_options.lower_int64_options = ~0; 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci if (!screen->info.feats.features.shaderFloat64) { 371bf215546Sopenharmony_ci screen->nir_options.lower_doubles_options = ~0; 372bf215546Sopenharmony_ci screen->nir_options.lower_flrp64 = true; 373bf215546Sopenharmony_ci screen->nir_options.lower_ffma64 = true; 374bf215546Sopenharmony_ci } 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci /* 377bf215546Sopenharmony_ci The OpFRem and OpFMod instructions use cheap approximations of remainder, 378bf215546Sopenharmony_ci and the error can be large due to the discontinuity in trunc() and floor(). 379bf215546Sopenharmony_ci This can produce mathematically unexpected results in some cases, such as 380bf215546Sopenharmony_ci FMod(x,x) computing x rather than 0, and can also cause the result to have 381bf215546Sopenharmony_ci a different sign than the infinitely precise result. 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci -Table 84. Precision of core SPIR-V Instructions 384bf215546Sopenharmony_ci * for drivers that are known to have imprecise fmod for doubles, lower dmod 385bf215546Sopenharmony_ci */ 386bf215546Sopenharmony_ci if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV || 387bf215546Sopenharmony_ci screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE || 388bf215546Sopenharmony_ci screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY) 389bf215546Sopenharmony_ci screen->nir_options.lower_doubles_options = nir_lower_dmod; 390bf215546Sopenharmony_ci} 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ciconst void * 393bf215546Sopenharmony_cizink_get_compiler_options(struct pipe_screen *pscreen, 394bf215546Sopenharmony_ci enum pipe_shader_ir ir, 395bf215546Sopenharmony_ci enum pipe_shader_type shader) 396bf215546Sopenharmony_ci{ 397bf215546Sopenharmony_ci assert(ir == PIPE_SHADER_IR_NIR); 398bf215546Sopenharmony_ci return &zink_screen(pscreen)->nir_options; 399bf215546Sopenharmony_ci} 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_cistruct nir_shader * 402bf215546Sopenharmony_cizink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens) 403bf215546Sopenharmony_ci{ 404bf215546Sopenharmony_ci if (zink_debug & ZINK_DEBUG_TGSI) { 405bf215546Sopenharmony_ci fprintf(stderr, "TGSI shader:\n---8<---\n"); 406bf215546Sopenharmony_ci tgsi_dump_to_file(tokens, 0, stderr); 407bf215546Sopenharmony_ci fprintf(stderr, "---8<---\n\n"); 408bf215546Sopenharmony_ci } 409bf215546Sopenharmony_ci 410bf215546Sopenharmony_ci return tgsi_to_nir(tokens, screen, false); 411bf215546Sopenharmony_ci} 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_cistatic bool 415bf215546Sopenharmony_cidest_is_64bit(nir_dest *dest, void *state) 416bf215546Sopenharmony_ci{ 417bf215546Sopenharmony_ci bool *lower = (bool *)state; 418bf215546Sopenharmony_ci if (dest && (nir_dest_bit_size(*dest) == 64)) { 419bf215546Sopenharmony_ci *lower = true; 420bf215546Sopenharmony_ci return false; 421bf215546Sopenharmony_ci } 422bf215546Sopenharmony_ci return true; 423bf215546Sopenharmony_ci} 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_cistatic bool 426bf215546Sopenharmony_cisrc_is_64bit(nir_src *src, void *state) 427bf215546Sopenharmony_ci{ 428bf215546Sopenharmony_ci bool *lower = (bool *)state; 429bf215546Sopenharmony_ci if (src && (nir_src_bit_size(*src) == 64)) { 430bf215546Sopenharmony_ci *lower = true; 431bf215546Sopenharmony_ci return false; 432bf215546Sopenharmony_ci } 433bf215546Sopenharmony_ci return true; 434bf215546Sopenharmony_ci} 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_cistatic bool 437bf215546Sopenharmony_cifilter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data) 438bf215546Sopenharmony_ci{ 439bf215546Sopenharmony_ci bool lower = false; 440bf215546Sopenharmony_ci /* lower_alu_to_scalar required nir_instr to be const, but nir_foreach_* 441bf215546Sopenharmony_ci * doesn't have const variants, so do the ugly const_cast here. */ 442bf215546Sopenharmony_ci nir_instr *instr = (nir_instr *)const_instr; 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci nir_foreach_dest(instr, dest_is_64bit, &lower); 445bf215546Sopenharmony_ci if (lower) 446bf215546Sopenharmony_ci return true; 447bf215546Sopenharmony_ci nir_foreach_src(instr, src_is_64bit, &lower); 448bf215546Sopenharmony_ci return lower; 449bf215546Sopenharmony_ci} 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_cistatic bool 452bf215546Sopenharmony_cifilter_pack_instr(const nir_instr *const_instr, UNUSED const void *data) 453bf215546Sopenharmony_ci{ 454bf215546Sopenharmony_ci nir_instr *instr = (nir_instr *)const_instr; 455bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(instr); 456bf215546Sopenharmony_ci switch (alu->op) { 457bf215546Sopenharmony_ci case nir_op_pack_64_2x32_split: 458bf215546Sopenharmony_ci case nir_op_pack_32_2x16_split: 459bf215546Sopenharmony_ci case nir_op_unpack_32_2x16_split_x: 460bf215546Sopenharmony_ci case nir_op_unpack_32_2x16_split_y: 461bf215546Sopenharmony_ci case nir_op_unpack_64_2x32_split_x: 462bf215546Sopenharmony_ci case nir_op_unpack_64_2x32_split_y: 463bf215546Sopenharmony_ci return true; 464bf215546Sopenharmony_ci default: 465bf215546Sopenharmony_ci break; 466bf215546Sopenharmony_ci } 467bf215546Sopenharmony_ci return false; 468bf215546Sopenharmony_ci} 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_cistruct bo_vars { 472bf215546Sopenharmony_ci nir_variable *uniforms[5]; 473bf215546Sopenharmony_ci nir_variable *ubo[5]; 474bf215546Sopenharmony_ci nir_variable *ssbo[5]; 475bf215546Sopenharmony_ci uint32_t first_ubo; 476bf215546Sopenharmony_ci uint32_t first_ssbo; 477bf215546Sopenharmony_ci}; 478bf215546Sopenharmony_ci 479bf215546Sopenharmony_cistatic struct bo_vars 480bf215546Sopenharmony_ciget_bo_vars(struct zink_shader *zs, nir_shader *shader) 481bf215546Sopenharmony_ci{ 482bf215546Sopenharmony_ci struct bo_vars bo; 483bf215546Sopenharmony_ci memset(&bo, 0, sizeof(bo)); 484bf215546Sopenharmony_ci if (zs->ubos_used) 485bf215546Sopenharmony_ci bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2; 486bf215546Sopenharmony_ci assert(bo.first_ssbo < PIPE_MAX_CONSTANT_BUFFERS); 487bf215546Sopenharmony_ci if (zs->ssbos_used) 488bf215546Sopenharmony_ci bo.first_ssbo = ffs(zs->ssbos_used) - 1; 489bf215546Sopenharmony_ci assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS); 490bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) { 491bf215546Sopenharmony_ci unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1; 492bf215546Sopenharmony_ci if (var->data.mode == nir_var_mem_ssbo) { 493bf215546Sopenharmony_ci assert(!bo.ssbo[idx]); 494bf215546Sopenharmony_ci bo.ssbo[idx] = var; 495bf215546Sopenharmony_ci } else { 496bf215546Sopenharmony_ci if (var->data.driver_location) { 497bf215546Sopenharmony_ci assert(!bo.ubo[idx]); 498bf215546Sopenharmony_ci bo.ubo[idx] = var; 499bf215546Sopenharmony_ci } else { 500bf215546Sopenharmony_ci assert(!bo.uniforms[idx]); 501bf215546Sopenharmony_ci bo.uniforms[idx] = var; 502bf215546Sopenharmony_ci } 503bf215546Sopenharmony_ci } 504bf215546Sopenharmony_ci } 505bf215546Sopenharmony_ci return bo; 506bf215546Sopenharmony_ci} 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_cistatic bool 509bf215546Sopenharmony_cibound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data) 510bf215546Sopenharmony_ci{ 511bf215546Sopenharmony_ci struct bo_vars *bo = data; 512bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 513bf215546Sopenharmony_ci return false; 514bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 515bf215546Sopenharmony_ci nir_variable *var = NULL; 516bf215546Sopenharmony_ci nir_ssa_def *offset = NULL; 517bf215546Sopenharmony_ci bool is_load = true; 518bf215546Sopenharmony_ci b->cursor = nir_before_instr(instr); 519bf215546Sopenharmony_ci 520bf215546Sopenharmony_ci switch (intr->intrinsic) { 521bf215546Sopenharmony_ci case nir_intrinsic_store_ssbo: 522bf215546Sopenharmony_ci var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4]; 523bf215546Sopenharmony_ci offset = intr->src[2].ssa; 524bf215546Sopenharmony_ci is_load = false; 525bf215546Sopenharmony_ci break; 526bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo: 527bf215546Sopenharmony_ci var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4]; 528bf215546Sopenharmony_ci offset = intr->src[1].ssa; 529bf215546Sopenharmony_ci break; 530bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 531bf215546Sopenharmony_ci if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0) 532bf215546Sopenharmony_ci var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4]; 533bf215546Sopenharmony_ci else 534bf215546Sopenharmony_ci var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4]; 535bf215546Sopenharmony_ci offset = intr->src[1].ssa; 536bf215546Sopenharmony_ci break; 537bf215546Sopenharmony_ci default: 538bf215546Sopenharmony_ci return false; 539bf215546Sopenharmony_ci } 540bf215546Sopenharmony_ci nir_src offset_src = nir_src_for_ssa(offset); 541bf215546Sopenharmony_ci if (!nir_src_is_const(offset_src)) 542bf215546Sopenharmony_ci return false; 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32; 545bf215546Sopenharmony_ci const struct glsl_type *strct_type = glsl_get_array_element(var->type); 546bf215546Sopenharmony_ci unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0)); 547bf215546Sopenharmony_ci bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0; 548bf215546Sopenharmony_ci if (has_unsized || offset_bytes + intr->num_components - 1 < size) 549bf215546Sopenharmony_ci return false; 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci unsigned rewrites = 0; 552bf215546Sopenharmony_ci nir_ssa_def *result[2]; 553bf215546Sopenharmony_ci for (unsigned i = 0; i < intr->num_components; i++) { 554bf215546Sopenharmony_ci if (offset_bytes + i >= size) { 555bf215546Sopenharmony_ci rewrites++; 556bf215546Sopenharmony_ci if (is_load) 557bf215546Sopenharmony_ci result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest)); 558bf215546Sopenharmony_ci } 559bf215546Sopenharmony_ci } 560bf215546Sopenharmony_ci assert(rewrites == intr->num_components); 561bf215546Sopenharmony_ci if (is_load) { 562bf215546Sopenharmony_ci nir_ssa_def *load = nir_vec(b, result, intr->num_components); 563bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intr->dest.ssa, load); 564bf215546Sopenharmony_ci } 565bf215546Sopenharmony_ci nir_instr_remove(instr); 566bf215546Sopenharmony_ci return true; 567bf215546Sopenharmony_ci} 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_cistatic bool 570bf215546Sopenharmony_cibound_bo_access(nir_shader *shader, struct zink_shader *zs) 571bf215546Sopenharmony_ci{ 572bf215546Sopenharmony_ci struct bo_vars bo = get_bo_vars(zs, shader); 573bf215546Sopenharmony_ci return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo); 574bf215546Sopenharmony_ci} 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_cistatic void 577bf215546Sopenharmony_cioptimize_nir(struct nir_shader *s, struct zink_shader *zs) 578bf215546Sopenharmony_ci{ 579bf215546Sopenharmony_ci bool progress; 580bf215546Sopenharmony_ci do { 581bf215546Sopenharmony_ci progress = false; 582bf215546Sopenharmony_ci if (s->options->lower_int64_options) 583bf215546Sopenharmony_ci NIR_PASS_V(s, nir_lower_int64); 584bf215546Sopenharmony_ci NIR_PASS_V(s, nir_lower_vars_to_ssa); 585bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL); 586bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_opt_copy_prop_vars); 587bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_copy_prop); 588bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_opt_remove_phis); 589bf215546Sopenharmony_ci if (s->options->lower_int64_options) { 590bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_lower_64bit_phis); 591bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL); 592bf215546Sopenharmony_ci } 593bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_opt_dce); 594bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_opt_dead_cf); 595bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_lower_phis_to_scalar, false); 596bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_opt_cse); 597bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); 598bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_opt_algebraic); 599bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_opt_constant_folding); 600bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_opt_undef); 601bf215546Sopenharmony_ci NIR_PASS(progress, s, zink_nir_lower_b2b); 602bf215546Sopenharmony_ci if (zs) 603bf215546Sopenharmony_ci NIR_PASS(progress, s, bound_bo_access, zs); 604bf215546Sopenharmony_ci } while (progress); 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_ci do { 607bf215546Sopenharmony_ci progress = false; 608bf215546Sopenharmony_ci NIR_PASS(progress, s, nir_opt_algebraic_late); 609bf215546Sopenharmony_ci if (progress) { 610bf215546Sopenharmony_ci NIR_PASS_V(s, nir_copy_prop); 611bf215546Sopenharmony_ci NIR_PASS_V(s, nir_opt_dce); 612bf215546Sopenharmony_ci NIR_PASS_V(s, nir_opt_cse); 613bf215546Sopenharmony_ci } 614bf215546Sopenharmony_ci } while (progress); 615bf215546Sopenharmony_ci} 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci/* - copy the lowered fbfetch variable 618bf215546Sopenharmony_ci * - set the new one up as an input attachment for descriptor 0.6 619bf215546Sopenharmony_ci * - load it as an image 620bf215546Sopenharmony_ci * - overwrite the previous load 621bf215546Sopenharmony_ci */ 622bf215546Sopenharmony_cistatic bool 623bf215546Sopenharmony_cilower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data) 624bf215546Sopenharmony_ci{ 625bf215546Sopenharmony_ci bool ms = data != NULL; 626bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 627bf215546Sopenharmony_ci return false; 628bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 629bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_load_deref) 630bf215546Sopenharmony_ci return false; 631bf215546Sopenharmony_ci nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); 632bf215546Sopenharmony_ci if (!var->data.fb_fetch_output) 633bf215546Sopenharmony_ci return false; 634bf215546Sopenharmony_ci b->cursor = nir_after_instr(instr); 635bf215546Sopenharmony_ci nir_variable *fbfetch = nir_variable_clone(var, b->shader); 636bf215546Sopenharmony_ci /* If Dim is SubpassData, ... Image Format must be Unknown 637bf215546Sopenharmony_ci * - SPIRV OpTypeImage specification 638bf215546Sopenharmony_ci */ 639bf215546Sopenharmony_ci fbfetch->data.image.format = 0; 640bf215546Sopenharmony_ci fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */ 641bf215546Sopenharmony_ci fbfetch->data.mode = nir_var_uniform; 642bf215546Sopenharmony_ci fbfetch->data.binding = ZINK_FBFETCH_BINDING; 643bf215546Sopenharmony_ci fbfetch->data.binding = ZINK_FBFETCH_BINDING; 644bf215546Sopenharmony_ci fbfetch->data.sample = ms; 645bf215546Sopenharmony_ci enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS; 646bf215546Sopenharmony_ci fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); 647bf215546Sopenharmony_ci nir_shader_add_variable(b->shader, fbfetch); 648bf215546Sopenharmony_ci nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa; 649bf215546Sopenharmony_ci nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32); 650bf215546Sopenharmony_ci nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0)); 651bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intr->dest.ssa, load); 652bf215546Sopenharmony_ci return true; 653bf215546Sopenharmony_ci} 654bf215546Sopenharmony_ci 655bf215546Sopenharmony_cistatic bool 656bf215546Sopenharmony_cilower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms) 657bf215546Sopenharmony_ci{ 658bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, shader) { 659bf215546Sopenharmony_ci if (var->data.fb_fetch_output) { 660bf215546Sopenharmony_ci *fbfetch = var; 661bf215546Sopenharmony_ci break; 662bf215546Sopenharmony_ci } 663bf215546Sopenharmony_ci } 664bf215546Sopenharmony_ci assert(*fbfetch); 665bf215546Sopenharmony_ci if (!*fbfetch) 666bf215546Sopenharmony_ci return false; 667bf215546Sopenharmony_ci return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms); 668bf215546Sopenharmony_ci} 669bf215546Sopenharmony_ci 670bf215546Sopenharmony_ci/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */ 671bf215546Sopenharmony_cistatic bool 672bf215546Sopenharmony_cicheck_psiz(struct nir_shader *s) 673bf215546Sopenharmony_ci{ 674bf215546Sopenharmony_ci bool have_psiz = false; 675bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, s) { 676bf215546Sopenharmony_ci if (var->data.location == VARYING_SLOT_PSIZ) { 677bf215546Sopenharmony_ci /* genuine PSIZ outputs will have this set */ 678bf215546Sopenharmony_ci have_psiz |= !!var->data.explicit_location; 679bf215546Sopenharmony_ci } 680bf215546Sopenharmony_ci } 681bf215546Sopenharmony_ci return have_psiz; 682bf215546Sopenharmony_ci} 683bf215546Sopenharmony_ci 684bf215546Sopenharmony_cistatic nir_variable * 685bf215546Sopenharmony_cifind_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz) 686bf215546Sopenharmony_ci{ 687bf215546Sopenharmony_ci unsigned found = 0; 688bf215546Sopenharmony_ci if (!location_frac && location != VARYING_SLOT_PSIZ) { 689bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, nir) { 690bf215546Sopenharmony_ci if (var->data.location == location) 691bf215546Sopenharmony_ci found++; 692bf215546Sopenharmony_ci } 693bf215546Sopenharmony_ci } 694bf215546Sopenharmony_ci if (found) { 695bf215546Sopenharmony_ci /* multiple variables found for this location: find the biggest one */ 696bf215546Sopenharmony_ci nir_variable *out = NULL; 697bf215546Sopenharmony_ci unsigned slots = 0; 698bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, nir) { 699bf215546Sopenharmony_ci if (var->data.location == location) { 700bf215546Sopenharmony_ci unsigned count_slots = glsl_count_vec4_slots(var->type, false, false); 701bf215546Sopenharmony_ci if (count_slots > slots) { 702bf215546Sopenharmony_ci slots = count_slots; 703bf215546Sopenharmony_ci out = var; 704bf215546Sopenharmony_ci } 705bf215546Sopenharmony_ci } 706bf215546Sopenharmony_ci } 707bf215546Sopenharmony_ci return out; 708bf215546Sopenharmony_ci } else { 709bf215546Sopenharmony_ci /* only one variable found or this is location_frac */ 710bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, nir) { 711bf215546Sopenharmony_ci if (var->data.location == location && 712bf215546Sopenharmony_ci (var->data.location_frac == location_frac || 713bf215546Sopenharmony_ci (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) { 714bf215546Sopenharmony_ci if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location) 715bf215546Sopenharmony_ci return var; 716bf215546Sopenharmony_ci } 717bf215546Sopenharmony_ci } 718bf215546Sopenharmony_ci } 719bf215546Sopenharmony_ci return NULL; 720bf215546Sopenharmony_ci} 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_cistatic bool 723bf215546Sopenharmony_ciis_inlined(const bool *inlined, const struct pipe_stream_output *output) 724bf215546Sopenharmony_ci{ 725bf215546Sopenharmony_ci for (unsigned i = 0; i < output->num_components; i++) 726bf215546Sopenharmony_ci if (!inlined[output->start_component + i]) 727bf215546Sopenharmony_ci return false; 728bf215546Sopenharmony_ci return true; 729bf215546Sopenharmony_ci} 730bf215546Sopenharmony_ci 731bf215546Sopenharmony_cistatic void 732bf215546Sopenharmony_ciupdate_psiz_location(nir_shader *nir, nir_variable *psiz) 733bf215546Sopenharmony_ci{ 734bf215546Sopenharmony_ci uint32_t last_output = util_last_bit64(nir->info.outputs_written); 735bf215546Sopenharmony_ci if (last_output < VARYING_SLOT_VAR0) 736bf215546Sopenharmony_ci last_output = VARYING_SLOT_VAR0; 737bf215546Sopenharmony_ci else 738bf215546Sopenharmony_ci last_output++; 739bf215546Sopenharmony_ci /* this should get fixed up by slot remapping */ 740bf215546Sopenharmony_ci psiz->data.location = last_output; 741bf215546Sopenharmony_ci} 742bf215546Sopenharmony_ci 743bf215546Sopenharmony_cistatic const struct glsl_type * 744bf215546Sopenharmony_ciclamp_slot_type(const struct glsl_type *type, unsigned slot) 745bf215546Sopenharmony_ci{ 746bf215546Sopenharmony_ci /* could be dvec/dmat/mat: each member is the same */ 747bf215546Sopenharmony_ci const struct glsl_type *plain = glsl_without_array_or_matrix(type); 748bf215546Sopenharmony_ci /* determine size of each member type */ 749bf215546Sopenharmony_ci unsigned slot_count = glsl_count_vec4_slots(plain, false, false); 750bf215546Sopenharmony_ci /* normalize slot idx to current type's size */ 751bf215546Sopenharmony_ci slot %= slot_count; 752bf215546Sopenharmony_ci unsigned slot_components = glsl_get_components(plain); 753bf215546Sopenharmony_ci if (glsl_base_type_is_64bit(glsl_get_base_type(plain))) 754bf215546Sopenharmony_ci slot_components *= 2; 755bf215546Sopenharmony_ci /* create a vec4 mask of the selected slot's components out of all the components */ 756bf215546Sopenharmony_ci uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4); 757bf215546Sopenharmony_ci /* return a vecN of the selected components */ 758bf215546Sopenharmony_ci slot_components = util_bitcount(mask); 759bf215546Sopenharmony_ci return glsl_vec_type(slot_components); 760bf215546Sopenharmony_ci} 761bf215546Sopenharmony_ci 762bf215546Sopenharmony_cistatic const struct glsl_type * 763bf215546Sopenharmony_ciunroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx) 764bf215546Sopenharmony_ci{ 765bf215546Sopenharmony_ci const struct glsl_type *type = slot_type; 766bf215546Sopenharmony_ci unsigned slot_count = 0; 767bf215546Sopenharmony_ci unsigned cur_slot = 0; 768bf215546Sopenharmony_ci /* iterate over all the members in the struct, stopping once the slot idx is reached */ 769bf215546Sopenharmony_ci for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) { 770bf215546Sopenharmony_ci /* use array type for slot counting but return array member type for unroll */ 771bf215546Sopenharmony_ci const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i); 772bf215546Sopenharmony_ci type = glsl_without_array(arraytype); 773bf215546Sopenharmony_ci slot_count = glsl_count_vec4_slots(arraytype, false, false); 774bf215546Sopenharmony_ci } 775bf215546Sopenharmony_ci *slot_idx -= (cur_slot - slot_count); 776bf215546Sopenharmony_ci if (!glsl_type_is_struct_or_ifc(type)) 777bf215546Sopenharmony_ci /* this is a fully unrolled struct: find the number of vec components to output */ 778bf215546Sopenharmony_ci type = clamp_slot_type(type, *slot_idx); 779bf215546Sopenharmony_ci return type; 780bf215546Sopenharmony_ci} 781bf215546Sopenharmony_ci 782bf215546Sopenharmony_cistatic unsigned 783bf215546Sopenharmony_ciget_slot_components(nir_variable *var, unsigned slot, unsigned so_slot) 784bf215546Sopenharmony_ci{ 785bf215546Sopenharmony_ci assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false)); 786bf215546Sopenharmony_ci const struct glsl_type *orig_type = var->type; 787bf215546Sopenharmony_ci const struct glsl_type *type = glsl_without_array(var->type); 788bf215546Sopenharmony_ci unsigned slot_idx = slot - so_slot; 789bf215546Sopenharmony_ci if (type != orig_type) 790bf215546Sopenharmony_ci slot_idx %= glsl_count_vec4_slots(type, false, false); 791bf215546Sopenharmony_ci /* need to find the vec4 that's being exported by this slot */ 792bf215546Sopenharmony_ci while (glsl_type_is_struct_or_ifc(type)) 793bf215546Sopenharmony_ci type = unroll_struct_type(type, &slot_idx); 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_ci /* arrays here are already fully unrolled from their structs, so slot handling is implicit */ 796bf215546Sopenharmony_ci unsigned num_components = glsl_get_components(glsl_without_array(type)); 797bf215546Sopenharmony_ci const struct glsl_type *arraytype = orig_type; 798bf215546Sopenharmony_ci while (glsl_type_is_array(arraytype) && !glsl_type_is_struct_or_ifc(glsl_without_array(arraytype))) { 799bf215546Sopenharmony_ci num_components *= glsl_array_size(arraytype); 800bf215546Sopenharmony_ci arraytype = glsl_get_array_element(arraytype); 801bf215546Sopenharmony_ci } 802bf215546Sopenharmony_ci assert(num_components); 803bf215546Sopenharmony_ci /* gallium handles xfb in terms of 32bit units */ 804bf215546Sopenharmony_ci if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type)))) 805bf215546Sopenharmony_ci num_components *= 2; 806bf215546Sopenharmony_ci return num_components; 807bf215546Sopenharmony_ci} 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_cistatic const struct pipe_stream_output * 810bf215546Sopenharmony_cifind_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot) 811bf215546Sopenharmony_ci{ 812bf215546Sopenharmony_ci for (unsigned i = 0; i < so_info->num_outputs; i++) { 813bf215546Sopenharmony_ci const struct pipe_stream_output *packed_output = &so_info->output[i]; 814bf215546Sopenharmony_ci if (reverse_map[packed_output->register_index] == slot) 815bf215546Sopenharmony_ci return packed_output; 816bf215546Sopenharmony_ci } 817bf215546Sopenharmony_ci return NULL; 818bf215546Sopenharmony_ci} 819bf215546Sopenharmony_ci 820bf215546Sopenharmony_cistatic void 821bf215546Sopenharmony_ciupdate_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info, 822bf215546Sopenharmony_ci uint64_t outputs_written, bool have_psiz) 823bf215546Sopenharmony_ci{ 824bf215546Sopenharmony_ci uint8_t reverse_map[VARYING_SLOT_MAX] = {0}; 825bf215546Sopenharmony_ci unsigned slot = 0; 826bf215546Sopenharmony_ci /* semi-copied from iris */ 827bf215546Sopenharmony_ci while (outputs_written) { 828bf215546Sopenharmony_ci int bit = u_bit_scan64(&outputs_written); 829bf215546Sopenharmony_ci /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */ 830bf215546Sopenharmony_ci if (bit == VARYING_SLOT_PSIZ && !have_psiz) 831bf215546Sopenharmony_ci continue; 832bf215546Sopenharmony_ci reverse_map[slot++] = bit; 833bf215546Sopenharmony_ci } 834bf215546Sopenharmony_ci 835bf215546Sopenharmony_ci bool have_fake_psiz = false; 836bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, zs->nir) { 837bf215546Sopenharmony_ci if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location) 838bf215546Sopenharmony_ci have_fake_psiz = true; 839bf215546Sopenharmony_ci } 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_ci bool inlined[VARYING_SLOT_MAX][4] = {0}; 842bf215546Sopenharmony_ci uint64_t packed = 0; 843bf215546Sopenharmony_ci uint8_t packed_components[VARYING_SLOT_MAX] = {0}; 844bf215546Sopenharmony_ci uint8_t packed_streams[VARYING_SLOT_MAX] = {0}; 845bf215546Sopenharmony_ci uint8_t packed_buffers[VARYING_SLOT_MAX] = {0}; 846bf215546Sopenharmony_ci uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0}; 847bf215546Sopenharmony_ci nir_variable *psiz = NULL; 848bf215546Sopenharmony_ci for (unsigned i = 0; i < so_info->num_outputs; i++) { 849bf215546Sopenharmony_ci const struct pipe_stream_output *output = &so_info->output[i]; 850bf215546Sopenharmony_ci unsigned slot = reverse_map[output->register_index]; 851bf215546Sopenharmony_ci /* always set stride to be used during draw */ 852bf215546Sopenharmony_ci zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer]; 853bf215546Sopenharmony_ci if (zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) { 854bf215546Sopenharmony_ci nir_variable *var = NULL; 855bf215546Sopenharmony_ci unsigned so_slot; 856bf215546Sopenharmony_ci while (!var) 857bf215546Sopenharmony_ci var = find_var_with_location_frac(zs->nir, slot--, output->start_component, have_psiz); 858bf215546Sopenharmony_ci if (var->data.location == VARYING_SLOT_PSIZ) 859bf215546Sopenharmony_ci psiz = var; 860bf215546Sopenharmony_ci so_slot = slot + 1; 861bf215546Sopenharmony_ci slot = reverse_map[output->register_index]; 862bf215546Sopenharmony_ci if (var->data.explicit_xfb_buffer) { 863bf215546Sopenharmony_ci /* handle dvec3 where gallium splits streamout over 2 registers */ 864bf215546Sopenharmony_ci for (unsigned j = 0; j < output->num_components; j++) 865bf215546Sopenharmony_ci inlined[slot][output->start_component + j] = true; 866bf215546Sopenharmony_ci } 867bf215546Sopenharmony_ci if (is_inlined(inlined[slot], output)) 868bf215546Sopenharmony_ci continue; 869bf215546Sopenharmony_ci bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type)); 870bf215546Sopenharmony_ci unsigned num_components = get_slot_components(var, slot, so_slot); 871bf215546Sopenharmony_ci /* if this is the entire variable, try to blast it out during the initial declaration 872bf215546Sopenharmony_ci * structs must be handled later to ensure accurate analysis 873bf215546Sopenharmony_ci */ 874bf215546Sopenharmony_ci if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) { 875bf215546Sopenharmony_ci var->data.explicit_xfb_buffer = 1; 876bf215546Sopenharmony_ci var->data.xfb.buffer = output->output_buffer; 877bf215546Sopenharmony_ci var->data.xfb.stride = so_info->stride[output->output_buffer] * 4; 878bf215546Sopenharmony_ci var->data.offset = output->dst_offset * 4; 879bf215546Sopenharmony_ci var->data.stream = output->stream; 880bf215546Sopenharmony_ci for (unsigned j = 0; j < output->num_components; j++) 881bf215546Sopenharmony_ci inlined[slot][output->start_component + j] = true; 882bf215546Sopenharmony_ci } else { 883bf215546Sopenharmony_ci /* otherwise store some metadata for later */ 884bf215546Sopenharmony_ci packed |= BITFIELD64_BIT(slot); 885bf215546Sopenharmony_ci packed_components[slot] += output->num_components; 886bf215546Sopenharmony_ci packed_streams[slot] |= BITFIELD_BIT(output->stream); 887bf215546Sopenharmony_ci packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer); 888bf215546Sopenharmony_ci for (unsigned j = 0; j < output->num_components; j++) 889bf215546Sopenharmony_ci packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j; 890bf215546Sopenharmony_ci } 891bf215546Sopenharmony_ci } 892bf215546Sopenharmony_ci } 893bf215546Sopenharmony_ci 894bf215546Sopenharmony_ci /* if this was flagged as a packed output before, and if all the components are 895bf215546Sopenharmony_ci * being output with the same stream on the same buffer with increasing offsets, this entire variable 896bf215546Sopenharmony_ci * can be consolidated into a single output to conserve locations 897bf215546Sopenharmony_ci */ 898bf215546Sopenharmony_ci for (unsigned i = 0; i < so_info->num_outputs; i++) { 899bf215546Sopenharmony_ci const struct pipe_stream_output *output = &so_info->output[i]; 900bf215546Sopenharmony_ci unsigned slot = reverse_map[output->register_index]; 901bf215546Sopenharmony_ci if (is_inlined(inlined[slot], output)) 902bf215546Sopenharmony_ci continue; 903bf215546Sopenharmony_ci if (zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) { 904bf215546Sopenharmony_ci nir_variable *var = NULL; 905bf215546Sopenharmony_ci while (!var) 906bf215546Sopenharmony_ci var = find_var_with_location_frac(zs->nir, slot--, output->start_component, have_psiz); 907bf215546Sopenharmony_ci /* this is a lowered 64bit variable that can't be exported due to packing */ 908bf215546Sopenharmony_ci if (var->data.is_xfb) 909bf215546Sopenharmony_ci goto out; 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_ci unsigned num_slots = glsl_count_vec4_slots(var->type, false, false); 912bf215546Sopenharmony_ci /* for each variable, iterate over all the variable's slots and inline the outputs */ 913bf215546Sopenharmony_ci for (unsigned j = 0; j < num_slots; j++) { 914bf215546Sopenharmony_ci slot = var->data.location + j; 915bf215546Sopenharmony_ci const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot); 916bf215546Sopenharmony_ci if (!packed_output) 917bf215546Sopenharmony_ci goto out; 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_ci /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */ 920bf215546Sopenharmony_ci if (!(packed & BITFIELD64_BIT(slot)) || 921bf215546Sopenharmony_ci util_bitcount(packed_streams[slot]) != 1 || 922bf215546Sopenharmony_ci util_bitcount(packed_buffers[slot]) != 1) 923bf215546Sopenharmony_ci goto out; 924bf215546Sopenharmony_ci 925bf215546Sopenharmony_ci /* if all the components the variable exports to this slot aren't captured, skip consolidation */ 926bf215546Sopenharmony_ci unsigned num_components = get_slot_components(var, slot, var->data.location); 927bf215546Sopenharmony_ci if (glsl_type_is_array(var->type) && !glsl_type_is_struct_or_ifc(glsl_without_array(var->type))) 928bf215546Sopenharmony_ci num_components /= glsl_array_size(var->type); 929bf215546Sopenharmony_ci if (num_components != packed_components[slot]) 930bf215546Sopenharmony_ci goto out; 931bf215546Sopenharmony_ci 932bf215546Sopenharmony_ci /* in order to pack the xfb output, all the offsets must be sequentially incrementing */ 933bf215546Sopenharmony_ci uint32_t prev_offset = packed_offsets[packed_output->register_index][0]; 934bf215546Sopenharmony_ci for (unsigned k = 1; k < num_components; k++) { 935bf215546Sopenharmony_ci /* if the offsets are not incrementing as expected, skip consolidation */ 936bf215546Sopenharmony_ci if (packed_offsets[packed_output->register_index][k] != prev_offset + 1) 937bf215546Sopenharmony_ci goto out; 938bf215546Sopenharmony_ci prev_offset = packed_offsets[packed_output->register_index][k + packed_output->start_component]; 939bf215546Sopenharmony_ci } 940bf215546Sopenharmony_ci } 941bf215546Sopenharmony_ci /* this output can be consolidated: blast out all the data inlined */ 942bf215546Sopenharmony_ci var->data.explicit_xfb_buffer = 1; 943bf215546Sopenharmony_ci var->data.xfb.buffer = output->output_buffer; 944bf215546Sopenharmony_ci var->data.xfb.stride = so_info->stride[output->output_buffer] * 4; 945bf215546Sopenharmony_ci var->data.offset = output->dst_offset * 4; 946bf215546Sopenharmony_ci var->data.stream = output->stream; 947bf215546Sopenharmony_ci /* GLSL specifies that interface blocks are split per-buffer in XFB */ 948bf215546Sopenharmony_ci if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type))) 949bf215546Sopenharmony_ci zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0); 950bf215546Sopenharmony_ci /* mark all slot components inlined to skip subsequent loop iterations */ 951bf215546Sopenharmony_ci for (unsigned j = 0; j < num_slots; j++) { 952bf215546Sopenharmony_ci slot = var->data.location + j; 953bf215546Sopenharmony_ci for (unsigned k = 0; k < packed_components[slot]; k++) 954bf215546Sopenharmony_ci inlined[slot][k] = true; 955bf215546Sopenharmony_ci packed &= ~BITFIELD64_BIT(slot); 956bf215546Sopenharmony_ci } 957bf215546Sopenharmony_ci continue; 958bf215546Sopenharmony_ci } 959bf215546Sopenharmony_ciout: 960bf215546Sopenharmony_ci /* these are packed/explicit varyings which can't be exported with normal output */ 961bf215546Sopenharmony_ci zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output; 962bf215546Sopenharmony_ci /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */ 963bf215546Sopenharmony_ci zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index]; 964bf215546Sopenharmony_ci } 965bf215546Sopenharmony_ci zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate; 966bf215546Sopenharmony_ci /* ensure this doesn't get output in the shader by unsetting location */ 967bf215546Sopenharmony_ci if (have_fake_psiz && psiz) 968bf215546Sopenharmony_ci update_psiz_location(zs->nir, psiz); 969bf215546Sopenharmony_ci} 970bf215546Sopenharmony_ci 971bf215546Sopenharmony_cistruct decompose_state { 972bf215546Sopenharmony_ci nir_variable **split; 973bf215546Sopenharmony_ci bool needs_w; 974bf215546Sopenharmony_ci}; 975bf215546Sopenharmony_ci 976bf215546Sopenharmony_cistatic bool 977bf215546Sopenharmony_cilower_attrib(nir_builder *b, nir_instr *instr, void *data) 978bf215546Sopenharmony_ci{ 979bf215546Sopenharmony_ci struct decompose_state *state = data; 980bf215546Sopenharmony_ci nir_variable **split = state->split; 981bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 982bf215546Sopenharmony_ci return false; 983bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 984bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_load_deref) 985bf215546Sopenharmony_ci return false; 986bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); 987bf215546Sopenharmony_ci nir_variable *var = nir_deref_instr_get_variable(deref); 988bf215546Sopenharmony_ci if (var != split[0]) 989bf215546Sopenharmony_ci return false; 990bf215546Sopenharmony_ci unsigned num_components = glsl_get_vector_elements(split[0]->type); 991bf215546Sopenharmony_ci b->cursor = nir_after_instr(instr); 992bf215546Sopenharmony_ci nir_ssa_def *loads[4]; 993bf215546Sopenharmony_ci for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++) 994bf215546Sopenharmony_ci loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1])); 995bf215546Sopenharmony_ci if (state->needs_w) { 996bf215546Sopenharmony_ci /* oob load w comopnent to get correct value for int/float */ 997bf215546Sopenharmony_ci loads[3] = nir_channel(b, loads[0], 3); 998bf215546Sopenharmony_ci loads[0] = nir_channel(b, loads[0], 0); 999bf215546Sopenharmony_ci } 1000bf215546Sopenharmony_ci nir_ssa_def *new_load = nir_vec(b, loads, num_components); 1001bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load); 1002bf215546Sopenharmony_ci nir_instr_remove_v(instr); 1003bf215546Sopenharmony_ci return true; 1004bf215546Sopenharmony_ci} 1005bf215546Sopenharmony_ci 1006bf215546Sopenharmony_cistatic bool 1007bf215546Sopenharmony_cidecompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w) 1008bf215546Sopenharmony_ci{ 1009bf215546Sopenharmony_ci uint32_t bits = 0; 1010bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) 1011bf215546Sopenharmony_ci bits |= BITFIELD_BIT(var->data.driver_location); 1012bf215546Sopenharmony_ci bits = ~bits; 1013bf215546Sopenharmony_ci u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) { 1014bf215546Sopenharmony_ci nir_variable *split[5]; 1015bf215546Sopenharmony_ci struct decompose_state state; 1016bf215546Sopenharmony_ci state.split = split; 1017bf215546Sopenharmony_ci nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location); 1018bf215546Sopenharmony_ci assert(var); 1019bf215546Sopenharmony_ci split[0] = var; 1020bf215546Sopenharmony_ci bits |= BITFIELD_BIT(var->data.driver_location); 1021bf215546Sopenharmony_ci const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type); 1022bf215546Sopenharmony_ci unsigned num_components = glsl_get_vector_elements(var->type); 1023bf215546Sopenharmony_ci state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4; 1024bf215546Sopenharmony_ci for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) { 1025bf215546Sopenharmony_ci split[i+1] = nir_variable_clone(var, nir); 1026bf215546Sopenharmony_ci split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i); 1027bf215546Sopenharmony_ci if (decomposed_attrs_without_w & BITFIELD_BIT(location)) 1028bf215546Sopenharmony_ci split[i+1]->type = !i && num_components == 4 ? var->type : new_type; 1029bf215546Sopenharmony_ci else 1030bf215546Sopenharmony_ci split[i+1]->type = new_type; 1031bf215546Sopenharmony_ci split[i+1]->data.driver_location = ffs(bits) - 1; 1032bf215546Sopenharmony_ci bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location); 1033bf215546Sopenharmony_ci nir_shader_add_variable(nir, split[i+1]); 1034bf215546Sopenharmony_ci } 1035bf215546Sopenharmony_ci var->data.mode = nir_var_shader_temp; 1036bf215546Sopenharmony_ci nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state); 1037bf215546Sopenharmony_ci } 1038bf215546Sopenharmony_ci nir_fixup_deref_modes(nir); 1039bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); 1040bf215546Sopenharmony_ci optimize_nir(nir, NULL); 1041bf215546Sopenharmony_ci return true; 1042bf215546Sopenharmony_ci} 1043bf215546Sopenharmony_ci 1044bf215546Sopenharmony_cistatic bool 1045bf215546Sopenharmony_cirewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data) 1046bf215546Sopenharmony_ci{ 1047bf215546Sopenharmony_ci struct zink_screen *screen = data; 1048bf215546Sopenharmony_ci const bool has_int64 = screen->info.feats.features.shaderInt64; 1049bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 1050bf215546Sopenharmony_ci return false; 1051bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 1052bf215546Sopenharmony_ci b->cursor = nir_before_instr(instr); 1053bf215546Sopenharmony_ci switch (intr->intrinsic) { 1054bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fadd: 1055bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_add: 1056bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umin: 1057bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imin: 1058bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umax: 1059bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imax: 1060bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_and: 1061bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_or: 1062bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_xor: 1063bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_exchange: 1064bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_comp_swap: { 1065bf215546Sopenharmony_ci /* convert offset to uintN_t[idx] */ 1066bf215546Sopenharmony_ci nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8); 1067bf215546Sopenharmony_ci nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset); 1068bf215546Sopenharmony_ci return true; 1069bf215546Sopenharmony_ci } 1070bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo: 1071bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: { 1072bf215546Sopenharmony_ci /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */ 1073bf215546Sopenharmony_ci bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo && 1074bf215546Sopenharmony_ci nir_src_is_const(intr->src[0]) && 1075bf215546Sopenharmony_ci nir_src_as_uint(intr->src[0]) == 0 && 1076bf215546Sopenharmony_ci nir_dest_bit_size(intr->dest) == 64 && 1077bf215546Sopenharmony_ci nir_intrinsic_align_offset(intr) % 8 != 0; 1078bf215546Sopenharmony_ci force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64; 1079bf215546Sopenharmony_ci nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8); 1080bf215546Sopenharmony_ci nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset); 1081bf215546Sopenharmony_ci /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */ 1082bf215546Sopenharmony_ci if (force_2x32) { 1083bf215546Sopenharmony_ci /* this is always scalarized */ 1084bf215546Sopenharmony_ci assert(intr->dest.ssa.num_components == 1); 1085bf215546Sopenharmony_ci /* rewrite as 2x32 */ 1086bf215546Sopenharmony_ci nir_ssa_def *load[2]; 1087bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 1088bf215546Sopenharmony_ci if (intr->intrinsic == nir_intrinsic_load_ssbo) 1089bf215546Sopenharmony_ci load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0); 1090bf215546Sopenharmony_ci else 1091bf215546Sopenharmony_ci load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4); 1092bf215546Sopenharmony_ci nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr)); 1093bf215546Sopenharmony_ci } 1094bf215546Sopenharmony_ci /* cast back to 64bit */ 1095bf215546Sopenharmony_ci nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]); 1096bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted); 1097bf215546Sopenharmony_ci nir_instr_remove(instr); 1098bf215546Sopenharmony_ci } 1099bf215546Sopenharmony_ci return true; 1100bf215546Sopenharmony_ci } 1101bf215546Sopenharmony_ci case nir_intrinsic_load_shared: 1102bf215546Sopenharmony_ci b->cursor = nir_before_instr(instr); 1103bf215546Sopenharmony_ci bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64; 1104bf215546Sopenharmony_ci nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8); 1105bf215546Sopenharmony_ci nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset); 1106bf215546Sopenharmony_ci /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */ 1107bf215546Sopenharmony_ci if (force_2x32) { 1108bf215546Sopenharmony_ci /* this is always scalarized */ 1109bf215546Sopenharmony_ci assert(intr->dest.ssa.num_components == 1); 1110bf215546Sopenharmony_ci /* rewrite as 2x32 */ 1111bf215546Sopenharmony_ci nir_ssa_def *load[2]; 1112bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) 1113bf215546Sopenharmony_ci load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0); 1114bf215546Sopenharmony_ci /* cast back to 64bit */ 1115bf215546Sopenharmony_ci nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]); 1116bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted); 1117bf215546Sopenharmony_ci nir_instr_remove(instr); 1118bf215546Sopenharmony_ci return true; 1119bf215546Sopenharmony_ci } 1120bf215546Sopenharmony_ci break; 1121bf215546Sopenharmony_ci case nir_intrinsic_store_ssbo: { 1122bf215546Sopenharmony_ci b->cursor = nir_before_instr(instr); 1123bf215546Sopenharmony_ci bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64; 1124bf215546Sopenharmony_ci nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8); 1125bf215546Sopenharmony_ci nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset); 1126bf215546Sopenharmony_ci /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */ 1127bf215546Sopenharmony_ci if (force_2x32) { 1128bf215546Sopenharmony_ci /* this is always scalarized */ 1129bf215546Sopenharmony_ci assert(intr->src[0].ssa->num_components == 1); 1130bf215546Sopenharmony_ci nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)}; 1131bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) 1132bf215546Sopenharmony_ci nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0); 1133bf215546Sopenharmony_ci nir_instr_remove(instr); 1134bf215546Sopenharmony_ci } 1135bf215546Sopenharmony_ci return true; 1136bf215546Sopenharmony_ci } 1137bf215546Sopenharmony_ci case nir_intrinsic_store_shared: { 1138bf215546Sopenharmony_ci b->cursor = nir_before_instr(instr); 1139bf215546Sopenharmony_ci bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64; 1140bf215546Sopenharmony_ci nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8); 1141bf215546Sopenharmony_ci nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset); 1142bf215546Sopenharmony_ci /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */ 1143bf215546Sopenharmony_ci if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) { 1144bf215546Sopenharmony_ci /* this is always scalarized */ 1145bf215546Sopenharmony_ci assert(intr->src[0].ssa->num_components == 1); 1146bf215546Sopenharmony_ci nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)}; 1147bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) 1148bf215546Sopenharmony_ci nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0); 1149bf215546Sopenharmony_ci nir_instr_remove(instr); 1150bf215546Sopenharmony_ci } 1151bf215546Sopenharmony_ci return true; 1152bf215546Sopenharmony_ci } 1153bf215546Sopenharmony_ci default: 1154bf215546Sopenharmony_ci break; 1155bf215546Sopenharmony_ci } 1156bf215546Sopenharmony_ci return false; 1157bf215546Sopenharmony_ci} 1158bf215546Sopenharmony_ci 1159bf215546Sopenharmony_cistatic bool 1160bf215546Sopenharmony_cirewrite_bo_access(nir_shader *shader, struct zink_screen *screen) 1161bf215546Sopenharmony_ci{ 1162bf215546Sopenharmony_ci return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen); 1163bf215546Sopenharmony_ci} 1164bf215546Sopenharmony_ci 1165bf215546Sopenharmony_cistatic nir_variable * 1166bf215546Sopenharmony_ciget_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size) 1167bf215546Sopenharmony_ci{ 1168bf215546Sopenharmony_ci nir_variable *var, **ptr; 1169bf215546Sopenharmony_ci unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1; 1170bf215546Sopenharmony_ci 1171bf215546Sopenharmony_ci if (ssbo) 1172bf215546Sopenharmony_ci ptr = &bo->ssbo[bit_size >> 4]; 1173bf215546Sopenharmony_ci else { 1174bf215546Sopenharmony_ci if (!idx) { 1175bf215546Sopenharmony_ci ptr = &bo->uniforms[bit_size >> 4]; 1176bf215546Sopenharmony_ci } else 1177bf215546Sopenharmony_ci ptr = &bo->ubo[bit_size >> 4]; 1178bf215546Sopenharmony_ci } 1179bf215546Sopenharmony_ci var = *ptr; 1180bf215546Sopenharmony_ci if (!var) { 1181bf215546Sopenharmony_ci if (ssbo) 1182bf215546Sopenharmony_ci var = bo->ssbo[32 >> 4]; 1183bf215546Sopenharmony_ci else { 1184bf215546Sopenharmony_ci if (!idx) 1185bf215546Sopenharmony_ci var = bo->uniforms[32 >> 4]; 1186bf215546Sopenharmony_ci else 1187bf215546Sopenharmony_ci var = bo->ubo[32 >> 4]; 1188bf215546Sopenharmony_ci } 1189bf215546Sopenharmony_ci var = nir_variable_clone(var, shader); 1190bf215546Sopenharmony_ci *ptr = var; 1191bf215546Sopenharmony_ci nir_shader_add_variable(shader, var); 1192bf215546Sopenharmony_ci 1193bf215546Sopenharmony_ci struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2); 1194bf215546Sopenharmony_ci fields[0].name = ralloc_strdup(shader, "base"); 1195bf215546Sopenharmony_ci fields[1].name = ralloc_strdup(shader, "unsized"); 1196bf215546Sopenharmony_ci unsigned array_size = glsl_get_length(var->type); 1197bf215546Sopenharmony_ci const struct glsl_type *bare_type = glsl_without_array(var->type); 1198bf215546Sopenharmony_ci const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0); 1199bf215546Sopenharmony_ci unsigned length = glsl_get_length(array_type); 1200bf215546Sopenharmony_ci const struct glsl_type *type; 1201bf215546Sopenharmony_ci const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8); 1202bf215546Sopenharmony_ci if (bit_size > 32) { 1203bf215546Sopenharmony_ci assert(bit_size == 64); 1204bf215546Sopenharmony_ci type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8); 1205bf215546Sopenharmony_ci } else { 1206bf215546Sopenharmony_ci type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8); 1207bf215546Sopenharmony_ci } 1208bf215546Sopenharmony_ci fields[0].type = type; 1209bf215546Sopenharmony_ci fields[1].type = unsized; 1210bf215546Sopenharmony_ci var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0); 1211bf215546Sopenharmony_ci var->data.driver_location = idx; 1212bf215546Sopenharmony_ci } 1213bf215546Sopenharmony_ci return var; 1214bf215546Sopenharmony_ci} 1215bf215546Sopenharmony_ci 1216bf215546Sopenharmony_cistatic void 1217bf215546Sopenharmony_cirewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo) 1218bf215546Sopenharmony_ci{ 1219bf215546Sopenharmony_ci nir_intrinsic_op op; 1220bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 1221bf215546Sopenharmony_ci switch (intr->intrinsic) { 1222bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_add: 1223bf215546Sopenharmony_ci op = nir_intrinsic_deref_atomic_add; 1224bf215546Sopenharmony_ci break; 1225bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umin: 1226bf215546Sopenharmony_ci op = nir_intrinsic_deref_atomic_umin; 1227bf215546Sopenharmony_ci break; 1228bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imin: 1229bf215546Sopenharmony_ci op = nir_intrinsic_deref_atomic_imin; 1230bf215546Sopenharmony_ci break; 1231bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umax: 1232bf215546Sopenharmony_ci op = nir_intrinsic_deref_atomic_umax; 1233bf215546Sopenharmony_ci break; 1234bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imax: 1235bf215546Sopenharmony_ci op = nir_intrinsic_deref_atomic_imax; 1236bf215546Sopenharmony_ci break; 1237bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_and: 1238bf215546Sopenharmony_ci op = nir_intrinsic_deref_atomic_and; 1239bf215546Sopenharmony_ci break; 1240bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_or: 1241bf215546Sopenharmony_ci op = nir_intrinsic_deref_atomic_or; 1242bf215546Sopenharmony_ci break; 1243bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_xor: 1244bf215546Sopenharmony_ci op = nir_intrinsic_deref_atomic_xor; 1245bf215546Sopenharmony_ci break; 1246bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_exchange: 1247bf215546Sopenharmony_ci op = nir_intrinsic_deref_atomic_exchange; 1248bf215546Sopenharmony_ci break; 1249bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_comp_swap: 1250bf215546Sopenharmony_ci op = nir_intrinsic_deref_atomic_comp_swap; 1251bf215546Sopenharmony_ci break; 1252bf215546Sopenharmony_ci default: 1253bf215546Sopenharmony_ci unreachable("unknown intrinsic"); 1254bf215546Sopenharmony_ci } 1255bf215546Sopenharmony_ci nir_ssa_def *offset = intr->src[1].ssa; 1256bf215546Sopenharmony_ci nir_src *src = &intr->src[0]; 1257bf215546Sopenharmony_ci nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest)); 1258bf215546Sopenharmony_ci nir_deref_instr *deref_var = nir_build_deref_var(b, var); 1259bf215546Sopenharmony_ci nir_ssa_def *idx = src->ssa; 1260bf215546Sopenharmony_ci if (bo->first_ssbo) 1261bf215546Sopenharmony_ci idx = nir_iadd_imm(b, idx, -bo->first_ssbo); 1262bf215546Sopenharmony_ci nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx); 1263bf215546Sopenharmony_ci nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0); 1264bf215546Sopenharmony_ci 1265bf215546Sopenharmony_ci /* generate new atomic deref ops for every component */ 1266bf215546Sopenharmony_ci nir_ssa_def *result[4]; 1267bf215546Sopenharmony_ci unsigned num_components = nir_dest_num_components(intr->dest); 1268bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; i++) { 1269bf215546Sopenharmony_ci nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset); 1270bf215546Sopenharmony_ci nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op); 1271bf215546Sopenharmony_ci nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1, nir_dest_bit_size(intr->dest), ""); 1272bf215546Sopenharmony_ci new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa); 1273bf215546Sopenharmony_ci /* deref ops have no offset src, so copy the srcs after it */ 1274bf215546Sopenharmony_ci for (unsigned i = 2; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++) 1275bf215546Sopenharmony_ci nir_src_copy(&new_instr->src[i - 1], &intr->src[i]); 1276bf215546Sopenharmony_ci nir_builder_instr_insert(b, &new_instr->instr); 1277bf215546Sopenharmony_ci 1278bf215546Sopenharmony_ci result[i] = &new_instr->dest.ssa; 1279bf215546Sopenharmony_ci offset = nir_iadd_imm(b, offset, 1); 1280bf215546Sopenharmony_ci } 1281bf215546Sopenharmony_ci 1282bf215546Sopenharmony_ci nir_ssa_def *load = nir_vec(b, result, num_components); 1283bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intr->dest.ssa, load); 1284bf215546Sopenharmony_ci nir_instr_remove(instr); 1285bf215546Sopenharmony_ci} 1286bf215546Sopenharmony_ci 1287bf215546Sopenharmony_cistatic bool 1288bf215546Sopenharmony_ciremove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data) 1289bf215546Sopenharmony_ci{ 1290bf215546Sopenharmony_ci struct bo_vars *bo = data; 1291bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 1292bf215546Sopenharmony_ci return false; 1293bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 1294bf215546Sopenharmony_ci nir_variable *var = NULL; 1295bf215546Sopenharmony_ci nir_ssa_def *offset = NULL; 1296bf215546Sopenharmony_ci bool is_load = true; 1297bf215546Sopenharmony_ci b->cursor = nir_before_instr(instr); 1298bf215546Sopenharmony_ci nir_src *src; 1299bf215546Sopenharmony_ci bool ssbo = true; 1300bf215546Sopenharmony_ci switch (intr->intrinsic) { 1301bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_add: 1302bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umin: 1303bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imin: 1304bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umax: 1305bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imax: 1306bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_and: 1307bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_or: 1308bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_xor: 1309bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_exchange: 1310bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_comp_swap: 1311bf215546Sopenharmony_ci rewrite_atomic_ssbo_instr(b, instr, bo); 1312bf215546Sopenharmony_ci return true; 1313bf215546Sopenharmony_ci case nir_intrinsic_store_ssbo: 1314bf215546Sopenharmony_ci src = &intr->src[1]; 1315bf215546Sopenharmony_ci var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0])); 1316bf215546Sopenharmony_ci offset = intr->src[2].ssa; 1317bf215546Sopenharmony_ci is_load = false; 1318bf215546Sopenharmony_ci break; 1319bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo: 1320bf215546Sopenharmony_ci src = &intr->src[0]; 1321bf215546Sopenharmony_ci var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest)); 1322bf215546Sopenharmony_ci offset = intr->src[1].ssa; 1323bf215546Sopenharmony_ci break; 1324bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 1325bf215546Sopenharmony_ci src = &intr->src[0]; 1326bf215546Sopenharmony_ci var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest)); 1327bf215546Sopenharmony_ci offset = intr->src[1].ssa; 1328bf215546Sopenharmony_ci ssbo = false; 1329bf215546Sopenharmony_ci break; 1330bf215546Sopenharmony_ci default: 1331bf215546Sopenharmony_ci return false; 1332bf215546Sopenharmony_ci } 1333bf215546Sopenharmony_ci assert(var); 1334bf215546Sopenharmony_ci assert(offset); 1335bf215546Sopenharmony_ci nir_deref_instr *deref_var = nir_build_deref_var(b, var); 1336bf215546Sopenharmony_ci nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa; 1337bf215546Sopenharmony_ci if (!ssbo && bo->first_ubo && var->data.driver_location) 1338bf215546Sopenharmony_ci idx = nir_iadd_imm(b, idx, -bo->first_ubo); 1339bf215546Sopenharmony_ci else if (ssbo && bo->first_ssbo) 1340bf215546Sopenharmony_ci idx = nir_iadd_imm(b, idx, -bo->first_ssbo); 1341bf215546Sopenharmony_ci nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx); 1342bf215546Sopenharmony_ci nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0); 1343bf215546Sopenharmony_ci assert(intr->num_components <= 2); 1344bf215546Sopenharmony_ci if (is_load) { 1345bf215546Sopenharmony_ci nir_ssa_def *result[2]; 1346bf215546Sopenharmony_ci for (unsigned i = 0; i < intr->num_components; i++) { 1347bf215546Sopenharmony_ci nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset); 1348bf215546Sopenharmony_ci result[i] = nir_load_deref(b, deref_arr); 1349bf215546Sopenharmony_ci if (intr->intrinsic == nir_intrinsic_load_ssbo) 1350bf215546Sopenharmony_ci nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr)); 1351bf215546Sopenharmony_ci offset = nir_iadd_imm(b, offset, 1); 1352bf215546Sopenharmony_ci } 1353bf215546Sopenharmony_ci nir_ssa_def *load = nir_vec(b, result, intr->num_components); 1354bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intr->dest.ssa, load); 1355bf215546Sopenharmony_ci } else { 1356bf215546Sopenharmony_ci nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset); 1357bf215546Sopenharmony_ci nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr)); 1358bf215546Sopenharmony_ci } 1359bf215546Sopenharmony_ci nir_instr_remove(instr); 1360bf215546Sopenharmony_ci return true; 1361bf215546Sopenharmony_ci} 1362bf215546Sopenharmony_ci 1363bf215546Sopenharmony_cistatic bool 1364bf215546Sopenharmony_ciremove_bo_access(nir_shader *shader, struct zink_shader *zs) 1365bf215546Sopenharmony_ci{ 1366bf215546Sopenharmony_ci struct bo_vars bo = get_bo_vars(zs, shader); 1367bf215546Sopenharmony_ci return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo); 1368bf215546Sopenharmony_ci} 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_cistatic void 1371bf215546Sopenharmony_ciassign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map) 1372bf215546Sopenharmony_ci{ 1373bf215546Sopenharmony_ci unsigned slot = var->data.location; 1374bf215546Sopenharmony_ci switch (slot) { 1375bf215546Sopenharmony_ci case -1: 1376bf215546Sopenharmony_ci case VARYING_SLOT_POS: 1377bf215546Sopenharmony_ci case VARYING_SLOT_PNTC: 1378bf215546Sopenharmony_ci case VARYING_SLOT_PSIZ: 1379bf215546Sopenharmony_ci case VARYING_SLOT_LAYER: 1380bf215546Sopenharmony_ci case VARYING_SLOT_PRIMITIVE_ID: 1381bf215546Sopenharmony_ci case VARYING_SLOT_CLIP_DIST0: 1382bf215546Sopenharmony_ci case VARYING_SLOT_CULL_DIST0: 1383bf215546Sopenharmony_ci case VARYING_SLOT_VIEWPORT: 1384bf215546Sopenharmony_ci case VARYING_SLOT_FACE: 1385bf215546Sopenharmony_ci case VARYING_SLOT_TESS_LEVEL_OUTER: 1386bf215546Sopenharmony_ci case VARYING_SLOT_TESS_LEVEL_INNER: 1387bf215546Sopenharmony_ci /* use a sentinel value to avoid counting later */ 1388bf215546Sopenharmony_ci var->data.driver_location = UINT_MAX; 1389bf215546Sopenharmony_ci break; 1390bf215546Sopenharmony_ci 1391bf215546Sopenharmony_ci default: 1392bf215546Sopenharmony_ci if (var->data.patch) { 1393bf215546Sopenharmony_ci assert(slot >= VARYING_SLOT_PATCH0); 1394bf215546Sopenharmony_ci slot -= VARYING_SLOT_PATCH0; 1395bf215546Sopenharmony_ci } 1396bf215546Sopenharmony_ci if (slot_map[slot] == 0xff) { 1397bf215546Sopenharmony_ci assert(*reserved < MAX_VARYING); 1398bf215546Sopenharmony_ci unsigned num_slots; 1399bf215546Sopenharmony_ci if (nir_is_arrayed_io(var, stage)) 1400bf215546Sopenharmony_ci num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false); 1401bf215546Sopenharmony_ci else 1402bf215546Sopenharmony_ci num_slots = glsl_count_vec4_slots(var->type, false, false); 1403bf215546Sopenharmony_ci assert(*reserved + num_slots <= MAX_VARYING); 1404bf215546Sopenharmony_ci for (unsigned i = 0; i < num_slots; i++) 1405bf215546Sopenharmony_ci slot_map[slot + i] = (*reserved)++; 1406bf215546Sopenharmony_ci } 1407bf215546Sopenharmony_ci slot = slot_map[slot]; 1408bf215546Sopenharmony_ci assert(slot < MAX_VARYING); 1409bf215546Sopenharmony_ci var->data.driver_location = slot; 1410bf215546Sopenharmony_ci } 1411bf215546Sopenharmony_ci} 1412bf215546Sopenharmony_ci 1413bf215546Sopenharmony_ciALWAYS_INLINE static bool 1414bf215546Sopenharmony_ciis_texcoord(gl_shader_stage stage, const nir_variable *var) 1415bf215546Sopenharmony_ci{ 1416bf215546Sopenharmony_ci if (stage != MESA_SHADER_FRAGMENT) 1417bf215546Sopenharmony_ci return false; 1418bf215546Sopenharmony_ci return var->data.location >= VARYING_SLOT_TEX0 && 1419bf215546Sopenharmony_ci var->data.location <= VARYING_SLOT_TEX7; 1420bf215546Sopenharmony_ci} 1421bf215546Sopenharmony_ci 1422bf215546Sopenharmony_cistatic bool 1423bf215546Sopenharmony_ciassign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map) 1424bf215546Sopenharmony_ci{ 1425bf215546Sopenharmony_ci unsigned slot = var->data.location; 1426bf215546Sopenharmony_ci switch (slot) { 1427bf215546Sopenharmony_ci case VARYING_SLOT_POS: 1428bf215546Sopenharmony_ci case VARYING_SLOT_PNTC: 1429bf215546Sopenharmony_ci case VARYING_SLOT_PSIZ: 1430bf215546Sopenharmony_ci case VARYING_SLOT_LAYER: 1431bf215546Sopenharmony_ci case VARYING_SLOT_PRIMITIVE_ID: 1432bf215546Sopenharmony_ci case VARYING_SLOT_CLIP_DIST0: 1433bf215546Sopenharmony_ci case VARYING_SLOT_CULL_DIST0: 1434bf215546Sopenharmony_ci case VARYING_SLOT_VIEWPORT: 1435bf215546Sopenharmony_ci case VARYING_SLOT_FACE: 1436bf215546Sopenharmony_ci case VARYING_SLOT_TESS_LEVEL_OUTER: 1437bf215546Sopenharmony_ci case VARYING_SLOT_TESS_LEVEL_INNER: 1438bf215546Sopenharmony_ci /* use a sentinel value to avoid counting later */ 1439bf215546Sopenharmony_ci var->data.driver_location = UINT_MAX; 1440bf215546Sopenharmony_ci break; 1441bf215546Sopenharmony_ci default: 1442bf215546Sopenharmony_ci if (var->data.patch) { 1443bf215546Sopenharmony_ci assert(slot >= VARYING_SLOT_PATCH0); 1444bf215546Sopenharmony_ci slot -= VARYING_SLOT_PATCH0; 1445bf215546Sopenharmony_ci } 1446bf215546Sopenharmony_ci if (slot_map[slot] == (unsigned char)-1) { 1447bf215546Sopenharmony_ci if (stage != MESA_SHADER_TESS_CTRL && !is_texcoord(stage, var)) 1448bf215546Sopenharmony_ci /* dead io */ 1449bf215546Sopenharmony_ci return false; 1450bf215546Sopenharmony_ci /* - texcoords can't be eliminated in fs due to GL_COORD_REPLACE 1451bf215546Sopenharmony_ci * - patch variables may be read in the workgroup 1452bf215546Sopenharmony_ci */ 1453bf215546Sopenharmony_ci slot_map[slot] = (*reserved)++; 1454bf215546Sopenharmony_ci } 1455bf215546Sopenharmony_ci var->data.driver_location = slot_map[slot]; 1456bf215546Sopenharmony_ci } 1457bf215546Sopenharmony_ci return true; 1458bf215546Sopenharmony_ci} 1459bf215546Sopenharmony_ci 1460bf215546Sopenharmony_ci 1461bf215546Sopenharmony_cistatic bool 1462bf215546Sopenharmony_cirewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data) 1463bf215546Sopenharmony_ci{ 1464bf215546Sopenharmony_ci nir_variable *var = data; 1465bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 1466bf215546Sopenharmony_ci return false; 1467bf215546Sopenharmony_ci 1468bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 1469bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_load_deref) 1470bf215546Sopenharmony_ci return false; 1471bf215546Sopenharmony_ci nir_variable *deref_var = nir_intrinsic_get_var(intr, 0); 1472bf215546Sopenharmony_ci if (deref_var != var) 1473bf215546Sopenharmony_ci return false; 1474bf215546Sopenharmony_ci nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest)); 1475bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef); 1476bf215546Sopenharmony_ci return true; 1477bf215546Sopenharmony_ci} 1478bf215546Sopenharmony_ci 1479bf215546Sopenharmony_civoid 1480bf215546Sopenharmony_cizink_compiler_assign_io(nir_shader *producer, nir_shader *consumer) 1481bf215546Sopenharmony_ci{ 1482bf215546Sopenharmony_ci unsigned reserved = 0; 1483bf215546Sopenharmony_ci unsigned char slot_map[VARYING_SLOT_MAX]; 1484bf215546Sopenharmony_ci memset(slot_map, -1, sizeof(slot_map)); 1485bf215546Sopenharmony_ci bool do_fixup = false; 1486bf215546Sopenharmony_ci nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer; 1487bf215546Sopenharmony_ci if (consumer->info.stage != MESA_SHADER_FRAGMENT) { 1488bf215546Sopenharmony_ci /* remove injected pointsize from all but the last vertex stage */ 1489bf215546Sopenharmony_ci nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ); 1490bf215546Sopenharmony_ci if (var && !var->data.explicit_location) { 1491bf215546Sopenharmony_ci var->data.mode = nir_var_shader_temp; 1492bf215546Sopenharmony_ci nir_fixup_deref_modes(producer); 1493bf215546Sopenharmony_ci NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL); 1494bf215546Sopenharmony_ci optimize_nir(producer, NULL); 1495bf215546Sopenharmony_ci } 1496bf215546Sopenharmony_ci } 1497bf215546Sopenharmony_ci if (producer->info.stage == MESA_SHADER_TESS_CTRL) { 1498bf215546Sopenharmony_ci /* never assign from tcs -> tes, always invert */ 1499bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in) 1500bf215546Sopenharmony_ci assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map); 1501bf215546Sopenharmony_ci nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) { 1502bf215546Sopenharmony_ci if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map)) 1503bf215546Sopenharmony_ci /* this is an output, nothing more needs to be done for it to be dropped */ 1504bf215546Sopenharmony_ci do_fixup = true; 1505bf215546Sopenharmony_ci } 1506bf215546Sopenharmony_ci } else { 1507bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, producer, nir_var_shader_out) 1508bf215546Sopenharmony_ci assign_producer_var_io(producer->info.stage, var, &reserved, slot_map); 1509bf215546Sopenharmony_ci nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) { 1510bf215546Sopenharmony_ci if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) { 1511bf215546Sopenharmony_ci do_fixup = true; 1512bf215546Sopenharmony_ci /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */ 1513bf215546Sopenharmony_ci nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var); 1514bf215546Sopenharmony_ci } 1515bf215546Sopenharmony_ci } 1516bf215546Sopenharmony_ci } 1517bf215546Sopenharmony_ci if (!do_fixup) 1518bf215546Sopenharmony_ci return; 1519bf215546Sopenharmony_ci nir_fixup_deref_modes(nir); 1520bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); 1521bf215546Sopenharmony_ci optimize_nir(nir, NULL); 1522bf215546Sopenharmony_ci} 1523bf215546Sopenharmony_ci 1524bf215546Sopenharmony_ci/* all types that hit this function contain something that is 64bit */ 1525bf215546Sopenharmony_cistatic const struct glsl_type * 1526bf215546Sopenharmony_cirewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var) 1527bf215546Sopenharmony_ci{ 1528bf215546Sopenharmony_ci if (glsl_type_is_array(type)) { 1529bf215546Sopenharmony_ci const struct glsl_type *child = glsl_get_array_element(type); 1530bf215546Sopenharmony_ci unsigned elements = glsl_array_size(type); 1531bf215546Sopenharmony_ci unsigned stride = glsl_get_explicit_stride(type); 1532bf215546Sopenharmony_ci return glsl_array_type(rewrite_64bit_type(nir, child, var), elements, stride); 1533bf215546Sopenharmony_ci } 1534bf215546Sopenharmony_ci /* rewrite structs recursively */ 1535bf215546Sopenharmony_ci if (glsl_type_is_struct_or_ifc(type)) { 1536bf215546Sopenharmony_ci unsigned nmembers = glsl_get_length(type); 1537bf215546Sopenharmony_ci struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2); 1538bf215546Sopenharmony_ci unsigned xfb_offset = 0; 1539bf215546Sopenharmony_ci for (unsigned i = 0; i < nmembers; i++) { 1540bf215546Sopenharmony_ci const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i); 1541bf215546Sopenharmony_ci fields[i] = *f; 1542bf215546Sopenharmony_ci xfb_offset += glsl_get_component_slots(fields[i].type) * 4; 1543bf215546Sopenharmony_ci if (i < nmembers - 1 && xfb_offset % 8 && 1544bf215546Sopenharmony_ci glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1))) { 1545bf215546Sopenharmony_ci var->data.is_xfb = true; 1546bf215546Sopenharmony_ci } 1547bf215546Sopenharmony_ci fields[i].type = rewrite_64bit_type(nir, f->type, var); 1548bf215546Sopenharmony_ci } 1549bf215546Sopenharmony_ci return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type)); 1550bf215546Sopenharmony_ci } 1551bf215546Sopenharmony_ci if (!glsl_type_is_64bit(type)) 1552bf215546Sopenharmony_ci return type; 1553bf215546Sopenharmony_ci enum glsl_base_type base_type; 1554bf215546Sopenharmony_ci switch (glsl_get_base_type(type)) { 1555bf215546Sopenharmony_ci case GLSL_TYPE_UINT64: 1556bf215546Sopenharmony_ci base_type = GLSL_TYPE_UINT; 1557bf215546Sopenharmony_ci break; 1558bf215546Sopenharmony_ci case GLSL_TYPE_INT64: 1559bf215546Sopenharmony_ci base_type = GLSL_TYPE_INT; 1560bf215546Sopenharmony_ci break; 1561bf215546Sopenharmony_ci case GLSL_TYPE_DOUBLE: 1562bf215546Sopenharmony_ci base_type = GLSL_TYPE_FLOAT; 1563bf215546Sopenharmony_ci break; 1564bf215546Sopenharmony_ci default: 1565bf215546Sopenharmony_ci unreachable("unknown 64-bit vertex attribute format!"); 1566bf215546Sopenharmony_ci } 1567bf215546Sopenharmony_ci if (glsl_type_is_scalar(type)) 1568bf215546Sopenharmony_ci return glsl_vector_type(base_type, 2); 1569bf215546Sopenharmony_ci unsigned num_components; 1570bf215546Sopenharmony_ci if (glsl_type_is_matrix(type)) { 1571bf215546Sopenharmony_ci /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */ 1572bf215546Sopenharmony_ci unsigned vec_components = glsl_get_vector_elements(type); 1573bf215546Sopenharmony_ci if (vec_components == 3) 1574bf215546Sopenharmony_ci vec_components = 4; 1575bf215546Sopenharmony_ci num_components = vec_components * 2 * glsl_get_matrix_columns(type); 1576bf215546Sopenharmony_ci } else { 1577bf215546Sopenharmony_ci num_components = glsl_get_vector_elements(type) * 2; 1578bf215546Sopenharmony_ci if (num_components <= 4) 1579bf215546Sopenharmony_ci return glsl_vector_type(base_type, num_components); 1580bf215546Sopenharmony_ci } 1581bf215546Sopenharmony_ci /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */ 1582bf215546Sopenharmony_ci struct glsl_struct_field fields[8] = {0}; 1583bf215546Sopenharmony_ci unsigned remaining = num_components; 1584bf215546Sopenharmony_ci unsigned nfields = 0; 1585bf215546Sopenharmony_ci for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) { 1586bf215546Sopenharmony_ci assert(i < ARRAY_SIZE(fields)); 1587bf215546Sopenharmony_ci fields[i].name = ""; 1588bf215546Sopenharmony_ci fields[i].offset = i * 16; 1589bf215546Sopenharmony_ci fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining)); 1590bf215546Sopenharmony_ci } 1591bf215546Sopenharmony_ci char buf[64]; 1592bf215546Sopenharmony_ci snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type)); 1593bf215546Sopenharmony_ci return glsl_struct_type(fields, nfields, buf, true); 1594bf215546Sopenharmony_ci} 1595bf215546Sopenharmony_ci 1596bf215546Sopenharmony_cistatic const struct glsl_type * 1597bf215546Sopenharmony_cideref_is_matrix(nir_deref_instr *deref) 1598bf215546Sopenharmony_ci{ 1599bf215546Sopenharmony_ci if (glsl_type_is_matrix(deref->type)) 1600bf215546Sopenharmony_ci return deref->type; 1601bf215546Sopenharmony_ci nir_deref_instr *parent = nir_deref_instr_parent(deref); 1602bf215546Sopenharmony_ci if (parent) 1603bf215546Sopenharmony_ci return deref_is_matrix(parent); 1604bf215546Sopenharmony_ci return NULL; 1605bf215546Sopenharmony_ci} 1606bf215546Sopenharmony_ci 1607bf215546Sopenharmony_ci/* rewrite all input/output variables using 32bit types and load/stores */ 1608bf215546Sopenharmony_cistatic bool 1609bf215546Sopenharmony_cilower_64bit_vars(nir_shader *shader) 1610bf215546Sopenharmony_ci{ 1611bf215546Sopenharmony_ci bool progress = false; 1612bf215546Sopenharmony_ci struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); 1613bf215546Sopenharmony_ci struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); 1614bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out) { 1615bf215546Sopenharmony_ci if (!glsl_type_contains_64bit(var->type)) 1616bf215546Sopenharmony_ci continue; 1617bf215546Sopenharmony_ci var->type = rewrite_64bit_type(shader, var->type, var); 1618bf215546Sopenharmony_ci /* once type is rewritten, rewrite all loads and stores */ 1619bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 1620bf215546Sopenharmony_ci bool func_progress = false; 1621bf215546Sopenharmony_ci if (!function->impl) 1622bf215546Sopenharmony_ci continue; 1623bf215546Sopenharmony_ci nir_builder b; 1624bf215546Sopenharmony_ci nir_builder_init(&b, function->impl); 1625bf215546Sopenharmony_ci nir_foreach_block(block, function->impl) { 1626bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 1627bf215546Sopenharmony_ci switch (instr->type) { 1628bf215546Sopenharmony_ci case nir_instr_type_deref: { 1629bf215546Sopenharmony_ci nir_deref_instr *deref = nir_instr_as_deref(instr); 1630bf215546Sopenharmony_ci if (!(deref->modes & (nir_var_shader_in | nir_var_shader_out))) 1631bf215546Sopenharmony_ci continue; 1632bf215546Sopenharmony_ci if (nir_deref_instr_get_variable(deref) != var) 1633bf215546Sopenharmony_ci continue; 1634bf215546Sopenharmony_ci 1635bf215546Sopenharmony_ci /* matrix types are special: store the original deref type for later use */ 1636bf215546Sopenharmony_ci const struct glsl_type *matrix = deref_is_matrix(deref); 1637bf215546Sopenharmony_ci nir_deref_instr *parent = nir_deref_instr_parent(deref); 1638bf215546Sopenharmony_ci if (!matrix) { 1639bf215546Sopenharmony_ci /* if this isn't a direct matrix deref, it's maybe a matrix row deref */ 1640bf215546Sopenharmony_ci hash_table_foreach(derefs, he) { 1641bf215546Sopenharmony_ci /* propagate parent matrix type to row deref */ 1642bf215546Sopenharmony_ci if (he->key == parent) 1643bf215546Sopenharmony_ci matrix = he->data; 1644bf215546Sopenharmony_ci } 1645bf215546Sopenharmony_ci } 1646bf215546Sopenharmony_ci if (matrix) 1647bf215546Sopenharmony_ci _mesa_hash_table_insert(derefs, deref, (void*)matrix); 1648bf215546Sopenharmony_ci if (deref->deref_type == nir_deref_type_var) 1649bf215546Sopenharmony_ci deref->type = var->type; 1650bf215546Sopenharmony_ci else 1651bf215546Sopenharmony_ci deref->type = rewrite_64bit_type(shader, deref->type, var); 1652bf215546Sopenharmony_ci } 1653bf215546Sopenharmony_ci break; 1654bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 1655bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 1656bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_store_deref && 1657bf215546Sopenharmony_ci intr->intrinsic != nir_intrinsic_load_deref) 1658bf215546Sopenharmony_ci break; 1659bf215546Sopenharmony_ci if (nir_intrinsic_get_var(intr, 0) != var) 1660bf215546Sopenharmony_ci break; 1661bf215546Sopenharmony_ci if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) || 1662bf215546Sopenharmony_ci (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64)) 1663bf215546Sopenharmony_ci break; 1664bf215546Sopenharmony_ci b.cursor = nir_before_instr(instr); 1665bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); 1666bf215546Sopenharmony_ci unsigned num_components = intr->num_components * 2; 1667bf215546Sopenharmony_ci nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS]; 1668bf215546Sopenharmony_ci /* this is the stored matrix type from the deref */ 1669bf215546Sopenharmony_ci struct hash_entry *he = _mesa_hash_table_search(derefs, deref); 1670bf215546Sopenharmony_ci const struct glsl_type *matrix = he ? he->data : NULL; 1671bf215546Sopenharmony_ci func_progress = true; 1672bf215546Sopenharmony_ci if (intr->intrinsic == nir_intrinsic_store_deref) { 1673bf215546Sopenharmony_ci /* first, unpack the src data to 32bit vec2 components */ 1674bf215546Sopenharmony_ci for (unsigned i = 0; i < intr->num_components; i++) { 1675bf215546Sopenharmony_ci nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i)); 1676bf215546Sopenharmony_ci comp[i * 2] = nir_channel(&b, ssa, 0); 1677bf215546Sopenharmony_ci comp[i * 2 + 1] = nir_channel(&b, ssa, 1); 1678bf215546Sopenharmony_ci } 1679bf215546Sopenharmony_ci unsigned wrmask = nir_intrinsic_write_mask(intr); 1680bf215546Sopenharmony_ci unsigned mask = 0; 1681bf215546Sopenharmony_ci /* expand writemask for doubled components */ 1682bf215546Sopenharmony_ci for (unsigned i = 0; i < intr->num_components; i++) { 1683bf215546Sopenharmony_ci if (wrmask & BITFIELD_BIT(i)) 1684bf215546Sopenharmony_ci mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1); 1685bf215546Sopenharmony_ci } 1686bf215546Sopenharmony_ci if (matrix) { 1687bf215546Sopenharmony_ci /* matrix types always come from array (row) derefs */ 1688bf215546Sopenharmony_ci assert(deref->deref_type == nir_deref_type_array); 1689bf215546Sopenharmony_ci nir_deref_instr *var_deref = nir_deref_instr_parent(deref); 1690bf215546Sopenharmony_ci /* let optimization clean up consts later */ 1691bf215546Sopenharmony_ci nir_ssa_def *index = deref->arr.index.ssa; 1692bf215546Sopenharmony_ci /* this might be an indirect array index: 1693bf215546Sopenharmony_ci * - iterate over matrix columns 1694bf215546Sopenharmony_ci * - add if blocks for each column 1695bf215546Sopenharmony_ci * - perform the store in the block 1696bf215546Sopenharmony_ci */ 1697bf215546Sopenharmony_ci for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) { 1698bf215546Sopenharmony_ci nir_push_if(&b, nir_ieq_imm(&b, index, idx)); 1699bf215546Sopenharmony_ci unsigned vec_components = glsl_get_vector_elements(matrix); 1700bf215546Sopenharmony_ci /* always clamp dvec3 to 4 components */ 1701bf215546Sopenharmony_ci if (vec_components == 3) 1702bf215546Sopenharmony_ci vec_components = 4; 1703bf215546Sopenharmony_ci unsigned start_component = idx * vec_components * 2; 1704bf215546Sopenharmony_ci /* struct member */ 1705bf215546Sopenharmony_ci unsigned member = start_component / 4; 1706bf215546Sopenharmony_ci /* number of components remaining */ 1707bf215546Sopenharmony_ci unsigned remaining = num_components; 1708bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; member++) { 1709bf215546Sopenharmony_ci if (!(mask & BITFIELD_BIT(i))) 1710bf215546Sopenharmony_ci continue; 1711bf215546Sopenharmony_ci assert(member < glsl_get_length(var_deref->type)); 1712bf215546Sopenharmony_ci /* deref the rewritten struct to the appropriate vec4/vec2 */ 1713bf215546Sopenharmony_ci nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member); 1714bf215546Sopenharmony_ci unsigned incr = MIN2(remaining, 4); 1715bf215546Sopenharmony_ci /* assemble the write component vec */ 1716bf215546Sopenharmony_ci nir_ssa_def *val = nir_vec(&b, &comp[i], incr); 1717bf215546Sopenharmony_ci /* use the number of components being written as the writemask */ 1718bf215546Sopenharmony_ci if (glsl_get_vector_elements(strct->type) > val->num_components) 1719bf215546Sopenharmony_ci val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type)); 1720bf215546Sopenharmony_ci nir_store_deref(&b, strct, val, BITFIELD_MASK(incr)); 1721bf215546Sopenharmony_ci remaining -= incr; 1722bf215546Sopenharmony_ci i += incr; 1723bf215546Sopenharmony_ci } 1724bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1725bf215546Sopenharmony_ci } 1726bf215546Sopenharmony_ci _mesa_set_add(deletes, &deref->instr); 1727bf215546Sopenharmony_ci } else if (num_components <= 4) { 1728bf215546Sopenharmony_ci /* simple store case: just write out the components */ 1729bf215546Sopenharmony_ci nir_ssa_def *dest = nir_vec(&b, comp, num_components); 1730bf215546Sopenharmony_ci nir_store_deref(&b, deref, dest, mask); 1731bf215546Sopenharmony_ci } else { 1732bf215546Sopenharmony_ci /* writing > 4 components: access the struct and write to the appropriate vec4 members */ 1733bf215546Sopenharmony_ci for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) { 1734bf215546Sopenharmony_ci if (!(mask & BITFIELD_MASK(4))) 1735bf215546Sopenharmony_ci continue; 1736bf215546Sopenharmony_ci nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i); 1737bf215546Sopenharmony_ci nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4)); 1738bf215546Sopenharmony_ci if (glsl_get_vector_elements(strct->type) > dest->num_components) 1739bf215546Sopenharmony_ci dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type)); 1740bf215546Sopenharmony_ci nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4)); 1741bf215546Sopenharmony_ci mask >>= 4; 1742bf215546Sopenharmony_ci } 1743bf215546Sopenharmony_ci } 1744bf215546Sopenharmony_ci } else { 1745bf215546Sopenharmony_ci nir_ssa_def *dest = NULL; 1746bf215546Sopenharmony_ci if (matrix) { 1747bf215546Sopenharmony_ci /* matrix types always come from array (row) derefs */ 1748bf215546Sopenharmony_ci assert(deref->deref_type == nir_deref_type_array); 1749bf215546Sopenharmony_ci nir_deref_instr *var_deref = nir_deref_instr_parent(deref); 1750bf215546Sopenharmony_ci /* let optimization clean up consts later */ 1751bf215546Sopenharmony_ci nir_ssa_def *index = deref->arr.index.ssa; 1752bf215546Sopenharmony_ci /* this might be an indirect array index: 1753bf215546Sopenharmony_ci * - iterate over matrix columns 1754bf215546Sopenharmony_ci * - add if blocks for each column 1755bf215546Sopenharmony_ci * - phi the loads using the array index 1756bf215546Sopenharmony_ci */ 1757bf215546Sopenharmony_ci unsigned cols = glsl_get_matrix_columns(matrix); 1758bf215546Sopenharmony_ci nir_ssa_def *dests[4]; 1759bf215546Sopenharmony_ci for (unsigned idx = 0; idx < cols; idx++) { 1760bf215546Sopenharmony_ci /* don't add an if for the final row: this will be handled in the else */ 1761bf215546Sopenharmony_ci if (idx < cols - 1) 1762bf215546Sopenharmony_ci nir_push_if(&b, nir_ieq_imm(&b, index, idx)); 1763bf215546Sopenharmony_ci unsigned vec_components = glsl_get_vector_elements(matrix); 1764bf215546Sopenharmony_ci /* always clamp dvec3 to 4 components */ 1765bf215546Sopenharmony_ci if (vec_components == 3) 1766bf215546Sopenharmony_ci vec_components = 4; 1767bf215546Sopenharmony_ci unsigned start_component = idx * vec_components * 2; 1768bf215546Sopenharmony_ci /* struct member */ 1769bf215546Sopenharmony_ci unsigned member = start_component / 4; 1770bf215546Sopenharmony_ci /* number of components remaining */ 1771bf215546Sopenharmony_ci unsigned remaining = num_components; 1772bf215546Sopenharmony_ci /* component index */ 1773bf215546Sopenharmony_ci unsigned comp_idx = 0; 1774bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; member++) { 1775bf215546Sopenharmony_ci assert(member < glsl_get_length(var_deref->type)); 1776bf215546Sopenharmony_ci nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member); 1777bf215546Sopenharmony_ci nir_ssa_def *load = nir_load_deref(&b, strct); 1778bf215546Sopenharmony_ci unsigned incr = MIN2(remaining, 4); 1779bf215546Sopenharmony_ci /* repack the loads to 64bit */ 1780bf215546Sopenharmony_ci for (unsigned c = 0; c < incr / 2; c++, comp_idx++) 1781bf215546Sopenharmony_ci comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2))); 1782bf215546Sopenharmony_ci remaining -= incr; 1783bf215546Sopenharmony_ci i += incr; 1784bf215546Sopenharmony_ci } 1785bf215546Sopenharmony_ci dest = dests[idx] = nir_vec(&b, comp, intr->num_components); 1786bf215546Sopenharmony_ci if (idx < cols - 1) 1787bf215546Sopenharmony_ci nir_push_else(&b, NULL); 1788bf215546Sopenharmony_ci } 1789bf215546Sopenharmony_ci /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */ 1790bf215546Sopenharmony_ci for (unsigned idx = cols - 1; idx >= 1; idx--) { 1791bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1792bf215546Sopenharmony_ci dest = nir_if_phi(&b, dests[idx - 1], dest); 1793bf215546Sopenharmony_ci } 1794bf215546Sopenharmony_ci _mesa_set_add(deletes, &deref->instr); 1795bf215546Sopenharmony_ci } else if (num_components <= 4) { 1796bf215546Sopenharmony_ci /* simple load case */ 1797bf215546Sopenharmony_ci nir_ssa_def *load = nir_load_deref(&b, deref); 1798bf215546Sopenharmony_ci /* pack 32bit loads into 64bit: this will automagically get optimized out later */ 1799bf215546Sopenharmony_ci for (unsigned i = 0; i < intr->num_components; i++) { 1800bf215546Sopenharmony_ci comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2))); 1801bf215546Sopenharmony_ci } 1802bf215546Sopenharmony_ci dest = nir_vec(&b, comp, intr->num_components); 1803bf215546Sopenharmony_ci } else { 1804bf215546Sopenharmony_ci /* writing > 4 components: access the struct and load the appropriate vec4 members */ 1805bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++, num_components -= 4) { 1806bf215546Sopenharmony_ci nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i); 1807bf215546Sopenharmony_ci nir_ssa_def *load = nir_load_deref(&b, strct); 1808bf215546Sopenharmony_ci comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2))); 1809bf215546Sopenharmony_ci if (num_components > 2) 1810bf215546Sopenharmony_ci comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2))); 1811bf215546Sopenharmony_ci } 1812bf215546Sopenharmony_ci dest = nir_vec(&b, comp, intr->num_components); 1813bf215546Sopenharmony_ci } 1814bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr); 1815bf215546Sopenharmony_ci } 1816bf215546Sopenharmony_ci _mesa_set_add(deletes, instr); 1817bf215546Sopenharmony_ci break; 1818bf215546Sopenharmony_ci } 1819bf215546Sopenharmony_ci break; 1820bf215546Sopenharmony_ci default: break; 1821bf215546Sopenharmony_ci } 1822bf215546Sopenharmony_ci } 1823bf215546Sopenharmony_ci } 1824bf215546Sopenharmony_ci if (func_progress) 1825bf215546Sopenharmony_ci nir_metadata_preserve(function->impl, nir_metadata_none); 1826bf215546Sopenharmony_ci /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */ 1827bf215546Sopenharmony_ci set_foreach_remove(deletes, he) 1828bf215546Sopenharmony_ci nir_instr_remove((void*)he->key); 1829bf215546Sopenharmony_ci } 1830bf215546Sopenharmony_ci progress = true; 1831bf215546Sopenharmony_ci } 1832bf215546Sopenharmony_ci ralloc_free(deletes); 1833bf215546Sopenharmony_ci ralloc_free(derefs); 1834bf215546Sopenharmony_ci if (progress) { 1835bf215546Sopenharmony_ci nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL); 1836bf215546Sopenharmony_ci nir_lower_phis_to_scalar(shader, false); 1837bf215546Sopenharmony_ci optimize_nir(shader, NULL); 1838bf215546Sopenharmony_ci } 1839bf215546Sopenharmony_ci return progress; 1840bf215546Sopenharmony_ci} 1841bf215546Sopenharmony_ci 1842bf215546Sopenharmony_cistatic bool 1843bf215546Sopenharmony_cisplit_blocks(nir_shader *nir) 1844bf215546Sopenharmony_ci{ 1845bf215546Sopenharmony_ci bool progress = false; 1846bf215546Sopenharmony_ci bool changed = true; 1847bf215546Sopenharmony_ci do { 1848bf215546Sopenharmony_ci progress = false; 1849bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, nir) { 1850bf215546Sopenharmony_ci const struct glsl_type *base_type = glsl_without_array(var->type); 1851bf215546Sopenharmony_ci nir_variable *members[32]; //can't have more than this without breaking NIR 1852bf215546Sopenharmony_ci if (!glsl_type_is_struct(base_type)) 1853bf215546Sopenharmony_ci continue; 1854bf215546Sopenharmony_ci /* TODO: arrays? */ 1855bf215546Sopenharmony_ci if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1) 1856bf215546Sopenharmony_ci continue; 1857bf215546Sopenharmony_ci if (glsl_count_attribute_slots(var->type, false) == 1) 1858bf215546Sopenharmony_ci continue; 1859bf215546Sopenharmony_ci unsigned offset = 0; 1860bf215546Sopenharmony_ci for (unsigned i = 0; i < glsl_get_length(var->type); i++) { 1861bf215546Sopenharmony_ci members[i] = nir_variable_clone(var, nir); 1862bf215546Sopenharmony_ci members[i]->type = glsl_get_struct_field(var->type, i); 1863bf215546Sopenharmony_ci members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i); 1864bf215546Sopenharmony_ci members[i]->data.location += offset; 1865bf215546Sopenharmony_ci offset += glsl_count_attribute_slots(members[i]->type, false); 1866bf215546Sopenharmony_ci nir_shader_add_variable(nir, members[i]); 1867bf215546Sopenharmony_ci } 1868bf215546Sopenharmony_ci nir_foreach_function(function, nir) { 1869bf215546Sopenharmony_ci bool func_progress = false; 1870bf215546Sopenharmony_ci if (!function->impl) 1871bf215546Sopenharmony_ci continue; 1872bf215546Sopenharmony_ci nir_builder b; 1873bf215546Sopenharmony_ci nir_builder_init(&b, function->impl); 1874bf215546Sopenharmony_ci nir_foreach_block(block, function->impl) { 1875bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 1876bf215546Sopenharmony_ci switch (instr->type) { 1877bf215546Sopenharmony_ci case nir_instr_type_deref: { 1878bf215546Sopenharmony_ci nir_deref_instr *deref = nir_instr_as_deref(instr); 1879bf215546Sopenharmony_ci if (!(deref->modes & nir_var_shader_out)) 1880bf215546Sopenharmony_ci continue; 1881bf215546Sopenharmony_ci if (nir_deref_instr_get_variable(deref) != var) 1882bf215546Sopenharmony_ci continue; 1883bf215546Sopenharmony_ci if (deref->deref_type != nir_deref_type_struct) 1884bf215546Sopenharmony_ci continue; 1885bf215546Sopenharmony_ci nir_deref_instr *parent = nir_deref_instr_parent(deref); 1886bf215546Sopenharmony_ci if (parent->deref_type != nir_deref_type_var) 1887bf215546Sopenharmony_ci continue; 1888bf215546Sopenharmony_ci deref->modes = nir_var_shader_temp; 1889bf215546Sopenharmony_ci parent->modes = nir_var_shader_temp; 1890bf215546Sopenharmony_ci b.cursor = nir_before_instr(instr); 1891bf215546Sopenharmony_ci nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa; 1892bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr); 1893bf215546Sopenharmony_ci nir_instr_remove(&deref->instr); 1894bf215546Sopenharmony_ci func_progress = true; 1895bf215546Sopenharmony_ci break; 1896bf215546Sopenharmony_ci } 1897bf215546Sopenharmony_ci default: break; 1898bf215546Sopenharmony_ci } 1899bf215546Sopenharmony_ci } 1900bf215546Sopenharmony_ci } 1901bf215546Sopenharmony_ci if (func_progress) 1902bf215546Sopenharmony_ci nir_metadata_preserve(function->impl, nir_metadata_none); 1903bf215546Sopenharmony_ci } 1904bf215546Sopenharmony_ci var->data.mode = nir_var_shader_temp; 1905bf215546Sopenharmony_ci changed = true; 1906bf215546Sopenharmony_ci progress = true; 1907bf215546Sopenharmony_ci } 1908bf215546Sopenharmony_ci } while (progress); 1909bf215546Sopenharmony_ci return changed; 1910bf215546Sopenharmony_ci} 1911bf215546Sopenharmony_ci 1912bf215546Sopenharmony_cistatic void 1913bf215546Sopenharmony_cizink_shader_dump(void *words, size_t size, const char *file) 1914bf215546Sopenharmony_ci{ 1915bf215546Sopenharmony_ci FILE *fp = fopen(file, "wb"); 1916bf215546Sopenharmony_ci if (fp) { 1917bf215546Sopenharmony_ci fwrite(words, 1, size, fp); 1918bf215546Sopenharmony_ci fclose(fp); 1919bf215546Sopenharmony_ci fprintf(stderr, "wrote '%s'...\n", file); 1920bf215546Sopenharmony_ci } 1921bf215546Sopenharmony_ci} 1922bf215546Sopenharmony_ci 1923bf215546Sopenharmony_ciVkShaderModule 1924bf215546Sopenharmony_cizink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv) 1925bf215546Sopenharmony_ci{ 1926bf215546Sopenharmony_ci VkShaderModule mod; 1927bf215546Sopenharmony_ci VkShaderModuleCreateInfo smci = {0}; 1928bf215546Sopenharmony_ci 1929bf215546Sopenharmony_ci if (!spirv) 1930bf215546Sopenharmony_ci spirv = zs->spirv; 1931bf215546Sopenharmony_ci 1932bf215546Sopenharmony_ci if (zink_debug & ZINK_DEBUG_SPIRV) { 1933bf215546Sopenharmony_ci char buf[256]; 1934bf215546Sopenharmony_ci static int i; 1935bf215546Sopenharmony_ci snprintf(buf, sizeof(buf), "dump%02d.spv", i++); 1936bf215546Sopenharmony_ci zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf); 1937bf215546Sopenharmony_ci } 1938bf215546Sopenharmony_ci 1939bf215546Sopenharmony_ci smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; 1940bf215546Sopenharmony_ci smci.codeSize = spirv->num_words * sizeof(uint32_t); 1941bf215546Sopenharmony_ci smci.pCode = spirv->words; 1942bf215546Sopenharmony_ci 1943bf215546Sopenharmony_ci#ifndef NDEBUG 1944bf215546Sopenharmony_ci if (zink_debug & ZINK_DEBUG_VALIDATION) { 1945bf215546Sopenharmony_ci static const struct spirv_to_nir_options spirv_options = { 1946bf215546Sopenharmony_ci .environment = NIR_SPIRV_VULKAN, 1947bf215546Sopenharmony_ci .caps = { 1948bf215546Sopenharmony_ci .float64 = true, 1949bf215546Sopenharmony_ci .int16 = true, 1950bf215546Sopenharmony_ci .int64 = true, 1951bf215546Sopenharmony_ci .tessellation = true, 1952bf215546Sopenharmony_ci .float_controls = true, 1953bf215546Sopenharmony_ci .image_ms_array = true, 1954bf215546Sopenharmony_ci .image_read_without_format = true, 1955bf215546Sopenharmony_ci .image_write_without_format = true, 1956bf215546Sopenharmony_ci .storage_image_ms = true, 1957bf215546Sopenharmony_ci .geometry_streams = true, 1958bf215546Sopenharmony_ci .storage_8bit = true, 1959bf215546Sopenharmony_ci .storage_16bit = true, 1960bf215546Sopenharmony_ci .variable_pointers = true, 1961bf215546Sopenharmony_ci .stencil_export = true, 1962bf215546Sopenharmony_ci .post_depth_coverage = true, 1963bf215546Sopenharmony_ci .transform_feedback = true, 1964bf215546Sopenharmony_ci .device_group = true, 1965bf215546Sopenharmony_ci .draw_parameters = true, 1966bf215546Sopenharmony_ci .shader_viewport_index_layer = true, 1967bf215546Sopenharmony_ci .multiview = true, 1968bf215546Sopenharmony_ci .physical_storage_buffer_address = true, 1969bf215546Sopenharmony_ci .int64_atomics = true, 1970bf215546Sopenharmony_ci .subgroup_arithmetic = true, 1971bf215546Sopenharmony_ci .subgroup_basic = true, 1972bf215546Sopenharmony_ci .subgroup_ballot = true, 1973bf215546Sopenharmony_ci .subgroup_quad = true, 1974bf215546Sopenharmony_ci .subgroup_shuffle = true, 1975bf215546Sopenharmony_ci .subgroup_vote = true, 1976bf215546Sopenharmony_ci .vk_memory_model = true, 1977bf215546Sopenharmony_ci .vk_memory_model_device_scope = true, 1978bf215546Sopenharmony_ci .int8 = true, 1979bf215546Sopenharmony_ci .float16 = true, 1980bf215546Sopenharmony_ci .demote_to_helper_invocation = true, 1981bf215546Sopenharmony_ci .sparse_residency = true, 1982bf215546Sopenharmony_ci .min_lod = true, 1983bf215546Sopenharmony_ci }, 1984bf215546Sopenharmony_ci .ubo_addr_format = nir_address_format_32bit_index_offset, 1985bf215546Sopenharmony_ci .ssbo_addr_format = nir_address_format_32bit_index_offset, 1986bf215546Sopenharmony_ci .phys_ssbo_addr_format = nir_address_format_64bit_global, 1987bf215546Sopenharmony_ci .push_const_addr_format = nir_address_format_logical, 1988bf215546Sopenharmony_ci .shared_addr_format = nir_address_format_32bit_offset, 1989bf215546Sopenharmony_ci }; 1990bf215546Sopenharmony_ci uint32_t num_spec_entries = 0; 1991bf215546Sopenharmony_ci struct nir_spirv_specialization *spec_entries = NULL; 1992bf215546Sopenharmony_ci VkSpecializationInfo sinfo = {0}; 1993bf215546Sopenharmony_ci VkSpecializationMapEntry me[3]; 1994bf215546Sopenharmony_ci uint32_t size[3] = {1,1,1}; 1995bf215546Sopenharmony_ci if (!zs->nir->info.workgroup_size[0]) { 1996bf215546Sopenharmony_ci sinfo.mapEntryCount = 3; 1997bf215546Sopenharmony_ci sinfo.pMapEntries = &me[0]; 1998bf215546Sopenharmony_ci sinfo.dataSize = sizeof(uint32_t) * 3; 1999bf215546Sopenharmony_ci sinfo.pData = size; 2000bf215546Sopenharmony_ci uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z}; 2001bf215546Sopenharmony_ci for (int i = 0; i < 3; i++) { 2002bf215546Sopenharmony_ci me[i].size = sizeof(uint32_t); 2003bf215546Sopenharmony_ci me[i].constantID = ids[i]; 2004bf215546Sopenharmony_ci me[i].offset = i * sizeof(uint32_t); 2005bf215546Sopenharmony_ci } 2006bf215546Sopenharmony_ci spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries); 2007bf215546Sopenharmony_ci } 2008bf215546Sopenharmony_ci nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words, 2009bf215546Sopenharmony_ci spec_entries, num_spec_entries, 2010bf215546Sopenharmony_ci zs->nir->info.stage, "main", &spirv_options, &screen->nir_options); 2011bf215546Sopenharmony_ci assert(nir); 2012bf215546Sopenharmony_ci ralloc_free(nir); 2013bf215546Sopenharmony_ci free(spec_entries); 2014bf215546Sopenharmony_ci } 2015bf215546Sopenharmony_ci#endif 2016bf215546Sopenharmony_ci 2017bf215546Sopenharmony_ci VkResult ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod); 2018bf215546Sopenharmony_ci bool success = zink_screen_handle_vkresult(screen, ret); 2019bf215546Sopenharmony_ci assert(success); 2020bf215546Sopenharmony_ci return success ? mod : VK_NULL_HANDLE; 2021bf215546Sopenharmony_ci} 2022bf215546Sopenharmony_ci 2023bf215546Sopenharmony_cistatic bool 2024bf215546Sopenharmony_cifind_var_deref(nir_shader *nir, nir_variable *var) 2025bf215546Sopenharmony_ci{ 2026bf215546Sopenharmony_ci nir_foreach_function(function, nir) { 2027bf215546Sopenharmony_ci if (!function->impl) 2028bf215546Sopenharmony_ci continue; 2029bf215546Sopenharmony_ci 2030bf215546Sopenharmony_ci nir_foreach_block(block, function->impl) { 2031bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 2032bf215546Sopenharmony_ci if (instr->type != nir_instr_type_deref) 2033bf215546Sopenharmony_ci continue; 2034bf215546Sopenharmony_ci nir_deref_instr *deref = nir_instr_as_deref(instr); 2035bf215546Sopenharmony_ci if (deref->deref_type == nir_deref_type_var && deref->var == var) 2036bf215546Sopenharmony_ci return true; 2037bf215546Sopenharmony_ci } 2038bf215546Sopenharmony_ci } 2039bf215546Sopenharmony_ci } 2040bf215546Sopenharmony_ci return false; 2041bf215546Sopenharmony_ci} 2042bf215546Sopenharmony_ci 2043bf215546Sopenharmony_cistatic void 2044bf215546Sopenharmony_ciprune_io(nir_shader *nir) 2045bf215546Sopenharmony_ci{ 2046bf215546Sopenharmony_ci nir_foreach_shader_in_variable_safe(var, nir) { 2047bf215546Sopenharmony_ci if (!find_var_deref(nir, var)) 2048bf215546Sopenharmony_ci var->data.mode = nir_var_shader_temp; 2049bf215546Sopenharmony_ci } 2050bf215546Sopenharmony_ci nir_foreach_shader_out_variable_safe(var, nir) { 2051bf215546Sopenharmony_ci if (!find_var_deref(nir, var)) 2052bf215546Sopenharmony_ci var->data.mode = nir_var_shader_temp; 2053bf215546Sopenharmony_ci } 2054bf215546Sopenharmony_ci} 2055bf215546Sopenharmony_ci 2056bf215546Sopenharmony_ciVkShaderModule 2057bf215546Sopenharmony_cizink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, const struct zink_shader_key *key) 2058bf215546Sopenharmony_ci{ 2059bf215546Sopenharmony_ci VkShaderModule mod = VK_NULL_HANDLE; 2060bf215546Sopenharmony_ci struct zink_shader_info *sinfo = &zs->sinfo; 2061bf215546Sopenharmony_ci nir_shader *nir = nir_shader_clone(NULL, base_nir); 2062bf215546Sopenharmony_ci bool need_optimize = false; 2063bf215546Sopenharmony_ci bool inlined_uniforms = false; 2064bf215546Sopenharmony_ci 2065bf215546Sopenharmony_ci if (key) { 2066bf215546Sopenharmony_ci if (key->inline_uniforms) { 2067bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_inline_uniforms, 2068bf215546Sopenharmony_ci nir->info.num_inlinable_uniforms, 2069bf215546Sopenharmony_ci key->base.inlined_uniform_values, 2070bf215546Sopenharmony_ci nir->info.inlinable_uniform_dw_offsets); 2071bf215546Sopenharmony_ci 2072bf215546Sopenharmony_ci inlined_uniforms = true; 2073bf215546Sopenharmony_ci } 2074bf215546Sopenharmony_ci 2075bf215546Sopenharmony_ci /* TODO: use a separate mem ctx here for ralloc */ 2076bf215546Sopenharmony_ci switch (zs->nir->info.stage) { 2077bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: { 2078bf215546Sopenharmony_ci uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0; 2079bf215546Sopenharmony_ci const struct zink_vs_key *vs_key = zink_vs_key(key); 2080bf215546Sopenharmony_ci switch (vs_key->size) { 2081bf215546Sopenharmony_ci case 4: 2082bf215546Sopenharmony_ci decomposed_attrs = vs_key->u32.decomposed_attrs; 2083bf215546Sopenharmony_ci decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w; 2084bf215546Sopenharmony_ci break; 2085bf215546Sopenharmony_ci case 2: 2086bf215546Sopenharmony_ci decomposed_attrs = vs_key->u16.decomposed_attrs; 2087bf215546Sopenharmony_ci decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w; 2088bf215546Sopenharmony_ci break; 2089bf215546Sopenharmony_ci case 1: 2090bf215546Sopenharmony_ci decomposed_attrs = vs_key->u8.decomposed_attrs; 2091bf215546Sopenharmony_ci decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w; 2092bf215546Sopenharmony_ci break; 2093bf215546Sopenharmony_ci default: break; 2094bf215546Sopenharmony_ci } 2095bf215546Sopenharmony_ci if (decomposed_attrs || decomposed_attrs_without_w) 2096bf215546Sopenharmony_ci NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w); 2097bf215546Sopenharmony_ci FALLTHROUGH; 2098bf215546Sopenharmony_ci } 2099bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: 2100bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 2101bf215546Sopenharmony_ci if (zink_vs_key_base(key)->last_vertex_stage) { 2102bf215546Sopenharmony_ci if (zs->sinfo.have_xfb) 2103bf215546Sopenharmony_ci sinfo->last_vertex = true; 2104bf215546Sopenharmony_ci 2105bf215546Sopenharmony_ci if (!zink_vs_key_base(key)->clip_halfz && screen->driver_workarounds.depth_clip_control_missing) { 2106bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_clip_halfz); 2107bf215546Sopenharmony_ci } 2108bf215546Sopenharmony_ci if (zink_vs_key_base(key)->push_drawid) { 2109bf215546Sopenharmony_ci NIR_PASS_V(nir, lower_drawid); 2110bf215546Sopenharmony_ci } 2111bf215546Sopenharmony_ci } 2112bf215546Sopenharmony_ci break; 2113bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: 2114bf215546Sopenharmony_ci if (!zink_fs_key(key)->samples && 2115bf215546Sopenharmony_ci nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) { 2116bf215546Sopenharmony_ci /* VK will always use gl_SampleMask[] values even if sample count is 0, 2117bf215546Sopenharmony_ci * so we need to skip this write here to mimic GL's behavior of ignoring it 2118bf215546Sopenharmony_ci */ 2119bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, nir) { 2120bf215546Sopenharmony_ci if (var->data.location == FRAG_RESULT_SAMPLE_MASK) 2121bf215546Sopenharmony_ci var->data.mode = nir_var_shader_temp; 2122bf215546Sopenharmony_ci } 2123bf215546Sopenharmony_ci nir_fixup_deref_modes(nir); 2124bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); 2125bf215546Sopenharmony_ci need_optimize = true; 2126bf215546Sopenharmony_ci } 2127bf215546Sopenharmony_ci if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) { 2128bf215546Sopenharmony_ci NIR_PASS_V(nir, lower_dual_blend); 2129bf215546Sopenharmony_ci } 2130bf215546Sopenharmony_ci if (zink_fs_key(key)->coord_replace_bits) { 2131bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits, 2132bf215546Sopenharmony_ci false, zink_fs_key(key)->coord_replace_yinvert); 2133bf215546Sopenharmony_ci } 2134bf215546Sopenharmony_ci if (zink_fs_key(key)->force_persample_interp || zink_fs_key(key)->fbfetch_ms) { 2135bf215546Sopenharmony_ci nir_foreach_shader_in_variable(var, nir) 2136bf215546Sopenharmony_ci var->data.sample = true; 2137bf215546Sopenharmony_ci nir->info.fs.uses_sample_qualifier = true; 2138bf215546Sopenharmony_ci nir->info.fs.uses_sample_shading = true; 2139bf215546Sopenharmony_ci } 2140bf215546Sopenharmony_ci if (nir->info.fs.uses_fbfetch_output) { 2141bf215546Sopenharmony_ci nir_variable *fbfetch = NULL; 2142bf215546Sopenharmony_ci NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key(key)->fbfetch_ms); 2143bf215546Sopenharmony_ci /* old variable must be deleted to avoid spirv errors */ 2144bf215546Sopenharmony_ci fbfetch->data.mode = nir_var_shader_temp; 2145bf215546Sopenharmony_ci nir_fixup_deref_modes(nir); 2146bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); 2147bf215546Sopenharmony_ci need_optimize = true; 2148bf215546Sopenharmony_ci } 2149bf215546Sopenharmony_ci break; 2150bf215546Sopenharmony_ci default: break; 2151bf215546Sopenharmony_ci } 2152bf215546Sopenharmony_ci if (key->base.nonseamless_cube_mask) { 2153bf215546Sopenharmony_ci NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask); 2154bf215546Sopenharmony_ci need_optimize = true; 2155bf215546Sopenharmony_ci } 2156bf215546Sopenharmony_ci } 2157bf215546Sopenharmony_ci if (screen->driconf.inline_uniforms) { 2158bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared); 2159bf215546Sopenharmony_ci NIR_PASS_V(nir, rewrite_bo_access, screen); 2160bf215546Sopenharmony_ci NIR_PASS_V(nir, remove_bo_access, zs); 2161bf215546Sopenharmony_ci need_optimize = true; 2162bf215546Sopenharmony_ci } 2163bf215546Sopenharmony_ci if (inlined_uniforms) { 2164bf215546Sopenharmony_ci optimize_nir(nir, zs); 2165bf215546Sopenharmony_ci 2166bf215546Sopenharmony_ci /* This must be done again. */ 2167bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | 2168bf215546Sopenharmony_ci nir_var_shader_out); 2169bf215546Sopenharmony_ci 2170bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(nir); 2171bf215546Sopenharmony_ci if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT) 2172bf215546Sopenharmony_ci zs->can_inline = false; 2173bf215546Sopenharmony_ci } else if (need_optimize) 2174bf215546Sopenharmony_ci optimize_nir(nir, zs); 2175bf215546Sopenharmony_ci prune_io(nir); 2176bf215546Sopenharmony_ci 2177bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_convert_from_ssa, true); 2178bf215546Sopenharmony_ci 2179bf215546Sopenharmony_ci struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version); 2180bf215546Sopenharmony_ci if (spirv) 2181bf215546Sopenharmony_ci mod = zink_shader_spirv_compile(screen, zs, spirv); 2182bf215546Sopenharmony_ci 2183bf215546Sopenharmony_ci ralloc_free(nir); 2184bf215546Sopenharmony_ci 2185bf215546Sopenharmony_ci /* TODO: determine if there's any reason to cache spirv output? */ 2186bf215546Sopenharmony_ci if (zs->nir->info.stage == MESA_SHADER_TESS_CTRL && zs->is_generated) 2187bf215546Sopenharmony_ci zs->spirv = spirv; 2188bf215546Sopenharmony_ci else 2189bf215546Sopenharmony_ci ralloc_free(spirv); 2190bf215546Sopenharmony_ci return mod; 2191bf215546Sopenharmony_ci} 2192bf215546Sopenharmony_ci 2193bf215546Sopenharmony_cistatic bool 2194bf215546Sopenharmony_cilower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data) 2195bf215546Sopenharmony_ci{ 2196bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 2197bf215546Sopenharmony_ci return false; 2198bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 2199bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_load_instance_id) 2200bf215546Sopenharmony_ci return false; 2201bf215546Sopenharmony_ci b->cursor = nir_after_instr(instr); 2202bf215546Sopenharmony_ci nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b)); 2203bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr); 2204bf215546Sopenharmony_ci return true; 2205bf215546Sopenharmony_ci} 2206bf215546Sopenharmony_ci 2207bf215546Sopenharmony_cistatic bool 2208bf215546Sopenharmony_cilower_baseinstance(nir_shader *shader) 2209bf215546Sopenharmony_ci{ 2210bf215546Sopenharmony_ci if (shader->info.stage != MESA_SHADER_VERTEX) 2211bf215546Sopenharmony_ci return false; 2212bf215546Sopenharmony_ci return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL); 2213bf215546Sopenharmony_ci} 2214bf215546Sopenharmony_ci 2215bf215546Sopenharmony_ci/* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access 2216bf215546Sopenharmony_ci * so instead we delete all those broken variables and just make new ones 2217bf215546Sopenharmony_ci */ 2218bf215546Sopenharmony_cistatic bool 2219bf215546Sopenharmony_ciunbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size) 2220bf215546Sopenharmony_ci{ 2221bf215546Sopenharmony_ci uint64_t max_ssbo_size = 0; 2222bf215546Sopenharmony_ci uint64_t max_ubo_size = 0; 2223bf215546Sopenharmony_ci uint64_t max_uniform_size = 0; 2224bf215546Sopenharmony_ci 2225bf215546Sopenharmony_ci if (!shader->info.num_ssbos && !shader->info.num_ubos) 2226bf215546Sopenharmony_ci return false; 2227bf215546Sopenharmony_ci 2228bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) { 2229bf215546Sopenharmony_ci const struct glsl_type *type = glsl_without_array(var->type); 2230bf215546Sopenharmony_ci if (type_is_counter(type)) 2231bf215546Sopenharmony_ci continue; 2232bf215546Sopenharmony_ci /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */ 2233bf215546Sopenharmony_ci unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false); 2234bf215546Sopenharmony_ci const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL; 2235bf215546Sopenharmony_ci if (interface_type) { 2236bf215546Sopenharmony_ci unsigned block_size = glsl_get_explicit_size(interface_type, true); 2237bf215546Sopenharmony_ci block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4); 2238bf215546Sopenharmony_ci size = MAX2(size, block_size); 2239bf215546Sopenharmony_ci } 2240bf215546Sopenharmony_ci if (var->data.mode == nir_var_mem_ubo) { 2241bf215546Sopenharmony_ci if (var->data.driver_location) 2242bf215546Sopenharmony_ci max_ubo_size = MAX2(max_ubo_size, size); 2243bf215546Sopenharmony_ci else 2244bf215546Sopenharmony_ci max_uniform_size = MAX2(max_uniform_size, size); 2245bf215546Sopenharmony_ci } else { 2246bf215546Sopenharmony_ci max_ssbo_size = MAX2(max_ssbo_size, size); 2247bf215546Sopenharmony_ci if (interface_type) { 2248bf215546Sopenharmony_ci if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1))) 2249bf215546Sopenharmony_ci needs_size = true; 2250bf215546Sopenharmony_ci } 2251bf215546Sopenharmony_ci } 2252bf215546Sopenharmony_ci var->data.mode = nir_var_shader_temp; 2253bf215546Sopenharmony_ci } 2254bf215546Sopenharmony_ci nir_fixup_deref_modes(shader); 2255bf215546Sopenharmony_ci NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL); 2256bf215546Sopenharmony_ci optimize_nir(shader, NULL); 2257bf215546Sopenharmony_ci 2258bf215546Sopenharmony_ci struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2); 2259bf215546Sopenharmony_ci fields[0].name = ralloc_strdup(shader, "base"); 2260bf215546Sopenharmony_ci fields[1].name = ralloc_strdup(shader, "unsized"); 2261bf215546Sopenharmony_ci if (shader->info.num_ubos) { 2262bf215546Sopenharmony_ci if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) { 2263bf215546Sopenharmony_ci fields[0].type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4); 2264bf215546Sopenharmony_ci nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo, 2265bf215546Sopenharmony_ci glsl_array_type(glsl_interface_type(fields, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0), 2266bf215546Sopenharmony_ci "uniform_0"); 2267bf215546Sopenharmony_ci var->interface_type = var->type; 2268bf215546Sopenharmony_ci var->data.mode = nir_var_mem_ubo; 2269bf215546Sopenharmony_ci var->data.driver_location = 0; 2270bf215546Sopenharmony_ci } 2271bf215546Sopenharmony_ci 2272bf215546Sopenharmony_ci unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo; 2273bf215546Sopenharmony_ci uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0); 2274bf215546Sopenharmony_ci if (num_ubos && ubos_used) { 2275bf215546Sopenharmony_ci fields[0].type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4); 2276bf215546Sopenharmony_ci /* shrink array as much as possible */ 2277bf215546Sopenharmony_ci unsigned first_ubo = ffs(ubos_used) - 2; 2278bf215546Sopenharmony_ci assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS); 2279bf215546Sopenharmony_ci num_ubos -= first_ubo; 2280bf215546Sopenharmony_ci assert(num_ubos); 2281bf215546Sopenharmony_ci nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo, 2282bf215546Sopenharmony_ci glsl_array_type(glsl_struct_type(fields, 1, "struct", false), num_ubos, 0), 2283bf215546Sopenharmony_ci "ubos"); 2284bf215546Sopenharmony_ci var->interface_type = var->type; 2285bf215546Sopenharmony_ci var->data.mode = nir_var_mem_ubo; 2286bf215546Sopenharmony_ci var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo; 2287bf215546Sopenharmony_ci } 2288bf215546Sopenharmony_ci } 2289bf215546Sopenharmony_ci if (shader->info.num_ssbos && zs->ssbos_used) { 2290bf215546Sopenharmony_ci /* shrink array as much as possible */ 2291bf215546Sopenharmony_ci unsigned first_ssbo = ffs(zs->ssbos_used) - 1; 2292bf215546Sopenharmony_ci assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS); 2293bf215546Sopenharmony_ci unsigned num_ssbos = shader->info.num_ssbos - first_ssbo; 2294bf215546Sopenharmony_ci assert(num_ssbos); 2295bf215546Sopenharmony_ci const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4); 2296bf215546Sopenharmony_ci const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4); 2297bf215546Sopenharmony_ci fields[0].type = ssbo_type; 2298bf215546Sopenharmony_ci fields[1].type = max_ssbo_size ? unsized : NULL; 2299bf215546Sopenharmony_ci unsigned field_count = max_ssbo_size && needs_size ? 2 : 1; 2300bf215546Sopenharmony_ci nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo, 2301bf215546Sopenharmony_ci glsl_array_type(glsl_struct_type(fields, field_count, "struct", false), num_ssbos, 0), 2302bf215546Sopenharmony_ci "ssbos"); 2303bf215546Sopenharmony_ci var->interface_type = var->type; 2304bf215546Sopenharmony_ci var->data.mode = nir_var_mem_ssbo; 2305bf215546Sopenharmony_ci var->data.driver_location = first_ssbo; 2306bf215546Sopenharmony_ci } 2307bf215546Sopenharmony_ci return true; 2308bf215546Sopenharmony_ci} 2309bf215546Sopenharmony_ci 2310bf215546Sopenharmony_cistatic uint32_t 2311bf215546Sopenharmony_ciget_src_mask_ssbo(unsigned total, nir_src src) 2312bf215546Sopenharmony_ci{ 2313bf215546Sopenharmony_ci if (nir_src_is_const(src)) 2314bf215546Sopenharmony_ci return BITFIELD_BIT(nir_src_as_uint(src)); 2315bf215546Sopenharmony_ci return BITFIELD_MASK(total); 2316bf215546Sopenharmony_ci} 2317bf215546Sopenharmony_ci 2318bf215546Sopenharmony_cistatic uint32_t 2319bf215546Sopenharmony_ciget_src_mask_ubo(unsigned total, nir_src src) 2320bf215546Sopenharmony_ci{ 2321bf215546Sopenharmony_ci if (nir_src_is_const(src)) 2322bf215546Sopenharmony_ci return BITFIELD_BIT(nir_src_as_uint(src)); 2323bf215546Sopenharmony_ci return BITFIELD_MASK(total) & ~BITFIELD_BIT(0); 2324bf215546Sopenharmony_ci} 2325bf215546Sopenharmony_ci 2326bf215546Sopenharmony_cistatic bool 2327bf215546Sopenharmony_cianalyze_io(struct zink_shader *zs, nir_shader *shader) 2328bf215546Sopenharmony_ci{ 2329bf215546Sopenharmony_ci bool ret = false; 2330bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(shader); 2331bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 2332bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 2333bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 2334bf215546Sopenharmony_ci continue; 2335bf215546Sopenharmony_ci 2336bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 2337bf215546Sopenharmony_ci switch (intrin->intrinsic) { 2338bf215546Sopenharmony_ci case nir_intrinsic_store_ssbo: 2339bf215546Sopenharmony_ci zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]); 2340bf215546Sopenharmony_ci break; 2341bf215546Sopenharmony_ci 2342bf215546Sopenharmony_ci case nir_intrinsic_get_ssbo_size: { 2343bf215546Sopenharmony_ci zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]); 2344bf215546Sopenharmony_ci ret = true; 2345bf215546Sopenharmony_ci break; 2346bf215546Sopenharmony_ci } 2347bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fadd: 2348bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_add: 2349bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imin: 2350bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umin: 2351bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imax: 2352bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umax: 2353bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_and: 2354bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_or: 2355bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_xor: 2356bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_exchange: 2357bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_comp_swap: 2358bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fmin: 2359bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fmax: 2360bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fcomp_swap: 2361bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo: 2362bf215546Sopenharmony_ci zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]); 2363bf215546Sopenharmony_ci break; 2364bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 2365bf215546Sopenharmony_ci case nir_intrinsic_load_ubo_vec4: 2366bf215546Sopenharmony_ci zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]); 2367bf215546Sopenharmony_ci break; 2368bf215546Sopenharmony_ci default: 2369bf215546Sopenharmony_ci break; 2370bf215546Sopenharmony_ci } 2371bf215546Sopenharmony_ci } 2372bf215546Sopenharmony_ci } 2373bf215546Sopenharmony_ci return ret; 2374bf215546Sopenharmony_ci} 2375bf215546Sopenharmony_ci 2376bf215546Sopenharmony_cistruct zink_bindless_info { 2377bf215546Sopenharmony_ci nir_variable *bindless[4]; 2378bf215546Sopenharmony_ci unsigned bindless_set; 2379bf215546Sopenharmony_ci}; 2380bf215546Sopenharmony_ci 2381bf215546Sopenharmony_ci/* this is a "default" bindless texture used if the shader has no texture variables */ 2382bf215546Sopenharmony_cistatic nir_variable * 2383bf215546Sopenharmony_cicreate_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set) 2384bf215546Sopenharmony_ci{ 2385bf215546Sopenharmony_ci unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0; 2386bf215546Sopenharmony_ci nir_variable *var; 2387bf215546Sopenharmony_ci 2388bf215546Sopenharmony_ci const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT); 2389bf215546Sopenharmony_ci var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture"); 2390bf215546Sopenharmony_ci var->data.descriptor_set = descriptor_set; 2391bf215546Sopenharmony_ci var->data.driver_location = var->data.binding = binding; 2392bf215546Sopenharmony_ci return var; 2393bf215546Sopenharmony_ci} 2394bf215546Sopenharmony_ci 2395bf215546Sopenharmony_ci/* this is a "default" bindless image used if the shader has no image variables */ 2396bf215546Sopenharmony_cistatic nir_variable * 2397bf215546Sopenharmony_cicreate_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set) 2398bf215546Sopenharmony_ci{ 2399bf215546Sopenharmony_ci unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2; 2400bf215546Sopenharmony_ci nir_variable *var; 2401bf215546Sopenharmony_ci 2402bf215546Sopenharmony_ci const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); 2403bf215546Sopenharmony_ci var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image"); 2404bf215546Sopenharmony_ci var->data.descriptor_set = descriptor_set; 2405bf215546Sopenharmony_ci var->data.driver_location = var->data.binding = binding; 2406bf215546Sopenharmony_ci var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM; 2407bf215546Sopenharmony_ci return var; 2408bf215546Sopenharmony_ci} 2409bf215546Sopenharmony_ci 2410bf215546Sopenharmony_ci/* rewrite bindless instructions as array deref instructions */ 2411bf215546Sopenharmony_cistatic bool 2412bf215546Sopenharmony_cilower_bindless_instr(nir_builder *b, nir_instr *in, void *data) 2413bf215546Sopenharmony_ci{ 2414bf215546Sopenharmony_ci struct zink_bindless_info *bindless = data; 2415bf215546Sopenharmony_ci 2416bf215546Sopenharmony_ci if (in->type == nir_instr_type_tex) { 2417bf215546Sopenharmony_ci nir_tex_instr *tex = nir_instr_as_tex(in); 2418bf215546Sopenharmony_ci int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle); 2419bf215546Sopenharmony_ci if (idx == -1) 2420bf215546Sopenharmony_ci return false; 2421bf215546Sopenharmony_ci 2422bf215546Sopenharmony_ci nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0]; 2423bf215546Sopenharmony_ci if (!var) 2424bf215546Sopenharmony_ci var = create_bindless_texture(b->shader, tex, bindless->bindless_set); 2425bf215546Sopenharmony_ci b->cursor = nir_before_instr(in); 2426bf215546Sopenharmony_ci nir_deref_instr *deref = nir_build_deref_var(b, var); 2427bf215546Sopenharmony_ci if (glsl_type_is_array(var->type)) 2428bf215546Sopenharmony_ci deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32)); 2429bf215546Sopenharmony_ci nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa); 2430bf215546Sopenharmony_ci 2431bf215546Sopenharmony_ci /* bindless sampling uses the variable type directly, which means the tex instr has to exactly 2432bf215546Sopenharmony_ci * match up with it in contrast to normal sampler ops where things are a bit more flexible; 2433bf215546Sopenharmony_ci * this results in cases where a shader is passed with sampler2DArray but the tex instr only has 2434bf215546Sopenharmony_ci * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors 2435bf215546Sopenharmony_ci * 2436bf215546Sopenharmony_ci * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing 2437bf215546Sopenharmony_ci * - Warhammer 40k: Dawn of War III 2438bf215546Sopenharmony_ci */ 2439bf215546Sopenharmony_ci unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type)); 2440bf215546Sopenharmony_ci unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord); 2441bf215546Sopenharmony_ci unsigned coord_components = nir_src_num_components(tex->src[c].src); 2442bf215546Sopenharmony_ci if (coord_components < needed_components) { 2443bf215546Sopenharmony_ci nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components); 2444bf215546Sopenharmony_ci nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def); 2445bf215546Sopenharmony_ci tex->coord_components = needed_components; 2446bf215546Sopenharmony_ci } 2447bf215546Sopenharmony_ci return true; 2448bf215546Sopenharmony_ci } 2449bf215546Sopenharmony_ci if (in->type != nir_instr_type_intrinsic) 2450bf215546Sopenharmony_ci return false; 2451bf215546Sopenharmony_ci nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in); 2452bf215546Sopenharmony_ci 2453bf215546Sopenharmony_ci nir_intrinsic_op op; 2454bf215546Sopenharmony_ci#define OP_SWAP(OP) \ 2455bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_##OP: \ 2456bf215546Sopenharmony_ci op = nir_intrinsic_image_deref_##OP; \ 2457bf215546Sopenharmony_ci break; 2458bf215546Sopenharmony_ci 2459bf215546Sopenharmony_ci 2460bf215546Sopenharmony_ci /* convert bindless intrinsics to deref intrinsics */ 2461bf215546Sopenharmony_ci switch (instr->intrinsic) { 2462bf215546Sopenharmony_ci OP_SWAP(atomic_add) 2463bf215546Sopenharmony_ci OP_SWAP(atomic_and) 2464bf215546Sopenharmony_ci OP_SWAP(atomic_comp_swap) 2465bf215546Sopenharmony_ci OP_SWAP(atomic_dec_wrap) 2466bf215546Sopenharmony_ci OP_SWAP(atomic_exchange) 2467bf215546Sopenharmony_ci OP_SWAP(atomic_fadd) 2468bf215546Sopenharmony_ci OP_SWAP(atomic_fmax) 2469bf215546Sopenharmony_ci OP_SWAP(atomic_fmin) 2470bf215546Sopenharmony_ci OP_SWAP(atomic_imax) 2471bf215546Sopenharmony_ci OP_SWAP(atomic_imin) 2472bf215546Sopenharmony_ci OP_SWAP(atomic_inc_wrap) 2473bf215546Sopenharmony_ci OP_SWAP(atomic_or) 2474bf215546Sopenharmony_ci OP_SWAP(atomic_umax) 2475bf215546Sopenharmony_ci OP_SWAP(atomic_umin) 2476bf215546Sopenharmony_ci OP_SWAP(atomic_xor) 2477bf215546Sopenharmony_ci OP_SWAP(format) 2478bf215546Sopenharmony_ci OP_SWAP(load) 2479bf215546Sopenharmony_ci OP_SWAP(order) 2480bf215546Sopenharmony_ci OP_SWAP(samples) 2481bf215546Sopenharmony_ci OP_SWAP(size) 2482bf215546Sopenharmony_ci OP_SWAP(store) 2483bf215546Sopenharmony_ci default: 2484bf215546Sopenharmony_ci return false; 2485bf215546Sopenharmony_ci } 2486bf215546Sopenharmony_ci 2487bf215546Sopenharmony_ci enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); 2488bf215546Sopenharmony_ci nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2]; 2489bf215546Sopenharmony_ci if (!var) 2490bf215546Sopenharmony_ci var = create_bindless_image(b->shader, dim, bindless->bindless_set); 2491bf215546Sopenharmony_ci instr->intrinsic = op; 2492bf215546Sopenharmony_ci b->cursor = nir_before_instr(in); 2493bf215546Sopenharmony_ci nir_deref_instr *deref = nir_build_deref_var(b, var); 2494bf215546Sopenharmony_ci if (glsl_type_is_array(var->type)) 2495bf215546Sopenharmony_ci deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32)); 2496bf215546Sopenharmony_ci nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa); 2497bf215546Sopenharmony_ci return true; 2498bf215546Sopenharmony_ci} 2499bf215546Sopenharmony_ci 2500bf215546Sopenharmony_cistatic bool 2501bf215546Sopenharmony_cilower_bindless(nir_shader *shader, struct zink_bindless_info *bindless) 2502bf215546Sopenharmony_ci{ 2503bf215546Sopenharmony_ci if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless)) 2504bf215546Sopenharmony_ci return false; 2505bf215546Sopenharmony_ci nir_fixup_deref_modes(shader); 2506bf215546Sopenharmony_ci NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL); 2507bf215546Sopenharmony_ci optimize_nir(shader, NULL); 2508bf215546Sopenharmony_ci return true; 2509bf215546Sopenharmony_ci} 2510bf215546Sopenharmony_ci 2511bf215546Sopenharmony_ci/* convert shader image/texture io variables to int64 handles for bindless indexing */ 2512bf215546Sopenharmony_cistatic bool 2513bf215546Sopenharmony_cilower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data) 2514bf215546Sopenharmony_ci{ 2515bf215546Sopenharmony_ci if (in->type != nir_instr_type_intrinsic) 2516bf215546Sopenharmony_ci return false; 2517bf215546Sopenharmony_ci nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in); 2518bf215546Sopenharmony_ci if (instr->intrinsic != nir_intrinsic_load_deref && 2519bf215546Sopenharmony_ci instr->intrinsic != nir_intrinsic_store_deref) 2520bf215546Sopenharmony_ci return false; 2521bf215546Sopenharmony_ci 2522bf215546Sopenharmony_ci nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]); 2523bf215546Sopenharmony_ci nir_variable *var = nir_deref_instr_get_variable(src_deref); 2524bf215546Sopenharmony_ci if (var->data.bindless) 2525bf215546Sopenharmony_ci return false; 2526bf215546Sopenharmony_ci if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out) 2527bf215546Sopenharmony_ci return false; 2528bf215546Sopenharmony_ci if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type)) 2529bf215546Sopenharmony_ci return false; 2530bf215546Sopenharmony_ci 2531bf215546Sopenharmony_ci var->type = glsl_int64_t_type(); 2532bf215546Sopenharmony_ci var->data.bindless = 1; 2533bf215546Sopenharmony_ci b->cursor = nir_before_instr(in); 2534bf215546Sopenharmony_ci nir_deref_instr *deref = nir_build_deref_var(b, var); 2535bf215546Sopenharmony_ci if (instr->intrinsic == nir_intrinsic_load_deref) { 2536bf215546Sopenharmony_ci nir_ssa_def *def = nir_load_deref(b, deref); 2537bf215546Sopenharmony_ci nir_instr_rewrite_src_ssa(in, &instr->src[0], def); 2538bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&instr->dest.ssa, def); 2539bf215546Sopenharmony_ci } else { 2540bf215546Sopenharmony_ci nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr)); 2541bf215546Sopenharmony_ci } 2542bf215546Sopenharmony_ci nir_instr_remove(in); 2543bf215546Sopenharmony_ci nir_instr_remove(&src_deref->instr); 2544bf215546Sopenharmony_ci return true; 2545bf215546Sopenharmony_ci} 2546bf215546Sopenharmony_ci 2547bf215546Sopenharmony_cistatic bool 2548bf215546Sopenharmony_cilower_bindless_io(nir_shader *shader) 2549bf215546Sopenharmony_ci{ 2550bf215546Sopenharmony_ci return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL); 2551bf215546Sopenharmony_ci} 2552bf215546Sopenharmony_ci 2553bf215546Sopenharmony_cistatic uint32_t 2554bf215546Sopenharmony_cizink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors) 2555bf215546Sopenharmony_ci{ 2556bf215546Sopenharmony_ci if (stage == MESA_SHADER_NONE) { 2557bf215546Sopenharmony_ci unreachable("not supported"); 2558bf215546Sopenharmony_ci } else { 2559bf215546Sopenharmony_ci switch (type) { 2560bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 2561bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: 2562bf215546Sopenharmony_ci return stage * 2 + !!index; 2563bf215546Sopenharmony_ci 2564bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: 2565bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: 2566bf215546Sopenharmony_ci assert(index < PIPE_MAX_SAMPLERS); 2567bf215546Sopenharmony_ci return (stage * PIPE_MAX_SAMPLERS) + index; 2568bf215546Sopenharmony_ci 2569bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 2570bf215546Sopenharmony_ci return stage + (compact_descriptors * (ZINK_SHADER_COUNT * 2)); 2571bf215546Sopenharmony_ci 2572bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 2573bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: 2574bf215546Sopenharmony_ci assert(index < ZINK_MAX_SHADER_IMAGES); 2575bf215546Sopenharmony_ci return (stage * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_SHADER_COUNT * PIPE_MAX_SAMPLERS)); 2576bf215546Sopenharmony_ci 2577bf215546Sopenharmony_ci default: 2578bf215546Sopenharmony_ci unreachable("unexpected type"); 2579bf215546Sopenharmony_ci } 2580bf215546Sopenharmony_ci } 2581bf215546Sopenharmony_ci} 2582bf215546Sopenharmony_ci 2583bf215546Sopenharmony_cistatic void 2584bf215546Sopenharmony_cihandle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless) 2585bf215546Sopenharmony_ci{ 2586bf215546Sopenharmony_ci if (glsl_type_is_struct(type)) { 2587bf215546Sopenharmony_ci for (unsigned i = 0; i < glsl_get_length(type); i++) 2588bf215546Sopenharmony_ci handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless); 2589bf215546Sopenharmony_ci return; 2590bf215546Sopenharmony_ci } 2591bf215546Sopenharmony_ci 2592bf215546Sopenharmony_ci /* just a random scalar in a struct */ 2593bf215546Sopenharmony_ci if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type)) 2594bf215546Sopenharmony_ci return; 2595bf215546Sopenharmony_ci 2596bf215546Sopenharmony_ci VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type); 2597bf215546Sopenharmony_ci unsigned binding; 2598bf215546Sopenharmony_ci switch (vktype) { 2599bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: 2600bf215546Sopenharmony_ci binding = 0; 2601bf215546Sopenharmony_ci break; 2602bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: 2603bf215546Sopenharmony_ci binding = 1; 2604bf215546Sopenharmony_ci break; 2605bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 2606bf215546Sopenharmony_ci binding = 2; 2607bf215546Sopenharmony_ci break; 2608bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: 2609bf215546Sopenharmony_ci binding = 3; 2610bf215546Sopenharmony_ci break; 2611bf215546Sopenharmony_ci default: 2612bf215546Sopenharmony_ci unreachable("unknown"); 2613bf215546Sopenharmony_ci } 2614bf215546Sopenharmony_ci if (!bindless->bindless[binding]) { 2615bf215546Sopenharmony_ci bindless->bindless[binding] = nir_variable_clone(var, nir); 2616bf215546Sopenharmony_ci bindless->bindless[binding]->data.bindless = 0; 2617bf215546Sopenharmony_ci bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set; 2618bf215546Sopenharmony_ci bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0); 2619bf215546Sopenharmony_ci bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding; 2620bf215546Sopenharmony_ci if (!bindless->bindless[binding]->data.image.format) 2621bf215546Sopenharmony_ci bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM; 2622bf215546Sopenharmony_ci nir_shader_add_variable(nir, bindless->bindless[binding]); 2623bf215546Sopenharmony_ci } else { 2624bf215546Sopenharmony_ci assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type))); 2625bf215546Sopenharmony_ci } 2626bf215546Sopenharmony_ci var->data.mode = nir_var_shader_temp; 2627bf215546Sopenharmony_ci} 2628bf215546Sopenharmony_ci 2629bf215546Sopenharmony_cistatic enum pipe_prim_type 2630bf215546Sopenharmony_ciprim_to_pipe(enum shader_prim primitive_type) 2631bf215546Sopenharmony_ci{ 2632bf215546Sopenharmony_ci switch (primitive_type) { 2633bf215546Sopenharmony_ci case SHADER_PRIM_POINTS: 2634bf215546Sopenharmony_ci return PIPE_PRIM_POINTS; 2635bf215546Sopenharmony_ci case SHADER_PRIM_LINES: 2636bf215546Sopenharmony_ci case SHADER_PRIM_LINE_LOOP: 2637bf215546Sopenharmony_ci case SHADER_PRIM_LINE_STRIP: 2638bf215546Sopenharmony_ci case SHADER_PRIM_LINES_ADJACENCY: 2639bf215546Sopenharmony_ci case SHADER_PRIM_LINE_STRIP_ADJACENCY: 2640bf215546Sopenharmony_ci return PIPE_PRIM_LINES; 2641bf215546Sopenharmony_ci default: 2642bf215546Sopenharmony_ci return PIPE_PRIM_TRIANGLES; 2643bf215546Sopenharmony_ci } 2644bf215546Sopenharmony_ci} 2645bf215546Sopenharmony_ci 2646bf215546Sopenharmony_cistatic enum pipe_prim_type 2647bf215546Sopenharmony_citess_prim_to_pipe(enum tess_primitive_mode prim_mode) 2648bf215546Sopenharmony_ci{ 2649bf215546Sopenharmony_ci switch (prim_mode) { 2650bf215546Sopenharmony_ci case TESS_PRIMITIVE_ISOLINES: 2651bf215546Sopenharmony_ci return PIPE_PRIM_LINES; 2652bf215546Sopenharmony_ci default: 2653bf215546Sopenharmony_ci return PIPE_PRIM_TRIANGLES; 2654bf215546Sopenharmony_ci } 2655bf215546Sopenharmony_ci} 2656bf215546Sopenharmony_ci 2657bf215546Sopenharmony_cistatic enum pipe_prim_type 2658bf215546Sopenharmony_ciget_shader_base_prim_type(struct nir_shader *nir) 2659bf215546Sopenharmony_ci{ 2660bf215546Sopenharmony_ci switch (nir->info.stage) { 2661bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 2662bf215546Sopenharmony_ci return prim_to_pipe(nir->info.gs.output_primitive); 2663bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: 2664bf215546Sopenharmony_ci return nir->info.tess.point_mode ? PIPE_PRIM_POINTS : tess_prim_to_pipe(nir->info.tess._primitive_mode); 2665bf215546Sopenharmony_ci default: 2666bf215546Sopenharmony_ci break; 2667bf215546Sopenharmony_ci } 2668bf215546Sopenharmony_ci return PIPE_PRIM_MAX; 2669bf215546Sopenharmony_ci} 2670bf215546Sopenharmony_ci 2671bf215546Sopenharmony_cistatic bool 2672bf215546Sopenharmony_ciconvert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data) 2673bf215546Sopenharmony_ci{ 2674bf215546Sopenharmony_ci struct zink_screen *screen = data; 2675bf215546Sopenharmony_ci if (instr->type != nir_instr_type_tex) 2676bf215546Sopenharmony_ci return false; 2677bf215546Sopenharmony_ci nir_tex_instr *tex = nir_instr_as_tex(instr); 2678bf215546Sopenharmony_ci if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow) 2679bf215546Sopenharmony_ci return false; 2680bf215546Sopenharmony_ci if (tex->is_sparse && screen->need_2D_sparse) { 2681bf215546Sopenharmony_ci /* no known case of this exists: only nvidia can hit it, and nothing uses it */ 2682bf215546Sopenharmony_ci mesa_loge("unhandled/unsupported 1D sparse texture!"); 2683bf215546Sopenharmony_ci abort(); 2684bf215546Sopenharmony_ci } 2685bf215546Sopenharmony_ci tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 2686bf215546Sopenharmony_ci b->cursor = nir_before_instr(instr); 2687bf215546Sopenharmony_ci tex->coord_components++; 2688bf215546Sopenharmony_ci unsigned srcs[] = { 2689bf215546Sopenharmony_ci nir_tex_src_coord, 2690bf215546Sopenharmony_ci nir_tex_src_offset, 2691bf215546Sopenharmony_ci nir_tex_src_ddx, 2692bf215546Sopenharmony_ci nir_tex_src_ddy, 2693bf215546Sopenharmony_ci }; 2694bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) { 2695bf215546Sopenharmony_ci unsigned c = nir_tex_instr_src_index(tex, srcs[i]); 2696bf215546Sopenharmony_ci if (c == -1) 2697bf215546Sopenharmony_ci continue; 2698bf215546Sopenharmony_ci if (tex->src[c].src.ssa->num_components == tex->coord_components) 2699bf215546Sopenharmony_ci continue; 2700bf215546Sopenharmony_ci nir_ssa_def *def; 2701bf215546Sopenharmony_ci nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size); 2702bf215546Sopenharmony_ci if (tex->src[c].src.ssa->num_components == 1) 2703bf215546Sopenharmony_ci def = nir_vec2(b, tex->src[c].src.ssa, zero); 2704bf215546Sopenharmony_ci else 2705bf215546Sopenharmony_ci def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1)); 2706bf215546Sopenharmony_ci nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def); 2707bf215546Sopenharmony_ci } 2708bf215546Sopenharmony_ci b->cursor = nir_after_instr(instr); 2709bf215546Sopenharmony_ci unsigned needed_components = nir_tex_instr_dest_size(tex); 2710bf215546Sopenharmony_ci unsigned num_components = tex->dest.ssa.num_components; 2711bf215546Sopenharmony_ci if (needed_components > num_components) { 2712bf215546Sopenharmony_ci tex->dest.ssa.num_components = needed_components; 2713bf215546Sopenharmony_ci assert(num_components < 3); 2714bf215546Sopenharmony_ci /* take either xz or just x since this is promoted to 2D from 1D */ 2715bf215546Sopenharmony_ci uint32_t mask = num_components == 2 ? (1|4) : 1; 2716bf215546Sopenharmony_ci nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask); 2717bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr); 2718bf215546Sopenharmony_ci } 2719bf215546Sopenharmony_ci return true; 2720bf215546Sopenharmony_ci} 2721bf215546Sopenharmony_ci 2722bf215546Sopenharmony_cistatic bool 2723bf215546Sopenharmony_cilower_1d_shadow(nir_shader *shader, struct zink_screen *screen) 2724bf215546Sopenharmony_ci{ 2725bf215546Sopenharmony_ci bool found = false; 2726bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) { 2727bf215546Sopenharmony_ci const struct glsl_type *type = glsl_without_array(var->type); 2728bf215546Sopenharmony_ci unsigned length = glsl_get_length(var->type); 2729bf215546Sopenharmony_ci if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D) 2730bf215546Sopenharmony_ci continue; 2731bf215546Sopenharmony_ci const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type)); 2732bf215546Sopenharmony_ci var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler; 2733bf215546Sopenharmony_ci 2734bf215546Sopenharmony_ci found = true; 2735bf215546Sopenharmony_ci } 2736bf215546Sopenharmony_ci if (found) 2737bf215546Sopenharmony_ci nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen); 2738bf215546Sopenharmony_ci return found; 2739bf215546Sopenharmony_ci} 2740bf215546Sopenharmony_ci 2741bf215546Sopenharmony_cistatic void 2742bf215546Sopenharmony_ciscan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs) 2743bf215546Sopenharmony_ci{ 2744bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 2745bf215546Sopenharmony_ci if (!function->impl) 2746bf215546Sopenharmony_ci continue; 2747bf215546Sopenharmony_ci nir_foreach_block_safe(block, function->impl) { 2748bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 2749bf215546Sopenharmony_ci if (instr->type == nir_instr_type_tex) { 2750bf215546Sopenharmony_ci nir_tex_instr *tex = nir_instr_as_tex(instr); 2751bf215546Sopenharmony_ci zs->sinfo.have_sparse |= tex->is_sparse; 2752bf215546Sopenharmony_ci } 2753bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 2754bf215546Sopenharmony_ci continue; 2755bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 2756bf215546Sopenharmony_ci if (intr->intrinsic == nir_intrinsic_image_deref_load || 2757bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_sparse_load || 2758bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_store || 2759bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_atomic_add || 2760bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_atomic_imin || 2761bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_atomic_umin || 2762bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_atomic_imax || 2763bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_atomic_umax || 2764bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_atomic_and || 2765bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_atomic_or || 2766bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_atomic_xor || 2767bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange || 2768bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap || 2769bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd || 2770bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_size || 2771bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_samples || 2772bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_format || 2773bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_order) { 2774bf215546Sopenharmony_ci 2775bf215546Sopenharmony_ci nir_variable *var = 2776bf215546Sopenharmony_ci nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); 2777bf215546Sopenharmony_ci 2778bf215546Sopenharmony_ci /* Structs have been lowered already, so get_aoa_size is sufficient. */ 2779bf215546Sopenharmony_ci const unsigned size = 2780bf215546Sopenharmony_ci glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1; 2781bf215546Sopenharmony_ci BITSET_SET_RANGE(shader->info.images_used, var->data.binding, 2782bf215546Sopenharmony_ci var->data.binding + (MAX2(size, 1) - 1)); 2783bf215546Sopenharmony_ci } 2784bf215546Sopenharmony_ci if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident || 2785bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_image_deref_sparse_load) 2786bf215546Sopenharmony_ci zs->sinfo.have_sparse = true; 2787bf215546Sopenharmony_ci 2788bf215546Sopenharmony_ci static bool warned = false; 2789bf215546Sopenharmony_ci if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) { 2790bf215546Sopenharmony_ci switch (intr->intrinsic) { 2791bf215546Sopenharmony_ci case nir_intrinsic_image_deref_atomic_add: { 2792bf215546Sopenharmony_ci nir_variable *var = nir_intrinsic_get_var(intr, 0); 2793bf215546Sopenharmony_ci if (util_format_is_float(var->data.image.format)) 2794bf215546Sopenharmony_ci fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n"); 2795bf215546Sopenharmony_ci break; 2796bf215546Sopenharmony_ci } 2797bf215546Sopenharmony_ci default: 2798bf215546Sopenharmony_ci break; 2799bf215546Sopenharmony_ci } 2800bf215546Sopenharmony_ci } 2801bf215546Sopenharmony_ci } 2802bf215546Sopenharmony_ci } 2803bf215546Sopenharmony_ci } 2804bf215546Sopenharmony_ci} 2805bf215546Sopenharmony_ci 2806bf215546Sopenharmony_cistatic bool 2807bf215546Sopenharmony_ciis_residency_code(nir_ssa_def *src) 2808bf215546Sopenharmony_ci{ 2809bf215546Sopenharmony_ci nir_instr *parent = src->parent_instr; 2810bf215546Sopenharmony_ci while (1) { 2811bf215546Sopenharmony_ci if (parent->type == nir_instr_type_intrinsic) { 2812bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent); 2813bf215546Sopenharmony_ci assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident); 2814bf215546Sopenharmony_ci return false; 2815bf215546Sopenharmony_ci } 2816bf215546Sopenharmony_ci if (parent->type == nir_instr_type_tex) 2817bf215546Sopenharmony_ci return true; 2818bf215546Sopenharmony_ci assert(parent->type == nir_instr_type_alu); 2819bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(parent); 2820bf215546Sopenharmony_ci parent = alu->src[0].src.ssa->parent_instr; 2821bf215546Sopenharmony_ci } 2822bf215546Sopenharmony_ci} 2823bf215546Sopenharmony_ci 2824bf215546Sopenharmony_cistatic bool 2825bf215546Sopenharmony_cilower_sparse_instr(nir_builder *b, nir_instr *in, void *data) 2826bf215546Sopenharmony_ci{ 2827bf215546Sopenharmony_ci if (in->type != nir_instr_type_intrinsic) 2828bf215546Sopenharmony_ci return false; 2829bf215546Sopenharmony_ci nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in); 2830bf215546Sopenharmony_ci if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) { 2831bf215546Sopenharmony_ci b->cursor = nir_before_instr(&instr->instr); 2832bf215546Sopenharmony_ci nir_ssa_def *src0; 2833bf215546Sopenharmony_ci if (is_residency_code(instr->src[0].ssa)) 2834bf215546Sopenharmony_ci src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa); 2835bf215546Sopenharmony_ci else 2836bf215546Sopenharmony_ci src0 = instr->src[0].ssa; 2837bf215546Sopenharmony_ci nir_ssa_def *src1; 2838bf215546Sopenharmony_ci if (is_residency_code(instr->src[1].ssa)) 2839bf215546Sopenharmony_ci src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa); 2840bf215546Sopenharmony_ci else 2841bf215546Sopenharmony_ci src1 = instr->src[1].ssa; 2842bf215546Sopenharmony_ci nir_ssa_def *def = nir_iand(b, src0, src1); 2843bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, def, in); 2844bf215546Sopenharmony_ci nir_instr_remove(in); 2845bf215546Sopenharmony_ci return true; 2846bf215546Sopenharmony_ci } 2847bf215546Sopenharmony_ci if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident) 2848bf215546Sopenharmony_ci return false; 2849bf215546Sopenharmony_ci 2850bf215546Sopenharmony_ci /* vulkan vec can only be a vec4, but this is (maybe) vec5, 2851bf215546Sopenharmony_ci * so just rewrite as the first component since ntv is going to use a different 2852bf215546Sopenharmony_ci * method for storing the residency value anyway 2853bf215546Sopenharmony_ci */ 2854bf215546Sopenharmony_ci b->cursor = nir_before_instr(&instr->instr); 2855bf215546Sopenharmony_ci nir_instr *parent = instr->src[0].ssa->parent_instr; 2856bf215546Sopenharmony_ci if (is_residency_code(instr->src[0].ssa)) { 2857bf215546Sopenharmony_ci assert(parent->type == nir_instr_type_alu); 2858bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(parent); 2859bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent); 2860bf215546Sopenharmony_ci nir_instr_remove(parent); 2861bf215546Sopenharmony_ci } else { 2862bf215546Sopenharmony_ci nir_ssa_def *src; 2863bf215546Sopenharmony_ci if (parent->type == nir_instr_type_intrinsic) { 2864bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent); 2865bf215546Sopenharmony_ci assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident); 2866bf215546Sopenharmony_ci src = intr->src[0].ssa; 2867bf215546Sopenharmony_ci } else { 2868bf215546Sopenharmony_ci assert(parent->type == nir_instr_type_alu); 2869bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(parent); 2870bf215546Sopenharmony_ci src = alu->src[0].src.ssa; 2871bf215546Sopenharmony_ci } 2872bf215546Sopenharmony_ci if (instr->dest.ssa.bit_size != 32) { 2873bf215546Sopenharmony_ci if (instr->dest.ssa.bit_size == 1) 2874bf215546Sopenharmony_ci src = nir_ieq_imm(b, src, 1); 2875bf215546Sopenharmony_ci else 2876bf215546Sopenharmony_ci src = nir_u2uN(b, src, instr->dest.ssa.bit_size); 2877bf215546Sopenharmony_ci } 2878bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&instr->dest.ssa, src); 2879bf215546Sopenharmony_ci nir_instr_remove(in); 2880bf215546Sopenharmony_ci } 2881bf215546Sopenharmony_ci return true; 2882bf215546Sopenharmony_ci} 2883bf215546Sopenharmony_ci 2884bf215546Sopenharmony_cistatic bool 2885bf215546Sopenharmony_cilower_sparse(nir_shader *shader) 2886bf215546Sopenharmony_ci{ 2887bf215546Sopenharmony_ci return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL); 2888bf215546Sopenharmony_ci} 2889bf215546Sopenharmony_ci 2890bf215546Sopenharmony_cistatic bool 2891bf215546Sopenharmony_cimatch_tex_dests_instr(nir_builder *b, nir_instr *in, void *data) 2892bf215546Sopenharmony_ci{ 2893bf215546Sopenharmony_ci if (in->type != nir_instr_type_tex) 2894bf215546Sopenharmony_ci return false; 2895bf215546Sopenharmony_ci nir_tex_instr *tex = nir_instr_as_tex(in); 2896bf215546Sopenharmony_ci if (tex->op == nir_texop_txs || tex->op == nir_texop_lod) 2897bf215546Sopenharmony_ci return false; 2898bf215546Sopenharmony_ci int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle); 2899bf215546Sopenharmony_ci nir_variable *var = NULL; 2900bf215546Sopenharmony_ci if (handle != -1) { 2901bf215546Sopenharmony_ci var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src)); 2902bf215546Sopenharmony_ci } else { 2903bf215546Sopenharmony_ci nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) { 2904bf215546Sopenharmony_ci if (glsl_type_is_sampler(glsl_without_array(img->type))) { 2905bf215546Sopenharmony_ci unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1; 2906bf215546Sopenharmony_ci if (tex->texture_index >= img->data.driver_location && 2907bf215546Sopenharmony_ci tex->texture_index < img->data.driver_location + size) { 2908bf215546Sopenharmony_ci var = img; 2909bf215546Sopenharmony_ci break; 2910bf215546Sopenharmony_ci } 2911bf215546Sopenharmony_ci } 2912bf215546Sopenharmony_ci } 2913bf215546Sopenharmony_ci } 2914bf215546Sopenharmony_ci assert(var); 2915bf215546Sopenharmony_ci const struct glsl_type *type = glsl_without_array(var->type); 2916bf215546Sopenharmony_ci enum glsl_base_type ret_type = glsl_get_sampler_result_type(type); 2917bf215546Sopenharmony_ci bool is_int = glsl_base_type_is_integer(ret_type); 2918bf215546Sopenharmony_ci unsigned bit_size = glsl_base_type_get_bit_size(ret_type); 2919bf215546Sopenharmony_ci unsigned dest_size = nir_dest_bit_size(tex->dest); 2920bf215546Sopenharmony_ci b->cursor = nir_after_instr(in); 2921bf215546Sopenharmony_ci unsigned num_components = nir_dest_num_components(tex->dest); 2922bf215546Sopenharmony_ci bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse; 2923bf215546Sopenharmony_ci if (bit_size == dest_size && !rewrite_depth) 2924bf215546Sopenharmony_ci return false; 2925bf215546Sopenharmony_ci nir_ssa_def *dest = &tex->dest.ssa; 2926bf215546Sopenharmony_ci if (bit_size != dest_size) { 2927bf215546Sopenharmony_ci tex->dest.ssa.bit_size = bit_size; 2928bf215546Sopenharmony_ci tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type); 2929bf215546Sopenharmony_ci if (rewrite_depth) { 2930bf215546Sopenharmony_ci assert(!tex->is_new_style_shadow); 2931bf215546Sopenharmony_ci tex->dest.ssa.num_components = 1; 2932bf215546Sopenharmony_ci tex->is_new_style_shadow = true; 2933bf215546Sopenharmony_ci } 2934bf215546Sopenharmony_ci 2935bf215546Sopenharmony_ci if (is_int) { 2936bf215546Sopenharmony_ci if (glsl_unsigned_base_type_of(ret_type) == ret_type) 2937bf215546Sopenharmony_ci dest = nir_u2uN(b, &tex->dest.ssa, dest_size); 2938bf215546Sopenharmony_ci else 2939bf215546Sopenharmony_ci dest = nir_i2iN(b, &tex->dest.ssa, dest_size); 2940bf215546Sopenharmony_ci } else { 2941bf215546Sopenharmony_ci dest = nir_f2fN(b, &tex->dest.ssa, dest_size); 2942bf215546Sopenharmony_ci } 2943bf215546Sopenharmony_ci if (rewrite_depth) { 2944bf215546Sopenharmony_ci nir_ssa_def *vec[4] = {dest, dest, dest, dest}; 2945bf215546Sopenharmony_ci dest = nir_vec(b, vec, num_components); 2946bf215546Sopenharmony_ci } 2947bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr); 2948bf215546Sopenharmony_ci } else if (rewrite_depth) { 2949bf215546Sopenharmony_ci assert(!tex->is_new_style_shadow); 2950bf215546Sopenharmony_ci tex->dest.ssa.num_components = 1; 2951bf215546Sopenharmony_ci tex->is_new_style_shadow = true; 2952bf215546Sopenharmony_ci nir_ssa_def *vec[4] = {dest, dest, dest, dest}; 2953bf215546Sopenharmony_ci nir_ssa_def *splat = nir_vec(b, vec, num_components); 2954bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr); 2955bf215546Sopenharmony_ci } 2956bf215546Sopenharmony_ci return true; 2957bf215546Sopenharmony_ci} 2958bf215546Sopenharmony_ci 2959bf215546Sopenharmony_cistatic bool 2960bf215546Sopenharmony_cimatch_tex_dests(nir_shader *shader) 2961bf215546Sopenharmony_ci{ 2962bf215546Sopenharmony_ci return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, NULL); 2963bf215546Sopenharmony_ci} 2964bf215546Sopenharmony_ci 2965bf215546Sopenharmony_cistatic bool 2966bf215546Sopenharmony_cisplit_bitfields_instr(nir_builder *b, nir_instr *in, void *data) 2967bf215546Sopenharmony_ci{ 2968bf215546Sopenharmony_ci if (in->type != nir_instr_type_alu) 2969bf215546Sopenharmony_ci return false; 2970bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(in); 2971bf215546Sopenharmony_ci switch (alu->op) { 2972bf215546Sopenharmony_ci case nir_op_ubitfield_extract: 2973bf215546Sopenharmony_ci case nir_op_ibitfield_extract: 2974bf215546Sopenharmony_ci case nir_op_bitfield_insert: 2975bf215546Sopenharmony_ci break; 2976bf215546Sopenharmony_ci default: 2977bf215546Sopenharmony_ci return false; 2978bf215546Sopenharmony_ci } 2979bf215546Sopenharmony_ci unsigned num_components = nir_dest_num_components(alu->dest.dest); 2980bf215546Sopenharmony_ci if (num_components == 1) 2981bf215546Sopenharmony_ci return false; 2982bf215546Sopenharmony_ci b->cursor = nir_before_instr(in); 2983bf215546Sopenharmony_ci nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS]; 2984bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; i++) { 2985bf215546Sopenharmony_ci if (alu->op == nir_op_bitfield_insert) 2986bf215546Sopenharmony_ci dests[i] = nir_bitfield_insert(b, 2987bf215546Sopenharmony_ci nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]), 2988bf215546Sopenharmony_ci nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]), 2989bf215546Sopenharmony_ci nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]), 2990bf215546Sopenharmony_ci nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i])); 2991bf215546Sopenharmony_ci else if (alu->op == nir_op_ubitfield_extract) 2992bf215546Sopenharmony_ci dests[i] = nir_ubitfield_extract(b, 2993bf215546Sopenharmony_ci nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]), 2994bf215546Sopenharmony_ci nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]), 2995bf215546Sopenharmony_ci nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i])); 2996bf215546Sopenharmony_ci else 2997bf215546Sopenharmony_ci dests[i] = nir_ibitfield_extract(b, 2998bf215546Sopenharmony_ci nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]), 2999bf215546Sopenharmony_ci nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]), 3000bf215546Sopenharmony_ci nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i])); 3001bf215546Sopenharmony_ci } 3002bf215546Sopenharmony_ci nir_ssa_def *dest = nir_vec(b, dests, num_components); 3003bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, dest, in); 3004bf215546Sopenharmony_ci nir_instr_remove(in); 3005bf215546Sopenharmony_ci return true; 3006bf215546Sopenharmony_ci} 3007bf215546Sopenharmony_ci 3008bf215546Sopenharmony_ci 3009bf215546Sopenharmony_cistatic bool 3010bf215546Sopenharmony_cisplit_bitfields(nir_shader *shader) 3011bf215546Sopenharmony_ci{ 3012bf215546Sopenharmony_ci return nir_shader_instructions_pass(shader, split_bitfields_instr, nir_metadata_dominance, NULL); 3013bf215546Sopenharmony_ci} 3014bf215546Sopenharmony_ci 3015bf215546Sopenharmony_cistruct zink_shader * 3016bf215546Sopenharmony_cizink_shader_create(struct zink_screen *screen, struct nir_shader *nir, 3017bf215546Sopenharmony_ci const struct pipe_stream_output_info *so_info) 3018bf215546Sopenharmony_ci{ 3019bf215546Sopenharmony_ci struct zink_shader *ret = CALLOC_STRUCT(zink_shader); 3020bf215546Sopenharmony_ci bool have_psiz = false; 3021bf215546Sopenharmony_ci 3022bf215546Sopenharmony_ci ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model; 3023bf215546Sopenharmony_ci 3024bf215546Sopenharmony_ci ret->hash = _mesa_hash_pointer(ret); 3025bf215546Sopenharmony_ci ret->reduced_prim = get_shader_base_prim_type(nir); 3026bf215546Sopenharmony_ci 3027bf215546Sopenharmony_ci ret->programs = _mesa_pointer_set_create(NULL); 3028bf215546Sopenharmony_ci simple_mtx_init(&ret->lock, mtx_plain); 3029bf215546Sopenharmony_ci 3030bf215546Sopenharmony_ci nir_variable_mode indirect_derefs_modes = nir_var_function_temp; 3031bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_TESS_CTRL || 3032bf215546Sopenharmony_ci nir->info.stage == MESA_SHADER_TESS_EVAL) 3033bf215546Sopenharmony_ci indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out; 3034bf215546Sopenharmony_ci 3035bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes, 3036bf215546Sopenharmony_ci UINT32_MAX); 3037bf215546Sopenharmony_ci 3038bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_VERTEX) 3039bf215546Sopenharmony_ci create_vs_pushconst(nir); 3040bf215546Sopenharmony_ci else if (nir->info.stage == MESA_SHADER_TESS_CTRL || 3041bf215546Sopenharmony_ci nir->info.stage == MESA_SHADER_TESS_EVAL) 3042bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); 3043bf215546Sopenharmony_ci else if (nir->info.stage == MESA_SHADER_KERNEL) 3044bf215546Sopenharmony_ci create_cs_pushconst(nir); 3045bf215546Sopenharmony_ci 3046bf215546Sopenharmony_ci if (nir->info.stage < MESA_SHADER_FRAGMENT) 3047bf215546Sopenharmony_ci have_psiz = check_psiz(nir); 3048bf215546Sopenharmony_ci NIR_PASS_V(nir, lower_basevertex); 3049bf215546Sopenharmony_ci NIR_PASS_V(nir, lower_work_dim); 3050bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_regs_to_ssa); 3051bf215546Sopenharmony_ci NIR_PASS_V(nir, lower_baseinstance); 3052bf215546Sopenharmony_ci NIR_PASS_V(nir, lower_sparse); 3053bf215546Sopenharmony_ci NIR_PASS_V(nir, split_bitfields); 3054bf215546Sopenharmony_ci 3055bf215546Sopenharmony_ci if (screen->need_2D_zs) 3056bf215546Sopenharmony_ci NIR_PASS_V(nir, lower_1d_shadow, screen); 3057bf215546Sopenharmony_ci 3058bf215546Sopenharmony_ci { 3059bf215546Sopenharmony_ci nir_lower_subgroups_options subgroup_options = {0}; 3060bf215546Sopenharmony_ci subgroup_options.lower_to_scalar = true; 3061bf215546Sopenharmony_ci subgroup_options.subgroup_size = screen->info.props11.subgroupSize; 3062bf215546Sopenharmony_ci subgroup_options.ballot_bit_size = 32; 3063bf215546Sopenharmony_ci subgroup_options.ballot_components = 4; 3064bf215546Sopenharmony_ci subgroup_options.lower_subgroup_masks = true; 3065bf215546Sopenharmony_ci if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(nir->info.stage))) { 3066bf215546Sopenharmony_ci subgroup_options.subgroup_size = 1; 3067bf215546Sopenharmony_ci subgroup_options.lower_vote_trivial = true; 3068bf215546Sopenharmony_ci } 3069bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options); 3070bf215546Sopenharmony_ci } 3071bf215546Sopenharmony_ci 3072bf215546Sopenharmony_ci if (so_info && so_info->num_outputs) 3073bf215546Sopenharmony_ci NIR_PASS_V(nir, split_blocks); 3074bf215546Sopenharmony_ci 3075bf215546Sopenharmony_ci optimize_nir(nir, NULL); 3076bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); 3077bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_discard_if); 3078bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_fragcolor, 3079bf215546Sopenharmony_ci nir->info.fs.color_is_dual_source ? 1 : 8); 3080bf215546Sopenharmony_ci NIR_PASS_V(nir, lower_64bit_vertex_attribs); 3081bf215546Sopenharmony_ci bool needs_size = analyze_io(ret, nir); 3082bf215546Sopenharmony_ci NIR_PASS_V(nir, unbreak_bos, ret, needs_size); 3083bf215546Sopenharmony_ci /* run in compile if there could be inlined uniforms */ 3084bf215546Sopenharmony_ci if (!screen->driconf.inline_uniforms) { 3085bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared); 3086bf215546Sopenharmony_ci NIR_PASS_V(nir, rewrite_bo_access, screen); 3087bf215546Sopenharmony_ci NIR_PASS_V(nir, remove_bo_access, ret); 3088bf215546Sopenharmony_ci } 3089bf215546Sopenharmony_ci 3090bf215546Sopenharmony_ci if (zink_debug & ZINK_DEBUG_NIR) { 3091bf215546Sopenharmony_ci fprintf(stderr, "NIR shader:\n---8<---\n"); 3092bf215546Sopenharmony_ci nir_print_shader(nir, stderr); 3093bf215546Sopenharmony_ci fprintf(stderr, "---8<---\n"); 3094bf215546Sopenharmony_ci } 3095bf215546Sopenharmony_ci 3096bf215546Sopenharmony_ci struct zink_bindless_info bindless = {0}; 3097bf215546Sopenharmony_ci bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]; 3098bf215546Sopenharmony_ci bool has_bindless_io = false; 3099bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) { 3100bf215546Sopenharmony_ci var->data.is_xfb = false; 3101bf215546Sopenharmony_ci if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) { 3102bf215546Sopenharmony_ci has_bindless_io = true; 3103bf215546Sopenharmony_ci break; 3104bf215546Sopenharmony_ci } 3105bf215546Sopenharmony_ci } 3106bf215546Sopenharmony_ci if (has_bindless_io) 3107bf215546Sopenharmony_ci NIR_PASS_V(nir, lower_bindless_io); 3108bf215546Sopenharmony_ci 3109bf215546Sopenharmony_ci optimize_nir(nir, NULL); 3110bf215546Sopenharmony_ci prune_io(nir); 3111bf215546Sopenharmony_ci 3112bf215546Sopenharmony_ci scan_nir(screen, nir, ret); 3113bf215546Sopenharmony_ci 3114bf215546Sopenharmony_ci foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) { 3115bf215546Sopenharmony_ci if (_nir_shader_variable_has_mode(var, nir_var_uniform | 3116bf215546Sopenharmony_ci nir_var_image | 3117bf215546Sopenharmony_ci nir_var_mem_ubo | 3118bf215546Sopenharmony_ci nir_var_mem_ssbo)) { 3119bf215546Sopenharmony_ci enum zink_descriptor_type ztype; 3120bf215546Sopenharmony_ci const struct glsl_type *type = glsl_without_array(var->type); 3121bf215546Sopenharmony_ci if (var->data.mode == nir_var_mem_ubo) { 3122bf215546Sopenharmony_ci ztype = ZINK_DESCRIPTOR_TYPE_UBO; 3123bf215546Sopenharmony_ci /* buffer 0 is a push descriptor */ 3124bf215546Sopenharmony_ci var->data.descriptor_set = !!var->data.driver_location; 3125bf215546Sopenharmony_ci var->data.binding = !var->data.driver_location ? nir->info.stage : 3126bf215546Sopenharmony_ci zink_binding(nir->info.stage, 3127bf215546Sopenharmony_ci VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 3128bf215546Sopenharmony_ci var->data.driver_location, 3129bf215546Sopenharmony_ci screen->compact_descriptors); 3130bf215546Sopenharmony_ci assert(var->data.driver_location || var->data.binding < 10); 3131bf215546Sopenharmony_ci VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; 3132bf215546Sopenharmony_ci int binding = var->data.binding; 3133bf215546Sopenharmony_ci 3134bf215546Sopenharmony_ci ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location; 3135bf215546Sopenharmony_ci ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding; 3136bf215546Sopenharmony_ci ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype; 3137bf215546Sopenharmony_ci ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type); 3138bf215546Sopenharmony_ci assert(ret->bindings[ztype][ret->num_bindings[ztype]].size); 3139bf215546Sopenharmony_ci ret->num_bindings[ztype]++; 3140bf215546Sopenharmony_ci } else if (var->data.mode == nir_var_mem_ssbo) { 3141bf215546Sopenharmony_ci ztype = ZINK_DESCRIPTOR_TYPE_SSBO; 3142bf215546Sopenharmony_ci var->data.descriptor_set = screen->desc_set_id[ztype]; 3143bf215546Sopenharmony_ci var->data.binding = zink_binding(nir->info.stage, 3144bf215546Sopenharmony_ci VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3145bf215546Sopenharmony_ci var->data.driver_location, 3146bf215546Sopenharmony_ci screen->compact_descriptors); 3147bf215546Sopenharmony_ci ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location; 3148bf215546Sopenharmony_ci ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding; 3149bf215546Sopenharmony_ci ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; 3150bf215546Sopenharmony_ci ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type); 3151bf215546Sopenharmony_ci assert(ret->bindings[ztype][ret->num_bindings[ztype]].size); 3152bf215546Sopenharmony_ci ret->num_bindings[ztype]++; 3153bf215546Sopenharmony_ci } else { 3154bf215546Sopenharmony_ci assert(var->data.mode == nir_var_uniform || 3155bf215546Sopenharmony_ci var->data.mode == nir_var_image); 3156bf215546Sopenharmony_ci if (var->data.bindless) { 3157bf215546Sopenharmony_ci ret->bindless = true; 3158bf215546Sopenharmony_ci handle_bindless_var(nir, var, type, &bindless); 3159bf215546Sopenharmony_ci } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) { 3160bf215546Sopenharmony_ci VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type); 3161bf215546Sopenharmony_ci ztype = zink_desc_type_from_vktype(vktype); 3162bf215546Sopenharmony_ci if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) 3163bf215546Sopenharmony_ci ret->num_texel_buffers++; 3164bf215546Sopenharmony_ci var->data.driver_location = var->data.binding; 3165bf215546Sopenharmony_ci var->data.descriptor_set = screen->desc_set_id[ztype]; 3166bf215546Sopenharmony_ci var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors); 3167bf215546Sopenharmony_ci ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location; 3168bf215546Sopenharmony_ci ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding; 3169bf215546Sopenharmony_ci ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype; 3170bf215546Sopenharmony_ci if (glsl_type_is_array(var->type)) 3171bf215546Sopenharmony_ci ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type); 3172bf215546Sopenharmony_ci else 3173bf215546Sopenharmony_ci ret->bindings[ztype][ret->num_bindings[ztype]].size = 1; 3174bf215546Sopenharmony_ci ret->num_bindings[ztype]++; 3175bf215546Sopenharmony_ci } 3176bf215546Sopenharmony_ci } 3177bf215546Sopenharmony_ci } 3178bf215546Sopenharmony_ci } 3179bf215546Sopenharmony_ci bool bindless_lowered = false; 3180bf215546Sopenharmony_ci NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless); 3181bf215546Sopenharmony_ci ret->bindless |= bindless_lowered; 3182bf215546Sopenharmony_ci 3183bf215546Sopenharmony_ci if (!screen->info.feats.features.shaderInt64) 3184bf215546Sopenharmony_ci NIR_PASS_V(nir, lower_64bit_vars); 3185bf215546Sopenharmony_ci NIR_PASS_V(nir, match_tex_dests); 3186bf215546Sopenharmony_ci 3187bf215546Sopenharmony_ci ret->nir = nir; 3188bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, nir) 3189bf215546Sopenharmony_ci var->data.explicit_xfb_buffer = 0; 3190bf215546Sopenharmony_ci if (so_info && so_info->num_outputs) 3191bf215546Sopenharmony_ci update_so_info(ret, so_info, nir->info.outputs_written, have_psiz); 3192bf215546Sopenharmony_ci else if (have_psiz) { 3193bf215546Sopenharmony_ci bool have_fake_psiz = false; 3194bf215546Sopenharmony_ci nir_variable *psiz = NULL; 3195bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, nir) { 3196bf215546Sopenharmony_ci if (var->data.location == VARYING_SLOT_PSIZ) { 3197bf215546Sopenharmony_ci if (!var->data.explicit_location) 3198bf215546Sopenharmony_ci have_fake_psiz = true; 3199bf215546Sopenharmony_ci else 3200bf215546Sopenharmony_ci psiz = var; 3201bf215546Sopenharmony_ci } 3202bf215546Sopenharmony_ci } 3203bf215546Sopenharmony_ci if (have_fake_psiz && psiz) { 3204bf215546Sopenharmony_ci psiz->data.mode = nir_var_shader_temp; 3205bf215546Sopenharmony_ci nir_fixup_deref_modes(nir); 3206bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); 3207bf215546Sopenharmony_ci } 3208bf215546Sopenharmony_ci } 3209bf215546Sopenharmony_ci 3210bf215546Sopenharmony_ci ret->can_inline = true; 3211bf215546Sopenharmony_ci 3212bf215546Sopenharmony_ci return ret; 3213bf215546Sopenharmony_ci} 3214bf215546Sopenharmony_ci 3215bf215546Sopenharmony_cichar * 3216bf215546Sopenharmony_cizink_shader_finalize(struct pipe_screen *pscreen, void *nirptr) 3217bf215546Sopenharmony_ci{ 3218bf215546Sopenharmony_ci struct zink_screen *screen = zink_screen(pscreen); 3219bf215546Sopenharmony_ci nir_shader *nir = nirptr; 3220bf215546Sopenharmony_ci 3221bf215546Sopenharmony_ci nir_lower_tex_options tex_opts = { 3222bf215546Sopenharmony_ci .lower_invalid_implicit_lod = true, 3223bf215546Sopenharmony_ci }; 3224bf215546Sopenharmony_ci /* 3225bf215546Sopenharmony_ci Sampled Image must be an object whose type is OpTypeSampledImage. 3226bf215546Sopenharmony_ci The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D, 3227bf215546Sopenharmony_ci or Rect, and the Arrayed and MS operands must be 0. 3228bf215546Sopenharmony_ci - SPIRV, OpImageSampleProj* opcodes 3229bf215546Sopenharmony_ci */ 3230bf215546Sopenharmony_ci tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | 3231bf215546Sopenharmony_ci BITFIELD_BIT(GLSL_SAMPLER_DIM_MS); 3232bf215546Sopenharmony_ci tex_opts.lower_txp_array = true; 3233bf215546Sopenharmony_ci if (!screen->info.feats.features.shaderImageGatherExtended) 3234bf215546Sopenharmony_ci tex_opts.lower_tg4_offsets = true; 3235bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_tex, &tex_opts); 3236bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_GEOMETRY) 3237bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream); 3238bf215546Sopenharmony_ci optimize_nir(nir, NULL); 3239bf215546Sopenharmony_ci nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); 3240bf215546Sopenharmony_ci if (screen->driconf.inline_uniforms) 3241bf215546Sopenharmony_ci nir_find_inlinable_uniforms(nir); 3242bf215546Sopenharmony_ci 3243bf215546Sopenharmony_ci return NULL; 3244bf215546Sopenharmony_ci} 3245bf215546Sopenharmony_ci 3246bf215546Sopenharmony_civoid 3247bf215546Sopenharmony_cizink_shader_free(struct zink_context *ctx, struct zink_shader *shader) 3248bf215546Sopenharmony_ci{ 3249bf215546Sopenharmony_ci set_foreach(shader->programs, entry) { 3250bf215546Sopenharmony_ci if (shader->nir->info.stage == MESA_SHADER_COMPUTE) { 3251bf215546Sopenharmony_ci struct zink_compute_program *comp = (void*)entry->key; 3252bf215546Sopenharmony_ci if (!comp->base.removed) { 3253bf215546Sopenharmony_ci _mesa_hash_table_remove_key(&ctx->compute_program_cache, comp->shader); 3254bf215546Sopenharmony_ci comp->base.removed = true; 3255bf215546Sopenharmony_ci } 3256bf215546Sopenharmony_ci comp->shader = NULL; 3257bf215546Sopenharmony_ci zink_compute_program_reference(ctx, &comp, NULL); 3258bf215546Sopenharmony_ci } else { 3259bf215546Sopenharmony_ci struct zink_gfx_program *prog = (void*)entry->key; 3260bf215546Sopenharmony_ci enum pipe_shader_type pstage = pipe_shader_type_from_mesa(shader->nir->info.stage); 3261bf215546Sopenharmony_ci assert(pstage < ZINK_SHADER_COUNT); 3262bf215546Sopenharmony_ci if (!prog->base.removed && (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)) { 3263bf215546Sopenharmony_ci unsigned stages_present = prog->stages_present; 3264bf215546Sopenharmony_ci if (prog->shaders[PIPE_SHADER_TESS_CTRL] && prog->shaders[PIPE_SHADER_TESS_CTRL]->is_generated) 3265bf215546Sopenharmony_ci stages_present &= ~BITFIELD_BIT(PIPE_SHADER_TESS_CTRL); 3266bf215546Sopenharmony_ci struct hash_table *ht = &ctx->program_cache[stages_present >> 2]; 3267bf215546Sopenharmony_ci struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders); 3268bf215546Sopenharmony_ci assert(he); 3269bf215546Sopenharmony_ci _mesa_hash_table_remove(ht, he); 3270bf215546Sopenharmony_ci prog->base.removed = true; 3271bf215546Sopenharmony_ci } 3272bf215546Sopenharmony_ci if (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated) 3273bf215546Sopenharmony_ci prog->shaders[pstage] = NULL; 3274bf215546Sopenharmony_ci /* only remove generated tcs during parent tes destruction */ 3275bf215546Sopenharmony_ci if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated) 3276bf215546Sopenharmony_ci prog->shaders[PIPE_SHADER_TESS_CTRL] = NULL; 3277bf215546Sopenharmony_ci zink_gfx_program_reference(ctx, &prog, NULL); 3278bf215546Sopenharmony_ci } 3279bf215546Sopenharmony_ci } 3280bf215546Sopenharmony_ci if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated) { 3281bf215546Sopenharmony_ci /* automatically destroy generated tcs shaders when tes is destroyed */ 3282bf215546Sopenharmony_ci zink_shader_free(ctx, shader->generated); 3283bf215546Sopenharmony_ci shader->generated = NULL; 3284bf215546Sopenharmony_ci } 3285bf215546Sopenharmony_ci _mesa_set_destroy(shader->programs, NULL); 3286bf215546Sopenharmony_ci ralloc_free(shader->nir); 3287bf215546Sopenharmony_ci ralloc_free(shader->spirv); 3288bf215546Sopenharmony_ci FREE(shader); 3289bf215546Sopenharmony_ci} 3290bf215546Sopenharmony_ci 3291bf215546Sopenharmony_ci 3292bf215546Sopenharmony_ciVkShaderModule 3293bf215546Sopenharmony_cizink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices) 3294bf215546Sopenharmony_ci{ 3295bf215546Sopenharmony_ci assert(zs->nir->info.stage == MESA_SHADER_TESS_CTRL); 3296bf215546Sopenharmony_ci /* shortcut all the nir passes since we just have to change this one word */ 3297bf215546Sopenharmony_ci zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices; 3298bf215546Sopenharmony_ci return zink_shader_spirv_compile(screen, zs, NULL); 3299bf215546Sopenharmony_ci} 3300bf215546Sopenharmony_ci 3301bf215546Sopenharmony_ci/* creating a passthrough tcs shader that's roughly: 3302bf215546Sopenharmony_ci 3303bf215546Sopenharmony_ci#version 150 3304bf215546Sopenharmony_ci#extension GL_ARB_tessellation_shader : require 3305bf215546Sopenharmony_ci 3306bf215546Sopenharmony_ciin vec4 some_var[gl_MaxPatchVertices]; 3307bf215546Sopenharmony_ciout vec4 some_var_out; 3308bf215546Sopenharmony_ci 3309bf215546Sopenharmony_cilayout(push_constant) uniform tcsPushConstants { 3310bf215546Sopenharmony_ci layout(offset = 0) float TessLevelInner[2]; 3311bf215546Sopenharmony_ci layout(offset = 8) float TessLevelOuter[4]; 3312bf215546Sopenharmony_ci} u_tcsPushConstants; 3313bf215546Sopenharmony_cilayout(vertices = $vertices_per_patch) out; 3314bf215546Sopenharmony_civoid main() 3315bf215546Sopenharmony_ci{ 3316bf215546Sopenharmony_ci gl_TessLevelInner = u_tcsPushConstants.TessLevelInner; 3317bf215546Sopenharmony_ci gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter; 3318bf215546Sopenharmony_ci some_var_out = some_var[gl_InvocationID]; 3319bf215546Sopenharmony_ci} 3320bf215546Sopenharmony_ci 3321bf215546Sopenharmony_ci*/ 3322bf215546Sopenharmony_cistruct zink_shader * 3323bf215546Sopenharmony_cizink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch) 3324bf215546Sopenharmony_ci{ 3325bf215546Sopenharmony_ci struct zink_shader *ret = CALLOC_STRUCT(zink_shader); 3326bf215546Sopenharmony_ci ret->hash = _mesa_hash_pointer(ret); 3327bf215546Sopenharmony_ci ret->programs = _mesa_pointer_set_create(NULL); 3328bf215546Sopenharmony_ci simple_mtx_init(&ret->lock, mtx_plain); 3329bf215546Sopenharmony_ci 3330bf215546Sopenharmony_ci nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL); 3331bf215546Sopenharmony_ci nir_function *fn = nir_function_create(nir, "main"); 3332bf215546Sopenharmony_ci fn->is_entrypoint = true; 3333bf215546Sopenharmony_ci nir_function_impl *impl = nir_function_impl_create(fn); 3334bf215546Sopenharmony_ci 3335bf215546Sopenharmony_ci nir_builder b; 3336bf215546Sopenharmony_ci nir_builder_init(&b, impl); 3337bf215546Sopenharmony_ci b.cursor = nir_before_block(nir_start_block(impl)); 3338bf215546Sopenharmony_ci 3339bf215546Sopenharmony_ci nir_ssa_def *invocation_id = nir_load_invocation_id(&b); 3340bf215546Sopenharmony_ci 3341bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, vs->nir) { 3342bf215546Sopenharmony_ci const struct glsl_type *type = var->type; 3343bf215546Sopenharmony_ci const struct glsl_type *in_type = var->type; 3344bf215546Sopenharmony_ci const struct glsl_type *out_type = var->type; 3345bf215546Sopenharmony_ci char buf[1024]; 3346bf215546Sopenharmony_ci snprintf(buf, sizeof(buf), "%s_out", var->name); 3347bf215546Sopenharmony_ci in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0); 3348bf215546Sopenharmony_ci out_type = glsl_array_type(type, vertices_per_patch, 0); 3349bf215546Sopenharmony_ci 3350bf215546Sopenharmony_ci nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name); 3351bf215546Sopenharmony_ci nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf); 3352bf215546Sopenharmony_ci out->data.location = in->data.location = var->data.location; 3353bf215546Sopenharmony_ci out->data.location_frac = in->data.location_frac = var->data.location_frac; 3354bf215546Sopenharmony_ci 3355bf215546Sopenharmony_ci /* gl_in[] receives values from equivalent built-in output 3356bf215546Sopenharmony_ci variables written by the vertex shader (section 2.14.7). Each array 3357bf215546Sopenharmony_ci element of gl_in[] is a structure holding values for a specific vertex of 3358bf215546Sopenharmony_ci the input patch. The length of gl_in[] is equal to the 3359bf215546Sopenharmony_ci implementation-dependent maximum patch size (gl_MaxPatchVertices). 3360bf215546Sopenharmony_ci - ARB_tessellation_shader 3361bf215546Sopenharmony_ci */ 3362bf215546Sopenharmony_ci /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */ 3363bf215546Sopenharmony_ci nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id); 3364bf215546Sopenharmony_ci nir_ssa_def *load = nir_load_deref(&b, in_array_var); 3365bf215546Sopenharmony_ci nir_deref_instr *out_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id); 3366bf215546Sopenharmony_ci nir_store_deref(&b, out_array_var, load, 0xff); 3367bf215546Sopenharmony_ci } 3368bf215546Sopenharmony_ci nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner"); 3369bf215546Sopenharmony_ci gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER; 3370bf215546Sopenharmony_ci gl_TessLevelInner->data.patch = 1; 3371bf215546Sopenharmony_ci nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter"); 3372bf215546Sopenharmony_ci gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER; 3373bf215546Sopenharmony_ci gl_TessLevelOuter->data.patch = 1; 3374bf215546Sopenharmony_ci 3375bf215546Sopenharmony_ci /* hacks so we can size these right for now */ 3376bf215546Sopenharmony_ci struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 3); 3377bf215546Sopenharmony_ci /* just use a single blob for padding here because it's easier */ 3378bf215546Sopenharmony_ci fields[0].type = glsl_array_type(glsl_uint_type(), offsetof(struct zink_gfx_push_constant, default_inner_level) / 4, 0); 3379bf215546Sopenharmony_ci fields[0].name = ralloc_asprintf(nir, "padding"); 3380bf215546Sopenharmony_ci fields[0].offset = 0; 3381bf215546Sopenharmony_ci fields[1].type = glsl_array_type(glsl_uint_type(), 2, 0); 3382bf215546Sopenharmony_ci fields[1].name = ralloc_asprintf(nir, "gl_TessLevelInner"); 3383bf215546Sopenharmony_ci fields[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level); 3384bf215546Sopenharmony_ci fields[2].type = glsl_array_type(glsl_uint_type(), 4, 0); 3385bf215546Sopenharmony_ci fields[2].name = ralloc_asprintf(nir, "gl_TessLevelOuter"); 3386bf215546Sopenharmony_ci fields[2].offset = offsetof(struct zink_gfx_push_constant, default_outer_level); 3387bf215546Sopenharmony_ci nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const, 3388bf215546Sopenharmony_ci glsl_struct_type(fields, 3, "struct", false), "pushconst"); 3389bf215546Sopenharmony_ci pushconst->data.location = VARYING_SLOT_VAR0; 3390bf215546Sopenharmony_ci 3391bf215546Sopenharmony_ci nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 1), .base = 1, .range = 8); 3392bf215546Sopenharmony_ci nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 2), .base = 2, .range = 16); 3393bf215546Sopenharmony_ci 3394bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 3395bf215546Sopenharmony_ci nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i); 3396bf215546Sopenharmony_ci nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff); 3397bf215546Sopenharmony_ci } 3398bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 3399bf215546Sopenharmony_ci nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i); 3400bf215546Sopenharmony_ci nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff); 3401bf215546Sopenharmony_ci } 3402bf215546Sopenharmony_ci 3403bf215546Sopenharmony_ci nir->info.tess.tcs_vertices_out = vertices_per_patch; 3404bf215546Sopenharmony_ci nir_validate_shader(nir, "created"); 3405bf215546Sopenharmony_ci 3406bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_regs_to_ssa); 3407bf215546Sopenharmony_ci optimize_nir(nir, NULL); 3408bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); 3409bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_convert_from_ssa, true); 3410bf215546Sopenharmony_ci 3411bf215546Sopenharmony_ci ret->nir = nir; 3412bf215546Sopenharmony_ci ret->is_generated = true; 3413bf215546Sopenharmony_ci return ret; 3414bf215546Sopenharmony_ci} 3415