1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2014 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Connor Abbott (cwabbott0@gmail.com) 25bf215546Sopenharmony_ci * Jason Ekstrand (jason@jlekstrand.net) 26bf215546Sopenharmony_ci * 27bf215546Sopenharmony_ci */ 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci/* 30bf215546Sopenharmony_ci * This lowering pass converts references to input/output variables with 31bf215546Sopenharmony_ci * loads/stores to actual input/output intrinsics. 32bf215546Sopenharmony_ci */ 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci#include "nir.h" 35bf215546Sopenharmony_ci#include "nir_builder.h" 36bf215546Sopenharmony_ci#include "nir_deref.h" 37bf215546Sopenharmony_ci#include "nir_xfb_info.h" 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci#include "util/u_math.h" 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_cistruct lower_io_state { 42bf215546Sopenharmony_ci void *dead_ctx; 43bf215546Sopenharmony_ci nir_builder builder; 44bf215546Sopenharmony_ci int (*type_size)(const struct glsl_type *type, bool); 45bf215546Sopenharmony_ci nir_variable_mode modes; 46bf215546Sopenharmony_ci nir_lower_io_options options; 47bf215546Sopenharmony_ci}; 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_cistatic nir_intrinsic_op 50bf215546Sopenharmony_cissbo_atomic_for_deref(nir_intrinsic_op deref_op) 51bf215546Sopenharmony_ci{ 52bf215546Sopenharmony_ci switch (deref_op) { 53bf215546Sopenharmony_ci#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O; 54bf215546Sopenharmony_ci OP(atomic_exchange) 55bf215546Sopenharmony_ci OP(atomic_comp_swap) 56bf215546Sopenharmony_ci OP(atomic_add) 57bf215546Sopenharmony_ci OP(atomic_imin) 58bf215546Sopenharmony_ci OP(atomic_umin) 59bf215546Sopenharmony_ci OP(atomic_imax) 60bf215546Sopenharmony_ci OP(atomic_umax) 61bf215546Sopenharmony_ci OP(atomic_and) 62bf215546Sopenharmony_ci OP(atomic_or) 63bf215546Sopenharmony_ci OP(atomic_xor) 64bf215546Sopenharmony_ci OP(atomic_fadd) 65bf215546Sopenharmony_ci OP(atomic_fmin) 66bf215546Sopenharmony_ci OP(atomic_fmax) 67bf215546Sopenharmony_ci OP(atomic_fcomp_swap) 68bf215546Sopenharmony_ci#undef OP 69bf215546Sopenharmony_ci default: 70bf215546Sopenharmony_ci unreachable("Invalid SSBO atomic"); 71bf215546Sopenharmony_ci } 72bf215546Sopenharmony_ci} 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_cistatic nir_intrinsic_op 75bf215546Sopenharmony_ciglobal_atomic_for_deref(nir_address_format addr_format, 76bf215546Sopenharmony_ci nir_intrinsic_op deref_op) 77bf215546Sopenharmony_ci{ 78bf215546Sopenharmony_ci switch (deref_op) { 79bf215546Sopenharmony_ci#define OP(O) case nir_intrinsic_deref_##O: \ 80bf215546Sopenharmony_ci if (addr_format != nir_address_format_2x32bit_global) \ 81bf215546Sopenharmony_ci return nir_intrinsic_global_##O; \ 82bf215546Sopenharmony_ci else \ 83bf215546Sopenharmony_ci return nir_intrinsic_global_##O##_2x32; 84bf215546Sopenharmony_ci OP(atomic_exchange) 85bf215546Sopenharmony_ci OP(atomic_comp_swap) 86bf215546Sopenharmony_ci OP(atomic_add) 87bf215546Sopenharmony_ci OP(atomic_imin) 88bf215546Sopenharmony_ci OP(atomic_umin) 89bf215546Sopenharmony_ci OP(atomic_imax) 90bf215546Sopenharmony_ci OP(atomic_umax) 91bf215546Sopenharmony_ci OP(atomic_and) 92bf215546Sopenharmony_ci OP(atomic_or) 93bf215546Sopenharmony_ci OP(atomic_xor) 94bf215546Sopenharmony_ci OP(atomic_fadd) 95bf215546Sopenharmony_ci OP(atomic_fmin) 96bf215546Sopenharmony_ci OP(atomic_fmax) 97bf215546Sopenharmony_ci OP(atomic_fcomp_swap) 98bf215546Sopenharmony_ci#undef OP 99bf215546Sopenharmony_ci default: 100bf215546Sopenharmony_ci unreachable("Invalid SSBO atomic"); 101bf215546Sopenharmony_ci } 102bf215546Sopenharmony_ci} 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_cistatic nir_intrinsic_op 105bf215546Sopenharmony_cishared_atomic_for_deref(nir_intrinsic_op deref_op) 106bf215546Sopenharmony_ci{ 107bf215546Sopenharmony_ci switch (deref_op) { 108bf215546Sopenharmony_ci#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O; 109bf215546Sopenharmony_ci OP(atomic_exchange) 110bf215546Sopenharmony_ci OP(atomic_comp_swap) 111bf215546Sopenharmony_ci OP(atomic_add) 112bf215546Sopenharmony_ci OP(atomic_imin) 113bf215546Sopenharmony_ci OP(atomic_umin) 114bf215546Sopenharmony_ci OP(atomic_imax) 115bf215546Sopenharmony_ci OP(atomic_umax) 116bf215546Sopenharmony_ci OP(atomic_and) 117bf215546Sopenharmony_ci OP(atomic_or) 118bf215546Sopenharmony_ci OP(atomic_xor) 119bf215546Sopenharmony_ci OP(atomic_fadd) 120bf215546Sopenharmony_ci OP(atomic_fmin) 121bf215546Sopenharmony_ci OP(atomic_fmax) 122bf215546Sopenharmony_ci OP(atomic_fcomp_swap) 123bf215546Sopenharmony_ci#undef OP 124bf215546Sopenharmony_ci default: 125bf215546Sopenharmony_ci unreachable("Invalid shared atomic"); 126bf215546Sopenharmony_ci } 127bf215546Sopenharmony_ci} 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_cistatic nir_intrinsic_op 130bf215546Sopenharmony_citask_payload_atomic_for_deref(nir_intrinsic_op deref_op) 131bf215546Sopenharmony_ci{ 132bf215546Sopenharmony_ci switch (deref_op) { 133bf215546Sopenharmony_ci#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_task_payload_##O; 134bf215546Sopenharmony_ci OP(atomic_exchange) 135bf215546Sopenharmony_ci OP(atomic_comp_swap) 136bf215546Sopenharmony_ci OP(atomic_add) 137bf215546Sopenharmony_ci OP(atomic_imin) 138bf215546Sopenharmony_ci OP(atomic_umin) 139bf215546Sopenharmony_ci OP(atomic_imax) 140bf215546Sopenharmony_ci OP(atomic_umax) 141bf215546Sopenharmony_ci OP(atomic_and) 142bf215546Sopenharmony_ci OP(atomic_or) 143bf215546Sopenharmony_ci OP(atomic_xor) 144bf215546Sopenharmony_ci OP(atomic_fadd) 145bf215546Sopenharmony_ci OP(atomic_fmin) 146bf215546Sopenharmony_ci OP(atomic_fmax) 147bf215546Sopenharmony_ci OP(atomic_fcomp_swap) 148bf215546Sopenharmony_ci#undef OP 149bf215546Sopenharmony_ci default: 150bf215546Sopenharmony_ci unreachable("Invalid task payload atomic"); 151bf215546Sopenharmony_ci } 152bf215546Sopenharmony_ci} 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_civoid 155bf215546Sopenharmony_cinir_assign_var_locations(nir_shader *shader, nir_variable_mode mode, 156bf215546Sopenharmony_ci unsigned *size, 157bf215546Sopenharmony_ci int (*type_size)(const struct glsl_type *, bool)) 158bf215546Sopenharmony_ci{ 159bf215546Sopenharmony_ci unsigned location = 0; 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, shader, mode) { 162bf215546Sopenharmony_ci var->data.driver_location = location; 163bf215546Sopenharmony_ci bool bindless_type_size = var->data.mode == nir_var_shader_in || 164bf215546Sopenharmony_ci var->data.mode == nir_var_shader_out || 165bf215546Sopenharmony_ci var->data.bindless; 166bf215546Sopenharmony_ci location += type_size(var->type, bindless_type_size); 167bf215546Sopenharmony_ci } 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci *size = location; 170bf215546Sopenharmony_ci} 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci/** 173bf215546Sopenharmony_ci * Some inputs and outputs are arrayed, meaning that there is an extra level 174bf215546Sopenharmony_ci * of array indexing to handle mismatches between the shader interface and the 175bf215546Sopenharmony_ci * dispatch pattern of the shader. For instance, geometry shaders are 176bf215546Sopenharmony_ci * executed per-primitive while their inputs and outputs are specified 177bf215546Sopenharmony_ci * per-vertex so all inputs and outputs have to be additionally indexed with 178bf215546Sopenharmony_ci * the vertex index within the primitive. 179bf215546Sopenharmony_ci */ 180bf215546Sopenharmony_cibool 181bf215546Sopenharmony_cinir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage) 182bf215546Sopenharmony_ci{ 183bf215546Sopenharmony_ci if (var->data.patch || !glsl_type_is_array(var->type)) 184bf215546Sopenharmony_ci return false; 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci if (stage == MESA_SHADER_MESH) { 187bf215546Sopenharmony_ci /* NV_mesh_shader: this is flat array for the whole workgroup. */ 188bf215546Sopenharmony_ci if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES) 189bf215546Sopenharmony_ci return var->data.per_primitive; 190bf215546Sopenharmony_ci } 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci if (var->data.mode == nir_var_shader_in) 193bf215546Sopenharmony_ci return stage == MESA_SHADER_GEOMETRY || 194bf215546Sopenharmony_ci stage == MESA_SHADER_TESS_CTRL || 195bf215546Sopenharmony_ci stage == MESA_SHADER_TESS_EVAL; 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci if (var->data.mode == nir_var_shader_out) 198bf215546Sopenharmony_ci return stage == MESA_SHADER_TESS_CTRL || 199bf215546Sopenharmony_ci stage == MESA_SHADER_MESH; 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci return false; 202bf215546Sopenharmony_ci} 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_cistatic unsigned get_number_of_slots(struct lower_io_state *state, 205bf215546Sopenharmony_ci const nir_variable *var) 206bf215546Sopenharmony_ci{ 207bf215546Sopenharmony_ci const struct glsl_type *type = var->type; 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) { 210bf215546Sopenharmony_ci assert(glsl_type_is_array(type)); 211bf215546Sopenharmony_ci type = glsl_get_array_element(type); 212bf215546Sopenharmony_ci } 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci /* NV_mesh_shader: 215bf215546Sopenharmony_ci * PRIMITIVE_INDICES is a flat array, not a proper arrayed output, 216bf215546Sopenharmony_ci * as opposed to D3D-style mesh shaders where it's addressed by 217bf215546Sopenharmony_ci * the primitive index. 218bf215546Sopenharmony_ci * Prevent assigning several slots to primitive indices, 219bf215546Sopenharmony_ci * to avoid some issues. 220bf215546Sopenharmony_ci */ 221bf215546Sopenharmony_ci if (state->builder.shader->info.stage == MESA_SHADER_MESH && 222bf215546Sopenharmony_ci var->data.location == VARYING_SLOT_PRIMITIVE_INDICES && 223bf215546Sopenharmony_ci !nir_is_arrayed_io(var, state->builder.shader->info.stage)) 224bf215546Sopenharmony_ci return 1; 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci return state->type_size(type, var->data.bindless); 227bf215546Sopenharmony_ci} 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_cistatic nir_ssa_def * 230bf215546Sopenharmony_ciget_io_offset(nir_builder *b, nir_deref_instr *deref, 231bf215546Sopenharmony_ci nir_ssa_def **array_index, 232bf215546Sopenharmony_ci int (*type_size)(const struct glsl_type *, bool), 233bf215546Sopenharmony_ci unsigned *component, bool bts) 234bf215546Sopenharmony_ci{ 235bf215546Sopenharmony_ci nir_deref_path path; 236bf215546Sopenharmony_ci nir_deref_path_init(&path, deref, NULL); 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci assert(path.path[0]->deref_type == nir_deref_type_var); 239bf215546Sopenharmony_ci nir_deref_instr **p = &path.path[1]; 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader 242bf215546Sopenharmony_ci * inputs), skip the outermost array index. Process the rest normally. 243bf215546Sopenharmony_ci */ 244bf215546Sopenharmony_ci if (array_index != NULL) { 245bf215546Sopenharmony_ci assert((*p)->deref_type == nir_deref_type_array); 246bf215546Sopenharmony_ci *array_index = nir_ssa_for_src(b, (*p)->arr.index, 1); 247bf215546Sopenharmony_ci p++; 248bf215546Sopenharmony_ci } 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci if (path.path[0]->var->data.compact) { 251bf215546Sopenharmony_ci assert((*p)->deref_type == nir_deref_type_array); 252bf215546Sopenharmony_ci assert(glsl_type_is_scalar((*p)->type)); 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci /* We always lower indirect dereferences for "compact" array vars. */ 255bf215546Sopenharmony_ci const unsigned index = nir_src_as_uint((*p)->arr.index); 256bf215546Sopenharmony_ci const unsigned total_offset = *component + index; 257bf215546Sopenharmony_ci const unsigned slot_offset = total_offset / 4; 258bf215546Sopenharmony_ci *component = total_offset % 4; 259bf215546Sopenharmony_ci return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset); 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci /* Just emit code and let constant-folding go to town */ 263bf215546Sopenharmony_ci nir_ssa_def *offset = nir_imm_int(b, 0); 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci for (; *p; p++) { 266bf215546Sopenharmony_ci if ((*p)->deref_type == nir_deref_type_array) { 267bf215546Sopenharmony_ci unsigned size = type_size((*p)->type, bts); 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci nir_ssa_def *mul = 270bf215546Sopenharmony_ci nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size); 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_ci offset = nir_iadd(b, offset, mul); 273bf215546Sopenharmony_ci } else if ((*p)->deref_type == nir_deref_type_struct) { 274bf215546Sopenharmony_ci /* p starts at path[1], so this is safe */ 275bf215546Sopenharmony_ci nir_deref_instr *parent = *(p - 1); 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci unsigned field_offset = 0; 278bf215546Sopenharmony_ci for (unsigned i = 0; i < (*p)->strct.index; i++) { 279bf215546Sopenharmony_ci field_offset += type_size(glsl_get_struct_field(parent->type, i), bts); 280bf215546Sopenharmony_ci } 281bf215546Sopenharmony_ci offset = nir_iadd_imm(b, offset, field_offset); 282bf215546Sopenharmony_ci } else { 283bf215546Sopenharmony_ci unreachable("Unsupported deref type"); 284bf215546Sopenharmony_ci } 285bf215546Sopenharmony_ci } 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci nir_deref_path_finish(&path); 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci return offset; 290bf215546Sopenharmony_ci} 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_cistatic nir_ssa_def * 293bf215546Sopenharmony_ciemit_load(struct lower_io_state *state, 294bf215546Sopenharmony_ci nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset, 295bf215546Sopenharmony_ci unsigned component, unsigned num_components, unsigned bit_size, 296bf215546Sopenharmony_ci nir_alu_type dest_type) 297bf215546Sopenharmony_ci{ 298bf215546Sopenharmony_ci nir_builder *b = &state->builder; 299bf215546Sopenharmony_ci const nir_shader *nir = b->shader; 300bf215546Sopenharmony_ci nir_variable_mode mode = var->data.mode; 301bf215546Sopenharmony_ci nir_ssa_def *barycentric = NULL; 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci nir_intrinsic_op op; 304bf215546Sopenharmony_ci switch (mode) { 305bf215546Sopenharmony_ci case nir_var_shader_in: 306bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_FRAGMENT && 307bf215546Sopenharmony_ci nir->options->use_interpolated_input_intrinsics && 308bf215546Sopenharmony_ci var->data.interpolation != INTERP_MODE_FLAT && 309bf215546Sopenharmony_ci !var->data.per_primitive) { 310bf215546Sopenharmony_ci if (var->data.interpolation == INTERP_MODE_EXPLICIT) { 311bf215546Sopenharmony_ci assert(array_index != NULL); 312bf215546Sopenharmony_ci op = nir_intrinsic_load_input_vertex; 313bf215546Sopenharmony_ci } else { 314bf215546Sopenharmony_ci assert(array_index == NULL); 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci nir_intrinsic_op bary_op; 317bf215546Sopenharmony_ci if (var->data.sample || 318bf215546Sopenharmony_ci (state->options & nir_lower_io_force_sample_interpolation)) 319bf215546Sopenharmony_ci bary_op = nir_intrinsic_load_barycentric_sample; 320bf215546Sopenharmony_ci else if (var->data.centroid) 321bf215546Sopenharmony_ci bary_op = nir_intrinsic_load_barycentric_centroid; 322bf215546Sopenharmony_ci else 323bf215546Sopenharmony_ci bary_op = nir_intrinsic_load_barycentric_pixel; 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_ci barycentric = nir_load_barycentric(&state->builder, bary_op, 326bf215546Sopenharmony_ci var->data.interpolation); 327bf215546Sopenharmony_ci op = nir_intrinsic_load_interpolated_input; 328bf215546Sopenharmony_ci } 329bf215546Sopenharmony_ci } else { 330bf215546Sopenharmony_ci op = array_index ? nir_intrinsic_load_per_vertex_input : 331bf215546Sopenharmony_ci nir_intrinsic_load_input; 332bf215546Sopenharmony_ci } 333bf215546Sopenharmony_ci break; 334bf215546Sopenharmony_ci case nir_var_shader_out: 335bf215546Sopenharmony_ci op = !array_index ? nir_intrinsic_load_output : 336bf215546Sopenharmony_ci var->data.per_primitive ? nir_intrinsic_load_per_primitive_output : 337bf215546Sopenharmony_ci nir_intrinsic_load_per_vertex_output; 338bf215546Sopenharmony_ci break; 339bf215546Sopenharmony_ci case nir_var_uniform: 340bf215546Sopenharmony_ci op = nir_intrinsic_load_uniform; 341bf215546Sopenharmony_ci break; 342bf215546Sopenharmony_ci default: 343bf215546Sopenharmony_ci unreachable("Unknown variable mode"); 344bf215546Sopenharmony_ci } 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_ci nir_intrinsic_instr *load = 347bf215546Sopenharmony_ci nir_intrinsic_instr_create(state->builder.shader, op); 348bf215546Sopenharmony_ci load->num_components = num_components; 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ci nir_intrinsic_set_base(load, var->data.driver_location); 351bf215546Sopenharmony_ci if (mode == nir_var_shader_in || mode == nir_var_shader_out) 352bf215546Sopenharmony_ci nir_intrinsic_set_component(load, component); 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci if (load->intrinsic == nir_intrinsic_load_uniform) 355bf215546Sopenharmony_ci nir_intrinsic_set_range(load, 356bf215546Sopenharmony_ci state->type_size(var->type, var->data.bindless)); 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_ci if (nir_intrinsic_has_access(load)) 359bf215546Sopenharmony_ci nir_intrinsic_set_access(load, var->data.access); 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci nir_intrinsic_set_dest_type(load, dest_type); 362bf215546Sopenharmony_ci 363bf215546Sopenharmony_ci if (load->intrinsic != nir_intrinsic_load_uniform) { 364bf215546Sopenharmony_ci nir_io_semantics semantics = {0}; 365bf215546Sopenharmony_ci semantics.location = var->data.location; 366bf215546Sopenharmony_ci semantics.num_slots = get_number_of_slots(state, var); 367bf215546Sopenharmony_ci semantics.fb_fetch_output = var->data.fb_fetch_output; 368bf215546Sopenharmony_ci semantics.medium_precision = 369bf215546Sopenharmony_ci var->data.precision == GLSL_PRECISION_MEDIUM || 370bf215546Sopenharmony_ci var->data.precision == GLSL_PRECISION_LOW; 371bf215546Sopenharmony_ci nir_intrinsic_set_io_semantics(load, semantics); 372bf215546Sopenharmony_ci } 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci if (array_index) { 375bf215546Sopenharmony_ci load->src[0] = nir_src_for_ssa(array_index); 376bf215546Sopenharmony_ci load->src[1] = nir_src_for_ssa(offset); 377bf215546Sopenharmony_ci } else if (barycentric) { 378bf215546Sopenharmony_ci load->src[0] = nir_src_for_ssa(barycentric); 379bf215546Sopenharmony_ci load->src[1] = nir_src_for_ssa(offset); 380bf215546Sopenharmony_ci } else { 381bf215546Sopenharmony_ci load->src[0] = nir_src_for_ssa(offset); 382bf215546Sopenharmony_ci } 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci nir_ssa_dest_init(&load->instr, &load->dest, 385bf215546Sopenharmony_ci num_components, bit_size, NULL); 386bf215546Sopenharmony_ci nir_builder_instr_insert(b, &load->instr); 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci return &load->dest.ssa; 389bf215546Sopenharmony_ci} 390bf215546Sopenharmony_ci 391bf215546Sopenharmony_cistatic nir_ssa_def * 392bf215546Sopenharmony_cilower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, 393bf215546Sopenharmony_ci nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset, 394bf215546Sopenharmony_ci unsigned component, const struct glsl_type *type) 395bf215546Sopenharmony_ci{ 396bf215546Sopenharmony_ci assert(intrin->dest.is_ssa); 397bf215546Sopenharmony_ci if (intrin->dest.ssa.bit_size == 64 && 398bf215546Sopenharmony_ci (state->options & nir_lower_io_lower_64bit_to_32)) { 399bf215546Sopenharmony_ci nir_builder *b = &state->builder; 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci const unsigned slot_size = state->type_size(glsl_dvec_type(2), false); 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci nir_ssa_def *comp64[4]; 404bf215546Sopenharmony_ci assert(component == 0 || component == 2); 405bf215546Sopenharmony_ci unsigned dest_comp = 0; 406bf215546Sopenharmony_ci while (dest_comp < intrin->dest.ssa.num_components) { 407bf215546Sopenharmony_ci const unsigned num_comps = 408bf215546Sopenharmony_ci MIN2(intrin->dest.ssa.num_components - dest_comp, 409bf215546Sopenharmony_ci (4 - component) / 2); 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci nir_ssa_def *data32 = 412bf215546Sopenharmony_ci emit_load(state, array_index, var, offset, component, 413bf215546Sopenharmony_ci num_comps * 2, 32, nir_type_uint32); 414bf215546Sopenharmony_ci for (unsigned i = 0; i < num_comps; i++) { 415bf215546Sopenharmony_ci comp64[dest_comp + i] = 416bf215546Sopenharmony_ci nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2))); 417bf215546Sopenharmony_ci } 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci /* Only the first store has a component offset */ 420bf215546Sopenharmony_ci component = 0; 421bf215546Sopenharmony_ci dest_comp += num_comps; 422bf215546Sopenharmony_ci offset = nir_iadd_imm(b, offset, slot_size); 423bf215546Sopenharmony_ci } 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci return nir_vec(b, comp64, intrin->dest.ssa.num_components); 426bf215546Sopenharmony_ci } else if (intrin->dest.ssa.bit_size == 1) { 427bf215546Sopenharmony_ci /* Booleans are 32-bit */ 428bf215546Sopenharmony_ci assert(glsl_type_is_boolean(type)); 429bf215546Sopenharmony_ci return nir_b2b1(&state->builder, 430bf215546Sopenharmony_ci emit_load(state, array_index, var, offset, component, 431bf215546Sopenharmony_ci intrin->dest.ssa.num_components, 32, 432bf215546Sopenharmony_ci nir_type_bool32)); 433bf215546Sopenharmony_ci } else { 434bf215546Sopenharmony_ci return emit_load(state, array_index, var, offset, component, 435bf215546Sopenharmony_ci intrin->dest.ssa.num_components, 436bf215546Sopenharmony_ci intrin->dest.ssa.bit_size, 437bf215546Sopenharmony_ci nir_get_nir_type_for_glsl_type(type)); 438bf215546Sopenharmony_ci } 439bf215546Sopenharmony_ci} 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_cistatic void 442bf215546Sopenharmony_ciemit_store(struct lower_io_state *state, nir_ssa_def *data, 443bf215546Sopenharmony_ci nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset, 444bf215546Sopenharmony_ci unsigned component, unsigned num_components, 445bf215546Sopenharmony_ci nir_component_mask_t write_mask, nir_alu_type src_type) 446bf215546Sopenharmony_ci{ 447bf215546Sopenharmony_ci nir_builder *b = &state->builder; 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci assert(var->data.mode == nir_var_shader_out); 450bf215546Sopenharmony_ci nir_intrinsic_op op = 451bf215546Sopenharmony_ci !array_index ? nir_intrinsic_store_output : 452bf215546Sopenharmony_ci var->data.per_primitive ? nir_intrinsic_store_per_primitive_output : 453bf215546Sopenharmony_ci nir_intrinsic_store_per_vertex_output; 454bf215546Sopenharmony_ci 455bf215546Sopenharmony_ci nir_intrinsic_instr *store = 456bf215546Sopenharmony_ci nir_intrinsic_instr_create(state->builder.shader, op); 457bf215546Sopenharmony_ci store->num_components = num_components; 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci store->src[0] = nir_src_for_ssa(data); 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci nir_intrinsic_set_base(store, var->data.driver_location); 462bf215546Sopenharmony_ci nir_intrinsic_set_component(store, component); 463bf215546Sopenharmony_ci nir_intrinsic_set_src_type(store, src_type); 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci nir_intrinsic_set_write_mask(store, write_mask); 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci if (nir_intrinsic_has_access(store)) 468bf215546Sopenharmony_ci nir_intrinsic_set_access(store, var->data.access); 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_ci if (array_index) 471bf215546Sopenharmony_ci store->src[1] = nir_src_for_ssa(array_index); 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset); 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci unsigned gs_streams = 0; 476bf215546Sopenharmony_ci if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) { 477bf215546Sopenharmony_ci if (var->data.stream & NIR_STREAM_PACKED) { 478bf215546Sopenharmony_ci gs_streams = var->data.stream & ~NIR_STREAM_PACKED; 479bf215546Sopenharmony_ci } else { 480bf215546Sopenharmony_ci assert(var->data.stream < 4); 481bf215546Sopenharmony_ci gs_streams = 0; 482bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; ++i) 483bf215546Sopenharmony_ci gs_streams |= var->data.stream << (2 * i); 484bf215546Sopenharmony_ci } 485bf215546Sopenharmony_ci } 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci nir_io_semantics semantics = {0}; 488bf215546Sopenharmony_ci semantics.location = var->data.location; 489bf215546Sopenharmony_ci semantics.num_slots = get_number_of_slots(state, var); 490bf215546Sopenharmony_ci semantics.dual_source_blend_index = var->data.index; 491bf215546Sopenharmony_ci semantics.gs_streams = gs_streams; 492bf215546Sopenharmony_ci semantics.medium_precision = 493bf215546Sopenharmony_ci var->data.precision == GLSL_PRECISION_MEDIUM || 494bf215546Sopenharmony_ci var->data.precision == GLSL_PRECISION_LOW; 495bf215546Sopenharmony_ci semantics.per_view = var->data.per_view; 496bf215546Sopenharmony_ci semantics.invariant = var->data.invariant; 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_ci nir_intrinsic_set_io_semantics(store, semantics); 499bf215546Sopenharmony_ci 500bf215546Sopenharmony_ci nir_builder_instr_insert(b, &store->instr); 501bf215546Sopenharmony_ci} 502bf215546Sopenharmony_ci 503bf215546Sopenharmony_cistatic void 504bf215546Sopenharmony_cilower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state, 505bf215546Sopenharmony_ci nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset, 506bf215546Sopenharmony_ci unsigned component, const struct glsl_type *type) 507bf215546Sopenharmony_ci{ 508bf215546Sopenharmony_ci assert(intrin->src[1].is_ssa); 509bf215546Sopenharmony_ci if (intrin->src[1].ssa->bit_size == 64 && 510bf215546Sopenharmony_ci (state->options & nir_lower_io_lower_64bit_to_32)) { 511bf215546Sopenharmony_ci nir_builder *b = &state->builder; 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci const unsigned slot_size = state->type_size(glsl_dvec_type(2), false); 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci assert(component == 0 || component == 2); 516bf215546Sopenharmony_ci unsigned src_comp = 0; 517bf215546Sopenharmony_ci nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin); 518bf215546Sopenharmony_ci while (src_comp < intrin->num_components) { 519bf215546Sopenharmony_ci const unsigned num_comps = 520bf215546Sopenharmony_ci MIN2(intrin->num_components - src_comp, 521bf215546Sopenharmony_ci (4 - component) / 2); 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci if (write_mask & BITFIELD_MASK(num_comps)) { 524bf215546Sopenharmony_ci nir_ssa_def *data = 525bf215546Sopenharmony_ci nir_channels(b, intrin->src[1].ssa, 526bf215546Sopenharmony_ci BITFIELD_RANGE(src_comp, num_comps)); 527bf215546Sopenharmony_ci nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32); 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_ci nir_component_mask_t write_mask32 = 0; 530bf215546Sopenharmony_ci for (unsigned i = 0; i < num_comps; i++) { 531bf215546Sopenharmony_ci if (write_mask & BITFIELD_MASK(num_comps) & (1 << i)) 532bf215546Sopenharmony_ci write_mask32 |= 3 << (i * 2); 533bf215546Sopenharmony_ci } 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci emit_store(state, data32, array_index, var, offset, 536bf215546Sopenharmony_ci component, data32->num_components, write_mask32, 537bf215546Sopenharmony_ci nir_type_uint32); 538bf215546Sopenharmony_ci } 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_ci /* Only the first store has a component offset */ 541bf215546Sopenharmony_ci component = 0; 542bf215546Sopenharmony_ci src_comp += num_comps; 543bf215546Sopenharmony_ci write_mask >>= num_comps; 544bf215546Sopenharmony_ci offset = nir_iadd_imm(b, offset, slot_size); 545bf215546Sopenharmony_ci } 546bf215546Sopenharmony_ci } else if (intrin->dest.ssa.bit_size == 1) { 547bf215546Sopenharmony_ci /* Booleans are 32-bit */ 548bf215546Sopenharmony_ci assert(glsl_type_is_boolean(type)); 549bf215546Sopenharmony_ci nir_ssa_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa); 550bf215546Sopenharmony_ci emit_store(state, b32_val, array_index, var, offset, 551bf215546Sopenharmony_ci component, intrin->num_components, 552bf215546Sopenharmony_ci nir_intrinsic_write_mask(intrin), 553bf215546Sopenharmony_ci nir_type_bool32); 554bf215546Sopenharmony_ci } else { 555bf215546Sopenharmony_ci emit_store(state, intrin->src[1].ssa, array_index, var, offset, 556bf215546Sopenharmony_ci component, intrin->num_components, 557bf215546Sopenharmony_ci nir_intrinsic_write_mask(intrin), 558bf215546Sopenharmony_ci nir_get_nir_type_for_glsl_type(type)); 559bf215546Sopenharmony_ci } 560bf215546Sopenharmony_ci} 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_cistatic nir_ssa_def * 563bf215546Sopenharmony_cilower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state, 564bf215546Sopenharmony_ci nir_variable *var, nir_ssa_def *offset, unsigned component, 565bf215546Sopenharmony_ci const struct glsl_type *type) 566bf215546Sopenharmony_ci{ 567bf215546Sopenharmony_ci nir_builder *b = &state->builder; 568bf215546Sopenharmony_ci assert(var->data.mode == nir_var_shader_in); 569bf215546Sopenharmony_ci 570bf215546Sopenharmony_ci /* Ignore interpolateAt() for flat variables - flat is flat. Lower 571bf215546Sopenharmony_ci * interpolateAtVertex() for explicit variables. 572bf215546Sopenharmony_ci */ 573bf215546Sopenharmony_ci if (var->data.interpolation == INTERP_MODE_FLAT || 574bf215546Sopenharmony_ci var->data.interpolation == INTERP_MODE_EXPLICIT) { 575bf215546Sopenharmony_ci nir_ssa_def *vertex_index = NULL; 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_ci if (var->data.interpolation == INTERP_MODE_EXPLICIT) { 578bf215546Sopenharmony_ci assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex); 579bf215546Sopenharmony_ci vertex_index = intrin->src[1].ssa; 580bf215546Sopenharmony_ci } 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_ci return lower_load(intrin, state, vertex_index, var, offset, component, type); 583bf215546Sopenharmony_ci } 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_ci /* None of the supported APIs allow interpolation on 64-bit things */ 586bf215546Sopenharmony_ci assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32); 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_ci nir_intrinsic_op bary_op; 589bf215546Sopenharmony_ci switch (intrin->intrinsic) { 590bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_centroid: 591bf215546Sopenharmony_ci bary_op = (state->options & nir_lower_io_force_sample_interpolation) ? 592bf215546Sopenharmony_ci nir_intrinsic_load_barycentric_sample : 593bf215546Sopenharmony_ci nir_intrinsic_load_barycentric_centroid; 594bf215546Sopenharmony_ci break; 595bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_sample: 596bf215546Sopenharmony_ci bary_op = nir_intrinsic_load_barycentric_at_sample; 597bf215546Sopenharmony_ci break; 598bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_offset: 599bf215546Sopenharmony_ci bary_op = nir_intrinsic_load_barycentric_at_offset; 600bf215546Sopenharmony_ci break; 601bf215546Sopenharmony_ci default: 602bf215546Sopenharmony_ci unreachable("Bogus interpolateAt() intrinsic."); 603bf215546Sopenharmony_ci } 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_ci nir_intrinsic_instr *bary_setup = 606bf215546Sopenharmony_ci nir_intrinsic_instr_create(state->builder.shader, bary_op); 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL); 609bf215546Sopenharmony_ci nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation); 610bf215546Sopenharmony_ci 611bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample || 612bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_interp_deref_at_offset || 613bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex) 614bf215546Sopenharmony_ci nir_src_copy(&bary_setup->src[0], &intrin->src[1]); 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci nir_builder_instr_insert(b, &bary_setup->instr); 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_ci nir_io_semantics semantics = {0}; 619bf215546Sopenharmony_ci semantics.location = var->data.location; 620bf215546Sopenharmony_ci semantics.num_slots = get_number_of_slots(state, var); 621bf215546Sopenharmony_ci semantics.medium_precision = 622bf215546Sopenharmony_ci var->data.precision == GLSL_PRECISION_MEDIUM || 623bf215546Sopenharmony_ci var->data.precision == GLSL_PRECISION_LOW; 624bf215546Sopenharmony_ci 625bf215546Sopenharmony_ci assert(intrin->dest.is_ssa); 626bf215546Sopenharmony_ci nir_ssa_def *load = 627bf215546Sopenharmony_ci nir_load_interpolated_input(&state->builder, 628bf215546Sopenharmony_ci intrin->dest.ssa.num_components, 629bf215546Sopenharmony_ci intrin->dest.ssa.bit_size, 630bf215546Sopenharmony_ci &bary_setup->dest.ssa, 631bf215546Sopenharmony_ci offset, 632bf215546Sopenharmony_ci .base = var->data.driver_location, 633bf215546Sopenharmony_ci .component = component, 634bf215546Sopenharmony_ci .io_semantics = semantics); 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci return load; 637bf215546Sopenharmony_ci} 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_cistatic bool 640bf215546Sopenharmony_cinir_lower_io_block(nir_block *block, 641bf215546Sopenharmony_ci struct lower_io_state *state) 642bf215546Sopenharmony_ci{ 643bf215546Sopenharmony_ci nir_builder *b = &state->builder; 644bf215546Sopenharmony_ci const nir_shader_compiler_options *options = b->shader->options; 645bf215546Sopenharmony_ci bool progress = false; 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 648bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 649bf215546Sopenharmony_ci continue; 650bf215546Sopenharmony_ci 651bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_ci switch (intrin->intrinsic) { 654bf215546Sopenharmony_ci case nir_intrinsic_load_deref: 655bf215546Sopenharmony_ci case nir_intrinsic_store_deref: 656bf215546Sopenharmony_ci /* We can lower the io for this nir instrinsic */ 657bf215546Sopenharmony_ci break; 658bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_centroid: 659bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_sample: 660bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_offset: 661bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_vertex: 662bf215546Sopenharmony_ci /* We can optionally lower these to load_interpolated_input */ 663bf215546Sopenharmony_ci if (options->use_interpolated_input_intrinsics || 664bf215546Sopenharmony_ci options->lower_interpolate_at) 665bf215546Sopenharmony_ci break; 666bf215546Sopenharmony_ci FALLTHROUGH; 667bf215546Sopenharmony_ci default: 668bf215546Sopenharmony_ci /* We can't lower the io for this nir instrinsic, so skip it */ 669bf215546Sopenharmony_ci continue; 670bf215546Sopenharmony_ci } 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 673bf215546Sopenharmony_ci if (!nir_deref_mode_is_one_of(deref, state->modes)) 674bf215546Sopenharmony_ci continue; 675bf215546Sopenharmony_ci 676bf215546Sopenharmony_ci nir_variable *var = nir_deref_instr_get_variable(deref); 677bf215546Sopenharmony_ci 678bf215546Sopenharmony_ci b->cursor = nir_before_instr(instr); 679bf215546Sopenharmony_ci 680bf215546Sopenharmony_ci const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage); 681bf215546Sopenharmony_ci 682bf215546Sopenharmony_ci nir_ssa_def *offset; 683bf215546Sopenharmony_ci nir_ssa_def *array_index = NULL; 684bf215546Sopenharmony_ci unsigned component_offset = var->data.location_frac; 685bf215546Sopenharmony_ci bool bindless_type_size = var->data.mode == nir_var_shader_in || 686bf215546Sopenharmony_ci var->data.mode == nir_var_shader_out || 687bf215546Sopenharmony_ci var->data.bindless; 688bf215546Sopenharmony_ci 689bf215546Sopenharmony_ci if (nir_deref_instr_is_known_out_of_bounds(deref)) { 690bf215546Sopenharmony_ci /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says: 691bf215546Sopenharmony_ci * 692bf215546Sopenharmony_ci * In the subsections described above for array, vector, matrix and 693bf215546Sopenharmony_ci * structure accesses, any out-of-bounds access produced undefined 694bf215546Sopenharmony_ci * behavior.... 695bf215546Sopenharmony_ci * Out-of-bounds reads return undefined values, which 696bf215546Sopenharmony_ci * include values from other variables of the active program or zero. 697bf215546Sopenharmony_ci * Out-of-bounds writes may be discarded or overwrite 698bf215546Sopenharmony_ci * other variables of the active program. 699bf215546Sopenharmony_ci * 700bf215546Sopenharmony_ci * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero 701bf215546Sopenharmony_ci * for reads. 702bf215546Sopenharmony_ci * 703bf215546Sopenharmony_ci * Otherwise get_io_offset would return out-of-bound offset which may 704bf215546Sopenharmony_ci * result in out-of-bound loading/storing of inputs/outputs, 705bf215546Sopenharmony_ci * that could cause issues in drivers down the line. 706bf215546Sopenharmony_ci */ 707bf215546Sopenharmony_ci if (intrin->intrinsic != nir_intrinsic_store_deref) { 708bf215546Sopenharmony_ci nir_ssa_def *zero = 709bf215546Sopenharmony_ci nir_imm_zero(b, intrin->dest.ssa.num_components, 710bf215546Sopenharmony_ci intrin->dest.ssa.bit_size); 711bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 712bf215546Sopenharmony_ci zero); 713bf215546Sopenharmony_ci } 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_ci nir_instr_remove(&intrin->instr); 716bf215546Sopenharmony_ci progress = true; 717bf215546Sopenharmony_ci continue; 718bf215546Sopenharmony_ci } 719bf215546Sopenharmony_ci 720bf215546Sopenharmony_ci offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL, 721bf215546Sopenharmony_ci state->type_size, &component_offset, 722bf215546Sopenharmony_ci bindless_type_size); 723bf215546Sopenharmony_ci 724bf215546Sopenharmony_ci nir_ssa_def *replacement = NULL; 725bf215546Sopenharmony_ci 726bf215546Sopenharmony_ci switch (intrin->intrinsic) { 727bf215546Sopenharmony_ci case nir_intrinsic_load_deref: 728bf215546Sopenharmony_ci replacement = lower_load(intrin, state, array_index, var, offset, 729bf215546Sopenharmony_ci component_offset, deref->type); 730bf215546Sopenharmony_ci break; 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci case nir_intrinsic_store_deref: 733bf215546Sopenharmony_ci lower_store(intrin, state, array_index, var, offset, 734bf215546Sopenharmony_ci component_offset, deref->type); 735bf215546Sopenharmony_ci break; 736bf215546Sopenharmony_ci 737bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_centroid: 738bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_sample: 739bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_offset: 740bf215546Sopenharmony_ci case nir_intrinsic_interp_deref_at_vertex: 741bf215546Sopenharmony_ci assert(array_index == NULL); 742bf215546Sopenharmony_ci replacement = lower_interpolate_at(intrin, state, var, offset, 743bf215546Sopenharmony_ci component_offset, deref->type); 744bf215546Sopenharmony_ci break; 745bf215546Sopenharmony_ci 746bf215546Sopenharmony_ci default: 747bf215546Sopenharmony_ci continue; 748bf215546Sopenharmony_ci } 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci if (replacement) { 751bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 752bf215546Sopenharmony_ci replacement); 753bf215546Sopenharmony_ci } 754bf215546Sopenharmony_ci nir_instr_remove(&intrin->instr); 755bf215546Sopenharmony_ci progress = true; 756bf215546Sopenharmony_ci } 757bf215546Sopenharmony_ci 758bf215546Sopenharmony_ci return progress; 759bf215546Sopenharmony_ci} 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_cistatic bool 762bf215546Sopenharmony_cinir_lower_io_impl(nir_function_impl *impl, 763bf215546Sopenharmony_ci nir_variable_mode modes, 764bf215546Sopenharmony_ci int (*type_size)(const struct glsl_type *, bool), 765bf215546Sopenharmony_ci nir_lower_io_options options) 766bf215546Sopenharmony_ci{ 767bf215546Sopenharmony_ci struct lower_io_state state; 768bf215546Sopenharmony_ci bool progress = false; 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_ci nir_builder_init(&state.builder, impl); 771bf215546Sopenharmony_ci state.dead_ctx = ralloc_context(NULL); 772bf215546Sopenharmony_ci state.modes = modes; 773bf215546Sopenharmony_ci state.type_size = type_size; 774bf215546Sopenharmony_ci state.options = options; 775bf215546Sopenharmony_ci 776bf215546Sopenharmony_ci ASSERTED nir_variable_mode supported_modes = 777bf215546Sopenharmony_ci nir_var_shader_in | nir_var_shader_out | nir_var_uniform; 778bf215546Sopenharmony_ci assert(!(modes & ~supported_modes)); 779bf215546Sopenharmony_ci 780bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 781bf215546Sopenharmony_ci progress |= nir_lower_io_block(block, &state); 782bf215546Sopenharmony_ci } 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_ci ralloc_free(state.dead_ctx); 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_none); 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_ci return progress; 789bf215546Sopenharmony_ci} 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci/** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics 792bf215546Sopenharmony_ci * 793bf215546Sopenharmony_ci * This pass is intended to be used for cross-stage shader I/O and driver- 794bf215546Sopenharmony_ci * managed uniforms to turn deref-based access into a simpler model using 795bf215546Sopenharmony_ci * locations or offsets. For fragment shader inputs, it can optionally turn 796bf215546Sopenharmony_ci * load_deref into an explicit interpolation using barycentrics coming from 797bf215546Sopenharmony_ci * one of the load_barycentric_* intrinsics. This pass requires that all 798bf215546Sopenharmony_ci * deref chains are complete and contain no casts. 799bf215546Sopenharmony_ci */ 800bf215546Sopenharmony_cibool 801bf215546Sopenharmony_cinir_lower_io(nir_shader *shader, nir_variable_mode modes, 802bf215546Sopenharmony_ci int (*type_size)(const struct glsl_type *, bool), 803bf215546Sopenharmony_ci nir_lower_io_options options) 804bf215546Sopenharmony_ci{ 805bf215546Sopenharmony_ci bool progress = false; 806bf215546Sopenharmony_ci 807bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 808bf215546Sopenharmony_ci if (function->impl) { 809bf215546Sopenharmony_ci progress |= nir_lower_io_impl(function->impl, modes, 810bf215546Sopenharmony_ci type_size, options); 811bf215546Sopenharmony_ci } 812bf215546Sopenharmony_ci } 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci return progress; 815bf215546Sopenharmony_ci} 816bf215546Sopenharmony_ci 817bf215546Sopenharmony_cistatic unsigned 818bf215546Sopenharmony_citype_scalar_size_bytes(const struct glsl_type *type) 819bf215546Sopenharmony_ci{ 820bf215546Sopenharmony_ci assert(glsl_type_is_vector_or_scalar(type) || 821bf215546Sopenharmony_ci glsl_type_is_matrix(type)); 822bf215546Sopenharmony_ci return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8; 823bf215546Sopenharmony_ci} 824bf215546Sopenharmony_ci 825bf215546Sopenharmony_cistatic nir_ssa_def * 826bf215546Sopenharmony_cibuild_addr_iadd(nir_builder *b, nir_ssa_def *addr, 827bf215546Sopenharmony_ci nir_address_format addr_format, 828bf215546Sopenharmony_ci nir_variable_mode modes, 829bf215546Sopenharmony_ci nir_ssa_def *offset) 830bf215546Sopenharmony_ci{ 831bf215546Sopenharmony_ci assert(offset->num_components == 1); 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_ci switch (addr_format) { 834bf215546Sopenharmony_ci case nir_address_format_32bit_global: 835bf215546Sopenharmony_ci case nir_address_format_64bit_global: 836bf215546Sopenharmony_ci case nir_address_format_32bit_offset: 837bf215546Sopenharmony_ci assert(addr->bit_size == offset->bit_size); 838bf215546Sopenharmony_ci assert(addr->num_components == 1); 839bf215546Sopenharmony_ci return nir_iadd(b, addr, offset); 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_ci case nir_address_format_2x32bit_global: { 842bf215546Sopenharmony_ci assert(addr->num_components == 2); 843bf215546Sopenharmony_ci nir_ssa_def *lo = nir_channel(b, addr, 0); 844bf215546Sopenharmony_ci nir_ssa_def *hi = nir_channel(b, addr, 1); 845bf215546Sopenharmony_ci nir_ssa_def *res_lo = nir_iadd(b, lo, offset); 846bf215546Sopenharmony_ci nir_ssa_def *carry = nir_b2i32(b, nir_ult(b, res_lo, lo)); 847bf215546Sopenharmony_ci nir_ssa_def *res_hi = nir_iadd(b, hi, carry); 848bf215546Sopenharmony_ci return nir_vec2(b, res_lo, res_hi); 849bf215546Sopenharmony_ci } 850bf215546Sopenharmony_ci 851bf215546Sopenharmony_ci case nir_address_format_32bit_offset_as_64bit: 852bf215546Sopenharmony_ci assert(addr->num_components == 1); 853bf215546Sopenharmony_ci assert(offset->bit_size == 32); 854bf215546Sopenharmony_ci return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset)); 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci case nir_address_format_64bit_global_32bit_offset: 857bf215546Sopenharmony_ci case nir_address_format_64bit_bounded_global: 858bf215546Sopenharmony_ci assert(addr->num_components == 4); 859bf215546Sopenharmony_ci assert(addr->bit_size == offset->bit_size); 860bf215546Sopenharmony_ci return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3); 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset: 863bf215546Sopenharmony_ci assert(addr->num_components == 2); 864bf215546Sopenharmony_ci assert(addr->bit_size == offset->bit_size); 865bf215546Sopenharmony_ci return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1); 866bf215546Sopenharmony_ci 867bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset_pack64: 868bf215546Sopenharmony_ci assert(addr->num_components == 1); 869bf215546Sopenharmony_ci assert(offset->bit_size == 32); 870bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, 871bf215546Sopenharmony_ci nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset), 872bf215546Sopenharmony_ci nir_unpack_64_2x32_split_y(b, addr)); 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_ci case nir_address_format_vec2_index_32bit_offset: 875bf215546Sopenharmony_ci assert(addr->num_components == 3); 876bf215546Sopenharmony_ci assert(offset->bit_size == 32); 877bf215546Sopenharmony_ci return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2); 878bf215546Sopenharmony_ci 879bf215546Sopenharmony_ci case nir_address_format_62bit_generic: 880bf215546Sopenharmony_ci assert(addr->num_components == 1); 881bf215546Sopenharmony_ci assert(addr->bit_size == 64); 882bf215546Sopenharmony_ci assert(offset->bit_size == 64); 883bf215546Sopenharmony_ci if (!(modes & ~(nir_var_function_temp | 884bf215546Sopenharmony_ci nir_var_shader_temp | 885bf215546Sopenharmony_ci nir_var_mem_shared))) { 886bf215546Sopenharmony_ci /* If we're sure it's one of these modes, we can do an easy 32-bit 887bf215546Sopenharmony_ci * addition and don't need to bother with 64-bit math. 888bf215546Sopenharmony_ci */ 889bf215546Sopenharmony_ci nir_ssa_def *addr32 = nir_unpack_64_2x32_split_x(b, addr); 890bf215546Sopenharmony_ci nir_ssa_def *type = nir_unpack_64_2x32_split_y(b, addr); 891bf215546Sopenharmony_ci addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset)); 892bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, addr32, type); 893bf215546Sopenharmony_ci } else { 894bf215546Sopenharmony_ci return nir_iadd(b, addr, offset); 895bf215546Sopenharmony_ci } 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci case nir_address_format_logical: 898bf215546Sopenharmony_ci unreachable("Unsupported address format"); 899bf215546Sopenharmony_ci } 900bf215546Sopenharmony_ci unreachable("Invalid address format"); 901bf215546Sopenharmony_ci} 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_cistatic unsigned 904bf215546Sopenharmony_ciaddr_get_offset_bit_size(nir_ssa_def *addr, nir_address_format addr_format) 905bf215546Sopenharmony_ci{ 906bf215546Sopenharmony_ci if (addr_format == nir_address_format_32bit_offset_as_64bit || 907bf215546Sopenharmony_ci addr_format == nir_address_format_32bit_index_offset_pack64) 908bf215546Sopenharmony_ci return 32; 909bf215546Sopenharmony_ci return addr->bit_size; 910bf215546Sopenharmony_ci} 911bf215546Sopenharmony_ci 912bf215546Sopenharmony_cistatic nir_ssa_def * 913bf215546Sopenharmony_cibuild_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr, 914bf215546Sopenharmony_ci nir_address_format addr_format, 915bf215546Sopenharmony_ci nir_variable_mode modes, 916bf215546Sopenharmony_ci int64_t offset) 917bf215546Sopenharmony_ci{ 918bf215546Sopenharmony_ci return build_addr_iadd(b, addr, addr_format, modes, 919bf215546Sopenharmony_ci nir_imm_intN_t(b, offset, 920bf215546Sopenharmony_ci addr_get_offset_bit_size(addr, addr_format))); 921bf215546Sopenharmony_ci} 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_cistatic nir_ssa_def * 924bf215546Sopenharmony_cibuild_addr_for_var(nir_builder *b, nir_variable *var, 925bf215546Sopenharmony_ci nir_address_format addr_format) 926bf215546Sopenharmony_ci{ 927bf215546Sopenharmony_ci assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared | 928bf215546Sopenharmony_ci nir_var_mem_task_payload | 929bf215546Sopenharmony_ci nir_var_mem_global | 930bf215546Sopenharmony_ci nir_var_shader_temp | nir_var_function_temp | 931bf215546Sopenharmony_ci nir_var_mem_push_const | nir_var_mem_constant)); 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_ci const unsigned num_comps = nir_address_format_num_components(addr_format); 934bf215546Sopenharmony_ci const unsigned bit_size = nir_address_format_bit_size(addr_format); 935bf215546Sopenharmony_ci 936bf215546Sopenharmony_ci switch (addr_format) { 937bf215546Sopenharmony_ci case nir_address_format_2x32bit_global: 938bf215546Sopenharmony_ci case nir_address_format_32bit_global: 939bf215546Sopenharmony_ci case nir_address_format_64bit_global: { 940bf215546Sopenharmony_ci nir_ssa_def *base_addr; 941bf215546Sopenharmony_ci switch (var->data.mode) { 942bf215546Sopenharmony_ci case nir_var_shader_temp: 943bf215546Sopenharmony_ci base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0); 944bf215546Sopenharmony_ci break; 945bf215546Sopenharmony_ci 946bf215546Sopenharmony_ci case nir_var_function_temp: 947bf215546Sopenharmony_ci base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1); 948bf215546Sopenharmony_ci break; 949bf215546Sopenharmony_ci 950bf215546Sopenharmony_ci case nir_var_mem_constant: 951bf215546Sopenharmony_ci base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size); 952bf215546Sopenharmony_ci break; 953bf215546Sopenharmony_ci 954bf215546Sopenharmony_ci case nir_var_mem_shared: 955bf215546Sopenharmony_ci base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size); 956bf215546Sopenharmony_ci break; 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_ci case nir_var_mem_global: 959bf215546Sopenharmony_ci base_addr = nir_load_global_base_ptr(b, num_comps, bit_size); 960bf215546Sopenharmony_ci break; 961bf215546Sopenharmony_ci 962bf215546Sopenharmony_ci default: 963bf215546Sopenharmony_ci unreachable("Unsupported variable mode"); 964bf215546Sopenharmony_ci } 965bf215546Sopenharmony_ci 966bf215546Sopenharmony_ci return build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode, 967bf215546Sopenharmony_ci var->data.driver_location); 968bf215546Sopenharmony_ci } 969bf215546Sopenharmony_ci 970bf215546Sopenharmony_ci case nir_address_format_32bit_offset: 971bf215546Sopenharmony_ci assert(var->data.driver_location <= UINT32_MAX); 972bf215546Sopenharmony_ci return nir_imm_int(b, var->data.driver_location); 973bf215546Sopenharmony_ci 974bf215546Sopenharmony_ci case nir_address_format_32bit_offset_as_64bit: 975bf215546Sopenharmony_ci assert(var->data.driver_location <= UINT32_MAX); 976bf215546Sopenharmony_ci return nir_imm_int64(b, var->data.driver_location); 977bf215546Sopenharmony_ci 978bf215546Sopenharmony_ci case nir_address_format_62bit_generic: 979bf215546Sopenharmony_ci switch (var->data.mode) { 980bf215546Sopenharmony_ci case nir_var_shader_temp: 981bf215546Sopenharmony_ci case nir_var_function_temp: 982bf215546Sopenharmony_ci assert(var->data.driver_location <= UINT32_MAX); 983bf215546Sopenharmony_ci return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64); 984bf215546Sopenharmony_ci 985bf215546Sopenharmony_ci case nir_var_mem_shared: 986bf215546Sopenharmony_ci assert(var->data.driver_location <= UINT32_MAX); 987bf215546Sopenharmony_ci return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64); 988bf215546Sopenharmony_ci 989bf215546Sopenharmony_ci case nir_var_mem_global: 990bf215546Sopenharmony_ci return nir_iadd_imm(b, nir_load_global_base_ptr(b, num_comps, bit_size), 991bf215546Sopenharmony_ci var->data.driver_location); 992bf215546Sopenharmony_ci 993bf215546Sopenharmony_ci default: 994bf215546Sopenharmony_ci unreachable("Unsupported variable mode"); 995bf215546Sopenharmony_ci } 996bf215546Sopenharmony_ci 997bf215546Sopenharmony_ci default: 998bf215546Sopenharmony_ci unreachable("Unsupported address format"); 999bf215546Sopenharmony_ci } 1000bf215546Sopenharmony_ci} 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_cistatic nir_ssa_def * 1003bf215546Sopenharmony_cibuild_runtime_addr_mode_check(nir_builder *b, nir_ssa_def *addr, 1004bf215546Sopenharmony_ci nir_address_format addr_format, 1005bf215546Sopenharmony_ci nir_variable_mode mode) 1006bf215546Sopenharmony_ci{ 1007bf215546Sopenharmony_ci /* The compile-time check failed; do a run-time check */ 1008bf215546Sopenharmony_ci switch (addr_format) { 1009bf215546Sopenharmony_ci case nir_address_format_62bit_generic: { 1010bf215546Sopenharmony_ci assert(addr->num_components == 1); 1011bf215546Sopenharmony_ci assert(addr->bit_size == 64); 1012bf215546Sopenharmony_ci nir_ssa_def *mode_enum = nir_ushr(b, addr, nir_imm_int(b, 62)); 1013bf215546Sopenharmony_ci switch (mode) { 1014bf215546Sopenharmony_ci case nir_var_function_temp: 1015bf215546Sopenharmony_ci case nir_var_shader_temp: 1016bf215546Sopenharmony_ci return nir_ieq_imm(b, mode_enum, 0x2); 1017bf215546Sopenharmony_ci 1018bf215546Sopenharmony_ci case nir_var_mem_shared: 1019bf215546Sopenharmony_ci return nir_ieq_imm(b, mode_enum, 0x1); 1020bf215546Sopenharmony_ci 1021bf215546Sopenharmony_ci case nir_var_mem_global: 1022bf215546Sopenharmony_ci return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0), 1023bf215546Sopenharmony_ci nir_ieq_imm(b, mode_enum, 0x3)); 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci default: 1026bf215546Sopenharmony_ci unreachable("Invalid mode check intrinsic"); 1027bf215546Sopenharmony_ci } 1028bf215546Sopenharmony_ci } 1029bf215546Sopenharmony_ci 1030bf215546Sopenharmony_ci default: 1031bf215546Sopenharmony_ci unreachable("Unsupported address mode"); 1032bf215546Sopenharmony_ci } 1033bf215546Sopenharmony_ci} 1034bf215546Sopenharmony_ci 1035bf215546Sopenharmony_ciunsigned 1036bf215546Sopenharmony_cinir_address_format_bit_size(nir_address_format addr_format) 1037bf215546Sopenharmony_ci{ 1038bf215546Sopenharmony_ci switch (addr_format) { 1039bf215546Sopenharmony_ci case nir_address_format_32bit_global: return 32; 1040bf215546Sopenharmony_ci case nir_address_format_2x32bit_global: return 32; 1041bf215546Sopenharmony_ci case nir_address_format_64bit_global: return 64; 1042bf215546Sopenharmony_ci case nir_address_format_64bit_global_32bit_offset: return 32; 1043bf215546Sopenharmony_ci case nir_address_format_64bit_bounded_global: return 32; 1044bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset: return 32; 1045bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset_pack64: return 64; 1046bf215546Sopenharmony_ci case nir_address_format_vec2_index_32bit_offset: return 32; 1047bf215546Sopenharmony_ci case nir_address_format_62bit_generic: return 64; 1048bf215546Sopenharmony_ci case nir_address_format_32bit_offset: return 32; 1049bf215546Sopenharmony_ci case nir_address_format_32bit_offset_as_64bit: return 64; 1050bf215546Sopenharmony_ci case nir_address_format_logical: return 32; 1051bf215546Sopenharmony_ci } 1052bf215546Sopenharmony_ci unreachable("Invalid address format"); 1053bf215546Sopenharmony_ci} 1054bf215546Sopenharmony_ci 1055bf215546Sopenharmony_ciunsigned 1056bf215546Sopenharmony_cinir_address_format_num_components(nir_address_format addr_format) 1057bf215546Sopenharmony_ci{ 1058bf215546Sopenharmony_ci switch (addr_format) { 1059bf215546Sopenharmony_ci case nir_address_format_32bit_global: return 1; 1060bf215546Sopenharmony_ci case nir_address_format_2x32bit_global: return 2; 1061bf215546Sopenharmony_ci case nir_address_format_64bit_global: return 1; 1062bf215546Sopenharmony_ci case nir_address_format_64bit_global_32bit_offset: return 4; 1063bf215546Sopenharmony_ci case nir_address_format_64bit_bounded_global: return 4; 1064bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset: return 2; 1065bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset_pack64: return 1; 1066bf215546Sopenharmony_ci case nir_address_format_vec2_index_32bit_offset: return 3; 1067bf215546Sopenharmony_ci case nir_address_format_62bit_generic: return 1; 1068bf215546Sopenharmony_ci case nir_address_format_32bit_offset: return 1; 1069bf215546Sopenharmony_ci case nir_address_format_32bit_offset_as_64bit: return 1; 1070bf215546Sopenharmony_ci case nir_address_format_logical: return 1; 1071bf215546Sopenharmony_ci } 1072bf215546Sopenharmony_ci unreachable("Invalid address format"); 1073bf215546Sopenharmony_ci} 1074bf215546Sopenharmony_ci 1075bf215546Sopenharmony_cistatic nir_ssa_def * 1076bf215546Sopenharmony_ciaddr_to_index(nir_builder *b, nir_ssa_def *addr, 1077bf215546Sopenharmony_ci nir_address_format addr_format) 1078bf215546Sopenharmony_ci{ 1079bf215546Sopenharmony_ci switch (addr_format) { 1080bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset: 1081bf215546Sopenharmony_ci assert(addr->num_components == 2); 1082bf215546Sopenharmony_ci return nir_channel(b, addr, 0); 1083bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset_pack64: 1084bf215546Sopenharmony_ci return nir_unpack_64_2x32_split_y(b, addr); 1085bf215546Sopenharmony_ci case nir_address_format_vec2_index_32bit_offset: 1086bf215546Sopenharmony_ci assert(addr->num_components == 3); 1087bf215546Sopenharmony_ci return nir_channels(b, addr, 0x3); 1088bf215546Sopenharmony_ci default: unreachable("Invalid address format"); 1089bf215546Sopenharmony_ci } 1090bf215546Sopenharmony_ci} 1091bf215546Sopenharmony_ci 1092bf215546Sopenharmony_cistatic nir_ssa_def * 1093bf215546Sopenharmony_ciaddr_to_offset(nir_builder *b, nir_ssa_def *addr, 1094bf215546Sopenharmony_ci nir_address_format addr_format) 1095bf215546Sopenharmony_ci{ 1096bf215546Sopenharmony_ci switch (addr_format) { 1097bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset: 1098bf215546Sopenharmony_ci assert(addr->num_components == 2); 1099bf215546Sopenharmony_ci return nir_channel(b, addr, 1); 1100bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset_pack64: 1101bf215546Sopenharmony_ci return nir_unpack_64_2x32_split_x(b, addr); 1102bf215546Sopenharmony_ci case nir_address_format_vec2_index_32bit_offset: 1103bf215546Sopenharmony_ci assert(addr->num_components == 3); 1104bf215546Sopenharmony_ci return nir_channel(b, addr, 2); 1105bf215546Sopenharmony_ci case nir_address_format_32bit_offset: 1106bf215546Sopenharmony_ci return addr; 1107bf215546Sopenharmony_ci case nir_address_format_32bit_offset_as_64bit: 1108bf215546Sopenharmony_ci case nir_address_format_62bit_generic: 1109bf215546Sopenharmony_ci return nir_u2u32(b, addr); 1110bf215546Sopenharmony_ci default: 1111bf215546Sopenharmony_ci unreachable("Invalid address format"); 1112bf215546Sopenharmony_ci } 1113bf215546Sopenharmony_ci} 1114bf215546Sopenharmony_ci 1115bf215546Sopenharmony_ci/** Returns true if the given address format resolves to a global address */ 1116bf215546Sopenharmony_cistatic bool 1117bf215546Sopenharmony_ciaddr_format_is_global(nir_address_format addr_format, 1118bf215546Sopenharmony_ci nir_variable_mode mode) 1119bf215546Sopenharmony_ci{ 1120bf215546Sopenharmony_ci if (addr_format == nir_address_format_62bit_generic) 1121bf215546Sopenharmony_ci return mode == nir_var_mem_global; 1122bf215546Sopenharmony_ci 1123bf215546Sopenharmony_ci return addr_format == nir_address_format_32bit_global || 1124bf215546Sopenharmony_ci addr_format == nir_address_format_2x32bit_global || 1125bf215546Sopenharmony_ci addr_format == nir_address_format_64bit_global || 1126bf215546Sopenharmony_ci addr_format == nir_address_format_64bit_global_32bit_offset || 1127bf215546Sopenharmony_ci addr_format == nir_address_format_64bit_bounded_global; 1128bf215546Sopenharmony_ci} 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_cistatic bool 1131bf215546Sopenharmony_ciaddr_format_is_offset(nir_address_format addr_format, 1132bf215546Sopenharmony_ci nir_variable_mode mode) 1133bf215546Sopenharmony_ci{ 1134bf215546Sopenharmony_ci if (addr_format == nir_address_format_62bit_generic) 1135bf215546Sopenharmony_ci return mode != nir_var_mem_global; 1136bf215546Sopenharmony_ci 1137bf215546Sopenharmony_ci return addr_format == nir_address_format_32bit_offset || 1138bf215546Sopenharmony_ci addr_format == nir_address_format_32bit_offset_as_64bit; 1139bf215546Sopenharmony_ci} 1140bf215546Sopenharmony_ci 1141bf215546Sopenharmony_cistatic nir_ssa_def * 1142bf215546Sopenharmony_ciaddr_to_global(nir_builder *b, nir_ssa_def *addr, 1143bf215546Sopenharmony_ci nir_address_format addr_format) 1144bf215546Sopenharmony_ci{ 1145bf215546Sopenharmony_ci switch (addr_format) { 1146bf215546Sopenharmony_ci case nir_address_format_32bit_global: 1147bf215546Sopenharmony_ci case nir_address_format_64bit_global: 1148bf215546Sopenharmony_ci case nir_address_format_62bit_generic: 1149bf215546Sopenharmony_ci assert(addr->num_components == 1); 1150bf215546Sopenharmony_ci return addr; 1151bf215546Sopenharmony_ci 1152bf215546Sopenharmony_ci case nir_address_format_2x32bit_global: 1153bf215546Sopenharmony_ci assert(addr->num_components == 2); 1154bf215546Sopenharmony_ci return addr; 1155bf215546Sopenharmony_ci 1156bf215546Sopenharmony_ci case nir_address_format_64bit_global_32bit_offset: 1157bf215546Sopenharmony_ci case nir_address_format_64bit_bounded_global: 1158bf215546Sopenharmony_ci assert(addr->num_components == 4); 1159bf215546Sopenharmony_ci return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)), 1160bf215546Sopenharmony_ci nir_u2u64(b, nir_channel(b, addr, 3))); 1161bf215546Sopenharmony_ci 1162bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset: 1163bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset_pack64: 1164bf215546Sopenharmony_ci case nir_address_format_vec2_index_32bit_offset: 1165bf215546Sopenharmony_ci case nir_address_format_32bit_offset: 1166bf215546Sopenharmony_ci case nir_address_format_32bit_offset_as_64bit: 1167bf215546Sopenharmony_ci case nir_address_format_logical: 1168bf215546Sopenharmony_ci unreachable("Cannot get a 64-bit address with this address format"); 1169bf215546Sopenharmony_ci } 1170bf215546Sopenharmony_ci 1171bf215546Sopenharmony_ci unreachable("Invalid address format"); 1172bf215546Sopenharmony_ci} 1173bf215546Sopenharmony_ci 1174bf215546Sopenharmony_cistatic bool 1175bf215546Sopenharmony_ciaddr_format_needs_bounds_check(nir_address_format addr_format) 1176bf215546Sopenharmony_ci{ 1177bf215546Sopenharmony_ci return addr_format == nir_address_format_64bit_bounded_global; 1178bf215546Sopenharmony_ci} 1179bf215546Sopenharmony_ci 1180bf215546Sopenharmony_cistatic nir_ssa_def * 1181bf215546Sopenharmony_ciaddr_is_in_bounds(nir_builder *b, nir_ssa_def *addr, 1182bf215546Sopenharmony_ci nir_address_format addr_format, unsigned size) 1183bf215546Sopenharmony_ci{ 1184bf215546Sopenharmony_ci assert(addr_format == nir_address_format_64bit_bounded_global); 1185bf215546Sopenharmony_ci assert(addr->num_components == 4); 1186bf215546Sopenharmony_ci return nir_ige(b, nir_channel(b, addr, 2), 1187bf215546Sopenharmony_ci nir_iadd_imm(b, nir_channel(b, addr, 3), size)); 1188bf215546Sopenharmony_ci} 1189bf215546Sopenharmony_ci 1190bf215546Sopenharmony_cistatic void 1191bf215546Sopenharmony_cinir_get_explicit_deref_range(nir_deref_instr *deref, 1192bf215546Sopenharmony_ci nir_address_format addr_format, 1193bf215546Sopenharmony_ci uint32_t *out_base, 1194bf215546Sopenharmony_ci uint32_t *out_range) 1195bf215546Sopenharmony_ci{ 1196bf215546Sopenharmony_ci uint32_t base = 0; 1197bf215546Sopenharmony_ci uint32_t range = glsl_get_explicit_size(deref->type, false); 1198bf215546Sopenharmony_ci 1199bf215546Sopenharmony_ci while (true) { 1200bf215546Sopenharmony_ci nir_deref_instr *parent = nir_deref_instr_parent(deref); 1201bf215546Sopenharmony_ci 1202bf215546Sopenharmony_ci switch (deref->deref_type) { 1203bf215546Sopenharmony_ci case nir_deref_type_array: 1204bf215546Sopenharmony_ci case nir_deref_type_array_wildcard: 1205bf215546Sopenharmony_ci case nir_deref_type_ptr_as_array: { 1206bf215546Sopenharmony_ci const unsigned stride = nir_deref_instr_array_stride(deref); 1207bf215546Sopenharmony_ci if (stride == 0) 1208bf215546Sopenharmony_ci goto fail; 1209bf215546Sopenharmony_ci 1210bf215546Sopenharmony_ci if (!parent) 1211bf215546Sopenharmony_ci goto fail; 1212bf215546Sopenharmony_ci 1213bf215546Sopenharmony_ci if (deref->deref_type != nir_deref_type_array_wildcard && 1214bf215546Sopenharmony_ci nir_src_is_const(deref->arr.index)) { 1215bf215546Sopenharmony_ci base += stride * nir_src_as_uint(deref->arr.index); 1216bf215546Sopenharmony_ci } else { 1217bf215546Sopenharmony_ci if (glsl_get_length(parent->type) == 0) 1218bf215546Sopenharmony_ci goto fail; 1219bf215546Sopenharmony_ci range += stride * (glsl_get_length(parent->type) - 1); 1220bf215546Sopenharmony_ci } 1221bf215546Sopenharmony_ci break; 1222bf215546Sopenharmony_ci } 1223bf215546Sopenharmony_ci 1224bf215546Sopenharmony_ci case nir_deref_type_struct: { 1225bf215546Sopenharmony_ci if (!parent) 1226bf215546Sopenharmony_ci goto fail; 1227bf215546Sopenharmony_ci 1228bf215546Sopenharmony_ci base += glsl_get_struct_field_offset(parent->type, deref->strct.index); 1229bf215546Sopenharmony_ci break; 1230bf215546Sopenharmony_ci } 1231bf215546Sopenharmony_ci 1232bf215546Sopenharmony_ci case nir_deref_type_cast: { 1233bf215546Sopenharmony_ci nir_instr *parent_instr = deref->parent.ssa->parent_instr; 1234bf215546Sopenharmony_ci 1235bf215546Sopenharmony_ci switch (parent_instr->type) { 1236bf215546Sopenharmony_ci case nir_instr_type_load_const: { 1237bf215546Sopenharmony_ci nir_load_const_instr *load = nir_instr_as_load_const(parent_instr); 1238bf215546Sopenharmony_ci 1239bf215546Sopenharmony_ci switch (addr_format) { 1240bf215546Sopenharmony_ci case nir_address_format_32bit_offset: 1241bf215546Sopenharmony_ci base += load->value[1].u32; 1242bf215546Sopenharmony_ci break; 1243bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset: 1244bf215546Sopenharmony_ci base += load->value[1].u32; 1245bf215546Sopenharmony_ci break; 1246bf215546Sopenharmony_ci case nir_address_format_vec2_index_32bit_offset: 1247bf215546Sopenharmony_ci base += load->value[2].u32; 1248bf215546Sopenharmony_ci break; 1249bf215546Sopenharmony_ci default: 1250bf215546Sopenharmony_ci goto fail; 1251bf215546Sopenharmony_ci } 1252bf215546Sopenharmony_ci 1253bf215546Sopenharmony_ci *out_base = base; 1254bf215546Sopenharmony_ci *out_range = range; 1255bf215546Sopenharmony_ci return; 1256bf215546Sopenharmony_ci } 1257bf215546Sopenharmony_ci 1258bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 1259bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr); 1260bf215546Sopenharmony_ci switch (intr->intrinsic) { 1261bf215546Sopenharmony_ci case nir_intrinsic_load_vulkan_descriptor: 1262bf215546Sopenharmony_ci /* Assume that a load_vulkan_descriptor won't contribute to an 1263bf215546Sopenharmony_ci * offset within the resource. 1264bf215546Sopenharmony_ci */ 1265bf215546Sopenharmony_ci break; 1266bf215546Sopenharmony_ci default: 1267bf215546Sopenharmony_ci goto fail; 1268bf215546Sopenharmony_ci } 1269bf215546Sopenharmony_ci 1270bf215546Sopenharmony_ci *out_base = base; 1271bf215546Sopenharmony_ci *out_range = range; 1272bf215546Sopenharmony_ci return; 1273bf215546Sopenharmony_ci } 1274bf215546Sopenharmony_ci 1275bf215546Sopenharmony_ci default: 1276bf215546Sopenharmony_ci goto fail; 1277bf215546Sopenharmony_ci } 1278bf215546Sopenharmony_ci } 1279bf215546Sopenharmony_ci 1280bf215546Sopenharmony_ci default: 1281bf215546Sopenharmony_ci goto fail; 1282bf215546Sopenharmony_ci } 1283bf215546Sopenharmony_ci 1284bf215546Sopenharmony_ci deref = parent; 1285bf215546Sopenharmony_ci } 1286bf215546Sopenharmony_ci 1287bf215546Sopenharmony_cifail: 1288bf215546Sopenharmony_ci *out_base = 0; 1289bf215546Sopenharmony_ci *out_range = ~0; 1290bf215546Sopenharmony_ci} 1291bf215546Sopenharmony_ci 1292bf215546Sopenharmony_cistatic nir_variable_mode 1293bf215546Sopenharmony_cicanonicalize_generic_modes(nir_variable_mode modes) 1294bf215546Sopenharmony_ci{ 1295bf215546Sopenharmony_ci assert(modes != 0); 1296bf215546Sopenharmony_ci if (util_bitcount(modes) == 1) 1297bf215546Sopenharmony_ci return modes; 1298bf215546Sopenharmony_ci 1299bf215546Sopenharmony_ci assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp | 1300bf215546Sopenharmony_ci nir_var_mem_shared | nir_var_mem_global))); 1301bf215546Sopenharmony_ci 1302bf215546Sopenharmony_ci /* Canonicalize by converting shader_temp to function_temp */ 1303bf215546Sopenharmony_ci if (modes & nir_var_shader_temp) { 1304bf215546Sopenharmony_ci modes &= ~nir_var_shader_temp; 1305bf215546Sopenharmony_ci modes |= nir_var_function_temp; 1306bf215546Sopenharmony_ci } 1307bf215546Sopenharmony_ci 1308bf215546Sopenharmony_ci return modes; 1309bf215546Sopenharmony_ci} 1310bf215546Sopenharmony_ci 1311bf215546Sopenharmony_cistatic nir_intrinsic_op 1312bf215546Sopenharmony_ciget_store_global_op_from_addr_format(nir_address_format addr_format) 1313bf215546Sopenharmony_ci{ 1314bf215546Sopenharmony_ci if (addr_format != nir_address_format_2x32bit_global) 1315bf215546Sopenharmony_ci return nir_intrinsic_store_global; 1316bf215546Sopenharmony_ci else 1317bf215546Sopenharmony_ci return nir_intrinsic_store_global_2x32; 1318bf215546Sopenharmony_ci} 1319bf215546Sopenharmony_ci 1320bf215546Sopenharmony_cistatic nir_intrinsic_op 1321bf215546Sopenharmony_ciget_load_global_op_from_addr_format(nir_address_format addr_format) 1322bf215546Sopenharmony_ci{ 1323bf215546Sopenharmony_ci if (addr_format != nir_address_format_2x32bit_global) 1324bf215546Sopenharmony_ci return nir_intrinsic_load_global; 1325bf215546Sopenharmony_ci else 1326bf215546Sopenharmony_ci return nir_intrinsic_load_global_2x32; 1327bf215546Sopenharmony_ci} 1328bf215546Sopenharmony_ci 1329bf215546Sopenharmony_cistatic nir_ssa_def * 1330bf215546Sopenharmony_cibuild_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin, 1331bf215546Sopenharmony_ci nir_ssa_def *addr, nir_address_format addr_format, 1332bf215546Sopenharmony_ci nir_variable_mode modes, 1333bf215546Sopenharmony_ci uint32_t align_mul, uint32_t align_offset, 1334bf215546Sopenharmony_ci unsigned num_components) 1335bf215546Sopenharmony_ci{ 1336bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 1337bf215546Sopenharmony_ci modes = canonicalize_generic_modes(modes); 1338bf215546Sopenharmony_ci 1339bf215546Sopenharmony_ci if (util_bitcount(modes) > 1) { 1340bf215546Sopenharmony_ci if (addr_format_is_global(addr_format, modes)) { 1341bf215546Sopenharmony_ci return build_explicit_io_load(b, intrin, addr, addr_format, 1342bf215546Sopenharmony_ci nir_var_mem_global, 1343bf215546Sopenharmony_ci align_mul, align_offset, 1344bf215546Sopenharmony_ci num_components); 1345bf215546Sopenharmony_ci } else if (modes & nir_var_function_temp) { 1346bf215546Sopenharmony_ci nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format, 1347bf215546Sopenharmony_ci nir_var_function_temp)); 1348bf215546Sopenharmony_ci nir_ssa_def *res1 = 1349bf215546Sopenharmony_ci build_explicit_io_load(b, intrin, addr, addr_format, 1350bf215546Sopenharmony_ci nir_var_function_temp, 1351bf215546Sopenharmony_ci align_mul, align_offset, 1352bf215546Sopenharmony_ci num_components); 1353bf215546Sopenharmony_ci nir_push_else(b, NULL); 1354bf215546Sopenharmony_ci nir_ssa_def *res2 = 1355bf215546Sopenharmony_ci build_explicit_io_load(b, intrin, addr, addr_format, 1356bf215546Sopenharmony_ci modes & ~nir_var_function_temp, 1357bf215546Sopenharmony_ci align_mul, align_offset, 1358bf215546Sopenharmony_ci num_components); 1359bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1360bf215546Sopenharmony_ci return nir_if_phi(b, res1, res2); 1361bf215546Sopenharmony_ci } else { 1362bf215546Sopenharmony_ci nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format, 1363bf215546Sopenharmony_ci nir_var_mem_shared)); 1364bf215546Sopenharmony_ci assert(modes & nir_var_mem_shared); 1365bf215546Sopenharmony_ci nir_ssa_def *res1 = 1366bf215546Sopenharmony_ci build_explicit_io_load(b, intrin, addr, addr_format, 1367bf215546Sopenharmony_ci nir_var_mem_shared, 1368bf215546Sopenharmony_ci align_mul, align_offset, 1369bf215546Sopenharmony_ci num_components); 1370bf215546Sopenharmony_ci nir_push_else(b, NULL); 1371bf215546Sopenharmony_ci assert(modes & nir_var_mem_global); 1372bf215546Sopenharmony_ci nir_ssa_def *res2 = 1373bf215546Sopenharmony_ci build_explicit_io_load(b, intrin, addr, addr_format, 1374bf215546Sopenharmony_ci nir_var_mem_global, 1375bf215546Sopenharmony_ci align_mul, align_offset, 1376bf215546Sopenharmony_ci num_components); 1377bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1378bf215546Sopenharmony_ci return nir_if_phi(b, res1, res2); 1379bf215546Sopenharmony_ci } 1380bf215546Sopenharmony_ci } 1381bf215546Sopenharmony_ci 1382bf215546Sopenharmony_ci assert(util_bitcount(modes) == 1); 1383bf215546Sopenharmony_ci const nir_variable_mode mode = modes; 1384bf215546Sopenharmony_ci 1385bf215546Sopenharmony_ci nir_intrinsic_op op; 1386bf215546Sopenharmony_ci switch (intrin->intrinsic) { 1387bf215546Sopenharmony_ci case nir_intrinsic_load_deref: 1388bf215546Sopenharmony_ci switch (mode) { 1389bf215546Sopenharmony_ci case nir_var_mem_ubo: 1390bf215546Sopenharmony_ci if (addr_format == nir_address_format_64bit_global_32bit_offset) 1391bf215546Sopenharmony_ci op = nir_intrinsic_load_global_constant_offset; 1392bf215546Sopenharmony_ci else if (addr_format == nir_address_format_64bit_bounded_global) 1393bf215546Sopenharmony_ci op = nir_intrinsic_load_global_constant_bounded; 1394bf215546Sopenharmony_ci else if (addr_format_is_global(addr_format, mode)) 1395bf215546Sopenharmony_ci op = nir_intrinsic_load_global_constant; 1396bf215546Sopenharmony_ci else 1397bf215546Sopenharmony_ci op = nir_intrinsic_load_ubo; 1398bf215546Sopenharmony_ci break; 1399bf215546Sopenharmony_ci case nir_var_mem_ssbo: 1400bf215546Sopenharmony_ci if (addr_format_is_global(addr_format, mode)) 1401bf215546Sopenharmony_ci op = nir_intrinsic_load_global; 1402bf215546Sopenharmony_ci else 1403bf215546Sopenharmony_ci op = nir_intrinsic_load_ssbo; 1404bf215546Sopenharmony_ci break; 1405bf215546Sopenharmony_ci case nir_var_mem_global: 1406bf215546Sopenharmony_ci assert(addr_format_is_global(addr_format, mode)); 1407bf215546Sopenharmony_ci op = get_load_global_op_from_addr_format(addr_format); 1408bf215546Sopenharmony_ci break; 1409bf215546Sopenharmony_ci case nir_var_uniform: 1410bf215546Sopenharmony_ci assert(addr_format_is_offset(addr_format, mode)); 1411bf215546Sopenharmony_ci assert(b->shader->info.stage == MESA_SHADER_KERNEL); 1412bf215546Sopenharmony_ci op = nir_intrinsic_load_kernel_input; 1413bf215546Sopenharmony_ci break; 1414bf215546Sopenharmony_ci case nir_var_mem_shared: 1415bf215546Sopenharmony_ci assert(addr_format_is_offset(addr_format, mode)); 1416bf215546Sopenharmony_ci op = nir_intrinsic_load_shared; 1417bf215546Sopenharmony_ci break; 1418bf215546Sopenharmony_ci case nir_var_mem_task_payload: 1419bf215546Sopenharmony_ci assert(addr_format_is_offset(addr_format, mode)); 1420bf215546Sopenharmony_ci op = nir_intrinsic_load_task_payload; 1421bf215546Sopenharmony_ci break; 1422bf215546Sopenharmony_ci case nir_var_shader_temp: 1423bf215546Sopenharmony_ci case nir_var_function_temp: 1424bf215546Sopenharmony_ci if (addr_format_is_offset(addr_format, mode)) { 1425bf215546Sopenharmony_ci op = nir_intrinsic_load_scratch; 1426bf215546Sopenharmony_ci } else { 1427bf215546Sopenharmony_ci assert(addr_format_is_global(addr_format, mode)); 1428bf215546Sopenharmony_ci op = get_load_global_op_from_addr_format(addr_format); 1429bf215546Sopenharmony_ci } 1430bf215546Sopenharmony_ci break; 1431bf215546Sopenharmony_ci case nir_var_mem_push_const: 1432bf215546Sopenharmony_ci assert(addr_format == nir_address_format_32bit_offset); 1433bf215546Sopenharmony_ci op = nir_intrinsic_load_push_constant; 1434bf215546Sopenharmony_ci break; 1435bf215546Sopenharmony_ci case nir_var_mem_constant: 1436bf215546Sopenharmony_ci if (addr_format_is_offset(addr_format, mode)) { 1437bf215546Sopenharmony_ci op = nir_intrinsic_load_constant; 1438bf215546Sopenharmony_ci } else { 1439bf215546Sopenharmony_ci assert(addr_format_is_global(addr_format, mode)); 1440bf215546Sopenharmony_ci op = get_load_global_op_from_addr_format(addr_format); 1441bf215546Sopenharmony_ci } 1442bf215546Sopenharmony_ci break; 1443bf215546Sopenharmony_ci default: 1444bf215546Sopenharmony_ci unreachable("Unsupported explicit IO variable mode"); 1445bf215546Sopenharmony_ci } 1446bf215546Sopenharmony_ci break; 1447bf215546Sopenharmony_ci 1448bf215546Sopenharmony_ci case nir_intrinsic_load_deref_block_intel: 1449bf215546Sopenharmony_ci switch (mode) { 1450bf215546Sopenharmony_ci case nir_var_mem_ssbo: 1451bf215546Sopenharmony_ci if (addr_format_is_global(addr_format, mode)) 1452bf215546Sopenharmony_ci op = nir_intrinsic_load_global_block_intel; 1453bf215546Sopenharmony_ci else 1454bf215546Sopenharmony_ci op = nir_intrinsic_load_ssbo_block_intel; 1455bf215546Sopenharmony_ci break; 1456bf215546Sopenharmony_ci case nir_var_mem_global: 1457bf215546Sopenharmony_ci op = nir_intrinsic_load_global_block_intel; 1458bf215546Sopenharmony_ci break; 1459bf215546Sopenharmony_ci case nir_var_mem_shared: 1460bf215546Sopenharmony_ci op = nir_intrinsic_load_shared_block_intel; 1461bf215546Sopenharmony_ci break; 1462bf215546Sopenharmony_ci default: 1463bf215546Sopenharmony_ci unreachable("Unsupported explicit IO variable mode"); 1464bf215546Sopenharmony_ci } 1465bf215546Sopenharmony_ci break; 1466bf215546Sopenharmony_ci 1467bf215546Sopenharmony_ci default: 1468bf215546Sopenharmony_ci unreachable("Invalid intrinsic"); 1469bf215546Sopenharmony_ci } 1470bf215546Sopenharmony_ci 1471bf215546Sopenharmony_ci nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); 1472bf215546Sopenharmony_ci 1473bf215546Sopenharmony_ci if (op == nir_intrinsic_load_global_constant_offset) { 1474bf215546Sopenharmony_ci assert(addr_format == nir_address_format_64bit_global_32bit_offset); 1475bf215546Sopenharmony_ci load->src[0] = nir_src_for_ssa( 1476bf215546Sopenharmony_ci nir_pack_64_2x32(b, nir_channels(b, addr, 0x3))); 1477bf215546Sopenharmony_ci load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3)); 1478bf215546Sopenharmony_ci } else if (op == nir_intrinsic_load_global_constant_bounded) { 1479bf215546Sopenharmony_ci assert(addr_format == nir_address_format_64bit_bounded_global); 1480bf215546Sopenharmony_ci load->src[0] = nir_src_for_ssa( 1481bf215546Sopenharmony_ci nir_pack_64_2x32(b, nir_channels(b, addr, 0x3))); 1482bf215546Sopenharmony_ci load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3)); 1483bf215546Sopenharmony_ci load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2)); 1484bf215546Sopenharmony_ci } else if (addr_format_is_global(addr_format, mode)) { 1485bf215546Sopenharmony_ci load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format)); 1486bf215546Sopenharmony_ci } else if (addr_format_is_offset(addr_format, mode)) { 1487bf215546Sopenharmony_ci assert(addr->num_components == 1); 1488bf215546Sopenharmony_ci load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 1489bf215546Sopenharmony_ci } else { 1490bf215546Sopenharmony_ci load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format)); 1491bf215546Sopenharmony_ci load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 1492bf215546Sopenharmony_ci } 1493bf215546Sopenharmony_ci 1494bf215546Sopenharmony_ci if (nir_intrinsic_has_access(load)) 1495bf215546Sopenharmony_ci nir_intrinsic_set_access(load, nir_intrinsic_access(intrin)); 1496bf215546Sopenharmony_ci 1497bf215546Sopenharmony_ci if (op == nir_intrinsic_load_constant) { 1498bf215546Sopenharmony_ci nir_intrinsic_set_base(load, 0); 1499bf215546Sopenharmony_ci nir_intrinsic_set_range(load, b->shader->constant_data_size); 1500bf215546Sopenharmony_ci } else if (mode == nir_var_mem_push_const) { 1501bf215546Sopenharmony_ci /* Push constants are required to be able to be chased back to the 1502bf215546Sopenharmony_ci * variable so we can provide a base/range. 1503bf215546Sopenharmony_ci */ 1504bf215546Sopenharmony_ci nir_variable *var = nir_deref_instr_get_variable(deref); 1505bf215546Sopenharmony_ci nir_intrinsic_set_base(load, 0); 1506bf215546Sopenharmony_ci nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false)); 1507bf215546Sopenharmony_ci } 1508bf215546Sopenharmony_ci 1509bf215546Sopenharmony_ci unsigned bit_size = intrin->dest.ssa.bit_size; 1510bf215546Sopenharmony_ci if (bit_size == 1) { 1511bf215546Sopenharmony_ci /* TODO: Make the native bool bit_size an option. */ 1512bf215546Sopenharmony_ci bit_size = 32; 1513bf215546Sopenharmony_ci } 1514bf215546Sopenharmony_ci 1515bf215546Sopenharmony_ci if (nir_intrinsic_has_align(load)) 1516bf215546Sopenharmony_ci nir_intrinsic_set_align(load, align_mul, align_offset); 1517bf215546Sopenharmony_ci 1518bf215546Sopenharmony_ci if (nir_intrinsic_has_range_base(load)) { 1519bf215546Sopenharmony_ci unsigned base, range; 1520bf215546Sopenharmony_ci nir_get_explicit_deref_range(deref, addr_format, &base, &range); 1521bf215546Sopenharmony_ci nir_intrinsic_set_range_base(load, base); 1522bf215546Sopenharmony_ci nir_intrinsic_set_range(load, range); 1523bf215546Sopenharmony_ci } 1524bf215546Sopenharmony_ci 1525bf215546Sopenharmony_ci assert(intrin->dest.is_ssa); 1526bf215546Sopenharmony_ci load->num_components = num_components; 1527bf215546Sopenharmony_ci nir_ssa_dest_init(&load->instr, &load->dest, num_components, 1528bf215546Sopenharmony_ci bit_size, NULL); 1529bf215546Sopenharmony_ci 1530bf215546Sopenharmony_ci assert(bit_size % 8 == 0); 1531bf215546Sopenharmony_ci 1532bf215546Sopenharmony_ci nir_ssa_def *result; 1533bf215546Sopenharmony_ci if (addr_format_needs_bounds_check(addr_format) && 1534bf215546Sopenharmony_ci op != nir_intrinsic_load_global_constant_bounded) { 1535bf215546Sopenharmony_ci /* We don't need to bounds-check global_constant_bounded because bounds 1536bf215546Sopenharmony_ci * checking is handled by the intrinsic itself. 1537bf215546Sopenharmony_ci * 1538bf215546Sopenharmony_ci * The Vulkan spec for robustBufferAccess gives us quite a few options 1539bf215546Sopenharmony_ci * as to what we can do with an OOB read. Unfortunately, returning 1540bf215546Sopenharmony_ci * undefined values isn't one of them so we return an actual zero. 1541bf215546Sopenharmony_ci */ 1542bf215546Sopenharmony_ci nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size); 1543bf215546Sopenharmony_ci 1544bf215546Sopenharmony_ci /* TODO: Better handle block_intel. */ 1545bf215546Sopenharmony_ci const unsigned load_size = (bit_size / 8) * load->num_components; 1546bf215546Sopenharmony_ci nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size)); 1547bf215546Sopenharmony_ci 1548bf215546Sopenharmony_ci nir_builder_instr_insert(b, &load->instr); 1549bf215546Sopenharmony_ci 1550bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1551bf215546Sopenharmony_ci 1552bf215546Sopenharmony_ci result = nir_if_phi(b, &load->dest.ssa, zero); 1553bf215546Sopenharmony_ci } else { 1554bf215546Sopenharmony_ci nir_builder_instr_insert(b, &load->instr); 1555bf215546Sopenharmony_ci result = &load->dest.ssa; 1556bf215546Sopenharmony_ci } 1557bf215546Sopenharmony_ci 1558bf215546Sopenharmony_ci if (intrin->dest.ssa.bit_size == 1) { 1559bf215546Sopenharmony_ci /* For shared, we can go ahead and use NIR's and/or the back-end's 1560bf215546Sopenharmony_ci * standard encoding for booleans rather than forcing a 0/1 boolean. 1561bf215546Sopenharmony_ci * This should save an instruction or two. 1562bf215546Sopenharmony_ci */ 1563bf215546Sopenharmony_ci if (mode == nir_var_mem_shared || 1564bf215546Sopenharmony_ci mode == nir_var_shader_temp || 1565bf215546Sopenharmony_ci mode == nir_var_function_temp) 1566bf215546Sopenharmony_ci result = nir_b2b1(b, result); 1567bf215546Sopenharmony_ci else 1568bf215546Sopenharmony_ci result = nir_i2b(b, result); 1569bf215546Sopenharmony_ci } 1570bf215546Sopenharmony_ci 1571bf215546Sopenharmony_ci return result; 1572bf215546Sopenharmony_ci} 1573bf215546Sopenharmony_ci 1574bf215546Sopenharmony_cistatic void 1575bf215546Sopenharmony_cibuild_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin, 1576bf215546Sopenharmony_ci nir_ssa_def *addr, nir_address_format addr_format, 1577bf215546Sopenharmony_ci nir_variable_mode modes, 1578bf215546Sopenharmony_ci uint32_t align_mul, uint32_t align_offset, 1579bf215546Sopenharmony_ci nir_ssa_def *value, nir_component_mask_t write_mask) 1580bf215546Sopenharmony_ci{ 1581bf215546Sopenharmony_ci modes = canonicalize_generic_modes(modes); 1582bf215546Sopenharmony_ci 1583bf215546Sopenharmony_ci if (util_bitcount(modes) > 1) { 1584bf215546Sopenharmony_ci if (addr_format_is_global(addr_format, modes)) { 1585bf215546Sopenharmony_ci build_explicit_io_store(b, intrin, addr, addr_format, 1586bf215546Sopenharmony_ci nir_var_mem_global, 1587bf215546Sopenharmony_ci align_mul, align_offset, 1588bf215546Sopenharmony_ci value, write_mask); 1589bf215546Sopenharmony_ci } else if (modes & nir_var_function_temp) { 1590bf215546Sopenharmony_ci nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format, 1591bf215546Sopenharmony_ci nir_var_function_temp)); 1592bf215546Sopenharmony_ci build_explicit_io_store(b, intrin, addr, addr_format, 1593bf215546Sopenharmony_ci nir_var_function_temp, 1594bf215546Sopenharmony_ci align_mul, align_offset, 1595bf215546Sopenharmony_ci value, write_mask); 1596bf215546Sopenharmony_ci nir_push_else(b, NULL); 1597bf215546Sopenharmony_ci build_explicit_io_store(b, intrin, addr, addr_format, 1598bf215546Sopenharmony_ci modes & ~nir_var_function_temp, 1599bf215546Sopenharmony_ci align_mul, align_offset, 1600bf215546Sopenharmony_ci value, write_mask); 1601bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1602bf215546Sopenharmony_ci } else { 1603bf215546Sopenharmony_ci nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format, 1604bf215546Sopenharmony_ci nir_var_mem_shared)); 1605bf215546Sopenharmony_ci assert(modes & nir_var_mem_shared); 1606bf215546Sopenharmony_ci build_explicit_io_store(b, intrin, addr, addr_format, 1607bf215546Sopenharmony_ci nir_var_mem_shared, 1608bf215546Sopenharmony_ci align_mul, align_offset, 1609bf215546Sopenharmony_ci value, write_mask); 1610bf215546Sopenharmony_ci nir_push_else(b, NULL); 1611bf215546Sopenharmony_ci assert(modes & nir_var_mem_global); 1612bf215546Sopenharmony_ci build_explicit_io_store(b, intrin, addr, addr_format, 1613bf215546Sopenharmony_ci nir_var_mem_global, 1614bf215546Sopenharmony_ci align_mul, align_offset, 1615bf215546Sopenharmony_ci value, write_mask); 1616bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1617bf215546Sopenharmony_ci } 1618bf215546Sopenharmony_ci return; 1619bf215546Sopenharmony_ci } 1620bf215546Sopenharmony_ci 1621bf215546Sopenharmony_ci assert(util_bitcount(modes) == 1); 1622bf215546Sopenharmony_ci const nir_variable_mode mode = modes; 1623bf215546Sopenharmony_ci 1624bf215546Sopenharmony_ci nir_intrinsic_op op; 1625bf215546Sopenharmony_ci switch (intrin->intrinsic) { 1626bf215546Sopenharmony_ci case nir_intrinsic_store_deref: 1627bf215546Sopenharmony_ci assert(write_mask != 0); 1628bf215546Sopenharmony_ci 1629bf215546Sopenharmony_ci switch (mode) { 1630bf215546Sopenharmony_ci case nir_var_mem_ssbo: 1631bf215546Sopenharmony_ci if (addr_format_is_global(addr_format, mode)) 1632bf215546Sopenharmony_ci op = get_store_global_op_from_addr_format(addr_format); 1633bf215546Sopenharmony_ci else 1634bf215546Sopenharmony_ci op = nir_intrinsic_store_ssbo; 1635bf215546Sopenharmony_ci break; 1636bf215546Sopenharmony_ci case nir_var_mem_global: 1637bf215546Sopenharmony_ci assert(addr_format_is_global(addr_format, mode)); 1638bf215546Sopenharmony_ci op = get_store_global_op_from_addr_format(addr_format); 1639bf215546Sopenharmony_ci break; 1640bf215546Sopenharmony_ci case nir_var_mem_shared: 1641bf215546Sopenharmony_ci assert(addr_format_is_offset(addr_format, mode)); 1642bf215546Sopenharmony_ci op = nir_intrinsic_store_shared; 1643bf215546Sopenharmony_ci break; 1644bf215546Sopenharmony_ci case nir_var_mem_task_payload: 1645bf215546Sopenharmony_ci assert(addr_format_is_offset(addr_format, mode)); 1646bf215546Sopenharmony_ci op = nir_intrinsic_store_task_payload; 1647bf215546Sopenharmony_ci break; 1648bf215546Sopenharmony_ci case nir_var_shader_temp: 1649bf215546Sopenharmony_ci case nir_var_function_temp: 1650bf215546Sopenharmony_ci if (addr_format_is_offset(addr_format, mode)) { 1651bf215546Sopenharmony_ci op = nir_intrinsic_store_scratch; 1652bf215546Sopenharmony_ci } else { 1653bf215546Sopenharmony_ci assert(addr_format_is_global(addr_format, mode)); 1654bf215546Sopenharmony_ci op = get_store_global_op_from_addr_format(addr_format); 1655bf215546Sopenharmony_ci } 1656bf215546Sopenharmony_ci break; 1657bf215546Sopenharmony_ci default: 1658bf215546Sopenharmony_ci unreachable("Unsupported explicit IO variable mode"); 1659bf215546Sopenharmony_ci } 1660bf215546Sopenharmony_ci break; 1661bf215546Sopenharmony_ci 1662bf215546Sopenharmony_ci case nir_intrinsic_store_deref_block_intel: 1663bf215546Sopenharmony_ci assert(write_mask == 0); 1664bf215546Sopenharmony_ci 1665bf215546Sopenharmony_ci switch (mode) { 1666bf215546Sopenharmony_ci case nir_var_mem_ssbo: 1667bf215546Sopenharmony_ci if (addr_format_is_global(addr_format, mode)) 1668bf215546Sopenharmony_ci op = nir_intrinsic_store_global_block_intel; 1669bf215546Sopenharmony_ci else 1670bf215546Sopenharmony_ci op = nir_intrinsic_store_ssbo_block_intel; 1671bf215546Sopenharmony_ci break; 1672bf215546Sopenharmony_ci case nir_var_mem_global: 1673bf215546Sopenharmony_ci op = nir_intrinsic_store_global_block_intel; 1674bf215546Sopenharmony_ci break; 1675bf215546Sopenharmony_ci case nir_var_mem_shared: 1676bf215546Sopenharmony_ci op = nir_intrinsic_store_shared_block_intel; 1677bf215546Sopenharmony_ci break; 1678bf215546Sopenharmony_ci default: 1679bf215546Sopenharmony_ci unreachable("Unsupported explicit IO variable mode"); 1680bf215546Sopenharmony_ci } 1681bf215546Sopenharmony_ci break; 1682bf215546Sopenharmony_ci 1683bf215546Sopenharmony_ci default: 1684bf215546Sopenharmony_ci unreachable("Invalid intrinsic"); 1685bf215546Sopenharmony_ci } 1686bf215546Sopenharmony_ci 1687bf215546Sopenharmony_ci nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op); 1688bf215546Sopenharmony_ci 1689bf215546Sopenharmony_ci if (value->bit_size == 1) { 1690bf215546Sopenharmony_ci /* For shared, we can go ahead and use NIR's and/or the back-end's 1691bf215546Sopenharmony_ci * standard encoding for booleans rather than forcing a 0/1 boolean. 1692bf215546Sopenharmony_ci * This should save an instruction or two. 1693bf215546Sopenharmony_ci * 1694bf215546Sopenharmony_ci * TODO: Make the native bool bit_size an option. 1695bf215546Sopenharmony_ci */ 1696bf215546Sopenharmony_ci if (mode == nir_var_mem_shared || 1697bf215546Sopenharmony_ci mode == nir_var_shader_temp || 1698bf215546Sopenharmony_ci mode == nir_var_function_temp) 1699bf215546Sopenharmony_ci value = nir_b2b32(b, value); 1700bf215546Sopenharmony_ci else 1701bf215546Sopenharmony_ci value = nir_b2i(b, value, 32); 1702bf215546Sopenharmony_ci } 1703bf215546Sopenharmony_ci 1704bf215546Sopenharmony_ci store->src[0] = nir_src_for_ssa(value); 1705bf215546Sopenharmony_ci if (addr_format_is_global(addr_format, mode)) { 1706bf215546Sopenharmony_ci store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format)); 1707bf215546Sopenharmony_ci } else if (addr_format_is_offset(addr_format, mode)) { 1708bf215546Sopenharmony_ci assert(addr->num_components == 1); 1709bf215546Sopenharmony_ci store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 1710bf215546Sopenharmony_ci } else { 1711bf215546Sopenharmony_ci store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format)); 1712bf215546Sopenharmony_ci store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 1713bf215546Sopenharmony_ci } 1714bf215546Sopenharmony_ci 1715bf215546Sopenharmony_ci nir_intrinsic_set_write_mask(store, write_mask); 1716bf215546Sopenharmony_ci 1717bf215546Sopenharmony_ci if (nir_intrinsic_has_access(store)) 1718bf215546Sopenharmony_ci nir_intrinsic_set_access(store, nir_intrinsic_access(intrin)); 1719bf215546Sopenharmony_ci 1720bf215546Sopenharmony_ci nir_intrinsic_set_align(store, align_mul, align_offset); 1721bf215546Sopenharmony_ci 1722bf215546Sopenharmony_ci assert(value->num_components == 1 || 1723bf215546Sopenharmony_ci value->num_components == intrin->num_components); 1724bf215546Sopenharmony_ci store->num_components = value->num_components; 1725bf215546Sopenharmony_ci 1726bf215546Sopenharmony_ci assert(value->bit_size % 8 == 0); 1727bf215546Sopenharmony_ci 1728bf215546Sopenharmony_ci if (addr_format_needs_bounds_check(addr_format)) { 1729bf215546Sopenharmony_ci /* TODO: Better handle block_intel. */ 1730bf215546Sopenharmony_ci const unsigned store_size = (value->bit_size / 8) * store->num_components; 1731bf215546Sopenharmony_ci nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size)); 1732bf215546Sopenharmony_ci 1733bf215546Sopenharmony_ci nir_builder_instr_insert(b, &store->instr); 1734bf215546Sopenharmony_ci 1735bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1736bf215546Sopenharmony_ci } else { 1737bf215546Sopenharmony_ci nir_builder_instr_insert(b, &store->instr); 1738bf215546Sopenharmony_ci } 1739bf215546Sopenharmony_ci} 1740bf215546Sopenharmony_ci 1741bf215546Sopenharmony_cistatic nir_ssa_def * 1742bf215546Sopenharmony_cibuild_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin, 1743bf215546Sopenharmony_ci nir_ssa_def *addr, nir_address_format addr_format, 1744bf215546Sopenharmony_ci nir_variable_mode modes) 1745bf215546Sopenharmony_ci{ 1746bf215546Sopenharmony_ci modes = canonicalize_generic_modes(modes); 1747bf215546Sopenharmony_ci 1748bf215546Sopenharmony_ci if (util_bitcount(modes) > 1) { 1749bf215546Sopenharmony_ci if (addr_format_is_global(addr_format, modes)) { 1750bf215546Sopenharmony_ci return build_explicit_io_atomic(b, intrin, addr, addr_format, 1751bf215546Sopenharmony_ci nir_var_mem_global); 1752bf215546Sopenharmony_ci } else if (modes & nir_var_function_temp) { 1753bf215546Sopenharmony_ci nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format, 1754bf215546Sopenharmony_ci nir_var_function_temp)); 1755bf215546Sopenharmony_ci nir_ssa_def *res1 = 1756bf215546Sopenharmony_ci build_explicit_io_atomic(b, intrin, addr, addr_format, 1757bf215546Sopenharmony_ci nir_var_function_temp); 1758bf215546Sopenharmony_ci nir_push_else(b, NULL); 1759bf215546Sopenharmony_ci nir_ssa_def *res2 = 1760bf215546Sopenharmony_ci build_explicit_io_atomic(b, intrin, addr, addr_format, 1761bf215546Sopenharmony_ci modes & ~nir_var_function_temp); 1762bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1763bf215546Sopenharmony_ci return nir_if_phi(b, res1, res2); 1764bf215546Sopenharmony_ci } else { 1765bf215546Sopenharmony_ci nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format, 1766bf215546Sopenharmony_ci nir_var_mem_shared)); 1767bf215546Sopenharmony_ci assert(modes & nir_var_mem_shared); 1768bf215546Sopenharmony_ci nir_ssa_def *res1 = 1769bf215546Sopenharmony_ci build_explicit_io_atomic(b, intrin, addr, addr_format, 1770bf215546Sopenharmony_ci nir_var_mem_shared); 1771bf215546Sopenharmony_ci nir_push_else(b, NULL); 1772bf215546Sopenharmony_ci assert(modes & nir_var_mem_global); 1773bf215546Sopenharmony_ci nir_ssa_def *res2 = 1774bf215546Sopenharmony_ci build_explicit_io_atomic(b, intrin, addr, addr_format, 1775bf215546Sopenharmony_ci nir_var_mem_global); 1776bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1777bf215546Sopenharmony_ci return nir_if_phi(b, res1, res2); 1778bf215546Sopenharmony_ci } 1779bf215546Sopenharmony_ci } 1780bf215546Sopenharmony_ci 1781bf215546Sopenharmony_ci assert(util_bitcount(modes) == 1); 1782bf215546Sopenharmony_ci const nir_variable_mode mode = modes; 1783bf215546Sopenharmony_ci 1784bf215546Sopenharmony_ci const unsigned num_data_srcs = 1785bf215546Sopenharmony_ci nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1; 1786bf215546Sopenharmony_ci 1787bf215546Sopenharmony_ci nir_intrinsic_op op; 1788bf215546Sopenharmony_ci switch (mode) { 1789bf215546Sopenharmony_ci case nir_var_mem_ssbo: 1790bf215546Sopenharmony_ci if (addr_format_is_global(addr_format, mode)) 1791bf215546Sopenharmony_ci op = global_atomic_for_deref(addr_format, intrin->intrinsic); 1792bf215546Sopenharmony_ci else 1793bf215546Sopenharmony_ci op = ssbo_atomic_for_deref(intrin->intrinsic); 1794bf215546Sopenharmony_ci break; 1795bf215546Sopenharmony_ci case nir_var_mem_global: 1796bf215546Sopenharmony_ci assert(addr_format_is_global(addr_format, mode)); 1797bf215546Sopenharmony_ci op = global_atomic_for_deref(addr_format, intrin->intrinsic); 1798bf215546Sopenharmony_ci break; 1799bf215546Sopenharmony_ci case nir_var_mem_shared: 1800bf215546Sopenharmony_ci assert(addr_format_is_offset(addr_format, mode)); 1801bf215546Sopenharmony_ci op = shared_atomic_for_deref(intrin->intrinsic); 1802bf215546Sopenharmony_ci break; 1803bf215546Sopenharmony_ci case nir_var_mem_task_payload: 1804bf215546Sopenharmony_ci assert(addr_format_is_offset(addr_format, mode)); 1805bf215546Sopenharmony_ci op = task_payload_atomic_for_deref(intrin->intrinsic); 1806bf215546Sopenharmony_ci break; 1807bf215546Sopenharmony_ci default: 1808bf215546Sopenharmony_ci unreachable("Unsupported explicit IO variable mode"); 1809bf215546Sopenharmony_ci } 1810bf215546Sopenharmony_ci 1811bf215546Sopenharmony_ci nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op); 1812bf215546Sopenharmony_ci 1813bf215546Sopenharmony_ci unsigned src = 0; 1814bf215546Sopenharmony_ci if (addr_format_is_global(addr_format, mode)) { 1815bf215546Sopenharmony_ci atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format)); 1816bf215546Sopenharmony_ci } else if (addr_format_is_offset(addr_format, mode)) { 1817bf215546Sopenharmony_ci assert(addr->num_components == 1); 1818bf215546Sopenharmony_ci atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 1819bf215546Sopenharmony_ci } else { 1820bf215546Sopenharmony_ci atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format)); 1821bf215546Sopenharmony_ci atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 1822bf215546Sopenharmony_ci } 1823bf215546Sopenharmony_ci for (unsigned i = 0; i < num_data_srcs; i++) { 1824bf215546Sopenharmony_ci atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa); 1825bf215546Sopenharmony_ci } 1826bf215546Sopenharmony_ci 1827bf215546Sopenharmony_ci /* Global atomics don't have access flags because they assume that the 1828bf215546Sopenharmony_ci * address may be non-uniform. 1829bf215546Sopenharmony_ci */ 1830bf215546Sopenharmony_ci if (nir_intrinsic_has_access(atomic)) 1831bf215546Sopenharmony_ci nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin)); 1832bf215546Sopenharmony_ci 1833bf215546Sopenharmony_ci assert(intrin->dest.ssa.num_components == 1); 1834bf215546Sopenharmony_ci nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1835bf215546Sopenharmony_ci 1, intrin->dest.ssa.bit_size, NULL); 1836bf215546Sopenharmony_ci 1837bf215546Sopenharmony_ci assert(atomic->dest.ssa.bit_size % 8 == 0); 1838bf215546Sopenharmony_ci 1839bf215546Sopenharmony_ci if (addr_format_needs_bounds_check(addr_format)) { 1840bf215546Sopenharmony_ci const unsigned atomic_size = atomic->dest.ssa.bit_size / 8; 1841bf215546Sopenharmony_ci nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size)); 1842bf215546Sopenharmony_ci 1843bf215546Sopenharmony_ci nir_builder_instr_insert(b, &atomic->instr); 1844bf215546Sopenharmony_ci 1845bf215546Sopenharmony_ci nir_pop_if(b, NULL); 1846bf215546Sopenharmony_ci return nir_if_phi(b, &atomic->dest.ssa, 1847bf215546Sopenharmony_ci nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size)); 1848bf215546Sopenharmony_ci } else { 1849bf215546Sopenharmony_ci nir_builder_instr_insert(b, &atomic->instr); 1850bf215546Sopenharmony_ci return &atomic->dest.ssa; 1851bf215546Sopenharmony_ci } 1852bf215546Sopenharmony_ci} 1853bf215546Sopenharmony_ci 1854bf215546Sopenharmony_cinir_ssa_def * 1855bf215546Sopenharmony_cinir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref, 1856bf215546Sopenharmony_ci nir_ssa_def *base_addr, 1857bf215546Sopenharmony_ci nir_address_format addr_format) 1858bf215546Sopenharmony_ci{ 1859bf215546Sopenharmony_ci assert(deref->dest.is_ssa); 1860bf215546Sopenharmony_ci switch (deref->deref_type) { 1861bf215546Sopenharmony_ci case nir_deref_type_var: 1862bf215546Sopenharmony_ci return build_addr_for_var(b, deref->var, addr_format); 1863bf215546Sopenharmony_ci 1864bf215546Sopenharmony_ci case nir_deref_type_ptr_as_array: 1865bf215546Sopenharmony_ci case nir_deref_type_array: { 1866bf215546Sopenharmony_ci unsigned stride = nir_deref_instr_array_stride(deref); 1867bf215546Sopenharmony_ci assert(stride > 0); 1868bf215546Sopenharmony_ci 1869bf215546Sopenharmony_ci unsigned offset_bit_size = addr_get_offset_bit_size(base_addr, addr_format); 1870bf215546Sopenharmony_ci nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1); 1871bf215546Sopenharmony_ci nir_ssa_def *offset; 1872bf215546Sopenharmony_ci 1873bf215546Sopenharmony_ci /* If the access chain has been declared in-bounds, then we know it doesn't 1874bf215546Sopenharmony_ci * overflow the type. For nir_deref_type_array, this implies it cannot be 1875bf215546Sopenharmony_ci * negative. Also, since types in NIR have a maximum 32-bit size, we know the 1876bf215546Sopenharmony_ci * final result will fit in a 32-bit value so we can convert the index to 1877bf215546Sopenharmony_ci * 32-bit before multiplying and save ourselves from a 64-bit multiply. 1878bf215546Sopenharmony_ci */ 1879bf215546Sopenharmony_ci if (deref->arr.in_bounds && deref->deref_type == nir_deref_type_array) { 1880bf215546Sopenharmony_ci index = nir_u2u32(b, index); 1881bf215546Sopenharmony_ci offset = nir_u2u(b, nir_amul_imm(b, index, stride), offset_bit_size); 1882bf215546Sopenharmony_ci } else { 1883bf215546Sopenharmony_ci index = nir_i2i(b, index, offset_bit_size); 1884bf215546Sopenharmony_ci offset = nir_amul_imm(b, index, stride); 1885bf215546Sopenharmony_ci } 1886bf215546Sopenharmony_ci 1887bf215546Sopenharmony_ci return build_addr_iadd(b, base_addr, addr_format, deref->modes, offset); 1888bf215546Sopenharmony_ci } 1889bf215546Sopenharmony_ci 1890bf215546Sopenharmony_ci case nir_deref_type_array_wildcard: 1891bf215546Sopenharmony_ci unreachable("Wildcards should be lowered by now"); 1892bf215546Sopenharmony_ci break; 1893bf215546Sopenharmony_ci 1894bf215546Sopenharmony_ci case nir_deref_type_struct: { 1895bf215546Sopenharmony_ci nir_deref_instr *parent = nir_deref_instr_parent(deref); 1896bf215546Sopenharmony_ci int offset = glsl_get_struct_field_offset(parent->type, 1897bf215546Sopenharmony_ci deref->strct.index); 1898bf215546Sopenharmony_ci assert(offset >= 0); 1899bf215546Sopenharmony_ci return build_addr_iadd_imm(b, base_addr, addr_format, 1900bf215546Sopenharmony_ci deref->modes, offset); 1901bf215546Sopenharmony_ci } 1902bf215546Sopenharmony_ci 1903bf215546Sopenharmony_ci case nir_deref_type_cast: 1904bf215546Sopenharmony_ci /* Nothing to do here */ 1905bf215546Sopenharmony_ci return base_addr; 1906bf215546Sopenharmony_ci } 1907bf215546Sopenharmony_ci 1908bf215546Sopenharmony_ci unreachable("Invalid NIR deref type"); 1909bf215546Sopenharmony_ci} 1910bf215546Sopenharmony_ci 1911bf215546Sopenharmony_civoid 1912bf215546Sopenharmony_cinir_lower_explicit_io_instr(nir_builder *b, 1913bf215546Sopenharmony_ci nir_intrinsic_instr *intrin, 1914bf215546Sopenharmony_ci nir_ssa_def *addr, 1915bf215546Sopenharmony_ci nir_address_format addr_format) 1916bf215546Sopenharmony_ci{ 1917bf215546Sopenharmony_ci b->cursor = nir_after_instr(&intrin->instr); 1918bf215546Sopenharmony_ci 1919bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 1920bf215546Sopenharmony_ci unsigned vec_stride = glsl_get_explicit_stride(deref->type); 1921bf215546Sopenharmony_ci unsigned scalar_size = type_scalar_size_bytes(deref->type); 1922bf215546Sopenharmony_ci assert(vec_stride == 0 || glsl_type_is_vector(deref->type)); 1923bf215546Sopenharmony_ci assert(vec_stride == 0 || vec_stride >= scalar_size); 1924bf215546Sopenharmony_ci 1925bf215546Sopenharmony_ci uint32_t align_mul, align_offset; 1926bf215546Sopenharmony_ci if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) { 1927bf215546Sopenharmony_ci /* If we don't have an alignment from the deref, assume scalar */ 1928bf215546Sopenharmony_ci align_mul = scalar_size; 1929bf215546Sopenharmony_ci align_offset = 0; 1930bf215546Sopenharmony_ci } 1931bf215546Sopenharmony_ci 1932bf215546Sopenharmony_ci switch (intrin->intrinsic) { 1933bf215546Sopenharmony_ci case nir_intrinsic_load_deref: { 1934bf215546Sopenharmony_ci nir_ssa_def *value; 1935bf215546Sopenharmony_ci if (vec_stride > scalar_size) { 1936bf215546Sopenharmony_ci nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL, }; 1937bf215546Sopenharmony_ci for (unsigned i = 0; i < intrin->num_components; i++) { 1938bf215546Sopenharmony_ci unsigned comp_offset = i * vec_stride; 1939bf215546Sopenharmony_ci nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format, 1940bf215546Sopenharmony_ci deref->modes, 1941bf215546Sopenharmony_ci comp_offset); 1942bf215546Sopenharmony_ci comps[i] = build_explicit_io_load(b, intrin, comp_addr, 1943bf215546Sopenharmony_ci addr_format, deref->modes, 1944bf215546Sopenharmony_ci align_mul, 1945bf215546Sopenharmony_ci (align_offset + comp_offset) % 1946bf215546Sopenharmony_ci align_mul, 1947bf215546Sopenharmony_ci 1); 1948bf215546Sopenharmony_ci } 1949bf215546Sopenharmony_ci value = nir_vec(b, comps, intrin->num_components); 1950bf215546Sopenharmony_ci } else { 1951bf215546Sopenharmony_ci value = build_explicit_io_load(b, intrin, addr, addr_format, 1952bf215546Sopenharmony_ci deref->modes, align_mul, align_offset, 1953bf215546Sopenharmony_ci intrin->num_components); 1954bf215546Sopenharmony_ci } 1955bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value); 1956bf215546Sopenharmony_ci break; 1957bf215546Sopenharmony_ci } 1958bf215546Sopenharmony_ci 1959bf215546Sopenharmony_ci case nir_intrinsic_store_deref: { 1960bf215546Sopenharmony_ci assert(intrin->src[1].is_ssa); 1961bf215546Sopenharmony_ci nir_ssa_def *value = intrin->src[1].ssa; 1962bf215546Sopenharmony_ci nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin); 1963bf215546Sopenharmony_ci if (vec_stride > scalar_size) { 1964bf215546Sopenharmony_ci for (unsigned i = 0; i < intrin->num_components; i++) { 1965bf215546Sopenharmony_ci if (!(write_mask & (1 << i))) 1966bf215546Sopenharmony_ci continue; 1967bf215546Sopenharmony_ci 1968bf215546Sopenharmony_ci unsigned comp_offset = i * vec_stride; 1969bf215546Sopenharmony_ci nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format, 1970bf215546Sopenharmony_ci deref->modes, 1971bf215546Sopenharmony_ci comp_offset); 1972bf215546Sopenharmony_ci build_explicit_io_store(b, intrin, comp_addr, addr_format, 1973bf215546Sopenharmony_ci deref->modes, align_mul, 1974bf215546Sopenharmony_ci (align_offset + comp_offset) % align_mul, 1975bf215546Sopenharmony_ci nir_channel(b, value, i), 1); 1976bf215546Sopenharmony_ci } 1977bf215546Sopenharmony_ci } else { 1978bf215546Sopenharmony_ci build_explicit_io_store(b, intrin, addr, addr_format, 1979bf215546Sopenharmony_ci deref->modes, align_mul, align_offset, 1980bf215546Sopenharmony_ci value, write_mask); 1981bf215546Sopenharmony_ci } 1982bf215546Sopenharmony_ci break; 1983bf215546Sopenharmony_ci } 1984bf215546Sopenharmony_ci 1985bf215546Sopenharmony_ci case nir_intrinsic_load_deref_block_intel: { 1986bf215546Sopenharmony_ci nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format, 1987bf215546Sopenharmony_ci deref->modes, 1988bf215546Sopenharmony_ci align_mul, align_offset, 1989bf215546Sopenharmony_ci intrin->num_components); 1990bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value); 1991bf215546Sopenharmony_ci break; 1992bf215546Sopenharmony_ci } 1993bf215546Sopenharmony_ci 1994bf215546Sopenharmony_ci case nir_intrinsic_store_deref_block_intel: { 1995bf215546Sopenharmony_ci assert(intrin->src[1].is_ssa); 1996bf215546Sopenharmony_ci nir_ssa_def *value = intrin->src[1].ssa; 1997bf215546Sopenharmony_ci const nir_component_mask_t write_mask = 0; 1998bf215546Sopenharmony_ci build_explicit_io_store(b, intrin, addr, addr_format, 1999bf215546Sopenharmony_ci deref->modes, align_mul, align_offset, 2000bf215546Sopenharmony_ci value, write_mask); 2001bf215546Sopenharmony_ci break; 2002bf215546Sopenharmony_ci } 2003bf215546Sopenharmony_ci 2004bf215546Sopenharmony_ci default: { 2005bf215546Sopenharmony_ci nir_ssa_def *value = 2006bf215546Sopenharmony_ci build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes); 2007bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value); 2008bf215546Sopenharmony_ci break; 2009bf215546Sopenharmony_ci } 2010bf215546Sopenharmony_ci } 2011bf215546Sopenharmony_ci 2012bf215546Sopenharmony_ci nir_instr_remove(&intrin->instr); 2013bf215546Sopenharmony_ci} 2014bf215546Sopenharmony_ci 2015bf215546Sopenharmony_cibool 2016bf215546Sopenharmony_cinir_get_explicit_deref_align(nir_deref_instr *deref, 2017bf215546Sopenharmony_ci bool default_to_type_align, 2018bf215546Sopenharmony_ci uint32_t *align_mul, 2019bf215546Sopenharmony_ci uint32_t *align_offset) 2020bf215546Sopenharmony_ci{ 2021bf215546Sopenharmony_ci if (deref->deref_type == nir_deref_type_var) { 2022bf215546Sopenharmony_ci /* If we see a variable, align_mul is effectively infinite because we 2023bf215546Sopenharmony_ci * know the offset exactly (up to the offset of the base pointer for the 2024bf215546Sopenharmony_ci * given variable mode). We have to pick something so we choose 256B 2025bf215546Sopenharmony_ci * as an arbitrary alignment which seems high enough for any reasonable 2026bf215546Sopenharmony_ci * wide-load use-case. Back-ends should clamp alignments down if 256B 2027bf215546Sopenharmony_ci * is too large for some reason. 2028bf215546Sopenharmony_ci */ 2029bf215546Sopenharmony_ci *align_mul = 256; 2030bf215546Sopenharmony_ci *align_offset = deref->var->data.driver_location % 256; 2031bf215546Sopenharmony_ci return true; 2032bf215546Sopenharmony_ci } 2033bf215546Sopenharmony_ci 2034bf215546Sopenharmony_ci /* If we're a cast deref that has an alignment, use that. */ 2035bf215546Sopenharmony_ci if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) { 2036bf215546Sopenharmony_ci *align_mul = deref->cast.align_mul; 2037bf215546Sopenharmony_ci *align_offset = deref->cast.align_offset; 2038bf215546Sopenharmony_ci return true; 2039bf215546Sopenharmony_ci } 2040bf215546Sopenharmony_ci 2041bf215546Sopenharmony_ci /* Otherwise, we need to compute the alignment based on the parent */ 2042bf215546Sopenharmony_ci nir_deref_instr *parent = nir_deref_instr_parent(deref); 2043bf215546Sopenharmony_ci if (parent == NULL) { 2044bf215546Sopenharmony_ci assert(deref->deref_type == nir_deref_type_cast); 2045bf215546Sopenharmony_ci if (default_to_type_align) { 2046bf215546Sopenharmony_ci /* If we don't have a parent, assume the type's alignment, if any. */ 2047bf215546Sopenharmony_ci unsigned type_align = glsl_get_explicit_alignment(deref->type); 2048bf215546Sopenharmony_ci if (type_align == 0) 2049bf215546Sopenharmony_ci return false; 2050bf215546Sopenharmony_ci 2051bf215546Sopenharmony_ci *align_mul = type_align; 2052bf215546Sopenharmony_ci *align_offset = 0; 2053bf215546Sopenharmony_ci return true; 2054bf215546Sopenharmony_ci } else { 2055bf215546Sopenharmony_ci return false; 2056bf215546Sopenharmony_ci } 2057bf215546Sopenharmony_ci } 2058bf215546Sopenharmony_ci 2059bf215546Sopenharmony_ci uint32_t parent_mul, parent_offset; 2060bf215546Sopenharmony_ci if (!nir_get_explicit_deref_align(parent, default_to_type_align, 2061bf215546Sopenharmony_ci &parent_mul, &parent_offset)) 2062bf215546Sopenharmony_ci return false; 2063bf215546Sopenharmony_ci 2064bf215546Sopenharmony_ci switch (deref->deref_type) { 2065bf215546Sopenharmony_ci case nir_deref_type_var: 2066bf215546Sopenharmony_ci unreachable("Handled above"); 2067bf215546Sopenharmony_ci 2068bf215546Sopenharmony_ci case nir_deref_type_array: 2069bf215546Sopenharmony_ci case nir_deref_type_array_wildcard: 2070bf215546Sopenharmony_ci case nir_deref_type_ptr_as_array: { 2071bf215546Sopenharmony_ci const unsigned stride = nir_deref_instr_array_stride(deref); 2072bf215546Sopenharmony_ci if (stride == 0) 2073bf215546Sopenharmony_ci return false; 2074bf215546Sopenharmony_ci 2075bf215546Sopenharmony_ci if (deref->deref_type != nir_deref_type_array_wildcard && 2076bf215546Sopenharmony_ci nir_src_is_const(deref->arr.index)) { 2077bf215546Sopenharmony_ci unsigned offset = nir_src_as_uint(deref->arr.index) * stride; 2078bf215546Sopenharmony_ci *align_mul = parent_mul; 2079bf215546Sopenharmony_ci *align_offset = (parent_offset + offset) % parent_mul; 2080bf215546Sopenharmony_ci } else { 2081bf215546Sopenharmony_ci /* If this is a wildcard or an indirect deref, we have to go with the 2082bf215546Sopenharmony_ci * power-of-two gcd. 2083bf215546Sopenharmony_ci */ 2084bf215546Sopenharmony_ci *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1)); 2085bf215546Sopenharmony_ci *align_offset = parent_offset % *align_mul; 2086bf215546Sopenharmony_ci } 2087bf215546Sopenharmony_ci return true; 2088bf215546Sopenharmony_ci } 2089bf215546Sopenharmony_ci 2090bf215546Sopenharmony_ci case nir_deref_type_struct: { 2091bf215546Sopenharmony_ci const int offset = glsl_get_struct_field_offset(parent->type, 2092bf215546Sopenharmony_ci deref->strct.index); 2093bf215546Sopenharmony_ci if (offset < 0) 2094bf215546Sopenharmony_ci return false; 2095bf215546Sopenharmony_ci 2096bf215546Sopenharmony_ci *align_mul = parent_mul; 2097bf215546Sopenharmony_ci *align_offset = (parent_offset + offset) % parent_mul; 2098bf215546Sopenharmony_ci return true; 2099bf215546Sopenharmony_ci } 2100bf215546Sopenharmony_ci 2101bf215546Sopenharmony_ci case nir_deref_type_cast: 2102bf215546Sopenharmony_ci /* We handled the explicit alignment case above. */ 2103bf215546Sopenharmony_ci assert(deref->cast.align_mul == 0); 2104bf215546Sopenharmony_ci *align_mul = parent_mul; 2105bf215546Sopenharmony_ci *align_offset = parent_offset; 2106bf215546Sopenharmony_ci return true; 2107bf215546Sopenharmony_ci } 2108bf215546Sopenharmony_ci 2109bf215546Sopenharmony_ci unreachable("Invalid deref_instr_type"); 2110bf215546Sopenharmony_ci} 2111bf215546Sopenharmony_ci 2112bf215546Sopenharmony_cistatic void 2113bf215546Sopenharmony_cilower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref, 2114bf215546Sopenharmony_ci nir_address_format addr_format) 2115bf215546Sopenharmony_ci{ 2116bf215546Sopenharmony_ci /* Just delete the deref if it's not used. We can't use 2117bf215546Sopenharmony_ci * nir_deref_instr_remove_if_unused here because it may remove more than 2118bf215546Sopenharmony_ci * one deref which could break our list walking since we walk the list 2119bf215546Sopenharmony_ci * backwards. 2120bf215546Sopenharmony_ci */ 2121bf215546Sopenharmony_ci assert(list_is_empty(&deref->dest.ssa.if_uses)); 2122bf215546Sopenharmony_ci if (list_is_empty(&deref->dest.ssa.uses)) { 2123bf215546Sopenharmony_ci nir_instr_remove(&deref->instr); 2124bf215546Sopenharmony_ci return; 2125bf215546Sopenharmony_ci } 2126bf215546Sopenharmony_ci 2127bf215546Sopenharmony_ci b->cursor = nir_after_instr(&deref->instr); 2128bf215546Sopenharmony_ci 2129bf215546Sopenharmony_ci nir_ssa_def *base_addr = NULL; 2130bf215546Sopenharmony_ci if (deref->deref_type != nir_deref_type_var) { 2131bf215546Sopenharmony_ci assert(deref->parent.is_ssa); 2132bf215546Sopenharmony_ci base_addr = deref->parent.ssa; 2133bf215546Sopenharmony_ci } 2134bf215546Sopenharmony_ci 2135bf215546Sopenharmony_ci nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr, 2136bf215546Sopenharmony_ci addr_format); 2137bf215546Sopenharmony_ci assert(addr->bit_size == deref->dest.ssa.bit_size); 2138bf215546Sopenharmony_ci assert(addr->num_components == deref->dest.ssa.num_components); 2139bf215546Sopenharmony_ci 2140bf215546Sopenharmony_ci nir_instr_remove(&deref->instr); 2141bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&deref->dest.ssa, addr); 2142bf215546Sopenharmony_ci} 2143bf215546Sopenharmony_ci 2144bf215546Sopenharmony_cistatic void 2145bf215546Sopenharmony_cilower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin, 2146bf215546Sopenharmony_ci nir_address_format addr_format) 2147bf215546Sopenharmony_ci{ 2148bf215546Sopenharmony_ci assert(intrin->src[0].is_ssa); 2149bf215546Sopenharmony_ci nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format); 2150bf215546Sopenharmony_ci} 2151bf215546Sopenharmony_ci 2152bf215546Sopenharmony_cistatic void 2153bf215546Sopenharmony_cilower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin, 2154bf215546Sopenharmony_ci nir_address_format addr_format) 2155bf215546Sopenharmony_ci{ 2156bf215546Sopenharmony_ci b->cursor = nir_after_instr(&intrin->instr); 2157bf215546Sopenharmony_ci 2158bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2159bf215546Sopenharmony_ci 2160bf215546Sopenharmony_ci assert(glsl_type_is_array(deref->type)); 2161bf215546Sopenharmony_ci assert(glsl_get_length(deref->type) == 0); 2162bf215546Sopenharmony_ci assert(nir_deref_mode_is(deref, nir_var_mem_ssbo)); 2163bf215546Sopenharmony_ci unsigned stride = glsl_get_explicit_stride(deref->type); 2164bf215546Sopenharmony_ci assert(stride > 0); 2165bf215546Sopenharmony_ci 2166bf215546Sopenharmony_ci nir_ssa_def *addr = &deref->dest.ssa; 2167bf215546Sopenharmony_ci nir_ssa_def *index = addr_to_index(b, addr, addr_format); 2168bf215546Sopenharmony_ci nir_ssa_def *offset = addr_to_offset(b, addr, addr_format); 2169bf215546Sopenharmony_ci unsigned access = nir_intrinsic_access(intrin); 2170bf215546Sopenharmony_ci 2171bf215546Sopenharmony_ci nir_ssa_def *arr_size = nir_get_ssbo_size(b, index, .access=access); 2172bf215546Sopenharmony_ci arr_size = nir_usub_sat(b, arr_size, offset); 2173bf215546Sopenharmony_ci arr_size = nir_udiv_imm(b, arr_size, stride); 2174bf215546Sopenharmony_ci 2175bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, arr_size); 2176bf215546Sopenharmony_ci nir_instr_remove(&intrin->instr); 2177bf215546Sopenharmony_ci} 2178bf215546Sopenharmony_ci 2179bf215546Sopenharmony_cistatic void 2180bf215546Sopenharmony_cilower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin, 2181bf215546Sopenharmony_ci nir_address_format addr_format) 2182bf215546Sopenharmony_ci{ 2183bf215546Sopenharmony_ci if (addr_format_is_global(addr_format, 0)) { 2184bf215546Sopenharmony_ci /* If the address format is always global, then the driver can use 2185bf215546Sopenharmony_ci * global addresses regardless of the mode. In that case, don't create 2186bf215546Sopenharmony_ci * a check, just whack the intrinsic to addr_mode_is and delegate to the 2187bf215546Sopenharmony_ci * driver lowering. 2188bf215546Sopenharmony_ci */ 2189bf215546Sopenharmony_ci intrin->intrinsic = nir_intrinsic_addr_mode_is; 2190bf215546Sopenharmony_ci return; 2191bf215546Sopenharmony_ci } 2192bf215546Sopenharmony_ci 2193bf215546Sopenharmony_ci assert(intrin->src[0].is_ssa); 2194bf215546Sopenharmony_ci nir_ssa_def *addr = intrin->src[0].ssa; 2195bf215546Sopenharmony_ci 2196bf215546Sopenharmony_ci b->cursor = nir_instr_remove(&intrin->instr); 2197bf215546Sopenharmony_ci 2198bf215546Sopenharmony_ci nir_ssa_def *is_mode = 2199bf215546Sopenharmony_ci build_runtime_addr_mode_check(b, addr, addr_format, 2200bf215546Sopenharmony_ci nir_intrinsic_memory_modes(intrin)); 2201bf215546Sopenharmony_ci 2202bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, is_mode); 2203bf215546Sopenharmony_ci} 2204bf215546Sopenharmony_ci 2205bf215546Sopenharmony_cistatic bool 2206bf215546Sopenharmony_cinir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes, 2207bf215546Sopenharmony_ci nir_address_format addr_format) 2208bf215546Sopenharmony_ci{ 2209bf215546Sopenharmony_ci bool progress = false; 2210bf215546Sopenharmony_ci 2211bf215546Sopenharmony_ci nir_builder b; 2212bf215546Sopenharmony_ci nir_builder_init(&b, impl); 2213bf215546Sopenharmony_ci 2214bf215546Sopenharmony_ci /* Walk in reverse order so that we can see the full deref chain when we 2215bf215546Sopenharmony_ci * lower the access operations. We lower them assuming that the derefs 2216bf215546Sopenharmony_ci * will be turned into address calculations later. 2217bf215546Sopenharmony_ci */ 2218bf215546Sopenharmony_ci nir_foreach_block_reverse(block, impl) { 2219bf215546Sopenharmony_ci nir_foreach_instr_reverse_safe(instr, block) { 2220bf215546Sopenharmony_ci switch (instr->type) { 2221bf215546Sopenharmony_ci case nir_instr_type_deref: { 2222bf215546Sopenharmony_ci nir_deref_instr *deref = nir_instr_as_deref(instr); 2223bf215546Sopenharmony_ci if (nir_deref_mode_is_in_set(deref, modes)) { 2224bf215546Sopenharmony_ci lower_explicit_io_deref(&b, deref, addr_format); 2225bf215546Sopenharmony_ci progress = true; 2226bf215546Sopenharmony_ci } 2227bf215546Sopenharmony_ci break; 2228bf215546Sopenharmony_ci } 2229bf215546Sopenharmony_ci 2230bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 2231bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 2232bf215546Sopenharmony_ci switch (intrin->intrinsic) { 2233bf215546Sopenharmony_ci case nir_intrinsic_load_deref: 2234bf215546Sopenharmony_ci case nir_intrinsic_store_deref: 2235bf215546Sopenharmony_ci case nir_intrinsic_load_deref_block_intel: 2236bf215546Sopenharmony_ci case nir_intrinsic_store_deref_block_intel: 2237bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_add: 2238bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_imin: 2239bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_umin: 2240bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_imax: 2241bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_umax: 2242bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_and: 2243bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_or: 2244bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_xor: 2245bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_exchange: 2246bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_comp_swap: 2247bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_fadd: 2248bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_fmin: 2249bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_fmax: 2250bf215546Sopenharmony_ci case nir_intrinsic_deref_atomic_fcomp_swap: { 2251bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2252bf215546Sopenharmony_ci if (nir_deref_mode_is_in_set(deref, modes)) { 2253bf215546Sopenharmony_ci lower_explicit_io_access(&b, intrin, addr_format); 2254bf215546Sopenharmony_ci progress = true; 2255bf215546Sopenharmony_ci } 2256bf215546Sopenharmony_ci break; 2257bf215546Sopenharmony_ci } 2258bf215546Sopenharmony_ci 2259bf215546Sopenharmony_ci case nir_intrinsic_deref_buffer_array_length: { 2260bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2261bf215546Sopenharmony_ci if (nir_deref_mode_is_in_set(deref, modes)) { 2262bf215546Sopenharmony_ci lower_explicit_io_array_length(&b, intrin, addr_format); 2263bf215546Sopenharmony_ci progress = true; 2264bf215546Sopenharmony_ci } 2265bf215546Sopenharmony_ci break; 2266bf215546Sopenharmony_ci } 2267bf215546Sopenharmony_ci 2268bf215546Sopenharmony_ci case nir_intrinsic_deref_mode_is: { 2269bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2270bf215546Sopenharmony_ci if (nir_deref_mode_is_in_set(deref, modes)) { 2271bf215546Sopenharmony_ci lower_explicit_io_mode_check(&b, intrin, addr_format); 2272bf215546Sopenharmony_ci progress = true; 2273bf215546Sopenharmony_ci } 2274bf215546Sopenharmony_ci break; 2275bf215546Sopenharmony_ci } 2276bf215546Sopenharmony_ci 2277bf215546Sopenharmony_ci default: 2278bf215546Sopenharmony_ci break; 2279bf215546Sopenharmony_ci } 2280bf215546Sopenharmony_ci break; 2281bf215546Sopenharmony_ci } 2282bf215546Sopenharmony_ci 2283bf215546Sopenharmony_ci default: 2284bf215546Sopenharmony_ci /* Nothing to do */ 2285bf215546Sopenharmony_ci break; 2286bf215546Sopenharmony_ci } 2287bf215546Sopenharmony_ci } 2288bf215546Sopenharmony_ci } 2289bf215546Sopenharmony_ci 2290bf215546Sopenharmony_ci if (progress) { 2291bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | 2292bf215546Sopenharmony_ci nir_metadata_dominance); 2293bf215546Sopenharmony_ci } else { 2294bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 2295bf215546Sopenharmony_ci } 2296bf215546Sopenharmony_ci 2297bf215546Sopenharmony_ci return progress; 2298bf215546Sopenharmony_ci} 2299bf215546Sopenharmony_ci 2300bf215546Sopenharmony_ci/** Lower explicitly laid out I/O access to byte offset/address intrinsics 2301bf215546Sopenharmony_ci * 2302bf215546Sopenharmony_ci * This pass is intended to be used for any I/O which touches memory external 2303bf215546Sopenharmony_ci * to the shader or which is directly visible to the client. It requires that 2304bf215546Sopenharmony_ci * all data types in the given modes have a explicit stride/offset decorations 2305bf215546Sopenharmony_ci * to tell it exactly how to calculate the offset/address for the given load, 2306bf215546Sopenharmony_ci * store, or atomic operation. If the offset/stride information does not come 2307bf215546Sopenharmony_ci * from the client explicitly (as with shared variables in GL or Vulkan), 2308bf215546Sopenharmony_ci * nir_lower_vars_to_explicit_types() can be used to add them. 2309bf215546Sopenharmony_ci * 2310bf215546Sopenharmony_ci * Unlike nir_lower_io, this pass is fully capable of handling incomplete 2311bf215546Sopenharmony_ci * pointer chains which may contain cast derefs. It does so by walking the 2312bf215546Sopenharmony_ci * deref chain backwards and simply replacing each deref, one at a time, with 2313bf215546Sopenharmony_ci * the appropriate address calculation. The pass takes a nir_address_format 2314bf215546Sopenharmony_ci * parameter which describes how the offset or address is to be represented 2315bf215546Sopenharmony_ci * during calculations. By ensuring that the address is always in a 2316bf215546Sopenharmony_ci * consistent format, pointers can safely be conjured from thin air by the 2317bf215546Sopenharmony_ci * driver, stored to variables, passed through phis, etc. 2318bf215546Sopenharmony_ci * 2319bf215546Sopenharmony_ci * The one exception to the simple algorithm described above is for handling 2320bf215546Sopenharmony_ci * row-major matrices in which case we may look down one additional level of 2321bf215546Sopenharmony_ci * the deref chain. 2322bf215546Sopenharmony_ci * 2323bf215546Sopenharmony_ci * This pass is also capable of handling OpenCL generic pointers. If the 2324bf215546Sopenharmony_ci * address mode is global, it will lower any ambiguous (more than one mode) 2325bf215546Sopenharmony_ci * access to global and pass through the deref_mode_is run-time checks as 2326bf215546Sopenharmony_ci * addr_mode_is. This assumes the driver has somehow mapped shared and 2327bf215546Sopenharmony_ci * scratch memory to the global address space. For other modes such as 2328bf215546Sopenharmony_ci * 62bit_generic, there is an enum embedded in the address and we lower 2329bf215546Sopenharmony_ci * ambiguous access to an if-ladder and deref_mode_is to a check against the 2330bf215546Sopenharmony_ci * embedded enum. If nir_lower_explicit_io is called on any shader that 2331bf215546Sopenharmony_ci * contains generic pointers, it must either be used on all of the generic 2332bf215546Sopenharmony_ci * modes or none. 2333bf215546Sopenharmony_ci */ 2334bf215546Sopenharmony_cibool 2335bf215546Sopenharmony_cinir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes, 2336bf215546Sopenharmony_ci nir_address_format addr_format) 2337bf215546Sopenharmony_ci{ 2338bf215546Sopenharmony_ci bool progress = false; 2339bf215546Sopenharmony_ci 2340bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 2341bf215546Sopenharmony_ci if (function->impl && 2342bf215546Sopenharmony_ci nir_lower_explicit_io_impl(function->impl, modes, addr_format)) 2343bf215546Sopenharmony_ci progress = true; 2344bf215546Sopenharmony_ci } 2345bf215546Sopenharmony_ci 2346bf215546Sopenharmony_ci return progress; 2347bf215546Sopenharmony_ci} 2348bf215546Sopenharmony_ci 2349bf215546Sopenharmony_cistatic bool 2350bf215546Sopenharmony_cinir_lower_vars_to_explicit_types_impl(nir_function_impl *impl, 2351bf215546Sopenharmony_ci nir_variable_mode modes, 2352bf215546Sopenharmony_ci glsl_type_size_align_func type_info) 2353bf215546Sopenharmony_ci{ 2354bf215546Sopenharmony_ci bool progress = false; 2355bf215546Sopenharmony_ci 2356bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 2357bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 2358bf215546Sopenharmony_ci if (instr->type != nir_instr_type_deref) 2359bf215546Sopenharmony_ci continue; 2360bf215546Sopenharmony_ci 2361bf215546Sopenharmony_ci nir_deref_instr *deref = nir_instr_as_deref(instr); 2362bf215546Sopenharmony_ci if (!nir_deref_mode_is_in_set(deref, modes)) 2363bf215546Sopenharmony_ci continue; 2364bf215546Sopenharmony_ci 2365bf215546Sopenharmony_ci unsigned size, alignment; 2366bf215546Sopenharmony_ci const struct glsl_type *new_type = 2367bf215546Sopenharmony_ci glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment); 2368bf215546Sopenharmony_ci if (new_type != deref->type) { 2369bf215546Sopenharmony_ci progress = true; 2370bf215546Sopenharmony_ci deref->type = new_type; 2371bf215546Sopenharmony_ci } 2372bf215546Sopenharmony_ci if (deref->deref_type == nir_deref_type_cast) { 2373bf215546Sopenharmony_ci /* See also glsl_type::get_explicit_type_for_size_align() */ 2374bf215546Sopenharmony_ci unsigned new_stride = align(size, alignment); 2375bf215546Sopenharmony_ci if (new_stride != deref->cast.ptr_stride) { 2376bf215546Sopenharmony_ci deref->cast.ptr_stride = new_stride; 2377bf215546Sopenharmony_ci progress = true; 2378bf215546Sopenharmony_ci } 2379bf215546Sopenharmony_ci } 2380bf215546Sopenharmony_ci } 2381bf215546Sopenharmony_ci } 2382bf215546Sopenharmony_ci 2383bf215546Sopenharmony_ci if (progress) { 2384bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | 2385bf215546Sopenharmony_ci nir_metadata_dominance | 2386bf215546Sopenharmony_ci nir_metadata_live_ssa_defs | 2387bf215546Sopenharmony_ci nir_metadata_loop_analysis); 2388bf215546Sopenharmony_ci } else { 2389bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 2390bf215546Sopenharmony_ci } 2391bf215546Sopenharmony_ci 2392bf215546Sopenharmony_ci return progress; 2393bf215546Sopenharmony_ci} 2394bf215546Sopenharmony_ci 2395bf215546Sopenharmony_cistatic bool 2396bf215546Sopenharmony_cilower_vars_to_explicit(nir_shader *shader, 2397bf215546Sopenharmony_ci struct exec_list *vars, nir_variable_mode mode, 2398bf215546Sopenharmony_ci glsl_type_size_align_func type_info) 2399bf215546Sopenharmony_ci{ 2400bf215546Sopenharmony_ci bool progress = false; 2401bf215546Sopenharmony_ci unsigned offset; 2402bf215546Sopenharmony_ci switch (mode) { 2403bf215546Sopenharmony_ci case nir_var_uniform: 2404bf215546Sopenharmony_ci assert(shader->info.stage == MESA_SHADER_KERNEL); 2405bf215546Sopenharmony_ci offset = 0; 2406bf215546Sopenharmony_ci break; 2407bf215546Sopenharmony_ci case nir_var_function_temp: 2408bf215546Sopenharmony_ci case nir_var_shader_temp: 2409bf215546Sopenharmony_ci offset = shader->scratch_size; 2410bf215546Sopenharmony_ci break; 2411bf215546Sopenharmony_ci case nir_var_mem_shared: 2412bf215546Sopenharmony_ci offset = shader->info.shared_size; 2413bf215546Sopenharmony_ci break; 2414bf215546Sopenharmony_ci case nir_var_mem_task_payload: 2415bf215546Sopenharmony_ci offset = shader->info.task_payload_size; 2416bf215546Sopenharmony_ci break; 2417bf215546Sopenharmony_ci case nir_var_mem_global: 2418bf215546Sopenharmony_ci offset = shader->global_mem_size; 2419bf215546Sopenharmony_ci break; 2420bf215546Sopenharmony_ci case nir_var_mem_constant: 2421bf215546Sopenharmony_ci offset = shader->constant_data_size; 2422bf215546Sopenharmony_ci break; 2423bf215546Sopenharmony_ci case nir_var_shader_call_data: 2424bf215546Sopenharmony_ci case nir_var_ray_hit_attrib: 2425bf215546Sopenharmony_ci offset = 0; 2426bf215546Sopenharmony_ci break; 2427bf215546Sopenharmony_ci default: 2428bf215546Sopenharmony_ci unreachable("Unsupported mode"); 2429bf215546Sopenharmony_ci } 2430bf215546Sopenharmony_ci nir_foreach_variable_in_list(var, vars) { 2431bf215546Sopenharmony_ci if (var->data.mode != mode) 2432bf215546Sopenharmony_ci continue; 2433bf215546Sopenharmony_ci 2434bf215546Sopenharmony_ci unsigned size, align; 2435bf215546Sopenharmony_ci const struct glsl_type *explicit_type = 2436bf215546Sopenharmony_ci glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align); 2437bf215546Sopenharmony_ci 2438bf215546Sopenharmony_ci if (explicit_type != var->type) 2439bf215546Sopenharmony_ci var->type = explicit_type; 2440bf215546Sopenharmony_ci 2441bf215546Sopenharmony_ci UNUSED bool is_empty_struct = 2442bf215546Sopenharmony_ci glsl_type_is_struct_or_ifc(explicit_type) && 2443bf215546Sopenharmony_ci glsl_get_length(explicit_type) == 0; 2444bf215546Sopenharmony_ci 2445bf215546Sopenharmony_ci assert(util_is_power_of_two_nonzero(align) || is_empty_struct); 2446bf215546Sopenharmony_ci var->data.driver_location = ALIGN_POT(offset, align); 2447bf215546Sopenharmony_ci offset = var->data.driver_location + size; 2448bf215546Sopenharmony_ci progress = true; 2449bf215546Sopenharmony_ci } 2450bf215546Sopenharmony_ci 2451bf215546Sopenharmony_ci switch (mode) { 2452bf215546Sopenharmony_ci case nir_var_uniform: 2453bf215546Sopenharmony_ci assert(shader->info.stage == MESA_SHADER_KERNEL); 2454bf215546Sopenharmony_ci shader->num_uniforms = offset; 2455bf215546Sopenharmony_ci break; 2456bf215546Sopenharmony_ci case nir_var_shader_temp: 2457bf215546Sopenharmony_ci case nir_var_function_temp: 2458bf215546Sopenharmony_ci shader->scratch_size = offset; 2459bf215546Sopenharmony_ci break; 2460bf215546Sopenharmony_ci case nir_var_mem_shared: 2461bf215546Sopenharmony_ci shader->info.shared_size = offset; 2462bf215546Sopenharmony_ci break; 2463bf215546Sopenharmony_ci case nir_var_mem_task_payload: 2464bf215546Sopenharmony_ci shader->info.task_payload_size = offset; 2465bf215546Sopenharmony_ci break; 2466bf215546Sopenharmony_ci case nir_var_mem_global: 2467bf215546Sopenharmony_ci shader->global_mem_size = offset; 2468bf215546Sopenharmony_ci break; 2469bf215546Sopenharmony_ci case nir_var_mem_constant: 2470bf215546Sopenharmony_ci shader->constant_data_size = offset; 2471bf215546Sopenharmony_ci break; 2472bf215546Sopenharmony_ci case nir_var_shader_call_data: 2473bf215546Sopenharmony_ci case nir_var_ray_hit_attrib: 2474bf215546Sopenharmony_ci break; 2475bf215546Sopenharmony_ci default: 2476bf215546Sopenharmony_ci unreachable("Unsupported mode"); 2477bf215546Sopenharmony_ci } 2478bf215546Sopenharmony_ci 2479bf215546Sopenharmony_ci return progress; 2480bf215546Sopenharmony_ci} 2481bf215546Sopenharmony_ci 2482bf215546Sopenharmony_ci/* If nir_lower_vars_to_explicit_types is called on any shader that contains 2483bf215546Sopenharmony_ci * generic pointers, it must either be used on all of the generic modes or 2484bf215546Sopenharmony_ci * none. 2485bf215546Sopenharmony_ci */ 2486bf215546Sopenharmony_cibool 2487bf215546Sopenharmony_cinir_lower_vars_to_explicit_types(nir_shader *shader, 2488bf215546Sopenharmony_ci nir_variable_mode modes, 2489bf215546Sopenharmony_ci glsl_type_size_align_func type_info) 2490bf215546Sopenharmony_ci{ 2491bf215546Sopenharmony_ci /* TODO: Situations which need to be handled to support more modes: 2492bf215546Sopenharmony_ci * - row-major matrices 2493bf215546Sopenharmony_ci * - compact shader inputs/outputs 2494bf215546Sopenharmony_ci * - interface types 2495bf215546Sopenharmony_ci */ 2496bf215546Sopenharmony_ci ASSERTED nir_variable_mode supported = 2497bf215546Sopenharmony_ci nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant | 2498bf215546Sopenharmony_ci nir_var_shader_temp | nir_var_function_temp | nir_var_uniform | 2499bf215546Sopenharmony_ci nir_var_shader_call_data | nir_var_ray_hit_attrib | 2500bf215546Sopenharmony_ci nir_var_mem_task_payload; 2501bf215546Sopenharmony_ci assert(!(modes & ~supported) && "unsupported"); 2502bf215546Sopenharmony_ci 2503bf215546Sopenharmony_ci bool progress = false; 2504bf215546Sopenharmony_ci 2505bf215546Sopenharmony_ci if (modes & nir_var_uniform) 2506bf215546Sopenharmony_ci progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info); 2507bf215546Sopenharmony_ci if (modes & nir_var_mem_global) 2508bf215546Sopenharmony_ci progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info); 2509bf215546Sopenharmony_ci 2510bf215546Sopenharmony_ci if (modes & nir_var_mem_shared) { 2511bf215546Sopenharmony_ci assert(!shader->info.shared_memory_explicit_layout); 2512bf215546Sopenharmony_ci progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info); 2513bf215546Sopenharmony_ci } 2514bf215546Sopenharmony_ci 2515bf215546Sopenharmony_ci if (modes & nir_var_shader_temp) 2516bf215546Sopenharmony_ci progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info); 2517bf215546Sopenharmony_ci if (modes & nir_var_mem_constant) 2518bf215546Sopenharmony_ci progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info); 2519bf215546Sopenharmony_ci if (modes & nir_var_shader_call_data) 2520bf215546Sopenharmony_ci progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info); 2521bf215546Sopenharmony_ci if (modes & nir_var_ray_hit_attrib) 2522bf215546Sopenharmony_ci progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info); 2523bf215546Sopenharmony_ci if (modes & nir_var_mem_task_payload) 2524bf215546Sopenharmony_ci progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_task_payload, type_info); 2525bf215546Sopenharmony_ci 2526bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 2527bf215546Sopenharmony_ci if (function->impl) { 2528bf215546Sopenharmony_ci if (modes & nir_var_function_temp) 2529bf215546Sopenharmony_ci progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info); 2530bf215546Sopenharmony_ci 2531bf215546Sopenharmony_ci progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info); 2532bf215546Sopenharmony_ci } 2533bf215546Sopenharmony_ci } 2534bf215546Sopenharmony_ci 2535bf215546Sopenharmony_ci return progress; 2536bf215546Sopenharmony_ci} 2537bf215546Sopenharmony_ci 2538bf215546Sopenharmony_cistatic void 2539bf215546Sopenharmony_ciwrite_constant(void *dst, size_t dst_size, 2540bf215546Sopenharmony_ci const nir_constant *c, const struct glsl_type *type) 2541bf215546Sopenharmony_ci{ 2542bf215546Sopenharmony_ci if (glsl_type_is_vector_or_scalar(type)) { 2543bf215546Sopenharmony_ci const unsigned num_components = glsl_get_vector_elements(type); 2544bf215546Sopenharmony_ci const unsigned bit_size = glsl_get_bit_size(type); 2545bf215546Sopenharmony_ci if (bit_size == 1) { 2546bf215546Sopenharmony_ci /* Booleans are special-cased to be 32-bit 2547bf215546Sopenharmony_ci * 2548bf215546Sopenharmony_ci * TODO: Make the native bool bit_size an option. 2549bf215546Sopenharmony_ci */ 2550bf215546Sopenharmony_ci assert(num_components * 4 <= dst_size); 2551bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; i++) { 2552bf215546Sopenharmony_ci int32_t b32 = -(int)c->values[i].b; 2553bf215546Sopenharmony_ci memcpy((char *)dst + i * 4, &b32, 4); 2554bf215546Sopenharmony_ci } 2555bf215546Sopenharmony_ci } else { 2556bf215546Sopenharmony_ci assert(bit_size >= 8 && bit_size % 8 == 0); 2557bf215546Sopenharmony_ci const unsigned byte_size = bit_size / 8; 2558bf215546Sopenharmony_ci assert(num_components * byte_size <= dst_size); 2559bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; i++) { 2560bf215546Sopenharmony_ci /* Annoyingly, thanks to packed structs, we can't make any 2561bf215546Sopenharmony_ci * assumptions about the alignment of dst. To avoid any strange 2562bf215546Sopenharmony_ci * issues with unaligned writes, we always use memcpy. 2563bf215546Sopenharmony_ci */ 2564bf215546Sopenharmony_ci memcpy((char *)dst + i * byte_size, &c->values[i], byte_size); 2565bf215546Sopenharmony_ci } 2566bf215546Sopenharmony_ci } 2567bf215546Sopenharmony_ci } else if (glsl_type_is_array_or_matrix(type)) { 2568bf215546Sopenharmony_ci const unsigned array_len = glsl_get_length(type); 2569bf215546Sopenharmony_ci const unsigned stride = glsl_get_explicit_stride(type); 2570bf215546Sopenharmony_ci assert(stride > 0); 2571bf215546Sopenharmony_ci const struct glsl_type *elem_type = glsl_get_array_element(type); 2572bf215546Sopenharmony_ci for (unsigned i = 0; i < array_len; i++) { 2573bf215546Sopenharmony_ci unsigned elem_offset = i * stride; 2574bf215546Sopenharmony_ci assert(elem_offset < dst_size); 2575bf215546Sopenharmony_ci write_constant((char *)dst + elem_offset, dst_size - elem_offset, 2576bf215546Sopenharmony_ci c->elements[i], elem_type); 2577bf215546Sopenharmony_ci } 2578bf215546Sopenharmony_ci } else { 2579bf215546Sopenharmony_ci assert(glsl_type_is_struct_or_ifc(type)); 2580bf215546Sopenharmony_ci const unsigned num_fields = glsl_get_length(type); 2581bf215546Sopenharmony_ci for (unsigned i = 0; i < num_fields; i++) { 2582bf215546Sopenharmony_ci const int field_offset = glsl_get_struct_field_offset(type, i); 2583bf215546Sopenharmony_ci assert(field_offset >= 0 && field_offset < dst_size); 2584bf215546Sopenharmony_ci const struct glsl_type *field_type = glsl_get_struct_field(type, i); 2585bf215546Sopenharmony_ci write_constant((char *)dst + field_offset, dst_size - field_offset, 2586bf215546Sopenharmony_ci c->elements[i], field_type); 2587bf215546Sopenharmony_ci } 2588bf215546Sopenharmony_ci } 2589bf215546Sopenharmony_ci} 2590bf215546Sopenharmony_ci 2591bf215546Sopenharmony_civoid 2592bf215546Sopenharmony_cinir_gather_explicit_io_initializers(nir_shader *shader, 2593bf215546Sopenharmony_ci void *dst, size_t dst_size, 2594bf215546Sopenharmony_ci nir_variable_mode mode) 2595bf215546Sopenharmony_ci{ 2596bf215546Sopenharmony_ci /* It doesn't really make sense to gather initializers for more than one 2597bf215546Sopenharmony_ci * mode at a time. If this ever becomes well-defined, we can drop the 2598bf215546Sopenharmony_ci * assert then. 2599bf215546Sopenharmony_ci */ 2600bf215546Sopenharmony_ci assert(util_bitcount(mode) == 1); 2601bf215546Sopenharmony_ci 2602bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, shader, mode) { 2603bf215546Sopenharmony_ci assert(var->data.driver_location < dst_size); 2604bf215546Sopenharmony_ci write_constant((char *)dst + var->data.driver_location, 2605bf215546Sopenharmony_ci dst_size - var->data.driver_location, 2606bf215546Sopenharmony_ci var->constant_initializer, var->type); 2607bf215546Sopenharmony_ci } 2608bf215546Sopenharmony_ci} 2609bf215546Sopenharmony_ci 2610bf215546Sopenharmony_ci/** 2611bf215546Sopenharmony_ci * Return the offset source for a load/store intrinsic. 2612bf215546Sopenharmony_ci */ 2613bf215546Sopenharmony_cinir_src * 2614bf215546Sopenharmony_cinir_get_io_offset_src(nir_intrinsic_instr *instr) 2615bf215546Sopenharmony_ci{ 2616bf215546Sopenharmony_ci switch (instr->intrinsic) { 2617bf215546Sopenharmony_ci case nir_intrinsic_load_input: 2618bf215546Sopenharmony_ci case nir_intrinsic_load_output: 2619bf215546Sopenharmony_ci case nir_intrinsic_load_shared: 2620bf215546Sopenharmony_ci case nir_intrinsic_load_task_payload: 2621bf215546Sopenharmony_ci case nir_intrinsic_load_uniform: 2622bf215546Sopenharmony_ci case nir_intrinsic_load_kernel_input: 2623bf215546Sopenharmony_ci case nir_intrinsic_load_global: 2624bf215546Sopenharmony_ci case nir_intrinsic_load_global_2x32: 2625bf215546Sopenharmony_ci case nir_intrinsic_load_global_constant: 2626bf215546Sopenharmony_ci case nir_intrinsic_load_scratch: 2627bf215546Sopenharmony_ci case nir_intrinsic_load_fs_input_interp_deltas: 2628bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_add: 2629bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_and: 2630bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_comp_swap: 2631bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_exchange: 2632bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_fadd: 2633bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_fcomp_swap: 2634bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_fmax: 2635bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_fmin: 2636bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_imax: 2637bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_imin: 2638bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_or: 2639bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_umax: 2640bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_umin: 2641bf215546Sopenharmony_ci case nir_intrinsic_shared_atomic_xor: 2642bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_add: 2643bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_imin: 2644bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_umin: 2645bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_imax: 2646bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_umax: 2647bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_and: 2648bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_or: 2649bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_xor: 2650bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_exchange: 2651bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_comp_swap: 2652bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_fadd: 2653bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_fmin: 2654bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_fmax: 2655bf215546Sopenharmony_ci case nir_intrinsic_task_payload_atomic_fcomp_swap: 2656bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_add: 2657bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_and: 2658bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_comp_swap: 2659bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_exchange: 2660bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fadd: 2661bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fcomp_swap: 2662bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fmax: 2663bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_fmin: 2664bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_imax: 2665bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_imin: 2666bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_or: 2667bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_umax: 2668bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_umin: 2669bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_xor: 2670bf215546Sopenharmony_ci return &instr->src[0]; 2671bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 2672bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo: 2673bf215546Sopenharmony_ci case nir_intrinsic_load_input_vertex: 2674bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_input: 2675bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_output: 2676bf215546Sopenharmony_ci case nir_intrinsic_load_per_primitive_output: 2677bf215546Sopenharmony_ci case nir_intrinsic_load_interpolated_input: 2678bf215546Sopenharmony_ci case nir_intrinsic_store_output: 2679bf215546Sopenharmony_ci case nir_intrinsic_store_shared: 2680bf215546Sopenharmony_ci case nir_intrinsic_store_task_payload: 2681bf215546Sopenharmony_ci case nir_intrinsic_store_global: 2682bf215546Sopenharmony_ci case nir_intrinsic_store_global_2x32: 2683bf215546Sopenharmony_ci case nir_intrinsic_store_scratch: 2684bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_add: 2685bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imin: 2686bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umin: 2687bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imax: 2688bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umax: 2689bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_and: 2690bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_or: 2691bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_xor: 2692bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_exchange: 2693bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_comp_swap: 2694bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fadd: 2695bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fmin: 2696bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fmax: 2697bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_fcomp_swap: 2698bf215546Sopenharmony_ci return &instr->src[1]; 2699bf215546Sopenharmony_ci case nir_intrinsic_store_ssbo: 2700bf215546Sopenharmony_ci case nir_intrinsic_store_per_vertex_output: 2701bf215546Sopenharmony_ci case nir_intrinsic_store_per_primitive_output: 2702bf215546Sopenharmony_ci return &instr->src[2]; 2703bf215546Sopenharmony_ci default: 2704bf215546Sopenharmony_ci return NULL; 2705bf215546Sopenharmony_ci } 2706bf215546Sopenharmony_ci} 2707bf215546Sopenharmony_ci 2708bf215546Sopenharmony_ci/** 2709bf215546Sopenharmony_ci * Return the vertex index source for a load/store per_vertex intrinsic. 2710bf215546Sopenharmony_ci */ 2711bf215546Sopenharmony_cinir_src * 2712bf215546Sopenharmony_cinir_get_io_arrayed_index_src(nir_intrinsic_instr *instr) 2713bf215546Sopenharmony_ci{ 2714bf215546Sopenharmony_ci switch (instr->intrinsic) { 2715bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_input: 2716bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_output: 2717bf215546Sopenharmony_ci case nir_intrinsic_load_per_primitive_output: 2718bf215546Sopenharmony_ci return &instr->src[0]; 2719bf215546Sopenharmony_ci case nir_intrinsic_store_per_vertex_output: 2720bf215546Sopenharmony_ci case nir_intrinsic_store_per_primitive_output: 2721bf215546Sopenharmony_ci return &instr->src[1]; 2722bf215546Sopenharmony_ci default: 2723bf215546Sopenharmony_ci return NULL; 2724bf215546Sopenharmony_ci } 2725bf215546Sopenharmony_ci} 2726bf215546Sopenharmony_ci 2727bf215546Sopenharmony_ci/** 2728bf215546Sopenharmony_ci * Return the numeric constant that identify a NULL pointer for each address 2729bf215546Sopenharmony_ci * format. 2730bf215546Sopenharmony_ci */ 2731bf215546Sopenharmony_ciconst nir_const_value * 2732bf215546Sopenharmony_cinir_address_format_null_value(nir_address_format addr_format) 2733bf215546Sopenharmony_ci{ 2734bf215546Sopenharmony_ci const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = { 2735bf215546Sopenharmony_ci [nir_address_format_32bit_global] = {{0}}, 2736bf215546Sopenharmony_ci [nir_address_format_2x32bit_global] = {{0}}, 2737bf215546Sopenharmony_ci [nir_address_format_64bit_global] = {{0}}, 2738bf215546Sopenharmony_ci [nir_address_format_64bit_global_32bit_offset] = {{0}}, 2739bf215546Sopenharmony_ci [nir_address_format_64bit_bounded_global] = {{0}}, 2740bf215546Sopenharmony_ci [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}}, 2741bf215546Sopenharmony_ci [nir_address_format_32bit_index_offset_pack64] = {{.u64 = ~0ull}}, 2742bf215546Sopenharmony_ci [nir_address_format_vec2_index_32bit_offset] = {{.u32 = ~0}, {.u32 = ~0}, {.u32 = ~0}}, 2743bf215546Sopenharmony_ci [nir_address_format_32bit_offset] = {{.u32 = ~0}}, 2744bf215546Sopenharmony_ci [nir_address_format_32bit_offset_as_64bit] = {{.u64 = ~0ull}}, 2745bf215546Sopenharmony_ci [nir_address_format_62bit_generic] = {{.u64 = 0}}, 2746bf215546Sopenharmony_ci [nir_address_format_logical] = {{.u32 = ~0}}, 2747bf215546Sopenharmony_ci }; 2748bf215546Sopenharmony_ci 2749bf215546Sopenharmony_ci assert(addr_format < ARRAY_SIZE(null_values)); 2750bf215546Sopenharmony_ci return null_values[addr_format]; 2751bf215546Sopenharmony_ci} 2752bf215546Sopenharmony_ci 2753bf215546Sopenharmony_cinir_ssa_def * 2754bf215546Sopenharmony_cinir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, 2755bf215546Sopenharmony_ci nir_address_format addr_format) 2756bf215546Sopenharmony_ci{ 2757bf215546Sopenharmony_ci switch (addr_format) { 2758bf215546Sopenharmony_ci case nir_address_format_32bit_global: 2759bf215546Sopenharmony_ci case nir_address_format_2x32bit_global: 2760bf215546Sopenharmony_ci case nir_address_format_64bit_global: 2761bf215546Sopenharmony_ci case nir_address_format_64bit_bounded_global: 2762bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset: 2763bf215546Sopenharmony_ci case nir_address_format_vec2_index_32bit_offset: 2764bf215546Sopenharmony_ci case nir_address_format_32bit_offset: 2765bf215546Sopenharmony_ci case nir_address_format_62bit_generic: 2766bf215546Sopenharmony_ci return nir_ball_iequal(b, addr0, addr1); 2767bf215546Sopenharmony_ci 2768bf215546Sopenharmony_ci case nir_address_format_64bit_global_32bit_offset: 2769bf215546Sopenharmony_ci return nir_ball_iequal(b, nir_channels(b, addr0, 0xb), 2770bf215546Sopenharmony_ci nir_channels(b, addr1, 0xb)); 2771bf215546Sopenharmony_ci 2772bf215546Sopenharmony_ci case nir_address_format_32bit_offset_as_64bit: 2773bf215546Sopenharmony_ci assert(addr0->num_components == 1 && addr1->num_components == 1); 2774bf215546Sopenharmony_ci return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)); 2775bf215546Sopenharmony_ci 2776bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset_pack64: 2777bf215546Sopenharmony_ci assert(addr0->num_components == 1 && addr1->num_components == 1); 2778bf215546Sopenharmony_ci return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1)); 2779bf215546Sopenharmony_ci 2780bf215546Sopenharmony_ci case nir_address_format_logical: 2781bf215546Sopenharmony_ci unreachable("Unsupported address format"); 2782bf215546Sopenharmony_ci } 2783bf215546Sopenharmony_ci 2784bf215546Sopenharmony_ci unreachable("Invalid address format"); 2785bf215546Sopenharmony_ci} 2786bf215546Sopenharmony_ci 2787bf215546Sopenharmony_cinir_ssa_def * 2788bf215546Sopenharmony_cinir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, 2789bf215546Sopenharmony_ci nir_address_format addr_format) 2790bf215546Sopenharmony_ci{ 2791bf215546Sopenharmony_ci switch (addr_format) { 2792bf215546Sopenharmony_ci case nir_address_format_32bit_global: 2793bf215546Sopenharmony_ci case nir_address_format_64bit_global: 2794bf215546Sopenharmony_ci case nir_address_format_32bit_offset: 2795bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset_pack64: 2796bf215546Sopenharmony_ci case nir_address_format_62bit_generic: 2797bf215546Sopenharmony_ci assert(addr0->num_components == 1); 2798bf215546Sopenharmony_ci assert(addr1->num_components == 1); 2799bf215546Sopenharmony_ci return nir_isub(b, addr0, addr1); 2800bf215546Sopenharmony_ci 2801bf215546Sopenharmony_ci case nir_address_format_2x32bit_global: 2802bf215546Sopenharmony_ci return nir_isub(b, addr_to_global(b, addr0, addr_format), 2803bf215546Sopenharmony_ci addr_to_global(b, addr1, addr_format)); 2804bf215546Sopenharmony_ci 2805bf215546Sopenharmony_ci case nir_address_format_32bit_offset_as_64bit: 2806bf215546Sopenharmony_ci assert(addr0->num_components == 1); 2807bf215546Sopenharmony_ci assert(addr1->num_components == 1); 2808bf215546Sopenharmony_ci return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1))); 2809bf215546Sopenharmony_ci 2810bf215546Sopenharmony_ci case nir_address_format_64bit_global_32bit_offset: 2811bf215546Sopenharmony_ci case nir_address_format_64bit_bounded_global: 2812bf215546Sopenharmony_ci return nir_isub(b, addr_to_global(b, addr0, addr_format), 2813bf215546Sopenharmony_ci addr_to_global(b, addr1, addr_format)); 2814bf215546Sopenharmony_ci 2815bf215546Sopenharmony_ci case nir_address_format_32bit_index_offset: 2816bf215546Sopenharmony_ci assert(addr0->num_components == 2); 2817bf215546Sopenharmony_ci assert(addr1->num_components == 2); 2818bf215546Sopenharmony_ci /* Assume the same buffer index. */ 2819bf215546Sopenharmony_ci return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1)); 2820bf215546Sopenharmony_ci 2821bf215546Sopenharmony_ci case nir_address_format_vec2_index_32bit_offset: 2822bf215546Sopenharmony_ci assert(addr0->num_components == 3); 2823bf215546Sopenharmony_ci assert(addr1->num_components == 3); 2824bf215546Sopenharmony_ci /* Assume the same buffer index. */ 2825bf215546Sopenharmony_ci return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2)); 2826bf215546Sopenharmony_ci 2827bf215546Sopenharmony_ci case nir_address_format_logical: 2828bf215546Sopenharmony_ci unreachable("Unsupported address format"); 2829bf215546Sopenharmony_ci } 2830bf215546Sopenharmony_ci 2831bf215546Sopenharmony_ci unreachable("Invalid address format"); 2832bf215546Sopenharmony_ci} 2833bf215546Sopenharmony_ci 2834bf215546Sopenharmony_cistatic bool 2835bf215546Sopenharmony_ciis_input(nir_intrinsic_instr *intrin) 2836bf215546Sopenharmony_ci{ 2837bf215546Sopenharmony_ci return intrin->intrinsic == nir_intrinsic_load_input || 2838bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_per_vertex_input || 2839bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_interpolated_input || 2840bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas; 2841bf215546Sopenharmony_ci} 2842bf215546Sopenharmony_ci 2843bf215546Sopenharmony_cistatic bool 2844bf215546Sopenharmony_ciis_output(nir_intrinsic_instr *intrin) 2845bf215546Sopenharmony_ci{ 2846bf215546Sopenharmony_ci return intrin->intrinsic == nir_intrinsic_load_output || 2847bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_per_vertex_output || 2848bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_load_per_primitive_output || 2849bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_store_output || 2850bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_store_per_vertex_output || 2851bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_store_per_primitive_output; 2852bf215546Sopenharmony_ci} 2853bf215546Sopenharmony_ci 2854bf215546Sopenharmony_cistatic bool is_dual_slot(nir_intrinsic_instr *intrin) 2855bf215546Sopenharmony_ci{ 2856bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_store_output || 2857bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_store_per_vertex_output || 2858bf215546Sopenharmony_ci intrin->intrinsic == nir_intrinsic_store_per_primitive_output) { 2859bf215546Sopenharmony_ci return nir_src_bit_size(intrin->src[0]) == 64 && 2860bf215546Sopenharmony_ci nir_src_num_components(intrin->src[0]) >= 3; 2861bf215546Sopenharmony_ci } 2862bf215546Sopenharmony_ci 2863bf215546Sopenharmony_ci return nir_dest_bit_size(intrin->dest) == 64 && 2864bf215546Sopenharmony_ci nir_dest_num_components(intrin->dest) >= 3; 2865bf215546Sopenharmony_ci} 2866bf215546Sopenharmony_ci 2867bf215546Sopenharmony_ci/** 2868bf215546Sopenharmony_ci * This pass adds constant offsets to instr->const_index[0] for input/output 2869bf215546Sopenharmony_ci * intrinsics, and resets the offset source to 0. Non-constant offsets remain 2870bf215546Sopenharmony_ci * unchanged - since we don't know what part of a compound variable is 2871bf215546Sopenharmony_ci * accessed, we allocate storage for the entire thing. For drivers that use 2872bf215546Sopenharmony_ci * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that 2873bf215546Sopenharmony_ci * the offset source will be 0, so that they don't have to add it in manually. 2874bf215546Sopenharmony_ci */ 2875bf215546Sopenharmony_ci 2876bf215546Sopenharmony_cistatic bool 2877bf215546Sopenharmony_ciadd_const_offset_to_base_block(nir_block *block, nir_builder *b, 2878bf215546Sopenharmony_ci nir_variable_mode modes) 2879bf215546Sopenharmony_ci{ 2880bf215546Sopenharmony_ci bool progress = false; 2881bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 2882bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 2883bf215546Sopenharmony_ci continue; 2884bf215546Sopenharmony_ci 2885bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 2886bf215546Sopenharmony_ci 2887bf215546Sopenharmony_ci if (((modes & nir_var_shader_in) && is_input(intrin)) || 2888bf215546Sopenharmony_ci ((modes & nir_var_shader_out) && is_output(intrin))) { 2889bf215546Sopenharmony_ci nir_io_semantics sem = nir_intrinsic_io_semantics(intrin); 2890bf215546Sopenharmony_ci 2891bf215546Sopenharmony_ci /* NV_mesh_shader: ignore MS primitive indices. */ 2892bf215546Sopenharmony_ci if (b->shader->info.stage == MESA_SHADER_MESH && 2893bf215546Sopenharmony_ci sem.location == VARYING_SLOT_PRIMITIVE_INDICES && 2894bf215546Sopenharmony_ci !(b->shader->info.per_primitive_outputs & 2895bf215546Sopenharmony_ci BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES))) 2896bf215546Sopenharmony_ci continue; 2897bf215546Sopenharmony_ci 2898bf215546Sopenharmony_ci nir_src *offset = nir_get_io_offset_src(intrin); 2899bf215546Sopenharmony_ci 2900bf215546Sopenharmony_ci /* TODO: Better handling of per-view variables here */ 2901bf215546Sopenharmony_ci if (nir_src_is_const(*offset) && 2902bf215546Sopenharmony_ci !nir_intrinsic_io_semantics(intrin).per_view) { 2903bf215546Sopenharmony_ci unsigned off = nir_src_as_uint(*offset); 2904bf215546Sopenharmony_ci 2905bf215546Sopenharmony_ci nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off); 2906bf215546Sopenharmony_ci 2907bf215546Sopenharmony_ci sem.location += off; 2908bf215546Sopenharmony_ci /* non-indirect indexing should reduce num_slots */ 2909bf215546Sopenharmony_ci sem.num_slots = is_dual_slot(intrin) ? 2 : 1; 2910bf215546Sopenharmony_ci nir_intrinsic_set_io_semantics(intrin, sem); 2911bf215546Sopenharmony_ci 2912bf215546Sopenharmony_ci b->cursor = nir_before_instr(&intrin->instr); 2913bf215546Sopenharmony_ci nir_instr_rewrite_src(&intrin->instr, offset, 2914bf215546Sopenharmony_ci nir_src_for_ssa(nir_imm_int(b, 0))); 2915bf215546Sopenharmony_ci progress = true; 2916bf215546Sopenharmony_ci } 2917bf215546Sopenharmony_ci } 2918bf215546Sopenharmony_ci } 2919bf215546Sopenharmony_ci 2920bf215546Sopenharmony_ci return progress; 2921bf215546Sopenharmony_ci} 2922bf215546Sopenharmony_ci 2923bf215546Sopenharmony_cibool 2924bf215546Sopenharmony_cinir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes) 2925bf215546Sopenharmony_ci{ 2926bf215546Sopenharmony_ci bool progress = false; 2927bf215546Sopenharmony_ci 2928bf215546Sopenharmony_ci nir_foreach_function(f, nir) { 2929bf215546Sopenharmony_ci if (f->impl) { 2930bf215546Sopenharmony_ci bool impl_progress = false; 2931bf215546Sopenharmony_ci nir_builder b; 2932bf215546Sopenharmony_ci nir_builder_init(&b, f->impl); 2933bf215546Sopenharmony_ci nir_foreach_block(block, f->impl) { 2934bf215546Sopenharmony_ci impl_progress |= add_const_offset_to_base_block(block, &b, modes); 2935bf215546Sopenharmony_ci } 2936bf215546Sopenharmony_ci progress |= impl_progress; 2937bf215546Sopenharmony_ci if (impl_progress) 2938bf215546Sopenharmony_ci nir_metadata_preserve(f->impl, nir_metadata_block_index | nir_metadata_dominance); 2939bf215546Sopenharmony_ci else 2940bf215546Sopenharmony_ci nir_metadata_preserve(f->impl, nir_metadata_all); 2941bf215546Sopenharmony_ci } 2942bf215546Sopenharmony_ci } 2943bf215546Sopenharmony_ci 2944bf215546Sopenharmony_ci return progress; 2945bf215546Sopenharmony_ci} 2946bf215546Sopenharmony_ci 2947bf215546Sopenharmony_cistatic bool 2948bf215546Sopenharmony_cinir_lower_color_inputs(nir_shader *nir) 2949bf215546Sopenharmony_ci{ 2950bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(nir); 2951bf215546Sopenharmony_ci bool progress = false; 2952bf215546Sopenharmony_ci 2953bf215546Sopenharmony_ci nir_builder b; 2954bf215546Sopenharmony_ci nir_builder_init(&b, impl); 2955bf215546Sopenharmony_ci 2956bf215546Sopenharmony_ci nir_foreach_block (block, impl) { 2957bf215546Sopenharmony_ci nir_foreach_instr_safe (instr, block) { 2958bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 2959bf215546Sopenharmony_ci continue; 2960bf215546Sopenharmony_ci 2961bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 2962bf215546Sopenharmony_ci 2963bf215546Sopenharmony_ci if (intrin->intrinsic != nir_intrinsic_load_deref) 2964bf215546Sopenharmony_ci continue; 2965bf215546Sopenharmony_ci 2966bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2967bf215546Sopenharmony_ci if (!nir_deref_mode_is(deref, nir_var_shader_in)) 2968bf215546Sopenharmony_ci continue; 2969bf215546Sopenharmony_ci 2970bf215546Sopenharmony_ci b.cursor = nir_before_instr(instr); 2971bf215546Sopenharmony_ci nir_variable *var = nir_deref_instr_get_variable(deref); 2972bf215546Sopenharmony_ci nir_ssa_def *def; 2973bf215546Sopenharmony_ci 2974bf215546Sopenharmony_ci if (var->data.location == VARYING_SLOT_COL0) { 2975bf215546Sopenharmony_ci def = nir_load_color0(&b); 2976bf215546Sopenharmony_ci nir->info.fs.color0_interp = var->data.interpolation; 2977bf215546Sopenharmony_ci nir->info.fs.color0_sample = var->data.sample; 2978bf215546Sopenharmony_ci nir->info.fs.color0_centroid = var->data.centroid; 2979bf215546Sopenharmony_ci } else if (var->data.location == VARYING_SLOT_COL1) { 2980bf215546Sopenharmony_ci def = nir_load_color1(&b); 2981bf215546Sopenharmony_ci nir->info.fs.color1_interp = var->data.interpolation; 2982bf215546Sopenharmony_ci nir->info.fs.color1_sample = var->data.sample; 2983bf215546Sopenharmony_ci nir->info.fs.color1_centroid = var->data.centroid; 2984bf215546Sopenharmony_ci } else { 2985bf215546Sopenharmony_ci continue; 2986bf215546Sopenharmony_ci } 2987bf215546Sopenharmony_ci 2988bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intrin->dest.ssa, def); 2989bf215546Sopenharmony_ci nir_instr_remove(instr); 2990bf215546Sopenharmony_ci progress = true; 2991bf215546Sopenharmony_ci } 2992bf215546Sopenharmony_ci } 2993bf215546Sopenharmony_ci 2994bf215546Sopenharmony_ci if (progress) { 2995bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_dominance | 2996bf215546Sopenharmony_ci nir_metadata_block_index); 2997bf215546Sopenharmony_ci } else { 2998bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 2999bf215546Sopenharmony_ci } 3000bf215546Sopenharmony_ci return progress; 3001bf215546Sopenharmony_ci} 3002bf215546Sopenharmony_ci 3003bf215546Sopenharmony_cibool 3004bf215546Sopenharmony_cinir_io_add_intrinsic_xfb_info(nir_shader *nir) 3005bf215546Sopenharmony_ci{ 3006bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(nir); 3007bf215546Sopenharmony_ci bool progress = false; 3008bf215546Sopenharmony_ci 3009bf215546Sopenharmony_ci for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++) 3010bf215546Sopenharmony_ci nir->info.xfb_stride[i] = nir->xfb_info->buffers[i].stride / 4; 3011bf215546Sopenharmony_ci 3012bf215546Sopenharmony_ci nir_foreach_block (block, impl) { 3013bf215546Sopenharmony_ci nir_foreach_instr_safe (instr, block) { 3014bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 3015bf215546Sopenharmony_ci continue; 3016bf215546Sopenharmony_ci 3017bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 3018bf215546Sopenharmony_ci 3019bf215546Sopenharmony_ci if (!nir_intrinsic_has_io_xfb(intr)) 3020bf215546Sopenharmony_ci continue; 3021bf215546Sopenharmony_ci 3022bf215546Sopenharmony_ci /* No indirect indexing allowed. The index is implied to be 0. */ 3023bf215546Sopenharmony_ci ASSERTED nir_src offset = *nir_get_io_offset_src(intr); 3024bf215546Sopenharmony_ci assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0); 3025bf215546Sopenharmony_ci 3026bf215546Sopenharmony_ci /* Calling this pass for the second time shouldn't do anything. */ 3027bf215546Sopenharmony_ci if (nir_intrinsic_io_xfb(intr).out[0].num_components || 3028bf215546Sopenharmony_ci nir_intrinsic_io_xfb(intr).out[1].num_components || 3029bf215546Sopenharmony_ci nir_intrinsic_io_xfb2(intr).out[0].num_components || 3030bf215546Sopenharmony_ci nir_intrinsic_io_xfb2(intr).out[1].num_components) 3031bf215546Sopenharmony_ci continue; 3032bf215546Sopenharmony_ci 3033bf215546Sopenharmony_ci nir_io_semantics sem = nir_intrinsic_io_semantics(intr); 3034bf215546Sopenharmony_ci unsigned writemask = nir_intrinsic_write_mask(intr) << 3035bf215546Sopenharmony_ci nir_intrinsic_component(intr); 3036bf215546Sopenharmony_ci 3037bf215546Sopenharmony_ci nir_io_xfb xfb[2]; 3038bf215546Sopenharmony_ci memset(xfb, 0, sizeof(xfb)); 3039bf215546Sopenharmony_ci 3040bf215546Sopenharmony_ci for (unsigned i = 0; i < nir->xfb_info->output_count; i++) { 3041bf215546Sopenharmony_ci nir_xfb_output_info *out = &nir->xfb_info->outputs[i]; 3042bf215546Sopenharmony_ci if (out->location == sem.location) { 3043bf215546Sopenharmony_ci unsigned xfb_mask = writemask & out->component_mask; 3044bf215546Sopenharmony_ci 3045bf215546Sopenharmony_ci /*fprintf(stdout, "output%u: buffer=%u, offset=%u, location=%u, " 3046bf215546Sopenharmony_ci "component_offset=%u, component_mask=0x%x, xfb_mask=0x%x, slots=%u\n", 3047bf215546Sopenharmony_ci i, out->buffer, 3048bf215546Sopenharmony_ci out->offset, 3049bf215546Sopenharmony_ci out->location, 3050bf215546Sopenharmony_ci out->component_offset, 3051bf215546Sopenharmony_ci out->component_mask, 3052bf215546Sopenharmony_ci xfb_mask, sem.num_slots);*/ 3053bf215546Sopenharmony_ci 3054bf215546Sopenharmony_ci while (xfb_mask) { 3055bf215546Sopenharmony_ci int start, count; 3056bf215546Sopenharmony_ci u_bit_scan_consecutive_range(&xfb_mask, &start, &count); 3057bf215546Sopenharmony_ci 3058bf215546Sopenharmony_ci xfb[start / 2].out[start % 2].num_components = count; 3059bf215546Sopenharmony_ci xfb[start / 2].out[start % 2].buffer = out->buffer; 3060bf215546Sopenharmony_ci /* out->offset is relative to the first stored xfb component */ 3061bf215546Sopenharmony_ci /* start is relative to component 0 */ 3062bf215546Sopenharmony_ci xfb[start / 2].out[start % 2].offset = 3063bf215546Sopenharmony_ci out->offset / 4 - out->component_offset + start; 3064bf215546Sopenharmony_ci 3065bf215546Sopenharmony_ci progress = true; 3066bf215546Sopenharmony_ci } 3067bf215546Sopenharmony_ci } 3068bf215546Sopenharmony_ci } 3069bf215546Sopenharmony_ci 3070bf215546Sopenharmony_ci nir_intrinsic_set_io_xfb(intr, xfb[0]); 3071bf215546Sopenharmony_ci nir_intrinsic_set_io_xfb2(intr, xfb[1]); 3072bf215546Sopenharmony_ci } 3073bf215546Sopenharmony_ci } 3074bf215546Sopenharmony_ci 3075bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 3076bf215546Sopenharmony_ci return progress; 3077bf215546Sopenharmony_ci} 3078bf215546Sopenharmony_ci 3079bf215546Sopenharmony_cistatic int 3080bf215546Sopenharmony_citype_size_vec4(const struct glsl_type *type, bool bindless) 3081bf215546Sopenharmony_ci{ 3082bf215546Sopenharmony_ci return glsl_count_attribute_slots(type, false); 3083bf215546Sopenharmony_ci} 3084bf215546Sopenharmony_ci 3085bf215546Sopenharmony_civoid 3086bf215546Sopenharmony_cinir_lower_io_passes(nir_shader *nir) 3087bf215546Sopenharmony_ci{ 3088bf215546Sopenharmony_ci if (!nir->options->lower_io_variables) 3089bf215546Sopenharmony_ci return; 3090bf215546Sopenharmony_ci 3091bf215546Sopenharmony_ci bool has_indirect_inputs = 3092bf215546Sopenharmony_ci (nir->options->support_indirect_inputs >> nir->info.stage) & 0x1; 3093bf215546Sopenharmony_ci 3094bf215546Sopenharmony_ci /* Transform feedback requires that indirect outputs are lowered. */ 3095bf215546Sopenharmony_ci bool has_indirect_outputs = 3096bf215546Sopenharmony_ci (nir->options->support_indirect_outputs >> nir->info.stage) & 0x1 && 3097bf215546Sopenharmony_ci nir->xfb_info == NULL; 3098bf215546Sopenharmony_ci 3099bf215546Sopenharmony_ci if (!has_indirect_inputs || !has_indirect_outputs) { 3100bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_io_to_temporaries, 3101bf215546Sopenharmony_ci nir_shader_get_entrypoint(nir), !has_indirect_outputs, 3102bf215546Sopenharmony_ci !has_indirect_inputs); 3103bf215546Sopenharmony_ci 3104bf215546Sopenharmony_ci /* We need to lower all the copy_deref's introduced by lower_io_to- 3105bf215546Sopenharmony_ci * _temporaries before calling nir_lower_io. 3106bf215546Sopenharmony_ci */ 3107bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_split_var_copies); 3108bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_var_copies); 3109bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_global_vars_to_local); 3110bf215546Sopenharmony_ci } 3111bf215546Sopenharmony_ci 3112bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_FRAGMENT && 3113bf215546Sopenharmony_ci nir->options->lower_fs_color_inputs) 3114bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_color_inputs); 3115bf215546Sopenharmony_ci 3116bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in, 3117bf215546Sopenharmony_ci type_size_vec4, nir_lower_io_lower_64bit_to_32); 3118bf215546Sopenharmony_ci 3119bf215546Sopenharmony_ci /* nir_io_add_const_offset_to_base needs actual constants. */ 3120bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_opt_constant_folding); 3121bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | 3122bf215546Sopenharmony_ci nir_var_shader_out); 3123bf215546Sopenharmony_ci 3124bf215546Sopenharmony_ci /* Lower and remove dead derefs and variables to clean up the IR. */ 3125bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_vars_to_ssa); 3126bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_opt_dce); 3127bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp | 3128bf215546Sopenharmony_ci nir_var_shader_in | nir_var_shader_out, NULL); 3129bf215546Sopenharmony_ci 3130bf215546Sopenharmony_ci if (nir->xfb_info) 3131bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info); 3132bf215546Sopenharmony_ci 3133bf215546Sopenharmony_ci nir->info.io_lowered = true; 3134bf215546Sopenharmony_ci} 3135