1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2016 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir.h" 25bf215546Sopenharmony_ci#include "nir_builder.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_cistatic void 28bf215546Sopenharmony_cibuild_constant_load(nir_builder *b, nir_deref_instr *deref, nir_constant *c) 29bf215546Sopenharmony_ci{ 30bf215546Sopenharmony_ci if (glsl_type_is_vector_or_scalar(deref->type)) { 31bf215546Sopenharmony_ci nir_load_const_instr *load = 32bf215546Sopenharmony_ci nir_load_const_instr_create(b->shader, 33bf215546Sopenharmony_ci glsl_get_vector_elements(deref->type), 34bf215546Sopenharmony_ci glsl_get_bit_size(deref->type)); 35bf215546Sopenharmony_ci memcpy(load->value, c->values, sizeof(*load->value) * load->def.num_components); 36bf215546Sopenharmony_ci nir_builder_instr_insert(b, &load->instr); 37bf215546Sopenharmony_ci nir_store_deref(b, deref, &load->def, ~0); 38bf215546Sopenharmony_ci } else if (glsl_type_is_struct_or_ifc(deref->type)) { 39bf215546Sopenharmony_ci unsigned len = glsl_get_length(deref->type); 40bf215546Sopenharmony_ci for (unsigned i = 0; i < len; i++) { 41bf215546Sopenharmony_ci build_constant_load(b, nir_build_deref_struct(b, deref, i), 42bf215546Sopenharmony_ci c->elements[i]); 43bf215546Sopenharmony_ci } 44bf215546Sopenharmony_ci } else { 45bf215546Sopenharmony_ci assert(glsl_type_is_array(deref->type) || 46bf215546Sopenharmony_ci glsl_type_is_matrix(deref->type)); 47bf215546Sopenharmony_ci unsigned len = glsl_get_length(deref->type); 48bf215546Sopenharmony_ci for (unsigned i = 0; i < len; i++) { 49bf215546Sopenharmony_ci build_constant_load(b, 50bf215546Sopenharmony_ci nir_build_deref_array_imm(b, deref, i), 51bf215546Sopenharmony_ci c->elements[i]); 52bf215546Sopenharmony_ci } 53bf215546Sopenharmony_ci } 54bf215546Sopenharmony_ci} 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_cistatic bool 57bf215546Sopenharmony_cilower_const_initializer(struct nir_builder *b, struct exec_list *var_list, 58bf215546Sopenharmony_ci nir_variable_mode modes) 59bf215546Sopenharmony_ci{ 60bf215546Sopenharmony_ci bool progress = false; 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci b->cursor = nir_before_cf_list(&b->impl->body); 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci nir_foreach_variable_in_list(var, var_list) { 65bf215546Sopenharmony_ci if (!(var->data.mode & modes)) 66bf215546Sopenharmony_ci continue; 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci if (var->constant_initializer) { 69bf215546Sopenharmony_ci build_constant_load(b, nir_build_deref_var(b, var), 70bf215546Sopenharmony_ci var->constant_initializer); 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci progress = true; 73bf215546Sopenharmony_ci var->constant_initializer = NULL; 74bf215546Sopenharmony_ci } else if (var->pointer_initializer) { 75bf215546Sopenharmony_ci nir_deref_instr *src_deref = nir_build_deref_var(b, var->pointer_initializer); 76bf215546Sopenharmony_ci nir_deref_instr *dst_deref = nir_build_deref_var(b, var); 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci /* Note that this stores a pointer to src into dst */ 79bf215546Sopenharmony_ci nir_store_deref(b, dst_deref, &src_deref->dest.ssa, ~0); 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci progress = true; 82bf215546Sopenharmony_ci var->pointer_initializer = NULL; 83bf215546Sopenharmony_ci } 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci } 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci return progress; 88bf215546Sopenharmony_ci} 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_cibool 91bf215546Sopenharmony_cinir_lower_variable_initializers(nir_shader *shader, nir_variable_mode modes) 92bf215546Sopenharmony_ci{ 93bf215546Sopenharmony_ci bool progress = false; 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci /* Only some variables have initializers that we want to lower. Others 96bf215546Sopenharmony_ci * such as uniforms have initializers which are useful later during linking 97bf215546Sopenharmony_ci * so we want to skip over those. Restrict to only variable types where 98bf215546Sopenharmony_ci * initializers make sense so that callers can use nir_var_all. 99bf215546Sopenharmony_ci */ 100bf215546Sopenharmony_ci modes &= nir_var_shader_out | 101bf215546Sopenharmony_ci nir_var_shader_temp | 102bf215546Sopenharmony_ci nir_var_function_temp | 103bf215546Sopenharmony_ci nir_var_system_value; 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 106bf215546Sopenharmony_ci if (!function->impl) 107bf215546Sopenharmony_ci continue; 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci bool impl_progress = false; 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci nir_builder builder; 112bf215546Sopenharmony_ci nir_builder_init(&builder, function->impl); 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci if ((modes & ~nir_var_function_temp) && function->is_entrypoint) { 115bf215546Sopenharmony_ci impl_progress |= lower_const_initializer(&builder, 116bf215546Sopenharmony_ci &shader->variables, 117bf215546Sopenharmony_ci modes); 118bf215546Sopenharmony_ci } 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci if (modes & nir_var_function_temp) { 121bf215546Sopenharmony_ci impl_progress |= lower_const_initializer(&builder, 122bf215546Sopenharmony_ci &function->impl->locals, 123bf215546Sopenharmony_ci nir_var_function_temp); 124bf215546Sopenharmony_ci } 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci if (impl_progress) { 127bf215546Sopenharmony_ci progress = true; 128bf215546Sopenharmony_ci nir_metadata_preserve(function->impl, nir_metadata_block_index | 129bf215546Sopenharmony_ci nir_metadata_dominance | 130bf215546Sopenharmony_ci nir_metadata_live_ssa_defs); 131bf215546Sopenharmony_ci } else { 132bf215546Sopenharmony_ci nir_metadata_preserve(function->impl, nir_metadata_all); 133bf215546Sopenharmony_ci } 134bf215546Sopenharmony_ci } 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci return progress; 137bf215546Sopenharmony_ci} 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci/* Zero initialize shared_size bytes of shared memory by splitting work writes 140bf215546Sopenharmony_ci * of chunk_size bytes among the invocations. 141bf215546Sopenharmony_ci * 142bf215546Sopenharmony_ci * Used for implementing VK_KHR_zero_initialize_workgroup_memory. 143bf215546Sopenharmony_ci */ 144bf215546Sopenharmony_cibool 145bf215546Sopenharmony_cinir_zero_initialize_shared_memory(nir_shader *shader, 146bf215546Sopenharmony_ci const unsigned shared_size, 147bf215546Sopenharmony_ci const unsigned chunk_size) 148bf215546Sopenharmony_ci{ 149bf215546Sopenharmony_ci assert(shared_size > 0); 150bf215546Sopenharmony_ci assert(chunk_size > 0); 151bf215546Sopenharmony_ci assert(chunk_size % 4 == 0); 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci nir_builder b; 154bf215546Sopenharmony_ci nir_builder_init(&b, nir_shader_get_entrypoint(shader)); 155bf215546Sopenharmony_ci b.cursor = nir_before_cf_list(&b.impl->body); 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci assert(!shader->info.workgroup_size_variable); 158bf215546Sopenharmony_ci const unsigned local_count = shader->info.workgroup_size[0] * 159bf215546Sopenharmony_ci shader->info.workgroup_size[1] * 160bf215546Sopenharmony_ci shader->info.workgroup_size[2]; 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci /* The initialization logic is simplified if we can always split the memory 163bf215546Sopenharmony_ci * in full chunk_size units. 164bf215546Sopenharmony_ci */ 165bf215546Sopenharmony_ci assert(shared_size % chunk_size == 0); 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci const unsigned chunk_comps = chunk_size / 4; 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci nir_variable *it = nir_local_variable_create(b.impl, glsl_uint_type(), 170bf215546Sopenharmony_ci "zero_init_iterator"); 171bf215546Sopenharmony_ci nir_ssa_def *local_index = nir_load_local_invocation_index(&b); 172bf215546Sopenharmony_ci nir_ssa_def *first_offset = nir_imul_imm(&b, local_index, chunk_size); 173bf215546Sopenharmony_ci nir_store_var(&b, it, first_offset, 0x1); 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_ci nir_loop *loop = nir_push_loop(&b); 176bf215546Sopenharmony_ci { 177bf215546Sopenharmony_ci nir_ssa_def *offset = nir_load_var(&b, it); 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci nir_push_if(&b, nir_uge(&b, offset, nir_imm_int(&b, shared_size))); 180bf215546Sopenharmony_ci { 181bf215546Sopenharmony_ci nir_jump(&b, nir_jump_break); 182bf215546Sopenharmony_ci } 183bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci nir_store_shared(&b, nir_imm_zero(&b, chunk_comps, 32), offset, 186bf215546Sopenharmony_ci .align_mul=chunk_size, 187bf215546Sopenharmony_ci .write_mask=((1 << chunk_comps) - 1)); 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci nir_ssa_def *new_offset = nir_iadd_imm(&b, offset, chunk_size * local_count); 190bf215546Sopenharmony_ci nir_store_var(&b, it, new_offset, 0x1); 191bf215546Sopenharmony_ci } 192bf215546Sopenharmony_ci nir_pop_loop(&b, loop); 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci nir_scoped_barrier(&b, NIR_SCOPE_WORKGROUP, NIR_SCOPE_WORKGROUP, 195bf215546Sopenharmony_ci NIR_MEMORY_ACQ_REL, nir_var_mem_shared); 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_none); 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci return true; 200bf215546Sopenharmony_ci} 201