1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2016 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "nir.h"
25bf215546Sopenharmony_ci#include "nir_builder.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_cistatic void
28bf215546Sopenharmony_cibuild_constant_load(nir_builder *b, nir_deref_instr *deref, nir_constant *c)
29bf215546Sopenharmony_ci{
30bf215546Sopenharmony_ci   if (glsl_type_is_vector_or_scalar(deref->type)) {
31bf215546Sopenharmony_ci      nir_load_const_instr *load =
32bf215546Sopenharmony_ci         nir_load_const_instr_create(b->shader,
33bf215546Sopenharmony_ci                                     glsl_get_vector_elements(deref->type),
34bf215546Sopenharmony_ci                                     glsl_get_bit_size(deref->type));
35bf215546Sopenharmony_ci      memcpy(load->value, c->values, sizeof(*load->value) * load->def.num_components);
36bf215546Sopenharmony_ci      nir_builder_instr_insert(b, &load->instr);
37bf215546Sopenharmony_ci      nir_store_deref(b, deref, &load->def, ~0);
38bf215546Sopenharmony_ci   } else if (glsl_type_is_struct_or_ifc(deref->type)) {
39bf215546Sopenharmony_ci      unsigned len = glsl_get_length(deref->type);
40bf215546Sopenharmony_ci      for (unsigned i = 0; i < len; i++) {
41bf215546Sopenharmony_ci         build_constant_load(b, nir_build_deref_struct(b, deref, i),
42bf215546Sopenharmony_ci                             c->elements[i]);
43bf215546Sopenharmony_ci      }
44bf215546Sopenharmony_ci   } else {
45bf215546Sopenharmony_ci      assert(glsl_type_is_array(deref->type) ||
46bf215546Sopenharmony_ci             glsl_type_is_matrix(deref->type));
47bf215546Sopenharmony_ci      unsigned len = glsl_get_length(deref->type);
48bf215546Sopenharmony_ci      for (unsigned i = 0; i < len; i++) {
49bf215546Sopenharmony_ci         build_constant_load(b,
50bf215546Sopenharmony_ci                             nir_build_deref_array_imm(b, deref, i),
51bf215546Sopenharmony_ci                             c->elements[i]);
52bf215546Sopenharmony_ci      }
53bf215546Sopenharmony_ci   }
54bf215546Sopenharmony_ci}
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_cistatic bool
57bf215546Sopenharmony_cilower_const_initializer(struct nir_builder *b, struct exec_list *var_list,
58bf215546Sopenharmony_ci                        nir_variable_mode modes)
59bf215546Sopenharmony_ci{
60bf215546Sopenharmony_ci   bool progress = false;
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_ci   b->cursor = nir_before_cf_list(&b->impl->body);
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci   nir_foreach_variable_in_list(var, var_list) {
65bf215546Sopenharmony_ci      if (!(var->data.mode & modes))
66bf215546Sopenharmony_ci         continue;
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_ci      if (var->constant_initializer) {
69bf215546Sopenharmony_ci         build_constant_load(b, nir_build_deref_var(b, var),
70bf215546Sopenharmony_ci                             var->constant_initializer);
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci         progress = true;
73bf215546Sopenharmony_ci         var->constant_initializer = NULL;
74bf215546Sopenharmony_ci      } else if (var->pointer_initializer) {
75bf215546Sopenharmony_ci         nir_deref_instr *src_deref = nir_build_deref_var(b, var->pointer_initializer);
76bf215546Sopenharmony_ci         nir_deref_instr *dst_deref = nir_build_deref_var(b, var);
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_ci         /* Note that this stores a pointer to src into dst */
79bf215546Sopenharmony_ci         nir_store_deref(b, dst_deref, &src_deref->dest.ssa, ~0);
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_ci         progress = true;
82bf215546Sopenharmony_ci         var->pointer_initializer = NULL;
83bf215546Sopenharmony_ci      }
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci   }
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ci   return progress;
88bf215546Sopenharmony_ci}
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_cibool
91bf215546Sopenharmony_cinir_lower_variable_initializers(nir_shader *shader, nir_variable_mode modes)
92bf215546Sopenharmony_ci{
93bf215546Sopenharmony_ci   bool progress = false;
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci   /* Only some variables have initializers that we want to lower.  Others
96bf215546Sopenharmony_ci    * such as uniforms have initializers which are useful later during linking
97bf215546Sopenharmony_ci    * so we want to skip over those.  Restrict to only variable types where
98bf215546Sopenharmony_ci    * initializers make sense so that callers can use nir_var_all.
99bf215546Sopenharmony_ci    */
100bf215546Sopenharmony_ci   modes &= nir_var_shader_out |
101bf215546Sopenharmony_ci            nir_var_shader_temp |
102bf215546Sopenharmony_ci            nir_var_function_temp |
103bf215546Sopenharmony_ci            nir_var_system_value;
104bf215546Sopenharmony_ci
105bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
106bf215546Sopenharmony_ci      if (!function->impl)
107bf215546Sopenharmony_ci	 continue;
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_ci      bool impl_progress = false;
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_ci      nir_builder builder;
112bf215546Sopenharmony_ci      nir_builder_init(&builder, function->impl);
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci      if ((modes & ~nir_var_function_temp) && function->is_entrypoint) {
115bf215546Sopenharmony_ci         impl_progress |= lower_const_initializer(&builder,
116bf215546Sopenharmony_ci                                                  &shader->variables,
117bf215546Sopenharmony_ci                                                  modes);
118bf215546Sopenharmony_ci      }
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci      if (modes & nir_var_function_temp) {
121bf215546Sopenharmony_ci         impl_progress |= lower_const_initializer(&builder,
122bf215546Sopenharmony_ci                                                  &function->impl->locals,
123bf215546Sopenharmony_ci                                                  nir_var_function_temp);
124bf215546Sopenharmony_ci      }
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci      if (impl_progress) {
127bf215546Sopenharmony_ci         progress = true;
128bf215546Sopenharmony_ci         nir_metadata_preserve(function->impl, nir_metadata_block_index |
129bf215546Sopenharmony_ci                                               nir_metadata_dominance |
130bf215546Sopenharmony_ci                                               nir_metadata_live_ssa_defs);
131bf215546Sopenharmony_ci      } else {
132bf215546Sopenharmony_ci         nir_metadata_preserve(function->impl, nir_metadata_all);
133bf215546Sopenharmony_ci      }
134bf215546Sopenharmony_ci   }
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci   return progress;
137bf215546Sopenharmony_ci}
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci/* Zero initialize shared_size bytes of shared memory by splitting work writes
140bf215546Sopenharmony_ci * of chunk_size bytes among the invocations.
141bf215546Sopenharmony_ci *
142bf215546Sopenharmony_ci * Used for implementing VK_KHR_zero_initialize_workgroup_memory.
143bf215546Sopenharmony_ci */
144bf215546Sopenharmony_cibool
145bf215546Sopenharmony_cinir_zero_initialize_shared_memory(nir_shader *shader,
146bf215546Sopenharmony_ci                                  const unsigned shared_size,
147bf215546Sopenharmony_ci                                  const unsigned chunk_size)
148bf215546Sopenharmony_ci{
149bf215546Sopenharmony_ci   assert(shared_size > 0);
150bf215546Sopenharmony_ci   assert(chunk_size > 0);
151bf215546Sopenharmony_ci   assert(chunk_size % 4 == 0);
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_ci   nir_builder b;
154bf215546Sopenharmony_ci   nir_builder_init(&b, nir_shader_get_entrypoint(shader));
155bf215546Sopenharmony_ci   b.cursor = nir_before_cf_list(&b.impl->body);
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci   assert(!shader->info.workgroup_size_variable);
158bf215546Sopenharmony_ci   const unsigned local_count = shader->info.workgroup_size[0] *
159bf215546Sopenharmony_ci                                shader->info.workgroup_size[1] *
160bf215546Sopenharmony_ci                                shader->info.workgroup_size[2];
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci   /* The initialization logic is simplified if we can always split the memory
163bf215546Sopenharmony_ci    * in full chunk_size units.
164bf215546Sopenharmony_ci    */
165bf215546Sopenharmony_ci   assert(shared_size % chunk_size == 0);
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci   const unsigned chunk_comps = chunk_size / 4;
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_ci   nir_variable *it = nir_local_variable_create(b.impl, glsl_uint_type(),
170bf215546Sopenharmony_ci                                                "zero_init_iterator");
171bf215546Sopenharmony_ci   nir_ssa_def *local_index = nir_load_local_invocation_index(&b);
172bf215546Sopenharmony_ci   nir_ssa_def *first_offset = nir_imul_imm(&b, local_index, chunk_size);
173bf215546Sopenharmony_ci   nir_store_var(&b, it, first_offset, 0x1);
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci   nir_loop *loop = nir_push_loop(&b);
176bf215546Sopenharmony_ci   {
177bf215546Sopenharmony_ci      nir_ssa_def *offset = nir_load_var(&b, it);
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci      nir_push_if(&b, nir_uge(&b, offset, nir_imm_int(&b, shared_size)));
180bf215546Sopenharmony_ci      {
181bf215546Sopenharmony_ci         nir_jump(&b, nir_jump_break);
182bf215546Sopenharmony_ci      }
183bf215546Sopenharmony_ci      nir_pop_if(&b, NULL);
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_ci      nir_store_shared(&b, nir_imm_zero(&b, chunk_comps, 32), offset,
186bf215546Sopenharmony_ci                       .align_mul=chunk_size,
187bf215546Sopenharmony_ci                       .write_mask=((1 << chunk_comps) - 1));
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci      nir_ssa_def *new_offset = nir_iadd_imm(&b, offset, chunk_size * local_count);
190bf215546Sopenharmony_ci      nir_store_var(&b, it, new_offset, 0x1);
191bf215546Sopenharmony_ci   }
192bf215546Sopenharmony_ci   nir_pop_loop(&b, loop);
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_ci   nir_scoped_barrier(&b, NIR_SCOPE_WORKGROUP, NIR_SCOPE_WORKGROUP,
195bf215546Sopenharmony_ci                      NIR_MEMORY_ACQ_REL, nir_var_mem_shared);
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci   nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_none);
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_ci   return true;
200bf215546Sopenharmony_ci}
201