1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2016 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "nir.h"
25bf215546Sopenharmony_ci#include "nir_builder.h"
26bf215546Sopenharmony_ci#include "nir_deref.h"
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_cistatic void
29bf215546Sopenharmony_ciemit_load_store_deref(nir_builder *b, nir_intrinsic_instr *orig_instr,
30bf215546Sopenharmony_ci                      nir_deref_instr *parent,
31bf215546Sopenharmony_ci                      nir_deref_instr **deref_arr,
32bf215546Sopenharmony_ci                      nir_ssa_def **dest, nir_ssa_def *src);
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_cistatic void
35bf215546Sopenharmony_ciemit_indirect_load_store_deref(nir_builder *b, nir_intrinsic_instr *orig_instr,
36bf215546Sopenharmony_ci                               nir_deref_instr *parent,
37bf215546Sopenharmony_ci                               nir_deref_instr **deref_arr,
38bf215546Sopenharmony_ci                               int start, int end,
39bf215546Sopenharmony_ci                               nir_ssa_def **dest, nir_ssa_def *src)
40bf215546Sopenharmony_ci{
41bf215546Sopenharmony_ci   assert(start < end);
42bf215546Sopenharmony_ci   if (start == end - 1) {
43bf215546Sopenharmony_ci      emit_load_store_deref(b, orig_instr,
44bf215546Sopenharmony_ci                            nir_build_deref_array_imm(b, parent, start),
45bf215546Sopenharmony_ci                            deref_arr + 1, dest, src);
46bf215546Sopenharmony_ci   } else {
47bf215546Sopenharmony_ci      int mid = start + (end - start) / 2;
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_ci      nir_ssa_def *then_dest, *else_dest;
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci      nir_deref_instr *deref = *deref_arr;
52bf215546Sopenharmony_ci      assert(deref->deref_type == nir_deref_type_array);
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci      nir_push_if(b, nir_ilt(b, deref->arr.index.ssa, nir_imm_intN_t(b, mid, parent->dest.ssa.bit_size)));
55bf215546Sopenharmony_ci      emit_indirect_load_store_deref(b, orig_instr, parent, deref_arr,
56bf215546Sopenharmony_ci                                     start, mid, &then_dest, src);
57bf215546Sopenharmony_ci      nir_push_else(b, NULL);
58bf215546Sopenharmony_ci      emit_indirect_load_store_deref(b, orig_instr, parent, deref_arr,
59bf215546Sopenharmony_ci                                     mid, end, &else_dest, src);
60bf215546Sopenharmony_ci      nir_pop_if(b, NULL);
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_ci      if (src == NULL)
63bf215546Sopenharmony_ci         *dest = nir_if_phi(b, then_dest, else_dest);
64bf215546Sopenharmony_ci   }
65bf215546Sopenharmony_ci}
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_cistatic void
68bf215546Sopenharmony_ciemit_load_store_deref(nir_builder *b, nir_intrinsic_instr *orig_instr,
69bf215546Sopenharmony_ci                      nir_deref_instr *parent,
70bf215546Sopenharmony_ci                      nir_deref_instr **deref_arr,
71bf215546Sopenharmony_ci                      nir_ssa_def **dest, nir_ssa_def *src)
72bf215546Sopenharmony_ci{
73bf215546Sopenharmony_ci   for (; *deref_arr; deref_arr++) {
74bf215546Sopenharmony_ci      nir_deref_instr *deref = *deref_arr;
75bf215546Sopenharmony_ci      if (deref->deref_type == nir_deref_type_array &&
76bf215546Sopenharmony_ci          !nir_src_is_const(deref->arr.index)) {
77bf215546Sopenharmony_ci         int length = glsl_get_length(parent->type);
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_ci         emit_indirect_load_store_deref(b, orig_instr, parent, deref_arr,
80bf215546Sopenharmony_ci                                        0, length, dest, src);
81bf215546Sopenharmony_ci         return;
82bf215546Sopenharmony_ci      }
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_ci      parent = nir_build_deref_follower(b, parent, deref);
85bf215546Sopenharmony_ci   }
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ci   /* We reached the end of the deref chain.  Emit the instruction */
88bf215546Sopenharmony_ci   assert(*deref_arr == NULL);
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci   if (src == NULL) {
91bf215546Sopenharmony_ci      /* This is a load instruction */
92bf215546Sopenharmony_ci      nir_intrinsic_instr *load =
93bf215546Sopenharmony_ci         nir_intrinsic_instr_create(b->shader, orig_instr->intrinsic);
94bf215546Sopenharmony_ci      load->num_components = orig_instr->num_components;
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci      load->src[0] = nir_src_for_ssa(&parent->dest.ssa);
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci      /* Copy over any other sources.  This is needed for interp_deref_at */
99bf215546Sopenharmony_ci      for (unsigned i = 1;
100bf215546Sopenharmony_ci           i < nir_intrinsic_infos[orig_instr->intrinsic].num_srcs; i++)
101bf215546Sopenharmony_ci         nir_src_copy(&load->src[i], &orig_instr->src[i]);
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci      nir_ssa_dest_init(&load->instr, &load->dest,
104bf215546Sopenharmony_ci                        orig_instr->dest.ssa.num_components,
105bf215546Sopenharmony_ci                        orig_instr->dest.ssa.bit_size, NULL);
106bf215546Sopenharmony_ci      nir_builder_instr_insert(b, &load->instr);
107bf215546Sopenharmony_ci      *dest = &load->dest.ssa;
108bf215546Sopenharmony_ci   } else {
109bf215546Sopenharmony_ci      assert(orig_instr->intrinsic == nir_intrinsic_store_deref);
110bf215546Sopenharmony_ci      nir_store_deref(b, parent, src, nir_intrinsic_write_mask(orig_instr));
111bf215546Sopenharmony_ci   }
112bf215546Sopenharmony_ci}
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_cistatic bool
115bf215546Sopenharmony_cilower_indirect_derefs_block(nir_block *block, nir_builder *b,
116bf215546Sopenharmony_ci                            nir_variable_mode modes,
117bf215546Sopenharmony_ci                            const struct set *vars,
118bf215546Sopenharmony_ci                            uint32_t max_lower_array_len)
119bf215546Sopenharmony_ci{
120bf215546Sopenharmony_ci   bool progress = false;
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ci   nir_foreach_instr_safe(instr, block) {
123bf215546Sopenharmony_ci      if (instr->type != nir_instr_type_intrinsic)
124bf215546Sopenharmony_ci         continue;
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
127bf215546Sopenharmony_ci      if (intrin->intrinsic != nir_intrinsic_load_deref &&
128bf215546Sopenharmony_ci          intrin->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
129bf215546Sopenharmony_ci          intrin->intrinsic != nir_intrinsic_interp_deref_at_sample &&
130bf215546Sopenharmony_ci          intrin->intrinsic != nir_intrinsic_interp_deref_at_offset &&
131bf215546Sopenharmony_ci          intrin->intrinsic != nir_intrinsic_interp_deref_at_vertex &&
132bf215546Sopenharmony_ci          intrin->intrinsic != nir_intrinsic_store_deref)
133bf215546Sopenharmony_ci         continue;
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_ci      /* Walk the deref chain back to the base and look for indirects */
138bf215546Sopenharmony_ci      uint32_t indirect_array_len = 1;
139bf215546Sopenharmony_ci      bool has_indirect = false;
140bf215546Sopenharmony_ci      nir_deref_instr *base = deref;
141bf215546Sopenharmony_ci      while (base && base->deref_type != nir_deref_type_var) {
142bf215546Sopenharmony_ci         nir_deref_instr *parent = nir_deref_instr_parent(base);
143bf215546Sopenharmony_ci         if (base->deref_type == nir_deref_type_array &&
144bf215546Sopenharmony_ci             !nir_src_is_const(base->arr.index)) {
145bf215546Sopenharmony_ci            indirect_array_len *= glsl_get_length(parent->type);
146bf215546Sopenharmony_ci            has_indirect = true;
147bf215546Sopenharmony_ci         }
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ci         base = parent;
150bf215546Sopenharmony_ci      }
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci      if (!has_indirect || !base || indirect_array_len > max_lower_array_len)
153bf215546Sopenharmony_ci         continue;
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci      /* Only lower variables whose mode is in the mask, or compact
156bf215546Sopenharmony_ci       * array variables.  (We can't handle indirects on tightly packed
157bf215546Sopenharmony_ci       * scalar arrays, so we need to lower them regardless.)
158bf215546Sopenharmony_ci       */
159bf215546Sopenharmony_ci      if (!(modes & base->var->data.mode) && !base->var->data.compact)
160bf215546Sopenharmony_ci         continue;
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci      if (vars && !_mesa_set_search(vars, base->var))
163bf215546Sopenharmony_ci         continue;
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci      b->cursor = nir_instr_remove(&intrin->instr);
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci      nir_deref_path path;
168bf215546Sopenharmony_ci      nir_deref_path_init(&path, deref, NULL);
169bf215546Sopenharmony_ci      assert(path.path[0] == base);
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci      if (intrin->intrinsic == nir_intrinsic_store_deref) {
172bf215546Sopenharmony_ci         assert(intrin->src[1].is_ssa);
173bf215546Sopenharmony_ci         emit_load_store_deref(b, intrin, base, &path.path[1],
174bf215546Sopenharmony_ci                               NULL, intrin->src[1].ssa);
175bf215546Sopenharmony_ci      } else {
176bf215546Sopenharmony_ci         nir_ssa_def *result;
177bf215546Sopenharmony_ci         emit_load_store_deref(b, intrin, base, &path.path[1],
178bf215546Sopenharmony_ci                               &result, NULL);
179bf215546Sopenharmony_ci         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, result);
180bf215546Sopenharmony_ci      }
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci      nir_deref_path_finish(&path);
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci      progress = true;
185bf215546Sopenharmony_ci   }
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci   return progress;
188bf215546Sopenharmony_ci}
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_cistatic bool
191bf215546Sopenharmony_cilower_indirects_impl(nir_function_impl *impl, nir_variable_mode modes,
192bf215546Sopenharmony_ci                     const struct set *vars, uint32_t max_lower_array_len)
193bf215546Sopenharmony_ci{
194bf215546Sopenharmony_ci   nir_builder builder;
195bf215546Sopenharmony_ci   nir_builder_init(&builder, impl);
196bf215546Sopenharmony_ci   bool progress = false;
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci   nir_foreach_block_safe(block, impl) {
199bf215546Sopenharmony_ci      progress |= lower_indirect_derefs_block(block, &builder, modes, vars,
200bf215546Sopenharmony_ci                                              max_lower_array_len);
201bf215546Sopenharmony_ci   }
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci   if (progress)
204bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_none);
205bf215546Sopenharmony_ci   else
206bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_all);
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci   return progress;
209bf215546Sopenharmony_ci}
210bf215546Sopenharmony_ci
211bf215546Sopenharmony_ci/** Lowers indirect variable loads/stores to direct loads/stores.
212bf215546Sopenharmony_ci *
213bf215546Sopenharmony_ci * The pass works by replacing any indirect load or store with an if-ladder
214bf215546Sopenharmony_ci * that does a binary search on the array index.
215bf215546Sopenharmony_ci */
216bf215546Sopenharmony_cibool
217bf215546Sopenharmony_cinir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes,
218bf215546Sopenharmony_ci                          uint32_t max_lower_array_len)
219bf215546Sopenharmony_ci{
220bf215546Sopenharmony_ci   bool progress = false;
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
223bf215546Sopenharmony_ci      if (function->impl) {
224bf215546Sopenharmony_ci         progress = lower_indirects_impl(function->impl, modes, NULL,
225bf215546Sopenharmony_ci                                         max_lower_array_len) || progress;
226bf215546Sopenharmony_ci      }
227bf215546Sopenharmony_ci   }
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_ci   return progress;
230bf215546Sopenharmony_ci}
231bf215546Sopenharmony_ci
232bf215546Sopenharmony_ci/** Lowers indirects on any variables in the given set */
233bf215546Sopenharmony_cibool
234bf215546Sopenharmony_cinir_lower_indirect_var_derefs(nir_shader *shader, const struct set *vars)
235bf215546Sopenharmony_ci{
236bf215546Sopenharmony_ci   bool progress = false;
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
239bf215546Sopenharmony_ci      if (function->impl) {
240bf215546Sopenharmony_ci         progress = lower_indirects_impl(function->impl, nir_var_uniform,
241bf215546Sopenharmony_ci                                         vars, UINT_MAX) || progress;
242bf215546Sopenharmony_ci      }
243bf215546Sopenharmony_ci   }
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ci   return progress;
246bf215546Sopenharmony_ci}
247