1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2016 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *    Jason Ekstrand (jason@jlekstrand.net)
25bf215546Sopenharmony_ci *
26bf215546Sopenharmony_ci */
27bf215546Sopenharmony_ci
/*
 * This lowering pass replaces loads and stores of variables with loads and
 * stores to scratch space, based on a few configurable parameters.
 */
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_ci#include "nir.h"
34bf215546Sopenharmony_ci#include "nir_builder.h"
35bf215546Sopenharmony_ci#include "nir_deref.h"
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_cistatic void
38bf215546Sopenharmony_cilower_load_store(nir_builder *b,
39bf215546Sopenharmony_ci                 nir_intrinsic_instr *intrin,
40bf215546Sopenharmony_ci                 glsl_type_size_align_func size_align)
41bf215546Sopenharmony_ci{
42bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intrin->instr);
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_ci   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
45bf215546Sopenharmony_ci   nir_variable *var = nir_deref_instr_get_variable(deref);
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ci   nir_ssa_def *offset =
48bf215546Sopenharmony_ci      nir_iadd_imm(b, nir_build_deref_offset(b, deref, size_align),
49bf215546Sopenharmony_ci                      var->data.location);
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci   unsigned align, UNUSED size;
52bf215546Sopenharmony_ci   size_align(deref->type, &size, &align);
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci   if (intrin->intrinsic == nir_intrinsic_load_deref) {
55bf215546Sopenharmony_ci      unsigned bit_size = intrin->dest.ssa.bit_size;
56bf215546Sopenharmony_ci      nir_ssa_def *value = nir_load_scratch(
57bf215546Sopenharmony_ci         b, intrin->num_components, bit_size == 1 ? 32 : bit_size, offset, .align_mul=align);
58bf215546Sopenharmony_ci      if (bit_size == 1)
59bf215546Sopenharmony_ci         value = nir_b2b1(b, value);
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
62bf215546Sopenharmony_ci   } else {
63bf215546Sopenharmony_ci      assert(intrin->intrinsic == nir_intrinsic_store_deref);
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci      assert(intrin->src[1].is_ssa);
66bf215546Sopenharmony_ci      nir_ssa_def *value = intrin->src[1].ssa;
67bf215546Sopenharmony_ci      if (value->bit_size == 1)
68bf215546Sopenharmony_ci         value = nir_b2b32(b, value);
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_ci      nir_store_scratch(b, value, offset, .align_mul=align,
71bf215546Sopenharmony_ci                           .write_mask=nir_intrinsic_write_mask(intrin));
72bf215546Sopenharmony_ci   }
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_ci   nir_instr_remove(&intrin->instr);
75bf215546Sopenharmony_ci   nir_deref_instr_remove_if_unused(deref);
76bf215546Sopenharmony_ci}
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_cistatic bool only_used_for_load_store(nir_deref_instr *deref)
79bf215546Sopenharmony_ci{
80bf215546Sopenharmony_ci   nir_foreach_use(src, &deref->dest.ssa) {
81bf215546Sopenharmony_ci      if (!src->parent_instr)
82bf215546Sopenharmony_ci         return false;
83bf215546Sopenharmony_ci      if (src->parent_instr->type == nir_instr_type_deref) {
84bf215546Sopenharmony_ci          if (!only_used_for_load_store(nir_instr_as_deref(src->parent_instr)))
85bf215546Sopenharmony_ci            return false;
86bf215546Sopenharmony_ci      } else if (src->parent_instr->type != nir_instr_type_intrinsic) {
87bf215546Sopenharmony_ci         return false;
88bf215546Sopenharmony_ci      } else {
89bf215546Sopenharmony_ci         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src->parent_instr);
90bf215546Sopenharmony_ci         if (intrin->intrinsic != nir_intrinsic_load_deref &&
91bf215546Sopenharmony_ci             intrin->intrinsic != nir_intrinsic_store_deref)
92bf215546Sopenharmony_ci            return false;
93bf215546Sopenharmony_ci      }
94bf215546Sopenharmony_ci   }
95bf215546Sopenharmony_ci   return true;
96bf215546Sopenharmony_ci}
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_cibool
99bf215546Sopenharmony_cinir_lower_vars_to_scratch(nir_shader *shader,
100bf215546Sopenharmony_ci                          nir_variable_mode modes,
101bf215546Sopenharmony_ci                          int size_threshold,
102bf215546Sopenharmony_ci                          glsl_type_size_align_func size_align)
103bf215546Sopenharmony_ci{
104bf215546Sopenharmony_ci   struct set *set = _mesa_pointer_set_create(NULL);
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci   /* First, we walk the instructions and flag any variables we want to lower
107bf215546Sopenharmony_ci    * by removing them from their respective list and setting the mode to 0.
108bf215546Sopenharmony_ci    */
109bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
110bf215546Sopenharmony_ci      nir_foreach_block(block, function->impl) {
111bf215546Sopenharmony_ci         nir_foreach_instr(instr, block) {
112bf215546Sopenharmony_ci            if (instr->type != nir_instr_type_intrinsic)
113bf215546Sopenharmony_ci               continue;
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
116bf215546Sopenharmony_ci            if (intrin->intrinsic != nir_intrinsic_load_deref &&
117bf215546Sopenharmony_ci                intrin->intrinsic != nir_intrinsic_store_deref)
118bf215546Sopenharmony_ci               continue;
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
121bf215546Sopenharmony_ci            if (!nir_deref_mode_is_one_of(deref, modes))
122bf215546Sopenharmony_ci               continue;
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_ci            if (!nir_deref_instr_has_indirect(nir_src_as_deref(intrin->src[0])))
125bf215546Sopenharmony_ci               continue;
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_ci            nir_variable *var = nir_deref_instr_get_variable(deref);
128bf215546Sopenharmony_ci            if (!var)
129bf215546Sopenharmony_ci               continue;
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci            /* We set var->mode to 0 to indicate that a variable will be moved
132bf215546Sopenharmony_ci             * to scratch.  Don't assign a scratch location twice.
133bf215546Sopenharmony_ci             */
134bf215546Sopenharmony_ci            if (var->data.mode == 0)
135bf215546Sopenharmony_ci               continue;
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_ci            unsigned var_size, var_align;
138bf215546Sopenharmony_ci            size_align(var->type, &var_size, &var_align);
139bf215546Sopenharmony_ci            if (var_size <= size_threshold)
140bf215546Sopenharmony_ci               continue;
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci            _mesa_set_add(set, var);
143bf215546Sopenharmony_ci         }
144bf215546Sopenharmony_ci      }
145bf215546Sopenharmony_ci   }
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_ci   if (set->entries == 0) {
148bf215546Sopenharmony_ci      _mesa_set_destroy(set, NULL);
149bf215546Sopenharmony_ci      return false;
150bf215546Sopenharmony_ci   }
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
153bf215546Sopenharmony_ci      nir_foreach_block(block, function->impl) {
154bf215546Sopenharmony_ci         nir_foreach_instr(instr, block) {
155bf215546Sopenharmony_ci            if (instr->type != nir_instr_type_deref)
156bf215546Sopenharmony_ci               continue;
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci            nir_deref_instr *deref = nir_instr_as_deref(instr);
159bf215546Sopenharmony_ci            if (deref->deref_type != nir_deref_type_var)
160bf215546Sopenharmony_ci               continue;
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci            struct set_entry *entry = _mesa_set_search(set, deref->var);
163bf215546Sopenharmony_ci            if (!entry)
164bf215546Sopenharmony_ci               continue;
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci            if (!only_used_for_load_store(deref))
167bf215546Sopenharmony_ci               _mesa_set_remove(set, entry);
168bf215546Sopenharmony_ci         }
169bf215546Sopenharmony_ci      }
170bf215546Sopenharmony_ci   }
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_ci   set_foreach(set, entry) {
173bf215546Sopenharmony_ci      nir_variable* var = (void*)entry->key;
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci      /* Remove it from its list */
176bf215546Sopenharmony_ci      exec_node_remove(&var->node);
177bf215546Sopenharmony_ci      /* Invalid mode used to flag "moving to scratch" */
178bf215546Sopenharmony_ci      var->data.mode = 0;
179bf215546Sopenharmony_ci
180bf215546Sopenharmony_ci      /* We don't allocate space here as iteration in this loop is
181bf215546Sopenharmony_ci       * non-deterministic due to the nir_variable pointers. */
182bf215546Sopenharmony_ci      var->data.location = INT_MAX;
183bf215546Sopenharmony_ci   }
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_ci   bool progress = false;
186bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
187bf215546Sopenharmony_ci      if (!function->impl)
188bf215546Sopenharmony_ci         continue;
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci      nir_builder build;
191bf215546Sopenharmony_ci      nir_builder_init(&build, function->impl);
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci      bool impl_progress = false;
194bf215546Sopenharmony_ci      nir_foreach_block(block, function->impl) {
195bf215546Sopenharmony_ci         nir_foreach_instr_safe(instr, block) {
196bf215546Sopenharmony_ci            if (instr->type != nir_instr_type_intrinsic)
197bf215546Sopenharmony_ci               continue;
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_ci            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
200bf215546Sopenharmony_ci            if (intrin->intrinsic != nir_intrinsic_load_deref &&
201bf215546Sopenharmony_ci                intrin->intrinsic != nir_intrinsic_store_deref)
202bf215546Sopenharmony_ci               continue;
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_ci            nir_variable *var = nir_intrinsic_get_var(intrin, 0);
205bf215546Sopenharmony_ci            /* Variables flagged for lowering above have mode == 0 */
206bf215546Sopenharmony_ci            if (!var || var->data.mode)
207bf215546Sopenharmony_ci               continue;
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci            if (var->data.location == INT_MAX) {
210bf215546Sopenharmony_ci               unsigned var_size, var_align;
211bf215546Sopenharmony_ci               size_align(var->type, &var_size, &var_align);
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci               var->data.location = ALIGN_POT(shader->scratch_size, var_align);
214bf215546Sopenharmony_ci               shader->scratch_size = var->data.location + var_size;
215bf215546Sopenharmony_ci            }
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci            lower_load_store(&build, intrin, size_align);
218bf215546Sopenharmony_ci            impl_progress = true;
219bf215546Sopenharmony_ci         }
220bf215546Sopenharmony_ci      }
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_ci      if (impl_progress) {
223bf215546Sopenharmony_ci         progress = true;
224bf215546Sopenharmony_ci         nir_metadata_preserve(function->impl, nir_metadata_block_index |
225bf215546Sopenharmony_ci                                               nir_metadata_dominance);
226bf215546Sopenharmony_ci      } else {
227bf215546Sopenharmony_ci         nir_metadata_preserve(function->impl, nir_metadata_all);
228bf215546Sopenharmony_ci      }
229bf215546Sopenharmony_ci   }
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_ci   _mesa_set_destroy(set, NULL);
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci   return progress;
234bf215546Sopenharmony_ci}
235