1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2021 Valve Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "nir.h"
25bf215546Sopenharmony_ci#include "nir_builder.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci/* This pass provides a way to move computations that are always the same for
28bf215546Sopenharmony_ci * an entire draw/compute dispatch into a "preamble" that runs before the main
29bf215546Sopenharmony_ci * entrypoint.
30bf215546Sopenharmony_ci *
31bf215546Sopenharmony_ci * We also expose a separate API to get or construct the preamble of a shader
32bf215546Sopenharmony_ci * in case backends want to insert their own code.
33bf215546Sopenharmony_ci */
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_cinir_function_impl *
37bf215546Sopenharmony_cinir_shader_get_preamble(nir_shader *shader)
38bf215546Sopenharmony_ci{
39bf215546Sopenharmony_ci   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);
40bf215546Sopenharmony_ci   if (entrypoint->preamble) {
41bf215546Sopenharmony_ci      return entrypoint->preamble->impl;
42bf215546Sopenharmony_ci   } else {
43bf215546Sopenharmony_ci      nir_function *preamble = nir_function_create(shader, "@preamble");
44bf215546Sopenharmony_ci      preamble->is_preamble = true;
45bf215546Sopenharmony_ci      nir_function_impl *impl = nir_function_impl_create(preamble);
46bf215546Sopenharmony_ci      entrypoint->preamble = preamble;
47bf215546Sopenharmony_ci      return impl;
48bf215546Sopenharmony_ci   }
49bf215546Sopenharmony_ci}
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_citypedef struct {
52bf215546Sopenharmony_ci   bool can_move;
53bf215546Sopenharmony_ci   bool candidate;
54bf215546Sopenharmony_ci   bool must_stay;
55bf215546Sopenharmony_ci   bool replace;
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci   unsigned can_move_users;
58bf215546Sopenharmony_ci
59bf215546Sopenharmony_ci   unsigned size, align;
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci   unsigned offset;
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_ci   /* Average the cost of a value among its users, to try to account for
64bf215546Sopenharmony_ci    * values that have multiple can_move uses.
65bf215546Sopenharmony_ci    */
66bf215546Sopenharmony_ci   float value;
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_ci   /* Overall benefit, i.e. the value minus any cost to inserting
69bf215546Sopenharmony_ci    * load_preamble.
70bf215546Sopenharmony_ci    */
71bf215546Sopenharmony_ci   float benefit;
72bf215546Sopenharmony_ci} def_state;
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_citypedef struct {
75bf215546Sopenharmony_ci   /* Per-definition array of states */
76bf215546Sopenharmony_ci   def_state *states;
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_ci   nir_ssa_def *def;
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci   const nir_opt_preamble_options *options;
81bf215546Sopenharmony_ci} opt_preamble_ctx;
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_cistatic float
84bf215546Sopenharmony_ciget_instr_cost(nir_instr *instr, const nir_opt_preamble_options *options)
85bf215546Sopenharmony_ci{
86bf215546Sopenharmony_ci   /* No backend will want to hoist load_const or undef by itself, so handle
87bf215546Sopenharmony_ci    * this for them.
88bf215546Sopenharmony_ci    */
89bf215546Sopenharmony_ci   if (instr->type == nir_instr_type_load_const ||
90bf215546Sopenharmony_ci       instr->type == nir_instr_type_ssa_undef)
91bf215546Sopenharmony_ci      return 0;
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci   return options->instr_cost_cb(instr, options->cb_data);
94bf215546Sopenharmony_ci}
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_cistatic bool
97bf215546Sopenharmony_cican_move_src(nir_src *src, void *state)
98bf215546Sopenharmony_ci{
99bf215546Sopenharmony_ci   opt_preamble_ctx *ctx = state;
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_ci   assert(src->is_ssa);
102bf215546Sopenharmony_ci   return ctx->states[src->ssa->index].can_move;
103bf215546Sopenharmony_ci}
104bf215546Sopenharmony_ci
105bf215546Sopenharmony_cistatic bool
106bf215546Sopenharmony_cican_move_srcs(nir_instr *instr, opt_preamble_ctx *ctx)
107bf215546Sopenharmony_ci{
108bf215546Sopenharmony_ci   return nir_foreach_src(instr, can_move_src, ctx);
109bf215546Sopenharmony_ci}
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_cistatic bool
112bf215546Sopenharmony_cican_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx)
113bf215546Sopenharmony_ci{
114bf215546Sopenharmony_ci   switch (instr->intrinsic) {
115bf215546Sopenharmony_ci   /* Intrinsics which can always be moved */
116bf215546Sopenharmony_ci   case nir_intrinsic_load_push_constant:
117bf215546Sopenharmony_ci   case nir_intrinsic_load_work_dim:
118bf215546Sopenharmony_ci   case nir_intrinsic_load_num_workgroups:
119bf215546Sopenharmony_ci   case nir_intrinsic_load_workgroup_size:
120bf215546Sopenharmony_ci   case nir_intrinsic_load_ray_launch_size:
121bf215546Sopenharmony_ci   case nir_intrinsic_load_ray_launch_size_addr_amd:
122bf215546Sopenharmony_ci   case nir_intrinsic_load_sbt_base_amd:
123bf215546Sopenharmony_ci   case nir_intrinsic_load_is_indexed_draw:
124bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_scale:
125bf215546Sopenharmony_ci   case nir_intrinsic_load_user_clip_plane:
126bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_x_scale:
127bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_y_scale:
128bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_z_scale:
129bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_offset:
130bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_x_offset:
131bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_y_offset:
132bf215546Sopenharmony_ci   case nir_intrinsic_load_viewport_z_offset:
133bf215546Sopenharmony_ci   case nir_intrinsic_load_blend_const_color_a_float:
134bf215546Sopenharmony_ci   case nir_intrinsic_load_blend_const_color_b_float:
135bf215546Sopenharmony_ci   case nir_intrinsic_load_blend_const_color_g_float:
136bf215546Sopenharmony_ci   case nir_intrinsic_load_blend_const_color_r_float:
137bf215546Sopenharmony_ci   case nir_intrinsic_load_blend_const_color_rgba:
138bf215546Sopenharmony_ci   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
139bf215546Sopenharmony_ci   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
140bf215546Sopenharmony_ci   case nir_intrinsic_load_line_width:
141bf215546Sopenharmony_ci   case nir_intrinsic_load_aa_line_width:
142bf215546Sopenharmony_ci   case nir_intrinsic_load_fb_layers_v3d:
143bf215546Sopenharmony_ci   case nir_intrinsic_load_tcs_num_patches_amd:
144bf215546Sopenharmony_ci   case nir_intrinsic_load_sample_positions_pan:
145bf215546Sopenharmony_ci   case nir_intrinsic_load_shader_query_enabled_amd:
146bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_front_face_enabled_amd:
147bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_back_face_enabled_amd:
148bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_ccw_amd:
149bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
150bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_any_enabled_amd:
151bf215546Sopenharmony_ci   case nir_intrinsic_load_cull_small_prim_precision_amd:
152bf215546Sopenharmony_ci      return true;
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci   /* Intrinsics which can be moved depending on hardware */
155bf215546Sopenharmony_ci   case nir_intrinsic_load_base_instance:
156bf215546Sopenharmony_ci   case nir_intrinsic_load_base_vertex:
157bf215546Sopenharmony_ci   case nir_intrinsic_load_first_vertex:
158bf215546Sopenharmony_ci   case nir_intrinsic_load_draw_id:
159bf215546Sopenharmony_ci      return ctx->options->drawid_uniform;
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci   case nir_intrinsic_load_subgroup_size:
162bf215546Sopenharmony_ci   case nir_intrinsic_load_num_subgroups:
163bf215546Sopenharmony_ci      return ctx->options->subgroup_size_uniform;
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci   /* Intrinsics which can be moved if the sources can */
166bf215546Sopenharmony_ci   case nir_intrinsic_load_ubo:
167bf215546Sopenharmony_ci   case nir_intrinsic_load_ubo_vec4:
168bf215546Sopenharmony_ci   case nir_intrinsic_get_ubo_size:
169bf215546Sopenharmony_ci   case nir_intrinsic_get_ssbo_size:
170bf215546Sopenharmony_ci   case nir_intrinsic_ballot_bitfield_extract:
171bf215546Sopenharmony_ci   case nir_intrinsic_ballot_find_lsb:
172bf215546Sopenharmony_ci   case nir_intrinsic_ballot_find_msb:
173bf215546Sopenharmony_ci   case nir_intrinsic_ballot_bit_count_reduce:
174bf215546Sopenharmony_ci   case nir_intrinsic_load_deref:
175bf215546Sopenharmony_ci   case nir_intrinsic_load_global_constant:
176bf215546Sopenharmony_ci   case nir_intrinsic_load_uniform:
177bf215546Sopenharmony_ci   case nir_intrinsic_load_constant:
178bf215546Sopenharmony_ci   case nir_intrinsic_load_sample_pos_from_id:
179bf215546Sopenharmony_ci   case nir_intrinsic_load_kernel_input:
180bf215546Sopenharmony_ci   case nir_intrinsic_load_buffer_amd:
181bf215546Sopenharmony_ci   case nir_intrinsic_image_samples:
182bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_samples:
183bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_samples:
184bf215546Sopenharmony_ci   case nir_intrinsic_image_size:
185bf215546Sopenharmony_ci   case nir_intrinsic_image_deref_size:
186bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_size:
187bf215546Sopenharmony_ci   case nir_intrinsic_vulkan_resource_index:
188bf215546Sopenharmony_ci   case nir_intrinsic_vulkan_resource_reindex:
189bf215546Sopenharmony_ci   case nir_intrinsic_load_vulkan_descriptor:
190bf215546Sopenharmony_ci   case nir_intrinsic_quad_swizzle_amd:
191bf215546Sopenharmony_ci   case nir_intrinsic_masked_swizzle_amd:
192bf215546Sopenharmony_ci   case nir_intrinsic_load_ssbo_address:
193bf215546Sopenharmony_ci   case nir_intrinsic_bindless_resource_ir3:
194bf215546Sopenharmony_ci      return can_move_srcs(&instr->instr, ctx);
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci   /* Image/SSBO loads can be moved if they are CAN_REORDER and their
197bf215546Sopenharmony_ci    * sources can be moved.
198bf215546Sopenharmony_ci    */
199bf215546Sopenharmony_ci   case nir_intrinsic_image_load:
200bf215546Sopenharmony_ci   case nir_intrinsic_bindless_image_load:
201bf215546Sopenharmony_ci   case nir_intrinsic_load_ssbo:
202bf215546Sopenharmony_ci   case nir_intrinsic_load_ssbo_ir3:
203bf215546Sopenharmony_ci      return (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER) &&
204bf215546Sopenharmony_ci         can_move_srcs(&instr->instr, ctx);
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci   default:
207bf215546Sopenharmony_ci      return false;
208bf215546Sopenharmony_ci   }
209bf215546Sopenharmony_ci}
210bf215546Sopenharmony_ci
211bf215546Sopenharmony_cistatic bool
212bf215546Sopenharmony_cican_move_instr(nir_instr *instr, opt_preamble_ctx *ctx)
213bf215546Sopenharmony_ci{
214bf215546Sopenharmony_ci   switch (instr->type) {
215bf215546Sopenharmony_ci   case nir_instr_type_tex: {
216bf215546Sopenharmony_ci      nir_tex_instr *tex = nir_instr_as_tex(instr);
217bf215546Sopenharmony_ci      /* See note below about derivatives. We have special code to convert tex
218bf215546Sopenharmony_ci       * to txd, though, because it's a common case.
219bf215546Sopenharmony_ci       */
220bf215546Sopenharmony_ci      if (nir_tex_instr_has_implicit_derivative(tex) &&
221bf215546Sopenharmony_ci          tex->op != nir_texop_tex) {
222bf215546Sopenharmony_ci         return false;
223bf215546Sopenharmony_ci      }
224bf215546Sopenharmony_ci      return can_move_srcs(instr, ctx);
225bf215546Sopenharmony_ci   }
226bf215546Sopenharmony_ci   case nir_instr_type_alu: {
227bf215546Sopenharmony_ci      /* The preamble is presumably run with only one thread, so we can't run
228bf215546Sopenharmony_ci       * derivatives in it.
229bf215546Sopenharmony_ci       * TODO: Replace derivatives with 0 instead, if real apps hit this.
230bf215546Sopenharmony_ci       */
231bf215546Sopenharmony_ci      nir_alu_instr *alu = nir_instr_as_alu(instr);
232bf215546Sopenharmony_ci      switch (alu->op) {
233bf215546Sopenharmony_ci      case nir_op_fddx:
234bf215546Sopenharmony_ci      case nir_op_fddy:
235bf215546Sopenharmony_ci      case nir_op_fddx_fine:
236bf215546Sopenharmony_ci      case nir_op_fddy_fine:
237bf215546Sopenharmony_ci      case nir_op_fddx_coarse:
238bf215546Sopenharmony_ci      case nir_op_fddy_coarse:
239bf215546Sopenharmony_ci         return false;
240bf215546Sopenharmony_ci      default:
241bf215546Sopenharmony_ci         return can_move_srcs(instr, ctx);
242bf215546Sopenharmony_ci      }
243bf215546Sopenharmony_ci   }
244bf215546Sopenharmony_ci   case nir_instr_type_intrinsic:
245bf215546Sopenharmony_ci      return can_move_intrinsic(nir_instr_as_intrinsic(instr), ctx);
246bf215546Sopenharmony_ci
247bf215546Sopenharmony_ci   case nir_instr_type_load_const:
248bf215546Sopenharmony_ci   case nir_instr_type_ssa_undef:
249bf215546Sopenharmony_ci      return true;
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci   case nir_instr_type_deref: {
252bf215546Sopenharmony_ci      nir_deref_instr *deref = nir_instr_as_deref(instr);
253bf215546Sopenharmony_ci      if (deref->deref_type == nir_deref_type_var) {
254bf215546Sopenharmony_ci         switch (deref->modes) {
255bf215546Sopenharmony_ci         case nir_var_uniform:
256bf215546Sopenharmony_ci         case nir_var_mem_ubo:
257bf215546Sopenharmony_ci            return true;
258bf215546Sopenharmony_ci         default:
259bf215546Sopenharmony_ci            return false;
260bf215546Sopenharmony_ci         }
261bf215546Sopenharmony_ci      } else {
262bf215546Sopenharmony_ci         return can_move_srcs(instr, ctx);
263bf215546Sopenharmony_ci      }
264bf215546Sopenharmony_ci   }
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_ci   case nir_instr_type_phi:
267bf215546Sopenharmony_ci      /* TODO: we could move an if-statement if everything inside it is
268bf215546Sopenharmony_ci       * moveable.
269bf215546Sopenharmony_ci       */
270bf215546Sopenharmony_ci      return false;
271bf215546Sopenharmony_ci
272bf215546Sopenharmony_ci   default:
273bf215546Sopenharmony_ci      return false;
274bf215546Sopenharmony_ci   }
275bf215546Sopenharmony_ci}
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci/* True if we should avoid making this a candidate. This is only called on
278bf215546Sopenharmony_ci * instructions we already determined we can move, this just makes it so that
279bf215546Sopenharmony_ci * uses of this instruction cannot be rewritten. Typically this happens
280bf215546Sopenharmony_ci * because of static constraints on the IR, for example some deref chains
281bf215546Sopenharmony_ci * cannot be broken.
282bf215546Sopenharmony_ci */
283bf215546Sopenharmony_cistatic bool
284bf215546Sopenharmony_ciavoid_instr(nir_instr *instr, const nir_opt_preamble_options *options)
285bf215546Sopenharmony_ci{
286bf215546Sopenharmony_ci   if (instr->type == nir_instr_type_deref)
287bf215546Sopenharmony_ci      return true;
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci   return options->avoid_instr_cb(instr, options->cb_data);
290bf215546Sopenharmony_ci}
291bf215546Sopenharmony_ci
292bf215546Sopenharmony_cistatic bool
293bf215546Sopenharmony_ciupdate_src_value(nir_src *src, void *data)
294bf215546Sopenharmony_ci{
295bf215546Sopenharmony_ci   opt_preamble_ctx *ctx = data;
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_ci   def_state *state = &ctx->states[ctx->def->index];
298bf215546Sopenharmony_ci   def_state *src_state = &ctx->states[src->ssa->index];
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci   assert(src_state->can_move);
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci   /* If an instruction has can_move and non-can_move users, it becomes a
303bf215546Sopenharmony_ci    * candidate and its value shouldn't propagate downwards. For example,
304bf215546Sopenharmony_ci    * imagine a chain like this:
305bf215546Sopenharmony_ci    *
306bf215546Sopenharmony_ci    *         -- F (cannot move)
307bf215546Sopenharmony_ci    *        /
308bf215546Sopenharmony_ci    *  A <-- B <-- C <-- D <-- E (cannot move)
309bf215546Sopenharmony_ci    *
310bf215546Sopenharmony_ci    * B and D are marked candidates. Picking B removes A and B, picking D
311bf215546Sopenharmony_ci    * removes C and D, and picking both removes all 4. Therefore B and D are
312bf215546Sopenharmony_ci    * independent and B's value shouldn't flow into D.
313bf215546Sopenharmony_ci    *
314bf215546Sopenharmony_ci    * A similar argument holds for must_stay values.
315bf215546Sopenharmony_ci    */
316bf215546Sopenharmony_ci   if (!src_state->must_stay && !src_state->candidate)
317bf215546Sopenharmony_ci      state->value += src_state->value;
318bf215546Sopenharmony_ci   return true;
319bf215546Sopenharmony_ci}
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_cistatic int
322bf215546Sopenharmony_cicandidate_sort(const void *data1, const void *data2)
323bf215546Sopenharmony_ci{
324bf215546Sopenharmony_ci   const def_state *state1 = *(def_state **)data1;
325bf215546Sopenharmony_ci   const def_state *state2 = *(def_state **)data2;
326bf215546Sopenharmony_ci
327bf215546Sopenharmony_ci   float value1 = state1->value / state1->size;
328bf215546Sopenharmony_ci   float value2 = state2->value / state2->size;
329bf215546Sopenharmony_ci   if (value1 < value2)
330bf215546Sopenharmony_ci      return 1;
331bf215546Sopenharmony_ci   else if (value1 > value2)
332bf215546Sopenharmony_ci      return -1;
333bf215546Sopenharmony_ci   else
334bf215546Sopenharmony_ci      return 0;
335bf215546Sopenharmony_ci}
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_cibool
338bf215546Sopenharmony_cinir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
339bf215546Sopenharmony_ci                 unsigned *size)
340bf215546Sopenharmony_ci{
341bf215546Sopenharmony_ci   opt_preamble_ctx ctx = {
342bf215546Sopenharmony_ci      .options = options,
343bf215546Sopenharmony_ci   };
344bf215546Sopenharmony_ci
345bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
346bf215546Sopenharmony_ci   ctx.states = calloc(impl->ssa_alloc, sizeof(*ctx.states));
347bf215546Sopenharmony_ci
348bf215546Sopenharmony_ci   /* Step 1: Calculate can_move */
349bf215546Sopenharmony_ci   nir_foreach_block (block, impl) {
350bf215546Sopenharmony_ci      nir_foreach_instr (instr, block) {
351bf215546Sopenharmony_ci         nir_ssa_def *def = nir_instr_ssa_def(instr);
352bf215546Sopenharmony_ci         if (!def)
353bf215546Sopenharmony_ci            continue;
354bf215546Sopenharmony_ci
355bf215546Sopenharmony_ci         def_state *state = &ctx.states[def->index];
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci         state->can_move = can_move_instr(instr, &ctx);
358bf215546Sopenharmony_ci      }
359bf215546Sopenharmony_ci   }
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_ci   /* Step 2: Calculate is_candidate. This is complicated by the presence of
362bf215546Sopenharmony_ci    * non-candidate instructions like derefs whose users cannot be rewritten.
363bf215546Sopenharmony_ci    * If a deref chain is used at all by a non-can_move thing, then any offset
364bf215546Sopenharmony_ci    * sources anywhere along the chain should be considered candidates because
365bf215546Sopenharmony_ci    * the entire deref chain will never be deleted, but if it's only used by
366bf215546Sopenharmony_ci    * can_move things then it becomes subsumed by its users and none of the
367bf215546Sopenharmony_ci    * offset sources should be considered candidates as they will be removed
368bf215546Sopenharmony_ci    * when the users of the deref chain are moved. We need to replace "are
369bf215546Sopenharmony_ci    * there any non-can_move users" with "are there any non-can_move users,
370bf215546Sopenharmony_ci    * *recursing through non-candidate users*". We do this by walking backward
371bf215546Sopenharmony_ci    * and marking when a non-candidate instruction must stay in the final
372bf215546Sopenharmony_ci    * program because it has a non-can_move user, including recursively.
373bf215546Sopenharmony_ci    */
374bf215546Sopenharmony_ci   unsigned num_candidates = 0;
375bf215546Sopenharmony_ci   nir_foreach_block_reverse (block, impl) {
376bf215546Sopenharmony_ci      nir_foreach_instr_reverse (instr, block) {
377bf215546Sopenharmony_ci         nir_ssa_def *def = nir_instr_ssa_def(instr);
378bf215546Sopenharmony_ci         if (!def)
379bf215546Sopenharmony_ci            continue;
380bf215546Sopenharmony_ci
381bf215546Sopenharmony_ci         def_state *state = &ctx.states[def->index];
382bf215546Sopenharmony_ci         if (!state->can_move)
383bf215546Sopenharmony_ci            continue;
384bf215546Sopenharmony_ci
385bf215546Sopenharmony_ci         state->value = get_instr_cost(instr, options);
386bf215546Sopenharmony_ci         bool is_candidate = !avoid_instr(instr, options);
387bf215546Sopenharmony_ci         state->candidate = false;
388bf215546Sopenharmony_ci         state->must_stay = false;
389bf215546Sopenharmony_ci         nir_foreach_use (use, def) {
390bf215546Sopenharmony_ci            nir_ssa_def *use_def = nir_instr_ssa_def(use->parent_instr);
391bf215546Sopenharmony_ci            if (!use_def || !ctx.states[use_def->index].can_move ||
392bf215546Sopenharmony_ci                ctx.states[use_def->index].must_stay) {
393bf215546Sopenharmony_ci               if (is_candidate)
394bf215546Sopenharmony_ci                  state->candidate = true;
395bf215546Sopenharmony_ci               else
396bf215546Sopenharmony_ci                  state->must_stay = true;
397bf215546Sopenharmony_ci            } else {
398bf215546Sopenharmony_ci               state->can_move_users++;
399bf215546Sopenharmony_ci            }
400bf215546Sopenharmony_ci         }
401bf215546Sopenharmony_ci
402bf215546Sopenharmony_ci         nir_foreach_if_use (use, def) {
403bf215546Sopenharmony_ci            if (is_candidate)
404bf215546Sopenharmony_ci               state->candidate = true;
405bf215546Sopenharmony_ci            else
406bf215546Sopenharmony_ci               state->must_stay = true;
407bf215546Sopenharmony_ci            break;
408bf215546Sopenharmony_ci         }
409bf215546Sopenharmony_ci
410bf215546Sopenharmony_ci         if (state->candidate)
411bf215546Sopenharmony_ci            num_candidates++;
412bf215546Sopenharmony_ci      }
413bf215546Sopenharmony_ci   }
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_ci   if (num_candidates == 0) {
416bf215546Sopenharmony_ci      *size = 0;
417bf215546Sopenharmony_ci      free(ctx.states);
418bf215546Sopenharmony_ci      return false;
419bf215546Sopenharmony_ci   }
420bf215546Sopenharmony_ci
421bf215546Sopenharmony_ci   def_state **candidates = malloc(sizeof(*candidates) * num_candidates);
422bf215546Sopenharmony_ci   unsigned candidate_idx = 0;
423bf215546Sopenharmony_ci   unsigned total_size = 0;
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci   /* Step 3: Calculate value of candidates by propagating downwards. We try
426bf215546Sopenharmony_ci    * to share the value amongst can_move uses, in case there are multiple.
427bf215546Sopenharmony_ci    * This won't always find the most optimal solution, but is hopefully a
428bf215546Sopenharmony_ci    * good heuristic.
429bf215546Sopenharmony_ci    *
430bf215546Sopenharmony_ci    * Note that we use the can_move adjusted in the last pass, because if a
431bf215546Sopenharmony_ci    * can_move instruction cannot be moved because it's not a candidate and it
432bf215546Sopenharmony_ci    * has a non-can_move source then we don't want to count it as a use.
433bf215546Sopenharmony_ci    *
434bf215546Sopenharmony_ci    * While we're here, also collect an array of candidates.
435bf215546Sopenharmony_ci    */
436bf215546Sopenharmony_ci   nir_foreach_block (block, impl) {
437bf215546Sopenharmony_ci      nir_foreach_instr (instr, block) {
438bf215546Sopenharmony_ci         nir_ssa_def *def = nir_instr_ssa_def(instr);
439bf215546Sopenharmony_ci         if (!def)
440bf215546Sopenharmony_ci            continue;
441bf215546Sopenharmony_ci
442bf215546Sopenharmony_ci         def_state *state = &ctx.states[def->index];
443bf215546Sopenharmony_ci         if (!state->can_move || state->must_stay)
444bf215546Sopenharmony_ci            continue;
445bf215546Sopenharmony_ci
446bf215546Sopenharmony_ci         ctx.def = def;
447bf215546Sopenharmony_ci         nir_foreach_src(instr, update_src_value, &ctx);
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_ci         /* If this instruction is a candidate, its value shouldn't be
450bf215546Sopenharmony_ci          * propagated so we skip dividing it.
451bf215546Sopenharmony_ci          *
452bf215546Sopenharmony_ci          * Note: if it's can_move but not a candidate, then all its users
453bf215546Sopenharmony_ci          * must be can_move, so if there are no users then it must be dead.
454bf215546Sopenharmony_ci          */
455bf215546Sopenharmony_ci         if (!state->candidate && !state->must_stay) {
456bf215546Sopenharmony_ci            if (state->can_move_users > 0)
457bf215546Sopenharmony_ci               state->value /= state->can_move_users;
458bf215546Sopenharmony_ci            else
459bf215546Sopenharmony_ci               state->value = 0;
460bf215546Sopenharmony_ci         }
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci         if (state->candidate) {
463bf215546Sopenharmony_ci            state->benefit = state->value -
464bf215546Sopenharmony_ci               options->rewrite_cost_cb(def, options->cb_data);
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_ci            if (state->benefit > 0) {
467bf215546Sopenharmony_ci               options->def_size(def, &state->size, &state->align);
468bf215546Sopenharmony_ci               total_size = ALIGN_POT(total_size, state->align);
469bf215546Sopenharmony_ci               total_size += state->size;
470bf215546Sopenharmony_ci               candidates[candidate_idx++] = state;
471bf215546Sopenharmony_ci            }
472bf215546Sopenharmony_ci         }
473bf215546Sopenharmony_ci      }
474bf215546Sopenharmony_ci   }
475bf215546Sopenharmony_ci
476bf215546Sopenharmony_ci   assert(candidate_idx <= num_candidates);
477bf215546Sopenharmony_ci   num_candidates = candidate_idx;
478bf215546Sopenharmony_ci
479bf215546Sopenharmony_ci   if (num_candidates == 0) {
480bf215546Sopenharmony_ci      *size = 0;
481bf215546Sopenharmony_ci      free(ctx.states);
482bf215546Sopenharmony_ci      free(candidates);
483bf215546Sopenharmony_ci      return false;
484bf215546Sopenharmony_ci   }
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci   /* Step 4: Figure out which candidates we're going to replace and assign an
487bf215546Sopenharmony_ci    * offset. Assuming there is no expression sharing, this is similar to the
488bf215546Sopenharmony_ci    * 0-1 knapsack problem, except when there is a gap introduced by
489bf215546Sopenharmony_ci    * alignment. We use a well-known greedy approximation, sorting by value
490bf215546Sopenharmony_ci    * divided by size.
491bf215546Sopenharmony_ci    */
492bf215546Sopenharmony_ci
493bf215546Sopenharmony_ci   if (total_size > options->preamble_storage_size) {
494bf215546Sopenharmony_ci      qsort(candidates, num_candidates, sizeof(*candidates), candidate_sort);
495bf215546Sopenharmony_ci   }
496bf215546Sopenharmony_ci
497bf215546Sopenharmony_ci   unsigned offset = 0;
498bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_candidates; i++) {
499bf215546Sopenharmony_ci      def_state *state = candidates[i];
500bf215546Sopenharmony_ci      offset = ALIGN_POT(offset, state->align);
501bf215546Sopenharmony_ci
502bf215546Sopenharmony_ci      if (offset + state->size > options->preamble_storage_size)
503bf215546Sopenharmony_ci         break;
504bf215546Sopenharmony_ci
505bf215546Sopenharmony_ci      state->replace = true;
506bf215546Sopenharmony_ci      state->offset = offset;
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_ci      offset += state->size;
509bf215546Sopenharmony_ci   }
510bf215546Sopenharmony_ci
511bf215546Sopenharmony_ci   *size = offset;
512bf215546Sopenharmony_ci
513bf215546Sopenharmony_ci   free(candidates);
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_ci   /* Step 5: Actually do the replacement. */
516bf215546Sopenharmony_ci   struct hash_table *remap_table =
517bf215546Sopenharmony_ci      _mesa_pointer_hash_table_create(NULL);
518bf215546Sopenharmony_ci   nir_function_impl *preamble =
519bf215546Sopenharmony_ci      nir_shader_get_preamble(impl->function->shader);
520bf215546Sopenharmony_ci   nir_builder _b;
521bf215546Sopenharmony_ci   nir_builder *b = &_b;
522bf215546Sopenharmony_ci   nir_builder_init(b, preamble);
523bf215546Sopenharmony_ci   b->cursor = nir_before_cf_list(&preamble->body);
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_ci   nir_foreach_block (block, impl) {
526bf215546Sopenharmony_ci      nir_foreach_instr (instr, block) {
527bf215546Sopenharmony_ci         nir_ssa_def *def = nir_instr_ssa_def(instr);
528bf215546Sopenharmony_ci         if (!def)
529bf215546Sopenharmony_ci            continue;
530bf215546Sopenharmony_ci
531bf215546Sopenharmony_ci         def_state *state = &ctx.states[def->index];
532bf215546Sopenharmony_ci         if (!state->can_move)
533bf215546Sopenharmony_ci            continue;
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_ci         nir_instr *clone = nir_instr_clone_deep(impl->function->shader,
536bf215546Sopenharmony_ci                                                 instr, remap_table);
537bf215546Sopenharmony_ci
538bf215546Sopenharmony_ci         nir_builder_instr_insert(b, clone);
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_ci         if (clone->type == nir_instr_type_tex) {
541bf215546Sopenharmony_ci            nir_tex_instr *tex = nir_instr_as_tex(clone);
542bf215546Sopenharmony_ci            if (tex->op == nir_texop_tex) {
543bf215546Sopenharmony_ci               /* For maximum compatibility, replace normal textures with
544bf215546Sopenharmony_ci                * textureGrad with a gradient of 0.
545bf215546Sopenharmony_ci                * TODO: Handle txb somehow.
546bf215546Sopenharmony_ci                */
547bf215546Sopenharmony_ci               b->cursor = nir_before_instr(clone);
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci               nir_ssa_def *zero =
550bf215546Sopenharmony_ci                  nir_imm_zero(b, tex->coord_components - tex->is_array, 32);
551bf215546Sopenharmony_ci               nir_tex_instr_add_src(tex, nir_tex_src_ddx, nir_src_for_ssa(zero));
552bf215546Sopenharmony_ci               nir_tex_instr_add_src(tex, nir_tex_src_ddy, nir_src_for_ssa(zero));
553bf215546Sopenharmony_ci               tex->op = nir_texop_txd;
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_ci               b->cursor = nir_after_instr(clone);
556bf215546Sopenharmony_ci            }
557bf215546Sopenharmony_ci         }
558bf215546Sopenharmony_ci
559bf215546Sopenharmony_ci         if (state->replace) {
560bf215546Sopenharmony_ci            nir_ssa_def *clone_def = nir_instr_ssa_def(clone);
561bf215546Sopenharmony_ci            nir_store_preamble(b, clone_def, .base = state->offset);
562bf215546Sopenharmony_ci         }
563bf215546Sopenharmony_ci      }
564bf215546Sopenharmony_ci   }
565bf215546Sopenharmony_ci
566bf215546Sopenharmony_ci   nir_builder_init(b, impl);
567bf215546Sopenharmony_ci
568bf215546Sopenharmony_ci   nir_foreach_block (block, impl) {
569bf215546Sopenharmony_ci      nir_foreach_instr_safe (instr, block) {
570bf215546Sopenharmony_ci         nir_ssa_def *def = nir_instr_ssa_def(instr);
571bf215546Sopenharmony_ci         if (!def)
572bf215546Sopenharmony_ci            continue;
573bf215546Sopenharmony_ci
574bf215546Sopenharmony_ci         def_state *state = &ctx.states[def->index];
575bf215546Sopenharmony_ci         if (!state->replace)
576bf215546Sopenharmony_ci            continue;
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_ci         b->cursor = nir_before_instr(instr);
579bf215546Sopenharmony_ci
580bf215546Sopenharmony_ci         nir_ssa_def *new_def =
581bf215546Sopenharmony_ci            nir_load_preamble(b, def->num_components, def->bit_size,
582bf215546Sopenharmony_ci                              .base = state->offset);
583bf215546Sopenharmony_ci
584bf215546Sopenharmony_ci
585bf215546Sopenharmony_ci         nir_ssa_def_rewrite_uses(def, new_def);
586bf215546Sopenharmony_ci         nir_instr_free_and_dce(instr);
587bf215546Sopenharmony_ci      }
588bf215546Sopenharmony_ci   }
589bf215546Sopenharmony_ci
590bf215546Sopenharmony_ci   nir_metadata_preserve(impl,
591bf215546Sopenharmony_ci                         nir_metadata_block_index |
592bf215546Sopenharmony_ci                         nir_metadata_dominance);
593bf215546Sopenharmony_ci
594bf215546Sopenharmony_ci   ralloc_free(remap_table);
595bf215546Sopenharmony_ci   free(ctx.states);
596bf215546Sopenharmony_ci   return true;
597bf215546Sopenharmony_ci}
598