1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2014 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *    Jason Ekstrand (jason@jlekstrand.net)
25bf215546Sopenharmony_ci *
26bf215546Sopenharmony_ci */
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include "nir.h"
29bf215546Sopenharmony_ci#include "nir_instr_set.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci/*
32bf215546Sopenharmony_ci * Implements Global Code Motion.  A description of GCM can be found in
33bf215546Sopenharmony_ci * "Global Code Motion; Global Value Numbering" by Cliff Click.
34bf215546Sopenharmony_ci * Unfortunately, the algorithm presented in the paper is broken in a
35bf215546Sopenharmony_ci * number of ways.  The algorithm used here differs substantially from the
36bf215546Sopenharmony_ci * one in the paper but it is, in my opinion, much easier to read and
37bf215546Sopenharmony_ci * verify correcness.
38bf215546Sopenharmony_ci */
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_ci/* This is used to stop GCM moving instruction out of a loop if the loop
41bf215546Sopenharmony_ci * contains too many instructions and moving them would create excess spilling.
42bf215546Sopenharmony_ci *
43bf215546Sopenharmony_ci * TODO: Figure out a better way to decide if we should remove instructions from
44bf215546Sopenharmony_ci * a loop.
45bf215546Sopenharmony_ci */
46bf215546Sopenharmony_ci#define MAX_LOOP_INSTRUCTIONS 100
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_cistruct gcm_block_info {
49bf215546Sopenharmony_ci   /* Number of loops this block is inside */
50bf215546Sopenharmony_ci   unsigned loop_depth;
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_ci   /* Number of ifs this block is inside */
53bf215546Sopenharmony_ci   unsigned if_depth;
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci   unsigned loop_instr_count;
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci   /* The loop the block is nested inside or NULL */
58bf215546Sopenharmony_ci   nir_loop *loop;
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ci   /* The last instruction inserted into this block.  This is used as we
61bf215546Sopenharmony_ci    * traverse the instructions and insert them back into the program to
62bf215546Sopenharmony_ci    * put them in the right order.
63bf215546Sopenharmony_ci    */
64bf215546Sopenharmony_ci   nir_instr *last_instr;
65bf215546Sopenharmony_ci};
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_cistruct gcm_instr_info {
68bf215546Sopenharmony_ci   nir_block *early_block;
69bf215546Sopenharmony_ci};
70bf215546Sopenharmony_ci
71bf215546Sopenharmony_ci/* Flags used in the instr->pass_flags field for various instruction states */
72bf215546Sopenharmony_cienum {
73bf215546Sopenharmony_ci   GCM_INSTR_PINNED =                (1 << 0),
74bf215546Sopenharmony_ci   GCM_INSTR_SCHEDULE_EARLIER_ONLY = (1 << 1),
75bf215546Sopenharmony_ci   GCM_INSTR_SCHEDULED_EARLY =       (1 << 2),
76bf215546Sopenharmony_ci   GCM_INSTR_SCHEDULED_LATE =        (1 << 3),
77bf215546Sopenharmony_ci   GCM_INSTR_PLACED =                (1 << 4),
78bf215546Sopenharmony_ci};
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_cistruct gcm_state {
81bf215546Sopenharmony_ci   nir_function_impl *impl;
82bf215546Sopenharmony_ci   nir_instr *instr;
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_ci   bool progress;
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci   /* The list of non-pinned instructions.  As we do the late scheduling,
87bf215546Sopenharmony_ci    * we pull non-pinned instructions out of their blocks and place them in
88bf215546Sopenharmony_ci    * this list.  This saves us from having linked-list problems when we go
89bf215546Sopenharmony_ci    * to put instructions back in their blocks.
90bf215546Sopenharmony_ci    */
91bf215546Sopenharmony_ci   struct exec_list instrs;
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci   struct gcm_block_info *blocks;
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci   unsigned num_instrs;
96bf215546Sopenharmony_ci   struct gcm_instr_info *instr_infos;
97bf215546Sopenharmony_ci};
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_cistatic unsigned
100bf215546Sopenharmony_ciget_loop_instr_count(struct exec_list *cf_list)
101bf215546Sopenharmony_ci{
102bf215546Sopenharmony_ci   unsigned loop_instr_count = 0;
103bf215546Sopenharmony_ci   foreach_list_typed(nir_cf_node, node, node, cf_list) {
104bf215546Sopenharmony_ci      switch (node->type) {
105bf215546Sopenharmony_ci      case nir_cf_node_block: {
106bf215546Sopenharmony_ci         nir_block *block = nir_cf_node_as_block(node);
107bf215546Sopenharmony_ci         nir_foreach_instr(instr, block) {
108bf215546Sopenharmony_ci            loop_instr_count++;
109bf215546Sopenharmony_ci         }
110bf215546Sopenharmony_ci         break;
111bf215546Sopenharmony_ci      }
112bf215546Sopenharmony_ci      case nir_cf_node_if: {
113bf215546Sopenharmony_ci         nir_if *if_stmt = nir_cf_node_as_if(node);
114bf215546Sopenharmony_ci         loop_instr_count += get_loop_instr_count(&if_stmt->then_list);
115bf215546Sopenharmony_ci         loop_instr_count += get_loop_instr_count(&if_stmt->else_list);
116bf215546Sopenharmony_ci         break;
117bf215546Sopenharmony_ci      }
118bf215546Sopenharmony_ci      case nir_cf_node_loop: {
119bf215546Sopenharmony_ci         nir_loop *loop = nir_cf_node_as_loop(node);
120bf215546Sopenharmony_ci         loop_instr_count += get_loop_instr_count(&loop->body);
121bf215546Sopenharmony_ci         break;
122bf215546Sopenharmony_ci      }
123bf215546Sopenharmony_ci      default:
124bf215546Sopenharmony_ci         unreachable("Invalid CF node type");
125bf215546Sopenharmony_ci      }
126bf215546Sopenharmony_ci   }
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci   return loop_instr_count;
129bf215546Sopenharmony_ci}
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci/* Recursively walks the CFG and builds the block_info structure */
132bf215546Sopenharmony_cistatic void
133bf215546Sopenharmony_cigcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state,
134bf215546Sopenharmony_ci                     nir_loop *loop, unsigned loop_depth, unsigned if_depth,
135bf215546Sopenharmony_ci                     unsigned loop_instr_count)
136bf215546Sopenharmony_ci{
137bf215546Sopenharmony_ci   foreach_list_typed(nir_cf_node, node, node, cf_list) {
138bf215546Sopenharmony_ci      switch (node->type) {
139bf215546Sopenharmony_ci      case nir_cf_node_block: {
140bf215546Sopenharmony_ci         nir_block *block = nir_cf_node_as_block(node);
141bf215546Sopenharmony_ci         state->blocks[block->index].if_depth = if_depth;
142bf215546Sopenharmony_ci         state->blocks[block->index].loop_depth = loop_depth;
143bf215546Sopenharmony_ci         state->blocks[block->index].loop_instr_count = loop_instr_count;
144bf215546Sopenharmony_ci         state->blocks[block->index].loop = loop;
145bf215546Sopenharmony_ci         break;
146bf215546Sopenharmony_ci      }
147bf215546Sopenharmony_ci      case nir_cf_node_if: {
148bf215546Sopenharmony_ci         nir_if *if_stmt = nir_cf_node_as_if(node);
149bf215546Sopenharmony_ci         gcm_build_block_info(&if_stmt->then_list, state, loop, loop_depth,
150bf215546Sopenharmony_ci                              if_depth + 1, ~0u);
151bf215546Sopenharmony_ci         gcm_build_block_info(&if_stmt->else_list, state, loop, loop_depth,
152bf215546Sopenharmony_ci                              if_depth + 1, ~0u);
153bf215546Sopenharmony_ci         break;
154bf215546Sopenharmony_ci      }
155bf215546Sopenharmony_ci      case nir_cf_node_loop: {
156bf215546Sopenharmony_ci         nir_loop *loop = nir_cf_node_as_loop(node);
157bf215546Sopenharmony_ci         gcm_build_block_info(&loop->body, state, loop, loop_depth + 1, if_depth,
158bf215546Sopenharmony_ci                              get_loop_instr_count(&loop->body));
159bf215546Sopenharmony_ci         break;
160bf215546Sopenharmony_ci      }
161bf215546Sopenharmony_ci      default:
162bf215546Sopenharmony_ci         unreachable("Invalid CF node type");
163bf215546Sopenharmony_ci      }
164bf215546Sopenharmony_ci   }
165bf215546Sopenharmony_ci}
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_cistatic bool
168bf215546Sopenharmony_ciis_src_scalarizable(nir_src *src)
169bf215546Sopenharmony_ci{
170bf215546Sopenharmony_ci   assert(src->is_ssa);
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_ci   nir_instr *src_instr = src->ssa->parent_instr;
173bf215546Sopenharmony_ci   switch (src_instr->type) {
174bf215546Sopenharmony_ci   case nir_instr_type_alu: {
175bf215546Sopenharmony_ci      nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
176bf215546Sopenharmony_ci
177bf215546Sopenharmony_ci      /* ALU operations with output_size == 0 should be scalarized.  We
178bf215546Sopenharmony_ci       * will also see a bunch of vecN operations from scalarizing ALU
179bf215546Sopenharmony_ci       * operations and, since they can easily be copy-propagated, they
180bf215546Sopenharmony_ci       * are ok too.
181bf215546Sopenharmony_ci       */
182bf215546Sopenharmony_ci      return nir_op_infos[src_alu->op].output_size == 0 ||
183bf215546Sopenharmony_ci             src_alu->op == nir_op_vec2 ||
184bf215546Sopenharmony_ci             src_alu->op == nir_op_vec3 ||
185bf215546Sopenharmony_ci             src_alu->op == nir_op_vec4;
186bf215546Sopenharmony_ci   }
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci   case nir_instr_type_load_const:
189bf215546Sopenharmony_ci      /* These are trivially scalarizable */
190bf215546Sopenharmony_ci      return true;
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci   case nir_instr_type_ssa_undef:
193bf215546Sopenharmony_ci      return true;
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci   case nir_instr_type_intrinsic: {
196bf215546Sopenharmony_ci      nir_intrinsic_instr *src_intrin = nir_instr_as_intrinsic(src_instr);
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci      switch (src_intrin->intrinsic) {
199bf215546Sopenharmony_ci      case nir_intrinsic_load_deref: {
200bf215546Sopenharmony_ci         /* Don't scalarize if we see a load of a local variable because it
201bf215546Sopenharmony_ci          * might turn into one of the things we can't scalarize.
202bf215546Sopenharmony_ci          */
203bf215546Sopenharmony_ci         nir_deref_instr *deref = nir_src_as_deref(src_intrin->src[0]);
204bf215546Sopenharmony_ci         return !nir_deref_mode_may_be(deref, (nir_var_function_temp |
205bf215546Sopenharmony_ci                                               nir_var_shader_temp));
206bf215546Sopenharmony_ci      }
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci      case nir_intrinsic_interp_deref_at_centroid:
209bf215546Sopenharmony_ci      case nir_intrinsic_interp_deref_at_sample:
210bf215546Sopenharmony_ci      case nir_intrinsic_interp_deref_at_offset:
211bf215546Sopenharmony_ci      case nir_intrinsic_load_uniform:
212bf215546Sopenharmony_ci      case nir_intrinsic_load_ubo:
213bf215546Sopenharmony_ci      case nir_intrinsic_load_ssbo:
214bf215546Sopenharmony_ci      case nir_intrinsic_load_global:
215bf215546Sopenharmony_ci      case nir_intrinsic_load_global_constant:
216bf215546Sopenharmony_ci      case nir_intrinsic_load_input:
217bf215546Sopenharmony_ci         return true;
218bf215546Sopenharmony_ci      default:
219bf215546Sopenharmony_ci         break;
220bf215546Sopenharmony_ci      }
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_ci      return false;
223bf215546Sopenharmony_ci   }
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci   default:
226bf215546Sopenharmony_ci      /* We can't scalarize this type of instruction */
227bf215546Sopenharmony_ci      return false;
228bf215546Sopenharmony_ci   }
229bf215546Sopenharmony_ci}
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_cistatic bool
232bf215546Sopenharmony_ciis_binding_uniform(nir_src src)
233bf215546Sopenharmony_ci{
234bf215546Sopenharmony_ci   nir_binding binding = nir_chase_binding(src);
235bf215546Sopenharmony_ci   if (!binding.success)
236bf215546Sopenharmony_ci      return false;
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci   for (unsigned i = 0; i < binding.num_indices; i++) {
239bf215546Sopenharmony_ci      if (!nir_src_is_always_uniform(binding.indices[i]))
240bf215546Sopenharmony_ci         return false;
241bf215546Sopenharmony_ci   }
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci   return true;
244bf215546Sopenharmony_ci}
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_cistatic void
247bf215546Sopenharmony_cipin_intrinsic(nir_intrinsic_instr *intrin)
248bf215546Sopenharmony_ci{
249bf215546Sopenharmony_ci   nir_instr *instr = &intrin->instr;
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci   if (!nir_intrinsic_can_reorder(intrin)) {
252bf215546Sopenharmony_ci      instr->pass_flags = GCM_INSTR_PINNED;
253bf215546Sopenharmony_ci      return;
254bf215546Sopenharmony_ci   }
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_ci   instr->pass_flags = 0;
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_ci   /* If the intrinsic requires a uniform source, we can't safely move it across non-uniform
259bf215546Sopenharmony_ci    * control flow if it's not uniform at the point it's defined.
260bf215546Sopenharmony_ci    * Stores and atomics can never be re-ordered, so we don't have to consider them here.
261bf215546Sopenharmony_ci    */
262bf215546Sopenharmony_ci   bool non_uniform = nir_intrinsic_has_access(intrin) &&
263bf215546Sopenharmony_ci                      (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM);
264bf215546Sopenharmony_ci   if (!non_uniform &&
265bf215546Sopenharmony_ci       (intrin->intrinsic == nir_intrinsic_load_ubo ||
266bf215546Sopenharmony_ci        intrin->intrinsic == nir_intrinsic_load_ssbo ||
267bf215546Sopenharmony_ci        intrin->intrinsic == nir_intrinsic_get_ubo_size ||
268bf215546Sopenharmony_ci        intrin->intrinsic == nir_intrinsic_get_ssbo_size ||
269bf215546Sopenharmony_ci        nir_intrinsic_has_image_dim(intrin) ||
270bf215546Sopenharmony_ci        ((intrin->intrinsic == nir_intrinsic_load_deref ||
271bf215546Sopenharmony_ci          intrin->intrinsic == nir_intrinsic_deref_buffer_array_length) &&
272bf215546Sopenharmony_ci         nir_deref_mode_may_be(nir_src_as_deref(intrin->src[0]),
273bf215546Sopenharmony_ci                               nir_var_mem_ubo | nir_var_mem_ssbo)))) {
274bf215546Sopenharmony_ci      if (!is_binding_uniform(intrin->src[0]))
275bf215546Sopenharmony_ci         instr->pass_flags = GCM_INSTR_PINNED;
276bf215546Sopenharmony_ci   } else if (intrin->intrinsic == nir_intrinsic_load_push_constant) {
277bf215546Sopenharmony_ci      if (!nir_src_is_always_uniform(intrin->src[0]))
278bf215546Sopenharmony_ci         instr->pass_flags = GCM_INSTR_PINNED;
279bf215546Sopenharmony_ci   } else if (intrin->intrinsic == nir_intrinsic_load_deref &&
280bf215546Sopenharmony_ci              nir_deref_mode_is(nir_src_as_deref(intrin->src[0]),
281bf215546Sopenharmony_ci                                nir_var_mem_push_const)) {
282bf215546Sopenharmony_ci      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
283bf215546Sopenharmony_ci      while (deref->deref_type != nir_deref_type_var) {
284bf215546Sopenharmony_ci         if ((deref->deref_type == nir_deref_type_array ||
285bf215546Sopenharmony_ci              deref->deref_type == nir_deref_type_ptr_as_array) &&
286bf215546Sopenharmony_ci             !nir_src_is_always_uniform(deref->arr.index)) {
287bf215546Sopenharmony_ci            instr->pass_flags = GCM_INSTR_PINNED;
288bf215546Sopenharmony_ci            return;
289bf215546Sopenharmony_ci         }
290bf215546Sopenharmony_ci         deref = nir_deref_instr_parent(deref);
291bf215546Sopenharmony_ci         if (!deref) {
292bf215546Sopenharmony_ci            instr->pass_flags = GCM_INSTR_PINNED;
293bf215546Sopenharmony_ci            return;
294bf215546Sopenharmony_ci         }
295bf215546Sopenharmony_ci      }
296bf215546Sopenharmony_ci   }
297bf215546Sopenharmony_ci}
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_ci/* Walks the instruction list and marks immovable instructions as pinned or
300bf215546Sopenharmony_ci * placed.
301bf215546Sopenharmony_ci *
302bf215546Sopenharmony_ci * This function also serves to initialize the instr->pass_flags field.
303bf215546Sopenharmony_ci * After this is completed, all instructions' pass_flags fields will be set
304bf215546Sopenharmony_ci * to either GCM_INSTR_PINNED, GCM_INSTR_PLACED or 0.
305bf215546Sopenharmony_ci */
306bf215546Sopenharmony_cistatic void
307bf215546Sopenharmony_cigcm_pin_instructions(nir_function_impl *impl, struct gcm_state *state)
308bf215546Sopenharmony_ci{
309bf215546Sopenharmony_ci   state->num_instrs = 0;
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
312bf215546Sopenharmony_ci      nir_foreach_instr_safe(instr, block) {
313bf215546Sopenharmony_ci         /* Index the instructions for use in gcm_state::instrs */
314bf215546Sopenharmony_ci         instr->index = state->num_instrs++;
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci         switch (instr->type) {
317bf215546Sopenharmony_ci         case nir_instr_type_alu:
318bf215546Sopenharmony_ci            switch (nir_instr_as_alu(instr)->op) {
319bf215546Sopenharmony_ci            case nir_op_fddx:
320bf215546Sopenharmony_ci            case nir_op_fddy:
321bf215546Sopenharmony_ci            case nir_op_fddx_fine:
322bf215546Sopenharmony_ci            case nir_op_fddy_fine:
323bf215546Sopenharmony_ci            case nir_op_fddx_coarse:
324bf215546Sopenharmony_ci            case nir_op_fddy_coarse:
325bf215546Sopenharmony_ci               /* These can only go in uniform control flow */
326bf215546Sopenharmony_ci               instr->pass_flags = GCM_INSTR_SCHEDULE_EARLIER_ONLY;
327bf215546Sopenharmony_ci               break;
328bf215546Sopenharmony_ci
329bf215546Sopenharmony_ci            case nir_op_mov:
330bf215546Sopenharmony_ci               if (!is_src_scalarizable(&(nir_instr_as_alu(instr)->src[0].src))) {
331bf215546Sopenharmony_ci                  instr->pass_flags = GCM_INSTR_PINNED;
332bf215546Sopenharmony_ci                  break;
333bf215546Sopenharmony_ci               }
334bf215546Sopenharmony_ci               FALLTHROUGH;
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci            default:
337bf215546Sopenharmony_ci               instr->pass_flags = 0;
338bf215546Sopenharmony_ci               break;
339bf215546Sopenharmony_ci            }
340bf215546Sopenharmony_ci            break;
341bf215546Sopenharmony_ci
342bf215546Sopenharmony_ci         case nir_instr_type_tex: {
343bf215546Sopenharmony_ci            nir_tex_instr *tex = nir_instr_as_tex(instr);
344bf215546Sopenharmony_ci            if (nir_tex_instr_has_implicit_derivative(tex))
345bf215546Sopenharmony_ci               instr->pass_flags = GCM_INSTR_SCHEDULE_EARLIER_ONLY;
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_ci            for (unsigned i = 0; i < tex->num_srcs; i++) {
348bf215546Sopenharmony_ci               nir_tex_src *src = &tex->src[i];
349bf215546Sopenharmony_ci               switch (src->src_type) {
350bf215546Sopenharmony_ci               case nir_tex_src_texture_deref:
351bf215546Sopenharmony_ci                  if (!tex->texture_non_uniform && !is_binding_uniform(src->src))
352bf215546Sopenharmony_ci                     instr->pass_flags = GCM_INSTR_PINNED;
353bf215546Sopenharmony_ci                  break;
354bf215546Sopenharmony_ci               case nir_tex_src_sampler_deref:
355bf215546Sopenharmony_ci                  if (!tex->sampler_non_uniform && !is_binding_uniform(src->src))
356bf215546Sopenharmony_ci                     instr->pass_flags = GCM_INSTR_PINNED;
357bf215546Sopenharmony_ci                  break;
358bf215546Sopenharmony_ci               case nir_tex_src_texture_offset:
359bf215546Sopenharmony_ci               case nir_tex_src_texture_handle:
360bf215546Sopenharmony_ci                  if (!tex->texture_non_uniform && !nir_src_is_always_uniform(src->src))
361bf215546Sopenharmony_ci                     instr->pass_flags = GCM_INSTR_PINNED;
362bf215546Sopenharmony_ci                  break;
363bf215546Sopenharmony_ci               case nir_tex_src_sampler_offset:
364bf215546Sopenharmony_ci               case nir_tex_src_sampler_handle:
365bf215546Sopenharmony_ci                  if (!tex->sampler_non_uniform && !nir_src_is_always_uniform(src->src))
366bf215546Sopenharmony_ci                     instr->pass_flags = GCM_INSTR_PINNED;
367bf215546Sopenharmony_ci                  break;
368bf215546Sopenharmony_ci               default:
369bf215546Sopenharmony_ci                  break;
370bf215546Sopenharmony_ci               }
371bf215546Sopenharmony_ci            }
372bf215546Sopenharmony_ci            break;
373bf215546Sopenharmony_ci         }
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ci         case nir_instr_type_deref:
376bf215546Sopenharmony_ci         case nir_instr_type_load_const:
377bf215546Sopenharmony_ci            instr->pass_flags = 0;
378bf215546Sopenharmony_ci            break;
379bf215546Sopenharmony_ci
380bf215546Sopenharmony_ci         case nir_instr_type_intrinsic:
381bf215546Sopenharmony_ci            pin_intrinsic(nir_instr_as_intrinsic(instr));
382bf215546Sopenharmony_ci            break;
383bf215546Sopenharmony_ci
384bf215546Sopenharmony_ci         case nir_instr_type_call:
385bf215546Sopenharmony_ci            instr->pass_flags = GCM_INSTR_PINNED;
386bf215546Sopenharmony_ci            break;
387bf215546Sopenharmony_ci
388bf215546Sopenharmony_ci         case nir_instr_type_jump:
389bf215546Sopenharmony_ci         case nir_instr_type_ssa_undef:
390bf215546Sopenharmony_ci         case nir_instr_type_phi:
391bf215546Sopenharmony_ci            instr->pass_flags = GCM_INSTR_PLACED;
392bf215546Sopenharmony_ci            break;
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_ci         default:
395bf215546Sopenharmony_ci            unreachable("Invalid instruction type in GCM");
396bf215546Sopenharmony_ci         }
397bf215546Sopenharmony_ci
398bf215546Sopenharmony_ci         if (!(instr->pass_flags & GCM_INSTR_PLACED)) {
399bf215546Sopenharmony_ci            /* If this is an unplaced instruction, go ahead and pull it out of
400bf215546Sopenharmony_ci             * the program and put it on the instrs list.  This has a couple
401bf215546Sopenharmony_ci             * of benifits.  First, it makes the scheduling algorithm more
402bf215546Sopenharmony_ci             * efficient because we can avoid walking over basic blocks.
403bf215546Sopenharmony_ci             * Second, it keeps us from causing linked list confusion when
404bf215546Sopenharmony_ci             * we're trying to put everything in its proper place at the end
405bf215546Sopenharmony_ci             * of the pass.
406bf215546Sopenharmony_ci             *
407bf215546Sopenharmony_ci             * Note that we don't use nir_instr_remove here because that also
408bf215546Sopenharmony_ci             * cleans up uses and defs and we want to keep that information.
409bf215546Sopenharmony_ci             */
410bf215546Sopenharmony_ci            exec_node_remove(&instr->node);
411bf215546Sopenharmony_ci            exec_list_push_tail(&state->instrs, &instr->node);
412bf215546Sopenharmony_ci         }
413bf215546Sopenharmony_ci      }
414bf215546Sopenharmony_ci   }
415bf215546Sopenharmony_ci}
416bf215546Sopenharmony_ci
417bf215546Sopenharmony_cistatic void
418bf215546Sopenharmony_cigcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state);
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci/** Update an instructions schedule for the given source
421bf215546Sopenharmony_ci *
422bf215546Sopenharmony_ci * This function is called iteratively as we walk the sources of an
423bf215546Sopenharmony_ci * instruction.  It ensures that the given source instruction has been
424bf215546Sopenharmony_ci * scheduled and then update this instruction's block if the source
425bf215546Sopenharmony_ci * instruction is lower down the tree.
426bf215546Sopenharmony_ci */
427bf215546Sopenharmony_cistatic bool
428bf215546Sopenharmony_cigcm_schedule_early_src(nir_src *src, void *void_state)
429bf215546Sopenharmony_ci{
430bf215546Sopenharmony_ci   struct gcm_state *state = void_state;
431bf215546Sopenharmony_ci   nir_instr *instr = state->instr;
432bf215546Sopenharmony_ci
433bf215546Sopenharmony_ci   assert(src->is_ssa);
434bf215546Sopenharmony_ci
435bf215546Sopenharmony_ci   gcm_schedule_early_instr(src->ssa->parent_instr, void_state);
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci   /* While the index isn't a proper dominance depth, it does have the
438bf215546Sopenharmony_ci    * property that if A dominates B then A->index <= B->index.  Since we
439bf215546Sopenharmony_ci    * know that this instruction must have been dominated by all of its
440bf215546Sopenharmony_ci    * sources at some point (even if it's gone through value-numbering),
441bf215546Sopenharmony_ci    * all of the sources must lie on the same branch of the dominance tree.
442bf215546Sopenharmony_ci    * Therefore, we can just go ahead and just compare indices.
443bf215546Sopenharmony_ci    */
444bf215546Sopenharmony_ci   struct gcm_instr_info *src_info =
445bf215546Sopenharmony_ci      &state->instr_infos[src->ssa->parent_instr->index];
446bf215546Sopenharmony_ci   struct gcm_instr_info *info = &state->instr_infos[instr->index];
447bf215546Sopenharmony_ci   if (info->early_block->index < src_info->early_block->index)
448bf215546Sopenharmony_ci      info->early_block = src_info->early_block;
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_ci   /* We need to restore the state instruction because it may have been
451bf215546Sopenharmony_ci    * changed through the gcm_schedule_early_instr call above.  Since we
452bf215546Sopenharmony_ci    * may still be iterating through sources and future calls to
453bf215546Sopenharmony_ci    * gcm_schedule_early_src for the same instruction will still need it.
454bf215546Sopenharmony_ci    */
455bf215546Sopenharmony_ci   state->instr = instr;
456bf215546Sopenharmony_ci
457bf215546Sopenharmony_ci   return true;
458bf215546Sopenharmony_ci}
459bf215546Sopenharmony_ci
460bf215546Sopenharmony_ci/** Schedules an instruction early
461bf215546Sopenharmony_ci *
462bf215546Sopenharmony_ci * This function performs a recursive depth-first search starting at the
463bf215546Sopenharmony_ci * given instruction and proceeding through the sources to schedule
464bf215546Sopenharmony_ci * instructions as early as they can possibly go in the dominance tree.
465bf215546Sopenharmony_ci * The instructions are "scheduled" by updating the early_block field of
466bf215546Sopenharmony_ci * the corresponding gcm_instr_state entry.
467bf215546Sopenharmony_ci */
468bf215546Sopenharmony_cistatic void
469bf215546Sopenharmony_cigcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state)
470bf215546Sopenharmony_ci{
471bf215546Sopenharmony_ci   if (instr->pass_flags & GCM_INSTR_SCHEDULED_EARLY)
472bf215546Sopenharmony_ci      return;
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_ci   instr->pass_flags |= GCM_INSTR_SCHEDULED_EARLY;
475bf215546Sopenharmony_ci
476bf215546Sopenharmony_ci   /* Pinned/placed instructions always get scheduled in their original block so
477bf215546Sopenharmony_ci    * we don't need to do anything.  Also, bailing here keeps us from ever
478bf215546Sopenharmony_ci    * following the sources of phi nodes which can be back-edges.
479bf215546Sopenharmony_ci    */
480bf215546Sopenharmony_ci   if (instr->pass_flags & GCM_INSTR_PINNED ||
481bf215546Sopenharmony_ci       instr->pass_flags & GCM_INSTR_PLACED) {
482bf215546Sopenharmony_ci      state->instr_infos[instr->index].early_block = instr->block;
483bf215546Sopenharmony_ci      return;
484bf215546Sopenharmony_ci   }
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci   /* Start with the instruction at the top.  As we iterate over the
487bf215546Sopenharmony_ci    * sources, it will get moved down as needed.
488bf215546Sopenharmony_ci    */
489bf215546Sopenharmony_ci   state->instr_infos[instr->index].early_block = nir_start_block(state->impl);
490bf215546Sopenharmony_ci   state->instr = instr;
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci   nir_foreach_src(instr, gcm_schedule_early_src, state);
493bf215546Sopenharmony_ci}
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_cistatic bool
496bf215546Sopenharmony_ciset_block_for_loop_instr(struct gcm_state *state, nir_instr *instr,
497bf215546Sopenharmony_ci                         nir_block *block)
498bf215546Sopenharmony_ci{
499bf215546Sopenharmony_ci   /* If the instruction wasn't in a loop to begin with we don't want to push
500bf215546Sopenharmony_ci    * it down into one.
501bf215546Sopenharmony_ci    */
502bf215546Sopenharmony_ci   nir_loop *loop = state->blocks[instr->block->index].loop;
503bf215546Sopenharmony_ci   if (loop == NULL)
504bf215546Sopenharmony_ci      return true;
505bf215546Sopenharmony_ci
506bf215546Sopenharmony_ci   if (nir_block_dominates(instr->block, block))
507bf215546Sopenharmony_ci      return true;
508bf215546Sopenharmony_ci
509bf215546Sopenharmony_ci   /* If the loop only executes a single time i.e its wrapped in a:
510bf215546Sopenharmony_ci    *    do{ ... break; } while(true)
511bf215546Sopenharmony_ci    * Don't move the instruction as it will not help anything.
512bf215546Sopenharmony_ci    */
513bf215546Sopenharmony_ci   if (loop->info->limiting_terminator == NULL && !loop->info->complex_loop &&
514bf215546Sopenharmony_ci       nir_block_ends_in_break(nir_loop_last_block(loop)))
515bf215546Sopenharmony_ci      return false;
516bf215546Sopenharmony_ci
517bf215546Sopenharmony_ci   /* Being too aggressive with how we pull instructions out of loops can
518bf215546Sopenharmony_ci    * result in extra register pressure and spilling. For example its fairly
519bf215546Sopenharmony_ci    * common for loops in compute shaders to calculate SSBO offsets using
520bf215546Sopenharmony_ci    * the workgroup id, subgroup id and subgroup invocation, pulling all
521bf215546Sopenharmony_ci    * these calculations outside the loop causes register pressure.
522bf215546Sopenharmony_ci    *
523bf215546Sopenharmony_ci    * To work around these issues for now we only allow constant and texture
524bf215546Sopenharmony_ci    * instructions to be moved outside their original loops, or instructions
525bf215546Sopenharmony_ci    * where the total loop instruction count is less than
526bf215546Sopenharmony_ci    * MAX_LOOP_INSTRUCTIONS.
527bf215546Sopenharmony_ci    *
528bf215546Sopenharmony_ci    * TODO: figure out some more heuristics to allow more to be moved out of
529bf215546Sopenharmony_ci    * loops.
530bf215546Sopenharmony_ci    */
531bf215546Sopenharmony_ci   if (state->blocks[instr->block->index].loop_instr_count < MAX_LOOP_INSTRUCTIONS)
532bf215546Sopenharmony_ci      return true;
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_ci   if (instr->type == nir_instr_type_load_const ||
535bf215546Sopenharmony_ci       instr->type == nir_instr_type_tex)
536bf215546Sopenharmony_ci      return true;
537bf215546Sopenharmony_ci
538bf215546Sopenharmony_ci   return false;
539bf215546Sopenharmony_ci}
540bf215546Sopenharmony_ci
541bf215546Sopenharmony_cistatic bool
542bf215546Sopenharmony_ciset_block_to_if_block(struct gcm_state *state,  nir_instr *instr,
543bf215546Sopenharmony_ci                      nir_block *block)
544bf215546Sopenharmony_ci{
545bf215546Sopenharmony_ci   if (instr->type == nir_instr_type_load_const)
546bf215546Sopenharmony_ci      return true;
547bf215546Sopenharmony_ci
548bf215546Sopenharmony_ci   /* TODO: Figure out some more heuristics to allow more to be moved into
549bf215546Sopenharmony_ci    * if-statements.
550bf215546Sopenharmony_ci    */
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_ci   return false;
553bf215546Sopenharmony_ci}
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_cistatic nir_block *
556bf215546Sopenharmony_cigcm_choose_block_for_instr(nir_instr *instr, nir_block *early_block,
557bf215546Sopenharmony_ci                           nir_block *late_block, struct gcm_state *state)
558bf215546Sopenharmony_ci{
559bf215546Sopenharmony_ci   assert(nir_block_dominates(early_block, late_block));
560bf215546Sopenharmony_ci
561bf215546Sopenharmony_ci   bool block_set = false;
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci   /* First see if we can push the instruction down into an if-statements block */
564bf215546Sopenharmony_ci   nir_block *best = late_block;
565bf215546Sopenharmony_ci   for (nir_block *block = late_block; block != NULL; block = block->imm_dom) {
566bf215546Sopenharmony_ci      if (state->blocks[block->index].loop_depth >
567bf215546Sopenharmony_ci          state->blocks[instr->block->index].loop_depth)
568bf215546Sopenharmony_ci         continue;
569bf215546Sopenharmony_ci
570bf215546Sopenharmony_ci      if (state->blocks[block->index].if_depth >=
571bf215546Sopenharmony_ci          state->blocks[best->index].if_depth &&
572bf215546Sopenharmony_ci          set_block_to_if_block(state, instr, block)) {
573bf215546Sopenharmony_ci            /* If we are pushing the instruction into an if we want it to be
574bf215546Sopenharmony_ci             * in the earliest block not the latest to avoid creating register
575bf215546Sopenharmony_ci             * pressure issues. So we don't break unless we come across the
576bf215546Sopenharmony_ci             * block the instruction was originally in.
577bf215546Sopenharmony_ci             */
578bf215546Sopenharmony_ci            best = block;
579bf215546Sopenharmony_ci            block_set = true;
580bf215546Sopenharmony_ci            if (block == instr->block)
581bf215546Sopenharmony_ci               break;
582bf215546Sopenharmony_ci      } else if (block == instr->block) {
583bf215546Sopenharmony_ci         /* If we couldn't push the instruction later just put is back where it
584bf215546Sopenharmony_ci          * was previously.
585bf215546Sopenharmony_ci          */
586bf215546Sopenharmony_ci         if (!block_set)
587bf215546Sopenharmony_ci            best = block;
588bf215546Sopenharmony_ci         break;
589bf215546Sopenharmony_ci      }
590bf215546Sopenharmony_ci
591bf215546Sopenharmony_ci      if (block == early_block)
592bf215546Sopenharmony_ci         break;
593bf215546Sopenharmony_ci   }
594bf215546Sopenharmony_ci
595bf215546Sopenharmony_ci   /* Now see if we can evict the instruction from a loop */
596bf215546Sopenharmony_ci   for (nir_block *block = late_block; block != NULL; block = block->imm_dom) {
597bf215546Sopenharmony_ci      if (state->blocks[block->index].loop_depth <
598bf215546Sopenharmony_ci          state->blocks[best->index].loop_depth) {
599bf215546Sopenharmony_ci         if (set_block_for_loop_instr(state, instr, block)) {
600bf215546Sopenharmony_ci            best = block;
601bf215546Sopenharmony_ci         } else if (block == instr->block) {
602bf215546Sopenharmony_ci            if (!block_set)
603bf215546Sopenharmony_ci               best = block;
604bf215546Sopenharmony_ci            break;
605bf215546Sopenharmony_ci         }
606bf215546Sopenharmony_ci      }
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_ci      if (block == early_block)
609bf215546Sopenharmony_ci         break;
610bf215546Sopenharmony_ci   }
611bf215546Sopenharmony_ci
612bf215546Sopenharmony_ci   return best;
613bf215546Sopenharmony_ci}
614bf215546Sopenharmony_ci
615bf215546Sopenharmony_cistatic void
616bf215546Sopenharmony_cigcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state);
617bf215546Sopenharmony_ci
618bf215546Sopenharmony_ci/** Schedules the instruction associated with the given SSA def late
619bf215546Sopenharmony_ci *
620bf215546Sopenharmony_ci * This function works by first walking all of the uses of the given SSA
621bf215546Sopenharmony_ci * definition, ensuring that they are scheduled, and then computing the LCA
622bf215546Sopenharmony_ci * (least common ancestor) of its uses.  It then schedules this instruction
623bf215546Sopenharmony_ci * as close to the LCA as possible while trying to stay out of loops.
624bf215546Sopenharmony_ci */
625bf215546Sopenharmony_cistatic bool
626bf215546Sopenharmony_cigcm_schedule_late_def(nir_ssa_def *def, void *void_state)
627bf215546Sopenharmony_ci{
628bf215546Sopenharmony_ci   struct gcm_state *state = void_state;
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_ci   nir_block *lca = NULL;
631bf215546Sopenharmony_ci
632bf215546Sopenharmony_ci   nir_foreach_use(use_src, def) {
633bf215546Sopenharmony_ci      nir_instr *use_instr = use_src->parent_instr;
634bf215546Sopenharmony_ci
635bf215546Sopenharmony_ci      gcm_schedule_late_instr(use_instr, state);
636bf215546Sopenharmony_ci
637bf215546Sopenharmony_ci      /* Phi instructions are a bit special.  SSA definitions don't have to
638bf215546Sopenharmony_ci       * dominate the sources of the phi nodes that use them; instead, they
639bf215546Sopenharmony_ci       * have to dominate the predecessor block corresponding to the phi
640bf215546Sopenharmony_ci       * source.  We handle this by looking through the sources, finding
641bf215546Sopenharmony_ci       * any that are usingg this SSA def, and using those blocks instead
642bf215546Sopenharmony_ci       * of the one the phi lives in.
643bf215546Sopenharmony_ci       */
644bf215546Sopenharmony_ci      if (use_instr->type == nir_instr_type_phi) {
645bf215546Sopenharmony_ci         nir_phi_instr *phi = nir_instr_as_phi(use_instr);
646bf215546Sopenharmony_ci
647bf215546Sopenharmony_ci         nir_foreach_phi_src(phi_src, phi) {
648bf215546Sopenharmony_ci            if (phi_src->src.ssa == def)
649bf215546Sopenharmony_ci               lca = nir_dominance_lca(lca, phi_src->pred);
650bf215546Sopenharmony_ci         }
651bf215546Sopenharmony_ci      } else {
652bf215546Sopenharmony_ci         lca = nir_dominance_lca(lca, use_instr->block);
653bf215546Sopenharmony_ci      }
654bf215546Sopenharmony_ci   }
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_ci   nir_foreach_if_use(use_src, def) {
657bf215546Sopenharmony_ci      nir_if *if_stmt = use_src->parent_if;
658bf215546Sopenharmony_ci
659bf215546Sopenharmony_ci      /* For if statements, we consider the block to be the one immediately
660bf215546Sopenharmony_ci       * preceding the if CF node.
661bf215546Sopenharmony_ci       */
662bf215546Sopenharmony_ci      nir_block *pred_block =
663bf215546Sopenharmony_ci         nir_cf_node_as_block(nir_cf_node_prev(&if_stmt->cf_node));
664bf215546Sopenharmony_ci
665bf215546Sopenharmony_ci      lca = nir_dominance_lca(lca, pred_block);
666bf215546Sopenharmony_ci   }
667bf215546Sopenharmony_ci
668bf215546Sopenharmony_ci   nir_block *early_block =
669bf215546Sopenharmony_ci      state->instr_infos[def->parent_instr->index].early_block;
670bf215546Sopenharmony_ci
671bf215546Sopenharmony_ci   /* Some instructions may never be used.  Flag them and the instruction
672bf215546Sopenharmony_ci    * placement code will get rid of them for us.
673bf215546Sopenharmony_ci    */
674bf215546Sopenharmony_ci   if (lca == NULL) {
675bf215546Sopenharmony_ci      def->parent_instr->block = NULL;
676bf215546Sopenharmony_ci      return true;
677bf215546Sopenharmony_ci   }
678bf215546Sopenharmony_ci
679bf215546Sopenharmony_ci   if (def->parent_instr->pass_flags & GCM_INSTR_SCHEDULE_EARLIER_ONLY &&
680bf215546Sopenharmony_ci       lca != def->parent_instr->block &&
681bf215546Sopenharmony_ci       nir_block_dominates(def->parent_instr->block, lca)) {
682bf215546Sopenharmony_ci      lca = def->parent_instr->block;
683bf215546Sopenharmony_ci   }
684bf215546Sopenharmony_ci
685bf215546Sopenharmony_ci   /* We now have the LCA of all of the uses.  If our invariants hold,
686bf215546Sopenharmony_ci    * this is dominated by the block that we chose when scheduling early.
687bf215546Sopenharmony_ci    * We now walk up the dominance tree and pick the lowest block that is
688bf215546Sopenharmony_ci    * as far outside loops as we can get.
689bf215546Sopenharmony_ci    */
690bf215546Sopenharmony_ci   nir_block *best_block =
691bf215546Sopenharmony_ci      gcm_choose_block_for_instr(def->parent_instr, early_block, lca, state);
692bf215546Sopenharmony_ci
693bf215546Sopenharmony_ci   if (def->parent_instr->block != best_block)
694bf215546Sopenharmony_ci      state->progress = true;
695bf215546Sopenharmony_ci
696bf215546Sopenharmony_ci   def->parent_instr->block = best_block;
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_ci   return true;
699bf215546Sopenharmony_ci}
700bf215546Sopenharmony_ci
701bf215546Sopenharmony_ci/** Schedules an instruction late
702bf215546Sopenharmony_ci *
703bf215546Sopenharmony_ci * This function performs a depth-first search starting at the given
704bf215546Sopenharmony_ci * instruction and proceeding through its uses to schedule instructions as
705bf215546Sopenharmony_ci * late as they can reasonably go in the dominance tree.  The instructions
706bf215546Sopenharmony_ci * are "scheduled" by updating their instr->block field.
707bf215546Sopenharmony_ci *
708bf215546Sopenharmony_ci * The name of this function is actually a bit of a misnomer as it doesn't
709bf215546Sopenharmony_ci * schedule them "as late as possible" as the paper implies.  Instead, it
710bf215546Sopenharmony_ci * first finds the lates possible place it can schedule the instruction and
711bf215546Sopenharmony_ci * then possibly schedules it earlier than that.  The actual location is as
712bf215546Sopenharmony_ci * far down the tree as we can go while trying to stay out of loops.
713bf215546Sopenharmony_ci */
714bf215546Sopenharmony_cistatic void
715bf215546Sopenharmony_cigcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state)
716bf215546Sopenharmony_ci{
717bf215546Sopenharmony_ci   if (instr->pass_flags & GCM_INSTR_SCHEDULED_LATE)
718bf215546Sopenharmony_ci      return;
719bf215546Sopenharmony_ci
720bf215546Sopenharmony_ci   instr->pass_flags |= GCM_INSTR_SCHEDULED_LATE;
721bf215546Sopenharmony_ci
722bf215546Sopenharmony_ci   /* Pinned/placed instructions are already scheduled so we don't need to do
723bf215546Sopenharmony_ci    * anything.  Also, bailing here keeps us from ever following phi nodes
724bf215546Sopenharmony_ci    * which can be back-edges.
725bf215546Sopenharmony_ci    */
726bf215546Sopenharmony_ci   if (instr->pass_flags & GCM_INSTR_PLACED ||
727bf215546Sopenharmony_ci       instr->pass_flags & GCM_INSTR_PINNED)
728bf215546Sopenharmony_ci      return;
729bf215546Sopenharmony_ci
730bf215546Sopenharmony_ci   nir_foreach_ssa_def(instr, gcm_schedule_late_def, state);
731bf215546Sopenharmony_ci}
732bf215546Sopenharmony_ci
733bf215546Sopenharmony_cistatic bool
734bf215546Sopenharmony_cigcm_replace_def_with_undef(nir_ssa_def *def, void *void_state)
735bf215546Sopenharmony_ci{
736bf215546Sopenharmony_ci   struct gcm_state *state = void_state;
737bf215546Sopenharmony_ci
738bf215546Sopenharmony_ci   if (nir_ssa_def_is_unused(def))
739bf215546Sopenharmony_ci      return true;
740bf215546Sopenharmony_ci
741bf215546Sopenharmony_ci   nir_ssa_undef_instr *undef =
742bf215546Sopenharmony_ci      nir_ssa_undef_instr_create(state->impl->function->shader,
743bf215546Sopenharmony_ci                                 def->num_components, def->bit_size);
744bf215546Sopenharmony_ci   nir_instr_insert(nir_before_cf_list(&state->impl->body), &undef->instr);
745bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(def, &undef->def);
746bf215546Sopenharmony_ci
747bf215546Sopenharmony_ci   return true;
748bf215546Sopenharmony_ci}
749bf215546Sopenharmony_ci
750bf215546Sopenharmony_ci/** Places an instrution back into the program
751bf215546Sopenharmony_ci *
752bf215546Sopenharmony_ci * The earlier passes of GCM simply choose blocks for each instruction and
753bf215546Sopenharmony_ci * otherwise leave them alone.  This pass actually places the instructions
754bf215546Sopenharmony_ci * into their chosen blocks.
755bf215546Sopenharmony_ci *
756bf215546Sopenharmony_ci * To do so, we simply insert instructions in the reverse order they were
757bf215546Sopenharmony_ci * extracted. This will simply place instructions that were scheduled earlier
758bf215546Sopenharmony_ci * onto the end of their new block and instructions that were scheduled later to
759bf215546Sopenharmony_ci * the start of their new block.
760bf215546Sopenharmony_ci */
761bf215546Sopenharmony_cistatic void
762bf215546Sopenharmony_cigcm_place_instr(nir_instr *instr, struct gcm_state *state)
763bf215546Sopenharmony_ci{
764bf215546Sopenharmony_ci   if (instr->pass_flags & GCM_INSTR_PLACED)
765bf215546Sopenharmony_ci      return;
766bf215546Sopenharmony_ci
767bf215546Sopenharmony_ci   instr->pass_flags |= GCM_INSTR_PLACED;
768bf215546Sopenharmony_ci
769bf215546Sopenharmony_ci   if (instr->block == NULL) {
770bf215546Sopenharmony_ci      nir_foreach_ssa_def(instr, gcm_replace_def_with_undef, state);
771bf215546Sopenharmony_ci      nir_instr_remove(instr);
772bf215546Sopenharmony_ci      return;
773bf215546Sopenharmony_ci   }
774bf215546Sopenharmony_ci
775bf215546Sopenharmony_ci   struct gcm_block_info *block_info = &state->blocks[instr->block->index];
776bf215546Sopenharmony_ci   exec_node_remove(&instr->node);
777bf215546Sopenharmony_ci
778bf215546Sopenharmony_ci   if (block_info->last_instr) {
779bf215546Sopenharmony_ci      exec_node_insert_node_before(&block_info->last_instr->node,
780bf215546Sopenharmony_ci                                   &instr->node);
781bf215546Sopenharmony_ci   } else {
782bf215546Sopenharmony_ci      /* Schedule it at the end of the block */
783bf215546Sopenharmony_ci      nir_instr *jump_instr = nir_block_last_instr(instr->block);
784bf215546Sopenharmony_ci      if (jump_instr && jump_instr->type == nir_instr_type_jump) {
785bf215546Sopenharmony_ci         exec_node_insert_node_before(&jump_instr->node, &instr->node);
786bf215546Sopenharmony_ci      } else {
787bf215546Sopenharmony_ci         exec_list_push_tail(&instr->block->instr_list, &instr->node);
788bf215546Sopenharmony_ci      }
789bf215546Sopenharmony_ci   }
790bf215546Sopenharmony_ci
791bf215546Sopenharmony_ci   block_info->last_instr = instr;
792bf215546Sopenharmony_ci}
793bf215546Sopenharmony_ci
794bf215546Sopenharmony_cistatic bool
795bf215546Sopenharmony_ciopt_gcm_impl(nir_shader *shader, nir_function_impl *impl, bool value_number)
796bf215546Sopenharmony_ci{
797bf215546Sopenharmony_ci   nir_metadata_require(impl, nir_metadata_block_index |
798bf215546Sopenharmony_ci                              nir_metadata_dominance);
799bf215546Sopenharmony_ci   nir_metadata_require(impl, nir_metadata_loop_analysis,
800bf215546Sopenharmony_ci                        shader->options->force_indirect_unrolling,
801bf215546Sopenharmony_ci                        shader->options->force_indirect_unrolling_sampler);
802bf215546Sopenharmony_ci
803bf215546Sopenharmony_ci   /* A previous pass may have left pass_flags dirty, so clear it all out. */
804bf215546Sopenharmony_ci   nir_foreach_block(block, impl)
805bf215546Sopenharmony_ci      nir_foreach_instr(instr, block)
806bf215546Sopenharmony_ci         instr->pass_flags = 0;
807bf215546Sopenharmony_ci
808bf215546Sopenharmony_ci   struct gcm_state state;
809bf215546Sopenharmony_ci
810bf215546Sopenharmony_ci   state.impl = impl;
811bf215546Sopenharmony_ci   state.instr = NULL;
812bf215546Sopenharmony_ci   state.progress = false;
813bf215546Sopenharmony_ci   exec_list_make_empty(&state.instrs);
814bf215546Sopenharmony_ci   state.blocks = rzalloc_array(NULL, struct gcm_block_info, impl->num_blocks);
815bf215546Sopenharmony_ci
816bf215546Sopenharmony_ci   gcm_build_block_info(&impl->body, &state, NULL, 0, 0, ~0u);
817bf215546Sopenharmony_ci
818bf215546Sopenharmony_ci   gcm_pin_instructions(impl, &state);
819bf215546Sopenharmony_ci
820bf215546Sopenharmony_ci   state.instr_infos =
821bf215546Sopenharmony_ci      rzalloc_array(NULL, struct gcm_instr_info, state.num_instrs);
822bf215546Sopenharmony_ci
823bf215546Sopenharmony_ci   if (value_number) {
824bf215546Sopenharmony_ci      struct set *gvn_set = nir_instr_set_create(NULL);
825bf215546Sopenharmony_ci      foreach_list_typed_safe(nir_instr, instr, node, &state.instrs) {
826bf215546Sopenharmony_ci         if (instr->pass_flags & GCM_INSTR_PINNED)
827bf215546Sopenharmony_ci            continue;
828bf215546Sopenharmony_ci
829bf215546Sopenharmony_ci         if (nir_instr_set_add_or_rewrite(gvn_set, instr, NULL))
830bf215546Sopenharmony_ci            state.progress = true;
831bf215546Sopenharmony_ci      }
832bf215546Sopenharmony_ci      nir_instr_set_destroy(gvn_set);
833bf215546Sopenharmony_ci   }
834bf215546Sopenharmony_ci
835bf215546Sopenharmony_ci   foreach_list_typed(nir_instr, instr, node, &state.instrs)
836bf215546Sopenharmony_ci      gcm_schedule_early_instr(instr, &state);
837bf215546Sopenharmony_ci
838bf215546Sopenharmony_ci   foreach_list_typed(nir_instr, instr, node, &state.instrs)
839bf215546Sopenharmony_ci      gcm_schedule_late_instr(instr, &state);
840bf215546Sopenharmony_ci
841bf215546Sopenharmony_ci   while (!exec_list_is_empty(&state.instrs)) {
842bf215546Sopenharmony_ci      nir_instr *instr = exec_node_data(nir_instr,
843bf215546Sopenharmony_ci                                        state.instrs.tail_sentinel.prev, node);
844bf215546Sopenharmony_ci      gcm_place_instr(instr, &state);
845bf215546Sopenharmony_ci   }
846bf215546Sopenharmony_ci
847bf215546Sopenharmony_ci   ralloc_free(state.blocks);
848bf215546Sopenharmony_ci   ralloc_free(state.instr_infos);
849bf215546Sopenharmony_ci
850bf215546Sopenharmony_ci   nir_metadata_preserve(impl, nir_metadata_block_index |
851bf215546Sopenharmony_ci                               nir_metadata_dominance |
852bf215546Sopenharmony_ci                               nir_metadata_loop_analysis);
853bf215546Sopenharmony_ci
854bf215546Sopenharmony_ci   return state.progress;
855bf215546Sopenharmony_ci}
856bf215546Sopenharmony_ci
857bf215546Sopenharmony_cibool
858bf215546Sopenharmony_cinir_opt_gcm(nir_shader *shader, bool value_number)
859bf215546Sopenharmony_ci{
860bf215546Sopenharmony_ci   bool progress = false;
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
863bf215546Sopenharmony_ci      if (function->impl)
864bf215546Sopenharmony_ci         progress |= opt_gcm_impl(shader, function->impl, value_number);
865bf215546Sopenharmony_ci   }
866bf215546Sopenharmony_ci
867bf215546Sopenharmony_ci   return progress;
868bf215546Sopenharmony_ci}
869