1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2019 Google, Inc.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *    Rob Clark <robclark@freedesktop.org>
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "util/dag.h"
28bf215546Sopenharmony_ci#include "util/u_math.h"
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "ir3.h"
31bf215546Sopenharmony_ci#include "ir3_compiler.h"
32bf215546Sopenharmony_ci#include "ir3_context.h"
33bf215546Sopenharmony_ci
/* SCHED_DEBUG is the IR3_DBG_SCHEDMSGS bit of ir3_shader_debug in DEBUG
 * builds, and the constant 0 otherwise.  Both logging macros below are
 * guarded by it.
 */
#ifndef DEBUG
#define SCHED_DEBUG 0
#else
#define SCHED_DEBUG (ir3_shader_debug & IR3_DBG_SCHEDMSGS)
#endif

/* Log a printf-style message with a "PSCHED: " prefix when scheduler
 * debugging is enabled.
 */
#define d(fmt, ...)                                                            \
   do {                                                                        \
      if (SCHED_DEBUG) {                                                       \
         mesa_logi("PSCHED: " fmt, ##__VA_ARGS__);                             \
      }                                                                        \
   } while (0)

/* Like d(), but the formatted message is followed by ": " and a printout
 * of 'instr', all written to a single log stream.
 */
#define di(instr, fmt, ...)                                                    \
   do {                                                                        \
      if (SCHED_DEBUG) {                                                       \
         struct log_stream *stream = mesa_log_streami();                       \
         mesa_log_stream_printf(stream, "PSCHED: " fmt ": ", ##__VA_ARGS__);   \
         ir3_print_instr_stream(stream, instr);                                \
         mesa_log_stream_destroy(stream);                                      \
      }                                                                        \
   } while (0)
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_ci/*
57bf215546Sopenharmony_ci * Post RA Instruction Scheduling
58bf215546Sopenharmony_ci */
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_cistruct ir3_postsched_ctx {
61bf215546Sopenharmony_ci   struct ir3 *ir;
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_ci   struct ir3_shader_variant *v;
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci   void *mem_ctx;
66bf215546Sopenharmony_ci   struct ir3_block *block; /* the current block */
67bf215546Sopenharmony_ci   struct dag *dag;
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_ci   struct list_head unscheduled_list; /* unscheduled instructions */
70bf215546Sopenharmony_ci
71bf215546Sopenharmony_ci   unsigned ip;
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci   int ss_delay;
74bf215546Sopenharmony_ci   int sy_delay;
75bf215546Sopenharmony_ci};
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_cistruct ir3_postsched_node {
78bf215546Sopenharmony_ci   struct dag_node dag; /* must be first for util_dynarray_foreach */
79bf215546Sopenharmony_ci   struct ir3_instruction *instr;
80bf215546Sopenharmony_ci   bool partially_evaluated_path;
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci   unsigned earliest_ip;
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_ci   bool has_sy_src, has_ss_src;
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci   unsigned delay;
87bf215546Sopenharmony_ci   unsigned max_delay;
88bf215546Sopenharmony_ci};
89bf215546Sopenharmony_ci
/* Iterate over the ir3_postsched_node entries linked (through dag.link)
 * into the list 'sched_list', binding each one to 'sched_n'.
 */
#define foreach_sched_node(sched_n, sched_list)                                \
   list_for_each_entry (struct ir3_postsched_node, sched_n, sched_list,       \
                        dag.link)
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_cistatic bool
94bf215546Sopenharmony_cihas_sy_src(struct ir3_instruction *instr)
95bf215546Sopenharmony_ci{
96bf215546Sopenharmony_ci   struct ir3_postsched_node *node = instr->data;
97bf215546Sopenharmony_ci   return node->has_sy_src;
98bf215546Sopenharmony_ci}
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_cistatic bool
101bf215546Sopenharmony_cihas_ss_src(struct ir3_instruction *instr)
102bf215546Sopenharmony_ci{
103bf215546Sopenharmony_ci   struct ir3_postsched_node *node = instr->data;
104bf215546Sopenharmony_ci   return node->has_ss_src;
105bf215546Sopenharmony_ci}
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_cistatic void
108bf215546Sopenharmony_cischedule(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
109bf215546Sopenharmony_ci{
110bf215546Sopenharmony_ci   assert(ctx->block == instr->block);
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci   /* remove from unscheduled_list:
113bf215546Sopenharmony_ci    */
114bf215546Sopenharmony_ci   list_delinit(&instr->node);
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_ci   di(instr, "schedule");
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci   bool counts_for_delay = is_alu(instr) || is_flow(instr);
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci   unsigned delay_cycles = counts_for_delay ? 1 + instr->repeat : 0;
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ci   struct ir3_postsched_node *n = instr->data;
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_ci   /* We insert any nop's needed to get to earliest_ip, then advance
125bf215546Sopenharmony_ci    * delay_cycles by scheduling the instruction.
126bf215546Sopenharmony_ci    */
127bf215546Sopenharmony_ci   ctx->ip = MAX2(ctx->ip, n->earliest_ip) + delay_cycles;
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_ci   util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
130bf215546Sopenharmony_ci      unsigned delay = (unsigned)(uintptr_t)edge->data;
131bf215546Sopenharmony_ci      struct ir3_postsched_node *child =
132bf215546Sopenharmony_ci         container_of(edge->child, struct ir3_postsched_node, dag);
133bf215546Sopenharmony_ci      child->earliest_ip = MAX2(child->earliest_ip, ctx->ip + delay);
134bf215546Sopenharmony_ci   }
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci   list_addtail(&instr->node, &instr->block->instr_list);
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci   dag_prune_head(ctx->dag, &n->dag);
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci   if (is_meta(instr) && (instr->opc != OPC_META_TEX_PREFETCH))
141bf215546Sopenharmony_ci      return;
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci   if (is_ss_producer(instr)) {
144bf215546Sopenharmony_ci      ctx->ss_delay = soft_ss_delay(instr);
145bf215546Sopenharmony_ci   } else if (has_ss_src(instr)) {
146bf215546Sopenharmony_ci      ctx->ss_delay = 0;
147bf215546Sopenharmony_ci   } else if (ctx->ss_delay > 0) {
148bf215546Sopenharmony_ci      ctx->ss_delay--;
149bf215546Sopenharmony_ci   }
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci   if (is_sy_producer(instr)) {
152bf215546Sopenharmony_ci      ctx->sy_delay = soft_sy_delay(instr, ctx->block->shader);
153bf215546Sopenharmony_ci   } else if (has_sy_src(instr)) {
154bf215546Sopenharmony_ci      ctx->sy_delay = 0;
155bf215546Sopenharmony_ci   } else if (ctx->sy_delay > 0) {
156bf215546Sopenharmony_ci      ctx->sy_delay--;
157bf215546Sopenharmony_ci   }
158bf215546Sopenharmony_ci}
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_cistatic void
161bf215546Sopenharmony_cidump_state(struct ir3_postsched_ctx *ctx)
162bf215546Sopenharmony_ci{
163bf215546Sopenharmony_ci   if (!SCHED_DEBUG)
164bf215546Sopenharmony_ci      return;
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci   foreach_sched_node (n, &ctx->dag->heads) {
167bf215546Sopenharmony_ci      di(n->instr, "maxdel=%3d    ", n->max_delay);
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_ci      util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
170bf215546Sopenharmony_ci         struct ir3_postsched_node *child =
171bf215546Sopenharmony_ci            (struct ir3_postsched_node *)edge->child;
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_ci         di(child->instr, " -> (%d parents) ", child->dag.parent_count);
174bf215546Sopenharmony_ci      }
175bf215546Sopenharmony_ci   }
176bf215546Sopenharmony_ci}
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_cistatic unsigned
179bf215546Sopenharmony_cinode_delay(struct ir3_postsched_ctx *ctx, struct ir3_postsched_node *n)
180bf215546Sopenharmony_ci{
181bf215546Sopenharmony_ci   return MAX2(n->earliest_ip, ctx->ip) - ctx->ip;
182bf215546Sopenharmony_ci}
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_cistatic unsigned
185bf215546Sopenharmony_cinode_delay_soft(struct ir3_postsched_ctx *ctx, struct ir3_postsched_node *n)
186bf215546Sopenharmony_ci{
187bf215546Sopenharmony_ci   unsigned delay = node_delay(ctx, n);
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   /* This takes into account that as when we schedule multiple tex or sfu, the
190bf215546Sopenharmony_ci    * first user has to wait for all of them to complete.
191bf215546Sopenharmony_ci    */
192bf215546Sopenharmony_ci   if (n->has_ss_src)
193bf215546Sopenharmony_ci      delay = MAX2(delay, ctx->ss_delay);
194bf215546Sopenharmony_ci   if (n->has_sy_src)
195bf215546Sopenharmony_ci      delay = MAX2(delay, ctx->sy_delay);
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci   return delay;
198bf215546Sopenharmony_ci}
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci/* find instruction to schedule: */
201bf215546Sopenharmony_cistatic struct ir3_instruction *
202bf215546Sopenharmony_cichoose_instr(struct ir3_postsched_ctx *ctx)
203bf215546Sopenharmony_ci{
204bf215546Sopenharmony_ci   struct ir3_postsched_node *chosen = NULL;
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci   dump_state(ctx);
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci   foreach_sched_node (n, &ctx->dag->heads) {
209bf215546Sopenharmony_ci      if (!is_meta(n->instr))
210bf215546Sopenharmony_ci         continue;
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_ci      if (!chosen || (chosen->max_delay < n->max_delay))
213bf215546Sopenharmony_ci         chosen = n;
214bf215546Sopenharmony_ci   }
215bf215546Sopenharmony_ci
216bf215546Sopenharmony_ci   if (chosen) {
217bf215546Sopenharmony_ci      di(chosen->instr, "prio: chose (meta)");
218bf215546Sopenharmony_ci      return chosen->instr;
219bf215546Sopenharmony_ci   }
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci   /* Try to schedule inputs with a higher priority, if possible, as
222bf215546Sopenharmony_ci    * the last bary.f unlocks varying storage to unblock more VS
223bf215546Sopenharmony_ci    * warps.
224bf215546Sopenharmony_ci    */
225bf215546Sopenharmony_ci   foreach_sched_node (n, &ctx->dag->heads) {
226bf215546Sopenharmony_ci      if (!is_input(n->instr))
227bf215546Sopenharmony_ci         continue;
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_ci      if (!chosen || (chosen->max_delay < n->max_delay))
230bf215546Sopenharmony_ci         chosen = n;
231bf215546Sopenharmony_ci   }
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci   if (chosen) {
234bf215546Sopenharmony_ci      di(chosen->instr, "prio: chose (input)");
235bf215546Sopenharmony_ci      return chosen->instr;
236bf215546Sopenharmony_ci   }
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci   /* Next prioritize discards: */
239bf215546Sopenharmony_ci   foreach_sched_node (n, &ctx->dag->heads) {
240bf215546Sopenharmony_ci      unsigned d = node_delay(ctx, n);
241bf215546Sopenharmony_ci
242bf215546Sopenharmony_ci      if (d > 0)
243bf215546Sopenharmony_ci         continue;
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ci      if (!is_kill_or_demote(n->instr))
246bf215546Sopenharmony_ci         continue;
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_ci      if (!chosen || (chosen->max_delay < n->max_delay))
249bf215546Sopenharmony_ci         chosen = n;
250bf215546Sopenharmony_ci   }
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_ci   if (chosen) {
253bf215546Sopenharmony_ci      di(chosen->instr, "csp: chose (kill, hard ready)");
254bf215546Sopenharmony_ci      return chosen->instr;
255bf215546Sopenharmony_ci   }
256bf215546Sopenharmony_ci
257bf215546Sopenharmony_ci   /* Next prioritize expensive instructions: */
258bf215546Sopenharmony_ci   foreach_sched_node (n, &ctx->dag->heads) {
259bf215546Sopenharmony_ci      unsigned d = node_delay_soft(ctx, n);
260bf215546Sopenharmony_ci
261bf215546Sopenharmony_ci      if (d > 0)
262bf215546Sopenharmony_ci         continue;
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci      if (!(is_ss_producer(n->instr) || is_sy_producer(n->instr)))
265bf215546Sopenharmony_ci         continue;
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci      if (!chosen || (chosen->max_delay < n->max_delay))
268bf215546Sopenharmony_ci         chosen = n;
269bf215546Sopenharmony_ci   }
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci   if (chosen) {
272bf215546Sopenharmony_ci      di(chosen->instr, "csp: chose (sfu/tex, soft ready)");
273bf215546Sopenharmony_ci      return chosen->instr;
274bf215546Sopenharmony_ci   }
275bf215546Sopenharmony_ci
276bf215546Sopenharmony_ci   /* Next try to find a ready leader w/ soft delay (ie. including extra
277bf215546Sopenharmony_ci    * delay for things like tex fetch which can be synchronized w/ sync
278bf215546Sopenharmony_ci    * bit (but we probably do want to schedule some other instructions
279bf215546Sopenharmony_ci    * while we wait). We also allow a small amount of nops, to prefer now-nops
280bf215546Sopenharmony_ci    * over future-nops up to a point, as that gives better results.
281bf215546Sopenharmony_ci    */
282bf215546Sopenharmony_ci   unsigned chosen_delay = 0;
283bf215546Sopenharmony_ci   foreach_sched_node (n, &ctx->dag->heads) {
284bf215546Sopenharmony_ci      unsigned d = node_delay_soft(ctx, n);
285bf215546Sopenharmony_ci
286bf215546Sopenharmony_ci      if (d > 3)
287bf215546Sopenharmony_ci         continue;
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci      if (!chosen || d < chosen_delay) {
290bf215546Sopenharmony_ci         chosen = n;
291bf215546Sopenharmony_ci         chosen_delay = d;
292bf215546Sopenharmony_ci         continue;
293bf215546Sopenharmony_ci      }
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci      if (d > chosen_delay)
296bf215546Sopenharmony_ci         continue;
297bf215546Sopenharmony_ci
298bf215546Sopenharmony_ci      if (chosen->max_delay < n->max_delay) {
299bf215546Sopenharmony_ci         chosen = n;
300bf215546Sopenharmony_ci         chosen_delay = d;
301bf215546Sopenharmony_ci      }
302bf215546Sopenharmony_ci   }
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_ci   if (chosen) {
305bf215546Sopenharmony_ci      di(chosen->instr, "csp: chose (soft ready)");
306bf215546Sopenharmony_ci      return chosen->instr;
307bf215546Sopenharmony_ci   }
308bf215546Sopenharmony_ci
309bf215546Sopenharmony_ci   /* Next try to find a ready leader that can be scheduled without nop's,
310bf215546Sopenharmony_ci    * which in the case of things that need (sy)/(ss) could result in
311bf215546Sopenharmony_ci    * stalls.. but we've already decided there is not a better option.
312bf215546Sopenharmony_ci    */
313bf215546Sopenharmony_ci   foreach_sched_node (n, &ctx->dag->heads) {
314bf215546Sopenharmony_ci      unsigned d = node_delay(ctx, n);
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci      if (d > 0)
317bf215546Sopenharmony_ci         continue;
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci      if (!chosen || (chosen->max_delay < n->max_delay))
320bf215546Sopenharmony_ci         chosen = n;
321bf215546Sopenharmony_ci   }
322bf215546Sopenharmony_ci
323bf215546Sopenharmony_ci   if (chosen) {
324bf215546Sopenharmony_ci      di(chosen->instr, "csp: chose (hard ready)");
325bf215546Sopenharmony_ci      return chosen->instr;
326bf215546Sopenharmony_ci   }
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_ci   /* Otherwise choose leader with maximum cost:
329bf215546Sopenharmony_ci    */
330bf215546Sopenharmony_ci   foreach_sched_node (n, &ctx->dag->heads) {
331bf215546Sopenharmony_ci      if (!chosen || chosen->max_delay < n->max_delay)
332bf215546Sopenharmony_ci         chosen = n;
333bf215546Sopenharmony_ci   }
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci   if (chosen) {
336bf215546Sopenharmony_ci      di(chosen->instr, "csp: chose (leader)");
337bf215546Sopenharmony_ci      return chosen->instr;
338bf215546Sopenharmony_ci   }
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_ci   return NULL;
341bf215546Sopenharmony_ci}
342bf215546Sopenharmony_ci
/* State used while walking a block to build register dependencies. */
struct ir3_postsched_deps_state {
   struct ir3_postsched_ctx *ctx;

   /* Direction in which the block is being walked: */
   enum {
      F, /* forward  */
      R, /* reverse  */
   } direction;

   /* Whether the half and full precision register files are merged: */
   bool merged;

   /* Mapping from register to the sched node (instruction) that last
    * wrote it, in whichever direction the block is being iterated.
    *
    * The table is twice as big as the # of regs, to deal with
    * half-precision regs.  How it is indexed depends on 'merged':
    *
    *  - merged (conflicting half/full files, ie. a6xx+): every full
    *    precision dep is treated as two half-precision dependencies.
    *
    *  - separate (older, non-conflicting): the first half of the table is
    *    used for full precision and the second half for half-precision.
    */
   struct ir3_postsched_node *regs[2 * 256];

   /* For each slot of 'regs', the destination index that was recorded
    * when the slot was last written:
    */
   unsigned dst_n[2 * 256];
};
365bf215546Sopenharmony_ci
/* Bounds-checked read/write accessor for state->regs[], since an OoB
 * access to something living on the stack is gonna cause a bad day.
 *
 * Expands to an lvalue.  Note that 'reg_idx' is evaluated twice (once in
 * the assert, once for the access), so it must not have side effects.
 */
#define dep_reg(state, reg_idx)                                                \
   *({                                                                         \
      assert((reg_idx) < ARRAY_SIZE((state)->regs));                           \
      &(state)->regs[(reg_idx)];                                               \
   })
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_cistatic void
376bf215546Sopenharmony_ciadd_dep(struct ir3_postsched_deps_state *state,
377bf215546Sopenharmony_ci        struct ir3_postsched_node *before, struct ir3_postsched_node *after,
378bf215546Sopenharmony_ci        unsigned d)
379bf215546Sopenharmony_ci{
380bf215546Sopenharmony_ci   if (!before || !after)
381bf215546Sopenharmony_ci      return;
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci   assert(before != after);
384bf215546Sopenharmony_ci
385bf215546Sopenharmony_ci   if (state->direction == F) {
386bf215546Sopenharmony_ci      dag_add_edge_max_data(&before->dag, &after->dag, (uintptr_t)d);
387bf215546Sopenharmony_ci   } else {
388bf215546Sopenharmony_ci      dag_add_edge_max_data(&after->dag, &before->dag, 0);
389bf215546Sopenharmony_ci   }
390bf215546Sopenharmony_ci}
391bf215546Sopenharmony_ci
392bf215546Sopenharmony_cistatic void
393bf215546Sopenharmony_ciadd_single_reg_dep(struct ir3_postsched_deps_state *state,
394bf215546Sopenharmony_ci                   struct ir3_postsched_node *node, unsigned num, int src_n,
395bf215546Sopenharmony_ci                   int dst_n)
396bf215546Sopenharmony_ci{
397bf215546Sopenharmony_ci   struct ir3_postsched_node *dep = dep_reg(state, num);
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_ci   unsigned d = 0;
400bf215546Sopenharmony_ci   if (src_n >= 0 && dep && state->direction == F) {
401bf215546Sopenharmony_ci      /* get the dst_n this corresponds to */
402bf215546Sopenharmony_ci      unsigned dst_n = state->dst_n[num];
403bf215546Sopenharmony_ci      unsigned d_soft = ir3_delayslots(dep->instr, node->instr, src_n, true);
404bf215546Sopenharmony_ci      d = ir3_delayslots_with_repeat(dep->instr, node->instr, dst_n, src_n);
405bf215546Sopenharmony_ci      node->delay = MAX2(node->delay, d_soft);
406bf215546Sopenharmony_ci      if (is_sy_producer(dep->instr))
407bf215546Sopenharmony_ci         node->has_sy_src = true;
408bf215546Sopenharmony_ci      if (is_ss_producer(dep->instr))
409bf215546Sopenharmony_ci         node->has_ss_src = true;
410bf215546Sopenharmony_ci   }
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_ci   add_dep(state, dep, node, d);
413bf215546Sopenharmony_ci   if (src_n < 0) {
414bf215546Sopenharmony_ci      dep_reg(state, num) = node;
415bf215546Sopenharmony_ci      state->dst_n[num] = dst_n;
416bf215546Sopenharmony_ci   }
417bf215546Sopenharmony_ci}
418bf215546Sopenharmony_ci
419bf215546Sopenharmony_ci/* This is where we handled full vs half-precision, and potential conflicts
420bf215546Sopenharmony_ci * between half and full precision that result in additional dependencies.
421bf215546Sopenharmony_ci * The 'reg' arg is really just to know half vs full precision.
422bf215546Sopenharmony_ci *
423bf215546Sopenharmony_ci * If src_n is positive, then this adds a dependency on a source register, and
424bf215546Sopenharmony_ci * src_n is the index passed into ir3_delayslots() for calculating the delay:
425bf215546Sopenharmony_ci * it corresponds to node->instr->srcs[src_n]. If src_n is negative, then
426bf215546Sopenharmony_ci * this is for the destination register corresponding to dst_n.
427bf215546Sopenharmony_ci */
428bf215546Sopenharmony_cistatic void
429bf215546Sopenharmony_ciadd_reg_dep(struct ir3_postsched_deps_state *state,
430bf215546Sopenharmony_ci            struct ir3_postsched_node *node, const struct ir3_register *reg,
431bf215546Sopenharmony_ci            unsigned num, int src_n, int dst_n)
432bf215546Sopenharmony_ci{
433bf215546Sopenharmony_ci   if (state->merged) {
434bf215546Sopenharmony_ci      /* Make sure that special registers like a0.x that are written as
435bf215546Sopenharmony_ci       * half-registers don't alias random full registers by pretending that
436bf215546Sopenharmony_ci       * they're full registers:
437bf215546Sopenharmony_ci       */
438bf215546Sopenharmony_ci      if ((reg->flags & IR3_REG_HALF) && !is_reg_special(reg)) {
439bf215546Sopenharmony_ci         /* single conflict in half-reg space: */
440bf215546Sopenharmony_ci         add_single_reg_dep(state, node, num, src_n, dst_n);
441bf215546Sopenharmony_ci      } else {
442bf215546Sopenharmony_ci         /* two conflicts in half-reg space: */
443bf215546Sopenharmony_ci         add_single_reg_dep(state, node, 2 * num + 0, src_n, dst_n);
444bf215546Sopenharmony_ci         add_single_reg_dep(state, node, 2 * num + 1, src_n, dst_n);
445bf215546Sopenharmony_ci      }
446bf215546Sopenharmony_ci   } else {
447bf215546Sopenharmony_ci      if (reg->flags & IR3_REG_HALF)
448bf215546Sopenharmony_ci         num += ARRAY_SIZE(state->regs) / 2;
449bf215546Sopenharmony_ci      add_single_reg_dep(state, node, num, src_n, dst_n);
450bf215546Sopenharmony_ci   }
451bf215546Sopenharmony_ci}
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_cistatic void
454bf215546Sopenharmony_cicalculate_deps(struct ir3_postsched_deps_state *state,
455bf215546Sopenharmony_ci               struct ir3_postsched_node *node)
456bf215546Sopenharmony_ci{
457bf215546Sopenharmony_ci   /* Add dependencies on instructions that previously (or next,
458bf215546Sopenharmony_ci    * in the reverse direction) wrote any of our src registers:
459bf215546Sopenharmony_ci    */
460bf215546Sopenharmony_ci   foreach_src_n (reg, i, node->instr) {
461bf215546Sopenharmony_ci      if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
462bf215546Sopenharmony_ci         continue;
463bf215546Sopenharmony_ci
464bf215546Sopenharmony_ci      if (reg->flags & IR3_REG_RELATIV) {
465bf215546Sopenharmony_ci         /* mark entire array as read: */
466bf215546Sopenharmony_ci         for (unsigned j = 0; j < reg->size; j++) {
467bf215546Sopenharmony_ci            add_reg_dep(state, node, reg, reg->array.base + j, i, -1);
468bf215546Sopenharmony_ci         }
469bf215546Sopenharmony_ci      } else {
470bf215546Sopenharmony_ci         assert(reg->wrmask >= 1);
471bf215546Sopenharmony_ci         u_foreach_bit (b, reg->wrmask) {
472bf215546Sopenharmony_ci            add_reg_dep(state, node, reg, reg->num + b, i, -1);
473bf215546Sopenharmony_ci         }
474bf215546Sopenharmony_ci      }
475bf215546Sopenharmony_ci   }
476bf215546Sopenharmony_ci
477bf215546Sopenharmony_ci   /* And then after we update the state for what this instruction
478bf215546Sopenharmony_ci    * wrote:
479bf215546Sopenharmony_ci    */
480bf215546Sopenharmony_ci   foreach_dst_n (reg, i, node->instr) {
481bf215546Sopenharmony_ci      if (reg->wrmask == 0)
482bf215546Sopenharmony_ci         continue;
483bf215546Sopenharmony_ci      if (reg->flags & IR3_REG_RELATIV) {
484bf215546Sopenharmony_ci         /* mark the entire array as written: */
485bf215546Sopenharmony_ci         for (unsigned j = 0; j < reg->size; j++) {
486bf215546Sopenharmony_ci            add_reg_dep(state, node, reg, reg->array.base + j, -1, i);
487bf215546Sopenharmony_ci         }
488bf215546Sopenharmony_ci      } else {
489bf215546Sopenharmony_ci         assert(reg->wrmask >= 1);
490bf215546Sopenharmony_ci         u_foreach_bit (b, reg->wrmask) {
491bf215546Sopenharmony_ci            add_reg_dep(state, node, reg, reg->num + b, -1, i);
492bf215546Sopenharmony_ci         }
493bf215546Sopenharmony_ci      }
494bf215546Sopenharmony_ci   }
495bf215546Sopenharmony_ci}
496bf215546Sopenharmony_ci
497bf215546Sopenharmony_cistatic void
498bf215546Sopenharmony_cicalculate_forward_deps(struct ir3_postsched_ctx *ctx)
499bf215546Sopenharmony_ci{
500bf215546Sopenharmony_ci   struct ir3_postsched_deps_state state = {
501bf215546Sopenharmony_ci      .ctx = ctx,
502bf215546Sopenharmony_ci      .direction = F,
503bf215546Sopenharmony_ci      .merged = ctx->v->mergedregs,
504bf215546Sopenharmony_ci   };
505bf215546Sopenharmony_ci
506bf215546Sopenharmony_ci   foreach_instr (instr, &ctx->unscheduled_list) {
507bf215546Sopenharmony_ci      calculate_deps(&state, instr->data);
508bf215546Sopenharmony_ci   }
509bf215546Sopenharmony_ci}
510bf215546Sopenharmony_ci
511bf215546Sopenharmony_cistatic void
512bf215546Sopenharmony_cicalculate_reverse_deps(struct ir3_postsched_ctx *ctx)
513bf215546Sopenharmony_ci{
514bf215546Sopenharmony_ci   struct ir3_postsched_deps_state state = {
515bf215546Sopenharmony_ci      .ctx = ctx,
516bf215546Sopenharmony_ci      .direction = R,
517bf215546Sopenharmony_ci      .merged = ctx->v->mergedregs,
518bf215546Sopenharmony_ci   };
519bf215546Sopenharmony_ci
520bf215546Sopenharmony_ci   foreach_instr_rev (instr, &ctx->unscheduled_list) {
521bf215546Sopenharmony_ci      calculate_deps(&state, instr->data);
522bf215546Sopenharmony_ci   }
523bf215546Sopenharmony_ci}
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_cistatic void
526bf215546Sopenharmony_cisched_node_init(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
527bf215546Sopenharmony_ci{
528bf215546Sopenharmony_ci   struct ir3_postsched_node *n =
529bf215546Sopenharmony_ci      rzalloc(ctx->mem_ctx, struct ir3_postsched_node);
530bf215546Sopenharmony_ci
531bf215546Sopenharmony_ci   dag_init_node(ctx->dag, &n->dag);
532bf215546Sopenharmony_ci
533bf215546Sopenharmony_ci   n->instr = instr;
534bf215546Sopenharmony_ci   instr->data = n;
535bf215546Sopenharmony_ci}
536bf215546Sopenharmony_ci
537bf215546Sopenharmony_cistatic void
538bf215546Sopenharmony_cisched_dag_max_delay_cb(struct dag_node *node, void *state)
539bf215546Sopenharmony_ci{
540bf215546Sopenharmony_ci   struct ir3_postsched_node *n = (struct ir3_postsched_node *)node;
541bf215546Sopenharmony_ci   uint32_t max_delay = 0;
542bf215546Sopenharmony_ci
543bf215546Sopenharmony_ci   util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
544bf215546Sopenharmony_ci      struct ir3_postsched_node *child =
545bf215546Sopenharmony_ci         (struct ir3_postsched_node *)edge->child;
546bf215546Sopenharmony_ci      max_delay = MAX2(child->max_delay, max_delay);
547bf215546Sopenharmony_ci   }
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci   n->max_delay = MAX2(n->max_delay, max_delay + n->delay);
550bf215546Sopenharmony_ci}
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_cistatic void
553bf215546Sopenharmony_cisched_dag_init(struct ir3_postsched_ctx *ctx)
554bf215546Sopenharmony_ci{
555bf215546Sopenharmony_ci   ctx->mem_ctx = ralloc_context(NULL);
556bf215546Sopenharmony_ci
557bf215546Sopenharmony_ci   ctx->dag = dag_create(ctx->mem_ctx);
558bf215546Sopenharmony_ci
559bf215546Sopenharmony_ci   foreach_instr (instr, &ctx->unscheduled_list)
560bf215546Sopenharmony_ci      sched_node_init(ctx, instr);
561bf215546Sopenharmony_ci
562bf215546Sopenharmony_ci   calculate_forward_deps(ctx);
563bf215546Sopenharmony_ci   calculate_reverse_deps(ctx);
564bf215546Sopenharmony_ci
565bf215546Sopenharmony_ci   /*
566bf215546Sopenharmony_ci    * To avoid expensive texture fetches, etc, from being moved ahead
567bf215546Sopenharmony_ci    * of kills, track the kills we've seen so far, so we can add an
568bf215546Sopenharmony_ci    * extra dependency on them for tex/mem instructions
569bf215546Sopenharmony_ci    */
570bf215546Sopenharmony_ci   struct util_dynarray kills;
571bf215546Sopenharmony_ci   util_dynarray_init(&kills, ctx->mem_ctx);
572bf215546Sopenharmony_ci
573bf215546Sopenharmony_ci   /* The last bary.f with the (ei) flag must be scheduled before any kills,
574bf215546Sopenharmony_ci    * or the hw gets angry. Keep track of inputs here so we can add the
575bf215546Sopenharmony_ci    * false dep on the kill instruction.
576bf215546Sopenharmony_ci    */
577bf215546Sopenharmony_ci   struct util_dynarray inputs;
578bf215546Sopenharmony_ci   util_dynarray_init(&inputs, ctx->mem_ctx);
579bf215546Sopenharmony_ci
580bf215546Sopenharmony_ci   /*
581bf215546Sopenharmony_ci    * Normal srcs won't be in SSA at this point, those are dealt with in
582bf215546Sopenharmony_ci    * calculate_forward_deps() and calculate_reverse_deps().  But we still
583bf215546Sopenharmony_ci    * have the false-dep information in SSA form, so go ahead and add
584bf215546Sopenharmony_ci    * dependencies for that here:
585bf215546Sopenharmony_ci    */
586bf215546Sopenharmony_ci   foreach_instr (instr, &ctx->unscheduled_list) {
587bf215546Sopenharmony_ci      struct ir3_postsched_node *n = instr->data;
588bf215546Sopenharmony_ci
589bf215546Sopenharmony_ci      foreach_ssa_src_n (src, i, instr) {
590bf215546Sopenharmony_ci         if (src->block != instr->block)
591bf215546Sopenharmony_ci            continue;
592bf215546Sopenharmony_ci
593bf215546Sopenharmony_ci         /* we can end up with unused false-deps.. just skip them: */
594bf215546Sopenharmony_ci         if (src->flags & IR3_INSTR_UNUSED)
595bf215546Sopenharmony_ci            continue;
596bf215546Sopenharmony_ci
597bf215546Sopenharmony_ci         struct ir3_postsched_node *sn = src->data;
598bf215546Sopenharmony_ci
599bf215546Sopenharmony_ci         /* don't consider dependencies in other blocks: */
600bf215546Sopenharmony_ci         if (src->block != instr->block)
601bf215546Sopenharmony_ci            continue;
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_ci         dag_add_edge_max_data(&sn->dag, &n->dag, 0);
604bf215546Sopenharmony_ci      }
605bf215546Sopenharmony_ci
606bf215546Sopenharmony_ci      if (is_input(instr)) {
607bf215546Sopenharmony_ci         util_dynarray_append(&inputs, struct ir3_instruction *, instr);
608bf215546Sopenharmony_ci      } else if (is_kill_or_demote(instr)) {
609bf215546Sopenharmony_ci         util_dynarray_foreach (&inputs, struct ir3_instruction *, instrp) {
610bf215546Sopenharmony_ci            struct ir3_instruction *input = *instrp;
611bf215546Sopenharmony_ci            struct ir3_postsched_node *in = input->data;
612bf215546Sopenharmony_ci            dag_add_edge_max_data(&in->dag, &n->dag, 0);
613bf215546Sopenharmony_ci         }
614bf215546Sopenharmony_ci         util_dynarray_append(&kills, struct ir3_instruction *, instr);
615bf215546Sopenharmony_ci      } else if (is_tex(instr) || is_mem(instr)) {
616bf215546Sopenharmony_ci         util_dynarray_foreach (&kills, struct ir3_instruction *, instrp) {
617bf215546Sopenharmony_ci            struct ir3_instruction *kill = *instrp;
618bf215546Sopenharmony_ci            struct ir3_postsched_node *kn = kill->data;
619bf215546Sopenharmony_ci            dag_add_edge_max_data(&kn->dag, &n->dag, 0);
620bf215546Sopenharmony_ci         }
621bf215546Sopenharmony_ci      }
622bf215546Sopenharmony_ci   }
623bf215546Sopenharmony_ci
624bf215546Sopenharmony_ci   // TODO do we want to do this after reverse-dependencies?
625bf215546Sopenharmony_ci   dag_traverse_bottom_up(ctx->dag, sched_dag_max_delay_cb, NULL);
626bf215546Sopenharmony_ci}
627bf215546Sopenharmony_ci
628bf215546Sopenharmony_cistatic void
629bf215546Sopenharmony_cisched_dag_destroy(struct ir3_postsched_ctx *ctx)
630bf215546Sopenharmony_ci{
631bf215546Sopenharmony_ci   ralloc_free(ctx->mem_ctx);
632bf215546Sopenharmony_ci   ctx->mem_ctx = NULL;
633bf215546Sopenharmony_ci   ctx->dag = NULL;
634bf215546Sopenharmony_ci}
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_cistatic void
637bf215546Sopenharmony_cisched_block(struct ir3_postsched_ctx *ctx, struct ir3_block *block)
638bf215546Sopenharmony_ci{
639bf215546Sopenharmony_ci   ctx->block = block;
640bf215546Sopenharmony_ci   ctx->sy_delay = 0;
641bf215546Sopenharmony_ci   ctx->ss_delay = 0;
642bf215546Sopenharmony_ci
643bf215546Sopenharmony_ci   /* move all instructions to the unscheduled list, and
644bf215546Sopenharmony_ci    * empty the block's instruction list (to which we will
645bf215546Sopenharmony_ci    * be inserting).
646bf215546Sopenharmony_ci    */
647bf215546Sopenharmony_ci   list_replace(&block->instr_list, &ctx->unscheduled_list);
648bf215546Sopenharmony_ci   list_inithead(&block->instr_list);
649bf215546Sopenharmony_ci
650bf215546Sopenharmony_ci   // TODO once we are using post-sched for everything we can
651bf215546Sopenharmony_ci   // just not stick in NOP's prior to post-sched, and drop this.
652bf215546Sopenharmony_ci   // for now keep this, since it makes post-sched optional:
653bf215546Sopenharmony_ci   foreach_instr_safe (instr, &ctx->unscheduled_list) {
654bf215546Sopenharmony_ci      switch (instr->opc) {
655bf215546Sopenharmony_ci      case OPC_NOP:
656bf215546Sopenharmony_ci      case OPC_B:
657bf215546Sopenharmony_ci      case OPC_JUMP:
658bf215546Sopenharmony_ci         list_delinit(&instr->node);
659bf215546Sopenharmony_ci         break;
660bf215546Sopenharmony_ci      default:
661bf215546Sopenharmony_ci         break;
662bf215546Sopenharmony_ci      }
663bf215546Sopenharmony_ci   }
664bf215546Sopenharmony_ci
665bf215546Sopenharmony_ci   sched_dag_init(ctx);
666bf215546Sopenharmony_ci
667bf215546Sopenharmony_ci   /* First schedule all meta:input instructions, followed by
668bf215546Sopenharmony_ci    * tex-prefetch.  We want all of the instructions that load
669bf215546Sopenharmony_ci    * values into registers before the shader starts to go
670bf215546Sopenharmony_ci    * before any other instructions.  But in particular we
671bf215546Sopenharmony_ci    * want inputs to come before prefetches.  This is because
672bf215546Sopenharmony_ci    * a FS's bary_ij input may not actually be live in the
673bf215546Sopenharmony_ci    * shader, but it should not be scheduled on top of any
674bf215546Sopenharmony_ci    * other input (but can be overwritten by a tex prefetch)
675bf215546Sopenharmony_ci    */
676bf215546Sopenharmony_ci   foreach_instr_safe (instr, &ctx->unscheduled_list)
677bf215546Sopenharmony_ci      if (instr->opc == OPC_META_INPUT)
678bf215546Sopenharmony_ci         schedule(ctx, instr);
679bf215546Sopenharmony_ci
680bf215546Sopenharmony_ci   foreach_instr_safe (instr, &ctx->unscheduled_list)
681bf215546Sopenharmony_ci      if (instr->opc == OPC_META_TEX_PREFETCH)
682bf215546Sopenharmony_ci         schedule(ctx, instr);
683bf215546Sopenharmony_ci
684bf215546Sopenharmony_ci   while (!list_is_empty(&ctx->unscheduled_list)) {
685bf215546Sopenharmony_ci      struct ir3_instruction *instr = choose_instr(ctx);
686bf215546Sopenharmony_ci
687bf215546Sopenharmony_ci      unsigned delay = node_delay(ctx, instr->data);
688bf215546Sopenharmony_ci      d("delay=%u", delay);
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci      assert(delay <= 6);
691bf215546Sopenharmony_ci
692bf215546Sopenharmony_ci      schedule(ctx, instr);
693bf215546Sopenharmony_ci   }
694bf215546Sopenharmony_ci
695bf215546Sopenharmony_ci   sched_dag_destroy(ctx);
696bf215546Sopenharmony_ci}
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_cistatic bool
699bf215546Sopenharmony_ciis_self_mov(struct ir3_instruction *instr)
700bf215546Sopenharmony_ci{
701bf215546Sopenharmony_ci   if (!is_same_type_mov(instr))
702bf215546Sopenharmony_ci      return false;
703bf215546Sopenharmony_ci
704bf215546Sopenharmony_ci   if (instr->dsts[0]->num != instr->srcs[0]->num)
705bf215546Sopenharmony_ci      return false;
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_ci   if (instr->dsts[0]->flags & IR3_REG_RELATIV)
708bf215546Sopenharmony_ci      return false;
709bf215546Sopenharmony_ci
710bf215546Sopenharmony_ci   if (instr->cat1.round != ROUND_ZERO)
711bf215546Sopenharmony_ci      return false;
712bf215546Sopenharmony_ci
713bf215546Sopenharmony_ci   if (instr->srcs[0]->flags &
714bf215546Sopenharmony_ci       (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_FNEG |
715bf215546Sopenharmony_ci        IR3_REG_FABS | IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT))
716bf215546Sopenharmony_ci      return false;
717bf215546Sopenharmony_ci
718bf215546Sopenharmony_ci   return true;
719bf215546Sopenharmony_ci}
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci/* sometimes we end up w/ in-place mov's, ie. mov.u32u32 r1.y, r1.y
722bf215546Sopenharmony_ci * as a result of places were before RA we are not sure that it is
723bf215546Sopenharmony_ci * safe to eliminate.  We could eliminate these earlier, but sometimes
724bf215546Sopenharmony_ci * they are tangled up in false-dep's, etc, so it is easier just to
725bf215546Sopenharmony_ci * let them exist until after RA
726bf215546Sopenharmony_ci */
727bf215546Sopenharmony_cistatic void
728bf215546Sopenharmony_cicleanup_self_movs(struct ir3 *ir)
729bf215546Sopenharmony_ci{
730bf215546Sopenharmony_ci   foreach_block (block, &ir->block_list) {
731bf215546Sopenharmony_ci      foreach_instr_safe (instr, &block->instr_list) {
732bf215546Sopenharmony_ci         for (unsigned i = 0; i < instr->deps_count; i++) {
733bf215546Sopenharmony_ci            if (instr->deps[i] && is_self_mov(instr->deps[i])) {
734bf215546Sopenharmony_ci               instr->deps[i] = NULL;
735bf215546Sopenharmony_ci            }
736bf215546Sopenharmony_ci         }
737bf215546Sopenharmony_ci
738bf215546Sopenharmony_ci         if (is_self_mov(instr))
739bf215546Sopenharmony_ci            list_delinit(&instr->node);
740bf215546Sopenharmony_ci      }
741bf215546Sopenharmony_ci   }
742bf215546Sopenharmony_ci}
743bf215546Sopenharmony_ci
744bf215546Sopenharmony_cibool
745bf215546Sopenharmony_ciir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v)
746bf215546Sopenharmony_ci{
747bf215546Sopenharmony_ci   struct ir3_postsched_ctx ctx = {
748bf215546Sopenharmony_ci      .ir = ir,
749bf215546Sopenharmony_ci      .v = v,
750bf215546Sopenharmony_ci   };
751bf215546Sopenharmony_ci
752bf215546Sopenharmony_ci   cleanup_self_movs(ir);
753bf215546Sopenharmony_ci
754bf215546Sopenharmony_ci   foreach_block (block, &ir->block_list) {
755bf215546Sopenharmony_ci      sched_block(&ctx, block);
756bf215546Sopenharmony_ci   }
757bf215546Sopenharmony_ci
758bf215546Sopenharmony_ci   return true;
759bf215546Sopenharmony_ci}
760