/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/dag.h"
#include "util/u_math.h"

#include "ir3.h"
#include "ir3_compiler.h"

#ifdef DEBUG
#define SCHED_DEBUG (ir3_shader_debug & IR3_DBG_SCHEDMSGS)
#else
#define SCHED_DEBUG 0
#endif
#define d(fmt, ...)                                                            \
   do {                                                                        \
      if (SCHED_DEBUG) {                                                       \
         mesa_logi("SCHED: " fmt, ##__VA_ARGS__);                              \
      }                                                                        \
   } while (0)

#define di(instr, fmt, ...)                                                    \
   do {                                                                        \
      if (SCHED_DEBUG) {                                                       \
         struct log_stream *stream = mesa_log_streami();                       \
         mesa_log_stream_printf(stream, "SCHED: " fmt ": ", ##__VA_ARGS__);    \
         ir3_print_instr_stream(stream, instr);                                \
         mesa_log_stream_destroy(stream);                                      \
      }                                                                        \
   } while (0)

/*
 * Instruction Scheduling:
 *
 * A block-level pre-RA scheduler, which works by creating a DAG of
 * instruction dependencies, and heuristically picking a DAG head
 * (instruction with no unscheduled dependencies).
 *
 * Where possible, it tries to pick instructions that avoid nop delay
 * slots, but it will prefer to pick instructions that reduce (or do
 * not increase) the number of live values.
 *
 * If the only possible choices are instructions that increase the
 * number of live values, it will try to pick the one with the earliest
 * consumer (based on pre-sched program order).
 *
 * There are a few special cases that need to be handled, since sched
 * is currently independent of register allocation.  Usages of address
 * register (a0.x) or predicate register (p0.x) must be serialized.  Ie.
 * if you have two pairs of instructions that write the same special
 * register and then read it, then those pairs cannot be interleaved.
 * To solve this, when we are in such a scheduling "critical section",
 * and we encounter a conflicting write to a special register, we try
 * to schedule any remaining instructions that use that value first.
 *
 * TODO we can detect too-large live_values here.. would be a good place
 * to "spill" cheap things, like move from uniform/immed.  (Constructing
 * a list of ssa def consumers before the sched pass would make this easier.
 * Also, in general it might be best not to re-use load_immed across
 * blocks.)
 *
 * TODO we can use (abs)/(neg) src modifiers in a lot of cases to reduce
 * the # of immediates in play (or at least that would help with
 * dEQP-GLES31.functional.ubo.random.all_per_block_buffers.*).. probably
 * do this in a nir pass that inserts fneg/etc?  The cp pass should fold
 * these into src modifiers..
 */

struct ir3_sched_ctx {
   struct ir3_block *block; /* the current block */
   struct dag *dag;

   struct list_head unscheduled_list; /* unscheduled instructions */
   struct ir3_instruction *scheduled; /* last scheduled instr */
   struct ir3_instruction *addr0;     /* current a0.x user, if any */
   struct ir3_instruction *addr1;     /* current a1.x user, if any */
   struct ir3_instruction *pred;      /* current p0.x user, if any */

   struct ir3_instruction *split; /* most-recently-split a0/a1/p0 producer */

   int remaining_kills;
   int remaining_tex;

   bool error;

   unsigned ip;

   int sy_delay;
   int ss_delay;

   /* We order the scheduled (sy)/(ss) producers, and keep track of the
    * index of the last waited on instruction, so we can know which
    * instructions are still outstanding (and therefore would require us to
    * wait for all outstanding instructions before scheduling a use).
    */
   int sy_index, first_outstanding_sy_index;
   int ss_index, first_outstanding_ss_index;
};

struct ir3_sched_node {
   struct dag_node dag; /* must be first for util_dynarray_foreach */
   struct ir3_instruction *instr;

   unsigned delay;
   unsigned max_delay;

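   /* Position in the sequence of scheduled (sy)/(ss) producers, compared
    * against first_outstanding_sy_index/ss_index in ir3_sched_ctx to tell
    * whether this producer's result is still outstanding.
    */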
   unsigned sy_index;
   unsigned ss_index;

   /* For ready instructions, the earliest possible ip that it could be
    * scheduled.
    */
   unsigned earliest_ip;

   /* For instructions that are a meta:collect src, once we schedule
    * the first src of the collect, the entire vecN is live (at least
    * from the PoV of the first RA pass.. the 2nd scalar pass can fill
    * in some of the gaps, but often not all).  So we want to help out
    * RA, and realize that as soon as we schedule the first collect
    * src, there is no penalty to schedule the remainder (ie. they
    * don't make additional values live).  In fact we'd prefer to
    * schedule the rest ASAP to minimize the live range of the vecN.
    *
    * For instructions that are the src of a collect, we track the
    * corresponding collect, and mark them as partially live as soon
    * as any one of the src's is scheduled.
    */
   struct ir3_instruction *collect;
   bool partially_live;

   /* Is this instruction a direct or indirect dependency for a kill?
    * If so, we should prioritize it when possible.
    */
   bool kill_path;

   /* This node represents a shader output.  A semi-common pattern in
    * shaders is something along the lines of:
    *
    *    fragcolor.w = 1.0
    *
    * Which we'd prefer to schedule as late as possible, since it
    * produces a live value that is never killed/consumed.  So detect
    * outputs up-front, and avoid scheduling them unless they reduce
    * register pressure (or at least are neutral).
    */
   bool output;
};

#define foreach_sched_node(__n, __list)                                        \
   list_for_each_entry (struct ir3_sched_node, __n, __list, dag.link)

static void sched_node_init(struct ir3_sched_ctx *ctx,
                            struct ir3_instruction *instr);
static void sched_node_add_dep(struct ir3_instruction *instr,
                               struct ir3_instruction *src, int i);

static bool
is_scheduled(struct ir3_instruction *instr)
{
   return !!(instr->flags & IR3_INSTR_MARK);
}

/* check_src_cond() passing an ir3_sched_ctx. */
static bool
sched_check_src_cond(struct ir3_instruction *instr,
                     bool (*cond)(struct ir3_instruction *,
                                  struct ir3_sched_ctx *),
                     struct ir3_sched_ctx *ctx)
{
   foreach_ssa_src (src, instr) {
      /* meta:split/collect aren't real instructions, the thing that
       * we actually care about is *their* srcs
       */
      if ((src->opc == OPC_META_SPLIT) || (src->opc == OPC_META_COLLECT)) {
         if (sched_check_src_cond(src, cond, ctx))
            return true;
      } else {
         if (cond(src, ctx))
            return true;
      }
   }

   return false;
}

/* Is this a sy producer that hasn't been waited on yet? */

static bool
is_outstanding_sy(struct ir3_instruction *instr, struct ir3_sched_ctx *ctx)
{
   if (!is_sy_producer(instr))
      return false;

   /* The sched node is only valid within the same block; we cannot
    * really say anything about srcs from other blocks.
    */
   if (instr->block != ctx->block)
      return true;

   struct ir3_sched_node *n = instr->data;
   return n->sy_index >= ctx->first_outstanding_sy_index;
}

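/* Is this an ss producer that hasn't been waited on yet? */
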
static bool
is_outstanding_ss(struct ir3_instruction *instr, struct ir3_sched_ctx *ctx)
{
   if (!is_ss_producer(instr))
      return false;

   /* The sched node is only valid within the same block; we cannot
    * really say anything about srcs from other blocks.
    */
   if (instr->block != ctx->block)
      return true;

   struct ir3_sched_node *n = instr->data;
   return n->ss_index >= ctx->first_outstanding_ss_index;
}

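/* Rough count of how many hw instructions this IR instruction will expand
 * to, used below to count down the soft (ss)/(sy) delay budgets as
 * instructions are scheduled.
 */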
static unsigned
cycle_count(struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_COLLECT) {
      /* Assume that only immed/const sources produce moves */
      unsigned n = 0;
      foreach_src (src, instr) {
         if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST))
            n++;
      }
      return n;
   } else if (is_meta(instr)) {
      return 0;
   } else {
      return 1;
   }
}

static void
schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
   assert(ctx->block == instr->block);

   /* remove from depth list:
    */
   list_delinit(&instr->node);

   if (writes_addr0(instr)) {
      assert(ctx->addr0 == NULL);
      ctx->addr0 = instr;
   }

   if (writes_addr1(instr)) {
      assert(ctx->addr1 == NULL);
      ctx->addr1 = instr;
   }

   if (writes_pred(instr)) {
      assert(ctx->pred == NULL);
      ctx->pred = instr;
   }

   instr->flags |= IR3_INSTR_MARK;

   di(instr, "schedule");

   list_addtail(&instr->node, &instr->block->instr_list);
   ctx->scheduled = instr;

   if (is_kill_or_demote(instr)) {
      assert(ctx->remaining_kills > 0);
      ctx->remaining_kills--;
   }

   struct ir3_sched_node *n = instr->data;

   /* If this instruction is a meta:collect src, mark the remaining
    * collect srcs as partially live.
    */
   if (n->collect) {
      foreach_ssa_src (src, n->collect) {
         if (src->block != instr->block)
            continue;
         struct ir3_sched_node *sn = src->data;
         sn->partially_live = true;
      }
   }

   bool counts_for_delay = is_alu(instr) || is_flow(instr);

   /* TODO: switch to "cycles". For now try to match ir3_delay. */
   unsigned delay_cycles = counts_for_delay ? 1 + instr->repeat : 0;

   /* We insert any nop's needed to get to earliest_ip, then advance
    * delay_cycles by scheduling the instruction.
    */
   ctx->ip = MAX2(ctx->ip, n->earliest_ip) + delay_cycles;

   util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
      unsigned delay = (unsigned)(uintptr_t)edge->data;
      struct ir3_sched_node *child =
         container_of(edge->child, struct ir3_sched_node, dag);
      child->earliest_ip = MAX2(child->earliest_ip, ctx->ip + delay);
   }

   dag_prune_head(ctx->dag, &n->dag);

   unsigned cycles = cycle_count(instr);

   if (is_ss_producer(instr)) {
      ctx->ss_delay = soft_ss_delay(instr);
      n->ss_index = ctx->ss_index++;
   } else if (!is_meta(instr) &&
              sched_check_src_cond(instr, is_outstanding_ss, ctx)) {
      ctx->ss_delay = 0;
      ctx->first_outstanding_ss_index = ctx->ss_index;
   } else if (ctx->ss_delay > 0) {
      ctx->ss_delay -= MIN2(cycles, ctx->ss_delay);
   }

   if (is_sy_producer(instr)) {
      /* NOTE that this isn't an attempt to hide texture fetch latency,
       * but an attempt to hide the cost of switching to another warp.
       * If we can, we'd like to try to schedule another texture fetch
       * before scheduling something that would sync.
       */
      ctx->sy_delay = soft_sy_delay(instr, ctx->block->shader);
      assert(ctx->remaining_tex > 0);
      ctx->remaining_tex--;
      n->sy_index = ctx->sy_index++;
   } else if (!is_meta(instr) &&
              sched_check_src_cond(instr, is_outstanding_sy, ctx)) {
      ctx->sy_delay = 0;
      ctx->first_outstanding_sy_index = ctx->sy_index;
   } else if (ctx->sy_delay > 0) {
      ctx->sy_delay -= MIN2(cycles, ctx->sy_delay);
   }
}

struct ir3_sched_notes {
   /* there is at least one kill which could be scheduled, except
    * for unscheduled bary.f's:
    */
   bool blocked_kill;
   /* there is at least one instruction that could be scheduled,
    * except for conflicting address/predicate register usage:
    */
   bool addr0_conflict, addr1_conflict, pred_conflict;
};

static bool
should_skip(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
   if (ctx->remaining_kills && (is_tex(instr) || is_mem(instr))) {
      /* avoid texture/memory access if we have unscheduled kills
       * that could make the expensive operation unnecessary.  By
       * definition, if there are remaining kills, and this instr
       * is not a dependency of a kill, there are other instructions
       * that we can choose from.
       */
      struct ir3_sched_node *n = instr->data;
      if (!n->kill_path)
         return true;
   }

   return false;
}

/* could an instruction be scheduled if specified ssa src was scheduled? */
static bool
could_sched(struct ir3_sched_ctx *ctx,
            struct ir3_instruction *instr, struct ir3_instruction *src)
{
   foreach_ssa_src (other_src, instr) {
      /* if dependency not scheduled, we aren't ready yet: */
      if ((src != other_src) && !is_scheduled(other_src)) {
         return false;
      }
   }

   /* Instructions not in the current block can never be scheduled.
    */
   if (instr->block != src->block)
      return false;

   return !should_skip(ctx, instr);
}

/* Check if instruction is ok to schedule.  Make sure it is not blocked
 * by use of addr/predicate register, etc.
 */
static bool
check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
            struct ir3_instruction *instr)
{
   assert(!is_scheduled(instr));

   if (instr == ctx->split) {
      /* Don't schedule instructions created by splitting an a0.x/a1.x/p0.x
       * write until another "normal" instruction has been scheduled.
       */
      return false;
   }

   if (should_skip(ctx, instr))
      return false;

   /* For instructions that write the address register we need to
    * make sure there is at least one instruction that uses the
    * addr value which is otherwise ready.
    *
    * NOTE if any instructions use the pred register and have other
    * src args, we would need to do the same for writes_pred()..
    */
   if (writes_addr0(instr)) {
      struct ir3 *ir = instr->block->shader;
      bool ready = false;
      for (unsigned i = 0; (i < ir->a0_users_count) && !ready; i++) {
         struct ir3_instruction *indirect = ir->a0_users[i];
         if (!indirect)
            continue;
         if (indirect->address->def != instr->dsts[0])
            continue;
         ready = could_sched(ctx, indirect, instr);
      }

      /* nothing could be scheduled, so keep looking: */
      if (!ready)
         return false;
   }

   if (writes_addr1(instr)) {
      struct ir3 *ir = instr->block->shader;
      bool ready = false;
      for (unsigned i = 0; (i < ir->a1_users_count) && !ready; i++) {
         struct ir3_instruction *indirect = ir->a1_users[i];
         if (!indirect)
            continue;
         if (indirect->address->def != instr->dsts[0])
            continue;
         ready = could_sched(ctx, indirect, instr);
      }

      /* nothing could be scheduled, so keep looking: */
      if (!ready)
         return false;
   }

   /* if this is a write to address/predicate register, and that
    * register is currently in use, we need to defer until it is
    * free:
    */
   if (writes_addr0(instr) && ctx->addr0) {
      assert(ctx->addr0 != instr);
      notes->addr0_conflict = true;
      return false;
   }

   if (writes_addr1(instr) && ctx->addr1) {
      assert(ctx->addr1 != instr);
      notes->addr1_conflict = true;
      return false;
   }

   if (writes_pred(instr) && ctx->pred) {
      assert(ctx->pred != instr);
      notes->pred_conflict = true;
      return false;
   }

   /* if the instruction is a kill, we need to ensure *every*
    * bary.f is scheduled.  The hw seems unhappy if the thread
    * gets killed before the end-input (ei) flag is hit.
    *
    * We could do this by adding each bary.f instruction as a
    * virtual ssa src for the kill instruction.  But we have
    * fixed length instr->srcs[].
    *
    * TODO we could handle this by false-deps now, probably.
    */
   if (is_kill_or_demote(instr)) {
      struct ir3 *ir = instr->block->shader;

      for (unsigned i = 0; i < ir->baryfs_count; i++) {
         struct ir3_instruction *baryf = ir->baryfs[i];
         if (baryf->flags & IR3_INSTR_UNUSED)
            continue;
         if (!is_scheduled(baryf)) {
            notes->blocked_kill = true;
            return false;
         }
      }
   }

   return true;
}

/* Find the instr->ip of the closest use of an instruction, in
 * pre-sched order.  This isn't going to be the same as post-sched
 * order, but it is a reasonable approximation to limit scheduling
 * instructions *too* early.  This is mostly to prevent bad behavior
 * in cases where we have a large number of possible instructions
 * to choose from, to avoid creating too much parallelism (ie. blowing
 * up register pressure).
 *
 * See
 * dEQP-GLES31.functional.atomic_counter.layout.reverse_offset.inc_dec.8_counters_5_calls_1_thread
 */
static int
nearest_use(struct ir3_instruction *instr)
{
   unsigned nearest = ~0;
   foreach_ssa_use (use, instr)
      if (!is_scheduled(use))
         nearest = MIN2(nearest, use->ip);

   /* slight hack.. this heuristic tends to push bary.f's to later
    * in the shader, closer to their uses.  But we actually would
    * prefer to get these scheduled earlier, to unlock varying
    * storage for more VS jobs:
    */
   if (is_input(instr))
      nearest /= 2;

   return nearest;
}

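/* Returns true if 'use' is the only remaining unscheduled use of 'instr',
 * ie. scheduling 'use' would allow the value produced by 'instr' to die.
 */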
static bool
is_only_nonscheduled_use(struct ir3_instruction *instr,
                         struct ir3_instruction *use)
{
   foreach_ssa_use (other_use, instr) {
      if (other_use != use && !is_scheduled(other_use))
         return false;
   }

   return true;
}

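/* Count the number of GPR elements written by an instruction, ie. how many
 * new register values it would make live.
 */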
static unsigned
new_regs(struct ir3_instruction *instr)
{
   unsigned regs = 0;

   foreach_dst (dst, instr) {
      if (!is_dest_gpr(dst))
         continue;
      regs += reg_elems(dst);
   }

   return regs;
}

/* find net change to live values if instruction were scheduled: */
static int
live_effect(struct ir3_instruction *instr)
{
   struct ir3_sched_node *n = instr->data;
   int new_live =
      (n->partially_live || !instr->uses || instr->uses->entries == 0)
         ? 0
         : new_regs(instr);
   int freed_live = 0;

   /* if we schedule something that causes a vecN to be live,
    * then count all its other components too:
    */
   if (n->collect)
      new_live *= n->collect->srcs_count;

   foreach_ssa_src_n (src, n, instr) {
      if (__is_false_dep(instr, n))
         continue;

      if (instr->block != src->block)
         continue;

      if (is_only_nonscheduled_use(src, instr))
         freed_live += new_regs(src);
   }

   return new_live - freed_live;
}

/* Determine if this is an instruction that we'd prefer not to schedule
 * yet, in order to avoid an (ss)/(sy) sync.  This is limited by the
 * ss_delay/sy_delay counters (ie. the more cycles it has been since the
 * last SFU/tex, the less costly a sync would be), and by the number of
 * outstanding SFU/tex instructions, to prevent a blowup in register pressure.
 */
static bool
should_defer(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
   if (ctx->ss_delay) {
      if (sched_check_src_cond(instr, is_outstanding_ss, ctx))
         return true;
   }

   /* We mostly just want to try to schedule another texture fetch
    * before scheduling something that would (sy) sync, so we can
    * limit this rule to cases where there are remaining texture
    * fetches:
    */
   if (ctx->sy_delay && ctx->remaining_tex) {
      if (sched_check_src_cond(instr, is_outstanding_sy, ctx))
         return true;
   }

   /* Avoid scheduling too many outstanding texture or sfu instructions at
    * once by deferring further tex/SFU instructions. This both prevents
    * stalls when the queue of texture/sfu instructions becomes too large,
    * and prevents unacceptably large increases in register pressure from too
    * many outstanding texture instructions.
    */
   if (ctx->sy_index - ctx->first_outstanding_sy_index >= 8 && is_sy_producer(instr))
      return true;

   if (ctx->ss_index - ctx->first_outstanding_ss_index >= 8 && is_ss_producer(instr))
      return true;

   return false;
}

static struct ir3_sched_node *choose_instr_inc(struct ir3_sched_ctx *ctx,
                                               struct ir3_sched_notes *notes,
                                               bool defer, bool avoid_output);

enum choose_instr_dec_rank {
   DEC_NEUTRAL,
   DEC_NEUTRAL_READY,
   DEC_FREED,
   DEC_FREED_READY,
};

static const char *
dec_rank_name(enum choose_instr_dec_rank rank)
{
   switch (rank) {
   case DEC_NEUTRAL:
      return "neutral";
   case DEC_NEUTRAL_READY:
      return "neutral+ready";
   case DEC_FREED:
      return "freed";
   case DEC_FREED_READY:
      return "freed+ready";
   default:
      return NULL;
   }
}

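/* Number of nop cycles that would still be needed before this node could
 * issue at the current ip.
 */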
static unsigned
node_delay(struct ir3_sched_ctx *ctx, struct ir3_sched_node *n)
{
   return MAX2(n->earliest_ip, ctx->ip) - ctx->ip;
}

/**
 * Chooses an instruction to schedule using the Goodman/Hsu (1988) CSR (Code
 * Scheduling for Register pressure) heuristic.
 *
 * Only handles the case of choosing instructions that reduce register pressure
 * or are even.
 */
static struct ir3_sched_node *
choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
                 bool defer)
{
   const char *mode = defer ? "-d" : "";
   struct ir3_sched_node *chosen = NULL;
   enum choose_instr_dec_rank chosen_rank = DEC_NEUTRAL;

   foreach_sched_node (n, &ctx->dag->heads) {
      if (defer && should_defer(ctx, n->instr))
         continue;

      unsigned d = node_delay(ctx, n);

      int live = live_effect(n->instr);
      if (live > 0)
         continue;

      if (!check_instr(ctx, notes, n->instr))
         continue;

      enum choose_instr_dec_rank rank;
      if (live < 0) {
         /* Prioritize instrs which free up regs and can be scheduled with no
          * delay.
          */
         if (d == 0)
            rank = DEC_FREED_READY;
         else
            rank = DEC_FREED;
      } else {
         /* Contra the paper, pick a leader with no effect on used regs.  This
          * may open up new opportunities, as otherwise a single-operand instr
          * consuming a value will tend to block freeing that value.
          * This had a massive effect on reducing spilling on V3D.
          *
          * XXX: Should this prioritize ready?
          */
         if (d == 0)
            rank = DEC_NEUTRAL_READY;
         else
            rank = DEC_NEUTRAL;
      }

      /* Prefer higher-ranked instructions, or in the case of a rank tie, the
       * highest latency-to-end-of-program instruction.
       */
      if (!chosen || rank > chosen_rank ||
          (rank == chosen_rank && chosen->max_delay < n->max_delay)) {
         chosen = n;
         chosen_rank = rank;
      }
   }

   if (chosen) {
      di(chosen->instr, "dec%s: chose (%s)", mode, dec_rank_name(chosen_rank));
      return chosen;
   }

   return choose_instr_inc(ctx, notes, defer, true);
}

enum choose_instr_inc_rank {
   INC_DISTANCE,
   INC_DISTANCE_READY,
};

static const char *
inc_rank_name(enum choose_instr_inc_rank rank)
{
   switch (rank) {
   case INC_DISTANCE:
      return "distance";
   case INC_DISTANCE_READY:
      return "distance+ready";
   default:
      return NULL;
   }
}

/**
 * When we can't choose an instruction that reduces register pressure or
 * is neutral, we end up here to try and pick the least bad option.
 */
static struct ir3_sched_node *
choose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
                 bool defer, bool avoid_output)
{
   const char *mode = defer ? "-d" : "";
   struct ir3_sched_node *chosen = NULL;
   enum choose_instr_inc_rank chosen_rank = INC_DISTANCE;

   /*
    * From here on out, we are picking something that increases
    * register pressure.  So try to pick something which will
    * be consumed soon:
    */
   unsigned chosen_distance = 0;

   /* Pick the max delay of the remaining ready set. */
   foreach_sched_node (n, &ctx->dag->heads) {
      if (avoid_output && n->output)
         continue;

      if (defer && should_defer(ctx, n->instr))
         continue;

      if (!check_instr(ctx, notes, n->instr))
         continue;

      unsigned d = node_delay(ctx, n);

      enum choose_instr_inc_rank rank;
      if (d == 0)
         rank = INC_DISTANCE_READY;
      else
         rank = INC_DISTANCE;

      unsigned distance = nearest_use(n->instr);

      if (!chosen || rank > chosen_rank ||
          (rank == chosen_rank && distance < chosen_distance)) {
         chosen = n;
         chosen_distance = distance;
         chosen_rank = rank;
      }
   }

   if (chosen) {
      di(chosen->instr, "inc%s: chose (%s)", mode, inc_rank_name(chosen_rank));
      return chosen;
   }

   return NULL;
}

/* Handles instruction selection for instructions we want to prioritize
 * even if csp/csr would not pick them.
 */
static struct ir3_sched_node *
choose_instr_prio(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
{
   struct ir3_sched_node *chosen = NULL;

   foreach_sched_node (n, &ctx->dag->heads) {
      /*
       * - phi nodes and inputs must be scheduled first
       * - split should be scheduled first, so that the vector value is
       *   killed as soon as possible. RA cannot split up the vector and
       *   reuse components that have been killed until the whole vector
       *   has been killed.
       * - collect, on the other hand, should be treated as a "normal"
       *   instruction, and may add to register pressure if its sources are
       *   part of another vector or immediates.
       */
      if (!is_meta(n->instr) || n->instr->opc == OPC_META_COLLECT)
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "prio: chose (meta)");
      return chosen;
   }

   return NULL;
}

static void
dump_state(struct ir3_sched_ctx *ctx)
{
   if (!SCHED_DEBUG)
      return;

   foreach_sched_node (n, &ctx->dag->heads) {
      di(n->instr, "maxdel=%3d le=%d del=%u ", n->max_delay,
         live_effect(n->instr), node_delay(ctx, n));

      util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
         struct ir3_sched_node *child = (struct ir3_sched_node *)edge->child;

         di(child->instr, " -> (%d parents) ", child->dag.parent_count);
      }
   }
}

/* find instruction to schedule: */
static struct ir3_instruction *
choose_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
{
   struct ir3_sched_node *chosen;

   dump_state(ctx);

   chosen = choose_instr_prio(ctx, notes);
   if (chosen)
      return chosen->instr;

   chosen = choose_instr_dec(ctx, notes, true);
   if (chosen)
      return chosen->instr;

   chosen = choose_instr_dec(ctx, notes, false);
   if (chosen)
      return chosen->instr;

   chosen = choose_instr_inc(ctx, notes, false, false);
   if (chosen)
      return chosen->instr;

   return NULL;
}

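/* Clone an instruction, used when "spilling" an a0/a1/p0 producer below, and
 * create a sched node for the clone.
 */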
static struct ir3_instruction *
split_instr(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr)
{
   struct ir3_instruction *new_instr = ir3_instr_clone(orig_instr);
   di(new_instr, "split instruction");
   sched_node_init(ctx, new_instr);
   return new_instr;
}

/* "spill" the address registers by remapping any unscheduled
 * instructions which depend on the current address register
 * to a clone of the instruction which wrote the address reg.
 */
static struct ir3_instruction *
split_addr(struct ir3_sched_ctx *ctx, struct ir3_instruction **addr,
           struct ir3_instruction **users, unsigned users_count)
{
   struct ir3_instruction *new_addr = NULL;
   unsigned i;

   assert(*addr);

   for (i = 0; i < users_count; i++) {
      struct ir3_instruction *indirect = users[i];

      if (!indirect)
         continue;

      /* skip instructions already scheduled: */
      if (is_scheduled(indirect))
         continue;

      /* remap remaining instructions using current addr
       * to new addr:
       */
      if (indirect->address->def == (*addr)->dsts[0]) {
         if (!new_addr) {
            new_addr = split_instr(ctx, *addr);
            /* original addr is scheduled, but new one isn't: */
            new_addr->flags &= ~IR3_INSTR_MARK;
         }
         indirect->address->def = new_addr->dsts[0];
         /* don't need to remove old dag edge since old addr is
          * already scheduled:
          */
         sched_node_add_dep(indirect, new_addr, 0);
         di(indirect, "new address");
      }
   }

   /* all remaining indirects remapped to new addr: */
   *addr = NULL;

   return new_addr;
}
957bf215546Sopenharmony_ci
958bf215546Sopenharmony_ci/* "spill" the predicate register by remapping any unscheduled
959bf215546Sopenharmony_ci * instructions which depend on the current predicate register
960bf215546Sopenharmony_ci * to a clone of the instruction which wrote the address reg.
961bf215546Sopenharmony_ci */
static struct ir3_instruction *
split_pred(struct ir3_sched_ctx *ctx)
{
   struct ir3 *ir;
   struct ir3_instruction *new_pred = NULL;
   unsigned i;

   assert(ctx->pred);

   ir = ctx->pred->block->shader;

   for (i = 0; i < ir->predicates_count; i++) {
      struct ir3_instruction *predicated = ir->predicates[i];

      if (!predicated)
         continue;

      /* skip instructions already scheduled: */
      if (is_scheduled(predicated))
         continue;

      /* remap remaining instructions using current pred
       * to new pred:
       *
       * TODO is there ever a case when pred isn't first
       * (and only) src?
       */
      if (ssa(predicated->srcs[0]) == ctx->pred) {
         if (!new_pred) {
            new_pred = split_instr(ctx, ctx->pred);
            /* original pred is scheduled, but new one isn't: */
            new_pred->flags &= ~IR3_INSTR_MARK;
         }
         predicated->srcs[0]->def->instr = new_pred;
         /* don't need to remove old dag edge since old pred is
          * already scheduled:
          */
         sched_node_add_dep(predicated, new_pred, 0);
         di(predicated, "new predicate");
      }
   }

   if (ctx->block->condition == ctx->pred) {
      if (!new_pred) {
         new_pred = split_instr(ctx, ctx->pred);
         /* original pred is scheduled, but new one isn't: */
         new_pred->flags &= ~IR3_INSTR_MARK;
      }
      ctx->block->condition = new_pred;
      d("new branch condition");
   }

   /* all remaining predicated remapped to new pred: */
   ctx->pred = NULL;

   return new_pred;
}

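/* Allocate a scheduler node for 'instr', add it to the DAG, and link it
 * via instr->data so dependency edges can be added later:
 */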
static void
sched_node_init(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
   struct ir3_sched_node *n = rzalloc(ctx->dag, struct ir3_sched_node);

   dag_init_node(ctx->dag, &n->dag);

   n->instr = instr;
   instr->data = n;
}

static void
sched_node_add_dep(struct ir3_instruction *instr, struct ir3_instruction *src,
                   int i)
{
   /* don't consider dependencies in other blocks: */
   if (src->block != instr->block)
      return;

   /* we could have false-deps that end up unused: */
   if (src->flags & IR3_INSTR_UNUSED) {
      assert(__is_false_dep(instr, i));
      return;
   }

   struct ir3_sched_node *n = instr->data;
   struct ir3_sched_node *sn = src->data;

   /* If src is consumed by a collect, track that to realize that once
    * any of the collect srcs are live, we should hurry up and schedule
    * the rest.
    */
   if (instr->opc == OPC_META_COLLECT)
      sn->collect = instr;

   unsigned d_soft = ir3_delayslots(src, instr, i, true);
   unsigned d = ir3_delayslots(src, instr, i, false);

   /* delays from (ss) and (sy) are considered separately and more accurately
    * in the scheduling heuristic, so ignore them when calculating the ip of
    * instructions, but do consider them when prioritizing which instructions
    * to schedule.
    */
   dag_add_edge_max_data(&sn->dag, &n->dag, (uintptr_t)d);

   n->delay = MAX2(n->delay, d_soft);
}

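/* Recursively flag 'instr' and its same-block SSA sources as being on a
 * kill path, so the heuristic prioritizes scheduling them:
 */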
static void
mark_kill_path(struct ir3_instruction *instr)
{
   struct ir3_sched_node *n = instr->data;

   if (n->kill_path) {
      return;
   }

   n->kill_path = true;

   foreach_ssa_src (src, instr) {
      if (src->block != instr->block)
         continue;
      mark_kill_path(src);
   }
}

/* Is it a collect whose only uses are the shader end/chmask, ie. an
 * output?
 */
static bool
is_output_collect(struct ir3_instruction *instr)
{
   if (instr->opc != OPC_META_COLLECT)
      return false;

   foreach_ssa_use (use, instr) {
      if (use->opc != OPC_END && use->opc != OPC_CHMASK)
         return false;
   }

   return true;
}

/* Is its only use as a shader output? */
static bool
is_output_only(struct ir3_instruction *instr)
{
   foreach_ssa_use (use, instr)
      if (!is_output_collect(use))
         return false;

   return true;
}

static void
sched_node_add_deps(struct ir3_instruction *instr)
{
   /* There's nothing to do for phi nodes, since they always go first. And
    * phi nodes can reference sources later in the same block, so handling
    * sources is not only unnecessary but could cause problems.
    */
   if (instr->opc == OPC_META_PHI)
      return;

   /* Since foreach_ssa_src() already handles false-deps, we can construct
    * the DAG easily in a single pass.
    */
   foreach_ssa_src_n (src, i, instr) {
      sched_node_add_dep(instr, src, i);
   }

   /* NOTE that all inputs must be scheduled before a kill, so
    * mark these to be prioritized as well:
    */
   if (is_kill_or_demote(instr) || is_input(instr)) {
      mark_kill_path(instr);
   }

   if (is_output_only(instr)) {
      struct ir3_sched_node *n = instr->data;
      n->output = true;
   }
}

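/* Bottom-up DAG traversal callback: accumulate the delay of consumers so
 * that max_delay reflects the node's distance (in delay slots) from the
 * end of the block:
 */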
static void
sched_dag_max_delay_cb(struct dag_node *node, void *state)
{
   struct ir3_sched_node *n = (struct ir3_sched_node *)node;
   uint32_t max_delay = 0;

   util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
      struct ir3_sched_node *child = (struct ir3_sched_node *)edge->child;
      max_delay = MAX2(child->max_delay, max_delay);
   }

   n->max_delay = MAX2(n->max_delay, max_delay + n->delay);
}

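/* Build the scheduling DAG for the current block: one node per
 * unscheduled instruction, edges for its dependencies, and then a
 * bottom-up pass to compute each node's max_delay:
 */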
static void
sched_dag_init(struct ir3_sched_ctx *ctx)
{
   ctx->dag = dag_create(ctx);

   foreach_instr (instr, &ctx->unscheduled_list)
      sched_node_init(ctx, instr);

   foreach_instr (instr, &ctx->unscheduled_list)
      sched_node_add_deps(instr);

   dag_traverse_bottom_up(ctx->dag, sched_dag_max_delay_cb, NULL);
}

static void
sched_dag_destroy(struct ir3_sched_ctx *ctx)
{
   ralloc_free(ctx->dag);
   ctx->dag = NULL;
}

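/* Schedule a single block: all instructions are moved onto the
 * unscheduled list and then greedily picked via choose_instr() and
 * appended back to the block.  If nothing can be scheduled due to an
 * address or predicate register conflict, the deadlock is broken by
 * cloning the instruction that wrote the conflicting register:
 */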
static void
sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
   ctx->block = block;

   /* addr/pred writes are per-block: */
   ctx->addr0 = NULL;
   ctx->addr1 = NULL;
   ctx->pred = NULL;
   ctx->sy_delay = 0;
   ctx->ss_delay = 0;
   ctx->sy_index = ctx->first_outstanding_sy_index = 0;
   ctx->ss_index = ctx->first_outstanding_ss_index = 0;

   /* move all instructions to the unscheduled list, and
    * empty the block's instruction list (to which we will
    * be inserting).
    */
   list_replace(&block->instr_list, &ctx->unscheduled_list);
   list_inithead(&block->instr_list);

   sched_dag_init(ctx);

   ctx->remaining_kills = 0;
   ctx->remaining_tex = 0;
   foreach_instr_safe (instr, &ctx->unscheduled_list) {
      if (is_kill_or_demote(instr))
         ctx->remaining_kills++;
      if (is_sy_producer(instr))
         ctx->remaining_tex++;
   }

   /* First schedule all meta:input and meta:phi instructions, followed by
    * tex-prefetch.  We want all of the instructions that load values into
    * registers before the shader starts to go before any other instructions.
    * But in particular we want inputs to come before prefetches.  This is
    * because a FS's bary_ij input may not actually be live in the shader,
    * but it should not be scheduled on top of any other input (but can be
    * overwritten by a tex prefetch)
    *
    * Note: Because the first block cannot have predecessors, meta:input and
    * meta:phi cannot exist in the same block.
    */
   foreach_instr_safe (instr, &ctx->unscheduled_list)
      if (instr->opc == OPC_META_INPUT || instr->opc == OPC_META_PHI)
         schedule(ctx, instr);

   foreach_instr_safe (instr, &ctx->unscheduled_list)
      if (instr->opc == OPC_META_TEX_PREFETCH)
         schedule(ctx, instr);

   while (!list_is_empty(&ctx->unscheduled_list)) {
      struct ir3_sched_notes notes = {0};
      struct ir3_instruction *instr;

      instr = choose_instr(ctx, &notes);
      if (instr) {
         unsigned delay = node_delay(ctx, instr->data);
         d("delay=%u", delay);

         assert(delay <= 6);

         schedule(ctx, instr);

         /* Since we've scheduled a "real" instruction, we can now
          * schedule any split instruction created by the scheduler again.
          */
         ctx->split = NULL;
      } else {
         struct ir3_instruction *new_instr = NULL;
         struct ir3 *ir = block->shader;

         /* nothing available to schedule.. if we are blocked on
          * address/predicate register conflict, then break the
          * deadlock by cloning the instruction that wrote that
          * reg:
          */
         if (notes.addr0_conflict) {
            new_instr =
               split_addr(ctx, &ctx->addr0, ir->a0_users, ir->a0_users_count);
         } else if (notes.addr1_conflict) {
            new_instr =
               split_addr(ctx, &ctx->addr1, ir->a1_users, ir->a1_users_count);
         } else if (notes.pred_conflict) {
            new_instr = split_pred(ctx);
         } else {
            d("unscheduled_list:");
            foreach_instr (instr, &ctx->unscheduled_list)
               di(instr, "unscheduled: ");
            assert(0);
            ctx->error = true;
            return;
         }

         if (new_instr) {
            list_delinit(&new_instr->node);
            list_addtail(&new_instr->node, &ctx->unscheduled_list);
         }

         /* If we produced a new instruction, do not schedule it next, in
          * order to guarantee progress.
          */
         ctx->split = new_instr;
      }
   }

   sched_dag_destroy(ctx);
}

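/* Entry point for the scheduling pass: compute SSA use information and
 * then schedule each block in turn:
 */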
int
ir3_sched(struct ir3 *ir)
{
   struct ir3_sched_ctx *ctx = rzalloc(NULL, struct ir3_sched_ctx);

   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         instr->data = NULL;
      }
   }

   ir3_count_instructions(ir);
   ir3_clear_mark(ir);
   ir3_find_ssa_uses(ir, ctx, false);

   foreach_block (block, &ir->block_list) {
      sched_block(ctx, block);
   }

   int ret = ctx->error ? -1 : 0;

   ralloc_free(ctx);

   return ret;
}

static unsigned
get_array_id(struct ir3_instruction *instr)
{
   /* The expectation is that there is only a single array
    * src or dst; ir3_cp should enforce this.
    */

   foreach_dst (dst, instr)
      if (dst->flags & IR3_REG_ARRAY)
         return dst->array.id;
   foreach_src (src, instr)
      if (src->flags & IR3_REG_ARRAY)
         return src->array.id;

   unreachable("this was unexpected");
}

/* does instruction 'prior' need to be scheduled before 'instr'? */
static bool
depends_on(struct ir3_instruction *instr, struct ir3_instruction *prior)
{
   /* TODO for dependencies that are related to a specific object, ie
    * a specific SSBO/image/array, we could relax this constraint to
    * make accesses to unrelated objects not depend on each other (at
    * least as long as not declared coherent)
    */
   if (((instr->barrier_class & IR3_BARRIER_EVERYTHING) &&
        prior->barrier_class) ||
       ((prior->barrier_class & IR3_BARRIER_EVERYTHING) &&
        instr->barrier_class))
      return true;

   if (instr->barrier_class & prior->barrier_conflict) {
      if (!(instr->barrier_class &
            ~(IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W))) {
         /* if only array barrier, then we can further limit false-deps
          * by considering the array-id, ie reads/writes to different
          * arrays do not depend on each other (no aliasing)
          */
         if (get_array_id(instr) != get_array_id(prior)) {
            return false;
         }
      }

      return true;
   }

   return false;
}

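/* Add false-deps between 'instr' and the prior/following instructions it
 * may conflict with, scanning in each direction until an instruction of
 * the same barrier class is found (which already orders anything beyond
 * it):
 */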
static void
add_barrier_deps(struct ir3_block *block, struct ir3_instruction *instr)
{
   struct list_head *prev = instr->node.prev;
   struct list_head *next = instr->node.next;

   /* add dependencies on previous instructions that must be scheduled
    * prior to the current instruction
    */
   while (prev != &block->instr_list) {
      struct ir3_instruction *pi =
         list_entry(prev, struct ir3_instruction, node);

      prev = prev->prev;

      if (is_meta(pi))
         continue;

      if (instr->barrier_class == pi->barrier_class) {
         ir3_instr_add_dep(instr, pi);
         break;
      }

      if (depends_on(instr, pi))
         ir3_instr_add_dep(instr, pi);
   }

   /* add dependencies on this instruction to following instructions
    * that must be scheduled after the current instruction:
    */
   while (next != &block->instr_list) {
      struct ir3_instruction *ni =
         list_entry(next, struct ir3_instruction, node);

      next = next->next;

      if (is_meta(ni))
         continue;

      if (instr->barrier_class == ni->barrier_class) {
         ir3_instr_add_dep(ni, instr);
         break;
      }

      if (depends_on(ni, instr))
         ir3_instr_add_dep(ni, instr);
   }
}

/* before scheduling a block, we need to add any necessary false-dependencies
 * to ensure that:
 *
 *  (1) barriers are scheduled in the right order wrt instructions related
 *      to the barrier
 *
 *  (2) reads that come before a write actually get scheduled before the
 *      write
 */
bool
ir3_sched_add_deps(struct ir3 *ir)
{
   bool progress = false;

   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->barrier_class) {
            add_barrier_deps(block, instr);
            progress = true;
         }
      }
   }

   return progress;
}