1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2019 Broadcom
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "nir_schedule.h"
25bf215546Sopenharmony_ci#include "util/dag.h"
26bf215546Sopenharmony_ci#include "util/u_dynarray.h"
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci/** @file
29bf215546Sopenharmony_ci *
30bf215546Sopenharmony_ci * Implements basic-block-level prepass instruction scheduling in NIR to
31bf215546Sopenharmony_ci * manage register pressure.
32bf215546Sopenharmony_ci *
33bf215546Sopenharmony_ci * This is based on the Goodman/Hsu paper (1988, cached copy at
34bf215546Sopenharmony_ci * https://people.freedesktop.org/~anholt/scheduling-goodman-hsu.pdf).  We
35bf215546Sopenharmony_ci * make up the DDG for NIR (which can be mostly done using the NIR def/use
36bf215546Sopenharmony_ci * chains for SSA instructions, plus some edges for ordering register writes
37bf215546Sopenharmony_ci * vs reads, and some more for ordering intrinsics).  Then we pick heads off
38bf215546Sopenharmony_ci * of the DDG using their heuristic to emit the NIR instructions back into the
39bf215546Sopenharmony_ci * block in their new order.
40bf215546Sopenharmony_ci *
41bf215546Sopenharmony_ci * The hard case for prepass scheduling on GPUs seems to always be consuming
42bf215546Sopenharmony_ci * texture/ubo results.  The register pressure heuristic doesn't want to pick
43bf215546Sopenharmony_ci * an instr that starts consuming texture results because it usually won't be
44bf215546Sopenharmony_ci * the only usage, so that instruction will increase pressure.
45bf215546Sopenharmony_ci *
46bf215546Sopenharmony_ci * If you try to force consumption of tex results always, then in a case where
47bf215546Sopenharmony_ci * single sample is used for many outputs, you'll end up picking every other
48bf215546Sopenharmony_ci * user and expanding register pressure.  The partially_evaluated_path flag
49bf215546Sopenharmony_ci * helps tremendously, in that if you happen for whatever reason to pick a
50bf215546Sopenharmony_ci * texture sample's output, then you'll try to finish off that sample.  Future
51bf215546Sopenharmony_ci * work may include doing some local search before locking in a choice, to try
52bf215546Sopenharmony_ci * to more reliably find the case where just a few choices going against the
53bf215546Sopenharmony_ci * heuristic can manage to free the whole vector.
54bf215546Sopenharmony_ci */
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_cistatic bool debug;
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_ci/**
59bf215546Sopenharmony_ci * Represents a node in the DDG for a NIR instruction.
60bf215546Sopenharmony_ci */
61bf215546Sopenharmony_citypedef struct {
62bf215546Sopenharmony_ci   struct dag_node dag; /* must be first for our u_dynarray_foreach */
63bf215546Sopenharmony_ci   nir_instr *instr;
64bf215546Sopenharmony_ci   bool partially_evaluated_path;
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_ci   /* Approximate estimate of the delay between starting this instruction and
67bf215546Sopenharmony_ci    * its results being available.
68bf215546Sopenharmony_ci    *
69bf215546Sopenharmony_ci    * Accuracy is not too important, given that we're prepass scheduling here
70bf215546Sopenharmony_ci    * and just trying to reduce excess dependencies introduced by a register
71bf215546Sopenharmony_ci    * allocator by stretching out the live intervals of expensive
72bf215546Sopenharmony_ci    * instructions.
73bf215546Sopenharmony_ci    */
74bf215546Sopenharmony_ci   uint32_t delay;
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_ci   /* Cost of the maximum-delay path from this node to the leaves. */
77bf215546Sopenharmony_ci   uint32_t max_delay;
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_ci   /* scoreboard->time value when this instruction can be scheduled without
80bf215546Sopenharmony_ci    * any stalls expected.
81bf215546Sopenharmony_ci    */
82bf215546Sopenharmony_ci   uint32_t ready_time;
83bf215546Sopenharmony_ci} nir_schedule_node;
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_citypedef struct {
86bf215546Sopenharmony_ci   struct dag *dag;
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci   nir_shader *shader;
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci   /* Mapping from nir_register * or nir_ssa_def * to a struct set of
91bf215546Sopenharmony_ci    * instructions remaining to be scheduled using the register.
92bf215546Sopenharmony_ci    */
93bf215546Sopenharmony_ci   struct hash_table *remaining_uses;
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci   /* Map from nir_instr to nir_schedule_node * */
96bf215546Sopenharmony_ci   struct hash_table *instr_map;
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci   /* Set of nir_register * or nir_ssa_def * that have had any instruction
99bf215546Sopenharmony_ci    * scheduled on them.
100bf215546Sopenharmony_ci    */
101bf215546Sopenharmony_ci   struct set *live_values;
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci   /* An abstract approximation of the number of nir_scheduler_node->delay
104bf215546Sopenharmony_ci    * units since the start of the shader.
105bf215546Sopenharmony_ci    */
106bf215546Sopenharmony_ci   uint32_t time;
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_ci   /* Number of channels currently used by the NIR instructions that have been
109bf215546Sopenharmony_ci    * scheduled.
110bf215546Sopenharmony_ci    */
111bf215546Sopenharmony_ci   int pressure;
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci   /* Options specified by the backend */
114bf215546Sopenharmony_ci   const nir_schedule_options *options;
115bf215546Sopenharmony_ci} nir_schedule_scoreboard;
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci/* When walking the instructions in reverse, we use this flag to swap
118bf215546Sopenharmony_ci * before/after in add_dep().
119bf215546Sopenharmony_ci */
120bf215546Sopenharmony_cienum direction { F, R };
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_cistruct nir_schedule_class_dep {
123bf215546Sopenharmony_ci   int klass;
124bf215546Sopenharmony_ci   nir_schedule_node *node;
125bf215546Sopenharmony_ci   struct nir_schedule_class_dep *next;
126bf215546Sopenharmony_ci};
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_citypedef struct {
129bf215546Sopenharmony_ci   nir_schedule_scoreboard *scoreboard;
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci   /* Map from nir_register to nir_schedule_node * */
132bf215546Sopenharmony_ci   struct hash_table *reg_map;
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci   /* Scheduler nodes for last instruction involved in some class of dependency.
135bf215546Sopenharmony_ci    */
136bf215546Sopenharmony_ci   nir_schedule_node *load_input;
137bf215546Sopenharmony_ci   nir_schedule_node *store_shared;
138bf215546Sopenharmony_ci   nir_schedule_node *unknown_intrinsic;
139bf215546Sopenharmony_ci   nir_schedule_node *discard;
140bf215546Sopenharmony_ci   nir_schedule_node *jump;
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci   struct nir_schedule_class_dep *class_deps;
143bf215546Sopenharmony_ci
144bf215546Sopenharmony_ci   enum direction dir;
145bf215546Sopenharmony_ci} nir_deps_state;
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_cistatic void *
148bf215546Sopenharmony_ci_mesa_hash_table_search_data(struct hash_table *ht, void *key)
149bf215546Sopenharmony_ci{
150bf215546Sopenharmony_ci   struct hash_entry *entry = _mesa_hash_table_search(ht, key);
151bf215546Sopenharmony_ci   if (!entry)
152bf215546Sopenharmony_ci      return NULL;
153bf215546Sopenharmony_ci   return entry->data;
154bf215546Sopenharmony_ci}
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_cistatic nir_schedule_node *
157bf215546Sopenharmony_cinir_schedule_get_node(struct hash_table *instr_map, nir_instr *instr)
158bf215546Sopenharmony_ci{
159bf215546Sopenharmony_ci   return _mesa_hash_table_search_data(instr_map, instr);
160bf215546Sopenharmony_ci}
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_cistatic struct set *
163bf215546Sopenharmony_cinir_schedule_scoreboard_get_src(nir_schedule_scoreboard *scoreboard, nir_src *src)
164bf215546Sopenharmony_ci{
165bf215546Sopenharmony_ci   if (src->is_ssa) {
166bf215546Sopenharmony_ci      return _mesa_hash_table_search_data(scoreboard->remaining_uses, src->ssa);
167bf215546Sopenharmony_ci   } else {
168bf215546Sopenharmony_ci      return _mesa_hash_table_search_data(scoreboard->remaining_uses,
169bf215546Sopenharmony_ci                                          src->reg.reg);
170bf215546Sopenharmony_ci   }
171bf215546Sopenharmony_ci}
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_cistatic int
174bf215546Sopenharmony_cinir_schedule_def_pressure(nir_ssa_def *def)
175bf215546Sopenharmony_ci{
176bf215546Sopenharmony_ci   return def->num_components;
177bf215546Sopenharmony_ci}
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_cistatic int
180bf215546Sopenharmony_cinir_schedule_src_pressure(nir_src *src)
181bf215546Sopenharmony_ci{
182bf215546Sopenharmony_ci   if (src->is_ssa)
183bf215546Sopenharmony_ci      return nir_schedule_def_pressure(src->ssa);
184bf215546Sopenharmony_ci   else
185bf215546Sopenharmony_ci      return src->reg.reg->num_components;
186bf215546Sopenharmony_ci}
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_cistatic int
189bf215546Sopenharmony_cinir_schedule_dest_pressure(nir_dest *dest)
190bf215546Sopenharmony_ci{
191bf215546Sopenharmony_ci   if (dest->is_ssa)
192bf215546Sopenharmony_ci      return nir_schedule_def_pressure(&dest->ssa);
193bf215546Sopenharmony_ci   else
194bf215546Sopenharmony_ci      return dest->reg.reg->num_components;
195bf215546Sopenharmony_ci}
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci/**
198bf215546Sopenharmony_ci * Adds a dependency such that @after must appear in the final program after
199bf215546Sopenharmony_ci * @before.
200bf215546Sopenharmony_ci *
201bf215546Sopenharmony_ci * We add @before as a child of @after, so that DAG heads are the outputs of
202bf215546Sopenharmony_ci * the program and we make our scheduling decisions bottom to top.
203bf215546Sopenharmony_ci */
204bf215546Sopenharmony_cistatic void
205bf215546Sopenharmony_ciadd_dep(nir_deps_state *state,
206bf215546Sopenharmony_ci        nir_schedule_node *before,
207bf215546Sopenharmony_ci        nir_schedule_node *after)
208bf215546Sopenharmony_ci{
209bf215546Sopenharmony_ci   if (!before || !after)
210bf215546Sopenharmony_ci      return;
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_ci   assert(before != after);
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_ci   if (state->dir == F)
215bf215546Sopenharmony_ci      dag_add_edge(&before->dag, &after->dag, 0);
216bf215546Sopenharmony_ci   else
217bf215546Sopenharmony_ci      dag_add_edge(&after->dag, &before->dag, 0);
218bf215546Sopenharmony_ci}
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_cistatic void
222bf215546Sopenharmony_ciadd_read_dep(nir_deps_state *state,
223bf215546Sopenharmony_ci             nir_schedule_node *before,
224bf215546Sopenharmony_ci             nir_schedule_node *after)
225bf215546Sopenharmony_ci{
226bf215546Sopenharmony_ci   add_dep(state, before, after);
227bf215546Sopenharmony_ci}
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_cistatic void
230bf215546Sopenharmony_ciadd_write_dep(nir_deps_state *state,
231bf215546Sopenharmony_ci              nir_schedule_node **before,
232bf215546Sopenharmony_ci              nir_schedule_node *after)
233bf215546Sopenharmony_ci{
234bf215546Sopenharmony_ci   add_dep(state, *before, after);
235bf215546Sopenharmony_ci   *before = after;
236bf215546Sopenharmony_ci}
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_cistatic bool
239bf215546Sopenharmony_cinir_schedule_reg_src_deps(nir_src *src, void *in_state)
240bf215546Sopenharmony_ci{
241bf215546Sopenharmony_ci   nir_deps_state *state = in_state;
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci   if (src->is_ssa)
244bf215546Sopenharmony_ci      return true;
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci   struct hash_entry *entry = _mesa_hash_table_search(state->reg_map,
247bf215546Sopenharmony_ci                                                      src->reg.reg);
248bf215546Sopenharmony_ci   if (!entry)
249bf215546Sopenharmony_ci      return true;
250bf215546Sopenharmony_ci   nir_schedule_node *dst_n = entry->data;
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_ci   nir_schedule_node *src_n =
253bf215546Sopenharmony_ci      nir_schedule_get_node(state->scoreboard->instr_map,
254bf215546Sopenharmony_ci                            src->parent_instr);
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_ci   add_dep(state, dst_n, src_n);
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_ci   return true;
259bf215546Sopenharmony_ci}
260bf215546Sopenharmony_ci
261bf215546Sopenharmony_cistatic bool
262bf215546Sopenharmony_cinir_schedule_reg_dest_deps(nir_dest *dest, void *in_state)
263bf215546Sopenharmony_ci{
264bf215546Sopenharmony_ci   nir_deps_state *state = in_state;
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_ci   if (dest->is_ssa)
267bf215546Sopenharmony_ci      return true;
268bf215546Sopenharmony_ci
269bf215546Sopenharmony_ci   nir_schedule_node *dest_n =
270bf215546Sopenharmony_ci      nir_schedule_get_node(state->scoreboard->instr_map,
271bf215546Sopenharmony_ci                            dest->reg.parent_instr);
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ci   struct hash_entry *entry = _mesa_hash_table_search(state->reg_map,
274bf215546Sopenharmony_ci                                                      dest->reg.reg);
275bf215546Sopenharmony_ci   if (!entry) {
276bf215546Sopenharmony_ci      _mesa_hash_table_insert(state->reg_map, dest->reg.reg, dest_n);
277bf215546Sopenharmony_ci      return true;
278bf215546Sopenharmony_ci   }
279bf215546Sopenharmony_ci   nir_schedule_node **before = (nir_schedule_node **)&entry->data;
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci   add_write_dep(state, before, dest_n);
282bf215546Sopenharmony_ci
283bf215546Sopenharmony_ci   return true;
284bf215546Sopenharmony_ci}
285bf215546Sopenharmony_ci
286bf215546Sopenharmony_cistatic bool
287bf215546Sopenharmony_cinir_schedule_ssa_deps(nir_ssa_def *def, void *in_state)
288bf215546Sopenharmony_ci{
289bf215546Sopenharmony_ci   nir_deps_state *state = in_state;
290bf215546Sopenharmony_ci   struct hash_table *instr_map = state->scoreboard->instr_map;
291bf215546Sopenharmony_ci   nir_schedule_node *def_n = nir_schedule_get_node(instr_map, def->parent_instr);
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci   nir_foreach_use(src, def) {
294bf215546Sopenharmony_ci      nir_schedule_node *use_n = nir_schedule_get_node(instr_map,
295bf215546Sopenharmony_ci                                                       src->parent_instr);
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_ci      add_read_dep(state, def_n, use_n);
298bf215546Sopenharmony_ci   }
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci   return true;
301bf215546Sopenharmony_ci}
302bf215546Sopenharmony_ci
303bf215546Sopenharmony_cistatic struct nir_schedule_class_dep *
304bf215546Sopenharmony_cinir_schedule_get_class_dep(nir_deps_state *state,
305bf215546Sopenharmony_ci                           int klass)
306bf215546Sopenharmony_ci{
307bf215546Sopenharmony_ci   for (struct nir_schedule_class_dep *class_dep = state->class_deps;
308bf215546Sopenharmony_ci        class_dep != NULL;
309bf215546Sopenharmony_ci        class_dep = class_dep->next) {
310bf215546Sopenharmony_ci      if (class_dep->klass == klass)
311bf215546Sopenharmony_ci         return class_dep;
312bf215546Sopenharmony_ci   }
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_ci   struct nir_schedule_class_dep *class_dep =
315bf215546Sopenharmony_ci      ralloc(state->reg_map, struct nir_schedule_class_dep);
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_ci   class_dep->klass = klass;
318bf215546Sopenharmony_ci   class_dep->node = NULL;
319bf215546Sopenharmony_ci   class_dep->next = state->class_deps;
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci   state->class_deps = class_dep;
322bf215546Sopenharmony_ci
323bf215546Sopenharmony_ci   return class_dep;
324bf215546Sopenharmony_ci}
325bf215546Sopenharmony_ci
326bf215546Sopenharmony_cistatic void
327bf215546Sopenharmony_cinir_schedule_intrinsic_deps(nir_deps_state *state,
328bf215546Sopenharmony_ci                            nir_intrinsic_instr *instr)
329bf215546Sopenharmony_ci{
330bf215546Sopenharmony_ci   nir_schedule_node *n = nir_schedule_get_node(state->scoreboard->instr_map,
331bf215546Sopenharmony_ci                                                &instr->instr);
332bf215546Sopenharmony_ci   const nir_schedule_options *options = state->scoreboard->options;
333bf215546Sopenharmony_ci   nir_schedule_dependency dep;
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci   if (options->intrinsic_cb &&
336bf215546Sopenharmony_ci       options->intrinsic_cb(instr, &dep, options->intrinsic_cb_data)) {
337bf215546Sopenharmony_ci      struct nir_schedule_class_dep *class_dep =
338bf215546Sopenharmony_ci         nir_schedule_get_class_dep(state, dep.klass);
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_ci      switch (dep.type) {
341bf215546Sopenharmony_ci      case NIR_SCHEDULE_READ_DEPENDENCY:
342bf215546Sopenharmony_ci         add_read_dep(state, class_dep->node, n);
343bf215546Sopenharmony_ci         break;
344bf215546Sopenharmony_ci      case NIR_SCHEDULE_WRITE_DEPENDENCY:
345bf215546Sopenharmony_ci         add_write_dep(state, &class_dep->node, n);
346bf215546Sopenharmony_ci         break;
347bf215546Sopenharmony_ci      }
348bf215546Sopenharmony_ci   }
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_ci   switch (instr->intrinsic) {
351bf215546Sopenharmony_ci   case nir_intrinsic_load_uniform:
352bf215546Sopenharmony_ci   case nir_intrinsic_load_ubo:
353bf215546Sopenharmony_ci   case nir_intrinsic_load_front_face:
354bf215546Sopenharmony_ci      break;
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci   case nir_intrinsic_discard:
357bf215546Sopenharmony_ci   case nir_intrinsic_discard_if:
358bf215546Sopenharmony_ci   case nir_intrinsic_demote:
359bf215546Sopenharmony_ci   case nir_intrinsic_demote_if:
360bf215546Sopenharmony_ci   case nir_intrinsic_terminate:
361bf215546Sopenharmony_ci   case nir_intrinsic_terminate_if:
362bf215546Sopenharmony_ci      /* We are adding two dependencies:
363bf215546Sopenharmony_ci       *
364bf215546Sopenharmony_ci       * * A individual one that we could use to add a read_dep while handling
365bf215546Sopenharmony_ci       *   nir_instr_type_tex
366bf215546Sopenharmony_ci       *
367bf215546Sopenharmony_ci       * * Include it on the unknown intrinsic set, as we want discard to be
368bf215546Sopenharmony_ci       *   serialized in in the same order relative to intervening stores or
369bf215546Sopenharmony_ci       *   atomic accesses to SSBOs and images
370bf215546Sopenharmony_ci       */
371bf215546Sopenharmony_ci      add_write_dep(state, &state->discard, n);
372bf215546Sopenharmony_ci      add_write_dep(state, &state->unknown_intrinsic, n);
373bf215546Sopenharmony_ci      break;
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ci   case nir_intrinsic_store_output:
376bf215546Sopenharmony_ci      /* For some hardware and stages, output stores affect the same shared
377bf215546Sopenharmony_ci       * memory as input loads.
378bf215546Sopenharmony_ci       */
379bf215546Sopenharmony_ci      if ((state->scoreboard->options->stages_with_shared_io_memory &
380bf215546Sopenharmony_ci           (1 << state->scoreboard->shader->info.stage)))
381bf215546Sopenharmony_ci         add_write_dep(state, &state->load_input, n);
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci      /* Make sure that preceding discards stay before the store_output */
384bf215546Sopenharmony_ci      add_read_dep(state, state->discard, n);
385bf215546Sopenharmony_ci
386bf215546Sopenharmony_ci      break;
387bf215546Sopenharmony_ci
388bf215546Sopenharmony_ci   case nir_intrinsic_load_input:
389bf215546Sopenharmony_ci   case nir_intrinsic_load_per_vertex_input:
390bf215546Sopenharmony_ci      add_read_dep(state, state->load_input, n);
391bf215546Sopenharmony_ci      break;
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_ci   case nir_intrinsic_load_shared:
394bf215546Sopenharmony_ci   case nir_intrinsic_load_shared2_amd:
395bf215546Sopenharmony_ci      /* Don't move load_shared beyond a following store_shared, as it could
396bf215546Sopenharmony_ci       * change their value
397bf215546Sopenharmony_ci       */
398bf215546Sopenharmony_ci      add_read_dep(state, state->store_shared, n);
399bf215546Sopenharmony_ci      break;
400bf215546Sopenharmony_ci
401bf215546Sopenharmony_ci   case nir_intrinsic_store_shared:
402bf215546Sopenharmony_ci   case nir_intrinsic_store_shared2_amd:
403bf215546Sopenharmony_ci      add_write_dep(state, &state->store_shared, n);
404bf215546Sopenharmony_ci      break;
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_ci   case nir_intrinsic_control_barrier:
407bf215546Sopenharmony_ci   case nir_intrinsic_memory_barrier_shared:
408bf215546Sopenharmony_ci   case nir_intrinsic_group_memory_barrier:
409bf215546Sopenharmony_ci   /* A generic memory barrier can be emitted when multiple synchronization
410bf215546Sopenharmony_ci    * semantics are involved, including shared memory.
411bf215546Sopenharmony_ci    */
412bf215546Sopenharmony_ci   case nir_intrinsic_memory_barrier:
413bf215546Sopenharmony_ci      add_write_dep(state, &state->store_shared, n);
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_ci      /* Serialize against ssbos/atomics/etc. */
416bf215546Sopenharmony_ci      add_write_dep(state, &state->unknown_intrinsic, n);
417bf215546Sopenharmony_ci      break;
418bf215546Sopenharmony_ci
419bf215546Sopenharmony_ci   case nir_intrinsic_scoped_barrier: {
420bf215546Sopenharmony_ci      const nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
421bf215546Sopenharmony_ci
422bf215546Sopenharmony_ci      if (modes & nir_var_mem_shared)
423bf215546Sopenharmony_ci         add_write_dep(state, &state->store_shared, n);
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci      /* Serialize against other categories. */
426bf215546Sopenharmony_ci      add_write_dep(state, &state->unknown_intrinsic, n);
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci      break;
429bf215546Sopenharmony_ci   }
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_ci   default:
432bf215546Sopenharmony_ci      /* Attempt to handle other intrinsics that we haven't individually
433bf215546Sopenharmony_ci       * categorized by serializing them in the same order relative to each
434bf215546Sopenharmony_ci       * other.
435bf215546Sopenharmony_ci       */
436bf215546Sopenharmony_ci      add_write_dep(state, &state->unknown_intrinsic, n);
437bf215546Sopenharmony_ci      break;
438bf215546Sopenharmony_ci   }
439bf215546Sopenharmony_ci}
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_ci/**
442bf215546Sopenharmony_ci * Common code for dependencies that need to be tracked both forward and
443bf215546Sopenharmony_ci * backward.
444bf215546Sopenharmony_ci *
445bf215546Sopenharmony_ci * This is for things like "all reads of r4 have to happen between the r4
446bf215546Sopenharmony_ci * writes that surround them".
447bf215546Sopenharmony_ci */
448bf215546Sopenharmony_cistatic void
449bf215546Sopenharmony_cinir_schedule_calculate_deps(nir_deps_state *state, nir_schedule_node *n)
450bf215546Sopenharmony_ci{
451bf215546Sopenharmony_ci   nir_instr *instr = n->instr;
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_ci   /* For NIR SSA defs, we only need to do a single pass of making the uses
454bf215546Sopenharmony_ci    * depend on the def.
455bf215546Sopenharmony_ci    */
456bf215546Sopenharmony_ci   if (state->dir == F)
457bf215546Sopenharmony_ci      nir_foreach_ssa_def(instr, nir_schedule_ssa_deps, state);
458bf215546Sopenharmony_ci
459bf215546Sopenharmony_ci   /* For NIR regs, track the last writer in the scheduler state so that we
460bf215546Sopenharmony_ci    * can keep the writes in order and let reads get reordered only between
461bf215546Sopenharmony_ci    * each write.
462bf215546Sopenharmony_ci    */
463bf215546Sopenharmony_ci   nir_foreach_src(instr, nir_schedule_reg_src_deps, state);
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci   nir_foreach_dest(instr, nir_schedule_reg_dest_deps, state);
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci   /* Make sure any other instructions keep their positions relative to
468bf215546Sopenharmony_ci    * jumps.
469bf215546Sopenharmony_ci    */
470bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_jump)
471bf215546Sopenharmony_ci      add_read_dep(state, state->jump, n);
472bf215546Sopenharmony_ci
473bf215546Sopenharmony_ci   switch (instr->type) {
474bf215546Sopenharmony_ci   case nir_instr_type_ssa_undef:
475bf215546Sopenharmony_ci   case nir_instr_type_load_const:
476bf215546Sopenharmony_ci   case nir_instr_type_alu:
477bf215546Sopenharmony_ci   case nir_instr_type_deref:
478bf215546Sopenharmony_ci      break;
479bf215546Sopenharmony_ci
480bf215546Sopenharmony_ci   case nir_instr_type_tex:
481bf215546Sopenharmony_ci      /* Don't move texture ops before a discard, as that could increase
482bf215546Sopenharmony_ci       * memory bandwidth for reading the discarded samples.
483bf215546Sopenharmony_ci       */
484bf215546Sopenharmony_ci      add_read_dep(state, state->discard, n);
485bf215546Sopenharmony_ci      break;
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci   case nir_instr_type_jump:
488bf215546Sopenharmony_ci      add_write_dep(state, &state->jump, n);
489bf215546Sopenharmony_ci      break;
490bf215546Sopenharmony_ci
491bf215546Sopenharmony_ci   case nir_instr_type_call:
492bf215546Sopenharmony_ci      unreachable("Calls should have been lowered");
493bf215546Sopenharmony_ci      break;
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_ci   case nir_instr_type_parallel_copy:
496bf215546Sopenharmony_ci      unreachable("Parallel copies should have been lowered");
497bf215546Sopenharmony_ci      break;
498bf215546Sopenharmony_ci
499bf215546Sopenharmony_ci   case nir_instr_type_phi:
500bf215546Sopenharmony_ci      unreachable("nir_schedule() should be called after lowering from SSA");
501bf215546Sopenharmony_ci      break;
502bf215546Sopenharmony_ci
503bf215546Sopenharmony_ci   case nir_instr_type_intrinsic:
504bf215546Sopenharmony_ci      nir_schedule_intrinsic_deps(state, nir_instr_as_intrinsic(instr));
505bf215546Sopenharmony_ci      break;
506bf215546Sopenharmony_ci   }
507bf215546Sopenharmony_ci}
508bf215546Sopenharmony_ci
509bf215546Sopenharmony_cistatic void
510bf215546Sopenharmony_cicalculate_forward_deps(nir_schedule_scoreboard *scoreboard, nir_block *block)
511bf215546Sopenharmony_ci{
512bf215546Sopenharmony_ci   nir_deps_state state = {
513bf215546Sopenharmony_ci      .scoreboard = scoreboard,
514bf215546Sopenharmony_ci      .dir = F,
515bf215546Sopenharmony_ci      .reg_map = _mesa_pointer_hash_table_create(NULL),
516bf215546Sopenharmony_ci   };
517bf215546Sopenharmony_ci
518bf215546Sopenharmony_ci   nir_foreach_instr(instr, block) {
519bf215546Sopenharmony_ci      nir_schedule_node *node = nir_schedule_get_node(scoreboard->instr_map,
520bf215546Sopenharmony_ci                                                      instr);
521bf215546Sopenharmony_ci      nir_schedule_calculate_deps(&state, node);
522bf215546Sopenharmony_ci   }
523bf215546Sopenharmony_ci
524bf215546Sopenharmony_ci   ralloc_free(state.reg_map);
525bf215546Sopenharmony_ci}
526bf215546Sopenharmony_ci
527bf215546Sopenharmony_cistatic void
528bf215546Sopenharmony_cicalculate_reverse_deps(nir_schedule_scoreboard *scoreboard, nir_block *block)
529bf215546Sopenharmony_ci{
530bf215546Sopenharmony_ci   nir_deps_state state = {
531bf215546Sopenharmony_ci      .scoreboard = scoreboard,
532bf215546Sopenharmony_ci      .dir = R,
533bf215546Sopenharmony_ci      .reg_map = _mesa_pointer_hash_table_create(NULL),
534bf215546Sopenharmony_ci   };
535bf215546Sopenharmony_ci
536bf215546Sopenharmony_ci   nir_foreach_instr_reverse(instr, block) {
537bf215546Sopenharmony_ci      nir_schedule_node *node = nir_schedule_get_node(scoreboard->instr_map,
538bf215546Sopenharmony_ci                                                      instr);
539bf215546Sopenharmony_ci      nir_schedule_calculate_deps(&state, node);
540bf215546Sopenharmony_ci   }
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci   ralloc_free(state.reg_map);
543bf215546Sopenharmony_ci}
544bf215546Sopenharmony_ci
545bf215546Sopenharmony_citypedef struct {
546bf215546Sopenharmony_ci   nir_schedule_scoreboard *scoreboard;
547bf215546Sopenharmony_ci   int regs_freed;
548bf215546Sopenharmony_ci} nir_schedule_regs_freed_state;
549bf215546Sopenharmony_ci
550bf215546Sopenharmony_cistatic bool
551bf215546Sopenharmony_cinir_schedule_regs_freed_src_cb(nir_src *src, void *in_state)
552bf215546Sopenharmony_ci{
553bf215546Sopenharmony_ci   nir_schedule_regs_freed_state *state = in_state;
554bf215546Sopenharmony_ci   nir_schedule_scoreboard *scoreboard = state->scoreboard;
555bf215546Sopenharmony_ci   struct set *remaining_uses = nir_schedule_scoreboard_get_src(scoreboard, src);
556bf215546Sopenharmony_ci
557bf215546Sopenharmony_ci   if (remaining_uses->entries == 1 &&
558bf215546Sopenharmony_ci       _mesa_set_search(remaining_uses, src->parent_instr)) {
559bf215546Sopenharmony_ci      state->regs_freed += nir_schedule_src_pressure(src);
560bf215546Sopenharmony_ci   }
561bf215546Sopenharmony_ci
562bf215546Sopenharmony_ci   return true;
563bf215546Sopenharmony_ci}
564bf215546Sopenharmony_ci
565bf215546Sopenharmony_cistatic bool
566bf215546Sopenharmony_cinir_schedule_regs_freed_def_cb(nir_ssa_def *def, void *in_state)
567bf215546Sopenharmony_ci{
568bf215546Sopenharmony_ci   nir_schedule_regs_freed_state *state = in_state;
569bf215546Sopenharmony_ci
570bf215546Sopenharmony_ci   state->regs_freed -= nir_schedule_def_pressure(def);
571bf215546Sopenharmony_ci
572bf215546Sopenharmony_ci   return true;
573bf215546Sopenharmony_ci}
574bf215546Sopenharmony_ci
575bf215546Sopenharmony_cistatic bool
576bf215546Sopenharmony_cinir_schedule_regs_freed_dest_cb(nir_dest *dest, void *in_state)
577bf215546Sopenharmony_ci{
578bf215546Sopenharmony_ci   nir_schedule_regs_freed_state *state = in_state;
579bf215546Sopenharmony_ci   nir_schedule_scoreboard *scoreboard = state->scoreboard;
580bf215546Sopenharmony_ci
581bf215546Sopenharmony_ci   if (dest->is_ssa)
582bf215546Sopenharmony_ci      return true;
583bf215546Sopenharmony_ci
584bf215546Sopenharmony_ci   nir_register *reg = dest->reg.reg;
585bf215546Sopenharmony_ci
586bf215546Sopenharmony_ci   /* Only the first def of a reg counts against register pressure. */
587bf215546Sopenharmony_ci   if (!_mesa_set_search(scoreboard->live_values, reg))
588bf215546Sopenharmony_ci      state->regs_freed -= nir_schedule_dest_pressure(dest);
589bf215546Sopenharmony_ci
590bf215546Sopenharmony_ci   return true;
591bf215546Sopenharmony_ci}
592bf215546Sopenharmony_ci
593bf215546Sopenharmony_cistatic int
594bf215546Sopenharmony_cinir_schedule_regs_freed(nir_schedule_scoreboard *scoreboard, nir_schedule_node *n)
595bf215546Sopenharmony_ci{
596bf215546Sopenharmony_ci   nir_schedule_regs_freed_state state = {
597bf215546Sopenharmony_ci      .scoreboard = scoreboard,
598bf215546Sopenharmony_ci   };
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_ci   nir_foreach_src(n->instr, nir_schedule_regs_freed_src_cb, &state);
601bf215546Sopenharmony_ci
602bf215546Sopenharmony_ci   nir_foreach_ssa_def(n->instr, nir_schedule_regs_freed_def_cb, &state);
603bf215546Sopenharmony_ci
604bf215546Sopenharmony_ci   nir_foreach_dest(n->instr, nir_schedule_regs_freed_dest_cb, &state);
605bf215546Sopenharmony_ci
606bf215546Sopenharmony_ci   return state.regs_freed;
607bf215546Sopenharmony_ci}
608bf215546Sopenharmony_ci
609bf215546Sopenharmony_ci/**
610bf215546Sopenharmony_ci * Chooses an instruction that will minimise the register pressure as much as
611bf215546Sopenharmony_ci * possible. This should only be used as a fallback when the regular scheduling
612bf215546Sopenharmony_ci * generates a shader whose register allocation fails.
613bf215546Sopenharmony_ci */
614bf215546Sopenharmony_cistatic nir_schedule_node *
615bf215546Sopenharmony_cinir_schedule_choose_instruction_fallback(nir_schedule_scoreboard *scoreboard)
616bf215546Sopenharmony_ci{
617bf215546Sopenharmony_ci   nir_schedule_node *chosen = NULL;
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_ci   /* Find the leader in the ready (shouldn't-stall) set with the mininum
620bf215546Sopenharmony_ci    * cost.
621bf215546Sopenharmony_ci    */
622bf215546Sopenharmony_ci   list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) {
623bf215546Sopenharmony_ci      if (scoreboard->time < n->ready_time)
624bf215546Sopenharmony_ci         continue;
625bf215546Sopenharmony_ci
626bf215546Sopenharmony_ci      if (!chosen || chosen->max_delay > n->max_delay)
627bf215546Sopenharmony_ci         chosen = n;
628bf215546Sopenharmony_ci   }
629bf215546Sopenharmony_ci   if (chosen) {
630bf215546Sopenharmony_ci      if (debug) {
631bf215546Sopenharmony_ci         fprintf(stderr, "chose (ready fallback):          ");
632bf215546Sopenharmony_ci         nir_print_instr(chosen->instr, stderr);
633bf215546Sopenharmony_ci         fprintf(stderr, "\n");
634bf215546Sopenharmony_ci      }
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci      return chosen;
637bf215546Sopenharmony_ci   }
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_ci   /* Otherwise, choose the leader with the minimum cost. */
640bf215546Sopenharmony_ci   list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) {
641bf215546Sopenharmony_ci      if (!chosen || chosen->max_delay > n->max_delay)
642bf215546Sopenharmony_ci         chosen = n;
643bf215546Sopenharmony_ci   }
644bf215546Sopenharmony_ci   if (debug) {
645bf215546Sopenharmony_ci      fprintf(stderr, "chose (leader fallback):         ");
646bf215546Sopenharmony_ci      nir_print_instr(chosen->instr, stderr);
647bf215546Sopenharmony_ci      fprintf(stderr, "\n");
648bf215546Sopenharmony_ci   }
649bf215546Sopenharmony_ci
650bf215546Sopenharmony_ci   return chosen;
651bf215546Sopenharmony_ci}
652bf215546Sopenharmony_ci
653bf215546Sopenharmony_ci/**
654bf215546Sopenharmony_ci * Chooses an instruction to schedule using the Goodman/Hsu (1988) CSP (Code
655bf215546Sopenharmony_ci * Scheduling for Parallelism) heuristic.
656bf215546Sopenharmony_ci *
657bf215546Sopenharmony_ci * Picks an instruction on the critical that's ready to execute without
658bf215546Sopenharmony_ci * stalls, if possible, otherwise picks the instruction on the critical path.
659bf215546Sopenharmony_ci */
660bf215546Sopenharmony_cistatic nir_schedule_node *
661bf215546Sopenharmony_cinir_schedule_choose_instruction_csp(nir_schedule_scoreboard *scoreboard)
662bf215546Sopenharmony_ci{
663bf215546Sopenharmony_ci   nir_schedule_node *chosen = NULL;
664bf215546Sopenharmony_ci
665bf215546Sopenharmony_ci   /* Find the leader in the ready (shouldn't-stall) set with the maximum
666bf215546Sopenharmony_ci    * cost.
667bf215546Sopenharmony_ci    */
668bf215546Sopenharmony_ci   list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) {
669bf215546Sopenharmony_ci      if (scoreboard->time < n->ready_time)
670bf215546Sopenharmony_ci         continue;
671bf215546Sopenharmony_ci
672bf215546Sopenharmony_ci      if (!chosen || chosen->max_delay < n->max_delay)
673bf215546Sopenharmony_ci         chosen = n;
674bf215546Sopenharmony_ci   }
675bf215546Sopenharmony_ci   if (chosen) {
676bf215546Sopenharmony_ci      if (debug) {
677bf215546Sopenharmony_ci         fprintf(stderr, "chose (ready):          ");
678bf215546Sopenharmony_ci         nir_print_instr(chosen->instr, stderr);
679bf215546Sopenharmony_ci         fprintf(stderr, "\n");
680bf215546Sopenharmony_ci      }
681bf215546Sopenharmony_ci
682bf215546Sopenharmony_ci      return chosen;
683bf215546Sopenharmony_ci   }
684bf215546Sopenharmony_ci
685bf215546Sopenharmony_ci   /* Otherwise, choose the leader with the maximum cost. */
686bf215546Sopenharmony_ci   list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) {
687bf215546Sopenharmony_ci      if (!chosen || chosen->max_delay < n->max_delay)
688bf215546Sopenharmony_ci         chosen = n;
689bf215546Sopenharmony_ci   }
690bf215546Sopenharmony_ci   if (debug) {
691bf215546Sopenharmony_ci      fprintf(stderr, "chose (leader):         ");
692bf215546Sopenharmony_ci      nir_print_instr(chosen->instr, stderr);
693bf215546Sopenharmony_ci      fprintf(stderr, "\n");
694bf215546Sopenharmony_ci   }
695bf215546Sopenharmony_ci
696bf215546Sopenharmony_ci   return chosen;
697bf215546Sopenharmony_ci}
698bf215546Sopenharmony_ci
699bf215546Sopenharmony_ci/**
700bf215546Sopenharmony_ci * Chooses an instruction to schedule using the Goodman/Hsu (1988) CSR (Code
701bf215546Sopenharmony_ci * Scheduling for Register pressure) heuristic.
702bf215546Sopenharmony_ci */
703bf215546Sopenharmony_cistatic nir_schedule_node *
704bf215546Sopenharmony_cinir_schedule_choose_instruction_csr(nir_schedule_scoreboard *scoreboard)
705bf215546Sopenharmony_ci{
706bf215546Sopenharmony_ci   nir_schedule_node *chosen = NULL;
707bf215546Sopenharmony_ci
708bf215546Sopenharmony_ci   /* Find a ready inst with regs freed and pick the one with max cost. */
709bf215546Sopenharmony_ci   list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) {
710bf215546Sopenharmony_ci      if (n->ready_time > scoreboard->time)
711bf215546Sopenharmony_ci         continue;
712bf215546Sopenharmony_ci
713bf215546Sopenharmony_ci      int regs_freed = nir_schedule_regs_freed(scoreboard, n);
714bf215546Sopenharmony_ci
715bf215546Sopenharmony_ci      if (regs_freed > 0 && (!chosen || chosen->max_delay < n->max_delay)) {
716bf215546Sopenharmony_ci         chosen = n;
717bf215546Sopenharmony_ci      }
718bf215546Sopenharmony_ci   }
719bf215546Sopenharmony_ci   if (chosen) {
720bf215546Sopenharmony_ci      if (debug) {
721bf215546Sopenharmony_ci         fprintf(stderr, "chose (freed+ready):    ");
722bf215546Sopenharmony_ci         nir_print_instr(chosen->instr, stderr);
723bf215546Sopenharmony_ci         fprintf(stderr, "\n");
724bf215546Sopenharmony_ci      }
725bf215546Sopenharmony_ci
726bf215546Sopenharmony_ci      return chosen;
727bf215546Sopenharmony_ci   }
728bf215546Sopenharmony_ci
729bf215546Sopenharmony_ci   /* Find a leader with regs freed and pick the one with max cost. */
730bf215546Sopenharmony_ci   list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) {
731bf215546Sopenharmony_ci      int regs_freed = nir_schedule_regs_freed(scoreboard, n);
732bf215546Sopenharmony_ci
733bf215546Sopenharmony_ci      if (regs_freed > 0 && (!chosen || chosen->max_delay < n->max_delay)) {
734bf215546Sopenharmony_ci         chosen = n;
735bf215546Sopenharmony_ci      }
736bf215546Sopenharmony_ci   }
737bf215546Sopenharmony_ci   if (chosen) {
738bf215546Sopenharmony_ci      if (debug) {
739bf215546Sopenharmony_ci         fprintf(stderr, "chose (regs freed):     ");
740bf215546Sopenharmony_ci         nir_print_instr(chosen->instr, stderr);
741bf215546Sopenharmony_ci         fprintf(stderr, "\n");
742bf215546Sopenharmony_ci      }
743bf215546Sopenharmony_ci
744bf215546Sopenharmony_ci      return chosen;
745bf215546Sopenharmony_ci   }
746bf215546Sopenharmony_ci
747bf215546Sopenharmony_ci   /* Find a partially evaluated path and try to finish it off */
748bf215546Sopenharmony_ci   list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) {
749bf215546Sopenharmony_ci      if (n->partially_evaluated_path &&
750bf215546Sopenharmony_ci          (!chosen || chosen->max_delay < n->max_delay)) {
751bf215546Sopenharmony_ci         chosen = n;
752bf215546Sopenharmony_ci      }
753bf215546Sopenharmony_ci   }
754bf215546Sopenharmony_ci   if (chosen) {
755bf215546Sopenharmony_ci      if (debug) {
756bf215546Sopenharmony_ci         fprintf(stderr, "chose (partial path):   ");
757bf215546Sopenharmony_ci         nir_print_instr(chosen->instr, stderr);
758bf215546Sopenharmony_ci         fprintf(stderr, "\n");
759bf215546Sopenharmony_ci      }
760bf215546Sopenharmony_ci
761bf215546Sopenharmony_ci      return chosen;
762bf215546Sopenharmony_ci   }
763bf215546Sopenharmony_ci
764bf215546Sopenharmony_ci   /* Contra the paper, pick a leader with no effect on used regs.  This may
765bf215546Sopenharmony_ci    * open up new opportunities, as otherwise a single-operand instr consuming
766bf215546Sopenharmony_ci    * a value will tend to block finding freeing that value.  This had a
767bf215546Sopenharmony_ci    * massive effect on reducing spilling on V3D.
768bf215546Sopenharmony_ci    *
769bf215546Sopenharmony_ci    * XXX: Should this prioritize ready?
770bf215546Sopenharmony_ci    */
771bf215546Sopenharmony_ci   list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) {
772bf215546Sopenharmony_ci      if (nir_schedule_regs_freed(scoreboard, n) != 0)
773bf215546Sopenharmony_ci         continue;
774bf215546Sopenharmony_ci
775bf215546Sopenharmony_ci      if (!chosen || chosen->max_delay < n->max_delay)
776bf215546Sopenharmony_ci         chosen = n;
777bf215546Sopenharmony_ci   }
778bf215546Sopenharmony_ci   if (chosen) {
779bf215546Sopenharmony_ci      if (debug) {
780bf215546Sopenharmony_ci         fprintf(stderr, "chose (regs no-op):     ");
781bf215546Sopenharmony_ci         nir_print_instr(chosen->instr, stderr);
782bf215546Sopenharmony_ci         fprintf(stderr, "\n");
783bf215546Sopenharmony_ci      }
784bf215546Sopenharmony_ci
785bf215546Sopenharmony_ci      return chosen;
786bf215546Sopenharmony_ci   }
787bf215546Sopenharmony_ci
788bf215546Sopenharmony_ci   /* Pick the max delay of the remaining ready set. */
789bf215546Sopenharmony_ci   list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) {
790bf215546Sopenharmony_ci      if (n->ready_time > scoreboard->time)
791bf215546Sopenharmony_ci         continue;
792bf215546Sopenharmony_ci      if (!chosen || chosen->max_delay < n->max_delay)
793bf215546Sopenharmony_ci         chosen = n;
794bf215546Sopenharmony_ci   }
795bf215546Sopenharmony_ci   if (chosen) {
796bf215546Sopenharmony_ci      if (debug) {
797bf215546Sopenharmony_ci         fprintf(stderr, "chose (ready max delay):   ");
798bf215546Sopenharmony_ci         nir_print_instr(chosen->instr, stderr);
799bf215546Sopenharmony_ci         fprintf(stderr, "\n");
800bf215546Sopenharmony_ci      }
801bf215546Sopenharmony_ci      return chosen;
802bf215546Sopenharmony_ci   }
803bf215546Sopenharmony_ci
804bf215546Sopenharmony_ci   /* Pick the max delay of the remaining leaders. */
805bf215546Sopenharmony_ci   list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) {
806bf215546Sopenharmony_ci      if (!chosen || chosen->max_delay < n->max_delay)
807bf215546Sopenharmony_ci         chosen = n;
808bf215546Sopenharmony_ci   }
809bf215546Sopenharmony_ci
810bf215546Sopenharmony_ci   if (debug) {
811bf215546Sopenharmony_ci      fprintf(stderr, "chose (max delay):         ");
812bf215546Sopenharmony_ci      nir_print_instr(chosen->instr, stderr);
813bf215546Sopenharmony_ci      fprintf(stderr, "\n");
814bf215546Sopenharmony_ci   }
815bf215546Sopenharmony_ci
816bf215546Sopenharmony_ci   return chosen;
817bf215546Sopenharmony_ci}
818bf215546Sopenharmony_ci
819bf215546Sopenharmony_cistatic void
820bf215546Sopenharmony_cidump_state(nir_schedule_scoreboard *scoreboard)
821bf215546Sopenharmony_ci{
822bf215546Sopenharmony_ci   list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) {
823bf215546Sopenharmony_ci      fprintf(stderr, "maxdel %5d ", n->max_delay);
824bf215546Sopenharmony_ci      nir_print_instr(n->instr, stderr);
825bf215546Sopenharmony_ci      fprintf(stderr, "\n");
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_ci      util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) {
828bf215546Sopenharmony_ci         nir_schedule_node *child = (nir_schedule_node *)edge->child;
829bf215546Sopenharmony_ci
830bf215546Sopenharmony_ci         fprintf(stderr, " -> (%d parents) ", child->dag.parent_count);
831bf215546Sopenharmony_ci         nir_print_instr(child->instr, stderr);
832bf215546Sopenharmony_ci         fprintf(stderr, "\n");
833bf215546Sopenharmony_ci      }
834bf215546Sopenharmony_ci   }
835bf215546Sopenharmony_ci}
836bf215546Sopenharmony_ci
837bf215546Sopenharmony_cistatic void
838bf215546Sopenharmony_cinir_schedule_mark_use(nir_schedule_scoreboard *scoreboard,
839bf215546Sopenharmony_ci                      void *reg_or_def,
840bf215546Sopenharmony_ci                      nir_instr *reg_or_def_parent,
841bf215546Sopenharmony_ci                      int pressure)
842bf215546Sopenharmony_ci{
843bf215546Sopenharmony_ci   /* Make the value live if it's the first time it's been used. */
844bf215546Sopenharmony_ci   if (!_mesa_set_search(scoreboard->live_values, reg_or_def)) {
845bf215546Sopenharmony_ci      _mesa_set_add(scoreboard->live_values, reg_or_def);
846bf215546Sopenharmony_ci      scoreboard->pressure += pressure;
847bf215546Sopenharmony_ci   }
848bf215546Sopenharmony_ci
849bf215546Sopenharmony_ci   /* Make the value dead if it's the last remaining use.  Be careful when one
850bf215546Sopenharmony_ci    * instruction uses a value twice to not decrement pressure twice.
851bf215546Sopenharmony_ci    */
852bf215546Sopenharmony_ci   struct set *remaining_uses =
853bf215546Sopenharmony_ci      _mesa_hash_table_search_data(scoreboard->remaining_uses, reg_or_def);
854bf215546Sopenharmony_ci   struct set_entry *entry = _mesa_set_search(remaining_uses, reg_or_def_parent);
855bf215546Sopenharmony_ci   if (entry) {
856bf215546Sopenharmony_ci      _mesa_set_remove(remaining_uses, entry);
857bf215546Sopenharmony_ci
858bf215546Sopenharmony_ci      if (remaining_uses->entries == 0)
859bf215546Sopenharmony_ci         scoreboard->pressure -= pressure;
860bf215546Sopenharmony_ci   }
861bf215546Sopenharmony_ci}
862bf215546Sopenharmony_ci
863bf215546Sopenharmony_cistatic bool
864bf215546Sopenharmony_cinir_schedule_mark_src_scheduled(nir_src *src, void *state)
865bf215546Sopenharmony_ci{
866bf215546Sopenharmony_ci   nir_schedule_scoreboard *scoreboard = state;
867bf215546Sopenharmony_ci   struct set *remaining_uses = nir_schedule_scoreboard_get_src(scoreboard, src);
868bf215546Sopenharmony_ci
869bf215546Sopenharmony_ci   struct set_entry *entry = _mesa_set_search(remaining_uses,
870bf215546Sopenharmony_ci                                              src->parent_instr);
871bf215546Sopenharmony_ci   if (entry) {
872bf215546Sopenharmony_ci      /* Once we've used an SSA value in one instruction, bump the priority of
873bf215546Sopenharmony_ci       * the other uses so the SSA value can get fully consumed.
874bf215546Sopenharmony_ci       *
875bf215546Sopenharmony_ci       * We don't do this for registers, and it's would be a hassle and it's
876bf215546Sopenharmony_ci       * unclear if that would help or not.  Also, skip it for constants, as
877bf215546Sopenharmony_ci       * they're often folded as immediates into backend instructions and have
878bf215546Sopenharmony_ci       * many unrelated instructions all referencing the same value (0).
879bf215546Sopenharmony_ci       */
880bf215546Sopenharmony_ci      if (src->is_ssa &&
881bf215546Sopenharmony_ci          src->ssa->parent_instr->type != nir_instr_type_load_const) {
882bf215546Sopenharmony_ci         nir_foreach_use(other_src, src->ssa) {
883bf215546Sopenharmony_ci            if (other_src->parent_instr == src->parent_instr)
884bf215546Sopenharmony_ci               continue;
885bf215546Sopenharmony_ci
886bf215546Sopenharmony_ci            nir_schedule_node *n =
887bf215546Sopenharmony_ci               nir_schedule_get_node(scoreboard->instr_map,
888bf215546Sopenharmony_ci                                     other_src->parent_instr);
889bf215546Sopenharmony_ci
890bf215546Sopenharmony_ci            if (n && !n->partially_evaluated_path) {
891bf215546Sopenharmony_ci               if (debug) {
892bf215546Sopenharmony_ci                  fprintf(stderr, "  New partially evaluated path: ");
893bf215546Sopenharmony_ci                  nir_print_instr(n->instr, stderr);
894bf215546Sopenharmony_ci                  fprintf(stderr, "\n");
895bf215546Sopenharmony_ci               }
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_ci               n->partially_evaluated_path = true;
898bf215546Sopenharmony_ci            }
899bf215546Sopenharmony_ci         }
900bf215546Sopenharmony_ci      }
901bf215546Sopenharmony_ci   }
902bf215546Sopenharmony_ci
903bf215546Sopenharmony_ci   nir_schedule_mark_use(scoreboard,
904bf215546Sopenharmony_ci                         src->is_ssa ? (void *)src->ssa : (void *)src->reg.reg,
905bf215546Sopenharmony_ci                         src->parent_instr,
906bf215546Sopenharmony_ci                         nir_schedule_src_pressure(src));
907bf215546Sopenharmony_ci
908bf215546Sopenharmony_ci   return true;
909bf215546Sopenharmony_ci}
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_cistatic bool
912bf215546Sopenharmony_cinir_schedule_mark_def_scheduled(nir_ssa_def *def, void *state)
913bf215546Sopenharmony_ci{
914bf215546Sopenharmony_ci   nir_schedule_scoreboard *scoreboard = state;
915bf215546Sopenharmony_ci
916bf215546Sopenharmony_ci   nir_schedule_mark_use(scoreboard, def, def->parent_instr,
917bf215546Sopenharmony_ci                         nir_schedule_def_pressure(def));
918bf215546Sopenharmony_ci
919bf215546Sopenharmony_ci   return true;
920bf215546Sopenharmony_ci}
921bf215546Sopenharmony_ci
922bf215546Sopenharmony_cistatic bool
923bf215546Sopenharmony_cinir_schedule_mark_dest_scheduled(nir_dest *dest, void *state)
924bf215546Sopenharmony_ci{
925bf215546Sopenharmony_ci   nir_schedule_scoreboard *scoreboard = state;
926bf215546Sopenharmony_ci
927bf215546Sopenharmony_ci   /* SSA defs were handled in nir_schedule_mark_def_scheduled()
928bf215546Sopenharmony_ci    */
929bf215546Sopenharmony_ci   if (dest->is_ssa)
930bf215546Sopenharmony_ci      return true;
931bf215546Sopenharmony_ci
932bf215546Sopenharmony_ci   /* XXX: This is not actually accurate for regs -- the last use of a reg may
933bf215546Sopenharmony_ci    * have a live interval that extends across control flow.  We should
934bf215546Sopenharmony_ci    * calculate the live ranges of regs, and have scheduler nodes for the CF
935bf215546Sopenharmony_ci    * nodes that also "use" the reg.
936bf215546Sopenharmony_ci    */
937bf215546Sopenharmony_ci   nir_schedule_mark_use(scoreboard, dest->reg.reg,
938bf215546Sopenharmony_ci                         dest->reg.parent_instr,
939bf215546Sopenharmony_ci                         nir_schedule_dest_pressure(dest));
940bf215546Sopenharmony_ci
941bf215546Sopenharmony_ci   return true;
942bf215546Sopenharmony_ci}
943bf215546Sopenharmony_ci
944bf215546Sopenharmony_cistatic void
945bf215546Sopenharmony_cinir_schedule_mark_node_scheduled(nir_schedule_scoreboard *scoreboard,
946bf215546Sopenharmony_ci                                 nir_schedule_node *n)
947bf215546Sopenharmony_ci{
948bf215546Sopenharmony_ci   nir_foreach_src(n->instr, nir_schedule_mark_src_scheduled, scoreboard);
949bf215546Sopenharmony_ci   nir_foreach_ssa_def(n->instr, nir_schedule_mark_def_scheduled, scoreboard);
950bf215546Sopenharmony_ci   nir_foreach_dest(n->instr, nir_schedule_mark_dest_scheduled, scoreboard);
951bf215546Sopenharmony_ci
952bf215546Sopenharmony_ci   util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) {
953bf215546Sopenharmony_ci      nir_schedule_node *child = (nir_schedule_node *)edge->child;
954bf215546Sopenharmony_ci
955bf215546Sopenharmony_ci      child->ready_time = MAX2(child->ready_time,
956bf215546Sopenharmony_ci                               scoreboard->time + n->delay);
957bf215546Sopenharmony_ci
958bf215546Sopenharmony_ci      if (child->dag.parent_count == 1) {
959bf215546Sopenharmony_ci         if (debug) {
960bf215546Sopenharmony_ci            fprintf(stderr, "  New DAG head: ");
961bf215546Sopenharmony_ci            nir_print_instr(child->instr, stderr);
962bf215546Sopenharmony_ci            fprintf(stderr, "\n");
963bf215546Sopenharmony_ci         }
964bf215546Sopenharmony_ci      }
965bf215546Sopenharmony_ci   }
966bf215546Sopenharmony_ci
967bf215546Sopenharmony_ci   dag_prune_head(scoreboard->dag, &n->dag);
968bf215546Sopenharmony_ci
969bf215546Sopenharmony_ci   scoreboard->time = MAX2(n->ready_time, scoreboard->time);
970bf215546Sopenharmony_ci   scoreboard->time++;
971bf215546Sopenharmony_ci}
972bf215546Sopenharmony_ci
973bf215546Sopenharmony_cistatic void
974bf215546Sopenharmony_cinir_schedule_instructions(nir_schedule_scoreboard *scoreboard, nir_block *block)
975bf215546Sopenharmony_ci{
976bf215546Sopenharmony_ci   while (!list_is_empty(&scoreboard->dag->heads)) {
977bf215546Sopenharmony_ci      if (debug) {
978bf215546Sopenharmony_ci         fprintf(stderr, "current list:\n");
979bf215546Sopenharmony_ci         dump_state(scoreboard);
980bf215546Sopenharmony_ci      }
981bf215546Sopenharmony_ci
982bf215546Sopenharmony_ci      nir_schedule_node *chosen;
983bf215546Sopenharmony_ci      if (scoreboard->options->fallback)
984bf215546Sopenharmony_ci         chosen = nir_schedule_choose_instruction_fallback(scoreboard);
985bf215546Sopenharmony_ci      else if (scoreboard->pressure < scoreboard->options->threshold)
986bf215546Sopenharmony_ci         chosen = nir_schedule_choose_instruction_csp(scoreboard);
987bf215546Sopenharmony_ci      else
988bf215546Sopenharmony_ci         chosen = nir_schedule_choose_instruction_csr(scoreboard);
989bf215546Sopenharmony_ci
990bf215546Sopenharmony_ci      /* Now that we've scheduled a new instruction, some of its children may
991bf215546Sopenharmony_ci       * be promoted to the list of instructions ready to be scheduled.
992bf215546Sopenharmony_ci       */
993bf215546Sopenharmony_ci      nir_schedule_mark_node_scheduled(scoreboard, chosen);
994bf215546Sopenharmony_ci
995bf215546Sopenharmony_ci      /* Move the instruction to the end (so our first chosen instructions are
996bf215546Sopenharmony_ci       * the start of the program).
997bf215546Sopenharmony_ci       */
998bf215546Sopenharmony_ci      exec_node_remove(&chosen->instr->node);
999bf215546Sopenharmony_ci      exec_list_push_tail(&block->instr_list, &chosen->instr->node);
1000bf215546Sopenharmony_ci
1001bf215546Sopenharmony_ci      if (debug)
1002bf215546Sopenharmony_ci         fprintf(stderr, "\n");
1003bf215546Sopenharmony_ci   }
1004bf215546Sopenharmony_ci}
1005bf215546Sopenharmony_ci
1006bf215546Sopenharmony_cistatic uint32_t
1007bf215546Sopenharmony_cinir_schedule_get_delay(nir_schedule_scoreboard *scoreboard, nir_instr *instr)
1008bf215546Sopenharmony_ci{
1009bf215546Sopenharmony_ci   if (scoreboard->options->instr_delay_cb) {
1010bf215546Sopenharmony_ci      void *cb_data = scoreboard->options->instr_delay_cb_data;
1011bf215546Sopenharmony_ci      return scoreboard->options->instr_delay_cb(instr, cb_data);
1012bf215546Sopenharmony_ci   }
1013bf215546Sopenharmony_ci
1014bf215546Sopenharmony_ci   switch (instr->type) {
1015bf215546Sopenharmony_ci   case nir_instr_type_ssa_undef:
1016bf215546Sopenharmony_ci   case nir_instr_type_load_const:
1017bf215546Sopenharmony_ci   case nir_instr_type_alu:
1018bf215546Sopenharmony_ci   case nir_instr_type_deref:
1019bf215546Sopenharmony_ci   case nir_instr_type_jump:
1020bf215546Sopenharmony_ci   case nir_instr_type_parallel_copy:
1021bf215546Sopenharmony_ci   case nir_instr_type_call:
1022bf215546Sopenharmony_ci   case nir_instr_type_phi:
1023bf215546Sopenharmony_ci      return 1;
1024bf215546Sopenharmony_ci
1025bf215546Sopenharmony_ci   case nir_instr_type_intrinsic:
1026bf215546Sopenharmony_ci      switch (nir_instr_as_intrinsic(instr)->intrinsic) {
1027bf215546Sopenharmony_ci      case nir_intrinsic_load_ubo:
1028bf215546Sopenharmony_ci      case nir_intrinsic_load_ssbo:
1029bf215546Sopenharmony_ci      case nir_intrinsic_load_scratch:
1030bf215546Sopenharmony_ci      case nir_intrinsic_load_shared:
1031bf215546Sopenharmony_ci      case nir_intrinsic_image_load:
1032bf215546Sopenharmony_ci         return 50;
1033bf215546Sopenharmony_ci      default:
1034bf215546Sopenharmony_ci         return 1;
1035bf215546Sopenharmony_ci      }
1036bf215546Sopenharmony_ci      break;
1037bf215546Sopenharmony_ci
1038bf215546Sopenharmony_ci   case nir_instr_type_tex:
1039bf215546Sopenharmony_ci      /* Pick some large number to try to fetch textures early and sample them
1040bf215546Sopenharmony_ci       * late.
1041bf215546Sopenharmony_ci       */
1042bf215546Sopenharmony_ci      return 100;
1043bf215546Sopenharmony_ci   }
1044bf215546Sopenharmony_ci
1045bf215546Sopenharmony_ci   return 0;
1046bf215546Sopenharmony_ci}
1047bf215546Sopenharmony_ci
1048bf215546Sopenharmony_cistatic void
1049bf215546Sopenharmony_cinir_schedule_dag_max_delay_cb(struct dag_node *node, void *state)
1050bf215546Sopenharmony_ci{
1051bf215546Sopenharmony_ci   nir_schedule_node *n = (nir_schedule_node *)node;
1052bf215546Sopenharmony_ci   uint32_t max_delay = 0;
1053bf215546Sopenharmony_ci
1054bf215546Sopenharmony_ci   util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) {
1055bf215546Sopenharmony_ci      nir_schedule_node *child = (nir_schedule_node *)edge->child;
1056bf215546Sopenharmony_ci      max_delay = MAX2(child->max_delay, max_delay);
1057bf215546Sopenharmony_ci   }
1058bf215546Sopenharmony_ci
1059bf215546Sopenharmony_ci   n->max_delay = MAX2(n->max_delay, max_delay + n->delay);
1060bf215546Sopenharmony_ci }
1061bf215546Sopenharmony_ci
1062bf215546Sopenharmony_cistatic void
1063bf215546Sopenharmony_cinir_schedule_block(nir_schedule_scoreboard *scoreboard, nir_block *block)
1064bf215546Sopenharmony_ci{
1065bf215546Sopenharmony_ci   void *mem_ctx = ralloc_context(NULL);
1066bf215546Sopenharmony_ci   scoreboard->instr_map = _mesa_pointer_hash_table_create(mem_ctx);
1067bf215546Sopenharmony_ci
1068bf215546Sopenharmony_ci   scoreboard->dag = dag_create(mem_ctx);
1069bf215546Sopenharmony_ci
1070bf215546Sopenharmony_ci   nir_foreach_instr(instr, block) {
1071bf215546Sopenharmony_ci      nir_schedule_node *n =
1072bf215546Sopenharmony_ci         rzalloc(mem_ctx, nir_schedule_node);
1073bf215546Sopenharmony_ci
1074bf215546Sopenharmony_ci      n->instr = instr;
1075bf215546Sopenharmony_ci      n->delay = nir_schedule_get_delay(scoreboard, instr);
1076bf215546Sopenharmony_ci      dag_init_node(scoreboard->dag, &n->dag);
1077bf215546Sopenharmony_ci
1078bf215546Sopenharmony_ci      _mesa_hash_table_insert(scoreboard->instr_map, instr, n);
1079bf215546Sopenharmony_ci   }
1080bf215546Sopenharmony_ci
1081bf215546Sopenharmony_ci   calculate_forward_deps(scoreboard, block);
1082bf215546Sopenharmony_ci   calculate_reverse_deps(scoreboard, block);
1083bf215546Sopenharmony_ci
1084bf215546Sopenharmony_ci   dag_traverse_bottom_up(scoreboard->dag, nir_schedule_dag_max_delay_cb, NULL);
1085bf215546Sopenharmony_ci
1086bf215546Sopenharmony_ci   nir_schedule_instructions(scoreboard, block);
1087bf215546Sopenharmony_ci
1088bf215546Sopenharmony_ci   ralloc_free(mem_ctx);
1089bf215546Sopenharmony_ci   scoreboard->instr_map = NULL;
1090bf215546Sopenharmony_ci}
1091bf215546Sopenharmony_ci
1092bf215546Sopenharmony_cistatic bool
1093bf215546Sopenharmony_cinir_schedule_ssa_def_init_scoreboard(nir_ssa_def *def, void *state)
1094bf215546Sopenharmony_ci{
1095bf215546Sopenharmony_ci   nir_schedule_scoreboard *scoreboard = state;
1096bf215546Sopenharmony_ci   struct set *def_uses = _mesa_pointer_set_create(scoreboard);
1097bf215546Sopenharmony_ci
1098bf215546Sopenharmony_ci   _mesa_hash_table_insert(scoreboard->remaining_uses, def, def_uses);
1099bf215546Sopenharmony_ci
1100bf215546Sopenharmony_ci   _mesa_set_add(def_uses, def->parent_instr);
1101bf215546Sopenharmony_ci
1102bf215546Sopenharmony_ci   nir_foreach_use(src, def) {
1103bf215546Sopenharmony_ci      _mesa_set_add(def_uses, src->parent_instr);
1104bf215546Sopenharmony_ci   }
1105bf215546Sopenharmony_ci
1106bf215546Sopenharmony_ci   /* XXX: Handle if uses */
1107bf215546Sopenharmony_ci
1108bf215546Sopenharmony_ci   return true;
1109bf215546Sopenharmony_ci}
1110bf215546Sopenharmony_ci
1111bf215546Sopenharmony_cistatic nir_schedule_scoreboard *
1112bf215546Sopenharmony_cinir_schedule_get_scoreboard(nir_shader *shader,
1113bf215546Sopenharmony_ci                            const nir_schedule_options *options)
1114bf215546Sopenharmony_ci{
1115bf215546Sopenharmony_ci   nir_schedule_scoreboard *scoreboard = rzalloc(NULL, nir_schedule_scoreboard);
1116bf215546Sopenharmony_ci
1117bf215546Sopenharmony_ci   scoreboard->shader = shader;
1118bf215546Sopenharmony_ci   scoreboard->live_values = _mesa_pointer_set_create(scoreboard);
1119bf215546Sopenharmony_ci   scoreboard->remaining_uses = _mesa_pointer_hash_table_create(scoreboard);
1120bf215546Sopenharmony_ci   scoreboard->options = options;
1121bf215546Sopenharmony_ci   scoreboard->pressure = 0;
1122bf215546Sopenharmony_ci
1123bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
1124bf215546Sopenharmony_ci      nir_foreach_register(reg, &function->impl->registers) {
1125bf215546Sopenharmony_ci         struct set *register_uses =
1126bf215546Sopenharmony_ci            _mesa_pointer_set_create(scoreboard);
1127bf215546Sopenharmony_ci
1128bf215546Sopenharmony_ci         _mesa_hash_table_insert(scoreboard->remaining_uses, reg, register_uses);
1129bf215546Sopenharmony_ci
1130bf215546Sopenharmony_ci         nir_foreach_use(src, reg) {
1131bf215546Sopenharmony_ci            _mesa_set_add(register_uses, src->parent_instr);
1132bf215546Sopenharmony_ci         }
1133bf215546Sopenharmony_ci
1134bf215546Sopenharmony_ci         /* XXX: Handle if uses */
1135bf215546Sopenharmony_ci
1136bf215546Sopenharmony_ci         nir_foreach_def(dest, reg) {
1137bf215546Sopenharmony_ci            _mesa_set_add(register_uses, dest->reg.parent_instr);
1138bf215546Sopenharmony_ci         }
1139bf215546Sopenharmony_ci      }
1140bf215546Sopenharmony_ci
1141bf215546Sopenharmony_ci      nir_foreach_block(block, function->impl) {
1142bf215546Sopenharmony_ci         nir_foreach_instr(instr, block) {
1143bf215546Sopenharmony_ci            nir_foreach_ssa_def(instr, nir_schedule_ssa_def_init_scoreboard,
1144bf215546Sopenharmony_ci                                scoreboard);
1145bf215546Sopenharmony_ci         }
1146bf215546Sopenharmony_ci
1147bf215546Sopenharmony_ci         /* XXX: We're ignoring if uses, which may prioritize scheduling other
1148bf215546Sopenharmony_ci          * uses of the if src even when it doesn't help.  That's not many
1149bf215546Sopenharmony_ci          * values, though, so meh.
1150bf215546Sopenharmony_ci          */
1151bf215546Sopenharmony_ci      }
1152bf215546Sopenharmony_ci   }
1153bf215546Sopenharmony_ci
1154bf215546Sopenharmony_ci   return scoreboard;
1155bf215546Sopenharmony_ci}
1156bf215546Sopenharmony_ci
1157bf215546Sopenharmony_cistatic void
1158bf215546Sopenharmony_cinir_schedule_validate_uses(nir_schedule_scoreboard *scoreboard)
1159bf215546Sopenharmony_ci{
1160bf215546Sopenharmony_ci#ifdef NDEBUG
1161bf215546Sopenharmony_ci   return;
1162bf215546Sopenharmony_ci#endif
1163bf215546Sopenharmony_ci
1164bf215546Sopenharmony_ci   bool any_uses = false;
1165bf215546Sopenharmony_ci
1166bf215546Sopenharmony_ci   hash_table_foreach(scoreboard->remaining_uses, entry) {
1167bf215546Sopenharmony_ci      struct set *remaining_uses = entry->data;
1168bf215546Sopenharmony_ci
1169bf215546Sopenharmony_ci      set_foreach(remaining_uses, instr_entry) {
1170bf215546Sopenharmony_ci         if (!any_uses) {
1171bf215546Sopenharmony_ci            fprintf(stderr, "Tracked uses remain after scheduling.  "
1172bf215546Sopenharmony_ci                    "Affected instructions: \n");
1173bf215546Sopenharmony_ci            any_uses = true;
1174bf215546Sopenharmony_ci         }
1175bf215546Sopenharmony_ci         nir_print_instr(instr_entry->key, stderr);
1176bf215546Sopenharmony_ci         fprintf(stderr, "\n");
1177bf215546Sopenharmony_ci      }
1178bf215546Sopenharmony_ci   }
1179bf215546Sopenharmony_ci
1180bf215546Sopenharmony_ci   assert(!any_uses);
1181bf215546Sopenharmony_ci}
1182bf215546Sopenharmony_ci
1183bf215546Sopenharmony_ci/**
1184bf215546Sopenharmony_ci * Schedules the NIR instructions to try to decrease stalls (for example,
1185bf215546Sopenharmony_ci * delaying texture reads) while managing register pressure.
1186bf215546Sopenharmony_ci *
1187bf215546Sopenharmony_ci * The threshold represents "number of NIR register/SSA def channels live
1188bf215546Sopenharmony_ci * before switching the scheduling heuristic to reduce register pressure",
1189bf215546Sopenharmony_ci * since most of our GPU architectures are scalar (extending to vector with a
1190bf215546Sopenharmony_ci * flag wouldn't be hard).  This number should be a bit below the number of
1191bf215546Sopenharmony_ci * registers available (counting any that may be occupied by system value
1192bf215546Sopenharmony_ci * payload values, for example), since the heuristic may not always be able to
1193bf215546Sopenharmony_ci * free a register immediately.  The amount below the limit is up to you to
1194bf215546Sopenharmony_ci * tune.
1195bf215546Sopenharmony_ci */
1196bf215546Sopenharmony_civoid
1197bf215546Sopenharmony_cinir_schedule(nir_shader *shader,
1198bf215546Sopenharmony_ci             const nir_schedule_options *options)
1199bf215546Sopenharmony_ci{
1200bf215546Sopenharmony_ci   nir_schedule_scoreboard *scoreboard = nir_schedule_get_scoreboard(shader,
1201bf215546Sopenharmony_ci                                                                     options);
1202bf215546Sopenharmony_ci
1203bf215546Sopenharmony_ci   if (debug) {
1204bf215546Sopenharmony_ci      fprintf(stderr, "NIR shader before scheduling:\n");
1205bf215546Sopenharmony_ci      nir_print_shader(shader, stderr);
1206bf215546Sopenharmony_ci   }
1207bf215546Sopenharmony_ci
1208bf215546Sopenharmony_ci   nir_foreach_function(function, shader) {
1209bf215546Sopenharmony_ci      if (!function->impl)
1210bf215546Sopenharmony_ci         continue;
1211bf215546Sopenharmony_ci
1212bf215546Sopenharmony_ci      nir_foreach_block(block, function->impl) {
1213bf215546Sopenharmony_ci         nir_schedule_block(scoreboard, block);
1214bf215546Sopenharmony_ci      }
1215bf215546Sopenharmony_ci   }
1216bf215546Sopenharmony_ci
1217bf215546Sopenharmony_ci   nir_schedule_validate_uses(scoreboard);
1218bf215546Sopenharmony_ci
1219bf215546Sopenharmony_ci   ralloc_free(scoreboard);
1220bf215546Sopenharmony_ci}
1221