1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2019 Broadcom 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir_schedule.h" 25bf215546Sopenharmony_ci#include "util/dag.h" 26bf215546Sopenharmony_ci#include "util/u_dynarray.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/** @file 29bf215546Sopenharmony_ci * 30bf215546Sopenharmony_ci * Implements basic-block-level prepass instruction scheduling in NIR to 31bf215546Sopenharmony_ci * manage register pressure. 32bf215546Sopenharmony_ci * 33bf215546Sopenharmony_ci * This is based on the Goodman/Hsu paper (1988, cached copy at 34bf215546Sopenharmony_ci * https://people.freedesktop.org/~anholt/scheduling-goodman-hsu.pdf). We 35bf215546Sopenharmony_ci * make up the DDG for NIR (which can be mostly done using the NIR def/use 36bf215546Sopenharmony_ci * chains for SSA instructions, plus some edges for ordering register writes 37bf215546Sopenharmony_ci * vs reads, and some more for ordering intrinsics). Then we pick heads off 38bf215546Sopenharmony_ci * of the DDG using their heuristic to emit the NIR instructions back into the 39bf215546Sopenharmony_ci * block in their new order. 40bf215546Sopenharmony_ci * 41bf215546Sopenharmony_ci * The hard case for prepass scheduling on GPUs seems to always be consuming 42bf215546Sopenharmony_ci * texture/ubo results. The register pressure heuristic doesn't want to pick 43bf215546Sopenharmony_ci * an instr that starts consuming texture results because it usually won't be 44bf215546Sopenharmony_ci * the only usage, so that instruction will increase pressure. 45bf215546Sopenharmony_ci * 46bf215546Sopenharmony_ci * If you try to force consumption of tex results always, then in a case where 47bf215546Sopenharmony_ci * single sample is used for many outputs, you'll end up picking every other 48bf215546Sopenharmony_ci * user and expanding register pressure. The partially_evaluated_path flag 49bf215546Sopenharmony_ci * helps tremendously, in that if you happen for whatever reason to pick a 50bf215546Sopenharmony_ci * texture sample's output, then you'll try to finish off that sample. Future 51bf215546Sopenharmony_ci * work may include doing some local search before locking in a choice, to try 52bf215546Sopenharmony_ci * to more reliably find the case where just a few choices going against the 53bf215546Sopenharmony_ci * heuristic can manage to free the whole vector. 54bf215546Sopenharmony_ci */ 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_cistatic bool debug; 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci/** 59bf215546Sopenharmony_ci * Represents a node in the DDG for a NIR instruction. 60bf215546Sopenharmony_ci */ 61bf215546Sopenharmony_citypedef struct { 62bf215546Sopenharmony_ci struct dag_node dag; /* must be first for our u_dynarray_foreach */ 63bf215546Sopenharmony_ci nir_instr *instr; 64bf215546Sopenharmony_ci bool partially_evaluated_path; 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci /* Approximate estimate of the delay between starting this instruction and 67bf215546Sopenharmony_ci * its results being available. 68bf215546Sopenharmony_ci * 69bf215546Sopenharmony_ci * Accuracy is not too important, given that we're prepass scheduling here 70bf215546Sopenharmony_ci * and just trying to reduce excess dependencies introduced by a register 71bf215546Sopenharmony_ci * allocator by stretching out the live intervals of expensive 72bf215546Sopenharmony_ci * instructions. 73bf215546Sopenharmony_ci */ 74bf215546Sopenharmony_ci uint32_t delay; 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci /* Cost of the maximum-delay path from this node to the leaves. */ 77bf215546Sopenharmony_ci uint32_t max_delay; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci /* scoreboard->time value when this instruction can be scheduled without 80bf215546Sopenharmony_ci * any stalls expected. 81bf215546Sopenharmony_ci */ 82bf215546Sopenharmony_ci uint32_t ready_time; 83bf215546Sopenharmony_ci} nir_schedule_node; 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_citypedef struct { 86bf215546Sopenharmony_ci struct dag *dag; 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci nir_shader *shader; 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci /* Mapping from nir_register * or nir_ssa_def * to a struct set of 91bf215546Sopenharmony_ci * instructions remaining to be scheduled using the register. 92bf215546Sopenharmony_ci */ 93bf215546Sopenharmony_ci struct hash_table *remaining_uses; 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci /* Map from nir_instr to nir_schedule_node * */ 96bf215546Sopenharmony_ci struct hash_table *instr_map; 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci /* Set of nir_register * or nir_ssa_def * that have had any instruction 99bf215546Sopenharmony_ci * scheduled on them. 100bf215546Sopenharmony_ci */ 101bf215546Sopenharmony_ci struct set *live_values; 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci /* An abstract approximation of the number of nir_scheduler_node->delay 104bf215546Sopenharmony_ci * units since the start of the shader. 105bf215546Sopenharmony_ci */ 106bf215546Sopenharmony_ci uint32_t time; 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci /* Number of channels currently used by the NIR instructions that have been 109bf215546Sopenharmony_ci * scheduled. 110bf215546Sopenharmony_ci */ 111bf215546Sopenharmony_ci int pressure; 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci /* Options specified by the backend */ 114bf215546Sopenharmony_ci const nir_schedule_options *options; 115bf215546Sopenharmony_ci} nir_schedule_scoreboard; 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci/* When walking the instructions in reverse, we use this flag to swap 118bf215546Sopenharmony_ci * before/after in add_dep(). 119bf215546Sopenharmony_ci */ 120bf215546Sopenharmony_cienum direction { F, R }; 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_cistruct nir_schedule_class_dep { 123bf215546Sopenharmony_ci int klass; 124bf215546Sopenharmony_ci nir_schedule_node *node; 125bf215546Sopenharmony_ci struct nir_schedule_class_dep *next; 126bf215546Sopenharmony_ci}; 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_citypedef struct { 129bf215546Sopenharmony_ci nir_schedule_scoreboard *scoreboard; 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci /* Map from nir_register to nir_schedule_node * */ 132bf215546Sopenharmony_ci struct hash_table *reg_map; 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci /* Scheduler nodes for last instruction involved in some class of dependency. 135bf215546Sopenharmony_ci */ 136bf215546Sopenharmony_ci nir_schedule_node *load_input; 137bf215546Sopenharmony_ci nir_schedule_node *store_shared; 138bf215546Sopenharmony_ci nir_schedule_node *unknown_intrinsic; 139bf215546Sopenharmony_ci nir_schedule_node *discard; 140bf215546Sopenharmony_ci nir_schedule_node *jump; 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci struct nir_schedule_class_dep *class_deps; 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci enum direction dir; 145bf215546Sopenharmony_ci} nir_deps_state; 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_cistatic void * 148bf215546Sopenharmony_ci_mesa_hash_table_search_data(struct hash_table *ht, void *key) 149bf215546Sopenharmony_ci{ 150bf215546Sopenharmony_ci struct hash_entry *entry = _mesa_hash_table_search(ht, key); 151bf215546Sopenharmony_ci if (!entry) 152bf215546Sopenharmony_ci return NULL; 153bf215546Sopenharmony_ci return entry->data; 154bf215546Sopenharmony_ci} 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_cistatic nir_schedule_node * 157bf215546Sopenharmony_cinir_schedule_get_node(struct hash_table *instr_map, nir_instr *instr) 158bf215546Sopenharmony_ci{ 159bf215546Sopenharmony_ci return _mesa_hash_table_search_data(instr_map, instr); 160bf215546Sopenharmony_ci} 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_cistatic struct set * 163bf215546Sopenharmony_cinir_schedule_scoreboard_get_src(nir_schedule_scoreboard *scoreboard, nir_src *src) 164bf215546Sopenharmony_ci{ 165bf215546Sopenharmony_ci if (src->is_ssa) { 166bf215546Sopenharmony_ci return _mesa_hash_table_search_data(scoreboard->remaining_uses, src->ssa); 167bf215546Sopenharmony_ci } else { 168bf215546Sopenharmony_ci return _mesa_hash_table_search_data(scoreboard->remaining_uses, 169bf215546Sopenharmony_ci src->reg.reg); 170bf215546Sopenharmony_ci } 171bf215546Sopenharmony_ci} 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_cistatic int 174bf215546Sopenharmony_cinir_schedule_def_pressure(nir_ssa_def *def) 175bf215546Sopenharmony_ci{ 176bf215546Sopenharmony_ci return def->num_components; 177bf215546Sopenharmony_ci} 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_cistatic int 180bf215546Sopenharmony_cinir_schedule_src_pressure(nir_src *src) 181bf215546Sopenharmony_ci{ 182bf215546Sopenharmony_ci if (src->is_ssa) 183bf215546Sopenharmony_ci return nir_schedule_def_pressure(src->ssa); 184bf215546Sopenharmony_ci else 185bf215546Sopenharmony_ci return src->reg.reg->num_components; 186bf215546Sopenharmony_ci} 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_cistatic int 189bf215546Sopenharmony_cinir_schedule_dest_pressure(nir_dest *dest) 190bf215546Sopenharmony_ci{ 191bf215546Sopenharmony_ci if (dest->is_ssa) 192bf215546Sopenharmony_ci return nir_schedule_def_pressure(&dest->ssa); 193bf215546Sopenharmony_ci else 194bf215546Sopenharmony_ci return dest->reg.reg->num_components; 195bf215546Sopenharmony_ci} 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci/** 198bf215546Sopenharmony_ci * Adds a dependency such that @after must appear in the final program after 199bf215546Sopenharmony_ci * @before. 200bf215546Sopenharmony_ci * 201bf215546Sopenharmony_ci * We add @before as a child of @after, so that DAG heads are the outputs of 202bf215546Sopenharmony_ci * the program and we make our scheduling decisions bottom to top. 203bf215546Sopenharmony_ci */ 204bf215546Sopenharmony_cistatic void 205bf215546Sopenharmony_ciadd_dep(nir_deps_state *state, 206bf215546Sopenharmony_ci nir_schedule_node *before, 207bf215546Sopenharmony_ci nir_schedule_node *after) 208bf215546Sopenharmony_ci{ 209bf215546Sopenharmony_ci if (!before || !after) 210bf215546Sopenharmony_ci return; 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci assert(before != after); 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci if (state->dir == F) 215bf215546Sopenharmony_ci dag_add_edge(&before->dag, &after->dag, 0); 216bf215546Sopenharmony_ci else 217bf215546Sopenharmony_ci dag_add_edge(&after->dag, &before->dag, 0); 218bf215546Sopenharmony_ci} 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_cistatic void 222bf215546Sopenharmony_ciadd_read_dep(nir_deps_state *state, 223bf215546Sopenharmony_ci nir_schedule_node *before, 224bf215546Sopenharmony_ci nir_schedule_node *after) 225bf215546Sopenharmony_ci{ 226bf215546Sopenharmony_ci add_dep(state, before, after); 227bf215546Sopenharmony_ci} 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_cistatic void 230bf215546Sopenharmony_ciadd_write_dep(nir_deps_state *state, 231bf215546Sopenharmony_ci nir_schedule_node **before, 232bf215546Sopenharmony_ci nir_schedule_node *after) 233bf215546Sopenharmony_ci{ 234bf215546Sopenharmony_ci add_dep(state, *before, after); 235bf215546Sopenharmony_ci *before = after; 236bf215546Sopenharmony_ci} 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_cistatic bool 239bf215546Sopenharmony_cinir_schedule_reg_src_deps(nir_src *src, void *in_state) 240bf215546Sopenharmony_ci{ 241bf215546Sopenharmony_ci nir_deps_state *state = in_state; 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci if (src->is_ssa) 244bf215546Sopenharmony_ci return true; 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci struct hash_entry *entry = _mesa_hash_table_search(state->reg_map, 247bf215546Sopenharmony_ci src->reg.reg); 248bf215546Sopenharmony_ci if (!entry) 249bf215546Sopenharmony_ci return true; 250bf215546Sopenharmony_ci nir_schedule_node *dst_n = entry->data; 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci nir_schedule_node *src_n = 253bf215546Sopenharmony_ci nir_schedule_get_node(state->scoreboard->instr_map, 254bf215546Sopenharmony_ci src->parent_instr); 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci add_dep(state, dst_n, src_n); 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci return true; 259bf215546Sopenharmony_ci} 260bf215546Sopenharmony_ci 261bf215546Sopenharmony_cistatic bool 262bf215546Sopenharmony_cinir_schedule_reg_dest_deps(nir_dest *dest, void *in_state) 263bf215546Sopenharmony_ci{ 264bf215546Sopenharmony_ci nir_deps_state *state = in_state; 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci if (dest->is_ssa) 267bf215546Sopenharmony_ci return true; 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci nir_schedule_node *dest_n = 270bf215546Sopenharmony_ci nir_schedule_get_node(state->scoreboard->instr_map, 271bf215546Sopenharmony_ci dest->reg.parent_instr); 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ci struct hash_entry *entry = _mesa_hash_table_search(state->reg_map, 274bf215546Sopenharmony_ci dest->reg.reg); 275bf215546Sopenharmony_ci if (!entry) { 276bf215546Sopenharmony_ci _mesa_hash_table_insert(state->reg_map, dest->reg.reg, dest_n); 277bf215546Sopenharmony_ci return true; 278bf215546Sopenharmony_ci } 279bf215546Sopenharmony_ci nir_schedule_node **before = (nir_schedule_node **)&entry->data; 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci add_write_dep(state, before, dest_n); 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci return true; 284bf215546Sopenharmony_ci} 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_cistatic bool 287bf215546Sopenharmony_cinir_schedule_ssa_deps(nir_ssa_def *def, void *in_state) 288bf215546Sopenharmony_ci{ 289bf215546Sopenharmony_ci nir_deps_state *state = in_state; 290bf215546Sopenharmony_ci struct hash_table *instr_map = state->scoreboard->instr_map; 291bf215546Sopenharmony_ci nir_schedule_node *def_n = nir_schedule_get_node(instr_map, def->parent_instr); 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci nir_foreach_use(src, def) { 294bf215546Sopenharmony_ci nir_schedule_node *use_n = nir_schedule_get_node(instr_map, 295bf215546Sopenharmony_ci src->parent_instr); 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci add_read_dep(state, def_n, use_n); 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci return true; 301bf215546Sopenharmony_ci} 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_cistatic struct nir_schedule_class_dep * 304bf215546Sopenharmony_cinir_schedule_get_class_dep(nir_deps_state *state, 305bf215546Sopenharmony_ci int klass) 306bf215546Sopenharmony_ci{ 307bf215546Sopenharmony_ci for (struct nir_schedule_class_dep *class_dep = state->class_deps; 308bf215546Sopenharmony_ci class_dep != NULL; 309bf215546Sopenharmony_ci class_dep = class_dep->next) { 310bf215546Sopenharmony_ci if (class_dep->klass == klass) 311bf215546Sopenharmony_ci return class_dep; 312bf215546Sopenharmony_ci } 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci struct nir_schedule_class_dep *class_dep = 315bf215546Sopenharmony_ci ralloc(state->reg_map, struct nir_schedule_class_dep); 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci class_dep->klass = klass; 318bf215546Sopenharmony_ci class_dep->node = NULL; 319bf215546Sopenharmony_ci class_dep->next = state->class_deps; 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci state->class_deps = class_dep; 322bf215546Sopenharmony_ci 323bf215546Sopenharmony_ci return class_dep; 324bf215546Sopenharmony_ci} 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_cistatic void 327bf215546Sopenharmony_cinir_schedule_intrinsic_deps(nir_deps_state *state, 328bf215546Sopenharmony_ci nir_intrinsic_instr *instr) 329bf215546Sopenharmony_ci{ 330bf215546Sopenharmony_ci nir_schedule_node *n = nir_schedule_get_node(state->scoreboard->instr_map, 331bf215546Sopenharmony_ci &instr->instr); 332bf215546Sopenharmony_ci const nir_schedule_options *options = state->scoreboard->options; 333bf215546Sopenharmony_ci nir_schedule_dependency dep; 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci if (options->intrinsic_cb && 336bf215546Sopenharmony_ci options->intrinsic_cb(instr, &dep, options->intrinsic_cb_data)) { 337bf215546Sopenharmony_ci struct nir_schedule_class_dep *class_dep = 338bf215546Sopenharmony_ci nir_schedule_get_class_dep(state, dep.klass); 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci switch (dep.type) { 341bf215546Sopenharmony_ci case NIR_SCHEDULE_READ_DEPENDENCY: 342bf215546Sopenharmony_ci add_read_dep(state, class_dep->node, n); 343bf215546Sopenharmony_ci break; 344bf215546Sopenharmony_ci case NIR_SCHEDULE_WRITE_DEPENDENCY: 345bf215546Sopenharmony_ci add_write_dep(state, &class_dep->node, n); 346bf215546Sopenharmony_ci break; 347bf215546Sopenharmony_ci } 348bf215546Sopenharmony_ci } 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ci switch (instr->intrinsic) { 351bf215546Sopenharmony_ci case nir_intrinsic_load_uniform: 352bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 353bf215546Sopenharmony_ci case nir_intrinsic_load_front_face: 354bf215546Sopenharmony_ci break; 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci case nir_intrinsic_discard: 357bf215546Sopenharmony_ci case nir_intrinsic_discard_if: 358bf215546Sopenharmony_ci case nir_intrinsic_demote: 359bf215546Sopenharmony_ci case nir_intrinsic_demote_if: 360bf215546Sopenharmony_ci case nir_intrinsic_terminate: 361bf215546Sopenharmony_ci case nir_intrinsic_terminate_if: 362bf215546Sopenharmony_ci /* We are adding two dependencies: 363bf215546Sopenharmony_ci * 364bf215546Sopenharmony_ci * * A individual one that we could use to add a read_dep while handling 365bf215546Sopenharmony_ci * nir_instr_type_tex 366bf215546Sopenharmony_ci * 367bf215546Sopenharmony_ci * * Include it on the unknown intrinsic set, as we want discard to be 368bf215546Sopenharmony_ci * serialized in in the same order relative to intervening stores or 369bf215546Sopenharmony_ci * atomic accesses to SSBOs and images 370bf215546Sopenharmony_ci */ 371bf215546Sopenharmony_ci add_write_dep(state, &state->discard, n); 372bf215546Sopenharmony_ci add_write_dep(state, &state->unknown_intrinsic, n); 373bf215546Sopenharmony_ci break; 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci case nir_intrinsic_store_output: 376bf215546Sopenharmony_ci /* For some hardware and stages, output stores affect the same shared 377bf215546Sopenharmony_ci * memory as input loads. 378bf215546Sopenharmony_ci */ 379bf215546Sopenharmony_ci if ((state->scoreboard->options->stages_with_shared_io_memory & 380bf215546Sopenharmony_ci (1 << state->scoreboard->shader->info.stage))) 381bf215546Sopenharmony_ci add_write_dep(state, &state->load_input, n); 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci /* Make sure that preceding discards stay before the store_output */ 384bf215546Sopenharmony_ci add_read_dep(state, state->discard, n); 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_ci break; 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci case nir_intrinsic_load_input: 389bf215546Sopenharmony_ci case nir_intrinsic_load_per_vertex_input: 390bf215546Sopenharmony_ci add_read_dep(state, state->load_input, n); 391bf215546Sopenharmony_ci break; 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci case nir_intrinsic_load_shared: 394bf215546Sopenharmony_ci case nir_intrinsic_load_shared2_amd: 395bf215546Sopenharmony_ci /* Don't move load_shared beyond a following store_shared, as it could 396bf215546Sopenharmony_ci * change their value 397bf215546Sopenharmony_ci */ 398bf215546Sopenharmony_ci add_read_dep(state, state->store_shared, n); 399bf215546Sopenharmony_ci break; 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci case nir_intrinsic_store_shared: 402bf215546Sopenharmony_ci case nir_intrinsic_store_shared2_amd: 403bf215546Sopenharmony_ci add_write_dep(state, &state->store_shared, n); 404bf215546Sopenharmony_ci break; 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_ci case nir_intrinsic_control_barrier: 407bf215546Sopenharmony_ci case nir_intrinsic_memory_barrier_shared: 408bf215546Sopenharmony_ci case nir_intrinsic_group_memory_barrier: 409bf215546Sopenharmony_ci /* A generic memory barrier can be emitted when multiple synchronization 410bf215546Sopenharmony_ci * semantics are involved, including shared memory. 411bf215546Sopenharmony_ci */ 412bf215546Sopenharmony_ci case nir_intrinsic_memory_barrier: 413bf215546Sopenharmony_ci add_write_dep(state, &state->store_shared, n); 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci /* Serialize against ssbos/atomics/etc. */ 416bf215546Sopenharmony_ci add_write_dep(state, &state->unknown_intrinsic, n); 417bf215546Sopenharmony_ci break; 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci case nir_intrinsic_scoped_barrier: { 420bf215546Sopenharmony_ci const nir_variable_mode modes = nir_intrinsic_memory_modes(instr); 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_ci if (modes & nir_var_mem_shared) 423bf215546Sopenharmony_ci add_write_dep(state, &state->store_shared, n); 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci /* Serialize against other categories. */ 426bf215546Sopenharmony_ci add_write_dep(state, &state->unknown_intrinsic, n); 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci break; 429bf215546Sopenharmony_ci } 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci default: 432bf215546Sopenharmony_ci /* Attempt to handle other intrinsics that we haven't individually 433bf215546Sopenharmony_ci * categorized by serializing them in the same order relative to each 434bf215546Sopenharmony_ci * other. 435bf215546Sopenharmony_ci */ 436bf215546Sopenharmony_ci add_write_dep(state, &state->unknown_intrinsic, n); 437bf215546Sopenharmony_ci break; 438bf215546Sopenharmony_ci } 439bf215546Sopenharmony_ci} 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci/** 442bf215546Sopenharmony_ci * Common code for dependencies that need to be tracked both forward and 443bf215546Sopenharmony_ci * backward. 444bf215546Sopenharmony_ci * 445bf215546Sopenharmony_ci * This is for things like "all reads of r4 have to happen between the r4 446bf215546Sopenharmony_ci * writes that surround them". 447bf215546Sopenharmony_ci */ 448bf215546Sopenharmony_cistatic void 449bf215546Sopenharmony_cinir_schedule_calculate_deps(nir_deps_state *state, nir_schedule_node *n) 450bf215546Sopenharmony_ci{ 451bf215546Sopenharmony_ci nir_instr *instr = n->instr; 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_ci /* For NIR SSA defs, we only need to do a single pass of making the uses 454bf215546Sopenharmony_ci * depend on the def. 455bf215546Sopenharmony_ci */ 456bf215546Sopenharmony_ci if (state->dir == F) 457bf215546Sopenharmony_ci nir_foreach_ssa_def(instr, nir_schedule_ssa_deps, state); 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci /* For NIR regs, track the last writer in the scheduler state so that we 460bf215546Sopenharmony_ci * can keep the writes in order and let reads get reordered only between 461bf215546Sopenharmony_ci * each write. 462bf215546Sopenharmony_ci */ 463bf215546Sopenharmony_ci nir_foreach_src(instr, nir_schedule_reg_src_deps, state); 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci nir_foreach_dest(instr, nir_schedule_reg_dest_deps, state); 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci /* Make sure any other instructions keep their positions relative to 468bf215546Sopenharmony_ci * jumps. 469bf215546Sopenharmony_ci */ 470bf215546Sopenharmony_ci if (instr->type != nir_instr_type_jump) 471bf215546Sopenharmony_ci add_read_dep(state, state->jump, n); 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci switch (instr->type) { 474bf215546Sopenharmony_ci case nir_instr_type_ssa_undef: 475bf215546Sopenharmony_ci case nir_instr_type_load_const: 476bf215546Sopenharmony_ci case nir_instr_type_alu: 477bf215546Sopenharmony_ci case nir_instr_type_deref: 478bf215546Sopenharmony_ci break; 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci case nir_instr_type_tex: 481bf215546Sopenharmony_ci /* Don't move texture ops before a discard, as that could increase 482bf215546Sopenharmony_ci * memory bandwidth for reading the discarded samples. 483bf215546Sopenharmony_ci */ 484bf215546Sopenharmony_ci add_read_dep(state, state->discard, n); 485bf215546Sopenharmony_ci break; 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci case nir_instr_type_jump: 488bf215546Sopenharmony_ci add_write_dep(state, &state->jump, n); 489bf215546Sopenharmony_ci break; 490bf215546Sopenharmony_ci 491bf215546Sopenharmony_ci case nir_instr_type_call: 492bf215546Sopenharmony_ci unreachable("Calls should have been lowered"); 493bf215546Sopenharmony_ci break; 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci case nir_instr_type_parallel_copy: 496bf215546Sopenharmony_ci unreachable("Parallel copies should have been lowered"); 497bf215546Sopenharmony_ci break; 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci case nir_instr_type_phi: 500bf215546Sopenharmony_ci unreachable("nir_schedule() should be called after lowering from SSA"); 501bf215546Sopenharmony_ci break; 502bf215546Sopenharmony_ci 503bf215546Sopenharmony_ci case nir_instr_type_intrinsic: 504bf215546Sopenharmony_ci nir_schedule_intrinsic_deps(state, nir_instr_as_intrinsic(instr)); 505bf215546Sopenharmony_ci break; 506bf215546Sopenharmony_ci } 507bf215546Sopenharmony_ci} 508bf215546Sopenharmony_ci 509bf215546Sopenharmony_cistatic void 510bf215546Sopenharmony_cicalculate_forward_deps(nir_schedule_scoreboard *scoreboard, nir_block *block) 511bf215546Sopenharmony_ci{ 512bf215546Sopenharmony_ci nir_deps_state state = { 513bf215546Sopenharmony_ci .scoreboard = scoreboard, 514bf215546Sopenharmony_ci .dir = F, 515bf215546Sopenharmony_ci .reg_map = _mesa_pointer_hash_table_create(NULL), 516bf215546Sopenharmony_ci }; 517bf215546Sopenharmony_ci 518bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 519bf215546Sopenharmony_ci nir_schedule_node *node = nir_schedule_get_node(scoreboard->instr_map, 520bf215546Sopenharmony_ci instr); 521bf215546Sopenharmony_ci nir_schedule_calculate_deps(&state, node); 522bf215546Sopenharmony_ci } 523bf215546Sopenharmony_ci 524bf215546Sopenharmony_ci ralloc_free(state.reg_map); 525bf215546Sopenharmony_ci} 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_cistatic void 528bf215546Sopenharmony_cicalculate_reverse_deps(nir_schedule_scoreboard *scoreboard, nir_block *block) 529bf215546Sopenharmony_ci{ 530bf215546Sopenharmony_ci nir_deps_state state = { 531bf215546Sopenharmony_ci .scoreboard = scoreboard, 532bf215546Sopenharmony_ci .dir = R, 533bf215546Sopenharmony_ci .reg_map = _mesa_pointer_hash_table_create(NULL), 534bf215546Sopenharmony_ci }; 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci nir_foreach_instr_reverse(instr, block) { 537bf215546Sopenharmony_ci nir_schedule_node *node = nir_schedule_get_node(scoreboard->instr_map, 538bf215546Sopenharmony_ci instr); 539bf215546Sopenharmony_ci nir_schedule_calculate_deps(&state, node); 540bf215546Sopenharmony_ci } 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci ralloc_free(state.reg_map); 543bf215546Sopenharmony_ci} 544bf215546Sopenharmony_ci 545bf215546Sopenharmony_citypedef struct { 546bf215546Sopenharmony_ci nir_schedule_scoreboard *scoreboard; 547bf215546Sopenharmony_ci int regs_freed; 548bf215546Sopenharmony_ci} nir_schedule_regs_freed_state; 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_cistatic bool 551bf215546Sopenharmony_cinir_schedule_regs_freed_src_cb(nir_src *src, void *in_state) 552bf215546Sopenharmony_ci{ 553bf215546Sopenharmony_ci nir_schedule_regs_freed_state *state = in_state; 554bf215546Sopenharmony_ci nir_schedule_scoreboard *scoreboard = state->scoreboard; 555bf215546Sopenharmony_ci struct set *remaining_uses = nir_schedule_scoreboard_get_src(scoreboard, src); 556bf215546Sopenharmony_ci 557bf215546Sopenharmony_ci if (remaining_uses->entries == 1 && 558bf215546Sopenharmony_ci _mesa_set_search(remaining_uses, src->parent_instr)) { 559bf215546Sopenharmony_ci state->regs_freed += nir_schedule_src_pressure(src); 560bf215546Sopenharmony_ci } 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci return true; 563bf215546Sopenharmony_ci} 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_cistatic bool 566bf215546Sopenharmony_cinir_schedule_regs_freed_def_cb(nir_ssa_def *def, void *in_state) 567bf215546Sopenharmony_ci{ 568bf215546Sopenharmony_ci nir_schedule_regs_freed_state *state = in_state; 569bf215546Sopenharmony_ci 570bf215546Sopenharmony_ci state->regs_freed -= nir_schedule_def_pressure(def); 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_ci return true; 573bf215546Sopenharmony_ci} 574bf215546Sopenharmony_ci 575bf215546Sopenharmony_cistatic bool 576bf215546Sopenharmony_cinir_schedule_regs_freed_dest_cb(nir_dest *dest, void *in_state) 577bf215546Sopenharmony_ci{ 578bf215546Sopenharmony_ci nir_schedule_regs_freed_state *state = in_state; 579bf215546Sopenharmony_ci nir_schedule_scoreboard *scoreboard = state->scoreboard; 580bf215546Sopenharmony_ci 581bf215546Sopenharmony_ci if (dest->is_ssa) 582bf215546Sopenharmony_ci return true; 583bf215546Sopenharmony_ci 584bf215546Sopenharmony_ci nir_register *reg = dest->reg.reg; 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci /* Only the first def of a reg counts against register pressure. */ 587bf215546Sopenharmony_ci if (!_mesa_set_search(scoreboard->live_values, reg)) 588bf215546Sopenharmony_ci state->regs_freed -= nir_schedule_dest_pressure(dest); 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_ci return true; 591bf215546Sopenharmony_ci} 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_cistatic int 594bf215546Sopenharmony_cinir_schedule_regs_freed(nir_schedule_scoreboard *scoreboard, nir_schedule_node *n) 595bf215546Sopenharmony_ci{ 596bf215546Sopenharmony_ci nir_schedule_regs_freed_state state = { 597bf215546Sopenharmony_ci .scoreboard = scoreboard, 598bf215546Sopenharmony_ci }; 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci nir_foreach_src(n->instr, nir_schedule_regs_freed_src_cb, &state); 601bf215546Sopenharmony_ci 602bf215546Sopenharmony_ci nir_foreach_ssa_def(n->instr, nir_schedule_regs_freed_def_cb, &state); 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_ci nir_foreach_dest(n->instr, nir_schedule_regs_freed_dest_cb, &state); 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_ci return state.regs_freed; 607bf215546Sopenharmony_ci} 608bf215546Sopenharmony_ci 609bf215546Sopenharmony_ci/** 610bf215546Sopenharmony_ci * Chooses an instruction that will minimise the register pressure as much as 611bf215546Sopenharmony_ci * possible. This should only be used as a fallback when the regular scheduling 612bf215546Sopenharmony_ci * generates a shader whose register allocation fails. 613bf215546Sopenharmony_ci */ 614bf215546Sopenharmony_cistatic nir_schedule_node * 615bf215546Sopenharmony_cinir_schedule_choose_instruction_fallback(nir_schedule_scoreboard *scoreboard) 616bf215546Sopenharmony_ci{ 617bf215546Sopenharmony_ci nir_schedule_node *chosen = NULL; 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_ci /* Find the leader in the ready (shouldn't-stall) set with the mininum 620bf215546Sopenharmony_ci * cost. 621bf215546Sopenharmony_ci */ 622bf215546Sopenharmony_ci list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) { 623bf215546Sopenharmony_ci if (scoreboard->time < n->ready_time) 624bf215546Sopenharmony_ci continue; 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_ci if (!chosen || chosen->max_delay > n->max_delay) 627bf215546Sopenharmony_ci chosen = n; 628bf215546Sopenharmony_ci } 629bf215546Sopenharmony_ci if (chosen) { 630bf215546Sopenharmony_ci if (debug) { 631bf215546Sopenharmony_ci fprintf(stderr, "chose (ready fallback): "); 632bf215546Sopenharmony_ci nir_print_instr(chosen->instr, stderr); 633bf215546Sopenharmony_ci fprintf(stderr, "\n"); 634bf215546Sopenharmony_ci } 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci return chosen; 637bf215546Sopenharmony_ci } 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_ci /* Otherwise, choose the leader with the minimum cost. */ 640bf215546Sopenharmony_ci list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) { 641bf215546Sopenharmony_ci if (!chosen || chosen->max_delay > n->max_delay) 642bf215546Sopenharmony_ci chosen = n; 643bf215546Sopenharmony_ci } 644bf215546Sopenharmony_ci if (debug) { 645bf215546Sopenharmony_ci fprintf(stderr, "chose (leader fallback): "); 646bf215546Sopenharmony_ci nir_print_instr(chosen->instr, stderr); 647bf215546Sopenharmony_ci fprintf(stderr, "\n"); 648bf215546Sopenharmony_ci } 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci return chosen; 651bf215546Sopenharmony_ci} 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_ci/** 654bf215546Sopenharmony_ci * Chooses an instruction to schedule using the Goodman/Hsu (1988) CSP (Code 655bf215546Sopenharmony_ci * Scheduling for Parallelism) heuristic. 656bf215546Sopenharmony_ci * 657bf215546Sopenharmony_ci * Picks an instruction on the critical that's ready to execute without 658bf215546Sopenharmony_ci * stalls, if possible, otherwise picks the instruction on the critical path. 659bf215546Sopenharmony_ci */ 660bf215546Sopenharmony_cistatic nir_schedule_node * 661bf215546Sopenharmony_cinir_schedule_choose_instruction_csp(nir_schedule_scoreboard *scoreboard) 662bf215546Sopenharmony_ci{ 663bf215546Sopenharmony_ci nir_schedule_node *chosen = NULL; 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci /* Find the leader in the ready (shouldn't-stall) set with the maximum 666bf215546Sopenharmony_ci * cost. 667bf215546Sopenharmony_ci */ 668bf215546Sopenharmony_ci list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) { 669bf215546Sopenharmony_ci if (scoreboard->time < n->ready_time) 670bf215546Sopenharmony_ci continue; 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci if (!chosen || chosen->max_delay < n->max_delay) 673bf215546Sopenharmony_ci chosen = n; 674bf215546Sopenharmony_ci } 675bf215546Sopenharmony_ci if (chosen) { 676bf215546Sopenharmony_ci if (debug) { 677bf215546Sopenharmony_ci fprintf(stderr, "chose (ready): "); 678bf215546Sopenharmony_ci nir_print_instr(chosen->instr, stderr); 679bf215546Sopenharmony_ci fprintf(stderr, "\n"); 680bf215546Sopenharmony_ci } 681bf215546Sopenharmony_ci 682bf215546Sopenharmony_ci return chosen; 683bf215546Sopenharmony_ci } 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_ci /* Otherwise, choose the leader with the maximum cost. */ 686bf215546Sopenharmony_ci list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) { 687bf215546Sopenharmony_ci if (!chosen || chosen->max_delay < n->max_delay) 688bf215546Sopenharmony_ci chosen = n; 689bf215546Sopenharmony_ci } 690bf215546Sopenharmony_ci if (debug) { 691bf215546Sopenharmony_ci fprintf(stderr, "chose (leader): "); 692bf215546Sopenharmony_ci nir_print_instr(chosen->instr, stderr); 693bf215546Sopenharmony_ci fprintf(stderr, "\n"); 694bf215546Sopenharmony_ci } 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_ci return chosen; 697bf215546Sopenharmony_ci} 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci/** 700bf215546Sopenharmony_ci * Chooses an instruction to schedule using the Goodman/Hsu (1988) CSR (Code 701bf215546Sopenharmony_ci * Scheduling for Register pressure) heuristic. 702bf215546Sopenharmony_ci */ 703bf215546Sopenharmony_cistatic nir_schedule_node * 704bf215546Sopenharmony_cinir_schedule_choose_instruction_csr(nir_schedule_scoreboard *scoreboard) 705bf215546Sopenharmony_ci{ 706bf215546Sopenharmony_ci nir_schedule_node *chosen = NULL; 707bf215546Sopenharmony_ci 708bf215546Sopenharmony_ci /* Find a ready inst with regs freed and pick the one with max cost. */ 709bf215546Sopenharmony_ci list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) { 710bf215546Sopenharmony_ci if (n->ready_time > scoreboard->time) 711bf215546Sopenharmony_ci continue; 712bf215546Sopenharmony_ci 713bf215546Sopenharmony_ci int regs_freed = nir_schedule_regs_freed(scoreboard, n); 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_ci if (regs_freed > 0 && (!chosen || chosen->max_delay < n->max_delay)) { 716bf215546Sopenharmony_ci chosen = n; 717bf215546Sopenharmony_ci } 718bf215546Sopenharmony_ci } 719bf215546Sopenharmony_ci if (chosen) { 720bf215546Sopenharmony_ci if (debug) { 721bf215546Sopenharmony_ci fprintf(stderr, "chose (freed+ready): "); 722bf215546Sopenharmony_ci nir_print_instr(chosen->instr, stderr); 723bf215546Sopenharmony_ci fprintf(stderr, "\n"); 724bf215546Sopenharmony_ci } 725bf215546Sopenharmony_ci 726bf215546Sopenharmony_ci return chosen; 727bf215546Sopenharmony_ci } 728bf215546Sopenharmony_ci 729bf215546Sopenharmony_ci /* Find a leader with regs freed and pick the one with max cost. */ 730bf215546Sopenharmony_ci list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) { 731bf215546Sopenharmony_ci int regs_freed = nir_schedule_regs_freed(scoreboard, n); 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci if (regs_freed > 0 && (!chosen || chosen->max_delay < n->max_delay)) { 734bf215546Sopenharmony_ci chosen = n; 735bf215546Sopenharmony_ci } 736bf215546Sopenharmony_ci } 737bf215546Sopenharmony_ci if (chosen) { 738bf215546Sopenharmony_ci if (debug) { 739bf215546Sopenharmony_ci fprintf(stderr, "chose (regs freed): "); 740bf215546Sopenharmony_ci nir_print_instr(chosen->instr, stderr); 741bf215546Sopenharmony_ci fprintf(stderr, "\n"); 742bf215546Sopenharmony_ci } 743bf215546Sopenharmony_ci 744bf215546Sopenharmony_ci return chosen; 745bf215546Sopenharmony_ci } 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci /* Find a partially evaluated path and try to finish it off */ 748bf215546Sopenharmony_ci list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) { 749bf215546Sopenharmony_ci if (n->partially_evaluated_path && 750bf215546Sopenharmony_ci (!chosen || chosen->max_delay < n->max_delay)) { 751bf215546Sopenharmony_ci chosen = n; 752bf215546Sopenharmony_ci } 753bf215546Sopenharmony_ci } 754bf215546Sopenharmony_ci if (chosen) { 755bf215546Sopenharmony_ci if (debug) { 756bf215546Sopenharmony_ci fprintf(stderr, "chose (partial path): "); 757bf215546Sopenharmony_ci nir_print_instr(chosen->instr, stderr); 758bf215546Sopenharmony_ci fprintf(stderr, "\n"); 759bf215546Sopenharmony_ci } 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_ci return chosen; 762bf215546Sopenharmony_ci } 763bf215546Sopenharmony_ci 764bf215546Sopenharmony_ci /* Contra the paper, pick a leader with no effect on used regs. This may 765bf215546Sopenharmony_ci * open up new opportunities, as otherwise a single-operand instr consuming 766bf215546Sopenharmony_ci * a value will tend to block finding freeing that value. This had a 767bf215546Sopenharmony_ci * massive effect on reducing spilling on V3D. 768bf215546Sopenharmony_ci * 769bf215546Sopenharmony_ci * XXX: Should this prioritize ready? 770bf215546Sopenharmony_ci */ 771bf215546Sopenharmony_ci list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) { 772bf215546Sopenharmony_ci if (nir_schedule_regs_freed(scoreboard, n) != 0) 773bf215546Sopenharmony_ci continue; 774bf215546Sopenharmony_ci 775bf215546Sopenharmony_ci if (!chosen || chosen->max_delay < n->max_delay) 776bf215546Sopenharmony_ci chosen = n; 777bf215546Sopenharmony_ci } 778bf215546Sopenharmony_ci if (chosen) { 779bf215546Sopenharmony_ci if (debug) { 780bf215546Sopenharmony_ci fprintf(stderr, "chose (regs no-op): "); 781bf215546Sopenharmony_ci nir_print_instr(chosen->instr, stderr); 782bf215546Sopenharmony_ci fprintf(stderr, "\n"); 783bf215546Sopenharmony_ci } 784bf215546Sopenharmony_ci 785bf215546Sopenharmony_ci return chosen; 786bf215546Sopenharmony_ci } 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_ci /* Pick the max delay of the remaining ready set. */ 789bf215546Sopenharmony_ci list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) { 790bf215546Sopenharmony_ci if (n->ready_time > scoreboard->time) 791bf215546Sopenharmony_ci continue; 792bf215546Sopenharmony_ci if (!chosen || chosen->max_delay < n->max_delay) 793bf215546Sopenharmony_ci chosen = n; 794bf215546Sopenharmony_ci } 795bf215546Sopenharmony_ci if (chosen) { 796bf215546Sopenharmony_ci if (debug) { 797bf215546Sopenharmony_ci fprintf(stderr, "chose (ready max delay): "); 798bf215546Sopenharmony_ci nir_print_instr(chosen->instr, stderr); 799bf215546Sopenharmony_ci fprintf(stderr, "\n"); 800bf215546Sopenharmony_ci } 801bf215546Sopenharmony_ci return chosen; 802bf215546Sopenharmony_ci } 803bf215546Sopenharmony_ci 804bf215546Sopenharmony_ci /* Pick the max delay of the remaining leaders. */ 805bf215546Sopenharmony_ci list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) { 806bf215546Sopenharmony_ci if (!chosen || chosen->max_delay < n->max_delay) 807bf215546Sopenharmony_ci chosen = n; 808bf215546Sopenharmony_ci } 809bf215546Sopenharmony_ci 810bf215546Sopenharmony_ci if (debug) { 811bf215546Sopenharmony_ci fprintf(stderr, "chose (max delay): "); 812bf215546Sopenharmony_ci nir_print_instr(chosen->instr, stderr); 813bf215546Sopenharmony_ci fprintf(stderr, "\n"); 814bf215546Sopenharmony_ci } 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci return chosen; 817bf215546Sopenharmony_ci} 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_cistatic void 820bf215546Sopenharmony_cidump_state(nir_schedule_scoreboard *scoreboard) 821bf215546Sopenharmony_ci{ 822bf215546Sopenharmony_ci list_for_each_entry(nir_schedule_node, n, &scoreboard->dag->heads, dag.link) { 823bf215546Sopenharmony_ci fprintf(stderr, "maxdel %5d ", n->max_delay); 824bf215546Sopenharmony_ci nir_print_instr(n->instr, stderr); 825bf215546Sopenharmony_ci fprintf(stderr, "\n"); 826bf215546Sopenharmony_ci 827bf215546Sopenharmony_ci util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) { 828bf215546Sopenharmony_ci nir_schedule_node *child = (nir_schedule_node *)edge->child; 829bf215546Sopenharmony_ci 830bf215546Sopenharmony_ci fprintf(stderr, " -> (%d parents) ", child->dag.parent_count); 831bf215546Sopenharmony_ci nir_print_instr(child->instr, stderr); 832bf215546Sopenharmony_ci fprintf(stderr, "\n"); 833bf215546Sopenharmony_ci } 834bf215546Sopenharmony_ci } 835bf215546Sopenharmony_ci} 836bf215546Sopenharmony_ci 837bf215546Sopenharmony_cistatic void 838bf215546Sopenharmony_cinir_schedule_mark_use(nir_schedule_scoreboard *scoreboard, 839bf215546Sopenharmony_ci void *reg_or_def, 840bf215546Sopenharmony_ci nir_instr *reg_or_def_parent, 841bf215546Sopenharmony_ci int pressure) 842bf215546Sopenharmony_ci{ 843bf215546Sopenharmony_ci /* Make the value live if it's the first time it's been used. */ 844bf215546Sopenharmony_ci if (!_mesa_set_search(scoreboard->live_values, reg_or_def)) { 845bf215546Sopenharmony_ci _mesa_set_add(scoreboard->live_values, reg_or_def); 846bf215546Sopenharmony_ci scoreboard->pressure += pressure; 847bf215546Sopenharmony_ci } 848bf215546Sopenharmony_ci 849bf215546Sopenharmony_ci /* Make the value dead if it's the last remaining use. Be careful when one 850bf215546Sopenharmony_ci * instruction uses a value twice to not decrement pressure twice. 851bf215546Sopenharmony_ci */ 852bf215546Sopenharmony_ci struct set *remaining_uses = 853bf215546Sopenharmony_ci _mesa_hash_table_search_data(scoreboard->remaining_uses, reg_or_def); 854bf215546Sopenharmony_ci struct set_entry *entry = _mesa_set_search(remaining_uses, reg_or_def_parent); 855bf215546Sopenharmony_ci if (entry) { 856bf215546Sopenharmony_ci _mesa_set_remove(remaining_uses, entry); 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci if (remaining_uses->entries == 0) 859bf215546Sopenharmony_ci scoreboard->pressure -= pressure; 860bf215546Sopenharmony_ci } 861bf215546Sopenharmony_ci} 862bf215546Sopenharmony_ci 863bf215546Sopenharmony_cistatic bool 864bf215546Sopenharmony_cinir_schedule_mark_src_scheduled(nir_src *src, void *state) 865bf215546Sopenharmony_ci{ 866bf215546Sopenharmony_ci nir_schedule_scoreboard *scoreboard = state; 867bf215546Sopenharmony_ci struct set *remaining_uses = nir_schedule_scoreboard_get_src(scoreboard, src); 868bf215546Sopenharmony_ci 869bf215546Sopenharmony_ci struct set_entry *entry = _mesa_set_search(remaining_uses, 870bf215546Sopenharmony_ci src->parent_instr); 871bf215546Sopenharmony_ci if (entry) { 872bf215546Sopenharmony_ci /* Once we've used an SSA value in one instruction, bump the priority of 873bf215546Sopenharmony_ci * the other uses so the SSA value can get fully consumed. 874bf215546Sopenharmony_ci * 875bf215546Sopenharmony_ci * We don't do this for registers, and it's would be a hassle and it's 876bf215546Sopenharmony_ci * unclear if that would help or not. Also, skip it for constants, as 877bf215546Sopenharmony_ci * they're often folded as immediates into backend instructions and have 878bf215546Sopenharmony_ci * many unrelated instructions all referencing the same value (0). 879bf215546Sopenharmony_ci */ 880bf215546Sopenharmony_ci if (src->is_ssa && 881bf215546Sopenharmony_ci src->ssa->parent_instr->type != nir_instr_type_load_const) { 882bf215546Sopenharmony_ci nir_foreach_use(other_src, src->ssa) { 883bf215546Sopenharmony_ci if (other_src->parent_instr == src->parent_instr) 884bf215546Sopenharmony_ci continue; 885bf215546Sopenharmony_ci 886bf215546Sopenharmony_ci nir_schedule_node *n = 887bf215546Sopenharmony_ci nir_schedule_get_node(scoreboard->instr_map, 888bf215546Sopenharmony_ci other_src->parent_instr); 889bf215546Sopenharmony_ci 890bf215546Sopenharmony_ci if (n && !n->partially_evaluated_path) { 891bf215546Sopenharmony_ci if (debug) { 892bf215546Sopenharmony_ci fprintf(stderr, " New partially evaluated path: "); 893bf215546Sopenharmony_ci nir_print_instr(n->instr, stderr); 894bf215546Sopenharmony_ci fprintf(stderr, "\n"); 895bf215546Sopenharmony_ci } 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci n->partially_evaluated_path = true; 898bf215546Sopenharmony_ci } 899bf215546Sopenharmony_ci } 900bf215546Sopenharmony_ci } 901bf215546Sopenharmony_ci } 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_ci nir_schedule_mark_use(scoreboard, 904bf215546Sopenharmony_ci src->is_ssa ? (void *)src->ssa : (void *)src->reg.reg, 905bf215546Sopenharmony_ci src->parent_instr, 906bf215546Sopenharmony_ci nir_schedule_src_pressure(src)); 907bf215546Sopenharmony_ci 908bf215546Sopenharmony_ci return true; 909bf215546Sopenharmony_ci} 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_cistatic bool 912bf215546Sopenharmony_cinir_schedule_mark_def_scheduled(nir_ssa_def *def, void *state) 913bf215546Sopenharmony_ci{ 914bf215546Sopenharmony_ci nir_schedule_scoreboard *scoreboard = state; 915bf215546Sopenharmony_ci 916bf215546Sopenharmony_ci nir_schedule_mark_use(scoreboard, def, def->parent_instr, 917bf215546Sopenharmony_ci nir_schedule_def_pressure(def)); 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_ci return true; 920bf215546Sopenharmony_ci} 921bf215546Sopenharmony_ci 922bf215546Sopenharmony_cistatic bool 923bf215546Sopenharmony_cinir_schedule_mark_dest_scheduled(nir_dest *dest, void *state) 924bf215546Sopenharmony_ci{ 925bf215546Sopenharmony_ci nir_schedule_scoreboard *scoreboard = state; 926bf215546Sopenharmony_ci 927bf215546Sopenharmony_ci /* SSA defs were handled in nir_schedule_mark_def_scheduled() 928bf215546Sopenharmony_ci */ 929bf215546Sopenharmony_ci if (dest->is_ssa) 930bf215546Sopenharmony_ci return true; 931bf215546Sopenharmony_ci 932bf215546Sopenharmony_ci /* XXX: This is not actually accurate for regs -- the last use of a reg may 933bf215546Sopenharmony_ci * have a live interval that extends across control flow. We should 934bf215546Sopenharmony_ci * calculate the live ranges of regs, and have scheduler nodes for the CF 935bf215546Sopenharmony_ci * nodes that also "use" the reg. 936bf215546Sopenharmony_ci */ 937bf215546Sopenharmony_ci nir_schedule_mark_use(scoreboard, dest->reg.reg, 938bf215546Sopenharmony_ci dest->reg.parent_instr, 939bf215546Sopenharmony_ci nir_schedule_dest_pressure(dest)); 940bf215546Sopenharmony_ci 941bf215546Sopenharmony_ci return true; 942bf215546Sopenharmony_ci} 943bf215546Sopenharmony_ci 944bf215546Sopenharmony_cistatic void 945bf215546Sopenharmony_cinir_schedule_mark_node_scheduled(nir_schedule_scoreboard *scoreboard, 946bf215546Sopenharmony_ci nir_schedule_node *n) 947bf215546Sopenharmony_ci{ 948bf215546Sopenharmony_ci nir_foreach_src(n->instr, nir_schedule_mark_src_scheduled, scoreboard); 949bf215546Sopenharmony_ci nir_foreach_ssa_def(n->instr, nir_schedule_mark_def_scheduled, scoreboard); 950bf215546Sopenharmony_ci nir_foreach_dest(n->instr, nir_schedule_mark_dest_scheduled, scoreboard); 951bf215546Sopenharmony_ci 952bf215546Sopenharmony_ci util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) { 953bf215546Sopenharmony_ci nir_schedule_node *child = (nir_schedule_node *)edge->child; 954bf215546Sopenharmony_ci 955bf215546Sopenharmony_ci child->ready_time = MAX2(child->ready_time, 956bf215546Sopenharmony_ci scoreboard->time + n->delay); 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_ci if (child->dag.parent_count == 1) { 959bf215546Sopenharmony_ci if (debug) { 960bf215546Sopenharmony_ci fprintf(stderr, " New DAG head: "); 961bf215546Sopenharmony_ci nir_print_instr(child->instr, stderr); 962bf215546Sopenharmony_ci fprintf(stderr, "\n"); 963bf215546Sopenharmony_ci } 964bf215546Sopenharmony_ci } 965bf215546Sopenharmony_ci } 966bf215546Sopenharmony_ci 967bf215546Sopenharmony_ci dag_prune_head(scoreboard->dag, &n->dag); 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_ci scoreboard->time = MAX2(n->ready_time, scoreboard->time); 970bf215546Sopenharmony_ci scoreboard->time++; 971bf215546Sopenharmony_ci} 972bf215546Sopenharmony_ci 973bf215546Sopenharmony_cistatic void 974bf215546Sopenharmony_cinir_schedule_instructions(nir_schedule_scoreboard *scoreboard, nir_block *block) 975bf215546Sopenharmony_ci{ 976bf215546Sopenharmony_ci while (!list_is_empty(&scoreboard->dag->heads)) { 977bf215546Sopenharmony_ci if (debug) { 978bf215546Sopenharmony_ci fprintf(stderr, "current list:\n"); 979bf215546Sopenharmony_ci dump_state(scoreboard); 980bf215546Sopenharmony_ci } 981bf215546Sopenharmony_ci 982bf215546Sopenharmony_ci nir_schedule_node *chosen; 983bf215546Sopenharmony_ci if (scoreboard->options->fallback) 984bf215546Sopenharmony_ci chosen = nir_schedule_choose_instruction_fallback(scoreboard); 985bf215546Sopenharmony_ci else if (scoreboard->pressure < scoreboard->options->threshold) 986bf215546Sopenharmony_ci chosen = nir_schedule_choose_instruction_csp(scoreboard); 987bf215546Sopenharmony_ci else 988bf215546Sopenharmony_ci chosen = nir_schedule_choose_instruction_csr(scoreboard); 989bf215546Sopenharmony_ci 990bf215546Sopenharmony_ci /* Now that we've scheduled a new instruction, some of its children may 991bf215546Sopenharmony_ci * be promoted to the list of instructions ready to be scheduled. 992bf215546Sopenharmony_ci */ 993bf215546Sopenharmony_ci nir_schedule_mark_node_scheduled(scoreboard, chosen); 994bf215546Sopenharmony_ci 995bf215546Sopenharmony_ci /* Move the instruction to the end (so our first chosen instructions are 996bf215546Sopenharmony_ci * the start of the program). 997bf215546Sopenharmony_ci */ 998bf215546Sopenharmony_ci exec_node_remove(&chosen->instr->node); 999bf215546Sopenharmony_ci exec_list_push_tail(&block->instr_list, &chosen->instr->node); 1000bf215546Sopenharmony_ci 1001bf215546Sopenharmony_ci if (debug) 1002bf215546Sopenharmony_ci fprintf(stderr, "\n"); 1003bf215546Sopenharmony_ci } 1004bf215546Sopenharmony_ci} 1005bf215546Sopenharmony_ci 1006bf215546Sopenharmony_cistatic uint32_t 1007bf215546Sopenharmony_cinir_schedule_get_delay(nir_schedule_scoreboard *scoreboard, nir_instr *instr) 1008bf215546Sopenharmony_ci{ 1009bf215546Sopenharmony_ci if (scoreboard->options->instr_delay_cb) { 1010bf215546Sopenharmony_ci void *cb_data = scoreboard->options->instr_delay_cb_data; 1011bf215546Sopenharmony_ci return scoreboard->options->instr_delay_cb(instr, cb_data); 1012bf215546Sopenharmony_ci } 1013bf215546Sopenharmony_ci 1014bf215546Sopenharmony_ci switch (instr->type) { 1015bf215546Sopenharmony_ci case nir_instr_type_ssa_undef: 1016bf215546Sopenharmony_ci case nir_instr_type_load_const: 1017bf215546Sopenharmony_ci case nir_instr_type_alu: 1018bf215546Sopenharmony_ci case nir_instr_type_deref: 1019bf215546Sopenharmony_ci case nir_instr_type_jump: 1020bf215546Sopenharmony_ci case nir_instr_type_parallel_copy: 1021bf215546Sopenharmony_ci case nir_instr_type_call: 1022bf215546Sopenharmony_ci case nir_instr_type_phi: 1023bf215546Sopenharmony_ci return 1; 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci case nir_instr_type_intrinsic: 1026bf215546Sopenharmony_ci switch (nir_instr_as_intrinsic(instr)->intrinsic) { 1027bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 1028bf215546Sopenharmony_ci case nir_intrinsic_load_ssbo: 1029bf215546Sopenharmony_ci case nir_intrinsic_load_scratch: 1030bf215546Sopenharmony_ci case nir_intrinsic_load_shared: 1031bf215546Sopenharmony_ci case nir_intrinsic_image_load: 1032bf215546Sopenharmony_ci return 50; 1033bf215546Sopenharmony_ci default: 1034bf215546Sopenharmony_ci return 1; 1035bf215546Sopenharmony_ci } 1036bf215546Sopenharmony_ci break; 1037bf215546Sopenharmony_ci 1038bf215546Sopenharmony_ci case nir_instr_type_tex: 1039bf215546Sopenharmony_ci /* Pick some large number to try to fetch textures early and sample them 1040bf215546Sopenharmony_ci * late. 1041bf215546Sopenharmony_ci */ 1042bf215546Sopenharmony_ci return 100; 1043bf215546Sopenharmony_ci } 1044bf215546Sopenharmony_ci 1045bf215546Sopenharmony_ci return 0; 1046bf215546Sopenharmony_ci} 1047bf215546Sopenharmony_ci 1048bf215546Sopenharmony_cistatic void 1049bf215546Sopenharmony_cinir_schedule_dag_max_delay_cb(struct dag_node *node, void *state) 1050bf215546Sopenharmony_ci{ 1051bf215546Sopenharmony_ci nir_schedule_node *n = (nir_schedule_node *)node; 1052bf215546Sopenharmony_ci uint32_t max_delay = 0; 1053bf215546Sopenharmony_ci 1054bf215546Sopenharmony_ci util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) { 1055bf215546Sopenharmony_ci nir_schedule_node *child = (nir_schedule_node *)edge->child; 1056bf215546Sopenharmony_ci max_delay = MAX2(child->max_delay, max_delay); 1057bf215546Sopenharmony_ci } 1058bf215546Sopenharmony_ci 1059bf215546Sopenharmony_ci n->max_delay = MAX2(n->max_delay, max_delay + n->delay); 1060bf215546Sopenharmony_ci } 1061bf215546Sopenharmony_ci 1062bf215546Sopenharmony_cistatic void 1063bf215546Sopenharmony_cinir_schedule_block(nir_schedule_scoreboard *scoreboard, nir_block *block) 1064bf215546Sopenharmony_ci{ 1065bf215546Sopenharmony_ci void *mem_ctx = ralloc_context(NULL); 1066bf215546Sopenharmony_ci scoreboard->instr_map = _mesa_pointer_hash_table_create(mem_ctx); 1067bf215546Sopenharmony_ci 1068bf215546Sopenharmony_ci scoreboard->dag = dag_create(mem_ctx); 1069bf215546Sopenharmony_ci 1070bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 1071bf215546Sopenharmony_ci nir_schedule_node *n = 1072bf215546Sopenharmony_ci rzalloc(mem_ctx, nir_schedule_node); 1073bf215546Sopenharmony_ci 1074bf215546Sopenharmony_ci n->instr = instr; 1075bf215546Sopenharmony_ci n->delay = nir_schedule_get_delay(scoreboard, instr); 1076bf215546Sopenharmony_ci dag_init_node(scoreboard->dag, &n->dag); 1077bf215546Sopenharmony_ci 1078bf215546Sopenharmony_ci _mesa_hash_table_insert(scoreboard->instr_map, instr, n); 1079bf215546Sopenharmony_ci } 1080bf215546Sopenharmony_ci 1081bf215546Sopenharmony_ci calculate_forward_deps(scoreboard, block); 1082bf215546Sopenharmony_ci calculate_reverse_deps(scoreboard, block); 1083bf215546Sopenharmony_ci 1084bf215546Sopenharmony_ci dag_traverse_bottom_up(scoreboard->dag, nir_schedule_dag_max_delay_cb, NULL); 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_ci nir_schedule_instructions(scoreboard, block); 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci ralloc_free(mem_ctx); 1089bf215546Sopenharmony_ci scoreboard->instr_map = NULL; 1090bf215546Sopenharmony_ci} 1091bf215546Sopenharmony_ci 1092bf215546Sopenharmony_cistatic bool 1093bf215546Sopenharmony_cinir_schedule_ssa_def_init_scoreboard(nir_ssa_def *def, void *state) 1094bf215546Sopenharmony_ci{ 1095bf215546Sopenharmony_ci nir_schedule_scoreboard *scoreboard = state; 1096bf215546Sopenharmony_ci struct set *def_uses = _mesa_pointer_set_create(scoreboard); 1097bf215546Sopenharmony_ci 1098bf215546Sopenharmony_ci _mesa_hash_table_insert(scoreboard->remaining_uses, def, def_uses); 1099bf215546Sopenharmony_ci 1100bf215546Sopenharmony_ci _mesa_set_add(def_uses, def->parent_instr); 1101bf215546Sopenharmony_ci 1102bf215546Sopenharmony_ci nir_foreach_use(src, def) { 1103bf215546Sopenharmony_ci _mesa_set_add(def_uses, src->parent_instr); 1104bf215546Sopenharmony_ci } 1105bf215546Sopenharmony_ci 1106bf215546Sopenharmony_ci /* XXX: Handle if uses */ 1107bf215546Sopenharmony_ci 1108bf215546Sopenharmony_ci return true; 1109bf215546Sopenharmony_ci} 1110bf215546Sopenharmony_ci 1111bf215546Sopenharmony_cistatic nir_schedule_scoreboard * 1112bf215546Sopenharmony_cinir_schedule_get_scoreboard(nir_shader *shader, 1113bf215546Sopenharmony_ci const nir_schedule_options *options) 1114bf215546Sopenharmony_ci{ 1115bf215546Sopenharmony_ci nir_schedule_scoreboard *scoreboard = rzalloc(NULL, nir_schedule_scoreboard); 1116bf215546Sopenharmony_ci 1117bf215546Sopenharmony_ci scoreboard->shader = shader; 1118bf215546Sopenharmony_ci scoreboard->live_values = _mesa_pointer_set_create(scoreboard); 1119bf215546Sopenharmony_ci scoreboard->remaining_uses = _mesa_pointer_hash_table_create(scoreboard); 1120bf215546Sopenharmony_ci scoreboard->options = options; 1121bf215546Sopenharmony_ci scoreboard->pressure = 0; 1122bf215546Sopenharmony_ci 1123bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 1124bf215546Sopenharmony_ci nir_foreach_register(reg, &function->impl->registers) { 1125bf215546Sopenharmony_ci struct set *register_uses = 1126bf215546Sopenharmony_ci _mesa_pointer_set_create(scoreboard); 1127bf215546Sopenharmony_ci 1128bf215546Sopenharmony_ci _mesa_hash_table_insert(scoreboard->remaining_uses, reg, register_uses); 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_ci nir_foreach_use(src, reg) { 1131bf215546Sopenharmony_ci _mesa_set_add(register_uses, src->parent_instr); 1132bf215546Sopenharmony_ci } 1133bf215546Sopenharmony_ci 1134bf215546Sopenharmony_ci /* XXX: Handle if uses */ 1135bf215546Sopenharmony_ci 1136bf215546Sopenharmony_ci nir_foreach_def(dest, reg) { 1137bf215546Sopenharmony_ci _mesa_set_add(register_uses, dest->reg.parent_instr); 1138bf215546Sopenharmony_ci } 1139bf215546Sopenharmony_ci } 1140bf215546Sopenharmony_ci 1141bf215546Sopenharmony_ci nir_foreach_block(block, function->impl) { 1142bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 1143bf215546Sopenharmony_ci nir_foreach_ssa_def(instr, nir_schedule_ssa_def_init_scoreboard, 1144bf215546Sopenharmony_ci scoreboard); 1145bf215546Sopenharmony_ci } 1146bf215546Sopenharmony_ci 1147bf215546Sopenharmony_ci /* XXX: We're ignoring if uses, which may prioritize scheduling other 1148bf215546Sopenharmony_ci * uses of the if src even when it doesn't help. That's not many 1149bf215546Sopenharmony_ci * values, though, so meh. 1150bf215546Sopenharmony_ci */ 1151bf215546Sopenharmony_ci } 1152bf215546Sopenharmony_ci } 1153bf215546Sopenharmony_ci 1154bf215546Sopenharmony_ci return scoreboard; 1155bf215546Sopenharmony_ci} 1156bf215546Sopenharmony_ci 1157bf215546Sopenharmony_cistatic void 1158bf215546Sopenharmony_cinir_schedule_validate_uses(nir_schedule_scoreboard *scoreboard) 1159bf215546Sopenharmony_ci{ 1160bf215546Sopenharmony_ci#ifdef NDEBUG 1161bf215546Sopenharmony_ci return; 1162bf215546Sopenharmony_ci#endif 1163bf215546Sopenharmony_ci 1164bf215546Sopenharmony_ci bool any_uses = false; 1165bf215546Sopenharmony_ci 1166bf215546Sopenharmony_ci hash_table_foreach(scoreboard->remaining_uses, entry) { 1167bf215546Sopenharmony_ci struct set *remaining_uses = entry->data; 1168bf215546Sopenharmony_ci 1169bf215546Sopenharmony_ci set_foreach(remaining_uses, instr_entry) { 1170bf215546Sopenharmony_ci if (!any_uses) { 1171bf215546Sopenharmony_ci fprintf(stderr, "Tracked uses remain after scheduling. " 1172bf215546Sopenharmony_ci "Affected instructions: \n"); 1173bf215546Sopenharmony_ci any_uses = true; 1174bf215546Sopenharmony_ci } 1175bf215546Sopenharmony_ci nir_print_instr(instr_entry->key, stderr); 1176bf215546Sopenharmony_ci fprintf(stderr, "\n"); 1177bf215546Sopenharmony_ci } 1178bf215546Sopenharmony_ci } 1179bf215546Sopenharmony_ci 1180bf215546Sopenharmony_ci assert(!any_uses); 1181bf215546Sopenharmony_ci} 1182bf215546Sopenharmony_ci 1183bf215546Sopenharmony_ci/** 1184bf215546Sopenharmony_ci * Schedules the NIR instructions to try to decrease stalls (for example, 1185bf215546Sopenharmony_ci * delaying texture reads) while managing register pressure. 1186bf215546Sopenharmony_ci * 1187bf215546Sopenharmony_ci * The threshold represents "number of NIR register/SSA def channels live 1188bf215546Sopenharmony_ci * before switching the scheduling heuristic to reduce register pressure", 1189bf215546Sopenharmony_ci * since most of our GPU architectures are scalar (extending to vector with a 1190bf215546Sopenharmony_ci * flag wouldn't be hard). This number should be a bit below the number of 1191bf215546Sopenharmony_ci * registers available (counting any that may be occupied by system value 1192bf215546Sopenharmony_ci * payload values, for example), since the heuristic may not always be able to 1193bf215546Sopenharmony_ci * free a register immediately. The amount below the limit is up to you to 1194bf215546Sopenharmony_ci * tune. 1195bf215546Sopenharmony_ci */ 1196bf215546Sopenharmony_civoid 1197bf215546Sopenharmony_cinir_schedule(nir_shader *shader, 1198bf215546Sopenharmony_ci const nir_schedule_options *options) 1199bf215546Sopenharmony_ci{ 1200bf215546Sopenharmony_ci nir_schedule_scoreboard *scoreboard = nir_schedule_get_scoreboard(shader, 1201bf215546Sopenharmony_ci options); 1202bf215546Sopenharmony_ci 1203bf215546Sopenharmony_ci if (debug) { 1204bf215546Sopenharmony_ci fprintf(stderr, "NIR shader before scheduling:\n"); 1205bf215546Sopenharmony_ci nir_print_shader(shader, stderr); 1206bf215546Sopenharmony_ci } 1207bf215546Sopenharmony_ci 1208bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 1209bf215546Sopenharmony_ci if (!function->impl) 1210bf215546Sopenharmony_ci continue; 1211bf215546Sopenharmony_ci 1212bf215546Sopenharmony_ci nir_foreach_block(block, function->impl) { 1213bf215546Sopenharmony_ci nir_schedule_block(scoreboard, block); 1214bf215546Sopenharmony_ci } 1215bf215546Sopenharmony_ci } 1216bf215546Sopenharmony_ci 1217bf215546Sopenharmony_ci nir_schedule_validate_uses(scoreboard); 1218bf215546Sopenharmony_ci 1219bf215546Sopenharmony_ci ralloc_free(scoreboard); 1220bf215546Sopenharmony_ci} 1221