1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 
22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Rob Clark <robclark@freedesktop.org> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "util/dag.h" 28bf215546Sopenharmony_ci#include "util/u_math.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "ir3.h" 31bf215546Sopenharmony_ci#include "ir3_compiler.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci#ifdef DEBUG 34bf215546Sopenharmony_ci#define SCHED_DEBUG (ir3_shader_debug & IR3_DBG_SCHEDMSGS) 35bf215546Sopenharmony_ci#else 36bf215546Sopenharmony_ci#define SCHED_DEBUG 0 37bf215546Sopenharmony_ci#endif 38bf215546Sopenharmony_ci#define d(fmt, ...) \ 39bf215546Sopenharmony_ci do { \ 40bf215546Sopenharmony_ci if (SCHED_DEBUG) { \ 41bf215546Sopenharmony_ci mesa_logi("SCHED: " fmt, ##__VA_ARGS__); \ 42bf215546Sopenharmony_ci } \ 43bf215546Sopenharmony_ci } while (0) 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci#define di(instr, fmt, ...) \ 46bf215546Sopenharmony_ci do { \ 47bf215546Sopenharmony_ci if (SCHED_DEBUG) { \ 48bf215546Sopenharmony_ci struct log_stream *stream = mesa_log_streami(); \ 49bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "SCHED: " fmt ": ", ##__VA_ARGS__); \ 50bf215546Sopenharmony_ci ir3_print_instr_stream(stream, instr); \ 51bf215546Sopenharmony_ci mesa_log_stream_destroy(stream); \ 52bf215546Sopenharmony_ci } \ 53bf215546Sopenharmony_ci } while (0) 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci/* 56bf215546Sopenharmony_ci * Instruction Scheduling: 57bf215546Sopenharmony_ci * 58bf215546Sopenharmony_ci * A block-level pre-RA scheduler, which works by creating a DAG of 59bf215546Sopenharmony_ci * instruction dependencies, and heuristically picking a DAG head 60bf215546Sopenharmony_ci * (instruction with no unscheduled dependencies). 
61bf215546Sopenharmony_ci * 62bf215546Sopenharmony_ci * Where possible, it tries to pick instructions that avoid nop delay 63bf215546Sopenharmony_ci * slots, but it will prefer to pick instructions that reduce (or do 64bf215546Sopenharmony_ci * not increase) the number of live values. 65bf215546Sopenharmony_ci * 66bf215546Sopenharmony_ci * If the only possible choices are instructions that increase the 67bf215546Sopenharmony_ci * number of live values, it will try to pick the one with the earliest 68bf215546Sopenharmony_ci * consumer (based on pre-sched program order). 69bf215546Sopenharmony_ci * 70bf215546Sopenharmony_ci * There are a few special cases that need to be handled, since sched 71bf215546Sopenharmony_ci * is currently independent of register allocation. Usages of address 72bf215546Sopenharmony_ci * register (a0.x) or predicate register (p0.x) must be serialized. Ie. 73bf215546Sopenharmony_ci * if you have two pairs of instructions that write the same special 74bf215546Sopenharmony_ci * register and then read it, then those pairs cannot be interleaved. 75bf215546Sopenharmony_ci * To solve this, when we are in such a scheduling "critical section", 76bf215546Sopenharmony_ci * and we encounter a conflicting write to a special register, we try 77bf215546Sopenharmony_ci * to schedule any remaining instructions that use that value first. 78bf215546Sopenharmony_ci * 79bf215546Sopenharmony_ci * TODO we can detect too-large live_values here.. would be a good place 80bf215546Sopenharmony_ci * to "spill" cheap things, like move from uniform/immed. (Constructing 81bf215546Sopenharmony_ci * list of ssa def consumers before sched pass would make this easier. 82bf215546Sopenharmony_ci * Also, in general it is general it might be best not to re-use load_immed 83bf215546Sopenharmony_ci * across blocks. 
84bf215546Sopenharmony_ci * 85bf215546Sopenharmony_ci * TODO we can use (abs)/(neg) src modifiers in a lot of cases to reduce 86bf215546Sopenharmony_ci * the # of immediates in play (or at least that would help with 87bf215546Sopenharmony_ci * dEQP-GLES31.functional.ubo.random.all_per_block_buffers.*).. probably 88bf215546Sopenharmony_ci * do this in a nir pass that inserts fneg/etc? The cp pass should fold 89bf215546Sopenharmony_ci * these into src modifiers.. 90bf215546Sopenharmony_ci */ 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_cistruct ir3_sched_ctx { 93bf215546Sopenharmony_ci struct ir3_block *block; /* the current block */ 94bf215546Sopenharmony_ci struct dag *dag; 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci struct list_head unscheduled_list; /* unscheduled instructions */ 97bf215546Sopenharmony_ci struct ir3_instruction *scheduled; /* last scheduled instr */ 98bf215546Sopenharmony_ci struct ir3_instruction *addr0; /* current a0.x user, if any */ 99bf215546Sopenharmony_ci struct ir3_instruction *addr1; /* current a1.x user, if any */ 100bf215546Sopenharmony_ci struct ir3_instruction *pred; /* current p0.x user, if any */ 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci struct ir3_instruction *split; /* most-recently-split a0/a1/p0 producer */ 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci int remaining_kills; 105bf215546Sopenharmony_ci int remaining_tex; 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_ci bool error; 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci unsigned ip; 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci int sy_delay; 112bf215546Sopenharmony_ci int ss_delay; 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci /* We order the scheduled (sy)/(ss) producers, and keep track of the 115bf215546Sopenharmony_ci * index of the last waited on instruction, so we can know which 116bf215546Sopenharmony_ci * instructions are still outstanding (and therefore would require us to 117bf215546Sopenharmony_ci * wait 
for all outstanding instructions before scheduling a use). 118bf215546Sopenharmony_ci */ 119bf215546Sopenharmony_ci int sy_index, first_outstanding_sy_index; 120bf215546Sopenharmony_ci int ss_index, first_outstanding_ss_index; 121bf215546Sopenharmony_ci}; 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_cistruct ir3_sched_node { 124bf215546Sopenharmony_ci struct dag_node dag; /* must be first for util_dynarray_foreach */ 125bf215546Sopenharmony_ci struct ir3_instruction *instr; 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci unsigned delay; 128bf215546Sopenharmony_ci unsigned max_delay; 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci unsigned sy_index; 131bf215546Sopenharmony_ci unsigned ss_index; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci /* For ready instructions, the earliest possible ip that it could be 134bf215546Sopenharmony_ci * scheduled. 135bf215546Sopenharmony_ci */ 136bf215546Sopenharmony_ci unsigned earliest_ip; 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci /* For instructions that are a meta:collect src, once we schedule 139bf215546Sopenharmony_ci * the first src of the collect, the entire vecN is live (at least 140bf215546Sopenharmony_ci * from the PoV of the first RA pass.. the 2nd scalar pass can fill 141bf215546Sopenharmony_ci * in some of the gaps, but often not all). So we want to help out 142bf215546Sopenharmony_ci * RA, and realize that as soon as we schedule the first collect 143bf215546Sopenharmony_ci * src, there is no penalty to schedule the remainder (ie. they 144bf215546Sopenharmony_ci * don't make additional values live). In fact we'd prefer to 145bf215546Sopenharmony_ci * schedule the rest ASAP to minimize the live range of the vecN. 
146bf215546Sopenharmony_ci * 147bf215546Sopenharmony_ci * For instructions that are the src of a collect, we track the 148bf215546Sopenharmony_ci * corresponding collect, and mark them as partially live as soon 149bf215546Sopenharmony_ci * as any one of the src's is scheduled. 150bf215546Sopenharmony_ci */ 151bf215546Sopenharmony_ci struct ir3_instruction *collect; 152bf215546Sopenharmony_ci bool partially_live; 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci /* Is this instruction a direct or indirect dependency for a kill? 155bf215546Sopenharmony_ci * If so, we should prioritize it when possible 156bf215546Sopenharmony_ci */ 157bf215546Sopenharmony_ci bool kill_path; 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci /* This node represents a shader output. A semi-common pattern in 160bf215546Sopenharmony_ci * shaders is something along the lines of: 161bf215546Sopenharmony_ci * 162bf215546Sopenharmony_ci * fragcolor.w = 1.0 163bf215546Sopenharmony_ci * 164bf215546Sopenharmony_ci * Which we'd prefer to schedule as late as possible, since it 165bf215546Sopenharmony_ci * produces a live value that is never killed/consumed. 
So detect 166bf215546Sopenharmony_ci * outputs up-front, and avoid scheduling them unless the reduce 167bf215546Sopenharmony_ci * register pressure (or at least are neutral) 168bf215546Sopenharmony_ci */ 169bf215546Sopenharmony_ci bool output; 170bf215546Sopenharmony_ci}; 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci#define foreach_sched_node(__n, __list) \ 173bf215546Sopenharmony_ci list_for_each_entry (struct ir3_sched_node, __n, __list, dag.link) 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_cistatic void sched_node_init(struct ir3_sched_ctx *ctx, 176bf215546Sopenharmony_ci struct ir3_instruction *instr); 177bf215546Sopenharmony_cistatic void sched_node_add_dep(struct ir3_instruction *instr, 178bf215546Sopenharmony_ci struct ir3_instruction *src, int i); 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_cistatic bool 181bf215546Sopenharmony_ciis_scheduled(struct ir3_instruction *instr) 182bf215546Sopenharmony_ci{ 183bf215546Sopenharmony_ci return !!(instr->flags & IR3_INSTR_MARK); 184bf215546Sopenharmony_ci} 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci/* check_src_cond() passing a ir3_sched_ctx. 
*/ 187bf215546Sopenharmony_cistatic bool 188bf215546Sopenharmony_cisched_check_src_cond(struct ir3_instruction *instr, 189bf215546Sopenharmony_ci bool (*cond)(struct ir3_instruction *, 190bf215546Sopenharmony_ci struct ir3_sched_ctx *), 191bf215546Sopenharmony_ci struct ir3_sched_ctx *ctx) 192bf215546Sopenharmony_ci{ 193bf215546Sopenharmony_ci foreach_ssa_src (src, instr) { 194bf215546Sopenharmony_ci /* meta:split/collect aren't real instructions, the thing that 195bf215546Sopenharmony_ci * we actually care about is *their* srcs 196bf215546Sopenharmony_ci */ 197bf215546Sopenharmony_ci if ((src->opc == OPC_META_SPLIT) || (src->opc == OPC_META_COLLECT)) { 198bf215546Sopenharmony_ci if (sched_check_src_cond(src, cond, ctx)) 199bf215546Sopenharmony_ci return true; 200bf215546Sopenharmony_ci } else { 201bf215546Sopenharmony_ci if (cond(src, ctx)) 202bf215546Sopenharmony_ci return true; 203bf215546Sopenharmony_ci } 204bf215546Sopenharmony_ci } 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci return false; 207bf215546Sopenharmony_ci} 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci/* Is this a sy producer that hasn't been waited on yet? 
*/ 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_cistatic bool 212bf215546Sopenharmony_ciis_outstanding_sy(struct ir3_instruction *instr, struct ir3_sched_ctx *ctx) 213bf215546Sopenharmony_ci{ 214bf215546Sopenharmony_ci if (!is_sy_producer(instr)) 215bf215546Sopenharmony_ci return false; 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci /* The sched node is only valid within the same block, we cannot 218bf215546Sopenharmony_ci * really say anything about src's from other blocks 219bf215546Sopenharmony_ci */ 220bf215546Sopenharmony_ci if (instr->block != ctx->block) 221bf215546Sopenharmony_ci return true; 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci struct ir3_sched_node *n = instr->data; 224bf215546Sopenharmony_ci return n->sy_index >= ctx->first_outstanding_sy_index; 225bf215546Sopenharmony_ci} 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_cistatic bool 228bf215546Sopenharmony_ciis_outstanding_ss(struct ir3_instruction *instr, struct ir3_sched_ctx *ctx) 229bf215546Sopenharmony_ci{ 230bf215546Sopenharmony_ci if (!is_ss_producer(instr)) 231bf215546Sopenharmony_ci return false; 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci /* The sched node is only valid within the same block, we cannot 234bf215546Sopenharmony_ci * really say anything about src's from other blocks 235bf215546Sopenharmony_ci */ 236bf215546Sopenharmony_ci if (instr->block != ctx->block) 237bf215546Sopenharmony_ci return true; 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci struct ir3_sched_node *n = instr->data; 240bf215546Sopenharmony_ci return n->ss_index >= ctx->first_outstanding_ss_index; 241bf215546Sopenharmony_ci} 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_cistatic unsigned 244bf215546Sopenharmony_cicycle_count(struct ir3_instruction *instr) 245bf215546Sopenharmony_ci{ 246bf215546Sopenharmony_ci if (instr->opc == OPC_META_COLLECT) { 247bf215546Sopenharmony_ci /* Assume that only immed/const sources produce moves */ 248bf215546Sopenharmony_ci unsigned n 
= 0; 249bf215546Sopenharmony_ci foreach_src (src, instr) { 250bf215546Sopenharmony_ci if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) 251bf215546Sopenharmony_ci n++; 252bf215546Sopenharmony_ci } 253bf215546Sopenharmony_ci return n; 254bf215546Sopenharmony_ci } else if (is_meta(instr)) { 255bf215546Sopenharmony_ci return 0; 256bf215546Sopenharmony_ci } else { 257bf215546Sopenharmony_ci return 1; 258bf215546Sopenharmony_ci } 259bf215546Sopenharmony_ci} 260bf215546Sopenharmony_ci 261bf215546Sopenharmony_cistatic void 262bf215546Sopenharmony_cischedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) 263bf215546Sopenharmony_ci{ 264bf215546Sopenharmony_ci assert(ctx->block == instr->block); 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci /* remove from depth list: 267bf215546Sopenharmony_ci */ 268bf215546Sopenharmony_ci list_delinit(&instr->node); 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci if (writes_addr0(instr)) { 271bf215546Sopenharmony_ci assert(ctx->addr0 == NULL); 272bf215546Sopenharmony_ci ctx->addr0 = instr; 273bf215546Sopenharmony_ci } 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci if (writes_addr1(instr)) { 276bf215546Sopenharmony_ci assert(ctx->addr1 == NULL); 277bf215546Sopenharmony_ci ctx->addr1 = instr; 278bf215546Sopenharmony_ci } 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ci if (writes_pred(instr)) { 281bf215546Sopenharmony_ci assert(ctx->pred == NULL); 282bf215546Sopenharmony_ci ctx->pred = instr; 283bf215546Sopenharmony_ci } 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci instr->flags |= IR3_INSTR_MARK; 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci di(instr, "schedule"); 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci list_addtail(&instr->node, &instr->block->instr_list); 290bf215546Sopenharmony_ci ctx->scheduled = instr; 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci if (is_kill_or_demote(instr)) { 293bf215546Sopenharmony_ci assert(ctx->remaining_kills > 0); 
294bf215546Sopenharmony_ci ctx->remaining_kills--; 295bf215546Sopenharmony_ci } 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci struct ir3_sched_node *n = instr->data; 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_ci /* If this instruction is a meta:collect src, mark the remaining 300bf215546Sopenharmony_ci * collect srcs as partially live. 301bf215546Sopenharmony_ci */ 302bf215546Sopenharmony_ci if (n->collect) { 303bf215546Sopenharmony_ci foreach_ssa_src (src, n->collect) { 304bf215546Sopenharmony_ci if (src->block != instr->block) 305bf215546Sopenharmony_ci continue; 306bf215546Sopenharmony_ci struct ir3_sched_node *sn = src->data; 307bf215546Sopenharmony_ci sn->partially_live = true; 308bf215546Sopenharmony_ci } 309bf215546Sopenharmony_ci } 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci bool counts_for_delay = is_alu(instr) || is_flow(instr); 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ci /* TODO: switch to "cycles". For now try to match ir3_delay. */ 314bf215546Sopenharmony_ci unsigned delay_cycles = counts_for_delay ? 1 + instr->repeat : 0; 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci /* We insert any nop's needed to get to earliest_ip, then advance 317bf215546Sopenharmony_ci * delay_cycles by scheduling the instruction. 
318bf215546Sopenharmony_ci */ 319bf215546Sopenharmony_ci ctx->ip = MAX2(ctx->ip, n->earliest_ip) + delay_cycles; 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) { 322bf215546Sopenharmony_ci unsigned delay = (unsigned)(uintptr_t)edge->data; 323bf215546Sopenharmony_ci struct ir3_sched_node *child = 324bf215546Sopenharmony_ci container_of(edge->child, struct ir3_sched_node, dag); 325bf215546Sopenharmony_ci child->earliest_ip = MAX2(child->earliest_ip, ctx->ip + delay); 326bf215546Sopenharmony_ci } 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci dag_prune_head(ctx->dag, &n->dag); 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci unsigned cycles = cycle_count(instr); 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci if (is_ss_producer(instr)) { 333bf215546Sopenharmony_ci ctx->ss_delay = soft_ss_delay(instr); 334bf215546Sopenharmony_ci n->ss_index = ctx->ss_index++; 335bf215546Sopenharmony_ci } else if (!is_meta(instr) && 336bf215546Sopenharmony_ci sched_check_src_cond(instr, is_outstanding_ss, ctx)) { 337bf215546Sopenharmony_ci ctx->ss_delay = 0; 338bf215546Sopenharmony_ci ctx->first_outstanding_ss_index = ctx->ss_index; 339bf215546Sopenharmony_ci } else if (ctx->ss_delay > 0) { 340bf215546Sopenharmony_ci ctx->ss_delay -= MIN2(cycles, ctx->ss_delay); 341bf215546Sopenharmony_ci } 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci if (is_sy_producer(instr)) { 344bf215546Sopenharmony_ci /* NOTE that this isn't an attempt to hide texture fetch latency, 345bf215546Sopenharmony_ci * but an attempt to hide the cost of switching to another warp. 346bf215546Sopenharmony_ci * If we can, we'd like to try to schedule another texture fetch 347bf215546Sopenharmony_ci * before scheduling something that would sync. 
348bf215546Sopenharmony_ci */ 349bf215546Sopenharmony_ci ctx->sy_delay = soft_sy_delay(instr, ctx->block->shader); 350bf215546Sopenharmony_ci assert(ctx->remaining_tex > 0); 351bf215546Sopenharmony_ci ctx->remaining_tex--; 352bf215546Sopenharmony_ci n->sy_index = ctx->sy_index++; 353bf215546Sopenharmony_ci } else if (!is_meta(instr) && 354bf215546Sopenharmony_ci sched_check_src_cond(instr, is_outstanding_sy, ctx)) { 355bf215546Sopenharmony_ci ctx->sy_delay = 0; 356bf215546Sopenharmony_ci ctx->first_outstanding_sy_index = ctx->sy_index; 357bf215546Sopenharmony_ci } else if (ctx->sy_delay > 0) { 358bf215546Sopenharmony_ci ctx->sy_delay -= MIN2(cycles, ctx->sy_delay); 359bf215546Sopenharmony_ci } 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci} 362bf215546Sopenharmony_ci 363bf215546Sopenharmony_cistruct ir3_sched_notes { 364bf215546Sopenharmony_ci /* there is at least one kill which could be scheduled, except 365bf215546Sopenharmony_ci * for unscheduled bary.f's: 366bf215546Sopenharmony_ci */ 367bf215546Sopenharmony_ci bool blocked_kill; 368bf215546Sopenharmony_ci /* there is at least one instruction that could be scheduled, 369bf215546Sopenharmony_ci * except for conflicting address/predicate register usage: 370bf215546Sopenharmony_ci */ 371bf215546Sopenharmony_ci bool addr0_conflict, addr1_conflict, pred_conflict; 372bf215546Sopenharmony_ci}; 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_cistatic bool 375bf215546Sopenharmony_cishould_skip(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) 376bf215546Sopenharmony_ci{ 377bf215546Sopenharmony_ci if (ctx->remaining_kills && (is_tex(instr) || is_mem(instr))) { 378bf215546Sopenharmony_ci /* avoid texture/memory access if we have unscheduled kills 379bf215546Sopenharmony_ci * that could make the expensive operation unnecessary. 
By 380bf215546Sopenharmony_ci * definition, if there are remaining kills, and this instr 381bf215546Sopenharmony_ci * is not a dependency of a kill, there are other instructions 382bf215546Sopenharmony_ci * that we can choose from. 383bf215546Sopenharmony_ci */ 384bf215546Sopenharmony_ci struct ir3_sched_node *n = instr->data; 385bf215546Sopenharmony_ci if (!n->kill_path) 386bf215546Sopenharmony_ci return true; 387bf215546Sopenharmony_ci } 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci return false; 390bf215546Sopenharmony_ci} 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci/* could an instruction be scheduled if specified ssa src was scheduled? */ 393bf215546Sopenharmony_cistatic bool 394bf215546Sopenharmony_cicould_sched(struct ir3_sched_ctx *ctx, 395bf215546Sopenharmony_ci struct ir3_instruction *instr, struct ir3_instruction *src) 396bf215546Sopenharmony_ci{ 397bf215546Sopenharmony_ci foreach_ssa_src (other_src, instr) { 398bf215546Sopenharmony_ci /* if dependency not scheduled, we aren't ready yet: */ 399bf215546Sopenharmony_ci if ((src != other_src) && !is_scheduled(other_src)) { 400bf215546Sopenharmony_ci return false; 401bf215546Sopenharmony_ci } 402bf215546Sopenharmony_ci } 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_ci /* Instructions not in the current block can never be scheduled. 405bf215546Sopenharmony_ci */ 406bf215546Sopenharmony_ci if (instr->block != src->block) 407bf215546Sopenharmony_ci return false; 408bf215546Sopenharmony_ci 409bf215546Sopenharmony_ci return !should_skip(ctx, instr); 410bf215546Sopenharmony_ci} 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci/* Check if instruction is ok to schedule. Make sure it is not blocked 413bf215546Sopenharmony_ci * by use of addr/predicate register, etc. 
414bf215546Sopenharmony_ci */ 415bf215546Sopenharmony_cistatic bool 416bf215546Sopenharmony_cicheck_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, 417bf215546Sopenharmony_ci struct ir3_instruction *instr) 418bf215546Sopenharmony_ci{ 419bf215546Sopenharmony_ci assert(!is_scheduled(instr)); 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci if (instr == ctx->split) { 422bf215546Sopenharmony_ci /* Don't schedule instructions created by splitting a a0.x/a1.x/p0.x 423bf215546Sopenharmony_ci * write until another "normal" instruction has been scheduled. 424bf215546Sopenharmony_ci */ 425bf215546Sopenharmony_ci return false; 426bf215546Sopenharmony_ci } 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci if (should_skip(ctx, instr)) 429bf215546Sopenharmony_ci return false; 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci /* For instructions that write address register we need to 432bf215546Sopenharmony_ci * make sure there is at least one instruction that uses the 433bf215546Sopenharmony_ci * addr value which is otherwise ready. 434bf215546Sopenharmony_ci * 435bf215546Sopenharmony_ci * NOTE if any instructions use pred register and have other 436bf215546Sopenharmony_ci * src args, we would need to do the same for writes_pred().. 
437bf215546Sopenharmony_ci */ 438bf215546Sopenharmony_ci if (writes_addr0(instr)) { 439bf215546Sopenharmony_ci struct ir3 *ir = instr->block->shader; 440bf215546Sopenharmony_ci bool ready = false; 441bf215546Sopenharmony_ci for (unsigned i = 0; (i < ir->a0_users_count) && !ready; i++) { 442bf215546Sopenharmony_ci struct ir3_instruction *indirect = ir->a0_users[i]; 443bf215546Sopenharmony_ci if (!indirect) 444bf215546Sopenharmony_ci continue; 445bf215546Sopenharmony_ci if (indirect->address->def != instr->dsts[0]) 446bf215546Sopenharmony_ci continue; 447bf215546Sopenharmony_ci ready = could_sched(ctx, indirect, instr); 448bf215546Sopenharmony_ci } 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci /* nothing could be scheduled, so keep looking: */ 451bf215546Sopenharmony_ci if (!ready) 452bf215546Sopenharmony_ci return false; 453bf215546Sopenharmony_ci } 454bf215546Sopenharmony_ci 455bf215546Sopenharmony_ci if (writes_addr1(instr)) { 456bf215546Sopenharmony_ci struct ir3 *ir = instr->block->shader; 457bf215546Sopenharmony_ci bool ready = false; 458bf215546Sopenharmony_ci for (unsigned i = 0; (i < ir->a1_users_count) && !ready; i++) { 459bf215546Sopenharmony_ci struct ir3_instruction *indirect = ir->a1_users[i]; 460bf215546Sopenharmony_ci if (!indirect) 461bf215546Sopenharmony_ci continue; 462bf215546Sopenharmony_ci if (indirect->address->def != instr->dsts[0]) 463bf215546Sopenharmony_ci continue; 464bf215546Sopenharmony_ci ready = could_sched(ctx, indirect, instr); 465bf215546Sopenharmony_ci } 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci /* nothing could be scheduled, so keep looking: */ 468bf215546Sopenharmony_ci if (!ready) 469bf215546Sopenharmony_ci return false; 470bf215546Sopenharmony_ci } 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_ci /* if this is a write to address/predicate register, and that 473bf215546Sopenharmony_ci * register is currently in use, we need to defer until it is 474bf215546Sopenharmony_ci * free: 
475bf215546Sopenharmony_ci */ 476bf215546Sopenharmony_ci if (writes_addr0(instr) && ctx->addr0) { 477bf215546Sopenharmony_ci assert(ctx->addr0 != instr); 478bf215546Sopenharmony_ci notes->addr0_conflict = true; 479bf215546Sopenharmony_ci return false; 480bf215546Sopenharmony_ci } 481bf215546Sopenharmony_ci 482bf215546Sopenharmony_ci if (writes_addr1(instr) && ctx->addr1) { 483bf215546Sopenharmony_ci assert(ctx->addr1 != instr); 484bf215546Sopenharmony_ci notes->addr1_conflict = true; 485bf215546Sopenharmony_ci return false; 486bf215546Sopenharmony_ci } 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_ci if (writes_pred(instr) && ctx->pred) { 489bf215546Sopenharmony_ci assert(ctx->pred != instr); 490bf215546Sopenharmony_ci notes->pred_conflict = true; 491bf215546Sopenharmony_ci return false; 492bf215546Sopenharmony_ci } 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci /* if the instruction is a kill, we need to ensure *every* 495bf215546Sopenharmony_ci * bary.f is scheduled. The hw seems unhappy if the thread 496bf215546Sopenharmony_ci * gets killed before the end-input (ei) flag is hit. 497bf215546Sopenharmony_ci * 498bf215546Sopenharmony_ci * We could do this by adding each bary.f instruction as 499bf215546Sopenharmony_ci * virtual ssa src for the kill instruction. But we have 500bf215546Sopenharmony_ci * fixed length instr->srcs[]. 501bf215546Sopenharmony_ci * 502bf215546Sopenharmony_ci * TODO we could handle this by false-deps now, probably. 
503bf215546Sopenharmony_ci */ 504bf215546Sopenharmony_ci if (is_kill_or_demote(instr)) { 505bf215546Sopenharmony_ci struct ir3 *ir = instr->block->shader; 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_ci for (unsigned i = 0; i < ir->baryfs_count; i++) { 508bf215546Sopenharmony_ci struct ir3_instruction *baryf = ir->baryfs[i]; 509bf215546Sopenharmony_ci if (baryf->flags & IR3_INSTR_UNUSED) 510bf215546Sopenharmony_ci continue; 511bf215546Sopenharmony_ci if (!is_scheduled(baryf)) { 512bf215546Sopenharmony_ci notes->blocked_kill = true; 513bf215546Sopenharmony_ci return false; 514bf215546Sopenharmony_ci } 515bf215546Sopenharmony_ci } 516bf215546Sopenharmony_ci } 517bf215546Sopenharmony_ci 518bf215546Sopenharmony_ci return true; 519bf215546Sopenharmony_ci} 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_ci/* Find the instr->ip of the closest use of an instruction, in 522bf215546Sopenharmony_ci * pre-sched order. This isn't going to be the same as post-sched 523bf215546Sopenharmony_ci * order, but it is a reasonable approximation to limit scheduling 524bf215546Sopenharmony_ci * instructions *too* early. This is mostly to prevent bad behavior 525bf215546Sopenharmony_ci * in cases where we have a large number of possible instructions 526bf215546Sopenharmony_ci * to choose, to avoid creating too much parallelism (ie. 
blowing 527bf215546Sopenharmony_ci * up register pressure) 528bf215546Sopenharmony_ci * 529bf215546Sopenharmony_ci * See 530bf215546Sopenharmony_ci * dEQP-GLES31.functional.atomic_counter.layout.reverse_offset.inc_dec.8_counters_5_calls_1_thread 531bf215546Sopenharmony_ci */ 532bf215546Sopenharmony_cistatic int 533bf215546Sopenharmony_cinearest_use(struct ir3_instruction *instr) 534bf215546Sopenharmony_ci{ 535bf215546Sopenharmony_ci unsigned nearest = ~0; 536bf215546Sopenharmony_ci foreach_ssa_use (use, instr) 537bf215546Sopenharmony_ci if (!is_scheduled(use)) 538bf215546Sopenharmony_ci nearest = MIN2(nearest, use->ip); 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_ci /* slight hack.. this heuristic tends to push bary.f's to later 541bf215546Sopenharmony_ci * in the shader, closer to their uses. But we actually would 542bf215546Sopenharmony_ci * prefer to get these scheduled earlier, to unlock varying 543bf215546Sopenharmony_ci * storage for more VS jobs: 544bf215546Sopenharmony_ci */ 545bf215546Sopenharmony_ci if (is_input(instr)) 546bf215546Sopenharmony_ci nearest /= 2; 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci return nearest; 549bf215546Sopenharmony_ci} 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_cistatic bool 552bf215546Sopenharmony_ciis_only_nonscheduled_use(struct ir3_instruction *instr, 553bf215546Sopenharmony_ci struct ir3_instruction *use) 554bf215546Sopenharmony_ci{ 555bf215546Sopenharmony_ci foreach_ssa_use (other_use, instr) { 556bf215546Sopenharmony_ci if (other_use != use && !is_scheduled(other_use)) 557bf215546Sopenharmony_ci return false; 558bf215546Sopenharmony_ci } 559bf215546Sopenharmony_ci 560bf215546Sopenharmony_ci return true; 561bf215546Sopenharmony_ci} 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_cistatic unsigned 564bf215546Sopenharmony_cinew_regs(struct ir3_instruction *instr) 565bf215546Sopenharmony_ci{ 566bf215546Sopenharmony_ci unsigned regs = 0; 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_ci 
foreach_dst (dst, instr) { 569bf215546Sopenharmony_ci if (!is_dest_gpr(dst)) 570bf215546Sopenharmony_ci continue; 571bf215546Sopenharmony_ci regs += reg_elems(dst); 572bf215546Sopenharmony_ci } 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_ci return regs; 575bf215546Sopenharmony_ci} 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_ci/* find net change to live values if instruction were scheduled: */ 578bf215546Sopenharmony_cistatic int 579bf215546Sopenharmony_cilive_effect(struct ir3_instruction *instr) 580bf215546Sopenharmony_ci{ 581bf215546Sopenharmony_ci struct ir3_sched_node *n = instr->data; 582bf215546Sopenharmony_ci int new_live = 583bf215546Sopenharmony_ci (n->partially_live || !instr->uses || instr->uses->entries == 0) 584bf215546Sopenharmony_ci ? 0 585bf215546Sopenharmony_ci : new_regs(instr); 586bf215546Sopenharmony_ci int freed_live = 0; 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_ci /* if we schedule something that causes a vecN to be live, 589bf215546Sopenharmony_ci * then count all it's other components too: 590bf215546Sopenharmony_ci */ 591bf215546Sopenharmony_ci if (n->collect) 592bf215546Sopenharmony_ci new_live *= n->collect->srcs_count; 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci foreach_ssa_src_n (src, n, instr) { 595bf215546Sopenharmony_ci if (__is_false_dep(instr, n)) 596bf215546Sopenharmony_ci continue; 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci if (instr->block != src->block) 599bf215546Sopenharmony_ci continue; 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci if (is_only_nonscheduled_use(src, instr)) 602bf215546Sopenharmony_ci freed_live += new_regs(src); 603bf215546Sopenharmony_ci } 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_ci return new_live - freed_live; 606bf215546Sopenharmony_ci} 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci/* Determine if this is an instruction that we'd prefer not to schedule 609bf215546Sopenharmony_ci * yet, in order to avoid an (ss)/(sy) sync. 
This is limited by the 610bf215546Sopenharmony_ci * ss_delay/sy_delay counters, ie. the more cycles it has been since 611bf215546Sopenharmony_ci * the last SFU/tex, the less costly a sync would be, and the number of 612bf215546Sopenharmony_ci * outstanding SFU/tex instructions to prevent a blowup in register pressure. 613bf215546Sopenharmony_ci */ 614bf215546Sopenharmony_cistatic bool 615bf215546Sopenharmony_cishould_defer(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) 616bf215546Sopenharmony_ci{ 617bf215546Sopenharmony_ci if (ctx->ss_delay) { 618bf215546Sopenharmony_ci if (sched_check_src_cond(instr, is_outstanding_ss, ctx)) 619bf215546Sopenharmony_ci return true; 620bf215546Sopenharmony_ci } 621bf215546Sopenharmony_ci 622bf215546Sopenharmony_ci /* We mostly just want to try to schedule another texture fetch 623bf215546Sopenharmony_ci * before scheduling something that would (sy) sync, so we can 624bf215546Sopenharmony_ci * limit this rule to cases where there are remaining texture 625bf215546Sopenharmony_ci * fetches 626bf215546Sopenharmony_ci */ 627bf215546Sopenharmony_ci if (ctx->sy_delay && ctx->remaining_tex) { 628bf215546Sopenharmony_ci if (sched_check_src_cond(instr, is_outstanding_sy, ctx)) 629bf215546Sopenharmony_ci return true; 630bf215546Sopenharmony_ci } 631bf215546Sopenharmony_ci 632bf215546Sopenharmony_ci /* Avoid scheduling too many outstanding texture or sfu instructions at 633bf215546Sopenharmony_ci * once by deferring further tex/SFU instructions. This both prevents 634bf215546Sopenharmony_ci * stalls when the queue of texture/sfu instructions becomes too large, 635bf215546Sopenharmony_ci * and prevents unacceptably large increases in register pressure from too 636bf215546Sopenharmony_ci * many outstanding texture instructions. 
637bf215546Sopenharmony_ci */ 638bf215546Sopenharmony_ci if (ctx->sy_index - ctx->first_outstanding_sy_index >= 8 && is_sy_producer(instr)) 639bf215546Sopenharmony_ci return true; 640bf215546Sopenharmony_ci 641bf215546Sopenharmony_ci if (ctx->ss_index - ctx->first_outstanding_ss_index >= 8 && is_ss_producer(instr)) 642bf215546Sopenharmony_ci return true; 643bf215546Sopenharmony_ci 644bf215546Sopenharmony_ci return false; 645bf215546Sopenharmony_ci} 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_cistatic struct ir3_sched_node *choose_instr_inc(struct ir3_sched_ctx *ctx, 648bf215546Sopenharmony_ci struct ir3_sched_notes *notes, 649bf215546Sopenharmony_ci bool defer, bool avoid_output); 650bf215546Sopenharmony_ci 651bf215546Sopenharmony_cienum choose_instr_dec_rank { 652bf215546Sopenharmony_ci DEC_NEUTRAL, 653bf215546Sopenharmony_ci DEC_NEUTRAL_READY, 654bf215546Sopenharmony_ci DEC_FREED, 655bf215546Sopenharmony_ci DEC_FREED_READY, 656bf215546Sopenharmony_ci}; 657bf215546Sopenharmony_ci 658bf215546Sopenharmony_cistatic const char * 659bf215546Sopenharmony_cidec_rank_name(enum choose_instr_dec_rank rank) 660bf215546Sopenharmony_ci{ 661bf215546Sopenharmony_ci switch (rank) { 662bf215546Sopenharmony_ci case DEC_NEUTRAL: 663bf215546Sopenharmony_ci return "neutral"; 664bf215546Sopenharmony_ci case DEC_NEUTRAL_READY: 665bf215546Sopenharmony_ci return "neutral+ready"; 666bf215546Sopenharmony_ci case DEC_FREED: 667bf215546Sopenharmony_ci return "freed"; 668bf215546Sopenharmony_ci case DEC_FREED_READY: 669bf215546Sopenharmony_ci return "freed+ready"; 670bf215546Sopenharmony_ci default: 671bf215546Sopenharmony_ci return NULL; 672bf215546Sopenharmony_ci } 673bf215546Sopenharmony_ci} 674bf215546Sopenharmony_ci 675bf215546Sopenharmony_cistatic unsigned 676bf215546Sopenharmony_cinode_delay(struct ir3_sched_ctx *ctx, struct ir3_sched_node *n) 677bf215546Sopenharmony_ci{ 678bf215546Sopenharmony_ci return MAX2(n->earliest_ip, ctx->ip) - ctx->ip; 679bf215546Sopenharmony_ci} 
680bf215546Sopenharmony_ci 681bf215546Sopenharmony_ci/** 682bf215546Sopenharmony_ci * Chooses an instruction to schedule using the Goodman/Hsu (1988) CSR (Code 683bf215546Sopenharmony_ci * Scheduling for Register pressure) heuristic. 684bf215546Sopenharmony_ci * 685bf215546Sopenharmony_ci * Only handles the case of choosing instructions that reduce register pressure 686bf215546Sopenharmony_ci * or are even. 687bf215546Sopenharmony_ci */ 688bf215546Sopenharmony_cistatic struct ir3_sched_node * 689bf215546Sopenharmony_cichoose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, 690bf215546Sopenharmony_ci bool defer) 691bf215546Sopenharmony_ci{ 692bf215546Sopenharmony_ci const char *mode = defer ? "-d" : ""; 693bf215546Sopenharmony_ci struct ir3_sched_node *chosen = NULL; 694bf215546Sopenharmony_ci enum choose_instr_dec_rank chosen_rank = DEC_NEUTRAL; 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_ci foreach_sched_node (n, &ctx->dag->heads) { 697bf215546Sopenharmony_ci if (defer && should_defer(ctx, n->instr)) 698bf215546Sopenharmony_ci continue; 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_ci unsigned d = node_delay(ctx, n); 701bf215546Sopenharmony_ci 702bf215546Sopenharmony_ci int live = live_effect(n->instr); 703bf215546Sopenharmony_ci if (live > 0) 704bf215546Sopenharmony_ci continue; 705bf215546Sopenharmony_ci 706bf215546Sopenharmony_ci if (!check_instr(ctx, notes, n->instr)) 707bf215546Sopenharmony_ci continue; 708bf215546Sopenharmony_ci 709bf215546Sopenharmony_ci enum choose_instr_dec_rank rank; 710bf215546Sopenharmony_ci if (live < 0) { 711bf215546Sopenharmony_ci /* Prioritize instrs which free up regs and can be scheduled with no 712bf215546Sopenharmony_ci * delay. 
713bf215546Sopenharmony_ci */ 714bf215546Sopenharmony_ci if (d == 0) 715bf215546Sopenharmony_ci rank = DEC_FREED_READY; 716bf215546Sopenharmony_ci else 717bf215546Sopenharmony_ci rank = DEC_FREED; 718bf215546Sopenharmony_ci } else { 719bf215546Sopenharmony_ci /* Contra the paper, pick a leader with no effect on used regs. This 720bf215546Sopenharmony_ci * may open up new opportunities, as otherwise a single-operand instr 721bf215546Sopenharmony_ci * consuming a value will tend to block finding freeing that value. 722bf215546Sopenharmony_ci * This had a massive effect on reducing spilling on V3D. 723bf215546Sopenharmony_ci * 724bf215546Sopenharmony_ci * XXX: Should this prioritize ready? 725bf215546Sopenharmony_ci */ 726bf215546Sopenharmony_ci if (d == 0) 727bf215546Sopenharmony_ci rank = DEC_NEUTRAL_READY; 728bf215546Sopenharmony_ci else 729bf215546Sopenharmony_ci rank = DEC_NEUTRAL; 730bf215546Sopenharmony_ci } 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci /* Prefer higher-ranked instructions, or in the case of a rank tie, the 733bf215546Sopenharmony_ci * highest latency-to-end-of-program instruction. 
734bf215546Sopenharmony_ci */ 735bf215546Sopenharmony_ci if (!chosen || rank > chosen_rank || 736bf215546Sopenharmony_ci (rank == chosen_rank && chosen->max_delay < n->max_delay)) { 737bf215546Sopenharmony_ci chosen = n; 738bf215546Sopenharmony_ci chosen_rank = rank; 739bf215546Sopenharmony_ci } 740bf215546Sopenharmony_ci } 741bf215546Sopenharmony_ci 742bf215546Sopenharmony_ci if (chosen) { 743bf215546Sopenharmony_ci di(chosen->instr, "dec%s: chose (%s)", mode, dec_rank_name(chosen_rank)); 744bf215546Sopenharmony_ci return chosen; 745bf215546Sopenharmony_ci } 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci return choose_instr_inc(ctx, notes, defer, true); 748bf215546Sopenharmony_ci} 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_cienum choose_instr_inc_rank { 751bf215546Sopenharmony_ci INC_DISTANCE, 752bf215546Sopenharmony_ci INC_DISTANCE_READY, 753bf215546Sopenharmony_ci}; 754bf215546Sopenharmony_ci 755bf215546Sopenharmony_cistatic const char * 756bf215546Sopenharmony_ciinc_rank_name(enum choose_instr_inc_rank rank) 757bf215546Sopenharmony_ci{ 758bf215546Sopenharmony_ci switch (rank) { 759bf215546Sopenharmony_ci case INC_DISTANCE: 760bf215546Sopenharmony_ci return "distance"; 761bf215546Sopenharmony_ci case INC_DISTANCE_READY: 762bf215546Sopenharmony_ci return "distance+ready"; 763bf215546Sopenharmony_ci default: 764bf215546Sopenharmony_ci return NULL; 765bf215546Sopenharmony_ci } 766bf215546Sopenharmony_ci} 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ci/** 769bf215546Sopenharmony_ci * When we can't choose an instruction that reduces register pressure or 770bf215546Sopenharmony_ci * is neutral, we end up here to try and pick the least bad option. 
771bf215546Sopenharmony_ci */ 772bf215546Sopenharmony_cistatic struct ir3_sched_node * 773bf215546Sopenharmony_cichoose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, 774bf215546Sopenharmony_ci bool defer, bool avoid_output) 775bf215546Sopenharmony_ci{ 776bf215546Sopenharmony_ci const char *mode = defer ? "-d" : ""; 777bf215546Sopenharmony_ci struct ir3_sched_node *chosen = NULL; 778bf215546Sopenharmony_ci enum choose_instr_inc_rank chosen_rank = INC_DISTANCE; 779bf215546Sopenharmony_ci 780bf215546Sopenharmony_ci /* 781bf215546Sopenharmony_ci * From hear on out, we are picking something that increases 782bf215546Sopenharmony_ci * register pressure. So try to pick something which will 783bf215546Sopenharmony_ci * be consumed soon: 784bf215546Sopenharmony_ci */ 785bf215546Sopenharmony_ci unsigned chosen_distance = 0; 786bf215546Sopenharmony_ci 787bf215546Sopenharmony_ci /* Pick the max delay of the remaining ready set. */ 788bf215546Sopenharmony_ci foreach_sched_node (n, &ctx->dag->heads) { 789bf215546Sopenharmony_ci if (avoid_output && n->output) 790bf215546Sopenharmony_ci continue; 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_ci if (defer && should_defer(ctx, n->instr)) 793bf215546Sopenharmony_ci continue; 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_ci if (!check_instr(ctx, notes, n->instr)) 796bf215546Sopenharmony_ci continue; 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci unsigned d = node_delay(ctx, n); 799bf215546Sopenharmony_ci 800bf215546Sopenharmony_ci enum choose_instr_inc_rank rank; 801bf215546Sopenharmony_ci if (d == 0) 802bf215546Sopenharmony_ci rank = INC_DISTANCE_READY; 803bf215546Sopenharmony_ci else 804bf215546Sopenharmony_ci rank = INC_DISTANCE; 805bf215546Sopenharmony_ci 806bf215546Sopenharmony_ci unsigned distance = nearest_use(n->instr); 807bf215546Sopenharmony_ci 808bf215546Sopenharmony_ci if (!chosen || rank > chosen_rank || 809bf215546Sopenharmony_ci (rank == chosen_rank && distance < chosen_distance)) 
{ 810bf215546Sopenharmony_ci chosen = n; 811bf215546Sopenharmony_ci chosen_distance = distance; 812bf215546Sopenharmony_ci chosen_rank = rank; 813bf215546Sopenharmony_ci } 814bf215546Sopenharmony_ci } 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci if (chosen) { 817bf215546Sopenharmony_ci di(chosen->instr, "inc%s: chose (%s)", mode, inc_rank_name(chosen_rank)); 818bf215546Sopenharmony_ci return chosen; 819bf215546Sopenharmony_ci } 820bf215546Sopenharmony_ci 821bf215546Sopenharmony_ci return NULL; 822bf215546Sopenharmony_ci} 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci/* Handles instruction selections for instructions we want to prioritize 825bf215546Sopenharmony_ci * even if csp/csr would not pick them. 826bf215546Sopenharmony_ci */ 827bf215546Sopenharmony_cistatic struct ir3_sched_node * 828bf215546Sopenharmony_cichoose_instr_prio(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes) 829bf215546Sopenharmony_ci{ 830bf215546Sopenharmony_ci struct ir3_sched_node *chosen = NULL; 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_ci foreach_sched_node (n, &ctx->dag->heads) { 833bf215546Sopenharmony_ci /* 834bf215546Sopenharmony_ci * - phi nodes and inputs must be scheduled first 835bf215546Sopenharmony_ci * - split should be scheduled first, so that the vector value is 836bf215546Sopenharmony_ci * killed as soon as possible. RA cannot split up the vector and 837bf215546Sopenharmony_ci * reuse components that have been killed until it's been killed. 838bf215546Sopenharmony_ci * - collect, on the other hand, should be treated as a "normal" 839bf215546Sopenharmony_ci * instruction, and may add to register pressure if its sources are 840bf215546Sopenharmony_ci * part of another vector or immediates. 
841bf215546Sopenharmony_ci */ 842bf215546Sopenharmony_ci if (!is_meta(n->instr) || n->instr->opc == OPC_META_COLLECT) 843bf215546Sopenharmony_ci continue; 844bf215546Sopenharmony_ci 845bf215546Sopenharmony_ci if (!chosen || (chosen->max_delay < n->max_delay)) 846bf215546Sopenharmony_ci chosen = n; 847bf215546Sopenharmony_ci } 848bf215546Sopenharmony_ci 849bf215546Sopenharmony_ci if (chosen) { 850bf215546Sopenharmony_ci di(chosen->instr, "prio: chose (meta)"); 851bf215546Sopenharmony_ci return chosen; 852bf215546Sopenharmony_ci } 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_ci return NULL; 855bf215546Sopenharmony_ci} 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_cistatic void 858bf215546Sopenharmony_cidump_state(struct ir3_sched_ctx *ctx) 859bf215546Sopenharmony_ci{ 860bf215546Sopenharmony_ci if (!SCHED_DEBUG) 861bf215546Sopenharmony_ci return; 862bf215546Sopenharmony_ci 863bf215546Sopenharmony_ci foreach_sched_node (n, &ctx->dag->heads) { 864bf215546Sopenharmony_ci di(n->instr, "maxdel=%3d le=%d del=%u ", n->max_delay, 865bf215546Sopenharmony_ci live_effect(n->instr), node_delay(ctx, n)); 866bf215546Sopenharmony_ci 867bf215546Sopenharmony_ci util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) { 868bf215546Sopenharmony_ci struct ir3_sched_node *child = (struct ir3_sched_node *)edge->child; 869bf215546Sopenharmony_ci 870bf215546Sopenharmony_ci di(child->instr, " -> (%d parents) ", child->dag.parent_count); 871bf215546Sopenharmony_ci } 872bf215546Sopenharmony_ci } 873bf215546Sopenharmony_ci} 874bf215546Sopenharmony_ci 875bf215546Sopenharmony_ci/* find instruction to schedule: */ 876bf215546Sopenharmony_cistatic struct ir3_instruction * 877bf215546Sopenharmony_cichoose_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes) 878bf215546Sopenharmony_ci{ 879bf215546Sopenharmony_ci struct ir3_sched_node *chosen; 880bf215546Sopenharmony_ci 881bf215546Sopenharmony_ci dump_state(ctx); 882bf215546Sopenharmony_ci 883bf215546Sopenharmony_ci chosen = 
choose_instr_prio(ctx, notes); 884bf215546Sopenharmony_ci if (chosen) 885bf215546Sopenharmony_ci return chosen->instr; 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci chosen = choose_instr_dec(ctx, notes, true); 888bf215546Sopenharmony_ci if (chosen) 889bf215546Sopenharmony_ci return chosen->instr; 890bf215546Sopenharmony_ci 891bf215546Sopenharmony_ci chosen = choose_instr_dec(ctx, notes, false); 892bf215546Sopenharmony_ci if (chosen) 893bf215546Sopenharmony_ci return chosen->instr; 894bf215546Sopenharmony_ci 895bf215546Sopenharmony_ci chosen = choose_instr_inc(ctx, notes, false, false); 896bf215546Sopenharmony_ci if (chosen) 897bf215546Sopenharmony_ci return chosen->instr; 898bf215546Sopenharmony_ci 899bf215546Sopenharmony_ci return NULL; 900bf215546Sopenharmony_ci} 901bf215546Sopenharmony_ci 902bf215546Sopenharmony_cistatic struct ir3_instruction * 903bf215546Sopenharmony_cisplit_instr(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr) 904bf215546Sopenharmony_ci{ 905bf215546Sopenharmony_ci struct ir3_instruction *new_instr = ir3_instr_clone(orig_instr); 906bf215546Sopenharmony_ci di(new_instr, "split instruction"); 907bf215546Sopenharmony_ci sched_node_init(ctx, new_instr); 908bf215546Sopenharmony_ci return new_instr; 909bf215546Sopenharmony_ci} 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_ci/* "spill" the address registers by remapping any unscheduled 912bf215546Sopenharmony_ci * instructions which depend on the current address register 913bf215546Sopenharmony_ci * to a clone of the instruction which wrote the address reg. 
914bf215546Sopenharmony_ci */ 915bf215546Sopenharmony_cistatic struct ir3_instruction * 916bf215546Sopenharmony_cisplit_addr(struct ir3_sched_ctx *ctx, struct ir3_instruction **addr, 917bf215546Sopenharmony_ci struct ir3_instruction **users, unsigned users_count) 918bf215546Sopenharmony_ci{ 919bf215546Sopenharmony_ci struct ir3_instruction *new_addr = NULL; 920bf215546Sopenharmony_ci unsigned i; 921bf215546Sopenharmony_ci 922bf215546Sopenharmony_ci assert(*addr); 923bf215546Sopenharmony_ci 924bf215546Sopenharmony_ci for (i = 0; i < users_count; i++) { 925bf215546Sopenharmony_ci struct ir3_instruction *indirect = users[i]; 926bf215546Sopenharmony_ci 927bf215546Sopenharmony_ci if (!indirect) 928bf215546Sopenharmony_ci continue; 929bf215546Sopenharmony_ci 930bf215546Sopenharmony_ci /* skip instructions already scheduled: */ 931bf215546Sopenharmony_ci if (is_scheduled(indirect)) 932bf215546Sopenharmony_ci continue; 933bf215546Sopenharmony_ci 934bf215546Sopenharmony_ci /* remap remaining instructions using current addr 935bf215546Sopenharmony_ci * to new addr: 936bf215546Sopenharmony_ci */ 937bf215546Sopenharmony_ci if (indirect->address->def == (*addr)->dsts[0]) { 938bf215546Sopenharmony_ci if (!new_addr) { 939bf215546Sopenharmony_ci new_addr = split_instr(ctx, *addr); 940bf215546Sopenharmony_ci /* original addr is scheduled, but new one isn't: */ 941bf215546Sopenharmony_ci new_addr->flags &= ~IR3_INSTR_MARK; 942bf215546Sopenharmony_ci } 943bf215546Sopenharmony_ci indirect->address->def = new_addr->dsts[0]; 944bf215546Sopenharmony_ci /* don't need to remove old dag edge since old addr is 945bf215546Sopenharmony_ci * already scheduled: 946bf215546Sopenharmony_ci */ 947bf215546Sopenharmony_ci sched_node_add_dep(indirect, new_addr, 0); 948bf215546Sopenharmony_ci di(indirect, "new address"); 949bf215546Sopenharmony_ci } 950bf215546Sopenharmony_ci } 951bf215546Sopenharmony_ci 952bf215546Sopenharmony_ci /* all remaining indirects remapped to new addr: */ 
953bf215546Sopenharmony_ci *addr = NULL; 954bf215546Sopenharmony_ci 955bf215546Sopenharmony_ci return new_addr; 956bf215546Sopenharmony_ci} 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_ci/* "spill" the predicate register by remapping any unscheduled 959bf215546Sopenharmony_ci * instructions which depend on the current predicate register 960bf215546Sopenharmony_ci * to a clone of the instruction which wrote the address reg. 961bf215546Sopenharmony_ci */ 962bf215546Sopenharmony_cistatic struct ir3_instruction * 963bf215546Sopenharmony_cisplit_pred(struct ir3_sched_ctx *ctx) 964bf215546Sopenharmony_ci{ 965bf215546Sopenharmony_ci struct ir3 *ir; 966bf215546Sopenharmony_ci struct ir3_instruction *new_pred = NULL; 967bf215546Sopenharmony_ci unsigned i; 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_ci assert(ctx->pred); 970bf215546Sopenharmony_ci 971bf215546Sopenharmony_ci ir = ctx->pred->block->shader; 972bf215546Sopenharmony_ci 973bf215546Sopenharmony_ci for (i = 0; i < ir->predicates_count; i++) { 974bf215546Sopenharmony_ci struct ir3_instruction *predicated = ir->predicates[i]; 975bf215546Sopenharmony_ci 976bf215546Sopenharmony_ci if (!predicated) 977bf215546Sopenharmony_ci continue; 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci /* skip instructions already scheduled: */ 980bf215546Sopenharmony_ci if (is_scheduled(predicated)) 981bf215546Sopenharmony_ci continue; 982bf215546Sopenharmony_ci 983bf215546Sopenharmony_ci /* remap remaining instructions using current pred 984bf215546Sopenharmony_ci * to new pred: 985bf215546Sopenharmony_ci * 986bf215546Sopenharmony_ci * TODO is there ever a case when pred isn't first 987bf215546Sopenharmony_ci * (and only) src? 
988bf215546Sopenharmony_ci */ 989bf215546Sopenharmony_ci if (ssa(predicated->srcs[0]) == ctx->pred) { 990bf215546Sopenharmony_ci if (!new_pred) { 991bf215546Sopenharmony_ci new_pred = split_instr(ctx, ctx->pred); 992bf215546Sopenharmony_ci /* original pred is scheduled, but new one isn't: */ 993bf215546Sopenharmony_ci new_pred->flags &= ~IR3_INSTR_MARK; 994bf215546Sopenharmony_ci } 995bf215546Sopenharmony_ci predicated->srcs[0]->def->instr = new_pred; 996bf215546Sopenharmony_ci /* don't need to remove old dag edge since old pred is 997bf215546Sopenharmony_ci * already scheduled: 998bf215546Sopenharmony_ci */ 999bf215546Sopenharmony_ci sched_node_add_dep(predicated, new_pred, 0); 1000bf215546Sopenharmony_ci di(predicated, "new predicate"); 1001bf215546Sopenharmony_ci } 1002bf215546Sopenharmony_ci } 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_ci if (ctx->block->condition == ctx->pred) { 1005bf215546Sopenharmony_ci if (!new_pred) { 1006bf215546Sopenharmony_ci new_pred = split_instr(ctx, ctx->pred); 1007bf215546Sopenharmony_ci /* original pred is scheduled, but new one isn't: */ 1008bf215546Sopenharmony_ci new_pred->flags &= ~IR3_INSTR_MARK; 1009bf215546Sopenharmony_ci } 1010bf215546Sopenharmony_ci ctx->block->condition = new_pred; 1011bf215546Sopenharmony_ci d("new branch condition"); 1012bf215546Sopenharmony_ci } 1013bf215546Sopenharmony_ci 1014bf215546Sopenharmony_ci /* all remaining predicated remapped to new pred: */ 1015bf215546Sopenharmony_ci ctx->pred = NULL; 1016bf215546Sopenharmony_ci 1017bf215546Sopenharmony_ci return new_pred; 1018bf215546Sopenharmony_ci} 1019bf215546Sopenharmony_ci 1020bf215546Sopenharmony_cistatic void 1021bf215546Sopenharmony_cisched_node_init(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) 1022bf215546Sopenharmony_ci{ 1023bf215546Sopenharmony_ci struct ir3_sched_node *n = rzalloc(ctx->dag, struct ir3_sched_node); 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci dag_init_node(ctx->dag, &n->dag); 
1026bf215546Sopenharmony_ci 1027bf215546Sopenharmony_ci n->instr = instr; 1028bf215546Sopenharmony_ci instr->data = n; 1029bf215546Sopenharmony_ci} 1030bf215546Sopenharmony_ci 1031bf215546Sopenharmony_cistatic void 1032bf215546Sopenharmony_cisched_node_add_dep(struct ir3_instruction *instr, struct ir3_instruction *src, 1033bf215546Sopenharmony_ci int i) 1034bf215546Sopenharmony_ci{ 1035bf215546Sopenharmony_ci /* don't consider dependencies in other blocks: */ 1036bf215546Sopenharmony_ci if (src->block != instr->block) 1037bf215546Sopenharmony_ci return; 1038bf215546Sopenharmony_ci 1039bf215546Sopenharmony_ci /* we could have false-dep's that end up unused: */ 1040bf215546Sopenharmony_ci if (src->flags & IR3_INSTR_UNUSED) { 1041bf215546Sopenharmony_ci assert(__is_false_dep(instr, i)); 1042bf215546Sopenharmony_ci return; 1043bf215546Sopenharmony_ci } 1044bf215546Sopenharmony_ci 1045bf215546Sopenharmony_ci struct ir3_sched_node *n = instr->data; 1046bf215546Sopenharmony_ci struct ir3_sched_node *sn = src->data; 1047bf215546Sopenharmony_ci 1048bf215546Sopenharmony_ci /* If src is consumed by a collect, track that to realize that once 1049bf215546Sopenharmony_ci * any of the collect srcs are live, we should hurry up and schedule 1050bf215546Sopenharmony_ci * the rest. 1051bf215546Sopenharmony_ci */ 1052bf215546Sopenharmony_ci if (instr->opc == OPC_META_COLLECT) 1053bf215546Sopenharmony_ci sn->collect = instr; 1054bf215546Sopenharmony_ci 1055bf215546Sopenharmony_ci unsigned d_soft = ir3_delayslots(src, instr, i, true); 1056bf215546Sopenharmony_ci unsigned d = ir3_delayslots(src, instr, i, false); 1057bf215546Sopenharmony_ci 1058bf215546Sopenharmony_ci /* delays from (ss) and (sy) are considered separately and more accurately in 1059bf215546Sopenharmony_ci * the scheduling heuristic, so ignore it when calculating the ip of 1060bf215546Sopenharmony_ci * instructions, but do consider it when prioritizing which instructions to 1061bf215546Sopenharmony_ci * schedule. 
1062bf215546Sopenharmony_ci */ 1063bf215546Sopenharmony_ci dag_add_edge_max_data(&sn->dag, &n->dag, (uintptr_t)d); 1064bf215546Sopenharmony_ci 1065bf215546Sopenharmony_ci n->delay = MAX2(n->delay, d_soft); 1066bf215546Sopenharmony_ci} 1067bf215546Sopenharmony_ci 1068bf215546Sopenharmony_cistatic void 1069bf215546Sopenharmony_cimark_kill_path(struct ir3_instruction *instr) 1070bf215546Sopenharmony_ci{ 1071bf215546Sopenharmony_ci struct ir3_sched_node *n = instr->data; 1072bf215546Sopenharmony_ci 1073bf215546Sopenharmony_ci if (n->kill_path) { 1074bf215546Sopenharmony_ci return; 1075bf215546Sopenharmony_ci } 1076bf215546Sopenharmony_ci 1077bf215546Sopenharmony_ci n->kill_path = true; 1078bf215546Sopenharmony_ci 1079bf215546Sopenharmony_ci foreach_ssa_src (src, instr) { 1080bf215546Sopenharmony_ci if (src->block != instr->block) 1081bf215546Sopenharmony_ci continue; 1082bf215546Sopenharmony_ci mark_kill_path(src); 1083bf215546Sopenharmony_ci } 1084bf215546Sopenharmony_ci} 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_ci/* Is it an output? */ 1087bf215546Sopenharmony_cistatic bool 1088bf215546Sopenharmony_ciis_output_collect(struct ir3_instruction *instr) 1089bf215546Sopenharmony_ci{ 1090bf215546Sopenharmony_ci if (instr->opc != OPC_META_COLLECT) 1091bf215546Sopenharmony_ci return false; 1092bf215546Sopenharmony_ci 1093bf215546Sopenharmony_ci foreach_ssa_use (use, instr) { 1094bf215546Sopenharmony_ci if (use->opc != OPC_END && use->opc != OPC_CHMASK) 1095bf215546Sopenharmony_ci return false; 1096bf215546Sopenharmony_ci } 1097bf215546Sopenharmony_ci 1098bf215546Sopenharmony_ci return true; 1099bf215546Sopenharmony_ci} 1100bf215546Sopenharmony_ci 1101bf215546Sopenharmony_ci/* Is it's only use as output? 
*/ 1102bf215546Sopenharmony_cistatic bool 1103bf215546Sopenharmony_ciis_output_only(struct ir3_instruction *instr) 1104bf215546Sopenharmony_ci{ 1105bf215546Sopenharmony_ci foreach_ssa_use (use, instr) 1106bf215546Sopenharmony_ci if (!is_output_collect(use)) 1107bf215546Sopenharmony_ci return false; 1108bf215546Sopenharmony_ci 1109bf215546Sopenharmony_ci return true; 1110bf215546Sopenharmony_ci} 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_cistatic void 1113bf215546Sopenharmony_cisched_node_add_deps(struct ir3_instruction *instr) 1114bf215546Sopenharmony_ci{ 1115bf215546Sopenharmony_ci /* There's nothing to do for phi nodes, since they always go first. And 1116bf215546Sopenharmony_ci * phi nodes can reference sources later in the same block, so handling 1117bf215546Sopenharmony_ci * sources is not only unnecessary but could cause problems. 1118bf215546Sopenharmony_ci */ 1119bf215546Sopenharmony_ci if (instr->opc == OPC_META_PHI) 1120bf215546Sopenharmony_ci return; 1121bf215546Sopenharmony_ci 1122bf215546Sopenharmony_ci /* Since foreach_ssa_src() already handles false-dep's we can construct 1123bf215546Sopenharmony_ci * the DAG easily in a single pass. 
1124bf215546Sopenharmony_ci */ 1125bf215546Sopenharmony_ci foreach_ssa_src_n (src, i, instr) { 1126bf215546Sopenharmony_ci sched_node_add_dep(instr, src, i); 1127bf215546Sopenharmony_ci } 1128bf215546Sopenharmony_ci 1129bf215546Sopenharmony_ci /* NOTE that all inputs must be scheduled before a kill, so 1130bf215546Sopenharmony_ci * mark these to be prioritized as well: 1131bf215546Sopenharmony_ci */ 1132bf215546Sopenharmony_ci if (is_kill_or_demote(instr) || is_input(instr)) { 1133bf215546Sopenharmony_ci mark_kill_path(instr); 1134bf215546Sopenharmony_ci } 1135bf215546Sopenharmony_ci 1136bf215546Sopenharmony_ci if (is_output_only(instr)) { 1137bf215546Sopenharmony_ci struct ir3_sched_node *n = instr->data; 1138bf215546Sopenharmony_ci n->output = true; 1139bf215546Sopenharmony_ci } 1140bf215546Sopenharmony_ci} 1141bf215546Sopenharmony_ci 1142bf215546Sopenharmony_cistatic void 1143bf215546Sopenharmony_cisched_dag_max_delay_cb(struct dag_node *node, void *state) 1144bf215546Sopenharmony_ci{ 1145bf215546Sopenharmony_ci struct ir3_sched_node *n = (struct ir3_sched_node *)node; 1146bf215546Sopenharmony_ci uint32_t max_delay = 0; 1147bf215546Sopenharmony_ci 1148bf215546Sopenharmony_ci util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) { 1149bf215546Sopenharmony_ci struct ir3_sched_node *child = (struct ir3_sched_node *)edge->child; 1150bf215546Sopenharmony_ci max_delay = MAX2(child->max_delay, max_delay); 1151bf215546Sopenharmony_ci } 1152bf215546Sopenharmony_ci 1153bf215546Sopenharmony_ci n->max_delay = MAX2(n->max_delay, max_delay + n->delay); 1154bf215546Sopenharmony_ci} 1155bf215546Sopenharmony_ci 1156bf215546Sopenharmony_cistatic void 1157bf215546Sopenharmony_cisched_dag_init(struct ir3_sched_ctx *ctx) 1158bf215546Sopenharmony_ci{ 1159bf215546Sopenharmony_ci ctx->dag = dag_create(ctx); 1160bf215546Sopenharmony_ci 1161bf215546Sopenharmony_ci foreach_instr (instr, &ctx->unscheduled_list) 1162bf215546Sopenharmony_ci sched_node_init(ctx, instr); 
1163bf215546Sopenharmony_ci 1164bf215546Sopenharmony_ci foreach_instr (instr, &ctx->unscheduled_list) 1165bf215546Sopenharmony_ci sched_node_add_deps(instr); 1166bf215546Sopenharmony_ci 1167bf215546Sopenharmony_ci dag_traverse_bottom_up(ctx->dag, sched_dag_max_delay_cb, NULL); 1168bf215546Sopenharmony_ci} 1169bf215546Sopenharmony_ci 1170bf215546Sopenharmony_cistatic void 1171bf215546Sopenharmony_cisched_dag_destroy(struct ir3_sched_ctx *ctx) 1172bf215546Sopenharmony_ci{ 1173bf215546Sopenharmony_ci ralloc_free(ctx->dag); 1174bf215546Sopenharmony_ci ctx->dag = NULL; 1175bf215546Sopenharmony_ci} 1176bf215546Sopenharmony_ci 1177bf215546Sopenharmony_cistatic void 1178bf215546Sopenharmony_cisched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) 1179bf215546Sopenharmony_ci{ 1180bf215546Sopenharmony_ci ctx->block = block; 1181bf215546Sopenharmony_ci 1182bf215546Sopenharmony_ci /* addr/pred writes are per-block: */ 1183bf215546Sopenharmony_ci ctx->addr0 = NULL; 1184bf215546Sopenharmony_ci ctx->addr1 = NULL; 1185bf215546Sopenharmony_ci ctx->pred = NULL; 1186bf215546Sopenharmony_ci ctx->sy_delay = 0; 1187bf215546Sopenharmony_ci ctx->ss_delay = 0; 1188bf215546Sopenharmony_ci ctx->sy_index = ctx->first_outstanding_sy_index = 0; 1189bf215546Sopenharmony_ci ctx->ss_index = ctx->first_outstanding_ss_index = 0; 1190bf215546Sopenharmony_ci 1191bf215546Sopenharmony_ci /* move all instructions to the unscheduled list, and 1192bf215546Sopenharmony_ci * empty the block's instruction list (to which we will 1193bf215546Sopenharmony_ci * be inserting). 
 */
   /* Move the block's instructions aside; they are re-added one at a time
    * as they get scheduled:
    */
   list_replace(&block->instr_list, &ctx->unscheduled_list);
   list_inithead(&block->instr_list);

   sched_dag_init(ctx);

   /* Pre-count kills and sy-producers so heuristics know how many remain: */
   ctx->remaining_kills = 0;
   ctx->remaining_tex = 0;
   foreach_instr_safe (instr, &ctx->unscheduled_list) {
      if (is_kill_or_demote(instr))
         ctx->remaining_kills++;
      if (is_sy_producer(instr))
         ctx->remaining_tex++;
   }

   /* First schedule all meta:input and meta:phi instructions, followed by
    * tex-prefetch.  We want all of the instructions that load values into
    * registers before the shader starts to go before any other instructions.
    * But in particular we want inputs to come before prefetches.  This is
    * because a FS's bary_ij input may not actually be live in the shader,
    * but it should not be scheduled on top of any other input (but can be
    * overwritten by a tex prefetch)
    *
    * Note: Because the first block cannot have predecessors, meta:input and
    * meta:phi cannot exist in the same block.
    */
   foreach_instr_safe (instr, &ctx->unscheduled_list)
      if (instr->opc == OPC_META_INPUT || instr->opc == OPC_META_PHI)
         schedule(ctx, instr);

   foreach_instr_safe (instr, &ctx->unscheduled_list)
      if (instr->opc == OPC_META_TEX_PREFETCH)
         schedule(ctx, instr);

   /* Main loop: heuristically pick a ready DAG head until everything is
    * scheduled (or we hit an unbreakable deadlock):
    */
   while (!list_is_empty(&ctx->unscheduled_list)) {
      struct ir3_sched_notes notes = {0};
      struct ir3_instruction *instr;

      instr = choose_instr(ctx, &notes);
      if (instr) {
         unsigned delay = node_delay(ctx, instr->data);
         d("delay=%u", delay);

         assert(delay <= 6);

         schedule(ctx, instr);

         /* Since we've scheduled a "real" instruction, we can now
          * schedule any split instruction created by the scheduler again.
          */
         ctx->split = NULL;
      } else {
         struct ir3_instruction *new_instr = NULL;
         struct ir3 *ir = block->shader;

         /* nothing available to schedule.. if we are blocked on
          * address/predicate register conflict, then break the
          * deadlock by cloning the instruction that wrote that
          * reg:
          */
         if (notes.addr0_conflict) {
            new_instr =
               split_addr(ctx, &ctx->addr0, ir->a0_users, ir->a0_users_count);
         } else if (notes.addr1_conflict) {
            new_instr =
               split_addr(ctx, &ctx->addr1, ir->a1_users, ir->a1_users_count);
         } else if (notes.pred_conflict) {
            new_instr = split_pred(ctx);
         } else {
            /* No known conflict to break -- dump state and flag the error
             * (the assert fires in debug builds; release builds bail via
             * ctx->error):
             */
            d("unscheduled_list:");
            foreach_instr (instr, &ctx->unscheduled_list)
               di(instr, "unscheduled: ");
            assert(0);
            ctx->error = true;
            return;
         }

         if (new_instr) {
            list_delinit(&new_instr->node);
            list_addtail(&new_instr->node, &ctx->unscheduled_list);
         }

         /* If we produced a new instruction, do not schedule it next to
          * guarantee progress.
          */
         ctx->split = new_instr;
      }
   }

   sched_dag_destroy(ctx);
}

/**
 * Run the block-level pre-RA scheduler over every block of the shader.
 *
 * Returns 0 on success, -1 if any block failed to schedule (ctx->error).
 */
int
ir3_sched(struct ir3 *ir)
{
   struct ir3_sched_ctx *ctx = rzalloc(NULL, struct ir3_sched_ctx);

   /* Clear any per-instruction scratch data; the scheduler stores its
    * per-node state (used e.g. by node_delay()) in instr->data:
    */
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         instr->data = NULL;
      }
   }

   ir3_count_instructions(ir);
   ir3_clear_mark(ir);
   ir3_find_ssa_uses(ir, ctx, false);

   foreach_block (block, &ir->block_list) {
      sched_block(ctx, block);
   }

   int ret = ctx->error ? -1 : 0;

   ralloc_free(ctx);

   return ret;
}

/**
 * Return the array id of an instruction's (single) array src or dst.
 */
static unsigned
get_array_id(struct ir3_instruction *instr)
{
   /* The expectation is that there is only a single array
    * src or dst, ir3_cp should enforce this.
    */

   foreach_dst (dst, instr)
      if (dst->flags & IR3_REG_ARRAY)
         return dst->array.id;
   foreach_src (src, instr)
      if (src->flags & IR3_REG_ARRAY)
         return src->array.id;

   unreachable("this was unexpected");
}

/* does instruction 'prior' need to be scheduled before 'instr'? */
static bool
depends_on(struct ir3_instruction *instr, struct ir3_instruction *prior)
{
   /* TODO for dependencies that are related to a specific object, ie
    * a specific SSBO/image/array, we could relax this constraint to
    * make accesses to unrelated objects not depend on each other (at
    * least as long as not declared coherent)
    */
   /* An EVERYTHING barrier orders against any other instruction with a
    * barrier class, in either direction:
    */
   if (((instr->barrier_class & IR3_BARRIER_EVERYTHING) &&
        prior->barrier_class) ||
       ((prior->barrier_class & IR3_BARRIER_EVERYTHING) &&
        instr->barrier_class))
      return true;

   if (instr->barrier_class & prior->barrier_conflict) {
      if (!(instr->barrier_class &
            ~(IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W))) {
         /* if only array barrier, then we can further limit false-deps
          * by considering the array-id, ie reads/writes to different
          * arrays do not depend on each other (no aliasing)
          */
         if (get_array_id(instr) != get_array_id(prior)) {
            return false;
         }
      }

      return true;
   }

   return false;
}

/**
 * Add false (scheduling-order) dependencies between 'instr' and the
 * surrounding instructions in its block, in both directions.
 */
static void
add_barrier_deps(struct ir3_block *block, struct ir3_instruction *instr)
{
   struct list_head *prev = instr->node.prev;
   struct list_head *next = instr->node.next;

   /* add dependencies on previous instructions that must be scheduled
    * prior to the current instruction
    */
   while (prev != &block->instr_list) {
      struct ir3_instruction *pi =
         list_entry(prev, struct ir3_instruction, node);

      prev = prev->prev;

      if (is_meta(pi))
         continue;

      if (instr->barrier_class == pi->barrier_class) {
         /* Stop scanning at the first instruction with an identical
          * barrier class.  NOTE(review): this appears to rely on deps
          * chaining transitively through earlier same-class instructions
          * -- confirm against how these deps are consumed.
          */
         ir3_instr_add_dep(instr, pi);
         break;
      }

      if (depends_on(instr, pi))
         ir3_instr_add_dep(instr, pi);
   }

   /* add dependencies on this instruction to following instructions
    * that must be scheduled after the current instruction:
    */
   while (next != &block->instr_list) {
      struct ir3_instruction *ni =
         list_entry(next, struct ir3_instruction, node);

      next = next->next;

      if (is_meta(ni))
         continue;

      if (instr->barrier_class == ni->barrier_class) {
         /* Same early-termination as the backward scan above. */
         ir3_instr_add_dep(ni, instr);
         break;
      }

      if (depends_on(ni, instr))
         ir3_instr_add_dep(ni, instr);
   }
}

/* before scheduling a block, we need to add any necessary false-dependencies
 * to ensure that:
 *
 *  (1) barriers are scheduled in the right order wrt instructions related
 *      to the barrier
 *
 *  (2) reads that come before a write actually get scheduled before the
 *      write
 */
bool
ir3_sched_add_deps(struct ir3 *ir)
{
   bool progress = false;

   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->barrier_class) {
            add_barrier_deps(block, instr);
            progress = true;
         }
      }
   }

   return progress;
}