1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2019 Google, Inc. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Rob Clark <robclark@freedesktop.org> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "util/dag.h" 28bf215546Sopenharmony_ci#include "util/u_math.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "ir3.h" 31bf215546Sopenharmony_ci#include "ir3_compiler.h" 32bf215546Sopenharmony_ci#include "ir3_context.h" 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci#ifdef DEBUG 35bf215546Sopenharmony_ci#define SCHED_DEBUG (ir3_shader_debug & IR3_DBG_SCHEDMSGS) 36bf215546Sopenharmony_ci#else 37bf215546Sopenharmony_ci#define SCHED_DEBUG 0 38bf215546Sopenharmony_ci#endif 39bf215546Sopenharmony_ci#define d(fmt, ...) \ 40bf215546Sopenharmony_ci do { \ 41bf215546Sopenharmony_ci if (SCHED_DEBUG) { \ 42bf215546Sopenharmony_ci mesa_logi("PSCHED: " fmt, ##__VA_ARGS__); \ 43bf215546Sopenharmony_ci } \ 44bf215546Sopenharmony_ci } while (0) 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci#define di(instr, fmt, ...) \ 47bf215546Sopenharmony_ci do { \ 48bf215546Sopenharmony_ci if (SCHED_DEBUG) { \ 49bf215546Sopenharmony_ci struct log_stream *stream = mesa_log_streami(); \ 50bf215546Sopenharmony_ci mesa_log_stream_printf(stream, "PSCHED: " fmt ": ", ##__VA_ARGS__); \ 51bf215546Sopenharmony_ci ir3_print_instr_stream(stream, instr); \ 52bf215546Sopenharmony_ci mesa_log_stream_destroy(stream); \ 53bf215546Sopenharmony_ci } \ 54bf215546Sopenharmony_ci } while (0) 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci/* 57bf215546Sopenharmony_ci * Post RA Instruction Scheduling 58bf215546Sopenharmony_ci */ 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_cistruct ir3_postsched_ctx { 61bf215546Sopenharmony_ci struct ir3 *ir; 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci struct ir3_shader_variant *v; 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci void *mem_ctx; 66bf215546Sopenharmony_ci struct ir3_block *block; /* the current block */ 67bf215546Sopenharmony_ci struct dag *dag; 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci struct list_head unscheduled_list; /* unscheduled instructions */ 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci unsigned ip; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci int ss_delay; 74bf215546Sopenharmony_ci int sy_delay; 75bf215546Sopenharmony_ci}; 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_cistruct ir3_postsched_node { 78bf215546Sopenharmony_ci struct dag_node dag; /* must be first for util_dynarray_foreach */ 79bf215546Sopenharmony_ci struct ir3_instruction *instr; 80bf215546Sopenharmony_ci bool partially_evaluated_path; 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ci unsigned earliest_ip; 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci bool has_sy_src, has_ss_src; 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci unsigned delay; 87bf215546Sopenharmony_ci unsigned max_delay; 88bf215546Sopenharmony_ci}; 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci#define foreach_sched_node(__n, __list) \ 91bf215546Sopenharmony_ci list_for_each_entry (struct ir3_postsched_node, __n, __list, dag.link) 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_cistatic bool 94bf215546Sopenharmony_cihas_sy_src(struct ir3_instruction *instr) 95bf215546Sopenharmony_ci{ 96bf215546Sopenharmony_ci struct ir3_postsched_node *node = instr->data; 97bf215546Sopenharmony_ci return node->has_sy_src; 98bf215546Sopenharmony_ci} 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_cistatic bool 101bf215546Sopenharmony_cihas_ss_src(struct ir3_instruction *instr) 102bf215546Sopenharmony_ci{ 103bf215546Sopenharmony_ci struct ir3_postsched_node *node = instr->data; 104bf215546Sopenharmony_ci return node->has_ss_src; 105bf215546Sopenharmony_ci} 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_cistatic void 108bf215546Sopenharmony_cischedule(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr) 109bf215546Sopenharmony_ci{ 110bf215546Sopenharmony_ci assert(ctx->block == instr->block); 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci /* remove from unscheduled_list: 113bf215546Sopenharmony_ci */ 114bf215546Sopenharmony_ci list_delinit(&instr->node); 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci di(instr, "schedule"); 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci bool counts_for_delay = is_alu(instr) || is_flow(instr); 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci unsigned delay_cycles = counts_for_delay ? 1 + instr->repeat : 0; 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci struct ir3_postsched_node *n = instr->data; 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci /* We insert any nop's needed to get to earliest_ip, then advance 125bf215546Sopenharmony_ci * delay_cycles by scheduling the instruction. 126bf215546Sopenharmony_ci */ 127bf215546Sopenharmony_ci ctx->ip = MAX2(ctx->ip, n->earliest_ip) + delay_cycles; 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) { 130bf215546Sopenharmony_ci unsigned delay = (unsigned)(uintptr_t)edge->data; 131bf215546Sopenharmony_ci struct ir3_postsched_node *child = 132bf215546Sopenharmony_ci container_of(edge->child, struct ir3_postsched_node, dag); 133bf215546Sopenharmony_ci child->earliest_ip = MAX2(child->earliest_ip, ctx->ip + delay); 134bf215546Sopenharmony_ci } 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci list_addtail(&instr->node, &instr->block->instr_list); 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci dag_prune_head(ctx->dag, &n->dag); 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci if (is_meta(instr) && (instr->opc != OPC_META_TEX_PREFETCH)) 141bf215546Sopenharmony_ci return; 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci if (is_ss_producer(instr)) { 144bf215546Sopenharmony_ci ctx->ss_delay = soft_ss_delay(instr); 145bf215546Sopenharmony_ci } else if (has_ss_src(instr)) { 146bf215546Sopenharmony_ci ctx->ss_delay = 0; 147bf215546Sopenharmony_ci } else if (ctx->ss_delay > 0) { 148bf215546Sopenharmony_ci ctx->ss_delay--; 149bf215546Sopenharmony_ci } 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci if (is_sy_producer(instr)) { 152bf215546Sopenharmony_ci ctx->sy_delay = soft_sy_delay(instr, ctx->block->shader); 153bf215546Sopenharmony_ci } else if (has_sy_src(instr)) { 154bf215546Sopenharmony_ci ctx->sy_delay = 0; 155bf215546Sopenharmony_ci } else if (ctx->sy_delay > 0) { 156bf215546Sopenharmony_ci ctx->sy_delay--; 157bf215546Sopenharmony_ci } 158bf215546Sopenharmony_ci} 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_cistatic void 161bf215546Sopenharmony_cidump_state(struct ir3_postsched_ctx *ctx) 162bf215546Sopenharmony_ci{ 163bf215546Sopenharmony_ci if (!SCHED_DEBUG) 164bf215546Sopenharmony_ci return; 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci foreach_sched_node (n, &ctx->dag->heads) { 167bf215546Sopenharmony_ci di(n->instr, "maxdel=%3d ", n->max_delay); 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) { 170bf215546Sopenharmony_ci struct ir3_postsched_node *child = 171bf215546Sopenharmony_ci (struct ir3_postsched_node *)edge->child; 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci di(child->instr, " -> (%d parents) ", child->dag.parent_count); 174bf215546Sopenharmony_ci } 175bf215546Sopenharmony_ci } 176bf215546Sopenharmony_ci} 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_cistatic unsigned 179bf215546Sopenharmony_cinode_delay(struct ir3_postsched_ctx *ctx, struct ir3_postsched_node *n) 180bf215546Sopenharmony_ci{ 181bf215546Sopenharmony_ci return MAX2(n->earliest_ip, ctx->ip) - ctx->ip; 182bf215546Sopenharmony_ci} 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_cistatic unsigned 185bf215546Sopenharmony_cinode_delay_soft(struct ir3_postsched_ctx *ctx, struct ir3_postsched_node *n) 186bf215546Sopenharmony_ci{ 187bf215546Sopenharmony_ci unsigned delay = node_delay(ctx, n); 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci /* This takes into account that as when we schedule multiple tex or sfu, the 190bf215546Sopenharmony_ci * first user has to wait for all of them to complete. 191bf215546Sopenharmony_ci */ 192bf215546Sopenharmony_ci if (n->has_ss_src) 193bf215546Sopenharmony_ci delay = MAX2(delay, ctx->ss_delay); 194bf215546Sopenharmony_ci if (n->has_sy_src) 195bf215546Sopenharmony_ci delay = MAX2(delay, ctx->sy_delay); 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci return delay; 198bf215546Sopenharmony_ci} 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci/* find instruction to schedule: */ 201bf215546Sopenharmony_cistatic struct ir3_instruction * 202bf215546Sopenharmony_cichoose_instr(struct ir3_postsched_ctx *ctx) 203bf215546Sopenharmony_ci{ 204bf215546Sopenharmony_ci struct ir3_postsched_node *chosen = NULL; 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci dump_state(ctx); 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci foreach_sched_node (n, &ctx->dag->heads) { 209bf215546Sopenharmony_ci if (!is_meta(n->instr)) 210bf215546Sopenharmony_ci continue; 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci if (!chosen || (chosen->max_delay < n->max_delay)) 213bf215546Sopenharmony_ci chosen = n; 214bf215546Sopenharmony_ci } 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci if (chosen) { 217bf215546Sopenharmony_ci di(chosen->instr, "prio: chose (meta)"); 218bf215546Sopenharmony_ci return chosen->instr; 219bf215546Sopenharmony_ci } 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci /* Try to schedule inputs with a higher priority, if possible, as 222bf215546Sopenharmony_ci * the last bary.f unlocks varying storage to unblock more VS 223bf215546Sopenharmony_ci * warps. 224bf215546Sopenharmony_ci */ 225bf215546Sopenharmony_ci foreach_sched_node (n, &ctx->dag->heads) { 226bf215546Sopenharmony_ci if (!is_input(n->instr)) 227bf215546Sopenharmony_ci continue; 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci if (!chosen || (chosen->max_delay < n->max_delay)) 230bf215546Sopenharmony_ci chosen = n; 231bf215546Sopenharmony_ci } 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci if (chosen) { 234bf215546Sopenharmony_ci di(chosen->instr, "prio: chose (input)"); 235bf215546Sopenharmony_ci return chosen->instr; 236bf215546Sopenharmony_ci } 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci /* Next prioritize discards: */ 239bf215546Sopenharmony_ci foreach_sched_node (n, &ctx->dag->heads) { 240bf215546Sopenharmony_ci unsigned d = node_delay(ctx, n); 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci if (d > 0) 243bf215546Sopenharmony_ci continue; 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci if (!is_kill_or_demote(n->instr)) 246bf215546Sopenharmony_ci continue; 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci if (!chosen || (chosen->max_delay < n->max_delay)) 249bf215546Sopenharmony_ci chosen = n; 250bf215546Sopenharmony_ci } 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci if (chosen) { 253bf215546Sopenharmony_ci di(chosen->instr, "csp: chose (kill, hard ready)"); 254bf215546Sopenharmony_ci return chosen->instr; 255bf215546Sopenharmony_ci } 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci /* Next prioritize expensive instructions: */ 258bf215546Sopenharmony_ci foreach_sched_node (n, &ctx->dag->heads) { 259bf215546Sopenharmony_ci unsigned d = node_delay_soft(ctx, n); 260bf215546Sopenharmony_ci 261bf215546Sopenharmony_ci if (d > 0) 262bf215546Sopenharmony_ci continue; 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_ci if (!(is_ss_producer(n->instr) || is_sy_producer(n->instr))) 265bf215546Sopenharmony_ci continue; 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci if (!chosen || (chosen->max_delay < n->max_delay)) 268bf215546Sopenharmony_ci chosen = n; 269bf215546Sopenharmony_ci } 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci if (chosen) { 272bf215546Sopenharmony_ci di(chosen->instr, "csp: chose (sfu/tex, soft ready)"); 273bf215546Sopenharmony_ci return chosen->instr; 274bf215546Sopenharmony_ci } 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_ci /* Next try to find a ready leader w/ soft delay (ie. including extra 277bf215546Sopenharmony_ci * delay for things like tex fetch which can be synchronized w/ sync 278bf215546Sopenharmony_ci * bit (but we probably do want to schedule some other instructions 279bf215546Sopenharmony_ci * while we wait). We also allow a small amount of nops, to prefer now-nops 280bf215546Sopenharmony_ci * over future-nops up to a point, as that gives better results. 281bf215546Sopenharmony_ci */ 282bf215546Sopenharmony_ci unsigned chosen_delay = 0; 283bf215546Sopenharmony_ci foreach_sched_node (n, &ctx->dag->heads) { 284bf215546Sopenharmony_ci unsigned d = node_delay_soft(ctx, n); 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_ci if (d > 3) 287bf215546Sopenharmony_ci continue; 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci if (!chosen || d < chosen_delay) { 290bf215546Sopenharmony_ci chosen = n; 291bf215546Sopenharmony_ci chosen_delay = d; 292bf215546Sopenharmony_ci continue; 293bf215546Sopenharmony_ci } 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci if (d > chosen_delay) 296bf215546Sopenharmony_ci continue; 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci if (chosen->max_delay < n->max_delay) { 299bf215546Sopenharmony_ci chosen = n; 300bf215546Sopenharmony_ci chosen_delay = d; 301bf215546Sopenharmony_ci } 302bf215546Sopenharmony_ci } 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_ci if (chosen) { 305bf215546Sopenharmony_ci di(chosen->instr, "csp: chose (soft ready)"); 306bf215546Sopenharmony_ci return chosen->instr; 307bf215546Sopenharmony_ci } 308bf215546Sopenharmony_ci 309bf215546Sopenharmony_ci /* Next try to find a ready leader that can be scheduled without nop's, 310bf215546Sopenharmony_ci * which in the case of things that need (sy)/(ss) could result in 311bf215546Sopenharmony_ci * stalls.. but we've already decided there is not a better option. 312bf215546Sopenharmony_ci */ 313bf215546Sopenharmony_ci foreach_sched_node (n, &ctx->dag->heads) { 314bf215546Sopenharmony_ci unsigned d = node_delay(ctx, n); 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci if (d > 0) 317bf215546Sopenharmony_ci continue; 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci if (!chosen || (chosen->max_delay < n->max_delay)) 320bf215546Sopenharmony_ci chosen = n; 321bf215546Sopenharmony_ci } 322bf215546Sopenharmony_ci 323bf215546Sopenharmony_ci if (chosen) { 324bf215546Sopenharmony_ci di(chosen->instr, "csp: chose (hard ready)"); 325bf215546Sopenharmony_ci return chosen->instr; 326bf215546Sopenharmony_ci } 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci /* Otherwise choose leader with maximum cost: 329bf215546Sopenharmony_ci */ 330bf215546Sopenharmony_ci foreach_sched_node (n, &ctx->dag->heads) { 331bf215546Sopenharmony_ci if (!chosen || chosen->max_delay < n->max_delay) 332bf215546Sopenharmony_ci chosen = n; 333bf215546Sopenharmony_ci } 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci if (chosen) { 336bf215546Sopenharmony_ci di(chosen->instr, "csp: chose (leader)"); 337bf215546Sopenharmony_ci return chosen->instr; 338bf215546Sopenharmony_ci } 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci return NULL; 341bf215546Sopenharmony_ci} 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_cistruct ir3_postsched_deps_state { 344bf215546Sopenharmony_ci struct ir3_postsched_ctx *ctx; 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_ci enum { F, R } direction; 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci bool merged; 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ci /* Track the mapping between sched node (instruction) that last 351bf215546Sopenharmony_ci * wrote a given register (in whichever direction we are iterating 352bf215546Sopenharmony_ci * the block) 353bf215546Sopenharmony_ci * 354bf215546Sopenharmony_ci * Note, this table is twice as big as the # of regs, to deal with 355bf215546Sopenharmony_ci * half-precision regs. The approach differs depending on whether 356bf215546Sopenharmony_ci * the half and full precision register files are "merged" (conflict, 357bf215546Sopenharmony_ci * ie. a6xx+) in which case we consider each full precision dep 358bf215546Sopenharmony_ci * as two half-precision dependencies, vs older separate (non- 359bf215546Sopenharmony_ci * conflicting) in which case the first half of the table is used 360bf215546Sopenharmony_ci * for full precision and 2nd half for half-precision. 361bf215546Sopenharmony_ci */ 362bf215546Sopenharmony_ci struct ir3_postsched_node *regs[2 * 256]; 363bf215546Sopenharmony_ci unsigned dst_n[2 * 256]; 364bf215546Sopenharmony_ci}; 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci/* bounds checking read/write accessors, since OoB access to stuff on 367bf215546Sopenharmony_ci * the stack is gonna cause a bad day. 368bf215546Sopenharmony_ci */ 369bf215546Sopenharmony_ci#define dep_reg(state, idx) \ 370bf215546Sopenharmony_ci *({ \ 371bf215546Sopenharmony_ci assert((idx) < ARRAY_SIZE((state)->regs)); \ 372bf215546Sopenharmony_ci &(state)->regs[(idx)]; \ 373bf215546Sopenharmony_ci }) 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_cistatic void 376bf215546Sopenharmony_ciadd_dep(struct ir3_postsched_deps_state *state, 377bf215546Sopenharmony_ci struct ir3_postsched_node *before, struct ir3_postsched_node *after, 378bf215546Sopenharmony_ci unsigned d) 379bf215546Sopenharmony_ci{ 380bf215546Sopenharmony_ci if (!before || !after) 381bf215546Sopenharmony_ci return; 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci assert(before != after); 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_ci if (state->direction == F) { 386bf215546Sopenharmony_ci dag_add_edge_max_data(&before->dag, &after->dag, (uintptr_t)d); 387bf215546Sopenharmony_ci } else { 388bf215546Sopenharmony_ci dag_add_edge_max_data(&after->dag, &before->dag, 0); 389bf215546Sopenharmony_ci } 390bf215546Sopenharmony_ci} 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_cistatic void 393bf215546Sopenharmony_ciadd_single_reg_dep(struct ir3_postsched_deps_state *state, 394bf215546Sopenharmony_ci struct ir3_postsched_node *node, unsigned num, int src_n, 395bf215546Sopenharmony_ci int dst_n) 396bf215546Sopenharmony_ci{ 397bf215546Sopenharmony_ci struct ir3_postsched_node *dep = dep_reg(state, num); 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci unsigned d = 0; 400bf215546Sopenharmony_ci if (src_n >= 0 && dep && state->direction == F) { 401bf215546Sopenharmony_ci /* get the dst_n this corresponds to */ 402bf215546Sopenharmony_ci unsigned dst_n = state->dst_n[num]; 403bf215546Sopenharmony_ci unsigned d_soft = ir3_delayslots(dep->instr, node->instr, src_n, true); 404bf215546Sopenharmony_ci d = ir3_delayslots_with_repeat(dep->instr, node->instr, dst_n, src_n); 405bf215546Sopenharmony_ci node->delay = MAX2(node->delay, d_soft); 406bf215546Sopenharmony_ci if (is_sy_producer(dep->instr)) 407bf215546Sopenharmony_ci node->has_sy_src = true; 408bf215546Sopenharmony_ci if (is_ss_producer(dep->instr)) 409bf215546Sopenharmony_ci node->has_ss_src = true; 410bf215546Sopenharmony_ci } 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci add_dep(state, dep, node, d); 413bf215546Sopenharmony_ci if (src_n < 0) { 414bf215546Sopenharmony_ci dep_reg(state, num) = node; 415bf215546Sopenharmony_ci state->dst_n[num] = dst_n; 416bf215546Sopenharmony_ci } 417bf215546Sopenharmony_ci} 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci/* This is where we handled full vs half-precision, and potential conflicts 420bf215546Sopenharmony_ci * between half and full precision that result in additional dependencies. 421bf215546Sopenharmony_ci * The 'reg' arg is really just to know half vs full precision. 422bf215546Sopenharmony_ci * 423bf215546Sopenharmony_ci * If src_n is positive, then this adds a dependency on a source register, and 424bf215546Sopenharmony_ci * src_n is the index passed into ir3_delayslots() for calculating the delay: 425bf215546Sopenharmony_ci * it corresponds to node->instr->srcs[src_n]. If src_n is negative, then 426bf215546Sopenharmony_ci * this is for the destination register corresponding to dst_n. 427bf215546Sopenharmony_ci */ 428bf215546Sopenharmony_cistatic void 429bf215546Sopenharmony_ciadd_reg_dep(struct ir3_postsched_deps_state *state, 430bf215546Sopenharmony_ci struct ir3_postsched_node *node, const struct ir3_register *reg, 431bf215546Sopenharmony_ci unsigned num, int src_n, int dst_n) 432bf215546Sopenharmony_ci{ 433bf215546Sopenharmony_ci if (state->merged) { 434bf215546Sopenharmony_ci /* Make sure that special registers like a0.x that are written as 435bf215546Sopenharmony_ci * half-registers don't alias random full registers by pretending that 436bf215546Sopenharmony_ci * they're full registers: 437bf215546Sopenharmony_ci */ 438bf215546Sopenharmony_ci if ((reg->flags & IR3_REG_HALF) && !is_reg_special(reg)) { 439bf215546Sopenharmony_ci /* single conflict in half-reg space: */ 440bf215546Sopenharmony_ci add_single_reg_dep(state, node, num, src_n, dst_n); 441bf215546Sopenharmony_ci } else { 442bf215546Sopenharmony_ci /* two conflicts in half-reg space: */ 443bf215546Sopenharmony_ci add_single_reg_dep(state, node, 2 * num + 0, src_n, dst_n); 444bf215546Sopenharmony_ci add_single_reg_dep(state, node, 2 * num + 1, src_n, dst_n); 445bf215546Sopenharmony_ci } 446bf215546Sopenharmony_ci } else { 447bf215546Sopenharmony_ci if (reg->flags & IR3_REG_HALF) 448bf215546Sopenharmony_ci num += ARRAY_SIZE(state->regs) / 2; 449bf215546Sopenharmony_ci add_single_reg_dep(state, node, num, src_n, dst_n); 450bf215546Sopenharmony_ci } 451bf215546Sopenharmony_ci} 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_cistatic void 454bf215546Sopenharmony_cicalculate_deps(struct ir3_postsched_deps_state *state, 455bf215546Sopenharmony_ci struct ir3_postsched_node *node) 456bf215546Sopenharmony_ci{ 457bf215546Sopenharmony_ci /* Add dependencies on instructions that previously (or next, 458bf215546Sopenharmony_ci * in the reverse direction) wrote any of our src registers: 459bf215546Sopenharmony_ci */ 460bf215546Sopenharmony_ci foreach_src_n (reg, i, node->instr) { 461bf215546Sopenharmony_ci if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)) 462bf215546Sopenharmony_ci continue; 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci if (reg->flags & IR3_REG_RELATIV) { 465bf215546Sopenharmony_ci /* mark entire array as read: */ 466bf215546Sopenharmony_ci for (unsigned j = 0; j < reg->size; j++) { 467bf215546Sopenharmony_ci add_reg_dep(state, node, reg, reg->array.base + j, i, -1); 468bf215546Sopenharmony_ci } 469bf215546Sopenharmony_ci } else { 470bf215546Sopenharmony_ci assert(reg->wrmask >= 1); 471bf215546Sopenharmony_ci u_foreach_bit (b, reg->wrmask) { 472bf215546Sopenharmony_ci add_reg_dep(state, node, reg, reg->num + b, i, -1); 473bf215546Sopenharmony_ci } 474bf215546Sopenharmony_ci } 475bf215546Sopenharmony_ci } 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_ci /* And then after we update the state for what this instruction 478bf215546Sopenharmony_ci * wrote: 479bf215546Sopenharmony_ci */ 480bf215546Sopenharmony_ci foreach_dst_n (reg, i, node->instr) { 481bf215546Sopenharmony_ci if (reg->wrmask == 0) 482bf215546Sopenharmony_ci continue; 483bf215546Sopenharmony_ci if (reg->flags & IR3_REG_RELATIV) { 484bf215546Sopenharmony_ci /* mark the entire array as written: */ 485bf215546Sopenharmony_ci for (unsigned j = 0; j < reg->size; j++) { 486bf215546Sopenharmony_ci add_reg_dep(state, node, reg, reg->array.base + j, -1, i); 487bf215546Sopenharmony_ci } 488bf215546Sopenharmony_ci } else { 489bf215546Sopenharmony_ci assert(reg->wrmask >= 1); 490bf215546Sopenharmony_ci u_foreach_bit (b, reg->wrmask) { 491bf215546Sopenharmony_ci add_reg_dep(state, node, reg, reg->num + b, -1, i); 492bf215546Sopenharmony_ci } 493bf215546Sopenharmony_ci } 494bf215546Sopenharmony_ci } 495bf215546Sopenharmony_ci} 496bf215546Sopenharmony_ci 497bf215546Sopenharmony_cistatic void 498bf215546Sopenharmony_cicalculate_forward_deps(struct ir3_postsched_ctx *ctx) 499bf215546Sopenharmony_ci{ 500bf215546Sopenharmony_ci struct ir3_postsched_deps_state state = { 501bf215546Sopenharmony_ci .ctx = ctx, 502bf215546Sopenharmony_ci .direction = F, 503bf215546Sopenharmony_ci .merged = ctx->v->mergedregs, 504bf215546Sopenharmony_ci }; 505bf215546Sopenharmony_ci 506bf215546Sopenharmony_ci foreach_instr (instr, &ctx->unscheduled_list) { 507bf215546Sopenharmony_ci calculate_deps(&state, instr->data); 508bf215546Sopenharmony_ci } 509bf215546Sopenharmony_ci} 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_cistatic void 512bf215546Sopenharmony_cicalculate_reverse_deps(struct ir3_postsched_ctx *ctx) 513bf215546Sopenharmony_ci{ 514bf215546Sopenharmony_ci struct ir3_postsched_deps_state state = { 515bf215546Sopenharmony_ci .ctx = ctx, 516bf215546Sopenharmony_ci .direction = R, 517bf215546Sopenharmony_ci .merged = ctx->v->mergedregs, 518bf215546Sopenharmony_ci }; 519bf215546Sopenharmony_ci 520bf215546Sopenharmony_ci foreach_instr_rev (instr, &ctx->unscheduled_list) { 521bf215546Sopenharmony_ci calculate_deps(&state, instr->data); 522bf215546Sopenharmony_ci } 523bf215546Sopenharmony_ci} 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_cistatic void 526bf215546Sopenharmony_cisched_node_init(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr) 527bf215546Sopenharmony_ci{ 528bf215546Sopenharmony_ci struct ir3_postsched_node *n = 529bf215546Sopenharmony_ci rzalloc(ctx->mem_ctx, struct ir3_postsched_node); 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci dag_init_node(ctx->dag, &n->dag); 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_ci n->instr = instr; 534bf215546Sopenharmony_ci instr->data = n; 535bf215546Sopenharmony_ci} 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_cistatic void 538bf215546Sopenharmony_cisched_dag_max_delay_cb(struct dag_node *node, void *state) 539bf215546Sopenharmony_ci{ 540bf215546Sopenharmony_ci struct ir3_postsched_node *n = (struct ir3_postsched_node *)node; 541bf215546Sopenharmony_ci uint32_t max_delay = 0; 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) { 544bf215546Sopenharmony_ci struct ir3_postsched_node *child = 545bf215546Sopenharmony_ci (struct ir3_postsched_node *)edge->child; 546bf215546Sopenharmony_ci max_delay = MAX2(child->max_delay, max_delay); 547bf215546Sopenharmony_ci } 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_ci n->max_delay = MAX2(n->max_delay, max_delay + n->delay); 550bf215546Sopenharmony_ci} 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_cistatic void 553bf215546Sopenharmony_cisched_dag_init(struct ir3_postsched_ctx *ctx) 554bf215546Sopenharmony_ci{ 555bf215546Sopenharmony_ci ctx->mem_ctx = ralloc_context(NULL); 556bf215546Sopenharmony_ci 557bf215546Sopenharmony_ci ctx->dag = dag_create(ctx->mem_ctx); 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci foreach_instr (instr, &ctx->unscheduled_list) 560bf215546Sopenharmony_ci sched_node_init(ctx, instr); 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci calculate_forward_deps(ctx); 563bf215546Sopenharmony_ci calculate_reverse_deps(ctx); 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_ci /* 566bf215546Sopenharmony_ci * To avoid expensive texture fetches, etc, from being moved ahead 567bf215546Sopenharmony_ci * of kills, track the kills we've seen so far, so we can add an 568bf215546Sopenharmony_ci * extra dependency on them for tex/mem instructions 569bf215546Sopenharmony_ci */ 570bf215546Sopenharmony_ci struct util_dynarray kills; 571bf215546Sopenharmony_ci util_dynarray_init(&kills, ctx->mem_ctx); 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_ci /* The last bary.f with the (ei) flag must be scheduled before any kills, 574bf215546Sopenharmony_ci * or the hw gets angry. Keep track of inputs here so we can add the 575bf215546Sopenharmony_ci * false dep on the kill instruction. 576bf215546Sopenharmony_ci */ 577bf215546Sopenharmony_ci struct util_dynarray inputs; 578bf215546Sopenharmony_ci util_dynarray_init(&inputs, ctx->mem_ctx); 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci /* 581bf215546Sopenharmony_ci * Normal srcs won't be in SSA at this point, those are dealt with in 582bf215546Sopenharmony_ci * calculate_forward_deps() and calculate_reverse_deps(). But we still 583bf215546Sopenharmony_ci * have the false-dep information in SSA form, so go ahead and add 584bf215546Sopenharmony_ci * dependencies for that here: 585bf215546Sopenharmony_ci */ 586bf215546Sopenharmony_ci foreach_instr (instr, &ctx->unscheduled_list) { 587bf215546Sopenharmony_ci struct ir3_postsched_node *n = instr->data; 588bf215546Sopenharmony_ci 589bf215546Sopenharmony_ci foreach_ssa_src_n (src, i, instr) { 590bf215546Sopenharmony_ci if (src->block != instr->block) 591bf215546Sopenharmony_ci continue; 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci /* we can end up with unused false-deps.. just skip them: */ 594bf215546Sopenharmony_ci if (src->flags & IR3_INSTR_UNUSED) 595bf215546Sopenharmony_ci continue; 596bf215546Sopenharmony_ci 597bf215546Sopenharmony_ci struct ir3_postsched_node *sn = src->data; 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_ci /* don't consider dependencies in other blocks: */ 600bf215546Sopenharmony_ci if (src->block != instr->block) 601bf215546Sopenharmony_ci continue; 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci dag_add_edge_max_data(&sn->dag, &n->dag, 0); 604bf215546Sopenharmony_ci } 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_ci if (is_input(instr)) { 607bf215546Sopenharmony_ci util_dynarray_append(&inputs, struct ir3_instruction *, instr); 608bf215546Sopenharmony_ci } else if (is_kill_or_demote(instr)) { 609bf215546Sopenharmony_ci util_dynarray_foreach (&inputs, struct ir3_instruction *, instrp) { 610bf215546Sopenharmony_ci struct ir3_instruction *input = *instrp; 611bf215546Sopenharmony_ci struct ir3_postsched_node *in = input->data; 612bf215546Sopenharmony_ci dag_add_edge_max_data(&in->dag, &n->dag, 0); 613bf215546Sopenharmony_ci } 614bf215546Sopenharmony_ci util_dynarray_append(&kills, struct ir3_instruction *, instr); 615bf215546Sopenharmony_ci } else if (is_tex(instr) || is_mem(instr)) { 616bf215546Sopenharmony_ci util_dynarray_foreach (&kills, struct ir3_instruction *, instrp) { 617bf215546Sopenharmony_ci struct ir3_instruction *kill = *instrp; 618bf215546Sopenharmony_ci struct ir3_postsched_node *kn = kill->data; 619bf215546Sopenharmony_ci dag_add_edge_max_data(&kn->dag, &n->dag, 0); 620bf215546Sopenharmony_ci } 621bf215546Sopenharmony_ci } 622bf215546Sopenharmony_ci } 623bf215546Sopenharmony_ci 624bf215546Sopenharmony_ci // TODO do we want to do this after reverse-dependencies? 625bf215546Sopenharmony_ci dag_traverse_bottom_up(ctx->dag, sched_dag_max_delay_cb, NULL); 626bf215546Sopenharmony_ci} 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_cistatic void 629bf215546Sopenharmony_cisched_dag_destroy(struct ir3_postsched_ctx *ctx) 630bf215546Sopenharmony_ci{ 631bf215546Sopenharmony_ci ralloc_free(ctx->mem_ctx); 632bf215546Sopenharmony_ci ctx->mem_ctx = NULL; 633bf215546Sopenharmony_ci ctx->dag = NULL; 634bf215546Sopenharmony_ci} 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_cistatic void 637bf215546Sopenharmony_cisched_block(struct ir3_postsched_ctx *ctx, struct ir3_block *block) 638bf215546Sopenharmony_ci{ 639bf215546Sopenharmony_ci ctx->block = block; 640bf215546Sopenharmony_ci ctx->sy_delay = 0; 641bf215546Sopenharmony_ci ctx->ss_delay = 0; 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_ci /* move all instructions to the unscheduled list, and 644bf215546Sopenharmony_ci * empty the block's instruction list (to which we will 645bf215546Sopenharmony_ci * be inserting). 646bf215546Sopenharmony_ci */ 647bf215546Sopenharmony_ci list_replace(&block->instr_list, &ctx->unscheduled_list); 648bf215546Sopenharmony_ci list_inithead(&block->instr_list); 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci // TODO once we are using post-sched for everything we can 651bf215546Sopenharmony_ci // just not stick in NOP's prior to post-sched, and drop this. 652bf215546Sopenharmony_ci // for now keep this, since it makes post-sched optional: 653bf215546Sopenharmony_ci foreach_instr_safe (instr, &ctx->unscheduled_list) { 654bf215546Sopenharmony_ci switch (instr->opc) { 655bf215546Sopenharmony_ci case OPC_NOP: 656bf215546Sopenharmony_ci case OPC_B: 657bf215546Sopenharmony_ci case OPC_JUMP: 658bf215546Sopenharmony_ci list_delinit(&instr->node); 659bf215546Sopenharmony_ci break; 660bf215546Sopenharmony_ci default: 661bf215546Sopenharmony_ci break; 662bf215546Sopenharmony_ci } 663bf215546Sopenharmony_ci } 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci sched_dag_init(ctx); 666bf215546Sopenharmony_ci 667bf215546Sopenharmony_ci /* First schedule all meta:input instructions, followed by 668bf215546Sopenharmony_ci * tex-prefetch. We want all of the instructions that load 669bf215546Sopenharmony_ci * values into registers before the shader starts to go 670bf215546Sopenharmony_ci * before any other instructions. But in particular we 671bf215546Sopenharmony_ci * want inputs to come before prefetches. This is because 672bf215546Sopenharmony_ci * a FS's bary_ij input may not actually be live in the 673bf215546Sopenharmony_ci * shader, but it should not be scheduled on top of any 674bf215546Sopenharmony_ci * other input (but can be overwritten by a tex prefetch) 675bf215546Sopenharmony_ci */ 676bf215546Sopenharmony_ci foreach_instr_safe (instr, &ctx->unscheduled_list) 677bf215546Sopenharmony_ci if (instr->opc == OPC_META_INPUT) 678bf215546Sopenharmony_ci schedule(ctx, instr); 679bf215546Sopenharmony_ci 680bf215546Sopenharmony_ci foreach_instr_safe (instr, &ctx->unscheduled_list) 681bf215546Sopenharmony_ci if (instr->opc == OPC_META_TEX_PREFETCH) 682bf215546Sopenharmony_ci schedule(ctx, instr); 683bf215546Sopenharmony_ci 684bf215546Sopenharmony_ci while (!list_is_empty(&ctx->unscheduled_list)) { 685bf215546Sopenharmony_ci struct ir3_instruction *instr = choose_instr(ctx); 686bf215546Sopenharmony_ci 687bf215546Sopenharmony_ci unsigned delay = node_delay(ctx, instr->data); 688bf215546Sopenharmony_ci d("delay=%u", delay); 689bf215546Sopenharmony_ci 690bf215546Sopenharmony_ci assert(delay <= 6); 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_ci schedule(ctx, instr); 693bf215546Sopenharmony_ci } 694bf215546Sopenharmony_ci 695bf215546Sopenharmony_ci sched_dag_destroy(ctx); 696bf215546Sopenharmony_ci} 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_cistatic bool 699bf215546Sopenharmony_ciis_self_mov(struct ir3_instruction *instr) 700bf215546Sopenharmony_ci{ 701bf215546Sopenharmony_ci if (!is_same_type_mov(instr)) 702bf215546Sopenharmony_ci return false; 703bf215546Sopenharmony_ci 704bf215546Sopenharmony_ci if (instr->dsts[0]->num != instr->srcs[0]->num) 705bf215546Sopenharmony_ci return false; 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_ci if (instr->dsts[0]->flags & IR3_REG_RELATIV) 708bf215546Sopenharmony_ci return false; 709bf215546Sopenharmony_ci 710bf215546Sopenharmony_ci if (instr->cat1.round != ROUND_ZERO) 711bf215546Sopenharmony_ci return false; 712bf215546Sopenharmony_ci 713bf215546Sopenharmony_ci if (instr->srcs[0]->flags & 714bf215546Sopenharmony_ci (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_FNEG | 715bf215546Sopenharmony_ci IR3_REG_FABS | IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT)) 716bf215546Sopenharmony_ci return false; 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci return true; 719bf215546Sopenharmony_ci} 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_ci/* sometimes we end up w/ in-place mov's, ie. mov.u32u32 r1.y, r1.y 722bf215546Sopenharmony_ci * as a result of places were before RA we are not sure that it is 723bf215546Sopenharmony_ci * safe to eliminate. We could eliminate these earlier, but sometimes 724bf215546Sopenharmony_ci * they are tangled up in false-dep's, etc, so it is easier just to 725bf215546Sopenharmony_ci * let them exist until after RA 726bf215546Sopenharmony_ci */ 727bf215546Sopenharmony_cistatic void 728bf215546Sopenharmony_cicleanup_self_movs(struct ir3 *ir) 729bf215546Sopenharmony_ci{ 730bf215546Sopenharmony_ci foreach_block (block, &ir->block_list) { 731bf215546Sopenharmony_ci foreach_instr_safe (instr, &block->instr_list) { 732bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->deps_count; i++) { 733bf215546Sopenharmony_ci if (instr->deps[i] && is_self_mov(instr->deps[i])) { 734bf215546Sopenharmony_ci instr->deps[i] = NULL; 735bf215546Sopenharmony_ci } 736bf215546Sopenharmony_ci } 737bf215546Sopenharmony_ci 738bf215546Sopenharmony_ci if (is_self_mov(instr)) 739bf215546Sopenharmony_ci list_delinit(&instr->node); 740bf215546Sopenharmony_ci } 741bf215546Sopenharmony_ci } 742bf215546Sopenharmony_ci} 743bf215546Sopenharmony_ci 744bf215546Sopenharmony_cibool 745bf215546Sopenharmony_ciir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v) 746bf215546Sopenharmony_ci{ 747bf215546Sopenharmony_ci struct ir3_postsched_ctx ctx = { 748bf215546Sopenharmony_ci .ir = ir, 749bf215546Sopenharmony_ci .v = v, 750bf215546Sopenharmony_ci }; 751bf215546Sopenharmony_ci 752bf215546Sopenharmony_ci cleanup_self_movs(ir); 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ci foreach_block (block, &ir->block_list) { 755bf215546Sopenharmony_ci sched_block(&ctx, block); 756bf215546Sopenharmony_ci } 757bf215546Sopenharmony_ci 758bf215546Sopenharmony_ci return true; 759bf215546Sopenharmony_ci} 760