1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2022 Collabora Ltd.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors (Collabora):
24bf215546Sopenharmony_ci *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci/* Bottom-up local scheduler to reduce register pressure */
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#include "compiler.h"
30bf215546Sopenharmony_ci#include "util/dag.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_cistruct sched_ctx {
33bf215546Sopenharmony_ci        /* Dependency graph */
34bf215546Sopenharmony_ci        struct dag *dag;
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_ci        /* Live set */
37bf215546Sopenharmony_ci        uint8_t *live;
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_ci        /* Size of the live set */
40bf215546Sopenharmony_ci        unsigned max;
41bf215546Sopenharmony_ci};
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_cistruct sched_node {
44bf215546Sopenharmony_ci        struct dag_node dag;
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_ci        /* Instruction this node represents */
47bf215546Sopenharmony_ci        bi_instr *instr;
48bf215546Sopenharmony_ci};
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_cistatic unsigned
51bf215546Sopenharmony_cilabel_index(bi_context *ctx, bi_index idx)
52bf215546Sopenharmony_ci{
53bf215546Sopenharmony_ci        if (idx.reg) {
54bf215546Sopenharmony_ci                assert(idx.value < ctx->reg_alloc);
55bf215546Sopenharmony_ci                return idx.value + ctx->ssa_alloc;
56bf215546Sopenharmony_ci        } else {
57bf215546Sopenharmony_ci                assert(idx.value < ctx->ssa_alloc);
58bf215546Sopenharmony_ci                return idx.value;
59bf215546Sopenharmony_ci        }
60bf215546Sopenharmony_ci}
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_cistatic void
63bf215546Sopenharmony_ciadd_dep(struct sched_node *a, struct sched_node *b)
64bf215546Sopenharmony_ci{
65bf215546Sopenharmony_ci        if (a && b)
66bf215546Sopenharmony_ci                dag_add_edge(&a->dag, &b->dag, 0);
67bf215546Sopenharmony_ci}
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_cistatic struct dag *
70bf215546Sopenharmony_cicreate_dag(bi_context *ctx, bi_block *block, void *memctx)
71bf215546Sopenharmony_ci{
72bf215546Sopenharmony_ci        struct dag *dag = dag_create(ctx);
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_ci        unsigned count = ctx->ssa_alloc + ctx->reg_alloc;
75bf215546Sopenharmony_ci        struct sched_node **last_read =
76bf215546Sopenharmony_ci                calloc(count, sizeof(struct sched_node *));
77bf215546Sopenharmony_ci        struct sched_node **last_write =
78bf215546Sopenharmony_ci                calloc(count, sizeof(struct sched_node *));
79bf215546Sopenharmony_ci        struct sched_node *coverage = NULL;
80bf215546Sopenharmony_ci        struct sched_node *preload = NULL;
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci        /* Last memory load, to serialize stores against */
83bf215546Sopenharmony_ci        struct sched_node *memory_load = NULL;
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci        /* Last memory store, to serialize loads and stores against */
86bf215546Sopenharmony_ci        struct sched_node *memory_store = NULL;
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci        bi_foreach_instr_in_block(block, I) {
89bf215546Sopenharmony_ci                /* Leave branches at the end */
90bf215546Sopenharmony_ci                if (I->op == BI_OPCODE_JUMP || bi_opcode_props[I->op].branch)
91bf215546Sopenharmony_ci                        break;
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci                assert(I->branch_target == NULL);
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci                struct sched_node *node = rzalloc(memctx, struct sched_node);
96bf215546Sopenharmony_ci                node->instr = I;
97bf215546Sopenharmony_ci                dag_init_node(dag, &node->dag);
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci                /* Reads depend on writes */
100bf215546Sopenharmony_ci                bi_foreach_src(I, s) {
101bf215546Sopenharmony_ci                        bi_index src = I->src[s];
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci                        if (src.type == BI_INDEX_NORMAL) {
104bf215546Sopenharmony_ci                                add_dep(node, last_write[label_index(ctx, src)]);
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci                                /* Serialize access to nir_register for
107bf215546Sopenharmony_ci                                 * simplicity. We could do better.
108bf215546Sopenharmony_ci                                 */
109bf215546Sopenharmony_ci                                if (src.reg)
110bf215546Sopenharmony_ci                                        add_dep(node, last_read[label_index(ctx, src)]);
111bf215546Sopenharmony_ci                        }
112bf215546Sopenharmony_ci                }
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci                /* Writes depend on reads and writes */
115bf215546Sopenharmony_ci                bi_foreach_dest(I, s) {
116bf215546Sopenharmony_ci                        bi_index dest = I->dest[s];
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci                        if (dest.type == BI_INDEX_NORMAL) {
119bf215546Sopenharmony_ci                                add_dep(node, last_read[label_index(ctx, dest)]);
120bf215546Sopenharmony_ci                                add_dep(node, last_write[label_index(ctx, dest)]);
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ci                                last_write[label_index(ctx, dest)] = node;
123bf215546Sopenharmony_ci                        }
124bf215546Sopenharmony_ci                }
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci                bi_foreach_src(I, s) {
127bf215546Sopenharmony_ci                        bi_index src = I->src[s];
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_ci                        if (src.type == BI_INDEX_NORMAL) {
130bf215546Sopenharmony_ci                                last_read[label_index(ctx, src)] = node;
131bf215546Sopenharmony_ci                        }
132bf215546Sopenharmony_ci                }
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci                switch (bi_opcode_props[I->op].message) {
135bf215546Sopenharmony_ci                case BIFROST_MESSAGE_LOAD:
136bf215546Sopenharmony_ci                        /* Regular memory loads needs to be serialized against
137bf215546Sopenharmony_ci                         * other memory access. However, UBO memory is read-only
138bf215546Sopenharmony_ci                         * so it can be moved around freely.
139bf215546Sopenharmony_ci                         */
140bf215546Sopenharmony_ci                        if (I->seg != BI_SEG_UBO) {
141bf215546Sopenharmony_ci                                add_dep(node, memory_store);
142bf215546Sopenharmony_ci                                memory_load = node;
143bf215546Sopenharmony_ci                        }
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_ci                        break;
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_ci                case BIFROST_MESSAGE_ATTRIBUTE:
148bf215546Sopenharmony_ci                        /* Regular attribute loads can be reordered, but
149bf215546Sopenharmony_ci                         * writeable attributes can't be. Our one use of
150bf215546Sopenharmony_ci                         * writeable attributes are images.
151bf215546Sopenharmony_ci                         */
152bf215546Sopenharmony_ci                        if ((I->op == BI_OPCODE_LD_TEX) ||
153bf215546Sopenharmony_ci                            (I->op == BI_OPCODE_LD_TEX_IMM) ||
154bf215546Sopenharmony_ci                            (I->op == BI_OPCODE_LD_ATTR_TEX)) {
155bf215546Sopenharmony_ci                                add_dep(node, memory_store);
156bf215546Sopenharmony_ci                                memory_load = node;
157bf215546Sopenharmony_ci                        }
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_ci                        break;
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci                case BIFROST_MESSAGE_STORE:
162bf215546Sopenharmony_ci                        assert(I->seg != BI_SEG_UBO);
163bf215546Sopenharmony_ci                        add_dep(node, memory_load);
164bf215546Sopenharmony_ci                        add_dep(node, memory_store);
165bf215546Sopenharmony_ci                        memory_store = node;
166bf215546Sopenharmony_ci                        break;
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci                case BIFROST_MESSAGE_ATOMIC:
169bf215546Sopenharmony_ci                case BIFROST_MESSAGE_BARRIER:
170bf215546Sopenharmony_ci                        add_dep(node, memory_load);
171bf215546Sopenharmony_ci                        add_dep(node, memory_store);
172bf215546Sopenharmony_ci                        memory_load = node;
173bf215546Sopenharmony_ci                        memory_store = node;
174bf215546Sopenharmony_ci                        break;
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci                case BIFROST_MESSAGE_BLEND:
177bf215546Sopenharmony_ci                case BIFROST_MESSAGE_Z_STENCIL:
178bf215546Sopenharmony_ci                case BIFROST_MESSAGE_TILE:
179bf215546Sopenharmony_ci                        add_dep(node, coverage);
180bf215546Sopenharmony_ci                        coverage = node;
181bf215546Sopenharmony_ci                        break;
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_ci                case BIFROST_MESSAGE_ATEST:
184bf215546Sopenharmony_ci                        /* ATEST signals the end of shader side effects */
185bf215546Sopenharmony_ci                        add_dep(node, memory_store);
186bf215546Sopenharmony_ci                        memory_store = node;
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci                        /* ATEST also updates coverage */
189bf215546Sopenharmony_ci                        add_dep(node, coverage);
190bf215546Sopenharmony_ci                        coverage = node;
191bf215546Sopenharmony_ci                        break;
192bf215546Sopenharmony_ci                default:
193bf215546Sopenharmony_ci                        break;
194bf215546Sopenharmony_ci                }
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci                add_dep(node, preload);
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci                if (I->op == BI_OPCODE_DISCARD_F32) {
199bf215546Sopenharmony_ci                        /* Serialize against ATEST */
200bf215546Sopenharmony_ci                        add_dep(node, coverage);
201bf215546Sopenharmony_ci                        coverage = node;
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci                        /* Also serialize against memory and barriers */
204bf215546Sopenharmony_ci                        add_dep(node, memory_load);
205bf215546Sopenharmony_ci                        add_dep(node, memory_store);
206bf215546Sopenharmony_ci                        memory_load = node;
207bf215546Sopenharmony_ci                        memory_store = node;
208bf215546Sopenharmony_ci                } else if (I->op == BI_OPCODE_MOV_I32 && I->src[0].type == BI_INDEX_REGISTER) {
209bf215546Sopenharmony_ci                        preload = node;
210bf215546Sopenharmony_ci                }
211bf215546Sopenharmony_ci        }
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci        free(last_read);
214bf215546Sopenharmony_ci        free(last_write);
215bf215546Sopenharmony_ci
216bf215546Sopenharmony_ci        return dag;
217bf215546Sopenharmony_ci}
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci/*
220bf215546Sopenharmony_ci * Calculate the change in register pressure from scheduling a given
221bf215546Sopenharmony_ci * instruction. Equivalently, calculate the difference in the number of live
222bf215546Sopenharmony_ci * registers before and after the instruction, given the live set after the
223bf215546Sopenharmony_ci * instruction. This calculation follows immediately from the dataflow
224bf215546Sopenharmony_ci * definition of liveness:
225bf215546Sopenharmony_ci *
226bf215546Sopenharmony_ci *      live_in = (live_out - KILL) + GEN
227bf215546Sopenharmony_ci */
228bf215546Sopenharmony_cistatic signed
229bf215546Sopenharmony_cicalculate_pressure_delta(bi_instr *I, uint8_t *live, unsigned max)
230bf215546Sopenharmony_ci{
231bf215546Sopenharmony_ci        signed delta = 0;
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci        /* Destinations must be unique */
234bf215546Sopenharmony_ci        bi_foreach_dest(I, d) {
235bf215546Sopenharmony_ci                unsigned node = bi_get_node(I->dest[d]);
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci                if (node < max && live[node])
238bf215546Sopenharmony_ci                        delta -= bi_count_write_registers(I, d);
239bf215546Sopenharmony_ci        }
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ci        bi_foreach_src(I, src) {
242bf215546Sopenharmony_ci                unsigned node = bi_get_node(I->src[src]);
243bf215546Sopenharmony_ci                if (node >= max)
244bf215546Sopenharmony_ci                        continue;
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci                /* Filter duplicates */
247bf215546Sopenharmony_ci                bool dupe = false;
248bf215546Sopenharmony_ci
249bf215546Sopenharmony_ci                for (unsigned i = 0; i < src; ++i) {
250bf215546Sopenharmony_ci                        if (bi_get_node(I->src[i]) == node) {
251bf215546Sopenharmony_ci                                dupe = true;
252bf215546Sopenharmony_ci                                break;
253bf215546Sopenharmony_ci                        }
254bf215546Sopenharmony_ci                }
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_ci                if (!dupe && !live[node])
257bf215546Sopenharmony_ci                        delta += bi_count_read_registers(I, src);
258bf215546Sopenharmony_ci        }
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ci        return delta;
261bf215546Sopenharmony_ci}
262bf215546Sopenharmony_ci
263bf215546Sopenharmony_ci/*
264bf215546Sopenharmony_ci * Choose the next instruction, bottom-up. For now we use a simple greedy
265bf215546Sopenharmony_ci * heuristic: choose the instruction that has the best effect on liveness.
266bf215546Sopenharmony_ci */
267bf215546Sopenharmony_cistatic struct sched_node *
268bf215546Sopenharmony_cichoose_instr(struct sched_ctx *s)
269bf215546Sopenharmony_ci{
270bf215546Sopenharmony_ci        int32_t min_delta = INT32_MAX;
271bf215546Sopenharmony_ci        struct sched_node *best = NULL;
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ci        list_for_each_entry(struct sched_node, n, &s->dag->heads, dag.link) {
274bf215546Sopenharmony_ci                int32_t delta = calculate_pressure_delta(n->instr, s->live, s->max);
275bf215546Sopenharmony_ci
276bf215546Sopenharmony_ci                if (delta < min_delta) {
277bf215546Sopenharmony_ci                        best = n;
278bf215546Sopenharmony_ci                        min_delta = delta;
279bf215546Sopenharmony_ci                }
280bf215546Sopenharmony_ci        }
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_ci        return best;
283bf215546Sopenharmony_ci}
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_cistatic void
286bf215546Sopenharmony_cipressure_schedule_block(bi_context *ctx, bi_block *block, struct sched_ctx *s)
287bf215546Sopenharmony_ci{
288bf215546Sopenharmony_ci        /* off by a constant, that's ok */
289bf215546Sopenharmony_ci        signed pressure = 0;
290bf215546Sopenharmony_ci        signed orig_max_pressure = 0;
291bf215546Sopenharmony_ci        unsigned nr_ins = 0;
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci        memcpy(s->live, block->live_out, s->max);
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci        bi_foreach_instr_in_block_rev(block, I) {
296bf215546Sopenharmony_ci                pressure += calculate_pressure_delta(I, s->live, s->max);
297bf215546Sopenharmony_ci                orig_max_pressure = MAX2(pressure, orig_max_pressure);
298bf215546Sopenharmony_ci                bi_liveness_ins_update(s->live, I, s->max);
299bf215546Sopenharmony_ci                nr_ins++;
300bf215546Sopenharmony_ci        }
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci        memcpy(s->live, block->live_out, s->max);
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_ci        /* off by a constant, that's ok */
305bf215546Sopenharmony_ci        signed max_pressure = 0;
306bf215546Sopenharmony_ci        pressure = 0;
307bf215546Sopenharmony_ci
308bf215546Sopenharmony_ci        struct sched_node **schedule = calloc(nr_ins, sizeof(struct sched_node *));
309bf215546Sopenharmony_ci        nr_ins = 0;
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci        while (!list_is_empty(&s->dag->heads)) {
312bf215546Sopenharmony_ci                struct sched_node *node = choose_instr(s);
313bf215546Sopenharmony_ci                pressure += calculate_pressure_delta(node->instr, s->live, s->max);
314bf215546Sopenharmony_ci                max_pressure = MAX2(pressure, max_pressure);
315bf215546Sopenharmony_ci                dag_prune_head(s->dag, &node->dag);
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_ci                schedule[nr_ins++] = node;
318bf215546Sopenharmony_ci                bi_liveness_ins_update(s->live, node->instr, s->max);
319bf215546Sopenharmony_ci        }
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci        /* Bail if it looks like it's worse */
322bf215546Sopenharmony_ci        if (max_pressure >= orig_max_pressure) {
323bf215546Sopenharmony_ci                free(schedule);
324bf215546Sopenharmony_ci                return;
325bf215546Sopenharmony_ci        }
326bf215546Sopenharmony_ci
327bf215546Sopenharmony_ci        /* Apply the schedule */
328bf215546Sopenharmony_ci        for (unsigned i = 0; i < nr_ins; ++i) {
329bf215546Sopenharmony_ci                bi_remove_instruction(schedule[i]->instr);
330bf215546Sopenharmony_ci                list_add(&schedule[i]->instr->link, &block->instructions);
331bf215546Sopenharmony_ci        }
332bf215546Sopenharmony_ci
333bf215546Sopenharmony_ci        free(schedule);
334bf215546Sopenharmony_ci}
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_civoid
337bf215546Sopenharmony_cibi_pressure_schedule(bi_context *ctx)
338bf215546Sopenharmony_ci{
339bf215546Sopenharmony_ci        bi_compute_liveness(ctx);
340bf215546Sopenharmony_ci        unsigned temp_count = bi_max_temp(ctx);
341bf215546Sopenharmony_ci        void *memctx = ralloc_context(ctx);
342bf215546Sopenharmony_ci        uint8_t *live = ralloc_array(memctx, uint8_t, temp_count);
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_ci        bi_foreach_block(ctx, block) {
345bf215546Sopenharmony_ci                struct sched_ctx sctx = {
346bf215546Sopenharmony_ci                        .dag = create_dag(ctx, block, memctx),
347bf215546Sopenharmony_ci                        .max = temp_count,
348bf215546Sopenharmony_ci                        .live = live
349bf215546Sopenharmony_ci                };
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_ci                pressure_schedule_block(ctx, block, &sctx);
352bf215546Sopenharmony_ci        }
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci        ralloc_free(memctx);
355bf215546Sopenharmony_ci}
356