1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2020 Collabora Ltd.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors (Collabora):
24bf215546Sopenharmony_ci *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "compiler.h"
28bf215546Sopenharmony_ci#include "bi_builder.h"
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci/* Arguments common to worklist, passed by value for convenience */
31bf215546Sopenharmony_ci
struct bi_worklist {
        /* # of instructions in the block */
        unsigned count;

        /* Instructions in the block */
        bi_instr **instructions;

        /* Bitset of instructions in the block ready for scheduling */
        BITSET_WORD *worklist;

        /* The backwards dependency graph. Both arrays have one entry per
         * instruction of the block. dep_counts[i] is the number of
         * unscheduled instructions that must still be scheduled before
         * instruction i becomes ready; the worklist is seeded with the
         * instructions whose count is zero. dependents[i] is a bitset (one
         * bit per instruction) of the instructions whose dep_counts entry
         * is decremented once instruction i has been scheduled. */
        unsigned *dep_counts;
        BITSET_WORD **dependents;
};
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_ci/* State of a single tuple and clause under construction */
51bf215546Sopenharmony_ci
/* Register read/write bookkeeping for the tuple being constructed. Embedded
 * in struct bi_tuple_state as its `reg` member. */
struct bi_reg_state {
        /* Number of register writes */
        unsigned nr_writes;

        /* Register reads, expressed as (equivalence classes of)
         * sources. Only 3 reads are allowed, but up to 2 may spill as
         * "forced" for the next scheduled tuple, provided such a tuple
         * can be constructed. Hence room for 3 + 2 = 5 entries, of which
         * the first nr_reads are in use. */
        bi_index reads[5];
        unsigned nr_reads;

        /* The previous tuple scheduled (= the next tuple executed in the
         * program) may require certain writes, in order to bypass the register
         * file and use a temporary passthrough for the value. Up to 2 such
         * constraints are architecturally satisfiable */
        unsigned forced_count;
        bi_index forceds[2];
};
70bf215546Sopenharmony_ci
/* State of the single tuple currently under construction */
struct bi_tuple_state {
        /* Is this the last tuple in the clause */
        bool last;

        /* Scheduled ADD instruction, or null if none */
        bi_instr *add;

        /* Reads for previous (succeeding) tuple, i.e. the tuple that was
         * scheduled before this one and executes after it */
        bi_index prev_reads[5];
        unsigned nr_prev_reads;
        bi_tuple *prev;

        /* Register slot state for current tuple */
        struct bi_reg_state reg;

        /* Constants are shared in the tuple. If constant_count is nonzero, it
         * is a size for constant count. Otherwise, fau is the slot read from
         * FAU, or zero if none is assigned. Ordinarily FAU slot 0 reads zero,
         * but within a tuple, that should be encoded as constant_count != 0
         * and constants[0] = constants[1] = 0 */
        unsigned constant_count;

        /* constants and fau share storage; constant_count selects which one
         * is meaningful, as described above */
        union {
                uint32_t constants[2];
                enum bir_fau fau;
        };

        /* NOTE(review): presumably identifies which constant is PC-relative;
         * its users are outside this part of the file -- confirm */
        unsigned pcrel_idx;
};
100bf215546Sopenharmony_ci
/* Up to two 32-bit constants tracked together for a clause. An array of
 * these is kept in struct bi_clause_state as `consts`. */
struct bi_const_state {
        /* NOTE(review): presumably the number of valid entries in
         * constants[] -- confirm against the users of this struct */
        unsigned constant_count;
        bool pcrel; /* applies to first const */
        uint32_t constants[2];

        /* Index of the constant into the clause */
        unsigned word_idx;
};
109bf215546Sopenharmony_ci
/* Flush-to-zero mode of a clause, stored in struct bi_clause_state as `ftz` */
enum bi_ftz_state {
        /* No flush-to-zero state assigned yet */
        BI_FTZ_STATE_NONE,

        /* Never flush-to-zero */
        BI_FTZ_STATE_DISABLE,

        /* Always flush-to-zero */
        BI_FTZ_STATE_ENABLE,
};
120bf215546Sopenharmony_ci
/* State of the clause currently under construction */
struct bi_clause_state {
        /* Has a message-passing instruction already been assigned? */
        bool message;

        /* Indices already accessed, this needs to be tracked to avoid hazards
         * around message-passing instructions. The first access_count
         * entries of accesses[] are in use.
         * NOTE(review): the factor 16 presumably bounds the number of
         * instructions in a clause -- confirm */
        unsigned access_count;
        bi_index accesses[(BI_MAX_SRCS + BI_MAX_DESTS) * 16];

        /* NOTE(review): presumably the number of tuples scheduled into the
         * clause so far, with one consts[] entry per tuple -- confirm */
        unsigned tuple_count;
        struct bi_const_state consts[8];

        /* Numerical state of the clause */
        enum bi_ftz_state ftz;
};
136bf215546Sopenharmony_ci
/* Determines message type by checking the table and a few special cases. Only
138bf215546Sopenharmony_ci * case missing is tilebuffer instructions that access depth/stencil, which
139bf215546Sopenharmony_ci * require a Z_STENCIL message (to implement
140bf215546Sopenharmony_ci * ARM_shader_framebuffer_fetch_depth_stencil) */
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_cistatic enum bifrost_message_type
143bf215546Sopenharmony_cibi_message_type_for_instr(bi_instr *ins)
144bf215546Sopenharmony_ci{
145bf215546Sopenharmony_ci        enum bifrost_message_type msg = bi_opcode_props[ins->op].message;
146bf215546Sopenharmony_ci        bool ld_var_special = (ins->op == BI_OPCODE_LD_VAR_SPECIAL);
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci        if (ld_var_special && ins->varying_name == BI_VARYING_NAME_FRAG_Z)
149bf215546Sopenharmony_ci                return BIFROST_MESSAGE_Z_STENCIL;
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci        if (msg == BIFROST_MESSAGE_LOAD && ins->seg == BI_SEG_UBO)
152bf215546Sopenharmony_ci                return BIFROST_MESSAGE_ATTRIBUTE;
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci        return msg;
155bf215546Sopenharmony_ci}
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci/* Attribute, texture, and UBO load (attribute message) instructions support
158bf215546Sopenharmony_ci * bindless, so just check the message type */
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_ciASSERTED static bool
161bf215546Sopenharmony_cibi_supports_dtsel(bi_instr *ins)
162bf215546Sopenharmony_ci{
163bf215546Sopenharmony_ci        switch (bi_message_type_for_instr(ins)) {
164bf215546Sopenharmony_ci        case BIFROST_MESSAGE_ATTRIBUTE:
165bf215546Sopenharmony_ci                return ins->op != BI_OPCODE_LD_GCLK_U64;
166bf215546Sopenharmony_ci        case BIFROST_MESSAGE_TEX:
167bf215546Sopenharmony_ci                return true;
168bf215546Sopenharmony_ci        default:
169bf215546Sopenharmony_ci                return false;
170bf215546Sopenharmony_ci        }
171bf215546Sopenharmony_ci}
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_ci/* Adds an edge to the dependency graph */
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_cistatic void
176bf215546Sopenharmony_cibi_push_dependency(unsigned parent, unsigned child,
177bf215546Sopenharmony_ci                BITSET_WORD **dependents, unsigned *dep_counts)
178bf215546Sopenharmony_ci{
179bf215546Sopenharmony_ci        if (!BITSET_TEST(dependents[parent], child)) {
180bf215546Sopenharmony_ci                BITSET_SET(dependents[parent], child);
181bf215546Sopenharmony_ci                dep_counts[child]++;
182bf215546Sopenharmony_ci        }
183bf215546Sopenharmony_ci}
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_cistatic void
186bf215546Sopenharmony_ciadd_dependency(struct util_dynarray *table, unsigned index, unsigned child,
187bf215546Sopenharmony_ci                BITSET_WORD **dependents, unsigned *dep_counts)
188bf215546Sopenharmony_ci{
189bf215546Sopenharmony_ci        assert(index < 64);
190bf215546Sopenharmony_ci        util_dynarray_foreach(table + index, unsigned, parent)
191bf215546Sopenharmony_ci                bi_push_dependency(*parent, child, dependents, dep_counts);
192bf215546Sopenharmony_ci}
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_cistatic void
195bf215546Sopenharmony_cimark_access(struct util_dynarray *table, unsigned index, unsigned parent)
196bf215546Sopenharmony_ci{
197bf215546Sopenharmony_ci        assert(index < 64);
198bf215546Sopenharmony_ci        util_dynarray_append(&table[index], unsigned, parent);
199bf215546Sopenharmony_ci}
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_cistatic bool
202bf215546Sopenharmony_cibi_is_sched_barrier(bi_instr *I)
203bf215546Sopenharmony_ci{
204bf215546Sopenharmony_ci        switch (I->op) {
205bf215546Sopenharmony_ci        case BI_OPCODE_BARRIER:
206bf215546Sopenharmony_ci        case BI_OPCODE_DISCARD_F32:
207bf215546Sopenharmony_ci                return true;
208bf215546Sopenharmony_ci        default:
209bf215546Sopenharmony_ci                return false;
210bf215546Sopenharmony_ci        }
211bf215546Sopenharmony_ci}
212bf215546Sopenharmony_ci
/* Builds the scheduling dependency graph for the block described by st.
 *
 * st is passed by value, but st.dependents and st.dep_counts point to arrays
 * of st.count entries owned by the caller. Both are filled in here: a bitset
 * of st.count bits is calloc'd for every dependents[i] and dep_counts[i] is
 * reset to zero before any edge is added.
 *
 * Instructions are visited from last to first, so the per-register tables
 * below only ever contain instructions that come later in program order
 * (plus the current instruction's own writes once they have been recorded).
 *
 * inorder:  if set, an edge is added between every pair of instructions.
 * is_blend: if clear, BLEND instructions are additionally treated as writing
 *           registers 0-15.
 *
 * st.count must be nonzero: the last instruction is inspected
 * unconditionally. */
static void
bi_create_dependency_graph(struct bi_worklist st, bool inorder, bool is_blend)
{
        /* For each register, the indices of the already-visited instructions
         * that read / write it. Register numbers are asserted to be < 64 by
         * add_dependency() and mark_access(). */
        struct util_dynarray last_read[64], last_write[64];

        for (unsigned i = 0; i < 64; ++i) {
                util_dynarray_init(&last_read[i], NULL);
                util_dynarray_init(&last_write[i], NULL);
        }

        /* Initialize dependency graph */
        for (unsigned i = 0; i < st.count; ++i) {
                st.dependents[i] =
                        calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD));

                st.dep_counts[i] = 0;
        }

        /* Index of the most recently visited message-passing instruction
         * (i.e. the next one in program order), or ~0 if none yet */
        unsigned prev_msg = ~0;

        /* Populate dependency graph */
        for (signed i = st.count - 1; i >= 0; --i) {
                bi_instr *ins = st.instructions[i];

                /* Read-before-write: this instruction reads registers that
                 * later instructions overwrite */
                bi_foreach_src(ins, s) {
                        if (ins->src[s].type != BI_INDEX_REGISTER) continue;
                        unsigned count = bi_count_read_registers(ins, s);

                        for (unsigned c = 0; c < count; ++c)
                                add_dependency(last_write, ins->src[s].value + c, i, st.dependents, st.dep_counts);
                }

                /* Keep message-passing ops in order. (This pass only cares
                 * about bundling; reordering of message-passing instructions
                 * happens during earlier scheduling.) */

                if (bi_message_type_for_instr(ins)) {
                        if (prev_msg != ~0)
                                bi_push_dependency(prev_msg, i, st.dependents, st.dep_counts);

                        prev_msg = i;
                }

                /* Handle schedule barriers, adding All the deps: every other
                 * instruction j is ordered against i, with the later of the
                 * two as the parent */
                if (inorder || bi_is_sched_barrier(ins)) {
                        for (unsigned j = 0; j < st.count; ++j) {
                                if (i == j) continue;

                                bi_push_dependency(MAX2(i, j), MIN2(i, j),
                                                st.dependents, st.dep_counts);
                        }
                }

                /* Writes are ordered against later reads and later writes of
                 * the same registers. The write is only recorded after the
                 * edges for that register have been added. */
                bi_foreach_dest(ins, d) {
                        if (ins->dest[d].type != BI_INDEX_REGISTER) continue;
                        unsigned dest = ins->dest[d].value;

                        unsigned count = bi_count_write_registers(ins, d);

                        for (unsigned c = 0; c < count; ++c) {
                                add_dependency(last_read, dest + c, i, st.dependents, st.dep_counts);
                                add_dependency(last_write, dest + c, i, st.dependents, st.dep_counts);
                                mark_access(last_write, dest + c, i);
                        }
                }

                /* Blend shaders are allowed to clobber R0-R15. Treat these
                 * registers like extra destinations for scheduling purposes.
                 * NOTE(review): is_blend presumably means the shader being
                 * compiled is itself a blend shader -- confirm with callers.
                 */
                if (ins->op == BI_OPCODE_BLEND && !is_blend) {
                        for (unsigned c = 0; c < 16; ++c) {
                                add_dependency(last_read, c, i, st.dependents, st.dep_counts);
                                add_dependency(last_write, c, i, st.dependents, st.dep_counts);
                                mark_access(last_write, c, i);
                        }
                }

                /* Record this instruction's reads last, so that it does not
                 * pick up a dependency on itself when it writes a register
                 * it also reads */
                bi_foreach_src(ins, s) {
                        if (ins->src[s].type != BI_INDEX_REGISTER) continue;

                        unsigned count = bi_count_read_registers(ins, s);

                        for (unsigned c = 0; c < count; ++c)
                                mark_access(last_read, ins->src[s].value + c, i);
                }
        }

        /* If there is a branch, all instructions depend on it, as interblock
         * execution must be purely in-order */

        bi_instr *last = st.instructions[st.count - 1];
        if (last->branch_target || last->op == BI_OPCODE_JUMP) {
                for (signed i = st.count - 2; i >= 0; --i)
                        bi_push_dependency(st.count - 1, i, st.dependents, st.dep_counts);
        }

        /* Free the intermediate structures */
        for (unsigned i = 0; i < 64; ++i) {
                util_dynarray_fini(&last_read[i]);
                util_dynarray_fini(&last_write[i]);
        }
}
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci/* Scheduler pseudoinstruction lowerings to enable instruction pairings.
317bf215546Sopenharmony_ci * Currently only support CUBEFACE -> *CUBEFACE1/+CUBEFACE2
318bf215546Sopenharmony_ci */
319bf215546Sopenharmony_ci
320bf215546Sopenharmony_cistatic bi_instr *
321bf215546Sopenharmony_cibi_lower_cubeface(bi_context *ctx,
322bf215546Sopenharmony_ci                struct bi_clause_state *clause, struct bi_tuple_state *tuple)
323bf215546Sopenharmony_ci{
324bf215546Sopenharmony_ci        bi_instr *pinstr = tuple->add;
325bf215546Sopenharmony_ci        bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr));
326bf215546Sopenharmony_ci        bi_instr *cubeface1 = bi_cubeface1_to(&b, pinstr->dest[0],
327bf215546Sopenharmony_ci                        pinstr->src[0], pinstr->src[1], pinstr->src[2]);
328bf215546Sopenharmony_ci
329bf215546Sopenharmony_ci        pinstr->op = BI_OPCODE_CUBEFACE2;
330bf215546Sopenharmony_ci        pinstr->dest[0] = pinstr->dest[1];
331bf215546Sopenharmony_ci        pinstr->dest[1] = bi_null();
332bf215546Sopenharmony_ci        pinstr->src[0] = cubeface1->dest[0];
333bf215546Sopenharmony_ci        pinstr->src[1] = bi_null();
334bf215546Sopenharmony_ci        pinstr->src[2] = bi_null();
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci        return cubeface1;
337bf215546Sopenharmony_ci}
338bf215546Sopenharmony_ci
/* Pseudo arguments are (rbase, address lo, address hi). We need *ATOM_C.i32 to
340bf215546Sopenharmony_ci * have the arguments (address lo, address hi, rbase), and +ATOM_CX to have the
341bf215546Sopenharmony_ci * arguments (rbase, address lo, address hi, rbase) */
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_cistatic bi_instr *
344bf215546Sopenharmony_cibi_lower_atom_c(bi_context *ctx, struct bi_clause_state *clause, struct
345bf215546Sopenharmony_ci                bi_tuple_state *tuple)
346bf215546Sopenharmony_ci{
347bf215546Sopenharmony_ci        bi_instr *pinstr = tuple->add;
348bf215546Sopenharmony_ci        bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr));
349bf215546Sopenharmony_ci        bi_instr *atom_c = bi_atom_c_return_i32(&b,
350bf215546Sopenharmony_ci                        pinstr->src[1], pinstr->src[2], pinstr->src[0],
351bf215546Sopenharmony_ci                        pinstr->atom_opc);
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ci        if (bi_is_null(pinstr->dest[0]))
354bf215546Sopenharmony_ci                atom_c->op = BI_OPCODE_ATOM_C_I32;
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci        pinstr->op = BI_OPCODE_ATOM_CX;
357bf215546Sopenharmony_ci        pinstr->src[3] = atom_c->src[2];
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci        return atom_c;
360bf215546Sopenharmony_ci}
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_cistatic bi_instr *
363bf215546Sopenharmony_cibi_lower_atom_c1(bi_context *ctx, struct bi_clause_state *clause, struct
364bf215546Sopenharmony_ci                bi_tuple_state *tuple)
365bf215546Sopenharmony_ci{
366bf215546Sopenharmony_ci        bi_instr *pinstr = tuple->add;
367bf215546Sopenharmony_ci        bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr));
368bf215546Sopenharmony_ci        bi_instr *atom_c = bi_atom_c1_return_i32(&b,
369bf215546Sopenharmony_ci                        pinstr->src[0], pinstr->src[1], pinstr->atom_opc);
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ci        if (bi_is_null(pinstr->dest[0]))
372bf215546Sopenharmony_ci                atom_c->op = BI_OPCODE_ATOM_C1_I32;
373bf215546Sopenharmony_ci
374bf215546Sopenharmony_ci        pinstr->op = BI_OPCODE_ATOM_CX;
375bf215546Sopenharmony_ci        pinstr->src[2] = pinstr->src[1];
376bf215546Sopenharmony_ci        pinstr->src[1] = pinstr->src[0];
377bf215546Sopenharmony_ci        pinstr->src[3] = bi_dontcare(&b);
378bf215546Sopenharmony_ci        pinstr->src[0] = bi_null();
379bf215546Sopenharmony_ci
380bf215546Sopenharmony_ci        return atom_c;
381bf215546Sopenharmony_ci}
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_cistatic bi_instr *
384bf215546Sopenharmony_cibi_lower_seg_add(bi_context *ctx,
385bf215546Sopenharmony_ci                struct bi_clause_state *clause, struct bi_tuple_state *tuple)
386bf215546Sopenharmony_ci{
387bf215546Sopenharmony_ci        bi_instr *pinstr = tuple->add;
388bf215546Sopenharmony_ci        bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr));
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci        bi_instr *fma = bi_seg_add_to(&b, pinstr->dest[0], pinstr->src[0],
391bf215546Sopenharmony_ci                        pinstr->preserve_null, pinstr->seg);
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_ci        pinstr->op = BI_OPCODE_SEG_ADD;
394bf215546Sopenharmony_ci        pinstr->src[0] = pinstr->src[1];
395bf215546Sopenharmony_ci        pinstr->src[1] = bi_null();
396bf215546Sopenharmony_ci
397bf215546Sopenharmony_ci        assert(pinstr->dest[0].type == BI_INDEX_REGISTER);
398bf215546Sopenharmony_ci        pinstr->dest[0].value += 1;
399bf215546Sopenharmony_ci
400bf215546Sopenharmony_ci        return fma;
401bf215546Sopenharmony_ci}
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_cistatic bi_instr *
404bf215546Sopenharmony_cibi_lower_dtsel(bi_context *ctx,
405bf215546Sopenharmony_ci                struct bi_clause_state *clause, struct bi_tuple_state *tuple)
406bf215546Sopenharmony_ci{
407bf215546Sopenharmony_ci        bi_instr *add = tuple->add;
408bf215546Sopenharmony_ci        bi_builder b = bi_init_builder(ctx, bi_before_instr(add));
409bf215546Sopenharmony_ci
410bf215546Sopenharmony_ci        bi_instr *dtsel = bi_dtsel_imm_to(&b, bi_temp(b.shader),
411bf215546Sopenharmony_ci                        add->src[0], add->table);
412bf215546Sopenharmony_ci        add->src[0] = dtsel->dest[0];
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_ci        assert(bi_supports_dtsel(add));
415bf215546Sopenharmony_ci        return dtsel;
416bf215546Sopenharmony_ci}
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_ci/* Flatten linked list to array for O(1) indexing */
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_cistatic bi_instr **
421bf215546Sopenharmony_cibi_flatten_block(bi_block *block, unsigned *len)
422bf215546Sopenharmony_ci{
423bf215546Sopenharmony_ci        if (list_is_empty(&block->instructions))
424bf215546Sopenharmony_ci                return NULL;
425bf215546Sopenharmony_ci
426bf215546Sopenharmony_ci        *len = list_length(&block->instructions);
427bf215546Sopenharmony_ci        bi_instr **instructions = malloc(sizeof(bi_instr *) * (*len));
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_ci        unsigned i = 0;
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_ci        bi_foreach_instr_in_block(block, ins)
432bf215546Sopenharmony_ci                instructions[i++] = ins;
433bf215546Sopenharmony_ci
434bf215546Sopenharmony_ci        return instructions;
435bf215546Sopenharmony_ci}
436bf215546Sopenharmony_ci
/* The worklist tracks instructions without outstanding dependencies. For
 * debug, in-order scheduling can be forced; the dependency graph is still
 * constructed, but with an edge between every pair of instructions.
 */
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_cistatic struct bi_worklist
442bf215546Sopenharmony_cibi_initialize_worklist(bi_block *block, bool inorder, bool is_blend)
443bf215546Sopenharmony_ci{
444bf215546Sopenharmony_ci        struct bi_worklist st = { };
445bf215546Sopenharmony_ci        st.instructions = bi_flatten_block(block, &st.count);
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_ci        if (!st.count)
448bf215546Sopenharmony_ci                return st;
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_ci        st.dependents = calloc(st.count, sizeof(st.dependents[0]));
451bf215546Sopenharmony_ci        st.dep_counts = calloc(st.count, sizeof(st.dep_counts[0]));
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_ci        bi_create_dependency_graph(st, inorder, is_blend);
454bf215546Sopenharmony_ci        st.worklist = calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD));
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_ci        for (unsigned i = 0; i < st.count; ++i) {
457bf215546Sopenharmony_ci                if (st.dep_counts[i] == 0)
458bf215546Sopenharmony_ci                        BITSET_SET(st.worklist, i);
459bf215546Sopenharmony_ci        }
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci        return st;
462bf215546Sopenharmony_ci}
463bf215546Sopenharmony_ci
464bf215546Sopenharmony_cistatic void
465bf215546Sopenharmony_cibi_free_worklist(struct bi_worklist st)
466bf215546Sopenharmony_ci{
467bf215546Sopenharmony_ci        free(st.dep_counts);
468bf215546Sopenharmony_ci        free(st.dependents);
469bf215546Sopenharmony_ci        free(st.instructions);
470bf215546Sopenharmony_ci        free(st.worklist);
471bf215546Sopenharmony_ci}
472bf215546Sopenharmony_ci
473bf215546Sopenharmony_cistatic void
474bf215546Sopenharmony_cibi_update_worklist(struct bi_worklist st, unsigned idx)
475bf215546Sopenharmony_ci{
476bf215546Sopenharmony_ci        assert(st.dep_counts[idx] == 0);
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ci        if (!st.dependents[idx])
479bf215546Sopenharmony_ci                return;
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_ci        /* Iterate each dependent to remove one dependency (`done`),
482bf215546Sopenharmony_ci         * adding dependents to the worklist where possible. */
483bf215546Sopenharmony_ci
484bf215546Sopenharmony_ci        unsigned i;
485bf215546Sopenharmony_ci        BITSET_FOREACH_SET(i, st.dependents[idx], st.count) {
486bf215546Sopenharmony_ci                assert(st.dep_counts[i] != 0);
487bf215546Sopenharmony_ci                unsigned new_deps = --st.dep_counts[i];
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_ci                if (new_deps == 0)
490bf215546Sopenharmony_ci                        BITSET_SET(st.worklist, i);
491bf215546Sopenharmony_ci        }
492bf215546Sopenharmony_ci
493bf215546Sopenharmony_ci        free(st.dependents[idx]);
494bf215546Sopenharmony_ci}
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_ci/* Scheduler predicates */
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_ci/* IADDC.i32 can implement IADD.u32 if no saturation or swizzling is in use */
499bf215546Sopenharmony_cistatic bool
500bf215546Sopenharmony_cibi_can_iaddc(bi_instr *ins)
501bf215546Sopenharmony_ci{
502bf215546Sopenharmony_ci        return (ins->op == BI_OPCODE_IADD_U32 && !ins->saturate &&
503bf215546Sopenharmony_ci                ins->src[0].swizzle == BI_SWIZZLE_H01 &&
504bf215546Sopenharmony_ci                ins->src[1].swizzle == BI_SWIZZLE_H01);
505bf215546Sopenharmony_ci}
506bf215546Sopenharmony_ci
507bf215546Sopenharmony_ci/*
508bf215546Sopenharmony_ci * The encoding of *FADD.v2f16 only specifies a single abs flag. All abs
509bf215546Sopenharmony_ci * encodings are permitted by swapping operands; however, this scheme fails if
510bf215546Sopenharmony_ci * both operands are equal. Test for this case.
511bf215546Sopenharmony_ci */
512bf215546Sopenharmony_cistatic bool
513bf215546Sopenharmony_cibi_impacted_abs(bi_instr *I)
514bf215546Sopenharmony_ci{
515bf215546Sopenharmony_ci        return I->src[0].abs && I->src[1].abs &&
516bf215546Sopenharmony_ci               bi_is_word_equiv(I->src[0], I->src[1]);
517bf215546Sopenharmony_ci}
518bf215546Sopenharmony_ci
519bf215546Sopenharmony_cibool
520bf215546Sopenharmony_cibi_can_fma(bi_instr *ins)
521bf215546Sopenharmony_ci{
522bf215546Sopenharmony_ci        /* +IADD.i32 -> *IADDC.i32 */
523bf215546Sopenharmony_ci        if (bi_can_iaddc(ins))
524bf215546Sopenharmony_ci                return true;
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_ci        /* +MUX -> *CSEL */
527bf215546Sopenharmony_ci        if (bi_can_replace_with_csel(ins))
528bf215546Sopenharmony_ci                return true;
529bf215546Sopenharmony_ci
530bf215546Sopenharmony_ci        /* *FADD.v2f16 has restricted abs modifiers, use +FADD.v2f16 instead */
531bf215546Sopenharmony_ci        if (ins->op == BI_OPCODE_FADD_V2F16 && bi_impacted_abs(ins))
532bf215546Sopenharmony_ci                return false;
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_ci        /* TODO: some additional fp16 constraints */
535bf215546Sopenharmony_ci        return bi_opcode_props[ins->op].fma;
536bf215546Sopenharmony_ci}
537bf215546Sopenharmony_ci
538bf215546Sopenharmony_cistatic bool
539bf215546Sopenharmony_cibi_impacted_fadd_widens(bi_instr *I)
540bf215546Sopenharmony_ci{
541bf215546Sopenharmony_ci        enum bi_swizzle swz0 = I->src[0].swizzle;
542bf215546Sopenharmony_ci        enum bi_swizzle swz1 = I->src[1].swizzle;
543bf215546Sopenharmony_ci
544bf215546Sopenharmony_ci        return (swz0 == BI_SWIZZLE_H00 && swz1 == BI_SWIZZLE_H11) ||
545bf215546Sopenharmony_ci                (swz0 == BI_SWIZZLE_H11 && swz1 == BI_SWIZZLE_H11) ||
546bf215546Sopenharmony_ci                (swz0 == BI_SWIZZLE_H11 && swz1 == BI_SWIZZLE_H00);
547bf215546Sopenharmony_ci}
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_cibool
550bf215546Sopenharmony_cibi_can_add(bi_instr *ins)
551bf215546Sopenharmony_ci{
552bf215546Sopenharmony_ci        /* +FADD.v2f16 lacks clamp modifier, use *FADD.v2f16 instead */
553bf215546Sopenharmony_ci        if (ins->op == BI_OPCODE_FADD_V2F16 && ins->clamp)
554bf215546Sopenharmony_ci                return false;
555bf215546Sopenharmony_ci
556bf215546Sopenharmony_ci        /* +FCMP.v2f16 lacks abs modifier, use *FCMP.v2f16 instead */
557bf215546Sopenharmony_ci        if (ins->op == BI_OPCODE_FCMP_V2F16 && (ins->src[0].abs || ins->src[1].abs))
558bf215546Sopenharmony_ci                return false;
559bf215546Sopenharmony_ci
560bf215546Sopenharmony_ci        /* +FADD.f32 has restricted widens, use +FADD.f32 for the full set */
561bf215546Sopenharmony_ci        if (ins->op == BI_OPCODE_FADD_F32 && bi_impacted_fadd_widens(ins))
562bf215546Sopenharmony_ci               return false;
563bf215546Sopenharmony_ci
564bf215546Sopenharmony_ci        /* TODO: some additional fp16 constraints */
565bf215546Sopenharmony_ci        return bi_opcode_props[ins->op].add;
566bf215546Sopenharmony_ci}
567bf215546Sopenharmony_ci
568bf215546Sopenharmony_ci/* Architecturally, no single instruction has a "not last" constraint. However,
569bf215546Sopenharmony_ci * pseudoinstructions writing multiple destinations (expanding to multiple
570bf215546Sopenharmony_ci * paired instructions) can run afoul of the "no two writes on the last clause"
571bf215546Sopenharmony_ci * constraint, so we check for that here.
572bf215546Sopenharmony_ci *
573bf215546Sopenharmony_ci * Exception to the exception: TEXC, which writes to multiple sets of staging
574bf215546Sopenharmony_ci * registers. Staging registers bypass the usual register write mechanism so
575bf215546Sopenharmony_ci * this restriction does not apply.
576bf215546Sopenharmony_ci */
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_cistatic bool
579bf215546Sopenharmony_cibi_must_not_last(bi_instr *ins)
580bf215546Sopenharmony_ci{
581bf215546Sopenharmony_ci        return !bi_is_null(ins->dest[0]) && !bi_is_null(ins->dest[1]) &&
582bf215546Sopenharmony_ci               (ins->op != BI_OPCODE_TEXC);
583bf215546Sopenharmony_ci}
584bf215546Sopenharmony_ci
585bf215546Sopenharmony_ci/* Check for a message-passing instruction. +DISCARD.f32 is special-cased; we
586bf215546Sopenharmony_ci * treat it as a message-passing instruction for the purpose of scheduling
587bf215546Sopenharmony_ci * despite no passing no logical message. Otherwise invalid encoding faults may
588bf215546Sopenharmony_ci * be raised for unknown reasons (possibly an errata).
589bf215546Sopenharmony_ci */
590bf215546Sopenharmony_ci
591bf215546Sopenharmony_cibool
592bf215546Sopenharmony_cibi_must_message(bi_instr *ins)
593bf215546Sopenharmony_ci{
594bf215546Sopenharmony_ci        return (bi_opcode_props[ins->op].message != BIFROST_MESSAGE_NONE) ||
595bf215546Sopenharmony_ci                (ins->op == BI_OPCODE_DISCARD_F32);
596bf215546Sopenharmony_ci}
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_cistatic bool
599bf215546Sopenharmony_cibi_fma_atomic(enum bi_opcode op)
600bf215546Sopenharmony_ci{
601bf215546Sopenharmony_ci        switch (op) {
602bf215546Sopenharmony_ci        case BI_OPCODE_ATOM_C_I32:
603bf215546Sopenharmony_ci        case BI_OPCODE_ATOM_C_I64:
604bf215546Sopenharmony_ci        case BI_OPCODE_ATOM_C1_I32:
605bf215546Sopenharmony_ci        case BI_OPCODE_ATOM_C1_I64:
606bf215546Sopenharmony_ci        case BI_OPCODE_ATOM_C1_RETURN_I32:
607bf215546Sopenharmony_ci        case BI_OPCODE_ATOM_C1_RETURN_I64:
608bf215546Sopenharmony_ci        case BI_OPCODE_ATOM_C_RETURN_I32:
609bf215546Sopenharmony_ci        case BI_OPCODE_ATOM_C_RETURN_I64:
610bf215546Sopenharmony_ci        case BI_OPCODE_ATOM_POST_I32:
611bf215546Sopenharmony_ci        case BI_OPCODE_ATOM_POST_I64:
612bf215546Sopenharmony_ci        case BI_OPCODE_ATOM_PRE_I64:
613bf215546Sopenharmony_ci                return true;
614bf215546Sopenharmony_ci        default:
615bf215546Sopenharmony_ci                return false;
616bf215546Sopenharmony_ci        }
617bf215546Sopenharmony_ci}
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_cibool
620bf215546Sopenharmony_cibi_reads_zero(bi_instr *ins)
621bf215546Sopenharmony_ci{
622bf215546Sopenharmony_ci        return !(bi_fma_atomic(ins->op) || ins->op == BI_OPCODE_IMULD);
623bf215546Sopenharmony_ci}
624bf215546Sopenharmony_ci
625bf215546Sopenharmony_cibool
626bf215546Sopenharmony_cibi_reads_temps(bi_instr *ins, unsigned src)
627bf215546Sopenharmony_ci{
628bf215546Sopenharmony_ci        switch (ins->op) {
629bf215546Sopenharmony_ci        /* Cannot permute a temporary */
630bf215546Sopenharmony_ci        case BI_OPCODE_CLPER_I32:
631bf215546Sopenharmony_ci        case BI_OPCODE_CLPER_OLD_I32:
632bf215546Sopenharmony_ci                return src != 0;
633bf215546Sopenharmony_ci
634bf215546Sopenharmony_ci        /* ATEST isn't supposed to be restricted, but in practice it always
635bf215546Sopenharmony_ci         * wants to source its coverage mask input (source 0) from register 60,
636bf215546Sopenharmony_ci         * which won't work properly if we put the input in a temp. This
637bf215546Sopenharmony_ci         * requires workarounds in both RA and clause scheduling.
638bf215546Sopenharmony_ci         */
639bf215546Sopenharmony_ci        case BI_OPCODE_ATEST:
640bf215546Sopenharmony_ci                return src != 0;
641bf215546Sopenharmony_ci
642bf215546Sopenharmony_ci        case BI_OPCODE_IMULD:
643bf215546Sopenharmony_ci                return false;
644bf215546Sopenharmony_ci        default:
645bf215546Sopenharmony_ci                return true;
646bf215546Sopenharmony_ci        }
647bf215546Sopenharmony_ci}
648bf215546Sopenharmony_ci
649bf215546Sopenharmony_cistatic bool
650bf215546Sopenharmony_cibi_impacted_t_modifiers(bi_instr *I, unsigned src)
651bf215546Sopenharmony_ci{
652bf215546Sopenharmony_ci        enum bi_swizzle swizzle = I->src[src].swizzle;
653bf215546Sopenharmony_ci
654bf215546Sopenharmony_ci        switch (I->op) {
655bf215546Sopenharmony_ci        case BI_OPCODE_F16_TO_F32:
656bf215546Sopenharmony_ci        case BI_OPCODE_F16_TO_S32:
657bf215546Sopenharmony_ci        case BI_OPCODE_F16_TO_U32:
658bf215546Sopenharmony_ci        case BI_OPCODE_MKVEC_V2I16:
659bf215546Sopenharmony_ci        case BI_OPCODE_S16_TO_F32:
660bf215546Sopenharmony_ci        case BI_OPCODE_S16_TO_S32:
661bf215546Sopenharmony_ci        case BI_OPCODE_U16_TO_F32:
662bf215546Sopenharmony_ci        case BI_OPCODE_U16_TO_U32:
663bf215546Sopenharmony_ci                return (swizzle != BI_SWIZZLE_H00);
664bf215546Sopenharmony_ci
665bf215546Sopenharmony_ci        case BI_OPCODE_BRANCH_F32:
666bf215546Sopenharmony_ci        case BI_OPCODE_LOGB_F32:
667bf215546Sopenharmony_ci        case BI_OPCODE_ILOGB_F32:
668bf215546Sopenharmony_ci        case BI_OPCODE_FADD_F32:
669bf215546Sopenharmony_ci        case BI_OPCODE_FCMP_F32:
670bf215546Sopenharmony_ci        case BI_OPCODE_FREXPE_F32:
671bf215546Sopenharmony_ci        case BI_OPCODE_FREXPM_F32:
672bf215546Sopenharmony_ci        case BI_OPCODE_FROUND_F32:
673bf215546Sopenharmony_ci                return (swizzle != BI_SWIZZLE_H01);
674bf215546Sopenharmony_ci
675bf215546Sopenharmony_ci        case BI_OPCODE_IADD_S32:
676bf215546Sopenharmony_ci        case BI_OPCODE_IADD_U32:
677bf215546Sopenharmony_ci        case BI_OPCODE_ISUB_S32:
678bf215546Sopenharmony_ci        case BI_OPCODE_ISUB_U32:
679bf215546Sopenharmony_ci        case BI_OPCODE_IADD_V4S8:
680bf215546Sopenharmony_ci        case BI_OPCODE_IADD_V4U8:
681bf215546Sopenharmony_ci        case BI_OPCODE_ISUB_V4S8:
682bf215546Sopenharmony_ci        case BI_OPCODE_ISUB_V4U8:
683bf215546Sopenharmony_ci                return (src == 1) && (swizzle != BI_SWIZZLE_H01);
684bf215546Sopenharmony_ci
685bf215546Sopenharmony_ci        case BI_OPCODE_S8_TO_F32:
686bf215546Sopenharmony_ci        case BI_OPCODE_S8_TO_S32:
687bf215546Sopenharmony_ci        case BI_OPCODE_U8_TO_F32:
688bf215546Sopenharmony_ci        case BI_OPCODE_U8_TO_U32:
689bf215546Sopenharmony_ci                return (swizzle != BI_SWIZZLE_B0000);
690bf215546Sopenharmony_ci
691bf215546Sopenharmony_ci        case BI_OPCODE_V2S8_TO_V2F16:
692bf215546Sopenharmony_ci        case BI_OPCODE_V2S8_TO_V2S16:
693bf215546Sopenharmony_ci        case BI_OPCODE_V2U8_TO_V2F16:
694bf215546Sopenharmony_ci        case BI_OPCODE_V2U8_TO_V2U16:
695bf215546Sopenharmony_ci                return (swizzle != BI_SWIZZLE_B0022);
696bf215546Sopenharmony_ci
697bf215546Sopenharmony_ci        case BI_OPCODE_IADD_V2S16:
698bf215546Sopenharmony_ci        case BI_OPCODE_IADD_V2U16:
699bf215546Sopenharmony_ci        case BI_OPCODE_ISUB_V2S16:
700bf215546Sopenharmony_ci        case BI_OPCODE_ISUB_V2U16:
701bf215546Sopenharmony_ci                return (src == 1) && (swizzle >= BI_SWIZZLE_H11);
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci#if 0
704bf215546Sopenharmony_ci        /* Restriction on IADD in 64-bit clauses on G72 */
705bf215546Sopenharmony_ci        case BI_OPCODE_IADD_S64:
706bf215546Sopenharmony_ci        case BI_OPCODE_IADD_U64:
707bf215546Sopenharmony_ci                return (src == 1) && (swizzle != BI_SWIZZLE_D0);
708bf215546Sopenharmony_ci#endif
709bf215546Sopenharmony_ci
710bf215546Sopenharmony_ci        default:
711bf215546Sopenharmony_ci                return false;
712bf215546Sopenharmony_ci        }
713bf215546Sopenharmony_ci}
714bf215546Sopenharmony_ci
715bf215546Sopenharmony_cibool
716bf215546Sopenharmony_cibi_reads_t(bi_instr *ins, unsigned src)
717bf215546Sopenharmony_ci{
718bf215546Sopenharmony_ci        /* Branch offset cannot come from passthrough */
719bf215546Sopenharmony_ci        if (bi_opcode_props[ins->op].branch)
720bf215546Sopenharmony_ci                return src != 2;
721bf215546Sopenharmony_ci
722bf215546Sopenharmony_ci        /* Table can never read passthrough */
723bf215546Sopenharmony_ci        if (bi_opcode_props[ins->op].table)
724bf215546Sopenharmony_ci                return false;
725bf215546Sopenharmony_ci
726bf215546Sopenharmony_ci        /* Staging register reads may happen before the succeeding register
727bf215546Sopenharmony_ci         * block encodes a write, so effectively there is no passthrough */
728bf215546Sopenharmony_ci        if (bi_is_staging_src(ins, src))
729bf215546Sopenharmony_ci                return false;
730bf215546Sopenharmony_ci
731bf215546Sopenharmony_ci        /* Bifrost cores newer than Mali G71 have restrictions on swizzles on
732bf215546Sopenharmony_ci         * same-cycle temporaries. Check the list for these hazards. */
733bf215546Sopenharmony_ci        if (bi_impacted_t_modifiers(ins, src))
734bf215546Sopenharmony_ci                return false;
735bf215546Sopenharmony_ci
736bf215546Sopenharmony_ci        /* Descriptor must not come from a passthrough */
737bf215546Sopenharmony_ci        switch (ins->op) {
738bf215546Sopenharmony_ci        case BI_OPCODE_LD_CVT:
739bf215546Sopenharmony_ci        case BI_OPCODE_LD_TILE:
740bf215546Sopenharmony_ci        case BI_OPCODE_ST_CVT:
741bf215546Sopenharmony_ci        case BI_OPCODE_ST_TILE:
742bf215546Sopenharmony_ci        case BI_OPCODE_TEXC:
743bf215546Sopenharmony_ci                return src != 2;
744bf215546Sopenharmony_ci        case BI_OPCODE_BLEND:
745bf215546Sopenharmony_ci                return src != 2 && src != 3;
746bf215546Sopenharmony_ci
747bf215546Sopenharmony_ci        /* +JUMP can't read the offset from T */
748bf215546Sopenharmony_ci        case BI_OPCODE_JUMP:
749bf215546Sopenharmony_ci                return false;
750bf215546Sopenharmony_ci
751bf215546Sopenharmony_ci        /* Else, just check if we can read any temps */
752bf215546Sopenharmony_ci        default:
753bf215546Sopenharmony_ci                return bi_reads_temps(ins, src);
754bf215546Sopenharmony_ci        }
755bf215546Sopenharmony_ci}
756bf215546Sopenharmony_ci
757bf215546Sopenharmony_ci/* Counts the number of 64-bit constants required by a clause. TODO: We
758bf215546Sopenharmony_ci * might want to account for merging, right now we overestimate, but
759bf215546Sopenharmony_ci * that's probably fine most of the time */
760bf215546Sopenharmony_ci
761bf215546Sopenharmony_cistatic unsigned
762bf215546Sopenharmony_cibi_nconstants(struct bi_clause_state *clause)
763bf215546Sopenharmony_ci{
764bf215546Sopenharmony_ci        unsigned count_32 = 0;
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_ci        for (unsigned i = 0; i < ARRAY_SIZE(clause->consts); ++i)
767bf215546Sopenharmony_ci                count_32 += clause->consts[i].constant_count;
768bf215546Sopenharmony_ci
769bf215546Sopenharmony_ci        return DIV_ROUND_UP(count_32, 2);
770bf215546Sopenharmony_ci}
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_ci/* Would there be space for constants if we added one tuple? */
773bf215546Sopenharmony_ci
774bf215546Sopenharmony_cistatic bool
775bf215546Sopenharmony_cibi_space_for_more_constants(struct bi_clause_state *clause)
776bf215546Sopenharmony_ci{
777bf215546Sopenharmony_ci        return (bi_nconstants(clause) < 13 - (clause->tuple_count + 1));
778bf215546Sopenharmony_ci}
779bf215546Sopenharmony_ci
780bf215546Sopenharmony_ci/* Updates the FAU assignment for a tuple. A valid FAU assignment must be
781bf215546Sopenharmony_ci * possible (as a precondition), though not necessarily on the selected unit;
782bf215546Sopenharmony_ci * this is gauranteed per-instruction by bi_lower_fau and per-tuple by
783bf215546Sopenharmony_ci * bi_instr_schedulable */
784bf215546Sopenharmony_ci
785bf215546Sopenharmony_cistatic bool
786bf215546Sopenharmony_cibi_update_fau(struct bi_clause_state *clause,
787bf215546Sopenharmony_ci                struct bi_tuple_state *tuple,
788bf215546Sopenharmony_ci                bi_instr *instr, bool fma, bool destructive)
789bf215546Sopenharmony_ci{
790bf215546Sopenharmony_ci        /* Maintain our own constants, for nondestructive mode */
791bf215546Sopenharmony_ci        uint32_t copied_constants[2], copied_count;
792bf215546Sopenharmony_ci        unsigned *constant_count = &tuple->constant_count;
793bf215546Sopenharmony_ci        uint32_t *constants = tuple->constants;
794bf215546Sopenharmony_ci        enum bir_fau fau = tuple->fau;
795bf215546Sopenharmony_ci
796bf215546Sopenharmony_ci        if (!destructive) {
797bf215546Sopenharmony_ci                memcpy(copied_constants, tuple->constants,
798bf215546Sopenharmony_ci                                (*constant_count) * sizeof(constants[0]));
799bf215546Sopenharmony_ci                copied_count = tuple->constant_count;
800bf215546Sopenharmony_ci
801bf215546Sopenharmony_ci                constant_count = &copied_count;
802bf215546Sopenharmony_ci                constants = copied_constants;
803bf215546Sopenharmony_ci        }
804bf215546Sopenharmony_ci
805bf215546Sopenharmony_ci        bi_foreach_src(instr, s) {
806bf215546Sopenharmony_ci                bi_index src = instr->src[s];
807bf215546Sopenharmony_ci
808bf215546Sopenharmony_ci                if (src.type == BI_INDEX_FAU) {
809bf215546Sopenharmony_ci                        bool no_constants = *constant_count == 0;
810bf215546Sopenharmony_ci                        bool no_other_fau = (fau == src.value) || !fau;
811bf215546Sopenharmony_ci                        bool mergable = no_constants && no_other_fau;
812bf215546Sopenharmony_ci
813bf215546Sopenharmony_ci                        if (destructive) {
814bf215546Sopenharmony_ci                                assert(mergable);
815bf215546Sopenharmony_ci                                tuple->fau = src.value;
816bf215546Sopenharmony_ci                        } else if (!mergable) {
817bf215546Sopenharmony_ci                                return false;
818bf215546Sopenharmony_ci                        }
819bf215546Sopenharmony_ci
820bf215546Sopenharmony_ci                        fau = src.value;
821bf215546Sopenharmony_ci                } else if (src.type == BI_INDEX_CONSTANT) {
822bf215546Sopenharmony_ci                        /* No need to reserve space if we have a fast 0 */
823bf215546Sopenharmony_ci                        if (src.value == 0 && fma && bi_reads_zero(instr))
824bf215546Sopenharmony_ci                                continue;
825bf215546Sopenharmony_ci
826bf215546Sopenharmony_ci                        /* If there is a branch target, #0 by convention is the
827bf215546Sopenharmony_ci                         * PC-relative offset to the target */
828bf215546Sopenharmony_ci                        bool pcrel = instr->branch_target && src.value == 0;
829bf215546Sopenharmony_ci                        bool found = false;
830bf215546Sopenharmony_ci
831bf215546Sopenharmony_ci                        for (unsigned i = 0; i < *constant_count; ++i) {
832bf215546Sopenharmony_ci                                found |= (constants[i] == src.value) &&
833bf215546Sopenharmony_ci                                        (i != tuple->pcrel_idx);
834bf215546Sopenharmony_ci                        }
835bf215546Sopenharmony_ci
836bf215546Sopenharmony_ci                        /* pcrel constants are unique, so don't match */
837bf215546Sopenharmony_ci                        if (found && !pcrel)
838bf215546Sopenharmony_ci                                continue;
839bf215546Sopenharmony_ci
840bf215546Sopenharmony_ci                        bool no_fau = (*constant_count > 0) || !fau;
841bf215546Sopenharmony_ci                        bool mergable = no_fau && ((*constant_count) < 2);
842bf215546Sopenharmony_ci
843bf215546Sopenharmony_ci                        if (destructive) {
844bf215546Sopenharmony_ci                                assert(mergable);
845bf215546Sopenharmony_ci
846bf215546Sopenharmony_ci                                if (pcrel)
847bf215546Sopenharmony_ci                                        tuple->pcrel_idx = *constant_count;
848bf215546Sopenharmony_ci                        } else if (!mergable)
849bf215546Sopenharmony_ci                                return false;
850bf215546Sopenharmony_ci
851bf215546Sopenharmony_ci                        constants[(*constant_count)++] = src.value;
852bf215546Sopenharmony_ci                }
853bf215546Sopenharmony_ci        }
854bf215546Sopenharmony_ci
855bf215546Sopenharmony_ci        /* Constants per clause may be limited by tuple count */
856bf215546Sopenharmony_ci        bool room_for_constants = (*constant_count == 0) ||
857bf215546Sopenharmony_ci                bi_space_for_more_constants(clause);
858bf215546Sopenharmony_ci
859bf215546Sopenharmony_ci        if (destructive)
860bf215546Sopenharmony_ci                assert(room_for_constants);
861bf215546Sopenharmony_ci        else if (!room_for_constants)
862bf215546Sopenharmony_ci                return false;
863bf215546Sopenharmony_ci
864bf215546Sopenharmony_ci        return true;
865bf215546Sopenharmony_ci}
866bf215546Sopenharmony_ci
867bf215546Sopenharmony_ci/* Given an in-progress tuple, a candidate new instruction to add to the tuple,
868bf215546Sopenharmony_ci * and a source (index) from that candidate, determine whether this source is
869bf215546Sopenharmony_ci * "new", in the sense of requiring an additional read slot. That is, checks
870bf215546Sopenharmony_ci * whether the specified source reads from the register file via a read slot
871bf215546Sopenharmony_ci * (determined by its type and placement) and whether the source was already
872bf215546Sopenharmony_ci * specified by a prior read slot (to avoid double counting) */
873bf215546Sopenharmony_ci
874bf215546Sopenharmony_cistatic bool
875bf215546Sopenharmony_cibi_tuple_is_new_src(bi_instr *instr, struct bi_reg_state *reg, unsigned src_idx)
876bf215546Sopenharmony_ci{
877bf215546Sopenharmony_ci        bi_index src = instr->src[src_idx];
878bf215546Sopenharmony_ci
879bf215546Sopenharmony_ci        /* Only consider sources which come from the register file */
880bf215546Sopenharmony_ci        if (!(src.type == BI_INDEX_NORMAL || src.type == BI_INDEX_REGISTER))
881bf215546Sopenharmony_ci                return false;
882bf215546Sopenharmony_ci
883bf215546Sopenharmony_ci        /* Staging register reads bypass the usual register file mechanism */
884bf215546Sopenharmony_ci        if (bi_is_staging_src(instr, src_idx))
885bf215546Sopenharmony_ci                return false;
886bf215546Sopenharmony_ci
887bf215546Sopenharmony_ci        /* If a source is already read in the tuple, it is already counted */
888bf215546Sopenharmony_ci        for (unsigned t = 0; t < reg->nr_reads; ++t)
889bf215546Sopenharmony_ci                if (bi_is_word_equiv(src, reg->reads[t]))
890bf215546Sopenharmony_ci                        return false;
891bf215546Sopenharmony_ci
892bf215546Sopenharmony_ci        /* If a source is read in _this instruction_, it is already counted */
893bf215546Sopenharmony_ci        for (unsigned t = 0; t < src_idx; ++t)
894bf215546Sopenharmony_ci                if (bi_is_word_equiv(src, instr->src[t]))
895bf215546Sopenharmony_ci                        return false;
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_ci        return true;
898bf215546Sopenharmony_ci}
899bf215546Sopenharmony_ci
900bf215546Sopenharmony_ci/* Given two tuples in source order, count the number of register reads of the
901bf215546Sopenharmony_ci * successor, determined as the number of unique words accessed that aren't
902bf215546Sopenharmony_ci * written by the predecessor (since those are tempable).
903bf215546Sopenharmony_ci */
904bf215546Sopenharmony_ci
905bf215546Sopenharmony_cistatic unsigned
906bf215546Sopenharmony_cibi_count_succ_reads(bi_index t0, bi_index t1,
907bf215546Sopenharmony_ci                bi_index *succ_reads, unsigned nr_succ_reads)
908bf215546Sopenharmony_ci{
909bf215546Sopenharmony_ci        unsigned reads = 0;
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_ci        for (unsigned i = 0; i < nr_succ_reads; ++i) {
912bf215546Sopenharmony_ci                bool unique = true;
913bf215546Sopenharmony_ci
914bf215546Sopenharmony_ci                for (unsigned j = 0; j < i; ++j)
915bf215546Sopenharmony_ci                        if (bi_is_word_equiv(succ_reads[i], succ_reads[j]))
916bf215546Sopenharmony_ci                                unique = false;
917bf215546Sopenharmony_ci
918bf215546Sopenharmony_ci                if (!unique)
919bf215546Sopenharmony_ci                        continue;
920bf215546Sopenharmony_ci
921bf215546Sopenharmony_ci                if (bi_is_word_equiv(succ_reads[i], t0))
922bf215546Sopenharmony_ci                        continue;
923bf215546Sopenharmony_ci
924bf215546Sopenharmony_ci                if (bi_is_word_equiv(succ_reads[i], t1))
925bf215546Sopenharmony_ci                        continue;
926bf215546Sopenharmony_ci
927bf215546Sopenharmony_ci                reads++;
928bf215546Sopenharmony_ci        }
929bf215546Sopenharmony_ci
930bf215546Sopenharmony_ci        return reads;
931bf215546Sopenharmony_ci}
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_ci/* Not all instructions can read from the staging passthrough (as determined by
934bf215546Sopenharmony_ci * reads_t), check if a given pair of instructions has such a restriction. Note
935bf215546Sopenharmony_ci * we also use this mechanism to prevent data races around staging register
936bf215546Sopenharmony_ci * reads, so we allow the input source to potentially be vector-valued */
937bf215546Sopenharmony_ci
938bf215546Sopenharmony_cistatic bool
939bf215546Sopenharmony_cibi_has_staging_passthrough_hazard(bi_index fma, bi_instr *add)
940bf215546Sopenharmony_ci{
941bf215546Sopenharmony_ci        bi_foreach_src(add, s) {
942bf215546Sopenharmony_ci                bi_index src = add->src[s];
943bf215546Sopenharmony_ci
944bf215546Sopenharmony_ci                if (src.type != BI_INDEX_REGISTER)
945bf215546Sopenharmony_ci                        continue;
946bf215546Sopenharmony_ci
947bf215546Sopenharmony_ci                unsigned count = bi_count_read_registers(add, s);
948bf215546Sopenharmony_ci                bool read = false;
949bf215546Sopenharmony_ci
950bf215546Sopenharmony_ci                for (unsigned d = 0; d < count; ++d)
951bf215546Sopenharmony_ci                        read |= bi_is_equiv(fma, bi_register(src.value + d));
952bf215546Sopenharmony_ci
953bf215546Sopenharmony_ci                if (read && !bi_reads_t(add, s))
954bf215546Sopenharmony_ci                        return true;
955bf215546Sopenharmony_ci        }
956bf215546Sopenharmony_ci
957bf215546Sopenharmony_ci        return false;
958bf215546Sopenharmony_ci}
959bf215546Sopenharmony_ci
960bf215546Sopenharmony_ci/* Likewise for cross-tuple passthrough (reads_temps) */
961bf215546Sopenharmony_ci
962bf215546Sopenharmony_cistatic bool
963bf215546Sopenharmony_cibi_has_cross_passthrough_hazard(bi_tuple *succ, bi_instr *ins)
964bf215546Sopenharmony_ci{
965bf215546Sopenharmony_ci        bi_foreach_instr_in_tuple(succ, pins) {
966bf215546Sopenharmony_ci                bi_foreach_src(pins, s) {
967bf215546Sopenharmony_ci                        if (bi_is_word_equiv(ins->dest[0], pins->src[s]) &&
968bf215546Sopenharmony_ci                                        !bi_reads_temps(pins, s))
969bf215546Sopenharmony_ci                                return true;
970bf215546Sopenharmony_ci                }
971bf215546Sopenharmony_ci        }
972bf215546Sopenharmony_ci
973bf215546Sopenharmony_ci        return false;
974bf215546Sopenharmony_ci}
975bf215546Sopenharmony_ci
976bf215546Sopenharmony_ci/* Is a register written other than the staging mechanism? ATEST is special,
977bf215546Sopenharmony_ci * writing to both a staging register and a regular register (fixed packing).
978bf215546Sopenharmony_ci * BLEND is special since it has to write r48 the normal way even if it never
979bf215546Sopenharmony_ci * gets read. This depends on liveness analysis, as a register is not needed
980bf215546Sopenharmony_ci * for a write that will be discarded after one tuple. */
981bf215546Sopenharmony_ci
982bf215546Sopenharmony_cistatic unsigned
983bf215546Sopenharmony_cibi_write_count(bi_instr *instr, uint64_t live_after_temp)
984bf215546Sopenharmony_ci{
985bf215546Sopenharmony_ci        if (instr->op == BI_OPCODE_ATEST || instr->op == BI_OPCODE_BLEND)
986bf215546Sopenharmony_ci                return 1;
987bf215546Sopenharmony_ci
988bf215546Sopenharmony_ci        unsigned count = 0;
989bf215546Sopenharmony_ci
990bf215546Sopenharmony_ci        bi_foreach_dest(instr, d) {
991bf215546Sopenharmony_ci                if (d == 0 && bi_opcode_props[instr->op].sr_write)
992bf215546Sopenharmony_ci                        continue;
993bf215546Sopenharmony_ci
994bf215546Sopenharmony_ci                if (bi_is_null(instr->dest[d]))
995bf215546Sopenharmony_ci                        continue;
996bf215546Sopenharmony_ci
997bf215546Sopenharmony_ci                assert(instr->dest[0].type == BI_INDEX_REGISTER);
998bf215546Sopenharmony_ci                if (live_after_temp & BITFIELD64_BIT(instr->dest[0].value))
999bf215546Sopenharmony_ci                        count++;
1000bf215546Sopenharmony_ci        }
1001bf215546Sopenharmony_ci
1002bf215546Sopenharmony_ci        return count;
1003bf215546Sopenharmony_ci}
1004bf215546Sopenharmony_ci
1005bf215546Sopenharmony_ci/*
1006bf215546Sopenharmony_ci * Test if an instruction required flush-to-zero mode. Currently only supported
1007bf215546Sopenharmony_ci * for f16<-->f32 conversions to implement fquantize16
1008bf215546Sopenharmony_ci */
1009bf215546Sopenharmony_cistatic bool
1010bf215546Sopenharmony_cibi_needs_ftz(bi_instr *I)
1011bf215546Sopenharmony_ci{
1012bf215546Sopenharmony_ci        return (I->op == BI_OPCODE_F16_TO_F32 ||
1013bf215546Sopenharmony_ci                I->op == BI_OPCODE_V2F32_TO_V2F16) && I->ftz;
1014bf215546Sopenharmony_ci}
1015bf215546Sopenharmony_ci
1016bf215546Sopenharmony_ci/*
1017bf215546Sopenharmony_ci * Test if an instruction would be numerically incompatible with the clause. At
1018bf215546Sopenharmony_ci * present we only consider flush-to-zero modes.
1019bf215546Sopenharmony_ci */
1020bf215546Sopenharmony_cistatic bool
1021bf215546Sopenharmony_cibi_numerically_incompatible(struct bi_clause_state *clause, bi_instr *instr)
1022bf215546Sopenharmony_ci{
1023bf215546Sopenharmony_ci        return (clause->ftz != BI_FTZ_STATE_NONE) &&
1024bf215546Sopenharmony_ci               ((clause->ftz == BI_FTZ_STATE_ENABLE) != bi_needs_ftz(instr));
1025bf215546Sopenharmony_ci}
1026bf215546Sopenharmony_ci
1027bf215546Sopenharmony_ci/* Instruction placement entails two questions: what subset of instructions in
1028bf215546Sopenharmony_ci * the block can legally be scheduled? and of those which is the best? That is,
1029bf215546Sopenharmony_ci * we seek to maximize a cost function on a subset of the worklist satisfying a
1030bf215546Sopenharmony_ci * particular predicate. The necessary predicate is determined entirely by
1031bf215546Sopenharmony_ci * Bifrost's architectural limitations and is described in the accompanying
1032bf215546Sopenharmony_ci * whitepaper. The cost function is a heuristic. */
1033bf215546Sopenharmony_ci
1034bf215546Sopenharmony_cistatic bool
1035bf215546Sopenharmony_cibi_instr_schedulable(bi_instr *instr,
1036bf215546Sopenharmony_ci                struct bi_clause_state *clause,
1037bf215546Sopenharmony_ci                struct bi_tuple_state *tuple,
1038bf215546Sopenharmony_ci                uint64_t live_after_temp,
1039bf215546Sopenharmony_ci                bool fma)
1040bf215546Sopenharmony_ci{
1041bf215546Sopenharmony_ci        /* The units must match */
1042bf215546Sopenharmony_ci        if ((fma && !bi_can_fma(instr)) || (!fma && !bi_can_add(instr)))
1043bf215546Sopenharmony_ci                return false;
1044bf215546Sopenharmony_ci
1045bf215546Sopenharmony_ci        /* There can only be one message-passing instruction per clause */
1046bf215546Sopenharmony_ci        if (bi_must_message(instr) && clause->message)
1047bf215546Sopenharmony_ci                return false;
1048bf215546Sopenharmony_ci
1049bf215546Sopenharmony_ci        /* Some instructions have placement requirements */
1050bf215546Sopenharmony_ci        if (bi_opcode_props[instr->op].last && !tuple->last)
1051bf215546Sopenharmony_ci                return false;
1052bf215546Sopenharmony_ci
1053bf215546Sopenharmony_ci        if (bi_must_not_last(instr) && tuple->last)
1054bf215546Sopenharmony_ci                return false;
1055bf215546Sopenharmony_ci
1056bf215546Sopenharmony_ci        /* Numerical properties must be compatible with the clause */
1057bf215546Sopenharmony_ci        if (bi_numerically_incompatible(clause, instr))
1058bf215546Sopenharmony_ci                return false;
1059bf215546Sopenharmony_ci
1060bf215546Sopenharmony_ci        /* Message-passing instructions are not guaranteed write within the
1061bf215546Sopenharmony_ci         * same clause (most likely they will not), so if a later instruction
1062bf215546Sopenharmony_ci         * in the clause accesses the destination, the message-passing
1063bf215546Sopenharmony_ci         * instruction can't be scheduled */
1064bf215546Sopenharmony_ci        if (bi_opcode_props[instr->op].sr_write) {
1065bf215546Sopenharmony_ci                bi_foreach_dest(instr, d) {
1066bf215546Sopenharmony_ci                        if (bi_is_null(instr->dest[d]))
1067bf215546Sopenharmony_ci                                continue;
1068bf215546Sopenharmony_ci
1069bf215546Sopenharmony_ci                        unsigned nr = bi_count_write_registers(instr, d);
1070bf215546Sopenharmony_ci                        assert(instr->dest[d].type == BI_INDEX_REGISTER);
1071bf215546Sopenharmony_ci                        unsigned reg = instr->dest[d].value;
1072bf215546Sopenharmony_ci
1073bf215546Sopenharmony_ci                        for (unsigned i = 0; i < clause->access_count; ++i) {
1074bf215546Sopenharmony_ci                                bi_index idx = clause->accesses[i];
1075bf215546Sopenharmony_ci                                for (unsigned d = 0; d < nr; ++d) {
1076bf215546Sopenharmony_ci                                        if (bi_is_equiv(bi_register(reg + d), idx))
1077bf215546Sopenharmony_ci                                                return false;
1078bf215546Sopenharmony_ci                                }
1079bf215546Sopenharmony_ci                        }
1080bf215546Sopenharmony_ci                }
1081bf215546Sopenharmony_ci        }
1082bf215546Sopenharmony_ci
1083bf215546Sopenharmony_ci        if (bi_opcode_props[instr->op].sr_read && !bi_is_null(instr->src[0])) {
1084bf215546Sopenharmony_ci                unsigned nr = bi_count_read_registers(instr, 0);
1085bf215546Sopenharmony_ci                assert(instr->src[0].type == BI_INDEX_REGISTER);
1086bf215546Sopenharmony_ci                unsigned reg = instr->src[0].value;
1087bf215546Sopenharmony_ci
1088bf215546Sopenharmony_ci                for (unsigned i = 0; i < clause->access_count; ++i) {
1089bf215546Sopenharmony_ci                        bi_index idx = clause->accesses[i];
1090bf215546Sopenharmony_ci                        for (unsigned d = 0; d < nr; ++d) {
1091bf215546Sopenharmony_ci                                if (bi_is_equiv(bi_register(reg + d), idx))
1092bf215546Sopenharmony_ci                                        return false;
1093bf215546Sopenharmony_ci                        }
1094bf215546Sopenharmony_ci                }
1095bf215546Sopenharmony_ci        }
1096bf215546Sopenharmony_ci
1097bf215546Sopenharmony_ci        /* If FAU is already assigned, we may not disrupt that. Do a
1098bf215546Sopenharmony_ci         * non-disruptive test update */
1099bf215546Sopenharmony_ci        if (!bi_update_fau(clause, tuple, instr, fma, false))
1100bf215546Sopenharmony_ci                return false;
1101bf215546Sopenharmony_ci
1102bf215546Sopenharmony_ci        /* If this choice of FMA would force a staging passthrough, the ADD
1103bf215546Sopenharmony_ci         * instruction must support such a passthrough */
1104bf215546Sopenharmony_ci        if (tuple->add && bi_has_staging_passthrough_hazard(instr->dest[0], tuple->add))
1105bf215546Sopenharmony_ci                return false;
1106bf215546Sopenharmony_ci
1107bf215546Sopenharmony_ci        /* If this choice of destination would force a cross-tuple passthrough, the next tuple must support that */
1108bf215546Sopenharmony_ci        if (tuple->prev && bi_has_cross_passthrough_hazard(tuple->prev, instr))
1109bf215546Sopenharmony_ci                return false;
1110bf215546Sopenharmony_ci
1111bf215546Sopenharmony_ci        /* Register file writes are limited */
1112bf215546Sopenharmony_ci        unsigned total_writes = tuple->reg.nr_writes;
1113bf215546Sopenharmony_ci        total_writes += bi_write_count(instr, live_after_temp);
1114bf215546Sopenharmony_ci
1115bf215546Sopenharmony_ci        /* Last tuple in a clause can only write a single value */
1116bf215546Sopenharmony_ci        if (tuple->last && total_writes > 1)
1117bf215546Sopenharmony_ci                return false;
1118bf215546Sopenharmony_ci
1119bf215546Sopenharmony_ci        /* Register file reads are limited, so count unique */
1120bf215546Sopenharmony_ci
1121bf215546Sopenharmony_ci        unsigned unique_new_srcs = 0;
1122bf215546Sopenharmony_ci
1123bf215546Sopenharmony_ci        bi_foreach_src(instr, s) {
1124bf215546Sopenharmony_ci                if (bi_tuple_is_new_src(instr, &tuple->reg, s))
1125bf215546Sopenharmony_ci                        unique_new_srcs++;
1126bf215546Sopenharmony_ci        }
1127bf215546Sopenharmony_ci
1128bf215546Sopenharmony_ci        unsigned total_srcs = tuple->reg.nr_reads + unique_new_srcs;
1129bf215546Sopenharmony_ci
1130bf215546Sopenharmony_ci        bool can_spill_to_moves = (!tuple->add);
1131bf215546Sopenharmony_ci        can_spill_to_moves &= (bi_nconstants(clause) < 13 - (clause->tuple_count + 2));
1132bf215546Sopenharmony_ci        can_spill_to_moves &= (clause->tuple_count < 7);
1133bf215546Sopenharmony_ci
1134bf215546Sopenharmony_ci        /* However, we can get an extra 1 or 2 sources by inserting moves */
1135bf215546Sopenharmony_ci        if (total_srcs > (can_spill_to_moves ? 4 : 3))
1136bf215546Sopenharmony_ci                return false;
1137bf215546Sopenharmony_ci
1138bf215546Sopenharmony_ci        /* Count effective reads for the successor */
1139bf215546Sopenharmony_ci        unsigned succ_reads = bi_count_succ_reads(instr->dest[0],
1140bf215546Sopenharmony_ci                        tuple->add ? tuple->add->dest[0] : bi_null(),
1141bf215546Sopenharmony_ci                        tuple->prev_reads, tuple->nr_prev_reads);
1142bf215546Sopenharmony_ci
1143bf215546Sopenharmony_ci        /* Successor must satisfy R+W <= 4, so we require W <= 4-R */
1144bf215546Sopenharmony_ci        if ((signed) total_writes > (4 - (signed) succ_reads))
1145bf215546Sopenharmony_ci                return false;
1146bf215546Sopenharmony_ci
1147bf215546Sopenharmony_ci        return true;
1148bf215546Sopenharmony_ci}
1149bf215546Sopenharmony_ci
1150bf215546Sopenharmony_cistatic signed
1151bf215546Sopenharmony_cibi_instr_cost(bi_instr *instr, struct bi_tuple_state *tuple)
1152bf215546Sopenharmony_ci{
1153bf215546Sopenharmony_ci        signed cost = 0;
1154bf215546Sopenharmony_ci
1155bf215546Sopenharmony_ci        /* Instructions that can schedule to either FMA or to ADD should be
1156bf215546Sopenharmony_ci         * deprioritized since they're easier to reschedule elsewhere */
1157bf215546Sopenharmony_ci        if (bi_can_fma(instr) && bi_can_add(instr))
1158bf215546Sopenharmony_ci                cost++;
1159bf215546Sopenharmony_ci
1160bf215546Sopenharmony_ci        /* Message-passing instructions impose constraints on the registers
1161bf215546Sopenharmony_ci         * later in the clause, so schedule them as late within a clause as
1162bf215546Sopenharmony_ci         * possible (<==> prioritize them since we're backwards <==> decrease
1163bf215546Sopenharmony_ci         * cost) */
1164bf215546Sopenharmony_ci        if (bi_must_message(instr))
1165bf215546Sopenharmony_ci                cost--;
1166bf215546Sopenharmony_ci
1167bf215546Sopenharmony_ci        /* Last instructions are big constraints (XXX: no effect on shader-db) */
1168bf215546Sopenharmony_ci        if (bi_opcode_props[instr->op].last)
1169bf215546Sopenharmony_ci                cost -= 2;
1170bf215546Sopenharmony_ci
1171bf215546Sopenharmony_ci        return cost;
1172bf215546Sopenharmony_ci}
1173bf215546Sopenharmony_ci
1174bf215546Sopenharmony_cistatic unsigned
1175bf215546Sopenharmony_cibi_choose_index(struct bi_worklist st,
1176bf215546Sopenharmony_ci                struct bi_clause_state *clause,
1177bf215546Sopenharmony_ci                struct bi_tuple_state *tuple,
1178bf215546Sopenharmony_ci                uint64_t live_after_temp,
1179bf215546Sopenharmony_ci                bool fma)
1180bf215546Sopenharmony_ci{
1181bf215546Sopenharmony_ci        unsigned i, best_idx = ~0;
1182bf215546Sopenharmony_ci        signed best_cost = INT_MAX;
1183bf215546Sopenharmony_ci
1184bf215546Sopenharmony_ci        BITSET_FOREACH_SET(i, st.worklist, st.count) {
1185bf215546Sopenharmony_ci                bi_instr *instr = st.instructions[i];
1186bf215546Sopenharmony_ci
1187bf215546Sopenharmony_ci                if (!bi_instr_schedulable(instr, clause, tuple, live_after_temp, fma))
1188bf215546Sopenharmony_ci                        continue;
1189bf215546Sopenharmony_ci
1190bf215546Sopenharmony_ci                signed cost = bi_instr_cost(instr, tuple);
1191bf215546Sopenharmony_ci
1192bf215546Sopenharmony_ci                /* Tie break in favour of later instructions, under the
1193bf215546Sopenharmony_ci                 * assumption this promotes temporary usage (reducing pressure
1194bf215546Sopenharmony_ci                 * on the register file). This is a side effect of a prepass
1195bf215546Sopenharmony_ci                 * scheduling for pressure. */
1196bf215546Sopenharmony_ci
1197bf215546Sopenharmony_ci                if (cost <= best_cost) {
1198bf215546Sopenharmony_ci                        best_idx = i;
1199bf215546Sopenharmony_ci                        best_cost = cost;
1200bf215546Sopenharmony_ci                }
1201bf215546Sopenharmony_ci        }
1202bf215546Sopenharmony_ci
1203bf215546Sopenharmony_ci        return best_idx;
1204bf215546Sopenharmony_ci}
1205bf215546Sopenharmony_ci
1206bf215546Sopenharmony_cistatic void
1207bf215546Sopenharmony_cibi_pop_instr(struct bi_clause_state *clause, struct bi_tuple_state *tuple,
1208bf215546Sopenharmony_ci                bi_instr *instr, uint64_t live_after_temp, bool fma)
1209bf215546Sopenharmony_ci{
1210bf215546Sopenharmony_ci        bi_update_fau(clause, tuple, instr, fma, true);
1211bf215546Sopenharmony_ci
1212bf215546Sopenharmony_ci        /* TODO: maybe opt a bit? or maybe doesn't matter */
1213bf215546Sopenharmony_ci        assert(clause->access_count + BI_MAX_SRCS + BI_MAX_DESTS <= ARRAY_SIZE(clause->accesses));
1214bf215546Sopenharmony_ci        memcpy(clause->accesses + clause->access_count, instr->src, sizeof(instr->src));
1215bf215546Sopenharmony_ci        clause->access_count += BI_MAX_SRCS;
1216bf215546Sopenharmony_ci        memcpy(clause->accesses + clause->access_count, instr->dest, sizeof(instr->dest));
1217bf215546Sopenharmony_ci        clause->access_count += BI_MAX_DESTS;
1218bf215546Sopenharmony_ci        tuple->reg.nr_writes += bi_write_count(instr, live_after_temp);
1219bf215546Sopenharmony_ci
1220bf215546Sopenharmony_ci        bi_foreach_src(instr, s) {
1221bf215546Sopenharmony_ci                if (bi_tuple_is_new_src(instr, &tuple->reg, s))
1222bf215546Sopenharmony_ci                        tuple->reg.reads[tuple->reg.nr_reads++] = instr->src[s];
1223bf215546Sopenharmony_ci        }
1224bf215546Sopenharmony_ci
1225bf215546Sopenharmony_ci        /* This could be optimized to allow pairing integer instructions with
1226bf215546Sopenharmony_ci         * special flush-to-zero instructions, but punting on this until we have
1227bf215546Sopenharmony_ci         * a workload that cares.
1228bf215546Sopenharmony_ci         */
1229bf215546Sopenharmony_ci        clause->ftz = bi_needs_ftz(instr) ? BI_FTZ_STATE_ENABLE :
1230bf215546Sopenharmony_ci                                            BI_FTZ_STATE_DISABLE;
1231bf215546Sopenharmony_ci}
1232bf215546Sopenharmony_ci
1233bf215546Sopenharmony_ci/* Choose the best instruction and pop it off the worklist. Returns NULL if no
1234bf215546Sopenharmony_ci * instruction is available. This function is destructive. */
1235bf215546Sopenharmony_ci
1236bf215546Sopenharmony_cistatic bi_instr *
1237bf215546Sopenharmony_cibi_take_instr(bi_context *ctx, struct bi_worklist st,
1238bf215546Sopenharmony_ci                struct bi_clause_state *clause,
1239bf215546Sopenharmony_ci                struct bi_tuple_state *tuple,
1240bf215546Sopenharmony_ci                uint64_t live_after_temp,
1241bf215546Sopenharmony_ci                bool fma)
1242bf215546Sopenharmony_ci{
1243bf215546Sopenharmony_ci        if (tuple->add && tuple->add->op == BI_OPCODE_CUBEFACE)
1244bf215546Sopenharmony_ci                return bi_lower_cubeface(ctx, clause, tuple);
1245bf215546Sopenharmony_ci        else if (tuple->add && tuple->add->op == BI_OPCODE_ATOM_RETURN_I32)
1246bf215546Sopenharmony_ci                return bi_lower_atom_c(ctx, clause, tuple);
1247bf215546Sopenharmony_ci        else if (tuple->add && tuple->add->op == BI_OPCODE_ATOM1_RETURN_I32)
1248bf215546Sopenharmony_ci                return bi_lower_atom_c1(ctx, clause, tuple);
1249bf215546Sopenharmony_ci        else if (tuple->add && tuple->add->op == BI_OPCODE_SEG_ADD_I64)
1250bf215546Sopenharmony_ci                return bi_lower_seg_add(ctx, clause, tuple);
1251bf215546Sopenharmony_ci        else if (tuple->add && tuple->add->table)
1252bf215546Sopenharmony_ci                return bi_lower_dtsel(ctx, clause, tuple);
1253bf215546Sopenharmony_ci
1254bf215546Sopenharmony_ci        /* TODO: Optimize these moves */
1255bf215546Sopenharmony_ci        if (!fma && tuple->nr_prev_reads > 3) {
1256bf215546Sopenharmony_ci                /* Only spill by one source for now */
1257bf215546Sopenharmony_ci                assert(tuple->nr_prev_reads == 4);
1258bf215546Sopenharmony_ci
1259bf215546Sopenharmony_ci                /* Pick a source to spill */
1260bf215546Sopenharmony_ci                bi_index src = tuple->prev_reads[0];
1261bf215546Sopenharmony_ci
1262bf215546Sopenharmony_ci                /* Schedule the spill */
1263bf215546Sopenharmony_ci                bi_builder b = bi_init_builder(ctx, bi_before_tuple(tuple->prev));
1264bf215546Sopenharmony_ci                bi_instr *mov = bi_mov_i32_to(&b, src, src);
1265bf215546Sopenharmony_ci                bi_pop_instr(clause, tuple, mov, live_after_temp, fma);
1266bf215546Sopenharmony_ci                return mov;
1267bf215546Sopenharmony_ci        }
1268bf215546Sopenharmony_ci
1269bf215546Sopenharmony_ci#ifndef NDEBUG
1270bf215546Sopenharmony_ci        /* Don't pair instructions if debugging */
1271bf215546Sopenharmony_ci        if ((bifrost_debug & BIFROST_DBG_NOSCHED) && tuple->add)
1272bf215546Sopenharmony_ci                return NULL;
1273bf215546Sopenharmony_ci#endif
1274bf215546Sopenharmony_ci
1275bf215546Sopenharmony_ci        unsigned idx = bi_choose_index(st, clause, tuple, live_after_temp, fma);
1276bf215546Sopenharmony_ci
1277bf215546Sopenharmony_ci        if (idx >= st.count)
1278bf215546Sopenharmony_ci                return NULL;
1279bf215546Sopenharmony_ci
1280bf215546Sopenharmony_ci        /* Update state to reflect taking the instruction */
1281bf215546Sopenharmony_ci        bi_instr *instr = st.instructions[idx];
1282bf215546Sopenharmony_ci
1283bf215546Sopenharmony_ci        BITSET_CLEAR(st.worklist, idx);
1284bf215546Sopenharmony_ci        bi_update_worklist(st, idx);
1285bf215546Sopenharmony_ci        bi_pop_instr(clause, tuple, instr, live_after_temp, fma);
1286bf215546Sopenharmony_ci
1287bf215546Sopenharmony_ci        /* Fixups */
1288bf215546Sopenharmony_ci        if (instr->op == BI_OPCODE_IADD_U32 && fma) {
1289bf215546Sopenharmony_ci                assert(bi_can_iaddc(instr));
1290bf215546Sopenharmony_ci                instr->op = BI_OPCODE_IADDC_I32;
1291bf215546Sopenharmony_ci                instr->src[2] = bi_zero();
1292bf215546Sopenharmony_ci        } else if (fma && bi_can_replace_with_csel(instr)) {
1293bf215546Sopenharmony_ci                bi_replace_mux_with_csel(instr, false);
1294bf215546Sopenharmony_ci        }
1295bf215546Sopenharmony_ci
1296bf215546Sopenharmony_ci        return instr;
1297bf215546Sopenharmony_ci}
1298bf215546Sopenharmony_ci
1299bf215546Sopenharmony_ci/* Variant of bi_rewrite_index_src_single that uses word-equivalence, rewriting
1300bf215546Sopenharmony_ci * to a passthrough register. If except_sr is true, the staging sources are
1301bf215546Sopenharmony_ci * skipped, so staging register reads are not accidentally encoded as
1302bf215546Sopenharmony_ci * passthrough (which is impossible) */
1303bf215546Sopenharmony_ci
1304bf215546Sopenharmony_cistatic void
1305bf215546Sopenharmony_cibi_use_passthrough(bi_instr *ins, bi_index old,
1306bf215546Sopenharmony_ci                enum bifrost_packed_src new,
1307bf215546Sopenharmony_ci                bool except_sr)
1308bf215546Sopenharmony_ci{
1309bf215546Sopenharmony_ci        /* Optional for convenience */
1310bf215546Sopenharmony_ci        if (!ins || bi_is_null(old))
1311bf215546Sopenharmony_ci                return;
1312bf215546Sopenharmony_ci
1313bf215546Sopenharmony_ci        bi_foreach_src(ins, i) {
1314bf215546Sopenharmony_ci                if ((i == 0 || i == 4) && except_sr)
1315bf215546Sopenharmony_ci                        continue;
1316bf215546Sopenharmony_ci
1317bf215546Sopenharmony_ci                if (bi_is_word_equiv(ins->src[i], old)) {
1318bf215546Sopenharmony_ci                        ins->src[i].type = BI_INDEX_PASS;
1319bf215546Sopenharmony_ci                        ins->src[i].value = new;
1320bf215546Sopenharmony_ci                        ins->src[i].reg = false;
1321bf215546Sopenharmony_ci                        ins->src[i].offset = 0;
1322bf215546Sopenharmony_ci                }
1323bf215546Sopenharmony_ci        }
1324bf215546Sopenharmony_ci}
1325bf215546Sopenharmony_ci
1326bf215546Sopenharmony_ci/* Rewrites an adjacent pair of tuples _prec_eding and _succ_eding to use
1327bf215546Sopenharmony_ci * intertuple passthroughs where necessary. Passthroughs are allowed as a
1328bf215546Sopenharmony_ci * post-condition of scheduling. Note we rewrite ADD first, FMA second --
1329bf215546Sopenharmony_ci * opposite the order of execution. This is deliberate -- if both FMA and ADD
1330bf215546Sopenharmony_ci * write to the same logical register, the next executed tuple will get the
1331bf215546Sopenharmony_ci * latter result. There's no interference issue under the assumption of correct
1332bf215546Sopenharmony_ci * register allocation. */
1333bf215546Sopenharmony_ci
1334bf215546Sopenharmony_cistatic void
1335bf215546Sopenharmony_cibi_rewrite_passthrough(bi_tuple prec, bi_tuple succ)
1336bf215546Sopenharmony_ci{
1337bf215546Sopenharmony_ci        bool sr_read = succ.add ? bi_opcode_props[succ.add->op].sr_read : false;
1338bf215546Sopenharmony_ci
1339bf215546Sopenharmony_ci        if (prec.add) {
1340bf215546Sopenharmony_ci                bi_use_passthrough(succ.fma, prec.add->dest[0], BIFROST_SRC_PASS_ADD, false);
1341bf215546Sopenharmony_ci                bi_use_passthrough(succ.add, prec.add->dest[0], BIFROST_SRC_PASS_ADD, sr_read);
1342bf215546Sopenharmony_ci        }
1343bf215546Sopenharmony_ci
1344bf215546Sopenharmony_ci        if (prec.fma) {
1345bf215546Sopenharmony_ci                bi_use_passthrough(succ.fma, prec.fma->dest[0], BIFROST_SRC_PASS_FMA, false);
1346bf215546Sopenharmony_ci                bi_use_passthrough(succ.add, prec.fma->dest[0], BIFROST_SRC_PASS_FMA, sr_read);
1347bf215546Sopenharmony_ci        }
1348bf215546Sopenharmony_ci}
1349bf215546Sopenharmony_ci
1350bf215546Sopenharmony_cistatic void
1351bf215546Sopenharmony_cibi_rewrite_fau_to_pass(bi_tuple *tuple)
1352bf215546Sopenharmony_ci{
1353bf215546Sopenharmony_ci        bi_foreach_instr_and_src_in_tuple(tuple, ins, s) {
1354bf215546Sopenharmony_ci                if (ins->src[s].type != BI_INDEX_FAU) continue;
1355bf215546Sopenharmony_ci
1356bf215546Sopenharmony_ci                bi_index pass = bi_passthrough(ins->src[s].offset ?
1357bf215546Sopenharmony_ci                                BIFROST_SRC_FAU_HI : BIFROST_SRC_FAU_LO);
1358bf215546Sopenharmony_ci
1359bf215546Sopenharmony_ci                ins->src[s] = bi_replace_index(ins->src[s], pass);
1360bf215546Sopenharmony_ci        }
1361bf215546Sopenharmony_ci}
1362bf215546Sopenharmony_ci
1363bf215546Sopenharmony_cistatic void
1364bf215546Sopenharmony_cibi_rewrite_zero(bi_instr *ins, bool fma)
1365bf215546Sopenharmony_ci{
1366bf215546Sopenharmony_ci        bi_index zero = bi_passthrough(fma ? BIFROST_SRC_STAGE : BIFROST_SRC_FAU_LO);
1367bf215546Sopenharmony_ci
1368bf215546Sopenharmony_ci        bi_foreach_src(ins, s) {
1369bf215546Sopenharmony_ci                bi_index src = ins->src[s];
1370bf215546Sopenharmony_ci
1371bf215546Sopenharmony_ci                if (src.type == BI_INDEX_CONSTANT && src.value == 0)
1372bf215546Sopenharmony_ci                        ins->src[s] = bi_replace_index(src, zero);
1373bf215546Sopenharmony_ci        }
1374bf215546Sopenharmony_ci}
1375bf215546Sopenharmony_ci
1376bf215546Sopenharmony_ci/* Assumes #0 to {T, FAU} rewrite has already occurred */
1377bf215546Sopenharmony_ci
1378bf215546Sopenharmony_cistatic void
1379bf215546Sopenharmony_cibi_rewrite_constants_to_pass(bi_tuple *tuple, uint64_t constant, bool pcrel)
1380bf215546Sopenharmony_ci{
1381bf215546Sopenharmony_ci        bi_foreach_instr_and_src_in_tuple(tuple, ins, s) {
1382bf215546Sopenharmony_ci                if (ins->src[s].type != BI_INDEX_CONSTANT) continue;
1383bf215546Sopenharmony_ci
1384bf215546Sopenharmony_ci                uint32_t cons = ins->src[s].value;
1385bf215546Sopenharmony_ci
1386bf215546Sopenharmony_ci                ASSERTED bool lo = (cons == (constant & 0xffffffff));
1387bf215546Sopenharmony_ci                bool hi = (cons == (constant >> 32ull));
1388bf215546Sopenharmony_ci
1389bf215546Sopenharmony_ci                /* PC offsets always live in the upper half, set to zero by
1390bf215546Sopenharmony_ci                 * convention before pack time. (This is safe, since if you
1391bf215546Sopenharmony_ci                 * wanted to compare against zero, you would use a BRANCHZ
1392bf215546Sopenharmony_ci                 * instruction instead.) */
1393bf215546Sopenharmony_ci                if (cons == 0 && ins->branch_target != NULL) {
1394bf215546Sopenharmony_ci                        assert(pcrel);
1395bf215546Sopenharmony_ci                        hi = true;
1396bf215546Sopenharmony_ci                        lo = false;
1397bf215546Sopenharmony_ci                } else if (pcrel) {
1398bf215546Sopenharmony_ci                        hi = false;
1399bf215546Sopenharmony_ci                }
1400bf215546Sopenharmony_ci
1401bf215546Sopenharmony_ci                assert(lo || hi);
1402bf215546Sopenharmony_ci
1403bf215546Sopenharmony_ci                ins->src[s] = bi_replace_index(ins->src[s],
1404bf215546Sopenharmony_ci                                bi_passthrough(hi ?  BIFROST_SRC_FAU_HI :
1405bf215546Sopenharmony_ci                                        BIFROST_SRC_FAU_LO));
1406bf215546Sopenharmony_ci        }
1407bf215546Sopenharmony_ci}
1408bf215546Sopenharmony_ci
1409bf215546Sopenharmony_ci/* Constructs a constant state given a tuple state. This has the
1410bf215546Sopenharmony_ci * postcondition that pcrel applies to the first constant by convention,
1411bf215546Sopenharmony_ci * and PC-relative constants will be #0 by convention here, so swap to
1412bf215546Sopenharmony_ci * match if needed */
1413bf215546Sopenharmony_ci
1414bf215546Sopenharmony_cistatic struct bi_const_state
1415bf215546Sopenharmony_cibi_get_const_state(struct bi_tuple_state *tuple)
1416bf215546Sopenharmony_ci{
1417bf215546Sopenharmony_ci        struct bi_const_state consts = {
1418bf215546Sopenharmony_ci                .constant_count = tuple->constant_count,
1419bf215546Sopenharmony_ci                .constants[0] = tuple->constants[0],
1420bf215546Sopenharmony_ci                .constants[1] = tuple->constants[1],
1421bf215546Sopenharmony_ci                .pcrel = tuple->add && tuple->add->branch_target,
1422bf215546Sopenharmony_ci        };
1423bf215546Sopenharmony_ci
1424bf215546Sopenharmony_ci        /* pcrel applies to the first constant by convention, and
1425bf215546Sopenharmony_ci         * PC-relative constants will be #0 by convention here, so swap
1426bf215546Sopenharmony_ci         * to match if needed */
1427bf215546Sopenharmony_ci        if (consts.pcrel && consts.constants[0]) {
1428bf215546Sopenharmony_ci                assert(consts.constant_count == 2);
1429bf215546Sopenharmony_ci                assert(consts.constants[1] == 0);
1430bf215546Sopenharmony_ci
1431bf215546Sopenharmony_ci                consts.constants[1] = consts.constants[0];
1432bf215546Sopenharmony_ci                consts.constants[0] = 0;
1433bf215546Sopenharmony_ci        }
1434bf215546Sopenharmony_ci
1435bf215546Sopenharmony_ci        return consts;
1436bf215546Sopenharmony_ci}
1437bf215546Sopenharmony_ci
/* Merges constants in a clause, satisfying the following rules, assuming no
 * more than one tuple has pcrel:
 *
 * 1. If a tuple has two constants, they must be packed together. If one is
 * pcrel, it must be the high constant to use the M1=4 modification [sx64(E0) +
 * (PC << 32)]. Otherwise choose an arbitrary order.
 *
 * 2. If a tuple has one constant, it may be shared with an existing
 * pair that already contains that constant, or it may be combined with another
 * (distinct) tuple of a single constant.
 *
 * This guarantees a packing is possible. The next routine handles
 * modification-related swapping, to satisfy format 12 and the lack of
 * modification for tuple count 5/8 in EC0.
 */
1453bf215546Sopenharmony_ci
/* Packs two 32-bit constants into a single 64-bit word. Without pcrel, the
 * larger value goes in the upper half and the other in the lower half. With
 * pcrel, the upper half is zero and the lower half holds the other constant. */

static uint64_t
bi_merge_u32(uint32_t c0, uint32_t c1, bool pcrel)
{
        /* At this point in the constant merge algorithm, pcrel constants are
         * treated as zero, so pcrel implies at least one constant is zero */
        assert(!pcrel || (c0 == 0 || c1 == 0));

        /* Order: pcrel, maximum non-pcrel, minimum non-pcrel */
        uint32_t upper;

        if (pcrel)
                upper = 0;
        else
                upper = (c0 > c1) ? c0 : c1;

        /* Whichever input was not chosen as the upper half */
        uint32_t lower = (c0 == upper) ? c1 : c0;

        /* Merge in the selected order */
        return (((uint64_t) upper) << 32ull) | lower;
}
1468bf215546Sopenharmony_ci
1469bf215546Sopenharmony_cistatic unsigned
1470bf215546Sopenharmony_cibi_merge_pairs(struct bi_const_state *consts, unsigned tuple_count,
1471bf215546Sopenharmony_ci                uint64_t *merged, unsigned *pcrel_pair)
1472bf215546Sopenharmony_ci{
1473bf215546Sopenharmony_ci        unsigned merge_count = 0;
1474bf215546Sopenharmony_ci
1475bf215546Sopenharmony_ci        for (unsigned t = 0; t < tuple_count; ++t) {
1476bf215546Sopenharmony_ci                if (consts[t].constant_count != 2) continue;
1477bf215546Sopenharmony_ci
1478bf215546Sopenharmony_ci                unsigned idx = ~0;
1479bf215546Sopenharmony_ci                uint64_t val = bi_merge_u32(consts[t].constants[0],
1480bf215546Sopenharmony_ci                                consts[t].constants[1], consts[t].pcrel);
1481bf215546Sopenharmony_ci
1482bf215546Sopenharmony_ci                /* Skip the pcrel pair if assigned, because if one is assigned,
1483bf215546Sopenharmony_ci                 * this one is not pcrel by uniqueness so it's a mismatch */
1484bf215546Sopenharmony_ci                for (unsigned s = 0; s < merge_count; ++s) {
1485bf215546Sopenharmony_ci                        if (merged[s] == val && (*pcrel_pair) != s) {
1486bf215546Sopenharmony_ci                                idx = s;
1487bf215546Sopenharmony_ci                                break;
1488bf215546Sopenharmony_ci                        }
1489bf215546Sopenharmony_ci                }
1490bf215546Sopenharmony_ci
1491bf215546Sopenharmony_ci                if (idx == ~0) {
1492bf215546Sopenharmony_ci                        idx = merge_count++;
1493bf215546Sopenharmony_ci                        merged[idx] = val;
1494bf215546Sopenharmony_ci
1495bf215546Sopenharmony_ci                        if (consts[t].pcrel)
1496bf215546Sopenharmony_ci                                (*pcrel_pair) = idx;
1497bf215546Sopenharmony_ci                }
1498bf215546Sopenharmony_ci
1499bf215546Sopenharmony_ci                consts[t].word_idx = idx;
1500bf215546Sopenharmony_ci        }
1501bf215546Sopenharmony_ci
1502bf215546Sopenharmony_ci        return merge_count;
1503bf215546Sopenharmony_ci}
1504bf215546Sopenharmony_ci
1505bf215546Sopenharmony_cistatic unsigned
1506bf215546Sopenharmony_cibi_merge_singles(struct bi_const_state *consts, unsigned tuple_count,
1507bf215546Sopenharmony_ci                uint64_t *pairs, unsigned pair_count, unsigned *pcrel_pair)
1508bf215546Sopenharmony_ci{
1509bf215546Sopenharmony_ci        bool pending = false, pending_pcrel = false;
1510bf215546Sopenharmony_ci        uint32_t pending_single = 0;
1511bf215546Sopenharmony_ci
1512bf215546Sopenharmony_ci        for (unsigned t = 0; t < tuple_count; ++t) {
1513bf215546Sopenharmony_ci                if (consts[t].constant_count != 1) continue;
1514bf215546Sopenharmony_ci
1515bf215546Sopenharmony_ci                uint32_t val = consts[t].constants[0];
1516bf215546Sopenharmony_ci                unsigned idx = ~0;
1517bf215546Sopenharmony_ci
1518bf215546Sopenharmony_ci                /* Try to match, but don't match pcrel with non-pcrel, even
1519bf215546Sopenharmony_ci                 * though we can merge a pcrel with a non-pcrel single */
1520bf215546Sopenharmony_ci                for (unsigned i = 0; i < pair_count; ++i) {
1521bf215546Sopenharmony_ci                        bool lo = ((pairs[i] & 0xffffffff) == val);
1522bf215546Sopenharmony_ci                        bool hi = ((pairs[i] >> 32) == val);
1523bf215546Sopenharmony_ci                        bool match = (lo || hi);
1524bf215546Sopenharmony_ci                        match &= ((*pcrel_pair) != i);
1525bf215546Sopenharmony_ci                        if (match && !consts[t].pcrel) {
1526bf215546Sopenharmony_ci                                idx = i;
1527bf215546Sopenharmony_ci                                break;
1528bf215546Sopenharmony_ci                        }
1529bf215546Sopenharmony_ci                }
1530bf215546Sopenharmony_ci
1531bf215546Sopenharmony_ci                if (idx == ~0) {
1532bf215546Sopenharmony_ci                        idx = pair_count;
1533bf215546Sopenharmony_ci
1534bf215546Sopenharmony_ci                        if (pending && pending_single != val) {
1535bf215546Sopenharmony_ci                                assert(!(pending_pcrel && consts[t].pcrel));
1536bf215546Sopenharmony_ci                                bool pcrel = pending_pcrel || consts[t].pcrel;
1537bf215546Sopenharmony_ci
1538bf215546Sopenharmony_ci                                if (pcrel)
1539bf215546Sopenharmony_ci                                        *pcrel_pair = idx;
1540bf215546Sopenharmony_ci
1541bf215546Sopenharmony_ci                                pairs[pair_count++] = bi_merge_u32(pending_single, val, pcrel);
1542bf215546Sopenharmony_ci
1543bf215546Sopenharmony_ci                                pending = pending_pcrel = false;
1544bf215546Sopenharmony_ci                        } else {
1545bf215546Sopenharmony_ci                                pending = true;
1546bf215546Sopenharmony_ci                                pending_pcrel = consts[t].pcrel;
1547bf215546Sopenharmony_ci                                pending_single = val;
1548bf215546Sopenharmony_ci                        }
1549bf215546Sopenharmony_ci                }
1550bf215546Sopenharmony_ci
1551bf215546Sopenharmony_ci                consts[t].word_idx = idx;
1552bf215546Sopenharmony_ci        }
1553bf215546Sopenharmony_ci
1554bf215546Sopenharmony_ci        /* Shift so it works whether pending_pcrel is set or not */
1555bf215546Sopenharmony_ci        if (pending) {
1556bf215546Sopenharmony_ci                if (pending_pcrel)
1557bf215546Sopenharmony_ci                        *pcrel_pair = pair_count;
1558bf215546Sopenharmony_ci
1559bf215546Sopenharmony_ci                pairs[pair_count++] = ((uint64_t) pending_single) << 32ull;
1560bf215546Sopenharmony_ci        }
1561bf215546Sopenharmony_ci
1562bf215546Sopenharmony_ci        return pair_count;
1563bf215546Sopenharmony_ci}
1564bf215546Sopenharmony_ci
/* Merge the per-tuple constant state of a clause into the 64-bit words of
 * `pairs`. bi_merge_pairs runs first over all 8 tuple slots; the count it
 * returns is then handed to bi_merge_singles as the starting word count.
 * `pcrel_idx` is forwarded to both helpers. Returns the word count reported
 * by bi_merge_singles. */

static unsigned
bi_merge_constants(struct bi_const_state *consts, uint64_t *pairs, unsigned *pcrel_idx)
{
        unsigned nr_words = bi_merge_pairs(consts, 8, pairs, pcrel_idx);

        nr_words = bi_merge_singles(consts, 8, pairs, nr_words, pcrel_idx);

        return nr_words;
}
1571bf215546Sopenharmony_ci
1572bf215546Sopenharmony_ci/* Swap two constants at word i and i+1 by swapping their actual positions and
1573bf215546Sopenharmony_ci * swapping all references so the meaning of the clause is preserved */
1574bf215546Sopenharmony_ci
1575bf215546Sopenharmony_cistatic void
1576bf215546Sopenharmony_cibi_swap_constants(struct bi_const_state *consts, uint64_t *pairs, unsigned i)
1577bf215546Sopenharmony_ci{
1578bf215546Sopenharmony_ci        uint64_t tmp_pair = pairs[i + 0];
1579bf215546Sopenharmony_ci        pairs[i + 0] = pairs[i + 1];
1580bf215546Sopenharmony_ci        pairs[i + 1] = tmp_pair;
1581bf215546Sopenharmony_ci
1582bf215546Sopenharmony_ci        for (unsigned t = 0; t < 8; ++t) {
1583bf215546Sopenharmony_ci                if (consts[t].word_idx == i)
1584bf215546Sopenharmony_ci                        consts[t].word_idx = (i + 1);
1585bf215546Sopenharmony_ci                else if (consts[t].word_idx == (i + 1))
1586bf215546Sopenharmony_ci                        consts[t].word_idx = i;
1587bf215546Sopenharmony_ci        }
1588bf215546Sopenharmony_ci}
1589bf215546Sopenharmony_ci
/* Fix up the M values of already-merged constants. At most one constant is
 * PC-relative (its word index is *pcrel_idx); it must end up with the M1=4
 * modification, while every other constant must be usable as-is. Achieving
 * that may require swapping neighbouring words, and in one case growing the
 * constant count. *pcrel_idx is updated to follow the PC-relative word.
 * Returns the (possibly increased) constant count. */

static unsigned
bi_apply_constant_modifiers(struct bi_const_state *consts,
                uint64_t *pairs, unsigned *pcrel_idx,
                unsigned tuple_count, unsigned constant_count)
{
        const uint64_t top_nibble = 0xfull << 60;

        unsigned first = bi_ec0_packed(tuple_count) ? 1 : 0;

        /* Clauses with 5 or 8 tuples lack an M field for the packed EC0, so
         * EC0 cannot be PC-relative there. Move the PC-relative constant to
         * word 1, which may mean adding an unused constant to make room */
        bool ec0_lacks_m = (tuple_count == 5) || (tuple_count == 8);

        if (ec0_lacks_m && *pcrel_idx == 0) {
                bi_swap_constants(consts, pairs, 0);
                *pcrel_idx = 1;
                constant_count = MAX2(constant_count, 2);
        }

        /* EC0 might be packed for free; after that, constants are packed in
         * pairs (clause format 12) with M1 computed from each pair */
        for (unsigned i = first; i < constant_count; i += 2) {
                const unsigned j = i + 1;
                const bool is_last = (j == constant_count);

                /* Top nibbles of the two words of this pair */
                unsigned nib_i = (pairs[i] >> 60);
                unsigned nib_j = (pairs[j] >> 60);

                bool need_swap;

                if (*pcrel_idx == i || *pcrel_idx == j) {
                        /* The PC-relative constant must be E0, not E1 */
                        need_swap = (*pcrel_idx == j);

                        /* M1 = 4 means (A - B) mod 16 = 4, i.e. A = (B + 4)
                         * mod 16, where A is the nibble we control (the
                         * PC-relative word) and B is its partner's nibble */
                        unsigned partner = need_swap ? nib_i : nib_j;
                        unsigned wanted = (partner + 4) & 0xF;

                        pairs[*pcrel_idx] |= ((uint64_t) wanted) << 60;

                        /* After the swap below (if any) it lives at i */
                        *pcrel_idx = i;
                } else {
                        /* M1 we would get without swapping */
                        unsigned M1 = (16 + nib_i - nib_j) & 0xF;

                        /* M1 = 0 or M1 >= 8 leaves the constants unchanged.
                         * Otherwise 0 < (A - B) % 16 < 8, hence
                         * (B - A) % 16 >= 8, so swapping the pair lets both
                         * be used unchanged */
                        need_swap = (M1 > 0) && (M1 <= 7);

                        /* The last constant has no partner to swap with, so
                         * force M1 = 0 instead by copying the top nibble */
                        if (need_swap && is_last) {
                                pairs[j] |= pairs[i] & top_nibble;
                                need_swap = false;
                        }
                }

                if (!need_swap)
                        continue;

                assert(!is_last);
                bi_swap_constants(consts, pairs, i);
        }

        return constant_count;
}
1660bf215546Sopenharmony_ci
1661bf215546Sopenharmony_ci/* Schedule a single clause. If no instructions remain, return NULL. */
1662bf215546Sopenharmony_ci
1663bf215546Sopenharmony_cistatic bi_clause *
1664bf215546Sopenharmony_cibi_schedule_clause(bi_context *ctx, bi_block *block, struct bi_worklist st, uint64_t *live)
1665bf215546Sopenharmony_ci{
1666bf215546Sopenharmony_ci        struct bi_clause_state clause_state = { 0 };
1667bf215546Sopenharmony_ci        bi_clause *clause = rzalloc(ctx, bi_clause);
1668bf215546Sopenharmony_ci        bi_tuple *tuple = NULL;
1669bf215546Sopenharmony_ci
1670bf215546Sopenharmony_ci        const unsigned max_tuples = ARRAY_SIZE(clause->tuples);
1671bf215546Sopenharmony_ci
1672bf215546Sopenharmony_ci        /* TODO: Decide flow control better */
1673bf215546Sopenharmony_ci        clause->flow_control = BIFROST_FLOW_NBTB;
1674bf215546Sopenharmony_ci
1675bf215546Sopenharmony_ci        /* The last clause can only write one instruction, so initialize that */
1676bf215546Sopenharmony_ci        struct bi_reg_state reg_state = {};
1677bf215546Sopenharmony_ci        bi_index prev_reads[5] = { bi_null() };
1678bf215546Sopenharmony_ci        unsigned nr_prev_reads = 0;
1679bf215546Sopenharmony_ci
1680bf215546Sopenharmony_ci        /* We need to track future liveness. The main *live set tracks what is
1681bf215546Sopenharmony_ci         * live at the current point int he program we are scheduling, but to
1682bf215546Sopenharmony_ci         * determine temp eligibility, we instead want what will be live after
1683bf215546Sopenharmony_ci         * the next tuple in the program. If you scheduled forwards, you'd need
1684bf215546Sopenharmony_ci         * a crystall ball for this. Luckily we schedule backwards, so we just
1685bf215546Sopenharmony_ci         * delay updates to the live_after_temp by an extra tuple. */
1686bf215546Sopenharmony_ci        uint64_t live_after_temp = *live;
1687bf215546Sopenharmony_ci        uint64_t live_next_tuple = live_after_temp;
1688bf215546Sopenharmony_ci
1689bf215546Sopenharmony_ci        do {
1690bf215546Sopenharmony_ci                struct bi_tuple_state tuple_state = {
1691bf215546Sopenharmony_ci                        .last = (clause->tuple_count == 0),
1692bf215546Sopenharmony_ci                        .reg = reg_state,
1693bf215546Sopenharmony_ci                        .nr_prev_reads = nr_prev_reads,
1694bf215546Sopenharmony_ci                        .prev = tuple,
1695bf215546Sopenharmony_ci                        .pcrel_idx = ~0,
1696bf215546Sopenharmony_ci                };
1697bf215546Sopenharmony_ci
1698bf215546Sopenharmony_ci                assert(nr_prev_reads < ARRAY_SIZE(prev_reads));
1699bf215546Sopenharmony_ci                memcpy(tuple_state.prev_reads, prev_reads, sizeof(prev_reads));
1700bf215546Sopenharmony_ci
1701bf215546Sopenharmony_ci                unsigned idx = max_tuples - clause->tuple_count - 1;
1702bf215546Sopenharmony_ci
1703bf215546Sopenharmony_ci                tuple = &clause->tuples[idx];
1704bf215546Sopenharmony_ci
1705bf215546Sopenharmony_ci                if (clause->message && bi_opcode_props[clause->message->op].sr_read && !bi_is_null(clause->message->src[0])) {
1706bf215546Sopenharmony_ci                        unsigned nr = bi_count_read_registers(clause->message, 0);
1707bf215546Sopenharmony_ci                        live_after_temp |= (BITFIELD64_MASK(nr) << clause->message->src[0].value);
1708bf215546Sopenharmony_ci                }
1709bf215546Sopenharmony_ci
1710bf215546Sopenharmony_ci                /* Since we schedule backwards, we schedule ADD first */
1711bf215546Sopenharmony_ci                tuple_state.add = bi_take_instr(ctx, st, &clause_state, &tuple_state, live_after_temp, false);
1712bf215546Sopenharmony_ci                tuple->fma = bi_take_instr(ctx, st, &clause_state, &tuple_state, live_after_temp, true);
1713bf215546Sopenharmony_ci                tuple->add = tuple_state.add;
1714bf215546Sopenharmony_ci
1715bf215546Sopenharmony_ci                /* Update liveness from the new instructions */
1716bf215546Sopenharmony_ci                if (tuple->add)
1717bf215546Sopenharmony_ci                        *live = bi_postra_liveness_ins(*live, tuple->add);
1718bf215546Sopenharmony_ci
1719bf215546Sopenharmony_ci                if (tuple->fma)
1720bf215546Sopenharmony_ci                        *live = bi_postra_liveness_ins(*live, tuple->fma);
1721bf215546Sopenharmony_ci
1722bf215546Sopenharmony_ci               /* Rotate in the new per-tuple liveness */
1723bf215546Sopenharmony_ci                live_after_temp = live_next_tuple;
1724bf215546Sopenharmony_ci                live_next_tuple = *live;
1725bf215546Sopenharmony_ci
1726bf215546Sopenharmony_ci                /* We may have a message, but only one per clause */
1727bf215546Sopenharmony_ci                if (tuple->add && bi_must_message(tuple->add)) {
1728bf215546Sopenharmony_ci                        assert(!clause_state.message);
1729bf215546Sopenharmony_ci                        clause_state.message = true;
1730bf215546Sopenharmony_ci
1731bf215546Sopenharmony_ci                        clause->message_type =
1732bf215546Sopenharmony_ci                                bi_message_type_for_instr(tuple->add);
1733bf215546Sopenharmony_ci                        clause->message = tuple->add;
1734bf215546Sopenharmony_ci
1735bf215546Sopenharmony_ci                        /* We don't need to set dependencies for blend shaders
1736bf215546Sopenharmony_ci                         * because the BLEND instruction in the fragment
1737bf215546Sopenharmony_ci                         * shader should have already done the wait */
1738bf215546Sopenharmony_ci                        if (!ctx->inputs->is_blend) {
1739bf215546Sopenharmony_ci                                switch (tuple->add->op) {
1740bf215546Sopenharmony_ci                                case BI_OPCODE_ATEST:
1741bf215546Sopenharmony_ci                                        clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_DEPTH);
1742bf215546Sopenharmony_ci                                        break;
1743bf215546Sopenharmony_ci                                case BI_OPCODE_LD_TILE:
1744bf215546Sopenharmony_ci                                case BI_OPCODE_ST_TILE:
1745bf215546Sopenharmony_ci                                        clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_COLOUR);
1746bf215546Sopenharmony_ci                                        break;
1747bf215546Sopenharmony_ci                                case BI_OPCODE_BLEND:
1748bf215546Sopenharmony_ci                                        clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_DEPTH);
1749bf215546Sopenharmony_ci                                        clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_COLOUR);
1750bf215546Sopenharmony_ci                                        break;
1751bf215546Sopenharmony_ci                                default:
1752bf215546Sopenharmony_ci                                        break;
1753bf215546Sopenharmony_ci                                }
1754bf215546Sopenharmony_ci                        }
1755bf215546Sopenharmony_ci                }
1756bf215546Sopenharmony_ci
1757bf215546Sopenharmony_ci                clause_state.consts[idx] = bi_get_const_state(&tuple_state);
1758bf215546Sopenharmony_ci
1759bf215546Sopenharmony_ci                /* Before merging constants, eliminate zeroes, otherwise the
1760bf215546Sopenharmony_ci                 * merging will fight over the #0 that never gets read (and is
1761bf215546Sopenharmony_ci                 * never marked as read by update_fau) */
1762bf215546Sopenharmony_ci                if (tuple->fma && bi_reads_zero(tuple->fma))
1763bf215546Sopenharmony_ci                        bi_rewrite_zero(tuple->fma, true);
1764bf215546Sopenharmony_ci
1765bf215546Sopenharmony_ci                /* Rewrite away FAU, constant write is deferred */
1766bf215546Sopenharmony_ci                if (!tuple_state.constant_count) {
1767bf215546Sopenharmony_ci                        tuple->fau_idx = tuple_state.fau;
1768bf215546Sopenharmony_ci                        bi_rewrite_fau_to_pass(tuple);
1769bf215546Sopenharmony_ci                }
1770bf215546Sopenharmony_ci
1771bf215546Sopenharmony_ci                /* Use passthrough register for cross-stage accesses. Since
1772bf215546Sopenharmony_ci                 * there are just FMA and ADD stages, that means we rewrite to
1773bf215546Sopenharmony_ci                 * passthrough the sources of the ADD that read from the
1774bf215546Sopenharmony_ci                 * destination of the FMA */
1775bf215546Sopenharmony_ci
1776bf215546Sopenharmony_ci                if (tuple->fma) {
1777bf215546Sopenharmony_ci                        bi_use_passthrough(tuple->add, tuple->fma->dest[0],
1778bf215546Sopenharmony_ci                                        BIFROST_SRC_STAGE, false);
1779bf215546Sopenharmony_ci                }
1780bf215546Sopenharmony_ci
1781bf215546Sopenharmony_ci                /* Don't add an empty tuple, unless the worklist has nothing
1782bf215546Sopenharmony_ci                 * but a (pseudo)instruction failing to schedule due to a "not
1783bf215546Sopenharmony_ci                 * last instruction" constraint */
1784bf215546Sopenharmony_ci
1785bf215546Sopenharmony_ci                int some_instruction = __bitset_ffs(st.worklist, BITSET_WORDS(st.count));
1786bf215546Sopenharmony_ci                bool not_last = (some_instruction > 0) &&
1787bf215546Sopenharmony_ci                        bi_must_not_last(st.instructions[some_instruction - 1]);
1788bf215546Sopenharmony_ci
1789bf215546Sopenharmony_ci                bool insert_empty = tuple_state.last && not_last;
1790bf215546Sopenharmony_ci
1791bf215546Sopenharmony_ci                if (!(tuple->fma || tuple->add || insert_empty))
1792bf215546Sopenharmony_ci                        break;
1793bf215546Sopenharmony_ci
1794bf215546Sopenharmony_ci                clause->tuple_count++;
1795bf215546Sopenharmony_ci
1796bf215546Sopenharmony_ci                /* Adding enough tuple might overflow constants */
1797bf215546Sopenharmony_ci                if (!bi_space_for_more_constants(&clause_state))
1798bf215546Sopenharmony_ci                        break;
1799bf215546Sopenharmony_ci
1800bf215546Sopenharmony_ci#ifndef NDEBUG
1801bf215546Sopenharmony_ci                /* Don't schedule more than 1 tuple if debugging */
1802bf215546Sopenharmony_ci                if ((bifrost_debug & BIFROST_DBG_NOSCHED) && !insert_empty)
1803bf215546Sopenharmony_ci                        break;
1804bf215546Sopenharmony_ci#endif
1805bf215546Sopenharmony_ci
1806bf215546Sopenharmony_ci                /* Link through the register state */
1807bf215546Sopenharmony_ci                STATIC_ASSERT(sizeof(prev_reads) == sizeof(tuple_state.reg.reads));
1808bf215546Sopenharmony_ci                memcpy(prev_reads, tuple_state.reg.reads, sizeof(prev_reads));
1809bf215546Sopenharmony_ci                nr_prev_reads = tuple_state.reg.nr_reads;
1810bf215546Sopenharmony_ci                clause_state.tuple_count++;
1811bf215546Sopenharmony_ci        } while(clause->tuple_count < 8);
1812bf215546Sopenharmony_ci
1813bf215546Sopenharmony_ci        /* Don't schedule an empty clause */
1814bf215546Sopenharmony_ci        if (!clause->tuple_count)
1815bf215546Sopenharmony_ci                return NULL;
1816bf215546Sopenharmony_ci
1817bf215546Sopenharmony_ci        /* Before merging, rewrite away any tuples that read only zero */
1818bf215546Sopenharmony_ci        for (unsigned i = max_tuples - clause->tuple_count; i < max_tuples; ++i) {
1819bf215546Sopenharmony_ci                bi_tuple *tuple = &clause->tuples[i];
1820bf215546Sopenharmony_ci                struct bi_const_state *st = &clause_state.consts[i];
1821bf215546Sopenharmony_ci
1822bf215546Sopenharmony_ci                if (st->constant_count == 0 || st->constants[0] || st->constants[1] || st->pcrel)
1823bf215546Sopenharmony_ci                        continue;
1824bf215546Sopenharmony_ci
1825bf215546Sopenharmony_ci                bi_foreach_instr_in_tuple(tuple, ins)
1826bf215546Sopenharmony_ci                        bi_rewrite_zero(ins, false);
1827bf215546Sopenharmony_ci
1828bf215546Sopenharmony_ci                /* Constant has been demoted to FAU, so don't pack it separately */
1829bf215546Sopenharmony_ci                st->constant_count = 0;
1830bf215546Sopenharmony_ci
1831bf215546Sopenharmony_ci                /* Default */
1832bf215546Sopenharmony_ci                assert(tuple->fau_idx == BIR_FAU_ZERO);
1833bf215546Sopenharmony_ci        }
1834bf215546Sopenharmony_ci
1835bf215546Sopenharmony_ci        uint64_t constant_pairs[8] = { 0 };
1836bf215546Sopenharmony_ci        unsigned pcrel_idx = ~0;
1837bf215546Sopenharmony_ci        unsigned constant_words =
1838bf215546Sopenharmony_ci                bi_merge_constants(clause_state.consts, constant_pairs, &pcrel_idx);
1839bf215546Sopenharmony_ci
1840bf215546Sopenharmony_ci        constant_words = bi_apply_constant_modifiers(clause_state.consts,
1841bf215546Sopenharmony_ci                        constant_pairs, &pcrel_idx, clause->tuple_count,
1842bf215546Sopenharmony_ci                        constant_words);
1843bf215546Sopenharmony_ci
1844bf215546Sopenharmony_ci        clause->pcrel_idx = pcrel_idx;
1845bf215546Sopenharmony_ci
1846bf215546Sopenharmony_ci        for (unsigned i = max_tuples - clause->tuple_count; i < max_tuples; ++i) {
1847bf215546Sopenharmony_ci                bi_tuple *tuple = &clause->tuples[i];
1848bf215546Sopenharmony_ci
1849bf215546Sopenharmony_ci                /* If no constants, leave FAU as it is, possibly defaulting to 0 */
1850bf215546Sopenharmony_ci                if (clause_state.consts[i].constant_count == 0)
1851bf215546Sopenharmony_ci                        continue;
1852bf215546Sopenharmony_ci
1853bf215546Sopenharmony_ci                /* FAU is already handled */
1854bf215546Sopenharmony_ci                assert(!tuple->fau_idx);
1855bf215546Sopenharmony_ci
1856bf215546Sopenharmony_ci                unsigned word_idx = clause_state.consts[i].word_idx;
1857bf215546Sopenharmony_ci                assert(word_idx <= 8);
1858bf215546Sopenharmony_ci
1859bf215546Sopenharmony_ci                /* We could try to merge regardless of bottom bits as well, but
1860bf215546Sopenharmony_ci                 * that's probably diminishing returns */
1861bf215546Sopenharmony_ci                uint64_t pair = constant_pairs[word_idx];
1862bf215546Sopenharmony_ci                unsigned lo = pair & 0xF;
1863bf215546Sopenharmony_ci
1864bf215546Sopenharmony_ci                tuple->fau_idx = bi_constant_field(word_idx) | lo;
1865bf215546Sopenharmony_ci                bi_rewrite_constants_to_pass(tuple, pair, word_idx == pcrel_idx);
1866bf215546Sopenharmony_ci        }
1867bf215546Sopenharmony_ci
1868bf215546Sopenharmony_ci        clause->constant_count = constant_words;
1869bf215546Sopenharmony_ci        memcpy(clause->constants, constant_pairs, sizeof(constant_pairs));
1870bf215546Sopenharmony_ci
1871bf215546Sopenharmony_ci        /* Branches must be last, so this can be factored out */
1872bf215546Sopenharmony_ci        bi_instr *last = clause->tuples[max_tuples - 1].add;
1873bf215546Sopenharmony_ci        clause->next_clause_prefetch = !last || (last->op != BI_OPCODE_JUMP);
1874bf215546Sopenharmony_ci        clause->block = block;
1875bf215546Sopenharmony_ci
1876bf215546Sopenharmony_ci        clause->ftz = (clause_state.ftz == BI_FTZ_STATE_ENABLE);
1877bf215546Sopenharmony_ci
1878bf215546Sopenharmony_ci        /* We emit in reverse and emitted to the back of the tuples array, so
1879bf215546Sopenharmony_ci         * move it up front for easy indexing */
1880bf215546Sopenharmony_ci        memmove(clause->tuples,
1881bf215546Sopenharmony_ci                       clause->tuples + (max_tuples - clause->tuple_count),
1882bf215546Sopenharmony_ci                       clause->tuple_count * sizeof(clause->tuples[0]));
1883bf215546Sopenharmony_ci
1884bf215546Sopenharmony_ci        /* Use passthrough register for cross-tuple accesses. Note this is
1885bf215546Sopenharmony_ci         * after the memmove, so this is forwards. Skip the first tuple since
1886bf215546Sopenharmony_ci         * there is nothing before it to passthrough */
1887bf215546Sopenharmony_ci
1888bf215546Sopenharmony_ci        for (unsigned t = 1; t < clause->tuple_count; ++t)
1889bf215546Sopenharmony_ci                bi_rewrite_passthrough(clause->tuples[t - 1], clause->tuples[t]);
1890bf215546Sopenharmony_ci
1891bf215546Sopenharmony_ci        return clause;
1892bf215546Sopenharmony_ci}
1893bf215546Sopenharmony_ci
1894bf215546Sopenharmony_cistatic void
1895bf215546Sopenharmony_cibi_schedule_block(bi_context *ctx, bi_block *block)
1896bf215546Sopenharmony_ci{
1897bf215546Sopenharmony_ci        list_inithead(&block->clauses);
1898bf215546Sopenharmony_ci
1899bf215546Sopenharmony_ci        /* Copy list to dynamic array */
1900bf215546Sopenharmony_ci        struct bi_worklist st = bi_initialize_worklist(block,
1901bf215546Sopenharmony_ci                        bifrost_debug & BIFROST_DBG_INORDER,
1902bf215546Sopenharmony_ci                        ctx->inputs->is_blend);
1903bf215546Sopenharmony_ci
1904bf215546Sopenharmony_ci        if (!st.count) {
1905bf215546Sopenharmony_ci                bi_free_worklist(st);
1906bf215546Sopenharmony_ci                return;
1907bf215546Sopenharmony_ci        }
1908bf215546Sopenharmony_ci
1909bf215546Sopenharmony_ci        /* We need to track liveness during scheduling in order to determine whether we can use temporary (passthrough) registers */
1910bf215546Sopenharmony_ci        uint64_t live = block->reg_live_out;
1911bf215546Sopenharmony_ci
1912bf215546Sopenharmony_ci        /* Schedule as many clauses as needed to fill the block */
1913bf215546Sopenharmony_ci        bi_clause *u = NULL;
1914bf215546Sopenharmony_ci        while((u = bi_schedule_clause(ctx, block, st, &live)))
1915bf215546Sopenharmony_ci                list_add(&u->link, &block->clauses);
1916bf215546Sopenharmony_ci
1917bf215546Sopenharmony_ci        /* Back-to-back bit affects only the last clause of a block,
1918bf215546Sopenharmony_ci         * the rest are implicitly true */
1919bf215546Sopenharmony_ci        if (!list_is_empty(&block->clauses)) {
1920bf215546Sopenharmony_ci                bi_clause *last_clause = list_last_entry(&block->clauses, bi_clause, link);
1921bf215546Sopenharmony_ci                if (bi_reconverge_branches(block))
1922bf215546Sopenharmony_ci                        last_clause->flow_control = BIFROST_FLOW_NBTB_UNCONDITIONAL;
1923bf215546Sopenharmony_ci        }
1924bf215546Sopenharmony_ci
1925bf215546Sopenharmony_ci        /* Reorder instructions to match the new schedule. First remove
1926bf215546Sopenharmony_ci         * existing instructions and then recreate the list */
1927bf215546Sopenharmony_ci
1928bf215546Sopenharmony_ci        bi_foreach_instr_in_block_safe(block, ins) {
1929bf215546Sopenharmony_ci                list_del(&ins->link);
1930bf215546Sopenharmony_ci        }
1931bf215546Sopenharmony_ci
1932bf215546Sopenharmony_ci        bi_foreach_clause_in_block(block, clause) {
1933bf215546Sopenharmony_ci                for (unsigned i = 0; i < clause->tuple_count; ++i)  {
1934bf215546Sopenharmony_ci                        bi_foreach_instr_in_tuple(&clause->tuples[i], ins) {
1935bf215546Sopenharmony_ci                                list_addtail(&ins->link, &block->instructions);
1936bf215546Sopenharmony_ci                        }
1937bf215546Sopenharmony_ci                }
1938bf215546Sopenharmony_ci        }
1939bf215546Sopenharmony_ci
1940bf215546Sopenharmony_ci        block->scheduled = true;
1941bf215546Sopenharmony_ci
1942bf215546Sopenharmony_ci#ifndef NDEBUG
1943bf215546Sopenharmony_ci        unsigned i;
1944bf215546Sopenharmony_ci        bool incomplete = false;
1945bf215546Sopenharmony_ci
1946bf215546Sopenharmony_ci        BITSET_FOREACH_SET(i, st.worklist, st.count) {
1947bf215546Sopenharmony_ci                bi_print_instr(st.instructions[i], stderr);
1948bf215546Sopenharmony_ci                incomplete = true;
1949bf215546Sopenharmony_ci        }
1950bf215546Sopenharmony_ci
1951bf215546Sopenharmony_ci        if (incomplete)
1952bf215546Sopenharmony_ci                unreachable("The above instructions failed to schedule.");
1953bf215546Sopenharmony_ci#endif
1954bf215546Sopenharmony_ci
1955bf215546Sopenharmony_ci        bi_free_worklist(st);
1956bf215546Sopenharmony_ci}
1957bf215546Sopenharmony_ci
1958bf215546Sopenharmony_cistatic bool
1959bf215546Sopenharmony_cibi_check_fau_src(bi_instr *ins, unsigned s, uint32_t *constants, unsigned *cwords, bi_index *fau)
1960bf215546Sopenharmony_ci{
1961bf215546Sopenharmony_ci        bi_index src = ins->src[s];
1962bf215546Sopenharmony_ci
1963bf215546Sopenharmony_ci        /* Staging registers can't have FAU accesses */
1964bf215546Sopenharmony_ci        if (bi_is_staging_src(ins, s))
1965bf215546Sopenharmony_ci                return (src.type != BI_INDEX_CONSTANT) && (src.type != BI_INDEX_FAU);
1966bf215546Sopenharmony_ci
1967bf215546Sopenharmony_ci        if (src.type == BI_INDEX_CONSTANT) {
1968bf215546Sopenharmony_ci                /* Allow fast zero */
1969bf215546Sopenharmony_ci                if (src.value == 0 && bi_opcode_props[ins->op].fma && bi_reads_zero(ins))
1970bf215546Sopenharmony_ci                        return true;
1971bf215546Sopenharmony_ci
1972bf215546Sopenharmony_ci                if (!bi_is_null(*fau))
1973bf215546Sopenharmony_ci                        return false;
1974bf215546Sopenharmony_ci
1975bf215546Sopenharmony_ci                /* Else, try to inline a constant */
1976bf215546Sopenharmony_ci                for (unsigned i = 0; i < *cwords; ++i) {
1977bf215546Sopenharmony_ci                        if (src.value == constants[i])
1978bf215546Sopenharmony_ci                                return true;
1979bf215546Sopenharmony_ci                }
1980bf215546Sopenharmony_ci
1981bf215546Sopenharmony_ci                if (*cwords >= 2)
1982bf215546Sopenharmony_ci                        return false;
1983bf215546Sopenharmony_ci
1984bf215546Sopenharmony_ci                constants[(*cwords)++] = src.value;
1985bf215546Sopenharmony_ci        } else if (src.type == BI_INDEX_FAU) {
1986bf215546Sopenharmony_ci                if (*cwords != 0)
1987bf215546Sopenharmony_ci                        return false;
1988bf215546Sopenharmony_ci
1989bf215546Sopenharmony_ci                /* Can only read from one pair of FAU words */
1990bf215546Sopenharmony_ci                if (!bi_is_null(*fau) && (src.value != fau->value))
1991bf215546Sopenharmony_ci                        return false;
1992bf215546Sopenharmony_ci
1993bf215546Sopenharmony_ci                /* If there is a target, we'll need a PC-relative constant */
1994bf215546Sopenharmony_ci                if (ins->branch_target)
1995bf215546Sopenharmony_ci                        return false;
1996bf215546Sopenharmony_ci
1997bf215546Sopenharmony_ci                *fau = src;
1998bf215546Sopenharmony_ci        }
1999bf215546Sopenharmony_ci
2000bf215546Sopenharmony_ci        return true;
2001bf215546Sopenharmony_ci}
2002bf215546Sopenharmony_ci
2003bf215546Sopenharmony_civoid
2004bf215546Sopenharmony_cibi_lower_fau(bi_context *ctx)
2005bf215546Sopenharmony_ci{
2006bf215546Sopenharmony_ci        bi_foreach_instr_global_safe(ctx, ins) {
2007bf215546Sopenharmony_ci                bi_builder b = bi_init_builder(ctx, bi_before_instr(ins));
2008bf215546Sopenharmony_ci
2009bf215546Sopenharmony_ci                uint32_t constants[2];
2010bf215546Sopenharmony_ci                unsigned cwords = 0;
2011bf215546Sopenharmony_ci                bi_index fau = bi_null();
2012bf215546Sopenharmony_ci
2013bf215546Sopenharmony_ci                /* ATEST must have the ATEST datum encoded, not any other
2014bf215546Sopenharmony_ci                 * uniform. See to it this is the case. */
2015bf215546Sopenharmony_ci                if (ins->op == BI_OPCODE_ATEST)
2016bf215546Sopenharmony_ci                        fau = ins->src[2];
2017bf215546Sopenharmony_ci
2018bf215546Sopenharmony_ci                /* Dual texturing requires the texture operation descriptor
2019bf215546Sopenharmony_ci                 * encoded as an immediate so we can fix up.
2020bf215546Sopenharmony_ci                 */
2021bf215546Sopenharmony_ci                if (ins->op == BI_OPCODE_TEXC) {
2022bf215546Sopenharmony_ci                        assert(ins->src[3].type == BI_INDEX_CONSTANT);
2023bf215546Sopenharmony_ci                        constants[cwords++] = ins->src[3].value;
2024bf215546Sopenharmony_ci                }
2025bf215546Sopenharmony_ci
2026bf215546Sopenharmony_ci                bi_foreach_src(ins, s) {
2027bf215546Sopenharmony_ci                        if (bi_check_fau_src(ins, s, constants, &cwords, &fau)) continue;
2028bf215546Sopenharmony_ci
2029bf215546Sopenharmony_ci                        bi_index copy = bi_mov_i32(&b, ins->src[s]);
2030bf215546Sopenharmony_ci                        ins->src[s] = bi_replace_index(ins->src[s], copy);
2031bf215546Sopenharmony_ci                }
2032bf215546Sopenharmony_ci        }
2033bf215546Sopenharmony_ci}
2034bf215546Sopenharmony_ci
/* Only v7 allows specifying a dependency on the tilebuffer for the first
 * clause of a shader. v6 requires adding a NOP clause with the dependency. */
2037bf215546Sopenharmony_ci
2038bf215546Sopenharmony_cistatic void
2039bf215546Sopenharmony_cibi_add_nop_for_atest(bi_context *ctx)
2040bf215546Sopenharmony_ci{
2041bf215546Sopenharmony_ci        /* Only needed on v6 */
2042bf215546Sopenharmony_ci        if (ctx->arch >= 7)
2043bf215546Sopenharmony_ci                return;
2044bf215546Sopenharmony_ci
2045bf215546Sopenharmony_ci        if (list_is_empty(&ctx->blocks))
2046bf215546Sopenharmony_ci                return;
2047bf215546Sopenharmony_ci
2048bf215546Sopenharmony_ci        /* Fetch the first clause of the shader */
2049bf215546Sopenharmony_ci        bi_block *block = list_first_entry(&ctx->blocks, bi_block, link);
2050bf215546Sopenharmony_ci        bi_clause *clause = bi_next_clause(ctx, block, NULL);
2051bf215546Sopenharmony_ci
2052bf215546Sopenharmony_ci        if (!clause || !(clause->dependencies & ((1 << BIFROST_SLOT_ELDEST_DEPTH) |
2053bf215546Sopenharmony_ci                                                 (1 << BIFROST_SLOT_ELDEST_COLOUR))))
2054bf215546Sopenharmony_ci                return;
2055bf215546Sopenharmony_ci
2056bf215546Sopenharmony_ci        /* Add a NOP so we can wait for the dependencies required by the first
2057bf215546Sopenharmony_ci         * clause */
2058bf215546Sopenharmony_ci
2059bf215546Sopenharmony_ci        bi_instr *I = rzalloc(ctx, bi_instr);
2060bf215546Sopenharmony_ci        I->op = BI_OPCODE_NOP;
2061bf215546Sopenharmony_ci        I->dest[0] = bi_null();
2062bf215546Sopenharmony_ci
2063bf215546Sopenharmony_ci        bi_clause *new_clause = ralloc(ctx, bi_clause);
2064bf215546Sopenharmony_ci        *new_clause = (bi_clause) {
2065bf215546Sopenharmony_ci                .flow_control = BIFROST_FLOW_NBTB,
2066bf215546Sopenharmony_ci                .next_clause_prefetch = true,
2067bf215546Sopenharmony_ci                .block = clause->block,
2068bf215546Sopenharmony_ci
2069bf215546Sopenharmony_ci                .tuple_count = 1,
2070bf215546Sopenharmony_ci                .tuples[0] = { .fma = I, },
2071bf215546Sopenharmony_ci        };
2072bf215546Sopenharmony_ci
2073bf215546Sopenharmony_ci        list_add(&new_clause->link, &clause->block->clauses);
2074bf215546Sopenharmony_ci}
2075bf215546Sopenharmony_ci
2076bf215546Sopenharmony_civoid
2077bf215546Sopenharmony_cibi_schedule(bi_context *ctx)
2078bf215546Sopenharmony_ci{
2079bf215546Sopenharmony_ci        /* Fed into both scheduling and DCE */
2080bf215546Sopenharmony_ci        bi_postra_liveness(ctx);
2081bf215546Sopenharmony_ci
2082bf215546Sopenharmony_ci        bi_foreach_block(ctx, block) {
2083bf215546Sopenharmony_ci                bi_schedule_block(ctx, block);
2084bf215546Sopenharmony_ci        }
2085bf215546Sopenharmony_ci
2086bf215546Sopenharmony_ci        bi_opt_dce_post_ra(ctx);
2087bf215546Sopenharmony_ci        bi_add_nop_for_atest(ctx);
2088bf215546Sopenharmony_ci}
2089