1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2020 Collabora, Ltd.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "compiler.h"
25bf215546Sopenharmony_ci#include "bi_quirks.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci/* This file contains the final passes of the compiler. Running after
28bf215546Sopenharmony_ci * scheduling and RA, the IR is now finalized, so we need to emit it to actual
29bf215546Sopenharmony_ci * bits on the wire (as well as fixup branches) */
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_cistatic uint64_t
32bf215546Sopenharmony_cibi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2)
33bf215546Sopenharmony_ci{
34bf215546Sopenharmony_ci        /* next_dependencies are the union of the dependencies of successors'
35bf215546Sopenharmony_ci         * dependencies */
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci        unsigned dependency_wait = next_1 ? next_1->dependencies : 0;
38bf215546Sopenharmony_ci        dependency_wait |= next_2 ? next_2->dependencies : 0;
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_ci        /* Signal barriers (slot #7) immediately. This is not optimal but good
41bf215546Sopenharmony_ci         * enough. Doing better requires extending the IR and scheduler.
42bf215546Sopenharmony_ci         */
43bf215546Sopenharmony_ci        if (clause->message_type == BIFROST_MESSAGE_BARRIER)
44bf215546Sopenharmony_ci                dependency_wait |= BITFIELD_BIT(7);
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_ci        bool staging_barrier = next_1 ? next_1->staging_barrier : false;
47bf215546Sopenharmony_ci        staging_barrier |= next_2 ? next_2->staging_barrier : 0;
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_ci        struct bifrost_header header = {
50bf215546Sopenharmony_ci                .flow_control =
51bf215546Sopenharmony_ci                        (next_1 == NULL && next_2 == NULL) ?
52bf215546Sopenharmony_ci                        BIFROST_FLOW_END :  clause->flow_control,
53bf215546Sopenharmony_ci                .terminate_discarded_threads = clause->td,
54bf215546Sopenharmony_ci                .next_clause_prefetch = clause->next_clause_prefetch && next_1,
55bf215546Sopenharmony_ci                .staging_barrier = staging_barrier,
56bf215546Sopenharmony_ci                .staging_register = clause->staging_register,
57bf215546Sopenharmony_ci                .dependency_wait = dependency_wait,
58bf215546Sopenharmony_ci                .dependency_slot = clause->scoreboard_id,
59bf215546Sopenharmony_ci                .message_type = clause->message_type,
60bf215546Sopenharmony_ci                .next_message_type = next_1 ? next_1->message_type : 0,
61bf215546Sopenharmony_ci                .flush_to_zero = clause->ftz ? BIFROST_FTZ_ALWAYS : BIFROST_FTZ_DISABLE
62bf215546Sopenharmony_ci        };
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci        uint64_t u = 0;
65bf215546Sopenharmony_ci        memcpy(&u, &header, sizeof(header));
66bf215546Sopenharmony_ci        return u;
67bf215546Sopenharmony_ci}
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_ci/* Assigns a slot for reading, before anything is written */
70bf215546Sopenharmony_ci
71bf215546Sopenharmony_cistatic void
72bf215546Sopenharmony_cibi_assign_slot_read(bi_registers *regs, bi_index src)
73bf215546Sopenharmony_ci{
74bf215546Sopenharmony_ci        /* We only assign for registers */
75bf215546Sopenharmony_ci        if (src.type != BI_INDEX_REGISTER)
76bf215546Sopenharmony_ci                return;
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_ci        /* Check if we already assigned the slot */
79bf215546Sopenharmony_ci        for (unsigned i = 0; i <= 1; ++i) {
80bf215546Sopenharmony_ci                if (regs->slot[i] == src.value && regs->enabled[i])
81bf215546Sopenharmony_ci                        return;
82bf215546Sopenharmony_ci        }
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_ci        if (regs->slot[2] == src.value && regs->slot23.slot2 == BIFROST_OP_READ)
85bf215546Sopenharmony_ci                return;
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ci        /* Assign it now */
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci        for (unsigned i = 0; i <= 1; ++i) {
90bf215546Sopenharmony_ci                if (!regs->enabled[i]) {
91bf215546Sopenharmony_ci                        regs->slot[i] = src.value;
92bf215546Sopenharmony_ci                        regs->enabled[i] = true;
93bf215546Sopenharmony_ci                        return;
94bf215546Sopenharmony_ci                }
95bf215546Sopenharmony_ci        }
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci        if (!regs->slot23.slot3) {
98bf215546Sopenharmony_ci                regs->slot[2] = src.value;
99bf215546Sopenharmony_ci                regs->slot23.slot2 = BIFROST_OP_READ;
100bf215546Sopenharmony_ci                return;
101bf215546Sopenharmony_ci        }
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci        bi_print_slots(regs, stderr);
104bf215546Sopenharmony_ci        unreachable("Failed to find a free slot for src");
105bf215546Sopenharmony_ci}
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_cistatic bi_registers
108bf215546Sopenharmony_cibi_assign_slots(bi_tuple *now, bi_tuple *prev)
109bf215546Sopenharmony_ci{
110bf215546Sopenharmony_ci        /* We assign slots for the main register mechanism. Special ops
111bf215546Sopenharmony_ci         * use the data registers, which has its own mechanism entirely
112bf215546Sopenharmony_ci         * and thus gets skipped over here. */
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci        bool read_dreg = now->add && bi_opcode_props[now->add->op].sr_read;
115bf215546Sopenharmony_ci        bool write_dreg = prev->add && bi_opcode_props[prev->add->op].sr_write;
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci        /* First, assign reads */
118bf215546Sopenharmony_ci
119bf215546Sopenharmony_ci        if (now->fma)
120bf215546Sopenharmony_ci                bi_foreach_src(now->fma, src)
121bf215546Sopenharmony_ci                        bi_assign_slot_read(&now->regs, (now->fma)->src[src]);
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_ci        if (now->add) {
124bf215546Sopenharmony_ci                bi_foreach_src(now->add, src) {
125bf215546Sopenharmony_ci                        /* This is not a real source, we shouldn't assign a
126bf215546Sopenharmony_ci                         * slot for it.
127bf215546Sopenharmony_ci                         */
128bf215546Sopenharmony_ci                        if (now->add->op == BI_OPCODE_BLEND && src == 4)
129bf215546Sopenharmony_ci                                continue;
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci                        if (!(src == 0 && read_dreg))
132bf215546Sopenharmony_ci                                bi_assign_slot_read(&now->regs, (now->add)->src[src]);
133bf215546Sopenharmony_ci                }
134bf215546Sopenharmony_ci        }
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci        /* Next, assign writes. Staging writes are assigned separately, but
137bf215546Sopenharmony_ci         * +ATEST wants its destination written to both a staging register
138bf215546Sopenharmony_ci         * _and_ a regular write, because it may not generate a message */
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci        if (prev->add && (!write_dreg || prev->add->op == BI_OPCODE_ATEST)) {
141bf215546Sopenharmony_ci                bi_index idx = prev->add->dest[0];
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci                if (idx.type == BI_INDEX_REGISTER) {
144bf215546Sopenharmony_ci                        now->regs.slot[3] = idx.value;
145bf215546Sopenharmony_ci                        now->regs.slot23.slot3 = BIFROST_OP_WRITE;
146bf215546Sopenharmony_ci                }
147bf215546Sopenharmony_ci        }
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ci        if (prev->fma) {
150bf215546Sopenharmony_ci                bi_index idx = (prev->fma)->dest[0];
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci                if (idx.type == BI_INDEX_REGISTER) {
153bf215546Sopenharmony_ci                        if (now->regs.slot23.slot3) {
154bf215546Sopenharmony_ci                                /* Scheduler constraint: cannot read 3 and write 2 */
155bf215546Sopenharmony_ci                                assert(!now->regs.slot23.slot2);
156bf215546Sopenharmony_ci                                now->regs.slot[2] = idx.value;
157bf215546Sopenharmony_ci                                now->regs.slot23.slot2 = BIFROST_OP_WRITE;
158bf215546Sopenharmony_ci                        } else {
159bf215546Sopenharmony_ci                                now->regs.slot[3] = idx.value;
160bf215546Sopenharmony_ci                                now->regs.slot23.slot3 = BIFROST_OP_WRITE;
161bf215546Sopenharmony_ci                                now->regs.slot23.slot3_fma = true;
162bf215546Sopenharmony_ci                        }
163bf215546Sopenharmony_ci                }
164bf215546Sopenharmony_ci        }
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci        return now->regs;
167bf215546Sopenharmony_ci}
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_cistatic enum bifrost_reg_mode
170bf215546Sopenharmony_cibi_pack_register_mode(bi_registers r)
171bf215546Sopenharmony_ci{
172bf215546Sopenharmony_ci        /* Handle idle as a special case */
173bf215546Sopenharmony_ci        if (!(r.slot23.slot2 | r.slot23.slot3))
174bf215546Sopenharmony_ci                return r.first_instruction ? BIFROST_IDLE_1 : BIFROST_IDLE;
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci        /* Otherwise, use the LUT */
177bf215546Sopenharmony_ci        for (unsigned i = 0; i < ARRAY_SIZE(bifrost_reg_ctrl_lut); ++i) {
178bf215546Sopenharmony_ci                if (memcmp(bifrost_reg_ctrl_lut + i, &r.slot23, sizeof(r.slot23)) == 0)
179bf215546Sopenharmony_ci                        return i;
180bf215546Sopenharmony_ci        }
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci        bi_print_slots(&r, stderr);
183bf215546Sopenharmony_ci        unreachable("Invalid slot assignment");
184bf215546Sopenharmony_ci}
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_cistatic uint64_t
187bf215546Sopenharmony_cibi_pack_registers(bi_registers regs)
188bf215546Sopenharmony_ci{
189bf215546Sopenharmony_ci        enum bifrost_reg_mode mode = bi_pack_register_mode(regs);
190bf215546Sopenharmony_ci        struct bifrost_regs s = { 0 };
191bf215546Sopenharmony_ci        uint64_t packed = 0;
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci        /* Need to pack 5-bit mode as a 4-bit field. The decoder moves bit 3 to bit 4 for
194bf215546Sopenharmony_ci         * first instruction and adds 16 when reg 2 == reg 3 */
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci        unsigned ctrl;
197bf215546Sopenharmony_ci        bool r2_equals_r3 = false;
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_ci        if (regs.first_instruction) {
200bf215546Sopenharmony_ci                /* Bit 3 implicitly must be clear for first instructions.
201bf215546Sopenharmony_ci                 * The affected patterns all write both ADD/FMA, but that
202bf215546Sopenharmony_ci                 * is forbidden for the last instruction (whose writes are
203bf215546Sopenharmony_ci                 * encoded by the first), so this does not add additional
204bf215546Sopenharmony_ci                 * encoding constraints */
205bf215546Sopenharmony_ci                assert(!(mode & 0x8));
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_ci                /* Move bit 4 to bit 3, since bit 3 is clear */
208bf215546Sopenharmony_ci                ctrl = (mode & 0x7) | ((mode & 0x10) >> 1);
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci                /* If we can let r2 equal r3, we have to or the hardware raises
211bf215546Sopenharmony_ci                 * INSTR_INVALID_ENC (it's unclear why). */
212bf215546Sopenharmony_ci                if (!(regs.slot23.slot2 && regs.slot23.slot3))
213bf215546Sopenharmony_ci                        r2_equals_r3 = true;
214bf215546Sopenharmony_ci        } else {
215bf215546Sopenharmony_ci                /* We force r2=r3 or not for the upper bit */
216bf215546Sopenharmony_ci                ctrl = (mode & 0xF);
217bf215546Sopenharmony_ci                r2_equals_r3 = (mode & 0x10);
218bf215546Sopenharmony_ci        }
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ci        if (regs.enabled[1]) {
221bf215546Sopenharmony_ci                /* Gotta save that bit!~ Required by the 63-x trick */
222bf215546Sopenharmony_ci                assert(regs.slot[1] > regs.slot[0]);
223bf215546Sopenharmony_ci                assert(regs.enabled[0]);
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci                /* Do the 63-x trick, see docs/disasm */
226bf215546Sopenharmony_ci                if (regs.slot[0] > 31) {
227bf215546Sopenharmony_ci                        regs.slot[0] = 63 - regs.slot[0];
228bf215546Sopenharmony_ci                        regs.slot[1] = 63 - regs.slot[1];
229bf215546Sopenharmony_ci                }
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_ci                assert(regs.slot[0] <= 31);
232bf215546Sopenharmony_ci                assert(regs.slot[1] <= 63);
233bf215546Sopenharmony_ci
234bf215546Sopenharmony_ci                s.ctrl = ctrl;
235bf215546Sopenharmony_ci                s.reg1 = regs.slot[1];
236bf215546Sopenharmony_ci                s.reg0 = regs.slot[0];
237bf215546Sopenharmony_ci        } else {
238bf215546Sopenharmony_ci                /* slot 1 disabled, so set to zero and use slot 1 for ctrl */
239bf215546Sopenharmony_ci                s.ctrl = 0;
240bf215546Sopenharmony_ci                s.reg1 = ctrl << 2;
241bf215546Sopenharmony_ci
242bf215546Sopenharmony_ci                if (regs.enabled[0]) {
243bf215546Sopenharmony_ci                        /* Bit 0 upper bit of slot 0 */
244bf215546Sopenharmony_ci                        s.reg1 |= (regs.slot[0] >> 5);
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci                        /* Rest of slot 0 in usual spot */
247bf215546Sopenharmony_ci                        s.reg0 = (regs.slot[0] & 0b11111);
248bf215546Sopenharmony_ci                } else {
249bf215546Sopenharmony_ci                        /* Bit 1 set if slot 0 also disabled */
250bf215546Sopenharmony_ci                        s.reg1 |= (1 << 1);
251bf215546Sopenharmony_ci                }
252bf215546Sopenharmony_ci        }
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci        /* Force r2 =/!= r3 as needed */
255bf215546Sopenharmony_ci        if (r2_equals_r3) {
256bf215546Sopenharmony_ci                assert(regs.slot[3] == regs.slot[2] || !(regs.slot23.slot2 && regs.slot23.slot3));
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_ci                if (regs.slot23.slot2)
259bf215546Sopenharmony_ci                        regs.slot[3] = regs.slot[2];
260bf215546Sopenharmony_ci                else
261bf215546Sopenharmony_ci                        regs.slot[2] = regs.slot[3];
262bf215546Sopenharmony_ci        } else if (!regs.first_instruction) {
263bf215546Sopenharmony_ci                /* Enforced by the encoding anyway */
264bf215546Sopenharmony_ci                assert(regs.slot[2] != regs.slot[3]);
265bf215546Sopenharmony_ci        }
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci        s.reg2 = regs.slot[2];
268bf215546Sopenharmony_ci        s.reg3 = regs.slot[3];
269bf215546Sopenharmony_ci        s.fau_idx = regs.fau_idx;
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci        memcpy(&packed, &s, sizeof(s));
272bf215546Sopenharmony_ci        return packed;
273bf215546Sopenharmony_ci}
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_ci/* We must ensure slot 1 > slot 0 for the 63-x trick to function, so we fix
276bf215546Sopenharmony_ci * this up at pack time. (Scheduling doesn't care.) */
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_cistatic void
279bf215546Sopenharmony_cibi_flip_slots(bi_registers *regs)
280bf215546Sopenharmony_ci{
281bf215546Sopenharmony_ci        if (regs->enabled[0] && regs->enabled[1] && regs->slot[1] < regs->slot[0]) {
282bf215546Sopenharmony_ci                unsigned temp = regs->slot[0];
283bf215546Sopenharmony_ci                regs->slot[0] = regs->slot[1];
284bf215546Sopenharmony_ci                regs->slot[1] = temp;
285bf215546Sopenharmony_ci        }
286bf215546Sopenharmony_ci
287bf215546Sopenharmony_ci}
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_cistatic inline enum bifrost_packed_src
290bf215546Sopenharmony_cibi_get_src_slot(bi_registers *regs, unsigned reg)
291bf215546Sopenharmony_ci{
292bf215546Sopenharmony_ci        if (regs->slot[0] == reg && regs->enabled[0])
293bf215546Sopenharmony_ci                return BIFROST_SRC_PORT0;
294bf215546Sopenharmony_ci        else if (regs->slot[1] == reg && regs->enabled[1])
295bf215546Sopenharmony_ci                return BIFROST_SRC_PORT1;
296bf215546Sopenharmony_ci        else if (regs->slot[2] == reg && regs->slot23.slot2 == BIFROST_OP_READ)
297bf215546Sopenharmony_ci                return BIFROST_SRC_PORT2;
298bf215546Sopenharmony_ci        else
299bf215546Sopenharmony_ci                unreachable("Tried to access register with no port");
300bf215546Sopenharmony_ci}
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_cistatic inline enum bifrost_packed_src
303bf215546Sopenharmony_cibi_get_src_new(bi_instr *ins, bi_registers *regs, unsigned s)
304bf215546Sopenharmony_ci{
305bf215546Sopenharmony_ci        if (!ins)
306bf215546Sopenharmony_ci                return 0;
307bf215546Sopenharmony_ci
308bf215546Sopenharmony_ci        bi_index src = ins->src[s];
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci        if (src.type == BI_INDEX_REGISTER)
311bf215546Sopenharmony_ci                return bi_get_src_slot(regs, src.value);
312bf215546Sopenharmony_ci        else if (src.type == BI_INDEX_PASS)
313bf215546Sopenharmony_ci                return src.value;
314bf215546Sopenharmony_ci        else {
315bf215546Sopenharmony_ci                /* TODO make safer */
316bf215546Sopenharmony_ci                return BIFROST_SRC_STAGE;
317bf215546Sopenharmony_ci        }
318bf215546Sopenharmony_ci}
319bf215546Sopenharmony_ci
320bf215546Sopenharmony_cistatic struct bi_packed_tuple
321bf215546Sopenharmony_cibi_pack_tuple(bi_clause *clause, bi_tuple *tuple, bi_tuple *prev, bool first_tuple, gl_shader_stage stage)
322bf215546Sopenharmony_ci{
323bf215546Sopenharmony_ci        bi_assign_slots(tuple, prev);
324bf215546Sopenharmony_ci        tuple->regs.fau_idx = tuple->fau_idx;
325bf215546Sopenharmony_ci        tuple->regs.first_instruction = first_tuple;
326bf215546Sopenharmony_ci
327bf215546Sopenharmony_ci        bi_flip_slots(&tuple->regs);
328bf215546Sopenharmony_ci
329bf215546Sopenharmony_ci        bool sr_read = tuple->add &&
330bf215546Sopenharmony_ci                bi_opcode_props[(tuple->add)->op].sr_read;
331bf215546Sopenharmony_ci
332bf215546Sopenharmony_ci        uint64_t reg = bi_pack_registers(tuple->regs);
333bf215546Sopenharmony_ci        uint64_t fma = bi_pack_fma(tuple->fma,
334bf215546Sopenharmony_ci                        bi_get_src_new(tuple->fma, &tuple->regs, 0),
335bf215546Sopenharmony_ci                        bi_get_src_new(tuple->fma, &tuple->regs, 1),
336bf215546Sopenharmony_ci                        bi_get_src_new(tuple->fma, &tuple->regs, 2),
337bf215546Sopenharmony_ci                        bi_get_src_new(tuple->fma, &tuple->regs, 3));
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_ci        uint64_t add = bi_pack_add(tuple->add,
340bf215546Sopenharmony_ci                        bi_get_src_new(tuple->add, &tuple->regs, sr_read + 0),
341bf215546Sopenharmony_ci                        bi_get_src_new(tuple->add, &tuple->regs, sr_read + 1),
342bf215546Sopenharmony_ci                        bi_get_src_new(tuple->add, &tuple->regs, sr_read + 2),
343bf215546Sopenharmony_ci                        0);
344bf215546Sopenharmony_ci
345bf215546Sopenharmony_ci        if (tuple->add) {
346bf215546Sopenharmony_ci                bi_instr *add = tuple->add;
347bf215546Sopenharmony_ci
348bf215546Sopenharmony_ci                bool sr_write = bi_opcode_props[add->op].sr_write &&
349bf215546Sopenharmony_ci                        !bi_is_null(add->dest[0]);
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_ci                if (sr_read && !bi_is_null(add->src[0])) {
352bf215546Sopenharmony_ci                        assert(add->src[0].type == BI_INDEX_REGISTER);
353bf215546Sopenharmony_ci                        clause->staging_register = add->src[0].value;
354bf215546Sopenharmony_ci
355bf215546Sopenharmony_ci                        if (sr_write)
356bf215546Sopenharmony_ci                                assert(bi_is_equiv(add->src[0], add->dest[0]));
357bf215546Sopenharmony_ci                } else if (sr_write) {
358bf215546Sopenharmony_ci                        assert(add->dest[0].type == BI_INDEX_REGISTER);
359bf215546Sopenharmony_ci                        clause->staging_register = add->dest[0].value;
360bf215546Sopenharmony_ci                }
361bf215546Sopenharmony_ci        }
362bf215546Sopenharmony_ci
363bf215546Sopenharmony_ci        struct bi_packed_tuple packed = {
364bf215546Sopenharmony_ci                .lo = reg | (fma << 35) | ((add & 0b111111) << 58),
365bf215546Sopenharmony_ci                .hi = add >> 6
366bf215546Sopenharmony_ci        };
367bf215546Sopenharmony_ci
368bf215546Sopenharmony_ci        return packed;
369bf215546Sopenharmony_ci}
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ci/* A block contains at most one PC-relative constant, from a terminal branch.
372bf215546Sopenharmony_ci * Find the last instruction and if it is a relative branch, fix up the
373bf215546Sopenharmony_ci * PC-relative constant to contain the absolute offset. This occurs at pack
374bf215546Sopenharmony_ci * time instead of schedule time because the number of quadwords between each
375bf215546Sopenharmony_ci * block is not known until after all other passes have finished.
376bf215546Sopenharmony_ci */
377bf215546Sopenharmony_ci
378bf215546Sopenharmony_cistatic void
379bf215546Sopenharmony_cibi_assign_branch_offset(bi_context *ctx, bi_block *block)
380bf215546Sopenharmony_ci{
381bf215546Sopenharmony_ci        if (list_is_empty(&block->clauses))
382bf215546Sopenharmony_ci                return;
383bf215546Sopenharmony_ci
384bf215546Sopenharmony_ci        bi_clause *clause = list_last_entry(&block->clauses, bi_clause, link);
385bf215546Sopenharmony_ci        bi_instr *br = bi_last_instr_in_clause(clause);
386bf215546Sopenharmony_ci
387bf215546Sopenharmony_ci        if (!br->branch_target)
388bf215546Sopenharmony_ci                return;
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci        /* Put it in the high place */
391bf215546Sopenharmony_ci        int32_t qwords = bi_block_offset(ctx, clause, br->branch_target);
392bf215546Sopenharmony_ci        int32_t bytes = qwords * 16;
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_ci        /* Copy so we can toy with the sign without undefined behaviour */
395bf215546Sopenharmony_ci        uint32_t raw = 0;
396bf215546Sopenharmony_ci        memcpy(&raw, &bytes, sizeof(raw));
397bf215546Sopenharmony_ci
398bf215546Sopenharmony_ci        /* Clear off top bits for A1/B1 bits */
399bf215546Sopenharmony_ci        raw &= ~0xF0000000;
400bf215546Sopenharmony_ci
401bf215546Sopenharmony_ci        /* Put in top 32-bits */
402bf215546Sopenharmony_ci        assert(clause->pcrel_idx < 8);
403bf215546Sopenharmony_ci        clause->constants[clause->pcrel_idx] |= ((uint64_t) raw) << 32ull;
404bf215546Sopenharmony_ci}
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_cistatic void
407bf215546Sopenharmony_cibi_pack_constants(unsigned tuple_count, uint64_t *constants,
408bf215546Sopenharmony_ci                unsigned word_idx, unsigned constant_words, bool ec0_packed,
409bf215546Sopenharmony_ci                struct util_dynarray *emission)
410bf215546Sopenharmony_ci{
411bf215546Sopenharmony_ci        unsigned index = (word_idx << 1) + ec0_packed;
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci        /* Do more constants follow */
414bf215546Sopenharmony_ci        bool more = (word_idx + 1) < constant_words;
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci        /* Indexed first by tuple count and second by constant word number,
417bf215546Sopenharmony_ci         * indicates the position in the clause */
418bf215546Sopenharmony_ci        unsigned pos_lookup[8][3] = {
419bf215546Sopenharmony_ci                { 0 },
420bf215546Sopenharmony_ci                { 1 },
421bf215546Sopenharmony_ci                { 3 },
422bf215546Sopenharmony_ci                { 2, 5 },
423bf215546Sopenharmony_ci                { 4, 8 },
424bf215546Sopenharmony_ci                { 7, 11, 14 },
425bf215546Sopenharmony_ci                { 6, 10, 13 },
426bf215546Sopenharmony_ci                { 9, 12 }
427bf215546Sopenharmony_ci        };
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_ci        /* Compute the pos, and check everything is reasonable */
430bf215546Sopenharmony_ci        assert((tuple_count - 1) < 8);
431bf215546Sopenharmony_ci        assert(word_idx < 3);
432bf215546Sopenharmony_ci        unsigned pos = pos_lookup[tuple_count - 1][word_idx];
433bf215546Sopenharmony_ci        assert(pos != 0 || (tuple_count == 1 && word_idx == 0));
434bf215546Sopenharmony_ci
435bf215546Sopenharmony_ci        struct bifrost_fmt_constant quad = {
436bf215546Sopenharmony_ci                .pos = pos,
437bf215546Sopenharmony_ci                .tag = more ? BIFROST_FMTC_CONSTANTS : BIFROST_FMTC_FINAL,
438bf215546Sopenharmony_ci                .imm_1 = constants[index + 0] >> 4,
439bf215546Sopenharmony_ci                .imm_2 = constants[index + 1] >> 4,
440bf215546Sopenharmony_ci        };
441bf215546Sopenharmony_ci
442bf215546Sopenharmony_ci        util_dynarray_append(emission, struct bifrost_fmt_constant, quad);
443bf215546Sopenharmony_ci}
444bf215546Sopenharmony_ci
445bf215546Sopenharmony_ciuint8_t
446bf215546Sopenharmony_cibi_pack_literal(enum bi_clause_subword literal)
447bf215546Sopenharmony_ci{
448bf215546Sopenharmony_ci        assert(literal >= BI_CLAUSE_SUBWORD_LITERAL_0);
449bf215546Sopenharmony_ci        assert(literal <= BI_CLAUSE_SUBWORD_LITERAL_7);
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci        return (literal - BI_CLAUSE_SUBWORD_LITERAL_0);
452bf215546Sopenharmony_ci}
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_cistatic inline uint8_t
455bf215546Sopenharmony_cibi_clause_upper(unsigned val,
456bf215546Sopenharmony_ci                struct bi_packed_tuple *tuples,
457bf215546Sopenharmony_ci                ASSERTED unsigned tuple_count)
458bf215546Sopenharmony_ci{
459bf215546Sopenharmony_ci        assert(val < tuple_count);
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci        /* top 3-bits of 78-bits is tuple >> 75 == (tuple >> 64) >> 11 */
462bf215546Sopenharmony_ci        struct bi_packed_tuple tuple = tuples[val];
463bf215546Sopenharmony_ci        return (tuple.hi >> 11);
464bf215546Sopenharmony_ci}
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_ciuint8_t
467bf215546Sopenharmony_cibi_pack_upper(enum bi_clause_subword upper,
468bf215546Sopenharmony_ci                struct bi_packed_tuple *tuples,
469bf215546Sopenharmony_ci                ASSERTED unsigned tuple_count)
470bf215546Sopenharmony_ci{
471bf215546Sopenharmony_ci        assert(upper >= BI_CLAUSE_SUBWORD_UPPER_0);
472bf215546Sopenharmony_ci        assert(upper <= BI_CLAUSE_SUBWORD_UPPER_7);
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_ci        return bi_clause_upper(upper - BI_CLAUSE_SUBWORD_UPPER_0, tuples,
475bf215546Sopenharmony_ci                        tuple_count);
476bf215546Sopenharmony_ci}
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ciuint64_t
479bf215546Sopenharmony_cibi_pack_tuple_bits(enum bi_clause_subword idx,
480bf215546Sopenharmony_ci                struct bi_packed_tuple *tuples,
481bf215546Sopenharmony_ci                ASSERTED unsigned tuple_count,
482bf215546Sopenharmony_ci                unsigned offset, unsigned nbits)
483bf215546Sopenharmony_ci{
484bf215546Sopenharmony_ci        assert(idx >= BI_CLAUSE_SUBWORD_TUPLE_0);
485bf215546Sopenharmony_ci        assert(idx <= BI_CLAUSE_SUBWORD_TUPLE_7);
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci        unsigned val = (idx - BI_CLAUSE_SUBWORD_TUPLE_0);
488bf215546Sopenharmony_ci        assert(val < tuple_count);
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci        struct bi_packed_tuple tuple = tuples[val];
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci        assert(offset + nbits < 78);
493bf215546Sopenharmony_ci        assert(nbits <= 64);
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_ci        /* (X >> start) & m
496bf215546Sopenharmony_ci         * = (((hi << 64) | lo) >> start) & m
497bf215546Sopenharmony_ci         * = (((hi << 64) >> start) | (lo >> start)) & m
498bf215546Sopenharmony_ci         * = { ((hi << (64 - start)) | (lo >> start)) & m if start <= 64
499bf215546Sopenharmony_ci         *   { ((hi >> (start - 64)) | (lo >> start)) & m if start >= 64
500bf215546Sopenharmony_ci         * = { ((hi << (64 - start)) & m) | ((lo >> start) & m) if start <= 64
501bf215546Sopenharmony_ci         *   { ((hi >> (start - 64)) & m) | ((lo >> start) & m) if start >= 64
502bf215546Sopenharmony_ci         *
503bf215546Sopenharmony_ci         * By setting m = 2^64 - 1, we justify doing the respective shifts as
504bf215546Sopenharmony_ci         * 64-bit integers. Zero special cased to avoid undefined behaviour.
505bf215546Sopenharmony_ci         */
506bf215546Sopenharmony_ci
507bf215546Sopenharmony_ci        uint64_t lo = (tuple.lo >> offset);
508bf215546Sopenharmony_ci        uint64_t hi = (offset == 0) ? 0
509bf215546Sopenharmony_ci                : (offset > 64) ? (tuple.hi >> (offset - 64))
510bf215546Sopenharmony_ci                : (tuple.hi << (64 - offset));
511bf215546Sopenharmony_ci
512bf215546Sopenharmony_ci        return (lo | hi) & ((1ULL << nbits) - 1);
513bf215546Sopenharmony_ci}
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_cistatic inline uint16_t
516bf215546Sopenharmony_cibi_pack_lu(enum bi_clause_subword word,
517bf215546Sopenharmony_ci                struct bi_packed_tuple *tuples,
518bf215546Sopenharmony_ci                ASSERTED unsigned tuple_count)
519bf215546Sopenharmony_ci{
520bf215546Sopenharmony_ci        return (word >= BI_CLAUSE_SUBWORD_UPPER_0) ?
521bf215546Sopenharmony_ci                bi_pack_upper(word, tuples, tuple_count) :
522bf215546Sopenharmony_ci                bi_pack_literal(word);
523bf215546Sopenharmony_ci}
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_ciuint8_t
526bf215546Sopenharmony_cibi_pack_sync(enum bi_clause_subword t1,
527bf215546Sopenharmony_ci             enum bi_clause_subword t2,
528bf215546Sopenharmony_ci             enum bi_clause_subword t3,
529bf215546Sopenharmony_ci             struct bi_packed_tuple *tuples,
530bf215546Sopenharmony_ci             ASSERTED unsigned tuple_count,
531bf215546Sopenharmony_ci             bool z)
532bf215546Sopenharmony_ci{
533bf215546Sopenharmony_ci        uint8_t sync =
534bf215546Sopenharmony_ci                (bi_pack_lu(t3, tuples, tuple_count) << 0) |
535bf215546Sopenharmony_ci                (bi_pack_lu(t2, tuples, tuple_count) << 3);
536bf215546Sopenharmony_ci
537bf215546Sopenharmony_ci        if (t1 == BI_CLAUSE_SUBWORD_Z)
538bf215546Sopenharmony_ci                sync |= z << 6;
539bf215546Sopenharmony_ci        else
540bf215546Sopenharmony_ci                sync |= bi_pack_literal(t1) << 6;
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci        return sync;
543bf215546Sopenharmony_ci}
544bf215546Sopenharmony_ci
545bf215546Sopenharmony_cistatic inline uint64_t
546bf215546Sopenharmony_cibi_pack_t_ec(enum bi_clause_subword word,
547bf215546Sopenharmony_ci                struct bi_packed_tuple *tuples,
548bf215546Sopenharmony_ci                ASSERTED unsigned tuple_count,
549bf215546Sopenharmony_ci                uint64_t ec0)
550bf215546Sopenharmony_ci{
551bf215546Sopenharmony_ci        if (word == BI_CLAUSE_SUBWORD_CONSTANT)
552bf215546Sopenharmony_ci                return ec0;
553bf215546Sopenharmony_ci        else
554bf215546Sopenharmony_ci                return bi_pack_tuple_bits(word, tuples, tuple_count, 0, 60);
555bf215546Sopenharmony_ci}
556bf215546Sopenharmony_ci
557bf215546Sopenharmony_cistatic uint32_t
558bf215546Sopenharmony_cibi_pack_subwords_56(enum bi_clause_subword t,
559bf215546Sopenharmony_ci                struct bi_packed_tuple *tuples,
560bf215546Sopenharmony_ci                ASSERTED unsigned tuple_count,
561bf215546Sopenharmony_ci                uint64_t header, uint64_t ec0,
562bf215546Sopenharmony_ci                unsigned tuple_subword)
563bf215546Sopenharmony_ci{
564bf215546Sopenharmony_ci        switch (t) {
565bf215546Sopenharmony_ci        case BI_CLAUSE_SUBWORD_HEADER:
566bf215546Sopenharmony_ci                return (header & ((1 << 30) - 1));
567bf215546Sopenharmony_ci        case BI_CLAUSE_SUBWORD_RESERVED:
568bf215546Sopenharmony_ci                return 0;
569bf215546Sopenharmony_ci        case BI_CLAUSE_SUBWORD_CONSTANT:
570bf215546Sopenharmony_ci                return (ec0 >> 15) & ((1 << 30) - 1);
571bf215546Sopenharmony_ci        default:
572bf215546Sopenharmony_ci                return bi_pack_tuple_bits(t, tuples, tuple_count, tuple_subword * 15, 30);
573bf215546Sopenharmony_ci        }
574bf215546Sopenharmony_ci}
575bf215546Sopenharmony_ci
576bf215546Sopenharmony_cistatic uint16_t
577bf215546Sopenharmony_cibi_pack_subword(enum bi_clause_subword t, unsigned format,
578bf215546Sopenharmony_ci                struct bi_packed_tuple *tuples,
579bf215546Sopenharmony_ci                ASSERTED unsigned tuple_count,
580bf215546Sopenharmony_ci                uint64_t header, uint64_t ec0, unsigned m0,
581bf215546Sopenharmony_ci                unsigned tuple_subword)
582bf215546Sopenharmony_ci{
583bf215546Sopenharmony_ci        switch (t) {
584bf215546Sopenharmony_ci        case BI_CLAUSE_SUBWORD_HEADER:
585bf215546Sopenharmony_ci                return header >> 30;
586bf215546Sopenharmony_ci        case BI_CLAUSE_SUBWORD_M:
587bf215546Sopenharmony_ci                return m0;
588bf215546Sopenharmony_ci        case BI_CLAUSE_SUBWORD_CONSTANT:
589bf215546Sopenharmony_ci                return (format == 5 || format == 10) ?
590bf215546Sopenharmony_ci                        (ec0 & ((1 << 15) - 1)) :
591bf215546Sopenharmony_ci                        (ec0 >> (15 + 30));
592bf215546Sopenharmony_ci        case BI_CLAUSE_SUBWORD_UPPER_23:
593bf215546Sopenharmony_ci                return (bi_clause_upper(2, tuples, tuple_count) << 12) |
594bf215546Sopenharmony_ci                        (bi_clause_upper(3, tuples, tuple_count) << 9);
595bf215546Sopenharmony_ci        case BI_CLAUSE_SUBWORD_UPPER_56:
596bf215546Sopenharmony_ci                return (bi_clause_upper(5, tuples, tuple_count) << 12) |
597bf215546Sopenharmony_ci                        (bi_clause_upper(6, tuples, tuple_count) << 9);
598bf215546Sopenharmony_ci        case BI_CLAUSE_SUBWORD_UPPER_0 ... BI_CLAUSE_SUBWORD_UPPER_7:
599bf215546Sopenharmony_ci                return bi_pack_upper(t, tuples, tuple_count) << 12;
600bf215546Sopenharmony_ci        default:
601bf215546Sopenharmony_ci                return bi_pack_tuple_bits(t, tuples, tuple_count, tuple_subword * 15, 15);
602bf215546Sopenharmony_ci        }
603bf215546Sopenharmony_ci}
604bf215546Sopenharmony_ci
605bf215546Sopenharmony_ci/* EC0 is 60-bits (bottom 4 already shifted off) */
606bf215546Sopenharmony_civoid
607bf215546Sopenharmony_cibi_pack_format(struct util_dynarray *emission,
608bf215546Sopenharmony_ci                unsigned index,
609bf215546Sopenharmony_ci                struct bi_packed_tuple *tuples,
610bf215546Sopenharmony_ci                ASSERTED unsigned tuple_count,
611bf215546Sopenharmony_ci                uint64_t header, uint64_t ec0,
612bf215546Sopenharmony_ci                unsigned m0, bool z)
613bf215546Sopenharmony_ci{
614bf215546Sopenharmony_ci        struct bi_clause_format format = bi_clause_formats[index];
615bf215546Sopenharmony_ci
616bf215546Sopenharmony_ci        uint8_t sync = bi_pack_sync(format.tag_1, format.tag_2, format.tag_3,
617bf215546Sopenharmony_ci                        tuples, tuple_count, z);
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_ci        uint64_t s0_s3 = bi_pack_t_ec(format.s0_s3, tuples, tuple_count, ec0);
620bf215546Sopenharmony_ci
621bf215546Sopenharmony_ci        uint16_t s4 = bi_pack_subword(format.s4, format.format, tuples, tuple_count, header, ec0, m0, 4);
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci        uint32_t s5_s6 = bi_pack_subwords_56(format.s5_s6,
624bf215546Sopenharmony_ci                        tuples, tuple_count, header, ec0,
625bf215546Sopenharmony_ci                        (format.format == 2 || format.format == 7) ? 0 : 3);
626bf215546Sopenharmony_ci
627bf215546Sopenharmony_ci        uint64_t s7 = bi_pack_subword(format.s7, format.format, tuples, tuple_count, header, ec0, m0, 2);
628bf215546Sopenharmony_ci
629bf215546Sopenharmony_ci        /* Now that subwords are packed, split into 64-bit halves and emit */
630bf215546Sopenharmony_ci        uint64_t lo = sync | ((s0_s3 & ((1ull << 56) - 1)) << 8);
631bf215546Sopenharmony_ci        uint64_t hi = (s0_s3 >> 56) | ((uint64_t) s4 << 4) | ((uint64_t) s5_s6 << 19) | ((uint64_t) s7 << 49);
632bf215546Sopenharmony_ci
633bf215546Sopenharmony_ci        util_dynarray_append(emission, uint64_t, lo);
634bf215546Sopenharmony_ci        util_dynarray_append(emission, uint64_t, hi);
635bf215546Sopenharmony_ci}
636bf215546Sopenharmony_ci
637bf215546Sopenharmony_cistatic void
638bf215546Sopenharmony_cibi_pack_clause(bi_context *ctx, bi_clause *clause,
639bf215546Sopenharmony_ci                bi_clause *next_1, bi_clause *next_2,
640bf215546Sopenharmony_ci                struct util_dynarray *emission, gl_shader_stage stage)
641bf215546Sopenharmony_ci{
642bf215546Sopenharmony_ci        struct bi_packed_tuple ins[8] = { 0 };
643bf215546Sopenharmony_ci
644bf215546Sopenharmony_ci        for (unsigned i = 0; i < clause->tuple_count; ++i) {
645bf215546Sopenharmony_ci                unsigned prev = ((i == 0) ? clause->tuple_count : i) - 1;
646bf215546Sopenharmony_ci                ins[i] = bi_pack_tuple(clause, &clause->tuples[i],
647bf215546Sopenharmony_ci                                &clause->tuples[prev], i == 0, stage);
648bf215546Sopenharmony_ci
649bf215546Sopenharmony_ci                bi_instr *add = clause->tuples[i].add;
650bf215546Sopenharmony_ci
651bf215546Sopenharmony_ci                /* Different GPUs support different forms of the CLPER.i32
652bf215546Sopenharmony_ci                 * instruction. Check we use the right one for the target.
653bf215546Sopenharmony_ci                 */
654bf215546Sopenharmony_ci                if (add && add->op == BI_OPCODE_CLPER_OLD_I32)
655bf215546Sopenharmony_ci                        assert(ctx->quirks & BIFROST_LIMITED_CLPER);
656bf215546Sopenharmony_ci                else if (add && add->op == BI_OPCODE_CLPER_I32)
657bf215546Sopenharmony_ci                        assert(!(ctx->quirks & BIFROST_LIMITED_CLPER));
658bf215546Sopenharmony_ci        }
659bf215546Sopenharmony_ci
660bf215546Sopenharmony_ci        bool ec0_packed = bi_ec0_packed(clause->tuple_count);
661bf215546Sopenharmony_ci
662bf215546Sopenharmony_ci        if (ec0_packed)
663bf215546Sopenharmony_ci                clause->constant_count = MAX2(clause->constant_count, 1);
664bf215546Sopenharmony_ci
665bf215546Sopenharmony_ci        unsigned constant_quads =
666bf215546Sopenharmony_ci                DIV_ROUND_UP(clause->constant_count - (ec0_packed ? 1 : 0), 2);
667bf215546Sopenharmony_ci
668bf215546Sopenharmony_ci        uint64_t header = bi_pack_header(clause, next_1, next_2);
669bf215546Sopenharmony_ci        uint64_t ec0 = (clause->constants[0] >> 4);
670bf215546Sopenharmony_ci        unsigned m0 = (clause->pcrel_idx == 0) ? 4 : 0;
671bf215546Sopenharmony_ci
672bf215546Sopenharmony_ci        unsigned counts[8] = {
673bf215546Sopenharmony_ci                1, 2, 3, 3, 4, 5, 5, 6
674bf215546Sopenharmony_ci        };
675bf215546Sopenharmony_ci
676bf215546Sopenharmony_ci        unsigned indices[8][6] = {
677bf215546Sopenharmony_ci                { 1 },
678bf215546Sopenharmony_ci                { 0, 2 },
679bf215546Sopenharmony_ci                { 0, 3, 4 },
680bf215546Sopenharmony_ci                { 0, 3, 6 },
681bf215546Sopenharmony_ci                { 0, 3, 7, 8 },
682bf215546Sopenharmony_ci                { 0, 3, 5, 9, 10 },
683bf215546Sopenharmony_ci                { 0, 3, 5, 9, 11 },
684bf215546Sopenharmony_ci                { 0, 3, 5, 9, 12, 13 },
685bf215546Sopenharmony_ci        };
686bf215546Sopenharmony_ci
687bf215546Sopenharmony_ci        unsigned count = counts[clause->tuple_count - 1];
688bf215546Sopenharmony_ci
689bf215546Sopenharmony_ci        for (unsigned pos = 0; pos < count; ++pos) {
690bf215546Sopenharmony_ci                ASSERTED unsigned idx = indices[clause->tuple_count - 1][pos];
691bf215546Sopenharmony_ci                assert(bi_clause_formats[idx].pos == pos);
692bf215546Sopenharmony_ci                assert((bi_clause_formats[idx].tag_1 == BI_CLAUSE_SUBWORD_Z) ==
693bf215546Sopenharmony_ci                                (pos == count - 1));
694bf215546Sopenharmony_ci
695bf215546Sopenharmony_ci                /* Whether to end the clause immediately after the last tuple */
696bf215546Sopenharmony_ci                bool z = (constant_quads == 0);
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_ci                bi_pack_format(emission, indices[clause->tuple_count - 1][pos],
699bf215546Sopenharmony_ci                                ins, clause->tuple_count, header, ec0, m0,
700bf215546Sopenharmony_ci                                z);
701bf215546Sopenharmony_ci        }
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci        /* Pack the remaining constants */
704bf215546Sopenharmony_ci
705bf215546Sopenharmony_ci        for (unsigned pos = 0; pos < constant_quads; ++pos) {
706bf215546Sopenharmony_ci                bi_pack_constants(clause->tuple_count, clause->constants,
707bf215546Sopenharmony_ci                                pos, constant_quads, ec0_packed, emission);
708bf215546Sopenharmony_ci        }
709bf215546Sopenharmony_ci}
710bf215546Sopenharmony_ci
711bf215546Sopenharmony_cistatic void
712bf215546Sopenharmony_cibi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission,
713bf215546Sopenharmony_ci                          const bi_clause *clause)
714bf215546Sopenharmony_ci{
715bf215546Sopenharmony_ci        /* No need to collect return addresses when we're in a blend shader. */
716bf215546Sopenharmony_ci        if (ctx->inputs->is_blend)
717bf215546Sopenharmony_ci                return;
718bf215546Sopenharmony_ci
719bf215546Sopenharmony_ci        const bi_tuple *tuple = &clause->tuples[clause->tuple_count - 1];
720bf215546Sopenharmony_ci        const bi_instr *ins = tuple->add;
721bf215546Sopenharmony_ci
722bf215546Sopenharmony_ci        if (!ins || ins->op != BI_OPCODE_BLEND)
723bf215546Sopenharmony_ci                return;
724bf215546Sopenharmony_ci
725bf215546Sopenharmony_ci
726bf215546Sopenharmony_ci        unsigned loc = tuple->regs.fau_idx - BIR_FAU_BLEND_0;
727bf215546Sopenharmony_ci        assert(loc < ARRAY_SIZE(ctx->info.bifrost->blend));
728bf215546Sopenharmony_ci        assert(!ctx->info.bifrost->blend[loc].return_offset);
729bf215546Sopenharmony_ci        ctx->info.bifrost->blend[loc].return_offset =
730bf215546Sopenharmony_ci                util_dynarray_num_elements(emission, uint8_t);
731bf215546Sopenharmony_ci        assert(!(ctx->info.bifrost->blend[loc].return_offset & 0x7));
732bf215546Sopenharmony_ci}
733bf215546Sopenharmony_ci
734bf215546Sopenharmony_ciunsigned
735bf215546Sopenharmony_cibi_pack(bi_context *ctx, struct util_dynarray *emission)
736bf215546Sopenharmony_ci{
737bf215546Sopenharmony_ci        unsigned previous_size = emission->size;
738bf215546Sopenharmony_ci
739bf215546Sopenharmony_ci        bi_foreach_block(ctx, block) {
740bf215546Sopenharmony_ci                bi_assign_branch_offset(ctx, block);
741bf215546Sopenharmony_ci
742bf215546Sopenharmony_ci                bi_foreach_clause_in_block(block, clause) {
743bf215546Sopenharmony_ci                        bool is_last = (clause->link.next == &block->clauses);
744bf215546Sopenharmony_ci
745bf215546Sopenharmony_ci                        /* Get the succeeding clauses, either two successors of
746bf215546Sopenharmony_ci                         * the block for the last clause in the block or just
747bf215546Sopenharmony_ci                         * the next clause within the block */
748bf215546Sopenharmony_ci
749bf215546Sopenharmony_ci                        bi_clause *next = NULL, *next_2 = NULL;
750bf215546Sopenharmony_ci
751bf215546Sopenharmony_ci                        if (is_last) {
752bf215546Sopenharmony_ci                                next = bi_next_clause(ctx, block->successors[0], NULL);
753bf215546Sopenharmony_ci                                next_2 = bi_next_clause(ctx, block->successors[1], NULL);
754bf215546Sopenharmony_ci                        } else {
755bf215546Sopenharmony_ci                                next = bi_next_clause(ctx, block, clause);
756bf215546Sopenharmony_ci                        }
757bf215546Sopenharmony_ci
758bf215546Sopenharmony_ci
759bf215546Sopenharmony_ci                        previous_size = emission->size;
760bf215546Sopenharmony_ci
761bf215546Sopenharmony_ci                        bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage);
762bf215546Sopenharmony_ci
763bf215546Sopenharmony_ci                        if (!is_last)
764bf215546Sopenharmony_ci                                bi_collect_blend_ret_addr(ctx, emission, clause);
765bf215546Sopenharmony_ci                }
766bf215546Sopenharmony_ci        }
767bf215546Sopenharmony_ci
768bf215546Sopenharmony_ci        return emission->size - previous_size;
769bf215546Sopenharmony_ci}
770