1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2019 Connor Abbott <cwabbott0@gmail.com>
3bf215546Sopenharmony_ci * Copyright (C) 2019 Lyude Paul <thatslyude@gmail.com>
4bf215546Sopenharmony_ci * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
5bf215546Sopenharmony_ci *
6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
8bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
9bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
11bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
12bf215546Sopenharmony_ci *
13bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
14bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
15bf215546Sopenharmony_ci * Software.
16bf215546Sopenharmony_ci *
17bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23bf215546Sopenharmony_ci * SOFTWARE.
24bf215546Sopenharmony_ci */
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci#include <stdbool.h>
27bf215546Sopenharmony_ci#include <stdio.h>
28bf215546Sopenharmony_ci#include <stdint.h>
29bf215546Sopenharmony_ci#include <assert.h>
30bf215546Sopenharmony_ci#include <inttypes.h>
31bf215546Sopenharmony_ci#include <string.h>
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_ci#include "bifrost.h"
34bf215546Sopenharmony_ci#include "disassemble.h"
35bf215546Sopenharmony_ci#include "bi_print_common.h"
36bf215546Sopenharmony_ci#include "util/compiler.h"
37bf215546Sopenharmony_ci#include "util/macros.h"
38bf215546Sopenharmony_ci
/* Extract bits [lo, high) of `word`: bit `lo` is the lowest bit returned and
 * bit `high` is the first bit left out. `high` must not exceed 32 and `lo`
 * must be below 32. */
static uint64_t bits(uint32_t word, unsigned lo, unsigned high)
{
        /* A 32-bit shift by 32 is undefined, so skip the mask entirely when
         * the field reaches the top of the word. */
        if (high == 32)
                return word >> lo;

        /* Build the mask from an unsigned constant: `1 << 31` would overflow
         * a signed int. */
        return (word & ((UINT32_C(1) << high) - 1)) >> lo;
}
46bf215546Sopenharmony_ci
/* One of these describes what is dispatched in a single cycle. The clause
 * encoding packs the pieces in irregular places, so they are collected into
 * this separate struct while a clause is being decoded. */
struct bifrost_alu_inst {
        /* Undecoded instruction bits for the FMA half */
        uint32_t fma_bits;
        /* Undecoded instruction bits for the ADD half */
        uint32_t add_bits;
        /* Undecoded register field bits */
        uint64_t reg_bits;
};
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_cistatic unsigned get_reg0(struct bifrost_regs regs)
57bf215546Sopenharmony_ci{
58bf215546Sopenharmony_ci        if (regs.ctrl == 0)
59bf215546Sopenharmony_ci                return regs.reg0 | ((regs.reg1 & 0x1) << 5);
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci        return regs.reg0 <= regs.reg1 ? regs.reg0 : 63 - regs.reg0;
62bf215546Sopenharmony_ci}
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_cistatic unsigned get_reg1(struct bifrost_regs regs)
65bf215546Sopenharmony_ci{
66bf215546Sopenharmony_ci        return regs.reg0 <= regs.reg1 ? regs.reg1 : 63 - regs.reg1;
67bf215546Sopenharmony_ci}
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_ci// this represents the decoded version of the ctrl register field.
70bf215546Sopenharmony_cistruct bifrost_reg_ctrl {
71bf215546Sopenharmony_ci        bool read_reg0;
72bf215546Sopenharmony_ci        bool read_reg1;
73bf215546Sopenharmony_ci        struct bifrost_reg_ctrl_23 slot23;
74bf215546Sopenharmony_ci};
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_cistatic void dump_header(FILE *fp, struct bifrost_header header, bool verbose)
77bf215546Sopenharmony_ci{
78bf215546Sopenharmony_ci        fprintf(fp, "ds(%u) ", header.dependency_slot);
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci        if (header.staging_barrier)
81bf215546Sopenharmony_ci                fprintf(fp, "osrb ");
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci        fprintf(fp, "%s ", bi_flow_control_name(header.flow_control));
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci        if (header.suppress_inf)
86bf215546Sopenharmony_ci                fprintf(fp, "inf_suppress ");
87bf215546Sopenharmony_ci        if (header.suppress_nan)
88bf215546Sopenharmony_ci                fprintf(fp, "nan_suppress ");
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci        if (header.flush_to_zero == BIFROST_FTZ_DX11)
91bf215546Sopenharmony_ci                fprintf(fp, "ftz_dx11 ");
92bf215546Sopenharmony_ci        else if (header.flush_to_zero == BIFROST_FTZ_ALWAYS)
93bf215546Sopenharmony_ci                fprintf(fp, "ftz_hsa ");
94bf215546Sopenharmony_ci        if (header.flush_to_zero == BIFROST_FTZ_ABRUPT)
95bf215546Sopenharmony_ci                fprintf(fp, "ftz_au ");
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci        assert(!header.zero1);
98bf215546Sopenharmony_ci        assert(!header.zero2);
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci        if (header.float_exceptions == BIFROST_EXCEPTIONS_DISABLED)
101bf215546Sopenharmony_ci                fprintf(fp, "fpe_ts ");
102bf215546Sopenharmony_ci        else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_DIVISION)
103bf215546Sopenharmony_ci                fprintf(fp, "fpe_pd ");
104bf215546Sopenharmony_ci        else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_SQRT)
105bf215546Sopenharmony_ci                fprintf(fp, "fpe_psqr ");
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_ci        if (header.message_type)
108bf215546Sopenharmony_ci                fprintf(fp, "%s ", bi_message_type_name(header.message_type));
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_ci        if (header.terminate_discarded_threads)
111bf215546Sopenharmony_ci                fprintf(fp, "td ");
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci        if (header.next_clause_prefetch)
114bf215546Sopenharmony_ci                fprintf(fp, "ncph ");
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_ci        if (header.next_message_type)
117bf215546Sopenharmony_ci                fprintf(fp, "next_%s ", bi_message_type_name(header.next_message_type));
118bf215546Sopenharmony_ci        if (header.dependency_wait != 0) {
119bf215546Sopenharmony_ci                fprintf(fp, "dwb(");
120bf215546Sopenharmony_ci                bool first = true;
121bf215546Sopenharmony_ci                for (unsigned i = 0; i < 8; i++) {
122bf215546Sopenharmony_ci                        if (header.dependency_wait & (1 << i)) {
123bf215546Sopenharmony_ci                                if (!first) {
124bf215546Sopenharmony_ci                                        fprintf(fp, ", ");
125bf215546Sopenharmony_ci                                }
126bf215546Sopenharmony_ci                                fprintf(fp, "%u", i);
127bf215546Sopenharmony_ci                                first = false;
128bf215546Sopenharmony_ci                        }
129bf215546Sopenharmony_ci                }
130bf215546Sopenharmony_ci                fprintf(fp, ") ");
131bf215546Sopenharmony_ci        }
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci        fprintf(fp, "\n");
134bf215546Sopenharmony_ci}
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_cistatic struct bifrost_reg_ctrl DecodeRegCtrl(FILE *fp, struct bifrost_regs regs, bool first)
137bf215546Sopenharmony_ci{
138bf215546Sopenharmony_ci        struct bifrost_reg_ctrl decoded = {};
139bf215546Sopenharmony_ci        unsigned ctrl;
140bf215546Sopenharmony_ci        if (regs.ctrl == 0) {
141bf215546Sopenharmony_ci                ctrl = regs.reg1 >> 2;
142bf215546Sopenharmony_ci                decoded.read_reg0 = !(regs.reg1 & 0x2);
143bf215546Sopenharmony_ci                decoded.read_reg1 = false;
144bf215546Sopenharmony_ci        } else {
145bf215546Sopenharmony_ci                ctrl = regs.ctrl;
146bf215546Sopenharmony_ci                decoded.read_reg0 = decoded.read_reg1 = true;
147bf215546Sopenharmony_ci        }
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ci        /* Modify control based on state */
150bf215546Sopenharmony_ci        if (first)
151bf215546Sopenharmony_ci                ctrl = (ctrl & 0x7) | ((ctrl & 0x8) << 1);
152bf215546Sopenharmony_ci        else if (regs.reg2 == regs.reg3)
153bf215546Sopenharmony_ci                ctrl += 16;
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci        decoded.slot23 = bifrost_reg_ctrl_lut[ctrl];
156bf215546Sopenharmony_ci        ASSERTED struct bifrost_reg_ctrl_23 reserved = { 0 };
157bf215546Sopenharmony_ci        assert(memcmp(&decoded.slot23, &reserved, sizeof(reserved)));
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_ci        return decoded;
160bf215546Sopenharmony_ci}
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_cistatic void dump_regs(FILE *fp, struct bifrost_regs srcs, bool first)
163bf215546Sopenharmony_ci{
164bf215546Sopenharmony_ci        struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, srcs, first);
165bf215546Sopenharmony_ci        fprintf(fp, "    # ");
166bf215546Sopenharmony_ci        if (ctrl.read_reg0)
167bf215546Sopenharmony_ci                fprintf(fp, "slot 0: r%u ", get_reg0(srcs));
168bf215546Sopenharmony_ci        if (ctrl.read_reg1)
169bf215546Sopenharmony_ci                fprintf(fp, "slot 1: r%u ", get_reg1(srcs));
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci        const char *slot3_fma = ctrl.slot23.slot3_fma ? "FMA" : "ADD";
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_ci        if (ctrl.slot23.slot2 == BIFROST_OP_WRITE)
174bf215546Sopenharmony_ci                fprintf(fp, "slot 2: r%u (write FMA) ", srcs.reg2);
175bf215546Sopenharmony_ci        else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_LO)
176bf215546Sopenharmony_ci                fprintf(fp, "slot 2: r%u (write lo FMA) ", srcs.reg2);
177bf215546Sopenharmony_ci        else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_HI)
178bf215546Sopenharmony_ci                fprintf(fp, "slot 2: r%u (write hi FMA) ", srcs.reg2);
179bf215546Sopenharmony_ci        else if (ctrl.slot23.slot2 == BIFROST_OP_READ)
180bf215546Sopenharmony_ci                fprintf(fp, "slot 2: r%u (read) ", srcs.reg2);
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci        if (ctrl.slot23.slot3 == BIFROST_OP_WRITE)
183bf215546Sopenharmony_ci                fprintf(fp, "slot 3: r%u (write %s) ", srcs.reg3, slot3_fma);
184bf215546Sopenharmony_ci        else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_LO)
185bf215546Sopenharmony_ci                fprintf(fp, "slot 3: r%u (write lo %s) ", srcs.reg3, slot3_fma);
186bf215546Sopenharmony_ci        else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_HI)
187bf215546Sopenharmony_ci                fprintf(fp, "slot 3: r%u (write hi %s) ", srcs.reg3, slot3_fma);
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci        if (srcs.fau_idx)
190bf215546Sopenharmony_ci                fprintf(fp, "fau %X ", srcs.fau_idx);
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci        fprintf(fp, "\n");
193bf215546Sopenharmony_ci}
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_cistatic void
196bf215546Sopenharmony_cibi_disasm_dest_mask(FILE *fp, enum bifrost_reg_op op)
197bf215546Sopenharmony_ci{
198bf215546Sopenharmony_ci        if (op == BIFROST_OP_WRITE_LO)
199bf215546Sopenharmony_ci                fprintf(fp, ".h0");
200bf215546Sopenharmony_ci        else if (op == BIFROST_OP_WRITE_HI)
201bf215546Sopenharmony_ci                fprintf(fp, ".h1");
202bf215546Sopenharmony_ci}
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_civoid
205bf215546Sopenharmony_cibi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool last)
206bf215546Sopenharmony_ci{
207bf215546Sopenharmony_ci    /* If this is the last instruction, next_regs points to the first reg entry. */
208bf215546Sopenharmony_ci    struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last);
209bf215546Sopenharmony_ci    if (ctrl.slot23.slot2 >= BIFROST_OP_WRITE) {
210bf215546Sopenharmony_ci        fprintf(fp, "r%u:t0", next_regs->reg2);
211bf215546Sopenharmony_ci        bi_disasm_dest_mask(fp, ctrl.slot23.slot2);
212bf215546Sopenharmony_ci    } else if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && ctrl.slot23.slot3_fma) {
213bf215546Sopenharmony_ci        fprintf(fp, "r%u:t0", next_regs->reg3);
214bf215546Sopenharmony_ci        bi_disasm_dest_mask(fp, ctrl.slot23.slot3);
215bf215546Sopenharmony_ci    } else
216bf215546Sopenharmony_ci        fprintf(fp, "t0");
217bf215546Sopenharmony_ci}
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_civoid
220bf215546Sopenharmony_cibi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool last)
221bf215546Sopenharmony_ci{
222bf215546Sopenharmony_ci    /* If this is the last instruction, next_regs points to the first reg entry. */
223bf215546Sopenharmony_ci    struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last);
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci    if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && !ctrl.slot23.slot3_fma) {
226bf215546Sopenharmony_ci        fprintf(fp, "r%u:t1", next_regs->reg3);
227bf215546Sopenharmony_ci        bi_disasm_dest_mask(fp, ctrl.slot23.slot3);
228bf215546Sopenharmony_ci    } else
229bf215546Sopenharmony_ci        fprintf(fp, "t1");
230bf215546Sopenharmony_ci}
231bf215546Sopenharmony_ci
/* Print a 32-bit immediate as zero-padded hex, followed by a comment showing
 * the same bit pattern reinterpreted as a float. */
static void dump_const_imm(FILE *fp, uint32_t imm)
{
        /* Reinterpret the bits through a union, which C permits */
        union {
                float f;
                uint32_t i;
        } fi;
        fi.i = imm;

        /* PRIx32 matches uint32_t on every platform; a plain %x only does so
         * where uint32_t happens to be unsigned int. */
        fprintf(fp, "0x%08" PRIx32 " /* %f */", imm, (double) fi.f);
}
241bf215546Sopenharmony_ci
242bf215546Sopenharmony_cistatic void
243bf215546Sopenharmony_cidump_pc_imm(FILE *fp, uint64_t imm, unsigned branch_offset, enum bi_constmod mod, bool high32)
244bf215546Sopenharmony_ci{
245bf215546Sopenharmony_ci        if (mod == BI_CONSTMOD_PC_HI && !high32) {
246bf215546Sopenharmony_ci                dump_const_imm(fp, imm);
247bf215546Sopenharmony_ci                return;
248bf215546Sopenharmony_ci        }
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci        /* 60-bit sign-extend */
251bf215546Sopenharmony_ci        uint64_t zx64 = (imm << 4);
252bf215546Sopenharmony_ci        int64_t sx64 = zx64;
253bf215546Sopenharmony_ci        sx64 >>= 4;
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_ci        /* 28-bit sign extend x 2 */
256bf215546Sopenharmony_ci        uint32_t imm32[2] = { (uint32_t) imm, (uint32_t) (imm >> 32) };
257bf215546Sopenharmony_ci        uint32_t zx32[2] = { imm32[0] << 4, imm32[1] << 4 };
258bf215546Sopenharmony_ci        int32_t sx32[2] = { zx32[0], zx32[1] };
259bf215546Sopenharmony_ci        sx32[0] >>= 4;
260bf215546Sopenharmony_ci        sx32[1] >>= 4;
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_ci        int64_t offs = 0;
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci        switch (mod) {
265bf215546Sopenharmony_ci        case BI_CONSTMOD_PC_LO:
266bf215546Sopenharmony_ci                offs = sx64;
267bf215546Sopenharmony_ci                break;
268bf215546Sopenharmony_ci        case BI_CONSTMOD_PC_HI:
269bf215546Sopenharmony_ci                offs = sx32[1];
270bf215546Sopenharmony_ci                break;
271bf215546Sopenharmony_ci        case BI_CONSTMOD_PC_LO_HI:
272bf215546Sopenharmony_ci                offs = sx32[high32];
273bf215546Sopenharmony_ci                break;
274bf215546Sopenharmony_ci        default:
275bf215546Sopenharmony_ci                unreachable("Invalid PC modifier");
276bf215546Sopenharmony_ci        }
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci        assert((offs & 15) == 0);
279bf215546Sopenharmony_ci        fprintf(fp, "clause_%" PRId64, branch_offset + (offs / 16));
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci        if (mod == BI_CONSTMOD_PC_LO && high32)
282bf215546Sopenharmony_ci                fprintf(fp, " >> 32");
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_ci        /* While technically in spec, referencing the current clause as (pc +
285bf215546Sopenharmony_ci         * 0) likely indicates an unintended infinite loop  */
286bf215546Sopenharmony_ci        if (offs == 0)
287bf215546Sopenharmony_ci                fprintf(fp, " /* XXX: likely an infinite loop */");
288bf215546Sopenharmony_ci}
289bf215546Sopenharmony_ci
/* Convert the FAU-RAM index of an embedded constant into the index of that
 * constant within the clause. No, it's not in order. Yes, really.
 *
 * fau_value must be in 2..7; values 0 and 1 do not name an embedded constant
 * and anything above 7 is outside the table. Both cases trip an assert. */

static unsigned
const_fau_to_idx(unsigned fau_value)
{
        /* Read-only table, so build it once instead of on every call */
        static const unsigned map[8] = {
                ~0u, ~0u, 4, 5, 0, 1, 2, 3
        };

        /* Check the range before indexing so a bad value cannot read past
         * the table */
        assert(fau_value < sizeof(map) / sizeof(map[0]));
        assert(map[fau_value] < 6);
        return map[fau_value];
}
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_cistatic void dump_fau_src(FILE *fp, struct bifrost_regs srcs, unsigned branch_offset, struct bi_constants *consts, bool high32)
305bf215546Sopenharmony_ci{
306bf215546Sopenharmony_ci        if (srcs.fau_idx & 0x80) {
307bf215546Sopenharmony_ci                unsigned uniform = (srcs.fau_idx & 0x7f);
308bf215546Sopenharmony_ci                fprintf(fp, "u%u.w%u", uniform, high32);
309bf215546Sopenharmony_ci        } else if (srcs.fau_idx >= 0x20) {
310bf215546Sopenharmony_ci                unsigned idx = const_fau_to_idx(srcs.fau_idx >> 4);
311bf215546Sopenharmony_ci                uint64_t imm = consts->raw[idx];
312bf215546Sopenharmony_ci                imm |= (srcs.fau_idx & 0xf);
313bf215546Sopenharmony_ci                if (consts->mods[idx] != BI_CONSTMOD_NONE)
314bf215546Sopenharmony_ci                        dump_pc_imm(fp, imm, branch_offset, consts->mods[idx], high32);
315bf215546Sopenharmony_ci                else if (high32)
316bf215546Sopenharmony_ci                        dump_const_imm(fp, imm >> 32);
317bf215546Sopenharmony_ci                else
318bf215546Sopenharmony_ci                        dump_const_imm(fp, imm);
319bf215546Sopenharmony_ci        } else {
320bf215546Sopenharmony_ci                switch (srcs.fau_idx) {
321bf215546Sopenharmony_ci                case 0:
322bf215546Sopenharmony_ci                        fprintf(fp, "#0");
323bf215546Sopenharmony_ci                        break;
324bf215546Sopenharmony_ci                case 1:
325bf215546Sopenharmony_ci                        fprintf(fp, "lane_id");
326bf215546Sopenharmony_ci                        break;
327bf215546Sopenharmony_ci                case 2:
328bf215546Sopenharmony_ci                        fprintf(fp, "warp_id");
329bf215546Sopenharmony_ci                        break;
330bf215546Sopenharmony_ci                case 3:
331bf215546Sopenharmony_ci                        fprintf(fp, "core_id");
332bf215546Sopenharmony_ci                        break;
333bf215546Sopenharmony_ci                case 4:
334bf215546Sopenharmony_ci                        fprintf(fp, "framebuffer_size");
335bf215546Sopenharmony_ci                        break;
336bf215546Sopenharmony_ci                case 5:
337bf215546Sopenharmony_ci                        fprintf(fp, "atest_datum");
338bf215546Sopenharmony_ci                        break;
339bf215546Sopenharmony_ci                case 6:
340bf215546Sopenharmony_ci                        fprintf(fp, "sample");
341bf215546Sopenharmony_ci                        break;
342bf215546Sopenharmony_ci                case 8:
343bf215546Sopenharmony_ci                case 9:
344bf215546Sopenharmony_ci                case 10:
345bf215546Sopenharmony_ci                case 11:
346bf215546Sopenharmony_ci                case 12:
347bf215546Sopenharmony_ci                case 13:
348bf215546Sopenharmony_ci                case 14:
349bf215546Sopenharmony_ci                case 15:
350bf215546Sopenharmony_ci                        fprintf(fp, "blend_descriptor_%u", (unsigned) srcs.fau_idx - 8);
351bf215546Sopenharmony_ci                        break;
352bf215546Sopenharmony_ci                default:
353bf215546Sopenharmony_ci                        fprintf(fp, "XXX - reserved%u", (unsigned) srcs.fau_idx);
354bf215546Sopenharmony_ci                        break;
355bf215546Sopenharmony_ci                }
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci                if (high32)
358bf215546Sopenharmony_ci                        fprintf(fp, ".y");
359bf215546Sopenharmony_ci                else
360bf215546Sopenharmony_ci                        fprintf(fp, ".x");
361bf215546Sopenharmony_ci        }
362bf215546Sopenharmony_ci}
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_civoid
365bf215546Sopenharmony_cidump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, unsigned branch_offset, struct bi_constants *consts, bool isFMA)
366bf215546Sopenharmony_ci{
367bf215546Sopenharmony_ci        switch (src) {
368bf215546Sopenharmony_ci        case 0:
369bf215546Sopenharmony_ci                fprintf(fp, "r%u", get_reg0(srcs));
370bf215546Sopenharmony_ci                break;
371bf215546Sopenharmony_ci        case 1:
372bf215546Sopenharmony_ci                fprintf(fp, "r%u", get_reg1(srcs));
373bf215546Sopenharmony_ci                break;
374bf215546Sopenharmony_ci        case 2:
375bf215546Sopenharmony_ci                fprintf(fp, "r%u", srcs.reg2);
376bf215546Sopenharmony_ci                break;
377bf215546Sopenharmony_ci        case 3:
378bf215546Sopenharmony_ci                if (isFMA)
379bf215546Sopenharmony_ci                        fprintf(fp, "#0");
380bf215546Sopenharmony_ci                else
381bf215546Sopenharmony_ci                        fprintf(fp, "t"); // i.e. the output of FMA this cycle
382bf215546Sopenharmony_ci                break;
383bf215546Sopenharmony_ci        case 4:
384bf215546Sopenharmony_ci                dump_fau_src(fp, srcs, branch_offset, consts, false);
385bf215546Sopenharmony_ci                break;
386bf215546Sopenharmony_ci        case 5:
387bf215546Sopenharmony_ci                dump_fau_src(fp, srcs, branch_offset, consts, true);
388bf215546Sopenharmony_ci                break;
389bf215546Sopenharmony_ci        case 6:
390bf215546Sopenharmony_ci                fprintf(fp, "t0");
391bf215546Sopenharmony_ci                break;
392bf215546Sopenharmony_ci        case 7:
393bf215546Sopenharmony_ci                fprintf(fp, "t1");
394bf215546Sopenharmony_ci                break;
395bf215546Sopenharmony_ci        }
396bf215546Sopenharmony_ci}
397bf215546Sopenharmony_ci
/* Tables for decoding M1, or if M1 == 7, M2 respectively.
 *
 * XXX: It's not clear if the third entry of M2_table corresponding to (7, 2)
 * should have PC_LO_HI in the EC1 slot, or it's a weird hybrid mode? I would
 * say this needs testing but no code should ever actually use this mode.
 */
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_cistatic const enum bi_constmod M1_table[7][2] = {
406bf215546Sopenharmony_ci        { BI_CONSTMOD_NONE, BI_CONSTMOD_NONE },
407bf215546Sopenharmony_ci        { BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE },
408bf215546Sopenharmony_ci        { BI_CONSTMOD_PC_LO, BI_CONSTMOD_PC_LO },
409bf215546Sopenharmony_ci        { ~0, ~0 },
410bf215546Sopenharmony_ci        { BI_CONSTMOD_PC_HI, BI_CONSTMOD_NONE },
411bf215546Sopenharmony_ci        { BI_CONSTMOD_PC_HI, BI_CONSTMOD_PC_HI },
412bf215546Sopenharmony_ci        { BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE },
413bf215546Sopenharmony_ci};
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_cistatic const enum bi_constmod M2_table[4][2] = {
416bf215546Sopenharmony_ci        { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_NONE },
417bf215546Sopenharmony_ci        { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI },
418bf215546Sopenharmony_ci        { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_LO_HI },
419bf215546Sopenharmony_ci        { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI },
420bf215546Sopenharmony_ci};
421bf215546Sopenharmony_ci
422bf215546Sopenharmony_cistatic void
423bf215546Sopenharmony_cidecode_M(enum bi_constmod *mod, unsigned M1, unsigned M2, bool single)
424bf215546Sopenharmony_ci{
425bf215546Sopenharmony_ci        if (M1 >= 8) {
426bf215546Sopenharmony_ci                mod[0] = BI_CONSTMOD_NONE;
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci                if (!single)
429bf215546Sopenharmony_ci                        mod[1] = BI_CONSTMOD_NONE;
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_ci                return;
432bf215546Sopenharmony_ci        } else if (M1 == 7) {
433bf215546Sopenharmony_ci                assert(M2 < 4);
434bf215546Sopenharmony_ci                memcpy(mod, M2_table[M2], sizeof(*mod) * (single ? 1 : 2));
435bf215546Sopenharmony_ci        } else {
436bf215546Sopenharmony_ci                assert(M1 != 3);
437bf215546Sopenharmony_ci                memcpy(mod, M1_table[M1], sizeof(*mod) * (single ? 1 : 2));
438bf215546Sopenharmony_ci        }
439bf215546Sopenharmony_ci}
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_cistatic void dump_clause(FILE *fp, uint32_t *words, unsigned *size, unsigned offset, bool verbose)
442bf215546Sopenharmony_ci{
443bf215546Sopenharmony_ci        // State for a decoded clause
444bf215546Sopenharmony_ci        struct bifrost_alu_inst instrs[8] = {};
445bf215546Sopenharmony_ci        struct bi_constants consts = {};
446bf215546Sopenharmony_ci        unsigned num_instrs = 0;
447bf215546Sopenharmony_ci        unsigned num_consts = 0;
448bf215546Sopenharmony_ci        uint64_t header_bits = 0;
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_ci        unsigned i;
451bf215546Sopenharmony_ci        for (i = 0; ; i++, words += 4) {
452bf215546Sopenharmony_ci                if (verbose) {
453bf215546Sopenharmony_ci                        fprintf(fp, "# ");
454bf215546Sopenharmony_ci                        for (int j = 0; j < 4; j++)
455bf215546Sopenharmony_ci                                fprintf(fp, "%08x ", words[3 - j]); // low bit on the right
456bf215546Sopenharmony_ci                        fprintf(fp, "\n");
457bf215546Sopenharmony_ci                }
458bf215546Sopenharmony_ci                unsigned tag = bits(words[0], 0, 8);
459bf215546Sopenharmony_ci
460bf215546Sopenharmony_ci                // speculatively decode some things that are common between many formats, so we can share some code
461bf215546Sopenharmony_ci                struct bifrost_alu_inst main_instr = {};
462bf215546Sopenharmony_ci                // 20 bits
463bf215546Sopenharmony_ci                main_instr.add_bits = bits(words[2], 2, 32 - 13);
464bf215546Sopenharmony_ci                // 23 bits
465bf215546Sopenharmony_ci                main_instr.fma_bits = bits(words[1], 11, 32) | bits(words[2], 0, 2) << (32 - 11);
466bf215546Sopenharmony_ci                // 35 bits
467bf215546Sopenharmony_ci                main_instr.reg_bits = ((uint64_t) bits(words[1], 0, 11)) << 24 | (uint64_t) bits(words[0], 8, 32);
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_ci                uint64_t const0 = bits(words[0], 8, 32) << 4 | (uint64_t) words[1] << 28 | bits(words[2], 0, 4) << 60;
470bf215546Sopenharmony_ci                uint64_t const1 = bits(words[2], 4, 32) << 4 | (uint64_t) words[3] << 32;
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_ci                /* Z-bit */
473bf215546Sopenharmony_ci                bool stop = tag & 0x40;
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci                if (verbose) {
476bf215546Sopenharmony_ci                        fprintf(fp, "# tag: 0x%02x\n", tag);
477bf215546Sopenharmony_ci                }
478bf215546Sopenharmony_ci                if (tag & 0x80) {
479bf215546Sopenharmony_ci                        /* Format 5 or 10 */
480bf215546Sopenharmony_ci                        unsigned idx = stop ? 5 : 2;
481bf215546Sopenharmony_ci                        main_instr.add_bits |= ((tag >> 3) & 0x7) << 17;
482bf215546Sopenharmony_ci                        instrs[idx + 1] = main_instr;
483bf215546Sopenharmony_ci                        instrs[idx].add_bits = bits(words[3], 0, 17) | ((tag & 0x7) << 17);
484bf215546Sopenharmony_ci                        instrs[idx].fma_bits |= bits(words[2], 19, 32) << 10;
485bf215546Sopenharmony_ci                        consts.raw[0] = bits(words[3], 17, 32) << 4;
486bf215546Sopenharmony_ci                } else {
487bf215546Sopenharmony_ci                        bool done = false;
488bf215546Sopenharmony_ci                        switch ((tag >> 3) & 0x7) {
489bf215546Sopenharmony_ci                        case 0x0:
490bf215546Sopenharmony_ci                                switch (tag & 0x7) {
491bf215546Sopenharmony_ci                                case 0x3:
492bf215546Sopenharmony_ci                                        /* Format 1 */
493bf215546Sopenharmony_ci                                        main_instr.add_bits |= bits(words[3], 29, 32) << 17;
494bf215546Sopenharmony_ci                                        instrs[1] = main_instr;
495bf215546Sopenharmony_ci                                        num_instrs = 2;
496bf215546Sopenharmony_ci                                        done = stop;
497bf215546Sopenharmony_ci                                        break;
498bf215546Sopenharmony_ci                                case 0x4:
499bf215546Sopenharmony_ci                                        /* Format 3 */
500bf215546Sopenharmony_ci                                        instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
501bf215546Sopenharmony_ci                                        instrs[2].fma_bits |= bits(words[2], 19, 32) << 10;
502bf215546Sopenharmony_ci                                        consts.raw[0] = const0;
503bf215546Sopenharmony_ci                                        decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true);
504bf215546Sopenharmony_ci                                        num_instrs = 3;
505bf215546Sopenharmony_ci                                        num_consts = 1;
506bf215546Sopenharmony_ci                                        done = stop;
507bf215546Sopenharmony_ci                                        break;
508bf215546Sopenharmony_ci                                case 0x1:
509bf215546Sopenharmony_ci                                case 0x5:
510bf215546Sopenharmony_ci                                        /* Format 4 */
511bf215546Sopenharmony_ci                                        instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
512bf215546Sopenharmony_ci                                        instrs[2].fma_bits |= bits(words[2], 19, 32) << 10;
513bf215546Sopenharmony_ci                                        main_instr.add_bits |= bits(words[3], 26, 29) << 17;
514bf215546Sopenharmony_ci                                        instrs[3] = main_instr;
515bf215546Sopenharmony_ci                                        if ((tag & 0x7) == 0x5) {
516bf215546Sopenharmony_ci                                                num_instrs = 4;
517bf215546Sopenharmony_ci                                                done = stop;
518bf215546Sopenharmony_ci                                        }
519bf215546Sopenharmony_ci                                        break;
520bf215546Sopenharmony_ci                                case 0x6:
521bf215546Sopenharmony_ci                                        /* Format 8 */
522bf215546Sopenharmony_ci                                        instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
523bf215546Sopenharmony_ci                                        instrs[5].fma_bits |= bits(words[2], 19, 32) << 10;
524bf215546Sopenharmony_ci                                        consts.raw[0] = const0;
525bf215546Sopenharmony_ci                                        decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true);
526bf215546Sopenharmony_ci                                        num_instrs = 6;
527bf215546Sopenharmony_ci                                        num_consts = 1;
528bf215546Sopenharmony_ci                                        done = stop;
529bf215546Sopenharmony_ci                                        break;
530bf215546Sopenharmony_ci                                case 0x7:
531bf215546Sopenharmony_ci                                        /* Format 9 */
532bf215546Sopenharmony_ci                                        instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
533bf215546Sopenharmony_ci                                        instrs[5].fma_bits |= bits(words[2], 19, 32) << 10;
534bf215546Sopenharmony_ci                                        main_instr.add_bits |= bits(words[3], 26, 29) << 17;
535bf215546Sopenharmony_ci                                        instrs[6] = main_instr;
536bf215546Sopenharmony_ci                                        num_instrs = 7;
537bf215546Sopenharmony_ci                                        done = stop;
538bf215546Sopenharmony_ci                                        break;
539bf215546Sopenharmony_ci                                default:
540bf215546Sopenharmony_ci                                        unreachable("[INSTR_INVALID_ENC] Invalid tag bits");
541bf215546Sopenharmony_ci                                }
542bf215546Sopenharmony_ci                                break;
543bf215546Sopenharmony_ci                        case 0x2:
544bf215546Sopenharmony_ci                        case 0x3: {
545bf215546Sopenharmony_ci                                /* Format 6 or 11 */
546bf215546Sopenharmony_ci                                unsigned idx = ((tag >> 3) & 0x7) == 2 ? 4 : 7;
547bf215546Sopenharmony_ci                                main_instr.add_bits |= (tag & 0x7) << 17;
548bf215546Sopenharmony_ci                                instrs[idx] = main_instr;
549bf215546Sopenharmony_ci                                consts.raw[0] |= (bits(words[2], 19, 32) | ((uint64_t) words[3] << 13)) << 19;
550bf215546Sopenharmony_ci                                num_consts = 1;
551bf215546Sopenharmony_ci                                num_instrs = idx + 1;
552bf215546Sopenharmony_ci                                done = stop;
553bf215546Sopenharmony_ci                                break;
554bf215546Sopenharmony_ci                        }
555bf215546Sopenharmony_ci                        case 0x4: {
556bf215546Sopenharmony_ci                                /* Format 2 */
557bf215546Sopenharmony_ci                                unsigned idx = stop ? 4 : 1;
558bf215546Sopenharmony_ci                                main_instr.add_bits |= (tag & 0x7) << 17;
559bf215546Sopenharmony_ci                                instrs[idx] = main_instr;
560bf215546Sopenharmony_ci                                instrs[idx + 1].fma_bits |= bits(words[3], 22, 32);
561bf215546Sopenharmony_ci                                instrs[idx + 1].reg_bits = bits(words[2], 19, 32) | (bits(words[3], 0, 22) << (32 - 19));
562bf215546Sopenharmony_ci                                break;
563bf215546Sopenharmony_ci                        }
564bf215546Sopenharmony_ci                        case 0x1:
565bf215546Sopenharmony_ci                                /* Format 0 - followed by constants */
566bf215546Sopenharmony_ci                                num_instrs = 1;
567bf215546Sopenharmony_ci                                done = stop;
568bf215546Sopenharmony_ci                                FALLTHROUGH;
569bf215546Sopenharmony_ci                        case 0x5:
570bf215546Sopenharmony_ci                                /* Format 0 - followed by instructions */
571bf215546Sopenharmony_ci                                header_bits = bits(words[2], 19, 32) | ((uint64_t) words[3] << (32 - 19));
572bf215546Sopenharmony_ci                                main_instr.add_bits |= (tag & 0x7) << 17;
573bf215546Sopenharmony_ci                                instrs[0] = main_instr;
574bf215546Sopenharmony_ci                                break;
575bf215546Sopenharmony_ci                        case 0x6:
576bf215546Sopenharmony_ci                        case 0x7: {
577bf215546Sopenharmony_ci                                /* Format 12 */
578bf215546Sopenharmony_ci                                unsigned pos = tag & 0xf;
579bf215546Sopenharmony_ci
580bf215546Sopenharmony_ci                                struct {
581bf215546Sopenharmony_ci                                        unsigned const_idx;
582bf215546Sopenharmony_ci                                        unsigned nr_tuples;
583bf215546Sopenharmony_ci                                } pos_table[0x10] = {
584bf215546Sopenharmony_ci                                        { 0, 1 },
585bf215546Sopenharmony_ci                                        { 0, 2 },
586bf215546Sopenharmony_ci                                        { 0, 4 },
587bf215546Sopenharmony_ci                                        { 1, 3 },
588bf215546Sopenharmony_ci                                        { 1, 5 },
589bf215546Sopenharmony_ci                                        { 2, 4 },
590bf215546Sopenharmony_ci                                        { 0, 7 },
591bf215546Sopenharmony_ci                                        { 1, 6 },
592bf215546Sopenharmony_ci                                        { 3, 5 },
593bf215546Sopenharmony_ci                                        { 1, 8 },
594bf215546Sopenharmony_ci                                        { 2, 7 },
595bf215546Sopenharmony_ci                                        { 3, 6 },
596bf215546Sopenharmony_ci                                        { 3, 8 },
597bf215546Sopenharmony_ci                                        { 4, 7 },
598bf215546Sopenharmony_ci                                        { 5, 6 },
599bf215546Sopenharmony_ci                                        { ~0, ~0 }
600bf215546Sopenharmony_ci                                };
601bf215546Sopenharmony_ci
602bf215546Sopenharmony_ci                                ASSERTED bool valid_count = pos_table[pos].nr_tuples == num_instrs;
603bf215546Sopenharmony_ci                                assert(valid_count && "INSTR_INVALID_ENC");
604bf215546Sopenharmony_ci
605bf215546Sopenharmony_ci                                unsigned const_idx = pos_table[pos].const_idx;
606bf215546Sopenharmony_ci
607bf215546Sopenharmony_ci                                if (num_consts < const_idx + 2)
608bf215546Sopenharmony_ci                                        num_consts = const_idx + 2;
609bf215546Sopenharmony_ci
610bf215546Sopenharmony_ci                                consts.raw[const_idx] = const0;
611bf215546Sopenharmony_ci                                consts.raw[const_idx + 1] = const1;
612bf215546Sopenharmony_ci
613bf215546Sopenharmony_ci                                /* Calculate M values from A, B and 4-bit
614bf215546Sopenharmony_ci                                 * unsigned arithmetic. Mathematically it
615bf215546Sopenharmony_ci                                 * should be (A - B) % 16 but we use this
616bf215546Sopenharmony_ci                                 * alternate form to avoid sign issues */
617bf215546Sopenharmony_ci
618bf215546Sopenharmony_ci                                unsigned A1 = bits(words[2], 0, 4);
619bf215546Sopenharmony_ci                                unsigned B1 = bits(words[3], 28, 32);
620bf215546Sopenharmony_ci                                unsigned A2 = bits(words[1], 0, 4);
621bf215546Sopenharmony_ci                                unsigned B2 = bits(words[2], 28, 32);
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci                                unsigned M1 = (16 + A1 - B1) & 0xF;
624bf215546Sopenharmony_ci                                unsigned M2 = (16 + A2 - B2) & 0xF;
625bf215546Sopenharmony_ci
626bf215546Sopenharmony_ci                                decode_M(&consts.mods[const_idx], M1, M2, false);
627bf215546Sopenharmony_ci
628bf215546Sopenharmony_ci                                done = stop;
629bf215546Sopenharmony_ci                                break;
630bf215546Sopenharmony_ci                        }
631bf215546Sopenharmony_ci                        default:
632bf215546Sopenharmony_ci                                break;
633bf215546Sopenharmony_ci                        }
634bf215546Sopenharmony_ci
635bf215546Sopenharmony_ci                        if (done)
636bf215546Sopenharmony_ci                                break;
637bf215546Sopenharmony_ci                }
638bf215546Sopenharmony_ci        }
639bf215546Sopenharmony_ci
640bf215546Sopenharmony_ci        *size = i + 1;
641bf215546Sopenharmony_ci
642bf215546Sopenharmony_ci        if (verbose) {
643bf215546Sopenharmony_ci                fprintf(fp, "# header: %012" PRIx64 "\n", header_bits);
644bf215546Sopenharmony_ci        }
645bf215546Sopenharmony_ci
646bf215546Sopenharmony_ci        struct bifrost_header header;
647bf215546Sopenharmony_ci        memcpy((char *) &header, (char *) &header_bits, sizeof(struct bifrost_header));
648bf215546Sopenharmony_ci        dump_header(fp, header, verbose);
649bf215546Sopenharmony_ci
650bf215546Sopenharmony_ci        fprintf(fp, "{\n");
651bf215546Sopenharmony_ci        for (i = 0; i < num_instrs; i++) {
652bf215546Sopenharmony_ci                struct bifrost_regs regs, next_regs;
653bf215546Sopenharmony_ci                if (i + 1 == num_instrs) {
654bf215546Sopenharmony_ci                        memcpy((char *) &next_regs, (char *) &instrs[0].reg_bits,
655bf215546Sopenharmony_ci                               sizeof(next_regs));
656bf215546Sopenharmony_ci                } else {
657bf215546Sopenharmony_ci                        memcpy((char *) &next_regs, (char *) &instrs[i + 1].reg_bits,
658bf215546Sopenharmony_ci                               sizeof(next_regs));
659bf215546Sopenharmony_ci                }
660bf215546Sopenharmony_ci
661bf215546Sopenharmony_ci                memcpy((char *) &regs, (char *) &instrs[i].reg_bits, sizeof(regs));
662bf215546Sopenharmony_ci
663bf215546Sopenharmony_ci                if (verbose) {
664bf215546Sopenharmony_ci                        fprintf(fp, "    # regs: %016" PRIx64 "\n", instrs[i].reg_bits);
665bf215546Sopenharmony_ci                        dump_regs(fp, regs, i == 0);
666bf215546Sopenharmony_ci                }
667bf215546Sopenharmony_ci
668bf215546Sopenharmony_ci                bi_disasm_fma(fp, instrs[i].fma_bits, &regs, &next_regs,
669bf215546Sopenharmony_ci                                header.staging_register, offset, &consts,
670bf215546Sopenharmony_ci                                i + 1 == num_instrs);
671bf215546Sopenharmony_ci
672bf215546Sopenharmony_ci                bi_disasm_add(fp, instrs[i].add_bits, &regs, &next_regs,
673bf215546Sopenharmony_ci                                header.staging_register, offset, &consts,
674bf215546Sopenharmony_ci                                i + 1 == num_instrs);
675bf215546Sopenharmony_ci        }
676bf215546Sopenharmony_ci        fprintf(fp, "}\n");
677bf215546Sopenharmony_ci
678bf215546Sopenharmony_ci        if (verbose) {
679bf215546Sopenharmony_ci                for (unsigned i = 0; i < num_consts; i++) {
680bf215546Sopenharmony_ci                        fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i, consts.raw[i] & 0xffffffff);
681bf215546Sopenharmony_ci                        fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i + 1, consts.raw[i] >> 32);
682bf215546Sopenharmony_ci                }
683bf215546Sopenharmony_ci        }
684bf215546Sopenharmony_ci
685bf215546Sopenharmony_ci        fprintf(fp, "\n");
686bf215546Sopenharmony_ci        return;
687bf215546Sopenharmony_ci}
688bf215546Sopenharmony_ci
/**
 * Disassemble a buffer of Bifrost machine code, one clause at a time.
 *
 * @param fp      Stream the disassembly is printed to.
 * @param code    Start of the shader binary; it is read as 32-bit words.
 * @param size    Length of the binary in bytes.
 * @param verbose Forwarded unchanged to dump_clause().
 *
 * Each clause is introduced by a "clause_<offset>:" label, where the offset
 * is the running sum of the sizes dump_clause() reported for the preceding
 * clauses. Decoding stops at the first clause whose leading word is zero, when
 * fewer than four words are left in the buffer, or when a clause reports a
 * size larger than what is left.
 */
void disassemble_bifrost(FILE *fp, uint8_t *code, size_t size, bool verbose)
{
        /* dump_clause() reports a clause's size in units of four 32-bit
         * words, so that is also the granularity we walk the buffer in. */
        const size_t words_per_unit = 4;

        uint32_t *words = (uint32_t *) code;

        /* Track the remaining length as a count rather than comparing
         * against an end pointer: an oversized clause can then never step
         * the cursor past the end of the buffer. */
        size_t words_left = size / sizeof(uint32_t);

        /* used for displaying branch targets */
        unsigned offset = 0;

        while (words_left >= words_per_unit) {
                /* Shaders have zero bytes at the end for padding; stop
                 * disassembling when we hit them. */
                if (*words == 0)
                        break;

                fprintf(fp, "clause_%u:\n", offset);

                unsigned clause_size;
                dump_clause(fp, words, &clause_size, offset, verbose);

                /* A clause claiming more data than the buffer holds means
                 * the input is truncated or malformed; bail out instead of
                 * advancing out of bounds. */
                size_t consumed = (size_t) clause_size * words_per_unit;
                if (consumed > words_left)
                        break;

                words += consumed;
                words_left -= consumed;
                offset += clause_size;
        }
}
710bf215546Sopenharmony_ci
711