1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2014 Broadcom
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci/**
25bf215546Sopenharmony_ci * @file
26bf215546Sopenharmony_ci *
27bf215546Sopenharmony_ci * Validates the QPU instruction sequence after register allocation and
28bf215546Sopenharmony_ci * scheduling.
29bf215546Sopenharmony_ci */
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#include <assert.h>
32bf215546Sopenharmony_ci#include <stdio.h>
33bf215546Sopenharmony_ci#include <stdlib.h>
34bf215546Sopenharmony_ci#include "v3d_compiler.h"
35bf215546Sopenharmony_ci#include "qpu/qpu_disasm.h"
36bf215546Sopenharmony_ci
/**
 * Running state carried from one instruction to the next while walking the
 * program in order.
 */
struct v3d_qpu_validate_state {
        /* Shader being validated. */
        struct v3d_compile *c;

        /* Previously visited instruction, or NULL before the first one. */
        const struct v3d_qpu_instr *last;

        /* Index of the instruction currently being validated. */
        int ip;

        /* Instruction indices of the most recent SFU write, branch and
         * counted THRSW.  The validator entry point seeds them with a
         * negative value so that nothing looks "recent" at program start.
         */
        int last_sfu_write;
        int last_branch_ip;
        int last_thrsw_ip;

        /* Number of THRSW signals seen, not counting the back-to-back THRSW
         * that acts as the last-THRSW marker.
         */
        int thrsw_count;

        /* True once the last-THRSW signal has been seen, or from the start
         * if the program runs in single-segment mode.
         */
        bool last_thrsw_found;

        /* True once the THRSW following the last-THRSW (i.e. the program
         * end) has been seen.
         */
        bool thrend_found;
};
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_cistatic void
57bf215546Sopenharmony_cifail_instr(struct v3d_qpu_validate_state *state, const char *msg)
58bf215546Sopenharmony_ci{
59bf215546Sopenharmony_ci        struct v3d_compile *c = state->c;
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci        fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg);
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_ci        int dump_ip = 0;
64bf215546Sopenharmony_ci        vir_for_each_inst_inorder(inst, c) {
65bf215546Sopenharmony_ci                v3d_qpu_dump(c->devinfo, &inst->qpu);
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci                if (dump_ip++ == state->ip)
68bf215546Sopenharmony_ci                        fprintf(stderr, " *** ERROR ***");
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_ci                fprintf(stderr, "\n");
71bf215546Sopenharmony_ci        }
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci        fprintf(stderr, "\n");
74bf215546Sopenharmony_ci        abort();
75bf215546Sopenharmony_ci}
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_cistatic bool
78bf215546Sopenharmony_ciin_branch_delay_slots(struct v3d_qpu_validate_state *state)
79bf215546Sopenharmony_ci{
80bf215546Sopenharmony_ci        return (state->ip - state->last_branch_ip) < 3;
81bf215546Sopenharmony_ci}
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_cistatic bool
84bf215546Sopenharmony_ciin_thrsw_delay_slots(struct v3d_qpu_validate_state *state)
85bf215546Sopenharmony_ci{
86bf215546Sopenharmony_ci        return (state->ip - state->last_thrsw_ip) < 3;
87bf215546Sopenharmony_ci}
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_cistatic bool
90bf215546Sopenharmony_ciqpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
91bf215546Sopenharmony_ci                        bool (*predicate)(enum v3d_qpu_waddr waddr))
92bf215546Sopenharmony_ci{
93bf215546Sopenharmony_ci        if (inst->type == V3D_QPU_INSTR_TYPE_ALU)
94bf215546Sopenharmony_ci                return false;
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci        if (inst->alu.add.op != V3D_QPU_A_NOP &&
97bf215546Sopenharmony_ci            inst->alu.add.magic_write &&
98bf215546Sopenharmony_ci            predicate(inst->alu.add.waddr))
99bf215546Sopenharmony_ci                return true;
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_ci        if (inst->alu.mul.op != V3D_QPU_M_NOP &&
102bf215546Sopenharmony_ci            inst->alu.mul.magic_write &&
103bf215546Sopenharmony_ci            predicate(inst->alu.mul.waddr))
104bf215546Sopenharmony_ci                return true;
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci        return false;
107bf215546Sopenharmony_ci}
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_cistatic void
110bf215546Sopenharmony_ciqpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
111bf215546Sopenharmony_ci{
112bf215546Sopenharmony_ci        const struct v3d_device_info *devinfo = state->c->devinfo;
113bf215546Sopenharmony_ci        const struct v3d_qpu_instr *inst = &qinst->qpu;
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
116bf215546Sopenharmony_ci                return;
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci        /* LDVARY writes r5 two instructions later and LDUNIF writes
119bf215546Sopenharmony_ci         * r5 one instruction later, which is illegal to have
120bf215546Sopenharmony_ci         * together.
121bf215546Sopenharmony_ci         */
122bf215546Sopenharmony_ci        if (state->last && state->last->sig.ldvary &&
123bf215546Sopenharmony_ci            (inst->sig.ldunif || inst->sig.ldunifa)) {
124bf215546Sopenharmony_ci                fail_instr(state, "LDUNIF after a LDVARY");
125bf215546Sopenharmony_ci        }
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_ci        /* GFXH-1633 (fixed since V3D 4.2.14, which is Rpi4)
128bf215546Sopenharmony_ci         *
129bf215546Sopenharmony_ci         * FIXME: This would not check correctly for V3D 4.2 versions lower
130bf215546Sopenharmony_ci         * than V3D 4.2.14, but that is not a real issue because the simulator
131bf215546Sopenharmony_ci         * will still catch this, and we are not really targetting any such
132bf215546Sopenharmony_ci         * versions anyway.
133bf215546Sopenharmony_ci         */
134bf215546Sopenharmony_ci        if (state->c->devinfo->ver < 42) {
135bf215546Sopenharmony_ci                bool last_reads_ldunif = (state->last && (state->last->sig.ldunif ||
136bf215546Sopenharmony_ci                                                          state->last->sig.ldunifrf));
137bf215546Sopenharmony_ci                bool last_reads_ldunifa = (state->last && (state->last->sig.ldunifa ||
138bf215546Sopenharmony_ci                                                           state->last->sig.ldunifarf));
139bf215546Sopenharmony_ci                bool reads_ldunif = inst->sig.ldunif || inst->sig.ldunifrf;
140bf215546Sopenharmony_ci                bool reads_ldunifa = inst->sig.ldunifa || inst->sig.ldunifarf;
141bf215546Sopenharmony_ci                if ((last_reads_ldunif && reads_ldunifa) ||
142bf215546Sopenharmony_ci                    (last_reads_ldunifa && reads_ldunif)) {
143bf215546Sopenharmony_ci                        fail_instr(state,
144bf215546Sopenharmony_ci                                   "LDUNIF and LDUNIFA can't be next to each other");
145bf215546Sopenharmony_ci                }
146bf215546Sopenharmony_ci        }
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci        int tmu_writes = 0;
149bf215546Sopenharmony_ci        int sfu_writes = 0;
150bf215546Sopenharmony_ci        int vpm_writes = 0;
151bf215546Sopenharmony_ci        int tlb_writes = 0;
152bf215546Sopenharmony_ci        int tsy_writes = 0;
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci        if (inst->alu.add.op != V3D_QPU_A_NOP) {
155bf215546Sopenharmony_ci                if (inst->alu.add.magic_write) {
156bf215546Sopenharmony_ci                        if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
157bf215546Sopenharmony_ci                                                       inst->alu.add.waddr)) {
158bf215546Sopenharmony_ci                                tmu_writes++;
159bf215546Sopenharmony_ci                        }
160bf215546Sopenharmony_ci                        if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))
161bf215546Sopenharmony_ci                                sfu_writes++;
162bf215546Sopenharmony_ci                        if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))
163bf215546Sopenharmony_ci                                vpm_writes++;
164bf215546Sopenharmony_ci                        if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr))
165bf215546Sopenharmony_ci                                tlb_writes++;
166bf215546Sopenharmony_ci                        if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr))
167bf215546Sopenharmony_ci                                tsy_writes++;
168bf215546Sopenharmony_ci                }
169bf215546Sopenharmony_ci        }
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci        if (inst->alu.mul.op != V3D_QPU_M_NOP) {
172bf215546Sopenharmony_ci                if (inst->alu.mul.magic_write) {
173bf215546Sopenharmony_ci                        if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
174bf215546Sopenharmony_ci                                                       inst->alu.mul.waddr)) {
175bf215546Sopenharmony_ci                                tmu_writes++;
176bf215546Sopenharmony_ci                        }
177bf215546Sopenharmony_ci                        if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))
178bf215546Sopenharmony_ci                                sfu_writes++;
179bf215546Sopenharmony_ci                        if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))
180bf215546Sopenharmony_ci                                vpm_writes++;
181bf215546Sopenharmony_ci                        if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr))
182bf215546Sopenharmony_ci                                tlb_writes++;
183bf215546Sopenharmony_ci                        if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr))
184bf215546Sopenharmony_ci                                tsy_writes++;
185bf215546Sopenharmony_ci                }
186bf215546Sopenharmony_ci        }
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci        if (in_thrsw_delay_slots(state)) {
189bf215546Sopenharmony_ci                /* There's no way you want to start SFU during the THRSW delay
190bf215546Sopenharmony_ci                 * slots, since the result would land in the other thread.
191bf215546Sopenharmony_ci                 */
192bf215546Sopenharmony_ci                if (sfu_writes) {
193bf215546Sopenharmony_ci                        fail_instr(state,
194bf215546Sopenharmony_ci                                   "SFU write started during THRSW delay slots ");
195bf215546Sopenharmony_ci                }
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci                if (inst->sig.ldvary)
198bf215546Sopenharmony_ci                        fail_instr(state, "LDVARY during THRSW delay slots");
199bf215546Sopenharmony_ci        }
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci        (void)qpu_magic_waddr_matches; /* XXX */
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci        /* SFU r4 results come back two instructions later.  No doing
204bf215546Sopenharmony_ci         * r4 read/writes or other SFU lookups until it's done.
205bf215546Sopenharmony_ci         */
206bf215546Sopenharmony_ci        if (state->ip - state->last_sfu_write < 2) {
207bf215546Sopenharmony_ci                if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
208bf215546Sopenharmony_ci                        fail_instr(state, "R4 read too soon after SFU");
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci                if (v3d_qpu_writes_r4(devinfo, inst))
211bf215546Sopenharmony_ci                        fail_instr(state, "R4 write too soon after SFU");
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci                if (sfu_writes)
214bf215546Sopenharmony_ci                        fail_instr(state, "SFU write too soon after SFU");
215bf215546Sopenharmony_ci        }
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci        /* XXX: The docs say VPM can happen with the others, but the simulator
218bf215546Sopenharmony_ci         * disagrees.
219bf215546Sopenharmony_ci         */
220bf215546Sopenharmony_ci        if (tmu_writes +
221bf215546Sopenharmony_ci            sfu_writes +
222bf215546Sopenharmony_ci            vpm_writes +
223bf215546Sopenharmony_ci            tlb_writes +
224bf215546Sopenharmony_ci            tsy_writes +
225bf215546Sopenharmony_ci            inst->sig.ldtmu +
226bf215546Sopenharmony_ci            inst->sig.ldtlb +
227bf215546Sopenharmony_ci            inst->sig.ldvpm +
228bf215546Sopenharmony_ci            inst->sig.ldtlbu > 1) {
229bf215546Sopenharmony_ci                fail_instr(state,
230bf215546Sopenharmony_ci                           "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
231bf215546Sopenharmony_ci        }
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci        if (sfu_writes)
234bf215546Sopenharmony_ci                state->last_sfu_write = state->ip;
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci        if (inst->sig.thrsw) {
237bf215546Sopenharmony_ci                if (in_branch_delay_slots(state))
238bf215546Sopenharmony_ci                        fail_instr(state, "THRSW in a branch delay slot.");
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci                if (state->last_thrsw_found)
241bf215546Sopenharmony_ci                        state->thrend_found = true;
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci                if (state->last_thrsw_ip == state->ip - 1) {
244bf215546Sopenharmony_ci                        /* If it's the second THRSW in a row, then it's just a
245bf215546Sopenharmony_ci                         * last-thrsw signal.
246bf215546Sopenharmony_ci                         */
247bf215546Sopenharmony_ci                        if (state->last_thrsw_found)
248bf215546Sopenharmony_ci                                fail_instr(state, "Two last-THRSW signals");
249bf215546Sopenharmony_ci                        state->last_thrsw_found = true;
250bf215546Sopenharmony_ci                } else {
251bf215546Sopenharmony_ci                        if (in_thrsw_delay_slots(state)) {
252bf215546Sopenharmony_ci                                fail_instr(state,
253bf215546Sopenharmony_ci                                           "THRSW too close to another THRSW.");
254bf215546Sopenharmony_ci                        }
255bf215546Sopenharmony_ci                        state->thrsw_count++;
256bf215546Sopenharmony_ci                        state->last_thrsw_ip = state->ip;
257bf215546Sopenharmony_ci                }
258bf215546Sopenharmony_ci        }
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ci        if (state->thrend_found &&
261bf215546Sopenharmony_ci            state->last_thrsw_ip - state->ip <= 2 &&
262bf215546Sopenharmony_ci            inst->type == V3D_QPU_INSTR_TYPE_ALU) {
263bf215546Sopenharmony_ci                if ((inst->alu.add.op != V3D_QPU_A_NOP &&
264bf215546Sopenharmony_ci                     !inst->alu.add.magic_write)) {
265bf215546Sopenharmony_ci                        fail_instr(state, "RF write after THREND");
266bf215546Sopenharmony_ci                }
267bf215546Sopenharmony_ci
268bf215546Sopenharmony_ci                if ((inst->alu.mul.op != V3D_QPU_M_NOP &&
269bf215546Sopenharmony_ci                     !inst->alu.mul.magic_write)) {
270bf215546Sopenharmony_ci                        fail_instr(state, "RF write after THREND");
271bf215546Sopenharmony_ci                }
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ci                if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
274bf215546Sopenharmony_ci                    !inst->sig_magic) {
275bf215546Sopenharmony_ci                        fail_instr(state, "RF write after THREND");
276bf215546Sopenharmony_ci                }
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci                /* GFXH-1625: No TMUWT in the last instruction */
279bf215546Sopenharmony_ci                if (state->last_thrsw_ip - state->ip == 2 &&
280bf215546Sopenharmony_ci                    inst->alu.add.op == V3D_QPU_A_TMUWT)
281bf215546Sopenharmony_ci                        fail_instr(state, "TMUWT in last instruction");
282bf215546Sopenharmony_ci        }
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_ci        if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
285bf215546Sopenharmony_ci                if (in_branch_delay_slots(state))
286bf215546Sopenharmony_ci                        fail_instr(state, "branch in a branch delay slot.");
287bf215546Sopenharmony_ci                if (in_thrsw_delay_slots(state))
288bf215546Sopenharmony_ci                        fail_instr(state, "branch in a THRSW delay slot.");
289bf215546Sopenharmony_ci                state->last_branch_ip = state->ip;
290bf215546Sopenharmony_ci        }
291bf215546Sopenharmony_ci}
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_cistatic void
294bf215546Sopenharmony_ciqpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block)
295bf215546Sopenharmony_ci{
296bf215546Sopenharmony_ci        vir_for_each_inst(qinst, block) {
297bf215546Sopenharmony_ci                qpu_validate_inst(state, qinst);
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_ci                state->last = &qinst->qpu;
300bf215546Sopenharmony_ci                state->ip++;
301bf215546Sopenharmony_ci        }
302bf215546Sopenharmony_ci}
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_ci/**
305bf215546Sopenharmony_ci * Checks for the instruction restrictions from page 37 ("Summary of
306bf215546Sopenharmony_ci * Instruction Restrictions").
307bf215546Sopenharmony_ci */
308bf215546Sopenharmony_civoid
309bf215546Sopenharmony_ciqpu_validate(struct v3d_compile *c)
310bf215546Sopenharmony_ci{
311bf215546Sopenharmony_ci        /* We don't want to do validation in release builds, but we want to
312bf215546Sopenharmony_ci         * keep compiling the validation code to make sure it doesn't get
313bf215546Sopenharmony_ci         * broken.
314bf215546Sopenharmony_ci         */
315bf215546Sopenharmony_ci#ifndef DEBUG
316bf215546Sopenharmony_ci        return;
317bf215546Sopenharmony_ci#endif
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci        struct v3d_qpu_validate_state state = {
320bf215546Sopenharmony_ci                .c = c,
321bf215546Sopenharmony_ci                .last_sfu_write = -10,
322bf215546Sopenharmony_ci                .last_thrsw_ip = -10,
323bf215546Sopenharmony_ci                .last_branch_ip = -10,
324bf215546Sopenharmony_ci                .ip = 0,
325bf215546Sopenharmony_ci
326bf215546Sopenharmony_ci                .last_thrsw_found = !c->last_thrsw,
327bf215546Sopenharmony_ci        };
328bf215546Sopenharmony_ci
329bf215546Sopenharmony_ci        vir_for_each_block(block, c) {
330bf215546Sopenharmony_ci                qpu_validate_block(&state, block);
331bf215546Sopenharmony_ci        }
332bf215546Sopenharmony_ci
333bf215546Sopenharmony_ci        if (state.thrsw_count > 1 && !state.last_thrsw_found) {
334bf215546Sopenharmony_ci                fail_instr(&state,
335bf215546Sopenharmony_ci                           "thread switch found without last-THRSW in program");
336bf215546Sopenharmony_ci        }
337bf215546Sopenharmony_ci
338bf215546Sopenharmony_ci        if (!state.thrend_found)
339bf215546Sopenharmony_ci                fail_instr(&state, "No program-end THRSW found");
340bf215546Sopenharmony_ci}
341