1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/**
25 * @file
26 *
27 * Validates the QPU instruction sequence after register allocation and
28 * scheduling.
29 */
30
31#include <assert.h>
32#include <stdio.h>
33#include <stdlib.h>
34#include "v3d_compiler.h"
35#include "qpu/qpu_disasm.h"
36
/* Running state carried from one instruction to the next while a program is
 * being validated.
 */
struct v3d_qpu_validate_state {
        /* Compile whose instruction stream is being checked. */
        struct v3d_compile *c;

        /* Instruction visited just before the current one (NULL until the
         * first instruction has been processed), and the index of the current
         * instruction counted across all blocks.
         */
        const struct v3d_qpu_instr *last;
        int ip;

        /* ip of the most recent SFU magic write, recorded branch, and THRSW
         * (a THRSW directly following another one does not update
         * last_thrsw_ip).
         */
        int last_sfu_write;
        int last_branch_ip;
        int last_thrsw_ip;

        /* Number of THRSWs seen, not counting a THRSW that directly follows
         * another one.
         */
        int thrsw_count;

        /* True once the last-THRSW signal has been seen, or from the start
         * when validation began in single-segment mode.
         */
        bool last_thrsw_found;

        /* True once a THRSW has been seen after the last-THRSW signal. */
        bool thrend_found;
};
55
56static void
57fail_instr(struct v3d_qpu_validate_state *state, const char *msg)
58{
59        struct v3d_compile *c = state->c;
60
61        fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg);
62
63        int dump_ip = 0;
64        vir_for_each_inst_inorder(inst, c) {
65                v3d_qpu_dump(c->devinfo, &inst->qpu);
66
67                if (dump_ip++ == state->ip)
68                        fprintf(stderr, " *** ERROR ***");
69
70                fprintf(stderr, "\n");
71        }
72
73        fprintf(stderr, "\n");
74        abort();
75}
76
77static bool
78in_branch_delay_slots(struct v3d_qpu_validate_state *state)
79{
80        return (state->ip - state->last_branch_ip) < 3;
81}
82
83static bool
84in_thrsw_delay_slots(struct v3d_qpu_validate_state *state)
85{
86        return (state->ip - state->last_thrsw_ip) < 3;
87}
88
89static bool
90qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
91                        bool (*predicate)(enum v3d_qpu_waddr waddr))
92{
93        if (inst->type == V3D_QPU_INSTR_TYPE_ALU)
94                return false;
95
96        if (inst->alu.add.op != V3D_QPU_A_NOP &&
97            inst->alu.add.magic_write &&
98            predicate(inst->alu.add.waddr))
99                return true;
100
101        if (inst->alu.mul.op != V3D_QPU_M_NOP &&
102            inst->alu.mul.magic_write &&
103            predicate(inst->alu.mul.waddr))
104                return true;
105
106        return false;
107}
108
109static void
110qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
111{
112        const struct v3d_device_info *devinfo = state->c->devinfo;
113        const struct v3d_qpu_instr *inst = &qinst->qpu;
114
115        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
116                return;
117
118        /* LDVARY writes r5 two instructions later and LDUNIF writes
119         * r5 one instruction later, which is illegal to have
120         * together.
121         */
122        if (state->last && state->last->sig.ldvary &&
123            (inst->sig.ldunif || inst->sig.ldunifa)) {
124                fail_instr(state, "LDUNIF after a LDVARY");
125        }
126
127        /* GFXH-1633 (fixed since V3D 4.2.14, which is Rpi4)
128         *
129         * FIXME: This would not check correctly for V3D 4.2 versions lower
130         * than V3D 4.2.14, but that is not a real issue because the simulator
131         * will still catch this, and we are not really targetting any such
132         * versions anyway.
133         */
134        if (state->c->devinfo->ver < 42) {
135                bool last_reads_ldunif = (state->last && (state->last->sig.ldunif ||
136                                                          state->last->sig.ldunifrf));
137                bool last_reads_ldunifa = (state->last && (state->last->sig.ldunifa ||
138                                                           state->last->sig.ldunifarf));
139                bool reads_ldunif = inst->sig.ldunif || inst->sig.ldunifrf;
140                bool reads_ldunifa = inst->sig.ldunifa || inst->sig.ldunifarf;
141                if ((last_reads_ldunif && reads_ldunifa) ||
142                    (last_reads_ldunifa && reads_ldunif)) {
143                        fail_instr(state,
144                                   "LDUNIF and LDUNIFA can't be next to each other");
145                }
146        }
147
148        int tmu_writes = 0;
149        int sfu_writes = 0;
150        int vpm_writes = 0;
151        int tlb_writes = 0;
152        int tsy_writes = 0;
153
154        if (inst->alu.add.op != V3D_QPU_A_NOP) {
155                if (inst->alu.add.magic_write) {
156                        if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
157                                                       inst->alu.add.waddr)) {
158                                tmu_writes++;
159                        }
160                        if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))
161                                sfu_writes++;
162                        if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))
163                                vpm_writes++;
164                        if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr))
165                                tlb_writes++;
166                        if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr))
167                                tsy_writes++;
168                }
169        }
170
171        if (inst->alu.mul.op != V3D_QPU_M_NOP) {
172                if (inst->alu.mul.magic_write) {
173                        if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
174                                                       inst->alu.mul.waddr)) {
175                                tmu_writes++;
176                        }
177                        if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))
178                                sfu_writes++;
179                        if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))
180                                vpm_writes++;
181                        if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr))
182                                tlb_writes++;
183                        if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr))
184                                tsy_writes++;
185                }
186        }
187
188        if (in_thrsw_delay_slots(state)) {
189                /* There's no way you want to start SFU during the THRSW delay
190                 * slots, since the result would land in the other thread.
191                 */
192                if (sfu_writes) {
193                        fail_instr(state,
194                                   "SFU write started during THRSW delay slots ");
195                }
196
197                if (inst->sig.ldvary)
198                        fail_instr(state, "LDVARY during THRSW delay slots");
199        }
200
201        (void)qpu_magic_waddr_matches; /* XXX */
202
203        /* SFU r4 results come back two instructions later.  No doing
204         * r4 read/writes or other SFU lookups until it's done.
205         */
206        if (state->ip - state->last_sfu_write < 2) {
207                if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
208                        fail_instr(state, "R4 read too soon after SFU");
209
210                if (v3d_qpu_writes_r4(devinfo, inst))
211                        fail_instr(state, "R4 write too soon after SFU");
212
213                if (sfu_writes)
214                        fail_instr(state, "SFU write too soon after SFU");
215        }
216
217        /* XXX: The docs say VPM can happen with the others, but the simulator
218         * disagrees.
219         */
220        if (tmu_writes +
221            sfu_writes +
222            vpm_writes +
223            tlb_writes +
224            tsy_writes +
225            inst->sig.ldtmu +
226            inst->sig.ldtlb +
227            inst->sig.ldvpm +
228            inst->sig.ldtlbu > 1) {
229                fail_instr(state,
230                           "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
231        }
232
233        if (sfu_writes)
234                state->last_sfu_write = state->ip;
235
236        if (inst->sig.thrsw) {
237                if (in_branch_delay_slots(state))
238                        fail_instr(state, "THRSW in a branch delay slot.");
239
240                if (state->last_thrsw_found)
241                        state->thrend_found = true;
242
243                if (state->last_thrsw_ip == state->ip - 1) {
244                        /* If it's the second THRSW in a row, then it's just a
245                         * last-thrsw signal.
246                         */
247                        if (state->last_thrsw_found)
248                                fail_instr(state, "Two last-THRSW signals");
249                        state->last_thrsw_found = true;
250                } else {
251                        if (in_thrsw_delay_slots(state)) {
252                                fail_instr(state,
253                                           "THRSW too close to another THRSW.");
254                        }
255                        state->thrsw_count++;
256                        state->last_thrsw_ip = state->ip;
257                }
258        }
259
260        if (state->thrend_found &&
261            state->last_thrsw_ip - state->ip <= 2 &&
262            inst->type == V3D_QPU_INSTR_TYPE_ALU) {
263                if ((inst->alu.add.op != V3D_QPU_A_NOP &&
264                     !inst->alu.add.magic_write)) {
265                        fail_instr(state, "RF write after THREND");
266                }
267
268                if ((inst->alu.mul.op != V3D_QPU_M_NOP &&
269                     !inst->alu.mul.magic_write)) {
270                        fail_instr(state, "RF write after THREND");
271                }
272
273                if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
274                    !inst->sig_magic) {
275                        fail_instr(state, "RF write after THREND");
276                }
277
278                /* GFXH-1625: No TMUWT in the last instruction */
279                if (state->last_thrsw_ip - state->ip == 2 &&
280                    inst->alu.add.op == V3D_QPU_A_TMUWT)
281                        fail_instr(state, "TMUWT in last instruction");
282        }
283
284        if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
285                if (in_branch_delay_slots(state))
286                        fail_instr(state, "branch in a branch delay slot.");
287                if (in_thrsw_delay_slots(state))
288                        fail_instr(state, "branch in a THRSW delay slot.");
289                state->last_branch_ip = state->ip;
290        }
291}
292
293static void
294qpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block)
295{
296        vir_for_each_inst(qinst, block) {
297                qpu_validate_inst(state, qinst);
298
299                state->last = &qinst->qpu;
300                state->ip++;
301        }
302}
303
304/**
305 * Checks for the instruction restrictions from page 37 ("Summary of
306 * Instruction Restrictions").
307 */
308void
309qpu_validate(struct v3d_compile *c)
310{
311        /* We don't want to do validation in release builds, but we want to
312         * keep compiling the validation code to make sure it doesn't get
313         * broken.
314         */
315#ifndef DEBUG
316        return;
317#endif
318
319        struct v3d_qpu_validate_state state = {
320                .c = c,
321                .last_sfu_write = -10,
322                .last_thrsw_ip = -10,
323                .last_branch_ip = -10,
324                .ip = 0,
325
326                .last_thrsw_found = !c->last_thrsw,
327        };
328
329        vir_for_each_block(block, c) {
330                qpu_validate_block(&state, block);
331        }
332
333        if (state.thrsw_count > 1 && !state.last_thrsw_found) {
334                fail_instr(&state,
335                           "thread switch found without last-THRSW in program");
336        }
337
338        if (!state.thrend_found)
339                fail_instr(&state, "No program-end THRSW found");
340}
341