1bf215546Sopenharmony_ci
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include <stdlib.h>
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "vc4_qpu.h"
28bf215546Sopenharmony_ci
/**
 * Reports a validation failure and terminates the process.
 *
 * Writes "vc4_qpu_validate: <msg>: " to stderr, passes the offending
 * instruction to vc4_qpu_disasm(), ends the line on stderr and then calls
 * abort(), so this function never returns to its caller.
 */
static void
fail_instr(uint64_t inst, const char *msg)
{
        /* vc4_qpu_disasm() takes an array plus a count; wrap the single
         * instruction accordingly.
         */
        uint64_t offending[1] = { inst };

        fprintf(stderr, "vc4_qpu_validate: %s: ", msg);
        vc4_qpu_disasm(offending, 1);
        fprintf(stderr, "\n");

        abort();
}
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_cistatic bool
39bf215546Sopenharmony_ciwrites_reg(uint64_t inst, uint32_t w)
40bf215546Sopenharmony_ci{
41bf215546Sopenharmony_ci        return (QPU_GET_FIELD(inst, QPU_WADDR_ADD) == w ||
42bf215546Sopenharmony_ci                QPU_GET_FIELD(inst, QPU_WADDR_MUL) == w);
43bf215546Sopenharmony_ci}
44bf215546Sopenharmony_ci
45bf215546Sopenharmony_cistatic bool
46bf215546Sopenharmony_ci_reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b)
47bf215546Sopenharmony_ci{
48bf215546Sopenharmony_ci        struct {
49bf215546Sopenharmony_ci                uint32_t mux, addr;
50bf215546Sopenharmony_ci        } src_regs[] = {
51bf215546Sopenharmony_ci                { QPU_GET_FIELD(inst, QPU_ADD_A) },
52bf215546Sopenharmony_ci                { QPU_GET_FIELD(inst, QPU_ADD_B) },
53bf215546Sopenharmony_ci                { QPU_GET_FIELD(inst, QPU_MUL_A) },
54bf215546Sopenharmony_ci                { QPU_GET_FIELD(inst, QPU_MUL_B) },
55bf215546Sopenharmony_ci        };
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci        /* Branches only reference raddr_a (no mux), and we don't use that
58bf215546Sopenharmony_ci         * feature of branching.
59bf215546Sopenharmony_ci         */
60bf215546Sopenharmony_ci        if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_BRANCH)
61bf215546Sopenharmony_ci                return false;
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_ci        /* Load immediates don't read any registers. */
64bf215546Sopenharmony_ci        if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_LOAD_IMM)
65bf215546Sopenharmony_ci                return false;
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci        for (int i = 0; i < ARRAY_SIZE(src_regs); i++) {
68bf215546Sopenharmony_ci                if (!ignore_a &&
69bf215546Sopenharmony_ci                    src_regs[i].mux == QPU_MUX_A &&
70bf215546Sopenharmony_ci                    (QPU_GET_FIELD(inst, QPU_RADDR_A) == r))
71bf215546Sopenharmony_ci                        return true;
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci                if (!ignore_b &&
74bf215546Sopenharmony_ci                    QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM &&
75bf215546Sopenharmony_ci                    src_regs[i].mux == QPU_MUX_B &&
76bf215546Sopenharmony_ci                    (QPU_GET_FIELD(inst, QPU_RADDR_B) == r))
77bf215546Sopenharmony_ci                        return true;
78bf215546Sopenharmony_ci        }
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci        return false;
81bf215546Sopenharmony_ci}
82bf215546Sopenharmony_ci
/** Returns true when @inst reads address @r through either read port. */
static bool
reads_reg(uint64_t inst, uint32_t r)
{
        const bool skip_port_a = false;
        const bool skip_port_b = false;

        return _reads_reg(inst, r, skip_port_a, skip_port_b);
}
88bf215546Sopenharmony_ci
/** Returns true when @inst reads address @r through the raddr_a port only. */
static bool
reads_a_reg(uint64_t inst, uint32_t r)
{
        const bool skip_port_a = false;
        const bool skip_port_b = true;

        return _reads_reg(inst, r, skip_port_a, skip_port_b);
}
94bf215546Sopenharmony_ci
/** Returns true when @inst reads address @r through the raddr_b port only. */
static bool
reads_b_reg(uint64_t inst, uint32_t r)
{
        const bool skip_port_a = true;
        const bool skip_port_b = false;

        return _reads_reg(inst, r, skip_port_a, skip_port_b);
}
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_cistatic bool
102bf215546Sopenharmony_ciwrites_sfu(uint64_t inst)
103bf215546Sopenharmony_ci{
104bf215546Sopenharmony_ci        return (writes_reg(inst, QPU_W_SFU_RECIP) ||
105bf215546Sopenharmony_ci                writes_reg(inst, QPU_W_SFU_RECIPSQRT) ||
106bf215546Sopenharmony_ci                writes_reg(inst, QPU_W_SFU_EXP) ||
107bf215546Sopenharmony_ci                writes_reg(inst, QPU_W_SFU_LOG));
108bf215546Sopenharmony_ci}
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_ci/**
111bf215546Sopenharmony_ci * Checks for the instruction restrictions from page 37 ("Summary of
112bf215546Sopenharmony_ci * Instruction Restrictions").
113bf215546Sopenharmony_ci */
114bf215546Sopenharmony_civoid
115bf215546Sopenharmony_civc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
116bf215546Sopenharmony_ci{
117bf215546Sopenharmony_ci        bool scoreboard_locked = false;
118bf215546Sopenharmony_ci        bool threaded = false;
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci        /* We don't want to do validation in release builds, but we want to
121bf215546Sopenharmony_ci         * keep compiling the validation code to make sure it doesn't get
122bf215546Sopenharmony_ci         * broken.
123bf215546Sopenharmony_ci         */
124bf215546Sopenharmony_ci#ifndef DEBUG
125bf215546Sopenharmony_ci        return;
126bf215546Sopenharmony_ci#endif
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci        for (int i = 0; i < num_inst; i++) {
129bf215546Sopenharmony_ci                uint64_t inst = insts[i];
130bf215546Sopenharmony_ci                uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_ci                if (sig != QPU_SIG_PROG_END) {
133bf215546Sopenharmony_ci                        if (qpu_inst_is_tlb(inst))
134bf215546Sopenharmony_ci                                scoreboard_locked = true;
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci                        if (sig == QPU_SIG_THREAD_SWITCH ||
137bf215546Sopenharmony_ci                            sig == QPU_SIG_LAST_THREAD_SWITCH) {
138bf215546Sopenharmony_ci                                threaded = true;
139bf215546Sopenharmony_ci                        }
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci                        continue;
142bf215546Sopenharmony_ci                }
143bf215546Sopenharmony_ci
144bf215546Sopenharmony_ci                /* "The Thread End instruction must not write to either physical
145bf215546Sopenharmony_ci                 *  regfile A or B."
146bf215546Sopenharmony_ci                 */
147bf215546Sopenharmony_ci                if (QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32 ||
148bf215546Sopenharmony_ci                    QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32) {
149bf215546Sopenharmony_ci                        fail_instr(inst, "write to phys reg in thread end");
150bf215546Sopenharmony_ci                }
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci                /* Can't trigger an implicit wait on scoreboard in the program
153bf215546Sopenharmony_ci                 * end instruction.
154bf215546Sopenharmony_ci                 */
155bf215546Sopenharmony_ci                if (qpu_inst_is_tlb(inst) && !scoreboard_locked)
156bf215546Sopenharmony_ci                        fail_instr(inst, "implicit sb wait in program end");
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci                /* Two delay slots will be executed. */
159bf215546Sopenharmony_ci                assert(i + 2 <= num_inst);
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci                 for (int j = i; j < i + 2; j++) {
162bf215546Sopenharmony_ci                         /* "The last three instructions of any program
163bf215546Sopenharmony_ci                          *  (Thread End plus the following two delay-slot
164bf215546Sopenharmony_ci                          *  instructions) must not do varyings read, uniforms
165bf215546Sopenharmony_ci                          *  read or any kind of VPM, VDR, or VDW read or
166bf215546Sopenharmony_ci                          *  write."
167bf215546Sopenharmony_ci                          */
168bf215546Sopenharmony_ci                         if (writes_reg(insts[j], QPU_W_VPM) ||
169bf215546Sopenharmony_ci                             reads_reg(insts[j], QPU_R_VARY) ||
170bf215546Sopenharmony_ci                             reads_reg(insts[j], QPU_R_UNIF) ||
171bf215546Sopenharmony_ci                             reads_reg(insts[j], QPU_R_VPM)) {
172bf215546Sopenharmony_ci                                 fail_instr(insts[j], "last 3 instructions "
173bf215546Sopenharmony_ci                                            "using fixed functions");
174bf215546Sopenharmony_ci                         }
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci                         /* "The Thread End instruction and the following two
177bf215546Sopenharmony_ci                          *  delay slot instructions must not write or read
178bf215546Sopenharmony_ci                          *  address 14 in either regfile A or B."
179bf215546Sopenharmony_ci                          */
180bf215546Sopenharmony_ci                         if (writes_reg(insts[j], 14) ||
181bf215546Sopenharmony_ci                             reads_reg(insts[j], 14)) {
182bf215546Sopenharmony_ci                                 fail_instr(insts[j], "last 3 instructions "
183bf215546Sopenharmony_ci                                            "must not use r14");
184bf215546Sopenharmony_ci                         }
185bf215546Sopenharmony_ci                 }
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci                 /* "The final program instruction (the second delay slot
188bf215546Sopenharmony_ci                  *  instruction) must not do a TLB Z write."
189bf215546Sopenharmony_ci                  */
190bf215546Sopenharmony_ci                 if (writes_reg(insts[i + 2], QPU_W_TLB_Z)) {
191bf215546Sopenharmony_ci                         fail_instr(insts[i + 2], "final instruction doing "
192bf215546Sopenharmony_ci                                    "Z write");
193bf215546Sopenharmony_ci                 }
194bf215546Sopenharmony_ci        }
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci        /* "A scoreboard wait must not occur in the first two instructions of
197bf215546Sopenharmony_ci         *  a fragment shader. This is either the explicit Wait for Scoreboard
198bf215546Sopenharmony_ci         *  signal or an implicit wait with the first tile-buffer read or
199bf215546Sopenharmony_ci         *  write instruction."
200bf215546Sopenharmony_ci         */
201bf215546Sopenharmony_ci        for (int i = 0; i < 2; i++) {
202bf215546Sopenharmony_ci                uint64_t inst = insts[i];
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_ci                if (qpu_inst_is_tlb(inst))
205bf215546Sopenharmony_ci                        fail_instr(inst, "sb wait in first two insts");
206bf215546Sopenharmony_ci        }
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci        /* "If TMU_NOSWAP is written, the write must be three instructions
209bf215546Sopenharmony_ci         *  before the first TMU write instruction.  For example, if
210bf215546Sopenharmony_ci         *  TMU_NOSWAP is written in the first shader instruction, the first
211bf215546Sopenharmony_ci         *  TMU write cannot occur before the 4th shader instruction."
212bf215546Sopenharmony_ci         */
213bf215546Sopenharmony_ci        int last_tmu_noswap = -10;
214bf215546Sopenharmony_ci        for (int i = 0; i < num_inst; i++) {
215bf215546Sopenharmony_ci                uint64_t inst = insts[i];
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci                if ((i - last_tmu_noswap) <= 3 &&
218bf215546Sopenharmony_ci                    (writes_reg(inst, QPU_W_TMU0_S) ||
219bf215546Sopenharmony_ci                     writes_reg(inst, QPU_W_TMU1_S))) {
220bf215546Sopenharmony_ci                        fail_instr(inst, "TMU write too soon after TMU_NOSWAP");
221bf215546Sopenharmony_ci                }
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_ci                if (writes_reg(inst, QPU_W_TMU_NOSWAP))
224bf215546Sopenharmony_ci                    last_tmu_noswap = i;
225bf215546Sopenharmony_ci        }
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci        /* "An instruction must not read from a location in physical regfile A
228bf215546Sopenharmony_ci         *  or B that was written to by the previous instruction."
229bf215546Sopenharmony_ci         */
230bf215546Sopenharmony_ci        for (int i = 0; i < num_inst - 1; i++) {
231bf215546Sopenharmony_ci                uint64_t inst = insts[i];
232bf215546Sopenharmony_ci                uint32_t add_waddr = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
233bf215546Sopenharmony_ci                uint32_t mul_waddr = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
234bf215546Sopenharmony_ci                uint32_t waddr_a, waddr_b;
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci                if (inst & QPU_WS) {
237bf215546Sopenharmony_ci                        waddr_b = add_waddr;
238bf215546Sopenharmony_ci                        waddr_a = mul_waddr;
239bf215546Sopenharmony_ci                } else {
240bf215546Sopenharmony_ci                        waddr_a = add_waddr;
241bf215546Sopenharmony_ci                        waddr_b = mul_waddr;
242bf215546Sopenharmony_ci                }
243bf215546Sopenharmony_ci
244bf215546Sopenharmony_ci                if ((waddr_a < 32 && reads_a_reg(insts[i + 1], waddr_a)) ||
245bf215546Sopenharmony_ci                    (waddr_b < 32 && reads_b_reg(insts[i + 1], waddr_b))) {
246bf215546Sopenharmony_ci                        fail_instr(insts[i + 1],
247bf215546Sopenharmony_ci                                   "Reads physical reg too soon after write");
248bf215546Sopenharmony_ci                }
249bf215546Sopenharmony_ci        }
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci        /* "After an SFU lookup instruction, accumulator r4 must not be read
252bf215546Sopenharmony_ci         *  in the following two instructions. Any other instruction that
253bf215546Sopenharmony_ci         *  results in r4 being written (that is, TMU read, TLB read, SFU
254bf215546Sopenharmony_ci         *  lookup) cannot occur in the two instructions following an SFU
255bf215546Sopenharmony_ci         *  lookup."
256bf215546Sopenharmony_ci         */
257bf215546Sopenharmony_ci        int last_sfu_inst = -10;
258bf215546Sopenharmony_ci        for (int i = 0; i < num_inst - 1; i++) {
259bf215546Sopenharmony_ci                uint64_t inst = insts[i];
260bf215546Sopenharmony_ci                uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_ci                if (i - last_sfu_inst <= 2 &&
263bf215546Sopenharmony_ci                    (writes_sfu(inst) ||
264bf215546Sopenharmony_ci                     sig == QPU_SIG_LOAD_TMU0 ||
265bf215546Sopenharmony_ci                     sig == QPU_SIG_LOAD_TMU1 ||
266bf215546Sopenharmony_ci                     sig == QPU_SIG_COLOR_LOAD)) {
267bf215546Sopenharmony_ci                        fail_instr(inst, "R4 write too soon after SFU write");
268bf215546Sopenharmony_ci                }
269bf215546Sopenharmony_ci
270bf215546Sopenharmony_ci                if (writes_sfu(inst))
271bf215546Sopenharmony_ci                        last_sfu_inst = i;
272bf215546Sopenharmony_ci        }
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci        for (int i = 0; i < num_inst - 1; i++) {
275bf215546Sopenharmony_ci                uint64_t inst = insts[i];
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci                if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM &&
278bf215546Sopenharmony_ci                    QPU_GET_FIELD(inst, QPU_SMALL_IMM) >=
279bf215546Sopenharmony_ci                    QPU_SMALL_IMM_MUL_ROT) {
280bf215546Sopenharmony_ci                        uint32_t mux_a = QPU_GET_FIELD(inst, QPU_MUL_A);
281bf215546Sopenharmony_ci                        uint32_t mux_b = QPU_GET_FIELD(inst, QPU_MUL_B);
282bf215546Sopenharmony_ci
283bf215546Sopenharmony_ci                        /* "The full horizontal vector rotate is only
284bf215546Sopenharmony_ci                         *  available when both of the mul ALU input arguments
285bf215546Sopenharmony_ci                         *  are taken from accumulators r0-r3."
286bf215546Sopenharmony_ci                         */
287bf215546Sopenharmony_ci                        if (mux_a > QPU_MUX_R3 || mux_b > QPU_MUX_R3) {
288bf215546Sopenharmony_ci                                fail_instr(inst,
289bf215546Sopenharmony_ci                                           "MUL rotate using non-accumulator "
290bf215546Sopenharmony_ci                                           "input");
291bf215546Sopenharmony_ci                        }
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci                        if (QPU_GET_FIELD(inst, QPU_SMALL_IMM) ==
294bf215546Sopenharmony_ci                            QPU_SMALL_IMM_MUL_ROT) {
295bf215546Sopenharmony_ci                                /* "An instruction that does a vector rotate
296bf215546Sopenharmony_ci                                 *  by r5 must not immediately follow an
297bf215546Sopenharmony_ci                                 *  instruction that writes to r5."
298bf215546Sopenharmony_ci                                 */
299bf215546Sopenharmony_ci                                if (writes_reg(insts[i - 1], QPU_W_ACC5)) {
300bf215546Sopenharmony_ci                                        fail_instr(inst,
301bf215546Sopenharmony_ci                                                   "vector rotate by r5 "
302bf215546Sopenharmony_ci                                                   "immediately after r5 write");
303bf215546Sopenharmony_ci                                }
304bf215546Sopenharmony_ci                        }
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci                        /* "An instruction that does a vector rotate must not
307bf215546Sopenharmony_ci                         *  immediately follow an instruction that writes to the
308bf215546Sopenharmony_ci                         *  accumulator that is being rotated."
309bf215546Sopenharmony_ci                         */
310bf215546Sopenharmony_ci                        if (writes_reg(insts[i - 1], QPU_W_ACC0 + mux_a) ||
311bf215546Sopenharmony_ci                            writes_reg(insts[i - 1], QPU_W_ACC0 + mux_b)) {
312bf215546Sopenharmony_ci                                fail_instr(inst,
313bf215546Sopenharmony_ci                                           "vector rotate of value "
314bf215546Sopenharmony_ci                                           "written in previous instruction");
315bf215546Sopenharmony_ci                        }
316bf215546Sopenharmony_ci                }
317bf215546Sopenharmony_ci        }
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci        /* "An instruction that does a vector rotate must not immediately
320bf215546Sopenharmony_ci         *  follow an instruction that writes to the accumulator that is being
321bf215546Sopenharmony_ci         *  rotated.
322bf215546Sopenharmony_ci         *
323bf215546Sopenharmony_ci         * XXX: TODO.
324bf215546Sopenharmony_ci         */
325bf215546Sopenharmony_ci
326bf215546Sopenharmony_ci        /* "After an instruction that does a TLB Z write, the multisample mask
327bf215546Sopenharmony_ci         *  must not be read as an instruction input argument in the following
328bf215546Sopenharmony_ci         *  two instruction. The TLB Z write instruction can, however, be
329bf215546Sopenharmony_ci         *  followed immediately by a TLB color write."
330bf215546Sopenharmony_ci         */
331bf215546Sopenharmony_ci        for (int i = 0; i < num_inst - 1; i++) {
332bf215546Sopenharmony_ci                uint64_t inst = insts[i];
333bf215546Sopenharmony_ci                if (writes_reg(inst, QPU_W_TLB_Z) &&
334bf215546Sopenharmony_ci                    (reads_a_reg(insts[i + 1], QPU_R_MS_REV_FLAGS) ||
335bf215546Sopenharmony_ci                     reads_a_reg(insts[i + 2], QPU_R_MS_REV_FLAGS))) {
336bf215546Sopenharmony_ci                        fail_instr(inst, "TLB Z write followed by MS mask read");
337bf215546Sopenharmony_ci                }
338bf215546Sopenharmony_ci        }
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_ci        /*
341bf215546Sopenharmony_ci         * "A single instruction can only perform a maximum of one of the
342bf215546Sopenharmony_ci         *  following closely coupled peripheral accesses in a single
343bf215546Sopenharmony_ci         *  instruction: TMU write, TMU read, TLB write, TLB read, TLB
344bf215546Sopenharmony_ci         *  combined color read and write, SFU write, Mutex read or Semaphore
345bf215546Sopenharmony_ci         *  access."
346bf215546Sopenharmony_ci         */
347bf215546Sopenharmony_ci        for (int i = 0; i < num_inst - 1; i++) {
348bf215546Sopenharmony_ci                uint64_t inst = insts[i];
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_ci                if (qpu_num_sf_accesses(inst) > 1)
351bf215546Sopenharmony_ci                        fail_instr(inst, "Single instruction writes SFU twice");
352bf215546Sopenharmony_ci        }
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci        /* "The uniform base pointer can be written (from SIMD element 0) by
355bf215546Sopenharmony_ci         *  the processor to reset the stream, there must be at least two
356bf215546Sopenharmony_ci         *  nonuniform-accessing instructions following a pointer change
357bf215546Sopenharmony_ci         *  before uniforms can be accessed once more."
358bf215546Sopenharmony_ci         */
359bf215546Sopenharmony_ci        int last_unif_pointer_update = -3;
360bf215546Sopenharmony_ci        for (int i = 0; i < num_inst; i++) {
361bf215546Sopenharmony_ci                uint64_t inst = insts[i];
362bf215546Sopenharmony_ci                uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
363bf215546Sopenharmony_ci                uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
364bf215546Sopenharmony_ci
365bf215546Sopenharmony_ci                if (reads_reg(inst, QPU_R_UNIF) &&
366bf215546Sopenharmony_ci                    i - last_unif_pointer_update <= 2) {
367bf215546Sopenharmony_ci                        fail_instr(inst,
368bf215546Sopenharmony_ci                                   "uniform read too soon after pointer update");
369bf215546Sopenharmony_ci                }
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ci                if (waddr_add == QPU_W_UNIFORMS_ADDRESS ||
372bf215546Sopenharmony_ci                    waddr_mul == QPU_W_UNIFORMS_ADDRESS)
373bf215546Sopenharmony_ci                        last_unif_pointer_update = i;
374bf215546Sopenharmony_ci        }
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_ci        if (threaded) {
377bf215546Sopenharmony_ci                bool last_thrsw_found = false;
378bf215546Sopenharmony_ci                bool scoreboard_locked = false;
379bf215546Sopenharmony_ci                int tex_samples_outstanding = 0;
380bf215546Sopenharmony_ci                int last_tex_samples_outstanding = 0;
381bf215546Sopenharmony_ci                int thrsw_ip = -1;
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci                for (int i = 0; i < num_inst; i++) {
384bf215546Sopenharmony_ci                        uint64_t inst = insts[i];
385bf215546Sopenharmony_ci                        uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
386bf215546Sopenharmony_ci
387bf215546Sopenharmony_ci                        if (i == thrsw_ip) {
388bf215546Sopenharmony_ci                                /* In order to get texture results back in the
389bf215546Sopenharmony_ci                                 * correct order, before a new thrsw we have
390bf215546Sopenharmony_ci                                 * to read all the texture results from before
391bf215546Sopenharmony_ci                                 * the previous thrsw.
392bf215546Sopenharmony_ci                                 *
393bf215546Sopenharmony_ci                                 * FIXME: Is collecting the remaining results
394bf215546Sopenharmony_ci                                 * during the delay slots OK, or should we do
395bf215546Sopenharmony_ci                                 * this at THRSW signal time?
396bf215546Sopenharmony_ci                                 */
397bf215546Sopenharmony_ci                                if (last_tex_samples_outstanding != 0) {
398bf215546Sopenharmony_ci                                        fail_instr(inst, "THRSW with texture "
399bf215546Sopenharmony_ci                                                   "results from the previous "
400bf215546Sopenharmony_ci                                                   "THRSW still in the FIFO.");
401bf215546Sopenharmony_ci                                }
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_ci                                last_tex_samples_outstanding =
404bf215546Sopenharmony_ci                                        tex_samples_outstanding;
405bf215546Sopenharmony_ci                                tex_samples_outstanding = 0;
406bf215546Sopenharmony_ci                        }
407bf215546Sopenharmony_ci
408bf215546Sopenharmony_ci                        if (qpu_inst_is_tlb(inst))
409bf215546Sopenharmony_ci                                scoreboard_locked = true;
410bf215546Sopenharmony_ci
411bf215546Sopenharmony_ci                        switch (sig) {
412bf215546Sopenharmony_ci                        case QPU_SIG_THREAD_SWITCH:
413bf215546Sopenharmony_ci                        case QPU_SIG_LAST_THREAD_SWITCH:
414bf215546Sopenharmony_ci                                /* No thread switching with the scoreboard
415bf215546Sopenharmony_ci                                 * locked.  Doing so means we may deadlock
416bf215546Sopenharmony_ci                                 * when the other thread tries to lock
417bf215546Sopenharmony_ci                                 * scoreboard.
418bf215546Sopenharmony_ci                                 */
419bf215546Sopenharmony_ci                                if (scoreboard_locked) {
420bf215546Sopenharmony_ci                                        fail_instr(inst, "THRSW with the "
421bf215546Sopenharmony_ci                                                   "scoreboard locked.");
422bf215546Sopenharmony_ci                                }
423bf215546Sopenharmony_ci
424bf215546Sopenharmony_ci                                /* No thread switching after lthrsw, since
425bf215546Sopenharmony_ci                                 * lthrsw means that we get delayed until the
426bf215546Sopenharmony_ci                                 * other shader is ready for us to terminate.
427bf215546Sopenharmony_ci                                 */
428bf215546Sopenharmony_ci                                if (last_thrsw_found) {
429bf215546Sopenharmony_ci                                        fail_instr(inst, "THRSW after a "
430bf215546Sopenharmony_ci                                                   "previous LTHRSW");
431bf215546Sopenharmony_ci                                }
432bf215546Sopenharmony_ci
433bf215546Sopenharmony_ci                                if (sig == QPU_SIG_LAST_THREAD_SWITCH)
434bf215546Sopenharmony_ci                                        last_thrsw_found = true;
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_ci                                /* No THRSW while we already have a THRSW
437bf215546Sopenharmony_ci                                 * queued.
438bf215546Sopenharmony_ci                                 */
439bf215546Sopenharmony_ci                                if (i < thrsw_ip) {
440bf215546Sopenharmony_ci                                        fail_instr(inst,
441bf215546Sopenharmony_ci                                                   "THRSW with a THRSW queued.");
442bf215546Sopenharmony_ci                                }
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci                                thrsw_ip = i + 3;
445bf215546Sopenharmony_ci                                break;
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_ci                        case QPU_SIG_LOAD_TMU0:
448bf215546Sopenharmony_ci                        case QPU_SIG_LOAD_TMU1:
449bf215546Sopenharmony_ci                                if (last_tex_samples_outstanding == 0) {
450bf215546Sopenharmony_ci                                        fail_instr(inst, "TMU load with nothing "
451bf215546Sopenharmony_ci                                                   "in the results fifo from "
452bf215546Sopenharmony_ci                                                   "the previous THRSW.");
453bf215546Sopenharmony_ci                                }
454bf215546Sopenharmony_ci
455bf215546Sopenharmony_ci                                last_tex_samples_outstanding--;
456bf215546Sopenharmony_ci                                break;
457bf215546Sopenharmony_ci                        }
458bf215546Sopenharmony_ci
459bf215546Sopenharmony_ci                        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
460bf215546Sopenharmony_ci                        uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
461bf215546Sopenharmony_ci                        if (waddr_add == QPU_W_TMU0_S ||
462bf215546Sopenharmony_ci                            waddr_add == QPU_W_TMU1_S ||
463bf215546Sopenharmony_ci                            waddr_mul == QPU_W_TMU0_S ||
464bf215546Sopenharmony_ci                            waddr_mul == QPU_W_TMU1_S) {
465bf215546Sopenharmony_ci                                tex_samples_outstanding++;
466bf215546Sopenharmony_ci                        }
467bf215546Sopenharmony_ci                }
468bf215546Sopenharmony_ci        }
469bf215546Sopenharmony_ci}
470