1/*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <stdlib.h>
25#include <string.h>
26#include "util/macros.h"
27#include "broadcom/common/v3d_device_info.h"
28#include "qpu_instr.h"
29
30const char *
31v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo,
32                         enum v3d_qpu_waddr waddr)
33{
34        /* V3D 4.x UNIFA aliases TMU in V3D 3.x in the table below */
35        if (devinfo->ver < 40 && waddr == V3D_QPU_WADDR_TMU)
36                return "tmu";
37
38        static const char *waddr_magic[] = {
39                [V3D_QPU_WADDR_R0] = "r0",
40                [V3D_QPU_WADDR_R1] = "r1",
41                [V3D_QPU_WADDR_R2] = "r2",
42                [V3D_QPU_WADDR_R3] = "r3",
43                [V3D_QPU_WADDR_R4] = "r4",
44                [V3D_QPU_WADDR_R5] = "r5",
45                [V3D_QPU_WADDR_NOP] = "-",
46                [V3D_QPU_WADDR_TLB] = "tlb",
47                [V3D_QPU_WADDR_TLBU] = "tlbu",
48                [V3D_QPU_WADDR_UNIFA] = "unifa",
49                [V3D_QPU_WADDR_TMUL] = "tmul",
50                [V3D_QPU_WADDR_TMUD] = "tmud",
51                [V3D_QPU_WADDR_TMUA] = "tmua",
52                [V3D_QPU_WADDR_TMUAU] = "tmuau",
53                [V3D_QPU_WADDR_VPM] = "vpm",
54                [V3D_QPU_WADDR_VPMU] = "vpmu",
55                [V3D_QPU_WADDR_SYNC] = "sync",
56                [V3D_QPU_WADDR_SYNCU] = "syncu",
57                [V3D_QPU_WADDR_SYNCB] = "syncb",
58                [V3D_QPU_WADDR_RECIP] = "recip",
59                [V3D_QPU_WADDR_RSQRT] = "rsqrt",
60                [V3D_QPU_WADDR_EXP] = "exp",
61                [V3D_QPU_WADDR_LOG] = "log",
62                [V3D_QPU_WADDR_SIN] = "sin",
63                [V3D_QPU_WADDR_RSQRT2] = "rsqrt2",
64                [V3D_QPU_WADDR_TMUC] = "tmuc",
65                [V3D_QPU_WADDR_TMUS] = "tmus",
66                [V3D_QPU_WADDR_TMUT] = "tmut",
67                [V3D_QPU_WADDR_TMUR] = "tmur",
68                [V3D_QPU_WADDR_TMUI] = "tmui",
69                [V3D_QPU_WADDR_TMUB] = "tmub",
70                [V3D_QPU_WADDR_TMUDREF] = "tmudref",
71                [V3D_QPU_WADDR_TMUOFF] = "tmuoff",
72                [V3D_QPU_WADDR_TMUSCM] = "tmuscm",
73                [V3D_QPU_WADDR_TMUSF] = "tmusf",
74                [V3D_QPU_WADDR_TMUSLOD] = "tmuslod",
75                [V3D_QPU_WADDR_TMUHS] = "tmuhs",
76                [V3D_QPU_WADDR_TMUHSCM] = "tmuscm",
77                [V3D_QPU_WADDR_TMUHSF] = "tmuhsf",
78                [V3D_QPU_WADDR_TMUHSLOD] = "tmuhslod",
79                [V3D_QPU_WADDR_R5REP] = "r5rep",
80        };
81
82        return waddr_magic[waddr];
83}
84
85const char *
86v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
87{
88        static const char *op_names[] = {
89                [V3D_QPU_A_FADD] = "fadd",
90                [V3D_QPU_A_FADDNF] = "faddnf",
91                [V3D_QPU_A_VFPACK] = "vfpack",
92                [V3D_QPU_A_ADD] = "add",
93                [V3D_QPU_A_SUB] = "sub",
94                [V3D_QPU_A_FSUB] = "fsub",
95                [V3D_QPU_A_MIN] = "min",
96                [V3D_QPU_A_MAX] = "max",
97                [V3D_QPU_A_UMIN] = "umin",
98                [V3D_QPU_A_UMAX] = "umax",
99                [V3D_QPU_A_SHL] = "shl",
100                [V3D_QPU_A_SHR] = "shr",
101                [V3D_QPU_A_ASR] = "asr",
102                [V3D_QPU_A_ROR] = "ror",
103                [V3D_QPU_A_FMIN] = "fmin",
104                [V3D_QPU_A_FMAX] = "fmax",
105                [V3D_QPU_A_VFMIN] = "vfmin",
106                [V3D_QPU_A_AND] = "and",
107                [V3D_QPU_A_OR] = "or",
108                [V3D_QPU_A_XOR] = "xor",
109                [V3D_QPU_A_VADD] = "vadd",
110                [V3D_QPU_A_VSUB] = "vsub",
111                [V3D_QPU_A_NOT] = "not",
112                [V3D_QPU_A_NEG] = "neg",
113                [V3D_QPU_A_FLAPUSH] = "flapush",
114                [V3D_QPU_A_FLBPUSH] = "flbpush",
115                [V3D_QPU_A_FLPOP] = "flpop",
116                [V3D_QPU_A_RECIP] = "recip",
117                [V3D_QPU_A_SETMSF] = "setmsf",
118                [V3D_QPU_A_SETREVF] = "setrevf",
119                [V3D_QPU_A_NOP] = "nop",
120                [V3D_QPU_A_TIDX] = "tidx",
121                [V3D_QPU_A_EIDX] = "eidx",
122                [V3D_QPU_A_LR] = "lr",
123                [V3D_QPU_A_VFLA] = "vfla",
124                [V3D_QPU_A_VFLNA] = "vflna",
125                [V3D_QPU_A_VFLB] = "vflb",
126                [V3D_QPU_A_VFLNB] = "vflnb",
127                [V3D_QPU_A_FXCD] = "fxcd",
128                [V3D_QPU_A_XCD] = "xcd",
129                [V3D_QPU_A_FYCD] = "fycd",
130                [V3D_QPU_A_YCD] = "ycd",
131                [V3D_QPU_A_MSF] = "msf",
132                [V3D_QPU_A_REVF] = "revf",
133                [V3D_QPU_A_VDWWT] = "vdwwt",
134                [V3D_QPU_A_IID] = "iid",
135                [V3D_QPU_A_SAMPID] = "sampid",
136                [V3D_QPU_A_BARRIERID] = "barrierid",
137                [V3D_QPU_A_TMUWT] = "tmuwt",
138                [V3D_QPU_A_VPMSETUP] = "vpmsetup",
139                [V3D_QPU_A_VPMWT] = "vpmwt",
140                [V3D_QPU_A_FLAFIRST] = "flafirst",
141                [V3D_QPU_A_FLNAFIRST] = "flnafirst",
142                [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",
143                [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",
144                [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
145                [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
146                [V3D_QPU_A_LDVPMP] = "ldvpmp",
147                [V3D_QPU_A_RSQRT] = "rsqrt",
148                [V3D_QPU_A_EXP] = "exp",
149                [V3D_QPU_A_LOG] = "log",
150                [V3D_QPU_A_SIN] = "sin",
151                [V3D_QPU_A_RSQRT2] = "rsqrt2",
152                [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
153                [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
154                [V3D_QPU_A_FCMP] = "fcmp",
155                [V3D_QPU_A_VFMAX] = "vfmax",
156                [V3D_QPU_A_FROUND] = "fround",
157                [V3D_QPU_A_FTOIN] = "ftoin",
158                [V3D_QPU_A_FTRUNC] = "ftrunc",
159                [V3D_QPU_A_FTOIZ] = "ftoiz",
160                [V3D_QPU_A_FFLOOR] = "ffloor",
161                [V3D_QPU_A_FTOUZ] = "ftouz",
162                [V3D_QPU_A_FCEIL] = "fceil",
163                [V3D_QPU_A_FTOC] = "ftoc",
164                [V3D_QPU_A_FDX] = "fdx",
165                [V3D_QPU_A_FDY] = "fdy",
166                [V3D_QPU_A_STVPMV] = "stvpmv",
167                [V3D_QPU_A_STVPMD] = "stvpmd",
168                [V3D_QPU_A_STVPMP] = "stvpmp",
169                [V3D_QPU_A_ITOF] = "itof",
170                [V3D_QPU_A_CLZ] = "clz",
171                [V3D_QPU_A_UTOF] = "utof",
172        };
173
174        if (op >= ARRAY_SIZE(op_names))
175                return NULL;
176
177        return op_names[op];
178}
179
180const char *
181v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)
182{
183        static const char *op_names[] = {
184                [V3D_QPU_M_ADD] = "add",
185                [V3D_QPU_M_SUB] = "sub",
186                [V3D_QPU_M_UMUL24] = "umul24",
187                [V3D_QPU_M_VFMUL] = "vfmul",
188                [V3D_QPU_M_SMUL24] = "smul24",
189                [V3D_QPU_M_MULTOP] = "multop",
190                [V3D_QPU_M_FMOV] = "fmov",
191                [V3D_QPU_M_MOV] = "mov",
192                [V3D_QPU_M_NOP] = "nop",
193                [V3D_QPU_M_FMUL] = "fmul",
194        };
195
196        if (op >= ARRAY_SIZE(op_names))
197                return NULL;
198
199        return op_names[op];
200}
201
202const char *
203v3d_qpu_cond_name(enum v3d_qpu_cond cond)
204{
205        switch (cond) {
206        case V3D_QPU_COND_NONE:
207                return "";
208        case V3D_QPU_COND_IFA:
209                return ".ifa";
210        case V3D_QPU_COND_IFB:
211                return ".ifb";
212        case V3D_QPU_COND_IFNA:
213                return ".ifna";
214        case V3D_QPU_COND_IFNB:
215                return ".ifnb";
216        default:
217                unreachable("bad cond value");
218        }
219}
220
221const char *
222v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)
223{
224        switch (cond) {
225        case V3D_QPU_BRANCH_COND_ALWAYS:
226                return "";
227        case V3D_QPU_BRANCH_COND_A0:
228                return ".a0";
229        case V3D_QPU_BRANCH_COND_NA0:
230                return ".na0";
231        case V3D_QPU_BRANCH_COND_ALLA:
232                return ".alla";
233        case V3D_QPU_BRANCH_COND_ANYNA:
234                return ".anyna";
235        case V3D_QPU_BRANCH_COND_ANYA:
236                return ".anya";
237        case V3D_QPU_BRANCH_COND_ALLNA:
238                return ".allna";
239        default:
240                unreachable("bad branch cond value");
241        }
242}
243
244const char *
245v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)
246{
247        switch (msfign) {
248        case V3D_QPU_MSFIGN_NONE:
249                return "";
250        case V3D_QPU_MSFIGN_P:
251                return "p";
252        case V3D_QPU_MSFIGN_Q:
253                return "q";
254        default:
255                unreachable("bad branch cond value");
256        }
257}
258
259const char *
260v3d_qpu_pf_name(enum v3d_qpu_pf pf)
261{
262        switch (pf) {
263        case V3D_QPU_PF_NONE:
264                return "";
265        case V3D_QPU_PF_PUSHZ:
266                return ".pushz";
267        case V3D_QPU_PF_PUSHN:
268                return ".pushn";
269        case V3D_QPU_PF_PUSHC:
270                return ".pushc";
271        default:
272                unreachable("bad pf value");
273        }
274}
275
276const char *
277v3d_qpu_uf_name(enum v3d_qpu_uf uf)
278{
279        switch (uf) {
280        case V3D_QPU_UF_NONE:
281                return "";
282        case V3D_QPU_UF_ANDZ:
283                return ".andz";
284        case V3D_QPU_UF_ANDNZ:
285                return ".andnz";
286        case V3D_QPU_UF_NORZ:
287                return ".norz";
288        case V3D_QPU_UF_NORNZ:
289                return ".nornz";
290        case V3D_QPU_UF_ANDN:
291                return ".andn";
292        case V3D_QPU_UF_ANDNN:
293                return ".andnn";
294        case V3D_QPU_UF_NORN:
295                return ".norn";
296        case V3D_QPU_UF_NORNN:
297                return ".nornn";
298        case V3D_QPU_UF_ANDC:
299                return ".andc";
300        case V3D_QPU_UF_ANDNC:
301                return ".andnc";
302        case V3D_QPU_UF_NORC:
303                return ".norc";
304        case V3D_QPU_UF_NORNC:
305                return ".nornc";
306        default:
307                unreachable("bad pf value");
308        }
309}
310
311const char *
312v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)
313{
314        switch (pack) {
315        case V3D_QPU_PACK_NONE:
316                return "";
317        case V3D_QPU_PACK_L:
318                return ".l";
319        case V3D_QPU_PACK_H:
320                return ".h";
321        default:
322                unreachable("bad pack value");
323        }
324}
325
326const char *
327v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)
328{
329        switch (unpack) {
330        case V3D_QPU_UNPACK_NONE:
331                return "";
332        case V3D_QPU_UNPACK_L:
333                return ".l";
334        case V3D_QPU_UNPACK_H:
335                return ".h";
336        case V3D_QPU_UNPACK_ABS:
337                return ".abs";
338        case V3D_QPU_UNPACK_REPLICATE_32F_16:
339                return ".ff";
340        case V3D_QPU_UNPACK_REPLICATE_L_16:
341                return ".ll";
342        case V3D_QPU_UNPACK_REPLICATE_H_16:
343                return ".hh";
344        case V3D_QPU_UNPACK_SWAP_16:
345                return ".swp";
346        default:
347                unreachable("bad unpack value");
348        }
349}
350
351#define D	1
352#define A	2
353#define B	4
354static const uint8_t add_op_args[] = {
355        [V3D_QPU_A_FADD] = D | A | B,
356        [V3D_QPU_A_FADDNF] = D | A | B,
357        [V3D_QPU_A_VFPACK] = D | A | B,
358        [V3D_QPU_A_ADD] = D | A | B,
359        [V3D_QPU_A_VFPACK] = D | A | B,
360        [V3D_QPU_A_SUB] = D | A | B,
361        [V3D_QPU_A_VFPACK] = D | A | B,
362        [V3D_QPU_A_FSUB] = D | A | B,
363        [V3D_QPU_A_MIN] = D | A | B,
364        [V3D_QPU_A_MAX] = D | A | B,
365        [V3D_QPU_A_UMIN] = D | A | B,
366        [V3D_QPU_A_UMAX] = D | A | B,
367        [V3D_QPU_A_SHL] = D | A | B,
368        [V3D_QPU_A_SHR] = D | A | B,
369        [V3D_QPU_A_ASR] = D | A | B,
370        [V3D_QPU_A_ROR] = D | A | B,
371        [V3D_QPU_A_FMIN] = D | A | B,
372        [V3D_QPU_A_FMAX] = D | A | B,
373        [V3D_QPU_A_VFMIN] = D | A | B,
374
375        [V3D_QPU_A_AND] = D | A | B,
376        [V3D_QPU_A_OR] = D | A | B,
377        [V3D_QPU_A_XOR] = D | A | B,
378
379        [V3D_QPU_A_VADD] = D | A | B,
380        [V3D_QPU_A_VSUB] = D | A | B,
381        [V3D_QPU_A_NOT] = D | A,
382        [V3D_QPU_A_NEG] = D | A,
383        [V3D_QPU_A_FLAPUSH] = D | A,
384        [V3D_QPU_A_FLBPUSH] = D | A,
385        [V3D_QPU_A_FLPOP] = D | A,
386        [V3D_QPU_A_RECIP] = D | A,
387        [V3D_QPU_A_SETMSF] = D | A,
388        [V3D_QPU_A_SETREVF] = D | A,
389        [V3D_QPU_A_NOP] = 0,
390        [V3D_QPU_A_TIDX] = D,
391        [V3D_QPU_A_EIDX] = D,
392        [V3D_QPU_A_LR] = D,
393        [V3D_QPU_A_VFLA] = D,
394        [V3D_QPU_A_VFLNA] = D,
395        [V3D_QPU_A_VFLB] = D,
396        [V3D_QPU_A_VFLNB] = D,
397
398        [V3D_QPU_A_FXCD] = D,
399        [V3D_QPU_A_XCD] = D,
400        [V3D_QPU_A_FYCD] = D,
401        [V3D_QPU_A_YCD] = D,
402
403        [V3D_QPU_A_MSF] = D,
404        [V3D_QPU_A_REVF] = D,
405        [V3D_QPU_A_VDWWT] = D,
406        [V3D_QPU_A_IID] = D,
407        [V3D_QPU_A_SAMPID] = D,
408        [V3D_QPU_A_BARRIERID] = D,
409        [V3D_QPU_A_TMUWT] = D,
410        [V3D_QPU_A_VPMWT] = D,
411        [V3D_QPU_A_FLAFIRST] = D,
412        [V3D_QPU_A_FLNAFIRST] = D,
413
414        [V3D_QPU_A_VPMSETUP] = D | A,
415
416        [V3D_QPU_A_LDVPMV_IN] = D | A,
417        [V3D_QPU_A_LDVPMV_OUT] = D | A,
418        [V3D_QPU_A_LDVPMD_IN] = D | A,
419        [V3D_QPU_A_LDVPMD_OUT] = D | A,
420        [V3D_QPU_A_LDVPMP] = D | A,
421        [V3D_QPU_A_RSQRT] = D | A,
422        [V3D_QPU_A_EXP] = D | A,
423        [V3D_QPU_A_LOG] = D | A,
424        [V3D_QPU_A_SIN] = D | A,
425        [V3D_QPU_A_RSQRT2] = D | A,
426        [V3D_QPU_A_LDVPMG_IN] = D | A | B,
427        [V3D_QPU_A_LDVPMG_OUT] = D | A | B,
428
429        /* FIXME: MOVABSNEG */
430
431        [V3D_QPU_A_FCMP] = D | A | B,
432        [V3D_QPU_A_VFMAX] = D | A | B,
433
434        [V3D_QPU_A_FROUND] = D | A,
435        [V3D_QPU_A_FTOIN] = D | A,
436        [V3D_QPU_A_FTRUNC] = D | A,
437        [V3D_QPU_A_FTOIZ] = D | A,
438        [V3D_QPU_A_FFLOOR] = D | A,
439        [V3D_QPU_A_FTOUZ] = D | A,
440        [V3D_QPU_A_FCEIL] = D | A,
441        [V3D_QPU_A_FTOC] = D | A,
442
443        [V3D_QPU_A_FDX] = D | A,
444        [V3D_QPU_A_FDY] = D | A,
445
446        [V3D_QPU_A_STVPMV] = A | B,
447        [V3D_QPU_A_STVPMD] = A | B,
448        [V3D_QPU_A_STVPMP] = A | B,
449
450        [V3D_QPU_A_ITOF] = D | A,
451        [V3D_QPU_A_CLZ] = D | A,
452        [V3D_QPU_A_UTOF] = D | A,
453};
454
455static const uint8_t mul_op_args[] = {
456        [V3D_QPU_M_ADD] = D | A | B,
457        [V3D_QPU_M_SUB] = D | A | B,
458        [V3D_QPU_M_UMUL24] = D | A | B,
459        [V3D_QPU_M_VFMUL] = D | A | B,
460        [V3D_QPU_M_SMUL24] = D | A | B,
461        [V3D_QPU_M_MULTOP] = D | A | B,
462        [V3D_QPU_M_FMOV] = D | A,
463        [V3D_QPU_M_NOP] = 0,
464        [V3D_QPU_M_MOV] = D | A,
465        [V3D_QPU_M_FMUL] = D | A | B,
466};
467
468bool
469v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)
470{
471        assert(op < ARRAY_SIZE(add_op_args));
472
473        return add_op_args[op] & D;
474}
475
476bool
477v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)
478{
479        assert(op < ARRAY_SIZE(mul_op_args));
480
481        return mul_op_args[op] & D;
482}
483
484int
485v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)
486{
487        assert(op < ARRAY_SIZE(add_op_args));
488
489        uint8_t args = add_op_args[op];
490        if (args & B)
491                return 2;
492        else if (args & A)
493                return 1;
494        else
495                return 0;
496}
497
498int
499v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)
500{
501        assert(op < ARRAY_SIZE(mul_op_args));
502
503        uint8_t args = mul_op_args[op];
504        if (args & B)
505                return 2;
506        else if (args & A)
507                return 1;
508        else
509                return 0;
510}
511
512enum v3d_qpu_cond
513v3d_qpu_cond_invert(enum v3d_qpu_cond cond)
514{
515        switch (cond) {
516        case V3D_QPU_COND_IFA:
517                return V3D_QPU_COND_IFNA;
518        case V3D_QPU_COND_IFNA:
519                return V3D_QPU_COND_IFA;
520        case V3D_QPU_COND_IFB:
521                return V3D_QPU_COND_IFNB;
522        case V3D_QPU_COND_IFNB:
523                return V3D_QPU_COND_IFB;
524        default:
525                unreachable("Non-invertible cond");
526        }
527}
528
529bool
530v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)
531{
532        switch (waddr) {
533        case V3D_QPU_WADDR_RECIP:
534        case V3D_QPU_WADDR_RSQRT:
535        case V3D_QPU_WADDR_EXP:
536        case V3D_QPU_WADDR_LOG:
537        case V3D_QPU_WADDR_SIN:
538        case V3D_QPU_WADDR_RSQRT2:
539                return true;
540        default:
541                return false;
542        }
543}
544
545bool
546v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info *devinfo,
547                           enum v3d_qpu_waddr waddr)
548{
549        if (devinfo->ver >= 40) {
550                return ((waddr >= V3D_QPU_WADDR_TMUD &&
551                         waddr <= V3D_QPU_WADDR_TMUAU) ||
552                       (waddr >= V3D_QPU_WADDR_TMUC &&
553                        waddr <= V3D_QPU_WADDR_TMUHSLOD));
554        } else {
555                return ((waddr >= V3D_QPU_WADDR_TMU &&
556                         waddr <= V3D_QPU_WADDR_TMUAU) ||
557                       (waddr >= V3D_QPU_WADDR_TMUC &&
558                        waddr <= V3D_QPU_WADDR_TMUHSLOD));
559        }
560}
561
562bool
563v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst)
564{
565        return (inst->sig.ldtmu ||
566                (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
567                 inst->alu.add.op == V3D_QPU_A_TMUWT));
568}
569
570bool
571v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)
572{
573        return (waddr == V3D_QPU_WADDR_TLB ||
574                waddr == V3D_QPU_WADDR_TLBU);
575}
576
577bool
578v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)
579{
580        return (waddr == V3D_QPU_WADDR_VPM ||
581                waddr == V3D_QPU_WADDR_VPMU);
582}
583
584bool
585v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
586{
587        return (waddr == V3D_QPU_WADDR_SYNC ||
588                waddr == V3D_QPU_WADDR_SYNCB ||
589                waddr == V3D_QPU_WADDR_SYNCU);
590}
591
592bool
593v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)
594{
595        switch (waddr) {
596        case V3D_QPU_WADDR_VPMU:
597        case V3D_QPU_WADDR_TLBU:
598        case V3D_QPU_WADDR_TMUAU:
599        case V3D_QPU_WADDR_SYNCU:
600                return true;
601        default:
602                return false;
603        }
604}
605
606static bool
607v3d_qpu_add_op_reads_vpm(enum  v3d_qpu_add_op op)
608{
609        switch (op) {
610        case V3D_QPU_A_VPMSETUP:
611        case V3D_QPU_A_LDVPMV_IN:
612        case V3D_QPU_A_LDVPMV_OUT:
613        case V3D_QPU_A_LDVPMD_IN:
614        case V3D_QPU_A_LDVPMD_OUT:
615        case V3D_QPU_A_LDVPMP:
616        case V3D_QPU_A_LDVPMG_IN:
617        case V3D_QPU_A_LDVPMG_OUT:
618                return true;
619        default:
620                return false;
621        }
622}
623
624static bool
625v3d_qpu_add_op_writes_vpm(enum  v3d_qpu_add_op op)
626{
627        switch (op) {
628        case V3D_QPU_A_VPMSETUP:
629        case V3D_QPU_A_STVPMV:
630        case V3D_QPU_A_STVPMD:
631        case V3D_QPU_A_STVPMP:
632                return true;
633        default:
634                return false;
635        }
636}
637
638bool
639v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
640{
641        if (inst->sig.ldtlb ||
642            inst->sig.ldtlbu)
643                return true;
644
645        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
646                if (inst->alu.add.magic_write &&
647                    v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) {
648                        return true;
649                }
650
651                if (inst->alu.mul.magic_write &&
652                    v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) {
653                        return true;
654                }
655        }
656
657        return false;
658}
659
660bool
661v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
662{
663        if (v3d_qpu_instr_is_sfu(inst))
664                return true;
665
666        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
667                if (inst->alu.add.magic_write &&
668                    v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
669                        return true;
670                }
671
672                if (inst->alu.mul.magic_write &&
673                    v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) {
674                        return true;
675                }
676        }
677
678        return false;
679}
680
681bool
682v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst)
683{
684        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
685                switch (inst->alu.add.op) {
686                case V3D_QPU_A_RECIP:
687                case V3D_QPU_A_RSQRT:
688                case V3D_QPU_A_EXP:
689                case V3D_QPU_A_LOG:
690                case V3D_QPU_A_SIN:
691                case V3D_QPU_A_RSQRT2:
692                        return true;
693                default:
694                        return false;
695                }
696        }
697        return false;
698}
699
700bool
701v3d_qpu_writes_tmu(const struct v3d_device_info *devinfo,
702                   const struct v3d_qpu_instr *inst)
703{
704        return (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
705                ((inst->alu.add.magic_write &&
706                  v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.add.waddr)) ||
707                 (inst->alu.mul.magic_write &&
708                  v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.mul.waddr))));
709}
710
711bool
712v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info *devinfo,
713                            const struct v3d_qpu_instr *inst)
714{
715        return v3d_qpu_writes_tmu(devinfo, inst) &&
716               (!inst->alu.add.magic_write ||
717                inst->alu.add.waddr != V3D_QPU_WADDR_TMUC) &&
718               (!inst->alu.mul.magic_write ||
719                inst->alu.mul.waddr != V3D_QPU_WADDR_TMUC);
720}
721
722bool
723v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst)
724{
725        if (inst->sig.ldvpm)
726                return true;
727
728        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
729                if (v3d_qpu_add_op_reads_vpm(inst->alu.add.op))
730                        return true;
731        }
732
733        return false;
734}
735
736bool
737v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst)
738{
739        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
740                if (v3d_qpu_add_op_writes_vpm(inst->alu.add.op))
741                        return true;
742
743                if (inst->alu.add.magic_write &&
744                    v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) {
745                        return true;
746                }
747
748                if (inst->alu.mul.magic_write &&
749                    v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) {
750                        return true;
751                }
752        }
753
754        return false;
755}
756
757bool
758v3d_qpu_writes_unifa(const struct v3d_device_info *devinfo,
759                     const struct v3d_qpu_instr *inst)
760{
761        if (devinfo->ver < 40)
762                return false;
763
764        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
765                if (inst->alu.add.op != V3D_QPU_A_NOP &&
766                    inst->alu.add.magic_write &&
767                    inst->alu.add.waddr == V3D_QPU_WADDR_UNIFA) {
768                        return true;
769                }
770
771                if (inst->alu.mul.op != V3D_QPU_M_NOP &&
772                    inst->alu.mul.magic_write &&
773                    inst->alu.mul.waddr == V3D_QPU_WADDR_UNIFA) {
774                        return true;
775                }
776        }
777
778        return false;
779}
780
781bool
782v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst)
783{
784        return inst->type == V3D_QPU_INSTR_TYPE_ALU &&
785               inst->alu.add.op == V3D_QPU_A_VPMWT;
786}
787
/**
 * True when either v3d_qpu_reads_vpm() or v3d_qpu_writes_vpm() holds for the
 * instruction.
 */
bool
v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_vpm(inst))
                return true;

        return v3d_qpu_writes_vpm(inst);
}
793
/**
 * True when the instruction reads the VPM, writes the VPM, or waits on it
 * (see v3d_qpu_reads_vpm(), v3d_qpu_writes_vpm() and v3d_qpu_waits_vpm()).
 */
bool
v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_or_writes_vpm(inst))
                return true;

        return v3d_qpu_waits_vpm(inst);
}
801
802static bool
803qpu_writes_magic_waddr_explicitly(const struct v3d_device_info *devinfo,
804                                  const struct v3d_qpu_instr *inst,
805                                  uint32_t waddr)
806{
807        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
808                if (inst->alu.add.magic_write && inst->alu.add.waddr == waddr)
809                        return true;
810
811                if (inst->alu.mul.magic_write && inst->alu.mul.waddr == waddr)
812                        return true;
813        }
814
815        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
816            inst->sig_magic && inst->sig_addr == waddr) {
817                return true;
818        }
819
820        return false;
821}
822
823bool
824v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
825                  const struct v3d_qpu_instr *inst)
826{
827        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R3))
828                return true;
829
830        return (devinfo->ver < 41 && inst->sig.ldvary) || inst->sig.ldvpm;
831}
832
833bool
834v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
835                  const struct v3d_qpu_instr *inst)
836{
837        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
838                if (inst->alu.add.magic_write &&
839                    (inst->alu.add.waddr == V3D_QPU_WADDR_R4 ||
840                     v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))) {
841                        return true;
842                }
843
844                if (inst->alu.mul.magic_write &&
845                    (inst->alu.mul.waddr == V3D_QPU_WADDR_R4 ||
846                     v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))) {
847                        return true;
848                }
849        }
850
851        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
852                if (inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4)
853                        return true;
854        } else if (inst->sig.ldtmu) {
855                return true;
856        }
857
858        return false;
859}
860
861bool
862v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
863                  const struct v3d_qpu_instr *inst)
864{
865        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R5))
866                return true;
867
868        return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;
869}
870
871bool
872v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
873                     const struct v3d_qpu_instr *inst)
874{
875        if (v3d_qpu_writes_r5(devinfo, inst))
876                return true;
877        if (v3d_qpu_writes_r4(devinfo, inst))
878                return true;
879        if (v3d_qpu_writes_r3(devinfo, inst))
880                return true;
881        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R2))
882                return true;
883        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R1))
884                return true;
885        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R0))
886                return true;
887
888        return false;
889}
890
891bool
892v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
893{
894        int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
895        int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
896
897        return ((add_nsrc > 0 && inst->alu.add.a == mux) ||
898                (add_nsrc > 1 && inst->alu.add.b == mux) ||
899                (mul_nsrc > 0 && inst->alu.mul.a == mux) ||
900                (mul_nsrc > 1 && inst->alu.mul.b == mux));
901}
902
903bool
904v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
905                           const struct v3d_qpu_sig *sig)
906{
907        if (devinfo->ver < 41)
908                return false;
909
910        return (sig->ldunifrf ||
911                sig->ldunifarf ||
912                sig->ldvary ||
913                sig->ldtmu ||
914                sig->ldtlb ||
915                sig->ldtlbu);
916}
917
918bool
919v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst)
920{
921        if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
922                return inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS;
923        } else if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
924                if (inst->flags.ac != V3D_QPU_COND_NONE ||
925                    inst->flags.mc != V3D_QPU_COND_NONE ||
926                    inst->flags.auf != V3D_QPU_UF_NONE ||
927                    inst->flags.muf != V3D_QPU_UF_NONE)
928                        return true;
929
930                switch (inst->alu.add.op) {
931                case V3D_QPU_A_VFLA:
932                case V3D_QPU_A_VFLNA:
933                case V3D_QPU_A_VFLB:
934                case V3D_QPU_A_VFLNB:
935                case V3D_QPU_A_FLAPUSH:
936                case V3D_QPU_A_FLBPUSH:
937                case V3D_QPU_A_FLAFIRST:
938                case V3D_QPU_A_FLNAFIRST:
939                        return true;
940                default:
941                        break;
942                }
943        }
944
945        return false;
946}
947
948bool
949v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)
950{
951        if (inst->flags.apf != V3D_QPU_PF_NONE ||
952            inst->flags.mpf != V3D_QPU_PF_NONE ||
953            inst->flags.auf != V3D_QPU_UF_NONE ||
954            inst->flags.muf != V3D_QPU_UF_NONE) {
955                return true;
956        }
957
958        return false;
959}
960
961bool
962v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)
963{
964        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
965                return false;
966
967        switch (inst->alu.add.op) {
968        case V3D_QPU_A_FADD:
969        case V3D_QPU_A_FADDNF:
970        case V3D_QPU_A_FSUB:
971        case V3D_QPU_A_FMIN:
972        case V3D_QPU_A_FMAX:
973        case V3D_QPU_A_FCMP:
974        case V3D_QPU_A_FROUND:
975        case V3D_QPU_A_FTRUNC:
976        case V3D_QPU_A_FFLOOR:
977        case V3D_QPU_A_FCEIL:
978        case V3D_QPU_A_FDX:
979        case V3D_QPU_A_FDY:
980        case V3D_QPU_A_FTOIN:
981        case V3D_QPU_A_FTOIZ:
982        case V3D_QPU_A_FTOUZ:
983        case V3D_QPU_A_FTOC:
984        case V3D_QPU_A_VFPACK:
985                return true;
986                break;
987        default:
988                break;
989        }
990
991        switch (inst->alu.mul.op) {
992        case V3D_QPU_M_FMOV:
993        case V3D_QPU_M_FMUL:
994                return true;
995                break;
996        default:
997                break;
998        }
999
1000        return false;
1001}
1002bool
1003v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)
1004{
1005        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
1006                return false;
1007
1008        switch (inst->alu.add.op) {
1009        case V3D_QPU_A_VFMIN:
1010        case V3D_QPU_A_VFMAX:
1011                return true;
1012                break;
1013        default:
1014                break;
1015        }
1016
1017        switch (inst->alu.mul.op) {
1018        case V3D_QPU_M_VFMUL:
1019                return true;
1020                break;
1021        default:
1022                break;
1023        }
1024
1025        return false;
1026}
1027
1028bool
1029v3d_qpu_is_nop(struct v3d_qpu_instr *inst)
1030{
1031        static const struct v3d_qpu_sig nosig = { 0 };
1032
1033        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
1034                return false;
1035        if (inst->alu.add.op != V3D_QPU_A_NOP)
1036                return false;
1037        if (inst->alu.mul.op != V3D_QPU_M_NOP)
1038                return false;
1039        if (memcmp(&inst->sig, &nosig, sizeof(nosig)))
1040                return false;
1041        return true;
1042}
1043