/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
/**
 * @file qpu_instr.h
 *
 * Definitions of the unpacked form of QPU instructions.  Assembly and
 * disassembly will use this for talking about instructions, with qpu_encode.c
 * and qpu_decode.c handling the pack and unpack of the actual 64-bit QPU
 * instruction.
 */
32
33#ifndef QPU_INSTR_H
34#define QPU_INSTR_H
35
36#include <stdbool.h>
37#include <stdint.h>
38#include "util/macros.h"
39
/*
 * Forward declaration: this header only ever passes pointers to the device
 * info, so the full definition is not needed here.
 */
struct v3d_device_info;
41
/**
 * Unpacked set of signal flags carried by an instruction.
 *
 * Each signal is an independent 1-bit boolean bitfield, so a
 * zero-initialized struct has no signals set.  The packed encoding is
 * produced/consumed by v3d_qpu_sig_pack()/v3d_qpu_sig_unpack().
 */
struct v3d_qpu_sig {
        bool thrsw:1;
        bool ldunif:1;
        bool ldunifa:1;
        bool ldunifrf:1;
        bool ldunifarf:1;
        bool ldtmu:1;
        bool ldvary:1;
        bool ldvpm:1;
        bool ldtlb:1;
        bool ldtlbu:1;
        bool small_imm:1;
        bool ucb:1;
        bool rotate:1;
        bool wrtmuc:1;
};
58
/**
 * Condition selector for an ALU operation (see struct v3d_qpu_flags).
 *
 * V3D_QPU_COND_NONE is 0 so that a zero-initialized instruction is
 * unconditional.
 */
enum v3d_qpu_cond {
        V3D_QPU_COND_NONE,
        V3D_QPU_COND_IFA,
        V3D_QPU_COND_IFB,
        V3D_QPU_COND_IFNA,
        V3D_QPU_COND_IFNB,
};
66
/**
 * "Push flag" selector for an ALU operation (see struct v3d_qpu_flags).
 *
 * V3D_QPU_PF_NONE is 0, the default for a zero-initialized instruction.
 */
enum v3d_qpu_pf {
        V3D_QPU_PF_NONE,
        V3D_QPU_PF_PUSHZ,
        V3D_QPU_PF_PUSHN,
        V3D_QPU_PF_PUSHC,
};
73
/**
 * "Update flag" selector for an ALU operation (see struct v3d_qpu_flags).
 *
 * V3D_QPU_UF_NONE is 0, the default for a zero-initialized instruction.
 * The remaining values are grouped by suffix (Z, N, C), each with the
 * AND/ANDN/NORN/NOR variants.
 */
enum v3d_qpu_uf {
        V3D_QPU_UF_NONE,
        V3D_QPU_UF_ANDZ,
        V3D_QPU_UF_ANDNZ,
        V3D_QPU_UF_NORNZ,
        V3D_QPU_UF_NORZ,
        V3D_QPU_UF_ANDN,
        V3D_QPU_UF_ANDNN,
        V3D_QPU_UF_NORNN,
        V3D_QPU_UF_NORN,
        V3D_QPU_UF_ANDC,
        V3D_QPU_UF_ANDNC,
        V3D_QPU_UF_NORNC,
        V3D_QPU_UF_NORC,
};
89
/**
 * Magic write addresses.
 *
 * The values are assigned explicitly and are not contiguous (there are gaps
 * after 24 and after 46).  Value 9 is deliberately shared by
 * V3D_QPU_WADDR_TMU and V3D_QPU_WADDR_UNIFA: which one it means depends on
 * the hardware version, which is why the name lookup and TMU predicates for
 * this enum take a v3d_device_info.
 */
enum v3d_qpu_waddr {
        V3D_QPU_WADDR_R0 = 0,
        V3D_QPU_WADDR_R1 = 1,
        V3D_QPU_WADDR_R2 = 2,
        V3D_QPU_WADDR_R3 = 3,
        V3D_QPU_WADDR_R4 = 4,
        V3D_QPU_WADDR_R5 = 5,
        V3D_QPU_WADDR_NOP = 6,
        V3D_QPU_WADDR_TLB = 7,
        V3D_QPU_WADDR_TLBU = 8,
        V3D_QPU_WADDR_TMU = 9,   /* V3D 3.x */
        V3D_QPU_WADDR_UNIFA = 9, /* V3D 4.x */
        V3D_QPU_WADDR_TMUL = 10,
        V3D_QPU_WADDR_TMUD = 11,
        V3D_QPU_WADDR_TMUA = 12,
        V3D_QPU_WADDR_TMUAU = 13,
        V3D_QPU_WADDR_VPM = 14,
        V3D_QPU_WADDR_VPMU = 15,
        V3D_QPU_WADDR_SYNC = 16,
        V3D_QPU_WADDR_SYNCU = 17,
        V3D_QPU_WADDR_SYNCB = 18,
        V3D_QPU_WADDR_RECIP = 19,
        V3D_QPU_WADDR_RSQRT = 20,
        V3D_QPU_WADDR_EXP = 21,
        V3D_QPU_WADDR_LOG = 22,
        V3D_QPU_WADDR_SIN = 23,
        V3D_QPU_WADDR_RSQRT2 = 24,
        V3D_QPU_WADDR_TMUC = 32,
        V3D_QPU_WADDR_TMUS = 33,
        V3D_QPU_WADDR_TMUT = 34,
        V3D_QPU_WADDR_TMUR = 35,
        V3D_QPU_WADDR_TMUI = 36,
        V3D_QPU_WADDR_TMUB = 37,
        V3D_QPU_WADDR_TMUDREF = 38,
        V3D_QPU_WADDR_TMUOFF = 39,
        V3D_QPU_WADDR_TMUSCM = 40,
        V3D_QPU_WADDR_TMUSF = 41,
        V3D_QPU_WADDR_TMUSLOD = 42,
        V3D_QPU_WADDR_TMUHS = 43,
        V3D_QPU_WADDR_TMUHSCM = 44,
        V3D_QPU_WADDR_TMUHSF = 45,
        V3D_QPU_WADDR_TMUHSLOD = 46,
        V3D_QPU_WADDR_R5REP = 55,
};
134
135struct v3d_qpu_flags {
136        enum v3d_qpu_cond ac, mc;
137        enum v3d_qpu_pf apf, mpf;
138        enum v3d_qpu_uf auf, muf;
139};
140
/**
 * Operations selectable in the "add" half of an ALU instruction
 * (struct v3d_qpu_alu_instr::add.op).
 *
 * Values are implicit and sequential from 0; keep the order stable, since
 * other code may index tables by these values.
 */
enum v3d_qpu_add_op {
        V3D_QPU_A_FADD,
        V3D_QPU_A_FADDNF,
        V3D_QPU_A_VFPACK,
        V3D_QPU_A_ADD,
        V3D_QPU_A_SUB,
        V3D_QPU_A_FSUB,
        V3D_QPU_A_MIN,
        V3D_QPU_A_MAX,
        V3D_QPU_A_UMIN,
        V3D_QPU_A_UMAX,
        V3D_QPU_A_SHL,
        V3D_QPU_A_SHR,
        V3D_QPU_A_ASR,
        V3D_QPU_A_ROR,
        V3D_QPU_A_FMIN,
        V3D_QPU_A_FMAX,
        V3D_QPU_A_VFMIN,
        V3D_QPU_A_AND,
        V3D_QPU_A_OR,
        V3D_QPU_A_XOR,
        V3D_QPU_A_VADD,
        V3D_QPU_A_VSUB,
        V3D_QPU_A_NOT,
        V3D_QPU_A_NEG,
        V3D_QPU_A_FLAPUSH,
        V3D_QPU_A_FLBPUSH,
        V3D_QPU_A_FLPOP,
        V3D_QPU_A_RECIP,
        V3D_QPU_A_SETMSF,
        V3D_QPU_A_SETREVF,
        V3D_QPU_A_NOP,
        V3D_QPU_A_TIDX,
        V3D_QPU_A_EIDX,
        V3D_QPU_A_LR,
        V3D_QPU_A_VFLA,
        V3D_QPU_A_VFLNA,
        V3D_QPU_A_VFLB,
        V3D_QPU_A_VFLNB,
        V3D_QPU_A_FXCD,
        V3D_QPU_A_XCD,
        V3D_QPU_A_FYCD,
        V3D_QPU_A_YCD,
        V3D_QPU_A_MSF,
        V3D_QPU_A_REVF,
        V3D_QPU_A_VDWWT,
        V3D_QPU_A_IID,
        V3D_QPU_A_SAMPID,
        V3D_QPU_A_BARRIERID,
        V3D_QPU_A_TMUWT,
        V3D_QPU_A_VPMSETUP,
        V3D_QPU_A_VPMWT,
        V3D_QPU_A_FLAFIRST,
        V3D_QPU_A_FLNAFIRST,
        V3D_QPU_A_LDVPMV_IN,
        V3D_QPU_A_LDVPMV_OUT,
        V3D_QPU_A_LDVPMD_IN,
        V3D_QPU_A_LDVPMD_OUT,
        V3D_QPU_A_LDVPMP,
        V3D_QPU_A_RSQRT,
        V3D_QPU_A_EXP,
        V3D_QPU_A_LOG,
        V3D_QPU_A_SIN,
        V3D_QPU_A_RSQRT2,
        V3D_QPU_A_LDVPMG_IN,
        V3D_QPU_A_LDVPMG_OUT,
        V3D_QPU_A_FCMP,
        V3D_QPU_A_VFMAX,
        V3D_QPU_A_FROUND,
        V3D_QPU_A_FTOIN,
        V3D_QPU_A_FTRUNC,
        V3D_QPU_A_FTOIZ,
        V3D_QPU_A_FFLOOR,
        V3D_QPU_A_FTOUZ,
        V3D_QPU_A_FCEIL,
        V3D_QPU_A_FTOC,
        V3D_QPU_A_FDX,
        V3D_QPU_A_FDY,
        V3D_QPU_A_STVPMV,
        V3D_QPU_A_STVPMD,
        V3D_QPU_A_STVPMP,
        V3D_QPU_A_ITOF,
        V3D_QPU_A_CLZ,
        V3D_QPU_A_UTOF,
};
226
/**
 * Operations selectable in the "mul" half of an ALU instruction
 * (struct v3d_qpu_alu_instr::mul.op).
 *
 * Values are implicit and sequential from 0.
 */
enum v3d_qpu_mul_op {
        V3D_QPU_M_ADD,
        V3D_QPU_M_SUB,
        V3D_QPU_M_UMUL24,
        V3D_QPU_M_VFMUL,
        V3D_QPU_M_SMUL24,
        V3D_QPU_M_MULTOP,
        V3D_QPU_M_FMOV,
        V3D_QPU_M_MOV,
        V3D_QPU_M_NOP,
        V3D_QPU_M_FMUL,
};
239
/**
 * Output packing applied to the result of an ALU operation before it is
 * written to the destination.
 */
enum v3d_qpu_output_pack {
        /** No packing (0, so it is the default for a zeroed instruction). */
        V3D_QPU_PACK_NONE,
        /**
         * Convert to 16-bit float, put in low 16 bits of destination leaving
         * high unmodified.
         */
        V3D_QPU_PACK_L,
        /**
         * Convert to 16-bit float, put in high 16 bits of destination leaving
         * low unmodified.
         */
        V3D_QPU_PACK_H,
};
253
/**
 * Input unpacking applied to a source operand of an ALU operation.
 */
enum v3d_qpu_input_unpack {
        /**
         * No-op input unpacking.  Note that this enum's value doesn't match
         * the packed QPU instruction value of the field (we use 0 so that the
         * default on new instruction creation is no-op).
         */
        V3D_QPU_UNPACK_NONE,
        /** Absolute value.  Only available for some operations. */
        V3D_QPU_UNPACK_ABS,
        /** Convert low 16 bits from 16-bit float to 32-bit float. */
        V3D_QPU_UNPACK_L,
        /** Convert high 16 bits from 16-bit float to 32-bit float. */
        V3D_QPU_UNPACK_H,

        /** Convert to 16f and replicate it to the high bits. */
        V3D_QPU_UNPACK_REPLICATE_32F_16,

        /** Replicate low 16 bits to high. */
        V3D_QPU_UNPACK_REPLICATE_L_16,

        /** Replicate high 16 bits to low. */
        V3D_QPU_UNPACK_REPLICATE_H_16,

        /** Swap high and low 16 bits. */
        V3D_QPU_UNPACK_SWAP_16,
};
280
/**
 * Source operand selector for an ALU operation: one of R0..R5, or the A/B
 * inputs (presumably the values addressed by raddr_a/raddr_b of the
 * instruction -- TODO confirm).
 */
enum v3d_qpu_mux {
        V3D_QPU_MUX_R0,
        V3D_QPU_MUX_R1,
        V3D_QPU_MUX_R2,
        V3D_QPU_MUX_R3,
        V3D_QPU_MUX_R4,
        V3D_QPU_MUX_R5,
        V3D_QPU_MUX_A,
        V3D_QPU_MUX_B,
};
291
292struct v3d_qpu_alu_instr {
293        struct {
294                enum v3d_qpu_add_op op;
295                enum v3d_qpu_mux a, b;
296                uint8_t waddr;
297                bool magic_write;
298                enum v3d_qpu_output_pack output_pack;
299                enum v3d_qpu_input_unpack a_unpack;
300                enum v3d_qpu_input_unpack b_unpack;
301        } add;
302
303        struct {
304                enum v3d_qpu_mul_op op;
305                enum v3d_qpu_mux a, b;
306                uint8_t waddr;
307                bool magic_write;
308                enum v3d_qpu_output_pack output_pack;
309                enum v3d_qpu_input_unpack a_unpack;
310                enum v3d_qpu_input_unpack b_unpack;
311        } mul;
312};
313
/**
 * Condition under which a branch instruction is taken
 * (struct v3d_qpu_branch_instr::cond).
 *
 * V3D_QPU_BRANCH_COND_ALWAYS is 0, the default for a zeroed instruction.
 */
enum v3d_qpu_branch_cond {
        V3D_QPU_BRANCH_COND_ALWAYS,
        V3D_QPU_BRANCH_COND_A0,
        V3D_QPU_BRANCH_COND_NA0,
        V3D_QPU_BRANCH_COND_ALLA,
        V3D_QPU_BRANCH_COND_ANYNA,
        V3D_QPU_BRANCH_COND_ANYA,
        V3D_QPU_BRANCH_COND_ALLNA,
};
323
/**
 * Controls how multisample flags affect the evaluation of a branch
 * condition (struct v3d_qpu_branch_instr::msfign).
 */
enum v3d_qpu_msfign {
        /** Ignore multisample flags when determining branch condition. */
        V3D_QPU_MSFIGN_NONE,
        /**
         * If no multisample flags are set in the lane (a pixel in the FS, a
         * vertex in the VS), ignore the lane's condition when computing the
         * branch condition.
         */
        V3D_QPU_MSFIGN_P,
        /**
         * If no multisample flags are set in a 2x2 quad in the FS, ignore the
         * quad's a/b conditions.
         */
        V3D_QPU_MSFIGN_Q,
};
339
/**
 * How a branch computes a new pointer value.  Used both for the instruction
 * pointer (bdi) and for the uniforms pointer (bdu) of
 * struct v3d_qpu_branch_instr.
 */
enum v3d_qpu_branch_dest {
        V3D_QPU_BRANCH_DEST_ABS,
        V3D_QPU_BRANCH_DEST_REL,
        V3D_QPU_BRANCH_DEST_LINK_REG,
        V3D_QPU_BRANCH_DEST_REGFILE,
};
346
347struct v3d_qpu_branch_instr {
348        enum v3d_qpu_branch_cond cond;
349        enum v3d_qpu_msfign msfign;
350
351        /** Selects how to compute the new IP if the branch is taken. */
352        enum v3d_qpu_branch_dest bdi;
353
354        /**
355         * Selects how to compute the new uniforms pointer if the branch is
356         * taken.  (ABS/REL implicitly load a uniform and use that)
357         */
358        enum v3d_qpu_branch_dest bdu;
359
360        /**
361         * If set, then udest determines how the uniform stream will branch,
362         * otherwise the uniform stream is left as is.
363         */
364        bool ub;
365
366        uint8_t raddr_a;
367
368        uint32_t offset;
369};
370
/**
 * Discriminator for struct v3d_qpu_instr: selects which member of its
 * anonymous union (alu or branch) is meaningful.
 */
enum v3d_qpu_instr_type {
        V3D_QPU_INSTR_TYPE_ALU,
        V3D_QPU_INSTR_TYPE_BRANCH,
};
375
376struct v3d_qpu_instr {
377        enum v3d_qpu_instr_type type;
378
379        struct v3d_qpu_sig sig;
380        uint8_t sig_addr;
381        bool sig_magic; /* If the signal writes to a magic address */
382        uint8_t raddr_a;
383        uint8_t raddr_b;
384        struct v3d_qpu_flags flags;
385
386        union {
387                struct v3d_qpu_alu_instr alu;
388                struct v3d_qpu_branch_instr branch;
389        };
390};
391
/*
 * Name lookup helpers: each returns a string for the given enum value.
 * The magic waddr lookup additionally takes the device info, since the same
 * numeric waddr has different names on different hardware versions (see
 * V3D_QPU_WADDR_TMU / V3D_QPU_WADDR_UNIFA).
 *
 * NOTE(review): ownership/lifetime of the returned strings and the result
 * for out-of-range values are not visible from this header -- confirm in the
 * implementation before relying on either.
 */
const char *v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo,
                                     enum v3d_qpu_waddr waddr);
const char *v3d_qpu_add_op_name(enum v3d_qpu_add_op op);
const char *v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op);
const char *v3d_qpu_cond_name(enum v3d_qpu_cond cond);
const char *v3d_qpu_pf_name(enum v3d_qpu_pf pf);
const char *v3d_qpu_uf_name(enum v3d_qpu_uf uf);
const char *v3d_qpu_pack_name(enum v3d_qpu_output_pack pack);
const char *v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack);
const char *v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond);
const char *v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign);
403
404enum v3d_qpu_cond v3d_qpu_cond_invert(enum v3d_qpu_cond cond) ATTRIBUTE_CONST;
405
406bool v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op);
407bool v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op);
408int v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op);
409int v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op);
410
/*
 * Pack/unpack helpers.
 *
 * Each *_pack() takes an unpacked value and writes its packed encoding
 * through the final out-pointer; each *_unpack() does the reverse.  All of
 * them return bool -- presumably true on success and false when the value
 * cannot be encoded/decoded for the given device (TODO confirm in the
 * implementation).
 */

bool v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
                      const struct v3d_qpu_sig *sig,
                      uint32_t *packed_sig);
bool v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                        uint32_t packed_sig,
                        struct v3d_qpu_sig *sig);

bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond);
bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond);

bool
v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
                       uint32_t value,
                       uint32_t *packed_small_immediate);

bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
                         uint32_t packed_small_immediate,
                         uint32_t *small_immediate);

/* Whole-instruction conversion to/from the 64-bit packed encoding. */
bool
v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr,
                   uint64_t *packed_instr);
bool
v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
                     uint64_t packed_instr,
                     struct v3d_qpu_instr *instr);
445
446bool v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
447bool v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info *devinfo,
448                                enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
449bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
450bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
451bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
452bool v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
453bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
454bool v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
455bool v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
456bool v3d_qpu_writes_tmu(const struct v3d_device_info *devinfo,
457                        const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
458bool v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info *devinfo,
459                                 const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
460bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
461                       const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
462bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
463                       const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
464bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
465                       const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
466bool v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
467                          const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
468bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
469bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
470bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
471bool v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
472bool v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
473bool v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
474bool v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
475bool v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
476bool v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
477bool v3d_qpu_writes_unifa(const struct v3d_device_info *devinfo,
478                          const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
479bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
480                                const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;
481bool v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
482bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
483
484bool v3d_qpu_is_nop(struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
485#endif
486