xref: /third_party/mesa3d/src/broadcom/qpu/qpu_pack.c (revision bf215546)
1/*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <string.h>
25#include "util/macros.h"
26#include "util/bitscan.h"
27
28#include "broadcom/common/v3d_device_info.h"
29#include "qpu_instr.h"
30
#ifndef QPU_MASK
/* Builds a 64-bit mask covering bits [low, high], both inclusive. */
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
/* Shifts `value` into the named instruction field (using the token-pasted
 * field ## _SHIFT / field ## _MASK pair) and asserts the value fits.
 */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
         })

/* Extracts the named field from a packed 64-bit instruction word. */
#define QPU_GET_FIELD(word, field) ((uint32_t)(((word)  & field ## _MASK) >> field ## _SHIFT))

/* Returns `inst` with the named field replaced by `value`. */
#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
46
/* Bit layout of the packed 64-bit QPU instruction word, as *_SHIFT/*_MASK
 * pairs consumed by the QPU_*_FIELD macros above.  Note that the
 * branch-instruction fields (BRANCH_*) overlap the ALU-instruction fields,
 * since both encodings share the same 64-bit word.
 */
#define V3D_QPU_OP_MUL_SHIFT                58
#define V3D_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define V3D_QPU_SIG_SHIFT                   53
#define V3D_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define V3D_QPU_COND_SHIFT                  46
#define V3D_QPU_COND_MASK                   QPU_MASK(52, 46)
#define V3D_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define V3D_QPU_MM                          QPU_MASK(45, 45)
#define V3D_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define V3D_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define V3D_QPU_BRANCH_COND_SHIFT           32
#define V3D_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define V3D_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define V3D_QPU_OP_ADD_SHIFT                24
#define V3D_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define V3D_QPU_MUL_B_SHIFT                 21
#define V3D_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define V3D_QPU_BRANCH_MSFIGN_SHIFT         21
#define V3D_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define V3D_QPU_MUL_A_SHIFT                 18
#define V3D_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define V3D_QPU_ADD_B_SHIFT                 15
#define V3D_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_BDU_SHIFT            15
#define V3D_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define V3D_QPU_ADD_A_SHIFT                 12
#define V3D_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define V3D_QPU_BRANCH_BDI_SHIFT            12
#define V3D_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define V3D_QPU_RADDR_A_SHIFT               6
#define V3D_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define V3D_QPU_RADDR_B_SHIFT               0
#define V3D_QPU_RADDR_B_MASK                QPU_MASK(5, 0)
106
/* Shorthand designated initializers for the v3d_qpu_sig tables below; each
 * one sets a single boolean flag in struct v3d_qpu_sig.
 */
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
121
/* V3D 3.3 signal decode table: maps the 5-bit packed signal field to the
 * set of signal flags it asserts.  Missing indices (18-21) are reserved
 * encodings and decode as all-zero, which v3d_qpu_sig_unpack() rejects.
 */
static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC   R3       R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        [12] = {        LDVARY, LDTMU,         },
        [13] = { THRSW, LDVARY, LDTMU,         },
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        /* 18-21 reserved */
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        [24] = {        LDVPM,                 },
        [25] = { THRSW, LDVPM,                 },
        [26] = {        LDVPM,          LDUNIF },
        [27] = { THRSW, LDVPM,          LDUNIF },
        [28] = {        LDVPM, LDTMU,          },
        [29] = { THRSW, LDVPM, LDTMU,          },
        [30] = { SMIMM, LDVPM,                 },
        [31] = { SMIMM,                        },
};
154
/* V3D 4.0 signal decode table.  Relative to v33: LDVPM combinations are
 * gone, WRTMUC encodings (18-21) are added, and 31 pairs SMIMM with LDTMU.
 * Missing indices are reserved and decode as all-zero.
 */
static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        [18] = {                        WRTMUC },
        [19] = { THRSW,                 WRTMUC },
        [20] = {        LDVARY,         WRTMUC },
        [21] = { THRSW, LDVARY,         WRTMUC },
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        /* 24-30 reserved */
        [31] = { SMIMM,         LDTMU,         },
};
183
/* V3D 4.1+ signal decode table.  Relative to v40: adds the register-file
 * uniform loads (LDUNIFRF/LDUNIFARF) and the LDUNIFA variants.  Missing
 * indices (26-30) are reserved and decode as all-zero.
 */
static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC       phys    R5 */
        [0]  = {                          },
        [1]  = { THRSW,                   },
        [2]  = {                   LDUNIF },
        [3]  = { THRSW,            LDUNIF },
        [4]  = {           LDTMU,         },
        [5]  = { THRSW,    LDTMU,         },
        [6]  = {           LDTMU,  LDUNIF },
        [7]  = { THRSW,    LDTMU,  LDUNIF },
        [8]  = {           LDVARY,        },
        [9]  = { THRSW,    LDVARY,        },
        [10] = {           LDVARY, LDUNIF },
        [11] = { THRSW,    LDVARY, LDUNIF },
        [12] = { LDUNIFRF                 },
        [13] = { THRSW,    LDUNIFRF       },
        [14] = { SMIMM,    LDVARY,        },
        [15] = { SMIMM,                   },
        [16] = {           LDTLB,         },
        [17] = {           LDTLBU,        },
        [18] = {                          WRTMUC },
        [19] = { THRSW,                   WRTMUC },
        [20] = {           LDVARY,        WRTMUC },
        [21] = { THRSW,    LDVARY,        WRTMUC },
        [22] = { UCB,                     },
        [23] = { ROT,                     },
        [24] = {                   LDUNIFA},
        [25] = { LDUNIFARF                },
        /* 26-30 reserved */
        [31] = { SMIMM,            LDTMU, },
};
215
216bool
217v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
218                   uint32_t packed_sig,
219                   struct v3d_qpu_sig *sig)
220{
221        if (packed_sig >= ARRAY_SIZE(v33_sig_map))
222                return false;
223
224        if (devinfo->ver >= 41)
225                *sig = v41_sig_map[packed_sig];
226        else if (devinfo->ver == 40)
227                *sig = v40_sig_map[packed_sig];
228        else
229                *sig = v33_sig_map[packed_sig];
230
231        /* Signals with zeroed unpacked contents after element 0 are reserved. */
232        return (packed_sig == 0 ||
233                memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
234}
235
236bool
237v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
238                 const struct v3d_qpu_sig *sig,
239                 uint32_t *packed_sig)
240{
241        static const struct v3d_qpu_sig *map;
242
243        if (devinfo->ver >= 41)
244                map = v41_sig_map;
245        else if (devinfo->ver == 40)
246                map = v40_sig_map;
247        else
248                map = v33_sig_map;
249
250        for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
251                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
252                        *packed_sig = i;
253                        return true;
254                }
255        }
256
257        return false;
258}
259
/* The 48 values representable as QPU "small immediates", indexed by their
 * encoding: 0-15, then -16..-1 (stored via the well-defined wraparound of
 * negative values converted to uint32_t), then the float bit patterns for
 * the powers of two 2^-8 .. 2^7.
 */
static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
286
287bool
288v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
289                         uint32_t packed_small_immediate,
290                         uint32_t *small_immediate)
291{
292        if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
293                return false;
294
295        *small_immediate = small_immediates[packed_small_immediate];
296        return true;
297}
298
299bool
300v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
301                       uint32_t value,
302                       uint32_t *packed_small_immediate)
303{
304        STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);
305
306        for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
307                if (small_immediates[i] == value) {
308                        *packed_small_immediate = i;
309                        return true;
310                }
311        }
312
313        return false;
314}
315
/* Decodes the packed condition field into the add/mul condition, pushed
 * flag, and updated flag fields of *cond.  The branch order below mirrors
 * the numeric ranges of the encoding, so it is order-sensitive.  Returns
 * false only for the reserved 0x10 encoding.
 */
bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        /* Start from "nothing encoded" and fill in below. */
        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                /* 0x1-0x3: add-ALU pushed flags. */
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                /* 0x4-0xf: add-ALU updated flags, biased by 4 from
                 * V3D_QPU_UF_ANDZ.
                 */
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                /* Reserved encoding. */
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                /* 0x11-0x13: mul-ALU pushed flags. */
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                /* 0x14-0x1f: mul-ALU updated flags, same bias as auf. */
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                /* 0x2X: add condition in bits 3:2, mul pushed flags in 1:0. */
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                /* 0x3X: mul condition in bits 3:2, add pushed flags in 1:0. */
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                /* 0x40 and up: mul condition in bits 5:4, plus either an add
                 * condition (when bits 3:2 are 0) or add updated flags.
                 */
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}
364
/* Encodes the add/mul condition, pushed-flag, and updated-flag fields of
 * *cond into the packed condition field.  Only the combinations listed in
 * flags_table can be encoded; anything else returns false.
 */
bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
/* Bitset describing which of the six fields of *cond are present. */
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        /* Maps each encodable combination of present fields to the fixed
         * high bits of its encoding; the field values themselves are OR'd
         * in below.
         */
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        /* Collect which fields are actually set. */
        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                /* PF_NONE is 0, so these are no-ops when absent. */
                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                /* Updated flags are biased by 4 from V3D_QPU_UF_ANDZ,
                 * mirroring v3d_qpu_flags_unpack().
                 */
                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                /* Condition bit positions depend on which encoding group
                 * (bit 6 set or not) was chosen above.
                 */
                if (flags_present & AC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= cond->ac - V3D_QPU_COND_IFA;
                        else
                                *packed_cond |= (cond->ac -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
443
/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 */
/* Bitmask of mux encodings [bot, top], both inclusive.  Both arguments are
 * fully parenthesized: the original expansion used `(top + 1)`, which
 * mis-expands for compound arguments such as `2 << 1` because `+` binds
 * tighter than `<<`.
 */
#define MUX_MASK(bot, top) (((1 << ((top) + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)
450
/* One row of the add_ops/mul_ops encoding tables: an inclusive opcode
 * range, bitmasks of which mux_a/mux_b encodings select this row, and the
 * V3D version range the row applies to.
 */
struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;
        uint8_t op;

        /* first_ver == 0 if it's the same across all V3D versions.
         * first_ver == X, last_ver == 0 if it's the same for all V3D versions
         *   starting from X
         * first_ver == X, last_ver == Y if it's the same for all V3D versions
         *   on the range X through Y
         */
        uint8_t first_ver;
        uint8_t last_ver;
};
467
/* Encoding table for the add-ALU opcodes.  Rows may deliberately overlap:
 * some ops share an opcode range and are disambiguated after lookup by
 * operand order (FADD/FADDNF, FMIN/FMAX), by waddr (the STVPMs), or by the
 * first_ver/last_ver version range.
 */
static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        /* Opcode 186: 1-arg ops selected by mux_b. */
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        /* Opcode 187: 0-arg ops selected by the mux_b/mux_a pair. */
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
        { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 },
        { 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 },
        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },

        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};
571
/* Encoding table for the mul-ALU opcodes.  Opcode 15 is subdivided by the
 * mux_b field (FMOV variants, NOP, MOV).
 */
static const struct opcode_desc mul_ops[] = {
        { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};
585
586/* Returns true if op_desc should be filtered out based on devinfo->ver
587 * against op_desc->first_ver and op_desc->last_ver. Check notes about
588 * first_ver/last_ver on struct opcode_desc comments.
589 */
590static bool
591opcode_invalid_in_version(const struct v3d_device_info *devinfo,
592                          const struct opcode_desc *op_desc)
593{
594        return (op_desc->first_ver != 0 && devinfo->ver < op_desc->first_ver) ||
595                (op_desc->last_ver != 0  && devinfo->ver > op_desc->last_ver);
596}
597
598static const struct opcode_desc *
599lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
600                          const struct opcode_desc *opcodes,
601                          size_t num_opcodes, uint32_t opcode,
602                          uint32_t mux_a, uint32_t mux_b)
603{
604        for (int i = 0; i < num_opcodes; i++) {
605                const struct opcode_desc *op_desc = &opcodes[i];
606
607                if (opcode < op_desc->opcode_first ||
608                    opcode > op_desc->opcode_last)
609                        continue;
610
611                if (opcode_invalid_in_version(devinfo, op_desc))
612                        continue;
613
614                if (!(op_desc->mux_b_mask & (1 << mux_b)))
615                        continue;
616
617                if (!(op_desc->mux_a_mask & (1 << mux_a)))
618                        continue;
619
620                return op_desc;
621        }
622
623        return NULL;
624}
625
626static bool
627v3d_qpu_float32_unpack_unpack(uint32_t packed,
628                              enum v3d_qpu_input_unpack *unpacked)
629{
630        switch (packed) {
631        case 0:
632                *unpacked = V3D_QPU_UNPACK_ABS;
633                return true;
634        case 1:
635                *unpacked = V3D_QPU_UNPACK_NONE;
636                return true;
637        case 2:
638                *unpacked = V3D_QPU_UNPACK_L;
639                return true;
640        case 3:
641                *unpacked = V3D_QPU_UNPACK_H;
642                return true;
643        default:
644                return false;
645        }
646}
647
648static bool
649v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
650                            uint32_t *packed)
651{
652        switch (unpacked) {
653        case V3D_QPU_UNPACK_ABS:
654                *packed = 0;
655                return true;
656        case V3D_QPU_UNPACK_NONE:
657                *packed = 1;
658                return true;
659        case V3D_QPU_UNPACK_L:
660                *packed = 2;
661                return true;
662        case V3D_QPU_UNPACK_H:
663                *packed = 3;
664                return true;
665        default:
666                return false;
667        }
668}
669
670static bool
671v3d_qpu_float16_unpack_unpack(uint32_t packed,
672                              enum v3d_qpu_input_unpack *unpacked)
673{
674        switch (packed) {
675        case 0:
676                *unpacked = V3D_QPU_UNPACK_NONE;
677                return true;
678        case 1:
679                *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
680                return true;
681        case 2:
682                *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
683                return true;
684        case 3:
685                *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
686                return true;
687        case 4:
688                *unpacked = V3D_QPU_UNPACK_SWAP_16;
689                return true;
690        default:
691                return false;
692        }
693}
694
695static bool
696v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
697                            uint32_t *packed)
698{
699        switch (unpacked) {
700        case V3D_QPU_UNPACK_NONE:
701                *packed = 0;
702                return true;
703        case V3D_QPU_UNPACK_REPLICATE_32F_16:
704                *packed = 1;
705                return true;
706        case V3D_QPU_UNPACK_REPLICATE_L_16:
707                *packed = 2;
708                return true;
709        case V3D_QPU_UNPACK_REPLICATE_H_16:
710                *packed = 3;
711                return true;
712        case V3D_QPU_UNPACK_SWAP_16:
713                *packed = 4;
714                return true;
715        default:
716                return false;
717        }
718}
719
720static bool
721v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,
722                          uint32_t *packed)
723{
724        switch (unpacked) {
725        case V3D_QPU_PACK_NONE:
726                *packed = 0;
727                return true;
728        case V3D_QPU_PACK_L:
729                *packed = 1;
730                return true;
731        case V3D_QPU_PACK_H:
732                *packed = 2;
733                return true;
734        default:
735                return false;
736        }
737}
738
739static bool
740v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
741                   struct v3d_qpu_instr *instr)
742{
743        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
744        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
745        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);
746        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
747
748        uint32_t map_op = op;
749        /* Some big clusters of opcodes are replicated with unpack
750         * flags
751         */
752        if (map_op >= 249 && map_op <= 251)
753                map_op = (map_op - 249 + 245);
754        if (map_op >= 253 && map_op <= 255)
755                map_op = (map_op - 253 + 245);
756
757        const struct opcode_desc *desc =
758                lookup_opcode_from_packed(devinfo, add_ops, ARRAY_SIZE(add_ops),
759                                          map_op, mux_a, mux_b);
760
761        if (!desc)
762                return false;
763
764        instr->alu.add.op = desc->op;
765
766        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
767         * operands.
768         */
769        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
770                if (instr->alu.add.op == V3D_QPU_A_FMIN)
771                        instr->alu.add.op = V3D_QPU_A_FMAX;
772                if (instr->alu.add.op == V3D_QPU_A_FADD)
773                        instr->alu.add.op = V3D_QPU_A_FADDNF;
774        }
775
776        /* Some QPU ops require a bit more than just basic opcode and mux a/b
777         * comparisons to distinguish them.
778         */
779        switch (instr->alu.add.op) {
780        case V3D_QPU_A_STVPMV:
781        case V3D_QPU_A_STVPMD:
782        case V3D_QPU_A_STVPMP:
783                switch (waddr) {
784                case 0:
785                        instr->alu.add.op = V3D_QPU_A_STVPMV;
786                        break;
787                case 1:
788                        instr->alu.add.op = V3D_QPU_A_STVPMD;
789                        break;
790                case 2:
791                        instr->alu.add.op = V3D_QPU_A_STVPMP;
792                        break;
793                default:
794                        return false;
795                }
796                break;
797        default:
798                break;
799        }
800
801        switch (instr->alu.add.op) {
802        case V3D_QPU_A_FADD:
803        case V3D_QPU_A_FADDNF:
804        case V3D_QPU_A_FSUB:
805        case V3D_QPU_A_FMIN:
806        case V3D_QPU_A_FMAX:
807        case V3D_QPU_A_FCMP:
808        case V3D_QPU_A_VFPACK:
809                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
810                        instr->alu.add.output_pack = (op >> 4) & 0x3;
811                else
812                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
813
814                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
815                                                   &instr->alu.add.a_unpack)) {
816                        return false;
817                }
818
819                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
820                                                   &instr->alu.add.b_unpack)) {
821                        return false;
822                }
823                break;
824
825        case V3D_QPU_A_FFLOOR:
826        case V3D_QPU_A_FROUND:
827        case V3D_QPU_A_FTRUNC:
828        case V3D_QPU_A_FCEIL:
829        case V3D_QPU_A_FDX:
830        case V3D_QPU_A_FDY:
831                instr->alu.add.output_pack = mux_b & 0x3;
832
833                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
834                                                   &instr->alu.add.a_unpack)) {
835                        return false;
836                }
837                break;
838
839        case V3D_QPU_A_FTOIN:
840        case V3D_QPU_A_FTOIZ:
841        case V3D_QPU_A_FTOUZ:
842        case V3D_QPU_A_FTOC:
843                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
844
845                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
846                                                   &instr->alu.add.a_unpack)) {
847                        return false;
848                }
849                break;
850
851        case V3D_QPU_A_VFMIN:
852        case V3D_QPU_A_VFMAX:
853                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
854                                                   &instr->alu.add.a_unpack)) {
855                        return false;
856                }
857
858                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
859                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
860                break;
861
862        default:
863                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
864                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
865                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
866                break;
867        }
868
869        instr->alu.add.a = mux_a;
870        instr->alu.add.b = mux_b;
871        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
872
873        instr->alu.add.magic_write = false;
874        if (packed_inst & V3D_QPU_MA) {
875                switch (instr->alu.add.op) {
876                case V3D_QPU_A_LDVPMV_IN:
877                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
878                        break;
879                case V3D_QPU_A_LDVPMD_IN:
880                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
881                        break;
882                case V3D_QPU_A_LDVPMG_IN:
883                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
884                        break;
885                default:
886                        instr->alu.add.magic_write = true;
887                        break;
888                }
889        }
890
891        return true;
892}
893
894static bool
895v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
896                   struct v3d_qpu_instr *instr)
897{
898        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
899        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
900        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);
901
902        {
903                const struct opcode_desc *desc =
904                        lookup_opcode_from_packed(devinfo, mul_ops,
905                                                  ARRAY_SIZE(mul_ops),
906                                                  op, mux_a, mux_b);
907                if (!desc)
908                        return false;
909
910                instr->alu.mul.op = desc->op;
911        }
912
913        switch (instr->alu.mul.op) {
914        case V3D_QPU_M_FMUL:
915                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
916
917                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
918                                                   &instr->alu.mul.a_unpack)) {
919                        return false;
920                }
921
922                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
923                                                   &instr->alu.mul.b_unpack)) {
924                        return false;
925                }
926
927                break;
928
929        case V3D_QPU_M_FMOV:
930                instr->alu.mul.output_pack = (((op & 1) << 1) +
931                                              ((mux_b >> 2) & 1));
932
933                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
934                                                   &instr->alu.mul.a_unpack)) {
935                        return false;
936                }
937
938                break;
939
940        case V3D_QPU_M_VFMUL:
941                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
942
943                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
944                                                   &instr->alu.mul.a_unpack)) {
945                        return false;
946                }
947
948                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
949
950                break;
951
952        default:
953                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
954                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
955                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
956                break;
957        }
958
959        instr->alu.mul.a = mux_a;
960        instr->alu.mul.b = mux_b;
961        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
962        instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;
963
964        return true;
965}
966
967static const struct opcode_desc *
968lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
969                         const struct opcode_desc *opcodes, size_t num_opcodes,
970                         uint8_t op)
971{
972        for (int i = 0; i < num_opcodes; i++) {
973                const struct opcode_desc *op_desc = &opcodes[i];
974
975                if (op_desc->op != op)
976                        continue;
977
978                if (opcode_invalid_in_version(devinfo, op_desc))
979                        continue;
980
981                return op_desc;
982        }
983
984        return NULL;
985}
986
/* Packs the add-ALU half of @instr by ORing its fields into @packed_instr.
 *
 * Returns false when the op/pack/unpack/mux combination has no valid
 * encoding.  On failure *packed_instr may already have been partially
 * updated (e.g. the V3D_QPU_MA bit below).
 */
static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops),
                                         instr->alu.add.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.  Pick the lowest bit set in the
         * descriptor's valid-mux mask as the canonical value.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        bool no_magic_write = false;

        /* A few ops encode their exact variant in the write address rather
         * than in the opcode: force waddr here and suppress the magic-write
         * bit at the end.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                /* The _OUT LDVPM variants are distinguished from _IN by the
                 * MA bit rather than a separate opcode.
                 */
                assert(!instr->alu.add.magic_write);
                *packed_instr |= V3D_QPU_MA;
                break;

        default:
                break;
        }

        /* Fold the pack/unpack modes into the low opcode bits; the layout
         * differs per op group.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                /* Output pack goes in opcode bits 5:4. */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 * Swap both the unpacks and the muxes so the ordering
                 * matches the one the unpacker derives the op from.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                /* Source unpacks go in opcode bits 3:2 and 1:0. */
                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                /* VFPACK has no encoding for ABS on either source. */
                if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* Only one unpack bit per source here, so clear before
                 * ORing rather than assuming the opcode bit is zero.
                 */
                opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                /* Single-source float ops carry the output pack in the
                 * (otherwise unused) mux_b field.
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* Unpack value 0 would collide with a different opcode
                 * here, so it can't be encoded for these ops.
                 */
                if (packed == 0)
                        return false;
                opcode = (opcode & ~(1 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                /* NOTE: this declaration is in switch scope, so the
                 * VFMIN/VFMAX case below reuses the same variable.
                 */
                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* As above, unpack 0 has no valid encoding for these. */
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                /* Ops without pack/unpack encodings must not request any
                 * (NOP is exempt from the check).
                 */
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= V3D_QPU_MA;

        return true;
}
1199
/* Packs the mul-ALU half of @instr by ORing its fields into @packed_instr.
 *
 * Returns false when the op/pack/unpack combination has no valid encoding.
 */
static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);

        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, mul_ops, ARRAY_SIZE(mul_ops),
                                         instr->alu.mul.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here.  If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                /* Source unpacks go in opcode bits 3:2 and 1:0. */
                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                /* The two output-pack bits are split: the high bit goes in
                 * opcode bit 0, the low bit in mux_b bit 2 (mirroring the
                 * unpacker).
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                /* The source unpack occupies the low mux_b bits. */
                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* SWAP_16 has its own dedicated opcode; the other unpacks
                 * are folded into the low opcode bits with a bias of 4
                 * (the inverse of the "- 4" in the unpacker).
                 */
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= V3D_QPU_MM;

        return true;
}
1306
1307static bool
1308v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
1309                         uint64_t packed_instr,
1310                         struct v3d_qpu_instr *instr)
1311{
1312        instr->type = V3D_QPU_INSTR_TYPE_ALU;
1313
1314        if (!v3d_qpu_sig_unpack(devinfo,
1315                                QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
1316                                &instr->sig))
1317                return false;
1318
1319        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
1320        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
1321                instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
1322                instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;
1323
1324                instr->flags.ac = V3D_QPU_COND_NONE;
1325                instr->flags.mc = V3D_QPU_COND_NONE;
1326                instr->flags.apf = V3D_QPU_PF_NONE;
1327                instr->flags.mpf = V3D_QPU_PF_NONE;
1328                instr->flags.auf = V3D_QPU_UF_NONE;
1329                instr->flags.muf = V3D_QPU_UF_NONE;
1330        } else {
1331                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
1332                        return false;
1333        }
1334
1335        instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
1336        instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);
1337
1338        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
1339                return false;
1340
1341        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
1342                return false;
1343
1344        return true;
1345}
1346
1347static bool
1348v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
1349                            uint64_t packed_instr,
1350                            struct v3d_qpu_instr *instr)
1351{
1352        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
1353
1354        uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
1355        if (cond == 0)
1356                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
1357        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
1358                 V3D_QPU_BRANCH_COND_ALLNA)
1359                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
1360        else
1361                return false;
1362
1363        uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
1364        if (msfign == 3)
1365                return false;
1366        instr->branch.msfign = msfign;
1367
1368        instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);
1369
1370        instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
1371        if (instr->branch.ub) {
1372                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
1373                                                  V3D_QPU_BRANCH_BDU);
1374        }
1375
1376        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
1377                                              V3D_QPU_RADDR_A);
1378
1379        instr->branch.offset = 0;
1380
1381        instr->branch.offset +=
1382                QPU_GET_FIELD(packed_instr,
1383                              V3D_QPU_BRANCH_ADDR_LOW) << 3;
1384
1385        instr->branch.offset +=
1386                QPU_GET_FIELD(packed_instr,
1387                              V3D_QPU_BRANCH_ADDR_HIGH) << 24;
1388
1389        return true;
1390}
1391
1392bool
1393v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
1394                     uint64_t packed_instr,
1395                     struct v3d_qpu_instr *instr)
1396{
1397        if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
1398                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
1399        } else {
1400                uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);
1401
1402                if ((sig & 24) == 16) {
1403                        return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
1404                                                           instr);
1405                } else {
1406                        return false;
1407                }
1408        }
1409}
1410
1411static bool
1412v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
1413                       const struct v3d_qpu_instr *instr,
1414                       uint64_t *packed_instr)
1415{
1416        uint32_t sig;
1417        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
1418                return false;
1419        *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
1420
1421        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
1422                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
1423                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);
1424
1425                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
1426                        return false;
1427                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
1428                        return false;
1429
1430                uint32_t flags;
1431                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
1432                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
1433                            instr->flags.mc != V3D_QPU_COND_NONE ||
1434                            instr->flags.apf != V3D_QPU_PF_NONE ||
1435                            instr->flags.mpf != V3D_QPU_PF_NONE ||
1436                            instr->flags.auf != V3D_QPU_UF_NONE ||
1437                            instr->flags.muf != V3D_QPU_UF_NONE) {
1438                                return false;
1439                        }
1440
1441                        flags = instr->sig_addr;
1442                        if (instr->sig_magic)
1443                                flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
1444                } else {
1445                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
1446                                return false;
1447                }
1448
1449                *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
1450        } else {
1451                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
1452                        return false;
1453        }
1454
1455        return true;
1456}
1457
1458static bool
1459v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
1460                          const struct v3d_qpu_instr *instr,
1461                          uint64_t *packed_instr)
1462{
1463        *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);
1464
1465        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
1466                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
1467                                                    V3D_QPU_BRANCH_COND_A0),
1468                                               V3D_QPU_BRANCH_COND);
1469        }
1470
1471        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1472                                       V3D_QPU_BRANCH_MSFIGN);
1473
1474        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
1475                                       V3D_QPU_BRANCH_BDI);
1476
1477        if (instr->branch.ub) {
1478                *packed_instr |= V3D_QPU_BRANCH_UB;
1479                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
1480                                               V3D_QPU_BRANCH_BDU);
1481        }
1482
1483        switch (instr->branch.bdi) {
1484        case V3D_QPU_BRANCH_DEST_ABS:
1485        case V3D_QPU_BRANCH_DEST_REL:
1486                *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1487                                               V3D_QPU_BRANCH_MSFIGN);
1488
1489                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
1490                                                ~0xff000000) >> 3,
1491                                               V3D_QPU_BRANCH_ADDR_LOW);
1492
1493                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
1494                                               V3D_QPU_BRANCH_ADDR_HIGH);
1495                break;
1496        default:
1497                break;
1498        }
1499
1500        if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
1501            instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
1502                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
1503                                               V3D_QPU_RADDR_A);
1504        }
1505
1506        return true;
1507}
1508
1509bool
1510v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
1511                   const struct v3d_qpu_instr *instr,
1512                   uint64_t *packed_instr)
1513{
1514        *packed_instr = 0;
1515
1516        switch (instr->type) {
1517        case V3D_QPU_INSTR_TYPE_ALU:
1518                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
1519        case V3D_QPU_INSTR_TYPE_BRANCH:
1520                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
1521        default:
1522                return false;
1523        }
1524}
1525