/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>
#include "util/macros.h"
#include "util/bitscan.h"

#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"

#ifndef QPU_MASK
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
        })

#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */

#define V3D_QPU_OP_MUL_SHIFT                58
#define V3D_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define V3D_QPU_SIG_SHIFT                   53
#define V3D_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define V3D_QPU_COND_SHIFT                  46
#define V3D_QPU_COND_MASK                   QPU_MASK(52, 46)
#define V3D_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define V3D_QPU_MM                          QPU_MASK(45, 45)
#define V3D_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define V3D_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define V3D_QPU_BRANCH_COND_SHIFT           32
#define V3D_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define V3D_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define V3D_QPU_OP_ADD_SHIFT                24
#define V3D_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define V3D_QPU_MUL_B_SHIFT                 21
#define V3D_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define V3D_QPU_BRANCH_MSFIGN_SHIFT         21
#define V3D_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define V3D_QPU_MUL_A_SHIFT                 18
#define V3D_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define V3D_QPU_ADD_B_SHIFT                 15
#define V3D_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_BDU_SHIFT            15
#define V3D_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define V3D_QPU_ADD_A_SHIFT                 12
#define V3D_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define V3D_QPU_BRANCH_BDI_SHIFT            12
#define V3D_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define V3D_QPU_RADDR_A_SHIFT               6
#define V3D_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define V3D_QPU_RADDR_B_SHIFT               0
#define V3D_QPU_RADDR_B_MASK                QPU_MASK(5, 0)
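/* For illustration (not part of the driver): the field macros above combine
 * as follows. QPU_MASK(57, 53) evaluates to 0x03e0000000000000ull, so for
 * the signal field:
 *
 *     uint64_t inst = QPU_SET_FIELD(3, V3D_QPU_SIG);
 *     assert(inst == 0x0060000000000000ull);
 *     assert(QPU_GET_FIELD(inst, V3D_QPU_SIG) == 3);
 *
 * QPU_SET_FIELD asserts that the value fits in the field, and
 * QPU_UPDATE_FIELD rewrites one field within an existing instruction word
 * while preserving the rest.
 */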
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true

static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC   R3       R4      R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDVARY, LDTMU, },
        [13] = { THRSW, LDVARY, LDTMU, },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        /* 18-21 reserved */
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDVPM, },
        [25] = { THRSW, LDVPM, },
        [26] = { LDVPM, LDUNIF },
        [27] = { THRSW, LDVPM, LDUNIF },
        [28] = { LDVPM, LDTMU, },
        [29] = { THRSW, LDVPM, LDTMU, },
        [30] = { SMIMM, LDVPM, },
        [31] = { SMIMM, },
};

static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC   R3       R4      R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        /* 24-30 reserved */
        [31] = { SMIMM, LDTMU, },
};

static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC   phys    R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDUNIFRF },
        [13] = { THRSW, LDUNIFRF },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDUNIFA },
        [25] = { LDUNIFARF },
        /* 26-30 reserved */
        [31] = { SMIMM, LDTMU, },
};

bool
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                   uint32_t packed_sig,
                   struct v3d_qpu_sig *sig)
{
        if (packed_sig >= ARRAY_SIZE(v33_sig_map))
                return false;

        if (devinfo->ver >= 41)
                *sig = v41_sig_map[packed_sig];
        else if (devinfo->ver == 40)
                *sig = v40_sig_map[packed_sig];
        else
                *sig = v33_sig_map[packed_sig];

        /* Signals with zeroed unpacked contents after element 0 are
         * reserved.
         */
        return (packed_sig == 0 ||
                memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
}
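/* Usage sketch (illustrative, assumes a populated devinfo): on v4.1, packed
 * signal 3 decodes to THRSW+LDUNIF, and v3d_qpu_sig_unpack() above and
 * v3d_qpu_sig_pack() below round-trip:
 *
 *     struct v3d_qpu_sig sig;
 *     uint32_t packed;
 *     if (v3d_qpu_sig_unpack(devinfo, 3, &sig))
 *             assert(sig.thrsw && sig.ldunif);
 *     if (v3d_qpu_sig_pack(devinfo, &sig, &packed))
 *             assert(packed == 3);
 *
 * Reserved encodings (e.g. 24-30 on v4.0) fail to unpack because their map
 * entries are all-zero while their index is nonzero.
 */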
bool
v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_sig *sig,
                 uint32_t *packed_sig)
{
        const struct v3d_qpu_sig *map;

        if (devinfo->ver >= 41)
                map = v41_sig_map;
        else if (devinfo->ver == 40)
                map = v40_sig_map;
        else
                map = v33_sig_map;

        for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
                        *packed_sig = i;
                        return true;
                }
        }

        return false;
}

static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};

bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
                         uint32_t packed_small_immediate,
                         uint32_t *small_immediate)
{
        if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
                return false;

        *small_immediate = small_immediates[packed_small_immediate];
        return true;
}

bool
v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
                       uint32_t value,
                       uint32_t *packed_small_immediate)
{
        STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);

        for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
                if (small_immediates[i] == value) {
                        *packed_small_immediate = i;
                        return true;
                }
        }

        return false;
}
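/* Worked example (illustrative only): the 48 encodings split into 32
 * integers and 16 float powers of two. Index 40 holds 0x3f800000, the
 * IEEE-754 bit pattern of 1.0f:
 *
 *     uint32_t imm, packed;
 *     if (v3d_qpu_small_imm_unpack(devinfo, 40, &imm))
 *             assert(imm == 0x3f800000);
 *     if (v3d_qpu_small_imm_pack(devinfo, 0x3f800000, &packed))
 *             assert(packed == 40);
 *
 * Integer -16 packs to index 16, since the signed range wraps after 15.
 */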
bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}

bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= cond->ac - V3D_QPU_COND_IFA;
                        else
                                *packed_cond |= (cond->ac -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}

/* Make a mapping of the table of opcodes in the spec. The opcode is
 * determined by a combination of the opcode field, and in the case of 0- or
 * 1-argument opcodes, the mux_b field as well.
 */
#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)

struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;
        uint8_t op;

        /* first_ver == 0 if it's the same across all V3D versions.
         * first_ver == X, last_ver == 0 if it's the same for all V3D
         * versions starting from X.
         * first_ver == X, last_ver == Y if it's the same for all V3D
         * versions in the range X through Y.
         */
        uint8_t first_ver;
        uint8_t last_ver;
};
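/* Illustrative reading of an opcode_desc entry (not part of the driver):
 * MUX_MASK(0, 7) sets all eight mux bits, so ANYMUX == 0xff. An entry like
 *
 *     { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT }
 *
 * in add_ops below means: add opcode 186 decodes as NOT only when mux_b is
 * 0, with any mux_a, on every V3D version (first_ver/last_ver both 0).
 * MUX_MASK(4, 6) == 0x70 would instead accept mux values 4, 5 and 6.
 */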
static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53, 55, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56, 56, ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57, 59, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60, 60, ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61, 63, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64, 111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
        { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 },
        { 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 },
        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },

        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};

static const struct opcode_desc mul_ops[] = {
        { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};
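/* Side note (illustrative): wide opcode ranges encode extra state in the
 * low opcode bits. FMUL occupies mul opcodes 16-63 because the output pack
 * and the two input unpacks live in bits [5:4], [3:2] and [1:0] of the
 * opcode, as v3d_qpu_mul_pack() below reconstructs. The same trick gives
 * FADD and FSUB their 48-wide ranges in add_ops.
 */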
/* Returns true if op_desc should be filtered out based on devinfo->ver
 * against op_desc->first_ver and op_desc->last_ver. See the notes about
 * first_ver/last_ver in the struct opcode_desc comment.
 */
static bool
opcode_invalid_in_version(const struct v3d_device_info *devinfo,
                          const struct opcode_desc *op_desc)
{
        return (op_desc->first_ver != 0 && devinfo->ver < op_desc->first_ver) ||
               (op_desc->last_ver != 0 && devinfo->ver > op_desc->last_ver);
}

static const struct opcode_desc *
lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
                          const struct opcode_desc *opcodes,
                          size_t num_opcodes, uint32_t opcode,
                          uint32_t mux_a, uint32_t mux_b)
{
        for (int i = 0; i < num_opcodes; i++) {
                const struct opcode_desc *op_desc = &opcodes[i];

                if (opcode < op_desc->opcode_first ||
                    opcode > op_desc->opcode_last)
                        continue;

                if (opcode_invalid_in_version(devinfo, op_desc))
                        continue;

                if (!(op_desc->mux_b_mask & (1 << mux_b)))
                        continue;

                if (!(op_desc->mux_a_mask & (1 << mux_a)))
                        continue;

                return op_desc;
        }

        return NULL;
}

static bool
v3d_qpu_float32_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_ABS;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_L;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_H;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_ABS:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_NONE:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_L:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_H:
                *packed = 3;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
                return true;
        case 4:
                *unpacked = V3D_QPU_UNPACK_SWAP_16;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_32F_16:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_L_16:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_H_16:
                *packed = 3;
                return true;
        case V3D_QPU_UNPACK_SWAP_16:
                *packed = 4;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,
                          uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_PACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_PACK_L:
                *packed = 1;
                return true;
        case V3D_QPU_PACK_H:
                *packed = 2;
                return true;
        default:
                return false;
        }
}
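/* For reference (illustrative): the float32 input-unpack encoding is
 * 0 = ABS, 1 = NONE, 2 = L, 3 = H, so the helpers above round-trip:
 *
 *     uint32_t p;
 *     enum v3d_qpu_input_unpack u;
 *     if (v3d_qpu_float32_unpack_pack(V3D_QPU_UNPACK_L, &p))
 *             assert(p == 2);
 *     if (v3d_qpu_float32_unpack_unpack(2, &u))
 *             assert(u == V3D_QPU_UNPACK_L);
 *
 * Note that NONE packs to 1, not 0, which is why several callers below
 * reject packed == 0 (ABS) for opcodes that can't encode an absolute-value
 * unpack.
 */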
static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode_from_packed(devinfo, add_ops, ARRAY_SIZE(add_ops),
                                          map_op, mux_a, mux_b);

        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
         * operands.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_VFPACK:
                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
                        instr->alu.add.output_pack = (op >> 4) & 0x3;
                else
                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.add.b_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a = mux_a;
        instr->alu.add.b = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        instr->alu.add.magic_write = false;
        if (packed_inst & V3D_QPU_MA) {
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}
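/* Worked example of the operand-order trick above (illustrative): add
 * opcode 5 has both unpack fields set to NONE (packed value 1). With
 * mux_a = 3 and mux_b = 1, the comparison 1 * 8 + 3 > 1 * 8 + 1 holds, so
 * the instruction decodes as FADDNF rather than FADD. v3d_qpu_add_pack()
 * below performs the mirror-image swap when packing, so the round trip is
 * stable.
 */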
static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode_from_packed(devinfo, mul_ops,
                                                  ARRAY_SIZE(mul_ops),
                                                  op, mux_a, mux_b);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.mul.b_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a = mux_a;
        instr->alu.mul.b = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;

        return true;
}

static const struct opcode_desc *
lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
                         const struct opcode_desc *opcodes, size_t num_opcodes,
                         uint8_t op)
{
        for (int i = 0; i < num_opcodes; i++) {
                const struct opcode_desc *op_desc = &opcodes[i];

                if (op_desc->op != op)
                        continue;

                if (opcode_invalid_in_version(devinfo, op_desc))
                        continue;

                return op_desc;
        }

        return NULL;
}
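/* Illustrative check of the VFMUL decode in v3d_qpu_mul_unpack() above:
 * VFMUL occupies mul opcodes 4-8, and ((op & 0x7) - 4) & 7 maps opcode 4 to
 * float16 unpack 0 (NONE), opcode 5 to 1 (REPLICATE_32F_16), and opcode 8
 * to ((0 - 4) & 7) == 4 (SWAP_16), matching the special "opcode = 8" case
 * in v3d_qpu_mul_pack() below.
 */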
static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops),
                                         instr->alu.add.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        bool no_magic_write = false;

        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                assert(!instr->alu.add.magic_write);
                *packed_instr |= V3D_QPU_MA;
                break;

        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode = (opcode & ~(1 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= V3D_QPU_MA;

        return true;
}
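/* Illustrative: the three STVPM variants share add opcode 248 and are
 * distinguished purely by the waddr field, which v3d_qpu_add_pack() above
 * forces to 0 (STVPMV), 1 (STVPMD) or 2 (STVPMP) and v3d_qpu_add_unpack()
 * reads back. Presumably that is also why no_magic_write is set for them:
 * waddr is repurposed as opcode bits rather than a write address.
 */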
static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);

        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, mul_ops, ARRAY_SIZE(mul_ops),
                                         instr->alu.mul.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here. If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= V3D_QPU_MM;

        return true;
}
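/* Worked example of the FMUL packing above (illustrative): opcode_first is
 * 16 (binary 010000), so adding output_pack << 4 yields opcode 16 for
 * PACK_NONE (0), 32 for PACK_L (1) and 48 for PACK_H (2). With both input
 * unpacks NONE (packed value 1), the final opcode is 16 + 4 + 1 == 21,
 * which v3d_qpu_mul_unpack() decodes back via ((21 >> 4) & 3) - 1 == 0,
 * i.e. PACK_NONE.
 */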
static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
                         uint64_t packed_instr,
                         struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_ALU;

        if (!v3d_qpu_sig_unpack(devinfo,
                                QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
                                &instr->sig))
                return false;

        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
                instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;

                instr->flags.ac = V3D_QPU_COND_NONE;
                instr->flags.mc = V3D_QPU_COND_NONE;
                instr->flags.apf = V3D_QPU_PF_NONE;
                instr->flags.mpf = V3D_QPU_PF_NONE;
                instr->flags.auf = V3D_QPU_UF_NONE;
                instr->flags.muf = V3D_QPU_UF_NONE;
        } else {
                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
                        return false;
        }

        instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
        instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);

        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
                return false;

        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
                return false;

        return true;
}
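/* Illustrative: when a signal writes a register (e.g. LDUNIFRF on v4.1,
 * where v3d_qpu_sig_writes_address() returns true), the 7-bit cond field is
 * reused as the signal's destination: bit 6 (V3D_QPU_COND_SIG_MAGIC_ADDR)
 * selects a magic register, and the low 6 bits hold the register number.
 * Such instructions therefore can't also carry condition/flag state, which
 * v3d_qpu_instr_pack_alu() below enforces.
 */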
static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
                            uint64_t packed_instr,
                            struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;

        uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
        if (cond == 0)
                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
                 V3D_QPU_BRANCH_COND_ALLNA)
                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
        else
                return false;

        uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
        if (msfign == 3)
                return false;
        instr->branch.msfign = msfign;

        instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);

        instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
        if (instr->branch.ub) {
                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
                                                  V3D_QPU_BRANCH_BDU);
        }

        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
                                              V3D_QPU_RADDR_A);

        instr->branch.offset = 0;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              V3D_QPU_BRANCH_ADDR_LOW) << 3;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              V3D_QPU_BRANCH_ADDR_HIGH) << 24;

        return true;
}

bool
v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
                     uint64_t packed_instr,
                     struct v3d_qpu_instr *instr)
{
        if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
        } else {
                uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);

                if ((sig & 24) == 16) {
                        return v3d_qpu_instr_unpack_branch(devinfo,
                                                           packed_instr,
                                                           instr);
                } else {
                        return false;
                }
        }
}
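/* Illustrative: classification hinges on the mul opcode field. A nonzero
 * OP_MUL (even a mul NOP, which packs as opcode 15) marks an ALU
 * instruction; with OP_MUL zero, signal values 16-23 ((sig & 24) == 16,
 * i.e. bit 4 set and bit 3 clear) mark a branch, matching the
 * QPU_SET_FIELD(16, V3D_QPU_SIG) in v3d_qpu_instr_pack_branch() below.
 */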
static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                       const struct v3d_qpu_instr *instr,
                       uint64_t *packed_instr)
{
        uint32_t sig;
        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
                return false;
        *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);

        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);

                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
                        return false;
                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
                        return false;

                uint32_t flags;
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
                            instr->flags.mc != V3D_QPU_COND_NONE ||
                            instr->flags.apf != V3D_QPU_PF_NONE ||
                            instr->flags.mpf != V3D_QPU_PF_NONE ||
                            instr->flags.auf != V3D_QPU_UF_NONE ||
                            instr->flags.muf != V3D_QPU_UF_NONE) {
                                return false;
                        }

                        flags = instr->sig_addr;
                        if (instr->sig_magic)
                                flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
                } else {
                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
                                return false;
                }

                *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
        } else {
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
                        return false;
        }

        return true;
}

static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
                          const struct v3d_qpu_instr *instr,
                          uint64_t *packed_instr)
{
        *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);

        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
                                                    V3D_QPU_BRANCH_COND_A0),
                                               V3D_QPU_BRANCH_COND);
        }

        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                       V3D_QPU_BRANCH_MSFIGN);

        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
                                       V3D_QPU_BRANCH_BDI);

        if (instr->branch.ub) {
                *packed_instr |= V3D_QPU_BRANCH_UB;
                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
                                               V3D_QPU_BRANCH_BDU);
        }

        switch (instr->branch.bdi) {
        case V3D_QPU_BRANCH_DEST_ABS:
        case V3D_QPU_BRANCH_DEST_REL:
                *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                               V3D_QPU_BRANCH_MSFIGN);

                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
                                                ~0xff000000) >> 3,
                                               V3D_QPU_BRANCH_ADDR_LOW);

                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
                                               V3D_QPU_BRANCH_ADDR_HIGH);
                break;
        default:
                break;
        }

        if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
            instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
                                               V3D_QPU_RADDR_A);
        }

        return true;
}

bool
v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr,
                   uint64_t *packed_instr)
{
        *packed_instr = 0;

        switch (instr->type) {
        case V3D_QPU_INSTR_TYPE_ALU:
                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
        default:
                return false;
        }
}
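/* Usage sketch (not part of the driver): unpacking and repacking should
 * round-trip for valid instruction words, which makes a handy self-check
 * when bringing up a new V3D version. devinfo and inst_word are assumed to
 * come from the caller:
 *
 *     struct v3d_qpu_instr instr;
 *     uint64_t repacked;
 *     if (v3d_qpu_instr_unpack(devinfo, inst_word, &instr) &&
 *         v3d_qpu_instr_pack(devinfo, &instr, &repacked))
 *             assert(repacked == inst_word);
 */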