1/* 2 * Copyright © 2016 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <stdlib.h> 25#include <string.h> 26#include "util/macros.h" 27#include "broadcom/common/v3d_device_info.h" 28#include "qpu_instr.h" 29 30const char * 31v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo, 32 enum v3d_qpu_waddr waddr) 33{ 34 /* V3D 4.x UNIFA aliases TMU in V3D 3.x in the table below */ 35 if (devinfo->ver < 40 && waddr == V3D_QPU_WADDR_TMU) 36 return "tmu"; 37 38 static const char *waddr_magic[] = { 39 [V3D_QPU_WADDR_R0] = "r0", 40 [V3D_QPU_WADDR_R1] = "r1", 41 [V3D_QPU_WADDR_R2] = "r2", 42 [V3D_QPU_WADDR_R3] = "r3", 43 [V3D_QPU_WADDR_R4] = "r4", 44 [V3D_QPU_WADDR_R5] = "r5", 45 [V3D_QPU_WADDR_NOP] = "-", 46 [V3D_QPU_WADDR_TLB] = "tlb", 47 [V3D_QPU_WADDR_TLBU] = "tlbu", 48 [V3D_QPU_WADDR_UNIFA] = "unifa", 49 [V3D_QPU_WADDR_TMUL] = "tmul", 50 [V3D_QPU_WADDR_TMUD] = "tmud", 51 [V3D_QPU_WADDR_TMUA] = "tmua", 52 [V3D_QPU_WADDR_TMUAU] = "tmuau", 53 [V3D_QPU_WADDR_VPM] = "vpm", 54 [V3D_QPU_WADDR_VPMU] = "vpmu", 55 [V3D_QPU_WADDR_SYNC] = "sync", 56 [V3D_QPU_WADDR_SYNCU] = "syncu", 57 [V3D_QPU_WADDR_SYNCB] = "syncb", 58 [V3D_QPU_WADDR_RECIP] = "recip", 59 [V3D_QPU_WADDR_RSQRT] = "rsqrt", 60 [V3D_QPU_WADDR_EXP] = "exp", 61 [V3D_QPU_WADDR_LOG] = "log", 62 [V3D_QPU_WADDR_SIN] = "sin", 63 [V3D_QPU_WADDR_RSQRT2] = "rsqrt2", 64 [V3D_QPU_WADDR_TMUC] = "tmuc", 65 [V3D_QPU_WADDR_TMUS] = "tmus", 66 [V3D_QPU_WADDR_TMUT] = "tmut", 67 [V3D_QPU_WADDR_TMUR] = "tmur", 68 [V3D_QPU_WADDR_TMUI] = "tmui", 69 [V3D_QPU_WADDR_TMUB] = "tmub", 70 [V3D_QPU_WADDR_TMUDREF] = "tmudref", 71 [V3D_QPU_WADDR_TMUOFF] = "tmuoff", 72 [V3D_QPU_WADDR_TMUSCM] = "tmuscm", 73 [V3D_QPU_WADDR_TMUSF] = "tmusf", 74 [V3D_QPU_WADDR_TMUSLOD] = "tmuslod", 75 [V3D_QPU_WADDR_TMUHS] = "tmuhs", 76 [V3D_QPU_WADDR_TMUHSCM] = "tmuscm", 77 [V3D_QPU_WADDR_TMUHSF] = "tmuhsf", 78 [V3D_QPU_WADDR_TMUHSLOD] = "tmuhslod", 79 [V3D_QPU_WADDR_R5REP] = "r5rep", 80 }; 81 82 return waddr_magic[waddr]; 83} 84 85const char * 86v3d_qpu_add_op_name(enum v3d_qpu_add_op op) 87{ 88 static const char *op_names[] = { 89 [V3D_QPU_A_FADD] = "fadd", 90 [V3D_QPU_A_FADDNF] = "faddnf", 91 [V3D_QPU_A_VFPACK] = "vfpack", 92 [V3D_QPU_A_ADD] = "add", 93 [V3D_QPU_A_SUB] = "sub", 94 [V3D_QPU_A_FSUB] = "fsub", 95 [V3D_QPU_A_MIN] = "min", 96 [V3D_QPU_A_MAX] = "max", 97 [V3D_QPU_A_UMIN] = "umin", 98 [V3D_QPU_A_UMAX] = "umax", 99 [V3D_QPU_A_SHL] = "shl", 100 [V3D_QPU_A_SHR] = "shr", 101 [V3D_QPU_A_ASR] = "asr", 102 [V3D_QPU_A_ROR] = "ror", 103 [V3D_QPU_A_FMIN] = "fmin", 104 [V3D_QPU_A_FMAX] = "fmax", 105 [V3D_QPU_A_VFMIN] = "vfmin", 106 [V3D_QPU_A_AND] = "and", 107 [V3D_QPU_A_OR] = "or", 108 [V3D_QPU_A_XOR] = "xor", 109 [V3D_QPU_A_VADD] = "vadd", 110 [V3D_QPU_A_VSUB] = "vsub", 111 [V3D_QPU_A_NOT] = "not", 112 [V3D_QPU_A_NEG] = "neg", 113 [V3D_QPU_A_FLAPUSH] = "flapush", 114 [V3D_QPU_A_FLBPUSH] = "flbpush", 115 [V3D_QPU_A_FLPOP] = "flpop", 116 [V3D_QPU_A_RECIP] = "recip", 117 [V3D_QPU_A_SETMSF] = "setmsf", 118 [V3D_QPU_A_SETREVF] = "setrevf", 119 [V3D_QPU_A_NOP] = "nop", 120 [V3D_QPU_A_TIDX] = "tidx", 121 [V3D_QPU_A_EIDX] = "eidx", 122 [V3D_QPU_A_LR] = "lr", 123 [V3D_QPU_A_VFLA] = "vfla", 124 [V3D_QPU_A_VFLNA] = "vflna", 125 [V3D_QPU_A_VFLB] = "vflb", 126 [V3D_QPU_A_VFLNB] = "vflnb", 127 [V3D_QPU_A_FXCD] = "fxcd", 128 [V3D_QPU_A_XCD] = "xcd", 129 [V3D_QPU_A_FYCD] = "fycd", 130 [V3D_QPU_A_YCD] = "ycd", 131 [V3D_QPU_A_MSF] = "msf", 132 [V3D_QPU_A_REVF] = "revf", 133 [V3D_QPU_A_VDWWT] = "vdwwt", 134 [V3D_QPU_A_IID] = "iid", 135 [V3D_QPU_A_SAMPID] = "sampid", 136 [V3D_QPU_A_BARRIERID] = "barrierid", 137 [V3D_QPU_A_TMUWT] = "tmuwt", 138 [V3D_QPU_A_VPMSETUP] = "vpmsetup", 139 [V3D_QPU_A_VPMWT] = "vpmwt", 140 [V3D_QPU_A_FLAFIRST] = "flafirst", 141 [V3D_QPU_A_FLNAFIRST] = "flnafirst", 142 [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in", 143 [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out", 144 [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in", 145 [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out", 146 [V3D_QPU_A_LDVPMP] = "ldvpmp", 147 [V3D_QPU_A_RSQRT] = "rsqrt", 148 [V3D_QPU_A_EXP] = "exp", 149 [V3D_QPU_A_LOG] = "log", 150 [V3D_QPU_A_SIN] = "sin", 151 [V3D_QPU_A_RSQRT2] = "rsqrt2", 152 [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in", 153 [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out", 154 [V3D_QPU_A_FCMP] = "fcmp", 155 [V3D_QPU_A_VFMAX] = "vfmax", 156 [V3D_QPU_A_FROUND] = "fround", 157 [V3D_QPU_A_FTOIN] = "ftoin", 158 [V3D_QPU_A_FTRUNC] = "ftrunc", 159 [V3D_QPU_A_FTOIZ] = "ftoiz", 160 [V3D_QPU_A_FFLOOR] = "ffloor", 161 [V3D_QPU_A_FTOUZ] = "ftouz", 162 [V3D_QPU_A_FCEIL] = "fceil", 163 [V3D_QPU_A_FTOC] = "ftoc", 164 [V3D_QPU_A_FDX] = "fdx", 165 [V3D_QPU_A_FDY] = "fdy", 166 [V3D_QPU_A_STVPMV] = "stvpmv", 167 [V3D_QPU_A_STVPMD] = "stvpmd", 168 [V3D_QPU_A_STVPMP] = "stvpmp", 169 [V3D_QPU_A_ITOF] = "itof", 170 [V3D_QPU_A_CLZ] = "clz", 171 [V3D_QPU_A_UTOF] = "utof", 172 }; 173 174 if (op >= ARRAY_SIZE(op_names)) 175 return NULL; 176 177 return op_names[op]; 178} 179 180const char * 181v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op) 182{ 183 static const char *op_names[] = { 184 [V3D_QPU_M_ADD] = "add", 185 [V3D_QPU_M_SUB] = "sub", 186 [V3D_QPU_M_UMUL24] = "umul24", 187 [V3D_QPU_M_VFMUL] = "vfmul", 188 [V3D_QPU_M_SMUL24] = "smul24", 189 [V3D_QPU_M_MULTOP] = "multop", 190 [V3D_QPU_M_FMOV] = "fmov", 191 [V3D_QPU_M_MOV] = "mov", 192 [V3D_QPU_M_NOP] = "nop", 193 [V3D_QPU_M_FMUL] = "fmul", 194 }; 195 196 if (op >= ARRAY_SIZE(op_names)) 197 return NULL; 198 199 return op_names[op]; 200} 201 202const char * 203v3d_qpu_cond_name(enum v3d_qpu_cond cond) 204{ 205 switch (cond) { 206 case V3D_QPU_COND_NONE: 207 return ""; 208 case V3D_QPU_COND_IFA: 209 return ".ifa"; 210 case V3D_QPU_COND_IFB: 211 return ".ifb"; 212 case V3D_QPU_COND_IFNA: 213 return ".ifna"; 214 case V3D_QPU_COND_IFNB: 215 return ".ifnb"; 216 default: 217 unreachable("bad cond value"); 218 } 219} 220 221const char * 222v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond) 223{ 224 switch (cond) { 225 case V3D_QPU_BRANCH_COND_ALWAYS: 226 return ""; 227 case V3D_QPU_BRANCH_COND_A0: 228 return ".a0"; 229 case V3D_QPU_BRANCH_COND_NA0: 230 return ".na0"; 231 case V3D_QPU_BRANCH_COND_ALLA: 232 return ".alla"; 233 case V3D_QPU_BRANCH_COND_ANYNA: 234 return ".anyna"; 235 case V3D_QPU_BRANCH_COND_ANYA: 236 return ".anya"; 237 case V3D_QPU_BRANCH_COND_ALLNA: 238 return ".allna"; 239 default: 240 unreachable("bad branch cond value"); 241 } 242} 243 244const char * 245v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign) 246{ 247 switch (msfign) { 248 case V3D_QPU_MSFIGN_NONE: 249 return ""; 250 case V3D_QPU_MSFIGN_P: 251 return "p"; 252 case V3D_QPU_MSFIGN_Q: 253 return "q"; 254 default: 255 unreachable("bad branch cond value"); 256 } 257} 258 259const char * 260v3d_qpu_pf_name(enum v3d_qpu_pf pf) 261{ 262 switch (pf) { 263 case V3D_QPU_PF_NONE: 264 return ""; 265 case V3D_QPU_PF_PUSHZ: 266 return ".pushz"; 267 case V3D_QPU_PF_PUSHN: 268 return ".pushn"; 269 case V3D_QPU_PF_PUSHC: 270 return ".pushc"; 271 default: 272 unreachable("bad pf value"); 273 } 274} 275 276const char * 277v3d_qpu_uf_name(enum v3d_qpu_uf uf) 278{ 279 switch (uf) { 280 case V3D_QPU_UF_NONE: 281 return ""; 282 case V3D_QPU_UF_ANDZ: 283 return ".andz"; 284 case V3D_QPU_UF_ANDNZ: 285 return ".andnz"; 286 case V3D_QPU_UF_NORZ: 287 return ".norz"; 288 case V3D_QPU_UF_NORNZ: 289 return ".nornz"; 290 case V3D_QPU_UF_ANDN: 291 return ".andn"; 292 case V3D_QPU_UF_ANDNN: 293 return ".andnn"; 294 case V3D_QPU_UF_NORN: 295 return ".norn"; 296 case V3D_QPU_UF_NORNN: 297 return ".nornn"; 298 case V3D_QPU_UF_ANDC: 299 return ".andc"; 300 case V3D_QPU_UF_ANDNC: 301 return ".andnc"; 302 case V3D_QPU_UF_NORC: 303 return ".norc"; 304 case V3D_QPU_UF_NORNC: 305 return ".nornc"; 306 default: 307 unreachable("bad pf value"); 308 } 309} 310 311const char * 312v3d_qpu_pack_name(enum v3d_qpu_output_pack pack) 313{ 314 switch (pack) { 315 case V3D_QPU_PACK_NONE: 316 return ""; 317 case V3D_QPU_PACK_L: 318 return ".l"; 319 case V3D_QPU_PACK_H: 320 return ".h"; 321 default: 322 unreachable("bad pack value"); 323 } 324} 325 326const char * 327v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack) 328{ 329 switch (unpack) { 330 case V3D_QPU_UNPACK_NONE: 331 return ""; 332 case V3D_QPU_UNPACK_L: 333 return ".l"; 334 case V3D_QPU_UNPACK_H: 335 return ".h"; 336 case V3D_QPU_UNPACK_ABS: 337 return ".abs"; 338 case V3D_QPU_UNPACK_REPLICATE_32F_16: 339 return ".ff"; 340 case V3D_QPU_UNPACK_REPLICATE_L_16: 341 return ".ll"; 342 case V3D_QPU_UNPACK_REPLICATE_H_16: 343 return ".hh"; 344 case V3D_QPU_UNPACK_SWAP_16: 345 return ".swp"; 346 default: 347 unreachable("bad unpack value"); 348 } 349} 350 351#define D 1 352#define A 2 353#define B 4 354static const uint8_t add_op_args[] = { 355 [V3D_QPU_A_FADD] = D | A | B, 356 [V3D_QPU_A_FADDNF] = D | A | B, 357 [V3D_QPU_A_VFPACK] = D | A | B, 358 [V3D_QPU_A_ADD] = D | A | B, 359 [V3D_QPU_A_VFPACK] = D | A | B, 360 [V3D_QPU_A_SUB] = D | A | B, 361 [V3D_QPU_A_VFPACK] = D | A | B, 362 [V3D_QPU_A_FSUB] = D | A | B, 363 [V3D_QPU_A_MIN] = D | A | B, 364 [V3D_QPU_A_MAX] = D | A | B, 365 [V3D_QPU_A_UMIN] = D | A | B, 366 [V3D_QPU_A_UMAX] = D | A | B, 367 [V3D_QPU_A_SHL] = D | A | B, 368 [V3D_QPU_A_SHR] = D | A | B, 369 [V3D_QPU_A_ASR] = D | A | B, 370 [V3D_QPU_A_ROR] = D | A | B, 371 [V3D_QPU_A_FMIN] = D | A | B, 372 [V3D_QPU_A_FMAX] = D | A | B, 373 [V3D_QPU_A_VFMIN] = D | A | B, 374 375 [V3D_QPU_A_AND] = D | A | B, 376 [V3D_QPU_A_OR] = D | A | B, 377 [V3D_QPU_A_XOR] = D | A | B, 378 379 [V3D_QPU_A_VADD] = D | A | B, 380 [V3D_QPU_A_VSUB] = D | A | B, 381 [V3D_QPU_A_NOT] = D | A, 382 [V3D_QPU_A_NEG] = D | A, 383 [V3D_QPU_A_FLAPUSH] = D | A, 384 [V3D_QPU_A_FLBPUSH] = D | A, 385 [V3D_QPU_A_FLPOP] = D | A, 386 [V3D_QPU_A_RECIP] = D | A, 387 [V3D_QPU_A_SETMSF] = D | A, 388 [V3D_QPU_A_SETREVF] = D | A, 389 [V3D_QPU_A_NOP] = 0, 390 [V3D_QPU_A_TIDX] = D, 391 [V3D_QPU_A_EIDX] = D, 392 [V3D_QPU_A_LR] = D, 393 [V3D_QPU_A_VFLA] = D, 394 [V3D_QPU_A_VFLNA] = D, 395 [V3D_QPU_A_VFLB] = D, 396 [V3D_QPU_A_VFLNB] = D, 397 398 [V3D_QPU_A_FXCD] = D, 399 [V3D_QPU_A_XCD] = D, 400 [V3D_QPU_A_FYCD] = D, 401 [V3D_QPU_A_YCD] = D, 402 403 [V3D_QPU_A_MSF] = D, 404 [V3D_QPU_A_REVF] = D, 405 [V3D_QPU_A_VDWWT] = D, 406 [V3D_QPU_A_IID] = D, 407 [V3D_QPU_A_SAMPID] = D, 408 [V3D_QPU_A_BARRIERID] = D, 409 [V3D_QPU_A_TMUWT] = D, 410 [V3D_QPU_A_VPMWT] = D, 411 [V3D_QPU_A_FLAFIRST] = D, 412 [V3D_QPU_A_FLNAFIRST] = D, 413 414 [V3D_QPU_A_VPMSETUP] = D | A, 415 416 [V3D_QPU_A_LDVPMV_IN] = D | A, 417 [V3D_QPU_A_LDVPMV_OUT] = D | A, 418 [V3D_QPU_A_LDVPMD_IN] = D | A, 419 [V3D_QPU_A_LDVPMD_OUT] = D | A, 420 [V3D_QPU_A_LDVPMP] = D | A, 421 [V3D_QPU_A_RSQRT] = D | A, 422 [V3D_QPU_A_EXP] = D | A, 423 [V3D_QPU_A_LOG] = D | A, 424 [V3D_QPU_A_SIN] = D | A, 425 [V3D_QPU_A_RSQRT2] = D | A, 426 [V3D_QPU_A_LDVPMG_IN] = D | A | B, 427 [V3D_QPU_A_LDVPMG_OUT] = D | A | B, 428 429 /* FIXME: MOVABSNEG */ 430 431 [V3D_QPU_A_FCMP] = D | A | B, 432 [V3D_QPU_A_VFMAX] = D | A | B, 433 434 [V3D_QPU_A_FROUND] = D | A, 435 [V3D_QPU_A_FTOIN] = D | A, 436 [V3D_QPU_A_FTRUNC] = D | A, 437 [V3D_QPU_A_FTOIZ] = D | A, 438 [V3D_QPU_A_FFLOOR] = D | A, 439 [V3D_QPU_A_FTOUZ] = D | A, 440 [V3D_QPU_A_FCEIL] = D | A, 441 [V3D_QPU_A_FTOC] = D | A, 442 443 [V3D_QPU_A_FDX] = D | A, 444 [V3D_QPU_A_FDY] = D | A, 445 446 [V3D_QPU_A_STVPMV] = A | B, 447 [V3D_QPU_A_STVPMD] = A | B, 448 [V3D_QPU_A_STVPMP] = A | B, 449 450 [V3D_QPU_A_ITOF] = D | A, 451 [V3D_QPU_A_CLZ] = D | A, 452 [V3D_QPU_A_UTOF] = D | A, 453}; 454 455static const uint8_t mul_op_args[] = { 456 [V3D_QPU_M_ADD] = D | A | B, 457 [V3D_QPU_M_SUB] = D | A | B, 458 [V3D_QPU_M_UMUL24] = D | A | B, 459 [V3D_QPU_M_VFMUL] = D | A | B, 460 [V3D_QPU_M_SMUL24] = D | A | B, 461 [V3D_QPU_M_MULTOP] = D | A | B, 462 [V3D_QPU_M_FMOV] = D | A, 463 [V3D_QPU_M_NOP] = 0, 464 [V3D_QPU_M_MOV] = D | A, 465 [V3D_QPU_M_FMUL] = D | A | B, 466}; 467 468bool 469v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op) 470{ 471 assert(op < ARRAY_SIZE(add_op_args)); 472 473 return add_op_args[op] & D; 474} 475 476bool 477v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op) 478{ 479 assert(op < ARRAY_SIZE(mul_op_args)); 480 481 return mul_op_args[op] & D; 482} 483 484int 485v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op) 486{ 487 assert(op < ARRAY_SIZE(add_op_args)); 488 489 uint8_t args = add_op_args[op]; 490 if (args & B) 491 return 2; 492 else if (args & A) 493 return 1; 494 else 495 return 0; 496} 497 498int 499v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op) 500{ 501 assert(op < ARRAY_SIZE(mul_op_args)); 502 503 uint8_t args = mul_op_args[op]; 504 if (args & B) 505 return 2; 506 else if (args & A) 507 return 1; 508 else 509 return 0; 510} 511 512enum v3d_qpu_cond 513v3d_qpu_cond_invert(enum v3d_qpu_cond cond) 514{ 515 switch (cond) { 516 case V3D_QPU_COND_IFA: 517 return V3D_QPU_COND_IFNA; 518 case V3D_QPU_COND_IFNA: 519 return V3D_QPU_COND_IFA; 520 case V3D_QPU_COND_IFB: 521 return V3D_QPU_COND_IFNB; 522 case V3D_QPU_COND_IFNB: 523 return V3D_QPU_COND_IFB; 524 default: 525 unreachable("Non-invertible cond"); 526 } 527} 528 529bool 530v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) 531{ 532 switch (waddr) { 533 case V3D_QPU_WADDR_RECIP: 534 case V3D_QPU_WADDR_RSQRT: 535 case V3D_QPU_WADDR_EXP: 536 case V3D_QPU_WADDR_LOG: 537 case V3D_QPU_WADDR_SIN: 538 case V3D_QPU_WADDR_RSQRT2: 539 return true; 540 default: 541 return false; 542 } 543} 544 545bool 546v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info *devinfo, 547 enum v3d_qpu_waddr waddr) 548{ 549 if (devinfo->ver >= 40) { 550 return ((waddr >= V3D_QPU_WADDR_TMUD && 551 waddr <= V3D_QPU_WADDR_TMUAU) || 552 (waddr >= V3D_QPU_WADDR_TMUC && 553 waddr <= V3D_QPU_WADDR_TMUHSLOD)); 554 } else { 555 return ((waddr >= V3D_QPU_WADDR_TMU && 556 waddr <= V3D_QPU_WADDR_TMUAU) || 557 (waddr >= V3D_QPU_WADDR_TMUC && 558 waddr <= V3D_QPU_WADDR_TMUHSLOD)); 559 } 560} 561 562bool 563v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) 564{ 565 return (inst->sig.ldtmu || 566 (inst->type == V3D_QPU_INSTR_TYPE_ALU && 567 inst->alu.add.op == V3D_QPU_A_TMUWT)); 568} 569 570bool 571v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) 572{ 573 return (waddr == V3D_QPU_WADDR_TLB || 574 waddr == V3D_QPU_WADDR_TLBU); 575} 576 577bool 578v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) 579{ 580 return (waddr == V3D_QPU_WADDR_VPM || 581 waddr == V3D_QPU_WADDR_VPMU); 582} 583 584bool 585v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) 586{ 587 return (waddr == V3D_QPU_WADDR_SYNC || 588 waddr == V3D_QPU_WADDR_SYNCB || 589 waddr == V3D_QPU_WADDR_SYNCU); 590} 591 592bool 593v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) 594{ 595 switch (waddr) { 596 case V3D_QPU_WADDR_VPMU: 597 case V3D_QPU_WADDR_TLBU: 598 case V3D_QPU_WADDR_TMUAU: 599 case V3D_QPU_WADDR_SYNCU: 600 return true; 601 default: 602 return false; 603 } 604} 605 606static bool 607v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op) 608{ 609 switch (op) { 610 case V3D_QPU_A_VPMSETUP: 611 case V3D_QPU_A_LDVPMV_IN: 612 case V3D_QPU_A_LDVPMV_OUT: 613 case V3D_QPU_A_LDVPMD_IN: 614 case V3D_QPU_A_LDVPMD_OUT: 615 case V3D_QPU_A_LDVPMP: 616 case V3D_QPU_A_LDVPMG_IN: 617 case V3D_QPU_A_LDVPMG_OUT: 618 return true; 619 default: 620 return false; 621 } 622} 623 624static bool 625v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op) 626{ 627 switch (op) { 628 case V3D_QPU_A_VPMSETUP: 629 case V3D_QPU_A_STVPMV: 630 case V3D_QPU_A_STVPMD: 631 case V3D_QPU_A_STVPMP: 632 return true; 633 default: 634 return false; 635 } 636} 637 638bool 639v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) 640{ 641 if (inst->sig.ldtlb || 642 inst->sig.ldtlbu) 643 return true; 644 645 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 646 if (inst->alu.add.magic_write && 647 v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) { 648 return true; 649 } 650 651 if (inst->alu.mul.magic_write && 652 v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) { 653 return true; 654 } 655 } 656 657 return false; 658} 659 660bool 661v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) 662{ 663 if (v3d_qpu_instr_is_sfu(inst)) 664 return true; 665 666 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 667 if (inst->alu.add.magic_write && 668 v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) { 669 return true; 670 } 671 672 if (inst->alu.mul.magic_write && 673 v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) { 674 return true; 675 } 676 } 677 678 return false; 679} 680 681bool 682v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst) 683{ 684 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 685 switch (inst->alu.add.op) { 686 case V3D_QPU_A_RECIP: 687 case V3D_QPU_A_RSQRT: 688 case V3D_QPU_A_EXP: 689 case V3D_QPU_A_LOG: 690 case V3D_QPU_A_SIN: 691 case V3D_QPU_A_RSQRT2: 692 return true; 693 default: 694 return false; 695 } 696 } 697 return false; 698} 699 700bool 701v3d_qpu_writes_tmu(const struct v3d_device_info *devinfo, 702 const struct v3d_qpu_instr *inst) 703{ 704 return (inst->type == V3D_QPU_INSTR_TYPE_ALU && 705 ((inst->alu.add.magic_write && 706 v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.add.waddr)) || 707 (inst->alu.mul.magic_write && 708 v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.mul.waddr)))); 709} 710 711bool 712v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info *devinfo, 713 const struct v3d_qpu_instr *inst) 714{ 715 return v3d_qpu_writes_tmu(devinfo, inst) && 716 (!inst->alu.add.magic_write || 717 inst->alu.add.waddr != V3D_QPU_WADDR_TMUC) && 718 (!inst->alu.mul.magic_write || 719 inst->alu.mul.waddr != V3D_QPU_WADDR_TMUC); 720} 721 722bool 723v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) 724{ 725 if (inst->sig.ldvpm) 726 return true; 727 728 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 729 if (v3d_qpu_add_op_reads_vpm(inst->alu.add.op)) 730 return true; 731 } 732 733 return false; 734} 735 736bool 737v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst) 738{ 739 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 740 if (v3d_qpu_add_op_writes_vpm(inst->alu.add.op)) 741 return true; 742 743 if (inst->alu.add.magic_write && 744 v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) { 745 return true; 746 } 747 748 if (inst->alu.mul.magic_write && 749 v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) { 750 return true; 751 } 752 } 753 754 return false; 755} 756 757bool 758v3d_qpu_writes_unifa(const struct v3d_device_info *devinfo, 759 const struct v3d_qpu_instr *inst) 760{ 761 if (devinfo->ver < 40) 762 return false; 763 764 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 765 if (inst->alu.add.op != V3D_QPU_A_NOP && 766 inst->alu.add.magic_write && 767 inst->alu.add.waddr == V3D_QPU_WADDR_UNIFA) { 768 return true; 769 } 770 771 if (inst->alu.mul.op != V3D_QPU_M_NOP && 772 inst->alu.mul.magic_write && 773 inst->alu.mul.waddr == V3D_QPU_WADDR_UNIFA) { 774 return true; 775 } 776 } 777 778 return false; 779} 780 781bool 782v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst) 783{ 784 return inst->type == V3D_QPU_INSTR_TYPE_ALU && 785 inst->alu.add.op == V3D_QPU_A_VPMWT; 786} 787 788bool 789v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst) 790{ 791 return v3d_qpu_reads_vpm(inst) || v3d_qpu_writes_vpm(inst); 792} 793 794bool 795v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) 796{ 797 return v3d_qpu_reads_vpm(inst) || 798 v3d_qpu_writes_vpm(inst) || 799 v3d_qpu_waits_vpm(inst); 800} 801 802static bool 803qpu_writes_magic_waddr_explicitly(const struct v3d_device_info *devinfo, 804 const struct v3d_qpu_instr *inst, 805 uint32_t waddr) 806{ 807 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 808 if (inst->alu.add.magic_write && inst->alu.add.waddr == waddr) 809 return true; 810 811 if (inst->alu.mul.magic_write && inst->alu.mul.waddr == waddr) 812 return true; 813 } 814 815 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && 816 inst->sig_magic && inst->sig_addr == waddr) { 817 return true; 818 } 819 820 return false; 821} 822 823bool 824v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, 825 const struct v3d_qpu_instr *inst) 826{ 827 if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R3)) 828 return true; 829 830 return (devinfo->ver < 41 && inst->sig.ldvary) || inst->sig.ldvpm; 831} 832 833bool 834v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, 835 const struct v3d_qpu_instr *inst) 836{ 837 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 838 if (inst->alu.add.magic_write && 839 (inst->alu.add.waddr == V3D_QPU_WADDR_R4 || 840 v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))) { 841 return true; 842 } 843 844 if (inst->alu.mul.magic_write && 845 (inst->alu.mul.waddr == V3D_QPU_WADDR_R4 || 846 v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))) { 847 return true; 848 } 849 } 850 851 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) { 852 if (inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4) 853 return true; 854 } else if (inst->sig.ldtmu) { 855 return true; 856 } 857 858 return false; 859} 860 861bool 862v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, 863 const struct v3d_qpu_instr *inst) 864{ 865 if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R5)) 866 return true; 867 868 return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa; 869} 870 871bool 872v3d_qpu_writes_accum(const struct v3d_device_info *devinfo, 873 const struct v3d_qpu_instr *inst) 874{ 875 if (v3d_qpu_writes_r5(devinfo, inst)) 876 return true; 877 if (v3d_qpu_writes_r4(devinfo, inst)) 878 return true; 879 if (v3d_qpu_writes_r3(devinfo, inst)) 880 return true; 881 if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R2)) 882 return true; 883 if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R1)) 884 return true; 885 if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R0)) 886 return true; 887 888 return false; 889} 890 891bool 892v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux) 893{ 894 int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op); 895 int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op); 896 897 return ((add_nsrc > 0 && inst->alu.add.a == mux) || 898 (add_nsrc > 1 && inst->alu.add.b == mux) || 899 (mul_nsrc > 0 && inst->alu.mul.a == mux) || 900 (mul_nsrc > 1 && inst->alu.mul.b == mux)); 901} 902 903bool 904v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, 905 const struct v3d_qpu_sig *sig) 906{ 907 if (devinfo->ver < 41) 908 return false; 909 910 return (sig->ldunifrf || 911 sig->ldunifarf || 912 sig->ldvary || 913 sig->ldtmu || 914 sig->ldtlb || 915 sig->ldtlbu); 916} 917 918bool 919v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) 920{ 921 if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) { 922 return inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS; 923 } else if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { 924 if (inst->flags.ac != V3D_QPU_COND_NONE || 925 inst->flags.mc != V3D_QPU_COND_NONE || 926 inst->flags.auf != V3D_QPU_UF_NONE || 927 inst->flags.muf != V3D_QPU_UF_NONE) 928 return true; 929 930 switch (inst->alu.add.op) { 931 case V3D_QPU_A_VFLA: 932 case V3D_QPU_A_VFLNA: 933 case V3D_QPU_A_VFLB: 934 case V3D_QPU_A_VFLNB: 935 case V3D_QPU_A_FLAPUSH: 936 case V3D_QPU_A_FLBPUSH: 937 case V3D_QPU_A_FLAFIRST: 938 case V3D_QPU_A_FLNAFIRST: 939 return true; 940 default: 941 break; 942 } 943 } 944 945 return false; 946} 947 948bool 949v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) 950{ 951 if (inst->flags.apf != V3D_QPU_PF_NONE || 952 inst->flags.mpf != V3D_QPU_PF_NONE || 953 inst->flags.auf != V3D_QPU_UF_NONE || 954 inst->flags.muf != V3D_QPU_UF_NONE) { 955 return true; 956 } 957 958 return false; 959} 960 961bool 962v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) 963{ 964 if (inst->type != V3D_QPU_INSTR_TYPE_ALU) 965 return false; 966 967 switch (inst->alu.add.op) { 968 case V3D_QPU_A_FADD: 969 case V3D_QPU_A_FADDNF: 970 case V3D_QPU_A_FSUB: 971 case V3D_QPU_A_FMIN: 972 case V3D_QPU_A_FMAX: 973 case V3D_QPU_A_FCMP: 974 case V3D_QPU_A_FROUND: 975 case V3D_QPU_A_FTRUNC: 976 case V3D_QPU_A_FFLOOR: 977 case V3D_QPU_A_FCEIL: 978 case V3D_QPU_A_FDX: 979 case V3D_QPU_A_FDY: 980 case V3D_QPU_A_FTOIN: 981 case V3D_QPU_A_FTOIZ: 982 case V3D_QPU_A_FTOUZ: 983 case V3D_QPU_A_FTOC: 984 case V3D_QPU_A_VFPACK: 985 return true; 986 break; 987 default: 988 break; 989 } 990 991 switch (inst->alu.mul.op) { 992 case V3D_QPU_M_FMOV: 993 case V3D_QPU_M_FMUL: 994 return true; 995 break; 996 default: 997 break; 998 } 999 1000 return false; 1001} 1002bool 1003v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) 1004{ 1005 if (inst->type != V3D_QPU_INSTR_TYPE_ALU) 1006 return false; 1007 1008 switch (inst->alu.add.op) { 1009 case V3D_QPU_A_VFMIN: 1010 case V3D_QPU_A_VFMAX: 1011 return true; 1012 break; 1013 default: 1014 break; 1015 } 1016 1017 switch (inst->alu.mul.op) { 1018 case V3D_QPU_M_VFMUL: 1019 return true; 1020 break; 1021 default: 1022 break; 1023 } 1024 1025 return false; 1026} 1027 1028bool 1029v3d_qpu_is_nop(struct v3d_qpu_instr *inst) 1030{ 1031 static const struct v3d_qpu_sig nosig = { 0 }; 1032 1033 if (inst->type != V3D_QPU_INSTR_TYPE_ALU) 1034 return false; 1035 if (inst->alu.add.op != V3D_QPU_A_NOP) 1036 return false; 1037 if (inst->alu.mul.op != V3D_QPU_M_NOP) 1038 return false; 1039 if (memcmp(&inst->sig, &nosig, sizeof(nosig))) 1040 return false; 1041 return true; 1042} 1043