1/* 2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 */ 23 24#include <assert.h> 25#include <ctype.h> 26#include <err.h> 27#include <errno.h> 28#include <fcntl.h> 29#include <inttypes.h> 30#include <signal.h> 31#include <stdarg.h> 32#include <stdbool.h> 33#include <stdint.h> 34#include <stdio.h> 35#include <stdlib.h> 36#include <string.h> 37#include <unistd.h> 38#include <sys/stat.h> 39#include <sys/types.h> 40#include <sys/wait.h> 41 42#include "freedreno_pm4.h" 43 44#include "buffers.h" 45#include "cffdec.h" 46#include "disasm.h" 47#include "redump.h" 48#include "rnnutil.h" 49#include "script.h" 50 51/* ************************************************************************* */ 52/* originally based on kernel recovery dump code: */ 53 54static const struct cffdec_options *options; 55 56static bool needs_wfi = false; 57static bool summary = false; 58static bool in_summary = false; 59static int vertices; 60 61static inline unsigned 62regcnt(void) 63{ 64 if (options->gpu_id >= 500) 65 return 0xffff; 66 else 67 return 0x7fff; 68} 69 70static int 71is_64b(void) 72{ 73 return options->gpu_id >= 500; 74} 75 76static int draws[4]; 77static struct { 78 uint64_t base; 79 uint32_t size; /* in dwords */ 80 /* Generally cmdstream consists of multiple IB calls to different 81 * buffers, which are themselves often re-used for each tile. The 82 * triggered flag serves two purposes to help make it more clear 83 * what part of the cmdstream is before vs after the the GPU hang: 84 * 85 * 1) if in IB2 we are passed the point within the IB2 buffer where 86 * the GPU hung, but IB1 is not passed the point within its 87 * buffer where the GPU had hung, then we know the GPU hang 88 * happens on a future use of that IB2 buffer. 89 * 90 * 2) if in an IB1 or IB2 buffer that is not the one where the GPU 91 * hung, but we've already passed the trigger point at the same 92 * IB level, we know that we are passed the point where the GPU 93 * had hung. 94 * 95 * So this is a one way switch, false->true. 
And a higher #'d 96 * IB level isn't considered triggered unless the lower #'d IB 97 * level is. 98 */ 99 bool triggered; 100} ibs[4]; 101static int ib; 102 103static int draw_count; 104static int current_draw_count; 105 106/* query mode.. to handle symbolic register name queries, we need to 107 * defer parsing query string until after gpu_id is know and rnn db 108 * loaded: 109 */ 110static int *queryvals; 111 112static bool 113quiet(int lvl) 114{ 115 if ((options->draw_filter != -1) && 116 (options->draw_filter != current_draw_count)) 117 return true; 118 if ((lvl >= 3) && (summary || options->querystrs || options->script)) 119 return true; 120 if ((lvl >= 2) && (options->querystrs || options->script)) 121 return true; 122 return false; 123} 124 125void 126printl(int lvl, const char *fmt, ...) 127{ 128 va_list args; 129 if (quiet(lvl)) 130 return; 131 va_start(args, fmt); 132 vprintf(fmt, args); 133 va_end(args); 134} 135 136static const char *levels[] = { 137 "\t", 138 "\t\t", 139 "\t\t\t", 140 "\t\t\t\t", 141 "\t\t\t\t\t", 142 "\t\t\t\t\t\t", 143 "\t\t\t\t\t\t\t", 144 "\t\t\t\t\t\t\t\t", 145 "\t\t\t\t\t\t\t\t\t", 146 "x", 147 "x", 148 "x", 149 "x", 150 "x", 151 "x", 152}; 153 154enum state_src_t { 155 STATE_SRC_DIRECT, 156 STATE_SRC_INDIRECT, 157 STATE_SRC_BINDLESS, 158}; 159 160/* SDS (CP_SET_DRAW_STATE) helpers: */ 161static void load_all_groups(int level); 162static void disable_all_groups(void); 163 164static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, 165 int level); 166static void dump_tex_const(uint32_t *texsamp, int num_unit, int level); 167 168static bool 169highlight_gpuaddr(uint64_t gpuaddr) 170{ 171 if (!options->ibs[ib].base) 172 return false; 173 174 if ((ib > 0) && options->ibs[ib - 1].base && !ibs[ib - 1].triggered) 175 return false; 176 177 if (ibs[ib].triggered) 178 return options->color; 179 180 if (options->ibs[ib].base != ibs[ib].base) 181 return false; 182 183 uint64_t start = ibs[ib].base + 4 * (ibs[ib].size 
- options->ibs[ib].rem); 184 uint64_t end = ibs[ib].base + 4 * ibs[ib].size; 185 186 bool triggered = (start <= gpuaddr) && (gpuaddr <= end); 187 188 ibs[ib].triggered |= triggered; 189 190 if (triggered) 191 printf("ESTIMATED CRASH LOCATION!\n"); 192 193 return triggered & options->color; 194} 195 196static void 197dump_hex(uint32_t *dwords, uint32_t sizedwords, int level) 198{ 199 int i, j; 200 int lastzero = 1; 201 202 if (quiet(2)) 203 return; 204 205 for (i = 0; i < sizedwords; i += 8) { 206 int zero = 1; 207 208 /* always show first row: */ 209 if (i == 0) 210 zero = 0; 211 212 for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++) 213 if (dwords[i + j]) 214 zero = 0; 215 216 if (zero && !lastzero) 217 printf("*\n"); 218 219 lastzero = zero; 220 221 if (zero) 222 continue; 223 224 uint64_t addr = gpuaddr(&dwords[i]); 225 bool highlight = highlight_gpuaddr(addr); 226 227 if (highlight) 228 printf("\x1b[0;1;31m"); 229 230 if (is_64b()) { 231 printf("%016" PRIx64 ":%s", addr, levels[level]); 232 } else { 233 printf("%08x:%s", (uint32_t)addr, levels[level]); 234 } 235 236 if (highlight) 237 printf("\x1b[0m"); 238 239 printf("%04x:", i * 4); 240 241 for (j = 0; (j < 8) && (i + j < sizedwords); j++) { 242 printf(" %08x", dwords[i + j]); 243 } 244 245 printf("\n"); 246 } 247} 248 249static void 250dump_float(float *dwords, uint32_t sizedwords, int level) 251{ 252 int i; 253 for (i = 0; i < sizedwords; i++) { 254 if ((i % 8) == 0) { 255 if (is_64b()) { 256 printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]); 257 } else { 258 printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]); 259 } 260 } else { 261 printf(" "); 262 } 263 printf("%8f", *(dwords++)); 264 if ((i % 8) == 7) 265 printf("\n"); 266 } 267 if (i % 8) 268 printf("\n"); 269} 270 271/* I believe the surface format is low bits: 272#define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL 273comments in sys2gmem_tex_const indicate that address is [31:12], but 274looks like at least some of the bits 
above the format have different meaning.. 275*/ 276static void 277parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags, 278 uint32_t mask) 279{ 280 assert(!is_64b()); /* this is only used on a2xx */ 281 *gpuaddr = dword & ~mask; 282 *flags = dword & mask; 283} 284 285static uint32_t type0_reg_vals[0xffff + 1]; 286static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) / 287 8]; /* written since last draw */ 288static uint8_t type0_reg_written[sizeof(type0_reg_vals) / 8]; 289static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)]; 290 291static bool 292reg_rewritten(uint32_t regbase) 293{ 294 return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8))); 295} 296 297bool 298reg_written(uint32_t regbase) 299{ 300 return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8))); 301} 302 303static void 304clear_rewritten(void) 305{ 306 memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten)); 307} 308 309static void 310clear_written(void) 311{ 312 memset(type0_reg_written, 0, sizeof(type0_reg_written)); 313 clear_rewritten(); 314} 315 316uint32_t 317reg_lastval(uint32_t regbase) 318{ 319 return lastvals[regbase]; 320} 321 322static void 323clear_lastvals(void) 324{ 325 memset(lastvals, 0, sizeof(lastvals)); 326} 327 328uint32_t 329reg_val(uint32_t regbase) 330{ 331 return type0_reg_vals[regbase]; 332} 333 334void 335reg_set(uint32_t regbase, uint32_t val) 336{ 337 assert(regbase < regcnt()); 338 type0_reg_vals[regbase] = val; 339 type0_reg_written[regbase / 8] |= (1 << (regbase % 8)); 340 type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8)); 341} 342 343static void 344reg_dump_scratch(const char *name, uint32_t dword, int level) 345{ 346 unsigned r; 347 348 if (quiet(3)) 349 return; 350 351 r = regbase("CP_SCRATCH[0].REG"); 352 353 // if not, try old a2xx/a3xx version: 354 if (!r) 355 r = regbase("CP_SCRATCH_REG0"); 356 357 if (!r) 358 return; 359 360 printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5), 361 reg_val(r + 
6), reg_val(r + 7)); 362} 363 364static void 365dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl) 366{ 367 void *buf; 368 369 if (quiet(quietlvl)) 370 return; 371 372 buf = hostptr(gpuaddr); 373 if (buf) { 374 dump_hex(buf, sizedwords, level + 1); 375 } 376} 377 378static void 379dump_gpuaddr(uint64_t gpuaddr, int level) 380{ 381 dump_gpuaddr_size(gpuaddr, level, 64, 3); 382} 383 384static void 385reg_dump_gpuaddr(const char *name, uint32_t dword, int level) 386{ 387 dump_gpuaddr(dword, level); 388} 389 390uint32_t gpuaddr_lo; 391static void 392reg_gpuaddr_lo(const char *name, uint32_t dword, int level) 393{ 394 gpuaddr_lo = dword; 395} 396 397static void 398reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level) 399{ 400 dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level); 401} 402 403static void 404reg_dump_gpuaddr64(const char *name, uint64_t qword, int level) 405{ 406 dump_gpuaddr(qword, level); 407} 408 409static void 410dump_shader(const char *ext, void *buf, int bufsz) 411{ 412 if (options->dump_shaders) { 413 static int n = 0; 414 char filename[16]; 415 int fd; 416 sprintf(filename, "%04d.%s", n++, ext); 417 fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644); 418 if (fd != -1) { 419 write(fd, buf, bufsz); 420 close(fd); 421 } 422 } 423} 424 425static void 426disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level) 427{ 428 void *buf; 429 430 gpuaddr &= 0xfffffffffffffff0; 431 432 if (quiet(3)) 433 return; 434 435 buf = hostptr(gpuaddr); 436 if (buf) { 437 uint32_t sizedwords = hostlen(gpuaddr) / 4; 438 const char *ext; 439 440 dump_hex(buf, min(64, sizedwords), level + 1); 441 try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->gpu_id); 442 443 /* this is a bit ugly way, but oh well.. 
*/ 444 if (strstr(name, "SP_VS_OBJ")) { 445 ext = "vo3"; 446 } else if (strstr(name, "SP_FS_OBJ")) { 447 ext = "fo3"; 448 } else if (strstr(name, "SP_GS_OBJ")) { 449 ext = "go3"; 450 } else if (strstr(name, "SP_CS_OBJ")) { 451 ext = "co3"; 452 } else { 453 ext = NULL; 454 } 455 456 if (ext) 457 dump_shader(ext, buf, sizedwords * 4); 458 } 459} 460 461static void 462reg_disasm_gpuaddr(const char *name, uint32_t dword, int level) 463{ 464 disasm_gpuaddr(name, dword, level); 465} 466 467static void 468reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level) 469{ 470 disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level); 471} 472 473static void 474reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level) 475{ 476 disasm_gpuaddr(name, qword, level); 477} 478 479/* Find the value of the TEX_COUNT register that corresponds to the named 480 * TEX_SAMP/TEX_CONST reg. 481 * 482 * Note, this kinda assumes an equal # of samplers and textures, but not 483 * really sure if there is a much better option. 
I suppose on a6xx we 484 * could instead decode the bitfields in SP_xS_CONFIG 485 */ 486static int 487get_tex_count(const char *name) 488{ 489 char count_reg[strlen(name) + 5]; 490 char *p; 491 492 p = strstr(name, "CONST"); 493 if (!p) 494 p = strstr(name, "SAMP"); 495 if (!p) 496 return 0; 497 498 int n = p - name; 499 strncpy(count_reg, name, n); 500 strcpy(count_reg + n, "COUNT"); 501 502 return reg_val(regbase(count_reg)); 503} 504 505static void 506reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level) 507{ 508 if (!in_summary) 509 return; 510 511 int num_unit = get_tex_count(name); 512 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32); 513 void *buf = hostptr(gpuaddr); 514 515 if (!buf) 516 return; 517 518 dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1); 519} 520 521static void 522reg_dump_tex_const_hi(const char *name, uint32_t dword, int level) 523{ 524 if (!in_summary) 525 return; 526 527 int num_unit = get_tex_count(name); 528 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32); 529 void *buf = hostptr(gpuaddr); 530 531 if (!buf) 532 return; 533 534 dump_tex_const(buf, num_unit, level + 1); 535} 536 537/* 538 * Registers with special handling (rnndec_decode() handles rest): 539 */ 540#define REG(x, fxn) { #x, fxn } 541#define REG64(x, fxn) { #x, .fxn64 = fxn, .is_reg64 = true } 542static struct { 543 const char *regname; 544 void (*fxn)(const char *name, uint32_t dword, int level); 545 void (*fxn64)(const char *name, uint64_t qword, int level); 546 uint32_t regbase; 547 bool is_reg64; 548} reg_a2xx[] = { 549 REG(CP_SCRATCH_REG0, reg_dump_scratch), 550 REG(CP_SCRATCH_REG1, reg_dump_scratch), 551 REG(CP_SCRATCH_REG2, reg_dump_scratch), 552 REG(CP_SCRATCH_REG3, reg_dump_scratch), 553 REG(CP_SCRATCH_REG4, reg_dump_scratch), 554 REG(CP_SCRATCH_REG5, reg_dump_scratch), 555 REG(CP_SCRATCH_REG6, reg_dump_scratch), 556 REG(CP_SCRATCH_REG7, reg_dump_scratch), 557 {NULL}, 558}, reg_a3xx[] = { 559 REG(CP_SCRATCH_REG0, 
reg_dump_scratch), 560 REG(CP_SCRATCH_REG1, reg_dump_scratch), 561 REG(CP_SCRATCH_REG2, reg_dump_scratch), 562 REG(CP_SCRATCH_REG3, reg_dump_scratch), 563 REG(CP_SCRATCH_REG4, reg_dump_scratch), 564 REG(CP_SCRATCH_REG5, reg_dump_scratch), 565 REG(CP_SCRATCH_REG6, reg_dump_scratch), 566 REG(CP_SCRATCH_REG7, reg_dump_scratch), 567 REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr), 568 REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr), 569 REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr), 570 REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr), 571 REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr), 572 REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), 573 {NULL}, 574}, reg_a4xx[] = { 575 REG(CP_SCRATCH[0].REG, reg_dump_scratch), 576 REG(CP_SCRATCH[0x1].REG, reg_dump_scratch), 577 REG(CP_SCRATCH[0x2].REG, reg_dump_scratch), 578 REG(CP_SCRATCH[0x3].REG, reg_dump_scratch), 579 REG(CP_SCRATCH[0x4].REG, reg_dump_scratch), 580 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch), 581 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch), 582 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch), 583 REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr), 584 REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr), 585 REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr), 586 REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr), 587 REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr), 588 REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr), 589 REG(SP_VS_OBJ_START, reg_disasm_gpuaddr), 590 REG(SP_FS_OBJ_START, reg_disasm_gpuaddr), 591 REG(SP_GS_OBJ_START, reg_disasm_gpuaddr), 592 REG(SP_HS_OBJ_START, reg_disasm_gpuaddr), 593 REG(SP_DS_OBJ_START, reg_disasm_gpuaddr), 594 REG(SP_CS_OBJ_START, reg_disasm_gpuaddr), 595 REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), 596 REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), 597 REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), 598 REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), 599 REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr), 600 {NULL}, 601}, reg_a5xx[] = { 602 REG(CP_SCRATCH[0x4].REG, reg_dump_scratch), 
603 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch), 604 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch), 605 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch), 606 REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo), 607 REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi), 608 REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo), 609 REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi), 610 REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo), 611 REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi), 612 REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo), 613 REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi), 614 REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo), 615 REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi), 616 REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo), 617 REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi), 618 REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo), 619 REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi), 620 REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo), 621 REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi), 622 REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo), 623 REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi), 624 REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo), 625 REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi), 626 REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo), 627 REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi), 628 REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo), 629 REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi), 630 REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo), 631 REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi), 632 REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo), 633 REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi), 634 REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo), 635 REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi), 636 REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo), 637 REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi), 638 REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo), 639 REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi), 640 REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo), 641 REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi), 642 REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo), 643 
REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi), 644// REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo), 645// REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi), 646// REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo), 647// REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi), 648// REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo), 649// REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi), 650// REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo), 651// REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi), 652// REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo), 653// REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi), 654// REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo), 655// REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi), 656// REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo), 657// REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi), 658// REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo), 659// REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi), 660// REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo), 661// REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi), 662// REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo), 663// REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi), 664// REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo), 665// REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi), 666// REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo), 667// REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi), 668// REG(RB_BLIT_DST_LO, reg_gpuaddr_lo), 669// REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi), 670 671// REG(RB_2D_SRC_LO, reg_gpuaddr_lo), 672// REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi), 673// REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo), 674// REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi), 675// REG(RB_2D_DST_LO, reg_gpuaddr_lo), 676// REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi), 677// REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo), 678// REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi), 679 680 {NULL}, 681}, reg_a6xx[] = { 682 REG(CP_SCRATCH[0x4].REG, 
reg_dump_scratch), 683 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch), 684 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch), 685 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch), 686 687 REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64), 688 REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64), 689 REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64), 690 REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64), 691 REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64), 692 REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64), 693 694 REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64), 695 REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64), 696 REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64), 697 REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64), 698 REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64), 699 REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64), 700 REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64), 701 REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64), 702 REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64), 703 REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64), 704 REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64), 705 REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64), 706 707 {NULL}, 708}, *type0_reg; 709 710static struct rnn *rnn; 711 712static void 713init_rnn(const char *gpuname) 714{ 715 rnn = rnn_new(!options->color); 716 717 rnn_load(rnn, gpuname); 718 719 if (options->querystrs) { 720 int i; 721 queryvals = calloc(options->nquery, sizeof(queryvals[0])); 722 723 for (i = 0; i < options->nquery; i++) { 724 int val = strtol(options->querystrs[i], NULL, 0); 725 726 if (val == 0) 727 val = regbase(options->querystrs[i]); 728 729 queryvals[i] = val; 730 printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]); 731 } 732 } 733 734 for (unsigned idx = 0; type0_reg[idx].regname; idx++) { 735 type0_reg[idx].regbase = regbase(type0_reg[idx].regname); 736 if (!type0_reg[idx].regbase) { 737 printf("invalid register name: %s\n", type0_reg[idx].regname); 738 exit(1); 739 } 740 } 741} 742 743void 744reset_regs(void) 745{ 746 clear_written(); 747 clear_lastvals(); 748 memset(&ibs, 0, sizeof(ibs)); 749} 
750 751void 752cffdec_init(const struct cffdec_options *_options) 753{ 754 options = _options; 755 summary = options->summary; 756 757 /* in case we're decoding multiple files: */ 758 free(queryvals); 759 reset_regs(); 760 draw_count = 0; 761 762 /* TODO we need an API to free/cleanup any previous rnn */ 763 764 switch (options->gpu_id) { 765 case 200 ... 299: 766 type0_reg = reg_a2xx; 767 init_rnn("a2xx"); 768 break; 769 case 300 ... 399: 770 type0_reg = reg_a3xx; 771 init_rnn("a3xx"); 772 break; 773 case 400 ... 499: 774 type0_reg = reg_a4xx; 775 init_rnn("a4xx"); 776 break; 777 case 500 ... 599: 778 type0_reg = reg_a5xx; 779 init_rnn("a5xx"); 780 break; 781 case 600 ... 699: 782 type0_reg = reg_a6xx; 783 init_rnn("a6xx"); 784 break; 785 default: 786 errx(-1, "unsupported gpu"); 787 } 788} 789 790const char * 791pktname(unsigned opc) 792{ 793 return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc); 794} 795 796const char * 797regname(uint32_t regbase, int color) 798{ 799 return rnn_regname(rnn, regbase, color); 800} 801 802uint32_t 803regbase(const char *name) 804{ 805 return rnn_regbase(rnn, name); 806} 807 808static int 809endswith(uint32_t regbase, const char *suffix) 810{ 811 const char *name = regname(regbase, 0); 812 const char *s = strstr(name, suffix); 813 if (!s) 814 return 0; 815 return (s - strlen(name) + strlen(suffix)) == name; 816} 817 818void 819dump_register_val(uint32_t regbase, uint32_t dword, int level) 820{ 821 struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase); 822 823 if (info && info->typeinfo) { 824 uint64_t gpuaddr = 0; 825 char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword); 826 printf("%s%s: %s", levels[level], info->name, decoded); 827 828 /* Try and figure out if we are looking at a gpuaddr.. this 829 * might be useful for other gen's too, but at least a5xx has 830 * the _HI/_LO suffix we can look for. Maybe a better approach 831 * would be some special annotation in the xml.. 
832 * for a6xx use "address" and "waddress" types 833 */ 834 if (options->gpu_id >= 600) { 835 if (!strcmp(info->typeinfo->name, "address") || 836 !strcmp(info->typeinfo->name, "waddress")) { 837 gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword; 838 } 839 } else if (options->gpu_id >= 500) { 840 if (endswith(regbase, "_HI") && endswith(regbase - 1, "_LO")) { 841 gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase - 1); 842 } else if (endswith(regbase, "_LO") && endswith(regbase + 1, "_HI")) { 843 gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword; 844 } 845 } 846 847 if (gpuaddr && hostptr(gpuaddr)) { 848 printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u", 849 gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr), 850 hostlen(gpubaseaddr(gpuaddr))); 851 } 852 853 printf("\n"); 854 855 free(decoded); 856 } else if (info) { 857 printf("%s%s: %08x\n", levels[level], info->name, dword); 858 } else { 859 printf("%s<%04x>: %08x\n", levels[level], regbase, dword); 860 } 861 862 if (info) { 863 free(info->name); 864 free(info); 865 } 866} 867 868static void 869dump_register(uint32_t regbase, uint32_t dword, int level) 870{ 871 if (!quiet(3)) { 872 dump_register_val(regbase, dword, level); 873 } 874 875 for (unsigned idx = 0; type0_reg[idx].regname; idx++) { 876 if (type0_reg[idx].regbase == regbase) { 877 if (type0_reg[idx].is_reg64) { 878 uint64_t qword = (((uint64_t)reg_val(regbase + 1)) << 32) | dword; 879 type0_reg[idx].fxn64(type0_reg[idx].regname, qword, level); 880 } else { 881 type0_reg[idx].fxn(type0_reg[idx].regname, dword, level); 882 } 883 break; 884 } 885 } 886} 887 888static bool 889is_banked_reg(uint32_t regbase) 890{ 891 return (0x2000 <= regbase) && (regbase < 0x2400); 892} 893 894static void 895dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords, 896 int level) 897{ 898 while (sizedwords--) { 899 int last_summary = summary; 900 901 /* access to non-banked registers needs a WFI: 902 * TODO banked register range 
for a2xx?? 903 */ 904 if (needs_wfi && !is_banked_reg(regbase)) 905 printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase); 906 907 reg_set(regbase, *dwords); 908 dump_register(regbase, *dwords, level); 909 regbase++; 910 dwords++; 911 summary = last_summary; 912 } 913} 914 915static void 916dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name) 917{ 918 struct rnndomain *dom; 919 int i; 920 921 dom = rnn_finddomain(rnn->db, name); 922 923 if (!dom) 924 return; 925 926 if (script_packet) 927 script_packet(dwords, sizedwords, rnn, dom); 928 929 if (quiet(2)) 930 return; 931 932 for (i = 0; i < sizedwords; i++) { 933 struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0); 934 char *decoded; 935 if (!(info && info->typeinfo)) 936 break; 937 uint64_t value = dwords[i]; 938 if (info->typeinfo->high >= 32 && i < sizedwords - 1) { 939 value |= (uint64_t)dwords[i + 1] << 32; 940 i++; /* skip the next dword since we're printing it now */ 941 } 942 decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value); 943 /* Unlike the register printing path, we don't print the name 944 * of the register, so if it doesn't contain other named 945 * things (i.e. it isn't a bitset) then print the register 946 * name as if it's a bitset with a single entry. This avoids 947 * having to create a dummy register with a single entry to 948 * get a name in the decoding. 
949 */ 950 if (info->typeinfo->type == RNN_TTYPE_BITSET || 951 info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) { 952 printf("%s%s\n", levels[level], decoded); 953 } else { 954 printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname, 955 info->name, rnn->vc->colors->reset, decoded); 956 } 957 free(decoded); 958 free(info->name); 959 free(info); 960 } 961} 962 963static uint32_t bin_x1, bin_x2, bin_y1, bin_y2; 964static unsigned mode; 965static const char *render_mode; 966static enum { 967 MODE_BINNING = 0x1, 968 MODE_GMEM = 0x2, 969 MODE_BYPASS = 0x4, 970 MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS, 971} enable_mask = MODE_ALL; 972static bool skip_ib2_enable_global; 973static bool skip_ib2_enable_local; 974 975static void 976print_mode(int level) 977{ 978 if ((options->gpu_id >= 500) && !quiet(2)) { 979 printf("%smode: %s\n", levels[level], render_mode); 980 printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global, 981 skip_ib2_enable_local); 982 } 983} 984 985static bool 986skip_query(void) 987{ 988 switch (options->query_mode) { 989 case QUERY_ALL: 990 /* never skip: */ 991 return false; 992 case QUERY_WRITTEN: 993 for (int i = 0; i < options->nquery; i++) { 994 uint32_t regbase = queryvals[i]; 995 if (!reg_written(regbase)) { 996 continue; 997 } 998 if (reg_rewritten(regbase)) { 999 return false; 1000 } 1001 } 1002 return true; 1003 case QUERY_DELTA: 1004 for (int i = 0; i < options->nquery; i++) { 1005 uint32_t regbase = queryvals[i]; 1006 if (!reg_written(regbase)) { 1007 continue; 1008 } 1009 uint32_t lastval = reg_val(regbase); 1010 if (lastval != lastvals[regbase]) { 1011 return false; 1012 } 1013 } 1014 return true; 1015 } 1016 return true; 1017} 1018 1019static void 1020__do_query(const char *primtype, uint32_t num_indices) 1021{ 1022 int n = 0; 1023 1024 if ((500 <= options->gpu_id) && (options->gpu_id < 700)) { 1025 uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL")); 1026 uint32_t scissor_br = 
reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR")); 1027 1028 bin_x1 = scissor_tl & 0xffff; 1029 bin_y1 = scissor_tl >> 16; 1030 bin_x2 = scissor_br & 0xffff; 1031 bin_y2 = scissor_br >> 16; 1032 } 1033 1034 for (int i = 0; i < options->nquery; i++) { 1035 uint32_t regbase = queryvals[i]; 1036 if (reg_written(regbase)) { 1037 uint32_t lastval = reg_val(regbase); 1038 printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1, 1039 bin_y1, bin_x2, bin_y2, num_indices); 1040 if (options->gpu_id >= 500) 1041 printf("%s:", render_mode); 1042 printf("\t%08x", lastval); 1043 if (lastval != lastvals[regbase]) { 1044 printf("!"); 1045 } else { 1046 printf(" "); 1047 } 1048 if (reg_rewritten(regbase)) { 1049 printf("+"); 1050 } else { 1051 printf(" "); 1052 } 1053 dump_register_val(regbase, lastval, 0); 1054 n++; 1055 } 1056 } 1057 1058 if (n > 1) 1059 printf("\n"); 1060} 1061 1062static void 1063do_query_compare(const char *primtype, uint32_t num_indices) 1064{ 1065 unsigned saved_enable_mask = enable_mask; 1066 const char *saved_render_mode = render_mode; 1067 1068 /* in 'query-compare' mode, we want to see if the register is writtten 1069 * or changed in any mode: 1070 * 1071 * (NOTE: this could cause false-positive for 'query-delta' if the reg 1072 * is written with different values in binning vs sysmem/gmem mode, as 1073 * we don't track previous values per-mode, but I think we can live with 1074 * that) 1075 */ 1076 enable_mask = MODE_ALL; 1077 1078 clear_rewritten(); 1079 load_all_groups(0); 1080 1081 if (!skip_query()) { 1082 /* dump binning pass values: */ 1083 enable_mask = MODE_BINNING; 1084 render_mode = "BINNING"; 1085 clear_rewritten(); 1086 load_all_groups(0); 1087 __do_query(primtype, num_indices); 1088 1089 /* dump draw pass values: */ 1090 enable_mask = MODE_GMEM | MODE_BYPASS; 1091 render_mode = "DRAW"; 1092 clear_rewritten(); 1093 load_all_groups(0); 1094 __do_query(primtype, num_indices); 1095 1096 printf("\n"); 1097 } 1098 1099 enable_mask = 
saved_enable_mask; 1100 render_mode = saved_render_mode; 1101 1102 disable_all_groups(); 1103} 1104 1105/* well, actually query and script.. 1106 * NOTE: call this before dump_register_summary() 1107 */ 1108static void 1109do_query(const char *primtype, uint32_t num_indices) 1110{ 1111 if (script_draw) 1112 script_draw(primtype, num_indices); 1113 1114 if (options->query_compare) { 1115 do_query_compare(primtype, num_indices); 1116 return; 1117 } 1118 1119 if (skip_query()) 1120 return; 1121 1122 __do_query(primtype, num_indices); 1123} 1124 1125static void 1126cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level) 1127{ 1128 uint32_t start = dwords[1] >> 16; 1129 uint32_t size = dwords[1] & 0xffff; 1130 const char *type = NULL, *ext = NULL; 1131 gl_shader_stage disasm_type; 1132 1133 switch (dwords[0]) { 1134 case 0: 1135 type = "vertex"; 1136 ext = "vo"; 1137 disasm_type = MESA_SHADER_VERTEX; 1138 break; 1139 case 1: 1140 type = "fragment"; 1141 ext = "fo"; 1142 disasm_type = MESA_SHADER_FRAGMENT; 1143 break; 1144 default: 1145 type = "<unknown>"; 1146 disasm_type = 0; 1147 break; 1148 } 1149 1150 printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start, 1151 size); 1152 disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type); 1153 1154 /* dump raw shader: */ 1155 if (ext) 1156 dump_shader(ext, dwords + 2, (sizedwords - 2) * 4); 1157} 1158 1159static void 1160cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level) 1161{ 1162 uint32_t reg = dwords[0] & 0xffff; 1163 int i; 1164 for (i = 1; i < sizedwords; i++) { 1165 dump_register(reg, dwords[i], level + 1); 1166 reg_set(reg, dwords[i]); 1167 reg++; 1168 } 1169} 1170 1171enum state_t { 1172 TEX_SAMP = 1, 1173 TEX_CONST, 1174 TEX_MIPADDR, /* a3xx only */ 1175 SHADER_PROG, 1176 SHADER_CONST, 1177 1178 // image/ssbo state: 1179 SSBO_0, 1180 SSBO_1, 1181 SSBO_2, 1182 1183 UBO, 1184 1185 // unknown things, just to hexdumps: 1186 UNKNOWN_DWORDS, 1187 UNKNOWN_2DWORDS, 1188 
UNKNOWN_4DWORDS, 1189}; 1190 1191enum adreno_state_block { 1192 SB_VERT_TEX = 0, 1193 SB_VERT_MIPADDR = 1, 1194 SB_FRAG_TEX = 2, 1195 SB_FRAG_MIPADDR = 3, 1196 SB_VERT_SHADER = 4, 1197 SB_GEOM_SHADER = 5, 1198 SB_FRAG_SHADER = 6, 1199 SB_COMPUTE_SHADER = 7, 1200}; 1201 1202/* TODO there is probably a clever way to let rnndec parse things so 1203 * we don't have to care about packet format differences across gens 1204 */ 1205 1206static void 1207a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, 1208 enum state_t *state, enum state_src_t *src) 1209{ 1210 unsigned state_block_id = (dwords[0] >> 19) & 0x7; 1211 unsigned state_type = dwords[1] & 0x3; 1212 static const struct { 1213 gl_shader_stage stage; 1214 enum state_t state; 1215 } lookup[0xf][0x3] = { 1216 [SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP}, 1217 [SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST}, 1218 [SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP}, 1219 [SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST}, 1220 [SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG}, 1221 [SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST}, 1222 [SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG}, 1223 [SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST}, 1224 }; 1225 1226 *stage = lookup[state_block_id][state_type].stage; 1227 *state = lookup[state_block_id][state_type].state; 1228 unsigned state_src = (dwords[0] >> 16) & 0x7; 1229 if (state_src == 0 /* SS_DIRECT */) 1230 *src = STATE_SRC_DIRECT; 1231 else 1232 *src = STATE_SRC_INDIRECT; 1233} 1234 1235static enum state_src_t 1236_get_state_src(unsigned dword0) 1237{ 1238 switch ((dword0 >> 16) & 0x3) { 1239 case 0: /* SS4_DIRECT / SS6_DIRECT */ 1240 return STATE_SRC_DIRECT; 1241 case 2: /* SS4_INDIRECT / SS6_INDIRECT */ 1242 return STATE_SRC_INDIRECT; 1243 case 1: /* SS6_BINDLESS */ 1244 return STATE_SRC_BINDLESS; 1245 default: 1246 return STATE_SRC_DIRECT; 1247 } 1248} 1249 1250static void 1251_get_state_type(unsigned 
state_block_id, unsigned state_type,
                gl_shader_stage *stage, enum state_t *state)
{
   /* Indexed by [state_block_id][state_type]; both callers mask these to
    * 4 and 2 bits, matching the table dimensions.  Unlisted combinations
    * are zero-initialized.
    */
   static const struct {
      gl_shader_stage stage;
      enum state_t state;
   } lookup[0x10][0x4] = {
      // SB4_VS_TEX:
      [0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
      [0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST},
      [0x0][2] = {MESA_SHADER_VERTEX, UBO},
      // SB4_HS_TEX:
      [0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP},
      [0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST},
      [0x1][2] = {MESA_SHADER_TESS_CTRL, UBO},
      // SB4_DS_TEX:
      [0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP},
      [0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST},
      [0x2][2] = {MESA_SHADER_TESS_EVAL, UBO},
      // SB4_GS_TEX:
      [0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP},
      [0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST},
      [0x3][2] = {MESA_SHADER_GEOMETRY, UBO},
      // SB4_FS_TEX:
      [0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
      [0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
      [0x4][2] = {MESA_SHADER_FRAGMENT, UBO},
      // SB4_CS_TEX:
      [0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP},
      [0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST},
      [0x5][2] = {MESA_SHADER_COMPUTE, UBO},
      // SB4_VS_SHADER:
      [0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
      [0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
      [0x8][2] = {MESA_SHADER_VERTEX, UBO},
      // SB4_HS_SHADER
      [0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG},
      [0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST},
      [0x9][2] = {MESA_SHADER_TESS_CTRL, UBO},
      // SB4_DS_SHADER
      [0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG},
      [0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST},
      [0xa][2] = {MESA_SHADER_TESS_EVAL, UBO},
      // SB4_GS_SHADER
      [0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG},
      [0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST},
      [0xb][2] = {MESA_SHADER_GEOMETRY, UBO},
      // SB4_FS_SHADER:
      [0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
      [0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
      [0xc][2] = {MESA_SHADER_FRAGMENT, UBO},
      // SB4_CS_SHADER:
      [0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG},
      [0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST},
      [0xd][2] = {MESA_SHADER_COMPUTE, UBO},
      [0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */
      // SB4_SSBO (shared across all stages)
      [0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */
      [0xe][1] = {0, SSBO_1},
      [0xe][2] = {0, SSBO_2},
      // SB4_CS_SSBO
      [0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0},
      [0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1},
      [0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2},
      // unknown things
      /* This looks like combined UBO state for 3d stages (a5xx and
       * before?? I think a6xx has UBO state per shader stage:
       */
      [0x6][2] = {0, UBO},
      [0x7][1] = {0, UNKNOWN_2DWORDS},
   };

   *stage = lookup[state_block_id][state_type].stage;
   *state = lookup[state_block_id][state_type].state;
}

/* a4xx/a5xx header layout: block id in dword 0 bits 18..21, state type in
 * dword 1 bits 0..1.
 */
static void
a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
                    enum state_t *state, enum state_src_t *src)
{
   unsigned state_block_id = (dwords[0] >> 18) & 0xf;
   unsigned state_type = dwords[1] & 0x3;
   _get_state_type(state_block_id, state_type, stage, state);
   *src = _get_state_src(dwords[0]);
}

/* a6xx header layout: same block id field as a4xx, but the state type
 * lives in dword 0 bits 14..15.
 */
static void
a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
                    enum state_t *state, enum state_src_t *src)
{
   unsigned state_block_id = (dwords[0] >> 18) & 0xf;
   unsigned state_type = (dwords[0] >> 14) & 0x3;
   _get_state_type(state_block_id, state_type, stage, state);
   *src = _get_state_src(dwords[0]);
}

/* Dump num_unit sampler descriptors starting at texsamp.  The descriptor
 * size and rnn domain depend on options->gpu_id; on a6xx with a bindless
 * source the pointer advances 16 dwords per unit although only 4 are
 * dumped.
 */
static void
dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
{
   for (int i = 0; i < num_unit; i++) {
      /* work-around to reduce noise for opencl blob which always
       * writes the max # regardless of # of textures used
       */
      if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
         break;

      if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
         dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP");
         dump_hex(texsamp, 2, level + 1);
         texsamp += 2;
      } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
         dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP");
         dump_hex(texsamp, 2, level + 1);
         texsamp += 2;
      } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
         dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP");
         dump_hex(texsamp, 4, level + 1);
         texsamp += 4;
      } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
         dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP");
         dump_hex(texsamp, 4, level + 1);
         texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
      }
   }
}

/* Dump num_unit texture descriptors starting at texconst (4/8/12/16
 * dwords each for a3xx/a4xx/a5xx/a6xx).  With options->dump_textures set,
 * the buffer the descriptor points at is dumped as well (a4xx and later).
 */
static void
dump_tex_const(uint32_t *texconst, int num_unit, int level)
{
   for (int i = 0; i < num_unit; i++) {
      /* work-around to reduce noise for opencl blob which always
       * writes the max # regardless of # of textures used
       */
      if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) &&
          (texconst[2] == 0) && (texconst[3] == 0))
         break;

      if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
         dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST");
         dump_hex(texconst, 4, level + 1);
         texconst += 4;
      } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
         dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST");
         if (options->dump_textures) {
            uint32_t addr = texconst[4] & ~0x1f;
            dump_gpuaddr(addr, level - 2);
         }
         dump_hex(texconst, 8, level + 1);
         texconst += 8;
      } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
         dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST");
         if (options->dump_textures) {
            uint64_t addr =
               (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
            dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
         }
         dump_hex(texconst, 12, level + 1);
         texconst += 12;
      } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
         dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST");
         if (options->dump_textures) {
            uint64_t addr =
               (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
            dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
         }
         dump_hex(texconst, 16, level + 1);
         texconst += 16;
      }
   }
}

/* Decode a load-state packet: work out stage/state/source from the header
 * (layout depends on options->gpu_id), locate the payload (inline in the
 * packet, or via hostptr() for indirect/bindless sources), then dump it
 * according to the state kind.  num_unit comes from dword 0 bits 22..30.
 */
static void
cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
{
   gl_shader_stage stage;
   enum state_t state;
   enum state_src_t src;
   uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
   uint64_t ext_src_addr;
   void *contents;
   int i;

   if (quiet(2) && !options->script)
      return;

   if (options->gpu_id >= 600)
      a6xx_get_state_type(dwords, &stage, &state, &src);
   else if (options->gpu_id >= 400)
      a4xx_get_state_type(dwords, &stage, &state, &src);
   else
      a3xx_get_state_type(dwords, &stage, &state, &src);

   /* ext_src_addr == 0 means "payload is inline in the packet" */
   switch (src) {
   case STATE_SRC_DIRECT:
      ext_src_addr = 0;
      break;
   case STATE_SRC_INDIRECT:
      if (is_64b()) {
         ext_src_addr = dwords[1] & 0xfffffffc;
         ext_src_addr |= ((uint64_t)dwords[2]) << 32;
      } else {
         ext_src_addr = dwords[1] & 0xfffffffc;
      }

      break;
   case STATE_SRC_BINDLESS: {
      const unsigned base_reg = stage == MESA_SHADER_COMPUTE
                                   ? regbase("HLSQ_CS_BINDLESS_BASE[0].ADDR")
                                   : regbase("HLSQ_BINDLESS_BASE[0].ADDR");

      /* top 4 bits of dword 1 select the bindless base register (a
       * register pair when addresses are 64 bit); the low 24 bits are a
       * dword offset from that base
       */
      if (is_64b()) {
         const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
         ext_src_addr = reg_val(reg) & 0xfffffffc;
         ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
      } else {
         const unsigned reg = base_reg + (dwords[1] >> 28);
         ext_src_addr = reg_val(reg) & 0xfffffffc;
      }

      ext_src_addr += 4 * (dwords[1] & 0xffffff);
      break;
   }
   }

   if (ext_src_addr)
      contents = hostptr(ext_src_addr);
   else
      contents = is_64b() ? dwords + 3 : dwords + 2;

   if (!contents)
      return;

   switch (state) {
   case SHADER_PROG: {
      const char *ext = NULL;

      if (quiet(2))
         return;

      if (options->gpu_id >= 400)
         num_unit *= 16;
      else if (options->gpu_id >= 300)
         num_unit *= 4;

      /* shaders:
       *
       * note: num_unit seems to be # of instruction groups, where
       * an instruction group has 4 64bit instructions.
       */
      if (stage == MESA_SHADER_VERTEX) {
         ext = "vo3";
      } else if (stage == MESA_SHADER_GEOMETRY) {
         ext = "go3";
      } else if (stage == MESA_SHADER_COMPUTE) {
         ext = "co3";
      } else if (stage == MESA_SHADER_FRAGMENT) {
         ext = "fo3";
      }

      /* NOTE(review): contents was already checked for NULL above */
      if (contents)
         try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout,
                         options->gpu_id);

      /* dump raw shader: */
      if (ext)
         dump_shader(ext, contents, num_unit * 2 * 4);

      break;
   }
   case SHADER_CONST: {
      if (quiet(2))
         return;

      /* uniforms/consts:
       *
       * note: num_unit seems to be # of pairs of dwords??
       */

      if (options->gpu_id >= 400)
         num_unit *= 2;

      dump_float(contents, num_unit * 2, level + 1);
      dump_hex(contents, num_unit * 2, level + 1);

      break;
   }
   case TEX_MIPADDR: {
      uint32_t *addrs = contents;

      if (quiet(2))
         return;

      /* mipmap consts block just appears to be array of num_unit gpu addr's: */
      for (i = 0; i < num_unit; i++) {
         void *ptr = hostptr(addrs[i]);
         printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]);
         if (options->dump_textures) {
            printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
            dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1);
         }
      }
      break;
   }
   case TEX_SAMP: {
      dump_tex_samp(contents, src, num_unit, level);
      break;
   }
   case TEX_CONST: {
      dump_tex_const(contents, num_unit, level);
      break;
   }
   case SSBO_0: {
      uint32_t *ssboconst = (uint32_t *)contents;

      for (i = 0; i < num_unit; i++) {
         int sz = 4;
         if (400 <= options->gpu_id && options->gpu_id < 500) {
            dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0");
         } else if (500 <= options->gpu_id && options->gpu_id < 600) {
            dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0");
         } else if (600 <= options->gpu_id && options->gpu_id < 700) {
            /* a6xx decodes this state with the texture descriptor domain */
            sz = 16;
            dump_domain(ssboconst, 16, level + 2, "A6XX_TEX_CONST");
         }
         dump_hex(ssboconst, sz, level + 1);
         ssboconst += sz;
      }
      break;
   }
   case SSBO_1: {
      uint32_t *ssboconst = (uint32_t *)contents;

      for (i = 0; i < num_unit; i++) {
         if (400 <= options->gpu_id && options->gpu_id < 500)
            dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1");
         else if (500 <= options->gpu_id && options->gpu_id < 600)
            dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1");
         dump_hex(ssboconst, 2, level + 1);
         ssboconst += 2;
      }
      break;
   }
   case SSBO_2: {
      uint32_t *ssboconst = (uint32_t *)contents;

      for (i = 0; i < num_unit; i++) {
         /* TODO a4xx and a5xx might be same: */
         if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
            dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2");
            dump_hex(ssboconst, 2, level + 1);
         }
         if (options->dump_textures) {
            uint64_t addr =
               (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
            dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
         }
         ssboconst += 2;
      }
      break;
   }
   case UBO: {
      uint32_t *uboconst = (uint32_t *)contents;

      for (i = 0; i < num_unit; i++) {
         // TODO probably similar on a4xx..
         if (500 <= options->gpu_id && options->gpu_id < 600)
            dump_domain(uboconst, 2, level + 2, "A5XX_UBO");
         else if (600 <= options->gpu_id && options->gpu_id < 700)
            dump_domain(uboconst, 2, level + 2, "A6XX_UBO");
         dump_hex(uboconst, 2, level + 1);
         uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
      }
      break;
   }
   case UNKNOWN_DWORDS: {
      if (quiet(2))
         return;
      dump_hex(contents, num_unit, level + 1);
      break;
   }
   case UNKNOWN_2DWORDS: {
      if (quiet(2))
         return;
      dump_hex(contents, num_unit * 2, level + 1);
      break;
   }
   case UNKNOWN_4DWORDS: {
      if (quiet(2))
         return;
      dump_hex(contents, num_unit * 4, level + 1);
      break;
   }
   default:
      if (quiet(2))
         return;
      /* hmm..
*/ 1647 dump_hex(contents, num_unit, level + 1); 1648 break; 1649 } 1650} 1651 1652static void 1653cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level) 1654{ 1655 bin_x1 = dwords[1] & 0xffff; 1656 bin_y1 = dwords[1] >> 16; 1657 bin_x2 = dwords[2] & 0xffff; 1658 bin_y2 = dwords[2] >> 16; 1659} 1660 1661static void 1662dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, 1663 int level) 1664{ 1665 uint32_t w, h, p; 1666 uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags; 1667 uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z; 1668 static const char *filter[] = { 1669 "point", 1670 "bilinear", 1671 "bicubic", 1672 }; 1673 static const char *clamp[] = { 1674 "wrap", 1675 "mirror", 1676 "clamp-last-texel", 1677 }; 1678 static const char swiznames[] = "xyzw01??"; 1679 1680 /* see sys2gmem_tex_const[] in adreno_a2xxx.c */ 1681 1682 /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat, 1683 * RFMode=ZeroClamp-1, Dim=1:2d, pitch 1684 */ 1685 p = (dwords[0] >> 22) << 5; 1686 clamp_x = (dwords[0] >> 10) & 0x3; 1687 clamp_y = (dwords[0] >> 13) & 0x3; 1688 clamp_z = (dwords[0] >> 16) & 0x3; 1689 1690 /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0, 1691 * NearestClamp=1:OGL Mode 1692 */ 1693 parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff); 1694 1695 /* Width, Height, EndianSwap=0:None */ 1696 w = (dwords[2] & 0x1fff) + 1; 1697 h = ((dwords[2] >> 13) & 0x1fff) + 1; 1698 1699 /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point, 1700 * Mip=2:BaseMap 1701 */ 1702 mag = (dwords[3] >> 19) & 0x3; 1703 min = (dwords[3] >> 21) & 0x3; 1704 swiz = (dwords[3] >> 1) & 0xfff; 1705 1706 /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0, 1707 * Dim3d=0 1708 */ 1709 // XXX 1710 1711 /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0, 1712 * Dim=1:2d, MipPacking=0 1713 */ 1714 parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff); 1715 1716 printf("%sset texture const %04x\n", 
levels[level], val); 1717 printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x], 1718 clamp[clamp_y], clamp[clamp_z]); 1719 printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min], 1720 filter[mag]); 1721 printf("%sswizzle: %c%c%c%c\n", levels[level + 1], 1722 swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7], 1723 swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]); 1724 printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n", 1725 levels[level + 1], gpuaddr, flags, w, h, p, 1726 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf)); 1727 printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr, 1728 mip_flags); 1729} 1730 1731static void 1732dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, 1733 int level) 1734{ 1735 int i; 1736 printf("%sset shader const %04x\n", levels[level], val); 1737 for (i = 0; i < sizedwords;) { 1738 uint32_t gpuaddr, flags; 1739 parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf); 1740 void *addr = hostptr(gpuaddr); 1741 if (addr) { 1742 const char *fmt = 1743 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf); 1744 uint32_t size = dwords[i++]; 1745 printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr, 1746 size, fmt); 1747 // TODO maybe dump these as bytes instead of dwords? 
1748 size = (size + 3) / 4; // for now convert to dwords 1749 dump_hex(addr, min(size, 64), level + 1); 1750 if (size > min(size, 64)) 1751 printf("%s\t\t...\n", levels[level + 1]); 1752 dump_float(addr, min(size, 64), level + 1); 1753 if (size > min(size, 64)) 1754 printf("%s\t\t...\n", levels[level + 1]); 1755 } 1756 } 1757} 1758 1759static void 1760cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level) 1761{ 1762 uint32_t val = dwords[0] & 0xffff; 1763 switch ((dwords[0] >> 16) & 0xf) { 1764 case 0x0: 1765 dump_float((float *)(dwords + 1), sizedwords - 1, level + 1); 1766 break; 1767 case 0x1: 1768 /* need to figure out how const space is partitioned between 1769 * attributes, textures, etc.. 1770 */ 1771 if (val < 0x78) { 1772 dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level); 1773 } else { 1774 dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level); 1775 } 1776 break; 1777 case 0x2: 1778 printf("%sset bool const %04x\n", levels[level], val); 1779 break; 1780 case 0x3: 1781 printf("%sset loop const %04x\n", levels[level], val); 1782 break; 1783 case 0x4: 1784 val += 0x2000; 1785 if (dwords[0] & 0x80000000) { 1786 uint32_t srcreg = dwords[1]; 1787 uint32_t dstval = dwords[2]; 1788 1789 /* TODO: not sure what happens w/ payload != 2.. */ 1790 assert(sizedwords == 3); 1791 assert(srcreg < ARRAY_SIZE(type0_reg_vals)); 1792 1793 /* note: rnn_regname uses a static buf so we can't do 1794 * two regname() calls for one printf.. 
1795 */ 1796 printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval); 1797 printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]); 1798 1799 dstval += type0_reg_vals[srcreg]; 1800 1801 dump_registers(val, &dstval, 1, level + 1); 1802 } else { 1803 dump_registers(val, dwords + 1, sizedwords - 1, level + 1); 1804 } 1805 break; 1806 } 1807} 1808 1809static void dump_register_summary(int level); 1810 1811static void 1812cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level) 1813{ 1814 const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]); 1815 printl(2, "%sevent %s\n", levels[level], name); 1816 1817 if (name && (options->gpu_id > 500)) { 1818 char eventname[64]; 1819 snprintf(eventname, sizeof(eventname), "EVENT:%s", name); 1820 if (!strcmp(name, "BLIT")) { 1821 do_query(eventname, 0); 1822 print_mode(level); 1823 dump_register_summary(level); 1824 } 1825 } 1826} 1827 1828static void 1829dump_register_summary(int level) 1830{ 1831 uint32_t i; 1832 bool saved_summary = summary; 1833 summary = false; 1834 1835 in_summary = true; 1836 1837 /* dump current state of registers: */ 1838 printl(2, "%sdraw[%i] register values\n", levels[level], draw_count); 1839 for (i = 0; i < regcnt(); i++) { 1840 uint32_t regbase = i; 1841 uint32_t lastval = reg_val(regbase); 1842 /* skip registers that haven't been updated since last draw/blit: */ 1843 if (!(options->allregs || reg_rewritten(regbase))) 1844 continue; 1845 if (!reg_written(regbase)) 1846 continue; 1847 if (lastval != lastvals[regbase]) { 1848 printl(2, "!"); 1849 lastvals[regbase] = lastval; 1850 } else { 1851 printl(2, " "); 1852 } 1853 if (reg_rewritten(regbase)) { 1854 printl(2, "+"); 1855 } else { 1856 printl(2, " "); 1857 } 1858 printl(2, "\t%08x", lastval); 1859 if (!quiet(2)) { 1860 dump_register(regbase, lastval, level); 1861 } 1862 } 1863 1864 clear_rewritten(); 1865 1866 in_summary = false; 1867 1868 draw_count++; 1869 summary = saved_summary; 1870} 1871 1872static 
uint32_t 1873draw_indx_common(uint32_t *dwords, int level) 1874{ 1875 uint32_t prim_type = dwords[1] & 0x1f; 1876 uint32_t source_select = (dwords[1] >> 6) & 0x3; 1877 uint32_t num_indices = dwords[2]; 1878 const char *primtype; 1879 1880 primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type); 1881 1882 do_query(primtype, num_indices); 1883 1884 printl(2, "%sdraw: %d\n", levels[level], draws[ib]); 1885 printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype, prim_type); 1886 printl(2, "%ssource_select: %s (%d)\n", levels[level], 1887 rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select); 1888 printl(2, "%snum_indices: %d\n", levels[level], num_indices); 1889 1890 vertices += num_indices; 1891 1892 draws[ib]++; 1893 1894 return num_indices; 1895} 1896 1897enum pc_di_index_size { 1898 INDEX_SIZE_IGN = 0, 1899 INDEX_SIZE_16_BIT = 0, 1900 INDEX_SIZE_32_BIT = 1, 1901 INDEX_SIZE_8_BIT = 2, 1902 INDEX_SIZE_INVALID = 0, 1903}; 1904 1905static void 1906cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level) 1907{ 1908 uint32_t num_indices = draw_indx_common(dwords, level); 1909 1910 assert(!is_64b()); 1911 1912 /* if we have an index buffer, dump that: */ 1913 if (sizedwords == 5) { 1914 void *ptr = hostptr(dwords[3]); 1915 printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]); 1916 printl(2, "%sidx_size: %d\n", levels[level], dwords[4]); 1917 if (ptr) { 1918 enum pc_di_index_size size = 1919 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2); 1920 if (!quiet(2)) { 1921 int i; 1922 printf("%sidxs: ", levels[level]); 1923 if (size == INDEX_SIZE_8_BIT) { 1924 uint8_t *idx = ptr; 1925 for (i = 0; i < dwords[4]; i++) 1926 printf(" %u", idx[i]); 1927 } else if (size == INDEX_SIZE_16_BIT) { 1928 uint16_t *idx = ptr; 1929 for (i = 0; i < dwords[4] / 2; i++) 1930 printf(" %u", idx[i]); 1931 } else if (size == INDEX_SIZE_32_BIT) { 1932 uint32_t *idx = ptr; 1933 for (i = 0; i < dwords[4] / 4; i++) 1934 printf(" %u", idx[i]); 1935 } 1936 printf("\n"); 1937 
dump_hex(ptr, dwords[4] / 4, level + 1); 1938 } 1939 } 1940 } 1941 1942 /* don't bother dumping registers for the dummy draw_indx's.. */ 1943 if (num_indices > 0) 1944 dump_register_summary(level); 1945 1946 needs_wfi = true; 1947} 1948 1949static void 1950cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level) 1951{ 1952 uint32_t num_indices = draw_indx_common(dwords, level); 1953 enum pc_di_index_size size = 1954 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2); 1955 void *ptr = &dwords[3]; 1956 int sz = 0; 1957 1958 assert(!is_64b()); 1959 1960 /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */ 1961 if (!quiet(2)) { 1962 int i; 1963 printf("%sidxs: ", levels[level]); 1964 if (size == INDEX_SIZE_8_BIT) { 1965 uint8_t *idx = ptr; 1966 for (i = 0; i < num_indices; i++) 1967 printf(" %u", idx[i]); 1968 sz = num_indices; 1969 } else if (size == INDEX_SIZE_16_BIT) { 1970 uint16_t *idx = ptr; 1971 for (i = 0; i < num_indices; i++) 1972 printf(" %u", idx[i]); 1973 sz = num_indices * 2; 1974 } else if (size == INDEX_SIZE_32_BIT) { 1975 uint32_t *idx = ptr; 1976 for (i = 0; i < num_indices; i++) 1977 printf(" %u", idx[i]); 1978 sz = num_indices * 4; 1979 } 1980 printf("\n"); 1981 dump_hex(ptr, sz / 4, level + 1); 1982 } 1983 1984 /* don't bother dumping registers for the dummy draw_indx's.. */ 1985 if (num_indices > 0) 1986 dump_register_summary(level); 1987} 1988 1989static void 1990cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level) 1991{ 1992 uint32_t num_indices = dwords[2]; 1993 uint32_t prim_type = dwords[0] & 0x1f; 1994 1995 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices); 1996 print_mode(level); 1997 1998 /* don't bother dumping registers for the dummy draw_indx's.. 
*/ 1999 if (num_indices > 0) 2000 dump_register_summary(level); 2001} 2002 2003static void 2004cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level) 2005{ 2006 uint32_t prim_type = dwords[0] & 0x1f; 2007 uint64_t addr; 2008 2009 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0); 2010 print_mode(level); 2011 2012 if (is_64b()) 2013 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1]; 2014 else 2015 addr = dwords[1]; 2016 dump_gpuaddr_size(addr, level, 0x10, 2); 2017 2018 if (is_64b()) 2019 addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4]; 2020 else 2021 addr = dwords[3]; 2022 dump_gpuaddr_size(addr, level, 0x10, 2); 2023 2024 dump_register_summary(level); 2025} 2026 2027static void 2028cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level) 2029{ 2030 uint32_t prim_type = dwords[0] & 0x1f; 2031 uint64_t addr; 2032 2033 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0); 2034 print_mode(level); 2035 2036 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1]; 2037 dump_gpuaddr_size(addr, level, 0x10, 2); 2038 2039 dump_register_summary(level); 2040} 2041 2042static void 2043cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level) 2044{ 2045 uint32_t prim_type = dwords[0] & 0x1f; 2046 uint32_t count = dwords[2]; 2047 2048 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0); 2049 print_mode(level); 2050 2051 struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI"); 2052 uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT"); 2053 uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT"); 2054 uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE"); 2055 2056 if (count_dword) { 2057 uint64_t count_addr = 2058 ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword]; 2059 uint32_t *buf = hostptr(count_addr); 2060 2061 /* Don't print more draws than this if we don't know the indirect 2062 * count. 
It's possible the user will give ~0 or some other large 2063 * value, expecting the GPU to fill in the draw count, and we don't 2064 * want to print a gazillion draws in that case: 2065 */ 2066 const uint32_t max_draw_count = 0x100; 2067 2068 /* Assume the indirect count is garbage if it's larger than this 2069 * (quite large) value or 0. Hopefully this catches most cases. 2070 */ 2071 const uint32_t max_indirect_draw_count = 0x10000; 2072 2073 if (buf) { 2074 printf("%sindirect count: %u\n", levels[level], *buf); 2075 if (*buf == 0 || *buf > max_indirect_draw_count) { 2076 /* garbage value */ 2077 count = min(count, max_draw_count); 2078 } else { 2079 /* not garbage */ 2080 count = min(count, *buf); 2081 } 2082 } else { 2083 count = min(count, max_draw_count); 2084 } 2085 } 2086 2087 if (addr_dword && stride_dword) { 2088 uint64_t addr = 2089 ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword]; 2090 uint32_t stride = dwords[stride_dword]; 2091 2092 for (unsigned i = 0; i < count; i++, addr += stride) { 2093 printf("%sdraw %d:\n", levels[level], i); 2094 dump_gpuaddr_size(addr, level, 0x10, 2); 2095 } 2096 } 2097 2098 dump_register_summary(level); 2099} 2100 2101static void 2102cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level) 2103{ 2104 do_query("COMPUTE", 1); 2105 dump_register_summary(level); 2106} 2107 2108static void 2109cp_nop(uint32_t *dwords, uint32_t sizedwords, int level) 2110{ 2111 const char *buf = (void *)dwords; 2112 int i; 2113 2114 if (quiet(3)) 2115 return; 2116 2117 // blob doesn't use CP_NOP for string_marker but it does 2118 // use it for things that end up looking like, but aren't 2119 // ascii chars: 2120 if (!options->decode_markers) 2121 return; 2122 2123 for (i = 0; i < 4 * sizedwords; i++) { 2124 if (buf[i] == '\0') 2125 break; 2126 if (isascii(buf[i])) 2127 printf("%c", buf[i]); 2128 } 2129 printf("\n"); 2130} 2131 2132static void 2133cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level) 2134{ 2135 /* traverse 
indirect buffers */ 2136 uint64_t ibaddr; 2137 uint32_t ibsize; 2138 uint32_t *ptr = NULL; 2139 2140 if (is_64b()) { 2141 /* a5xx+.. high 32b of gpu addr, then size: */ 2142 ibaddr = dwords[0]; 2143 ibaddr |= ((uint64_t)dwords[1]) << 32; 2144 ibsize = dwords[2]; 2145 } else { 2146 ibaddr = dwords[0]; 2147 ibsize = dwords[1]; 2148 } 2149 2150 if (!quiet(3)) { 2151 if (is_64b()) { 2152 printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr); 2153 } else { 2154 printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr); 2155 } 2156 printf("%sibsize:%08x\n", levels[level], ibsize); 2157 } 2158 2159 if (options->once && has_dumped(ibaddr, enable_mask)) 2160 return; 2161 2162 /* 'query-compare' mode implies 'once' mode, although we need only to 2163 * process the cmdstream for *any* enable_mask mode, since we are 2164 * comparing binning vs draw reg values at the same time, ie. it is 2165 * not useful to process the same draw in both binning and draw pass. 2166 */ 2167 if (options->query_compare && has_dumped(ibaddr, MODE_ALL)) 2168 return; 2169 2170 /* map gpuaddr back to hostptr: */ 2171 ptr = hostptr(ibaddr); 2172 2173 if (ptr) { 2174 /* If the GPU hung within the target IB, the trigger point will be 2175 * just after the current CP_INDIRECT_BUFFER. Because the IB is 2176 * executed but never returns. Account for this by checking if 2177 * the IB returned: 2178 */ 2179 highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 
3 : 2])); 2180 2181 ib++; 2182 ibs[ib].base = ibaddr; 2183 ibs[ib].size = ibsize; 2184 2185 dump_commands(ptr, ibsize, level); 2186 ib--; 2187 } else { 2188 fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize); 2189 } 2190} 2191 2192static void 2193cp_start_bin(uint32_t *dwords, uint32_t sizedwords, int level) 2194{ 2195 uint64_t ibaddr; 2196 uint32_t ibsize; 2197 uint32_t loopcount; 2198 uint32_t *ptr = NULL; 2199 2200 loopcount = dwords[0]; 2201 ibaddr = dwords[1]; 2202 ibaddr |= ((uint64_t)dwords[2]) << 32; 2203 ibsize = dwords[3]; 2204 2205 /* map gpuaddr back to hostptr: */ 2206 ptr = hostptr(ibaddr); 2207 2208 if (ptr) { 2209 /* If the GPU hung within the target IB, the trigger point will be 2210 * just after the current CP_START_BIN. Because the IB is 2211 * executed but never returns. Account for this by checking if 2212 * the IB returned: 2213 */ 2214 highlight_gpuaddr(gpuaddr(&dwords[5])); 2215 2216 /* TODO: we should duplicate the body of the loop after each bin, so 2217 * that draws get the correct state. We should also figure out if there 2218 * are any registers that can tell us what bin we're in when we hang so 2219 * that crashdec points to the right place. 
2220 */ 2221 ib++; 2222 for (uint32_t i = 0; i < loopcount; i++) { 2223 ibs[ib].base = ibaddr; 2224 ibs[ib].size = ibsize; 2225 printf("%sbin %u\n", levels[level], i); 2226 dump_commands(ptr, ibsize, level); 2227 ibaddr += ibsize; 2228 ptr += ibsize; 2229 } 2230 ib--; 2231 } else { 2232 fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize); 2233 } 2234} 2235 2236static void 2237cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level) 2238{ 2239 needs_wfi = false; 2240} 2241 2242static void 2243cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level) 2244{ 2245 if (quiet(2)) 2246 return; 2247 2248 if (is_64b()) { 2249 uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32); 2250 printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr); 2251 dump_hex(&dwords[2], sizedwords - 2, level + 1); 2252 2253 if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2])) 2254 dump_commands(&dwords[2], sizedwords - 2, level + 1); 2255 } else { 2256 uint32_t gpuaddr = dwords[0]; 2257 printf("%sgpuaddr:%08x\n", levels[level], gpuaddr); 2258 dump_float((float *)&dwords[1], sizedwords - 1, level + 1); 2259 } 2260} 2261 2262static void 2263cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level) 2264{ 2265 uint32_t val = dwords[0] & 0xffff; 2266 uint32_t and = dwords[1]; 2267 uint32_t or = dwords[2]; 2268 printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1), 2269 and, or); 2270 if (needs_wfi) 2271 printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1), 2272 and, or); 2273 reg_set(val, (reg_val(val) & and) | or); 2274} 2275 2276static void 2277cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level) 2278{ 2279 uint32_t val = dwords[0] & 0xffff; 2280 printl(3, "%sbase register: %s\n", levels[level], regname(val, 1)); 2281 2282 if (quiet(2)) 2283 return; 2284 2285 uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32); 2286 printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr); 2287 void *ptr = 
hostptr(gpuaddr); 2288 if (ptr) { 2289 uint32_t cnt = (dwords[0] >> 19) & 0x3ff; 2290 dump_hex(ptr, cnt, level + 1); 2291 } 2292} 2293 2294struct draw_state { 2295 uint16_t enable_mask; 2296 uint16_t flags; 2297 uint32_t count; 2298 uint64_t addr; 2299}; 2300 2301struct draw_state state[32]; 2302 2303#define FLAG_DIRTY 0x1 2304#define FLAG_DISABLE 0x2 2305#define FLAG_DISABLE_ALL_GROUPS 0x4 2306#define FLAG_LOAD_IMMED 0x8 2307 2308static int draw_mode; 2309 2310static void 2311disable_group(unsigned group_id) 2312{ 2313 struct draw_state *ds = &state[group_id]; 2314 memset(ds, 0, sizeof(*ds)); 2315} 2316 2317static void 2318disable_all_groups(void) 2319{ 2320 for (unsigned i = 0; i < ARRAY_SIZE(state); i++) 2321 disable_group(i); 2322} 2323 2324static void 2325load_group(unsigned group_id, int level) 2326{ 2327 struct draw_state *ds = &state[group_id]; 2328 2329 if (!ds->count) 2330 return; 2331 2332 printl(2, "%sgroup_id: %u\n", levels[level], group_id); 2333 printl(2, "%scount: %d\n", levels[level], ds->count); 2334 printl(2, "%saddr: %016llx\n", levels[level], ds->addr); 2335 printl(2, "%sflags: %x\n", levels[level], ds->flags); 2336 2337 if (options->gpu_id >= 600) { 2338 printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask); 2339 2340 if (!(ds->enable_mask & enable_mask)) { 2341 printl(2, "%s\tskipped!\n\n", levels[level]); 2342 return; 2343 } 2344 } 2345 2346 void *ptr = hostptr(ds->addr); 2347 if (ptr) { 2348 if (!quiet(2)) 2349 dump_hex(ptr, ds->count, level + 1); 2350 2351 ib++; 2352 dump_commands(ptr, ds->count, level + 1); 2353 ib--; 2354 } 2355} 2356 2357static void 2358load_all_groups(int level) 2359{ 2360 /* sanity check, we should never recursively hit recursion here, and if 2361 * we do bad things happen: 2362 */ 2363 static bool loading_groups = false; 2364 if (loading_groups) { 2365 printf("ERROR: nothing in draw state should trigger recursively loading " 2366 "groups!\n"); 2367 return; 2368 } 2369 loading_groups = true; 2370 for 
(unsigned i = 0; i < ARRAY_SIZE(state); i++) 2371 load_group(i, level); 2372 loading_groups = false; 2373 2374 /* in 'query-compare' mode, defer disabling all groups until we have a 2375 * chance to process the query: 2376 */ 2377 if (!options->query_compare) 2378 disable_all_groups(); 2379} 2380 2381static void 2382cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level) 2383{ 2384 uint32_t i; 2385 2386 for (i = 0; i < sizedwords;) { 2387 struct draw_state *ds; 2388 uint32_t count = dwords[i] & 0xffff; 2389 uint32_t group_id = (dwords[i] >> 24) & 0x1f; 2390 uint32_t enable_mask = (dwords[i] >> 20) & 0xf; 2391 uint32_t flags = (dwords[i] >> 16) & 0xf; 2392 uint64_t addr; 2393 2394 if (is_64b()) { 2395 addr = dwords[i + 1]; 2396 addr |= ((uint64_t)dwords[i + 2]) << 32; 2397 i += 3; 2398 } else { 2399 addr = dwords[i + 1]; 2400 i += 2; 2401 } 2402 2403 if (flags & FLAG_DISABLE_ALL_GROUPS) { 2404 disable_all_groups(); 2405 continue; 2406 } 2407 2408 if (flags & FLAG_DISABLE) { 2409 disable_group(group_id); 2410 continue; 2411 } 2412 2413 assert(group_id < ARRAY_SIZE(state)); 2414 disable_group(group_id); 2415 2416 ds = &state[group_id]; 2417 2418 ds->enable_mask = enable_mask; 2419 ds->flags = flags; 2420 ds->count = count; 2421 ds->addr = addr; 2422 2423 if (flags & FLAG_LOAD_IMMED) { 2424 load_group(group_id, level); 2425 disable_group(group_id); 2426 } 2427 } 2428} 2429 2430static void 2431cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level) 2432{ 2433 draw_mode = dwords[0]; 2434} 2435 2436/* execute compute shader */ 2437static void 2438cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level) 2439{ 2440 do_query("compute", 0); 2441 dump_register_summary(level); 2442} 2443 2444static void 2445cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level) 2446{ 2447 uint64_t addr; 2448 2449 if (is_64b()) { 2450 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1]; 2451 } else { 2452 addr = dwords[1]; 2453 } 2454 2455 printl(3, 
"%saddr: %016llx\n", levels[level], addr); 2456 dump_gpuaddr_size(addr, level, 0x10, 2); 2457 2458 do_query("compute", 0); 2459 dump_register_summary(level); 2460} 2461 2462static void 2463cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level) 2464{ 2465 render_mode = rnn_enumname(rnn, "a6xx_marker", dwords[0] & 0xf); 2466 2467 if (!strcmp(render_mode, "RM6_BINNING")) { 2468 enable_mask = MODE_BINNING; 2469 } else if (!strcmp(render_mode, "RM6_GMEM")) { 2470 enable_mask = MODE_GMEM; 2471 } else if (!strcmp(render_mode, "RM6_BYPASS")) { 2472 enable_mask = MODE_BYPASS; 2473 } 2474} 2475 2476static void 2477cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level) 2478{ 2479 uint64_t addr; 2480 uint32_t *ptr, len; 2481 2482 assert(is_64b()); 2483 2484 /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr).. 2485 * not sure if this can come in different sizes. 2486 * 2487 * First ptr doesn't seem to be cmdstream, second one does. 2488 * 2489 * Comment from downstream kernel: 2490 * 2491 * SRM -- set render mode (ex binning, direct render etc) 2492 * SRM is set by UMD usually at start of IB to tell CP the type of 2493 * preemption. 2494 * KMD needs to set SRM to NULL to indicate CP that rendering is 2495 * done by IB. 
2496 * ------------------------------------------------------------------ 2497 * 2498 * Seems to always be one of these two: 2499 * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000 2500 * 00000000 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d 2501 * 001c2000 00000000 2502 * 2503 */ 2504 2505 assert(options->gpu_id >= 500); 2506 2507 render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]); 2508 2509 if (sizedwords == 1) 2510 return; 2511 2512 addr = dwords[1]; 2513 addr |= ((uint64_t)dwords[2]) << 32; 2514 2515 mode = dwords[3]; 2516 2517 dump_gpuaddr(addr, level + 1); 2518 2519 if (sizedwords == 5) 2520 return; 2521 2522 assert(sizedwords == 8); 2523 2524 len = dwords[5]; 2525 addr = dwords[6]; 2526 addr |= ((uint64_t)dwords[7]) << 32; 2527 2528 printl(3, "%saddr: 0x%016lx\n", levels[level], addr); 2529 printl(3, "%slen: 0x%x\n", levels[level], len); 2530 2531 ptr = hostptr(addr); 2532 2533 if (ptr) { 2534 if (!quiet(2)) { 2535 ib++; 2536 dump_commands(ptr, len, level + 1); 2537 ib--; 2538 dump_hex(ptr, len, level + 1); 2539 } 2540 } 2541} 2542 2543static void 2544cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level) 2545{ 2546 uint64_t addr; 2547 uint32_t *ptr, len; 2548 2549 assert(is_64b()); 2550 assert(options->gpu_id >= 500); 2551 2552 assert(sizedwords == 8); 2553 2554 addr = dwords[5]; 2555 addr |= ((uint64_t)dwords[6]) << 32; 2556 len = dwords[7]; 2557 2558 printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr); 2559 printl(3, "%slen: 0x%x\n", levels[level], len); 2560 2561 ptr = hostptr(addr); 2562 2563 if (ptr) { 2564 if (!quiet(2)) { 2565 ib++; 2566 dump_commands(ptr, len, level + 1); 2567 ib--; 2568 dump_hex(ptr, len, level + 1); 2569 } 2570 } 2571} 2572 2573static void 2574cp_blit(uint32_t *dwords, uint32_t sizedwords, int level) 2575{ 2576 do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0); 2577 print_mode(level); 2578 dump_register_summary(level); 2579} 2580 2581static void 
2582cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level) 2583{ 2584 int i; 2585 2586 /* NOTE: seems to write same reg multiple times.. not sure if different parts 2587 * of these are triggered by the FLUSH_SO_n events?? (if that is what they 2588 * actually are?) 2589 */ 2590 bool saved_summary = summary; 2591 summary = false; 2592 2593 for (i = 0; i < sizedwords; i += 2) { 2594 dump_register(dwords[i + 0], dwords[i + 1], level + 1); 2595 reg_set(dwords[i + 0], dwords[i + 1]); 2596 } 2597 2598 summary = saved_summary; 2599} 2600 2601static void 2602cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level) 2603{ 2604 uint32_t reg = dwords[1] & 0xffff; 2605 2606 dump_register(reg, dwords[2], level + 1); 2607 reg_set(reg, dwords[2]); 2608} 2609 2610static void 2611cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level) 2612{ 2613 uint64_t addr; 2614 uint32_t size = dwords[2] & 0xffff; 2615 void *ptr; 2616 2617 addr = dwords[0] | ((uint64_t)dwords[1] << 32); 2618 2619 if (!quiet(3)) { 2620 printf("%saddr=%" PRIx64 "\n", levels[level], addr); 2621 } 2622 2623 ptr = hostptr(addr); 2624 if (ptr) { 2625 dump_commands(ptr, size, level + 1); 2626 } 2627} 2628 2629static void 2630cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level) 2631{ 2632 skip_ib2_enable_global = dwords[0]; 2633} 2634 2635static void 2636cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level) 2637{ 2638 skip_ib2_enable_local = dwords[0]; 2639} 2640 2641#define CP(x, fxn, ...) 
{ "CP_" #x, fxn, ##__VA_ARGS__ } 2642static const struct type3_op { 2643 const char *name; 2644 void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level); 2645 struct { 2646 bool load_all_groups; 2647 } options; 2648} type3_op[] = { 2649 CP(NOP, cp_nop), 2650 CP(INDIRECT_BUFFER, cp_indirect), 2651 CP(INDIRECT_BUFFER_PFD, cp_indirect), 2652 CP(WAIT_FOR_IDLE, cp_wfi), 2653 CP(REG_RMW, cp_rmw), 2654 CP(REG_TO_MEM, cp_reg_mem), 2655 CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */ 2656 CP(MEM_WRITE, cp_mem_write), 2657 CP(EVENT_WRITE, cp_event_write), 2658 CP(RUN_OPENCL, cp_run_cl), 2659 CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}), 2660 CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}), 2661 CP(SET_CONSTANT, cp_set_const), 2662 CP(IM_LOAD_IMMEDIATE, cp_im_loadi), 2663 CP(WIDE_REG_WRITE, cp_wide_reg_write), 2664 2665 /* for a3xx */ 2666 CP(LOAD_STATE, cp_load_state), 2667 CP(SET_BIN, cp_set_bin), 2668 2669 /* for a4xx */ 2670 CP(LOAD_STATE4, cp_load_state), 2671 CP(SET_DRAW_STATE, cp_set_draw_state), 2672 CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}), 2673 CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}), 2674 CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}), 2675 2676 /* for a5xx */ 2677 CP(SET_RENDER_MODE, cp_set_render_mode), 2678 CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint), 2679 CP(BLIT, cp_blit), 2680 CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch), 2681 CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}), 2682 CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}), 2683 CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}), 2684 CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global), 2685 CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local), 2686 2687 /* for a6xx */ 2688 CP(LOAD_STATE6_GEOM, cp_load_state), 2689 CP(LOAD_STATE6_FRAG, cp_load_state), 2690 CP(LOAD_STATE6, cp_load_state), 2691 CP(SET_MODE, cp_set_mode), 2692 CP(SET_MARKER, 
cp_set_marker), 2693 CP(REG_WRITE, cp_reg_write), 2694 2695 CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib), 2696 2697 CP(START_BIN, cp_start_bin), 2698}; 2699 2700static void 2701noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level) 2702{ 2703} 2704 2705static const struct type3_op * 2706get_type3_op(unsigned opc) 2707{ 2708 static const struct type3_op dummy_op = { 2709 .fxn = noop_fxn, 2710 }; 2711 const char *name = pktname(opc); 2712 2713 if (!name) 2714 return &dummy_op; 2715 2716 for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++) 2717 if (!strcmp(name, type3_op[i].name)) 2718 return &type3_op[i]; 2719 2720 return &dummy_op; 2721} 2722 2723void 2724dump_commands(uint32_t *dwords, uint32_t sizedwords, int level) 2725{ 2726 int dwords_left = sizedwords; 2727 uint32_t count = 0; /* dword count including packet header */ 2728 uint32_t val; 2729 2730 // assert(dwords); 2731 if (!dwords) { 2732 printf("NULL cmd buffer!\n"); 2733 return; 2734 } 2735 2736 assert(ib < ARRAY_SIZE(draws)); 2737 draws[ib] = 0; 2738 2739 while (dwords_left > 0) { 2740 2741 current_draw_count = draw_count; 2742 2743 /* hack, this looks like a -1 underflow, in some versions 2744 * when it tries to write zero registers via pkt0 2745 */ 2746 // if ((dwords[0] >> 16) == 0xffff) 2747 // goto skip; 2748 2749 if (pkt_is_type0(dwords[0])) { 2750 printl(3, "t0"); 2751 count = type0_pkt_size(dwords[0]) + 1; 2752 val = type0_pkt_offset(dwords[0]); 2753 assert(val < regcnt()); 2754 printl(3, "%swrite %s%s (%04x)\n", levels[level + 1], regname(val, 1), 2755 (dwords[0] & 0x8000) ? 
" (same register)" : "", val); 2756 dump_registers(val, dwords + 1, count - 1, level + 2); 2757 if (!quiet(3)) 2758 dump_hex(dwords, count, level + 1); 2759 } else if (pkt_is_type4(dwords[0])) { 2760 /* basically the same(ish) as type0 prior to a5xx */ 2761 printl(3, "t4"); 2762 count = type4_pkt_size(dwords[0]) + 1; 2763 val = type4_pkt_offset(dwords[0]); 2764 assert(val < regcnt()); 2765 printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1), 2766 val); 2767 dump_registers(val, dwords + 1, count - 1, level + 2); 2768 if (!quiet(3)) 2769 dump_hex(dwords, count, level + 1); 2770#if 0 2771 } else if (pkt_is_type1(dwords[0])) { 2772 printl(3, "t1"); 2773 count = 3; 2774 val = dwords[0] & 0xfff; 2775 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1)); 2776 dump_registers(val, dwords+1, 1, level+2); 2777 val = (dwords[0] >> 12) & 0xfff; 2778 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1)); 2779 dump_registers(val, dwords+2, 1, level+2); 2780 if (!quiet(3)) 2781 dump_hex(dwords, count, level+1); 2782 } else if (pkt_is_type2(dwords[0])) { 2783 printl(3, "t2"); 2784 printf("%sNOP\n", levels[level+1]); 2785 count = 1; 2786 if (!quiet(3)) 2787 dump_hex(dwords, count, level+1); 2788#endif 2789 } else if (pkt_is_type3(dwords[0])) { 2790 count = type3_pkt_size(dwords[0]) + 1; 2791 val = cp_type3_opcode(dwords[0]); 2792 const struct type3_op *op = get_type3_op(val); 2793 if (op->options.load_all_groups) 2794 load_all_groups(level + 1); 2795 printl(3, "t3"); 2796 const char *name = pktname(val); 2797 if (!quiet(2)) { 2798 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level], 2799 rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val, 2800 count, (dwords[0] & 0x1) ? 
" (predicated)" : ""); 2801 } 2802 if (name) 2803 dump_domain(dwords + 1, count - 1, level + 2, name); 2804 op->fxn(dwords + 1, count - 1, level + 1); 2805 if (!quiet(2)) 2806 dump_hex(dwords, count, level + 1); 2807 } else if (pkt_is_type7(dwords[0])) { 2808 count = type7_pkt_size(dwords[0]) + 1; 2809 val = cp_type7_opcode(dwords[0]); 2810 const struct type3_op *op = get_type3_op(val); 2811 if (op->options.load_all_groups) 2812 load_all_groups(level + 1); 2813 printl(3, "t7"); 2814 const char *name = pktname(val); 2815 if (!quiet(2)) { 2816 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level], 2817 rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val, 2818 count); 2819 } 2820 if (name) { 2821 /* special hack for two packets that decode the same way 2822 * on a6xx: 2823 */ 2824 if (!strcmp(name, "CP_LOAD_STATE6_FRAG") || 2825 !strcmp(name, "CP_LOAD_STATE6_GEOM")) 2826 name = "CP_LOAD_STATE6"; 2827 dump_domain(dwords + 1, count - 1, level + 2, name); 2828 } 2829 op->fxn(dwords + 1, count - 1, level + 1); 2830 if (!quiet(2)) 2831 dump_hex(dwords, count, level + 1); 2832 } else if (pkt_is_type2(dwords[0])) { 2833 printl(3, "t2"); 2834 printl(3, "%snop\n", levels[level + 1]); 2835 } else { 2836 /* for 5xx+ we can do a passable job of looking for start of next valid 2837 * packet: */ 2838 if (options->gpu_id >= 500) { 2839 while (dwords_left > 0) { 2840 if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0])) 2841 break; 2842 printf("bad type! %08x\n", dwords[0]); 2843 dwords++; 2844 dwords_left--; 2845 } 2846 } else { 2847 printf("bad type! %08x\n", dwords[0]); 2848 return; 2849 } 2850 } 2851 2852 dwords += count; 2853 dwords_left -= count; 2854 } 2855 2856 if (dwords_left < 0) 2857 printf("**** this ain't right!! dwords_left=%d\n", dwords_left); 2858} 2859