1/*
2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include <assert.h>
25#include <ctype.h>
26#include <err.h>
27#include <errno.h>
28#include <fcntl.h>
29#include <inttypes.h>
30#include <signal.h>
31#include <stdarg.h>
32#include <stdbool.h>
33#include <stdint.h>
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#include <unistd.h>
38#include <sys/stat.h>
39#include <sys/types.h>
40#include <sys/wait.h>
41
42#include "freedreno_pm4.h"
43
44#include "buffers.h"
45#include "cffdec.h"
46#include "disasm.h"
47#include "redump.h"
48#include "rnnutil.h"
49#include "script.h"
50
51/* ************************************************************************* */
52/* originally based on kernel recovery dump code: */
53
/* Decoder configuration supplied by the caller; assigned in cffdec_init(). */
static const struct cffdec_options *options;

/* When set, dump_registers() reports writes to non-banked registers as
 * "NEEDS WFI".
 */
static bool needs_wfi = false;
/* Initialized from options->summary in cffdec_init(); consulted by quiet(). */
static bool summary = false;
/* Gates the TEX_SAMP/TEX_CONST register handlers (they do nothing unless set). */
static bool in_summary = false;
static int vertices;
60
61static inline unsigned
62regcnt(void)
63{
64   if (options->gpu_id >= 500)
65      return 0xffff;
66   else
67      return 0x7fff;
68}
69
70static int
71is_64b(void)
72{
73   return options->gpu_id >= 500;
74}
75
static int draws[4];
/* Per-IB-level tracking state, indexed by the current IB level (ib). */
static struct {
   uint64_t base;
   uint32_t size; /* in dwords */
   /* Generally cmdstream consists of multiple IB calls to different
    * buffers, which are themselves often re-used for each tile.  The
    * triggered flag serves two purposes to help make it more clear
    * what part of the cmdstream is before vs after the GPU hang:
    *
    * 1) if in IB2 we are past the point within the IB2 buffer where
    *    the GPU hung, but IB1 is not past the point within its
    *    buffer where the GPU had hung, then we know the GPU hang
    *    happens on a future use of that IB2 buffer.
    *
    * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
    *    hung, but we've already passed the trigger point at the same
    *    IB level, we know that we are past the point where the GPU
    *    had hung.
    *
    * So this is a one way switch, false->true.  And a higher #'d
    * IB level isn't considered triggered unless the lower #'d IB
    * level is.
    */
   bool triggered;
} ibs[4];
/* Current IB level; used to index both ibs[] and options->ibs[]. */
static int ib;
102
/* Running draw counter; reset in cffdec_init() and printed by __do_query(). */
static int draw_count;
/* Compared against options->draw_filter in quiet() to suppress other draws. */
static int current_draw_count;

/* query mode.. to handle symbolic register name queries, we need to
 * defer parsing query string until after gpu_id is known and rnn db
 * loaded:
 *
 * One entry per options->querystrs[] element, filled in by init_rnn().
 */
static int *queryvals;
111
112static bool
113quiet(int lvl)
114{
115   if ((options->draw_filter != -1) &&
116       (options->draw_filter != current_draw_count))
117      return true;
118   if ((lvl >= 3) && (summary || options->querystrs || options->script))
119      return true;
120   if ((lvl >= 2) && (options->querystrs || options->script))
121      return true;
122   return false;
123}
124
/*
 * printf() to stdout, unless output at verbosity level 'lvl' is currently
 * suppressed (see quiet()).
 */
void
printl(int lvl, const char *fmt, ...)
{
   if (!quiet(lvl)) {
      va_list ap;

      va_start(ap, fmt);
      vprintf(fmt, ap);
      va_end(ap);
   }
}
135
/*
 * Indentation prefixes indexed by nesting level: entry N holds N+1 tabs
 * for N = 0..8.  The remaining entries are the literal "x" rather than
 * deeper indentation.
 */
static const char *levels[] = {
   "\t",
   "\t\t",
   "\t\t\t",
   "\t\t\t\t",
   "\t\t\t\t\t",
   "\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t\t\t",
   "x",
   "x",
   "x",
   "x",
   "x",
   "x",
};
153
/* Source selector passed to dump_tex_samp(); the register handlers in this
 * file always pass STATE_SRC_DIRECT.
 */
enum state_src_t {
   STATE_SRC_DIRECT,
   STATE_SRC_INDIRECT,
   STATE_SRC_BINDLESS,
};
159
/* SDS (CP_SET_DRAW_STATE) helpers: */
static void load_all_groups(int level);
static void disable_all_groups(void);

/* Texture sampler/constant state dumpers, called from the TEX_SAMP and
 * TEX_CONST register handlers:
 */
static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit,
                          int level);
static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
167
168static bool
169highlight_gpuaddr(uint64_t gpuaddr)
170{
171   if (!options->ibs[ib].base)
172      return false;
173
174   if ((ib > 0) && options->ibs[ib - 1].base && !ibs[ib - 1].triggered)
175      return false;
176
177   if (ibs[ib].triggered)
178      return options->color;
179
180   if (options->ibs[ib].base != ibs[ib].base)
181      return false;
182
183   uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
184   uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
185
186   bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
187
188   ibs[ib].triggered |= triggered;
189
190   if (triggered)
191      printf("ESTIMATED CRASH LOCATION!\n");
192
193   return triggered & options->color;
194}
195
196static void
197dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
198{
199   int i, j;
200   int lastzero = 1;
201
202   if (quiet(2))
203      return;
204
205   for (i = 0; i < sizedwords; i += 8) {
206      int zero = 1;
207
208      /* always show first row: */
209      if (i == 0)
210         zero = 0;
211
212      for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++)
213         if (dwords[i + j])
214            zero = 0;
215
216      if (zero && !lastzero)
217         printf("*\n");
218
219      lastzero = zero;
220
221      if (zero)
222         continue;
223
224      uint64_t addr = gpuaddr(&dwords[i]);
225      bool highlight = highlight_gpuaddr(addr);
226
227      if (highlight)
228         printf("\x1b[0;1;31m");
229
230      if (is_64b()) {
231         printf("%016" PRIx64 ":%s", addr, levels[level]);
232      } else {
233         printf("%08x:%s", (uint32_t)addr, levels[level]);
234      }
235
236      if (highlight)
237         printf("\x1b[0m");
238
239      printf("%04x:", i * 4);
240
241      for (j = 0; (j < 8) && (i + j < sizedwords); j++) {
242         printf(" %08x", dwords[i + j]);
243      }
244
245      printf("\n");
246   }
247}
248
249static void
250dump_float(float *dwords, uint32_t sizedwords, int level)
251{
252   int i;
253   for (i = 0; i < sizedwords; i++) {
254      if ((i % 8) == 0) {
255         if (is_64b()) {
256            printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]);
257         } else {
258            printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
259         }
260      } else {
261         printf(" ");
262      }
263      printf("%8f", *(dwords++));
264      if ((i % 8) == 7)
265         printf("\n");
266   }
267   if (i % 8)
268      printf("\n");
269}
270
271/* I believe the surface format is low bits:
272#define RB_COLOR_INFO__COLOR_FORMAT_MASK                   0x0000000fL
273comments in sys2gmem_tex_const indicate that address is [31:12], but
274looks like at least some of the bits above the format have different meaning..
275*/
/*
 * Split 'dword' into an address part (the bits outside 'mask') and a flags
 * part (the bits inside 'mask').  Only valid when addresses are 32-bit.
 */
static void
parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags,
                 uint32_t mask)
{
   const uint32_t addr_bits = dword & ~mask;
   const uint32_t flag_bits = dword & mask;

   assert(!is_64b()); /* this is only used on a2xx */

   *gpuaddr = addr_bits;
   *flags = flag_bits;
}
284
/* Most recent value stored for each register by reg_set(), indexed by
 * register offset.
 */
static uint32_t type0_reg_vals[0xffff + 1];
/* Bitmaps with one bit per register (bit regbase % 8 of byte regbase / 8),
 * both set by reg_set().
 */
static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) /
                                   8]; /* written since last draw */
static uint8_t type0_reg_written[sizeof(type0_reg_vals) / 8];
/* Per-register reference values that the query code compares the current
 * register value against; zeroed by clear_lastvals().
 */
static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
290
291static bool
292reg_rewritten(uint32_t regbase)
293{
294   return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8)));
295}
296
297bool
298reg_written(uint32_t regbase)
299{
300   return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8)));
301}
302
303static void
304clear_rewritten(void)
305{
306   memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
307}
308
309static void
310clear_written(void)
311{
312   memset(type0_reg_written, 0, sizeof(type0_reg_written));
313   clear_rewritten();
314}
315
316uint32_t
317reg_lastval(uint32_t regbase)
318{
319   return lastvals[regbase];
320}
321
322static void
323clear_lastvals(void)
324{
325   memset(lastvals, 0, sizeof(lastvals));
326}
327
328uint32_t
329reg_val(uint32_t regbase)
330{
331   return type0_reg_vals[regbase];
332}
333
334void
335reg_set(uint32_t regbase, uint32_t val)
336{
337   assert(regbase < regcnt());
338   type0_reg_vals[regbase] = val;
339   type0_reg_written[regbase / 8] |= (1 << (regbase % 8));
340   type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8));
341}
342
343static void
344reg_dump_scratch(const char *name, uint32_t dword, int level)
345{
346   unsigned r;
347
348   if (quiet(3))
349      return;
350
351   r = regbase("CP_SCRATCH[0].REG");
352
353   // if not, try old a2xx/a3xx version:
354   if (!r)
355      r = regbase("CP_SCRATCH_REG0");
356
357   if (!r)
358      return;
359
360   printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5),
361          reg_val(r + 6), reg_val(r + 7));
362}
363
/*
 * Hex-dump 'sizedwords' dwords starting at 'gpuaddr', one indentation
 * level deeper than 'level'.  Does nothing when output at 'quietlvl' is
 * suppressed or when the address has no host mapping.
 */
static void
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
{
   if (quiet(quietlvl))
      return;

   void *host = hostptr(gpuaddr);

   if (!host)
      return;

   dump_hex(host, sizedwords, level + 1);
}
377
/* Hex-dump the first 64 dwords at 'gpuaddr', subject to quiet level 3. */
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
   enum { DUMP_DWORDS = 64, DUMP_QUIET_LEVEL = 3 };

   dump_gpuaddr_size(gpuaddr, level, DUMP_DWORDS, DUMP_QUIET_LEVEL);
}
383
/* Register handler: treat the 32-bit register value as a GPU address and
 * dump the memory it points at.  'name' is not used.
 */
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   dump_gpuaddr(addr, level);
}
389
/* Low 32 bits of a 64-bit address, latched by the _LO register handler and
 * combined with the high half by the matching _HI handlers.
 */
uint32_t gpuaddr_lo;

/* Register handler for the low half of a LO/HI address pair: only latches
 * the value.  'name' and 'level' are not used.
 */
static void
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
{
   gpuaddr_lo = dword;
}
396
397static void
398reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
399{
400   dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
401}
402
/* 64-bit register handler: dump the memory at address 'qword'.  'name' is
 * not used.
 */
static void
reg_dump_gpuaddr64(const char *name, uint64_t qword, int level)
{
   const uint64_t addr = qword;

   dump_gpuaddr(addr, level);
}
408
409static void
410dump_shader(const char *ext, void *buf, int bufsz)
411{
412   if (options->dump_shaders) {
413      static int n = 0;
414      char filename[16];
415      int fd;
416      sprintf(filename, "%04d.%s", n++, ext);
417      fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
418      if (fd != -1) {
419         write(fd, buf, bufsz);
420         close(fd);
421      }
422   }
423}
424
425static void
426disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
427{
428   void *buf;
429
430   gpuaddr &= 0xfffffffffffffff0;
431
432   if (quiet(3))
433      return;
434
435   buf = hostptr(gpuaddr);
436   if (buf) {
437      uint32_t sizedwords = hostlen(gpuaddr) / 4;
438      const char *ext;
439
440      dump_hex(buf, min(64, sizedwords), level + 1);
441      try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->gpu_id);
442
443      /* this is a bit ugly way, but oh well.. */
444      if (strstr(name, "SP_VS_OBJ")) {
445         ext = "vo3";
446      } else if (strstr(name, "SP_FS_OBJ")) {
447         ext = "fo3";
448      } else if (strstr(name, "SP_GS_OBJ")) {
449         ext = "go3";
450      } else if (strstr(name, "SP_CS_OBJ")) {
451         ext = "co3";
452      } else {
453         ext = NULL;
454      }
455
456      if (ext)
457         dump_shader(ext, buf, sizedwords * 4);
458   }
459}
460
/* Register handler: treat the 32-bit register value as the address of a
 * shader and disassemble it.
 */
static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   disasm_gpuaddr(name, addr, level);
}
466
467static void
468reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
469{
470   disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
471}
472
/* 64-bit register handler: disassemble the shader at address 'qword'. */
static void
reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level)
{
   const uint64_t addr = qword;

   disasm_gpuaddr(name, addr, level);
}
478
/* Look up the current value of the TEX_COUNT register that belongs to the
 * given TEX_SAMP/TEX_CONST register name.  The count register's name is
 * built by replacing everything from "CONST" (or, failing that, "SAMP")
 * onwards with "COUNT".  Returns 0 if the name contains neither.
 *
 * Note, this kinda assumes an equal # of samplers and textures, but not
 * really sure if there is a much better option.  I suppose on a6xx we
 * could instead decode the bitfields in SP_xS_CONFIG
 */
static int
get_tex_count(const char *name)
{
   char count_reg[strlen(name) + 5];
   const char *tail = strstr(name, "CONST");

   if (tail == NULL)
      tail = strstr(name, "SAMP");
   if (tail == NULL)
      return 0;

   /* keep the prefix in front of CONST/SAMP, then append "COUNT" + NUL */
   const int prefix_len = tail - name;

   memcpy(count_reg, name, prefix_len);
   memcpy(count_reg + prefix_len, "COUNT", sizeof("COUNT"));

   return reg_val(regbase(count_reg));
}
504
505static void
506reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
507{
508   if (!in_summary)
509      return;
510
511   int num_unit = get_tex_count(name);
512   uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
513   void *buf = hostptr(gpuaddr);
514
515   if (!buf)
516      return;
517
518   dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1);
519}
520
521static void
522reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
523{
524   if (!in_summary)
525      return;
526
527   int num_unit = get_tex_count(name);
528   uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
529   void *buf = hostptr(gpuaddr);
530
531   if (!buf)
532      return;
533
534   dump_tex_const(buf, num_unit, level + 1);
535}
536
/*
 * Registers with special handling (rnndec_decode() handles rest):
 *
 * REG(x, fxn) builds a table entry for a 32-bit register: the register name
 * is the stringified 'x' and 'fxn' is its handler.  REG64(x, fxn) does the
 * same for a 64-bit register, storing the handler in .fxn64 and setting
 * .is_reg64.
 */
#define REG(x, fxn)    { #x, fxn }
#define REG64(x, fxn)  { #x, .fxn64 = fxn, .is_reg64 = true }
/*
 * Per-generation tables of registers that get extra handling on top of the
 * generic decode.  Each table is terminated by an entry with a NULL
 * regname.  type0_reg points at the table selected in cffdec_init().
 */
static struct {
   /* register name, resolved to an offset via regbase() in init_rnn() */
   const char *regname;
   /* handler for 32-bit registers (used when !is_reg64) */
   void (*fxn)(const char *name, uint32_t dword, int level);
   /* handler for 64-bit registers (used when is_reg64) */
   void (*fxn64)(const char *name, uint64_t qword, int level);
   /* resolved register offset, filled in by init_rnn() */
   uint32_t regbase;
   bool is_reg64;
} reg_a2xx[] = {
      REG(CP_SCRATCH_REG0, reg_dump_scratch),
      REG(CP_SCRATCH_REG1, reg_dump_scratch),
      REG(CP_SCRATCH_REG2, reg_dump_scratch),
      REG(CP_SCRATCH_REG3, reg_dump_scratch),
      REG(CP_SCRATCH_REG4, reg_dump_scratch),
      REG(CP_SCRATCH_REG5, reg_dump_scratch),
      REG(CP_SCRATCH_REG6, reg_dump_scratch),
      REG(CP_SCRATCH_REG7, reg_dump_scratch),
      {NULL},
}, reg_a3xx[] = {
      REG(CP_SCRATCH_REG0, reg_dump_scratch),
      REG(CP_SCRATCH_REG1, reg_dump_scratch),
      REG(CP_SCRATCH_REG2, reg_dump_scratch),
      REG(CP_SCRATCH_REG3, reg_dump_scratch),
      REG(CP_SCRATCH_REG4, reg_dump_scratch),
      REG(CP_SCRATCH_REG5, reg_dump_scratch),
      REG(CP_SCRATCH_REG6, reg_dump_scratch),
      REG(CP_SCRATCH_REG7, reg_dump_scratch),
      REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
      REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
      REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
      REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
      REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
      REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      {NULL},
}, reg_a4xx[] = {
      REG(CP_SCRATCH[0].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
      REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
      REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      {NULL},
}, reg_a5xx[] = {
      REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
      /* LO/HI pairs: the _LO handler latches the low half in gpuaddr_lo,
       * the _HI handler combines both halves and acts on the address.
       */
      REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
      REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
//      REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
//      REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
//      REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
//      REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
//      REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
//      REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
//      REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
//      REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),

//      REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
//      REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
//      REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
//      REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
//      REG(RB_2D_DST_LO, reg_gpuaddr_lo),
//      REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
//      REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
//      REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),

      {NULL},
}, reg_a6xx[] = {
      REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),

      REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),

      REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64),

      {NULL},
}, *type0_reg;
709
710static struct rnn *rnn;
711
712static void
713init_rnn(const char *gpuname)
714{
715   rnn = rnn_new(!options->color);
716
717   rnn_load(rnn, gpuname);
718
719   if (options->querystrs) {
720      int i;
721      queryvals = calloc(options->nquery, sizeof(queryvals[0]));
722
723      for (i = 0; i < options->nquery; i++) {
724         int val = strtol(options->querystrs[i], NULL, 0);
725
726         if (val == 0)
727            val = regbase(options->querystrs[i]);
728
729         queryvals[i] = val;
730         printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
731      }
732   }
733
734   for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
735      type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
736      if (!type0_reg[idx].regbase) {
737         printf("invalid register name: %s\n", type0_reg[idx].regname);
738         exit(1);
739      }
740   }
741}
742
743void
744reset_regs(void)
745{
746   clear_written();
747   clear_lastvals();
748   memset(&ibs, 0, sizeof(ibs));
749}
750
751void
752cffdec_init(const struct cffdec_options *_options)
753{
754   options = _options;
755   summary = options->summary;
756
757   /* in case we're decoding multiple files: */
758   free(queryvals);
759   reset_regs();
760   draw_count = 0;
761
762   /* TODO we need an API to free/cleanup any previous rnn */
763
764   switch (options->gpu_id) {
765   case 200 ... 299:
766      type0_reg = reg_a2xx;
767      init_rnn("a2xx");
768      break;
769   case 300 ... 399:
770      type0_reg = reg_a3xx;
771      init_rnn("a3xx");
772      break;
773   case 400 ... 499:
774      type0_reg = reg_a4xx;
775      init_rnn("a4xx");
776      break;
777   case 500 ... 599:
778      type0_reg = reg_a5xx;
779      init_rnn("a5xx");
780      break;
781   case 600 ... 699:
782      type0_reg = reg_a6xx;
783      init_rnn("a6xx");
784      break;
785   default:
786      errx(-1, "unsupported gpu");
787   }
788}
789
790const char *
791pktname(unsigned opc)
792{
793   return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
794}
795
796const char *
797regname(uint32_t regbase, int color)
798{
799   return rnn_regname(rnn, regbase, color);
800}
801
802uint32_t
803regbase(const char *name)
804{
805   return rnn_regbase(rnn, name);
806}
807
/*
 * Returns non-zero if the (uncolored) name of the register at offset
 * 'regbase' ends with 'suffix', zero otherwise.
 *
 * The tail of the name is compared directly, so a name that also contains
 * the suffix somewhere earlier (e.g. "FOO_LO_BAR_LO" with "_LO") is still
 * matched.
 */
static int
endswith(uint32_t regbase, const char *suffix)
{
   const char *name = regname(regbase, 0);

   /* NOTE(review): callers probe neighbouring offsets (regbase +/- 1) that
    * may not be real registers; whether regname() can return NULL for
    * those is not visible here, so treat a missing name as "no match".
    */
   if (!name)
      return 0;

   size_t name_len = strlen(name);
   size_t suffix_len = strlen(suffix);

   if (suffix_len > name_len)
      return 0;

   return strcmp(name + (name_len - suffix_len), suffix) == 0;
}
817
818void
819dump_register_val(uint32_t regbase, uint32_t dword, int level)
820{
821   struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
822
823   if (info && info->typeinfo) {
824      uint64_t gpuaddr = 0;
825      char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
826      printf("%s%s: %s", levels[level], info->name, decoded);
827
828      /* Try and figure out if we are looking at a gpuaddr.. this
829       * might be useful for other gen's too, but at least a5xx has
830       * the _HI/_LO suffix we can look for.  Maybe a better approach
831       * would be some special annotation in the xml..
832       * for a6xx use "address" and "waddress" types
833       */
834      if (options->gpu_id >= 600) {
835         if (!strcmp(info->typeinfo->name, "address") ||
836             !strcmp(info->typeinfo->name, "waddress")) {
837            gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
838         }
839      } else if (options->gpu_id >= 500) {
840         if (endswith(regbase, "_HI") && endswith(regbase - 1, "_LO")) {
841            gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase - 1);
842         } else if (endswith(regbase, "_LO") && endswith(regbase + 1, "_HI")) {
843            gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
844         }
845      }
846
847      if (gpuaddr && hostptr(gpuaddr)) {
848         printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u",
849                gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr),
850                hostlen(gpubaseaddr(gpuaddr)));
851      }
852
853      printf("\n");
854
855      free(decoded);
856   } else if (info) {
857      printf("%s%s: %08x\n", levels[level], info->name, dword);
858   } else {
859      printf("%s<%04x>: %08x\n", levels[level], regbase, dword);
860   }
861
862   if (info) {
863      free(info->name);
864      free(info);
865   }
866}
867
868static void
869dump_register(uint32_t regbase, uint32_t dword, int level)
870{
871   if (!quiet(3)) {
872      dump_register_val(regbase, dword, level);
873   }
874
875   for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
876      if (type0_reg[idx].regbase == regbase) {
877         if (type0_reg[idx].is_reg64) {
878            uint64_t qword = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
879            type0_reg[idx].fxn64(type0_reg[idx].regname, qword, level);
880         } else {
881            type0_reg[idx].fxn(type0_reg[idx].regname, dword, level);
882         }
883         break;
884      }
885   }
886}
887
/* True if 'regbase' lies in the banked register range [0x2000, 0x2400). */
static bool
is_banked_reg(uint32_t regbase)
{
   if (regbase < 0x2000)
      return false;

   return regbase < 0x2400;
}
893
894static void
895dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,
896               int level)
897{
898   while (sizedwords--) {
899      int last_summary = summary;
900
901      /* access to non-banked registers needs a WFI:
902       * TODO banked register range for a2xx??
903       */
904      if (needs_wfi && !is_banked_reg(regbase))
905         printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
906
907      reg_set(regbase, *dwords);
908      dump_register(regbase, *dwords, level);
909      regbase++;
910      dwords++;
911      summary = last_summary;
912   }
913}
914
915static void
916dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name)
917{
918   struct rnndomain *dom;
919   int i;
920
921   dom = rnn_finddomain(rnn->db, name);
922
923   if (!dom)
924      return;
925
926   if (script_packet)
927      script_packet(dwords, sizedwords, rnn, dom);
928
929   if (quiet(2))
930      return;
931
932   for (i = 0; i < sizedwords; i++) {
933      struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
934      char *decoded;
935      if (!(info && info->typeinfo))
936         break;
937      uint64_t value = dwords[i];
938      if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
939         value |= (uint64_t)dwords[i + 1] << 32;
940         i++; /* skip the next dword since we're printing it now */
941      }
942      decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
943      /* Unlike the register printing path, we don't print the name
944       * of the register, so if it doesn't contain other named
945       * things (i.e. it isn't a bitset) then print the register
946       * name as if it's a bitset with a single entry. This avoids
947       * having to create a dummy register with a single entry to
948       * get a name in the decoding.
949       */
950      if (info->typeinfo->type == RNN_TTYPE_BITSET ||
951          info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
952         printf("%s%s\n", levels[level], decoded);
953      } else {
954         printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname,
955                info->name, rnn->vc->colors->reset, decoded);
956      }
957      free(decoded);
958      free(info->name);
959      free(info);
960   }
961}
962
/* Bin bounds printed by __do_query(); on gpu_id 500..699 they are taken
 * from the GRAS_SC_WINDOW_SCISSOR_TL/BR register values there.
 */
static uint32_t bin_x1, bin_x2, bin_y1, bin_y2;
static unsigned mode;
/* Human-readable name of the current render mode, printed by print_mode()
 * and __do_query().
 */
static const char *render_mode;
/* Bitmask of render modes; MODE_ALL selects every mode. */
static enum {
   MODE_BINNING = 0x1,
   MODE_GMEM = 0x2,
   MODE_BYPASS = 0x4,
   MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
} enable_mask = MODE_ALL;
/* Printed by print_mode() as the "g" and "l" skip_ib2 values. */
static bool skip_ib2_enable_global;
static bool skip_ib2_enable_local;
974
975static void
976print_mode(int level)
977{
978   if ((options->gpu_id >= 500) && !quiet(2)) {
979      printf("%smode: %s\n", levels[level], render_mode);
980      printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global,
981             skip_ib2_enable_local);
982   }
983}
984
985static bool
986skip_query(void)
987{
988   switch (options->query_mode) {
989   case QUERY_ALL:
990      /* never skip: */
991      return false;
992   case QUERY_WRITTEN:
993      for (int i = 0; i < options->nquery; i++) {
994         uint32_t regbase = queryvals[i];
995         if (!reg_written(regbase)) {
996            continue;
997         }
998         if (reg_rewritten(regbase)) {
999            return false;
1000         }
1001      }
1002      return true;
1003   case QUERY_DELTA:
1004      for (int i = 0; i < options->nquery; i++) {
1005         uint32_t regbase = queryvals[i];
1006         if (!reg_written(regbase)) {
1007            continue;
1008         }
1009         uint32_t lastval = reg_val(regbase);
1010         if (lastval != lastvals[regbase]) {
1011            return false;
1012         }
1013      }
1014      return true;
1015   }
1016   return true;
1017}
1018
1019static void
1020__do_query(const char *primtype, uint32_t num_indices)
1021{
1022   int n = 0;
1023
1024   if ((500 <= options->gpu_id) && (options->gpu_id < 700)) {
1025      uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1026      uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1027
1028      bin_x1 = scissor_tl & 0xffff;
1029      bin_y1 = scissor_tl >> 16;
1030      bin_x2 = scissor_br & 0xffff;
1031      bin_y2 = scissor_br >> 16;
1032   }
1033
1034   for (int i = 0; i < options->nquery; i++) {
1035      uint32_t regbase = queryvals[i];
1036      if (reg_written(regbase)) {
1037         uint32_t lastval = reg_val(regbase);
1038         printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1,
1039                bin_y1, bin_x2, bin_y2, num_indices);
1040         if (options->gpu_id >= 500)
1041            printf("%s:", render_mode);
1042         printf("\t%08x", lastval);
1043         if (lastval != lastvals[regbase]) {
1044            printf("!");
1045         } else {
1046            printf(" ");
1047         }
1048         if (reg_rewritten(regbase)) {
1049            printf("+");
1050         } else {
1051            printf(" ");
1052         }
1053         dump_register_val(regbase, lastval, 0);
1054         n++;
1055      }
1056   }
1057
1058   if (n > 1)
1059      printf("\n");
1060}
1061
1062static void
1063do_query_compare(const char *primtype, uint32_t num_indices)
1064{
1065   unsigned saved_enable_mask = enable_mask;
1066   const char *saved_render_mode = render_mode;
1067
1068   /* in 'query-compare' mode, we want to see if the register is writtten
1069    * or changed in any mode:
1070    *
1071    * (NOTE: this could cause false-positive for 'query-delta' if the reg
1072    * is written with different values in binning vs sysmem/gmem mode, as
1073    * we don't track previous values per-mode, but I think we can live with
1074    * that)
1075    */
1076   enable_mask = MODE_ALL;
1077
1078   clear_rewritten();
1079   load_all_groups(0);
1080
1081   if (!skip_query()) {
1082      /* dump binning pass values: */
1083      enable_mask = MODE_BINNING;
1084      render_mode = "BINNING";
1085      clear_rewritten();
1086      load_all_groups(0);
1087      __do_query(primtype, num_indices);
1088
1089      /* dump draw pass values: */
1090      enable_mask = MODE_GMEM | MODE_BYPASS;
1091      render_mode = "DRAW";
1092      clear_rewritten();
1093      load_all_groups(0);
1094      __do_query(primtype, num_indices);
1095
1096      printf("\n");
1097   }
1098
1099   enable_mask = saved_enable_mask;
1100   render_mode = saved_render_mode;
1101
1102   disable_all_groups();
1103}
1104
1105/* well, actually query and script..
1106 * NOTE: call this before dump_register_summary()
1107 */
1108static void
1109do_query(const char *primtype, uint32_t num_indices)
1110{
1111   if (script_draw)
1112      script_draw(primtype, num_indices);
1113
1114   if (options->query_compare) {
1115      do_query_compare(primtype, num_indices);
1116      return;
1117   }
1118
1119   if (skip_query())
1120      return;
1121
1122   __do_query(primtype, num_indices);
1123}
1124
1125static void
1126cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1127{
1128   uint32_t start = dwords[1] >> 16;
1129   uint32_t size = dwords[1] & 0xffff;
1130   const char *type = NULL, *ext = NULL;
1131   gl_shader_stage disasm_type;
1132
1133   switch (dwords[0]) {
1134   case 0:
1135      type = "vertex";
1136      ext = "vo";
1137      disasm_type = MESA_SHADER_VERTEX;
1138      break;
1139   case 1:
1140      type = "fragment";
1141      ext = "fo";
1142      disasm_type = MESA_SHADER_FRAGMENT;
1143      break;
1144   default:
1145      type = "<unknown>";
1146      disasm_type = 0;
1147      break;
1148   }
1149
1150   printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start,
1151          size);
1152   disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type);
1153
1154   /* dump raw shader: */
1155   if (ext)
1156      dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1157}
1158
/* Write a run of consecutive registers: dwords[0] carries the first
 * register offset in its low 16 bits, and each following dword is the value
 * for the next register.  Every value is both dumped and recorded.
 */
static void
cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
   uint32_t reg = dwords[0] & 0xffff;

   for (uint32_t n = 1; n < sizedwords; n++, reg++) {
      const uint32_t value = dwords[n];

      dump_register(reg, value, level + 1);
      reg_set(reg, value);
   }
}
1170
/* Kind of state carried by a load-state packet.  Zero is deliberately not
 * used by any enumerator, so zero-filled lookup table entries do not match
 * any of them.
 */
enum state_t {
   TEX_SAMP = 1,
   TEX_CONST = 2,
   TEX_MIPADDR = 3, /* a3xx only */
   SHADER_PROG = 4,
   SHADER_CONST = 5,

   /* image/ssbo state: */
   SSBO_0 = 6,
   SSBO_1 = 7,
   SSBO_2 = 8,

   UBO = 9,

   /* unknown things, which just get hexdumped: */
   UNKNOWN_DWORDS = 10,
   UNKNOWN_2DWORDS = 11,
   UNKNOWN_4DWORDS = 12,
};
1190
/* State block ids used to index the a3xx load-state lookup table; the
 * values are consecutive, starting from zero:
 */
enum adreno_state_block {
   SB_VERT_TEX = 0,
   SB_VERT_MIPADDR,
   SB_FRAG_TEX,
   SB_FRAG_MIPADDR,
   SB_VERT_SHADER,
   SB_GEOM_SHADER,
   SB_FRAG_SHADER,
   SB_COMPUTE_SHADER,
};
1201
1202/* TODO there is probably a clever way to let rnndec parse things so
1203 * we don't have to care about packet format differences across gens
1204 */
1205
1206static void
1207a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1208                    enum state_t *state, enum state_src_t *src)
1209{
1210   unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1211   unsigned state_type = dwords[1] & 0x3;
1212   static const struct {
1213      gl_shader_stage stage;
1214      enum state_t state;
1215   } lookup[0xf][0x3] = {
1216      [SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1217      [SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1218      [SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1219      [SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1220      [SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1221      [SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1222      [SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1223      [SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1224   };
1225
1226   *stage = lookup[state_block_id][state_type].stage;
1227   *state = lookup[state_block_id][state_type].state;
1228   unsigned state_src = (dwords[0] >> 16) & 0x7;
1229   if (state_src == 0 /* SS_DIRECT */)
1230      *src = STATE_SRC_DIRECT;
1231   else
1232      *src = STATE_SRC_INDIRECT;
1233}
1234
1235static enum state_src_t
1236_get_state_src(unsigned dword0)
1237{
1238   switch ((dword0 >> 16) & 0x3) {
1239   case 0: /* SS4_DIRECT / SS6_DIRECT */
1240      return STATE_SRC_DIRECT;
1241   case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1242      return STATE_SRC_INDIRECT;
1243   case 1: /* SS6_BINDLESS */
1244      return STATE_SRC_BINDLESS;
1245   default:
1246      return STATE_SRC_DIRECT;
1247   }
1248}
1249
1250static void
1251_get_state_type(unsigned state_block_id, unsigned state_type,
1252                gl_shader_stage *stage, enum state_t *state)
1253{
1254   static const struct {
1255      gl_shader_stage stage;
1256      enum state_t state;
1257   } lookup[0x10][0x4] = {
1258      // SB4_VS_TEX:
1259      [0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1260      [0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1261      [0x0][2] = {MESA_SHADER_VERTEX, UBO},
1262      // SB4_HS_TEX:
1263      [0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP},
1264      [0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST},
1265      [0x1][2] = {MESA_SHADER_TESS_CTRL, UBO},
1266      // SB4_DS_TEX:
1267      [0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP},
1268      [0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST},
1269      [0x2][2] = {MESA_SHADER_TESS_EVAL, UBO},
1270      // SB4_GS_TEX:
1271      [0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP},
1272      [0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST},
1273      [0x3][2] = {MESA_SHADER_GEOMETRY, UBO},
1274      // SB4_FS_TEX:
1275      [0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1276      [0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1277      [0x4][2] = {MESA_SHADER_FRAGMENT, UBO},
1278      // SB4_CS_TEX:
1279      [0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP},
1280      [0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST},
1281      [0x5][2] = {MESA_SHADER_COMPUTE, UBO},
1282      // SB4_VS_SHADER:
1283      [0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1284      [0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1285      [0x8][2] = {MESA_SHADER_VERTEX, UBO},
1286      // SB4_HS_SHADER
1287      [0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG},
1288      [0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST},
1289      [0x9][2] = {MESA_SHADER_TESS_CTRL, UBO},
1290      // SB4_DS_SHADER
1291      [0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG},
1292      [0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST},
1293      [0xa][2] = {MESA_SHADER_TESS_EVAL, UBO},
1294      // SB4_GS_SHADER
1295      [0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG},
1296      [0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST},
1297      [0xb][2] = {MESA_SHADER_GEOMETRY, UBO},
1298      // SB4_FS_SHADER:
1299      [0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1300      [0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1301      [0xc][2] = {MESA_SHADER_FRAGMENT, UBO},
1302      // SB4_CS_SHADER:
1303      [0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG},
1304      [0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST},
1305      [0xd][2] = {MESA_SHADER_COMPUTE, UBO},
1306      [0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */
1307      // SB4_SSBO (shared across all stages)
1308      [0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */
1309      [0xe][1] = {0, SSBO_1},
1310      [0xe][2] = {0, SSBO_2},
1311      // SB4_CS_SSBO
1312      [0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0},
1313      [0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1},
1314      [0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2},
1315      // unknown things
1316      /* This looks like combined UBO state for 3d stages (a5xx and
1317       * before??  I think a6xx has UBO state per shader stage:
1318       */
1319      [0x6][2] = {0, UBO},
1320      [0x7][1] = {0, UNKNOWN_2DWORDS},
1321   };
1322
1323   *stage = lookup[state_block_id][state_type].stage;
1324   *state = lookup[state_block_id][state_type].state;
1325}
1326
1327static void
1328a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1329                    enum state_t *state, enum state_src_t *src)
1330{
1331   unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1332   unsigned state_type = dwords[1] & 0x3;
1333   _get_state_type(state_block_id, state_type, stage, state);
1334   *src = _get_state_src(dwords[0]);
1335}
1336
1337static void
1338a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1339                    enum state_t *state, enum state_src_t *src)
1340{
1341   unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1342   unsigned state_type = (dwords[0] >> 14) & 0x3;
1343   _get_state_type(state_block_id, state_type, stage, state);
1344   *src = _get_state_src(dwords[0]);
1345}
1346
1347static void
1348dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1349{
1350   for (int i = 0; i < num_unit; i++) {
1351      /* work-around to reduce noise for opencl blob which always
1352       * writes the max # regardless of # of textures used
1353       */
1354      if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1355         break;
1356
1357      if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1358         dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP");
1359         dump_hex(texsamp, 2, level + 1);
1360         texsamp += 2;
1361      } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1362         dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP");
1363         dump_hex(texsamp, 2, level + 1);
1364         texsamp += 2;
1365      } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1366         dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP");
1367         dump_hex(texsamp, 4, level + 1);
1368         texsamp += 4;
1369      } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1370         dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP");
1371         dump_hex(texsamp, 4, level + 1);
1372         texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1373      }
1374   }
1375}
1376
1377static void
1378dump_tex_const(uint32_t *texconst, int num_unit, int level)
1379{
1380   for (int i = 0; i < num_unit; i++) {
1381      /* work-around to reduce noise for opencl blob which always
1382       * writes the max # regardless of # of textures used
1383       */
1384      if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) &&
1385          (texconst[2] == 0) && (texconst[3] == 0))
1386         break;
1387
1388      if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1389         dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST");
1390         dump_hex(texconst, 4, level + 1);
1391         texconst += 4;
1392      } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1393         dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST");
1394         if (options->dump_textures) {
1395            uint32_t addr = texconst[4] & ~0x1f;
1396            dump_gpuaddr(addr, level - 2);
1397         }
1398         dump_hex(texconst, 8, level + 1);
1399         texconst += 8;
1400      } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1401         dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST");
1402         if (options->dump_textures) {
1403            uint64_t addr =
1404               (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1405            dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1406         }
1407         dump_hex(texconst, 12, level + 1);
1408         texconst += 12;
1409      } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1410         dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST");
1411         if (options->dump_textures) {
1412            uint64_t addr =
1413               (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1414            dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1415         }
1416         dump_hex(texconst, 16, level + 1);
1417         texconst += 16;
1418      }
1419   }
1420}
1421
1422static void
1423cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
1424{
1425   gl_shader_stage stage;
1426   enum state_t state;
1427   enum state_src_t src;
1428   uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
1429   uint64_t ext_src_addr;
1430   void *contents;
1431   int i;
1432
1433   if (quiet(2) && !options->script)
1434      return;
1435
1436   if (options->gpu_id >= 600)
1437      a6xx_get_state_type(dwords, &stage, &state, &src);
1438   else if (options->gpu_id >= 400)
1439      a4xx_get_state_type(dwords, &stage, &state, &src);
1440   else
1441      a3xx_get_state_type(dwords, &stage, &state, &src);
1442
1443   switch (src) {
1444   case STATE_SRC_DIRECT:
1445      ext_src_addr = 0;
1446      break;
1447   case STATE_SRC_INDIRECT:
1448      if (is_64b()) {
1449         ext_src_addr = dwords[1] & 0xfffffffc;
1450         ext_src_addr |= ((uint64_t)dwords[2]) << 32;
1451      } else {
1452         ext_src_addr = dwords[1] & 0xfffffffc;
1453      }
1454
1455      break;
1456   case STATE_SRC_BINDLESS: {
1457      const unsigned base_reg = stage == MESA_SHADER_COMPUTE
1458                                   ? regbase("HLSQ_CS_BINDLESS_BASE[0].ADDR")
1459                                   : regbase("HLSQ_BINDLESS_BASE[0].ADDR");
1460
1461      if (is_64b()) {
1462         const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
1463         ext_src_addr = reg_val(reg) & 0xfffffffc;
1464         ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1465      } else {
1466         const unsigned reg = base_reg + (dwords[1] >> 28);
1467         ext_src_addr = reg_val(reg) & 0xfffffffc;
1468      }
1469
1470      ext_src_addr += 4 * (dwords[1] & 0xffffff);
1471      break;
1472   }
1473   }
1474
1475   if (ext_src_addr)
1476      contents = hostptr(ext_src_addr);
1477   else
1478      contents = is_64b() ? dwords + 3 : dwords + 2;
1479
1480   if (!contents)
1481      return;
1482
1483   switch (state) {
1484   case SHADER_PROG: {
1485      const char *ext = NULL;
1486
1487      if (quiet(2))
1488         return;
1489
1490      if (options->gpu_id >= 400)
1491         num_unit *= 16;
1492      else if (options->gpu_id >= 300)
1493         num_unit *= 4;
1494
1495      /* shaders:
1496       *
1497       * note: num_unit seems to be # of instruction groups, where
1498       * an instruction group has 4 64bit instructions.
1499       */
1500      if (stage == MESA_SHADER_VERTEX) {
1501         ext = "vo3";
1502      } else if (stage == MESA_SHADER_GEOMETRY) {
1503         ext = "go3";
1504      } else if (stage == MESA_SHADER_COMPUTE) {
1505         ext = "co3";
1506      } else if (stage == MESA_SHADER_FRAGMENT) {
1507         ext = "fo3";
1508      }
1509
1510      if (contents)
1511         try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout,
1512                         options->gpu_id);
1513
1514      /* dump raw shader: */
1515      if (ext)
1516         dump_shader(ext, contents, num_unit * 2 * 4);
1517
1518      break;
1519   }
1520   case SHADER_CONST: {
1521      if (quiet(2))
1522         return;
1523
1524      /* uniforms/consts:
1525       *
1526       * note: num_unit seems to be # of pairs of dwords??
1527       */
1528
1529      if (options->gpu_id >= 400)
1530         num_unit *= 2;
1531
1532      dump_float(contents, num_unit * 2, level + 1);
1533      dump_hex(contents, num_unit * 2, level + 1);
1534
1535      break;
1536   }
1537   case TEX_MIPADDR: {
1538      uint32_t *addrs = contents;
1539
1540      if (quiet(2))
1541         return;
1542
1543      /* mipmap consts block just appears to be array of num_unit gpu addr's: */
1544      for (i = 0; i < num_unit; i++) {
1545         void *ptr = hostptr(addrs[i]);
1546         printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]);
1547         if (options->dump_textures) {
1548            printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
1549            dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1);
1550         }
1551      }
1552      break;
1553   }
1554   case TEX_SAMP: {
1555      dump_tex_samp(contents, src, num_unit, level);
1556      break;
1557   }
1558   case TEX_CONST: {
1559      dump_tex_const(contents, num_unit, level);
1560      break;
1561   }
1562   case SSBO_0: {
1563      uint32_t *ssboconst = (uint32_t *)contents;
1564
1565      for (i = 0; i < num_unit; i++) {
1566         int sz = 4;
1567         if (400 <= options->gpu_id && options->gpu_id < 500) {
1568            dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0");
1569         } else if (500 <= options->gpu_id && options->gpu_id < 600) {
1570            dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0");
1571         } else if (600 <= options->gpu_id && options->gpu_id < 700) {
1572            sz = 16;
1573            dump_domain(ssboconst, 16, level + 2, "A6XX_TEX_CONST");
1574         }
1575         dump_hex(ssboconst, sz, level + 1);
1576         ssboconst += sz;
1577      }
1578      break;
1579   }
1580   case SSBO_1: {
1581      uint32_t *ssboconst = (uint32_t *)contents;
1582
1583      for (i = 0; i < num_unit; i++) {
1584         if (400 <= options->gpu_id && options->gpu_id < 500)
1585            dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1");
1586         else if (500 <= options->gpu_id && options->gpu_id < 600)
1587            dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1");
1588         dump_hex(ssboconst, 2, level + 1);
1589         ssboconst += 2;
1590      }
1591      break;
1592   }
1593   case SSBO_2: {
1594      uint32_t *ssboconst = (uint32_t *)contents;
1595
1596      for (i = 0; i < num_unit; i++) {
1597         /* TODO a4xx and a5xx might be same: */
1598         if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1599            dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2");
1600            dump_hex(ssboconst, 2, level + 1);
1601         }
1602         if (options->dump_textures) {
1603            uint64_t addr =
1604               (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
1605            dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1606         }
1607         ssboconst += 2;
1608      }
1609      break;
1610   }
1611   case UBO: {
1612      uint32_t *uboconst = (uint32_t *)contents;
1613
1614      for (i = 0; i < num_unit; i++) {
1615         // TODO probably similar on a4xx..
1616         if (500 <= options->gpu_id && options->gpu_id < 600)
1617            dump_domain(uboconst, 2, level + 2, "A5XX_UBO");
1618         else if (600 <= options->gpu_id && options->gpu_id < 700)
1619            dump_domain(uboconst, 2, level + 2, "A6XX_UBO");
1620         dump_hex(uboconst, 2, level + 1);
1621         uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
1622      }
1623      break;
1624   }
1625   case UNKNOWN_DWORDS: {
1626      if (quiet(2))
1627         return;
1628      dump_hex(contents, num_unit, level + 1);
1629      break;
1630   }
1631   case UNKNOWN_2DWORDS: {
1632      if (quiet(2))
1633         return;
1634      dump_hex(contents, num_unit * 2, level + 1);
1635      break;
1636   }
1637   case UNKNOWN_4DWORDS: {
1638      if (quiet(2))
1639         return;
1640      dump_hex(contents, num_unit * 4, level + 1);
1641      break;
1642   }
1643   default:
1644      if (quiet(2))
1645         return;
1646      /* hmm.. */
1647      dump_hex(contents, num_unit, level + 1);
1648      break;
1649   }
1650}
1651
1652static void
1653cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1654{
1655   bin_x1 = dwords[1] & 0xffff;
1656   bin_y1 = dwords[1] >> 16;
1657   bin_x2 = dwords[2] & 0xffff;
1658   bin_y2 = dwords[2] >> 16;
1659}
1660
1661static void
1662dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1663                    int level)
1664{
1665   uint32_t w, h, p;
1666   uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1667   uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1668   static const char *filter[] = {
1669      "point",
1670      "bilinear",
1671      "bicubic",
1672   };
1673   static const char *clamp[] = {
1674      "wrap",
1675      "mirror",
1676      "clamp-last-texel",
1677   };
1678   static const char swiznames[] = "xyzw01??";
1679
1680   /* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1681
1682   /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1683    * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1684    */
1685   p = (dwords[0] >> 22) << 5;
1686   clamp_x = (dwords[0] >> 10) & 0x3;
1687   clamp_y = (dwords[0] >> 13) & 0x3;
1688   clamp_z = (dwords[0] >> 16) & 0x3;
1689
1690   /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1691    * NearestClamp=1:OGL Mode
1692    */
1693   parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1694
1695   /* Width, Height, EndianSwap=0:None */
1696   w = (dwords[2] & 0x1fff) + 1;
1697   h = ((dwords[2] >> 13) & 0x1fff) + 1;
1698
1699   /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1700    * Mip=2:BaseMap
1701    */
1702   mag = (dwords[3] >> 19) & 0x3;
1703   min = (dwords[3] >> 21) & 0x3;
1704   swiz = (dwords[3] >> 1) & 0xfff;
1705
1706   /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1707    * Dim3d=0
1708    */
1709   // XXX
1710
1711   /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
1712    * Dim=1:2d, MipPacking=0
1713    */
1714   parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1715
1716   printf("%sset texture const %04x\n", levels[level], val);
1717   printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x],
1718          clamp[clamp_y], clamp[clamp_z]);
1719   printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min],
1720          filter[mag]);
1721   printf("%sswizzle: %c%c%c%c\n", levels[level + 1],
1722          swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1723          swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1724   printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1725          levels[level + 1], gpuaddr, flags, w, h, p,
1726          rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1727   printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr,
1728          mip_flags);
1729}
1730
1731static void
1732dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1733                       int level)
1734{
1735   int i;
1736   printf("%sset shader const %04x\n", levels[level], val);
1737   for (i = 0; i < sizedwords;) {
1738      uint32_t gpuaddr, flags;
1739      parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1740      void *addr = hostptr(gpuaddr);
1741      if (addr) {
1742         const char *fmt =
1743            rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1744         uint32_t size = dwords[i++];
1745         printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr,
1746                size, fmt);
1747         // TODO maybe dump these as bytes instead of dwords?
1748         size = (size + 3) / 4; // for now convert to dwords
1749         dump_hex(addr, min(size, 64), level + 1);
1750         if (size > min(size, 64))
1751            printf("%s\t\t...\n", levels[level + 1]);
1752         dump_float(addr, min(size, 64), level + 1);
1753         if (size > min(size, 64))
1754            printf("%s\t\t...\n", levels[level + 1]);
1755      }
1756   }
1757}
1758
1759static void
1760cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1761{
1762   uint32_t val = dwords[0] & 0xffff;
1763   switch ((dwords[0] >> 16) & 0xf) {
1764   case 0x0:
1765      dump_float((float *)(dwords + 1), sizedwords - 1, level + 1);
1766      break;
1767   case 0x1:
1768      /* need to figure out how const space is partitioned between
1769       * attributes, textures, etc..
1770       */
1771      if (val < 0x78) {
1772         dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level);
1773      } else {
1774         dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level);
1775      }
1776      break;
1777   case 0x2:
1778      printf("%sset bool const %04x\n", levels[level], val);
1779      break;
1780   case 0x3:
1781      printf("%sset loop const %04x\n", levels[level], val);
1782      break;
1783   case 0x4:
1784      val += 0x2000;
1785      if (dwords[0] & 0x80000000) {
1786         uint32_t srcreg = dwords[1];
1787         uint32_t dstval = dwords[2];
1788
1789         /* TODO: not sure what happens w/ payload != 2.. */
1790         assert(sizedwords == 3);
1791         assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1792
1793         /* note: rnn_regname uses a static buf so we can't do
1794          * two regname() calls for one printf..
1795          */
1796         printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1797         printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1798
1799         dstval += type0_reg_vals[srcreg];
1800
1801         dump_registers(val, &dstval, 1, level + 1);
1802      } else {
1803         dump_registers(val, dwords + 1, sizedwords - 1, level + 1);
1804      }
1805      break;
1806   }
1807}
1808
1809static void dump_register_summary(int level);
1810
1811static void
1812cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1813{
1814   const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
1815   printl(2, "%sevent %s\n", levels[level], name);
1816
1817   if (name && (options->gpu_id > 500)) {
1818      char eventname[64];
1819      snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1820      if (!strcmp(name, "BLIT")) {
1821         do_query(eventname, 0);
1822         print_mode(level);
1823         dump_register_summary(level);
1824      }
1825   }
1826}
1827
1828static void
1829dump_register_summary(int level)
1830{
1831   uint32_t i;
1832   bool saved_summary = summary;
1833   summary = false;
1834
1835   in_summary = true;
1836
1837   /* dump current state of registers: */
1838   printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1839   for (i = 0; i < regcnt(); i++) {
1840      uint32_t regbase = i;
1841      uint32_t lastval = reg_val(regbase);
1842      /* skip registers that haven't been updated since last draw/blit: */
1843      if (!(options->allregs || reg_rewritten(regbase)))
1844         continue;
1845      if (!reg_written(regbase))
1846         continue;
1847      if (lastval != lastvals[regbase]) {
1848         printl(2, "!");
1849         lastvals[regbase] = lastval;
1850      } else {
1851         printl(2, " ");
1852      }
1853      if (reg_rewritten(regbase)) {
1854         printl(2, "+");
1855      } else {
1856         printl(2, " ");
1857      }
1858      printl(2, "\t%08x", lastval);
1859      if (!quiet(2)) {
1860         dump_register(regbase, lastval, level);
1861      }
1862   }
1863
1864   clear_rewritten();
1865
1866   in_summary = false;
1867
1868   draw_count++;
1869   summary = saved_summary;
1870}
1871
1872static uint32_t
1873draw_indx_common(uint32_t *dwords, int level)
1874{
1875   uint32_t prim_type = dwords[1] & 0x1f;
1876   uint32_t source_select = (dwords[1] >> 6) & 0x3;
1877   uint32_t num_indices = dwords[2];
1878   const char *primtype;
1879
1880   primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
1881
1882   do_query(primtype, num_indices);
1883
1884   printl(2, "%sdraw:          %d\n", levels[level], draws[ib]);
1885   printl(2, "%sprim_type:     %s (%d)\n", levels[level], primtype, prim_type);
1886   printl(2, "%ssource_select: %s (%d)\n", levels[level],
1887          rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select);
1888   printl(2, "%snum_indices:   %d\n", levels[level], num_indices);
1889
1890   vertices += num_indices;
1891
1892   draws[ib]++;
1893
1894   return num_indices;
1895}
1896
/* Index size encodings used by the draw-index packets: */
enum pc_di_index_size {
   INDEX_SIZE_16_BIT = 0,
   INDEX_SIZE_32_BIT = 1,
   INDEX_SIZE_8_BIT = 2,

   /* these two share the encoding of the 16 bit size: */
   INDEX_SIZE_IGN = INDEX_SIZE_16_BIT,
   INDEX_SIZE_INVALID = INDEX_SIZE_16_BIT,
};
1904
1905static void
1906cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
1907{
1908   uint32_t num_indices = draw_indx_common(dwords, level);
1909
1910   assert(!is_64b());
1911
1912   /* if we have an index buffer, dump that: */
1913   if (sizedwords == 5) {
1914      void *ptr = hostptr(dwords[3]);
1915      printl(2, "%sgpuaddr:       %08x\n", levels[level], dwords[3]);
1916      printl(2, "%sidx_size:      %d\n", levels[level], dwords[4]);
1917      if (ptr) {
1918         enum pc_di_index_size size =
1919            ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1920         if (!quiet(2)) {
1921            int i;
1922            printf("%sidxs:         ", levels[level]);
1923            if (size == INDEX_SIZE_8_BIT) {
1924               uint8_t *idx = ptr;
1925               for (i = 0; i < dwords[4]; i++)
1926                  printf(" %u", idx[i]);
1927            } else if (size == INDEX_SIZE_16_BIT) {
1928               uint16_t *idx = ptr;
1929               for (i = 0; i < dwords[4] / 2; i++)
1930                  printf(" %u", idx[i]);
1931            } else if (size == INDEX_SIZE_32_BIT) {
1932               uint32_t *idx = ptr;
1933               for (i = 0; i < dwords[4] / 4; i++)
1934                  printf(" %u", idx[i]);
1935            }
1936            printf("\n");
1937            dump_hex(ptr, dwords[4] / 4, level + 1);
1938         }
1939      }
1940   }
1941
1942   /* don't bother dumping registers for the dummy draw_indx's.. */
1943   if (num_indices > 0)
1944      dump_register_summary(level);
1945
1946   needs_wfi = true;
1947}
1948
1949static void
1950cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
1951{
1952   uint32_t num_indices = draw_indx_common(dwords, level);
1953   enum pc_di_index_size size =
1954      ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1955   void *ptr = &dwords[3];
1956   int sz = 0;
1957
1958   assert(!is_64b());
1959
1960   /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
1961   if (!quiet(2)) {
1962      int i;
1963      printf("%sidxs:         ", levels[level]);
1964      if (size == INDEX_SIZE_8_BIT) {
1965         uint8_t *idx = ptr;
1966         for (i = 0; i < num_indices; i++)
1967            printf(" %u", idx[i]);
1968         sz = num_indices;
1969      } else if (size == INDEX_SIZE_16_BIT) {
1970         uint16_t *idx = ptr;
1971         for (i = 0; i < num_indices; i++)
1972            printf(" %u", idx[i]);
1973         sz = num_indices * 2;
1974      } else if (size == INDEX_SIZE_32_BIT) {
1975         uint32_t *idx = ptr;
1976         for (i = 0; i < num_indices; i++)
1977            printf(" %u", idx[i]);
1978         sz = num_indices * 4;
1979      }
1980      printf("\n");
1981      dump_hex(ptr, sz / 4, level + 1);
1982   }
1983
1984   /* don't bother dumping registers for the dummy draw_indx's.. */
1985   if (num_indices > 0)
1986      dump_register_summary(level);
1987}
1988
1989static void
1990cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
1991{
1992   uint32_t num_indices = dwords[2];
1993   uint32_t prim_type = dwords[0] & 0x1f;
1994
1995   do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
1996   print_mode(level);
1997
1998   /* don't bother dumping registers for the dummy draw_indx's.. */
1999   if (num_indices > 0)
2000      dump_register_summary(level);
2001}
2002
2003static void
2004cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2005{
2006   uint32_t prim_type = dwords[0] & 0x1f;
2007   uint64_t addr;
2008
2009   do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2010   print_mode(level);
2011
2012   if (is_64b())
2013      addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2014   else
2015      addr = dwords[1];
2016   dump_gpuaddr_size(addr, level, 0x10, 2);
2017
2018   if (is_64b())
2019      addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
2020   else
2021      addr = dwords[3];
2022   dump_gpuaddr_size(addr, level, 0x10, 2);
2023
2024   dump_register_summary(level);
2025}
2026
2027static void
2028cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2029{
2030   uint32_t prim_type = dwords[0] & 0x1f;
2031   uint64_t addr;
2032
2033   do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2034   print_mode(level);
2035
2036   addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2037   dump_gpuaddr_size(addr, level, 0x10, 2);
2038
2039   dump_register_summary(level);
2040}
2041
2042static void
2043cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
2044{
2045   uint32_t prim_type = dwords[0] & 0x1f;
2046   uint32_t count = dwords[2];
2047
2048   do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2049   print_mode(level);
2050
2051   struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
2052   uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
2053   uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
2054   uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
2055
2056   if (count_dword) {
2057      uint64_t count_addr =
2058         ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
2059      uint32_t *buf = hostptr(count_addr);
2060
2061      /* Don't print more draws than this if we don't know the indirect
2062       * count. It's possible the user will give ~0 or some other large
2063       * value, expecting the GPU to fill in the draw count, and we don't
2064       * want to print a gazillion draws in that case:
2065       */
2066      const uint32_t max_draw_count = 0x100;
2067
2068      /* Assume the indirect count is garbage if it's larger than this
2069       * (quite large) value or 0. Hopefully this catches most cases.
2070       */
2071      const uint32_t max_indirect_draw_count = 0x10000;
2072
2073      if (buf) {
2074         printf("%sindirect count: %u\n", levels[level], *buf);
2075         if (*buf == 0 || *buf > max_indirect_draw_count) {
2076            /* garbage value */
2077            count = min(count, max_draw_count);
2078         } else {
2079            /* not garbage */
2080            count = min(count, *buf);
2081         }
2082      } else {
2083         count = min(count, max_draw_count);
2084      }
2085   }
2086
2087   if (addr_dword && stride_dword) {
2088      uint64_t addr =
2089         ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
2090      uint32_t stride = dwords[stride_dword];
2091
2092      for (unsigned i = 0; i < count; i++, addr += stride) {
2093         printf("%sdraw %d:\n", levels[level], i);
2094         dump_gpuaddr_size(addr, level, 0x10, 2);
2095      }
2096   }
2097
2098   dump_register_summary(level);
2099}
2100
/*
 * Handler for CP_RUN_OPENCL: records a "COMPUTE" query with a count of one
 * and dumps the register summary.  The packet payload is not inspected.
 */
static void
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("COMPUTE", 1);
   dump_register_summary(level);
}
2107
2108static void
2109cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2110{
2111   const char *buf = (void *)dwords;
2112   int i;
2113
2114   if (quiet(3))
2115      return;
2116
2117   // blob doesn't use CP_NOP for string_marker but it does
2118   // use it for things that end up looking like, but aren't
2119   // ascii chars:
2120   if (!options->decode_markers)
2121      return;
2122
2123   for (i = 0; i < 4 * sizedwords; i++) {
2124      if (buf[i] == '\0')
2125         break;
2126      if (isascii(buf[i]))
2127         printf("%c", buf[i]);
2128   }
2129   printf("\n");
2130}
2131
2132static void
2133cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2134{
2135   /* traverse indirect buffers */
2136   uint64_t ibaddr;
2137   uint32_t ibsize;
2138   uint32_t *ptr = NULL;
2139
2140   if (is_64b()) {
2141      /* a5xx+.. high 32b of gpu addr, then size: */
2142      ibaddr = dwords[0];
2143      ibaddr |= ((uint64_t)dwords[1]) << 32;
2144      ibsize = dwords[2];
2145   } else {
2146      ibaddr = dwords[0];
2147      ibsize = dwords[1];
2148   }
2149
2150   if (!quiet(3)) {
2151      if (is_64b()) {
2152         printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr);
2153      } else {
2154         printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2155      }
2156      printf("%sibsize:%08x\n", levels[level], ibsize);
2157   }
2158
2159   if (options->once && has_dumped(ibaddr, enable_mask))
2160      return;
2161
2162   /* 'query-compare' mode implies 'once' mode, although we need only to
2163    * process the cmdstream for *any* enable_mask mode, since we are
2164    * comparing binning vs draw reg values at the same time, ie. it is
2165    * not useful to process the same draw in both binning and draw pass.
2166    */
2167   if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2168      return;
2169
2170   /* map gpuaddr back to hostptr: */
2171   ptr = hostptr(ibaddr);
2172
2173   if (ptr) {
2174      /* If the GPU hung within the target IB, the trigger point will be
2175       * just after the current CP_INDIRECT_BUFFER.  Because the IB is
2176       * executed but never returns.  Account for this by checking if
2177       * the IB returned:
2178       */
2179      highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));
2180
2181      ib++;
2182      ibs[ib].base = ibaddr;
2183      ibs[ib].size = ibsize;
2184
2185      dump_commands(ptr, ibsize, level);
2186      ib--;
2187   } else {
2188      fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2189   }
2190}
2191
2192static void
2193cp_start_bin(uint32_t *dwords, uint32_t sizedwords, int level)
2194{
2195   uint64_t ibaddr;
2196   uint32_t ibsize;
2197   uint32_t loopcount;
2198   uint32_t *ptr = NULL;
2199
2200   loopcount = dwords[0];
2201   ibaddr = dwords[1];
2202   ibaddr |= ((uint64_t)dwords[2]) << 32;
2203   ibsize = dwords[3];
2204
2205   /* map gpuaddr back to hostptr: */
2206   ptr = hostptr(ibaddr);
2207
2208   if (ptr) {
2209      /* If the GPU hung within the target IB, the trigger point will be
2210       * just after the current CP_START_BIN.  Because the IB is
2211       * executed but never returns.  Account for this by checking if
2212       * the IB returned:
2213       */
2214      highlight_gpuaddr(gpuaddr(&dwords[5]));
2215
2216      /* TODO: we should duplicate the body of the loop after each bin, so
2217       * that draws get the correct state. We should also figure out if there
2218       * are any registers that can tell us what bin we're in when we hang so
2219       * that crashdec points to the right place.
2220       */
2221      ib++;
2222      for (uint32_t i = 0; i < loopcount; i++) {
2223         ibs[ib].base = ibaddr;
2224         ibs[ib].size = ibsize;
2225         printf("%sbin %u\n", levels[level], i);
2226         dump_commands(ptr, ibsize, level);
2227         ibaddr += ibsize;
2228         ptr += ibsize;
2229      }
2230      ib--;
2231   } else {
2232      fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2233   }
2234}
2235
2236static void
2237cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2238{
2239   needs_wfi = false;
2240}
2241
2242static void
2243cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2244{
2245   if (quiet(2))
2246      return;
2247
2248   if (is_64b()) {
2249      uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2250      printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2251      dump_hex(&dwords[2], sizedwords - 2, level + 1);
2252
2253      if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2254         dump_commands(&dwords[2], sizedwords - 2, level + 1);
2255   } else {
2256      uint32_t gpuaddr = dwords[0];
2257      printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2258      dump_float((float *)&dwords[1], sizedwords - 1, level + 1);
2259   }
2260}
2261
2262static void
2263cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2264{
2265   uint32_t val = dwords[0] & 0xffff;
2266   uint32_t and = dwords[1];
2267   uint32_t or = dwords[2];
2268   printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1),
2269          and, or);
2270   if (needs_wfi)
2271      printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1),
2272             and, or);
2273   reg_set(val, (reg_val(val) & and) | or);
2274}
2275
2276static void
2277cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2278{
2279   uint32_t val = dwords[0] & 0xffff;
2280   printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2281
2282   if (quiet(2))
2283      return;
2284
2285   uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2286   printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2287   void *ptr = hostptr(gpuaddr);
2288   if (ptr) {
2289      uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2290      dump_hex(ptr, cnt, level + 1);
2291   }
2292}
2293
/*
 * One draw-state group as recorded by cp_set_draw_state().  An entry with
 * count == 0 is treated as empty by load_group().
 */
struct draw_state {
   uint16_t enable_mask; /* bits 20..23 of the group header dword */
   uint16_t flags;       /* bits 16..19 of the group header dword (FLAG_*) */
   uint32_t count;       /* low 16 bits of the header; dwords to decode */
   uint64_t addr;        /* GPU address of the group's command stream */
};

/* Indexed by group id, which is a 5-bit field (hence 32 entries). */
struct draw_state state[32];

/* Bits of draw_state::flags.  FLAG_DIRTY is not referenced in this part
 * of the file.
 */
#define FLAG_DIRTY              0x1
#define FLAG_DISABLE            0x2
#define FLAG_DISABLE_ALL_GROUPS 0x4
#define FLAG_LOAD_IMMED         0x8

/* Last value seen in dwords[0] of CP_SET_MODE (see cp_set_mode()). */
static int draw_mode;
2309
2310static void
2311disable_group(unsigned group_id)
2312{
2313   struct draw_state *ds = &state[group_id];
2314   memset(ds, 0, sizeof(*ds));
2315}
2316
2317static void
2318disable_all_groups(void)
2319{
2320   for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2321      disable_group(i);
2322}
2323
2324static void
2325load_group(unsigned group_id, int level)
2326{
2327   struct draw_state *ds = &state[group_id];
2328
2329   if (!ds->count)
2330      return;
2331
2332   printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2333   printl(2, "%scount: %d\n", levels[level], ds->count);
2334   printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
2335   printl(2, "%sflags: %x\n", levels[level], ds->flags);
2336
2337   if (options->gpu_id >= 600) {
2338      printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2339
2340      if (!(ds->enable_mask & enable_mask)) {
2341         printl(2, "%s\tskipped!\n\n", levels[level]);
2342         return;
2343      }
2344   }
2345
2346   void *ptr = hostptr(ds->addr);
2347   if (ptr) {
2348      if (!quiet(2))
2349         dump_hex(ptr, ds->count, level + 1);
2350
2351      ib++;
2352      dump_commands(ptr, ds->count, level + 1);
2353      ib--;
2354   }
2355}
2356
2357static void
2358load_all_groups(int level)
2359{
2360   /* sanity check, we should never recursively hit recursion here, and if
2361    * we do bad things happen:
2362    */
2363   static bool loading_groups = false;
2364   if (loading_groups) {
2365      printf("ERROR: nothing in draw state should trigger recursively loading "
2366             "groups!\n");
2367      return;
2368   }
2369   loading_groups = true;
2370   for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2371      load_group(i, level);
2372   loading_groups = false;
2373
2374   /* in 'query-compare' mode, defer disabling all groups until we have a
2375    * chance to process the query:
2376    */
2377   if (!options->query_compare)
2378      disable_all_groups();
2379}
2380
2381static void
2382cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2383{
2384   uint32_t i;
2385
2386   for (i = 0; i < sizedwords;) {
2387      struct draw_state *ds;
2388      uint32_t count = dwords[i] & 0xffff;
2389      uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2390      uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2391      uint32_t flags = (dwords[i] >> 16) & 0xf;
2392      uint64_t addr;
2393
2394      if (is_64b()) {
2395         addr = dwords[i + 1];
2396         addr |= ((uint64_t)dwords[i + 2]) << 32;
2397         i += 3;
2398      } else {
2399         addr = dwords[i + 1];
2400         i += 2;
2401      }
2402
2403      if (flags & FLAG_DISABLE_ALL_GROUPS) {
2404         disable_all_groups();
2405         continue;
2406      }
2407
2408      if (flags & FLAG_DISABLE) {
2409         disable_group(group_id);
2410         continue;
2411      }
2412
2413      assert(group_id < ARRAY_SIZE(state));
2414      disable_group(group_id);
2415
2416      ds = &state[group_id];
2417
2418      ds->enable_mask = enable_mask;
2419      ds->flags = flags;
2420      ds->count = count;
2421      ds->addr = addr;
2422
2423      if (flags & FLAG_LOAD_IMMED) {
2424         load_group(group_id, level);
2425         disable_group(group_id);
2426      }
2427   }
2428}
2429
2430static void
2431cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2432{
2433   draw_mode = dwords[0];
2434}
2435
/*
 * Handler for CP_EXEC_CS (execute compute shader): records a "compute"
 * query with a count of zero and dumps the register summary.  The packet
 * payload is not inspected.
 */
static void
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("compute", 0);
   dump_register_summary(level);
}
2443
2444static void
2445cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2446{
2447   uint64_t addr;
2448
2449   if (is_64b()) {
2450      addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2451   } else {
2452      addr = dwords[1];
2453   }
2454
2455   printl(3, "%saddr: %016llx\n", levels[level], addr);
2456   dump_gpuaddr_size(addr, level, 0x10, 2);
2457
2458   do_query("compute", 0);
2459   dump_register_summary(level);
2460}
2461
2462static void
2463cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2464{
2465   render_mode = rnn_enumname(rnn, "a6xx_marker", dwords[0] & 0xf);
2466
2467   if (!strcmp(render_mode, "RM6_BINNING")) {
2468      enable_mask = MODE_BINNING;
2469   } else if (!strcmp(render_mode, "RM6_GMEM")) {
2470      enable_mask = MODE_GMEM;
2471   } else if (!strcmp(render_mode, "RM6_BYPASS")) {
2472      enable_mask = MODE_BYPASS;
2473   }
2474}
2475
2476static void
2477cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2478{
2479   uint64_t addr;
2480   uint32_t *ptr, len;
2481
2482   assert(is_64b());
2483
2484   /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2485    * not sure if this can come in different sizes.
2486    *
2487    * First ptr doesn't seem to be cmdstream, second one does.
2488    *
2489    * Comment from downstream kernel:
2490    *
2491    * SRM -- set render mode (ex binning, direct render etc)
2492    * SRM is set by UMD usually at start of IB to tell CP the type of
2493    * preemption.
2494    * KMD needs to set SRM to NULL to indicate CP that rendering is
2495    * done by IB.
2496    * ------------------------------------------------------------------
2497    *
2498    * Seems to always be one of these two:
2499    * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000
2500    * 00000000 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d
2501    * 001c2000 00000000
2502    *
2503    */
2504
2505   assert(options->gpu_id >= 500);
2506
2507   render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2508
2509   if (sizedwords == 1)
2510      return;
2511
2512   addr = dwords[1];
2513   addr |= ((uint64_t)dwords[2]) << 32;
2514
2515   mode = dwords[3];
2516
2517   dump_gpuaddr(addr, level + 1);
2518
2519   if (sizedwords == 5)
2520      return;
2521
2522   assert(sizedwords == 8);
2523
2524   len = dwords[5];
2525   addr = dwords[6];
2526   addr |= ((uint64_t)dwords[7]) << 32;
2527
2528   printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
2529   printl(3, "%slen:  0x%x\n", levels[level], len);
2530
2531   ptr = hostptr(addr);
2532
2533   if (ptr) {
2534      if (!quiet(2)) {
2535         ib++;
2536         dump_commands(ptr, len, level + 1);
2537         ib--;
2538         dump_hex(ptr, len, level + 1);
2539      }
2540   }
2541}
2542
2543static void
2544cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2545{
2546   uint64_t addr;
2547   uint32_t *ptr, len;
2548
2549   assert(is_64b());
2550   assert(options->gpu_id >= 500);
2551
2552   assert(sizedwords == 8);
2553
2554   addr = dwords[5];
2555   addr |= ((uint64_t)dwords[6]) << 32;
2556   len = dwords[7];
2557
2558   printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);
2559   printl(3, "%slen:  0x%x\n", levels[level], len);
2560
2561   ptr = hostptr(addr);
2562
2563   if (ptr) {
2564      if (!quiet(2)) {
2565         ib++;
2566         dump_commands(ptr, len, level + 1);
2567         ib--;
2568         dump_hex(ptr, len, level + 1);
2569      }
2570   }
2571}
2572
2573static void
2574cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2575{
2576   do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2577   print_mode(level);
2578   dump_register_summary(level);
2579}
2580
2581static void
2582cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2583{
2584   int i;
2585
2586   /* NOTE: seems to write same reg multiple times.. not sure if different parts
2587    * of these are triggered by the FLUSH_SO_n events?? (if that is what they
2588    * actually are?)
2589    */
2590   bool saved_summary = summary;
2591   summary = false;
2592
2593   for (i = 0; i < sizedwords; i += 2) {
2594      dump_register(dwords[i + 0], dwords[i + 1], level + 1);
2595      reg_set(dwords[i + 0], dwords[i + 1]);
2596   }
2597
2598   summary = saved_summary;
2599}
2600
/*
 * Handler for CP_REG_WRITE: prints and applies a single register write.
 * The register offset is the low 16 bits of dwords[1] and the value is
 * dwords[2].
 */
static void
cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
   const uint32_t value = dwords[2];
   const uint32_t reg = dwords[1] & 0xffff;

   (void)sizedwords;

   dump_register(reg, value, level + 1);
   reg_set(reg, value);
}
2609
2610static void
2611cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
2612{
2613   uint64_t addr;
2614   uint32_t size = dwords[2] & 0xffff;
2615   void *ptr;
2616
2617   addr = dwords[0] | ((uint64_t)dwords[1] << 32);
2618
2619   if (!quiet(3)) {
2620      printf("%saddr=%" PRIx64 "\n", levels[level], addr);
2621   }
2622
2623   ptr = hostptr(addr);
2624   if (ptr) {
2625      dump_commands(ptr, size, level + 1);
2626   }
2627}
2628
2629static void
2630cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2631{
2632   skip_ib2_enable_global = dwords[0];
2633}
2634
2635static void
2636cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2637{
2638   skip_ib2_enable_local = dwords[0];
2639}
2640
/*
 * Builds one table entry: the packet name is "CP_" followed by x, fxn is
 * the handler, and any further arguments initialize the options member.
 */
#define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }

/*
 * Packet handlers, looked up by packet name in get_type3_op().  The same
 * table serves both type3 and type7 packets (see dump_commands()).
 */
static const struct type3_op {
   /* packet name as returned by pktname() */
   const char *name;
   /* handler; receives the payload (header dword excluded) and its length */
   void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
   struct {
      /* call load_all_groups() before the packet is printed and handled */
      bool load_all_groups;
   } options;
} type3_op[] = {
   CP(NOP, cp_nop),
   CP(INDIRECT_BUFFER, cp_indirect),
   CP(INDIRECT_BUFFER_PFD, cp_indirect),
   CP(WAIT_FOR_IDLE, cp_wfi),
   CP(REG_RMW, cp_rmw),
   CP(REG_TO_MEM, cp_reg_mem),
   CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
   CP(MEM_WRITE, cp_mem_write),
   CP(EVENT_WRITE, cp_event_write),
   CP(RUN_OPENCL, cp_run_cl),
   CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}),
   CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}),
   CP(SET_CONSTANT, cp_set_const),
   CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
   CP(WIDE_REG_WRITE, cp_wide_reg_write),

   /* for a3xx */
   CP(LOAD_STATE, cp_load_state),
   CP(SET_BIN, cp_set_bin),

   /* for a4xx */
   CP(LOAD_STATE4, cp_load_state),
   CP(SET_DRAW_STATE, cp_set_draw_state),
   CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}),
   CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}),
   CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}),

   /* for a5xx */
   CP(SET_RENDER_MODE, cp_set_render_mode),
   CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
   CP(BLIT, cp_blit),
   CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
   CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}),
   CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}),
   CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}),
   CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
   CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),

   /* for a6xx */
   CP(LOAD_STATE6_GEOM, cp_load_state),
   CP(LOAD_STATE6_FRAG, cp_load_state),
   CP(LOAD_STATE6, cp_load_state),
   CP(SET_MODE, cp_set_mode),
   CP(SET_MARKER, cp_set_marker),
   CP(REG_WRITE, cp_reg_write),

   CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),

   CP(START_BIN, cp_start_bin),
};
2699
/*
 * Do-nothing packet handler, used for packets that have no dedicated
 * handler.  All arguments are ignored.
 */
static void
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;
   (void)level;
}
2704
2705static const struct type3_op *
2706get_type3_op(unsigned opc)
2707{
2708   static const struct type3_op dummy_op = {
2709      .fxn = noop_fxn,
2710   };
2711   const char *name = pktname(opc);
2712
2713   if (!name)
2714      return &dummy_op;
2715
2716   for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
2717      if (!strcmp(name, type3_op[i].name))
2718         return &type3_op[i];
2719
2720   return &dummy_op;
2721}
2722
2723void
2724dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
2725{
2726   int dwords_left = sizedwords;
2727   uint32_t count = 0; /* dword count including packet header */
2728   uint32_t val;
2729
2730   //	assert(dwords);
2731   if (!dwords) {
2732      printf("NULL cmd buffer!\n");
2733      return;
2734   }
2735
2736   assert(ib < ARRAY_SIZE(draws));
2737   draws[ib] = 0;
2738
2739   while (dwords_left > 0) {
2740
2741      current_draw_count = draw_count;
2742
2743      /* hack, this looks like a -1 underflow, in some versions
2744       * when it tries to write zero registers via pkt0
2745       */
2746      //		if ((dwords[0] >> 16) == 0xffff)
2747      //			goto skip;
2748
2749      if (pkt_is_type0(dwords[0])) {
2750         printl(3, "t0");
2751         count = type0_pkt_size(dwords[0]) + 1;
2752         val = type0_pkt_offset(dwords[0]);
2753         assert(val < regcnt());
2754         printl(3, "%swrite %s%s (%04x)\n", levels[level + 1], regname(val, 1),
2755                (dwords[0] & 0x8000) ? " (same register)" : "", val);
2756         dump_registers(val, dwords + 1, count - 1, level + 2);
2757         if (!quiet(3))
2758            dump_hex(dwords, count, level + 1);
2759      } else if (pkt_is_type4(dwords[0])) {
2760         /* basically the same(ish) as type0 prior to a5xx */
2761         printl(3, "t4");
2762         count = type4_pkt_size(dwords[0]) + 1;
2763         val = type4_pkt_offset(dwords[0]);
2764         assert(val < regcnt());
2765         printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1),
2766                val);
2767         dump_registers(val, dwords + 1, count - 1, level + 2);
2768         if (!quiet(3))
2769            dump_hex(dwords, count, level + 1);
2770#if 0
2771      } else if (pkt_is_type1(dwords[0])) {
2772         printl(3, "t1");
2773         count = 3;
2774         val = dwords[0] & 0xfff;
2775         printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2776         dump_registers(val, dwords+1, 1, level+2);
2777         val = (dwords[0] >> 12) & 0xfff;
2778         printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2779         dump_registers(val, dwords+2, 1, level+2);
2780         if (!quiet(3))
2781            dump_hex(dwords, count, level+1);
2782      } else if (pkt_is_type2(dwords[0])) {
2783         printl(3, "t2");
2784         printf("%sNOP\n", levels[level+1]);
2785         count = 1;
2786         if (!quiet(3))
2787            dump_hex(dwords, count, level+1);
2788#endif
2789      } else if (pkt_is_type3(dwords[0])) {
2790         count = type3_pkt_size(dwords[0]) + 1;
2791         val = cp_type3_opcode(dwords[0]);
2792         const struct type3_op *op = get_type3_op(val);
2793         if (op->options.load_all_groups)
2794            load_all_groups(level + 1);
2795         printl(3, "t3");
2796         const char *name = pktname(val);
2797         if (!quiet(2)) {
2798            printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],
2799                   rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
2800                   count, (dwords[0] & 0x1) ? " (predicated)" : "");
2801         }
2802         if (name)
2803            dump_domain(dwords + 1, count - 1, level + 2, name);
2804         op->fxn(dwords + 1, count - 1, level + 1);
2805         if (!quiet(2))
2806            dump_hex(dwords, count, level + 1);
2807      } else if (pkt_is_type7(dwords[0])) {
2808         count = type7_pkt_size(dwords[0]) + 1;
2809         val = cp_type7_opcode(dwords[0]);
2810         const struct type3_op *op = get_type3_op(val);
2811         if (op->options.load_all_groups)
2812            load_all_groups(level + 1);
2813         printl(3, "t7");
2814         const char *name = pktname(val);
2815         if (!quiet(2)) {
2816            printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
2817                   rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
2818                   count);
2819         }
2820         if (name) {
2821            /* special hack for two packets that decode the same way
2822             * on a6xx:
2823             */
2824            if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
2825                !strcmp(name, "CP_LOAD_STATE6_GEOM"))
2826               name = "CP_LOAD_STATE6";
2827            dump_domain(dwords + 1, count - 1, level + 2, name);
2828         }
2829         op->fxn(dwords + 1, count - 1, level + 1);
2830         if (!quiet(2))
2831            dump_hex(dwords, count, level + 1);
2832      } else if (pkt_is_type2(dwords[0])) {
2833         printl(3, "t2");
2834         printl(3, "%snop\n", levels[level + 1]);
2835      } else {
2836         /* for 5xx+ we can do a passable job of looking for start of next valid
2837          * packet: */
2838         if (options->gpu_id >= 500) {
2839            while (dwords_left > 0) {
2840               if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))
2841                  break;
2842               printf("bad type! %08x\n", dwords[0]);
2843               dwords++;
2844               dwords_left--;
2845            }
2846         } else {
2847            printf("bad type! %08x\n", dwords[0]);
2848            return;
2849         }
2850      }
2851
2852      dwords += count;
2853      dwords_left -= count;
2854   }
2855
2856   if (dwords_left < 0)
2857      printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
2858}
2859