1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "common/intel_decoder.h"
25#include "intel_disasm.h"
26#include "util/macros.h"
27#include "util/u_math.h" /* Needed for ROUND_DOWN_TO */
28
29#include <string.h>
30
31void
32intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx,
33                            const struct brw_isa_info *isa,
34                            const struct intel_device_info *devinfo,
35                            FILE *fp, enum intel_batch_decode_flags flags,
36                            const char *xml_path,
37                            struct intel_batch_decode_bo (*get_bo)(void *,
38                                                                   bool,
39                                                                   uint64_t),
40                            unsigned (*get_state_size)(void *, uint64_t,
41                                                       uint64_t),
42                            void *user_data)
43{
44   memset(ctx, 0, sizeof(*ctx));
45
46   ctx->isa = isa;
47   ctx->devinfo = *devinfo;
48   ctx->get_bo = get_bo;
49   ctx->get_state_size = get_state_size;
50   ctx->user_data = user_data;
51   ctx->fp = fp;
52   ctx->flags = flags;
53   ctx->max_vbo_decoded_lines = -1; /* No limit! */
54   ctx->engine = I915_ENGINE_CLASS_RENDER;
55
56   if (xml_path == NULL)
57      ctx->spec = intel_spec_load(devinfo);
58   else
59      ctx->spec = intel_spec_load_from_path(devinfo, xml_path);
60}
61
62void
63intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx)
64{
65   intel_spec_destroy(ctx->spec);
66}
67
68#define CSI "\e["
69#define RED_COLOR    CSI "31m"
70#define BLUE_HEADER  CSI "0;44m" CSI "1;37m"
71#define GREEN_HEADER CSI "1;42m"
72#define NORMAL       CSI "0m"
73
74static void
75ctx_print_group(struct intel_batch_decode_ctx *ctx,
76                struct intel_group *group,
77                uint64_t address, const void *map)
78{
79   intel_print_group(ctx->fp, group, address, map, 0,
80                   (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) != 0);
81}
82
83static struct intel_batch_decode_bo
84ctx_get_bo(struct intel_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr)
85{
86   if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0)) {
87      /* On Broadwell and above, we have 48-bit addresses which consume two
88       * dwords.  Some packets require that these get stored in a "canonical
89       * form" which means that bit 47 is sign-extended through the upper
90       * bits. In order to correctly handle those aub dumps, we need to mask
91       * off the top 16 bits.
92       */
93      addr &= (~0ull >> 16);
94   }
95
96   struct intel_batch_decode_bo bo = ctx->get_bo(ctx->user_data, ppgtt, addr);
97
98   if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0))
99      bo.addr &= (~0ull >> 16);
100
101   /* We may actually have an offset into the bo */
102   if (bo.map != NULL) {
103      assert(bo.addr <= addr);
104      uint64_t offset = addr - bo.addr;
105      bo.map += offset;
106      bo.addr += offset;
107      bo.size -= offset;
108   }
109
110   return bo;
111}
112
113static int
114update_count(struct intel_batch_decode_ctx *ctx,
115             uint64_t address,
116             uint64_t base_address,
117             unsigned element_dwords,
118             unsigned guess)
119{
120   unsigned size = 0;
121
122   if (ctx->get_state_size)
123      size = ctx->get_state_size(ctx->user_data, address, base_address);
124
125   if (size > 0)
126      return size / (sizeof(uint32_t) * element_dwords);
127
128   /* In the absence of any information, just guess arbitrarily. */
129   return guess;
130}
131
132static void
133ctx_disassemble_program(struct intel_batch_decode_ctx *ctx,
134                        uint32_t ksp, const char *type)
135{
136   uint64_t addr = ctx->instruction_base + ksp;
137   struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
138   if (!bo.map)
139      return;
140
141   fprintf(ctx->fp, "\nReferenced %s:\n", type);
142   intel_disassemble(ctx->isa, bo.map, 0, ctx->fp);
143}
144
145/* Heuristic to determine whether a uint32_t is probably actually a float
146 * (http://stackoverflow.com/a/2953466)
147 */
148
149static bool
150probably_float(uint32_t bits)
151{
152   int exp = ((bits & 0x7f800000U) >> 23) - 127;
153   uint32_t mant = bits & 0x007fffff;
154
155   /* +- 0.0 */
156   if (exp == -127 && mant == 0)
157      return true;
158
159   /* +- 1 billionth to 1 billion */
160   if (-30 <= exp && exp <= 30)
161      return true;
162
163   /* some value with only a few binary digits */
164   if ((mant & 0x0000ffff) == 0)
165      return true;
166
167   return false;
168}
169
170static void
171ctx_print_buffer(struct intel_batch_decode_ctx *ctx,
172                 struct intel_batch_decode_bo bo,
173                 uint32_t read_length,
174                 uint32_t pitch,
175                 int max_lines)
176{
177   const uint32_t *dw_end =
178         bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4);
179
180   int column_count = 0, pitch_col_count = 0, line_count = -1;
181   for (const uint32_t *dw = bo.map; dw < dw_end; dw++) {
182      if (pitch_col_count * 4 == pitch || column_count == 8) {
183         fprintf(ctx->fp, "\n");
184         column_count = 0;
185         if (pitch_col_count * 4 == pitch)
186            pitch_col_count = 0;
187         line_count++;
188
189         if (max_lines >= 0 && line_count >= max_lines)
190            break;
191      }
192      fprintf(ctx->fp, column_count == 0 ? "  " : " ");
193
194      if ((ctx->flags & INTEL_BATCH_DECODE_FLOATS) && probably_float(*dw))
195         fprintf(ctx->fp, "  %8.2f", *(float *) dw);
196      else
197         fprintf(ctx->fp, "  0x%08x", *dw);
198
199      column_count++;
200      pitch_col_count++;
201   }
202   fprintf(ctx->fp, "\n");
203}
204
205static struct intel_group *
206intel_ctx_find_instruction(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
207{
208   return intel_spec_find_instruction(ctx->spec, ctx->engine, p);
209}
210
211static void
212handle_state_base_address(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
213{
214   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
215
216   struct intel_field_iterator iter;
217   intel_field_iterator_init(&iter, inst, p, 0, false);
218
219   uint64_t surface_base = 0, dynamic_base = 0, instruction_base = 0;
220   bool surface_modify = 0, dynamic_modify = 0, instruction_modify = 0;
221
222   while (intel_field_iterator_next(&iter)) {
223      if (strcmp(iter.name, "Surface State Base Address") == 0) {
224         surface_base = iter.raw_value;
225      } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) {
226         dynamic_base = iter.raw_value;
227      } else if (strcmp(iter.name, "Instruction Base Address") == 0) {
228         instruction_base = iter.raw_value;
229      } else if (strcmp(iter.name, "Surface State Base Address Modify Enable") == 0) {
230         surface_modify = iter.raw_value;
231      } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
232         dynamic_modify = iter.raw_value;
233      } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
234         instruction_modify = iter.raw_value;
235      }
236   }
237
238   if (dynamic_modify)
239      ctx->dynamic_base = dynamic_base;
240
241   if (surface_modify)
242      ctx->surface_base = surface_base;
243
244   if (instruction_modify)
245      ctx->instruction_base = instruction_base;
246}
247
248static void
249handle_binding_table_pool_alloc(struct intel_batch_decode_ctx *ctx,
250                                const uint32_t *p)
251{
252   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
253
254   struct intel_field_iterator iter;
255   intel_field_iterator_init(&iter, inst, p, 0, false);
256
257   uint64_t bt_pool_base = 0;
258   bool bt_pool_enable = false;
259
260   while (intel_field_iterator_next(&iter)) {
261      if (strcmp(iter.name, "Binding Table Pool Base Address") == 0) {
262         bt_pool_base = iter.raw_value;
263      } else if (strcmp(iter.name, "Binding Table Pool Enable") == 0) {
264         bt_pool_enable = iter.raw_value;
265      }
266   }
267
268   if (bt_pool_enable || ctx->devinfo.verx10 >= 125) {
269      ctx->bt_pool_base = bt_pool_base;
270   } else {
271      ctx->bt_pool_base = 0;
272   }
273}
274
275static void
276dump_binding_table(struct intel_batch_decode_ctx *ctx,
277                   uint32_t offset, int count)
278{
279   struct intel_group *strct =
280      intel_spec_find_struct(ctx->spec, "RENDER_SURFACE_STATE");
281   if (strct == NULL) {
282      fprintf(ctx->fp, "did not find RENDER_SURFACE_STATE info\n");
283      return;
284   }
285
286   /* Most platforms use a 16-bit pointer with 32B alignment in bits 15:5. */
287   uint32_t btp_alignment = 32;
288   uint32_t btp_pointer_bits = 16;
289
290   if (ctx->devinfo.verx10 >= 125) {
291      /* The pointer is now 21-bit with 32B alignment in bits 20:5. */
292      btp_pointer_bits = 21;
293   } else if (ctx->use_256B_binding_tables) {
294      /* When 256B binding tables are enabled, we have to shift the offset
295       * which is stored in bits 15:5 but interpreted as bits 18:8 of the
296       * actual offset.  The effective pointer is 19-bit with 256B alignment.
297       */
298      offset <<= 3;
299      btp_pointer_bits = 19;
300      btp_alignment = 256;
301   }
302
303   const uint64_t bt_pool_base = ctx->bt_pool_base ? ctx->bt_pool_base :
304                                                     ctx->surface_base;
305
306   if (count < 0) {
307      count = update_count(ctx, bt_pool_base + offset,
308                           bt_pool_base, 1, 8);
309   }
310
311   if (offset % btp_alignment != 0 || offset >= (1u << btp_pointer_bits)) {
312      fprintf(ctx->fp, "  invalid binding table pointer\n");
313      return;
314   }
315
316   struct intel_batch_decode_bo bind_bo =
317      ctx_get_bo(ctx, true, bt_pool_base + offset);
318
319   if (bind_bo.map == NULL) {
320      fprintf(ctx->fp, "  binding table unavailable\n");
321      return;
322   }
323
324   const uint32_t *pointers = bind_bo.map;
325   for (int i = 0; i < count; i++) {
326      if (pointers[i] == 0)
327         continue;
328
329      uint64_t addr = ctx->surface_base + pointers[i];
330      struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
331      uint32_t size = strct->dw_length * 4;
332
333      if (pointers[i] % 32 != 0 ||
334          addr < bo.addr || addr + size >= bo.addr + bo.size) {
335         fprintf(ctx->fp, "pointer %u: 0x%08x <not valid>\n", i, pointers[i]);
336         continue;
337      }
338
339      fprintf(ctx->fp, "pointer %u: 0x%08x\n", i, pointers[i]);
340      ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr));
341   }
342}
343
344static void
345dump_samplers(struct intel_batch_decode_ctx *ctx, uint32_t offset, int count)
346{
347   struct intel_group *strct = intel_spec_find_struct(ctx->spec, "SAMPLER_STATE");
348   uint64_t state_addr = ctx->dynamic_base + offset;
349
350   assert(count > 0);
351
352   struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
353   const void *state_map = bo.map;
354
355   if (state_map == NULL) {
356      fprintf(ctx->fp, "  samplers unavailable\n");
357      return;
358   }
359
360   if (offset % 32 != 0) {
361      fprintf(ctx->fp, "  invalid sampler state pointer\n");
362      return;
363   }
364
365   const unsigned sampler_state_size = strct->dw_length * 4;
366
367   if (count * sampler_state_size >= bo.size) {
368      fprintf(ctx->fp, "  sampler state ends after bo ends\n");
369      assert(!"sampler state ends after bo ends");
370      return;
371   }
372
373   for (int i = 0; i < count; i++) {
374      fprintf(ctx->fp, "sampler state %d\n", i);
375      ctx_print_group(ctx, strct, state_addr, state_map);
376      state_addr += sampler_state_size;
377      state_map += sampler_state_size;
378   }
379}
380
381static void
382handle_interface_descriptor_data(struct intel_batch_decode_ctx *ctx,
383                                 struct intel_group *desc, const uint32_t *p)
384{
385   uint64_t ksp = 0;
386   uint32_t sampler_offset = 0, sampler_count = 0;
387   uint32_t binding_table_offset = 0, binding_entry_count = 0;
388
389   struct intel_field_iterator iter;
390   intel_field_iterator_init(&iter, desc, p, 0, false);
391   while (intel_field_iterator_next(&iter)) {
392      if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
393         ksp = strtoll(iter.value, NULL, 16);
394      } else if (strcmp(iter.name, "Sampler State Pointer") == 0) {
395         sampler_offset = strtol(iter.value, NULL, 16);
396      } else if (strcmp(iter.name, "Sampler Count") == 0) {
397         sampler_count = strtol(iter.value, NULL, 10);
398      } else if (strcmp(iter.name, "Binding Table Pointer") == 0) {
399         binding_table_offset = strtol(iter.value, NULL, 16);
400      } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) {
401         binding_entry_count = strtol(iter.value, NULL, 10);
402      }
403   }
404
405   ctx_disassemble_program(ctx, ksp, "compute shader");
406   fprintf(ctx->fp, "\n");
407
408   if (sampler_count)
409      dump_samplers(ctx, sampler_offset, sampler_count);
410   if (binding_entry_count)
411      dump_binding_table(ctx, binding_table_offset, binding_entry_count);
412}
413
414static void
415handle_media_interface_descriptor_load(struct intel_batch_decode_ctx *ctx,
416                                       const uint32_t *p)
417{
418   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
419   struct intel_group *desc =
420      intel_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
421
422   struct intel_field_iterator iter;
423   intel_field_iterator_init(&iter, inst, p, 0, false);
424   uint32_t descriptor_offset = 0;
425   int descriptor_count = 0;
426   while (intel_field_iterator_next(&iter)) {
427      if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) {
428         descriptor_offset = strtol(iter.value, NULL, 16);
429      } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) {
430         descriptor_count =
431            strtol(iter.value, NULL, 16) / (desc->dw_length * 4);
432      }
433   }
434
435   uint64_t desc_addr = ctx->dynamic_base + descriptor_offset;
436   struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, desc_addr);
437   const void *desc_map = bo.map;
438
439   if (desc_map == NULL) {
440      fprintf(ctx->fp, "  interface descriptors unavailable\n");
441      return;
442   }
443
444   for (int i = 0; i < descriptor_count; i++) {
445      fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
446
447      ctx_print_group(ctx, desc, desc_addr, desc_map);
448
449      handle_interface_descriptor_data(ctx, desc, desc_map);
450
451      desc_map += desc->dw_length;
452      desc_addr += desc->dw_length * 4;
453   }
454}
455
456static void
457handle_compute_walker(struct intel_batch_decode_ctx *ctx,
458                      const uint32_t *p)
459{
460   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
461
462   struct intel_field_iterator iter;
463   intel_field_iterator_init(&iter, inst, p, 0, false);
464   while (intel_field_iterator_next(&iter)) {
465      if (strcmp(iter.name, "Interface Descriptor") == 0) {
466         handle_interface_descriptor_data(ctx, iter.struct_desc,
467                                          &iter.p[iter.start_bit / 32]);
468      }
469   }
470}
471
472static void
473handle_3dstate_vertex_buffers(struct intel_batch_decode_ctx *ctx,
474                              const uint32_t *p)
475{
476   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
477   struct intel_group *vbs = intel_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE");
478
479   struct intel_batch_decode_bo vb = {};
480   uint32_t vb_size = 0;
481   int index = -1;
482   int pitch = -1;
483   bool ready = false;
484
485   struct intel_field_iterator iter;
486   intel_field_iterator_init(&iter, inst, p, 0, false);
487   while (intel_field_iterator_next(&iter)) {
488      if (iter.struct_desc != vbs)
489         continue;
490
491      struct intel_field_iterator vbs_iter;
492      intel_field_iterator_init(&vbs_iter, vbs, &iter.p[iter.start_bit / 32], 0, false);
493      while (intel_field_iterator_next(&vbs_iter)) {
494         if (strcmp(vbs_iter.name, "Vertex Buffer Index") == 0) {
495            index = vbs_iter.raw_value;
496         } else if (strcmp(vbs_iter.name, "Buffer Pitch") == 0) {
497            pitch = vbs_iter.raw_value;
498         } else if (strcmp(vbs_iter.name, "Buffer Starting Address") == 0) {
499            vb = ctx_get_bo(ctx, true, vbs_iter.raw_value);
500         } else if (strcmp(vbs_iter.name, "Buffer Size") == 0) {
501            vb_size = vbs_iter.raw_value;
502            ready = true;
503         } else if (strcmp(vbs_iter.name, "End Address") == 0) {
504            if (vb.map && vbs_iter.raw_value >= vb.addr)
505               vb_size = (vbs_iter.raw_value + 1) - vb.addr;
506            else
507               vb_size = 0;
508            ready = true;
509         }
510
511         if (!ready)
512            continue;
513
514         fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size);
515
516         if (vb.map == NULL) {
517            fprintf(ctx->fp, "  buffer contents unavailable\n");
518            continue;
519         }
520
521         if (vb.map == 0 || vb_size == 0)
522            continue;
523
524         ctx_print_buffer(ctx, vb, vb_size, pitch, ctx->max_vbo_decoded_lines);
525
526         vb.map = NULL;
527         vb_size = 0;
528         index = -1;
529         pitch = -1;
530         ready = false;
531      }
532   }
533}
534
535static void
536handle_3dstate_index_buffer(struct intel_batch_decode_ctx *ctx,
537                            const uint32_t *p)
538{
539   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
540
541   struct intel_batch_decode_bo ib = {};
542   uint32_t ib_size = 0;
543   uint32_t format = 0;
544
545   struct intel_field_iterator iter;
546   intel_field_iterator_init(&iter, inst, p, 0, false);
547   while (intel_field_iterator_next(&iter)) {
548      if (strcmp(iter.name, "Index Format") == 0) {
549         format = iter.raw_value;
550      } else if (strcmp(iter.name, "Buffer Starting Address") == 0) {
551         ib = ctx_get_bo(ctx, true, iter.raw_value);
552      } else if (strcmp(iter.name, "Buffer Size") == 0) {
553         ib_size = iter.raw_value;
554      }
555   }
556
557   if (ib.map == NULL) {
558      fprintf(ctx->fp, "  buffer contents unavailable\n");
559      return;
560   }
561
562   const void *m = ib.map;
563   const void *ib_end = ib.map + MIN2(ib.size, ib_size);
564   for (int i = 0; m < ib_end && i < 10; i++) {
565      switch (format) {
566      case 0:
567         fprintf(ctx->fp, "%3d ", *(uint8_t *)m);
568         m += 1;
569         break;
570      case 1:
571         fprintf(ctx->fp, "%3d ", *(uint16_t *)m);
572         m += 2;
573         break;
574      case 2:
575         fprintf(ctx->fp, "%3d ", *(uint32_t *)m);
576         m += 4;
577         break;
578      }
579   }
580
581   if (m < ib_end)
582      fprintf(ctx->fp, "...");
583   fprintf(ctx->fp, "\n");
584}
585
586static void
587decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
588{
589   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
590
591   uint64_t ksp = 0;
592   bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gfx8+ only */
593   bool is_enabled = true;
594
595   struct intel_field_iterator iter;
596   intel_field_iterator_init(&iter, inst, p, 0, false);
597   while (intel_field_iterator_next(&iter)) {
598      if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
599         ksp = iter.raw_value;
600      } else if (strcmp(iter.name, "SIMD8 Dispatch Enable") == 0) {
601         is_simd8 = iter.raw_value;
602      } else if (strcmp(iter.name, "Dispatch Mode") == 0) {
603         is_simd8 = strcmp(iter.value, "SIMD8") == 0;
604      } else if (strcmp(iter.name, "Dispatch Enable") == 0) {
605         is_simd8 = strcmp(iter.value, "SIMD8") == 0;
606      } else if (strcmp(iter.name, "Enable") == 0) {
607         is_enabled = iter.raw_value;
608      }
609   }
610
611   const char *type =
612      strcmp(inst->name,   "VS_STATE") == 0 ? "vertex shader" :
613      strcmp(inst->name,   "GS_STATE") == 0 ? "geometry shader" :
614      strcmp(inst->name,   "SF_STATE") == 0 ? "strips and fans shader" :
615      strcmp(inst->name, "CLIP_STATE") == 0 ? "clip shader" :
616      strcmp(inst->name, "3DSTATE_DS") == 0 ? "tessellation evaluation shader" :
617      strcmp(inst->name, "3DSTATE_HS") == 0 ? "tessellation control shader" :
618      strcmp(inst->name, "3DSTATE_VS") == 0 ? (is_simd8 ? "SIMD8 vertex shader" : "vec4 vertex shader") :
619      strcmp(inst->name, "3DSTATE_GS") == 0 ? (is_simd8 ? "SIMD8 geometry shader" : "vec4 geometry shader") :
620      NULL;
621
622   if (is_enabled) {
623      ctx_disassemble_program(ctx, ksp, type);
624      fprintf(ctx->fp, "\n");
625   }
626}
627
628static void
629decode_mesh_task_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
630{
631   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
632
633   uint64_t ksp = 0;
634   uint64_t local_x_maximum = 0;
635   uint64_t threads = 0;
636
637   struct intel_field_iterator iter;
638   intel_field_iterator_init(&iter, inst, p, 0, false);
639   while (intel_field_iterator_next(&iter)) {
640      if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
641         ksp = iter.raw_value;
642      } else if (strcmp(iter.name, "Local X Maximum") == 0) {
643         local_x_maximum = iter.raw_value;
644      } else if (strcmp(iter.name, "Number of Threads in GPGPU Thread Group") == 0) {
645         threads = iter.raw_value;
646      }
647   }
648
649   const char *type =
650      strcmp(inst->name,   "3DSTATE_MESH_SHADER") == 0 ? "mesh shader" :
651      strcmp(inst->name,   "3DSTATE_TASK_SHADER") == 0 ? "task shader" :
652      NULL;
653
654   if (threads && local_x_maximum) {
655      ctx_disassemble_program(ctx, ksp, type);
656      fprintf(ctx->fp, "\n");
657   }
658}
659
660static void
661decode_ps_kern(struct intel_batch_decode_ctx *ctx,
662               struct intel_group *inst, const uint32_t *p)
663{
664   bool single_ksp = ctx->devinfo.ver == 4;
665   uint64_t ksp[3] = {0, 0, 0};
666   bool enabled[3] = {false, false, false};
667
668   struct intel_field_iterator iter;
669   intel_field_iterator_init(&iter, inst, p, 0, false);
670   while (intel_field_iterator_next(&iter)) {
671      if (strncmp(iter.name, "Kernel Start Pointer ",
672                  strlen("Kernel Start Pointer ")) == 0) {
673         int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
674         ksp[idx] = strtol(iter.value, NULL, 16);
675      } else if (strcmp(iter.name, "8 Pixel Dispatch Enable") == 0) {
676         enabled[0] = strcmp(iter.value, "true") == 0;
677      } else if (strcmp(iter.name, "16 Pixel Dispatch Enable") == 0) {
678         enabled[1] = strcmp(iter.value, "true") == 0;
679      } else if (strcmp(iter.name, "32 Pixel Dispatch Enable") == 0) {
680         enabled[2] = strcmp(iter.value, "true") == 0;
681      }
682   }
683
684   if (single_ksp)
685      ksp[1] = ksp[2] = ksp[0];
686
687   /* Reorder KSPs to be [8, 16, 32] instead of the hardware order. */
688   if (enabled[0] + enabled[1] + enabled[2] == 1) {
689      if (enabled[1]) {
690         ksp[1] = ksp[0];
691         ksp[0] = 0;
692      } else if (enabled[2]) {
693         ksp[2] = ksp[0];
694         ksp[0] = 0;
695      }
696   } else {
697      uint64_t tmp = ksp[1];
698      ksp[1] = ksp[2];
699      ksp[2] = tmp;
700   }
701
702   if (enabled[0])
703      ctx_disassemble_program(ctx, ksp[0], "SIMD8 fragment shader");
704   if (enabled[1])
705      ctx_disassemble_program(ctx, ksp[1], "SIMD16 fragment shader");
706   if (enabled[2])
707      ctx_disassemble_program(ctx, ksp[2], "SIMD32 fragment shader");
708
709   if (enabled[0] || enabled[1] || enabled[2])
710      fprintf(ctx->fp, "\n");
711}
712
713static void
714decode_ps_kernels(struct intel_batch_decode_ctx *ctx,
715                  const uint32_t *p)
716{
717   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
718   decode_ps_kern(ctx, inst, p);
719}
720
721static void
722decode_3dstate_constant_all(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
723{
724   struct intel_group *inst =
725      intel_spec_find_instruction(ctx->spec, ctx->engine, p);
726   struct intel_group *body =
727      intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_ALL_DATA");
728
729   uint32_t read_length[4] = {0};
730   struct intel_batch_decode_bo buffer[4];
731   memset(buffer, 0, sizeof(buffer));
732
733   struct intel_field_iterator outer;
734   intel_field_iterator_init(&outer, inst, p, 0, false);
735   int idx = 0;
736   while (intel_field_iterator_next(&outer)) {
737      if (outer.struct_desc != body)
738         continue;
739
740      struct intel_field_iterator iter;
741      intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
742                              0, false);
743      while (intel_field_iterator_next(&iter)) {
744         if (!strcmp(iter.name, "Pointer To Constant Buffer")) {
745            buffer[idx] = ctx_get_bo(ctx, true, iter.raw_value);
746         } else if (!strcmp(iter.name, "Constant Buffer Read Length")) {
747            read_length[idx] = iter.raw_value;
748         }
749      }
750      idx++;
751   }
752
753   for (int i = 0; i < 4; i++) {
754      if (read_length[i] == 0 || buffer[i].map == NULL)
755         continue;
756
757      unsigned size = read_length[i] * 32;
758      fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
759
760      ctx_print_buffer(ctx, buffer[i], size, 0, -1);
761   }
762}
763
764static void
765decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
766{
767   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
768   struct intel_group *body =
769      intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");
770
771   uint32_t read_length[4] = {0};
772   uint64_t read_addr[4] = {0};
773
774   struct intel_field_iterator outer;
775   intel_field_iterator_init(&outer, inst, p, 0, false);
776   while (intel_field_iterator_next(&outer)) {
777      if (outer.struct_desc != body)
778         continue;
779
780      struct intel_field_iterator iter;
781      intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
782                              0, false);
783
784      while (intel_field_iterator_next(&iter)) {
785         int idx;
786         if (sscanf(iter.name, "Read Length[%d]", &idx) == 1) {
787            read_length[idx] = iter.raw_value;
788         } else if (sscanf(iter.name, "Buffer[%d]", &idx) == 1) {
789            read_addr[idx] = iter.raw_value;
790         }
791      }
792
793      for (int i = 0; i < 4; i++) {
794         if (read_length[i] == 0)
795            continue;
796
797         struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr[i]);
798         if (!buffer.map) {
799            fprintf(ctx->fp, "constant buffer %d unavailable\n", i);
800            continue;
801         }
802
803         unsigned size = read_length[i] * 32;
804         fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
805
806         ctx_print_buffer(ctx, buffer, size, 0, -1);
807      }
808   }
809}
810
811static void
812decode_gfx4_constant_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
813{
814   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
815   uint64_t read_length = 0, read_addr = 0, valid = 0;
816   struct intel_field_iterator iter;
817   intel_field_iterator_init(&iter, inst, p, 0, false);
818
819   while (intel_field_iterator_next(&iter)) {
820      if (!strcmp(iter.name, "Buffer Length")) {
821         read_length = iter.raw_value;
822      } else if (!strcmp(iter.name, "Valid")) {
823         valid = iter.raw_value;
824      } else if (!strcmp(iter.name, "Buffer Starting Address")) {
825         read_addr = iter.raw_value;
826      }
827   }
828
829   if (!valid)
830      return;
831
832   struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr);
833   if (!buffer.map) {
834      fprintf(ctx->fp, "constant buffer unavailable\n");
835      return;
836   }
837   unsigned size = (read_length + 1) * 16 * sizeof(float);
838   fprintf(ctx->fp, "constant buffer size %u\n", size);
839
840   ctx_print_buffer(ctx, buffer, size, 0, -1);
841}
842
843
844static void
845decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
846                                           const uint32_t *p)
847{
848   fprintf(ctx->fp, "VS Binding Table:\n");
849   dump_binding_table(ctx, p[1], -1);
850
851   fprintf(ctx->fp, "GS Binding Table:\n");
852   dump_binding_table(ctx, p[2], -1);
853
854   if (ctx->devinfo.ver < 6) {
855      fprintf(ctx->fp, "CLIP Binding Table:\n");
856      dump_binding_table(ctx, p[3], -1);
857      fprintf(ctx->fp, "SF Binding Table:\n");
858      dump_binding_table(ctx, p[4], -1);
859      fprintf(ctx->fp, "PS Binding Table:\n");
860      dump_binding_table(ctx, p[5], -1);
861   } else {
862      fprintf(ctx->fp, "PS Binding Table:\n");
863      dump_binding_table(ctx, p[3], -1);
864   }
865}
866
867static void
868decode_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
869                                      const uint32_t *p)
870{
871   dump_binding_table(ctx, p[1], -1);
872}
873
874static void
875decode_3dstate_sampler_state_pointers(struct intel_batch_decode_ctx *ctx,
876                                      const uint32_t *p)
877{
878   dump_samplers(ctx, p[1], 1);
879}
880
881static void
882decode_3dstate_sampler_state_pointers_gfx6(struct intel_batch_decode_ctx *ctx,
883                                           const uint32_t *p)
884{
885   dump_samplers(ctx, p[1], 1);
886   dump_samplers(ctx, p[2], 1);
887   dump_samplers(ctx, p[3], 1);
888}
889
890static bool
891str_ends_with(const char *str, const char *end)
892{
893   int offset = strlen(str) - strlen(end);
894   if (offset < 0)
895      return false;
896
897   return strcmp(str + offset, end) == 0;
898}
899
900static void
901decode_dynamic_state(struct intel_batch_decode_ctx *ctx,
902                       const char *struct_type, uint32_t state_offset,
903                       int count)
904{
905   uint64_t state_addr = ctx->dynamic_base + state_offset;
906   struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
907   const void *state_map = bo.map;
908
909   if (state_map == NULL) {
910      fprintf(ctx->fp, "  dynamic %s state unavailable\n", struct_type);
911      return;
912   }
913
914   struct intel_group *state = intel_spec_find_struct(ctx->spec, struct_type);
915   if (strcmp(struct_type, "BLEND_STATE") == 0) {
916      /* Blend states are different from the others because they have a header
917       * struct called BLEND_STATE which is followed by a variable number of
918       * BLEND_STATE_ENTRY structs.
919       */
920      fprintf(ctx->fp, "%s\n", struct_type);
921      ctx_print_group(ctx, state, state_addr, state_map);
922
923      state_addr += state->dw_length * 4;
924      state_map += state->dw_length * 4;
925
926      struct_type = "BLEND_STATE_ENTRY";
927      state = intel_spec_find_struct(ctx->spec, struct_type);
928   }
929
930   count = update_count(ctx, ctx->dynamic_base + state_offset,
931                        ctx->dynamic_base, state->dw_length, count);
932
933   for (int i = 0; i < count; i++) {
934      fprintf(ctx->fp, "%s %d\n", struct_type, i);
935      ctx_print_group(ctx, state, state_addr, state_map);
936
937      state_addr += state->dw_length * 4;
938      state_map += state->dw_length * 4;
939   }
940}
941
942static void
943decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx,
944                              const char *struct_type, const uint32_t *p,
945                              int count)
946{
947   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
948
949   uint32_t state_offset = 0;
950
951   struct intel_field_iterator iter;
952   intel_field_iterator_init(&iter, inst, p, 0, false);
953   while (intel_field_iterator_next(&iter)) {
954      if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) {
955         state_offset = iter.raw_value;
956         break;
957      }
958   }
959   decode_dynamic_state(ctx, struct_type, state_offset, count);
960}
961
962static void
963decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx *ctx,
964                                       const uint32_t *p)
965{
966   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
967   uint32_t state_offset = 0;
968   bool clip = false, sf = false, cc = false;
969   struct intel_field_iterator iter;
970   intel_field_iterator_init(&iter, inst, p, 0, false);
971   while (intel_field_iterator_next(&iter)) {
972      if (!strcmp(iter.name, "CLIP Viewport State Change"))
973         clip = iter.raw_value;
974      if (!strcmp(iter.name, "SF Viewport State Change"))
975         sf = iter.raw_value;
976      if (!strcmp(iter.name, "CC Viewport State Change"))
977         cc = iter.raw_value;
978      else if (!strcmp(iter.name, "Pointer to CLIP_VIEWPORT") && clip) {
979         state_offset = iter.raw_value;
980         decode_dynamic_state(ctx, "CLIP_VIEWPORT", state_offset, 1);
981      }
982      else if (!strcmp(iter.name, "Pointer to SF_VIEWPORT") && sf) {
983         state_offset = iter.raw_value;
984         decode_dynamic_state(ctx, "SF_VIEWPORT", state_offset, 1);
985      }
986      else if (!strcmp(iter.name, "Pointer to CC_VIEWPORT") && cc) {
987         state_offset = iter.raw_value;
988         decode_dynamic_state(ctx, "CC_VIEWPORT", state_offset, 1);
989      }
990   }
991}
992
993static void
994decode_3dstate_viewport_state_pointers_cc(struct intel_batch_decode_ctx *ctx,
995                                          const uint32_t *p)
996{
997   decode_dynamic_state_pointers(ctx, "CC_VIEWPORT", p, 4);
998}
999
1000static void
1001decode_3dstate_viewport_state_pointers_sf_clip(struct intel_batch_decode_ctx *ctx,
1002                                               const uint32_t *p)
1003{
1004   decode_dynamic_state_pointers(ctx, "SF_CLIP_VIEWPORT", p, 4);
1005}
1006
1007static void
1008decode_3dstate_blend_state_pointers(struct intel_batch_decode_ctx *ctx,
1009                                    const uint32_t *p)
1010{
1011   decode_dynamic_state_pointers(ctx, "BLEND_STATE", p, 1);
1012}
1013
1014static void
1015decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx,
1016                                 const uint32_t *p)
1017{
1018   if (ctx->devinfo.ver != 6) {
1019      decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1);
1020      return;
1021   }
1022
1023   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1024
1025   uint32_t state_offset = 0;
1026   bool blend_change = false, ds_change = false, cc_change = false;
1027   struct intel_field_iterator iter;
1028   intel_field_iterator_init(&iter, inst, p, 0, false);
1029   while (intel_field_iterator_next(&iter)) {
1030      if (!strcmp(iter.name, "BLEND_STATE Change"))
1031         blend_change = iter.raw_value;
1032      else if (!strcmp(iter.name, "DEPTH_STENCIL_STATE Change"))
1033         ds_change = iter.raw_value;
1034      else if (!strcmp(iter.name, "Color Calc State Pointer Valid"))
1035         cc_change = iter.raw_value;
1036      else if (!strcmp(iter.name, "Pointer to DEPTH_STENCIL_STATE") && ds_change) {
1037         state_offset = iter.raw_value;
1038         decode_dynamic_state(ctx, "DEPTH_STENCIL_STATE", state_offset, 1);
1039      }
1040      else if (!strcmp(iter.name, "Pointer to BLEND_STATE") && blend_change) {
1041         state_offset = iter.raw_value;
1042         decode_dynamic_state(ctx, "BLEND_STATE", state_offset, 1);
1043      }
1044      else if (!strcmp(iter.name, "Color Calc State Pointer") && cc_change) {
1045         state_offset = iter.raw_value;
1046         decode_dynamic_state(ctx, "COLOR_CALC_STATE", state_offset, 1);
1047      }
1048   }
1049}
1050
1051static void
1052decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx,
1053                                 const uint32_t *p)
1054{
1055   decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, 1);
1056}
1057
1058static void
1059decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx,
1060                                      const uint32_t *p)
1061{
1062   decode_dynamic_state_pointers(ctx, "SCISSOR_RECT", p, 1);
1063}
1064
1065static void
1066decode_3dstate_slice_table_state_pointers(struct intel_batch_decode_ctx *ctx,
1067                                          const uint32_t *p)
1068{
1069   decode_dynamic_state_pointers(ctx, "SLICE_HASH_TABLE", p, 1);
1070}
1071
1072static void
1073handle_gt_mode(struct intel_batch_decode_ctx *ctx,
1074               uint32_t reg_addr, uint32_t val)
1075{
1076   struct intel_group *reg = intel_spec_find_register(ctx->spec, reg_addr);
1077
1078   assert(intel_group_get_length(reg, &val) == 1);
1079
1080   struct intel_field_iterator iter;
1081   intel_field_iterator_init(&iter, reg, &val, 0, false);
1082
1083   uint32_t bt_alignment;
1084   bool bt_alignment_mask = 0;
1085
1086   while (intel_field_iterator_next(&iter)) {
1087      if (strcmp(iter.name, "Binding Table Alignment") == 0) {
1088         bt_alignment = iter.raw_value;
1089      } else if (strcmp(iter.name, "Binding Table Alignment Mask") == 0) {
1090         bt_alignment_mask = iter.raw_value;
1091      }
1092   }
1093
1094   if (bt_alignment_mask)
1095      ctx->use_256B_binding_tables = bt_alignment;
1096}
1097
1098struct reg_handler {
1099   const char *name;
1100   void (*handler)(struct intel_batch_decode_ctx *ctx,
1101                   uint32_t reg_addr, uint32_t val);
1102} reg_handlers[] = {
1103   { "GT_MODE", handle_gt_mode }
1104};
1105
1106static void
1107decode_load_register_imm(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1108{
1109   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1110   const unsigned length = intel_group_get_length(inst, p);
1111   assert(length & 1);
1112   const unsigned nr_regs = (length - 1) / 2;
1113
1114   for (unsigned i = 0; i < nr_regs; i++) {
1115      struct intel_group *reg = intel_spec_find_register(ctx->spec, p[i * 2 + 1]);
1116      if (reg != NULL) {
1117         fprintf(ctx->fp, "register %s (0x%x): 0x%x\n",
1118                 reg->name, reg->register_offset, p[2]);
1119         ctx_print_group(ctx, reg, reg->register_offset, &p[2]);
1120
1121         for (unsigned i = 0; i < ARRAY_SIZE(reg_handlers); i++) {
1122            if (strcmp(reg->name, reg_handlers[i].name) == 0)
1123               reg_handlers[i].handler(ctx, p[1], p[2]);
1124         }
1125      }
1126   }
1127}
1128
1129static void
1130decode_vs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1131{
1132   struct intel_group *strct =
1133      intel_spec_find_struct(ctx->spec, "VS_STATE");
1134   if (strct == NULL) {
1135      fprintf(ctx->fp, "did not find VS_STATE info\n");
1136      return;
1137   }
1138
1139   struct intel_batch_decode_bo bind_bo =
1140      ctx_get_bo(ctx, true, offset);
1141
1142   if (bind_bo.map == NULL) {
1143      fprintf(ctx->fp, " vs state unavailable\n");
1144      return;
1145   }
1146
1147   ctx_print_group(ctx, strct, offset, bind_bo.map);
1148
1149   uint64_t ksp = 0;
1150   bool is_enabled = true;
1151   struct intel_field_iterator iter;
1152   intel_field_iterator_init(&iter, strct, bind_bo.map, 0, false);
1153   while (intel_field_iterator_next(&iter)) {
1154      if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
1155         ksp = iter.raw_value;
1156      } else if (strcmp(iter.name, "Enable") == 0) {
1157	is_enabled = iter.raw_value;
1158      }
1159   }
1160   if (is_enabled) {
1161      ctx_disassemble_program(ctx, ksp, "vertex shader");
1162      fprintf(ctx->fp, "\n");
1163   }
1164}
1165
1166static void
1167decode_gs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1168{
1169   struct intel_group *strct =
1170      intel_spec_find_struct(ctx->spec, "GS_STATE");
1171   if (strct == NULL) {
1172      fprintf(ctx->fp, "did not find GS_STATE info\n");
1173      return;
1174   }
1175
1176   struct intel_batch_decode_bo bind_bo =
1177      ctx_get_bo(ctx, true, offset);
1178
1179   if (bind_bo.map == NULL) {
1180      fprintf(ctx->fp, " gs state unavailable\n");
1181      return;
1182   }
1183
1184   ctx_print_group(ctx, strct, offset, bind_bo.map);
1185}
1186
1187static void
1188decode_clip_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1189{
1190   struct intel_group *strct =
1191      intel_spec_find_struct(ctx->spec, "CLIP_STATE");
1192   if (strct == NULL) {
1193      fprintf(ctx->fp, "did not find CLIP_STATE info\n");
1194      return;
1195   }
1196
1197   struct intel_batch_decode_bo bind_bo =
1198      ctx_get_bo(ctx, true, offset);
1199
1200   if (bind_bo.map == NULL) {
1201      fprintf(ctx->fp, " clip state unavailable\n");
1202      return;
1203   }
1204
1205   ctx_print_group(ctx, strct, offset, bind_bo.map);
1206
1207   struct intel_group *vp_strct =
1208      intel_spec_find_struct(ctx->spec, "CLIP_VIEWPORT");
1209   if (vp_strct == NULL) {
1210      fprintf(ctx->fp, "did not find CLIP_VIEWPORT info\n");
1211      return;
1212   }
1213   uint32_t clip_vp_offset = ((uint32_t *)bind_bo.map)[6] & ~0x3;
1214   struct intel_batch_decode_bo vp_bo =
1215      ctx_get_bo(ctx, true, clip_vp_offset);
1216   if (vp_bo.map == NULL) {
1217      fprintf(ctx->fp, " clip vp state unavailable\n");
1218      return;
1219   }
1220   ctx_print_group(ctx, vp_strct, clip_vp_offset, vp_bo.map);
1221}
1222
1223static void
1224decode_sf_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1225{
1226   struct intel_group *strct =
1227      intel_spec_find_struct(ctx->spec, "SF_STATE");
1228   if (strct == NULL) {
1229      fprintf(ctx->fp, "did not find SF_STATE info\n");
1230      return;
1231   }
1232
1233   struct intel_batch_decode_bo bind_bo =
1234      ctx_get_bo(ctx, true, offset);
1235
1236   if (bind_bo.map == NULL) {
1237      fprintf(ctx->fp, " sf state unavailable\n");
1238      return;
1239   }
1240
1241   ctx_print_group(ctx, strct, offset, bind_bo.map);
1242
1243   struct intel_group *vp_strct =
1244      intel_spec_find_struct(ctx->spec, "SF_VIEWPORT");
1245   if (vp_strct == NULL) {
1246      fprintf(ctx->fp, "did not find SF_VIEWPORT info\n");
1247      return;
1248   }
1249
1250   uint32_t sf_vp_offset = ((uint32_t *)bind_bo.map)[5] & ~0x3;
1251   struct intel_batch_decode_bo vp_bo =
1252      ctx_get_bo(ctx, true, sf_vp_offset);
1253   if (vp_bo.map == NULL) {
1254      fprintf(ctx->fp, " sf vp state unavailable\n");
1255      return;
1256   }
1257   ctx_print_group(ctx, vp_strct, sf_vp_offset, vp_bo.map);
1258}
1259
1260static void
1261decode_wm_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1262{
1263   struct intel_group *strct =
1264      intel_spec_find_struct(ctx->spec, "WM_STATE");
1265   if (strct == NULL) {
1266      fprintf(ctx->fp, "did not find WM_STATE info\n");
1267      return;
1268   }
1269
1270   struct intel_batch_decode_bo bind_bo =
1271      ctx_get_bo(ctx, true, offset);
1272
1273   if (bind_bo.map == NULL) {
1274      fprintf(ctx->fp, " wm state unavailable\n");
1275      return;
1276   }
1277
1278   ctx_print_group(ctx, strct, offset, bind_bo.map);
1279
1280   decode_ps_kern(ctx, strct, bind_bo.map);
1281}
1282
1283static void
1284decode_cc_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1285{
1286   struct intel_group *strct =
1287      intel_spec_find_struct(ctx->spec, "COLOR_CALC_STATE");
1288   if (strct == NULL) {
1289      fprintf(ctx->fp, "did not find COLOR_CALC_STATE info\n");
1290      return;
1291   }
1292
1293   struct intel_batch_decode_bo bind_bo =
1294      ctx_get_bo(ctx, true, offset);
1295
1296   if (bind_bo.map == NULL) {
1297      fprintf(ctx->fp, " cc state unavailable\n");
1298      return;
1299   }
1300
1301   ctx_print_group(ctx, strct, offset, bind_bo.map);
1302
1303   struct intel_group *vp_strct =
1304      intel_spec_find_struct(ctx->spec, "CC_VIEWPORT");
1305   if (vp_strct == NULL) {
1306      fprintf(ctx->fp, "did not find CC_VIEWPORT info\n");
1307      return;
1308   }
1309   uint32_t cc_vp_offset = ((uint32_t *)bind_bo.map)[4] & ~0x3;
1310   struct intel_batch_decode_bo vp_bo =
1311      ctx_get_bo(ctx, true, cc_vp_offset);
1312   if (vp_bo.map == NULL) {
1313      fprintf(ctx->fp, " cc vp state unavailable\n");
1314      return;
1315   }
1316   ctx_print_group(ctx, vp_strct, cc_vp_offset, vp_bo.map);
1317}
1318static void
1319decode_pipelined_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1320{
1321   fprintf(ctx->fp, "VS State Table:\n");
1322   decode_vs_state(ctx, p[1]);
1323   if (p[2] & 1) {
1324      fprintf(ctx->fp, "GS State Table:\n");
1325      decode_gs_state(ctx, p[2] & ~1);
1326   }
1327   fprintf(ctx->fp, "Clip State Table:\n");
1328   decode_clip_state(ctx, p[3] & ~1);
1329   fprintf(ctx->fp, "SF State Table:\n");
1330   decode_sf_state(ctx, p[4]);
1331   fprintf(ctx->fp, "WM State Table:\n");
1332   decode_wm_state(ctx, p[5]);
1333   fprintf(ctx->fp, "CC State Table:\n");
1334   decode_cc_state(ctx, p[6]);
1335}
1336
1337static void
1338decode_cps_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1339{
1340   decode_dynamic_state_pointers(ctx, "CPS_STATE", p, 1);
1341}
1342
1343struct custom_decoder {
1344   const char *cmd_name;
1345   void (*decode)(struct intel_batch_decode_ctx *ctx, const uint32_t *p);
1346} custom_decoders[] = {
1347   { "STATE_BASE_ADDRESS", handle_state_base_address },
1348   { "3DSTATE_BINDING_TABLE_POOL_ALLOC", handle_binding_table_pool_alloc },
1349   { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load },
1350   { "COMPUTE_WALKER", handle_compute_walker },
1351   { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers },
1352   { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer },
1353   { "3DSTATE_VS", decode_single_ksp },
1354   { "3DSTATE_GS", decode_single_ksp },
1355   { "3DSTATE_DS", decode_single_ksp },
1356   { "3DSTATE_HS", decode_single_ksp },
1357   { "3DSTATE_PS", decode_ps_kernels },
1358   { "3DSTATE_WM", decode_ps_kernels },
1359   { "3DSTATE_MESH_SHADER", decode_mesh_task_ksp },
1360   { "3DSTATE_TASK_SHADER", decode_mesh_task_ksp },
1361   { "3DSTATE_CONSTANT_VS", decode_3dstate_constant },
1362   { "3DSTATE_CONSTANT_GS", decode_3dstate_constant },
1363   { "3DSTATE_CONSTANT_PS", decode_3dstate_constant },
1364   { "3DSTATE_CONSTANT_HS", decode_3dstate_constant },
1365   { "3DSTATE_CONSTANT_DS", decode_3dstate_constant },
1366   { "3DSTATE_CONSTANT_ALL", decode_3dstate_constant_all },
1367
1368   { "3DSTATE_BINDING_TABLE_POINTERS", decode_gfx4_3dstate_binding_table_pointers },
1369   { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers },
1370   { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers },
1371   { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers },
1372   { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers },
1373   { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers },
1374
1375   { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers },
1376   { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers },
1377   { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers },
1378   { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers },
1379   { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers },
1380   { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gfx6 },
1381
1382   { "3DSTATE_VIEWPORT_STATE_POINTERS", decode_3dstate_viewport_state_pointers },
1383   { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc },
1384   { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
1385   { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
1386   { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
1387   { "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers },
1388   { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
1389   { "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers },
1390   { "MI_LOAD_REGISTER_IMM", decode_load_register_imm },
1391   { "3DSTATE_PIPELINED_POINTERS", decode_pipelined_pointers },
1392   { "3DSTATE_CPS_POINTERS", decode_cps_pointers },
1393   { "CONSTANT_BUFFER", decode_gfx4_constant_buffer },
1394};
1395
1396void
1397intel_print_batch(struct intel_batch_decode_ctx *ctx,
1398                  const uint32_t *batch, uint32_t batch_size,
1399                  uint64_t batch_addr, bool from_ring)
1400{
1401   const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
1402   int length;
1403   struct intel_group *inst;
1404   const char *reset_color = ctx->flags & INTEL_BATCH_DECODE_IN_COLOR ? NORMAL : "";
1405
1406   if (ctx->n_batch_buffer_start >= 100) {
1407      fprintf(ctx->fp, "%s0x%08"PRIx64": Max batch buffer jumps exceeded%s\n",
1408              (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
1409              (ctx->flags & INTEL_BATCH_DECODE_OFFSETS) ? batch_addr : 0,
1410              reset_color);
1411      return;
1412   }
1413
1414   ctx->n_batch_buffer_start++;
1415
1416   for (p = batch; p < end; p += length) {
1417      inst = intel_ctx_find_instruction(ctx, p);
1418      length = intel_group_get_length(inst, p);
1419      assert(inst == NULL || length > 0);
1420      length = MAX2(1, length);
1421
1422      uint64_t offset;
1423      if (ctx->flags & INTEL_BATCH_DECODE_OFFSETS)
1424         offset = batch_addr + ((char *)p - (char *)batch);
1425      else
1426         offset = 0;
1427
1428      if (inst == NULL) {
1429         fprintf(ctx->fp, "%s0x%08"PRIx64": unknown instruction %08x%s\n",
1430                 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
1431                 offset, p[0], reset_color);
1432
1433         for (int i=1; i < length; i++) {
1434            fprintf(ctx->fp, "%s0x%08"PRIx64": -- %08x%s\n",
1435                 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
1436                 offset + i * 4, p[i], reset_color);
1437         }
1438
1439         continue;
1440      }
1441
1442      const char *color;
1443      const char *inst_name = intel_group_get_name(inst);
1444      if (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) {
1445         reset_color = NORMAL;
1446         if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
1447            if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 ||
1448                strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0)
1449               color = GREEN_HEADER;
1450            else
1451               color = BLUE_HEADER;
1452         } else {
1453            color = NORMAL;
1454         }
1455      } else {
1456         color = "";
1457         reset_color = "";
1458      }
1459
1460      fprintf(ctx->fp, "%s0x%08"PRIx64"%s:  0x%08x:  %-80s%s\n", color, offset,
1461              ctx->acthd && offset == ctx->acthd ? " (ACTHD)" : "", p[0],
1462              inst_name, reset_color);
1463
1464      if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
1465         ctx_print_group(ctx, inst, offset, p);
1466
1467         for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) {
1468            if (strcmp(inst_name, custom_decoders[i].cmd_name) == 0) {
1469               custom_decoders[i].decode(ctx, p);
1470               break;
1471            }
1472         }
1473      }
1474
1475      if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0) {
1476         uint64_t next_batch_addr = 0;
1477         bool ppgtt = false;
1478         bool second_level = false;
1479         bool predicate = false;
1480         struct intel_field_iterator iter;
1481         intel_field_iterator_init(&iter, inst, p, 0, false);
1482         while (intel_field_iterator_next(&iter)) {
1483            if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
1484               next_batch_addr = iter.raw_value;
1485            } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
1486               second_level = iter.raw_value;
1487            } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
1488               ppgtt = iter.raw_value;
1489            } else if (strcmp(iter.name, "Predication Enable") == 0) {
1490               predicate = iter.raw_value;
1491            }
1492         }
1493
1494         if (!predicate) {
1495            struct intel_batch_decode_bo next_batch = ctx_get_bo(ctx, ppgtt, next_batch_addr);
1496
1497            if (next_batch.map == NULL) {
1498               fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
1499                       next_batch_addr);
1500            } else {
1501               intel_print_batch(ctx, next_batch.map, next_batch.size,
1502                                 next_batch.addr, false);
1503            }
1504            if (second_level) {
1505               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
1506                * like a subroutine call.  Commands that come afterwards get
1507                * processed once the 2nd level batch buffer returns with
1508                * MI_BATCH_BUFFER_END.
1509                */
1510               continue;
1511            } else if (!from_ring) {
1512               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
1513                * like a goto.  Nothing after it will ever get processed.  In
1514                * order to prevent the recursion from growing, we just reset the
1515                * loop and continue;
1516                */
1517               break;
1518            }
1519         }
1520      } else if (strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) {
1521         break;
1522      }
1523   }
1524
1525   ctx->n_batch_buffer_start--;
1526}
1527