1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "common/intel_decoder.h"
25 #include "intel_disasm.h"
26 #include "util/macros.h"
27 #include "util/u_math.h" /* Needed for ROUND_DOWN_TO */
28 
29 #include <string.h>
30 
31 void
intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx, const struct brw_isa_info *isa, const struct intel_device_info *devinfo, FILE *fp, enum intel_batch_decode_flags flags, const char *xml_path, struct intel_batch_decode_bo (*get_bo)(void *, bool, uint64_t), unsigned (*get_state_size)(void *, uint64_t, uint64_t), void *user_data)32 intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx,
33                             const struct brw_isa_info *isa,
34                             const struct intel_device_info *devinfo,
35                             FILE *fp, enum intel_batch_decode_flags flags,
36                             const char *xml_path,
37                             struct intel_batch_decode_bo (*get_bo)(void *,
38                                                                    bool,
39                                                                    uint64_t),
40                             unsigned (*get_state_size)(void *, uint64_t,
41                                                        uint64_t),
42                             void *user_data)
43 {
44    memset(ctx, 0, sizeof(*ctx));
45 
46    ctx->isa = isa;
47    ctx->devinfo = *devinfo;
48    ctx->get_bo = get_bo;
49    ctx->get_state_size = get_state_size;
50    ctx->user_data = user_data;
51    ctx->fp = fp;
52    ctx->flags = flags;
53    ctx->max_vbo_decoded_lines = -1; /* No limit! */
54    ctx->engine = I915_ENGINE_CLASS_RENDER;
55 
56    if (xml_path == NULL)
57       ctx->spec = intel_spec_load(devinfo);
58    else
59       ctx->spec = intel_spec_load_from_path(devinfo, xml_path);
60 }
61 
62 void
intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx)63 intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx)
64 {
65    intel_spec_destroy(ctx->spec);
66 }
67 
/* ANSI/VT100 escape sequences used to colorize decoder output.
 * Note: "\e" (ESC) is a GCC/Clang extension; the portable spellings are
 * "\033" or "\x1b".
 */
#define CSI "\e["
#define RED_COLOR    CSI "31m"
#define BLUE_HEADER  CSI "0;44m" CSI "1;37m"
#define GREEN_HEADER CSI "1;42m"
#define NORMAL       CSI "0m"
73 
74 static void
ctx_print_group(struct intel_batch_decode_ctx *ctx, struct intel_group *group, uint64_t address, const void *map)75 ctx_print_group(struct intel_batch_decode_ctx *ctx,
76                 struct intel_group *group,
77                 uint64_t address, const void *map)
78 {
79    intel_print_group(ctx->fp, group, address, map, 0,
80                    (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) != 0);
81 }
82 
83 static struct intel_batch_decode_bo
ctx_get_bo(struct intel_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr)84 ctx_get_bo(struct intel_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr)
85 {
86    if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0)) {
87       /* On Broadwell and above, we have 48-bit addresses which consume two
88        * dwords.  Some packets require that these get stored in a "canonical
89        * form" which means that bit 47 is sign-extended through the upper
90        * bits. In order to correctly handle those aub dumps, we need to mask
91        * off the top 16 bits.
92        */
93       addr &= (~0ull >> 16);
94    }
95 
96    struct intel_batch_decode_bo bo = ctx->get_bo(ctx->user_data, ppgtt, addr);
97 
98    if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0))
99       bo.addr &= (~0ull >> 16);
100 
101    /* We may actually have an offset into the bo */
102    if (bo.map != NULL) {
103       assert(bo.addr <= addr);
104       uint64_t offset = addr - bo.addr;
105       bo.map += offset;
106       bo.addr += offset;
107       bo.size -= offset;
108    }
109 
110    return bo;
111 }
112 
113 static int
update_count(struct intel_batch_decode_ctx *ctx, uint64_t address, uint64_t base_address, unsigned element_dwords, unsigned guess)114 update_count(struct intel_batch_decode_ctx *ctx,
115              uint64_t address,
116              uint64_t base_address,
117              unsigned element_dwords,
118              unsigned guess)
119 {
120    unsigned size = 0;
121 
122    if (ctx->get_state_size)
123       size = ctx->get_state_size(ctx->user_data, address, base_address);
124 
125    if (size > 0)
126       return size / (sizeof(uint32_t) * element_dwords);
127 
128    /* In the absence of any information, just guess arbitrarily. */
129    return guess;
130 }
131 
132 static void
ctx_disassemble_program(struct intel_batch_decode_ctx *ctx, uint32_t ksp, const char *type)133 ctx_disassemble_program(struct intel_batch_decode_ctx *ctx,
134                         uint32_t ksp, const char *type)
135 {
136    uint64_t addr = ctx->instruction_base + ksp;
137    struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
138    if (!bo.map)
139       return;
140 
141    fprintf(ctx->fp, "\nReferenced %s:\n", type);
142    intel_disassemble(ctx->isa, bo.map, 0, ctx->fp);
143 }
144 
/* Heuristic to determine whether a uint32_t is probably actually a float
 * (http://stackoverflow.com/a/2953466)
 */

static bool
probably_float(uint32_t bits)
{
   const int exp = (int)((bits >> 23) & 0xff) - 127;
   const uint32_t mant = bits & 0x007fffff;

   /* Positive or negative zero. */
   if (exp == -127 && mant == 0)
      return true;

   /* Magnitudes from roughly one billionth up to one billion. */
   if (exp >= -30 && exp <= 30)
      return true;

   /* Some value with only a few high-order binary digits set. */
   return (mant & 0x0000ffff) == 0;
}
169 
static void
ctx_print_buffer(struct intel_batch_decode_ctx *ctx,
                 struct intel_batch_decode_bo bo,
                 uint32_t read_length,
                 uint32_t pitch,
                 int max_lines)
{
   /* Hex/float-dump up to 'read_length' bytes of 'bo', clamped to the
    * BO's size and rounded down to whole dwords.  'pitch' (in bytes)
    * forces a line break at each row boundary so e.g. vertex data lines
    * up with its stride; a negative 'max_lines' means no line limit.
    */
   const uint32_t *dw_end =
         bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4);

   /* column_count wraps every 8 dwords; pitch_col_count wraps at the
    * pitch boundary.  line_count starts at -1 because the first newline
    * emitted does not terminate a data line.
    */
   int column_count = 0, pitch_col_count = 0, line_count = -1;
   for (const uint32_t *dw = bo.map; dw < dw_end; dw++) {
      if (pitch_col_count * 4 == pitch || column_count == 8) {
         fprintf(ctx->fp, "\n");
         column_count = 0;
         if (pitch_col_count * 4 == pitch)
            pitch_col_count = 0;
         line_count++;

         if (max_lines >= 0 && line_count >= max_lines)
            break;
      }
      /* Two-space indent at start of line, single space between columns. */
      fprintf(ctx->fp, column_count == 0 ? "  " : " ");

      /* Optionally render dwords that look like IEEE floats as floats.
       * NOTE(review): *(float *)dw type-puns via a pointer cast — relies
       * on the build disabling/ignoring strict aliasing; confirm flags.
       */
      if ((ctx->flags & INTEL_BATCH_DECODE_FLOATS) && probably_float(*dw))
         fprintf(ctx->fp, "  %8.2f", *(float *) dw);
      else
         fprintf(ctx->fp, "  0x%08x", *dw);

      column_count++;
      pitch_col_count++;
   }
   fprintf(ctx->fp, "\n");
}
204 
205 static struct intel_group *
intel_ctx_find_instruction(struct intel_batch_decode_ctx *ctx, const uint32_t *p)206 intel_ctx_find_instruction(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
207 {
208    return intel_spec_find_instruction(ctx->spec, ctx->engine, p);
209 }
210 
211 static void
handle_state_base_address(struct intel_batch_decode_ctx *ctx, const uint32_t *p)212 handle_state_base_address(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
213 {
214    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
215 
216    struct intel_field_iterator iter;
217    intel_field_iterator_init(&iter, inst, p, 0, false);
218 
219    uint64_t surface_base = 0, dynamic_base = 0, instruction_base = 0;
220    bool surface_modify = 0, dynamic_modify = 0, instruction_modify = 0;
221 
222    while (intel_field_iterator_next(&iter)) {
223       if (strcmp(iter.name, "Surface State Base Address") == 0) {
224          surface_base = iter.raw_value;
225       } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) {
226          dynamic_base = iter.raw_value;
227       } else if (strcmp(iter.name, "Instruction Base Address") == 0) {
228          instruction_base = iter.raw_value;
229       } else if (strcmp(iter.name, "Surface State Base Address Modify Enable") == 0) {
230          surface_modify = iter.raw_value;
231       } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
232          dynamic_modify = iter.raw_value;
233       } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
234          instruction_modify = iter.raw_value;
235       }
236    }
237 
238    if (dynamic_modify)
239       ctx->dynamic_base = dynamic_base;
240 
241    if (surface_modify)
242       ctx->surface_base = surface_base;
243 
244    if (instruction_modify)
245       ctx->instruction_base = instruction_base;
246 }
247 
248 static void
handle_binding_table_pool_alloc(struct intel_batch_decode_ctx *ctx, const uint32_t *p)249 handle_binding_table_pool_alloc(struct intel_batch_decode_ctx *ctx,
250                                 const uint32_t *p)
251 {
252    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
253 
254    struct intel_field_iterator iter;
255    intel_field_iterator_init(&iter, inst, p, 0, false);
256 
257    uint64_t bt_pool_base = 0;
258    bool bt_pool_enable = false;
259 
260    while (intel_field_iterator_next(&iter)) {
261       if (strcmp(iter.name, "Binding Table Pool Base Address") == 0) {
262          bt_pool_base = iter.raw_value;
263       } else if (strcmp(iter.name, "Binding Table Pool Enable") == 0) {
264          bt_pool_enable = iter.raw_value;
265       }
266    }
267 
268    if (bt_pool_enable || ctx->devinfo.verx10 >= 125) {
269       ctx->bt_pool_base = bt_pool_base;
270    } else {
271       ctx->bt_pool_base = 0;
272    }
273 }
274 
static void
dump_binding_table(struct intel_batch_decode_ctx *ctx,
                   uint32_t offset, int count)
{
   /* Dump the binding table at 'offset' (relative to the binding table
    * pool base when one is programmed, else the surface state base) and
    * decode every RENDER_SURFACE_STATE it references.  A negative 'count'
    * means the caller does not know the number of entries.
    */
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "RENDER_SURFACE_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find RENDER_SURFACE_STATE info\n");
      return;
   }

   /* Most platforms use a 16-bit pointer with 32B alignment in bits 15:5. */
   uint32_t btp_alignment = 32;
   uint32_t btp_pointer_bits = 16;

   if (ctx->devinfo.verx10 >= 125) {
      /* The pointer is now 21-bit with 32B alignment in bits 20:5. */
      btp_pointer_bits = 21;
   } else if (ctx->use_256B_binding_tables) {
      /* When 256B binding tables are enabled, we have to shift the offset
       * which is stored in bits 15:5 but interpreted as bits 18:8 of the
       * actual offset.  The effective pointer is 19-bit with 256B alignment.
       */
      offset <<= 3;
      btp_pointer_bits = 19;
      btp_alignment = 256;
   }

   /* The pool base, when set, overrides the surface state base. */
   const uint64_t bt_pool_base = ctx->bt_pool_base ? ctx->bt_pool_base :
                                                     ctx->surface_base;

   if (count < 0) {
      /* Ask the driver for the table size; guess 8 entries otherwise. */
      count = update_count(ctx, bt_pool_base + offset,
                           bt_pool_base, 1, 8);
   }

   /* Reject pointers violating alignment or exceeding the pointer width. */
   if (offset % btp_alignment != 0 || offset >= (1u << btp_pointer_bits)) {
      fprintf(ctx->fp, "  invalid binding table pointer\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, bt_pool_base + offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, "  binding table unavailable\n");
      return;
   }

   const uint32_t *pointers = bind_bo.map;
   for (int i = 0; i < count; i++) {
      /* A zero entry is an unused binding table slot. */
      if (pointers[i] == 0)
         continue;

      uint64_t addr = ctx->surface_base + pointers[i];
      struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
      uint32_t size = strct->dw_length * 4;

      /* Surface state pointers are 32B aligned and must fit in the BO.
       * NOTE(review): '>=' also rejects a surface ending exactly at the
       * BO's end — confirm this exclusion is intentional.
       */
      if (pointers[i] % 32 != 0 ||
          addr < bo.addr || addr + size >= bo.addr + bo.size) {
         fprintf(ctx->fp, "pointer %u: 0x%08x <not valid>\n", i, pointers[i]);
         continue;
      }

      fprintf(ctx->fp, "pointer %u: 0x%08x\n", i, pointers[i]);
      ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr));
   }
}
343 
344 static void
dump_samplers(struct intel_batch_decode_ctx *ctx, uint32_t offset, int count)345 dump_samplers(struct intel_batch_decode_ctx *ctx, uint32_t offset, int count)
346 {
347    struct intel_group *strct = intel_spec_find_struct(ctx->spec, "SAMPLER_STATE");
348    uint64_t state_addr = ctx->dynamic_base + offset;
349 
350    assert(count > 0);
351 
352    struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
353    const void *state_map = bo.map;
354 
355    if (state_map == NULL) {
356       fprintf(ctx->fp, "  samplers unavailable\n");
357       return;
358    }
359 
360    if (offset % 32 != 0) {
361       fprintf(ctx->fp, "  invalid sampler state pointer\n");
362       return;
363    }
364 
365    const unsigned sampler_state_size = strct->dw_length * 4;
366 
367    if (count * sampler_state_size >= bo.size) {
368       fprintf(ctx->fp, "  sampler state ends after bo ends\n");
369       assert(!"sampler state ends after bo ends");
370       return;
371    }
372 
373    for (int i = 0; i < count; i++) {
374       fprintf(ctx->fp, "sampler state %d\n", i);
375       ctx_print_group(ctx, strct, state_addr, state_map);
376       state_addr += sampler_state_size;
377       state_map += sampler_state_size;
378    }
379 }
380 
381 static void
handle_interface_descriptor_data(struct intel_batch_decode_ctx *ctx, struct intel_group *desc, const uint32_t *p)382 handle_interface_descriptor_data(struct intel_batch_decode_ctx *ctx,
383                                  struct intel_group *desc, const uint32_t *p)
384 {
385    uint64_t ksp = 0;
386    uint32_t sampler_offset = 0, sampler_count = 0;
387    uint32_t binding_table_offset = 0, binding_entry_count = 0;
388 
389    struct intel_field_iterator iter;
390    intel_field_iterator_init(&iter, desc, p, 0, false);
391    while (intel_field_iterator_next(&iter)) {
392       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
393          ksp = strtoll(iter.value, NULL, 16);
394       } else if (strcmp(iter.name, "Sampler State Pointer") == 0) {
395          sampler_offset = strtol(iter.value, NULL, 16);
396       } else if (strcmp(iter.name, "Sampler Count") == 0) {
397          sampler_count = strtol(iter.value, NULL, 10);
398       } else if (strcmp(iter.name, "Binding Table Pointer") == 0) {
399          binding_table_offset = strtol(iter.value, NULL, 16);
400       } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) {
401          binding_entry_count = strtol(iter.value, NULL, 10);
402       }
403    }
404 
405    ctx_disassemble_program(ctx, ksp, "compute shader");
406    fprintf(ctx->fp, "\n");
407 
408    if (sampler_count)
409       dump_samplers(ctx, sampler_offset, sampler_count);
410    if (binding_entry_count)
411       dump_binding_table(ctx, binding_table_offset, binding_entry_count);
412 }
413 
414 static void
handle_media_interface_descriptor_load(struct intel_batch_decode_ctx *ctx, const uint32_t *p)415 handle_media_interface_descriptor_load(struct intel_batch_decode_ctx *ctx,
416                                        const uint32_t *p)
417 {
418    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
419    struct intel_group *desc =
420       intel_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
421 
422    struct intel_field_iterator iter;
423    intel_field_iterator_init(&iter, inst, p, 0, false);
424    uint32_t descriptor_offset = 0;
425    int descriptor_count = 0;
426    while (intel_field_iterator_next(&iter)) {
427       if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) {
428          descriptor_offset = strtol(iter.value, NULL, 16);
429       } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) {
430          descriptor_count =
431             strtol(iter.value, NULL, 16) / (desc->dw_length * 4);
432       }
433    }
434 
435    uint64_t desc_addr = ctx->dynamic_base + descriptor_offset;
436    struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, desc_addr);
437    const void *desc_map = bo.map;
438 
439    if (desc_map == NULL) {
440       fprintf(ctx->fp, "  interface descriptors unavailable\n");
441       return;
442    }
443 
444    for (int i = 0; i < descriptor_count; i++) {
445       fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
446 
447       ctx_print_group(ctx, desc, desc_addr, desc_map);
448 
449       handle_interface_descriptor_data(ctx, desc, desc_map);
450 
451       desc_map += desc->dw_length;
452       desc_addr += desc->dw_length * 4;
453    }
454 }
455 
456 static void
handle_compute_walker(struct intel_batch_decode_ctx *ctx, const uint32_t *p)457 handle_compute_walker(struct intel_batch_decode_ctx *ctx,
458                       const uint32_t *p)
459 {
460    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
461 
462    struct intel_field_iterator iter;
463    intel_field_iterator_init(&iter, inst, p, 0, false);
464    while (intel_field_iterator_next(&iter)) {
465       if (strcmp(iter.name, "Interface Descriptor") == 0) {
466          handle_interface_descriptor_data(ctx, iter.struct_desc,
467                                           &iter.p[iter.start_bit / 32]);
468       }
469    }
470 }
471 
static void
handle_3dstate_vertex_buffers(struct intel_batch_decode_ctx *ctx,
                              const uint32_t *p)
{
   /* Walk every VERTEX_BUFFER_STATE in a 3DSTATE_VERTEX_BUFFERS packet and
    * dump the referenced buffer contents.  Some generations describe the
    * buffer with an inclusive "End Address", others with a "Buffer Size";
    * seeing either field marks the entry complete ('ready').
    */
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
   struct intel_group *vbs = intel_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE");

   struct intel_batch_decode_bo vb = {};
   uint32_t vb_size = 0;
   int index = -1;
   int pitch = -1;
   bool ready = false;

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      /* Only descend into VERTEX_BUFFER_STATE sub-structs. */
      if (iter.struct_desc != vbs)
         continue;

      struct intel_field_iterator vbs_iter;
      intel_field_iterator_init(&vbs_iter, vbs, &iter.p[iter.start_bit / 32], 0, false);
      while (intel_field_iterator_next(&vbs_iter)) {
         if (strcmp(vbs_iter.name, "Vertex Buffer Index") == 0) {
            index = vbs_iter.raw_value;
         } else if (strcmp(vbs_iter.name, "Buffer Pitch") == 0) {
            pitch = vbs_iter.raw_value;
         } else if (strcmp(vbs_iter.name, "Buffer Starting Address") == 0) {
            vb = ctx_get_bo(ctx, true, vbs_iter.raw_value);
         } else if (strcmp(vbs_iter.name, "Buffer Size") == 0) {
            vb_size = vbs_iter.raw_value;
            ready = true;
         } else if (strcmp(vbs_iter.name, "End Address") == 0) {
            /* End address is inclusive, hence the +1. */
            if (vb.map && vbs_iter.raw_value >= vb.addr)
               vb_size = (vbs_iter.raw_value + 1) - vb.addr;
            else
               vb_size = 0;
            ready = true;
         }

         /* Keep accumulating fields until the entry is complete. */
         if (!ready)
            continue;

         fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size);

         /* NOTE(review): this 'continue' skips the per-entry reset below,
          * leaving index/pitch/ready stale for the next entry when the
          * buffer is unavailable — presumably benign, but confirm.
          */
         if (vb.map == NULL) {
            fprintf(ctx->fp, "  buffer contents unavailable\n");
            continue;
         }

         if (vb.map == 0 || vb_size == 0)
            continue;

         ctx_print_buffer(ctx, vb, vb_size, pitch, ctx->max_vbo_decoded_lines);

         /* Reset per-entry state for the next VERTEX_BUFFER_STATE. */
         vb.map = NULL;
         vb_size = 0;
         index = -1;
         pitch = -1;
         ready = false;
      }
   }
}
534 
535 static void
handle_3dstate_index_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p)536 handle_3dstate_index_buffer(struct intel_batch_decode_ctx *ctx,
537                             const uint32_t *p)
538 {
539    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
540 
541    struct intel_batch_decode_bo ib = {};
542    uint32_t ib_size = 0;
543    uint32_t format = 0;
544 
545    struct intel_field_iterator iter;
546    intel_field_iterator_init(&iter, inst, p, 0, false);
547    while (intel_field_iterator_next(&iter)) {
548       if (strcmp(iter.name, "Index Format") == 0) {
549          format = iter.raw_value;
550       } else if (strcmp(iter.name, "Buffer Starting Address") == 0) {
551          ib = ctx_get_bo(ctx, true, iter.raw_value);
552       } else if (strcmp(iter.name, "Buffer Size") == 0) {
553          ib_size = iter.raw_value;
554       }
555    }
556 
557    if (ib.map == NULL) {
558       fprintf(ctx->fp, "  buffer contents unavailable\n");
559       return;
560    }
561 
562    const void *m = ib.map;
563    const void *ib_end = ib.map + MIN2(ib.size, ib_size);
564    for (int i = 0; m < ib_end && i < 10; i++) {
565       switch (format) {
566       case 0:
567          fprintf(ctx->fp, "%3d ", *(uint8_t *)m);
568          m += 1;
569          break;
570       case 1:
571          fprintf(ctx->fp, "%3d ", *(uint16_t *)m);
572          m += 2;
573          break;
574       case 2:
575          fprintf(ctx->fp, "%3d ", *(uint32_t *)m);
576          m += 4;
577          break;
578       }
579    }
580 
581    if (m < ib_end)
582       fprintf(ctx->fp, "...");
583    fprintf(ctx->fp, "\n");
584 }
585 
586 static void
decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)587 decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
588 {
589    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
590 
591    uint64_t ksp = 0;
592    bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gfx8+ only */
593    bool is_enabled = true;
594 
595    struct intel_field_iterator iter;
596    intel_field_iterator_init(&iter, inst, p, 0, false);
597    while (intel_field_iterator_next(&iter)) {
598       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
599          ksp = iter.raw_value;
600       } else if (strcmp(iter.name, "SIMD8 Dispatch Enable") == 0) {
601          is_simd8 = iter.raw_value;
602       } else if (strcmp(iter.name, "Dispatch Mode") == 0) {
603          is_simd8 = strcmp(iter.value, "SIMD8") == 0;
604       } else if (strcmp(iter.name, "Dispatch Enable") == 0) {
605          is_simd8 = strcmp(iter.value, "SIMD8") == 0;
606       } else if (strcmp(iter.name, "Enable") == 0) {
607          is_enabled = iter.raw_value;
608       }
609    }
610 
611    const char *type =
612       strcmp(inst->name,   "VS_STATE") == 0 ? "vertex shader" :
613       strcmp(inst->name,   "GS_STATE") == 0 ? "geometry shader" :
614       strcmp(inst->name,   "SF_STATE") == 0 ? "strips and fans shader" :
615       strcmp(inst->name, "CLIP_STATE") == 0 ? "clip shader" :
616       strcmp(inst->name, "3DSTATE_DS") == 0 ? "tessellation evaluation shader" :
617       strcmp(inst->name, "3DSTATE_HS") == 0 ? "tessellation control shader" :
618       strcmp(inst->name, "3DSTATE_VS") == 0 ? (is_simd8 ? "SIMD8 vertex shader" : "vec4 vertex shader") :
619       strcmp(inst->name, "3DSTATE_GS") == 0 ? (is_simd8 ? "SIMD8 geometry shader" : "vec4 geometry shader") :
620       NULL;
621 
622    if (is_enabled) {
623       ctx_disassemble_program(ctx, ksp, type);
624       fprintf(ctx->fp, "\n");
625    }
626 }
627 
628 static void
decode_mesh_task_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)629 decode_mesh_task_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
630 {
631    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
632 
633    uint64_t ksp = 0;
634    uint64_t local_x_maximum = 0;
635    uint64_t threads = 0;
636 
637    struct intel_field_iterator iter;
638    intel_field_iterator_init(&iter, inst, p, 0, false);
639    while (intel_field_iterator_next(&iter)) {
640       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
641          ksp = iter.raw_value;
642       } else if (strcmp(iter.name, "Local X Maximum") == 0) {
643          local_x_maximum = iter.raw_value;
644       } else if (strcmp(iter.name, "Number of Threads in GPGPU Thread Group") == 0) {
645          threads = iter.raw_value;
646       }
647    }
648 
649    const char *type =
650       strcmp(inst->name,   "3DSTATE_MESH_SHADER") == 0 ? "mesh shader" :
651       strcmp(inst->name,   "3DSTATE_TASK_SHADER") == 0 ? "task shader" :
652       NULL;
653 
654    if (threads && local_x_maximum) {
655       ctx_disassemble_program(ctx, ksp, type);
656       fprintf(ctx->fp, "\n");
657    }
658 }
659 
static void
decode_ps_kern(struct intel_batch_decode_ctx *ctx,
               struct intel_group *inst, const uint32_t *p)
{
   /* Decode up to three pixel-shader kernel start pointers from a WM/PS
    * state packet, normalize them to [SIMD8, SIMD16, SIMD32] order, and
    * disassemble each enabled one.
    */
   bool single_ksp = ctx->devinfo.ver == 4; /* Gfx4 exposes only KSP 0 */
   uint64_t ksp[3] = {0, 0, 0};
   bool enabled[3] = {false, false, false};

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      /* Fields are named "Kernel Start Pointer 0/1/2". */
      if (strncmp(iter.name, "Kernel Start Pointer ",
                  strlen("Kernel Start Pointer ")) == 0) {
         int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
         ksp[idx] = strtol(iter.value, NULL, 16);
      } else if (strcmp(iter.name, "8 Pixel Dispatch Enable") == 0) {
         enabled[0] = strcmp(iter.value, "true") == 0;
      } else if (strcmp(iter.name, "16 Pixel Dispatch Enable") == 0) {
         enabled[1] = strcmp(iter.value, "true") == 0;
      } else if (strcmp(iter.name, "32 Pixel Dispatch Enable") == 0) {
         enabled[2] = strcmp(iter.value, "true") == 0;
      }
   }

   if (single_ksp)
      ksp[1] = ksp[2] = ksp[0];

   /* Reorder KSPs to be [8, 16, 32] instead of the hardware order. */
   if (enabled[0] + enabled[1] + enabled[2] == 1) {
      /* With exactly one mode enabled, KSP 0 holds that mode's kernel;
       * move it into the slot matching the enabled width.
       */
      if (enabled[1]) {
         ksp[1] = ksp[0];
         ksp[0] = 0;
      } else if (enabled[2]) {
         ksp[2] = ksp[0];
         ksp[0] = 0;
      }
   } else {
      /* Otherwise KSP 1 and KSP 2 are swapped relative to [16, 32]. */
      uint64_t tmp = ksp[1];
      ksp[1] = ksp[2];
      ksp[2] = tmp;
   }

   if (enabled[0])
      ctx_disassemble_program(ctx, ksp[0], "SIMD8 fragment shader");
   if (enabled[1])
      ctx_disassemble_program(ctx, ksp[1], "SIMD16 fragment shader");
   if (enabled[2])
      ctx_disassemble_program(ctx, ksp[2], "SIMD32 fragment shader");

   if (enabled[0] || enabled[1] || enabled[2])
      fprintf(ctx->fp, "\n");
}
712 
static void
decode_ps_kernels(struct intel_batch_decode_ctx *ctx,
                  const uint32_t *p)
{
   /* Look up the pixel-shader packet's description and hand its kernel
    * pointers to the shared PS decoder.
    */
   decode_ps_kern(ctx, intel_ctx_find_instruction(ctx, p), p);
}
720 
721 static void
decode_3dstate_constant_all(struct intel_batch_decode_ctx *ctx, const uint32_t *p)722 decode_3dstate_constant_all(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
723 {
724    struct intel_group *inst =
725       intel_spec_find_instruction(ctx->spec, ctx->engine, p);
726    struct intel_group *body =
727       intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_ALL_DATA");
728 
729    uint32_t read_length[4] = {0};
730    struct intel_batch_decode_bo buffer[4];
731    memset(buffer, 0, sizeof(buffer));
732 
733    struct intel_field_iterator outer;
734    intel_field_iterator_init(&outer, inst, p, 0, false);
735    int idx = 0;
736    while (intel_field_iterator_next(&outer)) {
737       if (outer.struct_desc != body)
738          continue;
739 
740       struct intel_field_iterator iter;
741       intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
742                               0, false);
743       while (intel_field_iterator_next(&iter)) {
744          if (!strcmp(iter.name, "Pointer To Constant Buffer")) {
745             buffer[idx] = ctx_get_bo(ctx, true, iter.raw_value);
746          } else if (!strcmp(iter.name, "Constant Buffer Read Length")) {
747             read_length[idx] = iter.raw_value;
748          }
749       }
750       idx++;
751    }
752 
753    for (int i = 0; i < 4; i++) {
754       if (read_length[i] == 0 || buffer[i].map == NULL)
755          continue;
756 
757       unsigned size = read_length[i] * 32;
758       fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
759 
760       ctx_print_buffer(ctx, buffer[i], size, 0, -1);
761    }
762 }
763 
764 static void
decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p)765 decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
766 {
767    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
768    struct intel_group *body =
769       intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");
770 
771    uint32_t read_length[4] = {0};
772    uint64_t read_addr[4] = {0};
773 
774    struct intel_field_iterator outer;
775    intel_field_iterator_init(&outer, inst, p, 0, false);
776    while (intel_field_iterator_next(&outer)) {
777       if (outer.struct_desc != body)
778          continue;
779 
780       struct intel_field_iterator iter;
781       intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
782                               0, false);
783 
784       while (intel_field_iterator_next(&iter)) {
785          int idx;
786          if (sscanf(iter.name, "Read Length[%d]", &idx) == 1) {
787             read_length[idx] = iter.raw_value;
788          } else if (sscanf(iter.name, "Buffer[%d]", &idx) == 1) {
789             read_addr[idx] = iter.raw_value;
790          }
791       }
792 
793       for (int i = 0; i < 4; i++) {
794          if (read_length[i] == 0)
795             continue;
796 
797          struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr[i]);
798          if (!buffer.map) {
799             fprintf(ctx->fp, "constant buffer %d unavailable\n", i);
800             continue;
801          }
802 
803          unsigned size = read_length[i] * 32;
804          fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
805 
806          ctx_print_buffer(ctx, buffer, size, 0, -1);
807       }
808    }
809 }
810 
811 static void
decode_gfx4_constant_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p)812 decode_gfx4_constant_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
813 {
814    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
815    uint64_t read_length = 0, read_addr = 0, valid = 0;
816    struct intel_field_iterator iter;
817    intel_field_iterator_init(&iter, inst, p, 0, false);
818 
819    while (intel_field_iterator_next(&iter)) {
820       if (!strcmp(iter.name, "Buffer Length")) {
821          read_length = iter.raw_value;
822       } else if (!strcmp(iter.name, "Valid")) {
823          valid = iter.raw_value;
824       } else if (!strcmp(iter.name, "Buffer Starting Address")) {
825          read_addr = iter.raw_value;
826       }
827    }
828 
829    if (!valid)
830       return;
831 
832    struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr);
833    if (!buffer.map) {
834       fprintf(ctx->fp, "constant buffer unavailable\n");
835       return;
836    }
837    unsigned size = (read_length + 1) * 16 * sizeof(float);
838    fprintf(ctx->fp, "constant buffer size %u\n", size);
839 
840    ctx_print_buffer(ctx, buffer, size, 0, -1);
841 }
842 
843 
844 static void
decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)845 decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
846                                            const uint32_t *p)
847 {
848    fprintf(ctx->fp, "VS Binding Table:\n");
849    dump_binding_table(ctx, p[1], -1);
850 
851    fprintf(ctx->fp, "GS Binding Table:\n");
852    dump_binding_table(ctx, p[2], -1);
853 
854    if (ctx->devinfo.ver < 6) {
855       fprintf(ctx->fp, "CLIP Binding Table:\n");
856       dump_binding_table(ctx, p[3], -1);
857       fprintf(ctx->fp, "SF Binding Table:\n");
858       dump_binding_table(ctx, p[4], -1);
859       fprintf(ctx->fp, "PS Binding Table:\n");
860       dump_binding_table(ctx, p[5], -1);
861    } else {
862       fprintf(ctx->fp, "PS Binding Table:\n");
863       dump_binding_table(ctx, p[3], -1);
864    }
865 }
866 
/* Dump the binding table referenced by a per-stage
 * 3DSTATE_BINDING_TABLE_POINTERS_xS packet (pointer lives in DW1).
 */
static void
decode_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   const uint32_t bt_offset = p[1];
   dump_binding_table(ctx, bt_offset, -1);
}
873 
/* Dump the sampler table referenced by a per-stage
 * 3DSTATE_SAMPLER_STATE_POINTERS_xS packet (pointer lives in DW1).
 */
static void
decode_3dstate_sampler_state_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   const uint32_t sampler_offset = p[1];
   dump_samplers(ctx, sampler_offset, 1);
}
880 
/* Gfx6 variant of 3DSTATE_SAMPLER_STATE_POINTERS: DW1..DW3 each hold a
 * sampler table pointer; dump all three.
 */
static void
decode_3dstate_sampler_state_pointers_gfx6(struct intel_batch_decode_ctx *ctx,
                                           const uint32_t *p)
{
   for (int dw = 1; dw <= 3; dw++)
      dump_samplers(ctx, p[dw], 1);
}
889 
/* Return true if `str` ends with the suffix `end` (exact byte comparison).
 * An empty suffix always matches.
 */
static bool
str_ends_with(const char *str, const char *end)
{
   /* Use size_t for lengths: strlen() returns size_t, and an int
    * difference could overflow for very long inputs.
    */
   size_t str_len = strlen(str);
   size_t end_len = strlen(end);

   if (end_len > str_len)
      return false;

   return strcmp(str + (str_len - end_len), end) == 0;
}
899 
900 static void
decode_dynamic_state(struct intel_batch_decode_ctx *ctx, const char *struct_type, uint32_t state_offset, int count)901 decode_dynamic_state(struct intel_batch_decode_ctx *ctx,
902                        const char *struct_type, uint32_t state_offset,
903                        int count)
904 {
905    uint64_t state_addr = ctx->dynamic_base + state_offset;
906    struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
907    const void *state_map = bo.map;
908 
909    if (state_map == NULL) {
910       fprintf(ctx->fp, "  dynamic %s state unavailable\n", struct_type);
911       return;
912    }
913 
914    struct intel_group *state = intel_spec_find_struct(ctx->spec, struct_type);
915    if (strcmp(struct_type, "BLEND_STATE") == 0) {
916       /* Blend states are different from the others because they have a header
917        * struct called BLEND_STATE which is followed by a variable number of
918        * BLEND_STATE_ENTRY structs.
919        */
920       fprintf(ctx->fp, "%s\n", struct_type);
921       ctx_print_group(ctx, state, state_addr, state_map);
922 
923       state_addr += state->dw_length * 4;
924       state_map += state->dw_length * 4;
925 
926       struct_type = "BLEND_STATE_ENTRY";
927       state = intel_spec_find_struct(ctx->spec, struct_type);
928    }
929 
930    count = update_count(ctx, ctx->dynamic_base + state_offset,
931                         ctx->dynamic_base, state->dw_length, count);
932 
933    for (int i = 0; i < count; i++) {
934       fprintf(ctx->fp, "%s %d\n", struct_type, i);
935       ctx_print_group(ctx, state, state_addr, state_map);
936 
937       state_addr += state->dw_length * 4;
938       state_map += state->dw_length * 4;
939    }
940 }
941 
942 static void
decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx, const char *struct_type, const uint32_t *p, int count)943 decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx,
944                               const char *struct_type, const uint32_t *p,
945                               int count)
946 {
947    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
948 
949    uint32_t state_offset = 0;
950 
951    struct intel_field_iterator iter;
952    intel_field_iterator_init(&iter, inst, p, 0, false);
953    while (intel_field_iterator_next(&iter)) {
954       if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) {
955          state_offset = iter.raw_value;
956          break;
957       }
958    }
959    decode_dynamic_state(ctx, struct_type, state_offset, count);
960 }
961 
962 static void
decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)963 decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx *ctx,
964                                        const uint32_t *p)
965 {
966    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
967    uint32_t state_offset = 0;
968    bool clip = false, sf = false, cc = false;
969    struct intel_field_iterator iter;
970    intel_field_iterator_init(&iter, inst, p, 0, false);
971    while (intel_field_iterator_next(&iter)) {
972       if (!strcmp(iter.name, "CLIP Viewport State Change"))
973          clip = iter.raw_value;
974       if (!strcmp(iter.name, "SF Viewport State Change"))
975          sf = iter.raw_value;
976       if (!strcmp(iter.name, "CC Viewport State Change"))
977          cc = iter.raw_value;
978       else if (!strcmp(iter.name, "Pointer to CLIP_VIEWPORT") && clip) {
979          state_offset = iter.raw_value;
980          decode_dynamic_state(ctx, "CLIP_VIEWPORT", state_offset, 1);
981       }
982       else if (!strcmp(iter.name, "Pointer to SF_VIEWPORT") && sf) {
983          state_offset = iter.raw_value;
984          decode_dynamic_state(ctx, "SF_VIEWPORT", state_offset, 1);
985       }
986       else if (!strcmp(iter.name, "Pointer to CC_VIEWPORT") && cc) {
987          state_offset = iter.raw_value;
988          decode_dynamic_state(ctx, "CC_VIEWPORT", state_offset, 1);
989       }
990    }
991 }
992 
/* 3DSTATE_VIEWPORT_STATE_POINTERS_CC references an array of CC_VIEWPORT
 * structures; decode up to four of them.
 */
static void
decode_3dstate_viewport_state_pointers_cc(struct intel_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   const int vp_count = 4;
   decode_dynamic_state_pointers(ctx, "CC_VIEWPORT", p, vp_count);
}
999 
/* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP references an array of
 * SF_CLIP_VIEWPORT structures; decode up to four of them.
 */
static void
decode_3dstate_viewport_state_pointers_sf_clip(struct intel_batch_decode_ctx *ctx,
                                               const uint32_t *p)
{
   const int vp_count = 4;
   decode_dynamic_state_pointers(ctx, "SF_CLIP_VIEWPORT", p, vp_count);
}
1006 
/* Decode the BLEND_STATE referenced by 3DSTATE_BLEND_STATE_POINTERS.
 * (decode_dynamic_state expands the trailing BLEND_STATE_ENTRY array.)
 */
static void
decode_3dstate_blend_state_pointers(struct intel_batch_decode_ctx *ctx,
                                    const uint32_t *p)
{
   const int blend_count = 1;
   decode_dynamic_state_pointers(ctx, "BLEND_STATE", p, blend_count);
}
1013 
1014 static void
decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)1015 decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx,
1016                                  const uint32_t *p)
1017 {
1018    if (ctx->devinfo.ver != 6) {
1019       decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1);
1020       return;
1021    }
1022 
1023    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1024 
1025    uint32_t state_offset = 0;
1026    bool blend_change = false, ds_change = false, cc_change = false;
1027    struct intel_field_iterator iter;
1028    intel_field_iterator_init(&iter, inst, p, 0, false);
1029    while (intel_field_iterator_next(&iter)) {
1030       if (!strcmp(iter.name, "BLEND_STATE Change"))
1031          blend_change = iter.raw_value;
1032       else if (!strcmp(iter.name, "DEPTH_STENCIL_STATE Change"))
1033          ds_change = iter.raw_value;
1034       else if (!strcmp(iter.name, "Color Calc State Pointer Valid"))
1035          cc_change = iter.raw_value;
1036       else if (!strcmp(iter.name, "Pointer to DEPTH_STENCIL_STATE") && ds_change) {
1037          state_offset = iter.raw_value;
1038          decode_dynamic_state(ctx, "DEPTH_STENCIL_STATE", state_offset, 1);
1039       }
1040       else if (!strcmp(iter.name, "Pointer to BLEND_STATE") && blend_change) {
1041          state_offset = iter.raw_value;
1042          decode_dynamic_state(ctx, "BLEND_STATE", state_offset, 1);
1043       }
1044       else if (!strcmp(iter.name, "Color Calc State Pointer") && cc_change) {
1045          state_offset = iter.raw_value;
1046          decode_dynamic_state(ctx, "COLOR_CALC_STATE", state_offset, 1);
1047       }
1048    }
1049 }
1050 
/* Decode the DEPTH_STENCIL_STATE referenced by
 * 3DSTATE_DEPTH_STENCIL_STATE_POINTERS.
 */
static void
decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx,
                                 const uint32_t *p)
{
   const int ds_count = 1;
   decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, ds_count);
}
1057 
/* Decode the SCISSOR_RECT referenced by 3DSTATE_SCISSOR_STATE_POINTERS. */
static void
decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   const int rect_count = 1;
   decode_dynamic_state_pointers(ctx, "SCISSOR_RECT", p, rect_count);
}
1064 
/* Decode the SLICE_HASH_TABLE referenced by
 * 3DSTATE_SLICE_TABLE_STATE_POINTERS.
 */
static void
decode_3dstate_slice_table_state_pointers(struct intel_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   const int table_count = 1;
   decode_dynamic_state_pointers(ctx, "SLICE_HASH_TABLE", p, table_count);
}
1071 
1072 static void
handle_gt_mode(struct intel_batch_decode_ctx *ctx, uint32_t reg_addr, uint32_t val)1073 handle_gt_mode(struct intel_batch_decode_ctx *ctx,
1074                uint32_t reg_addr, uint32_t val)
1075 {
1076    struct intel_group *reg = intel_spec_find_register(ctx->spec, reg_addr);
1077 
1078    assert(intel_group_get_length(reg, &val) == 1);
1079 
1080    struct intel_field_iterator iter;
1081    intel_field_iterator_init(&iter, reg, &val, 0, false);
1082 
1083    uint32_t bt_alignment;
1084    bool bt_alignment_mask = 0;
1085 
1086    while (intel_field_iterator_next(&iter)) {
1087       if (strcmp(iter.name, "Binding Table Alignment") == 0) {
1088          bt_alignment = iter.raw_value;
1089       } else if (strcmp(iter.name, "Binding Table Alignment Mask") == 0) {
1090          bt_alignment_mask = iter.raw_value;
1091       }
1092    }
1093 
1094    if (bt_alignment_mask)
1095       ctx->use_256B_binding_tables = bt_alignment;
1096 }
1097 
1098 struct reg_handler {
1099    const char *name;
1100    void (*handler)(struct intel_batch_decode_ctx *ctx,
1101                    uint32_t reg_addr, uint32_t val);
1102 } reg_handlers[] = {
1103    { "GT_MODE", handle_gt_mode }
1104 };
1105 
1106 static void
decode_load_register_imm(struct intel_batch_decode_ctx *ctx, const uint32_t *p)1107 decode_load_register_imm(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1108 {
1109    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1110    const unsigned length = intel_group_get_length(inst, p);
1111    assert(length & 1);
1112    const unsigned nr_regs = (length - 1) / 2;
1113 
1114    for (unsigned i = 0; i < nr_regs; i++) {
1115       struct intel_group *reg = intel_spec_find_register(ctx->spec, p[i * 2 + 1]);
1116       if (reg != NULL) {
1117          fprintf(ctx->fp, "register %s (0x%x): 0x%x\n",
1118                  reg->name, reg->register_offset, p[2]);
1119          ctx_print_group(ctx, reg, reg->register_offset, &p[2]);
1120 
1121          for (unsigned i = 0; i < ARRAY_SIZE(reg_handlers); i++) {
1122             if (strcmp(reg->name, reg_handlers[i].name) == 0)
1123                reg_handlers[i].handler(ctx, p[1], p[2]);
1124          }
1125       }
1126    }
1127 }
1128 
1129 static void
decode_vs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)1130 decode_vs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1131 {
1132    struct intel_group *strct =
1133       intel_spec_find_struct(ctx->spec, "VS_STATE");
1134    if (strct == NULL) {
1135       fprintf(ctx->fp, "did not find VS_STATE info\n");
1136       return;
1137    }
1138 
1139    struct intel_batch_decode_bo bind_bo =
1140       ctx_get_bo(ctx, true, offset);
1141 
1142    if (bind_bo.map == NULL) {
1143       fprintf(ctx->fp, " vs state unavailable\n");
1144       return;
1145    }
1146 
1147    ctx_print_group(ctx, strct, offset, bind_bo.map);
1148 
1149    uint64_t ksp = 0;
1150    bool is_enabled = true;
1151    struct intel_field_iterator iter;
1152    intel_field_iterator_init(&iter, strct, bind_bo.map, 0, false);
1153    while (intel_field_iterator_next(&iter)) {
1154       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
1155          ksp = iter.raw_value;
1156       } else if (strcmp(iter.name, "Enable") == 0) {
1157 	is_enabled = iter.raw_value;
1158       }
1159    }
1160    if (is_enabled) {
1161       ctx_disassemble_program(ctx, ksp, "vertex shader");
1162       fprintf(ctx->fp, "\n");
1163    }
1164 }
1165 
1166 static void
decode_gs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)1167 decode_gs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1168 {
1169    struct intel_group *strct =
1170       intel_spec_find_struct(ctx->spec, "GS_STATE");
1171    if (strct == NULL) {
1172       fprintf(ctx->fp, "did not find GS_STATE info\n");
1173       return;
1174    }
1175 
1176    struct intel_batch_decode_bo bind_bo =
1177       ctx_get_bo(ctx, true, offset);
1178 
1179    if (bind_bo.map == NULL) {
1180       fprintf(ctx->fp, " gs state unavailable\n");
1181       return;
1182    }
1183 
1184    ctx_print_group(ctx, strct, offset, bind_bo.map);
1185 }
1186 
1187 static void
decode_clip_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)1188 decode_clip_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1189 {
1190    struct intel_group *strct =
1191       intel_spec_find_struct(ctx->spec, "CLIP_STATE");
1192    if (strct == NULL) {
1193       fprintf(ctx->fp, "did not find CLIP_STATE info\n");
1194       return;
1195    }
1196 
1197    struct intel_batch_decode_bo bind_bo =
1198       ctx_get_bo(ctx, true, offset);
1199 
1200    if (bind_bo.map == NULL) {
1201       fprintf(ctx->fp, " clip state unavailable\n");
1202       return;
1203    }
1204 
1205    ctx_print_group(ctx, strct, offset, bind_bo.map);
1206 
1207    struct intel_group *vp_strct =
1208       intel_spec_find_struct(ctx->spec, "CLIP_VIEWPORT");
1209    if (vp_strct == NULL) {
1210       fprintf(ctx->fp, "did not find CLIP_VIEWPORT info\n");
1211       return;
1212    }
1213    uint32_t clip_vp_offset = ((uint32_t *)bind_bo.map)[6] & ~0x3;
1214    struct intel_batch_decode_bo vp_bo =
1215       ctx_get_bo(ctx, true, clip_vp_offset);
1216    if (vp_bo.map == NULL) {
1217       fprintf(ctx->fp, " clip vp state unavailable\n");
1218       return;
1219    }
1220    ctx_print_group(ctx, vp_strct, clip_vp_offset, vp_bo.map);
1221 }
1222 
1223 static void
decode_sf_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)1224 decode_sf_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1225 {
1226    struct intel_group *strct =
1227       intel_spec_find_struct(ctx->spec, "SF_STATE");
1228    if (strct == NULL) {
1229       fprintf(ctx->fp, "did not find SF_STATE info\n");
1230       return;
1231    }
1232 
1233    struct intel_batch_decode_bo bind_bo =
1234       ctx_get_bo(ctx, true, offset);
1235 
1236    if (bind_bo.map == NULL) {
1237       fprintf(ctx->fp, " sf state unavailable\n");
1238       return;
1239    }
1240 
1241    ctx_print_group(ctx, strct, offset, bind_bo.map);
1242 
1243    struct intel_group *vp_strct =
1244       intel_spec_find_struct(ctx->spec, "SF_VIEWPORT");
1245    if (vp_strct == NULL) {
1246       fprintf(ctx->fp, "did not find SF_VIEWPORT info\n");
1247       return;
1248    }
1249 
1250    uint32_t sf_vp_offset = ((uint32_t *)bind_bo.map)[5] & ~0x3;
1251    struct intel_batch_decode_bo vp_bo =
1252       ctx_get_bo(ctx, true, sf_vp_offset);
1253    if (vp_bo.map == NULL) {
1254       fprintf(ctx->fp, " sf vp state unavailable\n");
1255       return;
1256    }
1257    ctx_print_group(ctx, vp_strct, sf_vp_offset, vp_bo.map);
1258 }
1259 
1260 static void
decode_wm_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)1261 decode_wm_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1262 {
1263    struct intel_group *strct =
1264       intel_spec_find_struct(ctx->spec, "WM_STATE");
1265    if (strct == NULL) {
1266       fprintf(ctx->fp, "did not find WM_STATE info\n");
1267       return;
1268    }
1269 
1270    struct intel_batch_decode_bo bind_bo =
1271       ctx_get_bo(ctx, true, offset);
1272 
1273    if (bind_bo.map == NULL) {
1274       fprintf(ctx->fp, " wm state unavailable\n");
1275       return;
1276    }
1277 
1278    ctx_print_group(ctx, strct, offset, bind_bo.map);
1279 
1280    decode_ps_kern(ctx, strct, bind_bo.map);
1281 }
1282 
1283 static void
decode_cc_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)1284 decode_cc_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1285 {
1286    struct intel_group *strct =
1287       intel_spec_find_struct(ctx->spec, "COLOR_CALC_STATE");
1288    if (strct == NULL) {
1289       fprintf(ctx->fp, "did not find COLOR_CALC_STATE info\n");
1290       return;
1291    }
1292 
1293    struct intel_batch_decode_bo bind_bo =
1294       ctx_get_bo(ctx, true, offset);
1295 
1296    if (bind_bo.map == NULL) {
1297       fprintf(ctx->fp, " cc state unavailable\n");
1298       return;
1299    }
1300 
1301    ctx_print_group(ctx, strct, offset, bind_bo.map);
1302 
1303    struct intel_group *vp_strct =
1304       intel_spec_find_struct(ctx->spec, "CC_VIEWPORT");
1305    if (vp_strct == NULL) {
1306       fprintf(ctx->fp, "did not find CC_VIEWPORT info\n");
1307       return;
1308    }
1309    uint32_t cc_vp_offset = ((uint32_t *)bind_bo.map)[4] & ~0x3;
1310    struct intel_batch_decode_bo vp_bo =
1311       ctx_get_bo(ctx, true, cc_vp_offset);
1312    if (vp_bo.map == NULL) {
1313       fprintf(ctx->fp, " cc vp state unavailable\n");
1314       return;
1315    }
1316    ctx_print_group(ctx, vp_strct, cc_vp_offset, vp_bo.map);
1317 }
1318 static void
decode_pipelined_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)1319 decode_pipelined_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1320 {
1321    fprintf(ctx->fp, "VS State Table:\n");
1322    decode_vs_state(ctx, p[1]);
1323    if (p[2] & 1) {
1324       fprintf(ctx->fp, "GS State Table:\n");
1325       decode_gs_state(ctx, p[2] & ~1);
1326    }
1327    fprintf(ctx->fp, "Clip State Table:\n");
1328    decode_clip_state(ctx, p[3] & ~1);
1329    fprintf(ctx->fp, "SF State Table:\n");
1330    decode_sf_state(ctx, p[4]);
1331    fprintf(ctx->fp, "WM State Table:\n");
1332    decode_wm_state(ctx, p[5]);
1333    fprintf(ctx->fp, "CC State Table:\n");
1334    decode_cc_state(ctx, p[6]);
1335 }
1336 
/* Decode the CPS_STATE referenced by 3DSTATE_CPS_POINTERS. */
static void
decode_cps_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   const int cps_count = 1;
   decode_dynamic_state_pointers(ctx, "CPS_STATE", p, cps_count);
}
1342 
1343 struct custom_decoder {
1344    const char *cmd_name;
1345    void (*decode)(struct intel_batch_decode_ctx *ctx, const uint32_t *p);
1346 } custom_decoders[] = {
1347    { "STATE_BASE_ADDRESS", handle_state_base_address },
1348    { "3DSTATE_BINDING_TABLE_POOL_ALLOC", handle_binding_table_pool_alloc },
1349    { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load },
1350    { "COMPUTE_WALKER", handle_compute_walker },
1351    { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers },
1352    { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer },
1353    { "3DSTATE_VS", decode_single_ksp },
1354    { "3DSTATE_GS", decode_single_ksp },
1355    { "3DSTATE_DS", decode_single_ksp },
1356    { "3DSTATE_HS", decode_single_ksp },
1357    { "3DSTATE_PS", decode_ps_kernels },
1358    { "3DSTATE_WM", decode_ps_kernels },
1359    { "3DSTATE_MESH_SHADER", decode_mesh_task_ksp },
1360    { "3DSTATE_TASK_SHADER", decode_mesh_task_ksp },
1361    { "3DSTATE_CONSTANT_VS", decode_3dstate_constant },
1362    { "3DSTATE_CONSTANT_GS", decode_3dstate_constant },
1363    { "3DSTATE_CONSTANT_PS", decode_3dstate_constant },
1364    { "3DSTATE_CONSTANT_HS", decode_3dstate_constant },
1365    { "3DSTATE_CONSTANT_DS", decode_3dstate_constant },
1366    { "3DSTATE_CONSTANT_ALL", decode_3dstate_constant_all },
1367 
1368    { "3DSTATE_BINDING_TABLE_POINTERS", decode_gfx4_3dstate_binding_table_pointers },
1369    { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers },
1370    { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers },
1371    { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers },
1372    { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers },
1373    { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers },
1374 
1375    { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers },
1376    { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers },
1377    { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers },
1378    { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers },
1379    { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers },
1380    { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gfx6 },
1381 
1382    { "3DSTATE_VIEWPORT_STATE_POINTERS", decode_3dstate_viewport_state_pointers },
1383    { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc },
1384    { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
1385    { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
1386    { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
1387    { "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers },
1388    { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
1389    { "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers },
1390    { "MI_LOAD_REGISTER_IMM", decode_load_register_imm },
1391    { "3DSTATE_PIPELINED_POINTERS", decode_pipelined_pointers },
1392    { "3DSTATE_CPS_POINTERS", decode_cps_pointers },
1393    { "CONSTANT_BUFFER", decode_gfx4_constant_buffer },
1394 };
1395 
/* Decode and print an entire batch buffer, recursing into chained batches.
 * `from_ring` indicates the batch was submitted from a ring buffer, which
 * changes how an unconditional MI_BATCH_BUFFER_START is treated (see below).
 */
void
intel_print_batch(struct intel_batch_decode_ctx *ctx,
                  const uint32_t *batch, uint32_t batch_size,
                  uint64_t batch_addr, bool from_ring)
{
   const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
   int length;
   struct intel_group *inst;
   const char *reset_color = ctx->flags & INTEL_BATCH_DECODE_IN_COLOR ? NORMAL : "";

   /* Recursion guard: refuse to follow more than 100 nested batch-buffer
    * jumps so a self-referencing chain cannot recurse forever.
    */
   if (ctx->n_batch_buffer_start >= 100) {
      fprintf(ctx->fp, "%s0x%08"PRIx64": Max batch buffer jumps exceeded%s\n",
              (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
              (ctx->flags & INTEL_BATCH_DECODE_OFFSETS) ? batch_addr : 0,
              reset_color);
      return;
   }

   ctx->n_batch_buffer_start++;

   for (p = batch; p < end; p += length) {
      inst = intel_ctx_find_instruction(ctx, p);
      length = intel_group_get_length(inst, p);
      assert(inst == NULL || length > 0);
      /* Always advance by at least one DWord, even past unknown data. */
      length = MAX2(1, length);

      uint64_t offset;
      if (ctx->flags & INTEL_BATCH_DECODE_OFFSETS)
         offset = batch_addr + ((char *)p - (char *)batch);
      else
         offset = 0;

      if (inst == NULL) {
         /* Unknown opcode: dump its raw DWords (in red when colorized)
          * and continue scanning.
          */
         fprintf(ctx->fp, "%s0x%08"PRIx64": unknown instruction %08x%s\n",
                 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
                 offset, p[0], reset_color);

         for (int i=1; i < length; i++) {
            fprintf(ctx->fp, "%s0x%08"PRIx64": -- %08x%s\n",
                 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
                 offset + i * 4, p[i], reset_color);
         }

         continue;
      }

      const char *color;
      const char *inst_name = intel_group_get_name(inst);
      if (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) {
         reset_color = NORMAL;
         if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
            /* Highlight batch-control commands so jumps stand out. */
            if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 ||
                strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0)
               color = GREEN_HEADER;
            else
               color = BLUE_HEADER;
         } else {
            color = NORMAL;
         }
      } else {
         color = "";
         reset_color = "";
      }

      /* Print the instruction header; tag the offset with "(ACTHD)" when it
       * matches the recorded active head pointer.
       */
      fprintf(ctx->fp, "%s0x%08"PRIx64"%s:  0x%08x:  %-80s%s\n", color, offset,
              ctx->acthd && offset == ctx->acthd ? " (ACTHD)" : "", p[0],
              inst_name, reset_color);

      if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
         ctx_print_group(ctx, inst, offset, p);

         /* Run any command-specific decoder (state dumps, shader
          * disassembly, decoder-state tracking).
          */
         for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) {
            if (strcmp(inst_name, custom_decoders[i].cmd_name) == 0) {
               custom_decoders[i].decode(ctx, p);
               break;
            }
         }
      }

      if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0) {
         uint64_t next_batch_addr = 0;
         bool ppgtt = false;
         bool second_level = false;
         bool predicate = false;
         struct intel_field_iterator iter;
         intel_field_iterator_init(&iter, inst, p, 0, false);
         while (intel_field_iterator_next(&iter)) {
            if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
               next_batch_addr = iter.raw_value;
            } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
               second_level = iter.raw_value;
            } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
               ppgtt = iter.raw_value;
            } else if (strcmp(iter.name, "Predication Enable") == 0) {
               predicate = iter.raw_value;
            }
         }

         /* Predicated jumps may not execute; skip following them. */
         if (!predicate) {
            struct intel_batch_decode_bo next_batch = ctx_get_bo(ctx, ppgtt, next_batch_addr);

            if (next_batch.map == NULL) {
               fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
                       next_batch_addr);
            } else {
               intel_print_batch(ctx, next_batch.map, next_batch.size,
                                 next_batch.addr, false);
            }
            if (second_level) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
                * like a subroutine call.  Commands that come afterwards get
                * processed once the 2nd level batch buffer returns with
                * MI_BATCH_BUFFER_END.
                */
               continue;
            } else if (!from_ring) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
                * like a goto.  Nothing after it will ever get processed.  In
                * order to prevent the recursion from growing, we just reset the
                * loop and continue;
                */
               break;
            }
         }
      } else if (strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) {
         break;
      }
   }

   ctx->n_batch_buffer_start--;
}
1527