1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "common/intel_decoder.h"
25 #include "intel_disasm.h"
26 #include "util/macros.h"
27 #include "util/u_math.h" /* Needed for ROUND_DOWN_TO */
28
29 #include <string.h>
30
31 void
intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx, const struct brw_isa_info *isa, const struct intel_device_info *devinfo, FILE *fp, enum intel_batch_decode_flags flags, const char *xml_path, struct intel_batch_decode_bo (*get_bo)(void *, bool, uint64_t), unsigned (*get_state_size)(void *, uint64_t, uint64_t), void *user_data)32 intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx,
33 const struct brw_isa_info *isa,
34 const struct intel_device_info *devinfo,
35 FILE *fp, enum intel_batch_decode_flags flags,
36 const char *xml_path,
37 struct intel_batch_decode_bo (*get_bo)(void *,
38 bool,
39 uint64_t),
40 unsigned (*get_state_size)(void *, uint64_t,
41 uint64_t),
42 void *user_data)
43 {
44 memset(ctx, 0, sizeof(*ctx));
45
46 ctx->isa = isa;
47 ctx->devinfo = *devinfo;
48 ctx->get_bo = get_bo;
49 ctx->get_state_size = get_state_size;
50 ctx->user_data = user_data;
51 ctx->fp = fp;
52 ctx->flags = flags;
53 ctx->max_vbo_decoded_lines = -1; /* No limit! */
54 ctx->engine = I915_ENGINE_CLASS_RENDER;
55
56 if (xml_path == NULL)
57 ctx->spec = intel_spec_load(devinfo);
58 else
59 ctx->spec = intel_spec_load_from_path(devinfo, xml_path);
60 }
61
/* Release resources owned by the decode context (currently just the spec
 * loaded at init time).  The context struct itself is caller-owned and is
 * not freed here. */
void
intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx)
{
   intel_spec_destroy(ctx->spec);
}
67
/* ANSI escape sequences used to colorize decoder output.
 * "\033" (ESC, 0x1b) is the standard-C spelling of the GNU-only "\e"
 * extension, so this also builds with non-GNU compilers. */
#define CSI "\033["
#define RED_COLOR CSI "31m"
#define BLUE_HEADER CSI "0;44m" CSI "1;37m"
#define GREEN_HEADER CSI "1;42m"
#define NORMAL CSI "0m"
73
/* Print a decoded group (instruction or struct) located at 'address',
 * whose raw dwords live at 'map', honoring the context's color flag. */
static void
ctx_print_group(struct intel_batch_decode_ctx *ctx,
                struct intel_group *group,
                uint64_t address, const void *map)
{
   intel_print_group(ctx->fp, group, address, map, 0,
                     (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) != 0);
}
82
83 static struct intel_batch_decode_bo
ctx_get_bo(struct intel_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr)84 ctx_get_bo(struct intel_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr)
85 {
86 if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0)) {
87 /* On Broadwell and above, we have 48-bit addresses which consume two
88 * dwords. Some packets require that these get stored in a "canonical
89 * form" which means that bit 47 is sign-extended through the upper
90 * bits. In order to correctly handle those aub dumps, we need to mask
91 * off the top 16 bits.
92 */
93 addr &= (~0ull >> 16);
94 }
95
96 struct intel_batch_decode_bo bo = ctx->get_bo(ctx->user_data, ppgtt, addr);
97
98 if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0))
99 bo.addr &= (~0ull >> 16);
100
101 /* We may actually have an offset into the bo */
102 if (bo.map != NULL) {
103 assert(bo.addr <= addr);
104 uint64_t offset = addr - bo.addr;
105 bo.map += offset;
106 bo.addr += offset;
107 bo.size -= offset;
108 }
109
110 return bo;
111 }
112
113 static int
update_count(struct intel_batch_decode_ctx *ctx, uint64_t address, uint64_t base_address, unsigned element_dwords, unsigned guess)114 update_count(struct intel_batch_decode_ctx *ctx,
115 uint64_t address,
116 uint64_t base_address,
117 unsigned element_dwords,
118 unsigned guess)
119 {
120 unsigned size = 0;
121
122 if (ctx->get_state_size)
123 size = ctx->get_state_size(ctx->user_data, address, base_address);
124
125 if (size > 0)
126 return size / (sizeof(uint32_t) * element_dwords);
127
128 /* In the absence of any information, just guess arbitrarily. */
129 return guess;
130 }
131
/* Disassemble a shader whose kernel start pointer 'ksp' is an offset from
 * the context's instruction base address.  'type' is a human-readable
 * label printed above the disassembly.  Prints nothing if the backing BO
 * cannot be resolved. */
static void
ctx_disassemble_program(struct intel_batch_decode_ctx *ctx,
                        uint32_t ksp, const char *type)
{
   uint64_t addr = ctx->instruction_base + ksp;
   struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
   if (!bo.map)
      return;

   fprintf(ctx->fp, "\nReferenced %s:\n", type);
   intel_disassemble(ctx->isa, bo.map, 0, ctx->fp);
}
144
145 /* Heuristic to determine whether a uint32_t is probably actually a float
146 * (http://stackoverflow.com/a/2953466)
147 */
148
/* Heuristic: does this bit pattern look like an IEEE-754 float?
 * (http://stackoverflow.com/a/2953466)
 */
static bool
probably_float(uint32_t bits)
{
   const int exponent = (int)((bits >> 23) & 0xff) - 127;
   const uint32_t mantissa = bits & 0x007fffff;

   /* Exactly +/- 0.0 */
   if (exponent == -127 && mantissa == 0)
      return true;

   /* Magnitudes from roughly one billionth to one billion */
   if (exponent >= -30 && exponent <= 30)
      return true;

   /* A value with only a few high-order mantissa bits set */
   if ((mantissa & 0xffff) == 0)
      return true;

   return false;
}
169
/* Hex-dump (or float-dump, when INTEL_BATCH_DECODE_FLOATS is set and the
 * bits look float-like) the first 'read_length' bytes of 'bo', eight
 * dwords per line.  A non-zero 'pitch' (bytes) additionally breaks lines
 * at stride boundaries; a non-negative 'max_lines' truncates the dump.
 */
static void
ctx_print_buffer(struct intel_batch_decode_ctx *ctx,
                 struct intel_batch_decode_bo bo,
                 uint32_t read_length,
                 uint32_t pitch,
                 int max_lines)
{
   /* Clamp to the BO size and round down to whole dwords. */
   const uint32_t *dw_end =
      bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4);

   /* line_count starts at -1 so the line break emitted before the first
    * dword does not count against max_lines. */
   int column_count = 0, pitch_col_count = 0, line_count = -1;
   for (const uint32_t *dw = bo.map; dw < dw_end; dw++) {
      if (pitch_col_count * 4 == pitch || column_count == 8) {
         fprintf(ctx->fp, "\n");
         column_count = 0;
         if (pitch_col_count * 4 == pitch)
            pitch_col_count = 0;
         line_count++;

         if (max_lines >= 0 && line_count >= max_lines)
            break;
      }
      fprintf(ctx->fp, column_count == 0 ? " " : " ");

      /* NOTE(review): *(float *)dw type-puns through a pointer cast --
       * technically a strict-aliasing violation; presumably tolerated by
       * the project's build flags.  Confirm before reusing elsewhere. */
      if ((ctx->flags & INTEL_BATCH_DECODE_FLOATS) && probably_float(*dw))
         fprintf(ctx->fp, " %8.2f", *(float *) dw);
      else
         fprintf(ctx->fp, " 0x%08x", *dw);

      column_count++;
      pitch_col_count++;
   }
   fprintf(ctx->fp, "\n");
}
204
/* Look up the decode table entry for the instruction at 'p', using the
 * engine class currently selected on the context. */
static struct intel_group *
intel_ctx_find_instruction(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   return intel_spec_find_instruction(ctx->spec, ctx->engine, p);
}
210
211 static void
handle_state_base_address(struct intel_batch_decode_ctx *ctx, const uint32_t *p)212 handle_state_base_address(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
213 {
214 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
215
216 struct intel_field_iterator iter;
217 intel_field_iterator_init(&iter, inst, p, 0, false);
218
219 uint64_t surface_base = 0, dynamic_base = 0, instruction_base = 0;
220 bool surface_modify = 0, dynamic_modify = 0, instruction_modify = 0;
221
222 while (intel_field_iterator_next(&iter)) {
223 if (strcmp(iter.name, "Surface State Base Address") == 0) {
224 surface_base = iter.raw_value;
225 } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) {
226 dynamic_base = iter.raw_value;
227 } else if (strcmp(iter.name, "Instruction Base Address") == 0) {
228 instruction_base = iter.raw_value;
229 } else if (strcmp(iter.name, "Surface State Base Address Modify Enable") == 0) {
230 surface_modify = iter.raw_value;
231 } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
232 dynamic_modify = iter.raw_value;
233 } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
234 instruction_modify = iter.raw_value;
235 }
236 }
237
238 if (dynamic_modify)
239 ctx->dynamic_base = dynamic_base;
240
241 if (surface_modify)
242 ctx->surface_base = surface_base;
243
244 if (instruction_modify)
245 ctx->instruction_base = instruction_base;
246 }
247
248 static void
handle_binding_table_pool_alloc(struct intel_batch_decode_ctx *ctx, const uint32_t *p)249 handle_binding_table_pool_alloc(struct intel_batch_decode_ctx *ctx,
250 const uint32_t *p)
251 {
252 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
253
254 struct intel_field_iterator iter;
255 intel_field_iterator_init(&iter, inst, p, 0, false);
256
257 uint64_t bt_pool_base = 0;
258 bool bt_pool_enable = false;
259
260 while (intel_field_iterator_next(&iter)) {
261 if (strcmp(iter.name, "Binding Table Pool Base Address") == 0) {
262 bt_pool_base = iter.raw_value;
263 } else if (strcmp(iter.name, "Binding Table Pool Enable") == 0) {
264 bt_pool_enable = iter.raw_value;
265 }
266 }
267
268 if (bt_pool_enable || ctx->devinfo.verx10 >= 125) {
269 ctx->bt_pool_base = bt_pool_base;
270 } else {
271 ctx->bt_pool_base = 0;
272 }
273 }
274
/* Decode and print a binding table: an array of 32-bit pointers to
 * RENDER_SURFACE_STATE structs.  'offset' is relative to the binding table
 * pool base (or surface state base when no pool is programmed).  A
 * negative 'count' asks update_count() for the real entry count,
 * defaulting to a guess of 8.
 */
static void
dump_binding_table(struct intel_batch_decode_ctx *ctx,
                   uint32_t offset, int count)
{
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "RENDER_SURFACE_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find RENDER_SURFACE_STATE info\n");
      return;
   }

   /* Most platforms use a 16-bit pointer with 32B alignment in bits 15:5. */
   uint32_t btp_alignment = 32;
   uint32_t btp_pointer_bits = 16;

   if (ctx->devinfo.verx10 >= 125) {
      /* The pointer is now 21-bit with 32B alignment in bits 20:5. */
      btp_pointer_bits = 21;
   } else if (ctx->use_256B_binding_tables) {
      /* When 256B binding tables are enabled, we have to shift the offset
       * which is stored in bits 15:5 but interpreted as bits 18:8 of the
       * actual offset. The effective pointer is 19-bit with 256B alignment.
       */
      offset <<= 3;
      btp_pointer_bits = 19;
      btp_alignment = 256;
   }

   /* Table offsets are relative to the pool base when one is set,
    * otherwise to surface state base. */
   const uint64_t bt_pool_base = ctx->bt_pool_base ? ctx->bt_pool_base :
                                 ctx->surface_base;

   if (count < 0) {
      count = update_count(ctx, bt_pool_base + offset,
                           bt_pool_base, 1, 8);
   }

   if (offset % btp_alignment != 0 || offset >= (1u << btp_pointer_bits)) {
      fprintf(ctx->fp, " invalid binding table pointer\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, bt_pool_base + offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, " binding table unavailable\n");
      return;
   }

   const uint32_t *pointers = bind_bo.map;
   for (int i = 0; i < count; i++) {
      /* A zero entry is an unbound surface slot. */
      if (pointers[i] == 0)
         continue;

      uint64_t addr = ctx->surface_base + pointers[i];
      struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
      uint32_t size = strct->dw_length * 4;

      /* Reject misaligned pointers and surface state that would run past
       * the end of the resolved BO. */
      if (pointers[i] % 32 != 0 ||
          addr < bo.addr || addr + size >= bo.addr + bo.size) {
         fprintf(ctx->fp, "pointer %u: 0x%08x <not valid>\n", i, pointers[i]);
         continue;
      }

      fprintf(ctx->fp, "pointer %u: 0x%08x\n", i, pointers[i]);
      ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr));
   }
}
343
344 static void
dump_samplers(struct intel_batch_decode_ctx *ctx, uint32_t offset, int count)345 dump_samplers(struct intel_batch_decode_ctx *ctx, uint32_t offset, int count)
346 {
347 struct intel_group *strct = intel_spec_find_struct(ctx->spec, "SAMPLER_STATE");
348 uint64_t state_addr = ctx->dynamic_base + offset;
349
350 assert(count > 0);
351
352 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
353 const void *state_map = bo.map;
354
355 if (state_map == NULL) {
356 fprintf(ctx->fp, " samplers unavailable\n");
357 return;
358 }
359
360 if (offset % 32 != 0) {
361 fprintf(ctx->fp, " invalid sampler state pointer\n");
362 return;
363 }
364
365 const unsigned sampler_state_size = strct->dw_length * 4;
366
367 if (count * sampler_state_size >= bo.size) {
368 fprintf(ctx->fp, " sampler state ends after bo ends\n");
369 assert(!"sampler state ends after bo ends");
370 return;
371 }
372
373 for (int i = 0; i < count; i++) {
374 fprintf(ctx->fp, "sampler state %d\n", i);
375 ctx_print_group(ctx, strct, state_addr, state_map);
376 state_addr += sampler_state_size;
377 state_map += sampler_state_size;
378 }
379 }
380
/* Decode the payload of an INTERFACE_DESCRIPTOR_DATA struct: disassemble
 * the compute kernel and dump the sampler and binding table state it
 * references.  Values are parsed from the iterator's string form
 * ('iter.value'), hence the strtol/strtoll calls.
 */
static void
handle_interface_descriptor_data(struct intel_batch_decode_ctx *ctx,
                                 struct intel_group *desc, const uint32_t *p)
{
   uint64_t ksp = 0;
   uint32_t sampler_offset = 0, sampler_count = 0;
   uint32_t binding_table_offset = 0, binding_entry_count = 0;

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, desc, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
         ksp = strtoll(iter.value, NULL, 16);
      } else if (strcmp(iter.name, "Sampler State Pointer") == 0) {
         sampler_offset = strtol(iter.value, NULL, 16);
      } else if (strcmp(iter.name, "Sampler Count") == 0) {
         sampler_count = strtol(iter.value, NULL, 10);
      } else if (strcmp(iter.name, "Binding Table Pointer") == 0) {
         binding_table_offset = strtol(iter.value, NULL, 16);
      } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) {
         binding_entry_count = strtol(iter.value, NULL, 10);
      }
   }

   ctx_disassemble_program(ctx, ksp, "compute shader");
   fprintf(ctx->fp, "\n");

   /* Only dump state that the descriptor actually references. */
   if (sampler_count)
      dump_samplers(ctx, sampler_offset, sampler_count);
   if (binding_entry_count)
      dump_binding_table(ctx, binding_table_offset, binding_entry_count);
}
413
414 static void
handle_media_interface_descriptor_load(struct intel_batch_decode_ctx *ctx, const uint32_t *p)415 handle_media_interface_descriptor_load(struct intel_batch_decode_ctx *ctx,
416 const uint32_t *p)
417 {
418 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
419 struct intel_group *desc =
420 intel_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
421
422 struct intel_field_iterator iter;
423 intel_field_iterator_init(&iter, inst, p, 0, false);
424 uint32_t descriptor_offset = 0;
425 int descriptor_count = 0;
426 while (intel_field_iterator_next(&iter)) {
427 if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) {
428 descriptor_offset = strtol(iter.value, NULL, 16);
429 } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) {
430 descriptor_count =
431 strtol(iter.value, NULL, 16) / (desc->dw_length * 4);
432 }
433 }
434
435 uint64_t desc_addr = ctx->dynamic_base + descriptor_offset;
436 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, desc_addr);
437 const void *desc_map = bo.map;
438
439 if (desc_map == NULL) {
440 fprintf(ctx->fp, " interface descriptors unavailable\n");
441 return;
442 }
443
444 for (int i = 0; i < descriptor_count; i++) {
445 fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
446
447 ctx_print_group(ctx, desc, desc_addr, desc_map);
448
449 handle_interface_descriptor_data(ctx, desc, desc_map);
450
451 desc_map += desc->dw_length;
452 desc_addr += desc->dw_length * 4;
453 }
454 }
455
/* COMPUTE_WALKER embeds its INTERFACE_DESCRIPTOR inline in the packet
 * rather than pointing into dynamic state; find that sub-struct and decode
 * it in place. */
static void
handle_compute_walker(struct intel_batch_decode_ctx *ctx,
                      const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (strcmp(iter.name, "Interface Descriptor") == 0) {
         /* Pass the dword where the embedded struct begins. */
         handle_interface_descriptor_data(ctx, iter.struct_desc,
                                          &iter.p[iter.start_bit / 32]);
      }
   }
}
471
/* Decode 3DSTATE_VERTEX_BUFFERS: walk the packet's VERTEX_BUFFER_STATE
 * sub-structs, collect each buffer's index, pitch, address and size, then
 * hex-dump the contents.  Older generations express the size via an
 * inclusive "End Address" instead of an explicit "Buffer Size"; either
 * field sets 'ready', marking a complete per-buffer record.
 */
static void
handle_3dstate_vertex_buffers(struct intel_batch_decode_ctx *ctx,
                              const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
   struct intel_group *vbs = intel_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE");

   struct intel_batch_decode_bo vb = {};
   uint32_t vb_size = 0;
   int index = -1;
   int pitch = -1;
   bool ready = false;

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (iter.struct_desc != vbs)
         continue;

      struct intel_field_iterator vbs_iter;
      intel_field_iterator_init(&vbs_iter, vbs, &iter.p[iter.start_bit / 32], 0, false);
      while (intel_field_iterator_next(&vbs_iter)) {
         if (strcmp(vbs_iter.name, "Vertex Buffer Index") == 0) {
            index = vbs_iter.raw_value;
         } else if (strcmp(vbs_iter.name, "Buffer Pitch") == 0) {
            pitch = vbs_iter.raw_value;
         } else if (strcmp(vbs_iter.name, "Buffer Starting Address") == 0) {
            vb = ctx_get_bo(ctx, true, vbs_iter.raw_value);
         } else if (strcmp(vbs_iter.name, "Buffer Size") == 0) {
            vb_size = vbs_iter.raw_value;
            ready = true;
         } else if (strcmp(vbs_iter.name, "End Address") == 0) {
            /* End Address is inclusive, hence the +1. */
            if (vb.map && vbs_iter.raw_value >= vb.addr)
               vb_size = (vbs_iter.raw_value + 1) - vb.addr;
            else
               vb_size = 0;
            ready = true;
         }

         if (!ready)
            continue;

         fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size);

         if (vb.map == NULL) {
            fprintf(ctx->fp, " buffer contents unavailable\n");
            /* NOTE(review): this continue skips the per-buffer reset at
             * the bottom of the loop, so 'ready'/'index'/'pitch' stay
             * stale for the next record -- looks suspicious; confirm
             * whether the reset should happen here too. */
            continue;
         }

         if (vb.map == 0 || vb_size == 0)
            continue;

         ctx_print_buffer(ctx, vb, vb_size, pitch, ctx->max_vbo_decoded_lines);

         /* Reset per-buffer state for the next VERTEX_BUFFER_STATE. */
         vb.map = NULL;
         vb_size = 0;
         index = -1;
         pitch = -1;
         ready = false;
      }
   }
}
534
/* Decode 3DSTATE_INDEX_BUFFER and print up to the first 10 indices.
 * 'format' selects the index width: 0 = byte, 1 = word, 2 = dword
 * (an unknown format leaves 'm' unadvanced; the i < 10 bound still
 * terminates the loop).
 */
static void
handle_3dstate_index_buffer(struct intel_batch_decode_ctx *ctx,
                            const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);

   struct intel_batch_decode_bo ib = {};
   uint32_t ib_size = 0;
   uint32_t format = 0;

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (strcmp(iter.name, "Index Format") == 0) {
         format = iter.raw_value;
      } else if (strcmp(iter.name, "Buffer Starting Address") == 0) {
         ib = ctx_get_bo(ctx, true, iter.raw_value);
      } else if (strcmp(iter.name, "Buffer Size") == 0) {
         ib_size = iter.raw_value;
      }
   }

   if (ib.map == NULL) {
      fprintf(ctx->fp, " buffer contents unavailable\n");
      return;
   }

   /* Clamp to both the stated size and the size of the resolved BO. */
   const void *m = ib.map;
   const void *ib_end = ib.map + MIN2(ib.size, ib_size);
   for (int i = 0; m < ib_end && i < 10; i++) {
      switch (format) {
      case 0:
         fprintf(ctx->fp, "%3d ", *(uint8_t *)m);
         m += 1;
         break;
      case 1:
         fprintf(ctx->fp, "%3d ", *(uint16_t *)m);
         m += 2;
         break;
      case 2:
         fprintf(ctx->fp, "%3d ", *(uint32_t *)m);
         m += 4;
         break;
      }
   }

   /* Indicate there were more indices than we printed. */
   if (m < ib_end)
      fprintf(ctx->fp, "...");
   fprintf(ctx->fp, "\n");
}
585
/* Decode a shader-stage packet that carries a single kernel start pointer
 * (VS/GS/HS/DS packets and the legacy *_STATE forms) and disassemble the
 * program when the stage is enabled.
 */
static void
decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);

   uint64_t ksp = 0;
   bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gfx8+ only */
   bool is_enabled = true;

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
         ksp = iter.raw_value;
      } else if (strcmp(iter.name, "SIMD8 Dispatch Enable") == 0) {
         is_simd8 = iter.raw_value;
      } else if (strcmp(iter.name, "Dispatch Mode") == 0) {
         is_simd8 = strcmp(iter.value, "SIMD8") == 0;
      } else if (strcmp(iter.name, "Dispatch Enable") == 0) {
         is_simd8 = strcmp(iter.value, "SIMD8") == 0;
      } else if (strcmp(iter.name, "Enable") == 0) {
         is_enabled = iter.raw_value;
      }
   }

   /* NOTE(review): 'type' is NULL for any packet name not listed below and
    * is then handed to a "%s" format inside ctx_disassemble_program --
    * presumably callers only route these packets here; confirm. */
   const char *type =
      strcmp(inst->name, "VS_STATE") == 0 ? "vertex shader" :
      strcmp(inst->name, "GS_STATE") == 0 ? "geometry shader" :
      strcmp(inst->name, "SF_STATE") == 0 ? "strips and fans shader" :
      strcmp(inst->name, "CLIP_STATE") == 0 ? "clip shader" :
      strcmp(inst->name, "3DSTATE_DS") == 0 ? "tessellation evaluation shader" :
      strcmp(inst->name, "3DSTATE_HS") == 0 ? "tessellation control shader" :
      strcmp(inst->name, "3DSTATE_VS") == 0 ? (is_simd8 ? "SIMD8 vertex shader" : "vec4 vertex shader") :
      strcmp(inst->name, "3DSTATE_GS") == 0 ? (is_simd8 ? "SIMD8 geometry shader" : "vec4 geometry shader") :
      NULL;

   if (is_enabled) {
      ctx_disassemble_program(ctx, ksp, type);
      fprintf(ctx->fp, "\n");
   }
}
627
628 static void
decode_mesh_task_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)629 decode_mesh_task_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
630 {
631 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
632
633 uint64_t ksp = 0;
634 uint64_t local_x_maximum = 0;
635 uint64_t threads = 0;
636
637 struct intel_field_iterator iter;
638 intel_field_iterator_init(&iter, inst, p, 0, false);
639 while (intel_field_iterator_next(&iter)) {
640 if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
641 ksp = iter.raw_value;
642 } else if (strcmp(iter.name, "Local X Maximum") == 0) {
643 local_x_maximum = iter.raw_value;
644 } else if (strcmp(iter.name, "Number of Threads in GPGPU Thread Group") == 0) {
645 threads = iter.raw_value;
646 }
647 }
648
649 const char *type =
650 strcmp(inst->name, "3DSTATE_MESH_SHADER") == 0 ? "mesh shader" :
651 strcmp(inst->name, "3DSTATE_TASK_SHADER") == 0 ? "task shader" :
652 NULL;
653
654 if (threads && local_x_maximum) {
655 ctx_disassemble_program(ctx, ksp, type);
656 fprintf(ctx->fp, "\n");
657 }
658 }
659
/* Decode pixel-shader kernel pointers from WM/PS state and disassemble the
 * enabled dispatch modes.  The hardware stores up to three KSPs whose slot
 * assignment depends on which dispatch widths are enabled; this reorders
 * them into [SIMD8, SIMD16, SIMD32] before printing.
 */
static void
decode_ps_kern(struct intel_batch_decode_ctx *ctx,
               struct intel_group *inst, const uint32_t *p)
{
   /* Gfx4 has a single kernel pointer shared by all dispatch widths. */
   bool single_ksp = ctx->devinfo.ver == 4;
   uint64_t ksp[3] = {0, 0, 0};
   bool enabled[3] = {false, false, false};

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (strncmp(iter.name, "Kernel Start Pointer ",
                  strlen("Kernel Start Pointer ")) == 0) {
         /* NOTE(review): idx is the digit following the prefix and is used
          * unchecked -- assumes the spec only names pointers 0..2; an
          * unexpected field name would index out of bounds.  Confirm. */
         int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
         ksp[idx] = strtol(iter.value, NULL, 16);
      } else if (strcmp(iter.name, "8 Pixel Dispatch Enable") == 0) {
         enabled[0] = strcmp(iter.value, "true") == 0;
      } else if (strcmp(iter.name, "16 Pixel Dispatch Enable") == 0) {
         enabled[1] = strcmp(iter.value, "true") == 0;
      } else if (strcmp(iter.name, "32 Pixel Dispatch Enable") == 0) {
         enabled[2] = strcmp(iter.value, "true") == 0;
      }
   }

   if (single_ksp)
      ksp[1] = ksp[2] = ksp[0];

   /* With exactly one mode enabled the kernel lives in KSP[0]; move it to
    * the slot matching its width.  Otherwise KSP[1] and KSP[2] are swapped
    * relative to the hardware layout. */
   if (enabled[0] + enabled[1] + enabled[2] == 1) {
      if (enabled[1]) {
         ksp[1] = ksp[0];
         ksp[0] = 0;
      } else if (enabled[2]) {
         ksp[2] = ksp[0];
         ksp[0] = 0;
      }
   } else {
      uint64_t tmp = ksp[1];
      ksp[1] = ksp[2];
      ksp[2] = tmp;
   }

   if (enabled[0])
      ctx_disassemble_program(ctx, ksp[0], "SIMD8 fragment shader");
   if (enabled[1])
      ctx_disassemble_program(ctx, ksp[1], "SIMD16 fragment shader");
   if (enabled[2])
      ctx_disassemble_program(ctx, ksp[2], "SIMD32 fragment shader");

   if (enabled[0] || enabled[1] || enabled[2])
      fprintf(ctx->fp, "\n");
}
712
/* Thin wrapper: look up the packet's decode group and hand off to
 * decode_ps_kern(). */
static void
decode_ps_kernels(struct intel_batch_decode_ctx *ctx,
                  const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
   decode_ps_kern(ctx, inst, p);
}
720
721 static void
decode_3dstate_constant_all(struct intel_batch_decode_ctx *ctx, const uint32_t *p)722 decode_3dstate_constant_all(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
723 {
724 struct intel_group *inst =
725 intel_spec_find_instruction(ctx->spec, ctx->engine, p);
726 struct intel_group *body =
727 intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_ALL_DATA");
728
729 uint32_t read_length[4] = {0};
730 struct intel_batch_decode_bo buffer[4];
731 memset(buffer, 0, sizeof(buffer));
732
733 struct intel_field_iterator outer;
734 intel_field_iterator_init(&outer, inst, p, 0, false);
735 int idx = 0;
736 while (intel_field_iterator_next(&outer)) {
737 if (outer.struct_desc != body)
738 continue;
739
740 struct intel_field_iterator iter;
741 intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
742 0, false);
743 while (intel_field_iterator_next(&iter)) {
744 if (!strcmp(iter.name, "Pointer To Constant Buffer")) {
745 buffer[idx] = ctx_get_bo(ctx, true, iter.raw_value);
746 } else if (!strcmp(iter.name, "Constant Buffer Read Length")) {
747 read_length[idx] = iter.raw_value;
748 }
749 }
750 idx++;
751 }
752
753 for (int i = 0; i < 4; i++) {
754 if (read_length[i] == 0 || buffer[i].map == NULL)
755 continue;
756
757 unsigned size = read_length[i] * 32;
758 fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
759
760 ctx_print_buffer(ctx, buffer[i], size, 0, -1);
761 }
762 }
763
764 static void
decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p)765 decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
766 {
767 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
768 struct intel_group *body =
769 intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");
770
771 uint32_t read_length[4] = {0};
772 uint64_t read_addr[4] = {0};
773
774 struct intel_field_iterator outer;
775 intel_field_iterator_init(&outer, inst, p, 0, false);
776 while (intel_field_iterator_next(&outer)) {
777 if (outer.struct_desc != body)
778 continue;
779
780 struct intel_field_iterator iter;
781 intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
782 0, false);
783
784 while (intel_field_iterator_next(&iter)) {
785 int idx;
786 if (sscanf(iter.name, "Read Length[%d]", &idx) == 1) {
787 read_length[idx] = iter.raw_value;
788 } else if (sscanf(iter.name, "Buffer[%d]", &idx) == 1) {
789 read_addr[idx] = iter.raw_value;
790 }
791 }
792
793 for (int i = 0; i < 4; i++) {
794 if (read_length[i] == 0)
795 continue;
796
797 struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr[i]);
798 if (!buffer.map) {
799 fprintf(ctx->fp, "constant buffer %d unavailable\n", i);
800 continue;
801 }
802
803 unsigned size = read_length[i] * 32;
804 fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
805
806 ctx_print_buffer(ctx, buffer, size, 0, -1);
807 }
808 }
809 }
810
811 static void
decode_gfx4_constant_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p)812 decode_gfx4_constant_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
813 {
814 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
815 uint64_t read_length = 0, read_addr = 0, valid = 0;
816 struct intel_field_iterator iter;
817 intel_field_iterator_init(&iter, inst, p, 0, false);
818
819 while (intel_field_iterator_next(&iter)) {
820 if (!strcmp(iter.name, "Buffer Length")) {
821 read_length = iter.raw_value;
822 } else if (!strcmp(iter.name, "Valid")) {
823 valid = iter.raw_value;
824 } else if (!strcmp(iter.name, "Buffer Starting Address")) {
825 read_addr = iter.raw_value;
826 }
827 }
828
829 if (!valid)
830 return;
831
832 struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr);
833 if (!buffer.map) {
834 fprintf(ctx->fp, "constant buffer unavailable\n");
835 return;
836 }
837 unsigned size = (read_length + 1) * 16 * sizeof(float);
838 fprintf(ctx->fp, "constant buffer size %u\n", size);
839
840 ctx_print_buffer(ctx, buffer, size, 0, -1);
841 }
842
843
/* Decode the legacy 3DSTATE_BINDING_TABLE_POINTERS, which carries one
 * table offset per fixed-function stage.  Pre-gen6 the packet also has
 * CLIP and SF entries (dwords 3-5); on gen6 PS moves up to dword 3.
 */
static void
decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
                                           const uint32_t *p)
{
   fprintf(ctx->fp, "VS Binding Table:\n");
   dump_binding_table(ctx, p[1], -1);

   fprintf(ctx->fp, "GS Binding Table:\n");
   dump_binding_table(ctx, p[2], -1);

   if (ctx->devinfo.ver < 6) {
      fprintf(ctx->fp, "CLIP Binding Table:\n");
      dump_binding_table(ctx, p[3], -1);
      fprintf(ctx->fp, "SF Binding Table:\n");
      dump_binding_table(ctx, p[4], -1);
      fprintf(ctx->fp, "PS Binding Table:\n");
      dump_binding_table(ctx, p[5], -1);
   } else {
      fprintf(ctx->fp, "PS Binding Table:\n");
      dump_binding_table(ctx, p[3], -1);
   }
}
866
/* Per-stage 3DSTATE_BINDING_TABLE_POINTERS_*: dword 1 holds the table
 * offset; -1 lets dump_binding_table() determine the entry count. */
static void
decode_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   dump_binding_table(ctx, p[1], -1);
}
873
/* Per-stage 3DSTATE_SAMPLER_STATE_POINTERS_*: dword 1 holds the sampler
 * state offset; only the first sampler is dumped. */
static void
decode_3dstate_sampler_state_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   dump_samplers(ctx, p[1], 1);
}
880
/* Gfx6 3DSTATE_SAMPLER_STATE_POINTERS packs three per-stage sampler state
 * offsets into dwords 1-3; dump the first sampler of each. */
static void
decode_3dstate_sampler_state_pointers_gfx6(struct intel_batch_decode_ctx *ctx,
                                           const uint32_t *p)
{
   for (int dw = 1; dw <= 3; dw++)
      dump_samplers(ctx, p[dw], 1);
}
889
/* Return true when 'str' ends with the suffix 'end'. */
static bool
str_ends_with(const char *str, const char *end)
{
   const size_t str_len = strlen(str);
   const size_t end_len = strlen(end);

   /* Compare as size_t: the previous "int offset = strlen(str) -
    * strlen(end)" wrapped the unsigned subtraction and then relied on
    * implementation-defined conversion to int to become negative. */
   if (end_len > str_len)
      return false;

   return strcmp(str + (str_len - end_len), end) == 0;
}
899
/* Print "count" consecutive structures of type "struct_type" located at
 * state_offset from the dynamic state base address.
 *
 * BLEND_STATE gets special handling: the packet points at a BLEND_STATE
 * header which is followed by a variable number of BLEND_STATE_ENTRY
 * structs, so after printing the header we switch to decoding entries.
 */
static void
decode_dynamic_state(struct intel_batch_decode_ctx *ctx,
                     const char *struct_type, uint32_t state_offset,
                     int count)
{
   uint64_t state_addr = ctx->dynamic_base + state_offset;
   struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
   const void *state_map = bo.map;

   if (state_map == NULL) {
      fprintf(ctx->fp, " dynamic %s state unavailable\n", struct_type);
      return;
   }

   struct intel_group *state = intel_spec_find_struct(ctx->spec, struct_type);
   if (strcmp(struct_type, "BLEND_STATE") == 0) {
      /* Blend states are different from the others because they have a header
       * struct called BLEND_STATE which is followed by a variable number of
       * BLEND_STATE_ENTRY structs.
       */
      fprintf(ctx->fp, "%s\n", struct_type);
      ctx_print_group(ctx, state, state_addr, state_map);

      /* NOTE: pointer arithmetic on void* is a GNU extension (byte steps). */
      state_addr += state->dw_length * 4;
      state_map += state->dw_length * 4;

      struct_type = "BLEND_STATE_ENTRY";
      state = intel_spec_find_struct(ctx->spec, struct_type);
   }

   /* presumably clamps count to what fits in the dynamic state buffer --
    * see update_count (defined earlier in this file).
    */
   count = update_count(ctx, ctx->dynamic_base + state_offset,
                        ctx->dynamic_base, state->dw_length, count);

   for (int i = 0; i < count; i++) {
      fprintf(ctx->fp, "%s %d\n", struct_type, i);
      ctx_print_group(ctx, state, state_addr, state_map);

      state_addr += state->dw_length * 4;
      state_map += state->dw_length * 4;
   }
}
941
942 static void
decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx, const char *struct_type, const uint32_t *p, int count)943 decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx,
944 const char *struct_type, const uint32_t *p,
945 int count)
946 {
947 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
948
949 uint32_t state_offset = 0;
950
951 struct intel_field_iterator iter;
952 intel_field_iterator_init(&iter, inst, p, 0, false);
953 while (intel_field_iterator_next(&iter)) {
954 if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) {
955 state_offset = iter.raw_value;
956 break;
957 }
958 }
959 decode_dynamic_state(ctx, struct_type, state_offset, count);
960 }
961
962 static void
decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)963 decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx *ctx,
964 const uint32_t *p)
965 {
966 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
967 uint32_t state_offset = 0;
968 bool clip = false, sf = false, cc = false;
969 struct intel_field_iterator iter;
970 intel_field_iterator_init(&iter, inst, p, 0, false);
971 while (intel_field_iterator_next(&iter)) {
972 if (!strcmp(iter.name, "CLIP Viewport State Change"))
973 clip = iter.raw_value;
974 if (!strcmp(iter.name, "SF Viewport State Change"))
975 sf = iter.raw_value;
976 if (!strcmp(iter.name, "CC Viewport State Change"))
977 cc = iter.raw_value;
978 else if (!strcmp(iter.name, "Pointer to CLIP_VIEWPORT") && clip) {
979 state_offset = iter.raw_value;
980 decode_dynamic_state(ctx, "CLIP_VIEWPORT", state_offset, 1);
981 }
982 else if (!strcmp(iter.name, "Pointer to SF_VIEWPORT") && sf) {
983 state_offset = iter.raw_value;
984 decode_dynamic_state(ctx, "SF_VIEWPORT", state_offset, 1);
985 }
986 else if (!strcmp(iter.name, "Pointer to CC_VIEWPORT") && cc) {
987 state_offset = iter.raw_value;
988 decode_dynamic_state(ctx, "CC_VIEWPORT", state_offset, 1);
989 }
990 }
991 }
992
/* 3DSTATE_VIEWPORT_STATE_POINTERS_CC: CC viewports come four at a time. */
static void
decode_3dstate_viewport_state_pointers_cc(struct intel_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "CC_VIEWPORT", p, 4);
}
999
/* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP: four combined SF/CLIP viewports. */
static void
decode_3dstate_viewport_state_pointers_sf_clip(struct intel_batch_decode_ctx *ctx,
                                               const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "SF_CLIP_VIEWPORT", p, 4);
}
1006
/* 3DSTATE_BLEND_STATE_POINTERS: decode_dynamic_state expands the variable
 * number of BLEND_STATE_ENTRY structs behind the single BLEND_STATE header.
 */
static void
decode_3dstate_blend_state_pointers(struct intel_batch_decode_ctx *ctx,
                                    const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "BLEND_STATE", p, 1);
}
1013
/* Decode 3DSTATE_CC_STATE_POINTERS.
 *
 * On anything other than gfx6 the packet simply points at a single
 * COLOR_CALC_STATE.  On gfx6 it carries three pointers (blend,
 * depth-stencil, color-calc), each guarded by its own change/valid bit,
 * so we walk the fields and decode a state only when its guard bit was
 * seen set earlier in the packet.
 */
static void
decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx,
                                 const uint32_t *p)
{
   if (ctx->devinfo.ver != 6) {
      decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1);
      return;
   }

   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);

   uint32_t state_offset = 0;
   bool blend_change = false, ds_change = false, cc_change = false;
   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (!strcmp(iter.name, "BLEND_STATE Change"))
         blend_change = iter.raw_value;
      else if (!strcmp(iter.name, "DEPTH_STENCIL_STATE Change"))
         ds_change = iter.raw_value;
      else if (!strcmp(iter.name, "Color Calc State Pointer Valid"))
         cc_change = iter.raw_value;
      else if (!strcmp(iter.name, "Pointer to DEPTH_STENCIL_STATE") && ds_change) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "DEPTH_STENCIL_STATE", state_offset, 1);
      }
      else if (!strcmp(iter.name, "Pointer to BLEND_STATE") && blend_change) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "BLEND_STATE", state_offset, 1);
      }
      else if (!strcmp(iter.name, "Color Calc State Pointer") && cc_change) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "COLOR_CALC_STATE", state_offset, 1);
      }
   }
}
1050
/* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS: one DEPTH_STENCIL_STATE struct. */
static void
decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx,
                                 const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, 1);
}
1057
/* 3DSTATE_SCISSOR_STATE_POINTERS: one SCISSOR_RECT struct. */
static void
decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "SCISSOR_RECT", p, 1);
}
1064
/* 3DSTATE_SLICE_TABLE_STATE_POINTERS: one SLICE_HASH_TABLE struct. */
static void
decode_3dstate_slice_table_state_pointers(struct intel_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "SLICE_HASH_TABLE", p, 1);
}
1071
1072 static void
handle_gt_mode(struct intel_batch_decode_ctx *ctx, uint32_t reg_addr, uint32_t val)1073 handle_gt_mode(struct intel_batch_decode_ctx *ctx,
1074 uint32_t reg_addr, uint32_t val)
1075 {
1076 struct intel_group *reg = intel_spec_find_register(ctx->spec, reg_addr);
1077
1078 assert(intel_group_get_length(reg, &val) == 1);
1079
1080 struct intel_field_iterator iter;
1081 intel_field_iterator_init(&iter, reg, &val, 0, false);
1082
1083 uint32_t bt_alignment;
1084 bool bt_alignment_mask = 0;
1085
1086 while (intel_field_iterator_next(&iter)) {
1087 if (strcmp(iter.name, "Binding Table Alignment") == 0) {
1088 bt_alignment = iter.raw_value;
1089 } else if (strcmp(iter.name, "Binding Table Alignment Mask") == 0) {
1090 bt_alignment_mask = iter.raw_value;
1091 }
1092 }
1093
1094 if (bt_alignment_mask)
1095 ctx->use_256B_binding_tables = bt_alignment;
1096 }
1097
/* Table of register-write callbacks, keyed by register name.  Invoked by
 * decode_load_register_imm when an MI_LOAD_REGISTER_IMM writes a register
 * whose name matches an entry.
 */
struct reg_handler {
   const char *name;
   void (*handler)(struct intel_batch_decode_ctx *ctx,
                   uint32_t reg_addr, uint32_t val);
} reg_handlers[] = {
   { "GT_MODE", handle_gt_mode }
};
1105
1106 static void
decode_load_register_imm(struct intel_batch_decode_ctx *ctx, const uint32_t *p)1107 decode_load_register_imm(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1108 {
1109 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1110 const unsigned length = intel_group_get_length(inst, p);
1111 assert(length & 1);
1112 const unsigned nr_regs = (length - 1) / 2;
1113
1114 for (unsigned i = 0; i < nr_regs; i++) {
1115 struct intel_group *reg = intel_spec_find_register(ctx->spec, p[i * 2 + 1]);
1116 if (reg != NULL) {
1117 fprintf(ctx->fp, "register %s (0x%x): 0x%x\n",
1118 reg->name, reg->register_offset, p[2]);
1119 ctx_print_group(ctx, reg, reg->register_offset, &p[2]);
1120
1121 for (unsigned i = 0; i < ARRAY_SIZE(reg_handlers); i++) {
1122 if (strcmp(reg->name, reg_handlers[i].name) == 0)
1123 reg_handlers[i].handler(ctx, p[1], p[2]);
1124 }
1125 }
1126 }
1127 }
1128
/* Print the gfx4 VS_STATE structure at the given offset and, if the VS unit
 * is enabled, disassemble the vertex shader kernel it points to.
 */
static void
decode_vs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
{
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "VS_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find VS_STATE info\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, " vs state unavailable\n");
      return;
   }

   ctx_print_group(ctx, strct, offset, bind_bo.map);

   /* Defaults: no kernel pointer; treat the unit as enabled in case the
    * spec lacks an "Enable" field.
    */
   uint64_t ksp = 0;
   bool is_enabled = true;
   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, strct, bind_bo.map, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
         ksp = iter.raw_value;
      } else if (strcmp(iter.name, "Enable") == 0) {
         is_enabled = iter.raw_value;
      }
   }
   if (is_enabled) {
      ctx_disassemble_program(ctx, ksp, "vertex shader");
      fprintf(ctx->fp, "\n");
   }
}
1165
1166 static void
decode_gs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)1167 decode_gs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1168 {
1169 struct intel_group *strct =
1170 intel_spec_find_struct(ctx->spec, "GS_STATE");
1171 if (strct == NULL) {
1172 fprintf(ctx->fp, "did not find GS_STATE info\n");
1173 return;
1174 }
1175
1176 struct intel_batch_decode_bo bind_bo =
1177 ctx_get_bo(ctx, true, offset);
1178
1179 if (bind_bo.map == NULL) {
1180 fprintf(ctx->fp, " gs state unavailable\n");
1181 return;
1182 }
1183
1184 ctx_print_group(ctx, strct, offset, bind_bo.map);
1185 }
1186
/* Print the gfx4 CLIP_STATE structure at "offset", then follow its pointer
 * to a CLIP_VIEWPORT and print that too.  DWord 6 of CLIP_STATE holds the
 * viewport pointer; the low two bits are masked off (non-address bits).
 */
static void
decode_clip_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
{
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "CLIP_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find CLIP_STATE info\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, " clip state unavailable\n");
      return;
   }

   ctx_print_group(ctx, strct, offset, bind_bo.map);

   struct intel_group *vp_strct =
      intel_spec_find_struct(ctx->spec, "CLIP_VIEWPORT");
   if (vp_strct == NULL) {
      fprintf(ctx->fp, "did not find CLIP_VIEWPORT info\n");
      return;
   }
   /* Viewport pointer lives in DWord 6; strip the low flag bits. */
   uint32_t clip_vp_offset = ((uint32_t *)bind_bo.map)[6] & ~0x3;
   struct intel_batch_decode_bo vp_bo =
      ctx_get_bo(ctx, true, clip_vp_offset);
   if (vp_bo.map == NULL) {
      fprintf(ctx->fp, " clip vp state unavailable\n");
      return;
   }
   ctx_print_group(ctx, vp_strct, clip_vp_offset, vp_bo.map);
}
1222
1223 static void
decode_sf_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)1224 decode_sf_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1225 {
1226 struct intel_group *strct =
1227 intel_spec_find_struct(ctx->spec, "SF_STATE");
1228 if (strct == NULL) {
1229 fprintf(ctx->fp, "did not find SF_STATE info\n");
1230 return;
1231 }
1232
1233 struct intel_batch_decode_bo bind_bo =
1234 ctx_get_bo(ctx, true, offset);
1235
1236 if (bind_bo.map == NULL) {
1237 fprintf(ctx->fp, " sf state unavailable\n");
1238 return;
1239 }
1240
1241 ctx_print_group(ctx, strct, offset, bind_bo.map);
1242
1243 struct intel_group *vp_strct =
1244 intel_spec_find_struct(ctx->spec, "SF_VIEWPORT");
1245 if (vp_strct == NULL) {
1246 fprintf(ctx->fp, "did not find SF_VIEWPORT info\n");
1247 return;
1248 }
1249
1250 uint32_t sf_vp_offset = ((uint32_t *)bind_bo.map)[5] & ~0x3;
1251 struct intel_batch_decode_bo vp_bo =
1252 ctx_get_bo(ctx, true, sf_vp_offset);
1253 if (vp_bo.map == NULL) {
1254 fprintf(ctx->fp, " sf vp state unavailable\n");
1255 return;
1256 }
1257 ctx_print_group(ctx, vp_strct, sf_vp_offset, vp_bo.map);
1258 }
1259
1260 static void
decode_wm_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)1261 decode_wm_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1262 {
1263 struct intel_group *strct =
1264 intel_spec_find_struct(ctx->spec, "WM_STATE");
1265 if (strct == NULL) {
1266 fprintf(ctx->fp, "did not find WM_STATE info\n");
1267 return;
1268 }
1269
1270 struct intel_batch_decode_bo bind_bo =
1271 ctx_get_bo(ctx, true, offset);
1272
1273 if (bind_bo.map == NULL) {
1274 fprintf(ctx->fp, " wm state unavailable\n");
1275 return;
1276 }
1277
1278 ctx_print_group(ctx, strct, offset, bind_bo.map);
1279
1280 decode_ps_kern(ctx, strct, bind_bo.map);
1281 }
1282
/* Print the gfx4 COLOR_CALC_STATE structure at "offset", then follow its
 * pointer to a CC_VIEWPORT and print that too.  DWord 4 holds the viewport
 * pointer; the low two bits are masked off (non-address bits).
 */
static void
decode_cc_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
{
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "COLOR_CALC_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find COLOR_CALC_STATE info\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, " cc state unavailable\n");
      return;
   }

   ctx_print_group(ctx, strct, offset, bind_bo.map);

   struct intel_group *vp_strct =
      intel_spec_find_struct(ctx->spec, "CC_VIEWPORT");
   if (vp_strct == NULL) {
      fprintf(ctx->fp, "did not find CC_VIEWPORT info\n");
      return;
   }
   /* Viewport pointer lives in DWord 4; strip the low flag bits. */
   uint32_t cc_vp_offset = ((uint32_t *)bind_bo.map)[4] & ~0x3;
   struct intel_batch_decode_bo vp_bo =
      ctx_get_bo(ctx, true, cc_vp_offset);
   if (vp_bo.map == NULL) {
      fprintf(ctx->fp, " cc vp state unavailable\n");
      return;
   }
   ctx_print_group(ctx, vp_strct, cc_vp_offset, vp_bo.map);
}
/* Decode gfx4/5 3DSTATE_PIPELINED_POINTERS: DWords 1..6 point at the
 * per-unit state tables (VS, GS, CLIP, SF, WM, CC) in dynamic state.
 * NOTE(review): bit 0 of the GS pointer gates decoding of the GS table;
 * bit 0 of the CLIP pointer is merely masked off and CLIP is always
 * decoded -- confirm against the PRM whether that asymmetry is intended.
 */
static void
decode_pipelined_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   fprintf(ctx->fp, "VS State Table:\n");
   decode_vs_state(ctx, p[1]);
   if (p[2] & 1) {
      fprintf(ctx->fp, "GS State Table:\n");
      decode_gs_state(ctx, p[2] & ~1);
   }
   fprintf(ctx->fp, "Clip State Table:\n");
   decode_clip_state(ctx, p[3] & ~1);
   fprintf(ctx->fp, "SF State Table:\n");
   decode_sf_state(ctx, p[4]);
   fprintf(ctx->fp, "WM State Table:\n");
   decode_wm_state(ctx, p[5]);
   fprintf(ctx->fp, "CC State Table:\n");
   decode_cc_state(ctx, p[6]);
}
1336
/* 3DSTATE_CPS_POINTERS: one CPS_STATE struct in dynamic state. */
static void
decode_cps_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "CPS_STATE", p, 1);
}
1342
/* Table of per-command custom decoders, keyed by instruction name.
 * intel_print_batch consults this after printing a packet's raw fields
 * (only when INTEL_BATCH_DECODE_FULL is set) to chase indirect state the
 * packet references: kernels, binding tables, sampler/viewport/blend
 * state, secondary buffers, etc.
 */
struct custom_decoder {
   const char *cmd_name;
   void (*decode)(struct intel_batch_decode_ctx *ctx, const uint32_t *p);
} custom_decoders[] = {
   { "STATE_BASE_ADDRESS", handle_state_base_address },
   { "3DSTATE_BINDING_TABLE_POOL_ALLOC", handle_binding_table_pool_alloc },
   { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load },
   { "COMPUTE_WALKER", handle_compute_walker },
   { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers },
   { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer },
   { "3DSTATE_VS", decode_single_ksp },
   { "3DSTATE_GS", decode_single_ksp },
   { "3DSTATE_DS", decode_single_ksp },
   { "3DSTATE_HS", decode_single_ksp },
   { "3DSTATE_PS", decode_ps_kernels },
   { "3DSTATE_WM", decode_ps_kernels },
   { "3DSTATE_MESH_SHADER", decode_mesh_task_ksp },
   { "3DSTATE_TASK_SHADER", decode_mesh_task_ksp },
   { "3DSTATE_CONSTANT_VS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_GS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_PS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_HS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_DS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_ALL", decode_3dstate_constant_all },

   { "3DSTATE_BINDING_TABLE_POINTERS", decode_gfx4_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers },

   { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gfx6 },

   { "3DSTATE_VIEWPORT_STATE_POINTERS", decode_3dstate_viewport_state_pointers },
   { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc },
   { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
   { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
   { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
   { "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers },
   { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
   { "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers },
   { "MI_LOAD_REGISTER_IMM", decode_load_register_imm },
   { "3DSTATE_PIPELINED_POINTERS", decode_pipelined_pointers },
   { "3DSTATE_CPS_POINTERS", decode_cps_pointers },
   { "CONSTANT_BUFFER", decode_gfx4_constant_buffer },
};
1395
/* Print an entire batch buffer, instruction by instruction, following
 * MI_BATCH_BUFFER_START chains into secondary buffers.
 *
 * "from_ring" indicates the batch came from a ring buffer, in which case
 * a non-second-level MI_BATCH_BUFFER_START does not terminate decoding of
 * the current buffer.  Recursion depth is capped via n_batch_buffer_start
 * to guard against jump loops.
 */
void
intel_print_batch(struct intel_batch_decode_ctx *ctx,
                  const uint32_t *batch, uint32_t batch_size,
                  uint64_t batch_addr, bool from_ring)
{
   const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
   int length;
   struct intel_group *inst;
   const char *reset_color = ctx->flags & INTEL_BATCH_DECODE_IN_COLOR ? NORMAL : "";

   /* Bail out if we have chased too many batch-buffer jumps already. */
   if (ctx->n_batch_buffer_start >= 100) {
      fprintf(ctx->fp, "%s0x%08"PRIx64": Max batch buffer jumps exceeded%s\n",
              (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
              (ctx->flags & INTEL_BATCH_DECODE_OFFSETS) ? batch_addr : 0,
              reset_color);
      return;
   }

   ctx->n_batch_buffer_start++;

   for (p = batch; p < end; p += length) {
      inst = intel_ctx_find_instruction(ctx, p);
      length = intel_group_get_length(inst, p);
      assert(inst == NULL || length > 0);
      /* Always advance at least one DWord, even for unknown instructions. */
      length = MAX2(1, length);

      uint64_t offset;
      if (ctx->flags & INTEL_BATCH_DECODE_OFFSETS)
         offset = batch_addr + ((char *)p - (char *)batch);
      else
         offset = 0;

      /* Unknown opcode: dump its DWords raw (in red when colorized). */
      if (inst == NULL) {
         fprintf(ctx->fp, "%s0x%08"PRIx64": unknown instruction %08x%s\n",
                 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
                 offset, p[0], reset_color);

         for (int i=1; i < length; i++) {
            fprintf(ctx->fp, "%s0x%08"PRIx64": -- %08x%s\n",
                    (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
                    offset + i * 4, p[i], reset_color);
         }

         continue;
      }

      /* Pick a header color: green for batch start/end, blue otherwise. */
      const char *color;
      const char *inst_name = intel_group_get_name(inst);
      if (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) {
         reset_color = NORMAL;
         if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
            if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 ||
                strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0)
               color = GREEN_HEADER;
            else
               color = BLUE_HEADER;
         } else {
            color = NORMAL;
         }
      } else {
         color = "";
         reset_color = "";
      }

      fprintf(ctx->fp, "%s0x%08"PRIx64"%s: 0x%08x: %-80s%s\n", color, offset,
              ctx->acthd && offset == ctx->acthd ? " (ACTHD)" : "", p[0],
              inst_name, reset_color);

      if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
         ctx_print_group(ctx, inst, offset, p);

         /* Run at most one matching custom decoder for this command. */
         for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) {
            if (strcmp(inst_name, custom_decoders[i].cmd_name) == 0) {
               custom_decoders[i].decode(ctx, p);
               break;
            }
         }
      }

      if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0) {
         uint64_t next_batch_addr = 0;
         bool ppgtt = false;
         bool second_level = false;
         bool predicate = false;
         struct intel_field_iterator iter;
         intel_field_iterator_init(&iter, inst, p, 0, false);
         while (intel_field_iterator_next(&iter)) {
            if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
               next_batch_addr = iter.raw_value;
            } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
               second_level = iter.raw_value;
            } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
               ppgtt = iter.raw_value;
            } else if (strcmp(iter.name, "Predication Enable") == 0) {
               predicate = iter.raw_value;
            }
         }

         /* Skip predicated jumps entirely -- we can't evaluate them. */
         if (!predicate) {
            struct intel_batch_decode_bo next_batch = ctx_get_bo(ctx, ppgtt, next_batch_addr);

            if (next_batch.map == NULL) {
               fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
                       next_batch_addr);
            } else {
               intel_print_batch(ctx, next_batch.map, next_batch.size,
                                 next_batch.addr, false);
            }
            if (second_level) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
                * like a subroutine call.  Commands that come afterwards get
                * processed once the 2nd level batch buffer returns with
                * MI_BATCH_BUFFER_END.
                */
               continue;
            } else if (!from_ring) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset
                * acts like a goto: nothing after it in this buffer will ever
                * be processed (the target was already decoded recursively
                * above), so stop decoding here instead of recursing further.
                */
               break;
            }
         }
      } else if (strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) {
         break;
      }
   }

   ctx->n_batch_buffer_start--;
}
1527