1/* 2 * Copyright © 2017 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "common/intel_decoder.h" 25#include "intel_disasm.h" 26#include "util/macros.h" 27#include "util/u_math.h" /* Needed for ROUND_DOWN_TO */ 28 29#include <string.h> 30 31void 32intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx, 33 const struct brw_isa_info *isa, 34 const struct intel_device_info *devinfo, 35 FILE *fp, enum intel_batch_decode_flags flags, 36 const char *xml_path, 37 struct intel_batch_decode_bo (*get_bo)(void *, 38 bool, 39 uint64_t), 40 unsigned (*get_state_size)(void *, uint64_t, 41 uint64_t), 42 void *user_data) 43{ 44 memset(ctx, 0, sizeof(*ctx)); 45 46 ctx->isa = isa; 47 ctx->devinfo = *devinfo; 48 ctx->get_bo = get_bo; 49 ctx->get_state_size = get_state_size; 50 ctx->user_data = user_data; 51 ctx->fp = fp; 52 ctx->flags = flags; 53 ctx->max_vbo_decoded_lines = -1; /* No limit! */ 54 ctx->engine = I915_ENGINE_CLASS_RENDER; 55 56 if (xml_path == NULL) 57 ctx->spec = intel_spec_load(devinfo); 58 else 59 ctx->spec = intel_spec_load_from_path(devinfo, xml_path); 60} 61 62void 63intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx) 64{ 65 intel_spec_destroy(ctx->spec); 66} 67 68#define CSI "\e[" 69#define RED_COLOR CSI "31m" 70#define BLUE_HEADER CSI "0;44m" CSI "1;37m" 71#define GREEN_HEADER CSI "1;42m" 72#define NORMAL CSI "0m" 73 74static void 75ctx_print_group(struct intel_batch_decode_ctx *ctx, 76 struct intel_group *group, 77 uint64_t address, const void *map) 78{ 79 intel_print_group(ctx->fp, group, address, map, 0, 80 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) != 0); 81} 82 83static struct intel_batch_decode_bo 84ctx_get_bo(struct intel_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr) 85{ 86 if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0)) { 87 /* On Broadwell and above, we have 48-bit addresses which consume two 88 * dwords. Some packets require that these get stored in a "canonical 89 * form" which means that bit 47 is sign-extended through the upper 90 * bits. In order to correctly handle those aub dumps, we need to mask 91 * off the top 16 bits. 92 */ 93 addr &= (~0ull >> 16); 94 } 95 96 struct intel_batch_decode_bo bo = ctx->get_bo(ctx->user_data, ppgtt, addr); 97 98 if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0)) 99 bo.addr &= (~0ull >> 16); 100 101 /* We may actually have an offset into the bo */ 102 if (bo.map != NULL) { 103 assert(bo.addr <= addr); 104 uint64_t offset = addr - bo.addr; 105 bo.map += offset; 106 bo.addr += offset; 107 bo.size -= offset; 108 } 109 110 return bo; 111} 112 113static int 114update_count(struct intel_batch_decode_ctx *ctx, 115 uint64_t address, 116 uint64_t base_address, 117 unsigned element_dwords, 118 unsigned guess) 119{ 120 unsigned size = 0; 121 122 if (ctx->get_state_size) 123 size = ctx->get_state_size(ctx->user_data, address, base_address); 124 125 if (size > 0) 126 return size / (sizeof(uint32_t) * element_dwords); 127 128 /* In the absence of any information, just guess arbitrarily. */ 129 return guess; 130} 131 132static void 133ctx_disassemble_program(struct intel_batch_decode_ctx *ctx, 134 uint32_t ksp, const char *type) 135{ 136 uint64_t addr = ctx->instruction_base + ksp; 137 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr); 138 if (!bo.map) 139 return; 140 141 fprintf(ctx->fp, "\nReferenced %s:\n", type); 142 intel_disassemble(ctx->isa, bo.map, 0, ctx->fp); 143} 144 145/* Heuristic to determine whether a uint32_t is probably actually a float 146 * (http://stackoverflow.com/a/2953466) 147 */ 148 149static bool 150probably_float(uint32_t bits) 151{ 152 int exp = ((bits & 0x7f800000U) >> 23) - 127; 153 uint32_t mant = bits & 0x007fffff; 154 155 /* +- 0.0 */ 156 if (exp == -127 && mant == 0) 157 return true; 158 159 /* +- 1 billionth to 1 billion */ 160 if (-30 <= exp && exp <= 30) 161 return true; 162 163 /* some value with only a few binary digits */ 164 if ((mant & 0x0000ffff) == 0) 165 return true; 166 167 return false; 168} 169 170static void 171ctx_print_buffer(struct intel_batch_decode_ctx *ctx, 172 struct intel_batch_decode_bo bo, 173 uint32_t read_length, 174 uint32_t pitch, 175 int max_lines) 176{ 177 const uint32_t *dw_end = 178 bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4); 179 180 int column_count = 0, pitch_col_count = 0, line_count = -1; 181 for (const uint32_t *dw = bo.map; dw < dw_end; dw++) { 182 if (pitch_col_count * 4 == pitch || column_count == 8) { 183 fprintf(ctx->fp, "\n"); 184 column_count = 0; 185 if (pitch_col_count * 4 == pitch) 186 pitch_col_count = 0; 187 line_count++; 188 189 if (max_lines >= 0 && line_count >= max_lines) 190 break; 191 } 192 fprintf(ctx->fp, column_count == 0 ? " " : " "); 193 194 if ((ctx->flags & INTEL_BATCH_DECODE_FLOATS) && probably_float(*dw)) 195 fprintf(ctx->fp, " %8.2f", *(float *) dw); 196 else 197 fprintf(ctx->fp, " 0x%08x", *dw); 198 199 column_count++; 200 pitch_col_count++; 201 } 202 fprintf(ctx->fp, "\n"); 203} 204 205static struct intel_group * 206intel_ctx_find_instruction(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 207{ 208 return intel_spec_find_instruction(ctx->spec, ctx->engine, p); 209} 210 211static void 212handle_state_base_address(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 213{ 214 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 215 216 struct intel_field_iterator iter; 217 intel_field_iterator_init(&iter, inst, p, 0, false); 218 219 uint64_t surface_base = 0, dynamic_base = 0, instruction_base = 0; 220 bool surface_modify = 0, dynamic_modify = 0, instruction_modify = 0; 221 222 while (intel_field_iterator_next(&iter)) { 223 if (strcmp(iter.name, "Surface State Base Address") == 0) { 224 surface_base = iter.raw_value; 225 } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) { 226 dynamic_base = iter.raw_value; 227 } else if (strcmp(iter.name, "Instruction Base Address") == 0) { 228 instruction_base = iter.raw_value; 229 } else if (strcmp(iter.name, "Surface State Base Address Modify Enable") == 0) { 230 surface_modify = iter.raw_value; 231 } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) { 232 dynamic_modify = iter.raw_value; 233 } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) { 234 instruction_modify = iter.raw_value; 235 } 236 } 237 238 if (dynamic_modify) 239 ctx->dynamic_base = dynamic_base; 240 241 if (surface_modify) 242 ctx->surface_base = surface_base; 243 244 if (instruction_modify) 245 ctx->instruction_base = instruction_base; 246} 247 248static void 249handle_binding_table_pool_alloc(struct intel_batch_decode_ctx *ctx, 250 const uint32_t *p) 251{ 252 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 253 254 struct intel_field_iterator iter; 255 intel_field_iterator_init(&iter, inst, p, 0, false); 256 257 uint64_t bt_pool_base = 0; 258 bool bt_pool_enable = false; 259 260 while (intel_field_iterator_next(&iter)) { 261 if (strcmp(iter.name, "Binding Table Pool Base Address") == 0) { 262 bt_pool_base = iter.raw_value; 263 } else if (strcmp(iter.name, "Binding Table Pool Enable") == 0) { 264 bt_pool_enable = iter.raw_value; 265 } 266 } 267 268 if (bt_pool_enable || ctx->devinfo.verx10 >= 125) { 269 ctx->bt_pool_base = bt_pool_base; 270 } else { 271 ctx->bt_pool_base = 0; 272 } 273} 274 275static void 276dump_binding_table(struct intel_batch_decode_ctx *ctx, 277 uint32_t offset, int count) 278{ 279 struct intel_group *strct = 280 intel_spec_find_struct(ctx->spec, "RENDER_SURFACE_STATE"); 281 if (strct == NULL) { 282 fprintf(ctx->fp, "did not find RENDER_SURFACE_STATE info\n"); 283 return; 284 } 285 286 /* Most platforms use a 16-bit pointer with 32B alignment in bits 15:5. */ 287 uint32_t btp_alignment = 32; 288 uint32_t btp_pointer_bits = 16; 289 290 if (ctx->devinfo.verx10 >= 125) { 291 /* The pointer is now 21-bit with 32B alignment in bits 20:5. */ 292 btp_pointer_bits = 21; 293 } else if (ctx->use_256B_binding_tables) { 294 /* When 256B binding tables are enabled, we have to shift the offset 295 * which is stored in bits 15:5 but interpreted as bits 18:8 of the 296 * actual offset. The effective pointer is 19-bit with 256B alignment. 297 */ 298 offset <<= 3; 299 btp_pointer_bits = 19; 300 btp_alignment = 256; 301 } 302 303 const uint64_t bt_pool_base = ctx->bt_pool_base ? ctx->bt_pool_base : 304 ctx->surface_base; 305 306 if (count < 0) { 307 count = update_count(ctx, bt_pool_base + offset, 308 bt_pool_base, 1, 8); 309 } 310 311 if (offset % btp_alignment != 0 || offset >= (1u << btp_pointer_bits)) { 312 fprintf(ctx->fp, " invalid binding table pointer\n"); 313 return; 314 } 315 316 struct intel_batch_decode_bo bind_bo = 317 ctx_get_bo(ctx, true, bt_pool_base + offset); 318 319 if (bind_bo.map == NULL) { 320 fprintf(ctx->fp, " binding table unavailable\n"); 321 return; 322 } 323 324 const uint32_t *pointers = bind_bo.map; 325 for (int i = 0; i < count; i++) { 326 if (pointers[i] == 0) 327 continue; 328 329 uint64_t addr = ctx->surface_base + pointers[i]; 330 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr); 331 uint32_t size = strct->dw_length * 4; 332 333 if (pointers[i] % 32 != 0 || 334 addr < bo.addr || addr + size >= bo.addr + bo.size) { 335 fprintf(ctx->fp, "pointer %u: 0x%08x <not valid>\n", i, pointers[i]); 336 continue; 337 } 338 339 fprintf(ctx->fp, "pointer %u: 0x%08x\n", i, pointers[i]); 340 ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr)); 341 } 342} 343 344static void 345dump_samplers(struct intel_batch_decode_ctx *ctx, uint32_t offset, int count) 346{ 347 struct intel_group *strct = intel_spec_find_struct(ctx->spec, "SAMPLER_STATE"); 348 uint64_t state_addr = ctx->dynamic_base + offset; 349 350 assert(count > 0); 351 352 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr); 353 const void *state_map = bo.map; 354 355 if (state_map == NULL) { 356 fprintf(ctx->fp, " samplers unavailable\n"); 357 return; 358 } 359 360 if (offset % 32 != 0) { 361 fprintf(ctx->fp, " invalid sampler state pointer\n"); 362 return; 363 } 364 365 const unsigned sampler_state_size = strct->dw_length * 4; 366 367 if (count * sampler_state_size >= bo.size) { 368 fprintf(ctx->fp, " sampler state ends after bo ends\n"); 369 assert(!"sampler state ends after bo ends"); 370 return; 371 } 372 373 for (int i = 0; i < count; i++) { 374 fprintf(ctx->fp, "sampler state %d\n", i); 375 ctx_print_group(ctx, strct, state_addr, state_map); 376 state_addr += sampler_state_size; 377 state_map += sampler_state_size; 378 } 379} 380 381static void 382handle_interface_descriptor_data(struct intel_batch_decode_ctx *ctx, 383 struct intel_group *desc, const uint32_t *p) 384{ 385 uint64_t ksp = 0; 386 uint32_t sampler_offset = 0, sampler_count = 0; 387 uint32_t binding_table_offset = 0, binding_entry_count = 0; 388 389 struct intel_field_iterator iter; 390 intel_field_iterator_init(&iter, desc, p, 0, false); 391 while (intel_field_iterator_next(&iter)) { 392 if (strcmp(iter.name, "Kernel Start Pointer") == 0) { 393 ksp = strtoll(iter.value, NULL, 16); 394 } else if (strcmp(iter.name, "Sampler State Pointer") == 0) { 395 sampler_offset = strtol(iter.value, NULL, 16); 396 } else if (strcmp(iter.name, "Sampler Count") == 0) { 397 sampler_count = strtol(iter.value, NULL, 10); 398 } else if (strcmp(iter.name, "Binding Table Pointer") == 0) { 399 binding_table_offset = strtol(iter.value, NULL, 16); 400 } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) { 401 binding_entry_count = strtol(iter.value, NULL, 10); 402 } 403 } 404 405 ctx_disassemble_program(ctx, ksp, "compute shader"); 406 fprintf(ctx->fp, "\n"); 407 408 if (sampler_count) 409 dump_samplers(ctx, sampler_offset, sampler_count); 410 if (binding_entry_count) 411 dump_binding_table(ctx, binding_table_offset, binding_entry_count); 412} 413 414static void 415handle_media_interface_descriptor_load(struct intel_batch_decode_ctx *ctx, 416 const uint32_t *p) 417{ 418 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 419 struct intel_group *desc = 420 intel_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA"); 421 422 struct intel_field_iterator iter; 423 intel_field_iterator_init(&iter, inst, p, 0, false); 424 uint32_t descriptor_offset = 0; 425 int descriptor_count = 0; 426 while (intel_field_iterator_next(&iter)) { 427 if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) { 428 descriptor_offset = strtol(iter.value, NULL, 16); 429 } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) { 430 descriptor_count = 431 strtol(iter.value, NULL, 16) / (desc->dw_length * 4); 432 } 433 } 434 435 uint64_t desc_addr = ctx->dynamic_base + descriptor_offset; 436 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, desc_addr); 437 const void *desc_map = bo.map; 438 439 if (desc_map == NULL) { 440 fprintf(ctx->fp, " interface descriptors unavailable\n"); 441 return; 442 } 443 444 for (int i = 0; i < descriptor_count; i++) { 445 fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset); 446 447 ctx_print_group(ctx, desc, desc_addr, desc_map); 448 449 handle_interface_descriptor_data(ctx, desc, desc_map); 450 451 desc_map += desc->dw_length; 452 desc_addr += desc->dw_length * 4; 453 } 454} 455 456static void 457handle_compute_walker(struct intel_batch_decode_ctx *ctx, 458 const uint32_t *p) 459{ 460 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 461 462 struct intel_field_iterator iter; 463 intel_field_iterator_init(&iter, inst, p, 0, false); 464 while (intel_field_iterator_next(&iter)) { 465 if (strcmp(iter.name, "Interface Descriptor") == 0) { 466 handle_interface_descriptor_data(ctx, iter.struct_desc, 467 &iter.p[iter.start_bit / 32]); 468 } 469 } 470} 471 472static void 473handle_3dstate_vertex_buffers(struct intel_batch_decode_ctx *ctx, 474 const uint32_t *p) 475{ 476 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 477 struct intel_group *vbs = intel_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE"); 478 479 struct intel_batch_decode_bo vb = {}; 480 uint32_t vb_size = 0; 481 int index = -1; 482 int pitch = -1; 483 bool ready = false; 484 485 struct intel_field_iterator iter; 486 intel_field_iterator_init(&iter, inst, p, 0, false); 487 while (intel_field_iterator_next(&iter)) { 488 if (iter.struct_desc != vbs) 489 continue; 490 491 struct intel_field_iterator vbs_iter; 492 intel_field_iterator_init(&vbs_iter, vbs, &iter.p[iter.start_bit / 32], 0, false); 493 while (intel_field_iterator_next(&vbs_iter)) { 494 if (strcmp(vbs_iter.name, "Vertex Buffer Index") == 0) { 495 index = vbs_iter.raw_value; 496 } else if (strcmp(vbs_iter.name, "Buffer Pitch") == 0) { 497 pitch = vbs_iter.raw_value; 498 } else if (strcmp(vbs_iter.name, "Buffer Starting Address") == 0) { 499 vb = ctx_get_bo(ctx, true, vbs_iter.raw_value); 500 } else if (strcmp(vbs_iter.name, "Buffer Size") == 0) { 501 vb_size = vbs_iter.raw_value; 502 ready = true; 503 } else if (strcmp(vbs_iter.name, "End Address") == 0) { 504 if (vb.map && vbs_iter.raw_value >= vb.addr) 505 vb_size = (vbs_iter.raw_value + 1) - vb.addr; 506 else 507 vb_size = 0; 508 ready = true; 509 } 510 511 if (!ready) 512 continue; 513 514 fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size); 515 516 if (vb.map == NULL) { 517 fprintf(ctx->fp, " buffer contents unavailable\n"); 518 continue; 519 } 520 521 if (vb.map == 0 || vb_size == 0) 522 continue; 523 524 ctx_print_buffer(ctx, vb, vb_size, pitch, ctx->max_vbo_decoded_lines); 525 526 vb.map = NULL; 527 vb_size = 0; 528 index = -1; 529 pitch = -1; 530 ready = false; 531 } 532 } 533} 534 535static void 536handle_3dstate_index_buffer(struct intel_batch_decode_ctx *ctx, 537 const uint32_t *p) 538{ 539 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 540 541 struct intel_batch_decode_bo ib = {}; 542 uint32_t ib_size = 0; 543 uint32_t format = 0; 544 545 struct intel_field_iterator iter; 546 intel_field_iterator_init(&iter, inst, p, 0, false); 547 while (intel_field_iterator_next(&iter)) { 548 if (strcmp(iter.name, "Index Format") == 0) { 549 format = iter.raw_value; 550 } else if (strcmp(iter.name, "Buffer Starting Address") == 0) { 551 ib = ctx_get_bo(ctx, true, iter.raw_value); 552 } else if (strcmp(iter.name, "Buffer Size") == 0) { 553 ib_size = iter.raw_value; 554 } 555 } 556 557 if (ib.map == NULL) { 558 fprintf(ctx->fp, " buffer contents unavailable\n"); 559 return; 560 } 561 562 const void *m = ib.map; 563 const void *ib_end = ib.map + MIN2(ib.size, ib_size); 564 for (int i = 0; m < ib_end && i < 10; i++) { 565 switch (format) { 566 case 0: 567 fprintf(ctx->fp, "%3d ", *(uint8_t *)m); 568 m += 1; 569 break; 570 case 1: 571 fprintf(ctx->fp, "%3d ", *(uint16_t *)m); 572 m += 2; 573 break; 574 case 2: 575 fprintf(ctx->fp, "%3d ", *(uint32_t *)m); 576 m += 4; 577 break; 578 } 579 } 580 581 if (m < ib_end) 582 fprintf(ctx->fp, "..."); 583 fprintf(ctx->fp, "\n"); 584} 585 586static void 587decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 588{ 589 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 590 591 uint64_t ksp = 0; 592 bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gfx8+ only */ 593 bool is_enabled = true; 594 595 struct intel_field_iterator iter; 596 intel_field_iterator_init(&iter, inst, p, 0, false); 597 while (intel_field_iterator_next(&iter)) { 598 if (strcmp(iter.name, "Kernel Start Pointer") == 0) { 599 ksp = iter.raw_value; 600 } else if (strcmp(iter.name, "SIMD8 Dispatch Enable") == 0) { 601 is_simd8 = iter.raw_value; 602 } else if (strcmp(iter.name, "Dispatch Mode") == 0) { 603 is_simd8 = strcmp(iter.value, "SIMD8") == 0; 604 } else if (strcmp(iter.name, "Dispatch Enable") == 0) { 605 is_simd8 = strcmp(iter.value, "SIMD8") == 0; 606 } else if (strcmp(iter.name, "Enable") == 0) { 607 is_enabled = iter.raw_value; 608 } 609 } 610 611 const char *type = 612 strcmp(inst->name, "VS_STATE") == 0 ? "vertex shader" : 613 strcmp(inst->name, "GS_STATE") == 0 ? "geometry shader" : 614 strcmp(inst->name, "SF_STATE") == 0 ? "strips and fans shader" : 615 strcmp(inst->name, "CLIP_STATE") == 0 ? "clip shader" : 616 strcmp(inst->name, "3DSTATE_DS") == 0 ? "tessellation evaluation shader" : 617 strcmp(inst->name, "3DSTATE_HS") == 0 ? "tessellation control shader" : 618 strcmp(inst->name, "3DSTATE_VS") == 0 ? (is_simd8 ? "SIMD8 vertex shader" : "vec4 vertex shader") : 619 strcmp(inst->name, "3DSTATE_GS") == 0 ? (is_simd8 ? "SIMD8 geometry shader" : "vec4 geometry shader") : 620 NULL; 621 622 if (is_enabled) { 623 ctx_disassemble_program(ctx, ksp, type); 624 fprintf(ctx->fp, "\n"); 625 } 626} 627 628static void 629decode_mesh_task_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 630{ 631 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 632 633 uint64_t ksp = 0; 634 uint64_t local_x_maximum = 0; 635 uint64_t threads = 0; 636 637 struct intel_field_iterator iter; 638 intel_field_iterator_init(&iter, inst, p, 0, false); 639 while (intel_field_iterator_next(&iter)) { 640 if (strcmp(iter.name, "Kernel Start Pointer") == 0) { 641 ksp = iter.raw_value; 642 } else if (strcmp(iter.name, "Local X Maximum") == 0) { 643 local_x_maximum = iter.raw_value; 644 } else if (strcmp(iter.name, "Number of Threads in GPGPU Thread Group") == 0) { 645 threads = iter.raw_value; 646 } 647 } 648 649 const char *type = 650 strcmp(inst->name, "3DSTATE_MESH_SHADER") == 0 ? "mesh shader" : 651 strcmp(inst->name, "3DSTATE_TASK_SHADER") == 0 ? "task shader" : 652 NULL; 653 654 if (threads && local_x_maximum) { 655 ctx_disassemble_program(ctx, ksp, type); 656 fprintf(ctx->fp, "\n"); 657 } 658} 659 660static void 661decode_ps_kern(struct intel_batch_decode_ctx *ctx, 662 struct intel_group *inst, const uint32_t *p) 663{ 664 bool single_ksp = ctx->devinfo.ver == 4; 665 uint64_t ksp[3] = {0, 0, 0}; 666 bool enabled[3] = {false, false, false}; 667 668 struct intel_field_iterator iter; 669 intel_field_iterator_init(&iter, inst, p, 0, false); 670 while (intel_field_iterator_next(&iter)) { 671 if (strncmp(iter.name, "Kernel Start Pointer ", 672 strlen("Kernel Start Pointer ")) == 0) { 673 int idx = iter.name[strlen("Kernel Start Pointer ")] - '0'; 674 ksp[idx] = strtol(iter.value, NULL, 16); 675 } else if (strcmp(iter.name, "8 Pixel Dispatch Enable") == 0) { 676 enabled[0] = strcmp(iter.value, "true") == 0; 677 } else if (strcmp(iter.name, "16 Pixel Dispatch Enable") == 0) { 678 enabled[1] = strcmp(iter.value, "true") == 0; 679 } else if (strcmp(iter.name, "32 Pixel Dispatch Enable") == 0) { 680 enabled[2] = strcmp(iter.value, "true") == 0; 681 } 682 } 683 684 if (single_ksp) 685 ksp[1] = ksp[2] = ksp[0]; 686 687 /* Reorder KSPs to be [8, 16, 32] instead of the hardware order. */ 688 if (enabled[0] + enabled[1] + enabled[2] == 1) { 689 if (enabled[1]) { 690 ksp[1] = ksp[0]; 691 ksp[0] = 0; 692 } else if (enabled[2]) { 693 ksp[2] = ksp[0]; 694 ksp[0] = 0; 695 } 696 } else { 697 uint64_t tmp = ksp[1]; 698 ksp[1] = ksp[2]; 699 ksp[2] = tmp; 700 } 701 702 if (enabled[0]) 703 ctx_disassemble_program(ctx, ksp[0], "SIMD8 fragment shader"); 704 if (enabled[1]) 705 ctx_disassemble_program(ctx, ksp[1], "SIMD16 fragment shader"); 706 if (enabled[2]) 707 ctx_disassemble_program(ctx, ksp[2], "SIMD32 fragment shader"); 708 709 if (enabled[0] || enabled[1] || enabled[2]) 710 fprintf(ctx->fp, "\n"); 711} 712 713static void 714decode_ps_kernels(struct intel_batch_decode_ctx *ctx, 715 const uint32_t *p) 716{ 717 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 718 decode_ps_kern(ctx, inst, p); 719} 720 721static void 722decode_3dstate_constant_all(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 723{ 724 struct intel_group *inst = 725 intel_spec_find_instruction(ctx->spec, ctx->engine, p); 726 struct intel_group *body = 727 intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_ALL_DATA"); 728 729 uint32_t read_length[4] = {0}; 730 struct intel_batch_decode_bo buffer[4]; 731 memset(buffer, 0, sizeof(buffer)); 732 733 struct intel_field_iterator outer; 734 intel_field_iterator_init(&outer, inst, p, 0, false); 735 int idx = 0; 736 while (intel_field_iterator_next(&outer)) { 737 if (outer.struct_desc != body) 738 continue; 739 740 struct intel_field_iterator iter; 741 intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32], 742 0, false); 743 while (intel_field_iterator_next(&iter)) { 744 if (!strcmp(iter.name, "Pointer To Constant Buffer")) { 745 buffer[idx] = ctx_get_bo(ctx, true, iter.raw_value); 746 } else if (!strcmp(iter.name, "Constant Buffer Read Length")) { 747 read_length[idx] = iter.raw_value; 748 } 749 } 750 idx++; 751 } 752 753 for (int i = 0; i < 4; i++) { 754 if (read_length[i] == 0 || buffer[i].map == NULL) 755 continue; 756 757 unsigned size = read_length[i] * 32; 758 fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size); 759 760 ctx_print_buffer(ctx, buffer[i], size, 0, -1); 761 } 762} 763 764static void 765decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 766{ 767 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 768 struct intel_group *body = 769 intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY"); 770 771 uint32_t read_length[4] = {0}; 772 uint64_t read_addr[4] = {0}; 773 774 struct intel_field_iterator outer; 775 intel_field_iterator_init(&outer, inst, p, 0, false); 776 while (intel_field_iterator_next(&outer)) { 777 if (outer.struct_desc != body) 778 continue; 779 780 struct intel_field_iterator iter; 781 intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32], 782 0, false); 783 784 while (intel_field_iterator_next(&iter)) { 785 int idx; 786 if (sscanf(iter.name, "Read Length[%d]", &idx) == 1) { 787 read_length[idx] = iter.raw_value; 788 } else if (sscanf(iter.name, "Buffer[%d]", &idx) == 1) { 789 read_addr[idx] = iter.raw_value; 790 } 791 } 792 793 for (int i = 0; i < 4; i++) { 794 if (read_length[i] == 0) 795 continue; 796 797 struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr[i]); 798 if (!buffer.map) { 799 fprintf(ctx->fp, "constant buffer %d unavailable\n", i); 800 continue; 801 } 802 803 unsigned size = read_length[i] * 32; 804 fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size); 805 806 ctx_print_buffer(ctx, buffer, size, 0, -1); 807 } 808 } 809} 810 811static void 812decode_gfx4_constant_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 813{ 814 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 815 uint64_t read_length = 0, read_addr = 0, valid = 0; 816 struct intel_field_iterator iter; 817 intel_field_iterator_init(&iter, inst, p, 0, false); 818 819 while (intel_field_iterator_next(&iter)) { 820 if (!strcmp(iter.name, "Buffer Length")) { 821 read_length = iter.raw_value; 822 } else if (!strcmp(iter.name, "Valid")) { 823 valid = iter.raw_value; 824 } else if (!strcmp(iter.name, "Buffer Starting Address")) { 825 read_addr = iter.raw_value; 826 } 827 } 828 829 if (!valid) 830 return; 831 832 struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr); 833 if (!buffer.map) { 834 fprintf(ctx->fp, "constant buffer unavailable\n"); 835 return; 836 } 837 unsigned size = (read_length + 1) * 16 * sizeof(float); 838 fprintf(ctx->fp, "constant buffer size %u\n", size); 839 840 ctx_print_buffer(ctx, buffer, size, 0, -1); 841} 842 843 844static void 845decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx, 846 const uint32_t *p) 847{ 848 fprintf(ctx->fp, "VS Binding Table:\n"); 849 dump_binding_table(ctx, p[1], -1); 850 851 fprintf(ctx->fp, "GS Binding Table:\n"); 852 dump_binding_table(ctx, p[2], -1); 853 854 if (ctx->devinfo.ver < 6) { 855 fprintf(ctx->fp, "CLIP Binding Table:\n"); 856 dump_binding_table(ctx, p[3], -1); 857 fprintf(ctx->fp, "SF Binding Table:\n"); 858 dump_binding_table(ctx, p[4], -1); 859 fprintf(ctx->fp, "PS Binding Table:\n"); 860 dump_binding_table(ctx, p[5], -1); 861 } else { 862 fprintf(ctx->fp, "PS Binding Table:\n"); 863 dump_binding_table(ctx, p[3], -1); 864 } 865} 866 867static void 868decode_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx, 869 const uint32_t *p) 870{ 871 dump_binding_table(ctx, p[1], -1); 872} 873 874static void 875decode_3dstate_sampler_state_pointers(struct intel_batch_decode_ctx *ctx, 876 const uint32_t *p) 877{ 878 dump_samplers(ctx, p[1], 1); 879} 880 881static void 882decode_3dstate_sampler_state_pointers_gfx6(struct intel_batch_decode_ctx *ctx, 883 const uint32_t *p) 884{ 885 dump_samplers(ctx, p[1], 1); 886 dump_samplers(ctx, p[2], 1); 887 dump_samplers(ctx, p[3], 1); 888} 889 890static bool 891str_ends_with(const char *str, const char *end) 892{ 893 int offset = strlen(str) - strlen(end); 894 if (offset < 0) 895 return false; 896 897 return strcmp(str + offset, end) == 0; 898} 899 900static void 901decode_dynamic_state(struct intel_batch_decode_ctx *ctx, 902 const char *struct_type, uint32_t state_offset, 903 int count) 904{ 905 uint64_t state_addr = ctx->dynamic_base + state_offset; 906 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr); 907 const void *state_map = bo.map; 908 909 if (state_map == NULL) { 910 fprintf(ctx->fp, " dynamic %s state unavailable\n", struct_type); 911 return; 912 } 913 914 struct intel_group *state = intel_spec_find_struct(ctx->spec, struct_type); 915 if (strcmp(struct_type, "BLEND_STATE") == 0) { 916 /* Blend states are different from the others because they have a header 917 * struct called BLEND_STATE which is followed by a variable number of 918 * BLEND_STATE_ENTRY structs. 919 */ 920 fprintf(ctx->fp, "%s\n", struct_type); 921 ctx_print_group(ctx, state, state_addr, state_map); 922 923 state_addr += state->dw_length * 4; 924 state_map += state->dw_length * 4; 925 926 struct_type = "BLEND_STATE_ENTRY"; 927 state = intel_spec_find_struct(ctx->spec, struct_type); 928 } 929 930 count = update_count(ctx, ctx->dynamic_base + state_offset, 931 ctx->dynamic_base, state->dw_length, count); 932 933 for (int i = 0; i < count; i++) { 934 fprintf(ctx->fp, "%s %d\n", struct_type, i); 935 ctx_print_group(ctx, state, state_addr, state_map); 936 937 state_addr += state->dw_length * 4; 938 state_map += state->dw_length * 4; 939 } 940} 941 942static void 943decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx, 944 const char *struct_type, const uint32_t *p, 945 int count) 946{ 947 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 948 949 uint32_t state_offset = 0; 950 951 struct intel_field_iterator iter; 952 intel_field_iterator_init(&iter, inst, p, 0, false); 953 while (intel_field_iterator_next(&iter)) { 954 if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) { 955 state_offset = iter.raw_value; 956 break; 957 } 958 } 959 decode_dynamic_state(ctx, struct_type, state_offset, count); 960} 961 962static void 963decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx *ctx, 964 const uint32_t *p) 965{ 966 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 967 uint32_t state_offset = 0; 968 bool clip = false, sf = false, cc = false; 969 struct intel_field_iterator iter; 970 intel_field_iterator_init(&iter, inst, p, 0, false); 971 while (intel_field_iterator_next(&iter)) { 972 if (!strcmp(iter.name, "CLIP Viewport State Change")) 973 clip = iter.raw_value; 974 if (!strcmp(iter.name, "SF Viewport State Change")) 975 sf = iter.raw_value; 976 if (!strcmp(iter.name, "CC Viewport State Change")) 977 cc = iter.raw_value; 978 else if (!strcmp(iter.name, "Pointer to CLIP_VIEWPORT") && clip) { 979 state_offset = iter.raw_value; 980 decode_dynamic_state(ctx, "CLIP_VIEWPORT", state_offset, 1); 981 } 982 else if (!strcmp(iter.name, "Pointer to SF_VIEWPORT") && sf) { 983 state_offset = iter.raw_value; 984 decode_dynamic_state(ctx, "SF_VIEWPORT", state_offset, 1); 985 } 986 else if (!strcmp(iter.name, "Pointer to CC_VIEWPORT") && cc) { 987 state_offset = iter.raw_value; 988 decode_dynamic_state(ctx, "CC_VIEWPORT", state_offset, 1); 989 } 990 } 991} 992 993static void 994decode_3dstate_viewport_state_pointers_cc(struct intel_batch_decode_ctx *ctx, 995 const uint32_t *p) 996{ 997 decode_dynamic_state_pointers(ctx, "CC_VIEWPORT", p, 4); 998} 999 1000static void 1001decode_3dstate_viewport_state_pointers_sf_clip(struct intel_batch_decode_ctx *ctx, 1002 const uint32_t *p) 1003{ 1004 decode_dynamic_state_pointers(ctx, "SF_CLIP_VIEWPORT", p, 4); 1005} 1006 1007static void 1008decode_3dstate_blend_state_pointers(struct intel_batch_decode_ctx *ctx, 1009 const uint32_t *p) 1010{ 1011 decode_dynamic_state_pointers(ctx, "BLEND_STATE", p, 1); 1012} 1013 1014static void 1015decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx, 1016 const uint32_t *p) 1017{ 1018 if (ctx->devinfo.ver != 6) { 1019 decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1); 1020 return; 1021 } 1022 1023 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 1024 1025 uint32_t state_offset = 0; 1026 bool blend_change = false, ds_change = false, cc_change = false; 1027 struct intel_field_iterator iter; 1028 intel_field_iterator_init(&iter, inst, p, 0, false); 1029 while (intel_field_iterator_next(&iter)) { 1030 if (!strcmp(iter.name, "BLEND_STATE Change")) 1031 blend_change = iter.raw_value; 1032 else if (!strcmp(iter.name, "DEPTH_STENCIL_STATE Change")) 1033 ds_change = iter.raw_value; 1034 else if (!strcmp(iter.name, "Color Calc State Pointer Valid")) 1035 cc_change = iter.raw_value; 1036 else if (!strcmp(iter.name, "Pointer to DEPTH_STENCIL_STATE") && ds_change) { 1037 state_offset = iter.raw_value; 1038 decode_dynamic_state(ctx, "DEPTH_STENCIL_STATE", state_offset, 1); 1039 } 1040 else if (!strcmp(iter.name, "Pointer to BLEND_STATE") && blend_change) { 1041 state_offset = iter.raw_value; 1042 decode_dynamic_state(ctx, "BLEND_STATE", state_offset, 1); 1043 } 1044 else if (!strcmp(iter.name, "Color Calc State Pointer") && cc_change) { 1045 state_offset = iter.raw_value; 1046 decode_dynamic_state(ctx, "COLOR_CALC_STATE", state_offset, 1); 1047 } 1048 } 1049} 1050 1051static void 1052decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx, 1053 const uint32_t *p) 1054{ 1055 decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, 1); 1056} 1057 1058static void 1059decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx, 1060 const uint32_t *p) 1061{ 1062 decode_dynamic_state_pointers(ctx, "SCISSOR_RECT", p, 1); 1063} 1064 1065static void 1066decode_3dstate_slice_table_state_pointers(struct intel_batch_decode_ctx *ctx, 1067 const uint32_t *p) 1068{ 1069 decode_dynamic_state_pointers(ctx, "SLICE_HASH_TABLE", p, 1); 1070} 1071 1072static void 1073handle_gt_mode(struct intel_batch_decode_ctx *ctx, 1074 uint32_t reg_addr, uint32_t val) 1075{ 1076 struct intel_group *reg = intel_spec_find_register(ctx->spec, reg_addr); 1077 1078 assert(intel_group_get_length(reg, &val) == 1); 1079 1080 struct intel_field_iterator iter; 1081 intel_field_iterator_init(&iter, reg, &val, 0, false); 1082 1083 uint32_t bt_alignment; 1084 bool bt_alignment_mask = 0; 1085 1086 while (intel_field_iterator_next(&iter)) { 1087 if (strcmp(iter.name, "Binding Table Alignment") == 0) { 1088 bt_alignment = iter.raw_value; 1089 } else if (strcmp(iter.name, "Binding Table Alignment Mask") == 0) { 1090 bt_alignment_mask = iter.raw_value; 1091 } 1092 } 1093 1094 if (bt_alignment_mask) 1095 ctx->use_256B_binding_tables = bt_alignment; 1096} 1097 1098struct reg_handler { 1099 const char *name; 1100 void (*handler)(struct intel_batch_decode_ctx *ctx, 1101 uint32_t reg_addr, uint32_t val); 1102} reg_handlers[] = { 1103 { "GT_MODE", handle_gt_mode } 1104}; 1105 1106static void 1107decode_load_register_imm(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 1108{ 1109 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 1110 const unsigned length = intel_group_get_length(inst, p); 1111 assert(length & 1); 1112 const unsigned nr_regs = (length - 1) / 2; 1113 1114 for (unsigned i = 0; i < nr_regs; i++) { 1115 struct intel_group *reg = intel_spec_find_register(ctx->spec, p[i * 2 + 1]); 1116 if (reg != NULL) { 1117 fprintf(ctx->fp, "register %s (0x%x): 0x%x\n", 1118 reg->name, reg->register_offset, p[2]); 1119 ctx_print_group(ctx, reg, reg->register_offset, &p[2]); 1120 1121 for (unsigned i = 0; i < ARRAY_SIZE(reg_handlers); i++) { 1122 if (strcmp(reg->name, reg_handlers[i].name) == 0) 1123 reg_handlers[i].handler(ctx, p[1], p[2]); 1124 } 1125 } 1126 } 1127} 1128 1129static void 1130decode_vs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset) 1131{ 1132 struct intel_group *strct = 1133 intel_spec_find_struct(ctx->spec, "VS_STATE"); 1134 if (strct == NULL) { 1135 fprintf(ctx->fp, "did not find VS_STATE info\n"); 1136 return; 1137 } 1138 1139 struct intel_batch_decode_bo bind_bo = 1140 ctx_get_bo(ctx, true, offset); 1141 1142 if (bind_bo.map == NULL) { 1143 fprintf(ctx->fp, " vs state unavailable\n"); 1144 return; 1145 } 1146 1147 ctx_print_group(ctx, strct, offset, bind_bo.map); 1148 1149 uint64_t ksp = 0; 1150 bool is_enabled = true; 1151 struct intel_field_iterator iter; 1152 intel_field_iterator_init(&iter, strct, bind_bo.map, 0, false); 1153 while (intel_field_iterator_next(&iter)) { 1154 if (strcmp(iter.name, "Kernel Start Pointer") == 0) { 1155 ksp = iter.raw_value; 1156 } else if (strcmp(iter.name, "Enable") == 0) { 1157 is_enabled = iter.raw_value; 1158 } 1159 } 1160 if (is_enabled) { 1161 ctx_disassemble_program(ctx, ksp, "vertex shader"); 1162 fprintf(ctx->fp, "\n"); 1163 } 1164} 1165 1166static void 1167decode_gs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset) 1168{ 1169 struct intel_group *strct = 1170 intel_spec_find_struct(ctx->spec, "GS_STATE"); 1171 if (strct == NULL) { 1172 fprintf(ctx->fp, "did not find GS_STATE info\n"); 1173 return; 1174 } 1175 1176 struct intel_batch_decode_bo bind_bo = 1177 ctx_get_bo(ctx, true, offset); 1178 1179 if (bind_bo.map == NULL) { 1180 fprintf(ctx->fp, " gs state unavailable\n"); 1181 return; 1182 } 1183 1184 ctx_print_group(ctx, strct, offset, bind_bo.map); 1185} 1186 1187static void 1188decode_clip_state(struct intel_batch_decode_ctx *ctx, uint32_t offset) 1189{ 1190 struct intel_group *strct = 1191 intel_spec_find_struct(ctx->spec, "CLIP_STATE"); 1192 if (strct == NULL) { 1193 fprintf(ctx->fp, "did not find CLIP_STATE info\n"); 1194 return; 1195 } 1196 1197 struct intel_batch_decode_bo bind_bo = 1198 ctx_get_bo(ctx, true, offset); 1199 1200 if (bind_bo.map == NULL) { 1201 fprintf(ctx->fp, " clip state unavailable\n"); 1202 return; 1203 } 1204 1205 ctx_print_group(ctx, strct, offset, bind_bo.map); 1206 1207 struct intel_group *vp_strct = 1208 intel_spec_find_struct(ctx->spec, "CLIP_VIEWPORT"); 1209 if (vp_strct == NULL) { 1210 fprintf(ctx->fp, "did not find CLIP_VIEWPORT info\n"); 1211 return; 1212 } 1213 uint32_t clip_vp_offset = ((uint32_t *)bind_bo.map)[6] & ~0x3; 1214 struct intel_batch_decode_bo vp_bo = 1215 ctx_get_bo(ctx, true, clip_vp_offset); 1216 if (vp_bo.map == NULL) { 1217 fprintf(ctx->fp, " clip vp state unavailable\n"); 1218 return; 1219 } 1220 ctx_print_group(ctx, vp_strct, clip_vp_offset, vp_bo.map); 1221} 1222 1223static void 1224decode_sf_state(struct intel_batch_decode_ctx *ctx, uint32_t offset) 1225{ 1226 struct intel_group *strct = 1227 intel_spec_find_struct(ctx->spec, "SF_STATE"); 1228 if (strct == NULL) { 1229 fprintf(ctx->fp, "did not find SF_STATE info\n"); 1230 return; 1231 } 1232 1233 struct intel_batch_decode_bo bind_bo = 1234 ctx_get_bo(ctx, true, offset); 1235 1236 if (bind_bo.map == NULL) { 1237 fprintf(ctx->fp, " sf state unavailable\n"); 1238 return; 1239 } 1240 1241 ctx_print_group(ctx, strct, offset, bind_bo.map); 1242 1243 struct intel_group *vp_strct = 1244 intel_spec_find_struct(ctx->spec, "SF_VIEWPORT"); 1245 if (vp_strct == NULL) { 1246 fprintf(ctx->fp, "did not find SF_VIEWPORT info\n"); 1247 return; 1248 } 1249 1250 uint32_t sf_vp_offset = ((uint32_t *)bind_bo.map)[5] & ~0x3; 1251 struct intel_batch_decode_bo vp_bo = 1252 ctx_get_bo(ctx, true, sf_vp_offset); 1253 if (vp_bo.map == NULL) { 1254 fprintf(ctx->fp, " sf vp state unavailable\n"); 1255 return; 1256 } 1257 ctx_print_group(ctx, vp_strct, sf_vp_offset, vp_bo.map); 1258} 1259 1260static void 1261decode_wm_state(struct intel_batch_decode_ctx *ctx, uint32_t offset) 1262{ 1263 struct intel_group *strct = 1264 intel_spec_find_struct(ctx->spec, "WM_STATE"); 1265 if (strct == NULL) { 1266 fprintf(ctx->fp, "did not find WM_STATE info\n"); 1267 return; 1268 } 1269 1270 struct intel_batch_decode_bo bind_bo = 1271 ctx_get_bo(ctx, true, offset); 1272 1273 if (bind_bo.map == NULL) { 1274 fprintf(ctx->fp, " wm state unavailable\n"); 1275 return; 1276 } 1277 1278 ctx_print_group(ctx, strct, offset, bind_bo.map); 1279 1280 decode_ps_kern(ctx, strct, bind_bo.map); 1281} 1282 1283static void 1284decode_cc_state(struct intel_batch_decode_ctx *ctx, uint32_t offset) 1285{ 1286 struct intel_group *strct = 1287 intel_spec_find_struct(ctx->spec, "COLOR_CALC_STATE"); 1288 if (strct == NULL) { 1289 fprintf(ctx->fp, "did not find COLOR_CALC_STATE info\n"); 1290 return; 1291 } 1292 1293 struct intel_batch_decode_bo bind_bo = 1294 ctx_get_bo(ctx, true, offset); 1295 1296 if (bind_bo.map == NULL) { 1297 fprintf(ctx->fp, " cc state unavailable\n"); 1298 return; 1299 } 1300 1301 ctx_print_group(ctx, strct, offset, bind_bo.map); 1302 1303 struct intel_group *vp_strct = 1304 intel_spec_find_struct(ctx->spec, "CC_VIEWPORT"); 1305 if (vp_strct == NULL) { 1306 fprintf(ctx->fp, "did not find CC_VIEWPORT info\n"); 1307 return; 1308 } 1309 uint32_t cc_vp_offset = ((uint32_t *)bind_bo.map)[4] & ~0x3; 1310 struct intel_batch_decode_bo vp_bo = 1311 ctx_get_bo(ctx, true, cc_vp_offset); 1312 if (vp_bo.map == NULL) { 1313 fprintf(ctx->fp, " cc vp state unavailable\n"); 1314 return; 1315 } 1316 ctx_print_group(ctx, vp_strct, cc_vp_offset, vp_bo.map); 1317} 1318static void 1319decode_pipelined_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 1320{ 1321 fprintf(ctx->fp, "VS State Table:\n"); 1322 decode_vs_state(ctx, p[1]); 1323 if (p[2] & 1) { 1324 fprintf(ctx->fp, "GS State Table:\n"); 1325 decode_gs_state(ctx, p[2] & ~1); 1326 } 1327 fprintf(ctx->fp, "Clip State Table:\n"); 1328 decode_clip_state(ctx, p[3] & ~1); 1329 fprintf(ctx->fp, "SF State Table:\n"); 1330 decode_sf_state(ctx, p[4]); 1331 fprintf(ctx->fp, "WM State Table:\n"); 1332 decode_wm_state(ctx, p[5]); 1333 fprintf(ctx->fp, "CC State Table:\n"); 1334 decode_cc_state(ctx, p[6]); 1335} 1336 1337static void 1338decode_cps_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 1339{ 1340 decode_dynamic_state_pointers(ctx, "CPS_STATE", p, 1); 1341} 1342 1343struct custom_decoder { 1344 const char *cmd_name; 1345 void (*decode)(struct intel_batch_decode_ctx *ctx, const uint32_t *p); 1346} custom_decoders[] = { 1347 { "STATE_BASE_ADDRESS", handle_state_base_address }, 1348 { "3DSTATE_BINDING_TABLE_POOL_ALLOC", handle_binding_table_pool_alloc }, 1349 { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load }, 1350 { "COMPUTE_WALKER", handle_compute_walker }, 1351 { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers }, 1352 { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer }, 1353 { "3DSTATE_VS", decode_single_ksp }, 1354 { "3DSTATE_GS", decode_single_ksp }, 1355 { "3DSTATE_DS", decode_single_ksp }, 1356 { "3DSTATE_HS", decode_single_ksp }, 1357 { "3DSTATE_PS", decode_ps_kernels }, 1358 { "3DSTATE_WM", decode_ps_kernels }, 1359 { "3DSTATE_MESH_SHADER", decode_mesh_task_ksp }, 1360 { "3DSTATE_TASK_SHADER", decode_mesh_task_ksp }, 1361 { "3DSTATE_CONSTANT_VS", decode_3dstate_constant }, 1362 { "3DSTATE_CONSTANT_GS", decode_3dstate_constant }, 1363 { "3DSTATE_CONSTANT_PS", decode_3dstate_constant }, 1364 { "3DSTATE_CONSTANT_HS", decode_3dstate_constant }, 1365 { "3DSTATE_CONSTANT_DS", decode_3dstate_constant }, 1366 { "3DSTATE_CONSTANT_ALL", decode_3dstate_constant_all }, 1367 1368 { "3DSTATE_BINDING_TABLE_POINTERS", decode_gfx4_3dstate_binding_table_pointers }, 1369 { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers }, 1370 { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers }, 1371 { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers }, 1372 { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers }, 1373 { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers }, 1374 1375 { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers }, 1376 { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers }, 1377 { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers }, 1378 { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers }, 1379 { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers }, 1380 { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gfx6 }, 1381 1382 { "3DSTATE_VIEWPORT_STATE_POINTERS", decode_3dstate_viewport_state_pointers }, 1383 { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc }, 1384 { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip }, 1385 { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers }, 1386 { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers }, 1387 { "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers }, 1388 { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers }, 1389 { "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers }, 1390 { "MI_LOAD_REGISTER_IMM", decode_load_register_imm }, 1391 { "3DSTATE_PIPELINED_POINTERS", decode_pipelined_pointers }, 1392 { "3DSTATE_CPS_POINTERS", decode_cps_pointers }, 1393 { "CONSTANT_BUFFER", decode_gfx4_constant_buffer }, 1394}; 1395 1396void 1397intel_print_batch(struct intel_batch_decode_ctx *ctx, 1398 const uint32_t *batch, uint32_t batch_size, 1399 uint64_t batch_addr, bool from_ring) 1400{ 1401 const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t); 1402 int length; 1403 struct intel_group *inst; 1404 const char *reset_color = ctx->flags & INTEL_BATCH_DECODE_IN_COLOR ? NORMAL : ""; 1405 1406 if (ctx->n_batch_buffer_start >= 100) { 1407 fprintf(ctx->fp, "%s0x%08"PRIx64": Max batch buffer jumps exceeded%s\n", 1408 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "", 1409 (ctx->flags & INTEL_BATCH_DECODE_OFFSETS) ? batch_addr : 0, 1410 reset_color); 1411 return; 1412 } 1413 1414 ctx->n_batch_buffer_start++; 1415 1416 for (p = batch; p < end; p += length) { 1417 inst = intel_ctx_find_instruction(ctx, p); 1418 length = intel_group_get_length(inst, p); 1419 assert(inst == NULL || length > 0); 1420 length = MAX2(1, length); 1421 1422 uint64_t offset; 1423 if (ctx->flags & INTEL_BATCH_DECODE_OFFSETS) 1424 offset = batch_addr + ((char *)p - (char *)batch); 1425 else 1426 offset = 0; 1427 1428 if (inst == NULL) { 1429 fprintf(ctx->fp, "%s0x%08"PRIx64": unknown instruction %08x%s\n", 1430 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "", 1431 offset, p[0], reset_color); 1432 1433 for (int i=1; i < length; i++) { 1434 fprintf(ctx->fp, "%s0x%08"PRIx64": -- %08x%s\n", 1435 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "", 1436 offset + i * 4, p[i], reset_color); 1437 } 1438 1439 continue; 1440 } 1441 1442 const char *color; 1443 const char *inst_name = intel_group_get_name(inst); 1444 if (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) { 1445 reset_color = NORMAL; 1446 if (ctx->flags & INTEL_BATCH_DECODE_FULL) { 1447 if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 || 1448 strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) 1449 color = GREEN_HEADER; 1450 else 1451 color = BLUE_HEADER; 1452 } else { 1453 color = NORMAL; 1454 } 1455 } else { 1456 color = ""; 1457 reset_color = ""; 1458 } 1459 1460 fprintf(ctx->fp, "%s0x%08"PRIx64"%s: 0x%08x: %-80s%s\n", color, offset, 1461 ctx->acthd && offset == ctx->acthd ? " (ACTHD)" : "", p[0], 1462 inst_name, reset_color); 1463 1464 if (ctx->flags & INTEL_BATCH_DECODE_FULL) { 1465 ctx_print_group(ctx, inst, offset, p); 1466 1467 for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) { 1468 if (strcmp(inst_name, custom_decoders[i].cmd_name) == 0) { 1469 custom_decoders[i].decode(ctx, p); 1470 break; 1471 } 1472 } 1473 } 1474 1475 if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0) { 1476 uint64_t next_batch_addr = 0; 1477 bool ppgtt = false; 1478 bool second_level = false; 1479 bool predicate = false; 1480 struct intel_field_iterator iter; 1481 intel_field_iterator_init(&iter, inst, p, 0, false); 1482 while (intel_field_iterator_next(&iter)) { 1483 if (strcmp(iter.name, "Batch Buffer Start Address") == 0) { 1484 next_batch_addr = iter.raw_value; 1485 } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) { 1486 second_level = iter.raw_value; 1487 } else if (strcmp(iter.name, "Address Space Indicator") == 0) { 1488 ppgtt = iter.raw_value; 1489 } else if (strcmp(iter.name, "Predication Enable") == 0) { 1490 predicate = iter.raw_value; 1491 } 1492 } 1493 1494 if (!predicate) { 1495 struct intel_batch_decode_bo next_batch = ctx_get_bo(ctx, ppgtt, next_batch_addr); 1496 1497 if (next_batch.map == NULL) { 1498 fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n", 1499 next_batch_addr); 1500 } else { 1501 intel_print_batch(ctx, next_batch.map, next_batch.size, 1502 next_batch.addr, false); 1503 } 1504 if (second_level) { 1505 /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts 1506 * like a subroutine call. Commands that come afterwards get 1507 * processed once the 2nd level batch buffer returns with 1508 * MI_BATCH_BUFFER_END. 1509 */ 1510 continue; 1511 } else if (!from_ring) { 1512 /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts 1513 * like a goto. Nothing after it will ever get processed. In 1514 * order to prevent the recursion from growing, we just reset the 1515 * loop and continue; 1516 */ 1517 break; 1518 } 1519 } 1520 } else if (strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) { 1521 break; 1522 } 1523 } 1524 1525 ctx->n_batch_buffer_start--; 1526} 1527