/*
 * Copyright (C) 2017-2019 Alyssa Rosenzweig
 * Copyright (C) 2017-2019 Connor Abbott
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <genxml/gen_macros.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <stdbool.h>
#include <stdarg.h>
#include <errno.h>
#include <ctype.h>
#include "decode.h"

#include "midgard/disassemble.h"
#include "bifrost/disassemble.h"
#include "bifrost/valhall/disassemble.h"

#define DUMP_UNPACKED(T, var, ...) { \
   pandecode_log(__VA_ARGS__); \
   pan_print(pandecode_dump_stream, T, var, (pandecode_indent + 1) * 2); \
}

#define DUMP_CL(T, cl, ...) { \
   pan_unpack(cl, T, temp); \
   DUMP_UNPACKED(T, temp, __VA_ARGS__); \
}

#define DUMP_SECTION(A, S, cl, ...) { \
   pan_section_unpack(cl, A, S, temp); \
   pandecode_log(__VA_ARGS__); \
   pan_section_print(pandecode_dump_stream, A, S, temp, (pandecode_indent + 1) * 2); \
}

#define MAP_ADDR(T, addr, cl) \
   const uint8_t *cl = 0; \
   { \
      struct pandecode_mapped_memory *mapped_mem = pandecode_find_mapped_gpu_mem_containing(addr); \
      cl = pandecode_fetch_gpu_mem(mapped_mem, addr, pan_size(T)); \
   }

#define DUMP_ADDR(T, addr, ...) { \
   MAP_ADDR(T, addr, cl) \
   DUMP_CL(T, cl, __VA_ARGS__); \
}

/* Semantic logging type.
 *
 * Raw: for raw messages to be printed as is.
 * Message: for helpful information to be commented out in replays.
 *
 * Use one of pandecode_log or pandecode_msg as syntax sugar.
 */

enum pandecode_log_type {
   PANDECODE_RAW,
   PANDECODE_MESSAGE,
};

#define pandecode_log(...) pandecode_log_typed(PANDECODE_RAW, __VA_ARGS__)
#define pandecode_msg(...) pandecode_log_typed(PANDECODE_MESSAGE, __VA_ARGS__)

static unsigned pandecode_indent = 0;

static void
pandecode_make_indent(void)
{
   for (unsigned i = 0; i < pandecode_indent; ++i)
      fprintf(pandecode_dump_stream, "  ");
}

static void PRINTFLIKE(2, 3)
pandecode_log_typed(enum pandecode_log_type type, const char *format, ...)
{
   va_list ap;

   pandecode_make_indent();

   if (type == PANDECODE_MESSAGE)
      fprintf(pandecode_dump_stream, "// ");

   va_start(ap, format);
   vfprintf(pandecode_dump_stream, format, ap);
   va_end(ap);
}

static void
pandecode_log_cont(const char *format, ...)
{
   va_list ap;

   va_start(ap, format);
   vfprintf(pandecode_dump_stream, format, ap);
   va_end(ap);
}

/* To check for memory safety issues, validates that the given pointer in GPU
 * memory is valid, containing at least sz bytes. The goal is to eliminate
 * GPU-side memory bugs (NULL pointer dereferences, buffer overflows, or buffer
 * overruns) by statically validating pointers.
 */

static void
pandecode_validate_buffer(mali_ptr addr, size_t sz)
{
   if (!addr) {
      pandecode_msg("XXX: null pointer deref\n");
      return;
   }

   /* Find a BO */

   struct pandecode_mapped_memory *bo =
      pandecode_find_mapped_gpu_mem_containing(addr);

   if (!bo) {
      pandecode_msg("XXX: invalid memory dereference\n");
      return;
   }

   /* Bounds check */

   unsigned offset = addr - bo->gpu_va;
   unsigned total = offset + sz;

   if (total > bo->length) {
      pandecode_msg("XXX: buffer overrun. "
                    "Chunk of size %zu at offset %d in buffer of size %zu. "
                    "Overrun by %zu bytes.\n",
                    sz, offset, bo->length, total - bo->length);
      return;
   }
}

#if PAN_ARCH <= 5
/* Midgard's tiler descriptor is embedded within the
 * larger FBD */

static void
pandecode_midgard_tiler_descriptor(
   const struct mali_tiler_context_packed *tp,
   const struct mali_tiler_weights_packed *wp)
{
   pan_unpack(tp, TILER_CONTEXT, t);
   DUMP_UNPACKED(TILER_CONTEXT, t, "Tiler:\n");

   /* We've never seen weights used in practice, but they exist */
   pan_unpack(wp, TILER_WEIGHTS, w);
   bool nonzero_weights = false;

   nonzero_weights |= w.weight0 != 0x0;
   nonzero_weights |= w.weight1 != 0x0;
   nonzero_weights |= w.weight2 != 0x0;
   nonzero_weights |= w.weight3 != 0x0;
   nonzero_weights |= w.weight4 != 0x0;
   nonzero_weights |= w.weight5 != 0x0;
   nonzero_weights |= w.weight6 != 0x0;
   nonzero_weights |= w.weight7 != 0x0;

   if (nonzero_weights)
      DUMP_UNPACKED(TILER_WEIGHTS, w, "Tiler Weights:\n");
}
#endif

/* Information about the framebuffer passed back for
 * additional analysis */

struct pandecode_fbd {
   unsigned width;
   unsigned height;
   unsigned rt_count;
   bool has_extra;
};

#if PAN_ARCH == 4
static struct pandecode_fbd
pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
{
   struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
   const void *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va);

   struct pandecode_fbd info = {
      .has_extra = false,
      .rt_count = 1
   };

   pandecode_log("Framebuffer:\n");
   pandecode_indent++;

   DUMP_SECTION(FRAMEBUFFER, LOCAL_STORAGE, s, "Local Storage:\n");
   pan_section_unpack(s, FRAMEBUFFER, PARAMETERS, p);
   DUMP_UNPACKED(FRAMEBUFFER_PARAMETERS, p, "Parameters:\n");

   const void *t = pan_section_ptr(s, FRAMEBUFFER, TILER);
   const void *w = pan_section_ptr(s, FRAMEBUFFER, TILER_WEIGHTS);

   pandecode_midgard_tiler_descriptor(t, w);

   pandecode_indent--;

   /* Dummy unpack of the padding section to make sure all words are 0.
    * No need to call print here since the section is supposed to be empty.
    */
   pan_section_unpack(s, FRAMEBUFFER, PADDING_1, padding1);
   pan_section_unpack(s, FRAMEBUFFER, PADDING_2, padding2);
   pandecode_log("\n");

   return info;
}
#endif

#if PAN_ARCH >= 5
static void
pandecode_local_storage(uint64_t gpu_va, int job_no)
{
   struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
   const struct mali_local_storage_packed *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va);
   DUMP_CL(LOCAL_STORAGE, s, "Local Storage:\n");
}

static void
pandecode_render_target(uint64_t gpu_va, unsigned job_no, unsigned gpu_id,
                        const struct MALI_FRAMEBUFFER_PARAMETERS *fb)
{
   pandecode_log("Color Render Targets:\n");
   pandecode_indent++;

   for (int i = 0; i < (fb->render_target_count); i++) {
      mali_ptr rt_va = gpu_va + i * pan_size(RENDER_TARGET);
      struct pandecode_mapped_memory *mem =
         pandecode_find_mapped_gpu_mem_containing(rt_va);
      const struct mali_render_target_packed *PANDECODE_PTR_VAR(rtp, mem, (mali_ptr) rt_va);
      DUMP_CL(RENDER_TARGET, rtp, "Color Render Target %d:\n", i);
   }

   pandecode_indent--;
   pandecode_log("\n");
}
#endif

#if PAN_ARCH >= 6
static void
pandecode_sample_locations(const void *fb, int job_no)
{
   pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params);

   struct pandecode_mapped_memory *smem =
      pandecode_find_mapped_gpu_mem_containing(params.sample_locations);

   const u16 *PANDECODE_PTR_VAR(samples, smem, params.sample_locations);

   pandecode_log("Sample locations:\n");
   for (int i = 0; i < 33; i++) {
      pandecode_log(" (%d, %d),\n",
                    samples[2 * i] - 128,
                    samples[2 * i + 1] - 128);
   }
}
#endif

static void
pandecode_dcd(const struct MALI_DRAW *p,
              int job_no, enum mali_job_type job_type,
              char *suffix, unsigned gpu_id);

#if PAN_ARCH >= 5
static struct pandecode_fbd
pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
{
   struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
   const void *PANDECODE_PTR_VAR(fb, mem, (mali_ptr) gpu_va);
   pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params);

   struct pandecode_fbd info;

#if PAN_ARCH >= 6
   pandecode_sample_locations(fb, job_no);

   pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, bparams);
   unsigned dcd_size = pan_size(DRAW);
   struct pandecode_mapped_memory *dcdmem =
      pandecode_find_mapped_gpu_mem_containing(bparams.frame_shader_dcds);

   if (bparams.pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
      const void *PANDECODE_PTR_VAR(dcd, dcdmem, bparams.frame_shader_dcds + (0 * dcd_size));
      pan_unpack(dcd, DRAW, draw);
      pandecode_log("Pre frame 0:\n");
      pandecode_dcd(&draw, job_no, MALI_JOB_TYPE_FRAGMENT, "", gpu_id);
   }

   if (bparams.pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
      const void *PANDECODE_PTR_VAR(dcd, dcdmem, bparams.frame_shader_dcds + (1 * dcd_size));
      pan_unpack(dcd, DRAW, draw);
      pandecode_log("Pre frame 1:\n");
      pandecode_dcd(&draw, job_no, MALI_JOB_TYPE_FRAGMENT, "", gpu_id);
   }

   if (bparams.post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
      const void *PANDECODE_PTR_VAR(dcd, dcdmem, bparams.frame_shader_dcds + (2 * dcd_size));
      pan_unpack(dcd, DRAW, draw);
      pandecode_log("Post frame:\n");
      pandecode_dcd(&draw, job_no, MALI_JOB_TYPE_FRAGMENT, "", gpu_id);
   }
#endif

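   /* The MFBD header is followed in memory by an optional ZS/CRC extension
    * and then one render target descriptor per render target; dump the
    * header first, then walk the trailing descriptors below. */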
pandecode_log("Multi-Target Framebuffer:\n"); 328 pandecode_indent++; 329 330#if PAN_ARCH <= 5 331 DUMP_SECTION(FRAMEBUFFER, LOCAL_STORAGE, fb, "Local Storage:\n"); 332#endif 333 334 info.width = params.width; 335 info.height = params.height; 336 info.rt_count = params.render_target_count; 337 DUMP_UNPACKED(FRAMEBUFFER_PARAMETERS, params, "Parameters:\n"); 338 339#if PAN_ARCH <= 5 340 const void *t = pan_section_ptr(fb, FRAMEBUFFER, TILER); 341 const void *w = pan_section_ptr(fb, FRAMEBUFFER, TILER_WEIGHTS); 342 pandecode_midgard_tiler_descriptor(t, w); 343#endif 344 345 pandecode_indent--; 346 pandecode_log("\n"); 347 348 gpu_va += pan_size(FRAMEBUFFER); 349 350 info.has_extra = params.has_zs_crc_extension; 351 352 if (info.has_extra) { 353 struct pandecode_mapped_memory *mem = 354 pandecode_find_mapped_gpu_mem_containing(gpu_va); 355 const struct mali_zs_crc_extension_packed *PANDECODE_PTR_VAR(zs_crc, mem, (mali_ptr)gpu_va); 356 DUMP_CL(ZS_CRC_EXTENSION, zs_crc, "ZS CRC Extension:\n"); 357 pandecode_log("\n"); 358 359 gpu_va += pan_size(ZS_CRC_EXTENSION); 360 } 361 362 if (is_fragment) 363 pandecode_render_target(gpu_va, job_no, gpu_id, ¶ms); 364 365 return info; 366} 367#endif 368 369#if PAN_ARCH <= 7 370static void 371pandecode_attributes(const struct pandecode_mapped_memory *mem, 372 mali_ptr addr, int job_no, char *suffix, 373 int count, bool varying, enum mali_job_type job_type) 374{ 375 char *prefix = varying ? "Varying" : "Attribute"; 376 assert(addr); 377 378 if (!count) { 379 pandecode_msg("warn: No %s records\n", prefix); 380 return; 381 } 382 383 MAP_ADDR(ATTRIBUTE_BUFFER, addr, cl); 384 385 for (int i = 0; i < count; ++i) { 386 pan_unpack(cl + i * pan_size(ATTRIBUTE_BUFFER), ATTRIBUTE_BUFFER, temp); 387 DUMP_UNPACKED(ATTRIBUTE_BUFFER, temp, "%s:\n", prefix); 388 389 switch (temp.type) { 390 case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR_WRITE_REDUCTION: 391 case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR: { 392 pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER), 393 ATTRIBUTE_BUFFER_CONTINUATION_NPOT, temp2); 394 pan_print(pandecode_dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, 395 temp2, (pandecode_indent + 1) * 2); 396 i++; 397 break; 398 } 399 case MALI_ATTRIBUTE_TYPE_3D_LINEAR: 400 case MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED: { 401 pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER_CONTINUATION_3D), 402 ATTRIBUTE_BUFFER_CONTINUATION_3D, temp2); 403 pan_print(pandecode_dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_3D, 404 temp2, (pandecode_indent + 1) * 2); 405 i++; 406 break; 407 } 408 default: 409 break; 410 } 411 } 412 pandecode_log("\n"); 413} 414#endif 415 416#if PAN_ARCH >= 6 417/* Decodes a Bifrost blend constant. See the notes in bifrost_blend_rt */ 418 419static mali_ptr 420pandecode_bifrost_blend(void *descs, int job_no, int rt_no, mali_ptr frag_shader) 421{ 422 pan_unpack(descs + (rt_no * pan_size(BLEND)), BLEND, b); 423 DUMP_UNPACKED(BLEND, b, "Blend RT %d:\n", rt_no); 424 if (b.internal.mode != MALI_BLEND_MODE_SHADER) 425 return 0; 426 427 return (frag_shader & 0xFFFFFFFF00000000ULL) | b.internal.shader.pc; 428} 429#elif PAN_ARCH == 5 430static mali_ptr 431pandecode_midgard_blend_mrt(void *descs, int job_no, int rt_no) 432{ 433 pan_unpack(descs + (rt_no * pan_size(BLEND)), BLEND, b); 434 DUMP_UNPACKED(BLEND, b, "Blend RT %d:\n", rt_no); 435 return b.blend_shader ? 
          (b.shader_pc & ~0xf) : 0;
}
#endif

#if PAN_ARCH <= 7
static unsigned
pandecode_attribute_meta(int count, mali_ptr attribute, bool varying)
{
   unsigned max = 0;

   for (int i = 0; i < count; ++i, attribute += pan_size(ATTRIBUTE)) {
      MAP_ADDR(ATTRIBUTE, attribute, cl);
      pan_unpack(cl, ATTRIBUTE, a);
      DUMP_UNPACKED(ATTRIBUTE, a, "%s:\n", varying ? "Varying" : "Attribute");
      max = MAX2(max, a.buffer_index);
   }

   pandecode_log("\n");
   return MIN2(max + 1, 256);
}

/* return bits [lo, hi) of word */
static u32
bits(u32 word, u32 lo, u32 hi)
{
   if (hi - lo >= 32)
      return word; // avoid undefined behavior with the shift

   if (lo >= 32)
      return 0;

   return (word >> lo) & ((1 << (hi - lo)) - 1);
}

static void
pandecode_invocation(const void *i)
{
   /* Decode invocation_count. See the comment before the definition of
    * invocation_count for an explanation.
    */
   pan_unpack(i, INVOCATION, invocation);

   unsigned size_x = bits(invocation.invocations, 0, invocation.size_y_shift) + 1;
   unsigned size_y = bits(invocation.invocations, invocation.size_y_shift, invocation.size_z_shift) + 1;
   unsigned size_z = bits(invocation.invocations, invocation.size_z_shift, invocation.workgroups_x_shift) + 1;

   unsigned groups_x = bits(invocation.invocations, invocation.workgroups_x_shift, invocation.workgroups_y_shift) + 1;
   unsigned groups_y = bits(invocation.invocations, invocation.workgroups_y_shift, invocation.workgroups_z_shift) + 1;
   unsigned groups_z = bits(invocation.invocations, invocation.workgroups_z_shift, 32) + 1;

   pandecode_log("Invocation (%d, %d, %d) x (%d, %d, %d)\n",
                 size_x, size_y, size_z,
                 groups_x, groups_y, groups_z);

   DUMP_UNPACKED(INVOCATION, invocation, "Invocation:\n")
}
#endif

static void
pandecode_primitive(const void *p)
{
   pan_unpack(p, PRIMITIVE, primitive);
   DUMP_UNPACKED(PRIMITIVE, primitive, "Primitive:\n");

#if PAN_ARCH <= 7
   /* Validate an index buffer is present if we need one. TODO: verify
    * relationship between invocation_count and index_count */

   if (primitive.indices) {
      /* Grab the size */
      unsigned size = (primitive.index_type == MALI_INDEX_TYPE_UINT32) ?
                      sizeof(uint32_t) : primitive.index_type;

      /* Ensure we got a size, and if so, validate the index buffer
       * is large enough to hold a full set of indices of the given
       * size */

      if (!size)
         pandecode_msg("XXX: index size missing\n");
      else
         pandecode_validate_buffer(primitive.indices, primitive.index_count * size);
   } else if (primitive.index_type)
      pandecode_msg("XXX: unexpected index size\n");
#endif
}

static void
pandecode_primitive_size(const void *s, bool constant)
{
   pan_unpack(s, PRIMITIVE_SIZE, ps);
   if (ps.size_array == 0x0)
      return;

   DUMP_UNPACKED(PRIMITIVE_SIZE, ps, "Primitive Size:\n")
}

#if PAN_ARCH <= 7
static void
pandecode_uniform_buffers(mali_ptr pubufs, int ubufs_count, int job_no)
{
   struct pandecode_mapped_memory *umem = pandecode_find_mapped_gpu_mem_containing(pubufs);
   uint64_t *PANDECODE_PTR_VAR(ubufs, umem, pubufs);

   for (int i = 0; i < ubufs_count; i++) {
      mali_ptr addr = (ubufs[i] >> 10) << 2;
      unsigned size = addr ?
                      (((ubufs[i] & ((1 << 10) - 1)) + 1) * 16) : 0;

      pandecode_validate_buffer(addr, size);

      char *ptr = pointer_as_memory_reference(addr);
      pandecode_log("ubuf_%d[%u] = %s;\n", i, size, ptr);
      free(ptr);
   }

   pandecode_log("\n");
}

static void
pandecode_uniforms(mali_ptr uniforms, unsigned uniform_count)
{
   pandecode_validate_buffer(uniforms, uniform_count * 16);

   char *ptr = pointer_as_memory_reference(uniforms);
   pandecode_log("vec4 uniforms[%u] = %s;\n", uniform_count, ptr);
   free(ptr);
   pandecode_log("\n");
}
#endif

static const char *
shader_type_for_job(unsigned type)
{
   switch (type) {
#if PAN_ARCH <= 7
   case MALI_JOB_TYPE_VERTEX: return "VERTEX";
#endif
   case MALI_JOB_TYPE_TILER: return "FRAGMENT";
   case MALI_JOB_TYPE_FRAGMENT: return "FRAGMENT";
   case MALI_JOB_TYPE_COMPUTE: return "COMPUTE";
   default: return "UNKNOWN";
   }
}

static unsigned shader_id = 0;

static struct midgard_disasm_stats
pandecode_shader_disassemble(mali_ptr shader_ptr, int shader_no, int type,
                             unsigned gpu_id)
{
   struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(shader_ptr);
   uint8_t *PANDECODE_PTR_VAR(code, mem, shader_ptr);

   /* Compute maximum possible size */
   size_t sz = mem->length - (shader_ptr - mem->gpu_va);

   /* Print some boilerplate to clearly denote the assembly (which doesn't
    * obey indentation rules), and actually do the disassembly! */

   pandecode_log_cont("\n\n");

   struct midgard_disasm_stats stats = { 0 };

#if PAN_ARCH >= 9
   disassemble_valhall(pandecode_dump_stream, (const uint64_t *) code, sz, true);
#elif PAN_ARCH >= 6 && PAN_ARCH <= 7
   disassemble_bifrost(pandecode_dump_stream, code, sz, false);
#else
   stats = disassemble_midgard(pandecode_dump_stream,
                               code, sz, gpu_id, true);
#endif

   unsigned nr_threads =
      (stats.work_count <= 4) ? 4 :
      (stats.work_count <= 8) ? 2 :
      1;

   pandecode_log_cont("shader%d - MESA_SHADER_%s shader: "
                      "%u inst, %u bundles, %u quadwords, "
                      "%u registers, %u threads, 0 loops, 0:0 spills:fills\n\n\n",
                      shader_id++,
                      shader_type_for_job(type),
                      stats.instruction_count, stats.bundle_count, stats.quadword_count,
                      stats.work_count, nr_threads);

   return stats;
}

#if PAN_ARCH <= 7
static void
pandecode_texture_payload(mali_ptr payload,
                          enum mali_texture_dimension dim,
                          enum mali_texture_layout layout,
                          bool manual_stride,
                          uint8_t levels,
                          uint16_t nr_samples,
                          uint16_t array_size,
                          struct pandecode_mapped_memory *tmem)
{
   pandecode_log(".payload = {\n");
   pandecode_indent++;

   /* A bunch of bitmap pointers follow.
    * We work out the correct number,
    * based on the mipmap/cubemap
    * properties, but dump extra
    * possibilities to futureproof */

   int bitmap_count = levels;

   /* Miptree for each face */
   if (dim == MALI_TEXTURE_DIMENSION_CUBE)
      bitmap_count *= 6;

   /* Array of layers */
   bitmap_count *= nr_samples;

   /* Array of textures */
   bitmap_count *= array_size;

   /* Stride for each element */
   if (manual_stride)
      bitmap_count *= 2;

   mali_ptr *pointers_and_strides = pandecode_fetch_gpu_mem(tmem,
      payload, sizeof(mali_ptr) * bitmap_count);
   for (int i = 0; i < bitmap_count; ++i) {
      /* How we dump depends if this is a stride or a pointer */

      if (manual_stride && (i & 1)) {
         /* signed 32-bit snuck in as a 64-bit pointer */
         uint64_t stride_set = pointers_and_strides[i];
         int32_t row_stride = stride_set;
         int32_t surface_stride = stride_set >> 32;
         pandecode_log("(mali_ptr) %d /* surface stride */ %d /* row stride */, \n",
                       surface_stride, row_stride);
      } else {
         char *a = pointer_as_memory_reference(pointers_and_strides[i]);
         pandecode_log("%s, \n", a);
         free(a);
      }
   }

   pandecode_indent--;
   pandecode_log("},\n");
}
#endif

#if PAN_ARCH <= 5
static void
pandecode_texture(mali_ptr u,
                  struct pandecode_mapped_memory *tmem,
                  unsigned job_no, unsigned tex)
{
   struct pandecode_mapped_memory *mapped_mem = pandecode_find_mapped_gpu_mem_containing(u);
   const uint8_t *cl = pandecode_fetch_gpu_mem(mapped_mem, u, pan_size(TEXTURE));

   pan_unpack(cl, TEXTURE, temp);
   DUMP_UNPACKED(TEXTURE, temp, "Texture:\n")

   pandecode_indent++;
   unsigned nr_samples = temp.dimension == MALI_TEXTURE_DIMENSION_3D ?
                         1 : temp.sample_count;
   pandecode_texture_payload(u + pan_size(TEXTURE),
                             temp.dimension, temp.texel_ordering, temp.manual_stride,
                             temp.levels, nr_samples, temp.array_size, mapped_mem);
   pandecode_indent--;
}
#else
static void
pandecode_bifrost_texture(
   const void *cl,
   unsigned job_no,
   unsigned tex)
{
   pan_unpack(cl, TEXTURE, temp);
   DUMP_UNPACKED(TEXTURE, temp, "Texture:\n")

   pandecode_indent++;

#if PAN_ARCH >= 9
   int plane_count = temp.levels * temp.array_size;

   /* Miptree for each face */
   if (temp.dimension == MALI_TEXTURE_DIMENSION_CUBE)
      plane_count *= 6;

   for (unsigned i = 0; i < plane_count; ++i)
      DUMP_ADDR(PLANE, temp.surfaces + i * pan_size(PLANE), "Plane %u:\n", i);
#else
   struct pandecode_mapped_memory *tmem = pandecode_find_mapped_gpu_mem_containing(temp.surfaces);
   unsigned nr_samples = temp.dimension == MALI_TEXTURE_DIMENSION_3D ?
                         1 : temp.sample_count;

   pandecode_texture_payload(temp.surfaces, temp.dimension, temp.texel_ordering,
                             true, temp.levels, nr_samples, temp.array_size, tmem);
#endif
   pandecode_indent--;
}
#endif

#if PAN_ARCH <= 7
static void
pandecode_blend_shader_disassemble(mali_ptr shader, int job_no, int job_type,
                                   unsigned gpu_id)
{
   struct midgard_disasm_stats stats =
      pandecode_shader_disassemble(shader, job_no, job_type, gpu_id);

   bool has_texture = (stats.texture_count > 0);
   bool has_sampler = (stats.sampler_count > 0);
   bool has_attribute = (stats.attribute_count > 0);
   bool has_varying = (stats.varying_count > 0);
   bool has_uniform = (stats.uniform_count > 0);
   bool has_ubo = (stats.uniform_buffer_count > 0);

   if (has_texture || has_sampler)
      pandecode_msg("XXX: blend shader accessing textures\n");

   if (has_attribute || has_varying)
      pandecode_msg("XXX: blend shader accessing interstage\n");

   if (has_uniform || has_ubo)
      pandecode_msg("XXX: blend shader accessing uniforms\n");
}

static void
pandecode_textures(mali_ptr textures, unsigned texture_count, int job_no)
{
   struct pandecode_mapped_memory *mmem = pandecode_find_mapped_gpu_mem_containing(textures);

   if (!mmem)
      return;

   pandecode_log("Textures %"PRIx64"_%d:\n", textures, job_no);
   pandecode_indent++;

#if PAN_ARCH >= 6
   const void *cl =
      pandecode_fetch_gpu_mem(mmem,
                              textures,
                              pan_size(TEXTURE) *
                              texture_count);

   for (unsigned tex = 0; tex < texture_count; ++tex) {
      pandecode_bifrost_texture(cl + pan_size(TEXTURE) * tex,
                                job_no, tex);
   }
#else
   mali_ptr *PANDECODE_PTR_VAR(u, mmem, textures);

   for (int tex = 0; tex < texture_count; ++tex) {
      mali_ptr *PANDECODE_PTR_VAR(u, mmem, textures + tex * sizeof(mali_ptr));
      char *a = pointer_as_memory_reference(*u);
      pandecode_log("%s,\n", a);
      free(a);
   }

   /* Now, finally, descend down into the texture descriptor */
   for (unsigned tex = 0; tex < texture_count; ++tex) {
      mali_ptr *PANDECODE_PTR_VAR(u, mmem, textures + tex * sizeof(mali_ptr));
      struct pandecode_mapped_memory *tmem = pandecode_find_mapped_gpu_mem_containing(*u);
      if (tmem)
         pandecode_texture(*u, tmem, job_no, tex);
   }
#endif
   pandecode_indent--;
   pandecode_log("\n");
}

static void
pandecode_samplers(mali_ptr samplers, unsigned sampler_count, int job_no)
{
   pandecode_log("Samplers %"PRIx64"_%d:\n", samplers, job_no);
   pandecode_indent++;

   for (int i = 0; i < sampler_count; ++i)
      DUMP_ADDR(SAMPLER, samplers + (pan_size(SAMPLER) * i), "Sampler %d:\n", i);

   pandecode_indent--;
   pandecode_log("\n");
}

static void
pandecode_dcd(const struct MALI_DRAW *p,
              int job_no, enum mali_job_type job_type,
              char *suffix, unsigned gpu_id)
{
   struct pandecode_mapped_memory *attr_mem;

#if PAN_ARCH >= 5
   struct pandecode_fbd fbd_info = {
      /* Default for Bifrost */
      .rt_count = 1
   };
#endif

#if PAN_ARCH >= 6
   pandecode_local_storage(p->thread_storage & ~1, job_no);
#elif PAN_ARCH == 5
   if (job_type != MALI_JOB_TYPE_TILER) {
      pandecode_local_storage(p->thread_storage & ~1, job_no);
   } else {
      assert(p->fbd & MALI_FBD_TAG_IS_MFBD);
      fbd_info = pandecode_mfbd_bfr((u64) ((uintptr_t) p->fbd) & ~MALI_FBD_TAG_MASK,
                                    job_no, false, gpu_id);
   }
#else
   pandecode_sfbd((u64) (uintptr_t) p->fbd,
                  job_no, false, gpu_id);
#endif

   int varying_count = 0, attribute_count = 0, uniform_count = 0, uniform_buffer_count = 0;
   int texture_count = 0, sampler_count = 0;

   if (p->state) {
      struct pandecode_mapped_memory *smem = pandecode_find_mapped_gpu_mem_containing(p->state);
      uint32_t *cl = pandecode_fetch_gpu_mem(smem, p->state, pan_size(RENDERER_STATE));

      pan_unpack(cl, RENDERER_STATE, state);

      if (state.shader.shader & ~0xF)
         pandecode_shader_disassemble(state.shader.shader & ~0xF, job_no, job_type, gpu_id);

#if PAN_ARCH >= 6
      bool idvs = (job_type == MALI_JOB_TYPE_INDEXED_VERTEX);

      if (idvs && state.secondary_shader)
         pandecode_shader_disassemble(state.secondary_shader, job_no, job_type, gpu_id);
#endif
      DUMP_UNPACKED(RENDERER_STATE, state, "State:\n");
      pandecode_indent++;

      /* Save for dumps */
      attribute_count = state.shader.attribute_count;
      varying_count = state.shader.varying_count;
      texture_count = state.shader.texture_count;
      sampler_count = state.shader.sampler_count;
      uniform_buffer_count = state.properties.uniform_buffer_count;

#if PAN_ARCH >= 6
      uniform_count = state.preload.uniform_count;
#else
      uniform_count = state.properties.uniform_count;
#endif

#if PAN_ARCH == 4
      mali_ptr shader = state.blend_shader & ~0xF;
      if (state.multisample_misc.blend_shader && shader)
         pandecode_blend_shader_disassemble(shader, job_no, job_type, gpu_id);
#endif
      pandecode_indent--;
      pandecode_log("\n");

      /* MRT blend fields are used whenever MFBD is used, with
       * per-RT descriptors */

#if PAN_ARCH >= 5
      if ((job_type == MALI_JOB_TYPE_TILER || job_type == MALI_JOB_TYPE_FRAGMENT) &&
          (PAN_ARCH >= 6 || p->thread_storage & MALI_FBD_TAG_IS_MFBD)) {
         void *blend_base = ((void *) cl) + pan_size(RENDERER_STATE);

         for (unsigned i = 0; i < fbd_info.rt_count; i++) {
            mali_ptr shader = 0;

#if PAN_ARCH >= 6
            shader = pandecode_bifrost_blend(blend_base, job_no, i,
                                             state.shader.shader);
#else
            shader = pandecode_midgard_blend_mrt(blend_base, job_no, i);
#endif
            if (shader & ~0xF)
               pandecode_blend_shader_disassemble(shader, job_no, job_type,
                                                  gpu_id);
         }
      }
#endif
   } else
      pandecode_msg("XXX: missing shader descriptor\n");

   if (p->viewport) {
      DUMP_ADDR(VIEWPORT, p->viewport, "Viewport:\n");
      pandecode_log("\n");
   }

   unsigned max_attr_index = 0;

   if (p->attributes)
      max_attr_index = pandecode_attribute_meta(attribute_count, p->attributes, false);

   if (p->attribute_buffers) {
      attr_mem = pandecode_find_mapped_gpu_mem_containing(p->attribute_buffers);
      pandecode_attributes(attr_mem, p->attribute_buffers, job_no, suffix, max_attr_index, false, job_type);
   }

   if (p->varyings) {
      varying_count = pandecode_attribute_meta(varying_count, p->varyings, true);
   }

   if (p->varying_buffers) {
      attr_mem = pandecode_find_mapped_gpu_mem_containing(p->varying_buffers);
      pandecode_attributes(attr_mem, p->varying_buffers, job_no, suffix, varying_count, true, job_type);
   }

   if (p->uniform_buffers) {
      if (uniform_buffer_count)
         pandecode_uniform_buffers(p->uniform_buffers, uniform_buffer_count, job_no);
      else
         pandecode_msg("warn: UBOs specified but not referenced\n");
   } else if (uniform_buffer_count)
      pandecode_msg("XXX: UBOs referenced but not specified\n");

   /* We don't want to actually dump uniforms, but
    * we do need to validate
    * that the counts we were given are sane */

   if (p->push_uniforms) {
      if (uniform_count)
         pandecode_uniforms(p->push_uniforms, uniform_count);
      else
         pandecode_msg("warn: Uniforms specified but not referenced\n");
   } else if (uniform_count)
      pandecode_msg("XXX: Uniforms referenced but not specified\n");

   if (p->textures)
      pandecode_textures(p->textures, texture_count, job_no);

   if (p->samplers)
      pandecode_samplers(p->samplers, sampler_count, job_no);
}

static void
pandecode_vertex_compute_geometry_job(const struct MALI_JOB_HEADER *h,
                                      const struct pandecode_mapped_memory *mem,
                                      mali_ptr job, int job_no, unsigned gpu_id)
{
   struct mali_compute_job_packed *PANDECODE_PTR_VAR(p, mem, job);
   pan_section_unpack(p, COMPUTE_JOB, DRAW, draw);
   pandecode_dcd(&draw, job_no, h->type, "", gpu_id);

   pandecode_log("Vertex Job Payload:\n");
   pandecode_indent++;
   pandecode_invocation(pan_section_ptr(p, COMPUTE_JOB, INVOCATION));
   DUMP_SECTION(COMPUTE_JOB, PARAMETERS, p, "Vertex Job Parameters:\n");
   DUMP_UNPACKED(DRAW, draw, "Draw:\n");
   pandecode_indent--;
   pandecode_log("\n");
}
#endif

#if PAN_ARCH >= 6
static void
pandecode_bifrost_tiler_heap(mali_ptr gpu_va, int job_no)
{
   struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
   pan_unpack(PANDECODE_PTR(mem, gpu_va, void), TILER_HEAP, h);
   DUMP_UNPACKED(TILER_HEAP, h, "Bifrost Tiler Heap:\n");
}

static void
pandecode_bifrost_tiler(mali_ptr gpu_va, int job_no)
{
   struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
   pan_unpack(PANDECODE_PTR(mem, gpu_va, void), TILER_CONTEXT, t);

   if (t.heap)
      pandecode_bifrost_tiler_heap(t.heap, job_no);

   DUMP_UNPACKED(TILER_CONTEXT, t, "Bifrost Tiler:\n");
}

#if PAN_ARCH <= 7
static void
pandecode_indexed_vertex_job(const struct MALI_JOB_HEADER *h,
                             const struct pandecode_mapped_memory *mem,
                             mali_ptr job, int job_no, unsigned gpu_id)
{
   struct mali_indexed_vertex_job_packed *PANDECODE_PTR_VAR(p, mem, job);

   pandecode_log("Vertex:\n");
   pan_section_unpack(p, INDEXED_VERTEX_JOB, VERTEX_DRAW, vert_draw);
   pandecode_dcd(&vert_draw, job_no, h->type, "", gpu_id);
   DUMP_UNPACKED(DRAW, vert_draw, "Vertex Draw:\n");

   pandecode_log("Fragment:\n");
   pan_section_unpack(p, INDEXED_VERTEX_JOB, FRAGMENT_DRAW, frag_draw);
   pandecode_dcd(&frag_draw, job_no, MALI_JOB_TYPE_FRAGMENT, "", gpu_id);
   DUMP_UNPACKED(DRAW, frag_draw, "Fragment Draw:\n");

   pan_section_unpack(p, INDEXED_VERTEX_JOB, TILER, tiler_ptr);
   pandecode_log("Tiler Job Payload:\n");
   pandecode_indent++;
   pandecode_bifrost_tiler(tiler_ptr.address, job_no);
   pandecode_indent--;

   pandecode_invocation(pan_section_ptr(p, INDEXED_VERTEX_JOB, INVOCATION));
   pandecode_primitive(pan_section_ptr(p, INDEXED_VERTEX_JOB, PRIMITIVE));

   /* TODO: gl_PointSize on Bifrost */
   pandecode_primitive_size(pan_section_ptr(p, INDEXED_VERTEX_JOB, PRIMITIVE_SIZE), true);

   pan_section_unpack(p, INDEXED_VERTEX_JOB, PADDING, padding);
}
#endif
#endif

static void
pandecode_tiler_job(const struct MALI_JOB_HEADER *h,
                    const struct pandecode_mapped_memory *mem,
                    mali_ptr job, int job_no, unsigned gpu_id)
{
   struct mali_tiler_job_packed *PANDECODE_PTR_VAR(p, mem, job);
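   /* A tiler job wraps a draw call descriptor (DRAW) plus primitive state.
    * On v6+ it also references the shared tiler context, and on v9+ it
    * carries explicit instance/vertex count, scissor and index sections,
    * all of which are dumped below. */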
   pan_section_unpack(p, TILER_JOB, DRAW, draw);
   pandecode_dcd(&draw, job_no, h->type, "", gpu_id);
   pandecode_log("Tiler Job Payload:\n");
   pandecode_indent++;

#if PAN_ARCH <= 7
   pandecode_invocation(pan_section_ptr(p, TILER_JOB, INVOCATION));
#endif

   pandecode_primitive(pan_section_ptr(p, TILER_JOB, PRIMITIVE));
   DUMP_UNPACKED(DRAW, draw, "Draw:\n");

#if PAN_ARCH >= 6
   pan_section_unpack(p, TILER_JOB, TILER, tiler_ptr);
   pandecode_bifrost_tiler(tiler_ptr.address, job_no);

   /* TODO: gl_PointSize on Bifrost */
   pandecode_primitive_size(pan_section_ptr(p, TILER_JOB, PRIMITIVE_SIZE), true);

#if PAN_ARCH >= 9
   DUMP_SECTION(TILER_JOB, INSTANCE_COUNT, p, "Instance count:\n");
   DUMP_SECTION(TILER_JOB, VERTEX_COUNT, p, "Vertex count:\n");
   DUMP_SECTION(TILER_JOB, SCISSOR, p, "Scissor:\n");
   DUMP_SECTION(TILER_JOB, INDICES, p, "Indices:\n");
#else
   pan_section_unpack(p, TILER_JOB, PADDING, padding);
#endif

#else
   pan_section_unpack(p, TILER_JOB, PRIMITIVE, primitive);
   pandecode_primitive_size(pan_section_ptr(p, TILER_JOB, PRIMITIVE_SIZE),
                            primitive.point_size_array_format == MALI_POINT_SIZE_ARRAY_FORMAT_NONE);
#endif
   pandecode_indent--;
   pandecode_log("\n");
}

static void
pandecode_fragment_job(const struct pandecode_mapped_memory *mem,
                       mali_ptr job, int job_no, unsigned gpu_id)
{
   struct mali_fragment_job_packed *PANDECODE_PTR_VAR(p, mem, job);
   pan_section_unpack(p, FRAGMENT_JOB, PAYLOAD, s);

#if PAN_ARCH == 4
   pandecode_sfbd(s.framebuffer, job_no, true, gpu_id);
#else
   assert(s.framebuffer & MALI_FBD_TAG_IS_MFBD);

   struct pandecode_fbd info;

   info = pandecode_mfbd_bfr(s.framebuffer & ~MALI_FBD_TAG_MASK, job_no,
                             true, gpu_id);
#endif

#if PAN_ARCH >= 5
   unsigned expected_tag = 0;

   /* Compute the tag for the tagged pointer.
    * This contains the type of
    * FBD (MFBD/SFBD), and in the case of an MFBD, information about which
    * additional structures follow the MFBD header (an extra payload or
    * not, as well as a count of render targets) */

   expected_tag = MALI_FBD_TAG_IS_MFBD;
   if (info.has_extra)
      expected_tag |= MALI_FBD_TAG_HAS_ZS_RT;

   expected_tag |= MALI_FBD_TAG_IS_MFBD | (MALI_POSITIVE(info.rt_count) << 2);
#endif

   DUMP_UNPACKED(FRAGMENT_JOB_PAYLOAD, s, "Fragment Job Payload:\n");

#if PAN_ARCH >= 5
   /* The FBD is a tagged pointer */

   unsigned tag = (s.framebuffer & MALI_FBD_TAG_MASK);

   if (tag != expected_tag)
      pandecode_msg("XXX: expected FBD tag %X but got %X\n", expected_tag, tag);
#endif

   pandecode_log("\n");
}

static void
pandecode_write_value_job(const struct pandecode_mapped_memory *mem,
                          mali_ptr job, int job_no)
{
   struct mali_write_value_job_packed *PANDECODE_PTR_VAR(p, mem, job);
   pan_section_unpack(p, WRITE_VALUE_JOB, PAYLOAD, u);
   DUMP_SECTION(WRITE_VALUE_JOB, PAYLOAD, p, "Write Value Payload:\n");
   pandecode_log("\n");
}

static void
pandecode_cache_flush_job(const struct pandecode_mapped_memory *mem,
                          mali_ptr job, int job_no)
{
   struct mali_cache_flush_job_packed *PANDECODE_PTR_VAR(p, mem, job);
   pan_section_unpack(p, CACHE_FLUSH_JOB, PAYLOAD, u);
   DUMP_SECTION(CACHE_FLUSH_JOB, PAYLOAD, p, "Cache Flush Payload:\n");
   pandecode_log("\n");
}

#if PAN_ARCH >= 9
static void
dump_fau(mali_ptr addr, unsigned count, const char *name)
{
   struct pandecode_mapped_memory *mem =
      pandecode_find_mapped_gpu_mem_containing(addr);
   const uint32_t *PANDECODE_PTR_VAR(raw, mem, addr);

   pandecode_validate_buffer(addr, count * 8);

   fprintf(pandecode_dump_stream, "%s:\n", name);
   for (unsigned i = 0; i < count; ++i) {
      fprintf(pandecode_dump_stream, " %08X %08X\n",
              raw[2 * i], raw[2 * i + 1]);
   }
   fprintf(pandecode_dump_stream, "\n");
}

static mali_ptr
pandecode_shader(mali_ptr addr, const char *label, unsigned gpu_id)
{
   MAP_ADDR(SHADER_PROGRAM, addr, cl);
   pan_unpack(cl, SHADER_PROGRAM, desc);

   assert(desc.type == 8);

   DUMP_UNPACKED(SHADER_PROGRAM, desc, "%s Shader:\n", label);
   pandecode_shader_disassemble(desc.binary, 0, 0, gpu_id);
   return desc.binary;
}

static void
pandecode_resources(mali_ptr addr, unsigned size)
{
   struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(addr);
   const uint8_t *cl = pandecode_fetch_gpu_mem(mem, addr, size);
   assert((size % 0x20) == 0);

   for (unsigned i = 0; i < size; i += 0x20) {
      unsigned type = (cl[i] & 0xF);

      switch (type) {
      case MALI_DESCRIPTOR_TYPE_SAMPLER:
         DUMP_CL(SAMPLER, cl + i, "Sampler:\n");
         break;
      case MALI_DESCRIPTOR_TYPE_TEXTURE:
         pandecode_bifrost_texture(cl + i, 0, i);
         break;
      case MALI_DESCRIPTOR_TYPE_ATTRIBUTE:
         DUMP_CL(ATTRIBUTE, cl + i, "Attribute:\n");
         break;
      case MALI_DESCRIPTOR_TYPE_BUFFER:
         DUMP_CL(BUFFER, cl + i, "Buffer:\n");
         break;
      default:
         fprintf(pandecode_dump_stream, "Unknown descriptor type %X\n", type);
         break;
      }
   }
}

static void
pandecode_resource_tables(mali_ptr addr, const char *label)
{
   unsigned count = addr & 0x3F;
   addr = addr & ~0x3F;

   struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(addr);
   const uint8_t *cl = pandecode_fetch_gpu_mem(mem, addr, MALI_RESOURCE_LENGTH * count);

   for (unsigned i = 0; i < count; ++i) {
      pan_unpack(cl + i * MALI_RESOURCE_LENGTH, RESOURCE, entry);
      DUMP_UNPACKED(RESOURCE, entry, "Entry %u:\n", i);

      pandecode_indent += 2;
      if (entry.address)
         pandecode_resources(entry.address, entry.size);
      pandecode_indent -= 2;
   }
}

static void
pandecode_depth_stencil(mali_ptr addr)
{
   MAP_ADDR(DEPTH_STENCIL, addr, cl);
   pan_unpack(cl, DEPTH_STENCIL, desc);
   DUMP_UNPACKED(DEPTH_STENCIL, desc, "Depth/stencil");
}

static void
pandecode_shader_environment(const struct MALI_SHADER_ENVIRONMENT *p,
                             unsigned gpu_id)
{
   if (p->shader)
      pandecode_shader(p->shader, "Shader", gpu_id);

   if (p->resources)
      pandecode_resource_tables(p->resources, "Resources");

   if (p->thread_storage)
      pandecode_local_storage(p->thread_storage, 0);

   if (p->fau)
      dump_fau(p->fau, p->fau_count, "FAU");
}

static void
pandecode_dcd(const struct MALI_DRAW *p,
              int job_no, enum mali_job_type job_type,
              char *suffix, unsigned gpu_id)
{
   mali_ptr frag_shader = 0;

   pandecode_depth_stencil(p->depth_stencil);

   for (unsigned i = 0; i < p->blend_count; ++i) {
      struct pandecode_mapped_memory *blend_mem =
         pandecode_find_mapped_gpu_mem_containing(p->blend);

      struct mali_blend_packed *PANDECODE_PTR_VAR(blend_descs, blend_mem, p->blend);

      mali_ptr blend_shader = pandecode_bifrost_blend(blend_descs, 0, i, frag_shader);
      if (blend_shader) {
         fprintf(pandecode_dump_stream, "Blend shader %u", i);
         pandecode_shader_disassemble(blend_shader, 0, 0, gpu_id);
      }
   }

   pandecode_shader_environment(&p->shader, gpu_id);
   DUMP_UNPACKED(DRAW, *p, "Draw:\n");
}

static void
pandecode_malloc_vertex_job(const struct pandecode_mapped_memory *mem,
                            mali_ptr job, unsigned gpu_id)
{
   struct mali_malloc_vertex_job_packed *PANDECODE_PTR_VAR(p, mem, job);

   DUMP_SECTION(MALLOC_VERTEX_JOB, PRIMITIVE, p, "Primitive:\n");
   DUMP_SECTION(MALLOC_VERTEX_JOB, INSTANCE_COUNT, p, "Instance count:\n");
   DUMP_SECTION(MALLOC_VERTEX_JOB, ALLOCATION, p, "Allocation:\n");
   DUMP_SECTION(MALLOC_VERTEX_JOB, TILER, p, "Tiler:\n");
   DUMP_SECTION(MALLOC_VERTEX_JOB, SCISSOR, p, "Scissor:\n");
   DUMP_SECTION(MALLOC_VERTEX_JOB, PRIMITIVE_SIZE, p, "Primitive Size:\n");
   DUMP_SECTION(MALLOC_VERTEX_JOB, INDICES, p, "Indices:\n");

   pan_section_unpack(p, MALLOC_VERTEX_JOB, DRAW, dcd);

   pan_section_unpack(p, MALLOC_VERTEX_JOB, TILER, tiler_ptr);
   pandecode_log("Tiler Job Payload:\n");
   pandecode_indent++;
   if (tiler_ptr.address)
      pandecode_bifrost_tiler(tiler_ptr.address, 0);
   else
      pandecode_log("<omitted>\n");
   pandecode_indent--;

   pandecode_dcd(&dcd, 0, 0, NULL, gpu_id);

   pan_section_unpack(p, MALLOC_VERTEX_JOB, POSITION, position);
   pan_section_unpack(p, MALLOC_VERTEX_JOB, VARYING, varying);
   pandecode_shader_environment(&position, gpu_id);
   pandecode_shader_environment(&varying, gpu_id);
}

static void
pandecode_compute_job(const struct pandecode_mapped_memory *mem, mali_ptr job, unsigned gpu_id)
{
   struct mali_compute_job_packed *PANDECODE_PTR_VAR(p, mem, job);
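   /* Valhall compute jobs embed the shader program pointer, thread storage,
    * FAU table and resource tables directly in the payload; decode each
    * piece that is present, then dump the payload itself. */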
   pan_section_unpack(p, COMPUTE_JOB, PAYLOAD, payload);

   pandecode_shader(payload.compute.shader, "Shader", gpu_id);
   if (payload.compute.thread_storage)
      pandecode_local_storage(payload.compute.thread_storage, 0);
   if (payload.compute.fau)
      dump_fau(payload.compute.fau, payload.compute.fau_count, "FAU");
   if (payload.compute.resources)
      pandecode_resource_tables(payload.compute.resources, "Resources");

   DUMP_UNPACKED(COMPUTE_PAYLOAD, payload, "Compute:\n");
}
#endif

/* Entrypoint to start tracing. jc_gpu_va is the GPU address for the first job
 * in the chain; later jobs are found by walking the chain. gpu_id is the
 * fine-grained model ID, since some details are model-specific even within a
 * particular architecture. */

void
GENX(pandecode_jc)(mali_ptr jc_gpu_va, unsigned gpu_id)
{
   pandecode_dump_file_open();

   unsigned job_descriptor_number = 0;
   mali_ptr next_job = 0;

   do {
      struct pandecode_mapped_memory *mem =
         pandecode_find_mapped_gpu_mem_containing(jc_gpu_va);

      pan_unpack(PANDECODE_PTR(mem, jc_gpu_va, struct mali_job_header_packed),
                 JOB_HEADER, h);
      next_job = h.next;

      int job_no = job_descriptor_number++;

      DUMP_UNPACKED(JOB_HEADER, h, "Job Header (%" PRIx64 "):\n", jc_gpu_va);
      pandecode_log("\n");

      switch (h.type) {
      case MALI_JOB_TYPE_WRITE_VALUE:
         pandecode_write_value_job(mem, jc_gpu_va, job_no);
         break;

      case MALI_JOB_TYPE_CACHE_FLUSH:
         pandecode_cache_flush_job(mem, jc_gpu_va, job_no);
         break;

      case MALI_JOB_TYPE_TILER:
         pandecode_tiler_job(&h, mem, jc_gpu_va, job_no, gpu_id);
         break;

#if PAN_ARCH <= 7
      case MALI_JOB_TYPE_VERTEX:
      case MALI_JOB_TYPE_COMPUTE:
         pandecode_vertex_compute_geometry_job(&h, mem, jc_gpu_va, job_no, gpu_id);
         break;

#if PAN_ARCH >= 6
      case MALI_JOB_TYPE_INDEXED_VERTEX:
         pandecode_indexed_vertex_job(&h, mem, jc_gpu_va, job_no, gpu_id);
         break;
#endif
#else
      case MALI_JOB_TYPE_COMPUTE:
         pandecode_compute_job(mem, jc_gpu_va, gpu_id);
         break;

      case MALI_JOB_TYPE_MALLOC_VERTEX:
         pandecode_malloc_vertex_job(mem, jc_gpu_va, gpu_id);
         break;
#endif

      case MALI_JOB_TYPE_FRAGMENT:
         pandecode_fragment_job(mem, jc_gpu_va, job_no, gpu_id);
         break;

      default:
         break;
      }
   } while ((jc_gpu_va = next_job));

   fflush(pandecode_dump_stream);
   pandecode_map_read_write();
}

void
GENX(pandecode_abort_on_fault)(mali_ptr jc_gpu_va)
{
   mali_ptr next_job = 0;

   do {
      struct pandecode_mapped_memory *mem =
         pandecode_find_mapped_gpu_mem_containing(jc_gpu_va);

      pan_unpack(PANDECODE_PTR(mem, jc_gpu_va, struct mali_job_header_packed),
                 JOB_HEADER, h);
      next_job = h.next;

      /* Ensure the job is marked COMPLETE */
      if (h.exception_status != 0x1) {
         fprintf(stderr, "Incomplete job or timeout\n");
         fflush(NULL);
         abort();
      }
   } while ((jc_gpu_va = next_job));

   pandecode_map_read_write();
}