1/* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25#include "ac_debug.h" 26#include "ac_rtld.h" 27#include "driver_ddebug/dd_util.h" 28#include "si_compute.h" 29#include "si_pipe.h" 30#include "sid.h" 31#include "sid_tables.h" 32#include "tgsi/tgsi_from_mesa.h" 33#include "util/u_dump.h" 34#include "util/u_log.h" 35#include "util/u_memory.h" 36#include "util/u_string.h" 37 38static void si_dump_bo_list(struct si_context *sctx, const struct radeon_saved_cs *saved, FILE *f); 39 40DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL) 41 42/** 43 * Store a linearized copy of all chunks of \p cs together with the buffer 44 * list in \p saved. 45 */ 46void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, struct radeon_saved_cs *saved, 47 bool get_buffer_list) 48{ 49 uint32_t *buf; 50 unsigned i; 51 52 /* Save the IB chunks. */ 53 saved->num_dw = cs->prev_dw + cs->current.cdw; 54 saved->ib = MALLOC(4 * saved->num_dw); 55 if (!saved->ib) 56 goto oom; 57 58 buf = saved->ib; 59 for (i = 0; i < cs->num_prev; ++i) { 60 memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4); 61 buf += cs->prev[i].cdw; 62 } 63 memcpy(buf, cs->current.buf, cs->current.cdw * 4); 64 65 if (!get_buffer_list) 66 return; 67 68 /* Save the buffer list. */ 69 saved->bo_count = ws->cs_get_buffer_list(cs, NULL); 70 saved->bo_list = CALLOC(saved->bo_count, sizeof(saved->bo_list[0])); 71 if (!saved->bo_list) { 72 FREE(saved->ib); 73 goto oom; 74 } 75 ws->cs_get_buffer_list(cs, saved->bo_list); 76 77 return; 78 79oom: 80 fprintf(stderr, "%s: out of memory\n", __func__); 81 memset(saved, 0, sizeof(*saved)); 82} 83 84void si_clear_saved_cs(struct radeon_saved_cs *saved) 85{ 86 FREE(saved->ib); 87 FREE(saved->bo_list); 88 89 memset(saved, 0, sizeof(*saved)); 90} 91 92void si_destroy_saved_cs(struct si_saved_cs *scs) 93{ 94 si_clear_saved_cs(&scs->gfx); 95 si_resource_reference(&scs->trace_buf, NULL); 96 free(scs); 97} 98 99static void si_dump_shader(struct si_screen *sscreen, struct si_shader *shader, FILE *f) 100{ 101 if (shader->shader_log) 102 fwrite(shader->shader_log, shader->shader_log_size, 1, f); 103 else 104 si_shader_dump(sscreen, shader, NULL, f, false); 105 106 if (shader->bo && sscreen->options.dump_shader_binary) { 107 unsigned size = shader->bo->b.b.width0; 108 fprintf(f, "BO: VA=%" PRIx64 " Size=%u\n", shader->bo->gpu_address, size); 109 110 const char *mapped = sscreen->ws->buffer_map(sscreen->ws, 111 shader->bo->buf, NULL, 112 PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_READ | RADEON_MAP_TEMPORARY); 113 114 for (unsigned i = 0; i < size; i += 4) { 115 fprintf(f, " %4x: %08x\n", i, *(uint32_t *)(mapped + i)); 116 } 117 118 sscreen->ws->buffer_unmap(sscreen->ws, shader->bo->buf); 119 120 fprintf(f, "\n"); 121 } 122} 123 124struct si_log_chunk_shader { 125 /* The shader destroy code assumes a current context for unlinking of 126 * PM4 packets etc. 127 * 128 * While we should be able to destroy shaders without a context, doing 129 * so would happen only very rarely and be therefore likely to fail 130 * just when you're trying to debug something. Let's just remember the 131 * current context in the chunk. 132 */ 133 struct si_context *ctx; 134 struct si_shader *shader; 135 136 /* For keep-alive reference counts */ 137 struct si_shader_selector *sel; 138 struct si_compute *program; 139}; 140 141static void si_log_chunk_shader_destroy(void *data) 142{ 143 struct si_log_chunk_shader *chunk = data; 144 si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL); 145 si_compute_reference(&chunk->program, NULL); 146 FREE(chunk); 147} 148 149static void si_log_chunk_shader_print(void *data, FILE *f) 150{ 151 struct si_log_chunk_shader *chunk = data; 152 struct si_screen *sscreen = chunk->ctx->screen; 153 si_dump_shader(sscreen, chunk->shader, f); 154} 155 156static struct u_log_chunk_type si_log_chunk_type_shader = { 157 .destroy = si_log_chunk_shader_destroy, 158 .print = si_log_chunk_shader_print, 159}; 160 161static void si_dump_gfx_shader(struct si_context *ctx, const struct si_shader_ctx_state *state, 162 struct u_log_context *log) 163{ 164 struct si_shader *current = state->current; 165 166 if (!state->cso || !current) 167 return; 168 169 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader); 170 chunk->ctx = ctx; 171 chunk->shader = current; 172 si_shader_selector_reference(ctx, &chunk->sel, current->selector); 173 u_log_chunk(log, &si_log_chunk_type_shader, chunk); 174} 175 176static void si_dump_compute_shader(struct si_context *ctx, struct u_log_context *log) 177{ 178 const struct si_cs_shader_state *state = &ctx->cs_shader_state; 179 180 if (!state->program) 181 return; 182 183 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader); 184 chunk->ctx = ctx; 185 chunk->shader = &state->program->shader; 186 si_compute_reference(&chunk->program, state->program); 187 u_log_chunk(log, &si_log_chunk_type_shader, chunk); 188} 189 190/** 191 * Shader compiles can be overridden with arbitrary ELF objects by setting 192 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2] 193 * 194 * TODO: key this off some hash 195 */ 196bool si_replace_shader(unsigned num, struct si_shader_binary *binary) 197{ 198 const char *p = debug_get_option_replace_shaders(); 199 const char *semicolon; 200 char *copy = NULL; 201 FILE *f; 202 long filesize, nread; 203 bool replaced = false; 204 205 if (!p) 206 return false; 207 208 while (*p) { 209 unsigned long i; 210 char *endp; 211 i = strtoul(p, &endp, 0); 212 213 p = endp; 214 if (*p != ':') { 215 fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n"); 216 exit(1); 217 } 218 ++p; 219 220 if (i == num) 221 break; 222 223 p = strchr(p, ';'); 224 if (!p) 225 return false; 226 ++p; 227 } 228 if (!*p) 229 return false; 230 231 semicolon = strchr(p, ';'); 232 if (semicolon) { 233 p = copy = strndup(p, semicolon - p); 234 if (!copy) { 235 fprintf(stderr, "out of memory\n"); 236 return false; 237 } 238 } 239 240 fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p); 241 242 f = fopen(p, "r"); 243 if (!f) { 244 perror("radeonsi: failed to open file"); 245 goto out_free; 246 } 247 248 if (fseek(f, 0, SEEK_END) != 0) 249 goto file_error; 250 251 filesize = ftell(f); 252 if (filesize < 0) 253 goto file_error; 254 255 if (fseek(f, 0, SEEK_SET) != 0) 256 goto file_error; 257 258 binary->elf_buffer = MALLOC(filesize); 259 if (!binary->elf_buffer) { 260 fprintf(stderr, "out of memory\n"); 261 goto out_close; 262 } 263 264 nread = fread((void *)binary->elf_buffer, 1, filesize, f); 265 if (nread != filesize) { 266 FREE((void *)binary->elf_buffer); 267 binary->elf_buffer = NULL; 268 goto file_error; 269 } 270 271 binary->elf_size = nread; 272 replaced = true; 273 274out_close: 275 fclose(f); 276out_free: 277 free(copy); 278 return replaced; 279 280file_error: 281 perror("radeonsi: reading shader"); 282 goto out_close; 283} 284 285/* Parsed IBs are difficult to read without colors. Use "less -R file" to 286 * read them, or use "aha -b -f file" to convert them to html. 287 */ 288#define COLOR_RESET "\033[0m" 289#define COLOR_RED "\033[31m" 290#define COLOR_GREEN "\033[1;32m" 291#define COLOR_YELLOW "\033[1;33m" 292#define COLOR_CYAN "\033[1;36m" 293 294static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f, unsigned offset) 295{ 296 struct radeon_winsys *ws = sctx->ws; 297 uint32_t value; 298 299 if (ws->read_registers(ws, offset, 1, &value)) 300 ac_dump_reg(f, sctx->gfx_level, offset, value, ~0); 301} 302 303static void si_dump_debug_registers(struct si_context *sctx, FILE *f) 304{ 305 fprintf(f, "Memory-mapped registers:\n"); 306 si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS); 307 308 /* No other registers can be read on radeon. */ 309 if (!sctx->screen->info.is_amdgpu) { 310 fprintf(f, "\n"); 311 return; 312 } 313 314 si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2); 315 si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0); 316 si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1); 317 si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2); 318 si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3); 319 si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG); 320 si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG); 321 if (sctx->gfx_level <= GFX8) { 322 si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS); 323 si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2); 324 si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3); 325 } 326 si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT); 327 si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1); 328 si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2); 329 si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3); 330 si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS); 331 si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT); 332 si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1); 333 si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS); 334 si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT); 335 si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1); 336 fprintf(f, "\n"); 337} 338 339struct si_log_chunk_cs { 340 struct si_context *ctx; 341 struct si_saved_cs *cs; 342 bool dump_bo_list; 343 unsigned gfx_begin, gfx_end; 344}; 345 346static void si_log_chunk_type_cs_destroy(void *data) 347{ 348 struct si_log_chunk_cs *chunk = data; 349 si_saved_cs_reference(&chunk->cs, NULL); 350 free(chunk); 351} 352 353static void si_parse_current_ib(FILE *f, struct radeon_cmdbuf *cs, unsigned begin, unsigned end, 354 int *last_trace_id, unsigned trace_id_count, const char *name, 355 enum amd_gfx_level gfx_level) 356{ 357 unsigned orig_end = end; 358 359 assert(begin <= end); 360 361 fprintf(f, "------------------ %s begin (dw = %u) ------------------\n", name, begin); 362 363 for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) { 364 struct radeon_cmdbuf_chunk *chunk = &cs->prev[prev_idx]; 365 366 if (begin < chunk->cdw) { 367 ac_parse_ib_chunk(f, chunk->buf + begin, MIN2(end, chunk->cdw) - begin, last_trace_id, 368 trace_id_count, gfx_level, NULL, NULL); 369 } 370 371 if (end <= chunk->cdw) 372 return; 373 374 if (begin < chunk->cdw) 375 fprintf(f, "\n---------- Next %s Chunk ----------\n\n", name); 376 377 begin -= MIN2(begin, chunk->cdw); 378 end -= chunk->cdw; 379 } 380 381 assert(end <= cs->current.cdw); 382 383 ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id, trace_id_count, 384 gfx_level, NULL, NULL); 385 386 fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n", name, orig_end); 387} 388 389void si_print_current_ib(struct si_context *sctx, FILE *f) 390{ 391 si_parse_current_ib(f, &sctx->gfx_cs, 0, sctx->gfx_cs.prev_dw + sctx->gfx_cs.current.cdw, 392 NULL, 0, "GFX", sctx->gfx_level); 393} 394 395static void si_log_chunk_type_cs_print(void *data, FILE *f) 396{ 397 struct si_log_chunk_cs *chunk = data; 398 struct si_context *ctx = chunk->ctx; 399 struct si_saved_cs *scs = chunk->cs; 400 int last_trace_id = -1; 401 402 /* We are expecting that the ddebug pipe has already 403 * waited for the context, so this buffer should be idle. 404 * If the GPU is hung, there is no point in waiting for it. 405 */ 406 uint32_t *map = ctx->ws->buffer_map(ctx->ws, scs->trace_buf->buf, NULL, 407 PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_READ); 408 if (map) 409 last_trace_id = map[0]; 410 411 if (chunk->gfx_end != chunk->gfx_begin) { 412 if (chunk->gfx_begin == 0) { 413 if (ctx->cs_preamble_state) 414 ac_parse_ib(f, ctx->cs_preamble_state->pm4, ctx->cs_preamble_state->ndw, NULL, 0, 415 "IB2: Init config", ctx->gfx_level, NULL, NULL); 416 } 417 418 if (scs->flushed) { 419 ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin, chunk->gfx_end - chunk->gfx_begin, 420 &last_trace_id, map ? 1 : 0, "IB", ctx->gfx_level, NULL, NULL); 421 } else { 422 si_parse_current_ib(f, &ctx->gfx_cs, chunk->gfx_begin, chunk->gfx_end, &last_trace_id, 423 map ? 1 : 0, "IB", ctx->gfx_level); 424 } 425 } 426 427 if (chunk->dump_bo_list) { 428 fprintf(f, "Flushing. Time: "); 429 util_dump_ns(f, scs->time_flush); 430 fprintf(f, "\n\n"); 431 si_dump_bo_list(ctx, &scs->gfx, f); 432 } 433} 434 435static const struct u_log_chunk_type si_log_chunk_type_cs = { 436 .destroy = si_log_chunk_type_cs_destroy, 437 .print = si_log_chunk_type_cs_print, 438}; 439 440static void si_log_cs(struct si_context *ctx, struct u_log_context *log, bool dump_bo_list) 441{ 442 assert(ctx->current_saved_cs); 443 444 struct si_saved_cs *scs = ctx->current_saved_cs; 445 unsigned gfx_cur = ctx->gfx_cs.prev_dw + ctx->gfx_cs.current.cdw; 446 447 if (!dump_bo_list && gfx_cur == scs->gfx_last_dw) 448 return; 449 450 struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk)); 451 452 chunk->ctx = ctx; 453 si_saved_cs_reference(&chunk->cs, scs); 454 chunk->dump_bo_list = dump_bo_list; 455 456 chunk->gfx_begin = scs->gfx_last_dw; 457 chunk->gfx_end = gfx_cur; 458 scs->gfx_last_dw = gfx_cur; 459 460 u_log_chunk(log, &si_log_chunk_type_cs, chunk); 461} 462 463void si_auto_log_cs(void *data, struct u_log_context *log) 464{ 465 struct si_context *ctx = (struct si_context *)data; 466 si_log_cs(ctx, log, false); 467} 468 469void si_log_hw_flush(struct si_context *sctx) 470{ 471 if (!sctx->log) 472 return; 473 474 si_log_cs(sctx, sctx->log, true); 475 476 if (&sctx->b == sctx->screen->aux_context) { 477 /* The aux context isn't captured by the ddebug wrapper, 478 * so we dump it on a flush-by-flush basis here. 479 */ 480 FILE *f = dd_get_debug_file(false); 481 if (!f) { 482 fprintf(stderr, "radeonsi: error opening aux context dump file.\n"); 483 } else { 484 dd_write_header(f, &sctx->screen->b, 0); 485 486 fprintf(f, "Aux context dump:\n\n"); 487 u_log_new_page_print(sctx->log, f); 488 489 fclose(f); 490 } 491 } 492} 493 494static const char *priority_to_string(unsigned priority) 495{ 496#define ITEM(x) if (priority == RADEON_PRIO_##x) return #x 497 ITEM(FENCE_TRACE); 498 ITEM(SO_FILLED_SIZE); 499 ITEM(QUERY); 500 ITEM(IB); 501 ITEM(DRAW_INDIRECT); 502 ITEM(INDEX_BUFFER); 503 ITEM(CP_DMA); 504 ITEM(BORDER_COLORS); 505 ITEM(CONST_BUFFER); 506 ITEM(DESCRIPTORS); 507 ITEM(SAMPLER_BUFFER); 508 ITEM(VERTEX_BUFFER); 509 ITEM(SHADER_RW_BUFFER); 510 ITEM(SAMPLER_TEXTURE); 511 ITEM(SHADER_RW_IMAGE); 512 ITEM(SAMPLER_TEXTURE_MSAA); 513 ITEM(COLOR_BUFFER); 514 ITEM(DEPTH_BUFFER); 515 ITEM(COLOR_BUFFER_MSAA); 516 ITEM(DEPTH_BUFFER_MSAA); 517 ITEM(SEPARATE_META); 518 ITEM(SHADER_BINARY); 519 ITEM(SHADER_RINGS); 520 ITEM(SCRATCH_BUFFER); 521#undef ITEM 522 523 return ""; 524} 525 526static int bo_list_compare_va(const struct radeon_bo_list_item *a, 527 const struct radeon_bo_list_item *b) 528{ 529 return a->vm_address < b->vm_address ? -1 : a->vm_address > b->vm_address ? 1 : 0; 530} 531 532static void si_dump_bo_list(struct si_context *sctx, const struct radeon_saved_cs *saved, FILE *f) 533{ 534 unsigned i, j; 535 536 if (!saved->bo_list) 537 return; 538 539 /* Sort the list according to VM adddresses first. */ 540 qsort(saved->bo_list, saved->bo_count, sizeof(saved->bo_list[0]), (void *)bo_list_compare_va); 541 542 fprintf(f, "Buffer list (in units of pages = 4kB):\n" COLOR_YELLOW 543 " Size VM start page " 544 "VM end page Usage" COLOR_RESET "\n"); 545 546 for (i = 0; i < saved->bo_count; i++) { 547 /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */ 548 const unsigned page_size = sctx->screen->info.gart_page_size; 549 uint64_t va = saved->bo_list[i].vm_address; 550 uint64_t size = saved->bo_list[i].bo_size; 551 bool hit = false; 552 553 /* If there's unused virtual memory between 2 buffers, print it. */ 554 if (i) { 555 uint64_t previous_va_end = 556 saved->bo_list[i - 1].vm_address + saved->bo_list[i - 1].bo_size; 557 558 if (va > previous_va_end) { 559 fprintf(f, " %10" PRIu64 " -- hole --\n", (va - previous_va_end) / page_size); 560 } 561 } 562 563 /* Print the buffer. */ 564 fprintf(f, " %10" PRIu64 " 0x%013" PRIX64 " 0x%013" PRIX64 " ", 565 size / page_size, va / page_size, (va + size) / page_size); 566 567 /* Print the usage. */ 568 for (j = 0; j < 32; j++) { 569 if (!(saved->bo_list[i].priority_usage & (1u << j))) 570 continue; 571 572 fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(1u << j)); 573 hit = true; 574 } 575 fprintf(f, "\n"); 576 } 577 fprintf(f, "\nNote: The holes represent memory not used by the IB.\n" 578 " Other buffers can still be allocated there.\n\n"); 579} 580 581static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log) 582{ 583 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 584 struct si_texture *tex; 585 int i; 586 587 for (i = 0; i < state->nr_cbufs; i++) { 588 if (!state->cbufs[i]) 589 continue; 590 591 tex = (struct si_texture *)state->cbufs[i]->texture; 592 u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i); 593 si_print_texture_info(sctx->screen, tex, log); 594 u_log_printf(log, "\n"); 595 } 596 597 if (state->zsbuf) { 598 tex = (struct si_texture *)state->zsbuf->texture; 599 u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n"); 600 si_print_texture_info(sctx->screen, tex, log); 601 u_log_printf(log, "\n"); 602 } 603} 604 605typedef unsigned (*slot_remap_func)(unsigned); 606 607struct si_log_chunk_desc_list { 608 /** Pointer to memory map of buffer where the list is uploader */ 609 uint32_t *gpu_list; 610 /** Reference of buffer where the list is uploaded, so that gpu_list 611 * is kept live. */ 612 struct si_resource *buf; 613 614 const char *shader_name; 615 const char *elem_name; 616 slot_remap_func slot_remap; 617 enum amd_gfx_level gfx_level; 618 unsigned element_dw_size; 619 unsigned num_elements; 620 621 uint32_t list[0]; 622}; 623 624static void si_log_chunk_desc_list_destroy(void *data) 625{ 626 struct si_log_chunk_desc_list *chunk = data; 627 si_resource_reference(&chunk->buf, NULL); 628 FREE(chunk); 629} 630 631static void si_log_chunk_desc_list_print(void *data, FILE *f) 632{ 633 struct si_log_chunk_desc_list *chunk = data; 634 unsigned sq_img_rsrc_word0 = 635 chunk->gfx_level >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0; 636 637 for (unsigned i = 0; i < chunk->num_elements; i++) { 638 unsigned cpu_dw_offset = i * chunk->element_dw_size; 639 unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size; 640 const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list"; 641 uint32_t *cpu_list = chunk->list + cpu_dw_offset; 642 uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list; 643 644 fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n", chunk->shader_name, 645 chunk->elem_name, i, list_note); 646 647 switch (chunk->element_dw_size) { 648 case 4: 649 for (unsigned j = 0; j < 4; j++) 650 ac_dump_reg(f, chunk->gfx_level, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, gpu_list[j], 651 0xffffffff); 652 break; 653 case 8: 654 for (unsigned j = 0; j < 8; j++) 655 ac_dump_reg(f, chunk->gfx_level, sq_img_rsrc_word0 + j * 4, gpu_list[j], 0xffffffff); 656 657 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); 658 for (unsigned j = 0; j < 4; j++) 659 ac_dump_reg(f, chunk->gfx_level, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, gpu_list[4 + j], 660 0xffffffff); 661 break; 662 case 16: 663 for (unsigned j = 0; j < 8; j++) 664 ac_dump_reg(f, chunk->gfx_level, sq_img_rsrc_word0 + j * 4, gpu_list[j], 0xffffffff); 665 666 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); 667 for (unsigned j = 0; j < 4; j++) 668 ac_dump_reg(f, chunk->gfx_level, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, gpu_list[4 + j], 669 0xffffffff); 670 671 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n"); 672 for (unsigned j = 0; j < 8; j++) 673 ac_dump_reg(f, chunk->gfx_level, sq_img_rsrc_word0 + j * 4, gpu_list[8 + j], 674 0xffffffff); 675 676 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n"); 677 for (unsigned j = 0; j < 4; j++) 678 ac_dump_reg(f, chunk->gfx_level, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, gpu_list[12 + j], 679 0xffffffff); 680 break; 681 } 682 683 if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) { 684 fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!" COLOR_RESET "\n"); 685 } 686 687 fprintf(f, "\n"); 688 } 689} 690 691static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = { 692 .destroy = si_log_chunk_desc_list_destroy, 693 .print = si_log_chunk_desc_list_print, 694}; 695 696static void si_dump_descriptor_list(struct si_screen *screen, struct si_descriptors *desc, 697 const char *shader_name, const char *elem_name, 698 unsigned element_dw_size, unsigned num_elements, 699 slot_remap_func slot_remap, struct u_log_context *log) 700{ 701 if (!desc->list) 702 return; 703 704 /* In some cases, the caller doesn't know how many elements are really 705 * uploaded. Reduce num_elements to fit in the range of active slots. */ 706 unsigned active_range_dw_begin = desc->first_active_slot * desc->element_dw_size; 707 unsigned active_range_dw_end = 708 active_range_dw_begin + desc->num_active_slots * desc->element_dw_size; 709 710 while (num_elements > 0) { 711 int i = slot_remap(num_elements - 1); 712 unsigned dw_begin = i * element_dw_size; 713 unsigned dw_end = dw_begin + element_dw_size; 714 715 if (dw_begin >= active_range_dw_begin && dw_end <= active_range_dw_end) 716 break; 717 718 num_elements--; 719 } 720 721 struct si_log_chunk_desc_list *chunk = 722 CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list, 4 * element_dw_size * num_elements); 723 chunk->shader_name = shader_name; 724 chunk->elem_name = elem_name; 725 chunk->element_dw_size = element_dw_size; 726 chunk->num_elements = num_elements; 727 chunk->slot_remap = slot_remap; 728 chunk->gfx_level = screen->info.gfx_level; 729 730 si_resource_reference(&chunk->buf, desc->buffer); 731 chunk->gpu_list = desc->gpu_list; 732 733 for (unsigned i = 0; i < num_elements; ++i) { 734 memcpy(&chunk->list[i * element_dw_size], &desc->list[slot_remap(i) * element_dw_size], 735 4 * element_dw_size); 736 } 737 738 u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk); 739} 740 741static unsigned si_identity(unsigned slot) 742{ 743 return slot; 744} 745 746static void si_dump_descriptors(struct si_context *sctx, gl_shader_stage stage, 747 const struct si_shader_info *info, struct u_log_context *log) 748{ 749 enum pipe_shader_type processor = pipe_shader_type_from_mesa(stage); 750 struct si_descriptors *descs = 751 &sctx->descriptors[SI_DESCS_FIRST_SHADER + processor * SI_NUM_SHADER_DESCS]; 752 static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"}; 753 const char *name = shader_name[processor]; 754 unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers; 755 unsigned enabled_images; 756 757 if (info) { 758 enabled_constbuf = u_bit_consecutive(0, info->base.num_ubos); 759 enabled_shaderbuf = u_bit_consecutive(0, info->base.num_ssbos); 760 enabled_samplers = info->base.textures_used[0]; 761 enabled_images = u_bit_consecutive(0, info->base.num_images); 762 } else { 763 enabled_constbuf = 764 sctx->const_and_shader_buffers[processor].enabled_mask >> SI_NUM_SHADER_BUFFERS; 765 enabled_shaderbuf = 0; 766 for (int i = 0; i < SI_NUM_SHADER_BUFFERS; i++) { 767 enabled_shaderbuf |= 768 (sctx->const_and_shader_buffers[processor].enabled_mask & 769 1llu << (SI_NUM_SHADER_BUFFERS - i - 1)) << i; 770 } 771 enabled_samplers = sctx->samplers[processor].enabled_mask; 772 enabled_images = sctx->images[processor].enabled_mask; 773 } 774 775 if (stage == MESA_SHADER_VERTEX && sctx->vb_descriptors_buffer && 776 sctx->vb_descriptors_gpu_list) { 777 assert(info); /* only CS may not have an info struct */ 778 struct si_descriptors desc = {}; 779 780 desc.buffer = sctx->vb_descriptors_buffer; 781 desc.list = sctx->vb_descriptors_gpu_list; 782 desc.gpu_list = sctx->vb_descriptors_gpu_list; 783 desc.element_dw_size = 4; 784 desc.num_active_slots = sctx->vertex_elements->vb_desc_list_alloc_size / 16; 785 786 si_dump_descriptor_list(sctx->screen, &desc, name, " - Vertex buffer", 4, info->num_inputs, 787 si_identity, log); 788 } 789 790 si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS], name, 791 " - Constant buffer", 4, util_last_bit(enabled_constbuf), 792 si_get_constbuf_slot, log); 793 si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS], name, 794 " - Shader buffer", 4, util_last_bit(enabled_shaderbuf), 795 si_get_shaderbuf_slot, log); 796 si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES], name, 797 " - Sampler", 16, util_last_bit(enabled_samplers), si_get_sampler_slot, 798 log); 799 si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES], name, 800 " - Image", 8, util_last_bit(enabled_images), si_get_image_slot, log); 801} 802 803static void si_dump_gfx_descriptors(struct si_context *sctx, 804 const struct si_shader_ctx_state *state, 805 struct u_log_context *log) 806{ 807 if (!state->cso || !state->current) 808 return; 809 810 si_dump_descriptors(sctx, state->cso->stage, &state->cso->info, log); 811} 812 813static void si_dump_compute_descriptors(struct si_context *sctx, struct u_log_context *log) 814{ 815 if (!sctx->cs_shader_state.program) 816 return; 817 818 si_dump_descriptors(sctx, MESA_SHADER_COMPUTE, NULL, log); 819} 820 821struct si_shader_inst { 822 const char *text; /* start of disassembly for this instruction */ 823 unsigned textlen; 824 unsigned size; /* instruction size = 4 or 8 */ 825 uint64_t addr; /* instruction address */ 826}; 827 828/** 829 * Open the given \p binary as \p rtld_binary and split the contained 830 * disassembly string into instructions and add them to the array 831 * pointed to by \p instructions, which must be sufficiently large. 832 * 833 * Labels are considered to be part of the following instruction. 834 * 835 * The caller must keep \p rtld_binary alive as long as \p instructions are 836 * used and then close it afterwards. 837 */ 838static void si_add_split_disasm(struct si_screen *screen, struct ac_rtld_binary *rtld_binary, 839 struct si_shader_binary *binary, uint64_t *addr, unsigned *num, 840 struct si_shader_inst *instructions, 841 gl_shader_stage stage, unsigned wave_size) 842{ 843 if (!ac_rtld_open(rtld_binary, (struct ac_rtld_open_info){ 844 .info = &screen->info, 845 .shader_type = stage, 846 .wave_size = wave_size, 847 .num_parts = 1, 848 .elf_ptrs = &binary->elf_buffer, 849 .elf_sizes = &binary->elf_size})) 850 return; 851 852 const char *disasm; 853 size_t nbytes; 854 if (!ac_rtld_get_section_by_name(rtld_binary, ".AMDGPU.disasm", &disasm, &nbytes)) 855 return; 856 857 const char *end = disasm + nbytes; 858 while (disasm < end) { 859 const char *semicolon = memchr(disasm, ';', end - disasm); 860 if (!semicolon) 861 break; 862 863 struct si_shader_inst *inst = &instructions[(*num)++]; 864 const char *inst_end = memchr(semicolon + 1, '\n', end - semicolon - 1); 865 if (!inst_end) 866 inst_end = end; 867 868 inst->text = disasm; 869 inst->textlen = inst_end - disasm; 870 871 inst->addr = *addr; 872 /* More than 16 chars after ";" means the instruction is 8 bytes long. */ 873 inst->size = inst_end - semicolon > 16 ? 8 : 4; 874 *addr += inst->size; 875 876 if (inst_end == end) 877 break; 878 disasm = inst_end + 1; 879 } 880} 881 882/* If the shader is being executed, print its asm instructions, and annotate 883 * those that are being executed right now with information about waves that 884 * execute them. This is most useful during a GPU hang. 885 */ 886static void si_print_annotated_shader(struct si_shader *shader, struct ac_wave_info *waves, 887 unsigned num_waves, FILE *f) 888{ 889 if (!shader) 890 return; 891 892 struct si_screen *screen = shader->selector->screen; 893 gl_shader_stage stage = shader->selector->stage; 894 uint64_t start_addr = shader->bo->gpu_address; 895 uint64_t end_addr = start_addr + shader->bo->b.b.width0; 896 unsigned i; 897 898 /* See if any wave executes the shader. */ 899 for (i = 0; i < num_waves; i++) { 900 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr) 901 break; 902 } 903 if (i == num_waves) 904 return; /* the shader is not being executed */ 905 906 /* Remember the first found wave. The waves are sorted according to PC. */ 907 waves = &waves[i]; 908 num_waves -= i; 909 910 /* Get the list of instructions. 911 * Buffer size / 4 is the upper bound of the instruction count. 912 */ 913 unsigned num_inst = 0; 914 uint64_t inst_addr = start_addr; 915 struct ac_rtld_binary rtld_binaries[5] = {}; 916 struct si_shader_inst *instructions = 917 calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst)); 918 919 if (shader->prolog) { 920 si_add_split_disasm(screen, &rtld_binaries[0], &shader->prolog->binary, &inst_addr, &num_inst, 921 instructions, stage, shader->wave_size); 922 } 923 if (shader->previous_stage) { 924 si_add_split_disasm(screen, &rtld_binaries[1], &shader->previous_stage->binary, &inst_addr, 925 &num_inst, instructions, stage, shader->wave_size); 926 } 927 si_add_split_disasm(screen, &rtld_binaries[3], &shader->binary, &inst_addr, &num_inst, 928 instructions, stage, shader->wave_size); 929 if (shader->epilog) { 930 si_add_split_disasm(screen, &rtld_binaries[4], &shader->epilog->binary, &inst_addr, &num_inst, 931 instructions, stage, shader->wave_size); 932 } 933 934 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", 935 si_get_shader_name(shader)); 936 937 /* Print instructions with annotations. */ 938 for (i = 0; i < num_inst; i++) { 939 struct si_shader_inst *inst = &instructions[i]; 940 941 fprintf(f, "%.*s [PC=0x%" PRIx64 ", size=%u]\n", inst->textlen, inst->text, inst->addr, 942 inst->size); 943 944 /* Print which waves execute the instruction right now. */ 945 while (num_waves && inst->addr == waves->pc) { 946 fprintf(f, 947 " " COLOR_GREEN "^ SE%u SH%u CU%u " 948 "SIMD%u WAVE%u EXEC=%016" PRIx64 " ", 949 waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec); 950 951 if (inst->size == 4) { 952 fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0); 953 } else { 954 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1); 955 } 956 957 waves->matched = true; 958 waves = &waves[1]; 959 num_waves--; 960 } 961 } 962 963 fprintf(f, "\n\n"); 964 free(instructions); 965 for (unsigned i = 0; i < ARRAY_SIZE(rtld_binaries); ++i) 966 ac_rtld_close(&rtld_binaries[i]); 967} 968 969static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f) 970{ 971 struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; 972 unsigned num_waves = ac_get_wave_info(sctx->gfx_level, waves); 973 974 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves); 975 976 si_print_annotated_shader(sctx->shader.vs.current, waves, num_waves, f); 977 si_print_annotated_shader(sctx->shader.tcs.current, waves, num_waves, f); 978 si_print_annotated_shader(sctx->shader.tes.current, waves, num_waves, f); 979 si_print_annotated_shader(sctx->shader.gs.current, waves, num_waves, f); 980 si_print_annotated_shader(sctx->shader.ps.current, waves, num_waves, f); 981 982 /* Print waves executing shaders that are not currently bound. */ 983 unsigned i; 984 bool found = false; 985 for (i = 0; i < num_waves; i++) { 986 if (waves[i].matched) 987 continue; 988 989 if (!found) { 990 fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n"); 991 found = true; 992 } 993 fprintf(f, 994 " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64 995 "\n", 996 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec, 997 waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc); 998 } 999 if (found) 1000 fprintf(f, "\n\n"); 1001} 1002 1003static void si_dump_command(const char *title, const char *command, FILE *f) 1004{ 1005 char line[2000]; 1006 1007 FILE *p = popen(command, "r"); 1008 if (!p) 1009 return; 1010 1011 fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title); 1012 while (fgets(line, sizeof(line), p)) 1013 fputs(line, f); 1014 fprintf(f, "\n\n"); 1015 pclose(p); 1016} 1017 1018static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, unsigned flags) 1019{ 1020 struct si_context *sctx = (struct si_context *)ctx; 1021 1022 if (sctx->log) 1023 u_log_flush(sctx->log); 1024 1025 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) { 1026 si_dump_debug_registers(sctx, f); 1027 1028 si_dump_annotated_shaders(sctx, f); 1029 si_dump_command("Active waves (raw data)", "umr -O halt_waves -wa | column -t", f); 1030 si_dump_command("Wave information", "umr -O halt_waves,bits -wa", f); 1031 } 1032} 1033 1034void si_log_draw_state(struct si_context *sctx, struct u_log_context *log) 1035{ 1036 if (!log) 1037 return; 1038 1039 si_dump_framebuffer(sctx, log); 1040 1041 si_dump_gfx_shader(sctx, &sctx->shader.vs, log); 1042 si_dump_gfx_shader(sctx, &sctx->shader.tcs, log); 1043 si_dump_gfx_shader(sctx, &sctx->shader.tes, log); 1044 si_dump_gfx_shader(sctx, &sctx->shader.gs, log); 1045 si_dump_gfx_shader(sctx, &sctx->shader.ps, log); 1046 1047 si_dump_descriptor_list(sctx->screen, &sctx->descriptors[SI_DESCS_INTERNAL], "", "RW buffers", 1048 4, sctx->descriptors[SI_DESCS_INTERNAL].num_active_slots, si_identity, 1049 log); 1050 si_dump_gfx_descriptors(sctx, &sctx->shader.vs, log); 1051 si_dump_gfx_descriptors(sctx, &sctx->shader.tcs, log); 1052 si_dump_gfx_descriptors(sctx, &sctx->shader.tes, log); 1053 si_dump_gfx_descriptors(sctx, &sctx->shader.gs, log); 1054 si_dump_gfx_descriptors(sctx, &sctx->shader.ps, log); 1055} 1056 1057void si_log_compute_state(struct si_context *sctx, struct u_log_context *log) 1058{ 1059 if (!log) 1060 return; 1061 1062 si_dump_compute_shader(sctx, log); 1063 si_dump_compute_descriptors(sctx, log); 1064} 1065 1066void si_check_vm_faults(struct si_context *sctx, struct radeon_saved_cs *saved, enum amd_ip_type ring) 1067{ 1068 struct pipe_screen *screen = sctx->b.screen; 1069 FILE *f; 1070 uint64_t addr; 1071 char cmd_line[4096]; 1072 1073 if (!ac_vm_fault_occured(sctx->gfx_level, &sctx->dmesg_timestamp, &addr)) 1074 return; 1075 1076 f = dd_get_debug_file(false); 1077 if (!f) 1078 return; 1079 1080 fprintf(f, "VM fault report.\n\n"); 1081 if (os_get_command_line(cmd_line, sizeof(cmd_line))) 1082 fprintf(f, "Command: %s\n", cmd_line); 1083 fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen)); 1084 fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen)); 1085 fprintf(f, "Device name: %s\n\n", screen->get_name(screen)); 1086 fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr); 1087 1088 if (sctx->apitrace_call_number) 1089 fprintf(f, "Last apitrace call: %u\n\n", sctx->apitrace_call_number); 1090 1091 switch (ring) { 1092 case AMD_IP_GFX: { 1093 struct u_log_context log; 1094 u_log_context_init(&log); 1095 1096 si_log_draw_state(sctx, &log); 1097 si_log_compute_state(sctx, &log); 1098 si_log_cs(sctx, &log, true); 1099 1100 u_log_new_page_print(&log, f); 1101 u_log_context_destroy(&log); 1102 break; 1103 } 1104 1105 default: 1106 break; 1107 } 1108 1109 fclose(f); 1110 1111 fprintf(stderr, "Detected a VM fault, exiting...\n"); 1112 exit(0); 1113} 1114 1115void si_init_debug_functions(struct si_context *sctx) 1116{ 1117 sctx->b.dump_debug_state = si_dump_debug_state; 1118 1119 /* Set the initial dmesg timestamp for this context, so that 1120 * only new messages will be checked for VM faults. 1121 */ 1122 if (sctx->screen->debug_flags & DBG(CHECK_VM)) 1123 ac_vm_fault_occured(sctx->gfx_level, &sctx->dmesg_timestamp, NULL); 1124} 1125