1/* 2 * Copyright © 2016 Red Hat. 3 * Copyright © 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright © 2015 Intel Corporation 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the next 16 * paragraph) shall be included in all copies or substantial portions of the 17 * Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 * IN THE SOFTWARE. 26 */ 27 28#include <stdio.h> 29#include <stdlib.h> 30#ifndef _WIN32 31#include <sys/utsname.h> 32#endif 33#include <sys/stat.h> 34 35#include "util/mesa-sha1.h" 36#include "util/os_time.h" 37#include "ac_debug.h" 38#include "radv_debug.h" 39#include "radv_shader.h" 40#include "sid.h" 41 42#define TRACE_BO_SIZE 4096 43#define TMA_BO_SIZE 4096 44 45#define COLOR_RESET "\033[0m" 46#define COLOR_RED "\033[31m" 47#define COLOR_GREEN "\033[1;32m" 48#define COLOR_YELLOW "\033[1;33m" 49#define COLOR_CYAN "\033[1;36m" 50 51#define RADV_DUMP_DIR "radv_dumps" 52 53/* Trace BO layout (offsets are 4 bytes): 54 * 55 * [0]: primary trace ID 56 * [1]: secondary trace ID 57 * [2-3]: 64-bit GFX ring pipeline pointer 58 * [4-5]: 64-bit COMPUTE ring pipeline pointer 59 * [6-7]: Vertex descriptors pointer 60 * [8-9]: 64-bit Vertex prolog pointer 61 * [10-11]: 64-bit descriptor set #0 pointer 62 * ... 63 * [72-73]: 64-bit descriptor set #31 pointer 64 */ 65 66bool 67radv_init_trace(struct radv_device *device) 68{ 69 struct radeon_winsys *ws = device->ws; 70 VkResult result; 71 72 result = ws->buffer_create( 73 ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM, 74 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | 75 RADEON_FLAG_VA_UNCACHED, RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo); 76 if (result != VK_SUCCESS) 77 return false; 78 79 result = ws->buffer_make_resident(ws, device->trace_bo, true); 80 if (result != VK_SUCCESS) 81 return false; 82 83 device->trace_id_ptr = ws->buffer_map(device->trace_bo); 84 if (!device->trace_id_ptr) 85 return false; 86 87 ac_vm_fault_occured(device->physical_device->rad_info.gfx_level, &device->dmesg_timestamp, NULL); 88 89 return true; 90} 91 92void 93radv_finish_trace(struct radv_device *device) 94{ 95 struct radeon_winsys *ws = device->ws; 96 97 if (unlikely(device->trace_bo)) { 98 ws->buffer_make_resident(ws, device->trace_bo, false); 99 ws->buffer_destroy(ws, device->trace_bo); 100 } 101} 102 103static void 104radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f) 105{ 106 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr); 107 device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2); 108} 109 110static void 111radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset) 112{ 113 struct radeon_winsys *ws = device->ws; 114 uint32_t value; 115 116 if (ws->read_registers(ws, offset, 1, &value)) 117 ac_dump_reg(f, device->physical_device->rad_info.gfx_level, offset, value, ~0); 118} 119 120static void 121radv_dump_debug_registers(struct radv_device *device, FILE *f) 122{ 123 struct radeon_info *info = &device->physical_device->rad_info; 124 125 fprintf(f, "Memory-mapped registers:\n"); 126 radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS); 127 128 radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2); 129 radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0); 130 radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1); 131 radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2); 132 radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3); 133 radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG); 134 radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG); 135 if (info->gfx_level <= GFX8) { 136 radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS); 137 radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2); 138 radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3); 139 } 140 radv_dump_mmapped_reg(device, f, R_008680_CP_STAT); 141 radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1); 142 radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2); 143 radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3); 144 radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS); 145 radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT); 146 radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1); 147 radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS); 148 radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT); 149 radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1); 150 fprintf(f, "\n"); 151} 152 153static void 154radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc, FILE *f) 155{ 156 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); 157 for (unsigned j = 0; j < 4; j++) 158 ac_dump_reg(f, gfx_level, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff); 159} 160 161static void 162radv_dump_image_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc, FILE *f) 163{ 164 unsigned sq_img_rsrc_word0 = 165 gfx_level >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0; 166 167 fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n"); 168 for (unsigned j = 0; j < 8; j++) 169 ac_dump_reg(f, gfx_level, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff); 170 171 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n"); 172 for (unsigned j = 0; j < 8; j++) 173 ac_dump_reg(f, gfx_level, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff); 174} 175 176static void 177radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc, FILE *f) 178{ 179 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n"); 180 for (unsigned j = 0; j < 4; j++) { 181 ac_dump_reg(f, gfx_level, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff); 182 } 183} 184 185static void 186radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc, 187 FILE *f) 188{ 189 radv_dump_image_descriptor(gfx_level, desc, f); 190 radv_dump_sampler_descriptor(gfx_level, desc + 16, f); 191} 192 193static void 194radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id, 195 FILE *f) 196{ 197 enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; 198 const struct radv_descriptor_set_layout *layout; 199 int i; 200 201 if (!set) 202 return; 203 layout = set->header.layout; 204 205 for (i = 0; i < set->header.layout->binding_count; i++) { 206 uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4; 207 208 switch (layout->binding[i].type) { 209 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 210 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 211 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: 212 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: 213 radv_dump_buffer_descriptor(gfx_level, desc, f); 214 break; 215 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 216 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: 217 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: 218 radv_dump_image_descriptor(gfx_level, desc, f); 219 break; 220 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: 221 radv_dump_combined_image_sampler_descriptor(gfx_level, desc, f); 222 break; 223 case VK_DESCRIPTOR_TYPE_SAMPLER: 224 radv_dump_sampler_descriptor(gfx_level, desc, f); 225 break; 226 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: 227 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: 228 case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE: 229 case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: 230 /* todo */ 231 break; 232 default: 233 assert(!"unknown descriptor type"); 234 break; 235 } 236 fprintf(f, "\n"); 237 } 238 fprintf(f, "\n\n"); 239} 240 241static void 242radv_dump_descriptors(struct radv_device *device, FILE *f) 243{ 244 uint64_t *ptr = (uint64_t *)device->trace_id_ptr; 245 int i; 246 247 fprintf(f, "Descriptors:\n"); 248 for (i = 0; i < MAX_SETS; i++) { 249 struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 5); 250 251 radv_dump_descriptor_set(device, set, i, f); 252 } 253} 254 255struct radv_shader_inst { 256 char text[160]; /* one disasm line */ 257 unsigned offset; /* instruction offset */ 258 unsigned size; /* instruction size = 4 or 8 */ 259}; 260 261/* Split a disassembly string into lines and add them to the array pointed 262 * to by "instructions". */ 263static void 264si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num, 265 struct radv_shader_inst *instructions) 266{ 267 struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL; 268 char *next; 269 270 while ((next = strchr(disasm, '\n'))) { 271 struct radv_shader_inst *inst = &instructions[*num]; 272 unsigned len = next - disasm; 273 274 if (!memchr(disasm, ';', len)) { 275 /* Ignore everything that is not an instruction. */ 276 disasm = next + 1; 277 continue; 278 } 279 280 assert(len < ARRAY_SIZE(inst->text)); 281 memcpy(inst->text, disasm, len); 282 inst->text[len] = 0; 283 inst->offset = last_inst ? last_inst->offset + last_inst->size : 0; 284 285 const char *semicolon = strchr(disasm, ';'); 286 assert(semicolon); 287 /* More than 16 chars after ";" means the instruction is 8 bytes long. */ 288 inst->size = next - semicolon > 16 ? 8 : 4; 289 290 snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len, 291 " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset, 292 inst->size); 293 294 last_inst = inst; 295 (*num)++; 296 disasm = next + 1; 297 } 298} 299 300static void 301radv_dump_annotated_shader(struct radv_shader *shader, gl_shader_stage stage, 302 struct ac_wave_info *waves, unsigned num_waves, FILE *f) 303{ 304 uint64_t start_addr, end_addr; 305 unsigned i; 306 307 if (!shader) 308 return; 309 310 start_addr = radv_shader_get_va(shader); 311 end_addr = start_addr + shader->code_size; 312 313 /* See if any wave executes the shader. */ 314 for (i = 0; i < num_waves; i++) { 315 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr) 316 break; 317 } 318 319 if (i == num_waves) 320 return; /* the shader is not being executed */ 321 322 /* Remember the first found wave. The waves are sorted according to PC. */ 323 waves = &waves[i]; 324 num_waves -= i; 325 326 /* Get the list of instructions. 327 * Buffer size / 4 is the upper bound of the instruction count. 328 */ 329 unsigned num_inst = 0; 330 struct radv_shader_inst *instructions = 331 calloc(shader->code_size / 4, sizeof(struct radv_shader_inst)); 332 333 si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions); 334 335 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", 336 radv_get_shader_name(&shader->info, stage)); 337 338 /* Print instructions with annotations. */ 339 for (i = 0; i < num_inst; i++) { 340 struct radv_shader_inst *inst = &instructions[i]; 341 342 fprintf(f, "%s\n", inst->text); 343 344 /* Print which waves execute the instruction right now. */ 345 while (num_waves && start_addr + inst->offset == waves->pc) { 346 fprintf(f, 347 " " COLOR_GREEN "^ SE%u SH%u CU%u " 348 "SIMD%u WAVE%u EXEC=%016" PRIx64 " ", 349 waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec); 350 351 if (inst->size == 4) { 352 fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0); 353 } else { 354 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1); 355 } 356 357 waves->matched = true; 358 waves = &waves[1]; 359 num_waves--; 360 } 361 } 362 363 fprintf(f, "\n\n"); 364 free(instructions); 365} 366 367static void 368radv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages, 369 FILE *f) 370{ 371 struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; 372 enum amd_gfx_level gfx_level = pipeline->device->physical_device->rad_info.gfx_level; 373 unsigned num_waves = ac_get_wave_info(gfx_level, waves); 374 375 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves); 376 377 /* Dump annotated active graphics shaders. */ 378 unsigned stages = active_stages; 379 while (stages) { 380 int stage = u_bit_scan(&stages); 381 382 radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f); 383 } 384 385 /* Print waves executing shaders that are not currently bound. */ 386 unsigned i; 387 bool found = false; 388 for (i = 0; i < num_waves; i++) { 389 if (waves[i].matched) 390 continue; 391 392 if (!found) { 393 fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n"); 394 found = true; 395 } 396 fprintf(f, 397 " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64 398 "\n", 399 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec, 400 waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc); 401 } 402 if (found) 403 fprintf(f, "\n\n"); 404} 405 406static void 407radv_dump_spirv(struct radv_shader *shader, const char *sha1, const char *dump_dir) 408{ 409 char dump_path[512]; 410 FILE *f; 411 412 snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1); 413 414 f = fopen(dump_path, "w+"); 415 if (f) { 416 fwrite(shader->spirv, shader->spirv_size, 1, f); 417 fclose(f); 418 } 419} 420 421static void 422radv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader *shader, 423 gl_shader_stage stage, const char *dump_dir, FILE *f) 424{ 425 if (!shader) 426 return; 427 428 fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage)); 429 430 if (shader->spirv) { 431 unsigned char sha1[21]; 432 char sha1buf[41]; 433 434 _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1); 435 _mesa_sha1_format(sha1buf, sha1); 436 437 fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf); 438 radv_dump_spirv(shader, sha1buf, dump_dir); 439 } 440 441 if (shader->nir_string) { 442 fprintf(f, "NIR:\n%s\n", shader->nir_string); 443 } 444 445 fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO", 446 shader->ir_string); 447 fprintf(f, "DISASM:\n%s\n", shader->disasm_string); 448 449 radv_dump_shader_stats(pipeline->device, pipeline, stage, f); 450} 451 452static void 453radv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages, 454 const char *dump_dir, FILE *f) 455{ 456 /* Dump active graphics shaders. */ 457 unsigned stages = active_stages; 458 while (stages) { 459 int stage = u_bit_scan(&stages); 460 461 radv_dump_shader(pipeline, pipeline->shaders[stage], stage, dump_dir, f); 462 } 463} 464 465static void 466radv_dump_vertex_descriptors(struct radv_graphics_pipeline *pipeline, FILE *f) 467{ 468 void *ptr = (uint64_t *)pipeline->base.device->trace_id_ptr; 469 uint32_t count = util_bitcount(pipeline->vb_desc_usage_mask); 470 uint32_t *vb_ptr = &((uint32_t *)ptr)[3]; 471 472 if (!count) 473 return; 474 475 fprintf(f, "Num vertex %s: %d\n", 476 pipeline->use_per_attribute_vb_descs ? "attributes" : "bindings", count); 477 for (uint32_t i = 0; i < count; i++) { 478 uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4]; 479 uint64_t va = 0; 480 481 va |= desc[0]; 482 va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32; 483 484 fprintf(f, "VBO#%d:\n", i); 485 fprintf(f, "\tVA: 0x%" PRIx64 "\n", va); 486 fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1])); 487 fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]); 488 } 489} 490 491static struct radv_shader_part * 492radv_get_saved_vs_prolog(struct radv_device *device) 493{ 494 uint64_t *ptr = (uint64_t *)device->trace_id_ptr; 495 return *(struct radv_shader_part **)(ptr + 4); 496} 497 498static void 499radv_dump_vs_prolog(struct radv_pipeline *pipeline, FILE *f) 500{ 501 struct radv_shader_part *vs_prolog = radv_get_saved_vs_prolog(pipeline->device); 502 struct radv_shader *vs_shader = radv_get_shader(pipeline, MESA_SHADER_VERTEX); 503 504 if (!vs_prolog || !vs_shader || !vs_shader->info.vs.has_prolog) 505 return; 506 507 fprintf(f, "Vertex prolog:\n\n"); 508 fprintf(f, "DISASM:\n%s\n", vs_prolog->disasm_string); 509} 510 511static struct radv_pipeline * 512radv_get_saved_pipeline(struct radv_device *device, enum amd_ip_type ring) 513{ 514 uint64_t *ptr = (uint64_t *)device->trace_id_ptr; 515 int offset = ring == AMD_IP_GFX ? 1 : 2; 516 517 return *(struct radv_pipeline **)(ptr + offset); 518} 519 520static void 521radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f) 522{ 523 enum amd_ip_type ring = radv_queue_ring(queue); 524 struct radv_pipeline *pipeline; 525 526 fprintf(f, "AMD_IP_%s:\n", ring == AMD_IP_GFX ? "GFX" : "COMPUTE"); 527 528 pipeline = radv_get_saved_pipeline(queue->device, ring); 529 if (pipeline) { 530 struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline); 531 VkShaderStageFlags active_stages; 532 533 if (pipeline->type == RADV_PIPELINE_GRAPHICS) { 534 active_stages = graphics_pipeline->active_stages; 535 } else { 536 active_stages = VK_SHADER_STAGE_COMPUTE_BIT; 537 } 538 539 radv_dump_vs_prolog(pipeline, f); 540 radv_dump_shaders(pipeline, active_stages, dump_dir, f); 541 if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR)) 542 radv_dump_annotated_shaders(pipeline, active_stages, f); 543 radv_dump_vertex_descriptors(graphics_pipeline, f); 544 radv_dump_descriptors(queue->device, f); 545 } 546} 547 548static void 549radv_dump_cmd(const char *cmd, FILE *f) 550{ 551#ifndef _WIN32 552 char line[2048]; 553 FILE *p; 554 555 p = popen(cmd, "r"); 556 if (p) { 557 while (fgets(line, sizeof(line), p)) 558 fputs(line, f); 559 fprintf(f, "\n"); 560 pclose(p); 561 } 562#endif 563} 564 565static void 566radv_dump_dmesg(FILE *f) 567{ 568 fprintf(f, "\nLast 60 lines of dmesg:\n\n"); 569 radv_dump_cmd("dmesg | tail -n60", f); 570} 571 572void 573radv_dump_enabled_options(struct radv_device *device, FILE *f) 574{ 575 uint64_t mask; 576 577 if (device->instance->debug_flags) { 578 fprintf(f, "Enabled debug options: "); 579 580 mask = device->instance->debug_flags; 581 while (mask) { 582 int i = u_bit_scan64(&mask); 583 fprintf(f, "%s, ", radv_get_debug_option_name(i)); 584 } 585 fprintf(f, "\n"); 586 } 587 588 if (device->instance->perftest_flags) { 589 fprintf(f, "Enabled perftest options: "); 590 591 mask = device->instance->perftest_flags; 592 while (mask) { 593 int i = u_bit_scan64(&mask); 594 fprintf(f, "%s, ", radv_get_perftest_option_name(i)); 595 } 596 fprintf(f, "\n"); 597 } 598} 599 600static void 601radv_dump_app_info(struct radv_device *device, FILE *f) 602{ 603 struct radv_instance *instance = device->instance; 604 605 fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name); 606 fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version); 607 fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name); 608 fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version); 609 fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version), 610 VK_VERSION_MINOR(instance->vk.app_info.api_version), 611 VK_VERSION_PATCH(instance->vk.app_info.api_version)); 612 613 radv_dump_enabled_options(device, f); 614} 615 616static void 617radv_dump_device_name(struct radv_device *device, FILE *f) 618{ 619 struct radeon_info *info = &device->physical_device->rad_info; 620#ifndef _WIN32 621 char kernel_version[128] = {0}; 622 struct utsname uname_data; 623#endif 624 625#ifdef _WIN32 626 fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name, 627 info->drm_major, info->drm_minor, info->drm_patchlevel); 628#else 629 if (uname(&uname_data) == 0) 630 snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release); 631 632 fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name, 633 info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version); 634#endif 635} 636 637static void 638radv_dump_umr_ring(struct radv_queue *queue, FILE *f) 639{ 640 enum amd_ip_type ring = radv_queue_ring(queue); 641 struct radv_device *device = queue->device; 642 char cmd[128]; 643 644 /* TODO: Dump compute ring. */ 645 if (ring != AMD_IP_GFX) 646 return; 647 648 sprintf(cmd, "umr -R %s 2>&1", 649 device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx"); 650 651 fprintf(f, "\nUMR GFX ring:\n\n"); 652 radv_dump_cmd(cmd, f); 653} 654 655static void 656radv_dump_umr_waves(struct radv_queue *queue, FILE *f) 657{ 658 enum amd_ip_type ring = radv_queue_ring(queue); 659 struct radv_device *device = queue->device; 660 char cmd[128]; 661 662 /* TODO: Dump compute ring. */ 663 if (ring != AMD_IP_GFX) 664 return; 665 666 sprintf(cmd, "umr -O bits,halt_waves -go 0 -wa %s -go 1 2>&1", 667 device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx"); 668 669 fprintf(f, "\nUMR GFX waves:\n\n"); 670 radv_dump_cmd(cmd, f); 671} 672 673static bool 674radv_gpu_hang_occured(struct radv_queue *queue, enum amd_ip_type ring) 675{ 676 struct radeon_winsys *ws = queue->device->ws; 677 678 if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family)) 679 return true; 680 681 return false; 682} 683 684void 685radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs) 686{ 687 struct radv_device *device = queue->device; 688 enum amd_ip_type ring; 689 uint64_t addr; 690 691 ring = radv_queue_ring(queue); 692 693 bool hang_occurred = radv_gpu_hang_occured(queue, ring); 694 bool vm_fault_occurred = false; 695 if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS) 696 vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.gfx_level, 697 &device->dmesg_timestamp, &addr); 698 if (!hang_occurred && !vm_fault_occurred) 699 return; 700 701 fprintf(stderr, "radv: GPU hang detected...\n"); 702 703#ifndef _WIN32 704 /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save 705 * various debugging info about that GPU hang. 706 */ 707 struct tm *timep, result; 708 time_t raw_time; 709 FILE *f; 710 char dump_dir[256], dump_path[512], buf_time[128]; 711 712 time(&raw_time); 713 timep = os_localtime(&raw_time, &result); 714 strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep); 715 716 snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."), 717 getpid(), buf_time); 718 if (mkdir(dump_dir, 0774) && errno != EEXIST) { 719 fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno); 720 abort(); 721 } 722 723 fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir); 724 725 /* Dump trace file. */ 726 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log"); 727 f = fopen(dump_path, "w+"); 728 if (f) { 729 radv_dump_trace(queue->device, cs, f); 730 fclose(f); 731 } 732 733 /* Dump pipeline state. */ 734 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log"); 735 f = fopen(dump_path, "w+"); 736 if (f) { 737 radv_dump_queue_state(queue, dump_dir, f); 738 fclose(f); 739 } 740 741 if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) { 742 /* Dump UMR waves. */ 743 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log"); 744 f = fopen(dump_path, "w+"); 745 if (f) { 746 radv_dump_umr_waves(queue, f); 747 fclose(f); 748 } 749 750 /* Dump UMR ring. */ 751 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log"); 752 f = fopen(dump_path, "w+"); 753 if (f) { 754 radv_dump_umr_ring(queue, f); 755 fclose(f); 756 } 757 } 758 759 /* Dump debug registers. */ 760 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log"); 761 f = fopen(dump_path, "w+"); 762 if (f) { 763 radv_dump_debug_registers(device, f); 764 fclose(f); 765 } 766 767 /* Dump BO ranges. */ 768 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log"); 769 f = fopen(dump_path, "w+"); 770 if (f) { 771 device->ws->dump_bo_ranges(device->ws, f); 772 fclose(f); 773 } 774 775 /* Dump BO log. */ 776 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log"); 777 f = fopen(dump_path, "w+"); 778 if (f) { 779 device->ws->dump_bo_log(device->ws, f); 780 fclose(f); 781 } 782 783 /* Dump VM fault info. */ 784 if (vm_fault_occurred) { 785 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log"); 786 f = fopen(dump_path, "w+"); 787 if (f) { 788 fprintf(f, "VM fault report.\n\n"); 789 fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr); 790 fclose(f); 791 } 792 } 793 794 /* Dump app info. */ 795 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log"); 796 f = fopen(dump_path, "w+"); 797 if (f) { 798 radv_dump_app_info(device, f); 799 fclose(f); 800 } 801 802 /* Dump GPU info. */ 803 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log"); 804 f = fopen(dump_path, "w+"); 805 if (f) { 806 radv_dump_device_name(device, f); 807 ac_print_gpu_info(&device->physical_device->rad_info, f); 808 fclose(f); 809 } 810 811 /* Dump dmesg. */ 812 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log"); 813 f = fopen(dump_path, "w+"); 814 if (f) { 815 radv_dump_dmesg(f); 816 fclose(f); 817 } 818#endif 819 820 fprintf(stderr, "radv: GPU hang report saved successfully!\n"); 821 abort(); 822} 823 824void 825radv_print_spirv(const char *data, uint32_t size, FILE *fp) 826{ 827#ifndef _WIN32 828 char path[] = "/tmp/fileXXXXXX"; 829 char command[128]; 830 int fd; 831 832 /* Dump the binary into a temporary file. */ 833 fd = mkstemp(path); 834 if (fd < 0) 835 return; 836 837 if (write(fd, data, size) == -1) 838 goto fail; 839 840 /* Disassemble using spirv-dis if installed. */ 841 sprintf(command, "spirv-dis %s", path); 842 radv_dump_cmd(command, fp); 843 844fail: 845 close(fd); 846 unlink(path); 847#endif 848} 849 850bool 851radv_trap_handler_init(struct radv_device *device) 852{ 853 struct radeon_winsys *ws = device->ws; 854 VkResult result; 855 856 /* Create the trap handler shader and upload it like other shaders. */ 857 device->trap_handler_shader = radv_create_trap_handler_shader(device); 858 if (!device->trap_handler_shader) { 859 fprintf(stderr, "radv: failed to create the trap handler shader.\n"); 860 return false; 861 } 862 863 result = ws->buffer_make_resident(ws, device->trap_handler_shader->alloc->arena->bo, true); 864 if (result != VK_SUCCESS) 865 return false; 866 867 result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM, 868 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | 869 RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT, 870 RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo); 871 if (result != VK_SUCCESS) 872 return false; 873 874 result = ws->buffer_make_resident(ws, device->tma_bo, true); 875 if (result != VK_SUCCESS) 876 return false; 877 878 device->tma_ptr = ws->buffer_map(device->tma_bo); 879 if (!device->tma_ptr) 880 return false; 881 882 /* Upload a buffer descriptor to store various info from the trap. */ 883 uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16; 884 uint32_t desc[4]; 885 886 desc[0] = tma_va; 887 desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32); 888 desc[2] = TMA_BO_SIZE; 889 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 890 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 891 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 892 893 memcpy(device->tma_ptr, desc, sizeof(desc)); 894 895 return true; 896} 897 898void 899radv_trap_handler_finish(struct radv_device *device) 900{ 901 struct radeon_winsys *ws = device->ws; 902 903 if (unlikely(device->trap_handler_shader)) { 904 ws->buffer_make_resident(ws, device->trap_handler_shader->alloc->arena->bo, false); 905 radv_trap_handler_shader_destroy(device, device->trap_handler_shader); 906 } 907 908 if (unlikely(device->tma_bo)) { 909 ws->buffer_make_resident(ws, device->tma_bo, false); 910 ws->buffer_destroy(ws, device->tma_bo); 911 } 912} 913 914static void 915radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc) 916{ 917 struct radv_shader *shader; 918 uint64_t start_addr, end_addr; 919 uint32_t instr_offset; 920 921 shader = radv_find_shader(device, faulty_pc); 922 if (!shader) 923 return; 924 925 start_addr = radv_shader_get_va(shader); 926 end_addr = start_addr + shader->code_size; 927 instr_offset = faulty_pc - start_addr; 928 929 fprintf(stderr, 930 "Faulty shader found " 931 "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n", 932 start_addr, end_addr, instr_offset); 933 934 /* Get the list of instructions. 935 * Buffer size / 4 is the upper bound of the instruction count. 936 */ 937 unsigned num_inst = 0; 938 struct radv_shader_inst *instructions = 939 calloc(shader->code_size / 4, sizeof(struct radv_shader_inst)); 940 941 /* Split the disassembly string into instructions. */ 942 si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions); 943 944 /* Print instructions with annotations. */ 945 for (unsigned i = 0; i < num_inst; i++) { 946 struct radv_shader_inst *inst = &instructions[i]; 947 948 if (start_addr + inst->offset == faulty_pc) { 949 fprintf(stderr, "\n!!! Faulty instruction below !!!\n"); 950 fprintf(stderr, "%s\n", inst->text); 951 fprintf(stderr, "\n"); 952 } else { 953 fprintf(stderr, "%s\n", inst->text); 954 } 955 } 956 957 free(instructions); 958} 959 960struct radv_sq_hw_reg { 961 uint32_t status; 962 uint32_t trap_sts; 963 uint32_t hw_id; 964 uint32_t ib_sts; 965}; 966 967static void 968radv_dump_sq_hw_regs(struct radv_device *device) 969{ 970 struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6]; 971 972 fprintf(stderr, "\nHardware registers:\n"); 973 if (device->physical_device->rad_info.gfx_level >= GFX10) { 974 ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_000408_SQ_WAVE_STATUS, 975 regs->status, ~0); 976 ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00040C_SQ_WAVE_TRAPSTS, 977 regs->trap_sts, ~0); 978 ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00045C_SQ_WAVE_HW_ID1, 979 regs->hw_id, ~0); 980 ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00041C_SQ_WAVE_IB_STS, 981 regs->ib_sts, ~0); 982 } else { 983 ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_000048_SQ_WAVE_STATUS, 984 regs->status, ~0); 985 ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00004C_SQ_WAVE_TRAPSTS, 986 regs->trap_sts, ~0); 987 ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_000050_SQ_WAVE_HW_ID, 988 regs->hw_id, ~0); 989 ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00005C_SQ_WAVE_IB_STS, 990 regs->ib_sts, ~0); 991 } 992 fprintf(stderr, "\n\n"); 993} 994 995void 996radv_check_trap_handler(struct radv_queue *queue) 997{ 998 enum amd_ip_type ring = radv_queue_ring(queue); 999 struct radv_device *device = queue->device; 1000 struct radeon_winsys *ws = device->ws; 1001 1002 /* Wait for the context to be idle in a finite time. */ 1003 ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family); 1004 1005 /* Try to detect if the trap handler has been reached by the hw by 1006 * looking at ttmp0 which should be non-zero if a shader exception 1007 * happened. 1008 */ 1009 if (!device->tma_ptr[4]) 1010 return; 1011 1012#if 0 1013 fprintf(stderr, "tma_ptr:\n"); 1014 for (unsigned i = 0; i < 10; i++) 1015 fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]); 1016#endif 1017 1018 radv_dump_sq_hw_regs(device); 1019 1020 uint32_t ttmp0 = device->tma_ptr[4]; 1021 uint32_t ttmp1 = device->tma_ptr[5]; 1022 1023 /* According to the ISA docs, 3.10 Trap and Exception Registers: 1024 * 1025 * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}" 1026 * 1027 * "When the trap handler is entered, the PC of the faulting 1028 * instruction is: (PC - PC_rewind * 4)." 1029 * */ 1030 uint8_t trap_id = (ttmp1 >> 16) & 0xff; 1031 uint8_t ht = (ttmp1 >> 24) & 0x1; 1032 uint8_t pc_rewind = (ttmp1 >> 25) & 0xf; 1033 uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4); 1034 1035 fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht, 1036 pc_rewind); 1037 1038 radv_dump_faulty_shader(device, pc); 1039 1040 abort(); 1041} 1042