xref: /third_party/mesa3d/src/amd/vulkan/radv_debug.c (revision bf215546)
1/*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28#include <stdio.h>
29#include <stdlib.h>
30#ifndef _WIN32
31#include <sys/utsname.h>
32#endif
33#include <sys/stat.h>
34
35#include "util/mesa-sha1.h"
36#include "util/os_time.h"
37#include "ac_debug.h"
38#include "radv_debug.h"
39#include "radv_shader.h"
40#include "sid.h"
41
/* Sizes, in bytes, requested for the trace BO and for the trap handler TMA BO. */
#define TRACE_BO_SIZE 4096
#define TMA_BO_SIZE   4096

/* ANSI escape sequences used to colorize the textual dumps. */
#define COLOR_RESET  "\033[0m"
#define COLOR_RED    "\033[31m"
#define COLOR_GREEN  "\033[1;32m"
#define COLOR_YELLOW "\033[1;33m"
#define COLOR_CYAN   "\033[1;36m"

/* Name prefix of the per-hang report directory. */
#define RADV_DUMP_DIR "radv_dumps"
52
53/* Trace BO layout (offsets are 4 bytes):
54 *
55 * [0]: primary trace ID
56 * [1]: secondary trace ID
57 * [2-3]: 64-bit GFX ring pipeline pointer
58 * [4-5]: 64-bit COMPUTE ring pipeline pointer
59 * [6-7]: Vertex descriptors pointer
60 * [8-9]: 64-bit Vertex prolog pointer
61 * [10-11]: 64-bit descriptor set #0 pointer
62 * ...
63 * [72-73]: 64-bit descriptor set #31 pointer
64 */
65
66bool
67radv_init_trace(struct radv_device *device)
68{
69   struct radeon_winsys *ws = device->ws;
70   VkResult result;
71
72   result = ws->buffer_create(
73      ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
74      RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM |
75      RADEON_FLAG_VA_UNCACHED, RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);
76   if (result != VK_SUCCESS)
77      return false;
78
79   result = ws->buffer_make_resident(ws, device->trace_bo, true);
80   if (result != VK_SUCCESS)
81      return false;
82
83   device->trace_id_ptr = ws->buffer_map(device->trace_bo);
84   if (!device->trace_id_ptr)
85      return false;
86
87   ac_vm_fault_occured(device->physical_device->rad_info.gfx_level, &device->dmesg_timestamp, NULL);
88
89   return true;
90}
91
92void
93radv_finish_trace(struct radv_device *device)
94{
95   struct radeon_winsys *ws = device->ws;
96
97   if (unlikely(device->trace_bo)) {
98      ws->buffer_make_resident(ws, device->trace_bo, false);
99      ws->buffer_destroy(ws, device->trace_bo);
100   }
101}
102
103static void
104radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
105{
106   fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
107   device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2);
108}
109
110static void
111radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
112{
113   struct radeon_winsys *ws = device->ws;
114   uint32_t value;
115
116   if (ws->read_registers(ws, offset, 1, &value))
117      ac_dump_reg(f, device->physical_device->rad_info.gfx_level, offset, value, ~0);
118}
119
120static void
121radv_dump_debug_registers(struct radv_device *device, FILE *f)
122{
123   struct radeon_info *info = &device->physical_device->rad_info;
124
125   fprintf(f, "Memory-mapped registers:\n");
126   radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
127
128   radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
129   radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
130   radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
131   radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
132   radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
133   radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
134   radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
135   if (info->gfx_level <= GFX8) {
136      radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
137      radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
138      radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
139   }
140   radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
141   radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
142   radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
143   radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
144   radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
145   radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
146   radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
147   radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
148   radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
149   radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
150   fprintf(f, "\n");
151}
152
153static void
154radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc, FILE *f)
155{
156   fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
157   for (unsigned j = 0; j < 4; j++)
158      ac_dump_reg(f, gfx_level, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
159}
160
161static void
162radv_dump_image_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc, FILE *f)
163{
164   unsigned sq_img_rsrc_word0 =
165      gfx_level >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
166
167   fprintf(f, COLOR_CYAN "    Image:" COLOR_RESET "\n");
168   for (unsigned j = 0; j < 8; j++)
169      ac_dump_reg(f, gfx_level, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
170
171   fprintf(f, COLOR_CYAN "    FMASK:" COLOR_RESET "\n");
172   for (unsigned j = 0; j < 8; j++)
173      ac_dump_reg(f, gfx_level, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
174}
175
176static void
177radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc, FILE *f)
178{
179   fprintf(f, COLOR_CYAN "    Sampler state:" COLOR_RESET "\n");
180   for (unsigned j = 0; j < 4; j++) {
181      ac_dump_reg(f, gfx_level, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
182   }
183}
184
185static void
186radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc,
187                                            FILE *f)
188{
189   radv_dump_image_descriptor(gfx_level, desc, f);
190   radv_dump_sampler_descriptor(gfx_level, desc + 16, f);
191}
192
193static void
194radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id,
195                         FILE *f)
196{
197   enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
198   const struct radv_descriptor_set_layout *layout;
199   int i;
200
201   if (!set)
202      return;
203   layout = set->header.layout;
204
205   for (i = 0; i < set->header.layout->binding_count; i++) {
206      uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;
207
208      switch (layout->binding[i].type) {
209      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
210      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
211      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
212      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
213         radv_dump_buffer_descriptor(gfx_level, desc, f);
214         break;
215      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
216      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
217      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
218         radv_dump_image_descriptor(gfx_level, desc, f);
219         break;
220      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
221         radv_dump_combined_image_sampler_descriptor(gfx_level, desc, f);
222         break;
223      case VK_DESCRIPTOR_TYPE_SAMPLER:
224         radv_dump_sampler_descriptor(gfx_level, desc, f);
225         break;
226      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
227      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
228      case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
229      case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
230         /* todo */
231         break;
232      default:
233         assert(!"unknown descriptor type");
234         break;
235      }
236      fprintf(f, "\n");
237   }
238   fprintf(f, "\n\n");
239}
240
241static void
242radv_dump_descriptors(struct radv_device *device, FILE *f)
243{
244   uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
245   int i;
246
247   fprintf(f, "Descriptors:\n");
248   for (i = 0; i < MAX_SETS; i++) {
249      struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 5);
250
251      radv_dump_descriptor_set(device, set, i, f);
252   }
253}
254
struct radv_shader_inst {
   char text[160];  /* one disasm line */
   unsigned offset; /* instruction offset */
   unsigned size;   /* instruction size = 4 or 8 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * Only newline-terminated lines that contain a ';' are treated as
 * instructions; every other line is skipped. For each instruction the line
 * text is copied, the byte offset is derived from the previous instruction
 * (offset + size, or 0 for the first one) and a
 * " [PC=..., off=..., size=...]" annotation is appended.
 *
 * Lines longer than the text buffer are truncated instead of overflowing it.
 *
 * \param disasm        NUL-terminated disassembly text.
 * \param start_addr    address of the first instruction, used for the PC
 *                      annotation.
 * \param num           in: number of entries already in "instructions";
 *                      out: updated entry count.
 * \param instructions  output array. The caller must make it large enough;
 *                      its capacity is not known here.
 */
static void
si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num,
                    struct radv_shader_inst *instructions)
{
   struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
   const char *next;

   while ((next = strchr(disasm, '\n'))) {
      struct radv_shader_inst *inst = &instructions[*num];
      const size_t line_len = (size_t)(next - disasm);
      const char *semicolon = memchr(disasm, ';', line_len);
      size_t len = line_len;

      if (!semicolon) {
         /* Ignore everything that is not an instruction. */
         disasm = next + 1;
         continue;
      }

      /* Clamp the copy so an over-long line cannot overflow inst->text, even
       * in builds where assertions are compiled out. */
      if (len > sizeof(inst->text) - 1)
         len = sizeof(inst->text) - 1;

      memcpy(inst->text, disasm, len);
      inst->text[len] = 0;
      inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

      /* More than 16 chars after ";" means the instruction is 8 bytes long. */
      inst->size = next - semicolon > 16 ? 8 : 4;

      /* snprintf truncates the annotation if the remaining space is short. */
      snprintf(inst->text + len, sizeof(inst->text) - len,
               " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset,
               inst->size);

      last_inst = inst;
      (*num)++;
      disasm = next + 1;
   }
}
299
300static void
301radv_dump_annotated_shader(struct radv_shader *shader, gl_shader_stage stage,
302                           struct ac_wave_info *waves, unsigned num_waves, FILE *f)
303{
304   uint64_t start_addr, end_addr;
305   unsigned i;
306
307   if (!shader)
308      return;
309
310   start_addr = radv_shader_get_va(shader);
311   end_addr = start_addr + shader->code_size;
312
313   /* See if any wave executes the shader. */
314   for (i = 0; i < num_waves; i++) {
315      if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
316         break;
317   }
318
319   if (i == num_waves)
320      return; /* the shader is not being executed */
321
322   /* Remember the first found wave. The waves are sorted according to PC. */
323   waves = &waves[i];
324   num_waves -= i;
325
326   /* Get the list of instructions.
327    * Buffer size / 4 is the upper bound of the instruction count.
328    */
329   unsigned num_inst = 0;
330   struct radv_shader_inst *instructions =
331      calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
332
333   si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
334
335   fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
336           radv_get_shader_name(&shader->info, stage));
337
338   /* Print instructions with annotations. */
339   for (i = 0; i < num_inst; i++) {
340      struct radv_shader_inst *inst = &instructions[i];
341
342      fprintf(f, "%s\n", inst->text);
343
344      /* Print which waves execute the instruction right now. */
345      while (num_waves && start_addr + inst->offset == waves->pc) {
346         fprintf(f,
347                 "          " COLOR_GREEN "^ SE%u SH%u CU%u "
348                 "SIMD%u WAVE%u  EXEC=%016" PRIx64 "  ",
349                 waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);
350
351         if (inst->size == 4) {
352            fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);
353         } else {
354            fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);
355         }
356
357         waves->matched = true;
358         waves = &waves[1];
359         num_waves--;
360      }
361   }
362
363   fprintf(f, "\n\n");
364   free(instructions);
365}
366
367static void
368radv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
369                            FILE *f)
370{
371   struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
372   enum amd_gfx_level gfx_level = pipeline->device->physical_device->rad_info.gfx_level;
373   unsigned num_waves = ac_get_wave_info(gfx_level, waves);
374
375   fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
376
377   /* Dump annotated active graphics shaders. */
378   unsigned stages = active_stages;
379   while (stages) {
380      int stage = u_bit_scan(&stages);
381
382      radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f);
383   }
384
385   /* Print waves executing shaders that are not currently bound. */
386   unsigned i;
387   bool found = false;
388   for (i = 0; i < num_waves; i++) {
389      if (waves[i].matched)
390         continue;
391
392      if (!found) {
393         fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
394         found = true;
395      }
396      fprintf(f,
397              "    SE%u SH%u CU%u SIMD%u WAVE%u  EXEC=%016" PRIx64 "  INST=%08X %08X  PC=%" PRIx64
398              "\n",
399              waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
400              waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
401   }
402   if (found)
403      fprintf(f, "\n\n");
404}
405
406static void
407radv_dump_spirv(struct radv_shader *shader, const char *sha1, const char *dump_dir)
408{
409   char dump_path[512];
410   FILE *f;
411
412   snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1);
413
414   f = fopen(dump_path, "w+");
415   if (f) {
416      fwrite(shader->spirv, shader->spirv_size, 1, f);
417      fclose(f);
418   }
419}
420
421static void
422radv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader *shader,
423                 gl_shader_stage stage, const char *dump_dir, FILE *f)
424{
425   if (!shader)
426      return;
427
428   fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
429
430   if (shader->spirv) {
431      unsigned char sha1[21];
432      char sha1buf[41];
433
434      _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
435      _mesa_sha1_format(sha1buf, sha1);
436
437      fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf);
438      radv_dump_spirv(shader, sha1buf, dump_dir);
439   }
440
441   if (shader->nir_string) {
442      fprintf(f, "NIR:\n%s\n", shader->nir_string);
443   }
444
445   fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",
446           shader->ir_string);
447   fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
448
449   radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
450}
451
452static void
453radv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
454                  const char *dump_dir, FILE *f)
455{
456   /* Dump active graphics shaders. */
457   unsigned stages = active_stages;
458   while (stages) {
459      int stage = u_bit_scan(&stages);
460
461      radv_dump_shader(pipeline, pipeline->shaders[stage], stage, dump_dir, f);
462   }
463}
464
465static void
466radv_dump_vertex_descriptors(struct radv_graphics_pipeline *pipeline, FILE *f)
467{
468   void *ptr = (uint64_t *)pipeline->base.device->trace_id_ptr;
469   uint32_t count = util_bitcount(pipeline->vb_desc_usage_mask);
470   uint32_t *vb_ptr = &((uint32_t *)ptr)[3];
471
472   if (!count)
473      return;
474
475   fprintf(f, "Num vertex %s: %d\n",
476           pipeline->use_per_attribute_vb_descs ? "attributes" : "bindings", count);
477   for (uint32_t i = 0; i < count; i++) {
478      uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
479      uint64_t va = 0;
480
481      va |= desc[0];
482      va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
483
484      fprintf(f, "VBO#%d:\n", i);
485      fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);
486      fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
487      fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
488   }
489}
490
491static struct radv_shader_part *
492radv_get_saved_vs_prolog(struct radv_device *device)
493{
494   uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
495   return *(struct radv_shader_part **)(ptr + 4);
496}
497
498static void
499radv_dump_vs_prolog(struct radv_pipeline *pipeline, FILE *f)
500{
501   struct radv_shader_part *vs_prolog = radv_get_saved_vs_prolog(pipeline->device);
502   struct radv_shader *vs_shader = radv_get_shader(pipeline, MESA_SHADER_VERTEX);
503
504   if (!vs_prolog || !vs_shader || !vs_shader->info.vs.has_prolog)
505      return;
506
507   fprintf(f, "Vertex prolog:\n\n");
508   fprintf(f, "DISASM:\n%s\n", vs_prolog->disasm_string);
509}
510
511static struct radv_pipeline *
512radv_get_saved_pipeline(struct radv_device *device, enum amd_ip_type ring)
513{
514   uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
515   int offset = ring == AMD_IP_GFX ? 1 : 2;
516
517   return *(struct radv_pipeline **)(ptr + offset);
518}
519
520static void
521radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)
522{
523   enum amd_ip_type ring = radv_queue_ring(queue);
524   struct radv_pipeline *pipeline;
525
526   fprintf(f, "AMD_IP_%s:\n", ring == AMD_IP_GFX ? "GFX" : "COMPUTE");
527
528   pipeline = radv_get_saved_pipeline(queue->device, ring);
529   if (pipeline) {
530      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
531      VkShaderStageFlags active_stages;
532
533      if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
534         active_stages = graphics_pipeline->active_stages;
535      } else {
536         active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
537      }
538
539      radv_dump_vs_prolog(pipeline, f);
540      radv_dump_shaders(pipeline, active_stages, dump_dir, f);
541      if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR))
542         radv_dump_annotated_shaders(pipeline, active_stages, f);
543      radv_dump_vertex_descriptors(graphics_pipeline, f);
544      radv_dump_descriptors(queue->device, f);
545   }
546}
547
/* Run the shell command `cmd` and copy its standard output to `f`, followed
 * by a blank line. Does nothing on Windows or when the command cannot be
 * started.
 */
static void
radv_dump_cmd(const char *cmd, FILE *f)
{
#ifndef _WIN32
   char chunk[2048];
   FILE *pipe = popen(cmd, "r");

   if (pipe == NULL)
      return;

   while (fgets(chunk, sizeof(chunk), pipe) != NULL)
      fputs(chunk, f);

   fprintf(f, "\n");
   pclose(pipe);
#endif
}
564
/* Append the output of "dmesg | tail -n60" to `f` under a heading. */
static void
radv_dump_dmesg(FILE *f)
{
   static const char dmesg_cmd[] = "dmesg | tail -n60";

   fputs("\nLast 60 lines of dmesg:\n\n", f);
   radv_dump_cmd(dmesg_cmd, f);
}
571
572void
573radv_dump_enabled_options(struct radv_device *device, FILE *f)
574{
575   uint64_t mask;
576
577   if (device->instance->debug_flags) {
578      fprintf(f, "Enabled debug options: ");
579
580      mask = device->instance->debug_flags;
581      while (mask) {
582         int i = u_bit_scan64(&mask);
583         fprintf(f, "%s, ", radv_get_debug_option_name(i));
584      }
585      fprintf(f, "\n");
586   }
587
588   if (device->instance->perftest_flags) {
589      fprintf(f, "Enabled perftest options: ");
590
591      mask = device->instance->perftest_flags;
592      while (mask) {
593         int i = u_bit_scan64(&mask);
594         fprintf(f, "%s, ", radv_get_perftest_option_name(i));
595      }
596      fprintf(f, "\n");
597   }
598}
599
600static void
601radv_dump_app_info(struct radv_device *device, FILE *f)
602{
603   struct radv_instance *instance = device->instance;
604
605   fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
606   fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
607   fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
608   fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
609   fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
610           VK_VERSION_MINOR(instance->vk.app_info.api_version),
611           VK_VERSION_PATCH(instance->vk.app_info.api_version));
612
613   radv_dump_enabled_options(device, f);
614}
615
616static void
617radv_dump_device_name(struct radv_device *device, FILE *f)
618{
619   struct radeon_info *info = &device->physical_device->rad_info;
620#ifndef _WIN32
621   char kernel_version[128] = {0};
622   struct utsname uname_data;
623#endif
624
625#ifdef _WIN32
626   fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name,
627           info->drm_major, info->drm_minor, info->drm_patchlevel);
628#else
629   if (uname(&uname_data) == 0)
630      snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
631
632   fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name,
633           info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version);
634#endif
635}
636
637static void
638radv_dump_umr_ring(struct radv_queue *queue, FILE *f)
639{
640   enum amd_ip_type ring = radv_queue_ring(queue);
641   struct radv_device *device = queue->device;
642   char cmd[128];
643
644   /* TODO: Dump compute ring. */
645   if (ring != AMD_IP_GFX)
646      return;
647
648   sprintf(cmd, "umr -R %s 2>&1",
649           device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
650
651   fprintf(f, "\nUMR GFX ring:\n\n");
652   radv_dump_cmd(cmd, f);
653}
654
655static void
656radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
657{
658   enum amd_ip_type ring = radv_queue_ring(queue);
659   struct radv_device *device = queue->device;
660   char cmd[128];
661
662   /* TODO: Dump compute ring. */
663   if (ring != AMD_IP_GFX)
664      return;
665
666   sprintf(cmd, "umr -O bits,halt_waves -go 0 -wa %s -go 1 2>&1",
667           device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
668
669   fprintf(f, "\nUMR GFX waves:\n\n");
670   radv_dump_cmd(cmd, f);
671}
672
673static bool
674radv_gpu_hang_occured(struct radv_queue *queue, enum amd_ip_type ring)
675{
676   struct radeon_winsys *ws = queue->device->ws;
677
678   if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family))
679      return true;
680
681   return false;
682}
683
684void
685radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
686{
687   struct radv_device *device = queue->device;
688   enum amd_ip_type ring;
689   uint64_t addr;
690
691   ring = radv_queue_ring(queue);
692
693   bool hang_occurred = radv_gpu_hang_occured(queue, ring);
694   bool vm_fault_occurred = false;
695   if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
696      vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.gfx_level,
697                                              &device->dmesg_timestamp, &addr);
698   if (!hang_occurred && !vm_fault_occurred)
699      return;
700
701   fprintf(stderr, "radv: GPU hang detected...\n");
702
703#ifndef _WIN32
704   /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
705    * various debugging info about that GPU hang.
706    */
707   struct tm *timep, result;
708   time_t raw_time;
709   FILE *f;
710   char dump_dir[256], dump_path[512], buf_time[128];
711
712   time(&raw_time);
713   timep = os_localtime(&raw_time, &result);
714   strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
715
716   snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."),
717            getpid(), buf_time);
718   if (mkdir(dump_dir, 0774) && errno != EEXIST) {
719      fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
720      abort();
721   }
722
723   fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
724
725   /* Dump trace file. */
726   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
727   f = fopen(dump_path, "w+");
728   if (f) {
729      radv_dump_trace(queue->device, cs, f);
730      fclose(f);
731   }
732
733   /* Dump pipeline state. */
734   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
735   f = fopen(dump_path, "w+");
736   if (f) {
737      radv_dump_queue_state(queue, dump_dir, f);
738      fclose(f);
739   }
740
741   if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
742      /* Dump UMR waves. */
743      snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
744      f = fopen(dump_path, "w+");
745      if (f) {
746         radv_dump_umr_waves(queue, f);
747         fclose(f);
748      }
749
750      /* Dump UMR ring. */
751      snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
752      f = fopen(dump_path, "w+");
753      if (f) {
754         radv_dump_umr_ring(queue, f);
755         fclose(f);
756      }
757   }
758
759   /* Dump debug registers. */
760   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
761   f = fopen(dump_path, "w+");
762   if (f) {
763      radv_dump_debug_registers(device, f);
764      fclose(f);
765   }
766
767   /* Dump BO ranges. */
768   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log");
769   f = fopen(dump_path, "w+");
770   if (f) {
771      device->ws->dump_bo_ranges(device->ws, f);
772      fclose(f);
773   }
774
775   /* Dump BO log. */
776   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log");
777   f = fopen(dump_path, "w+");
778   if (f) {
779      device->ws->dump_bo_log(device->ws, f);
780      fclose(f);
781   }
782
783   /* Dump VM fault info. */
784   if (vm_fault_occurred) {
785      snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
786      f = fopen(dump_path, "w+");
787      if (f) {
788         fprintf(f, "VM fault report.\n\n");
789         fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr);
790         fclose(f);
791      }
792   }
793
794   /* Dump app info. */
795   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log");
796   f = fopen(dump_path, "w+");
797   if (f) {
798      radv_dump_app_info(device, f);
799      fclose(f);
800   }
801
802   /* Dump GPU info. */
803   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
804   f = fopen(dump_path, "w+");
805   if (f) {
806      radv_dump_device_name(device, f);
807      ac_print_gpu_info(&device->physical_device->rad_info, f);
808      fclose(f);
809   }
810
811   /* Dump dmesg. */
812   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
813   f = fopen(dump_path, "w+");
814   if (f) {
815      radv_dump_dmesg(f);
816      fclose(f);
817   }
818#endif
819
820   fprintf(stderr, "radv: GPU hang report saved successfully!\n");
821   abort();
822}
823
/* Disassemble a SPIR-V binary with the external "spirv-dis" tool and copy
 * the tool's output to `fp`.
 *
 * The binary is written to a temporary file that is removed again before
 * returning. Nothing is printed when the temporary file cannot be created
 * or the binary cannot be written out completely. No-op on Windows.
 *
 * \param data  SPIR-V binary.
 * \param size  size of `data` in bytes.
 */
void
radv_print_spirv(const char *data, uint32_t size, FILE *fp)
{
#ifndef _WIN32
   char path[] = "/tmp/fileXXXXXX";
   char command[128];
   int fd;

   /* Dump the binary into a temporary file. */
   fd = mkstemp(path);
   if (fd < 0)
      return;

   /* A short write would hand a truncated module to the disassembler, so
    * treat it like a failed write. */
   if (write(fd, data, size) != (ssize_t)size)
      goto fail;

   /* Disassemble using spirv-dis if installed. */
   snprintf(command, sizeof(command), "spirv-dis %s", path);
   radv_dump_cmd(command, fp);

fail:
   close(fd);
   unlink(path);
#endif
}
849
850bool
851radv_trap_handler_init(struct radv_device *device)
852{
853   struct radeon_winsys *ws = device->ws;
854   VkResult result;
855
856   /* Create the trap handler shader and upload it like other shaders. */
857   device->trap_handler_shader = radv_create_trap_handler_shader(device);
858   if (!device->trap_handler_shader) {
859      fprintf(stderr, "radv: failed to create the trap handler shader.\n");
860      return false;
861   }
862
863   result = ws->buffer_make_resident(ws, device->trap_handler_shader->alloc->arena->bo, true);
864   if (result != VK_SUCCESS)
865      return false;
866
867   result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
868                              RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
869                                 RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
870                              RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);
871   if (result != VK_SUCCESS)
872      return false;
873
874   result = ws->buffer_make_resident(ws, device->tma_bo, true);
875   if (result != VK_SUCCESS)
876      return false;
877
878   device->tma_ptr = ws->buffer_map(device->tma_bo);
879   if (!device->tma_ptr)
880      return false;
881
882   /* Upload a buffer descriptor to store various info from the trap. */
883   uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
884   uint32_t desc[4];
885
886   desc[0] = tma_va;
887   desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
888   desc[2] = TMA_BO_SIZE;
889   desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
890             S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
891             S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
892
893   memcpy(device->tma_ptr, desc, sizeof(desc));
894
895   return true;
896}
897
898void
899radv_trap_handler_finish(struct radv_device *device)
900{
901   struct radeon_winsys *ws = device->ws;
902
903   if (unlikely(device->trap_handler_shader)) {
904      ws->buffer_make_resident(ws, device->trap_handler_shader->alloc->arena->bo, false);
905      radv_trap_handler_shader_destroy(device, device->trap_handler_shader);
906   }
907
908   if (unlikely(device->tma_bo)) {
909      ws->buffer_make_resident(ws, device->tma_bo, false);
910      ws->buffer_destroy(ws, device->tma_bo);
911   }
912}
913
914static void
915radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
916{
917   struct radv_shader *shader;
918   uint64_t start_addr, end_addr;
919   uint32_t instr_offset;
920
921   shader = radv_find_shader(device, faulty_pc);
922   if (!shader)
923      return;
924
925   start_addr = radv_shader_get_va(shader);
926   end_addr = start_addr + shader->code_size;
927   instr_offset = faulty_pc - start_addr;
928
929   fprintf(stderr,
930           "Faulty shader found "
931           "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",
932           start_addr, end_addr, instr_offset);
933
934   /* Get the list of instructions.
935    * Buffer size / 4 is the upper bound of the instruction count.
936    */
937   unsigned num_inst = 0;
938   struct radv_shader_inst *instructions =
939      calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
940
941   /* Split the disassembly string into instructions. */
942   si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
943
944   /* Print instructions with annotations. */
945   for (unsigned i = 0; i < num_inst; i++) {
946      struct radv_shader_inst *inst = &instructions[i];
947
948      if (start_addr + inst->offset == faulty_pc) {
949         fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
950         fprintf(stderr, "%s\n", inst->text);
951         fprintf(stderr, "\n");
952      } else {
953         fprintf(stderr, "%s\n", inst->text);
954      }
955   }
956
957   free(instructions);
958}
959
960struct radv_sq_hw_reg {
961   uint32_t status;
962   uint32_t trap_sts;
963   uint32_t hw_id;
964   uint32_t ib_sts;
965};
966
967static void
968radv_dump_sq_hw_regs(struct radv_device *device)
969{
970   struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
971
972   fprintf(stderr, "\nHardware registers:\n");
973   if (device->physical_device->rad_info.gfx_level >= GFX10) {
974      ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_000408_SQ_WAVE_STATUS,
975                  regs->status, ~0);
976      ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00040C_SQ_WAVE_TRAPSTS,
977                  regs->trap_sts, ~0);
978      ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00045C_SQ_WAVE_HW_ID1,
979                  regs->hw_id, ~0);
980      ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00041C_SQ_WAVE_IB_STS,
981                  regs->ib_sts, ~0);
982   } else {
983      ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_000048_SQ_WAVE_STATUS,
984                  regs->status, ~0);
985      ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00004C_SQ_WAVE_TRAPSTS,
986                  regs->trap_sts, ~0);
987      ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_000050_SQ_WAVE_HW_ID,
988                  regs->hw_id, ~0);
989      ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00005C_SQ_WAVE_IB_STS,
990                  regs->ib_sts, ~0);
991   }
992   fprintf(stderr, "\n\n");
993}
994
995void
996radv_check_trap_handler(struct radv_queue *queue)
997{
998   enum amd_ip_type ring = radv_queue_ring(queue);
999   struct radv_device *device = queue->device;
1000   struct radeon_winsys *ws = device->ws;
1001
1002   /* Wait for the context to be idle in a finite time. */
1003   ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family);
1004
1005   /* Try to detect if the trap handler has been reached by the hw by
1006    * looking at ttmp0 which should be non-zero if a shader exception
1007    * happened.
1008    */
1009   if (!device->tma_ptr[4])
1010      return;
1011
1012#if 0
1013	fprintf(stderr, "tma_ptr:\n");
1014	for (unsigned i = 0; i < 10; i++)
1015		fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
1016#endif
1017
1018   radv_dump_sq_hw_regs(device);
1019
1020   uint32_t ttmp0 = device->tma_ptr[4];
1021   uint32_t ttmp1 = device->tma_ptr[5];
1022
1023   /* According to the ISA docs, 3.10 Trap and Exception Registers:
1024    *
1025    * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
1026    *
1027    * "When the trap handler is entered, the PC of the faulting
1028    *  instruction is: (PC - PC_rewind * 4)."
1029    * */
1030   uint8_t trap_id = (ttmp1 >> 16) & 0xff;
1031   uint8_t ht = (ttmp1 >> 24) & 0x1;
1032   uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
1033   uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
1034
1035   fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht,
1036           pc_rewind);
1037
1038   radv_dump_faulty_shader(device, pc);
1039
1040   abort();
1041}
1042