1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2022 Igalia S.L. 3bf215546Sopenharmony_ci * SPDX-License-Identifier: MIT 4bf215546Sopenharmony_ci */ 5bf215546Sopenharmony_ci 6bf215546Sopenharmony_ci#include "tu_cs.h" 7bf215546Sopenharmony_ci 8bf215546Sopenharmony_ci#include <arpa/inet.h> 9bf215546Sopenharmony_ci#include <netinet/in.h> 10bf215546Sopenharmony_ci#include <sys/socket.h> 11bf215546Sopenharmony_ci 12bf215546Sopenharmony_ci#include "tu_device.h" 13bf215546Sopenharmony_ci 14bf215546Sopenharmony_ci/* A simple implementations of breadcrumbs tracking of GPU progress 15bf215546Sopenharmony_ci * intended to be a last resort when debugging unrecoverable hangs. 16bf215546Sopenharmony_ci * For best results use Vulkan traces to have a predictable place of hang. 17bf215546Sopenharmony_ci * 18bf215546Sopenharmony_ci * For ordinary hangs as a more user-friendly solution use GFR 19bf215546Sopenharmony_ci * "Graphics Flight Recorder". 20bf215546Sopenharmony_ci * 21bf215546Sopenharmony_ci * This implementation aims to handle cases where we cannot do anything 22bf215546Sopenharmony_ci * after the hang, which is achieved by: 23bf215546Sopenharmony_ci * - On GPU after each breadcrumb we wait until CPU acks it and sends udp 24bf215546Sopenharmony_ci * packet to the remote host; 25bf215546Sopenharmony_ci * - At specified breadcrumb require explicit user input to continue 26bf215546Sopenharmony_ci * execution up to the next breadcrumb. 27bf215546Sopenharmony_ci * 28bf215546Sopenharmony_ci * In-driver breadcrumbs also allow more precise tracking since we could 29bf215546Sopenharmony_ci * target a single GPU packet. 30bf215546Sopenharmony_ci * 31bf215546Sopenharmony_ci * 32bf215546Sopenharmony_ci * Breadcrumbs settings: 33bf215546Sopenharmony_ci * 34bf215546Sopenharmony_ci * TU_BREADCRUMBS=$IP:$PORT,break=$BREAKPOINT:$BREAKPOINT_HITS 35bf215546Sopenharmony_ci * Where: 36bf215546Sopenharmony_ci * $BREAKPOINT - the breadcrumb from which we require explicit ack 37bf215546Sopenharmony_ci * $BREAKPOINT_HITS - how many times breakpoint should be reached for 38bf215546Sopenharmony_ci * break to occur. Necessary for a gmem mode and re-usable cmdbuffers 39bf215546Sopenharmony_ci * in both of which the same cmdstream could be executed several times. 40bf215546Sopenharmony_ci * 41bf215546Sopenharmony_ci * 42bf215546Sopenharmony_ci * A typical work flow would be: 43bf215546Sopenharmony_ci * - Start listening for breadcrumbs on remote host: 44bf215546Sopenharmony_ci * nc -lvup $PORT | stdbuf -o0 xxd -pc -c 4 | awk -Wposix '{printf("%u:%u\n", "0x" $0, a[$0]++)}' 45bf215546Sopenharmony_ci * 46bf215546Sopenharmony_ci * - Start capturing command stream: 47bf215546Sopenharmony_ci * sudo cat /sys/kernel/debug/dri/0/rd > ~/cmdstream.rd 48bf215546Sopenharmony_ci * 49bf215546Sopenharmony_ci * - On device replay the hanging trace with: 50bf215546Sopenharmony_ci * TU_BREADCRUMBS=$IP:$PORT,break=-1:0 51bf215546Sopenharmony_ci * ! Try to reproduce the hang in a sysmem mode because it would 52bf215546Sopenharmony_ci * require much less breadcrumb writes and syncs. 53bf215546Sopenharmony_ci * 54bf215546Sopenharmony_ci * - Increase hangcheck period: 55bf215546Sopenharmony_ci * echo -n 60000 > /sys/kernel/debug/dri/0/hangcheck_period_ms 56bf215546Sopenharmony_ci * 57bf215546Sopenharmony_ci * - After GPU hang note the last breadcrumb and relaunch trace with: 58bf215546Sopenharmony_ci * TU_BREADCRUMBS=$IP:$PORT,break=$LAST_BREADCRUMB:$HITS 59bf215546Sopenharmony_ci * 60bf215546Sopenharmony_ci * - After the breakpoint is reached each breadcrumb would require 61bf215546Sopenharmony_ci * explicit ack from the user. This way it's possible to find 62bf215546Sopenharmony_ci * the last packet which did't hang. 63bf215546Sopenharmony_ci * 64bf215546Sopenharmony_ci * - Find the packet in the decoded cmdstream. 65bf215546Sopenharmony_ci */ 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_cistruct breadcrumbs_context 68bf215546Sopenharmony_ci{ 69bf215546Sopenharmony_ci char remote_host[64]; 70bf215546Sopenharmony_ci int remote_port; 71bf215546Sopenharmony_ci uint32_t breadcrumb_breakpoint; 72bf215546Sopenharmony_ci uint32_t breadcrumb_breakpoint_hits; 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci bool thread_stop; 75bf215546Sopenharmony_ci pthread_t breadcrumbs_thread; 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci struct tu_device *device; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci uint32_t breadcrumb_idx; 80bf215546Sopenharmony_ci}; 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_cistatic void * 83bf215546Sopenharmony_cisync_gpu_with_cpu(void *_job) 84bf215546Sopenharmony_ci{ 85bf215546Sopenharmony_ci struct breadcrumbs_context *ctx = (struct breadcrumbs_context *) _job; 86bf215546Sopenharmony_ci struct tu6_global *global = 87bf215546Sopenharmony_ci (struct tu6_global *) ctx->device->global_bo->map; 88bf215546Sopenharmony_ci uint32_t last_breadcrumb = 0; 89bf215546Sopenharmony_ci uint32_t breakpoint_hits = 0; 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci int s = socket(AF_INET, SOCK_DGRAM, 0); 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci if (s < 0) { 94bf215546Sopenharmony_ci mesa_loge("TU_BREADCRUMBS: Error while creating socket"); 95bf215546Sopenharmony_ci return NULL; 96bf215546Sopenharmony_ci } 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci struct sockaddr_in to_addr; 99bf215546Sopenharmony_ci to_addr.sin_family = AF_INET; 100bf215546Sopenharmony_ci to_addr.sin_port = htons(ctx->remote_port); 101bf215546Sopenharmony_ci to_addr.sin_addr.s_addr = inet_addr(ctx->remote_host); 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci /* Run until we know that no more work would be submitted, 104bf215546Sopenharmony_ci * because each breadcrumb requires an ack from cpu side and without 105bf215546Sopenharmony_ci * the ack GPU would timeout. 106bf215546Sopenharmony_ci */ 107bf215546Sopenharmony_ci while (!ctx->thread_stop) { 108bf215546Sopenharmony_ci uint32_t current_breadcrumb = global->breadcrumb_gpu_sync_seqno; 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci if (current_breadcrumb != last_breadcrumb) { 111bf215546Sopenharmony_ci last_breadcrumb = current_breadcrumb; 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci uint32_t data = htonl(last_breadcrumb); 114bf215546Sopenharmony_ci if (sendto(s, &data, sizeof(data), 0, (struct sockaddr *) &to_addr, 115bf215546Sopenharmony_ci sizeof(to_addr)) < 0) { 116bf215546Sopenharmony_ci mesa_loge("TU_BREADCRUMBS: sendto failed"); 117bf215546Sopenharmony_ci goto fail; 118bf215546Sopenharmony_ci } 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci if (last_breadcrumb >= ctx->breadcrumb_breakpoint && 121bf215546Sopenharmony_ci breakpoint_hits >= ctx->breadcrumb_breakpoint_hits) { 122bf215546Sopenharmony_ci printf("GPU is on breadcrumb %d, continue?", last_breadcrumb); 123bf215546Sopenharmony_ci while (getchar() != 'y') 124bf215546Sopenharmony_ci ; 125bf215546Sopenharmony_ci } 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci if (ctx->breadcrumb_breakpoint == last_breadcrumb) 128bf215546Sopenharmony_ci breakpoint_hits++; 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci /* ack that we received the value */ 131bf215546Sopenharmony_ci global->breadcrumb_cpu_sync_seqno = last_breadcrumb; 132bf215546Sopenharmony_ci } 133bf215546Sopenharmony_ci } 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_cifail: 136bf215546Sopenharmony_ci close(s); 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci return NULL; 139bf215546Sopenharmony_ci} 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci/* Same as tu_cs_emit_pkt7 but without instrumentation */ 142bf215546Sopenharmony_cistatic inline void 143bf215546Sopenharmony_ciemit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt) 144bf215546Sopenharmony_ci{ 145bf215546Sopenharmony_ci tu_cs_reserve(cs, cnt + 1); 146bf215546Sopenharmony_ci tu_cs_emit(cs, pm4_pkt7_hdr(opcode, cnt)); 147bf215546Sopenharmony_ci} 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_civoid 150bf215546Sopenharmony_citu_breadcrumbs_init(struct tu_device *device) 151bf215546Sopenharmony_ci{ 152bf215546Sopenharmony_ci const char *breadcrumbs_opt = NULL; 153bf215546Sopenharmony_ci#ifdef TU_BREADCRUMBS_ENABLED 154bf215546Sopenharmony_ci breadcrumbs_opt = os_get_option("TU_BREADCRUMBS"); 155bf215546Sopenharmony_ci#endif 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci device->breadcrumbs_ctx = NULL; 158bf215546Sopenharmony_ci if (!breadcrumbs_opt) { 159bf215546Sopenharmony_ci return; 160bf215546Sopenharmony_ci } 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci struct breadcrumbs_context *ctx = 163bf215546Sopenharmony_ci malloc(sizeof(struct breadcrumbs_context)); 164bf215546Sopenharmony_ci ctx->device = device; 165bf215546Sopenharmony_ci ctx->breadcrumb_idx = 0; 166bf215546Sopenharmony_ci ctx->thread_stop = false; 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci if (sscanf(breadcrumbs_opt, "%[^:]:%d,break=%u:%u", ctx->remote_host, 169bf215546Sopenharmony_ci &ctx->remote_port, &ctx->breadcrumb_breakpoint, 170bf215546Sopenharmony_ci &ctx->breadcrumb_breakpoint_hits) != 4) { 171bf215546Sopenharmony_ci free(ctx); 172bf215546Sopenharmony_ci mesa_loge("Wrong TU_BREADCRUMBS value"); 173bf215546Sopenharmony_ci return; 174bf215546Sopenharmony_ci } 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci device->breadcrumbs_ctx = ctx; 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci struct tu6_global *global = device->global_bo->map; 179bf215546Sopenharmony_ci global->breadcrumb_cpu_sync_seqno = 0; 180bf215546Sopenharmony_ci global->breadcrumb_gpu_sync_seqno = 0; 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci pthread_create(&ctx->breadcrumbs_thread, NULL, sync_gpu_with_cpu, ctx); 183bf215546Sopenharmony_ci} 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_civoid 186bf215546Sopenharmony_citu_breadcrumbs_finish(struct tu_device *device) 187bf215546Sopenharmony_ci{ 188bf215546Sopenharmony_ci struct breadcrumbs_context *ctx = device->breadcrumbs_ctx; 189bf215546Sopenharmony_ci if (!ctx || ctx->thread_stop) 190bf215546Sopenharmony_ci return; 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci ctx->thread_stop = true; 193bf215546Sopenharmony_ci pthread_join(ctx->breadcrumbs_thread, NULL); 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci free(ctx); 196bf215546Sopenharmony_ci} 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_civoid 199bf215546Sopenharmony_citu_cs_emit_sync_breadcrumb(struct tu_cs *cs, uint8_t opcode, uint16_t cnt) 200bf215546Sopenharmony_ci{ 201bf215546Sopenharmony_ci /* TODO: we may run out of space if we add breadcrumbs 202bf215546Sopenharmony_ci * to non-growable CS. 203bf215546Sopenharmony_ci */ 204bf215546Sopenharmony_ci if (cs->mode != TU_CS_MODE_GROW) 205bf215546Sopenharmony_ci return; 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci struct tu_device *device = cs->device; 208bf215546Sopenharmony_ci struct breadcrumbs_context *ctx = device->breadcrumbs_ctx; 209bf215546Sopenharmony_ci if (!ctx || ctx->thread_stop) 210bf215546Sopenharmony_ci return; 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci bool before_packet = (cnt != 0); 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci if (before_packet) { 215bf215546Sopenharmony_ci switch (opcode) { 216bf215546Sopenharmony_ci case CP_EXEC_CS_INDIRECT: 217bf215546Sopenharmony_ci case CP_EXEC_CS: 218bf215546Sopenharmony_ci case CP_DRAW_INDX: 219bf215546Sopenharmony_ci case CP_DRAW_INDX_OFFSET: 220bf215546Sopenharmony_ci case CP_DRAW_INDIRECT: 221bf215546Sopenharmony_ci case CP_DRAW_INDX_INDIRECT: 222bf215546Sopenharmony_ci case CP_DRAW_INDIRECT_MULTI: 223bf215546Sopenharmony_ci case CP_DRAW_AUTO: 224bf215546Sopenharmony_ci case CP_BLIT: 225bf215546Sopenharmony_ci // case CP_SET_DRAW_STATE: 226bf215546Sopenharmony_ci // case CP_LOAD_STATE6_FRAG: 227bf215546Sopenharmony_ci // case CP_LOAD_STATE6_GEOM: 228bf215546Sopenharmony_ci break; 229bf215546Sopenharmony_ci default: 230bf215546Sopenharmony_ci return; 231bf215546Sopenharmony_ci }; 232bf215546Sopenharmony_ci } else { 233bf215546Sopenharmony_ci assert(cs->breadcrumb_emit_after == 0); 234bf215546Sopenharmony_ci } 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci uint32_t current_breadcrumb = p_atomic_inc_return(&ctx->breadcrumb_idx); 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci if (ctx->breadcrumb_breakpoint != -1 && 239bf215546Sopenharmony_ci current_breadcrumb < ctx->breadcrumb_breakpoint) 240bf215546Sopenharmony_ci return; 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0); 243bf215546Sopenharmony_ci emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0); 244bf215546Sopenharmony_ci emit_pkt7(cs, CP_WAIT_FOR_ME, 0); 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci emit_pkt7(cs, CP_MEM_WRITE, 3); 247bf215546Sopenharmony_ci tu_cs_emit_qw( 248bf215546Sopenharmony_ci cs, device->global_bo->iova + gb_offset(breadcrumb_gpu_sync_seqno)); 249bf215546Sopenharmony_ci tu_cs_emit(cs, current_breadcrumb); 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci /* Wait until CPU acknowledges the value written by GPU */ 252bf215546Sopenharmony_ci emit_pkt7(cs, CP_WAIT_REG_MEM, 6); 253bf215546Sopenharmony_ci tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) | 254bf215546Sopenharmony_ci CP_WAIT_REG_MEM_0_POLL_MEMORY); 255bf215546Sopenharmony_ci tu_cs_emit_qw( 256bf215546Sopenharmony_ci cs, device->global_bo->iova + gb_offset(breadcrumb_cpu_sync_seqno)); 257bf215546Sopenharmony_ci tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(current_breadcrumb)); 258bf215546Sopenharmony_ci tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0)); 259bf215546Sopenharmony_ci tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16)); 260bf215546Sopenharmony_ci 261bf215546Sopenharmony_ci if (before_packet) 262bf215546Sopenharmony_ci cs->breadcrumb_emit_after = cnt; 263bf215546Sopenharmony_ci} 264