1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2022 Igalia S.L.
3bf215546Sopenharmony_ci * SPDX-License-Identifier: MIT
4bf215546Sopenharmony_ci */
5bf215546Sopenharmony_ci
6bf215546Sopenharmony_ci#include "tu_cs.h"
7bf215546Sopenharmony_ci
8bf215546Sopenharmony_ci#include <arpa/inet.h>
9bf215546Sopenharmony_ci#include <netinet/in.h>
10bf215546Sopenharmony_ci#include <sys/socket.h>
11bf215546Sopenharmony_ci
12bf215546Sopenharmony_ci#include "tu_device.h"
13bf215546Sopenharmony_ci
/* A simple implementation of breadcrumbs tracking of GPU progress
15bf215546Sopenharmony_ci * intended to be a last resort when debugging unrecoverable hangs.
16bf215546Sopenharmony_ci * For best results use Vulkan traces to have a predictable place of hang.
17bf215546Sopenharmony_ci *
18bf215546Sopenharmony_ci * For ordinary hangs as a more user-friendly solution use GFR
19bf215546Sopenharmony_ci * "Graphics Flight Recorder".
20bf215546Sopenharmony_ci *
21bf215546Sopenharmony_ci * This implementation aims to handle cases where we cannot do anything
22bf215546Sopenharmony_ci * after the hang, which is achieved by:
23bf215546Sopenharmony_ci * - On GPU after each breadcrumb we wait until CPU acks it and sends udp
24bf215546Sopenharmony_ci *    packet to the remote host;
25bf215546Sopenharmony_ci * - At specified breadcrumb require explicit user input to continue
26bf215546Sopenharmony_ci *   execution up to the next breadcrumb.
27bf215546Sopenharmony_ci *
28bf215546Sopenharmony_ci * In-driver breadcrumbs also allow more precise tracking since we could
29bf215546Sopenharmony_ci * target a single GPU packet.
30bf215546Sopenharmony_ci *
31bf215546Sopenharmony_ci *
32bf215546Sopenharmony_ci * Breadcrumbs settings:
33bf215546Sopenharmony_ci *
34bf215546Sopenharmony_ci *  TU_BREADCRUMBS=$IP:$PORT,break=$BREAKPOINT:$BREAKPOINT_HITS
35bf215546Sopenharmony_ci * Where:
36bf215546Sopenharmony_ci *  $BREAKPOINT - the breadcrumb from which we require explicit ack
37bf215546Sopenharmony_ci *  $BREAKPOINT_HITS - how many times breakpoint should be reached for
38bf215546Sopenharmony_ci *   break to occur. Necessary for a gmem mode and re-usable cmdbuffers
39bf215546Sopenharmony_ci *   in both of which the same cmdstream could be executed several times.
40bf215546Sopenharmony_ci *
41bf215546Sopenharmony_ci *
42bf215546Sopenharmony_ci * A typical work flow would be:
43bf215546Sopenharmony_ci * - Start listening for breadcrumbs on remote host:
44bf215546Sopenharmony_ci *    nc -lvup $PORT | stdbuf -o0 xxd -pc -c 4 | awk -Wposix '{printf("%u:%u\n", "0x" $0, a[$0]++)}'
45bf215546Sopenharmony_ci *
46bf215546Sopenharmony_ci * - Start capturing command stream:
47bf215546Sopenharmony_ci *    sudo cat /sys/kernel/debug/dri/0/rd > ~/cmdstream.rd
48bf215546Sopenharmony_ci *
49bf215546Sopenharmony_ci * - On device replay the hanging trace with:
50bf215546Sopenharmony_ci *    TU_BREADCRUMBS=$IP:$PORT,break=-1:0
51bf215546Sopenharmony_ci *   ! Try to reproduce the hang in a sysmem mode because it would
52bf215546Sopenharmony_ci *   require much less breadcrumb writes and syncs.
53bf215546Sopenharmony_ci *
54bf215546Sopenharmony_ci * - Increase hangcheck period:
55bf215546Sopenharmony_ci *    echo -n 60000 > /sys/kernel/debug/dri/0/hangcheck_period_ms
56bf215546Sopenharmony_ci *
57bf215546Sopenharmony_ci * - After GPU hang note the last breadcrumb and relaunch trace with:
58bf215546Sopenharmony_ci *    TU_BREADCRUMBS=$IP:$PORT,break=$LAST_BREADCRUMB:$HITS
59bf215546Sopenharmony_ci *
60bf215546Sopenharmony_ci * - After the breakpoint is reached each breadcrumb would require
61bf215546Sopenharmony_ci *   explicit ack from the user. This way it's possible to find
 *   the last packet which didn't hang.
63bf215546Sopenharmony_ci *
64bf215546Sopenharmony_ci * - Find the packet in the decoded cmdstream.
65bf215546Sopenharmony_ci */
66bf215546Sopenharmony_ci
/* Per-device state of the breadcrumbs machinery. It is allocated by
 * tu_breadcrumbs_init() and released by tu_breadcrumbs_finish().
 */
struct breadcrumbs_context
{
   /* Destination of the UDP packets, parsed from TU_BREADCRUMBS. The host
    * string is handed to inet_addr() and the port to htons().
    */
   char remote_host[64];
   int remote_port;

   /* "break=$BREAKPOINT:$BREAKPOINT_HITS" part of TU_BREADCRUMBS */
   uint32_t breadcrumb_breakpoint;
   uint32_t breadcrumb_breakpoint_hits;

   /* Set by tu_breadcrumbs_finish() and polled by the breadcrumbs thread */
   bool thread_stop;
   /* Thread running sync_gpu_with_cpu() */
   pthread_t breadcrumbs_thread;

   /* Device whose global BO holds the GPU/CPU sync sequence numbers */
   struct tu_device *device;

   /* Atomically incremented for every candidate breadcrumb */
   uint32_t breadcrumb_idx;
};
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_cistatic void *
83bf215546Sopenharmony_cisync_gpu_with_cpu(void *_job)
84bf215546Sopenharmony_ci{
85bf215546Sopenharmony_ci   struct breadcrumbs_context *ctx = (struct breadcrumbs_context *) _job;
86bf215546Sopenharmony_ci   struct tu6_global *global =
87bf215546Sopenharmony_ci      (struct tu6_global *) ctx->device->global_bo->map;
88bf215546Sopenharmony_ci   uint32_t last_breadcrumb = 0;
89bf215546Sopenharmony_ci   uint32_t breakpoint_hits = 0;
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_ci   int s = socket(AF_INET, SOCK_DGRAM, 0);
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci   if (s < 0) {
94bf215546Sopenharmony_ci      mesa_loge("TU_BREADCRUMBS: Error while creating socket");
95bf215546Sopenharmony_ci      return NULL;
96bf215546Sopenharmony_ci   }
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci   struct sockaddr_in to_addr;
99bf215546Sopenharmony_ci   to_addr.sin_family = AF_INET;
100bf215546Sopenharmony_ci   to_addr.sin_port = htons(ctx->remote_port);
101bf215546Sopenharmony_ci   to_addr.sin_addr.s_addr = inet_addr(ctx->remote_host);
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci   /* Run until we know that no more work would be submitted,
104bf215546Sopenharmony_ci    * because each breadcrumb requires an ack from cpu side and without
105bf215546Sopenharmony_ci    * the ack GPU would timeout.
106bf215546Sopenharmony_ci    */
107bf215546Sopenharmony_ci   while (!ctx->thread_stop) {
108bf215546Sopenharmony_ci      uint32_t current_breadcrumb = global->breadcrumb_gpu_sync_seqno;
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_ci      if (current_breadcrumb != last_breadcrumb) {
111bf215546Sopenharmony_ci         last_breadcrumb = current_breadcrumb;
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci         uint32_t data = htonl(last_breadcrumb);
114bf215546Sopenharmony_ci         if (sendto(s, &data, sizeof(data), 0, (struct sockaddr *) &to_addr,
115bf215546Sopenharmony_ci                    sizeof(to_addr)) < 0) {
116bf215546Sopenharmony_ci            mesa_loge("TU_BREADCRUMBS: sendto failed");
117bf215546Sopenharmony_ci            goto fail;
118bf215546Sopenharmony_ci         }
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci         if (last_breadcrumb >= ctx->breadcrumb_breakpoint &&
121bf215546Sopenharmony_ci             breakpoint_hits >= ctx->breadcrumb_breakpoint_hits) {
122bf215546Sopenharmony_ci            printf("GPU is on breadcrumb %d, continue?", last_breadcrumb);
123bf215546Sopenharmony_ci            while (getchar() != 'y')
124bf215546Sopenharmony_ci               ;
125bf215546Sopenharmony_ci         }
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_ci         if (ctx->breadcrumb_breakpoint == last_breadcrumb)
128bf215546Sopenharmony_ci            breakpoint_hits++;
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_ci         /* ack that we received the value */
131bf215546Sopenharmony_ci         global->breadcrumb_cpu_sync_seqno = last_breadcrumb;
132bf215546Sopenharmony_ci      }
133bf215546Sopenharmony_ci   }
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_cifail:
136bf215546Sopenharmony_ci   close(s);
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci   return NULL;
139bf215546Sopenharmony_ci}
140bf215546Sopenharmony_ci
/* Counterpart of tu_cs_emit_pkt7 that skips the instrumentation: reserves
 * cnt + 1 entries in the command stream and emits the header produced by
 * pm4_pkt7_hdr().
 */
static inline void
emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
{
   const uint32_t reserved = (uint32_t) cnt + 1;

   tu_cs_reserve(cs, reserved);
   tu_cs_emit(cs, pm4_pkt7_hdr(opcode, cnt));
}
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_civoid
150bf215546Sopenharmony_citu_breadcrumbs_init(struct tu_device *device)
151bf215546Sopenharmony_ci{
152bf215546Sopenharmony_ci   const char *breadcrumbs_opt = NULL;
153bf215546Sopenharmony_ci#ifdef TU_BREADCRUMBS_ENABLED
154bf215546Sopenharmony_ci   breadcrumbs_opt = os_get_option("TU_BREADCRUMBS");
155bf215546Sopenharmony_ci#endif
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci   device->breadcrumbs_ctx = NULL;
158bf215546Sopenharmony_ci   if (!breadcrumbs_opt) {
159bf215546Sopenharmony_ci      return;
160bf215546Sopenharmony_ci   }
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci   struct breadcrumbs_context *ctx =
163bf215546Sopenharmony_ci      malloc(sizeof(struct breadcrumbs_context));
164bf215546Sopenharmony_ci   ctx->device = device;
165bf215546Sopenharmony_ci   ctx->breadcrumb_idx = 0;
166bf215546Sopenharmony_ci   ctx->thread_stop = false;
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci   if (sscanf(breadcrumbs_opt, "%[^:]:%d,break=%u:%u", ctx->remote_host,
169bf215546Sopenharmony_ci              &ctx->remote_port, &ctx->breadcrumb_breakpoint,
170bf215546Sopenharmony_ci              &ctx->breadcrumb_breakpoint_hits) != 4) {
171bf215546Sopenharmony_ci      free(ctx);
172bf215546Sopenharmony_ci      mesa_loge("Wrong TU_BREADCRUMBS value");
173bf215546Sopenharmony_ci      return;
174bf215546Sopenharmony_ci   }
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci   device->breadcrumbs_ctx = ctx;
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci   struct tu6_global *global = device->global_bo->map;
179bf215546Sopenharmony_ci   global->breadcrumb_cpu_sync_seqno = 0;
180bf215546Sopenharmony_ci   global->breadcrumb_gpu_sync_seqno = 0;
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci   pthread_create(&ctx->breadcrumbs_thread, NULL, sync_gpu_with_cpu, ctx);
183bf215546Sopenharmony_ci}
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_civoid
186bf215546Sopenharmony_citu_breadcrumbs_finish(struct tu_device *device)
187bf215546Sopenharmony_ci{
188bf215546Sopenharmony_ci   struct breadcrumbs_context *ctx = device->breadcrumbs_ctx;
189bf215546Sopenharmony_ci   if (!ctx || ctx->thread_stop)
190bf215546Sopenharmony_ci      return;
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci   ctx->thread_stop = true;
193bf215546Sopenharmony_ci   pthread_join(ctx->breadcrumbs_thread, NULL);
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci   free(ctx);
196bf215546Sopenharmony_ci}
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_civoid
199bf215546Sopenharmony_citu_cs_emit_sync_breadcrumb(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
200bf215546Sopenharmony_ci{
201bf215546Sopenharmony_ci   /* TODO: we may run out of space if we add breadcrumbs
202bf215546Sopenharmony_ci    * to non-growable CS.
203bf215546Sopenharmony_ci    */
204bf215546Sopenharmony_ci   if (cs->mode != TU_CS_MODE_GROW)
205bf215546Sopenharmony_ci      return;
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_ci   struct tu_device *device = cs->device;
208bf215546Sopenharmony_ci   struct breadcrumbs_context *ctx = device->breadcrumbs_ctx;
209bf215546Sopenharmony_ci   if (!ctx || ctx->thread_stop)
210bf215546Sopenharmony_ci      return;
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_ci   bool before_packet = (cnt != 0);
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_ci   if (before_packet) {
215bf215546Sopenharmony_ci      switch (opcode) {
216bf215546Sopenharmony_ci      case CP_EXEC_CS_INDIRECT:
217bf215546Sopenharmony_ci      case CP_EXEC_CS:
218bf215546Sopenharmony_ci      case CP_DRAW_INDX:
219bf215546Sopenharmony_ci      case CP_DRAW_INDX_OFFSET:
220bf215546Sopenharmony_ci      case CP_DRAW_INDIRECT:
221bf215546Sopenharmony_ci      case CP_DRAW_INDX_INDIRECT:
222bf215546Sopenharmony_ci      case CP_DRAW_INDIRECT_MULTI:
223bf215546Sopenharmony_ci      case CP_DRAW_AUTO:
224bf215546Sopenharmony_ci      case CP_BLIT:
225bf215546Sopenharmony_ci         // case CP_SET_DRAW_STATE:
226bf215546Sopenharmony_ci         // case CP_LOAD_STATE6_FRAG:
227bf215546Sopenharmony_ci         // case CP_LOAD_STATE6_GEOM:
228bf215546Sopenharmony_ci         break;
229bf215546Sopenharmony_ci      default:
230bf215546Sopenharmony_ci         return;
231bf215546Sopenharmony_ci      };
232bf215546Sopenharmony_ci   } else {
233bf215546Sopenharmony_ci      assert(cs->breadcrumb_emit_after == 0);
234bf215546Sopenharmony_ci   }
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci   uint32_t current_breadcrumb = p_atomic_inc_return(&ctx->breadcrumb_idx);
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci   if (ctx->breadcrumb_breakpoint != -1 &&
239bf215546Sopenharmony_ci       current_breadcrumb < ctx->breadcrumb_breakpoint)
240bf215546Sopenharmony_ci      return;
241bf215546Sopenharmony_ci
242bf215546Sopenharmony_ci   emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
243bf215546Sopenharmony_ci   emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
244bf215546Sopenharmony_ci   emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci   emit_pkt7(cs, CP_MEM_WRITE, 3);
247bf215546Sopenharmony_ci   tu_cs_emit_qw(
248bf215546Sopenharmony_ci      cs, device->global_bo->iova + gb_offset(breadcrumb_gpu_sync_seqno));
249bf215546Sopenharmony_ci   tu_cs_emit(cs, current_breadcrumb);
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci   /* Wait until CPU acknowledges the value written by GPU */
252bf215546Sopenharmony_ci   emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
253bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
254bf215546Sopenharmony_ci                     CP_WAIT_REG_MEM_0_POLL_MEMORY);
255bf215546Sopenharmony_ci   tu_cs_emit_qw(
256bf215546Sopenharmony_ci      cs, device->global_bo->iova + gb_offset(breadcrumb_cpu_sync_seqno));
257bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(current_breadcrumb));
258bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0));
259bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
260bf215546Sopenharmony_ci
261bf215546Sopenharmony_ci   if (before_packet)
262bf215546Sopenharmony_ci      cs->breadcrumb_emit_after = cnt;
263bf215546Sopenharmony_ci}
264