/* xref: /third_party/mesa3d/src/util/perf/u_trace.h (revision bf215546) */
/*
 * Copyright © 2020 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

24bf215546Sopenharmony_ci#ifndef _U_TRACE_H
25bf215546Sopenharmony_ci#define _U_TRACE_H
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include <stdbool.h>
28bf215546Sopenharmony_ci#include <stdint.h>
29bf215546Sopenharmony_ci#include <stdio.h>
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#include "util/u_queue.h"
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_ci#ifdef __cplusplus
34bf215546Sopenharmony_ciextern "C" {
35bf215546Sopenharmony_ci#endif
36bf215546Sopenharmony_ci
/* A trace mechanism (very) loosely inspired by the linux kernel tracepoint
 * mechanism, in that it allows for defining driver specific (or common)
 * tracepoints, which generate 'trace_$name()' functions that can be
 * called at various points in commandstream emit.
 *
 * Currently a printf backend is implemented, but the expectation is to
 * also implement a perfetto backend for shipping out traces to a tool like
 * AGI.
 *
 * Notable differences:
 *
 *  - GPU timestamps!  A driver provided callback is used to emit timestamps
 *    to a buffer.  At a later point in time (when stalling to wait for the
 *    GPU is not required), the timestamps are re-united with the trace
 *    payload.  This makes the trace mechanism suitable for profiling.
 *
 *  - Instead of a systemwide trace ringbuffer, buffering of un-retired
 *    tracepoints is split into two stages.  Traces are emitted to a
 *    'u_trace' instance, and at a later time flushed to a 'u_trace_context'
 *    instance.  This avoids the requirement that commandstream containing
 *    tracepoints is emitted in the same order as it is generated.
 *
 *    If the hw has multiple parallel "engines" (for example, 3d/blit/compute)
 *    then a `u_trace_context` per-engine should be used.
 *
 *  - Unlike kernel tracepoints, u_trace tracepoints are defined in Python,
 *    from which header and src files are generated.  Since we already have
 *    a build dependency on python+mako, this gives more flexibility than
 *    clunky preprocessor macro magic.
 *
 */

/* Forward declarations, so the callback typedefs and prototypes in this
 * header can refer to these types through pointers.
 */
struct u_trace_context;
struct u_trace;
struct u_trace_chunk;
struct u_trace_printer;

/**
 * Special reserved value to indicate that no timestamp was captured,
 * and that the timestamp of the previous trace should be reused.
 *
 * The value is a 64-bit zero, matching the uint64_t return type of the
 * u_trace_read_ts callback.
 */
#define U_TRACE_NO_TIMESTAMP ((uint64_t)0)

/**
 * Driver provided callback to create a timestamp buffer which will be
 * read by the u_trace_read_ts callback.
 *
 * @param utctx             trace context the buffer is created for
 * @param timestamps_count  NOTE(review): presumably the number of timestamp
 *                          slots the buffer must be able to hold -- confirm
 * @return opaque, driver defined handle to the new timestamp buffer
 */
typedef void* (*u_trace_create_ts_buffer)(struct u_trace_context *utctx,
      uint32_t timestamps_count);

/**
 * Driver provided callback to delete a timestamp buffer.
 *
 * @param utctx       trace context the buffer belongs to
 * @param timestamps  opaque timestamp buffer handle to destroy
 */
typedef void (*u_trace_delete_ts_buffer)(struct u_trace_context *utctx,
      void *timestamps);

/**
 * Driver provided callback to emit commands into the specified command
 * stream to capture a 64b timestamp into the specified timestamps buffer,
 * at the specified index.
 *
 * The hw counter that the driver records should be something that runs at
 * a fixed rate, even as the GPU freq changes.  The same source used for
 * GL_TIMESTAMP queries should be appropriate.
 *
 * @param ut           u_trace instance the tracepoint is emitted to
 * @param cs           opaque, driver defined command stream
 * @param timestamps   opaque timestamp buffer handle
 * @param idx          index in the timestamp buffer to write
 * @param end_of_pipe  NOTE(review): meaning is not documented here;
 *                     presumably selects an end-of-pipe timestamp capture
 *                     -- confirm against the drivers
 */
typedef void (*u_trace_record_ts)(struct u_trace *ut, void *cs,
                                  void *timestamps, unsigned idx,
                                  bool end_of_pipe);

/**
 * Driver provided callback to read back a previously recorded timestamp.
 * If necessary, this should block until the GPU has finished writing back
 * the timestamps.  (The timestamps will be read back in order, so it is
 * safe to only synchronize on idx==0.)
 *
 * flush_data is data provided by the driver via u_trace_flush.
 *
 * The returned timestamp should be in units of nanoseconds.  The same
 * timebase as GL_TIMESTAMP queries should be used.
 *
 * The driver can return the special U_TRACE_NO_TIMESTAMP value to indicate
 * that no timestamp was captured and the timestamp from the previous trace
 * will be re-used.  (The first trace in the u_trace buf may not do this.)
 * This allows the driver to detect cases where multiple tracepoints are
 * emitted with no other intervening cmdstream, to avoid pointlessly
 * capturing the same timestamp multiple times in a row.
 *
 * @param utctx       trace context the timestamps belong to
 * @param timestamps  opaque timestamp buffer handle
 * @param idx         index of the timestamp to read
 * @param flush_data  driver data that was passed to u_trace_flush
 * @return timestamp in nanoseconds, or U_TRACE_NO_TIMESTAMP
 */
typedef uint64_t (*u_trace_read_ts)(struct u_trace_context *utctx,
      void *timestamps, unsigned idx, void *flush_data);

/**
 * Driver provided callback to delete flush data.
 *
 * @param utctx       trace context the flush data was flushed to
 * @param flush_data  driver data that was passed to u_trace_flush
 */
typedef void (*u_trace_delete_flush_data)(struct u_trace_context *utctx,
      void *flush_data);

133bf215546Sopenharmony_ci/**
134bf215546Sopenharmony_ci * The trace context provides tracking for "in-flight" traces, once the
135bf215546Sopenharmony_ci * cmdstream that records timestamps has been flushed.
136bf215546Sopenharmony_ci */
137bf215546Sopenharmony_cistruct u_trace_context {
138bf215546Sopenharmony_ci   void *pctx;
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci   u_trace_create_ts_buffer  create_timestamp_buffer;
141bf215546Sopenharmony_ci   u_trace_delete_ts_buffer  delete_timestamp_buffer;
142bf215546Sopenharmony_ci   u_trace_record_ts         record_timestamp;
143bf215546Sopenharmony_ci   u_trace_read_ts           read_timestamp;
144bf215546Sopenharmony_ci   u_trace_delete_flush_data delete_flush_data;
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci   FILE *out;
147bf215546Sopenharmony_ci   struct u_trace_printer *out_printer;
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ci   /* Once u_trace_flush() is called u_trace_chunk's are queued up to
150bf215546Sopenharmony_ci    * render tracepoints on a queue.  The per-chunk queue jobs block until
151bf215546Sopenharmony_ci    * timestamps are available.
152bf215546Sopenharmony_ci    */
153bf215546Sopenharmony_ci   struct util_queue queue;
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci#ifdef HAVE_PERFETTO
156bf215546Sopenharmony_ci   /* node in global list of trace contexts. */
157bf215546Sopenharmony_ci   struct list_head node;
158bf215546Sopenharmony_ci#endif
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_ci   /* State to accumulate time across N chunks associated with a single
161bf215546Sopenharmony_ci    * batch (u_trace).
162bf215546Sopenharmony_ci    */
163bf215546Sopenharmony_ci   uint64_t last_time_ns;
164bf215546Sopenharmony_ci   uint64_t first_time_ns;
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci   uint32_t frame_nr;
167bf215546Sopenharmony_ci   uint32_t batch_nr;
168bf215546Sopenharmony_ci   uint32_t event_nr;
169bf215546Sopenharmony_ci   bool start_of_frame;
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci   /* list of unprocessed trace chunks in fifo order: */
172bf215546Sopenharmony_ci   struct list_head flushed_trace_chunks;
173bf215546Sopenharmony_ci};
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci/**
176bf215546Sopenharmony_ci * The u_trace ptr is passed as the first arg to generated tracepoints.
177bf215546Sopenharmony_ci * It provides buffering for tracepoint payload until the corresponding
178bf215546Sopenharmony_ci * driver cmdstream containing the emitted commands to capture is
179bf215546Sopenharmony_ci * flushed.
180bf215546Sopenharmony_ci *
181bf215546Sopenharmony_ci * Individual tracepoints emitted to u_trace are expected to be "executed"
182bf215546Sopenharmony_ci * (ie. timestamp captured) in FIFO order with respect to other tracepoints
183bf215546Sopenharmony_ci * emitted to the same u_trace.  But the order WRT other u_trace instances
184bf215546Sopenharmony_ci * is undefined util u_trace_flush().
185bf215546Sopenharmony_ci */
186bf215546Sopenharmony_cistruct u_trace {
187bf215546Sopenharmony_ci   struct u_trace_context *utctx;
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   struct list_head trace_chunks;  /* list of unflushed trace chunks in fifo order */
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_ci   bool enabled;
192bf215546Sopenharmony_ci};
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_civoid u_trace_context_init(struct u_trace_context *utctx,
195bf215546Sopenharmony_ci      void *pctx,
196bf215546Sopenharmony_ci      u_trace_create_ts_buffer   create_timestamp_buffer,
197bf215546Sopenharmony_ci      u_trace_delete_ts_buffer   delete_timestamp_buffer,
198bf215546Sopenharmony_ci      u_trace_record_ts          record_timestamp,
199bf215546Sopenharmony_ci      u_trace_read_ts            read_timestamp,
200bf215546Sopenharmony_ci      u_trace_delete_flush_data  delete_flush_data);
201bf215546Sopenharmony_civoid u_trace_context_fini(struct u_trace_context *utctx);
202bf215546Sopenharmony_ci
/**
 * Flush (trigger processing) of traces previously flushed to the trace-context
 * by u_trace_flush().
 *
 * This should typically be called in the driver's pctx->flush().
 *
 * @param utctx  trace context whose flushed traces should be processed
 * @param eof    NOTE(review): not documented here; presumably marks the
 *               end of a frame -- confirm in the implementation
 */
void u_trace_context_process(struct u_trace_context *utctx, bool eof);

/** Initialize a u_trace whose traces will be flushed to utctx. */
void u_trace_init(struct u_trace *ut, struct u_trace_context *utctx);

/** Tear down a u_trace set up by u_trace_init(). */
void u_trace_fini(struct u_trace *ut);

/**
 * NOTE(review): presumably reports whether any tracepoints have been
 * emitted to ut -- the implementation is not visible in this header.
 */
bool u_trace_has_points(struct u_trace *ut);

/**
 * Position of a tracepoint within a u_trace, expressed as a chunk plus an
 * event index.  Iterators are passed by value, and a (begin, end) pair
 * delimits a range of tracepoints for u_trace_clone_append() and
 * u_trace_disable_event_range().
 */
struct u_trace_iterator
{
   struct u_trace *ut;           /* u_trace being iterated */
   struct u_trace_chunk *chunk;  /* chunk the iterator refers to */
   uint32_t event_idx;           /* event index (presumably within chunk) */
};

/**
 * NOTE(review): presumably returns an iterator at the first tracepoint of
 * ut -- the implementation is not visible in this header.
 */
struct u_trace_iterator
u_trace_begin_iterator(struct u_trace *ut);

/**
 * NOTE(review): presumably returns an iterator just past the last
 * tracepoint currently emitted to ut -- confirm in the implementation.
 */
struct u_trace_iterator
u_trace_end_iterator(struct u_trace *ut);

/** Compare two iterators for equality. */
bool
u_trace_iterator_equal(struct u_trace_iterator a,
                       struct u_trace_iterator b);

/**
 * Driver provided callback, passed to u_trace_clone_append(), to copy
 * timestamps on the GPU from one timestamp buffer to another.
 *
 * @param utctx        trace context
 * @param cmdstream    opaque, driver defined command stream
 * @param ts_from      source timestamp buffer handle
 * @param from_offset  offset into the source buffer (NOTE(review):
 *                     presumably in timestamps, not bytes -- confirm)
 * @param ts_to        destination timestamp buffer handle
 * @param to_offset    offset into the destination buffer (same unit as
 *                     from_offset)
 * @param count        number of timestamps to copy
 */
typedef void (*u_trace_copy_ts_buffer)(struct u_trace_context *utctx,
      void *cmdstream,
      void *ts_from, uint32_t from_offset,
      void *ts_to, uint32_t to_offset,
      uint32_t count);

239bf215546Sopenharmony_ci/**
240bf215546Sopenharmony_ci * Clones tracepoints range into target u_trace.
241bf215546Sopenharmony_ci * Provides callback for driver to copy timestamps on GPU from
242bf215546Sopenharmony_ci * one buffer to another.
243bf215546Sopenharmony_ci *
244bf215546Sopenharmony_ci * It allows:
245bf215546Sopenharmony_ci * - Tracing re-usable command buffer in Vulkan, by copying tracepoints
246bf215546Sopenharmony_ci *   each time it is submitted.
247bf215546Sopenharmony_ci * - Per-tile tracing for tiling GPUs, by copying a range of tracepoints
248bf215546Sopenharmony_ci *   corresponding to a tile.
249bf215546Sopenharmony_ci */
250bf215546Sopenharmony_civoid u_trace_clone_append(struct u_trace_iterator begin_it,
251bf215546Sopenharmony_ci                          struct u_trace_iterator end_it,
252bf215546Sopenharmony_ci                          struct u_trace *into,
253bf215546Sopenharmony_ci                          void *cmdstream,
254bf215546Sopenharmony_ci                          u_trace_copy_ts_buffer copy_ts_buffer);
255bf215546Sopenharmony_ci
/**
 * NOTE(review): presumably disables the tracepoints in the range delimited
 * by begin_it and end_it -- the implementation is not visible in this
 * header, confirm the exact range semantics there.
 */
void u_trace_disable_event_range(struct u_trace_iterator begin_it,
                                 struct u_trace_iterator end_it);

/**
 * Flush traces to the parent trace-context.  At this point, the expectation
 * is that all the tracepoints are "executed" by the GPU following any
 * previously flushed u_trace batch.
 *
 * flush_data is a way for driver to pass additional data, which becomes
 * available only at the point of flush, to the u_trace_read_ts callback and
 * perfetto.  The typical example of such data would be a fence to wait on
 * in u_trace_read_ts, and a submission_id to pass into perfetto.
 * The destruction of the data is done via u_trace_delete_flush_data.
 *
 * This should typically be called when the corresponding cmdstream
 * (containing the timestamp reads) is flushed to the kernel.
 *
 * @param ut          u_trace whose tracepoints are flushed
 * @param flush_data  opaque driver data, see above
 * @param free_data   NOTE(review): not documented here; presumably whether
 *                    flush_data is released through
 *                    u_trace_delete_flush_data -- confirm
 */
void u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data);

/**
 * Whether command buffers should be instrumented even if not collecting
 * traces.
 */
extern bool ut_trace_instrument;

#ifdef HAVE_PERFETTO
/* Tested as (ut_perfetto_enabled > 0) by the inline helpers in this header. */
extern int ut_perfetto_enabled;

void u_trace_perfetto_start(void);
void u_trace_perfetto_stop(void);
#else
/* Without perfetto support the flag is a compile-time constant zero. */
#  define ut_perfetto_enabled 0
#endif

290bf215546Sopenharmony_cistatic inline bool
291bf215546Sopenharmony_ciu_trace_context_actively_tracing(struct u_trace_context *utctx)
292bf215546Sopenharmony_ci{
293bf215546Sopenharmony_ci   return !!utctx->out || (ut_perfetto_enabled > 0);
294bf215546Sopenharmony_ci}
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_cistatic inline bool
297bf215546Sopenharmony_ciu_trace_context_instrumenting(struct u_trace_context *utctx)
298bf215546Sopenharmony_ci{
299bf215546Sopenharmony_ci   return !!utctx->out || ut_trace_instrument || (ut_perfetto_enabled > 0);
300bf215546Sopenharmony_ci}
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci#ifdef __cplusplus
303bf215546Sopenharmony_ci}
304bf215546Sopenharmony_ci#endif
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci#endif  /* _U_TRACE_H */
307