1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2020 Google, Inc. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#ifndef _U_TRACE_H 25bf215546Sopenharmony_ci#define _U_TRACE_H 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include <stdbool.h> 28bf215546Sopenharmony_ci#include <stdint.h> 29bf215546Sopenharmony_ci#include <stdio.h> 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "util/u_queue.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci#ifdef __cplusplus 34bf215546Sopenharmony_ciextern "C" { 35bf215546Sopenharmony_ci#endif 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci/* A trace mechanism (very) loosely inspired by the linux kernel tracepoint 38bf215546Sopenharmony_ci * mechanism, in that it allows for defining driver specific (or common) 39bf215546Sopenharmony_ci * tracepoints, which generate 'trace_$name()' functions that can be 40bf215546Sopenharmony_ci * called at various points in commandstream emit. 41bf215546Sopenharmony_ci * 42bf215546Sopenharmony_ci * Currently a printf backend is implemented, but the expectation is to 43bf215546Sopenharmony_ci * also implement a perfetto backend for shipping out traces to a tool like 44bf215546Sopenharmony_ci * AGI. 45bf215546Sopenharmony_ci * 46bf215546Sopenharmony_ci * Notable differences: 47bf215546Sopenharmony_ci * 48bf215546Sopenharmony_ci * - GPU timestamps! A driver provided callback is used to emit timestamps 49bf215546Sopenharmony_ci * to a buffer. At a later point in time (when stalling to wait for the 50bf215546Sopenharmony_ci * GPU is not required), the timestamps are re-united with the trace 51bf215546Sopenharmony_ci * payload. This makes the trace mechanism suitable for profiling. 52bf215546Sopenharmony_ci * 53bf215546Sopenharmony_ci * - Instead of a systemwide trace ringbuffer, buffering of un-retired 54bf215546Sopenharmony_ci * tracepoints is split into two stages. Traces are emitted to a 55bf215546Sopenharmony_ci * 'u_trace' instance, and at a later time flushed to a 'u_trace_context' 56bf215546Sopenharmony_ci * instance. This avoids the requirement that commandstream containing 57bf215546Sopenharmony_ci * tracepoints is emitted in the same order as it is generated. 58bf215546Sopenharmony_ci * 59bf215546Sopenharmony_ci * If the hw has multiple parallel "engines" (for example, 3d/blit/compute) 60bf215546Sopenharmony_ci * then a `u_trace_context` per-engine should be used. 61bf215546Sopenharmony_ci * 62bf215546Sopenharmony_ci * - Unlike kernel tracepoints, u_trace tracepoints are defined in py 63bf215546Sopenharmony_ci * from which header and src files are generated. Since we already have 64bf215546Sopenharmony_ci * a build dependency on python+mako, this gives more flexibility than 65bf215546Sopenharmony_ci * clunky preprocessor macro magic. 66bf215546Sopenharmony_ci * 67bf215546Sopenharmony_ci */ 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_cistruct u_trace_context; 70bf215546Sopenharmony_cistruct u_trace; 71bf215546Sopenharmony_cistruct u_trace_chunk; 72bf215546Sopenharmony_cistruct u_trace_printer; 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci/** 75bf215546Sopenharmony_ci * Special reserved value to indicate that no timestamp was captured, 76bf215546Sopenharmony_ci * and that the timestamp of the previous trace should be reused. 77bf215546Sopenharmony_ci */ 78bf215546Sopenharmony_ci#define U_TRACE_NO_TIMESTAMP ((uint64_t)0) 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci/** 81bf215546Sopenharmony_ci * Driver provided callback to create a timestamp buffer which will be 82bf215546Sopenharmony_ci * read by u_trace_read_ts function. 83bf215546Sopenharmony_ci */ 84bf215546Sopenharmony_citypedef void* (*u_trace_create_ts_buffer)(struct u_trace_context *utctx, 85bf215546Sopenharmony_ci uint32_t timestamps_count); 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci/** 88bf215546Sopenharmony_ci * Driver provided callback to delete a timestamp buffer. 89bf215546Sopenharmony_ci */ 90bf215546Sopenharmony_citypedef void (*u_trace_delete_ts_buffer)(struct u_trace_context *utctx, 91bf215546Sopenharmony_ci void *timestamps); 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci/** 94bf215546Sopenharmony_ci * Driver provided callback to emit commands into the soecified command 95bf215546Sopenharmony_ci * stream to capture a 64b timestamp into the specified timestamps buffer, 96bf215546Sopenharmony_ci * at the specified index. 97bf215546Sopenharmony_ci * 98bf215546Sopenharmony_ci * The hw counter that the driver records should be something that runs at 99bf215546Sopenharmony_ci * a fixed rate, even as the GPU freq changes. The same source used for 100bf215546Sopenharmony_ci * GL_TIMESTAMP queries should be appropriate. 101bf215546Sopenharmony_ci */ 102bf215546Sopenharmony_citypedef void (*u_trace_record_ts)(struct u_trace *ut, void *cs, 103bf215546Sopenharmony_ci void *timestamps, unsigned idx, 104bf215546Sopenharmony_ci bool end_of_pipe); 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci/** 107bf215546Sopenharmony_ci * Driver provided callback to read back a previously recorded timestamp. 108bf215546Sopenharmony_ci * If necessary, this should block until the GPU has finished writing back 109bf215546Sopenharmony_ci * the timestamps. (The timestamps will be read back in order, so it is 110bf215546Sopenharmony_ci * safe to only synchronize on idx==0.) 111bf215546Sopenharmony_ci * 112bf215546Sopenharmony_ci * flush_data is data provided by the driver via u_trace_flush. 113bf215546Sopenharmony_ci * 114bf215546Sopenharmony_ci * The returned timestamp should be in units of nanoseconds. The same 115bf215546Sopenharmony_ci * timebase as GL_TIMESTAMP queries should be used. 116bf215546Sopenharmony_ci * 117bf215546Sopenharmony_ci * The driver can return the special U_TRACE_NO_TIMESTAMP value to indicate 118bf215546Sopenharmony_ci * that no timestamp was captured and the timestamp from the previous trace 119bf215546Sopenharmony_ci * will be re-used. (The first trace in the u_trace buf may not do this.) 120bf215546Sopenharmony_ci * This allows the driver to detect cases where multiple tracepoints are 121bf215546Sopenharmony_ci * emitted with no other intervening cmdstream, to avoid pointlessly 122bf215546Sopenharmony_ci * capturing the same timestamp multiple times in a row. 123bf215546Sopenharmony_ci */ 124bf215546Sopenharmony_citypedef uint64_t (*u_trace_read_ts)(struct u_trace_context *utctx, 125bf215546Sopenharmony_ci void *timestamps, unsigned idx, void *flush_data); 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci/** 128bf215546Sopenharmony_ci * Driver provided callback to delete flush data. 129bf215546Sopenharmony_ci */ 130bf215546Sopenharmony_citypedef void (*u_trace_delete_flush_data)(struct u_trace_context *utctx, 131bf215546Sopenharmony_ci void *flush_data); 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci/** 134bf215546Sopenharmony_ci * The trace context provides tracking for "in-flight" traces, once the 135bf215546Sopenharmony_ci * cmdstream that records timestamps has been flushed. 136bf215546Sopenharmony_ci */ 137bf215546Sopenharmony_cistruct u_trace_context { 138bf215546Sopenharmony_ci void *pctx; 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci u_trace_create_ts_buffer create_timestamp_buffer; 141bf215546Sopenharmony_ci u_trace_delete_ts_buffer delete_timestamp_buffer; 142bf215546Sopenharmony_ci u_trace_record_ts record_timestamp; 143bf215546Sopenharmony_ci u_trace_read_ts read_timestamp; 144bf215546Sopenharmony_ci u_trace_delete_flush_data delete_flush_data; 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci FILE *out; 147bf215546Sopenharmony_ci struct u_trace_printer *out_printer; 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci /* Once u_trace_flush() is called u_trace_chunk's are queued up to 150bf215546Sopenharmony_ci * render tracepoints on a queue. The per-chunk queue jobs block until 151bf215546Sopenharmony_ci * timestamps are available. 152bf215546Sopenharmony_ci */ 153bf215546Sopenharmony_ci struct util_queue queue; 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci#ifdef HAVE_PERFETTO 156bf215546Sopenharmony_ci /* node in global list of trace contexts. */ 157bf215546Sopenharmony_ci struct list_head node; 158bf215546Sopenharmony_ci#endif 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci /* State to accumulate time across N chunks associated with a single 161bf215546Sopenharmony_ci * batch (u_trace). 162bf215546Sopenharmony_ci */ 163bf215546Sopenharmony_ci uint64_t last_time_ns; 164bf215546Sopenharmony_ci uint64_t first_time_ns; 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci uint32_t frame_nr; 167bf215546Sopenharmony_ci uint32_t batch_nr; 168bf215546Sopenharmony_ci uint32_t event_nr; 169bf215546Sopenharmony_ci bool start_of_frame; 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci /* list of unprocessed trace chunks in fifo order: */ 172bf215546Sopenharmony_ci struct list_head flushed_trace_chunks; 173bf215546Sopenharmony_ci}; 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_ci/** 176bf215546Sopenharmony_ci * The u_trace ptr is passed as the first arg to generated tracepoints. 177bf215546Sopenharmony_ci * It provides buffering for tracepoint payload until the corresponding 178bf215546Sopenharmony_ci * driver cmdstream containing the emitted commands to capture is 179bf215546Sopenharmony_ci * flushed. 180bf215546Sopenharmony_ci * 181bf215546Sopenharmony_ci * Individual tracepoints emitted to u_trace are expected to be "executed" 182bf215546Sopenharmony_ci * (ie. timestamp captured) in FIFO order with respect to other tracepoints 183bf215546Sopenharmony_ci * emitted to the same u_trace. But the order WRT other u_trace instances 184bf215546Sopenharmony_ci * is undefined util u_trace_flush(). 185bf215546Sopenharmony_ci */ 186bf215546Sopenharmony_cistruct u_trace { 187bf215546Sopenharmony_ci struct u_trace_context *utctx; 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci struct list_head trace_chunks; /* list of unflushed trace chunks in fifo order */ 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci bool enabled; 192bf215546Sopenharmony_ci}; 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_civoid u_trace_context_init(struct u_trace_context *utctx, 195bf215546Sopenharmony_ci void *pctx, 196bf215546Sopenharmony_ci u_trace_create_ts_buffer create_timestamp_buffer, 197bf215546Sopenharmony_ci u_trace_delete_ts_buffer delete_timestamp_buffer, 198bf215546Sopenharmony_ci u_trace_record_ts record_timestamp, 199bf215546Sopenharmony_ci u_trace_read_ts read_timestamp, 200bf215546Sopenharmony_ci u_trace_delete_flush_data delete_flush_data); 201bf215546Sopenharmony_civoid u_trace_context_fini(struct u_trace_context *utctx); 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci/** 204bf215546Sopenharmony_ci * Flush (trigger processing) of traces previously flushed to the trace-context 205bf215546Sopenharmony_ci * by u_trace_flush(). 206bf215546Sopenharmony_ci * 207bf215546Sopenharmony_ci * This should typically be called in the driver's pctx->flush(). 208bf215546Sopenharmony_ci */ 209bf215546Sopenharmony_civoid u_trace_context_process(struct u_trace_context *utctx, bool eof); 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_civoid u_trace_init(struct u_trace *ut, struct u_trace_context *utctx); 212bf215546Sopenharmony_civoid u_trace_fini(struct u_trace *ut); 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_cibool u_trace_has_points(struct u_trace *ut); 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_cistruct u_trace_iterator 217bf215546Sopenharmony_ci{ 218bf215546Sopenharmony_ci struct u_trace *ut; 219bf215546Sopenharmony_ci struct u_trace_chunk *chunk; 220bf215546Sopenharmony_ci uint32_t event_idx; 221bf215546Sopenharmony_ci}; 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_cistruct u_trace_iterator 224bf215546Sopenharmony_ciu_trace_begin_iterator(struct u_trace *ut); 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_cistruct u_trace_iterator 227bf215546Sopenharmony_ciu_trace_end_iterator(struct u_trace *ut); 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_cibool 230bf215546Sopenharmony_ciu_trace_iterator_equal(struct u_trace_iterator a, 231bf215546Sopenharmony_ci struct u_trace_iterator b); 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_citypedef void (*u_trace_copy_ts_buffer)(struct u_trace_context *utctx, 234bf215546Sopenharmony_ci void *cmdstream, 235bf215546Sopenharmony_ci void *ts_from, uint32_t from_offset, 236bf215546Sopenharmony_ci void *ts_to, uint32_t to_offset, 237bf215546Sopenharmony_ci uint32_t count); 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci/** 240bf215546Sopenharmony_ci * Clones tracepoints range into target u_trace. 241bf215546Sopenharmony_ci * Provides callback for driver to copy timestamps on GPU from 242bf215546Sopenharmony_ci * one buffer to another. 243bf215546Sopenharmony_ci * 244bf215546Sopenharmony_ci * It allows: 245bf215546Sopenharmony_ci * - Tracing re-usable command buffer in Vulkan, by copying tracepoints 246bf215546Sopenharmony_ci * each time it is submitted. 247bf215546Sopenharmony_ci * - Per-tile tracing for tiling GPUs, by copying a range of tracepoints 248bf215546Sopenharmony_ci * corresponding to a tile. 249bf215546Sopenharmony_ci */ 250bf215546Sopenharmony_civoid u_trace_clone_append(struct u_trace_iterator begin_it, 251bf215546Sopenharmony_ci struct u_trace_iterator end_it, 252bf215546Sopenharmony_ci struct u_trace *into, 253bf215546Sopenharmony_ci void *cmdstream, 254bf215546Sopenharmony_ci u_trace_copy_ts_buffer copy_ts_buffer); 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_civoid u_trace_disable_event_range(struct u_trace_iterator begin_it, 257bf215546Sopenharmony_ci struct u_trace_iterator end_it); 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci/** 260bf215546Sopenharmony_ci * Flush traces to the parent trace-context. At this point, the expectation 261bf215546Sopenharmony_ci * is that all the tracepoints are "executed" by the GPU following any previously 262bf215546Sopenharmony_ci * flushed u_trace batch. 263bf215546Sopenharmony_ci * 264bf215546Sopenharmony_ci * flush_data is a way for driver to pass additional data, which becomes available 265bf215546Sopenharmony_ci * only at the point of flush, to the u_trace_read_ts callback and perfetto. 266bf215546Sopenharmony_ci * The typical example of such data would be a fence to wait on in u_trace_read_ts, 267bf215546Sopenharmony_ci * and a submission_id to pass into perfetto. 268bf215546Sopenharmony_ci * The destruction of the data is done via u_trace_delete_flush_data. 269bf215546Sopenharmony_ci * 270bf215546Sopenharmony_ci * This should typically be called when the corresponding cmdstream (containing 271bf215546Sopenharmony_ci * the timestamp reads) is flushed to the kernel. 272bf215546Sopenharmony_ci */ 273bf215546Sopenharmony_civoid u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data); 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci/** 276bf215546Sopenharmony_ci * Whether command buffers should be instrumented even if not collecting 277bf215546Sopenharmony_ci * traces. 278bf215546Sopenharmony_ci */ 279bf215546Sopenharmony_ciextern bool ut_trace_instrument; 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci#ifdef HAVE_PERFETTO 282bf215546Sopenharmony_ciextern int ut_perfetto_enabled; 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_civoid u_trace_perfetto_start(void); 285bf215546Sopenharmony_civoid u_trace_perfetto_stop(void); 286bf215546Sopenharmony_ci#else 287bf215546Sopenharmony_ci# define ut_perfetto_enabled 0 288bf215546Sopenharmony_ci#endif 289bf215546Sopenharmony_ci 290bf215546Sopenharmony_cistatic inline bool 291bf215546Sopenharmony_ciu_trace_context_actively_tracing(struct u_trace_context *utctx) 292bf215546Sopenharmony_ci{ 293bf215546Sopenharmony_ci return !!utctx->out || (ut_perfetto_enabled > 0); 294bf215546Sopenharmony_ci} 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_cistatic inline bool 297bf215546Sopenharmony_ciu_trace_context_instrumenting(struct u_trace_context *utctx) 298bf215546Sopenharmony_ci{ 299bf215546Sopenharmony_ci return !!utctx->out || ut_trace_instrument || (ut_perfetto_enabled > 0); 300bf215546Sopenharmony_ci} 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci#ifdef __cplusplus 303bf215546Sopenharmony_ci} 304bf215546Sopenharmony_ci#endif 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci#endif /* _U_TRACE_H */ 307