1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2021 Google, Inc.
3bf215546Sopenharmony_ci * SPDX-License-Identifier: MIT
4bf215546Sopenharmony_ci */
5bf215546Sopenharmony_ci
6bf215546Sopenharmony_ci#include <perfetto.h>
7bf215546Sopenharmony_ci
8bf215546Sopenharmony_ci#include "tu_perfetto.h"
9bf215546Sopenharmony_ci
10bf215546Sopenharmony_ci#include "util/u_perfetto.h"
11bf215546Sopenharmony_ci#include "util/hash_table.h"
12bf215546Sopenharmony_ci
13bf215546Sopenharmony_ci#include "tu_tracepoints.h"
14bf215546Sopenharmony_ci#include "tu_tracepoints_perfetto.h"
15bf215546Sopenharmony_ci
/* Perfetto clock id that GPU timestamps are reported against; it is
 * (re)computed in TuRenderpassDataSource::OnStart().
 */
static uint32_t gpu_clock_id = 0;

/* CPU boot-time (ns) before which sync_timestamp() does not emit another
 * clock snapshot.
 */
static uint64_t next_clock_sync_ns = 0;

/**
 * The GPU timestamp written into the most recently emitted clock snapshot.
 *
 * The first snapshot is captured from the CPU *after* the first GPU
 * tracepoints have already happened, so this is a *later* timestamp than
 * those of the first GPU traces.  To avoid confusing perfetto, GPU traces
 * from before the first snapshot need to be dropped; stage_end() skips
 * events for as long as this value is still zero.
 */
static uint64_t sync_gpu_ts = 0;

/* Suspend count seen by the last clock sync.  sync_timestamp() treats a
 * change as a suspend-resume cycle that reset the GPU timestamp.
 */
static uint64_t last_suspend_count = 0;

/* Largest offset-adjusted GPU timestamp (ns) recorded so far. */
static uint64_t gpu_max_timestamp = 0;

/* Value added to GPU timestamps (ns) so that they stay monotonic across
 * GPU timestamp resets.
 */
static uint64_t gpu_timestamp_offset = 0;
32bf215546Sopenharmony_ci
/* Incremental state attached to the renderpass data source.  The flag starts
 * out set; stage_end() sends the render-stage descriptors whenever it finds
 * the flag set and then clears it.
 */
struct TuRenderpassIncrementalState {
   bool was_cleared{true};
};
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_cistruct TuRenderpassTraits : public perfetto::DefaultDataSourceTraits {
38bf215546Sopenharmony_ci   using IncrementalStateType = TuRenderpassIncrementalState;
39bf215546Sopenharmony_ci};
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_ciclass TuRenderpassDataSource : public perfetto::DataSource<TuRenderpassDataSource, TuRenderpassTraits> {
42bf215546Sopenharmony_cipublic:
43bf215546Sopenharmony_ci   void OnSetup(const SetupArgs &) override
44bf215546Sopenharmony_ci   {
45bf215546Sopenharmony_ci      // Use this callback to apply any custom configuration to your data source
46bf215546Sopenharmony_ci      // based on the TraceConfig in SetupArgs.
47bf215546Sopenharmony_ci   }
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_ci   void OnStart(const StartArgs &) override
50bf215546Sopenharmony_ci   {
51bf215546Sopenharmony_ci      // This notification can be used to initialize the GPU driver, enable
52bf215546Sopenharmony_ci      // counters, etc. StartArgs will contains the DataSourceDescriptor,
53bf215546Sopenharmony_ci      // which can be extended.
54bf215546Sopenharmony_ci      u_trace_perfetto_start();
55bf215546Sopenharmony_ci      PERFETTO_LOG("Tracing started");
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci      /* Note: clock_id's below 128 are reserved.. for custom clock sources,
58bf215546Sopenharmony_ci       * using the hash of a namespaced string is the recommended approach.
59bf215546Sopenharmony_ci       * See: https://perfetto.dev/docs/concepts/clock-sync
60bf215546Sopenharmony_ci       */
61bf215546Sopenharmony_ci      gpu_clock_id =
62bf215546Sopenharmony_ci         _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci      gpu_timestamp_offset = 0;
65bf215546Sopenharmony_ci      gpu_max_timestamp = 0;
66bf215546Sopenharmony_ci      last_suspend_count = 0;
67bf215546Sopenharmony_ci   }
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_ci   void OnStop(const StopArgs &) override
70bf215546Sopenharmony_ci   {
71bf215546Sopenharmony_ci      PERFETTO_LOG("Tracing stopped");
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci      // Undo any initialization done in OnStart.
74bf215546Sopenharmony_ci      u_trace_perfetto_stop();
75bf215546Sopenharmony_ci      // TODO we should perhaps block until queued traces are flushed?
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci      Trace([](TuRenderpassDataSource::TraceContext ctx) {
78bf215546Sopenharmony_ci         auto packet = ctx.NewTracePacket();
79bf215546Sopenharmony_ci         packet->Finalize();
80bf215546Sopenharmony_ci         ctx.Flush();
81bf215546Sopenharmony_ci      });
82bf215546Sopenharmony_ci   }
83bf215546Sopenharmony_ci};
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ciPERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);
86bf215546Sopenharmony_ciPERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource);
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_cistatic void
89bf215546Sopenharmony_cisend_descriptors(TuRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)
90bf215546Sopenharmony_ci{
91bf215546Sopenharmony_ci   PERFETTO_LOG("Sending renderstage descriptors");
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci   auto packet = ctx.NewTracePacket();
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci   packet->set_timestamp(0);
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci   auto event = packet->set_gpu_render_stage_event();
98bf215546Sopenharmony_ci   event->set_gpu_id(0);
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci   auto spec = event->set_specifications();
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
103bf215546Sopenharmony_ci      auto desc = spec->add_hw_queue();
104bf215546Sopenharmony_ci
105bf215546Sopenharmony_ci      desc->set_name(queues[i].name);
106bf215546Sopenharmony_ci      desc->set_description(queues[i].desc);
107bf215546Sopenharmony_ci   }
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
110bf215546Sopenharmony_ci      auto desc = spec->add_stage();
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci      desc->set_name(stages[i].name);
113bf215546Sopenharmony_ci      if (stages[i].desc)
114bf215546Sopenharmony_ci         desc->set_description(stages[i].desc);
115bf215546Sopenharmony_ci   }
116bf215546Sopenharmony_ci}
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_cistatic void
119bf215546Sopenharmony_cistage_start(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage)
120bf215546Sopenharmony_ci{
121bf215546Sopenharmony_ci   struct tu_perfetto_state *p = tu_device_get_perfetto_state(dev);
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_ci   p->start_ts[stage] = ts_ns;
124bf215546Sopenharmony_ci}
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_citypedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void*);
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_cistatic void
129bf215546Sopenharmony_cistage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage,
130bf215546Sopenharmony_ci          uint32_t submission_id, const void* payload = nullptr,
131bf215546Sopenharmony_ci          trace_payload_as_extra_func payload_as_extra = nullptr)
132bf215546Sopenharmony_ci{
133bf215546Sopenharmony_ci   struct tu_perfetto_state *p = tu_device_get_perfetto_state(dev);
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci   /* If we haven't managed to calibrate the alignment between GPU and CPU
136bf215546Sopenharmony_ci    * timestamps yet, then skip this trace, otherwise perfetto won't know
137bf215546Sopenharmony_ci    * what to do with it.
138bf215546Sopenharmony_ci    */
139bf215546Sopenharmony_ci   if (!sync_gpu_ts)
140bf215546Sopenharmony_ci      return;
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci   TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
143bf215546Sopenharmony_ci      if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
144bf215546Sopenharmony_ci         send_descriptors(tctx, p->start_ts[stage]);
145bf215546Sopenharmony_ci         state->was_cleared = false;
146bf215546Sopenharmony_ci      }
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci      auto packet = tctx.NewTracePacket();
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci      gpu_max_timestamp = MAX2(gpu_max_timestamp, ts_ns + gpu_timestamp_offset);
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci      packet->set_timestamp(p->start_ts[stage] + gpu_timestamp_offset);
153bf215546Sopenharmony_ci      packet->set_timestamp_clock_id(gpu_clock_id);
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci      auto event = packet->set_gpu_render_stage_event();
156bf215546Sopenharmony_ci      event->set_event_id(0); // ???
157bf215546Sopenharmony_ci      event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
158bf215546Sopenharmony_ci      event->set_duration(ts_ns - p->start_ts[stage]);
159bf215546Sopenharmony_ci      event->set_stage_id(stage);
160bf215546Sopenharmony_ci      event->set_context((uintptr_t)dev);
161bf215546Sopenharmony_ci      event->set_submission_id(submission_id);
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci      if (payload && payload_as_extra) {
164bf215546Sopenharmony_ci         payload_as_extra(event, payload);
165bf215546Sopenharmony_ci      }
166bf215546Sopenharmony_ci   });
167bf215546Sopenharmony_ci}
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_ci#ifdef __cplusplus
170bf215546Sopenharmony_ciextern "C" {
171bf215546Sopenharmony_ci#endif
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_civoid
174bf215546Sopenharmony_citu_perfetto_init(void)
175bf215546Sopenharmony_ci{
176bf215546Sopenharmony_ci   util_perfetto_init();
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci   perfetto::DataSourceDescriptor dsd;
179bf215546Sopenharmony_ci   dsd.set_name("gpu.renderstages.msm");
180bf215546Sopenharmony_ci   TuRenderpassDataSource::Register(dsd);
181bf215546Sopenharmony_ci}
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_cistatic void
184bf215546Sopenharmony_cisync_timestamp(struct tu_device *dev)
185bf215546Sopenharmony_ci{
186bf215546Sopenharmony_ci   uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
187bf215546Sopenharmony_ci   uint64_t gpu_ts = 0;
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   if (cpu_ts < next_clock_sync_ns)
190bf215546Sopenharmony_ci      return;
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci    if (tu_device_get_gpu_timestamp(dev, &gpu_ts)) {
193bf215546Sopenharmony_ci      PERFETTO_ELOG("Could not sync CPU and GPU clocks");
194bf215546Sopenharmony_ci      return;
195bf215546Sopenharmony_ci    }
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci   uint64_t current_suspend_count = 0;
198bf215546Sopenharmony_ci   /* If we fail to get it we will use a fallback */
199bf215546Sopenharmony_ci   tu_device_get_suspend_count(dev, &current_suspend_count);
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci   /* convert GPU ts into ns: */
202bf215546Sopenharmony_ci   gpu_ts = tu_device_ticks_to_ns(dev, gpu_ts);
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_ci   /* GPU timestamp is being reset after suspend-resume cycle.
205bf215546Sopenharmony_ci    * Perfetto requires clock snapshots to be monotonic,
206bf215546Sopenharmony_ci    * so we have to fix-up the time.
207bf215546Sopenharmony_ci    */
208bf215546Sopenharmony_ci   if (current_suspend_count != last_suspend_count) {
209bf215546Sopenharmony_ci      gpu_timestamp_offset = gpu_max_timestamp;
210bf215546Sopenharmony_ci      last_suspend_count = current_suspend_count;
211bf215546Sopenharmony_ci   }
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci   gpu_ts += gpu_timestamp_offset;
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci   /* Fallback check, detect non-monotonic cases which would happen
216bf215546Sopenharmony_ci    * if we cannot retrieve suspend count.
217bf215546Sopenharmony_ci    */
218bf215546Sopenharmony_ci   if (sync_gpu_ts > gpu_ts) {
219bf215546Sopenharmony_ci      gpu_ts += (gpu_max_timestamp - gpu_timestamp_offset);
220bf215546Sopenharmony_ci      gpu_timestamp_offset = gpu_max_timestamp;
221bf215546Sopenharmony_ci   }
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_ci   if (sync_gpu_ts > gpu_ts) {
224bf215546Sopenharmony_ci      PERFETTO_ELOG("Non-monotonic gpu timestamp detected, bailing out");
225bf215546Sopenharmony_ci      return;
226bf215546Sopenharmony_ci   }
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci   gpu_max_timestamp = gpu_ts;
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci   TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
231bf215546Sopenharmony_ci      auto packet = tctx.NewTracePacket();
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci      packet->set_timestamp(cpu_ts);
234bf215546Sopenharmony_ci
235bf215546Sopenharmony_ci      auto event = packet->set_clock_snapshot();
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci      {
238bf215546Sopenharmony_ci         auto clock = event->add_clocks();
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci         clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
241bf215546Sopenharmony_ci         clock->set_timestamp(cpu_ts);
242bf215546Sopenharmony_ci      }
243bf215546Sopenharmony_ci
244bf215546Sopenharmony_ci      {
245bf215546Sopenharmony_ci         auto clock = event->add_clocks();
246bf215546Sopenharmony_ci
247bf215546Sopenharmony_ci         clock->set_clock_id(gpu_clock_id);
248bf215546Sopenharmony_ci         clock->set_timestamp(gpu_ts);
249bf215546Sopenharmony_ci      }
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci      sync_gpu_ts = gpu_ts;
252bf215546Sopenharmony_ci      next_clock_sync_ns = cpu_ts + 30000000;
253bf215546Sopenharmony_ci   });
254bf215546Sopenharmony_ci}
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_cistatic void
257bf215546Sopenharmony_ciemit_submit_id(uint32_t submission_id)
258bf215546Sopenharmony_ci{
259bf215546Sopenharmony_ci   TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) {
260bf215546Sopenharmony_ci      auto packet = tctx.NewTracePacket();
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_ci      packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci      auto event = packet->set_vulkan_api_event();
265bf215546Sopenharmony_ci      auto submit = event->set_vk_queue_submit();
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci      submit->set_submission_id(submission_id);
268bf215546Sopenharmony_ci   });
269bf215546Sopenharmony_ci}
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_civoid
272bf215546Sopenharmony_citu_perfetto_submit(struct tu_device *dev, uint32_t submission_id)
273bf215546Sopenharmony_ci{
274bf215546Sopenharmony_ci   /* sync_timestamp isn't free */
275bf215546Sopenharmony_ci   if (!ut_perfetto_enabled)
276bf215546Sopenharmony_ci      return;
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci   sync_timestamp(dev);
279bf215546Sopenharmony_ci   emit_submit_id(submission_id);
280bf215546Sopenharmony_ci}
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_ci/*
283bf215546Sopenharmony_ci * Trace callbacks, called from u_trace once the timestamps from GPU have been
284bf215546Sopenharmony_ci * collected.
285bf215546Sopenharmony_ci */
286bf215546Sopenharmony_ci
287bf215546Sopenharmony_ci#define CREATE_EVENT_CALLBACK(event_name, stage)                              \
288bf215546Sopenharmony_civoid                                                                          \
289bf215546Sopenharmony_citu_start_##event_name(struct tu_device *dev, uint64_t ts_ns,                  \
290bf215546Sopenharmony_ci                   const void *flush_data,                                    \
291bf215546Sopenharmony_ci                   const struct trace_start_##event_name *payload)            \
292bf215546Sopenharmony_ci{                                                                             \
293bf215546Sopenharmony_ci   stage_start(dev, ts_ns, stage);                                            \
294bf215546Sopenharmony_ci}                                                                             \
295bf215546Sopenharmony_ci                                                                              \
296bf215546Sopenharmony_civoid                                                                          \
297bf215546Sopenharmony_citu_end_##event_name(struct tu_device *dev, uint64_t ts_ns,                    \
298bf215546Sopenharmony_ci                   const void *flush_data,                                    \
299bf215546Sopenharmony_ci                   const struct trace_end_##event_name *payload)              \
300bf215546Sopenharmony_ci{                                                                             \
301bf215546Sopenharmony_ci   auto trace_flush_data = (const struct tu_u_trace_submission_data *) flush_data; \
302bf215546Sopenharmony_ci   uint32_t submission_id =                                                        \
303bf215546Sopenharmony_ci      tu_u_trace_submission_data_get_submit_id(trace_flush_data);                  \
304bf215546Sopenharmony_ci   stage_end(dev, ts_ns, stage, submission_id, payload,                            \
305bf215546Sopenharmony_ci      (trace_payload_as_extra_func) &trace_payload_as_extra_end_##event_name);     \
306bf215546Sopenharmony_ci}
307bf215546Sopenharmony_ci
308bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(render_pass, SURFACE_STAGE_ID)
309bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(binning_ib, BINNING_STAGE_ID)
310bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID)
311bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
312bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
313bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
314bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(gmem_clear, CLEAR_GMEM_STAGE_ID)
315bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(sysmem_clear, CLEAR_SYSMEM_STAGE_ID)
316bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(sysmem_clear_all, CLEAR_SYSMEM_STAGE_ID)
317bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(gmem_load, GMEM_LOAD_STAGE_ID)
318bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(gmem_store, GMEM_STORE_STAGE_ID)
319bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(sysmem_resolve, SYSMEM_RESOLVE_STAGE_ID)
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci#ifdef __cplusplus
322bf215546Sopenharmony_ci}
323bf215546Sopenharmony_ci#endif
324