1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Google, Inc. 3bf215546Sopenharmony_ci * SPDX-License-Identifier: MIT 4bf215546Sopenharmony_ci */ 5bf215546Sopenharmony_ci 6bf215546Sopenharmony_ci#include <perfetto.h> 7bf215546Sopenharmony_ci 8bf215546Sopenharmony_ci#include "tu_perfetto.h" 9bf215546Sopenharmony_ci 10bf215546Sopenharmony_ci#include "util/u_perfetto.h" 11bf215546Sopenharmony_ci#include "util/hash_table.h" 12bf215546Sopenharmony_ci 13bf215546Sopenharmony_ci#include "tu_tracepoints.h" 14bf215546Sopenharmony_ci#include "tu_tracepoints_perfetto.h" 15bf215546Sopenharmony_ci 16bf215546Sopenharmony_cistatic uint32_t gpu_clock_id; 17bf215546Sopenharmony_cistatic uint64_t next_clock_sync_ns; /* cpu time of next clk sync */ 18bf215546Sopenharmony_ci 19bf215546Sopenharmony_ci/** 20bf215546Sopenharmony_ci * The timestamp at the point where we first emitted the clock_sync.. 21bf215546Sopenharmony_ci * this will be a *later* timestamp that the first GPU traces (since 22bf215546Sopenharmony_ci * we capture the first clock_sync from the CPU *after* the first GPU 23bf215546Sopenharmony_ci * tracepoints happen). To avoid confusing perfetto we need to drop 24bf215546Sopenharmony_ci * the GPU traces with timestamps before this. 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_cistatic uint64_t sync_gpu_ts; 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_cistatic uint64_t last_suspend_count; 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_cistatic uint64_t gpu_max_timestamp; 31bf215546Sopenharmony_cistatic uint64_t gpu_timestamp_offset; 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_cistruct TuRenderpassIncrementalState { 34bf215546Sopenharmony_ci bool was_cleared = true; 35bf215546Sopenharmony_ci}; 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_cistruct TuRenderpassTraits : public perfetto::DefaultDataSourceTraits { 38bf215546Sopenharmony_ci using IncrementalStateType = TuRenderpassIncrementalState; 39bf215546Sopenharmony_ci}; 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_ciclass TuRenderpassDataSource : public perfetto::DataSource<TuRenderpassDataSource, TuRenderpassTraits> { 42bf215546Sopenharmony_cipublic: 43bf215546Sopenharmony_ci void OnSetup(const SetupArgs &) override 44bf215546Sopenharmony_ci { 45bf215546Sopenharmony_ci // Use this callback to apply any custom configuration to your data source 46bf215546Sopenharmony_ci // based on the TraceConfig in SetupArgs. 47bf215546Sopenharmony_ci } 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ci void OnStart(const StartArgs &) override 50bf215546Sopenharmony_ci { 51bf215546Sopenharmony_ci // This notification can be used to initialize the GPU driver, enable 52bf215546Sopenharmony_ci // counters, etc. StartArgs will contains the DataSourceDescriptor, 53bf215546Sopenharmony_ci // which can be extended. 54bf215546Sopenharmony_ci u_trace_perfetto_start(); 55bf215546Sopenharmony_ci PERFETTO_LOG("Tracing started"); 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci /* Note: clock_id's below 128 are reserved.. for custom clock sources, 58bf215546Sopenharmony_ci * using the hash of a namespaced string is the recommended approach. 59bf215546Sopenharmony_ci * See: https://perfetto.dev/docs/concepts/clock-sync 60bf215546Sopenharmony_ci */ 61bf215546Sopenharmony_ci gpu_clock_id = 62bf215546Sopenharmony_ci _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci gpu_timestamp_offset = 0; 65bf215546Sopenharmony_ci gpu_max_timestamp = 0; 66bf215546Sopenharmony_ci last_suspend_count = 0; 67bf215546Sopenharmony_ci } 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci void OnStop(const StopArgs &) override 70bf215546Sopenharmony_ci { 71bf215546Sopenharmony_ci PERFETTO_LOG("Tracing stopped"); 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci // Undo any initialization done in OnStart. 74bf215546Sopenharmony_ci u_trace_perfetto_stop(); 75bf215546Sopenharmony_ci // TODO we should perhaps block until queued traces are flushed? 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci Trace([](TuRenderpassDataSource::TraceContext ctx) { 78bf215546Sopenharmony_ci auto packet = ctx.NewTracePacket(); 79bf215546Sopenharmony_ci packet->Finalize(); 80bf215546Sopenharmony_ci ctx.Flush(); 81bf215546Sopenharmony_ci }); 82bf215546Sopenharmony_ci } 83bf215546Sopenharmony_ci}; 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ciPERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource); 86bf215546Sopenharmony_ciPERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(TuRenderpassDataSource); 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_cistatic void 89bf215546Sopenharmony_cisend_descriptors(TuRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns) 90bf215546Sopenharmony_ci{ 91bf215546Sopenharmony_ci PERFETTO_LOG("Sending renderstage descriptors"); 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci auto packet = ctx.NewTracePacket(); 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci packet->set_timestamp(0); 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci auto event = packet->set_gpu_render_stage_event(); 98bf215546Sopenharmony_ci event->set_gpu_id(0); 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci auto spec = event->set_specifications(); 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) { 103bf215546Sopenharmony_ci auto desc = spec->add_hw_queue(); 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci desc->set_name(queues[i].name); 106bf215546Sopenharmony_ci desc->set_description(queues[i].desc); 107bf215546Sopenharmony_ci } 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) { 110bf215546Sopenharmony_ci auto desc = spec->add_stage(); 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci desc->set_name(stages[i].name); 113bf215546Sopenharmony_ci if (stages[i].desc) 114bf215546Sopenharmony_ci desc->set_description(stages[i].desc); 115bf215546Sopenharmony_ci } 116bf215546Sopenharmony_ci} 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_cistatic void 119bf215546Sopenharmony_cistage_start(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage) 120bf215546Sopenharmony_ci{ 121bf215546Sopenharmony_ci struct tu_perfetto_state *p = tu_device_get_perfetto_state(dev); 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_ci p->start_ts[stage] = ts_ns; 124bf215546Sopenharmony_ci} 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_citypedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void*); 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_cistatic void 129bf215546Sopenharmony_cistage_end(struct tu_device *dev, uint64_t ts_ns, enum tu_stage_id stage, 130bf215546Sopenharmony_ci uint32_t submission_id, const void* payload = nullptr, 131bf215546Sopenharmony_ci trace_payload_as_extra_func payload_as_extra = nullptr) 132bf215546Sopenharmony_ci{ 133bf215546Sopenharmony_ci struct tu_perfetto_state *p = tu_device_get_perfetto_state(dev); 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci /* If we haven't managed to calibrate the alignment between GPU and CPU 136bf215546Sopenharmony_ci * timestamps yet, then skip this trace, otherwise perfetto won't know 137bf215546Sopenharmony_ci * what to do with it. 138bf215546Sopenharmony_ci */ 139bf215546Sopenharmony_ci if (!sync_gpu_ts) 140bf215546Sopenharmony_ci return; 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) { 143bf215546Sopenharmony_ci if (auto state = tctx.GetIncrementalState(); state->was_cleared) { 144bf215546Sopenharmony_ci send_descriptors(tctx, p->start_ts[stage]); 145bf215546Sopenharmony_ci state->was_cleared = false; 146bf215546Sopenharmony_ci } 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci auto packet = tctx.NewTracePacket(); 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci gpu_max_timestamp = MAX2(gpu_max_timestamp, ts_ns + gpu_timestamp_offset); 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci packet->set_timestamp(p->start_ts[stage] + gpu_timestamp_offset); 153bf215546Sopenharmony_ci packet->set_timestamp_clock_id(gpu_clock_id); 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci auto event = packet->set_gpu_render_stage_event(); 156bf215546Sopenharmony_ci event->set_event_id(0); // ??? 157bf215546Sopenharmony_ci event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID); 158bf215546Sopenharmony_ci event->set_duration(ts_ns - p->start_ts[stage]); 159bf215546Sopenharmony_ci event->set_stage_id(stage); 160bf215546Sopenharmony_ci event->set_context((uintptr_t)dev); 161bf215546Sopenharmony_ci event->set_submission_id(submission_id); 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci if (payload && payload_as_extra) { 164bf215546Sopenharmony_ci payload_as_extra(event, payload); 165bf215546Sopenharmony_ci } 166bf215546Sopenharmony_ci }); 167bf215546Sopenharmony_ci} 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci#ifdef __cplusplus 170bf215546Sopenharmony_ciextern "C" { 171bf215546Sopenharmony_ci#endif 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_civoid 174bf215546Sopenharmony_citu_perfetto_init(void) 175bf215546Sopenharmony_ci{ 176bf215546Sopenharmony_ci util_perfetto_init(); 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci perfetto::DataSourceDescriptor dsd; 179bf215546Sopenharmony_ci dsd.set_name("gpu.renderstages.msm"); 180bf215546Sopenharmony_ci TuRenderpassDataSource::Register(dsd); 181bf215546Sopenharmony_ci} 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_cistatic void 184bf215546Sopenharmony_cisync_timestamp(struct tu_device *dev) 185bf215546Sopenharmony_ci{ 186bf215546Sopenharmony_ci uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count(); 187bf215546Sopenharmony_ci uint64_t gpu_ts = 0; 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci if (cpu_ts < next_clock_sync_ns) 190bf215546Sopenharmony_ci return; 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci if (tu_device_get_gpu_timestamp(dev, &gpu_ts)) { 193bf215546Sopenharmony_ci PERFETTO_ELOG("Could not sync CPU and GPU clocks"); 194bf215546Sopenharmony_ci return; 195bf215546Sopenharmony_ci } 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci uint64_t current_suspend_count = 0; 198bf215546Sopenharmony_ci /* If we fail to get it we will use a fallback */ 199bf215546Sopenharmony_ci tu_device_get_suspend_count(dev, ¤t_suspend_count); 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci /* convert GPU ts into ns: */ 202bf215546Sopenharmony_ci gpu_ts = tu_device_ticks_to_ns(dev, gpu_ts); 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci /* GPU timestamp is being reset after suspend-resume cycle. 205bf215546Sopenharmony_ci * Perfetto requires clock snapshots to be monotonic, 206bf215546Sopenharmony_ci * so we have to fix-up the time. 207bf215546Sopenharmony_ci */ 208bf215546Sopenharmony_ci if (current_suspend_count != last_suspend_count) { 209bf215546Sopenharmony_ci gpu_timestamp_offset = gpu_max_timestamp; 210bf215546Sopenharmony_ci last_suspend_count = current_suspend_count; 211bf215546Sopenharmony_ci } 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci gpu_ts += gpu_timestamp_offset; 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci /* Fallback check, detect non-monotonic cases which would happen 216bf215546Sopenharmony_ci * if we cannot retrieve suspend count. 217bf215546Sopenharmony_ci */ 218bf215546Sopenharmony_ci if (sync_gpu_ts > gpu_ts) { 219bf215546Sopenharmony_ci gpu_ts += (gpu_max_timestamp - gpu_timestamp_offset); 220bf215546Sopenharmony_ci gpu_timestamp_offset = gpu_max_timestamp; 221bf215546Sopenharmony_ci } 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci if (sync_gpu_ts > gpu_ts) { 224bf215546Sopenharmony_ci PERFETTO_ELOG("Non-monotonic gpu timestamp detected, bailing out"); 225bf215546Sopenharmony_ci return; 226bf215546Sopenharmony_ci } 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci gpu_max_timestamp = gpu_ts; 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) { 231bf215546Sopenharmony_ci auto packet = tctx.NewTracePacket(); 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci packet->set_timestamp(cpu_ts); 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci auto event = packet->set_clock_snapshot(); 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci { 238bf215546Sopenharmony_ci auto clock = event->add_clocks(); 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); 241bf215546Sopenharmony_ci clock->set_timestamp(cpu_ts); 242bf215546Sopenharmony_ci } 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci { 245bf215546Sopenharmony_ci auto clock = event->add_clocks(); 246bf215546Sopenharmony_ci 247bf215546Sopenharmony_ci clock->set_clock_id(gpu_clock_id); 248bf215546Sopenharmony_ci clock->set_timestamp(gpu_ts); 249bf215546Sopenharmony_ci } 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci sync_gpu_ts = gpu_ts; 252bf215546Sopenharmony_ci next_clock_sync_ns = cpu_ts + 30000000; 253bf215546Sopenharmony_ci }); 254bf215546Sopenharmony_ci} 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_cistatic void 257bf215546Sopenharmony_ciemit_submit_id(uint32_t submission_id) 258bf215546Sopenharmony_ci{ 259bf215546Sopenharmony_ci TuRenderpassDataSource::Trace([=](TuRenderpassDataSource::TraceContext tctx) { 260bf215546Sopenharmony_ci auto packet = tctx.NewTracePacket(); 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci packet->set_timestamp(perfetto::base::GetBootTimeNs().count()); 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_ci auto event = packet->set_vulkan_api_event(); 265bf215546Sopenharmony_ci auto submit = event->set_vk_queue_submit(); 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci submit->set_submission_id(submission_id); 268bf215546Sopenharmony_ci }); 269bf215546Sopenharmony_ci} 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_civoid 272bf215546Sopenharmony_citu_perfetto_submit(struct tu_device *dev, uint32_t submission_id) 273bf215546Sopenharmony_ci{ 274bf215546Sopenharmony_ci /* sync_timestamp isn't free */ 275bf215546Sopenharmony_ci if (!ut_perfetto_enabled) 276bf215546Sopenharmony_ci return; 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci sync_timestamp(dev); 279bf215546Sopenharmony_ci emit_submit_id(submission_id); 280bf215546Sopenharmony_ci} 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ci/* 283bf215546Sopenharmony_ci * Trace callbacks, called from u_trace once the timestamps from GPU have been 284bf215546Sopenharmony_ci * collected. 285bf215546Sopenharmony_ci */ 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci#define CREATE_EVENT_CALLBACK(event_name, stage) \ 288bf215546Sopenharmony_civoid \ 289bf215546Sopenharmony_citu_start_##event_name(struct tu_device *dev, uint64_t ts_ns, \ 290bf215546Sopenharmony_ci const void *flush_data, \ 291bf215546Sopenharmony_ci const struct trace_start_##event_name *payload) \ 292bf215546Sopenharmony_ci{ \ 293bf215546Sopenharmony_ci stage_start(dev, ts_ns, stage); \ 294bf215546Sopenharmony_ci} \ 295bf215546Sopenharmony_ci \ 296bf215546Sopenharmony_civoid \ 297bf215546Sopenharmony_citu_end_##event_name(struct tu_device *dev, uint64_t ts_ns, \ 298bf215546Sopenharmony_ci const void *flush_data, \ 299bf215546Sopenharmony_ci const struct trace_end_##event_name *payload) \ 300bf215546Sopenharmony_ci{ \ 301bf215546Sopenharmony_ci auto trace_flush_data = (const struct tu_u_trace_submission_data *) flush_data; \ 302bf215546Sopenharmony_ci uint32_t submission_id = \ 303bf215546Sopenharmony_ci tu_u_trace_submission_data_get_submit_id(trace_flush_data); \ 304bf215546Sopenharmony_ci stage_end(dev, ts_ns, stage, submission_id, payload, \ 305bf215546Sopenharmony_ci (trace_payload_as_extra_func) &trace_payload_as_extra_end_##event_name); \ 306bf215546Sopenharmony_ci} 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(render_pass, SURFACE_STAGE_ID) 309bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(binning_ib, BINNING_STAGE_ID) 310bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID) 311bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID) 312bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID) 313bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID) 314bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(gmem_clear, CLEAR_GMEM_STAGE_ID) 315bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(sysmem_clear, CLEAR_SYSMEM_STAGE_ID) 316bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(sysmem_clear_all, CLEAR_SYSMEM_STAGE_ID) 317bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(gmem_load, GMEM_LOAD_STAGE_ID) 318bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(gmem_store, GMEM_STORE_STAGE_ID) 319bf215546Sopenharmony_ciCREATE_EVENT_CALLBACK(sysmem_resolve, SYSMEM_RESOLVE_STAGE_ID) 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci#ifdef __cplusplus 322bf215546Sopenharmony_ci} 323bf215546Sopenharmony_ci#endif 324