1/*
2 * Copyright © 2019-2021 Collabora, Ltd.
3 * Author: Antonio Caggiano <antonio.caggiano@collabora.com>
4 * Author: Rohan Garg <rohan.garg@collabora.com>
5 * Author: Robert Beckett <bob.beckett@collabora.com>
6 * Author: Corentin Noël <corentin.noel@collabora.com>
7 *
8 * SPDX-License-Identifier: MIT
9 */
10
11#include "pps_datasource.h"
12#include "pps_driver.h"
13
14#include <condition_variable>
15#include <thread>
16#include <variant>
17
/// Minimum supported sampling period in nanoseconds
constexpr int MIN_SAMPLING_PERIOD_NS = 50000;

/// Minimum time, in nanoseconds, that must elapse since the last clock
/// snapshot before another GPU/CPU timestamp correlation packet is emitted
constexpr unsigned long long CORRELATION_TIMESTAMP_PERIOD = 1000000000ull;
22
23namespace pps
24{
namespace
{
/// Driver name handed to register_data_source(); only DRM devices with this
/// name are considered during setup
std::string driver_name;

/// Protects `started` and is the mutex paired with `started_cv`
std::mutex started_m;
/// Notified when the data source has been started
std::condition_variable started_cv;
/// True between a start and the following stop of the data source
bool started = false;
} // namespace
31
/// Convert a nanosecond duration into fractional milliseconds.
///
/// @param t Duration expressed in nanoseconds.
/// @return The duration in milliseconds as a single-precision float.
float ms(const std::chrono::nanoseconds &t)
{
   constexpr float ns_per_ms = 1000000.0f;
   const auto ticks = t.count();
   return ticks / ns_per_ms;
}
36
37void GpuDataSource::OnSetup(const SetupArgs &args)
38{
39   // Create drivers for all supported devices
40   auto drm_devices = DrmDevice::create_all();
41   for (auto &drm_device : drm_devices) {
42      if (drm_device.name != driver_name)
43         continue;
44
45      if (auto driver = Driver::get_driver(std::move(drm_device))) {
46         if (!driver->init_perfcnt()) {
47            // Skip failing driver
48            PPS_LOG_ERROR("Failed to initialize %s driver", driver->drm_device.name.c_str());
49            continue;
50         }
51
52         this->driver = driver;
53      }
54   }
55   if (driver == nullptr) {
56      PPS_LOG_FATAL("No DRM devices supported");
57   }
58
59   // Parse perfetto config
60   const std::string &config_raw = args.config->gpu_counter_config_raw();
61   perfetto::protos::pbzero::GpuCounterConfig::Decoder config(config_raw);
62
63   if (config.has_counter_ids()) {
64      // Get enabled counters
65      PPS_LOG_IMPORTANT("Selecting counters");
66      for (auto it = config.counter_ids(); it; ++it) {
67         uint32_t counter_id = it->as_uint32();
68         driver->enable_counter(counter_id);
69      }
70   } else {
71      // Enable all counters
72      driver->enable_all_counters();
73   }
74
75   // Get sampling period
76   auto min_sampling_period = std::chrono::nanoseconds(MIN_SAMPLING_PERIOD_NS);
77
78   auto dev_supported = std::chrono::nanoseconds(driver->get_min_sampling_period_ns());
79   if (dev_supported > min_sampling_period) {
80      min_sampling_period = dev_supported;
81   }
82
83   time_to_sleep = std::max(time_to_sleep, min_sampling_period);
84
85   if (config.has_counter_period_ns()) {
86      auto requested_sampling_period = std::chrono::nanoseconds(config.counter_period_ns());
87      if (requested_sampling_period < min_sampling_period) {
88         PPS_LOG_ERROR("Sampling period should be greater than %" PRIu64 " ns (%.2f ms)",
89            uint64_t(min_sampling_period.count()),
90            ms(min_sampling_period));
91      } else {
92         time_to_sleep = requested_sampling_period;
93      }
94   }
95   PPS_LOG("Sampling period set to %" PRIu64 " ns", uint64_t(time_to_sleep.count()));
96}
97
98void GpuDataSource::OnStart(const StartArgs &args)
99{
100   driver->enable_perfcnt(time_to_sleep.count());
101
102   state = State::Start;
103
104   {
105      std::lock_guard<std::mutex> lock(started_m);
106      started = true;
107   }
108   started_cv.notify_all();
109}
110
111void close_callback(GpuDataSource::TraceContext ctx)
112{
113   auto packet = ctx.NewTracePacket();
114   packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
115   packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
116   packet->Finalize();
117   ctx.Flush();
118   PPS_LOG("Context flushed");
119}
120
121void GpuDataSource::OnStop(const StopArgs &args)
122{
123   state = State::Stop;
124   auto stop_closure = args.HandleStopAsynchronously();
125   Trace(close_callback);
126   stop_closure();
127
128   driver->disable_perfcnt();
129   driver = nullptr;
130
131   std::lock_guard<std::mutex> lock(started_m);
132   started = false;
133}
134
135void GpuDataSource::wait_started()
136{
137   std::unique_lock<std::mutex> lock(started_m);
138   if (!started) {
139      PPS_LOG("Waiting for start");
140      started_cv.wait(lock, [] { return started; });
141   }
142}
143
144void GpuDataSource::register_data_source(const std::string &_driver_name)
145{
146   driver_name = _driver_name;
147   static perfetto::DataSourceDescriptor dsd;
148   dsd.set_name("gpu.counters." + driver_name);
149   Register(dsd);
150}
151
152void add_group(perfetto::protos::pbzero::GpuCounterDescriptor *desc,
153   const CounterGroup &group,
154   const std::string &prefix,
155   int32_t gpu_num)
156{
157   if (!group.counters.empty()) {
158      // Define a block for each group containing counters
159      auto block_desc = desc->add_blocks();
160      block_desc->set_name(prefix + "." + group.name);
161      block_desc->set_block_id(group.id);
162
163      // Associate counters to blocks
164      for (auto id : group.counters) {
165         block_desc->add_counter_ids(id);
166      }
167   }
168
169   for (auto const &sub : group.subgroups) {
170      // Perfetto doesnt currently support nested groups.
171      // Flatten group hierarchy, using dot separator
172      add_group(desc, sub, prefix + "." + group.name, gpu_num);
173   }
174}
175
176void add_descriptors(perfetto::protos::pbzero::GpuCounterEvent *event,
177   std::vector<CounterGroup> const &groups,
178   std::vector<Counter> const &counters,
179   Driver &driver)
180{
181   // Start a counter descriptor
182   auto desc = event->set_counter_descriptor();
183
184   // Add the groups
185   for (auto const &group : groups) {
186      add_group(desc, group, driver.drm_device.name, driver.drm_device.gpu_num);
187   }
188
189   // Add the counters
190   for (auto const &counter : counters) {
191      auto spec = desc->add_specs();
192      spec->set_counter_id(counter.id);
193      spec->set_name(counter.name);
194
195      auto units = perfetto::protos::pbzero::GpuCounterDescriptor::NONE;
196      switch (counter.units) {
197      case Counter::Units::Percent:
198         units = perfetto::protos::pbzero::GpuCounterDescriptor::PERCENT;
199         break;
200      case Counter::Units::Byte:
201         units = perfetto::protos::pbzero::GpuCounterDescriptor::BYTE;
202         break;
203      case Counter::Units::Hertz:
204         units = perfetto::protos::pbzero::GpuCounterDescriptor::HERTZ;
205         break;
206      case Counter::Units::None:
207         units = perfetto::protos::pbzero::GpuCounterDescriptor::NONE;
208         break;
209      default:
210         assert(false && "Missing counter units type!");
211         break;
212      }
213      spec->add_numerator_units(units);
214   }
215}
216
217void add_samples(perfetto::protos::pbzero::GpuCounterEvent &event, const Driver &driver)
218{
219   if (driver.enabled_counters.size() == 0) {
220      PPS_LOG_FATAL("There are no counters enabled");
221   }
222
223   for (const auto &counter : driver.enabled_counters) {
224      auto counter_event = event.add_counters();
225
226      counter_event->set_counter_id(counter.id);
227
228      auto value = counter.get_value(driver);
229      if (auto d_value = std::get_if<double>(&value)) {
230         counter_event->set_double_value(*d_value);
231      } else if (auto i_value = std::get_if<int64_t>(&value)) {
232         counter_event->set_int_value(*i_value);
233      } else {
234         PPS_LOG_ERROR("Failed to get value for counter %s", counter.name.c_str());
235      }
236   }
237}
238
239void add_timestamp(perfetto::protos::pbzero::ClockSnapshot *event, const Driver *driver)
240{
241   uint32_t gpu_clock_id = driver->gpu_clock_id();
242   if (perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME == gpu_clock_id)
243      return;
244
245   // Send a correlation event between GPU & CPU timestamps
246   uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
247   uint64_t gpu_ts = driver->gpu_timestamp();
248
249   {
250      auto clock = event->add_clocks();
251
252      clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
253      clock->set_timestamp(cpu_ts);
254   }
255
256   {
257      auto clock = event->add_clocks();
258
259      clock->set_clock_id(gpu_clock_id);
260      clock->set_timestamp(gpu_ts);
261   }
262}
263
264void GpuDataSource::trace(TraceContext &ctx)
265{
266   using namespace perfetto::protos::pbzero;
267
268   if (auto state = ctx.GetIncrementalState(); state->was_cleared) {
269      descriptor_timestamp = perfetto::base::GetBootTimeNs().count();
270
271      {
272         // Mark any incremental state before this point invalid
273         auto packet = ctx.NewTracePacket();
274         packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
275         packet->set_timestamp(descriptor_timestamp);
276         packet->set_sequence_flags(TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
277      }
278
279      descriptor_timestamp = perfetto::base::GetBootTimeNs().count();
280      {
281         // Counter descriptions
282         auto packet = ctx.NewTracePacket();
283         packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
284         packet->set_timestamp(descriptor_timestamp);
285         auto event = packet->set_gpu_counter_event();
286         event->set_gpu_id(driver->drm_device.gpu_num);
287
288         auto &groups = driver->groups;
289         auto &counters = driver->enabled_counters;
290         add_descriptors(event, groups, counters, *driver);
291      }
292
293      {
294         // Initial timestamp correlation event
295         auto packet = ctx.NewTracePacket();
296         packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
297         packet->set_timestamp(descriptor_timestamp);
298         last_correlation_timestamp = perfetto::base::GetBootTimeNs().count();
299         auto event = packet->set_clock_snapshot();
300         add_timestamp(event, driver);
301      }
302
303      // Capture GPU timestamp of the first packet. Anything prior to this can
304      // be discarded.
305      descriptor_gpu_timestamp = driver->gpu_timestamp();
306      state->was_cleared = false;
307   }
308
309   // Save current scheduler for restoring later
310   int prev_sched_policy = sched_getscheduler(0);
311   sched_param prev_priority_param;
312   sched_getparam(0, &prev_priority_param);
313
314   // Use FIFO policy to avoid preemption while collecting counters
315   int sched_policy = SCHED_FIFO;
316   // Do not use max priority to avoid starving migration and watchdog threads
317   int priority_value = sched_get_priority_max(sched_policy) - 1;
318   sched_param priority_param { priority_value };
319   sched_setscheduler(0, sched_policy, &priority_param);
320
321   if (driver->dump_perfcnt()) {
322      while (auto gpu_timestamp = driver->next()) {
323         if (gpu_timestamp <= descriptor_gpu_timestamp) {
324            // Do not send counter values before counter descriptors
325            PPS_LOG_ERROR("Skipping counter values coming before descriptors");
326            continue;
327         }
328
329         auto packet = ctx.NewTracePacket();
330         packet->set_timestamp_clock_id(driver->gpu_clock_id());
331         packet->set_timestamp(gpu_timestamp);
332
333         auto event = packet->set_gpu_counter_event();
334         event->set_gpu_id(driver->drm_device.gpu_num);
335
336         add_samples(*event, *driver);
337      }
338   }
339
340   uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
341   if ((cpu_ts - last_correlation_timestamp) > CORRELATION_TIMESTAMP_PERIOD) {
342      auto packet = ctx.NewTracePacket();
343      packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
344      packet->set_timestamp(cpu_ts);
345      auto event = packet->set_clock_snapshot();
346      add_timestamp(event, driver);
347      last_correlation_timestamp = cpu_ts;
348   }
349
350   // Reset normal scheduler
351   sched_setscheduler(0, prev_sched_policy, &prev_priority_param);
352}
353
354void GpuDataSource::trace_callback(TraceContext ctx)
355{
356   using namespace std::chrono;
357
358   nanoseconds sleep_time = nanoseconds(0);
359
360   if (auto data_source = ctx.GetDataSourceLocked()) {
361      if (data_source->time_to_sleep > data_source->time_to_trace) {
362         sleep_time = data_source->time_to_sleep - data_source->time_to_trace;
363      }
364   }
365
366   // Wait sampling period before tracing
367   std::this_thread::sleep_for(sleep_time);
368
369   auto time_zero = perfetto::base::GetBootTimeNs();
370   if (auto data_source = ctx.GetDataSourceLocked()) {
371      // Check data source is still running
372      if (data_source->state == pps::State::Start) {
373         data_source->trace(ctx);
374         data_source->time_to_trace = perfetto::base::GetBootTimeNs() - time_zero;
375      }
376   } else {
377      PPS_LOG("Tracing finished");
378   }
379}
380
381} // namespace pps
382