1/* 2 * Copyright © 2019-2021 Collabora, Ltd. 3 * Author: Antonio Caggiano <antonio.caggiano@collabora.com> 4 * Author: Rohan Garg <rohan.garg@collabora.com> 5 * Author: Robert Beckett <bob.beckett@collabora.com> 6 * Author: Corentin Noël <corentin.noel@collabora.com> 7 * 8 * SPDX-License-Identifier: MIT 9 */ 10 11#include "pps_datasource.h" 12#include "pps_driver.h" 13 14#include <condition_variable> 15#include <thread> 16#include <variant> 17 18// Minimum supported sampling period in nanoseconds 19#define MIN_SAMPLING_PERIOD_NS 50000 20 21#define CORRELATION_TIMESTAMP_PERIOD (1000000000ull) 22 23namespace pps 24{ 25static std::string driver_name; 26 27/// Synchronize access to started_cv and started 28static std::mutex started_m; 29static std::condition_variable started_cv; 30static bool started = false; 31 32float ms(const std::chrono::nanoseconds &t) 33{ 34 return t.count() / 1000000.0f; 35} 36 37void GpuDataSource::OnSetup(const SetupArgs &args) 38{ 39 // Create drivers for all supported devices 40 auto drm_devices = DrmDevice::create_all(); 41 for (auto &drm_device : drm_devices) { 42 if (drm_device.name != driver_name) 43 continue; 44 45 if (auto driver = Driver::get_driver(std::move(drm_device))) { 46 if (!driver->init_perfcnt()) { 47 // Skip failing driver 48 PPS_LOG_ERROR("Failed to initialize %s driver", driver->drm_device.name.c_str()); 49 continue; 50 } 51 52 this->driver = driver; 53 } 54 } 55 if (driver == nullptr) { 56 PPS_LOG_FATAL("No DRM devices supported"); 57 } 58 59 // Parse perfetto config 60 const std::string &config_raw = args.config->gpu_counter_config_raw(); 61 perfetto::protos::pbzero::GpuCounterConfig::Decoder config(config_raw); 62 63 if (config.has_counter_ids()) { 64 // Get enabled counters 65 PPS_LOG_IMPORTANT("Selecting counters"); 66 for (auto it = config.counter_ids(); it; ++it) { 67 uint32_t counter_id = it->as_uint32(); 68 driver->enable_counter(counter_id); 69 } 70 } else { 71 // Enable all counters 72 driver->enable_all_counters(); 73 } 74 75 // Get sampling period 76 auto min_sampling_period = std::chrono::nanoseconds(MIN_SAMPLING_PERIOD_NS); 77 78 auto dev_supported = std::chrono::nanoseconds(driver->get_min_sampling_period_ns()); 79 if (dev_supported > min_sampling_period) { 80 min_sampling_period = dev_supported; 81 } 82 83 time_to_sleep = std::max(time_to_sleep, min_sampling_period); 84 85 if (config.has_counter_period_ns()) { 86 auto requested_sampling_period = std::chrono::nanoseconds(config.counter_period_ns()); 87 if (requested_sampling_period < min_sampling_period) { 88 PPS_LOG_ERROR("Sampling period should be greater than %" PRIu64 " ns (%.2f ms)", 89 uint64_t(min_sampling_period.count()), 90 ms(min_sampling_period)); 91 } else { 92 time_to_sleep = requested_sampling_period; 93 } 94 } 95 PPS_LOG("Sampling period set to %" PRIu64 " ns", uint64_t(time_to_sleep.count())); 96} 97 98void GpuDataSource::OnStart(const StartArgs &args) 99{ 100 driver->enable_perfcnt(time_to_sleep.count()); 101 102 state = State::Start; 103 104 { 105 std::lock_guard<std::mutex> lock(started_m); 106 started = true; 107 } 108 started_cv.notify_all(); 109} 110 111void close_callback(GpuDataSource::TraceContext ctx) 112{ 113 auto packet = ctx.NewTracePacket(); 114 packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); 115 packet->set_timestamp(perfetto::base::GetBootTimeNs().count()); 116 packet->Finalize(); 117 ctx.Flush(); 118 PPS_LOG("Context flushed"); 119} 120 121void GpuDataSource::OnStop(const StopArgs &args) 122{ 123 state = State::Stop; 124 auto stop_closure = args.HandleStopAsynchronously(); 125 Trace(close_callback); 126 stop_closure(); 127 128 driver->disable_perfcnt(); 129 driver = nullptr; 130 131 std::lock_guard<std::mutex> lock(started_m); 132 started = false; 133} 134 135void GpuDataSource::wait_started() 136{ 137 std::unique_lock<std::mutex> lock(started_m); 138 if (!started) { 139 PPS_LOG("Waiting for start"); 140 started_cv.wait(lock, [] { return started; }); 141 } 142} 143 144void GpuDataSource::register_data_source(const std::string &_driver_name) 145{ 146 driver_name = _driver_name; 147 static perfetto::DataSourceDescriptor dsd; 148 dsd.set_name("gpu.counters." + driver_name); 149 Register(dsd); 150} 151 152void add_group(perfetto::protos::pbzero::GpuCounterDescriptor *desc, 153 const CounterGroup &group, 154 const std::string &prefix, 155 int32_t gpu_num) 156{ 157 if (!group.counters.empty()) { 158 // Define a block for each group containing counters 159 auto block_desc = desc->add_blocks(); 160 block_desc->set_name(prefix + "." + group.name); 161 block_desc->set_block_id(group.id); 162 163 // Associate counters to blocks 164 for (auto id : group.counters) { 165 block_desc->add_counter_ids(id); 166 } 167 } 168 169 for (auto const &sub : group.subgroups) { 170 // Perfetto doesnt currently support nested groups. 171 // Flatten group hierarchy, using dot separator 172 add_group(desc, sub, prefix + "." + group.name, gpu_num); 173 } 174} 175 176void add_descriptors(perfetto::protos::pbzero::GpuCounterEvent *event, 177 std::vector<CounterGroup> const &groups, 178 std::vector<Counter> const &counters, 179 Driver &driver) 180{ 181 // Start a counter descriptor 182 auto desc = event->set_counter_descriptor(); 183 184 // Add the groups 185 for (auto const &group : groups) { 186 add_group(desc, group, driver.drm_device.name, driver.drm_device.gpu_num); 187 } 188 189 // Add the counters 190 for (auto const &counter : counters) { 191 auto spec = desc->add_specs(); 192 spec->set_counter_id(counter.id); 193 spec->set_name(counter.name); 194 195 auto units = perfetto::protos::pbzero::GpuCounterDescriptor::NONE; 196 switch (counter.units) { 197 case Counter::Units::Percent: 198 units = perfetto::protos::pbzero::GpuCounterDescriptor::PERCENT; 199 break; 200 case Counter::Units::Byte: 201 units = perfetto::protos::pbzero::GpuCounterDescriptor::BYTE; 202 break; 203 case Counter::Units::Hertz: 204 units = perfetto::protos::pbzero::GpuCounterDescriptor::HERTZ; 205 break; 206 case Counter::Units::None: 207 units = perfetto::protos::pbzero::GpuCounterDescriptor::NONE; 208 break; 209 default: 210 assert(false && "Missing counter units type!"); 211 break; 212 } 213 spec->add_numerator_units(units); 214 } 215} 216 217void add_samples(perfetto::protos::pbzero::GpuCounterEvent &event, const Driver &driver) 218{ 219 if (driver.enabled_counters.size() == 0) { 220 PPS_LOG_FATAL("There are no counters enabled"); 221 } 222 223 for (const auto &counter : driver.enabled_counters) { 224 auto counter_event = event.add_counters(); 225 226 counter_event->set_counter_id(counter.id); 227 228 auto value = counter.get_value(driver); 229 if (auto d_value = std::get_if<double>(&value)) { 230 counter_event->set_double_value(*d_value); 231 } else if (auto i_value = std::get_if<int64_t>(&value)) { 232 counter_event->set_int_value(*i_value); 233 } else { 234 PPS_LOG_ERROR("Failed to get value for counter %s", counter.name.c_str()); 235 } 236 } 237} 238 239void add_timestamp(perfetto::protos::pbzero::ClockSnapshot *event, const Driver *driver) 240{ 241 uint32_t gpu_clock_id = driver->gpu_clock_id(); 242 if (perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME == gpu_clock_id) 243 return; 244 245 // Send a correlation event between GPU & CPU timestamps 246 uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count(); 247 uint64_t gpu_ts = driver->gpu_timestamp(); 248 249 { 250 auto clock = event->add_clocks(); 251 252 clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); 253 clock->set_timestamp(cpu_ts); 254 } 255 256 { 257 auto clock = event->add_clocks(); 258 259 clock->set_clock_id(gpu_clock_id); 260 clock->set_timestamp(gpu_ts); 261 } 262} 263 264void GpuDataSource::trace(TraceContext &ctx) 265{ 266 using namespace perfetto::protos::pbzero; 267 268 if (auto state = ctx.GetIncrementalState(); state->was_cleared) { 269 descriptor_timestamp = perfetto::base::GetBootTimeNs().count(); 270 271 { 272 // Mark any incremental state before this point invalid 273 auto packet = ctx.NewTracePacket(); 274 packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); 275 packet->set_timestamp(descriptor_timestamp); 276 packet->set_sequence_flags(TracePacket::SEQ_INCREMENTAL_STATE_CLEARED); 277 } 278 279 descriptor_timestamp = perfetto::base::GetBootTimeNs().count(); 280 { 281 // Counter descriptions 282 auto packet = ctx.NewTracePacket(); 283 packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); 284 packet->set_timestamp(descriptor_timestamp); 285 auto event = packet->set_gpu_counter_event(); 286 event->set_gpu_id(driver->drm_device.gpu_num); 287 288 auto &groups = driver->groups; 289 auto &counters = driver->enabled_counters; 290 add_descriptors(event, groups, counters, *driver); 291 } 292 293 { 294 // Initial timestamp correlation event 295 auto packet = ctx.NewTracePacket(); 296 packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); 297 packet->set_timestamp(descriptor_timestamp); 298 last_correlation_timestamp = perfetto::base::GetBootTimeNs().count(); 299 auto event = packet->set_clock_snapshot(); 300 add_timestamp(event, driver); 301 } 302 303 // Capture GPU timestamp of the first packet. Anything prior to this can 304 // be discarded. 305 descriptor_gpu_timestamp = driver->gpu_timestamp(); 306 state->was_cleared = false; 307 } 308 309 // Save current scheduler for restoring later 310 int prev_sched_policy = sched_getscheduler(0); 311 sched_param prev_priority_param; 312 sched_getparam(0, &prev_priority_param); 313 314 // Use FIFO policy to avoid preemption while collecting counters 315 int sched_policy = SCHED_FIFO; 316 // Do not use max priority to avoid starving migration and watchdog threads 317 int priority_value = sched_get_priority_max(sched_policy) - 1; 318 sched_param priority_param { priority_value }; 319 sched_setscheduler(0, sched_policy, &priority_param); 320 321 if (driver->dump_perfcnt()) { 322 while (auto gpu_timestamp = driver->next()) { 323 if (gpu_timestamp <= descriptor_gpu_timestamp) { 324 // Do not send counter values before counter descriptors 325 PPS_LOG_ERROR("Skipping counter values coming before descriptors"); 326 continue; 327 } 328 329 auto packet = ctx.NewTracePacket(); 330 packet->set_timestamp_clock_id(driver->gpu_clock_id()); 331 packet->set_timestamp(gpu_timestamp); 332 333 auto event = packet->set_gpu_counter_event(); 334 event->set_gpu_id(driver->drm_device.gpu_num); 335 336 add_samples(*event, *driver); 337 } 338 } 339 340 uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count(); 341 if ((cpu_ts - last_correlation_timestamp) > CORRELATION_TIMESTAMP_PERIOD) { 342 auto packet = ctx.NewTracePacket(); 343 packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); 344 packet->set_timestamp(cpu_ts); 345 auto event = packet->set_clock_snapshot(); 346 add_timestamp(event, driver); 347 last_correlation_timestamp = cpu_ts; 348 } 349 350 // Reset normal scheduler 351 sched_setscheduler(0, prev_sched_policy, &prev_priority_param); 352} 353 354void GpuDataSource::trace_callback(TraceContext ctx) 355{ 356 using namespace std::chrono; 357 358 nanoseconds sleep_time = nanoseconds(0); 359 360 if (auto data_source = ctx.GetDataSourceLocked()) { 361 if (data_source->time_to_sleep > data_source->time_to_trace) { 362 sleep_time = data_source->time_to_sleep - data_source->time_to_trace; 363 } 364 } 365 366 // Wait sampling period before tracing 367 std::this_thread::sleep_for(sleep_time); 368 369 auto time_zero = perfetto::base::GetBootTimeNs(); 370 if (auto data_source = ctx.GetDataSourceLocked()) { 371 // Check data source is still running 372 if (data_source->state == pps::State::Start) { 373 data_source->trace(ctx); 374 data_source->time_to_trace = perfetto::base::GetBootTimeNs() - time_zero; 375 } 376 } else { 377 PPS_LOG("Tracing finished"); 378 } 379} 380 381} // namespace pps 382