1/* 2 * Copyright © 2021 Google, Inc. 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 7#include "fd_pps_driver.h" 8 9#include <cstring> 10#include <iostream> 11#include <perfetto.h> 12 13#include "pps/pps.h" 14#include "pps/pps_algorithm.h" 15 16namespace pps 17{ 18 19double 20safe_div(uint64_t a, uint64_t b) 21{ 22 if (b == 0) 23 return 0; 24 25 return a / static_cast<double>(b); 26} 27 28float 29percent(uint64_t a, uint64_t b) 30{ 31 /* Sometimes we get bogus values but we want for the timeline 32 * to look nice without higher than 100% values. 33 */ 34 if (b == 0 || a > b) 35 return 0; 36 37 return 100.f * (a / static_cast<double>(b)); 38} 39 40uint64_t 41FreedrenoDriver::get_min_sampling_period_ns() 42{ 43 return 100000; 44} 45 46/* 47TODO this sees like it would be largely the same for a5xx as well 48(ie. same countable names).. 49 */ 50void 51FreedrenoDriver::setup_a6xx_counters() 52{ 53 /* TODO is there a reason to want more than one group? */ 54 CounterGroup group = {}; 55 group.name = "counters"; 56 groups.clear(); 57 counters.clear(); 58 countables.clear(); 59 enabled_counters.clear(); 60 groups.emplace_back(std::move(group)); 61 62 /* 63 * Create the countables that we'll be using. 64 */ 65 66 auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT"); 67 auto PERF_CP_BUSY_CYCLES = countable("PERF_CP_BUSY_CYCLES"); 68 auto PERF_RB_3D_PIXELS = countable("PERF_RB_3D_PIXELS"); 69 auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES"); 70 auto PERF_TP_L1_CACHELINE_REQUESTS = countable("PERF_TP_L1_CACHELINE_REQUESTS"); 71 72 auto PERF_TP_OUTPUT_PIXELS = countable("PERF_TP_OUTPUT_PIXELS"); 73 auto PERF_TP_OUTPUT_PIXELS_ANISO = countable("PERF_TP_OUTPUT_PIXELS_ANISO"); 74 auto PERF_TP_OUTPUT_PIXELS_BILINEAR = countable("PERF_TP_OUTPUT_PIXELS_BILINEAR"); 75 auto PERF_TP_OUTPUT_PIXELS_POINT = countable("PERF_TP_OUTPUT_PIXELS_POINT"); 76 auto PERF_TP_OUTPUT_PIXELS_ZERO_LOD = countable("PERF_TP_OUTPUT_PIXELS_ZERO_LOD"); 77 78 auto PERF_TSE_INPUT_PRIM = countable("PERF_TSE_INPUT_PRIM"); 79 auto PERF_TSE_CLIPPED_PRIM = countable("PERF_TSE_CLIPPED_PRIM"); 80 auto PERF_TSE_TRIVAL_REJ_PRIM = countable("PERF_TSE_TRIVAL_REJ_PRIM"); 81 auto PERF_TSE_OUTPUT_VISIBLE_PRIM = countable("PERF_TSE_OUTPUT_VISIBLE_PRIM"); 82 83 auto PERF_SP_BUSY_CYCLES = countable("PERF_SP_BUSY_CYCLES"); 84 auto PERF_SP_ALU_WORKING_CYCLES = countable("PERF_SP_ALU_WORKING_CYCLES"); 85 auto PERF_SP_EFU_WORKING_CYCLES = countable("PERF_SP_EFU_WORKING_CYCLES"); 86 auto PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_EFU_INSTRUCTIONS"); 87 auto PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS"); 88 auto PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_TEX_INSTRUCTIONS"); 89 auto PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_EFU_INSTRUCTIONS"); 90 auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS"); 91 auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS"); 92 auto PERF_SP_STALL_CYCLES_TP = countable("PERF_SP_STALL_CYCLES_TP"); 93 auto PERF_SP_ANY_EU_WORKING_FS_STAGE = countable("PERF_SP_ANY_EU_WORKING_FS_STAGE"); 94 auto PERF_SP_ANY_EU_WORKING_VS_STAGE = countable("PERF_SP_ANY_EU_WORKING_VS_STAGE"); 95 auto PERF_SP_ANY_EU_WORKING_CS_STAGE = countable("PERF_SP_ANY_EU_WORKING_CS_STAGE"); 96 97 auto PERF_UCHE_STALL_CYCLES_ARBITER = countable("PERF_UCHE_STALL_CYCLES_ARBITER"); 98 auto PERF_UCHE_VBIF_READ_BEATS_TP = countable("PERF_UCHE_VBIF_READ_BEATS_TP"); 99 auto PERF_UCHE_VBIF_READ_BEATS_VFD = countable("PERF_UCHE_VBIF_READ_BEATS_VFD"); 100 auto PERF_UCHE_VBIF_READ_BEATS_SP = countable("PERF_UCHE_VBIF_READ_BEATS_SP"); 101 auto PERF_UCHE_READ_REQUESTS_TP = countable("PERF_UCHE_READ_REQUESTS_TP"); 102 103 auto PERF_PC_STALL_CYCLES_VFD = countable("PERF_PC_STALL_CYCLES_VFD"); 104 auto PERF_PC_VS_INVOCATIONS = countable("PERF_PC_VS_INVOCATIONS"); 105 auto PERF_PC_VERTEX_HITS = countable("PERF_PC_VERTEX_HITS"); 106 107 auto PERF_HLSQ_QUADS = countable("PERF_HLSQ_QUADS"); /* Quads (fragments / 4) produced */ 108 109 auto PERF_CP_NUM_PREEMPTIONS = countable("PERF_CP_NUM_PREEMPTIONS"); 110 auto PERF_CP_PREEMPTION_REACTION_DELAY = countable("PERF_CP_PREEMPTION_REACTION_DELAY"); 111 112 /* TODO: resolve() tells there is no PERF_CMPDECMP_VBIF_READ_DATA */ 113 // auto PERF_CMPDECMP_VBIF_READ_DATA = countable("PERF_CMPDECMP_VBIF_READ_DATA"); 114 115 /* 116 * And then setup the derived counters that we are exporting to 117 * pps based on the captured countable values. 118 * 119 * We try to expose the same counters as blob: 120 * https://gpuinspector.dev/docs/gpu-counters/qualcomm 121 */ 122 123 counter("GPU Frequency", Counter::Units::Hertz, [=]() { 124 return PERF_CP_ALWAYS_COUNT / time; 125 } 126 ); 127 128 counter("GPU % Utilization", Counter::Units::Percent, [=]() { 129 return percent(PERF_CP_BUSY_CYCLES / time, max_freq); 130 } 131 ); 132 133 counter("TP L1 Cache Misses", Counter::Units::None, [=]() { 134 return PERF_TP_L1_CACHELINE_MISSES / time; 135 } 136 ); 137 138 counter("Shader Core Utilization", Counter::Units::Percent, [=]() { 139 return percent(PERF_SP_BUSY_CYCLES / time, max_freq * info->num_sp_cores); 140 } 141 ); 142 143 /* TODO: verify */ 144 counter("(?) % Texture Fetch Stall", Counter::Units::Percent, [=]() { 145 return percent(PERF_SP_STALL_CYCLES_TP / time, max_freq * info->num_sp_cores); 146 } 147 ); 148 149 /* TODO: verify */ 150 counter("(?) % Vertex Fetch Stall", Counter::Units::Percent, [=]() { 151 return percent(PERF_PC_STALL_CYCLES_VFD / time, max_freq * info->num_sp_cores); 152 } 153 ); 154 155 counter("L1 Texture Cache Miss Per Pixel", Counter::Units::None, [=]() { 156 return safe_div(PERF_TP_L1_CACHELINE_MISSES, PERF_HLSQ_QUADS * 4); 157 } 158 ); 159 160 counter("% Texture L1 Miss", Counter::Units::Percent, [=]() { 161 return percent(PERF_TP_L1_CACHELINE_MISSES, PERF_TP_L1_CACHELINE_REQUESTS); 162 } 163 ); 164 165 counter("% Texture L2 Miss", Counter::Units::Percent, [=]() { 166 return percent(PERF_UCHE_VBIF_READ_BEATS_TP / 2, PERF_UCHE_READ_REQUESTS_TP); 167 } 168 ); 169 170 /* TODO: verify */ 171 counter("(?) % Stalled on System Memory", Counter::Units::Percent, [=]() { 172 return percent(PERF_UCHE_STALL_CYCLES_ARBITER / time, max_freq * info->num_sp_cores); 173 } 174 ); 175 176 counter("Pre-clipped Polygons / Second", Counter::Units::None, [=]() { 177 return PERF_TSE_INPUT_PRIM * (1.f / time); 178 } 179 ); 180 181 counter("% Prims Trivially Rejected", Counter::Units::Percent, [=]() { 182 return percent(PERF_TSE_TRIVAL_REJ_PRIM, PERF_TSE_INPUT_PRIM); 183 } 184 ); 185 186 counter("% Prims Clipped", Counter::Units::Percent, [=]() { 187 return percent(PERF_TSE_CLIPPED_PRIM, PERF_TSE_INPUT_PRIM); 188 } 189 ); 190 191 counter("Average Vertices / Polygon", Counter::Units::None, [=]() { 192 return PERF_PC_VS_INVOCATIONS / PERF_TSE_INPUT_PRIM; 193 } 194 ); 195 196 counter("Reused Vertices / Second", Counter::Units::None, [=]() { 197 return PERF_PC_VERTEX_HITS * (1.f / time); 198 } 199 ); 200 201 counter("Average Polygon Area", Counter::Units::None, [=]() { 202 return safe_div(PERF_HLSQ_QUADS * 4, PERF_TSE_OUTPUT_VISIBLE_PRIM); 203 } 204 ); 205 206 /* TODO: find formula */ 207 // counter("% Shaders Busy", Counter::Units::Percent, [=]() { 208 // return 100.0 * 0; 209 // } 210 // ); 211 212 counter("Vertices Shaded / Second", Counter::Units::None, [=]() { 213 return PERF_PC_VS_INVOCATIONS * (1.f / time); 214 } 215 ); 216 217 counter("Fragments Shaded / Second", Counter::Units::None, [=]() { 218 return PERF_HLSQ_QUADS * 4 * (1.f / time); 219 } 220 ); 221 222 counter("Vertex Instructions / Second", Counter::Units::None, [=]() { 223 return (PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS + 224 PERF_SP_VS_STAGE_EFU_INSTRUCTIONS) * (1.f / time); 225 } 226 ); 227 228 counter("Fragment Instructions / Second", Counter::Units::None, [=]() { 229 return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS + 230 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2 + 231 PERF_SP_FS_STAGE_EFU_INSTRUCTIONS) * (1.f / time); 232 } 233 ); 234 235 counter("Fragment ALU Instructions / Sec (Full)", Counter::Units::None, [=]() { 236 return PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS * (1.f / time); 237 } 238 ); 239 240 counter("Fragment ALU Instructions / Sec (Half)", Counter::Units::None, [=]() { 241 return PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS * (1.f / time); 242 } 243 ); 244 245 counter("Fragment EFU Instructions / Second", Counter::Units::None, [=]() { 246 return PERF_SP_FS_STAGE_EFU_INSTRUCTIONS * (1.f / time); 247 } 248 ); 249 250 counter("Textures / Vertex", Counter::Units::None, [=]() { 251 return safe_div(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS); 252 } 253 ); 254 255 counter("Textures / Fragment", Counter::Units::None, [=]() { 256 return safe_div(PERF_TP_OUTPUT_PIXELS, PERF_HLSQ_QUADS * 4); 257 } 258 ); 259 260 counter("ALU / Vertex", Counter::Units::None, [=]() { 261 return safe_div(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS); 262 } 263 ); 264 265 counter("EFU / Vertex", Counter::Units::None, [=]() { 266 return safe_div(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS); 267 } 268 ); 269 270 counter("ALU / Fragment", Counter::Units::None, [=]() { 271 return safe_div(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS + 272 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2, PERF_HLSQ_QUADS); 273 } 274 ); 275 276 counter("EFU / Fragment", Counter::Units::None, [=]() { 277 return safe_div(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, PERF_HLSQ_QUADS); 278 } 279 ); 280 281 counter("% Time Shading Vertices", Counter::Units::Percent, [=]() { 282 return percent(PERF_SP_ANY_EU_WORKING_VS_STAGE, 283 (PERF_SP_ANY_EU_WORKING_VS_STAGE + 284 PERF_SP_ANY_EU_WORKING_FS_STAGE + 285 PERF_SP_ANY_EU_WORKING_CS_STAGE)); 286 } 287 ); 288 289 counter("% Time Shading Fragments", Counter::Units::Percent, [=]() { 290 return percent(PERF_SP_ANY_EU_WORKING_FS_STAGE, 291 (PERF_SP_ANY_EU_WORKING_VS_STAGE + 292 PERF_SP_ANY_EU_WORKING_FS_STAGE + 293 PERF_SP_ANY_EU_WORKING_CS_STAGE)); 294 } 295 ); 296 297 counter("% Time Compute", Counter::Units::Percent, [=]() { 298 return percent(PERF_SP_ANY_EU_WORKING_CS_STAGE, 299 (PERF_SP_ANY_EU_WORKING_VS_STAGE + 300 PERF_SP_ANY_EU_WORKING_FS_STAGE + 301 PERF_SP_ANY_EU_WORKING_CS_STAGE)); 302 } 303 ); 304 305 counter("% Shader ALU Capacity Utilized", Counter::Units::Percent, [=]() { 306 return percent((PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS + 307 PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS + 308 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / 64, 309 PERF_SP_BUSY_CYCLES); 310 } 311 ); 312 313 counter("% Time ALUs Working", Counter::Units::Percent, [=]() { 314 return percent(PERF_SP_ALU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES); 315 } 316 ); 317 318 counter("% Time EFUs Working", Counter::Units::Percent, [=]() { 319 return percent(PERF_SP_EFU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES); 320 } 321 ); 322 323 counter("% Anisotropic Filtered", Counter::Units::Percent, [=]() { 324 return percent(PERF_TP_OUTPUT_PIXELS_ANISO, PERF_TP_OUTPUT_PIXELS); 325 } 326 ); 327 328 counter("% Linear Filtered", Counter::Units::Percent, [=]() { 329 return percent(PERF_TP_OUTPUT_PIXELS_BILINEAR, PERF_TP_OUTPUT_PIXELS); 330 } 331 ); 332 333 counter("% Nearest Filtered", Counter::Units::Percent, [=]() { 334 return percent(PERF_TP_OUTPUT_PIXELS_POINT, PERF_TP_OUTPUT_PIXELS); 335 } 336 ); 337 338 counter("% Non-Base Level Textures", Counter::Units::Percent, [=]() { 339 return percent(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, PERF_TP_OUTPUT_PIXELS); 340 } 341 ); 342 343 /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=63 */ 344 // counter("Read Total (Bytes/sec)", Counter::Units::Byte, [=]() { 345 // return * (1.f / time); 346 // } 347 // ); 348 349 /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=84 */ 350 // counter("Write Total (Bytes/sec)", Counter::Units::Byte, [=]() { 351 // return * (1.f / time); 352 // } 353 // ); 354 355 /* Cannot get PERF_CMPDECMP_VBIF_READ_DATA countable */ 356 // counter("Texture Memory Read BW (Bytes/Second)", Counter::Units::Byte, [=]() { 357 // return (PERF_CMPDECMP_VBIF_READ_DATA + PERF_UCHE_VBIF_READ_BEATS_TP) * (1.f / time); 358 // } 359 // ); 360 361 /* TODO: verify */ 362 counter("(?) Vertex Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() { 363 return PERF_UCHE_VBIF_READ_BEATS_VFD * 32 * (1.f / time); 364 } 365 ); 366 367 /* TODO: verify */ 368 counter("SP Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() { 369 return PERF_UCHE_VBIF_READ_BEATS_SP * 32 * (1.f / time); 370 } 371 ); 372 373 counter("Avg Bytes / Fragment", Counter::Units::Byte, [=]() { 374 return safe_div(PERF_UCHE_VBIF_READ_BEATS_TP * 32, PERF_HLSQ_QUADS * 4); 375 } 376 ); 377 378 counter("Avg Bytes / Vertex", Counter::Units::Byte, [=]() { 379 return safe_div(PERF_UCHE_VBIF_READ_BEATS_VFD * 32, PERF_PC_VS_INVOCATIONS); 380 } 381 ); 382 383 counter("Preemptions / second", Counter::Units::None, [=]() { 384 return PERF_CP_NUM_PREEMPTIONS * (1.f / time); 385 } 386 ); 387 388 counter("Avg Preemption Delay", Counter::Units::None, [=]() { 389 return PERF_CP_PREEMPTION_REACTION_DELAY * (1.f / time); 390 } 391 ); 392} 393 394/** 395 * Generate an submit the cmdstream to configure the counter/countable 396 * muxing 397 */ 398void 399FreedrenoDriver::configure_counters(bool reset, bool wait) 400{ 401 struct fd_submit *submit = fd_submit_new(pipe); 402 enum fd_ringbuffer_flags flags = 403 (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE); 404 struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags); 405 406 for (auto countable : countables) 407 countable.configure(ring, reset); 408 409 struct fd_submit_fence fence = {}; 410 util_queue_fence_init(&fence.ready); 411 412 fd_submit_flush(submit, -1, &fence); 413 414 util_queue_fence_wait(&fence.ready); 415 416 fd_ringbuffer_del(ring); 417 fd_submit_del(submit); 418 419 if (wait) 420 fd_pipe_wait(pipe, &fence.fence); 421} 422 423/** 424 * Read the current counter values and record the time. 425 */ 426void 427FreedrenoDriver::collect_countables() 428{ 429 last_dump_ts = perfetto::base::GetBootTimeNs().count(); 430 431 for (auto countable : countables) 432 countable.collect(); 433} 434 435bool 436FreedrenoDriver::init_perfcnt() 437{ 438 uint64_t val; 439 440 dev = fd_device_new(drm_device.fd); 441 pipe = fd_pipe_new(dev, FD_PIPE_3D); 442 dev_id = fd_pipe_dev_id(pipe); 443 444 if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) { 445 PERFETTO_FATAL("Could not get MAX_FREQ"); 446 return false; 447 } 448 max_freq = val; 449 450 if (fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val)) { 451 PERFETTO_ILOG("Could not get SUSPEND_COUNT"); 452 } else { 453 suspend_count = val; 454 has_suspend_count = true; 455 } 456 457 fd_pipe_set_param(pipe, FD_SYSPROF, 1); 458 459 perfcntrs = fd_perfcntrs(fd_pipe_dev_id(pipe), &num_perfcntrs); 460 if (num_perfcntrs == 0) { 461 PERFETTO_FATAL("No hw counters available"); 462 return false; 463 } 464 465 assigned_counters.resize(num_perfcntrs); 466 assigned_counters.assign(assigned_counters.size(), 0); 467 468 switch (fd_dev_gen(dev_id)) { 469 case 6: 470 setup_a6xx_counters(); 471 break; 472 default: 473 PERFETTO_FATAL("Unsupported GPU: a%03u", fd_dev_gpu_id(dev_id)); 474 return false; 475 } 476 477 state.resize(next_countable_id); 478 479 for (auto countable : countables) 480 countable.resolve(); 481 482 info = fd_dev_info(dev_id); 483 484 io = fd_dt_find_io(); 485 if (!io) { 486 PERFETTO_FATAL("Could not map GPU I/O space"); 487 return false; 488 } 489 490 configure_counters(true, true); 491 collect_countables(); 492 493 return true; 494} 495 496void 497FreedrenoDriver::enable_counter(const uint32_t counter_id) 498{ 499 enabled_counters.push_back(counters[counter_id]); 500} 501 502void 503FreedrenoDriver::enable_all_counters() 504{ 505 enabled_counters.reserve(counters.size()); 506 for (auto &counter : counters) { 507 enabled_counters.push_back(counter); 508 } 509} 510 511void 512FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */) 513{ 514} 515 516bool 517FreedrenoDriver::dump_perfcnt() 518{ 519 if (has_suspend_count) { 520 uint64_t val; 521 522 fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val); 523 524 if (suspend_count != val) { 525 PERFETTO_ILOG("Device had suspended!"); 526 527 suspend_count = val; 528 529 configure_counters(true, true); 530 collect_countables(); 531 532 /* We aren't going to have anything sensible by comparing 533 * current values to values from prior to the suspend, so 534 * just skip this sampling period. 535 */ 536 return false; 537 } 538 } 539 540 auto last_ts = last_dump_ts; 541 542 /* Capture the timestamp from the *start* of the sampling period: */ 543 last_capture_ts = last_dump_ts; 544 545 collect_countables(); 546 547 auto elapsed_time_ns = last_dump_ts - last_ts; 548 549 time = (float)elapsed_time_ns / 1000000000.0; 550 551 /* On older kernels that dont' support querying the suspend- 552 * count, just send configuration cmdstream regularly to keep 553 * the GPU alive and correctly configured for the countables 554 * we want 555 */ 556 if (!has_suspend_count) { 557 configure_counters(false, false); 558 } 559 560 return true; 561} 562 563uint64_t FreedrenoDriver::next() 564{ 565 auto ret = last_capture_ts; 566 last_capture_ts = 0; 567 return ret; 568} 569 570void FreedrenoDriver::disable_perfcnt() 571{ 572 /* There isn't really any disable, only reconfiguring which countables 573 * get muxed to which counters 574 */ 575} 576 577/* 578 * Countable 579 */ 580 581FreedrenoDriver::Countable 582FreedrenoDriver::countable(std::string name) 583{ 584 auto countable = Countable(this, name); 585 countables.emplace_back(countable); 586 return countable; 587} 588 589FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name) 590 : id {d->next_countable_id++}, d {d}, name {name} 591{ 592} 593 594/* Emit register writes on ring to configure counter/countable muxing: */ 595void 596FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset) 597{ 598 const struct fd_perfcntr_countable *countable = d->state[id].countable; 599 const struct fd_perfcntr_counter *counter = d->state[id].counter; 600 601 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); 602 603 if (counter->enable && reset) { 604 OUT_PKT4(ring, counter->enable, 1); 605 OUT_RING(ring, 0); 606 } 607 608 if (counter->clear && reset) { 609 OUT_PKT4(ring, counter->clear, 1); 610 OUT_RING(ring, 1); 611 612 OUT_PKT4(ring, counter->clear, 1); 613 OUT_RING(ring, 0); 614 } 615 616 OUT_PKT4(ring, counter->select_reg, 1); 617 OUT_RING(ring, countable->selector); 618 619 if (counter->enable && reset) { 620 OUT_PKT4(ring, counter->enable, 1); 621 OUT_RING(ring, 1); 622 } 623} 624 625/* Collect current counter value and calculate delta since last sample: */ 626void 627FreedrenoDriver::Countable::collect() 628{ 629 const struct fd_perfcntr_counter *counter = d->state[id].counter; 630 631 d->state[id].last_value = d->state[id].value; 632 633 uint32_t *reg_lo = (uint32_t *)d->io + counter->counter_reg_lo; 634 uint32_t *reg_hi = (uint32_t *)d->io + counter->counter_reg_hi; 635 636 uint32_t lo = *reg_lo; 637 uint32_t hi = *reg_hi; 638 639 d->state[id].value = lo | ((uint64_t)hi << 32); 640} 641 642/* Resolve the countable and assign next counter from it's group: */ 643void 644FreedrenoDriver::Countable::resolve() 645{ 646 for (unsigned i = 0; i < d->num_perfcntrs; i++) { 647 const struct fd_perfcntr_group *g = &d->perfcntrs[i]; 648 for (unsigned j = 0; j < g->num_countables; j++) { 649 const struct fd_perfcntr_countable *c = &g->countables[j]; 650 if (name == c->name) { 651 d->state[id].countable = c; 652 653 /* Assign a counter from the same group: */ 654 assert(d->assigned_counters[i] < g->num_counters); 655 d->state[id].counter = &g->counters[d->assigned_counters[i]++]; 656 657 std::cout << "Countable: " << name << ", group=" << g->name << 658 ", counter=" << d->assigned_counters[i] - 1 << "\n"; 659 660 return; 661 } 662 } 663 } 664 unreachable("no such countable!"); 665} 666 667uint64_t 668FreedrenoDriver::Countable::get_value() const 669{ 670 return d->state[id].value - d->state[id].last_value; 671} 672 673/* 674 * DerivedCounter 675 */ 676 677FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name, 678 Counter::Units units, 679 std::function<int64_t()> derive) 680 : Counter(d->next_counter_id++, name, 0) 681{ 682 std::cout << "DerivedCounter: " << name << ", id=" << id << "\n"; 683 this->units = units; 684 set_getter([=](const Counter &c, const Driver &d) { 685 return derive(); 686 } 687 ); 688} 689 690FreedrenoDriver::DerivedCounter 691FreedrenoDriver::counter(std::string name, Counter::Units units, 692 std::function<int64_t()> derive) 693{ 694 auto counter = DerivedCounter(this, name, units, derive); 695 counters.emplace_back(counter); 696 return counter; 697} 698 699uint32_t 700FreedrenoDriver::gpu_clock_id() const 701{ 702 return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME; 703} 704 705uint64_t 706FreedrenoDriver::gpu_timestamp() const 707{ 708 return perfetto::base::GetBootTimeNs().count(); 709} 710 711} // namespace pps 712