1/*
2 * Copyright © 2021 Google, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7#include "fd_pps_driver.h"
8
9#include <cstring>
10#include <iostream>
11#include <perfetto.h>
12
13#include "pps/pps.h"
14#include "pps/pps_algorithm.h"
15
16namespace pps
17{
18
/* Floating-point quotient a / b; a zero divisor yields 0 rather than
 * inf/NaN.
 */
double
safe_div(uint64_t a, uint64_t b)
{
   return b != 0 ? static_cast<double>(a) / static_cast<double>(b) : 0.0;
}
27
/* a as a percentage of b.
 *
 * The hardware occasionally reports bogus values; to keep the timeline
 * free of readings above 100%, any sample where a exceeds b (or where b
 * is zero) is reported as 0.
 */
float
percent(uint64_t a, uint64_t b)
{
   const bool usable = (b != 0) && (a <= b);
   if (!usable)
      return 0.f;

   const double ratio = a / static_cast<double>(b);
   return 100.f * ratio;
}
39
40uint64_t
41FreedrenoDriver::get_min_sampling_period_ns()
42{
43   return 100000;
44}
45
/*
 * TODO: this seems like it would be largely the same for a5xx as well
 * (i.e. same countable names).
 */
50void
51FreedrenoDriver::setup_a6xx_counters()
52{
53   /* TODO is there a reason to want more than one group? */
54   CounterGroup group = {};
55   group.name = "counters";
56   groups.clear();
57   counters.clear();
58   countables.clear();
59   enabled_counters.clear();
60   groups.emplace_back(std::move(group));
61
62   /*
63    * Create the countables that we'll be using.
64    */
65
66   auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");
67   auto PERF_CP_BUSY_CYCLES  = countable("PERF_CP_BUSY_CYCLES");
68   auto PERF_RB_3D_PIXELS    = countable("PERF_RB_3D_PIXELS");
69   auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
70   auto PERF_TP_L1_CACHELINE_REQUESTS = countable("PERF_TP_L1_CACHELINE_REQUESTS");
71
72   auto PERF_TP_OUTPUT_PIXELS  = countable("PERF_TP_OUTPUT_PIXELS");
73   auto PERF_TP_OUTPUT_PIXELS_ANISO  = countable("PERF_TP_OUTPUT_PIXELS_ANISO");
74   auto PERF_TP_OUTPUT_PIXELS_BILINEAR = countable("PERF_TP_OUTPUT_PIXELS_BILINEAR");
75   auto PERF_TP_OUTPUT_PIXELS_POINT = countable("PERF_TP_OUTPUT_PIXELS_POINT");
76   auto PERF_TP_OUTPUT_PIXELS_ZERO_LOD = countable("PERF_TP_OUTPUT_PIXELS_ZERO_LOD");
77
78   auto PERF_TSE_INPUT_PRIM  = countable("PERF_TSE_INPUT_PRIM");
79   auto PERF_TSE_CLIPPED_PRIM  = countable("PERF_TSE_CLIPPED_PRIM");
80   auto PERF_TSE_TRIVAL_REJ_PRIM  = countable("PERF_TSE_TRIVAL_REJ_PRIM");
81   auto PERF_TSE_OUTPUT_VISIBLE_PRIM = countable("PERF_TSE_OUTPUT_VISIBLE_PRIM");
82
83   auto PERF_SP_BUSY_CYCLES  = countable("PERF_SP_BUSY_CYCLES");
84   auto PERF_SP_ALU_WORKING_CYCLES = countable("PERF_SP_ALU_WORKING_CYCLES");
85   auto PERF_SP_EFU_WORKING_CYCLES = countable("PERF_SP_EFU_WORKING_CYCLES");
86   auto PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_EFU_INSTRUCTIONS");
87   auto PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS");
88   auto PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_TEX_INSTRUCTIONS");
89   auto PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_EFU_INSTRUCTIONS");
90   auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
91   auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
92   auto PERF_SP_STALL_CYCLES_TP = countable("PERF_SP_STALL_CYCLES_TP");
93   auto PERF_SP_ANY_EU_WORKING_FS_STAGE = countable("PERF_SP_ANY_EU_WORKING_FS_STAGE");
94   auto PERF_SP_ANY_EU_WORKING_VS_STAGE = countable("PERF_SP_ANY_EU_WORKING_VS_STAGE");
95   auto PERF_SP_ANY_EU_WORKING_CS_STAGE = countable("PERF_SP_ANY_EU_WORKING_CS_STAGE");
96
97   auto PERF_UCHE_STALL_CYCLES_ARBITER = countable("PERF_UCHE_STALL_CYCLES_ARBITER");
98   auto PERF_UCHE_VBIF_READ_BEATS_TP = countable("PERF_UCHE_VBIF_READ_BEATS_TP");
99   auto PERF_UCHE_VBIF_READ_BEATS_VFD = countable("PERF_UCHE_VBIF_READ_BEATS_VFD");
100   auto PERF_UCHE_VBIF_READ_BEATS_SP = countable("PERF_UCHE_VBIF_READ_BEATS_SP");
101   auto PERF_UCHE_READ_REQUESTS_TP = countable("PERF_UCHE_READ_REQUESTS_TP");
102
103   auto PERF_PC_STALL_CYCLES_VFD = countable("PERF_PC_STALL_CYCLES_VFD");
104   auto PERF_PC_VS_INVOCATIONS = countable("PERF_PC_VS_INVOCATIONS");
105   auto PERF_PC_VERTEX_HITS = countable("PERF_PC_VERTEX_HITS");
106
107   auto PERF_HLSQ_QUADS = countable("PERF_HLSQ_QUADS"); /* Quads (fragments / 4) produced */
108
109   auto PERF_CP_NUM_PREEMPTIONS = countable("PERF_CP_NUM_PREEMPTIONS");
110   auto PERF_CP_PREEMPTION_REACTION_DELAY = countable("PERF_CP_PREEMPTION_REACTION_DELAY");
111
112   /* TODO: resolve() tells there is no PERF_CMPDECMP_VBIF_READ_DATA */
113   // auto PERF_CMPDECMP_VBIF_READ_DATA = countable("PERF_CMPDECMP_VBIF_READ_DATA");
114
115   /*
116    * And then setup the derived counters that we are exporting to
117    * pps based on the captured countable values.
118    *
119    * We try to expose the same counters as blob:
120    * https://gpuinspector.dev/docs/gpu-counters/qualcomm
121    */
122
123   counter("GPU Frequency", Counter::Units::Hertz, [=]() {
124         return PERF_CP_ALWAYS_COUNT / time;
125      }
126   );
127
128   counter("GPU % Utilization", Counter::Units::Percent, [=]() {
129         return percent(PERF_CP_BUSY_CYCLES / time, max_freq);
130      }
131   );
132
133   counter("TP L1 Cache Misses", Counter::Units::None, [=]() {
134         return PERF_TP_L1_CACHELINE_MISSES / time;
135      }
136   );
137
138   counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
139         return percent(PERF_SP_BUSY_CYCLES / time, max_freq * info->num_sp_cores);
140      }
141   );
142
143   /* TODO: verify */
144   counter("(?) % Texture Fetch Stall", Counter::Units::Percent, [=]() {
145         return percent(PERF_SP_STALL_CYCLES_TP / time, max_freq * info->num_sp_cores);
146      }
147   );
148
149   /* TODO: verify */
150   counter("(?) % Vertex Fetch Stall", Counter::Units::Percent, [=]() {
151         return percent(PERF_PC_STALL_CYCLES_VFD / time, max_freq * info->num_sp_cores);
152      }
153   );
154
155   counter("L1 Texture Cache Miss Per Pixel", Counter::Units::None, [=]() {
156         return safe_div(PERF_TP_L1_CACHELINE_MISSES, PERF_HLSQ_QUADS * 4);
157      }
158   );
159
160   counter("% Texture L1 Miss", Counter::Units::Percent, [=]() {
161         return percent(PERF_TP_L1_CACHELINE_MISSES, PERF_TP_L1_CACHELINE_REQUESTS);
162      }
163   );
164
165   counter("% Texture L2 Miss", Counter::Units::Percent, [=]() {
166         return percent(PERF_UCHE_VBIF_READ_BEATS_TP / 2, PERF_UCHE_READ_REQUESTS_TP);
167      }
168   );
169
170   /* TODO: verify */
171   counter("(?) % Stalled on System Memory", Counter::Units::Percent, [=]() {
172         return percent(PERF_UCHE_STALL_CYCLES_ARBITER / time, max_freq * info->num_sp_cores);
173      }
174   );
175
176   counter("Pre-clipped Polygons / Second", Counter::Units::None, [=]() {
177         return PERF_TSE_INPUT_PRIM * (1.f / time);
178      }
179   );
180
181   counter("% Prims Trivially Rejected", Counter::Units::Percent, [=]() {
182         return percent(PERF_TSE_TRIVAL_REJ_PRIM, PERF_TSE_INPUT_PRIM);
183      }
184   );
185
186   counter("% Prims Clipped", Counter::Units::Percent, [=]() {
187         return percent(PERF_TSE_CLIPPED_PRIM, PERF_TSE_INPUT_PRIM);
188      }
189   );
190
191   counter("Average Vertices / Polygon", Counter::Units::None, [=]() {
192         return PERF_PC_VS_INVOCATIONS / PERF_TSE_INPUT_PRIM;
193      }
194   );
195
196   counter("Reused Vertices / Second", Counter::Units::None, [=]() {
197         return PERF_PC_VERTEX_HITS * (1.f / time);
198      }
199   );
200
201   counter("Average Polygon Area", Counter::Units::None, [=]() {
202         return safe_div(PERF_HLSQ_QUADS * 4, PERF_TSE_OUTPUT_VISIBLE_PRIM);
203      }
204   );
205
206   /* TODO: find formula */
207   // counter("% Shaders Busy", Counter::Units::Percent, [=]() {
208   //       return 100.0 * 0;
209   //    }
210   // );
211
212   counter("Vertices Shaded / Second", Counter::Units::None, [=]() {
213         return PERF_PC_VS_INVOCATIONS * (1.f / time);
214      }
215   );
216
217   counter("Fragments Shaded / Second", Counter::Units::None, [=]() {
218         return PERF_HLSQ_QUADS * 4 * (1.f / time);
219      }
220   );
221
222   counter("Vertex Instructions / Second", Counter::Units::None, [=]() {
223         return (PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
224                 PERF_SP_VS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
225      }
226   );
227
228   counter("Fragment Instructions / Second", Counter::Units::None, [=]() {
229         return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
230                 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2 +
231                 PERF_SP_FS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
232      }
233   );
234
235   counter("Fragment ALU Instructions / Sec (Full)", Counter::Units::None, [=]() {
236         return PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS * (1.f / time);
237      }
238   );
239
240   counter("Fragment ALU Instructions / Sec (Half)", Counter::Units::None, [=]() {
241         return PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS * (1.f / time);
242      }
243   );
244
245   counter("Fragment EFU Instructions / Second", Counter::Units::None, [=]() {
246         return PERF_SP_FS_STAGE_EFU_INSTRUCTIONS * (1.f / time);
247      }
248   );
249
250   counter("Textures / Vertex", Counter::Units::None, [=]() {
251         return safe_div(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
252      }
253   );
254
255   counter("Textures / Fragment", Counter::Units::None, [=]() {
256         return safe_div(PERF_TP_OUTPUT_PIXELS, PERF_HLSQ_QUADS * 4);
257      }
258   );
259
260   counter("ALU / Vertex", Counter::Units::None, [=]() {
261         return safe_div(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
262      }
263   );
264
265   counter("EFU / Vertex", Counter::Units::None, [=]() {
266         return safe_div(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
267      }
268   );
269
270   counter("ALU / Fragment", Counter::Units::None, [=]() {
271         return safe_div(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
272                         PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2, PERF_HLSQ_QUADS);
273      }
274   );
275
276   counter("EFU / Fragment", Counter::Units::None, [=]() {
277         return safe_div(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, PERF_HLSQ_QUADS);
278      }
279   );
280
281   counter("% Time Shading Vertices", Counter::Units::Percent, [=]() {
282         return percent(PERF_SP_ANY_EU_WORKING_VS_STAGE,
283                        (PERF_SP_ANY_EU_WORKING_VS_STAGE +
284                         PERF_SP_ANY_EU_WORKING_FS_STAGE +
285                         PERF_SP_ANY_EU_WORKING_CS_STAGE));
286      }
287   );
288
289   counter("% Time Shading Fragments", Counter::Units::Percent, [=]() {
290         return percent(PERF_SP_ANY_EU_WORKING_FS_STAGE,
291                        (PERF_SP_ANY_EU_WORKING_VS_STAGE +
292                         PERF_SP_ANY_EU_WORKING_FS_STAGE +
293                         PERF_SP_ANY_EU_WORKING_CS_STAGE));
294      }
295   );
296
297   counter("% Time Compute", Counter::Units::Percent, [=]() {
298         return percent(PERF_SP_ANY_EU_WORKING_CS_STAGE,
299                        (PERF_SP_ANY_EU_WORKING_VS_STAGE +
300                         PERF_SP_ANY_EU_WORKING_FS_STAGE +
301                         PERF_SP_ANY_EU_WORKING_CS_STAGE));
302      }
303   );
304
305   counter("% Shader ALU Capacity Utilized", Counter::Units::Percent, [=]() {
306         return percent((PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
307                         PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
308                         PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / 64,
309                        PERF_SP_BUSY_CYCLES);
310      }
311   );
312
313   counter("% Time ALUs Working", Counter::Units::Percent, [=]() {
314         return percent(PERF_SP_ALU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
315      }
316   );
317
318   counter("% Time EFUs Working", Counter::Units::Percent, [=]() {
319         return percent(PERF_SP_EFU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
320      }
321   );
322
323   counter("% Anisotropic Filtered", Counter::Units::Percent, [=]() {
324         return percent(PERF_TP_OUTPUT_PIXELS_ANISO, PERF_TP_OUTPUT_PIXELS);
325      }
326   );
327
328   counter("% Linear Filtered", Counter::Units::Percent, [=]() {
329         return percent(PERF_TP_OUTPUT_PIXELS_BILINEAR, PERF_TP_OUTPUT_PIXELS);
330      }
331   );
332
333   counter("% Nearest Filtered", Counter::Units::Percent, [=]() {
334         return percent(PERF_TP_OUTPUT_PIXELS_POINT, PERF_TP_OUTPUT_PIXELS);
335      }
336   );
337
338   counter("% Non-Base Level Textures", Counter::Units::Percent, [=]() {
339         return percent(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, PERF_TP_OUTPUT_PIXELS);
340      }
341   );
342
343   /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=63 */
344   // counter("Read Total (Bytes/sec)", Counter::Units::Byte, [=]() {
345   //       return  * (1.f / time);
346   //    }
347   // );
348
349   /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=84 */
350   // counter("Write Total (Bytes/sec)", Counter::Units::Byte, [=]() {
351   //       return  * (1.f / time);
352   //    }
353   // );
354
355   /* Cannot get PERF_CMPDECMP_VBIF_READ_DATA countable */
356   // counter("Texture Memory Read BW (Bytes/Second)", Counter::Units::Byte, [=]() {
357   //       return (PERF_CMPDECMP_VBIF_READ_DATA + PERF_UCHE_VBIF_READ_BEATS_TP) * (1.f / time);
358   //    }
359   // );
360
361   /* TODO: verify */
362   counter("(?) Vertex Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
363         return PERF_UCHE_VBIF_READ_BEATS_VFD * 32 * (1.f / time);
364      }
365   );
366
367   /* TODO: verify */
368   counter("SP Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
369         return PERF_UCHE_VBIF_READ_BEATS_SP * 32 * (1.f / time);
370      }
371   );
372
373   counter("Avg Bytes / Fragment", Counter::Units::Byte, [=]() {
374         return safe_div(PERF_UCHE_VBIF_READ_BEATS_TP * 32, PERF_HLSQ_QUADS * 4);
375      }
376   );
377
378   counter("Avg Bytes / Vertex", Counter::Units::Byte, [=]() {
379         return safe_div(PERF_UCHE_VBIF_READ_BEATS_VFD * 32, PERF_PC_VS_INVOCATIONS);
380      }
381   );
382
383   counter("Preemptions / second", Counter::Units::None, [=]() {
384         return PERF_CP_NUM_PREEMPTIONS * (1.f / time);
385      }
386   );
387
388   counter("Avg Preemption Delay", Counter::Units::None, [=]() {
389         return PERF_CP_PREEMPTION_REACTION_DELAY * (1.f / time);
390      }
391   );
392}
393
394/**
395 * Generate an submit the cmdstream to configure the counter/countable
396 * muxing
397 */
398void
399FreedrenoDriver::configure_counters(bool reset, bool wait)
400{
401   struct fd_submit *submit = fd_submit_new(pipe);
402   enum fd_ringbuffer_flags flags =
403      (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
404   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags);
405
406   for (auto countable : countables)
407      countable.configure(ring, reset);
408
409   struct fd_submit_fence fence = {};
410   util_queue_fence_init(&fence.ready);
411
412   fd_submit_flush(submit, -1, &fence);
413
414   util_queue_fence_wait(&fence.ready);
415
416   fd_ringbuffer_del(ring);
417   fd_submit_del(submit);
418
419   if (wait)
420      fd_pipe_wait(pipe, &fence.fence);
421}
422
423/**
424 * Read the current counter values and record the time.
425 */
426void
427FreedrenoDriver::collect_countables()
428{
429   last_dump_ts = perfetto::base::GetBootTimeNs().count();
430
431   for (auto countable : countables)
432      countable.collect();
433}
434
435bool
436FreedrenoDriver::init_perfcnt()
437{
438   uint64_t val;
439
440   dev = fd_device_new(drm_device.fd);
441   pipe = fd_pipe_new(dev, FD_PIPE_3D);
442   dev_id = fd_pipe_dev_id(pipe);
443
444   if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) {
445      PERFETTO_FATAL("Could not get MAX_FREQ");
446      return false;
447   }
448   max_freq = val;
449
450   if (fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val)) {
451      PERFETTO_ILOG("Could not get SUSPEND_COUNT");
452   } else {
453      suspend_count = val;
454      has_suspend_count = true;
455   }
456
457   fd_pipe_set_param(pipe, FD_SYSPROF, 1);
458
459   perfcntrs = fd_perfcntrs(fd_pipe_dev_id(pipe), &num_perfcntrs);
460   if (num_perfcntrs == 0) {
461      PERFETTO_FATAL("No hw counters available");
462      return false;
463   }
464
465   assigned_counters.resize(num_perfcntrs);
466   assigned_counters.assign(assigned_counters.size(), 0);
467
468   switch (fd_dev_gen(dev_id)) {
469   case 6:
470      setup_a6xx_counters();
471      break;
472   default:
473      PERFETTO_FATAL("Unsupported GPU: a%03u", fd_dev_gpu_id(dev_id));
474      return false;
475   }
476
477   state.resize(next_countable_id);
478
479   for (auto countable : countables)
480      countable.resolve();
481
482   info = fd_dev_info(dev_id);
483
484   io = fd_dt_find_io();
485   if (!io) {
486      PERFETTO_FATAL("Could not map GPU I/O space");
487      return false;
488   }
489
490   configure_counters(true, true);
491   collect_countables();
492
493   return true;
494}
495
496void
497FreedrenoDriver::enable_counter(const uint32_t counter_id)
498{
499   enabled_counters.push_back(counters[counter_id]);
500}
501
502void
503FreedrenoDriver::enable_all_counters()
504{
505   enabled_counters.reserve(counters.size());
506   for (auto &counter : counters) {
507      enabled_counters.push_back(counter);
508   }
509}
510
511void
512FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)
513{
514}
515
516bool
517FreedrenoDriver::dump_perfcnt()
518{
519   if (has_suspend_count) {
520      uint64_t val;
521
522      fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val);
523
524      if (suspend_count != val) {
525         PERFETTO_ILOG("Device had suspended!");
526
527         suspend_count = val;
528
529         configure_counters(true, true);
530         collect_countables();
531
532         /* We aren't going to have anything sensible by comparing
533          * current values to values from prior to the suspend, so
534          * just skip this sampling period.
535          */
536         return false;
537      }
538   }
539
540   auto last_ts = last_dump_ts;
541
542   /* Capture the timestamp from the *start* of the sampling period: */
543   last_capture_ts = last_dump_ts;
544
545   collect_countables();
546
547   auto elapsed_time_ns = last_dump_ts - last_ts;
548
549   time = (float)elapsed_time_ns / 1000000000.0;
550
551   /* On older kernels that dont' support querying the suspend-
552    * count, just send configuration cmdstream regularly to keep
553    * the GPU alive and correctly configured for the countables
554    * we want
555    */
556   if (!has_suspend_count) {
557      configure_counters(false, false);
558   }
559
560   return true;
561}
562
563uint64_t FreedrenoDriver::next()
564{
565   auto ret = last_capture_ts;
566   last_capture_ts = 0;
567   return ret;
568}
569
570void FreedrenoDriver::disable_perfcnt()
571{
572   /* There isn't really any disable, only reconfiguring which countables
573    * get muxed to which counters
574    */
575}
576
577/*
578 * Countable
579 */
580
581FreedrenoDriver::Countable
582FreedrenoDriver::countable(std::string name)
583{
584   auto countable = Countable(this, name);
585   countables.emplace_back(countable);
586   return countable;
587}
588
589FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name)
590   : id {d->next_countable_id++}, d {d}, name {name}
591{
592}
593
594/* Emit register writes on ring to configure counter/countable muxing: */
595void
596FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset)
597{
598   const struct fd_perfcntr_countable *countable = d->state[id].countable;
599   const struct fd_perfcntr_counter   *counter   = d->state[id].counter;
600
601   OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
602
603   if (counter->enable && reset) {
604      OUT_PKT4(ring, counter->enable, 1);
605      OUT_RING(ring, 0);
606   }
607
608   if (counter->clear && reset) {
609      OUT_PKT4(ring, counter->clear, 1);
610      OUT_RING(ring, 1);
611
612      OUT_PKT4(ring, counter->clear, 1);
613      OUT_RING(ring, 0);
614   }
615
616   OUT_PKT4(ring, counter->select_reg, 1);
617   OUT_RING(ring, countable->selector);
618
619   if (counter->enable && reset) {
620      OUT_PKT4(ring, counter->enable, 1);
621      OUT_RING(ring, 1);
622   }
623}
624
625/* Collect current counter value and calculate delta since last sample: */
626void
627FreedrenoDriver::Countable::collect()
628{
629   const struct fd_perfcntr_counter *counter = d->state[id].counter;
630
631   d->state[id].last_value = d->state[id].value;
632
633   uint32_t *reg_lo = (uint32_t *)d->io + counter->counter_reg_lo;
634   uint32_t *reg_hi = (uint32_t *)d->io + counter->counter_reg_hi;
635
636   uint32_t lo = *reg_lo;
637   uint32_t hi = *reg_hi;
638
639   d->state[id].value = lo | ((uint64_t)hi << 32);
640}
641
642/* Resolve the countable and assign next counter from it's group: */
643void
644FreedrenoDriver::Countable::resolve()
645{
646   for (unsigned i = 0; i < d->num_perfcntrs; i++) {
647      const struct fd_perfcntr_group *g = &d->perfcntrs[i];
648      for (unsigned j = 0; j < g->num_countables; j++) {
649         const struct fd_perfcntr_countable *c = &g->countables[j];
650         if (name == c->name) {
651            d->state[id].countable = c;
652
653            /* Assign a counter from the same group: */
654            assert(d->assigned_counters[i] < g->num_counters);
655            d->state[id].counter = &g->counters[d->assigned_counters[i]++];
656
657            std::cout << "Countable: " << name << ", group=" << g->name <<
658                  ", counter=" << d->assigned_counters[i] - 1 << "\n";
659
660            return;
661         }
662      }
663   }
664   unreachable("no such countable!");
665}
666
667uint64_t
668FreedrenoDriver::Countable::get_value() const
669{
670   return d->state[id].value - d->state[id].last_value;
671}
672
673/*
674 * DerivedCounter
675 */
676
677FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name,
678                                                Counter::Units units,
679                                                std::function<int64_t()> derive)
680   : Counter(d->next_counter_id++, name, 0)
681{
682   std::cout << "DerivedCounter: " << name << ", id=" << id << "\n";
683   this->units = units;
684   set_getter([=](const Counter &c, const Driver &d) {
685         return derive();
686      }
687   );
688}
689
690FreedrenoDriver::DerivedCounter
691FreedrenoDriver::counter(std::string name, Counter::Units units,
692                         std::function<int64_t()> derive)
693{
694   auto counter = DerivedCounter(this, name, units, derive);
695   counters.emplace_back(counter);
696   return counter;
697}
698
699uint32_t
700FreedrenoDriver::gpu_clock_id() const
701{
702   return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
703}
704
705uint64_t
706FreedrenoDriver::gpu_timestamp() const
707{
708   return perfetto::base::GetBootTimeNs().count();
709}
710
711} // namespace pps
712