1 /*
2  * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #include <assert.h>
26 #include <curses.h>
27 #include <err.h>
28 #include <inttypes.h>
29 #include <libconfig.h>
30 #include <locale.h>
31 #include <stdint.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <time.h>
36 #include <unistd.h>
37 #include <xf86drm.h>
38 
39 #include "drm/freedreno_drmif.h"
40 #include "drm/freedreno_ringbuffer.h"
41 
42 #include "util/os_file.h"
43 
44 #include "freedreno_dt.h"
45 #include "freedreno_perfcntr.h"
46 
/* capacity of the per-counter arrays in struct counter_group: */
#define MAX_CNTR_PER_GROUP 24
/* default value of options.refresh_ms: */
#define REFRESH_MS         500

/* Command line options, filled in by parse_options(): */
static struct {
   /* refresh period in milliseconds (-r): */
   int refresh_ms;
   /* dump the counters once and exit instead of starting the UI (-d): */
   bool dump;
} options = {
   .refresh_ms = REFRESH_MS,
   .dump = false,
};
57 
58 /* NOTE first counter group should always be CP, since we unconditionally
59  * use CP counter to measure the gpu freq.
60  */
61 
/* Runtime state for one perfcntr group: the static group description plus,
 * for each counter in the group, its current selection and most recent
 * sample.  All arrays are indexed by counter number.
 */
struct counter_group {
   /* static description of this group (set in setup_counter_groups()): */
   const struct fd_perfcntr_group *group;

   struct {
      /* static description of this counter: */
      const struct fd_perfcntr_counter *counter;
      /* value most recently requested for this counter via
       * select_counter():
       */
      uint16_t select_val;
      /* pointers to the counter's hi/lo value registers, computed as
       * offsets from dev.io in setup_counter_groups():
       */
      volatile uint32_t *val_hi;
      volatile uint32_t *val_lo;
   } counter[MAX_CNTR_PER_GROUP];

   /* last sample time (a gettime_us() timestamp): */
   uint32_t stime[MAX_CNTR_PER_GROUP];
   /* for now just care about the low 32b value.. at least then we don't
    * have to really care that we can't sample both hi and lo regs at the
    * same time:
    */
   uint32_t last[MAX_CNTR_PER_GROUP];
   /* current value, ie. by how many did the counter increase in last
    * sampling period divided by the sampling period:
    */
   float current[MAX_CNTR_PER_GROUP];
   /* name of currently selected counters (for UI): */
   const char *label[MAX_CNTR_PER_GROUP];
};
86 
/* Global state describing the GPU being monitored, filled in by
 * find_device() and main():
 */
static struct {
   /* base pointer returned by fd_dt_find_io(); counter registers are
    * read at offsets from it:
    */
   void *io;
   /* value of the FD_CHIP_ID pipe param; one byte per field, see
    * CHIP_ARGS():
    */
   uint32_t chipid;
   /* frequency limits, from FD_MAX_FREQ or fd_dt_find_freqs(): */
   uint32_t min_freq;
   uint32_t max_freq;
   /* per-generation table of counters: */
   unsigned ngroups;
   struct counter_group *groups;
   /* drm device (for writing select regs via ring): */
   struct fd_device *dev;
   struct fd_pipe *pipe;
   /* pending submit and its ring; created on demand by select_counter()
    * and destroyed (reset to NULL) by flush_ring():
    */
   struct fd_submit *submit;
   struct fd_ringbuffer *ring;
} dev;
101 
/* forward declarations for functions used before their definition: */
static void config_save(void);
static void config_restore(void);
static void restore_counter_groups(void);
105 
106 /*
107  * helpers
108  */
109 
110 static uint32_t
gettime_us(void)111 gettime_us(void)
112 {
113    struct timespec ts;
114    clock_gettime(CLOCK_MONOTONIC, &ts);
115    return (ts.tv_sec * 1000000) + (ts.tv_nsec / 1000);
116 }
117 
118 static void
sleep_us(uint32_t us)119 sleep_us(uint32_t us)
120 {
121    const struct timespec ts = {
122       .tv_sec = us / 1000000,
123       .tv_nsec = (us % 1000000) * 1000,
124    };
125    clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
126 }
127 
/**
 * Returns how far a free-running 32-bit value advanced from a to b,
 * assuming it wrapped around at most once.
 *
 * Unsigned subtraction is already modulo 2^32, so it yields the correct
 * distance whether or not the value rolled over in between (the previous
 * explicit rollover branch computed 0xffffffff - a + b, which is one
 * short).
 */
static uint32_t
delta(uint32_t a, uint32_t b)
{
   return b - a;
}
137 
138 static void
find_device(void)139 find_device(void)
140 {
141    int ret;
142 
143    dev.dev = fd_device_open();
144    if (!dev.dev)
145       err(1, "could not open drm device");
146 
147    dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
148 
149    uint64_t val;
150    ret = fd_pipe_get_param(dev.pipe, FD_CHIP_ID, &val);
151    if (ret) {
152       err(1, "could not get gpu-id");
153    }
154    dev.chipid = val;
155 
156 #define CHIP_FMT "d%d%d.%d"
157 #define CHIP_ARGS(chipid)                                                      \
158    ((chipid) >> 24) & 0xff, ((chipid) >> 16) & 0xff, ((chipid) >> 8) & 0xff,   \
159       ((chipid) >> 0) & 0xff
160    printf("device: a%" CHIP_FMT "\n", CHIP_ARGS(dev.chipid));
161 
162    /* try MAX_FREQ first as that will work regardless of old dt
163     * dt bindings vs upstream bindings:
164     */
165    ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
166    if (ret) {
167       printf("falling back to parsing DT bindings for freq\n");
168       if (!fd_dt_find_freqs(&dev.min_freq, &dev.max_freq))
169          err(1, "could not find GPU freqs");
170    } else {
171       dev.min_freq = 0;
172       dev.max_freq = val;
173    }
174 
175    printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
176 
177    dev.io = fd_dt_find_io();
178    if (!dev.io) {
179       err(1, "could not map device");
180    }
181 
182    fd_pipe_set_param(dev.pipe, FD_SYSPROF, 1);
183 }
184 
185 /*
186  * perf-monitor
187  */
188 
189 static void
flush_ring(void)190 flush_ring(void)
191 {
192    int ret;
193 
194    if (!dev.submit)
195       return;
196 
197    struct fd_submit_fence fence = {};
198    util_queue_fence_init(&fence.ready);
199 
200    ret = fd_submit_flush(dev.submit, -1, &fence);
201 
202    if (ret)
203       errx(1, "submit failed: %d", ret);
204    util_queue_fence_wait(&fence.ready);
205    fd_ringbuffer_del(dev.ring);
206    fd_submit_del(dev.submit);
207 
208    dev.ring = NULL;
209    dev.submit = NULL;
210 }
211 
212 static void
select_counter(struct counter_group *group, int ctr, int n)213 select_counter(struct counter_group *group, int ctr, int n)
214 {
215    assert(n < group->group->num_countables);
216    assert(ctr < group->group->num_counters);
217 
218    group->label[ctr] = group->group->countables[n].name;
219    group->counter[ctr].select_val = n;
220 
221    if (!dev.submit) {
222       dev.submit = fd_submit_new(dev.pipe);
223       dev.ring = fd_submit_new_ringbuffer(
224          dev.submit, 0x1000, FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
225    }
226 
227    /* bashing select register directly while gpu is active will end
228     * in tears.. so we need to write it via the ring:
229     *
230     * TODO it would help startup time, if gpu is loaded, to batch
231     * all the initial writes and do a single flush.. although that
232     * makes things more complicated for capturing inital sample value
233     */
234    struct fd_ringbuffer *ring = dev.ring;
235    switch (dev.chipid >> 24) {
236    case 2:
237    case 3:
238    case 4:
239       OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
240       OUT_RING(ring, 0x00000000);
241 
242       if (group->group->counters[ctr].enable) {
243          OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
244          OUT_RING(ring, 0);
245       }
246 
247       if (group->group->counters[ctr].clear) {
248          OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
249          OUT_RING(ring, 1);
250 
251          OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
252          OUT_RING(ring, 0);
253       }
254 
255       OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
256       OUT_RING(ring, n);
257 
258       if (group->group->counters[ctr].enable) {
259          OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
260          OUT_RING(ring, 1);
261       }
262 
263       break;
264    case 5:
265    case 6:
266       OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
267 
268       if (group->group->counters[ctr].enable) {
269          OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
270          OUT_RING(ring, 0);
271       }
272 
273       if (group->group->counters[ctr].clear) {
274          OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
275          OUT_RING(ring, 1);
276 
277          OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
278          OUT_RING(ring, 0);
279       }
280 
281       OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
282       OUT_RING(ring, n);
283 
284       if (group->group->counters[ctr].enable) {
285          OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
286          OUT_RING(ring, 1);
287       }
288 
289       break;
290    }
291 
292    group->last[ctr] = *group->counter[ctr].val_lo;
293    group->stime[ctr] = gettime_us();
294 }
295 
296 static void
resample_counter(struct counter_group *group, int ctr)297 resample_counter(struct counter_group *group, int ctr)
298 {
299    uint32_t val = *group->counter[ctr].val_lo;
300    uint32_t t = gettime_us();
301    uint32_t dt = delta(group->stime[ctr], t);
302    uint32_t dval = delta(group->last[ctr], val);
303    group->current[ctr] = (float)dval * 1000000.0 / (float)dt;
304    group->last[ctr] = val;
305    group->stime[ctr] = t;
306 }
307 
308 /* sample all the counters: */
309 static void
resample(void)310 resample(void)
311 {
312    static uint64_t last_time;
313    uint64_t current_time = gettime_us();
314 
315    if ((current_time - last_time) < (options.refresh_ms * 1000 / 2))
316       return;
317 
318    last_time = current_time;
319 
320    for (unsigned i = 0; i < dev.ngroups; i++) {
321       struct counter_group *group = &dev.groups[i];
322       for (unsigned j = 0; j < group->group->num_counters; j++) {
323          resample_counter(group, j);
324       }
325    }
326 }
327 
328 /*
329  * The UI
330  */
331 
/* curses color-pair ids, set up with init_pair() in main_ui(): */
#define COLOR_GROUP_HEADER 1
#define COLOR_FOOTER       2
#define COLOR_INVERSE      3

/* size of the main window, refreshed on every redraw(): */
static int w, h;
/* width of the label column: longest countable name plus one, computed in
 * setup_counter_groups():
 */
static int ctr_width;
/* max_rows: total number of rows (group headers + visible counters) in the
 * counter list.  current_cntr: row of the currently selected counter; it
 * starts at 1 and scroll_cntr() never moves it below 1.
 */
static int max_rows, current_cntr = 1;
339 
340 static void
redraw_footer(WINDOW *win)341 redraw_footer(WINDOW *win)
342 {
343    char *footer;
344    int n;
345 
346    n = asprintf(&footer, " fdperf: a%" CHIP_FMT " (%.2fMHz..%.2fMHz)",
347                 CHIP_ARGS(dev.chipid), ((float)dev.min_freq) / 1000000.0,
348                 ((float)dev.max_freq) / 1000000.0);
349 
350    wmove(win, h - 1, 0);
351    wattron(win, COLOR_PAIR(COLOR_FOOTER));
352    waddstr(win, footer);
353    whline(win, ' ', w - n);
354    wattroff(win, COLOR_PAIR(COLOR_FOOTER));
355 
356    free(footer);
357 }
358 
359 static void
redraw_group_header(WINDOW *win, int row, const char *name)360 redraw_group_header(WINDOW *win, int row, const char *name)
361 {
362    wmove(win, row, 0);
363    wattron(win, A_BOLD);
364    wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
365    waddstr(win, name);
366    whline(win, ' ', w - strlen(name));
367    wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
368    wattroff(win, A_BOLD);
369 }
370 
371 static void
redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)372 redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
373 {
374    int n = strlen(name);
375    assert(n <= ctr_width);
376    wmove(win, row, 0);
377    whline(win, ' ', ctr_width - n);
378    wmove(win, row, ctr_width - n);
379    if (selected)
380       wattron(win, COLOR_PAIR(COLOR_INVERSE));
381    waddstr(win, name);
382    if (selected)
383       wattroff(win, COLOR_PAIR(COLOR_INVERSE));
384    waddstr(win, ": ");
385 }
386 
387 static void
redraw_counter_value_cycles(WINDOW *win, float val)388 redraw_counter_value_cycles(WINDOW *win, float val)
389 {
390    char *str;
391    int x = getcurx(win);
392    int valwidth = w - x;
393    int barwidth, n;
394 
395    /* convert to fraction of max freq: */
396    val = val / (float)dev.max_freq;
397 
398    /* figure out percentage-bar width: */
399    barwidth = (int)(val * valwidth);
400 
401    /* sometimes things go over 100%.. idk why, could be
402     * things running faster than base clock, or counter
403     * summing up cycles in multiple cores?
404     */
405    barwidth = MIN2(barwidth, valwidth - 1);
406 
407    n = asprintf(&str, "%.2f%%", 100.0 * val);
408    wattron(win, COLOR_PAIR(COLOR_INVERSE));
409    waddnstr(win, str, barwidth);
410    if (barwidth > n) {
411       whline(win, ' ', barwidth - n);
412       wmove(win, getcury(win), x + barwidth);
413    }
414    wattroff(win, COLOR_PAIR(COLOR_INVERSE));
415    if (barwidth < n)
416       waddstr(win, str + barwidth);
417    whline(win, ' ', w - getcurx(win));
418 
419    free(str);
420 }
421 
422 static void
redraw_counter_value_raw(WINDOW *win, float val)423 redraw_counter_value_raw(WINDOW *win, float val)
424 {
425    char *str;
426    (void)asprintf(&str, "%'.2f", val);
427    waddstr(win, str);
428    whline(win, ' ', w - getcurx(win));
429    free(str);
430 }
431 
432 static void
redraw_counter(WINDOW *win, int row, struct counter_group *group, int ctr, bool selected)433 redraw_counter(WINDOW *win, int row, struct counter_group *group, int ctr,
434                bool selected)
435 {
436    redraw_counter_label(win, row, group->label[ctr], selected);
437 
438    /* quick hack, if the label has "CYCLE" in the name, it is
439     * probably a cycle counter ;-)
440     * Perhaps add more info in rnndb schema to know how to
441     * treat individual counters (ie. which are cycles, and
442     * for those we want to present as a percentage do we
443     * need to scale the result.. ie. is it running at some
444     * multiple or divisor of core clk, etc)
445     *
446     * TODO it would be much more clever to get this from xml
447     * Also.. in some cases I think we want to know how many
448     * units the counter is counting for, ie. if a320 has 2x
449     * shader as a306 we might need to scale the result..
450     */
451    if (strstr(group->label[ctr], "CYCLE") ||
452        strstr(group->label[ctr], "BUSY") || strstr(group->label[ctr], "IDLE"))
453       redraw_counter_value_cycles(win, group->current[ctr]);
454    else
455       redraw_counter_value_raw(win, group->current[ctr]);
456 }
457 
458 static void
redraw(WINDOW *win)459 redraw(WINDOW *win)
460 {
461    static int scroll = 0;
462    int max, row = 0;
463 
464    w = getmaxx(win);
465    h = getmaxy(win);
466 
467    max = h - 3;
468 
469    if ((current_cntr - scroll) > (max - 1)) {
470       scroll = current_cntr - (max - 1);
471    } else if ((current_cntr - 1) < scroll) {
472       scroll = current_cntr - 1;
473    }
474 
475    for (unsigned i = 0; i < dev.ngroups; i++) {
476       struct counter_group *group = &dev.groups[i];
477       unsigned j = 0;
478 
479       /* NOTE skip CP the first CP counter */
480       if (i == 0)
481          j++;
482 
483       if (j < group->group->num_counters) {
484          if ((scroll <= row) && ((row - scroll) < max))
485             redraw_group_header(win, row - scroll, group->group->name);
486          row++;
487       }
488 
489       for (; j < group->group->num_counters; j++) {
490          if ((scroll <= row) && ((row - scroll) < max))
491             redraw_counter(win, row - scroll, group, j, row == current_cntr);
492          row++;
493       }
494    }
495 
496    /* convert back to physical (unscrolled) offset: */
497    row = max;
498 
499    redraw_group_header(win, row, "Status");
500    row++;
501 
502    /* Draw GPU freq row: */
503    redraw_counter_label(win, row, "Freq (MHz)", false);
504    redraw_counter_value_raw(win, dev.groups[0].current[0] / 1000000.0);
505    row++;
506 
507    redraw_footer(win);
508 
509    refresh();
510 }
511 
512 static struct counter_group *
current_counter(int *ctr)513 current_counter(int *ctr)
514 {
515    int n = 0;
516 
517    for (unsigned i = 0; i < dev.ngroups; i++) {
518       struct counter_group *group = &dev.groups[i];
519       unsigned j = 0;
520 
521       /* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */
522       if (i == 0)
523          j++;
524 
525       /* account for group header: */
526       if (j < group->group->num_counters) {
527          /* cannot select group header.. return null to indicate this
528           * main_ui():
529           */
530          if (n == current_cntr)
531             return NULL;
532          n++;
533       }
534 
535       for (; j < group->group->num_counters; j++) {
536          if (n == current_cntr) {
537             if (ctr)
538                *ctr = j;
539             return group;
540          }
541          n++;
542       }
543    }
544 
545    assert(0);
546    return NULL;
547 }
548 
549 static void
counter_dialog(void)550 counter_dialog(void)
551 {
552    WINDOW *dialog;
553    struct counter_group *group;
554    int cnt = 0, current = 0, scroll;
555 
556    /* figure out dialog size: */
557    int dh = h / 2;
558    int dw = ctr_width + 2;
559 
560    group = current_counter(&cnt);
561 
562    /* find currently selected idx (note there can be discontinuities
563     * so the selected value does not map 1:1 to current idx)
564     */
565    uint32_t selected = group->counter[cnt].select_val;
566    for (int i = 0; i < group->group->num_countables; i++) {
567       if (group->group->countables[i].selector == selected) {
568          current = i;
569          break;
570       }
571    }
572 
573    /* scrolling offset, if dialog is too small for all the choices: */
574    scroll = 0;
575 
576    dialog = newwin(dh, dw, (h - dh) / 2, (w - dw) / 2);
577    box(dialog, 0, 0);
578    wrefresh(dialog);
579    keypad(dialog, TRUE);
580 
581    while (true) {
582       int max = MIN2(dh - 2, group->group->num_countables);
583       int selector = -1;
584 
585       if ((current - scroll) >= (dh - 3)) {
586          scroll = current - (dh - 3);
587       } else if (current < scroll) {
588          scroll = current;
589       }
590 
591       for (int i = 0; i < max; i++) {
592          int n = scroll + i;
593          wmove(dialog, i + 1, 1);
594          if (n == current) {
595             assert(n < group->group->num_countables);
596             selector = group->group->countables[n].selector;
597             wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
598          }
599          if (n < group->group->num_countables)
600             waddstr(dialog, group->group->countables[n].name);
601          whline(dialog, ' ', dw - getcurx(dialog) - 1);
602          if (n == current)
603             wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
604       }
605 
606       assert(selector >= 0);
607 
608       switch (wgetch(dialog)) {
609       case KEY_UP:
610          current = MAX2(0, current - 1);
611          break;
612       case KEY_DOWN:
613          current = MIN2(group->group->num_countables - 1, current + 1);
614          break;
615       case KEY_LEFT:
616       case KEY_ENTER:
617          /* select new sampler */
618          select_counter(group, cnt, selector);
619          flush_ring();
620          config_save();
621          goto out;
622       case 'q':
623          goto out;
624       default:
625          /* ignore */
626          break;
627       }
628 
629       resample();
630    }
631 
632 out:
633    wborder(dialog, ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ');
634    delwin(dialog);
635 }
636 
637 static void
scroll_cntr(int amount)638 scroll_cntr(int amount)
639 {
640    if (amount < 0) {
641       current_cntr = MAX2(1, current_cntr + amount);
642       if (current_counter(NULL) == NULL) {
643          current_cntr = MAX2(1, current_cntr - 1);
644       }
645    } else {
646       current_cntr = MIN2(max_rows - 1, current_cntr + amount);
647       if (current_counter(NULL) == NULL)
648          current_cntr = MIN2(max_rows - 1, current_cntr + 1);
649    }
650 }
651 
652 static void
main_ui(void)653 main_ui(void)
654 {
655    WINDOW *mainwin;
656    uint32_t last_time = gettime_us();
657 
658    /* curses setup: */
659    mainwin = initscr();
660    if (!mainwin)
661       goto out;
662 
663    cbreak();
664    wtimeout(mainwin, options.refresh_ms);
665    noecho();
666    keypad(mainwin, TRUE);
667    curs_set(0);
668    start_color();
669    init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
670    init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);
671    init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);
672 
673    while (true) {
674       switch (wgetch(mainwin)) {
675       case KEY_UP:
676          scroll_cntr(-1);
677          break;
678       case KEY_DOWN:
679          scroll_cntr(+1);
680          break;
681       case KEY_NPAGE: /* page-down */
682          /* TODO figure out # of rows visible? */
683          scroll_cntr(+15);
684          break;
685       case KEY_PPAGE: /* page-up */
686          /* TODO figure out # of rows visible? */
687          scroll_cntr(-15);
688          break;
689       case KEY_RIGHT:
690          counter_dialog();
691          break;
692       case 'q':
693          goto out;
694          break;
695       default:
696          /* ignore */
697          break;
698       }
699       resample();
700       redraw(mainwin);
701 
702       /* restore the counters every 0.5s in case the GPU has suspended,
703        * in which case the current selected countables will have reset:
704        */
705       uint32_t t = gettime_us();
706       if (delta(last_time, t) > 500000) {
707          restore_counter_groups();
708          flush_ring();
709          last_time = t;
710       }
711    }
712 
713    /* restore settings.. maybe we need an atexit()??*/
714 out:
715    delwin(mainwin);
716    endwin();
717    refresh();
718 }
719 
720 static void
dump_counters(void)721 dump_counters(void)
722 {
723    resample();
724    sleep_us(options.refresh_ms * 1000);
725    resample();
726 
727    for (unsigned i = 0; i < dev.ngroups; i++) {
728       const struct counter_group *group = &dev.groups[i];
729       for (unsigned j = 0; j < group->group->num_counters; j++) {
730          const char *label = group->label[j];
731          float val = group->current[j];
732 
733          /* we did not config the first CP counter */
734          if (i == 0 && j == 0)
735             label = group->group->countables[0].name;
736 
737          int n = printf("%s: ", label) - 2;
738          while (n++ < ctr_width)
739             fputc(' ', stdout);
740 
741          if (strstr(label, "CYCLE") ||
742              strstr(label, "BUSY") ||
743              strstr(label, "IDLE")) {
744             val = val / dev.max_freq * 100.0f;
745             printf("%.2f%%\n", val);
746          } else {
747             printf("%'.2f\n", val);
748          }
749       }
750    }
751 }
752 
753 static void
restore_counter_groups(void)754 restore_counter_groups(void)
755 {
756    for (unsigned i = 0; i < dev.ngroups; i++) {
757       struct counter_group *group = &dev.groups[i];
758       unsigned j = 0;
759 
760       /* NOTE skip CP the first CP counter */
761       if (i == 0)
762          j++;
763 
764       for (; j < group->group->num_counters; j++) {
765          select_counter(group, j, group->counter[j].select_val);
766       }
767    }
768 }
769 
770 static void
setup_counter_groups(const struct fd_perfcntr_group *groups)771 setup_counter_groups(const struct fd_perfcntr_group *groups)
772 {
773    for (unsigned i = 0; i < dev.ngroups; i++) {
774       struct counter_group *group = &dev.groups[i];
775 
776       group->group = &groups[i];
777 
778       max_rows += group->group->num_counters + 1;
779 
780       /* the first CP counter is hidden: */
781       if (i == 0) {
782          max_rows--;
783          if (group->group->num_counters <= 1)
784             max_rows--;
785       }
786 
787       for (unsigned j = 0; j < group->group->num_counters; j++) {
788          group->counter[j].counter = &group->group->counters[j];
789 
790          group->counter[j].val_hi =
791             dev.io + (group->counter[j].counter->counter_reg_hi * 4);
792          group->counter[j].val_lo =
793             dev.io + (group->counter[j].counter->counter_reg_lo * 4);
794 
795          group->counter[j].select_val = j;
796       }
797 
798       for (unsigned j = 0; j < group->group->num_countables; j++) {
799          ctr_width =
800             MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
801       }
802    }
803 }
804 
805 /*
806  * configuration / persistence
807  */
808 
/* libconfig state for fdperf.cfg, initialized by config_restore(): */
static config_t cfg;
/* the per-device ("a<N>xx") group within cfg, set by config_restore(): */
static config_setting_t *setting;
811 
812 static void
config_save(void)813 config_save(void)
814 {
815    for (unsigned i = 0; i < dev.ngroups; i++) {
816       struct counter_group *group = &dev.groups[i];
817       unsigned j = 0;
818 
819       /* NOTE skip CP the first CP counter */
820       if (i == 0)
821          j++;
822 
823       config_setting_t *sect =
824          config_setting_get_member(setting, group->group->name);
825 
826       for (; j < group->group->num_counters; j++) {
827          char name[] = "counter0000";
828          sprintf(name, "counter%d", j);
829          config_setting_t *s = config_setting_lookup(sect, name);
830          config_setting_set_int(s, group->counter[j].select_val);
831       }
832    }
833 
834    config_write_file(&cfg, "fdperf.cfg");
835 }
836 
837 static void
config_restore(void)838 config_restore(void)
839 {
840    char *str;
841 
842    config_init(&cfg);
843 
844    /* Read the file. If there is an error, report it and exit. */
845    if (!config_read_file(&cfg, "fdperf.cfg")) {
846       warn("could not restore settings");
847    }
848 
849    config_setting_t *root = config_root_setting(&cfg);
850 
851    /* per device settings: */
852    (void)asprintf(&str, "a%dxx", dev.chipid >> 24);
853    setting = config_setting_get_member(root, str);
854    if (!setting)
855       setting = config_setting_add(root, str, CONFIG_TYPE_GROUP);
856    free(str);
857 
858    for (unsigned i = 0; i < dev.ngroups; i++) {
859       struct counter_group *group = &dev.groups[i];
860       unsigned j = 0;
861 
862       /* NOTE skip CP the first CP counter */
863       if (i == 0)
864          j++;
865 
866       config_setting_t *sect =
867          config_setting_get_member(setting, group->group->name);
868 
869       if (!sect) {
870          sect =
871             config_setting_add(setting, group->group->name, CONFIG_TYPE_GROUP);
872       }
873 
874       for (; j < group->group->num_counters; j++) {
875          char name[] = "counter0000";
876          sprintf(name, "counter%d", j);
877          config_setting_t *s = config_setting_lookup(sect, name);
878          if (!s) {
879             config_setting_add(sect, name, CONFIG_TYPE_INT);
880             continue;
881          }
882          select_counter(group, j, config_setting_get_int(s));
883       }
884    }
885 }
886 
/* Prints the command line help to stderr and exits with status 2. */
static void
print_usage(const char *argv0)
{
   FILE *out = stderr;

   fprintf(out, "Usage: %s [OPTION]...\n", argv0);
   fputs("\n"
         "  -r <N>     refresh every N milliseconds\n"
         "  -d         dump counters and exit\n"
         "  -h         show this message\n",
         out);

   exit(2);
}
899 
900 static void
parse_options(int argc, char **argv)901 parse_options(int argc, char **argv)
902 {
903    int c;
904 
905    while ((c = getopt(argc, argv, "r:d")) != -1) {
906       switch (c) {
907       case 'r':
908          options.refresh_ms = atoi(optarg);
909          break;
910       case 'd':
911          options.dump = true;
912          break;
913       default:
914          print_usage(argv[0]);
915          break;
916       }
917    }
918 }
919 
920 /*
921  * main
922  */
923 
924 int
main(int argc, char **argv)925 main(int argc, char **argv)
926 {
927    parse_options(argc, argv);
928 
929    find_device();
930 
931    const struct fd_perfcntr_group *groups;
932    struct fd_dev_id dev_id = {
933          .gpu_id = (dev.chipid >> 24) * 100,
934    };
935    groups = fd_perfcntrs(&dev_id, &dev.ngroups);
936    if (!groups) {
937       errx(1, "no perfcntr support");
938    }
939 
940    dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
941 
942    setlocale(LC_NUMERIC, "en_US.UTF-8");
943 
944    setup_counter_groups(groups);
945    restore_counter_groups();
946    config_restore();
947    flush_ring();
948 
949    if (options.dump)
950       dump_counters();
951    else
952       main_ui();
953 
954    return 0;
955 }
956