/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file intel_measure.c
 *
 * Collect and report GPU timings for rendering events, as configured by the
 * INTEL_MEASURE environment variable.
 */

#include "intel_measure.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#define __STDC_FORMAT_MACROS 1
#include <inttypes.h>

#include "dev/intel_device_info.h"
#include "util/debug.h"
#include "util/macros.h"
#include "util/u_debug.h"


static const struct debug_control debug_control[] = {
   { "draw",            INTEL_MEASURE_DRAW       },
   { "rt",              INTEL_MEASURE_RENDERPASS },
   { "shader",          INTEL_MEASURE_SHADER     },
   { "batch",           INTEL_MEASURE_BATCH      },
   { "frame",           INTEL_MEASURE_FRAME      },
   { NULL, 0 }
};
static struct intel_measure_config config;

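/**
 * Parse the INTEL_MEASURE environment variable once and initialize
 * per-device state.
 *
 * Options are comma-separated.  Hypothetical invocations (paths and values
 * here are illustrative, not defaults):
 *
 *   INTEL_MEASURE=draw,file=/tmp/measure.csv,start=100,count=10 ./app
 *   INTEL_MEASURE=batch,interval=5,control=/tmp/measure_fifo ./app
 */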
void
intel_measure_init(struct intel_measure_device *device)
{
   static bool once = false;
   const char *env = getenv("INTEL_MEASURE");
   if (unlikely(!once)) {
      once = true;
      memset(&config, 0, sizeof(struct intel_measure_config));
      if (!env)
         return;

      char env_copy[1024];
      strncpy(env_copy, env, 1024);
      env_copy[1023] = '\0';

      config.file = stderr;
      config.flags = parse_debug_string(env_copy, debug_control);
      if (!config.flags)
         config.flags = INTEL_MEASURE_DRAW;
      config.enabled = true;
      config.event_interval = 1;
      config.control_fh = -1;

      /* Overflows of the following defaults will drop data and generate a
       * warning on the output filehandle.
       */

      /* default batch_size allows for 64k renders in a single batch */
      const int DEFAULT_BATCH_SIZE = 64 * 1024;
      config.batch_size = DEFAULT_BATCH_SIZE;

      /* Default buffer_size allows for 64k batches per line of output in the
       * csv.  Overflow may occur for offscreen workloads or large 'interval'
       * settings.
       */
      const int DEFAULT_BUFFER_SIZE = 64 * 1024;
      config.buffer_size = DEFAULT_BUFFER_SIZE;

      const char *filename = strstr(env_copy, "file=");
      const char *start_frame_s = strstr(env_copy, "start=");
      const char *count_frame_s = strstr(env_copy, "count=");
      const char *control_path = strstr(env_copy, "control=");
      const char *interval_s = strstr(env_copy, "interval=");
      const char *batch_size_s = strstr(env_copy, "batch_size=");
      const char *buffer_size_s = strstr(env_copy, "buffer_size=");
      /* Replace each comma with a null terminator, so the option values
       * located above end where the next option begins.
       */
      while (true) {
         char *sep = strrchr(env_copy, ',');
         if (sep == NULL)
            break;
         *sep = '\0';
      }

      if (filename && !__check_suid()) {
         filename += 5;
         config.file = fopen(filename, "w");
         if (!config.file) {
            fprintf(stderr, "INTEL_MEASURE failed to open output file %s: %s\n",
                    filename, strerror(errno));
            abort();
         }
      }

      if (start_frame_s) {
         start_frame_s += 6;
         const int start_frame = atoi(start_frame_s);
         if (start_frame < 0) {
            fprintf(stderr, "INTEL_MEASURE start frame may "
                    "not be negative: %d\n", start_frame);
            abort();
         }

         config.start_frame = start_frame;
         config.enabled = false;
      }

      if (count_frame_s) {
         count_frame_s += 6;
         const int count_frame = atoi(count_frame_s);
         if (count_frame <= 0) {
            fprintf(stderr, "INTEL_MEASURE count frame must be positive: %d\n",
                    count_frame);
            abort();
         }

         config.end_frame = config.start_frame + count_frame;
      }

      if (control_path) {
         control_path += 8;
         /* The third argument of mkfifoat() is a file mode; open(2) flags
          * such as O_CREAT do not belong in it.
          */
         if (mkfifoat(AT_FDCWD, control_path, S_IRUSR | S_IWUSR)) {
            if (errno != EEXIST) {
               fprintf(stderr, "INTEL_MEASURE failed to create control "
                       "fifo %s: %s\n", control_path, strerror(errno));
               abort();
            }
         }

         config.control_fh = openat(AT_FDCWD, control_path,
                                    O_RDONLY | O_NONBLOCK);
         if (config.control_fh == -1) {
            fprintf(stderr, "INTEL_MEASURE failed to open control fifo "
                    "%s: %s\n", control_path, strerror(errno));
            abort();
         }

         /* when using a control fifo, do not start until the user triggers
          * capture
          */
         config.enabled = false;
      }

      if (interval_s) {
         interval_s += 9;
         const int event_interval = atoi(interval_s);
         if (event_interval < 1) {
            fprintf(stderr, "INTEL_MEASURE event_interval must be positive: "
                    "%d\n", event_interval);
            abort();
         }
         config.event_interval = event_interval;
      }

      if (batch_size_s) {
         batch_size_s += 11;
         const int batch_size = atoi(batch_size_s);
         if (batch_size < DEFAULT_BATCH_SIZE) {
            fprintf(stderr, "INTEL_MEASURE minimum batch_size is 64k: "
                    "%d\n", batch_size);
            abort();
         }
         if (batch_size > DEFAULT_BATCH_SIZE * 1024) {
            fprintf(stderr, "INTEL_MEASURE batch_size limited to 64M: "
                    "%d\n", batch_size);
            abort();
         }

         config.batch_size = batch_size;
      }

      if (buffer_size_s) {
         buffer_size_s += 12;
         const int buffer_size = atoi(buffer_size_s);
         if (buffer_size < DEFAULT_BUFFER_SIZE) {
            fprintf(stderr, "INTEL_MEASURE minimum buffer_size is 64k: "
                    "%d\n", buffer_size);
            abort();
         }
         if (buffer_size > DEFAULT_BUFFER_SIZE * 1024) {
            fprintf(stderr, "INTEL_MEASURE buffer_size limited to 64M: "
                    "%d\n", buffer_size);
            abort();
         }

         config.buffer_size = buffer_size;
      }

      fputs("draw_start,draw_end,frame,batch,"
            "event_index,event_count,type,count,vs,tcs,tes,"
            "gs,fs,cs,framebuffer,idle_us,time_us\n",
            config.file);
   }

   device->config = NULL;
   device->frame = 0;
   device->release_batch = NULL;
   pthread_mutex_init(&device->mutex, NULL);
   list_inithead(&device->queued_snapshots);

   if (env)
      device->config = &config;
}

const char *
intel_measure_snapshot_string(enum intel_measure_snapshot_type type)
{
   const char *names[] = {
      [INTEL_SNAPSHOT_UNDEFINED]           = "undefined",
      [INTEL_SNAPSHOT_BLIT]                = "blit",
      [INTEL_SNAPSHOT_CCS_AMBIGUATE]       = "ccs ambiguate",
      [INTEL_SNAPSHOT_CCS_COLOR_CLEAR]     = "ccs color clear",
      [INTEL_SNAPSHOT_CCS_PARTIAL_RESOLVE] = "ccs partial resolve",
      [INTEL_SNAPSHOT_CCS_RESOLVE]         = "ccs resolve",
      [INTEL_SNAPSHOT_COMPUTE]             = "compute",
      [INTEL_SNAPSHOT_COPY]                = "copy",
      [INTEL_SNAPSHOT_DRAW]                = "draw",
      [INTEL_SNAPSHOT_HIZ_AMBIGUATE]       = "hiz ambiguate",
      [INTEL_SNAPSHOT_HIZ_CLEAR]           = "hiz clear",
      [INTEL_SNAPSHOT_HIZ_RESOLVE]         = "hiz resolve",
      [INTEL_SNAPSHOT_MCS_COLOR_CLEAR]     = "mcs color clear",
      [INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE] = "mcs partial resolve",
      [INTEL_SNAPSHOT_SLOW_COLOR_CLEAR]    = "slow color clear",
      [INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR]    = "slow depth clear",
      [INTEL_SNAPSHOT_SECONDARY_BATCH]     = "secondary command buffer",
      [INTEL_SNAPSHOT_END]                 = "end",
   };
   assert(type < ARRAY_SIZE(names));
   assert(names[type] != NULL);
   assert(type != INTEL_SNAPSHOT_UNDEFINED);
   return names[type];
}

/**
 * Indicate to the caller whether a new snapshot should be started.
 *
 * Callers provide rendering state to this function to determine whether the
 * current start event should be skipped. Depending on the configuration
 * flags, a new snapshot may start:
 *  - at every event
 *  - when the program changes
 *  - after a batch is submitted
 *  - at frame boundaries
 *
 * Returns true if a snapshot should be started.
 */
bool
intel_measure_state_changed(const struct intel_measure_batch *batch,
                            uintptr_t vs, uintptr_t tcs, uintptr_t tes,
                            uintptr_t gs, uintptr_t fs, uintptr_t cs)
{
   if (batch->index == 0) {
      /* always record the first event */
      return true;
   }

   const struct intel_measure_snapshot *last_snap =
      &batch->snapshots[batch->index - 1];

   if (config.flags & INTEL_MEASURE_DRAW)
      return true;

   if (batch->index % 2 == 0) {
      /* no snapshot is running, but we have a start event */
      return true;
   }

   if (config.flags & (INTEL_MEASURE_FRAME | INTEL_MEASURE_BATCH)) {
      /* only start collection when index == 0, at the beginning of a batch */
      return false;
   }

   if (config.flags & INTEL_MEASURE_RENDERPASS) {
      return ((last_snap->framebuffer != batch->framebuffer) ||
              /* compute workloads are always in their own renderpass */
              (cs != 0));
   }

   /* remaining comparisons check the state of the render pipeline for
    * INTEL_MEASURE_SHADER
    */
   assert(config.flags & INTEL_MEASURE_SHADER);

   if (!vs && !tcs && !tes && !gs && !fs && !cs) {
      /* blorp always changes program */
      return true;
   }

   return (last_snap->vs  != vs ||
           last_snap->tcs != tcs ||
           last_snap->tes != tes ||
           last_snap->gs  != gs ||
           last_snap->fs  != fs ||
           last_snap->cs  != cs);
}

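/* Sketch of driving a capture through the control fifo from a shell.  The
 * fifo path is whatever was passed via INTEL_MEASURE=control=...;
 * /tmp/measure_fifo is illustrative:
 *
 *   echo 5 > /tmp/measure_fifo   # capture the next 5 frames
 *   echo 0 > /tmp/measure_fifo   # stop an in-progress capture
 */
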
/**
 * Notify intel_measure that a frame is about to begin.
 *
 * Depending on the configuration (start/count settings) and on commands
 * received over the control fifo, measurement may be enabled or disabled at
 * frame boundaries.
 */
void
intel_measure_frame_transition(unsigned frame)
{
   if (frame == config.start_frame)
      config.enabled = true;
   else if (frame == config.end_frame)
      config.enabled = false;

   /* user commands to the control fifo will override any start/count
    * environment settings
    */
   if (config.control_fh != -1) {
      while (true) {
         const unsigned BUF_SIZE = 128;
         char buf[BUF_SIZE];
         ssize_t bytes = read(config.control_fh, buf, BUF_SIZE - 1);
         if (bytes == 0)
            break;
         if (bytes == -1) {
            /* EAGAIN indicates that a writer holds the fifo open but has
             * written no data; there is nothing to process yet.
             */
            if (errno == EAGAIN)
               break;
            fprintf(stderr, "INTEL_MEASURE failed to read control fifo: %s\n",
                    strerror(errno));
            abort();
         }

         buf[bytes] = '\0';
         char *nptr = buf, *endptr = buf;
         while (*nptr != '\0' && *endptr != '\0') {
            long fcount = strtol(nptr, &endptr, 10);
            if (nptr == endptr) {
               config.enabled = false;
               fprintf(stderr, "INTEL_MEASURE invalid frame count on "
                       "control fifo.\n");
               lseek(config.control_fh, 0, SEEK_END);
               break;
            } else if (fcount == 0) {
               config.enabled = false;
            } else {
               config.enabled = true;
               config.end_frame = frame + fcount;
            }

            nptr = endptr + 1;
         }
      }
   }
}

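/* GPU timestamps wrap at 2^TIMESTAMP_BITS ticks.  raw_timestamp_delta()
 * below assumes at most one wrap between samples: e.g. with
 * time0 = (1ull << 36) - 100 and time1 = 50, the delta is 150 rather than
 * an underflowed difference.
 */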
#define TIMESTAMP_BITS 36
static uint64_t
raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}

/**
 * Verify that rendering has completed for the batch.
 *
 * Rendering is complete when the last timestamp has been written.
 */
bool
intel_measure_ready(struct intel_measure_batch *batch)
{
   assert(batch->timestamps);
   assert(batch->index > 1);
   return (batch->timestamps[batch->index - 1] != 0);
}
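
/* Snapshots are recorded in begin/end pairs: snapshots[i] and snapshots[i+1]
 * bracket a single event, with the corresponding GPU timestamps written to
 * timestamps[i] and timestamps[i+1].
 */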

/**
 * Submit completed snapshots for buffering.
 *
 * Snapshot data becomes available when asynchronous rendering completes.
 * Depending on configuration, snapshot data may need to be collated before
 * writing to the output file.
 */
static void
intel_measure_push_result(struct intel_measure_device *device,
                          struct intel_measure_batch *batch)
{
   struct intel_measure_ringbuffer *rb = device->ringbuffer;

   uint64_t *timestamps = batch->timestamps;
   assert(timestamps != NULL);
   assert(batch->index == 0 || timestamps[0] != 0);

   for (int i = 0; i < batch->index; i += 2) {
      const struct intel_measure_snapshot *begin = &batch->snapshots[i];
      const struct intel_measure_snapshot *end = &batch->snapshots[i+1];

      assert(end->type == INTEL_SNAPSHOT_END);

      if (begin->type == INTEL_SNAPSHOT_SECONDARY_BATCH) {
         assert(begin->secondary != NULL);
         begin->secondary->batch_count = batch->batch_count;
         intel_measure_push_result(device, begin->secondary);
         continue;
      }

      const uint64_t prev_end_ts = rb->results[rb->head].end_ts;

      /* advance ring buffer */
      if (++rb->head == config.buffer_size)
         rb->head = 0;
      if (rb->head == rb->tail) {
         static bool warned = false;
         if (unlikely(!warned)) {
            fprintf(config.file,
                    "WARNING: Buffered data exceeds INTEL_MEASURE limit: %d. "
                    "Data has been dropped. "
                    "Increase setting with INTEL_MEASURE=buffer_size={count}\n",
                    config.buffer_size);
            warned = true;
         }
         break;
      }

      struct intel_measure_buffered_result *buffered_result =
         &rb->results[rb->head];

      memset(buffered_result, 0, sizeof(*buffered_result));
      memcpy(&buffered_result->snapshot, begin,
             sizeof(struct intel_measure_snapshot));
      buffered_result->start_ts = timestamps[i];
      buffered_result->end_ts = timestamps[i+1];
      buffered_result->idle_duration =
         raw_timestamp_delta(prev_end_ts, buffered_result->start_ts);
      buffered_result->frame = batch->frame;
      buffered_result->batch_count = batch->batch_count;
      buffered_result->event_index = i / 2;
      buffered_result->snapshot.event_count = end->event_count;
   }
}

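/* Ringbuffer convention used by the helpers below: rb->head indexes the most
 * recently pushed result and rb->tail the most recently consumed one, so the
 * oldest unconsumed result lives at tail + 1 (modulo config.buffer_size).
 */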
static unsigned
ringbuffer_size(const struct intel_measure_ringbuffer *rb)
{
   unsigned head = rb->head;
   if (head < rb->tail)
      head += config.buffer_size;
   return head - rb->tail;
}

static const struct intel_measure_buffered_result *
ringbuffer_pop(struct intel_measure_ringbuffer *rb)
{
   if (rb->tail == rb->head) {
      /* encountered ringbuffer overflow while processing events */
      return NULL;
   }

   if (++rb->tail == config.buffer_size)
      rb->tail = 0;
   return &rb->results[rb->tail];
}

static const struct intel_measure_buffered_result *
ringbuffer_peek(const struct intel_measure_ringbuffer *rb, unsigned index)
{
   int result_offset = rb->tail + index + 1;
   if (result_offset >= config.buffer_size)
      result_offset -= config.buffer_size;
   return &rb->results[result_offset];
}

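/* Example of how the interval setting interacts with the flags handled
 * below: with INTEL_MEASURE=batch,interval=10 (values illustrative), ten
 * command buffers are combined into each line of csv output, unless a frame
 * boundary cuts an interval short.
 */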

/**
 * Determine the number of buffered events that must be combined for the next
 * line of csv output. Returns 0 if more events are needed.
 */
static unsigned
buffered_event_count(struct intel_measure_device *device)
{
   const struct intel_measure_ringbuffer *rb = device->ringbuffer;
   const unsigned buffered_event_count = ringbuffer_size(rb);
   if (buffered_event_count == 0) {
      /* no events to collect */
      return 0;
   }

   /* count the number of buffered events required to meet the configuration */
   if (config.flags & (INTEL_MEASURE_DRAW |
                       INTEL_MEASURE_RENDERPASS |
                       INTEL_MEASURE_SHADER)) {
      /* For these flags, every buffered event represents a line in the
       * output.  None of these events span batches.  If the event interval
       * crosses a batch boundary, then the next interval starts with the new
       * batch.
       */
      return 1;
   }

   const unsigned start_frame = ringbuffer_peek(rb, 0)->frame;
   if (config.flags & INTEL_MEASURE_BATCH) {
      /* each buffered event is a command buffer.  The number of events to
       * process is the same as the interval, unless the interval crosses a
       * frame boundary
       */
      if (buffered_event_count < config.event_interval) {
         /* not enough events */
         return 0;
      }

      /* Imperfect frame tracking requires us to allow for *older* frames */
      if (ringbuffer_peek(rb, config.event_interval - 1)->frame <= start_frame) {
         /* No frame transition.  The next {interval} events should be combined. */
         return config.event_interval;
      }

      /* Else a frame transition occurs within the interval.  Find the
       * transition, so the following line of output begins with the batch
       * that starts the new frame.
       */
      for (int event_index = 1;
           event_index <= config.event_interval;
           ++event_index) {
         if (ringbuffer_peek(rb, event_index)->frame > start_frame)
            return event_index;
      }

      assert(false);
   }

   /* Else we need to search buffered events to find the matching frame
    * transition for our interval.
    */
   assert(config.flags & INTEL_MEASURE_FRAME);
   for (int event_index = 1;
        event_index < buffered_event_count;
        ++event_index) {
      const int latest_frame = ringbuffer_peek(rb, event_index)->frame;
      if (latest_frame - start_frame >= config.event_interval)
         return event_index;
   }

   return 0;
}

/**
 * Take result_count events from the ringbuffer and output them as a single
 * line.
 */
static void
print_combined_results(struct intel_measure_device *measure_device,
                       int result_count,
                       struct intel_device_info *info)
{
   if (result_count == 0)
      return;

   struct intel_measure_ringbuffer *result_rb = measure_device->ringbuffer;
   assert(ringbuffer_size(result_rb) >= result_count);
   const struct intel_measure_buffered_result *start_result =
      ringbuffer_pop(result_rb);
   if (start_result == NULL)
      return;
   const struct intel_measure_buffered_result *current_result = start_result;
   --result_count;

   uint64_t duration_ts = raw_timestamp_delta(start_result->start_ts,
                                              current_result->end_ts);
   unsigned event_count = start_result->snapshot.event_count;
   while (result_count-- > 0) {
      assert(ringbuffer_size(result_rb) > 0);
      current_result = ringbuffer_pop(result_rb);
      if (current_result == NULL)
         return;
      duration_ts += raw_timestamp_delta(current_result->start_ts,
                                         current_result->end_ts);
      event_count += current_result->snapshot.event_count;
   }

   uint64_t duration_idle_ns =
      intel_device_info_timebase_scale(info, start_result->idle_duration);
   uint64_t duration_time_ns =
      intel_device_info_timebase_scale(info, duration_ts);
   const struct intel_measure_snapshot *begin = &start_result->snapshot;
   fprintf(config.file, "%"PRIu64",%"PRIu64",%u,%u,%u,%u,%s,%u,"
           "0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR","
           "0x%"PRIxPTR",0x%"PRIxPTR",%.3lf,%.3lf\n",
           start_result->start_ts, current_result->end_ts,
           start_result->frame, start_result->batch_count,
           start_result->event_index, event_count,
           begin->event_name, begin->count,
           begin->vs, begin->tcs, begin->tes, begin->gs, begin->fs, begin->cs,
           begin->framebuffer,
           (double)duration_idle_ns / 1000.0,
           (double)duration_time_ns / 1000.0);
}

/**
 * Empty the ringbuffer of events that can be printed.
 */
static void
intel_measure_print(struct intel_measure_device *device,
                    struct intel_device_info *info)
{
   while (true) {
      const int events_to_combine = buffered_event_count(device);
      if (events_to_combine == 0)
         break;
      print_combined_results(device, events_to_combine, info);
   }
}

/**
 * Collect snapshots from completed command buffers and submit them to
 * intel_measure for printing.
 */
void
intel_measure_gather(struct intel_measure_device *measure_device,
                     struct intel_device_info *info)
{
   pthread_mutex_lock(&measure_device->mutex);

   /* Iterate snapshots and collect if ready.  Each snapshot queue will be
    * in-order, but we must determine which queue has the oldest batch.
    */
   while (!list_is_empty(&measure_device->queued_snapshots)) {
      struct intel_measure_batch *batch =
         list_first_entry(&measure_device->queued_snapshots,
                          struct intel_measure_batch, link);

      if (!intel_measure_ready(batch)) {
         /* command buffer has begun execution on the gpu, but has not
          * completed.
          */
         break;
      }

      list_del(&batch->link);
      assert(batch->index % 2 == 0);

      intel_measure_push_result(measure_device, batch);

      batch->index = 0;
      batch->frame = 0;
      if (measure_device->release_batch)
         measure_device->release_batch(batch);
   }

   intel_measure_print(measure_device, info);
   pthread_mutex_unlock(&measure_device->mutex);
}