/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file intel_measure.c
 */

#include "intel_measure.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#define __STDC_FORMAT_MACROS 1
#include <inttypes.h>

#include "dev/intel_device_info.h"
#include "util/debug.h"
#include "util/macros.h"


static const struct debug_control debug_control[] = {
   { "draw", INTEL_MEASURE_DRAW },
   { "rt", INTEL_MEASURE_RENDERPASS },
   { "shader", INTEL_MEASURE_SHADER },
   { "batch", INTEL_MEASURE_BATCH },
   { "frame", INTEL_MEASURE_FRAME },
   { NULL, 0 }
};
static struct intel_measure_config config;
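
/* Example INTEL_MEASURE settings.  The flag names and key=value options
 * correspond to the debug_control table above and the parsing in
 * intel_measure_init() below; the file paths and numbers here are
 * illustrative only:
 *
 *    INTEL_MEASURE=draw                        one line of csv per draw
 *    INTEL_MEASURE=rt,file=/tmp/measure.csv    one line per render pass,
 *                                              written to a file
 *    INTEL_MEASURE=frame,start=100,count=50    measure frames 100-149 only
 *    INTEL_MEASURE=batch,interval=10           combine every 10 batches
 *    INTEL_MEASURE=draw,control=/tmp/ctrl      start/stop via a fifo
 */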

void
intel_measure_init(struct intel_measure_device *device)
{
   static bool once = false;
   const char *env = getenv("INTEL_MEASURE");
   if (unlikely(!once)) {
      once = true;
      memset(&config, 0, sizeof(struct intel_measure_config));
      if (!env)
         return;

      char env_copy[1024];
      strncpy(env_copy, env, 1024);
      env_copy[1023] = '\0';

      config.file = stderr;
      config.flags = parse_debug_string(env_copy, debug_control);
      if (!config.flags)
         config.flags = INTEL_MEASURE_DRAW;
      config.enabled = true;
      config.event_interval = 1;
      config.control_fh = -1;

      /* Overflows of the following defaults will drop data and generate a
       * warning on the output filehandle.
       */

      /* default batch_size allows for 64k renders in a single batch */
      const int DEFAULT_BATCH_SIZE = 64 * 1024;
      config.batch_size = DEFAULT_BATCH_SIZE;

      /* Default buffer_size allows for 64k batches per line of output in the
       * csv. Overflow may occur for offscreen workloads or large 'interval'
       * settings.
       */
      const int DEFAULT_BUFFER_SIZE = 64 * 1024;
      config.buffer_size = DEFAULT_BUFFER_SIZE;

      const char *filename = strstr(env_copy, "file=");
      const char *start_frame_s = strstr(env_copy, "start=");
      const char *count_frame_s = strstr(env_copy, "count=");
      const char *control_path = strstr(env_copy, "control=");
      const char *interval_s = strstr(env_copy, "interval=");
      const char *batch_size_s = strstr(env_copy, "batch_size=");
      const char *buffer_size_s = strstr(env_copy, "buffer_size=");

      /* replace each ',' with a terminator, so the option values located
       * above are NUL-terminated
       */
      while (true) {
         char *sep = strrchr(env_copy, ',');
         if (sep == NULL)
            break;
         *sep = '\0';
      }

      if (filename && !__check_suid()) {
         filename += 5;
         config.file = fopen(filename, "w");
         if (!config.file) {
            fprintf(stderr, "INTEL_MEASURE failed to open output file %s: %s\n",
                    filename, strerror(errno));
            abort();
         }
      }

      if (start_frame_s) {
         start_frame_s += 6;
         const int start_frame = atoi(start_frame_s);
         if (start_frame < 0) {
            fprintf(stderr, "INTEL_MEASURE start frame may "
                    "not be negative: %d\n", start_frame);
            abort();
         }

         config.start_frame = start_frame;
         config.enabled = false;
      }

      if (count_frame_s) {
         count_frame_s += 6;
         const int count_frame = atoi(count_frame_s);
         if (count_frame <= 0) {
            fprintf(stderr, "INTEL_MEASURE count frame must be positive: %d\n",
                    count_frame);
            abort();
         }

         config.end_frame = config.start_frame + count_frame;
      }

      if (control_path) {
         control_path += 8;
         if (mkfifoat(AT_FDCWD, control_path, S_IRUSR | S_IWUSR)) {
            if (errno != EEXIST) {
               fprintf(stderr, "INTEL_MEASURE failed to create control "
                       "fifo %s: %s\n", control_path, strerror(errno));
               abort();
            }
         }

         config.control_fh = openat(AT_FDCWD, control_path,
                                    O_RDONLY | O_NONBLOCK);
         if (config.control_fh == -1) {
            fprintf(stderr, "INTEL_MEASURE failed to open control fifo "
                    "%s: %s\n", control_path, strerror(errno));
            abort();
         }

         /* when using a control fifo, do not start until the user triggers
          * capture
          */
         config.enabled = false;
      }

      if (interval_s) {
         interval_s += 9;
         const int event_interval = atoi(interval_s);
         if (event_interval < 1) {
            fprintf(stderr, "INTEL_MEASURE event_interval must be positive: "
                    "%d\n", event_interval);
            abort();
         }
         config.event_interval = event_interval;
      }

      if (batch_size_s) {
         batch_size_s += 11;
         const int batch_size = atoi(batch_size_s);
         if (batch_size < DEFAULT_BATCH_SIZE) {
            fprintf(stderr, "INTEL_MEASURE minimum batch_size is 64k: "
                    "%d\n", batch_size);
            abort();
         }
         if (batch_size > DEFAULT_BATCH_SIZE * 1024) {
            fprintf(stderr, "INTEL_MEASURE batch_size limited to 64M: "
                    "%d\n", batch_size);
            abort();
         }

         config.batch_size = batch_size;
      }

      if (buffer_size_s) {
         buffer_size_s += 12;
         const int buffer_size = atoi(buffer_size_s);
         if (buffer_size < DEFAULT_BUFFER_SIZE) {
            fprintf(stderr, "INTEL_MEASURE minimum buffer_size is 64k: "
                    "%d\n", buffer_size);
            abort();
         }
         if (buffer_size > DEFAULT_BUFFER_SIZE * 1024) {
            fprintf(stderr, "INTEL_MEASURE buffer_size limited to 64M: "
                    "%d\n", buffer_size);
            abort();
         }

         config.buffer_size = buffer_size;
      }

      fputs("draw_start,draw_end,frame,batch,"
            "event_index,event_count,type,count,vs,tcs,tes,"
            "gs,fs,cs,framebuffer,idle_us,time_us\n",
            config.file);
   }

   device->config = NULL;
   device->frame = 0;
   device->release_batch = NULL;
   pthread_mutex_init(&device->mutex, NULL);
   list_inithead(&device->queued_snapshots);

   if (env)
      device->config = &config;
}
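
/* For illustration, one line of the resulting csv, matching the header
 * written above and the fprintf in print_combined_results() below (all
 * values here are made up):
 *
 *    draw_start,draw_end,frame,batch,event_index,event_count,type,count,...
 *    1012131311,1012135221,1,1,0,1,draw,630,0xa9840,0x0,0x0,0x0,0xb1a90,0x0,0xffdc4a50,1.234,12.508
 */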

const char *
intel_measure_snapshot_string(enum intel_measure_snapshot_type type)
{
   const char *names[] = {
      [INTEL_SNAPSHOT_UNDEFINED] = "undefined",
      [INTEL_SNAPSHOT_BLIT] = "blit",
      [INTEL_SNAPSHOT_CCS_AMBIGUATE] = "ccs ambiguate",
      [INTEL_SNAPSHOT_CCS_COLOR_CLEAR] = "ccs color clear",
      [INTEL_SNAPSHOT_CCS_PARTIAL_RESOLVE] = "ccs partial resolve",
      [INTEL_SNAPSHOT_CCS_RESOLVE] = "ccs resolve",
      [INTEL_SNAPSHOT_COMPUTE] = "compute",
      [INTEL_SNAPSHOT_COPY] = "copy",
      [INTEL_SNAPSHOT_DRAW] = "draw",
      [INTEL_SNAPSHOT_HIZ_AMBIGUATE] = "hiz ambiguate",
      [INTEL_SNAPSHOT_HIZ_CLEAR] = "hiz clear",
      [INTEL_SNAPSHOT_HIZ_RESOLVE] = "hiz resolve",
      [INTEL_SNAPSHOT_MCS_COLOR_CLEAR] = "mcs color clear",
      [INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE] = "mcs partial resolve",
      [INTEL_SNAPSHOT_SLOW_COLOR_CLEAR] = "slow color clear",
      [INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR] = "slow depth clear",
      [INTEL_SNAPSHOT_SECONDARY_BATCH] = "secondary command buffer",
      [INTEL_SNAPSHOT_END] = "end",
   };
   assert(type < ARRAY_SIZE(names));
   assert(names[type] != NULL);
   assert(type != INTEL_SNAPSHOT_UNDEFINED);
   return names[type];
}

/**
 * Indicate to the caller whether a new snapshot should be started.
 *
 * Callers provide rendering state to this method to determine whether the
 * current start event should be skipped. Depending on the configuration
 * flags, a new snapshot may start:
 *  - at every event
 *  - when the program changes
 *  - after a batch is submitted
 *  - at frame boundaries
 *
 * Returns true if a snapshot should be started.
 */
bool
intel_measure_state_changed(const struct intel_measure_batch *batch,
                            uintptr_t vs, uintptr_t tcs, uintptr_t tes,
                            uintptr_t gs, uintptr_t fs, uintptr_t cs)
{
   if (batch->index == 0) {
      /* always record the first event */
      return true;
   }

   const struct intel_measure_snapshot *last_snap =
      &batch->snapshots[batch->index - 1];

   if (config.flags & INTEL_MEASURE_DRAW)
      return true;

   if (batch->index % 2 == 0) {
      /* no snapshot is running, but we have a start event */
      return true;
   }

   if (config.flags & (INTEL_MEASURE_FRAME | INTEL_MEASURE_BATCH)) {
      /* only start collection when index == 0, at the beginning of a batch */
      return false;
   }

   if (config.flags & INTEL_MEASURE_RENDERPASS) {
      return ((last_snap->framebuffer != batch->framebuffer) ||
              /* compute workloads are always in their own renderpass */
              (cs != 0));
   }

   /* remaining comparisons check the state of the render pipeline for
    * INTEL_MEASURE_SHADER
    */
   assert(config.flags & INTEL_MEASURE_SHADER);

   if (!vs && !tcs && !tes && !gs && !fs && !cs) {
      /* blorp always changes program */
      return true;
   }

   return (last_snap->vs != vs ||
           last_snap->tcs != tcs ||
           last_snap->tes != tes ||
           last_snap->gs != gs ||
           last_snap->fs != fs ||
           last_snap->cs != cs);
}
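
/* A hypothetical call site, for illustration only (the real hooks live in
 * the driver backends): before recording a draw, pass the pipeline's shader
 * handles and begin a new snapshot only when required by the configured
 * flags.  begin_snapshot() is a made-up helper.
 *
 *    if (intel_measure_state_changed(measure_batch,
 *                                    (uintptr_t) vs_prog, (uintptr_t) tcs_prog,
 *                                    (uintptr_t) tes_prog, (uintptr_t) gs_prog,
 *                                    (uintptr_t) fs_prog, (uintptr_t) 0))
 *       begin_snapshot(measure_batch);
 */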

/**
 * Notify intel_measure that a frame is about to begin.
 *
 * Measurement may be started or stopped at frame boundaries, as directed by
 * the start/count environment settings or by commands read from the control
 * fifo.
 */
void
intel_measure_frame_transition(unsigned frame)
{
   if (frame == config.start_frame)
      config.enabled = true;
   else if (frame == config.end_frame)
      config.enabled = false;

   /* user commands to the control fifo will override any start/count
    * environment settings
    */
   if (config.control_fh != -1) {
      while (true) {
         const unsigned BUF_SIZE = 128;
         char buf[BUF_SIZE];
         ssize_t bytes = read(config.control_fh, buf, BUF_SIZE - 1);
         if (bytes == 0)
            break;
         if (bytes == -1) {
            fprintf(stderr, "INTEL_MEASURE failed to read control fifo: %s\n",
                    strerror(errno));
            abort();
         }

         buf[bytes] = '\0';
         char *nptr = buf, *endptr = buf;
         while (*nptr != '\0' && *endptr != '\0') {
            long fcount = strtol(nptr, &endptr, 10);
            if (nptr == endptr) {
               config.enabled = false;
               fprintf(stderr, "INTEL_MEASURE invalid frame count on "
                       "control fifo.\n");
               lseek(config.control_fh, 0, SEEK_END);
               break;
            } else if (fcount == 0) {
               config.enabled = false;
            } else {
               config.enabled = true;
               config.end_frame = frame + fcount;
            }

            nptr = endptr + 1;
         }
      }
   }
}
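
/* For example, if the driver was launched with
 * INTEL_MEASURE=control=/tmp/measure_ctrl, capture can be toggled from a
 * shell while the workload runs (the path is illustrative):
 *
 *    $ echo 10 > /tmp/measure_ctrl     # capture the next 10 frames
 *    $ echo 0 > /tmp/measure_ctrl      # stop capturing immediately
 */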
" 436 "Increase setting with INTEL_MEASURE=buffer_size={count}\n", 437 config.buffer_size); 438 warned = true; 439 } 440 break; 441 } 442 443 struct intel_measure_buffered_result *buffered_result = 444 &rb->results[rb->head]; 445 446 memset(buffered_result, 0, sizeof(*buffered_result)); 447 memcpy(&buffered_result->snapshot, begin, 448 sizeof(struct intel_measure_snapshot)); 449 buffered_result->start_ts = timestamps[i]; 450 buffered_result->end_ts = timestamps[i+1]; 451 buffered_result->idle_duration = 452 raw_timestamp_delta(prev_end_ts, buffered_result->start_ts); 453 buffered_result->frame = batch->frame; 454 buffered_result->batch_count = batch->batch_count; 455 buffered_result->event_index = i / 2; 456 buffered_result->snapshot.event_count = end->event_count; 457 } 458} 459 460static unsigned 461ringbuffer_size(const struct intel_measure_ringbuffer *rb) 462{ 463 unsigned head = rb->head; 464 if (head < rb->tail) 465 head += config.buffer_size; 466 return head - rb->tail; 467} 468 469static const struct intel_measure_buffered_result * 470ringbuffer_pop(struct intel_measure_ringbuffer *rb) 471{ 472 if (rb->tail == rb->head) { 473 /* encountered ringbuffer overflow while processing events */ 474 return NULL; 475 } 476 477 if (++rb->tail == config.buffer_size) 478 rb->tail = 0; 479 return &rb->results[rb->tail]; 480} 481 482static const struct intel_measure_buffered_result * 483ringbuffer_peek(const struct intel_measure_ringbuffer *rb, unsigned index) 484{ 485 int result_offset = rb->tail + index + 1; 486 if (result_offset >= config.buffer_size) 487 result_offset -= config.buffer_size; 488 return &rb->results[result_offset]; 489} 490 491 492/** 493 * Determine the number of buffered events that must be combined for the next 494 * line of csv output. Returns 0 if more events are needed. 495 */ 496static unsigned 497buffered_event_count(struct intel_measure_device *device) 498{ 499 const struct intel_measure_ringbuffer *rb = device->ringbuffer; 500 const unsigned buffered_event_count = ringbuffer_size(rb); 501 if (buffered_event_count == 0) { 502 /* no events to collect */ 503 return 0; 504 } 505 506 /* count the number of buffered events required to meet the configuration */ 507 if (config.flags & (INTEL_MEASURE_DRAW | 508 INTEL_MEASURE_RENDERPASS | 509 INTEL_MEASURE_SHADER)) { 510 /* For these flags, every buffered event represents a line in the 511 * output. None of these events span batches. If the event interval 512 * crosses a batch boundary, then the next interval starts with the new 513 * batch. 514 */ 515 return 1; 516 } 517 518 const unsigned start_frame = ringbuffer_peek(rb, 0)->frame; 519 if (config.flags & INTEL_MEASURE_BATCH) { 520 /* each buffered event is a command buffer. The number of events to 521 * process is the same as the interval, unless the interval crosses a 522 * frame boundary 523 */ 524 if (buffered_event_count < config.event_interval) { 525 /* not enough events */ 526 return 0; 527 } 528 529 /* Imperfect frame tracking requires us to allow for *older* frames */ 530 if (ringbuffer_peek(rb, config.event_interval - 1)->frame <= start_frame) { 531 /* No frame transition. The next {interval} events should be combined. */ 532 return config.event_interval; 533 } 534 535 /* Else a frame transition occurs within the interval. Find the 536 * transition, so the following line of output begins with the batch 537 * that starts the new frame. 

/**
 * Verify that rendering has completed for the batch.
 *
 * Rendering is complete when the last timestamp has been written.
 */
bool
intel_measure_ready(struct intel_measure_batch *batch)
{
   assert(batch->timestamps);
   assert(batch->index > 1);
   return (batch->timestamps[batch->index - 1] != 0);
}

/**
 * Submit completed snapshots for buffering.
 *
 * Snapshot data becomes available when asynchronous rendering completes.
 * Depending on configuration, snapshot data may need to be collated before
 * writing to the output file.
 */
static void
intel_measure_push_result(struct intel_measure_device *device,
                          struct intel_measure_batch *batch)
{
   struct intel_measure_ringbuffer *rb = device->ringbuffer;

   uint64_t *timestamps = batch->timestamps;
   assert(timestamps != NULL);
   assert(batch->index == 0 || timestamps[0] != 0);

   for (int i = 0; i < batch->index; i += 2) {
      const struct intel_measure_snapshot *begin = &batch->snapshots[i];
      const struct intel_measure_snapshot *end = &batch->snapshots[i+1];

      assert(end->type == INTEL_SNAPSHOT_END);

      if (begin->type == INTEL_SNAPSHOT_SECONDARY_BATCH) {
         assert(begin->secondary != NULL);
         begin->secondary->batch_count = batch->batch_count;
         intel_measure_push_result(device, begin->secondary);
         continue;
      }

      const uint64_t prev_end_ts = rb->results[rb->head].end_ts;

      /* advance ring buffer */
      if (++rb->head == config.buffer_size)
         rb->head = 0;
      if (rb->head == rb->tail) {
         static bool warned = false;
         if (unlikely(!warned)) {
            fprintf(config.file,
                    "WARNING: Buffered data exceeds INTEL_MEASURE limit: %d. "
                    "Data has been dropped. "
                    "Increase setting with INTEL_MEASURE=buffer_size={count}\n",
                    config.buffer_size);
            warned = true;
         }
         break;
      }

      struct intel_measure_buffered_result *buffered_result =
         &rb->results[rb->head];

      memset(buffered_result, 0, sizeof(*buffered_result));
      memcpy(&buffered_result->snapshot, begin,
             sizeof(struct intel_measure_snapshot));
      buffered_result->start_ts = timestamps[i];
      buffered_result->end_ts = timestamps[i+1];
      buffered_result->idle_duration =
         raw_timestamp_delta(prev_end_ts, buffered_result->start_ts);
      buffered_result->frame = batch->frame;
      buffered_result->batch_count = batch->batch_count;
      buffered_result->event_index = i / 2;
      buffered_result->snapshot.event_count = end->event_count;
   }
}

static unsigned
ringbuffer_size(const struct intel_measure_ringbuffer *rb)
{
   unsigned head = rb->head;
   if (head < rb->tail)
      head += config.buffer_size;
   return head - rb->tail;
}

static const struct intel_measure_buffered_result *
ringbuffer_pop(struct intel_measure_ringbuffer *rb)
{
   if (rb->tail == rb->head) {
      /* encountered ringbuffer overflow while processing events */
      return NULL;
   }

   if (++rb->tail == config.buffer_size)
      rb->tail = 0;
   return &rb->results[rb->tail];
}

static const struct intel_measure_buffered_result *
ringbuffer_peek(const struct intel_measure_ringbuffer *rb, unsigned index)
{
   int result_offset = rb->tail + index + 1;
   if (result_offset >= config.buffer_size)
      result_offset -= config.buffer_size;
   return &rb->results[result_offset];
}
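
/* Note on the ring buffer convention used above: head is the slot most
 * recently written and tail the slot most recently consumed, so the buffer
 * is empty when head == tail and one slot always stays unused.  E.g. with
 * buffer_size == 4, three results can be buffered (head == 3, tail == 0,
 * ringbuffer_size() == 3); pushing a fourth would collide with tail, and
 * intel_measure_push_result() drops the data instead.
 */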

/**
 * Determine the number of buffered events that must be combined for the next
 * line of csv output. Returns 0 if more events are needed.
 */
static unsigned
buffered_event_count(struct intel_measure_device *device)
{
   const struct intel_measure_ringbuffer *rb = device->ringbuffer;
   const unsigned buffered_event_count = ringbuffer_size(rb);
   if (buffered_event_count == 0) {
      /* no events to collect */
      return 0;
   }

   /* count the number of buffered events required to meet the configuration */
   if (config.flags & (INTEL_MEASURE_DRAW |
                       INTEL_MEASURE_RENDERPASS |
                       INTEL_MEASURE_SHADER)) {
      /* For these flags, every buffered event represents a line in the
       * output. None of these events span batches. If the event interval
       * crosses a batch boundary, then the next interval starts with the new
       * batch.
       */
      return 1;
   }

   const unsigned start_frame = ringbuffer_peek(rb, 0)->frame;
   if (config.flags & INTEL_MEASURE_BATCH) {
      /* each buffered event is a command buffer. The number of events to
       * process is the same as the interval, unless the interval crosses a
       * frame boundary.
       */
      if (buffered_event_count < config.event_interval) {
         /* not enough events */
         return 0;
      }

      /* Imperfect frame tracking requires us to allow for *older* frames */
      if (ringbuffer_peek(rb, config.event_interval - 1)->frame <= start_frame) {
         /* No frame transition. The next {interval} events should be combined. */
         return config.event_interval;
      }

      /* Else a frame transition occurs within the interval. Find the
       * transition, so the following line of output begins with the batch
       * that starts the new frame.
       */
      for (int event_index = 1;
           event_index <= config.event_interval;
           ++event_index) {
         if (ringbuffer_peek(rb, event_index)->frame > start_frame)
            return event_index;
      }

      assert(false);
   }

   /* Else we need to search buffered events to find the matching frame
    * transition for our interval.
    */
   assert(config.flags & INTEL_MEASURE_FRAME);
   for (int event_index = 1;
        event_index < buffered_event_count;
        ++event_index) {
      const int latest_frame = ringbuffer_peek(rb, event_index)->frame;
      if (latest_frame - start_frame >= config.event_interval)
         return event_index;
   }

   return 0;
}

/**
 * Take result_count events from the ringbuffer and output them as a single
 * line of csv.
 */
static void
print_combined_results(struct intel_measure_device *measure_device,
                       int result_count,
                       struct intel_device_info *info)
{
   if (result_count == 0)
      return;

   struct intel_measure_ringbuffer *result_rb = measure_device->ringbuffer;
   assert(ringbuffer_size(result_rb) >= result_count);
   const struct intel_measure_buffered_result *start_result =
      ringbuffer_pop(result_rb);
   const struct intel_measure_buffered_result *current_result = start_result;

   if (start_result == NULL)
      return;
   --result_count;

   uint64_t duration_ts = raw_timestamp_delta(start_result->start_ts,
                                              current_result->end_ts);
   unsigned event_count = start_result->snapshot.event_count;
   while (result_count-- > 0) {
      assert(ringbuffer_size(result_rb) > 0);
      current_result = ringbuffer_pop(result_rb);
      if (current_result == NULL)
         return;
      duration_ts += raw_timestamp_delta(current_result->start_ts,
                                         current_result->end_ts);
      event_count += current_result->snapshot.event_count;
   }

   uint64_t duration_idle_ns =
      intel_device_info_timebase_scale(info, start_result->idle_duration);
   uint64_t duration_time_ns =
      intel_device_info_timebase_scale(info, duration_ts);
   const struct intel_measure_snapshot *begin = &start_result->snapshot;
   fprintf(config.file, "%"PRIu64",%"PRIu64",%u,%u,%u,%u,%s,%u,"
           "0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR","
           "0x%"PRIxPTR",0x%"PRIxPTR",%.3lf,%.3lf\n",
           start_result->start_ts, current_result->end_ts,
           start_result->frame, start_result->batch_count,
           start_result->event_index, event_count,
           begin->event_name, begin->count,
           begin->vs, begin->tcs, begin->tes, begin->gs, begin->fs, begin->cs,
           begin->framebuffer,
           (double)duration_idle_ns / 1000.0,
           (double)duration_time_ns / 1000.0);
}

/**
 * Empty the ringbuffer of events that can be printed.
 */
static void
intel_measure_print(struct intel_measure_device *device,
                    struct intel_device_info *info)
{
   while (true) {
      const int events_to_combine = buffered_event_count(device);
      if (events_to_combine == 0)
         break;
      print_combined_results(device, events_to_combine, info);
   }
}

/**
 * Collect snapshots from completed command buffers and submit them to
 * intel_measure for printing.
 */
void
intel_measure_gather(struct intel_measure_device *measure_device,
                     struct intel_device_info *info)
{
   pthread_mutex_lock(&measure_device->mutex);

   /* iterate snapshots and collect if ready */
   while (!list_is_empty(&measure_device->queued_snapshots)) {
      struct intel_measure_batch *batch =
         list_first_entry(&measure_device->queued_snapshots,
                          struct intel_measure_batch, link);

      if (!intel_measure_ready(batch)) {
         /* command buffer has begun execution on the gpu, but has not
          * completed.
          */
         break;
      }

      list_del(&batch->link);
      assert(batch->index % 2 == 0);

      intel_measure_push_result(measure_device, batch);

      batch->index = 0;
      batch->frame = 0;
      if (measure_device->release_batch)
         measure_device->release_batch(batch);
   }

   intel_measure_print(measure_device, info);
   pthread_mutex_unlock(&measure_device->mutex);
}
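
/* A hypothetical integration sketch, illustrative only (the real call sites
 * are the submission and present paths of the driver backends):
 *
 *    intel_measure_frame_transition(++measure_device->frame);
 *    ... submit command buffers, queuing their snapshots ...
 *    intel_measure_gather(measure_device, info);
 */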