/*
 * Copyright © 2015-2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *   Robert Bragg <robert@sixbynine.org>
 */


/**
 * DOC: i915 Perf Overview
 *
 * Gen graphics supports a large number of performance counters that can help
 * driver and application developers understand and optimize their use of the
 * GPU.
 *
 * This i915 perf interface enables userspace to configure and open a file
 * descriptor representing a stream of GPU metrics which can then be read() as
 * a stream of sample records.
 *
 * The interface is particularly suited to exposing buffered metrics that are
 * captured by DMA from the GPU, unsynchronized with and unrelated to the CPU.
 *
 * Streams representing a single context are accessible to applications with a
 * corresponding drm file descriptor, such that OpenGL can use the interface
 * without special privileges. Access to system-wide metrics requires root
 * privileges by default, unless changed via the dev.i915.perf_stream_paranoid
 * sysctl option.
 */

/**
 * DOC: i915 Perf History and Comparison with Core Perf
 *
 * The interface was initially inspired by the core Perf infrastructure but
 * some notable differences are:
 *
 * i915 perf file descriptors represent a "stream" instead of an "event"; where
 * a perf event primarily corresponds to a single 64bit value, while a stream
 * might sample sets of tightly-coupled counters, depending on the
 * configuration. For example the Gen OA unit isn't designed to support
 * orthogonal configurations of individual counters; it's configured for a set
 * of related counters. Samples for an i915 perf stream capturing OA metrics
 * will include a set of counter values packed in a compact HW specific format.
 * The OA unit supports a number of different packing formats which can be
 * selected by the user opening the stream. Perf has support for grouping
 * events, but each event in the group is configured, validated and
 * authenticated individually with separate system calls.
 *
 * i915 perf stream configurations are provided as an array of u64 (key,value)
 * pairs, instead of a fixed struct with multiple miscellaneous config members,
 * interleaved with event-type specific members.
 *
 * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
 * The supported metrics are being written to memory by the GPU unsynchronized
 * with the CPU, using HW specific packing formats for counter sets. Sometimes
 * the constraints on HW configuration require reports to be filtered before it
 * would be acceptable to expose them to unprivileged applications - to hide
 * the metrics of other processes/contexts. For these use cases a read() based
 * interface is a good fit, and provides an opportunity to filter data as it
 * gets copied from the GPU mapped buffers to userspace buffers.
 *
 *
 * Issues hit with first prototype based on Core Perf
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * The first prototype of this driver was based on the core perf
 * infrastructure, and while we did make that mostly work, with some changes to
 * perf, we found we were breaking or working around too many assumptions baked
 * into perf's currently cpu centric design.
 *
 * In the end we didn't see a clear benefit to making perf's implementation and
 * interface more complex by changing design assumptions while we knew we still
 * wouldn't be able to use any existing perf based userspace tools.
 *
 * Also considering the Gen specific nature of the Observability hardware and
 * how userspace will sometimes need to combine i915 perf OA metrics with
 * side-band OA data captured via MI_REPORT_PERF_COUNT commands; we're
 * expecting the interface to be used by a platform specific userspace such as
 * OpenGL or tools. This is to say; we aren't inherently missing out on having
 * a standard vendor/architecture agnostic interface by not using perf.
 *
 *
 * For posterity, in case we might re-visit trying to adapt core perf to be
 * better suited to exposing i915 metrics these were the main pain points we
 * hit:
 *
 * - The perf based OA PMU driver broke some significant design assumptions:
 *
 *   Existing perf pmus are used for profiling work on a cpu and we were
 *   introducing the idea of _IS_DEVICE pmus with different security
 *   implications, the need to fake cpu-related data (such as user/kernel
 *   registers) to fit with perf's current design, and adding _DEVICE records
 *   as a way to forward device-specific status records.
 *
 *   The OA unit writes reports of counters into a circular buffer, without
 *   involvement from the CPU, making our PMU driver the first of its kind.
 *
 *   Given the way we were periodically forwarding data from the GPU-mapped OA
 *   buffer to perf's buffer, those bursts of sample writes looked to perf like
 *   we were sampling too fast and so we had to subvert its throttling checks.
 *
 *   Perf supports groups of counters and allows those to be read via
 *   transactions internally but transactions currently seem designed to be
 *   explicitly initiated from the cpu (say in response to a userspace read())
 *   and while we could pull a report out of the OA buffer we can't
 *   trigger a report from the cpu on demand.
 *
 *   Related to being report based; the OA counters are configured in HW as a
 *   set while perf generally expects counter configurations to be orthogonal.
 *   Although counters can be associated with a group leader as they are
 *   opened, there's no clear precedent for being able to provide group-wide
 *   configuration attributes (for example we want to let userspace choose the
 *   OA unit report format used to capture all counters in a set, or specify a
 *   GPU context to filter metrics on). We avoided using perf's grouping
 *   feature and forwarded OA reports to userspace via perf's 'raw' sample
 *   field. This suited our userspace well considering how coupled the counters
 *   are when dealing with normalizing. It would be inconvenient to split
 *   counters up into separate events, only to require userspace to recombine
 *   them. For Mesa it's also convenient to be forwarded raw, periodic reports
 *   for combining with the side-band raw reports it captures using
 *   MI_REPORT_PERF_COUNT commands.
 *
 * - As a side note on perf's grouping feature; there was also some concern
 *   that using PERF_FORMAT_GROUP as a way to pack together counter values
 *   would quite drastically inflate our sample sizes, which would likely
 *   lower the effective sampling resolutions we could use when the available
 *   memory bandwidth is limited.
 *
 *   With the OA unit's report formats, counters are packed together as 32
 *   or 40bit values, with the largest report size being 256 bytes.
 *
 *   PERF_FORMAT_GROUP values are 64bit, but there doesn't appear to be a
 *   documented ordering to the values, implying PERF_FORMAT_ID must also be
 *   used to add a 64bit ID before each value; giving 16 bytes per counter.
 *
 *   Related to counter orthogonality; we can't time share the OA unit, while
 *   event scheduling is a central design idea within perf for allowing
 *   userspace to open + enable more events than can be configured in HW at any
 *   one time. The OA unit is not designed to allow re-configuration while in
 *   use. We can't reconfigure the OA unit without losing internal OA unit
 *   state which we can't access explicitly to save and restore. Reconfiguring
 *   the OA unit is also relatively slow, involving ~100 register writes. From
 *   userspace Mesa also depends on a stable OA configuration when emitting
 *   MI_REPORT_PERF_COUNT commands and importantly the OA unit can't be
 *   disabled while there are outstanding MI_RPC commands lest we hang the
 *   command streamer.
 *
 *   The contents of sample records aren't extensible by device drivers (i.e.
 *   the sample_type bits). As an example; Sourab Gupta had been looking to
 *   attach GPU timestamps to our OA samples. We were shoehorning OA reports
 *   into sample records by using the 'raw' field, but it's tricky to pack more
 *   than one thing into this field because events/core.c currently only lets a
 *   pmu give a single raw data pointer plus len which will be copied into the
 *   ring buffer. To include more than the OA report we'd have to copy the
 *   report into an intermediate larger buffer. I'd been considering allowing a
 *   vector of data+len values to be specified for copying the raw data, but
 *   it felt like a kludge to be using the raw field for this purpose.
 *
 * - It felt like our perf based PMU was making some technical compromises
 *   just for the sake of using perf:
 *
 *   perf_event_open() requires events to either relate to a pid or a specific
 *   cpu core, while our device pmu relates to neither.
 *   Events opened with a pid will be automatically enabled/disabled according
 *   to the scheduling of that process - so not appropriate for us. When an
 *   event is related to a cpu id, perf ensures pmu methods will be invoked via
 *   an inter process interrupt on that core. To avoid invasive changes our
 *   userspace opened OA perf events for a specific cpu. This was workable but
 *   it meant the majority of the OA driver ran in atomic context, including
 *   all OA report forwarding, which wasn't really necessary in our case and
 *   seems to make our locking requirements somewhat complex as we handle the
 *   interaction with the rest of the i915 driver.
 */

#include <linux/anon_inodes.h>
#include <linux/sizes.h>
#include <linux/uuid.h>

#include "gem/i915_gem_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_perf.h"

/* HW requires this to be a power of two, between 128k and 16M, though driver
 * is currently generally designed assuming the largest 16M size is used such
 * that the overflow cases are unlikely in normal operation.
 */
#define OA_BUFFER_SIZE		SZ_16M

#define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))

/**
 * DOC: OA Tail Pointer Race
 *
 * There's a HW race condition between OA unit tail pointer register updates and
 * writes to memory whereby the tail pointer can sometimes get ahead of what's
 * been written out to the OA buffer so far (in terms of what's visible to the
 * CPU).
 *
 * Although this can be observed explicitly while copying reports to userspace
 * by checking for a zeroed report-id field in tail reports, we want to account
 * for this earlier, as part of the oa_buffer_check_unlocked() to avoid lots of
 * redundant read() attempts.
 *
 * We workaround this issue in oa_buffer_check_unlocked() by reading the reports
 * in the OA buffer, starting from the tail reported by the HW until we find a
 * report with its first 2 dwords not 0 meaning its previous report is
 * completely in memory and ready to be read. Those dwords are also set to 0
 * once read and the whole buffer is cleared upon OA buffer initialization. The
 * first dword is the reason for this report while the second is the timestamp,
 * making the chances of having those 2 fields at 0 fairly unlikely. A more
 * detailed explanation is available in oa_buffer_check_unlocked().
 *
 * Most of the implementation details for this workaround are in
 * oa_buffer_check_unlocked() and _append_oa_reports().
 *
 * Note for posterity: previously the driver used to define an effective tail
 * pointer that lagged the real pointer by a 'tail margin' measured in bytes
 * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
 * This was flawed considering that the OA unit may also automatically generate
 * non-periodic reports (such as on context switch) or the OA unit may be
 * enabled without any periodic sampling.
 */
#define OA_TAIL_MARGIN_NSEC	100000ULL
#define INVALID_TAIL_PTR	0xffffffff

/* The default frequency for checking whether the OA unit has written new
 * reports to the circular OA buffer...
 */
#define DEFAULT_POLL_FREQUENCY_HZ 200
#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)

/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
static u32 i915_perf_stream_paranoid = true;

/* The maximum exponent the hardware accepts is 63 (essentially it selects one
 * of the 64bit timestamp bits to trigger reports from) but there's currently
 * no known use case for sampling as infrequently as once per 47 thousand years.
 *
 * Since the timestamps included in OA reports are only 32bits it seems
 * reasonable to limit the OA exponent where it's still possible to account for
 * overflow in OA report timestamps.
 */
#define OA_EXPONENT_MAX 31

#define INVALID_CTX_ID 0xffffffff

/* On Gen8+ automatically triggered OA reports include a 'reason' field... */
#define OAREPORT_REASON_MASK           0x3f
#define OAREPORT_REASON_MASK_EXTENDED  0x7f
#define OAREPORT_REASON_SHIFT          19
#define OAREPORT_REASON_TIMER          (1<<0)
#define OAREPORT_REASON_CTX_SWITCH     (1<<3)
#define OAREPORT_REASON_CLK_RATIO      (1<<5)


/* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
 *
 * The highest sampling frequency we can theoretically program the OA unit
 * with is always half the timestamp frequency: E.g. 6.25MHz for Haswell.
 *
 * Initialized just before we register the sysctl parameter.
 */
static int oa_sample_rate_hard_limit;

/* Theoretically we can program the OA unit to sample every 160ns but don't
 * allow that by default unless root...
 *
 * The default threshold of 100000Hz is based on perf's similar
 * kernel.perf_event_max_sample_rate sysctl parameter.
 */
static u32 i915_oa_max_sample_rate = 100000;
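
/* To illustrate how the exponent relates to the limits above: the OA unit
 * triggers a periodic report every time the selected timestamp bit toggles,
 * so the sampling period is (2 << exponent) timestamp ticks (this is what the
 * driver computes when validating the userspace supplied exponent). A rough
 * sketch, assuming Haswell's 12.5MHz command streamer timestamp frequency
 * implied by the comments above:
 *
 *	u64 period_ns = div_u64((2ULL << exponent) * NSEC_PER_SEC, 12500000);
 *
 *	exponent == 0  -> 160ns period (the 6.25MHz hard limit above)
 *	exponent == 31 -> ~344s period, around where the 32bit OA report
 *			  timestamps wrap (hence OA_EXPONENT_MAX)
 */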

/* XXX: beware if future OA HW adds new report formats that the current
 * code assumes all reports have a power-of-two size and ~(size - 1) can
 * be used as a mask to align the OA tail pointer.
 */
static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A13]	    = { 0, 64 },
	[I915_OA_FORMAT_A29]	    = { 1, 128 },
	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
};

static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A12]		    = { 0, 64 },
	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
};

static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
};

#define SAMPLE_OA_REPORT      (1<<0)

/**
 * struct perf_open_properties - for validated properties given to open a stream
 * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
 * @single_context: Whether a single or all gpu contexts should be monitored
 * @hold_preemption: Whether the preemption is disabled for the filtered
 *                   context
 * @ctx_handle: A gem ctx handle for use with @single_context
 * @metrics_set: An ID for an OA unit metric set advertised via sysfs
 * @oa_format: An OA unit HW report format
 * @oa_periodic: Whether to enable periodic OA unit sampling
 * @oa_period_exponent: The OA unit sampling period is derived from this
 * @engine: The engine (typically rcs0) being monitored by the OA unit
 * @has_sseu: Whether @sseu was specified by userspace
 * @sseu: internal SSEU configuration computed either from the userspace
 *        specified configuration in the opening parameters or a default value
 *        (see get_default_sseu_config())
 * @poll_oa_period: The period in nanoseconds at which the CPU will check for OA
 *                  data availability
 *
 * As read_properties_unlocked() enumerates and validates the properties given
 * to open a stream of metrics the configuration is built up in the structure
 * which starts out zero initialized.
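 *
 * For reference, a minimal userspace sketch of the (key, value) u64 property
 * pairs that read_properties_unlocked() parses into this struct. The
 * metrics_set_id value is a placeholder normally read from the sysfs
 * metrics/<uuid>/id file, and drm_fd is assumed to be an already open DRM
 * file descriptor:
 *
 *	uint64_t properties[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
 *		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 *		DRM_I915_PERF_PROP_OA_EXPONENT, 16,
 *	};
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC,
 *		.num_properties = ARRAY_SIZE(properties) / 2,
 *		.properties_ptr = (uintptr_t)properties,
 *	};
 *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);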
 */
struct perf_open_properties {
	u32 sample_flags;

	u64 single_context:1;
	u64 hold_preemption:1;
	u64 ctx_handle;

	/* OA sampling state */
	int metrics_set;
	int oa_format;
	bool oa_periodic;
	int oa_period_exponent;

	struct intel_engine_cs *engine;

	bool has_sseu;
	struct intel_sseu sseu;

	u64 poll_oa_period;
};

struct i915_oa_config_bo {
	struct llist_node node;

	struct i915_oa_config *oa_config;
	struct i915_vma *vma;
};

static struct ctl_table_header *sysctl_header;

static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);

void i915_oa_config_release(struct kref *ref)
{
	struct i915_oa_config *oa_config =
		container_of(ref, typeof(*oa_config), ref);

	kfree(oa_config->flex_regs);
	kfree(oa_config->b_counter_regs);
	kfree(oa_config->mux_regs);

	kfree_rcu(oa_config, rcu);
}

struct i915_oa_config *
i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
{
	struct i915_oa_config *oa_config;

	rcu_read_lock();
	oa_config = idr_find(&perf->metrics_idr, metrics_set);
	if (oa_config)
		oa_config = i915_oa_config_get(oa_config);
	rcu_read_unlock();

	return oa_config;
}

static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
{
	i915_oa_config_put(oa_bo->oa_config);
	i915_vma_put(oa_bo->vma);
	kfree(oa_bo);
}

static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;

	return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
	       GEN12_OAG_OATAILPTR_MASK;
}

static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;

	return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
}

static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);

	return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
}

/**
 * oa_buffer_check_unlocked - check for data and update tail ptr state
 * @stream: i915 stream instance
 *
 * This is either called via fops (for blocking reads in user ctx) or the poll
 * check hrtimer (atomic ctx) to check the OA buffer tail pointer and check
 * if there is data available for userspace to read.
 *
 * This function is central to providing a workaround for the OA unit tail
 * pointer having a race with respect to what data is visible to the CPU.
 * It is responsible for reading tail pointers from the hardware and giving
 * the pointers time to 'age' before they are made available for reading.
 * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
 *
 * Besides returning true when there is data available to read() this function
 * also updates the tail, aging_tail and aging_timestamp in the oa_buffer
 * object.
 *
 * Note: It's safe to read OA config state here unlocked, assuming that this is
 * only called while the stream is enabled, while the global OA configuration
 * can't be modified.
 *
 * Returns: %true if the OA buffer contains data, else %false
 */
static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	int report_size = stream->oa_buffer.format_size;
	unsigned long flags;
	bool pollin;
	u32 hw_tail;
	u64 now;

	/* We have to consider the (unlikely) possibility that read() errors
	 * could result in an OA buffer reset which might reset the head and
	 * tail state.
	 */
	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	hw_tail = stream->perf->ops.oa_hw_tail_read(stream);

	/* The tail pointer increases in 64 byte increments,
	 * not in report_size steps...
	 */
	hw_tail &= ~(report_size - 1);

	now = ktime_get_mono_fast_ns();

	if (hw_tail == stream->oa_buffer.aging_tail &&
	    (now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC) {
		/* If the HW tail hasn't moved since the last check and the HW
		 * tail has been aging for long enough, declare it the new
		 * tail.
		 */
		stream->oa_buffer.tail = stream->oa_buffer.aging_tail;
	} else {
		u32 head, tail, aged_tail;

		/* NB: The head we observe here might effectively be a little
		 * out of date. If a read() is in progress, the head could be
		 * anywhere between this head and stream->oa_buffer.tail.
		 */
		head = stream->oa_buffer.head - gtt_offset;
		aged_tail = stream->oa_buffer.tail - gtt_offset;

		hw_tail -= gtt_offset;
		tail = hw_tail;

		/* Walk the stream backward until we find a report with dword 0
		 * & 1 not at 0. Since the circular buffer pointers progress by
		 * increments of 64 bytes and reports can be up to 256 bytes
		 * long, we can't tell whether a report has fully landed in
		 * memory before the first 2 dwords of the following report
		 * have effectively landed.
		 *
		 * This is assuming that the writes of the OA unit land in
		 * memory in the order they were written to.
		 * If not : (╯°□°)╯︵ ┻━┻
		 */
		while (OA_TAKEN(tail, aged_tail) >= report_size) {
			u32 *report32 = (void *)(stream->oa_buffer.vaddr + tail);

			if (report32[0] != 0 || report32[1] != 0)
				break;

			tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
		}

		if (OA_TAKEN(hw_tail, tail) > report_size &&
		    __ratelimit(&stream->perf->tail_pointer_race))
			DRM_NOTE("unlanded report(s) head=0x%x "
				 "tail=0x%x hw_tail=0x%x\n",
				 head, tail, hw_tail);

		stream->oa_buffer.tail = gtt_offset + tail;
		stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
		stream->oa_buffer.aging_timestamp = now;
	}

	pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
			  stream->oa_buffer.head - gtt_offset) >= report_size;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	return pollin;
}

/**
 * append_oa_status - Appends a status record to a userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @type: The kind of status to report to userspace
 *
 * Writes a status record (such as `DRM_I915_PERF_RECORD_OA_REPORT_LOST`)
 * into the userspace read() buffer.
 *
 * The @buf @offset will only be updated on success.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int append_oa_status(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    enum drm_i915_perf_record_type type)
{
	struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };

	if ((count - *offset) < header.size)
		return -ENOSPC;

	if (copy_to_user(buf + *offset, &header, sizeof(header)))
		return -EFAULT;

	(*offset) += header.size;

	return 0;
}

/**
 * append_oa_sample - Copies single OA report into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @report: A single OA report to (optionally) include as part of the sample
 *
 * The contents of a sample are configured through `DRM_I915_PERF_PROP_SAMPLE_*`
 * properties when opening a stream, tracked as `stream->sample_flags`. This
 * function copies the requested components of a single sample to the given
 * read() @buf.
 *
 * The @buf @offset will only be updated on success.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int append_oa_sample(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    const u8 *report)
{
	int report_size = stream->oa_buffer.format_size;
	struct drm_i915_perf_record_header header;

	header.type = DRM_I915_PERF_RECORD_SAMPLE;
	header.pad = 0;
	header.size = stream->sample_size;

	if ((count - *offset) < header.size)
		return -ENOSPC;

	buf += *offset;
	if (copy_to_user(buf, &header, sizeof(header)))
		return -EFAULT;
	buf += sizeof(header);

	if (copy_to_user(buf, report, report_size))
		return -EFAULT;

	(*offset) += header.size;

	return 0;
}

/**
 * gen8_append_oa_reports - Copies all buffered OA reports into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Notably any error condition resulting in a short read (-%ENOSPC or
 * -%EFAULT) will be returned even though one or more records may
 * have been successfully copied. In this case it's up to the caller
 * to decide if the error should be squashed before returning to
 * userspace.
 *
 * Note: reports are consumed from the head, and appended to the
 * tail, so the tail chases the head?... If you think that's mad
 * and back-to-front you're not alone, but this follows the
 * Gen PRM naming convention.
 *
 * Returns: 0 on success, negative error code on failure.
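 *
 * For reference, what ends up in the userspace buffer is a sequence of
 * variable sized records, each led by a struct drm_i915_perf_record_header
 * (type, pad, size). A minimal userspace sketch for walking the bytes
 * returned by a successful read(), where buf/len come from that read() call
 * and process_oa_report()/handle_discontinuity() are hypothetical consumers:
 *
 *	const uint8_t *p = buf;
 *
 *	while (p < buf + len) {
 *		const struct drm_i915_perf_record_header *h = (const void *)p;
 *
 *		if (h->type == DRM_I915_PERF_RECORD_SAMPLE)
 *			process_oa_report(p + sizeof(*h));
 *		else if (h->type == DRM_I915_PERF_RECORD_OA_BUFFER_LOST)
 *			handle_discontinuity();
 *
 *		p += h->size;
 *	}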
 */
static int gen8_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct intel_uncore *uncore = stream->uncore;
	int report_size = stream->oa_buffer.format_size;
	u8 *oa_buf_base = stream->oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
		return -EIO;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	head = stream->oa_buffer.head;
	tail = stream->oa_buffer.tail;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/*
	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
	 * while indexing relative to oa_buf_base.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/*
	 * An out of bounds or misaligned head or tail pointer implies a driver
	 * bug since we validate + align the tail pointers we read from the
	 * hardware and we are in full control of the head pointer which should
	 * only be incremented by multiples of the report size (notably also
	 * all a power of two).
	 */
	if (drm_WARN_ONCE(&uncore->i915->drm,
			  head > OA_BUFFER_SIZE || head % report_size ||
			  tail > OA_BUFFER_SIZE || tail % report_size,
			  "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
			  head, tail))
		return -EIO;


	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;
		u32 ctx_id;
		u32 reason;

		/*
		 * All the report sizes factor neatly into the buffer
		 * size so we never expect to see a report split
		 * between the beginning and end of the buffer.
		 *
		 * Given the initial alignment check a misalignment
		 * here would imply a driver bug that would result
		 * in an overrun.
		 */
		if (drm_WARN_ON(&uncore->i915->drm,
				(OA_BUFFER_SIZE - head) < report_size)) {
			drm_err(&uncore->i915->drm,
				"Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/*
		 * The reason field includes flags identifying what
		 * triggered this specific report (mostly timer
		 * triggered or e.g. due to a context switch).
		 *
		 * This field is never expected to be zero so we can
		 * check that the report isn't invalid before copying
		 * it to userspace...
		 */
		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
			  (IS_GEN(stream->perf->i915, 12) ?
			   OAREPORT_REASON_MASK_EXTENDED :
			   OAREPORT_REASON_MASK));
		if (reason == 0) {
			if (__ratelimit(&stream->perf->spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ctx_id = report32[2] & stream->specific_ctx_id_mask;

		/*
		 * Squash whatever is in the CTX_ID field if it's marked as
		 * invalid to be sure we avoid false-positive, single-context
		 * filtering below...
		 *
		 * Note that we don't clear the valid_ctx_bit so userspace can
		 * understand that the ID has been squashed by the kernel.
		 */
		if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
		    INTEL_GEN(stream->perf->i915) <= 11)
			ctx_id = report32[2] = INVALID_CTX_ID;

		/*
		 * NB: For Gen 8 the OA unit no longer supports clock gating
		 * off for a specific context and the kernel can't securely
		 * stop the counters from updating as system-wide / global
		 * values.
		 *
		 * Automatic reports now include a context ID so reports can be
		 * filtered on the cpu but it's not worth trying to
		 * automatically subtract/hide counter progress for other
		 * contexts while filtering since we can't stop userspace
		 * issuing MI_REPORT_PERF_COUNT commands which would still
		 * provide a side-band view of the real values.
		 *
		 * To allow userspace (such as Mesa/GL_INTEL_performance_query)
		 * to normalize counters for a single filtered context then it
		 * needs to be forwarded bookend context-switch reports so that
		 * it can track switches in between MI_REPORT_PERF_COUNT
		 * commands and can itself subtract/ignore the progress of
		 * counters associated with other contexts. Note that the
		 * hardware automatically triggers reports when switching to a
		 * new context which are tagged with the ID of the newly active
		 * context. To avoid the complexity (and likely fragility) of
		 * reading ahead while parsing reports to try and minimize
		 * forwarding redundant context switch reports (i.e. between
		 * other, unrelated contexts) we simply elect to forward them
		 * all.
		 *
		 * We don't rely solely on the reason field to identify context
		 * switches since it's not uncommon for periodic samples to
		 * identify a switch before any 'context switch' report.
		 */
		if (!stream->perf->exclusive_stream->ctx ||
		    stream->specific_ctx_id == ctx_id ||
		    stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
		    reason & OAREPORT_REASON_CTX_SWITCH) {

			/*
			 * While filtering for a single context we avoid
			 * leaking the IDs of other contexts.
			 */
			if (stream->perf->exclusive_stream->ctx &&
			    stream->specific_ctx_id != ctx_id) {
				report32[2] = INVALID_CTX_ID;
			}

			ret = append_oa_sample(stream, buf, count, offset,
					       report);
			if (ret)
				break;

			stream->oa_buffer.last_ctx_id = ctx_id;
		}

		/*
		 * Clear out the first 2 dwords as a means to detect unlanded
		 * reports.
		 */
		report32[0] = 0;
		report32[1] = 0;
	}

	if (start_offset != *offset) {
		i915_reg_t oaheadptr;

		oaheadptr = IS_GEN(stream->perf->i915, 12) ?
			    GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;

		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

		/*
		 * We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base so put back here...
		 */
		head += gtt_offset;
		intel_uncore_write(uncore, oaheadptr,
				   head & GEN12_OAG_OAHEADPTR_MASK);
		stream->oa_buffer.head = head;

		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
	}

	return ret;
}

/**
 * gen8_oa_read - copy status records then buffered OA reports
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Checks OA unit status registers and if necessary appends corresponding
 * status records for userspace (such as for a buffer full condition) and then
 * initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * NB: some data may be successfully copied to the userspace buffer
 * even if an error is returned, and this is reflected in the
 * updated @offset.
 *
 * Returns: zero on success or a negative error code
 */
static int gen8_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 oastatus;
	i915_reg_t oastatus_reg;
	int ret;

	if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr))
		return -EIO;

	oastatus_reg = IS_GEN(stream->perf->i915, 12) ?
		       GEN12_OAG_OASTATUS : GEN8_OASTATUS;

	oastatus = intel_uncore_read(uncore, oastatus_reg);

	/*
	 * We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * Although theoretically we could handle this more gracefully
	 * sometimes, some Gens don't correctly suppress certain
	 * automatically triggered reports in this condition and so we
	 * have to assume that old reports are now being trampled
	 * over.
	 *
	 * Considering how we don't currently give userspace control
	 * over the OA buffer size and always configure a large 16MB
	 * buffer, a buffer overflow anyway likely indicates that
	 * something has gone quite badly wrong.
	 */
	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  stream->period_exponent);

		stream->perf->ops.oa_disable(stream);
		stream->perf->ops.oa_enable(stream);

		/*
		 * Note: .oa_enable() is expected to re-init the oabuffer and
		 * reset GEN8_OASTATUS for us
		 */
		oastatus = intel_uncore_read(uncore, oastatus_reg);
	}

	if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;

		intel_uncore_rmw(uncore, oastatus_reg,
				 GEN8_OASTATUS_COUNTER_OVERFLOW |
				 GEN8_OASTATUS_REPORT_LOST,
				 IS_GEN_RANGE(uncore->i915, 8, 10) ?
				 (GEN8_OASTATUS_HEAD_POINTER_WRAP |
				  GEN8_OASTATUS_TAIL_POINTER_WRAP) : 0);
	}

	return gen8_append_oa_reports(stream, buf, count, offset);
}

/**
 * gen7_append_oa_reports - Copies all buffered OA reports into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Notably any error condition resulting in a short read (-%ENOSPC or
 * -%EFAULT) will be returned even though one or more records may
 * have been successfully copied. In this case it's up to the caller
 * to decide if the error should be squashed before returning to
 * userspace.
 *
 * Note: reports are consumed from the head, and appended to the
 * tail, so the tail chases the head?... If you think that's mad
 * and back-to-front you're not alone, but this follows the
 * Gen PRM naming convention.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen7_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct intel_uncore *uncore = stream->uncore;
	int report_size = stream->oa_buffer.format_size;
	u8 *oa_buf_base = stream->oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
		return -EIO;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	head = stream->oa_buffer.head;
	tail = stream->oa_buffer.tail;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/* NB: oa_buffer.head/tail include the gtt_offset which we don't want
	 * while indexing relative to oa_buf_base.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/* An out of bounds or misaligned head or tail pointer implies a driver
	 * bug since we validate + align the tail pointers we read from the
	 * hardware and we are in full control of the head pointer which should
	 * only be incremented by multiples of the report size (notably also
	 * all a power of two).
	 */
	if (drm_WARN_ONCE(&uncore->i915->drm,
			  head > OA_BUFFER_SIZE || head % report_size ||
			  tail > OA_BUFFER_SIZE || tail % report_size,
			  "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
			  head, tail))
		return -EIO;


	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;

		/* All the report sizes factor neatly into the buffer
		 * size so we never expect to see a report split
		 * between the beginning and end of the buffer.
		 *
		 * Given the initial alignment check a misalignment
		 * here would imply a driver bug that would result
		 * in an overrun.
		 */
		if (drm_WARN_ON(&uncore->i915->drm,
				(OA_BUFFER_SIZE - head) < report_size)) {
			drm_err(&uncore->i915->drm,
				"Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/* The report-ID field for periodic samples includes
		 * some undocumented flags related to what triggered
		 * the report and is never expected to be zero so we
		 * can check that the report isn't invalid before
		 * copying it to userspace...
		 */
		if (report32[0] == 0) {
			if (__ratelimit(&stream->perf->spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ret = append_oa_sample(stream, buf, count, offset, report);
		if (ret)
			break;

		/* Clear out the first 2 dwords as a means to detect unlanded
		 * reports.
		 */
		report32[0] = 0;
		report32[1] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

		/* We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base so put back here...
		 */
		head += gtt_offset;

		intel_uncore_write(uncore, GEN7_OASTATUS2,
				   (head & GEN7_OASTATUS2_HEAD_MASK) |
				   GEN7_OASTATUS2_MEM_SELECT_GGTT);
		stream->oa_buffer.head = head;

		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
	}

	return ret;
}

/**
 * gen7_oa_read - copy status records then buffered OA reports
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Checks Gen 7 specific OA unit status registers and if necessary appends
 * corresponding status records for userspace (such as for a buffer full
 * condition) and then initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * Returns: zero on success or a negative error code
 */
static int gen7_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 oastatus1;
	int ret;

	if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr))
		return -EIO;

	oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);

	/* XXX: On Haswell we don't have a safe way to clear oastatus1
	 * bits while the OA unit is enabled (while the tail pointer
	 * may be updated asynchronously) so we ignore status bits
	 * that have already been reported to userspace.
	 */
	oastatus1 &= ~stream->perf->gen7_latched_oastatus1;

	/* We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * - The status can be interpreted to mean that the buffer is
	 *   currently full (with a higher precedence than OA_TAKEN()
	 *   which will start to report a near-empty buffer after an
	 *   overflow) but it's awkward that we can't clear the status
	 *   on Haswell, so without a reset we won't be able to catch
	 *   the state again.
	 *
	 * - Since it also implies the HW has started overwriting old
	 *   reports it may also affect our sanity checks for invalid
	 *   reports when copying to userspace that assume new reports
	 *   are being written to cleared memory.
	 *
	 * - In the future we may want to introduce a flight recorder
	 *   mode where the driver will automatically maintain a safe
	 *   guard band between head/tail, avoiding this overflow
	 *   condition, but we avoid the added driver complexity for
	 *   now.
	 */
	if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  stream->period_exponent);

		stream->perf->ops.oa_disable(stream);
		stream->perf->ops.oa_enable(stream);

		oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
	}

	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		stream->perf->gen7_latched_oastatus1 |=
			GEN7_OASTATUS1_REPORT_LOST;
	}

	return gen7_append_oa_reports(stream, buf, count, offset);
}

/**
 * i915_oa_wait_unlocked - handles blocking IO until OA data available
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Called when userspace tries to read() from a blocking stream FD opened
 * for OA metrics. It waits until the hrtimer callback finds a non-empty
 * OA buffer and wakes us.
 *
 * Note: it's acceptable to have this return with some false positives
 * since any subsequent read handling will return -EAGAIN if there isn't
 * really data ready for userspace yet.
 *
 * Returns: zero on success or a negative error code
 */
static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
{
	/* We would wait indefinitely if periodic sampling is not enabled */
	if (!stream->periodic)
		return -EIO;

	return wait_event_interruptible(stream->poll_wq,
					oa_buffer_check_unlocked(stream));
}

/**
 * i915_oa_poll_wait - call poll_wait() for an OA stream poll()
 * @stream: An i915-perf stream opened for OA metrics
 * @file: An i915 perf stream file
 * @wait: poll() state table
 *
 * For handling userspace polling on an i915 perf stream opened for OA metrics,
 * this starts a poll_wait with the wait queue that our hrtimer callback wakes
 * when it sees data ready to read in the circular OA buffer.
 */
static void i915_oa_poll_wait(struct i915_perf_stream *stream,
			      struct file *file,
			      poll_table *wait)
{
	poll_wait(file, &stream->poll_wq, wait);
}

/**
 * i915_oa_read - just calls through to &i915_oa_ops->read
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * Returns: zero on success or a negative error code
 */
static int i915_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	return stream->perf->ops.read(stream, buf, count, offset);
}

static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
{
	struct i915_gem_engines_iter it;
	struct i915_gem_context *ctx = stream->ctx;
	struct intel_context *ce;
	struct i915_gem_ww_ctx ww;
	int err = -ENODEV;

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		if (ce->engine != stream->engine) /* first match! */
			continue;

		err = 0;
		break;
	}
	i915_gem_context_unlock_engines(ctx);

	if (err)
		return ERR_PTR(err);

	i915_gem_ww_ctx_init(&ww, true);
retry:
	/*
	 * As the ID is the gtt offset of the context's vma we
	 * pin the vma to ensure the ID remains fixed.
	 */
	err = intel_context_pin_ww(ce, &ww);
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	if (err)
		return ERR_PTR(err);

	stream->pinned_ctx = ce;
	return stream->pinned_ctx;
}

/**
 * oa_get_render_ctx_id - determine and hold ctx hw id
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Determine the render context hw id, and ensure it remains fixed for the
 * lifetime of the stream. This ensures that we don't have to worry about
 * updating the context ID in OACONTROL on the fly.
 *
 * Returns: zero on success or a negative error code
 */
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
	struct intel_context *ce;

	ce = oa_pin_context(stream);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	switch (INTEL_GEN(ce->engine->i915)) {
	case 7: {
		/*
		 * On Haswell we don't do any post processing of the reports
		 * and don't need to use the mask.
		 */
		stream->specific_ctx_id = i915_ggtt_offset(ce->state);
		stream->specific_ctx_id_mask = 0;
		break;
	}

	case 8:
	case 9:
	case 10:
		if (intel_engine_in_execlists_submission_mode(ce->engine)) {
			stream->specific_ctx_id_mask =
				(1U << GEN8_CTX_ID_WIDTH) - 1;
			stream->specific_ctx_id = stream->specific_ctx_id_mask;
		} else {
			/*
			 * When using GuC, the context descriptor we write in
			 * i915 is read by GuC and rewritten before it's
			 * actually written into the hardware. The LRCA is
			 * what is put into the context id field of the
			 * context descriptor by GuC. Because it's aligned to
			 * a page, the lower 12bits are always at 0 and
			 * dropped by GuC. They won't be part of the context
			 * ID in the OA reports, so squash those lower bits.
			 */
			stream->specific_ctx_id = ce->lrc.lrca >> 12;

			/*
			 * GuC uses the top bit to signal proxy submission, so
			 * ignore that bit.
			 */
			stream->specific_ctx_id_mask =
				(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
		}
		break;

	case 11:
	case 12: {
		stream->specific_ctx_id_mask =
			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
		/*
		 * Pick an unused context id
		 * 0 - BITS_PER_LONG are used by other contexts
		 * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
		 */
		stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
		break;
	}

	default:
		MISSING_CASE(INTEL_GEN(ce->engine->i915));
	}

	ce->tag = stream->specific_ctx_id;

	drm_dbg(&stream->perf->i915->drm,
		"filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
		stream->specific_ctx_id,
		stream->specific_ctx_id_mask);

	return 0;
}

/**
 * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
 * @stream: An i915-perf stream opened for OA metrics
 *
 * In case anything needed doing to ensure the context HW ID would remain valid
 * for the lifetime of the stream, then that can be undone here.
 */
static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
{
	struct intel_context *ce;

	ce = fetch_and_zero(&stream->pinned_ctx);
	if (ce) {
		ce->tag = 0; /* recomputed on next submission after parking */
		intel_context_unpin(ce);
	}

	stream->specific_ctx_id = INVALID_CTX_ID;
	stream->specific_ctx_id_mask = 0;
}

static void
free_oa_buffer(struct i915_perf_stream *stream)
{
	i915_vma_unpin_and_release(&stream->oa_buffer.vma,
				   I915_VMA_RELEASE_MAP);

	stream->oa_buffer.vaddr = NULL;
}

static void
free_oa_configs(struct i915_perf_stream *stream)
{
	struct i915_oa_config_bo *oa_bo, *tmp;

	i915_oa_config_put(stream->oa_config);
	llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
		free_oa_config_bo(oa_bo);
}

static void
free_noa_wait(struct i915_perf_stream *stream)
{
	i915_vma_unpin_and_release(&stream->noa_wait, 0);
}

static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
	struct i915_perf *perf = stream->perf;

	BUG_ON(stream != perf->exclusive_stream);

	/*
	 * Unset exclusive_stream first, it will be checked while disabling
	 * the metric set on gen8+.
	 *
	 * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
	 */
	WRITE_ONCE(perf->exclusive_stream, NULL);
	perf->ops.disable_metric_set(stream);

	free_oa_buffer(stream);

	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
	intel_engine_pm_put(stream->engine);

	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	free_oa_configs(stream);
	free_noa_wait(stream);

	if (perf->spurious_report_rs.missed) {
		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
			 perf->spurious_report_rs.missed);
	}
}

static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	/* Pre-DevBDW: OABUFFER must be set with counters off,
	 * before OASTATUS1, but after OASTATUS2
	 */
	intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */
			   gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT);
	stream->oa_buffer.head = gtt_offset;

	intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset);

	intel_uncore_write(uncore, GEN7_OASTATUS1, /* tail */
			   gtt_offset | OABUFFER_SIZE_16M);

	/* Mark that we need updated tail pointers to read from... */
	stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
	stream->oa_buffer.tail = gtt_offset;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/* On Haswell we have to track which OASTATUS1 flags we've
	 * already seen since they can't be cleared while periodic
	 * sampling is enabled.
	 */
	stream->perf->gen7_latched_oastatus1 = 0;

	/* NB: although the OA buffer will initially be allocated
	 * zeroed via shmfs (and so this memset is redundant when
	 * first allocating), we may re-init the OA buffer, either
	 * when re-enabling a stream or in error/reset paths.
	 *
	 * The reason we clear the buffer for each re-init is for the
	 * sanity check in gen7_append_oa_reports() that looks at the
	 * report-id field to make sure it's non-zero which relies on
	 * the assumption that new reports are being written to zeroed
	 * memory...
	 */
	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
}

static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	intel_uncore_write(uncore, GEN8_OASTATUS, 0);
	intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset);
	stream->oa_buffer.head = gtt_offset;

	intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0);

	/*
	 * PRM says:
	 *
	 *  "This MMIO must be set before the OATAILPTR
	 *  register and after the OAHEADPTR register. This is
	 *  to enable proper functionality of the overflow
	 *  bit."
	 */
	intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset |
			   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
	intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);

	/* Mark that we need updated tail pointers to read from... */
	stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
	stream->oa_buffer.tail = gtt_offset;

	/*
	 * Reset state used to recognise context switches, affecting which
	 * reports we will forward to userspace while filtering for a single
	 * context.
	 */
	stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/*
	 * NB: although the OA buffer will initially be allocated
	 * zeroed via shmfs (and so this memset is redundant when
	 * first allocating), we may re-init the OA buffer, either
	 * when re-enabling a stream or in error/reset paths.
	 *
	 * The reason we clear the buffer for each re-init is for the
	 * sanity check in gen8_append_oa_reports() that looks at the
	 * reason field to make sure it's non-zero which relies on
	 * the assumption that new reports are being written to zeroed
	 * memory...
	 */
	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
}

static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = stream->uncore;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
	intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
			   gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
	stream->oa_buffer.head = gtt_offset;

	/*
	 * PRM says:
	 *
	 *  "This MMIO must be set before the OATAILPTR
	 *  register and after the OAHEADPTR register. This is
	 *  to enable proper functionality of the overflow
	 *  bit."
	 */
	intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
			   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
	intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
			   gtt_offset & GEN12_OAG_OATAILPTR_MASK);

	/* Mark that we need updated tail pointers to read from... */
*/ 1526 stream->oa_buffer.aging_tail = INVALID_TAIL_PTR; 1527 stream->oa_buffer.tail = gtt_offset; 1528 1529 /* 1530 * Reset state used to recognise context switches, affecting which 1531 * reports we will forward to userspace while filtering for a single 1532 * context. 1533 */ 1534 stream->oa_buffer.last_ctx_id = INVALID_CTX_ID; 1535 1536 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 1537 1538 /* 1539 * NB: although the OA buffer will initially be allocated 1540 * zeroed via shmfs (and so this memset is redundant when 1541 * first allocating), we may re-init the OA buffer, either 1542 * when re-enabling a stream or in error/reset paths. 1543 * 1544 * The reason we clear the buffer for each re-init is for the 1545 * sanity check in gen8_append_oa_reports() that looks at the 1546 * reason field to make sure it's non-zero which relies on 1547 * the assumption that new reports are being written to zeroed 1548 * memory... 1549 */ 1550 memset(stream->oa_buffer.vaddr, 0, 1551 stream->oa_buffer.vma->size); 1552} 1553 1554static int alloc_oa_buffer(struct i915_perf_stream *stream) 1555{ 1556 struct drm_i915_private *i915 = stream->perf->i915; 1557 struct drm_i915_gem_object *bo; 1558 struct i915_vma *vma; 1559 int ret; 1560 1561 if (drm_WARN_ON(&i915->drm, stream->oa_buffer.vma)) 1562 return -ENODEV; 1563 1564 BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE); 1565 BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M); 1566 1567 bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE); 1568 if (IS_ERR(bo)) { 1569 drm_err(&i915->drm, "Failed to allocate OA buffer\n"); 1570 return PTR_ERR(bo); 1571 } 1572 1573 i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC); 1574 1575 /* PreHSW required 512K alignment, HSW requires 16M */ 1576 vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0); 1577 if (IS_ERR(vma)) { 1578 ret = PTR_ERR(vma); 1579 goto err_unref; 1580 } 1581 stream->oa_buffer.vma = vma; 1582 1583 stream->oa_buffer.vaddr = 1584 i915_gem_object_pin_map(bo, I915_MAP_WB); 1585 if (IS_ERR(stream->oa_buffer.vaddr)) { 1586 ret = PTR_ERR(stream->oa_buffer.vaddr); 1587 goto err_unpin; 1588 } 1589 1590 return 0; 1591 1592err_unpin: 1593 __i915_vma_unpin(vma); 1594 1595err_unref: 1596 i915_gem_object_put(bo); 1597 1598 stream->oa_buffer.vaddr = NULL; 1599 stream->oa_buffer.vma = NULL; 1600 1601 return ret; 1602} 1603 1604static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs, 1605 bool save, i915_reg_t reg, u32 offset, 1606 u32 dword_count) 1607{ 1608 u32 cmd; 1609 u32 d; 1610 1611 cmd = save ? 
MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM; 1612 cmd |= MI_SRM_LRM_GLOBAL_GTT; 1613 if (INTEL_GEN(stream->perf->i915) >= 8) 1614 cmd++; 1615 1616 for (d = 0; d < dword_count; d++) { 1617 *cs++ = cmd; 1618 *cs++ = i915_mmio_reg_offset(reg) + 4 * d; 1619 *cs++ = intel_gt_scratch_offset(stream->engine->gt, 1620 offset) + 4 * d; 1621 *cs++ = 0; 1622 } 1623 1624 return cs; 1625} 1626 1627static int alloc_noa_wait(struct i915_perf_stream *stream) 1628{ 1629 struct drm_i915_private *i915 = stream->perf->i915; 1630 struct drm_i915_gem_object *bo; 1631 struct i915_vma *vma; 1632 const u64 delay_ticks = 0xffffffffffffffff - 1633 i915_cs_timestamp_ns_to_ticks(i915, atomic64_read(&stream->perf->noa_programming_delay)); 1634 const u32 base = stream->engine->mmio_base; 1635#define CS_GPR(x) GEN8_RING_CS_GPR(base, x) 1636 u32 *batch, *ts0, *cs, *jump; 1637 int ret, i; 1638 enum { 1639 START_TS, 1640 NOW_TS, 1641 DELTA_TS, 1642 JUMP_PREDICATE, 1643 DELTA_TARGET, 1644 N_CS_GPR 1645 }; 1646 1647 bo = i915_gem_object_create_internal(i915, 4096); 1648 if (IS_ERR(bo)) { 1649 drm_err(&i915->drm, 1650 "Failed to allocate NOA wait batchbuffer\n"); 1651 return PTR_ERR(bo); 1652 } 1653 1654 /* 1655 * We pin in GGTT because we jump into this buffer now because 1656 * multiple OA config BOs will have a jump to this address and it 1657 * needs to be fixed during the lifetime of the i915/perf stream. 1658 */ 1659 vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 0, PIN_HIGH); 1660 if (IS_ERR(vma)) { 1661 ret = PTR_ERR(vma); 1662 goto err_unref; 1663 } 1664 1665 batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB); 1666 if (IS_ERR(batch)) { 1667 ret = PTR_ERR(batch); 1668 goto err_unpin; 1669 } 1670 1671 /* Save registers. */ 1672 for (i = 0; i < N_CS_GPR; i++) 1673 cs = save_restore_register( 1674 stream, cs, true /* save */, CS_GPR(i), 1675 INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); 1676 cs = save_restore_register( 1677 stream, cs, true /* save */, MI_PREDICATE_RESULT_1, 1678 INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); 1679 1680 /* First timestamp snapshot location. */ 1681 ts0 = cs; 1682 1683 /* 1684 * Initial snapshot of the timestamp register to implement the wait. 1685 * We work with 32b values, so clear out the top 32b bits of the 1686 * register because the ALU works 64bits. 1687 */ 1688 *cs++ = MI_LOAD_REGISTER_IMM(1); 1689 *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4; 1690 *cs++ = 0; 1691 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); 1692 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base)); 1693 *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)); 1694 1695 /* 1696 * This is the location we're going to jump back into until the 1697 * required amount of time has passed. 1698 */ 1699 jump = cs; 1700 1701 /* 1702 * Take another snapshot of the timestamp register. Take care to clear 1703 * up the top 32bits of CS_GPR(1) as we're using it for other 1704 * operations below. 1705 */ 1706 *cs++ = MI_LOAD_REGISTER_IMM(1); 1707 *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4; 1708 *cs++ = 0; 1709 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); 1710 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base)); 1711 *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)); 1712 1713 /* 1714 * Do a diff between the 2 timestamps and store the result back into 1715 * CS_GPR(1). 
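 *
 * As an illustrative sketch only (the MI_MATH packet emitted below is
 * authoritative), the ALU program amounts to:
 *
 *   SRCA = CS_GPR(NOW_TS)
 *   SRCB = CS_GPR(START_TS)
 *   ACCU = SRCA - SRCB
 *   CS_GPR(DELTA_TS) = ACCU
 *   CS_GPR(JUMP_PREDICATE) = CF   (carry set when NOW_TS < START_TS)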
1716 */ 1717 *cs++ = MI_MATH(5); 1718 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS)); 1719 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS)); 1720 *cs++ = MI_MATH_SUB; 1721 *cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU); 1722 *cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF); 1723 1724 /* 1725 * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the 1726 * timestamp have rolled over the 32bits) into the predicate register 1727 * to be used for the predicated jump. 1728 */ 1729 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); 1730 *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)); 1731 *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1); 1732 1733 /* Restart from the beginning if we had timestamps roll over. */ 1734 *cs++ = (INTEL_GEN(i915) < 8 ? 1735 MI_BATCH_BUFFER_START : 1736 MI_BATCH_BUFFER_START_GEN8) | 1737 MI_BATCH_PREDICATE; 1738 *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4; 1739 *cs++ = 0; 1740 1741 /* 1742 * Now add the diff between to previous timestamps and add it to : 1743 * (((1 * << 64) - 1) - delay_ns) 1744 * 1745 * When the Carry Flag contains 1 this means the elapsed time is 1746 * longer than the expected delay, and we can exit the wait loop. 1747 */ 1748 *cs++ = MI_LOAD_REGISTER_IMM(2); 1749 *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)); 1750 *cs++ = lower_32_bits(delay_ticks); 1751 *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4; 1752 *cs++ = upper_32_bits(delay_ticks); 1753 1754 *cs++ = MI_MATH(4); 1755 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS)); 1756 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET)); 1757 *cs++ = MI_MATH_ADD; 1758 *cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF); 1759 1760 *cs++ = MI_ARB_CHECK; 1761 1762 /* 1763 * Transfer the result into the predicate register to be used for the 1764 * predicated jump. 1765 */ 1766 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); 1767 *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)); 1768 *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1); 1769 1770 /* Predicate the jump. */ 1771 *cs++ = (INTEL_GEN(i915) < 8 ? 1772 MI_BATCH_BUFFER_START : 1773 MI_BATCH_BUFFER_START_GEN8) | 1774 MI_BATCH_PREDICATE; 1775 *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4; 1776 *cs++ = 0; 1777 1778 /* Restore registers. */ 1779 for (i = 0; i < N_CS_GPR; i++) 1780 cs = save_restore_register( 1781 stream, cs, false /* restore */, CS_GPR(i), 1782 INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); 1783 cs = save_restore_register( 1784 stream, cs, false /* restore */, MI_PREDICATE_RESULT_1, 1785 INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); 1786 1787 /* And return to the ring. 
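 *
 * To recap the layout of the batch emitted above (informal summary, the
 * emission code is authoritative):
 *
 *   save CS_GPR(0..N_CS_GPR-1) and MI_PREDICATE_RESULT_1 to GT scratch
 *   ts0:  START_TS = lower 32 bits of RING_TIMESTAMP
 *   loop: NOW_TS   = lower 32 bits of RING_TIMESTAMP
 *         DELTA_TS = NOW_TS - START_TS, restart at ts0 on 32 bit rollover
 *         if DELTA_TS has not yet exceeded the requested delay,
 *                 predicated jump back to loop
 *   restore CS_GPR(0..N_CS_GPR-1) and MI_PREDICATE_RESULT_1
 *   MI_BATCH_BUFFER_END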
*/ 1788 *cs++ = MI_BATCH_BUFFER_END; 1789 1790 GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch)); 1791 1792 i915_gem_object_flush_map(bo); 1793 __i915_gem_object_release_map(bo); 1794 1795 stream->noa_wait = vma; 1796 return 0; 1797 1798err_unpin: 1799 i915_vma_unpin_and_release(&vma, 0); 1800err_unref: 1801 i915_gem_object_put(bo); 1802 return ret; 1803} 1804 1805static u32 *write_cs_mi_lri(u32 *cs, 1806 const struct i915_oa_reg *reg_data, 1807 u32 n_regs) 1808{ 1809 u32 i; 1810 1811 for (i = 0; i < n_regs; i++) { 1812 if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { 1813 u32 n_lri = min_t(u32, 1814 n_regs - i, 1815 MI_LOAD_REGISTER_IMM_MAX_REGS); 1816 1817 *cs++ = MI_LOAD_REGISTER_IMM(n_lri); 1818 } 1819 *cs++ = i915_mmio_reg_offset(reg_data[i].addr); 1820 *cs++ = reg_data[i].value; 1821 } 1822 1823 return cs; 1824} 1825 1826static int num_lri_dwords(int num_regs) 1827{ 1828 int count = 0; 1829 1830 if (num_regs > 0) { 1831 count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS); 1832 count += num_regs * 2; 1833 } 1834 1835 return count; 1836} 1837 1838static struct i915_oa_config_bo * 1839alloc_oa_config_buffer(struct i915_perf_stream *stream, 1840 struct i915_oa_config *oa_config) 1841{ 1842 struct drm_i915_gem_object *obj; 1843 struct i915_oa_config_bo *oa_bo; 1844 size_t config_length = 0; 1845 u32 *cs; 1846 int err; 1847 1848 oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL); 1849 if (!oa_bo) 1850 return ERR_PTR(-ENOMEM); 1851 1852 config_length += num_lri_dwords(oa_config->mux_regs_len); 1853 config_length += num_lri_dwords(oa_config->b_counter_regs_len); 1854 config_length += num_lri_dwords(oa_config->flex_regs_len); 1855 config_length += 3; /* MI_BATCH_BUFFER_START */ 1856 config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE); 1857 1858 obj = i915_gem_object_create_shmem(stream->perf->i915, config_length); 1859 if (IS_ERR(obj)) { 1860 err = PTR_ERR(obj); 1861 goto err_free; 1862 } 1863 1864 cs = i915_gem_object_pin_map(obj, I915_MAP_WB); 1865 if (IS_ERR(cs)) { 1866 err = PTR_ERR(cs); 1867 goto err_oa_bo; 1868 } 1869 1870 cs = write_cs_mi_lri(cs, 1871 oa_config->mux_regs, 1872 oa_config->mux_regs_len); 1873 cs = write_cs_mi_lri(cs, 1874 oa_config->b_counter_regs, 1875 oa_config->b_counter_regs_len); 1876 cs = write_cs_mi_lri(cs, 1877 oa_config->flex_regs, 1878 oa_config->flex_regs_len); 1879 1880 /* Jump into the active wait. */ 1881 *cs++ = (INTEL_GEN(stream->perf->i915) < 8 ? 1882 MI_BATCH_BUFFER_START : 1883 MI_BATCH_BUFFER_START_GEN8); 1884 *cs++ = i915_ggtt_offset(stream->noa_wait); 1885 *cs++ = 0; 1886 1887 i915_gem_object_flush_map(obj); 1888 __i915_gem_object_release_map(obj); 1889 1890 oa_bo->vma = i915_vma_instance(obj, 1891 &stream->engine->gt->ggtt->vm, 1892 NULL); 1893 if (IS_ERR(oa_bo->vma)) { 1894 err = PTR_ERR(oa_bo->vma); 1895 goto err_oa_bo; 1896 } 1897 1898 oa_bo->oa_config = i915_oa_config_get(oa_config); 1899 llist_add(&oa_bo->node, &stream->oa_config_bos); 1900 1901 return oa_bo; 1902 1903err_oa_bo: 1904 i915_gem_object_put(obj); 1905err_free: 1906 kfree(oa_bo); 1907 return ERR_PTR(err); 1908} 1909 1910static struct i915_vma * 1911get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) 1912{ 1913 struct i915_oa_config_bo *oa_bo; 1914 1915 /* 1916 * Look for the buffer in the already allocated BOs attached 1917 * to the stream. 
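 *
 * Config BOs are kept on stream->oa_config_bos for the lifetime of the
 * stream (they are only released in free_oa_configs()), so switching
 * back to a previously used config does not rebuild its LRI batch.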
1918 */ 1919 llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) { 1920 if (oa_bo->oa_config == oa_config && 1921 memcmp(oa_bo->oa_config->uuid, 1922 oa_config->uuid, 1923 sizeof(oa_config->uuid)) == 0) 1924 goto out; 1925 } 1926 1927 oa_bo = alloc_oa_config_buffer(stream, oa_config); 1928 if (IS_ERR(oa_bo)) 1929 return ERR_CAST(oa_bo); 1930 1931out: 1932 return i915_vma_get(oa_bo->vma); 1933} 1934 1935static int 1936emit_oa_config(struct i915_perf_stream *stream, 1937 struct i915_oa_config *oa_config, 1938 struct intel_context *ce, 1939 struct i915_active *active) 1940{ 1941 struct i915_request *rq; 1942 struct i915_vma *vma; 1943 struct i915_gem_ww_ctx ww; 1944 int err; 1945 1946 vma = get_oa_vma(stream, oa_config); 1947 if (IS_ERR(vma)) 1948 return PTR_ERR(vma); 1949 1950 i915_gem_ww_ctx_init(&ww, true); 1951retry: 1952 err = i915_gem_object_lock(vma->obj, &ww); 1953 if (err) 1954 goto err; 1955 1956 err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH); 1957 if (err) 1958 goto err; 1959 1960 intel_engine_pm_get(ce->engine); 1961 rq = i915_request_create(ce); 1962 intel_engine_pm_put(ce->engine); 1963 if (IS_ERR(rq)) { 1964 err = PTR_ERR(rq); 1965 goto err_vma_unpin; 1966 } 1967 1968 if (!IS_ERR_OR_NULL(active)) { 1969 /* After all individual context modifications */ 1970 err = i915_request_await_active(rq, active, 1971 I915_ACTIVE_AWAIT_ACTIVE); 1972 if (err) 1973 goto err_add_request; 1974 1975 err = i915_active_add_request(active, rq); 1976 if (err) 1977 goto err_add_request; 1978 } 1979 1980 err = i915_request_await_object(rq, vma->obj, 0); 1981 if (!err) 1982 err = i915_vma_move_to_active(vma, rq, 0); 1983 if (err) 1984 goto err_add_request; 1985 1986 err = rq->engine->emit_bb_start(rq, 1987 vma->node.start, 0, 1988 I915_DISPATCH_SECURE); 1989 if (err) 1990 goto err_add_request; 1991 1992err_add_request: 1993 i915_request_add(rq); 1994err_vma_unpin: 1995 i915_vma_unpin(vma); 1996err: 1997 if (err == -EDEADLK) { 1998 err = i915_gem_ww_ctx_backoff(&ww); 1999 if (!err) 2000 goto retry; 2001 } 2002 2003 i915_gem_ww_ctx_fini(&ww); 2004 i915_vma_put(vma); 2005 return err; 2006} 2007 2008static struct intel_context *oa_context(struct i915_perf_stream *stream) 2009{ 2010 return stream->pinned_ctx ?: stream->engine->kernel_context; 2011} 2012 2013static int 2014hsw_enable_metric_set(struct i915_perf_stream *stream, 2015 struct i915_active *active) 2016{ 2017 struct intel_uncore *uncore = stream->uncore; 2018 2019 /* 2020 * PRM: 2021 * 2022 * OA unit is using “crclk” for its functionality. When trunk 2023 * level clock gating takes place, OA clock would be gated, 2024 * unable to count the events from non-render clock domain. 2025 * Render clock gating must be disabled when OA is enabled to 2026 * count the events from non-render domain. Unit level clock 2027 * gating for RCS should also be disabled. 
2028 */ 2029 intel_uncore_rmw(uncore, GEN7_MISCCPCTL, 2030 GEN7_DOP_CLOCK_GATE_ENABLE, 0); 2031 intel_uncore_rmw(uncore, GEN6_UCGCTL1, 2032 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); 2033 2034 return emit_oa_config(stream, 2035 stream->oa_config, oa_context(stream), 2036 active); 2037} 2038 2039static void hsw_disable_metric_set(struct i915_perf_stream *stream) 2040{ 2041 struct intel_uncore *uncore = stream->uncore; 2042 2043 intel_uncore_rmw(uncore, GEN6_UCGCTL1, 2044 GEN6_CSUNIT_CLOCK_GATE_DISABLE, 0); 2045 intel_uncore_rmw(uncore, GEN7_MISCCPCTL, 2046 0, GEN7_DOP_CLOCK_GATE_ENABLE); 2047 2048 intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); 2049} 2050 2051static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config, 2052 i915_reg_t reg) 2053{ 2054 u32 mmio = i915_mmio_reg_offset(reg); 2055 int i; 2056 2057 /* 2058 * This arbitrary default will select the 'EU FPU0 Pipeline 2059 * Active' event. In the future it's anticipated that there 2060 * will be an explicit 'No Event' we can select, but not yet... 2061 */ 2062 if (!oa_config) 2063 return 0; 2064 2065 for (i = 0; i < oa_config->flex_regs_len; i++) { 2066 if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio) 2067 return oa_config->flex_regs[i].value; 2068 } 2069 2070 return 0; 2071} 2072/* 2073 * NB: It must always remain pointer safe to run this even if the OA unit 2074 * has been disabled. 2075 * 2076 * It's fine to put out-of-date values into these per-context registers 2077 * in the case that the OA unit has been disabled. 2078 */ 2079static void 2080gen8_update_reg_state_unlocked(const struct intel_context *ce, 2081 const struct i915_perf_stream *stream) 2082{ 2083 u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset; 2084 u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; 2085 /* The MMIO offsets for Flex EU registers aren't contiguous */ 2086 i915_reg_t flex_regs[] = { 2087 EU_PERF_CNTL0, 2088 EU_PERF_CNTL1, 2089 EU_PERF_CNTL2, 2090 EU_PERF_CNTL3, 2091 EU_PERF_CNTL4, 2092 EU_PERF_CNTL5, 2093 EU_PERF_CNTL6, 2094 }; 2095 u32 *reg_state = ce->lrc_reg_state; 2096 int i; 2097 2098 reg_state[ctx_oactxctrl + 1] = 2099 (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | 2100 (stream->periodic ? 
GEN8_OA_TIMER_ENABLE : 0) | 2101 GEN8_OA_COUNTER_RESUME; 2102 2103 for (i = 0; i < ARRAY_SIZE(flex_regs); i++) 2104 reg_state[ctx_flexeu0 + i * 2 + 1] = 2105 oa_config_flex_reg(stream->oa_config, flex_regs[i]); 2106} 2107 2108struct flex { 2109 i915_reg_t reg; 2110 u32 offset; 2111 u32 value; 2112}; 2113 2114static int 2115gen8_store_flex(struct i915_request *rq, 2116 struct intel_context *ce, 2117 const struct flex *flex, unsigned int count) 2118{ 2119 u32 offset; 2120 u32 *cs; 2121 2122 cs = intel_ring_begin(rq, 4 * count); 2123 if (IS_ERR(cs)) 2124 return PTR_ERR(cs); 2125 2126 offset = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET; 2127 do { 2128 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 2129 *cs++ = offset + flex->offset * sizeof(u32); 2130 *cs++ = 0; 2131 *cs++ = flex->value; 2132 } while (flex++, --count); 2133 2134 intel_ring_advance(rq, cs); 2135 2136 return 0; 2137} 2138 2139static int 2140gen8_load_flex(struct i915_request *rq, 2141 struct intel_context *ce, 2142 const struct flex *flex, unsigned int count) 2143{ 2144 u32 *cs; 2145 2146 GEM_BUG_ON(!count || count > 63); 2147 2148 cs = intel_ring_begin(rq, 2 * count + 2); 2149 if (IS_ERR(cs)) 2150 return PTR_ERR(cs); 2151 2152 *cs++ = MI_LOAD_REGISTER_IMM(count); 2153 do { 2154 *cs++ = i915_mmio_reg_offset(flex->reg); 2155 *cs++ = flex->value; 2156 } while (flex++, --count); 2157 *cs++ = MI_NOOP; 2158 2159 intel_ring_advance(rq, cs); 2160 2161 return 0; 2162} 2163 2164static int gen8_modify_context(struct intel_context *ce, 2165 const struct flex *flex, unsigned int count) 2166{ 2167 struct i915_request *rq; 2168 int err; 2169 2170 rq = intel_engine_create_kernel_request(ce->engine); 2171 if (IS_ERR(rq)) 2172 return PTR_ERR(rq); 2173 2174 /* Serialise with the remote context */ 2175 err = intel_context_prepare_remote_request(ce, rq); 2176 if (err == 0) 2177 err = gen8_store_flex(rq, ce, flex, count); 2178 2179 i915_request_add(rq); 2180 return err; 2181} 2182 2183static int 2184gen8_modify_self(struct intel_context *ce, 2185 const struct flex *flex, unsigned int count, 2186 struct i915_active *active) 2187{ 2188 struct i915_request *rq; 2189 int err; 2190 2191 intel_engine_pm_get(ce->engine); 2192 rq = i915_request_create(ce); 2193 intel_engine_pm_put(ce->engine); 2194 if (IS_ERR(rq)) 2195 return PTR_ERR(rq); 2196 2197 if (!IS_ERR_OR_NULL(active)) { 2198 err = i915_active_add_request(active, rq); 2199 if (err) 2200 goto err_add_request; 2201 } 2202 2203 err = gen8_load_flex(rq, ce, flex, count); 2204 if (err) 2205 goto err_add_request; 2206 2207err_add_request: 2208 i915_request_add(rq); 2209 return err; 2210} 2211 2212static int gen8_configure_context(struct i915_gem_context *ctx, 2213 struct flex *flex, unsigned int count) 2214{ 2215 struct i915_gem_engines_iter it; 2216 struct intel_context *ce; 2217 int err = 0; 2218 2219 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { 2220 GEM_BUG_ON(ce == ce->engine->kernel_context); 2221 2222 if (ce->engine->class != RENDER_CLASS) 2223 continue; 2224 2225 /* Otherwise OA settings will be set upon first use */ 2226 if (!intel_context_pin_if_active(ce)) 2227 continue; 2228 2229 flex->value = intel_sseu_make_rpcs(ce->engine->gt, &ce->sseu); 2230 err = gen8_modify_context(ce, flex, count); 2231 2232 intel_context_unpin(ce); 2233 if (err) 2234 break; 2235 } 2236 i915_gem_context_unlock_engines(ctx); 2237 2238 return err; 2239} 2240 2241static int gen12_configure_oar_context(struct i915_perf_stream *stream, 2242 struct i915_active *active) 2243{ 2244 int err; 2245 struct 
intel_context *ce = stream->pinned_ctx; 2246 u32 format = stream->oa_buffer.format; 2247 struct flex regs_context[] = { 2248 { 2249 GEN8_OACTXCONTROL, 2250 stream->perf->ctx_oactxctrl_offset + 1, 2251 active ? GEN8_OA_COUNTER_RESUME : 0, 2252 }, 2253 }; 2254 /* Offsets in regs_lri are not used since this configuration is only 2255 * applied using LRI. Initialize the correct offsets for posterity. 2256 */ 2257#define GEN12_OAR_OACONTROL_OFFSET 0x5B0 2258 struct flex regs_lri[] = { 2259 { 2260 GEN12_OAR_OACONTROL, 2261 GEN12_OAR_OACONTROL_OFFSET + 1, 2262 (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | 2263 (active ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0) 2264 }, 2265 { 2266 RING_CONTEXT_CONTROL(ce->engine->mmio_base), 2267 CTX_CONTEXT_CONTROL, 2268 _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, 2269 active ? 2270 GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 2271 0) 2272 }, 2273 }; 2274 2275 /* Modify the context image of pinned context with regs_context*/ 2276 err = intel_context_lock_pinned(ce); 2277 if (err) 2278 return err; 2279 2280 err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context)); 2281 intel_context_unlock_pinned(ce); 2282 if (err) 2283 return err; 2284 2285 /* Apply regs_lri using LRI with pinned context */ 2286 return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active); 2287} 2288 2289/* 2290 * Manages updating the per-context aspects of the OA stream 2291 * configuration across all contexts. 2292 * 2293 * The awkward consideration here is that OACTXCONTROL controls the 2294 * exponent for periodic sampling which is primarily used for system 2295 * wide profiling where we'd like a consistent sampling period even in 2296 * the face of context switches. 2297 * 2298 * Our approach of updating the register state context (as opposed to 2299 * say using a workaround batch buffer) ensures that the hardware 2300 * won't automatically reload an out-of-date timer exponent even 2301 * transiently before a WA BB could be parsed. 2302 * 2303 * This function needs to: 2304 * - Ensure the currently running context's per-context OA state is 2305 * updated 2306 * - Ensure that all existing contexts will have the correct per-context 2307 * OA state if they are scheduled for use. 2308 * - Ensure any new contexts will be initialized with the correct 2309 * per-context OA state. 2310 * 2311 * Note: it's only the RCS/Render context that has any OA state. 2312 * Note: the first flex register passed must always be R_PWR_CLK_STATE 2313 */ 2314static int 2315oa_configure_all_contexts(struct i915_perf_stream *stream, 2316 struct flex *regs, 2317 size_t num_regs, 2318 struct i915_active *active) 2319{ 2320 struct drm_i915_private *i915 = stream->perf->i915; 2321 struct intel_engine_cs *engine; 2322 struct i915_gem_context *ctx, *cn; 2323 int err; 2324 2325 lockdep_assert_held(&stream->perf->lock); 2326 2327 /* 2328 * The OA register config is setup through the context image. This image 2329 * might be written to by the GPU on context switch (in particular on 2330 * lite-restore). This means we can't safely update a context's image, 2331 * if this context is scheduled/submitted to run on the GPU. 2332 * 2333 * We could emit the OA register config through the batch buffer but 2334 * this might leave small interval of time where the OA unit is 2335 * configured at an invalid sampling period. 
2336 * 2337 * Note that since we emit all requests from a single ring, there 2338 * is still an implicit global barrier here that may cause a high 2339 * priority context to wait for an otherwise independent low priority 2340 * context. Contexts idle at the time of reconfiguration are not 2341 * trapped behind the barrier. 2342 */ 2343 spin_lock(&i915->gem.contexts.lock); 2344 list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { 2345 if (!kref_get_unless_zero(&ctx->ref)) 2346 continue; 2347 2348 spin_unlock(&i915->gem.contexts.lock); 2349 2350 err = gen8_configure_context(ctx, regs, num_regs); 2351 if (err) { 2352 i915_gem_context_put(ctx); 2353 return err; 2354 } 2355 2356 spin_lock(&i915->gem.contexts.lock); 2357 list_safe_reset_next(ctx, cn, link); 2358 i915_gem_context_put(ctx); 2359 } 2360 spin_unlock(&i915->gem.contexts.lock); 2361 2362 /* 2363 * After updating all other contexts, we need to modify ourselves. 2364 * If we don't modify the kernel_context, we do not get events while 2365 * idle. 2366 */ 2367 for_each_uabi_engine(engine, i915) { 2368 struct intel_context *ce = engine->kernel_context; 2369 2370 if (engine->class != RENDER_CLASS) 2371 continue; 2372 2373 regs[0].value = intel_sseu_make_rpcs(engine->gt, &ce->sseu); 2374 2375 err = gen8_modify_self(ce, regs, num_regs, active); 2376 if (err) 2377 return err; 2378 } 2379 2380 return 0; 2381} 2382 2383static int 2384gen12_configure_all_contexts(struct i915_perf_stream *stream, 2385 const struct i915_oa_config *oa_config, 2386 struct i915_active *active) 2387{ 2388 struct flex regs[] = { 2389 { 2390 GEN8_R_PWR_CLK_STATE, 2391 CTX_R_PWR_CLK_STATE, 2392 }, 2393 }; 2394 2395 return oa_configure_all_contexts(stream, 2396 regs, ARRAY_SIZE(regs), 2397 active); 2398} 2399 2400static int 2401lrc_configure_all_contexts(struct i915_perf_stream *stream, 2402 const struct i915_oa_config *oa_config, 2403 struct i915_active *active) 2404{ 2405 /* The MMIO offsets for Flex EU registers aren't contiguous */ 2406 const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; 2407#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) 2408 struct flex regs[] = { 2409 { 2410 GEN8_R_PWR_CLK_STATE, 2411 CTX_R_PWR_CLK_STATE, 2412 }, 2413 { 2414 GEN8_OACTXCONTROL, 2415 stream->perf->ctx_oactxctrl_offset + 1, 2416 }, 2417 { EU_PERF_CNTL0, ctx_flexeuN(0) }, 2418 { EU_PERF_CNTL1, ctx_flexeuN(1) }, 2419 { EU_PERF_CNTL2, ctx_flexeuN(2) }, 2420 { EU_PERF_CNTL3, ctx_flexeuN(3) }, 2421 { EU_PERF_CNTL4, ctx_flexeuN(4) }, 2422 { EU_PERF_CNTL5, ctx_flexeuN(5) }, 2423 { EU_PERF_CNTL6, ctx_flexeuN(6) }, 2424 }; 2425#undef ctx_flexeuN 2426 int i; 2427 2428 regs[1].value = 2429 (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | 2430 (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | 2431 GEN8_OA_COUNTER_RESUME; 2432 2433 for (i = 2; i < ARRAY_SIZE(regs); i++) 2434 regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); 2435 2436 return oa_configure_all_contexts(stream, 2437 regs, ARRAY_SIZE(regs), 2438 active); 2439} 2440 2441static int 2442gen8_enable_metric_set(struct i915_perf_stream *stream, 2443 struct i915_active *active) 2444{ 2445 struct intel_uncore *uncore = stream->uncore; 2446 struct i915_oa_config *oa_config = stream->oa_config; 2447 int ret; 2448 2449 /* 2450 * We disable slice/unslice clock ratio change reports on SKL since 2451 * they are too noisy. 
The HW generates a lot of redundant reports 2452 * where the ratio hasn't really changed causing a lot of redundant 2453 * work to processes and increasing the chances we'll hit buffer 2454 * overruns. 2455 * 2456 * Although we don't currently use the 'disable overrun' OABUFFER 2457 * feature it's worth noting that clock ratio reports have to be 2458 * disabled before considering to use that feature since the HW doesn't 2459 * correctly block these reports. 2460 * 2461 * Currently none of the high-level metrics we have depend on knowing 2462 * this ratio to normalize. 2463 * 2464 * Note: This register is not power context saved and restored, but 2465 * that's OK considering that we disable RC6 while the OA unit is 2466 * enabled. 2467 * 2468 * The _INCLUDE_CLK_RATIO bit allows the slice/unslice frequency to 2469 * be read back from automatically triggered reports, as part of the 2470 * RPT_ID field. 2471 */ 2472 if (IS_GEN_RANGE(stream->perf->i915, 9, 11)) { 2473 intel_uncore_write(uncore, GEN8_OA_DEBUG, 2474 _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | 2475 GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); 2476 } 2477 2478 /* 2479 * Update all contexts prior writing the mux configurations as we need 2480 * to make sure all slices/subslices are ON before writing to NOA 2481 * registers. 2482 */ 2483 ret = lrc_configure_all_contexts(stream, oa_config, active); 2484 if (ret) 2485 return ret; 2486 2487 return emit_oa_config(stream, 2488 stream->oa_config, oa_context(stream), 2489 active); 2490} 2491 2492static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream) 2493{ 2494 return _MASKED_FIELD(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS, 2495 (stream->sample_flags & SAMPLE_OA_REPORT) ? 2496 0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); 2497} 2498 2499static int 2500gen12_enable_metric_set(struct i915_perf_stream *stream, 2501 struct i915_active *active) 2502{ 2503 struct intel_uncore *uncore = stream->uncore; 2504 struct i915_oa_config *oa_config = stream->oa_config; 2505 bool periodic = stream->periodic; 2506 u32 period_exponent = stream->period_exponent; 2507 int ret; 2508 2509 intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, 2510 /* Disable clk ratio reports, like previous Gens. */ 2511 _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | 2512 GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) | 2513 /* 2514 * If the user didn't require OA reports, instruct 2515 * the hardware not to emit ctx switch reports. 2516 */ 2517 oag_report_ctx_switches(stream)); 2518 2519 intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ? 2520 (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME | 2521 GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE | 2522 (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) 2523 : 0); 2524 2525 /* 2526 * Update all contexts prior writing the mux configurations as we need 2527 * to make sure all slices/subslices are ON before writing to NOA 2528 * registers. 2529 */ 2530 ret = gen12_configure_all_contexts(stream, oa_config, active); 2531 if (ret) 2532 return ret; 2533 2534 /* 2535 * For Gen12, performance counters are context 2536 * saved/restored. Only enable it for the context that 2537 * requested this. 
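 *
 * gen12_configure_oar_context() below takes care of this by updating
 * GEN8_OACTXCONTROL in the pinned context image and programming
 * GEN12_OAR_OACONTROL plus the OAR context-enable bit via LRI on that
 * context.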
2538 */ 2539 if (stream->ctx) { 2540 ret = gen12_configure_oar_context(stream, active); 2541 if (ret) 2542 return ret; 2543 } 2544 2545 return emit_oa_config(stream, 2546 stream->oa_config, oa_context(stream), 2547 active); 2548} 2549 2550static void gen8_disable_metric_set(struct i915_perf_stream *stream) 2551{ 2552 struct intel_uncore *uncore = stream->uncore; 2553 2554 /* Reset all contexts' slices/subslices configurations. */ 2555 lrc_configure_all_contexts(stream, NULL, NULL); 2556 2557 intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); 2558} 2559 2560static void gen10_disable_metric_set(struct i915_perf_stream *stream) 2561{ 2562 struct intel_uncore *uncore = stream->uncore; 2563 2564 /* Reset all contexts' slices/subslices configurations. */ 2565 lrc_configure_all_contexts(stream, NULL, NULL); 2566 2567 /* Make sure we disable noa to save power. */ 2568 intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); 2569} 2570 2571static void gen12_disable_metric_set(struct i915_perf_stream *stream) 2572{ 2573 struct intel_uncore *uncore = stream->uncore; 2574 2575 /* Reset all contexts' slices/subslices configurations. */ 2576 gen12_configure_all_contexts(stream, NULL, NULL); 2577 2578 /* disable the context save/restore or OAR counters */ 2579 if (stream->ctx) 2580 gen12_configure_oar_context(stream, NULL); 2581 2582 /* Make sure we disable noa to save power. */ 2583 intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); 2584} 2585 2586static void gen7_oa_enable(struct i915_perf_stream *stream) 2587{ 2588 struct intel_uncore *uncore = stream->uncore; 2589 struct i915_gem_context *ctx = stream->ctx; 2590 u32 ctx_id = stream->specific_ctx_id; 2591 bool periodic = stream->periodic; 2592 u32 period_exponent = stream->period_exponent; 2593 u32 report_format = stream->oa_buffer.format; 2594 2595 /* 2596 * Reset buf pointers so we don't forward reports from before now. 2597 * 2598 * Think carefully if considering trying to avoid this, since it 2599 * also ensures status flags and the buffer itself are cleared 2600 * in error paths, and we have checks for invalid reports based 2601 * on the assumption that certain fields are written to zeroed 2602 * memory which this helps maintains. 2603 */ 2604 gen7_init_oa_buffer(stream); 2605 2606 intel_uncore_write(uncore, GEN7_OACONTROL, 2607 (ctx_id & GEN7_OACONTROL_CTX_MASK) | 2608 (period_exponent << 2609 GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | 2610 (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | 2611 (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | 2612 (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | 2613 GEN7_OACONTROL_ENABLE); 2614} 2615 2616static void gen8_oa_enable(struct i915_perf_stream *stream) 2617{ 2618 struct intel_uncore *uncore = stream->uncore; 2619 u32 report_format = stream->oa_buffer.format; 2620 2621 /* 2622 * Reset buf pointers so we don't forward reports from before now. 2623 * 2624 * Think carefully if considering trying to avoid this, since it 2625 * also ensures status flags and the buffer itself are cleared 2626 * in error paths, and we have checks for invalid reports based 2627 * on the assumption that certain fields are written to zeroed 2628 * memory which this helps maintains. 
2629 */ 2630 gen8_init_oa_buffer(stream); 2631 2632 /* 2633 * Note: we don't rely on the hardware to perform single context 2634 * filtering and instead filter on the cpu based on the context-id 2635 * field of reports 2636 */ 2637 intel_uncore_write(uncore, GEN8_OACONTROL, 2638 (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) | 2639 GEN8_OA_COUNTER_ENABLE); 2640} 2641 2642static void gen12_oa_enable(struct i915_perf_stream *stream) 2643{ 2644 struct intel_uncore *uncore = stream->uncore; 2645 u32 report_format = stream->oa_buffer.format; 2646 2647 /* 2648 * If we don't want OA reports from the OA buffer, then we don't even 2649 * need to program the OAG unit. 2650 */ 2651 if (!(stream->sample_flags & SAMPLE_OA_REPORT)) 2652 return; 2653 2654 gen12_init_oa_buffer(stream); 2655 2656 intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 2657 (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) | 2658 GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE); 2659} 2660 2661/** 2662 * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream 2663 * @stream: An i915 perf stream opened for OA metrics 2664 * 2665 * [Re]enables hardware periodic sampling according to the period configured 2666 * when opening the stream. This also starts a hrtimer that will periodically 2667 * check for data in the circular OA buffer for notifying userspace (e.g. 2668 * during a read() or poll()). 2669 */ 2670static void i915_oa_stream_enable(struct i915_perf_stream *stream) 2671{ 2672 stream->pollin = false; 2673 2674 stream->perf->ops.oa_enable(stream); 2675 2676 if (stream->sample_flags & SAMPLE_OA_REPORT) 2677 hrtimer_start(&stream->poll_check_timer, 2678 ns_to_ktime(stream->poll_oa_period), 2679 HRTIMER_MODE_REL_PINNED); 2680} 2681 2682static void gen7_oa_disable(struct i915_perf_stream *stream) 2683{ 2684 struct intel_uncore *uncore = stream->uncore; 2685 2686 intel_uncore_write(uncore, GEN7_OACONTROL, 0); 2687 if (intel_wait_for_register(uncore, 2688 GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0, 2689 50)) 2690 drm_err(&stream->perf->i915->drm, 2691 "wait for OA to be disabled timed out\n"); 2692} 2693 2694static void gen8_oa_disable(struct i915_perf_stream *stream) 2695{ 2696 struct intel_uncore *uncore = stream->uncore; 2697 2698 intel_uncore_write(uncore, GEN8_OACONTROL, 0); 2699 if (intel_wait_for_register(uncore, 2700 GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0, 2701 50)) 2702 drm_err(&stream->perf->i915->drm, 2703 "wait for OA to be disabled timed out\n"); 2704} 2705 2706static void gen12_oa_disable(struct i915_perf_stream *stream) 2707{ 2708 struct intel_uncore *uncore = stream->uncore; 2709 2710 intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0); 2711 if (intel_wait_for_register(uncore, 2712 GEN12_OAG_OACONTROL, 2713 GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 2714 50)) 2715 drm_err(&stream->perf->i915->drm, 2716 "wait for OA to be disabled timed out\n"); 2717 2718 intel_uncore_write(uncore, GEN12_OA_TLB_INV_CR, 1); 2719 if (intel_wait_for_register(uncore, 2720 GEN12_OA_TLB_INV_CR, 2721 1, 0, 2722 50)) 2723 drm_err(&stream->perf->i915->drm, 2724 "wait for OA tlb invalidate timed out\n"); 2725} 2726 2727/** 2728 * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream 2729 * @stream: An i915 perf stream opened for OA metrics 2730 * 2731 * Stops the OA unit from periodically writing counter reports into the 2732 * circular OA buffer. This also stops the hrtimer that periodically checks for 2733 * data in the circular OA buffer, for notifying userspace. 
2734 */ 2735static void i915_oa_stream_disable(struct i915_perf_stream *stream) 2736{ 2737 stream->perf->ops.oa_disable(stream); 2738 2739 if (stream->sample_flags & SAMPLE_OA_REPORT) 2740 hrtimer_cancel(&stream->poll_check_timer); 2741} 2742 2743static const struct i915_perf_stream_ops i915_oa_stream_ops = { 2744 .destroy = i915_oa_stream_destroy, 2745 .enable = i915_oa_stream_enable, 2746 .disable = i915_oa_stream_disable, 2747 .wait_unlocked = i915_oa_wait_unlocked, 2748 .poll_wait = i915_oa_poll_wait, 2749 .read = i915_oa_read, 2750}; 2751 2752static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream) 2753{ 2754 struct i915_active *active; 2755 int err; 2756 2757 active = i915_active_create(); 2758 if (!active) 2759 return -ENOMEM; 2760 2761 err = stream->perf->ops.enable_metric_set(stream, active); 2762 if (err == 0) 2763 __i915_active_wait(active, TASK_UNINTERRUPTIBLE); 2764 2765 i915_active_put(active); 2766 return err; 2767} 2768 2769static void 2770get_default_sseu_config(struct intel_sseu *out_sseu, 2771 struct intel_engine_cs *engine) 2772{ 2773 const struct sseu_dev_info *devinfo_sseu = &engine->gt->info.sseu; 2774 2775 *out_sseu = intel_sseu_from_device_info(devinfo_sseu); 2776 2777 if (IS_GEN(engine->i915, 11)) { 2778 /* 2779 * We only need subslice count so it doesn't matter which ones 2780 * we select - just turn off low bits in the amount of half of 2781 * all available subslices per slice. 2782 */ 2783 out_sseu->subslice_mask = 2784 ~(~0 << (hweight8(out_sseu->subslice_mask) / 2)); 2785 out_sseu->slice_mask = 0x1; 2786 } 2787} 2788 2789static int 2790get_sseu_config(struct intel_sseu *out_sseu, 2791 struct intel_engine_cs *engine, 2792 const struct drm_i915_gem_context_param_sseu *drm_sseu) 2793{ 2794 if (drm_sseu->engine.engine_class != engine->uabi_class || 2795 drm_sseu->engine.engine_instance != engine->uabi_instance) 2796 return -EINVAL; 2797 2798 return i915_gem_user_to_context_sseu(engine->gt, drm_sseu, out_sseu); 2799} 2800 2801/** 2802 * i915_oa_stream_init - validate combined props for OA stream and init 2803 * @stream: An i915 perf stream 2804 * @param: The open parameters passed to `DRM_I915_PERF_OPEN` 2805 * @props: The property state that configures stream (individually validated) 2806 * 2807 * While read_properties_unlocked() validates properties in isolation it 2808 * doesn't ensure that the combination necessarily makes sense. 2809 * 2810 * At this point it has been determined that userspace wants a stream of 2811 * OA metrics, but still we need to further validate the combined 2812 * properties are OK. 2813 * 2814 * If the configuration makes sense then we can allocate memory for 2815 * a circular OA buffer and apply the requested metric set configuration. 2816 * 2817 * Returns: zero on success or a negative error code. 
2818 */ 2819static int i915_oa_stream_init(struct i915_perf_stream *stream, 2820 struct drm_i915_perf_open_param *param, 2821 struct perf_open_properties *props) 2822{ 2823 struct drm_i915_private *i915 = stream->perf->i915; 2824 struct i915_perf *perf = stream->perf; 2825 int format_size; 2826 int ret; 2827 2828 if (!props->engine) { 2829 DRM_DEBUG("OA engine not specified\n"); 2830 return -EINVAL; 2831 } 2832 2833 /* 2834 * If the sysfs metrics/ directory wasn't registered for some 2835 * reason then don't let userspace try their luck with config 2836 * IDs 2837 */ 2838 if (!perf->metrics_kobj) { 2839 DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); 2840 return -EINVAL; 2841 } 2842 2843 if (!(props->sample_flags & SAMPLE_OA_REPORT) && 2844 (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) { 2845 DRM_DEBUG("Only OA report sampling supported\n"); 2846 return -EINVAL; 2847 } 2848 2849 if (!perf->ops.enable_metric_set) { 2850 DRM_DEBUG("OA unit not supported\n"); 2851 return -ENODEV; 2852 } 2853 2854 /* 2855 * To avoid the complexity of having to accurately filter 2856 * counter reports and marshal to the appropriate client 2857 * we currently only allow exclusive access 2858 */ 2859 if (perf->exclusive_stream) { 2860 DRM_DEBUG("OA unit already in use\n"); 2861 return -EBUSY; 2862 } 2863 2864 if (!props->oa_format) { 2865 DRM_DEBUG("OA report format not specified\n"); 2866 return -EINVAL; 2867 } 2868 2869 stream->engine = props->engine; 2870 stream->uncore = stream->engine->gt->uncore; 2871 2872 stream->sample_size = sizeof(struct drm_i915_perf_record_header); 2873 2874 format_size = perf->oa_formats[props->oa_format].size; 2875 2876 stream->sample_flags = props->sample_flags; 2877 stream->sample_size += format_size; 2878 2879 stream->oa_buffer.format_size = format_size; 2880 if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format_size == 0)) 2881 return -EINVAL; 2882 2883 stream->hold_preemption = props->hold_preemption; 2884 2885 stream->oa_buffer.format = 2886 perf->oa_formats[props->oa_format].format; 2887 2888 stream->periodic = props->oa_periodic; 2889 if (stream->periodic) 2890 stream->period_exponent = props->oa_period_exponent; 2891 2892 if (stream->ctx) { 2893 ret = oa_get_render_ctx_id(stream); 2894 if (ret) { 2895 DRM_DEBUG("Invalid context id to filter with\n"); 2896 return ret; 2897 } 2898 } 2899 2900 ret = alloc_noa_wait(stream); 2901 if (ret) { 2902 DRM_DEBUG("Unable to allocate NOA wait batch buffer\n"); 2903 goto err_noa_wait_alloc; 2904 } 2905 2906 stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set); 2907 if (!stream->oa_config) { 2908 DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); 2909 ret = -EINVAL; 2910 goto err_config; 2911 } 2912 2913 /* PRM - observability performance counters: 2914 * 2915 * OACONTROL, performance counter enable, note: 2916 * 2917 * "When this bit is set, in order to have coherent counts, 2918 * RC6 power state and trunk clock gating must be disabled. 2919 * This can be achieved by programming MMIO registers as 2920 * 0xA094=0 and 0xA090[31]=1" 2921 * 2922 * In our case we are expecting that taking pm + FORCEWAKE 2923 * references will effectively disable RC6. 
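 *
 * The intel_engine_pm_get() / intel_uncore_forcewake_get(FORCEWAKE_ALL)
 * pair below takes those references; they are dropped again in
 * i915_oa_stream_destroy() and in the error unwind paths further down.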
2924 */ 2925 intel_engine_pm_get(stream->engine); 2926 intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL); 2927 2928 ret = alloc_oa_buffer(stream); 2929 if (ret) 2930 goto err_oa_buf_alloc; 2931 2932 stream->ops = &i915_oa_stream_ops; 2933 2934 perf->sseu = props->sseu; 2935 WRITE_ONCE(perf->exclusive_stream, stream); 2936 2937 ret = i915_perf_stream_enable_sync(stream); 2938 if (ret) { 2939 DRM_DEBUG("Unable to enable metric set\n"); 2940 goto err_enable; 2941 } 2942 2943 DRM_DEBUG("opening stream oa config uuid=%s\n", 2944 stream->oa_config->uuid); 2945 2946 hrtimer_init(&stream->poll_check_timer, 2947 CLOCK_MONOTONIC, HRTIMER_MODE_REL); 2948 stream->poll_check_timer.function = oa_poll_check_timer_cb; 2949 init_waitqueue_head(&stream->poll_wq); 2950 spin_lock_init(&stream->oa_buffer.ptr_lock); 2951 2952 return 0; 2953 2954err_enable: 2955 WRITE_ONCE(perf->exclusive_stream, NULL); 2956 perf->ops.disable_metric_set(stream); 2957 2958 free_oa_buffer(stream); 2959 2960err_oa_buf_alloc: 2961 free_oa_configs(stream); 2962 2963 intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); 2964 intel_engine_pm_put(stream->engine); 2965 2966err_config: 2967 free_noa_wait(stream); 2968 2969err_noa_wait_alloc: 2970 if (stream->ctx) 2971 oa_put_render_ctx_id(stream); 2972 2973 return ret; 2974} 2975 2976void i915_oa_init_reg_state(const struct intel_context *ce, 2977 const struct intel_engine_cs *engine) 2978{ 2979 struct i915_perf_stream *stream; 2980 2981 if (engine->class != RENDER_CLASS) 2982 return; 2983 2984 /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */ 2985 stream = READ_ONCE(engine->i915->perf.exclusive_stream); 2986 if (stream && INTEL_GEN(stream->perf->i915) < 12) 2987 gen8_update_reg_state_unlocked(ce, stream); 2988} 2989 2990/** 2991 * i915_perf_read - handles read() FOP for i915 perf stream FDs 2992 * @file: An i915 perf stream file 2993 * @buf: destination buffer given by userspace 2994 * @count: the number of bytes userspace wants to read 2995 * @ppos: (inout) file seek position (unused) 2996 * 2997 * The entry point for handling a read() on a stream file descriptor from 2998 * userspace. Most of the work is left to the i915_perf_read_locked() and 2999 * &i915_perf_stream_ops->read but to save having stream implementations (of 3000 * which we might have multiple later) we handle blocking read here. 3001 * 3002 * We can also consistently treat trying to read from a disabled stream 3003 * as an IO error so implementations can assume the stream is enabled 3004 * while reading. 3005 * 3006 * Returns: The number of bytes copied or a negative error code on failure. 3007 */ 3008static ssize_t i915_perf_read(struct file *file, 3009 char __user *buf, 3010 size_t count, 3011 loff_t *ppos) 3012{ 3013 struct i915_perf_stream *stream = file->private_data; 3014 struct i915_perf *perf = stream->perf; 3015 size_t offset = 0; 3016 int ret; 3017 3018 /* To ensure it's handled consistently we simply treat all reads of a 3019 * disabled stream as an error. In particular it might otherwise lead 3020 * to a deadlock for blocking file descriptors... 3021 */ 3022 if (!stream->enabled || !(stream->sample_flags & SAMPLE_OA_REPORT)) 3023 return -EIO; 3024 3025 if (!(file->f_flags & O_NONBLOCK)) { 3026 /* There's the small chance of false positives from 3027 * stream->ops->wait_unlocked. 3028 * 3029 * E.g. 
with single context filtering since we only wait until 3030 * oabuffer has >= 1 report we don't immediately know whether 3031 * any reports really belong to the current context 3032 */ 3033 do { 3034 ret = stream->ops->wait_unlocked(stream); 3035 if (ret) 3036 return ret; 3037 3038 mutex_lock(&perf->lock); 3039 ret = stream->ops->read(stream, buf, count, &offset); 3040 mutex_unlock(&perf->lock); 3041 } while (!offset && !ret); 3042 } else { 3043 mutex_lock(&perf->lock); 3044 ret = stream->ops->read(stream, buf, count, &offset); 3045 mutex_unlock(&perf->lock); 3046 } 3047 3048 /* We allow the poll checking to sometimes report false positive EPOLLIN 3049 * events where we might actually report EAGAIN on read() if there's 3050 * not really any data available. In this situation though we don't 3051 * want to enter a busy loop between poll() reporting a EPOLLIN event 3052 * and read() returning -EAGAIN. Clearing the oa.pollin state here 3053 * effectively ensures we back off until the next hrtimer callback 3054 * before reporting another EPOLLIN event. 3055 * The exception to this is if ops->read() returned -ENOSPC which means 3056 * that more OA data is available than could fit in the user provided 3057 * buffer. In this case we want the next poll() call to not block. 3058 */ 3059 if (ret != -ENOSPC) 3060 stream->pollin = false; 3061 3062 /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, ... */ 3063 return offset ?: (ret ?: -EAGAIN); 3064} 3065 3066static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) 3067{ 3068 struct i915_perf_stream *stream = 3069 container_of(hrtimer, typeof(*stream), poll_check_timer); 3070 3071 if (oa_buffer_check_unlocked(stream)) { 3072 stream->pollin = true; 3073 wake_up(&stream->poll_wq); 3074 } 3075 3076 hrtimer_forward_now(hrtimer, 3077 ns_to_ktime(stream->poll_oa_period)); 3078 3079 return HRTIMER_RESTART; 3080} 3081 3082/** 3083 * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream 3084 * @stream: An i915 perf stream 3085 * @file: An i915 perf stream file 3086 * @wait: poll() state table 3087 * 3088 * For handling userspace polling on an i915 perf stream, this calls through to 3089 * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that 3090 * will be woken for new stream data. 3091 * 3092 * Note: The &perf->lock mutex has been taken to serialize 3093 * with any non-file-operation driver hooks. 3094 * 3095 * Returns: any poll events that are ready without sleeping 3096 */ 3097static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream, 3098 struct file *file, 3099 poll_table *wait) 3100{ 3101 __poll_t events = 0; 3102 3103 stream->ops->poll_wait(stream, file, wait); 3104 3105 /* Note: we don't explicitly check whether there's something to read 3106 * here since this path may be very hot depending on what else 3107 * userspace is polling, or on the timeout in use. We rely solely on 3108 * the hrtimer/oa_poll_check_timer_cb to notify us when there are 3109 * samples to read. 3110 */ 3111 if (stream->pollin) 3112 events |= EPOLLIN; 3113 3114 return events; 3115} 3116 3117/** 3118 * i915_perf_poll - call poll_wait() with a suitable wait queue for stream 3119 * @file: An i915 perf stream file 3120 * @wait: poll() state table 3121 * 3122 * For handling userspace polling on an i915 perf stream, this ensures 3123 * poll_wait() gets called with a wait queue that will be woken for new stream 3124 * data. 
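 *
 * A minimal userspace sketch of the intended usage (illustrative only;
 * stream_fd is an already opened stream, buf is sized for at least one
 * sample record, error handling omitted):
 *
 *   struct pollfd pfd = { .fd = stream_fd, .events = POLLIN };
 *
 *   for (;;) {
 *           if (poll(&pfd, 1, -1) < 0)
 *                   break;
 *           if (pfd.revents & POLLIN)
 *                   read(stream_fd, buf, sizeof(buf));
 *   }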
3125 * 3126 * Note: Implementation deferred to i915_perf_poll_locked() 3127 * 3128 * Returns: any poll events that are ready without sleeping 3129 */ 3130static __poll_t i915_perf_poll(struct file *file, poll_table *wait) 3131{ 3132 struct i915_perf_stream *stream = file->private_data; 3133 struct i915_perf *perf = stream->perf; 3134 __poll_t ret; 3135 3136 mutex_lock(&perf->lock); 3137 ret = i915_perf_poll_locked(stream, file, wait); 3138 mutex_unlock(&perf->lock); 3139 3140 return ret; 3141} 3142 3143/** 3144 * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl 3145 * @stream: A disabled i915 perf stream 3146 * 3147 * [Re]enables the associated capture of data for this stream. 3148 * 3149 * If a stream was previously enabled then there's currently no intention 3150 * to provide userspace any guarantee about the preservation of previously 3151 * buffered data. 3152 */ 3153static void i915_perf_enable_locked(struct i915_perf_stream *stream) 3154{ 3155 if (stream->enabled) 3156 return; 3157 3158 /* Allow stream->ops->enable() to refer to this */ 3159 stream->enabled = true; 3160 3161 if (stream->ops->enable) 3162 stream->ops->enable(stream); 3163 3164 if (stream->hold_preemption) 3165 intel_context_set_nopreempt(stream->pinned_ctx); 3166} 3167 3168/** 3169 * i915_perf_disable_locked - handle `I915_PERF_IOCTL_DISABLE` ioctl 3170 * @stream: An enabled i915 perf stream 3171 * 3172 * Disables the associated capture of data for this stream. 3173 * 3174 * The intention is that disabling an re-enabling a stream will ideally be 3175 * cheaper than destroying and re-opening a stream with the same configuration, 3176 * though there are no formal guarantees about what state or buffered data 3177 * must be retained between disabling and re-enabling a stream. 3178 * 3179 * Note: while a stream is disabled it's considered an error for userspace 3180 * to attempt to read from the stream (-EIO). 3181 */ 3182static void i915_perf_disable_locked(struct i915_perf_stream *stream) 3183{ 3184 if (!stream->enabled) 3185 return; 3186 3187 /* Allow stream->ops->disable() to refer to this */ 3188 stream->enabled = false; 3189 3190 if (stream->hold_preemption) 3191 intel_context_clear_nopreempt(stream->pinned_ctx); 3192 3193 if (stream->ops->disable) 3194 stream->ops->disable(stream); 3195} 3196 3197static long i915_perf_config_locked(struct i915_perf_stream *stream, 3198 unsigned long metrics_set) 3199{ 3200 struct i915_oa_config *config; 3201 long ret = stream->oa_config->id; 3202 3203 config = i915_perf_get_oa_config(stream->perf, metrics_set); 3204 if (!config) 3205 return -EINVAL; 3206 3207 if (config != stream->oa_config) { 3208 int err; 3209 3210 /* 3211 * If OA is bound to a specific context, emit the 3212 * reconfiguration inline from that context. The update 3213 * will then be ordered with respect to submission on that 3214 * context. 3215 * 3216 * When set globally, we use a low priority kernel context, 3217 * so it will effectively take effect when idle. 3218 */ 3219 err = emit_oa_config(stream, config, oa_context(stream), NULL); 3220 if (!err) 3221 config = xchg(&stream->oa_config, config); 3222 else 3223 ret = err; 3224 } 3225 3226 i915_oa_config_put(config); 3227 3228 return ret; 3229} 3230 3231/** 3232 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs 3233 * @stream: An i915 perf stream 3234 * @cmd: the ioctl request 3235 * @arg: the ioctl data 3236 * 3237 * Note: The &perf->lock mutex has been taken to serialize 3238 * with any non-file-operation driver hooks. 
3239 * 3240 * Returns: zero on success or a negative error code. Returns -EINVAL for 3241 * an unknown ioctl request. 3242 */ 3243static long i915_perf_ioctl_locked(struct i915_perf_stream *stream, 3244 unsigned int cmd, 3245 unsigned long arg) 3246{ 3247 switch (cmd) { 3248 case I915_PERF_IOCTL_ENABLE: 3249 i915_perf_enable_locked(stream); 3250 return 0; 3251 case I915_PERF_IOCTL_DISABLE: 3252 i915_perf_disable_locked(stream); 3253 return 0; 3254 case I915_PERF_IOCTL_CONFIG: 3255 return i915_perf_config_locked(stream, arg); 3256 } 3257 3258 return -EINVAL; 3259} 3260 3261/** 3262 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs 3263 * @file: An i915 perf stream file 3264 * @cmd: the ioctl request 3265 * @arg: the ioctl data 3266 * 3267 * Implementation deferred to i915_perf_ioctl_locked(). 3268 * 3269 * Returns: zero on success or a negative error code. Returns -EINVAL for 3270 * an unknown ioctl request. 3271 */ 3272static long i915_perf_ioctl(struct file *file, 3273 unsigned int cmd, 3274 unsigned long arg) 3275{ 3276 struct i915_perf_stream *stream = file->private_data; 3277 struct i915_perf *perf = stream->perf; 3278 long ret; 3279 3280 mutex_lock(&perf->lock); 3281 ret = i915_perf_ioctl_locked(stream, cmd, arg); 3282 mutex_unlock(&perf->lock); 3283 3284 return ret; 3285} 3286 3287/** 3288 * i915_perf_destroy_locked - destroy an i915 perf stream 3289 * @stream: An i915 perf stream 3290 * 3291 * Frees all resources associated with the given i915 perf @stream, disabling 3292 * any associated data capture in the process. 3293 * 3294 * Note: The &perf->lock mutex has been taken to serialize 3295 * with any non-file-operation driver hooks. 3296 */ 3297static void i915_perf_destroy_locked(struct i915_perf_stream *stream) 3298{ 3299 if (stream->enabled) 3300 i915_perf_disable_locked(stream); 3301 3302 if (stream->ops->destroy) 3303 stream->ops->destroy(stream); 3304 3305 if (stream->ctx) 3306 i915_gem_context_put(stream->ctx); 3307 3308 kfree(stream); 3309} 3310 3311/** 3312 * i915_perf_release - handles userspace close() of a stream file 3313 * @inode: anonymous inode associated with file 3314 * @file: An i915 perf stream file 3315 * 3316 * Cleans up any resources associated with an open i915 perf stream file. 3317 * 3318 * NB: close() can't really fail from the userspace point of view. 3319 * 3320 * Returns: zero on success or a negative error code. 3321 */ 3322static int i915_perf_release(struct inode *inode, struct file *file) 3323{ 3324 struct i915_perf_stream *stream = file->private_data; 3325 struct i915_perf *perf = stream->perf; 3326 3327 mutex_lock(&perf->lock); 3328 i915_perf_destroy_locked(stream); 3329 mutex_unlock(&perf->lock); 3330 3331 /* Release the reference the perf stream kept on the driver. */ 3332 drm_dev_put(&perf->i915->drm); 3333 3334 return 0; 3335} 3336 3337 3338static const struct file_operations fops = { 3339 .owner = THIS_MODULE, 3340 .llseek = no_llseek, 3341 .release = i915_perf_release, 3342 .poll = i915_perf_poll, 3343 .read = i915_perf_read, 3344 .unlocked_ioctl = i915_perf_ioctl, 3345 /* Our ioctl have no arguments, so it's safe to use the same function 3346 * to handle 32bits compatibility. 
3347 */ 3348 .compat_ioctl = i915_perf_ioctl, 3349}; 3350 3351 3352/** 3353 * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD 3354 * @perf: i915 perf instance 3355 * @param: The open parameters passed to 'DRM_I915_PERF_OPEN` 3356 * @props: individually validated u64 property value pairs 3357 * @file: drm file 3358 * 3359 * See i915_perf_ioctl_open() for interface details. 3360 * 3361 * Implements further stream config validation and stream initialization on 3362 * behalf of i915_perf_open_ioctl() with the &perf->lock mutex 3363 * taken to serialize with any non-file-operation driver hooks. 3364 * 3365 * Note: at this point the @props have only been validated in isolation and 3366 * it's still necessary to validate that the combination of properties makes 3367 * sense. 3368 * 3369 * In the case where userspace is interested in OA unit metrics then further 3370 * config validation and stream initialization details will be handled by 3371 * i915_oa_stream_init(). The code here should only validate config state that 3372 * will be relevant to all stream types / backends. 3373 * 3374 * Returns: zero on success or a negative error code. 3375 */ 3376static int 3377i915_perf_open_ioctl_locked(struct i915_perf *perf, 3378 struct drm_i915_perf_open_param *param, 3379 struct perf_open_properties *props, 3380 struct drm_file *file) 3381{ 3382 struct i915_gem_context *specific_ctx = NULL; 3383 struct i915_perf_stream *stream = NULL; 3384 unsigned long f_flags = 0; 3385 bool privileged_op = true; 3386 int stream_fd; 3387 int ret; 3388 3389 if (props->single_context) { 3390 u32 ctx_handle = props->ctx_handle; 3391 struct drm_i915_file_private *file_priv = file->driver_priv; 3392 3393 specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle); 3394 if (!specific_ctx) { 3395 DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n", 3396 ctx_handle); 3397 ret = -ENOENT; 3398 goto err; 3399 } 3400 } 3401 3402 /* 3403 * On Haswell the OA unit supports clock gating off for a specific 3404 * context and in this mode there's no visibility of metrics for the 3405 * rest of the system, which we consider acceptable for a 3406 * non-privileged client. 3407 * 3408 * For Gen8->11 the OA unit no longer supports clock gating off for a 3409 * specific context and the kernel can't securely stop the counters 3410 * from updating as system-wide / global values. Even though we can 3411 * filter reports based on the included context ID we can't block 3412 * clients from seeing the raw / global counter values via 3413 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to 3414 * enable the OA unit by default. 3415 * 3416 * For Gen12+ we gain a new OAR unit that only monitors the RCS on a 3417 * per context basis. So we can relax requirements there if the user 3418 * doesn't request global stream access (i.e. query based sampling 3419 * using MI_RECORD_PERF_COUNT. 3420 */ 3421 if (IS_HASWELL(perf->i915) && specific_ctx) 3422 privileged_op = false; 3423 else if (IS_GEN(perf->i915, 12) && specific_ctx && 3424 (props->sample_flags & SAMPLE_OA_REPORT) == 0) 3425 privileged_op = false; 3426 3427 if (props->hold_preemption) { 3428 if (!props->single_context) { 3429 DRM_DEBUG("preemption disable with no context\n"); 3430 ret = -EINVAL; 3431 goto err; 3432 } 3433 privileged_op = true; 3434 } 3435 3436 /* 3437 * Asking for SSEU configuration is a priviliged operation. 
3438 */ 3439 if (props->has_sseu) 3440 privileged_op = true; 3441 else 3442 get_default_sseu_config(&props->sseu, props->engine); 3443 3444 /* Similar to perf's kernel.perf_paranoid_cpu sysctl option 3445 * we check a dev.i915.perf_stream_paranoid sysctl option 3446 * to determine if it's ok to access system wide OA counters 3447 * without CAP_PERFMON or CAP_SYS_ADMIN privileges. 3448 */ 3449 if (privileged_op && 3450 i915_perf_stream_paranoid && !perfmon_capable()) { 3451 DRM_DEBUG("Insufficient privileges to open i915 perf stream\n"); 3452 ret = -EACCES; 3453 goto err_ctx; 3454 } 3455 3456 stream = kzalloc(sizeof(*stream), GFP_KERNEL); 3457 if (!stream) { 3458 ret = -ENOMEM; 3459 goto err_ctx; 3460 } 3461 3462 stream->perf = perf; 3463 stream->ctx = specific_ctx; 3464 stream->poll_oa_period = props->poll_oa_period; 3465 3466 ret = i915_oa_stream_init(stream, param, props); 3467 if (ret) 3468 goto err_alloc; 3469 3470 /* we avoid simply assigning stream->sample_flags = props->sample_flags 3471 * to have _stream_init check the combination of sample flags more 3472 * thoroughly, but still this is the expected result at this point. 3473 */ 3474 if (WARN_ON(stream->sample_flags != props->sample_flags)) { 3475 ret = -ENODEV; 3476 goto err_flags; 3477 } 3478 3479 if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) 3480 f_flags |= O_CLOEXEC; 3481 if (param->flags & I915_PERF_FLAG_FD_NONBLOCK) 3482 f_flags |= O_NONBLOCK; 3483 3484 stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags); 3485 if (stream_fd < 0) { 3486 ret = stream_fd; 3487 goto err_flags; 3488 } 3489 3490 if (!(param->flags & I915_PERF_FLAG_DISABLED)) 3491 i915_perf_enable_locked(stream); 3492 3493 /* Take a reference on the driver that will be kept with stream_fd 3494 * until its release. 3495 */ 3496 drm_dev_get(&perf->i915->drm); 3497 3498 return stream_fd; 3499 3500err_flags: 3501 if (stream->ops->destroy) 3502 stream->ops->destroy(stream); 3503err_alloc: 3504 kfree(stream); 3505err_ctx: 3506 if (specific_ctx) 3507 i915_gem_context_put(specific_ctx); 3508err: 3509 return ret; 3510} 3511 3512static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) 3513{ 3514 return i915_cs_timestamp_ticks_to_ns(perf->i915, 2ULL << exponent); 3515} 3516 3517/** 3518 * read_properties_unlocked - validate + copy userspace stream open properties 3519 * @perf: i915 perf instance 3520 * @uprops: The array of u64 key value pairs given by userspace 3521 * @n_props: The number of key value pairs expected in @uprops 3522 * @props: The stream configuration built up while validating properties 3523 * 3524 * Note this function only validates properties in isolation it doesn't 3525 * validate that the combination of properties makes sense or that all 3526 * properties necessary for a particular kind of stream have been set. 3527 * 3528 * Note that there currently aren't any ordering requirements for properties so 3529 * we shouldn't validate or assume anything about ordering here. This doesn't 3530 * rule out defining new properties with ordering requirements in the future. 
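 *
 * As an illustration only (the values are not taken from any particular
 * userspace), a property list requesting periodic OA sampling could be laid
 * out as the following array of (key, value) pairs, where metrics_set_id
 * would previously have been read from the sysfs metrics/<uuid>/id file and
 * the OA format is a Gen8+ format picked purely as an example:
 *
 *	u64 properties[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
 *		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 *		DRM_I915_PERF_PROP_OA_EXPONENT, 16,
 *	};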
3531 */ 3532static int read_properties_unlocked(struct i915_perf *perf, 3533 u64 __user *uprops, 3534 u32 n_props, 3535 struct perf_open_properties *props) 3536{ 3537 u64 __user *uprop = uprops; 3538 u32 i; 3539 int ret; 3540 3541 memset(props, 0, sizeof(struct perf_open_properties)); 3542 props->poll_oa_period = DEFAULT_POLL_PERIOD_NS; 3543 3544 if (!n_props) { 3545 DRM_DEBUG("No i915 perf properties given\n"); 3546 return -EINVAL; 3547 } 3548 3549 /* At the moment we only support using i915-perf on the RCS. */ 3550 props->engine = intel_engine_lookup_user(perf->i915, 3551 I915_ENGINE_CLASS_RENDER, 3552 0); 3553 if (!props->engine) { 3554 DRM_DEBUG("No RENDER-capable engines\n"); 3555 return -EINVAL; 3556 } 3557 3558 /* Considering that ID = 0 is reserved and assuming that we don't 3559 * (currently) expect any configurations to ever specify duplicate 3560 * values for a particular property ID then the last _PROP_MAX value is 3561 * one greater than the maximum number of properties we expect to get 3562 * from userspace. 3563 */ 3564 if (n_props >= DRM_I915_PERF_PROP_MAX) { 3565 DRM_DEBUG("More i915 perf properties specified than exist\n"); 3566 return -EINVAL; 3567 } 3568 3569 for (i = 0; i < n_props; i++) { 3570 u64 oa_period, oa_freq_hz; 3571 u64 id, value; 3572 3573 ret = get_user(id, uprop); 3574 if (ret) 3575 return ret; 3576 3577 ret = get_user(value, uprop + 1); 3578 if (ret) 3579 return ret; 3580 3581 if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) { 3582 DRM_DEBUG("Unknown i915 perf property ID\n"); 3583 return -EINVAL; 3584 } 3585 3586 switch ((enum drm_i915_perf_property_id)id) { 3587 case DRM_I915_PERF_PROP_CTX_HANDLE: 3588 props->single_context = 1; 3589 props->ctx_handle = value; 3590 break; 3591 case DRM_I915_PERF_PROP_SAMPLE_OA: 3592 if (value) 3593 props->sample_flags |= SAMPLE_OA_REPORT; 3594 break; 3595 case DRM_I915_PERF_PROP_OA_METRICS_SET: 3596 if (value == 0) { 3597 DRM_DEBUG("Unknown OA metric set ID\n"); 3598 return -EINVAL; 3599 } 3600 props->metrics_set = value; 3601 break; 3602 case DRM_I915_PERF_PROP_OA_FORMAT: 3603 if (value == 0 || value >= I915_OA_FORMAT_MAX) { 3604 DRM_DEBUG("Out-of-range OA report format %llu\n", 3605 value); 3606 return -EINVAL; 3607 } 3608 if (!perf->oa_formats[value].size) { 3609 DRM_DEBUG("Unsupported OA report format %llu\n", 3610 value); 3611 return -EINVAL; 3612 } 3613 props->oa_format = value; 3614 break; 3615 case DRM_I915_PERF_PROP_OA_EXPONENT: 3616 if (value > OA_EXPONENT_MAX) { 3617 DRM_DEBUG("OA timer exponent too high (> %u)\n", 3618 OA_EXPONENT_MAX); 3619 return -EINVAL; 3620 } 3621 3622 /* Theoretically we can program the OA unit to sample 3623 * e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns 3624 * for BXT. We don't allow such high sampling 3625 * frequencies by default unless root. 3626 */ 3627 3628 BUILD_BUG_ON(sizeof(oa_period) != 8); 3629 oa_period = oa_exponent_to_ns(perf, value); 3630 3631 /* This check is primarily to ensure that oa_period <= 3632 * UINT32_MAX (before passing to do_div which only 3633 * accepts a u32 denominator), but we can also skip 3634 * checking anything < 1Hz which implicitly can't be 3635 * limited via an integer oa_max_sample_rate. 
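			 *
			 * As a worked example (assuming HSW's 80ns timestamp
			 * tick, implied by the 160ns figure above): an
			 * exponent of 16 selects a period of 2^(16 + 1) ticks
			 * = 131072 * 80ns ~= 10.49ms, so the division below
			 * yields an oa_freq_hz of ~95Hz, well below any
			 * sensible dev.i915.oa_max_sample_rate setting.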
3636 */ 3637 if (oa_period <= NSEC_PER_SEC) { 3638 u64 tmp = NSEC_PER_SEC; 3639 do_div(tmp, oa_period); 3640 oa_freq_hz = tmp; 3641 } else 3642 oa_freq_hz = 0; 3643 3644 if (oa_freq_hz > i915_oa_max_sample_rate && !perfmon_capable()) { 3645 DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n", 3646 i915_oa_max_sample_rate); 3647 return -EACCES; 3648 } 3649 3650 props->oa_periodic = true; 3651 props->oa_period_exponent = value; 3652 break; 3653 case DRM_I915_PERF_PROP_HOLD_PREEMPTION: 3654 props->hold_preemption = !!value; 3655 break; 3656 case DRM_I915_PERF_PROP_GLOBAL_SSEU: { 3657 struct drm_i915_gem_context_param_sseu user_sseu; 3658 3659 if (copy_from_user(&user_sseu, 3660 u64_to_user_ptr(value), 3661 sizeof(user_sseu))) { 3662 DRM_DEBUG("Unable to copy global sseu parameter\n"); 3663 return -EFAULT; 3664 } 3665 3666 ret = get_sseu_config(&props->sseu, props->engine, &user_sseu); 3667 if (ret) { 3668 DRM_DEBUG("Invalid SSEU configuration\n"); 3669 return ret; 3670 } 3671 props->has_sseu = true; 3672 break; 3673 } 3674 case DRM_I915_PERF_PROP_POLL_OA_PERIOD: 3675 if (value < 100000 /* 100us */) { 3676 DRM_DEBUG("OA availability timer too small (%lluns < 100us)\n", 3677 value); 3678 return -EINVAL; 3679 } 3680 props->poll_oa_period = value; 3681 break; 3682 case DRM_I915_PERF_PROP_MAX: 3683 MISSING_CASE(id); 3684 return -EINVAL; 3685 } 3686 3687 uprop += 2; 3688 } 3689 3690 return 0; 3691} 3692 3693/** 3694 * i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD 3695 * @dev: drm device 3696 * @data: ioctl data copied from userspace (unvalidated) 3697 * @file: drm file 3698 * 3699 * Validates the stream open parameters given by userspace including flags 3700 * and an array of u64 key, value pair properties. 3701 * 3702 * Very little is assumed up front about the nature of the stream being 3703 * opened (for instance we don't assume it's for periodic OA unit metrics). An 3704 * i915-perf stream is expected to be a suitable interface for other forms of 3705 * buffered data written by the GPU besides periodic OA metrics. 3706 * 3707 * Note we copy the properties from userspace outside of the i915 perf 3708 * mutex to avoid an awkward lockdep with mmap_lock. 3709 * 3710 * Most of the implementation details are handled by 3711 * i915_perf_open_ioctl_locked() after taking the &perf->lock 3712 * mutex for serializing with any non-file-operation driver hooks. 3713 * 3714 * Return: A newly opened i915 Perf stream file descriptor or negative 3715 * error code on failure. 
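 *
 * For reference, a minimal userspace open call might look like the sketch
 * below (error handling omitted; 'properties' refers to a (key, value) u64
 * array such as the one shown in the read_properties_unlocked() comment, and
 * 'drm_fd' is assumed to be an already open i915 DRM file descriptor):
 *
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC |
 *			 I915_PERF_FLAG_FD_NONBLOCK,
 *		.num_properties = sizeof(properties) / (2 * sizeof(u64)),
 *		.properties_ptr = (__u64)(uintptr_t)properties,
 *	};
 *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
 *
 * Unless I915_PERF_FLAG_DISABLED is passed, the returned stream starts
 * enabled and can be read() immediately, or paused and resumed with
 * I915_PERF_IOCTL_DISABLE / I915_PERF_IOCTL_ENABLE.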
3716 */ 3717int i915_perf_open_ioctl(struct drm_device *dev, void *data, 3718 struct drm_file *file) 3719{ 3720 struct i915_perf *perf = &to_i915(dev)->perf; 3721 struct drm_i915_perf_open_param *param = data; 3722 struct perf_open_properties props; 3723 u32 known_open_flags; 3724 int ret; 3725 3726 if (!perf->i915) { 3727 DRM_DEBUG("i915 perf interface not available for this system\n"); 3728 return -ENOTSUPP; 3729 } 3730 3731 known_open_flags = I915_PERF_FLAG_FD_CLOEXEC | 3732 I915_PERF_FLAG_FD_NONBLOCK | 3733 I915_PERF_FLAG_DISABLED; 3734 if (param->flags & ~known_open_flags) { 3735 DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n"); 3736 return -EINVAL; 3737 } 3738 3739 ret = read_properties_unlocked(perf, 3740 u64_to_user_ptr(param->properties_ptr), 3741 param->num_properties, 3742 &props); 3743 if (ret) 3744 return ret; 3745 3746 mutex_lock(&perf->lock); 3747 ret = i915_perf_open_ioctl_locked(perf, param, &props, file); 3748 mutex_unlock(&perf->lock); 3749 3750 return ret; 3751} 3752 3753/** 3754 * i915_perf_register - exposes i915-perf to userspace 3755 * @i915: i915 device instance 3756 * 3757 * In particular OA metric sets are advertised under a sysfs metrics/ 3758 * directory allowing userspace to enumerate valid IDs that can be 3759 * used to open an i915-perf stream. 3760 */ 3761void i915_perf_register(struct drm_i915_private *i915) 3762{ 3763 struct i915_perf *perf = &i915->perf; 3764 3765 if (!perf->i915) 3766 return; 3767 3768 /* To be sure we're synchronized with an attempted 3769 * i915_perf_open_ioctl(); considering that we register after 3770 * being exposed to userspace. 3771 */ 3772 mutex_lock(&perf->lock); 3773 3774 perf->metrics_kobj = 3775 kobject_create_and_add("metrics", 3776 &i915->drm.primary->kdev->kobj); 3777 3778 mutex_unlock(&perf->lock); 3779} 3780 3781/** 3782 * i915_perf_unregister - hide i915-perf from userspace 3783 * @i915: i915 device instance 3784 * 3785 * i915-perf state cleanup is split up into an 'unregister' and 3786 * 'deinit' phase where the interface is first hidden from 3787 * userspace by i915_perf_unregister() before cleaning up 3788 * remaining state in i915_perf_fini(). 
3789 */ 3790void i915_perf_unregister(struct drm_i915_private *i915) 3791{ 3792 struct i915_perf *perf = &i915->perf; 3793 3794 if (!perf->metrics_kobj) 3795 return; 3796 3797 kobject_put(perf->metrics_kobj); 3798 perf->metrics_kobj = NULL; 3799} 3800 3801static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr) 3802{ 3803 static const i915_reg_t flex_eu_regs[] = { 3804 EU_PERF_CNTL0, 3805 EU_PERF_CNTL1, 3806 EU_PERF_CNTL2, 3807 EU_PERF_CNTL3, 3808 EU_PERF_CNTL4, 3809 EU_PERF_CNTL5, 3810 EU_PERF_CNTL6, 3811 }; 3812 int i; 3813 3814 for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { 3815 if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr) 3816 return true; 3817 } 3818 return false; 3819} 3820 3821#define ADDR_IN_RANGE(addr, start, end) \ 3822 ((addr) >= (start) && \ 3823 (addr) <= (end)) 3824 3825#define REG_IN_RANGE(addr, start, end) \ 3826 ((addr) >= i915_mmio_reg_offset(start) && \ 3827 (addr) <= i915_mmio_reg_offset(end)) 3828 3829#define REG_EQUAL(addr, mmio) \ 3830 ((addr) == i915_mmio_reg_offset(mmio)) 3831 3832static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) 3833{ 3834 return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8) || 3835 REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8) || 3836 REG_IN_RANGE(addr, OACEC0_0, OACEC7_1); 3837} 3838 3839static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr) 3840{ 3841 return REG_EQUAL(addr, HALF_SLICE_CHICKEN2) || 3842 REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE) || 3843 REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI) || 3844 REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI); 3845} 3846 3847static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr) 3848{ 3849 return gen7_is_valid_mux_addr(perf, addr) || 3850 REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || 3851 REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8)); 3852} 3853 3854static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr) 3855{ 3856 return gen8_is_valid_mux_addr(perf, addr) || 3857 REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || 3858 REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI); 3859} 3860 3861static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr) 3862{ 3863 return gen7_is_valid_mux_addr(perf, addr) || 3864 ADDR_IN_RANGE(addr, 0x25100, 0x2FF90) || 3865 REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9) || 3866 REG_EQUAL(addr, HSW_MBVID2_MISR0); 3867} 3868 3869static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) 3870{ 3871 return gen7_is_valid_mux_addr(perf, addr) || 3872 ADDR_IN_RANGE(addr, 0x182300, 0x1823A4); 3873} 3874 3875static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) 3876{ 3877 return REG_IN_RANGE(addr, GEN12_OAG_OASTARTTRIG1, GEN12_OAG_OASTARTTRIG8) || 3878 REG_IN_RANGE(addr, GEN12_OAG_OAREPORTTRIG1, GEN12_OAG_OAREPORTTRIG8) || 3879 REG_IN_RANGE(addr, GEN12_OAG_CEC0_0, GEN12_OAG_CEC7_1) || 3880 REG_IN_RANGE(addr, GEN12_OAG_SCEC0_0, GEN12_OAG_SCEC7_1) || 3881 REG_EQUAL(addr, GEN12_OAA_DBG_REG) || 3882 REG_EQUAL(addr, GEN12_OAG_OA_PESS) || 3883 REG_EQUAL(addr, GEN12_OAG_SPCTR_CNF); 3884} 3885 3886static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) 3887{ 3888 return REG_EQUAL(addr, NOA_WRITE) || 3889 REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || 3890 REG_EQUAL(addr, GDT_CHICKEN_BITS) || 3891 REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || 3892 REG_EQUAL(addr, RPM_CONFIG0) || 3893 REG_EQUAL(addr, RPM_CONFIG1) || 3894 REG_IN_RANGE(addr, NOA_CONFIG(0), NOA_CONFIG(8)); 3895} 3896 3897static u32 mask_reg_value(u32 reg, u32 val) 3898{ 3899 /* HALF_SLICE_CHICKEN2 is 
programmed by the
	 * WaDisableSTUnitPowerOptimization workaround. Make sure the value
	 * programmed by userspace doesn't change this.
	 */
	if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2))
		val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);

	/* WAIT_FOR_RC6_EXIT has only one bit fulfilling the function
	 * indicated by its name and a bunch of selection fields used by OA
	 * configs.
	 */
	if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT))
		val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);

	return val;
}

static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf,
					 bool (*is_valid)(struct i915_perf *perf, u32 addr),
					 u32 __user *regs,
					 u32 n_regs)
{
	struct i915_oa_reg *oa_regs;
	int err;
	u32 i;

	if (!n_regs)
		return NULL;

	/* No is_valid function means we're not allowing any register to be programmed. */
	GEM_BUG_ON(!is_valid);
	if (!is_valid)
		return ERR_PTR(-EINVAL);

	oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL);
	if (!oa_regs)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < n_regs; i++) {
		u32 addr, value;

		err = get_user(addr, regs);
		if (err)
			goto addr_err;

		if (!is_valid(perf, addr)) {
			DRM_DEBUG("Invalid oa_reg address: %X\n", addr);
			err = -EINVAL;
			goto addr_err;
		}

		err = get_user(value, regs + 1);
		if (err)
			goto addr_err;

		oa_regs[i].addr = _MMIO(addr);
		oa_regs[i].value = mask_reg_value(addr, value);

		regs += 2;
	}

	return oa_regs;

addr_err:
	kfree(oa_regs);
	return ERR_PTR(err);
}

static ssize_t show_dynamic_id(struct kobject *kobj,
			       struct kobj_attribute *attr,
			       char *buf)
{
	struct i915_oa_config *oa_config =
		container_of(attr, typeof(*oa_config), sysfs_metric_id);

	return sprintf(buf, "%d\n", oa_config->id);
}

static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf,
					 struct i915_oa_config *oa_config)
{
	sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
	oa_config->sysfs_metric_id.attr.name = "id";
	oa_config->sysfs_metric_id.attr.mode = S_IRUGO;
	oa_config->sysfs_metric_id.show = show_dynamic_id;
	oa_config->sysfs_metric_id.store = NULL;

	oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
	oa_config->attrs[1] = NULL;

	oa_config->sysfs_metric.name = oa_config->uuid;
	oa_config->sysfs_metric.attrs = oa_config->attrs;

	return sysfs_create_group(perf->metrics_kobj,
				  &oa_config->sysfs_metric);
}

/**
 * i915_perf_add_config_ioctl - DRM ioctl() for userspace to add a new OA config
 * @dev: drm device
 * @data: ioctl data (pointer to struct drm_i915_perf_oa_config) copied from
 *        userspace (unvalidated)
 * @file: drm file
 *
 * Validates the submitted OA registers to be saved into a new OA config that
 * can then be used for programming the OA unit and its NOA network.
 *
 * Returns: A newly allocated config number to be used with the perf open ioctl
 * or a negative error code on failure.
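 *
 * As a rough userspace sketch (the uuid and register lists are illustrative
 * placeholders; real configs are normally generated from an OA metrics
 * description, and mux_regs / b_counter_regs would be flat u32 arrays of
 * (address, value) pairs):
 *
 *	struct drm_i915_perf_oa_config config = {
 *		.uuid = "01234567-0123-0123-0123-0123456789ab",
 *		.n_mux_regs = sizeof(mux_regs) / (2 * sizeof(u32)),
 *		.mux_regs_ptr = (__u64)(uintptr_t)mux_regs,
 *		.n_boolean_regs = sizeof(b_counter_regs) / (2 * sizeof(u32)),
 *		.boolean_regs_ptr = (__u64)(uintptr_t)b_counter_regs,
 *	};
 *	int config_id = ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
 *
 * The returned ID can later be passed (as a u64) to
 * DRM_IOCTL_I915_PERF_REMOVE_CONFIG, or used as the
 * DRM_I915_PERF_PROP_OA_METRICS_SET value when opening a stream.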
4008 */ 4009int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, 4010 struct drm_file *file) 4011{ 4012 struct i915_perf *perf = &to_i915(dev)->perf; 4013 struct drm_i915_perf_oa_config *args = data; 4014 struct i915_oa_config *oa_config, *tmp; 4015 struct i915_oa_reg *regs; 4016 int err, id; 4017 4018 if (!perf->i915) { 4019 DRM_DEBUG("i915 perf interface not available for this system\n"); 4020 return -ENOTSUPP; 4021 } 4022 4023 if (!perf->metrics_kobj) { 4024 DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); 4025 return -EINVAL; 4026 } 4027 4028 if (i915_perf_stream_paranoid && !perfmon_capable()) { 4029 DRM_DEBUG("Insufficient privileges to add i915 OA config\n"); 4030 return -EACCES; 4031 } 4032 4033 if ((!args->mux_regs_ptr || !args->n_mux_regs) && 4034 (!args->boolean_regs_ptr || !args->n_boolean_regs) && 4035 (!args->flex_regs_ptr || !args->n_flex_regs)) { 4036 DRM_DEBUG("No OA registers given\n"); 4037 return -EINVAL; 4038 } 4039 4040 oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); 4041 if (!oa_config) { 4042 DRM_DEBUG("Failed to allocate memory for the OA config\n"); 4043 return -ENOMEM; 4044 } 4045 4046 oa_config->perf = perf; 4047 kref_init(&oa_config->ref); 4048 4049 if (!uuid_is_valid(args->uuid)) { 4050 DRM_DEBUG("Invalid uuid format for OA config\n"); 4051 err = -EINVAL; 4052 goto reg_err; 4053 } 4054 4055 /* Last character in oa_config->uuid will be 0 because oa_config is 4056 * kzalloc. 4057 */ 4058 memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid)); 4059 4060 oa_config->mux_regs_len = args->n_mux_regs; 4061 regs = alloc_oa_regs(perf, 4062 perf->ops.is_valid_mux_reg, 4063 u64_to_user_ptr(args->mux_regs_ptr), 4064 args->n_mux_regs); 4065 4066 if (IS_ERR(regs)) { 4067 DRM_DEBUG("Failed to create OA config for mux_regs\n"); 4068 err = PTR_ERR(regs); 4069 goto reg_err; 4070 } 4071 oa_config->mux_regs = regs; 4072 4073 oa_config->b_counter_regs_len = args->n_boolean_regs; 4074 regs = alloc_oa_regs(perf, 4075 perf->ops.is_valid_b_counter_reg, 4076 u64_to_user_ptr(args->boolean_regs_ptr), 4077 args->n_boolean_regs); 4078 4079 if (IS_ERR(regs)) { 4080 DRM_DEBUG("Failed to create OA config for b_counter_regs\n"); 4081 err = PTR_ERR(regs); 4082 goto reg_err; 4083 } 4084 oa_config->b_counter_regs = regs; 4085 4086 if (INTEL_GEN(perf->i915) < 8) { 4087 if (args->n_flex_regs != 0) { 4088 err = -EINVAL; 4089 goto reg_err; 4090 } 4091 } else { 4092 oa_config->flex_regs_len = args->n_flex_regs; 4093 regs = alloc_oa_regs(perf, 4094 perf->ops.is_valid_flex_reg, 4095 u64_to_user_ptr(args->flex_regs_ptr), 4096 args->n_flex_regs); 4097 4098 if (IS_ERR(regs)) { 4099 DRM_DEBUG("Failed to create OA config for flex_regs\n"); 4100 err = PTR_ERR(regs); 4101 goto reg_err; 4102 } 4103 oa_config->flex_regs = regs; 4104 } 4105 4106 err = mutex_lock_interruptible(&perf->metrics_lock); 4107 if (err) 4108 goto reg_err; 4109 4110 /* We shouldn't have too many configs, so this iteration shouldn't be 4111 * too costly. 4112 */ 4113 idr_for_each_entry(&perf->metrics_idr, tmp, id) { 4114 if (!strcmp(tmp->uuid, oa_config->uuid)) { 4115 DRM_DEBUG("OA config already exists with this uuid\n"); 4116 err = -EADDRINUSE; 4117 goto sysfs_err; 4118 } 4119 } 4120 4121 err = create_dynamic_oa_sysfs_entry(perf, oa_config); 4122 if (err) { 4123 DRM_DEBUG("Failed to create sysfs entry for OA config\n"); 4124 goto sysfs_err; 4125 } 4126 4127 /* Config id 0 is invalid, id 1 for kernel stored test config. 
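	 * Dynamically added configs therefore get their IDs from 2 upwards:
	 * the idr_alloc() call below passes start = 2 and end = 0 (i.e. no
	 * upper bound).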
	 */
	oa_config->id = idr_alloc(&perf->metrics_idr,
				  oa_config, 2,
				  0, GFP_KERNEL);
	if (oa_config->id < 0) {
		DRM_DEBUG("Failed to allocate an ID for the OA config\n");
		err = oa_config->id;
		goto sysfs_err;
	}

	mutex_unlock(&perf->metrics_lock);

	DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id);

	return oa_config->id;

sysfs_err:
	mutex_unlock(&perf->metrics_lock);
reg_err:
	i915_oa_config_put(oa_config);
	DRM_DEBUG("Failed to add new OA config\n");
	return err;
}

/**
 * i915_perf_remove_config_ioctl - DRM ioctl() for userspace to remove an OA config
 * @dev: drm device
 * @data: ioctl data (pointer to u64 integer) copied from userspace
 * @file: drm file
 *
 * Configs can be removed while being used, they will stop appearing in sysfs
 * and their content will be freed when the stream using the config is closed.
 *
 * Returns: 0 on success or a negative error code on failure.
 */
int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
	struct i915_perf *perf = &to_i915(dev)->perf;
	u64 *arg = data;
	struct i915_oa_config *oa_config;
	int ret;

	if (!perf->i915) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	if (i915_perf_stream_paranoid && !perfmon_capable()) {
		DRM_DEBUG("Insufficient privileges to remove i915 OA config\n");
		return -EACCES;
	}

	ret = mutex_lock_interruptible(&perf->metrics_lock);
	if (ret)
		return ret;

	oa_config = idr_find(&perf->metrics_idr, *arg);
	if (!oa_config) {
		DRM_DEBUG("Failed to remove unknown OA config\n");
		ret = -ENOENT;
		goto err_unlock;
	}

	GEM_BUG_ON(*arg != oa_config->id);

	sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric);

	idr_remove(&perf->metrics_idr, *arg);

	mutex_unlock(&perf->metrics_lock);

	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);

	i915_oa_config_put(oa_config);

	return 0;

err_unlock:
	mutex_unlock(&perf->metrics_lock);
	return ret;
}

static struct ctl_table oa_table[] = {
	{
	 .procname = "perf_stream_paranoid",
	 .data = &i915_perf_stream_paranoid,
	 .maxlen = sizeof(i915_perf_stream_paranoid),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = SYSCTL_ZERO,
	 .extra2 = SYSCTL_ONE,
	 },
	{
	 .procname = "oa_max_sample_rate",
	 .data = &i915_oa_max_sample_rate,
	 .maxlen = sizeof(i915_oa_max_sample_rate),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = SYSCTL_ZERO,
	 .extra2 = &oa_sample_rate_hard_limit,
	 },
	{}
};

static struct ctl_table i915_root[] = {
	{
	 .procname = "i915",
	 .maxlen = 0,
	 .mode = 0555,
	 .child = oa_table,
	 },
	{}
};

static struct ctl_table dev_root[] = {
	{
	 .procname = "dev",
	 .maxlen = 0,
	 .mode = 0555,
	 .child = i915_root,
	 },
	{}
};

/**
 * i915_perf_init - initialize i915-perf state on module bind
 * @i915: i915 device instance
 *
 * Initializes i915-perf state without exposing anything to userspace.
4257 * 4258 * Note: i915-perf initialization is split into an 'init' and 'register' 4259 * phase with the i915_perf_register() exposing state to userspace. 4260 */ 4261void i915_perf_init(struct drm_i915_private *i915) 4262{ 4263 struct i915_perf *perf = &i915->perf; 4264 4265 /* XXX const struct i915_perf_ops! */ 4266 4267 if (IS_HASWELL(i915)) { 4268 perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; 4269 perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr; 4270 perf->ops.is_valid_flex_reg = NULL; 4271 perf->ops.enable_metric_set = hsw_enable_metric_set; 4272 perf->ops.disable_metric_set = hsw_disable_metric_set; 4273 perf->ops.oa_enable = gen7_oa_enable; 4274 perf->ops.oa_disable = gen7_oa_disable; 4275 perf->ops.read = gen7_oa_read; 4276 perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read; 4277 4278 perf->oa_formats = hsw_oa_formats; 4279 } else if (HAS_LOGICAL_RING_CONTEXTS(i915)) { 4280 /* Note: that although we could theoretically also support the 4281 * legacy ringbuffer mode on BDW (and earlier iterations of 4282 * this driver, before upstreaming did this) it didn't seem 4283 * worth the complexity to maintain now that BDW+ enable 4284 * execlist mode by default. 4285 */ 4286 perf->ops.read = gen8_oa_read; 4287 4288 if (IS_GEN_RANGE(i915, 8, 9)) { 4289 perf->oa_formats = gen8_plus_oa_formats; 4290 4291 perf->ops.is_valid_b_counter_reg = 4292 gen7_is_valid_b_counter_addr; 4293 perf->ops.is_valid_mux_reg = 4294 gen8_is_valid_mux_addr; 4295 perf->ops.is_valid_flex_reg = 4296 gen8_is_valid_flex_addr; 4297 4298 if (IS_CHERRYVIEW(i915)) { 4299 perf->ops.is_valid_mux_reg = 4300 chv_is_valid_mux_addr; 4301 } 4302 4303 perf->ops.oa_enable = gen8_oa_enable; 4304 perf->ops.oa_disable = gen8_oa_disable; 4305 perf->ops.enable_metric_set = gen8_enable_metric_set; 4306 perf->ops.disable_metric_set = gen8_disable_metric_set; 4307 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; 4308 4309 if (IS_GEN(i915, 8)) { 4310 perf->ctx_oactxctrl_offset = 0x120; 4311 perf->ctx_flexeu0_offset = 0x2ce; 4312 4313 perf->gen8_valid_ctx_bit = BIT(25); 4314 } else { 4315 perf->ctx_oactxctrl_offset = 0x128; 4316 perf->ctx_flexeu0_offset = 0x3de; 4317 4318 perf->gen8_valid_ctx_bit = BIT(16); 4319 } 4320 } else if (IS_GEN_RANGE(i915, 10, 11)) { 4321 perf->oa_formats = gen8_plus_oa_formats; 4322 4323 perf->ops.is_valid_b_counter_reg = 4324 gen7_is_valid_b_counter_addr; 4325 perf->ops.is_valid_mux_reg = 4326 gen10_is_valid_mux_addr; 4327 perf->ops.is_valid_flex_reg = 4328 gen8_is_valid_flex_addr; 4329 4330 perf->ops.oa_enable = gen8_oa_enable; 4331 perf->ops.oa_disable = gen8_oa_disable; 4332 perf->ops.enable_metric_set = gen8_enable_metric_set; 4333 perf->ops.disable_metric_set = gen10_disable_metric_set; 4334 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; 4335 4336 if (IS_GEN(i915, 10)) { 4337 perf->ctx_oactxctrl_offset = 0x128; 4338 perf->ctx_flexeu0_offset = 0x3de; 4339 } else { 4340 perf->ctx_oactxctrl_offset = 0x124; 4341 perf->ctx_flexeu0_offset = 0x78e; 4342 } 4343 perf->gen8_valid_ctx_bit = BIT(16); 4344 } else if (IS_GEN(i915, 12)) { 4345 perf->oa_formats = gen12_oa_formats; 4346 4347 perf->ops.is_valid_b_counter_reg = 4348 gen12_is_valid_b_counter_addr; 4349 perf->ops.is_valid_mux_reg = 4350 gen12_is_valid_mux_addr; 4351 perf->ops.is_valid_flex_reg = 4352 gen8_is_valid_flex_addr; 4353 4354 perf->ops.oa_enable = gen12_oa_enable; 4355 perf->ops.oa_disable = gen12_oa_disable; 4356 perf->ops.enable_metric_set = gen12_enable_metric_set; 4357 perf->ops.disable_metric_set = gen12_disable_metric_set; 
			perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;

			perf->ctx_flexeu0_offset = 0;
			perf->ctx_oactxctrl_offset = 0x144;
		}
	}

	if (perf->ops.enable_metric_set) {
		mutex_init(&perf->lock);

		oa_sample_rate_hard_limit =
			RUNTIME_INFO(i915)->cs_timestamp_frequency_hz / 2;

		mutex_init(&perf->metrics_lock);
		idr_init(&perf->metrics_idr);

		/* We set up some ratelimit state to potentially throttle any
		 * _NOTES about spurious, invalid OA reports which we don't
		 * forward to userspace.
		 *
		 * We print a _NOTE about any throttling when closing the
		 * stream instead of waiting until driver _fini which no one
		 * would ever see.
		 *
		 * Using the same limiting factors as printk_ratelimit()
		 */
		ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10);
		/* Since we use a DRM_NOTE for spurious reports it would be
		 * inconsistent to let __ratelimit() automatically print a
		 * warning for throttling.
		 */
		ratelimit_set_flags(&perf->spurious_report_rs,
				    RATELIMIT_MSG_ON_RELEASE);

		ratelimit_state_init(&perf->tail_pointer_race,
				     5 * HZ, 10);
		ratelimit_set_flags(&perf->tail_pointer_race,
				    RATELIMIT_MSG_ON_RELEASE);

		atomic64_set(&perf->noa_programming_delay,
			     500 * 1000 /* 500us */);

		perf->i915 = i915;
	}
}

static int destroy_config(int id, void *p, void *data)
{
	i915_oa_config_put(p);
	return 0;
}

void i915_perf_sysctl_register(void)
{
	sysctl_header = register_sysctl_table(dev_root);
}

void i915_perf_sysctl_unregister(void)
{
	unregister_sysctl_table(sysctl_header);
}

/**
 * i915_perf_fini - Counterpart to i915_perf_init()
 * @i915: i915 device instance
 */
void i915_perf_fini(struct drm_i915_private *i915)
{
	struct i915_perf *perf = &i915->perf;

	if (!perf->i915)
		return;

	idr_for_each(&perf->metrics_idr, destroy_config, perf);
	idr_destroy(&perf->metrics_idr);

	memset(&perf->ops, 0, sizeof(perf->ops));
	perf->i915 = NULL;
}

/**
 * i915_perf_ioctl_version - Version of the i915-perf subsystem
 *
 * This version number is used by userspace to detect available features.
 */
int i915_perf_ioctl_version(void)
{
	/*
	 * 1: Initial version
	 *   I915_PERF_IOCTL_ENABLE
	 *   I915_PERF_IOCTL_DISABLE
	 *
	 * 2: Added runtime modification of OA config.
	 *   I915_PERF_IOCTL_CONFIG
	 *
	 * 3: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold
	 *    preemption on a particular context so that performance data is
	 *    accessible from a delta of MI_RPC reports without looking at the
	 *    OA buffer.
	 *
	 * 4: Add DRM_I915_PERF_PROP_GLOBAL_SSEU to limit what contexts can
	 *    be run for the duration of the performance recording based on
	 *    their SSEU configuration.
	 *
	 * 5: Add DRM_I915_PERF_PROP_POLL_OA_PERIOD parameter that controls the
	 *    interval for the hrtimer used to check for OA data.
	 */
	return 5;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_perf.c"
#endif
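
/*
 * Usage note (illustrative, not part of the driver): userspace discovers the
 * interface revision returned by i915_perf_ioctl_version() through the
 * I915_PARAM_PERF_REVISION getparam, and can gate optional features on it.
 * 'drm_fd' and 'hold_preemption_supported' are hypothetical names here:
 *
 *	int revision = 0;
 *	struct drm_i915_getparam gp = {
 *		.param = I915_PARAM_PERF_REVISION,
 *		.value = &revision,
 *	};
 *
 *	if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0 && revision >= 3)
 *		hold_preemption_supported = true;
 */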