Lines Matching refs:perf
35 * This i915 perf interface enables userspace to configure and open a file
56 * i915 perf file descriptors represent a "stream" instead of an "event"; where
57 * a perf event primarily corresponds to a single 64bit value, while a stream
61 * of related counters. Samples for an i915 perf stream capturing OA metrics
68 * i915 perf stream configurations are provided as an array of u64 (key,value)
72 * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
85 * The first prototype of this driver was based on the core perf
87 * perf, we found we were breaking or working around too many assumptions baked
88 * into perf's currently cpu centric design.
90 * In the end we didn't see a clear benefit to making perf's implementation and
92 * wouldn't be able to use any existing perf based userspace tools.
95 * how userspace will sometimes need to combine i915 perf OA metrics with
99 * a standard vendor/architecture agnostic interface by not using perf.
102 * For posterity, in case we might re-visit trying to adapt core perf to be
106 * - The perf based OA PMU driver broke some significant design assumptions:
108 * Existing perf pmus are used for profiling work on a cpu and we were
111 * registers) to fit with perf's current design, and adding _DEVICE records
118 * buffer to perf's buffer, those bursts of sample writes looked to perf like
128 * set while perf generally expects counter configurations to be orthogonal.
133 * GPU context to filter metrics on). We avoided using perf's grouping
134 * feature and forwarded OA reports to userspace via perf's 'raw' sample
142 * - As a side note on perf's grouping feature; there was also some concern
156 * event scheduling is a central design idea within perf for allowing
178 * - It felt like our perf based PMU was making some technical compromises
179 * just for the sake of using perf:
185 * cpu id, perf ensures pmu methods will be invoked via an inter process
187 * perf events for a specific cpu. This was workable but it meant the
293 * The default threshold of 100000Hz is based on perf's similar
394 i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
399 oa_config = idr_find(&perf->metrics_idr, metrics_set);
476 hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
526 __ratelimit(&stream->perf->tail_pointer_race))
546 * @stream: An i915-perf stream opened for OA metrics
580 * @stream: An i915-perf stream opened for OA metrics
626 * @stream: An i915-perf stream opened for OA metrics
726 (IS_GEN(stream->perf->i915, 12) ?
730 if (__ratelimit(&stream->perf->spurious_report_rs))
745 if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
746 INTEL_GEN(stream->perf->i915) <= 11)
780 if (!stream->perf->exclusive_stream->ctx ||
789 if (stream->perf->exclusive_stream->ctx &&
813 oaheadptr = IS_GEN(stream->perf->i915, 12) ?
835 * @stream: An i915-perf stream opened for OA metrics
866 oastatus_reg = IS_GEN(stream->perf->i915, 12) ?
894 stream->perf->ops.oa_disable(stream);
895 stream->perf->ops.oa_enable(stream);
923 * @stream: An i915-perf stream opened for OA metrics
1015 if (__ratelimit(&stream->perf->spurious_report_rs))
1052 * @stream: An i915-perf stream opened for OA metrics
1085 oastatus1 &= ~stream->perf->gen7_latched_oastatus1;
1116 stream->perf->ops.oa_disable(stream);
1117 stream->perf->ops.oa_enable(stream);
1127 stream->perf->gen7_latched_oastatus1 |=
1136 * @stream: An i915-perf stream opened for OA metrics
1160 * @stream: An i915-perf stream opened for OA metrics
1161 * @file: An i915 perf stream file
1164 * For handling userspace polling on an i915 perf stream opened for OA metrics,
1177 * @stream: An i915-perf stream opened for OA metrics
1192 return stream->perf->ops.read(stream, buf, count, offset);
1238 * @stream: An i915-perf stream opened for OA metrics
1313 drm_dbg(&stream->perf->i915->drm,
1323 * @stream: An i915-perf stream opened for OA metrics
1369 struct i915_perf *perf = stream->perf;
1371 BUG_ON(stream != perf->exclusive_stream);
1379 WRITE_ONCE(perf->exclusive_stream, NULL);
1380 perf->ops.disable_metric_set(stream);
1393 if (perf->spurious_report_rs.missed) {
1395 perf->spurious_report_rs.missed);
1429 stream->perf->gen7_latched_oastatus1 = 0;
1556 struct drm_i915_private *i915 = stream->perf->i915;
1567 bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE);
1613 if (INTEL_GEN(stream->perf->i915) >= 8)
1629 struct drm_i915_private *i915 = stream->perf->i915;
1633 i915_cs_timestamp_ns_to_ticks(i915, atomic64_read(&stream->perf->noa_programming_delay));
1657 * needs to be fixed during the lifetime of the i915/perf stream.
1858 obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
1881 *cs++ = (INTEL_GEN(stream->perf->i915) < 8 ?
2083 u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
2084 u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2250 stream->perf->ctx_oactxctrl_offset + 1,
2320 struct drm_i915_private *i915 = stream->perf->i915;
2325 lockdep_assert_held(&stream->perf->lock);
2406 const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2415 stream->perf->ctx_oactxctrl_offset + 1,
2472 if (IS_GEN_RANGE(stream->perf->i915, 9, 11)) {
2663 * @stream: An i915 perf stream opened for OA metrics
2674 stream->perf->ops.oa_enable(stream);
2690 drm_err(&stream->perf->i915->drm,
2702 drm_err(&stream->perf->i915->drm,
2715 drm_err(&stream->perf->i915->drm,
2723 drm_err(&stream->perf->i915->drm,
2729 * @stream: An i915 perf stream opened for OA metrics
2737 stream->perf->ops.oa_disable(stream);
2761 err = stream->perf->ops.enable_metric_set(stream, active);
2803 * @stream: An i915 perf stream
2823 struct drm_i915_private *i915 = stream->perf->i915;
2824 struct i915_perf *perf = stream->perf;
2838 if (!perf->metrics_kobj) {
2844 (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) {
2849 if (!perf->ops.enable_metric_set) {
2859 if (perf->exclusive_stream) {
2874 format_size = perf->oa_formats[props->oa_format].size;
2886 perf->oa_formats[props->oa_format].format;
2906 stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
2934 perf->sseu = props->sseu;
2935 WRITE_ONCE(perf->exclusive_stream, stream);
2955 WRITE_ONCE(perf->exclusive_stream, NULL);
2956 perf->ops.disable_metric_set(stream);
2984 /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
2985 stream = READ_ONCE(engine->i915->perf.exclusive_stream);
2986 if (stream && INTEL_GEN(stream->perf->i915) < 12)
2991 * i915_perf_read - handles read() FOP for i915 perf stream FDs
2992 * @file: An i915 perf stream file
3014 struct i915_perf *perf = stream->perf;
3038 mutex_lock(&perf->lock);
3040 mutex_unlock(&perf->lock);
3043 mutex_lock(&perf->lock);
3045 mutex_unlock(&perf->lock);
3084 * @stream: An i915 perf stream
3085 * @file: An i915 perf stream file
3088 * For handling userspace polling on an i915 perf stream, this calls through to
3092 * Note: The &perf->lock mutex has been taken to serialize
3119 * @file: An i915 perf stream file
3122 * For handling userspace polling on an i915 perf stream, this ensures
3133 struct i915_perf *perf = stream->perf;
3136 mutex_lock(&perf->lock);
3138 mutex_unlock(&perf->lock);
3145 * @stream: A disabled i915 perf stream
3170 * @stream: An enabled i915 perf stream
3203 config = i915_perf_get_oa_config(stream->perf, metrics_set);
3232 * i915_perf_ioctl_locked - support ioctl() usage with i915 perf stream FDs
3233 * @stream: An i915 perf stream
3237 * Note: The &perf->lock mutex has been taken to serialize
3262 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
3263 * @file: An i915 perf stream file
3277 struct i915_perf *perf = stream->perf;
3280 mutex_lock(&perf->lock);
3282 mutex_unlock(&perf->lock);
3288 * i915_perf_destroy_locked - destroy an i915 perf stream
3289 * @stream: An i915 perf stream
3291 * Frees all resources associated with the given i915 perf @stream, disabling
3294 * Note: The &perf->lock mutex has been taken to serialize
3314 * @file: An i915 perf stream file
3316 * Cleans up any resources associated with an open i915 perf stream file.
3325 struct i915_perf *perf = stream->perf;
3327 mutex_lock(&perf->lock);
3329 mutex_unlock(&perf->lock);
3331 /* Release the reference the perf stream kept on the driver. */
3332 drm_dev_put(&perf->i915->drm);
3354 * @perf: i915 perf instance
3362 * behalf of i915_perf_open_ioctl() with the &perf->lock mutex
3377 i915_perf_open_ioctl_locked(struct i915_perf *perf,
3395 DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n",
3421 if (IS_HASWELL(perf->i915) && specific_ctx)
3423 else if (IS_GEN(perf->i915, 12) && specific_ctx &&
3444 /* Similar to perf's kernel.perf_event_paranoid sysctl option
3451 DRM_DEBUG("Insufficient privileges to open i915 perf stream\n");
3462 stream->perf = perf;
3496 drm_dev_get(&perf->i915->drm);
3512 static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
3514 return i915_cs_timestamp_ticks_to_ns(perf->i915, 2ULL << exponent);
3519 * @perf: i915 perf instance
3532 static int read_properties_unlocked(struct i915_perf *perf,
3545 DRM_DEBUG("No i915 perf properties given\n");
3549 /* At the moment we only support using i915-perf on the RCS. */
3550 props->engine = intel_engine_lookup_user(perf->i915,
3565 DRM_DEBUG("More i915 perf properties specified than exist\n");
3582 DRM_DEBUG("Unknown i915 perf property ID\n");
3608 if (!perf->oa_formats[value].size) {
3629 oa_period = oa_exponent_to_ns(perf, value);
3704 * i915-perf stream is expected to be a suitable interface for other forms of
3707 * Note we copy the properties from userspace outside of the i915 perf
3711 * i915_perf_open_ioctl_locked() after taking the &perf->lock
3720 struct i915_perf *perf = &to_i915(dev)->perf;
3726 if (!perf->i915) {
3727 DRM_DEBUG("i915 perf interface not available for this system\n");
3739 ret = read_properties_unlocked(perf,
3746 mutex_lock(&perf->lock);
3747 ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
3748 mutex_unlock(&perf->lock);
3754 * i915_perf_register - exposes i915-perf to userspace
3759 * used to open an i915-perf stream.
3763 struct i915_perf *perf = &i915->perf;
3765 if (!perf->i915)
3772 mutex_lock(&perf->lock);
3774 perf->metrics_kobj =
3778 mutex_unlock(&perf->lock);
3782 * i915_perf_unregister - hide i915-perf from userspace
3785 * i915-perf state cleanup is split up into an 'unregister' and
3792 struct i915_perf *perf = &i915->perf;
3794 if (!perf->metrics_kobj)
3797 kobject_put(perf->metrics_kobj);
3798 perf->metrics_kobj = NULL;
3801 static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
3832 static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
3839 static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3847 static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3849 return gen7_is_valid_mux_addr(perf, addr) ||
3854 static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3856 return gen8_is_valid_mux_addr(perf, addr) ||
3861 static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3863 return gen7_is_valid_mux_addr(perf, addr) ||
3869 static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3871 return gen7_is_valid_mux_addr(perf, addr) ||
3875 static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
3886 static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3916 static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf,
3917 bool (*is_valid)(struct i915_perf *perf, u32 addr),
3944 if (!is_valid(perf, addr)) {
3977 static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf,
3992 return sysfs_create_group(perf->metrics_kobj,
4006 * Returns: A newly allocated config number to be used with the perf open ioctl
4012 struct i915_perf *perf = &to_i915(dev)->perf;
4018 if (!perf->i915) {
4019 DRM_DEBUG("i915 perf interface not available for this system\n");
4023 if (!perf->metrics_kobj) {
4046 oa_config->perf = perf;
4061 regs = alloc_oa_regs(perf,
4062 perf->ops.is_valid_mux_reg,
4074 regs = alloc_oa_regs(perf,
4075 perf->ops.is_valid_b_counter_reg,
4086 if (INTEL_GEN(perf->i915) < 8) {
4093 regs = alloc_oa_regs(perf,
4094 perf->ops.is_valid_flex_reg,
4106 err = mutex_lock_interruptible(&perf->metrics_lock);
4113 idr_for_each_entry(&perf->metrics_idr, tmp, id) {
4121 err = create_dynamic_oa_sysfs_entry(perf, oa_config);
4128 oa_config->id = idr_alloc(&perf->metrics_idr,
4137 mutex_unlock(&perf->metrics_lock);
4144 mutex_unlock(&perf->metrics_lock);
4165 struct i915_perf *perf = &to_i915(dev)->perf;
4170 if (!perf->i915) {
4171 DRM_DEBUG("i915 perf interface not available for this system\n");
4180 ret = mutex_lock_interruptible(&perf->metrics_lock);
4184 oa_config = idr_find(&perf->metrics_idr, *arg);
4193 sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric);
4195 idr_remove(&perf->metrics_idr, *arg);
4197 mutex_unlock(&perf->metrics_lock);
4206 mutex_unlock(&perf->metrics_lock);
4253 * i915_perf_init - initialize i915-perf state on module bind
4256 * Initializes i915-perf state without exposing anything to userspace.
4258 * Note: i915-perf initialization is split into an 'init' and 'register'
4263 struct i915_perf *perf = &i915->perf;
4268 perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
4269 perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
4270 perf->ops.is_valid_flex_reg = NULL;
4271 perf->ops.enable_metric_set = hsw_enable_metric_set;
4272 perf->ops.disable_metric_set = hsw_disable_metric_set;
4273 perf->ops.oa_enable = gen7_oa_enable;
4274 perf->ops.oa_disable = gen7_oa_disable;
4275 perf->ops.read = gen7_oa_read;
4276 perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
4278 perf->oa_formats = hsw_oa_formats;
4286 perf->ops.read = gen8_oa_read;
4289 perf->oa_formats = gen8_plus_oa_formats;
4291 perf->ops.is_valid_b_counter_reg =
4293 perf->ops.is_valid_mux_reg =
4295 perf->ops.is_valid_flex_reg =
4299 perf->ops.is_valid_mux_reg =
4303 perf->ops.oa_enable = gen8_oa_enable;
4304 perf->ops.oa_disable = gen8_oa_disable;
4305 perf->ops.enable_metric_set = gen8_enable_metric_set;
4306 perf->ops.disable_metric_set = gen8_disable_metric_set;
4307 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
4310 perf->ctx_oactxctrl_offset = 0x120;
4311 perf->ctx_flexeu0_offset = 0x2ce;
4313 perf->gen8_valid_ctx_bit = BIT(25);
4315 perf->ctx_oactxctrl_offset = 0x128;
4316 perf->ctx_flexeu0_offset = 0x3de;
4318 perf->gen8_valid_ctx_bit = BIT(16);
4321 perf->oa_formats = gen8_plus_oa_formats;
4323 perf->ops.is_valid_b_counter_reg =
4325 perf->ops.is_valid_mux_reg =
4327 perf->ops.is_valid_flex_reg =
4330 perf->ops.oa_enable = gen8_oa_enable;
4331 perf->ops.oa_disable = gen8_oa_disable;
4332 perf->ops.enable_metric_set = gen8_enable_metric_set;
4333 perf->ops.disable_metric_set = gen10_disable_metric_set;
4334 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
4337 perf->ctx_oactxctrl_offset = 0x128;
4338 perf->ctx_flexeu0_offset = 0x3de;
4340 perf->ctx_oactxctrl_offset = 0x124;
4341 perf->ctx_flexeu0_offset = 0x78e;
4343 perf->gen8_valid_ctx_bit = BIT(16);
4345 perf->oa_formats = gen12_oa_formats;
4347 perf->ops.is_valid_b_counter_reg =
4349 perf->ops.is_valid_mux_reg =
4351 perf->ops.is_valid_flex_reg =
4354 perf->ops.oa_enable = gen12_oa_enable;
4355 perf->ops.oa_disable = gen12_oa_disable;
4356 perf->ops.enable_metric_set = gen12_enable_metric_set;
4357 perf->ops.disable_metric_set = gen12_disable_metric_set;
4358 perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
4360 perf->ctx_flexeu0_offset = 0;
4361 perf->ctx_oactxctrl_offset = 0x144;
4365 if (perf->ops.enable_metric_set) {
4366 mutex_init(&perf->lock);
4371 mutex_init(&perf->metrics_lock);
4372 idr_init(&perf->metrics_idr);
4384 ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10);
4389 ratelimit_set_flags(&perf->spurious_report_rs,
4392 ratelimit_state_init(&perf->tail_pointer_race,
4394 ratelimit_set_flags(&perf->tail_pointer_race,
4397 atomic64_set(&perf->noa_programming_delay,
4400 perf->i915 = i915;
4426 struct i915_perf *perf = &i915->perf;
4428 if (!perf->i915)
4431 idr_for_each(&perf->metrics_idr, destroy_config, perf);
4432 idr_destroy(&perf->metrics_idr);
4434 memset(&perf->ops, 0, sizeof(perf->ops));
4435 perf->i915 = NULL;
4439 * i915_perf_ioctl_version - Version of the i915-perf subsystem