Lines Matching refs:perf
35 * This i915 perf interface enables userspace to configure and open a file
56 * i915 perf file descriptors represent a "stream" instead of an "event"; where
57 * a perf event primarily corresponds to a single 64bit value, while a stream
61 * of related counters. Samples for an i915 perf stream capturing OA metrics
68 * i915 perf stream configurations are provided as an array of u64 (key,value)
72 * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
85 * The first prototype of this driver was based on the core perf
87 * perf, we found we were breaking or working around too many assumptions baked
88 * into perf's currently cpu centric design.
90 * In the end we didn't see a clear benefit to making perf's implementation and
92 * wouldn't be able to use any existing perf based userspace tools.
95 * how userspace will sometimes need to combine i915 perf OA metrics with
99 * a standard vendor/architecture agnostic interface by not using perf.
102 * For posterity, in case we might re-visit trying to adapt core perf to be
106 * - The perf based OA PMU driver broke some significant design assumptions:
108 * Existing perf pmus are used for profiling work on a cpu and we were
111 * registers) to fit with perf's current design, and adding _DEVICE records
118 * buffer to perf's buffer, those bursts of sample writes looked to perf like
128 * set while perf generally expects counter configurations to be orthogonal.
133 * GPU context to filter metrics on). We avoided using perf's grouping
134 * feature and forwarded OA reports to userspace via perf's 'raw' sample
142 * - As a side note on perf's grouping feature; there was also some concern
156 * event scheduling is a central design idea within perf for allowing
178 * - It felt like our perf based PMU was making some technical compromises
179 * just for the sake of using perf:
185 * cpu id, perf ensures pmu methods will be invoked via an inter process
187 * perf events for a specific cpu. This was workable but it meant the
308 * The default threshold of 100000Hz is based on perf's similar
409 i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
414 oa_config = idr_find(&perf->metrics_idr, metrics_set);
469 (GRAPHICS_VER(stream->perf->i915) == 12 ?
485 stream->perf->gen8_valid_ctx_bit);
557 hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
600 __ratelimit(&stream->perf->tail_pointer_race))
617 * @stream: An i915-perf stream opened for OA metrics
651 * @stream: An i915-perf stream opened for OA metrics
712 * @stream: An i915-perf stream opened for OA metrics
916 oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ?
939 * @stream: An i915-perf stream opened for OA metrics
970 oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ?
996 drm_dbg(&stream->perf->i915->drm,
1000 stream->perf->ops.oa_disable(stream);
1001 stream->perf->ops.oa_enable(stream);
1030 * @stream: An i915-perf stream opened for OA metrics
1121 if (__ratelimit(&stream->perf->spurious_report_rs))
1159 * @stream: An i915-perf stream opened for OA metrics
1192 oastatus1 &= ~stream->perf->gen7_latched_oastatus1;
1220 drm_dbg(&stream->perf->i915->drm,
1224 stream->perf->ops.oa_disable(stream);
1225 stream->perf->ops.oa_enable(stream);
1235 stream->perf->gen7_latched_oastatus1 |=
1244 * @stream: An i915-perf stream opened for OA metrics
1268 * @stream: An i915-perf stream opened for OA metrics
1269 * @file: An i915 perf stream file
1272 * For handling userspace polling on an i915 perf stream opened for OA metrics,
1285 * @stream: An i915-perf stream opened for OA metrics
1300 return stream->perf->ops.read(stream, buf, count, offset);
1514 struct i915_perf *perf = &ce->engine->i915->perf;
1515 u32 offset = perf->ctx_oactxctrl_offset;
1522 perf->ctx_oactxctrl_offset = offset;
1539 * @stream: An i915-perf stream opened for OA metrics
1557 HAS_LOGICAL_RING_CONTEXTS(stream->perf->i915)) {
1559 * We are enabling perf query here. If we don't find the context
1565 drm_err(&stream->perf->i915->drm,
1566 "Enabling perf query failed for %s\n",
1622 drm_dbg(&stream->perf->i915->drm,
1632 * @stream: An i915-perf stream opened for OA metrics
1688 struct i915_perf *perf = stream->perf;
1702 perf->ops.disable_metric_set(stream);
1722 if (perf->spurious_report_rs.missed) {
1725 perf->spurious_report_rs.missed);
1758 stream->perf->gen7_latched_oastatus1 = 0;
1883 struct drm_i915_private *i915 = stream->perf->i915;
1895 bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE);
1952 if (GRAPHICS_VER(stream->perf->i915) >= 8)
1967 struct drm_i915_private *i915 = stream->perf->i915;
1972 intel_gt_ns_to_clock_interval(to_gt(stream->perf->i915),
1973 atomic64_read(&stream->perf->noa_programming_delay));
2013 * needs to be fixed during the lifetime of the i915/perf stream.
2242 obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
2271 *cs++ = (GRAPHICS_VER(stream->perf->i915) < 8 ?
2480 u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
2481 u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2645 u32 offset = stream->perf->ctx_oactxctrl_offset;
2720 struct drm_i915_private *i915 = stream->perf->i915;
2726 lockdep_assert_held(&gt->perf.lock);
2809 u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
2811 const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2877 if (IS_GRAPHICS_VER(stream->perf->i915, 9, 11)) {
2908 struct drm_i915_private *i915 = stream->perf->i915;
3003 struct drm_i915_private *i915 = stream->perf->i915;
3113 * @stream: An i915 perf stream opened for OA metrics
3124 stream->perf->ops.oa_enable(stream);
3140 drm_err(&stream->perf->i915->drm,
3152 drm_err(&stream->perf->i915->drm,
3165 drm_err(&stream->perf->i915->drm,
3173 drm_err(&stream->perf->i915->drm,
3179 * @stream: An i915 perf stream opened for OA metrics
3187 stream->perf->ops.oa_disable(stream);
3211 err = stream->perf->ops.enable_metric_set(stream, active);
3280 * @stream: An i915 perf stream
3300 struct drm_i915_private *i915 = stream->perf->i915;
3301 struct i915_perf *perf = stream->perf;
3307 drm_dbg(&stream->perf->i915->drm,
3319 if (!perf->metrics_kobj) {
3320 drm_dbg(&stream->perf->i915->drm,
3326 (GRAPHICS_VER(perf->i915) < 12 || !stream->ctx)) {
3327 drm_dbg(&stream->perf->i915->drm,
3332 if (!perf->ops.enable_metric_set) {
3333 drm_dbg(&stream->perf->i915->drm,
3344 drm_dbg(&stream->perf->i915->drm,
3350 drm_dbg(&stream->perf->i915->drm,
3360 stream->oa_buffer.format = &perf->oa_formats[props->oa_format];
3376 drm_dbg(&stream->perf->i915->drm,
3384 drm_dbg(&stream->perf->i915->drm,
3389 stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
3391 drm_dbg(&stream->perf->i915->drm,
3423 drm_dbg(&stream->perf->i915->drm,
3437 stream->engine->gt->perf.sseu = props->sseu;
3442 drm_dbg(&stream->perf->i915->drm,
3447 drm_dbg(&stream->perf->i915->drm,
3462 perf->ops.disable_metric_set(stream);
3494 /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
3496 if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
3501 * i915_perf_read - handles read() FOP for i915 perf stream FDs
3502 * @file: An i915 perf stream file
3593 * @stream: An i915 perf stream
3594 * @file: An i915 perf stream file
3597 * For handling userspace polling on an i915 perf stream, this calls through to
3625 * @file: An i915 perf stream file
3628 * For handling userspace polling on an i915 perf stream, this ensures
3650 * @stream: A disabled i915 perf stream
3675 * @stream: An enabled i915 perf stream
3708 config = i915_perf_get_oa_config(stream->perf, metrics_set);
3737 * i915_perf_ioctl_locked - support ioctl() usage with i915 perf stream FDs
3738 * @stream: An i915 perf stream
3764 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
3765 * @file: An i915 perf stream file
3789 * i915_perf_destroy_locked - destroy an i915 perf stream
3790 * @stream: An i915 perf stream
3792 * Frees all resources associated with the given i915 perf @stream, disabling
3795 * Note: The &gt->perf.lock mutex has been taken to serialize
3815 * @file: An i915 perf stream file
3817 * Cleans up any resources associated with an open i915 perf stream file.
3826 struct i915_perf *perf = stream->perf;
3831 * other user of stream->lock. Use the perf lock to destroy the stream
3834 mutex_lock(&gt->perf.lock);
3836 mutex_unlock(&gt->perf.lock);
3838 /* Release the reference the perf stream kept on the driver. */
3839 drm_dev_put(&perf->i915->drm);
3861 * @perf: i915 perf instance
3869 * behalf of i915_perf_open_ioctl() with the &gt->perf.lock mutex
3884 i915_perf_open_ioctl_locked(struct i915_perf *perf,
3902 drm_dbg(&perf->i915->drm,
3903 "Failed to look up context with ID %u for opening perf stream\n",
3929 if (IS_HASWELL(perf->i915) && specific_ctx)
3931 else if (GRAPHICS_VER(perf->i915) == 12 && specific_ctx &&
3937 drm_dbg(&perf->i915->drm,
3953 /* Similar to perf's kernel.perf_paranoid_cpu sysctl option
3960 drm_dbg(&perf->i915->drm,
3961 "Insufficient privileges to open i915 perf stream\n");
3972 stream->perf = perf;
4006 drm_dev_get(&perf->i915->drm);
4022 static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
4025 u32 den = i915_perf_oa_timestamp_frequency(perf->i915);
4031 oa_format_valid(struct i915_perf *perf, enum drm_i915_oa_format format)
4033 return test_bit(format, perf->format_mask);
4037 oa_format_add(struct i915_perf *perf, enum drm_i915_oa_format format)
4039 __set_bit(format, perf->format_mask);
4044 * @perf: i915 perf instance
4057 static int read_properties_unlocked(struct i915_perf *perf,
4082 drm_dbg(&perf->i915->drm,
4083 "Invalid number of i915 perf properties given\n");
4104 drm_dbg(&perf->i915->drm,
4105 "Unknown i915 perf property ID\n");
4120 drm_dbg(&perf->i915->drm,
4128 drm_dbg(&perf->i915->drm,
4133 if (!oa_format_valid(perf, value)) {
4134 drm_dbg(&perf->i915->drm,
4143 drm_dbg(&perf->i915->drm,
4156 oa_period = oa_exponent_to_ns(perf, value);
4172 drm_dbg(&perf->i915->drm,
4185 if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 50)) {
4186 drm_dbg(&perf->i915->drm,
4188 GRAPHICS_VER_FULL(perf->i915));
4195 drm_dbg(&perf->i915->drm,
4204 drm_dbg(&perf->i915->drm,
4229 drm_dbg(&perf->i915->drm,
4234 props->engine = intel_engine_lookup_user(perf->i915, class, instance);
4236 drm_dbg(&perf->i915->drm,
4243 drm_dbg(&perf->i915->drm,
4257 drm_dbg(&perf->i915->drm,
4263 f = &perf->oa_formats[i];
4265 drm_dbg(&perf->i915->drm,
4274 drm_dbg(&perf->i915->drm,
4295 * i915-perf stream is expected to be a suitable interface for other forms of
4298 * Note we copy the properties from userspace outside of the i915 perf
4302 * i915_perf_open_ioctl_locked() after taking the &gt->perf.lock
4311 struct i915_perf *perf = &to_i915(dev)->perf;
4318 if (!perf->i915)
4325 drm_dbg(&perf->i915->drm,
4330 ret = read_properties_unlocked(perf,
4339 mutex_lock(&gt->perf.lock);
4340 ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
4341 mutex_unlock(&gt->perf.lock);
4347 * i915_perf_register - exposes i915-perf to userspace
4352 * used to open an i915-perf stream.
4356 struct i915_perf *perf = &i915->perf;
4359 if (!perf->i915)
4366 mutex_lock(&gt->perf.lock);
4368 perf->metrics_kobj =
4372 mutex_unlock(&gt->perf.lock);
4376 * i915_perf_unregister - hide i915-perf from userspace
4379 * i915-perf state cleanup is split up into an 'unregister' and
4386 struct i915_perf *perf = &i915->perf;
4388 if (!perf->metrics_kobj)
4391 kobject_put(perf->metrics_kobj);
4392 perf->metrics_kobj = NULL;
4395 static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
4514 static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
4519 static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4525 static bool gen11_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4532 static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4538 static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4544 static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
4549 static bool mtl_is_valid_oam_b_counter_addr(struct i915_perf *perf, u32 addr)
4551 if (HAS_OAM(perf->i915) &&
4552 GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70))
4558 static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
4562 mtl_is_valid_oam_b_counter_addr(perf, addr);
4565 static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4567 if (IS_METEORLAKE(perf->i915))
4592 static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf,
4593 bool (*is_valid)(struct i915_perf *perf, u32 addr),
4620 if (!is_valid(perf, addr)) {
4621 drm_dbg(&perf->i915->drm,
4654 static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf,
4669 return sysfs_create_group(perf->metrics_kobj,
4683 * Returns: A new allocated config number to be used with the perf open ioctl
4689 struct i915_perf *perf = &to_i915(dev)->perf;
4695 if (!perf->i915)
4698 if (!perf->metrics_kobj) {
4699 drm_dbg(&perf->i915->drm,
4705 drm_dbg(&perf->i915->drm,
4713 drm_dbg(&perf->i915->drm,
4720 drm_dbg(&perf->i915->drm,
4725 oa_config->perf = perf;
4729 drm_dbg(&perf->i915->drm,
4741 regs = alloc_oa_regs(perf,
4742 perf->ops.is_valid_mux_reg,
4747 drm_dbg(&perf->i915->drm,
4755 regs = alloc_oa_regs(perf,
4756 perf->ops.is_valid_b_counter_reg,
4761 drm_dbg(&perf->i915->drm,
4768 if (GRAPHICS_VER(perf->i915) < 8) {
4775 regs = alloc_oa_regs(perf,
4776 perf->ops.is_valid_flex_reg,
4781 drm_dbg(&perf->i915->drm,
4789 err = mutex_lock_interruptible(&perf->metrics_lock);
4796 idr_for_each_entry(&perf->metrics_idr, tmp, id) {
4798 drm_dbg(&perf->i915->drm,
4805 err = create_dynamic_oa_sysfs_entry(perf, oa_config);
4807 drm_dbg(&perf->i915->drm,
4813 oa_config->id = idr_alloc(&perf->metrics_idr,
4817 drm_dbg(&perf->i915->drm,
4824 drm_dbg(&perf->i915->drm,
4826 mutex_unlock(&perf->metrics_lock);
4831 mutex_unlock(&perf->metrics_lock);
4834 drm_dbg(&perf->i915->drm,
4853 struct i915_perf *perf = &to_i915(dev)->perf;
4858 if (!perf->i915)
4862 drm_dbg(&perf->i915->drm,
4867 ret = mutex_lock_interruptible(&perf->metrics_lock);
4871 oa_config = idr_find(&perf->metrics_idr, *arg);
4873 drm_dbg(&perf->i915->drm,
4881 sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric);
4883 idr_remove(&perf->metrics_idr, *arg);
4885 mutex_unlock(&perf->metrics_lock);
4887 drm_dbg(&perf->i915->drm,
4895 mutex_unlock(&perf->metrics_lock);
4989 int i, num_groups = gt->perf.num_perf_groups;
4992 struct i915_perf_group *g = &gt->perf.group[i];
5029 gt->perf.num_perf_groups = num_groups;
5030 gt->perf.group = g;
5037 static int oa_init_engine_groups(struct i915_perf *perf)
5042 for_each_gt(gt, perf->i915, i) {
5051 static void oa_init_supported_formats(struct i915_perf *perf)
5053 struct drm_i915_private *i915 = perf->i915;
5058 oa_format_add(perf, I915_OA_FORMAT_A13);
5059 oa_format_add(perf, I915_OA_FORMAT_A13);
5060 oa_format_add(perf, I915_OA_FORMAT_A29);
5061 oa_format_add(perf, I915_OA_FORMAT_A13_B8_C8);
5062 oa_format_add(perf, I915_OA_FORMAT_B4_C8);
5063 oa_format_add(perf, I915_OA_FORMAT_A45_B8_C8);
5064 oa_format_add(perf, I915_OA_FORMAT_B4_C8_A16);
5065 oa_format_add(perf, I915_OA_FORMAT_C4_B8);
5084 oa_format_add(perf, I915_OA_FORMAT_A12);
5085 oa_format_add(perf, I915_OA_FORMAT_A12_B8_C8);
5086 oa_format_add(perf, I915_OA_FORMAT_A32u40_A4u32_B8_C8);
5087 oa_format_add(perf, I915_OA_FORMAT_C4_B8);
5091 oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
5092 oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
5096 oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
5097 oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
5098 oa_format_add(perf, I915_OAM_FORMAT_MPEC8u64_B8_C8);
5099 oa_format_add(perf, I915_OAM_FORMAT_MPEC8u32_B8_C8);
5109 struct i915_perf *perf = &i915->perf;
5113 perf->ctx_oactxctrl_offset = 0x120;
5114 perf->ctx_flexeu0_offset = 0x2ce;
5115 perf->gen8_valid_ctx_bit = BIT(25);
5118 perf->ctx_oactxctrl_offset = 0x128;
5119 perf->ctx_flexeu0_offset = 0x3de;
5120 perf->gen8_valid_ctx_bit = BIT(16);
5123 perf->ctx_oactxctrl_offset = 0x124;
5124 perf->ctx_flexeu0_offset = 0x78e;
5125 perf->gen8_valid_ctx_bit = BIT(16);
5128 perf->gen8_valid_ctx_bit = BIT(16);
5131 * cache the value in perf->ctx_oactxctrl_offset.
5140 * i915_perf_init - initialize i915-perf state on module bind
5143 * Initializes i915-perf state without exposing anything to userspace.
5145 * Note: i915-perf initialization is split into an 'init' and 'register'
5150 struct i915_perf *perf = &i915->perf;
5152 perf->oa_formats = oa_formats;
5154 perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
5155 perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
5156 perf->ops.is_valid_flex_reg = NULL;
5157 perf->ops.enable_metric_set = hsw_enable_metric_set;
5158 perf->ops.disable_metric_set = hsw_disable_metric_set;
5159 perf->ops.oa_enable = gen7_oa_enable;
5160 perf->ops.oa_disable = gen7_oa_disable;
5161 perf->ops.read = gen7_oa_read;
5162 perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
5170 perf->ops.read = gen8_oa_read;
5174 perf->ops.is_valid_b_counter_reg =
5176 perf->ops.is_valid_mux_reg =
5178 perf->ops.is_valid_flex_reg =
5182 perf->ops.is_valid_mux_reg =
5186 perf->ops.oa_enable = gen8_oa_enable;
5187 perf->ops.oa_disable = gen8_oa_disable;
5188 perf->ops.enable_metric_set = gen8_enable_metric_set;
5189 perf->ops.disable_metric_set = gen8_disable_metric_set;
5190 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
5192 perf->ops.is_valid_b_counter_reg =
5194 perf->ops.is_valid_mux_reg =
5196 perf->ops.is_valid_flex_reg =
5199 perf->ops.oa_enable = gen8_oa_enable;
5200 perf->ops.oa_disable = gen8_oa_disable;
5201 perf->ops.enable_metric_set = gen8_enable_metric_set;
5202 perf->ops.disable_metric_set = gen11_disable_metric_set;
5203 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
5205 perf->ops.is_valid_b_counter_reg =
5209 perf->ops.is_valid_mux_reg =
5211 perf->ops.is_valid_flex_reg =
5214 perf->ops.oa_enable = gen12_oa_enable;
5215 perf->ops.oa_disable = gen12_oa_disable;
5216 perf->ops.enable_metric_set = gen12_enable_metric_set;
5217 perf->ops.disable_metric_set = gen12_disable_metric_set;
5218 perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
5222 if (perf->ops.enable_metric_set) {
5227 mutex_init(&gt->perf.lock);
5232 mutex_init(&perf->metrics_lock);
5233 idr_init_base(&perf->metrics_idr, 1);
5245 ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10);
5250 ratelimit_set_flags(&perf->spurious_report_rs,
5253 ratelimit_state_init(&perf->tail_pointer_race,
5255 ratelimit_set_flags(&perf->tail_pointer_race,
5258 atomic64_set(&perf->noa_programming_delay,
5261 perf->i915 = i915;
5263 ret = oa_init_engine_groups(perf);
5270 oa_init_supported_formats(perf);
5299 struct i915_perf *perf = &i915->perf;
5303 if (!perf->i915)
5306 for_each_gt(gt, perf->i915, i)
5307 kfree(gt->perf.group);
5309 idr_for_each(&perf->metrics_idr, destroy_config, perf);
5310 idr_destroy(&perf->metrics_idr);
5312 memset(&perf->ops, 0, sizeof(perf->ops));
5313 perf->i915 = NULL;
5317 * i915_perf_ioctl_version - Version of the i915-perf subsystem