1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Copyright (C) 2013 Advanced Micro Devices, Inc. 4 * 5 * Author: Steven Kinney <Steven.Kinney@amd.com> 6 * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com> 7 * 8 * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation 9 */ 10 11#define pr_fmt(fmt) "perf/amd_iommu: " fmt 12 13#include <linux/perf_event.h> 14#include <linux/init.h> 15#include <linux/cpumask.h> 16#include <linux/slab.h> 17 18#include "../perf_event.h" 19#include "iommu.h" 20 21/* iommu pmu conf masks */ 22#define GET_CSOURCE(x) ((x)->conf & 0xFFULL) 23#define GET_DEVID(x) (((x)->conf >> 8) & 0xFFFFULL) 24#define GET_DOMID(x) (((x)->conf >> 24) & 0xFFFFULL) 25#define GET_PASID(x) (((x)->conf >> 40) & 0xFFFFFULL) 26 27/* iommu pmu conf1 masks */ 28#define GET_DEVID_MASK(x) ((x)->conf1 & 0xFFFFULL) 29#define GET_DOMID_MASK(x) (((x)->conf1 >> 16) & 0xFFFFULL) 30#define GET_PASID_MASK(x) (((x)->conf1 >> 32) & 0xFFFFFULL) 31 32#define IOMMU_NAME_SIZE 16 33 34struct perf_amd_iommu { 35 struct list_head list; 36 struct pmu pmu; 37 struct amd_iommu *iommu; 38 char name[IOMMU_NAME_SIZE]; 39 u8 max_banks; 40 u8 max_counters; 41 u64 cntr_assign_mask; 42 raw_spinlock_t lock; 43}; 44 45static LIST_HEAD(perf_amd_iommu_list); 46 47/*--------------------------------------------- 48 * sysfs format attributes 49 *---------------------------------------------*/ 50PMU_FORMAT_ATTR(csource, "config:0-7"); 51PMU_FORMAT_ATTR(devid, "config:8-23"); 52PMU_FORMAT_ATTR(domid, "config:24-39"); 53PMU_FORMAT_ATTR(pasid, "config:40-59"); 54PMU_FORMAT_ATTR(devid_mask, "config1:0-15"); 55PMU_FORMAT_ATTR(domid_mask, "config1:16-31"); 56PMU_FORMAT_ATTR(pasid_mask, "config1:32-51"); 57 58static struct attribute *iommu_format_attrs[] = { 59 &format_attr_csource.attr, 60 &format_attr_devid.attr, 61 &format_attr_pasid.attr, 62 &format_attr_domid.attr, 63 &format_attr_devid_mask.attr, 64 &format_attr_pasid_mask.attr, 65 &format_attr_domid_mask.attr, 66 NULL, 67}; 68 69static struct attribute_group amd_iommu_format_group = { 70 .name = "format", 71 .attrs = iommu_format_attrs, 72}; 73 74/*--------------------------------------------- 75 * sysfs events attributes 76 *---------------------------------------------*/ 77static struct attribute_group amd_iommu_events_group = { 78 .name = "events", 79}; 80 81struct amd_iommu_event_desc { 82 struct device_attribute attr; 83 const char *event; 84}; 85 86static ssize_t _iommu_event_show(struct device *dev, 87 struct device_attribute *attr, char *buf) 88{ 89 struct amd_iommu_event_desc *event = 90 container_of(attr, struct amd_iommu_event_desc, attr); 91 return sprintf(buf, "%s\n", event->event); 92} 93 94#define AMD_IOMMU_EVENT_DESC(_name, _event) \ 95{ \ 96 .attr = __ATTR(_name, 0444, _iommu_event_show, NULL), \ 97 .event = _event, \ 98} 99 100static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = { 101 AMD_IOMMU_EVENT_DESC(mem_pass_untrans, "csource=0x01"), 102 AMD_IOMMU_EVENT_DESC(mem_pass_pretrans, "csource=0x02"), 103 AMD_IOMMU_EVENT_DESC(mem_pass_excl, "csource=0x03"), 104 AMD_IOMMU_EVENT_DESC(mem_target_abort, "csource=0x04"), 105 AMD_IOMMU_EVENT_DESC(mem_trans_total, "csource=0x05"), 106 AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit, "csource=0x06"), 107 AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis, "csource=0x07"), 108 AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit, "csource=0x08"), 109 AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis, "csource=0x09"), 110 AMD_IOMMU_EVENT_DESC(mem_dte_hit, "csource=0x0a"), 111 AMD_IOMMU_EVENT_DESC(mem_dte_mis, "csource=0x0b"), 112 AMD_IOMMU_EVENT_DESC(page_tbl_read_tot, "csource=0x0c"), 113 AMD_IOMMU_EVENT_DESC(page_tbl_read_nst, "csource=0x0d"), 114 AMD_IOMMU_EVENT_DESC(page_tbl_read_gst, "csource=0x0e"), 115 AMD_IOMMU_EVENT_DESC(int_dte_hit, "csource=0x0f"), 116 AMD_IOMMU_EVENT_DESC(int_dte_mis, "csource=0x10"), 117 AMD_IOMMU_EVENT_DESC(cmd_processed, "csource=0x11"), 118 AMD_IOMMU_EVENT_DESC(cmd_processed_inv, "csource=0x12"), 119 AMD_IOMMU_EVENT_DESC(tlb_inv, "csource=0x13"), 120 AMD_IOMMU_EVENT_DESC(ign_rd_wr_mmio_1ff8h, "csource=0x14"), 121 AMD_IOMMU_EVENT_DESC(vapic_int_non_guest, "csource=0x15"), 122 AMD_IOMMU_EVENT_DESC(vapic_int_guest, "csource=0x16"), 123 AMD_IOMMU_EVENT_DESC(smi_recv, "csource=0x17"), 124 AMD_IOMMU_EVENT_DESC(smi_blk, "csource=0x18"), 125 { /* end: all zeroes */ }, 126}; 127 128/*--------------------------------------------- 129 * sysfs cpumask attributes 130 *---------------------------------------------*/ 131static cpumask_t iommu_cpumask; 132 133static ssize_t _iommu_cpumask_show(struct device *dev, 134 struct device_attribute *attr, 135 char *buf) 136{ 137 return cpumap_print_to_pagebuf(true, buf, &iommu_cpumask); 138} 139static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL); 140 141static struct attribute *iommu_cpumask_attrs[] = { 142 &dev_attr_cpumask.attr, 143 NULL, 144}; 145 146static struct attribute_group amd_iommu_cpumask_group = { 147 .attrs = iommu_cpumask_attrs, 148}; 149 150/*---------------------------------------------*/ 151 152static int get_next_avail_iommu_bnk_cntr(struct perf_event *event) 153{ 154 struct perf_amd_iommu *piommu = container_of(event->pmu, struct perf_amd_iommu, pmu); 155 int max_cntrs = piommu->max_counters; 156 int max_banks = piommu->max_banks; 157 u32 shift, bank, cntr; 158 unsigned long flags; 159 int retval; 160 161 raw_spin_lock_irqsave(&piommu->lock, flags); 162 163 for (bank = 0, shift = 0; bank < max_banks; bank++) { 164 for (cntr = 0; cntr < max_cntrs; cntr++) { 165 shift = bank + (bank*3) + cntr; 166 if (piommu->cntr_assign_mask & BIT_ULL(shift)) { 167 continue; 168 } else { 169 piommu->cntr_assign_mask |= BIT_ULL(shift); 170 event->hw.iommu_bank = bank; 171 event->hw.iommu_cntr = cntr; 172 retval = 0; 173 goto out; 174 } 175 } 176 } 177 retval = -ENOSPC; 178out: 179 raw_spin_unlock_irqrestore(&piommu->lock, flags); 180 return retval; 181} 182 183static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu, 184 u8 bank, u8 cntr) 185{ 186 unsigned long flags; 187 int max_banks, max_cntrs; 188 int shift = 0; 189 190 max_banks = perf_iommu->max_banks; 191 max_cntrs = perf_iommu->max_counters; 192 193 if ((bank > max_banks) || (cntr > max_cntrs)) 194 return -EINVAL; 195 196 shift = bank + cntr + (bank*3); 197 198 raw_spin_lock_irqsave(&perf_iommu->lock, flags); 199 perf_iommu->cntr_assign_mask &= ~(1ULL<<shift); 200 raw_spin_unlock_irqrestore(&perf_iommu->lock, flags); 201 202 return 0; 203} 204 205static int perf_iommu_event_init(struct perf_event *event) 206{ 207 struct hw_perf_event *hwc = &event->hw; 208 209 /* test the event attr type check for PMU enumeration */ 210 if (event->attr.type != event->pmu->type) 211 return -ENOENT; 212 213 /* 214 * IOMMU counters are shared across all cores. 215 * Therefore, it does not support per-process mode. 216 * Also, it does not support event sampling mode. 217 */ 218 if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) 219 return -EINVAL; 220 221 if (event->cpu < 0) 222 return -EINVAL; 223 224 /* update the hw_perf_event struct with the iommu config data */ 225 hwc->conf = event->attr.config; 226 hwc->conf1 = event->attr.config1; 227 228 return 0; 229} 230 231static inline struct amd_iommu *perf_event_2_iommu(struct perf_event *ev) 232{ 233 return (container_of(ev->pmu, struct perf_amd_iommu, pmu))->iommu; 234} 235 236static void perf_iommu_enable_event(struct perf_event *ev) 237{ 238 struct amd_iommu *iommu = perf_event_2_iommu(ev); 239 struct hw_perf_event *hwc = &ev->hw; 240 u8 bank = hwc->iommu_bank; 241 u8 cntr = hwc->iommu_cntr; 242 u64 reg = 0ULL; 243 244 reg = GET_CSOURCE(hwc); 245 amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, ®); 246 247 reg = GET_DEVID_MASK(hwc); 248 reg = GET_DEVID(hwc) | (reg << 32); 249 if (reg) 250 reg |= BIT(31); 251 amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, ®); 252 253 reg = GET_PASID_MASK(hwc); 254 reg = GET_PASID(hwc) | (reg << 32); 255 if (reg) 256 reg |= BIT(31); 257 amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_PASID_MATCH_REG, ®); 258 259 reg = GET_DOMID_MASK(hwc); 260 reg = GET_DOMID(hwc) | (reg << 32); 261 if (reg) 262 reg |= BIT(31); 263 amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, ®); 264} 265 266static void perf_iommu_disable_event(struct perf_event *event) 267{ 268 struct amd_iommu *iommu = perf_event_2_iommu(event); 269 struct hw_perf_event *hwc = &event->hw; 270 u64 reg = 0ULL; 271 272 amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, 273 IOMMU_PC_COUNTER_SRC_REG, ®); 274} 275 276static void perf_iommu_start(struct perf_event *event, int flags) 277{ 278 struct hw_perf_event *hwc = &event->hw; 279 280 if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) 281 return; 282 283 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); 284 hwc->state = 0; 285 286 /* 287 * To account for power-gating, which prevents write to 288 * the counter, we need to enable the counter 289 * before setting up counter register. 290 */ 291 perf_iommu_enable_event(event); 292 293 if (flags & PERF_EF_RELOAD) { 294 u64 count = 0; 295 struct amd_iommu *iommu = perf_event_2_iommu(event); 296 297 /* 298 * Since the IOMMU PMU only support counting mode, 299 * the counter always start with value zero. 300 */ 301 amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, 302 IOMMU_PC_COUNTER_REG, &count); 303 } 304 305 perf_event_update_userpage(event); 306} 307 308static void perf_iommu_read(struct perf_event *event) 309{ 310 u64 count; 311 struct hw_perf_event *hwc = &event->hw; 312 struct amd_iommu *iommu = perf_event_2_iommu(event); 313 314 if (amd_iommu_pc_get_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, 315 IOMMU_PC_COUNTER_REG, &count)) 316 return; 317 318 /* IOMMU pc counter register is only 48 bits */ 319 count &= GENMASK_ULL(47, 0); 320 321 /* 322 * Since the counter always start with value zero, 323 * simply just accumulate the count for the event. 324 */ 325 local64_add(count, &event->count); 326} 327 328static void perf_iommu_stop(struct perf_event *event, int flags) 329{ 330 struct hw_perf_event *hwc = &event->hw; 331 332 if (hwc->state & PERF_HES_UPTODATE) 333 return; 334 335 /* 336 * To account for power-gating, in which reading the counter would 337 * return zero, we need to read the register before disabling. 338 */ 339 perf_iommu_read(event); 340 hwc->state |= PERF_HES_UPTODATE; 341 342 perf_iommu_disable_event(event); 343 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); 344 hwc->state |= PERF_HES_STOPPED; 345} 346 347static int perf_iommu_add(struct perf_event *event, int flags) 348{ 349 int retval; 350 351 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 352 353 /* request an iommu bank/counter */ 354 retval = get_next_avail_iommu_bnk_cntr(event); 355 if (retval) 356 return retval; 357 358 if (flags & PERF_EF_START) 359 perf_iommu_start(event, PERF_EF_RELOAD); 360 361 return 0; 362} 363 364static void perf_iommu_del(struct perf_event *event, int flags) 365{ 366 struct hw_perf_event *hwc = &event->hw; 367 struct perf_amd_iommu *perf_iommu = 368 container_of(event->pmu, struct perf_amd_iommu, pmu); 369 370 perf_iommu_stop(event, PERF_EF_UPDATE); 371 372 /* clear the assigned iommu bank/counter */ 373 clear_avail_iommu_bnk_cntr(perf_iommu, 374 hwc->iommu_bank, hwc->iommu_cntr); 375 376 perf_event_update_userpage(event); 377} 378 379static __init int _init_events_attrs(void) 380{ 381 int i = 0, j; 382 struct attribute **attrs; 383 384 while (amd_iommu_v2_event_descs[i].attr.attr.name) 385 i++; 386 387 attrs = kcalloc(i + 1, sizeof(*attrs), GFP_KERNEL); 388 if (!attrs) 389 return -ENOMEM; 390 391 for (j = 0; j < i; j++) 392 attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr; 393 394 amd_iommu_events_group.attrs = attrs; 395 return 0; 396} 397 398static const struct attribute_group *amd_iommu_attr_groups[] = { 399 &amd_iommu_format_group, 400 &amd_iommu_cpumask_group, 401 &amd_iommu_events_group, 402 NULL, 403}; 404 405static const struct pmu iommu_pmu __initconst = { 406 .event_init = perf_iommu_event_init, 407 .add = perf_iommu_add, 408 .del = perf_iommu_del, 409 .start = perf_iommu_start, 410 .stop = perf_iommu_stop, 411 .read = perf_iommu_read, 412 .task_ctx_nr = perf_invalid_context, 413 .attr_groups = amd_iommu_attr_groups, 414 .capabilities = PERF_PMU_CAP_NO_EXCLUDE, 415}; 416 417static __init int init_one_iommu(unsigned int idx) 418{ 419 struct perf_amd_iommu *perf_iommu; 420 int ret; 421 422 perf_iommu = kzalloc(sizeof(struct perf_amd_iommu), GFP_KERNEL); 423 if (!perf_iommu) 424 return -ENOMEM; 425 426 raw_spin_lock_init(&perf_iommu->lock); 427 428 perf_iommu->pmu = iommu_pmu; 429 perf_iommu->iommu = get_amd_iommu(idx); 430 perf_iommu->max_banks = amd_iommu_pc_get_max_banks(idx); 431 perf_iommu->max_counters = amd_iommu_pc_get_max_counters(idx); 432 433 if (!perf_iommu->iommu || 434 !perf_iommu->max_banks || 435 !perf_iommu->max_counters) { 436 kfree(perf_iommu); 437 return -EINVAL; 438 } 439 440 snprintf(perf_iommu->name, IOMMU_NAME_SIZE, "amd_iommu_%u", idx); 441 442 ret = perf_pmu_register(&perf_iommu->pmu, perf_iommu->name, -1); 443 if (!ret) { 444 pr_info("Detected AMD IOMMU #%d (%d banks, %d counters/bank).\n", 445 idx, perf_iommu->max_banks, perf_iommu->max_counters); 446 list_add_tail(&perf_iommu->list, &perf_amd_iommu_list); 447 } else { 448 pr_warn("Error initializing IOMMU %d.\n", idx); 449 kfree(perf_iommu); 450 } 451 return ret; 452} 453 454static __init int amd_iommu_pc_init(void) 455{ 456 unsigned int i, cnt = 0; 457 int ret; 458 459 /* Make sure the IOMMU PC resource is available */ 460 if (!amd_iommu_pc_supported()) 461 return -ENODEV; 462 463 ret = _init_events_attrs(); 464 if (ret) 465 return ret; 466 467 /* 468 * An IOMMU PMU is specific to an IOMMU, and can function independently. 469 * So we go through all IOMMUs and ignore the one that fails init 470 * unless all IOMMU are failing. 471 */ 472 for (i = 0; i < amd_iommu_get_num_iommus(); i++) { 473 ret = init_one_iommu(i); 474 if (!ret) 475 cnt++; 476 } 477 478 if (!cnt) { 479 kfree(amd_iommu_events_group.attrs); 480 return -ENODEV; 481 } 482 483 /* Init cpumask attributes to only core 0 */ 484 cpumask_set_cpu(0, &iommu_cpumask); 485 return 0; 486} 487 488device_initcall(amd_iommu_pc_init); 489