// SPDX-License-Identifier: GPL-2.0

/*
 * This driver adds support for perf events to use the Performance
 * Monitor Counter Groups (PMCG) associated with an SMMUv3 node
 * to monitor that node.
 *
 * SMMUv3 PMCG devices are named as smmuv3_pmcg_<phys_addr_page> where
 * <phys_addr_page> is the physical page address of the SMMU PMCG wrapped
 * to 4K boundary. For example, the PMCG at 0xff88840000 is named
 * smmuv3_pmcg_ff88840
 *
 * Filtering by stream id is done by specifying filtering parameters
 * with the event. options are:
 *   filter_enable    - 0 = no filtering, 1 = filtering enabled
 *   filter_span      - 0 = exact match, 1 = pattern match
 *   filter_stream_id - pattern to filter against
 *
 * To match a partial StreamID where the X most-significant bits must match
 * but the Y least-significant bits might differ, STREAMID is programmed
 * with a value that contains:
 *  STREAMID[Y - 1] == 0.
 *  STREAMID[Y - 2:0] == 1 (where Y > 1).
 * The remainder of implemented bits of STREAMID (X bits, from bit Y upwards)
 * contain a value to match from the corresponding bits of event StreamID.
 *
 * Example: perf stat -e smmuv3_pmcg_ff88840/transaction,filter_enable=1,
 *                    filter_span=1,filter_stream_id=0x42/ -a netperf
 * Applies filter pattern 0x42 to transaction events, which means events
 * matching stream ids 0x42 and 0x43 are counted. Further filtering
 * information is available in the SMMU documentation.
 *
 * SMMU events are not attributable to a CPU, so task mode and sampling
 * are not supported.
 */
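/*
 * Worked example of the span encoding described above (illustrative
 * comment, not part of the original header): to match the eight StreamIDs
 * 0x40..0x47, the Y == 3 least-significant bits may differ, so
 * filter_stream_id is programmed with bit 2 clear and bits [1:0] set,
 * i.e. 0x40 | 0x3 == 0x43, together with filter_span = 1.
 */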
#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/cpuhotplug.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/msi.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/smp.h>
#include <linux/sysfs.h>
#include <linux/types.h>

#define SMMU_PMCG_EVCNTR0               0x0
#define SMMU_PMCG_EVCNTR(n, stride)     (SMMU_PMCG_EVCNTR0 + (n) * (stride))
#define SMMU_PMCG_EVTYPER0              0x400
#define SMMU_PMCG_EVTYPER(n)            (SMMU_PMCG_EVTYPER0 + (n) * 4)
#define SMMU_PMCG_SID_SPAN_SHIFT        29
#define SMMU_PMCG_SMR0                  0xA00
#define SMMU_PMCG_SMR(n)                (SMMU_PMCG_SMR0 + (n) * 4)
#define SMMU_PMCG_CNTENSET0             0xC00
#define SMMU_PMCG_CNTENCLR0             0xC20
#define SMMU_PMCG_INTENSET0             0xC40
#define SMMU_PMCG_INTENCLR0             0xC60
#define SMMU_PMCG_OVSCLR0               0xC80
#define SMMU_PMCG_OVSSET0               0xCC0
#define SMMU_PMCG_CFGR                  0xE00
#define SMMU_PMCG_CFGR_SID_FILTER_TYPE  BIT(23)
#define SMMU_PMCG_CFGR_MSI              BIT(21)
#define SMMU_PMCG_CFGR_RELOC_CTRS       BIT(20)
#define SMMU_PMCG_CFGR_SIZE             GENMASK(13, 8)
#define SMMU_PMCG_CFGR_NCTR             GENMASK(5, 0)
#define SMMU_PMCG_CR                    0xE04
#define SMMU_PMCG_CR_ENABLE             BIT(0)
#define SMMU_PMCG_CEID0                 0xE20
#define SMMU_PMCG_CEID1                 0xE28
#define SMMU_PMCG_IRQ_CTRL              0xE50
#define SMMU_PMCG_IRQ_CTRL_IRQEN        BIT(0)
#define SMMU_PMCG_IRQ_CFG0              0xE58
#define SMMU_PMCG_IRQ_CFG1              0xE60
#define SMMU_PMCG_IRQ_CFG2              0xE64

/* MSI config fields */
#define MSI_CFG0_ADDR_MASK              GENMASK_ULL(51, 2)
#define MSI_CFG2_MEMATTR_DEVICE_nGnRE   0x1

#define SMMU_PMCG_DEFAULT_FILTER_SPAN   1
#define SMMU_PMCG_DEFAULT_FILTER_SID    GENMASK(31, 0)

#define SMMU_PMCG_MAX_COUNTERS          64
#define SMMU_PMCG_ARCH_MAX_EVENTS       128

#define SMMU_PMCG_PA_SHIFT              12

#define SMMU_PMCG_EVCNTR_RDONLY         BIT(0)
#define SMMU_PMCG_HARDEN_DISABLE        BIT(1)

static int cpuhp_state_num;

struct smmu_pmu {
        struct hlist_node node;
        struct perf_event *events[SMMU_PMCG_MAX_COUNTERS];
        DECLARE_BITMAP(used_counters, SMMU_PMCG_MAX_COUNTERS);
        DECLARE_BITMAP(supported_events, SMMU_PMCG_ARCH_MAX_EVENTS);
        unsigned int irq;
        unsigned int on_cpu;
        struct pmu pmu;
        unsigned int num_counters;
        struct device *dev;
        void __iomem *reg_base;
        void __iomem *reloc_base;
        u64 counter_mask;
        u32 options;
        bool global_filter;
};

#define to_smmu_pmu(p) (container_of(p, struct smmu_pmu, pmu))

#define SMMU_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end)    \
        static inline u32 get_##_name(struct perf_event *event)        \
        {                                                               \
                return FIELD_GET(GENMASK_ULL(_end, _start),             \
                                 event->attr._config);                  \
        }                                                               \

SMMU_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 15);
SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_stream_id, config1, 0, 31);
SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_span, config1, 32, 32);
SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_enable, config1, 33, 33);

static inline void smmu_pmu_enable(struct pmu *pmu)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);

        writel(SMMU_PMCG_IRQ_CTRL_IRQEN,
               smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL);
        writel(SMMU_PMCG_CR_ENABLE, smmu_pmu->reg_base + SMMU_PMCG_CR);
}

static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu,
                                       struct perf_event *event, int idx);

static inline void smmu_pmu_enable_quirk_hip08_09(struct pmu *pmu)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
        unsigned int idx;

        for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters)
                smmu_pmu_apply_event_filter(smmu_pmu, smmu_pmu->events[idx], idx);

        smmu_pmu_enable(pmu);
}

static inline void smmu_pmu_disable(struct pmu *pmu)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);

        writel(0, smmu_pmu->reg_base + SMMU_PMCG_CR);
        writel(0, smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL);
}
static inline void smmu_pmu_disable_quirk_hip08_09(struct pmu *pmu)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
        unsigned int idx;

        /*
         * The global disable of the PMU sometimes fails to stop the counting.
         * Harden this by writing an invalid event type to each used counter
         * to forcibly stop counting.
         */
        for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters)
                writel(0xffff, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx));

        smmu_pmu_disable(pmu);
}

static inline void smmu_pmu_counter_set_value(struct smmu_pmu *smmu_pmu,
                                              u32 idx, u64 value)
{
        if (smmu_pmu->counter_mask & BIT(32))
                writeq(value, smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 8));
        else
                writel(value, smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 4));
}

static inline u64 smmu_pmu_counter_get_value(struct smmu_pmu *smmu_pmu, u32 idx)
{
        u64 value;

        if (smmu_pmu->counter_mask & BIT(32))
                value = readq(smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 8));
        else
                value = readl(smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 4));

        return value;
}

static inline void smmu_pmu_counter_enable(struct smmu_pmu *smmu_pmu, u32 idx)
{
        writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENSET0);
}

static inline void smmu_pmu_counter_disable(struct smmu_pmu *smmu_pmu, u32 idx)
{
        writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENCLR0);
}

static inline void smmu_pmu_interrupt_enable(struct smmu_pmu *smmu_pmu, u32 idx)
{
        writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_INTENSET0);
}

static inline void smmu_pmu_interrupt_disable(struct smmu_pmu *smmu_pmu,
                                              u32 idx)
{
        writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_INTENCLR0);
}

static inline void smmu_pmu_set_evtyper(struct smmu_pmu *smmu_pmu, u32 idx,
                                        u32 val)
{
        writel(val, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx));
}

static inline void smmu_pmu_set_smr(struct smmu_pmu *smmu_pmu, u32 idx, u32 val)
{
        writel(val, smmu_pmu->reg_base + SMMU_PMCG_SMR(idx));
}

static void smmu_pmu_event_update(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
        u64 delta, prev, now;
        u32 idx = hwc->idx;

        do {
                prev = local64_read(&hwc->prev_count);
                now = smmu_pmu_counter_get_value(smmu_pmu, idx);
        } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);

        /* handle overflow. */
        delta = now - prev;
        delta &= smmu_pmu->counter_mask;

        local64_add(delta, &event->count);
}

static void smmu_pmu_set_period(struct smmu_pmu *smmu_pmu,
                                struct hw_perf_event *hwc)
{
        u32 idx = hwc->idx;
        u64 new;

        if (smmu_pmu->options & SMMU_PMCG_EVCNTR_RDONLY) {
                /*
                 * On platforms that require this quirk, if the counter starts
                 * at < half_counter value and wraps, the current logic of
                 * handling the overflow may not work. It is expected that
                 * those platforms will have full 64 counter bits implemented
                 * so that such a possibility is remote (e.g. HiSilicon HIP08).
                 */
                new = smmu_pmu_counter_get_value(smmu_pmu, idx);
        } else {
                /*
                 * We limit the max period to half the max counter value
                 * of the counter size, so that even in the case of extreme
                 * interrupt latency the counter will (hopefully) not wrap
                 * past its initial value.
                 */
                new = smmu_pmu->counter_mask >> 1;
                smmu_pmu_counter_set_value(smmu_pmu, idx, new);
        }

        local64_set(&hwc->prev_count, new);
}
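/*
 * Worked example of the half-range preset above (illustrative comment,
 * not from the original source): a 32-bit PMCG counter has
 * counter_mask == GENMASK_ULL(31, 0), so smmu_pmu_set_period() programs
 * the counter to 0x7fffffff, leaving roughly 2^31 events of headroom
 * before the overflow interrupt must be serviced.
 */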
static void smmu_pmu_set_event_filter(struct perf_event *event,
                                      int idx, u32 span, u32 sid)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
        u32 evtyper;

        evtyper = get_event(event) | span << SMMU_PMCG_SID_SPAN_SHIFT;
        smmu_pmu_set_evtyper(smmu_pmu, idx, evtyper);
        smmu_pmu_set_smr(smmu_pmu, idx, sid);
}
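/*
 * Illustration of the register values written above (comment added for
 * clarity, not from the original source): for the header's example of the
 * "transaction" event (event 0x1) with filter_span=1 and
 * filter_stream_id=0x42, EVTYPER is programmed with
 * 0x1 | (1 << SMMU_PMCG_SID_SPAN_SHIFT) == 0x20000001 and SMR with 0x42.
 */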
static bool smmu_pmu_check_global_filter(struct perf_event *curr,
                                         struct perf_event *new)
{
        if (get_filter_enable(new) != get_filter_enable(curr))
                return false;

        if (!get_filter_enable(new))
                return true;

        return get_filter_span(new) == get_filter_span(curr) &&
               get_filter_stream_id(new) == get_filter_stream_id(curr);
}

static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu,
                                       struct perf_event *event, int idx)
{
        u32 span, sid;
        unsigned int cur_idx, num_ctrs = smmu_pmu->num_counters;
        bool filter_en = !!get_filter_enable(event);

        span = filter_en ? get_filter_span(event) :
                           SMMU_PMCG_DEFAULT_FILTER_SPAN;
        sid = filter_en ? get_filter_stream_id(event) :
                          SMMU_PMCG_DEFAULT_FILTER_SID;

        cur_idx = find_first_bit(smmu_pmu->used_counters, num_ctrs);
        /*
         * Per-counter filtering, or scheduling the first globally-filtered
         * event into an empty PMU so idx == 0 and it works out equivalent.
         */
        if (!smmu_pmu->global_filter || cur_idx == num_ctrs) {
                smmu_pmu_set_event_filter(event, idx, span, sid);
                return 0;
        }

        /* Otherwise, must match whatever's currently scheduled */
        if (smmu_pmu_check_global_filter(smmu_pmu->events[cur_idx], event)) {
                smmu_pmu_set_evtyper(smmu_pmu, idx, get_event(event));
                return 0;
        }

        return -EAGAIN;
}

static int smmu_pmu_get_event_idx(struct smmu_pmu *smmu_pmu,
                                  struct perf_event *event)
{
        int idx, err;
        unsigned int num_ctrs = smmu_pmu->num_counters;

        idx = find_first_zero_bit(smmu_pmu->used_counters, num_ctrs);
        if (idx == num_ctrs)
                /* The counters are all in use. */
                return -EAGAIN;

        err = smmu_pmu_apply_event_filter(smmu_pmu, event, idx);
        if (err)
                return err;

        set_bit(idx, smmu_pmu->used_counters);

        return idx;
}

static bool smmu_pmu_events_compatible(struct perf_event *curr,
                                       struct perf_event *new)
{
        if (new->pmu != curr->pmu)
                return false;

        if (to_smmu_pmu(new->pmu)->global_filter &&
            !smmu_pmu_check_global_filter(curr, new))
                return false;

        return true;
}

/*
 * Implementation of abstract pmu functionality required by
 * the core perf events code.
 */

static int smmu_pmu_event_init(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
        struct device *dev = smmu_pmu->dev;
        struct perf_event *sibling;
        int group_num_events = 1;
        u16 event_id;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        if (hwc->sample_period) {
                dev_dbg(dev, "Sampling not supported\n");
                return -EOPNOTSUPP;
        }

        if (event->cpu < 0) {
                dev_dbg(dev, "Per-task mode not supported\n");
                return -EOPNOTSUPP;
        }

        /* Verify specified event is supported on this PMU */
        event_id = get_event(event);
        if (event_id < SMMU_PMCG_ARCH_MAX_EVENTS &&
            (!test_bit(event_id, smmu_pmu->supported_events))) {
                dev_dbg(dev, "Invalid event %d for this PMU\n", event_id);
                return -EINVAL;
        }

        /* Don't allow groups with mixed PMUs, except for s/w events */
        if (!is_software_event(event->group_leader)) {
                if (!smmu_pmu_events_compatible(event->group_leader, event))
                        return -EINVAL;

                if (++group_num_events > smmu_pmu->num_counters)
                        return -EINVAL;
        }

        for_each_sibling_event(sibling, event->group_leader) {
                if (is_software_event(sibling))
                        continue;

                if (!smmu_pmu_events_compatible(sibling, event))
                        return -EINVAL;

                if (++group_num_events > smmu_pmu->num_counters)
                        return -EINVAL;
        }

        hwc->idx = -1;

        /*
         * Ensure all events are on the same cpu so all events are in the
         * same cpu context, to avoid races on pmu_enable etc.
         */
        event->cpu = smmu_pmu->on_cpu;

        return 0;
}

static void smmu_pmu_event_start(struct perf_event *event, int flags)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        hwc->state = 0;

        smmu_pmu_set_period(smmu_pmu, hwc);

        smmu_pmu_counter_enable(smmu_pmu, idx);
}

static void smmu_pmu_event_stop(struct perf_event *event, int flags)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        if (hwc->state & PERF_HES_STOPPED)
                return;

        smmu_pmu_counter_disable(smmu_pmu, idx);
        /* As the counter gets updated on _start, ignore PERF_EF_UPDATE */
        smmu_pmu_event_update(event);
        hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}

static int smmu_pmu_event_add(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        int idx;
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);

        idx = smmu_pmu_get_event_idx(smmu_pmu, event);
        if (idx < 0)
                return idx;

        hwc->idx = idx;
        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        smmu_pmu->events[idx] = event;
        local64_set(&hwc->prev_count, 0);

        smmu_pmu_interrupt_enable(smmu_pmu, idx);

        if (flags & PERF_EF_START)
                smmu_pmu_event_start(event, flags);

        /* Propagate changes to the userspace mapping. */
        perf_event_update_userpage(event);

        return 0;
}
static void smmu_pmu_event_del(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu);
        int idx = hwc->idx;

        smmu_pmu_event_stop(event, flags | PERF_EF_UPDATE);
        smmu_pmu_interrupt_disable(smmu_pmu, idx);
        smmu_pmu->events[idx] = NULL;
        clear_bit(idx, smmu_pmu->used_counters);

        perf_event_update_userpage(event);
}

static void smmu_pmu_event_read(struct perf_event *event)
{
        smmu_pmu_event_update(event);
}

/* cpumask */

static ssize_t smmu_pmu_cpumask_show(struct device *dev,
                                     struct device_attribute *attr,
                                     char *buf)
{
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));

        return cpumap_print_to_pagebuf(true, buf, cpumask_of(smmu_pmu->on_cpu));
}

static struct device_attribute smmu_pmu_cpumask_attr =
        __ATTR(cpumask, 0444, smmu_pmu_cpumask_show, NULL);

static struct attribute *smmu_pmu_cpumask_attrs[] = {
        &smmu_pmu_cpumask_attr.attr,
        NULL
};

static struct attribute_group smmu_pmu_cpumask_group = {
        .attrs = smmu_pmu_cpumask_attrs,
};

/* Events */

static ssize_t smmu_pmu_event_show(struct device *dev,
                                   struct device_attribute *attr, char *page)
{
        struct perf_pmu_events_attr *pmu_attr;

        pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);

        return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
}

#define SMMU_EVENT_ATTR(name, config)                           \
        PMU_EVENT_ATTR(name, smmu_event_attr_##name,            \
                       config, smmu_pmu_event_show)
SMMU_EVENT_ATTR(cycles, 0);
SMMU_EVENT_ATTR(transaction, 1);
SMMU_EVENT_ATTR(tlb_miss, 2);
SMMU_EVENT_ATTR(config_cache_miss, 3);
SMMU_EVENT_ATTR(trans_table_walk_access, 4);
SMMU_EVENT_ATTR(config_struct_access, 5);
SMMU_EVENT_ATTR(pcie_ats_trans_rq, 6);
SMMU_EVENT_ATTR(pcie_ats_trans_passed, 7);

static struct attribute *smmu_pmu_events[] = {
        &smmu_event_attr_cycles.attr.attr,
        &smmu_event_attr_transaction.attr.attr,
        &smmu_event_attr_tlb_miss.attr.attr,
        &smmu_event_attr_config_cache_miss.attr.attr,
        &smmu_event_attr_trans_table_walk_access.attr.attr,
        &smmu_event_attr_config_struct_access.attr.attr,
        &smmu_event_attr_pcie_ats_trans_rq.attr.attr,
        &smmu_event_attr_pcie_ats_trans_passed.attr.attr,
        NULL
};

static umode_t smmu_pmu_event_is_visible(struct kobject *kobj,
                                         struct attribute *attr, int unused)
{
        struct device *dev = kobj_to_dev(kobj);
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
        struct perf_pmu_events_attr *pmu_attr;

        pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);

        if (test_bit(pmu_attr->id, smmu_pmu->supported_events))
                return attr->mode;

        return 0;
}

static struct attribute_group smmu_pmu_events_group = {
        .name = "events",
        .attrs = smmu_pmu_events,
        .is_visible = smmu_pmu_event_is_visible,
};

/* Formats */
PMU_FORMAT_ATTR(event, "config:0-15");
PMU_FORMAT_ATTR(filter_stream_id, "config1:0-31");
PMU_FORMAT_ATTR(filter_span, "config1:32");
PMU_FORMAT_ATTR(filter_enable, "config1:33");

static struct attribute *smmu_pmu_formats[] = {
        &format_attr_event.attr,
        &format_attr_filter_stream_id.attr,
        &format_attr_filter_span.attr,
        &format_attr_filter_enable.attr,
        NULL
};

static struct attribute_group smmu_pmu_format_group = {
        .name = "format",
        .attrs = smmu_pmu_formats,
};
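/*
 * Illustration of the format strings above (comment added for clarity,
 * not from the original source): the header's example
 * "filter_enable=1,filter_span=1,filter_stream_id=0x42" is packed by the
 * perf tool into attr.config1 = (1ULL << 33) | (1ULL << 32) | 0x42,
 * i.e. 0x300000042, which the get_filter_*() extractors above decode.
 */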
static const struct attribute_group *smmu_pmu_attr_grps[] = {
        &smmu_pmu_cpumask_group,
        &smmu_pmu_events_group,
        &smmu_pmu_format_group,
        NULL
};

/*
 * Generic device handlers
 */

static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
        struct smmu_pmu *smmu_pmu;
        unsigned int target;

        smmu_pmu = hlist_entry_safe(node, struct smmu_pmu, node);
        if (cpu != smmu_pmu->on_cpu)
                return 0;

        target = cpumask_any_but(cpu_online_mask, cpu);
        if (target >= nr_cpu_ids)
                return 0;

        perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
        smmu_pmu->on_cpu = target;
        WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, cpumask_of(target)));

        return 0;
}

static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data)
{
        struct smmu_pmu *smmu_pmu = data;
        u64 ovsr;
        unsigned int idx;

        ovsr = readq(smmu_pmu->reloc_base + SMMU_PMCG_OVSSET0);
        if (!ovsr)
                return IRQ_NONE;

        writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);

        for_each_set_bit(idx, (unsigned long *)&ovsr, smmu_pmu->num_counters) {
                struct perf_event *event = smmu_pmu->events[idx];
                struct hw_perf_event *hwc;

                if (WARN_ON_ONCE(!event))
                        continue;

                smmu_pmu_event_update(event);
                hwc = &event->hw;

                smmu_pmu_set_period(smmu_pmu, hwc);
        }

        return IRQ_HANDLED;
}

static void smmu_pmu_free_msis(void *data)
{
        struct device *dev = data;

        platform_msi_domain_free_irqs(dev);
}

static void smmu_pmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
        phys_addr_t doorbell;
        struct device *dev = msi_desc_to_dev(desc);
        struct smmu_pmu *pmu = dev_get_drvdata(dev);

        doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
        doorbell &= MSI_CFG0_ADDR_MASK;

        writeq_relaxed(doorbell, pmu->reg_base + SMMU_PMCG_IRQ_CFG0);
        writel_relaxed(msg->data, pmu->reg_base + SMMU_PMCG_IRQ_CFG1);
        writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE,
                       pmu->reg_base + SMMU_PMCG_IRQ_CFG2);
}

static void smmu_pmu_setup_msi(struct smmu_pmu *pmu)
{
        struct msi_desc *desc;
        struct device *dev = pmu->dev;
        int ret;

        /* Clear MSI address reg */
        writeq_relaxed(0, pmu->reg_base + SMMU_PMCG_IRQ_CFG0);

        /* MSI supported or not */
        if (!(readl(pmu->reg_base + SMMU_PMCG_CFGR) & SMMU_PMCG_CFGR_MSI))
                return;

        ret = platform_msi_domain_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg);
        if (ret) {
                dev_warn(dev, "failed to allocate MSIs\n");
                return;
        }

        desc = first_msi_entry(dev);
        if (desc)
                pmu->irq = desc->irq;

        /* Add callback to free MSIs on teardown */
        devm_add_action(dev, smmu_pmu_free_msis, dev);
}

static int smmu_pmu_setup_irq(struct smmu_pmu *pmu)
{
        unsigned long flags = IRQF_NOBALANCING | IRQF_SHARED | IRQF_NO_THREAD;
        int irq, ret = -ENXIO;

        smmu_pmu_setup_msi(pmu);

        irq = pmu->irq;
        if (irq)
                ret = devm_request_irq(pmu->dev, irq, smmu_pmu_handle_irq,
                                       flags, "smmuv3-pmu", pmu);
        return ret;
}
static void smmu_pmu_reset(struct smmu_pmu *smmu_pmu)
{
        u64 counter_present_mask = GENMASK_ULL(smmu_pmu->num_counters - 1, 0);

        smmu_pmu_disable(&smmu_pmu->pmu);

        /* Disable counter and interrupt */
        writeq_relaxed(counter_present_mask,
                       smmu_pmu->reg_base + SMMU_PMCG_CNTENCLR0);
        writeq_relaxed(counter_present_mask,
                       smmu_pmu->reg_base + SMMU_PMCG_INTENCLR0);
        writeq_relaxed(counter_present_mask,
                       smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);
}

static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu)
{
        u32 model;

        model = *(u32 *)dev_get_platdata(smmu_pmu->dev);

        switch (model) {
        case IORT_SMMU_V3_PMCG_HISI_HIP08:
                /* HiSilicon Erratum 162001800 */
                smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY | SMMU_PMCG_HARDEN_DISABLE;
                break;
        case IORT_SMMU_V3_PMCG_HISI_HIP09:
                smmu_pmu->options |= SMMU_PMCG_HARDEN_DISABLE;
                break;
        }

        dev_notice(smmu_pmu->dev, "option mask 0x%x\n", smmu_pmu->options);
}
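/*
 * Illustration of the CFGR decoding in smmu_pmu_probe() below (comment
 * added for clarity, not from the original source): a PMCG advertising
 * CFGR.NCTR == 7 and CFGR.SIZE == 31 provides NCTR + 1 == 8 counters,
 * each SIZE + 1 == 32 bits wide, so counter_mask == GENMASK_ULL(31, 0).
 */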
static int smmu_pmu_probe(struct platform_device *pdev)
{
        struct smmu_pmu *smmu_pmu;
        struct resource *res_0;
        u32 cfgr, reg_size;
        u64 ceid_64[2];
        int irq, err;
        char *name;
        struct device *dev = &pdev->dev;

        smmu_pmu = devm_kzalloc(dev, sizeof(*smmu_pmu), GFP_KERNEL);
        if (!smmu_pmu)
                return -ENOMEM;

        smmu_pmu->dev = dev;
        platform_set_drvdata(pdev, smmu_pmu);

        smmu_pmu->pmu = (struct pmu) {
                .module = THIS_MODULE,
                .task_ctx_nr = perf_invalid_context,
                .pmu_enable = smmu_pmu_enable,
                .pmu_disable = smmu_pmu_disable,
                .event_init = smmu_pmu_event_init,
                .add = smmu_pmu_event_add,
                .del = smmu_pmu_event_del,
                .start = smmu_pmu_event_start,
                .stop = smmu_pmu_event_stop,
                .read = smmu_pmu_event_read,
                .attr_groups = smmu_pmu_attr_grps,
                .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
        };

        smmu_pmu->reg_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res_0);
        if (IS_ERR(smmu_pmu->reg_base))
                return PTR_ERR(smmu_pmu->reg_base);

        cfgr = readl_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CFGR);

        /* Determine if page 1 is present */
        if (cfgr & SMMU_PMCG_CFGR_RELOC_CTRS) {
                smmu_pmu->reloc_base = devm_platform_ioremap_resource(pdev, 1);
                if (IS_ERR(smmu_pmu->reloc_base))
                        return PTR_ERR(smmu_pmu->reloc_base);
        } else {
                smmu_pmu->reloc_base = smmu_pmu->reg_base;
        }

        irq = platform_get_irq_optional(pdev, 0);
        if (irq > 0)
                smmu_pmu->irq = irq;

        ceid_64[0] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID0);
        ceid_64[1] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID1);
        bitmap_from_arr32(smmu_pmu->supported_events, (u32 *)ceid_64,
                          SMMU_PMCG_ARCH_MAX_EVENTS);

        smmu_pmu->num_counters = FIELD_GET(SMMU_PMCG_CFGR_NCTR, cfgr) + 1;

        smmu_pmu->global_filter = !!(cfgr & SMMU_PMCG_CFGR_SID_FILTER_TYPE);

        reg_size = FIELD_GET(SMMU_PMCG_CFGR_SIZE, cfgr);
        smmu_pmu->counter_mask = GENMASK_ULL(reg_size, 0);

        smmu_pmu_reset(smmu_pmu);

        err = smmu_pmu_setup_irq(smmu_pmu);
        if (err) {
                dev_err(dev, "Setup irq failed, PMU @%pa\n", &res_0->start);
                return err;
        }

        name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "smmuv3_pmcg_%llx",
                              (res_0->start) >> SMMU_PMCG_PA_SHIFT);
        if (!name) {
                dev_err(dev, "Create name failed, PMU @%pa\n", &res_0->start);
                return -EINVAL;
        }

        smmu_pmu_get_acpi_options(smmu_pmu);

        /*
         * For platforms that suffer from this quirk, the PMU disable
         * sometimes fails to stop the counters, which leads to inaccurate
         * or erroneous counting. Forcibly disable the counters with the
         * quirk handlers.
         */
        if (smmu_pmu->options & SMMU_PMCG_HARDEN_DISABLE) {
                smmu_pmu->pmu.pmu_enable = smmu_pmu_enable_quirk_hip08_09;
                smmu_pmu->pmu.pmu_disable = smmu_pmu_disable_quirk_hip08_09;
        }

        /* Pick one CPU to be the preferred one to use */
        smmu_pmu->on_cpu = raw_smp_processor_id();
        WARN_ON(irq_set_affinity_hint(smmu_pmu->irq,
                                      cpumask_of(smmu_pmu->on_cpu)));

        err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
                                               &smmu_pmu->node);
        if (err) {
                dev_err(dev, "Error %d registering hotplug, PMU @%pa\n",
                        err, &res_0->start);
                goto out_clear_affinity;
        }

        err = perf_pmu_register(&smmu_pmu->pmu, name, -1);
        if (err) {
                dev_err(dev, "Error %d registering PMU @%pa\n",
                        err, &res_0->start);
                goto out_unregister;
        }

        dev_info(dev, "Registered PMU @ %pa using %d counters with %s filter settings\n",
                 &res_0->start, smmu_pmu->num_counters,
                 smmu_pmu->global_filter ? "Global(Counter0)" : "Individual");

        return 0;

out_unregister:
        cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
out_clear_affinity:
        irq_set_affinity_hint(smmu_pmu->irq, NULL);
        return err;
}

static int smmu_pmu_remove(struct platform_device *pdev)
{
        struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);

        perf_pmu_unregister(&smmu_pmu->pmu);
        cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
        irq_set_affinity_hint(smmu_pmu->irq, NULL);

        return 0;
}

static void smmu_pmu_shutdown(struct platform_device *pdev)
{
        struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);

        smmu_pmu_disable(&smmu_pmu->pmu);
}

static struct platform_driver smmu_pmu_driver = {
        .driver = {
                .name = "arm-smmu-v3-pmcg",
                .suppress_bind_attrs = true,
        },
        .probe = smmu_pmu_probe,
        .remove = smmu_pmu_remove,
        .shutdown = smmu_pmu_shutdown,
};

static int __init arm_smmu_pmu_init(void)
{
        int ret;

        cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
                                                  "perf/arm/pmcg:online",
                                                  NULL,
                                                  smmu_pmu_offline_cpu);
        if (cpuhp_state_num < 0)
                return cpuhp_state_num;

        ret = platform_driver_register(&smmu_pmu_driver);
        if (ret)
                cpuhp_remove_multi_state(cpuhp_state_num);

        return ret;
}
module_init(arm_smmu_pmu_init);

static void __exit arm_smmu_pmu_exit(void)
{
        platform_driver_unregister(&smmu_pmu_driver);
        cpuhp_remove_multi_state(cpuhp_state_num);
}

module_exit(arm_smmu_pmu_exit);

MODULE_DESCRIPTION("PMU driver for ARM SMMUv3 Performance Monitors Extension");
MODULE_AUTHOR("Neil Leeder <nleeder@codeaurora.org>");
MODULE_AUTHOR("Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>");
MODULE_LICENSE("GPL v2");