162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * Performance events - AMD IBS 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * For licencing details see kernel-base/COPYING 762306a36Sopenharmony_ci */ 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci#include <linux/perf_event.h> 1062306a36Sopenharmony_ci#include <linux/init.h> 1162306a36Sopenharmony_ci#include <linux/export.h> 1262306a36Sopenharmony_ci#include <linux/pci.h> 1362306a36Sopenharmony_ci#include <linux/ptrace.h> 1462306a36Sopenharmony_ci#include <linux/syscore_ops.h> 1562306a36Sopenharmony_ci#include <linux/sched/clock.h> 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci#include <asm/apic.h> 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci#include "../perf_event.h" 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_cistatic u32 ibs_caps; 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci#include <linux/kprobes.h> 2662306a36Sopenharmony_ci#include <linux/hardirq.h> 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci#include <asm/nmi.h> 2962306a36Sopenharmony_ci#include <asm/amd-ibs.h> 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) 3262306a36Sopenharmony_ci#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci/* 3662306a36Sopenharmony_ci * IBS states: 3762306a36Sopenharmony_ci * 3862306a36Sopenharmony_ci * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken 3962306a36Sopenharmony_ci * and any further add()s must fail. 
 *
 * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
 *                           complicated by the fact that the IBS hardware can send
 *                           late NMIs (ie. after we've cleared the EN bit).
 *
 * In order to consume these late NMIs we have the STOPPED state, any NMI that
 * happens after we've cleared the EN state will clear this bit and report the
 * NMI handled (this is fundamentally racy in the face of multiple NMI sources,
 * someone else can consume our BIT and our NMI will go unhandled).
 *
 * And since we cannot set/clear this separate bit together with the EN bit,
 * there are races; if we cleared STARTED early, an NMI could land in
 * between clearing STARTED and clearing the EN bit (in fact multiple NMIs
 * could happen if the period is small enough), and consume our STOPPED bit
 * and trigger streams of unhandled NMIs.
 *
 * If, however, we clear STARTED late, an NMI can hit between clearing the
 * EN bit and clearing STARTED, still see STARTED set and process the event.
 * If this event will have the VALID bit clear, we bail properly, but this
 * is not a given. With VALID set we can end up calling pmu::stop() again
 * (the throttle logic) and trigger the WARNs in there.
6162306a36Sopenharmony_ci * 6262306a36Sopenharmony_ci * So what we do is set STOPPING before clearing EN to avoid the pmu::stop() 6362306a36Sopenharmony_ci * nesting, and clear STARTED late, so that we have a well defined state over 6462306a36Sopenharmony_ci * the clearing of the EN bit. 6562306a36Sopenharmony_ci * 6662306a36Sopenharmony_ci * XXX: we could probably be using !atomic bitops for all this. 6762306a36Sopenharmony_ci */ 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_cienum ibs_states { 7062306a36Sopenharmony_ci IBS_ENABLED = 0, 7162306a36Sopenharmony_ci IBS_STARTED = 1, 7262306a36Sopenharmony_ci IBS_STOPPING = 2, 7362306a36Sopenharmony_ci IBS_STOPPED = 3, 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci IBS_MAX_STATES, 7662306a36Sopenharmony_ci}; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_cistruct cpu_perf_ibs { 7962306a36Sopenharmony_ci struct perf_event *event; 8062306a36Sopenharmony_ci unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)]; 8162306a36Sopenharmony_ci}; 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_cistruct perf_ibs { 8462306a36Sopenharmony_ci struct pmu pmu; 8562306a36Sopenharmony_ci unsigned int msr; 8662306a36Sopenharmony_ci u64 config_mask; 8762306a36Sopenharmony_ci u64 cnt_mask; 8862306a36Sopenharmony_ci u64 enable_mask; 8962306a36Sopenharmony_ci u64 valid_mask; 9062306a36Sopenharmony_ci u64 max_period; 9162306a36Sopenharmony_ci unsigned long offset_mask[1]; 9262306a36Sopenharmony_ci int offset_max; 9362306a36Sopenharmony_ci unsigned int fetch_count_reset_broken : 1; 9462306a36Sopenharmony_ci unsigned int fetch_ignore_if_zero_rip : 1; 9562306a36Sopenharmony_ci struct cpu_perf_ibs __percpu *pcpu; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci u64 (*get_count)(u64 config); 9862306a36Sopenharmony_ci}; 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_cistatic int 10162306a36Sopenharmony_ciperf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period) 10262306a36Sopenharmony_ci{ 
10362306a36Sopenharmony_ci s64 left = local64_read(&hwc->period_left); 10462306a36Sopenharmony_ci s64 period = hwc->sample_period; 10562306a36Sopenharmony_ci int overflow = 0; 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci /* 10862306a36Sopenharmony_ci * If we are way outside a reasonable range then just skip forward: 10962306a36Sopenharmony_ci */ 11062306a36Sopenharmony_ci if (unlikely(left <= -period)) { 11162306a36Sopenharmony_ci left = period; 11262306a36Sopenharmony_ci local64_set(&hwc->period_left, left); 11362306a36Sopenharmony_ci hwc->last_period = period; 11462306a36Sopenharmony_ci overflow = 1; 11562306a36Sopenharmony_ci } 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci if (unlikely(left < (s64)min)) { 11862306a36Sopenharmony_ci left += period; 11962306a36Sopenharmony_ci local64_set(&hwc->period_left, left); 12062306a36Sopenharmony_ci hwc->last_period = period; 12162306a36Sopenharmony_ci overflow = 1; 12262306a36Sopenharmony_ci } 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci /* 12562306a36Sopenharmony_ci * If the hw period that triggers the sw overflow is too short 12662306a36Sopenharmony_ci * we might hit the irq handler. This biases the results. 12762306a36Sopenharmony_ci * Thus we shorten the next-to-last period and set the last 12862306a36Sopenharmony_ci * period to the max period. 
12962306a36Sopenharmony_ci */ 13062306a36Sopenharmony_ci if (left > max) { 13162306a36Sopenharmony_ci left -= max; 13262306a36Sopenharmony_ci if (left > max) 13362306a36Sopenharmony_ci left = max; 13462306a36Sopenharmony_ci else if (left < min) 13562306a36Sopenharmony_ci left = min; 13662306a36Sopenharmony_ci } 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci *hw_period = (u64)left; 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci return overflow; 14162306a36Sopenharmony_ci} 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_cistatic int 14462306a36Sopenharmony_ciperf_event_try_update(struct perf_event *event, u64 new_raw_count, int width) 14562306a36Sopenharmony_ci{ 14662306a36Sopenharmony_ci struct hw_perf_event *hwc = &event->hw; 14762306a36Sopenharmony_ci int shift = 64 - width; 14862306a36Sopenharmony_ci u64 prev_raw_count; 14962306a36Sopenharmony_ci u64 delta; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci /* 15262306a36Sopenharmony_ci * Careful: an NMI might modify the previous event value. 15362306a36Sopenharmony_ci * 15462306a36Sopenharmony_ci * Our tactic to handle this is to first atomically read and 15562306a36Sopenharmony_ci * exchange a new raw count - then add that new-prev delta 15662306a36Sopenharmony_ci * count to the generic event atomically: 15762306a36Sopenharmony_ci */ 15862306a36Sopenharmony_ci prev_raw_count = local64_read(&hwc->prev_count); 15962306a36Sopenharmony_ci if (!local64_try_cmpxchg(&hwc->prev_count, 16062306a36Sopenharmony_ci &prev_raw_count, new_raw_count)) 16162306a36Sopenharmony_ci return 0; 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci /* 16462306a36Sopenharmony_ci * Now we have the new raw value and have updated the prev 16562306a36Sopenharmony_ci * timestamp already. We can now calculate the elapsed delta 16662306a36Sopenharmony_ci * (event-)time and add that to the generic event. 
16762306a36Sopenharmony_ci * 16862306a36Sopenharmony_ci * Careful, not all hw sign-extends above the physical width 16962306a36Sopenharmony_ci * of the count. 17062306a36Sopenharmony_ci */ 17162306a36Sopenharmony_ci delta = (new_raw_count << shift) - (prev_raw_count << shift); 17262306a36Sopenharmony_ci delta >>= shift; 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci local64_add(delta, &event->count); 17562306a36Sopenharmony_ci local64_sub(delta, &hwc->period_left); 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci return 1; 17862306a36Sopenharmony_ci} 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_cistatic struct perf_ibs perf_ibs_fetch; 18162306a36Sopenharmony_cistatic struct perf_ibs perf_ibs_op; 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_cistatic struct perf_ibs *get_ibs_pmu(int type) 18462306a36Sopenharmony_ci{ 18562306a36Sopenharmony_ci if (perf_ibs_fetch.pmu.type == type) 18662306a36Sopenharmony_ci return &perf_ibs_fetch; 18762306a36Sopenharmony_ci if (perf_ibs_op.pmu.type == type) 18862306a36Sopenharmony_ci return &perf_ibs_op; 18962306a36Sopenharmony_ci return NULL; 19062306a36Sopenharmony_ci} 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci/* 19362306a36Sopenharmony_ci * core pmu config -> IBS config 19462306a36Sopenharmony_ci * 19562306a36Sopenharmony_ci * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count 19662306a36Sopenharmony_ci * perf record -a -e r076:p ... # same as -e cpu-cycles:p 19762306a36Sopenharmony_ci * perf record -a -e r0C1:p ... # use ibs op counting micro-ops 19862306a36Sopenharmony_ci * 19962306a36Sopenharmony_ci * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl, 20062306a36Sopenharmony_ci * MSRC001_1033) is used to select either cycle or micro-ops counting 20162306a36Sopenharmony_ci * mode. 
20262306a36Sopenharmony_ci */ 20362306a36Sopenharmony_cistatic int core_pmu_ibs_config(struct perf_event *event, u64 *config) 20462306a36Sopenharmony_ci{ 20562306a36Sopenharmony_ci switch (event->attr.type) { 20662306a36Sopenharmony_ci case PERF_TYPE_HARDWARE: 20762306a36Sopenharmony_ci switch (event->attr.config) { 20862306a36Sopenharmony_ci case PERF_COUNT_HW_CPU_CYCLES: 20962306a36Sopenharmony_ci *config = 0; 21062306a36Sopenharmony_ci return 0; 21162306a36Sopenharmony_ci } 21262306a36Sopenharmony_ci break; 21362306a36Sopenharmony_ci case PERF_TYPE_RAW: 21462306a36Sopenharmony_ci switch (event->attr.config) { 21562306a36Sopenharmony_ci case 0x0076: 21662306a36Sopenharmony_ci *config = 0; 21762306a36Sopenharmony_ci return 0; 21862306a36Sopenharmony_ci case 0x00C1: 21962306a36Sopenharmony_ci *config = IBS_OP_CNT_CTL; 22062306a36Sopenharmony_ci return 0; 22162306a36Sopenharmony_ci } 22262306a36Sopenharmony_ci break; 22362306a36Sopenharmony_ci default: 22462306a36Sopenharmony_ci return -ENOENT; 22562306a36Sopenharmony_ci } 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci return -EOPNOTSUPP; 22862306a36Sopenharmony_ci} 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci/* 23162306a36Sopenharmony_ci * The rip of IBS samples has skid 0. Thus, IBS supports precise 23262306a36Sopenharmony_ci * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the 23362306a36Sopenharmony_ci * rip is invalid when IBS was not able to record the rip correctly. 23462306a36Sopenharmony_ci * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. 
23562306a36Sopenharmony_ci */ 23662306a36Sopenharmony_ciint forward_event_to_ibs(struct perf_event *event) 23762306a36Sopenharmony_ci{ 23862306a36Sopenharmony_ci u64 config = 0; 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci if (!event->attr.precise_ip || event->attr.precise_ip > 2) 24162306a36Sopenharmony_ci return -EOPNOTSUPP; 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci if (!core_pmu_ibs_config(event, &config)) { 24462306a36Sopenharmony_ci event->attr.type = perf_ibs_op.pmu.type; 24562306a36Sopenharmony_ci event->attr.config = config; 24662306a36Sopenharmony_ci } 24762306a36Sopenharmony_ci return -ENOENT; 24862306a36Sopenharmony_ci} 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci/* 25162306a36Sopenharmony_ci * Grouping of IBS events is not possible since IBS can have only 25262306a36Sopenharmony_ci * one event active at any point in time. 25362306a36Sopenharmony_ci */ 25462306a36Sopenharmony_cistatic int validate_group(struct perf_event *event) 25562306a36Sopenharmony_ci{ 25662306a36Sopenharmony_ci struct perf_event *sibling; 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci if (event->group_leader == event) 25962306a36Sopenharmony_ci return 0; 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci if (event->group_leader->pmu == event->pmu) 26262306a36Sopenharmony_ci return -EINVAL; 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci for_each_sibling_event(sibling, event->group_leader) { 26562306a36Sopenharmony_ci if (sibling->pmu == event->pmu) 26662306a36Sopenharmony_ci return -EINVAL; 26762306a36Sopenharmony_ci } 26862306a36Sopenharmony_ci return 0; 26962306a36Sopenharmony_ci} 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_cistatic int perf_ibs_init(struct perf_event *event) 27262306a36Sopenharmony_ci{ 27362306a36Sopenharmony_ci struct hw_perf_event *hwc = &event->hw; 27462306a36Sopenharmony_ci struct perf_ibs *perf_ibs; 27562306a36Sopenharmony_ci u64 max_cnt, config; 27662306a36Sopenharmony_ci int ret; 
27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci perf_ibs = get_ibs_pmu(event->attr.type); 27962306a36Sopenharmony_ci if (!perf_ibs) 28062306a36Sopenharmony_ci return -ENOENT; 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci config = event->attr.config; 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci if (event->pmu != &perf_ibs->pmu) 28562306a36Sopenharmony_ci return -ENOENT; 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci if (config & ~perf_ibs->config_mask) 28862306a36Sopenharmony_ci return -EINVAL; 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci ret = validate_group(event); 29162306a36Sopenharmony_ci if (ret) 29262306a36Sopenharmony_ci return ret; 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci if (hwc->sample_period) { 29562306a36Sopenharmony_ci if (config & perf_ibs->cnt_mask) 29662306a36Sopenharmony_ci /* raw max_cnt may not be set */ 29762306a36Sopenharmony_ci return -EINVAL; 29862306a36Sopenharmony_ci if (!event->attr.sample_freq && hwc->sample_period & 0x0f) 29962306a36Sopenharmony_ci /* 30062306a36Sopenharmony_ci * lower 4 bits can not be set in ibs max cnt, 30162306a36Sopenharmony_ci * but allowing it in case we adjust the 30262306a36Sopenharmony_ci * sample period to set a frequency. 
30362306a36Sopenharmony_ci */ 30462306a36Sopenharmony_ci return -EINVAL; 30562306a36Sopenharmony_ci hwc->sample_period &= ~0x0FULL; 30662306a36Sopenharmony_ci if (!hwc->sample_period) 30762306a36Sopenharmony_ci hwc->sample_period = 0x10; 30862306a36Sopenharmony_ci } else { 30962306a36Sopenharmony_ci max_cnt = config & perf_ibs->cnt_mask; 31062306a36Sopenharmony_ci config &= ~perf_ibs->cnt_mask; 31162306a36Sopenharmony_ci event->attr.sample_period = max_cnt << 4; 31262306a36Sopenharmony_ci hwc->sample_period = event->attr.sample_period; 31362306a36Sopenharmony_ci } 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_ci if (!hwc->sample_period) 31662306a36Sopenharmony_ci return -EINVAL; 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci /* 31962306a36Sopenharmony_ci * If we modify hwc->sample_period, we also need to update 32062306a36Sopenharmony_ci * hwc->last_period and hwc->period_left. 32162306a36Sopenharmony_ci */ 32262306a36Sopenharmony_ci hwc->last_period = hwc->sample_period; 32362306a36Sopenharmony_ci local64_set(&hwc->period_left, hwc->sample_period); 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci hwc->config_base = perf_ibs->msr; 32662306a36Sopenharmony_ci hwc->config = config; 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci return 0; 32962306a36Sopenharmony_ci} 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_cistatic int perf_ibs_set_period(struct perf_ibs *perf_ibs, 33262306a36Sopenharmony_ci struct hw_perf_event *hwc, u64 *period) 33362306a36Sopenharmony_ci{ 33462306a36Sopenharmony_ci int overflow; 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci /* ignore lower 4 bits in min count: */ 33762306a36Sopenharmony_ci overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period); 33862306a36Sopenharmony_ci local64_set(&hwc->prev_count, 0); 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci return overflow; 34162306a36Sopenharmony_ci} 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_cistatic u64 get_ibs_fetch_count(u64 
config) 34462306a36Sopenharmony_ci{ 34562306a36Sopenharmony_ci union ibs_fetch_ctl fetch_ctl = (union ibs_fetch_ctl)config; 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci return fetch_ctl.fetch_cnt << 4; 34862306a36Sopenharmony_ci} 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_cistatic u64 get_ibs_op_count(u64 config) 35162306a36Sopenharmony_ci{ 35262306a36Sopenharmony_ci union ibs_op_ctl op_ctl = (union ibs_op_ctl)config; 35362306a36Sopenharmony_ci u64 count = 0; 35462306a36Sopenharmony_ci 35562306a36Sopenharmony_ci /* 35662306a36Sopenharmony_ci * If the internal 27-bit counter rolled over, the count is MaxCnt 35762306a36Sopenharmony_ci * and the lower 7 bits of CurCnt are randomized. 35862306a36Sopenharmony_ci * Otherwise CurCnt has the full 27-bit current counter value. 35962306a36Sopenharmony_ci */ 36062306a36Sopenharmony_ci if (op_ctl.op_val) { 36162306a36Sopenharmony_ci count = op_ctl.opmaxcnt << 4; 36262306a36Sopenharmony_ci if (ibs_caps & IBS_CAPS_OPCNTEXT) 36362306a36Sopenharmony_ci count += op_ctl.opmaxcnt_ext << 20; 36462306a36Sopenharmony_ci } else if (ibs_caps & IBS_CAPS_RDWROPCNT) { 36562306a36Sopenharmony_ci count = op_ctl.opcurcnt; 36662306a36Sopenharmony_ci } 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci return count; 36962306a36Sopenharmony_ci} 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_cistatic void 37262306a36Sopenharmony_ciperf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, 37362306a36Sopenharmony_ci u64 *config) 37462306a36Sopenharmony_ci{ 37562306a36Sopenharmony_ci u64 count = perf_ibs->get_count(*config); 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_ci /* 37862306a36Sopenharmony_ci * Set width to 64 since we do not overflow on max width but 37962306a36Sopenharmony_ci * instead on max count. In perf_ibs_set_period() we clear 38062306a36Sopenharmony_ci * prev count manually on overflow. 
38162306a36Sopenharmony_ci */ 38262306a36Sopenharmony_ci while (!perf_event_try_update(event, count, 64)) { 38362306a36Sopenharmony_ci rdmsrl(event->hw.config_base, *config); 38462306a36Sopenharmony_ci count = perf_ibs->get_count(*config); 38562306a36Sopenharmony_ci } 38662306a36Sopenharmony_ci} 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_cistatic inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, 38962306a36Sopenharmony_ci struct hw_perf_event *hwc, u64 config) 39062306a36Sopenharmony_ci{ 39162306a36Sopenharmony_ci u64 tmp = hwc->config | config; 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci if (perf_ibs->fetch_count_reset_broken) 39462306a36Sopenharmony_ci wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask); 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_ci wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask); 39762306a36Sopenharmony_ci} 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci/* 40062306a36Sopenharmony_ci * Erratum #420 Instruction-Based Sampling Engine May Generate 40162306a36Sopenharmony_ci * Interrupt that Cannot Be Cleared: 40262306a36Sopenharmony_ci * 40362306a36Sopenharmony_ci * Must clear counter mask first, then clear the enable bit. See 40462306a36Sopenharmony_ci * Revision Guide for AMD Family 10h Processors, Publication #41322. 
40562306a36Sopenharmony_ci */ 40662306a36Sopenharmony_cistatic inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, 40762306a36Sopenharmony_ci struct hw_perf_event *hwc, u64 config) 40862306a36Sopenharmony_ci{ 40962306a36Sopenharmony_ci config &= ~perf_ibs->cnt_mask; 41062306a36Sopenharmony_ci if (boot_cpu_data.x86 == 0x10) 41162306a36Sopenharmony_ci wrmsrl(hwc->config_base, config); 41262306a36Sopenharmony_ci config &= ~perf_ibs->enable_mask; 41362306a36Sopenharmony_ci wrmsrl(hwc->config_base, config); 41462306a36Sopenharmony_ci} 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci/* 41762306a36Sopenharmony_ci * We cannot restore the ibs pmu state, so we always needs to update 41862306a36Sopenharmony_ci * the event while stopping it and then reset the state when starting 41962306a36Sopenharmony_ci * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in 42062306a36Sopenharmony_ci * perf_ibs_start()/perf_ibs_stop() and instead always do it. 42162306a36Sopenharmony_ci */ 42262306a36Sopenharmony_cistatic void perf_ibs_start(struct perf_event *event, int flags) 42362306a36Sopenharmony_ci{ 42462306a36Sopenharmony_ci struct hw_perf_event *hwc = &event->hw; 42562306a36Sopenharmony_ci struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); 42662306a36Sopenharmony_ci struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); 42762306a36Sopenharmony_ci u64 period, config = 0; 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) 43062306a36Sopenharmony_ci return; 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); 43362306a36Sopenharmony_ci hwc->state = 0; 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci perf_ibs_set_period(perf_ibs, hwc, &period); 43662306a36Sopenharmony_ci if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) { 43762306a36Sopenharmony_ci config |= period & IBS_OP_MAX_CNT_EXT_MASK; 
43862306a36Sopenharmony_ci period &= ~IBS_OP_MAX_CNT_EXT_MASK; 43962306a36Sopenharmony_ci } 44062306a36Sopenharmony_ci config |= period >> 4; 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci /* 44362306a36Sopenharmony_ci * Set STARTED before enabling the hardware, such that a subsequent NMI 44462306a36Sopenharmony_ci * must observe it. 44562306a36Sopenharmony_ci */ 44662306a36Sopenharmony_ci set_bit(IBS_STARTED, pcpu->state); 44762306a36Sopenharmony_ci clear_bit(IBS_STOPPING, pcpu->state); 44862306a36Sopenharmony_ci perf_ibs_enable_event(perf_ibs, hwc, config); 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci perf_event_update_userpage(event); 45162306a36Sopenharmony_ci} 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_cistatic void perf_ibs_stop(struct perf_event *event, int flags) 45462306a36Sopenharmony_ci{ 45562306a36Sopenharmony_ci struct hw_perf_event *hwc = &event->hw; 45662306a36Sopenharmony_ci struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); 45762306a36Sopenharmony_ci struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); 45862306a36Sopenharmony_ci u64 config; 45962306a36Sopenharmony_ci int stopping; 46062306a36Sopenharmony_ci 46162306a36Sopenharmony_ci if (test_and_set_bit(IBS_STOPPING, pcpu->state)) 46262306a36Sopenharmony_ci return; 46362306a36Sopenharmony_ci 46462306a36Sopenharmony_ci stopping = test_bit(IBS_STARTED, pcpu->state); 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci if (!stopping && (hwc->state & PERF_HES_UPTODATE)) 46762306a36Sopenharmony_ci return; 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_ci rdmsrl(hwc->config_base, config); 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci if (stopping) { 47262306a36Sopenharmony_ci /* 47362306a36Sopenharmony_ci * Set STOPPED before disabling the hardware, such that it 47462306a36Sopenharmony_ci * must be visible to NMIs the moment we clear the EN bit, 47562306a36Sopenharmony_ci * at which point we can generate an !VALID sample which 
47662306a36Sopenharmony_ci * we need to consume. 47762306a36Sopenharmony_ci */ 47862306a36Sopenharmony_ci set_bit(IBS_STOPPED, pcpu->state); 47962306a36Sopenharmony_ci perf_ibs_disable_event(perf_ibs, hwc, config); 48062306a36Sopenharmony_ci /* 48162306a36Sopenharmony_ci * Clear STARTED after disabling the hardware; if it were 48262306a36Sopenharmony_ci * cleared before an NMI hitting after the clear but before 48362306a36Sopenharmony_ci * clearing the EN bit might think it a spurious NMI and not 48462306a36Sopenharmony_ci * handle it. 48562306a36Sopenharmony_ci * 48662306a36Sopenharmony_ci * Clearing it after, however, creates the problem of the NMI 48762306a36Sopenharmony_ci * handler seeing STARTED but not having a valid sample. 48862306a36Sopenharmony_ci */ 48962306a36Sopenharmony_ci clear_bit(IBS_STARTED, pcpu->state); 49062306a36Sopenharmony_ci WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); 49162306a36Sopenharmony_ci hwc->state |= PERF_HES_STOPPED; 49262306a36Sopenharmony_ci } 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_ci if (hwc->state & PERF_HES_UPTODATE) 49562306a36Sopenharmony_ci return; 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_ci /* 49862306a36Sopenharmony_ci * Clear valid bit to not count rollovers on update, rollovers 49962306a36Sopenharmony_ci * are only updated in the irq handler. 
50062306a36Sopenharmony_ci */ 50162306a36Sopenharmony_ci config &= ~perf_ibs->valid_mask; 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_ci perf_ibs_event_update(perf_ibs, event, &config); 50462306a36Sopenharmony_ci hwc->state |= PERF_HES_UPTODATE; 50562306a36Sopenharmony_ci} 50662306a36Sopenharmony_ci 50762306a36Sopenharmony_cistatic int perf_ibs_add(struct perf_event *event, int flags) 50862306a36Sopenharmony_ci{ 50962306a36Sopenharmony_ci struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); 51062306a36Sopenharmony_ci struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci if (test_and_set_bit(IBS_ENABLED, pcpu->state)) 51362306a36Sopenharmony_ci return -ENOSPC; 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci pcpu->event = event; 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci if (flags & PERF_EF_START) 52062306a36Sopenharmony_ci perf_ibs_start(event, PERF_EF_RELOAD); 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci return 0; 52362306a36Sopenharmony_ci} 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_cistatic void perf_ibs_del(struct perf_event *event, int flags) 52662306a36Sopenharmony_ci{ 52762306a36Sopenharmony_ci struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); 52862306a36Sopenharmony_ci struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_ci if (!test_and_clear_bit(IBS_ENABLED, pcpu->state)) 53162306a36Sopenharmony_ci return; 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci perf_ibs_stop(event, PERF_EF_UPDATE); 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ci pcpu->event = NULL; 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci perf_event_update_userpage(event); 53862306a36Sopenharmony_ci} 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_cistatic void 
perf_ibs_read(struct perf_event *event) { } 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci/* 54362306a36Sopenharmony_ci * We need to initialize with empty group if all attributes in the 54462306a36Sopenharmony_ci * group are dynamic. 54562306a36Sopenharmony_ci */ 54662306a36Sopenharmony_cistatic struct attribute *attrs_empty[] = { 54762306a36Sopenharmony_ci NULL, 54862306a36Sopenharmony_ci}; 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_cistatic struct attribute_group empty_format_group = { 55162306a36Sopenharmony_ci .name = "format", 55262306a36Sopenharmony_ci .attrs = attrs_empty, 55362306a36Sopenharmony_ci}; 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_cistatic struct attribute_group empty_caps_group = { 55662306a36Sopenharmony_ci .name = "caps", 55762306a36Sopenharmony_ci .attrs = attrs_empty, 55862306a36Sopenharmony_ci}; 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_cistatic const struct attribute_group *empty_attr_groups[] = { 56162306a36Sopenharmony_ci &empty_format_group, 56262306a36Sopenharmony_ci &empty_caps_group, 56362306a36Sopenharmony_ci NULL, 56462306a36Sopenharmony_ci}; 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ciPMU_FORMAT_ATTR(rand_en, "config:57"); 56762306a36Sopenharmony_ciPMU_FORMAT_ATTR(cnt_ctl, "config:19"); 56862306a36Sopenharmony_ciPMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59"); 56962306a36Sopenharmony_ciPMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16"); 57062306a36Sopenharmony_ciPMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1"); 57162306a36Sopenharmony_ci 57262306a36Sopenharmony_cistatic umode_t 57362306a36Sopenharmony_cizen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i) 57462306a36Sopenharmony_ci{ 57562306a36Sopenharmony_ci return ibs_caps & IBS_CAPS_ZEN4 ? 
attr->mode : 0; 57662306a36Sopenharmony_ci} 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_cistatic struct attribute *rand_en_attrs[] = { 57962306a36Sopenharmony_ci &format_attr_rand_en.attr, 58062306a36Sopenharmony_ci NULL, 58162306a36Sopenharmony_ci}; 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_cistatic struct attribute *fetch_l3missonly_attrs[] = { 58462306a36Sopenharmony_ci &fetch_l3missonly.attr.attr, 58562306a36Sopenharmony_ci NULL, 58662306a36Sopenharmony_ci}; 58762306a36Sopenharmony_ci 58862306a36Sopenharmony_cistatic struct attribute *zen4_ibs_extensions_attrs[] = { 58962306a36Sopenharmony_ci &zen4_ibs_extensions.attr.attr, 59062306a36Sopenharmony_ci NULL, 59162306a36Sopenharmony_ci}; 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_cistatic struct attribute_group group_rand_en = { 59462306a36Sopenharmony_ci .name = "format", 59562306a36Sopenharmony_ci .attrs = rand_en_attrs, 59662306a36Sopenharmony_ci}; 59762306a36Sopenharmony_ci 59862306a36Sopenharmony_cistatic struct attribute_group group_fetch_l3missonly = { 59962306a36Sopenharmony_ci .name = "format", 60062306a36Sopenharmony_ci .attrs = fetch_l3missonly_attrs, 60162306a36Sopenharmony_ci .is_visible = zen4_ibs_extensions_is_visible, 60262306a36Sopenharmony_ci}; 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_cistatic struct attribute_group group_zen4_ibs_extensions = { 60562306a36Sopenharmony_ci .name = "caps", 60662306a36Sopenharmony_ci .attrs = zen4_ibs_extensions_attrs, 60762306a36Sopenharmony_ci .is_visible = zen4_ibs_extensions_is_visible, 60862306a36Sopenharmony_ci}; 60962306a36Sopenharmony_ci 61062306a36Sopenharmony_cistatic const struct attribute_group *fetch_attr_groups[] = { 61162306a36Sopenharmony_ci &group_rand_en, 61262306a36Sopenharmony_ci &empty_caps_group, 61362306a36Sopenharmony_ci NULL, 61462306a36Sopenharmony_ci}; 61562306a36Sopenharmony_ci 61662306a36Sopenharmony_cistatic const struct attribute_group *fetch_attr_update[] = { 61762306a36Sopenharmony_ci 
&group_fetch_l3missonly, 61862306a36Sopenharmony_ci &group_zen4_ibs_extensions, 61962306a36Sopenharmony_ci NULL, 62062306a36Sopenharmony_ci}; 62162306a36Sopenharmony_ci 62262306a36Sopenharmony_cistatic umode_t 62362306a36Sopenharmony_cicnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i) 62462306a36Sopenharmony_ci{ 62562306a36Sopenharmony_ci return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0; 62662306a36Sopenharmony_ci} 62762306a36Sopenharmony_ci 62862306a36Sopenharmony_cistatic struct attribute *cnt_ctl_attrs[] = { 62962306a36Sopenharmony_ci &format_attr_cnt_ctl.attr, 63062306a36Sopenharmony_ci NULL, 63162306a36Sopenharmony_ci}; 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_cistatic struct attribute *op_l3missonly_attrs[] = { 63462306a36Sopenharmony_ci &op_l3missonly.attr.attr, 63562306a36Sopenharmony_ci NULL, 63662306a36Sopenharmony_ci}; 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_cistatic struct attribute_group group_cnt_ctl = { 63962306a36Sopenharmony_ci .name = "format", 64062306a36Sopenharmony_ci .attrs = cnt_ctl_attrs, 64162306a36Sopenharmony_ci .is_visible = cnt_ctl_is_visible, 64262306a36Sopenharmony_ci}; 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_cistatic struct attribute_group group_op_l3missonly = { 64562306a36Sopenharmony_ci .name = "format", 64662306a36Sopenharmony_ci .attrs = op_l3missonly_attrs, 64762306a36Sopenharmony_ci .is_visible = zen4_ibs_extensions_is_visible, 64862306a36Sopenharmony_ci}; 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_cistatic const struct attribute_group *op_attr_update[] = { 65162306a36Sopenharmony_ci &group_cnt_ctl, 65262306a36Sopenharmony_ci &group_op_l3missonly, 65362306a36Sopenharmony_ci &group_zen4_ibs_extensions, 65462306a36Sopenharmony_ci NULL, 65562306a36Sopenharmony_ci}; 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_cistatic struct perf_ibs perf_ibs_fetch = { 65862306a36Sopenharmony_ci .pmu = { 65962306a36Sopenharmony_ci .task_ctx_nr = perf_hw_context, 
66062306a36Sopenharmony_ci 66162306a36Sopenharmony_ci .event_init = perf_ibs_init, 66262306a36Sopenharmony_ci .add = perf_ibs_add, 66362306a36Sopenharmony_ci .del = perf_ibs_del, 66462306a36Sopenharmony_ci .start = perf_ibs_start, 66562306a36Sopenharmony_ci .stop = perf_ibs_stop, 66662306a36Sopenharmony_ci .read = perf_ibs_read, 66762306a36Sopenharmony_ci .capabilities = PERF_PMU_CAP_NO_EXCLUDE, 66862306a36Sopenharmony_ci }, 66962306a36Sopenharmony_ci .msr = MSR_AMD64_IBSFETCHCTL, 67062306a36Sopenharmony_ci .config_mask = IBS_FETCH_CONFIG_MASK, 67162306a36Sopenharmony_ci .cnt_mask = IBS_FETCH_MAX_CNT, 67262306a36Sopenharmony_ci .enable_mask = IBS_FETCH_ENABLE, 67362306a36Sopenharmony_ci .valid_mask = IBS_FETCH_VAL, 67462306a36Sopenharmony_ci .max_period = IBS_FETCH_MAX_CNT << 4, 67562306a36Sopenharmony_ci .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, 67662306a36Sopenharmony_ci .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci .get_count = get_ibs_fetch_count, 67962306a36Sopenharmony_ci}; 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_cistatic struct perf_ibs perf_ibs_op = { 68262306a36Sopenharmony_ci .pmu = { 68362306a36Sopenharmony_ci .task_ctx_nr = perf_hw_context, 68462306a36Sopenharmony_ci 68562306a36Sopenharmony_ci .event_init = perf_ibs_init, 68662306a36Sopenharmony_ci .add = perf_ibs_add, 68762306a36Sopenharmony_ci .del = perf_ibs_del, 68862306a36Sopenharmony_ci .start = perf_ibs_start, 68962306a36Sopenharmony_ci .stop = perf_ibs_stop, 69062306a36Sopenharmony_ci .read = perf_ibs_read, 69162306a36Sopenharmony_ci .capabilities = PERF_PMU_CAP_NO_EXCLUDE, 69262306a36Sopenharmony_ci }, 69362306a36Sopenharmony_ci .msr = MSR_AMD64_IBSOPCTL, 69462306a36Sopenharmony_ci .config_mask = IBS_OP_CONFIG_MASK, 69562306a36Sopenharmony_ci .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT | 69662306a36Sopenharmony_ci IBS_OP_CUR_CNT_RAND, 69762306a36Sopenharmony_ci .enable_mask = IBS_OP_ENABLE, 69862306a36Sopenharmony_ci 
.valid_mask = IBS_OP_VAL, 69962306a36Sopenharmony_ci .max_period = IBS_OP_MAX_CNT << 4, 70062306a36Sopenharmony_ci .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, 70162306a36Sopenharmony_ci .offset_max = MSR_AMD64_IBSOP_REG_COUNT, 70262306a36Sopenharmony_ci 70362306a36Sopenharmony_ci .get_count = get_ibs_op_count, 70462306a36Sopenharmony_ci}; 70562306a36Sopenharmony_ci 70662306a36Sopenharmony_cistatic void perf_ibs_get_mem_op(union ibs_op_data3 *op_data3, 70762306a36Sopenharmony_ci struct perf_sample_data *data) 70862306a36Sopenharmony_ci{ 70962306a36Sopenharmony_ci union perf_mem_data_src *data_src = &data->data_src; 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci data_src->mem_op = PERF_MEM_OP_NA; 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_ci if (op_data3->ld_op) 71462306a36Sopenharmony_ci data_src->mem_op = PERF_MEM_OP_LOAD; 71562306a36Sopenharmony_ci else if (op_data3->st_op) 71662306a36Sopenharmony_ci data_src->mem_op = PERF_MEM_OP_STORE; 71762306a36Sopenharmony_ci} 71862306a36Sopenharmony_ci 71962306a36Sopenharmony_ci/* 72062306a36Sopenharmony_ci * Processors having CPUID_Fn8000001B_EAX[11] aka IBS_CAPS_ZEN4 has 72162306a36Sopenharmony_ci * more fine granular DataSrc encodings. Others have coarse. 
72262306a36Sopenharmony_ci */ 72362306a36Sopenharmony_cistatic u8 perf_ibs_data_src(union ibs_op_data2 *op_data2) 72462306a36Sopenharmony_ci{ 72562306a36Sopenharmony_ci if (ibs_caps & IBS_CAPS_ZEN4) 72662306a36Sopenharmony_ci return (op_data2->data_src_hi << 3) | op_data2->data_src_lo; 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_ci return op_data2->data_src_lo; 72962306a36Sopenharmony_ci} 73062306a36Sopenharmony_ci 73162306a36Sopenharmony_ci#define L(x) (PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT)) 73262306a36Sopenharmony_ci#define LN(x) PERF_MEM_S(LVLNUM, x) 73362306a36Sopenharmony_ci#define REM PERF_MEM_S(REMOTE, REMOTE) 73462306a36Sopenharmony_ci#define HOPS(x) PERF_MEM_S(HOPS, x) 73562306a36Sopenharmony_ci 73662306a36Sopenharmony_cistatic u64 g_data_src[8] = { 73762306a36Sopenharmony_ci [IBS_DATA_SRC_LOC_CACHE] = L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0), 73862306a36Sopenharmony_ci [IBS_DATA_SRC_DRAM] = L(LOC_RAM) | LN(RAM), 73962306a36Sopenharmony_ci [IBS_DATA_SRC_REM_CACHE] = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1), 74062306a36Sopenharmony_ci [IBS_DATA_SRC_IO] = L(IO) | LN(IO), 74162306a36Sopenharmony_ci}; 74262306a36Sopenharmony_ci 74362306a36Sopenharmony_ci#define RMT_NODE_BITS (1 << IBS_DATA_SRC_DRAM) 74462306a36Sopenharmony_ci#define RMT_NODE_APPLICABLE(x) (RMT_NODE_BITS & (1 << x)) 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_cistatic u64 g_zen4_data_src[32] = { 74762306a36Sopenharmony_ci [IBS_DATA_SRC_EXT_LOC_CACHE] = L(L3) | LN(L3), 74862306a36Sopenharmony_ci [IBS_DATA_SRC_EXT_NEAR_CCX_CACHE] = L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0), 74962306a36Sopenharmony_ci [IBS_DATA_SRC_EXT_DRAM] = L(LOC_RAM) | LN(RAM), 75062306a36Sopenharmony_ci [IBS_DATA_SRC_EXT_FAR_CCX_CACHE] = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1), 75162306a36Sopenharmony_ci [IBS_DATA_SRC_EXT_PMEM] = LN(PMEM), 75262306a36Sopenharmony_ci [IBS_DATA_SRC_EXT_IO] = L(IO) | LN(IO), 75362306a36Sopenharmony_ci [IBS_DATA_SRC_EXT_EXT_MEM] = LN(CXL), 
/*
 * Work out which level of the memory hierarchy served an IBS op sample.
 *
 * Clears data->data_src.mem_lvl and mem_lvl_num, then returns the level
 * bits built with the L()/LN()/REM/HOPS() helpers. The result is returned,
 * not stored; it is up to the caller to merge it into data_src.
 *
 * The checks run in a fixed order and the first match wins:
 *   1. uncached access (DcUcMemAcc), unless the data source is IO
 *   2. L1 hit (DcMiss clear)
 *   3. L2 hit (L2Miss clear), filtered for erratum #1293
 *   4. DataSrc table lookup, loads only
 *   5. MAB hit (DcMissNoMabAlloc)
 *   6. otherwise NA, without the HIT bit
 *
 * Reads data_src->mem_op, so the op type must already have been filled in.
 */
static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
				  union ibs_op_data3 *op_data3,
				  struct perf_sample_data *data)
{
	union perf_mem_data_src *data_src = &data->data_src;
	u8 ibs_data_src = perf_ibs_data_src(op_data2);

	data_src->mem_lvl = 0;
	data_src->mem_lvl_num = 0;

	/*
	 * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
	 * memory accesses. So, check DcUcMemAcc bit early.
	 */
	if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
		return L(UNC) | LN(UNC);

	/* L1 Hit */
	if (op_data3->dc_miss == 0)
		return L(L1) | LN(L1);

	/* L2 Hit */
	if (op_data3->l2_miss == 0) {
		/*
		 * Erratum #1293: on family 0x19 models 0x0-0xF the L2 hit is
		 * not reported when SwPf or DcMissNoMabAlloc is set; such
		 * samples fall through to the checks below.
		 */
		if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
		    !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
			return L(L2) | LN(L2);
	}

	/*
	 * OP_DATA2 is valid only for load ops. Skip all checks which
	 * uses OP_DATA2[DataSrc].
	 */
	if (data_src->mem_op != PERF_MEM_OP_LOAD)
		goto check_mab;

	if (ibs_caps & IBS_CAPS_ZEN4) {
		u64 val = g_zen4_data_src[ibs_data_src];

		/* A zero table entry means no mapping for this DataSrc. */
		if (!val)
			goto check_mab;

		/* HOPS_1 because IBS doesn't provide remote socket detail */
		if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) {
			/* Remote DRAM replaces the local-RAM encoding outright. */
			if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM)
				val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
			else
				val |= REM | HOPS(1);
		}

		return val;
	} else {
		u64 val = g_data_src[ibs_data_src];

		/* A zero table entry means no mapping for this DataSrc. */
		if (!val)
			goto check_mab;

		/* HOPS_1 because IBS doesn't provide remote socket detail */
		if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) {
			/* Remote DRAM replaces the local-RAM encoding outright. */
			if (ibs_data_src == IBS_DATA_SRC_DRAM)
				val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
			else
				val |= REM | HOPS(1);
		}

		return val;
	}

check_mab:
	/*
	 * MAB (Miss Address Buffer) Hit. MAB keeps track of outstanding
	 * DC misses. However, such data may come from any level in mem
	 * hierarchy. IBS provides detail about both MAB as well as actual
	 * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
	 * MAB only when IBS fails to provide DataSrc.
	 */
	if (op_data3->dc_miss_no_mab_alloc)
		return L(LFB) | LN(LFB);

	/* Don't set HIT with NA */
	return PERF_MEM_S(LVL, NA) | LN(NA);
}
87662306a36Sopenharmony_ci return; 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_ci ibs_data_src = perf_ibs_data_src(op_data2); 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci if (ibs_caps & IBS_CAPS_ZEN4) { 88162306a36Sopenharmony_ci if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE || 88262306a36Sopenharmony_ci ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE || 88362306a36Sopenharmony_ci ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) 88462306a36Sopenharmony_ci data_src->mem_snoop = PERF_MEM_SNOOP_HITM; 88562306a36Sopenharmony_ci } else if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) { 88662306a36Sopenharmony_ci data_src->mem_snoop = PERF_MEM_SNOOP_HITM; 88762306a36Sopenharmony_ci } 88862306a36Sopenharmony_ci} 88962306a36Sopenharmony_ci 89062306a36Sopenharmony_cistatic void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3, 89162306a36Sopenharmony_ci struct perf_sample_data *data) 89262306a36Sopenharmony_ci{ 89362306a36Sopenharmony_ci union perf_mem_data_src *data_src = &data->data_src; 89462306a36Sopenharmony_ci 89562306a36Sopenharmony_ci data_src->mem_dtlb = PERF_MEM_TLB_NA; 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_ci if (!op_data3->dc_lin_addr_valid) 89862306a36Sopenharmony_ci return; 89962306a36Sopenharmony_ci 90062306a36Sopenharmony_ci if (!op_data3->dc_l1tlb_miss) { 90162306a36Sopenharmony_ci data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT; 90262306a36Sopenharmony_ci return; 90362306a36Sopenharmony_ci } 90462306a36Sopenharmony_ci 90562306a36Sopenharmony_ci if (!op_data3->dc_l2tlb_miss) { 90662306a36Sopenharmony_ci data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT; 90762306a36Sopenharmony_ci return; 90862306a36Sopenharmony_ci } 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_ci data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS; 91162306a36Sopenharmony_ci} 91262306a36Sopenharmony_ci 91362306a36Sopenharmony_cistatic void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3, 91462306a36Sopenharmony_ci struct perf_sample_data 
*data) 91562306a36Sopenharmony_ci{ 91662306a36Sopenharmony_ci union perf_mem_data_src *data_src = &data->data_src; 91762306a36Sopenharmony_ci 91862306a36Sopenharmony_ci data_src->mem_lock = PERF_MEM_LOCK_NA; 91962306a36Sopenharmony_ci 92062306a36Sopenharmony_ci if (op_data3->dc_locked_op) 92162306a36Sopenharmony_ci data_src->mem_lock = PERF_MEM_LOCK_LOCKED; 92262306a36Sopenharmony_ci} 92362306a36Sopenharmony_ci 92462306a36Sopenharmony_ci#define ibs_op_msr_idx(msr) (msr - MSR_AMD64_IBSOPCTL) 92562306a36Sopenharmony_ci 92662306a36Sopenharmony_cistatic void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data, 92762306a36Sopenharmony_ci struct perf_sample_data *data, 92862306a36Sopenharmony_ci union ibs_op_data2 *op_data2, 92962306a36Sopenharmony_ci union ibs_op_data3 *op_data3) 93062306a36Sopenharmony_ci{ 93162306a36Sopenharmony_ci union perf_mem_data_src *data_src = &data->data_src; 93262306a36Sopenharmony_ci 93362306a36Sopenharmony_ci data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data); 93462306a36Sopenharmony_ci perf_ibs_get_mem_snoop(op_data2, data); 93562306a36Sopenharmony_ci perf_ibs_get_tlb_lvl(op_data3, data); 93662306a36Sopenharmony_ci perf_ibs_get_mem_lock(op_data3, data); 93762306a36Sopenharmony_ci} 93862306a36Sopenharmony_ci 93962306a36Sopenharmony_cistatic __u64 perf_ibs_get_op_data2(struct perf_ibs_data *ibs_data, 94062306a36Sopenharmony_ci union ibs_op_data3 *op_data3) 94162306a36Sopenharmony_ci{ 94262306a36Sopenharmony_ci __u64 val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)]; 94362306a36Sopenharmony_ci 94462306a36Sopenharmony_ci /* Erratum #1293 */ 94562306a36Sopenharmony_ci if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model <= 0xF && 94662306a36Sopenharmony_ci (op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) { 94762306a36Sopenharmony_ci /* 94862306a36Sopenharmony_ci * OP_DATA2 has only two fields on Zen3: DataSrc and RmtNode. 
94962306a36Sopenharmony_ci * DataSrc=0 is 'No valid status' and RmtNode is invalid when 95062306a36Sopenharmony_ci * DataSrc=0. 95162306a36Sopenharmony_ci */ 95262306a36Sopenharmony_ci val = 0; 95362306a36Sopenharmony_ci } 95462306a36Sopenharmony_ci return val; 95562306a36Sopenharmony_ci} 95662306a36Sopenharmony_ci 95762306a36Sopenharmony_cistatic void perf_ibs_parse_ld_st_data(__u64 sample_type, 95862306a36Sopenharmony_ci struct perf_ibs_data *ibs_data, 95962306a36Sopenharmony_ci struct perf_sample_data *data) 96062306a36Sopenharmony_ci{ 96162306a36Sopenharmony_ci union ibs_op_data3 op_data3; 96262306a36Sopenharmony_ci union ibs_op_data2 op_data2; 96362306a36Sopenharmony_ci union ibs_op_data op_data; 96462306a36Sopenharmony_ci 96562306a36Sopenharmony_ci data->data_src.val = PERF_MEM_NA; 96662306a36Sopenharmony_ci op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; 96762306a36Sopenharmony_ci 96862306a36Sopenharmony_ci perf_ibs_get_mem_op(&op_data3, data); 96962306a36Sopenharmony_ci if (data->data_src.mem_op != PERF_MEM_OP_LOAD && 97062306a36Sopenharmony_ci data->data_src.mem_op != PERF_MEM_OP_STORE) 97162306a36Sopenharmony_ci return; 97262306a36Sopenharmony_ci 97362306a36Sopenharmony_ci op_data2.val = perf_ibs_get_op_data2(ibs_data, &op_data3); 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_DATA_SRC) { 97662306a36Sopenharmony_ci perf_ibs_get_data_src(ibs_data, data, &op_data2, &op_data3); 97762306a36Sopenharmony_ci data->sample_flags |= PERF_SAMPLE_DATA_SRC; 97862306a36Sopenharmony_ci } 97962306a36Sopenharmony_ci 98062306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_WEIGHT_TYPE && op_data3.dc_miss && 98162306a36Sopenharmony_ci data->data_src.mem_op == PERF_MEM_OP_LOAD) { 98262306a36Sopenharmony_ci op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)]; 98362306a36Sopenharmony_ci 98462306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { 98562306a36Sopenharmony_ci data->weight.var1_dw 
= op_data3.dc_miss_lat; 98662306a36Sopenharmony_ci data->weight.var2_w = op_data.tag_to_ret_ctr; 98762306a36Sopenharmony_ci } else if (sample_type & PERF_SAMPLE_WEIGHT) { 98862306a36Sopenharmony_ci data->weight.full = op_data3.dc_miss_lat; 98962306a36Sopenharmony_ci } 99062306a36Sopenharmony_ci data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; 99162306a36Sopenharmony_ci } 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_ADDR && op_data3.dc_lin_addr_valid) { 99462306a36Sopenharmony_ci data->addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)]; 99562306a36Sopenharmony_ci data->sample_flags |= PERF_SAMPLE_ADDR; 99662306a36Sopenharmony_ci } 99762306a36Sopenharmony_ci 99862306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_PHYS_ADDR && op_data3.dc_phy_addr_valid) { 99962306a36Sopenharmony_ci data->phys_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)]; 100062306a36Sopenharmony_ci data->sample_flags |= PERF_SAMPLE_PHYS_ADDR; 100162306a36Sopenharmony_ci } 100262306a36Sopenharmony_ci} 100362306a36Sopenharmony_ci 100462306a36Sopenharmony_cistatic int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type, 100562306a36Sopenharmony_ci int check_rip) 100662306a36Sopenharmony_ci{ 100762306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_RAW || 100862306a36Sopenharmony_ci (perf_ibs == &perf_ibs_op && 100962306a36Sopenharmony_ci (sample_type & PERF_SAMPLE_DATA_SRC || 101062306a36Sopenharmony_ci sample_type & PERF_SAMPLE_WEIGHT_TYPE || 101162306a36Sopenharmony_ci sample_type & PERF_SAMPLE_ADDR || 101262306a36Sopenharmony_ci sample_type & PERF_SAMPLE_PHYS_ADDR))) 101362306a36Sopenharmony_ci return perf_ibs->offset_max; 101462306a36Sopenharmony_ci else if (check_rip) 101562306a36Sopenharmony_ci return 3; 101662306a36Sopenharmony_ci return 1; 101762306a36Sopenharmony_ci} 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_cistatic int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) 
102062306a36Sopenharmony_ci{ 102162306a36Sopenharmony_ci struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); 102262306a36Sopenharmony_ci struct perf_event *event = pcpu->event; 102362306a36Sopenharmony_ci struct hw_perf_event *hwc; 102462306a36Sopenharmony_ci struct perf_sample_data data; 102562306a36Sopenharmony_ci struct perf_raw_record raw; 102662306a36Sopenharmony_ci struct pt_regs regs; 102762306a36Sopenharmony_ci struct perf_ibs_data ibs_data; 102862306a36Sopenharmony_ci int offset, size, check_rip, offset_max, throttle = 0; 102962306a36Sopenharmony_ci unsigned int msr; 103062306a36Sopenharmony_ci u64 *buf, *config, period, new_config = 0; 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci if (!test_bit(IBS_STARTED, pcpu->state)) { 103362306a36Sopenharmony_cifail: 103462306a36Sopenharmony_ci /* 103562306a36Sopenharmony_ci * Catch spurious interrupts after stopping IBS: After 103662306a36Sopenharmony_ci * disabling IBS there could be still incoming NMIs 103762306a36Sopenharmony_ci * with samples that even have the valid bit cleared. 103862306a36Sopenharmony_ci * Mark all this NMIs as handled. 
103962306a36Sopenharmony_ci */ 104062306a36Sopenharmony_ci if (test_and_clear_bit(IBS_STOPPED, pcpu->state)) 104162306a36Sopenharmony_ci return 1; 104262306a36Sopenharmony_ci 104362306a36Sopenharmony_ci return 0; 104462306a36Sopenharmony_ci } 104562306a36Sopenharmony_ci 104662306a36Sopenharmony_ci if (WARN_ON_ONCE(!event)) 104762306a36Sopenharmony_ci goto fail; 104862306a36Sopenharmony_ci 104962306a36Sopenharmony_ci hwc = &event->hw; 105062306a36Sopenharmony_ci msr = hwc->config_base; 105162306a36Sopenharmony_ci buf = ibs_data.regs; 105262306a36Sopenharmony_ci rdmsrl(msr, *buf); 105362306a36Sopenharmony_ci if (!(*buf++ & perf_ibs->valid_mask)) 105462306a36Sopenharmony_ci goto fail; 105562306a36Sopenharmony_ci 105662306a36Sopenharmony_ci config = &ibs_data.regs[0]; 105762306a36Sopenharmony_ci perf_ibs_event_update(perf_ibs, event, config); 105862306a36Sopenharmony_ci perf_sample_data_init(&data, 0, hwc->last_period); 105962306a36Sopenharmony_ci if (!perf_ibs_set_period(perf_ibs, hwc, &period)) 106062306a36Sopenharmony_ci goto out; /* no sw counter overflow */ 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ci ibs_data.caps = ibs_caps; 106362306a36Sopenharmony_ci size = 1; 106462306a36Sopenharmony_ci offset = 1; 106562306a36Sopenharmony_ci check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK)); 106662306a36Sopenharmony_ci 106762306a36Sopenharmony_ci offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip); 106862306a36Sopenharmony_ci 106962306a36Sopenharmony_ci do { 107062306a36Sopenharmony_ci rdmsrl(msr + offset, *buf++); 107162306a36Sopenharmony_ci size++; 107262306a36Sopenharmony_ci offset = find_next_bit(perf_ibs->offset_mask, 107362306a36Sopenharmony_ci perf_ibs->offset_max, 107462306a36Sopenharmony_ci offset + 1); 107562306a36Sopenharmony_ci } while (offset < offset_max); 107662306a36Sopenharmony_ci /* 107762306a36Sopenharmony_ci * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately 
107862306a36Sopenharmony_ci * depending on their availability. 107962306a36Sopenharmony_ci * Can't add to offset_max as they are staggered 108062306a36Sopenharmony_ci */ 108162306a36Sopenharmony_ci if (event->attr.sample_type & PERF_SAMPLE_RAW) { 108262306a36Sopenharmony_ci if (perf_ibs == &perf_ibs_op) { 108362306a36Sopenharmony_ci if (ibs_caps & IBS_CAPS_BRNTRGT) { 108462306a36Sopenharmony_ci rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); 108562306a36Sopenharmony_ci size++; 108662306a36Sopenharmony_ci } 108762306a36Sopenharmony_ci if (ibs_caps & IBS_CAPS_OPDATA4) { 108862306a36Sopenharmony_ci rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++); 108962306a36Sopenharmony_ci size++; 109062306a36Sopenharmony_ci } 109162306a36Sopenharmony_ci } 109262306a36Sopenharmony_ci if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) { 109362306a36Sopenharmony_ci rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++); 109462306a36Sopenharmony_ci size++; 109562306a36Sopenharmony_ci } 109662306a36Sopenharmony_ci } 109762306a36Sopenharmony_ci ibs_data.size = sizeof(u64) * size; 109862306a36Sopenharmony_ci 109962306a36Sopenharmony_ci regs = *iregs; 110062306a36Sopenharmony_ci if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { 110162306a36Sopenharmony_ci regs.flags &= ~PERF_EFLAGS_EXACT; 110262306a36Sopenharmony_ci } else { 110362306a36Sopenharmony_ci /* Workaround for erratum #1197 */ 110462306a36Sopenharmony_ci if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1])) 110562306a36Sopenharmony_ci goto out; 110662306a36Sopenharmony_ci 110762306a36Sopenharmony_ci set_linear_ip(®s, ibs_data.regs[1]); 110862306a36Sopenharmony_ci regs.flags |= PERF_EFLAGS_EXACT; 110962306a36Sopenharmony_ci } 111062306a36Sopenharmony_ci 111162306a36Sopenharmony_ci if (event->attr.sample_type & PERF_SAMPLE_RAW) { 111262306a36Sopenharmony_ci raw = (struct perf_raw_record){ 111362306a36Sopenharmony_ci .frag = { 111462306a36Sopenharmony_ci .size = sizeof(u32) + ibs_data.size, 111562306a36Sopenharmony_ci .data = 
ibs_data.data, 111662306a36Sopenharmony_ci }, 111762306a36Sopenharmony_ci }; 111862306a36Sopenharmony_ci perf_sample_save_raw_data(&data, &raw); 111962306a36Sopenharmony_ci } 112062306a36Sopenharmony_ci 112162306a36Sopenharmony_ci if (perf_ibs == &perf_ibs_op) 112262306a36Sopenharmony_ci perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data); 112362306a36Sopenharmony_ci 112462306a36Sopenharmony_ci /* 112562306a36Sopenharmony_ci * rip recorded by IbsOpRip will not be consistent with rsp and rbp 112662306a36Sopenharmony_ci * recorded as part of interrupt regs. Thus we need to use rip from 112762306a36Sopenharmony_ci * interrupt regs while unwinding call stack. 112862306a36Sopenharmony_ci */ 112962306a36Sopenharmony_ci if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) 113062306a36Sopenharmony_ci perf_sample_save_callchain(&data, event, iregs); 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci throttle = perf_event_overflow(event, &data, ®s); 113362306a36Sopenharmony_ciout: 113462306a36Sopenharmony_ci if (throttle) { 113562306a36Sopenharmony_ci perf_ibs_stop(event, 0); 113662306a36Sopenharmony_ci } else { 113762306a36Sopenharmony_ci if (perf_ibs == &perf_ibs_op) { 113862306a36Sopenharmony_ci if (ibs_caps & IBS_CAPS_OPCNTEXT) { 113962306a36Sopenharmony_ci new_config = period & IBS_OP_MAX_CNT_EXT_MASK; 114062306a36Sopenharmony_ci period &= ~IBS_OP_MAX_CNT_EXT_MASK; 114162306a36Sopenharmony_ci } 114262306a36Sopenharmony_ci if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL)) 114362306a36Sopenharmony_ci new_config |= *config & IBS_OP_CUR_CNT_RAND; 114462306a36Sopenharmony_ci } 114562306a36Sopenharmony_ci new_config |= period >> 4; 114662306a36Sopenharmony_ci 114762306a36Sopenharmony_ci perf_ibs_enable_event(perf_ibs, hwc, new_config); 114862306a36Sopenharmony_ci } 114962306a36Sopenharmony_ci 115062306a36Sopenharmony_ci perf_event_update_userpage(event); 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci return 1; 
115362306a36Sopenharmony_ci} 115462306a36Sopenharmony_ci 115562306a36Sopenharmony_cistatic int 115662306a36Sopenharmony_ciperf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) 115762306a36Sopenharmony_ci{ 115862306a36Sopenharmony_ci u64 stamp = sched_clock(); 115962306a36Sopenharmony_ci int handled = 0; 116062306a36Sopenharmony_ci 116162306a36Sopenharmony_ci handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs); 116262306a36Sopenharmony_ci handled += perf_ibs_handle_irq(&perf_ibs_op, regs); 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_ci if (handled) 116562306a36Sopenharmony_ci inc_irq_stat(apic_perf_irqs); 116662306a36Sopenharmony_ci 116762306a36Sopenharmony_ci perf_sample_event_took(sched_clock() - stamp); 116862306a36Sopenharmony_ci 116962306a36Sopenharmony_ci return handled; 117062306a36Sopenharmony_ci} 117162306a36Sopenharmony_ciNOKPROBE_SYMBOL(perf_ibs_nmi_handler); 117262306a36Sopenharmony_ci 117362306a36Sopenharmony_cistatic __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) 117462306a36Sopenharmony_ci{ 117562306a36Sopenharmony_ci struct cpu_perf_ibs __percpu *pcpu; 117662306a36Sopenharmony_ci int ret; 117762306a36Sopenharmony_ci 117862306a36Sopenharmony_ci pcpu = alloc_percpu(struct cpu_perf_ibs); 117962306a36Sopenharmony_ci if (!pcpu) 118062306a36Sopenharmony_ci return -ENOMEM; 118162306a36Sopenharmony_ci 118262306a36Sopenharmony_ci perf_ibs->pcpu = pcpu; 118362306a36Sopenharmony_ci 118462306a36Sopenharmony_ci ret = perf_pmu_register(&perf_ibs->pmu, name, -1); 118562306a36Sopenharmony_ci if (ret) { 118662306a36Sopenharmony_ci perf_ibs->pcpu = NULL; 118762306a36Sopenharmony_ci free_percpu(pcpu); 118862306a36Sopenharmony_ci } 118962306a36Sopenharmony_ci 119062306a36Sopenharmony_ci return ret; 119162306a36Sopenharmony_ci} 119262306a36Sopenharmony_ci 119362306a36Sopenharmony_cistatic __init int perf_ibs_fetch_init(void) 119462306a36Sopenharmony_ci{ 119562306a36Sopenharmony_ci /* 119662306a36Sopenharmony_ci * Some chips fail to 
reset the fetch count when it is written; instead 119762306a36Sopenharmony_ci * they need a 0-1 transition of IbsFetchEn. 119862306a36Sopenharmony_ci */ 119962306a36Sopenharmony_ci if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18) 120062306a36Sopenharmony_ci perf_ibs_fetch.fetch_count_reset_broken = 1; 120162306a36Sopenharmony_ci 120262306a36Sopenharmony_ci if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10) 120362306a36Sopenharmony_ci perf_ibs_fetch.fetch_ignore_if_zero_rip = 1; 120462306a36Sopenharmony_ci 120562306a36Sopenharmony_ci if (ibs_caps & IBS_CAPS_ZEN4) 120662306a36Sopenharmony_ci perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY; 120762306a36Sopenharmony_ci 120862306a36Sopenharmony_ci perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups; 120962306a36Sopenharmony_ci perf_ibs_fetch.pmu.attr_update = fetch_attr_update; 121062306a36Sopenharmony_ci 121162306a36Sopenharmony_ci return perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); 121262306a36Sopenharmony_ci} 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_cistatic __init int perf_ibs_op_init(void) 121562306a36Sopenharmony_ci{ 121662306a36Sopenharmony_ci if (ibs_caps & IBS_CAPS_OPCNT) 121762306a36Sopenharmony_ci perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; 121862306a36Sopenharmony_ci 121962306a36Sopenharmony_ci if (ibs_caps & IBS_CAPS_OPCNTEXT) { 122062306a36Sopenharmony_ci perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK; 122162306a36Sopenharmony_ci perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK; 122262306a36Sopenharmony_ci perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK; 122362306a36Sopenharmony_ci } 122462306a36Sopenharmony_ci 122562306a36Sopenharmony_ci if (ibs_caps & IBS_CAPS_ZEN4) 122662306a36Sopenharmony_ci perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY; 122762306a36Sopenharmony_ci 122862306a36Sopenharmony_ci perf_ibs_op.pmu.attr_groups = empty_attr_groups; 122962306a36Sopenharmony_ci perf_ibs_op.pmu.attr_update = op_attr_update; 123062306a36Sopenharmony_ci 
123162306a36Sopenharmony_ci return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); 123262306a36Sopenharmony_ci} 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_cistatic __init int perf_event_ibs_init(void) 123562306a36Sopenharmony_ci{ 123662306a36Sopenharmony_ci int ret; 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci ret = perf_ibs_fetch_init(); 123962306a36Sopenharmony_ci if (ret) 124062306a36Sopenharmony_ci return ret; 124162306a36Sopenharmony_ci 124262306a36Sopenharmony_ci ret = perf_ibs_op_init(); 124362306a36Sopenharmony_ci if (ret) 124462306a36Sopenharmony_ci goto err_op; 124562306a36Sopenharmony_ci 124662306a36Sopenharmony_ci ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); 124762306a36Sopenharmony_ci if (ret) 124862306a36Sopenharmony_ci goto err_nmi; 124962306a36Sopenharmony_ci 125062306a36Sopenharmony_ci pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps); 125162306a36Sopenharmony_ci return 0; 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_cierr_nmi: 125462306a36Sopenharmony_ci perf_pmu_unregister(&perf_ibs_op.pmu); 125562306a36Sopenharmony_ci free_percpu(perf_ibs_op.pcpu); 125662306a36Sopenharmony_ci perf_ibs_op.pcpu = NULL; 125762306a36Sopenharmony_cierr_op: 125862306a36Sopenharmony_ci perf_pmu_unregister(&perf_ibs_fetch.pmu); 125962306a36Sopenharmony_ci free_percpu(perf_ibs_fetch.pcpu); 126062306a36Sopenharmony_ci perf_ibs_fetch.pcpu = NULL; 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci return ret; 126362306a36Sopenharmony_ci} 126462306a36Sopenharmony_ci 126562306a36Sopenharmony_ci#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ 126662306a36Sopenharmony_ci 126762306a36Sopenharmony_cistatic __init int perf_event_ibs_init(void) 126862306a36Sopenharmony_ci{ 126962306a36Sopenharmony_ci return 0; 127062306a36Sopenharmony_ci} 127162306a36Sopenharmony_ci 127262306a36Sopenharmony_ci#endif 127362306a36Sopenharmony_ci 127462306a36Sopenharmony_ci/* IBS - apic initialization, for perf and 
oprofile */ 127562306a36Sopenharmony_ci 127662306a36Sopenharmony_cistatic __init u32 __get_ibs_caps(void) 127762306a36Sopenharmony_ci{ 127862306a36Sopenharmony_ci u32 caps; 127962306a36Sopenharmony_ci unsigned int max_level; 128062306a36Sopenharmony_ci 128162306a36Sopenharmony_ci if (!boot_cpu_has(X86_FEATURE_IBS)) 128262306a36Sopenharmony_ci return 0; 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci /* check IBS cpuid feature flags */ 128562306a36Sopenharmony_ci max_level = cpuid_eax(0x80000000); 128662306a36Sopenharmony_ci if (max_level < IBS_CPUID_FEATURES) 128762306a36Sopenharmony_ci return IBS_CAPS_DEFAULT; 128862306a36Sopenharmony_ci 128962306a36Sopenharmony_ci caps = cpuid_eax(IBS_CPUID_FEATURES); 129062306a36Sopenharmony_ci if (!(caps & IBS_CAPS_AVAIL)) 129162306a36Sopenharmony_ci /* cpuid flags not valid */ 129262306a36Sopenharmony_ci return IBS_CAPS_DEFAULT; 129362306a36Sopenharmony_ci 129462306a36Sopenharmony_ci return caps; 129562306a36Sopenharmony_ci} 129662306a36Sopenharmony_ci 129762306a36Sopenharmony_ciu32 get_ibs_caps(void) 129862306a36Sopenharmony_ci{ 129962306a36Sopenharmony_ci return ibs_caps; 130062306a36Sopenharmony_ci} 130162306a36Sopenharmony_ci 130262306a36Sopenharmony_ciEXPORT_SYMBOL(get_ibs_caps); 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_cistatic inline int get_eilvt(int offset) 130562306a36Sopenharmony_ci{ 130662306a36Sopenharmony_ci return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); 130762306a36Sopenharmony_ci} 130862306a36Sopenharmony_ci 130962306a36Sopenharmony_cistatic inline int put_eilvt(int offset) 131062306a36Sopenharmony_ci{ 131162306a36Sopenharmony_ci return !setup_APIC_eilvt(offset, 0, 0, 1); 131262306a36Sopenharmony_ci} 131362306a36Sopenharmony_ci 131462306a36Sopenharmony_ci/* 131562306a36Sopenharmony_ci * Check and reserve APIC extended interrupt LVT offset for IBS if available. 
131662306a36Sopenharmony_ci */ 131762306a36Sopenharmony_cistatic inline int ibs_eilvt_valid(void) 131862306a36Sopenharmony_ci{ 131962306a36Sopenharmony_ci int offset; 132062306a36Sopenharmony_ci u64 val; 132162306a36Sopenharmony_ci int valid = 0; 132262306a36Sopenharmony_ci 132362306a36Sopenharmony_ci preempt_disable(); 132462306a36Sopenharmony_ci 132562306a36Sopenharmony_ci rdmsrl(MSR_AMD64_IBSCTL, val); 132662306a36Sopenharmony_ci offset = val & IBSCTL_LVT_OFFSET_MASK; 132762306a36Sopenharmony_ci 132862306a36Sopenharmony_ci if (!(val & IBSCTL_LVT_OFFSET_VALID)) { 132962306a36Sopenharmony_ci pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", 133062306a36Sopenharmony_ci smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); 133162306a36Sopenharmony_ci goto out; 133262306a36Sopenharmony_ci } 133362306a36Sopenharmony_ci 133462306a36Sopenharmony_ci if (!get_eilvt(offset)) { 133562306a36Sopenharmony_ci pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", 133662306a36Sopenharmony_ci smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); 133762306a36Sopenharmony_ci goto out; 133862306a36Sopenharmony_ci } 133962306a36Sopenharmony_ci 134062306a36Sopenharmony_ci valid = 1; 134162306a36Sopenharmony_ciout: 134262306a36Sopenharmony_ci preempt_enable(); 134362306a36Sopenharmony_ci 134462306a36Sopenharmony_ci return valid; 134562306a36Sopenharmony_ci} 134662306a36Sopenharmony_ci 134762306a36Sopenharmony_cistatic int setup_ibs_ctl(int ibs_eilvt_off) 134862306a36Sopenharmony_ci{ 134962306a36Sopenharmony_ci struct pci_dev *cpu_cfg; 135062306a36Sopenharmony_ci int nodes; 135162306a36Sopenharmony_ci u32 value = 0; 135262306a36Sopenharmony_ci 135362306a36Sopenharmony_ci nodes = 0; 135462306a36Sopenharmony_ci cpu_cfg = NULL; 135562306a36Sopenharmony_ci do { 135662306a36Sopenharmony_ci cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, 135762306a36Sopenharmony_ci PCI_DEVICE_ID_AMD_10H_NB_MISC, 135862306a36Sopenharmony_ci cpu_cfg); 
135962306a36Sopenharmony_ci if (!cpu_cfg) 136062306a36Sopenharmony_ci break; 136162306a36Sopenharmony_ci ++nodes; 136262306a36Sopenharmony_ci pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off 136362306a36Sopenharmony_ci | IBSCTL_LVT_OFFSET_VALID); 136462306a36Sopenharmony_ci pci_read_config_dword(cpu_cfg, IBSCTL, &value); 136562306a36Sopenharmony_ci if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { 136662306a36Sopenharmony_ci pci_dev_put(cpu_cfg); 136762306a36Sopenharmony_ci pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n", 136862306a36Sopenharmony_ci value); 136962306a36Sopenharmony_ci return -EINVAL; 137062306a36Sopenharmony_ci } 137162306a36Sopenharmony_ci } while (1); 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_ci if (!nodes) { 137462306a36Sopenharmony_ci pr_debug("No CPU node configured for IBS\n"); 137562306a36Sopenharmony_ci return -ENODEV; 137662306a36Sopenharmony_ci } 137762306a36Sopenharmony_ci 137862306a36Sopenharmony_ci return 0; 137962306a36Sopenharmony_ci} 138062306a36Sopenharmony_ci 138162306a36Sopenharmony_ci/* 138262306a36Sopenharmony_ci * This runs only on the current cpu. We try to find an LVT offset and 138362306a36Sopenharmony_ci * setup the local APIC. For this we must disable preemption. On 138462306a36Sopenharmony_ci * success we initialize all nodes with this offset. This updates then 138562306a36Sopenharmony_ci * the offset in the IBS_CTL per-node msr. The per-core APIC setup of 138662306a36Sopenharmony_ci * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that 138762306a36Sopenharmony_ci * is using the new offset. 
138862306a36Sopenharmony_ci */ 138962306a36Sopenharmony_cistatic void force_ibs_eilvt_setup(void) 139062306a36Sopenharmony_ci{ 139162306a36Sopenharmony_ci int offset; 139262306a36Sopenharmony_ci int ret; 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_ci preempt_disable(); 139562306a36Sopenharmony_ci /* find the next free available EILVT entry, skip offset 0 */ 139662306a36Sopenharmony_ci for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { 139762306a36Sopenharmony_ci if (get_eilvt(offset)) 139862306a36Sopenharmony_ci break; 139962306a36Sopenharmony_ci } 140062306a36Sopenharmony_ci preempt_enable(); 140162306a36Sopenharmony_ci 140262306a36Sopenharmony_ci if (offset == APIC_EILVT_NR_MAX) { 140362306a36Sopenharmony_ci pr_debug("No EILVT entry available\n"); 140462306a36Sopenharmony_ci return; 140562306a36Sopenharmony_ci } 140662306a36Sopenharmony_ci 140762306a36Sopenharmony_ci ret = setup_ibs_ctl(offset); 140862306a36Sopenharmony_ci if (ret) 140962306a36Sopenharmony_ci goto out; 141062306a36Sopenharmony_ci 141162306a36Sopenharmony_ci if (!ibs_eilvt_valid()) 141262306a36Sopenharmony_ci goto out; 141362306a36Sopenharmony_ci 141462306a36Sopenharmony_ci pr_info("LVT offset %d assigned\n", offset); 141562306a36Sopenharmony_ci 141662306a36Sopenharmony_ci return; 141762306a36Sopenharmony_ciout: 141862306a36Sopenharmony_ci preempt_disable(); 141962306a36Sopenharmony_ci put_eilvt(offset); 142062306a36Sopenharmony_ci preempt_enable(); 142162306a36Sopenharmony_ci return; 142262306a36Sopenharmony_ci} 142362306a36Sopenharmony_ci 142462306a36Sopenharmony_cistatic void ibs_eilvt_setup(void) 142562306a36Sopenharmony_ci{ 142662306a36Sopenharmony_ci /* 142762306a36Sopenharmony_ci * Force LVT offset assignment for family 10h: The offsets are 142862306a36Sopenharmony_ci * not assigned by the BIOS for this family, so the OS is 142962306a36Sopenharmony_ci * responsible for doing it. 
If the OS assignment fails, fall 143062306a36Sopenharmony_ci * back to BIOS settings and try to setup this. 143162306a36Sopenharmony_ci */ 143262306a36Sopenharmony_ci if (boot_cpu_data.x86 == 0x10) 143362306a36Sopenharmony_ci force_ibs_eilvt_setup(); 143462306a36Sopenharmony_ci} 143562306a36Sopenharmony_ci 143662306a36Sopenharmony_cistatic inline int get_ibs_lvt_offset(void) 143762306a36Sopenharmony_ci{ 143862306a36Sopenharmony_ci u64 val; 143962306a36Sopenharmony_ci 144062306a36Sopenharmony_ci rdmsrl(MSR_AMD64_IBSCTL, val); 144162306a36Sopenharmony_ci if (!(val & IBSCTL_LVT_OFFSET_VALID)) 144262306a36Sopenharmony_ci return -EINVAL; 144362306a36Sopenharmony_ci 144462306a36Sopenharmony_ci return val & IBSCTL_LVT_OFFSET_MASK; 144562306a36Sopenharmony_ci} 144662306a36Sopenharmony_ci 144762306a36Sopenharmony_cistatic void setup_APIC_ibs(void) 144862306a36Sopenharmony_ci{ 144962306a36Sopenharmony_ci int offset; 145062306a36Sopenharmony_ci 145162306a36Sopenharmony_ci offset = get_ibs_lvt_offset(); 145262306a36Sopenharmony_ci if (offset < 0) 145362306a36Sopenharmony_ci goto failed; 145462306a36Sopenharmony_ci 145562306a36Sopenharmony_ci if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) 145662306a36Sopenharmony_ci return; 145762306a36Sopenharmony_cifailed: 145862306a36Sopenharmony_ci pr_warn("perf: IBS APIC setup failed on cpu #%d\n", 145962306a36Sopenharmony_ci smp_processor_id()); 146062306a36Sopenharmony_ci} 146162306a36Sopenharmony_ci 146262306a36Sopenharmony_cistatic void clear_APIC_ibs(void) 146362306a36Sopenharmony_ci{ 146462306a36Sopenharmony_ci int offset; 146562306a36Sopenharmony_ci 146662306a36Sopenharmony_ci offset = get_ibs_lvt_offset(); 146762306a36Sopenharmony_ci if (offset >= 0) 146862306a36Sopenharmony_ci setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); 146962306a36Sopenharmony_ci} 147062306a36Sopenharmony_ci 147162306a36Sopenharmony_cistatic int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu) 147262306a36Sopenharmony_ci{ 
147362306a36Sopenharmony_ci setup_APIC_ibs(); 147462306a36Sopenharmony_ci return 0; 147562306a36Sopenharmony_ci} 147662306a36Sopenharmony_ci 147762306a36Sopenharmony_ci#ifdef CONFIG_PM 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_cistatic int perf_ibs_suspend(void) 148062306a36Sopenharmony_ci{ 148162306a36Sopenharmony_ci clear_APIC_ibs(); 148262306a36Sopenharmony_ci return 0; 148362306a36Sopenharmony_ci} 148462306a36Sopenharmony_ci 148562306a36Sopenharmony_cistatic void perf_ibs_resume(void) 148662306a36Sopenharmony_ci{ 148762306a36Sopenharmony_ci ibs_eilvt_setup(); 148862306a36Sopenharmony_ci setup_APIC_ibs(); 148962306a36Sopenharmony_ci} 149062306a36Sopenharmony_ci 149162306a36Sopenharmony_cistatic struct syscore_ops perf_ibs_syscore_ops = { 149262306a36Sopenharmony_ci .resume = perf_ibs_resume, 149362306a36Sopenharmony_ci .suspend = perf_ibs_suspend, 149462306a36Sopenharmony_ci}; 149562306a36Sopenharmony_ci 149662306a36Sopenharmony_cistatic void perf_ibs_pm_init(void) 149762306a36Sopenharmony_ci{ 149862306a36Sopenharmony_ci register_syscore_ops(&perf_ibs_syscore_ops); 149962306a36Sopenharmony_ci} 150062306a36Sopenharmony_ci 150162306a36Sopenharmony_ci#else 150262306a36Sopenharmony_ci 150362306a36Sopenharmony_cistatic inline void perf_ibs_pm_init(void) { } 150462306a36Sopenharmony_ci 150562306a36Sopenharmony_ci#endif 150662306a36Sopenharmony_ci 150762306a36Sopenharmony_cistatic int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu) 150862306a36Sopenharmony_ci{ 150962306a36Sopenharmony_ci clear_APIC_ibs(); 151062306a36Sopenharmony_ci return 0; 151162306a36Sopenharmony_ci} 151262306a36Sopenharmony_ci 151362306a36Sopenharmony_cistatic __init int amd_ibs_init(void) 151462306a36Sopenharmony_ci{ 151562306a36Sopenharmony_ci u32 caps; 151662306a36Sopenharmony_ci 151762306a36Sopenharmony_ci caps = __get_ibs_caps(); 151862306a36Sopenharmony_ci if (!caps) 151962306a36Sopenharmony_ci return -ENODEV; /* ibs not supported by the cpu */ 152062306a36Sopenharmony_ci 
152162306a36Sopenharmony_ci ibs_eilvt_setup(); 152262306a36Sopenharmony_ci 152362306a36Sopenharmony_ci if (!ibs_eilvt_valid()) 152462306a36Sopenharmony_ci return -EINVAL; 152562306a36Sopenharmony_ci 152662306a36Sopenharmony_ci perf_ibs_pm_init(); 152762306a36Sopenharmony_ci 152862306a36Sopenharmony_ci ibs_caps = caps; 152962306a36Sopenharmony_ci /* make ibs_caps visible to other cpus: */ 153062306a36Sopenharmony_ci smp_mb(); 153162306a36Sopenharmony_ci /* 153262306a36Sopenharmony_ci * x86_pmu_amd_ibs_starting_cpu will be called from core on 153362306a36Sopenharmony_ci * all online cpus. 153462306a36Sopenharmony_ci */ 153562306a36Sopenharmony_ci cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING, 153662306a36Sopenharmony_ci "perf/x86/amd/ibs:starting", 153762306a36Sopenharmony_ci x86_pmu_amd_ibs_starting_cpu, 153862306a36Sopenharmony_ci x86_pmu_amd_ibs_dying_cpu); 153962306a36Sopenharmony_ci 154062306a36Sopenharmony_ci return perf_event_ibs_init(); 154162306a36Sopenharmony_ci} 154262306a36Sopenharmony_ci 154362306a36Sopenharmony_ci/* Since we need the pci subsystem to init ibs we can't do this earlier: */ 154462306a36Sopenharmony_cidevice_initcall(amd_ibs_init); 1545