162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Implement support for AMD Fam19h Branch Sampling feature 462306a36Sopenharmony_ci * Based on specifications published in AMD PPR Fam19 Model 01 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * Copyright 2021 Google LLC 762306a36Sopenharmony_ci * Contributed by Stephane Eranian <eranian@google.com> 862306a36Sopenharmony_ci */ 962306a36Sopenharmony_ci#include <linux/kernel.h> 1062306a36Sopenharmony_ci#include <linux/jump_label.h> 1162306a36Sopenharmony_ci#include <asm/msr.h> 1262306a36Sopenharmony_ci#include <asm/cpufeature.h> 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include "../perf_event.h" 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ci#define BRS_POISON 0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */ 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci/* Debug Extension Configuration register layout */ 1962306a36Sopenharmony_ciunion amd_debug_extn_cfg { 2062306a36Sopenharmony_ci __u64 val; 2162306a36Sopenharmony_ci struct { 2262306a36Sopenharmony_ci __u64 rsvd0:2, /* reserved */ 2362306a36Sopenharmony_ci brsmen:1, /* branch sample enable */ 2462306a36Sopenharmony_ci rsvd4_3:2,/* reserved - must be 0x3 */ 2562306a36Sopenharmony_ci vb:1, /* valid branches recorded */ 2662306a36Sopenharmony_ci rsvd2:10, /* reserved */ 2762306a36Sopenharmony_ci msroff:4, /* index of next entry to write */ 2862306a36Sopenharmony_ci rsvd3:4, /* reserved */ 2962306a36Sopenharmony_ci pmc:3, /* #PMC holding the sampling event */ 3062306a36Sopenharmony_ci rsvd4:37; /* reserved */ 3162306a36Sopenharmony_ci }; 3262306a36Sopenharmony_ci}; 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_cistatic inline unsigned int brs_from(int idx) 3562306a36Sopenharmony_ci{ 3662306a36Sopenharmony_ci return MSR_AMD_SAMP_BR_FROM + 2 * idx; 3762306a36Sopenharmony_ci} 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_cistatic inline unsigned int brs_to(int idx) 4062306a36Sopenharmony_ci{ 4162306a36Sopenharmony_ci return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1; 4262306a36Sopenharmony_ci} 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_cistatic __always_inline void set_debug_extn_cfg(u64 val) 4562306a36Sopenharmony_ci{ 4662306a36Sopenharmony_ci /* bits[4:3] must always be set to 11b */ 4762306a36Sopenharmony_ci __wrmsr(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3, val >> 32); 4862306a36Sopenharmony_ci} 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_cistatic __always_inline u64 get_debug_extn_cfg(void) 5162306a36Sopenharmony_ci{ 5262306a36Sopenharmony_ci return __rdmsr(MSR_AMD_DBG_EXTN_CFG); 5362306a36Sopenharmony_ci} 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_cistatic bool __init amd_brs_detect(void) 5662306a36Sopenharmony_ci{ 5762306a36Sopenharmony_ci if (!cpu_feature_enabled(X86_FEATURE_BRS)) 5862306a36Sopenharmony_ci return false; 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci switch (boot_cpu_data.x86) { 6162306a36Sopenharmony_ci case 0x19: /* AMD Fam19h (Zen3) */ 6262306a36Sopenharmony_ci x86_pmu.lbr_nr = 16; 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci /* No hardware filtering supported */ 6562306a36Sopenharmony_ci x86_pmu.lbr_sel_map = NULL; 6662306a36Sopenharmony_ci x86_pmu.lbr_sel_mask = 0; 6762306a36Sopenharmony_ci break; 6862306a36Sopenharmony_ci default: 6962306a36Sopenharmony_ci return false; 7062306a36Sopenharmony_ci } 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci return true; 7362306a36Sopenharmony_ci} 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci/* 7662306a36Sopenharmony_ci * Current BRS implementation does not support branch type or privilege level 7762306a36Sopenharmony_ci * filtering. Therefore, this function simply enforces these limitations. No need for 7862306a36Sopenharmony_ci * a br_sel_map. Software filtering is not supported because it would not correlate well 7962306a36Sopenharmony_ci * with a sampling period. 8062306a36Sopenharmony_ci */ 8162306a36Sopenharmony_cistatic int amd_brs_setup_filter(struct perf_event *event) 8262306a36Sopenharmony_ci{ 8362306a36Sopenharmony_ci u64 type = event->attr.branch_sample_type; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci /* No BRS support */ 8662306a36Sopenharmony_ci if (!x86_pmu.lbr_nr) 8762306a36Sopenharmony_ci return -EOPNOTSUPP; 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci /* Can only capture all branches, i.e., no filtering */ 9062306a36Sopenharmony_ci if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY) 9162306a36Sopenharmony_ci return -EINVAL; 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci return 0; 9462306a36Sopenharmony_ci} 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_cistatic inline int amd_is_brs_event(struct perf_event *e) 9762306a36Sopenharmony_ci{ 9862306a36Sopenharmony_ci return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT; 9962306a36Sopenharmony_ci} 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ciint amd_brs_hw_config(struct perf_event *event) 10262306a36Sopenharmony_ci{ 10362306a36Sopenharmony_ci int ret = 0; 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci /* 10662306a36Sopenharmony_ci * Due to interrupt holding, BRS is not recommended in 10762306a36Sopenharmony_ci * counting mode. 10862306a36Sopenharmony_ci */ 10962306a36Sopenharmony_ci if (!is_sampling_event(event)) 11062306a36Sopenharmony_ci return -EINVAL; 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci /* 11362306a36Sopenharmony_ci * Due to the way BRS operates by holding the interrupt until 11462306a36Sopenharmony_ci * lbr_nr entries have been captured, it does not make sense 11562306a36Sopenharmony_ci * to allow sampling on BRS with an event that does not match 11662306a36Sopenharmony_ci * what BRS is capturing, i.e., retired taken branches. 11762306a36Sopenharmony_ci * Otherwise the correlation with the event's period is even 11862306a36Sopenharmony_ci * more loose: 11962306a36Sopenharmony_ci * 12062306a36Sopenharmony_ci * With retired taken branch: 12162306a36Sopenharmony_ci * Effective P = P + 16 + X 12262306a36Sopenharmony_ci * With any other event: 12362306a36Sopenharmony_ci * Effective P = P + Y + X 12462306a36Sopenharmony_ci * 12562306a36Sopenharmony_ci * Where X is the number of taken branches due to interrupt 12662306a36Sopenharmony_ci * skid. Skid is large. 12762306a36Sopenharmony_ci * 12862306a36Sopenharmony_ci * Where Y is the occurences of the event while BRS is 12962306a36Sopenharmony_ci * capturing the lbr_nr entries. 13062306a36Sopenharmony_ci * 13162306a36Sopenharmony_ci * By using retired taken branches, we limit the impact on the 13262306a36Sopenharmony_ci * Y variable. We know it cannot be more than the depth of 13362306a36Sopenharmony_ci * BRS. 13462306a36Sopenharmony_ci */ 13562306a36Sopenharmony_ci if (!amd_is_brs_event(event)) 13662306a36Sopenharmony_ci return -EINVAL; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci /* 13962306a36Sopenharmony_ci * BRS implementation does not work with frequency mode 14062306a36Sopenharmony_ci * reprogramming of the period. 14162306a36Sopenharmony_ci */ 14262306a36Sopenharmony_ci if (event->attr.freq) 14362306a36Sopenharmony_ci return -EINVAL; 14462306a36Sopenharmony_ci /* 14562306a36Sopenharmony_ci * The kernel subtracts BRS depth from period, so it must 14662306a36Sopenharmony_ci * be big enough. 14762306a36Sopenharmony_ci */ 14862306a36Sopenharmony_ci if (event->attr.sample_period <= x86_pmu.lbr_nr) 14962306a36Sopenharmony_ci return -EINVAL; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci /* 15262306a36Sopenharmony_ci * Check if we can allow PERF_SAMPLE_BRANCH_STACK 15362306a36Sopenharmony_ci */ 15462306a36Sopenharmony_ci ret = amd_brs_setup_filter(event); 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci /* only set in case of success */ 15762306a36Sopenharmony_ci if (!ret) 15862306a36Sopenharmony_ci event->hw.flags |= PERF_X86_EVENT_AMD_BRS; 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci return ret; 16162306a36Sopenharmony_ci} 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci/* tos = top of stack, i.e., last valid entry written */ 16462306a36Sopenharmony_cistatic inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg) 16562306a36Sopenharmony_ci{ 16662306a36Sopenharmony_ci /* 16762306a36Sopenharmony_ci * msroff: index of next entry to write so top-of-stack is one off 16862306a36Sopenharmony_ci * if BRS is full then msroff is set back to 0. 16962306a36Sopenharmony_ci */ 17062306a36Sopenharmony_ci return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1; 17162306a36Sopenharmony_ci} 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci/* 17462306a36Sopenharmony_ci * make sure we have a sane BRS offset to begin with 17562306a36Sopenharmony_ci * especially with kexec 17662306a36Sopenharmony_ci */ 17762306a36Sopenharmony_civoid amd_brs_reset(void) 17862306a36Sopenharmony_ci{ 17962306a36Sopenharmony_ci if (!cpu_feature_enabled(X86_FEATURE_BRS)) 18062306a36Sopenharmony_ci return; 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci /* 18362306a36Sopenharmony_ci * Reset config 18462306a36Sopenharmony_ci */ 18562306a36Sopenharmony_ci set_debug_extn_cfg(0); 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci /* 18862306a36Sopenharmony_ci * Mark first entry as poisoned 18962306a36Sopenharmony_ci */ 19062306a36Sopenharmony_ci wrmsrl(brs_to(0), BRS_POISON); 19162306a36Sopenharmony_ci} 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ciint __init amd_brs_init(void) 19462306a36Sopenharmony_ci{ 19562306a36Sopenharmony_ci if (!amd_brs_detect()) 19662306a36Sopenharmony_ci return -EOPNOTSUPP; 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr); 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci return 0; 20162306a36Sopenharmony_ci} 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_civoid amd_brs_enable(void) 20462306a36Sopenharmony_ci{ 20562306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 20662306a36Sopenharmony_ci union amd_debug_extn_cfg cfg; 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci /* Activate only on first user */ 20962306a36Sopenharmony_ci if (++cpuc->brs_active > 1) 21062306a36Sopenharmony_ci return; 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci cfg.val = 0; /* reset all fields */ 21362306a36Sopenharmony_ci cfg.brsmen = 1; /* enable branch sampling */ 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci /* Set enable bit */ 21662306a36Sopenharmony_ci set_debug_extn_cfg(cfg.val); 21762306a36Sopenharmony_ci} 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_civoid amd_brs_enable_all(void) 22062306a36Sopenharmony_ci{ 22162306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 22262306a36Sopenharmony_ci if (cpuc->lbr_users) 22362306a36Sopenharmony_ci amd_brs_enable(); 22462306a36Sopenharmony_ci} 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_civoid amd_brs_disable(void) 22762306a36Sopenharmony_ci{ 22862306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 22962306a36Sopenharmony_ci union amd_debug_extn_cfg cfg; 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci /* Check if active (could be disabled via x86_pmu_disable_all()) */ 23262306a36Sopenharmony_ci if (!cpuc->brs_active) 23362306a36Sopenharmony_ci return; 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci /* Only disable for last user */ 23662306a36Sopenharmony_ci if (--cpuc->brs_active) 23762306a36Sopenharmony_ci return; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci /* 24062306a36Sopenharmony_ci * Clear the brsmen bit but preserve the others as they contain 24162306a36Sopenharmony_ci * useful state such as vb and msroff 24262306a36Sopenharmony_ci */ 24362306a36Sopenharmony_ci cfg.val = get_debug_extn_cfg(); 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci /* 24662306a36Sopenharmony_ci * When coming in on interrupt and BRS is full, then hw will have 24762306a36Sopenharmony_ci * already stopped BRS, no need to issue wrmsr again 24862306a36Sopenharmony_ci */ 24962306a36Sopenharmony_ci if (cfg.brsmen) { 25062306a36Sopenharmony_ci cfg.brsmen = 0; 25162306a36Sopenharmony_ci set_debug_extn_cfg(cfg.val); 25262306a36Sopenharmony_ci } 25362306a36Sopenharmony_ci} 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_civoid amd_brs_disable_all(void) 25662306a36Sopenharmony_ci{ 25762306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 25862306a36Sopenharmony_ci if (cpuc->lbr_users) 25962306a36Sopenharmony_ci amd_brs_disable(); 26062306a36Sopenharmony_ci} 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_cistatic bool amd_brs_match_plm(struct perf_event *event, u64 to) 26362306a36Sopenharmony_ci{ 26462306a36Sopenharmony_ci int type = event->attr.branch_sample_type; 26562306a36Sopenharmony_ci int plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV; 26662306a36Sopenharmony_ci int plm_u = PERF_SAMPLE_BRANCH_USER; 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ci if (!(type & plm_k) && kernel_ip(to)) 26962306a36Sopenharmony_ci return 0; 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci if (!(type & plm_u) && !kernel_ip(to)) 27262306a36Sopenharmony_ci return 0; 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci return 1; 27562306a36Sopenharmony_ci} 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci/* 27862306a36Sopenharmony_ci * Caller must ensure amd_brs_inuse() is true before calling 27962306a36Sopenharmony_ci * return: 28062306a36Sopenharmony_ci */ 28162306a36Sopenharmony_civoid amd_brs_drain(void) 28262306a36Sopenharmony_ci{ 28362306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 28462306a36Sopenharmony_ci struct perf_event *event = cpuc->events[0]; 28562306a36Sopenharmony_ci struct perf_branch_entry *br = cpuc->lbr_entries; 28662306a36Sopenharmony_ci union amd_debug_extn_cfg cfg; 28762306a36Sopenharmony_ci u32 i, nr = 0, num, tos, start; 28862306a36Sopenharmony_ci u32 shift = 64 - boot_cpu_data.x86_virt_bits; 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci /* 29162306a36Sopenharmony_ci * BRS event forced on PMC0, 29262306a36Sopenharmony_ci * so check if there is an event. 29362306a36Sopenharmony_ci * It is possible to have lbr_users > 0 but the event 29462306a36Sopenharmony_ci * not yet scheduled due to long latency PMU irq 29562306a36Sopenharmony_ci */ 29662306a36Sopenharmony_ci if (!event) 29762306a36Sopenharmony_ci goto empty; 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci cfg.val = get_debug_extn_cfg(); 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci /* Sanity check [0-x86_pmu.lbr_nr] */ 30262306a36Sopenharmony_ci if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr)) 30362306a36Sopenharmony_ci goto empty; 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci /* No valid branch */ 30662306a36Sopenharmony_ci if (cfg.vb == 0) 30762306a36Sopenharmony_ci goto empty; 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci /* 31062306a36Sopenharmony_ci * msr.off points to next entry to be written 31162306a36Sopenharmony_ci * tos = most recent entry index = msr.off - 1 31262306a36Sopenharmony_ci * BRS register buffer saturates, so we know we have 31362306a36Sopenharmony_ci * start < tos and that we have to read from start to tos 31462306a36Sopenharmony_ci */ 31562306a36Sopenharmony_ci start = 0; 31662306a36Sopenharmony_ci tos = amd_brs_get_tos(&cfg); 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci num = tos - start + 1; 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_ci /* 32162306a36Sopenharmony_ci * BRS is only one pass (saturation) from MSROFF to depth-1 32262306a36Sopenharmony_ci * MSROFF wraps to zero when buffer is full 32362306a36Sopenharmony_ci */ 32462306a36Sopenharmony_ci for (i = 0; i < num; i++) { 32562306a36Sopenharmony_ci u32 brs_idx = tos - i; 32662306a36Sopenharmony_ci u64 from, to; 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci rdmsrl(brs_to(brs_idx), to); 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ci /* Entry does not belong to us (as marked by kernel) */ 33162306a36Sopenharmony_ci if (to == BRS_POISON) 33262306a36Sopenharmony_ci break; 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci /* 33562306a36Sopenharmony_ci * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved. 33662306a36Sopenharmony_ci * Necessary to generate proper virtual addresses suitable for 33762306a36Sopenharmony_ci * symbolization 33862306a36Sopenharmony_ci */ 33962306a36Sopenharmony_ci to = (u64)(((s64)to << shift) >> shift); 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci if (!amd_brs_match_plm(event, to)) 34262306a36Sopenharmony_ci continue; 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci rdmsrl(brs_from(brs_idx), from); 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci perf_clear_branch_entry_bitfields(br+nr); 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci br[nr].from = from; 34962306a36Sopenharmony_ci br[nr].to = to; 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci nr++; 35262306a36Sopenharmony_ci } 35362306a36Sopenharmony_ciempty: 35462306a36Sopenharmony_ci /* Record number of sampled branches */ 35562306a36Sopenharmony_ci cpuc->lbr_stack.nr = nr; 35662306a36Sopenharmony_ci} 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ci/* 35962306a36Sopenharmony_ci * Poison most recent entry to prevent reuse by next task 36062306a36Sopenharmony_ci * required because BRS entry are not tagged by PID 36162306a36Sopenharmony_ci */ 36262306a36Sopenharmony_cistatic void amd_brs_poison_buffer(void) 36362306a36Sopenharmony_ci{ 36462306a36Sopenharmony_ci union amd_debug_extn_cfg cfg; 36562306a36Sopenharmony_ci unsigned int idx; 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci /* Get current state */ 36862306a36Sopenharmony_ci cfg.val = get_debug_extn_cfg(); 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ci /* idx is most recently written entry */ 37162306a36Sopenharmony_ci idx = amd_brs_get_tos(&cfg); 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci /* Poison target of entry */ 37462306a36Sopenharmony_ci wrmsrl(brs_to(idx), BRS_POISON); 37562306a36Sopenharmony_ci} 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_ci/* 37862306a36Sopenharmony_ci * On context switch in, we need to make sure no samples from previous user 37962306a36Sopenharmony_ci * are left in the BRS. 38062306a36Sopenharmony_ci * 38162306a36Sopenharmony_ci * On ctxswin, sched_in = true, called after the PMU has started 38262306a36Sopenharmony_ci * On ctxswout, sched_in = false, called before the PMU is stopped 38362306a36Sopenharmony_ci */ 38462306a36Sopenharmony_civoid amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) 38562306a36Sopenharmony_ci{ 38662306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci /* no active users */ 38962306a36Sopenharmony_ci if (!cpuc->lbr_users) 39062306a36Sopenharmony_ci return; 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci /* 39362306a36Sopenharmony_ci * On context switch in, we need to ensure we do not use entries 39462306a36Sopenharmony_ci * from previous BRS user on that CPU, so we poison the buffer as 39562306a36Sopenharmony_ci * a faster way compared to resetting all entries. 39662306a36Sopenharmony_ci */ 39762306a36Sopenharmony_ci if (sched_in) 39862306a36Sopenharmony_ci amd_brs_poison_buffer(); 39962306a36Sopenharmony_ci} 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci/* 40262306a36Sopenharmony_ci * called from ACPI processor_idle.c or acpi_pad.c 40362306a36Sopenharmony_ci * with interrupts disabled 40462306a36Sopenharmony_ci */ 40562306a36Sopenharmony_civoid noinstr perf_amd_brs_lopwr_cb(bool lopwr_in) 40662306a36Sopenharmony_ci{ 40762306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 40862306a36Sopenharmony_ci union amd_debug_extn_cfg cfg; 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_ci /* 41162306a36Sopenharmony_ci * on mwait in, we may end up in non C0 state. 41262306a36Sopenharmony_ci * we must disable branch sampling to avoid holding the NMI 41362306a36Sopenharmony_ci * for too long. We disable it in hardware but we 41462306a36Sopenharmony_ci * keep the state in cpuc, so we can re-enable. 41562306a36Sopenharmony_ci * 41662306a36Sopenharmony_ci * The hardware will deliver the NMI if needed when brsmen cleared 41762306a36Sopenharmony_ci */ 41862306a36Sopenharmony_ci if (cpuc->brs_active) { 41962306a36Sopenharmony_ci cfg.val = get_debug_extn_cfg(); 42062306a36Sopenharmony_ci cfg.brsmen = !lopwr_in; 42162306a36Sopenharmony_ci set_debug_extn_cfg(cfg.val); 42262306a36Sopenharmony_ci } 42362306a36Sopenharmony_ci} 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ciDEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb); 42662306a36Sopenharmony_ciEXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb); 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_civoid __init amd_brs_lopwr_init(void) 42962306a36Sopenharmony_ci{ 43062306a36Sopenharmony_ci static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb); 43162306a36Sopenharmony_ci} 432