162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci#include <linux/perf_event.h> 362306a36Sopenharmony_ci#include <linux/types.h> 462306a36Sopenharmony_ci 562306a36Sopenharmony_ci#include <asm/perf_event.h> 662306a36Sopenharmony_ci#include <asm/msr.h> 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include "../perf_event.h" 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci/* 1162306a36Sopenharmony_ci * Intel LBR_SELECT bits 1262306a36Sopenharmony_ci * Intel Vol3a, April 2011, Section 16.7 Table 16-10 1362306a36Sopenharmony_ci * 1462306a36Sopenharmony_ci * Hardware branch filter (not available on all CPUs) 1562306a36Sopenharmony_ci */ 1662306a36Sopenharmony_ci#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */ 1762306a36Sopenharmony_ci#define LBR_USER_BIT 1 /* do not capture at ring > 0 */ 1862306a36Sopenharmony_ci#define LBR_JCC_BIT 2 /* do not capture conditional branches */ 1962306a36Sopenharmony_ci#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */ 2062306a36Sopenharmony_ci#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */ 2162306a36Sopenharmony_ci#define LBR_RETURN_BIT 5 /* do not capture near returns */ 2262306a36Sopenharmony_ci#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */ 2362306a36Sopenharmony_ci#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */ 2462306a36Sopenharmony_ci#define LBR_FAR_BIT 8 /* do not capture far branches */ 2562306a36Sopenharmony_ci#define LBR_CALL_STACK_BIT 9 /* enable call stack */ 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci/* 2862306a36Sopenharmony_ci * Following bit only exists in Linux; we mask it out before writing it to 2962306a36Sopenharmony_ci * the actual MSR. But it helps the constraint perf code to understand 3062306a36Sopenharmony_ci * that this is a separate configuration. 3162306a36Sopenharmony_ci */ 3262306a36Sopenharmony_ci#define LBR_NO_INFO_BIT 63 /* don't read LBR_INFO. */ 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci#define LBR_KERNEL (1 << LBR_KERNEL_BIT) 3562306a36Sopenharmony_ci#define LBR_USER (1 << LBR_USER_BIT) 3662306a36Sopenharmony_ci#define LBR_JCC (1 << LBR_JCC_BIT) 3762306a36Sopenharmony_ci#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT) 3862306a36Sopenharmony_ci#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT) 3962306a36Sopenharmony_ci#define LBR_RETURN (1 << LBR_RETURN_BIT) 4062306a36Sopenharmony_ci#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT) 4162306a36Sopenharmony_ci#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) 4262306a36Sopenharmony_ci#define LBR_FAR (1 << LBR_FAR_BIT) 4362306a36Sopenharmony_ci#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT) 4462306a36Sopenharmony_ci#define LBR_NO_INFO (1ULL << LBR_NO_INFO_BIT) 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci#define LBR_PLM (LBR_KERNEL | LBR_USER) 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci#define LBR_SEL_MASK 0x3ff /* valid bits in LBR_SELECT */ 4962306a36Sopenharmony_ci#define LBR_NOT_SUPP -1 /* LBR filter not supported */ 5062306a36Sopenharmony_ci#define LBR_IGN 0 /* ignored */ 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci#define LBR_ANY \ 5362306a36Sopenharmony_ci (LBR_JCC |\ 5462306a36Sopenharmony_ci LBR_REL_CALL |\ 5562306a36Sopenharmony_ci LBR_IND_CALL |\ 5662306a36Sopenharmony_ci LBR_RETURN |\ 5762306a36Sopenharmony_ci LBR_REL_JMP |\ 5862306a36Sopenharmony_ci LBR_IND_JMP |\ 5962306a36Sopenharmony_ci LBR_FAR) 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci#define LBR_FROM_FLAG_MISPRED BIT_ULL(63) 6262306a36Sopenharmony_ci#define LBR_FROM_FLAG_IN_TX BIT_ULL(62) 6362306a36Sopenharmony_ci#define LBR_FROM_FLAG_ABORT BIT_ULL(61) 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci/* 6862306a36Sopenharmony_ci * Intel LBR_CTL bits 6962306a36Sopenharmony_ci * 7062306a36Sopenharmony_ci * Hardware branch filter for Arch LBR 7162306a36Sopenharmony_ci */ 7262306a36Sopenharmony_ci#define ARCH_LBR_KERNEL_BIT 1 /* capture at ring0 */ 7362306a36Sopenharmony_ci#define ARCH_LBR_USER_BIT 2 /* capture at ring > 0 */ 7462306a36Sopenharmony_ci#define ARCH_LBR_CALL_STACK_BIT 3 /* enable call stack */ 7562306a36Sopenharmony_ci#define ARCH_LBR_JCC_BIT 16 /* capture conditional branches */ 7662306a36Sopenharmony_ci#define ARCH_LBR_REL_JMP_BIT 17 /* capture relative jumps */ 7762306a36Sopenharmony_ci#define ARCH_LBR_IND_JMP_BIT 18 /* capture indirect jumps */ 7862306a36Sopenharmony_ci#define ARCH_LBR_REL_CALL_BIT 19 /* capture relative calls */ 7962306a36Sopenharmony_ci#define ARCH_LBR_IND_CALL_BIT 20 /* capture indirect calls */ 8062306a36Sopenharmony_ci#define ARCH_LBR_RETURN_BIT 21 /* capture near returns */ 8162306a36Sopenharmony_ci#define ARCH_LBR_OTHER_BRANCH_BIT 22 /* capture other branches */ 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci#define ARCH_LBR_KERNEL (1ULL << ARCH_LBR_KERNEL_BIT) 8462306a36Sopenharmony_ci#define ARCH_LBR_USER (1ULL << ARCH_LBR_USER_BIT) 8562306a36Sopenharmony_ci#define ARCH_LBR_CALL_STACK (1ULL << ARCH_LBR_CALL_STACK_BIT) 8662306a36Sopenharmony_ci#define ARCH_LBR_JCC (1ULL << ARCH_LBR_JCC_BIT) 8762306a36Sopenharmony_ci#define ARCH_LBR_REL_JMP (1ULL << ARCH_LBR_REL_JMP_BIT) 8862306a36Sopenharmony_ci#define ARCH_LBR_IND_JMP (1ULL << ARCH_LBR_IND_JMP_BIT) 8962306a36Sopenharmony_ci#define ARCH_LBR_REL_CALL (1ULL << ARCH_LBR_REL_CALL_BIT) 9062306a36Sopenharmony_ci#define ARCH_LBR_IND_CALL (1ULL << ARCH_LBR_IND_CALL_BIT) 9162306a36Sopenharmony_ci#define ARCH_LBR_RETURN (1ULL << ARCH_LBR_RETURN_BIT) 9262306a36Sopenharmony_ci#define ARCH_LBR_OTHER_BRANCH (1ULL << ARCH_LBR_OTHER_BRANCH_BIT) 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci#define ARCH_LBR_ANY \ 9562306a36Sopenharmony_ci (ARCH_LBR_JCC |\ 9662306a36Sopenharmony_ci ARCH_LBR_REL_JMP |\ 9762306a36Sopenharmony_ci ARCH_LBR_IND_JMP |\ 9862306a36Sopenharmony_ci ARCH_LBR_REL_CALL |\ 9962306a36Sopenharmony_ci ARCH_LBR_IND_CALL |\ 10062306a36Sopenharmony_ci ARCH_LBR_RETURN |\ 10162306a36Sopenharmony_ci ARCH_LBR_OTHER_BRANCH) 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci#define ARCH_LBR_CTL_MASK 0x7f000e 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_cistatic void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_cistatic __always_inline bool is_lbr_call_stack_bit_set(u64 config) 10862306a36Sopenharmony_ci{ 10962306a36Sopenharmony_ci if (static_cpu_has(X86_FEATURE_ARCH_LBR)) 11062306a36Sopenharmony_ci return !!(config & ARCH_LBR_CALL_STACK); 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci return !!(config & LBR_CALL_STACK); 11362306a36Sopenharmony_ci} 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci/* 11662306a36Sopenharmony_ci * We only support LBR implementations that have FREEZE_LBRS_ON_PMI 11762306a36Sopenharmony_ci * otherwise it becomes near impossible to get a reliable stack. 11862306a36Sopenharmony_ci */ 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_cistatic void __intel_pmu_lbr_enable(bool pmi) 12162306a36Sopenharmony_ci{ 12262306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 12362306a36Sopenharmony_ci u64 debugctl, lbr_select = 0, orig_debugctl; 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci /* 12662306a36Sopenharmony_ci * No need to unfreeze manually, as v4 can do that as part 12762306a36Sopenharmony_ci * of the GLOBAL_STATUS ack. 12862306a36Sopenharmony_ci */ 12962306a36Sopenharmony_ci if (pmi && x86_pmu.version >= 4) 13062306a36Sopenharmony_ci return; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci /* 13362306a36Sopenharmony_ci * No need to reprogram LBR_SELECT in a PMI, as it 13462306a36Sopenharmony_ci * did not change. 13562306a36Sopenharmony_ci */ 13662306a36Sopenharmony_ci if (cpuc->lbr_sel) 13762306a36Sopenharmony_ci lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask; 13862306a36Sopenharmony_ci if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel) 13962306a36Sopenharmony_ci wrmsrl(MSR_LBR_SELECT, lbr_select); 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); 14262306a36Sopenharmony_ci orig_debugctl = debugctl; 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) 14562306a36Sopenharmony_ci debugctl |= DEBUGCTLMSR_LBR; 14662306a36Sopenharmony_ci /* 14762306a36Sopenharmony_ci * LBR callstack does not work well with FREEZE_LBRS_ON_PMI. 14862306a36Sopenharmony_ci * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions 14962306a36Sopenharmony_ci * may cause superfluous increase/decrease of LBR_TOS. 15062306a36Sopenharmony_ci */ 15162306a36Sopenharmony_ci if (is_lbr_call_stack_bit_set(lbr_select)) 15262306a36Sopenharmony_ci debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; 15362306a36Sopenharmony_ci else 15462306a36Sopenharmony_ci debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci if (orig_debugctl != debugctl) 15762306a36Sopenharmony_ci wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci if (static_cpu_has(X86_FEATURE_ARCH_LBR)) 16062306a36Sopenharmony_ci wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN); 16162306a36Sopenharmony_ci} 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_civoid intel_pmu_lbr_reset_32(void) 16462306a36Sopenharmony_ci{ 16562306a36Sopenharmony_ci int i; 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_ci for (i = 0; i < x86_pmu.lbr_nr; i++) 16862306a36Sopenharmony_ci wrmsrl(x86_pmu.lbr_from + i, 0); 16962306a36Sopenharmony_ci} 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_civoid intel_pmu_lbr_reset_64(void) 17262306a36Sopenharmony_ci{ 17362306a36Sopenharmony_ci int i; 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci for (i = 0; i < x86_pmu.lbr_nr; i++) { 17662306a36Sopenharmony_ci wrmsrl(x86_pmu.lbr_from + i, 0); 17762306a36Sopenharmony_ci wrmsrl(x86_pmu.lbr_to + i, 0); 17862306a36Sopenharmony_ci if (x86_pmu.lbr_has_info) 17962306a36Sopenharmony_ci wrmsrl(x86_pmu.lbr_info + i, 0); 18062306a36Sopenharmony_ci } 18162306a36Sopenharmony_ci} 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_cistatic void intel_pmu_arch_lbr_reset(void) 18462306a36Sopenharmony_ci{ 18562306a36Sopenharmony_ci /* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */ 18662306a36Sopenharmony_ci wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr); 18762306a36Sopenharmony_ci} 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_civoid intel_pmu_lbr_reset(void) 19062306a36Sopenharmony_ci{ 19162306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci if (!x86_pmu.lbr_nr) 19462306a36Sopenharmony_ci return; 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci x86_pmu.lbr_reset(); 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci cpuc->last_task_ctx = NULL; 19962306a36Sopenharmony_ci cpuc->last_log_id = 0; 20062306a36Sopenharmony_ci if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select) 20162306a36Sopenharmony_ci wrmsrl(MSR_LBR_SELECT, 0); 20262306a36Sopenharmony_ci} 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci/* 20562306a36Sopenharmony_ci * TOS = most recently recorded branch 20662306a36Sopenharmony_ci */ 20762306a36Sopenharmony_cistatic inline u64 intel_pmu_lbr_tos(void) 20862306a36Sopenharmony_ci{ 20962306a36Sopenharmony_ci u64 tos; 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ci rdmsrl(x86_pmu.lbr_tos, tos); 21262306a36Sopenharmony_ci return tos; 21362306a36Sopenharmony_ci} 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_cienum { 21662306a36Sopenharmony_ci LBR_NONE, 21762306a36Sopenharmony_ci LBR_VALID, 21862306a36Sopenharmony_ci}; 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci/* 22162306a36Sopenharmony_ci * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x 22262306a36Sopenharmony_ci * are the TSX flags when TSX is supported, but when TSX is not supported 22362306a36Sopenharmony_ci * they have no consistent behavior: 22462306a36Sopenharmony_ci * 22562306a36Sopenharmony_ci * - For wrmsr(), bits 61:62 are considered part of the sign extension. 22662306a36Sopenharmony_ci * - For HW updates (branch captures) bits 61:62 are always OFF and are not 22762306a36Sopenharmony_ci * part of the sign extension. 22862306a36Sopenharmony_ci * 22962306a36Sopenharmony_ci * Therefore, if: 23062306a36Sopenharmony_ci * 23162306a36Sopenharmony_ci * 1) LBR format LBR_FORMAT_EIP_FLAGS2 23262306a36Sopenharmony_ci * 2) CPU has no TSX support enabled 23362306a36Sopenharmony_ci * 23462306a36Sopenharmony_ci * ... then any value passed to wrmsr() must be sign extended to 63 bits and any 23562306a36Sopenharmony_ci * value from rdmsr() must be converted to have a 61 bits sign extension, 23662306a36Sopenharmony_ci * ignoring the TSX flags. 23762306a36Sopenharmony_ci */ 23862306a36Sopenharmony_cistatic inline bool lbr_from_signext_quirk_needed(void) 23962306a36Sopenharmony_ci{ 24062306a36Sopenharmony_ci bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || 24162306a36Sopenharmony_ci boot_cpu_has(X86_FEATURE_RTM); 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci return !tsx_support; 24462306a36Sopenharmony_ci} 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_cistatic DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci/* If quirk is enabled, ensure sign extension is 63 bits: */ 24962306a36Sopenharmony_ciinline u64 lbr_from_signext_quirk_wr(u64 val) 25062306a36Sopenharmony_ci{ 25162306a36Sopenharmony_ci if (static_branch_unlikely(&lbr_from_quirk_key)) { 25262306a36Sopenharmony_ci /* 25362306a36Sopenharmony_ci * Sign extend into bits 61:62 while preserving bit 63. 25462306a36Sopenharmony_ci * 25562306a36Sopenharmony_ci * Quirk is enabled when TSX is disabled. Therefore TSX bits 25662306a36Sopenharmony_ci * in val are always OFF and must be changed to be sign 25762306a36Sopenharmony_ci * extension bits. Since bits 59:60 are guaranteed to be 25862306a36Sopenharmony_ci * part of the sign extension bits, we can just copy them 25962306a36Sopenharmony_ci * to 61:62. 26062306a36Sopenharmony_ci */ 26162306a36Sopenharmony_ci val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2; 26262306a36Sopenharmony_ci } 26362306a36Sopenharmony_ci return val; 26462306a36Sopenharmony_ci} 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci/* 26762306a36Sopenharmony_ci * If quirk is needed, ensure sign extension is 61 bits: 26862306a36Sopenharmony_ci */ 26962306a36Sopenharmony_cistatic u64 lbr_from_signext_quirk_rd(u64 val) 27062306a36Sopenharmony_ci{ 27162306a36Sopenharmony_ci if (static_branch_unlikely(&lbr_from_quirk_key)) { 27262306a36Sopenharmony_ci /* 27362306a36Sopenharmony_ci * Quirk is on when TSX is not enabled. Therefore TSX 27462306a36Sopenharmony_ci * flags must be read as OFF. 27562306a36Sopenharmony_ci */ 27662306a36Sopenharmony_ci val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT); 27762306a36Sopenharmony_ci } 27862306a36Sopenharmony_ci return val; 27962306a36Sopenharmony_ci} 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_cistatic __always_inline void wrlbr_from(unsigned int idx, u64 val) 28262306a36Sopenharmony_ci{ 28362306a36Sopenharmony_ci val = lbr_from_signext_quirk_wr(val); 28462306a36Sopenharmony_ci wrmsrl(x86_pmu.lbr_from + idx, val); 28562306a36Sopenharmony_ci} 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_cistatic __always_inline void wrlbr_to(unsigned int idx, u64 val) 28862306a36Sopenharmony_ci{ 28962306a36Sopenharmony_ci wrmsrl(x86_pmu.lbr_to + idx, val); 29062306a36Sopenharmony_ci} 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_cistatic __always_inline void wrlbr_info(unsigned int idx, u64 val) 29362306a36Sopenharmony_ci{ 29462306a36Sopenharmony_ci wrmsrl(x86_pmu.lbr_info + idx, val); 29562306a36Sopenharmony_ci} 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_cistatic __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr) 29862306a36Sopenharmony_ci{ 29962306a36Sopenharmony_ci u64 val; 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci if (lbr) 30262306a36Sopenharmony_ci return lbr->from; 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci rdmsrl(x86_pmu.lbr_from + idx, val); 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci return lbr_from_signext_quirk_rd(val); 30762306a36Sopenharmony_ci} 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_cistatic __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr) 31062306a36Sopenharmony_ci{ 31162306a36Sopenharmony_ci u64 val; 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci if (lbr) 31462306a36Sopenharmony_ci return lbr->to; 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci rdmsrl(x86_pmu.lbr_to + idx, val); 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci return val; 31962306a36Sopenharmony_ci} 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_cistatic __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr) 32262306a36Sopenharmony_ci{ 32362306a36Sopenharmony_ci u64 val; 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci if (lbr) 32662306a36Sopenharmony_ci return lbr->info; 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci rdmsrl(x86_pmu.lbr_info + idx, val); 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ci return val; 33162306a36Sopenharmony_ci} 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_cistatic inline void 33462306a36Sopenharmony_ciwrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info) 33562306a36Sopenharmony_ci{ 33662306a36Sopenharmony_ci wrlbr_from(idx, lbr->from); 33762306a36Sopenharmony_ci wrlbr_to(idx, lbr->to); 33862306a36Sopenharmony_ci if (need_info) 33962306a36Sopenharmony_ci wrlbr_info(idx, lbr->info); 34062306a36Sopenharmony_ci} 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_cistatic inline bool 34362306a36Sopenharmony_cirdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info) 34462306a36Sopenharmony_ci{ 34562306a36Sopenharmony_ci u64 from = rdlbr_from(idx, NULL); 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci /* Don't read invalid entry */ 34862306a36Sopenharmony_ci if (!from) 34962306a36Sopenharmony_ci return false; 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci lbr->from = from; 35262306a36Sopenharmony_ci lbr->to = rdlbr_to(idx, NULL); 35362306a36Sopenharmony_ci if (need_info) 35462306a36Sopenharmony_ci lbr->info = rdlbr_info(idx, NULL); 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci return true; 35762306a36Sopenharmony_ci} 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_civoid intel_pmu_lbr_restore(void *ctx) 36062306a36Sopenharmony_ci{ 36162306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 36262306a36Sopenharmony_ci struct x86_perf_task_context *task_ctx = ctx; 36362306a36Sopenharmony_ci bool need_info = x86_pmu.lbr_has_info; 36462306a36Sopenharmony_ci u64 tos = task_ctx->tos; 36562306a36Sopenharmony_ci unsigned lbr_idx, mask; 36662306a36Sopenharmony_ci int i; 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci mask = x86_pmu.lbr_nr - 1; 36962306a36Sopenharmony_ci for (i = 0; i < task_ctx->valid_lbrs; i++) { 37062306a36Sopenharmony_ci lbr_idx = (tos - i) & mask; 37162306a36Sopenharmony_ci wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info); 37262306a36Sopenharmony_ci } 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_ci for (; i < x86_pmu.lbr_nr; i++) { 37562306a36Sopenharmony_ci lbr_idx = (tos - i) & mask; 37662306a36Sopenharmony_ci wrlbr_from(lbr_idx, 0); 37762306a36Sopenharmony_ci wrlbr_to(lbr_idx, 0); 37862306a36Sopenharmony_ci if (need_info) 37962306a36Sopenharmony_ci wrlbr_info(lbr_idx, 0); 38062306a36Sopenharmony_ci } 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci wrmsrl(x86_pmu.lbr_tos, tos); 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci if (cpuc->lbr_select) 38562306a36Sopenharmony_ci wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel); 38662306a36Sopenharmony_ci} 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_cistatic void intel_pmu_arch_lbr_restore(void *ctx) 38962306a36Sopenharmony_ci{ 39062306a36Sopenharmony_ci struct x86_perf_task_context_arch_lbr *task_ctx = ctx; 39162306a36Sopenharmony_ci struct lbr_entry *entries = task_ctx->entries; 39262306a36Sopenharmony_ci int i; 39362306a36Sopenharmony_ci 39462306a36Sopenharmony_ci /* Fast reset the LBRs before restore if the call stack is not full. */ 39562306a36Sopenharmony_ci if (!entries[x86_pmu.lbr_nr - 1].from) 39662306a36Sopenharmony_ci intel_pmu_arch_lbr_reset(); 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_ci for (i = 0; i < x86_pmu.lbr_nr; i++) { 39962306a36Sopenharmony_ci if (!entries[i].from) 40062306a36Sopenharmony_ci break; 40162306a36Sopenharmony_ci wrlbr_all(&entries[i], i, true); 40262306a36Sopenharmony_ci } 40362306a36Sopenharmony_ci} 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci/* 40662306a36Sopenharmony_ci * Restore the Architecture LBR state from the xsave area in the perf 40762306a36Sopenharmony_ci * context data for the task via the XRSTORS instruction. 40862306a36Sopenharmony_ci */ 40962306a36Sopenharmony_cistatic void intel_pmu_arch_lbr_xrstors(void *ctx) 41062306a36Sopenharmony_ci{ 41162306a36Sopenharmony_ci struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx; 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci xrstors(&task_ctx->xsave, XFEATURE_MASK_LBR); 41462306a36Sopenharmony_ci} 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_cistatic __always_inline bool lbr_is_reset_in_cstate(void *ctx) 41762306a36Sopenharmony_ci{ 41862306a36Sopenharmony_ci if (static_cpu_has(X86_FEATURE_ARCH_LBR)) 41962306a36Sopenharmony_ci return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL); 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL); 42262306a36Sopenharmony_ci} 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_cistatic void __intel_pmu_lbr_restore(void *ctx) 42562306a36Sopenharmony_ci{ 42662306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci if (task_context_opt(ctx)->lbr_callstack_users == 0 || 42962306a36Sopenharmony_ci task_context_opt(ctx)->lbr_stack_state == LBR_NONE) { 43062306a36Sopenharmony_ci intel_pmu_lbr_reset(); 43162306a36Sopenharmony_ci return; 43262306a36Sopenharmony_ci } 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci /* 43562306a36Sopenharmony_ci * Does not restore the LBR registers, if 43662306a36Sopenharmony_ci * - No one else touched them, and 43762306a36Sopenharmony_ci * - Was not cleared in Cstate 43862306a36Sopenharmony_ci */ 43962306a36Sopenharmony_ci if ((ctx == cpuc->last_task_ctx) && 44062306a36Sopenharmony_ci (task_context_opt(ctx)->log_id == cpuc->last_log_id) && 44162306a36Sopenharmony_ci !lbr_is_reset_in_cstate(ctx)) { 44262306a36Sopenharmony_ci task_context_opt(ctx)->lbr_stack_state = LBR_NONE; 44362306a36Sopenharmony_ci return; 44462306a36Sopenharmony_ci } 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_ci x86_pmu.lbr_restore(ctx); 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci task_context_opt(ctx)->lbr_stack_state = LBR_NONE; 44962306a36Sopenharmony_ci} 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_civoid intel_pmu_lbr_save(void *ctx) 45262306a36Sopenharmony_ci{ 45362306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 45462306a36Sopenharmony_ci struct x86_perf_task_context *task_ctx = ctx; 45562306a36Sopenharmony_ci bool need_info = x86_pmu.lbr_has_info; 45662306a36Sopenharmony_ci unsigned lbr_idx, mask; 45762306a36Sopenharmony_ci u64 tos; 45862306a36Sopenharmony_ci int i; 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci mask = x86_pmu.lbr_nr - 1; 46162306a36Sopenharmony_ci tos = intel_pmu_lbr_tos(); 46262306a36Sopenharmony_ci for (i = 0; i < x86_pmu.lbr_nr; i++) { 46362306a36Sopenharmony_ci lbr_idx = (tos - i) & mask; 46462306a36Sopenharmony_ci if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info)) 46562306a36Sopenharmony_ci break; 46662306a36Sopenharmony_ci } 46762306a36Sopenharmony_ci task_ctx->valid_lbrs = i; 46862306a36Sopenharmony_ci task_ctx->tos = tos; 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci if (cpuc->lbr_select) 47162306a36Sopenharmony_ci rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel); 47262306a36Sopenharmony_ci} 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_cistatic void intel_pmu_arch_lbr_save(void *ctx) 47562306a36Sopenharmony_ci{ 47662306a36Sopenharmony_ci struct x86_perf_task_context_arch_lbr *task_ctx = ctx; 47762306a36Sopenharmony_ci struct lbr_entry *entries = task_ctx->entries; 47862306a36Sopenharmony_ci int i; 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_ci for (i = 0; i < x86_pmu.lbr_nr; i++) { 48162306a36Sopenharmony_ci if (!rdlbr_all(&entries[i], i, true)) 48262306a36Sopenharmony_ci break; 48362306a36Sopenharmony_ci } 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci /* LBR call stack is not full. Reset is required in restore. */ 48662306a36Sopenharmony_ci if (i < x86_pmu.lbr_nr) 48762306a36Sopenharmony_ci entries[x86_pmu.lbr_nr - 1].from = 0; 48862306a36Sopenharmony_ci} 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci/* 49162306a36Sopenharmony_ci * Save the Architecture LBR state to the xsave area in the perf 49262306a36Sopenharmony_ci * context data for the task via the XSAVES instruction. 49362306a36Sopenharmony_ci */ 49462306a36Sopenharmony_cistatic void intel_pmu_arch_lbr_xsaves(void *ctx) 49562306a36Sopenharmony_ci{ 49662306a36Sopenharmony_ci struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx; 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_ci xsaves(&task_ctx->xsave, XFEATURE_MASK_LBR); 49962306a36Sopenharmony_ci} 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_cistatic void __intel_pmu_lbr_save(void *ctx) 50262306a36Sopenharmony_ci{ 50362306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci if (task_context_opt(ctx)->lbr_callstack_users == 0) { 50662306a36Sopenharmony_ci task_context_opt(ctx)->lbr_stack_state = LBR_NONE; 50762306a36Sopenharmony_ci return; 50862306a36Sopenharmony_ci } 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_ci x86_pmu.lbr_save(ctx); 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci task_context_opt(ctx)->lbr_stack_state = LBR_VALID; 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci cpuc->last_task_ctx = ctx; 51562306a36Sopenharmony_ci cpuc->last_log_id = ++task_context_opt(ctx)->log_id; 51662306a36Sopenharmony_ci} 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_civoid intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc, 51962306a36Sopenharmony_ci struct perf_event_pmu_context *next_epc) 52062306a36Sopenharmony_ci{ 52162306a36Sopenharmony_ci void *prev_ctx_data, *next_ctx_data; 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci swap(prev_epc->task_ctx_data, next_epc->task_ctx_data); 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci /* 52662306a36Sopenharmony_ci * Architecture specific synchronization makes sense in case 52762306a36Sopenharmony_ci * both prev_epc->task_ctx_data and next_epc->task_ctx_data 52862306a36Sopenharmony_ci * pointers are allocated. 52962306a36Sopenharmony_ci */ 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci prev_ctx_data = next_epc->task_ctx_data; 53262306a36Sopenharmony_ci next_ctx_data = prev_epc->task_ctx_data; 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci if (!prev_ctx_data || !next_ctx_data) 53562306a36Sopenharmony_ci return; 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci swap(task_context_opt(prev_ctx_data)->lbr_callstack_users, 53862306a36Sopenharmony_ci task_context_opt(next_ctx_data)->lbr_callstack_users); 53962306a36Sopenharmony_ci} 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_civoid intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) 54262306a36Sopenharmony_ci{ 54362306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 54462306a36Sopenharmony_ci void *task_ctx; 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_ci if (!cpuc->lbr_users) 54762306a36Sopenharmony_ci return; 54862306a36Sopenharmony_ci 54962306a36Sopenharmony_ci /* 55062306a36Sopenharmony_ci * If LBR callstack feature is enabled and the stack was saved when 55162306a36Sopenharmony_ci * the task was scheduled out, restore the stack. Otherwise flush 55262306a36Sopenharmony_ci * the LBR stack. 55362306a36Sopenharmony_ci */ 55462306a36Sopenharmony_ci task_ctx = pmu_ctx ? pmu_ctx->task_ctx_data : NULL; 55562306a36Sopenharmony_ci if (task_ctx) { 55662306a36Sopenharmony_ci if (sched_in) 55762306a36Sopenharmony_ci __intel_pmu_lbr_restore(task_ctx); 55862306a36Sopenharmony_ci else 55962306a36Sopenharmony_ci __intel_pmu_lbr_save(task_ctx); 56062306a36Sopenharmony_ci return; 56162306a36Sopenharmony_ci } 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci /* 56462306a36Sopenharmony_ci * Since a context switch can flip the address space and LBR entries 56562306a36Sopenharmony_ci * are not tagged with an identifier, we need to wipe the LBR, even for 56662306a36Sopenharmony_ci * per-cpu events. You simply cannot resolve the branches from the old 56762306a36Sopenharmony_ci * address space. 56862306a36Sopenharmony_ci */ 56962306a36Sopenharmony_ci if (sched_in) 57062306a36Sopenharmony_ci intel_pmu_lbr_reset(); 57162306a36Sopenharmony_ci} 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_cistatic inline bool branch_user_callstack(unsigned br_sel) 57462306a36Sopenharmony_ci{ 57562306a36Sopenharmony_ci return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK); 57662306a36Sopenharmony_ci} 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_civoid intel_pmu_lbr_add(struct perf_event *event) 57962306a36Sopenharmony_ci{ 58062306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci if (!x86_pmu.lbr_nr) 58362306a36Sopenharmony_ci return; 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_ci if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT) 58662306a36Sopenharmony_ci cpuc->lbr_select = 1; 58762306a36Sopenharmony_ci 58862306a36Sopenharmony_ci cpuc->br_sel = event->hw.branch_reg.reg; 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci if (branch_user_callstack(cpuc->br_sel) && event->pmu_ctx->task_ctx_data) 59162306a36Sopenharmony_ci task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users++; 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci /* 59462306a36Sopenharmony_ci * Request pmu::sched_task() callback, which will fire inside the 59562306a36Sopenharmony_ci * regular perf event scheduling, so that call will: 59662306a36Sopenharmony_ci * 59762306a36Sopenharmony_ci * - restore or wipe; when LBR-callstack, 59862306a36Sopenharmony_ci * - wipe; otherwise, 59962306a36Sopenharmony_ci * 60062306a36Sopenharmony_ci * when this is from __perf_event_task_sched_in(). 60162306a36Sopenharmony_ci * 60262306a36Sopenharmony_ci * However, if this is from perf_install_in_context(), no such callback 60362306a36Sopenharmony_ci * will follow and we'll need to reset the LBR here if this is the 60462306a36Sopenharmony_ci * first LBR event. 60562306a36Sopenharmony_ci * 60662306a36Sopenharmony_ci * The problem is, we cannot tell these cases apart... but we can 60762306a36Sopenharmony_ci * exclude the biggest chunk of cases by looking at 60862306a36Sopenharmony_ci * event->total_time_running. An event that has accrued runtime cannot 60962306a36Sopenharmony_ci * be 'new'. Conversely, a new event can get installed through the 61062306a36Sopenharmony_ci * context switch path for the first time. 61162306a36Sopenharmony_ci */ 61262306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0) 61362306a36Sopenharmony_ci cpuc->lbr_pebs_users++; 61462306a36Sopenharmony_ci perf_sched_cb_inc(event->pmu); 61562306a36Sopenharmony_ci if (!cpuc->lbr_users++ && !event->total_time_running) 61662306a36Sopenharmony_ci intel_pmu_lbr_reset(); 61762306a36Sopenharmony_ci} 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_civoid release_lbr_buffers(void) 62062306a36Sopenharmony_ci{ 62162306a36Sopenharmony_ci struct kmem_cache *kmem_cache; 62262306a36Sopenharmony_ci struct cpu_hw_events *cpuc; 62362306a36Sopenharmony_ci int cpu; 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_ci if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) 62662306a36Sopenharmony_ci return; 62762306a36Sopenharmony_ci 62862306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 62962306a36Sopenharmony_ci cpuc = per_cpu_ptr(&cpu_hw_events, cpu); 63062306a36Sopenharmony_ci kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; 63162306a36Sopenharmony_ci if (kmem_cache && cpuc->lbr_xsave) { 63262306a36Sopenharmony_ci kmem_cache_free(kmem_cache, cpuc->lbr_xsave); 63362306a36Sopenharmony_ci cpuc->lbr_xsave = NULL; 63462306a36Sopenharmony_ci } 63562306a36Sopenharmony_ci } 63662306a36Sopenharmony_ci} 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_civoid reserve_lbr_buffers(void) 63962306a36Sopenharmony_ci{ 64062306a36Sopenharmony_ci struct kmem_cache *kmem_cache; 64162306a36Sopenharmony_ci struct cpu_hw_events *cpuc; 64262306a36Sopenharmony_ci int cpu; 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_ci if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) 64562306a36Sopenharmony_ci return; 64662306a36Sopenharmony_ci 64762306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 64862306a36Sopenharmony_ci cpuc = per_cpu_ptr(&cpu_hw_events, cpu); 64962306a36Sopenharmony_ci kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; 65062306a36Sopenharmony_ci if (!kmem_cache || cpuc->lbr_xsave) 65162306a36Sopenharmony_ci continue; 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_ci cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, 65462306a36Sopenharmony_ci GFP_KERNEL | __GFP_ZERO, 65562306a36Sopenharmony_ci cpu_to_node(cpu)); 65662306a36Sopenharmony_ci } 65762306a36Sopenharmony_ci} 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_civoid intel_pmu_lbr_del(struct perf_event *event) 66062306a36Sopenharmony_ci{ 66162306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 66262306a36Sopenharmony_ci 66362306a36Sopenharmony_ci if (!x86_pmu.lbr_nr) 66462306a36Sopenharmony_ci return; 66562306a36Sopenharmony_ci 66662306a36Sopenharmony_ci if (branch_user_callstack(cpuc->br_sel) && 66762306a36Sopenharmony_ci event->pmu_ctx->task_ctx_data) 66862306a36Sopenharmony_ci task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users--; 66962306a36Sopenharmony_ci 67062306a36Sopenharmony_ci if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT) 67162306a36Sopenharmony_ci cpuc->lbr_select = 0; 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0) 67462306a36Sopenharmony_ci cpuc->lbr_pebs_users--; 67562306a36Sopenharmony_ci cpuc->lbr_users--; 67662306a36Sopenharmony_ci WARN_ON_ONCE(cpuc->lbr_users < 0); 67762306a36Sopenharmony_ci WARN_ON_ONCE(cpuc->lbr_pebs_users < 0); 67862306a36Sopenharmony_ci perf_sched_cb_dec(event->pmu); 67962306a36Sopenharmony_ci} 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_cistatic inline bool vlbr_exclude_host(void) 68262306a36Sopenharmony_ci{ 68362306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 68462306a36Sopenharmony_ci 68562306a36Sopenharmony_ci return test_bit(INTEL_PMC_IDX_FIXED_VLBR, 68662306a36Sopenharmony_ci (unsigned long *)&cpuc->intel_ctrl_guest_mask); 68762306a36Sopenharmony_ci} 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_civoid intel_pmu_lbr_enable_all(bool pmi) 69062306a36Sopenharmony_ci{ 69162306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_ci if (cpuc->lbr_users && !vlbr_exclude_host()) 69462306a36Sopenharmony_ci __intel_pmu_lbr_enable(pmi); 69562306a36Sopenharmony_ci} 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_civoid intel_pmu_lbr_disable_all(void) 69862306a36Sopenharmony_ci{ 69962306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 70062306a36Sopenharmony_ci 70162306a36Sopenharmony_ci if (cpuc->lbr_users && !vlbr_exclude_host()) { 70262306a36Sopenharmony_ci if (static_cpu_has(X86_FEATURE_ARCH_LBR)) 70362306a36Sopenharmony_ci return __intel_pmu_arch_lbr_disable(); 70462306a36Sopenharmony_ci 70562306a36Sopenharmony_ci __intel_pmu_lbr_disable(); 70662306a36Sopenharmony_ci } 70762306a36Sopenharmony_ci} 70862306a36Sopenharmony_ci 70962306a36Sopenharmony_civoid intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) 71062306a36Sopenharmony_ci{ 71162306a36Sopenharmony_ci unsigned long mask = x86_pmu.lbr_nr - 1; 71262306a36Sopenharmony_ci struct perf_branch_entry *br = cpuc->lbr_entries; 71362306a36Sopenharmony_ci u64 tos = intel_pmu_lbr_tos(); 71462306a36Sopenharmony_ci int i; 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci for (i = 0; i < x86_pmu.lbr_nr; i++) { 71762306a36Sopenharmony_ci unsigned long lbr_idx = (tos - i) & mask; 71862306a36Sopenharmony_ci union { 71962306a36Sopenharmony_ci struct { 72062306a36Sopenharmony_ci u32 from; 72162306a36Sopenharmony_ci u32 to; 72262306a36Sopenharmony_ci }; 72362306a36Sopenharmony_ci u64 lbr; 72462306a36Sopenharmony_ci } msr_lastbranch; 72562306a36Sopenharmony_ci 72662306a36Sopenharmony_ci rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_ci perf_clear_branch_entry_bitfields(br); 72962306a36Sopenharmony_ci 73062306a36Sopenharmony_ci br->from = msr_lastbranch.from; 73162306a36Sopenharmony_ci br->to = msr_lastbranch.to; 73262306a36Sopenharmony_ci br++; 73362306a36Sopenharmony_ci } 73462306a36Sopenharmony_ci cpuc->lbr_stack.nr = i; 73562306a36Sopenharmony_ci cpuc->lbr_stack.hw_idx = tos; 73662306a36Sopenharmony_ci} 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci/* 73962306a36Sopenharmony_ci * Due to lack of segmentation in Linux the effective address (offset) 74062306a36Sopenharmony_ci * is the same as the linear address, allowing us to merge the LIP and EIP 74162306a36Sopenharmony_ci * LBR formats. 74262306a36Sopenharmony_ci */ 74362306a36Sopenharmony_civoid intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) 74462306a36Sopenharmony_ci{ 74562306a36Sopenharmony_ci bool need_info = false, call_stack = false; 74662306a36Sopenharmony_ci unsigned long mask = x86_pmu.lbr_nr - 1; 74762306a36Sopenharmony_ci struct perf_branch_entry *br = cpuc->lbr_entries; 74862306a36Sopenharmony_ci u64 tos = intel_pmu_lbr_tos(); 74962306a36Sopenharmony_ci int i; 75062306a36Sopenharmony_ci int out = 0; 75162306a36Sopenharmony_ci int num = x86_pmu.lbr_nr; 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci if (cpuc->lbr_sel) { 75462306a36Sopenharmony_ci need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO); 75562306a36Sopenharmony_ci if (cpuc->lbr_sel->config & LBR_CALL_STACK) 75662306a36Sopenharmony_ci call_stack = true; 75762306a36Sopenharmony_ci } 75862306a36Sopenharmony_ci 75962306a36Sopenharmony_ci for (i = 0; i < num; i++) { 76062306a36Sopenharmony_ci unsigned long lbr_idx = (tos - i) & mask; 76162306a36Sopenharmony_ci u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0; 76262306a36Sopenharmony_ci u16 cycles = 0; 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci from = rdlbr_from(lbr_idx, NULL); 76562306a36Sopenharmony_ci to = rdlbr_to(lbr_idx, NULL); 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ci /* 76862306a36Sopenharmony_ci * Read LBR call stack entries 76962306a36Sopenharmony_ci * until invalid entry (0s) is detected. 77062306a36Sopenharmony_ci */ 77162306a36Sopenharmony_ci if (call_stack && !from) 77262306a36Sopenharmony_ci break; 77362306a36Sopenharmony_ci 77462306a36Sopenharmony_ci if (x86_pmu.lbr_has_info) { 77562306a36Sopenharmony_ci if (need_info) { 77662306a36Sopenharmony_ci u64 info; 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ci info = rdlbr_info(lbr_idx, NULL); 77962306a36Sopenharmony_ci mis = !!(info & LBR_INFO_MISPRED); 78062306a36Sopenharmony_ci pred = !mis; 78162306a36Sopenharmony_ci cycles = (info & LBR_INFO_CYCLES); 78262306a36Sopenharmony_ci if (x86_pmu.lbr_has_tsx) { 78362306a36Sopenharmony_ci in_tx = !!(info & LBR_INFO_IN_TX); 78462306a36Sopenharmony_ci abort = !!(info & LBR_INFO_ABORT); 78562306a36Sopenharmony_ci } 78662306a36Sopenharmony_ci } 78762306a36Sopenharmony_ci } else { 78862306a36Sopenharmony_ci int skip = 0; 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci if (x86_pmu.lbr_from_flags) { 79162306a36Sopenharmony_ci mis = !!(from & LBR_FROM_FLAG_MISPRED); 79262306a36Sopenharmony_ci pred = !mis; 79362306a36Sopenharmony_ci skip = 1; 79462306a36Sopenharmony_ci } 79562306a36Sopenharmony_ci if (x86_pmu.lbr_has_tsx) { 79662306a36Sopenharmony_ci in_tx = !!(from & LBR_FROM_FLAG_IN_TX); 79762306a36Sopenharmony_ci abort = !!(from & LBR_FROM_FLAG_ABORT); 79862306a36Sopenharmony_ci skip = 3; 79962306a36Sopenharmony_ci } 80062306a36Sopenharmony_ci from = (u64)((((s64)from) << skip) >> skip); 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci if (x86_pmu.lbr_to_cycles) { 80362306a36Sopenharmony_ci cycles = ((to >> 48) & LBR_INFO_CYCLES); 80462306a36Sopenharmony_ci to = (u64)((((s64)to) << 16) >> 16); 80562306a36Sopenharmony_ci } 80662306a36Sopenharmony_ci } 80762306a36Sopenharmony_ci 80862306a36Sopenharmony_ci /* 80962306a36Sopenharmony_ci * Some CPUs report duplicated abort records, 81062306a36Sopenharmony_ci * with the second entry not having an abort bit set. 81162306a36Sopenharmony_ci * Skip them here. This loop runs backwards, 81262306a36Sopenharmony_ci * so we need to undo the previous record. 81362306a36Sopenharmony_ci * If the abort just happened outside the window 81462306a36Sopenharmony_ci * the extra entry cannot be removed. 81562306a36Sopenharmony_ci */ 81662306a36Sopenharmony_ci if (abort && x86_pmu.lbr_double_abort && out > 0) 81762306a36Sopenharmony_ci out--; 81862306a36Sopenharmony_ci 81962306a36Sopenharmony_ci perf_clear_branch_entry_bitfields(br+out); 82062306a36Sopenharmony_ci br[out].from = from; 82162306a36Sopenharmony_ci br[out].to = to; 82262306a36Sopenharmony_ci br[out].mispred = mis; 82362306a36Sopenharmony_ci br[out].predicted = pred; 82462306a36Sopenharmony_ci br[out].in_tx = in_tx; 82562306a36Sopenharmony_ci br[out].abort = abort; 82662306a36Sopenharmony_ci br[out].cycles = cycles; 82762306a36Sopenharmony_ci out++; 82862306a36Sopenharmony_ci } 82962306a36Sopenharmony_ci cpuc->lbr_stack.nr = out; 83062306a36Sopenharmony_ci cpuc->lbr_stack.hw_idx = tos; 83162306a36Sopenharmony_ci} 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_cistatic DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred); 83462306a36Sopenharmony_cistatic DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles); 83562306a36Sopenharmony_cistatic DEFINE_STATIC_KEY_FALSE(x86_lbr_type); 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_cistatic __always_inline int get_lbr_br_type(u64 info) 83862306a36Sopenharmony_ci{ 83962306a36Sopenharmony_ci int type = 0; 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_ci if (static_branch_likely(&x86_lbr_type)) 84262306a36Sopenharmony_ci type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET; 84362306a36Sopenharmony_ci 84462306a36Sopenharmony_ci return type; 84562306a36Sopenharmony_ci} 84662306a36Sopenharmony_ci 84762306a36Sopenharmony_cistatic __always_inline bool get_lbr_mispred(u64 info) 84862306a36Sopenharmony_ci{ 84962306a36Sopenharmony_ci bool mispred = 0; 85062306a36Sopenharmony_ci 85162306a36Sopenharmony_ci if (static_branch_likely(&x86_lbr_mispred)) 85262306a36Sopenharmony_ci mispred = !!(info & LBR_INFO_MISPRED); 85362306a36Sopenharmony_ci 85462306a36Sopenharmony_ci return mispred; 85562306a36Sopenharmony_ci} 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_cistatic __always_inline u16 get_lbr_cycles(u64 info) 85862306a36Sopenharmony_ci{ 85962306a36Sopenharmony_ci u16 cycles = info & LBR_INFO_CYCLES; 86062306a36Sopenharmony_ci 86162306a36Sopenharmony_ci if (static_cpu_has(X86_FEATURE_ARCH_LBR) && 86262306a36Sopenharmony_ci (!static_branch_likely(&x86_lbr_cycles) || 86362306a36Sopenharmony_ci !(info & LBR_INFO_CYC_CNT_VALID))) 86462306a36Sopenharmony_ci cycles = 0; 86562306a36Sopenharmony_ci 86662306a36Sopenharmony_ci return cycles; 86762306a36Sopenharmony_ci} 86862306a36Sopenharmony_ci 86962306a36Sopenharmony_cistatic void intel_pmu_store_lbr(struct cpu_hw_events *cpuc, 87062306a36Sopenharmony_ci struct lbr_entry *entries) 87162306a36Sopenharmony_ci{ 87262306a36Sopenharmony_ci struct perf_branch_entry *e; 87362306a36Sopenharmony_ci struct lbr_entry *lbr; 87462306a36Sopenharmony_ci u64 from, to, info; 87562306a36Sopenharmony_ci int i; 87662306a36Sopenharmony_ci 87762306a36Sopenharmony_ci for (i = 0; i < x86_pmu.lbr_nr; i++) { 87862306a36Sopenharmony_ci lbr = entries ? &entries[i] : NULL; 87962306a36Sopenharmony_ci e = &cpuc->lbr_entries[i]; 88062306a36Sopenharmony_ci 88162306a36Sopenharmony_ci from = rdlbr_from(i, lbr); 88262306a36Sopenharmony_ci /* 88362306a36Sopenharmony_ci * Read LBR entries until invalid entry (0s) is detected. 88462306a36Sopenharmony_ci */ 88562306a36Sopenharmony_ci if (!from) 88662306a36Sopenharmony_ci break; 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_ci to = rdlbr_to(i, lbr); 88962306a36Sopenharmony_ci info = rdlbr_info(i, lbr); 89062306a36Sopenharmony_ci 89162306a36Sopenharmony_ci perf_clear_branch_entry_bitfields(e); 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_ci e->from = from; 89462306a36Sopenharmony_ci e->to = to; 89562306a36Sopenharmony_ci e->mispred = get_lbr_mispred(info); 89662306a36Sopenharmony_ci e->predicted = !e->mispred; 89762306a36Sopenharmony_ci e->in_tx = !!(info & LBR_INFO_IN_TX); 89862306a36Sopenharmony_ci e->abort = !!(info & LBR_INFO_ABORT); 89962306a36Sopenharmony_ci e->cycles = get_lbr_cycles(info); 90062306a36Sopenharmony_ci e->type = get_lbr_br_type(info); 90162306a36Sopenharmony_ci } 90262306a36Sopenharmony_ci 90362306a36Sopenharmony_ci cpuc->lbr_stack.nr = i; 90462306a36Sopenharmony_ci} 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_cistatic void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc) 90762306a36Sopenharmony_ci{ 90862306a36Sopenharmony_ci intel_pmu_store_lbr(cpuc, NULL); 90962306a36Sopenharmony_ci} 91062306a36Sopenharmony_ci 91162306a36Sopenharmony_cistatic void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc) 91262306a36Sopenharmony_ci{ 91362306a36Sopenharmony_ci struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave; 91462306a36Sopenharmony_ci 91562306a36Sopenharmony_ci if (!xsave) { 91662306a36Sopenharmony_ci intel_pmu_store_lbr(cpuc, NULL); 91762306a36Sopenharmony_ci return; 91862306a36Sopenharmony_ci } 91962306a36Sopenharmony_ci xsaves(&xsave->xsave, XFEATURE_MASK_LBR); 92062306a36Sopenharmony_ci 92162306a36Sopenharmony_ci intel_pmu_store_lbr(cpuc, xsave->lbr.entries); 92262306a36Sopenharmony_ci} 92362306a36Sopenharmony_ci 92462306a36Sopenharmony_civoid intel_pmu_lbr_read(void) 92562306a36Sopenharmony_ci{ 92662306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 92762306a36Sopenharmony_ci 92862306a36Sopenharmony_ci /* 92962306a36Sopenharmony_ci * Don't read when all LBRs users are using adaptive PEBS. 93062306a36Sopenharmony_ci * 93162306a36Sopenharmony_ci * This could be smarter and actually check the event, 93262306a36Sopenharmony_ci * but this simple approach seems to work for now. 93362306a36Sopenharmony_ci */ 93462306a36Sopenharmony_ci if (!cpuc->lbr_users || vlbr_exclude_host() || 93562306a36Sopenharmony_ci cpuc->lbr_users == cpuc->lbr_pebs_users) 93662306a36Sopenharmony_ci return; 93762306a36Sopenharmony_ci 93862306a36Sopenharmony_ci x86_pmu.lbr_read(cpuc); 93962306a36Sopenharmony_ci 94062306a36Sopenharmony_ci intel_pmu_lbr_filter(cpuc); 94162306a36Sopenharmony_ci} 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_ci/* 94462306a36Sopenharmony_ci * SW filter is used: 94562306a36Sopenharmony_ci * - in case there is no HW filter 94662306a36Sopenharmony_ci * - in case the HW filter has errata or limitations 94762306a36Sopenharmony_ci */ 94862306a36Sopenharmony_cistatic int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) 94962306a36Sopenharmony_ci{ 95062306a36Sopenharmony_ci u64 br_type = event->attr.branch_sample_type; 95162306a36Sopenharmony_ci int mask = 0; 95262306a36Sopenharmony_ci 95362306a36Sopenharmony_ci if (br_type & PERF_SAMPLE_BRANCH_USER) 95462306a36Sopenharmony_ci mask |= X86_BR_USER; 95562306a36Sopenharmony_ci 95662306a36Sopenharmony_ci if (br_type & PERF_SAMPLE_BRANCH_KERNEL) 95762306a36Sopenharmony_ci mask |= X86_BR_KERNEL; 95862306a36Sopenharmony_ci 95962306a36Sopenharmony_ci /* we ignore BRANCH_HV here */ 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci if (br_type & PERF_SAMPLE_BRANCH_ANY) 96262306a36Sopenharmony_ci mask |= X86_BR_ANY; 96362306a36Sopenharmony_ci 96462306a36Sopenharmony_ci if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL) 96562306a36Sopenharmony_ci mask |= X86_BR_ANY_CALL; 96662306a36Sopenharmony_ci 96762306a36Sopenharmony_ci if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN) 96862306a36Sopenharmony_ci mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET; 96962306a36Sopenharmony_ci 97062306a36Sopenharmony_ci if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) 97162306a36Sopenharmony_ci mask |= X86_BR_IND_CALL; 97262306a36Sopenharmony_ci 97362306a36Sopenharmony_ci if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX) 97462306a36Sopenharmony_ci mask |= X86_BR_ABORT; 97562306a36Sopenharmony_ci 97662306a36Sopenharmony_ci if (br_type & PERF_SAMPLE_BRANCH_IN_TX) 97762306a36Sopenharmony_ci mask |= X86_BR_IN_TX; 97862306a36Sopenharmony_ci 97962306a36Sopenharmony_ci if (br_type & PERF_SAMPLE_BRANCH_NO_TX) 98062306a36Sopenharmony_ci mask |= X86_BR_NO_TX; 98162306a36Sopenharmony_ci 98262306a36Sopenharmony_ci if (br_type & PERF_SAMPLE_BRANCH_COND) 98362306a36Sopenharmony_ci mask |= X86_BR_JCC; 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_ci if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) { 98662306a36Sopenharmony_ci if (!x86_pmu_has_lbr_callstack()) 98762306a36Sopenharmony_ci return -EOPNOTSUPP; 98862306a36Sopenharmony_ci if (mask & ~(X86_BR_USER | X86_BR_KERNEL)) 98962306a36Sopenharmony_ci return -EINVAL; 99062306a36Sopenharmony_ci mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET | 99162306a36Sopenharmony_ci X86_BR_CALL_STACK; 99262306a36Sopenharmony_ci } 99362306a36Sopenharmony_ci 99462306a36Sopenharmony_ci if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP) 99562306a36Sopenharmony_ci mask |= X86_BR_IND_JMP; 99662306a36Sopenharmony_ci 99762306a36Sopenharmony_ci if (br_type & PERF_SAMPLE_BRANCH_CALL) 99862306a36Sopenharmony_ci mask |= X86_BR_CALL | X86_BR_ZERO_CALL; 99962306a36Sopenharmony_ci 100062306a36Sopenharmony_ci if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE) 100162306a36Sopenharmony_ci mask |= X86_BR_TYPE_SAVE; 100262306a36Sopenharmony_ci 100362306a36Sopenharmony_ci /* 100462306a36Sopenharmony_ci * stash actual user request into reg, it may 100562306a36Sopenharmony_ci * be used by fixup code for some CPU 100662306a36Sopenharmony_ci */ 100762306a36Sopenharmony_ci event->hw.branch_reg.reg = mask; 100862306a36Sopenharmony_ci return 0; 100962306a36Sopenharmony_ci} 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_ci/* 101262306a36Sopenharmony_ci * setup the HW LBR filter 101362306a36Sopenharmony_ci * Used only when available, may not be enough to disambiguate 101462306a36Sopenharmony_ci * all branches, may need the help of the SW filter 101562306a36Sopenharmony_ci */ 101662306a36Sopenharmony_cistatic int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) 101762306a36Sopenharmony_ci{ 101862306a36Sopenharmony_ci struct hw_perf_event_extra *reg; 101962306a36Sopenharmony_ci u64 br_type = event->attr.branch_sample_type; 102062306a36Sopenharmony_ci u64 mask = 0, v; 102162306a36Sopenharmony_ci int i; 102262306a36Sopenharmony_ci 102362306a36Sopenharmony_ci for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) { 102462306a36Sopenharmony_ci if (!(br_type & (1ULL << i))) 102562306a36Sopenharmony_ci continue; 102662306a36Sopenharmony_ci 102762306a36Sopenharmony_ci v = x86_pmu.lbr_sel_map[i]; 102862306a36Sopenharmony_ci if (v == LBR_NOT_SUPP) 102962306a36Sopenharmony_ci return -EOPNOTSUPP; 103062306a36Sopenharmony_ci 103162306a36Sopenharmony_ci if (v != LBR_IGN) 103262306a36Sopenharmony_ci mask |= v; 103362306a36Sopenharmony_ci } 103462306a36Sopenharmony_ci 103562306a36Sopenharmony_ci reg = &event->hw.branch_reg; 103662306a36Sopenharmony_ci reg->idx = EXTRA_REG_LBR; 103762306a36Sopenharmony_ci 103862306a36Sopenharmony_ci if (static_cpu_has(X86_FEATURE_ARCH_LBR)) { 103962306a36Sopenharmony_ci reg->config = mask; 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci /* 104262306a36Sopenharmony_ci * The Arch LBR HW can retrieve the common branch types 104362306a36Sopenharmony_ci * from the LBR_INFO. It doesn't require the high overhead 104462306a36Sopenharmony_ci * SW disassemble. 104562306a36Sopenharmony_ci * Enable the branch type by default for the Arch LBR. 104662306a36Sopenharmony_ci */ 104762306a36Sopenharmony_ci reg->reg |= X86_BR_TYPE_SAVE; 104862306a36Sopenharmony_ci return 0; 104962306a36Sopenharmony_ci } 105062306a36Sopenharmony_ci 105162306a36Sopenharmony_ci /* 105262306a36Sopenharmony_ci * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate 105362306a36Sopenharmony_ci * in suppress mode. So LBR_SELECT should be set to 105462306a36Sopenharmony_ci * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK) 105562306a36Sopenharmony_ci * But the 10th bit LBR_CALL_STACK does not operate 105662306a36Sopenharmony_ci * in suppress mode. 105762306a36Sopenharmony_ci */ 105862306a36Sopenharmony_ci reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK); 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) && 106162306a36Sopenharmony_ci (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) && 106262306a36Sopenharmony_ci x86_pmu.lbr_has_info) 106362306a36Sopenharmony_ci reg->config |= LBR_NO_INFO; 106462306a36Sopenharmony_ci 106562306a36Sopenharmony_ci return 0; 106662306a36Sopenharmony_ci} 106762306a36Sopenharmony_ci 106862306a36Sopenharmony_ciint intel_pmu_setup_lbr_filter(struct perf_event *event) 106962306a36Sopenharmony_ci{ 107062306a36Sopenharmony_ci int ret = 0; 107162306a36Sopenharmony_ci 107262306a36Sopenharmony_ci /* 107362306a36Sopenharmony_ci * no LBR on this PMU 107462306a36Sopenharmony_ci */ 107562306a36Sopenharmony_ci if (!x86_pmu.lbr_nr) 107662306a36Sopenharmony_ci return -EOPNOTSUPP; 107762306a36Sopenharmony_ci 107862306a36Sopenharmony_ci /* 107962306a36Sopenharmony_ci * setup SW LBR filter 108062306a36Sopenharmony_ci */ 108162306a36Sopenharmony_ci ret = intel_pmu_setup_sw_lbr_filter(event); 108262306a36Sopenharmony_ci if (ret) 108362306a36Sopenharmony_ci return ret; 108462306a36Sopenharmony_ci 108562306a36Sopenharmony_ci /* 108662306a36Sopenharmony_ci * setup HW LBR filter, if any 108762306a36Sopenharmony_ci */ 108862306a36Sopenharmony_ci if (x86_pmu.lbr_sel_map) 108962306a36Sopenharmony_ci ret = intel_pmu_setup_hw_lbr_filter(event); 109062306a36Sopenharmony_ci 109162306a36Sopenharmony_ci return ret; 109262306a36Sopenharmony_ci} 109362306a36Sopenharmony_ci 109462306a36Sopenharmony_cienum { 109562306a36Sopenharmony_ci ARCH_LBR_BR_TYPE_JCC = 0, 109662306a36Sopenharmony_ci ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1, 109762306a36Sopenharmony_ci ARCH_LBR_BR_TYPE_NEAR_REL_JMP = 2, 109862306a36Sopenharmony_ci ARCH_LBR_BR_TYPE_NEAR_IND_CALL = 3, 109962306a36Sopenharmony_ci ARCH_LBR_BR_TYPE_NEAR_REL_CALL = 4, 110062306a36Sopenharmony_ci ARCH_LBR_BR_TYPE_NEAR_RET = 5, 110162306a36Sopenharmony_ci ARCH_LBR_BR_TYPE_KNOWN_MAX = ARCH_LBR_BR_TYPE_NEAR_RET, 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci ARCH_LBR_BR_TYPE_MAP_MAX = 16, 110462306a36Sopenharmony_ci}; 110562306a36Sopenharmony_ci 110662306a36Sopenharmony_cistatic const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = { 110762306a36Sopenharmony_ci [ARCH_LBR_BR_TYPE_JCC] = X86_BR_JCC, 110862306a36Sopenharmony_ci [ARCH_LBR_BR_TYPE_NEAR_IND_JMP] = X86_BR_IND_JMP, 110962306a36Sopenharmony_ci [ARCH_LBR_BR_TYPE_NEAR_REL_JMP] = X86_BR_JMP, 111062306a36Sopenharmony_ci [ARCH_LBR_BR_TYPE_NEAR_IND_CALL] = X86_BR_IND_CALL, 111162306a36Sopenharmony_ci [ARCH_LBR_BR_TYPE_NEAR_REL_CALL] = X86_BR_CALL, 111262306a36Sopenharmony_ci [ARCH_LBR_BR_TYPE_NEAR_RET] = X86_BR_RET, 111362306a36Sopenharmony_ci}; 111462306a36Sopenharmony_ci 111562306a36Sopenharmony_ci/* 111662306a36Sopenharmony_ci * implement actual branch filter based on user demand. 111762306a36Sopenharmony_ci * Hardware may not exactly satisfy that request, thus 111862306a36Sopenharmony_ci * we need to inspect opcodes. Mismatched branches are 111962306a36Sopenharmony_ci * discarded. Therefore, the number of branches returned 112062306a36Sopenharmony_ci * in PERF_SAMPLE_BRANCH_STACK sample may vary. 112162306a36Sopenharmony_ci */ 112262306a36Sopenharmony_cistatic void 112362306a36Sopenharmony_ciintel_pmu_lbr_filter(struct cpu_hw_events *cpuc) 112462306a36Sopenharmony_ci{ 112562306a36Sopenharmony_ci u64 from, to; 112662306a36Sopenharmony_ci int br_sel = cpuc->br_sel; 112762306a36Sopenharmony_ci int i, j, type, to_plm; 112862306a36Sopenharmony_ci bool compress = false; 112962306a36Sopenharmony_ci 113062306a36Sopenharmony_ci /* if sampling all branches, then nothing to filter */ 113162306a36Sopenharmony_ci if (((br_sel & X86_BR_ALL) == X86_BR_ALL) && 113262306a36Sopenharmony_ci ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE)) 113362306a36Sopenharmony_ci return; 113462306a36Sopenharmony_ci 113562306a36Sopenharmony_ci for (i = 0; i < cpuc->lbr_stack.nr; i++) { 113662306a36Sopenharmony_ci 113762306a36Sopenharmony_ci from = cpuc->lbr_entries[i].from; 113862306a36Sopenharmony_ci to = cpuc->lbr_entries[i].to; 113962306a36Sopenharmony_ci type = cpuc->lbr_entries[i].type; 114062306a36Sopenharmony_ci 114162306a36Sopenharmony_ci /* 114262306a36Sopenharmony_ci * Parse the branch type recorded in LBR_x_INFO MSR. 114362306a36Sopenharmony_ci * Doesn't support OTHER_BRANCH decoding for now. 114462306a36Sopenharmony_ci * OTHER_BRANCH branch type still rely on software decoding. 114562306a36Sopenharmony_ci */ 114662306a36Sopenharmony_ci if (static_cpu_has(X86_FEATURE_ARCH_LBR) && 114762306a36Sopenharmony_ci type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) { 114862306a36Sopenharmony_ci to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; 114962306a36Sopenharmony_ci type = arch_lbr_br_type_map[type] | to_plm; 115062306a36Sopenharmony_ci } else 115162306a36Sopenharmony_ci type = branch_type(from, to, cpuc->lbr_entries[i].abort); 115262306a36Sopenharmony_ci if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) { 115362306a36Sopenharmony_ci if (cpuc->lbr_entries[i].in_tx) 115462306a36Sopenharmony_ci type |= X86_BR_IN_TX; 115562306a36Sopenharmony_ci else 115662306a36Sopenharmony_ci type |= X86_BR_NO_TX; 115762306a36Sopenharmony_ci } 115862306a36Sopenharmony_ci 115962306a36Sopenharmony_ci /* if type does not correspond, then discard */ 116062306a36Sopenharmony_ci if (type == X86_BR_NONE || (br_sel & type) != type) { 116162306a36Sopenharmony_ci cpuc->lbr_entries[i].from = 0; 116262306a36Sopenharmony_ci compress = true; 116362306a36Sopenharmony_ci } 116462306a36Sopenharmony_ci 116562306a36Sopenharmony_ci if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE) 116662306a36Sopenharmony_ci cpuc->lbr_entries[i].type = common_branch_type(type); 116762306a36Sopenharmony_ci } 116862306a36Sopenharmony_ci 116962306a36Sopenharmony_ci if (!compress) 117062306a36Sopenharmony_ci return; 117162306a36Sopenharmony_ci 117262306a36Sopenharmony_ci /* remove all entries with from=0 */ 117362306a36Sopenharmony_ci for (i = 0; i < cpuc->lbr_stack.nr; ) { 117462306a36Sopenharmony_ci if (!cpuc->lbr_entries[i].from) { 117562306a36Sopenharmony_ci j = i; 117662306a36Sopenharmony_ci while (++j < cpuc->lbr_stack.nr) 117762306a36Sopenharmony_ci cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j]; 117862306a36Sopenharmony_ci cpuc->lbr_stack.nr--; 117962306a36Sopenharmony_ci if (!cpuc->lbr_entries[i].from) 118062306a36Sopenharmony_ci continue; 118162306a36Sopenharmony_ci } 118262306a36Sopenharmony_ci i++; 118362306a36Sopenharmony_ci } 118462306a36Sopenharmony_ci} 118562306a36Sopenharmony_ci 118662306a36Sopenharmony_civoid intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr) 118762306a36Sopenharmony_ci{ 118862306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 118962306a36Sopenharmony_ci 119062306a36Sopenharmony_ci /* Cannot get TOS for large PEBS and Arch LBR */ 119162306a36Sopenharmony_ci if (static_cpu_has(X86_FEATURE_ARCH_LBR) || 119262306a36Sopenharmony_ci (cpuc->n_pebs == cpuc->n_large_pebs)) 119362306a36Sopenharmony_ci cpuc->lbr_stack.hw_idx = -1ULL; 119462306a36Sopenharmony_ci else 119562306a36Sopenharmony_ci cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos(); 119662306a36Sopenharmony_ci 119762306a36Sopenharmony_ci intel_pmu_store_lbr(cpuc, lbr); 119862306a36Sopenharmony_ci intel_pmu_lbr_filter(cpuc); 119962306a36Sopenharmony_ci} 120062306a36Sopenharmony_ci 120162306a36Sopenharmony_ci/* 120262306a36Sopenharmony_ci * Map interface branch filters onto LBR filters 120362306a36Sopenharmony_ci */ 120462306a36Sopenharmony_cistatic const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { 120562306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, 120662306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, 120762306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, 120862306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, 120962306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_REL_JMP 121062306a36Sopenharmony_ci | LBR_IND_JMP | LBR_FAR, 121162306a36Sopenharmony_ci /* 121262306a36Sopenharmony_ci * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches 121362306a36Sopenharmony_ci */ 121462306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = 121562306a36Sopenharmony_ci LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, 121662306a36Sopenharmony_ci /* 121762306a36Sopenharmony_ci * NHM/WSM erratum: must include IND_JMP to capture IND_CALL 121862306a36Sopenharmony_ci */ 121962306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP, 122062306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, 122162306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, 122262306a36Sopenharmony_ci}; 122362306a36Sopenharmony_ci 122462306a36Sopenharmony_cistatic const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { 122562306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, 122662306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, 122762306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, 122862306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, 122962306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, 123062306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL 123162306a36Sopenharmony_ci | LBR_FAR, 123262306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, 123362306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, 123462306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, 123562306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL, 123662306a36Sopenharmony_ci}; 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_cistatic const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { 123962306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, 124062306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, 124162306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, 124262306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, 124362306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, 124462306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL 124562306a36Sopenharmony_ci | LBR_FAR, 124662306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, 124762306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, 124862306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL 124962306a36Sopenharmony_ci | LBR_RETURN | LBR_CALL_STACK, 125062306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, 125162306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL, 125262306a36Sopenharmony_ci}; 125362306a36Sopenharmony_ci 125462306a36Sopenharmony_cistatic int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { 125562306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_ANY_SHIFT] = ARCH_LBR_ANY, 125662306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_USER_SHIFT] = ARCH_LBR_USER, 125762306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = ARCH_LBR_KERNEL, 125862306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, 125962306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = ARCH_LBR_RETURN | 126062306a36Sopenharmony_ci ARCH_LBR_OTHER_BRANCH, 126162306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = ARCH_LBR_REL_CALL | 126262306a36Sopenharmony_ci ARCH_LBR_IND_CALL | 126362306a36Sopenharmony_ci ARCH_LBR_OTHER_BRANCH, 126462306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = ARCH_LBR_IND_CALL, 126562306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_COND_SHIFT] = ARCH_LBR_JCC, 126662306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = ARCH_LBR_REL_CALL | 126762306a36Sopenharmony_ci ARCH_LBR_IND_CALL | 126862306a36Sopenharmony_ci ARCH_LBR_RETURN | 126962306a36Sopenharmony_ci ARCH_LBR_CALL_STACK, 127062306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = ARCH_LBR_IND_JMP, 127162306a36Sopenharmony_ci [PERF_SAMPLE_BRANCH_CALL_SHIFT] = ARCH_LBR_REL_CALL, 127262306a36Sopenharmony_ci}; 127362306a36Sopenharmony_ci 127462306a36Sopenharmony_ci/* core */ 127562306a36Sopenharmony_civoid __init intel_pmu_lbr_init_core(void) 127662306a36Sopenharmony_ci{ 127762306a36Sopenharmony_ci x86_pmu.lbr_nr = 4; 127862306a36Sopenharmony_ci x86_pmu.lbr_tos = MSR_LBR_TOS; 127962306a36Sopenharmony_ci x86_pmu.lbr_from = MSR_LBR_CORE_FROM; 128062306a36Sopenharmony_ci x86_pmu.lbr_to = MSR_LBR_CORE_TO; 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_ci /* 128362306a36Sopenharmony_ci * SW branch filter usage: 128462306a36Sopenharmony_ci * - compensate for lack of HW filter 128562306a36Sopenharmony_ci */ 128662306a36Sopenharmony_ci} 128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ci/* nehalem/westmere */ 128962306a36Sopenharmony_civoid __init intel_pmu_lbr_init_nhm(void) 129062306a36Sopenharmony_ci{ 129162306a36Sopenharmony_ci x86_pmu.lbr_nr = 16; 129262306a36Sopenharmony_ci x86_pmu.lbr_tos = MSR_LBR_TOS; 129362306a36Sopenharmony_ci x86_pmu.lbr_from = MSR_LBR_NHM_FROM; 129462306a36Sopenharmony_ci x86_pmu.lbr_to = MSR_LBR_NHM_TO; 129562306a36Sopenharmony_ci 129662306a36Sopenharmony_ci x86_pmu.lbr_sel_mask = LBR_SEL_MASK; 129762306a36Sopenharmony_ci x86_pmu.lbr_sel_map = nhm_lbr_sel_map; 129862306a36Sopenharmony_ci 129962306a36Sopenharmony_ci /* 130062306a36Sopenharmony_ci * SW branch filter usage: 130162306a36Sopenharmony_ci * - workaround LBR_SEL errata (see above) 130262306a36Sopenharmony_ci * - support syscall, sysret capture. 130362306a36Sopenharmony_ci * That requires LBR_FAR but that means far 130462306a36Sopenharmony_ci * jmp need to be filtered out 130562306a36Sopenharmony_ci */ 130662306a36Sopenharmony_ci} 130762306a36Sopenharmony_ci 130862306a36Sopenharmony_ci/* sandy bridge */ 130962306a36Sopenharmony_civoid __init intel_pmu_lbr_init_snb(void) 131062306a36Sopenharmony_ci{ 131162306a36Sopenharmony_ci x86_pmu.lbr_nr = 16; 131262306a36Sopenharmony_ci x86_pmu.lbr_tos = MSR_LBR_TOS; 131362306a36Sopenharmony_ci x86_pmu.lbr_from = MSR_LBR_NHM_FROM; 131462306a36Sopenharmony_ci x86_pmu.lbr_to = MSR_LBR_NHM_TO; 131562306a36Sopenharmony_ci 131662306a36Sopenharmony_ci x86_pmu.lbr_sel_mask = LBR_SEL_MASK; 131762306a36Sopenharmony_ci x86_pmu.lbr_sel_map = snb_lbr_sel_map; 131862306a36Sopenharmony_ci 131962306a36Sopenharmony_ci /* 132062306a36Sopenharmony_ci * SW branch filter usage: 132162306a36Sopenharmony_ci * - support syscall, sysret capture. 132262306a36Sopenharmony_ci * That requires LBR_FAR but that means far 132362306a36Sopenharmony_ci * jmp need to be filtered out 132462306a36Sopenharmony_ci */ 132562306a36Sopenharmony_ci} 132662306a36Sopenharmony_ci 132762306a36Sopenharmony_cistatic inline struct kmem_cache * 132862306a36Sopenharmony_cicreate_lbr_kmem_cache(size_t size, size_t align) 132962306a36Sopenharmony_ci{ 133062306a36Sopenharmony_ci return kmem_cache_create("x86_lbr", size, align, 0, NULL); 133162306a36Sopenharmony_ci} 133262306a36Sopenharmony_ci 133362306a36Sopenharmony_ci/* haswell */ 133462306a36Sopenharmony_civoid intel_pmu_lbr_init_hsw(void) 133562306a36Sopenharmony_ci{ 133662306a36Sopenharmony_ci size_t size = sizeof(struct x86_perf_task_context); 133762306a36Sopenharmony_ci 133862306a36Sopenharmony_ci x86_pmu.lbr_nr = 16; 133962306a36Sopenharmony_ci x86_pmu.lbr_tos = MSR_LBR_TOS; 134062306a36Sopenharmony_ci x86_pmu.lbr_from = MSR_LBR_NHM_FROM; 134162306a36Sopenharmony_ci x86_pmu.lbr_to = MSR_LBR_NHM_TO; 134262306a36Sopenharmony_ci 134362306a36Sopenharmony_ci x86_pmu.lbr_sel_mask = LBR_SEL_MASK; 134462306a36Sopenharmony_ci x86_pmu.lbr_sel_map = hsw_lbr_sel_map; 134562306a36Sopenharmony_ci 134662306a36Sopenharmony_ci x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0); 134762306a36Sopenharmony_ci} 134862306a36Sopenharmony_ci 134962306a36Sopenharmony_ci/* skylake */ 135062306a36Sopenharmony_ci__init void intel_pmu_lbr_init_skl(void) 135162306a36Sopenharmony_ci{ 135262306a36Sopenharmony_ci size_t size = sizeof(struct x86_perf_task_context); 135362306a36Sopenharmony_ci 135462306a36Sopenharmony_ci x86_pmu.lbr_nr = 32; 135562306a36Sopenharmony_ci x86_pmu.lbr_tos = MSR_LBR_TOS; 135662306a36Sopenharmony_ci x86_pmu.lbr_from = MSR_LBR_NHM_FROM; 135762306a36Sopenharmony_ci x86_pmu.lbr_to = MSR_LBR_NHM_TO; 135862306a36Sopenharmony_ci x86_pmu.lbr_info = MSR_LBR_INFO_0; 135962306a36Sopenharmony_ci 136062306a36Sopenharmony_ci x86_pmu.lbr_sel_mask = LBR_SEL_MASK; 136162306a36Sopenharmony_ci x86_pmu.lbr_sel_map = hsw_lbr_sel_map; 136262306a36Sopenharmony_ci 136362306a36Sopenharmony_ci x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0); 136462306a36Sopenharmony_ci 136562306a36Sopenharmony_ci /* 136662306a36Sopenharmony_ci * SW branch filter usage: 136762306a36Sopenharmony_ci * - support syscall, sysret capture. 136862306a36Sopenharmony_ci * That requires LBR_FAR but that means far 136962306a36Sopenharmony_ci * jmp need to be filtered out 137062306a36Sopenharmony_ci */ 137162306a36Sopenharmony_ci} 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_ci/* atom */ 137462306a36Sopenharmony_civoid __init intel_pmu_lbr_init_atom(void) 137562306a36Sopenharmony_ci{ 137662306a36Sopenharmony_ci /* 137762306a36Sopenharmony_ci * only models starting at stepping 10 seems 137862306a36Sopenharmony_ci * to have an operational LBR which can freeze 137962306a36Sopenharmony_ci * on PMU interrupt 138062306a36Sopenharmony_ci */ 138162306a36Sopenharmony_ci if (boot_cpu_data.x86_model == 28 138262306a36Sopenharmony_ci && boot_cpu_data.x86_stepping < 10) { 138362306a36Sopenharmony_ci pr_cont("LBR disabled due to erratum"); 138462306a36Sopenharmony_ci return; 138562306a36Sopenharmony_ci } 138662306a36Sopenharmony_ci 138762306a36Sopenharmony_ci x86_pmu.lbr_nr = 8; 138862306a36Sopenharmony_ci x86_pmu.lbr_tos = MSR_LBR_TOS; 138962306a36Sopenharmony_ci x86_pmu.lbr_from = MSR_LBR_CORE_FROM; 139062306a36Sopenharmony_ci x86_pmu.lbr_to = MSR_LBR_CORE_TO; 139162306a36Sopenharmony_ci 139262306a36Sopenharmony_ci /* 139362306a36Sopenharmony_ci * SW branch filter usage: 139462306a36Sopenharmony_ci * - compensate for lack of HW filter 139562306a36Sopenharmony_ci */ 139662306a36Sopenharmony_ci} 139762306a36Sopenharmony_ci 139862306a36Sopenharmony_ci/* slm */ 139962306a36Sopenharmony_civoid __init intel_pmu_lbr_init_slm(void) 140062306a36Sopenharmony_ci{ 140162306a36Sopenharmony_ci x86_pmu.lbr_nr = 8; 140262306a36Sopenharmony_ci x86_pmu.lbr_tos = MSR_LBR_TOS; 140362306a36Sopenharmony_ci x86_pmu.lbr_from = MSR_LBR_CORE_FROM; 140462306a36Sopenharmony_ci x86_pmu.lbr_to = MSR_LBR_CORE_TO; 140562306a36Sopenharmony_ci 140662306a36Sopenharmony_ci x86_pmu.lbr_sel_mask = LBR_SEL_MASK; 140762306a36Sopenharmony_ci x86_pmu.lbr_sel_map = nhm_lbr_sel_map; 140862306a36Sopenharmony_ci 140962306a36Sopenharmony_ci /* 141062306a36Sopenharmony_ci * SW branch filter usage: 141162306a36Sopenharmony_ci * - compensate for lack of HW filter 141262306a36Sopenharmony_ci */ 141362306a36Sopenharmony_ci pr_cont("8-deep LBR, "); 141462306a36Sopenharmony_ci} 141562306a36Sopenharmony_ci 141662306a36Sopenharmony_ci/* Knights Landing */ 141762306a36Sopenharmony_civoid intel_pmu_lbr_init_knl(void) 141862306a36Sopenharmony_ci{ 141962306a36Sopenharmony_ci x86_pmu.lbr_nr = 8; 142062306a36Sopenharmony_ci x86_pmu.lbr_tos = MSR_LBR_TOS; 142162306a36Sopenharmony_ci x86_pmu.lbr_from = MSR_LBR_NHM_FROM; 142262306a36Sopenharmony_ci x86_pmu.lbr_to = MSR_LBR_NHM_TO; 142362306a36Sopenharmony_ci 142462306a36Sopenharmony_ci x86_pmu.lbr_sel_mask = LBR_SEL_MASK; 142562306a36Sopenharmony_ci x86_pmu.lbr_sel_map = snb_lbr_sel_map; 142662306a36Sopenharmony_ci 142762306a36Sopenharmony_ci /* Knights Landing does have MISPREDICT bit */ 142862306a36Sopenharmony_ci if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP) 142962306a36Sopenharmony_ci x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS; 143062306a36Sopenharmony_ci} 143162306a36Sopenharmony_ci 143262306a36Sopenharmony_civoid intel_pmu_lbr_init(void) 143362306a36Sopenharmony_ci{ 143462306a36Sopenharmony_ci switch (x86_pmu.intel_cap.lbr_format) { 143562306a36Sopenharmony_ci case LBR_FORMAT_EIP_FLAGS2: 143662306a36Sopenharmony_ci x86_pmu.lbr_has_tsx = 1; 143762306a36Sopenharmony_ci x86_pmu.lbr_from_flags = 1; 143862306a36Sopenharmony_ci if (lbr_from_signext_quirk_needed()) 143962306a36Sopenharmony_ci static_branch_enable(&lbr_from_quirk_key); 144062306a36Sopenharmony_ci break; 144162306a36Sopenharmony_ci 144262306a36Sopenharmony_ci case LBR_FORMAT_EIP_FLAGS: 144362306a36Sopenharmony_ci x86_pmu.lbr_from_flags = 1; 144462306a36Sopenharmony_ci break; 144562306a36Sopenharmony_ci 144662306a36Sopenharmony_ci case LBR_FORMAT_INFO: 144762306a36Sopenharmony_ci x86_pmu.lbr_has_tsx = 1; 144862306a36Sopenharmony_ci fallthrough; 144962306a36Sopenharmony_ci case LBR_FORMAT_INFO2: 145062306a36Sopenharmony_ci x86_pmu.lbr_has_info = 1; 145162306a36Sopenharmony_ci break; 145262306a36Sopenharmony_ci 145362306a36Sopenharmony_ci case LBR_FORMAT_TIME: 145462306a36Sopenharmony_ci x86_pmu.lbr_from_flags = 1; 145562306a36Sopenharmony_ci x86_pmu.lbr_to_cycles = 1; 145662306a36Sopenharmony_ci break; 145762306a36Sopenharmony_ci } 145862306a36Sopenharmony_ci 145962306a36Sopenharmony_ci if (x86_pmu.lbr_has_info) { 146062306a36Sopenharmony_ci /* 146162306a36Sopenharmony_ci * Only used in combination with baseline pebs. 146262306a36Sopenharmony_ci */ 146362306a36Sopenharmony_ci static_branch_enable(&x86_lbr_mispred); 146462306a36Sopenharmony_ci static_branch_enable(&x86_lbr_cycles); 146562306a36Sopenharmony_ci } 146662306a36Sopenharmony_ci} 146762306a36Sopenharmony_ci 146862306a36Sopenharmony_ci/* 146962306a36Sopenharmony_ci * LBR state size is variable based on the max number of registers. 147062306a36Sopenharmony_ci * This calculates the expected state size, which should match 147162306a36Sopenharmony_ci * what the hardware enumerates for the size of XFEATURE_LBR. 147262306a36Sopenharmony_ci */ 147362306a36Sopenharmony_cistatic inline unsigned int get_lbr_state_size(void) 147462306a36Sopenharmony_ci{ 147562306a36Sopenharmony_ci return sizeof(struct arch_lbr_state) + 147662306a36Sopenharmony_ci x86_pmu.lbr_nr * sizeof(struct lbr_entry); 147762306a36Sopenharmony_ci} 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_cistatic bool is_arch_lbr_xsave_available(void) 148062306a36Sopenharmony_ci{ 148162306a36Sopenharmony_ci if (!boot_cpu_has(X86_FEATURE_XSAVES)) 148262306a36Sopenharmony_ci return false; 148362306a36Sopenharmony_ci 148462306a36Sopenharmony_ci /* 148562306a36Sopenharmony_ci * Check the LBR state with the corresponding software structure. 148662306a36Sopenharmony_ci * Disable LBR XSAVES support if the size doesn't match. 148762306a36Sopenharmony_ci */ 148862306a36Sopenharmony_ci if (xfeature_size(XFEATURE_LBR) == 0) 148962306a36Sopenharmony_ci return false; 149062306a36Sopenharmony_ci 149162306a36Sopenharmony_ci if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size())) 149262306a36Sopenharmony_ci return false; 149362306a36Sopenharmony_ci 149462306a36Sopenharmony_ci return true; 149562306a36Sopenharmony_ci} 149662306a36Sopenharmony_ci 149762306a36Sopenharmony_civoid __init intel_pmu_arch_lbr_init(void) 149862306a36Sopenharmony_ci{ 149962306a36Sopenharmony_ci struct pmu *pmu = x86_get_pmu(smp_processor_id()); 150062306a36Sopenharmony_ci union cpuid28_eax eax; 150162306a36Sopenharmony_ci union cpuid28_ebx ebx; 150262306a36Sopenharmony_ci union cpuid28_ecx ecx; 150362306a36Sopenharmony_ci unsigned int unused_edx; 150462306a36Sopenharmony_ci bool arch_lbr_xsave; 150562306a36Sopenharmony_ci size_t size; 150662306a36Sopenharmony_ci u64 lbr_nr; 150762306a36Sopenharmony_ci 150862306a36Sopenharmony_ci /* Arch LBR Capabilities */ 150962306a36Sopenharmony_ci cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx); 151062306a36Sopenharmony_ci 151162306a36Sopenharmony_ci lbr_nr = fls(eax.split.lbr_depth_mask) * 8; 151262306a36Sopenharmony_ci if (!lbr_nr) 151362306a36Sopenharmony_ci goto clear_arch_lbr; 151462306a36Sopenharmony_ci 151562306a36Sopenharmony_ci /* Apply the max depth of Arch LBR */ 151662306a36Sopenharmony_ci if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr)) 151762306a36Sopenharmony_ci goto clear_arch_lbr; 151862306a36Sopenharmony_ci 151962306a36Sopenharmony_ci x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask; 152062306a36Sopenharmony_ci x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset; 152162306a36Sopenharmony_ci x86_pmu.lbr_lip = eax.split.lbr_lip; 152262306a36Sopenharmony_ci x86_pmu.lbr_cpl = ebx.split.lbr_cpl; 152362306a36Sopenharmony_ci x86_pmu.lbr_filter = ebx.split.lbr_filter; 152462306a36Sopenharmony_ci x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack; 152562306a36Sopenharmony_ci x86_pmu.lbr_mispred = ecx.split.lbr_mispred; 152662306a36Sopenharmony_ci x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr; 152762306a36Sopenharmony_ci x86_pmu.lbr_br_type = ecx.split.lbr_br_type; 152862306a36Sopenharmony_ci x86_pmu.lbr_nr = lbr_nr; 152962306a36Sopenharmony_ci 153062306a36Sopenharmony_ci if (x86_pmu.lbr_mispred) 153162306a36Sopenharmony_ci static_branch_enable(&x86_lbr_mispred); 153262306a36Sopenharmony_ci if (x86_pmu.lbr_timed_lbr) 153362306a36Sopenharmony_ci static_branch_enable(&x86_lbr_cycles); 153462306a36Sopenharmony_ci if (x86_pmu.lbr_br_type) 153562306a36Sopenharmony_ci static_branch_enable(&x86_lbr_type); 153662306a36Sopenharmony_ci 153762306a36Sopenharmony_ci arch_lbr_xsave = is_arch_lbr_xsave_available(); 153862306a36Sopenharmony_ci if (arch_lbr_xsave) { 153962306a36Sopenharmony_ci size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) + 154062306a36Sopenharmony_ci get_lbr_state_size(); 154162306a36Sopenharmony_ci pmu->task_ctx_cache = create_lbr_kmem_cache(size, 154262306a36Sopenharmony_ci XSAVE_ALIGNMENT); 154362306a36Sopenharmony_ci } 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_ci if (!pmu->task_ctx_cache) { 154662306a36Sopenharmony_ci arch_lbr_xsave = false; 154762306a36Sopenharmony_ci 154862306a36Sopenharmony_ci size = sizeof(struct x86_perf_task_context_arch_lbr) + 154962306a36Sopenharmony_ci lbr_nr * sizeof(struct lbr_entry); 155062306a36Sopenharmony_ci pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0); 155162306a36Sopenharmony_ci } 155262306a36Sopenharmony_ci 155362306a36Sopenharmony_ci x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0; 155462306a36Sopenharmony_ci x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0; 155562306a36Sopenharmony_ci x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0; 155662306a36Sopenharmony_ci 155762306a36Sopenharmony_ci /* LBR callstack requires both CPL and Branch Filtering support */ 155862306a36Sopenharmony_ci if (!x86_pmu.lbr_cpl || 155962306a36Sopenharmony_ci !x86_pmu.lbr_filter || 156062306a36Sopenharmony_ci !x86_pmu.lbr_call_stack) 156162306a36Sopenharmony_ci arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP; 156262306a36Sopenharmony_ci 156362306a36Sopenharmony_ci if (!x86_pmu.lbr_cpl) { 156462306a36Sopenharmony_ci arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP; 156562306a36Sopenharmony_ci arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP; 156662306a36Sopenharmony_ci } else if (!x86_pmu.lbr_filter) { 156762306a36Sopenharmony_ci arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP; 156862306a36Sopenharmony_ci arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP; 156962306a36Sopenharmony_ci arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP; 157062306a36Sopenharmony_ci arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP; 157162306a36Sopenharmony_ci arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP; 157262306a36Sopenharmony_ci arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP; 157362306a36Sopenharmony_ci arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP; 157462306a36Sopenharmony_ci } 157562306a36Sopenharmony_ci 157662306a36Sopenharmony_ci x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK; 157762306a36Sopenharmony_ci x86_pmu.lbr_ctl_map = arch_lbr_ctl_map; 157862306a36Sopenharmony_ci 157962306a36Sopenharmony_ci if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter) 158062306a36Sopenharmony_ci x86_pmu.lbr_ctl_map = NULL; 158162306a36Sopenharmony_ci 158262306a36Sopenharmony_ci x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset; 158362306a36Sopenharmony_ci if (arch_lbr_xsave) { 158462306a36Sopenharmony_ci x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves; 158562306a36Sopenharmony_ci x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors; 158662306a36Sopenharmony_ci x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave; 158762306a36Sopenharmony_ci pr_cont("XSAVE "); 158862306a36Sopenharmony_ci } else { 158962306a36Sopenharmony_ci x86_pmu.lbr_save = intel_pmu_arch_lbr_save; 159062306a36Sopenharmony_ci x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore; 159162306a36Sopenharmony_ci x86_pmu.lbr_read = intel_pmu_arch_lbr_read; 159262306a36Sopenharmony_ci } 159362306a36Sopenharmony_ci 159462306a36Sopenharmony_ci pr_cont("Architectural LBR, "); 159562306a36Sopenharmony_ci 159662306a36Sopenharmony_ci return; 159762306a36Sopenharmony_ci 159862306a36Sopenharmony_ciclear_arch_lbr: 159962306a36Sopenharmony_ci setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR); 160062306a36Sopenharmony_ci} 160162306a36Sopenharmony_ci 160262306a36Sopenharmony_ci/** 160362306a36Sopenharmony_ci * x86_perf_get_lbr - get the LBR records information 160462306a36Sopenharmony_ci * 160562306a36Sopenharmony_ci * @lbr: the caller's memory to store the LBR records information 160662306a36Sopenharmony_ci */ 160762306a36Sopenharmony_civoid x86_perf_get_lbr(struct x86_pmu_lbr *lbr) 160862306a36Sopenharmony_ci{ 160962306a36Sopenharmony_ci lbr->nr = x86_pmu.lbr_nr; 161062306a36Sopenharmony_ci lbr->from = x86_pmu.lbr_from; 161162306a36Sopenharmony_ci lbr->to = x86_pmu.lbr_to; 161262306a36Sopenharmony_ci lbr->info = x86_pmu.lbr_info; 161362306a36Sopenharmony_ci} 161462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(x86_perf_get_lbr); 161562306a36Sopenharmony_ci 161662306a36Sopenharmony_cistruct event_constraint vlbr_constraint = 161762306a36Sopenharmony_ci __EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR), 161862306a36Sopenharmony_ci FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT); 1619