162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci#include <linux/bitops.h> 362306a36Sopenharmony_ci#include <linux/types.h> 462306a36Sopenharmony_ci#include <linux/slab.h> 562306a36Sopenharmony_ci#include <linux/sched/clock.h> 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci#include <asm/cpu_entry_area.h> 862306a36Sopenharmony_ci#include <asm/perf_event.h> 962306a36Sopenharmony_ci#include <asm/tlbflush.h> 1062306a36Sopenharmony_ci#include <asm/insn.h> 1162306a36Sopenharmony_ci#include <asm/io.h> 1262306a36Sopenharmony_ci#include <asm/timer.h> 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include "../perf_event.h" 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ci/* Waste a full page so it can be mapped into the cpu_entry_area */ 1762306a36Sopenharmony_ciDEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store); 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci/* The size of a BTS record in bytes: */ 2062306a36Sopenharmony_ci#define BTS_RECORD_SIZE 24 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_ci#define PEBS_FIXUP_SIZE PAGE_SIZE 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci/* 2562306a36Sopenharmony_ci * pebs_record_32 for p4 and core not supported 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_cistruct pebs_record_32 { 2862306a36Sopenharmony_ci u32 flags, ip; 2962306a36Sopenharmony_ci u32 ax, bc, cx, dx; 3062306a36Sopenharmony_ci u32 si, di, bp, sp; 3162306a36Sopenharmony_ci}; 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci */ 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ciunion intel_x86_pebs_dse { 3662306a36Sopenharmony_ci u64 val; 3762306a36Sopenharmony_ci struct { 3862306a36Sopenharmony_ci unsigned int ld_dse:4; 3962306a36Sopenharmony_ci unsigned int ld_stlb_miss:1; 4062306a36Sopenharmony_ci unsigned int ld_locked:1; 4162306a36Sopenharmony_ci unsigned int ld_data_blk:1; 4262306a36Sopenharmony_ci unsigned int ld_addr_blk:1; 4362306a36Sopenharmony_ci unsigned int ld_reserved:24; 4462306a36Sopenharmony_ci }; 4562306a36Sopenharmony_ci struct { 4662306a36Sopenharmony_ci unsigned int st_l1d_hit:1; 4762306a36Sopenharmony_ci unsigned int st_reserved1:3; 4862306a36Sopenharmony_ci unsigned int st_stlb_miss:1; 4962306a36Sopenharmony_ci unsigned int st_locked:1; 5062306a36Sopenharmony_ci unsigned int st_reserved2:26; 5162306a36Sopenharmony_ci }; 5262306a36Sopenharmony_ci struct { 5362306a36Sopenharmony_ci unsigned int st_lat_dse:4; 5462306a36Sopenharmony_ci unsigned int st_lat_stlb_miss:1; 5562306a36Sopenharmony_ci unsigned int st_lat_locked:1; 5662306a36Sopenharmony_ci unsigned int ld_reserved3:26; 5762306a36Sopenharmony_ci }; 5862306a36Sopenharmony_ci struct { 5962306a36Sopenharmony_ci unsigned int mtl_dse:5; 6062306a36Sopenharmony_ci unsigned int mtl_locked:1; 6162306a36Sopenharmony_ci unsigned int mtl_stlb_miss:1; 6262306a36Sopenharmony_ci unsigned int mtl_fwd_blk:1; 6362306a36Sopenharmony_ci unsigned int ld_reserved4:24; 6462306a36Sopenharmony_ci }; 6562306a36Sopenharmony_ci}; 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci/* 6962306a36Sopenharmony_ci * Map PEBS Load Latency Data Source encodings to generic 7062306a36Sopenharmony_ci * memory data source information 7162306a36Sopenharmony_ci */ 7262306a36Sopenharmony_ci#define P(a, b) PERF_MEM_S(a, b) 7362306a36Sopenharmony_ci#define OP_LH (P(OP, LOAD) | P(LVL, HIT)) 7462306a36Sopenharmony_ci#define LEVEL(x) P(LVLNUM, x) 7562306a36Sopenharmony_ci#define REM P(REMOTE, REMOTE) 7662306a36Sopenharmony_ci#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci/* Version for Sandy Bridge and later */ 7962306a36Sopenharmony_cistatic u64 pebs_data_source[] = { 8062306a36Sopenharmony_ci P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */ 8162306a36Sopenharmony_ci OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */ 8262306a36Sopenharmony_ci OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */ 8362306a36Sopenharmony_ci OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x03: L2 hit */ 8462306a36Sopenharmony_ci OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x04: L3 hit */ 8562306a36Sopenharmony_ci OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */ 8662306a36Sopenharmony_ci OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */ 8762306a36Sopenharmony_ci OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */ 8862306a36Sopenharmony_ci OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */ 8962306a36Sopenharmony_ci OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/ 9062306a36Sopenharmony_ci OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */ 9162306a36Sopenharmony_ci OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */ 9262306a36Sopenharmony_ci OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* 0x0c: L3 miss, excl */ 9362306a36Sopenharmony_ci OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */ 9462306a36Sopenharmony_ci OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */ 9562306a36Sopenharmony_ci OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */ 9662306a36Sopenharmony_ci}; 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_ci/* Patch up minor differences in the bits */ 9962306a36Sopenharmony_civoid __init intel_pmu_pebs_data_source_nhm(void) 10062306a36Sopenharmony_ci{ 10162306a36Sopenharmony_ci pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); 10262306a36Sopenharmony_ci pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); 10362306a36Sopenharmony_ci pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); 10462306a36Sopenharmony_ci} 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_cistatic void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source) 10762306a36Sopenharmony_ci{ 10862306a36Sopenharmony_ci u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4); 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT); 11162306a36Sopenharmony_ci data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT); 11262306a36Sopenharmony_ci data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE); 11362306a36Sopenharmony_ci data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD); 11462306a36Sopenharmony_ci data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM); 11562306a36Sopenharmony_ci} 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_civoid __init intel_pmu_pebs_data_source_skl(bool pmem) 11862306a36Sopenharmony_ci{ 11962306a36Sopenharmony_ci __intel_pmu_pebs_data_source_skl(pmem, pebs_data_source); 12062306a36Sopenharmony_ci} 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_cistatic void __init __intel_pmu_pebs_data_source_grt(u64 *data_source) 12362306a36Sopenharmony_ci{ 12462306a36Sopenharmony_ci data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); 12562306a36Sopenharmony_ci data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); 12662306a36Sopenharmony_ci data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD); 12762306a36Sopenharmony_ci} 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_civoid __init intel_pmu_pebs_data_source_grt(void) 13062306a36Sopenharmony_ci{ 13162306a36Sopenharmony_ci __intel_pmu_pebs_data_source_grt(pebs_data_source); 13262306a36Sopenharmony_ci} 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_civoid __init intel_pmu_pebs_data_source_adl(void) 13562306a36Sopenharmony_ci{ 13662306a36Sopenharmony_ci u64 *data_source; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source; 13962306a36Sopenharmony_ci memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); 14062306a36Sopenharmony_ci __intel_pmu_pebs_data_source_skl(false, data_source); 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source; 14362306a36Sopenharmony_ci memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); 14462306a36Sopenharmony_ci __intel_pmu_pebs_data_source_grt(data_source); 14562306a36Sopenharmony_ci} 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_cistatic void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source) 14862306a36Sopenharmony_ci{ 14962306a36Sopenharmony_ci data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD); 15062306a36Sopenharmony_ci data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); 15162306a36Sopenharmony_ci data_source[0x0a] = OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE); 15262306a36Sopenharmony_ci data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE); 15362306a36Sopenharmony_ci data_source[0x0c] = OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD); 15462306a36Sopenharmony_ci data_source[0x0d] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM); 15562306a36Sopenharmony_ci} 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_civoid __init intel_pmu_pebs_data_source_mtl(void) 15862306a36Sopenharmony_ci{ 15962306a36Sopenharmony_ci u64 *data_source; 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source; 16262306a36Sopenharmony_ci memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); 16362306a36Sopenharmony_ci __intel_pmu_pebs_data_source_skl(false, data_source); 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source; 16662306a36Sopenharmony_ci memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); 16762306a36Sopenharmony_ci __intel_pmu_pebs_data_source_cmt(data_source); 16862306a36Sopenharmony_ci} 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_civoid __init intel_pmu_pebs_data_source_cmt(void) 17162306a36Sopenharmony_ci{ 17262306a36Sopenharmony_ci __intel_pmu_pebs_data_source_cmt(pebs_data_source); 17362306a36Sopenharmony_ci} 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_cistatic u64 precise_store_data(u64 status) 17662306a36Sopenharmony_ci{ 17762306a36Sopenharmony_ci union intel_x86_pebs_dse dse; 17862306a36Sopenharmony_ci u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2); 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci dse.val = status; 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci /* 18362306a36Sopenharmony_ci * bit 4: TLB access 18462306a36Sopenharmony_ci * 1 = stored missed 2nd level TLB 18562306a36Sopenharmony_ci * 18662306a36Sopenharmony_ci * so it either hit the walker or the OS 18762306a36Sopenharmony_ci * otherwise hit 2nd level TLB 18862306a36Sopenharmony_ci */ 18962306a36Sopenharmony_ci if (dse.st_stlb_miss) 19062306a36Sopenharmony_ci val |= P(TLB, MISS); 19162306a36Sopenharmony_ci else 19262306a36Sopenharmony_ci val |= P(TLB, HIT); 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci /* 19562306a36Sopenharmony_ci * bit 0: hit L1 data cache 19662306a36Sopenharmony_ci * if not set, then all we know is that 19762306a36Sopenharmony_ci * it missed L1D 19862306a36Sopenharmony_ci */ 19962306a36Sopenharmony_ci if (dse.st_l1d_hit) 20062306a36Sopenharmony_ci val |= P(LVL, HIT); 20162306a36Sopenharmony_ci else 20262306a36Sopenharmony_ci val |= P(LVL, MISS); 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci /* 20562306a36Sopenharmony_ci * bit 5: Locked prefix 20662306a36Sopenharmony_ci */ 20762306a36Sopenharmony_ci if (dse.st_locked) 20862306a36Sopenharmony_ci val |= P(LOCK, LOCKED); 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci return val; 21162306a36Sopenharmony_ci} 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_cistatic u64 precise_datala_hsw(struct perf_event *event, u64 status) 21462306a36Sopenharmony_ci{ 21562306a36Sopenharmony_ci union perf_mem_data_src dse; 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci dse.val = PERF_MEM_NA; 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) 22062306a36Sopenharmony_ci dse.mem_op = PERF_MEM_OP_STORE; 22162306a36Sopenharmony_ci else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW) 22262306a36Sopenharmony_ci dse.mem_op = PERF_MEM_OP_LOAD; 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci /* 22562306a36Sopenharmony_ci * L1 info only valid for following events: 22662306a36Sopenharmony_ci * 22762306a36Sopenharmony_ci * MEM_UOPS_RETIRED.STLB_MISS_STORES 22862306a36Sopenharmony_ci * MEM_UOPS_RETIRED.LOCK_STORES 22962306a36Sopenharmony_ci * MEM_UOPS_RETIRED.SPLIT_STORES 23062306a36Sopenharmony_ci * MEM_UOPS_RETIRED.ALL_STORES 23162306a36Sopenharmony_ci */ 23262306a36Sopenharmony_ci if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) { 23362306a36Sopenharmony_ci if (status & 1) 23462306a36Sopenharmony_ci dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; 23562306a36Sopenharmony_ci else 23662306a36Sopenharmony_ci dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS; 23762306a36Sopenharmony_ci } 23862306a36Sopenharmony_ci return dse.val; 23962306a36Sopenharmony_ci} 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_cistatic inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) 24262306a36Sopenharmony_ci{ 24362306a36Sopenharmony_ci /* 24462306a36Sopenharmony_ci * TLB access 24562306a36Sopenharmony_ci * 0 = did not miss 2nd level TLB 24662306a36Sopenharmony_ci * 1 = missed 2nd level TLB 24762306a36Sopenharmony_ci */ 24862306a36Sopenharmony_ci if (tlb) 24962306a36Sopenharmony_ci *val |= P(TLB, MISS) | P(TLB, L2); 25062306a36Sopenharmony_ci else 25162306a36Sopenharmony_ci *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci /* locked prefix */ 25462306a36Sopenharmony_ci if (lock) 25562306a36Sopenharmony_ci *val |= P(LOCK, LOCKED); 25662306a36Sopenharmony_ci} 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci/* Retrieve the latency data for e-core of ADL */ 25962306a36Sopenharmony_cistatic u64 __adl_latency_data_small(struct perf_event *event, u64 status, 26062306a36Sopenharmony_ci u8 dse, bool tlb, bool lock, bool blk) 26162306a36Sopenharmony_ci{ 26262306a36Sopenharmony_ci u64 val; 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big); 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci dse &= PERF_PEBS_DATA_SOURCE_MASK; 26762306a36Sopenharmony_ci val = hybrid_var(event->pmu, pebs_data_source)[dse]; 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci pebs_set_tlb_lock(&val, tlb, lock); 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci if (blk) 27262306a36Sopenharmony_ci val |= P(BLK, DATA); 27362306a36Sopenharmony_ci else 27462306a36Sopenharmony_ci val |= P(BLK, NA); 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci return val; 27762306a36Sopenharmony_ci} 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ciu64 adl_latency_data_small(struct perf_event *event, u64 status) 28062306a36Sopenharmony_ci{ 28162306a36Sopenharmony_ci union intel_x86_pebs_dse dse; 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci dse.val = status; 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci return __adl_latency_data_small(event, status, dse.ld_dse, 28662306a36Sopenharmony_ci dse.ld_locked, dse.ld_stlb_miss, 28762306a36Sopenharmony_ci dse.ld_data_blk); 28862306a36Sopenharmony_ci} 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci/* Retrieve the latency data for e-core of MTL */ 29162306a36Sopenharmony_ciu64 mtl_latency_data_small(struct perf_event *event, u64 status) 29262306a36Sopenharmony_ci{ 29362306a36Sopenharmony_ci union intel_x86_pebs_dse dse; 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci dse.val = status; 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci return __adl_latency_data_small(event, status, dse.mtl_dse, 29862306a36Sopenharmony_ci dse.mtl_stlb_miss, dse.mtl_locked, 29962306a36Sopenharmony_ci dse.mtl_fwd_blk); 30062306a36Sopenharmony_ci} 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_cistatic u64 load_latency_data(struct perf_event *event, u64 status) 30362306a36Sopenharmony_ci{ 30462306a36Sopenharmony_ci union intel_x86_pebs_dse dse; 30562306a36Sopenharmony_ci u64 val; 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ci dse.val = status; 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci /* 31062306a36Sopenharmony_ci * use the mapping table for bit 0-3 31162306a36Sopenharmony_ci */ 31262306a36Sopenharmony_ci val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse]; 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci /* 31562306a36Sopenharmony_ci * Nehalem models do not support TLB, Lock infos 31662306a36Sopenharmony_ci */ 31762306a36Sopenharmony_ci if (x86_pmu.pebs_no_tlb) { 31862306a36Sopenharmony_ci val |= P(TLB, NA) | P(LOCK, NA); 31962306a36Sopenharmony_ci return val; 32062306a36Sopenharmony_ci } 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked); 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci /* 32562306a36Sopenharmony_ci * Ice Lake and earlier models do not support block infos. 32662306a36Sopenharmony_ci */ 32762306a36Sopenharmony_ci if (!x86_pmu.pebs_block) { 32862306a36Sopenharmony_ci val |= P(BLK, NA); 32962306a36Sopenharmony_ci return val; 33062306a36Sopenharmony_ci } 33162306a36Sopenharmony_ci /* 33262306a36Sopenharmony_ci * bit 6: load was blocked since its data could not be forwarded 33362306a36Sopenharmony_ci * from a preceding store 33462306a36Sopenharmony_ci */ 33562306a36Sopenharmony_ci if (dse.ld_data_blk) 33662306a36Sopenharmony_ci val |= P(BLK, DATA); 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci /* 33962306a36Sopenharmony_ci * bit 7: load was blocked due to potential address conflict with 34062306a36Sopenharmony_ci * a preceding store 34162306a36Sopenharmony_ci */ 34262306a36Sopenharmony_ci if (dse.ld_addr_blk) 34362306a36Sopenharmony_ci val |= P(BLK, ADDR); 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci if (!dse.ld_data_blk && !dse.ld_addr_blk) 34662306a36Sopenharmony_ci val |= P(BLK, NA); 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci return val; 34962306a36Sopenharmony_ci} 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_cistatic u64 store_latency_data(struct perf_event *event, u64 status) 35262306a36Sopenharmony_ci{ 35362306a36Sopenharmony_ci union intel_x86_pebs_dse dse; 35462306a36Sopenharmony_ci union perf_mem_data_src src; 35562306a36Sopenharmony_ci u64 val; 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci dse.val = status; 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_ci /* 36062306a36Sopenharmony_ci * use the mapping table for bit 0-3 36162306a36Sopenharmony_ci */ 36262306a36Sopenharmony_ci val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse]; 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked); 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci val |= P(BLK, NA); 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci /* 36962306a36Sopenharmony_ci * the pebs_data_source table is only for loads 37062306a36Sopenharmony_ci * so override the mem_op to say STORE instead 37162306a36Sopenharmony_ci */ 37262306a36Sopenharmony_ci src.val = val; 37362306a36Sopenharmony_ci src.mem_op = P(OP,STORE); 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_ci return src.val; 37662306a36Sopenharmony_ci} 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_cistruct pebs_record_core { 37962306a36Sopenharmony_ci u64 flags, ip; 38062306a36Sopenharmony_ci u64 ax, bx, cx, dx; 38162306a36Sopenharmony_ci u64 si, di, bp, sp; 38262306a36Sopenharmony_ci u64 r8, r9, r10, r11; 38362306a36Sopenharmony_ci u64 r12, r13, r14, r15; 38462306a36Sopenharmony_ci}; 38562306a36Sopenharmony_ci 38662306a36Sopenharmony_cistruct pebs_record_nhm { 38762306a36Sopenharmony_ci u64 flags, ip; 38862306a36Sopenharmony_ci u64 ax, bx, cx, dx; 38962306a36Sopenharmony_ci u64 si, di, bp, sp; 39062306a36Sopenharmony_ci u64 r8, r9, r10, r11; 39162306a36Sopenharmony_ci u64 r12, r13, r14, r15; 39262306a36Sopenharmony_ci u64 status, dla, dse, lat; 39362306a36Sopenharmony_ci}; 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci/* 39662306a36Sopenharmony_ci * Same as pebs_record_nhm, with two additional fields. 39762306a36Sopenharmony_ci */ 39862306a36Sopenharmony_cistruct pebs_record_hsw { 39962306a36Sopenharmony_ci u64 flags, ip; 40062306a36Sopenharmony_ci u64 ax, bx, cx, dx; 40162306a36Sopenharmony_ci u64 si, di, bp, sp; 40262306a36Sopenharmony_ci u64 r8, r9, r10, r11; 40362306a36Sopenharmony_ci u64 r12, r13, r14, r15; 40462306a36Sopenharmony_ci u64 status, dla, dse, lat; 40562306a36Sopenharmony_ci u64 real_ip, tsx_tuning; 40662306a36Sopenharmony_ci}; 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ciunion hsw_tsx_tuning { 40962306a36Sopenharmony_ci struct { 41062306a36Sopenharmony_ci u32 cycles_last_block : 32, 41162306a36Sopenharmony_ci hle_abort : 1, 41262306a36Sopenharmony_ci rtm_abort : 1, 41362306a36Sopenharmony_ci instruction_abort : 1, 41462306a36Sopenharmony_ci non_instruction_abort : 1, 41562306a36Sopenharmony_ci retry : 1, 41662306a36Sopenharmony_ci data_conflict : 1, 41762306a36Sopenharmony_ci capacity_writes : 1, 41862306a36Sopenharmony_ci capacity_reads : 1; 41962306a36Sopenharmony_ci }; 42062306a36Sopenharmony_ci u64 value; 42162306a36Sopenharmony_ci}; 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci/* Same as HSW, plus TSC */ 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_cistruct pebs_record_skl { 42862306a36Sopenharmony_ci u64 flags, ip; 42962306a36Sopenharmony_ci u64 ax, bx, cx, dx; 43062306a36Sopenharmony_ci u64 si, di, bp, sp; 43162306a36Sopenharmony_ci u64 r8, r9, r10, r11; 43262306a36Sopenharmony_ci u64 r12, r13, r14, r15; 43362306a36Sopenharmony_ci u64 status, dla, dse, lat; 43462306a36Sopenharmony_ci u64 real_ip, tsx_tuning; 43562306a36Sopenharmony_ci u64 tsc; 43662306a36Sopenharmony_ci}; 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_civoid init_debug_store_on_cpu(int cpu) 43962306a36Sopenharmony_ci{ 44062306a36Sopenharmony_ci struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci if (!ds) 44362306a36Sopenharmony_ci return; 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 44662306a36Sopenharmony_ci (u32)((u64)(unsigned long)ds), 44762306a36Sopenharmony_ci (u32)((u64)(unsigned long)ds >> 32)); 44862306a36Sopenharmony_ci} 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_civoid fini_debug_store_on_cpu(int cpu) 45162306a36Sopenharmony_ci{ 45262306a36Sopenharmony_ci if (!per_cpu(cpu_hw_events, cpu).ds) 45362306a36Sopenharmony_ci return; 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); 45662306a36Sopenharmony_ci} 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_cistatic DEFINE_PER_CPU(void *, insn_buffer); 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_cistatic void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot) 46162306a36Sopenharmony_ci{ 46262306a36Sopenharmony_ci unsigned long start = (unsigned long)cea; 46362306a36Sopenharmony_ci phys_addr_t pa; 46462306a36Sopenharmony_ci size_t msz = 0; 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci pa = virt_to_phys(addr); 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci preempt_disable(); 46962306a36Sopenharmony_ci for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE) 47062306a36Sopenharmony_ci cea_set_pte(cea, pa, prot); 47162306a36Sopenharmony_ci 47262306a36Sopenharmony_ci /* 47362306a36Sopenharmony_ci * This is a cross-CPU update of the cpu_entry_area, we must shoot down 47462306a36Sopenharmony_ci * all TLB entries for it. 47562306a36Sopenharmony_ci */ 47662306a36Sopenharmony_ci flush_tlb_kernel_range(start, start + size); 47762306a36Sopenharmony_ci preempt_enable(); 47862306a36Sopenharmony_ci} 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_cistatic void ds_clear_cea(void *cea, size_t size) 48162306a36Sopenharmony_ci{ 48262306a36Sopenharmony_ci unsigned long start = (unsigned long)cea; 48362306a36Sopenharmony_ci size_t msz = 0; 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci preempt_disable(); 48662306a36Sopenharmony_ci for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE) 48762306a36Sopenharmony_ci cea_set_pte(cea, 0, PAGE_NONE); 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_ci flush_tlb_kernel_range(start, start + size); 49062306a36Sopenharmony_ci preempt_enable(); 49162306a36Sopenharmony_ci} 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_cistatic void *dsalloc_pages(size_t size, gfp_t flags, int cpu) 49462306a36Sopenharmony_ci{ 49562306a36Sopenharmony_ci unsigned int order = get_order(size); 49662306a36Sopenharmony_ci int node = cpu_to_node(cpu); 49762306a36Sopenharmony_ci struct page *page; 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci page = __alloc_pages_node(node, flags | __GFP_ZERO, order); 50062306a36Sopenharmony_ci return page ? page_address(page) : NULL; 50162306a36Sopenharmony_ci} 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_cistatic void dsfree_pages(const void *buffer, size_t size) 50462306a36Sopenharmony_ci{ 50562306a36Sopenharmony_ci if (buffer) 50662306a36Sopenharmony_ci free_pages((unsigned long)buffer, get_order(size)); 50762306a36Sopenharmony_ci} 50862306a36Sopenharmony_ci 50962306a36Sopenharmony_cistatic int alloc_pebs_buffer(int cpu) 51062306a36Sopenharmony_ci{ 51162306a36Sopenharmony_ci struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); 51262306a36Sopenharmony_ci struct debug_store *ds = hwev->ds; 51362306a36Sopenharmony_ci size_t bsiz = x86_pmu.pebs_buffer_size; 51462306a36Sopenharmony_ci int max, node = cpu_to_node(cpu); 51562306a36Sopenharmony_ci void *buffer, *insn_buff, *cea; 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci if (!x86_pmu.pebs) 51862306a36Sopenharmony_ci return 0; 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu); 52162306a36Sopenharmony_ci if (unlikely(!buffer)) 52262306a36Sopenharmony_ci return -ENOMEM; 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci /* 52562306a36Sopenharmony_ci * HSW+ already provides us the eventing ip; no need to allocate this 52662306a36Sopenharmony_ci * buffer then. 52762306a36Sopenharmony_ci */ 52862306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_format < 2) { 52962306a36Sopenharmony_ci insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); 53062306a36Sopenharmony_ci if (!insn_buff) { 53162306a36Sopenharmony_ci dsfree_pages(buffer, bsiz); 53262306a36Sopenharmony_ci return -ENOMEM; 53362306a36Sopenharmony_ci } 53462306a36Sopenharmony_ci per_cpu(insn_buffer, cpu) = insn_buff; 53562306a36Sopenharmony_ci } 53662306a36Sopenharmony_ci hwev->ds_pebs_vaddr = buffer; 53762306a36Sopenharmony_ci /* Update the cpu entry area mapping */ 53862306a36Sopenharmony_ci cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; 53962306a36Sopenharmony_ci ds->pebs_buffer_base = (unsigned long) cea; 54062306a36Sopenharmony_ci ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL); 54162306a36Sopenharmony_ci ds->pebs_index = ds->pebs_buffer_base; 54262306a36Sopenharmony_ci max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size); 54362306a36Sopenharmony_ci ds->pebs_absolute_maximum = ds->pebs_buffer_base + max; 54462306a36Sopenharmony_ci return 0; 54562306a36Sopenharmony_ci} 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_cistatic void release_pebs_buffer(int cpu) 54862306a36Sopenharmony_ci{ 54962306a36Sopenharmony_ci struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); 55062306a36Sopenharmony_ci void *cea; 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_ci if (!x86_pmu.pebs) 55362306a36Sopenharmony_ci return; 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci kfree(per_cpu(insn_buffer, cpu)); 55662306a36Sopenharmony_ci per_cpu(insn_buffer, cpu) = NULL; 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_ci /* Clear the fixmap */ 55962306a36Sopenharmony_ci cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; 56062306a36Sopenharmony_ci ds_clear_cea(cea, x86_pmu.pebs_buffer_size); 56162306a36Sopenharmony_ci dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size); 56262306a36Sopenharmony_ci hwev->ds_pebs_vaddr = NULL; 56362306a36Sopenharmony_ci} 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_cistatic int alloc_bts_buffer(int cpu) 56662306a36Sopenharmony_ci{ 56762306a36Sopenharmony_ci struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); 56862306a36Sopenharmony_ci struct debug_store *ds = hwev->ds; 56962306a36Sopenharmony_ci void *buffer, *cea; 57062306a36Sopenharmony_ci int max; 57162306a36Sopenharmony_ci 57262306a36Sopenharmony_ci if (!x86_pmu.bts) 57362306a36Sopenharmony_ci return 0; 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu); 57662306a36Sopenharmony_ci if (unlikely(!buffer)) { 57762306a36Sopenharmony_ci WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); 57862306a36Sopenharmony_ci return -ENOMEM; 57962306a36Sopenharmony_ci } 58062306a36Sopenharmony_ci hwev->ds_bts_vaddr = buffer; 58162306a36Sopenharmony_ci /* Update the fixmap */ 58262306a36Sopenharmony_ci cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; 58362306a36Sopenharmony_ci ds->bts_buffer_base = (unsigned long) cea; 58462306a36Sopenharmony_ci ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL); 58562306a36Sopenharmony_ci ds->bts_index = ds->bts_buffer_base; 58662306a36Sopenharmony_ci max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; 58762306a36Sopenharmony_ci ds->bts_absolute_maximum = ds->bts_buffer_base + 58862306a36Sopenharmony_ci max * BTS_RECORD_SIZE; 58962306a36Sopenharmony_ci ds->bts_interrupt_threshold = ds->bts_absolute_maximum - 59062306a36Sopenharmony_ci (max / 16) * BTS_RECORD_SIZE; 59162306a36Sopenharmony_ci return 0; 59262306a36Sopenharmony_ci} 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_cistatic void release_bts_buffer(int cpu) 59562306a36Sopenharmony_ci{ 59662306a36Sopenharmony_ci struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); 59762306a36Sopenharmony_ci void *cea; 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_ci if (!x86_pmu.bts) 60062306a36Sopenharmony_ci return; 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci /* Clear the fixmap */ 60362306a36Sopenharmony_ci cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; 60462306a36Sopenharmony_ci ds_clear_cea(cea, BTS_BUFFER_SIZE); 60562306a36Sopenharmony_ci dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE); 60662306a36Sopenharmony_ci hwev->ds_bts_vaddr = NULL; 60762306a36Sopenharmony_ci} 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_cistatic int alloc_ds_buffer(int cpu) 61062306a36Sopenharmony_ci{ 61162306a36Sopenharmony_ci struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store; 61262306a36Sopenharmony_ci 61362306a36Sopenharmony_ci memset(ds, 0, sizeof(*ds)); 61462306a36Sopenharmony_ci per_cpu(cpu_hw_events, cpu).ds = ds; 61562306a36Sopenharmony_ci return 0; 61662306a36Sopenharmony_ci} 61762306a36Sopenharmony_ci 61862306a36Sopenharmony_cistatic void release_ds_buffer(int cpu) 61962306a36Sopenharmony_ci{ 62062306a36Sopenharmony_ci per_cpu(cpu_hw_events, cpu).ds = NULL; 62162306a36Sopenharmony_ci} 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_civoid release_ds_buffers(void) 62462306a36Sopenharmony_ci{ 62562306a36Sopenharmony_ci int cpu; 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_ci if (!x86_pmu.bts && !x86_pmu.pebs) 62862306a36Sopenharmony_ci return; 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci for_each_possible_cpu(cpu) 63162306a36Sopenharmony_ci release_ds_buffer(cpu); 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 63462306a36Sopenharmony_ci /* 63562306a36Sopenharmony_ci * Again, ignore errors from offline CPUs, they will no longer 63662306a36Sopenharmony_ci * observe cpu_hw_events.ds and not program the DS_AREA when 63762306a36Sopenharmony_ci * they come up. 63862306a36Sopenharmony_ci */ 63962306a36Sopenharmony_ci fini_debug_store_on_cpu(cpu); 64062306a36Sopenharmony_ci } 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 64362306a36Sopenharmony_ci release_pebs_buffer(cpu); 64462306a36Sopenharmony_ci release_bts_buffer(cpu); 64562306a36Sopenharmony_ci } 64662306a36Sopenharmony_ci} 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_civoid reserve_ds_buffers(void) 64962306a36Sopenharmony_ci{ 65062306a36Sopenharmony_ci int bts_err = 0, pebs_err = 0; 65162306a36Sopenharmony_ci int cpu; 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_ci x86_pmu.bts_active = 0; 65462306a36Sopenharmony_ci x86_pmu.pebs_active = 0; 65562306a36Sopenharmony_ci 65662306a36Sopenharmony_ci if (!x86_pmu.bts && !x86_pmu.pebs) 65762306a36Sopenharmony_ci return; 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci if (!x86_pmu.bts) 66062306a36Sopenharmony_ci bts_err = 1; 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci if (!x86_pmu.pebs) 66362306a36Sopenharmony_ci pebs_err = 1; 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 66662306a36Sopenharmony_ci if (alloc_ds_buffer(cpu)) { 66762306a36Sopenharmony_ci bts_err = 1; 66862306a36Sopenharmony_ci pebs_err = 1; 66962306a36Sopenharmony_ci } 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci if (!bts_err && alloc_bts_buffer(cpu)) 67262306a36Sopenharmony_ci bts_err = 1; 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_ci if (!pebs_err && alloc_pebs_buffer(cpu)) 67562306a36Sopenharmony_ci pebs_err = 1; 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_ci if (bts_err && pebs_err) 67862306a36Sopenharmony_ci break; 67962306a36Sopenharmony_ci } 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci if (bts_err) { 68262306a36Sopenharmony_ci for_each_possible_cpu(cpu) 68362306a36Sopenharmony_ci release_bts_buffer(cpu); 68462306a36Sopenharmony_ci } 68562306a36Sopenharmony_ci 68662306a36Sopenharmony_ci if (pebs_err) { 68762306a36Sopenharmony_ci for_each_possible_cpu(cpu) 68862306a36Sopenharmony_ci release_pebs_buffer(cpu); 68962306a36Sopenharmony_ci } 69062306a36Sopenharmony_ci 69162306a36Sopenharmony_ci if (bts_err && pebs_err) { 69262306a36Sopenharmony_ci for_each_possible_cpu(cpu) 69362306a36Sopenharmony_ci release_ds_buffer(cpu); 69462306a36Sopenharmony_ci } else { 69562306a36Sopenharmony_ci if (x86_pmu.bts && !bts_err) 69662306a36Sopenharmony_ci x86_pmu.bts_active = 1; 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci if (x86_pmu.pebs && !pebs_err) 69962306a36Sopenharmony_ci x86_pmu.pebs_active = 1; 70062306a36Sopenharmony_ci 70162306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 70262306a36Sopenharmony_ci /* 70362306a36Sopenharmony_ci * Ignores wrmsr_on_cpu() errors for offline CPUs they 70462306a36Sopenharmony_ci * will get this call through intel_pmu_cpu_starting(). 70562306a36Sopenharmony_ci */ 70662306a36Sopenharmony_ci init_debug_store_on_cpu(cpu); 70762306a36Sopenharmony_ci } 70862306a36Sopenharmony_ci } 70962306a36Sopenharmony_ci} 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci/* 71262306a36Sopenharmony_ci * BTS 71362306a36Sopenharmony_ci */ 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_cistruct event_constraint bts_constraint = 71662306a36Sopenharmony_ci EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0); 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_civoid intel_pmu_enable_bts(u64 config) 71962306a36Sopenharmony_ci{ 72062306a36Sopenharmony_ci unsigned long debugctlmsr; 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci debugctlmsr = get_debugctlmsr(); 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci debugctlmsr |= DEBUGCTLMSR_TR; 72562306a36Sopenharmony_ci debugctlmsr |= DEBUGCTLMSR_BTS; 72662306a36Sopenharmony_ci if (config & ARCH_PERFMON_EVENTSEL_INT) 72762306a36Sopenharmony_ci debugctlmsr |= DEBUGCTLMSR_BTINT; 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci if (!(config & ARCH_PERFMON_EVENTSEL_OS)) 73062306a36Sopenharmony_ci debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS; 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci if (!(config & ARCH_PERFMON_EVENTSEL_USR)) 73362306a36Sopenharmony_ci debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR; 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci update_debugctlmsr(debugctlmsr); 73662306a36Sopenharmony_ci} 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_civoid intel_pmu_disable_bts(void) 73962306a36Sopenharmony_ci{ 74062306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 74162306a36Sopenharmony_ci unsigned long debugctlmsr; 74262306a36Sopenharmony_ci 74362306a36Sopenharmony_ci if (!cpuc->ds) 74462306a36Sopenharmony_ci return; 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci debugctlmsr = get_debugctlmsr(); 74762306a36Sopenharmony_ci 74862306a36Sopenharmony_ci debugctlmsr &= 74962306a36Sopenharmony_ci ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT | 75062306a36Sopenharmony_ci DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR); 75162306a36Sopenharmony_ci 75262306a36Sopenharmony_ci update_debugctlmsr(debugctlmsr); 75362306a36Sopenharmony_ci} 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_ciint intel_pmu_drain_bts_buffer(void) 75662306a36Sopenharmony_ci{ 75762306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 75862306a36Sopenharmony_ci struct debug_store *ds = cpuc->ds; 75962306a36Sopenharmony_ci struct bts_record { 76062306a36Sopenharmony_ci u64 from; 76162306a36Sopenharmony_ci u64 to; 76262306a36Sopenharmony_ci u64 flags; 76362306a36Sopenharmony_ci }; 76462306a36Sopenharmony_ci struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; 76562306a36Sopenharmony_ci struct bts_record *at, *base, *top; 76662306a36Sopenharmony_ci struct perf_output_handle handle; 76762306a36Sopenharmony_ci struct perf_event_header header; 76862306a36Sopenharmony_ci struct perf_sample_data data; 76962306a36Sopenharmony_ci unsigned long skip = 0; 77062306a36Sopenharmony_ci struct pt_regs regs; 77162306a36Sopenharmony_ci 77262306a36Sopenharmony_ci if (!event) 77362306a36Sopenharmony_ci return 0; 77462306a36Sopenharmony_ci 77562306a36Sopenharmony_ci if (!x86_pmu.bts_active) 77662306a36Sopenharmony_ci return 0; 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ci base = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 77962306a36Sopenharmony_ci top = (struct bts_record *)(unsigned long)ds->bts_index; 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci if (top <= base) 78262306a36Sopenharmony_ci return 0; 78362306a36Sopenharmony_ci 78462306a36Sopenharmony_ci memset(®s, 0, sizeof(regs)); 78562306a36Sopenharmony_ci 78662306a36Sopenharmony_ci ds->bts_index = ds->bts_buffer_base; 78762306a36Sopenharmony_ci 78862306a36Sopenharmony_ci perf_sample_data_init(&data, 0, event->hw.last_period); 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci /* 79162306a36Sopenharmony_ci * BTS leaks kernel addresses in branches across the cpl boundary, 79262306a36Sopenharmony_ci * such as traps or system calls, so unless the user is asking for 79362306a36Sopenharmony_ci * kernel tracing (and right now it's not possible), we'd need to 79462306a36Sopenharmony_ci * filter them out. But first we need to count how many of those we 79562306a36Sopenharmony_ci * have in the current batch. This is an extra O(n) pass, however, 79662306a36Sopenharmony_ci * it's much faster than the other one especially considering that 79762306a36Sopenharmony_ci * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the 79862306a36Sopenharmony_ci * alloc_bts_buffer()). 79962306a36Sopenharmony_ci */ 80062306a36Sopenharmony_ci for (at = base; at < top; at++) { 80162306a36Sopenharmony_ci /* 80262306a36Sopenharmony_ci * Note that right now *this* BTS code only works if 80362306a36Sopenharmony_ci * attr::exclude_kernel is set, but let's keep this extra 80462306a36Sopenharmony_ci * check here in case that changes. 80562306a36Sopenharmony_ci */ 80662306a36Sopenharmony_ci if (event->attr.exclude_kernel && 80762306a36Sopenharmony_ci (kernel_ip(at->from) || kernel_ip(at->to))) 80862306a36Sopenharmony_ci skip++; 80962306a36Sopenharmony_ci } 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci /* 81262306a36Sopenharmony_ci * Prepare a generic sample, i.e. fill in the invariant fields. 81362306a36Sopenharmony_ci * We will overwrite the from and to address before we output 81462306a36Sopenharmony_ci * the sample. 81562306a36Sopenharmony_ci */ 81662306a36Sopenharmony_ci rcu_read_lock(); 81762306a36Sopenharmony_ci perf_prepare_sample(&data, event, ®s); 81862306a36Sopenharmony_ci perf_prepare_header(&header, &data, event, ®s); 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci if (perf_output_begin(&handle, &data, event, 82162306a36Sopenharmony_ci header.size * (top - base - skip))) 82262306a36Sopenharmony_ci goto unlock; 82362306a36Sopenharmony_ci 82462306a36Sopenharmony_ci for (at = base; at < top; at++) { 82562306a36Sopenharmony_ci /* Filter out any records that contain kernel addresses. */ 82662306a36Sopenharmony_ci if (event->attr.exclude_kernel && 82762306a36Sopenharmony_ci (kernel_ip(at->from) || kernel_ip(at->to))) 82862306a36Sopenharmony_ci continue; 82962306a36Sopenharmony_ci 83062306a36Sopenharmony_ci data.ip = at->from; 83162306a36Sopenharmony_ci data.addr = at->to; 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_ci perf_output_sample(&handle, &header, &data, event); 83462306a36Sopenharmony_ci } 83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci perf_output_end(&handle); 83762306a36Sopenharmony_ci 83862306a36Sopenharmony_ci /* There's new data available. */ 83962306a36Sopenharmony_ci event->hw.interrupts++; 84062306a36Sopenharmony_ci event->pending_kill = POLL_IN; 84162306a36Sopenharmony_ciunlock: 84262306a36Sopenharmony_ci rcu_read_unlock(); 84362306a36Sopenharmony_ci return 1; 84462306a36Sopenharmony_ci} 84562306a36Sopenharmony_ci 84662306a36Sopenharmony_cistatic inline void intel_pmu_drain_pebs_buffer(void) 84762306a36Sopenharmony_ci{ 84862306a36Sopenharmony_ci struct perf_sample_data data; 84962306a36Sopenharmony_ci 85062306a36Sopenharmony_ci x86_pmu.drain_pebs(NULL, &data); 85162306a36Sopenharmony_ci} 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci/* 85462306a36Sopenharmony_ci * PEBS 85562306a36Sopenharmony_ci */ 85662306a36Sopenharmony_cistruct event_constraint intel_core2_pebs_event_constraints[] = { 85762306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ 85862306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ 85962306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ 86062306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ 86162306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ 86262306a36Sopenharmony_ci /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ 86362306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01), 86462306a36Sopenharmony_ci EVENT_CONSTRAINT_END 86562306a36Sopenharmony_ci}; 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_cistruct event_constraint intel_atom_pebs_event_constraints[] = { 86862306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ 86962306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ 87062306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ 87162306a36Sopenharmony_ci /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ 87262306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01), 87362306a36Sopenharmony_ci /* Allow all events as PEBS with no flags */ 87462306a36Sopenharmony_ci INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), 87562306a36Sopenharmony_ci EVENT_CONSTRAINT_END 87662306a36Sopenharmony_ci}; 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_cistruct event_constraint intel_slm_pebs_event_constraints[] = { 87962306a36Sopenharmony_ci /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ 88062306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1), 88162306a36Sopenharmony_ci /* Allow all events as PEBS with no flags */ 88262306a36Sopenharmony_ci INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), 88362306a36Sopenharmony_ci EVENT_CONSTRAINT_END 88462306a36Sopenharmony_ci}; 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_cistruct event_constraint intel_glm_pebs_event_constraints[] = { 88762306a36Sopenharmony_ci /* Allow all events as PEBS with no flags */ 88862306a36Sopenharmony_ci INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), 88962306a36Sopenharmony_ci EVENT_CONSTRAINT_END 89062306a36Sopenharmony_ci}; 89162306a36Sopenharmony_ci 89262306a36Sopenharmony_cistruct event_constraint intel_grt_pebs_event_constraints[] = { 89362306a36Sopenharmony_ci /* Allow all events as PEBS with no flags */ 89462306a36Sopenharmony_ci INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3), 89562306a36Sopenharmony_ci INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf), 89662306a36Sopenharmony_ci EVENT_CONSTRAINT_END 89762306a36Sopenharmony_ci}; 89862306a36Sopenharmony_ci 89962306a36Sopenharmony_cistruct event_constraint intel_nehalem_pebs_event_constraints[] = { 90062306a36Sopenharmony_ci INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ 90162306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ 90262306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ 90362306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */ 90462306a36Sopenharmony_ci INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ 90562306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ 90662306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ 90762306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ 90862306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ 90962306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ 91062306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ 91162306a36Sopenharmony_ci /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ 91262306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f), 91362306a36Sopenharmony_ci EVENT_CONSTRAINT_END 91462306a36Sopenharmony_ci}; 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_cistruct event_constraint intel_westmere_pebs_event_constraints[] = { 91762306a36Sopenharmony_ci INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ 91862306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ 91962306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ 92062306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */ 92162306a36Sopenharmony_ci INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ 92262306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ 92362306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ 92462306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ 92562306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ 92662306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ 92762306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ 92862306a36Sopenharmony_ci /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ 92962306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f), 93062306a36Sopenharmony_ci EVENT_CONSTRAINT_END 93162306a36Sopenharmony_ci}; 93262306a36Sopenharmony_ci 93362306a36Sopenharmony_cistruct event_constraint intel_snb_pebs_event_constraints[] = { 93462306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ 93562306a36Sopenharmony_ci INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ 93662306a36Sopenharmony_ci INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ 93762306a36Sopenharmony_ci /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ 93862306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), 93962306a36Sopenharmony_ci INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ 94062306a36Sopenharmony_ci INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 94162306a36Sopenharmony_ci INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ 94262306a36Sopenharmony_ci INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ 94362306a36Sopenharmony_ci /* Allow all events as PEBS with no flags */ 94462306a36Sopenharmony_ci INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), 94562306a36Sopenharmony_ci EVENT_CONSTRAINT_END 94662306a36Sopenharmony_ci}; 94762306a36Sopenharmony_ci 94862306a36Sopenharmony_cistruct event_constraint intel_ivb_pebs_event_constraints[] = { 94962306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ 95062306a36Sopenharmony_ci INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ 95162306a36Sopenharmony_ci INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ 95262306a36Sopenharmony_ci /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ 95362306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), 95462306a36Sopenharmony_ci /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ 95562306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), 95662306a36Sopenharmony_ci INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ 95762306a36Sopenharmony_ci INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 95862306a36Sopenharmony_ci INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ 95962306a36Sopenharmony_ci INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ 96062306a36Sopenharmony_ci /* Allow all events as PEBS with no flags */ 96162306a36Sopenharmony_ci INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), 96262306a36Sopenharmony_ci EVENT_CONSTRAINT_END 96362306a36Sopenharmony_ci}; 96462306a36Sopenharmony_ci 96562306a36Sopenharmony_cistruct event_constraint intel_hsw_pebs_event_constraints[] = { 96662306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ 96762306a36Sopenharmony_ci INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */ 96862306a36Sopenharmony_ci /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ 96962306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), 97062306a36Sopenharmony_ci /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ 97162306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), 97262306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ 97362306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ 97462306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ 97562306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ 97662306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ 97762306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ 97862306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */ 97962306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ 98062306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 98162306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */ 98262306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */ 98362306a36Sopenharmony_ci /* Allow all events as PEBS with no flags */ 98462306a36Sopenharmony_ci INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), 98562306a36Sopenharmony_ci EVENT_CONSTRAINT_END 98662306a36Sopenharmony_ci}; 98762306a36Sopenharmony_ci 98862306a36Sopenharmony_cistruct event_constraint intel_bdw_pebs_event_constraints[] = { 98962306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ 99062306a36Sopenharmony_ci INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */ 99162306a36Sopenharmony_ci /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ 99262306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf), 99362306a36Sopenharmony_ci /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ 99462306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), 99562306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ 99662306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ 99762306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ 99862306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ 99962306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ 100062306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ 100162306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */ 100262306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ 100362306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 100462306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */ 100562306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */ 100662306a36Sopenharmony_ci /* Allow all events as PEBS with no flags */ 100762306a36Sopenharmony_ci INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), 100862306a36Sopenharmony_ci EVENT_CONSTRAINT_END 100962306a36Sopenharmony_ci}; 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_ci 101262306a36Sopenharmony_cistruct event_constraint intel_skl_pebs_event_constraints[] = { 101362306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ 101462306a36Sopenharmony_ci /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ 101562306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2), 101662306a36Sopenharmony_ci /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */ 101762306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f), 101862306a36Sopenharmony_ci INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */ 101962306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ 102062306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ 102162306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ 102262306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */ 102362306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ 102462306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ 102562306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ 102662306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ 102762306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ 102862306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ 102962306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_L3_MISS_RETIRED.* */ 103062306a36Sopenharmony_ci /* Allow all events as PEBS with no flags */ 103162306a36Sopenharmony_ci INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), 103262306a36Sopenharmony_ci EVENT_CONSTRAINT_END 103362306a36Sopenharmony_ci}; 103462306a36Sopenharmony_ci 103562306a36Sopenharmony_cistruct event_constraint intel_icl_pebs_event_constraints[] = { 103662306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL), /* old INST_RETIRED.PREC_DIST */ 103762306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ 103862306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */ 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_ci INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ 104162306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ 104262306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ 104362306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ 104462306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ 104562306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ 104662306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ 104762306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ 104862306a36Sopenharmony_ci 104962306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */ 105062306a36Sopenharmony_ci 105162306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ 105262306a36Sopenharmony_ci 105362306a36Sopenharmony_ci /* 105462306a36Sopenharmony_ci * Everything else is handled by PMU_FL_PEBS_ALL, because we 105562306a36Sopenharmony_ci * need the full constraints from the main table. 105662306a36Sopenharmony_ci */ 105762306a36Sopenharmony_ci 105862306a36Sopenharmony_ci EVENT_CONSTRAINT_END 105962306a36Sopenharmony_ci}; 106062306a36Sopenharmony_ci 106162306a36Sopenharmony_cistruct event_constraint intel_spr_pebs_event_constraints[] = { 106262306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ 106362306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), 106462306a36Sopenharmony_ci 106562306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe), 106662306a36Sopenharmony_ci INTEL_PLD_CONSTRAINT(0x1cd, 0xfe), 106762306a36Sopenharmony_ci INTEL_PSD_CONSTRAINT(0x2cd, 0x1), 106862306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ 106962306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ 107062306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ 107162306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ 107262306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ 107362306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ 107462306a36Sopenharmony_ci INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ 107562306a36Sopenharmony_ci 107662306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), 107762306a36Sopenharmony_ci 107862306a36Sopenharmony_ci INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), 107962306a36Sopenharmony_ci 108062306a36Sopenharmony_ci /* 108162306a36Sopenharmony_ci * Everything else is handled by PMU_FL_PEBS_ALL, because we 108262306a36Sopenharmony_ci * need the full constraints from the main table. 108362306a36Sopenharmony_ci */ 108462306a36Sopenharmony_ci 108562306a36Sopenharmony_ci EVENT_CONSTRAINT_END 108662306a36Sopenharmony_ci}; 108762306a36Sopenharmony_ci 108862306a36Sopenharmony_cistruct event_constraint *intel_pebs_constraints(struct perf_event *event) 108962306a36Sopenharmony_ci{ 109062306a36Sopenharmony_ci struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints); 109162306a36Sopenharmony_ci struct event_constraint *c; 109262306a36Sopenharmony_ci 109362306a36Sopenharmony_ci if (!event->attr.precise_ip) 109462306a36Sopenharmony_ci return NULL; 109562306a36Sopenharmony_ci 109662306a36Sopenharmony_ci if (pebs_constraints) { 109762306a36Sopenharmony_ci for_each_event_constraint(c, pebs_constraints) { 109862306a36Sopenharmony_ci if (constraint_match(c, event->hw.config)) { 109962306a36Sopenharmony_ci event->hw.flags |= c->flags; 110062306a36Sopenharmony_ci return c; 110162306a36Sopenharmony_ci } 110262306a36Sopenharmony_ci } 110362306a36Sopenharmony_ci } 110462306a36Sopenharmony_ci 110562306a36Sopenharmony_ci /* 110662306a36Sopenharmony_ci * Extended PEBS support 110762306a36Sopenharmony_ci * Makes the PEBS code search the normal constraints. 110862306a36Sopenharmony_ci */ 110962306a36Sopenharmony_ci if (x86_pmu.flags & PMU_FL_PEBS_ALL) 111062306a36Sopenharmony_ci return NULL; 111162306a36Sopenharmony_ci 111262306a36Sopenharmony_ci return &emptyconstraint; 111362306a36Sopenharmony_ci} 111462306a36Sopenharmony_ci 111562306a36Sopenharmony_ci/* 111662306a36Sopenharmony_ci * We need the sched_task callback even for per-cpu events when we use 111762306a36Sopenharmony_ci * the large interrupt threshold, such that we can provide PID and TID 111862306a36Sopenharmony_ci * to PEBS samples. 111962306a36Sopenharmony_ci */ 112062306a36Sopenharmony_cistatic inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc) 112162306a36Sopenharmony_ci{ 112262306a36Sopenharmony_ci if (cpuc->n_pebs == cpuc->n_pebs_via_pt) 112362306a36Sopenharmony_ci return false; 112462306a36Sopenharmony_ci 112562306a36Sopenharmony_ci return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs); 112662306a36Sopenharmony_ci} 112762306a36Sopenharmony_ci 112862306a36Sopenharmony_civoid intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) 112962306a36Sopenharmony_ci{ 113062306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci if (!sched_in && pebs_needs_sched_cb(cpuc)) 113362306a36Sopenharmony_ci intel_pmu_drain_pebs_buffer(); 113462306a36Sopenharmony_ci} 113562306a36Sopenharmony_ci 113662306a36Sopenharmony_cistatic inline void pebs_update_threshold(struct cpu_hw_events *cpuc) 113762306a36Sopenharmony_ci{ 113862306a36Sopenharmony_ci struct debug_store *ds = cpuc->ds; 113962306a36Sopenharmony_ci int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events); 114062306a36Sopenharmony_ci int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); 114162306a36Sopenharmony_ci u64 threshold; 114262306a36Sopenharmony_ci int reserved; 114362306a36Sopenharmony_ci 114462306a36Sopenharmony_ci if (cpuc->n_pebs_via_pt) 114562306a36Sopenharmony_ci return; 114662306a36Sopenharmony_ci 114762306a36Sopenharmony_ci if (x86_pmu.flags & PMU_FL_PEBS_ALL) 114862306a36Sopenharmony_ci reserved = max_pebs_events + num_counters_fixed; 114962306a36Sopenharmony_ci else 115062306a36Sopenharmony_ci reserved = max_pebs_events; 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci if (cpuc->n_pebs == cpuc->n_large_pebs) { 115362306a36Sopenharmony_ci threshold = ds->pebs_absolute_maximum - 115462306a36Sopenharmony_ci reserved * cpuc->pebs_record_size; 115562306a36Sopenharmony_ci } else { 115662306a36Sopenharmony_ci threshold = ds->pebs_buffer_base + cpuc->pebs_record_size; 115762306a36Sopenharmony_ci } 115862306a36Sopenharmony_ci 115962306a36Sopenharmony_ci ds->pebs_interrupt_threshold = threshold; 116062306a36Sopenharmony_ci} 116162306a36Sopenharmony_ci 116262306a36Sopenharmony_cistatic void adaptive_pebs_record_size_update(void) 116362306a36Sopenharmony_ci{ 116462306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 116562306a36Sopenharmony_ci u64 pebs_data_cfg = cpuc->pebs_data_cfg; 116662306a36Sopenharmony_ci int sz = sizeof(struct pebs_basic); 116762306a36Sopenharmony_ci 116862306a36Sopenharmony_ci if (pebs_data_cfg & PEBS_DATACFG_MEMINFO) 116962306a36Sopenharmony_ci sz += sizeof(struct pebs_meminfo); 117062306a36Sopenharmony_ci if (pebs_data_cfg & PEBS_DATACFG_GP) 117162306a36Sopenharmony_ci sz += sizeof(struct pebs_gprs); 117262306a36Sopenharmony_ci if (pebs_data_cfg & PEBS_DATACFG_XMMS) 117362306a36Sopenharmony_ci sz += sizeof(struct pebs_xmm); 117462306a36Sopenharmony_ci if (pebs_data_cfg & PEBS_DATACFG_LBRS) 117562306a36Sopenharmony_ci sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry); 117662306a36Sopenharmony_ci 117762306a36Sopenharmony_ci cpuc->pebs_record_size = sz; 117862306a36Sopenharmony_ci} 117962306a36Sopenharmony_ci 118062306a36Sopenharmony_ci#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \ 118162306a36Sopenharmony_ci PERF_SAMPLE_PHYS_ADDR | \ 118262306a36Sopenharmony_ci PERF_SAMPLE_WEIGHT_TYPE | \ 118362306a36Sopenharmony_ci PERF_SAMPLE_TRANSACTION | \ 118462306a36Sopenharmony_ci PERF_SAMPLE_DATA_PAGE_SIZE) 118562306a36Sopenharmony_ci 118662306a36Sopenharmony_cistatic u64 pebs_update_adaptive_cfg(struct perf_event *event) 118762306a36Sopenharmony_ci{ 118862306a36Sopenharmony_ci struct perf_event_attr *attr = &event->attr; 118962306a36Sopenharmony_ci u64 sample_type = attr->sample_type; 119062306a36Sopenharmony_ci u64 pebs_data_cfg = 0; 119162306a36Sopenharmony_ci bool gprs, tsx_weight; 119262306a36Sopenharmony_ci 119362306a36Sopenharmony_ci if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) && 119462306a36Sopenharmony_ci attr->precise_ip > 1) 119562306a36Sopenharmony_ci return pebs_data_cfg; 119662306a36Sopenharmony_ci 119762306a36Sopenharmony_ci if (sample_type & PERF_PEBS_MEMINFO_TYPE) 119862306a36Sopenharmony_ci pebs_data_cfg |= PEBS_DATACFG_MEMINFO; 119962306a36Sopenharmony_ci 120062306a36Sopenharmony_ci /* 120162306a36Sopenharmony_ci * We need GPRs when: 120262306a36Sopenharmony_ci * + user requested them 120362306a36Sopenharmony_ci * + precise_ip < 2 for the non event IP 120462306a36Sopenharmony_ci * + For RTM TSX weight we need GPRs for the abort code. 120562306a36Sopenharmony_ci */ 120662306a36Sopenharmony_ci gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && 120762306a36Sopenharmony_ci (attr->sample_regs_intr & PEBS_GP_REGS); 120862306a36Sopenharmony_ci 120962306a36Sopenharmony_ci tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) && 121062306a36Sopenharmony_ci ((attr->config & INTEL_ARCH_EVENT_MASK) == 121162306a36Sopenharmony_ci x86_pmu.rtm_abort_event); 121262306a36Sopenharmony_ci 121362306a36Sopenharmony_ci if (gprs || (attr->precise_ip < 2) || tsx_weight) 121462306a36Sopenharmony_ci pebs_data_cfg |= PEBS_DATACFG_GP; 121562306a36Sopenharmony_ci 121662306a36Sopenharmony_ci if ((sample_type & PERF_SAMPLE_REGS_INTR) && 121762306a36Sopenharmony_ci (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK)) 121862306a36Sopenharmony_ci pebs_data_cfg |= PEBS_DATACFG_XMMS; 121962306a36Sopenharmony_ci 122062306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_BRANCH_STACK) { 122162306a36Sopenharmony_ci /* 122262306a36Sopenharmony_ci * For now always log all LBRs. Could configure this 122362306a36Sopenharmony_ci * later. 122462306a36Sopenharmony_ci */ 122562306a36Sopenharmony_ci pebs_data_cfg |= PEBS_DATACFG_LBRS | 122662306a36Sopenharmony_ci ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT); 122762306a36Sopenharmony_ci } 122862306a36Sopenharmony_ci 122962306a36Sopenharmony_ci return pebs_data_cfg; 123062306a36Sopenharmony_ci} 123162306a36Sopenharmony_ci 123262306a36Sopenharmony_cistatic void 123362306a36Sopenharmony_cipebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, 123462306a36Sopenharmony_ci struct perf_event *event, bool add) 123562306a36Sopenharmony_ci{ 123662306a36Sopenharmony_ci struct pmu *pmu = event->pmu; 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci /* 123962306a36Sopenharmony_ci * Make sure we get updated with the first PEBS 124062306a36Sopenharmony_ci * event. It will trigger also during removal, but 124162306a36Sopenharmony_ci * that does not hurt: 124262306a36Sopenharmony_ci */ 124362306a36Sopenharmony_ci if (cpuc->n_pebs == 1) 124462306a36Sopenharmony_ci cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW; 124562306a36Sopenharmony_ci 124662306a36Sopenharmony_ci if (needed_cb != pebs_needs_sched_cb(cpuc)) { 124762306a36Sopenharmony_ci if (!needed_cb) 124862306a36Sopenharmony_ci perf_sched_cb_inc(pmu); 124962306a36Sopenharmony_ci else 125062306a36Sopenharmony_ci perf_sched_cb_dec(pmu); 125162306a36Sopenharmony_ci 125262306a36Sopenharmony_ci cpuc->pebs_data_cfg |= PEBS_UPDATE_DS_SW; 125362306a36Sopenharmony_ci } 125462306a36Sopenharmony_ci 125562306a36Sopenharmony_ci /* 125662306a36Sopenharmony_ci * The PEBS record doesn't shrink on pmu::del(). Doing so would require 125762306a36Sopenharmony_ci * iterating all remaining PEBS events to reconstruct the config. 125862306a36Sopenharmony_ci */ 125962306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_baseline && add) { 126062306a36Sopenharmony_ci u64 pebs_data_cfg; 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci pebs_data_cfg = pebs_update_adaptive_cfg(event); 126362306a36Sopenharmony_ci /* 126462306a36Sopenharmony_ci * Be sure to update the thresholds when we change the record. 126562306a36Sopenharmony_ci */ 126662306a36Sopenharmony_ci if (pebs_data_cfg & ~cpuc->pebs_data_cfg) 126762306a36Sopenharmony_ci cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW; 126862306a36Sopenharmony_ci } 126962306a36Sopenharmony_ci} 127062306a36Sopenharmony_ci 127162306a36Sopenharmony_civoid intel_pmu_pebs_add(struct perf_event *event) 127262306a36Sopenharmony_ci{ 127362306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 127462306a36Sopenharmony_ci struct hw_perf_event *hwc = &event->hw; 127562306a36Sopenharmony_ci bool needed_cb = pebs_needs_sched_cb(cpuc); 127662306a36Sopenharmony_ci 127762306a36Sopenharmony_ci cpuc->n_pebs++; 127862306a36Sopenharmony_ci if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) 127962306a36Sopenharmony_ci cpuc->n_large_pebs++; 128062306a36Sopenharmony_ci if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT) 128162306a36Sopenharmony_ci cpuc->n_pebs_via_pt++; 128262306a36Sopenharmony_ci 128362306a36Sopenharmony_ci pebs_update_state(needed_cb, cpuc, event, true); 128462306a36Sopenharmony_ci} 128562306a36Sopenharmony_ci 128662306a36Sopenharmony_cistatic void intel_pmu_pebs_via_pt_disable(struct perf_event *event) 128762306a36Sopenharmony_ci{ 128862306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 128962306a36Sopenharmony_ci 129062306a36Sopenharmony_ci if (!is_pebs_pt(event)) 129162306a36Sopenharmony_ci return; 129262306a36Sopenharmony_ci 129362306a36Sopenharmony_ci if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK)) 129462306a36Sopenharmony_ci cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK; 129562306a36Sopenharmony_ci} 129662306a36Sopenharmony_ci 129762306a36Sopenharmony_cistatic void intel_pmu_pebs_via_pt_enable(struct perf_event *event) 129862306a36Sopenharmony_ci{ 129962306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 130062306a36Sopenharmony_ci struct hw_perf_event *hwc = &event->hw; 130162306a36Sopenharmony_ci struct debug_store *ds = cpuc->ds; 130262306a36Sopenharmony_ci u64 value = ds->pebs_event_reset[hwc->idx]; 130362306a36Sopenharmony_ci u32 base = MSR_RELOAD_PMC0; 130462306a36Sopenharmony_ci unsigned int idx = hwc->idx; 130562306a36Sopenharmony_ci 130662306a36Sopenharmony_ci if (!is_pebs_pt(event)) 130762306a36Sopenharmony_ci return; 130862306a36Sopenharmony_ci 130962306a36Sopenharmony_ci if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS)) 131062306a36Sopenharmony_ci cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD; 131162306a36Sopenharmony_ci 131262306a36Sopenharmony_ci cpuc->pebs_enabled |= PEBS_OUTPUT_PT; 131362306a36Sopenharmony_ci 131462306a36Sopenharmony_ci if (hwc->idx >= INTEL_PMC_IDX_FIXED) { 131562306a36Sopenharmony_ci base = MSR_RELOAD_FIXED_CTR0; 131662306a36Sopenharmony_ci idx = hwc->idx - INTEL_PMC_IDX_FIXED; 131762306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_format < 5) 131862306a36Sopenharmony_ci value = ds->pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + idx]; 131962306a36Sopenharmony_ci else 132062306a36Sopenharmony_ci value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx]; 132162306a36Sopenharmony_ci } 132262306a36Sopenharmony_ci wrmsrl(base + idx, value); 132362306a36Sopenharmony_ci} 132462306a36Sopenharmony_ci 132562306a36Sopenharmony_cistatic inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc) 132662306a36Sopenharmony_ci{ 132762306a36Sopenharmony_ci if (cpuc->n_pebs == cpuc->n_large_pebs && 132862306a36Sopenharmony_ci cpuc->n_pebs != cpuc->n_pebs_via_pt) 132962306a36Sopenharmony_ci intel_pmu_drain_pebs_buffer(); 133062306a36Sopenharmony_ci} 133162306a36Sopenharmony_ci 133262306a36Sopenharmony_civoid intel_pmu_pebs_enable(struct perf_event *event) 133362306a36Sopenharmony_ci{ 133462306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 133562306a36Sopenharmony_ci u64 pebs_data_cfg = cpuc->pebs_data_cfg & ~PEBS_UPDATE_DS_SW; 133662306a36Sopenharmony_ci struct hw_perf_event *hwc = &event->hw; 133762306a36Sopenharmony_ci struct debug_store *ds = cpuc->ds; 133862306a36Sopenharmony_ci unsigned int idx = hwc->idx; 133962306a36Sopenharmony_ci 134062306a36Sopenharmony_ci hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; 134162306a36Sopenharmony_ci 134262306a36Sopenharmony_ci cpuc->pebs_enabled |= 1ULL << hwc->idx; 134362306a36Sopenharmony_ci 134462306a36Sopenharmony_ci if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5)) 134562306a36Sopenharmony_ci cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32); 134662306a36Sopenharmony_ci else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) 134762306a36Sopenharmony_ci cpuc->pebs_enabled |= 1ULL << 63; 134862306a36Sopenharmony_ci 134962306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_baseline) { 135062306a36Sopenharmony_ci hwc->config |= ICL_EVENTSEL_ADAPTIVE; 135162306a36Sopenharmony_ci if (pebs_data_cfg != cpuc->active_pebs_data_cfg) { 135262306a36Sopenharmony_ci /* 135362306a36Sopenharmony_ci * drain_pebs() assumes uniform record size; 135462306a36Sopenharmony_ci * hence we need to drain when changing said 135562306a36Sopenharmony_ci * size. 135662306a36Sopenharmony_ci */ 135762306a36Sopenharmony_ci intel_pmu_drain_large_pebs(cpuc); 135862306a36Sopenharmony_ci adaptive_pebs_record_size_update(); 135962306a36Sopenharmony_ci wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg); 136062306a36Sopenharmony_ci cpuc->active_pebs_data_cfg = pebs_data_cfg; 136162306a36Sopenharmony_ci } 136262306a36Sopenharmony_ci } 136362306a36Sopenharmony_ci if (cpuc->pebs_data_cfg & PEBS_UPDATE_DS_SW) { 136462306a36Sopenharmony_ci cpuc->pebs_data_cfg = pebs_data_cfg; 136562306a36Sopenharmony_ci pebs_update_threshold(cpuc); 136662306a36Sopenharmony_ci } 136762306a36Sopenharmony_ci 136862306a36Sopenharmony_ci if (idx >= INTEL_PMC_IDX_FIXED) { 136962306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_format < 5) 137062306a36Sopenharmony_ci idx = MAX_PEBS_EVENTS_FMT4 + (idx - INTEL_PMC_IDX_FIXED); 137162306a36Sopenharmony_ci else 137262306a36Sopenharmony_ci idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED); 137362306a36Sopenharmony_ci } 137462306a36Sopenharmony_ci 137562306a36Sopenharmony_ci /* 137662306a36Sopenharmony_ci * Use auto-reload if possible to save a MSR write in the PMI. 137762306a36Sopenharmony_ci * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD. 137862306a36Sopenharmony_ci */ 137962306a36Sopenharmony_ci if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { 138062306a36Sopenharmony_ci ds->pebs_event_reset[idx] = 138162306a36Sopenharmony_ci (u64)(-hwc->sample_period) & x86_pmu.cntval_mask; 138262306a36Sopenharmony_ci } else { 138362306a36Sopenharmony_ci ds->pebs_event_reset[idx] = 0; 138462306a36Sopenharmony_ci } 138562306a36Sopenharmony_ci 138662306a36Sopenharmony_ci intel_pmu_pebs_via_pt_enable(event); 138762306a36Sopenharmony_ci} 138862306a36Sopenharmony_ci 138962306a36Sopenharmony_civoid intel_pmu_pebs_del(struct perf_event *event) 139062306a36Sopenharmony_ci{ 139162306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 139262306a36Sopenharmony_ci struct hw_perf_event *hwc = &event->hw; 139362306a36Sopenharmony_ci bool needed_cb = pebs_needs_sched_cb(cpuc); 139462306a36Sopenharmony_ci 139562306a36Sopenharmony_ci cpuc->n_pebs--; 139662306a36Sopenharmony_ci if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) 139762306a36Sopenharmony_ci cpuc->n_large_pebs--; 139862306a36Sopenharmony_ci if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT) 139962306a36Sopenharmony_ci cpuc->n_pebs_via_pt--; 140062306a36Sopenharmony_ci 140162306a36Sopenharmony_ci pebs_update_state(needed_cb, cpuc, event, false); 140262306a36Sopenharmony_ci} 140362306a36Sopenharmony_ci 140462306a36Sopenharmony_civoid intel_pmu_pebs_disable(struct perf_event *event) 140562306a36Sopenharmony_ci{ 140662306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 140762306a36Sopenharmony_ci struct hw_perf_event *hwc = &event->hw; 140862306a36Sopenharmony_ci 140962306a36Sopenharmony_ci intel_pmu_drain_large_pebs(cpuc); 141062306a36Sopenharmony_ci 141162306a36Sopenharmony_ci cpuc->pebs_enabled &= ~(1ULL << hwc->idx); 141262306a36Sopenharmony_ci 141362306a36Sopenharmony_ci if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && 141462306a36Sopenharmony_ci (x86_pmu.version < 5)) 141562306a36Sopenharmony_ci cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32)); 141662306a36Sopenharmony_ci else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) 141762306a36Sopenharmony_ci cpuc->pebs_enabled &= ~(1ULL << 63); 141862306a36Sopenharmony_ci 141962306a36Sopenharmony_ci intel_pmu_pebs_via_pt_disable(event); 142062306a36Sopenharmony_ci 142162306a36Sopenharmony_ci if (cpuc->enabled) 142262306a36Sopenharmony_ci wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); 142362306a36Sopenharmony_ci 142462306a36Sopenharmony_ci hwc->config |= ARCH_PERFMON_EVENTSEL_INT; 142562306a36Sopenharmony_ci} 142662306a36Sopenharmony_ci 142762306a36Sopenharmony_civoid intel_pmu_pebs_enable_all(void) 142862306a36Sopenharmony_ci{ 142962306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 143062306a36Sopenharmony_ci 143162306a36Sopenharmony_ci if (cpuc->pebs_enabled) 143262306a36Sopenharmony_ci wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); 143362306a36Sopenharmony_ci} 143462306a36Sopenharmony_ci 143562306a36Sopenharmony_civoid intel_pmu_pebs_disable_all(void) 143662306a36Sopenharmony_ci{ 143762306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_ci if (cpuc->pebs_enabled) 144062306a36Sopenharmony_ci __intel_pmu_pebs_disable_all(); 144162306a36Sopenharmony_ci} 144262306a36Sopenharmony_ci 144362306a36Sopenharmony_cistatic int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) 144462306a36Sopenharmony_ci{ 144562306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 144662306a36Sopenharmony_ci unsigned long from = cpuc->lbr_entries[0].from; 144762306a36Sopenharmony_ci unsigned long old_to, to = cpuc->lbr_entries[0].to; 144862306a36Sopenharmony_ci unsigned long ip = regs->ip; 144962306a36Sopenharmony_ci int is_64bit = 0; 145062306a36Sopenharmony_ci void *kaddr; 145162306a36Sopenharmony_ci int size; 145262306a36Sopenharmony_ci 145362306a36Sopenharmony_ci /* 145462306a36Sopenharmony_ci * We don't need to fixup if the PEBS assist is fault like 145562306a36Sopenharmony_ci */ 145662306a36Sopenharmony_ci if (!x86_pmu.intel_cap.pebs_trap) 145762306a36Sopenharmony_ci return 1; 145862306a36Sopenharmony_ci 145962306a36Sopenharmony_ci /* 146062306a36Sopenharmony_ci * No LBR entry, no basic block, no rewinding 146162306a36Sopenharmony_ci */ 146262306a36Sopenharmony_ci if (!cpuc->lbr_stack.nr || !from || !to) 146362306a36Sopenharmony_ci return 0; 146462306a36Sopenharmony_ci 146562306a36Sopenharmony_ci /* 146662306a36Sopenharmony_ci * Basic blocks should never cross user/kernel boundaries 146762306a36Sopenharmony_ci */ 146862306a36Sopenharmony_ci if (kernel_ip(ip) != kernel_ip(to)) 146962306a36Sopenharmony_ci return 0; 147062306a36Sopenharmony_ci 147162306a36Sopenharmony_ci /* 147262306a36Sopenharmony_ci * unsigned math, either ip is before the start (impossible) or 147362306a36Sopenharmony_ci * the basic block is larger than 1 page (sanity) 147462306a36Sopenharmony_ci */ 147562306a36Sopenharmony_ci if ((ip - to) > PEBS_FIXUP_SIZE) 147662306a36Sopenharmony_ci return 0; 147762306a36Sopenharmony_ci 147862306a36Sopenharmony_ci /* 147962306a36Sopenharmony_ci * We sampled a branch insn, rewind using the LBR stack 148062306a36Sopenharmony_ci */ 148162306a36Sopenharmony_ci if (ip == to) { 148262306a36Sopenharmony_ci set_linear_ip(regs, from); 148362306a36Sopenharmony_ci return 1; 148462306a36Sopenharmony_ci } 148562306a36Sopenharmony_ci 148662306a36Sopenharmony_ci size = ip - to; 148762306a36Sopenharmony_ci if (!kernel_ip(ip)) { 148862306a36Sopenharmony_ci int bytes; 148962306a36Sopenharmony_ci u8 *buf = this_cpu_read(insn_buffer); 149062306a36Sopenharmony_ci 149162306a36Sopenharmony_ci /* 'size' must fit our buffer, see above */ 149262306a36Sopenharmony_ci bytes = copy_from_user_nmi(buf, (void __user *)to, size); 149362306a36Sopenharmony_ci if (bytes != 0) 149462306a36Sopenharmony_ci return 0; 149562306a36Sopenharmony_ci 149662306a36Sopenharmony_ci kaddr = buf; 149762306a36Sopenharmony_ci } else { 149862306a36Sopenharmony_ci kaddr = (void *)to; 149962306a36Sopenharmony_ci } 150062306a36Sopenharmony_ci 150162306a36Sopenharmony_ci do { 150262306a36Sopenharmony_ci struct insn insn; 150362306a36Sopenharmony_ci 150462306a36Sopenharmony_ci old_to = to; 150562306a36Sopenharmony_ci 150662306a36Sopenharmony_ci#ifdef CONFIG_X86_64 150762306a36Sopenharmony_ci is_64bit = kernel_ip(to) || any_64bit_mode(regs); 150862306a36Sopenharmony_ci#endif 150962306a36Sopenharmony_ci insn_init(&insn, kaddr, size, is_64bit); 151062306a36Sopenharmony_ci 151162306a36Sopenharmony_ci /* 151262306a36Sopenharmony_ci * Make sure there was not a problem decoding the instruction. 151362306a36Sopenharmony_ci * This is doubly important because we have an infinite loop if 151462306a36Sopenharmony_ci * insn.length=0. 151562306a36Sopenharmony_ci */ 151662306a36Sopenharmony_ci if (insn_get_length(&insn)) 151762306a36Sopenharmony_ci break; 151862306a36Sopenharmony_ci 151962306a36Sopenharmony_ci to += insn.length; 152062306a36Sopenharmony_ci kaddr += insn.length; 152162306a36Sopenharmony_ci size -= insn.length; 152262306a36Sopenharmony_ci } while (to < ip); 152362306a36Sopenharmony_ci 152462306a36Sopenharmony_ci if (to == ip) { 152562306a36Sopenharmony_ci set_linear_ip(regs, old_to); 152662306a36Sopenharmony_ci return 1; 152762306a36Sopenharmony_ci } 152862306a36Sopenharmony_ci 152962306a36Sopenharmony_ci /* 153062306a36Sopenharmony_ci * Even though we decoded the basic block, the instruction stream 153162306a36Sopenharmony_ci * never matched the given IP, either the TO or the IP got corrupted. 153262306a36Sopenharmony_ci */ 153362306a36Sopenharmony_ci return 0; 153462306a36Sopenharmony_ci} 153562306a36Sopenharmony_ci 153662306a36Sopenharmony_cistatic inline u64 intel_get_tsx_weight(u64 tsx_tuning) 153762306a36Sopenharmony_ci{ 153862306a36Sopenharmony_ci if (tsx_tuning) { 153962306a36Sopenharmony_ci union hsw_tsx_tuning tsx = { .value = tsx_tuning }; 154062306a36Sopenharmony_ci return tsx.cycles_last_block; 154162306a36Sopenharmony_ci } 154262306a36Sopenharmony_ci return 0; 154362306a36Sopenharmony_ci} 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_cistatic inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax) 154662306a36Sopenharmony_ci{ 154762306a36Sopenharmony_ci u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32; 154862306a36Sopenharmony_ci 154962306a36Sopenharmony_ci /* For RTM XABORTs also log the abort code from AX */ 155062306a36Sopenharmony_ci if ((txn & PERF_TXN_TRANSACTION) && (ax & 1)) 155162306a36Sopenharmony_ci txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; 155262306a36Sopenharmony_ci return txn; 155362306a36Sopenharmony_ci} 155462306a36Sopenharmony_ci 155562306a36Sopenharmony_cistatic inline u64 get_pebs_status(void *n) 155662306a36Sopenharmony_ci{ 155762306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_format < 4) 155862306a36Sopenharmony_ci return ((struct pebs_record_nhm *)n)->status; 155962306a36Sopenharmony_ci return ((struct pebs_basic *)n)->applicable_counters; 156062306a36Sopenharmony_ci} 156162306a36Sopenharmony_ci 156262306a36Sopenharmony_ci#define PERF_X86_EVENT_PEBS_HSW_PREC \ 156362306a36Sopenharmony_ci (PERF_X86_EVENT_PEBS_ST_HSW | \ 156462306a36Sopenharmony_ci PERF_X86_EVENT_PEBS_LD_HSW | \ 156562306a36Sopenharmony_ci PERF_X86_EVENT_PEBS_NA_HSW) 156662306a36Sopenharmony_ci 156762306a36Sopenharmony_cistatic u64 get_data_src(struct perf_event *event, u64 aux) 156862306a36Sopenharmony_ci{ 156962306a36Sopenharmony_ci u64 val = PERF_MEM_NA; 157062306a36Sopenharmony_ci int fl = event->hw.flags; 157162306a36Sopenharmony_ci bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); 157262306a36Sopenharmony_ci 157362306a36Sopenharmony_ci if (fl & PERF_X86_EVENT_PEBS_LDLAT) 157462306a36Sopenharmony_ci val = load_latency_data(event, aux); 157562306a36Sopenharmony_ci else if (fl & PERF_X86_EVENT_PEBS_STLAT) 157662306a36Sopenharmony_ci val = store_latency_data(event, aux); 157762306a36Sopenharmony_ci else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID) 157862306a36Sopenharmony_ci val = x86_pmu.pebs_latency_data(event, aux); 157962306a36Sopenharmony_ci else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) 158062306a36Sopenharmony_ci val = precise_datala_hsw(event, aux); 158162306a36Sopenharmony_ci else if (fst) 158262306a36Sopenharmony_ci val = precise_store_data(aux); 158362306a36Sopenharmony_ci return val; 158462306a36Sopenharmony_ci} 158562306a36Sopenharmony_ci 158662306a36Sopenharmony_cistatic void setup_pebs_time(struct perf_event *event, 158762306a36Sopenharmony_ci struct perf_sample_data *data, 158862306a36Sopenharmony_ci u64 tsc) 158962306a36Sopenharmony_ci{ 159062306a36Sopenharmony_ci /* Converting to a user-defined clock is not supported yet. */ 159162306a36Sopenharmony_ci if (event->attr.use_clockid != 0) 159262306a36Sopenharmony_ci return; 159362306a36Sopenharmony_ci 159462306a36Sopenharmony_ci /* 159562306a36Sopenharmony_ci * Doesn't support the conversion when the TSC is unstable. 159662306a36Sopenharmony_ci * The TSC unstable case is a corner case and very unlikely to 159762306a36Sopenharmony_ci * happen. If it happens, the TSC in a PEBS record will be 159862306a36Sopenharmony_ci * dropped and fall back to perf_event_clock(). 159962306a36Sopenharmony_ci */ 160062306a36Sopenharmony_ci if (!using_native_sched_clock() || !sched_clock_stable()) 160162306a36Sopenharmony_ci return; 160262306a36Sopenharmony_ci 160362306a36Sopenharmony_ci data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset; 160462306a36Sopenharmony_ci data->sample_flags |= PERF_SAMPLE_TIME; 160562306a36Sopenharmony_ci} 160662306a36Sopenharmony_ci 160762306a36Sopenharmony_ci#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \ 160862306a36Sopenharmony_ci PERF_SAMPLE_PHYS_ADDR | \ 160962306a36Sopenharmony_ci PERF_SAMPLE_DATA_PAGE_SIZE) 161062306a36Sopenharmony_ci 161162306a36Sopenharmony_cistatic void setup_pebs_fixed_sample_data(struct perf_event *event, 161262306a36Sopenharmony_ci struct pt_regs *iregs, void *__pebs, 161362306a36Sopenharmony_ci struct perf_sample_data *data, 161462306a36Sopenharmony_ci struct pt_regs *regs) 161562306a36Sopenharmony_ci{ 161662306a36Sopenharmony_ci /* 161762306a36Sopenharmony_ci * We cast to the biggest pebs_record but are careful not to 161862306a36Sopenharmony_ci * unconditionally access the 'extra' entries. 161962306a36Sopenharmony_ci */ 162062306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 162162306a36Sopenharmony_ci struct pebs_record_skl *pebs = __pebs; 162262306a36Sopenharmony_ci u64 sample_type; 162362306a36Sopenharmony_ci int fll; 162462306a36Sopenharmony_ci 162562306a36Sopenharmony_ci if (pebs == NULL) 162662306a36Sopenharmony_ci return; 162762306a36Sopenharmony_ci 162862306a36Sopenharmony_ci sample_type = event->attr.sample_type; 162962306a36Sopenharmony_ci fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; 163062306a36Sopenharmony_ci 163162306a36Sopenharmony_ci perf_sample_data_init(data, 0, event->hw.last_period); 163262306a36Sopenharmony_ci 163362306a36Sopenharmony_ci data->period = event->hw.last_period; 163462306a36Sopenharmony_ci 163562306a36Sopenharmony_ci /* 163662306a36Sopenharmony_ci * Use latency for weight (only avail with PEBS-LL) 163762306a36Sopenharmony_ci */ 163862306a36Sopenharmony_ci if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) { 163962306a36Sopenharmony_ci data->weight.full = pebs->lat; 164062306a36Sopenharmony_ci data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; 164162306a36Sopenharmony_ci } 164262306a36Sopenharmony_ci 164362306a36Sopenharmony_ci /* 164462306a36Sopenharmony_ci * data.data_src encodes the data source 164562306a36Sopenharmony_ci */ 164662306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_DATA_SRC) { 164762306a36Sopenharmony_ci data->data_src.val = get_data_src(event, pebs->dse); 164862306a36Sopenharmony_ci data->sample_flags |= PERF_SAMPLE_DATA_SRC; 164962306a36Sopenharmony_ci } 165062306a36Sopenharmony_ci 165162306a36Sopenharmony_ci /* 165262306a36Sopenharmony_ci * We must however always use iregs for the unwinder to stay sane; the 165362306a36Sopenharmony_ci * record BP,SP,IP can point into thin air when the record is from a 165462306a36Sopenharmony_ci * previous PMI context or an (I)RET happened between the record and 165562306a36Sopenharmony_ci * PMI. 165662306a36Sopenharmony_ci */ 165762306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_CALLCHAIN) 165862306a36Sopenharmony_ci perf_sample_save_callchain(data, event, iregs); 165962306a36Sopenharmony_ci 166062306a36Sopenharmony_ci /* 166162306a36Sopenharmony_ci * We use the interrupt regs as a base because the PEBS record does not 166262306a36Sopenharmony_ci * contain a full regs set, specifically it seems to lack segment 166362306a36Sopenharmony_ci * descriptors, which get used by things like user_mode(). 166462306a36Sopenharmony_ci * 166562306a36Sopenharmony_ci * In the simple case fix up only the IP for PERF_SAMPLE_IP. 166662306a36Sopenharmony_ci */ 166762306a36Sopenharmony_ci *regs = *iregs; 166862306a36Sopenharmony_ci 166962306a36Sopenharmony_ci /* 167062306a36Sopenharmony_ci * Initialize regs_>flags from PEBS, 167162306a36Sopenharmony_ci * Clear exact bit (which uses x86 EFLAGS Reserved bit 3), 167262306a36Sopenharmony_ci * i.e., do not rely on it being zero: 167362306a36Sopenharmony_ci */ 167462306a36Sopenharmony_ci regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT; 167562306a36Sopenharmony_ci 167662306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_REGS_INTR) { 167762306a36Sopenharmony_ci regs->ax = pebs->ax; 167862306a36Sopenharmony_ci regs->bx = pebs->bx; 167962306a36Sopenharmony_ci regs->cx = pebs->cx; 168062306a36Sopenharmony_ci regs->dx = pebs->dx; 168162306a36Sopenharmony_ci regs->si = pebs->si; 168262306a36Sopenharmony_ci regs->di = pebs->di; 168362306a36Sopenharmony_ci 168462306a36Sopenharmony_ci regs->bp = pebs->bp; 168562306a36Sopenharmony_ci regs->sp = pebs->sp; 168662306a36Sopenharmony_ci 168762306a36Sopenharmony_ci#ifndef CONFIG_X86_32 168862306a36Sopenharmony_ci regs->r8 = pebs->r8; 168962306a36Sopenharmony_ci regs->r9 = pebs->r9; 169062306a36Sopenharmony_ci regs->r10 = pebs->r10; 169162306a36Sopenharmony_ci regs->r11 = pebs->r11; 169262306a36Sopenharmony_ci regs->r12 = pebs->r12; 169362306a36Sopenharmony_ci regs->r13 = pebs->r13; 169462306a36Sopenharmony_ci regs->r14 = pebs->r14; 169562306a36Sopenharmony_ci regs->r15 = pebs->r15; 169662306a36Sopenharmony_ci#endif 169762306a36Sopenharmony_ci } 169862306a36Sopenharmony_ci 169962306a36Sopenharmony_ci if (event->attr.precise_ip > 1) { 170062306a36Sopenharmony_ci /* 170162306a36Sopenharmony_ci * Haswell and later processors have an 'eventing IP' 170262306a36Sopenharmony_ci * (real IP) which fixes the off-by-1 skid in hardware. 170362306a36Sopenharmony_ci * Use it when precise_ip >= 2 : 170462306a36Sopenharmony_ci */ 170562306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_format >= 2) { 170662306a36Sopenharmony_ci set_linear_ip(regs, pebs->real_ip); 170762306a36Sopenharmony_ci regs->flags |= PERF_EFLAGS_EXACT; 170862306a36Sopenharmony_ci } else { 170962306a36Sopenharmony_ci /* Otherwise, use PEBS off-by-1 IP: */ 171062306a36Sopenharmony_ci set_linear_ip(regs, pebs->ip); 171162306a36Sopenharmony_ci 171262306a36Sopenharmony_ci /* 171362306a36Sopenharmony_ci * With precise_ip >= 2, try to fix up the off-by-1 IP 171462306a36Sopenharmony_ci * using the LBR. If successful, the fixup function 171562306a36Sopenharmony_ci * corrects regs->ip and calls set_linear_ip() on regs: 171662306a36Sopenharmony_ci */ 171762306a36Sopenharmony_ci if (intel_pmu_pebs_fixup_ip(regs)) 171862306a36Sopenharmony_ci regs->flags |= PERF_EFLAGS_EXACT; 171962306a36Sopenharmony_ci } 172062306a36Sopenharmony_ci } else { 172162306a36Sopenharmony_ci /* 172262306a36Sopenharmony_ci * When precise_ip == 1, return the PEBS off-by-1 IP, 172362306a36Sopenharmony_ci * no fixup attempted: 172462306a36Sopenharmony_ci */ 172562306a36Sopenharmony_ci set_linear_ip(regs, pebs->ip); 172662306a36Sopenharmony_ci } 172762306a36Sopenharmony_ci 172862306a36Sopenharmony_ci 172962306a36Sopenharmony_ci if ((sample_type & PERF_SAMPLE_ADDR_TYPE) && 173062306a36Sopenharmony_ci x86_pmu.intel_cap.pebs_format >= 1) { 173162306a36Sopenharmony_ci data->addr = pebs->dla; 173262306a36Sopenharmony_ci data->sample_flags |= PERF_SAMPLE_ADDR; 173362306a36Sopenharmony_ci } 173462306a36Sopenharmony_ci 173562306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_format >= 2) { 173662306a36Sopenharmony_ci /* Only set the TSX weight when no memory weight. */ 173762306a36Sopenharmony_ci if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) { 173862306a36Sopenharmony_ci data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning); 173962306a36Sopenharmony_ci data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; 174062306a36Sopenharmony_ci } 174162306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_TRANSACTION) { 174262306a36Sopenharmony_ci data->txn = intel_get_tsx_transaction(pebs->tsx_tuning, 174362306a36Sopenharmony_ci pebs->ax); 174462306a36Sopenharmony_ci data->sample_flags |= PERF_SAMPLE_TRANSACTION; 174562306a36Sopenharmony_ci } 174662306a36Sopenharmony_ci } 174762306a36Sopenharmony_ci 174862306a36Sopenharmony_ci /* 174962306a36Sopenharmony_ci * v3 supplies an accurate time stamp, so we use that 175062306a36Sopenharmony_ci * for the time stamp. 175162306a36Sopenharmony_ci * 175262306a36Sopenharmony_ci * We can only do this for the default trace clock. 175362306a36Sopenharmony_ci */ 175462306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_format >= 3) 175562306a36Sopenharmony_ci setup_pebs_time(event, data, pebs->tsc); 175662306a36Sopenharmony_ci 175762306a36Sopenharmony_ci if (has_branch_stack(event)) 175862306a36Sopenharmony_ci perf_sample_save_brstack(data, event, &cpuc->lbr_stack); 175962306a36Sopenharmony_ci} 176062306a36Sopenharmony_ci 176162306a36Sopenharmony_cistatic void adaptive_pebs_save_regs(struct pt_regs *regs, 176262306a36Sopenharmony_ci struct pebs_gprs *gprs) 176362306a36Sopenharmony_ci{ 176462306a36Sopenharmony_ci regs->ax = gprs->ax; 176562306a36Sopenharmony_ci regs->bx = gprs->bx; 176662306a36Sopenharmony_ci regs->cx = gprs->cx; 176762306a36Sopenharmony_ci regs->dx = gprs->dx; 176862306a36Sopenharmony_ci regs->si = gprs->si; 176962306a36Sopenharmony_ci regs->di = gprs->di; 177062306a36Sopenharmony_ci regs->bp = gprs->bp; 177162306a36Sopenharmony_ci regs->sp = gprs->sp; 177262306a36Sopenharmony_ci#ifndef CONFIG_X86_32 177362306a36Sopenharmony_ci regs->r8 = gprs->r8; 177462306a36Sopenharmony_ci regs->r9 = gprs->r9; 177562306a36Sopenharmony_ci regs->r10 = gprs->r10; 177662306a36Sopenharmony_ci regs->r11 = gprs->r11; 177762306a36Sopenharmony_ci regs->r12 = gprs->r12; 177862306a36Sopenharmony_ci regs->r13 = gprs->r13; 177962306a36Sopenharmony_ci regs->r14 = gprs->r14; 178062306a36Sopenharmony_ci regs->r15 = gprs->r15; 178162306a36Sopenharmony_ci#endif 178262306a36Sopenharmony_ci} 178362306a36Sopenharmony_ci 178462306a36Sopenharmony_ci#define PEBS_LATENCY_MASK 0xffff 178562306a36Sopenharmony_ci#define PEBS_CACHE_LATENCY_OFFSET 32 178662306a36Sopenharmony_ci#define PEBS_RETIRE_LATENCY_OFFSET 32 178762306a36Sopenharmony_ci 178862306a36Sopenharmony_ci/* 178962306a36Sopenharmony_ci * With adaptive PEBS the layout depends on what fields are configured. 179062306a36Sopenharmony_ci */ 179162306a36Sopenharmony_ci 179262306a36Sopenharmony_cistatic void setup_pebs_adaptive_sample_data(struct perf_event *event, 179362306a36Sopenharmony_ci struct pt_regs *iregs, void *__pebs, 179462306a36Sopenharmony_ci struct perf_sample_data *data, 179562306a36Sopenharmony_ci struct pt_regs *regs) 179662306a36Sopenharmony_ci{ 179762306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 179862306a36Sopenharmony_ci struct pebs_basic *basic = __pebs; 179962306a36Sopenharmony_ci void *next_record = basic + 1; 180062306a36Sopenharmony_ci u64 sample_type; 180162306a36Sopenharmony_ci u64 format_size; 180262306a36Sopenharmony_ci struct pebs_meminfo *meminfo = NULL; 180362306a36Sopenharmony_ci struct pebs_gprs *gprs = NULL; 180462306a36Sopenharmony_ci struct x86_perf_regs *perf_regs; 180562306a36Sopenharmony_ci 180662306a36Sopenharmony_ci if (basic == NULL) 180762306a36Sopenharmony_ci return; 180862306a36Sopenharmony_ci 180962306a36Sopenharmony_ci perf_regs = container_of(regs, struct x86_perf_regs, regs); 181062306a36Sopenharmony_ci perf_regs->xmm_regs = NULL; 181162306a36Sopenharmony_ci 181262306a36Sopenharmony_ci sample_type = event->attr.sample_type; 181362306a36Sopenharmony_ci format_size = basic->format_size; 181462306a36Sopenharmony_ci perf_sample_data_init(data, 0, event->hw.last_period); 181562306a36Sopenharmony_ci data->period = event->hw.last_period; 181662306a36Sopenharmony_ci 181762306a36Sopenharmony_ci setup_pebs_time(event, data, basic->tsc); 181862306a36Sopenharmony_ci 181962306a36Sopenharmony_ci /* 182062306a36Sopenharmony_ci * We must however always use iregs for the unwinder to stay sane; the 182162306a36Sopenharmony_ci * record BP,SP,IP can point into thin air when the record is from a 182262306a36Sopenharmony_ci * previous PMI context or an (I)RET happened between the record and 182362306a36Sopenharmony_ci * PMI. 182462306a36Sopenharmony_ci */ 182562306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_CALLCHAIN) 182662306a36Sopenharmony_ci perf_sample_save_callchain(data, event, iregs); 182762306a36Sopenharmony_ci 182862306a36Sopenharmony_ci *regs = *iregs; 182962306a36Sopenharmony_ci /* The ip in basic is EventingIP */ 183062306a36Sopenharmony_ci set_linear_ip(regs, basic->ip); 183162306a36Sopenharmony_ci regs->flags = PERF_EFLAGS_EXACT; 183262306a36Sopenharmony_ci 183362306a36Sopenharmony_ci if ((sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)) 183462306a36Sopenharmony_ci data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK; 183562306a36Sopenharmony_ci 183662306a36Sopenharmony_ci /* 183762306a36Sopenharmony_ci * The record for MEMINFO is in front of GP 183862306a36Sopenharmony_ci * But PERF_SAMPLE_TRANSACTION needs gprs->ax. 183962306a36Sopenharmony_ci * Save the pointer here but process later. 184062306a36Sopenharmony_ci */ 184162306a36Sopenharmony_ci if (format_size & PEBS_DATACFG_MEMINFO) { 184262306a36Sopenharmony_ci meminfo = next_record; 184362306a36Sopenharmony_ci next_record = meminfo + 1; 184462306a36Sopenharmony_ci } 184562306a36Sopenharmony_ci 184662306a36Sopenharmony_ci if (format_size & PEBS_DATACFG_GP) { 184762306a36Sopenharmony_ci gprs = next_record; 184862306a36Sopenharmony_ci next_record = gprs + 1; 184962306a36Sopenharmony_ci 185062306a36Sopenharmony_ci if (event->attr.precise_ip < 2) { 185162306a36Sopenharmony_ci set_linear_ip(regs, gprs->ip); 185262306a36Sopenharmony_ci regs->flags &= ~PERF_EFLAGS_EXACT; 185362306a36Sopenharmony_ci } 185462306a36Sopenharmony_ci 185562306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_REGS_INTR) 185662306a36Sopenharmony_ci adaptive_pebs_save_regs(regs, gprs); 185762306a36Sopenharmony_ci } 185862306a36Sopenharmony_ci 185962306a36Sopenharmony_ci if (format_size & PEBS_DATACFG_MEMINFO) { 186062306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { 186162306a36Sopenharmony_ci u64 weight = meminfo->latency; 186262306a36Sopenharmony_ci 186362306a36Sopenharmony_ci if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) { 186462306a36Sopenharmony_ci data->weight.var2_w = weight & PEBS_LATENCY_MASK; 186562306a36Sopenharmony_ci weight >>= PEBS_CACHE_LATENCY_OFFSET; 186662306a36Sopenharmony_ci } 186762306a36Sopenharmony_ci 186862306a36Sopenharmony_ci /* 186962306a36Sopenharmony_ci * Although meminfo::latency is defined as a u64, 187062306a36Sopenharmony_ci * only the lower 32 bits include the valid data 187162306a36Sopenharmony_ci * in practice on Ice Lake and earlier platforms. 187262306a36Sopenharmony_ci */ 187362306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_WEIGHT) { 187462306a36Sopenharmony_ci data->weight.full = weight ?: 187562306a36Sopenharmony_ci intel_get_tsx_weight(meminfo->tsx_tuning); 187662306a36Sopenharmony_ci } else { 187762306a36Sopenharmony_ci data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?: 187862306a36Sopenharmony_ci intel_get_tsx_weight(meminfo->tsx_tuning); 187962306a36Sopenharmony_ci } 188062306a36Sopenharmony_ci data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; 188162306a36Sopenharmony_ci } 188262306a36Sopenharmony_ci 188362306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_DATA_SRC) { 188462306a36Sopenharmony_ci data->data_src.val = get_data_src(event, meminfo->aux); 188562306a36Sopenharmony_ci data->sample_flags |= PERF_SAMPLE_DATA_SRC; 188662306a36Sopenharmony_ci } 188762306a36Sopenharmony_ci 188862306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_ADDR_TYPE) { 188962306a36Sopenharmony_ci data->addr = meminfo->address; 189062306a36Sopenharmony_ci data->sample_flags |= PERF_SAMPLE_ADDR; 189162306a36Sopenharmony_ci } 189262306a36Sopenharmony_ci 189362306a36Sopenharmony_ci if (sample_type & PERF_SAMPLE_TRANSACTION) { 189462306a36Sopenharmony_ci data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning, 189562306a36Sopenharmony_ci gprs ? gprs->ax : 0); 189662306a36Sopenharmony_ci data->sample_flags |= PERF_SAMPLE_TRANSACTION; 189762306a36Sopenharmony_ci } 189862306a36Sopenharmony_ci } 189962306a36Sopenharmony_ci 190062306a36Sopenharmony_ci if (format_size & PEBS_DATACFG_XMMS) { 190162306a36Sopenharmony_ci struct pebs_xmm *xmm = next_record; 190262306a36Sopenharmony_ci 190362306a36Sopenharmony_ci next_record = xmm + 1; 190462306a36Sopenharmony_ci perf_regs->xmm_regs = xmm->xmm; 190562306a36Sopenharmony_ci } 190662306a36Sopenharmony_ci 190762306a36Sopenharmony_ci if (format_size & PEBS_DATACFG_LBRS) { 190862306a36Sopenharmony_ci struct lbr_entry *lbr = next_record; 190962306a36Sopenharmony_ci int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT) 191062306a36Sopenharmony_ci & 0xff) + 1; 191162306a36Sopenharmony_ci next_record = next_record + num_lbr * sizeof(struct lbr_entry); 191262306a36Sopenharmony_ci 191362306a36Sopenharmony_ci if (has_branch_stack(event)) { 191462306a36Sopenharmony_ci intel_pmu_store_pebs_lbrs(lbr); 191562306a36Sopenharmony_ci perf_sample_save_brstack(data, event, &cpuc->lbr_stack); 191662306a36Sopenharmony_ci } 191762306a36Sopenharmony_ci } 191862306a36Sopenharmony_ci 191962306a36Sopenharmony_ci WARN_ONCE(next_record != __pebs + (format_size >> 48), 192062306a36Sopenharmony_ci "PEBS record size %llu, expected %llu, config %llx\n", 192162306a36Sopenharmony_ci format_size >> 48, 192262306a36Sopenharmony_ci (u64)(next_record - __pebs), 192362306a36Sopenharmony_ci basic->format_size); 192462306a36Sopenharmony_ci} 192562306a36Sopenharmony_ci 192662306a36Sopenharmony_cistatic inline void * 192762306a36Sopenharmony_ciget_next_pebs_record_by_bit(void *base, void *top, int bit) 192862306a36Sopenharmony_ci{ 192962306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 193062306a36Sopenharmony_ci void *at; 193162306a36Sopenharmony_ci u64 pebs_status; 193262306a36Sopenharmony_ci 193362306a36Sopenharmony_ci /* 193462306a36Sopenharmony_ci * fmt0 does not have a status bitfield (does not use 193562306a36Sopenharmony_ci * perf_record_nhm format) 193662306a36Sopenharmony_ci */ 193762306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_format < 1) 193862306a36Sopenharmony_ci return base; 193962306a36Sopenharmony_ci 194062306a36Sopenharmony_ci if (base == NULL) 194162306a36Sopenharmony_ci return NULL; 194262306a36Sopenharmony_ci 194362306a36Sopenharmony_ci for (at = base; at < top; at += cpuc->pebs_record_size) { 194462306a36Sopenharmony_ci unsigned long status = get_pebs_status(at); 194562306a36Sopenharmony_ci 194662306a36Sopenharmony_ci if (test_bit(bit, (unsigned long *)&status)) { 194762306a36Sopenharmony_ci /* PEBS v3 has accurate status bits */ 194862306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_format >= 3) 194962306a36Sopenharmony_ci return at; 195062306a36Sopenharmony_ci 195162306a36Sopenharmony_ci if (status == (1 << bit)) 195262306a36Sopenharmony_ci return at; 195362306a36Sopenharmony_ci 195462306a36Sopenharmony_ci /* clear non-PEBS bit and re-check */ 195562306a36Sopenharmony_ci pebs_status = status & cpuc->pebs_enabled; 195662306a36Sopenharmony_ci pebs_status &= PEBS_COUNTER_MASK; 195762306a36Sopenharmony_ci if (pebs_status == (1 << bit)) 195862306a36Sopenharmony_ci return at; 195962306a36Sopenharmony_ci } 196062306a36Sopenharmony_ci } 196162306a36Sopenharmony_ci return NULL; 196262306a36Sopenharmony_ci} 196362306a36Sopenharmony_ci 196462306a36Sopenharmony_civoid intel_pmu_auto_reload_read(struct perf_event *event) 196562306a36Sopenharmony_ci{ 196662306a36Sopenharmony_ci WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)); 196762306a36Sopenharmony_ci 196862306a36Sopenharmony_ci perf_pmu_disable(event->pmu); 196962306a36Sopenharmony_ci intel_pmu_drain_pebs_buffer(); 197062306a36Sopenharmony_ci perf_pmu_enable(event->pmu); 197162306a36Sopenharmony_ci} 197262306a36Sopenharmony_ci 197362306a36Sopenharmony_ci/* 197462306a36Sopenharmony_ci * Special variant of intel_pmu_save_and_restart() for auto-reload. 197562306a36Sopenharmony_ci */ 197662306a36Sopenharmony_cistatic int 197762306a36Sopenharmony_ciintel_pmu_save_and_restart_reload(struct perf_event *event, int count) 197862306a36Sopenharmony_ci{ 197962306a36Sopenharmony_ci struct hw_perf_event *hwc = &event->hw; 198062306a36Sopenharmony_ci int shift = 64 - x86_pmu.cntval_bits; 198162306a36Sopenharmony_ci u64 period = hwc->sample_period; 198262306a36Sopenharmony_ci u64 prev_raw_count, new_raw_count; 198362306a36Sopenharmony_ci s64 new, old; 198462306a36Sopenharmony_ci 198562306a36Sopenharmony_ci WARN_ON(!period); 198662306a36Sopenharmony_ci 198762306a36Sopenharmony_ci /* 198862306a36Sopenharmony_ci * drain_pebs() only happens when the PMU is disabled. 198962306a36Sopenharmony_ci */ 199062306a36Sopenharmony_ci WARN_ON(this_cpu_read(cpu_hw_events.enabled)); 199162306a36Sopenharmony_ci 199262306a36Sopenharmony_ci prev_raw_count = local64_read(&hwc->prev_count); 199362306a36Sopenharmony_ci rdpmcl(hwc->event_base_rdpmc, new_raw_count); 199462306a36Sopenharmony_ci local64_set(&hwc->prev_count, new_raw_count); 199562306a36Sopenharmony_ci 199662306a36Sopenharmony_ci /* 199762306a36Sopenharmony_ci * Since the counter increments a negative counter value and 199862306a36Sopenharmony_ci * overflows on the sign switch, giving the interval: 199962306a36Sopenharmony_ci * 200062306a36Sopenharmony_ci * [-period, 0] 200162306a36Sopenharmony_ci * 200262306a36Sopenharmony_ci * the difference between two consecutive reads is: 200362306a36Sopenharmony_ci * 200462306a36Sopenharmony_ci * A) value2 - value1; 200562306a36Sopenharmony_ci * when no overflows have happened in between, 200662306a36Sopenharmony_ci * 200762306a36Sopenharmony_ci * B) (0 - value1) + (value2 - (-period)); 200862306a36Sopenharmony_ci * when one overflow happened in between, 200962306a36Sopenharmony_ci * 201062306a36Sopenharmony_ci * C) (0 - value1) + (n - 1) * (period) + (value2 - (-period)); 201162306a36Sopenharmony_ci * when @n overflows happened in between. 201262306a36Sopenharmony_ci * 201362306a36Sopenharmony_ci * Here A) is the obvious difference, B) is the extension to the 201462306a36Sopenharmony_ci * discrete interval, where the first term is to the top of the 201562306a36Sopenharmony_ci * interval and the second term is from the bottom of the next 201662306a36Sopenharmony_ci * interval and C) the extension to multiple intervals, where the 201762306a36Sopenharmony_ci * middle term is the whole intervals covered. 201862306a36Sopenharmony_ci * 201962306a36Sopenharmony_ci * An equivalent of C, by reduction, is: 202062306a36Sopenharmony_ci * 202162306a36Sopenharmony_ci * value2 - value1 + n * period 202262306a36Sopenharmony_ci */ 202362306a36Sopenharmony_ci new = ((s64)(new_raw_count << shift) >> shift); 202462306a36Sopenharmony_ci old = ((s64)(prev_raw_count << shift) >> shift); 202562306a36Sopenharmony_ci local64_add(new - old + count * period, &event->count); 202662306a36Sopenharmony_ci 202762306a36Sopenharmony_ci local64_set(&hwc->period_left, -new); 202862306a36Sopenharmony_ci 202962306a36Sopenharmony_ci perf_event_update_userpage(event); 203062306a36Sopenharmony_ci 203162306a36Sopenharmony_ci return 0; 203262306a36Sopenharmony_ci} 203362306a36Sopenharmony_ci 203462306a36Sopenharmony_cistatic __always_inline void 203562306a36Sopenharmony_ci__intel_pmu_pebs_event(struct perf_event *event, 203662306a36Sopenharmony_ci struct pt_regs *iregs, 203762306a36Sopenharmony_ci struct perf_sample_data *data, 203862306a36Sopenharmony_ci void *base, void *top, 203962306a36Sopenharmony_ci int bit, int count, 204062306a36Sopenharmony_ci void (*setup_sample)(struct perf_event *, 204162306a36Sopenharmony_ci struct pt_regs *, 204262306a36Sopenharmony_ci void *, 204362306a36Sopenharmony_ci struct perf_sample_data *, 204462306a36Sopenharmony_ci struct pt_regs *)) 204562306a36Sopenharmony_ci{ 204662306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 204762306a36Sopenharmony_ci struct hw_perf_event *hwc = &event->hw; 204862306a36Sopenharmony_ci struct x86_perf_regs perf_regs; 204962306a36Sopenharmony_ci struct pt_regs *regs = &perf_regs.regs; 205062306a36Sopenharmony_ci void *at = get_next_pebs_record_by_bit(base, top, bit); 205162306a36Sopenharmony_ci static struct pt_regs dummy_iregs; 205262306a36Sopenharmony_ci 205362306a36Sopenharmony_ci if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { 205462306a36Sopenharmony_ci /* 205562306a36Sopenharmony_ci * Now, auto-reload is only enabled in fixed period mode. 205662306a36Sopenharmony_ci * The reload value is always hwc->sample_period. 205762306a36Sopenharmony_ci * May need to change it, if auto-reload is enabled in 205862306a36Sopenharmony_ci * freq mode later. 205962306a36Sopenharmony_ci */ 206062306a36Sopenharmony_ci intel_pmu_save_and_restart_reload(event, count); 206162306a36Sopenharmony_ci } else if (!intel_pmu_save_and_restart(event)) 206262306a36Sopenharmony_ci return; 206362306a36Sopenharmony_ci 206462306a36Sopenharmony_ci if (!iregs) 206562306a36Sopenharmony_ci iregs = &dummy_iregs; 206662306a36Sopenharmony_ci 206762306a36Sopenharmony_ci while (count > 1) { 206862306a36Sopenharmony_ci setup_sample(event, iregs, at, data, regs); 206962306a36Sopenharmony_ci perf_event_output(event, data, regs); 207062306a36Sopenharmony_ci at += cpuc->pebs_record_size; 207162306a36Sopenharmony_ci at = get_next_pebs_record_by_bit(at, top, bit); 207262306a36Sopenharmony_ci count--; 207362306a36Sopenharmony_ci } 207462306a36Sopenharmony_ci 207562306a36Sopenharmony_ci setup_sample(event, iregs, at, data, regs); 207662306a36Sopenharmony_ci if (iregs == &dummy_iregs) { 207762306a36Sopenharmony_ci /* 207862306a36Sopenharmony_ci * The PEBS records may be drained in the non-overflow context, 207962306a36Sopenharmony_ci * e.g., large PEBS + context switch. Perf should treat the 208062306a36Sopenharmony_ci * last record the same as other PEBS records, and doesn't 208162306a36Sopenharmony_ci * invoke the generic overflow handler. 208262306a36Sopenharmony_ci */ 208362306a36Sopenharmony_ci perf_event_output(event, data, regs); 208462306a36Sopenharmony_ci } else { 208562306a36Sopenharmony_ci /* 208662306a36Sopenharmony_ci * All but the last records are processed. 208762306a36Sopenharmony_ci * The last one is left to be able to call the overflow handler. 208862306a36Sopenharmony_ci */ 208962306a36Sopenharmony_ci if (perf_event_overflow(event, data, regs)) 209062306a36Sopenharmony_ci x86_pmu_stop(event, 0); 209162306a36Sopenharmony_ci } 209262306a36Sopenharmony_ci} 209362306a36Sopenharmony_ci 209462306a36Sopenharmony_cistatic void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data) 209562306a36Sopenharmony_ci{ 209662306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 209762306a36Sopenharmony_ci struct debug_store *ds = cpuc->ds; 209862306a36Sopenharmony_ci struct perf_event *event = cpuc->events[0]; /* PMC0 only */ 209962306a36Sopenharmony_ci struct pebs_record_core *at, *top; 210062306a36Sopenharmony_ci int n; 210162306a36Sopenharmony_ci 210262306a36Sopenharmony_ci if (!x86_pmu.pebs_active) 210362306a36Sopenharmony_ci return; 210462306a36Sopenharmony_ci 210562306a36Sopenharmony_ci at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; 210662306a36Sopenharmony_ci top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; 210762306a36Sopenharmony_ci 210862306a36Sopenharmony_ci /* 210962306a36Sopenharmony_ci * Whatever else happens, drain the thing 211062306a36Sopenharmony_ci */ 211162306a36Sopenharmony_ci ds->pebs_index = ds->pebs_buffer_base; 211262306a36Sopenharmony_ci 211362306a36Sopenharmony_ci if (!test_bit(0, cpuc->active_mask)) 211462306a36Sopenharmony_ci return; 211562306a36Sopenharmony_ci 211662306a36Sopenharmony_ci WARN_ON_ONCE(!event); 211762306a36Sopenharmony_ci 211862306a36Sopenharmony_ci if (!event->attr.precise_ip) 211962306a36Sopenharmony_ci return; 212062306a36Sopenharmony_ci 212162306a36Sopenharmony_ci n = top - at; 212262306a36Sopenharmony_ci if (n <= 0) { 212362306a36Sopenharmony_ci if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) 212462306a36Sopenharmony_ci intel_pmu_save_and_restart_reload(event, 0); 212562306a36Sopenharmony_ci return; 212662306a36Sopenharmony_ci } 212762306a36Sopenharmony_ci 212862306a36Sopenharmony_ci __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n, 212962306a36Sopenharmony_ci setup_pebs_fixed_sample_data); 213062306a36Sopenharmony_ci} 213162306a36Sopenharmony_ci 213262306a36Sopenharmony_cistatic void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size) 213362306a36Sopenharmony_ci{ 213462306a36Sopenharmony_ci struct perf_event *event; 213562306a36Sopenharmony_ci int bit; 213662306a36Sopenharmony_ci 213762306a36Sopenharmony_ci /* 213862306a36Sopenharmony_ci * The drain_pebs() could be called twice in a short period 213962306a36Sopenharmony_ci * for auto-reload event in pmu::read(). There are no 214062306a36Sopenharmony_ci * overflows have happened in between. 214162306a36Sopenharmony_ci * It needs to call intel_pmu_save_and_restart_reload() to 214262306a36Sopenharmony_ci * update the event->count for this case. 214362306a36Sopenharmony_ci */ 214462306a36Sopenharmony_ci for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) { 214562306a36Sopenharmony_ci event = cpuc->events[bit]; 214662306a36Sopenharmony_ci if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) 214762306a36Sopenharmony_ci intel_pmu_save_and_restart_reload(event, 0); 214862306a36Sopenharmony_ci } 214962306a36Sopenharmony_ci} 215062306a36Sopenharmony_ci 215162306a36Sopenharmony_cistatic void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data) 215262306a36Sopenharmony_ci{ 215362306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 215462306a36Sopenharmony_ci struct debug_store *ds = cpuc->ds; 215562306a36Sopenharmony_ci struct perf_event *event; 215662306a36Sopenharmony_ci void *base, *at, *top; 215762306a36Sopenharmony_ci short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; 215862306a36Sopenharmony_ci short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; 215962306a36Sopenharmony_ci int bit, i, size; 216062306a36Sopenharmony_ci u64 mask; 216162306a36Sopenharmony_ci 216262306a36Sopenharmony_ci if (!x86_pmu.pebs_active) 216362306a36Sopenharmony_ci return; 216462306a36Sopenharmony_ci 216562306a36Sopenharmony_ci base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; 216662306a36Sopenharmony_ci top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; 216762306a36Sopenharmony_ci 216862306a36Sopenharmony_ci ds->pebs_index = ds->pebs_buffer_base; 216962306a36Sopenharmony_ci 217062306a36Sopenharmony_ci mask = (1ULL << x86_pmu.max_pebs_events) - 1; 217162306a36Sopenharmony_ci size = x86_pmu.max_pebs_events; 217262306a36Sopenharmony_ci if (x86_pmu.flags & PMU_FL_PEBS_ALL) { 217362306a36Sopenharmony_ci mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED; 217462306a36Sopenharmony_ci size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed; 217562306a36Sopenharmony_ci } 217662306a36Sopenharmony_ci 217762306a36Sopenharmony_ci if (unlikely(base >= top)) { 217862306a36Sopenharmony_ci intel_pmu_pebs_event_update_no_drain(cpuc, size); 217962306a36Sopenharmony_ci return; 218062306a36Sopenharmony_ci } 218162306a36Sopenharmony_ci 218262306a36Sopenharmony_ci for (at = base; at < top; at += x86_pmu.pebs_record_size) { 218362306a36Sopenharmony_ci struct pebs_record_nhm *p = at; 218462306a36Sopenharmony_ci u64 pebs_status; 218562306a36Sopenharmony_ci 218662306a36Sopenharmony_ci pebs_status = p->status & cpuc->pebs_enabled; 218762306a36Sopenharmony_ci pebs_status &= mask; 218862306a36Sopenharmony_ci 218962306a36Sopenharmony_ci /* PEBS v3 has more accurate status bits */ 219062306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_format >= 3) { 219162306a36Sopenharmony_ci for_each_set_bit(bit, (unsigned long *)&pebs_status, size) 219262306a36Sopenharmony_ci counts[bit]++; 219362306a36Sopenharmony_ci 219462306a36Sopenharmony_ci continue; 219562306a36Sopenharmony_ci } 219662306a36Sopenharmony_ci 219762306a36Sopenharmony_ci /* 219862306a36Sopenharmony_ci * On some CPUs the PEBS status can be zero when PEBS is 219962306a36Sopenharmony_ci * racing with clearing of GLOBAL_STATUS. 220062306a36Sopenharmony_ci * 220162306a36Sopenharmony_ci * Normally we would drop that record, but in the 220262306a36Sopenharmony_ci * case when there is only a single active PEBS event 220362306a36Sopenharmony_ci * we can assume it's for that event. 220462306a36Sopenharmony_ci */ 220562306a36Sopenharmony_ci if (!pebs_status && cpuc->pebs_enabled && 220662306a36Sopenharmony_ci !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1))) 220762306a36Sopenharmony_ci pebs_status = p->status = cpuc->pebs_enabled; 220862306a36Sopenharmony_ci 220962306a36Sopenharmony_ci bit = find_first_bit((unsigned long *)&pebs_status, 221062306a36Sopenharmony_ci x86_pmu.max_pebs_events); 221162306a36Sopenharmony_ci if (bit >= x86_pmu.max_pebs_events) 221262306a36Sopenharmony_ci continue; 221362306a36Sopenharmony_ci 221462306a36Sopenharmony_ci /* 221562306a36Sopenharmony_ci * The PEBS hardware does not deal well with the situation 221662306a36Sopenharmony_ci * when events happen near to each other and multiple bits 221762306a36Sopenharmony_ci * are set. But it should happen rarely. 221862306a36Sopenharmony_ci * 221962306a36Sopenharmony_ci * If these events include one PEBS and multiple non-PEBS 222062306a36Sopenharmony_ci * events, it doesn't impact PEBS record. The record will 222162306a36Sopenharmony_ci * be handled normally. (slow path) 222262306a36Sopenharmony_ci * 222362306a36Sopenharmony_ci * If these events include two or more PEBS events, the 222462306a36Sopenharmony_ci * records for the events can be collapsed into a single 222562306a36Sopenharmony_ci * one, and it's not possible to reconstruct all events 222662306a36Sopenharmony_ci * that caused the PEBS record. It's called collision. 222762306a36Sopenharmony_ci * If collision happened, the record will be dropped. 222862306a36Sopenharmony_ci */ 222962306a36Sopenharmony_ci if (pebs_status != (1ULL << bit)) { 223062306a36Sopenharmony_ci for_each_set_bit(i, (unsigned long *)&pebs_status, size) 223162306a36Sopenharmony_ci error[i]++; 223262306a36Sopenharmony_ci continue; 223362306a36Sopenharmony_ci } 223462306a36Sopenharmony_ci 223562306a36Sopenharmony_ci counts[bit]++; 223662306a36Sopenharmony_ci } 223762306a36Sopenharmony_ci 223862306a36Sopenharmony_ci for_each_set_bit(bit, (unsigned long *)&mask, size) { 223962306a36Sopenharmony_ci if ((counts[bit] == 0) && (error[bit] == 0)) 224062306a36Sopenharmony_ci continue; 224162306a36Sopenharmony_ci 224262306a36Sopenharmony_ci event = cpuc->events[bit]; 224362306a36Sopenharmony_ci if (WARN_ON_ONCE(!event)) 224462306a36Sopenharmony_ci continue; 224562306a36Sopenharmony_ci 224662306a36Sopenharmony_ci if (WARN_ON_ONCE(!event->attr.precise_ip)) 224762306a36Sopenharmony_ci continue; 224862306a36Sopenharmony_ci 224962306a36Sopenharmony_ci /* log dropped samples number */ 225062306a36Sopenharmony_ci if (error[bit]) { 225162306a36Sopenharmony_ci perf_log_lost_samples(event, error[bit]); 225262306a36Sopenharmony_ci 225362306a36Sopenharmony_ci if (iregs && perf_event_account_interrupt(event)) 225462306a36Sopenharmony_ci x86_pmu_stop(event, 0); 225562306a36Sopenharmony_ci } 225662306a36Sopenharmony_ci 225762306a36Sopenharmony_ci if (counts[bit]) { 225862306a36Sopenharmony_ci __intel_pmu_pebs_event(event, iregs, data, base, 225962306a36Sopenharmony_ci top, bit, counts[bit], 226062306a36Sopenharmony_ci setup_pebs_fixed_sample_data); 226162306a36Sopenharmony_ci } 226262306a36Sopenharmony_ci } 226362306a36Sopenharmony_ci} 226462306a36Sopenharmony_ci 226562306a36Sopenharmony_cistatic void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data) 226662306a36Sopenharmony_ci{ 226762306a36Sopenharmony_ci short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; 226862306a36Sopenharmony_ci struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 226962306a36Sopenharmony_ci int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events); 227062306a36Sopenharmony_ci int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); 227162306a36Sopenharmony_ci struct debug_store *ds = cpuc->ds; 227262306a36Sopenharmony_ci struct perf_event *event; 227362306a36Sopenharmony_ci void *base, *at, *top; 227462306a36Sopenharmony_ci int bit, size; 227562306a36Sopenharmony_ci u64 mask; 227662306a36Sopenharmony_ci 227762306a36Sopenharmony_ci if (!x86_pmu.pebs_active) 227862306a36Sopenharmony_ci return; 227962306a36Sopenharmony_ci 228062306a36Sopenharmony_ci base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base; 228162306a36Sopenharmony_ci top = (struct pebs_basic *)(unsigned long)ds->pebs_index; 228262306a36Sopenharmony_ci 228362306a36Sopenharmony_ci ds->pebs_index = ds->pebs_buffer_base; 228462306a36Sopenharmony_ci 228562306a36Sopenharmony_ci mask = ((1ULL << max_pebs_events) - 1) | 228662306a36Sopenharmony_ci (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED); 228762306a36Sopenharmony_ci size = INTEL_PMC_IDX_FIXED + num_counters_fixed; 228862306a36Sopenharmony_ci 228962306a36Sopenharmony_ci if (unlikely(base >= top)) { 229062306a36Sopenharmony_ci intel_pmu_pebs_event_update_no_drain(cpuc, size); 229162306a36Sopenharmony_ci return; 229262306a36Sopenharmony_ci } 229362306a36Sopenharmony_ci 229462306a36Sopenharmony_ci for (at = base; at < top; at += cpuc->pebs_record_size) { 229562306a36Sopenharmony_ci u64 pebs_status; 229662306a36Sopenharmony_ci 229762306a36Sopenharmony_ci pebs_status = get_pebs_status(at) & cpuc->pebs_enabled; 229862306a36Sopenharmony_ci pebs_status &= mask; 229962306a36Sopenharmony_ci 230062306a36Sopenharmony_ci for_each_set_bit(bit, (unsigned long *)&pebs_status, size) 230162306a36Sopenharmony_ci counts[bit]++; 230262306a36Sopenharmony_ci } 230362306a36Sopenharmony_ci 230462306a36Sopenharmony_ci for_each_set_bit(bit, (unsigned long *)&mask, size) { 230562306a36Sopenharmony_ci if (counts[bit] == 0) 230662306a36Sopenharmony_ci continue; 230762306a36Sopenharmony_ci 230862306a36Sopenharmony_ci event = cpuc->events[bit]; 230962306a36Sopenharmony_ci if (WARN_ON_ONCE(!event)) 231062306a36Sopenharmony_ci continue; 231162306a36Sopenharmony_ci 231262306a36Sopenharmony_ci if (WARN_ON_ONCE(!event->attr.precise_ip)) 231362306a36Sopenharmony_ci continue; 231462306a36Sopenharmony_ci 231562306a36Sopenharmony_ci __intel_pmu_pebs_event(event, iregs, data, base, 231662306a36Sopenharmony_ci top, bit, counts[bit], 231762306a36Sopenharmony_ci setup_pebs_adaptive_sample_data); 231862306a36Sopenharmony_ci } 231962306a36Sopenharmony_ci} 232062306a36Sopenharmony_ci 232162306a36Sopenharmony_ci/* 232262306a36Sopenharmony_ci * BTS, PEBS probe and setup 232362306a36Sopenharmony_ci */ 232462306a36Sopenharmony_ci 232562306a36Sopenharmony_civoid __init intel_ds_init(void) 232662306a36Sopenharmony_ci{ 232762306a36Sopenharmony_ci /* 232862306a36Sopenharmony_ci * No support for 32bit formats 232962306a36Sopenharmony_ci */ 233062306a36Sopenharmony_ci if (!boot_cpu_has(X86_FEATURE_DTES64)) 233162306a36Sopenharmony_ci return; 233262306a36Sopenharmony_ci 233362306a36Sopenharmony_ci x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); 233462306a36Sopenharmony_ci x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); 233562306a36Sopenharmony_ci x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; 233662306a36Sopenharmony_ci if (x86_pmu.version <= 4) 233762306a36Sopenharmony_ci x86_pmu.pebs_no_isolation = 1; 233862306a36Sopenharmony_ci 233962306a36Sopenharmony_ci if (x86_pmu.pebs) { 234062306a36Sopenharmony_ci char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; 234162306a36Sopenharmony_ci char *pebs_qual = ""; 234262306a36Sopenharmony_ci int format = x86_pmu.intel_cap.pebs_format; 234362306a36Sopenharmony_ci 234462306a36Sopenharmony_ci if (format < 4) 234562306a36Sopenharmony_ci x86_pmu.intel_cap.pebs_baseline = 0; 234662306a36Sopenharmony_ci 234762306a36Sopenharmony_ci switch (format) { 234862306a36Sopenharmony_ci case 0: 234962306a36Sopenharmony_ci pr_cont("PEBS fmt0%c, ", pebs_type); 235062306a36Sopenharmony_ci x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); 235162306a36Sopenharmony_ci /* 235262306a36Sopenharmony_ci * Using >PAGE_SIZE buffers makes the WRMSR to 235362306a36Sopenharmony_ci * PERF_GLOBAL_CTRL in intel_pmu_enable_all() 235462306a36Sopenharmony_ci * mysteriously hang on Core2. 235562306a36Sopenharmony_ci * 235662306a36Sopenharmony_ci * As a workaround, we don't do this. 235762306a36Sopenharmony_ci */ 235862306a36Sopenharmony_ci x86_pmu.pebs_buffer_size = PAGE_SIZE; 235962306a36Sopenharmony_ci x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; 236062306a36Sopenharmony_ci break; 236162306a36Sopenharmony_ci 236262306a36Sopenharmony_ci case 1: 236362306a36Sopenharmony_ci pr_cont("PEBS fmt1%c, ", pebs_type); 236462306a36Sopenharmony_ci x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); 236562306a36Sopenharmony_ci x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; 236662306a36Sopenharmony_ci break; 236762306a36Sopenharmony_ci 236862306a36Sopenharmony_ci case 2: 236962306a36Sopenharmony_ci pr_cont("PEBS fmt2%c, ", pebs_type); 237062306a36Sopenharmony_ci x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw); 237162306a36Sopenharmony_ci x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; 237262306a36Sopenharmony_ci break; 237362306a36Sopenharmony_ci 237462306a36Sopenharmony_ci case 3: 237562306a36Sopenharmony_ci pr_cont("PEBS fmt3%c, ", pebs_type); 237662306a36Sopenharmony_ci x86_pmu.pebs_record_size = 237762306a36Sopenharmony_ci sizeof(struct pebs_record_skl); 237862306a36Sopenharmony_ci x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; 237962306a36Sopenharmony_ci x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; 238062306a36Sopenharmony_ci break; 238162306a36Sopenharmony_ci 238262306a36Sopenharmony_ci case 5: 238362306a36Sopenharmony_ci x86_pmu.pebs_ept = 1; 238462306a36Sopenharmony_ci fallthrough; 238562306a36Sopenharmony_ci case 4: 238662306a36Sopenharmony_ci x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl; 238762306a36Sopenharmony_ci x86_pmu.pebs_record_size = sizeof(struct pebs_basic); 238862306a36Sopenharmony_ci if (x86_pmu.intel_cap.pebs_baseline) { 238962306a36Sopenharmony_ci x86_pmu.large_pebs_flags |= 239062306a36Sopenharmony_ci PERF_SAMPLE_BRANCH_STACK | 239162306a36Sopenharmony_ci PERF_SAMPLE_TIME; 239262306a36Sopenharmony_ci x86_pmu.flags |= PMU_FL_PEBS_ALL; 239362306a36Sopenharmony_ci x86_pmu.pebs_capable = ~0ULL; 239462306a36Sopenharmony_ci pebs_qual = "-baseline"; 239562306a36Sopenharmony_ci x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; 239662306a36Sopenharmony_ci } else { 239762306a36Sopenharmony_ci /* Only basic record supported */ 239862306a36Sopenharmony_ci x86_pmu.large_pebs_flags &= 239962306a36Sopenharmony_ci ~(PERF_SAMPLE_ADDR | 240062306a36Sopenharmony_ci PERF_SAMPLE_TIME | 240162306a36Sopenharmony_ci PERF_SAMPLE_DATA_SRC | 240262306a36Sopenharmony_ci PERF_SAMPLE_TRANSACTION | 240362306a36Sopenharmony_ci PERF_SAMPLE_REGS_USER | 240462306a36Sopenharmony_ci PERF_SAMPLE_REGS_INTR); 240562306a36Sopenharmony_ci } 240662306a36Sopenharmony_ci pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual); 240762306a36Sopenharmony_ci 240862306a36Sopenharmony_ci if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) { 240962306a36Sopenharmony_ci pr_cont("PEBS-via-PT, "); 241062306a36Sopenharmony_ci x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; 241162306a36Sopenharmony_ci } 241262306a36Sopenharmony_ci 241362306a36Sopenharmony_ci break; 241462306a36Sopenharmony_ci 241562306a36Sopenharmony_ci default: 241662306a36Sopenharmony_ci pr_cont("no PEBS fmt%d%c, ", format, pebs_type); 241762306a36Sopenharmony_ci x86_pmu.pebs = 0; 241862306a36Sopenharmony_ci } 241962306a36Sopenharmony_ci } 242062306a36Sopenharmony_ci} 242162306a36Sopenharmony_ci 242262306a36Sopenharmony_civoid perf_restore_debug_store(void) 242362306a36Sopenharmony_ci{ 242462306a36Sopenharmony_ci struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); 242562306a36Sopenharmony_ci 242662306a36Sopenharmony_ci if (!x86_pmu.bts && !x86_pmu.pebs) 242762306a36Sopenharmony_ci return; 242862306a36Sopenharmony_ci 242962306a36Sopenharmony_ci wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds); 243062306a36Sopenharmony_ci} 2431