162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Implement support for AMD Fam19h Branch Sampling feature
462306a36Sopenharmony_ci * Based on specifications published in AMD PPR Fam19 Model 01
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * Copyright 2021 Google LLC
762306a36Sopenharmony_ci * Contributed by Stephane Eranian <eranian@google.com>
862306a36Sopenharmony_ci */
962306a36Sopenharmony_ci#include <linux/kernel.h>
1062306a36Sopenharmony_ci#include <linux/jump_label.h>
1162306a36Sopenharmony_ci#include <asm/msr.h>
1262306a36Sopenharmony_ci#include <asm/cpufeature.h>
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci#include "../perf_event.h"
1562306a36Sopenharmony_ci
/*
 * Poison value written into a "to" MSR to mark the limit of valid entries:
 * amd_brs_drain() stops reading at the first poisoned target, and the
 * context-switch path poisons the most recent entry so the next task
 * cannot consume stale branches.
 */
#define BRS_POISON	0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */
1862306a36Sopenharmony_ci/* Debug Extension Configuration register layout */
/*
 * Debug Extension Configuration register layout.
 * Overlays the MSR_AMD_DBG_EXTN_CFG value read/written by
 * get_debug_extn_cfg()/set_debug_extn_cfg().
 */
union amd_debug_extn_cfg {
	__u64 val;
	struct {
		__u64	rsvd0:2,  /* reserved */
			brsmen:1, /* branch sample enable */
			rsvd4_3:2,/* reserved - must be 0x3 */
			vb:1,     /* valid branches recorded */
			rsvd2:10, /* reserved */
			msroff:4, /* index of next entry to write */
			rsvd3:4,  /* reserved */
			pmc:3,    /* #PMC holding the sampling event */
			rsvd4:37; /* reserved */
	};
};
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_cistatic inline unsigned int brs_from(int idx)
3562306a36Sopenharmony_ci{
3662306a36Sopenharmony_ci	return MSR_AMD_SAMP_BR_FROM + 2 * idx;
3762306a36Sopenharmony_ci}
3862306a36Sopenharmony_ci
/*
 * MSR address holding the "branch to" record of BRS entry @idx:
 * it immediately follows the "from" MSR of the same entry.
 */
static inline unsigned int brs_to(int idx)
{
	return brs_from(idx) + 1;
}
4362306a36Sopenharmony_ci
/*
 * Write @val to MSR_AMD_DBG_EXTN_CFG.
 * __wrmsr() takes the low and high 32-bit halves separately; the OR with
 * 3ULL << 3 forces the rsvd4_3 field, which lives in the low half.
 */
static __always_inline void set_debug_extn_cfg(u64 val)
{
	/* bits[4:3] must always be set to 11b */
	__wrmsr(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3, val >> 32);
}
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_cistatic __always_inline u64 get_debug_extn_cfg(void)
5162306a36Sopenharmony_ci{
5262306a36Sopenharmony_ci	return __rdmsr(MSR_AMD_DBG_EXTN_CFG);
5362306a36Sopenharmony_ci}
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_cistatic bool __init amd_brs_detect(void)
5662306a36Sopenharmony_ci{
5762306a36Sopenharmony_ci	if (!cpu_feature_enabled(X86_FEATURE_BRS))
5862306a36Sopenharmony_ci		return false;
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci	switch (boot_cpu_data.x86) {
6162306a36Sopenharmony_ci	case 0x19: /* AMD Fam19h (Zen3) */
6262306a36Sopenharmony_ci		x86_pmu.lbr_nr = 16;
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci		/* No hardware filtering supported */
6562306a36Sopenharmony_ci		x86_pmu.lbr_sel_map = NULL;
6662306a36Sopenharmony_ci		x86_pmu.lbr_sel_mask = 0;
6762306a36Sopenharmony_ci		break;
6862306a36Sopenharmony_ci	default:
6962306a36Sopenharmony_ci		return false;
7062306a36Sopenharmony_ci	}
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci	return true;
7362306a36Sopenharmony_ci}
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci/*
7662306a36Sopenharmony_ci * Current BRS implementation does not support branch type or privilege level
7762306a36Sopenharmony_ci * filtering. Therefore, this function simply enforces these limitations. No need for
7862306a36Sopenharmony_ci * a br_sel_map. Software filtering is not supported because it would not correlate well
7962306a36Sopenharmony_ci * with a sampling period.
8062306a36Sopenharmony_ci */
8162306a36Sopenharmony_cistatic int amd_brs_setup_filter(struct perf_event *event)
8262306a36Sopenharmony_ci{
8362306a36Sopenharmony_ci	u64 type = event->attr.branch_sample_type;
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	/* No BRS support */
8662306a36Sopenharmony_ci	if (!x86_pmu.lbr_nr)
8762306a36Sopenharmony_ci		return -EOPNOTSUPP;
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	/* Can only capture all branches, i.e., no filtering */
9062306a36Sopenharmony_ci	if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
9162306a36Sopenharmony_ci		return -EINVAL;
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci	return 0;
9462306a36Sopenharmony_ci}
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_cistatic inline int amd_is_brs_event(struct perf_event *e)
9762306a36Sopenharmony_ci{
9862306a36Sopenharmony_ci	return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
9962306a36Sopenharmony_ci}
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ciint amd_brs_hw_config(struct perf_event *event)
10262306a36Sopenharmony_ci{
10362306a36Sopenharmony_ci	int ret = 0;
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci	/*
10662306a36Sopenharmony_ci	 * Due to interrupt holding, BRS is not recommended in
10762306a36Sopenharmony_ci	 * counting mode.
10862306a36Sopenharmony_ci	 */
10962306a36Sopenharmony_ci	if (!is_sampling_event(event))
11062306a36Sopenharmony_ci		return -EINVAL;
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	/*
11362306a36Sopenharmony_ci	 * Due to the way BRS operates by holding the interrupt until
11462306a36Sopenharmony_ci	 * lbr_nr entries have been captured, it does not make sense
11562306a36Sopenharmony_ci	 * to allow sampling on BRS with an event that does not match
11662306a36Sopenharmony_ci	 * what BRS is capturing, i.e., retired taken branches.
11762306a36Sopenharmony_ci	 * Otherwise the correlation with the event's period is even
11862306a36Sopenharmony_ci	 * more loose:
11962306a36Sopenharmony_ci	 *
12062306a36Sopenharmony_ci	 * With retired taken branch:
12162306a36Sopenharmony_ci	 *   Effective P = P + 16 + X
12262306a36Sopenharmony_ci	 * With any other event:
12362306a36Sopenharmony_ci	 *   Effective P = P + Y + X
12462306a36Sopenharmony_ci	 *
12562306a36Sopenharmony_ci	 * Where X is the number of taken branches due to interrupt
12662306a36Sopenharmony_ci	 * skid. Skid is large.
12762306a36Sopenharmony_ci	 *
12862306a36Sopenharmony_ci	 * Where Y is the occurences of the event while BRS is
12962306a36Sopenharmony_ci	 * capturing the lbr_nr entries.
13062306a36Sopenharmony_ci	 *
13162306a36Sopenharmony_ci	 * By using retired taken branches, we limit the impact on the
13262306a36Sopenharmony_ci	 * Y variable. We know it cannot be more than the depth of
13362306a36Sopenharmony_ci	 * BRS.
13462306a36Sopenharmony_ci	 */
13562306a36Sopenharmony_ci	if (!amd_is_brs_event(event))
13662306a36Sopenharmony_ci		return -EINVAL;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	/*
13962306a36Sopenharmony_ci	 * BRS implementation does not work with frequency mode
14062306a36Sopenharmony_ci	 * reprogramming of the period.
14162306a36Sopenharmony_ci	 */
14262306a36Sopenharmony_ci	if (event->attr.freq)
14362306a36Sopenharmony_ci		return -EINVAL;
14462306a36Sopenharmony_ci	/*
14562306a36Sopenharmony_ci	 * The kernel subtracts BRS depth from period, so it must
14662306a36Sopenharmony_ci	 * be big enough.
14762306a36Sopenharmony_ci	 */
14862306a36Sopenharmony_ci	if (event->attr.sample_period <= x86_pmu.lbr_nr)
14962306a36Sopenharmony_ci		return -EINVAL;
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	/*
15262306a36Sopenharmony_ci	 * Check if we can allow PERF_SAMPLE_BRANCH_STACK
15362306a36Sopenharmony_ci	 */
15462306a36Sopenharmony_ci	ret = amd_brs_setup_filter(event);
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci	/* only set in case of success */
15762306a36Sopenharmony_ci	if (!ret)
15862306a36Sopenharmony_ci		event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	return ret;
16162306a36Sopenharmony_ci}
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci/* tos = top of stack, i.e., last valid entry written */
16462306a36Sopenharmony_cistatic inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
16562306a36Sopenharmony_ci{
16662306a36Sopenharmony_ci	/*
16762306a36Sopenharmony_ci	 * msroff: index of next entry to write so top-of-stack is one off
16862306a36Sopenharmony_ci	 * if BRS is full then msroff is set back to 0.
16962306a36Sopenharmony_ci	 */
17062306a36Sopenharmony_ci	return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
17162306a36Sopenharmony_ci}
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci/*
17462306a36Sopenharmony_ci * make sure we have a sane BRS offset to begin with
17562306a36Sopenharmony_ci * especially with kexec
17662306a36Sopenharmony_ci */
17762306a36Sopenharmony_civoid amd_brs_reset(void)
17862306a36Sopenharmony_ci{
17962306a36Sopenharmony_ci	if (!cpu_feature_enabled(X86_FEATURE_BRS))
18062306a36Sopenharmony_ci		return;
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	/*
18362306a36Sopenharmony_ci	 * Reset config
18462306a36Sopenharmony_ci	 */
18562306a36Sopenharmony_ci	set_debug_extn_cfg(0);
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci	/*
18862306a36Sopenharmony_ci	 * Mark first entry as poisoned
18962306a36Sopenharmony_ci	 */
19062306a36Sopenharmony_ci	wrmsrl(brs_to(0), BRS_POISON);
19162306a36Sopenharmony_ci}
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ciint __init amd_brs_init(void)
19462306a36Sopenharmony_ci{
19562306a36Sopenharmony_ci	if (!amd_brs_detect())
19662306a36Sopenharmony_ci		return -EOPNOTSUPP;
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	return 0;
20162306a36Sopenharmony_ci}
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_civoid amd_brs_enable(void)
20462306a36Sopenharmony_ci{
20562306a36Sopenharmony_ci	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
20662306a36Sopenharmony_ci	union amd_debug_extn_cfg cfg;
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	/* Activate only on first user */
20962306a36Sopenharmony_ci	if (++cpuc->brs_active > 1)
21062306a36Sopenharmony_ci		return;
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci	cfg.val    = 0; /* reset all fields */
21362306a36Sopenharmony_ci	cfg.brsmen = 1; /* enable branch sampling */
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci	/* Set enable bit */
21662306a36Sopenharmony_ci	set_debug_extn_cfg(cfg.val);
21762306a36Sopenharmony_ci}
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_civoid amd_brs_enable_all(void)
22062306a36Sopenharmony_ci{
22162306a36Sopenharmony_ci	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
22262306a36Sopenharmony_ci	if (cpuc->lbr_users)
22362306a36Sopenharmony_ci		amd_brs_enable();
22462306a36Sopenharmony_ci}
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_civoid amd_brs_disable(void)
22762306a36Sopenharmony_ci{
22862306a36Sopenharmony_ci	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
22962306a36Sopenharmony_ci	union amd_debug_extn_cfg cfg;
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci	/* Check if active (could be disabled via x86_pmu_disable_all()) */
23262306a36Sopenharmony_ci	if (!cpuc->brs_active)
23362306a36Sopenharmony_ci		return;
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	/* Only disable for last user */
23662306a36Sopenharmony_ci	if (--cpuc->brs_active)
23762306a36Sopenharmony_ci		return;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	/*
24062306a36Sopenharmony_ci	 * Clear the brsmen bit but preserve the others as they contain
24162306a36Sopenharmony_ci	 * useful state such as vb and msroff
24262306a36Sopenharmony_ci	 */
24362306a36Sopenharmony_ci	cfg.val = get_debug_extn_cfg();
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	/*
24662306a36Sopenharmony_ci	 * When coming in on interrupt and BRS is full, then hw will have
24762306a36Sopenharmony_ci	 * already stopped BRS, no need to issue wrmsr again
24862306a36Sopenharmony_ci	 */
24962306a36Sopenharmony_ci	if (cfg.brsmen) {
25062306a36Sopenharmony_ci		cfg.brsmen = 0;
25162306a36Sopenharmony_ci		set_debug_extn_cfg(cfg.val);
25262306a36Sopenharmony_ci	}
25362306a36Sopenharmony_ci}
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_civoid amd_brs_disable_all(void)
25662306a36Sopenharmony_ci{
25762306a36Sopenharmony_ci	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
25862306a36Sopenharmony_ci	if (cpuc->lbr_users)
25962306a36Sopenharmony_ci		amd_brs_disable();
26062306a36Sopenharmony_ci}
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_cistatic bool amd_brs_match_plm(struct perf_event *event, u64 to)
26362306a36Sopenharmony_ci{
26462306a36Sopenharmony_ci	int type = event->attr.branch_sample_type;
26562306a36Sopenharmony_ci	int plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV;
26662306a36Sopenharmony_ci	int plm_u = PERF_SAMPLE_BRANCH_USER;
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci	if (!(type & plm_k) && kernel_ip(to))
26962306a36Sopenharmony_ci		return 0;
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	if (!(type & plm_u) && !kernel_ip(to))
27262306a36Sopenharmony_ci		return 0;
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci	return 1;
27562306a36Sopenharmony_ci}
27662306a36Sopenharmony_ci
/*
 * Drain the BRS hardware registers into cpuc->lbr_entries.
 *
 * Caller must ensure amd_brs_inuse() is true before calling.
 * On return, cpuc->lbr_stack.nr holds the number of branches captured
 * (0 when nothing valid was recorded).
 */
void amd_brs_drain(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *event = cpuc->events[0];
	struct perf_branch_entry *br = cpuc->lbr_entries;
	union amd_debug_extn_cfg cfg;
	u32 i, nr = 0, num, tos, start;
	/* Bits above the virtual-address width, used to sign-extend targets */
	u32 shift = 64 - boot_cpu_data.x86_virt_bits;

	/*
	 * BRS event forced on PMC0,
	 * so check if there is an event.
	 * It is possible to have lbr_users > 0 but the event
	 * not yet scheduled due to long latency PMU irq
	 */
	if (!event)
		goto empty;

	cfg.val = get_debug_extn_cfg();

	/* Sanity check [0-x86_pmu.lbr_nr] */
	if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
		goto empty;

	/* No valid branch recorded by the hardware */
	if (cfg.vb == 0)
		goto empty;

	/*
	 * msr.off points to next entry to be written
	 * tos = most recent entry index = msr.off - 1
	 * BRS register buffer saturates, so we know we have
	 * start < tos and that we have to read from start to tos
	 */
	start = 0;
	tos = amd_brs_get_tos(&cfg);

	num = tos - start + 1;

	/*
	 * BRS is only one pass (saturation) from MSROFF to depth-1
	 * MSROFF wraps to zero when buffer is full
	 */
	for (i = 0; i < num; i++) {
		u32 brs_idx = tos - i;
		u64 from, to;

		rdmsrl(brs_to(brs_idx), to);

		/* Entry does not belong to us (as marked by kernel) */
		if (to == BRS_POISON)
			break;

		/*
		 * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
		 * Necessary to generate proper virtual addresses suitable for
		 * symbolization
		 */
		to = (u64)(((s64)to << shift) >> shift);

		/* Drop entries rejected by the privilege-level filter */
		if (!amd_brs_match_plm(event, to))
			continue;

		rdmsrl(brs_from(brs_idx), from);

		perf_clear_branch_entry_bitfields(br+nr);

		br[nr].from = from;
		br[nr].to   = to;

		nr++;
	}
empty:
	/* Record number of sampled branches */
	cpuc->lbr_stack.nr = nr;
}
35762306a36Sopenharmony_ci
35862306a36Sopenharmony_ci/*
35962306a36Sopenharmony_ci * Poison most recent entry to prevent reuse by next task
36062306a36Sopenharmony_ci * required because BRS entry are not tagged by PID
36162306a36Sopenharmony_ci */
36262306a36Sopenharmony_cistatic void amd_brs_poison_buffer(void)
36362306a36Sopenharmony_ci{
36462306a36Sopenharmony_ci	union amd_debug_extn_cfg cfg;
36562306a36Sopenharmony_ci	unsigned int idx;
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci	/* Get current state */
36862306a36Sopenharmony_ci	cfg.val = get_debug_extn_cfg();
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci	/* idx is most recently written entry */
37162306a36Sopenharmony_ci	idx = amd_brs_get_tos(&cfg);
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci	/* Poison target of entry */
37462306a36Sopenharmony_ci	wrmsrl(brs_to(idx), BRS_POISON);
37562306a36Sopenharmony_ci}
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci/*
37862306a36Sopenharmony_ci * On context switch in, we need to make sure no samples from previous user
37962306a36Sopenharmony_ci * are left in the BRS.
38062306a36Sopenharmony_ci *
38162306a36Sopenharmony_ci * On ctxswin, sched_in = true, called after the PMU has started
38262306a36Sopenharmony_ci * On ctxswout, sched_in = false, called before the PMU is stopped
38362306a36Sopenharmony_ci */
38462306a36Sopenharmony_civoid amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
38562306a36Sopenharmony_ci{
38662306a36Sopenharmony_ci	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_ci	/* no active users */
38962306a36Sopenharmony_ci	if (!cpuc->lbr_users)
39062306a36Sopenharmony_ci		return;
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_ci	/*
39362306a36Sopenharmony_ci	 * On context switch in, we need to ensure we do not use entries
39462306a36Sopenharmony_ci	 * from previous BRS user on that CPU, so we poison the buffer as
39562306a36Sopenharmony_ci	 * a faster way compared to resetting all entries.
39662306a36Sopenharmony_ci	 */
39762306a36Sopenharmony_ci	if (sched_in)
39862306a36Sopenharmony_ci		amd_brs_poison_buffer();
39962306a36Sopenharmony_ci}
40062306a36Sopenharmony_ci
/*
 * Low-power transition callback.
 * Called from ACPI processor_idle.c or acpi_pad.c with interrupts
 * disabled; @lopwr_in is true when entering the low-power state and
 * false when leaving it.
 */
void noinstr perf_amd_brs_lopwr_cb(bool lopwr_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/*
	 * on mwait in, we may end up in non C0 state.
	 * we must disable branch sampling to avoid holding the NMI
	 * for too long. We disable it in hardware but we
	 * keep the state in cpuc, so we can re-enable.
	 *
	 * The hardware will deliver the NMI if needed when brsmen cleared
	 */
	if (cpuc->brs_active) {
		cfg.val = get_debug_extn_cfg();
		/* brsmen = 0 on low-power entry, 1 on exit */
		cfg.brsmen = !lopwr_in;
		set_debug_extn_cfg(cfg.val);
	}
}
42462306a36Sopenharmony_ci
/*
 * Static-call entry point for the low-power callback; starts out NULL
 * and is pointed at perf_amd_brs_lopwr_cb() by amd_brs_lopwr_init().
 */
DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);
42762306a36Sopenharmony_ci
/* Activate the low-power callback by pointing the static call at it. */
void __init amd_brs_lopwr_init(void)
{
	static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
}
432