162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Intel Performance and Energy Bias Hint support.
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2019 Intel Corporation
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * Author:
862306a36Sopenharmony_ci *	Rafael J. Wysocki <rafael.j.wysocki@intel.com>
962306a36Sopenharmony_ci */
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci#include <linux/cpuhotplug.h>
1262306a36Sopenharmony_ci#include <linux/cpu.h>
1362306a36Sopenharmony_ci#include <linux/device.h>
1462306a36Sopenharmony_ci#include <linux/kernel.h>
1562306a36Sopenharmony_ci#include <linux/string.h>
1662306a36Sopenharmony_ci#include <linux/syscore_ops.h>
1762306a36Sopenharmony_ci#include <linux/pm.h>
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci#include <asm/cpu_device_id.h>
2062306a36Sopenharmony_ci#include <asm/cpufeature.h>
2162306a36Sopenharmony_ci#include <asm/msr.h>
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_ci/**
2462306a36Sopenharmony_ci * DOC: overview
2562306a36Sopenharmony_ci *
2662306a36Sopenharmony_ci * The Performance and Energy Bias Hint (EPB) allows software to specify its
2762306a36Sopenharmony_ci * preference with respect to the power-performance tradeoffs present in the
2862306a36Sopenharmony_ci * processor.  Generally, the EPB is expected to be set by user space (directly
2962306a36Sopenharmony_ci * via sysfs or with the help of the x86_energy_perf_policy tool), but there are
3062306a36Sopenharmony_ci * two reasons for the kernel to update it.
3162306a36Sopenharmony_ci *
3262306a36Sopenharmony_ci * First, there are systems where the platform firmware resets the EPB during
3362306a36Sopenharmony_ci * system-wide transitions from sleep states back into the working state
3462306a36Sopenharmony_ci * effectively causing the previous EPB updates by user space to be lost.
3562306a36Sopenharmony_ci * Thus the kernel needs to save the current EPB values for all CPUs during
3662306a36Sopenharmony_ci * system-wide transitions to sleep states and restore them on the way back to
3762306a36Sopenharmony_ci * the working state.  That can be achieved by saving EPB for secondary CPUs
3862306a36Sopenharmony_ci * when they are taken offline during transitions into system sleep states and
3962306a36Sopenharmony_ci * for the boot CPU in a syscore suspend operation, so that it can be restored
4062306a36Sopenharmony_ci * for the boot CPU in a syscore resume operation and for the other CPUs when
4162306a36Sopenharmony_ci * they are brought back online.  However, CPUs that are already offline when
4262306a36Sopenharmony_ci * a system-wide PM transition is started are not taken offline again, but their
4362306a36Sopenharmony_ci * EPB values may still be reset by the platform firmware during the transition,
4462306a36Sopenharmony_ci * so in fact it is necessary to save the EPB of any CPU taken offline and to
4562306a36Sopenharmony_ci * restore it when the given CPU goes back online at all times.
4662306a36Sopenharmony_ci *
4762306a36Sopenharmony_ci * Second, on many systems the initial EPB value coming from the platform
4862306a36Sopenharmony_ci * firmware is 0 ('performance') and at least on some of them that is because
4962306a36Sopenharmony_ci * the platform firmware does not initialize EPB at all with the assumption that
5062306a36Sopenharmony_ci * the OS will do that anyway.  That sometimes is problematic, as it may cause
5162306a36Sopenharmony_ci * the system battery to drain too fast, for example, so it is better to adjust
5262306a36Sopenharmony_ci * it on CPU bring-up and if the initial EPB value for a given CPU is 0, the
5362306a36Sopenharmony_ci * kernel changes it to 6 ('normal').
5462306a36Sopenharmony_ci */
5562306a36Sopenharmony_ci
/*
 * Per-CPU copy of the EPB value captured by intel_epb_save().  A value of 0
 * means that intel_epb_save() has not stored anything for the CPU yet, which
 * is how intel_epb_restore() tells a first-time bring-up from a later one.
 */
static DEFINE_PER_CPU(u8, saved_epb);

/* Bits of MSR_IA32_ENERGY_PERF_BIAS that hold the EPB value itself. */
#define EPB_MASK	0x0fULL
/* Flag OR-ed into saved_epb so that a saved EPB of 0 is still nonzero. */
#define EPB_SAVED	0x10ULL
/* Largest numeric EPB value accepted by energy_perf_bias_store(). */
#define MAX_EPB		EPB_MASK
6162306a36Sopenharmony_ci
/*
 * Indices of the named EPB settings.  The same index selects the numeric
 * value in energ_perf_values[] and the sysfs name in energy_perf_strings[].
 */
enum energy_perf_value_index {
	EPB_INDEX_PERFORMANCE,
	EPB_INDEX_BALANCE_PERFORMANCE,
	EPB_INDEX_NORMAL,
	EPB_INDEX_BALANCE_POWERSAVE,
	EPB_INDEX_POWERSAVE,
};
6962306a36Sopenharmony_ci
/*
 * Numeric EPB value for each named setting, indexed by
 * enum energy_perf_value_index.
 *
 * The table is deliberately not const: intel_epb_init() may overwrite the
 * EPB_INDEX_NORMAL entry for CPU models listed in intel_epb_normal[].
 */
static u8 energ_perf_values[] = {
	[EPB_INDEX_PERFORMANCE] = ENERGY_PERF_BIAS_PERFORMANCE,
	[EPB_INDEX_BALANCE_PERFORMANCE] = ENERGY_PERF_BIAS_BALANCE_PERFORMANCE,
	[EPB_INDEX_NORMAL] = ENERGY_PERF_BIAS_NORMAL,
	[EPB_INDEX_BALANCE_POWERSAVE] = ENERGY_PERF_BIAS_BALANCE_POWERSAVE,
	[EPB_INDEX_POWERSAVE] = ENERGY_PERF_BIAS_POWERSAVE,
};
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_cistatic int intel_epb_save(void)
7962306a36Sopenharmony_ci{
8062306a36Sopenharmony_ci	u64 epb;
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
8362306a36Sopenharmony_ci	/*
8462306a36Sopenharmony_ci	 * Ensure that saved_epb will always be nonzero after this write even if
8562306a36Sopenharmony_ci	 * the EPB value read from the MSR is 0.
8662306a36Sopenharmony_ci	 */
8762306a36Sopenharmony_ci	this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED);
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	return 0;
9062306a36Sopenharmony_ci}
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_cistatic void intel_epb_restore(void)
9362306a36Sopenharmony_ci{
9462306a36Sopenharmony_ci	u64 val = this_cpu_read(saved_epb);
9562306a36Sopenharmony_ci	u64 epb;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci	rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
9862306a36Sopenharmony_ci	if (val) {
9962306a36Sopenharmony_ci		val &= EPB_MASK;
10062306a36Sopenharmony_ci	} else {
10162306a36Sopenharmony_ci		/*
10262306a36Sopenharmony_ci		 * Because intel_epb_save() has not run for the current CPU yet,
10362306a36Sopenharmony_ci		 * it is going online for the first time, so if its EPB value is
10462306a36Sopenharmony_ci		 * 0 ('performance') at this point, assume that it has not been
10562306a36Sopenharmony_ci		 * initialized by the platform firmware and set it to 6
10662306a36Sopenharmony_ci		 * ('normal').
10762306a36Sopenharmony_ci		 */
10862306a36Sopenharmony_ci		val = epb & EPB_MASK;
10962306a36Sopenharmony_ci		if (val == ENERGY_PERF_BIAS_PERFORMANCE) {
11062306a36Sopenharmony_ci			val = energ_perf_values[EPB_INDEX_NORMAL];
11162306a36Sopenharmony_ci			pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
11262306a36Sopenharmony_ci		}
11362306a36Sopenharmony_ci	}
11462306a36Sopenharmony_ci	wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val);
11562306a36Sopenharmony_ci}
11662306a36Sopenharmony_ci
/*
 * Syscore callbacks registered by intel_epb_init(): save the EPB on suspend
 * and restore it on resume.  Per the overview above, these cover the boot
 * CPU; the other CPUs are handled by the CPU hotplug callbacks.
 */
static struct syscore_ops intel_epb_syscore_ops = {
	.suspend = intel_epb_save,
	.resume = intel_epb_restore,
};
12162306a36Sopenharmony_ci
/*
 * Symbolic names accepted by energy_perf_bias_store(), indexed by
 * enum energy_perf_value_index so that a match can be translated through
 * energ_perf_values[].
 */
static const char * const energy_perf_strings[] = {
	[EPB_INDEX_PERFORMANCE] = "performance",
	[EPB_INDEX_BALANCE_PERFORMANCE] = "balance-performance",
	[EPB_INDEX_NORMAL] = "normal",
	[EPB_INDEX_BALANCE_POWERSAVE] = "balance-power",
	[EPB_INDEX_POWERSAVE] = "power",
};
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_cistatic ssize_t energy_perf_bias_show(struct device *dev,
13162306a36Sopenharmony_ci				     struct device_attribute *attr,
13262306a36Sopenharmony_ci				     char *buf)
13362306a36Sopenharmony_ci{
13462306a36Sopenharmony_ci	unsigned int cpu = dev->id;
13562306a36Sopenharmony_ci	u64 epb;
13662306a36Sopenharmony_ci	int ret;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
13962306a36Sopenharmony_ci	if (ret < 0)
14062306a36Sopenharmony_ci		return ret;
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci	return sprintf(buf, "%llu\n", epb);
14362306a36Sopenharmony_ci}
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_cistatic ssize_t energy_perf_bias_store(struct device *dev,
14662306a36Sopenharmony_ci				      struct device_attribute *attr,
14762306a36Sopenharmony_ci				      const char *buf, size_t count)
14862306a36Sopenharmony_ci{
14962306a36Sopenharmony_ci	unsigned int cpu = dev->id;
15062306a36Sopenharmony_ci	u64 epb, val;
15162306a36Sopenharmony_ci	int ret;
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci	ret = __sysfs_match_string(energy_perf_strings,
15462306a36Sopenharmony_ci				   ARRAY_SIZE(energy_perf_strings), buf);
15562306a36Sopenharmony_ci	if (ret >= 0)
15662306a36Sopenharmony_ci		val = energ_perf_values[ret];
15762306a36Sopenharmony_ci	else if (kstrtou64(buf, 0, &val) || val > MAX_EPB)
15862306a36Sopenharmony_ci		return -EINVAL;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
16162306a36Sopenharmony_ci	if (ret < 0)
16262306a36Sopenharmony_ci		return ret;
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci	ret = wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS,
16562306a36Sopenharmony_ci			    (epb & ~EPB_MASK) | val);
16662306a36Sopenharmony_ci	if (ret < 0)
16762306a36Sopenharmony_ci		return ret;
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci	return count;
17062306a36Sopenharmony_ci}
17162306a36Sopenharmony_ci
/* Read-write attribute backed by energy_perf_bias_show()/_store(). */
static DEVICE_ATTR_RW(energy_perf_bias);

/* NULL-terminated list of the attributes in intel_epb_attr_group. */
static struct attribute *intel_epb_attrs[] = {
	&dev_attr_energy_perf_bias.attr,
	NULL
};

/*
 * Attribute group named power_group_name.  It is merged into the CPU device's
 * kobject in intel_epb_online() and unmerged again in intel_epb_offline().
 * NOTE(review): presumably this lands in the device's existing "power"
 * directory - confirm against the definition of power_group_name.
 */
static const struct attribute_group intel_epb_attr_group = {
	.name = power_group_name,
	.attrs =  intel_epb_attrs
};
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_cistatic int intel_epb_online(unsigned int cpu)
18562306a36Sopenharmony_ci{
18662306a36Sopenharmony_ci	struct device *cpu_dev = get_cpu_device(cpu);
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	intel_epb_restore();
18962306a36Sopenharmony_ci	if (!cpuhp_tasks_frozen)
19062306a36Sopenharmony_ci		sysfs_merge_group(&cpu_dev->kobj, &intel_epb_attr_group);
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	return 0;
19362306a36Sopenharmony_ci}
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_cistatic int intel_epb_offline(unsigned int cpu)
19662306a36Sopenharmony_ci{
19762306a36Sopenharmony_ci	struct device *cpu_dev = get_cpu_device(cpu);
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	if (!cpuhp_tasks_frozen)
20062306a36Sopenharmony_ci		sysfs_unmerge_group(&cpu_dev->kobj, &intel_epb_attr_group);
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	intel_epb_save();
20362306a36Sopenharmony_ci	return 0;
20462306a36Sopenharmony_ci}
20562306a36Sopenharmony_ci
/*
 * CPU models for which the 'normal' EPB setting uses a different value.
 * The driver_data of the matching entry replaces
 * energ_perf_values[EPB_INDEX_NORMAL] in intel_epb_init().
 */
static const struct x86_cpu_id intel_epb_normal[] = {
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,
				   ENERGY_PERF_BIAS_NORMAL_POWERSAVE),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,
				   ENERGY_PERF_BIAS_NORMAL_POWERSAVE),
	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,
				   ENERGY_PERF_BIAS_NORMAL_POWERSAVE),
	{}
};
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_cistatic __init int intel_epb_init(void)
21762306a36Sopenharmony_ci{
21862306a36Sopenharmony_ci	const struct x86_cpu_id *id = x86_match_cpu(intel_epb_normal);
21962306a36Sopenharmony_ci	int ret;
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	if (!boot_cpu_has(X86_FEATURE_EPB))
22262306a36Sopenharmony_ci		return -ENODEV;
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci	if (id)
22562306a36Sopenharmony_ci		energ_perf_values[EPB_INDEX_NORMAL] = id->driver_data;
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci	ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE,
22862306a36Sopenharmony_ci				"x86/intel/epb:online", intel_epb_online,
22962306a36Sopenharmony_ci				intel_epb_offline);
23062306a36Sopenharmony_ci	if (ret < 0)
23162306a36Sopenharmony_ci		goto err_out_online;
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	register_syscore_ops(&intel_epb_syscore_ops);
23462306a36Sopenharmony_ci	return 0;
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_cierr_out_online:
23762306a36Sopenharmony_ci	cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE);
23862306a36Sopenharmony_ci	return ret;
23962306a36Sopenharmony_ci}
24062306a36Sopenharmony_cisubsys_initcall(intel_epb_init);
241