18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * x86_pkg_temp_thermal driver
48c2ecf20Sopenharmony_ci * Copyright (c) 2013, Intel Corporation.
58c2ecf20Sopenharmony_ci */
68c2ecf20Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include <linux/module.h>
98c2ecf20Sopenharmony_ci#include <linux/init.h>
108c2ecf20Sopenharmony_ci#include <linux/err.h>
118c2ecf20Sopenharmony_ci#include <linux/param.h>
128c2ecf20Sopenharmony_ci#include <linux/device.h>
138c2ecf20Sopenharmony_ci#include <linux/platform_device.h>
148c2ecf20Sopenharmony_ci#include <linux/cpu.h>
158c2ecf20Sopenharmony_ci#include <linux/smp.h>
168c2ecf20Sopenharmony_ci#include <linux/slab.h>
178c2ecf20Sopenharmony_ci#include <linux/pm.h>
188c2ecf20Sopenharmony_ci#include <linux/thermal.h>
198c2ecf20Sopenharmony_ci#include <linux/debugfs.h>
208c2ecf20Sopenharmony_ci#include <asm/cpu_device_id.h>
218c2ecf20Sopenharmony_ci#include <asm/mce.h>
228c2ecf20Sopenharmony_ci
/*
 * Rate control delay: the idea is to introduce a debounce effect.
 * The delay should be long enough to reduce the number of events when
 * the threshold is set to a temperature which is constantly violated,
 * but short enough that action can still be taken. The action can be
 * to remove the threshold or to change it to the next interesting
 * setting. Based on experiments, roughly every 5 seconds under load
 * gives a significant temperature change.
 */
#define PKG_TEMP_THERMAL_NOTIFY_DELAY	5000
/* Delay in milliseconds used when queueing the threshold work. */
static int notify_delay_ms = PKG_TEMP_THERMAL_NOTIFY_DELAY;
module_param(notify_delay_ms, int, 0644);
MODULE_PARM_DESC(notify_delay_ms,
	"User space notification delay in milli seconds.");
388c2ecf20Sopenharmony_ci
/*
 * Number of trip points in the thermal zone. Currently it can't be more
 * than 2: the MSR only allows setting and getting notifications for 2
 * thresholds. This define enforces that limit in case cpuid returns a
 * wrong value for the number of thresholds.
 */
#define MAX_NUMBER_OF_TRIPS	2
458c2ecf20Sopenharmony_ci
/*
 * Per-zone driver state, stored in zones[] once set up.
 *
 * @cpu:		CPU used for the rdmsr_on_cpu()/wrmsr_on_cpu() calls
 *			and as the target of the delayed work; replaced by
 *			another CPU from @cpumask in the offline callback
 * @work_scheduled:	set when the interrupt notifier queues @work, cleared
 *			when the work function runs
 * @tj_max:		value obtained from get_tj_max() (MSR field * 1000)
 * @msr_pkg_therm_low:	low word of MSR_IA32_PACKAGE_THERM_INTERRUPT saved when
 *			the zone is added, written back on removal
 * @msr_pkg_therm_high:	high word of the same saved MSR value
 * @work:		delayed work running
 *			pkg_temp_thermal_threshold_work_fn()
 * @tzone:		thermal zone registered for this device; set to NULL
 *			before it is unregistered
 * @cpumask:		CPUs added by the online callback and not yet removed
 *			by the offline callback
 */
struct zone_device {
	int				cpu;
	bool				work_scheduled;
	u32				tj_max;
	u32				msr_pkg_therm_low;
	u32				msr_pkg_therm_high;
	struct delayed_work		work;
	struct thermal_zone_device	*tzone;
	struct cpumask			cpumask;
};
568c2ecf20Sopenharmony_ci
/*
 * Zone parameters handed to thermal_zone_device_register(); only the
 * no_hwmon flag is set.
 */
static struct thermal_zone_params pkg_temp_tz_params = {
	.no_hwmon	= true,
};
608c2ecf20Sopenharmony_ci
/* Number of zone pointers allocated in init(); also the bound for die ids */
static int max_id __read_mostly;
/* Array of zone pointers, indexed by topology_logical_die_id() */
static struct zone_device **zones;
/* Serializes interrupt notification, work and hotplug */
static DEFINE_RAW_SPINLOCK(pkg_temp_lock);
/* Protects zone operation in the work function against hotplug removal */
static DEFINE_MUTEX(thermal_zone_mutex);

/* The dynamically assigned cpu hotplug state for module_exit() */
static enum cpuhp_state pkg_thermal_hp_state __read_mostly;

/* debugfs directory holding the debug counters below */
static struct dentry *debugfs;
/* Incremented on every call of pkg_thermal_notify() */
static unsigned int pkg_interrupt_cnt;
/* Incremented on every run of pkg_temp_thermal_threshold_work_fn() */
static unsigned int pkg_work_cnt;
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_cistatic void pkg_temp_debugfs_init(void)
798c2ecf20Sopenharmony_ci{
808c2ecf20Sopenharmony_ci	debugfs = debugfs_create_dir("pkg_temp_thermal", NULL);
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci	debugfs_create_u32("pkg_thres_interrupt", S_IRUGO, debugfs,
838c2ecf20Sopenharmony_ci			   &pkg_interrupt_cnt);
848c2ecf20Sopenharmony_ci	debugfs_create_u32("pkg_thres_work", S_IRUGO, debugfs,
858c2ecf20Sopenharmony_ci			   &pkg_work_cnt);
868c2ecf20Sopenharmony_ci}
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci/*
898c2ecf20Sopenharmony_ci * Protection:
908c2ecf20Sopenharmony_ci *
918c2ecf20Sopenharmony_ci * - cpu hotplug: Read serialized by cpu hotplug lock
928c2ecf20Sopenharmony_ci *		  Write must hold pkg_temp_lock
938c2ecf20Sopenharmony_ci *
948c2ecf20Sopenharmony_ci * - Other callsites: Must hold pkg_temp_lock
958c2ecf20Sopenharmony_ci */
968c2ecf20Sopenharmony_cistatic struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
978c2ecf20Sopenharmony_ci{
988c2ecf20Sopenharmony_ci	int id = topology_logical_die_id(cpu);
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci	if (id >= 0 && id < max_id)
1018c2ecf20Sopenharmony_ci		return zones[id];
1028c2ecf20Sopenharmony_ci	return NULL;
1038c2ecf20Sopenharmony_ci}
1048c2ecf20Sopenharmony_ci
1058c2ecf20Sopenharmony_ci/*
1068c2ecf20Sopenharmony_ci* tj-max is is interesting because threshold is set relative to this
1078c2ecf20Sopenharmony_ci* temperature.
1088c2ecf20Sopenharmony_ci*/
1098c2ecf20Sopenharmony_cistatic int get_tj_max(int cpu, u32 *tj_max)
1108c2ecf20Sopenharmony_ci{
1118c2ecf20Sopenharmony_ci	u32 eax, edx, val;
1128c2ecf20Sopenharmony_ci	int err;
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_ci	err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
1158c2ecf20Sopenharmony_ci	if (err)
1168c2ecf20Sopenharmony_ci		return err;
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci	val = (eax >> 16) & 0xff;
1198c2ecf20Sopenharmony_ci	*tj_max = val * 1000;
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci	return val ? 0 : -EINVAL;
1228c2ecf20Sopenharmony_ci}
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_cistatic int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
1258c2ecf20Sopenharmony_ci{
1268c2ecf20Sopenharmony_ci	struct zone_device *zonedev = tzd->devdata;
1278c2ecf20Sopenharmony_ci	u32 eax, edx;
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci	rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_STATUS,
1308c2ecf20Sopenharmony_ci			&eax, &edx);
1318c2ecf20Sopenharmony_ci	if (eax & 0x80000000) {
1328c2ecf20Sopenharmony_ci		*temp = zonedev->tj_max - ((eax >> 16) & 0x7f) * 1000;
1338c2ecf20Sopenharmony_ci		pr_debug("sys_get_curr_temp %d\n", *temp);
1348c2ecf20Sopenharmony_ci		return 0;
1358c2ecf20Sopenharmony_ci	}
1368c2ecf20Sopenharmony_ci	return -EINVAL;
1378c2ecf20Sopenharmony_ci}
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_cistatic int sys_get_trip_temp(struct thermal_zone_device *tzd,
1408c2ecf20Sopenharmony_ci			     int trip, int *temp)
1418c2ecf20Sopenharmony_ci{
1428c2ecf20Sopenharmony_ci	struct zone_device *zonedev = tzd->devdata;
1438c2ecf20Sopenharmony_ci	unsigned long thres_reg_value;
1448c2ecf20Sopenharmony_ci	u32 mask, shift, eax, edx;
1458c2ecf20Sopenharmony_ci	int ret;
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci	if (trip >= MAX_NUMBER_OF_TRIPS)
1488c2ecf20Sopenharmony_ci		return -EINVAL;
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci	if (trip) {
1518c2ecf20Sopenharmony_ci		mask = THERM_MASK_THRESHOLD1;
1528c2ecf20Sopenharmony_ci		shift = THERM_SHIFT_THRESHOLD1;
1538c2ecf20Sopenharmony_ci	} else {
1548c2ecf20Sopenharmony_ci		mask = THERM_MASK_THRESHOLD0;
1558c2ecf20Sopenharmony_ci		shift = THERM_SHIFT_THRESHOLD0;
1568c2ecf20Sopenharmony_ci	}
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci	ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
1598c2ecf20Sopenharmony_ci			   &eax, &edx);
1608c2ecf20Sopenharmony_ci	if (ret < 0)
1618c2ecf20Sopenharmony_ci		return ret;
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci	thres_reg_value = (eax & mask) >> shift;
1648c2ecf20Sopenharmony_ci	if (thres_reg_value)
1658c2ecf20Sopenharmony_ci		*temp = zonedev->tj_max - thres_reg_value * 1000;
1668c2ecf20Sopenharmony_ci	else
1678c2ecf20Sopenharmony_ci		*temp = THERMAL_TEMP_INVALID;
1688c2ecf20Sopenharmony_ci	pr_debug("sys_get_trip_temp %d\n", *temp);
1698c2ecf20Sopenharmony_ci
1708c2ecf20Sopenharmony_ci	return 0;
1718c2ecf20Sopenharmony_ci}
1728c2ecf20Sopenharmony_ci
1738c2ecf20Sopenharmony_cistatic int
1748c2ecf20Sopenharmony_cisys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
1758c2ecf20Sopenharmony_ci{
1768c2ecf20Sopenharmony_ci	struct zone_device *zonedev = tzd->devdata;
1778c2ecf20Sopenharmony_ci	u32 l, h, mask, shift, intr;
1788c2ecf20Sopenharmony_ci	int ret;
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_ci	if (trip >= MAX_NUMBER_OF_TRIPS || temp >= zonedev->tj_max)
1818c2ecf20Sopenharmony_ci		return -EINVAL;
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci	ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
1848c2ecf20Sopenharmony_ci			   &l, &h);
1858c2ecf20Sopenharmony_ci	if (ret < 0)
1868c2ecf20Sopenharmony_ci		return ret;
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci	if (trip) {
1898c2ecf20Sopenharmony_ci		mask = THERM_MASK_THRESHOLD1;
1908c2ecf20Sopenharmony_ci		shift = THERM_SHIFT_THRESHOLD1;
1918c2ecf20Sopenharmony_ci		intr = THERM_INT_THRESHOLD1_ENABLE;
1928c2ecf20Sopenharmony_ci	} else {
1938c2ecf20Sopenharmony_ci		mask = THERM_MASK_THRESHOLD0;
1948c2ecf20Sopenharmony_ci		shift = THERM_SHIFT_THRESHOLD0;
1958c2ecf20Sopenharmony_ci		intr = THERM_INT_THRESHOLD0_ENABLE;
1968c2ecf20Sopenharmony_ci	}
1978c2ecf20Sopenharmony_ci	l &= ~mask;
1988c2ecf20Sopenharmony_ci	/*
1998c2ecf20Sopenharmony_ci	* When users space sets a trip temperature == 0, which is indication
2008c2ecf20Sopenharmony_ci	* that, it is no longer interested in receiving notifications.
2018c2ecf20Sopenharmony_ci	*/
2028c2ecf20Sopenharmony_ci	if (!temp) {
2038c2ecf20Sopenharmony_ci		l &= ~intr;
2048c2ecf20Sopenharmony_ci	} else {
2058c2ecf20Sopenharmony_ci		l |= (zonedev->tj_max - temp)/1000 << shift;
2068c2ecf20Sopenharmony_ci		l |= intr;
2078c2ecf20Sopenharmony_ci	}
2088c2ecf20Sopenharmony_ci
2098c2ecf20Sopenharmony_ci	return wrmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
2108c2ecf20Sopenharmony_ci			l, h);
2118c2ecf20Sopenharmony_ci}
2128c2ecf20Sopenharmony_ci
/*
 * Thermal zone .get_trip_type callback: reports THERMAL_TRIP_PASSIVE for
 * every trip index and always returns 0.
 */
static int sys_get_trip_type(struct thermal_zone_device *thermal, int trip,
			     enum thermal_trip_type *type)
{
	*type = THERMAL_TRIP_PASSIVE;
	return 0;
}
2198c2ecf20Sopenharmony_ci
/*
 * Thermal zone callback registry, passed to thermal_zone_device_register()
 * when a zone is added.
 */
static struct thermal_zone_device_ops tzone_ops = {
	.get_temp = sys_get_curr_temp,
	.get_trip_temp = sys_get_trip_temp,
	.get_trip_type = sys_get_trip_type,
	.set_trip_temp = sys_set_trip_temp,
};
2278c2ecf20Sopenharmony_ci
/*
 * Hook installed as platform_thermal_package_rate_control at module init;
 * unconditionally returns true.
 */
static bool pkg_thermal_rate_control(void)
{
	return true;
}
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_ci/* Enable threshold interrupt on local package/cpu */
2348c2ecf20Sopenharmony_cistatic inline void enable_pkg_thres_interrupt(void)
2358c2ecf20Sopenharmony_ci{
2368c2ecf20Sopenharmony_ci	u8 thres_0, thres_1;
2378c2ecf20Sopenharmony_ci	u32 l, h;
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
2408c2ecf20Sopenharmony_ci	/* only enable/disable if it had valid threshold value */
2418c2ecf20Sopenharmony_ci	thres_0 = (l & THERM_MASK_THRESHOLD0) >> THERM_SHIFT_THRESHOLD0;
2428c2ecf20Sopenharmony_ci	thres_1 = (l & THERM_MASK_THRESHOLD1) >> THERM_SHIFT_THRESHOLD1;
2438c2ecf20Sopenharmony_ci	if (thres_0)
2448c2ecf20Sopenharmony_ci		l |= THERM_INT_THRESHOLD0_ENABLE;
2458c2ecf20Sopenharmony_ci	if (thres_1)
2468c2ecf20Sopenharmony_ci		l |= THERM_INT_THRESHOLD1_ENABLE;
2478c2ecf20Sopenharmony_ci	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
2488c2ecf20Sopenharmony_ci}
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci/* Disable threshold interrupt on local package/cpu */
2518c2ecf20Sopenharmony_cistatic inline void disable_pkg_thres_interrupt(void)
2528c2ecf20Sopenharmony_ci{
2538c2ecf20Sopenharmony_ci	u32 l, h;
2548c2ecf20Sopenharmony_ci
2558c2ecf20Sopenharmony_ci	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	l &= ~(THERM_INT_THRESHOLD0_ENABLE | THERM_INT_THRESHOLD1_ENABLE);
2588c2ecf20Sopenharmony_ci	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
2598c2ecf20Sopenharmony_ci}
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_cistatic void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
2628c2ecf20Sopenharmony_ci{
2638c2ecf20Sopenharmony_ci	struct thermal_zone_device *tzone = NULL;
2648c2ecf20Sopenharmony_ci	int cpu = smp_processor_id();
2658c2ecf20Sopenharmony_ci	struct zone_device *zonedev;
2668c2ecf20Sopenharmony_ci	u64 msr_val, wr_val;
2678c2ecf20Sopenharmony_ci
2688c2ecf20Sopenharmony_ci	mutex_lock(&thermal_zone_mutex);
2698c2ecf20Sopenharmony_ci	raw_spin_lock_irq(&pkg_temp_lock);
2708c2ecf20Sopenharmony_ci	++pkg_work_cnt;
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_ci	zonedev = pkg_temp_thermal_get_dev(cpu);
2738c2ecf20Sopenharmony_ci	if (!zonedev) {
2748c2ecf20Sopenharmony_ci		raw_spin_unlock_irq(&pkg_temp_lock);
2758c2ecf20Sopenharmony_ci		mutex_unlock(&thermal_zone_mutex);
2768c2ecf20Sopenharmony_ci		return;
2778c2ecf20Sopenharmony_ci	}
2788c2ecf20Sopenharmony_ci	zonedev->work_scheduled = false;
2798c2ecf20Sopenharmony_ci
2808c2ecf20Sopenharmony_ci	rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
2818c2ecf20Sopenharmony_ci	wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
2828c2ecf20Sopenharmony_ci	if (wr_val != msr_val) {
2838c2ecf20Sopenharmony_ci		wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val);
2848c2ecf20Sopenharmony_ci		tzone = zonedev->tzone;
2858c2ecf20Sopenharmony_ci	}
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_ci	enable_pkg_thres_interrupt();
2888c2ecf20Sopenharmony_ci	raw_spin_unlock_irq(&pkg_temp_lock);
2898c2ecf20Sopenharmony_ci
2908c2ecf20Sopenharmony_ci	/*
2918c2ecf20Sopenharmony_ci	 * If tzone is not NULL, then thermal_zone_mutex will prevent the
2928c2ecf20Sopenharmony_ci	 * concurrent removal in the cpu offline callback.
2938c2ecf20Sopenharmony_ci	 */
2948c2ecf20Sopenharmony_ci	if (tzone)
2958c2ecf20Sopenharmony_ci		thermal_zone_device_update(tzone, THERMAL_EVENT_UNSPECIFIED);
2968c2ecf20Sopenharmony_ci
2978c2ecf20Sopenharmony_ci	mutex_unlock(&thermal_zone_mutex);
2988c2ecf20Sopenharmony_ci}
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_cistatic void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
3018c2ecf20Sopenharmony_ci{
3028c2ecf20Sopenharmony_ci	unsigned long ms = msecs_to_jiffies(notify_delay_ms);
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci	schedule_delayed_work_on(cpu, work, ms);
3058c2ecf20Sopenharmony_ci}
3068c2ecf20Sopenharmony_ci
3078c2ecf20Sopenharmony_cistatic int pkg_thermal_notify(u64 msr_val)
3088c2ecf20Sopenharmony_ci{
3098c2ecf20Sopenharmony_ci	int cpu = smp_processor_id();
3108c2ecf20Sopenharmony_ci	struct zone_device *zonedev;
3118c2ecf20Sopenharmony_ci	unsigned long flags;
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_ci	raw_spin_lock_irqsave(&pkg_temp_lock, flags);
3148c2ecf20Sopenharmony_ci	++pkg_interrupt_cnt;
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_ci	disable_pkg_thres_interrupt();
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci	/* Work is per package, so scheduling it once is enough. */
3198c2ecf20Sopenharmony_ci	zonedev = pkg_temp_thermal_get_dev(cpu);
3208c2ecf20Sopenharmony_ci	if (zonedev && !zonedev->work_scheduled) {
3218c2ecf20Sopenharmony_ci		zonedev->work_scheduled = true;
3228c2ecf20Sopenharmony_ci		pkg_thermal_schedule_work(zonedev->cpu, &zonedev->work);
3238c2ecf20Sopenharmony_ci	}
3248c2ecf20Sopenharmony_ci
3258c2ecf20Sopenharmony_ci	raw_spin_unlock_irqrestore(&pkg_temp_lock, flags);
3268c2ecf20Sopenharmony_ci	return 0;
3278c2ecf20Sopenharmony_ci}
3288c2ecf20Sopenharmony_ci
3298c2ecf20Sopenharmony_cistatic int pkg_temp_thermal_device_add(unsigned int cpu)
3308c2ecf20Sopenharmony_ci{
3318c2ecf20Sopenharmony_ci	int id = topology_logical_die_id(cpu);
3328c2ecf20Sopenharmony_ci	u32 tj_max, eax, ebx, ecx, edx;
3338c2ecf20Sopenharmony_ci	struct zone_device *zonedev;
3348c2ecf20Sopenharmony_ci	int thres_count, err;
3358c2ecf20Sopenharmony_ci
3368c2ecf20Sopenharmony_ci	if (id >= max_id)
3378c2ecf20Sopenharmony_ci		return -ENOMEM;
3388c2ecf20Sopenharmony_ci
3398c2ecf20Sopenharmony_ci	cpuid(6, &eax, &ebx, &ecx, &edx);
3408c2ecf20Sopenharmony_ci	thres_count = ebx & 0x07;
3418c2ecf20Sopenharmony_ci	if (!thres_count)
3428c2ecf20Sopenharmony_ci		return -ENODEV;
3438c2ecf20Sopenharmony_ci
3448c2ecf20Sopenharmony_ci	thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);
3458c2ecf20Sopenharmony_ci
3468c2ecf20Sopenharmony_ci	err = get_tj_max(cpu, &tj_max);
3478c2ecf20Sopenharmony_ci	if (err)
3488c2ecf20Sopenharmony_ci		return err;
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci	zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL);
3518c2ecf20Sopenharmony_ci	if (!zonedev)
3528c2ecf20Sopenharmony_ci		return -ENOMEM;
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci	INIT_DELAYED_WORK(&zonedev->work, pkg_temp_thermal_threshold_work_fn);
3558c2ecf20Sopenharmony_ci	zonedev->cpu = cpu;
3568c2ecf20Sopenharmony_ci	zonedev->tj_max = tj_max;
3578c2ecf20Sopenharmony_ci	zonedev->tzone = thermal_zone_device_register("x86_pkg_temp",
3588c2ecf20Sopenharmony_ci			thres_count,
3598c2ecf20Sopenharmony_ci			(thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01,
3608c2ecf20Sopenharmony_ci			zonedev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
3618c2ecf20Sopenharmony_ci	if (IS_ERR(zonedev->tzone)) {
3628c2ecf20Sopenharmony_ci		err = PTR_ERR(zonedev->tzone);
3638c2ecf20Sopenharmony_ci		kfree(zonedev);
3648c2ecf20Sopenharmony_ci		return err;
3658c2ecf20Sopenharmony_ci	}
3668c2ecf20Sopenharmony_ci	err = thermal_zone_device_enable(zonedev->tzone);
3678c2ecf20Sopenharmony_ci	if (err) {
3688c2ecf20Sopenharmony_ci		thermal_zone_device_unregister(zonedev->tzone);
3698c2ecf20Sopenharmony_ci		kfree(zonedev);
3708c2ecf20Sopenharmony_ci		return err;
3718c2ecf20Sopenharmony_ci	}
3728c2ecf20Sopenharmony_ci	/* Store MSR value for package thermal interrupt, to restore at exit */
3738c2ecf20Sopenharmony_ci	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, zonedev->msr_pkg_therm_low,
3748c2ecf20Sopenharmony_ci	      zonedev->msr_pkg_therm_high);
3758c2ecf20Sopenharmony_ci
3768c2ecf20Sopenharmony_ci	cpumask_set_cpu(cpu, &zonedev->cpumask);
3778c2ecf20Sopenharmony_ci	raw_spin_lock_irq(&pkg_temp_lock);
3788c2ecf20Sopenharmony_ci	zones[id] = zonedev;
3798c2ecf20Sopenharmony_ci	raw_spin_unlock_irq(&pkg_temp_lock);
3808c2ecf20Sopenharmony_ci	return 0;
3818c2ecf20Sopenharmony_ci}
3828c2ecf20Sopenharmony_ci
/*
 * CPU hotplug offline callback.
 *
 * Removes @cpu from its zone's cpumask and picks another CPU of the zone as
 * the new target. If @cpu was the last one, the thermal zone is unregistered,
 * the zones[] slot is cleared, the saved interrupt MSR value is written back
 * and the zone device is freed. Pending work that was targeted at @cpu is
 * cancelled and, if other CPUs remain and the work did not run in between,
 * queued again on the new target.
 *
 * Always returns 0.
 */
static int pkg_thermal_cpu_offline(unsigned int cpu)
{
	struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
	bool lastcpu, was_target;
	int target;

	if (!zonedev)
		return 0;

	target = cpumask_any_but(&zonedev->cpumask, cpu);
	cpumask_clear_cpu(cpu, &zonedev->cpumask);
	lastcpu = target >= nr_cpu_ids;
	/*
	 * Remove the sysfs files, if this is the last cpu in the package
	 * before doing further cleanups.
	 */
	if (lastcpu) {
		struct thermal_zone_device *tzone = zonedev->tzone;

		/*
		 * We must protect against a work function calling
		 * thermal_zone_update, after/while unregister. We null out
		 * the pointer under the zone mutex, so the worker function
		 * won't try to call.
		 */
		mutex_lock(&thermal_zone_mutex);
		zonedev->tzone = NULL;
		mutex_unlock(&thermal_zone_mutex);

		thermal_zone_device_unregister(tzone);
	}

	/* Protect against work and interrupts */
	raw_spin_lock_irq(&pkg_temp_lock);

	/*
	 * Check whether this cpu was the current target and store the new
	 * one. When we drop the lock, then the interrupt notify function
	 * will see the new target.
	 */
	was_target = zonedev->cpu == cpu;
	zonedev->cpu = target;

	/*
	 * If this is the last CPU in the package remove the package
	 * reference from the array and restore the interrupt MSR. When we
	 * drop the lock neither the interrupt notify function nor the
	 * worker will see the package anymore.
	 */
	if (lastcpu) {
		zones[topology_logical_die_id(cpu)] = NULL;
		/* After this point nothing touches the MSR anymore. */
		wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
		      zonedev->msr_pkg_therm_low, zonedev->msr_pkg_therm_high);
	}

	/*
	 * Check whether there is work scheduled and whether the work is
	 * targeted at the outgoing CPU.
	 */
	if (zonedev->work_scheduled && was_target) {
		/*
		 * To cancel the work we need to drop the lock, otherwise
		 * we might deadlock if the work needs to be flushed.
		 */
		raw_spin_unlock_irq(&pkg_temp_lock);
		cancel_delayed_work_sync(&zonedev->work);
		raw_spin_lock_irq(&pkg_temp_lock);
		/*
		 * If this is not the last cpu in the package and the work
		 * did not run after we dropped the lock above, then we
		 * need to reschedule the work, otherwise the interrupt
		 * stays disabled forever.
		 */
		if (!lastcpu && zonedev->work_scheduled)
			pkg_thermal_schedule_work(target, &zonedev->work);
	}

	raw_spin_unlock_irq(&pkg_temp_lock);

	/* Final cleanup if this is the last cpu */
	if (lastcpu)
		kfree(zonedev);
	return 0;
}
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_cistatic int pkg_thermal_cpu_online(unsigned int cpu)
4708c2ecf20Sopenharmony_ci{
4718c2ecf20Sopenharmony_ci	struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
4728c2ecf20Sopenharmony_ci	struct cpuinfo_x86 *c = &cpu_data(cpu);
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	/* Paranoia check */
4758c2ecf20Sopenharmony_ci	if (!cpu_has(c, X86_FEATURE_DTHERM) || !cpu_has(c, X86_FEATURE_PTS))
4768c2ecf20Sopenharmony_ci		return -ENODEV;
4778c2ecf20Sopenharmony_ci
4788c2ecf20Sopenharmony_ci	/* If the package exists, nothing to do */
4798c2ecf20Sopenharmony_ci	if (zonedev) {
4808c2ecf20Sopenharmony_ci		cpumask_set_cpu(cpu, &zonedev->cpumask);
4818c2ecf20Sopenharmony_ci		return 0;
4828c2ecf20Sopenharmony_ci	}
4838c2ecf20Sopenharmony_ci	return pkg_temp_thermal_device_add(cpu);
4848c2ecf20Sopenharmony_ci}
4858c2ecf20Sopenharmony_ci
/*
 * CPU match table: Intel CPUs with X86_FEATURE_PTS. Checked with
 * x86_match_cpu() at module init and exported via MODULE_DEVICE_TABLE().
 */
static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_PTS, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_cistatic int __init pkg_temp_thermal_init(void)
4938c2ecf20Sopenharmony_ci{
4948c2ecf20Sopenharmony_ci	int ret;
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci	if (!x86_match_cpu(pkg_temp_thermal_ids))
4978c2ecf20Sopenharmony_ci		return -ENODEV;
4988c2ecf20Sopenharmony_ci
4998c2ecf20Sopenharmony_ci	max_id = topology_max_packages() * topology_max_die_per_package();
5008c2ecf20Sopenharmony_ci	zones = kcalloc(max_id, sizeof(struct zone_device *),
5018c2ecf20Sopenharmony_ci			   GFP_KERNEL);
5028c2ecf20Sopenharmony_ci	if (!zones)
5038c2ecf20Sopenharmony_ci		return -ENOMEM;
5048c2ecf20Sopenharmony_ci
5058c2ecf20Sopenharmony_ci	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
5068c2ecf20Sopenharmony_ci				pkg_thermal_cpu_online,	pkg_thermal_cpu_offline);
5078c2ecf20Sopenharmony_ci	if (ret < 0)
5088c2ecf20Sopenharmony_ci		goto err;
5098c2ecf20Sopenharmony_ci
5108c2ecf20Sopenharmony_ci	/* Store the state for module exit */
5118c2ecf20Sopenharmony_ci	pkg_thermal_hp_state = ret;
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_ci	platform_thermal_package_notify = pkg_thermal_notify;
5148c2ecf20Sopenharmony_ci	platform_thermal_package_rate_control = pkg_thermal_rate_control;
5158c2ecf20Sopenharmony_ci
5168c2ecf20Sopenharmony_ci	 /* Don't care if it fails */
5178c2ecf20Sopenharmony_ci	pkg_temp_debugfs_init();
5188c2ecf20Sopenharmony_ci	return 0;
5198c2ecf20Sopenharmony_ci
5208c2ecf20Sopenharmony_cierr:
5218c2ecf20Sopenharmony_ci	kfree(zones);
5228c2ecf20Sopenharmony_ci	return ret;
5238c2ecf20Sopenharmony_ci}
5248c2ecf20Sopenharmony_cimodule_init(pkg_temp_thermal_init)
5258c2ecf20Sopenharmony_ci
/*
 * Module exit: detach the notify and rate-control hooks, remove the cpu
 * hotplug state stored at init, delete the debugfs directory and free the
 * zones[] array.
 */
static void __exit pkg_temp_thermal_exit(void)
{
	platform_thermal_package_notify = NULL;
	platform_thermal_package_rate_control = NULL;

	cpuhp_remove_state(pkg_thermal_hp_state);
	debugfs_remove_recursive(debugfs);
	kfree(zones);
}
module_exit(pkg_temp_thermal_exit)
5368c2ecf20Sopenharmony_ci
5378c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver");
5388c2ecf20Sopenharmony_ciMODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
5398c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL v2");
540