// SPDX-License-Identifier: GPL-2.0-only
//
// VCPU stall detector.
//  Copyright (C) Google, 2022
562306a36Sopenharmony_ci
#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/of.h>
#include <linux/param.h>
#include <linux/percpu.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/types.h>
2062306a36Sopenharmony_ci
/*
 * Register offsets inside one vCPU's register window. Every vCPU has its
 * own window of VCPU_STALL_REG_LEN bytes; the windows are laid out back to
 * back starting at the mapped base address.
 */
#define VCPU_STALL_REG_STATUS		(0x00)	/* 1 starts the detector, 0 stops it */
#define VCPU_STALL_REG_LOAD_CNT		(0x04)	/* counter reload value, in ticks */
#define VCPU_STALL_REG_CURRENT_CNT	(0x08)	/* not accessed by this file */
#define VCPU_STALL_REG_CLOCK_FREQ_HZ	(0x0C)	/* counter decrement rate, in Hz */
#define VCPU_STALL_REG_LEN		(0x10)	/* size of one per-vCPU window */

/*
 * Defaults applied when a device tree property is missing or out of range.
 * The *_MAX_* values are used as exclusive upper bounds by the probe checks.
 */
#define VCPU_STALL_DEFAULT_CLOCK_HZ	(10)
#define VCPU_STALL_MAX_CLOCK_HZ		(100)
#define VCPU_STALL_DEFAULT_TIMEOUT_SEC	(8)
#define VCPU_STALL_MAX_TIMEOUT_SEC	(600)
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_cistruct vcpu_stall_detect_config {
3362306a36Sopenharmony_ci	u32 clock_freq_hz;
3462306a36Sopenharmony_ci	u32 stall_timeout_sec;
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci	void __iomem *membase;
3762306a36Sopenharmony_ci	struct platform_device *dev;
3862306a36Sopenharmony_ci	enum cpuhp_state hp_online;
3962306a36Sopenharmony_ci};
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_cistruct vcpu_stall_priv {
4262306a36Sopenharmony_ci	struct hrtimer vcpu_hrtimer;
4362306a36Sopenharmony_ci	bool is_initialized;
4462306a36Sopenharmony_ci};
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ci/* The vcpu stall configuration structure which applies to all the CPUs */
4762306a36Sopenharmony_cistatic struct vcpu_stall_detect_config vcpu_stall_config;
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci#define vcpu_stall_reg_write(vcpu, reg, value)				\
5062306a36Sopenharmony_ci	writel_relaxed((value),						\
5162306a36Sopenharmony_ci		       (void __iomem *)(vcpu_stall_config.membase +	\
5262306a36Sopenharmony_ci		       (vcpu) * VCPU_STALL_REG_LEN + (reg)))
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_cistatic struct vcpu_stall_priv __percpu *vcpu_stall_detectors;
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_cistatic enum hrtimer_restart
5862306a36Sopenharmony_civcpu_stall_detect_timer_fn(struct hrtimer *hrtimer)
5962306a36Sopenharmony_ci{
6062306a36Sopenharmony_ci	u32 ticks, ping_timeout_ms;
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci	/* Reload the stall detector counter register every
6362306a36Sopenharmony_ci	 * `ping_timeout_ms` to prevent the virtual device
6462306a36Sopenharmony_ci	 * from decrementing it to 0. The virtual device decrements this
6562306a36Sopenharmony_ci	 * register at 'clock_freq_hz' frequency.
6662306a36Sopenharmony_ci	 */
6762306a36Sopenharmony_ci	ticks = vcpu_stall_config.clock_freq_hz *
6862306a36Sopenharmony_ci		vcpu_stall_config.stall_timeout_sec;
6962306a36Sopenharmony_ci	vcpu_stall_reg_write(smp_processor_id(),
7062306a36Sopenharmony_ci			     VCPU_STALL_REG_LOAD_CNT, ticks);
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci	ping_timeout_ms = vcpu_stall_config.stall_timeout_sec *
7362306a36Sopenharmony_ci			  MSEC_PER_SEC / 2;
7462306a36Sopenharmony_ci	hrtimer_forward_now(hrtimer,
7562306a36Sopenharmony_ci			    ms_to_ktime(ping_timeout_ms));
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci	return HRTIMER_RESTART;
7862306a36Sopenharmony_ci}
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_cistatic int start_stall_detector_cpu(unsigned int cpu)
8162306a36Sopenharmony_ci{
8262306a36Sopenharmony_ci	u32 ticks, ping_timeout_ms;
8362306a36Sopenharmony_ci	struct vcpu_stall_priv *vcpu_stall_detector =
8462306a36Sopenharmony_ci		this_cpu_ptr(vcpu_stall_detectors);
8562306a36Sopenharmony_ci	struct hrtimer *vcpu_hrtimer = &vcpu_stall_detector->vcpu_hrtimer;
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ci	vcpu_stall_reg_write(cpu, VCPU_STALL_REG_CLOCK_FREQ_HZ,
8862306a36Sopenharmony_ci			     vcpu_stall_config.clock_freq_hz);
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	/* Compute the number of ticks required for the stall detector
9162306a36Sopenharmony_ci	 * counter register based on the internal clock frequency and the
9262306a36Sopenharmony_ci	 * timeout value given from the device tree.
9362306a36Sopenharmony_ci	 */
9462306a36Sopenharmony_ci	ticks = vcpu_stall_config.clock_freq_hz *
9562306a36Sopenharmony_ci		vcpu_stall_config.stall_timeout_sec;
9662306a36Sopenharmony_ci	vcpu_stall_reg_write(cpu, VCPU_STALL_REG_LOAD_CNT, ticks);
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	/* Enable the internal clock and start the stall detector */
9962306a36Sopenharmony_ci	vcpu_stall_reg_write(cpu, VCPU_STALL_REG_STATUS, 1);
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	/* Pet the stall detector at half of its expiration timeout
10262306a36Sopenharmony_ci	 * to prevent spurious resets.
10362306a36Sopenharmony_ci	 */
10462306a36Sopenharmony_ci	ping_timeout_ms = vcpu_stall_config.stall_timeout_sec *
10562306a36Sopenharmony_ci			  MSEC_PER_SEC / 2;
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci	hrtimer_init(vcpu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
10862306a36Sopenharmony_ci	vcpu_hrtimer->function = vcpu_stall_detect_timer_fn;
10962306a36Sopenharmony_ci	vcpu_stall_detector->is_initialized = true;
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	hrtimer_start(vcpu_hrtimer, ms_to_ktime(ping_timeout_ms),
11262306a36Sopenharmony_ci		      HRTIMER_MODE_REL_PINNED);
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci	return 0;
11562306a36Sopenharmony_ci}
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_cistatic int stop_stall_detector_cpu(unsigned int cpu)
11862306a36Sopenharmony_ci{
11962306a36Sopenharmony_ci	struct vcpu_stall_priv *vcpu_stall_detector =
12062306a36Sopenharmony_ci		per_cpu_ptr(vcpu_stall_detectors, cpu);
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	if (!vcpu_stall_detector->is_initialized)
12362306a36Sopenharmony_ci		return 0;
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	/* Disable the stall detector for the current CPU */
12662306a36Sopenharmony_ci	hrtimer_cancel(&vcpu_stall_detector->vcpu_hrtimer);
12762306a36Sopenharmony_ci	vcpu_stall_reg_write(cpu, VCPU_STALL_REG_STATUS, 0);
12862306a36Sopenharmony_ci	vcpu_stall_detector->is_initialized = false;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	return 0;
13162306a36Sopenharmony_ci}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_cistatic int vcpu_stall_detect_probe(struct platform_device *pdev)
13462306a36Sopenharmony_ci{
13562306a36Sopenharmony_ci	int ret;
13662306a36Sopenharmony_ci	struct resource *r;
13762306a36Sopenharmony_ci	void __iomem *membase;
13862306a36Sopenharmony_ci	u32 clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ;
13962306a36Sopenharmony_ci	u32 stall_timeout_sec = VCPU_STALL_DEFAULT_TIMEOUT_SEC;
14062306a36Sopenharmony_ci	struct device_node *np = pdev->dev.of_node;
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci	vcpu_stall_detectors = devm_alloc_percpu(&pdev->dev,
14362306a36Sopenharmony_ci						 typeof(struct vcpu_stall_priv));
14462306a36Sopenharmony_ci	if (!vcpu_stall_detectors)
14562306a36Sopenharmony_ci		return -ENOMEM;
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci	membase = devm_platform_get_and_ioremap_resource(pdev, 0, &r);
14862306a36Sopenharmony_ci	if (IS_ERR(membase)) {
14962306a36Sopenharmony_ci		dev_err(&pdev->dev, "Failed to get memory resource\n");
15062306a36Sopenharmony_ci		return PTR_ERR(membase);
15162306a36Sopenharmony_ci	}
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci	if (!of_property_read_u32(np, "clock-frequency", &clock_freq_hz)) {
15462306a36Sopenharmony_ci		if (!(clock_freq_hz > 0 &&
15562306a36Sopenharmony_ci		      clock_freq_hz < VCPU_STALL_MAX_CLOCK_HZ)) {
15662306a36Sopenharmony_ci			dev_warn(&pdev->dev, "clk out of range\n");
15762306a36Sopenharmony_ci			clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ;
15862306a36Sopenharmony_ci		}
15962306a36Sopenharmony_ci	}
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci	if (!of_property_read_u32(np, "timeout-sec", &stall_timeout_sec)) {
16262306a36Sopenharmony_ci		if (!(stall_timeout_sec > 0 &&
16362306a36Sopenharmony_ci		      stall_timeout_sec < VCPU_STALL_MAX_TIMEOUT_SEC)) {
16462306a36Sopenharmony_ci			dev_warn(&pdev->dev, "stall timeout out of range\n");
16562306a36Sopenharmony_ci			stall_timeout_sec = VCPU_STALL_DEFAULT_TIMEOUT_SEC;
16662306a36Sopenharmony_ci		}
16762306a36Sopenharmony_ci	}
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci	vcpu_stall_config = (struct vcpu_stall_detect_config) {
17062306a36Sopenharmony_ci		.membase		= membase,
17162306a36Sopenharmony_ci		.clock_freq_hz		= clock_freq_hz,
17262306a36Sopenharmony_ci		.stall_timeout_sec	= stall_timeout_sec
17362306a36Sopenharmony_ci	};
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
17662306a36Sopenharmony_ci				"virt/vcpu_stall_detector:online",
17762306a36Sopenharmony_ci				start_stall_detector_cpu,
17862306a36Sopenharmony_ci				stop_stall_detector_cpu);
17962306a36Sopenharmony_ci	if (ret < 0) {
18062306a36Sopenharmony_ci		dev_err(&pdev->dev, "failed to install cpu hotplug");
18162306a36Sopenharmony_ci		goto err;
18262306a36Sopenharmony_ci	}
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	vcpu_stall_config.hp_online = ret;
18562306a36Sopenharmony_ci	return 0;
18662306a36Sopenharmony_cierr:
18762306a36Sopenharmony_ci	return ret;
18862306a36Sopenharmony_ci}
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_cistatic int vcpu_stall_detect_remove(struct platform_device *pdev)
19162306a36Sopenharmony_ci{
19262306a36Sopenharmony_ci	int cpu;
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci	cpuhp_remove_state(vcpu_stall_config.hp_online);
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	for_each_possible_cpu(cpu)
19762306a36Sopenharmony_ci		stop_stall_detector_cpu(cpu);
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	return 0;
20062306a36Sopenharmony_ci}
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_cistatic const struct of_device_id vcpu_stall_detect_of_match[] = {
20362306a36Sopenharmony_ci	{ .compatible = "qemu,vcpu-stall-detector", },
20462306a36Sopenharmony_ci	{}
20562306a36Sopenharmony_ci};
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ciMODULE_DEVICE_TABLE(of, vcpu_stall_detect_of_match);
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_cistatic struct platform_driver vcpu_stall_detect_driver = {
21062306a36Sopenharmony_ci	.probe  = vcpu_stall_detect_probe,
21162306a36Sopenharmony_ci	.remove = vcpu_stall_detect_remove,
21262306a36Sopenharmony_ci	.driver = {
21362306a36Sopenharmony_ci		.name           = KBUILD_MODNAME,
21462306a36Sopenharmony_ci		.of_match_table = vcpu_stall_detect_of_match,
21562306a36Sopenharmony_ci	},
21662306a36Sopenharmony_ci};
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_cimodule_platform_driver(vcpu_stall_detect_driver);
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ciMODULE_LICENSE("GPL");
22162306a36Sopenharmony_ciMODULE_AUTHOR("Sebastian Ene <sebastianene@google.com>");
22262306a36Sopenharmony_ciMODULE_DESCRIPTION("VCPU stall detector");
223