18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Xen stolen ticks accounting.
48c2ecf20Sopenharmony_ci */
58c2ecf20Sopenharmony_ci#include <linux/kernel.h>
68c2ecf20Sopenharmony_ci#include <linux/kernel_stat.h>
78c2ecf20Sopenharmony_ci#include <linux/math64.h>
88c2ecf20Sopenharmony_ci#include <linux/gfp.h>
98c2ecf20Sopenharmony_ci#include <linux/slab.h>
108c2ecf20Sopenharmony_ci
118c2ecf20Sopenharmony_ci#include <asm/paravirt.h>
128c2ecf20Sopenharmony_ci#include <asm/xen/hypervisor.h>
138c2ecf20Sopenharmony_ci#include <asm/xen/hypercall.h>
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_ci#include <xen/events.h>
168c2ecf20Sopenharmony_ci#include <xen/features.h>
178c2ecf20Sopenharmony_ci#include <xen/interface/xen.h>
188c2ecf20Sopenharmony_ci#include <xen/interface/vcpu.h>
198c2ecf20Sopenharmony_ci#include <xen/xen-ops.h>
208c2ecf20Sopenharmony_ci
/*
 * Per-CPU runstate info updated by Xen. The address of each CPU's copy is
 * handed to the hypervisor in xen_setup_runstate_info().
 */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);

/*
 * Per-CPU offsets added to the four runstate time counters when a snapshot
 * is taken; increased by xen_manage_runstate_time() after a resume.
 */
static DEFINE_PER_CPU(u64[4], old_runstate_time);
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_ci/* return an consistent snapshot of 64-bit time/counter value */
278c2ecf20Sopenharmony_cistatic u64 get64(const u64 *p)
288c2ecf20Sopenharmony_ci{
298c2ecf20Sopenharmony_ci	u64 ret;
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci	if (BITS_PER_LONG < 64) {
328c2ecf20Sopenharmony_ci		u32 *p32 = (u32 *)p;
338c2ecf20Sopenharmony_ci		u32 h, l, h2;
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_ci		/*
368c2ecf20Sopenharmony_ci		 * Read high then low, and then make sure high is
378c2ecf20Sopenharmony_ci		 * still the same; this will only loop if low wraps
388c2ecf20Sopenharmony_ci		 * and carries into high.
398c2ecf20Sopenharmony_ci		 * XXX some clean way to make this endian-proof?
408c2ecf20Sopenharmony_ci		 */
418c2ecf20Sopenharmony_ci		do {
428c2ecf20Sopenharmony_ci			h = READ_ONCE(p32[1]);
438c2ecf20Sopenharmony_ci			l = READ_ONCE(p32[0]);
448c2ecf20Sopenharmony_ci			h2 = READ_ONCE(p32[1]);
458c2ecf20Sopenharmony_ci		} while(h2 != h);
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci		ret = (((u64)h) << 32) | l;
488c2ecf20Sopenharmony_ci	} else
498c2ecf20Sopenharmony_ci		ret = READ_ONCE(*p);
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_ci	return ret;
528c2ecf20Sopenharmony_ci}
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_cistatic void xen_get_runstate_snapshot_cpu_delta(
558c2ecf20Sopenharmony_ci			      struct vcpu_runstate_info *res, unsigned int cpu)
568c2ecf20Sopenharmony_ci{
578c2ecf20Sopenharmony_ci	u64 state_time;
588c2ecf20Sopenharmony_ci	struct vcpu_runstate_info *state;
598c2ecf20Sopenharmony_ci
608c2ecf20Sopenharmony_ci	BUG_ON(preemptible());
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci	state = per_cpu_ptr(&xen_runstate, cpu);
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	do {
658c2ecf20Sopenharmony_ci		state_time = get64(&state->state_entry_time);
668c2ecf20Sopenharmony_ci		rmb();	/* Hypervisor might update data. */
678c2ecf20Sopenharmony_ci		*res = __READ_ONCE(*state);
688c2ecf20Sopenharmony_ci		rmb();	/* Hypervisor might update data. */
698c2ecf20Sopenharmony_ci	} while (get64(&state->state_entry_time) != state_time ||
708c2ecf20Sopenharmony_ci		 (state_time & XEN_RUNSTATE_UPDATE));
718c2ecf20Sopenharmony_ci}
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_cistatic void xen_get_runstate_snapshot_cpu(struct vcpu_runstate_info *res,
748c2ecf20Sopenharmony_ci					  unsigned int cpu)
758c2ecf20Sopenharmony_ci{
768c2ecf20Sopenharmony_ci	int i;
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci	xen_get_runstate_snapshot_cpu_delta(res, cpu);
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	for (i = 0; i < 4; i++)
818c2ecf20Sopenharmony_ci		res->time[i] += per_cpu(old_runstate_time, cpu)[i];
828c2ecf20Sopenharmony_ci}
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_civoid xen_manage_runstate_time(int action)
858c2ecf20Sopenharmony_ci{
868c2ecf20Sopenharmony_ci	static struct vcpu_runstate_info *runstate_delta;
878c2ecf20Sopenharmony_ci	struct vcpu_runstate_info state;
888c2ecf20Sopenharmony_ci	int cpu, i;
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci	switch (action) {
918c2ecf20Sopenharmony_ci	case -1: /* backup runstate time before suspend */
928c2ecf20Sopenharmony_ci		if (unlikely(runstate_delta))
938c2ecf20Sopenharmony_ci			pr_warn_once("%s: memory leak as runstate_delta is not NULL\n",
948c2ecf20Sopenharmony_ci					__func__);
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_ci		runstate_delta = kmalloc_array(num_possible_cpus(),
978c2ecf20Sopenharmony_ci					sizeof(*runstate_delta),
988c2ecf20Sopenharmony_ci					GFP_ATOMIC);
998c2ecf20Sopenharmony_ci		if (unlikely(!runstate_delta)) {
1008c2ecf20Sopenharmony_ci			pr_warn("%s: failed to allocate runstate_delta\n",
1018c2ecf20Sopenharmony_ci					__func__);
1028c2ecf20Sopenharmony_ci			return;
1038c2ecf20Sopenharmony_ci		}
1048c2ecf20Sopenharmony_ci
1058c2ecf20Sopenharmony_ci		for_each_possible_cpu(cpu) {
1068c2ecf20Sopenharmony_ci			xen_get_runstate_snapshot_cpu_delta(&state, cpu);
1078c2ecf20Sopenharmony_ci			memcpy(runstate_delta[cpu].time, state.time,
1088c2ecf20Sopenharmony_ci					sizeof(runstate_delta[cpu].time));
1098c2ecf20Sopenharmony_ci		}
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci		break;
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_ci	case 0: /* backup runstate time after resume */
1148c2ecf20Sopenharmony_ci		if (unlikely(!runstate_delta)) {
1158c2ecf20Sopenharmony_ci			pr_warn("%s: cannot accumulate runstate time as runstate_delta is NULL\n",
1168c2ecf20Sopenharmony_ci					__func__);
1178c2ecf20Sopenharmony_ci			return;
1188c2ecf20Sopenharmony_ci		}
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci		for_each_possible_cpu(cpu) {
1218c2ecf20Sopenharmony_ci			for (i = 0; i < 4; i++)
1228c2ecf20Sopenharmony_ci				per_cpu(old_runstate_time, cpu)[i] +=
1238c2ecf20Sopenharmony_ci					runstate_delta[cpu].time[i];
1248c2ecf20Sopenharmony_ci		}
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci		break;
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_ci	default: /* do not accumulate runstate time for checkpointing */
1298c2ecf20Sopenharmony_ci		break;
1308c2ecf20Sopenharmony_ci	}
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci	if (action != -1 && runstate_delta) {
1338c2ecf20Sopenharmony_ci		kfree(runstate_delta);
1348c2ecf20Sopenharmony_ci		runstate_delta = NULL;
1358c2ecf20Sopenharmony_ci	}
1368c2ecf20Sopenharmony_ci}
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci/*
1398c2ecf20Sopenharmony_ci * Runstate accounting
1408c2ecf20Sopenharmony_ci */
/* Fill @res with a runstate snapshot of the CPU this code is running on. */
void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
{
	unsigned int this_cpu = smp_processor_id();

	xen_get_runstate_snapshot_cpu(res, this_cpu);
}
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci/* return true when a vcpu could run but has no real cpu to run on */
1478c2ecf20Sopenharmony_cibool xen_vcpu_stolen(int vcpu)
1488c2ecf20Sopenharmony_ci{
1498c2ecf20Sopenharmony_ci	return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
1508c2ecf20Sopenharmony_ci}
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ciu64 xen_steal_clock(int cpu)
1538c2ecf20Sopenharmony_ci{
1548c2ecf20Sopenharmony_ci	struct vcpu_runstate_info state;
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ci	xen_get_runstate_snapshot_cpu(&state, cpu);
1578c2ecf20Sopenharmony_ci	return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
1588c2ecf20Sopenharmony_ci}
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_civoid xen_setup_runstate_info(int cpu)
1618c2ecf20Sopenharmony_ci{
1628c2ecf20Sopenharmony_ci	struct vcpu_register_runstate_memory_area area;
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_ci	area.addr.v = &per_cpu(xen_runstate, cpu);
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_ci	if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
1678c2ecf20Sopenharmony_ci			       xen_vcpu_nr(cpu), &area))
1688c2ecf20Sopenharmony_ci		BUG();
1698c2ecf20Sopenharmony_ci}
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_civoid __init xen_time_setup_guest(void)
1728c2ecf20Sopenharmony_ci{
1738c2ecf20Sopenharmony_ci	bool xen_runstate_remote;
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable,
1768c2ecf20Sopenharmony_ci					VMASST_TYPE_runstate_update_flag);
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci	pv_ops.time.steal_clock = xen_steal_clock;
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_ci	static_key_slow_inc(&paravirt_steal_enabled);
1818c2ecf20Sopenharmony_ci	if (xen_runstate_remote)
1828c2ecf20Sopenharmony_ci		static_key_slow_inc(&paravirt_steal_rq_enabled);
1838c2ecf20Sopenharmony_ci}
184