18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Xen stolen ticks accounting. 48c2ecf20Sopenharmony_ci */ 58c2ecf20Sopenharmony_ci#include <linux/kernel.h> 68c2ecf20Sopenharmony_ci#include <linux/kernel_stat.h> 78c2ecf20Sopenharmony_ci#include <linux/math64.h> 88c2ecf20Sopenharmony_ci#include <linux/gfp.h> 98c2ecf20Sopenharmony_ci#include <linux/slab.h> 108c2ecf20Sopenharmony_ci 118c2ecf20Sopenharmony_ci#include <asm/paravirt.h> 128c2ecf20Sopenharmony_ci#include <asm/xen/hypervisor.h> 138c2ecf20Sopenharmony_ci#include <asm/xen/hypercall.h> 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_ci#include <xen/events.h> 168c2ecf20Sopenharmony_ci#include <xen/features.h> 178c2ecf20Sopenharmony_ci#include <xen/interface/xen.h> 188c2ecf20Sopenharmony_ci#include <xen/interface/vcpu.h> 198c2ecf20Sopenharmony_ci#include <xen/xen-ops.h> 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_ci/* runstate info updated by Xen */ 228c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(u64[4], old_runstate_time); 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_ci/* return an consistent snapshot of 64-bit time/counter value */ 278c2ecf20Sopenharmony_cistatic u64 get64(const u64 *p) 288c2ecf20Sopenharmony_ci{ 298c2ecf20Sopenharmony_ci u64 ret; 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ci if (BITS_PER_LONG < 64) { 328c2ecf20Sopenharmony_ci u32 *p32 = (u32 *)p; 338c2ecf20Sopenharmony_ci u32 h, l, h2; 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_ci /* 368c2ecf20Sopenharmony_ci * Read high then low, and then make sure high is 378c2ecf20Sopenharmony_ci * still the same; this will only loop if low wraps 388c2ecf20Sopenharmony_ci * and carries into high. 398c2ecf20Sopenharmony_ci * XXX some clean way to make this endian-proof? 408c2ecf20Sopenharmony_ci */ 418c2ecf20Sopenharmony_ci do { 428c2ecf20Sopenharmony_ci h = READ_ONCE(p32[1]); 438c2ecf20Sopenharmony_ci l = READ_ONCE(p32[0]); 448c2ecf20Sopenharmony_ci h2 = READ_ONCE(p32[1]); 458c2ecf20Sopenharmony_ci } while(h2 != h); 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci ret = (((u64)h) << 32) | l; 488c2ecf20Sopenharmony_ci } else 498c2ecf20Sopenharmony_ci ret = READ_ONCE(*p); 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_ci return ret; 528c2ecf20Sopenharmony_ci} 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_cistatic void xen_get_runstate_snapshot_cpu_delta( 558c2ecf20Sopenharmony_ci struct vcpu_runstate_info *res, unsigned int cpu) 568c2ecf20Sopenharmony_ci{ 578c2ecf20Sopenharmony_ci u64 state_time; 588c2ecf20Sopenharmony_ci struct vcpu_runstate_info *state; 598c2ecf20Sopenharmony_ci 608c2ecf20Sopenharmony_ci BUG_ON(preemptible()); 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci state = per_cpu_ptr(&xen_runstate, cpu); 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci do { 658c2ecf20Sopenharmony_ci state_time = get64(&state->state_entry_time); 668c2ecf20Sopenharmony_ci rmb(); /* Hypervisor might update data. */ 678c2ecf20Sopenharmony_ci *res = __READ_ONCE(*state); 688c2ecf20Sopenharmony_ci rmb(); /* Hypervisor might update data. */ 698c2ecf20Sopenharmony_ci } while (get64(&state->state_entry_time) != state_time || 708c2ecf20Sopenharmony_ci (state_time & XEN_RUNSTATE_UPDATE)); 718c2ecf20Sopenharmony_ci} 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_cistatic void xen_get_runstate_snapshot_cpu(struct vcpu_runstate_info *res, 748c2ecf20Sopenharmony_ci unsigned int cpu) 758c2ecf20Sopenharmony_ci{ 768c2ecf20Sopenharmony_ci int i; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci xen_get_runstate_snapshot_cpu_delta(res, cpu); 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci for (i = 0; i < 4; i++) 818c2ecf20Sopenharmony_ci res->time[i] += per_cpu(old_runstate_time, cpu)[i]; 828c2ecf20Sopenharmony_ci} 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_civoid xen_manage_runstate_time(int action) 858c2ecf20Sopenharmony_ci{ 868c2ecf20Sopenharmony_ci static struct vcpu_runstate_info *runstate_delta; 878c2ecf20Sopenharmony_ci struct vcpu_runstate_info state; 888c2ecf20Sopenharmony_ci int cpu, i; 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci switch (action) { 918c2ecf20Sopenharmony_ci case -1: /* backup runstate time before suspend */ 928c2ecf20Sopenharmony_ci if (unlikely(runstate_delta)) 938c2ecf20Sopenharmony_ci pr_warn_once("%s: memory leak as runstate_delta is not NULL\n", 948c2ecf20Sopenharmony_ci __func__); 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci runstate_delta = kmalloc_array(num_possible_cpus(), 978c2ecf20Sopenharmony_ci sizeof(*runstate_delta), 988c2ecf20Sopenharmony_ci GFP_ATOMIC); 998c2ecf20Sopenharmony_ci if (unlikely(!runstate_delta)) { 1008c2ecf20Sopenharmony_ci pr_warn("%s: failed to allocate runstate_delta\n", 1018c2ecf20Sopenharmony_ci __func__); 1028c2ecf20Sopenharmony_ci return; 1038c2ecf20Sopenharmony_ci } 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci for_each_possible_cpu(cpu) { 1068c2ecf20Sopenharmony_ci xen_get_runstate_snapshot_cpu_delta(&state, cpu); 1078c2ecf20Sopenharmony_ci memcpy(runstate_delta[cpu].time, state.time, 1088c2ecf20Sopenharmony_ci sizeof(runstate_delta[cpu].time)); 1098c2ecf20Sopenharmony_ci } 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci break; 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci case 0: /* backup runstate time after resume */ 1148c2ecf20Sopenharmony_ci if (unlikely(!runstate_delta)) { 1158c2ecf20Sopenharmony_ci pr_warn("%s: cannot accumulate runstate time as runstate_delta is NULL\n", 1168c2ecf20Sopenharmony_ci __func__); 1178c2ecf20Sopenharmony_ci return; 1188c2ecf20Sopenharmony_ci } 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci for_each_possible_cpu(cpu) { 1218c2ecf20Sopenharmony_ci for (i = 0; i < 4; i++) 1228c2ecf20Sopenharmony_ci per_cpu(old_runstate_time, cpu)[i] += 1238c2ecf20Sopenharmony_ci runstate_delta[cpu].time[i]; 1248c2ecf20Sopenharmony_ci } 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ci break; 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci default: /* do not accumulate runstate time for checkpointing */ 1298c2ecf20Sopenharmony_ci break; 1308c2ecf20Sopenharmony_ci } 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci if (action != -1 && runstate_delta) { 1338c2ecf20Sopenharmony_ci kfree(runstate_delta); 1348c2ecf20Sopenharmony_ci runstate_delta = NULL; 1358c2ecf20Sopenharmony_ci } 1368c2ecf20Sopenharmony_ci} 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_ci/* 1398c2ecf20Sopenharmony_ci * Runstate accounting 1408c2ecf20Sopenharmony_ci */ 1418c2ecf20Sopenharmony_civoid xen_get_runstate_snapshot(struct vcpu_runstate_info *res) 1428c2ecf20Sopenharmony_ci{ 1438c2ecf20Sopenharmony_ci xen_get_runstate_snapshot_cpu(res, smp_processor_id()); 1448c2ecf20Sopenharmony_ci} 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_ci/* return true when a vcpu could run but has no real cpu to run on */ 1478c2ecf20Sopenharmony_cibool xen_vcpu_stolen(int vcpu) 1488c2ecf20Sopenharmony_ci{ 1498c2ecf20Sopenharmony_ci return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; 1508c2ecf20Sopenharmony_ci} 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ciu64 xen_steal_clock(int cpu) 1538c2ecf20Sopenharmony_ci{ 1548c2ecf20Sopenharmony_ci struct vcpu_runstate_info state; 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci xen_get_runstate_snapshot_cpu(&state, cpu); 1578c2ecf20Sopenharmony_ci return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; 1588c2ecf20Sopenharmony_ci} 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_civoid xen_setup_runstate_info(int cpu) 1618c2ecf20Sopenharmony_ci{ 1628c2ecf20Sopenharmony_ci struct vcpu_register_runstate_memory_area area; 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci area.addr.v = &per_cpu(xen_runstate, cpu); 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, 1678c2ecf20Sopenharmony_ci xen_vcpu_nr(cpu), &area)) 1688c2ecf20Sopenharmony_ci BUG(); 1698c2ecf20Sopenharmony_ci} 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_civoid __init xen_time_setup_guest(void) 1728c2ecf20Sopenharmony_ci{ 1738c2ecf20Sopenharmony_ci bool xen_runstate_remote; 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable, 1768c2ecf20Sopenharmony_ci VMASST_TYPE_runstate_update_flag); 1778c2ecf20Sopenharmony_ci 1788c2ecf20Sopenharmony_ci pv_ops.time.steal_clock = xen_steal_clock; 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci static_key_slow_inc(¶virt_steal_enabled); 1818c2ecf20Sopenharmony_ci if (xen_runstate_remote) 1828c2ecf20Sopenharmony_ci static_key_slow_inc(¶virt_steal_rq_enabled); 1838c2ecf20Sopenharmony_ci} 184