162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Xen time implementation. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * This is implemented in terms of a clocksource driver which uses 662306a36Sopenharmony_ci * the hypervisor clock as a nanosecond timebase, and a clockevent 762306a36Sopenharmony_ci * driver which uses the hypervisor's timer mechanism. 862306a36Sopenharmony_ci * 962306a36Sopenharmony_ci * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 1062306a36Sopenharmony_ci */ 1162306a36Sopenharmony_ci#include <linux/kernel.h> 1262306a36Sopenharmony_ci#include <linux/interrupt.h> 1362306a36Sopenharmony_ci#include <linux/clocksource.h> 1462306a36Sopenharmony_ci#include <linux/clockchips.h> 1562306a36Sopenharmony_ci#include <linux/gfp.h> 1662306a36Sopenharmony_ci#include <linux/slab.h> 1762306a36Sopenharmony_ci#include <linux/pvclock_gtod.h> 1862306a36Sopenharmony_ci#include <linux/timekeeper_internal.h> 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#include <asm/pvclock.h> 2162306a36Sopenharmony_ci#include <asm/xen/hypervisor.h> 2262306a36Sopenharmony_ci#include <asm/xen/hypercall.h> 2362306a36Sopenharmony_ci#include <asm/xen/cpuid.h> 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci#include <xen/events.h> 2662306a36Sopenharmony_ci#include <xen/features.h> 2762306a36Sopenharmony_ci#include <xen/interface/xen.h> 2862306a36Sopenharmony_ci#include <xen/interface/vcpu.h> 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci#include "xen-ops.h" 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_ci/* Minimum amount of time until next clock event fires */ 3362306a36Sopenharmony_ci#define TIMER_SLOP 100000 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_cistatic u64 xen_sched_clock_offset __read_mostly; 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci/* Get the TSC speed from Xen */ 3862306a36Sopenharmony_cistatic unsigned long xen_tsc_khz(void) 3962306a36Sopenharmony_ci{ 4062306a36Sopenharmony_ci struct pvclock_vcpu_time_info *info = 4162306a36Sopenharmony_ci &HYPERVISOR_shared_info->vcpu_info[0].time; 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); 4462306a36Sopenharmony_ci return pvclock_tsc_khz(info); 4562306a36Sopenharmony_ci} 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_cistatic u64 xen_clocksource_read(void) 4862306a36Sopenharmony_ci{ 4962306a36Sopenharmony_ci struct pvclock_vcpu_time_info *src; 5062306a36Sopenharmony_ci u64 ret; 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci preempt_disable_notrace(); 5362306a36Sopenharmony_ci src = &__this_cpu_read(xen_vcpu)->time; 5462306a36Sopenharmony_ci ret = pvclock_clocksource_read(src); 5562306a36Sopenharmony_ci preempt_enable_notrace(); 5662306a36Sopenharmony_ci return ret; 5762306a36Sopenharmony_ci} 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_cistatic u64 xen_clocksource_get_cycles(struct clocksource *cs) 6062306a36Sopenharmony_ci{ 6162306a36Sopenharmony_ci return xen_clocksource_read(); 6262306a36Sopenharmony_ci} 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_cistatic noinstr u64 xen_sched_clock(void) 6562306a36Sopenharmony_ci{ 6662306a36Sopenharmony_ci struct pvclock_vcpu_time_info *src; 6762306a36Sopenharmony_ci u64 ret; 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci src = &__this_cpu_read(xen_vcpu)->time; 7062306a36Sopenharmony_ci ret = pvclock_clocksource_read_nowd(src); 7162306a36Sopenharmony_ci ret -= xen_sched_clock_offset; 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_ci return ret; 7462306a36Sopenharmony_ci} 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_cistatic void xen_read_wallclock(struct timespec64 *ts) 7762306a36Sopenharmony_ci{ 7862306a36Sopenharmony_ci struct shared_info *s = HYPERVISOR_shared_info; 7962306a36Sopenharmony_ci struct pvclock_wall_clock *wall_clock = &(s->wc); 8062306a36Sopenharmony_ci struct pvclock_vcpu_time_info *vcpu_time; 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci vcpu_time = &get_cpu_var(xen_vcpu)->time; 8362306a36Sopenharmony_ci pvclock_read_wallclock(wall_clock, vcpu_time, ts); 8462306a36Sopenharmony_ci put_cpu_var(xen_vcpu); 8562306a36Sopenharmony_ci} 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_cistatic void xen_get_wallclock(struct timespec64 *now) 8862306a36Sopenharmony_ci{ 8962306a36Sopenharmony_ci xen_read_wallclock(now); 9062306a36Sopenharmony_ci} 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_cistatic int xen_set_wallclock(const struct timespec64 *now) 9362306a36Sopenharmony_ci{ 9462306a36Sopenharmony_ci return -ENODEV; 9562306a36Sopenharmony_ci} 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_cistatic int xen_pvclock_gtod_notify(struct notifier_block *nb, 9862306a36Sopenharmony_ci unsigned long was_set, void *priv) 9962306a36Sopenharmony_ci{ 10062306a36Sopenharmony_ci /* Protected by the calling core code serialization */ 10162306a36Sopenharmony_ci static struct timespec64 next_sync; 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci struct xen_platform_op op; 10462306a36Sopenharmony_ci struct timespec64 now; 10562306a36Sopenharmony_ci struct timekeeper *tk = priv; 10662306a36Sopenharmony_ci static bool settime64_supported = true; 10762306a36Sopenharmony_ci int ret; 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci now.tv_sec = tk->xtime_sec; 11062306a36Sopenharmony_ci now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci /* 11362306a36Sopenharmony_ci * We only take the expensive HV call when the clock was set 11462306a36Sopenharmony_ci * or when the 11 minutes RTC synchronization time elapsed. 11562306a36Sopenharmony_ci */ 11662306a36Sopenharmony_ci if (!was_set && timespec64_compare(&now, &next_sync) < 0) 11762306a36Sopenharmony_ci return NOTIFY_OK; 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ciagain: 12062306a36Sopenharmony_ci if (settime64_supported) { 12162306a36Sopenharmony_ci op.cmd = XENPF_settime64; 12262306a36Sopenharmony_ci op.u.settime64.mbz = 0; 12362306a36Sopenharmony_ci op.u.settime64.secs = now.tv_sec; 12462306a36Sopenharmony_ci op.u.settime64.nsecs = now.tv_nsec; 12562306a36Sopenharmony_ci op.u.settime64.system_time = xen_clocksource_read(); 12662306a36Sopenharmony_ci } else { 12762306a36Sopenharmony_ci op.cmd = XENPF_settime32; 12862306a36Sopenharmony_ci op.u.settime32.secs = now.tv_sec; 12962306a36Sopenharmony_ci op.u.settime32.nsecs = now.tv_nsec; 13062306a36Sopenharmony_ci op.u.settime32.system_time = xen_clocksource_read(); 13162306a36Sopenharmony_ci } 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci ret = HYPERVISOR_platform_op(&op); 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci if (ret == -ENOSYS && settime64_supported) { 13662306a36Sopenharmony_ci settime64_supported = false; 13762306a36Sopenharmony_ci goto again; 13862306a36Sopenharmony_ci } 13962306a36Sopenharmony_ci if (ret < 0) 14062306a36Sopenharmony_ci return NOTIFY_BAD; 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci /* 14362306a36Sopenharmony_ci * Move the next drift compensation time 11 minutes 14462306a36Sopenharmony_ci * ahead. That's emulating the sync_cmos_clock() update for 14562306a36Sopenharmony_ci * the hardware RTC. 14662306a36Sopenharmony_ci */ 14762306a36Sopenharmony_ci next_sync = now; 14862306a36Sopenharmony_ci next_sync.tv_sec += 11 * 60; 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci return NOTIFY_OK; 15162306a36Sopenharmony_ci} 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_cistatic struct notifier_block xen_pvclock_gtod_notifier = { 15462306a36Sopenharmony_ci .notifier_call = xen_pvclock_gtod_notify, 15562306a36Sopenharmony_ci}; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_cistatic int xen_cs_enable(struct clocksource *cs) 15862306a36Sopenharmony_ci{ 15962306a36Sopenharmony_ci vclocks_set_used(VDSO_CLOCKMODE_PVCLOCK); 16062306a36Sopenharmony_ci return 0; 16162306a36Sopenharmony_ci} 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_cistatic struct clocksource xen_clocksource __read_mostly = { 16462306a36Sopenharmony_ci .name = "xen", 16562306a36Sopenharmony_ci .rating = 400, 16662306a36Sopenharmony_ci .read = xen_clocksource_get_cycles, 16762306a36Sopenharmony_ci .mask = CLOCKSOURCE_MASK(64), 16862306a36Sopenharmony_ci .flags = CLOCK_SOURCE_IS_CONTINUOUS, 16962306a36Sopenharmony_ci .enable = xen_cs_enable, 17062306a36Sopenharmony_ci}; 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci/* 17362306a36Sopenharmony_ci Xen clockevent implementation 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci Xen has two clockevent implementations: 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci The old timer_op one works with all released versions of Xen prior 17862306a36Sopenharmony_ci to version 3.0.4. This version of the hypervisor provides a 17962306a36Sopenharmony_ci single-shot timer with nanosecond resolution. However, sharing the 18062306a36Sopenharmony_ci same event channel is a 100Hz tick which is delivered while the 18162306a36Sopenharmony_ci vcpu is running. We don't care about or use this tick, but it will 18262306a36Sopenharmony_ci cause the core time code to think the timer fired too soon, and 18362306a36Sopenharmony_ci will end up resetting it each time. It could be filtered, but 18462306a36Sopenharmony_ci doing so has complications when the ktime clocksource is not yet 18562306a36Sopenharmony_ci the xen clocksource (ie, at boot time). 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci The new vcpu_op-based timer interface allows the tick timer period 18862306a36Sopenharmony_ci to be changed or turned off. The tick timer is not useful as a 18962306a36Sopenharmony_ci periodic timer because events are only delivered to running vcpus. 19062306a36Sopenharmony_ci The one-shot timer can report when a timeout is in the past, so 19162306a36Sopenharmony_ci set_next_event is capable of returning -ETIME when appropriate. 19262306a36Sopenharmony_ci This interface is used when available. 19362306a36Sopenharmony_ci*/ 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci/* 19762306a36Sopenharmony_ci Get a hypervisor absolute time. In theory we could maintain an 19862306a36Sopenharmony_ci offset between the kernel's time and the hypervisor's time, and 19962306a36Sopenharmony_ci apply that to a kernel's absolute timeout. Unfortunately the 20062306a36Sopenharmony_ci hypervisor and kernel times can drift even if the kernel is using 20162306a36Sopenharmony_ci the Xen clocksource, because ntp can warp the kernel's clocksource. 20262306a36Sopenharmony_ci*/ 20362306a36Sopenharmony_cistatic s64 get_abs_timeout(unsigned long delta) 20462306a36Sopenharmony_ci{ 20562306a36Sopenharmony_ci return xen_clocksource_read() + delta; 20662306a36Sopenharmony_ci} 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_cistatic int xen_timerop_shutdown(struct clock_event_device *evt) 20962306a36Sopenharmony_ci{ 21062306a36Sopenharmony_ci /* cancel timeout */ 21162306a36Sopenharmony_ci HYPERVISOR_set_timer_op(0); 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci return 0; 21462306a36Sopenharmony_ci} 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_cistatic int xen_timerop_set_next_event(unsigned long delta, 21762306a36Sopenharmony_ci struct clock_event_device *evt) 21862306a36Sopenharmony_ci{ 21962306a36Sopenharmony_ci WARN_ON(!clockevent_state_oneshot(evt)); 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0) 22262306a36Sopenharmony_ci BUG(); 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci /* We may have missed the deadline, but there's no real way of 22562306a36Sopenharmony_ci knowing for sure. If the event was in the past, then we'll 22662306a36Sopenharmony_ci get an immediate interrupt. */ 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci return 0; 22962306a36Sopenharmony_ci} 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_cistatic struct clock_event_device xen_timerop_clockevent __ro_after_init = { 23262306a36Sopenharmony_ci .name = "xen", 23362306a36Sopenharmony_ci .features = CLOCK_EVT_FEAT_ONESHOT, 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci .max_delta_ns = 0xffffffff, 23662306a36Sopenharmony_ci .max_delta_ticks = 0xffffffff, 23762306a36Sopenharmony_ci .min_delta_ns = TIMER_SLOP, 23862306a36Sopenharmony_ci .min_delta_ticks = TIMER_SLOP, 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci .mult = 1, 24162306a36Sopenharmony_ci .shift = 0, 24262306a36Sopenharmony_ci .rating = 500, 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci .set_state_shutdown = xen_timerop_shutdown, 24562306a36Sopenharmony_ci .set_next_event = xen_timerop_set_next_event, 24662306a36Sopenharmony_ci}; 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_cistatic int xen_vcpuop_shutdown(struct clock_event_device *evt) 24962306a36Sopenharmony_ci{ 25062306a36Sopenharmony_ci int cpu = smp_processor_id(); 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_ci if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu), 25362306a36Sopenharmony_ci NULL) || 25462306a36Sopenharmony_ci HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), 25562306a36Sopenharmony_ci NULL)) 25662306a36Sopenharmony_ci BUG(); 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci return 0; 25962306a36Sopenharmony_ci} 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_cistatic int xen_vcpuop_set_oneshot(struct clock_event_device *evt) 26262306a36Sopenharmony_ci{ 26362306a36Sopenharmony_ci int cpu = smp_processor_id(); 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), 26662306a36Sopenharmony_ci NULL)) 26762306a36Sopenharmony_ci BUG(); 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci return 0; 27062306a36Sopenharmony_ci} 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_cistatic int xen_vcpuop_set_next_event(unsigned long delta, 27362306a36Sopenharmony_ci struct clock_event_device *evt) 27462306a36Sopenharmony_ci{ 27562306a36Sopenharmony_ci int cpu = smp_processor_id(); 27662306a36Sopenharmony_ci struct vcpu_set_singleshot_timer single; 27762306a36Sopenharmony_ci int ret; 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci WARN_ON(!clockevent_state_oneshot(evt)); 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci single.timeout_abs_ns = get_abs_timeout(delta); 28262306a36Sopenharmony_ci /* Get an event anyway, even if the timeout is already expired */ 28362306a36Sopenharmony_ci single.flags = 0; 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu), 28662306a36Sopenharmony_ci &single); 28762306a36Sopenharmony_ci BUG_ON(ret != 0); 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci return ret; 29062306a36Sopenharmony_ci} 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_cistatic struct clock_event_device xen_vcpuop_clockevent __ro_after_init = { 29362306a36Sopenharmony_ci .name = "xen", 29462306a36Sopenharmony_ci .features = CLOCK_EVT_FEAT_ONESHOT, 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci .max_delta_ns = 0xffffffff, 29762306a36Sopenharmony_ci .max_delta_ticks = 0xffffffff, 29862306a36Sopenharmony_ci .min_delta_ns = TIMER_SLOP, 29962306a36Sopenharmony_ci .min_delta_ticks = TIMER_SLOP, 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci .mult = 1, 30262306a36Sopenharmony_ci .shift = 0, 30362306a36Sopenharmony_ci .rating = 500, 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci .set_state_shutdown = xen_vcpuop_shutdown, 30662306a36Sopenharmony_ci .set_state_oneshot = xen_vcpuop_set_oneshot, 30762306a36Sopenharmony_ci .set_next_event = xen_vcpuop_set_next_event, 30862306a36Sopenharmony_ci}; 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_cistatic const struct clock_event_device *xen_clockevent = 31162306a36Sopenharmony_ci &xen_timerop_clockevent; 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_cistruct xen_clock_event_device { 31462306a36Sopenharmony_ci struct clock_event_device evt; 31562306a36Sopenharmony_ci char name[16]; 31662306a36Sopenharmony_ci}; 31762306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 }; 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_cistatic irqreturn_t xen_timer_interrupt(int irq, void *dev_id) 32062306a36Sopenharmony_ci{ 32162306a36Sopenharmony_ci struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt); 32262306a36Sopenharmony_ci irqreturn_t ret; 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci ret = IRQ_NONE; 32562306a36Sopenharmony_ci if (evt->event_handler) { 32662306a36Sopenharmony_ci evt->event_handler(evt); 32762306a36Sopenharmony_ci ret = IRQ_HANDLED; 32862306a36Sopenharmony_ci } 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ci return ret; 33162306a36Sopenharmony_ci} 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_civoid xen_teardown_timer(int cpu) 33462306a36Sopenharmony_ci{ 33562306a36Sopenharmony_ci struct clock_event_device *evt; 33662306a36Sopenharmony_ci evt = &per_cpu(xen_clock_events, cpu).evt; 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci if (evt->irq >= 0) { 33962306a36Sopenharmony_ci unbind_from_irqhandler(evt->irq, NULL); 34062306a36Sopenharmony_ci evt->irq = -1; 34162306a36Sopenharmony_ci } 34262306a36Sopenharmony_ci} 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_civoid xen_setup_timer(int cpu) 34562306a36Sopenharmony_ci{ 34662306a36Sopenharmony_ci struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu); 34762306a36Sopenharmony_ci struct clock_event_device *evt = &xevt->evt; 34862306a36Sopenharmony_ci int irq; 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_ci WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu); 35162306a36Sopenharmony_ci if (evt->irq >= 0) 35262306a36Sopenharmony_ci xen_teardown_timer(cpu); 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu); 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu); 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ci irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, 35962306a36Sopenharmony_ci IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| 36062306a36Sopenharmony_ci IRQF_FORCE_RESUME|IRQF_EARLY_RESUME, 36162306a36Sopenharmony_ci xevt->name, NULL); 36262306a36Sopenharmony_ci (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX); 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci memcpy(evt, xen_clockevent, sizeof(*evt)); 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci evt->cpumask = cpumask_of(cpu); 36762306a36Sopenharmony_ci evt->irq = irq; 36862306a36Sopenharmony_ci} 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_civoid xen_setup_cpu_clockevents(void) 37262306a36Sopenharmony_ci{ 37362306a36Sopenharmony_ci clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt)); 37462306a36Sopenharmony_ci} 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_civoid xen_timer_resume(void) 37762306a36Sopenharmony_ci{ 37862306a36Sopenharmony_ci int cpu; 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci if (xen_clockevent != &xen_vcpuop_clockevent) 38162306a36Sopenharmony_ci return; 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci for_each_online_cpu(cpu) { 38462306a36Sopenharmony_ci if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, 38562306a36Sopenharmony_ci xen_vcpu_nr(cpu), NULL)) 38662306a36Sopenharmony_ci BUG(); 38762306a36Sopenharmony_ci } 38862306a36Sopenharmony_ci} 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_cistatic struct pvclock_vsyscall_time_info *xen_clock __read_mostly; 39162306a36Sopenharmony_cistatic u64 xen_clock_value_saved; 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_civoid xen_save_time_memory_area(void) 39462306a36Sopenharmony_ci{ 39562306a36Sopenharmony_ci struct vcpu_register_time_memory_area t; 39662306a36Sopenharmony_ci int ret; 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_ci xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset; 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci if (!xen_clock) 40162306a36Sopenharmony_ci return; 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci t.addr.v = NULL; 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t); 40662306a36Sopenharmony_ci if (ret != 0) 40762306a36Sopenharmony_ci pr_notice("Cannot save secondary vcpu_time_info (err %d)", 40862306a36Sopenharmony_ci ret); 40962306a36Sopenharmony_ci else 41062306a36Sopenharmony_ci clear_page(xen_clock); 41162306a36Sopenharmony_ci} 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_civoid xen_restore_time_memory_area(void) 41462306a36Sopenharmony_ci{ 41562306a36Sopenharmony_ci struct vcpu_register_time_memory_area t; 41662306a36Sopenharmony_ci int ret; 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_ci if (!xen_clock) 41962306a36Sopenharmony_ci goto out; 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci t.addr.v = &xen_clock->pvti; 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t); 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci /* 42662306a36Sopenharmony_ci * We don't disable VDSO_CLOCKMODE_PVCLOCK entirely if it fails to 42762306a36Sopenharmony_ci * register the secondary time info with Xen or if we migrated to a 42862306a36Sopenharmony_ci * host without the necessary flags. On both of these cases what 42962306a36Sopenharmony_ci * happens is either process seeing a zeroed out pvti or seeing no 43062306a36Sopenharmony_ci * PVCLOCK_TSC_STABLE_BIT bit set. Userspace checks the latter and 43162306a36Sopenharmony_ci * if 0, it discards the data in pvti and fallbacks to a system 43262306a36Sopenharmony_ci * call for a reliable timestamp. 43362306a36Sopenharmony_ci */ 43462306a36Sopenharmony_ci if (ret != 0) 43562306a36Sopenharmony_ci pr_notice("Cannot restore secondary vcpu_time_info (err %d)", 43662306a36Sopenharmony_ci ret); 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ciout: 43962306a36Sopenharmony_ci /* Need pvclock_resume() before using xen_clocksource_read(). */ 44062306a36Sopenharmony_ci pvclock_resume(); 44162306a36Sopenharmony_ci xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved; 44262306a36Sopenharmony_ci} 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_cistatic void xen_setup_vsyscall_time_info(void) 44562306a36Sopenharmony_ci{ 44662306a36Sopenharmony_ci struct vcpu_register_time_memory_area t; 44762306a36Sopenharmony_ci struct pvclock_vsyscall_time_info *ti; 44862306a36Sopenharmony_ci int ret; 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL); 45162306a36Sopenharmony_ci if (!ti) 45262306a36Sopenharmony_ci return; 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_ci t.addr.v = &ti->pvti; 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t); 45762306a36Sopenharmony_ci if (ret) { 45862306a36Sopenharmony_ci pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (err %d)\n", ret); 45962306a36Sopenharmony_ci free_page((unsigned long)ti); 46062306a36Sopenharmony_ci return; 46162306a36Sopenharmony_ci } 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci /* 46462306a36Sopenharmony_ci * If primary time info had this bit set, secondary should too since 46562306a36Sopenharmony_ci * it's the same data on both just different memory regions. But we 46662306a36Sopenharmony_ci * still check it in case hypervisor is buggy. 46762306a36Sopenharmony_ci */ 46862306a36Sopenharmony_ci if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) { 46962306a36Sopenharmony_ci t.addr.v = NULL; 47062306a36Sopenharmony_ci ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 47162306a36Sopenharmony_ci 0, &t); 47262306a36Sopenharmony_ci if (!ret) 47362306a36Sopenharmony_ci free_page((unsigned long)ti); 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (tsc unstable)\n"); 47662306a36Sopenharmony_ci return; 47762306a36Sopenharmony_ci } 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci xen_clock = ti; 48062306a36Sopenharmony_ci pvclock_set_pvti_cpu0_va(xen_clock); 48162306a36Sopenharmony_ci 48262306a36Sopenharmony_ci xen_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK; 48362306a36Sopenharmony_ci} 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci/* 48662306a36Sopenharmony_ci * Check if it is possible to safely use the tsc as a clocksource. This is 48762306a36Sopenharmony_ci * only true if the hypervisor notifies the guest that its tsc is invariant, 48862306a36Sopenharmony_ci * the tsc is stable, and the tsc instruction will never be emulated. 48962306a36Sopenharmony_ci */ 49062306a36Sopenharmony_cistatic int __init xen_tsc_safe_clocksource(void) 49162306a36Sopenharmony_ci{ 49262306a36Sopenharmony_ci u32 eax, ebx, ecx, edx; 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_ci if (!(boot_cpu_has(X86_FEATURE_CONSTANT_TSC))) 49562306a36Sopenharmony_ci return 0; 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_ci if (!(boot_cpu_has(X86_FEATURE_NONSTOP_TSC))) 49862306a36Sopenharmony_ci return 0; 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci if (check_tsc_unstable()) 50162306a36Sopenharmony_ci return 0; 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_ci /* Leaf 4, sub-leaf 0 (0x40000x03) */ 50462306a36Sopenharmony_ci cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx); 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci return ebx == XEN_CPUID_TSC_MODE_NEVER_EMULATE; 50762306a36Sopenharmony_ci} 50862306a36Sopenharmony_ci 50962306a36Sopenharmony_cistatic void __init xen_time_init(void) 51062306a36Sopenharmony_ci{ 51162306a36Sopenharmony_ci struct pvclock_vcpu_time_info *pvti; 51262306a36Sopenharmony_ci int cpu = smp_processor_id(); 51362306a36Sopenharmony_ci struct timespec64 tp; 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci /* 51662306a36Sopenharmony_ci * As Dom0 is never moved, no penalty on using TSC there. 51762306a36Sopenharmony_ci * 51862306a36Sopenharmony_ci * If it is possible for the guest to determine that the tsc is a safe 51962306a36Sopenharmony_ci * clocksource, then set xen_clocksource rating below that of the tsc 52062306a36Sopenharmony_ci * so that the system prefers tsc instead. 52162306a36Sopenharmony_ci */ 52262306a36Sopenharmony_ci if (xen_initial_domain()) 52362306a36Sopenharmony_ci xen_clocksource.rating = 275; 52462306a36Sopenharmony_ci else if (xen_tsc_safe_clocksource()) 52562306a36Sopenharmony_ci xen_clocksource.rating = 299; 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC); 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), 53062306a36Sopenharmony_ci NULL) == 0) { 53162306a36Sopenharmony_ci /* Successfully turned off 100Hz tick, so we have the 53262306a36Sopenharmony_ci vcpuop-based timer interface */ 53362306a36Sopenharmony_ci printk(KERN_DEBUG "Xen: using vcpuop timer interface\n"); 53462306a36Sopenharmony_ci xen_clockevent = &xen_vcpuop_clockevent; 53562306a36Sopenharmony_ci } 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci /* Set initial system time with full resolution */ 53862306a36Sopenharmony_ci xen_read_wallclock(&tp); 53962306a36Sopenharmony_ci do_settimeofday64(&tp); 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci setup_force_cpu_cap(X86_FEATURE_TSC); 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_ci /* 54462306a36Sopenharmony_ci * We check ahead on the primary time info if this 54562306a36Sopenharmony_ci * bit is supported hence speeding up Xen clocksource. 54662306a36Sopenharmony_ci */ 54762306a36Sopenharmony_ci pvti = &__this_cpu_read(xen_vcpu)->time; 54862306a36Sopenharmony_ci if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) { 54962306a36Sopenharmony_ci pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); 55062306a36Sopenharmony_ci xen_setup_vsyscall_time_info(); 55162306a36Sopenharmony_ci } 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_ci xen_setup_runstate_info(cpu); 55462306a36Sopenharmony_ci xen_setup_timer(cpu); 55562306a36Sopenharmony_ci xen_setup_cpu_clockevents(); 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci xen_time_setup_guest(); 55862306a36Sopenharmony_ci 55962306a36Sopenharmony_ci if (xen_initial_domain()) 56062306a36Sopenharmony_ci pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); 56162306a36Sopenharmony_ci} 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_cistatic void __init xen_init_time_common(void) 56462306a36Sopenharmony_ci{ 56562306a36Sopenharmony_ci xen_sched_clock_offset = xen_clocksource_read(); 56662306a36Sopenharmony_ci static_call_update(pv_steal_clock, xen_steal_clock); 56762306a36Sopenharmony_ci paravirt_set_sched_clock(xen_sched_clock); 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci x86_platform.calibrate_tsc = xen_tsc_khz; 57062306a36Sopenharmony_ci x86_platform.get_wallclock = xen_get_wallclock; 57162306a36Sopenharmony_ci} 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_civoid __init xen_init_time_ops(void) 57462306a36Sopenharmony_ci{ 57562306a36Sopenharmony_ci xen_init_time_common(); 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci x86_init.timers.timer_init = xen_time_init; 57862306a36Sopenharmony_ci x86_init.timers.setup_percpu_clockev = x86_init_noop; 57962306a36Sopenharmony_ci x86_cpuinit.setup_percpu_clockev = x86_init_noop; 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_ci /* Dom0 uses the native method to set the hardware RTC. */ 58262306a36Sopenharmony_ci if (!xen_initial_domain()) 58362306a36Sopenharmony_ci x86_platform.set_wallclock = xen_set_wallclock; 58462306a36Sopenharmony_ci} 58562306a36Sopenharmony_ci 58662306a36Sopenharmony_ci#ifdef CONFIG_XEN_PVHVM 58762306a36Sopenharmony_cistatic void xen_hvm_setup_cpu_clockevents(void) 58862306a36Sopenharmony_ci{ 58962306a36Sopenharmony_ci int cpu = smp_processor_id(); 59062306a36Sopenharmony_ci xen_setup_runstate_info(cpu); 59162306a36Sopenharmony_ci /* 59262306a36Sopenharmony_ci * xen_setup_timer(cpu) - snprintf is bad in atomic context. Hence 59362306a36Sopenharmony_ci * doing it xen_hvm_cpu_notify (which gets called by smp_init during 59462306a36Sopenharmony_ci * early bootup and also during CPU hotplug events). 59562306a36Sopenharmony_ci */ 59662306a36Sopenharmony_ci xen_setup_cpu_clockevents(); 59762306a36Sopenharmony_ci} 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_civoid __init xen_hvm_init_time_ops(void) 60062306a36Sopenharmony_ci{ 60162306a36Sopenharmony_ci static bool hvm_time_initialized; 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci if (hvm_time_initialized) 60462306a36Sopenharmony_ci return; 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci /* 60762306a36Sopenharmony_ci * vector callback is needed otherwise we cannot receive interrupts 60862306a36Sopenharmony_ci * on cpu > 0 and at this point we don't know how many cpus are 60962306a36Sopenharmony_ci * available. 61062306a36Sopenharmony_ci */ 61162306a36Sopenharmony_ci if (!xen_have_vector_callback) 61262306a36Sopenharmony_ci return; 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { 61562306a36Sopenharmony_ci pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer"); 61662306a36Sopenharmony_ci return; 61762306a36Sopenharmony_ci } 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci /* 62062306a36Sopenharmony_ci * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'. 62162306a36Sopenharmony_ci * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest 62262306a36Sopenharmony_ci * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec), To access 62362306a36Sopenharmony_ci * __this_cpu_read(xen_vcpu) via xen_clocksource_read() will panic. 62462306a36Sopenharmony_ci * 62562306a36Sopenharmony_ci * The xen_hvm_init_time_ops() should be called again later after 62662306a36Sopenharmony_ci * __this_cpu_read(xen_vcpu) is available. 62762306a36Sopenharmony_ci */ 62862306a36Sopenharmony_ci if (!__this_cpu_read(xen_vcpu)) { 62962306a36Sopenharmony_ci pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n", 63062306a36Sopenharmony_ci xen_vcpu_nr(0)); 63162306a36Sopenharmony_ci return; 63262306a36Sopenharmony_ci } 63362306a36Sopenharmony_ci 63462306a36Sopenharmony_ci xen_init_time_common(); 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ci x86_init.timers.setup_percpu_clockev = xen_time_init; 63762306a36Sopenharmony_ci x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ci x86_platform.set_wallclock = xen_set_wallclock; 64062306a36Sopenharmony_ci 64162306a36Sopenharmony_ci hvm_time_initialized = true; 64262306a36Sopenharmony_ci} 64362306a36Sopenharmony_ci#endif 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci/* Kernel parameter to specify Xen timer slop */ 64662306a36Sopenharmony_cistatic int __init parse_xen_timer_slop(char *ptr) 64762306a36Sopenharmony_ci{ 64862306a36Sopenharmony_ci unsigned long slop = memparse(ptr, NULL); 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_ci xen_timerop_clockevent.min_delta_ns = slop; 65162306a36Sopenharmony_ci xen_timerop_clockevent.min_delta_ticks = slop; 65262306a36Sopenharmony_ci xen_vcpuop_clockevent.min_delta_ns = slop; 65362306a36Sopenharmony_ci xen_vcpuop_clockevent.min_delta_ticks = slop; 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci return 0; 65662306a36Sopenharmony_ci} 65762306a36Sopenharmony_ciearly_param("xen_timer_slop", parse_xen_timer_slop); 658