1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2012 ARM Ltd.
4 * Author: Marc Zyngier <marc.zyngier@arm.com>
5 */
6
7#include <linux/cpu.h>
8#include <linux/kvm.h>
9#include <linux/kvm_host.h>
10#include <linux/interrupt.h>
11#include <linux/irq.h>
12#include <linux/irqdomain.h>
13#include <linux/uaccess.h>
14
15#include <clocksource/arm_arch_timer.h>
16#include <asm/arch_timer.h>
17#include <asm/kvm_emulate.h>
18#include <asm/kvm_hyp.h>
19#include <asm/kvm_nested.h>
20
21#include <kvm/arm_vgic.h>
22#include <kvm/arm_arch_timer.h>
23
24#include "trace.h"
25
26static struct timecounter *timecounter;
27static unsigned int host_vtimer_irq;
28static unsigned int host_ptimer_irq;
29static u32 host_vtimer_irq_flags;
30static u32 host_ptimer_irq_flags;
31
32static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
33
34static const u8 default_ppi[] = {
35	[TIMER_PTIMER]  = 30,
36	[TIMER_VTIMER]  = 27,
37	[TIMER_HPTIMER] = 26,
38	[TIMER_HVTIMER] = 28,
39};
40
41static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
42static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
43				 struct arch_timer_context *timer_ctx);
44static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
45static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
46				struct arch_timer_context *timer,
47				enum kvm_arch_timer_regs treg,
48				u64 val);
49static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
50			      struct arch_timer_context *timer,
51			      enum kvm_arch_timer_regs treg);
52static bool kvm_arch_timer_get_input_level(int vintid);
53
54static struct irq_ops arch_timer_irq_ops = {
55	.get_input_level = kvm_arch_timer_get_input_level,
56};
57
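/*
 * NV guests see the EL2 timers (HV/HP) on top of the EL0 virtual and
 * physical timers; everybody else only gets the latter two.
 */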
58static int nr_timers(struct kvm_vcpu *vcpu)
59{
60	if (!vcpu_has_nv(vcpu))
61		return NR_KVM_EL0_TIMERS;
62
63	return NR_KVM_TIMERS;
64}
65
66u32 timer_get_ctl(struct arch_timer_context *ctxt)
67{
68	struct kvm_vcpu *vcpu = ctxt->vcpu;
69
	switch (arch_timer_ctx_index(ctxt)) {
71	case TIMER_VTIMER:
72		return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
73	case TIMER_PTIMER:
74		return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
75	case TIMER_HVTIMER:
76		return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
77	case TIMER_HPTIMER:
78		return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
79	default:
80		WARN_ON(1);
81		return 0;
82	}
83}
84
85u64 timer_get_cval(struct arch_timer_context *ctxt)
86{
87	struct kvm_vcpu *vcpu = ctxt->vcpu;
88
	switch (arch_timer_ctx_index(ctxt)) {
90	case TIMER_VTIMER:
91		return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
92	case TIMER_PTIMER:
93		return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
94	case TIMER_HVTIMER:
95		return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
96	case TIMER_HPTIMER:
97		return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
98	default:
99		WARN_ON(1);
100		return 0;
101	}
102}
103
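/*
 * A timer's view of the counter is offset by the sum of the VM-wide
 * offset and, when present, the per-vCPU offset (used by NV to mirror
 * CNTVOFF_EL2). A NULL context contributes no offset at all.
 */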
104static u64 timer_get_offset(struct arch_timer_context *ctxt)
105{
106	u64 offset = 0;
107
108	if (!ctxt)
109		return 0;
110
111	if (ctxt->offset.vm_offset)
112		offset += *ctxt->offset.vm_offset;
113	if (ctxt->offset.vcpu_offset)
114		offset += *ctxt->offset.vcpu_offset;
115
116	return offset;
117}
118
119static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
120{
121	struct kvm_vcpu *vcpu = ctxt->vcpu;
122
	switch (arch_timer_ctx_index(ctxt)) {
124	case TIMER_VTIMER:
125		__vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;
126		break;
127	case TIMER_PTIMER:
128		__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
129		break;
130	case TIMER_HVTIMER:
131		__vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
132		break;
133	case TIMER_HPTIMER:
134		__vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
135		break;
136	default:
137		WARN_ON(1);
138	}
139}
140
141static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
142{
143	struct kvm_vcpu *vcpu = ctxt->vcpu;
144
	switch (arch_timer_ctx_index(ctxt)) {
146	case TIMER_VTIMER:
147		__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval;
148		break;
149	case TIMER_PTIMER:
150		__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
151		break;
152	case TIMER_HVTIMER:
153		__vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
154		break;
155	case TIMER_HPTIMER:
156		__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
157		break;
158	default:
159		WARN_ON(1);
160	}
161}
162
163static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
164{
165	if (!ctxt->offset.vm_offset) {
166		WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
167		return;
168	}
169
170	WRITE_ONCE(*ctxt->offset.vm_offset, offset);
171}
172
173u64 kvm_phys_timer_read(void)
174{
175	return timecounter->cc->read(timecounter->cc);
176}
177
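/*
 * Work out which timer contexts are backed by the hardware ("direct")
 * and which ones are purely emulated with a hrtimer, depending on
 * whether the vcpu currently runs at vEL2 (NV), on a VHE host, or on a
 * nVHE host where only the virtual timer can be handed to the guest.
 */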
178void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
179{
180	if (vcpu_has_nv(vcpu)) {
181		if (is_hyp_ctxt(vcpu)) {
182			map->direct_vtimer = vcpu_hvtimer(vcpu);
183			map->direct_ptimer = vcpu_hptimer(vcpu);
184			map->emul_vtimer = vcpu_vtimer(vcpu);
185			map->emul_ptimer = vcpu_ptimer(vcpu);
186		} else {
187			map->direct_vtimer = vcpu_vtimer(vcpu);
188			map->direct_ptimer = vcpu_ptimer(vcpu);
189			map->emul_vtimer = vcpu_hvtimer(vcpu);
190			map->emul_ptimer = vcpu_hptimer(vcpu);
191		}
192	} else if (has_vhe()) {
193		map->direct_vtimer = vcpu_vtimer(vcpu);
194		map->direct_ptimer = vcpu_ptimer(vcpu);
195		map->emul_vtimer = NULL;
196		map->emul_ptimer = NULL;
197	} else {
198		map->direct_vtimer = vcpu_vtimer(vcpu);
199		map->direct_ptimer = NULL;
200		map->emul_vtimer = NULL;
201		map->emul_ptimer = vcpu_ptimer(vcpu);
202	}
203
204	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
205}
206
207static inline bool userspace_irqchip(struct kvm *kvm)
208{
209	return static_branch_unlikely(&userspace_irqchip_in_use) &&
210		unlikely(!irqchip_in_kernel(kvm));
211}
212
213static void soft_timer_start(struct hrtimer *hrt, u64 ns)
214{
215	hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
216		      HRTIMER_MODE_ABS_HARD);
217}
218
219static void soft_timer_cancel(struct hrtimer *hrt)
220{
221	hrtimer_cancel(hrt);
222}
223
224static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
225{
226	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
227	struct arch_timer_context *ctx;
228	struct timer_map map;
229
230	/*
231	 * We may see a timer interrupt after vcpu_put() has been called which
232	 * sets the CPU's vcpu pointer to NULL, because even though the timer
233	 * has been disabled in timer_save_state(), the hardware interrupt
234	 * signal may not have been retired from the interrupt controller yet.
235	 */
236	if (!vcpu)
237		return IRQ_HANDLED;
238
239	get_timer_map(vcpu, &map);
240
241	if (irq == host_vtimer_irq)
242		ctx = map.direct_vtimer;
243	else
244		ctx = map.direct_ptimer;
245
246	if (kvm_timer_should_fire(ctx))
247		kvm_timer_update_irq(vcpu, true, ctx);
248
249	if (userspace_irqchip(vcpu->kvm) &&
250	    !static_branch_unlikely(&has_gic_active_state))
251		disable_percpu_irq(host_vtimer_irq);
252
253	return IRQ_HANDLED;
254}
255
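/*
 * Convert the distance between @val and the guest's current view of the
 * counter into nanoseconds, returning 0 if @val is already in the past.
 */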
256static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
257				     u64 val)
258{
259	u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
260
261	if (now < val) {
262		u64 ns;
263
264		ns = cyclecounter_cyc2ns(timecounter->cc,
265					 val - now,
266					 timecounter->mask,
267					 &timer_ctx->ns_frac);
268		return ns;
269	}
270
271	return 0;
272}
273
274static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
275{
276	return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
277}
278
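/*
 * A timer can only fire if it is enabled and its interrupt is not
 * masked. This must be evaluated on a saved (not loaded) context,
 * hence the WARN_ON().
 */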
279static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
280{
281	WARN_ON(timer_ctx && timer_ctx->loaded);
282	return timer_ctx &&
283		((timer_get_ctl(timer_ctx) &
284		  (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
285}
286
287static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
288{
289	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
290		vcpu_get_flag(vcpu, IN_WFIT));
291}
292
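/*
 * WFIT encodes a deadline (in terms of the guest's virtual counter) in
 * its source register. Compute how far away that deadline is in ns,
 * using the hyp virtual timer context when the vcpu runs at vEL2.
 */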
293static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
294{
295	u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
296	struct arch_timer_context *ctx;
297
298	ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu)
299						       : vcpu_vtimer(vcpu);
300
301	return kvm_counter_compute_delta(ctx, val);
302}
303
304/*
305 * Returns the earliest expiration time in ns among guest timers.
 * Note that it will return 0 if none of the timers can fire.
307 */
308static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
309{
310	u64 min_delta = ULLONG_MAX;
311	int i;
312
313	for (i = 0; i < nr_timers(vcpu); i++) {
314		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
315
316		WARN(ctx->loaded, "timer %d loaded\n", i);
317		if (kvm_timer_irq_can_fire(ctx))
318			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
319	}
320
321	if (vcpu_has_wfit_active(vcpu))
322		min_delta = min(min_delta, wfit_delay_ns(vcpu));
323
	/* If none of the timers can fire, return 0 */
325	if (min_delta == ULLONG_MAX)
326		return 0;
327
328	return min_delta;
329}
330
331static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
332{
333	struct arch_timer_cpu *timer;
334	struct kvm_vcpu *vcpu;
335	u64 ns;
336
337	timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
338	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
339
340	/*
341	 * Check that the timer has really expired from the guest's
342	 * PoV (NTP on the host may have forced it to expire
343	 * early). If we should have slept longer, restart it.
344	 */
345	ns = kvm_timer_earliest_exp(vcpu);
346	if (unlikely(ns)) {
347		hrtimer_forward_now(hrt, ns_to_ktime(ns));
348		return HRTIMER_RESTART;
349	}
350
351	kvm_vcpu_wake_up(vcpu);
352	return HRTIMER_NORESTART;
353}
354
355static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
356{
357	struct arch_timer_context *ctx;
358	struct kvm_vcpu *vcpu;
359	u64 ns;
360
361	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
362	vcpu = ctx->vcpu;
363
364	trace_kvm_timer_hrtimer_expire(ctx);
365
366	/*
367	 * Check that the timer has really expired from the guest's
368	 * PoV (NTP on the host may have forced it to expire
369	 * early). If not ready, schedule for a later time.
370	 */
371	ns = kvm_timer_compute_delta(ctx);
372	if (unlikely(ns)) {
373		hrtimer_forward_now(hrt, ns_to_ktime(ns));
374		return HRTIMER_RESTART;
375	}
376
377	kvm_timer_update_irq(vcpu, true, ctx);
378	return HRTIMER_NORESTART;
379}
380
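/*
 * Compute the timer output level. If the context is loaded on the CPU,
 * read the hardware control register directly (enabled, ISTATUS set and
 * not masked); otherwise compare the saved CVAL against the guest's
 * view of the counter.
 */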
381static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
382{
383	enum kvm_arch_timers index;
384	u64 cval, now;
385
386	if (!timer_ctx)
387		return false;
388
389	index = arch_timer_ctx_index(timer_ctx);
390
391	if (timer_ctx->loaded) {
392		u32 cnt_ctl = 0;
393
394		switch (index) {
395		case TIMER_VTIMER:
396		case TIMER_HVTIMER:
397			cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
398			break;
399		case TIMER_PTIMER:
400		case TIMER_HPTIMER:
401			cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
402			break;
403		case NR_KVM_TIMERS:
			/* GCC cannot see that this case is unreachable */
405			cnt_ctl = 0;
406			break;
407		}
408
409		return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
410		        (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
411		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
412	}
413
414	if (!kvm_timer_irq_can_fire(timer_ctx))
415		return false;
416
417	cval = timer_get_cval(timer_ctx);
418	now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
419
420	return cval <= now;
421}
422
423int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
424{
425	return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
426}
427
428/*
429 * Reflect the timer output level into the kvm_run structure
430 */
431void kvm_timer_update_run(struct kvm_vcpu *vcpu)
432{
433	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
434	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
435	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
436
437	/* Populate the device bitmap with the timer states */
438	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
439				    KVM_ARM_DEV_EL1_PTIMER);
440	if (kvm_timer_should_fire(vtimer))
441		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
442	if (kvm_timer_should_fire(ptimer))
443		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
444}
445
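/*
 * Record the new output level and, when the irqchip is in the kernel,
 * propagate it to the vgic as a level-triggered interrupt. With a
 * userspace irqchip the level is instead reported via the run struct
 * (see kvm_timer_update_run()).
 */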
446static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
447				 struct arch_timer_context *timer_ctx)
448{
449	int ret;
450
451	timer_ctx->irq.level = new_level;
452	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
453				   timer_ctx->irq.level);
454
455	if (!userspace_irqchip(vcpu->kvm)) {
456		ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
457					  timer_irq(timer_ctx),
458					  timer_ctx->irq.level,
459					  timer_ctx);
460		WARN_ON(ret);
461	}
462}
463
464/* Only called for a fully emulated timer */
465static void timer_emulate(struct arch_timer_context *ctx)
466{
467	bool should_fire = kvm_timer_should_fire(ctx);
468
469	trace_kvm_timer_emulate(ctx, should_fire);
470
471	if (should_fire != ctx->irq.level) {
472		kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
473		return;
474	}
475
476	/*
477	 * If the timer can fire now, we don't need to have a soft timer
478	 * scheduled for the future.  If the timer cannot fire at all,
479	 * then we also don't need a soft timer.
480	 */
481	if (should_fire || !kvm_timer_irq_can_fire(ctx))
482		return;
483
484	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
485}
486
487static void set_cntvoff(u64 cntvoff)
488{
489	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
490}
491
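/*
 * CNTPOFF_EL2 only exists with FEAT_ECV (CNTPOFF); without it the
 * physical offset is applied by hand in timer_save_state() and
 * timer_restore_state(), and by trapping the guest's physical timer
 * accesses (see timer_set_traps()).
 */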
492static void set_cntpoff(u64 cntpoff)
493{
494	if (has_cntpoff())
495		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
496}
497
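/*
 * Save a loaded timer back into the vcpu's sysreg file and disable the
 * hardware timer so that it cannot keep firing on the host while the
 * vcpu is not running.
 */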
498static void timer_save_state(struct arch_timer_context *ctx)
499{
500	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
501	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
502	unsigned long flags;
503
504	if (!timer->enabled)
505		return;
506
507	local_irq_save(flags);
508
509	if (!ctx->loaded)
510		goto out;
511
512	switch (index) {
513		u64 cval;
514
515	case TIMER_VTIMER:
516	case TIMER_HVTIMER:
517		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
518		timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));
519
520		/* Disable the timer */
521		write_sysreg_el0(0, SYS_CNTV_CTL);
522		isb();
523
524		/*
525		 * The kernel may decide to run userspace after
526		 * calling vcpu_put, so we reset cntvoff to 0 to
527		 * ensure a consistent read between user accesses to
528		 * the virtual counter and kernel access to the
		 * physical counter in the non-VHE case.
530		 *
531		 * For VHE, the virtual counter uses a fixed virtual
		 * offset of zero, so there is no need to zero the
		 * CNTVOFF_EL2 register, but doing so is actually useful
		 * when switching between EL1/vEL2 with NV.
535		 *
536		 * Do it unconditionally, as this is either unavoidable
537		 * or dirt cheap.
538		 */
539		set_cntvoff(0);
540		break;
541	case TIMER_PTIMER:
542	case TIMER_HPTIMER:
543		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
544		cval = read_sysreg_el0(SYS_CNTP_CVAL);
545
546		cval -= timer_get_offset(ctx);
547
548		timer_set_cval(ctx, cval);
549
550		/* Disable the timer */
551		write_sysreg_el0(0, SYS_CNTP_CTL);
552		isb();
553
554		set_cntpoff(0);
555		break;
556	case NR_KVM_TIMERS:
557		BUG();
558	}
559
560	trace_kvm_timer_save_state(ctx);
561
562	ctx->loaded = false;
563out:
564	local_irq_restore(flags);
565}
566
567/*
568 * Schedule the background timer before calling kvm_vcpu_halt, so that this
569 * thread is removed from its waitqueue and made runnable when there's a timer
570 * interrupt to handle.
571 */
572static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
573{
574	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
575	struct timer_map map;
576
577	get_timer_map(vcpu, &map);
578
579	/*
580	 * If no timers are capable of raising interrupts (disabled or
581	 * masked), then there's no more work for us to do.
582	 */
583	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
584	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
585	    !kvm_timer_irq_can_fire(map.emul_vtimer) &&
586	    !kvm_timer_irq_can_fire(map.emul_ptimer) &&
587	    !vcpu_has_wfit_active(vcpu))
588		return;
589
590	/*
	 * At least one guest timer will expire. Schedule a background timer,
	 * programmed with the earliest expiration time among the guest timers.
593	 */
594	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
595}
596
597static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
598{
599	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
600
601	soft_timer_cancel(&timer->bg_timer);
602}
603
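/*
 * Load a saved timer context onto the hardware: program the counter
 * offset, then CVAL, then (after an ISB) the control register, so the
 * enable bit only takes effect once the compare value is in place.
 */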
604static void timer_restore_state(struct arch_timer_context *ctx)
605{
606	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
607	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
608	unsigned long flags;
609
610	if (!timer->enabled)
611		return;
612
613	local_irq_save(flags);
614
615	if (ctx->loaded)
616		goto out;
617
618	switch (index) {
619		u64 cval, offset;
620
621	case TIMER_VTIMER:
622	case TIMER_HVTIMER:
623		set_cntvoff(timer_get_offset(ctx));
624		write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
625		isb();
626		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
627		break;
628	case TIMER_PTIMER:
629	case TIMER_HPTIMER:
630		cval = timer_get_cval(ctx);
631		offset = timer_get_offset(ctx);
632		set_cntpoff(offset);
633		cval += offset;
634		write_sysreg_el0(cval, SYS_CNTP_CVAL);
635		isb();
636		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
637		break;
638	case NR_KVM_TIMERS:
639		BUG();
640	}
641
642	trace_kvm_timer_restore_state(ctx);
643
644	ctx->loaded = true;
645out:
646	local_irq_restore(flags);
647}
648
649static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
650{
651	int r;
652	r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
653	WARN_ON(r);
654}
655
656static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
657{
658	struct kvm_vcpu *vcpu = ctx->vcpu;
659	bool phys_active = false;
660
661	/*
662	 * Update the timer output so that it is likely to match the
663	 * state we're about to restore. If the timer expires between
664	 * this point and the register restoration, we'll take the
665	 * interrupt anyway.
666	 */
667	kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
668
669	if (irqchip_in_kernel(vcpu->kvm))
670		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
671
672	phys_active |= ctx->irq.level;
673
674	set_timer_irq_phys_active(ctx, phys_active);
675}
676
677static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
678{
679	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
680
681	/*
682	 * Update the timer output so that it is likely to match the
683	 * state we're about to restore. If the timer expires between
684	 * this point and the register restoration, we'll take the
685	 * interrupt anyway.
686	 */
687	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);
688
689	/*
690	 * When using a userspace irqchip with the architected timers and a
691	 * host interrupt controller that doesn't support an active state, we
692	 * must still prevent continuously exiting from the guest, and
693	 * therefore mask the physical interrupt by disabling it on the host
694	 * interrupt controller when the virtual level is high, such that the
695	 * guest can make forward progress.  Once we detect the output level
696	 * being de-asserted, we unmask the interrupt again so that we exit
697	 * from the guest when the timer fires.
698	 */
699	if (vtimer->irq.level)
700		disable_percpu_irq(host_vtimer_irq);
701	else
702		enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
703}
704
705/* If _pred is true, set bit in _set, otherwise set it in _clr */
706#define assign_clear_set_bit(_pred, _bit, _clr, _set)			\
707	do {								\
708		if (_pred)						\
709			(_set) |= (_bit);				\
710		else							\
711			(_clr) |= (_bit);				\
712	} while (0)
713
714static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
715					      struct timer_map *map)
716{
717	int hw, ret;
718
719	if (!irqchip_in_kernel(vcpu->kvm))
720		return;
721
722	/*
723	 * We only ever unmap the vtimer irq on a VHE system that runs nested
	 * virtualization, in which case we have valid emul_vtimer,
	 * emul_ptimer, direct_vtimer and direct_ptimer contexts.
726	 *
727	 * Since this is called from kvm_timer_vcpu_load(), a change between
728	 * vEL2 and vEL1/0 will have just happened, and the timer_map will
729	 * represent this, and therefore we switch the emul/direct mappings
730	 * below.
731	 */
732	hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
733	if (hw < 0) {
734		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
735		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));
736
737		ret = kvm_vgic_map_phys_irq(vcpu,
738					    map->direct_vtimer->host_timer_irq,
739					    timer_irq(map->direct_vtimer),
740					    &arch_timer_irq_ops);
741		WARN_ON_ONCE(ret);
742		ret = kvm_vgic_map_phys_irq(vcpu,
743					    map->direct_ptimer->host_timer_irq,
744					    timer_irq(map->direct_ptimer),
745					    &arch_timer_irq_ops);
746		WARN_ON_ONCE(ret);
747
748		/*
		 * The virtual offset behaviour is "interesting", as it
750		 * always applies when HCR_EL2.E2H==0, but only when
751		 * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
752		 * track E2H when putting the HV timer in "direct" mode.
753		 */
754		if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
755			struct arch_timer_offset *offs = &map->direct_vtimer->offset;
756
757			if (vcpu_el2_e2h_is_set(vcpu))
758				offs->vcpu_offset = NULL;
759			else
760				offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
761		}
762	}
763}
764
765static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
766{
767	bool tpt, tpc;
768	u64 clr, set;
769
770	/*
	 * No trapping gets configured here with nVHE. See
	 * __timer_enable_traps(), where this is handled instead.
773	 */
774	if (!has_vhe())
775		return;
776
777	/*
778	 * Our default policy is not to trap anything. As we progress
779	 * within this function, reality kicks in and we start adding
780	 * traps based on emulation requirements.
781	 */
782	tpt = tpc = false;
783
784	/*
	 * We have two possibilities to deal with a physical offset:
786	 *
787	 * - Either we have CNTPOFF (yay!) or the offset is 0:
788	 *   we let the guest freely access the HW
789	 *
	 * - or neither of these conditions applies:
791	 *   we trap accesses to the HW, but still use it
792	 *   after correcting the physical offset
793	 */
794	if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
795		tpt = tpc = true;
796
797	/*
798	 * Apply the enable bits that the guest hypervisor has requested for
799	 * its own guest. We can only add traps that wouldn't have been set
800	 * above.
801	 */
802	if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
803		u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
804
		/*
		 * Use the VHE format for mental sanity: with E2H==0 the
		 * EL1PCTEN/EL1PCEN bits live in CNTHCTL_EL2[1:0], so shift
		 * them up to the E2H==1 positions before checking them.
		 */
806		if (!vcpu_el2_e2h_is_set(vcpu))
807			val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;
808
809		tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
810		tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
811	}
812
813	/*
814	 * Now that we have collected our requirements, compute the
815	 * trap and enable bits.
816	 */
817	set = 0;
818	clr = 0;
819
820	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
821	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
822
823	/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
824	sysreg_clear_set(cnthctl_el2, clr, set);
825}
826
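/*
 * vcpu_load hook: resync the interrupt state with the GIC (or mask the
 * host PPI when using a userspace irqchip), restore the direct timers
 * onto the hardware, kick the emulated ones and set up the EL1 timer
 * traps.
 */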
827void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
828{
829	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
830	struct timer_map map;
831
832	if (unlikely(!timer->enabled))
833		return;
834
835	get_timer_map(vcpu, &map);
836
837	if (static_branch_likely(&has_gic_active_state)) {
838		if (vcpu_has_nv(vcpu))
839			kvm_timer_vcpu_load_nested_switch(vcpu, &map);
840
841		kvm_timer_vcpu_load_gic(map.direct_vtimer);
842		if (map.direct_ptimer)
843			kvm_timer_vcpu_load_gic(map.direct_ptimer);
844	} else {
845		kvm_timer_vcpu_load_nogic(vcpu);
846	}
847
848	kvm_timer_unblocking(vcpu);
849
850	timer_restore_state(map.direct_vtimer);
851	if (map.direct_ptimer)
852		timer_restore_state(map.direct_ptimer);
853	if (map.emul_vtimer)
854		timer_emulate(map.emul_vtimer);
855	if (map.emul_ptimer)
856		timer_emulate(map.emul_ptimer);
857
858	timer_set_traps(vcpu, &map);
859}
860
861bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
862{
863	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
864	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
865	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
866	bool vlevel, plevel;
867
868	if (likely(irqchip_in_kernel(vcpu->kvm)))
869		return false;
870
871	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
872	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
873
874	return kvm_timer_should_fire(vtimer) != vlevel ||
875	       kvm_timer_should_fire(ptimer) != plevel;
876}
877
878void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
879{
880	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
881	struct timer_map map;
882
883	if (unlikely(!timer->enabled))
884		return;
885
886	get_timer_map(vcpu, &map);
887
888	timer_save_state(map.direct_vtimer);
889	if (map.direct_ptimer)
890		timer_save_state(map.direct_ptimer);
891
892	/*
893	 * Cancel soft timer emulation, because the only case where we
894	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
895	 * in that case we already factor in the deadline for the physical
896	 * timer when scheduling the bg_timer.
897	 *
898	 * In any case, we re-schedule the hrtimer for the physical timer when
899	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
900	 */
901	if (map.emul_vtimer)
902		soft_timer_cancel(&map.emul_vtimer->hrtimer);
903	if (map.emul_ptimer)
904		soft_timer_cancel(&map.emul_ptimer->hrtimer);
905
906	if (kvm_vcpu_is_blocking(vcpu))
907		kvm_timer_blocking(vcpu);
908}
909
910/*
911 * With a userspace irqchip we have to check if the guest de-asserted the
912 * timer and if so, unmask the timer irq signal on the host interrupt
913 * controller to ensure that we see future timer signals.
914 */
915static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
916{
917	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
918
919	if (!kvm_timer_should_fire(vtimer)) {
920		kvm_timer_update_irq(vcpu, false, vtimer);
921		if (static_branch_likely(&has_gic_active_state))
922			set_timer_irq_phys_active(vtimer, false);
923		else
924			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
925	}
926}
927
928void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
929{
930	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
931
932	if (unlikely(!timer->enabled))
933		return;
934
935	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
936		unmask_vtimer_irq_user(vcpu);
937}
938
939int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
940{
941	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
942	struct timer_map map;
943
944	get_timer_map(vcpu, &map);
945
946	/*
947	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
948	 * and to 0 for ARMv7.  We provide an implementation that always
949	 * resets the timer to be disabled and unmasked and is compliant with
950	 * the ARMv7 architecture.
951	 */
952	for (int i = 0; i < nr_timers(vcpu); i++)
953		timer_set_ctl(vcpu_get_timer(vcpu, i), 0);
954
955	/*
956	 * A vcpu running at EL2 is in charge of the offset applied to
957	 * the virtual timer, so use the physical VM offset, and point
958	 * the vcpu offset to CNTVOFF_EL2.
959	 */
960	if (vcpu_has_nv(vcpu)) {
961		struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;
962
963		offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
964		offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
965	}
966
967	if (timer->enabled) {
968		for (int i = 0; i < nr_timers(vcpu); i++)
969			kvm_timer_update_irq(vcpu, false,
970					     vcpu_get_timer(vcpu, i));
971
972		if (irqchip_in_kernel(vcpu->kvm)) {
973			kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
974			if (map.direct_ptimer)
975				kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
976		}
977	}
978
979	if (map.emul_vtimer)
980		soft_timer_cancel(&map.emul_vtimer->hrtimer);
981	if (map.emul_ptimer)
982		soft_timer_cancel(&map.emul_ptimer->hrtimer);
983
984	return 0;
985}
986
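/*
 * Per-timer context setup: wire up the VM-wide offset (virtual or
 * physical), the emulation hrtimer and the host PPI backing the timer.
 */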
987static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
988{
989	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
990	struct kvm *kvm = vcpu->kvm;
991
992	ctxt->vcpu = vcpu;
993
994	if (timerid == TIMER_VTIMER)
995		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
996	else
997		ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
998
999	hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
1000	ctxt->hrtimer.function = kvm_hrtimer_expire;
1001
1002	switch (timerid) {
1003	case TIMER_PTIMER:
1004	case TIMER_HPTIMER:
1005		ctxt->host_timer_irq = host_ptimer_irq;
1006		break;
1007	case TIMER_VTIMER:
1008	case TIMER_HVTIMER:
1009		ctxt->host_timer_irq = host_vtimer_irq;
1010		break;
1011	}
1012}
1013
1014void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
1015{
1016	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1017
1018	for (int i = 0; i < NR_KVM_TIMERS; i++)
1019		timer_context_init(vcpu, i);
1020
1021	/* Synchronize offsets across timers of a VM if not already provided */
1022	if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
1023		timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
1024		timer_set_offset(vcpu_ptimer(vcpu), 0);
1025	}
1026
1027	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
1028	timer->bg_timer.function = kvm_bg_timer_expire;
1029}
1030
1031void kvm_timer_init_vm(struct kvm *kvm)
1032{
1033	for (int i = 0; i < NR_KVM_TIMERS; i++)
1034		kvm->arch.timer_data.ppi[i] = default_ppi[i];
1035}
1036
1037void kvm_timer_cpu_up(void)
1038{
1039	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
1040	if (host_ptimer_irq)
1041		enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
1042}
1043
1044void kvm_timer_cpu_down(void)
1045{
1046	disable_percpu_irq(host_vtimer_irq);
1047	if (host_ptimer_irq)
1048		disable_percpu_irq(host_ptimer_irq);
1049}
1050
1051int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
1052{
1053	struct arch_timer_context *timer;
1054
1055	switch (regid) {
1056	case KVM_REG_ARM_TIMER_CTL:
1057		timer = vcpu_vtimer(vcpu);
1058		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
1059		break;
1060	case KVM_REG_ARM_TIMER_CNT:
1061		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
1062			      &vcpu->kvm->arch.flags)) {
1063			timer = vcpu_vtimer(vcpu);
1064			timer_set_offset(timer, kvm_phys_timer_read() - value);
1065		}
1066		break;
1067	case KVM_REG_ARM_TIMER_CVAL:
1068		timer = vcpu_vtimer(vcpu);
1069		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
1070		break;
1071	case KVM_REG_ARM_PTIMER_CTL:
1072		timer = vcpu_ptimer(vcpu);
1073		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
1074		break;
1075	case KVM_REG_ARM_PTIMER_CNT:
1076		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
1077			      &vcpu->kvm->arch.flags)) {
1078			timer = vcpu_ptimer(vcpu);
1079			timer_set_offset(timer, kvm_phys_timer_read() - value);
1080		}
1081		break;
1082	case KVM_REG_ARM_PTIMER_CVAL:
1083		timer = vcpu_ptimer(vcpu);
1084		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
1085		break;
1086
1087	default:
1088		return -1;
1089	}
1090
1091	return 0;
1092}
1093
1094static u64 read_timer_ctl(struct arch_timer_context *timer)
1095{
1096	/*
	 * Set the ISTATUS bit if the timer has expired.
	 * Note that, according to ARMv8 ARM Issue A.k, the ISTATUS bit is
	 * UNKNOWN when the ENABLE bit is 0, so we choose to set the ISTATUS
	 * bit regardless of the ENABLE bit, for implementation convenience.
1101	 */
1102	u32 ctl = timer_get_ctl(timer);
1103
1104	if (!kvm_timer_compute_delta(timer))
1105		ctl |= ARCH_TIMER_CTRL_IT_STAT;
1106
1107	return ctl;
1108}
1109
1110u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
1111{
1112	switch (regid) {
1113	case KVM_REG_ARM_TIMER_CTL:
1114		return kvm_arm_timer_read(vcpu,
1115					  vcpu_vtimer(vcpu), TIMER_REG_CTL);
1116	case KVM_REG_ARM_TIMER_CNT:
1117		return kvm_arm_timer_read(vcpu,
1118					  vcpu_vtimer(vcpu), TIMER_REG_CNT);
1119	case KVM_REG_ARM_TIMER_CVAL:
1120		return kvm_arm_timer_read(vcpu,
1121					  vcpu_vtimer(vcpu), TIMER_REG_CVAL);
1122	case KVM_REG_ARM_PTIMER_CTL:
1123		return kvm_arm_timer_read(vcpu,
1124					  vcpu_ptimer(vcpu), TIMER_REG_CTL);
1125	case KVM_REG_ARM_PTIMER_CNT:
1126		return kvm_arm_timer_read(vcpu,
1127					  vcpu_ptimer(vcpu), TIMER_REG_CNT);
1128	case KVM_REG_ARM_PTIMER_CVAL:
1129		return kvm_arm_timer_read(vcpu,
1130					  vcpu_ptimer(vcpu), TIMER_REG_CVAL);
1131	}
1132	return (u64)-1;
1133}
1134
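/*
 * Read an emulated timer register from the saved context. TVAL is not
 * stored anywhere and is derived from the compare value instead:
 *
 *	TVAL = lower_32_bits(CVAL - (CNTPCT - offset))
 *
 * i.e. the distance between the compare value and the guest's view of
 * the counter.
 */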
1135static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
1136			      struct arch_timer_context *timer,
1137			      enum kvm_arch_timer_regs treg)
1138{
1139	u64 val;
1140
1141	switch (treg) {
1142	case TIMER_REG_TVAL:
1143		val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
1144		val = lower_32_bits(val);
1145		break;
1146
1147	case TIMER_REG_CTL:
1148		val = read_timer_ctl(timer);
1149		break;
1150
1151	case TIMER_REG_CVAL:
1152		val = timer_get_cval(timer);
1153		break;
1154
1155	case TIMER_REG_CNT:
1156		val = kvm_phys_timer_read() - timer_get_offset(timer);
1157		break;
1158
1159	case TIMER_REG_VOFF:
1160		val = *timer->offset.vcpu_offset;
1161		break;
1162
1163	default:
1164		BUG();
1165	}
1166
1167	return val;
1168}
1169
1170u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
1171			      enum kvm_arch_timers tmr,
1172			      enum kvm_arch_timer_regs treg)
1173{
1174	struct arch_timer_context *timer;
1175	struct timer_map map;
1176	u64 val;
1177
1178	get_timer_map(vcpu, &map);
1179	timer = vcpu_get_timer(vcpu, tmr);
1180
1181	if (timer == map.emul_vtimer || timer == map.emul_ptimer)
1182		return kvm_arm_timer_read(vcpu, timer, treg);
1183
1184	preempt_disable();
1185	timer_save_state(timer);
1186
1187	val = kvm_arm_timer_read(vcpu, timer, treg);
1188
1189	timer_restore_state(timer);
1190	preempt_enable();
1191
1192	return val;
1193}
1194
1195static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
1196				struct arch_timer_context *timer,
1197				enum kvm_arch_timer_regs treg,
1198				u64 val)
1199{
1200	switch (treg) {
1201	case TIMER_REG_TVAL:
1202		timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
1203		break;
1204
1205	case TIMER_REG_CTL:
1206		timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
1207		break;
1208
1209	case TIMER_REG_CVAL:
1210		timer_set_cval(timer, val);
1211		break;
1212
1213	case TIMER_REG_VOFF:
1214		*timer->offset.vcpu_offset = val;
1215		break;
1216
1217	default:
1218		BUG();
1219	}
1220}
1221
1222void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
1223				enum kvm_arch_timers tmr,
1224				enum kvm_arch_timer_regs treg,
1225				u64 val)
1226{
1227	struct arch_timer_context *timer;
1228	struct timer_map map;
1229
1230	get_timer_map(vcpu, &map);
1231	timer = vcpu_get_timer(vcpu, tmr);
1232	if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
1233		soft_timer_cancel(&timer->hrtimer);
1234		kvm_arm_timer_write(vcpu, timer, treg, val);
1235		timer_emulate(timer);
1236	} else {
1237		preempt_disable();
1238		timer_save_state(timer);
1239		kvm_arm_timer_write(vcpu, timer, treg, val);
1240		timer_restore_state(timer);
1241		preempt_enable();
1242	}
1243}
1244
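/*
 * When the GIC cannot deactivate the timer PPIs on the guest's behalf
 * (see kvm_irq_init()), a small "KVM" irqchip is stacked on top of the
 * GIC domain. While an IRQ is forwarded to a vcpu, active-state updates
 * are turned into mask/unmask operations on the parent chip, which is
 * enough to keep the level-triggered timer quiet while the guest owns
 * it.
 */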
1245static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
1246{
1247	if (vcpu)
1248		irqd_set_forwarded_to_vcpu(d);
1249	else
1250		irqd_clr_forwarded_to_vcpu(d);
1251
1252	return 0;
1253}
1254
1255static int timer_irq_set_irqchip_state(struct irq_data *d,
1256				       enum irqchip_irq_state which, bool val)
1257{
1258	if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
1259		return irq_chip_set_parent_state(d, which, val);
1260
1261	if (val)
1262		irq_chip_mask_parent(d);
1263	else
1264		irq_chip_unmask_parent(d);
1265
1266	return 0;
1267}
1268
1269static void timer_irq_eoi(struct irq_data *d)
1270{
1271	if (!irqd_is_forwarded_to_vcpu(d))
1272		irq_chip_eoi_parent(d);
1273}
1274
1275static void timer_irq_ack(struct irq_data *d)
1276{
1277	d = d->parent_data;
1278	if (d->chip->irq_ack)
1279		d->chip->irq_ack(d);
1280}
1281
1282static struct irq_chip timer_chip = {
1283	.name			= "KVM",
1284	.irq_ack		= timer_irq_ack,
1285	.irq_mask		= irq_chip_mask_parent,
1286	.irq_unmask		= irq_chip_unmask_parent,
1287	.irq_eoi		= timer_irq_eoi,
1288	.irq_set_type		= irq_chip_set_type_parent,
1289	.irq_set_vcpu_affinity	= timer_irq_set_vcpu_affinity,
1290	.irq_set_irqchip_state	= timer_irq_set_irqchip_state,
1291};
1292
1293static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
1294				  unsigned int nr_irqs, void *arg)
1295{
1296	irq_hw_number_t hwirq = (uintptr_t)arg;
1297
1298	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
1299					     &timer_chip, NULL);
1300}
1301
1302static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
1303				  unsigned int nr_irqs)
1304{
1305}
1306
1307static const struct irq_domain_ops timer_domain_ops = {
1308	.alloc	= timer_irq_domain_alloc,
1309	.free	= timer_irq_domain_free,
1310};
1311
1312static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
1313{
1314	*flags = irq_get_trigger_type(virq);
1315	if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
1316		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
1317			virq);
1318		*flags = IRQF_TRIGGER_LOW;
1319	}
1320}
1321
1322static int kvm_irq_init(struct arch_timer_kvm_info *info)
1323{
1324	struct irq_domain *domain = NULL;
1325
1326	if (info->virtual_irq <= 0) {
1327		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
1328			info->virtual_irq);
1329		return -ENODEV;
1330	}
1331
1332	host_vtimer_irq = info->virtual_irq;
1333	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
1334
1335	if (kvm_vgic_global_state.no_hw_deactivation) {
1336		struct fwnode_handle *fwnode;
1337		struct irq_data *data;
1338
1339		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
1340		if (!fwnode)
1341			return -ENOMEM;
1342
		/* Assume the vtimer and ptimer share the same parent domain */
1344		data = irq_get_irq_data(host_vtimer_irq);
1345		domain = irq_domain_create_hierarchy(data->domain, 0,
1346						     NR_KVM_TIMERS, fwnode,
1347						     &timer_domain_ops, NULL);
1348		if (!domain) {
1349			irq_domain_free_fwnode(fwnode);
1350			return -ENOMEM;
1351		}
1352
1353		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
1354		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
1355					    (void *)TIMER_VTIMER));
1356	}
1357
1358	if (info->physical_irq > 0) {
1359		host_ptimer_irq = info->physical_irq;
1360		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);
1361
1362		if (domain)
1363			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
1364						    (void *)TIMER_PTIMER));
1365	}
1366
1367	return 0;
1368}
1369
1370int __init kvm_timer_hyp_init(bool has_gic)
1371{
1372	struct arch_timer_kvm_info *info;
1373	int err;
1374
1375	info = arch_timer_get_kvm_info();
1376	timecounter = &info->timecounter;
1377
1378	if (!timecounter->cc) {
1379		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
1380		return -ENODEV;
1381	}
1382
1383	err = kvm_irq_init(info);
1384	if (err)
1385		return err;
1386
1387	/* First, do the virtual EL1 timer irq */
1388
1389	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
1390				 "kvm guest vtimer", kvm_get_running_vcpus());
1391	if (err) {
1392		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
1393			host_vtimer_irq, err);
1394		return err;
1395	}
1396
1397	if (has_gic) {
1398		err = irq_set_vcpu_affinity(host_vtimer_irq,
1399					    kvm_get_running_vcpus());
1400		if (err) {
1401			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
1402			goto out_free_vtimer_irq;
1403		}
1404
1405		static_branch_enable(&has_gic_active_state);
1406	}
1407
1408	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);
1409
1410	/* Now let's do the physical EL1 timer irq */
1411
1412	if (info->physical_irq > 0) {
1413		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
1414					 "kvm guest ptimer", kvm_get_running_vcpus());
1415		if (err) {
1416			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
1417				host_ptimer_irq, err);
1418			goto out_free_vtimer_irq;
1419		}
1420
1421		if (has_gic) {
1422			err = irq_set_vcpu_affinity(host_ptimer_irq,
1423						    kvm_get_running_vcpus());
1424			if (err) {
1425				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
1426				goto out_free_ptimer_irq;
1427			}
1428		}
1429
1430		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
1431	} else if (has_vhe()) {
1432		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
1433			info->physical_irq);
1434		err = -ENODEV;
1435		goto out_free_vtimer_irq;
1436	}
1437
1438	return 0;
1439
1440out_free_ptimer_irq:
1441	if (info->physical_irq > 0)
1442		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
1443out_free_vtimer_irq:
1444	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
1445	return err;
1446}
1447
1448void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
1449{
1450	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1451
1452	soft_timer_cancel(&timer->bg_timer);
1453}
1454
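/*
 * Claim ownership of each timer PPI in the vgic and check that no two
 * timers share an interrupt. Once the configuration is deemed valid,
 * the PPI assignment becomes immutable for the lifetime of the VM.
 */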
1455static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
1456{
1457	u32 ppis = 0;
1458	bool valid;
1459
1460	mutex_lock(&vcpu->kvm->arch.config_lock);
1461
1462	for (int i = 0; i < nr_timers(vcpu); i++) {
1463		struct arch_timer_context *ctx;
1464		int irq;
1465
1466		ctx = vcpu_get_timer(vcpu, i);
1467		irq = timer_irq(ctx);
1468		if (kvm_vgic_set_owner(vcpu, irq, ctx))
1469			break;
1470
1471		/*
1472		 * We know by construction that we only have PPIs, so
1473		 * all values are less than 32.
1474		 */
1475		ppis |= BIT(irq);
1476	}
1477
1478	valid = hweight32(ppis) == nr_timers(vcpu);
1479
1480	if (valid)
1481		set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);
1482
1483	mutex_unlock(&vcpu->kvm->arch.config_lock);
1484
1485	return valid;
1486}
1487
1488static bool kvm_arch_timer_get_input_level(int vintid)
1489{
1490	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
1491
1492	if (WARN(!vcpu, "No vcpu context!\n"))
1493		return false;
1494
1495	for (int i = 0; i < nr_timers(vcpu); i++) {
1496		struct arch_timer_context *ctx;
1497
1498		ctx = vcpu_get_timer(vcpu, i);
1499		if (timer_irq(ctx) == vintid)
1500			return kvm_timer_should_fire(ctx);
1501	}
1502
1503	/* A timer IRQ has fired, but no matching timer was found? */
1504	WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);
1505
1506	return false;
1507}
1508
1509int kvm_timer_enable(struct kvm_vcpu *vcpu)
1510{
1511	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1512	struct timer_map map;
1513	int ret;
1514
1515	if (timer->enabled)
1516		return 0;
1517
1518	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
1519	if (!irqchip_in_kernel(vcpu->kvm))
1520		goto no_vgic;
1521
1522	/*
1523	 * At this stage, we have the guarantee that the vgic is both
1524	 * available and initialized.
1525	 */
1526	if (!timer_irqs_are_valid(vcpu)) {
1527		kvm_debug("incorrectly configured timer irqs\n");
1528		return -EINVAL;
1529	}
1530
1531	get_timer_map(vcpu, &map);
1532
1533	ret = kvm_vgic_map_phys_irq(vcpu,
1534				    map.direct_vtimer->host_timer_irq,
1535				    timer_irq(map.direct_vtimer),
1536				    &arch_timer_irq_ops);
1537	if (ret)
1538		return ret;
1539
1540	if (map.direct_ptimer) {
1541		ret = kvm_vgic_map_phys_irq(vcpu,
1542					    map.direct_ptimer->host_timer_irq,
1543					    timer_irq(map.direct_ptimer),
1544					    &arch_timer_irq_ops);
1545	}
1546
1547	if (ret)
1548		return ret;
1549
1550no_vgic:
1551	timer->enabled = 1;
1552	return 0;
1553}
1554
1555/* If we have CNTPOFF, permanently set ECV to enable it */
1556void kvm_timer_init_vhe(void)
1557{
1558	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
1559		sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
1560}
1561
1562int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1563{
1564	int __user *uaddr = (int __user *)(long)attr->addr;
1565	int irq, idx, ret = 0;
1566
1567	if (!irqchip_in_kernel(vcpu->kvm))
1568		return -EINVAL;
1569
1570	if (get_user(irq, uaddr))
1571		return -EFAULT;
1572
1573	if (!(irq_is_ppi(irq)))
1574		return -EINVAL;
1575
1576	mutex_lock(&vcpu->kvm->arch.config_lock);
1577
1578	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
1579		     &vcpu->kvm->arch.flags)) {
1580		ret = -EBUSY;
1581		goto out;
1582	}
1583
1584	switch (attr->attr) {
1585	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1586		idx = TIMER_VTIMER;
1587		break;
1588	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1589		idx = TIMER_PTIMER;
1590		break;
1591	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1592		idx = TIMER_HVTIMER;
1593		break;
1594	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1595		idx = TIMER_HPTIMER;
1596		break;
1597	default:
1598		ret = -ENXIO;
1599		goto out;
1600	}
1601
1602	/*
	 * We cannot validate the uniqueness of the IRQs before the VM runs,
	 * so take them at face value. The verdict will be given on the first
	 * vcpu run, for each vcpu. Yes, this is late. Blame it on the stupid
	 * API.
1606	 */
1607	vcpu->kvm->arch.timer_data.ppi[idx] = irq;
1608
1609out:
1610	mutex_unlock(&vcpu->kvm->arch.config_lock);
1611	return ret;
1612}
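
/*
 * Illustrative sketch, not part of the kernel: this is roughly how
 * userspace would drive the attribute handled above through the vcpu
 * device-attr interface, assuming the usual <linux/kvm.h> definitions
 * and ignoring error handling:
 *
 *	int irq = 27;
 *	struct kvm_device_attr attr = {
 *		.group	= KVM_ARM_VCPU_TIMER_CTRL,
 *		.attr	= KVM_ARM_VCPU_TIMER_IRQ_VTIMER,
 *		.addr	= (__u64)(unsigned long)&irq,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
 */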
1613
1614int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1615{
1616	int __user *uaddr = (int __user *)(long)attr->addr;
1617	struct arch_timer_context *timer;
1618	int irq;
1619
1620	switch (attr->attr) {
1621	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1622		timer = vcpu_vtimer(vcpu);
1623		break;
1624	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1625		timer = vcpu_ptimer(vcpu);
1626		break;
1627	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1628		timer = vcpu_hvtimer(vcpu);
1629		break;
1630	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1631		timer = vcpu_hptimer(vcpu);
1632		break;
1633	default:
1634		return -ENXIO;
1635	}
1636
1637	irq = timer_irq(timer);
1638	return put_user(irq, uaddr);
1639}
1640
1641int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1642{
1643	switch (attr->attr) {
1644	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1645	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1646	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1647	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1648		return 0;
1649	}
1650
1651	return -ENXIO;
1652}
1653
1654int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
1655				    struct kvm_arm_counter_offset *offset)
1656{
1657	int ret = 0;
1658
1659	if (offset->reserved)
1660		return -EINVAL;
1661
1662	mutex_lock(&kvm->lock);
1663
1664	if (lock_all_vcpus(kvm)) {
1665		set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);
1666
1667		/*
1668		 * If userspace decides to set the offset using this
1669		 * API rather than merely restoring the counter
1670		 * values, the offset applies to both the virtual and
1671		 * physical views.
1672		 */
1673		kvm->arch.timer_data.voffset = offset->counter_offset;
1674		kvm->arch.timer_data.poffset = offset->counter_offset;
1675
1676		unlock_all_vcpus(kvm);
1677	} else {
1678		ret = -EBUSY;
1679	}
1680
1681	mutex_unlock(&kvm->lock);
1682
1683	return ret;
1684}
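
/*
 * Illustrative sketch, not part of the kernel: userspace sets the
 * VM-wide counter offset consumed above with the (assumed)
 * KVM_ARM_SET_COUNTER_OFFSET vm ioctl, e.g.:
 *
 *	struct kvm_arm_counter_offset off = {
 *		.counter_offset	= 0x10000000,
 *	};
 *
 *	ioctl(vm_fd, KVM_ARM_SET_COUNTER_OFFSET, &off);
 */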
1685