xref: /kernel/linux/linux-5.10/arch/x86/kvm/lapic.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-only
2
3/*
4 * Local APIC virtualization
5 *
6 * Copyright (C) 2006 Qumranet, Inc.
7 * Copyright (C) 2007 Novell
8 * Copyright (C) 2007 Intel
9 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
10 *
11 * Authors:
12 *   Dor Laor <dor.laor@qumranet.com>
13 *   Gregory Haskins <ghaskins@novell.com>
14 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
15 *
16 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
17 */
18
19#include <linux/kvm_host.h>
20#include <linux/kvm.h>
21#include <linux/mm.h>
22#include <linux/highmem.h>
23#include <linux/smp.h>
24#include <linux/hrtimer.h>
25#include <linux/io.h>
26#include <linux/export.h>
27#include <linux/math64.h>
28#include <linux/slab.h>
29#include <asm/processor.h>
30#include <asm/msr.h>
31#include <asm/page.h>
32#include <asm/current.h>
33#include <asm/apicdef.h>
34#include <asm/delay.h>
35#include <linux/atomic.h>
36#include <linux/jump_label.h>
37#include "kvm_cache_regs.h"
38#include "irq.h"
39#include "ioapic.h"
40#include "trace.h"
41#include "x86.h"
42#include "cpuid.h"
43#include "hyperv.h"
44
45#ifndef CONFIG_X86_64
46#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
47#else
48#define mod_64(x, y) ((x) % (y))
49#endif
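
/*
 * Worked example of the 32-bit fallback above (an illustration, not part of
 * the original source): with x = 10 and y = 3, div64_u64(10, 3) = 3, so
 * mod_64(10, 3) = 10 - 3 * 3 = 1, matching the native '%' used on x86_64.
 * The subtraction form is needed because a native 64-bit '%' on 32-bit x86
 * would emit calls to libgcc helpers the kernel does not link.
 */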
50
51#define PRId64 "d"
52#define PRIx64 "llx"
53#define PRIu64 "u"
54#define PRIo64 "o"
55
56/* 0x14 is the LAPIC version on Xeon and Pentium CPUs (Intel SDM Sec. 8.4.8) */
57#define APIC_VERSION			(0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
58#define LAPIC_MMIO_LENGTH		(1 << 12)
59/* the following defines are not in apicdef.h */
60#define MAX_APIC_VECTOR			256
61#define APIC_VECTORS_PER_REG		32
62
63static bool lapic_timer_advance_dynamic __read_mostly;
64#define LAPIC_TIMER_ADVANCE_ADJUST_MIN	100	/* clock cycles */
65#define LAPIC_TIMER_ADVANCE_ADJUST_MAX	10000	/* clock cycles */
66#define LAPIC_TIMER_ADVANCE_NS_INIT	1000
67#define LAPIC_TIMER_ADVANCE_NS_MAX	5000
68/* step-by-step approximation to mitigate fluctuation */
69#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
70
71static inline int apic_test_vector(int vec, void *bitmap)
72{
73	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
74}
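
/*
 * For illustration (assuming the VEC_POS()/REG_POS() helpers from lapic.h,
 * i.e. VEC_POS(v) == ((v) & 31) and REG_POS(v) == (((v) >> 5) << 4)): the
 * 256 vectors live in eight 32-bit registers spaced 16 bytes apart, so
 * vector 0x31 (49) maps to bit VEC_POS(49) = 17 of the register at offset
 * REG_POS(49) = 0x10 from the bitmap base.
 */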
75
76bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
77{
78	struct kvm_lapic *apic = vcpu->arch.apic;
79
80	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
81		apic_test_vector(vector, apic->regs + APIC_IRR);
82}
83
84static inline int __apic_test_and_set_vector(int vec, void *bitmap)
85{
86	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
87}
88
89static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
90{
91	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
92}
93
94struct static_key_deferred apic_hw_disabled __read_mostly;
95struct static_key_deferred apic_sw_disabled __read_mostly;
96
97static inline int apic_enabled(struct kvm_lapic *apic)
98{
99	return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
100}
101
102#define LVT_MASK	\
103	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
104
105#define LINT_MASK	\
106	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
107	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
108
109static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
110{
111	return apic->vcpu->vcpu_id;
112}
113
114static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
115{
116	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
117		(kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
118}
119
120bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
121{
122	return kvm_x86_ops.set_hv_timer
123	       && !(kvm_mwait_in_guest(vcpu->kvm) ||
124		    kvm_can_post_timer_interrupt(vcpu));
125}
126EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);
127
128static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
129{
130	return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
131}
132
133static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
134		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
135	switch (map->mode) {
136	case KVM_APIC_MODE_X2APIC: {
137		u32 offset = (dest_id >> 16) * 16;
138		u32 max_apic_id = map->max_apic_id;
139
140		if (offset <= max_apic_id) {
141			u8 cluster_size = min(max_apic_id - offset + 1, 16U);
142
143			offset = array_index_nospec(offset, map->max_apic_id + 1);
144			*cluster = &map->phys_map[offset];
145			*mask = dest_id & (0xffff >> (16 - cluster_size));
146		} else {
147			*mask = 0;
148		}
149
150		return true;
151		}
152	case KVM_APIC_MODE_XAPIC_FLAT:
153		*cluster = map->xapic_flat_map;
154		*mask = dest_id & 0xff;
155		return true;
156	case KVM_APIC_MODE_XAPIC_CLUSTER:
157		*cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
158		*mask = dest_id & 0xf;
159		return true;
160	default:
161		/* Not optimized. */
162		return false;
163	}
164}
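
/*
 * Sketch of the x2APIC decode above (illustrative values, not from the
 * original source): a logical dest_id of 0x00030005 selects cluster 3,
 * i.e. phys_map[48..63] (offset = 3 * 16 = 48), and *mask = 0x0005
 * addresses the first and third APICs of that cluster.
 */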
165
166static void kvm_apic_map_free(struct rcu_head *rcu)
167{
168	struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
169
170	kvfree(map);
171}
172
173/*
174 * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
175 *
176 * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
177 * apic_map_lock_held.
178 */
179enum {
180	CLEAN,
181	UPDATE_IN_PROGRESS,
182	DIRTY
183};
184
185void kvm_recalculate_apic_map(struct kvm *kvm)
186{
187	struct kvm_apic_map *new, *old = NULL;
188	struct kvm_vcpu *vcpu;
189	int i;
190	u32 max_id = 255; /* enough space for any xAPIC ID */
191
192	/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
193	if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
194		return;
195
196	mutex_lock(&kvm->arch.apic_map_lock);
197	/*
198	 * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
199	 * (if clean) or the APIC registers (if dirty).
200	 */
201	if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
202				   DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
203		/* Someone else has updated the map. */
204		mutex_unlock(&kvm->arch.apic_map_lock);
205		return;
206	}
207
208	kvm_for_each_vcpu(i, vcpu, kvm)
209		if (kvm_apic_present(vcpu))
210			max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
211
212	new = kvzalloc(sizeof(struct kvm_apic_map) +
213	                   sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
214			   GFP_KERNEL_ACCOUNT);
215
216	if (!new)
217		goto out;
218
219	new->max_apic_id = max_id;
220
221	kvm_for_each_vcpu(i, vcpu, kvm) {
222		struct kvm_lapic *apic = vcpu->arch.apic;
223		struct kvm_lapic **cluster;
224		u16 mask;
225		u32 ldr;
226		u8 xapic_id;
227		u32 x2apic_id;
228
229		if (!kvm_apic_present(vcpu))
230			continue;
231
232		xapic_id = kvm_xapic_id(apic);
233		x2apic_id = kvm_x2apic_id(apic);
234
235		/* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
236		if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
237				x2apic_id <= new->max_apic_id)
238			new->phys_map[x2apic_id] = apic;
239		/*
240		 * ... the xAPIC ID of VCPUs with APIC ID > 0xff will wrap around;
241		 * prevent them from masking VCPUs with APIC ID <= 0xff.
242		 */
243		if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
244			new->phys_map[xapic_id] = apic;
245
246		if (!kvm_apic_sw_enabled(apic))
247			continue;
248
249		ldr = kvm_lapic_get_reg(apic, APIC_LDR);
250
251		if (apic_x2apic_mode(apic)) {
252			new->mode |= KVM_APIC_MODE_X2APIC;
253		} else if (ldr) {
254			ldr = GET_APIC_LOGICAL_ID(ldr);
255			if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
256				new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
257			else
258				new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
259		}
260
261		if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
262			continue;
263
264		if (mask)
265			cluster[ffs(mask) - 1] = apic;
266	}
267out:
268	old = rcu_dereference_protected(kvm->arch.apic_map,
269			lockdep_is_held(&kvm->arch.apic_map_lock));
270	rcu_assign_pointer(kvm->arch.apic_map, new);
271	/*
272	 * Write kvm->arch.apic_map before clearing apic->apic_map_dirty.
273	 * If another update has come in, leave it DIRTY.
274	 */
275	atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
276			       UPDATE_IN_PROGRESS, CLEAN);
277	mutex_unlock(&kvm->arch.apic_map_lock);
278
279	if (old)
280		call_rcu(&old->rcu, kvm_apic_map_free);
281
282	kvm_make_scan_ioapic_request(kvm);
283}
284
285static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
286{
287	bool enabled = val & APIC_SPIV_APIC_ENABLED;
288
289	kvm_lapic_set_reg(apic, APIC_SPIV, val);
290
291	if (enabled != apic->sw_enabled) {
292		apic->sw_enabled = enabled;
293		if (enabled)
294			static_key_slow_dec_deferred(&apic_sw_disabled);
295		else
296			static_key_slow_inc(&apic_sw_disabled.key);
297
298		atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
299	}
300
301	/* Check if there are APF page ready requests pending */
302	if (enabled)
303		kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
304}
305
306static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
307{
308	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
309	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
310}
311
312static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
313{
314	kvm_lapic_set_reg(apic, APIC_LDR, id);
315	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
316}
317
318static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
319{
320	kvm_lapic_set_reg(apic, APIC_DFR, val);
321	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
322}
323
324static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
325{
326	return ((id >> 4) << 16) | (1 << (id & 0xf));
327}
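
/*
 * Example of the LDR derivation above (illustrative value): for x2APIC
 * ID 0x23, the logical ID is ((0x23 >> 4) << 16) | (1 << (0x23 & 0xf)) =
 * 0x00020008, i.e. cluster 2 with the bit for member 3 of that cluster set.
 */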
328
329static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
330{
331	u32 ldr = kvm_apic_calc_x2apic_ldr(id);
332
333	WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
334
335	kvm_lapic_set_reg(apic, APIC_ID, id);
336	kvm_lapic_set_reg(apic, APIC_LDR, ldr);
337	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
338}
339
340static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
341{
342	return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
343}
344
345static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
346{
347	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
348}
349
350static inline int apic_lvtt_period(struct kvm_lapic *apic)
351{
352	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
353}
354
355static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
356{
357	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
358}
359
360static inline int apic_lvt_nmi_mode(u32 lvt_val)
361{
362	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
363}
364
365void kvm_apic_set_version(struct kvm_vcpu *vcpu)
366{
367	struct kvm_lapic *apic = vcpu->arch.apic;
368	u32 v = APIC_VERSION;
369
370	if (!lapic_in_kernel(vcpu))
371		return;
372
373	/*
374	 * KVM's in-kernel IOAPIC emulates the 82093AA datasheet, which has no
375	 * EOI register.  Some buggy OSes (e.g. Windows with the Hyper-V role)
376	 * disable EOI broadcast in the LAPIC without first checking the IOAPIC
377	 * version, so level-triggered interrupts would never get EOIed in the
378	 * IOAPIC.
379	 */
380	if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
381	    !ioapic_in_kernel(vcpu->kvm))
382		v |= APIC_LVR_DIRECTED_EOI;
383	kvm_lapic_set_reg(apic, APIC_LVR, v);
384}
385
386static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
387	LVT_MASK,	/* part LVTT mask, timer mode mask added at runtime */
388	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
389	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
390	LINT_MASK, LINT_MASK,	/* LVT0-1 */
391	LVT_MASK		/* LVTERR */
392};
393
394static int find_highest_vector(void *bitmap)
395{
396	int vec;
397	u32 *reg;
398
399	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
400	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
401		reg = bitmap + REG_POS(vec);
402		if (*reg)
403			return __fls(*reg) + vec;
404	}
405
406	return -1;
407}
408
409static u8 count_vectors(void *bitmap)
410{
411	int vec;
412	u32 *reg;
413	u8 count = 0;
414
415	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
416		reg = bitmap + REG_POS(vec);
417		count += hweight32(*reg);
418	}
419
420	return count;
421}
422
423bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
424{
425	u32 i, vec;
426	u32 pir_val, irr_val, prev_irr_val;
427	int max_updated_irr;
428
429	max_updated_irr = -1;
430	*max_irr = -1;
431
432	for (i = vec = 0; i <= 7; i++, vec += 32) {
433		pir_val = READ_ONCE(pir[i]);
434		irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
435		if (pir_val) {
436			prev_irr_val = irr_val;
437			irr_val |= xchg(&pir[i], 0);
438			*((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
439			if (prev_irr_val != irr_val) {
440				max_updated_irr =
441					__fls(irr_val ^ prev_irr_val) + vec;
442			}
443		}
444		if (irr_val)
445			*max_irr = __fls(irr_val) + vec;
446	}
447
448	return ((max_updated_irr != -1) &&
449		(max_updated_irr == *max_irr));
450}
451EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
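
/*
 * A minimal sketch of the PIR->IRR merge above (illustrative values): if
 * pir[1] == 0x00000404 and the guest's IRR word for vectors 32-63 already
 * holds 0x00000004, the xchg() clears pir[1] and the IRR word becomes
 * 0x00000404; the changed bit is __fls(0x400) + 32 = vector 42, so both
 * max_updated_irr and *max_irr report 42 and the function returns true.
 */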
452
453bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
454{
455	struct kvm_lapic *apic = vcpu->arch.apic;
456
457	return __kvm_apic_update_irr(pir, apic->regs, max_irr);
458}
459EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
460
461static inline int apic_search_irr(struct kvm_lapic *apic)
462{
463	return find_highest_vector(apic->regs + APIC_IRR);
464}
465
466static inline int apic_find_highest_irr(struct kvm_lapic *apic)
467{
468	int result;
469
470	/*
471	 * Note that irr_pending is just a hint.  It will always be
472	 * true when virtual interrupt delivery is enabled.
473	 */
474	if (!apic->irr_pending)
475		return -1;
476
477	result = apic_search_irr(apic);
478	ASSERT(result == -1 || result >= 16);
479
480	return result;
481}
482
483static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
484{
485	struct kvm_vcpu *vcpu;
486
487	vcpu = apic->vcpu;
488
489	if (unlikely(vcpu->arch.apicv_active)) {
490		/* need to update RVI */
491		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
492		kvm_x86_ops.hwapic_irr_update(vcpu,
493				apic_find_highest_irr(apic));
494	} else {
495		apic->irr_pending = false;
496		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
497		if (apic_search_irr(apic) != -1)
498			apic->irr_pending = true;
499	}
500}
501
502void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
503{
504	apic_clear_irr(vec, vcpu->arch.apic);
505}
506EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
507
508static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
509{
510	struct kvm_vcpu *vcpu;
511
512	if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
513		return;
514
515	vcpu = apic->vcpu;
516
517	/*
518	 * With APIC virtualization enabled, all caching is disabled
519	 * because the processor can modify ISR under the hood.  Instead
520	 * just set SVI.
521	 */
522	if (unlikely(vcpu->arch.apicv_active))
523		kvm_x86_ops.hwapic_isr_update(vcpu, vec);
524	else {
525		++apic->isr_count;
526		BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
527		/*
528		 * ISR (in service register) bit is set when injecting an interrupt.
529		 * The highest vector is injected. Thus the latest bit set matches
530		 * the highest bit in ISR.
531		 */
532		apic->highest_isr_cache = vec;
533	}
534}
535
536static inline int apic_find_highest_isr(struct kvm_lapic *apic)
537{
538	int result;
539
540	/*
541	 * Note that isr_count is always 1, and highest_isr_cache
542	 * is always -1, with APIC virtualization enabled.
543	 */
544	if (!apic->isr_count)
545		return -1;
546	if (likely(apic->highest_isr_cache != -1))
547		return apic->highest_isr_cache;
548
549	result = find_highest_vector(apic->regs + APIC_ISR);
550	ASSERT(result == -1 || result >= 16);
551
552	return result;
553}
554
555static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
556{
557	struct kvm_vcpu *vcpu;
558	if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
559		return;
560
561	vcpu = apic->vcpu;
562
563	/*
564	 * We do get here for APIC virtualization enabled if the guest
565	 * uses the Hyper-V APIC enlightenment.  In this case we may need
566	 * to trigger a new interrupt delivery by writing the SVI field;
567	 * on the other hand isr_count and highest_isr_cache are unused
568	 * and must be left alone.
569	 */
570	if (unlikely(vcpu->arch.apicv_active))
571		kvm_x86_ops.hwapic_isr_update(vcpu,
572					       apic_find_highest_isr(apic));
573	else {
574		--apic->isr_count;
575		BUG_ON(apic->isr_count < 0);
576		apic->highest_isr_cache = -1;
577	}
578}
579
580int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
581{
582	/* This may race with setting of irr in __apic_accept_irq() and
583	 * the value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
584	 * will cause vmexit immediately and the value will be recalculated
585	 * on the next vmentry.
586	 */
587	return apic_find_highest_irr(vcpu->arch.apic);
588}
589EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
590
591static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
592			     int vector, int level, int trig_mode,
593			     struct dest_map *dest_map);
594
595int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
596		     struct dest_map *dest_map)
597{
598	struct kvm_lapic *apic = vcpu->arch.apic;
599
600	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
601			irq->level, irq->trig_mode, dest_map);
602}
603
604static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
605			 struct kvm_lapic_irq *irq, u32 min)
606{
607	int i, count = 0;
608	struct kvm_vcpu *vcpu;
609
610	if (min > map->max_apic_id)
611		return 0;
612
613	for_each_set_bit(i, ipi_bitmap,
614		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
615		if (map->phys_map[min + i]) {
616			vcpu = map->phys_map[min + i]->vcpu;
617			count += kvm_apic_set_irq(vcpu, irq, NULL);
618		}
619	}
620
621	return count;
622}
623
624int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
625		    unsigned long ipi_bitmap_high, u32 min,
626		    unsigned long icr, int op_64_bit)
627{
628	struct kvm_apic_map *map;
629	struct kvm_lapic_irq irq = {0};
630	int cluster_size = op_64_bit ? 64 : 32;
631	int count;
632
633	if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
634		return -KVM_EINVAL;
635
636	irq.vector = icr & APIC_VECTOR_MASK;
637	irq.delivery_mode = icr & APIC_MODE_MASK;
638	irq.level = (icr & APIC_INT_ASSERT) != 0;
639	irq.trig_mode = icr & APIC_INT_LEVELTRIG;
640
641	rcu_read_lock();
642	map = rcu_dereference(kvm->arch.apic_map);
643
644	count = -EOPNOTSUPP;
645	if (likely(map)) {
646		count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
647		min += cluster_size;
648		count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
649	}
650
651	rcu_read_unlock();
652	return count;
653}
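
/*
 * Usage sketch for the PV IPI bitmap layout handled above (illustrative
 * values, not from the original source): with min = 64 on a 64-bit guest,
 * bit n of ipi_bitmap_low targets APIC ID 64 + n and bit n of
 * ipi_bitmap_high targets APIC ID 128 + n, since the high bitmap starts
 * one cluster_size (64 IDs) after min.
 */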
654
655static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
656{
657
658	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
659				      sizeof(val));
660}
661
662static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
663{
664
665	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
666				      sizeof(*val));
667}
668
669static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
670{
671	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
672}
673
674static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
675{
676	u8 val;
677	if (pv_eoi_get_user(vcpu, &val) < 0) {
678		printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
679			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
680		return false;
681	}
682	return val & 0x1;
683}
684
685static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
686{
687	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
688		printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
689			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
690		return;
691	}
692	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
693}
694
695static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
696{
697	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
698		printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
699			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
700		return;
701	}
702	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
703}
704
705static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
706{
707	int highest_irr;
708	if (apic->vcpu->arch.apicv_active)
709		highest_irr = kvm_x86_ops.sync_pir_to_irr(apic->vcpu);
710	else
711		highest_irr = apic_find_highest_irr(apic);
712	if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
713		return -1;
714	return highest_irr;
715}
716
717static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
718{
719	u32 tpr, isrv, ppr, old_ppr;
720	int isr;
721
722	old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
723	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
724	isr = apic_find_highest_isr(apic);
725	isrv = (isr != -1) ? isr : 0;
726
727	if ((tpr & 0xf0) >= (isrv & 0xf0))
728		ppr = tpr & 0xff;
729	else
730		ppr = isrv & 0xf0;
731
732	*new_ppr = ppr;
733	if (old_ppr != ppr)
734		kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
735
736	return ppr < old_ppr;
737}
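
/*
 * Worked example of the PPR rule above (illustrative values): with
 * TPR = 0x30 and highest in-service vector 0x51 (isrv & 0xf0 == 0x50),
 * 0x30 < 0x50 so PPR = 0x50 and interrupts with vector class <= 5 stay
 * blocked.  With TPR = 0x60 instead, 0x60 >= 0x50 and PPR = TPR = 0x60.
 */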
738
739static void apic_update_ppr(struct kvm_lapic *apic)
740{
741	u32 ppr;
742
743	if (__apic_update_ppr(apic, &ppr) &&
744	    apic_has_interrupt_for_ppr(apic, ppr) != -1)
745		kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
746}
747
748void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
749{
750	apic_update_ppr(vcpu->arch.apic);
751}
752EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
753
754static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
755{
756	kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
757	apic_update_ppr(apic);
758}
759
760static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
761{
762	return mda == (apic_x2apic_mode(apic) ?
763			X2APIC_BROADCAST : APIC_BROADCAST);
764}
765
766static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
767{
768	if (kvm_apic_broadcast(apic, mda))
769		return true;
770
771	if (apic_x2apic_mode(apic))
772		return mda == kvm_x2apic_id(apic);
773
774	/*
775	 * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
776	 * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
777	 * this allows unique addressing of VCPUs with APIC ID over 0xff.
778	 * The 0xff condition is needed because the xAPIC ID is writeable.
779	 */
780	if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
781		return true;
782
783	return mda == kvm_xapic_id(apic);
784}
785
786static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
787{
788	u32 logical_id;
789
790	if (kvm_apic_broadcast(apic, mda))
791		return true;
792
793	logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
794
795	if (apic_x2apic_mode(apic))
796		return ((logical_id >> 16) == (mda >> 16))
797		       && (logical_id & mda & 0xffff) != 0;
798
799	logical_id = GET_APIC_LOGICAL_ID(logical_id);
800
801	switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
802	case APIC_DFR_FLAT:
803		return (logical_id & mda) != 0;
804	case APIC_DFR_CLUSTER:
805		return ((logical_id >> 4) == (mda >> 4))
806		       && (logical_id & mda & 0xf) != 0;
807	default:
808		return false;
809	}
810}
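
/*
 * Illustration of the xAPIC matching above (made-up values): in cluster
 * mode, LDR logical ID 0x21 (cluster 2, member bit 0) matches mda 0x23
 * because the clusters agree (2 == 2) and 0x21 & 0x23 & 0xf != 0; in
 * flat mode, logical ID 0x21 matches any mda with bit 0 or bit 5 set.
 */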
811
812/* The KVM local APIC implementation has two quirks:
813 *
814 *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
815 *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
816 *    KVM doesn't do that aliasing.
817 *
818 *  - in-kernel IOAPIC messages have to be delivered directly to
819 *    x2APIC, because the kernel does not support interrupt remapping.
820 *    In order to support broadcast without interrupt remapping, x2APIC
821 *    rewrites the destination of non-IPI messages from APIC_BROADCAST
822 *    to X2APIC_BROADCAST.
823 *
824 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
825 * important when userspace wants to use x2APIC-format MSIs, because
826 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
827 */
828static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
829		struct kvm_lapic *source, struct kvm_lapic *target)
830{
831	bool ipi = source != NULL;
832
833	if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
834	    !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
835		return X2APIC_BROADCAST;
836
837	return dest_id;
838}
839
840bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
841			   int shorthand, unsigned int dest, int dest_mode)
842{
843	struct kvm_lapic *target = vcpu->arch.apic;
844	u32 mda = kvm_apic_mda(vcpu, dest, source, target);
845
846	ASSERT(target);
847	switch (shorthand) {
848	case APIC_DEST_NOSHORT:
849		if (dest_mode == APIC_DEST_PHYSICAL)
850			return kvm_apic_match_physical_addr(target, mda);
851		else
852			return kvm_apic_match_logical_addr(target, mda);
853	case APIC_DEST_SELF:
854		return target == source;
855	case APIC_DEST_ALLINC:
856		return true;
857	case APIC_DEST_ALLBUT:
858		return target != source;
859	default:
860		return false;
861	}
862}
863EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
864
865int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
866		       const unsigned long *bitmap, u32 bitmap_size)
867{
868	u32 mod;
869	int i, idx = -1;
870
871	mod = vector % dest_vcpus;
872
873	for (i = 0; i <= mod; i++) {
874		idx = find_next_bit(bitmap, bitmap_size, idx + 1);
875		BUG_ON(idx == bitmap_size);
876	}
877
878	return idx;
879}
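
/*
 * Worked example of the vector-hash walk above (illustrative values):
 * vector = 49 with dest_vcpus = 3 gives mod = 49 % 3 = 1, so the loop
 * takes two find_next_bit() steps and returns the index of the second
 * set bit in *bitmap, e.g. idx = 3 for a bitmap of 0b101010.
 */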
880
881static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
882{
883	if (!kvm->arch.disabled_lapic_found) {
884		kvm->arch.disabled_lapic_found = true;
885		printk(KERN_INFO
886		       "Disabled LAPIC found during irq injection\n");
887	}
888}
889
890static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
891		struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
892{
893	if (kvm->arch.x2apic_broadcast_quirk_disabled) {
894		if ((irq->dest_id == APIC_BROADCAST &&
895				map->mode != KVM_APIC_MODE_X2APIC))
896			return true;
897		if (irq->dest_id == X2APIC_BROADCAST)
898			return true;
899	} else {
900		bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
901		if (irq->dest_id == (x2apic_ipi ?
902		                     X2APIC_BROADCAST : APIC_BROADCAST))
903			return true;
904	}
905
906	return false;
907}
908
909/* Return true if the interrupt can be handled by using *bitmap as index mask
910 * for valid destinations in *dst array.
911 * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
912 * Note: we may have zero kvm_lapic destinations when we return true, which
913 * means that the interrupt should be dropped.  In this case, *bitmap would be
914 * zero and *dst undefined.
915 */
916static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
917		struct kvm_lapic **src, struct kvm_lapic_irq *irq,
918		struct kvm_apic_map *map, struct kvm_lapic ***dst,
919		unsigned long *bitmap)
920{
921	int i, lowest;
922
923	if (irq->shorthand == APIC_DEST_SELF && src) {
924		*dst = src;
925		*bitmap = 1;
926		return true;
927	} else if (irq->shorthand)
928		return false;
929
930	if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
931		return false;
932
933	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
934		if (irq->dest_id > map->max_apic_id) {
935			*bitmap = 0;
936		} else {
937			u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
938			*dst = &map->phys_map[dest_id];
939			*bitmap = 1;
940		}
941		return true;
942	}
943
944	*bitmap = 0;
945	if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
946				(u16 *)bitmap))
947		return false;
948
949	if (!kvm_lowest_prio_delivery(irq))
950		return true;
951
952	if (!kvm_vector_hashing_enabled()) {
953		lowest = -1;
954		for_each_set_bit(i, bitmap, 16) {
955			if (!(*dst)[i])
956				continue;
957			if (lowest < 0)
958				lowest = i;
959			else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
960						(*dst)[lowest]->vcpu) < 0)
961				lowest = i;
962		}
963	} else {
964		if (!*bitmap)
965			return true;
966
967		lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
968				bitmap, 16);
969
970		if (!(*dst)[lowest]) {
971			kvm_apic_disabled_lapic_found(kvm);
972			*bitmap = 0;
973			return true;
974		}
975	}
976
977	*bitmap = (lowest >= 0) ? 1 << lowest : 0;
978
979	return true;
980}
981
982bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
983		struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
984{
985	struct kvm_apic_map *map;
986	unsigned long bitmap;
987	struct kvm_lapic **dst = NULL;
988	int i;
989	bool ret;
990
991	*r = -1;
992
993	if (irq->shorthand == APIC_DEST_SELF) {
994		if (KVM_BUG_ON(!src, kvm)) {
995			*r = 0;
996			return true;
997		}
998		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
999		return true;
1000	}
1001
1002	rcu_read_lock();
1003	map = rcu_dereference(kvm->arch.apic_map);
1004
1005	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
1006	if (ret) {
1007		*r = 0;
1008		for_each_set_bit(i, &bitmap, 16) {
1009			if (!dst[i])
1010				continue;
1011			*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
1012		}
1013	}
1014
1015	rcu_read_unlock();
1016	return ret;
1017}
1018
1019/*
1020 * This routine tries to handle interrupts in posted mode, here is how
1021 * it deals with different cases:
1022 * - For single-destination interrupts, handle it in posted mode
1023 * - Else if vector hashing is enabled and it is a lowest-priority
1024 *   interrupt, handle it in posted mode and use the following mechanism
1025 *   to find the destination vCPU.
1026 *	1. For lowest-priority interrupts, store all the possible
1027 *	   destination vCPUs in an array.
1028 *	2. Use "guest vector % max number of destination vCPUs" to find
1029 *	   the right destination vCPU in the array for the lowest-priority
1030 *	   interrupt.
1031 * - Otherwise, use remapped mode to inject the interrupt.
1032 */
1033bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
1034			struct kvm_vcpu **dest_vcpu)
1035{
1036	struct kvm_apic_map *map;
1037	unsigned long bitmap;
1038	struct kvm_lapic **dst = NULL;
1039	bool ret = false;
1040
1041	if (irq->shorthand)
1042		return false;
1043
1044	rcu_read_lock();
1045	map = rcu_dereference(kvm->arch.apic_map);
1046
1047	if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
1048			hweight16(bitmap) == 1) {
1049		unsigned long i = find_first_bit(&bitmap, 16);
1050
1051		if (dst[i]) {
1052			*dest_vcpu = dst[i]->vcpu;
1053			ret = true;
1054		}
1055	}
1056
1057	rcu_read_unlock();
1058	return ret;
1059}
1060
1061/*
1062 * Add a pending IRQ into lapic.
1063 * Return 1 if successfully added and 0 if discarded.
1064 */
1065static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
1066			     int vector, int level, int trig_mode,
1067			     struct dest_map *dest_map)
1068{
1069	int result = 0;
1070	struct kvm_vcpu *vcpu = apic->vcpu;
1071
1072	trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
1073				  trig_mode, vector);
1074	switch (delivery_mode) {
1075	case APIC_DM_LOWEST:
1076		vcpu->arch.apic_arb_prio++;
1077		fallthrough;
1078	case APIC_DM_FIXED:
1079		if (unlikely(trig_mode && !level))
1080			break;
1081
1082		/* FIXME add logic for vcpu on reset */
1083		if (unlikely(!apic_enabled(apic)))
1084			break;
1085
1086		result = 1;
1087
1088		if (dest_map) {
1089			__set_bit(vcpu->vcpu_id, dest_map->map);
1090			dest_map->vectors[vcpu->vcpu_id] = vector;
1091		}
1092
1093		if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
1094			if (trig_mode)
1095				kvm_lapic_set_vector(vector,
1096						     apic->regs + APIC_TMR);
1097			else
1098				kvm_lapic_clear_vector(vector,
1099						       apic->regs + APIC_TMR);
1100		}
1101
1102		if (kvm_x86_ops.deliver_posted_interrupt(vcpu, vector)) {
1103			kvm_lapic_set_irr(vector, apic);
1104			kvm_make_request(KVM_REQ_EVENT, vcpu);
1105			kvm_vcpu_kick(vcpu);
1106		}
1107		break;
1108
1109	case APIC_DM_REMRD:
1110		result = 1;
1111		vcpu->arch.pv.pv_unhalted = 1;
1112		kvm_make_request(KVM_REQ_EVENT, vcpu);
1113		kvm_vcpu_kick(vcpu);
1114		break;
1115
1116	case APIC_DM_SMI:
1117		result = 1;
1118		kvm_make_request(KVM_REQ_SMI, vcpu);
1119		kvm_vcpu_kick(vcpu);
1120		break;
1121
1122	case APIC_DM_NMI:
1123		result = 1;
1124		kvm_inject_nmi(vcpu);
1125		kvm_vcpu_kick(vcpu);
1126		break;
1127
1128	case APIC_DM_INIT:
1129		if (!trig_mode || level) {
1130			result = 1;
1131			/* assumes that there are only KVM_APIC_INIT/SIPI */
1132			apic->pending_events = (1UL << KVM_APIC_INIT);
1133			kvm_make_request(KVM_REQ_EVENT, vcpu);
1134			kvm_vcpu_kick(vcpu);
1135		}
1136		break;
1137
1138	case APIC_DM_STARTUP:
1139		result = 1;
1140		apic->sipi_vector = vector;
1141		/* make sure sipi_vector is visible to the receiver */
1142		smp_wmb();
1143		set_bit(KVM_APIC_SIPI, &apic->pending_events);
1144		kvm_make_request(KVM_REQ_EVENT, vcpu);
1145		kvm_vcpu_kick(vcpu);
1146		break;
1147
1148	case APIC_DM_EXTINT:
1149		/*
1150		 * Should only be called by kvm_apic_local_deliver() with LVT0,
1151		 * before NMI watchdog was enabled. Already handled by
1152		 * kvm_apic_accept_pic_intr().
1153		 */
1154		break;
1155
1156	default:
1157		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
1158		       delivery_mode);
1159		break;
1160	}
1161	return result;
1162}
1163
1164/*
1165 * This routine identifies the destination vcpus mask meant to receive the
1166 * IOAPIC interrupts.  It either uses kvm_apic_map_get_dest_lapic() to
1167 * find the destination vcpus array and set the bitmap, or it traverses
1168 * each available vcpu and matches the destination individually.
1169 */
1170void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
1171			      unsigned long *vcpu_bitmap)
1172{
1173	struct kvm_lapic **dest_vcpu = NULL;
1174	struct kvm_lapic *src = NULL;
1175	struct kvm_apic_map *map;
1176	struct kvm_vcpu *vcpu;
1177	unsigned long bitmap;
1178	int i, vcpu_idx;
1179	bool ret;
1180
1181	rcu_read_lock();
1182	map = rcu_dereference(kvm->arch.apic_map);
1183
1184	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
1185					  &bitmap);
1186	if (ret) {
1187		for_each_set_bit(i, &bitmap, 16) {
1188			if (!dest_vcpu[i])
1189				continue;
1190			vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
1191			__set_bit(vcpu_idx, vcpu_bitmap);
1192		}
1193	} else {
1194		kvm_for_each_vcpu(i, vcpu, kvm) {
1195			if (!kvm_apic_present(vcpu))
1196				continue;
1197			if (!kvm_apic_match_dest(vcpu, NULL,
1198						 irq->shorthand,
1199						 irq->dest_id,
1200						 irq->dest_mode))
1201				continue;
1202			__set_bit(i, vcpu_bitmap);
1203		}
1204	}
1205	rcu_read_unlock();
1206}
1207
1208int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1209{
1210	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
1211}
1212
1213static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
1214{
1215	return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
1216}
1217
1218static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1219{
1220	int trigger_mode;
1221
1222	/* Eoi the ioapic only if the ioapic doesn't own the vector. */
1223	if (!kvm_ioapic_handles_vector(apic, vector))
1224		return;
1225
1226	/* Request a KVM exit to inform the userspace IOAPIC. */
1227	if (irqchip_split(apic->vcpu->kvm)) {
1228		apic->vcpu->arch.pending_ioapic_eoi = vector;
1229		kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1230		return;
1231	}
1232
1233	if (apic_test_vector(vector, apic->regs + APIC_TMR))
1234		trigger_mode = IOAPIC_LEVEL_TRIG;
1235	else
1236		trigger_mode = IOAPIC_EDGE_TRIG;
1237
1238	kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1239}
1240
1241static int apic_set_eoi(struct kvm_lapic *apic)
1242{
1243	int vector = apic_find_highest_isr(apic);
1244
1245	trace_kvm_eoi(apic, vector);
1246
1247	/*
1248	 * Not every EOI write has a corresponding ISR bit set;
1249	 * one example is when the kernel checks the timer during setup_IO_APIC().
1250	 */
1251	if (vector == -1)
1252		return vector;
1253
1254	apic_clear_isr(vector, apic);
1255	apic_update_ppr(apic);
1256
1257	if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
1258		kvm_hv_synic_send_eoi(apic->vcpu, vector);
1259
1260	kvm_ioapic_send_eoi(apic, vector);
1261	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1262	return vector;
1263}
1264
1265/*
1266 * This interface assumes a trap-like exit, which has already finished
1267 * the desired side effects, including the vISR and vPPR updates.
1268 */
1269void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1270{
1271	struct kvm_lapic *apic = vcpu->arch.apic;
1272
1273	trace_kvm_eoi(apic, vector);
1274
1275	kvm_ioapic_send_eoi(apic, vector);
1276	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1277}
1278EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1279
1280void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
1281{
1282	struct kvm_lapic_irq irq;
1283
1284	irq.vector = icr_low & APIC_VECTOR_MASK;
1285	irq.delivery_mode = icr_low & APIC_MODE_MASK;
1286	irq.dest_mode = icr_low & APIC_DEST_MASK;
1287	irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1288	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1289	irq.shorthand = icr_low & APIC_SHORT_MASK;
1290	irq.msi_redir_hint = false;
1291	if (apic_x2apic_mode(apic))
1292		irq.dest_id = icr_high;
1293	else
1294		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
1295
1296	trace_kvm_apic_ipi(icr_low, irq.dest_id);
1297
1298	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1299}
1300
1301static u32 apic_get_tmcct(struct kvm_lapic *apic)
1302{
1303	ktime_t remaining, now;
1304	s64 ns;
1305	u32 tmcct;
1306
1307	ASSERT(apic != NULL);
1308
1309	/* if initial count is 0, current count should also be 0 */
1310	if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1311		apic->lapic_timer.period == 0)
1312		return 0;
1313
1314	now = ktime_get();
1315	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1316	if (ktime_to_ns(remaining) < 0)
1317		remaining = 0;
1318
1319	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1320	tmcct = div64_u64(ns,
1321			 (APIC_BUS_CYCLE_NS * apic->divide_count));
1322
1323	return tmcct;
1324}
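
/*
 * Worked example of the TMCCT arithmetic above (illustrative values,
 * assuming APIC_BUS_CYCLE_NS == 1 as defined in lapic.h): with 5000 ns
 * remaining in a 10000 ns period and divide_count = 2,
 * ns = mod_64(5000, 10000) = 5000 and tmcct = 5000 / (1 * 2) = 2500
 * bus cycles still to count down.
 */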
1325
1326static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1327{
1328	struct kvm_vcpu *vcpu = apic->vcpu;
1329	struct kvm_run *run = vcpu->run;
1330
1331	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1332	run->tpr_access.rip = kvm_rip_read(vcpu);
1333	run->tpr_access.is_write = write;
1334}
1335
1336static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1337{
1338	if (apic->vcpu->arch.tpr_access_reporting)
1339		__report_tpr_access(apic, write);
1340}
1341
1342static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1343{
1344	u32 val = 0;
1345
1346	if (offset >= LAPIC_MMIO_LENGTH)
1347		return 0;
1348
1349	switch (offset) {
1350	case APIC_ARBPRI:
1351		break;
1352
1353	case APIC_TMCCT:	/* Timer CCR */
1354		if (apic_lvtt_tscdeadline(apic))
1355			return 0;
1356
1357		val = apic_get_tmcct(apic);
1358		break;
1359	case APIC_PROCPRI:
1360		apic_update_ppr(apic);
1361		val = kvm_lapic_get_reg(apic, offset);
1362		break;
1363	case APIC_TASKPRI:
1364		report_tpr_access(apic, false);
1365		fallthrough;
1366	default:
1367		val = kvm_lapic_get_reg(apic, offset);
1368		break;
1369	}
1370
1371	return val;
1372}
1373
1374static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1375{
1376	return container_of(dev, struct kvm_lapic, dev);
1377}
1378
1379#define APIC_REG_MASK(reg)	(1ull << ((reg) >> 4))
1380#define APIC_REGS_MASK(first, count) \
1381	(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
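
/*
 * Example of the register-mask encoding above (using apicdef.h offsets):
 * APIC_ID is offset 0x20, so APIC_REG_MASK(APIC_ID) == 1ull << 2, and
 * APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) with APIC_ISR == 0x100 and
 * APIC_ISR_NR == 8 sets bits 16-23, covering registers 0x100-0x170.
 */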
1382
1383int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1384		void *data)
1385{
1386	unsigned char alignment = offset & 0xf;
1387	u32 result;
1388	/* this bitmask has a bit cleared for each reserved register */
1389	u64 valid_reg_mask =
1390		APIC_REG_MASK(APIC_ID) |
1391		APIC_REG_MASK(APIC_LVR) |
1392		APIC_REG_MASK(APIC_TASKPRI) |
1393		APIC_REG_MASK(APIC_PROCPRI) |
1394		APIC_REG_MASK(APIC_LDR) |
1395		APIC_REG_MASK(APIC_DFR) |
1396		APIC_REG_MASK(APIC_SPIV) |
1397		APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
1398		APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
1399		APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
1400		APIC_REG_MASK(APIC_ESR) |
1401		APIC_REG_MASK(APIC_ICR) |
1402		APIC_REG_MASK(APIC_ICR2) |
1403		APIC_REG_MASK(APIC_LVTT) |
1404		APIC_REG_MASK(APIC_LVTTHMR) |
1405		APIC_REG_MASK(APIC_LVTPC) |
1406		APIC_REG_MASK(APIC_LVT0) |
1407		APIC_REG_MASK(APIC_LVT1) |
1408		APIC_REG_MASK(APIC_LVTERR) |
1409		APIC_REG_MASK(APIC_TMICT) |
1410		APIC_REG_MASK(APIC_TMCCT) |
1411		APIC_REG_MASK(APIC_TDCR);
1412
1413	/* ARBPRI is not valid on x2APIC */
1414	if (!apic_x2apic_mode(apic))
1415		valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
1416
1417	if (alignment + len > 4)
1418		return 1;
1419
1420	if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
1421		return 1;
1422
1423	result = __apic_read(apic, offset & ~0xf);
1424
1425	trace_kvm_apic_read(offset, result);
1426
1427	switch (len) {
1428	case 1:
1429	case 2:
1430	case 4:
1431		memcpy(data, (char *)&result + alignment, len);
1432		break;
1433	default:
1434		printk(KERN_ERR "Local APIC read with len = %x, "
1435		       "should be 1, 2, or 4 instead\n", len);
1436		break;
1437	}
1438	return 0;
1439}
1440EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
1441
1442static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1443{
1444	return addr >= apic->base_address &&
1445		addr < apic->base_address + LAPIC_MMIO_LENGTH;
1446}
1447
1448static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1449			   gpa_t address, int len, void *data)
1450{
1451	struct kvm_lapic *apic = to_lapic(this);
1452	u32 offset = address - apic->base_address;
1453
1454	if (!apic_mmio_in_range(apic, address))
1455		return -EOPNOTSUPP;
1456
1457	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
1458		if (!kvm_check_has_quirk(vcpu->kvm,
1459					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
1460			return -EOPNOTSUPP;
1461
1462		memset(data, 0xff, len);
1463		return 0;
1464	}
1465
1466	kvm_lapic_reg_read(apic, offset, len, data);
1467
1468	return 0;
1469}
1470
1471static void update_divide_count(struct kvm_lapic *apic)
1472{
1473	u32 tmp1, tmp2, tdcr;
1474
1475	tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1476	tmp1 = tdcr & 0xf;
1477	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1478	apic->divide_count = 0x1 << (tmp2 & 0x7);
1479}
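
/*
 * Worked example of the TDCR decode above (values from the SDM encoding):
 * TDCR 0xb (0b1011, divide-by-1) gives tmp1 = 0xb,
 * tmp2 = (0x3 | (0x8 >> 1)) + 1 = 8, and divide_count = 1 << (8 & 7) = 1;
 * TDCR 0x0 (0b0000, divide-by-2) gives tmp2 = 1 and divide_count = 2.
 */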
1480
1481static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1482{
1483	/*
1484	 * Do not allow the guest to program periodic timers with a small
1485	 * interval, since the hrtimers are not throttled by the host
1486	 * scheduler.
1487	 */
1488	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1489		s64 min_period = min_timer_period_us * 1000LL;
1490
1491		if (apic->lapic_timer.period < min_period) {
1492			pr_info_ratelimited(
1493			    "kvm: vcpu %i: requested %lld ns "
1494			    "lapic timer period limited to %lld ns\n",
1495			    apic->vcpu->vcpu_id,
1496			    apic->lapic_timer.period, min_period);
1497			apic->lapic_timer.period = min_period;
1498		}
1499	}
1500}
1501
1502static void cancel_hv_timer(struct kvm_lapic *apic);
1503
1504static void apic_update_lvtt(struct kvm_lapic *apic)
1505{
1506	u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1507			apic->lapic_timer.timer_mode_mask;
1508
1509	if (apic->lapic_timer.timer_mode != timer_mode) {
1510		if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1511				APIC_LVT_TIMER_TSCDEADLINE)) {
1512			hrtimer_cancel(&apic->lapic_timer.timer);
1513			preempt_disable();
1514			if (apic->lapic_timer.hv_timer_in_use)
1515				cancel_hv_timer(apic);
1516			preempt_enable();
1517			kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1518			apic->lapic_timer.period = 0;
1519			apic->lapic_timer.tscdeadline = 0;
1520		}
1521		apic->lapic_timer.timer_mode = timer_mode;
1522		limit_periodic_timer_frequency(apic);
1523	}
1524}
1525
1526/*
1527 * On APICv, this test will cause a busy wait
1528 * while a higher-priority task is running.
1529 */
1530
1531static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1532{
1533	struct kvm_lapic *apic = vcpu->arch.apic;
1534	u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1535
1536	if (kvm_apic_hw_enabled(apic)) {
1537		int vec = reg & APIC_VECTOR_MASK;
1538		void *bitmap = apic->regs + APIC_ISR;
1539
1540		if (vcpu->arch.apicv_active)
1541			bitmap = apic->regs + APIC_IRR;
1542
1543		if (apic_test_vector(vec, bitmap))
1544			return true;
1545	}
1546	return false;
1547}
1548
1549static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
1550{
1551	u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
1552
1553	/*
1554	 * If the guest TSC is running at a different ratio than the host, then
1555	 * convert the delay to nanoseconds to achieve an accurate delay.  Note
1556	 * that __delay() uses delay_tsc whenever the hardware has TSC, thus
1557	 * always for VMX enabled hardware.
1558	 */
1559	if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
1560		__delay(min(guest_cycles,
1561			nsec_to_cycles(vcpu, timer_advance_ns)));
1562	} else {
1563		u64 delay_ns = guest_cycles * 1000000ULL;
1564		do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
1565		ndelay(min_t(u32, delay_ns, timer_advance_ns));
1566	}
1567}
1568
1569static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
1570					      s64 advance_expire_delta)
1571{
1572	struct kvm_lapic *apic = vcpu->arch.apic;
1573	u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1574	u64 ns;
1575
1576	/* Do not adjust for tiny fluctuations or large random spikes. */
1577	if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
1578	    abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
1579		return;
1580
1581	/* too early */
1582	if (advance_expire_delta < 0) {
1583		ns = -advance_expire_delta * 1000000ULL;
1584		do_div(ns, vcpu->arch.virtual_tsc_khz);
1585		timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1586	} else {
1587	/* too late */
1588		ns = advance_expire_delta * 1000000ULL;
1589		do_div(ns, vcpu->arch.virtual_tsc_khz);
1590		timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1591	}
1592
1593	if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
1594		timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
1595	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1596}
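
/*
 * Example of the adjustment above (illustrative numbers): a guest TSC
 * running at virtual_tsc_khz = 2100000 (2.1 GHz) that expired 2100 cycles
 * early gives ns = 2100 * 1000000 / 2100000 = 1000, so timer_advance_ns
 * shrinks by 1000 / 8 = 125 ns; repeated ticks converge step by step
 * instead of oscillating on a single noisy sample.
 */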
1597
1598static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1599{
1600	struct kvm_lapic *apic = vcpu->arch.apic;
1601	u64 guest_tsc, tsc_deadline;
1602
1603	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1604	apic->lapic_timer.expired_tscdeadline = 0;
1605	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1606	apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
1607
1608	if (guest_tsc < tsc_deadline)
1609		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1610
1611	if (lapic_timer_advance_dynamic)
1612		adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
1613}
1614
1615void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1616{
1617	if (lapic_in_kernel(vcpu) &&
1618	    vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1619	    vcpu->arch.apic->lapic_timer.timer_advance_ns &&
1620	    lapic_timer_int_injected(vcpu))
1621		__kvm_wait_lapic_expire(vcpu);
1622}
1623EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
1624
1625static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1626{
1627	struct kvm_timer *ktimer = &apic->lapic_timer;
1628
1629	kvm_apic_local_deliver(apic, APIC_LVTT);
1630	if (apic_lvtt_tscdeadline(apic)) {
1631		ktimer->tscdeadline = 0;
1632	} else if (apic_lvtt_oneshot(apic)) {
1633		ktimer->tscdeadline = 0;
1634		ktimer->target_expiration = 0;
1635	}
1636}
1637
1638static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
1639{
1640	struct kvm_vcpu *vcpu = apic->vcpu;
1641	struct kvm_timer *ktimer = &apic->lapic_timer;
1642
1643	if (atomic_read(&apic->lapic_timer.pending))
1644		return;
1645
1646	if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1647		ktimer->expired_tscdeadline = ktimer->tscdeadline;
1648
1649	if (!from_timer_fn && vcpu->arch.apicv_active) {
1650		WARN_ON(kvm_get_running_vcpu() != vcpu);
1651		kvm_apic_inject_pending_timer_irqs(apic);
1652		return;
1653	}
1654
1655	if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
1656		/*
1657		 * Ensure the guest's timer has truly expired before posting an
1658		 * interrupt.  Open code the relevant checks to avoid querying
1659		 * lapic_timer_int_injected(), which will be false since the
1660		 * interrupt isn't yet injected.  Waiting until after injecting
1661		 * is not an option since that won't help a posted interrupt.
1662		 */
1663		if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1664		    vcpu->arch.apic->lapic_timer.timer_advance_ns)
1665			__kvm_wait_lapic_expire(vcpu);
1666		kvm_apic_inject_pending_timer_irqs(apic);
1667		return;
1668	}
1669
1670	atomic_inc(&apic->lapic_timer.pending);
1671	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
1672	if (from_timer_fn)
1673		kvm_vcpu_kick(vcpu);
1674}
1675
1676static void start_sw_tscdeadline(struct kvm_lapic *apic)
1677{
1678	struct kvm_timer *ktimer = &apic->lapic_timer;
1679	u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
1680	u64 ns = 0;
1681	ktime_t expire;
1682	struct kvm_vcpu *vcpu = apic->vcpu;
1683	unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1684	unsigned long flags;
1685	ktime_t now;
1686
1687	if (unlikely(!tscdeadline || !this_tsc_khz))
1688		return;
1689
1690	local_irq_save(flags);
1691
1692	now = ktime_get();
1693	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1694
1695	ns = (tscdeadline - guest_tsc) * 1000000ULL;
1696	do_div(ns, this_tsc_khz);
1697
1698	if (likely(tscdeadline > guest_tsc) &&
1699	    likely(ns > apic->lapic_timer.timer_advance_ns)) {
1700		expire = ktime_add_ns(now, ns);
1701		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
1702		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
1703	} else
1704		apic_timer_expired(apic, false);
1705
1706	local_irq_restore(flags);
1707}
1708
1709static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
1710{
1711	return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
1712}
1713
1714static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
1715{
1716	ktime_t now, remaining;
1717	u64 ns_remaining_old, ns_remaining_new;
1718
1719	apic->lapic_timer.period =
1720			tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1721	limit_periodic_timer_frequency(apic);
1722
1723	now = ktime_get();
1724	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1725	if (ktime_to_ns(remaining) < 0)
1726		remaining = 0;
1727
1728	ns_remaining_old = ktime_to_ns(remaining);
1729	ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
1730	                                   apic->divide_count, old_divisor);
1731
1732	apic->lapic_timer.tscdeadline +=
1733		nsec_to_cycles(apic->vcpu, ns_remaining_new) -
1734		nsec_to_cycles(apic->vcpu, ns_remaining_old);
1735	apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
1736}
1737
1738static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
1739{
1740	ktime_t now;
1741	u64 tscl = rdtsc();
1742	s64 deadline;
1743
1744	now = ktime_get();
1745	apic->lapic_timer.period =
1746			tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1747
1748	if (!apic->lapic_timer.period) {
1749		apic->lapic_timer.tscdeadline = 0;
1750		return false;
1751	}
1752
1753	limit_periodic_timer_frequency(apic);
1754	deadline = apic->lapic_timer.period;
1755
1756	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
1757		if (unlikely(count_reg != APIC_TMICT)) {
1758			deadline = tmict_to_ns(apic,
1759				     kvm_lapic_get_reg(apic, count_reg));
1760			if (unlikely(deadline <= 0))
1761				deadline = apic->lapic_timer.period;
1762			else if (unlikely(deadline > apic->lapic_timer.period)) {
1763				pr_info_ratelimited(
1764				    "kvm: vcpu %i: requested lapic timer restore with "
1765				    "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
1766				    "Using initial count to start timer.\n",
1767				    apic->vcpu->vcpu_id,
1768				    count_reg,
1769				    kvm_lapic_get_reg(apic, count_reg),
1770				    deadline, apic->lapic_timer.period);
1771				kvm_lapic_set_reg(apic, count_reg, 0);
1772				deadline = apic->lapic_timer.period;
1773			}
1774		}
1775	}
1776
1777	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1778		nsec_to_cycles(apic->vcpu, deadline);
1779	apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
1780
1781	return true;
1782}
1783
1784static void advance_periodic_target_expiration(struct kvm_lapic *apic)
1785{
1786	ktime_t now = ktime_get();
1787	u64 tscl = rdtsc();
1788	ktime_t delta;
1789
1790	/*
1791	 * Synchronize both deadlines to the same time source or
1792	 * differences in the periods (caused by differences in the
1793	 * underlying clocks or numerical approximation errors) will
1794	 * cause the two to drift apart over time as the errors
1795	 * accumulate.
1796	 */
1797	apic->lapic_timer.target_expiration =
1798		ktime_add_ns(apic->lapic_timer.target_expiration,
1799				apic->lapic_timer.period);
1800	delta = ktime_sub(apic->lapic_timer.target_expiration, now);
1801	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1802		nsec_to_cycles(apic->vcpu, delta);
1803}
1804
1805static void start_sw_period(struct kvm_lapic *apic)
1806{
1807	if (!apic->lapic_timer.period)
1808		return;
1809
1810	if (ktime_after(ktime_get(),
1811			apic->lapic_timer.target_expiration)) {
1812		apic_timer_expired(apic, false);
1813
1814		if (apic_lvtt_oneshot(apic))
1815			return;
1816
1817		advance_periodic_target_expiration(apic);
1818	}
1819
1820	hrtimer_start(&apic->lapic_timer.timer,
1821		apic->lapic_timer.target_expiration,
1822		HRTIMER_MODE_ABS_HARD);
1823}
1824
1825bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
1826{
1827	if (!lapic_in_kernel(vcpu))
1828		return false;
1829
1830	return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
1831}
1832EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
1833
1834static void cancel_hv_timer(struct kvm_lapic *apic)
1835{
1836	WARN_ON(preemptible());
1837	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1838	kvm_x86_ops.cancel_hv_timer(apic->vcpu);
1839	apic->lapic_timer.hv_timer_in_use = false;
1840}
1841
1842static bool start_hv_timer(struct kvm_lapic *apic)
1843{
1844	struct kvm_timer *ktimer = &apic->lapic_timer;
1845	struct kvm_vcpu *vcpu = apic->vcpu;
1846	bool expired;
1847
1848	WARN_ON(preemptible());
1849	if (!kvm_can_use_hv_timer(vcpu))
1850		return false;
1851
1852	if (!ktimer->tscdeadline)
1853		return false;
1854
1855	if (kvm_x86_ops.set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
1856		return false;
1857
1858	ktimer->hv_timer_in_use = true;
1859	hrtimer_cancel(&ktimer->timer);
1860
1861	/*
1862	 * To simplify handling the periodic timer, leave the hv timer running
1863	 * even if the deadline timer has expired, i.e. rely on the resulting
1864	 * VM-Exit to recompute the periodic timer's target expiration.
1865	 */
1866	if (!apic_lvtt_period(apic)) {
1867		/*
1868		 * Cancel the hv timer if the sw timer fired while the hv timer
1869		 * was being programmed, or if the hv timer itself expired.
1870		 */
1871		if (atomic_read(&ktimer->pending)) {
1872			cancel_hv_timer(apic);
1873		} else if (expired) {
1874			apic_timer_expired(apic, false);
1875			cancel_hv_timer(apic);
1876		}
1877	}
1878
1879	trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
1880
1881	return true;
1882}
1883
1884static void start_sw_timer(struct kvm_lapic *apic)
1885{
1886	struct kvm_timer *ktimer = &apic->lapic_timer;
1887
1888	WARN_ON(preemptible());
1889	if (apic->lapic_timer.hv_timer_in_use)
1890		cancel_hv_timer(apic);
1891	if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
1892		return;
1893
1894	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1895		start_sw_period(apic);
1896	else if (apic_lvtt_tscdeadline(apic))
1897		start_sw_tscdeadline(apic);
1898	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
1899}
1900
1901static void restart_apic_timer(struct kvm_lapic *apic)
1902{
1903	preempt_disable();
1904
1905	if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
1906		goto out;
1907
1908	if (!start_hv_timer(apic))
1909		start_sw_timer(apic);
1910out:
1911	preempt_enable();
1912}
1913
1914void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1915{
1916	struct kvm_lapic *apic = vcpu->arch.apic;
1917
1918	preempt_disable();
1919	/* If the preempt notifier has already run, it also called apic_timer_expired */
1920	if (!apic->lapic_timer.hv_timer_in_use)
1921		goto out;
1922	WARN_ON(rcuwait_active(&vcpu->wait));
1923	apic_timer_expired(apic, false);
1924	cancel_hv_timer(apic);
1925
1926	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1927		advance_periodic_target_expiration(apic);
1928		restart_apic_timer(apic);
1929	}
1930out:
1931	preempt_enable();
1932}
1933EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
1934
1935void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
1936{
1937	restart_apic_timer(vcpu->arch.apic);
1938}
1939EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
1940
1941void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
1942{
1943	struct kvm_lapic *apic = vcpu->arch.apic;
1944
1945	preempt_disable();
1946	/* Possibly the TSC deadline timer is not enabled yet */
1947	if (apic->lapic_timer.hv_timer_in_use)
1948		start_sw_timer(apic);
1949	preempt_enable();
1950}
1951EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
1952
1953void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
1954{
1955	struct kvm_lapic *apic = vcpu->arch.apic;
1956
1957	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1958	restart_apic_timer(apic);
1959}
1960
1961static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
1962{
1963	atomic_set(&apic->lapic_timer.pending, 0);
1964
1965	if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1966	    && !set_target_expiration(apic, count_reg))
1967		return;
1968
1969	restart_apic_timer(apic);
1970}
1971
1972static void start_apic_timer(struct kvm_lapic *apic)
1973{
1974	__start_apic_timer(apic, APIC_TMICT);
1975}
1976
1977static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
1978{
1979	bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
1980
1981	if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
1982		apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
1983		if (lvt0_in_nmi_mode)
1984			atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1985		else
1986			atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1987	}
1988}
1989
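/*
 * Emulate a 32-bit write to an APIC register.  Returns 0 on success
 * and 1 for writes to read-only or unknown registers; callers (e.g.
 * the x2APIC MSR path) turn a non-zero return into the appropriate
 * fault.
 */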
1990int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1991{
1992	int ret = 0;
1993
1994	trace_kvm_apic_write(reg, val);
1995
1996	switch (reg) {
1997	case APIC_ID:		/* Local APIC ID */
1998		if (!apic_x2apic_mode(apic))
1999			kvm_apic_set_xapic_id(apic, val >> 24);
2000		else
2001			ret = 1;
2002		break;
2003
2004	case APIC_TASKPRI:
2005		report_tpr_access(apic, true);
2006		apic_set_tpr(apic, val & 0xff);
2007		break;
2008
2009	case APIC_EOI:
2010		apic_set_eoi(apic);
2011		break;
2012
2013	case APIC_LDR:
2014		if (!apic_x2apic_mode(apic))
2015			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
2016		else
2017			ret = 1;
2018		break;
2019
2020	case APIC_DFR:
2021		if (!apic_x2apic_mode(apic))
2022			kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
2023		else
2024			ret = 1;
2025		break;
2026
2027	case APIC_SPIV: {
2028		u32 mask = 0x3ff;
2029		if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
2030			mask |= APIC_SPIV_DIRECTED_EOI;
2031		apic_set_spiv(apic, val & mask);
2032		if (!(val & APIC_SPIV_APIC_ENABLED)) {
2033			int i;
2034			u32 lvt_val;
2035
2036			for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
2037				lvt_val = kvm_lapic_get_reg(apic,
2038						       APIC_LVTT + 0x10 * i);
2039				kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
2040					     lvt_val | APIC_LVT_MASKED);
2041			}
2042			apic_update_lvtt(apic);
2043			atomic_set(&apic->lapic_timer.pending, 0);
2045		}
2046		break;
2047	}
2048	case APIC_ICR:
2049		/* No delay here, so we always clear the pending bit */
2050		val &= ~(1 << 12);
2051		kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
2052		kvm_lapic_set_reg(apic, APIC_ICR, val);
2053		break;
2054
2055	case APIC_ICR2:
2056		if (!apic_x2apic_mode(apic))
2057			val &= 0xff000000;
2058		kvm_lapic_set_reg(apic, APIC_ICR2, val);
2059		break;
2060
2061	case APIC_LVT0:
2062		apic_manage_nmi_watchdog(apic, val);
2063		fallthrough;
2064	case APIC_LVTTHMR:
2065	case APIC_LVTPC:
2066	case APIC_LVT1:
2067	case APIC_LVTERR: {
2068		/* TODO: Check vector */
2069		size_t size;
2070		u32 index;
2071
2072		if (!kvm_apic_sw_enabled(apic))
2073			val |= APIC_LVT_MASKED;
2074		size = ARRAY_SIZE(apic_lvt_mask);
2075		index = array_index_nospec(
2076				(reg - APIC_LVTT) >> 4, size);
2077		val &= apic_lvt_mask[index];
2078		kvm_lapic_set_reg(apic, reg, val);
2079		break;
2080	}
2081
2082	case APIC_LVTT:
2083		if (!kvm_apic_sw_enabled(apic))
2084			val |= APIC_LVT_MASKED;
2085		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
2086		kvm_lapic_set_reg(apic, APIC_LVTT, val);
2087		apic_update_lvtt(apic);
2088		break;
2089
2090	case APIC_TMICT:
2091		if (apic_lvtt_tscdeadline(apic))
2092			break;
2093
2094		hrtimer_cancel(&apic->lapic_timer.timer);
2095		kvm_lapic_set_reg(apic, APIC_TMICT, val);
2096		start_apic_timer(apic);
2097		break;
2098
2099	case APIC_TDCR: {
2100		uint32_t old_divisor = apic->divide_count;
2101
2102		kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
2103		update_divide_count(apic);
2104		if (apic->divide_count != old_divisor &&
2105				apic->lapic_timer.period) {
2106			hrtimer_cancel(&apic->lapic_timer.timer);
2107			update_target_expiration(apic, old_divisor);
2108			restart_apic_timer(apic);
2109		}
2110		break;
2111	}
2112	case APIC_ESR:
2113		if (apic_x2apic_mode(apic) && val != 0)
2114			ret = 1;
2115		break;
2116
2117	case APIC_SELF_IPI:
2118		/*
2119		 * Self-IPI exists only when x2APIC is enabled.  Bits 7:0 hold
2120		 * the vector, everything else is reserved.
2121		 */
2122		if (!apic_x2apic_mode(apic) || (val & ~APIC_VECTOR_MASK))
2123			ret = 1;
2124		else
2125			kvm_apic_send_ipi(apic, APIC_DEST_SELF | val, 0);
2126		break;
2127	default:
2128		ret = 1;
2129		break;
2130	}
2131
2132	kvm_recalculate_apic_map(apic->vcpu->kvm);
2133
2134	return ret;
2135}
2136EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
2137
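/*
 * MMIO write handler for the xAPIC page.  With the LAPIC_MMIO_HOLE
 * quirk enabled (the default), writes while the APIC is hw-disabled or
 * in x2APIC mode are silently dropped; with the quirk disabled they
 * are treated as ordinary MMIO and punted to userspace.
 */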
2138static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
2139			    gpa_t address, int len, const void *data)
2140{
2141	struct kvm_lapic *apic = to_lapic(this);
2142	unsigned int offset = address - apic->base_address;
2143	u32 val;
2144
2145	if (!apic_mmio_in_range(apic, address))
2146		return -EOPNOTSUPP;
2147
2148	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2149		if (!kvm_check_has_quirk(vcpu->kvm,
2150					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2151			return -EOPNOTSUPP;
2152
2153		return 0;
2154	}
2155
2156	/*
2157	 * APIC registers must be aligned on a 128-bit boundary, and
2158	 * 32/64/128-bit registers must be accessed through 32-bit
2159	 * reads and writes.  See SDM 8.4.1.
2160	 */
2161	if (len != 4 || (offset & 0xf))
2162		return 0;
2163
2164	val = *(u32*)data;
2165
2166	kvm_lapic_reg_write(apic, offset & 0xff0, val);
2167
2168	return 0;
2169}
2170
2171void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
2172{
2173	kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
2174}
2175EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
2176
2177/* emulate APIC access in a trap manner */
2178void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2179{
2180	u32 val = 0;
2181
2182	/* hardware has already done the access check and instruction decode */
2183	offset &= 0xff0;
2184
2185	kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
2186
2187	/* TODO: optimize to just emulate the side effect without one more write */
2188	kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
2189}
2190EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
2191
2192void kvm_free_lapic(struct kvm_vcpu *vcpu)
2193{
2194	struct kvm_lapic *apic = vcpu->arch.apic;
2195
2196	if (!vcpu->arch.apic)
2197		return;
2198
2199	hrtimer_cancel(&apic->lapic_timer.timer);
2200
2201	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
2202		static_key_slow_dec_deferred(&apic_hw_disabled);
2203
2204	if (!apic->sw_enabled)
2205		static_key_slow_dec_deferred(&apic_sw_disabled);
2206
2207	if (apic->regs)
2208		free_page((unsigned long)apic->regs);
2209
2210	kfree(apic);
2211}
2212
2213/*
2214 *----------------------------------------------------------------------
2215 * LAPIC interface
2216 *----------------------------------------------------------------------
2217 */
2218u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2219{
2220	struct kvm_lapic *apic = vcpu->arch.apic;
2221
2222	if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2223		return 0;
2224
2225	return apic->lapic_timer.tscdeadline;
2226}
2227
2228void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2229{
2230	struct kvm_lapic *apic = vcpu->arch.apic;
2231
2232	if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2233		return;
2234
2235	hrtimer_cancel(&apic->lapic_timer.timer);
2236	apic->lapic_timer.tscdeadline = data;
2237	start_apic_timer(apic);
2238}
2239
2240void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
2241{
2242	apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4);
2243}
2244
2245u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2246{
2247	u64 tpr;
2248
2249	tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
2250
2251	return (tpr & 0xf0) >> 4;
2252}
2253
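/*
 * Emulate a write to MSR_IA32_APICBASE: maintain the hw-enabled jump
 * label, reset the xAPIC/x2APIC ID on mode transitions, notify vendor
 * code of the new mode and latch the base address, which KVM requires
 * to stay at the default.
 */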
2254void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
2255{
2256	u64 old_value = vcpu->arch.apic_base;
2257	struct kvm_lapic *apic = vcpu->arch.apic;
2258
2259	if (!apic)
2260		value |= MSR_IA32_APICBASE_BSP;
2261
2262	vcpu->arch.apic_base = value;
2263
2264	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2265		kvm_update_cpuid_runtime(vcpu);
2266
2267	if (!apic)
2268		return;
2269
2270	/* update jump label if enable bit changes */
2271	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
2272		if (value & MSR_IA32_APICBASE_ENABLE) {
2273			kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2274			static_key_slow_dec_deferred(&apic_hw_disabled);
2275			/* Check if there are APF page ready requests pending */
2276			kvm_make_request(KVM_REQ_APF_READY, vcpu);
2277		} else {
2278			static_key_slow_inc(&apic_hw_disabled.key);
2279			atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2280		}
2281	}
2282
2283	if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
2284		kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2285
2286	if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
2287		kvm_x86_ops.set_virtual_apic_mode(vcpu);
2288
2289	apic->base_address = apic->vcpu->arch.apic_base &
2290			     MSR_IA32_APICBASE_BASE;
2291
2292	if ((value & MSR_IA32_APICBASE_ENABLE) &&
2293	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
2294		pr_warn_once("APIC base relocation is unsupported by KVM");
2295}
2296
2297void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
2298{
2299	struct kvm_lapic *apic = vcpu->arch.apic;
2300
2301	if (vcpu->arch.apicv_active) {
2302		/* irr_pending is always true when apicv is activated. */
2303		apic->irr_pending = true;
2304		apic->isr_count = 1;
2305	} else {
2306		apic->irr_pending = (apic_search_irr(apic) != -1);
2307		apic->isr_count = count_vectors(apic->regs + APIC_ISR);
2308	}
2309}
2310EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
2311
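/*
 * Reset the APIC to its power-up state.  For an INIT (init_event ==
 * true) the APIC base, and thus the xAPIC ID, is left untouched.
 */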
2312void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2313{
2314	struct kvm_lapic *apic = vcpu->arch.apic;
2315	int i;
2316
2317	if (!apic)
2318		return;
2319
2320	/* Stop the timer in case it's a reset to an active apic */
2321	hrtimer_cancel(&apic->lapic_timer.timer);
2322
2323	if (!init_event) {
2324		kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
2325		                         MSR_IA32_APICBASE_ENABLE);
2326		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2327	}
2328	kvm_apic_set_version(apic->vcpu);
2329
2330	for (i = 0; i < KVM_APIC_LVT_NUM; i++)
2331		kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
2332	apic_update_lvtt(apic);
2333	if (kvm_vcpu_is_reset_bsp(vcpu) &&
2334	    kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2335		kvm_lapic_set_reg(apic, APIC_LVT0,
2336			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2337	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2338
2339	kvm_apic_set_dfr(apic, 0xffffffffU);
2340	apic_set_spiv(apic, 0xff);
2341	kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2342	if (!apic_x2apic_mode(apic))
2343		kvm_apic_set_ldr(apic, 0);
2344	kvm_lapic_set_reg(apic, APIC_ESR, 0);
2345	kvm_lapic_set_reg(apic, APIC_ICR, 0);
2346	kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2347	kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2348	kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2349	for (i = 0; i < 8; i++) {
2350		kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2351		kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2352		kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2353	}
2354	kvm_apic_update_apicv(vcpu);
2355	apic->highest_isr_cache = -1;
2356	update_divide_count(apic);
2357	atomic_set(&apic->lapic_timer.pending, 0);
2358	if (kvm_vcpu_is_bsp(vcpu))
2359		kvm_lapic_set_base(vcpu,
2360				vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
2361	vcpu->arch.pv_eoi.msr_val = 0;
2362	apic_update_ppr(apic);
2363	if (vcpu->arch.apicv_active) {
2364		kvm_x86_ops.apicv_post_state_restore(vcpu);
2365		kvm_x86_ops.hwapic_irr_update(vcpu, -1);
2366		kvm_x86_ops.hwapic_isr_update(vcpu, -1);
2367	}
2368
2369	vcpu->arch.apic_arb_prio = 0;
2370	vcpu->arch.apic_attention = 0;
2371
2372	kvm_recalculate_apic_map(vcpu->kvm);
2373}
2374
2375/*
2376 *----------------------------------------------------------------------
2377 * timer interface
2378 *----------------------------------------------------------------------
2379 */
2380
2381static bool lapic_is_periodic(struct kvm_lapic *apic)
2382{
2383	return apic_lvtt_period(apic);
2384}
2385
2386int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2387{
2388	struct kvm_lapic *apic = vcpu->arch.apic;
2389
2390	if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2391		return atomic_read(&apic->lapic_timer.pending);
2392
2393	return 0;
2394}
2395
2396int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2397{
2398	u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2399	int vector, mode, trig_mode;
2400	int r;
2401
2402	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2403		vector = reg & APIC_VECTOR_MASK;
2404		mode = reg & APIC_MODE_MASK;
2405		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2406
2407		r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
2408		if (r && lvt_type == APIC_LVTPC)
2409			kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
2410		return r;
2411	}
2412	return 0;
2413}
2414
2415void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2416{
2417	struct kvm_lapic *apic = vcpu->arch.apic;
2418
2419	if (apic)
2420		kvm_apic_local_deliver(apic, APIC_LVT0);
2421}
2422
2423static const struct kvm_io_device_ops apic_mmio_ops = {
2424	.read     = apic_mmio_read,
2425	.write    = apic_mmio_write,
2426};
2427
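/*
 * hrtimer callback for the sw timer, invoked in hardirq context
 * (HRTIMER_MODE_ABS_HARD); a periodic timer re-arms itself here.
 */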
2428static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2429{
2430	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2431	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2432
2433	apic_timer_expired(apic, true);
2434
2435	if (lapic_is_periodic(apic)) {
2436		advance_periodic_target_expiration(apic);
2437		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2438		return HRTIMER_RESTART;
2439	}
2440	return HRTIMER_NORESTART;
2441}
2442
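/*
 * Allocate and initialize the in-kernel APIC.  Passing
 * timer_advance_ns == -1 selects the self-tuning (dynamic) timer
 * advancement with a conservative initial value.
 */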
2443int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
2444{
2445	struct kvm_lapic *apic;
2446
2447	ASSERT(vcpu != NULL);
2448
2449	apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
2450	if (!apic)
2451		goto nomem;
2452
2453	vcpu->arch.apic = apic;
2454
2455	apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
2456	if (!apic->regs) {
2457		printk(KERN_ERR "Failed to allocate APIC register page for vcpu %x\n",
2458		       vcpu->vcpu_id);
2459		goto nomem_free_apic;
2460	}
2461	apic->vcpu = vcpu;
2462
2463	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2464		     HRTIMER_MODE_ABS_HARD);
2465	apic->lapic_timer.timer.function = apic_timer_fn;
2466	if (timer_advance_ns == -1) {
2467		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
2468		lapic_timer_advance_dynamic = true;
2469	} else {
2470		apic->lapic_timer.timer_advance_ns = timer_advance_ns;
2471		lapic_timer_advance_dynamic = false;
2472	}
2473
2474	/*
2475	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
2476	 * thinking that APIC state has changed.
2477	 */
2478	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2479	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2480	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2481
2482	return 0;
2483nomem_free_apic:
2484	kfree(apic);
2485	vcpu->arch.apic = NULL;
2486nomem:
2487	return -ENOMEM;
2488}
2489
2490int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2491{
2492	struct kvm_lapic *apic = vcpu->arch.apic;
2493	u32 ppr;
2494
2495	if (!kvm_apic_present(vcpu))
2496		return -1;
2497
2498	__apic_update_ppr(apic, &ppr);
2499	return apic_has_interrupt_for_ppr(apic, ppr);
2500}
2501EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
2502
2503int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2504{
2505	u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2506
2507	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2508		return 1;
2509	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2510	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2511		return 1;
2512	return 0;
2513}
2514
2515void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2516{
2517	struct kvm_lapic *apic = vcpu->arch.apic;
2518
2519	if (atomic_read(&apic->lapic_timer.pending) > 0) {
2520		kvm_apic_inject_pending_timer_irqs(apic);
2521		atomic_set(&apic->lapic_timer.pending, 0);
2522	}
2523}
2524
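/*
 * Acknowledge the highest-priority pending interrupt: move the vector
 * from IRR to ISR (except for Hyper-V auto-EOI vectors) and update the
 * PPR.  Returns -1 if nothing is deliverable above the current PPR.
 */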
2525int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2526{
2527	int vector = kvm_apic_has_interrupt(vcpu);
2528	struct kvm_lapic *apic = vcpu->arch.apic;
2529	u32 ppr;
2530
2531	if (vector == -1)
2532		return -1;
2533
2534	/*
2535	 * We get here even with APIC virtualization enabled, if doing
2536	 * nested virtualization and L1 runs with the "acknowledge interrupt
2537	 * on exit" mode.  Then we cannot inject the interrupt via RVI,
2538	 * because the process would deliver it through the IDT.
2539	 * because the delivery would then go through the IDT.
2540
2541	apic_clear_irr(vector, apic);
2542	if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
2543		/*
2544		 * For auto-EOI interrupts, there might be another pending
2545		 * interrupt above PPR, so check whether to raise another
2546		 * KVM_REQ_EVENT.
2547		 */
2548		apic_update_ppr(apic);
2549	} else {
2550		/*
2551		 * For normal interrupts, PPR has been raised and there cannot
2552		 * be a higher-priority pending interrupt---except if there was
2553		 * a concurrent interrupt injection, but that would have
2554		 * triggered KVM_REQ_EVENT already.
2555		 */
2556		apic_set_isr(vector, apic);
2557		__apic_update_ppr(apic, &ppr);
2558	}
2559
2560	return vector;
2561}
2562
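/*
 * KVM_{GET,SET}_LAPIC always use the xAPIC register layout.  In x2APIC
 * mode, translate the APIC ID between the 32-bit value kept internally
 * and the xAPIC encoding (ID in bits 31:24) expected by legacy
 * userspace, unless userspace opted into the 32-bit format, and derive
 * the read-only LDR from the ID on set.
 */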
2563static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2564		struct kvm_lapic_state *s, bool set)
2565{
2566	if (apic_x2apic_mode(vcpu->arch.apic)) {
2567		u32 *id = (u32 *)(s->regs + APIC_ID);
2568		u32 *ldr = (u32 *)(s->regs + APIC_LDR);
2569
2570		if (vcpu->kvm->arch.x2apic_format) {
2571			if (*id != vcpu->vcpu_id)
2572				return -EINVAL;
2573		} else {
2574			if (set)
2575				*id >>= 24;
2576			else
2577				*id <<= 24;
2578		}
2579
2580		/* In x2APIC mode, the LDR is fixed and based on the id */
2581		if (set)
2582			*ldr = kvm_apic_calc_x2apic_ldr(*id);
2583	}
2584
2585	return 0;
2586}
2587
2588int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2589{
2590	memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
2591
2592	/*
2593	 * Fill the returned register set with the calculated current count
2594	 * for any remaining timer period.
2595	 */
2596	__kvm_lapic_set_reg(s->regs, APIC_TMCCT,
2597			    __apic_read(vcpu->arch.apic, APIC_TMCCT));
2598
2599	return kvm_apic_state_fixup(vcpu, s, false);
2600}
2601
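/*
 * Restore APIC state from userspace.  The base MSR and SPIV are
 * replayed first so the hw/sw-enabled bookkeeping stays correct, then
 * the raw register image is copied in and all derived state (timer,
 * LVTs, divide count, APICv caches) is recomputed; the timer resumes
 * from the saved current count (APIC_TMCCT).
 */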
2602int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2603{
2604	struct kvm_lapic *apic = vcpu->arch.apic;
2605	int r;
2606
2607	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
2608	/* set SPIV separately to get count of SW disabled APICs right */
2609	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
2610
2611	r = kvm_apic_state_fixup(vcpu, s, true);
2612	if (r) {
2613		kvm_recalculate_apic_map(vcpu->kvm);
2614		return r;
2615	}
2616	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
2617
2618	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2619	kvm_recalculate_apic_map(vcpu->kvm);
2620	kvm_apic_set_version(vcpu);
2621
2622	apic_update_ppr(apic);
2623	hrtimer_cancel(&apic->lapic_timer.timer);
2624	apic_update_lvtt(apic);
2625	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2626	update_divide_count(apic);
2627	__start_apic_timer(apic, APIC_TMCCT);
2628	kvm_apic_update_apicv(vcpu);
2629	apic->highest_isr_cache = -1;
2630	if (vcpu->arch.apicv_active) {
2631		kvm_x86_ops.apicv_post_state_restore(vcpu);
2632		kvm_x86_ops.hwapic_irr_update(vcpu,
2633				apic_find_highest_irr(apic));
2634		kvm_x86_ops.hwapic_isr_update(vcpu,
2635				apic_find_highest_isr(apic));
2636	}
2637	kvm_make_request(KVM_REQ_EVENT, vcpu);
2638	if (ioapic_in_kernel(vcpu->kvm))
2639		kvm_rtc_eoi_tracking_restore_one(vcpu);
2640
2641	vcpu->arch.apic_arb_prio = 0;
2642
2643	return 0;
2644}
2645
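/*
 * Re-arm the hrtimer on the current CPU when a vCPU migrates, unless
 * the timer interrupt can be posted directly to the vCPU instead.
 */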
2646void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2647{
2648	struct hrtimer *timer;
2649
2650	if (!lapic_in_kernel(vcpu) ||
2651		kvm_can_post_timer_interrupt(vcpu))
2652		return;
2653
2654	timer = &vcpu->arch.apic->lapic_timer.timer;
2655	if (hrtimer_cancel(timer))
2656		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
2657}
2658
2659/*
2660 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2661 *
2662 * Detect whether the guest triggered PV EOI since the
2663 * last entry. If so, perform the EOI on the guest's behalf.
2664 * Clear PV EOI in guest memory in any case.
2665 */
2666static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2667					struct kvm_lapic *apic)
2668{
2669	bool pending;
2670	int vector;
2671	/*
2672	 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2673	 * and KVM_PV_EOI_ENABLED in guest memory as follows:
2674	 *
2675	 * KVM_APIC_PV_EOI_PENDING is unset:
2676	 * 	-> host disabled PV EOI.
2677	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2678	 * 	-> host enabled PV EOI, guest did not execute EOI yet.
2679	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2680	 * 	-> host enabled PV EOI, guest executed EOI.
2681	 */
2682	BUG_ON(!pv_eoi_enabled(vcpu));
2683	pending = pv_eoi_get_pending(vcpu);
2684	/*
2685	 * Clear pending bit in any case: it will be set again on vmentry.
2686	 * While this might not be ideal from a performance point of view,
2687	 * this makes sure pv eoi is only enabled when we know it's safe.
2688	 */
2689	pv_eoi_clr_pending(vcpu);
2690	if (pending)
2691		return;
2692	vector = apic_set_eoi(apic);
2693	trace_kvm_pv_eoi(apic, vector);
2694}
2695
2696void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2697{
2698	u32 data;
2699
2700	if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2701		apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2702
2703	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2704		return;
2705
2706	if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2707				  sizeof(u32)))
2708		return;
2709
2710	apic_set_tpr(vcpu->arch.apic, data & 0xff);
2711}
2712
2713/*
2714 * apic_sync_pv_eoi_to_guest - called before vmentry
2715 *
2716 * Detect whether it's safe to enable PV EOI and
2717 * if so, enable it.
2718 */
2719static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2720					struct kvm_lapic *apic)
2721{
2722	if (!pv_eoi_enabled(vcpu) ||
2723	    /* IRR set or many bits in ISR: could be nested. */
2724	    apic->irr_pending ||
2725	    /* Cache not set: could be safe but we don't bother. */
2726	    apic->highest_isr_cache == -1 ||
2727	    /* Need EOI to update ioapic. */
2728	    kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
2729		/*
2730		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2731		 * so we need not do anything here.
2732		 */
2733		return;
2734	}
2735
2736	pv_eoi_set_pending(apic->vcpu);
2737}
2738
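/*
 * Mirror the TPR and the highest ISR/IRR vectors into the guest's
 * vapic page before entry; pairs with kvm_lapic_sync_from_vapic() on
 * exit.
 */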
2739void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2740{
2741	u32 data, tpr;
2742	int max_irr, max_isr;
2743	struct kvm_lapic *apic = vcpu->arch.apic;
2744
2745	apic_sync_pv_eoi_to_guest(vcpu, apic);
2746
2747	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2748		return;
2749
2750	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2751	max_irr = apic_find_highest_irr(apic);
2752	if (max_irr < 0)
2753		max_irr = 0;
2754	max_isr = apic_find_highest_isr(apic);
2755	if (max_isr < 0)
2756		max_isr = 0;
2757	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2758
2759	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2760				sizeof(u32));
2761}
2762
2763int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2764{
2765	if (vapic_addr) {
2766		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2767					&vcpu->arch.apic->vapic_cache,
2768					vapic_addr, sizeof(u32)))
2769			return -EINVAL;
2770		__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2771	} else {
2772		__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2773	}
2774
2775	vcpu->arch.apic->vapic_addr = vapic_addr;
2776	return 0;
2777}
2778
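/*
 * x2APIC MSRs map onto the xAPIC MMIO layout via
 * reg = (msr - APIC_BASE_MSR) << 4.  ICR2 is not addressable as a
 * separate MSR; the 64-bit ICR's high half (the destination) is
 * stashed in ICR2 before the low half (the command) is written.
 */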
2779int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2780{
2781	struct kvm_lapic *apic = vcpu->arch.apic;
2782	u32 reg = (msr - APIC_BASE_MSR) << 4;
2783
2784	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2785		return 1;
2786
2787	if (reg == APIC_ICR2)
2788		return 1;
2789
2790	/* for an ICR write, set the destination (ICR2) before the command */
2791	if (reg == APIC_ICR)
2792		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2793	return kvm_lapic_reg_write(apic, reg, (u32)data);
2794}
2795
2796int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2797{
2798	struct kvm_lapic *apic = vcpu->arch.apic;
2799	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
2800
2801	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2802		return 1;
2803
2804	if (reg == APIC_DFR || reg == APIC_ICR2)
2805		return 1;
2806
2807	if (kvm_lapic_reg_read(apic, reg, 4, &low))
2808		return 1;
2809	if (reg == APIC_ICR)
2810		kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2811
2812	*data = (((u64)high) << 32) | low;
2813
2814	return 0;
2815}
2816
2817int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2818{
2819	struct kvm_lapic *apic = vcpu->arch.apic;
2820
2821	if (!lapic_in_kernel(vcpu))
2822		return 1;
2823
2824	/* for an ICR write, set the destination (ICR2) before the command */
2825	if (reg == APIC_ICR)
2826		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2827	return kvm_lapic_reg_write(apic, reg, (u32)data);
2828}
2829
2830int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2831{
2832	struct kvm_lapic *apic = vcpu->arch.apic;
2833	u32 low, high = 0;
2834
2835	if (!lapic_in_kernel(vcpu))
2836		return 1;
2837
2838	if (kvm_lapic_reg_read(apic, reg, 4, &low))
2839		return 1;
2840	if (reg == APIC_ICR)
2841		kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2842
2843	*data = (((u64)high) << 32) | low;
2844
2845	return 0;
2846}
2847
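/*
 * MSR_KVM_PV_EOI_EN: bit 0 enables PV EOI, the remaining bits hold the
 * 4-byte aligned GPA of the guest's flag word.  The gfn->hva cache is
 * (re)initialized for that address, reusing the old length when the
 * new range already fits.
 */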
2848int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
2849{
2850	u64 addr = data & ~KVM_MSR_ENABLED;
2851	struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
2852	unsigned long new_len;
2853
2854	if (!IS_ALIGNED(addr, 4))
2855		return 1;
2856
2857	vcpu->arch.pv_eoi.msr_val = data;
2858	if (!pv_eoi_enabled(vcpu))
2859		return 0;
2860
2861	if (addr == ghc->gpa && len <= ghc->len)
2862		new_len = ghc->len;
2863	else
2864		new_len = len;
2865
2866	return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
2867}
2868
2869void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2870{
2871	struct kvm_lapic *apic = vcpu->arch.apic;
2872	u8 sipi_vector;
2873	unsigned long pe;
2874
2875	if (!lapic_in_kernel(vcpu) || !apic->pending_events)
2876		return;
2877
2878	/*
2879	 * INITs are latched while CPU is in specific states
2880	 * (SMM, VMX non-root mode, SVM with GIF=0).
2881	 * Because a CPU cannot be in these states immediately
2882	 * after it has processed an INIT signal (and thus in
2883	 * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
2884	 * and leave the INIT pending.
2885	 */
2886	if (kvm_vcpu_latch_init(vcpu)) {
2887		WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
2888		if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
2889			clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2890		return;
2891	}
2892
2893	pe = xchg(&apic->pending_events, 0);
2894	if (test_bit(KVM_APIC_INIT, &pe)) {
2895		kvm_vcpu_reset(vcpu, true);
2896		if (kvm_vcpu_is_bsp(apic->vcpu))
2897			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2898		else
2899			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
2900	}
2901	if (test_bit(KVM_APIC_SIPI, &pe) &&
2902	    vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
2903		/* evaluate pending_events before reading the vector */
2904		smp_rmb();
2905		sipi_vector = apic->sipi_vector;
2906		kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
2907		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2908	}
2909}
2910
2911void kvm_lapic_init(void)
2912{
2913	/* do not patch jump label more than once per second */
2914	jump_label_rate_limit(&apic_hw_disabled, HZ);
2915	jump_label_rate_limit(&apic_sw_disabled, HZ);
2916}
2917
2918void kvm_lapic_exit(void)
2919{
2920	static_key_deferred_flush(&apic_hw_disabled);
2921	static_key_deferred_flush(&apic_sw_disabled);
2922}
2923