xref: /kernel/linux/linux-5.10/arch/x86/kvm/hyperv.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * KVM Microsoft Hyper-V emulation
4 *
5 * derived from arch/x86/kvm/x86.c
6 *
7 * Copyright (C) 2006 Qumranet, Inc.
8 * Copyright (C) 2008 Qumranet, Inc.
9 * Copyright IBM Corporation, 2008
10 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
11 * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
12 *
13 * Authors:
14 *   Avi Kivity   <avi@qumranet.com>
15 *   Yaniv Kamay  <yaniv@qumranet.com>
16 *   Amit Shah    <amit.shah@qumranet.com>
17 *   Ben-Ami Yassour <benami@il.ibm.com>
18 *   Andrey Smetanin <asmetanin@virtuozzo.com>
19 */
20
21#include "x86.h"
22#include "lapic.h"
23#include "ioapic.h"
24#include "cpuid.h"
25#include "hyperv.h"
26
27#include <linux/cpu.h>
28#include <linux/kvm_host.h>
29#include <linux/highmem.h>
30#include <linux/sched/cputime.h>
31#include <linux/eventfd.h>
32
33#include <asm/apicdef.h>
34#include <trace/events/kvm.h>
35
36#include "trace.h"
37#include "irq.h"
38
39#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
40
41static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
42				bool vcpu_kick);
43
44static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
45{
46	return atomic64_read(&synic->sint[sint]);
47}
48
49static inline int synic_get_sint_vector(u64 sint_value)
50{
51	if (sint_value & HV_SYNIC_SINT_MASKED)
52		return -1;
53	return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
54}
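
/*
 * A minimal standalone sketch of the SINT decode above.  The concrete bit
 * positions (vector in bits 7:0, MASKED at bit 16, AUTO_EOI at bit 17) are
 * the TLFS layout assumed to sit behind the HV_SYNIC_SINT_* constants;
 * hyperv-tlfs.h is authoritative.
 */
#include <stdint.h>
#include <stdio.h>

#define SINT_VECTOR_MASK	0xffull		/* assumed HV_SYNIC_SINT_VECTOR_MASK */
#define SINT_MASKED		(1ull << 16)	/* assumed HV_SYNIC_SINT_MASKED */
#define SINT_AUTO_EOI		(1ull << 17)	/* assumed HV_SYNIC_SINT_AUTO_EOI */

static int sint_vector(uint64_t sint)
{
	/* A masked SINT delivers nothing; report "no vector" as -1. */
	if (sint & SINT_MASKED)
		return -1;
	return (int)(sint & SINT_VECTOR_MASK);
}

int main(void)
{
	uint64_t sint = 0x2005f;	/* vector 0x5f, unmasked, auto-EOI set */

	printf("vector=%d auto_eoi=%d\n", sint_vector(sint),
	       !!(sint & SINT_AUTO_EOI));	/* prints: vector=95 auto_eoi=1 */
	return 0;
}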
55
56static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
57				      int vector)
58{
59	int i;
60
61	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
62		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
63			return true;
64	}
65	return false;
66}
67
68static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
69				     int vector)
70{
71	int i;
72	u64 sint_value;
73
74	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
75		sint_value = synic_read_sint(synic, i);
76		if (synic_get_sint_vector(sint_value) == vector &&
77		    sint_value & HV_SYNIC_SINT_AUTO_EOI)
78			return true;
79	}
80	return false;
81}
82
83static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
84				int vector)
85{
86	if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
87		return;
88
89	if (synic_has_vector_connected(synic, vector))
90		__set_bit(vector, synic->vec_bitmap);
91	else
92		__clear_bit(vector, synic->vec_bitmap);
93
94	if (synic_has_vector_auto_eoi(synic, vector))
95		__set_bit(vector, synic->auto_eoi_bitmap);
96	else
97		__clear_bit(vector, synic->auto_eoi_bitmap);
98}
99
100static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
101			  u64 data, bool host)
102{
103	int vector, old_vector;
104	bool masked;
105
106	vector = data & HV_SYNIC_SINT_VECTOR_MASK;
107	masked = data & HV_SYNIC_SINT_MASKED;
108
109	/*
110	 * Valid vectors are 16-255; however, nested Hyper-V attempts to write
111	 * the default '0x10000' value on boot, and this should not #GP. We also
112	 * need to allow zero-initing the register from the host.
113	 */
114	if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
115		return 1;
116	/*
117	 * Guest may configure multiple SINTs to use the same vector, so
118	 * we maintain a bitmap of vectors handled by synic, and a
119	 * bitmap of vectors with auto-eoi behavior.  The bitmaps are
120	 * updated here, and atomically queried on fast paths.
121	 */
122	old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;
123
124	atomic64_set(&synic->sint[sint], data);
125
126	synic_update_vector(synic, old_vector);
127
128	synic_update_vector(synic, vector);
129
130	/* Load SynIC vectors into EOI exit bitmap */
131	kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
132	return 0;
133}
134
135static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
136{
137	struct kvm_vcpu *vcpu = NULL;
138	int i;
139
140	if (vpidx >= KVM_MAX_VCPUS)
141		return NULL;
142
143	vcpu = kvm_get_vcpu(kvm, vpidx);
144	if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
145		return vcpu;
146	kvm_for_each_vcpu(i, vcpu, kvm)
147		if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
148			return vcpu;
149	return NULL;
150}
151
152static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
153{
154	struct kvm_vcpu *vcpu;
155	struct kvm_vcpu_hv_synic *synic;
156
157	vcpu = get_vcpu_by_vpidx(kvm, vpidx);
158	if (!vcpu)
159		return NULL;
160	synic = vcpu_to_synic(vcpu);
161	return (synic->active) ? synic : NULL;
162}
163
164static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
165{
166	struct kvm *kvm = vcpu->kvm;
167	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
168	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
169	struct kvm_vcpu_hv_stimer *stimer;
170	int gsi, idx;
171
172	trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);
173
174	/* Try to deliver pending Hyper-V SynIC timer messages */
175	for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
176		stimer = &hv_vcpu->stimer[idx];
177		if (stimer->msg_pending && stimer->config.enable &&
178		    !stimer->config.direct_mode &&
179		    stimer->config.sintx == sint)
180			stimer_mark_pending(stimer, false);
181	}
182
183	idx = srcu_read_lock(&kvm->irq_srcu);
184	gsi = atomic_read(&synic->sint_to_gsi[sint]);
185	if (gsi != -1)
186		kvm_notify_acked_gsi(kvm, gsi);
187	srcu_read_unlock(&kvm->irq_srcu, idx);
188}
189
190static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
191{
192	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
193	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
194
195	hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
196	hv_vcpu->exit.u.synic.msr = msr;
197	hv_vcpu->exit.u.synic.control = synic->control;
198	hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
199	hv_vcpu->exit.u.synic.msg_page = synic->msg_page;
200
201	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
202}
203
204static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
205			 u32 msr, u64 data, bool host)
206{
207	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
208	int ret;
209
210	if (!synic->active && (!host || data))
211		return 1;
212
213	trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
214
215	ret = 0;
216	switch (msr) {
217	case HV_X64_MSR_SCONTROL:
218		synic->control = data;
219		if (!host)
220			synic_exit(synic, msr);
221		break;
222	case HV_X64_MSR_SVERSION:
223		if (!host) {
224			ret = 1;
225			break;
226		}
227		synic->version = data;
228		break;
229	case HV_X64_MSR_SIEFP:
230		if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
231		    !synic->dont_zero_synic_pages)
232			if (kvm_clear_guest(vcpu->kvm,
233					    data & PAGE_MASK, PAGE_SIZE)) {
234				ret = 1;
235				break;
236			}
237		synic->evt_page = data;
238		if (!host)
239			synic_exit(synic, msr);
240		break;
241	case HV_X64_MSR_SIMP:
242		if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
243		    !synic->dont_zero_synic_pages)
244			if (kvm_clear_guest(vcpu->kvm,
245					    data & PAGE_MASK, PAGE_SIZE)) {
246				ret = 1;
247				break;
248			}
249		synic->msg_page = data;
250		if (!host)
251			synic_exit(synic, msr);
252		break;
253	case HV_X64_MSR_EOM: {
254		int i;
255
256		if (!synic->active)
257			break;
258
259		for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
260			kvm_hv_notify_acked_sint(vcpu, i);
261		break;
262	}
263	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
264		ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
265		break;
266	default:
267		ret = 1;
268		break;
269	}
270	return ret;
271}
272
273static bool kvm_hv_is_syndbg_enabled(struct kvm_vcpu *vcpu)
274{
275	struct kvm_cpuid_entry2 *entry;
276
277	entry = kvm_find_cpuid_entry(vcpu,
278				     HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES,
279				     0);
280	if (!entry)
281		return false;
282
283	return entry->eax & HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
284}
285
286static int kvm_hv_syndbg_complete_userspace(struct kvm_vcpu *vcpu)
287{
288	struct kvm *kvm = vcpu->kvm;
289	struct kvm_hv *hv = &kvm->arch.hyperv;
290
291	if (vcpu->run->hyperv.u.syndbg.msr == HV_X64_MSR_SYNDBG_CONTROL)
292		hv->hv_syndbg.control.status =
293			vcpu->run->hyperv.u.syndbg.status;
294	return 1;
295}
296
297static void syndbg_exit(struct kvm_vcpu *vcpu, u32 msr)
298{
299	struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);
300	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
301
302	hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNDBG;
303	hv_vcpu->exit.u.syndbg.msr = msr;
304	hv_vcpu->exit.u.syndbg.control = syndbg->control.control;
305	hv_vcpu->exit.u.syndbg.send_page = syndbg->control.send_page;
306	hv_vcpu->exit.u.syndbg.recv_page = syndbg->control.recv_page;
307	hv_vcpu->exit.u.syndbg.pending_page = syndbg->control.pending_page;
308	vcpu->arch.complete_userspace_io =
309			kvm_hv_syndbg_complete_userspace;
310
311	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
312}
313
314static int syndbg_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
315{
316	struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);
317
318	if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
319		return 1;
320
321	trace_kvm_hv_syndbg_set_msr(vcpu->vcpu_id,
322				    vcpu_to_hv_vcpu(vcpu)->vp_index, msr, data);
323	switch (msr) {
324	case HV_X64_MSR_SYNDBG_CONTROL:
325		syndbg->control.control = data;
326		if (!host)
327			syndbg_exit(vcpu, msr);
328		break;
329	case HV_X64_MSR_SYNDBG_STATUS:
330		syndbg->control.status = data;
331		break;
332	case HV_X64_MSR_SYNDBG_SEND_BUFFER:
333		syndbg->control.send_page = data;
334		break;
335	case HV_X64_MSR_SYNDBG_RECV_BUFFER:
336		syndbg->control.recv_page = data;
337		break;
338	case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
339		syndbg->control.pending_page = data;
340		if (!host)
341			syndbg_exit(vcpu, msr);
342		break;
343	case HV_X64_MSR_SYNDBG_OPTIONS:
344		syndbg->options = data;
345		break;
346	default:
347		break;
348	}
349
350	return 0;
351}
352
353static int syndbg_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
354{
355	struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);
356
357	if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
358		return 1;
359
360	switch (msr) {
361	case HV_X64_MSR_SYNDBG_CONTROL:
362		*pdata = syndbg->control.control;
363		break;
364	case HV_X64_MSR_SYNDBG_STATUS:
365		*pdata = syndbg->control.status;
366		break;
367	case HV_X64_MSR_SYNDBG_SEND_BUFFER:
368		*pdata = syndbg->control.send_page;
369		break;
370	case HV_X64_MSR_SYNDBG_RECV_BUFFER:
371		*pdata = syndbg->control.recv_page;
372		break;
373	case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
374		*pdata = syndbg->control.pending_page;
375		break;
376	case HV_X64_MSR_SYNDBG_OPTIONS:
377		*pdata = syndbg->options;
378		break;
379	default:
380		break;
381	}
382
383	trace_kvm_hv_syndbg_get_msr(vcpu->vcpu_id,
384				    vcpu_to_hv_vcpu(vcpu)->vp_index, msr,
385				    *pdata);
386
387	return 0;
388}
389
390static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
391			 bool host)
392{
393	int ret;
394
395	if (!synic->active && !host)
396		return 1;
397
398	ret = 0;
399	switch (msr) {
400	case HV_X64_MSR_SCONTROL:
401		*pdata = synic->control;
402		break;
403	case HV_X64_MSR_SVERSION:
404		*pdata = synic->version;
405		break;
406	case HV_X64_MSR_SIEFP:
407		*pdata = synic->evt_page;
408		break;
409	case HV_X64_MSR_SIMP:
410		*pdata = synic->msg_page;
411		break;
412	case HV_X64_MSR_EOM:
413		*pdata = 0;
414		break;
415	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
416		*pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
417		break;
418	default:
419		ret = 1;
420		break;
421	}
422	return ret;
423}
424
425static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
426{
427	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
428	struct kvm_lapic_irq irq;
429	int ret, vector;
430
431	if (KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm))
432		return -EINVAL;
433
434	if (sint >= ARRAY_SIZE(synic->sint))
435		return -EINVAL;
436
437	vector = synic_get_sint_vector(synic_read_sint(synic, sint));
438	if (vector < 0)
439		return -ENOENT;
440
441	memset(&irq, 0, sizeof(irq));
442	irq.shorthand = APIC_DEST_SELF;
443	irq.dest_mode = APIC_DEST_PHYSICAL;
444	irq.delivery_mode = APIC_DM_FIXED;
445	irq.vector = vector;
446	irq.level = 1;
447
448	ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
449	trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
450	return ret;
451}
452
453int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
454{
455	struct kvm_vcpu_hv_synic *synic;
456
457	synic = synic_get(kvm, vpidx);
458	if (!synic)
459		return -EINVAL;
460
461	return synic_set_irq(synic, sint);
462}
463
464void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
465{
466	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
467	int i;
468
469	trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);
470
471	for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
472		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
473			kvm_hv_notify_acked_sint(vcpu, i);
474}
475
476static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
477{
478	struct kvm_vcpu_hv_synic *synic;
479
480	synic = synic_get(kvm, vpidx);
481	if (!synic)
482		return -EINVAL;
483
484	if (sint >= ARRAY_SIZE(synic->sint_to_gsi))
485		return -EINVAL;
486
487	atomic_set(&synic->sint_to_gsi[sint], gsi);
488	return 0;
489}
490
491void kvm_hv_irq_routing_update(struct kvm *kvm)
492{
493	struct kvm_irq_routing_table *irq_rt;
494	struct kvm_kernel_irq_routing_entry *e;
495	u32 gsi;
496
497	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
498					lockdep_is_held(&kvm->irq_lock));
499
500	for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
501		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
502			if (e->type == KVM_IRQ_ROUTING_HV_SINT)
503				kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
504						    e->hv_sint.sint, gsi);
505		}
506	}
507}
508
509static void synic_init(struct kvm_vcpu_hv_synic *synic)
510{
511	int i;
512
513	memset(synic, 0, sizeof(*synic));
514	synic->version = HV_SYNIC_VERSION_1;
515	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
516		atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
517		atomic_set(&synic->sint_to_gsi[i], -1);
518	}
519}
520
521static u64 get_time_ref_counter(struct kvm *kvm)
522{
523	struct kvm_hv *hv = &kvm->arch.hyperv;
524	struct kvm_vcpu *vcpu;
525	u64 tsc;
526
527	/*
528	 * If the guest has not set up the TSC page or the clock isn't
529	 * stable, fall back to get_kvmclock_ns().
530	 */
531	if (!hv->tsc_ref.tsc_sequence)
532		return div_u64(get_kvmclock_ns(kvm), 100);
533
534	vcpu = kvm_get_vcpu(kvm, 0);
535	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
536	return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
537		+ hv->tsc_ref.tsc_offset;
538}
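
/*
 * A minimal standalone sketch of the arithmetic above, assuming only what the
 * code shows: the reference counter is in 100ns units, and with an active TSC
 * page it is (tsc * tsc_scale) >> 64 + tsc_offset, which is exactly what
 * mul_u64_u64_shr(tsc, scale, 64) computes.  The sample frequency is invented.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t ref_counter(uint64_t tsc, uint64_t scale, int64_t offset)
{
	/* 128-bit multiply, keep the high 64 bits: (tsc * scale) >> 64 */
	return (uint64_t)(((unsigned __int128)tsc * scale) >> 64) + offset;
}

int main(void)
{
	uint64_t tsc_hz = 2100000000ull;	/* hypothetical 2.1 GHz guest TSC */
	/* scale/2^64 must equal "100ns units per tick", i.e. 10^7 / tsc_hz */
	uint64_t scale = (uint64_t)(((unsigned __int128)10000000 << 64) / tsc_hz);

	/* One second worth of ticks ~= 10,000,000 units; prints 9999999 here
	 * because both divisions round down. */
	printf("%llu\n", (unsigned long long)ref_counter(tsc_hz, scale, 0));
	return 0;
}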
539
540static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
541				bool vcpu_kick)
542{
543	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
544
545	set_bit(stimer->index,
546		vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
547	kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
548	if (vcpu_kick)
549		kvm_vcpu_kick(vcpu);
550}
551
552static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
553{
554	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
555
556	trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id,
557				    stimer->index);
558
559	hrtimer_cancel(&stimer->timer);
560	clear_bit(stimer->index,
561		  vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
562	stimer->msg_pending = false;
563	stimer->exp_time = 0;
564}
565
566static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
567{
568	struct kvm_vcpu_hv_stimer *stimer;
569
570	stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
571	trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id,
572				     stimer->index);
573	stimer_mark_pending(stimer, true);
574
575	return HRTIMER_NORESTART;
576}
577
578/*
579 * stimer_start() assumptions:
580 * a) stimer->count is not equal to 0
581 * b) stimer->config has HV_STIMER_ENABLE flag
582 */
583static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
584{
585	u64 time_now;
586	ktime_t ktime_now;
587
588	time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
589	ktime_now = ktime_get();
590
591	if (stimer->config.periodic) {
592		if (stimer->exp_time) {
593			if (time_now >= stimer->exp_time) {
594				u64 remainder;
595
596				div64_u64_rem(time_now - stimer->exp_time,
597					      stimer->count, &remainder);
598				stimer->exp_time =
599					time_now + (stimer->count - remainder);
600			}
601		} else
602			stimer->exp_time = time_now + stimer->count;
603
604		trace_kvm_hv_stimer_start_periodic(
605					stimer_to_vcpu(stimer)->vcpu_id,
606					stimer->index,
607					time_now, stimer->exp_time);
608
609		hrtimer_start(&stimer->timer,
610			      ktime_add_ns(ktime_now,
611					   100 * (stimer->exp_time - time_now)),
612			      HRTIMER_MODE_ABS);
613		return 0;
614	}
615	stimer->exp_time = stimer->count;
616	if (time_now >= stimer->count) {
617		/*
618		 * Expire timer according to Hypervisor Top-Level Functional
619		 * Specification v4 (15.3.1):
620		 * "If a one shot is enabled and the specified count is in
621		 * the past, it will expire immediately."
622		 */
623		stimer_mark_pending(stimer, false);
624		return 0;
625	}
626
627	trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id,
628					   stimer->index,
629					   time_now, stimer->count);
630
631	hrtimer_start(&stimer->timer,
632		      ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
633		      HRTIMER_MODE_ABS);
634	return 0;
635}
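
/*
 * A standalone sketch of the periodic re-arm arithmetic above.  All Hyper-V
 * timer values are in 100ns reference-time units, hence the "100 *" factor
 * when converting the remaining time into an hrtimer deadline in nanoseconds.
 * The numbers in main() are invented for illustration.
 */
#include <stdint.h>
#include <stdio.h>

/* Next expiration >= time_now that keeps the timer's original phase. */
static uint64_t periodic_next_exp(uint64_t exp_time, uint64_t count,
				  uint64_t time_now)
{
	if (!exp_time)			/* first arm: one full period from now */
		return time_now + count;
	if (time_now < exp_time)	/* still in the future, keep it */
		return exp_time;
	/* Skip the periods that were already missed, preserving the phase. */
	return time_now + (count - (time_now - exp_time) % count);
}

int main(void)
{
	/* period = 1ms (10000 * 100ns); we are 2.5 periods past exp_time */
	uint64_t next = periodic_next_exp(100000, 10000, 125000);

	printf("next=%llu ns_from_now=%llu\n", (unsigned long long)next,
	       (unsigned long long)(100 * (next - 125000)));
	/* prints: next=130000 ns_from_now=500000 */
	return 0;
}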
636
637static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
638			     bool host)
639{
640	union hv_stimer_config new_config = {.as_uint64 = config},
641		old_config = {.as_uint64 = stimer->config.as_uint64};
642	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
643	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
644
645	if (!synic->active && (!host || config))
646		return 1;
647
648	trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
649				       stimer->index, config, host);
650
651	stimer_cleanup(stimer);
652	if (old_config.enable &&
653	    !new_config.direct_mode && new_config.sintx == 0)
654		new_config.enable = 0;
655	stimer->config.as_uint64 = new_config.as_uint64;
656
657	if (stimer->config.enable)
658		stimer_mark_pending(stimer, false);
659
660	return 0;
661}
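
/*
 * A standalone sketch of the HV_X64_MSR_STIMERx_CONFIG layout consumed above
 * through union hv_stimer_config.  The bit positions used here (enable:0,
 * periodic:1, lazy:2, auto_enable:3, apic_vector:11..4, direct_mode:12,
 * sintx:19..16) are the TLFS layout assumed by that union; hyperv-tlfs.h is
 * authoritative.
 */
#include <stdint.h>
#include <stdio.h>

struct stimer_cfg {
	unsigned enable, periodic, lazy, auto_enable;
	unsigned apic_vector, direct_mode, sintx;
};

static struct stimer_cfg decode_stimer_config(uint64_t v)
{
	return (struct stimer_cfg){
		.enable      = v & 1,
		.periodic    = (v >> 1) & 1,
		.lazy        = (v >> 2) & 1,
		.auto_enable = (v >> 3) & 1,
		.apic_vector = (v >> 4) & 0xff,
		.direct_mode = (v >> 12) & 1,
		.sintx       = (v >> 16) & 0xf,
	};
}

int main(void)
{
	/* periodic, auto-enable, message (non-direct) mode, routed to SINT 2 */
	struct stimer_cfg c = decode_stimer_config(0x2000b);

	printf("enable=%u periodic=%u auto=%u direct=%u sintx=%u\n",
	       c.enable, c.periodic, c.auto_enable, c.direct_mode, c.sintx);
	/* prints: enable=1 periodic=1 auto=1 direct=0 sintx=2 */
	return 0;
}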
662
663static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
664			    bool host)
665{
666	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
667	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
668
669	if (!synic->active && (!host || count))
670		return 1;
671
672	trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id,
673				      stimer->index, count, host);
674
675	stimer_cleanup(stimer);
676	stimer->count = count;
677	if (!host) {
678		if (stimer->count == 0)
679			stimer->config.enable = 0;
680		else if (stimer->config.auto_enable)
681			stimer->config.enable = 1;
682	}
683
684	if (stimer->config.enable)
685		stimer_mark_pending(stimer, false);
686
687	return 0;
688}
689
690static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
691{
692	*pconfig = stimer->config.as_uint64;
693	return 0;
694}
695
696static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
697{
698	*pcount = stimer->count;
699	return 0;
700}
701
702static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
703			     struct hv_message *src_msg, bool no_retry)
704{
705	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
706	int msg_off = offsetof(struct hv_message_page, sint_message[sint]);
707	gfn_t msg_page_gfn;
708	struct hv_message_header hv_hdr;
709	int r;
710
711	if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
712		return -ENOENT;
713
714	msg_page_gfn = synic->msg_page >> PAGE_SHIFT;
715
716	/*
717	 * Strictly following the spec-mandated ordering would assume setting
718	 * .msg_pending before checking .message_type.  However, this function
719	 * is only called in vcpu context so the entire update is atomic from
720	 * guest POV and thus the exact order here doesn't matter.
721	 */
722	r = kvm_vcpu_read_guest_page(vcpu, msg_page_gfn, &hv_hdr.message_type,
723				     msg_off + offsetof(struct hv_message,
724							header.message_type),
725				     sizeof(hv_hdr.message_type));
726	if (r < 0)
727		return r;
728
729	if (hv_hdr.message_type != HVMSG_NONE) {
730		if (no_retry)
731			return 0;
732
733		hv_hdr.message_flags.msg_pending = 1;
734		r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn,
735					      &hv_hdr.message_flags,
736					      msg_off +
737					      offsetof(struct hv_message,
738						       header.message_flags),
739					      sizeof(hv_hdr.message_flags));
740		if (r < 0)
741			return r;
742		return -EAGAIN;
743	}
744
745	r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, src_msg, msg_off,
746				      sizeof(src_msg->header) +
747				      src_msg->header.payload_size);
748	if (r < 0)
749		return r;
750
751	r = synic_set_irq(synic, sint);
752	if (r < 0)
753		return r;
754	if (r == 0)
755		return -EFAULT;
756	return 0;
757}
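
/*
 * A hedged sketch of the guest side of the protocol synic_deliver_msg()
 * implements: after consuming a message the guest writes HVMSG_NONE back to
 * the slot and, if the hypervisor had set msg_pending, writes 0 to
 * HV_X64_MSR_EOM so delivery is retried (the HV_X64_MSR_EOM case in
 * synic_set_msr() above is what reacts to that).  The struct below is an
 * abbreviated hv_message, the MSR number is stated from memory, and
 * wrmsr_stub() merely stands in for a real WRMSR.
 */
#include <stdint.h>
#include <stdio.h>

#define HVMSG_NONE		0
#define HVMSG_TIMER_EXPIRED	0x80000010
#define HV_X64_MSR_EOM		0x40000084	/* assumed; see hyperv-tlfs.h */

struct hv_msg_slot {
	volatile uint32_t message_type;
	uint8_t payload_size;
	uint8_t flags;				/* bit 0: msg_pending */
	uint8_t pad[2];
	uint64_t sender;
	uint64_t payload[30];
};

static void wrmsr_stub(uint32_t msr, uint64_t val)	/* placeholder */
{
	printf("wrmsr(0x%x, %llu)\n", msr, (unsigned long long)val);
}

static void consume_message(struct hv_msg_slot *slot)
{
	int pending = slot->flags & 1;

	/* ... the payload would be handled here ... */
	slot->message_type = HVMSG_NONE;	/* free the slot */
	__sync_synchronize();			/* order the release vs. EOM */
	if (pending)
		wrmsr_stub(HV_X64_MSR_EOM, 0);	/* ask for redelivery */
}

int main(void)
{
	struct hv_msg_slot slot = { .message_type = HVMSG_TIMER_EXPIRED,
				    .flags = 1 };

	consume_message(&slot);			/* prints one EOM write */
	return 0;
}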
758
759static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
760{
761	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
762	struct hv_message *msg = &stimer->msg;
763	struct hv_timer_message_payload *payload =
764			(struct hv_timer_message_payload *)&msg->u.payload;
765
766	/*
767	 * To avoid piling up periodic ticks, don't retry message
768	 * delivery for them (within "lazy" lost ticks policy).
769	 */
770	bool no_retry = stimer->config.periodic;
771
772	payload->expiration_time = stimer->exp_time;
773	payload->delivery_time = get_time_ref_counter(vcpu->kvm);
774	return synic_deliver_msg(vcpu_to_synic(vcpu),
775				 stimer->config.sintx, msg,
776				 no_retry);
777}
778
779static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer)
780{
781	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
782	struct kvm_lapic_irq irq = {
783		.delivery_mode = APIC_DM_FIXED,
784		.vector = stimer->config.apic_vector
785	};
786
787	if (lapic_in_kernel(vcpu))
788		return !kvm_apic_set_irq(vcpu, &irq, NULL);
789	return 0;
790}
791
792static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
793{
794	int r, direct = stimer->config.direct_mode;
795
796	stimer->msg_pending = true;
797	if (!direct)
798		r = stimer_send_msg(stimer);
799	else
800		r = stimer_notify_direct(stimer);
801	trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id,
802				       stimer->index, direct, r);
803	if (!r) {
804		stimer->msg_pending = false;
805		if (!(stimer->config.periodic))
806			stimer->config.enable = 0;
807	}
808}
809
810void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
811{
812	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
813	struct kvm_vcpu_hv_stimer *stimer;
814	u64 time_now, exp_time;
815	int i;
816
817	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
818		if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
819			stimer = &hv_vcpu->stimer[i];
820			if (stimer->config.enable) {
821				exp_time = stimer->exp_time;
822
823				if (exp_time) {
824					time_now =
825						get_time_ref_counter(vcpu->kvm);
826					if (time_now >= exp_time)
827						stimer_expiration(stimer);
828				}
829
830				if ((stimer->config.enable) &&
831				    stimer->count) {
832					if (!stimer->msg_pending)
833						stimer_start(stimer);
834				} else
835					stimer_cleanup(stimer);
836			}
837		}
838}
839
840void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
841{
842	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
843	int i;
844
845	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
846		stimer_cleanup(&hv_vcpu->stimer[i]);
847}
848
849bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
850{
851	if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
852		return false;
853	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
854}
855EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
856
857bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
858			    struct hv_vp_assist_page *assist_page)
859{
860	if (!kvm_hv_assist_page_enabled(vcpu))
861		return false;
862	return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
863				      assist_page, sizeof(*assist_page));
864}
865EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
866
867static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
868{
869	struct hv_message *msg = &stimer->msg;
870	struct hv_timer_message_payload *payload =
871			(struct hv_timer_message_payload *)&msg->u.payload;
872
873	memset(&msg->header, 0, sizeof(msg->header));
874	msg->header.message_type = HVMSG_TIMER_EXPIRED;
875	msg->header.payload_size = sizeof(*payload);
876
877	payload->timer_index = stimer->index;
878	payload->expiration_time = 0;
879	payload->delivery_time = 0;
880}
881
882static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
883{
884	memset(stimer, 0, sizeof(*stimer));
885	stimer->index = timer_index;
886	hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
887	stimer->timer.function = stimer_timer_callback;
888	stimer_prepare_msg(stimer);
889}
890
891void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
892{
893	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
894	int i;
895
896	synic_init(&hv_vcpu->synic);
897
898	bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
899	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
900		stimer_init(&hv_vcpu->stimer[i], i);
901}
902
903void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
904{
905	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
906
907	hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
908}
909
910int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
911{
912	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
913
914	/*
915	 * Hyper-V SynIC auto-EOI SINTs are
916	 * not compatible with APICv, so request
917	 * to deactivate APICv permanently.
918	 */
919	kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV);
920	synic->active = true;
921	synic->dont_zero_synic_pages = dont_zero_synic_pages;
922	synic->control = HV_SYNIC_CONTROL_ENABLE;
923	return 0;
924}
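
/*
 * synic->active is only set from here, which userspace reaches through the
 * per-vCPU KVM_ENABLE_CAP ioctl with KVM_CAP_HYPERV_SYNIC or
 * KVM_CAP_HYPERV_SYNIC2 (the latter is what makes dont_zero_synic_pages
 * true, as wired up in x86.c).  A minimal userspace sketch, assuming vcpu_fd
 * is an open vCPU file descriptor:
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_hyperv_synic(int vcpu_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	/* SYNIC2: don't zero already-populated SynIC pages (e.g. migration) */
	cap.cap = KVM_CAP_HYPERV_SYNIC2;
	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);	/* 0 on success */
}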
925
926static bool kvm_hv_msr_partition_wide(u32 msr)
927{
928	bool r = false;
929
930	switch (msr) {
931	case HV_X64_MSR_GUEST_OS_ID:
932	case HV_X64_MSR_HYPERCALL:
933	case HV_X64_MSR_REFERENCE_TSC:
934	case HV_X64_MSR_TIME_REF_COUNT:
935	case HV_X64_MSR_CRASH_CTL:
936	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
937	case HV_X64_MSR_RESET:
938	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
939	case HV_X64_MSR_TSC_EMULATION_CONTROL:
940	case HV_X64_MSR_TSC_EMULATION_STATUS:
941	case HV_X64_MSR_SYNDBG_OPTIONS:
942	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
943		r = true;
944		break;
945	}
946
947	return r;
948}
949
950static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
951				     u32 index, u64 *pdata)
952{
953	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
954	size_t size = ARRAY_SIZE(hv->hv_crash_param);
955
956	if (WARN_ON_ONCE(index >= size))
957		return -EINVAL;
958
959	*pdata = hv->hv_crash_param[array_index_nospec(index, size)];
960	return 0;
961}
962
963static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
964{
965	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
966
967	*pdata = hv->hv_crash_ctl;
968	return 0;
969}
970
971static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
972{
973	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
974
975	if (host)
976		hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY;
977
978	if (!host && (data & HV_CRASH_CTL_CRASH_NOTIFY)) {
979
980		vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
981			  hv->hv_crash_param[0],
982			  hv->hv_crash_param[1],
983			  hv->hv_crash_param[2],
984			  hv->hv_crash_param[3],
985			  hv->hv_crash_param[4]);
986
987		/* Send notification about crash to user space */
988		kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
989	}
990
991	return 0;
992}
993
994static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
995				     u32 index, u64 data)
996{
997	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
998	size_t size = ARRAY_SIZE(hv->hv_crash_param);
999
1000	if (WARN_ON_ONCE(index >= size))
1001		return -EINVAL;
1002
1003	hv->hv_crash_param[array_index_nospec(index, size)] = data;
1004	return 0;
1005}
1006
1007/*
1008 * The kvmclock and Hyper-V TSC page use similar formulas, and converting
1009 * between them is possible:
1010 *
1011 * kvmclock formula:
1012 *    nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
1013 *           + system_time
1014 *
1015 * Hyper-V formula:
1016 *    nsec/100 = ticks * scale / 2^64 + offset
1017 *
1018 * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
1019 * By dividing the kvmclock formula by 100 and equating what's left we get:
1020 *    ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
1021 *            scale / 2^64 =         tsc_to_system_mul * 2^(tsc_shift-32) / 100
1022 *            scale        =         tsc_to_system_mul * 2^(32+tsc_shift) / 100
1023 *
1024 * Now expand the kvmclock formula and divide by 100:
1025 *    nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
1026 *           - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
1027 *           + system_time
1028 *    nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
1029 *               - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
1030 *               + system_time / 100
1031 *
1032 * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
1033 *    nsec/100 = ticks * scale / 2^64
1034 *               - tsc_timestamp * scale / 2^64
1035 *               + system_time / 100
1036 *
1037 * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
1038 *    offset = system_time / 100 - tsc_timestamp * scale / 2^64
1039 *
1040 * These two equivalencies are implemented in this function.
1041 */
1042static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
1043					struct ms_hyperv_tsc_page *tsc_ref)
1044{
1045	u64 max_mul;
1046
1047	if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
1048		return false;
1049
1050	/*
1051	 * Check if scale would overflow; if so, we use the time ref counter:
1052	 *    tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
1053	 *    tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
1054	 *    tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
1055	 */
1056	max_mul = 100ull << (32 - hv_clock->tsc_shift);
1057	if (hv_clock->tsc_to_system_mul >= max_mul)
1058		return false;
1059
1060	/*
1061	 * Otherwise compute the scale and offset according to the formulas
1062	 * derived above.
1063	 */
1064	tsc_ref->tsc_scale =
1065		mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
1066				hv_clock->tsc_to_system_mul,
1067				100);
1068
1069	tsc_ref->tsc_offset = hv_clock->system_time;
1070	do_div(tsc_ref->tsc_offset, 100);
1071	tsc_ref->tsc_offset -=
1072		mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
1073	return true;
1074}
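
/*
 * A standalone numeric spot-check of the derivation in the comment above:
 * with invented kvmclock parameters, compute scale and offset the same way as
 * compute_tsc_page_parameters() and confirm that the Hyper-V formula agrees
 * with kvmclock/100.
 */
#include <stdint.h>
#include <stdio.h>

typedef unsigned __int128 u128;

/* kvmclock: nsec = ((ticks - tsc_timestamp) << shift) * mul >> 32 + system_time */
static uint64_t kvmclock_ns(uint64_t ticks, uint64_t tsc_timestamp,
			    uint32_t mul, int shift, uint64_t system_time)
{
	u128 delta = ticks - tsc_timestamp;

	delta = shift >= 0 ? delta << shift : delta >> -shift;
	return (uint64_t)((delta * mul) >> 32) + system_time;
}

int main(void)
{
	/* invented parameters: 2 GHz TSC -> mul = 2^31, shift = 0 */
	uint32_t mul = 0x80000000u;
	int shift = 0;
	uint64_t tsc_timestamp = 1000000, system_time = 5000000;
	uint64_t ticks = 3000000000ull;

	/* scale = mul * 2^(32+shift) / 100; offset = sys/100 - ts*scale/2^64 */
	uint64_t scale = (uint64_t)((((u128)1 << (32 + shift)) * mul) / 100);
	uint64_t offset = system_time / 100 -
			  (uint64_t)(((u128)tsc_timestamp * scale) >> 64);

	uint64_t hv = (uint64_t)(((u128)ticks * scale) >> 64) + offset;
	uint64_t kvm = kvmclock_ns(ticks, tsc_timestamp, mul, shift, system_time);

	/* both print 15045000 with these inputs */
	printf("hyperv=%llu kvmclock/100=%llu\n",
	       (unsigned long long)hv, (unsigned long long)(kvm / 100));
	return 0;
}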
1075
1076void kvm_hv_setup_tsc_page(struct kvm *kvm,
1077			   struct pvclock_vcpu_time_info *hv_clock)
1078{
1079	struct kvm_hv *hv = &kvm->arch.hyperv;
1080	u32 tsc_seq;
1081	u64 gfn;
1082
1083	BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
1084	BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);
1085
1086	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
1087		return;
1088
1089	mutex_lock(&kvm->arch.hyperv.hv_lock);
1090	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
1091		goto out_unlock;
1092
1093	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
1094	/*
1095	 * Because the TSC parameters only vary when there is a
1096	 * change in the master clock, do not bother with caching.
1097	 */
1098	if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
1099				    &tsc_seq, sizeof(tsc_seq))))
1100		goto out_unlock;
1101
1102	/*
1103	 * While we're computing and writing the parameters, force the
1104	 * guest to use the time reference count MSR.
1105	 */
1106	hv->tsc_ref.tsc_sequence = 0;
1107	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
1108			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
1109		goto out_unlock;
1110
1111	if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
1112		goto out_unlock;
1113
1114	/* Ensure sequence is zero before writing the rest of the struct.  */
1115	smp_wmb();
1116	if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
1117		goto out_unlock;
1118
1119	/*
1120	 * Now switch to the TSC page mechanism by writing the sequence.
1121	 */
1122	tsc_seq++;
1123	if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
1124		tsc_seq = 1;
1125
1126	/* Write the struct entirely before the non-zero sequence.  */
1127	smp_wmb();
1128
1129	hv->tsc_ref.tsc_sequence = tsc_seq;
1130	kvm_write_guest(kvm, gfn_to_gpa(gfn),
1131			&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
1132out_unlock:
1133	mutex_unlock(&kvm->arch.hyperv.hv_lock);
1134}
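
/*
 * The tsc_sequence handling above is a seqlock-style publish: zero the
 * sequence, update scale/offset, then store a new non-zero sequence.  A
 * hedged sketch of the matching guest-side read (struct abbreviated; 0 and,
 * historically, 0xFFFFFFFF are treated as "page invalid"; rdtsc_stub() stands
 * in for RDTSC; real guest code also puts read barriers around the sequence
 * checks):
 */
#include <stdint.h>

struct tsc_page {			/* abbreviated ms_hyperv_tsc_page */
	volatile uint32_t tsc_sequence;
	uint32_t reserved1;
	volatile uint64_t tsc_scale;
	volatile int64_t tsc_offset;
};

static uint64_t rdtsc_stub(void)	/* placeholder for a real RDTSC */
{
	return 0;
}

static int read_ref_time(const struct tsc_page *tp, uint64_t *ret)
{
	uint32_t seq;
	uint64_t scale, tsc;
	int64_t offset;

	do {
		seq = tp->tsc_sequence;
		if (seq == 0 || seq == 0xFFFFFFFF)
			return -1;	/* caller falls back to TIME_REF_COUNT */
		scale = tp->tsc_scale;
		offset = tp->tsc_offset;
		tsc = rdtsc_stub();
	} while (tp->tsc_sequence != seq);	/* retry if it changed under us */

	*ret = (uint64_t)(((unsigned __int128)tsc * scale) >> 64) + offset;
	return 0;
}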
1135
1136static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
1137			     bool host)
1138{
1139	struct kvm *kvm = vcpu->kvm;
1140	struct kvm_hv *hv = &kvm->arch.hyperv;
1141
1142	switch (msr) {
1143	case HV_X64_MSR_GUEST_OS_ID:
1144		hv->hv_guest_os_id = data;
1145		/* setting guest os id to zero disables hypercall page */
1146		if (!hv->hv_guest_os_id)
1147			hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
1148		break;
1149	case HV_X64_MSR_HYPERCALL: {
1150		u64 gfn;
1151		unsigned long addr;
1152		u8 instructions[4];
1153
1154		/* if guest os id is not set hypercall should remain disabled */
1155		if (!hv->hv_guest_os_id)
1156			break;
1157		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
1158			hv->hv_hypercall = data;
1159			break;
1160		}
1161		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
1162		addr = gfn_to_hva(kvm, gfn);
1163		if (kvm_is_error_hva(addr))
1164			return 1;
1165		kvm_x86_ops.patch_hypercall(vcpu, instructions);
1166		((unsigned char *)instructions)[3] = 0xc3; /* ret */
1167		if (__copy_to_user((void __user *)addr, instructions, 4))
1168			return 1;
1169		hv->hv_hypercall = data;
1170		mark_page_dirty(kvm, gfn);
1171		break;
1172	}
1173	case HV_X64_MSR_REFERENCE_TSC:
1174		hv->hv_tsc_page = data;
1175		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
1176			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
1177		break;
1178	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
1179		return kvm_hv_msr_set_crash_data(vcpu,
1180						 msr - HV_X64_MSR_CRASH_P0,
1181						 data);
1182	case HV_X64_MSR_CRASH_CTL:
1183		return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
1184	case HV_X64_MSR_RESET:
1185		if (data == 1) {
1186			vcpu_debug(vcpu, "hyper-v reset requested\n");
1187			kvm_make_request(KVM_REQ_HV_RESET, vcpu);
1188		}
1189		break;
1190	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
1191		hv->hv_reenlightenment_control = data;
1192		break;
1193	case HV_X64_MSR_TSC_EMULATION_CONTROL:
1194		hv->hv_tsc_emulation_control = data;
1195		break;
1196	case HV_X64_MSR_TSC_EMULATION_STATUS:
1197		hv->hv_tsc_emulation_status = data;
1198		break;
1199	case HV_X64_MSR_TIME_REF_COUNT:
1200		/* read-only, but still ignore it if host-initiated */
1201		if (!host)
1202			return 1;
1203		break;
1204	case HV_X64_MSR_SYNDBG_OPTIONS:
1205	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
1206		return syndbg_set_msr(vcpu, msr, data, host);
1207	default:
1208		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
1209			    msr, data);
1210		return 1;
1211	}
1212	return 0;
1213}
1214
1215/* Calculate cpu time spent by current task in 100ns units */
1216static u64 current_task_runtime_100ns(void)
1217{
1218	u64 utime, stime;
1219
1220	task_cputime_adjusted(current, &utime, &stime);
1221
1222	return div_u64(utime + stime, 100);
1223}
1224
1225static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
1226{
1227	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
1228
1229	switch (msr) {
1230	case HV_X64_MSR_VP_INDEX: {
1231		struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
1232		int vcpu_idx = kvm_vcpu_get_idx(vcpu);
1233		u32 new_vp_index = (u32)data;
1234
1235		if (!host || new_vp_index >= KVM_MAX_VCPUS)
1236			return 1;
1237
1238		if (new_vp_index == hv_vcpu->vp_index)
1239			return 0;
1240
1241		/*
1242		 * The VP index is initialized to vcpu_index by
1243		 * kvm_hv_vcpu_postcreate so they initially match.  Now that the
1244		 * VP index is changing, adjust num_mismatched_vp_indexes if
1245		 * it now matches or no longer matches vcpu_idx.
1246		 */
1247		if (hv_vcpu->vp_index == vcpu_idx)
1248			atomic_inc(&hv->num_mismatched_vp_indexes);
1249		else if (new_vp_index == vcpu_idx)
1250			atomic_dec(&hv->num_mismatched_vp_indexes);
1251
1252		hv_vcpu->vp_index = new_vp_index;
1253		break;
1254	}
1255	case HV_X64_MSR_VP_ASSIST_PAGE: {
1256		u64 gfn;
1257		unsigned long addr;
1258
1259		if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
1260			hv_vcpu->hv_vapic = data;
1261			if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
1262				return 1;
1263			break;
1264		}
1265		gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
1266		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
1267		if (kvm_is_error_hva(addr))
1268			return 1;
1269
1270		/*
1271		 * Clear only the apic_assist portion of struct hv_vp_assist_page;
1272		 * there can be valuable data in the rest which needs
1273		 * to be preserved, e.g. on migration.
1274		 */
1275		if (__put_user(0, (u32 __user *)addr))
1276			return 1;
1277		hv_vcpu->hv_vapic = data;
1278		kvm_vcpu_mark_page_dirty(vcpu, gfn);
1279		if (kvm_lapic_enable_pv_eoi(vcpu,
1280					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
1281					    sizeof(struct hv_vp_assist_page)))
1282			return 1;
1283		break;
1284	}
1285	case HV_X64_MSR_EOI:
1286		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1287	case HV_X64_MSR_ICR:
1288		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1289	case HV_X64_MSR_TPR:
1290		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1291	case HV_X64_MSR_VP_RUNTIME:
1292		if (!host)
1293			return 1;
1294		hv_vcpu->runtime_offset = data - current_task_runtime_100ns();
1295		break;
1296	case HV_X64_MSR_SCONTROL:
1297	case HV_X64_MSR_SVERSION:
1298	case HV_X64_MSR_SIEFP:
1299	case HV_X64_MSR_SIMP:
1300	case HV_X64_MSR_EOM:
1301	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
1302		return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host);
1303	case HV_X64_MSR_STIMER0_CONFIG:
1304	case HV_X64_MSR_STIMER1_CONFIG:
1305	case HV_X64_MSR_STIMER2_CONFIG:
1306	case HV_X64_MSR_STIMER3_CONFIG: {
1307		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
1308
1309		return stimer_set_config(vcpu_to_stimer(vcpu, timer_index),
1310					 data, host);
1311	}
1312	case HV_X64_MSR_STIMER0_COUNT:
1313	case HV_X64_MSR_STIMER1_COUNT:
1314	case HV_X64_MSR_STIMER2_COUNT:
1315	case HV_X64_MSR_STIMER3_COUNT: {
1316		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
1317
1318		return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
1319					data, host);
1320	}
1321	case HV_X64_MSR_TSC_FREQUENCY:
1322	case HV_X64_MSR_APIC_FREQUENCY:
1323		/* read-only, but still ignore it if host-initiated */
1324		if (!host)
1325			return 1;
1326		break;
1327	default:
1328		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
1329			    msr, data);
1330		return 1;
1331	}
1332
1333	return 0;
1334}
1335
1336static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
1337			     bool host)
1338{
1339	u64 data = 0;
1340	struct kvm *kvm = vcpu->kvm;
1341	struct kvm_hv *hv = &kvm->arch.hyperv;
1342
1343	switch (msr) {
1344	case HV_X64_MSR_GUEST_OS_ID:
1345		data = hv->hv_guest_os_id;
1346		break;
1347	case HV_X64_MSR_HYPERCALL:
1348		data = hv->hv_hypercall;
1349		break;
1350	case HV_X64_MSR_TIME_REF_COUNT:
1351		data = get_time_ref_counter(kvm);
1352		break;
1353	case HV_X64_MSR_REFERENCE_TSC:
1354		data = hv->hv_tsc_page;
1355		break;
1356	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
1357		return kvm_hv_msr_get_crash_data(vcpu,
1358						 msr - HV_X64_MSR_CRASH_P0,
1359						 pdata);
1360	case HV_X64_MSR_CRASH_CTL:
1361		return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
1362	case HV_X64_MSR_RESET:
1363		data = 0;
1364		break;
1365	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
1366		data = hv->hv_reenlightenment_control;
1367		break;
1368	case HV_X64_MSR_TSC_EMULATION_CONTROL:
1369		data = hv->hv_tsc_emulation_control;
1370		break;
1371	case HV_X64_MSR_TSC_EMULATION_STATUS:
1372		data = hv->hv_tsc_emulation_status;
1373		break;
1374	case HV_X64_MSR_SYNDBG_OPTIONS:
1375	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
1376		return syndbg_get_msr(vcpu, msr, pdata, host);
1377	default:
1378		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1379		return 1;
1380	}
1381
1382	*pdata = data;
1383	return 0;
1384}
1385
1386static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
1387			  bool host)
1388{
1389	u64 data = 0;
1390	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
1391
1392	switch (msr) {
1393	case HV_X64_MSR_VP_INDEX:
1394		data = hv_vcpu->vp_index;
1395		break;
1396	case HV_X64_MSR_EOI:
1397		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
1398	case HV_X64_MSR_ICR:
1399		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
1400	case HV_X64_MSR_TPR:
1401		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
1402	case HV_X64_MSR_VP_ASSIST_PAGE:
1403		data = hv_vcpu->hv_vapic;
1404		break;
1405	case HV_X64_MSR_VP_RUNTIME:
1406		data = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
1407		break;
1408	case HV_X64_MSR_SCONTROL:
1409	case HV_X64_MSR_SVERSION:
1410	case HV_X64_MSR_SIEFP:
1411	case HV_X64_MSR_SIMP:
1412	case HV_X64_MSR_EOM:
1413	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
1414		return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host);
1415	case HV_X64_MSR_STIMER0_CONFIG:
1416	case HV_X64_MSR_STIMER1_CONFIG:
1417	case HV_X64_MSR_STIMER2_CONFIG:
1418	case HV_X64_MSR_STIMER3_CONFIG: {
1419		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
1420
1421		return stimer_get_config(vcpu_to_stimer(vcpu, timer_index),
1422					 pdata);
1423	}
1424	case HV_X64_MSR_STIMER0_COUNT:
1425	case HV_X64_MSR_STIMER1_COUNT:
1426	case HV_X64_MSR_STIMER2_COUNT:
1427	case HV_X64_MSR_STIMER3_COUNT: {
1428		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
1429
1430		return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
1431					pdata);
1432	}
1433	case HV_X64_MSR_TSC_FREQUENCY:
1434		data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
1435		break;
1436	case HV_X64_MSR_APIC_FREQUENCY:
1437		data = APIC_BUS_FREQUENCY;
1438		break;
1439	default:
1440		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1441		return 1;
1442	}
1443	*pdata = data;
1444	return 0;
1445}
1446
1447int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
1448{
1449	if (kvm_hv_msr_partition_wide(msr)) {
1450		int r;
1451
1452		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
1453		r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
1454		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
1455		return r;
1456	} else
1457		return kvm_hv_set_msr(vcpu, msr, data, host);
1458}
1459
1460int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
1461{
1462	if (kvm_hv_msr_partition_wide(msr)) {
1463		int r;
1464
1465		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
1466		r = kvm_hv_get_msr_pw(vcpu, msr, pdata, host);
1467		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
1468		return r;
1469	} else
1470		return kvm_hv_get_msr(vcpu, msr, pdata, host);
1471}
1472
1473static __always_inline unsigned long *sparse_set_to_vcpu_mask(
1474	struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
1475	u64 *vp_bitmap, unsigned long *vcpu_bitmap)
1476{
1477	struct kvm_hv *hv = &kvm->arch.hyperv;
1478	struct kvm_vcpu *vcpu;
1479	int i, bank, sbank = 0;
1480
1481	memset(vp_bitmap, 0,
1482	       KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
1483	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
1484			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
1485		vp_bitmap[bank] = sparse_banks[sbank++];
1486
1487	if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
1488		/* for all vcpus vp_index == vcpu_idx */
1489		return (unsigned long *)vp_bitmap;
1490	}
1491
1492	bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
1493	kvm_for_each_vcpu(i, vcpu, kvm) {
1494		if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index,
1495			     (unsigned long *)vp_bitmap))
1496			__set_bit(i, vcpu_bitmap);
1497	}
1498	return vcpu_bitmap;
1499}
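
/*
 * A standalone sketch of the sparse VP-set expansion performed above:
 * valid_bank_mask has one bit per bank of 64 VP indexes and bank_contents
 * supplies one 64-bit word per set bit, in ascending bank order, so VP index
 * (64 * bank + bit) is present iff that bit is set in the matching word.
 * The sample set in main() is invented.
 */
#include <stdint.h>
#include <stdio.h>

static void expand_sparse_set(uint64_t valid_bank_mask,
			      const uint64_t *bank_contents)
{
	int bank, bit, sbank = 0;

	for (bank = 0; bank < 64; bank++) {
		uint64_t word;

		if (!(valid_bank_mask & (1ull << bank)))
			continue;
		word = bank_contents[sbank++];
		for (bit = 0; bit < 64; bit++)
			if (word & (1ull << bit))
				printf("vp %d\n", bank * 64 + bit);
	}
}

int main(void)
{
	/* banks 0 and 2 present: VPs 0, 3 and 130 */
	uint64_t banks[] = { 0x9, 0x4 };

	expand_sparse_set(0x5, banks);	/* prints: vp 0, vp 3, vp 130 */
	return 0;
}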
1500
1501static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
1502			    u16 rep_cnt, bool ex)
1503{
1504	struct kvm *kvm = current_vcpu->kvm;
1505	struct kvm_vcpu_hv *hv_vcpu = &current_vcpu->arch.hyperv;
1506	struct hv_tlb_flush_ex flush_ex;
1507	struct hv_tlb_flush flush;
1508	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
1509	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
1510	unsigned long *vcpu_mask;
1511	u64 valid_bank_mask;
1512	u64 sparse_banks[64];
1513	int sparse_banks_len;
1514	bool all_cpus;
1515
1516	if (!ex) {
1517		if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
1518			return HV_STATUS_INVALID_HYPERCALL_INPUT;
1519
1520		trace_kvm_hv_flush_tlb(flush.processor_mask,
1521				       flush.address_space, flush.flags);
1522
1523		valid_bank_mask = BIT_ULL(0);
1524		sparse_banks[0] = flush.processor_mask;
1525
1526		/*
1527		 * Work around possible WS2012 bug: it sends hypercalls
1528		 * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
1529		 * while also expecting us to flush something and crashing if
1530		 * we don't. Let's treat processor_mask == 0 the same as
1531		 * HV_FLUSH_ALL_PROCESSORS.
1532		 */
1533		all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
1534			flush.processor_mask == 0;
1535	} else {
1536		if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
1537					    sizeof(flush_ex))))
1538			return HV_STATUS_INVALID_HYPERCALL_INPUT;
1539
1540		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
1541					  flush_ex.hv_vp_set.format,
1542					  flush_ex.address_space,
1543					  flush_ex.flags);
1544
1545		valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
1546		all_cpus = flush_ex.hv_vp_set.format !=
1547			HV_GENERIC_SET_SPARSE_4K;
1548
1549		sparse_banks_len =
1550			bitmap_weight((unsigned long *)&valid_bank_mask, 64) *
1551			sizeof(sparse_banks[0]);
1552
1553		if (!sparse_banks_len && !all_cpus)
1554			goto ret_success;
1555
1556		if (!all_cpus &&
1557		    kvm_read_guest(kvm,
1558				   ingpa + offsetof(struct hv_tlb_flush_ex,
1559						    hv_vp_set.bank_contents),
1560				   sparse_banks,
1561				   sparse_banks_len))
1562			return HV_STATUS_INVALID_HYPERCALL_INPUT;
1563	}
1564
1565	cpumask_clear(&hv_vcpu->tlb_flush);
1566
1567	/*
1568	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs, so we can't
1569	 * analyze it here; flush the TLB regardless of the specified address space.
1570	 */
1571	if (all_cpus) {
1572		kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST);
1573	} else {
1574		vcpu_mask = sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
1575						    vp_bitmap, vcpu_bitmap);
1576
1577		kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
1578					    NULL, vcpu_mask, &hv_vcpu->tlb_flush);
1579	}
1580
1581ret_success:
1582	/* We always do a full TLB flush, so report rep_done = rep_cnt. */
1583	return (u64)HV_STATUS_SUCCESS |
1584		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
1585}
1586
1587static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
1588				 unsigned long *vcpu_bitmap)
1589{
1590	struct kvm_lapic_irq irq = {
1591		.delivery_mode = APIC_DM_FIXED,
1592		.vector = vector
1593	};
1594	struct kvm_vcpu *vcpu;
1595	int i;
1596
1597	kvm_for_each_vcpu(i, vcpu, kvm) {
1598		if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
1599			continue;
1600
1601		/* We fail only when APIC is disabled */
1602		kvm_apic_set_irq(vcpu, &irq, NULL);
1603	}
1604}
1605
1606static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
1607			   bool ex, bool fast)
1608{
1609	struct kvm *kvm = current_vcpu->kvm;
1610	struct hv_send_ipi_ex send_ipi_ex;
1611	struct hv_send_ipi send_ipi;
1612	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
1613	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
1614	unsigned long *vcpu_mask;
1615	unsigned long valid_bank_mask;
1616	u64 sparse_banks[64];
1617	int sparse_banks_len;
1618	u32 vector;
1619	bool all_cpus;
1620
1621	if (!ex) {
1622		if (!fast) {
1623			if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi,
1624						    sizeof(send_ipi))))
1625				return HV_STATUS_INVALID_HYPERCALL_INPUT;
1626			sparse_banks[0] = send_ipi.cpu_mask;
1627			vector = send_ipi.vector;
1628		} else {
1629			/* 'reserved' part of hv_send_ipi should be 0 */
1630			if (unlikely(ingpa >> 32 != 0))
1631				return HV_STATUS_INVALID_HYPERCALL_INPUT;
1632			sparse_banks[0] = outgpa;
1633			vector = (u32)ingpa;
1634		}
1635		all_cpus = false;
1636		valid_bank_mask = BIT_ULL(0);
1637
1638		trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
1639	} else {
1640		if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
1641					    sizeof(send_ipi_ex))))
1642			return HV_STATUS_INVALID_HYPERCALL_INPUT;
1643
1644		trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
1645					 send_ipi_ex.vp_set.format,
1646					 send_ipi_ex.vp_set.valid_bank_mask);
1647
1648		vector = send_ipi_ex.vector;
1649		valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
1650		sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
1651			sizeof(sparse_banks[0]);
1652
1653		all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
1654
1655		if (all_cpus)
1656			goto check_and_send_ipi;
1657
1658		if (!sparse_banks_len)
1659			goto ret_success;
1660
1661		if (kvm_read_guest(kvm,
1662				   ingpa + offsetof(struct hv_send_ipi_ex,
1663						    vp_set.bank_contents),
1664				   sparse_banks,
1665				   sparse_banks_len))
1666			return HV_STATUS_INVALID_HYPERCALL_INPUT;
1667	}
1668
1669check_and_send_ipi:
1670	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
1671		return HV_STATUS_INVALID_HYPERCALL_INPUT;
1672
1673	vcpu_mask = all_cpus ? NULL :
1674		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
1675					vp_bitmap, vcpu_bitmap);
1676
1677	kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
1678
1679ret_success:
1680	return HV_STATUS_SUCCESS;
1681}
1682
1683bool kvm_hv_hypercall_enabled(struct kvm *kvm)
1684{
1685	return READ_ONCE(kvm->arch.hyperv.hv_guest_os_id) != 0;
1686}
1687
1688static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
1689{
1690	bool longmode;
1691
1692	longmode = is_64_bit_mode(vcpu);
1693	if (longmode)
1694		kvm_rax_write(vcpu, result);
1695	else {
1696		kvm_rdx_write(vcpu, result >> 32);
1697		kvm_rax_write(vcpu, result & 0xffffffff);
1698	}
1699}
1700
1701static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
1702{
1703	kvm_hv_hypercall_set_result(vcpu, result);
1704	++vcpu->stat.hypercalls;
1705	return kvm_skip_emulated_instruction(vcpu);
1706}
1707
1708static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
1709{
1710	return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
1711}
1712
1713static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
1714{
1715	struct eventfd_ctx *eventfd;
1716
1717	if (unlikely(!fast)) {
1718		int ret;
1719		gpa_t gpa = param;
1720
1721		if ((gpa & (__alignof__(param) - 1)) ||
1722		    offset_in_page(gpa) + sizeof(param) > PAGE_SIZE)
1723			return HV_STATUS_INVALID_ALIGNMENT;
1724
1725		ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param));
1726		if (ret < 0)
1727			return HV_STATUS_INVALID_ALIGNMENT;
1728	}
1729
1730	/*
1731	 * Per spec, bits 32-47 contain the extra "flag number".  However, we
1732	 * have no use for it, and in all known use cases it is zero, so just
1733	 * report lookup failure if it isn't.
1734	 */
1735	if (param & 0xffff00000000ULL)
1736		return HV_STATUS_INVALID_PORT_ID;
1737	/* remaining bits are reserved-zero */
1738	if (param & ~KVM_HYPERV_CONN_ID_MASK)
1739		return HV_STATUS_INVALID_HYPERCALL_INPUT;
1740
1741	/* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
1742	rcu_read_lock();
1743	eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
1744	rcu_read_unlock();
1745	if (!eventfd)
1746		return HV_STATUS_INVALID_PORT_ID;
1747
1748	eventfd_signal(eventfd, 1);
1749	return HV_STATUS_SUCCESS;
1750}
1751
1752int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
1753{
1754	u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
1755	uint16_t code, rep_idx, rep_cnt;
1756	bool fast, rep;
1757
1758	/*
1759	 * Per the Hyper-V spec, a hypercall generates #UD when executed
1760	 * from a non-zero CPL or from real mode.
1761	 */
1762	if (kvm_x86_ops.get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
1763		kvm_queue_exception(vcpu, UD_VECTOR);
1764		return 1;
1765	}
1766
1767#ifdef CONFIG_X86_64
1768	if (is_64_bit_mode(vcpu)) {
1769		param = kvm_rcx_read(vcpu);
1770		ingpa = kvm_rdx_read(vcpu);
1771		outgpa = kvm_r8_read(vcpu);
1772	} else
1773#endif
1774	{
1775		param = ((u64)kvm_rdx_read(vcpu) << 32) |
1776			(kvm_rax_read(vcpu) & 0xffffffff);
1777		ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
1778			(kvm_rcx_read(vcpu) & 0xffffffff);
1779		outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
1780			(kvm_rsi_read(vcpu) & 0xffffffff);
1781	}
1782
1783	code = param & 0xffff;
1784	fast = !!(param & HV_HYPERCALL_FAST_BIT);
1785	rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
1786	rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
1787	rep = !!(rep_cnt || rep_idx);
1788
1789	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
1790
1791	switch (code) {
1792	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
1793		if (unlikely(rep)) {
1794			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1795			break;
1796		}
1797		kvm_vcpu_on_spin(vcpu, true);
1798		break;
1799	case HVCALL_SIGNAL_EVENT:
1800		if (unlikely(rep)) {
1801			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1802			break;
1803		}
1804		ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
1805		if (ret != HV_STATUS_INVALID_PORT_ID)
1806			break;
1807		fallthrough;	/* maybe userspace knows this conn_id */
1808	case HVCALL_POST_MESSAGE:
1809		/* don't bother userspace if it has no way to handle it */
1810		if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
1811			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1812			break;
1813		}
1814		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
1815		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
1816		vcpu->run->hyperv.u.hcall.input = param;
1817		vcpu->run->hyperv.u.hcall.params[0] = ingpa;
1818		vcpu->run->hyperv.u.hcall.params[1] = outgpa;
1819		vcpu->arch.complete_userspace_io =
1820				kvm_hv_hypercall_complete_userspace;
1821		return 0;
1822	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
1823		if (unlikely(fast || !rep_cnt || rep_idx)) {
1824			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1825			break;
1826		}
1827		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
1828		break;
1829	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
1830		if (unlikely(fast || rep)) {
1831			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1832			break;
1833		}
1834		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
1835		break;
1836	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
1837		if (unlikely(fast || !rep_cnt || rep_idx)) {
1838			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1839			break;
1840		}
1841		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
1842		break;
1843	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
1844		if (unlikely(fast || rep)) {
1845			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1846			break;
1847		}
1848		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
1849		break;
1850	case HVCALL_SEND_IPI:
1851		if (unlikely(rep)) {
1852			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1853			break;
1854		}
1855		ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast);
1856		break;
1857	case HVCALL_SEND_IPI_EX:
1858		if (unlikely(fast || rep)) {
1859			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
1860			break;
1861		}
1862		ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false);
1863		break;
1864	case HVCALL_POST_DEBUG_DATA:
1865	case HVCALL_RETRIEVE_DEBUG_DATA:
1866		if (unlikely(fast)) {
1867			ret = HV_STATUS_INVALID_PARAMETER;
1868			break;
1869		}
1870		fallthrough;
1871	case HVCALL_RESET_DEBUG_SESSION: {
1872		struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);
1873
1874		if (!kvm_hv_is_syndbg_enabled(vcpu)) {
1875			ret = HV_STATUS_INVALID_HYPERCALL_CODE;
1876			break;
1877		}
1878
1879		if (!(syndbg->options & HV_X64_SYNDBG_OPTION_USE_HCALLS)) {
1880			ret = HV_STATUS_OPERATION_DENIED;
1881			break;
1882		}
1883		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
1884		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
1885		vcpu->run->hyperv.u.hcall.input = param;
1886		vcpu->run->hyperv.u.hcall.params[0] = ingpa;
1887		vcpu->run->hyperv.u.hcall.params[1] = outgpa;
1888		vcpu->arch.complete_userspace_io =
1889				kvm_hv_hypercall_complete_userspace;
1890		return 0;
1891	}
1892	default:
1893		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
1894		break;
1895	}
1896
1897	return kvm_hv_hypercall_complete(vcpu, ret);
1898}
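
/*
 * A standalone sketch of the hypercall input/result packing parsed above.
 * The masks match the code; the bit offsets (fast flag at bit 16, rep count
 * at bits 43:32, rep start index at bits 59:48) are the TLFS values the
 * HV_HYPERCALL_* constants are assumed to carry.  A rep hypercall reports the
 * number of completed reps back in the same 43:32 field of the result.
 */
#include <stdint.h>
#include <stdio.h>

struct hc_input {
	unsigned code, fast, rep_cnt, rep_idx;
};

static struct hc_input parse_hypercall(uint64_t param)
{
	return (struct hc_input){
		.code    = param & 0xffff,
		.fast    = !!(param & (1ull << 16)),
		.rep_cnt = (param >> 32) & 0xfff,
		.rep_idx = (param >> 48) & 0xfff,
	};
}

static uint64_t make_result(unsigned status, unsigned reps_done)
{
	return (uint64_t)status | ((uint64_t)reps_done << 32);
}

int main(void)
{
	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST (0x0002), 3 reps, slow (memory) call */
	struct hc_input in = parse_hypercall(0x0000000300000002ull);

	printf("code=0x%x fast=%u rep_cnt=%u rep_idx=%u result=0x%llx\n",
	       in.code, in.fast, in.rep_cnt, in.rep_idx,
	       (unsigned long long)make_result(0 /* HV_STATUS_SUCCESS */,
					       in.rep_cnt));
	/* prints: code=0x2 fast=0 rep_cnt=3 rep_idx=0 result=0x300000000 */
	return 0;
}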
1899
1900void kvm_hv_init_vm(struct kvm *kvm)
1901{
1902	mutex_init(&kvm->arch.hyperv.hv_lock);
1903	idr_init(&kvm->arch.hyperv.conn_to_evt);
1904}
1905
1906void kvm_hv_destroy_vm(struct kvm *kvm)
1907{
1908	struct eventfd_ctx *eventfd;
1909	int i;
1910
1911	idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
1912		eventfd_ctx_put(eventfd);
1913	idr_destroy(&kvm->arch.hyperv.conn_to_evt);
1914}
1915
1916static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
1917{
1918	struct kvm_hv *hv = &kvm->arch.hyperv;
1919	struct eventfd_ctx *eventfd;
1920	int ret;
1921
1922	eventfd = eventfd_ctx_fdget(fd);
1923	if (IS_ERR(eventfd))
1924		return PTR_ERR(eventfd);
1925
1926	mutex_lock(&hv->hv_lock);
1927	ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
1928			GFP_KERNEL_ACCOUNT);
1929	mutex_unlock(&hv->hv_lock);
1930
1931	if (ret >= 0)
1932		return 0;
1933
1934	if (ret == -ENOSPC)
1935		ret = -EEXIST;
1936	eventfd_ctx_put(eventfd);
1937	return ret;
1938}
1939
1940static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
1941{
1942	struct kvm_hv *hv = &kvm->arch.hyperv;
1943	struct eventfd_ctx *eventfd;
1944
1945	mutex_lock(&hv->hv_lock);
1946	eventfd = idr_remove(&hv->conn_to_evt, conn_id);
1947	mutex_unlock(&hv->hv_lock);
1948
1949	if (!eventfd)
1950		return -ENOENT;
1951
1952	synchronize_srcu(&kvm->srcu);
1953	eventfd_ctx_put(eventfd);
1954	return 0;
1955}
1956
1957int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
1958{
1959	if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
1960	    (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
1961		return -EINVAL;
1962
1963	if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
1964		return kvm_hv_eventfd_deassign(kvm, args->conn_id);
1965	return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
1966}
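
/*
 * A minimal userspace sketch of wiring a SIGNAL_EVENT connection id to an
 * eventfd through the KVM_HYPERV_EVENTFD VM ioctl that lands in the function
 * above.  Error handling is elided and vm_fd is assumed to be an open VM
 * file descriptor.
 */
#include <stdint.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int attach_hv_event(int vm_fd, uint32_t conn_id)
{
	struct kvm_hyperv_eventfd hvevfd;
	int efd = eventfd(0, EFD_CLOEXEC);

	memset(&hvevfd, 0, sizeof(hvevfd));
	hvevfd.conn_id = conn_id;	/* only the low 24 bits are valid */
	hvevfd.fd = efd;
	/* flags = 0 assigns; KVM_HYPERV_EVENTFD_DEASSIGN removes it again */
	if (ioctl(vm_fd, KVM_HYPERV_EVENTFD, &hvevfd))
		return -1;
	return efd;			/* becomes readable when the guest signals */
}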
1967
1968int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
1969				struct kvm_cpuid_entry2 __user *entries)
1970{
1971	uint16_t evmcs_ver = 0;
1972	struct kvm_cpuid_entry2 cpuid_entries[] = {
1973		{ .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS },
1974		{ .function = HYPERV_CPUID_INTERFACE },
1975		{ .function = HYPERV_CPUID_VERSION },
1976		{ .function = HYPERV_CPUID_FEATURES },
1977		{ .function = HYPERV_CPUID_ENLIGHTMENT_INFO },
1978		{ .function = HYPERV_CPUID_IMPLEMENT_LIMITS },
1979		{ .function = HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS },
1980		{ .function = HYPERV_CPUID_SYNDBG_INTERFACE },
1981		{ .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES	},
1982		{ .function = HYPERV_CPUID_NESTED_FEATURES },
1983	};
1984	int i, nent = ARRAY_SIZE(cpuid_entries);
1985
1986	if (kvm_x86_ops.nested_ops->get_evmcs_version)
1987		evmcs_ver = kvm_x86_ops.nested_ops->get_evmcs_version(vcpu);
1988
1989	/* Skip NESTED_FEATURES if eVMCS is not supported */
1990	if (!evmcs_ver)
1991		--nent;
1992
1993	if (cpuid->nent < nent)
1994		return -E2BIG;
1995
1996	if (cpuid->nent > nent)
1997		cpuid->nent = nent;
1998
1999	for (i = 0; i < nent; i++) {
2000		struct kvm_cpuid_entry2 *ent = &cpuid_entries[i];
2001		u32 signature[3];
2002
2003		switch (ent->function) {
2004		case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS:
2005			memcpy(signature, "Linux KVM Hv", 12);
2006
2007			ent->eax = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES;
2008			ent->ebx = signature[0];
2009			ent->ecx = signature[1];
2010			ent->edx = signature[2];
2011			break;
2012
2013		case HYPERV_CPUID_INTERFACE:
2014			memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12);
2015			ent->eax = signature[0];
2016			break;
2017
2018		case HYPERV_CPUID_VERSION:
2019			/*
2020			 * We implement some Hyper-V 2016 functions so let's use
2021			 * this version.
2022			 */
2023			ent->eax = 0x00003839;
2024			ent->ebx = 0x000A0000;
2025			break;
2026
2027		case HYPERV_CPUID_FEATURES:
2028			ent->eax |= HV_MSR_VP_RUNTIME_AVAILABLE;
2029			ent->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
2030			ent->eax |= HV_MSR_SYNIC_AVAILABLE;
2031			ent->eax |= HV_MSR_SYNTIMER_AVAILABLE;
2032			ent->eax |= HV_MSR_APIC_ACCESS_AVAILABLE;
2033			ent->eax |= HV_MSR_HYPERCALL_AVAILABLE;
2034			ent->eax |= HV_MSR_VP_INDEX_AVAILABLE;
2035			ent->eax |= HV_MSR_RESET_AVAILABLE;
2036			ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
2037			ent->eax |= HV_ACCESS_FREQUENCY_MSRS;
2038			ent->eax |= HV_ACCESS_REENLIGHTENMENT;
2039
2040			ent->ebx |= HV_POST_MESSAGES;
2041			ent->ebx |= HV_SIGNAL_EVENTS;
2042
2043			ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
2044			ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
2045
2046			ent->ebx |= HV_DEBUGGING;
2047			ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE;
2048			ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
2049
2050			/*
2051			 * Direct Synthetic timers only make sense with in-kernel
2052			 * LAPIC
2053			 */
2054			if (lapic_in_kernel(vcpu))
2055				ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
2056
2057			break;
2058
2059		case HYPERV_CPUID_ENLIGHTMENT_INFO:
2060			ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
2061			ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
2062			ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
2063			ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
2064			ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
2065			if (evmcs_ver)
2066				ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
2067			if (!cpu_smt_possible())
2068				ent->eax |= HV_X64_NO_NONARCH_CORESHARING;
2069			/*
2070			 * Default number of spinlock retry attempts, matches
2071			 * Hyper-V 2016.
2072			 */
2073			ent->ebx = 0x00000FFF;
2074
2075			break;
2076
2077		case HYPERV_CPUID_IMPLEMENT_LIMITS:
2078			/* Maximum number of virtual processors */
2079			ent->eax = KVM_MAX_VCPUS;
2080			/*
2081			 * Maximum number of logical processors, matches
2082			 * HyperV 2016.
2083			 * Hyper-V 2016.
2084			ent->ebx = 64;
2085
2086			break;
2087
2088		case HYPERV_CPUID_NESTED_FEATURES:
2089			ent->eax = evmcs_ver;
2090
2091			break;
2092
2093		case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS:
2094			memcpy(signature, "Linux KVM Hv", 12);
2095
2096			ent->eax = 0;
2097			ent->ebx = signature[0];
2098			ent->ecx = signature[1];
2099			ent->edx = signature[2];
2100			break;
2101
2102		case HYPERV_CPUID_SYNDBG_INTERFACE:
2103			memcpy(signature, "VS#1\0\0\0\0\0\0\0\0", 12);
2104			ent->eax = signature[0];
2105			break;
2106
2107		case HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES:
2108			ent->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
2109			break;
2110
2111		default:
2112			break;
2113		}
2114	}
2115
2116	if (copy_to_user(entries, cpuid_entries,
2117			 nent * sizeof(struct kvm_cpuid_entry2)))
2118		return -EFAULT;
2119
2120	return 0;
2121}
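
/*
 * Userspace retrieves the leaf set built above with the
 * KVM_GET_SUPPORTED_HV_CPUID vCPU ioctl.  A minimal sketch, sized for the
 * entries filled in by this function:
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void dump_hv_cpuid(int vcpu_fd)
{
	int nent = 10;			/* matches cpuid_entries[] above */
	struct kvm_cpuid2 *cpuid;
	unsigned i;

	cpuid = calloc(1, sizeof(*cpuid) + nent * sizeof(cpuid->entries[0]));
	cpuid->nent = nent;
	if (ioctl(vcpu_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid) == 0)
		for (i = 0; i < cpuid->nent; i++)
			printf("0x%08x: %08x %08x %08x %08x\n",
			       cpuid->entries[i].function,
			       cpuid->entries[i].eax, cpuid->entries[i].ebx,
			       cpuid->entries[i].ecx, cpuid->entries[i].edx);
	free(cpuid);
}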
2122