// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM Microsoft Hyper-V emulation
 *
 * derived from arch/x86/kvm/x86.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2008 Qumranet, Inc.
 * Copyright IBM Corporation, 2008
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
 *
 * Authors:
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *   Amit Shah <amit.shah@qumranet.com>
 *   Ben-Ami Yassour <benami@il.ibm.com>
 *   Andrey Smetanin <asmetanin@virtuozzo.com>
 */

#include "x86.h"
#include "lapic.h"
#include "ioapic.h"
#include "cpuid.h"
#include "hyperv.h"

#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/sched/cputime.h>
#include <linux/eventfd.h>

#include <asm/apicdef.h>
#include <trace/events/kvm.h>

#include "trace.h"
#include "irq.h"

#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)

static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
				bool vcpu_kick);

static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
{
	return atomic64_read(&synic->sint[sint]);
}

static inline int synic_get_sint_vector(u64 sint_value)
{
	if (sint_value & HV_SYNIC_SINT_MASKED)
		return -1;
	return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
}

static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
				       int vector)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
			return true;
	}
	return false;
}

static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
				      int vector)
{
	int i;
	u64 sint_value;

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		sint_value = synic_read_sint(synic, i);
		if (synic_get_sint_vector(sint_value) == vector &&
		    sint_value & HV_SYNIC_SINT_AUTO_EOI)
			return true;
	}
	return false;
}

static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
				int vector)
{
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
		return;

	if (synic_has_vector_connected(synic, vector))
		__set_bit(vector, synic->vec_bitmap);
	else
		__clear_bit(vector, synic->vec_bitmap);

	if (synic_has_vector_auto_eoi(synic, vector))
		__set_bit(vector, synic->auto_eoi_bitmap);
	else
		__clear_bit(vector, synic->auto_eoi_bitmap);
}

static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
			  u64 data, bool host)
{
	int vector, old_vector;
	bool masked;

	vector = data & HV_SYNIC_SINT_VECTOR_MASK;
	masked = data & HV_SYNIC_SINT_MASKED;

	/*
	 * Valid vectors are 16-255, however, nested Hyper-V attempts to write
	 * default '0x10000' value on boot and this should not #GP. We need to
	 * allow zero-initing the register from host as well.
	 */
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
		return 1;
	/*
	 * Guest may configure multiple SINTs to use the same vector, so
	 * we maintain a bitmap of vectors handled by synic, and a
	 * bitmap of vectors with auto-eoi behavior. The bitmaps are
	 * updated here, and atomically queried on fast paths.
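	 * (When the KVM_REQ_SCAN_IOAPIC request made below is processed,
	 * vec_bitmap is folded into the vCPU's EOI exit bitmap.)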
	 */
	old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;

	atomic64_set(&synic->sint[sint], data);

	synic_update_vector(synic, old_vector);

	synic_update_vector(synic, vector);

	/* Load SynIC vectors into EOI exit bitmap */
	kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
	return 0;
}

static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
{
	struct kvm_vcpu *vcpu = NULL;
	int i;

	if (vpidx >= KVM_MAX_VCPUS)
		return NULL;

	vcpu = kvm_get_vcpu(kvm, vpidx);
	if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
		return vcpu;
	kvm_for_each_vcpu(i, vcpu, kvm)
		if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
			return vcpu;
	return NULL;
}

static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vcpu_hv_synic *synic;

	vcpu = get_vcpu_by_vpidx(kvm, vpidx);
	if (!vcpu)
		return NULL;
	synic = vcpu_to_synic(vcpu);
	return (synic->active) ? synic : NULL;
}

static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_stimer *stimer;
	int gsi, idx;

	trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);

	/* Try to deliver pending Hyper-V SynIC timers messages */
	for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
		stimer = &hv_vcpu->stimer[idx];
		if (stimer->msg_pending && stimer->config.enable &&
		    !stimer->config.direct_mode &&
		    stimer->config.sintx == sint)
			stimer_mark_pending(stimer, false);
	}

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = atomic_read(&synic->sint_to_gsi[sint]);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;

	hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
	hv_vcpu->exit.u.synic.msr = msr;
	hv_vcpu->exit.u.synic.control = synic->control;
	hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
	hv_vcpu->exit.u.synic.msg_page = synic->msg_page;

	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
}

static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
			 u32 msr, u64 data, bool host)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	int ret;

	if (!synic->active && (!host || data))
		return 1;

	trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);

	ret = 0;
	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		synic->control = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_SVERSION:
		if (!host) {
			ret = 1;
			break;
		}
		synic->version = data;
		break;
	case HV_X64_MSR_SIEFP:
		if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
		    !synic->dont_zero_synic_pages)
			if (kvm_clear_guest(vcpu->kvm,
					    data & PAGE_MASK, PAGE_SIZE)) {
				ret = 1;
				break;
			}
		synic->evt_page = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_SIMP:
		if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
		    !synic->dont_zero_synic_pages)
			if (kvm_clear_guest(vcpu->kvm,
					    data & PAGE_MASK, PAGE_SIZE)) {
				ret = 1;
				break;
			}
		synic->msg_page = data;
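		/* Let userspace track the new message page location via a
		 * KVM_EXIT_HYPERV_SYNIC exit when the guest changed it. */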
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_EOM: {
		int i;

		if (!synic->active)
			break;

		for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
			kvm_hv_notify_acked_sint(vcpu, i);
		break;
	}
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
		break;
	default:
		ret = 1;
		break;
	}
	return ret;
}

static bool kvm_hv_is_syndbg_enabled(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *entry;

	entry = kvm_find_cpuid_entry(vcpu,
				     HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES,
				     0);
	if (!entry)
		return false;

	return entry->eax & HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
}

static int kvm_hv_syndbg_complete_userspace(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = &kvm->arch.hyperv;

	if (vcpu->run->hyperv.u.syndbg.msr == HV_X64_MSR_SYNDBG_CONTROL)
		hv->hv_syndbg.control.status =
			vcpu->run->hyperv.u.syndbg.status;
	return 1;
}

static void syndbg_exit(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);
	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;

	hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNDBG;
	hv_vcpu->exit.u.syndbg.msr = msr;
	hv_vcpu->exit.u.syndbg.control = syndbg->control.control;
	hv_vcpu->exit.u.syndbg.send_page = syndbg->control.send_page;
	hv_vcpu->exit.u.syndbg.recv_page = syndbg->control.recv_page;
	hv_vcpu->exit.u.syndbg.pending_page = syndbg->control.pending_page;
	vcpu->arch.complete_userspace_io =
		kvm_hv_syndbg_complete_userspace;

	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
}

static int syndbg_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);

	if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
		return 1;

	trace_kvm_hv_syndbg_set_msr(vcpu->vcpu_id,
				    vcpu_to_hv_vcpu(vcpu)->vp_index, msr, data);
	switch (msr) {
	case HV_X64_MSR_SYNDBG_CONTROL:
		syndbg->control.control = data;
		if (!host)
			syndbg_exit(vcpu, msr);
		break;
	case HV_X64_MSR_SYNDBG_STATUS:
		syndbg->control.status = data;
		break;
	case HV_X64_MSR_SYNDBG_SEND_BUFFER:
		syndbg->control.send_page = data;
		break;
	case HV_X64_MSR_SYNDBG_RECV_BUFFER:
		syndbg->control.recv_page = data;
		break;
	case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		syndbg->control.pending_page = data;
		if (!host)
			syndbg_exit(vcpu, msr);
		break;
	case HV_X64_MSR_SYNDBG_OPTIONS:
		syndbg->options = data;
		break;
	default:
		break;
	}

	return 0;
}

static int syndbg_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
	struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);

	if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
		return 1;

	switch (msr) {
	case HV_X64_MSR_SYNDBG_CONTROL:
		*pdata = syndbg->control.control;
		break;
	case HV_X64_MSR_SYNDBG_STATUS:
		*pdata = syndbg->control.status;
		break;
	case HV_X64_MSR_SYNDBG_SEND_BUFFER:
		*pdata = syndbg->control.send_page;
		break;
	case HV_X64_MSR_SYNDBG_RECV_BUFFER:
		*pdata = syndbg->control.recv_page;
		break;
	case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		*pdata = syndbg->control.pending_page;
		break;
	case HV_X64_MSR_SYNDBG_OPTIONS:
		*pdata = syndbg->options;
		break;
	default:
		break;
	}

	trace_kvm_hv_syndbg_get_msr(vcpu->vcpu_id,
				    vcpu_to_hv_vcpu(vcpu)->vp_index, msr,
				    *pdata);

	return 0;
}

static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
			 bool host)
{
	int ret;

	if (!synic->active && !host)
		return 1;

	ret = 0;
	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		*pdata = synic->control;
		break;
	case HV_X64_MSR_SVERSION:
		*pdata = synic->version;
		break;
	case HV_X64_MSR_SIEFP:
		*pdata = synic->evt_page;
		break;
	case HV_X64_MSR_SIMP:
		*pdata = synic->msg_page;
		break;
	case HV_X64_MSR_EOM:
		*pdata = 0;
		break;
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		*pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
		break;
	default:
		ret = 1;
		break;
	}
	return ret;
}

static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct kvm_lapic_irq irq;
	int ret, vector;

	if (KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm))
		return -EINVAL;

	if (sint >= ARRAY_SIZE(synic->sint))
		return -EINVAL;

	vector = synic_get_sint_vector(synic_read_sint(synic, sint));
	if (vector < 0)
		return -ENOENT;

	memset(&irq, 0, sizeof(irq));
	irq.shorthand = APIC_DEST_SELF;
	irq.dest_mode = APIC_DEST_PHYSICAL;
	irq.delivery_mode = APIC_DM_FIXED;
	irq.vector = vector;
	irq.level = 1;

	ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
	trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
	return ret;
}

int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
{
	struct kvm_vcpu_hv_synic *synic;

	synic = synic_get(kvm, vpidx);
	if (!synic)
		return -EINVAL;

	return synic_set_irq(synic, sint);
}

void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
	int i;

	trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);

	for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
		if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
			kvm_hv_notify_acked_sint(vcpu, i);
}

static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
{
	struct kvm_vcpu_hv_synic *synic;

	synic = synic_get(kvm, vpidx);
	if (!synic)
		return -EINVAL;

	if (sint >= ARRAY_SIZE(synic->sint_to_gsi))
		return -EINVAL;

	atomic_set(&synic->sint_to_gsi[sint], gsi);
	return 0;
}

void kvm_hv_irq_routing_update(struct kvm *kvm)
{
	struct kvm_irq_routing_table *irq_rt;
	struct kvm_kernel_irq_routing_entry *e;
	u32 gsi;

	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
					lockdep_is_held(&kvm->irq_lock));

	for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
			if (e->type == KVM_IRQ_ROUTING_HV_SINT)
				kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
						    e->hv_sint.sint, gsi);
		}
	}
}

static void synic_init(struct kvm_vcpu_hv_synic *synic)
{
	int i;

	memset(synic, 0, sizeof(*synic));
	synic->version = HV_SYNIC_VERSION_1;
	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
		atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
		atomic_set(&synic->sint_to_gsi[i], -1);
	}
}

static u64 get_time_ref_counter(struct kvm *kvm)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct kvm_vcpu *vcpu;
	u64 tsc;

	/*
	 * The guest has not set up the TSC page or the clock isn't
	 * stable, fall back to get_kvmclock_ns.
	 */
	if (!hv->tsc_ref.tsc_sequence)
		return div_u64(get_kvmclock_ns(kvm), 100);

	vcpu = kvm_get_vcpu(kvm, 0);
	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
	return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
		+ hv->tsc_ref.tsc_offset;
}

static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
				bool vcpu_kick)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);

	set_bit(stimer->index,
		vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
	kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
	if (vcpu_kick)
		kvm_vcpu_kick(vcpu);
}

static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);

	trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id,
				    stimer->index);

	hrtimer_cancel(&stimer->timer);
	clear_bit(stimer->index,
		  vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
	stimer->msg_pending = false;
	stimer->exp_time = 0;
}

static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
{
	struct kvm_vcpu_hv_stimer *stimer;

	stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
	trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id,
				     stimer->index);
	stimer_mark_pending(stimer, true);

	return HRTIMER_NORESTART;
}

/*
 * stimer_start() assumptions:
 * a) stimer->count is not equal to 0
 * b) stimer->config has HV_STIMER_ENABLE flag
 */
static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
{
	u64 time_now;
	ktime_t ktime_now;

	time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
	ktime_now = ktime_get();

	if (stimer->config.periodic) {
		if (stimer->exp_time) {
			if (time_now >= stimer->exp_time) {
				u64 remainder;

				div64_u64_rem(time_now - stimer->exp_time,
					      stimer->count, &remainder);
				stimer->exp_time =
					time_now + (stimer->count - remainder);
			}
		} else
			stimer->exp_time = time_now + stimer->count;

		trace_kvm_hv_stimer_start_periodic(
					stimer_to_vcpu(stimer)->vcpu_id,
					stimer->index,
					time_now, stimer->exp_time);

		hrtimer_start(&stimer->timer,
			      ktime_add_ns(ktime_now,
					   100 * (stimer->exp_time - time_now)),
			      HRTIMER_MODE_ABS);
		return 0;
	}
	stimer->exp_time = stimer->count;
	if (time_now >= stimer->count) {
		/*
		 * Expire timer according to Hypervisor Top-Level Functional
		 * Specification v4(15.3.1):
		 * "If a one shot is enabled and the specified count is in
		 * the past, it will expire immediately."
		 */
		stimer_mark_pending(stimer, false);
		return 0;
	}

	trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id,
					   stimer->index,
					   time_now, stimer->count);

	hrtimer_start(&stimer->timer,
		      ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
		      HRTIMER_MODE_ABS);
	return 0;
}

static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
			     bool host)
{
	union hv_stimer_config new_config = {.as_uint64 = config},
		old_config = {.as_uint64 = stimer->config.as_uint64};
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);

	if (!synic->active && (!host || config))
		return 1;

	trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
				       stimer->index, config, host);

	stimer_cleanup(stimer);
	if (old_config.enable &&
	    !new_config.direct_mode && new_config.sintx == 0)
		new_config.enable = 0;
	stimer->config.as_uint64 = new_config.as_uint64;

	if (stimer->config.enable)
		stimer_mark_pending(stimer, false);

	return 0;
}

static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
			    bool host)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);

	if (!synic->active && (!host || count))
		return 1;

	trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id,
				      stimer->index, count, host);

	stimer_cleanup(stimer);
	stimer->count = count;
	if (!host) {
		if (stimer->count == 0)
			stimer->config.enable = 0;
		else if (stimer->config.auto_enable)
			stimer->config.enable = 1;
	}

	if (stimer->config.enable)
		stimer_mark_pending(stimer, false);

	return 0;
}

static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
{
	*pconfig = stimer->config.as_uint64;
	return 0;
}

static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
{
	*pcount = stimer->count;
	return 0;
}

static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
			     struct hv_message *src_msg, bool no_retry)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	int msg_off = offsetof(struct hv_message_page, sint_message[sint]);
	gfn_t msg_page_gfn;
	struct hv_message_header hv_hdr;
	int r;

	if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
		return -ENOENT;

	msg_page_gfn = synic->msg_page >> PAGE_SHIFT;

	/*
	 * Strictly following the spec-mandated ordering would assume setting
	 * .msg_pending before checking .message_type. However, this function
	 * is only called in vcpu context so the entire update is atomic from
	 * guest POV and thus the exact order here doesn't matter.
	 */
	r = kvm_vcpu_read_guest_page(vcpu, msg_page_gfn, &hv_hdr.message_type,
				     msg_off + offsetof(struct hv_message,
							header.message_type),
				     sizeof(hv_hdr.message_type));
	if (r < 0)
		return r;

	if (hv_hdr.message_type != HVMSG_NONE) {
		if (no_retry)
			return 0;

		hv_hdr.message_flags.msg_pending = 1;
		r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn,
					      &hv_hdr.message_flags,
					      msg_off +
					      offsetof(struct hv_message,
						       header.message_flags),
					      sizeof(hv_hdr.message_flags));
		if (r < 0)
			return r;
		return -EAGAIN;
	}

	r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, src_msg, msg_off,
				      sizeof(src_msg->header) +
				      src_msg->header.payload_size);
	if (r < 0)
		return r;

	r = synic_set_irq(synic, sint);
	if (r < 0)
		return r;
	if (r == 0)
		return -EFAULT;
	return 0;
}

static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
	struct hv_message *msg = &stimer->msg;
	struct hv_timer_message_payload *payload =
		(struct hv_timer_message_payload *)&msg->u.payload;

	/*
	 * To avoid piling up periodic ticks, don't retry message
	 * delivery for them (within "lazy" lost ticks policy).
	 */
	bool no_retry = stimer->config.periodic;

	payload->expiration_time = stimer->exp_time;
	payload->delivery_time = get_time_ref_counter(vcpu->kvm);
	return synic_deliver_msg(vcpu_to_synic(vcpu),
				 stimer->config.sintx, msg,
				 no_retry);
}

static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer)
{
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
	struct kvm_lapic_irq irq = {
		.delivery_mode = APIC_DM_FIXED,
		.vector = stimer->config.apic_vector
	};

	if (lapic_in_kernel(vcpu))
		return !kvm_apic_set_irq(vcpu, &irq, NULL);
	return 0;
}

static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
{
	int r, direct = stimer->config.direct_mode;

	stimer->msg_pending = true;
	if (!direct)
		r = stimer_send_msg(stimer);
	else
		r = stimer_notify_direct(stimer);
	trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id,
				       stimer->index, direct, r);
	if (!r) {
		stimer->msg_pending = false;
		if (!(stimer->config.periodic))
			stimer->config.enable = 0;
	}
}

void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_stimer *stimer;
	u64 time_now, exp_time;
	int i;

	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
			stimer = &hv_vcpu->stimer[i];
			if (stimer->config.enable) {
				exp_time = stimer->exp_time;

				if (exp_time) {
					time_now =
						get_time_ref_counter(vcpu->kvm);
					if (time_now >= exp_time)
						stimer_expiration(stimer);
				}

				if ((stimer->config.enable) &&
				    stimer->count) {
					if (!stimer->msg_pending)
						stimer_start(stimer);
				} else
					stimer_cleanup(stimer);
			}
		}
}

void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	int i;

	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		stimer_cleanup(&hv_vcpu->stimer[i]);
}

bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
		return false;
	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}
EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);

bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
			    struct hv_vp_assist_page *assist_page)
{
	if (!kvm_hv_assist_page_enabled(vcpu))
		return false;
	return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
				      assist_page, sizeof(*assist_page));
}
EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);

static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
{
	struct hv_message *msg = &stimer->msg;
	struct hv_timer_message_payload *payload =
		(struct hv_timer_message_payload *)&msg->u.payload;

	memset(&msg->header, 0, sizeof(msg->header));
	msg->header.message_type = HVMSG_TIMER_EXPIRED;
	msg->header.payload_size = sizeof(*payload);

	payload->timer_index = stimer->index;
	payload->expiration_time = 0;
	payload->delivery_time = 0;
}

static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
{
	memset(stimer, 0, sizeof(*stimer));
	stimer->index = timer_index;
	hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	stimer->timer.function = stimer_timer_callback;
	stimer_prepare_msg(stimer);
}

void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	int i;

	synic_init(&hv_vcpu->synic);

	bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		stimer_init(&hv_vcpu->stimer[i], i);
}

void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);

	hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
}

int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
{
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);

	/*
	 * Hyper-V SynIC auto EOI SINT's are
	 * not compatible with APICV, so request
	 * to deactivate APICV permanently.
	 */
	kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV);
	synic->active = true;
	synic->dont_zero_synic_pages = dont_zero_synic_pages;
	synic->control = HV_SYNIC_CONTROL_ENABLE;
	return 0;
}

static bool kvm_hv_msr_partition_wide(u32 msr)
{
	bool r = false;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
	case HV_X64_MSR_HYPERCALL:
	case HV_X64_MSR_REFERENCE_TSC:
	case HV_X64_MSR_TIME_REF_COUNT:
	case HV_X64_MSR_CRASH_CTL:
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
	case HV_X64_MSR_RESET:
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_STATUS:
	case HV_X64_MSR_SYNDBG_OPTIONS:
	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		r = true;
		break;
	}

	return r;
}

static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
				     u32 index, u64 *pdata)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
	size_t size = ARRAY_SIZE(hv->hv_crash_param);

	if (WARN_ON_ONCE(index >= size))
		return -EINVAL;

	*pdata = hv->hv_crash_param[array_index_nospec(index, size)];
	return 0;
}

static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	*pdata = hv->hv_crash_ctl;
	return 0;
}

static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	if (host)
		hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY;

	if (!host && (data & HV_CRASH_CTL_CRASH_NOTIFY)) {

		vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
			   hv->hv_crash_param[0],
			   hv->hv_crash_param[1],
			   hv->hv_crash_param[2],
			   hv->hv_crash_param[3],
			   hv->hv_crash_param[4]);

		/* Send notification about crash to user space */
		kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
	}

	return 0;
}

static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
				     u32 index, u64 data)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
	size_t size = ARRAY_SIZE(hv->hv_crash_param);

	if (WARN_ON_ONCE(index >= size))
		return -EINVAL;

	hv->hv_crash_param[array_index_nospec(index, size)] = data;
	return 0;
}

/*
 * The kvmclock and Hyper-V TSC page use similar formulas, and converting
 * between them is possible:
 *
 * kvmclock formula:
 *    nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
 *           + system_time
 *
 * Hyper-V formula:
 *    nsec/100 = ticks * scale / 2^64 + offset
 *
 * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
 * By dividing the kvmclock formula by 100 and equating what's left we get:
 *    ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *    scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *    scale = tsc_to_system_mul * 2^(32+tsc_shift) / 100
 *
 * Now expand the kvmclock formula and divide by 100:
 *    nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
 *           - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
 *           + system_time
 *    nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *             - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *             + system_time / 100
 *
 * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
 *    nsec/100 = ticks * scale / 2^64
 *             - tsc_timestamp * scale / 2^64
 *             + system_time / 100
 *
 * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
 *    offset = system_time / 100 - tsc_timestamp * scale / 2^64
 *
 * These two equivalencies are implemented in this function.
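 *
 * Worked example (illustrative values, not necessarily what kvmclock
 * actually picks): a 1 GHz guest TSC can be described by tsc_shift = 1 and
 * tsc_to_system_mul = 2^31 (1 tick = 1 ns), which gives
 *    scale = 2^31 * 2^(32+1) / 100 = 2^64 / 100
 * so ticks * scale / 2^64 = ticks / 100, i.e. the elapsed time in the
 * 100ns units that Hyper-V expects.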
 */
static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
					struct ms_hyperv_tsc_page *tsc_ref)
{
	u64 max_mul;

	if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
		return false;

	/*
	 * check if scale would overflow, if so we use the time ref counter
	 *    tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
	 *    tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
	 *    tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
	 */
	max_mul = 100ull << (32 - hv_clock->tsc_shift);
	if (hv_clock->tsc_to_system_mul >= max_mul)
		return false;

	/*
	 * Otherwise compute the scale and offset according to the formulas
	 * derived above.
	 */
	tsc_ref->tsc_scale =
		mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
				hv_clock->tsc_to_system_mul,
				100);

	tsc_ref->tsc_offset = hv_clock->system_time;
	do_div(tsc_ref->tsc_offset, 100);
	tsc_ref->tsc_offset -=
		mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
	return true;
}

void kvm_hv_setup_tsc_page(struct kvm *kvm,
			   struct pvclock_vcpu_time_info *hv_clock)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	u32 tsc_seq;
	u64 gfn;

	BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
	BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);

	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
		return;

	mutex_lock(&kvm->arch.hyperv.hv_lock);
	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
		goto out_unlock;

	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
	/*
	 * Because the TSC parameters only vary when there is a
	 * change in the master clock, do not bother with caching.
	 */
	if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
				    &tsc_seq, sizeof(tsc_seq))))
		goto out_unlock;

	/*
	 * While we're computing and writing the parameters, force the
	 * guest to use the time reference count MSR.
	 */
	hv->tsc_ref.tsc_sequence = 0;
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
		goto out_unlock;

	if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
		goto out_unlock;

	/* Ensure sequence is zero before writing the rest of the struct. */
	smp_wmb();
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
		goto out_unlock;

	/*
	 * Now switch to the TSC page mechanism by writing the sequence.
	 */
	tsc_seq++;
	if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
		tsc_seq = 1;

	/* Write the struct entirely before the non-zero sequence. */
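	/*
	 * (The guest reads tsc_sequence, then scale/offset, then re-reads
	 * tsc_sequence and retries on mismatch; the barrier guarantees a
	 * guest that sees the new sequence also sees the matching fields.)
	 */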
	smp_wmb();

	hv->tsc_ref.tsc_sequence = tsc_seq;
	kvm_write_guest(kvm, gfn_to_gpa(gfn),
			&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
out_unlock:
	mutex_unlock(&kvm->arch.hyperv.hv_lock);
}

static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
			     bool host)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = &kvm->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		hv->hv_guest_os_id = data;
		/* setting guest os id to zero disables hypercall page */
		if (!hv->hv_guest_os_id)
			hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
		break;
	case HV_X64_MSR_HYPERCALL: {
		u64 gfn;
		unsigned long addr;
		u8 instructions[4];

		/* if guest os id is not set hypercall should remain disabled */
		if (!hv->hv_guest_os_id)
			break;
		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
			hv->hv_hypercall = data;
			break;
		}
		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
		addr = gfn_to_hva(kvm, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		kvm_x86_ops.patch_hypercall(vcpu, instructions);
		((unsigned char *)instructions)[3] = 0xc3; /* ret */
		if (__copy_to_user((void __user *)addr, instructions, 4))
			return 1;
		hv->hv_hypercall = data;
		mark_page_dirty(kvm, gfn);
		break;
	}
	case HV_X64_MSR_REFERENCE_TSC:
		hv->hv_tsc_page = data;
		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_set_crash_data(vcpu,
						 msr - HV_X64_MSR_CRASH_P0,
						 data);
	case HV_X64_MSR_CRASH_CTL:
		return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
	case HV_X64_MSR_RESET:
		if (data == 1) {
			vcpu_debug(vcpu, "hyper-v reset requested\n");
			kvm_make_request(KVM_REQ_HV_RESET, vcpu);
		}
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		hv->hv_reenlightenment_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		hv->hv_tsc_emulation_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		hv->hv_tsc_emulation_status = data;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		/* read-only, but still ignore it if host-initiated */
		if (!host)
			return 1;
		break;
	case HV_X64_MSR_SYNDBG_OPTIONS:
	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		return syndbg_set_msr(vcpu, msr, data, host);
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
			    msr, data);
		return 1;
	}
	return 0;
}

/* Calculate cpu time spent by current task in 100ns units */
static u64 current_task_runtime_100ns(void)
{
	u64 utime, stime;

	task_cputime_adjusted(current, &utime, &stime);

	return div_u64(utime + stime, 100);
}

static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX: {
		struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
		int vcpu_idx = kvm_vcpu_get_idx(vcpu);
		u32 new_vp_index = (u32)data;

		if (!host || new_vp_index >= KVM_MAX_VCPUS)
			return 1;

		if (new_vp_index == hv_vcpu->vp_index)
			return 0;

		/*
		 * The VP index is initialized to vcpu_index by
		 * kvm_hv_vcpu_postcreate so they initially match. Now the
		 * VP index is changing, adjust num_mismatched_vp_indexes if
		 * it now matches or no longer matches vcpu_idx.
		 */
		if (hv_vcpu->vp_index == vcpu_idx)
			atomic_inc(&hv->num_mismatched_vp_indexes);
		else if (new_vp_index == vcpu_idx)
			atomic_dec(&hv->num_mismatched_vp_indexes);

		hv_vcpu->vp_index = new_vp_index;
		break;
	}
	case HV_X64_MSR_VP_ASSIST_PAGE: {
		u64 gfn;
		unsigned long addr;

		if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
			hv_vcpu->hv_vapic = data;
			if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
				return 1;
			break;
		}
		gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
		if (kvm_is_error_hva(addr))
			return 1;

		/*
		 * Clear apic_assist portion of struct hv_vp_assist_page
		 * only, there can be valuable data in the rest which needs
		 * to be preserved e.g. on migration.
		 */
		if (__put_user(0, (u32 __user *)addr))
			return 1;
		hv_vcpu->hv_vapic = data;
		kvm_vcpu_mark_page_dirty(vcpu, gfn);
		if (kvm_lapic_enable_pv_eoi(vcpu,
					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
					    sizeof(struct hv_vp_assist_page)))
			return 1;
		break;
	}
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
	case HV_X64_MSR_VP_RUNTIME:
		if (!host)
			return 1;
		hv_vcpu->runtime_offset = data - current_task_runtime_100ns();
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_set_config(vcpu_to_stimer(vcpu, timer_index),
					 data, host);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
					data, host);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
	case HV_X64_MSR_APIC_FREQUENCY:
		/* read-only, but still ignore it if host-initiated */
		if (!host)
			return 1;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
			    msr, data);
		return 1;
	}

	return 0;
}

static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
			     bool host)
{
	u64 data = 0;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = &kvm->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		data = hv->hv_guest_os_id;
		break;
	case HV_X64_MSR_HYPERCALL:
		data = hv->hv_hypercall;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		data = get_time_ref_counter(kvm);
		break;
	case HV_X64_MSR_REFERENCE_TSC:
		data = hv->hv_tsc_page;
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_get_crash_data(vcpu,
						 msr - HV_X64_MSR_CRASH_P0,
						 pdata);
	case HV_X64_MSR_CRASH_CTL:
		return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
	case HV_X64_MSR_RESET:
		data = 0;
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		data = hv->hv_reenlightenment_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		data = hv->hv_tsc_emulation_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		data = hv->hv_tsc_emulation_status;
		break;
	case HV_X64_MSR_SYNDBG_OPTIONS:
	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		return syndbg_get_msr(vcpu, msr, pdata, host);
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}

	*pdata = data;
	return 0;
}

static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
			  bool host)
{
	u64 data = 0;
	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX:
		data = hv_vcpu->vp_index;
		break;
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
	case HV_X64_MSR_VP_ASSIST_PAGE:
		data = hv_vcpu->hv_vapic;
		break;
	case HV_X64_MSR_VP_RUNTIME:
		data = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_get_config(vcpu_to_stimer(vcpu, timer_index),
					 pdata);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
					pdata);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
		data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
		break;
	case HV_X64_MSR_APIC_FREQUENCY:
		data = APIC_BUS_FREQUENCY;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}
	*pdata = data;
	return 0;
}

int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	if (kvm_hv_msr_partition_wide(msr)) {
		int r;

		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
		r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
		return r;
	} else
		return kvm_hv_set_msr(vcpu, msr, data, host);
}

int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
	if (kvm_hv_msr_partition_wide(msr)) {
		int r;

		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
		r = kvm_hv_get_msr_pw(vcpu, msr, pdata, host);
		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
		return r;
	} else
		return kvm_hv_get_msr(vcpu, msr, pdata, host);
}

static __always_inline unsigned long *sparse_set_to_vcpu_mask(
	struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
	u64 *vp_bitmap, unsigned long *vcpu_bitmap)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct kvm_vcpu *vcpu;
	int i, bank, sbank = 0;

	memset(vp_bitmap, 0,
	       KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
		vp_bitmap[bank] = sparse_banks[sbank++];

	if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
		/* for all vcpus vp_index == vcpu_idx */
		return (unsigned long *)vp_bitmap;
	}

	bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index,
			     (unsigned long *)vp_bitmap))
			__set_bit(i, vcpu_bitmap);
	}
	return vcpu_bitmap;
}

static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
			    u16 rep_cnt, bool ex)
{
	struct kvm *kvm = current_vcpu->kvm;
	struct kvm_vcpu_hv *hv_vcpu = &current_vcpu->arch.hyperv;
	struct hv_tlb_flush_ex flush_ex;
	struct hv_tlb_flush flush;
	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	unsigned long *vcpu_mask;
	u64 valid_bank_mask;
	u64 sparse_banks[64];
	int sparse_banks_len;
	bool all_cpus;

	if (!ex) {
		if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_flush_tlb(flush.processor_mask,
				       flush.address_space, flush.flags);

		valid_bank_mask = BIT_ULL(0);
		sparse_banks[0] = flush.processor_mask;

		/*
		 * Work around possible WS2012 bug: it sends hypercalls
		 * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
		 * while also expecting us to flush something and crashing if
		 * we don't. Let's treat processor_mask == 0 same as
		 * HV_FLUSH_ALL_PROCESSORS.
		 */
		all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
			flush.processor_mask == 0;
	} else {
		if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
					    sizeof(flush_ex))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
					  flush_ex.hv_vp_set.format,
					  flush_ex.address_space,
					  flush_ex.flags);

		valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
		all_cpus = flush_ex.hv_vp_set.format !=
			HV_GENERIC_SET_SPARSE_4K;

		sparse_banks_len =
			bitmap_weight((unsigned long *)&valid_bank_mask, 64) *
			sizeof(sparse_banks[0]);

		if (!sparse_banks_len && !all_cpus)
			goto ret_success;

		if (!all_cpus &&
		    kvm_read_guest(kvm,
				   ingpa + offsetof(struct hv_tlb_flush_ex,
						    hv_vp_set.bank_contents),
				   sparse_banks,
				   sparse_banks_len))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}

	cpumask_clear(&hv_vcpu->tlb_flush);

	/*
	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
	 * analyze it here, flush TLB regardless of the specified address space.
	 */
	if (all_cpus) {
		kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST);
	} else {
		vcpu_mask = sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
						    vp_bitmap, vcpu_bitmap);

		kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
					    NULL, vcpu_mask, &hv_vcpu->tlb_flush);
	}

ret_success:
	/* We always do full TLB flush, set rep_done = rep_cnt. */
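	/*
	 * (A full flush covers every GVA range the guest asked for, so all
	 * reps can be reported as completed.)
	 */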
	return (u64)HV_STATUS_SUCCESS |
		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
}

static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
				 unsigned long *vcpu_bitmap)
{
	struct kvm_lapic_irq irq = {
		.delivery_mode = APIC_DM_FIXED,
		.vector = vector
	};
	struct kvm_vcpu *vcpu;
	int i;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
			continue;

		/* We fail only when APIC is disabled */
		kvm_apic_set_irq(vcpu, &irq, NULL);
	}
}

static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
			   bool ex, bool fast)
{
	struct kvm *kvm = current_vcpu->kvm;
	struct hv_send_ipi_ex send_ipi_ex;
	struct hv_send_ipi send_ipi;
	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	unsigned long *vcpu_mask;
	unsigned long valid_bank_mask;
	u64 sparse_banks[64];
	int sparse_banks_len;
	u32 vector;
	bool all_cpus;

	if (!ex) {
		if (!fast) {
			if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi,
						    sizeof(send_ipi))))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			sparse_banks[0] = send_ipi.cpu_mask;
			vector = send_ipi.vector;
		} else {
			/* 'reserved' part of hv_send_ipi should be 0 */
			if (unlikely(ingpa >> 32 != 0))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			sparse_banks[0] = outgpa;
			vector = (u32)ingpa;
		}
		all_cpus = false;
		valid_bank_mask = BIT_ULL(0);

		trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
	} else {
		if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
					    sizeof(send_ipi_ex))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
					 send_ipi_ex.vp_set.format,
					 send_ipi_ex.vp_set.valid_bank_mask);

		vector = send_ipi_ex.vector;
		valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
		sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
			sizeof(sparse_banks[0]);

		all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;

		if (all_cpus)
			goto check_and_send_ipi;

		if (!sparse_banks_len)
			goto ret_success;

		if (kvm_read_guest(kvm,
				   ingpa + offsetof(struct hv_send_ipi_ex,
						    vp_set.bank_contents),
				   sparse_banks,
				   sparse_banks_len))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}

check_and_send_ipi:
	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	vcpu_mask = all_cpus ? NULL :
		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
					vp_bitmap, vcpu_bitmap);

	kvm_send_ipi_to_many(kvm, vector, vcpu_mask);

ret_success:
	return HV_STATUS_SUCCESS;
}

bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
	return READ_ONCE(kvm->arch.hyperv.hv_guest_os_id) != 0;
}

static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
	bool longmode;

	longmode = is_64_bit_mode(vcpu);
	if (longmode)
		kvm_rax_write(vcpu, result);
	else {
		kvm_rdx_write(vcpu, result >> 32);
		kvm_rax_write(vcpu, result & 0xffffffff);
	}
}

static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
	kvm_hv_hypercall_set_result(vcpu, result);
	++vcpu->stat.hypercalls;
	return kvm_skip_emulated_instruction(vcpu);
}

static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
{
	return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
}

static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
{
	struct eventfd_ctx *eventfd;

	if (unlikely(!fast)) {
		int ret;
		gpa_t gpa = param;

		if ((gpa & (__alignof__(param) - 1)) ||
		    offset_in_page(gpa) + sizeof(param) > PAGE_SIZE)
			return HV_STATUS_INVALID_ALIGNMENT;

		ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param));
		if (ret < 0)
			return HV_STATUS_INVALID_ALIGNMENT;
	}

	/*
	 * Per spec, bits 32-47 contain the extra "flag number". However, we
	 * have no use for it, and in all known usecases it is zero, so just
	 * report lookup failure if it isn't.
	 */
	if (param & 0xffff00000000ULL)
		return HV_STATUS_INVALID_PORT_ID;
	/* remaining bits are reserved-zero */
	if (param & ~KVM_HYPERV_CONN_ID_MASK)
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	/* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
	rcu_read_lock();
	eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
	rcu_read_unlock();
	if (!eventfd)
		return HV_STATUS_INVALID_PORT_ID;

	eventfd_signal(eventfd, 1);
	return HV_STATUS_SUCCESS;
}

int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
	u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
	uint16_t code, rep_idx, rep_cnt;
	bool fast, rep;

	/*
	 * hypercall generates UD from non zero cpl and real mode
	 * per HYPER-V spec
	 */
	if (kvm_x86_ops.get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

#ifdef CONFIG_X86_64
	if (is_64_bit_mode(vcpu)) {
		param = kvm_rcx_read(vcpu);
		ingpa = kvm_rdx_read(vcpu);
		outgpa = kvm_r8_read(vcpu);
	} else
#endif
	{
		param = ((u64)kvm_rdx_read(vcpu) << 32) |
			(kvm_rax_read(vcpu) & 0xffffffff);
		ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
			(kvm_rcx_read(vcpu) & 0xffffffff);
		outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
			(kvm_rsi_read(vcpu) & 0xffffffff);
	}

	code = param & 0xffff;
	fast = !!(param & HV_HYPERCALL_FAST_BIT);
	rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
	rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
	rep = !!(rep_cnt || rep_idx);

	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);

	switch (code) {
	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
		if (unlikely(rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		kvm_vcpu_on_spin(vcpu, true);
		break;
	case HVCALL_SIGNAL_EVENT:
		if (unlikely(rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
		if (ret != HV_STATUS_INVALID_PORT_ID)
			break;
		fallthrough;	/* maybe userspace knows this conn_id */
	case HVCALL_POST_MESSAGE:
		/* don't bother userspace if it has no way to handle it */
		if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
		vcpu->run->hyperv.u.hcall.input = param;
		vcpu->run->hyperv.u.hcall.params[0] = ingpa;
		vcpu->run->hyperv.u.hcall.params[1] = outgpa;
		vcpu->arch.complete_userspace_io =
				kvm_hv_hypercall_complete_userspace;
		return 0;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
		if (unlikely(fast || !rep_cnt || rep_idx)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
		if (unlikely(fast || rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
		if (unlikely(fast || !rep_cnt || rep_idx)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
		if (unlikely(fast || rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
		break;
	case HVCALL_SEND_IPI:
		if (unlikely(rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast);
		break;
	case HVCALL_SEND_IPI_EX:
		if (unlikely(fast || rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false);
		break;
	case HVCALL_POST_DEBUG_DATA:
	case HVCALL_RETRIEVE_DEBUG_DATA:
		if (unlikely(fast)) {
			ret = HV_STATUS_INVALID_PARAMETER;
			break;
		}
		fallthrough;
	case HVCALL_RESET_DEBUG_SESSION: {
		struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);

		if (!kvm_hv_is_syndbg_enabled(vcpu)) {
			ret = HV_STATUS_INVALID_HYPERCALL_CODE;
			break;
		}

		if (!(syndbg->options & HV_X64_SYNDBG_OPTION_USE_HCALLS)) {
			ret = HV_STATUS_OPERATION_DENIED;
			break;
		}
		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
		vcpu->run->hyperv.u.hcall.input = param;
		vcpu->run->hyperv.u.hcall.params[0] = ingpa;
		vcpu->run->hyperv.u.hcall.params[1] = outgpa;
		vcpu->arch.complete_userspace_io =
				kvm_hv_hypercall_complete_userspace;
		return 0;
	}
	default:
		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
		break;
	}

	return kvm_hv_hypercall_complete(vcpu, ret);
}

void kvm_hv_init_vm(struct kvm *kvm)
{
	mutex_init(&kvm->arch.hyperv.hv_lock);
	idr_init(&kvm->arch.hyperv.conn_to_evt);
}

void kvm_hv_destroy_vm(struct kvm *kvm)
{
	struct eventfd_ctx *eventfd;
	int i;

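	/* Release every eventfd registered through KVM_HYPERV_EVENTFD. */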
	idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
		eventfd_ctx_put(eventfd);
	idr_destroy(&kvm->arch.hyperv.conn_to_evt);
}

static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct eventfd_ctx *eventfd;
	int ret;

	eventfd = eventfd_ctx_fdget(fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	mutex_lock(&hv->hv_lock);
	ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
			GFP_KERNEL_ACCOUNT);
	mutex_unlock(&hv->hv_lock);

	if (ret >= 0)
		return 0;

	if (ret == -ENOSPC)
		ret = -EEXIST;
	eventfd_ctx_put(eventfd);
	return ret;
}

static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct eventfd_ctx *eventfd;

	mutex_lock(&hv->hv_lock);
	eventfd = idr_remove(&hv->conn_to_evt, conn_id);
	mutex_unlock(&hv->hv_lock);

	if (!eventfd)
		return -ENOENT;

	synchronize_srcu(&kvm->srcu);
	eventfd_ctx_put(eventfd);
	return 0;
}

int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
{
	if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
	    (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
		return -EINVAL;

	if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
		return kvm_hv_eventfd_deassign(kvm, args->conn_id);
	return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
}

int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
				struct kvm_cpuid_entry2 __user *entries)
{
	uint16_t evmcs_ver = 0;
	struct kvm_cpuid_entry2 cpuid_entries[] = {
		{ .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS },
		{ .function = HYPERV_CPUID_INTERFACE },
		{ .function = HYPERV_CPUID_VERSION },
		{ .function = HYPERV_CPUID_FEATURES },
		{ .function = HYPERV_CPUID_ENLIGHTMENT_INFO },
		{ .function = HYPERV_CPUID_IMPLEMENT_LIMITS },
		{ .function = HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS },
		{ .function = HYPERV_CPUID_SYNDBG_INTERFACE },
		{ .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES },
		{ .function = HYPERV_CPUID_NESTED_FEATURES },
	};
	int i, nent = ARRAY_SIZE(cpuid_entries);

	if (kvm_x86_ops.nested_ops->get_evmcs_version)
		evmcs_ver = kvm_x86_ops.nested_ops->get_evmcs_version(vcpu);

	/* Skip NESTED_FEATURES if eVMCS is not supported */
	if (!evmcs_ver)
		--nent;

	if (cpuid->nent < nent)
		return -E2BIG;

	if (cpuid->nent > nent)
		cpuid->nent = nent;

	for (i = 0; i < nent; i++) {
		struct kvm_cpuid_entry2 *ent = &cpuid_entries[i];
		u32 signature[3];

		switch (ent->function) {
		case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS:
			memcpy(signature, "Linux KVM Hv", 12);

			ent->eax = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES;
			ent->ebx = signature[0];
			ent->ecx = signature[1];
			ent->edx = signature[2];
			break;

		case HYPERV_CPUID_INTERFACE:
			memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12);
			ent->eax = signature[0];
			break;

		case HYPERV_CPUID_VERSION:
			/*
			 * We implement some Hyper-V 2016 functions so let's use
			 * this version.
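			 * (eax is the build number, 0x3839 == 14393, the
			 * Windows Server 2016 build; ebx encodes major.minor
			 * version 10.0.)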
2022 */ 2023 ent->eax = 0x00003839; 2024 ent->ebx = 0x000A0000; 2025 break; 2026 2027 case HYPERV_CPUID_FEATURES: 2028 ent->eax |= HV_MSR_VP_RUNTIME_AVAILABLE; 2029 ent->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE; 2030 ent->eax |= HV_MSR_SYNIC_AVAILABLE; 2031 ent->eax |= HV_MSR_SYNTIMER_AVAILABLE; 2032 ent->eax |= HV_MSR_APIC_ACCESS_AVAILABLE; 2033 ent->eax |= HV_MSR_HYPERCALL_AVAILABLE; 2034 ent->eax |= HV_MSR_VP_INDEX_AVAILABLE; 2035 ent->eax |= HV_MSR_RESET_AVAILABLE; 2036 ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE; 2037 ent->eax |= HV_ACCESS_FREQUENCY_MSRS; 2038 ent->eax |= HV_ACCESS_REENLIGHTENMENT; 2039 2040 ent->ebx |= HV_POST_MESSAGES; 2041 ent->ebx |= HV_SIGNAL_EVENTS; 2042 2043 ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE; 2044 ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; 2045 2046 ent->ebx |= HV_DEBUGGING; 2047 ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE; 2048 ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE; 2049 2050 /* 2051 * Direct Synthetic timers only make sense with in-kernel 2052 * LAPIC 2053 */ 2054 if (lapic_in_kernel(vcpu)) 2055 ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; 2056 2057 break; 2058 2059 case HYPERV_CPUID_ENLIGHTMENT_INFO: 2060 ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; 2061 ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED; 2062 ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED; 2063 ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; 2064 ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; 2065 if (evmcs_ver) 2066 ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; 2067 if (!cpu_smt_possible()) 2068 ent->eax |= HV_X64_NO_NONARCH_CORESHARING; 2069 /* 2070 * Default number of spinlock retry attempts, matches 2071 * HyperV 2016. 2072 */ 2073 ent->ebx = 0x00000FFF; 2074 2075 break; 2076 2077 case HYPERV_CPUID_IMPLEMENT_LIMITS: 2078 /* Maximum number of virtual processors */ 2079 ent->eax = KVM_MAX_VCPUS; 2080 /* 2081 * Maximum number of logical processors, matches 2082 * HyperV 2016. 2083 */ 2084 ent->ebx = 64; 2085 2086 break; 2087 2088 case HYPERV_CPUID_NESTED_FEATURES: 2089 ent->eax = evmcs_ver; 2090 2091 break; 2092 2093 case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS: 2094 memcpy(signature, "Linux KVM Hv", 12); 2095 2096 ent->eax = 0; 2097 ent->ebx = signature[0]; 2098 ent->ecx = signature[1]; 2099 ent->edx = signature[2]; 2100 break; 2101 2102 case HYPERV_CPUID_SYNDBG_INTERFACE: 2103 memcpy(signature, "VS#1\0\0\0\0\0\0\0\0", 12); 2104 ent->eax = signature[0]; 2105 break; 2106 2107 case HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES: 2108 ent->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; 2109 break; 2110 2111 default: 2112 break; 2113 } 2114 } 2115 2116 if (copy_to_user(entries, cpuid_entries, 2117 nent * sizeof(struct kvm_cpuid_entry2))) 2118 return -EFAULT; 2119 2120 return 0; 2121} 2122