// SPDX-License-Identifier: GPL-2.0-only

/*
 * Local APIC virtualization
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2007 Novell
 * Copyright (C) 2007 Intel
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Dor Laor <dor.laor@qumranet.com>
 *   Gregory Haskins <ghaskins@novell.com>
 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *
 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/export.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/delay.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "ioapic.h"
#include "trace.h"
#include "x86.h"
#include "cpuid.h"
#include "hyperv.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"

/* 0x14 is the APIC version reported by Xeon and Pentium 4 (see SDM 8.4.8). */
#define APIC_VERSION			(0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH		(1 << 12)
/* The following defines are not in apicdef.h. */
#define MAX_APIC_VECTOR			256
#define APIC_VECTORS_PER_REG		32

static bool lapic_timer_advance_dynamic __read_mostly;
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN	100	/* clock cycles */
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX	10000	/* clock cycles */
#define LAPIC_TIMER_ADVANCE_NS_INIT	1000
#define LAPIC_TIMER_ADVANCE_NS_MAX	5000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP	8

static inline int apic_test_vector(int vec, void *bitmap)
{
	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
		apic_test_vector(vector, apic->regs + APIC_IRR);
}

static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;

static inline int apic_enabled(struct kvm_lapic *apic)
{
	return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
}

#define LVT_MASK	\
	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK	\
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
{
	return apic->vcpu->vcpu_id;
}

static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
{
	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
		(kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
}

bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
{
	return kvm_x86_ops.set_hv_timer
		&&
!(kvm_mwait_in_guest(vcpu->kvm) || 124 kvm_can_post_timer_interrupt(vcpu)); 125} 126EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer); 127 128static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu) 129{ 130 return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE; 131} 132 133static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, 134 u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) { 135 switch (map->mode) { 136 case KVM_APIC_MODE_X2APIC: { 137 u32 offset = (dest_id >> 16) * 16; 138 u32 max_apic_id = map->max_apic_id; 139 140 if (offset <= max_apic_id) { 141 u8 cluster_size = min(max_apic_id - offset + 1, 16U); 142 143 offset = array_index_nospec(offset, map->max_apic_id + 1); 144 *cluster = &map->phys_map[offset]; 145 *mask = dest_id & (0xffff >> (16 - cluster_size)); 146 } else { 147 *mask = 0; 148 } 149 150 return true; 151 } 152 case KVM_APIC_MODE_XAPIC_FLAT: 153 *cluster = map->xapic_flat_map; 154 *mask = dest_id & 0xff; 155 return true; 156 case KVM_APIC_MODE_XAPIC_CLUSTER: 157 *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf]; 158 *mask = dest_id & 0xf; 159 return true; 160 default: 161 /* Not optimized. */ 162 return false; 163 } 164} 165 166static void kvm_apic_map_free(struct rcu_head *rcu) 167{ 168 struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu); 169 170 kvfree(map); 171} 172 173/* 174 * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock. 175 * 176 * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with 177 * apic_map_lock_held. 178 */ 179enum { 180 CLEAN, 181 UPDATE_IN_PROGRESS, 182 DIRTY 183}; 184 185void kvm_recalculate_apic_map(struct kvm *kvm) 186{ 187 struct kvm_apic_map *new, *old = NULL; 188 struct kvm_vcpu *vcpu; 189 int i; 190 u32 max_id = 255; /* enough space for any xAPIC ID */ 191 192 /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */ 193 if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN) 194 return; 195 196 mutex_lock(&kvm->arch.apic_map_lock); 197 /* 198 * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map 199 * (if clean) or the APIC registers (if dirty). 200 */ 201 if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty, 202 DIRTY, UPDATE_IN_PROGRESS) == CLEAN) { 203 /* Someone else has updated the map. */ 204 mutex_unlock(&kvm->arch.apic_map_lock); 205 return; 206 } 207 208 kvm_for_each_vcpu(i, vcpu, kvm) 209 if (kvm_apic_present(vcpu)) 210 max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic)); 211 212 new = kvzalloc(sizeof(struct kvm_apic_map) + 213 sizeof(struct kvm_lapic *) * ((u64)max_id + 1), 214 GFP_KERNEL_ACCOUNT); 215 216 if (!new) 217 goto out; 218 219 new->max_apic_id = max_id; 220 221 kvm_for_each_vcpu(i, vcpu, kvm) { 222 struct kvm_lapic *apic = vcpu->arch.apic; 223 struct kvm_lapic **cluster; 224 u16 mask; 225 u32 ldr; 226 u8 xapic_id; 227 u32 x2apic_id; 228 229 if (!kvm_apic_present(vcpu)) 230 continue; 231 232 xapic_id = kvm_xapic_id(apic); 233 x2apic_id = kvm_x2apic_id(apic); 234 235 /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */ 236 if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) && 237 x2apic_id <= new->max_apic_id) 238 new->phys_map[x2apic_id] = apic; 239 /* 240 * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around, 241 * prevent them from masking VCPUs with APIC ID <= 0xff. 
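		 * For example (illustrative): a vCPU with vcpu_id 0x100 reads
		 * back a truncated xAPIC ID of 0x00 and must not displace the
		 * vCPU whose APIC ID really is 0 from phys_map[0].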
242 */ 243 if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id]) 244 new->phys_map[xapic_id] = apic; 245 246 if (!kvm_apic_sw_enabled(apic)) 247 continue; 248 249 ldr = kvm_lapic_get_reg(apic, APIC_LDR); 250 251 if (apic_x2apic_mode(apic)) { 252 new->mode |= KVM_APIC_MODE_X2APIC; 253 } else if (ldr) { 254 ldr = GET_APIC_LOGICAL_ID(ldr); 255 if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT) 256 new->mode |= KVM_APIC_MODE_XAPIC_FLAT; 257 else 258 new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER; 259 } 260 261 if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask)) 262 continue; 263 264 if (mask) 265 cluster[ffs(mask) - 1] = apic; 266 } 267out: 268 old = rcu_dereference_protected(kvm->arch.apic_map, 269 lockdep_is_held(&kvm->arch.apic_map_lock)); 270 rcu_assign_pointer(kvm->arch.apic_map, new); 271 /* 272 * Write kvm->arch.apic_map before clearing apic->apic_map_dirty. 273 * If another update has come in, leave it DIRTY. 274 */ 275 atomic_cmpxchg_release(&kvm->arch.apic_map_dirty, 276 UPDATE_IN_PROGRESS, CLEAN); 277 mutex_unlock(&kvm->arch.apic_map_lock); 278 279 if (old) 280 call_rcu(&old->rcu, kvm_apic_map_free); 281 282 kvm_make_scan_ioapic_request(kvm); 283} 284 285static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) 286{ 287 bool enabled = val & APIC_SPIV_APIC_ENABLED; 288 289 kvm_lapic_set_reg(apic, APIC_SPIV, val); 290 291 if (enabled != apic->sw_enabled) { 292 apic->sw_enabled = enabled; 293 if (enabled) 294 static_key_slow_dec_deferred(&apic_sw_disabled); 295 else 296 static_key_slow_inc(&apic_sw_disabled.key); 297 298 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 299 } 300 301 /* Check if there are APF page ready requests pending */ 302 if (enabled) 303 kvm_make_request(KVM_REQ_APF_READY, apic->vcpu); 304} 305 306static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) 307{ 308 kvm_lapic_set_reg(apic, APIC_ID, id << 24); 309 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 310} 311 312static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) 313{ 314 kvm_lapic_set_reg(apic, APIC_LDR, id); 315 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 316} 317 318static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val) 319{ 320 kvm_lapic_set_reg(apic, APIC_DFR, val); 321 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 322} 323 324static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) 325{ 326 return ((id >> 4) << 16) | (1 << (id & 0xf)); 327} 328 329static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) 330{ 331 u32 ldr = kvm_apic_calc_x2apic_ldr(id); 332 333 WARN_ON_ONCE(id != apic->vcpu->vcpu_id); 334 335 kvm_lapic_set_reg(apic, APIC_ID, id); 336 kvm_lapic_set_reg(apic, APIC_LDR, ldr); 337 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 338} 339 340static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) 341{ 342 return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); 343} 344 345static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) 346{ 347 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT; 348} 349 350static inline int apic_lvtt_period(struct kvm_lapic *apic) 351{ 352 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC; 353} 354 355static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) 356{ 357 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE; 358} 359 360static inline int apic_lvt_nmi_mode(u32 lvt_val) 361{ 362 return (lvt_val & (APIC_MODE_MASK | 
APIC_LVT_MASKED)) == APIC_DM_NMI; 363} 364 365void kvm_apic_set_version(struct kvm_vcpu *vcpu) 366{ 367 struct kvm_lapic *apic = vcpu->arch.apic; 368 u32 v = APIC_VERSION; 369 370 if (!lapic_in_kernel(vcpu)) 371 return; 372 373 /* 374 * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation) 375 * which doesn't have EOI register; Some buggy OSes (e.g. Windows with 376 * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC 377 * version first and level-triggered interrupts never get EOIed in 378 * IOAPIC. 379 */ 380 if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) && 381 !ioapic_in_kernel(vcpu->kvm)) 382 v |= APIC_LVR_DIRECTED_EOI; 383 kvm_lapic_set_reg(apic, APIC_LVR, v); 384} 385 386static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = { 387 LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ 388 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ 389 LVT_MASK | APIC_MODE_MASK, /* LVTPC */ 390 LINT_MASK, LINT_MASK, /* LVT0-1 */ 391 LVT_MASK /* LVTERR */ 392}; 393 394static int find_highest_vector(void *bitmap) 395{ 396 int vec; 397 u32 *reg; 398 399 for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; 400 vec >= 0; vec -= APIC_VECTORS_PER_REG) { 401 reg = bitmap + REG_POS(vec); 402 if (*reg) 403 return __fls(*reg) + vec; 404 } 405 406 return -1; 407} 408 409static u8 count_vectors(void *bitmap) 410{ 411 int vec; 412 u32 *reg; 413 u8 count = 0; 414 415 for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) { 416 reg = bitmap + REG_POS(vec); 417 count += hweight32(*reg); 418 } 419 420 return count; 421} 422 423bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr) 424{ 425 u32 i, vec; 426 u32 pir_val, irr_val, prev_irr_val; 427 int max_updated_irr; 428 429 max_updated_irr = -1; 430 *max_irr = -1; 431 432 for (i = vec = 0; i <= 7; i++, vec += 32) { 433 pir_val = READ_ONCE(pir[i]); 434 irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10)); 435 if (pir_val) { 436 prev_irr_val = irr_val; 437 irr_val |= xchg(&pir[i], 0); 438 *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val; 439 if (prev_irr_val != irr_val) { 440 max_updated_irr = 441 __fls(irr_val ^ prev_irr_val) + vec; 442 } 443 } 444 if (irr_val) 445 *max_irr = __fls(irr_val) + vec; 446 } 447 448 return ((max_updated_irr != -1) && 449 (max_updated_irr == *max_irr)); 450} 451EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); 452 453bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr) 454{ 455 struct kvm_lapic *apic = vcpu->arch.apic; 456 457 return __kvm_apic_update_irr(pir, apic->regs, max_irr); 458} 459EXPORT_SYMBOL_GPL(kvm_apic_update_irr); 460 461static inline int apic_search_irr(struct kvm_lapic *apic) 462{ 463 return find_highest_vector(apic->regs + APIC_IRR); 464} 465 466static inline int apic_find_highest_irr(struct kvm_lapic *apic) 467{ 468 int result; 469 470 /* 471 * Note that irr_pending is just a hint. It will be always 472 * true with virtual interrupt delivery enabled. 
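	 * If it is false, the IRR is treated as empty and the more expensive
	 * scan in apic_search_irr() below is skipped.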
473 */ 474 if (!apic->irr_pending) 475 return -1; 476 477 result = apic_search_irr(apic); 478 ASSERT(result == -1 || result >= 16); 479 480 return result; 481} 482 483static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) 484{ 485 struct kvm_vcpu *vcpu; 486 487 vcpu = apic->vcpu; 488 489 if (unlikely(vcpu->arch.apicv_active)) { 490 /* need to update RVI */ 491 kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); 492 kvm_x86_ops.hwapic_irr_update(vcpu, 493 apic_find_highest_irr(apic)); 494 } else { 495 apic->irr_pending = false; 496 kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); 497 if (apic_search_irr(apic) != -1) 498 apic->irr_pending = true; 499 } 500} 501 502void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec) 503{ 504 apic_clear_irr(vec, vcpu->arch.apic); 505} 506EXPORT_SYMBOL_GPL(kvm_apic_clear_irr); 507 508static inline void apic_set_isr(int vec, struct kvm_lapic *apic) 509{ 510 struct kvm_vcpu *vcpu; 511 512 if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) 513 return; 514 515 vcpu = apic->vcpu; 516 517 /* 518 * With APIC virtualization enabled, all caching is disabled 519 * because the processor can modify ISR under the hood. Instead 520 * just set SVI. 521 */ 522 if (unlikely(vcpu->arch.apicv_active)) 523 kvm_x86_ops.hwapic_isr_update(vcpu, vec); 524 else { 525 ++apic->isr_count; 526 BUG_ON(apic->isr_count > MAX_APIC_VECTOR); 527 /* 528 * ISR (in service register) bit is set when injecting an interrupt. 529 * The highest vector is injected. Thus the latest bit set matches 530 * the highest bit in ISR. 531 */ 532 apic->highest_isr_cache = vec; 533 } 534} 535 536static inline int apic_find_highest_isr(struct kvm_lapic *apic) 537{ 538 int result; 539 540 /* 541 * Note that isr_count is always 1, and highest_isr_cache 542 * is always -1, with APIC virtualization enabled. 543 */ 544 if (!apic->isr_count) 545 return -1; 546 if (likely(apic->highest_isr_cache != -1)) 547 return apic->highest_isr_cache; 548 549 result = find_highest_vector(apic->regs + APIC_ISR); 550 ASSERT(result == -1 || result >= 16); 551 552 return result; 553} 554 555static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) 556{ 557 struct kvm_vcpu *vcpu; 558 if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) 559 return; 560 561 vcpu = apic->vcpu; 562 563 /* 564 * We do get here for APIC virtualization enabled if the guest 565 * uses the Hyper-V APIC enlightenment. In this case we may need 566 * to trigger a new interrupt delivery by writing the SVI field; 567 * on the other hand isr_count and highest_isr_cache are unused 568 * and must be left alone. 569 */ 570 if (unlikely(vcpu->arch.apicv_active)) 571 kvm_x86_ops.hwapic_isr_update(vcpu, 572 apic_find_highest_isr(apic)); 573 else { 574 --apic->isr_count; 575 BUG_ON(apic->isr_count < 0); 576 apic->highest_isr_cache = -1; 577 } 578} 579 580int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) 581{ 582 /* This may race with setting of irr in __apic_accept_irq() and 583 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq 584 * will cause vmexit immediately and the value will be recalculated 585 * on the next vmentry. 
586 */ 587 return apic_find_highest_irr(vcpu->arch.apic); 588} 589EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); 590 591static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 592 int vector, int level, int trig_mode, 593 struct dest_map *dest_map); 594 595int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, 596 struct dest_map *dest_map) 597{ 598 struct kvm_lapic *apic = vcpu->arch.apic; 599 600 return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, 601 irq->level, irq->trig_mode, dest_map); 602} 603 604static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map, 605 struct kvm_lapic_irq *irq, u32 min) 606{ 607 int i, count = 0; 608 struct kvm_vcpu *vcpu; 609 610 if (min > map->max_apic_id) 611 return 0; 612 613 for_each_set_bit(i, ipi_bitmap, 614 min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { 615 if (map->phys_map[min + i]) { 616 vcpu = map->phys_map[min + i]->vcpu; 617 count += kvm_apic_set_irq(vcpu, irq, NULL); 618 } 619 } 620 621 return count; 622} 623 624int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, 625 unsigned long ipi_bitmap_high, u32 min, 626 unsigned long icr, int op_64_bit) 627{ 628 struct kvm_apic_map *map; 629 struct kvm_lapic_irq irq = {0}; 630 int cluster_size = op_64_bit ? 64 : 32; 631 int count; 632 633 if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK)) 634 return -KVM_EINVAL; 635 636 irq.vector = icr & APIC_VECTOR_MASK; 637 irq.delivery_mode = icr & APIC_MODE_MASK; 638 irq.level = (icr & APIC_INT_ASSERT) != 0; 639 irq.trig_mode = icr & APIC_INT_LEVELTRIG; 640 641 rcu_read_lock(); 642 map = rcu_dereference(kvm->arch.apic_map); 643 644 count = -EOPNOTSUPP; 645 if (likely(map)) { 646 count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min); 647 min += cluster_size; 648 count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min); 649 } 650 651 rcu_read_unlock(); 652 return count; 653} 654 655static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) 656{ 657 658 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val, 659 sizeof(val)); 660} 661 662static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) 663{ 664 665 return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val, 666 sizeof(*val)); 667} 668 669static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) 670{ 671 return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; 672} 673 674static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) 675{ 676 u8 val; 677 if (pv_eoi_get_user(vcpu, &val) < 0) { 678 printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n", 679 (unsigned long long)vcpu->arch.pv_eoi.msr_val); 680 return false; 681 } 682 return val & 0x1; 683} 684 685static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) 686{ 687 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { 688 printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n", 689 (unsigned long long)vcpu->arch.pv_eoi.msr_val); 690 return; 691 } 692 __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 693} 694 695static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) 696{ 697 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { 698 printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n", 699 (unsigned long long)vcpu->arch.pv_eoi.msr_val); 700 return; 701 } 702 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 703} 704 705static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) 706{ 707 int highest_irr; 708 if (apic->vcpu->arch.apicv_active) 709 highest_irr = kvm_x86_ops.sync_pir_to_irr(apic->vcpu); 710 else 711 
highest_irr = apic_find_highest_irr(apic); 712 if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr) 713 return -1; 714 return highest_irr; 715} 716 717static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr) 718{ 719 u32 tpr, isrv, ppr, old_ppr; 720 int isr; 721 722 old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI); 723 tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI); 724 isr = apic_find_highest_isr(apic); 725 isrv = (isr != -1) ? isr : 0; 726 727 if ((tpr & 0xf0) >= (isrv & 0xf0)) 728 ppr = tpr & 0xff; 729 else 730 ppr = isrv & 0xf0; 731 732 *new_ppr = ppr; 733 if (old_ppr != ppr) 734 kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr); 735 736 return ppr < old_ppr; 737} 738 739static void apic_update_ppr(struct kvm_lapic *apic) 740{ 741 u32 ppr; 742 743 if (__apic_update_ppr(apic, &ppr) && 744 apic_has_interrupt_for_ppr(apic, ppr) != -1) 745 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 746} 747 748void kvm_apic_update_ppr(struct kvm_vcpu *vcpu) 749{ 750 apic_update_ppr(vcpu->arch.apic); 751} 752EXPORT_SYMBOL_GPL(kvm_apic_update_ppr); 753 754static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) 755{ 756 kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr); 757 apic_update_ppr(apic); 758} 759 760static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda) 761{ 762 return mda == (apic_x2apic_mode(apic) ? 763 X2APIC_BROADCAST : APIC_BROADCAST); 764} 765 766static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) 767{ 768 if (kvm_apic_broadcast(apic, mda)) 769 return true; 770 771 if (apic_x2apic_mode(apic)) 772 return mda == kvm_x2apic_id(apic); 773 774 /* 775 * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if 776 * it were in x2APIC mode. Hotplugged VCPUs start in xAPIC mode and 777 * this allows unique addressing of VCPUs with APIC ID over 0xff. 778 * The 0xff condition is needed because writeable xAPIC ID. 779 */ 780 if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic)) 781 return true; 782 783 return mda == kvm_xapic_id(apic); 784} 785 786static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) 787{ 788 u32 logical_id; 789 790 if (kvm_apic_broadcast(apic, mda)) 791 return true; 792 793 logical_id = kvm_lapic_get_reg(apic, APIC_LDR); 794 795 if (apic_x2apic_mode(apic)) 796 return ((logical_id >> 16) == (mda >> 16)) 797 && (logical_id & mda & 0xffff) != 0; 798 799 logical_id = GET_APIC_LOGICAL_ID(logical_id); 800 801 switch (kvm_lapic_get_reg(apic, APIC_DFR)) { 802 case APIC_DFR_FLAT: 803 return (logical_id & mda) != 0; 804 case APIC_DFR_CLUSTER: 805 return ((logical_id >> 4) == (mda >> 4)) 806 && (logical_id & mda & 0xf) != 0; 807 default: 808 return false; 809 } 810} 811 812/* The KVM local APIC implementation has two quirks: 813 * 814 * - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs 815 * in xAPIC mode if the "destination & 0xff" matches its xAPIC ID. 816 * KVM doesn't do that aliasing. 817 * 818 * - in-kernel IOAPIC messages have to be delivered directly to 819 * x2APIC, because the kernel does not support interrupt remapping. 820 * In order to support broadcast without interrupt remapping, x2APIC 821 * rewrites the destination of non-IPI messages from APIC_BROADCAST 822 * to X2APIC_BROADCAST. 823 * 824 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API. This is 825 * important when userspace wants to use x2APIC-format MSIs, because 826 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7". 
827 */ 828static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id, 829 struct kvm_lapic *source, struct kvm_lapic *target) 830{ 831 bool ipi = source != NULL; 832 833 if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled && 834 !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target)) 835 return X2APIC_BROADCAST; 836 837 return dest_id; 838} 839 840bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, 841 int shorthand, unsigned int dest, int dest_mode) 842{ 843 struct kvm_lapic *target = vcpu->arch.apic; 844 u32 mda = kvm_apic_mda(vcpu, dest, source, target); 845 846 ASSERT(target); 847 switch (shorthand) { 848 case APIC_DEST_NOSHORT: 849 if (dest_mode == APIC_DEST_PHYSICAL) 850 return kvm_apic_match_physical_addr(target, mda); 851 else 852 return kvm_apic_match_logical_addr(target, mda); 853 case APIC_DEST_SELF: 854 return target == source; 855 case APIC_DEST_ALLINC: 856 return true; 857 case APIC_DEST_ALLBUT: 858 return target != source; 859 default: 860 return false; 861 } 862} 863EXPORT_SYMBOL_GPL(kvm_apic_match_dest); 864 865int kvm_vector_to_index(u32 vector, u32 dest_vcpus, 866 const unsigned long *bitmap, u32 bitmap_size) 867{ 868 u32 mod; 869 int i, idx = -1; 870 871 mod = vector % dest_vcpus; 872 873 for (i = 0; i <= mod; i++) { 874 idx = find_next_bit(bitmap, bitmap_size, idx + 1); 875 BUG_ON(idx == bitmap_size); 876 } 877 878 return idx; 879} 880 881static void kvm_apic_disabled_lapic_found(struct kvm *kvm) 882{ 883 if (!kvm->arch.disabled_lapic_found) { 884 kvm->arch.disabled_lapic_found = true; 885 printk(KERN_INFO 886 "Disabled LAPIC found during irq injection\n"); 887 } 888} 889 890static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src, 891 struct kvm_lapic_irq *irq, struct kvm_apic_map *map) 892{ 893 if (kvm->arch.x2apic_broadcast_quirk_disabled) { 894 if ((irq->dest_id == APIC_BROADCAST && 895 map->mode != KVM_APIC_MODE_X2APIC)) 896 return true; 897 if (irq->dest_id == X2APIC_BROADCAST) 898 return true; 899 } else { 900 bool x2apic_ipi = src && *src && apic_x2apic_mode(*src); 901 if (irq->dest_id == (x2apic_ipi ? 902 X2APIC_BROADCAST : APIC_BROADCAST)) 903 return true; 904 } 905 906 return false; 907} 908 909/* Return true if the interrupt can be handled by using *bitmap as index mask 910 * for valid destinations in *dst array. 911 * Return false if kvm_apic_map_get_dest_lapic did nothing useful. 912 * Note: we may have zero kvm_lapic destinations when we return true, which 913 * means that the interrupt should be dropped. In this case, *bitmap would be 914 * zero and *dst undefined. 
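 * Callers must therefore check each dst[i] for NULL before dereferencing it,
 * as kvm_irq_delivery_to_apic_fast() does.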
915 */ 916static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, 917 struct kvm_lapic **src, struct kvm_lapic_irq *irq, 918 struct kvm_apic_map *map, struct kvm_lapic ***dst, 919 unsigned long *bitmap) 920{ 921 int i, lowest; 922 923 if (irq->shorthand == APIC_DEST_SELF && src) { 924 *dst = src; 925 *bitmap = 1; 926 return true; 927 } else if (irq->shorthand) 928 return false; 929 930 if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map)) 931 return false; 932 933 if (irq->dest_mode == APIC_DEST_PHYSICAL) { 934 if (irq->dest_id > map->max_apic_id) { 935 *bitmap = 0; 936 } else { 937 u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1); 938 *dst = &map->phys_map[dest_id]; 939 *bitmap = 1; 940 } 941 return true; 942 } 943 944 *bitmap = 0; 945 if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst, 946 (u16 *)bitmap)) 947 return false; 948 949 if (!kvm_lowest_prio_delivery(irq)) 950 return true; 951 952 if (!kvm_vector_hashing_enabled()) { 953 lowest = -1; 954 for_each_set_bit(i, bitmap, 16) { 955 if (!(*dst)[i]) 956 continue; 957 if (lowest < 0) 958 lowest = i; 959 else if (kvm_apic_compare_prio((*dst)[i]->vcpu, 960 (*dst)[lowest]->vcpu) < 0) 961 lowest = i; 962 } 963 } else { 964 if (!*bitmap) 965 return true; 966 967 lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap), 968 bitmap, 16); 969 970 if (!(*dst)[lowest]) { 971 kvm_apic_disabled_lapic_found(kvm); 972 *bitmap = 0; 973 return true; 974 } 975 } 976 977 *bitmap = (lowest >= 0) ? 1 << lowest : 0; 978 979 return true; 980} 981 982bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, 983 struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map) 984{ 985 struct kvm_apic_map *map; 986 unsigned long bitmap; 987 struct kvm_lapic **dst = NULL; 988 int i; 989 bool ret; 990 991 *r = -1; 992 993 if (irq->shorthand == APIC_DEST_SELF) { 994 if (KVM_BUG_ON(!src, kvm)) { 995 *r = 0; 996 return true; 997 } 998 *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); 999 return true; 1000 } 1001 1002 rcu_read_lock(); 1003 map = rcu_dereference(kvm->arch.apic_map); 1004 1005 ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap); 1006 if (ret) { 1007 *r = 0; 1008 for_each_set_bit(i, &bitmap, 16) { 1009 if (!dst[i]) 1010 continue; 1011 *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); 1012 } 1013 } 1014 1015 rcu_read_unlock(); 1016 return ret; 1017} 1018 1019/* 1020 * This routine tries to handle interrupts in posted mode, here is how 1021 * it deals with different cases: 1022 * - For single-destination interrupts, handle it in posted mode 1023 * - Else if vector hashing is enabled and it is a lowest-priority 1024 * interrupt, handle it in posted mode and use the following mechanism 1025 * to find the destination vCPU. 1026 * 1. For lowest-priority interrupts, store all the possible 1027 * destination vCPUs in an array. 1028 * 2. Use "guest vector % max number of destination vCPUs" to find 1029 * the right destination vCPU in the array for the lowest-priority 1030 * interrupt. 1031 * - Otherwise, use remapped mode to inject the interrupt. 
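 *
 * For the lowest-priority hashing case above, an illustrative example:
 * with guest vector 0x31 and four candidate destination vCPUs,
 * kvm_vector_to_index() computes 0x31 % 4 = 1 and picks the second set
 * bit in the destination bitmap.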
1032 */ 1033bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, 1034 struct kvm_vcpu **dest_vcpu) 1035{ 1036 struct kvm_apic_map *map; 1037 unsigned long bitmap; 1038 struct kvm_lapic **dst = NULL; 1039 bool ret = false; 1040 1041 if (irq->shorthand) 1042 return false; 1043 1044 rcu_read_lock(); 1045 map = rcu_dereference(kvm->arch.apic_map); 1046 1047 if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) && 1048 hweight16(bitmap) == 1) { 1049 unsigned long i = find_first_bit(&bitmap, 16); 1050 1051 if (dst[i]) { 1052 *dest_vcpu = dst[i]->vcpu; 1053 ret = true; 1054 } 1055 } 1056 1057 rcu_read_unlock(); 1058 return ret; 1059} 1060 1061/* 1062 * Add a pending IRQ into lapic. 1063 * Return 1 if successfully added and 0 if discarded. 1064 */ 1065static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 1066 int vector, int level, int trig_mode, 1067 struct dest_map *dest_map) 1068{ 1069 int result = 0; 1070 struct kvm_vcpu *vcpu = apic->vcpu; 1071 1072 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, 1073 trig_mode, vector); 1074 switch (delivery_mode) { 1075 case APIC_DM_LOWEST: 1076 vcpu->arch.apic_arb_prio++; 1077 fallthrough; 1078 case APIC_DM_FIXED: 1079 if (unlikely(trig_mode && !level)) 1080 break; 1081 1082 /* FIXME add logic for vcpu on reset */ 1083 if (unlikely(!apic_enabled(apic))) 1084 break; 1085 1086 result = 1; 1087 1088 if (dest_map) { 1089 __set_bit(vcpu->vcpu_id, dest_map->map); 1090 dest_map->vectors[vcpu->vcpu_id] = vector; 1091 } 1092 1093 if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { 1094 if (trig_mode) 1095 kvm_lapic_set_vector(vector, 1096 apic->regs + APIC_TMR); 1097 else 1098 kvm_lapic_clear_vector(vector, 1099 apic->regs + APIC_TMR); 1100 } 1101 1102 if (kvm_x86_ops.deliver_posted_interrupt(vcpu, vector)) { 1103 kvm_lapic_set_irr(vector, apic); 1104 kvm_make_request(KVM_REQ_EVENT, vcpu); 1105 kvm_vcpu_kick(vcpu); 1106 } 1107 break; 1108 1109 case APIC_DM_REMRD: 1110 result = 1; 1111 vcpu->arch.pv.pv_unhalted = 1; 1112 kvm_make_request(KVM_REQ_EVENT, vcpu); 1113 kvm_vcpu_kick(vcpu); 1114 break; 1115 1116 case APIC_DM_SMI: 1117 result = 1; 1118 kvm_make_request(KVM_REQ_SMI, vcpu); 1119 kvm_vcpu_kick(vcpu); 1120 break; 1121 1122 case APIC_DM_NMI: 1123 result = 1; 1124 kvm_inject_nmi(vcpu); 1125 kvm_vcpu_kick(vcpu); 1126 break; 1127 1128 case APIC_DM_INIT: 1129 if (!trig_mode || level) { 1130 result = 1; 1131 /* assumes that there are only KVM_APIC_INIT/SIPI */ 1132 apic->pending_events = (1UL << KVM_APIC_INIT); 1133 kvm_make_request(KVM_REQ_EVENT, vcpu); 1134 kvm_vcpu_kick(vcpu); 1135 } 1136 break; 1137 1138 case APIC_DM_STARTUP: 1139 result = 1; 1140 apic->sipi_vector = vector; 1141 /* make sure sipi_vector is visible for the receiver */ 1142 smp_wmb(); 1143 set_bit(KVM_APIC_SIPI, &apic->pending_events); 1144 kvm_make_request(KVM_REQ_EVENT, vcpu); 1145 kvm_vcpu_kick(vcpu); 1146 break; 1147 1148 case APIC_DM_EXTINT: 1149 /* 1150 * Should only be called by kvm_apic_local_deliver() with LVT0, 1151 * before NMI watchdog was enabled. Already handled by 1152 * kvm_apic_accept_pic_intr(). 1153 */ 1154 break; 1155 1156 default: 1157 printk(KERN_ERR "TODO: unsupported delivery mode %x\n", 1158 delivery_mode); 1159 break; 1160 } 1161 return result; 1162} 1163 1164/* 1165 * This routine identifies the destination vcpus mask meant to receive the 1166 * IOAPIC interrupts. 
 * It either uses kvm_apic_map_get_dest_lapic() to obtain the array of
 * destination vCPUs and set the bitmap, or it walks every vCPU and matches
 * the destination against each one individually.
 */
void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
			      unsigned long *vcpu_bitmap)
{
	struct kvm_lapic **dest_vcpu = NULL;
	struct kvm_lapic *src = NULL;
	struct kvm_apic_map *map;
	struct kvm_vcpu *vcpu;
	unsigned long bitmap;
	int i, vcpu_idx;
	bool ret;

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
					  &bitmap);
	if (ret) {
		for_each_set_bit(i, &bitmap, 16) {
			if (!dest_vcpu[i])
				continue;
			vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
			__set_bit(vcpu_idx, vcpu_bitmap);
		}
	} else {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			if (!kvm_apic_present(vcpu))
				continue;
			if (!kvm_apic_match_dest(vcpu, NULL,
						 irq->shorthand,
						 irq->dest_id,
						 irq->dest_mode))
				continue;
			__set_bit(i, vcpu_bitmap);
		}
	}
	rcu_read_unlock();
}

int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
{
	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}

static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
{
	return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
}

static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
	int trigger_mode;

	/* Forward the EOI to the IOAPIC only if the IOAPIC handles this vector. */
	if (!kvm_ioapic_handles_vector(apic, vector))
		return;

	/* Request a KVM exit to inform the userspace IOAPIC. */
	if (irqchip_split(apic->vcpu->kvm)) {
		apic->vcpu->arch.pending_ioapic_eoi = vector;
		kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
		return;
	}

	if (apic_test_vector(vector, apic->regs + APIC_TMR))
		trigger_mode = IOAPIC_LEVEL_TRIG;
	else
		trigger_mode = IOAPIC_EDGE_TRIG;

	kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
}

static int apic_set_eoi(struct kvm_lapic *apic)
{
	int vector = apic_find_highest_isr(apic);

	trace_kvm_eoi(apic, vector);

	/*
	 * Not every EOI write has a corresponding ISR bit set; one example
	 * is when the kernel checks the timer in setup_IO_APIC().
	 */
	if (vector == -1)
		return vector;

	apic_clear_isr(vector, apic);
	apic_update_ppr(apic);

	if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
		kvm_hv_synic_send_eoi(apic->vcpu, vector);

	kvm_ioapic_send_eoi(apic, vector);
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
	return vector;
}

/*
 * This interface assumes a trap-like exit, which has already finished the
 * desired side effects, including the vISR and vPPR updates.
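 * Consequently, only the EOI has to be forwarded to the (in-kernel or
 * userspace) IOAPIC and KVM_REQ_EVENT raised; the ISR and PPR are not
 * touched here.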
1268 */ 1269void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector) 1270{ 1271 struct kvm_lapic *apic = vcpu->arch.apic; 1272 1273 trace_kvm_eoi(apic, vector); 1274 1275 kvm_ioapic_send_eoi(apic, vector); 1276 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 1277} 1278EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated); 1279 1280void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high) 1281{ 1282 struct kvm_lapic_irq irq; 1283 1284 irq.vector = icr_low & APIC_VECTOR_MASK; 1285 irq.delivery_mode = icr_low & APIC_MODE_MASK; 1286 irq.dest_mode = icr_low & APIC_DEST_MASK; 1287 irq.level = (icr_low & APIC_INT_ASSERT) != 0; 1288 irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; 1289 irq.shorthand = icr_low & APIC_SHORT_MASK; 1290 irq.msi_redir_hint = false; 1291 if (apic_x2apic_mode(apic)) 1292 irq.dest_id = icr_high; 1293 else 1294 irq.dest_id = GET_APIC_DEST_FIELD(icr_high); 1295 1296 trace_kvm_apic_ipi(icr_low, irq.dest_id); 1297 1298 kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); 1299} 1300 1301static u32 apic_get_tmcct(struct kvm_lapic *apic) 1302{ 1303 ktime_t remaining, now; 1304 s64 ns; 1305 u32 tmcct; 1306 1307 ASSERT(apic != NULL); 1308 1309 /* if initial count is 0, current count should also be 0 */ 1310 if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 || 1311 apic->lapic_timer.period == 0) 1312 return 0; 1313 1314 now = ktime_get(); 1315 remaining = ktime_sub(apic->lapic_timer.target_expiration, now); 1316 if (ktime_to_ns(remaining) < 0) 1317 remaining = 0; 1318 1319 ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period); 1320 tmcct = div64_u64(ns, 1321 (APIC_BUS_CYCLE_NS * apic->divide_count)); 1322 1323 return tmcct; 1324} 1325 1326static void __report_tpr_access(struct kvm_lapic *apic, bool write) 1327{ 1328 struct kvm_vcpu *vcpu = apic->vcpu; 1329 struct kvm_run *run = vcpu->run; 1330 1331 kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu); 1332 run->tpr_access.rip = kvm_rip_read(vcpu); 1333 run->tpr_access.is_write = write; 1334} 1335 1336static inline void report_tpr_access(struct kvm_lapic *apic, bool write) 1337{ 1338 if (apic->vcpu->arch.tpr_access_reporting) 1339 __report_tpr_access(apic, write); 1340} 1341 1342static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) 1343{ 1344 u32 val = 0; 1345 1346 if (offset >= LAPIC_MMIO_LENGTH) 1347 return 0; 1348 1349 switch (offset) { 1350 case APIC_ARBPRI: 1351 break; 1352 1353 case APIC_TMCCT: /* Timer CCR */ 1354 if (apic_lvtt_tscdeadline(apic)) 1355 return 0; 1356 1357 val = apic_get_tmcct(apic); 1358 break; 1359 case APIC_PROCPRI: 1360 apic_update_ppr(apic); 1361 val = kvm_lapic_get_reg(apic, offset); 1362 break; 1363 case APIC_TASKPRI: 1364 report_tpr_access(apic, false); 1365 fallthrough; 1366 default: 1367 val = kvm_lapic_get_reg(apic, offset); 1368 break; 1369 } 1370 1371 return val; 1372} 1373 1374static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev) 1375{ 1376 return container_of(dev, struct kvm_lapic, dev); 1377} 1378 1379#define APIC_REG_MASK(reg) (1ull << ((reg) >> 4)) 1380#define APIC_REGS_MASK(first, count) \ 1381 (APIC_REG_MASK(first) * ((1ull << (count)) - 1)) 1382 1383int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, 1384 void *data) 1385{ 1386 unsigned char alignment = offset & 0xf; 1387 u32 result; 1388 /* this bitmask has a bit cleared for each reserved register */ 1389 u64 valid_reg_mask = 1390 APIC_REG_MASK(APIC_ID) | 1391 APIC_REG_MASK(APIC_LVR) | 1392 APIC_REG_MASK(APIC_TASKPRI) | 1393 APIC_REG_MASK(APIC_PROCPRI) | 1394 
APIC_REG_MASK(APIC_LDR) | 1395 APIC_REG_MASK(APIC_DFR) | 1396 APIC_REG_MASK(APIC_SPIV) | 1397 APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) | 1398 APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) | 1399 APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) | 1400 APIC_REG_MASK(APIC_ESR) | 1401 APIC_REG_MASK(APIC_ICR) | 1402 APIC_REG_MASK(APIC_ICR2) | 1403 APIC_REG_MASK(APIC_LVTT) | 1404 APIC_REG_MASK(APIC_LVTTHMR) | 1405 APIC_REG_MASK(APIC_LVTPC) | 1406 APIC_REG_MASK(APIC_LVT0) | 1407 APIC_REG_MASK(APIC_LVT1) | 1408 APIC_REG_MASK(APIC_LVTERR) | 1409 APIC_REG_MASK(APIC_TMICT) | 1410 APIC_REG_MASK(APIC_TMCCT) | 1411 APIC_REG_MASK(APIC_TDCR); 1412 1413 /* ARBPRI is not valid on x2APIC */ 1414 if (!apic_x2apic_mode(apic)) 1415 valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI); 1416 1417 if (alignment + len > 4) 1418 return 1; 1419 1420 if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) 1421 return 1; 1422 1423 result = __apic_read(apic, offset & ~0xf); 1424 1425 trace_kvm_apic_read(offset, result); 1426 1427 switch (len) { 1428 case 1: 1429 case 2: 1430 case 4: 1431 memcpy(data, (char *)&result + alignment, len); 1432 break; 1433 default: 1434 printk(KERN_ERR "Local APIC read with len = %x, " 1435 "should be 1,2, or 4 instead\n", len); 1436 break; 1437 } 1438 return 0; 1439} 1440EXPORT_SYMBOL_GPL(kvm_lapic_reg_read); 1441 1442static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) 1443{ 1444 return addr >= apic->base_address && 1445 addr < apic->base_address + LAPIC_MMIO_LENGTH; 1446} 1447 1448static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, 1449 gpa_t address, int len, void *data) 1450{ 1451 struct kvm_lapic *apic = to_lapic(this); 1452 u32 offset = address - apic->base_address; 1453 1454 if (!apic_mmio_in_range(apic, address)) 1455 return -EOPNOTSUPP; 1456 1457 if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) { 1458 if (!kvm_check_has_quirk(vcpu->kvm, 1459 KVM_X86_QUIRK_LAPIC_MMIO_HOLE)) 1460 return -EOPNOTSUPP; 1461 1462 memset(data, 0xff, len); 1463 return 0; 1464 } 1465 1466 kvm_lapic_reg_read(apic, offset, len, data); 1467 1468 return 0; 1469} 1470 1471static void update_divide_count(struct kvm_lapic *apic) 1472{ 1473 u32 tmp1, tmp2, tdcr; 1474 1475 tdcr = kvm_lapic_get_reg(apic, APIC_TDCR); 1476 tmp1 = tdcr & 0xf; 1477 tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; 1478 apic->divide_count = 0x1 << (tmp2 & 0x7); 1479} 1480 1481static void limit_periodic_timer_frequency(struct kvm_lapic *apic) 1482{ 1483 /* 1484 * Do not allow the guest to program periodic timers with small 1485 * interval, since the hrtimers are not throttled by the host 1486 * scheduler. 
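	 * For example, if min_timer_period_us is 200, a requested period of
	 * 50000 ns is raised to 200000 ns by the clamping below.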
1487 */ 1488 if (apic_lvtt_period(apic) && apic->lapic_timer.period) { 1489 s64 min_period = min_timer_period_us * 1000LL; 1490 1491 if (apic->lapic_timer.period < min_period) { 1492 pr_info_ratelimited( 1493 "kvm: vcpu %i: requested %lld ns " 1494 "lapic timer period limited to %lld ns\n", 1495 apic->vcpu->vcpu_id, 1496 apic->lapic_timer.period, min_period); 1497 apic->lapic_timer.period = min_period; 1498 } 1499 } 1500} 1501 1502static void cancel_hv_timer(struct kvm_lapic *apic); 1503 1504static void apic_update_lvtt(struct kvm_lapic *apic) 1505{ 1506 u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) & 1507 apic->lapic_timer.timer_mode_mask; 1508 1509 if (apic->lapic_timer.timer_mode != timer_mode) { 1510 if (apic_lvtt_tscdeadline(apic) != (timer_mode == 1511 APIC_LVT_TIMER_TSCDEADLINE)) { 1512 hrtimer_cancel(&apic->lapic_timer.timer); 1513 preempt_disable(); 1514 if (apic->lapic_timer.hv_timer_in_use) 1515 cancel_hv_timer(apic); 1516 preempt_enable(); 1517 kvm_lapic_set_reg(apic, APIC_TMICT, 0); 1518 apic->lapic_timer.period = 0; 1519 apic->lapic_timer.tscdeadline = 0; 1520 } 1521 apic->lapic_timer.timer_mode = timer_mode; 1522 limit_periodic_timer_frequency(apic); 1523 } 1524} 1525 1526/* 1527 * On APICv, this test will cause a busy wait 1528 * during a higher-priority task. 1529 */ 1530 1531static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) 1532{ 1533 struct kvm_lapic *apic = vcpu->arch.apic; 1534 u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT); 1535 1536 if (kvm_apic_hw_enabled(apic)) { 1537 int vec = reg & APIC_VECTOR_MASK; 1538 void *bitmap = apic->regs + APIC_ISR; 1539 1540 if (vcpu->arch.apicv_active) 1541 bitmap = apic->regs + APIC_IRR; 1542 1543 if (apic_test_vector(vec, bitmap)) 1544 return true; 1545 } 1546 return false; 1547} 1548 1549static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles) 1550{ 1551 u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns; 1552 1553 /* 1554 * If the guest TSC is running at a different ratio than the host, then 1555 * convert the delay to nanoseconds to achieve an accurate delay. Note 1556 * that __delay() uses delay_tsc whenever the hardware has TSC, thus 1557 * always for VMX enabled hardware. 1558 */ 1559 if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) { 1560 __delay(min(guest_cycles, 1561 nsec_to_cycles(vcpu, timer_advance_ns))); 1562 } else { 1563 u64 delay_ns = guest_cycles * 1000000ULL; 1564 do_div(delay_ns, vcpu->arch.virtual_tsc_khz); 1565 ndelay(min_t(u32, delay_ns, timer_advance_ns)); 1566 } 1567} 1568 1569static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, 1570 s64 advance_expire_delta) 1571{ 1572 struct kvm_lapic *apic = vcpu->arch.apic; 1573 u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; 1574 u64 ns; 1575 1576 /* Do not adjust for tiny fluctuations or large random spikes. 
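	 * Only errors between LAPIC_TIMER_ADVANCE_ADJUST_MIN and
	 * LAPIC_TIMER_ADVANCE_ADJUST_MAX guest TSC cycles are fed into the
	 * adjustment below.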
*/ 1577 if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX || 1578 abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN) 1579 return; 1580 1581 /* too early */ 1582 if (advance_expire_delta < 0) { 1583 ns = -advance_expire_delta * 1000000ULL; 1584 do_div(ns, vcpu->arch.virtual_tsc_khz); 1585 timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; 1586 } else { 1587 /* too late */ 1588 ns = advance_expire_delta * 1000000ULL; 1589 do_div(ns, vcpu->arch.virtual_tsc_khz); 1590 timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; 1591 } 1592 1593 if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX)) 1594 timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; 1595 apic->lapic_timer.timer_advance_ns = timer_advance_ns; 1596} 1597 1598static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) 1599{ 1600 struct kvm_lapic *apic = vcpu->arch.apic; 1601 u64 guest_tsc, tsc_deadline; 1602 1603 tsc_deadline = apic->lapic_timer.expired_tscdeadline; 1604 apic->lapic_timer.expired_tscdeadline = 0; 1605 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 1606 apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; 1607 1608 if (guest_tsc < tsc_deadline) 1609 __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); 1610 1611 if (lapic_timer_advance_dynamic) 1612 adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); 1613} 1614 1615void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) 1616{ 1617 if (lapic_in_kernel(vcpu) && 1618 vcpu->arch.apic->lapic_timer.expired_tscdeadline && 1619 vcpu->arch.apic->lapic_timer.timer_advance_ns && 1620 lapic_timer_int_injected(vcpu)) 1621 __kvm_wait_lapic_expire(vcpu); 1622} 1623EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire); 1624 1625static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic) 1626{ 1627 struct kvm_timer *ktimer = &apic->lapic_timer; 1628 1629 kvm_apic_local_deliver(apic, APIC_LVTT); 1630 if (apic_lvtt_tscdeadline(apic)) { 1631 ktimer->tscdeadline = 0; 1632 } else if (apic_lvtt_oneshot(apic)) { 1633 ktimer->tscdeadline = 0; 1634 ktimer->target_expiration = 0; 1635 } 1636} 1637 1638static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn) 1639{ 1640 struct kvm_vcpu *vcpu = apic->vcpu; 1641 struct kvm_timer *ktimer = &apic->lapic_timer; 1642 1643 if (atomic_read(&apic->lapic_timer.pending)) 1644 return; 1645 1646 if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) 1647 ktimer->expired_tscdeadline = ktimer->tscdeadline; 1648 1649 if (!from_timer_fn && vcpu->arch.apicv_active) { 1650 WARN_ON(kvm_get_running_vcpu() != vcpu); 1651 kvm_apic_inject_pending_timer_irqs(apic); 1652 return; 1653 } 1654 1655 if (kvm_use_posted_timer_interrupt(apic->vcpu)) { 1656 /* 1657 * Ensure the guest's timer has truly expired before posting an 1658 * interrupt. Open code the relevant checks to avoid querying 1659 * lapic_timer_int_injected(), which will be false since the 1660 * interrupt isn't yet injected. Waiting until after injecting 1661 * is not an option since that won't help a posted interrupt. 
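		 * (A posted interrupt is delivered to a running guest without
		 * another VM-Exit, so there is no later point at which KVM
		 * could perform the wait.)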
1662 */ 1663 if (vcpu->arch.apic->lapic_timer.expired_tscdeadline && 1664 vcpu->arch.apic->lapic_timer.timer_advance_ns) 1665 __kvm_wait_lapic_expire(vcpu); 1666 kvm_apic_inject_pending_timer_irqs(apic); 1667 return; 1668 } 1669 1670 atomic_inc(&apic->lapic_timer.pending); 1671 kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); 1672 if (from_timer_fn) 1673 kvm_vcpu_kick(vcpu); 1674} 1675 1676static void start_sw_tscdeadline(struct kvm_lapic *apic) 1677{ 1678 struct kvm_timer *ktimer = &apic->lapic_timer; 1679 u64 guest_tsc, tscdeadline = ktimer->tscdeadline; 1680 u64 ns = 0; 1681 ktime_t expire; 1682 struct kvm_vcpu *vcpu = apic->vcpu; 1683 unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; 1684 unsigned long flags; 1685 ktime_t now; 1686 1687 if (unlikely(!tscdeadline || !this_tsc_khz)) 1688 return; 1689 1690 local_irq_save(flags); 1691 1692 now = ktime_get(); 1693 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 1694 1695 ns = (tscdeadline - guest_tsc) * 1000000ULL; 1696 do_div(ns, this_tsc_khz); 1697 1698 if (likely(tscdeadline > guest_tsc) && 1699 likely(ns > apic->lapic_timer.timer_advance_ns)) { 1700 expire = ktime_add_ns(now, ns); 1701 expire = ktime_sub_ns(expire, ktimer->timer_advance_ns); 1702 hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD); 1703 } else 1704 apic_timer_expired(apic, false); 1705 1706 local_irq_restore(flags); 1707} 1708 1709static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict) 1710{ 1711 return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count; 1712} 1713 1714static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor) 1715{ 1716 ktime_t now, remaining; 1717 u64 ns_remaining_old, ns_remaining_new; 1718 1719 apic->lapic_timer.period = 1720 tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT)); 1721 limit_periodic_timer_frequency(apic); 1722 1723 now = ktime_get(); 1724 remaining = ktime_sub(apic->lapic_timer.target_expiration, now); 1725 if (ktime_to_ns(remaining) < 0) 1726 remaining = 0; 1727 1728 ns_remaining_old = ktime_to_ns(remaining); 1729 ns_remaining_new = mul_u64_u32_div(ns_remaining_old, 1730 apic->divide_count, old_divisor); 1731 1732 apic->lapic_timer.tscdeadline += 1733 nsec_to_cycles(apic->vcpu, ns_remaining_new) - 1734 nsec_to_cycles(apic->vcpu, ns_remaining_old); 1735 apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new); 1736} 1737 1738static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg) 1739{ 1740 ktime_t now; 1741 u64 tscl = rdtsc(); 1742 s64 deadline; 1743 1744 now = ktime_get(); 1745 apic->lapic_timer.period = 1746 tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT)); 1747 1748 if (!apic->lapic_timer.period) { 1749 apic->lapic_timer.tscdeadline = 0; 1750 return false; 1751 } 1752 1753 limit_periodic_timer_frequency(apic); 1754 deadline = apic->lapic_timer.period; 1755 1756 if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { 1757 if (unlikely(count_reg != APIC_TMICT)) { 1758 deadline = tmict_to_ns(apic, 1759 kvm_lapic_get_reg(apic, count_reg)); 1760 if (unlikely(deadline <= 0)) 1761 deadline = apic->lapic_timer.period; 1762 else if (unlikely(deadline > apic->lapic_timer.period)) { 1763 pr_info_ratelimited( 1764 "kvm: vcpu %i: requested lapic timer restore with " 1765 "starting count register %#x=%u (%lld ns) > initial count (%lld ns). 
" 1766 "Using initial count to start timer.\n", 1767 apic->vcpu->vcpu_id, 1768 count_reg, 1769 kvm_lapic_get_reg(apic, count_reg), 1770 deadline, apic->lapic_timer.period); 1771 kvm_lapic_set_reg(apic, count_reg, 0); 1772 deadline = apic->lapic_timer.period; 1773 } 1774 } 1775 } 1776 1777 apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + 1778 nsec_to_cycles(apic->vcpu, deadline); 1779 apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline); 1780 1781 return true; 1782} 1783 1784static void advance_periodic_target_expiration(struct kvm_lapic *apic) 1785{ 1786 ktime_t now = ktime_get(); 1787 u64 tscl = rdtsc(); 1788 ktime_t delta; 1789 1790 /* 1791 * Synchronize both deadlines to the same time source or 1792 * differences in the periods (caused by differences in the 1793 * underlying clocks or numerical approximation errors) will 1794 * cause the two to drift apart over time as the errors 1795 * accumulate. 1796 */ 1797 apic->lapic_timer.target_expiration = 1798 ktime_add_ns(apic->lapic_timer.target_expiration, 1799 apic->lapic_timer.period); 1800 delta = ktime_sub(apic->lapic_timer.target_expiration, now); 1801 apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + 1802 nsec_to_cycles(apic->vcpu, delta); 1803} 1804 1805static void start_sw_period(struct kvm_lapic *apic) 1806{ 1807 if (!apic->lapic_timer.period) 1808 return; 1809 1810 if (ktime_after(ktime_get(), 1811 apic->lapic_timer.target_expiration)) { 1812 apic_timer_expired(apic, false); 1813 1814 if (apic_lvtt_oneshot(apic)) 1815 return; 1816 1817 advance_periodic_target_expiration(apic); 1818 } 1819 1820 hrtimer_start(&apic->lapic_timer.timer, 1821 apic->lapic_timer.target_expiration, 1822 HRTIMER_MODE_ABS_HARD); 1823} 1824 1825bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) 1826{ 1827 if (!lapic_in_kernel(vcpu)) 1828 return false; 1829 1830 return vcpu->arch.apic->lapic_timer.hv_timer_in_use; 1831} 1832EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); 1833 1834static void cancel_hv_timer(struct kvm_lapic *apic) 1835{ 1836 WARN_ON(preemptible()); 1837 WARN_ON(!apic->lapic_timer.hv_timer_in_use); 1838 kvm_x86_ops.cancel_hv_timer(apic->vcpu); 1839 apic->lapic_timer.hv_timer_in_use = false; 1840} 1841 1842static bool start_hv_timer(struct kvm_lapic *apic) 1843{ 1844 struct kvm_timer *ktimer = &apic->lapic_timer; 1845 struct kvm_vcpu *vcpu = apic->vcpu; 1846 bool expired; 1847 1848 WARN_ON(preemptible()); 1849 if (!kvm_can_use_hv_timer(vcpu)) 1850 return false; 1851 1852 if (!ktimer->tscdeadline) 1853 return false; 1854 1855 if (kvm_x86_ops.set_hv_timer(vcpu, ktimer->tscdeadline, &expired)) 1856 return false; 1857 1858 ktimer->hv_timer_in_use = true; 1859 hrtimer_cancel(&ktimer->timer); 1860 1861 /* 1862 * To simplify handling the periodic timer, leave the hv timer running 1863 * even if the deadline timer has expired, i.e. rely on the resulting 1864 * VM-Exit to recompute the periodic timer's target expiration. 1865 */ 1866 if (!apic_lvtt_period(apic)) { 1867 /* 1868 * Cancel the hv timer if the sw timer fired while the hv timer 1869 * was being programmed, or if the hv timer itself expired. 
1870 */ 1871 if (atomic_read(&ktimer->pending)) { 1872 cancel_hv_timer(apic); 1873 } else if (expired) { 1874 apic_timer_expired(apic, false); 1875 cancel_hv_timer(apic); 1876 } 1877 } 1878 1879 trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use); 1880 1881 return true; 1882} 1883 1884static void start_sw_timer(struct kvm_lapic *apic) 1885{ 1886 struct kvm_timer *ktimer = &apic->lapic_timer; 1887 1888 WARN_ON(preemptible()); 1889 if (apic->lapic_timer.hv_timer_in_use) 1890 cancel_hv_timer(apic); 1891 if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) 1892 return; 1893 1894 if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) 1895 start_sw_period(apic); 1896 else if (apic_lvtt_tscdeadline(apic)) 1897 start_sw_tscdeadline(apic); 1898 trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false); 1899} 1900 1901static void restart_apic_timer(struct kvm_lapic *apic) 1902{ 1903 preempt_disable(); 1904 1905 if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending)) 1906 goto out; 1907 1908 if (!start_hv_timer(apic)) 1909 start_sw_timer(apic); 1910out: 1911 preempt_enable(); 1912} 1913 1914void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) 1915{ 1916 struct kvm_lapic *apic = vcpu->arch.apic; 1917 1918 preempt_disable(); 1919 /* If the preempt notifier has already run, it also called apic_timer_expired */ 1920 if (!apic->lapic_timer.hv_timer_in_use) 1921 goto out; 1922 WARN_ON(rcuwait_active(&vcpu->wait)); 1923 apic_timer_expired(apic, false); 1924 cancel_hv_timer(apic); 1925 1926 if (apic_lvtt_period(apic) && apic->lapic_timer.period) { 1927 advance_periodic_target_expiration(apic); 1928 restart_apic_timer(apic); 1929 } 1930out: 1931 preempt_enable(); 1932} 1933EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); 1934 1935void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu) 1936{ 1937 restart_apic_timer(vcpu->arch.apic); 1938} 1939EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer); 1940 1941void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) 1942{ 1943 struct kvm_lapic *apic = vcpu->arch.apic; 1944 1945 preempt_disable(); 1946 /* Possibly the TSC deadline timer is not enabled yet */ 1947 if (apic->lapic_timer.hv_timer_in_use) 1948 start_sw_timer(apic); 1949 preempt_enable(); 1950} 1951EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); 1952 1953void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu) 1954{ 1955 struct kvm_lapic *apic = vcpu->arch.apic; 1956 1957 WARN_ON(!apic->lapic_timer.hv_timer_in_use); 1958 restart_apic_timer(apic); 1959} 1960 1961static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg) 1962{ 1963 atomic_set(&apic->lapic_timer.pending, 0); 1964 1965 if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) 1966 && !set_target_expiration(apic, count_reg)) 1967 return; 1968 1969 restart_apic_timer(apic); 1970} 1971 1972static void start_apic_timer(struct kvm_lapic *apic) 1973{ 1974 __start_apic_timer(apic, APIC_TMICT); 1975} 1976 1977static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) 1978{ 1979 bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val); 1980 1981 if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) { 1982 apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode; 1983 if (lvt0_in_nmi_mode) { 1984 atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); 1985 } else 1986 atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); 1987 } 1988} 1989 1990int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) 1991{ 1992 int ret = 0; 1993 1994 trace_kvm_apic_write(reg, val); 1995 1996 switch (reg) { 1997 case APIC_ID: /* Local 
APIC ID */ 1998 if (!apic_x2apic_mode(apic)) 1999 kvm_apic_set_xapic_id(apic, val >> 24); 2000 else 2001 ret = 1; 2002 break; 2003 2004 case APIC_TASKPRI: 2005 report_tpr_access(apic, true); 2006 apic_set_tpr(apic, val & 0xff); 2007 break; 2008 2009 case APIC_EOI: 2010 apic_set_eoi(apic); 2011 break; 2012 2013 case APIC_LDR: 2014 if (!apic_x2apic_mode(apic)) 2015 kvm_apic_set_ldr(apic, val & APIC_LDR_MASK); 2016 else 2017 ret = 1; 2018 break; 2019 2020 case APIC_DFR: 2021 if (!apic_x2apic_mode(apic)) 2022 kvm_apic_set_dfr(apic, val | 0x0FFFFFFF); 2023 else 2024 ret = 1; 2025 break; 2026 2027 case APIC_SPIV: { 2028 u32 mask = 0x3ff; 2029 if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) 2030 mask |= APIC_SPIV_DIRECTED_EOI; 2031 apic_set_spiv(apic, val & mask); 2032 if (!(val & APIC_SPIV_APIC_ENABLED)) { 2033 int i; 2034 u32 lvt_val; 2035 2036 for (i = 0; i < KVM_APIC_LVT_NUM; i++) { 2037 lvt_val = kvm_lapic_get_reg(apic, 2038 APIC_LVTT + 0x10 * i); 2039 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, 2040 lvt_val | APIC_LVT_MASKED); 2041 } 2042 apic_update_lvtt(apic); 2043 atomic_set(&apic->lapic_timer.pending, 0); 2044 2045 } 2046 break; 2047 } 2048 case APIC_ICR: 2049 /* No delay here, so we always clear the pending bit */ 2050 val &= ~(1 << 12); 2051 kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2)); 2052 kvm_lapic_set_reg(apic, APIC_ICR, val); 2053 break; 2054 2055 case APIC_ICR2: 2056 if (!apic_x2apic_mode(apic)) 2057 val &= 0xff000000; 2058 kvm_lapic_set_reg(apic, APIC_ICR2, val); 2059 break; 2060 2061 case APIC_LVT0: 2062 apic_manage_nmi_watchdog(apic, val); 2063 fallthrough; 2064 case APIC_LVTTHMR: 2065 case APIC_LVTPC: 2066 case APIC_LVT1: 2067 case APIC_LVTERR: { 2068 /* TODO: Check vector */ 2069 size_t size; 2070 u32 index; 2071 2072 if (!kvm_apic_sw_enabled(apic)) 2073 val |= APIC_LVT_MASKED; 2074 size = ARRAY_SIZE(apic_lvt_mask); 2075 index = array_index_nospec( 2076 (reg - APIC_LVTT) >> 4, size); 2077 val &= apic_lvt_mask[index]; 2078 kvm_lapic_set_reg(apic, reg, val); 2079 break; 2080 } 2081 2082 case APIC_LVTT: 2083 if (!kvm_apic_sw_enabled(apic)) 2084 val |= APIC_LVT_MASKED; 2085 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); 2086 kvm_lapic_set_reg(apic, APIC_LVTT, val); 2087 apic_update_lvtt(apic); 2088 break; 2089 2090 case APIC_TMICT: 2091 if (apic_lvtt_tscdeadline(apic)) 2092 break; 2093 2094 hrtimer_cancel(&apic->lapic_timer.timer); 2095 kvm_lapic_set_reg(apic, APIC_TMICT, val); 2096 start_apic_timer(apic); 2097 break; 2098 2099 case APIC_TDCR: { 2100 uint32_t old_divisor = apic->divide_count; 2101 2102 kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb); 2103 update_divide_count(apic); 2104 if (apic->divide_count != old_divisor && 2105 apic->lapic_timer.period) { 2106 hrtimer_cancel(&apic->lapic_timer.timer); 2107 update_target_expiration(apic, old_divisor); 2108 restart_apic_timer(apic); 2109 } 2110 break; 2111 } 2112 case APIC_ESR: 2113 if (apic_x2apic_mode(apic) && val != 0) 2114 ret = 1; 2115 break; 2116 2117 case APIC_SELF_IPI: 2118 /* 2119 * Self-IPI exists only when x2APIC is enabled. Bits 7:0 hold 2120 * the vector, everything else is reserved. 
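		 * Flagging the write as bad (ret = 1) lets the x2APIC MSR
		 * write path fail the WRMSR, i.e. the guest sees a #GP rather
		 * than a silently dropped self-IPI.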
2121 */ 2122 if (!apic_x2apic_mode(apic) || (val & ~APIC_VECTOR_MASK)) 2123 ret = 1; 2124 else 2125 kvm_apic_send_ipi(apic, APIC_DEST_SELF | val, 0); 2126 break; 2127 default: 2128 ret = 1; 2129 break; 2130 } 2131 2132 kvm_recalculate_apic_map(apic->vcpu->kvm); 2133 2134 return ret; 2135} 2136EXPORT_SYMBOL_GPL(kvm_lapic_reg_write); 2137 2138static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, 2139 gpa_t address, int len, const void *data) 2140{ 2141 struct kvm_lapic *apic = to_lapic(this); 2142 unsigned int offset = address - apic->base_address; 2143 u32 val; 2144 2145 if (!apic_mmio_in_range(apic, address)) 2146 return -EOPNOTSUPP; 2147 2148 if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) { 2149 if (!kvm_check_has_quirk(vcpu->kvm, 2150 KVM_X86_QUIRK_LAPIC_MMIO_HOLE)) 2151 return -EOPNOTSUPP; 2152 2153 return 0; 2154 } 2155 2156 /* 2157 * APIC register must be aligned on 128-bits boundary. 2158 * 32/64/128 bits registers must be accessed thru 32 bits. 2159 * Refer SDM 8.4.1 2160 */ 2161 if (len != 4 || (offset & 0xf)) 2162 return 0; 2163 2164 val = *(u32*)data; 2165 2166 kvm_lapic_reg_write(apic, offset & 0xff0, val); 2167 2168 return 0; 2169} 2170 2171void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) 2172{ 2173 kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0); 2174} 2175EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); 2176 2177/* emulate APIC access in a trap manner */ 2178void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) 2179{ 2180 u32 val = 0; 2181 2182 /* hw has done the conditional check and inst decode */ 2183 offset &= 0xff0; 2184 2185 kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val); 2186 2187 /* TODO: optimize to just emulate side effect w/o one more write */ 2188 kvm_lapic_reg_write(vcpu->arch.apic, offset, val); 2189} 2190EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); 2191 2192void kvm_free_lapic(struct kvm_vcpu *vcpu) 2193{ 2194 struct kvm_lapic *apic = vcpu->arch.apic; 2195 2196 if (!vcpu->arch.apic) 2197 return; 2198 2199 hrtimer_cancel(&apic->lapic_timer.timer); 2200 2201 if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) 2202 static_key_slow_dec_deferred(&apic_hw_disabled); 2203 2204 if (!apic->sw_enabled) 2205 static_key_slow_dec_deferred(&apic_sw_disabled); 2206 2207 if (apic->regs) 2208 free_page((unsigned long)apic->regs); 2209 2210 kfree(apic); 2211} 2212 2213/* 2214 *---------------------------------------------------------------------- 2215 * LAPIC interface 2216 *---------------------------------------------------------------------- 2217 */ 2218u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) 2219{ 2220 struct kvm_lapic *apic = vcpu->arch.apic; 2221 2222 if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic)) 2223 return 0; 2224 2225 return apic->lapic_timer.tscdeadline; 2226} 2227 2228void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) 2229{ 2230 struct kvm_lapic *apic = vcpu->arch.apic; 2231 2232 if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic)) 2233 return; 2234 2235 hrtimer_cancel(&apic->lapic_timer.timer); 2236 apic->lapic_timer.tscdeadline = data; 2237 start_apic_timer(apic); 2238} 2239 2240void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) 2241{ 2242 apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4); 2243} 2244 2245u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) 2246{ 2247 u64 tpr; 2248 2249 tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI); 2250 2251 return (tpr & 0xf0) >> 4; 2252} 2253 2254void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) 2255{ 2256 
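	/*
	 * Rough IA32_APIC_BASE layout handled below (illustrative):
	 *   bit  8    BSP flag (MSR_IA32_APICBASE_BSP)
	 *   bit 10    x2APIC enable (X2APIC_ENABLE)
	 *   bit 11    xAPIC global enable (MSR_IA32_APICBASE_ENABLE)
	 *   bits 12+  APIC base physical address (MSR_IA32_APICBASE_BASE)
	 * This runs for the guest's WRMSR as well as for KVM-internal callers
	 * such as kvm_lapic_reset() and kvm_apic_set_state(), so a NULL apic
	 * (local APIC emulated in userspace) has to be tolerated.
	 */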
u64 old_value = vcpu->arch.apic_base; 2257 struct kvm_lapic *apic = vcpu->arch.apic; 2258 2259 if (!apic) 2260 value |= MSR_IA32_APICBASE_BSP; 2261 2262 vcpu->arch.apic_base = value; 2263 2264 if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) 2265 kvm_update_cpuid_runtime(vcpu); 2266 2267 if (!apic) 2268 return; 2269 2270 /* update jump label if enable bit changes */ 2271 if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) { 2272 if (value & MSR_IA32_APICBASE_ENABLE) { 2273 kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); 2274 static_key_slow_dec_deferred(&apic_hw_disabled); 2275 /* Check if there are APF page ready requests pending */ 2276 kvm_make_request(KVM_REQ_APF_READY, vcpu); 2277 } else { 2278 static_key_slow_inc(&apic_hw_disabled.key); 2279 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 2280 } 2281 } 2282 2283 if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE)) 2284 kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id); 2285 2286 if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) 2287 kvm_x86_ops.set_virtual_apic_mode(vcpu); 2288 2289 apic->base_address = apic->vcpu->arch.apic_base & 2290 MSR_IA32_APICBASE_BASE; 2291 2292 if ((value & MSR_IA32_APICBASE_ENABLE) && 2293 apic->base_address != APIC_DEFAULT_PHYS_BASE) 2294 pr_warn_once("APIC base relocation is unsupported by KVM"); 2295} 2296 2297void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) 2298{ 2299 struct kvm_lapic *apic = vcpu->arch.apic; 2300 2301 if (vcpu->arch.apicv_active) { 2302 /* irr_pending is always true when apicv is activated. */ 2303 apic->irr_pending = true; 2304 apic->isr_count = 1; 2305 } else { 2306 apic->irr_pending = (apic_search_irr(apic) != -1); 2307 apic->isr_count = count_vectors(apic->regs + APIC_ISR); 2308 } 2309} 2310EXPORT_SYMBOL_GPL(kvm_apic_update_apicv); 2311 2312void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) 2313{ 2314 struct kvm_lapic *apic = vcpu->arch.apic; 2315 int i; 2316 2317 if (!apic) 2318 return; 2319 2320 /* Stop the timer in case it's a reset to an active apic */ 2321 hrtimer_cancel(&apic->lapic_timer.timer); 2322 2323 if (!init_event) { 2324 kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE | 2325 MSR_IA32_APICBASE_ENABLE); 2326 kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); 2327 } 2328 kvm_apic_set_version(apic->vcpu); 2329 2330 for (i = 0; i < KVM_APIC_LVT_NUM; i++) 2331 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); 2332 apic_update_lvtt(apic); 2333 if (kvm_vcpu_is_reset_bsp(vcpu) && 2334 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) 2335 kvm_lapic_set_reg(apic, APIC_LVT0, 2336 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); 2337 apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); 2338 2339 kvm_apic_set_dfr(apic, 0xffffffffU); 2340 apic_set_spiv(apic, 0xff); 2341 kvm_lapic_set_reg(apic, APIC_TASKPRI, 0); 2342 if (!apic_x2apic_mode(apic)) 2343 kvm_apic_set_ldr(apic, 0); 2344 kvm_lapic_set_reg(apic, APIC_ESR, 0); 2345 kvm_lapic_set_reg(apic, APIC_ICR, 0); 2346 kvm_lapic_set_reg(apic, APIC_ICR2, 0); 2347 kvm_lapic_set_reg(apic, APIC_TDCR, 0); 2348 kvm_lapic_set_reg(apic, APIC_TMICT, 0); 2349 for (i = 0; i < 8; i++) { 2350 kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0); 2351 kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0); 2352 kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0); 2353 } 2354 kvm_apic_update_apicv(vcpu); 2355 apic->highest_isr_cache = -1; 2356 update_divide_count(apic); 2357 atomic_set(&apic->lapic_timer.pending, 0); 2358 if (kvm_vcpu_is_bsp(vcpu)) 2359 
kvm_lapic_set_base(vcpu, 2360 vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP); 2361 vcpu->arch.pv_eoi.msr_val = 0; 2362 apic_update_ppr(apic); 2363 if (vcpu->arch.apicv_active) { 2364 kvm_x86_ops.apicv_post_state_restore(vcpu); 2365 kvm_x86_ops.hwapic_irr_update(vcpu, -1); 2366 kvm_x86_ops.hwapic_isr_update(vcpu, -1); 2367 } 2368 2369 vcpu->arch.apic_arb_prio = 0; 2370 vcpu->arch.apic_attention = 0; 2371 2372 kvm_recalculate_apic_map(vcpu->kvm); 2373} 2374 2375/* 2376 *---------------------------------------------------------------------- 2377 * timer interface 2378 *---------------------------------------------------------------------- 2379 */ 2380 2381static bool lapic_is_periodic(struct kvm_lapic *apic) 2382{ 2383 return apic_lvtt_period(apic); 2384} 2385 2386int apic_has_pending_timer(struct kvm_vcpu *vcpu) 2387{ 2388 struct kvm_lapic *apic = vcpu->arch.apic; 2389 2390 if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT)) 2391 return atomic_read(&apic->lapic_timer.pending); 2392 2393 return 0; 2394} 2395 2396int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) 2397{ 2398 u32 reg = kvm_lapic_get_reg(apic, lvt_type); 2399 int vector, mode, trig_mode; 2400 int r; 2401 2402 if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { 2403 vector = reg & APIC_VECTOR_MASK; 2404 mode = reg & APIC_MODE_MASK; 2405 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; 2406 2407 r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); 2408 if (r && lvt_type == APIC_LVTPC) 2409 kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED); 2410 return r; 2411 } 2412 return 0; 2413} 2414 2415void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) 2416{ 2417 struct kvm_lapic *apic = vcpu->arch.apic; 2418 2419 if (apic) 2420 kvm_apic_local_deliver(apic, APIC_LVT0); 2421} 2422 2423static const struct kvm_io_device_ops apic_mmio_ops = { 2424 .read = apic_mmio_read, 2425 .write = apic_mmio_write, 2426}; 2427 2428static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) 2429{ 2430 struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); 2431 struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer); 2432 2433 apic_timer_expired(apic, true); 2434 2435 if (lapic_is_periodic(apic)) { 2436 advance_periodic_target_expiration(apic); 2437 hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); 2438 return HRTIMER_RESTART; 2439 } else 2440 return HRTIMER_NORESTART; 2441} 2442 2443int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) 2444{ 2445 struct kvm_lapic *apic; 2446 2447 ASSERT(vcpu != NULL); 2448 2449 apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT); 2450 if (!apic) 2451 goto nomem; 2452 2453 vcpu->arch.apic = apic; 2454 2455 apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); 2456 if (!apic->regs) { 2457 printk(KERN_ERR "malloc apic regs error for vcpu %x\n", 2458 vcpu->vcpu_id); 2459 goto nomem_free_apic; 2460 } 2461 apic->vcpu = vcpu; 2462 2463 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, 2464 HRTIMER_MODE_ABS_HARD); 2465 apic->lapic_timer.timer.function = apic_timer_fn; 2466 if (timer_advance_ns == -1) { 2467 apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; 2468 lapic_timer_advance_dynamic = true; 2469 } else { 2470 apic->lapic_timer.timer_advance_ns = timer_advance_ns; 2471 lapic_timer_advance_dynamic = false; 2472 } 2473 2474 /* 2475 * APIC is created enabled. This will prevent kvm_lapic_set_base from 2476 * thinking that APIC state has changed. 
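	 * The real base address and BSP bit are established later, either by
	 * kvm_lapic_reset() during vcpu creation or by kvm_apic_set_state()
	 * when userspace restores saved APIC state.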
2477 */ 2478 vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; 2479 static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ 2480 kvm_iodevice_init(&apic->dev, &apic_mmio_ops); 2481 2482 return 0; 2483nomem_free_apic: 2484 kfree(apic); 2485 vcpu->arch.apic = NULL; 2486nomem: 2487 return -ENOMEM; 2488} 2489 2490int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) 2491{ 2492 struct kvm_lapic *apic = vcpu->arch.apic; 2493 u32 ppr; 2494 2495 if (!kvm_apic_present(vcpu)) 2496 return -1; 2497 2498 __apic_update_ppr(apic, &ppr); 2499 return apic_has_interrupt_for_ppr(apic, ppr); 2500} 2501EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt); 2502 2503int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) 2504{ 2505 u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0); 2506 2507 if (!kvm_apic_hw_enabled(vcpu->arch.apic)) 2508 return 1; 2509 if ((lvt0 & APIC_LVT_MASKED) == 0 && 2510 GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) 2511 return 1; 2512 return 0; 2513} 2514 2515void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) 2516{ 2517 struct kvm_lapic *apic = vcpu->arch.apic; 2518 2519 if (atomic_read(&apic->lapic_timer.pending) > 0) { 2520 kvm_apic_inject_pending_timer_irqs(apic); 2521 atomic_set(&apic->lapic_timer.pending, 0); 2522 } 2523} 2524 2525int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) 2526{ 2527 int vector = kvm_apic_has_interrupt(vcpu); 2528 struct kvm_lapic *apic = vcpu->arch.apic; 2529 u32 ppr; 2530 2531 if (vector == -1) 2532 return -1; 2533 2534 /* 2535 * We get here even with APIC virtualization enabled, if doing 2536 * nested virtualization and L1 runs with the "acknowledge interrupt 2537 * on exit" mode. Then we cannot inject the interrupt via RVI, 2538 * because the process would deliver it through the IDT. 2539 */ 2540 2541 apic_clear_irr(vector, apic); 2542 if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) { 2543 /* 2544 * For auto-EOI interrupts, there might be another pending 2545 * interrupt above PPR, so check whether to raise another 2546 * KVM_REQ_EVENT. 2547 */ 2548 apic_update_ppr(apic); 2549 } else { 2550 /* 2551 * For normal interrupts, PPR has been raised and there cannot 2552 * be a higher-priority pending interrupt---except if there was 2553 * a concurrent interrupt injection, but that would have 2554 * triggered KVM_REQ_EVENT already. 2555 */ 2556 apic_set_isr(vector, apic); 2557 __apic_update_ppr(apic, &ppr); 2558 } 2559 2560 return vector; 2561} 2562 2563static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, 2564 struct kvm_lapic_state *s, bool set) 2565{ 2566 if (apic_x2apic_mode(vcpu->arch.apic)) { 2567 u32 *id = (u32 *)(s->regs + APIC_ID); 2568 u32 *ldr = (u32 *)(s->regs + APIC_LDR); 2569 2570 if (vcpu->kvm->arch.x2apic_format) { 2571 if (*id != vcpu->vcpu_id) 2572 return -EINVAL; 2573 } else { 2574 if (set) 2575 *id >>= 24; 2576 else 2577 *id <<= 24; 2578 } 2579 2580 /* In x2APIC mode, the LDR is fixed and based on the id */ 2581 if (set) 2582 *ldr = kvm_apic_calc_x2apic_ldr(*id); 2583 } 2584 2585 return 0; 2586} 2587 2588int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) 2589{ 2590 memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s)); 2591 2592 /* 2593 * Get calculated timer current count for remaining timer period (if 2594 * any) and store it in the returned register set. 
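	 * (TMCCT is derived on the fly from the timer state in __apic_read()
	 * rather than kept up to date in the register page.)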
2595 */ 2596 __kvm_lapic_set_reg(s->regs, APIC_TMCCT, 2597 __apic_read(vcpu->arch.apic, APIC_TMCCT)); 2598 2599 return kvm_apic_state_fixup(vcpu, s, false); 2600} 2601 2602int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) 2603{ 2604 struct kvm_lapic *apic = vcpu->arch.apic; 2605 int r; 2606 2607 kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); 2608 /* set SPIV separately to get count of SW disabled APICs right */ 2609 apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); 2610 2611 r = kvm_apic_state_fixup(vcpu, s, true); 2612 if (r) { 2613 kvm_recalculate_apic_map(vcpu->kvm); 2614 return r; 2615 } 2616 memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); 2617 2618 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 2619 kvm_recalculate_apic_map(vcpu->kvm); 2620 kvm_apic_set_version(vcpu); 2621 2622 apic_update_ppr(apic); 2623 hrtimer_cancel(&apic->lapic_timer.timer); 2624 apic_update_lvtt(apic); 2625 apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); 2626 update_divide_count(apic); 2627 __start_apic_timer(apic, APIC_TMCCT); 2628 kvm_apic_update_apicv(vcpu); 2629 apic->highest_isr_cache = -1; 2630 if (vcpu->arch.apicv_active) { 2631 kvm_x86_ops.apicv_post_state_restore(vcpu); 2632 kvm_x86_ops.hwapic_irr_update(vcpu, 2633 apic_find_highest_irr(apic)); 2634 kvm_x86_ops.hwapic_isr_update(vcpu, 2635 apic_find_highest_isr(apic)); 2636 } 2637 kvm_make_request(KVM_REQ_EVENT, vcpu); 2638 if (ioapic_in_kernel(vcpu->kvm)) 2639 kvm_rtc_eoi_tracking_restore_one(vcpu); 2640 2641 vcpu->arch.apic_arb_prio = 0; 2642 2643 return 0; 2644} 2645 2646void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) 2647{ 2648 struct hrtimer *timer; 2649 2650 if (!lapic_in_kernel(vcpu) || 2651 kvm_can_post_timer_interrupt(vcpu)) 2652 return; 2653 2654 timer = &vcpu->arch.apic->lapic_timer.timer; 2655 if (hrtimer_cancel(timer)) 2656 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD); 2657} 2658 2659/* 2660 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt 2661 * 2662 * Detect whether guest triggered PV EOI since the 2663 * last entry. If yes, set EOI on guests's behalf. 2664 * Clear PV EOI in guest memory in any case. 2665 */ 2666static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu, 2667 struct kvm_lapic *apic) 2668{ 2669 bool pending; 2670 int vector; 2671 /* 2672 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host 2673 * and KVM_PV_EOI_ENABLED in guest memory as follows: 2674 * 2675 * KVM_APIC_PV_EOI_PENDING is unset: 2676 * -> host disabled PV EOI. 2677 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set: 2678 * -> host enabled PV EOI, guest did not execute EOI yet. 2679 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset: 2680 * -> host enabled PV EOI, guest executed EOI. 2681 */ 2682 BUG_ON(!pv_eoi_enabled(vcpu)); 2683 pending = pv_eoi_get_pending(vcpu); 2684 /* 2685 * Clear pending bit in any case: it will be set again on vmentry. 2686 * While this might not be ideal from performance point of view, 2687 * this makes sure pv eoi is only enabled when we know it's safe. 
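	 * apic_sync_pv_eoi_to_guest() re-arms the bit before the next entry
	 * once it has re-checked that doing so is still safe.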
2688 */ 2689 pv_eoi_clr_pending(vcpu); 2690 if (pending) 2691 return; 2692 vector = apic_set_eoi(apic); 2693 trace_kvm_pv_eoi(apic, vector); 2694} 2695 2696void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) 2697{ 2698 u32 data; 2699 2700 if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention)) 2701 apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic); 2702 2703 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) 2704 return; 2705 2706 if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, 2707 sizeof(u32))) 2708 return; 2709 2710 apic_set_tpr(vcpu->arch.apic, data & 0xff); 2711} 2712 2713/* 2714 * apic_sync_pv_eoi_to_guest - called before vmentry 2715 * 2716 * Detect whether it's safe to enable PV EOI and 2717 * if yes do so. 2718 */ 2719static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu, 2720 struct kvm_lapic *apic) 2721{ 2722 if (!pv_eoi_enabled(vcpu) || 2723 /* IRR set or many bits in ISR: could be nested. */ 2724 apic->irr_pending || 2725 /* Cache not set: could be safe but we don't bother. */ 2726 apic->highest_isr_cache == -1 || 2727 /* Need EOI to update ioapic. */ 2728 kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) { 2729 /* 2730 * PV EOI was disabled by apic_sync_pv_eoi_from_guest 2731 * so we need not do anything here. 2732 */ 2733 return; 2734 } 2735 2736 pv_eoi_set_pending(apic->vcpu); 2737} 2738 2739void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) 2740{ 2741 u32 data, tpr; 2742 int max_irr, max_isr; 2743 struct kvm_lapic *apic = vcpu->arch.apic; 2744 2745 apic_sync_pv_eoi_to_guest(vcpu, apic); 2746 2747 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) 2748 return; 2749 2750 tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff; 2751 max_irr = apic_find_highest_irr(apic); 2752 if (max_irr < 0) 2753 max_irr = 0; 2754 max_isr = apic_find_highest_isr(apic); 2755 if (max_isr < 0) 2756 max_isr = 0; 2757 data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); 2758 2759 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, 2760 sizeof(u32)); 2761} 2762 2763int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) 2764{ 2765 if (vapic_addr) { 2766 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, 2767 &vcpu->arch.apic->vapic_cache, 2768 vapic_addr, sizeof(u32))) 2769 return -EINVAL; 2770 __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); 2771 } else { 2772 __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); 2773 } 2774 2775 vcpu->arch.apic->vapic_addr = vapic_addr; 2776 return 0; 2777} 2778 2779int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) 2780{ 2781 struct kvm_lapic *apic = vcpu->arch.apic; 2782 u32 reg = (msr - APIC_BASE_MSR) << 4; 2783 2784 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) 2785 return 1; 2786 2787 if (reg == APIC_ICR2) 2788 return 1; 2789 2790 /* if this is ICR write vector before command */ 2791 if (reg == APIC_ICR) 2792 kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); 2793 return kvm_lapic_reg_write(apic, reg, (u32)data); 2794} 2795 2796int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) 2797{ 2798 struct kvm_lapic *apic = vcpu->arch.apic; 2799 u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; 2800 2801 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) 2802 return 1; 2803 2804 if (reg == APIC_DFR || reg == APIC_ICR2) 2805 return 1; 2806 2807 if (kvm_lapic_reg_read(apic, reg, 4, &low)) 2808 return 1; 2809 if (reg == APIC_ICR) 2810 kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high); 
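	/*
	 * reg = (msr - APIC_BASE_MSR) << 4 maps the MSR index back to the
	 * xAPIC register offset, e.g. MSR 0x830 -> APIC_ICR (0x300).  ICR is
	 * the only 64-bit x2APIC register; KVM keeps its high half in ICR2,
	 * so the two halves are recombined before returning the MSR value.
	 */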
2811 2812 *data = (((u64)high) << 32) | low; 2813 2814 return 0; 2815} 2816 2817int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) 2818{ 2819 struct kvm_lapic *apic = vcpu->arch.apic; 2820 2821 if (!lapic_in_kernel(vcpu)) 2822 return 1; 2823 2824 /* if this is ICR write vector before command */ 2825 if (reg == APIC_ICR) 2826 kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); 2827 return kvm_lapic_reg_write(apic, reg, (u32)data); 2828} 2829 2830int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) 2831{ 2832 struct kvm_lapic *apic = vcpu->arch.apic; 2833 u32 low, high = 0; 2834 2835 if (!lapic_in_kernel(vcpu)) 2836 return 1; 2837 2838 if (kvm_lapic_reg_read(apic, reg, 4, &low)) 2839 return 1; 2840 if (reg == APIC_ICR) 2841 kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high); 2842 2843 *data = (((u64)high) << 32) | low; 2844 2845 return 0; 2846} 2847 2848int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len) 2849{ 2850 u64 addr = data & ~KVM_MSR_ENABLED; 2851 struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data; 2852 unsigned long new_len; 2853 2854 if (!IS_ALIGNED(addr, 4)) 2855 return 1; 2856 2857 vcpu->arch.pv_eoi.msr_val = data; 2858 if (!pv_eoi_enabled(vcpu)) 2859 return 0; 2860 2861 if (addr == ghc->gpa && len <= ghc->len) 2862 new_len = ghc->len; 2863 else 2864 new_len = len; 2865 2866 return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len); 2867} 2868 2869void kvm_apic_accept_events(struct kvm_vcpu *vcpu) 2870{ 2871 struct kvm_lapic *apic = vcpu->arch.apic; 2872 u8 sipi_vector; 2873 unsigned long pe; 2874 2875 if (!lapic_in_kernel(vcpu) || !apic->pending_events) 2876 return; 2877 2878 /* 2879 * INITs are latched while CPU is in specific states 2880 * (SMM, VMX non-root mode, SVM with GIF=0). 2881 * Because a CPU cannot be in these states immediately 2882 * after it has processed an INIT signal (and thus in 2883 * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs 2884 * and leave the INIT pending. 2885 */ 2886 if (kvm_vcpu_latch_init(vcpu)) { 2887 WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); 2888 if (test_bit(KVM_APIC_SIPI, &apic->pending_events)) 2889 clear_bit(KVM_APIC_SIPI, &apic->pending_events); 2890 return; 2891 } 2892 2893 pe = xchg(&apic->pending_events, 0); 2894 if (test_bit(KVM_APIC_INIT, &pe)) { 2895 kvm_vcpu_reset(vcpu, true); 2896 if (kvm_vcpu_is_bsp(apic->vcpu)) 2897 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 2898 else 2899 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; 2900 } 2901 if (test_bit(KVM_APIC_SIPI, &pe) && 2902 vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { 2903 /* evaluate pending_events before reading the vector */ 2904 smp_rmb(); 2905 sipi_vector = apic->sipi_vector; 2906 kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); 2907 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 2908 } 2909} 2910 2911void kvm_lapic_init(void) 2912{ 2913 /* do not patch jump label more than once per second */ 2914 jump_label_rate_limit(&apic_hw_disabled, HZ); 2915 jump_label_rate_limit(&apic_sw_disabled, HZ); 2916} 2917 2918void kvm_lapic_exit(void) 2919{ 2920 static_key_deferred_flush(&apic_hw_disabled); 2921 static_key_deferred_flush(&apic_sw_disabled); 2922} 2923