// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/highmem.h>
#include <linux/hrtimer.h>
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/mod_devicetable.h>
#include <linux/mm.h>
#include <linux/objtool.h>
#include <linux/sched.h>
#include <linux/sched/smt.h>
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/trace_events.h>
#include <linux/entry-kvm.h>

#include <asm/apic.h>
#include <asm/asm.h>
#include <asm/cpu.h>
#include <asm/cpu_device_id.h>
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
#include <asm/idtentry.h>
#include <asm/io.h>
#include <asm/irq_remapping.h>
#include <asm/reboot.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/mshyperv.h>
#include <asm/mwait.h>
#include <asm/spec-ctrl.h>
#include <asm/vmx.h>

#include "capabilities.h"
#include "cpuid.h"
#include "hyperv.h"
#include "kvm_onhyperv.h"
#include "irq.h"
#include "kvm_cache_regs.h"
#include "lapic.h"
#include "mmu.h"
#include "nested.h"
#include "pmu.h"
#include "sgx.h"
#include "trace.h"
#include "vmcs.h"
#include "vmcs12.h"
#include "vmx.h"
#include "x86.h"
#include "smm.h"

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

#ifdef MODULE
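/*
 * When built as a module, export a CPU feature match table so that
 * userspace (e.g. udev invoking modprobe) can automatically load kvm-intel
 * on processors that advertise the VMX feature flag.
 */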
static const struct x86_cpu_id vmx_cpu_id[] = {
	X86_MATCH_FEATURE(X86_FEATURE_VMX, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
#endif

bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);

static bool __read_mostly enable_vnmi = 1;
module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);

bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);

bool __read_mostly enable_ept = 1;
module_param_named(ept, enable_ept, bool, S_IRUGO);

bool __read_mostly enable_unrestricted_guest = 1;
module_param_named(unrestricted_guest,
			enable_unrestricted_guest, bool, S_IRUGO);

bool __read_mostly enable_ept_ad_bits = 1;
module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);

static bool __read_mostly emulate_invalid_guest_state = true;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);

static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);

module_param(enable_apicv, bool, S_IRUGO);

bool __read_mostly enable_ipiv = true;
module_param(enable_ipiv, bool, 0444);

/*
 * If nested=1, nested virtualization is supported, i.e., guests may use
 * VMX and be a hypervisor for their own guests. If nested=0, guests may not
 * use VMX instructions.
 */
static bool __read_mostly nested = 1;
module_param(nested, bool, S_IRUGO);

bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);

static bool __read_mostly error_on_inconsistent_vmcs_config = true;
module_param(error_on_inconsistent_vmcs_config, bool, 0444);

static bool __read_mostly dump_invalid_vmcs = 0;
module_param(dump_invalid_vmcs, bool, 0644);
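
/*
 * Illustrative usage: the knobs above are module parameters, so a host
 * administrator could, for example, load the module with
 * "modprobe kvm_intel ept=0 pml=0" to disable EPT and page-modification
 * logging, and read the effective values back from
 * /sys/module/kvm_intel/parameters/.
 */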
#define MSR_BITMAP_MODE_X2APIC		1
#define MSR_BITMAP_MODE_X2APIC_APICV	2

#define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL

/* Guest_tsc -> host_tsc conversion requires 64-bit division.  */
static int __read_mostly cpu_preemption_timer_multi;
static bool __read_mostly enable_preemption_timer = 1;
#ifdef CONFIG_X86_64
module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#endif

extern bool __read_mostly allow_smaller_maxphyaddr;
module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);

#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
#define KVM_VM_CR0_ALWAYS_ON				\
	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)

#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)

#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))

#define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \
	RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \
	RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
	RTIT_STATUS_BYTECNT))

/*
 * List of MSRs that can be directly passed to the guest.
 * In addition to these, x2apic and PT MSRs are handled specially.
 */
static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
	MSR_IA32_SPEC_CTRL,
	MSR_IA32_PRED_CMD,
	MSR_IA32_FLUSH_CMD,
	MSR_IA32_TSC,
#ifdef CONFIG_X86_64
	MSR_FS_BASE,
	MSR_GS_BASE,
	MSR_KERNEL_GS_BASE,
	MSR_IA32_XFD,
	MSR_IA32_XFD_ERR,
#endif
	MSR_IA32_SYSENTER_CS,
	MSR_IA32_SYSENTER_ESP,
	MSR_IA32_SYSENTER_EIP,
	MSR_CORE_C1_RES,
	MSR_CORE_C3_RESIDENCY,
	MSR_CORE_C6_RESIDENCY,
	MSR_CORE_C7_RESIDENCY,
};

/*
 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
 * ple_gap:    upper bound on the amount of time between two successive
 *             executions of PAUSE in a loop. Also indicates whether PLE is
 *             enabled. According to testing, this time is usually smaller
 *             than 128 cycles.
 * ple_window: upper bound on the amount of time a guest is allowed to execute
 *             in a PAUSE loop.
 *             Tests indicate that most spinlocks are held for less than
 *             2^12 cycles.
 * Time is measured based on a counter that runs at the same rate as the TSC,
 * refer to SDM volume 3b section 21.6.13 & 22.1.3.
 */
static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
module_param(ple_gap, uint, 0444);

static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
module_param(ple_window, uint, 0444);

/* Default doubles per-vcpu window every exit. */
static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
module_param(ple_window_grow, uint, 0444);

/* Default resets per-vcpu window every exit to ple_window. */
static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
module_param(ple_window_shrink, uint, 0444);

/* Default is to compute the maximum so we can never overflow. */
static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
module_param(ple_window_max, uint, 0444);

/* Default is SYSTEM mode, 1 for host-guest mode */
int __read_mostly pt_mode = PT_MODE_SYSTEM;
module_param(pt_mode, int, S_IRUGO);

static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
static DEFINE_MUTEX(vmx_l1d_flush_mutex);

/* Storage for pre module init parameter parsing */
static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;

static const struct {
	const char *option;
	bool for_parse;
} vmentry_l1d_param[] = {
	[VMENTER_L1D_FLUSH_AUTO]	 = {"auto", true},
	[VMENTER_L1D_FLUSH_NEVER]	 = {"never", true},
	[VMENTER_L1D_FLUSH_COND]	 = {"cond", true},
	[VMENTER_L1D_FLUSH_ALWAYS]	 = {"always", true},
	[VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
	[VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
};

#define L1D_CACHE_ORDER 4
static void *vmx_l1d_flush_pages;
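
/*
 * Resolve the effective L1D flush mode for the L1TF mitigation: "auto"
 * follows the global l1tf= policy, the flush pages below are only allocated
 * when the CPU lacks the FLUSH_L1D command interface, and the static keys
 * consumed on VM-entry are flipped to match the chosen mode.
 */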
static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
{
	struct page *page;
	unsigned int i;

	if (!boot_cpu_has_bug(X86_BUG_L1TF)) {
		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
		return 0;
	}

	if (!enable_ept) {
		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
		return 0;
	}

	if (host_arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
		return 0;
	}

	/* If set to auto use the default l1tf mitigation method */
	if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
		switch (l1tf_mitigation) {
		case L1TF_MITIGATION_OFF:
			l1tf = VMENTER_L1D_FLUSH_NEVER;
			break;
		case L1TF_MITIGATION_FLUSH_NOWARN:
		case L1TF_MITIGATION_FLUSH:
		case L1TF_MITIGATION_FLUSH_NOSMT:
			l1tf = VMENTER_L1D_FLUSH_COND;
			break;
		case L1TF_MITIGATION_FULL:
		case L1TF_MITIGATION_FULL_FORCE:
			l1tf = VMENTER_L1D_FLUSH_ALWAYS;
			break;
		}
	} else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) {
		l1tf = VMENTER_L1D_FLUSH_ALWAYS;
	}

	if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
	    !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
		/*
		 * This allocation for vmx_l1d_flush_pages is not tied to a VM
		 * lifetime and so should not be charged to a memcg.
		 */
		page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
		if (!page)
			return -ENOMEM;
		vmx_l1d_flush_pages = page_address(page);

		/*
		 * Initialize each page with a different pattern in
		 * order to protect against KSM in the nested
		 * virtualization case.
		 */
		for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
			memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1,
			       PAGE_SIZE);
		}
	}

	l1tf_vmx_mitigation = l1tf;

	if (l1tf != VMENTER_L1D_FLUSH_NEVER)
		static_branch_enable(&vmx_l1d_should_flush);
	else
		static_branch_disable(&vmx_l1d_should_flush);

	if (l1tf == VMENTER_L1D_FLUSH_COND)
		static_branch_enable(&vmx_l1d_flush_cond);
	else
		static_branch_disable(&vmx_l1d_flush_cond);
	return 0;
}

static int vmentry_l1d_flush_parse(const char *s)
{
	unsigned int i;

	if (s) {
		for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
			if (vmentry_l1d_param[i].for_parse &&
			    sysfs_streq(s, vmentry_l1d_param[i].option))
				return i;
		}
	}
	return -EINVAL;
}

static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
{
	int l1tf, ret;

	l1tf = vmentry_l1d_flush_parse(s);
	if (l1tf < 0)
		return l1tf;

	if (!boot_cpu_has(X86_BUG_L1TF))
		return 0;

	/*
	 * Has vmx_init() run already? If not then this is the pre init
	 * parameter parsing. In that case just store the value and let
	 * vmx_init() do the proper setup after enable_ept has been
	 * established.
	 */
	if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) {
		vmentry_l1d_flush_param = l1tf;
		return 0;
	}

	mutex_lock(&vmx_l1d_flush_mutex);
	ret = vmx_setup_l1d_flush(l1tf);
	mutex_unlock(&vmx_l1d_flush_mutex);
	return ret;
}

static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
{
	if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
		return sysfs_emit(s, "???\n");

	return sysfs_emit(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
}

static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
{
	u64 msr;

	if (!vmx->disable_fb_clear)
		return;

	msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
	msr |= FB_CLEAR_DIS;
	native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
	/* Cache the MSR value to avoid reading it later */
	vmx->msr_ia32_mcu_opt_ctrl = msr;
}

static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
{
	if (!vmx->disable_fb_clear)
		return;

	vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
	native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
}

static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
{
	/*
	 * Disable VERW's behavior of clearing CPU buffers for the guest if the
	 * CPU isn't affected by MDS/TAA, and the host hasn't forcefully enabled
	 * the mitigation. Disabling the clearing behavior provides a
	 * performance boost for guests that aren't aware that manually clearing
	 * CPU buffers is unnecessary, at the cost of MSR accesses on VM-Entry
	 * and VM-Exit.
	 */
	vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
				(host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
				!boot_cpu_has_bug(X86_BUG_MDS) &&
				!boot_cpu_has_bug(X86_BUG_TAA);

	/*
	 * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
	 * at VMEntry. Skip the MSR read/write when a guest has no use case to
	 * execute VERW.
	 */
	if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) ||
	    ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) &&
	     (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) &&
	     (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) &&
	     (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) &&
	     (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO)))
		vmx->disable_fb_clear = false;
}

static const struct kernel_param_ops vmentry_l1d_flush_ops = {
	.set = vmentry_l1d_flush_set,
	.get = vmentry_l1d_flush_get,
};
module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);

static u32 vmx_segment_access_rights(struct kvm_segment *var);

void vmx_vmexit(void);

#define vmx_insn_failed(fmt...)		\
do {					\
	WARN_ONCE(1, fmt);		\
	pr_warn_ratelimited(fmt);	\
} while (0)

noinline void vmread_error(unsigned long field)
{
	vmx_insn_failed("vmread failed: field=%lx\n", field);
}

#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
noinstr void vmread_error_trampoline2(unsigned long field, bool fault)
{
	if (fault) {
		kvm_spurious_fault();
	} else {
		instrumentation_begin();
		vmread_error(field);
		instrumentation_end();
	}
}
#endif

noinline void vmwrite_error(unsigned long field, unsigned long value)
{
	vmx_insn_failed("vmwrite failed: field=%lx val=%lx err=%u\n",
			field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
}

noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
{
	vmx_insn_failed("vmclear failed: %p/%llx err=%u\n",
			vmcs, phys_addr, vmcs_read32(VM_INSTRUCTION_ERROR));
}

noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
{
	vmx_insn_failed("vmptrld failed: %p/%llx err=%u\n",
			vmcs, phys_addr, vmcs_read32(VM_INSTRUCTION_ERROR));
}

noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
{
	vmx_insn_failed("invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
			ext, vpid, gva);
}

noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
{
	vmx_insn_failed("invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
			ext, eptp, gpa);
}

static DEFINE_PER_CPU(struct vmcs *, vmxarea);
DEFINE_PER_CPU(struct vmcs *, current_vmcs);
/*
 * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
 * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
 */
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);

static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
static DEFINE_SPINLOCK(vmx_vpid_lock);

struct vmcs_config vmcs_config __ro_after_init;
struct vmx_capability vmx_capability __ro_after_init;

#define VMX_SEGMENT_FIELD(seg)					\
	[VCPU_SREG_##seg] = {					\
		.selector = GUEST_##seg##_SELECTOR,		\
		.base = GUEST_##seg##_BASE,			\
		.limit = GUEST_##seg##_LIMIT,			\
		.ar_bytes = GUEST_##seg##_AR_BYTES,		\
	}

static const struct kvm_vmx_segment_field {
	unsigned selector;
	unsigned base;
	unsigned limit;
	unsigned ar_bytes;
} kvm_vmx_segment_fields[] = {
	VMX_SEGMENT_FIELD(CS),
	VMX_SEGMENT_FIELD(DS),
	VMX_SEGMENT_FIELD(ES),
	VMX_SEGMENT_FIELD(FS),
	VMX_SEGMENT_FIELD(GS),
	VMX_SEGMENT_FIELD(SS),
	VMX_SEGMENT_FIELD(TR),
	VMX_SEGMENT_FIELD(LDTR),
};

static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
{
	vmx->segment_cache.bitmask = 0;
}

static unsigned long host_idt_base;

#if IS_ENABLED(CONFIG_HYPERV)
static struct kvm_x86_ops vmx_x86_ops __initdata;

static bool __read_mostly enlightened_vmcs = true;
module_param(enlightened_vmcs, bool, 0444);
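
/*
 * Wire up the Hyper-V partition assist page for this VM so that L2 TLB
 * flush requests can be handled via the enlightened (direct) hypercall
 * path; the page is shared by all eVMCS in the VM and is allocated on
 * first use.
 */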
static int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
{
	struct hv_enlightened_vmcs *evmcs;
	struct hv_partition_assist_pg **p_hv_pa_pg =
			&to_kvm_hv(vcpu->kvm)->hv_pa_pg;
	/*
	 * Synthetic VM-Exit is not enabled in current code and so all
	 * evmcs in a single VM share one assist page.
	 */
	if (!*p_hv_pa_pg)
		*p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);

	if (!*p_hv_pa_pg)
		return -ENOMEM;

	evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;

	evmcs->partition_assist_page =
		__pa(*p_hv_pa_pg);
	evmcs->hv_vm_id = (unsigned long)vcpu->kvm;
	evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;

	return 0;
}

static __init void hv_init_evmcs(void)
{
	int cpu;

	if (!enlightened_vmcs)
		return;

	/*
	 * Enlightened VMCS usage should be recommended and the host needs
	 * to support eVMCS v1 or above.
	 */
	if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
	    (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
	     KVM_EVMCS_VERSION) {

		/* Check that we have assist pages on all online CPUs */
		for_each_online_cpu(cpu) {
			if (!hv_get_vp_assist_page(cpu)) {
				enlightened_vmcs = false;
				break;
			}
		}

		if (enlightened_vmcs) {
			pr_info("Using Hyper-V Enlightened VMCS\n");
			static_branch_enable(&__kvm_is_using_evmcs);
		}

		if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
			vmx_x86_ops.enable_l2_tlb_flush
				= hv_enable_l2_tlb_flush;

	} else {
		enlightened_vmcs = false;
	}
}

static void hv_reset_evmcs(void)
{
	struct hv_vp_assist_page *vp_ap;

	if (!kvm_is_using_evmcs())
		return;

	/*
	 * KVM should enable eVMCS if and only if all CPUs have a VP assist
	 * page, and should reject CPU onlining if eVMCS is enabled but the
	 * CPU doesn't have a VP assist page allocated.
	 */
	vp_ap = hv_get_vp_assist_page(smp_processor_id());
	if (WARN_ON_ONCE(!vp_ap))
		return;

	/*
	 * Reset everything to support using non-enlightened VMCS access later
	 * (e.g. when we reload the module with enlightened_vmcs=0)
	 */
	vp_ap->nested_control.features.directhypercall = 0;
	vp_ap->current_nested_vmcs = 0;
	vp_ap->enlighten_vmentry = 0;
}

#else /* IS_ENABLED(CONFIG_HYPERV) */
static void hv_init_evmcs(void) {}
static void hv_reset_evmcs(void) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */

/*
 * Comment's format: document - errata name - stepping - processor name.
 * Refer from
 * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
 */
static u32 vmx_preemption_cpu_tfms[] = {
/* 323344.pdf - BA86   - D0 - Xeon 7500 Series */
0x000206E6,
/* 323056.pdf - AAX65  - C2 - Xeon L3406 */
/* 322814.pdf - AAT59  - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
/* 322911.pdf - AAU65  - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
0x00020652,
/* 322911.pdf - AAU65  - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
0x00020655,
/* 322373.pdf - AAO95  - B1 - Xeon 3400 Series */
/* 322166.pdf - AAN92  - B1 - i7-800 and i5-700 Desktop */
/*
 * 320767.pdf - AAP86  - B1 -
 * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
 */
0x000106E5,
/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
0x000106A0,
/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
0x000106A1,
/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
0x000106A4,
 /* 321333.pdf - AAM126 - D0 - Xeon 3500 */
 /* 321324.pdf - AAK139 - D0 - Xeon 5500 */
 /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
0x000106A5,
 /* Xeon E3-1220 V2 */
0x000306A8,
};

static inline bool cpu_has_broken_vmx_preemption_timer(void)
{
	u32 eax = cpuid_eax(0x00000001), i;

	/* Clear the reserved bits */
	eax &= ~(0x3U << 14 | 0xfU << 28);
	for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
		if (eax == vmx_preemption_cpu_tfms[i])
			return true;

	return false;
}

static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
{
	return flexpriority_enabled && lapic_in_kernel(vcpu);
}
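
/*
 * Return the index of @msr in vmx_possible_passthrough_msrs[], or -ENOENT if
 * the MSR isn't in the list of MSRs that may be passed through to the guest.
 */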
static int possible_passthrough_msr_slot(u32 msr)
{
	u32 i;

	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++)
		if (vmx_possible_passthrough_msrs[i] == msr)
			return i;

	return -ENOENT;
}

static bool is_valid_passthrough_msr(u32 msr)
{
	bool r;

	switch (msr) {
	case 0x800 ... 0x8ff:
		/* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
		return true;
	case MSR_IA32_RTIT_STATUS:
	case MSR_IA32_RTIT_OUTPUT_BASE:
	case MSR_IA32_RTIT_OUTPUT_MASK:
	case MSR_IA32_RTIT_CR3_MATCH:
	case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
		/* PT MSRs. These are handled in pt_update_intercept_for_msr() */
	case MSR_LBR_SELECT:
	case MSR_LBR_TOS:
	case MSR_LBR_INFO_0 ... MSR_LBR_INFO_0 + 31:
	case MSR_LBR_NHM_FROM ... MSR_LBR_NHM_FROM + 31:
	case MSR_LBR_NHM_TO ... MSR_LBR_NHM_TO + 31:
	case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
	case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
		/* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
		return true;
	}

	r = possible_passthrough_msr_slot(msr) != -ENOENT;

	WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);

	return r;
}

struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
{
	int i;

	i = kvm_find_user_return_msr(msr);
	if (i >= 0)
		return &vmx->guest_uret_msrs[i];
	return NULL;
}

static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
				  struct vmx_uret_msr *msr, u64 data)
{
	unsigned int slot = msr - vmx->guest_uret_msrs;
	int ret = 0;

	if (msr->load_into_hardware) {
		preempt_disable();
		ret = kvm_set_user_return_msr(slot, data, msr->mask);
		preempt_enable();
	}
	if (!ret)
		msr->data = data;
	return ret;
}

/*
 * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
 *
 * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
 * atomically track post-VMXON state, e.g. this may be called in NMI context.
 * Eat all faults, as all other VMXOFF faults are mode related, i.e. such
 * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
 * magically in RM, VM86, compat mode, or at CPL>0.
 */
static int kvm_cpu_vmxoff(void)
{
	asm goto("1: vmxoff\n\t"
		 _ASM_EXTABLE(1b, %l[fault])
		 ::: "cc", "memory" : fault);

	cr4_clear_bits(X86_CR4_VMXE);
	return 0;

fault:
	cr4_clear_bits(X86_CR4_VMXE);
	return -EIO;
}

static void vmx_emergency_disable(void)
{
	int cpu = raw_smp_processor_id();
	struct loaded_vmcs *v;

	kvm_rebooting = true;

	/*
	 * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
	 * set in task context.
	 * If this races with VMX being disabled by an NMI, VMCLEAR and VMXOFF
	 * may #UD, but KVM will eat those faults due to kvm_rebooting being
	 * set.
	 */
	if (!(__read_cr4() & X86_CR4_VMXE))
		return;

	list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
			    loaded_vmcss_on_cpu_link)
		vmcs_clear(v->vmcs);

	kvm_cpu_vmxoff();
}

static void __loaded_vmcs_clear(void *arg)
{
	struct loaded_vmcs *loaded_vmcs = arg;
	int cpu = raw_smp_processor_id();

	if (loaded_vmcs->cpu != cpu)
		return; /* vcpu migration can race with cpu offline */
	if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
		per_cpu(current_vmcs, cpu) = NULL;

	vmcs_clear(loaded_vmcs->vmcs);
	if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
		vmcs_clear(loaded_vmcs->shadow_vmcs);

	list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);

	/*
	 * Ensure all writes to loaded_vmcs, including deleting it from its
	 * current percpu list, complete before setting loaded_vmcs->cpu to
	 * -1, otherwise a different cpu can see loaded_vmcs->cpu == -1 first
	 * and add loaded_vmcs to its percpu list before it's deleted from this
	 * cpu's list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs().
	 */
	smp_wmb();

	loaded_vmcs->cpu = -1;
	loaded_vmcs->launched = 0;
}

void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
{
	int cpu = loaded_vmcs->cpu;

	if (cpu != -1)
		smp_call_function_single(cpu,
			 __loaded_vmcs_clear, loaded_vmcs, 1);
}

static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
				       unsigned field)
{
	bool ret;
	u32 mask = 1 << (seg * SEG_FIELD_NR + field);

	if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) {
		kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS);
		vmx->segment_cache.bitmask = 0;
	}
	ret = vmx->segment_cache.bitmask & mask;
	vmx->segment_cache.bitmask |= mask;
	return ret;
}

static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
{
	u16 *p = &vmx->segment_cache.seg[seg].selector;

	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
		*p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
	return *p;
}

static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
{
	ulong *p = &vmx->segment_cache.seg[seg].base;

	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
		*p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
	return *p;
}

static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
{
	u32 *p = &vmx->segment_cache.seg[seg].limit;

	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
		*p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
	return *p;
}

static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
{
	u32 *p = &vmx->segment_cache.seg[seg].ar;

	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
		*p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
	return *p;
}

void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
{
	u32 eb;
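
	/*
	 * Always intercept #PF, #UD, #MC, #DB and #AC as a baseline; the
	 * bits below are then adjusted for debug, the VMware backdoor, real
	 * mode emulation, EPT and the nested (vmcs12) configuration.
	 */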
	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
	     (1u << DB_VECTOR) | (1u << AC_VECTOR);
	/*
	 * Guest access to VMware backdoor ports could legitimately
	 * trigger #GP because of TSS I/O permission bitmap.
	 * We intercept those #GP and allow access to them anyway
	 * as VMware does.
	 */
	if (enable_vmware_backdoor)
		eb |= (1u << GP_VECTOR);
	if ((vcpu->guest_debug &
	     (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
	    (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
		eb |= 1u << BP_VECTOR;
	if (to_vmx(vcpu)->rmode.vm86_active)
		eb = ~0;
	if (!vmx_need_pf_intercept(vcpu))
		eb &= ~(1u << PF_VECTOR);

	/* When we are running a nested L2 guest and L1 specified for it a
	 * certain exception bitmap, we must trap the same exceptions and pass
	 * them to L1. When running L2, we will only handle the exceptions
	 * specified above if L1 did not want them.
	 */
	if (is_guest_mode(vcpu))
		eb |= get_vmcs12(vcpu)->exception_bitmap;
	else {
		int mask = 0, match = 0;

		if (enable_ept && (eb & (1u << PF_VECTOR))) {
			/*
			 * If EPT is enabled, #PF is currently only intercepted
			 * if MAXPHYADDR is smaller on the guest than on the
			 * host. In that case we only care about present,
			 * non-reserved faults. For vmcs02, however, PFEC_MASK
			 * and PFEC_MATCH are set in prepare_vmcs02_rare.
			 */
			mask = PFERR_PRESENT_MASK | PFERR_RSVD_MASK;
			match = PFERR_PRESENT_MASK;
		}
		vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, mask);
		vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, match);
	}

	/*
	 * Disabling xfd interception indicates that dynamic xfeatures
	 * might be used in the guest. Always trap #NM in this case
	 * to save guest xfd_err timely.
	 */
	if (vcpu->arch.xfd_no_write_intercept)
		eb |= (1u << NM_VECTOR);

	vmcs_write32(EXCEPTION_BITMAP, eb);
}

/*
 * Check if MSR is intercepted for currently loaded MSR bitmap.
 */
static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
{
	if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
		return true;

	return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr);
}

unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
{
	unsigned int flags = 0;

	if (vmx->loaded_vmcs->launched)
		flags |= VMX_RUN_VMRESUME;

	/*
	 * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free
	 * to change it directly without causing a vmexit. In that case read
	 * it after vmexit and store it in vmx->spec_ctrl.
	 */
	if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
		flags |= VMX_RUN_SAVE_SPEC_CTRL;

	return flags;
}

static __always_inline void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
		unsigned long entry, unsigned long exit)
{
	vm_entry_controls_clearbit(vmx, entry);
	vm_exit_controls_clearbit(vmx, exit);
}

int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr)
{
	unsigned int i;

	for (i = 0; i < m->nr; ++i) {
		if (m->val[i].index == msr)
			return i;
	}
	return -ENOENT;
}

static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
{
	int i;
	struct msr_autoload *m = &vmx->msr_autoload;

	switch (msr) {
	case MSR_EFER:
		if (cpu_has_load_ia32_efer()) {
			clear_atomic_switch_msr_special(vmx,
					VM_ENTRY_LOAD_IA32_EFER,
					VM_EXIT_LOAD_IA32_EFER);
			return;
		}
		break;
	case MSR_CORE_PERF_GLOBAL_CTRL:
		if (cpu_has_load_perf_global_ctrl()) {
			clear_atomic_switch_msr_special(vmx,
					VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
					VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
			return;
		}
		break;
	}
	i = vmx_find_loadstore_msr_slot(&m->guest, msr);
	if (i < 0)
		goto skip_guest;
	--m->guest.nr;
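	/* Fill the hole left by the removed entry with the last element. */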
	m->guest.val[i] = m->guest.val[m->guest.nr];
	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);

skip_guest:
	i = vmx_find_loadstore_msr_slot(&m->host, msr);
	if (i < 0)
		return;

	--m->host.nr;
	m->host.val[i] = m->host.val[m->host.nr];
	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
}

static __always_inline void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
		unsigned long entry, unsigned long exit,
		unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
		u64 guest_val, u64 host_val)
{
	vmcs_write64(guest_val_vmcs, guest_val);
	if (host_val_vmcs != HOST_IA32_EFER)
		vmcs_write64(host_val_vmcs, host_val);
	vm_entry_controls_setbit(vmx, entry);
	vm_exit_controls_setbit(vmx, exit);
}

static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
				  u64 guest_val, u64 host_val, bool entry_only)
{
	int i, j = 0;
	struct msr_autoload *m = &vmx->msr_autoload;

	switch (msr) {
	case MSR_EFER:
		if (cpu_has_load_ia32_efer()) {
			add_atomic_switch_msr_special(vmx,
					VM_ENTRY_LOAD_IA32_EFER,
					VM_EXIT_LOAD_IA32_EFER,
					GUEST_IA32_EFER,
					HOST_IA32_EFER,
					guest_val, host_val);
			return;
		}
		break;
	case MSR_CORE_PERF_GLOBAL_CTRL:
		if (cpu_has_load_perf_global_ctrl()) {
			add_atomic_switch_msr_special(vmx,
					VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
					VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
					GUEST_IA32_PERF_GLOBAL_CTRL,
					HOST_IA32_PERF_GLOBAL_CTRL,
					guest_val, host_val);
			return;
		}
		break;
	case MSR_IA32_PEBS_ENABLE:
		/* PEBS needs a quiescent period after being disabled (to write
		 * a record). Disabling PEBS through VMX MSR swapping doesn't
		 * provide that period, so a CPU could write host's record into
		 * guest's memory.
107362306a36Sopenharmony_ci */ 107462306a36Sopenharmony_ci wrmsrl(MSR_IA32_PEBS_ENABLE, 0); 107562306a36Sopenharmony_ci } 107662306a36Sopenharmony_ci 107762306a36Sopenharmony_ci i = vmx_find_loadstore_msr_slot(&m->guest, msr); 107862306a36Sopenharmony_ci if (!entry_only) 107962306a36Sopenharmony_ci j = vmx_find_loadstore_msr_slot(&m->host, msr); 108062306a36Sopenharmony_ci 108162306a36Sopenharmony_ci if ((i < 0 && m->guest.nr == MAX_NR_LOADSTORE_MSRS) || 108262306a36Sopenharmony_ci (j < 0 && m->host.nr == MAX_NR_LOADSTORE_MSRS)) { 108362306a36Sopenharmony_ci printk_once(KERN_WARNING "Not enough msr switch entries. " 108462306a36Sopenharmony_ci "Can't add msr %x\n", msr); 108562306a36Sopenharmony_ci return; 108662306a36Sopenharmony_ci } 108762306a36Sopenharmony_ci if (i < 0) { 108862306a36Sopenharmony_ci i = m->guest.nr++; 108962306a36Sopenharmony_ci vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); 109062306a36Sopenharmony_ci } 109162306a36Sopenharmony_ci m->guest.val[i].index = msr; 109262306a36Sopenharmony_ci m->guest.val[i].value = guest_val; 109362306a36Sopenharmony_ci 109462306a36Sopenharmony_ci if (entry_only) 109562306a36Sopenharmony_ci return; 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_ci if (j < 0) { 109862306a36Sopenharmony_ci j = m->host.nr++; 109962306a36Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); 110062306a36Sopenharmony_ci } 110162306a36Sopenharmony_ci m->host.val[j].index = msr; 110262306a36Sopenharmony_ci m->host.val[j].value = host_val; 110362306a36Sopenharmony_ci} 110462306a36Sopenharmony_ci 110562306a36Sopenharmony_cistatic bool update_transition_efer(struct vcpu_vmx *vmx) 110662306a36Sopenharmony_ci{ 110762306a36Sopenharmony_ci u64 guest_efer = vmx->vcpu.arch.efer; 110862306a36Sopenharmony_ci u64 ignore_bits = 0; 110962306a36Sopenharmony_ci int i; 111062306a36Sopenharmony_ci 111162306a36Sopenharmony_ci /* Shadow paging assumes NX to be available. */ 111262306a36Sopenharmony_ci if (!enable_ept) 111362306a36Sopenharmony_ci guest_efer |= EFER_NX; 111462306a36Sopenharmony_ci 111562306a36Sopenharmony_ci /* 111662306a36Sopenharmony_ci * LMA and LME handled by hardware; SCE meaningless outside long mode. 111762306a36Sopenharmony_ci */ 111862306a36Sopenharmony_ci ignore_bits |= EFER_SCE; 111962306a36Sopenharmony_ci#ifdef CONFIG_X86_64 112062306a36Sopenharmony_ci ignore_bits |= EFER_LMA | EFER_LME; 112162306a36Sopenharmony_ci /* SCE is meaningful only in long mode on Intel */ 112262306a36Sopenharmony_ci if (guest_efer & EFER_LMA) 112362306a36Sopenharmony_ci ignore_bits &= ~(u64)EFER_SCE; 112462306a36Sopenharmony_ci#endif 112562306a36Sopenharmony_ci 112662306a36Sopenharmony_ci /* 112762306a36Sopenharmony_ci * On EPT, we can't emulate NX, so we must switch EFER atomically. 112862306a36Sopenharmony_ci * On CPUs that support "load IA32_EFER", always switch EFER 112962306a36Sopenharmony_ci * atomically, since it's faster than switching it manually. 
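 *
 * (Summary added for clarity; this is the editor's reading of the code
 * below, not original text: switch EFER via the atomic MSR switching
 * machinery when the CPU has the dedicated EFER load controls, or when
 * EPT is enabled and guest/host EFER.NX differ; otherwise fall back to
 * the user-return MSR mechanism further down.)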
113062306a36Sopenharmony_ci */ 113162306a36Sopenharmony_ci if (cpu_has_load_ia32_efer() || 113262306a36Sopenharmony_ci (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) { 113362306a36Sopenharmony_ci if (!(guest_efer & EFER_LMA)) 113462306a36Sopenharmony_ci guest_efer &= ~EFER_LME; 113562306a36Sopenharmony_ci if (guest_efer != host_efer) 113662306a36Sopenharmony_ci add_atomic_switch_msr(vmx, MSR_EFER, 113762306a36Sopenharmony_ci guest_efer, host_efer, false); 113862306a36Sopenharmony_ci else 113962306a36Sopenharmony_ci clear_atomic_switch_msr(vmx, MSR_EFER); 114062306a36Sopenharmony_ci return false; 114162306a36Sopenharmony_ci } 114262306a36Sopenharmony_ci 114362306a36Sopenharmony_ci i = kvm_find_user_return_msr(MSR_EFER); 114462306a36Sopenharmony_ci if (i < 0) 114562306a36Sopenharmony_ci return false; 114662306a36Sopenharmony_ci 114762306a36Sopenharmony_ci clear_atomic_switch_msr(vmx, MSR_EFER); 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci guest_efer &= ~ignore_bits; 115062306a36Sopenharmony_ci guest_efer |= host_efer & ignore_bits; 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci vmx->guest_uret_msrs[i].data = guest_efer; 115362306a36Sopenharmony_ci vmx->guest_uret_msrs[i].mask = ~ignore_bits; 115462306a36Sopenharmony_ci 115562306a36Sopenharmony_ci return true; 115662306a36Sopenharmony_ci} 115762306a36Sopenharmony_ci 115862306a36Sopenharmony_ci#ifdef CONFIG_X86_32 115962306a36Sopenharmony_ci/* 116062306a36Sopenharmony_ci * On 32-bit kernels, VM exits still load the FS and GS bases from the 116162306a36Sopenharmony_ci * VMCS rather than the segment table. KVM uses this helper to figure 116262306a36Sopenharmony_ci * out the current bases to poke them into the VMCS before entry. 116362306a36Sopenharmony_ci */ 116462306a36Sopenharmony_cistatic unsigned long segment_base(u16 selector) 116562306a36Sopenharmony_ci{ 116662306a36Sopenharmony_ci struct desc_struct *table; 116762306a36Sopenharmony_ci unsigned long v; 116862306a36Sopenharmony_ci 116962306a36Sopenharmony_ci if (!(selector & ~SEGMENT_RPL_MASK)) 117062306a36Sopenharmony_ci return 0; 117162306a36Sopenharmony_ci 117262306a36Sopenharmony_ci table = get_current_gdt_ro(); 117362306a36Sopenharmony_ci 117462306a36Sopenharmony_ci if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) { 117562306a36Sopenharmony_ci u16 ldt_selector = kvm_read_ldt(); 117662306a36Sopenharmony_ci 117762306a36Sopenharmony_ci if (!(ldt_selector & ~SEGMENT_RPL_MASK)) 117862306a36Sopenharmony_ci return 0; 117962306a36Sopenharmony_ci 118062306a36Sopenharmony_ci table = (struct desc_struct *)segment_base(ldt_selector); 118162306a36Sopenharmony_ci } 118262306a36Sopenharmony_ci v = get_desc_base(&table[selector >> 3]); 118362306a36Sopenharmony_ci return v; 118462306a36Sopenharmony_ci} 118562306a36Sopenharmony_ci#endif 118662306a36Sopenharmony_ci 118762306a36Sopenharmony_cistatic inline bool pt_can_write_msr(struct vcpu_vmx *vmx) 118862306a36Sopenharmony_ci{ 118962306a36Sopenharmony_ci return vmx_pt_mode_is_host_guest() && 119062306a36Sopenharmony_ci !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN); 119162306a36Sopenharmony_ci} 119262306a36Sopenharmony_ci 119362306a36Sopenharmony_cistatic inline bool pt_output_base_valid(struct kvm_vcpu *vcpu, u64 base) 119462306a36Sopenharmony_ci{ 119562306a36Sopenharmony_ci /* The base must be 128-byte aligned and a legal physical address. 
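 *
 * (Worked example added by the editor: 128-byte alignment means bits 6:0
 * of the base must be zero, so e.g. 0x1000 passes the check below while
 * 0x1040 does not.)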
*/ 119662306a36Sopenharmony_ci return kvm_vcpu_is_legal_aligned_gpa(vcpu, base, 128); 119762306a36Sopenharmony_ci} 119862306a36Sopenharmony_ci 119962306a36Sopenharmony_cistatic inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range) 120062306a36Sopenharmony_ci{ 120162306a36Sopenharmony_ci u32 i; 120262306a36Sopenharmony_ci 120362306a36Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status); 120462306a36Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base); 120562306a36Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask); 120662306a36Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match); 120762306a36Sopenharmony_ci for (i = 0; i < addr_range; i++) { 120862306a36Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]); 120962306a36Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]); 121062306a36Sopenharmony_ci } 121162306a36Sopenharmony_ci} 121262306a36Sopenharmony_ci 121362306a36Sopenharmony_cistatic inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range) 121462306a36Sopenharmony_ci{ 121562306a36Sopenharmony_ci u32 i; 121662306a36Sopenharmony_ci 121762306a36Sopenharmony_ci rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status); 121862306a36Sopenharmony_ci rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base); 121962306a36Sopenharmony_ci rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask); 122062306a36Sopenharmony_ci rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match); 122162306a36Sopenharmony_ci for (i = 0; i < addr_range; i++) { 122262306a36Sopenharmony_ci rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]); 122362306a36Sopenharmony_ci rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]); 122462306a36Sopenharmony_ci } 122562306a36Sopenharmony_ci} 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_cistatic void pt_guest_enter(struct vcpu_vmx *vmx) 122862306a36Sopenharmony_ci{ 122962306a36Sopenharmony_ci if (vmx_pt_mode_is_system()) 123062306a36Sopenharmony_ci return; 123162306a36Sopenharmony_ci 123262306a36Sopenharmony_ci /* 123362306a36Sopenharmony_ci * GUEST_IA32_RTIT_CTL is already set in the VMCS. 123462306a36Sopenharmony_ci * Save host state before VM entry. 123562306a36Sopenharmony_ci */ 123662306a36Sopenharmony_ci rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); 123762306a36Sopenharmony_ci if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) { 123862306a36Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_CTL, 0); 123962306a36Sopenharmony_ci pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.num_address_ranges); 124062306a36Sopenharmony_ci pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.num_address_ranges); 124162306a36Sopenharmony_ci } 124262306a36Sopenharmony_ci} 124362306a36Sopenharmony_ci 124462306a36Sopenharmony_cistatic void pt_guest_exit(struct vcpu_vmx *vmx) 124562306a36Sopenharmony_ci{ 124662306a36Sopenharmony_ci if (vmx_pt_mode_is_system()) 124762306a36Sopenharmony_ci return; 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_ci if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) { 125062306a36Sopenharmony_ci pt_save_msr(&vmx->pt_desc.guest, vmx->pt_desc.num_address_ranges); 125162306a36Sopenharmony_ci pt_load_msr(&vmx->pt_desc.host, vmx->pt_desc.num_address_ranges); 125262306a36Sopenharmony_ci } 125362306a36Sopenharmony_ci 125462306a36Sopenharmony_ci /* 125562306a36Sopenharmony_ci * KVM requires VM_EXIT_CLEAR_IA32_RTIT_CTL to expose PT to the guest, 125662306a36Sopenharmony_ci * i.e. RTIT_CTL is always cleared on VM-Exit. Restore it if necessary. 
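 *
 * (Editor's note: host.ctl was snapshotted in pt_guest_enter(); a
 * non-zero value means host tracing was enabled, which is the only case
 * where the WRMSR below is needed.)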
125762306a36Sopenharmony_ci */ 125862306a36Sopenharmony_ci if (vmx->pt_desc.host.ctl) 125962306a36Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); 126062306a36Sopenharmony_ci} 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_civoid vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, 126362306a36Sopenharmony_ci unsigned long fs_base, unsigned long gs_base) 126462306a36Sopenharmony_ci{ 126562306a36Sopenharmony_ci if (unlikely(fs_sel != host->fs_sel)) { 126662306a36Sopenharmony_ci if (!(fs_sel & 7)) 126762306a36Sopenharmony_ci vmcs_write16(HOST_FS_SELECTOR, fs_sel); 126862306a36Sopenharmony_ci else 126962306a36Sopenharmony_ci vmcs_write16(HOST_FS_SELECTOR, 0); 127062306a36Sopenharmony_ci host->fs_sel = fs_sel; 127162306a36Sopenharmony_ci } 127262306a36Sopenharmony_ci if (unlikely(gs_sel != host->gs_sel)) { 127362306a36Sopenharmony_ci if (!(gs_sel & 7)) 127462306a36Sopenharmony_ci vmcs_write16(HOST_GS_SELECTOR, gs_sel); 127562306a36Sopenharmony_ci else 127662306a36Sopenharmony_ci vmcs_write16(HOST_GS_SELECTOR, 0); 127762306a36Sopenharmony_ci host->gs_sel = gs_sel; 127862306a36Sopenharmony_ci } 127962306a36Sopenharmony_ci if (unlikely(fs_base != host->fs_base)) { 128062306a36Sopenharmony_ci vmcs_writel(HOST_FS_BASE, fs_base); 128162306a36Sopenharmony_ci host->fs_base = fs_base; 128262306a36Sopenharmony_ci } 128362306a36Sopenharmony_ci if (unlikely(gs_base != host->gs_base)) { 128462306a36Sopenharmony_ci vmcs_writel(HOST_GS_BASE, gs_base); 128562306a36Sopenharmony_ci host->gs_base = gs_base; 128662306a36Sopenharmony_ci } 128762306a36Sopenharmony_ci} 128862306a36Sopenharmony_ci 128962306a36Sopenharmony_civoid vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) 129062306a36Sopenharmony_ci{ 129162306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 129262306a36Sopenharmony_ci struct vmcs_host_state *host_state; 129362306a36Sopenharmony_ci#ifdef CONFIG_X86_64 129462306a36Sopenharmony_ci int cpu = raw_smp_processor_id(); 129562306a36Sopenharmony_ci#endif 129662306a36Sopenharmony_ci unsigned long fs_base, gs_base; 129762306a36Sopenharmony_ci u16 fs_sel, gs_sel; 129862306a36Sopenharmony_ci int i; 129962306a36Sopenharmony_ci 130062306a36Sopenharmony_ci vmx->req_immediate_exit = false; 130162306a36Sopenharmony_ci 130262306a36Sopenharmony_ci /* 130362306a36Sopenharmony_ci * Note that guest MSRs to be saved/restored can also be changed 130462306a36Sopenharmony_ci * when guest state is loaded. This happens when guest transitions 130562306a36Sopenharmony_ci * to/from long-mode by setting MSR_EFER.LMA. 
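 *
 * (Cross-reference added by the editor: that recalculation is done by
 * vmx_setup_uret_msrs(), which clears guest_uret_msrs_loaded so that the
 * loop below re-installs the user-return MSRs on the next switch.)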
130662306a36Sopenharmony_ci */ 130762306a36Sopenharmony_ci if (!vmx->guest_uret_msrs_loaded) { 130862306a36Sopenharmony_ci vmx->guest_uret_msrs_loaded = true; 130962306a36Sopenharmony_ci for (i = 0; i < kvm_nr_uret_msrs; ++i) { 131062306a36Sopenharmony_ci if (!vmx->guest_uret_msrs[i].load_into_hardware) 131162306a36Sopenharmony_ci continue; 131262306a36Sopenharmony_ci 131362306a36Sopenharmony_ci kvm_set_user_return_msr(i, 131462306a36Sopenharmony_ci vmx->guest_uret_msrs[i].data, 131562306a36Sopenharmony_ci vmx->guest_uret_msrs[i].mask); 131662306a36Sopenharmony_ci } 131762306a36Sopenharmony_ci } 131862306a36Sopenharmony_ci 131962306a36Sopenharmony_ci if (vmx->nested.need_vmcs12_to_shadow_sync) 132062306a36Sopenharmony_ci nested_sync_vmcs12_to_shadow(vcpu); 132162306a36Sopenharmony_ci 132262306a36Sopenharmony_ci if (vmx->guest_state_loaded) 132362306a36Sopenharmony_ci return; 132462306a36Sopenharmony_ci 132562306a36Sopenharmony_ci host_state = &vmx->loaded_vmcs->host_state; 132662306a36Sopenharmony_ci 132762306a36Sopenharmony_ci /* 132862306a36Sopenharmony_ci * Set host fs and gs selectors. Unfortunately, 22.2.3 does not 132962306a36Sopenharmony_ci * allow segment selectors with cpl > 0 or ti == 1. 133062306a36Sopenharmony_ci */ 133162306a36Sopenharmony_ci host_state->ldt_sel = kvm_read_ldt(); 133262306a36Sopenharmony_ci 133362306a36Sopenharmony_ci#ifdef CONFIG_X86_64 133462306a36Sopenharmony_ci savesegment(ds, host_state->ds_sel); 133562306a36Sopenharmony_ci savesegment(es, host_state->es_sel); 133662306a36Sopenharmony_ci 133762306a36Sopenharmony_ci gs_base = cpu_kernelmode_gs_base(cpu); 133862306a36Sopenharmony_ci if (likely(is_64bit_mm(current->mm))) { 133962306a36Sopenharmony_ci current_save_fsgs(); 134062306a36Sopenharmony_ci fs_sel = current->thread.fsindex; 134162306a36Sopenharmony_ci gs_sel = current->thread.gsindex; 134262306a36Sopenharmony_ci fs_base = current->thread.fsbase; 134362306a36Sopenharmony_ci vmx->msr_host_kernel_gs_base = current->thread.gsbase; 134462306a36Sopenharmony_ci } else { 134562306a36Sopenharmony_ci savesegment(fs, fs_sel); 134662306a36Sopenharmony_ci savesegment(gs, gs_sel); 134762306a36Sopenharmony_ci fs_base = read_msr(MSR_FS_BASE); 134862306a36Sopenharmony_ci vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); 134962306a36Sopenharmony_ci } 135062306a36Sopenharmony_ci 135162306a36Sopenharmony_ci wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); 135262306a36Sopenharmony_ci#else 135362306a36Sopenharmony_ci savesegment(fs, fs_sel); 135462306a36Sopenharmony_ci savesegment(gs, gs_sel); 135562306a36Sopenharmony_ci fs_base = segment_base(fs_sel); 135662306a36Sopenharmony_ci gs_base = segment_base(gs_sel); 135762306a36Sopenharmony_ci#endif 135862306a36Sopenharmony_ci 135962306a36Sopenharmony_ci vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); 136062306a36Sopenharmony_ci vmx->guest_state_loaded = true; 136162306a36Sopenharmony_ci} 136262306a36Sopenharmony_ci 136362306a36Sopenharmony_cistatic void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) 136462306a36Sopenharmony_ci{ 136562306a36Sopenharmony_ci struct vmcs_host_state *host_state; 136662306a36Sopenharmony_ci 136762306a36Sopenharmony_ci if (!vmx->guest_state_loaded) 136862306a36Sopenharmony_ci return; 136962306a36Sopenharmony_ci 137062306a36Sopenharmony_ci host_state = &vmx->loaded_vmcs->host_state; 137162306a36Sopenharmony_ci 137262306a36Sopenharmony_ci ++vmx->vcpu.stat.host_state_reload; 137362306a36Sopenharmony_ci 137462306a36Sopenharmony_ci#ifdef CONFIG_X86_64 
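	/*
	 * Comment added for clarity (not in the original): capture the
	 * guest's MSR_KERNEL_GS_BASE while it is still live in hardware;
	 * the host value is written back a few lines below.
	 */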
137562306a36Sopenharmony_ci rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
137662306a36Sopenharmony_ci#endif
137762306a36Sopenharmony_ci if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
137862306a36Sopenharmony_ci kvm_load_ldt(host_state->ldt_sel);
137962306a36Sopenharmony_ci#ifdef CONFIG_X86_64
138062306a36Sopenharmony_ci load_gs_index(host_state->gs_sel);
138162306a36Sopenharmony_ci#else
138262306a36Sopenharmony_ci loadsegment(gs, host_state->gs_sel);
138362306a36Sopenharmony_ci#endif
138462306a36Sopenharmony_ci }
138562306a36Sopenharmony_ci if (host_state->fs_sel & 7)
138662306a36Sopenharmony_ci loadsegment(fs, host_state->fs_sel);
138762306a36Sopenharmony_ci#ifdef CONFIG_X86_64
138862306a36Sopenharmony_ci if (unlikely(host_state->ds_sel | host_state->es_sel)) {
138962306a36Sopenharmony_ci loadsegment(ds, host_state->ds_sel);
139062306a36Sopenharmony_ci loadsegment(es, host_state->es_sel);
139162306a36Sopenharmony_ci }
139262306a36Sopenharmony_ci#endif
139362306a36Sopenharmony_ci invalidate_tss_limit();
139462306a36Sopenharmony_ci#ifdef CONFIG_X86_64
139562306a36Sopenharmony_ci wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
139662306a36Sopenharmony_ci#endif
139762306a36Sopenharmony_ci load_fixmap_gdt(raw_smp_processor_id());
139862306a36Sopenharmony_ci vmx->guest_state_loaded = false;
139962306a36Sopenharmony_ci vmx->guest_uret_msrs_loaded = false;
140062306a36Sopenharmony_ci}
140162306a36Sopenharmony_ci
140262306a36Sopenharmony_ci#ifdef CONFIG_X86_64
140362306a36Sopenharmony_cistatic u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
140462306a36Sopenharmony_ci{
140562306a36Sopenharmony_ci preempt_disable();
140662306a36Sopenharmony_ci if (vmx->guest_state_loaded)
140762306a36Sopenharmony_ci rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
140862306a36Sopenharmony_ci preempt_enable();
140962306a36Sopenharmony_ci return vmx->msr_guest_kernel_gs_base;
141062306a36Sopenharmony_ci}
141162306a36Sopenharmony_ci
141262306a36Sopenharmony_cistatic void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
141362306a36Sopenharmony_ci{
141462306a36Sopenharmony_ci preempt_disable();
141562306a36Sopenharmony_ci if (vmx->guest_state_loaded)
141662306a36Sopenharmony_ci wrmsrl(MSR_KERNEL_GS_BASE, data);
141762306a36Sopenharmony_ci preempt_enable();
141862306a36Sopenharmony_ci vmx->msr_guest_kernel_gs_base = data;
141962306a36Sopenharmony_ci}
142062306a36Sopenharmony_ci#endif
142162306a36Sopenharmony_ci
142262306a36Sopenharmony_civoid vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
142362306a36Sopenharmony_ci struct loaded_vmcs *buddy)
142462306a36Sopenharmony_ci{
142562306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu);
142662306a36Sopenharmony_ci bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
142762306a36Sopenharmony_ci struct vmcs *prev;
142862306a36Sopenharmony_ci
142962306a36Sopenharmony_ci if (!already_loaded) {
143062306a36Sopenharmony_ci loaded_vmcs_clear(vmx->loaded_vmcs);
143162306a36Sopenharmony_ci local_irq_disable();
143262306a36Sopenharmony_ci
143362306a36Sopenharmony_ci /*
143462306a36Sopenharmony_ci * Ensure loaded_vmcs->cpu is read before adding loaded_vmcs to
143562306a36Sopenharmony_ci * this cpu's percpu list, otherwise it may not yet be deleted
143662306a36Sopenharmony_ci * from its previous cpu's percpu list. Pairs with the
143762306a36Sopenharmony_ci * smp_wmb() in __loaded_vmcs_clear().
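 *
 * (Illustrative sketch added by the editor: __loaded_vmcs_clear()
 * removes the VMCS from the old CPU's list before it publishes the new
 * ->cpu value; the smp_rmb() below guarantees that once this CPU sees
 * that ->cpu value, it also sees the completed list deletion, so the
 * VMCS is never on two per-CPU lists at once.)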
143862306a36Sopenharmony_ci */ 143962306a36Sopenharmony_ci smp_rmb(); 144062306a36Sopenharmony_ci 144162306a36Sopenharmony_ci list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, 144262306a36Sopenharmony_ci &per_cpu(loaded_vmcss_on_cpu, cpu)); 144362306a36Sopenharmony_ci local_irq_enable(); 144462306a36Sopenharmony_ci } 144562306a36Sopenharmony_ci 144662306a36Sopenharmony_ci prev = per_cpu(current_vmcs, cpu); 144762306a36Sopenharmony_ci if (prev != vmx->loaded_vmcs->vmcs) { 144862306a36Sopenharmony_ci per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; 144962306a36Sopenharmony_ci vmcs_load(vmx->loaded_vmcs->vmcs); 145062306a36Sopenharmony_ci 145162306a36Sopenharmony_ci /* 145262306a36Sopenharmony_ci * No indirect branch prediction barrier needed when switching 145362306a36Sopenharmony_ci * the active VMCS within a vCPU, unless IBRS is advertised to 145462306a36Sopenharmony_ci * the vCPU. To minimize the number of IBPBs executed, KVM 145562306a36Sopenharmony_ci * performs IBPB on nested VM-Exit (a single nested transition 145662306a36Sopenharmony_ci * may switch the active VMCS multiple times). 145762306a36Sopenharmony_ci */ 145862306a36Sopenharmony_ci if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev)) 145962306a36Sopenharmony_ci indirect_branch_prediction_barrier(); 146062306a36Sopenharmony_ci } 146162306a36Sopenharmony_ci 146262306a36Sopenharmony_ci if (!already_loaded) { 146362306a36Sopenharmony_ci void *gdt = get_current_gdt_ro(); 146462306a36Sopenharmony_ci 146562306a36Sopenharmony_ci /* 146662306a36Sopenharmony_ci * Flush all EPTP/VPID contexts, the new pCPU may have stale 146762306a36Sopenharmony_ci * TLB entries from its previous association with the vCPU. 146862306a36Sopenharmony_ci */ 146962306a36Sopenharmony_ci kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 147062306a36Sopenharmony_ci 147162306a36Sopenharmony_ci /* 147262306a36Sopenharmony_ci * Linux uses per-cpu TSS and GDT, so set these when switching 147362306a36Sopenharmony_ci * processors. See 22.2.4. 147462306a36Sopenharmony_ci */ 147562306a36Sopenharmony_ci vmcs_writel(HOST_TR_BASE, 147662306a36Sopenharmony_ci (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); 147762306a36Sopenharmony_ci vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_IA32_EMULATION) || IS_ENABLED(CONFIG_X86_32)) { 148062306a36Sopenharmony_ci /* 22.2.3 */ 148162306a36Sopenharmony_ci vmcs_writel(HOST_IA32_SYSENTER_ESP, 148262306a36Sopenharmony_ci (unsigned long)(cpu_entry_stack(cpu) + 1)); 148362306a36Sopenharmony_ci } 148462306a36Sopenharmony_ci 148562306a36Sopenharmony_ci vmx->loaded_vmcs->cpu = cpu; 148662306a36Sopenharmony_ci } 148762306a36Sopenharmony_ci} 148862306a36Sopenharmony_ci 148962306a36Sopenharmony_ci/* 149062306a36Sopenharmony_ci * Switches to specified vcpu, until a matching vcpu_put(), but assumes 149162306a36Sopenharmony_ci * vcpu mutex is already taken. 
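 *
 * (Hypothetical call-chain sketch added by the editor, inferred from the
 * surrounding code rather than stated in the original comment:
 *
 *	vcpu_load(vcpu)
 *	    kvm_arch_vcpu_load(vcpu, cpu)
 *	        vmx_vcpu_load(vcpu, cpu)
 *	            vmx_vcpu_load_vmcs() and vmx_vcpu_pi_load()
 * )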
149262306a36Sopenharmony_ci */ 149362306a36Sopenharmony_cistatic void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 149462306a36Sopenharmony_ci{ 149562306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 149662306a36Sopenharmony_ci 149762306a36Sopenharmony_ci vmx_vcpu_load_vmcs(vcpu, cpu, NULL); 149862306a36Sopenharmony_ci 149962306a36Sopenharmony_ci vmx_vcpu_pi_load(vcpu, cpu); 150062306a36Sopenharmony_ci 150162306a36Sopenharmony_ci vmx->host_debugctlmsr = get_debugctlmsr(); 150262306a36Sopenharmony_ci} 150362306a36Sopenharmony_ci 150462306a36Sopenharmony_cistatic void vmx_vcpu_put(struct kvm_vcpu *vcpu) 150562306a36Sopenharmony_ci{ 150662306a36Sopenharmony_ci vmx_vcpu_pi_put(vcpu); 150762306a36Sopenharmony_ci 150862306a36Sopenharmony_ci vmx_prepare_switch_to_host(to_vmx(vcpu)); 150962306a36Sopenharmony_ci} 151062306a36Sopenharmony_ci 151162306a36Sopenharmony_cibool vmx_emulation_required(struct kvm_vcpu *vcpu) 151262306a36Sopenharmony_ci{ 151362306a36Sopenharmony_ci return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu); 151462306a36Sopenharmony_ci} 151562306a36Sopenharmony_ci 151662306a36Sopenharmony_ciunsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) 151762306a36Sopenharmony_ci{ 151862306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 151962306a36Sopenharmony_ci unsigned long rflags, save_rflags; 152062306a36Sopenharmony_ci 152162306a36Sopenharmony_ci if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) { 152262306a36Sopenharmony_ci kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); 152362306a36Sopenharmony_ci rflags = vmcs_readl(GUEST_RFLAGS); 152462306a36Sopenharmony_ci if (vmx->rmode.vm86_active) { 152562306a36Sopenharmony_ci rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; 152662306a36Sopenharmony_ci save_rflags = vmx->rmode.save_rflags; 152762306a36Sopenharmony_ci rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; 152862306a36Sopenharmony_ci } 152962306a36Sopenharmony_ci vmx->rflags = rflags; 153062306a36Sopenharmony_ci } 153162306a36Sopenharmony_ci return vmx->rflags; 153262306a36Sopenharmony_ci} 153362306a36Sopenharmony_ci 153462306a36Sopenharmony_civoid vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 153562306a36Sopenharmony_ci{ 153662306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 153762306a36Sopenharmony_ci unsigned long old_rflags; 153862306a36Sopenharmony_ci 153962306a36Sopenharmony_ci /* 154062306a36Sopenharmony_ci * Unlike CR0 and CR4, RFLAGS handling requires checking if the vCPU 154162306a36Sopenharmony_ci * is an unrestricted guest in order to mark L2 as needing emulation 154262306a36Sopenharmony_ci * if L1 runs L2 as a restricted guest. 
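 *
 * (Editor's addition: for unrestricted guests the new value is written
 * straight to the VMCS below; otherwise a change in X86_EFLAGS_VM is
 * what forces emulation_required to be re-evaluated.)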
154362306a36Sopenharmony_ci */
154462306a36Sopenharmony_ci if (is_unrestricted_guest(vcpu)) {
154562306a36Sopenharmony_ci kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
154662306a36Sopenharmony_ci vmx->rflags = rflags;
154762306a36Sopenharmony_ci vmcs_writel(GUEST_RFLAGS, rflags);
154862306a36Sopenharmony_ci return;
154962306a36Sopenharmony_ci }
155062306a36Sopenharmony_ci
155162306a36Sopenharmony_ci old_rflags = vmx_get_rflags(vcpu);
155262306a36Sopenharmony_ci vmx->rflags = rflags;
155362306a36Sopenharmony_ci if (vmx->rmode.vm86_active) {
155462306a36Sopenharmony_ci vmx->rmode.save_rflags = rflags;
155562306a36Sopenharmony_ci rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
155662306a36Sopenharmony_ci }
155762306a36Sopenharmony_ci vmcs_writel(GUEST_RFLAGS, rflags);
155862306a36Sopenharmony_ci
155962306a36Sopenharmony_ci if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM)
156062306a36Sopenharmony_ci vmx->emulation_required = vmx_emulation_required(vcpu);
156162306a36Sopenharmony_ci}
156262306a36Sopenharmony_ci
156362306a36Sopenharmony_cistatic bool vmx_get_if_flag(struct kvm_vcpu *vcpu)
156462306a36Sopenharmony_ci{
156562306a36Sopenharmony_ci return vmx_get_rflags(vcpu) & X86_EFLAGS_IF;
156662306a36Sopenharmony_ci}
156762306a36Sopenharmony_ci
156862306a36Sopenharmony_ciu32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
156962306a36Sopenharmony_ci{
157062306a36Sopenharmony_ci u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
157162306a36Sopenharmony_ci int ret = 0;
157262306a36Sopenharmony_ci
157362306a36Sopenharmony_ci if (interruptibility & GUEST_INTR_STATE_STI)
157462306a36Sopenharmony_ci ret |= KVM_X86_SHADOW_INT_STI;
157562306a36Sopenharmony_ci if (interruptibility & GUEST_INTR_STATE_MOV_SS)
157662306a36Sopenharmony_ci ret |= KVM_X86_SHADOW_INT_MOV_SS;
157762306a36Sopenharmony_ci
157862306a36Sopenharmony_ci return ret;
157962306a36Sopenharmony_ci}
158062306a36Sopenharmony_ci
158162306a36Sopenharmony_civoid vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
158262306a36Sopenharmony_ci{
158362306a36Sopenharmony_ci u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
158462306a36Sopenharmony_ci u32 interruptibility = interruptibility_old;
158562306a36Sopenharmony_ci
158662306a36Sopenharmony_ci interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
158762306a36Sopenharmony_ci
158862306a36Sopenharmony_ci if (mask & KVM_X86_SHADOW_INT_MOV_SS)
158962306a36Sopenharmony_ci interruptibility |= GUEST_INTR_STATE_MOV_SS;
159062306a36Sopenharmony_ci else if (mask & KVM_X86_SHADOW_INT_STI)
159162306a36Sopenharmony_ci interruptibility |= GUEST_INTR_STATE_STI;
159262306a36Sopenharmony_ci
159362306a36Sopenharmony_ci if ((interruptibility != interruptibility_old))
159462306a36Sopenharmony_ci vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
159562306a36Sopenharmony_ci}
159662306a36Sopenharmony_ci
159762306a36Sopenharmony_cistatic int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
159862306a36Sopenharmony_ci{
159962306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu);
160062306a36Sopenharmony_ci unsigned long value;
160162306a36Sopenharmony_ci
160262306a36Sopenharmony_ci /*
160362306a36Sopenharmony_ci * Any MSR write that attempts to change bits marked reserved will
160462306a36Sopenharmony_ci * cause a #GP fault.
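 *
 * (Editor's note: ctl_bitmask has a bit set for every RTIT_CTL bit this
 * vCPU is not allowed to touch, so the test below reduces to rejecting
 * any write where (data & ctl_bitmask) != 0.)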
160562306a36Sopenharmony_ci */
160662306a36Sopenharmony_ci if (data & vmx->pt_desc.ctl_bitmask)
160762306a36Sopenharmony_ci return 1;
160862306a36Sopenharmony_ci
160962306a36Sopenharmony_ci /*
161062306a36Sopenharmony_ci * Any attempt to modify IA32_RTIT_CTL while TraceEn is set will
161162306a36Sopenharmony_ci * result in a #GP unless the same write also clears TraceEn.
161262306a36Sopenharmony_ci */
161362306a36Sopenharmony_ci if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) &&
161462306a36Sopenharmony_ci ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN))
161562306a36Sopenharmony_ci return 1;
161662306a36Sopenharmony_ci
161762306a36Sopenharmony_ci /*
161862306a36Sopenharmony_ci * WRMSR to IA32_RTIT_CTL that sets TraceEn but clears this bit
161962306a36Sopenharmony_ci * and FabricEn would cause #GP if
162062306a36Sopenharmony_ci * CPUID.(EAX=14H, ECX=0):ECX.SNGLRGNOUT[bit 2] = 0
162162306a36Sopenharmony_ci */
162262306a36Sopenharmony_ci if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) &&
162362306a36Sopenharmony_ci !(data & RTIT_CTL_FABRIC_EN) &&
162462306a36Sopenharmony_ci !intel_pt_validate_cap(vmx->pt_desc.caps,
162562306a36Sopenharmony_ci PT_CAP_single_range_output))
162662306a36Sopenharmony_ci return 1;
162762306a36Sopenharmony_ci
162862306a36Sopenharmony_ci /*
162962306a36Sopenharmony_ci * MTCFreq, CycThresh and PSBFreq encodings check: any MSR write that
163062306a36Sopenharmony_ci * utilizes encodings marked reserved will cause a #GP fault.
163162306a36Sopenharmony_ci */
163262306a36Sopenharmony_ci value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods);
163362306a36Sopenharmony_ci if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) &&
163462306a36Sopenharmony_ci !test_bit((data & RTIT_CTL_MTC_RANGE) >>
163562306a36Sopenharmony_ci RTIT_CTL_MTC_RANGE_OFFSET, &value))
163662306a36Sopenharmony_ci return 1;
163762306a36Sopenharmony_ci value = intel_pt_validate_cap(vmx->pt_desc.caps,
163862306a36Sopenharmony_ci PT_CAP_cycle_thresholds);
163962306a36Sopenharmony_ci if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
164062306a36Sopenharmony_ci !test_bit((data & RTIT_CTL_CYC_THRESH) >>
164162306a36Sopenharmony_ci RTIT_CTL_CYC_THRESH_OFFSET, &value))
164262306a36Sopenharmony_ci return 1;
164362306a36Sopenharmony_ci value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods);
164462306a36Sopenharmony_ci if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
164562306a36Sopenharmony_ci !test_bit((data & RTIT_CTL_PSB_FREQ) >>
164662306a36Sopenharmony_ci RTIT_CTL_PSB_FREQ_OFFSET, &value))
164762306a36Sopenharmony_ci return 1;
164862306a36Sopenharmony_ci
164962306a36Sopenharmony_ci /*
165062306a36Sopenharmony_ci * If ADDRx_CFG is reserved or the encoding is >2, the write will
165162306a36Sopenharmony_ci * cause a #GP fault.
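 *
 * (Background added by the editor, based on Intel PT documentation and
 * not on the original comment: each ADDRn_CFG encoding is 0 = range
 * unused, 1 = address filtering, 2 = TraceStop, with values above 2
 * reserved; hence the "> 2" checks below.)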
165262306a36Sopenharmony_ci */ 165362306a36Sopenharmony_ci value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET; 165462306a36Sopenharmony_ci if ((value && (vmx->pt_desc.num_address_ranges < 1)) || (value > 2)) 165562306a36Sopenharmony_ci return 1; 165662306a36Sopenharmony_ci value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET; 165762306a36Sopenharmony_ci if ((value && (vmx->pt_desc.num_address_ranges < 2)) || (value > 2)) 165862306a36Sopenharmony_ci return 1; 165962306a36Sopenharmony_ci value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET; 166062306a36Sopenharmony_ci if ((value && (vmx->pt_desc.num_address_ranges < 3)) || (value > 2)) 166162306a36Sopenharmony_ci return 1; 166262306a36Sopenharmony_ci value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET; 166362306a36Sopenharmony_ci if ((value && (vmx->pt_desc.num_address_ranges < 4)) || (value > 2)) 166462306a36Sopenharmony_ci return 1; 166562306a36Sopenharmony_ci 166662306a36Sopenharmony_ci return 0; 166762306a36Sopenharmony_ci} 166862306a36Sopenharmony_ci 166962306a36Sopenharmony_cistatic bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, 167062306a36Sopenharmony_ci void *insn, int insn_len) 167162306a36Sopenharmony_ci{ 167262306a36Sopenharmony_ci /* 167362306a36Sopenharmony_ci * Emulation of instructions in SGX enclaves is impossible as RIP does 167462306a36Sopenharmony_ci * not point at the failing instruction, and even if it did, the code 167562306a36Sopenharmony_ci * stream is inaccessible. Inject #UD instead of exiting to userspace 167662306a36Sopenharmony_ci * so that guest userspace can't DoS the guest simply by triggering 167762306a36Sopenharmony_ci * emulation (enclaves are CPL3 only). 167862306a36Sopenharmony_ci */ 167962306a36Sopenharmony_ci if (to_vmx(vcpu)->exit_reason.enclave_mode) { 168062306a36Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 168162306a36Sopenharmony_ci return false; 168262306a36Sopenharmony_ci } 168362306a36Sopenharmony_ci return true; 168462306a36Sopenharmony_ci} 168562306a36Sopenharmony_ci 168662306a36Sopenharmony_cistatic int skip_emulated_instruction(struct kvm_vcpu *vcpu) 168762306a36Sopenharmony_ci{ 168862306a36Sopenharmony_ci union vmx_exit_reason exit_reason = to_vmx(vcpu)->exit_reason; 168962306a36Sopenharmony_ci unsigned long rip, orig_rip; 169062306a36Sopenharmony_ci u32 instr_len; 169162306a36Sopenharmony_ci 169262306a36Sopenharmony_ci /* 169362306a36Sopenharmony_ci * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on 169462306a36Sopenharmony_ci * undefined behavior: Intel's SDM doesn't mandate the VMCS field be 169562306a36Sopenharmony_ci * set when EPT misconfig occurs. In practice, real hardware updates 169662306a36Sopenharmony_ci * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors 169762306a36Sopenharmony_ci * (namely Hyper-V) don't set it due to it being undefined behavior, 169862306a36Sopenharmony_ci * i.e. we end up advancing IP with some random value. 169962306a36Sopenharmony_ci */ 170062306a36Sopenharmony_ci if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || 170162306a36Sopenharmony_ci exit_reason.basic != EXIT_REASON_EPT_MISCONFIG) { 170262306a36Sopenharmony_ci instr_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 170362306a36Sopenharmony_ci 170462306a36Sopenharmony_ci /* 170562306a36Sopenharmony_ci * Emulating an enclave's instructions isn't supported as KVM 170662306a36Sopenharmony_ci * cannot access the enclave's memory or its true RIP, e.g. 
the 170762306a36Sopenharmony_ci * vmcs.GUEST_RIP points at the exit point of the enclave, not 170862306a36Sopenharmony_ci * the RIP that actually triggered the VM-Exit. But, because 170962306a36Sopenharmony_ci * most instructions that cause VM-Exit will #UD in an enclave, 171062306a36Sopenharmony_ci * most instruction-based VM-Exits simply do not occur. 171162306a36Sopenharmony_ci * 171262306a36Sopenharmony_ci * There are a few exceptions, notably the debug instructions 171362306a36Sopenharmony_ci * INT1ICEBRK and INT3, as they are allowed in debug enclaves 171462306a36Sopenharmony_ci * and generate #DB/#BP as expected, which KVM might intercept. 171562306a36Sopenharmony_ci * But again, the CPU does the dirty work and saves an instr 171662306a36Sopenharmony_ci * length of zero so VMMs don't shoot themselves in the foot. 171762306a36Sopenharmony_ci * WARN if KVM tries to skip a non-zero length instruction on 171862306a36Sopenharmony_ci * a VM-Exit from an enclave. 171962306a36Sopenharmony_ci */ 172062306a36Sopenharmony_ci if (!instr_len) 172162306a36Sopenharmony_ci goto rip_updated; 172262306a36Sopenharmony_ci 172362306a36Sopenharmony_ci WARN_ONCE(exit_reason.enclave_mode, 172462306a36Sopenharmony_ci "skipping instruction after SGX enclave VM-Exit"); 172562306a36Sopenharmony_ci 172662306a36Sopenharmony_ci orig_rip = kvm_rip_read(vcpu); 172762306a36Sopenharmony_ci rip = orig_rip + instr_len; 172862306a36Sopenharmony_ci#ifdef CONFIG_X86_64 172962306a36Sopenharmony_ci /* 173062306a36Sopenharmony_ci * We need to mask out the high 32 bits of RIP if not in 64-bit 173162306a36Sopenharmony_ci * mode, but just finding out that we are in 64-bit mode is 173262306a36Sopenharmony_ci * quite expensive. Only do it if there was a carry. 173362306a36Sopenharmony_ci */ 173462306a36Sopenharmony_ci if (unlikely(((rip ^ orig_rip) >> 31) == 3) && !is_64_bit_mode(vcpu)) 173562306a36Sopenharmony_ci rip = (u32)rip; 173662306a36Sopenharmony_ci#endif 173762306a36Sopenharmony_ci kvm_rip_write(vcpu, rip); 173862306a36Sopenharmony_ci } else { 173962306a36Sopenharmony_ci if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) 174062306a36Sopenharmony_ci return 0; 174162306a36Sopenharmony_ci } 174262306a36Sopenharmony_ci 174362306a36Sopenharmony_cirip_updated: 174462306a36Sopenharmony_ci /* skipping an emulated instruction also counts */ 174562306a36Sopenharmony_ci vmx_set_interrupt_shadow(vcpu, 0); 174662306a36Sopenharmony_ci 174762306a36Sopenharmony_ci return 1; 174862306a36Sopenharmony_ci} 174962306a36Sopenharmony_ci 175062306a36Sopenharmony_ci/* 175162306a36Sopenharmony_ci * Recognizes a pending MTF VM-exit and records the nested state for later 175262306a36Sopenharmony_ci * delivery. 175362306a36Sopenharmony_ci */ 175462306a36Sopenharmony_cistatic void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu) 175562306a36Sopenharmony_ci{ 175662306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 175762306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 175862306a36Sopenharmony_ci 175962306a36Sopenharmony_ci if (!is_guest_mode(vcpu)) 176062306a36Sopenharmony_ci return; 176162306a36Sopenharmony_ci 176262306a36Sopenharmony_ci /* 176362306a36Sopenharmony_ci * Per the SDM, MTF takes priority over debug-trap exceptions besides 176462306a36Sopenharmony_ci * TSS T-bit traps and ICEBP (INT1). 
KVM doesn't emulate T-bit traps
176562306a36Sopenharmony_ci * or ICEBP (in the emulator proper), and skipping of ICEBP after an
176662306a36Sopenharmony_ci * intercepted #DB deliberately avoids single-step #DB and MTF updates
176762306a36Sopenharmony_ci * as ICEBP is higher priority than both. As instruction emulation is
176862306a36Sopenharmony_ci * completed at this point (i.e. KVM is at the instruction boundary),
176962306a36Sopenharmony_ci * any #DB exception pending delivery must be a debug-trap of lower
177062306a36Sopenharmony_ci * priority than MTF. Record the pending MTF state to be delivered in
177162306a36Sopenharmony_ci * vmx_check_nested_events().
177262306a36Sopenharmony_ci */
177362306a36Sopenharmony_ci if (nested_cpu_has_mtf(vmcs12) &&
177462306a36Sopenharmony_ci (!vcpu->arch.exception.pending ||
177562306a36Sopenharmony_ci vcpu->arch.exception.vector == DB_VECTOR) &&
177662306a36Sopenharmony_ci (!vcpu->arch.exception_vmexit.pending ||
177762306a36Sopenharmony_ci vcpu->arch.exception_vmexit.vector == DB_VECTOR)) {
177862306a36Sopenharmony_ci vmx->nested.mtf_pending = true;
177962306a36Sopenharmony_ci kvm_make_request(KVM_REQ_EVENT, vcpu);
178062306a36Sopenharmony_ci } else {
178162306a36Sopenharmony_ci vmx->nested.mtf_pending = false;
178262306a36Sopenharmony_ci }
178362306a36Sopenharmony_ci}
178462306a36Sopenharmony_ci
178562306a36Sopenharmony_cistatic int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu)
178662306a36Sopenharmony_ci{
178762306a36Sopenharmony_ci vmx_update_emulated_instruction(vcpu);
178862306a36Sopenharmony_ci return skip_emulated_instruction(vcpu);
178962306a36Sopenharmony_ci}
179062306a36Sopenharmony_ci
179162306a36Sopenharmony_cistatic void vmx_clear_hlt(struct kvm_vcpu *vcpu)
179262306a36Sopenharmony_ci{
179362306a36Sopenharmony_ci /*
179462306a36Sopenharmony_ci * Ensure that we clear the HLT state in the VMCS. We don't need to
179562306a36Sopenharmony_ci * explicitly skip the instruction because if the HLT state is set,
179662306a36Sopenharmony_ci * then the instruction is already executing and RIP has already been
179762306a36Sopenharmony_ci * advanced.
179862306a36Sopenharmony_ci */
179962306a36Sopenharmony_ci if (kvm_hlt_in_guest(vcpu->kvm) &&
180062306a36Sopenharmony_ci vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
180162306a36Sopenharmony_ci vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
180262306a36Sopenharmony_ci}
180362306a36Sopenharmony_ci
180462306a36Sopenharmony_cistatic void vmx_inject_exception(struct kvm_vcpu *vcpu)
180562306a36Sopenharmony_ci{
180662306a36Sopenharmony_ci struct kvm_queued_exception *ex = &vcpu->arch.exception;
180762306a36Sopenharmony_ci u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
180862306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu);
180962306a36Sopenharmony_ci
181062306a36Sopenharmony_ci kvm_deliver_exception_payload(vcpu, ex);
181162306a36Sopenharmony_ci
181262306a36Sopenharmony_ci if (ex->has_error_code) {
181362306a36Sopenharmony_ci /*
181462306a36Sopenharmony_ci * Despite the error code being architecturally defined as 32
181562306a36Sopenharmony_ci * bits, and the VMCS field being 32 bits, Intel CPUs and thus
181662306a36Sopenharmony_ci * VMX don't actually support setting bits 31:16. Hardware
181762306a36Sopenharmony_ci * will (should) never provide a bogus error code, but AMD CPUs
181862306a36Sopenharmony_ci * do generate error codes with bits 31:16 set, and so KVM's
181962306a36Sopenharmony_ci * ABI lets userspace shove in arbitrary 32-bit values. Drop
182062306a36Sopenharmony_ci * the upper bits to avoid VM-Fail; losing information that
182162306a36Sopenharmony_ci * doesn't really exist is preferable to killing the VM.
182262306a36Sopenharmony_ci */
182362306a36Sopenharmony_ci vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, (u16)ex->error_code);
182462306a36Sopenharmony_ci intr_info |= INTR_INFO_DELIVER_CODE_MASK;
182562306a36Sopenharmony_ci }
182662306a36Sopenharmony_ci
182762306a36Sopenharmony_ci if (vmx->rmode.vm86_active) {
182862306a36Sopenharmony_ci int inc_eip = 0;
182962306a36Sopenharmony_ci if (kvm_exception_is_soft(ex->vector))
183062306a36Sopenharmony_ci inc_eip = vcpu->arch.event_exit_inst_len;
183162306a36Sopenharmony_ci kvm_inject_realmode_interrupt(vcpu, ex->vector, inc_eip);
183262306a36Sopenharmony_ci return;
183362306a36Sopenharmony_ci }
183462306a36Sopenharmony_ci
183562306a36Sopenharmony_ci WARN_ON_ONCE(vmx->emulation_required);
183662306a36Sopenharmony_ci
183762306a36Sopenharmony_ci if (kvm_exception_is_soft(ex->vector)) {
183862306a36Sopenharmony_ci vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
183962306a36Sopenharmony_ci vmx->vcpu.arch.event_exit_inst_len);
184062306a36Sopenharmony_ci intr_info |= INTR_TYPE_SOFT_EXCEPTION;
184162306a36Sopenharmony_ci } else
184262306a36Sopenharmony_ci intr_info |= INTR_TYPE_HARD_EXCEPTION;
184362306a36Sopenharmony_ci
184462306a36Sopenharmony_ci vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
184562306a36Sopenharmony_ci
184662306a36Sopenharmony_ci vmx_clear_hlt(vcpu);
184762306a36Sopenharmony_ci}
184862306a36Sopenharmony_ci
184962306a36Sopenharmony_cistatic void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,
185062306a36Sopenharmony_ci bool load_into_hardware)
185162306a36Sopenharmony_ci{
185262306a36Sopenharmony_ci struct vmx_uret_msr *uret_msr;
185362306a36Sopenharmony_ci
185462306a36Sopenharmony_ci uret_msr = vmx_find_uret_msr(vmx, msr);
185562306a36Sopenharmony_ci if (!uret_msr)
185662306a36Sopenharmony_ci return;
185762306a36Sopenharmony_ci
185862306a36Sopenharmony_ci uret_msr->load_into_hardware = load_into_hardware;
185962306a36Sopenharmony_ci}
186062306a36Sopenharmony_ci
186162306a36Sopenharmony_ci/*
186262306a36Sopenharmony_ci * Configuring user return MSRs to automatically save, load, and restore MSRs
186362306a36Sopenharmony_ci * that need to be shoved into hardware when running the guest. Note, omitting
186462306a36Sopenharmony_ci * an MSR here does _NOT_ mean it's not emulated, only that it will not be
186562306a36Sopenharmony_ci * loaded into hardware when running the guest.
186662306a36Sopenharmony_ci */
186762306a36Sopenharmony_cistatic void vmx_setup_uret_msrs(struct vcpu_vmx *vmx)
186862306a36Sopenharmony_ci{
186962306a36Sopenharmony_ci#ifdef CONFIG_X86_64
187062306a36Sopenharmony_ci bool load_syscall_msrs;
187162306a36Sopenharmony_ci
187262306a36Sopenharmony_ci /*
187362306a36Sopenharmony_ci * The SYSCALL MSRs are only needed on long mode guests, and only
187462306a36Sopenharmony_ci * when EFER.SCE is set.
187562306a36Sopenharmony_ci */ 187662306a36Sopenharmony_ci load_syscall_msrs = is_long_mode(&vmx->vcpu) && 187762306a36Sopenharmony_ci (vmx->vcpu.arch.efer & EFER_SCE); 187862306a36Sopenharmony_ci 187962306a36Sopenharmony_ci vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs); 188062306a36Sopenharmony_ci vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs); 188162306a36Sopenharmony_ci vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs); 188262306a36Sopenharmony_ci#endif 188362306a36Sopenharmony_ci vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx)); 188462306a36Sopenharmony_ci 188562306a36Sopenharmony_ci vmx_setup_uret_msr(vmx, MSR_TSC_AUX, 188662306a36Sopenharmony_ci guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) || 188762306a36Sopenharmony_ci guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID)); 188862306a36Sopenharmony_ci 188962306a36Sopenharmony_ci /* 189062306a36Sopenharmony_ci * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new 189162306a36Sopenharmony_ci * kernel and old userspace. If those guests run on a tsx=off host, do 189262306a36Sopenharmony_ci * allow guests to use TSX_CTRL, but don't change the value in hardware 189362306a36Sopenharmony_ci * so that TSX remains always disabled. 189462306a36Sopenharmony_ci */ 189562306a36Sopenharmony_ci vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM)); 189662306a36Sopenharmony_ci 189762306a36Sopenharmony_ci /* 189862306a36Sopenharmony_ci * The set of MSRs to load may have changed, reload MSRs before the 189962306a36Sopenharmony_ci * next VM-Enter. 190062306a36Sopenharmony_ci */ 190162306a36Sopenharmony_ci vmx->guest_uret_msrs_loaded = false; 190262306a36Sopenharmony_ci} 190362306a36Sopenharmony_ci 190462306a36Sopenharmony_ciu64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu) 190562306a36Sopenharmony_ci{ 190662306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 190762306a36Sopenharmony_ci 190862306a36Sopenharmony_ci if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING)) 190962306a36Sopenharmony_ci return vmcs12->tsc_offset; 191062306a36Sopenharmony_ci 191162306a36Sopenharmony_ci return 0; 191262306a36Sopenharmony_ci} 191362306a36Sopenharmony_ci 191462306a36Sopenharmony_ciu64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu) 191562306a36Sopenharmony_ci{ 191662306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 191762306a36Sopenharmony_ci 191862306a36Sopenharmony_ci if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING) && 191962306a36Sopenharmony_ci nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING)) 192062306a36Sopenharmony_ci return vmcs12->tsc_multiplier; 192162306a36Sopenharmony_ci 192262306a36Sopenharmony_ci return kvm_caps.default_tsc_scaling_ratio; 192362306a36Sopenharmony_ci} 192462306a36Sopenharmony_ci 192562306a36Sopenharmony_cistatic void vmx_write_tsc_offset(struct kvm_vcpu *vcpu) 192662306a36Sopenharmony_ci{ 192762306a36Sopenharmony_ci vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); 192862306a36Sopenharmony_ci} 192962306a36Sopenharmony_ci 193062306a36Sopenharmony_cistatic void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu) 193162306a36Sopenharmony_ci{ 193262306a36Sopenharmony_ci vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio); 193362306a36Sopenharmony_ci} 193462306a36Sopenharmony_ci 193562306a36Sopenharmony_ci/* 193662306a36Sopenharmony_ci * Userspace is allowed to set any supported IA32_FEATURE_CONTROL regardless of 193762306a36Sopenharmony_ci * guest CPUID. 
Note, KVM allows userspace to set "VMX in SMX" to maintain 193862306a36Sopenharmony_ci * backwards compatibility even though KVM doesn't support emulating SMX. And 193962306a36Sopenharmony_ci * because userspace set "VMX in SMX", the guest must also be allowed to set it, 194062306a36Sopenharmony_ci * e.g. if the MSR is left unlocked and the guest does a RMW operation. 194162306a36Sopenharmony_ci */ 194262306a36Sopenharmony_ci#define KVM_SUPPORTED_FEATURE_CONTROL (FEAT_CTL_LOCKED | \ 194362306a36Sopenharmony_ci FEAT_CTL_VMX_ENABLED_INSIDE_SMX | \ 194462306a36Sopenharmony_ci FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX | \ 194562306a36Sopenharmony_ci FEAT_CTL_SGX_LC_ENABLED | \ 194662306a36Sopenharmony_ci FEAT_CTL_SGX_ENABLED | \ 194762306a36Sopenharmony_ci FEAT_CTL_LMCE_ENABLED) 194862306a36Sopenharmony_ci 194962306a36Sopenharmony_cistatic inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, 195062306a36Sopenharmony_ci struct msr_data *msr) 195162306a36Sopenharmony_ci{ 195262306a36Sopenharmony_ci uint64_t valid_bits; 195362306a36Sopenharmony_ci 195462306a36Sopenharmony_ci /* 195562306a36Sopenharmony_ci * Ensure KVM_SUPPORTED_FEATURE_CONTROL is updated when new bits are 195662306a36Sopenharmony_ci * exposed to the guest. 195762306a36Sopenharmony_ci */ 195862306a36Sopenharmony_ci WARN_ON_ONCE(vmx->msr_ia32_feature_control_valid_bits & 195962306a36Sopenharmony_ci ~KVM_SUPPORTED_FEATURE_CONTROL); 196062306a36Sopenharmony_ci 196162306a36Sopenharmony_ci if (!msr->host_initiated && 196262306a36Sopenharmony_ci (vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED)) 196362306a36Sopenharmony_ci return false; 196462306a36Sopenharmony_ci 196562306a36Sopenharmony_ci if (msr->host_initiated) 196662306a36Sopenharmony_ci valid_bits = KVM_SUPPORTED_FEATURE_CONTROL; 196762306a36Sopenharmony_ci else 196862306a36Sopenharmony_ci valid_bits = vmx->msr_ia32_feature_control_valid_bits; 196962306a36Sopenharmony_ci 197062306a36Sopenharmony_ci return !(msr->data & ~valid_bits); 197162306a36Sopenharmony_ci} 197262306a36Sopenharmony_ci 197362306a36Sopenharmony_cistatic int vmx_get_msr_feature(struct kvm_msr_entry *msr) 197462306a36Sopenharmony_ci{ 197562306a36Sopenharmony_ci switch (msr->index) { 197662306a36Sopenharmony_ci case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: 197762306a36Sopenharmony_ci if (!nested) 197862306a36Sopenharmony_ci return 1; 197962306a36Sopenharmony_ci return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); 198062306a36Sopenharmony_ci default: 198162306a36Sopenharmony_ci return KVM_MSR_RET_INVALID; 198262306a36Sopenharmony_ci } 198362306a36Sopenharmony_ci} 198462306a36Sopenharmony_ci 198562306a36Sopenharmony_ci/* 198662306a36Sopenharmony_ci * Reads an msr value (of 'msr_info->index') into 'msr_info->data'. 198762306a36Sopenharmony_ci * Returns 0 on success, non-0 otherwise. 198862306a36Sopenharmony_ci * Assumes vcpu_load() was already called. 
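 *
 * (Hypothetical usage sketch added by the editor, not taken from the
 * original source:
 *
 *	struct msr_data msr = {
 *		.index = MSR_IA32_SYSENTER_CS,
 *		.host_initiated = true,
 *	};
 *
 *	if (!vmx_get_msr(vcpu, &msr))
 *		pr_info("SYSENTER_CS = 0x%llx\n", msr.data);
 * )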
198962306a36Sopenharmony_ci */ 199062306a36Sopenharmony_cistatic int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) 199162306a36Sopenharmony_ci{ 199262306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 199362306a36Sopenharmony_ci struct vmx_uret_msr *msr; 199462306a36Sopenharmony_ci u32 index; 199562306a36Sopenharmony_ci 199662306a36Sopenharmony_ci switch (msr_info->index) { 199762306a36Sopenharmony_ci#ifdef CONFIG_X86_64 199862306a36Sopenharmony_ci case MSR_FS_BASE: 199962306a36Sopenharmony_ci msr_info->data = vmcs_readl(GUEST_FS_BASE); 200062306a36Sopenharmony_ci break; 200162306a36Sopenharmony_ci case MSR_GS_BASE: 200262306a36Sopenharmony_ci msr_info->data = vmcs_readl(GUEST_GS_BASE); 200362306a36Sopenharmony_ci break; 200462306a36Sopenharmony_ci case MSR_KERNEL_GS_BASE: 200562306a36Sopenharmony_ci msr_info->data = vmx_read_guest_kernel_gs_base(vmx); 200662306a36Sopenharmony_ci break; 200762306a36Sopenharmony_ci#endif 200862306a36Sopenharmony_ci case MSR_EFER: 200962306a36Sopenharmony_ci return kvm_get_msr_common(vcpu, msr_info); 201062306a36Sopenharmony_ci case MSR_IA32_TSX_CTRL: 201162306a36Sopenharmony_ci if (!msr_info->host_initiated && 201262306a36Sopenharmony_ci !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) 201362306a36Sopenharmony_ci return 1; 201462306a36Sopenharmony_ci goto find_uret_msr; 201562306a36Sopenharmony_ci case MSR_IA32_UMWAIT_CONTROL: 201662306a36Sopenharmony_ci if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) 201762306a36Sopenharmony_ci return 1; 201862306a36Sopenharmony_ci 201962306a36Sopenharmony_ci msr_info->data = vmx->msr_ia32_umwait_control; 202062306a36Sopenharmony_ci break; 202162306a36Sopenharmony_ci case MSR_IA32_SPEC_CTRL: 202262306a36Sopenharmony_ci if (!msr_info->host_initiated && 202362306a36Sopenharmony_ci !guest_has_spec_ctrl_msr(vcpu)) 202462306a36Sopenharmony_ci return 1; 202562306a36Sopenharmony_ci 202662306a36Sopenharmony_ci msr_info->data = to_vmx(vcpu)->spec_ctrl; 202762306a36Sopenharmony_ci break; 202862306a36Sopenharmony_ci case MSR_IA32_SYSENTER_CS: 202962306a36Sopenharmony_ci msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); 203062306a36Sopenharmony_ci break; 203162306a36Sopenharmony_ci case MSR_IA32_SYSENTER_EIP: 203262306a36Sopenharmony_ci msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP); 203362306a36Sopenharmony_ci break; 203462306a36Sopenharmony_ci case MSR_IA32_SYSENTER_ESP: 203562306a36Sopenharmony_ci msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); 203662306a36Sopenharmony_ci break; 203762306a36Sopenharmony_ci case MSR_IA32_BNDCFGS: 203862306a36Sopenharmony_ci if (!kvm_mpx_supported() || 203962306a36Sopenharmony_ci (!msr_info->host_initiated && 204062306a36Sopenharmony_ci !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) 204162306a36Sopenharmony_ci return 1; 204262306a36Sopenharmony_ci msr_info->data = vmcs_read64(GUEST_BNDCFGS); 204362306a36Sopenharmony_ci break; 204462306a36Sopenharmony_ci case MSR_IA32_MCG_EXT_CTL: 204562306a36Sopenharmony_ci if (!msr_info->host_initiated && 204662306a36Sopenharmony_ci !(vmx->msr_ia32_feature_control & 204762306a36Sopenharmony_ci FEAT_CTL_LMCE_ENABLED)) 204862306a36Sopenharmony_ci return 1; 204962306a36Sopenharmony_ci msr_info->data = vcpu->arch.mcg_ext_ctl; 205062306a36Sopenharmony_ci break; 205162306a36Sopenharmony_ci case MSR_IA32_FEAT_CTL: 205262306a36Sopenharmony_ci msr_info->data = vmx->msr_ia32_feature_control; 205362306a36Sopenharmony_ci break; 205462306a36Sopenharmony_ci case MSR_IA32_SGXLEPUBKEYHASH0 ... 
MSR_IA32_SGXLEPUBKEYHASH3: 205562306a36Sopenharmony_ci if (!msr_info->host_initiated && 205662306a36Sopenharmony_ci !guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC)) 205762306a36Sopenharmony_ci return 1; 205862306a36Sopenharmony_ci msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash 205962306a36Sopenharmony_ci [msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0]; 206062306a36Sopenharmony_ci break; 206162306a36Sopenharmony_ci case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: 206262306a36Sopenharmony_ci if (!guest_can_use(vcpu, X86_FEATURE_VMX)) 206362306a36Sopenharmony_ci return 1; 206462306a36Sopenharmony_ci if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, 206562306a36Sopenharmony_ci &msr_info->data)) 206662306a36Sopenharmony_ci return 1; 206762306a36Sopenharmony_ci /* 206862306a36Sopenharmony_ci * Enlightened VMCS v1 doesn't have certain VMCS fields but 206962306a36Sopenharmony_ci * instead of just ignoring the features, different Hyper-V 207062306a36Sopenharmony_ci * versions are either trying to use them and fail or do some 207162306a36Sopenharmony_ci * sanity checking and refuse to boot. Filter all unsupported 207262306a36Sopenharmony_ci * features out. 207362306a36Sopenharmony_ci */ 207462306a36Sopenharmony_ci if (!msr_info->host_initiated && guest_cpuid_has_evmcs(vcpu)) 207562306a36Sopenharmony_ci nested_evmcs_filter_control_msr(vcpu, msr_info->index, 207662306a36Sopenharmony_ci &msr_info->data); 207762306a36Sopenharmony_ci break; 207862306a36Sopenharmony_ci case MSR_IA32_RTIT_CTL: 207962306a36Sopenharmony_ci if (!vmx_pt_mode_is_host_guest()) 208062306a36Sopenharmony_ci return 1; 208162306a36Sopenharmony_ci msr_info->data = vmx->pt_desc.guest.ctl; 208262306a36Sopenharmony_ci break; 208362306a36Sopenharmony_ci case MSR_IA32_RTIT_STATUS: 208462306a36Sopenharmony_ci if (!vmx_pt_mode_is_host_guest()) 208562306a36Sopenharmony_ci return 1; 208662306a36Sopenharmony_ci msr_info->data = vmx->pt_desc.guest.status; 208762306a36Sopenharmony_ci break; 208862306a36Sopenharmony_ci case MSR_IA32_RTIT_CR3_MATCH: 208962306a36Sopenharmony_ci if (!vmx_pt_mode_is_host_guest() || 209062306a36Sopenharmony_ci !intel_pt_validate_cap(vmx->pt_desc.caps, 209162306a36Sopenharmony_ci PT_CAP_cr3_filtering)) 209262306a36Sopenharmony_ci return 1; 209362306a36Sopenharmony_ci msr_info->data = vmx->pt_desc.guest.cr3_match; 209462306a36Sopenharmony_ci break; 209562306a36Sopenharmony_ci case MSR_IA32_RTIT_OUTPUT_BASE: 209662306a36Sopenharmony_ci if (!vmx_pt_mode_is_host_guest() || 209762306a36Sopenharmony_ci (!intel_pt_validate_cap(vmx->pt_desc.caps, 209862306a36Sopenharmony_ci PT_CAP_topa_output) && 209962306a36Sopenharmony_ci !intel_pt_validate_cap(vmx->pt_desc.caps, 210062306a36Sopenharmony_ci PT_CAP_single_range_output))) 210162306a36Sopenharmony_ci return 1; 210262306a36Sopenharmony_ci msr_info->data = vmx->pt_desc.guest.output_base; 210362306a36Sopenharmony_ci break; 210462306a36Sopenharmony_ci case MSR_IA32_RTIT_OUTPUT_MASK: 210562306a36Sopenharmony_ci if (!vmx_pt_mode_is_host_guest() || 210662306a36Sopenharmony_ci (!intel_pt_validate_cap(vmx->pt_desc.caps, 210762306a36Sopenharmony_ci PT_CAP_topa_output) && 210862306a36Sopenharmony_ci !intel_pt_validate_cap(vmx->pt_desc.caps, 210962306a36Sopenharmony_ci PT_CAP_single_range_output))) 211062306a36Sopenharmony_ci return 1; 211162306a36Sopenharmony_ci msr_info->data = vmx->pt_desc.guest.output_mask; 211262306a36Sopenharmony_ci break; 211362306a36Sopenharmony_ci case MSR_IA32_RTIT_ADDR0_A ... 
MSR_IA32_RTIT_ADDR3_B: 211462306a36Sopenharmony_ci index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; 211562306a36Sopenharmony_ci if (!vmx_pt_mode_is_host_guest() || 211662306a36Sopenharmony_ci (index >= 2 * vmx->pt_desc.num_address_ranges)) 211762306a36Sopenharmony_ci return 1; 211862306a36Sopenharmony_ci if (index % 2) 211962306a36Sopenharmony_ci msr_info->data = vmx->pt_desc.guest.addr_b[index / 2]; 212062306a36Sopenharmony_ci else 212162306a36Sopenharmony_ci msr_info->data = vmx->pt_desc.guest.addr_a[index / 2]; 212262306a36Sopenharmony_ci break; 212362306a36Sopenharmony_ci case MSR_IA32_DEBUGCTLMSR: 212462306a36Sopenharmony_ci msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL); 212562306a36Sopenharmony_ci break; 212662306a36Sopenharmony_ci default: 212762306a36Sopenharmony_ci find_uret_msr: 212862306a36Sopenharmony_ci msr = vmx_find_uret_msr(vmx, msr_info->index); 212962306a36Sopenharmony_ci if (msr) { 213062306a36Sopenharmony_ci msr_info->data = msr->data; 213162306a36Sopenharmony_ci break; 213262306a36Sopenharmony_ci } 213362306a36Sopenharmony_ci return kvm_get_msr_common(vcpu, msr_info); 213462306a36Sopenharmony_ci } 213562306a36Sopenharmony_ci 213662306a36Sopenharmony_ci return 0; 213762306a36Sopenharmony_ci} 213862306a36Sopenharmony_ci 213962306a36Sopenharmony_cistatic u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu, 214062306a36Sopenharmony_ci u64 data) 214162306a36Sopenharmony_ci{ 214262306a36Sopenharmony_ci#ifdef CONFIG_X86_64 214362306a36Sopenharmony_ci if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) 214462306a36Sopenharmony_ci return (u32)data; 214562306a36Sopenharmony_ci#endif 214662306a36Sopenharmony_ci return (unsigned long)data; 214762306a36Sopenharmony_ci} 214862306a36Sopenharmony_ci 214962306a36Sopenharmony_cistatic u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated) 215062306a36Sopenharmony_ci{ 215162306a36Sopenharmony_ci u64 debugctl = 0; 215262306a36Sopenharmony_ci 215362306a36Sopenharmony_ci if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) && 215462306a36Sopenharmony_ci (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))) 215562306a36Sopenharmony_ci debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; 215662306a36Sopenharmony_ci 215762306a36Sopenharmony_ci if ((kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT) && 215862306a36Sopenharmony_ci (host_initiated || intel_pmu_lbr_is_enabled(vcpu))) 215962306a36Sopenharmony_ci debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; 216062306a36Sopenharmony_ci 216162306a36Sopenharmony_ci return debugctl; 216262306a36Sopenharmony_ci} 216362306a36Sopenharmony_ci 216462306a36Sopenharmony_ci/* 216562306a36Sopenharmony_ci * Writes msr value into the appropriate "register". 216662306a36Sopenharmony_ci * Returns 0 on success, non-0 otherwise. 216762306a36Sopenharmony_ci * Assumes vcpu_load() was already called. 
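 * A non-zero return is treated by the common x86 code as a failed access;
 * for guest-initiated writes that typically surfaces as an injected #GP,
 * while host-initiated (ioctl) writes get an error back instead.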
 */
static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmx_uret_msr *msr;
	int ret = 0;
	u32 msr_index = msr_info->index;
	u64 data = msr_info->data;
	u32 index;

	switch (msr_index) {
	case MSR_EFER:
		ret = kvm_set_msr_common(vcpu, msr_info);
		break;
#ifdef CONFIG_X86_64
	case MSR_FS_BASE:
		vmx_segment_cache_clear(vmx);
		vmcs_writel(GUEST_FS_BASE, data);
		break;
	case MSR_GS_BASE:
		vmx_segment_cache_clear(vmx);
		vmcs_writel(GUEST_GS_BASE, data);
		break;
	case MSR_KERNEL_GS_BASE:
		vmx_write_guest_kernel_gs_base(vmx, data);
		break;
	case MSR_IA32_XFD:
		ret = kvm_set_msr_common(vcpu, msr_info);
		/*
		 * Always intercepting WRMSR could incur non-negligible
		 * overhead given xfd might be changed frequently in
		 * guest context switch. Disable write interception
		 * upon the first write with a non-zero value (indicating
		 * potential usage on dynamic xfeatures). Also update
		 * exception bitmap to trap #NM for proper virtualization
		 * of guest xfd_err.
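		 * In effect, after that first non-zero write the guest's own
		 * XFD context switches run without VM-Exits, while #NM is
		 * intercepted so KVM can snapshot the hardware XFD_ERR value
		 * for the guest.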
220462306a36Sopenharmony_ci */ 220562306a36Sopenharmony_ci if (!ret && data) { 220662306a36Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, MSR_IA32_XFD, 220762306a36Sopenharmony_ci MSR_TYPE_RW); 220862306a36Sopenharmony_ci vcpu->arch.xfd_no_write_intercept = true; 220962306a36Sopenharmony_ci vmx_update_exception_bitmap(vcpu); 221062306a36Sopenharmony_ci } 221162306a36Sopenharmony_ci break; 221262306a36Sopenharmony_ci#endif 221362306a36Sopenharmony_ci case MSR_IA32_SYSENTER_CS: 221462306a36Sopenharmony_ci if (is_guest_mode(vcpu)) 221562306a36Sopenharmony_ci get_vmcs12(vcpu)->guest_sysenter_cs = data; 221662306a36Sopenharmony_ci vmcs_write32(GUEST_SYSENTER_CS, data); 221762306a36Sopenharmony_ci break; 221862306a36Sopenharmony_ci case MSR_IA32_SYSENTER_EIP: 221962306a36Sopenharmony_ci if (is_guest_mode(vcpu)) { 222062306a36Sopenharmony_ci data = nested_vmx_truncate_sysenter_addr(vcpu, data); 222162306a36Sopenharmony_ci get_vmcs12(vcpu)->guest_sysenter_eip = data; 222262306a36Sopenharmony_ci } 222362306a36Sopenharmony_ci vmcs_writel(GUEST_SYSENTER_EIP, data); 222462306a36Sopenharmony_ci break; 222562306a36Sopenharmony_ci case MSR_IA32_SYSENTER_ESP: 222662306a36Sopenharmony_ci if (is_guest_mode(vcpu)) { 222762306a36Sopenharmony_ci data = nested_vmx_truncate_sysenter_addr(vcpu, data); 222862306a36Sopenharmony_ci get_vmcs12(vcpu)->guest_sysenter_esp = data; 222962306a36Sopenharmony_ci } 223062306a36Sopenharmony_ci vmcs_writel(GUEST_SYSENTER_ESP, data); 223162306a36Sopenharmony_ci break; 223262306a36Sopenharmony_ci case MSR_IA32_DEBUGCTLMSR: { 223362306a36Sopenharmony_ci u64 invalid; 223462306a36Sopenharmony_ci 223562306a36Sopenharmony_ci invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated); 223662306a36Sopenharmony_ci if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) { 223762306a36Sopenharmony_ci kvm_pr_unimpl_wrmsr(vcpu, msr_index, data); 223862306a36Sopenharmony_ci data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR); 223962306a36Sopenharmony_ci invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR); 224062306a36Sopenharmony_ci } 224162306a36Sopenharmony_ci 224262306a36Sopenharmony_ci if (invalid) 224362306a36Sopenharmony_ci return 1; 224462306a36Sopenharmony_ci 224562306a36Sopenharmony_ci if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls & 224662306a36Sopenharmony_ci VM_EXIT_SAVE_DEBUG_CONTROLS) 224762306a36Sopenharmony_ci get_vmcs12(vcpu)->guest_ia32_debugctl = data; 224862306a36Sopenharmony_ci 224962306a36Sopenharmony_ci vmcs_write64(GUEST_IA32_DEBUGCTL, data); 225062306a36Sopenharmony_ci if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event && 225162306a36Sopenharmony_ci (data & DEBUGCTLMSR_LBR)) 225262306a36Sopenharmony_ci intel_pmu_create_guest_lbr_event(vcpu); 225362306a36Sopenharmony_ci return 0; 225462306a36Sopenharmony_ci } 225562306a36Sopenharmony_ci case MSR_IA32_BNDCFGS: 225662306a36Sopenharmony_ci if (!kvm_mpx_supported() || 225762306a36Sopenharmony_ci (!msr_info->host_initiated && 225862306a36Sopenharmony_ci !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) 225962306a36Sopenharmony_ci return 1; 226062306a36Sopenharmony_ci if (is_noncanonical_address(data & PAGE_MASK, vcpu) || 226162306a36Sopenharmony_ci (data & MSR_IA32_BNDCFGS_RSVD)) 226262306a36Sopenharmony_ci return 1; 226362306a36Sopenharmony_ci 226462306a36Sopenharmony_ci if (is_guest_mode(vcpu) && 226562306a36Sopenharmony_ci ((vmx->nested.msrs.entry_ctls_high & VM_ENTRY_LOAD_BNDCFGS) || 226662306a36Sopenharmony_ci (vmx->nested.msrs.exit_ctls_high & VM_EXIT_CLEAR_BNDCFGS))) 
226762306a36Sopenharmony_ci get_vmcs12(vcpu)->guest_bndcfgs = data; 226862306a36Sopenharmony_ci 226962306a36Sopenharmony_ci vmcs_write64(GUEST_BNDCFGS, data); 227062306a36Sopenharmony_ci break; 227162306a36Sopenharmony_ci case MSR_IA32_UMWAIT_CONTROL: 227262306a36Sopenharmony_ci if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) 227362306a36Sopenharmony_ci return 1; 227462306a36Sopenharmony_ci 227562306a36Sopenharmony_ci /* The reserved bit 1 and non-32 bit [63:32] should be zero */ 227662306a36Sopenharmony_ci if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) 227762306a36Sopenharmony_ci return 1; 227862306a36Sopenharmony_ci 227962306a36Sopenharmony_ci vmx->msr_ia32_umwait_control = data; 228062306a36Sopenharmony_ci break; 228162306a36Sopenharmony_ci case MSR_IA32_SPEC_CTRL: 228262306a36Sopenharmony_ci if (!msr_info->host_initiated && 228362306a36Sopenharmony_ci !guest_has_spec_ctrl_msr(vcpu)) 228462306a36Sopenharmony_ci return 1; 228562306a36Sopenharmony_ci 228662306a36Sopenharmony_ci if (kvm_spec_ctrl_test_value(data)) 228762306a36Sopenharmony_ci return 1; 228862306a36Sopenharmony_ci 228962306a36Sopenharmony_ci vmx->spec_ctrl = data; 229062306a36Sopenharmony_ci if (!data) 229162306a36Sopenharmony_ci break; 229262306a36Sopenharmony_ci 229362306a36Sopenharmony_ci /* 229462306a36Sopenharmony_ci * For non-nested: 229562306a36Sopenharmony_ci * When it's written (to non-zero) for the first time, pass 229662306a36Sopenharmony_ci * it through. 229762306a36Sopenharmony_ci * 229862306a36Sopenharmony_ci * For nested: 229962306a36Sopenharmony_ci * The handling of the MSR bitmap for L2 guests is done in 230062306a36Sopenharmony_ci * nested_vmx_prepare_msr_bitmap. We should not touch the 230162306a36Sopenharmony_ci * vmcs02.msr_bitmap here since it gets completely overwritten 230262306a36Sopenharmony_ci * in the merging. We update the vmcs01 here for L1 as well 230362306a36Sopenharmony_ci * since it will end up touching the MSR anyway now. 
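		 * The underlying trade-off: a first non-zero write indicates
		 * the guest actively uses IA32_SPEC_CTRL, at which point the
		 * cost of exiting on every access outweighs the benefit of
		 * interception, so the MSR is passed through and KVM instead
		 * context switches the guest value around VM-Enter/VM-Exit.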
230462306a36Sopenharmony_ci */ 230562306a36Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, 230662306a36Sopenharmony_ci MSR_IA32_SPEC_CTRL, 230762306a36Sopenharmony_ci MSR_TYPE_RW); 230862306a36Sopenharmony_ci break; 230962306a36Sopenharmony_ci case MSR_IA32_TSX_CTRL: 231062306a36Sopenharmony_ci if (!msr_info->host_initiated && 231162306a36Sopenharmony_ci !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) 231262306a36Sopenharmony_ci return 1; 231362306a36Sopenharmony_ci if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR)) 231462306a36Sopenharmony_ci return 1; 231562306a36Sopenharmony_ci goto find_uret_msr; 231662306a36Sopenharmony_ci case MSR_IA32_CR_PAT: 231762306a36Sopenharmony_ci ret = kvm_set_msr_common(vcpu, msr_info); 231862306a36Sopenharmony_ci if (ret) 231962306a36Sopenharmony_ci break; 232062306a36Sopenharmony_ci 232162306a36Sopenharmony_ci if (is_guest_mode(vcpu) && 232262306a36Sopenharmony_ci get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) 232362306a36Sopenharmony_ci get_vmcs12(vcpu)->guest_ia32_pat = data; 232462306a36Sopenharmony_ci 232562306a36Sopenharmony_ci if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) 232662306a36Sopenharmony_ci vmcs_write64(GUEST_IA32_PAT, data); 232762306a36Sopenharmony_ci break; 232862306a36Sopenharmony_ci case MSR_IA32_MCG_EXT_CTL: 232962306a36Sopenharmony_ci if ((!msr_info->host_initiated && 233062306a36Sopenharmony_ci !(to_vmx(vcpu)->msr_ia32_feature_control & 233162306a36Sopenharmony_ci FEAT_CTL_LMCE_ENABLED)) || 233262306a36Sopenharmony_ci (data & ~MCG_EXT_CTL_LMCE_EN)) 233362306a36Sopenharmony_ci return 1; 233462306a36Sopenharmony_ci vcpu->arch.mcg_ext_ctl = data; 233562306a36Sopenharmony_ci break; 233662306a36Sopenharmony_ci case MSR_IA32_FEAT_CTL: 233762306a36Sopenharmony_ci if (!is_vmx_feature_control_msr_valid(vmx, msr_info)) 233862306a36Sopenharmony_ci return 1; 233962306a36Sopenharmony_ci 234062306a36Sopenharmony_ci vmx->msr_ia32_feature_control = data; 234162306a36Sopenharmony_ci if (msr_info->host_initiated && data == 0) 234262306a36Sopenharmony_ci vmx_leave_nested(vcpu); 234362306a36Sopenharmony_ci 234462306a36Sopenharmony_ci /* SGX may be enabled/disabled by guest's firmware */ 234562306a36Sopenharmony_ci vmx_write_encls_bitmap(vcpu, NULL); 234662306a36Sopenharmony_ci break; 234762306a36Sopenharmony_ci case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3: 234862306a36Sopenharmony_ci /* 234962306a36Sopenharmony_ci * On real hardware, the LE hash MSRs are writable before 235062306a36Sopenharmony_ci * the firmware sets bit 0 in MSR 0x7a ("activating" SGX), 235162306a36Sopenharmony_ci * at which point SGX related bits in IA32_FEATURE_CONTROL 235262306a36Sopenharmony_ci * become writable. 235362306a36Sopenharmony_ci * 235462306a36Sopenharmony_ci * KVM does not emulate SGX activation for simplicity, so 235562306a36Sopenharmony_ci * allow writes to the LE hash MSRs if IA32_FEATURE_CONTROL 235662306a36Sopenharmony_ci * is unlocked. This is technically not architectural 235762306a36Sopenharmony_ci * behavior, but it's close enough. 
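		 * Concretely: guest writes are accepted only if the guest's
		 * CPUID advertises SGX_LC and IA32_FEATURE_CONTROL is either
		 * still unlocked or has SGX_LC enabled, while host-initiated
		 * writes are always allowed, e.g. so userspace can restore
		 * the hashes on migration.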
235862306a36Sopenharmony_ci */ 235962306a36Sopenharmony_ci if (!msr_info->host_initiated && 236062306a36Sopenharmony_ci (!guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC) || 236162306a36Sopenharmony_ci ((vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED) && 236262306a36Sopenharmony_ci !(vmx->msr_ia32_feature_control & FEAT_CTL_SGX_LC_ENABLED)))) 236362306a36Sopenharmony_ci return 1; 236462306a36Sopenharmony_ci vmx->msr_ia32_sgxlepubkeyhash 236562306a36Sopenharmony_ci [msr_index - MSR_IA32_SGXLEPUBKEYHASH0] = data; 236662306a36Sopenharmony_ci break; 236762306a36Sopenharmony_ci case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: 236862306a36Sopenharmony_ci if (!msr_info->host_initiated) 236962306a36Sopenharmony_ci return 1; /* they are read-only */ 237062306a36Sopenharmony_ci if (!guest_can_use(vcpu, X86_FEATURE_VMX)) 237162306a36Sopenharmony_ci return 1; 237262306a36Sopenharmony_ci return vmx_set_vmx_msr(vcpu, msr_index, data); 237362306a36Sopenharmony_ci case MSR_IA32_RTIT_CTL: 237462306a36Sopenharmony_ci if (!vmx_pt_mode_is_host_guest() || 237562306a36Sopenharmony_ci vmx_rtit_ctl_check(vcpu, data) || 237662306a36Sopenharmony_ci vmx->nested.vmxon) 237762306a36Sopenharmony_ci return 1; 237862306a36Sopenharmony_ci vmcs_write64(GUEST_IA32_RTIT_CTL, data); 237962306a36Sopenharmony_ci vmx->pt_desc.guest.ctl = data; 238062306a36Sopenharmony_ci pt_update_intercept_for_msr(vcpu); 238162306a36Sopenharmony_ci break; 238262306a36Sopenharmony_ci case MSR_IA32_RTIT_STATUS: 238362306a36Sopenharmony_ci if (!pt_can_write_msr(vmx)) 238462306a36Sopenharmony_ci return 1; 238562306a36Sopenharmony_ci if (data & MSR_IA32_RTIT_STATUS_MASK) 238662306a36Sopenharmony_ci return 1; 238762306a36Sopenharmony_ci vmx->pt_desc.guest.status = data; 238862306a36Sopenharmony_ci break; 238962306a36Sopenharmony_ci case MSR_IA32_RTIT_CR3_MATCH: 239062306a36Sopenharmony_ci if (!pt_can_write_msr(vmx)) 239162306a36Sopenharmony_ci return 1; 239262306a36Sopenharmony_ci if (!intel_pt_validate_cap(vmx->pt_desc.caps, 239362306a36Sopenharmony_ci PT_CAP_cr3_filtering)) 239462306a36Sopenharmony_ci return 1; 239562306a36Sopenharmony_ci vmx->pt_desc.guest.cr3_match = data; 239662306a36Sopenharmony_ci break; 239762306a36Sopenharmony_ci case MSR_IA32_RTIT_OUTPUT_BASE: 239862306a36Sopenharmony_ci if (!pt_can_write_msr(vmx)) 239962306a36Sopenharmony_ci return 1; 240062306a36Sopenharmony_ci if (!intel_pt_validate_cap(vmx->pt_desc.caps, 240162306a36Sopenharmony_ci PT_CAP_topa_output) && 240262306a36Sopenharmony_ci !intel_pt_validate_cap(vmx->pt_desc.caps, 240362306a36Sopenharmony_ci PT_CAP_single_range_output)) 240462306a36Sopenharmony_ci return 1; 240562306a36Sopenharmony_ci if (!pt_output_base_valid(vcpu, data)) 240662306a36Sopenharmony_ci return 1; 240762306a36Sopenharmony_ci vmx->pt_desc.guest.output_base = data; 240862306a36Sopenharmony_ci break; 240962306a36Sopenharmony_ci case MSR_IA32_RTIT_OUTPUT_MASK: 241062306a36Sopenharmony_ci if (!pt_can_write_msr(vmx)) 241162306a36Sopenharmony_ci return 1; 241262306a36Sopenharmony_ci if (!intel_pt_validate_cap(vmx->pt_desc.caps, 241362306a36Sopenharmony_ci PT_CAP_topa_output) && 241462306a36Sopenharmony_ci !intel_pt_validate_cap(vmx->pt_desc.caps, 241562306a36Sopenharmony_ci PT_CAP_single_range_output)) 241662306a36Sopenharmony_ci return 1; 241762306a36Sopenharmony_ci vmx->pt_desc.guest.output_mask = data; 241862306a36Sopenharmony_ci break; 241962306a36Sopenharmony_ci case MSR_IA32_RTIT_ADDR0_A ... 
MSR_IA32_RTIT_ADDR3_B: 242062306a36Sopenharmony_ci if (!pt_can_write_msr(vmx)) 242162306a36Sopenharmony_ci return 1; 242262306a36Sopenharmony_ci index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; 242362306a36Sopenharmony_ci if (index >= 2 * vmx->pt_desc.num_address_ranges) 242462306a36Sopenharmony_ci return 1; 242562306a36Sopenharmony_ci if (is_noncanonical_address(data, vcpu)) 242662306a36Sopenharmony_ci return 1; 242762306a36Sopenharmony_ci if (index % 2) 242862306a36Sopenharmony_ci vmx->pt_desc.guest.addr_b[index / 2] = data; 242962306a36Sopenharmony_ci else 243062306a36Sopenharmony_ci vmx->pt_desc.guest.addr_a[index / 2] = data; 243162306a36Sopenharmony_ci break; 243262306a36Sopenharmony_ci case MSR_IA32_PERF_CAPABILITIES: 243362306a36Sopenharmony_ci if (data && !vcpu_to_pmu(vcpu)->version) 243462306a36Sopenharmony_ci return 1; 243562306a36Sopenharmony_ci if (data & PMU_CAP_LBR_FMT) { 243662306a36Sopenharmony_ci if ((data & PMU_CAP_LBR_FMT) != 243762306a36Sopenharmony_ci (kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT)) 243862306a36Sopenharmony_ci return 1; 243962306a36Sopenharmony_ci if (!cpuid_model_is_consistent(vcpu)) 244062306a36Sopenharmony_ci return 1; 244162306a36Sopenharmony_ci } 244262306a36Sopenharmony_ci if (data & PERF_CAP_PEBS_FORMAT) { 244362306a36Sopenharmony_ci if ((data & PERF_CAP_PEBS_MASK) != 244462306a36Sopenharmony_ci (kvm_caps.supported_perf_cap & PERF_CAP_PEBS_MASK)) 244562306a36Sopenharmony_ci return 1; 244662306a36Sopenharmony_ci if (!guest_cpuid_has(vcpu, X86_FEATURE_DS)) 244762306a36Sopenharmony_ci return 1; 244862306a36Sopenharmony_ci if (!guest_cpuid_has(vcpu, X86_FEATURE_DTES64)) 244962306a36Sopenharmony_ci return 1; 245062306a36Sopenharmony_ci if (!cpuid_model_is_consistent(vcpu)) 245162306a36Sopenharmony_ci return 1; 245262306a36Sopenharmony_ci } 245362306a36Sopenharmony_ci ret = kvm_set_msr_common(vcpu, msr_info); 245462306a36Sopenharmony_ci break; 245562306a36Sopenharmony_ci 245662306a36Sopenharmony_ci default: 245762306a36Sopenharmony_ci find_uret_msr: 245862306a36Sopenharmony_ci msr = vmx_find_uret_msr(vmx, msr_index); 245962306a36Sopenharmony_ci if (msr) 246062306a36Sopenharmony_ci ret = vmx_set_guest_uret_msr(vmx, msr, data); 246162306a36Sopenharmony_ci else 246262306a36Sopenharmony_ci ret = kvm_set_msr_common(vcpu, msr_info); 246362306a36Sopenharmony_ci } 246462306a36Sopenharmony_ci 246562306a36Sopenharmony_ci /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */ 246662306a36Sopenharmony_ci if (msr_index == MSR_IA32_ARCH_CAPABILITIES) 246762306a36Sopenharmony_ci vmx_update_fb_clear_dis(vcpu, vmx); 246862306a36Sopenharmony_ci 246962306a36Sopenharmony_ci return ret; 247062306a36Sopenharmony_ci} 247162306a36Sopenharmony_ci 247262306a36Sopenharmony_cistatic void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) 247362306a36Sopenharmony_ci{ 247462306a36Sopenharmony_ci unsigned long guest_owned_bits; 247562306a36Sopenharmony_ci 247662306a36Sopenharmony_ci kvm_register_mark_available(vcpu, reg); 247762306a36Sopenharmony_ci 247862306a36Sopenharmony_ci switch (reg) { 247962306a36Sopenharmony_ci case VCPU_REGS_RSP: 248062306a36Sopenharmony_ci vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); 248162306a36Sopenharmony_ci break; 248262306a36Sopenharmony_ci case VCPU_REGS_RIP: 248362306a36Sopenharmony_ci vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP); 248462306a36Sopenharmony_ci break; 248562306a36Sopenharmony_ci case VCPU_EXREG_PDPTR: 248662306a36Sopenharmony_ci if (enable_ept) 248762306a36Sopenharmony_ci 
ept_save_pdptrs(vcpu); 248862306a36Sopenharmony_ci break; 248962306a36Sopenharmony_ci case VCPU_EXREG_CR0: 249062306a36Sopenharmony_ci guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; 249162306a36Sopenharmony_ci 249262306a36Sopenharmony_ci vcpu->arch.cr0 &= ~guest_owned_bits; 249362306a36Sopenharmony_ci vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits; 249462306a36Sopenharmony_ci break; 249562306a36Sopenharmony_ci case VCPU_EXREG_CR3: 249662306a36Sopenharmony_ci /* 249762306a36Sopenharmony_ci * When intercepting CR3 loads, e.g. for shadowing paging, KVM's 249862306a36Sopenharmony_ci * CR3 is loaded into hardware, not the guest's CR3. 249962306a36Sopenharmony_ci */ 250062306a36Sopenharmony_ci if (!(exec_controls_get(to_vmx(vcpu)) & CPU_BASED_CR3_LOAD_EXITING)) 250162306a36Sopenharmony_ci vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); 250262306a36Sopenharmony_ci break; 250362306a36Sopenharmony_ci case VCPU_EXREG_CR4: 250462306a36Sopenharmony_ci guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; 250562306a36Sopenharmony_ci 250662306a36Sopenharmony_ci vcpu->arch.cr4 &= ~guest_owned_bits; 250762306a36Sopenharmony_ci vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits; 250862306a36Sopenharmony_ci break; 250962306a36Sopenharmony_ci default: 251062306a36Sopenharmony_ci KVM_BUG_ON(1, vcpu->kvm); 251162306a36Sopenharmony_ci break; 251262306a36Sopenharmony_ci } 251362306a36Sopenharmony_ci} 251462306a36Sopenharmony_ci 251562306a36Sopenharmony_ci/* 251662306a36Sopenharmony_ci * There is no X86_FEATURE for SGX yet, but anyway we need to query CPUID 251762306a36Sopenharmony_ci * directly instead of going through cpu_has(), to ensure KVM is trapping 251862306a36Sopenharmony_ci * ENCLS whenever it's supported in hardware. It does not matter whether 251962306a36Sopenharmony_ci * the host OS supports or has enabled SGX. 252062306a36Sopenharmony_ci */ 252162306a36Sopenharmony_cistatic bool cpu_has_sgx(void) 252262306a36Sopenharmony_ci{ 252362306a36Sopenharmony_ci return cpuid_eax(0) >= 0x12 && (cpuid_eax(0x12) & BIT(0)); 252462306a36Sopenharmony_ci} 252562306a36Sopenharmony_ci 252662306a36Sopenharmony_ci/* 252762306a36Sopenharmony_ci * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they 252862306a36Sopenharmony_ci * can't be used due to errata where VM Exit may incorrectly clear 252962306a36Sopenharmony_ci * IA32_PERF_GLOBAL_CTRL[34:32]. Work around the errata by using the 253062306a36Sopenharmony_ci * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL. 
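 * The affected parts are matched below purely by family 6 model number
 * (Nehalem/Westmere era); the relevant erratum ID is noted next to each
 * model.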
253162306a36Sopenharmony_ci */ 253262306a36Sopenharmony_cistatic bool cpu_has_perf_global_ctrl_bug(void) 253362306a36Sopenharmony_ci{ 253462306a36Sopenharmony_ci if (boot_cpu_data.x86 == 0x6) { 253562306a36Sopenharmony_ci switch (boot_cpu_data.x86_model) { 253662306a36Sopenharmony_ci case INTEL_FAM6_NEHALEM_EP: /* AAK155 */ 253762306a36Sopenharmony_ci case INTEL_FAM6_NEHALEM: /* AAP115 */ 253862306a36Sopenharmony_ci case INTEL_FAM6_WESTMERE: /* AAT100 */ 253962306a36Sopenharmony_ci case INTEL_FAM6_WESTMERE_EP: /* BC86,AAY89,BD102 */ 254062306a36Sopenharmony_ci case INTEL_FAM6_NEHALEM_EX: /* BA97 */ 254162306a36Sopenharmony_ci return true; 254262306a36Sopenharmony_ci default: 254362306a36Sopenharmony_ci break; 254462306a36Sopenharmony_ci } 254562306a36Sopenharmony_ci } 254662306a36Sopenharmony_ci 254762306a36Sopenharmony_ci return false; 254862306a36Sopenharmony_ci} 254962306a36Sopenharmony_ci 255062306a36Sopenharmony_cistatic int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr, u32 *result) 255162306a36Sopenharmony_ci{ 255262306a36Sopenharmony_ci u32 vmx_msr_low, vmx_msr_high; 255362306a36Sopenharmony_ci u32 ctl = ctl_min | ctl_opt; 255462306a36Sopenharmony_ci 255562306a36Sopenharmony_ci rdmsr(msr, vmx_msr_low, vmx_msr_high); 255662306a36Sopenharmony_ci 255762306a36Sopenharmony_ci ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */ 255862306a36Sopenharmony_ci ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */ 255962306a36Sopenharmony_ci 256062306a36Sopenharmony_ci /* Ensure minimum (required) set of control bits are supported. */ 256162306a36Sopenharmony_ci if (ctl_min & ~ctl) 256262306a36Sopenharmony_ci return -EIO; 256362306a36Sopenharmony_ci 256462306a36Sopenharmony_ci *result = ctl; 256562306a36Sopenharmony_ci return 0; 256662306a36Sopenharmony_ci} 256762306a36Sopenharmony_ci 256862306a36Sopenharmony_cistatic u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr) 256962306a36Sopenharmony_ci{ 257062306a36Sopenharmony_ci u64 allowed; 257162306a36Sopenharmony_ci 257262306a36Sopenharmony_ci rdmsrl(msr, allowed); 257362306a36Sopenharmony_ci 257462306a36Sopenharmony_ci return ctl_opt & allowed; 257562306a36Sopenharmony_ci} 257662306a36Sopenharmony_ci 257762306a36Sopenharmony_cistatic int setup_vmcs_config(struct vmcs_config *vmcs_conf, 257862306a36Sopenharmony_ci struct vmx_capability *vmx_cap) 257962306a36Sopenharmony_ci{ 258062306a36Sopenharmony_ci u32 vmx_msr_low, vmx_msr_high; 258162306a36Sopenharmony_ci u32 _pin_based_exec_control = 0; 258262306a36Sopenharmony_ci u32 _cpu_based_exec_control = 0; 258362306a36Sopenharmony_ci u32 _cpu_based_2nd_exec_control = 0; 258462306a36Sopenharmony_ci u64 _cpu_based_3rd_exec_control = 0; 258562306a36Sopenharmony_ci u32 _vmexit_control = 0; 258662306a36Sopenharmony_ci u32 _vmentry_control = 0; 258762306a36Sopenharmony_ci u64 misc_msr; 258862306a36Sopenharmony_ci int i; 258962306a36Sopenharmony_ci 259062306a36Sopenharmony_ci /* 259162306a36Sopenharmony_ci * LOAD/SAVE_DEBUG_CONTROLS are absent because both are mandatory. 259262306a36Sopenharmony_ci * SAVE_IA32_PAT and SAVE_IA32_EFER are absent because KVM always 259362306a36Sopenharmony_ci * intercepts writes to PAT and EFER, i.e. never enables those controls. 
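	 * Each pair below ties a VM-Entry "load" control to its VM-Exit
	 * counterpart; the consistency loop further down either rejects the
	 * configuration or drops both controls (depending on
	 * error_on_inconsistent_vmcs_config) when only one side is supported.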
259462306a36Sopenharmony_ci */ 259562306a36Sopenharmony_ci struct { 259662306a36Sopenharmony_ci u32 entry_control; 259762306a36Sopenharmony_ci u32 exit_control; 259862306a36Sopenharmony_ci } const vmcs_entry_exit_pairs[] = { 259962306a36Sopenharmony_ci { VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL }, 260062306a36Sopenharmony_ci { VM_ENTRY_LOAD_IA32_PAT, VM_EXIT_LOAD_IA32_PAT }, 260162306a36Sopenharmony_ci { VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER }, 260262306a36Sopenharmony_ci { VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS }, 260362306a36Sopenharmony_ci { VM_ENTRY_LOAD_IA32_RTIT_CTL, VM_EXIT_CLEAR_IA32_RTIT_CTL }, 260462306a36Sopenharmony_ci }; 260562306a36Sopenharmony_ci 260662306a36Sopenharmony_ci memset(vmcs_conf, 0, sizeof(*vmcs_conf)); 260762306a36Sopenharmony_ci 260862306a36Sopenharmony_ci if (adjust_vmx_controls(KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL, 260962306a36Sopenharmony_ci KVM_OPTIONAL_VMX_CPU_BASED_VM_EXEC_CONTROL, 261062306a36Sopenharmony_ci MSR_IA32_VMX_PROCBASED_CTLS, 261162306a36Sopenharmony_ci &_cpu_based_exec_control)) 261262306a36Sopenharmony_ci return -EIO; 261362306a36Sopenharmony_ci if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { 261462306a36Sopenharmony_ci if (adjust_vmx_controls(KVM_REQUIRED_VMX_SECONDARY_VM_EXEC_CONTROL, 261562306a36Sopenharmony_ci KVM_OPTIONAL_VMX_SECONDARY_VM_EXEC_CONTROL, 261662306a36Sopenharmony_ci MSR_IA32_VMX_PROCBASED_CTLS2, 261762306a36Sopenharmony_ci &_cpu_based_2nd_exec_control)) 261862306a36Sopenharmony_ci return -EIO; 261962306a36Sopenharmony_ci } 262062306a36Sopenharmony_ci#ifndef CONFIG_X86_64 262162306a36Sopenharmony_ci if (!(_cpu_based_2nd_exec_control & 262262306a36Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) 262362306a36Sopenharmony_ci _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; 262462306a36Sopenharmony_ci#endif 262562306a36Sopenharmony_ci 262662306a36Sopenharmony_ci if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) 262762306a36Sopenharmony_ci _cpu_based_2nd_exec_control &= ~( 262862306a36Sopenharmony_ci SECONDARY_EXEC_APIC_REGISTER_VIRT | 262962306a36Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | 263062306a36Sopenharmony_ci SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); 263162306a36Sopenharmony_ci 263262306a36Sopenharmony_ci rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, 263362306a36Sopenharmony_ci &vmx_cap->ept, &vmx_cap->vpid); 263462306a36Sopenharmony_ci 263562306a36Sopenharmony_ci if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) && 263662306a36Sopenharmony_ci vmx_cap->ept) { 263762306a36Sopenharmony_ci pr_warn_once("EPT CAP should not exist if not support " 263862306a36Sopenharmony_ci "1-setting enable EPT VM-execution control\n"); 263962306a36Sopenharmony_ci 264062306a36Sopenharmony_ci if (error_on_inconsistent_vmcs_config) 264162306a36Sopenharmony_ci return -EIO; 264262306a36Sopenharmony_ci 264362306a36Sopenharmony_ci vmx_cap->ept = 0; 264462306a36Sopenharmony_ci } 264562306a36Sopenharmony_ci if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) && 264662306a36Sopenharmony_ci vmx_cap->vpid) { 264762306a36Sopenharmony_ci pr_warn_once("VPID CAP should not exist if not support " 264862306a36Sopenharmony_ci "1-setting enable VPID VM-execution control\n"); 264962306a36Sopenharmony_ci 265062306a36Sopenharmony_ci if (error_on_inconsistent_vmcs_config) 265162306a36Sopenharmony_ci return -EIO; 265262306a36Sopenharmony_ci 265362306a36Sopenharmony_ci vmx_cap->vpid = 0; 265462306a36Sopenharmony_ci } 265562306a36Sopenharmony_ci 
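	/*
	 * ENCLS exiting is only meaningful when the CPU actually reports SGX
	 * via CPUID (see cpu_has_sgx() above); strip it otherwise.
	 */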
265662306a36Sopenharmony_ci if (!cpu_has_sgx()) 265762306a36Sopenharmony_ci _cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_ENCLS_EXITING; 265862306a36Sopenharmony_ci 265962306a36Sopenharmony_ci if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) 266062306a36Sopenharmony_ci _cpu_based_3rd_exec_control = 266162306a36Sopenharmony_ci adjust_vmx_controls64(KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL, 266262306a36Sopenharmony_ci MSR_IA32_VMX_PROCBASED_CTLS3); 266362306a36Sopenharmony_ci 266462306a36Sopenharmony_ci if (adjust_vmx_controls(KVM_REQUIRED_VMX_VM_EXIT_CONTROLS, 266562306a36Sopenharmony_ci KVM_OPTIONAL_VMX_VM_EXIT_CONTROLS, 266662306a36Sopenharmony_ci MSR_IA32_VMX_EXIT_CTLS, 266762306a36Sopenharmony_ci &_vmexit_control)) 266862306a36Sopenharmony_ci return -EIO; 266962306a36Sopenharmony_ci 267062306a36Sopenharmony_ci if (adjust_vmx_controls(KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL, 267162306a36Sopenharmony_ci KVM_OPTIONAL_VMX_PIN_BASED_VM_EXEC_CONTROL, 267262306a36Sopenharmony_ci MSR_IA32_VMX_PINBASED_CTLS, 267362306a36Sopenharmony_ci &_pin_based_exec_control)) 267462306a36Sopenharmony_ci return -EIO; 267562306a36Sopenharmony_ci 267662306a36Sopenharmony_ci if (cpu_has_broken_vmx_preemption_timer()) 267762306a36Sopenharmony_ci _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; 267862306a36Sopenharmony_ci if (!(_cpu_based_2nd_exec_control & 267962306a36Sopenharmony_ci SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)) 268062306a36Sopenharmony_ci _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; 268162306a36Sopenharmony_ci 268262306a36Sopenharmony_ci if (adjust_vmx_controls(KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS, 268362306a36Sopenharmony_ci KVM_OPTIONAL_VMX_VM_ENTRY_CONTROLS, 268462306a36Sopenharmony_ci MSR_IA32_VMX_ENTRY_CTLS, 268562306a36Sopenharmony_ci &_vmentry_control)) 268662306a36Sopenharmony_ci return -EIO; 268762306a36Sopenharmony_ci 268862306a36Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) { 268962306a36Sopenharmony_ci u32 n_ctrl = vmcs_entry_exit_pairs[i].entry_control; 269062306a36Sopenharmony_ci u32 x_ctrl = vmcs_entry_exit_pairs[i].exit_control; 269162306a36Sopenharmony_ci 269262306a36Sopenharmony_ci if (!(_vmentry_control & n_ctrl) == !(_vmexit_control & x_ctrl)) 269362306a36Sopenharmony_ci continue; 269462306a36Sopenharmony_ci 269562306a36Sopenharmony_ci pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, entry = %x, exit = %x\n", 269662306a36Sopenharmony_ci _vmentry_control & n_ctrl, _vmexit_control & x_ctrl); 269762306a36Sopenharmony_ci 269862306a36Sopenharmony_ci if (error_on_inconsistent_vmcs_config) 269962306a36Sopenharmony_ci return -EIO; 270062306a36Sopenharmony_ci 270162306a36Sopenharmony_ci _vmentry_control &= ~n_ctrl; 270262306a36Sopenharmony_ci _vmexit_control &= ~x_ctrl; 270362306a36Sopenharmony_ci } 270462306a36Sopenharmony_ci 270562306a36Sopenharmony_ci rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); 270662306a36Sopenharmony_ci 270762306a36Sopenharmony_ci /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ 270862306a36Sopenharmony_ci if ((vmx_msr_high & 0x1fff) > PAGE_SIZE) 270962306a36Sopenharmony_ci return -EIO; 271062306a36Sopenharmony_ci 271162306a36Sopenharmony_ci#ifdef CONFIG_X86_64 271262306a36Sopenharmony_ci /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. 
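	 * A set bit 48 would restrict the VMXON region and VMCS pointers to
	 * 32-bit physical addresses, which never applies to 64-bit capable
	 * CPUs, hence the hard -EIO if it ever shows up.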
*/ 271362306a36Sopenharmony_ci if (vmx_msr_high & (1u<<16)) 271462306a36Sopenharmony_ci return -EIO; 271562306a36Sopenharmony_ci#endif 271662306a36Sopenharmony_ci 271762306a36Sopenharmony_ci /* Require Write-Back (WB) memory type for VMCS accesses. */ 271862306a36Sopenharmony_ci if (((vmx_msr_high >> 18) & 15) != 6) 271962306a36Sopenharmony_ci return -EIO; 272062306a36Sopenharmony_ci 272162306a36Sopenharmony_ci rdmsrl(MSR_IA32_VMX_MISC, misc_msr); 272262306a36Sopenharmony_ci 272362306a36Sopenharmony_ci vmcs_conf->size = vmx_msr_high & 0x1fff; 272462306a36Sopenharmony_ci vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; 272562306a36Sopenharmony_ci 272662306a36Sopenharmony_ci vmcs_conf->revision_id = vmx_msr_low; 272762306a36Sopenharmony_ci 272862306a36Sopenharmony_ci vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; 272962306a36Sopenharmony_ci vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; 273062306a36Sopenharmony_ci vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control; 273162306a36Sopenharmony_ci vmcs_conf->cpu_based_3rd_exec_ctrl = _cpu_based_3rd_exec_control; 273262306a36Sopenharmony_ci vmcs_conf->vmexit_ctrl = _vmexit_control; 273362306a36Sopenharmony_ci vmcs_conf->vmentry_ctrl = _vmentry_control; 273462306a36Sopenharmony_ci vmcs_conf->misc = misc_msr; 273562306a36Sopenharmony_ci 273662306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_HYPERV) 273762306a36Sopenharmony_ci if (enlightened_vmcs) 273862306a36Sopenharmony_ci evmcs_sanitize_exec_ctrls(vmcs_conf); 273962306a36Sopenharmony_ci#endif 274062306a36Sopenharmony_ci 274162306a36Sopenharmony_ci return 0; 274262306a36Sopenharmony_ci} 274362306a36Sopenharmony_ci 274462306a36Sopenharmony_cistatic bool __kvm_is_vmx_supported(void) 274562306a36Sopenharmony_ci{ 274662306a36Sopenharmony_ci int cpu = smp_processor_id(); 274762306a36Sopenharmony_ci 274862306a36Sopenharmony_ci if (!(cpuid_ecx(1) & feature_bit(VMX))) { 274962306a36Sopenharmony_ci pr_err("VMX not supported by CPU %d\n", cpu); 275062306a36Sopenharmony_ci return false; 275162306a36Sopenharmony_ci } 275262306a36Sopenharmony_ci 275362306a36Sopenharmony_ci if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || 275462306a36Sopenharmony_ci !this_cpu_has(X86_FEATURE_VMX)) { 275562306a36Sopenharmony_ci pr_err("VMX not enabled (by BIOS) in MSR_IA32_FEAT_CTL on CPU %d\n", cpu); 275662306a36Sopenharmony_ci return false; 275762306a36Sopenharmony_ci } 275862306a36Sopenharmony_ci 275962306a36Sopenharmony_ci return true; 276062306a36Sopenharmony_ci} 276162306a36Sopenharmony_ci 276262306a36Sopenharmony_cistatic bool kvm_is_vmx_supported(void) 276362306a36Sopenharmony_ci{ 276462306a36Sopenharmony_ci bool supported; 276562306a36Sopenharmony_ci 276662306a36Sopenharmony_ci migrate_disable(); 276762306a36Sopenharmony_ci supported = __kvm_is_vmx_supported(); 276862306a36Sopenharmony_ci migrate_enable(); 276962306a36Sopenharmony_ci 277062306a36Sopenharmony_ci return supported; 277162306a36Sopenharmony_ci} 277262306a36Sopenharmony_ci 277362306a36Sopenharmony_cistatic int vmx_check_processor_compat(void) 277462306a36Sopenharmony_ci{ 277562306a36Sopenharmony_ci int cpu = raw_smp_processor_id(); 277662306a36Sopenharmony_ci struct vmcs_config vmcs_conf; 277762306a36Sopenharmony_ci struct vmx_capability vmx_cap; 277862306a36Sopenharmony_ci 277962306a36Sopenharmony_ci if (!__kvm_is_vmx_supported()) 278062306a36Sopenharmony_ci return -EIO; 278162306a36Sopenharmony_ci 278262306a36Sopenharmony_ci if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) { 278362306a36Sopenharmony_ci pr_err("Failed to setup 
VMCS config on CPU %d\n", cpu); 278462306a36Sopenharmony_ci return -EIO; 278562306a36Sopenharmony_ci } 278662306a36Sopenharmony_ci if (nested) 278762306a36Sopenharmony_ci nested_vmx_setup_ctls_msrs(&vmcs_conf, vmx_cap.ept); 278862306a36Sopenharmony_ci if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config))) { 278962306a36Sopenharmony_ci pr_err("Inconsistent VMCS config on CPU %d\n", cpu); 279062306a36Sopenharmony_ci return -EIO; 279162306a36Sopenharmony_ci } 279262306a36Sopenharmony_ci return 0; 279362306a36Sopenharmony_ci} 279462306a36Sopenharmony_ci 279562306a36Sopenharmony_cistatic int kvm_cpu_vmxon(u64 vmxon_pointer) 279662306a36Sopenharmony_ci{ 279762306a36Sopenharmony_ci u64 msr; 279862306a36Sopenharmony_ci 279962306a36Sopenharmony_ci cr4_set_bits(X86_CR4_VMXE); 280062306a36Sopenharmony_ci 280162306a36Sopenharmony_ci asm goto("1: vmxon %[vmxon_pointer]\n\t" 280262306a36Sopenharmony_ci _ASM_EXTABLE(1b, %l[fault]) 280362306a36Sopenharmony_ci : : [vmxon_pointer] "m"(vmxon_pointer) 280462306a36Sopenharmony_ci : : fault); 280562306a36Sopenharmony_ci return 0; 280662306a36Sopenharmony_ci 280762306a36Sopenharmony_cifault: 280862306a36Sopenharmony_ci WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n", 280962306a36Sopenharmony_ci rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr); 281062306a36Sopenharmony_ci cr4_clear_bits(X86_CR4_VMXE); 281162306a36Sopenharmony_ci 281262306a36Sopenharmony_ci return -EFAULT; 281362306a36Sopenharmony_ci} 281462306a36Sopenharmony_ci 281562306a36Sopenharmony_cistatic int vmx_hardware_enable(void) 281662306a36Sopenharmony_ci{ 281762306a36Sopenharmony_ci int cpu = raw_smp_processor_id(); 281862306a36Sopenharmony_ci u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); 281962306a36Sopenharmony_ci int r; 282062306a36Sopenharmony_ci 282162306a36Sopenharmony_ci if (cr4_read_shadow() & X86_CR4_VMXE) 282262306a36Sopenharmony_ci return -EBUSY; 282362306a36Sopenharmony_ci 282462306a36Sopenharmony_ci /* 282562306a36Sopenharmony_ci * This can happen if we hot-added a CPU but failed to allocate 282662306a36Sopenharmony_ci * VP assist page for it. 
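	 * Failing with -EFAULT here leaves the CPU outside VMX operation
	 * rather than letting it run eVMCS without a VP assist page.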
282762306a36Sopenharmony_ci */ 282862306a36Sopenharmony_ci if (kvm_is_using_evmcs() && !hv_get_vp_assist_page(cpu)) 282962306a36Sopenharmony_ci return -EFAULT; 283062306a36Sopenharmony_ci 283162306a36Sopenharmony_ci intel_pt_handle_vmx(1); 283262306a36Sopenharmony_ci 283362306a36Sopenharmony_ci r = kvm_cpu_vmxon(phys_addr); 283462306a36Sopenharmony_ci if (r) { 283562306a36Sopenharmony_ci intel_pt_handle_vmx(0); 283662306a36Sopenharmony_ci return r; 283762306a36Sopenharmony_ci } 283862306a36Sopenharmony_ci 283962306a36Sopenharmony_ci if (enable_ept) 284062306a36Sopenharmony_ci ept_sync_global(); 284162306a36Sopenharmony_ci 284262306a36Sopenharmony_ci return 0; 284362306a36Sopenharmony_ci} 284462306a36Sopenharmony_ci 284562306a36Sopenharmony_cistatic void vmclear_local_loaded_vmcss(void) 284662306a36Sopenharmony_ci{ 284762306a36Sopenharmony_ci int cpu = raw_smp_processor_id(); 284862306a36Sopenharmony_ci struct loaded_vmcs *v, *n; 284962306a36Sopenharmony_ci 285062306a36Sopenharmony_ci list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu), 285162306a36Sopenharmony_ci loaded_vmcss_on_cpu_link) 285262306a36Sopenharmony_ci __loaded_vmcs_clear(v); 285362306a36Sopenharmony_ci} 285462306a36Sopenharmony_ci 285562306a36Sopenharmony_cistatic void vmx_hardware_disable(void) 285662306a36Sopenharmony_ci{ 285762306a36Sopenharmony_ci vmclear_local_loaded_vmcss(); 285862306a36Sopenharmony_ci 285962306a36Sopenharmony_ci if (kvm_cpu_vmxoff()) 286062306a36Sopenharmony_ci kvm_spurious_fault(); 286162306a36Sopenharmony_ci 286262306a36Sopenharmony_ci hv_reset_evmcs(); 286362306a36Sopenharmony_ci 286462306a36Sopenharmony_ci intel_pt_handle_vmx(0); 286562306a36Sopenharmony_ci} 286662306a36Sopenharmony_ci 286762306a36Sopenharmony_cistruct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) 286862306a36Sopenharmony_ci{ 286962306a36Sopenharmony_ci int node = cpu_to_node(cpu); 287062306a36Sopenharmony_ci struct page *pages; 287162306a36Sopenharmony_ci struct vmcs *vmcs; 287262306a36Sopenharmony_ci 287362306a36Sopenharmony_ci pages = __alloc_pages_node(node, flags, 0); 287462306a36Sopenharmony_ci if (!pages) 287562306a36Sopenharmony_ci return NULL; 287662306a36Sopenharmony_ci vmcs = page_address(pages); 287762306a36Sopenharmony_ci memset(vmcs, 0, vmcs_config.size); 287862306a36Sopenharmony_ci 287962306a36Sopenharmony_ci /* KVM supports Enlightened VMCS v1 only */ 288062306a36Sopenharmony_ci if (kvm_is_using_evmcs()) 288162306a36Sopenharmony_ci vmcs->hdr.revision_id = KVM_EVMCS_VERSION; 288262306a36Sopenharmony_ci else 288362306a36Sopenharmony_ci vmcs->hdr.revision_id = vmcs_config.revision_id; 288462306a36Sopenharmony_ci 288562306a36Sopenharmony_ci if (shadow) 288662306a36Sopenharmony_ci vmcs->hdr.shadow_vmcs = 1; 288762306a36Sopenharmony_ci return vmcs; 288862306a36Sopenharmony_ci} 288962306a36Sopenharmony_ci 289062306a36Sopenharmony_civoid free_vmcs(struct vmcs *vmcs) 289162306a36Sopenharmony_ci{ 289262306a36Sopenharmony_ci free_page((unsigned long)vmcs); 289362306a36Sopenharmony_ci} 289462306a36Sopenharmony_ci 289562306a36Sopenharmony_ci/* 289662306a36Sopenharmony_ci * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded 289762306a36Sopenharmony_ci */ 289862306a36Sopenharmony_civoid free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) 289962306a36Sopenharmony_ci{ 290062306a36Sopenharmony_ci if (!loaded_vmcs->vmcs) 290162306a36Sopenharmony_ci return; 290262306a36Sopenharmony_ci loaded_vmcs_clear(loaded_vmcs); 290362306a36Sopenharmony_ci free_vmcs(loaded_vmcs->vmcs); 
290462306a36Sopenharmony_ci loaded_vmcs->vmcs = NULL; 290562306a36Sopenharmony_ci if (loaded_vmcs->msr_bitmap) 290662306a36Sopenharmony_ci free_page((unsigned long)loaded_vmcs->msr_bitmap); 290762306a36Sopenharmony_ci WARN_ON(loaded_vmcs->shadow_vmcs != NULL); 290862306a36Sopenharmony_ci} 290962306a36Sopenharmony_ci 291062306a36Sopenharmony_ciint alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) 291162306a36Sopenharmony_ci{ 291262306a36Sopenharmony_ci loaded_vmcs->vmcs = alloc_vmcs(false); 291362306a36Sopenharmony_ci if (!loaded_vmcs->vmcs) 291462306a36Sopenharmony_ci return -ENOMEM; 291562306a36Sopenharmony_ci 291662306a36Sopenharmony_ci vmcs_clear(loaded_vmcs->vmcs); 291762306a36Sopenharmony_ci 291862306a36Sopenharmony_ci loaded_vmcs->shadow_vmcs = NULL; 291962306a36Sopenharmony_ci loaded_vmcs->hv_timer_soft_disabled = false; 292062306a36Sopenharmony_ci loaded_vmcs->cpu = -1; 292162306a36Sopenharmony_ci loaded_vmcs->launched = 0; 292262306a36Sopenharmony_ci 292362306a36Sopenharmony_ci if (cpu_has_vmx_msr_bitmap()) { 292462306a36Sopenharmony_ci loaded_vmcs->msr_bitmap = (unsigned long *) 292562306a36Sopenharmony_ci __get_free_page(GFP_KERNEL_ACCOUNT); 292662306a36Sopenharmony_ci if (!loaded_vmcs->msr_bitmap) 292762306a36Sopenharmony_ci goto out_vmcs; 292862306a36Sopenharmony_ci memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); 292962306a36Sopenharmony_ci } 293062306a36Sopenharmony_ci 293162306a36Sopenharmony_ci memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state)); 293262306a36Sopenharmony_ci memset(&loaded_vmcs->controls_shadow, 0, 293362306a36Sopenharmony_ci sizeof(struct vmcs_controls_shadow)); 293462306a36Sopenharmony_ci 293562306a36Sopenharmony_ci return 0; 293662306a36Sopenharmony_ci 293762306a36Sopenharmony_ciout_vmcs: 293862306a36Sopenharmony_ci free_loaded_vmcs(loaded_vmcs); 293962306a36Sopenharmony_ci return -ENOMEM; 294062306a36Sopenharmony_ci} 294162306a36Sopenharmony_ci 294262306a36Sopenharmony_cistatic void free_kvm_area(void) 294362306a36Sopenharmony_ci{ 294462306a36Sopenharmony_ci int cpu; 294562306a36Sopenharmony_ci 294662306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 294762306a36Sopenharmony_ci free_vmcs(per_cpu(vmxarea, cpu)); 294862306a36Sopenharmony_ci per_cpu(vmxarea, cpu) = NULL; 294962306a36Sopenharmony_ci } 295062306a36Sopenharmony_ci} 295162306a36Sopenharmony_ci 295262306a36Sopenharmony_cistatic __init int alloc_kvm_area(void) 295362306a36Sopenharmony_ci{ 295462306a36Sopenharmony_ci int cpu; 295562306a36Sopenharmony_ci 295662306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 295762306a36Sopenharmony_ci struct vmcs *vmcs; 295862306a36Sopenharmony_ci 295962306a36Sopenharmony_ci vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL); 296062306a36Sopenharmony_ci if (!vmcs) { 296162306a36Sopenharmony_ci free_kvm_area(); 296262306a36Sopenharmony_ci return -ENOMEM; 296362306a36Sopenharmony_ci } 296462306a36Sopenharmony_ci 296562306a36Sopenharmony_ci /* 296662306a36Sopenharmony_ci * When eVMCS is enabled, alloc_vmcs_cpu() sets 296762306a36Sopenharmony_ci * vmcs->revision_id to KVM_EVMCS_VERSION instead of 296862306a36Sopenharmony_ci * revision_id reported by MSR_IA32_VMX_BASIC. 296962306a36Sopenharmony_ci * 297062306a36Sopenharmony_ci * However, even though not explicitly documented by 297162306a36Sopenharmony_ci * TLFS, VMXArea passed as VMXON argument should 297262306a36Sopenharmony_ci * still be marked with revision_id reported by 297362306a36Sopenharmony_ci * physical CPU. 
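		 * That is why the revision_id just stamped by alloc_vmcs_cpu()
		 * is overwritten again below for the per-CPU VMXON region.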
		 */
		if (kvm_is_using_evmcs())
			vmcs->hdr.revision_id = vmcs_config.revision_id;

		per_cpu(vmxarea, cpu) = vmcs;
	}
	return 0;
}

static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
			  struct kvm_segment *save)
{
	if (!emulate_invalid_guest_state) {
		/*
		 * According to the VMX spec, CS and SS RPL should be equal on
		 * guest entry, but in reality that is not always the case.
		 * Since the vCPU is in the middle of the transition from real
		 * mode to protected mode, it is safe to assume that RPL 0 is
		 * a good default value.
		 */
		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
			save->selector &= ~SEGMENT_RPL_MASK;
		save->dpl = save->selector & SEGMENT_RPL_MASK;
		save->s = 1;
	}
	__vmx_set_segment(vcpu, save, seg);
}

static void enter_pmode(struct kvm_vcpu *vcpu)
{
	unsigned long flags;
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * Update the real mode segment cache. It may not be up to date if a
	 * segment register was written while the vCPU was running in the guest.
301062306a36Sopenharmony_ci */ 301162306a36Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); 301262306a36Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); 301362306a36Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); 301462306a36Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); 301562306a36Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); 301662306a36Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); 301762306a36Sopenharmony_ci 301862306a36Sopenharmony_ci vmx->rmode.vm86_active = 0; 301962306a36Sopenharmony_ci 302062306a36Sopenharmony_ci __vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); 302162306a36Sopenharmony_ci 302262306a36Sopenharmony_ci flags = vmcs_readl(GUEST_RFLAGS); 302362306a36Sopenharmony_ci flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; 302462306a36Sopenharmony_ci flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; 302562306a36Sopenharmony_ci vmcs_writel(GUEST_RFLAGS, flags); 302662306a36Sopenharmony_ci 302762306a36Sopenharmony_ci vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | 302862306a36Sopenharmony_ci (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME)); 302962306a36Sopenharmony_ci 303062306a36Sopenharmony_ci vmx_update_exception_bitmap(vcpu); 303162306a36Sopenharmony_ci 303262306a36Sopenharmony_ci fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); 303362306a36Sopenharmony_ci fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); 303462306a36Sopenharmony_ci fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); 303562306a36Sopenharmony_ci fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); 303662306a36Sopenharmony_ci fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); 303762306a36Sopenharmony_ci fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); 303862306a36Sopenharmony_ci} 303962306a36Sopenharmony_ci 304062306a36Sopenharmony_cistatic void fix_rmode_seg(int seg, struct kvm_segment *save) 304162306a36Sopenharmony_ci{ 304262306a36Sopenharmony_ci const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 304362306a36Sopenharmony_ci struct kvm_segment var = *save; 304462306a36Sopenharmony_ci 304562306a36Sopenharmony_ci var.dpl = 0x3; 304662306a36Sopenharmony_ci if (seg == VCPU_SREG_CS) 304762306a36Sopenharmony_ci var.type = 0x3; 304862306a36Sopenharmony_ci 304962306a36Sopenharmony_ci if (!emulate_invalid_guest_state) { 305062306a36Sopenharmony_ci var.selector = var.base >> 4; 305162306a36Sopenharmony_ci var.base = var.base & 0xffff0; 305262306a36Sopenharmony_ci var.limit = 0xffff; 305362306a36Sopenharmony_ci var.g = 0; 305462306a36Sopenharmony_ci var.db = 0; 305562306a36Sopenharmony_ci var.present = 1; 305662306a36Sopenharmony_ci var.s = 1; 305762306a36Sopenharmony_ci var.l = 0; 305862306a36Sopenharmony_ci var.unusable = 0; 305962306a36Sopenharmony_ci var.type = 0x3; 306062306a36Sopenharmony_ci var.avl = 0; 306162306a36Sopenharmony_ci if (save->base & 0xf) 306262306a36Sopenharmony_ci pr_warn_once("segment base is not paragraph aligned " 306362306a36Sopenharmony_ci "when entering protected mode (seg=%d)", seg); 306462306a36Sopenharmony_ci } 306562306a36Sopenharmony_ci 306662306a36Sopenharmony_ci vmcs_write16(sf->selector, var.selector); 306762306a36Sopenharmony_ci vmcs_writel(sf->base, var.base); 306862306a36Sopenharmony_ci 
vmcs_write32(sf->limit, var.limit); 306962306a36Sopenharmony_ci vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); 307062306a36Sopenharmony_ci} 307162306a36Sopenharmony_ci 307262306a36Sopenharmony_cistatic void enter_rmode(struct kvm_vcpu *vcpu) 307362306a36Sopenharmony_ci{ 307462306a36Sopenharmony_ci unsigned long flags; 307562306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 307662306a36Sopenharmony_ci struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm); 307762306a36Sopenharmony_ci 307862306a36Sopenharmony_ci /* 307962306a36Sopenharmony_ci * KVM should never use VM86 to virtualize Real Mode when L2 is active, 308062306a36Sopenharmony_ci * as using VM86 is unnecessary if unrestricted guest is enabled, and 308162306a36Sopenharmony_ci * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0 308262306a36Sopenharmony_ci * should VM-Fail and KVM should reject userspace attempts to stuff 308362306a36Sopenharmony_ci * CR0.PG=0 when L2 is active. 308462306a36Sopenharmony_ci */ 308562306a36Sopenharmony_ci WARN_ON_ONCE(is_guest_mode(vcpu)); 308662306a36Sopenharmony_ci 308762306a36Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); 308862306a36Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); 308962306a36Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); 309062306a36Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); 309162306a36Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); 309262306a36Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); 309362306a36Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); 309462306a36Sopenharmony_ci 309562306a36Sopenharmony_ci vmx->rmode.vm86_active = 1; 309662306a36Sopenharmony_ci 309762306a36Sopenharmony_ci vmx_segment_cache_clear(vmx); 309862306a36Sopenharmony_ci 309962306a36Sopenharmony_ci vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr); 310062306a36Sopenharmony_ci vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); 310162306a36Sopenharmony_ci vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); 310262306a36Sopenharmony_ci 310362306a36Sopenharmony_ci flags = vmcs_readl(GUEST_RFLAGS); 310462306a36Sopenharmony_ci vmx->rmode.save_rflags = flags; 310562306a36Sopenharmony_ci 310662306a36Sopenharmony_ci flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 310762306a36Sopenharmony_ci 310862306a36Sopenharmony_ci vmcs_writel(GUEST_RFLAGS, flags); 310962306a36Sopenharmony_ci vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); 311062306a36Sopenharmony_ci vmx_update_exception_bitmap(vcpu); 311162306a36Sopenharmony_ci 311262306a36Sopenharmony_ci fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); 311362306a36Sopenharmony_ci fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); 311462306a36Sopenharmony_ci fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); 311562306a36Sopenharmony_ci fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); 311662306a36Sopenharmony_ci fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); 311762306a36Sopenharmony_ci fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); 311862306a36Sopenharmony_ci} 311962306a36Sopenharmony_ci 312062306a36Sopenharmony_ciint vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) 312162306a36Sopenharmony_ci{ 312262306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 312362306a36Sopenharmony_ci 312462306a36Sopenharmony_ci /* Nothing to 
do if hardware doesn't support EFER. */ 312562306a36Sopenharmony_ci if (!vmx_find_uret_msr(vmx, MSR_EFER)) 312662306a36Sopenharmony_ci return 0; 312762306a36Sopenharmony_ci 312862306a36Sopenharmony_ci vcpu->arch.efer = efer; 312962306a36Sopenharmony_ci#ifdef CONFIG_X86_64 313062306a36Sopenharmony_ci if (efer & EFER_LMA) 313162306a36Sopenharmony_ci vm_entry_controls_setbit(vmx, VM_ENTRY_IA32E_MODE); 313262306a36Sopenharmony_ci else 313362306a36Sopenharmony_ci vm_entry_controls_clearbit(vmx, VM_ENTRY_IA32E_MODE); 313462306a36Sopenharmony_ci#else 313562306a36Sopenharmony_ci if (KVM_BUG_ON(efer & EFER_LMA, vcpu->kvm)) 313662306a36Sopenharmony_ci return 1; 313762306a36Sopenharmony_ci#endif 313862306a36Sopenharmony_ci 313962306a36Sopenharmony_ci vmx_setup_uret_msrs(vmx); 314062306a36Sopenharmony_ci return 0; 314162306a36Sopenharmony_ci} 314262306a36Sopenharmony_ci 314362306a36Sopenharmony_ci#ifdef CONFIG_X86_64 314462306a36Sopenharmony_ci 314562306a36Sopenharmony_cistatic void enter_lmode(struct kvm_vcpu *vcpu) 314662306a36Sopenharmony_ci{ 314762306a36Sopenharmony_ci u32 guest_tr_ar; 314862306a36Sopenharmony_ci 314962306a36Sopenharmony_ci vmx_segment_cache_clear(to_vmx(vcpu)); 315062306a36Sopenharmony_ci 315162306a36Sopenharmony_ci guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); 315262306a36Sopenharmony_ci if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) { 315362306a36Sopenharmony_ci pr_debug_ratelimited("%s: tss fixup for long mode. \n", 315462306a36Sopenharmony_ci __func__); 315562306a36Sopenharmony_ci vmcs_write32(GUEST_TR_AR_BYTES, 315662306a36Sopenharmony_ci (guest_tr_ar & ~VMX_AR_TYPE_MASK) 315762306a36Sopenharmony_ci | VMX_AR_TYPE_BUSY_64_TSS); 315862306a36Sopenharmony_ci } 315962306a36Sopenharmony_ci vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA); 316062306a36Sopenharmony_ci} 316162306a36Sopenharmony_ci 316262306a36Sopenharmony_cistatic void exit_lmode(struct kvm_vcpu *vcpu) 316362306a36Sopenharmony_ci{ 316462306a36Sopenharmony_ci vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); 316562306a36Sopenharmony_ci} 316662306a36Sopenharmony_ci 316762306a36Sopenharmony_ci#endif 316862306a36Sopenharmony_ci 316962306a36Sopenharmony_cistatic void vmx_flush_tlb_all(struct kvm_vcpu *vcpu) 317062306a36Sopenharmony_ci{ 317162306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 317262306a36Sopenharmony_ci 317362306a36Sopenharmony_ci /* 317462306a36Sopenharmony_ci * INVEPT must be issued when EPT is enabled, irrespective of VPID, as 317562306a36Sopenharmony_ci * the CPU is not required to invalidate guest-physical mappings on 317662306a36Sopenharmony_ci * VM-Entry, even if VPID is disabled. Guest-physical mappings are 317762306a36Sopenharmony_ci * associated with the root EPT structure and not any particular VPID 317862306a36Sopenharmony_ci * (INVVPID also isn't required to invalidate guest-physical mappings). 
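	 * Hence the ordering below: issue a global INVEPT whenever EPT is
	 * enabled, and only fall back to VPID invalidation otherwise, global
	 * if the CPU supports it, per-VPID (vpid01 and the nested vpid02)
	 * if not.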
317962306a36Sopenharmony_ci */ 318062306a36Sopenharmony_ci if (enable_ept) { 318162306a36Sopenharmony_ci ept_sync_global(); 318262306a36Sopenharmony_ci } else if (enable_vpid) { 318362306a36Sopenharmony_ci if (cpu_has_vmx_invvpid_global()) { 318462306a36Sopenharmony_ci vpid_sync_vcpu_global(); 318562306a36Sopenharmony_ci } else { 318662306a36Sopenharmony_ci vpid_sync_vcpu_single(vmx->vpid); 318762306a36Sopenharmony_ci vpid_sync_vcpu_single(vmx->nested.vpid02); 318862306a36Sopenharmony_ci } 318962306a36Sopenharmony_ci } 319062306a36Sopenharmony_ci} 319162306a36Sopenharmony_ci 319262306a36Sopenharmony_cistatic inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu) 319362306a36Sopenharmony_ci{ 319462306a36Sopenharmony_ci if (is_guest_mode(vcpu)) 319562306a36Sopenharmony_ci return nested_get_vpid02(vcpu); 319662306a36Sopenharmony_ci return to_vmx(vcpu)->vpid; 319762306a36Sopenharmony_ci} 319862306a36Sopenharmony_ci 319962306a36Sopenharmony_cistatic void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) 320062306a36Sopenharmony_ci{ 320162306a36Sopenharmony_ci struct kvm_mmu *mmu = vcpu->arch.mmu; 320262306a36Sopenharmony_ci u64 root_hpa = mmu->root.hpa; 320362306a36Sopenharmony_ci 320462306a36Sopenharmony_ci /* No flush required if the current context is invalid. */ 320562306a36Sopenharmony_ci if (!VALID_PAGE(root_hpa)) 320662306a36Sopenharmony_ci return; 320762306a36Sopenharmony_ci 320862306a36Sopenharmony_ci if (enable_ept) 320962306a36Sopenharmony_ci ept_sync_context(construct_eptp(vcpu, root_hpa, 321062306a36Sopenharmony_ci mmu->root_role.level)); 321162306a36Sopenharmony_ci else 321262306a36Sopenharmony_ci vpid_sync_context(vmx_get_current_vpid(vcpu)); 321362306a36Sopenharmony_ci} 321462306a36Sopenharmony_ci 321562306a36Sopenharmony_cistatic void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) 321662306a36Sopenharmony_ci{ 321762306a36Sopenharmony_ci /* 321862306a36Sopenharmony_ci * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in 321962306a36Sopenharmony_ci * vmx_flush_tlb_guest() for an explanation of why this is ok. 322062306a36Sopenharmony_ci */ 322162306a36Sopenharmony_ci vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr); 322262306a36Sopenharmony_ci} 322362306a36Sopenharmony_ci 322462306a36Sopenharmony_cistatic void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) 322562306a36Sopenharmony_ci{ 322662306a36Sopenharmony_ci /* 322762306a36Sopenharmony_ci * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a 322862306a36Sopenharmony_ci * vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit are 322962306a36Sopenharmony_ci * required to flush GVA->{G,H}PA mappings from the TLB if vpid is 323062306a36Sopenharmony_ci * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed), 323162306a36Sopenharmony_ci * i.e. no explicit INVVPID is necessary. 
323262306a36Sopenharmony_ci */ 323362306a36Sopenharmony_ci vpid_sync_context(vmx_get_current_vpid(vcpu)); 323462306a36Sopenharmony_ci} 323562306a36Sopenharmony_ci 323662306a36Sopenharmony_civoid vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu) 323762306a36Sopenharmony_ci{ 323862306a36Sopenharmony_ci struct kvm_mmu *mmu = vcpu->arch.walk_mmu; 323962306a36Sopenharmony_ci 324062306a36Sopenharmony_ci if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR)) 324162306a36Sopenharmony_ci return; 324262306a36Sopenharmony_ci 324362306a36Sopenharmony_ci if (is_pae_paging(vcpu)) { 324462306a36Sopenharmony_ci vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); 324562306a36Sopenharmony_ci vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); 324662306a36Sopenharmony_ci vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); 324762306a36Sopenharmony_ci vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]); 324862306a36Sopenharmony_ci } 324962306a36Sopenharmony_ci} 325062306a36Sopenharmony_ci 325162306a36Sopenharmony_civoid ept_save_pdptrs(struct kvm_vcpu *vcpu) 325262306a36Sopenharmony_ci{ 325362306a36Sopenharmony_ci struct kvm_mmu *mmu = vcpu->arch.walk_mmu; 325462306a36Sopenharmony_ci 325562306a36Sopenharmony_ci if (WARN_ON_ONCE(!is_pae_paging(vcpu))) 325662306a36Sopenharmony_ci return; 325762306a36Sopenharmony_ci 325862306a36Sopenharmony_ci mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); 325962306a36Sopenharmony_ci mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); 326062306a36Sopenharmony_ci mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); 326162306a36Sopenharmony_ci mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); 326262306a36Sopenharmony_ci 326362306a36Sopenharmony_ci kvm_register_mark_available(vcpu, VCPU_EXREG_PDPTR); 326462306a36Sopenharmony_ci} 326562306a36Sopenharmony_ci 326662306a36Sopenharmony_ci#define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \ 326762306a36Sopenharmony_ci CPU_BASED_CR3_STORE_EXITING) 326862306a36Sopenharmony_ci 326962306a36Sopenharmony_cistatic bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 327062306a36Sopenharmony_ci{ 327162306a36Sopenharmony_ci if (is_guest_mode(vcpu)) 327262306a36Sopenharmony_ci return nested_guest_cr0_valid(vcpu, cr0); 327362306a36Sopenharmony_ci 327462306a36Sopenharmony_ci if (to_vmx(vcpu)->nested.vmxon) 327562306a36Sopenharmony_ci return nested_host_cr0_valid(vcpu, cr0); 327662306a36Sopenharmony_ci 327762306a36Sopenharmony_ci return true; 327862306a36Sopenharmony_ci} 327962306a36Sopenharmony_ci 328062306a36Sopenharmony_civoid vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 328162306a36Sopenharmony_ci{ 328262306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 328362306a36Sopenharmony_ci unsigned long hw_cr0, old_cr0_pg; 328462306a36Sopenharmony_ci u32 tmp; 328562306a36Sopenharmony_ci 328662306a36Sopenharmony_ci old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG); 328762306a36Sopenharmony_ci 328862306a36Sopenharmony_ci hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF); 328962306a36Sopenharmony_ci if (enable_unrestricted_guest) 329062306a36Sopenharmony_ci hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; 329162306a36Sopenharmony_ci else { 329262306a36Sopenharmony_ci hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; 329362306a36Sopenharmony_ci if (!enable_ept) 329462306a36Sopenharmony_ci hw_cr0 |= X86_CR0_WP; 329562306a36Sopenharmony_ci 329662306a36Sopenharmony_ci if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) 329762306a36Sopenharmony_ci enter_pmode(vcpu); 329862306a36Sopenharmony_ci 329962306a36Sopenharmony_ci if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) 330062306a36Sopenharmony_ci enter_rmode(vcpu); 
330162306a36Sopenharmony_ci } 330262306a36Sopenharmony_ci 330362306a36Sopenharmony_ci vmcs_writel(CR0_READ_SHADOW, cr0); 330462306a36Sopenharmony_ci vmcs_writel(GUEST_CR0, hw_cr0); 330562306a36Sopenharmony_ci vcpu->arch.cr0 = cr0; 330662306a36Sopenharmony_ci kvm_register_mark_available(vcpu, VCPU_EXREG_CR0); 330762306a36Sopenharmony_ci 330862306a36Sopenharmony_ci#ifdef CONFIG_X86_64 330962306a36Sopenharmony_ci if (vcpu->arch.efer & EFER_LME) { 331062306a36Sopenharmony_ci if (!old_cr0_pg && (cr0 & X86_CR0_PG)) 331162306a36Sopenharmony_ci enter_lmode(vcpu); 331262306a36Sopenharmony_ci else if (old_cr0_pg && !(cr0 & X86_CR0_PG)) 331362306a36Sopenharmony_ci exit_lmode(vcpu); 331462306a36Sopenharmony_ci } 331562306a36Sopenharmony_ci#endif 331662306a36Sopenharmony_ci 331762306a36Sopenharmony_ci if (enable_ept && !enable_unrestricted_guest) { 331862306a36Sopenharmony_ci /* 331962306a36Sopenharmony_ci * Ensure KVM has an up-to-date snapshot of the guest's CR3. If 332062306a36Sopenharmony_ci * the below code _enables_ CR3 exiting, vmx_cache_reg() will 332162306a36Sopenharmony_ci * (correctly) stop reading vmcs.GUEST_CR3 because it thinks 332262306a36Sopenharmony_ci * KVM's CR3 is installed. 332362306a36Sopenharmony_ci */ 332462306a36Sopenharmony_ci if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) 332562306a36Sopenharmony_ci vmx_cache_reg(vcpu, VCPU_EXREG_CR3); 332662306a36Sopenharmony_ci 332762306a36Sopenharmony_ci /* 332862306a36Sopenharmony_ci * When running with EPT but not unrestricted guest, KVM must 332962306a36Sopenharmony_ci * intercept CR3 accesses when paging is _disabled_. This is 333062306a36Sopenharmony_ci * necessary because restricted guests can't actually run with 333162306a36Sopenharmony_ci * paging disabled, and so KVM stuffs its own CR3 in order to 333262306a36Sopenharmony_ci * run the guest when identity mapped page tables. 333362306a36Sopenharmony_ci * 333462306a36Sopenharmony_ci * Do _NOT_ check the old CR0.PG, e.g. to optimize away the 333562306a36Sopenharmony_ci * update, it may be stale with respect to CR3 interception, 333662306a36Sopenharmony_ci * e.g. after nested VM-Enter. 333762306a36Sopenharmony_ci * 333862306a36Sopenharmony_ci * Lastly, honor L1's desires, i.e. intercept CR3 loads and/or 333962306a36Sopenharmony_ci * stores to forward them to L1, even if KVM does not need to 334062306a36Sopenharmony_ci * intercept them to preserve its identity mapped page tables. 334162306a36Sopenharmony_ci */ 334262306a36Sopenharmony_ci if (!(cr0 & X86_CR0_PG)) { 334362306a36Sopenharmony_ci exec_controls_setbit(vmx, CR3_EXITING_BITS); 334462306a36Sopenharmony_ci } else if (!is_guest_mode(vcpu)) { 334562306a36Sopenharmony_ci exec_controls_clearbit(vmx, CR3_EXITING_BITS); 334662306a36Sopenharmony_ci } else { 334762306a36Sopenharmony_ci tmp = exec_controls_get(vmx); 334862306a36Sopenharmony_ci tmp &= ~CR3_EXITING_BITS; 334962306a36Sopenharmony_ci tmp |= get_vmcs12(vcpu)->cpu_based_vm_exec_control & CR3_EXITING_BITS; 335062306a36Sopenharmony_ci exec_controls_set(vmx, tmp); 335162306a36Sopenharmony_ci } 335262306a36Sopenharmony_ci 335362306a36Sopenharmony_ci /* Note, vmx_set_cr4() consumes the new vcpu->arch.cr0. */ 335462306a36Sopenharmony_ci if ((old_cr0_pg ^ cr0) & X86_CR0_PG) 335562306a36Sopenharmony_ci vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); 335662306a36Sopenharmony_ci 335762306a36Sopenharmony_ci /* 335862306a36Sopenharmony_ci * When !CR0_PG -> CR0_PG, vcpu->arch.cr3 becomes active, but 335962306a36Sopenharmony_ci * GUEST_CR3 is still vmx->ept_identity_map_addr if EPT + !URG. 
336062306a36Sopenharmony_ci */ 336162306a36Sopenharmony_ci if (!(old_cr0_pg & X86_CR0_PG) && (cr0 & X86_CR0_PG)) 336262306a36Sopenharmony_ci kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3); 336362306a36Sopenharmony_ci } 336462306a36Sopenharmony_ci 336562306a36Sopenharmony_ci /* depends on vcpu->arch.cr0 to be set to a new value */ 336662306a36Sopenharmony_ci vmx->emulation_required = vmx_emulation_required(vcpu); 336762306a36Sopenharmony_ci} 336862306a36Sopenharmony_ci 336962306a36Sopenharmony_cistatic int vmx_get_max_ept_level(void) 337062306a36Sopenharmony_ci{ 337162306a36Sopenharmony_ci if (cpu_has_vmx_ept_5levels()) 337262306a36Sopenharmony_ci return 5; 337362306a36Sopenharmony_ci return 4; 337462306a36Sopenharmony_ci} 337562306a36Sopenharmony_ci 337662306a36Sopenharmony_ciu64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level) 337762306a36Sopenharmony_ci{ 337862306a36Sopenharmony_ci u64 eptp = VMX_EPTP_MT_WB; 337962306a36Sopenharmony_ci 338062306a36Sopenharmony_ci eptp |= (root_level == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4; 338162306a36Sopenharmony_ci 338262306a36Sopenharmony_ci if (enable_ept_ad_bits && 338362306a36Sopenharmony_ci (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) 338462306a36Sopenharmony_ci eptp |= VMX_EPTP_AD_ENABLE_BIT; 338562306a36Sopenharmony_ci eptp |= root_hpa; 338662306a36Sopenharmony_ci 338762306a36Sopenharmony_ci return eptp; 338862306a36Sopenharmony_ci} 338962306a36Sopenharmony_ci 339062306a36Sopenharmony_cistatic void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, 339162306a36Sopenharmony_ci int root_level) 339262306a36Sopenharmony_ci{ 339362306a36Sopenharmony_ci struct kvm *kvm = vcpu->kvm; 339462306a36Sopenharmony_ci bool update_guest_cr3 = true; 339562306a36Sopenharmony_ci unsigned long guest_cr3; 339662306a36Sopenharmony_ci u64 eptp; 339762306a36Sopenharmony_ci 339862306a36Sopenharmony_ci if (enable_ept) { 339962306a36Sopenharmony_ci eptp = construct_eptp(vcpu, root_hpa, root_level); 340062306a36Sopenharmony_ci vmcs_write64(EPT_POINTER, eptp); 340162306a36Sopenharmony_ci 340262306a36Sopenharmony_ci hv_track_root_tdp(vcpu, root_hpa); 340362306a36Sopenharmony_ci 340462306a36Sopenharmony_ci if (!enable_unrestricted_guest && !is_paging(vcpu)) 340562306a36Sopenharmony_ci guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; 340662306a36Sopenharmony_ci else if (kvm_register_is_dirty(vcpu, VCPU_EXREG_CR3)) 340762306a36Sopenharmony_ci guest_cr3 = vcpu->arch.cr3; 340862306a36Sopenharmony_ci else /* vmcs.GUEST_CR3 is already up-to-date. */ 340962306a36Sopenharmony_ci update_guest_cr3 = false; 341062306a36Sopenharmony_ci vmx_ept_load_pdptrs(vcpu); 341162306a36Sopenharmony_ci } else { 341262306a36Sopenharmony_ci guest_cr3 = root_hpa | kvm_get_active_pcid(vcpu); 341362306a36Sopenharmony_ci } 341462306a36Sopenharmony_ci 341562306a36Sopenharmony_ci if (update_guest_cr3) 341662306a36Sopenharmony_ci vmcs_writel(GUEST_CR3, guest_cr3); 341762306a36Sopenharmony_ci} 341862306a36Sopenharmony_ci 341962306a36Sopenharmony_ci 342062306a36Sopenharmony_cistatic bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 342162306a36Sopenharmony_ci{ 342262306a36Sopenharmony_ci /* 342362306a36Sopenharmony_ci * We operate under the default treatment of SMM, so VMX cannot be 342462306a36Sopenharmony_ci * enabled under SMM. Note, whether or not VMXE is allowed at all, 342562306a36Sopenharmony_ci * i.e. is a reserved bit, is handled by common x86 code. 
342662306a36Sopenharmony_ci */ 342762306a36Sopenharmony_ci if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu)) 342862306a36Sopenharmony_ci return false; 342962306a36Sopenharmony_ci 343062306a36Sopenharmony_ci if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) 343162306a36Sopenharmony_ci return false; 343262306a36Sopenharmony_ci 343362306a36Sopenharmony_ci return true; 343462306a36Sopenharmony_ci} 343562306a36Sopenharmony_ci 343662306a36Sopenharmony_civoid vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 343762306a36Sopenharmony_ci{ 343862306a36Sopenharmony_ci unsigned long old_cr4 = kvm_read_cr4(vcpu); 343962306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 344062306a36Sopenharmony_ci unsigned long hw_cr4; 344162306a36Sopenharmony_ci 344262306a36Sopenharmony_ci /* 344362306a36Sopenharmony_ci * Pass through host's Machine Check Enable value to hw_cr4, which 344462306a36Sopenharmony_ci * is in force while we are in guest mode. Do not let guests control 344562306a36Sopenharmony_ci * this bit, even if host CR4.MCE == 0. 344662306a36Sopenharmony_ci */ 344762306a36Sopenharmony_ci hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); 344862306a36Sopenharmony_ci if (enable_unrestricted_guest) 344962306a36Sopenharmony_ci hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; 345062306a36Sopenharmony_ci else if (vmx->rmode.vm86_active) 345162306a36Sopenharmony_ci hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON; 345262306a36Sopenharmony_ci else 345362306a36Sopenharmony_ci hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; 345462306a36Sopenharmony_ci 345562306a36Sopenharmony_ci if (vmx_umip_emulated()) { 345662306a36Sopenharmony_ci if (cr4 & X86_CR4_UMIP) { 345762306a36Sopenharmony_ci secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC); 345862306a36Sopenharmony_ci hw_cr4 &= ~X86_CR4_UMIP; 345962306a36Sopenharmony_ci } else if (!is_guest_mode(vcpu) || 346062306a36Sopenharmony_ci !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) { 346162306a36Sopenharmony_ci secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC); 346262306a36Sopenharmony_ci } 346362306a36Sopenharmony_ci } 346462306a36Sopenharmony_ci 346562306a36Sopenharmony_ci vcpu->arch.cr4 = cr4; 346662306a36Sopenharmony_ci kvm_register_mark_available(vcpu, VCPU_EXREG_CR4); 346762306a36Sopenharmony_ci 346862306a36Sopenharmony_ci if (!enable_unrestricted_guest) { 346962306a36Sopenharmony_ci if (enable_ept) { 347062306a36Sopenharmony_ci if (!is_paging(vcpu)) { 347162306a36Sopenharmony_ci hw_cr4 &= ~X86_CR4_PAE; 347262306a36Sopenharmony_ci hw_cr4 |= X86_CR4_PSE; 347362306a36Sopenharmony_ci } else if (!(cr4 & X86_CR4_PAE)) { 347462306a36Sopenharmony_ci hw_cr4 &= ~X86_CR4_PAE; 347562306a36Sopenharmony_ci } 347662306a36Sopenharmony_ci } 347762306a36Sopenharmony_ci 347862306a36Sopenharmony_ci /* 347962306a36Sopenharmony_ci * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in 348062306a36Sopenharmony_ci * hardware. To emulate this behavior, SMEP/SMAP/PKU needs 348162306a36Sopenharmony_ci * to be manually disabled when guest switches to non-paging 348262306a36Sopenharmony_ci * mode. 348362306a36Sopenharmony_ci * 348462306a36Sopenharmony_ci * If !enable_unrestricted_guest, the CPU is always running 348562306a36Sopenharmony_ci * with CR0.PG=1 and CR4 needs to be modified. 348662306a36Sopenharmony_ci * If enable_unrestricted_guest, the CPU automatically 348762306a36Sopenharmony_ci * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0. 
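		 *
		 * For example, if such a guest clears CR0.PG while CR4.SMEP=1,
		 * the !is_paging() check below strips SMEP/SMAP/PKE out of
		 * hw_cr4 until vmx_set_cr4() runs again with paging enabled.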
348862306a36Sopenharmony_ci */ 348962306a36Sopenharmony_ci if (!is_paging(vcpu)) 349062306a36Sopenharmony_ci hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); 349162306a36Sopenharmony_ci } 349262306a36Sopenharmony_ci 349362306a36Sopenharmony_ci vmcs_writel(CR4_READ_SHADOW, cr4); 349462306a36Sopenharmony_ci vmcs_writel(GUEST_CR4, hw_cr4); 349562306a36Sopenharmony_ci 349662306a36Sopenharmony_ci if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) 349762306a36Sopenharmony_ci kvm_update_cpuid_runtime(vcpu); 349862306a36Sopenharmony_ci} 349962306a36Sopenharmony_ci 350062306a36Sopenharmony_civoid vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) 350162306a36Sopenharmony_ci{ 350262306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 350362306a36Sopenharmony_ci u32 ar; 350462306a36Sopenharmony_ci 350562306a36Sopenharmony_ci if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { 350662306a36Sopenharmony_ci *var = vmx->rmode.segs[seg]; 350762306a36Sopenharmony_ci if (seg == VCPU_SREG_TR 350862306a36Sopenharmony_ci || var->selector == vmx_read_guest_seg_selector(vmx, seg)) 350962306a36Sopenharmony_ci return; 351062306a36Sopenharmony_ci var->base = vmx_read_guest_seg_base(vmx, seg); 351162306a36Sopenharmony_ci var->selector = vmx_read_guest_seg_selector(vmx, seg); 351262306a36Sopenharmony_ci return; 351362306a36Sopenharmony_ci } 351462306a36Sopenharmony_ci var->base = vmx_read_guest_seg_base(vmx, seg); 351562306a36Sopenharmony_ci var->limit = vmx_read_guest_seg_limit(vmx, seg); 351662306a36Sopenharmony_ci var->selector = vmx_read_guest_seg_selector(vmx, seg); 351762306a36Sopenharmony_ci ar = vmx_read_guest_seg_ar(vmx, seg); 351862306a36Sopenharmony_ci var->unusable = (ar >> 16) & 1; 351962306a36Sopenharmony_ci var->type = ar & 15; 352062306a36Sopenharmony_ci var->s = (ar >> 4) & 1; 352162306a36Sopenharmony_ci var->dpl = (ar >> 5) & 3; 352262306a36Sopenharmony_ci /* 352362306a36Sopenharmony_ci * Some userspaces do not preserve unusable property. Since usable 352462306a36Sopenharmony_ci * segment has to be present according to VMX spec we can use present 352562306a36Sopenharmony_ci * property to amend userspace bug by making unusable segment always 352662306a36Sopenharmony_ci * nonpresent. vmx_segment_access_rights() already marks nonpresent 352762306a36Sopenharmony_ci * segment as unusable. 
352862306a36Sopenharmony_ci */ 352962306a36Sopenharmony_ci var->present = !var->unusable; 353062306a36Sopenharmony_ci var->avl = (ar >> 12) & 1; 353162306a36Sopenharmony_ci var->l = (ar >> 13) & 1; 353262306a36Sopenharmony_ci var->db = (ar >> 14) & 1; 353362306a36Sopenharmony_ci var->g = (ar >> 15) & 1; 353462306a36Sopenharmony_ci} 353562306a36Sopenharmony_ci 353662306a36Sopenharmony_cistatic u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) 353762306a36Sopenharmony_ci{ 353862306a36Sopenharmony_ci struct kvm_segment s; 353962306a36Sopenharmony_ci 354062306a36Sopenharmony_ci if (to_vmx(vcpu)->rmode.vm86_active) { 354162306a36Sopenharmony_ci vmx_get_segment(vcpu, &s, seg); 354262306a36Sopenharmony_ci return s.base; 354362306a36Sopenharmony_ci } 354462306a36Sopenharmony_ci return vmx_read_guest_seg_base(to_vmx(vcpu), seg); 354562306a36Sopenharmony_ci} 354662306a36Sopenharmony_ci 354762306a36Sopenharmony_ciint vmx_get_cpl(struct kvm_vcpu *vcpu) 354862306a36Sopenharmony_ci{ 354962306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 355062306a36Sopenharmony_ci 355162306a36Sopenharmony_ci if (unlikely(vmx->rmode.vm86_active)) 355262306a36Sopenharmony_ci return 0; 355362306a36Sopenharmony_ci else { 355462306a36Sopenharmony_ci int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS); 355562306a36Sopenharmony_ci return VMX_AR_DPL(ar); 355662306a36Sopenharmony_ci } 355762306a36Sopenharmony_ci} 355862306a36Sopenharmony_ci 355962306a36Sopenharmony_cistatic u32 vmx_segment_access_rights(struct kvm_segment *var) 356062306a36Sopenharmony_ci{ 356162306a36Sopenharmony_ci u32 ar; 356262306a36Sopenharmony_ci 356362306a36Sopenharmony_ci ar = var->type & 15; 356462306a36Sopenharmony_ci ar |= (var->s & 1) << 4; 356562306a36Sopenharmony_ci ar |= (var->dpl & 3) << 5; 356662306a36Sopenharmony_ci ar |= (var->present & 1) << 7; 356762306a36Sopenharmony_ci ar |= (var->avl & 1) << 12; 356862306a36Sopenharmony_ci ar |= (var->l & 1) << 13; 356962306a36Sopenharmony_ci ar |= (var->db & 1) << 14; 357062306a36Sopenharmony_ci ar |= (var->g & 1) << 15; 357162306a36Sopenharmony_ci ar |= (var->unusable || !var->present) << 16; 357262306a36Sopenharmony_ci 357362306a36Sopenharmony_ci return ar; 357462306a36Sopenharmony_ci} 357562306a36Sopenharmony_ci 357662306a36Sopenharmony_civoid __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) 357762306a36Sopenharmony_ci{ 357862306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 357962306a36Sopenharmony_ci const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 358062306a36Sopenharmony_ci 358162306a36Sopenharmony_ci vmx_segment_cache_clear(vmx); 358262306a36Sopenharmony_ci 358362306a36Sopenharmony_ci if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { 358462306a36Sopenharmony_ci vmx->rmode.segs[seg] = *var; 358562306a36Sopenharmony_ci if (seg == VCPU_SREG_TR) 358662306a36Sopenharmony_ci vmcs_write16(sf->selector, var->selector); 358762306a36Sopenharmony_ci else if (var->s) 358862306a36Sopenharmony_ci fix_rmode_seg(seg, &vmx->rmode.segs[seg]); 358962306a36Sopenharmony_ci return; 359062306a36Sopenharmony_ci } 359162306a36Sopenharmony_ci 359262306a36Sopenharmony_ci vmcs_writel(sf->base, var->base); 359362306a36Sopenharmony_ci vmcs_write32(sf->limit, var->limit); 359462306a36Sopenharmony_ci vmcs_write16(sf->selector, var->selector); 359562306a36Sopenharmony_ci 359662306a36Sopenharmony_ci /* 359762306a36Sopenharmony_ci * Fix the "Accessed" bit in AR field of segment registers for older 359862306a36Sopenharmony_ci * qemu binaries. 
359962306a36Sopenharmony_ci * IA32 arch specifies that at the time of processor reset the 360062306a36Sopenharmony_ci * "Accessed" bit in the AR field of segment registers is 1. And qemu 360162306a36Sopenharmony_ci * is setting it to 0 in the userland code. This causes invalid guest 360262306a36Sopenharmony_ci * state vmexit when "unrestricted guest" mode is turned on. 360362306a36Sopenharmony_ci * Fix for this setup issue in cpu_reset is being pushed in the qemu 360462306a36Sopenharmony_ci * tree. Newer qemu binaries with that qemu fix would not need this 360562306a36Sopenharmony_ci * kvm hack. 360662306a36Sopenharmony_ci */ 360762306a36Sopenharmony_ci if (is_unrestricted_guest(vcpu) && (seg != VCPU_SREG_LDTR)) 360862306a36Sopenharmony_ci var->type |= 0x1; /* Accessed */ 360962306a36Sopenharmony_ci 361062306a36Sopenharmony_ci vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); 361162306a36Sopenharmony_ci} 361262306a36Sopenharmony_ci 361362306a36Sopenharmony_cistatic void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) 361462306a36Sopenharmony_ci{ 361562306a36Sopenharmony_ci __vmx_set_segment(vcpu, var, seg); 361662306a36Sopenharmony_ci 361762306a36Sopenharmony_ci to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu); 361862306a36Sopenharmony_ci} 361962306a36Sopenharmony_ci 362062306a36Sopenharmony_cistatic void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 362162306a36Sopenharmony_ci{ 362262306a36Sopenharmony_ci u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); 362362306a36Sopenharmony_ci 362462306a36Sopenharmony_ci *db = (ar >> 14) & 1; 362562306a36Sopenharmony_ci *l = (ar >> 13) & 1; 362662306a36Sopenharmony_ci} 362762306a36Sopenharmony_ci 362862306a36Sopenharmony_cistatic void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 362962306a36Sopenharmony_ci{ 363062306a36Sopenharmony_ci dt->size = vmcs_read32(GUEST_IDTR_LIMIT); 363162306a36Sopenharmony_ci dt->address = vmcs_readl(GUEST_IDTR_BASE); 363262306a36Sopenharmony_ci} 363362306a36Sopenharmony_ci 363462306a36Sopenharmony_cistatic void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 363562306a36Sopenharmony_ci{ 363662306a36Sopenharmony_ci vmcs_write32(GUEST_IDTR_LIMIT, dt->size); 363762306a36Sopenharmony_ci vmcs_writel(GUEST_IDTR_BASE, dt->address); 363862306a36Sopenharmony_ci} 363962306a36Sopenharmony_ci 364062306a36Sopenharmony_cistatic void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 364162306a36Sopenharmony_ci{ 364262306a36Sopenharmony_ci dt->size = vmcs_read32(GUEST_GDTR_LIMIT); 364362306a36Sopenharmony_ci dt->address = vmcs_readl(GUEST_GDTR_BASE); 364462306a36Sopenharmony_ci} 364562306a36Sopenharmony_ci 364662306a36Sopenharmony_cistatic void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 364762306a36Sopenharmony_ci{ 364862306a36Sopenharmony_ci vmcs_write32(GUEST_GDTR_LIMIT, dt->size); 364962306a36Sopenharmony_ci vmcs_writel(GUEST_GDTR_BASE, dt->address); 365062306a36Sopenharmony_ci} 365162306a36Sopenharmony_ci 365262306a36Sopenharmony_cistatic bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) 365362306a36Sopenharmony_ci{ 365462306a36Sopenharmony_ci struct kvm_segment var; 365562306a36Sopenharmony_ci u32 ar; 365662306a36Sopenharmony_ci 365762306a36Sopenharmony_ci vmx_get_segment(vcpu, &var, seg); 365862306a36Sopenharmony_ci var.dpl = 0x3; 365962306a36Sopenharmony_ci if (seg == VCPU_SREG_CS) 366062306a36Sopenharmony_ci var.type = 0x3; 366162306a36Sopenharmony_ci ar = vmx_segment_access_rights(&var); 
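	/*
	 * Worked example: the checks below expect ar == 0xf3, i.e.
	 *
	 *	type 3 | (S=1) << 4 | (DPL=3) << 5 | (P=1) << 7 == 0xf3
	 *
	 * with AVL, L, D/B, G and the "unusable" bit (bit 16) all clear,
	 * which is the canonical access-rights value for a segment in
	 * virtual-8086 mode and matches the dpl/type overrides applied to
	 * "var" just above.
	 */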
366262306a36Sopenharmony_ci 366362306a36Sopenharmony_ci if (var.base != (var.selector << 4)) 366462306a36Sopenharmony_ci return false; 366562306a36Sopenharmony_ci if (var.limit != 0xffff) 366662306a36Sopenharmony_ci return false; 366762306a36Sopenharmony_ci if (ar != 0xf3) 366862306a36Sopenharmony_ci return false; 366962306a36Sopenharmony_ci 367062306a36Sopenharmony_ci return true; 367162306a36Sopenharmony_ci} 367262306a36Sopenharmony_ci 367362306a36Sopenharmony_cistatic bool code_segment_valid(struct kvm_vcpu *vcpu) 367462306a36Sopenharmony_ci{ 367562306a36Sopenharmony_ci struct kvm_segment cs; 367662306a36Sopenharmony_ci unsigned int cs_rpl; 367762306a36Sopenharmony_ci 367862306a36Sopenharmony_ci vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); 367962306a36Sopenharmony_ci cs_rpl = cs.selector & SEGMENT_RPL_MASK; 368062306a36Sopenharmony_ci 368162306a36Sopenharmony_ci if (cs.unusable) 368262306a36Sopenharmony_ci return false; 368362306a36Sopenharmony_ci if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK)) 368462306a36Sopenharmony_ci return false; 368562306a36Sopenharmony_ci if (!cs.s) 368662306a36Sopenharmony_ci return false; 368762306a36Sopenharmony_ci if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) { 368862306a36Sopenharmony_ci if (cs.dpl > cs_rpl) 368962306a36Sopenharmony_ci return false; 369062306a36Sopenharmony_ci } else { 369162306a36Sopenharmony_ci if (cs.dpl != cs_rpl) 369262306a36Sopenharmony_ci return false; 369362306a36Sopenharmony_ci } 369462306a36Sopenharmony_ci if (!cs.present) 369562306a36Sopenharmony_ci return false; 369662306a36Sopenharmony_ci 369762306a36Sopenharmony_ci /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */ 369862306a36Sopenharmony_ci return true; 369962306a36Sopenharmony_ci} 370062306a36Sopenharmony_ci 370162306a36Sopenharmony_cistatic bool stack_segment_valid(struct kvm_vcpu *vcpu) 370262306a36Sopenharmony_ci{ 370362306a36Sopenharmony_ci struct kvm_segment ss; 370462306a36Sopenharmony_ci unsigned int ss_rpl; 370562306a36Sopenharmony_ci 370662306a36Sopenharmony_ci vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); 370762306a36Sopenharmony_ci ss_rpl = ss.selector & SEGMENT_RPL_MASK; 370862306a36Sopenharmony_ci 370962306a36Sopenharmony_ci if (ss.unusable) 371062306a36Sopenharmony_ci return true; 371162306a36Sopenharmony_ci if (ss.type != 3 && ss.type != 7) 371262306a36Sopenharmony_ci return false; 371362306a36Sopenharmony_ci if (!ss.s) 371462306a36Sopenharmony_ci return false; 371562306a36Sopenharmony_ci if (ss.dpl != ss_rpl) /* DPL != RPL */ 371662306a36Sopenharmony_ci return false; 371762306a36Sopenharmony_ci if (!ss.present) 371862306a36Sopenharmony_ci return false; 371962306a36Sopenharmony_ci 372062306a36Sopenharmony_ci return true; 372162306a36Sopenharmony_ci} 372262306a36Sopenharmony_ci 372362306a36Sopenharmony_cistatic bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) 372462306a36Sopenharmony_ci{ 372562306a36Sopenharmony_ci struct kvm_segment var; 372662306a36Sopenharmony_ci unsigned int rpl; 372762306a36Sopenharmony_ci 372862306a36Sopenharmony_ci vmx_get_segment(vcpu, &var, seg); 372962306a36Sopenharmony_ci rpl = var.selector & SEGMENT_RPL_MASK; 373062306a36Sopenharmony_ci 373162306a36Sopenharmony_ci if (var.unusable) 373262306a36Sopenharmony_ci return true; 373362306a36Sopenharmony_ci if (!var.s) 373462306a36Sopenharmony_ci return false; 373562306a36Sopenharmony_ci if (!var.present) 373662306a36Sopenharmony_ci return false; 373762306a36Sopenharmony_ci if (~var.type & 
(VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) { 373862306a36Sopenharmony_ci if (var.dpl < rpl) /* DPL < RPL */ 373962306a36Sopenharmony_ci return false; 374062306a36Sopenharmony_ci } 374162306a36Sopenharmony_ci 374262306a36Sopenharmony_ci /* TODO: Add other members to kvm_segment_field to allow checking for other access 374362306a36Sopenharmony_ci * rights flags 374462306a36Sopenharmony_ci */ 374562306a36Sopenharmony_ci return true; 374662306a36Sopenharmony_ci} 374762306a36Sopenharmony_ci 374862306a36Sopenharmony_cistatic bool tr_valid(struct kvm_vcpu *vcpu) 374962306a36Sopenharmony_ci{ 375062306a36Sopenharmony_ci struct kvm_segment tr; 375162306a36Sopenharmony_ci 375262306a36Sopenharmony_ci vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); 375362306a36Sopenharmony_ci 375462306a36Sopenharmony_ci if (tr.unusable) 375562306a36Sopenharmony_ci return false; 375662306a36Sopenharmony_ci if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */ 375762306a36Sopenharmony_ci return false; 375862306a36Sopenharmony_ci if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */ 375962306a36Sopenharmony_ci return false; 376062306a36Sopenharmony_ci if (!tr.present) 376162306a36Sopenharmony_ci return false; 376262306a36Sopenharmony_ci 376362306a36Sopenharmony_ci return true; 376462306a36Sopenharmony_ci} 376562306a36Sopenharmony_ci 376662306a36Sopenharmony_cistatic bool ldtr_valid(struct kvm_vcpu *vcpu) 376762306a36Sopenharmony_ci{ 376862306a36Sopenharmony_ci struct kvm_segment ldtr; 376962306a36Sopenharmony_ci 377062306a36Sopenharmony_ci vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); 377162306a36Sopenharmony_ci 377262306a36Sopenharmony_ci if (ldtr.unusable) 377362306a36Sopenharmony_ci return true; 377462306a36Sopenharmony_ci if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */ 377562306a36Sopenharmony_ci return false; 377662306a36Sopenharmony_ci if (ldtr.type != 2) 377762306a36Sopenharmony_ci return false; 377862306a36Sopenharmony_ci if (!ldtr.present) 377962306a36Sopenharmony_ci return false; 378062306a36Sopenharmony_ci 378162306a36Sopenharmony_ci return true; 378262306a36Sopenharmony_ci} 378362306a36Sopenharmony_ci 378462306a36Sopenharmony_cistatic bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) 378562306a36Sopenharmony_ci{ 378662306a36Sopenharmony_ci struct kvm_segment cs, ss; 378762306a36Sopenharmony_ci 378862306a36Sopenharmony_ci vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); 378962306a36Sopenharmony_ci vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); 379062306a36Sopenharmony_ci 379162306a36Sopenharmony_ci return ((cs.selector & SEGMENT_RPL_MASK) == 379262306a36Sopenharmony_ci (ss.selector & SEGMENT_RPL_MASK)); 379362306a36Sopenharmony_ci} 379462306a36Sopenharmony_ci 379562306a36Sopenharmony_ci/* 379662306a36Sopenharmony_ci * Check if guest state is valid. Returns true if valid, false if 379762306a36Sopenharmony_ci * not. 
379862306a36Sopenharmony_ci * We assume that registers are always usable 379962306a36Sopenharmony_ci */ 380062306a36Sopenharmony_cibool __vmx_guest_state_valid(struct kvm_vcpu *vcpu) 380162306a36Sopenharmony_ci{ 380262306a36Sopenharmony_ci /* real mode guest state checks */ 380362306a36Sopenharmony_ci if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { 380462306a36Sopenharmony_ci if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) 380562306a36Sopenharmony_ci return false; 380662306a36Sopenharmony_ci if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) 380762306a36Sopenharmony_ci return false; 380862306a36Sopenharmony_ci if (!rmode_segment_valid(vcpu, VCPU_SREG_DS)) 380962306a36Sopenharmony_ci return false; 381062306a36Sopenharmony_ci if (!rmode_segment_valid(vcpu, VCPU_SREG_ES)) 381162306a36Sopenharmony_ci return false; 381262306a36Sopenharmony_ci if (!rmode_segment_valid(vcpu, VCPU_SREG_FS)) 381362306a36Sopenharmony_ci return false; 381462306a36Sopenharmony_ci if (!rmode_segment_valid(vcpu, VCPU_SREG_GS)) 381562306a36Sopenharmony_ci return false; 381662306a36Sopenharmony_ci } else { 381762306a36Sopenharmony_ci /* protected mode guest state checks */ 381862306a36Sopenharmony_ci if (!cs_ss_rpl_check(vcpu)) 381962306a36Sopenharmony_ci return false; 382062306a36Sopenharmony_ci if (!code_segment_valid(vcpu)) 382162306a36Sopenharmony_ci return false; 382262306a36Sopenharmony_ci if (!stack_segment_valid(vcpu)) 382362306a36Sopenharmony_ci return false; 382462306a36Sopenharmony_ci if (!data_segment_valid(vcpu, VCPU_SREG_DS)) 382562306a36Sopenharmony_ci return false; 382662306a36Sopenharmony_ci if (!data_segment_valid(vcpu, VCPU_SREG_ES)) 382762306a36Sopenharmony_ci return false; 382862306a36Sopenharmony_ci if (!data_segment_valid(vcpu, VCPU_SREG_FS)) 382962306a36Sopenharmony_ci return false; 383062306a36Sopenharmony_ci if (!data_segment_valid(vcpu, VCPU_SREG_GS)) 383162306a36Sopenharmony_ci return false; 383262306a36Sopenharmony_ci if (!tr_valid(vcpu)) 383362306a36Sopenharmony_ci return false; 383462306a36Sopenharmony_ci if (!ldtr_valid(vcpu)) 383562306a36Sopenharmony_ci return false; 383662306a36Sopenharmony_ci } 383762306a36Sopenharmony_ci /* TODO: 383862306a36Sopenharmony_ci * - Add checks on RIP 383962306a36Sopenharmony_ci * - Add checks on RFLAGS 384062306a36Sopenharmony_ci */ 384162306a36Sopenharmony_ci 384262306a36Sopenharmony_ci return true; 384362306a36Sopenharmony_ci} 384462306a36Sopenharmony_ci 384562306a36Sopenharmony_cistatic int init_rmode_tss(struct kvm *kvm, void __user *ua) 384662306a36Sopenharmony_ci{ 384762306a36Sopenharmony_ci const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0))); 384862306a36Sopenharmony_ci u16 data; 384962306a36Sopenharmony_ci int i; 385062306a36Sopenharmony_ci 385162306a36Sopenharmony_ci for (i = 0; i < 3; i++) { 385262306a36Sopenharmony_ci if (__copy_to_user(ua + PAGE_SIZE * i, zero_page, PAGE_SIZE)) 385362306a36Sopenharmony_ci return -EFAULT; 385462306a36Sopenharmony_ci } 385562306a36Sopenharmony_ci 385662306a36Sopenharmony_ci data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; 385762306a36Sopenharmony_ci if (__copy_to_user(ua + TSS_IOPB_BASE_OFFSET, &data, sizeof(u16))) 385862306a36Sopenharmony_ci return -EFAULT; 385962306a36Sopenharmony_ci 386062306a36Sopenharmony_ci data = ~0; 386162306a36Sopenharmony_ci if (__copy_to_user(ua + RMODE_TSS_SIZE - 1, &data, sizeof(u8))) 386262306a36Sopenharmony_ci return -EFAULT; 386362306a36Sopenharmony_ci 386462306a36Sopenharmony_ci return 0; 386562306a36Sopenharmony_ci} 386662306a36Sopenharmony_ci 
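/*
 * For reference, the layout that init_rmode_tss() above writes into the
 * real-mode TSS slot (a sketch of the resulting guest memory, assuming the
 * usual three-page slot at the address configured via KVM_SET_TSS_ADDR):
 *
 *	bytes 0 .. 3*PAGE_SIZE - 1      zeroed
 *	u16 at TSS_IOPB_BASE_OFFSET     TSS_BASE_SIZE + TSS_REDIRECTION_SIZE,
 *	                                placing the I/O permission bitmap
 *	                                right after the redirection map
 *	byte at RMODE_TSS_SIZE - 1      0xff, the conventional terminator of
 *	                                the I/O permission bitmap
 */
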
386762306a36Sopenharmony_cistatic int init_rmode_identity_map(struct kvm *kvm) 386862306a36Sopenharmony_ci{ 386962306a36Sopenharmony_ci struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); 387062306a36Sopenharmony_ci int i, r = 0; 387162306a36Sopenharmony_ci void __user *uaddr; 387262306a36Sopenharmony_ci u32 tmp; 387362306a36Sopenharmony_ci 387462306a36Sopenharmony_ci /* Protect kvm_vmx->ept_identity_pagetable_done. */ 387562306a36Sopenharmony_ci mutex_lock(&kvm->slots_lock); 387662306a36Sopenharmony_ci 387762306a36Sopenharmony_ci if (likely(kvm_vmx->ept_identity_pagetable_done)) 387862306a36Sopenharmony_ci goto out; 387962306a36Sopenharmony_ci 388062306a36Sopenharmony_ci if (!kvm_vmx->ept_identity_map_addr) 388162306a36Sopenharmony_ci kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; 388262306a36Sopenharmony_ci 388362306a36Sopenharmony_ci uaddr = __x86_set_memory_region(kvm, 388462306a36Sopenharmony_ci IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 388562306a36Sopenharmony_ci kvm_vmx->ept_identity_map_addr, 388662306a36Sopenharmony_ci PAGE_SIZE); 388762306a36Sopenharmony_ci if (IS_ERR(uaddr)) { 388862306a36Sopenharmony_ci r = PTR_ERR(uaddr); 388962306a36Sopenharmony_ci goto out; 389062306a36Sopenharmony_ci } 389162306a36Sopenharmony_ci 389262306a36Sopenharmony_ci /* Set up identity-mapping pagetable for EPT in real mode */ 389362306a36Sopenharmony_ci for (i = 0; i < (PAGE_SIZE / sizeof(tmp)); i++) { 389462306a36Sopenharmony_ci tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | 389562306a36Sopenharmony_ci _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); 389662306a36Sopenharmony_ci if (__copy_to_user(uaddr + i * sizeof(tmp), &tmp, sizeof(tmp))) { 389762306a36Sopenharmony_ci r = -EFAULT; 389862306a36Sopenharmony_ci goto out; 389962306a36Sopenharmony_ci } 390062306a36Sopenharmony_ci } 390162306a36Sopenharmony_ci kvm_vmx->ept_identity_pagetable_done = true; 390262306a36Sopenharmony_ci 390362306a36Sopenharmony_ciout: 390462306a36Sopenharmony_ci mutex_unlock(&kvm->slots_lock); 390562306a36Sopenharmony_ci return r; 390662306a36Sopenharmony_ci} 390762306a36Sopenharmony_ci 390862306a36Sopenharmony_cistatic void seg_setup(int seg) 390962306a36Sopenharmony_ci{ 391062306a36Sopenharmony_ci const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 391162306a36Sopenharmony_ci unsigned int ar; 391262306a36Sopenharmony_ci 391362306a36Sopenharmony_ci vmcs_write16(sf->selector, 0); 391462306a36Sopenharmony_ci vmcs_writel(sf->base, 0); 391562306a36Sopenharmony_ci vmcs_write32(sf->limit, 0xffff); 391662306a36Sopenharmony_ci ar = 0x93; 391762306a36Sopenharmony_ci if (seg == VCPU_SREG_CS) 391862306a36Sopenharmony_ci ar |= 0x08; /* code segment */ 391962306a36Sopenharmony_ci 392062306a36Sopenharmony_ci vmcs_write32(sf->ar_bytes, ar); 392162306a36Sopenharmony_ci} 392262306a36Sopenharmony_ci 392362306a36Sopenharmony_ciint allocate_vpid(void) 392462306a36Sopenharmony_ci{ 392562306a36Sopenharmony_ci int vpid; 392662306a36Sopenharmony_ci 392762306a36Sopenharmony_ci if (!enable_vpid) 392862306a36Sopenharmony_ci return 0; 392962306a36Sopenharmony_ci spin_lock(&vmx_vpid_lock); 393062306a36Sopenharmony_ci vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); 393162306a36Sopenharmony_ci if (vpid < VMX_NR_VPIDS) 393262306a36Sopenharmony_ci __set_bit(vpid, vmx_vpid_bitmap); 393362306a36Sopenharmony_ci else 393462306a36Sopenharmony_ci vpid = 0; 393562306a36Sopenharmony_ci spin_unlock(&vmx_vpid_lock); 393662306a36Sopenharmony_ci return vpid; 393762306a36Sopenharmony_ci} 393862306a36Sopenharmony_ci 
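/*
 * Minimal usage sketch for the VPID allocator above (illustrative only):
 *
 *	vmx->vpid = allocate_vpid();
 *	...
 *	free_vpid(vmx->vpid);
 *
 * allocate_vpid() returns 0 when VPIDs are disabled or the 16-bit VPID space
 * is exhausted, and free_vpid(0) is a no-op, so callers can treat vpid 0 as
 * "no VPID" without special casing; VM-Enter with the "enable VPID" control
 * set and VPID=0 is disallowed, see the comment in vmx_flush_tlb_guest().
 */
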
393962306a36Sopenharmony_civoid free_vpid(int vpid) 394062306a36Sopenharmony_ci{ 394162306a36Sopenharmony_ci if (!enable_vpid || vpid == 0) 394262306a36Sopenharmony_ci return; 394362306a36Sopenharmony_ci spin_lock(&vmx_vpid_lock); 394462306a36Sopenharmony_ci __clear_bit(vpid, vmx_vpid_bitmap); 394562306a36Sopenharmony_ci spin_unlock(&vmx_vpid_lock); 394662306a36Sopenharmony_ci} 394762306a36Sopenharmony_ci 394862306a36Sopenharmony_cistatic void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx) 394962306a36Sopenharmony_ci{ 395062306a36Sopenharmony_ci /* 395162306a36Sopenharmony_ci * When KVM is a nested hypervisor on top of Hyper-V and uses 395262306a36Sopenharmony_ci * 'Enlightened MSR Bitmap' feature L0 needs to know that MSR 395362306a36Sopenharmony_ci * bitmap has changed. 395462306a36Sopenharmony_ci */ 395562306a36Sopenharmony_ci if (kvm_is_using_evmcs()) { 395662306a36Sopenharmony_ci struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs; 395762306a36Sopenharmony_ci 395862306a36Sopenharmony_ci if (evmcs->hv_enlightenments_control.msr_bitmap) 395962306a36Sopenharmony_ci evmcs->hv_clean_fields &= 396062306a36Sopenharmony_ci ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; 396162306a36Sopenharmony_ci } 396262306a36Sopenharmony_ci 396362306a36Sopenharmony_ci vmx->nested.force_msr_bitmap_recalc = true; 396462306a36Sopenharmony_ci} 396562306a36Sopenharmony_ci 396662306a36Sopenharmony_civoid vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type) 396762306a36Sopenharmony_ci{ 396862306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 396962306a36Sopenharmony_ci unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; 397062306a36Sopenharmony_ci 397162306a36Sopenharmony_ci if (!cpu_has_vmx_msr_bitmap()) 397262306a36Sopenharmony_ci return; 397362306a36Sopenharmony_ci 397462306a36Sopenharmony_ci vmx_msr_bitmap_l01_changed(vmx); 397562306a36Sopenharmony_ci 397662306a36Sopenharmony_ci /* 397762306a36Sopenharmony_ci * Mark the desired intercept state in shadow bitmap, this is needed 397862306a36Sopenharmony_ci * for resync when the MSR filters change. 
397962306a36Sopenharmony_ci */ 398062306a36Sopenharmony_ci if (is_valid_passthrough_msr(msr)) { 398162306a36Sopenharmony_ci int idx = possible_passthrough_msr_slot(msr); 398262306a36Sopenharmony_ci 398362306a36Sopenharmony_ci if (idx != -ENOENT) { 398462306a36Sopenharmony_ci if (type & MSR_TYPE_R) 398562306a36Sopenharmony_ci clear_bit(idx, vmx->shadow_msr_intercept.read); 398662306a36Sopenharmony_ci if (type & MSR_TYPE_W) 398762306a36Sopenharmony_ci clear_bit(idx, vmx->shadow_msr_intercept.write); 398862306a36Sopenharmony_ci } 398962306a36Sopenharmony_ci } 399062306a36Sopenharmony_ci 399162306a36Sopenharmony_ci if ((type & MSR_TYPE_R) && 399262306a36Sopenharmony_ci !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) { 399362306a36Sopenharmony_ci vmx_set_msr_bitmap_read(msr_bitmap, msr); 399462306a36Sopenharmony_ci type &= ~MSR_TYPE_R; 399562306a36Sopenharmony_ci } 399662306a36Sopenharmony_ci 399762306a36Sopenharmony_ci if ((type & MSR_TYPE_W) && 399862306a36Sopenharmony_ci !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE)) { 399962306a36Sopenharmony_ci vmx_set_msr_bitmap_write(msr_bitmap, msr); 400062306a36Sopenharmony_ci type &= ~MSR_TYPE_W; 400162306a36Sopenharmony_ci } 400262306a36Sopenharmony_ci 400362306a36Sopenharmony_ci if (type & MSR_TYPE_R) 400462306a36Sopenharmony_ci vmx_clear_msr_bitmap_read(msr_bitmap, msr); 400562306a36Sopenharmony_ci 400662306a36Sopenharmony_ci if (type & MSR_TYPE_W) 400762306a36Sopenharmony_ci vmx_clear_msr_bitmap_write(msr_bitmap, msr); 400862306a36Sopenharmony_ci} 400962306a36Sopenharmony_ci 401062306a36Sopenharmony_civoid vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type) 401162306a36Sopenharmony_ci{ 401262306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 401362306a36Sopenharmony_ci unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; 401462306a36Sopenharmony_ci 401562306a36Sopenharmony_ci if (!cpu_has_vmx_msr_bitmap()) 401662306a36Sopenharmony_ci return; 401762306a36Sopenharmony_ci 401862306a36Sopenharmony_ci vmx_msr_bitmap_l01_changed(vmx); 401962306a36Sopenharmony_ci 402062306a36Sopenharmony_ci /* 402162306a36Sopenharmony_ci * Mark the desired intercept state in shadow bitmap, this is needed 402262306a36Sopenharmony_ci * for resync when the MSR filter changes. 402362306a36Sopenharmony_ci */ 402462306a36Sopenharmony_ci if (is_valid_passthrough_msr(msr)) { 402562306a36Sopenharmony_ci int idx = possible_passthrough_msr_slot(msr); 402662306a36Sopenharmony_ci 402762306a36Sopenharmony_ci if (idx != -ENOENT) { 402862306a36Sopenharmony_ci if (type & MSR_TYPE_R) 402962306a36Sopenharmony_ci set_bit(idx, vmx->shadow_msr_intercept.read); 403062306a36Sopenharmony_ci if (type & MSR_TYPE_W) 403162306a36Sopenharmony_ci set_bit(idx, vmx->shadow_msr_intercept.write); 403262306a36Sopenharmony_ci } 403362306a36Sopenharmony_ci } 403462306a36Sopenharmony_ci 403562306a36Sopenharmony_ci if (type & MSR_TYPE_R) 403662306a36Sopenharmony_ci vmx_set_msr_bitmap_read(msr_bitmap, msr); 403762306a36Sopenharmony_ci 403862306a36Sopenharmony_ci if (type & MSR_TYPE_W) 403962306a36Sopenharmony_ci vmx_set_msr_bitmap_write(msr_bitmap, msr); 404062306a36Sopenharmony_ci} 404162306a36Sopenharmony_ci 404262306a36Sopenharmony_cistatic void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu) 404362306a36Sopenharmony_ci{ 404462306a36Sopenharmony_ci /* 404562306a36Sopenharmony_ci * x2APIC indices for 64-bit accesses into the RDMSR and WRMSR halves 404662306a36Sopenharmony_ci * of the MSR bitmap. 
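	 * (Worked example: APIC_BASE_MSR is 0x800 and BITS_PER_LONG_LONG is 64,
	 * so read_idx = 0x800 / 64 = 32; the write half of the bitmap lives
	 * 0x800 bytes, i.e. 256 u64s, after the read half, hence
	 * write_idx = 32 + 256 = 288.)
	 *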
KVM emulates APIC registers up through 0x3f0, 404762306a36Sopenharmony_ci * i.e. MSR 0x83f, and so only needs to dynamically manipulate 64 bits. 404862306a36Sopenharmony_ci */ 404962306a36Sopenharmony_ci const int read_idx = APIC_BASE_MSR / BITS_PER_LONG_LONG; 405062306a36Sopenharmony_ci const int write_idx = read_idx + (0x800 / sizeof(u64)); 405162306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 405262306a36Sopenharmony_ci u64 *msr_bitmap = (u64 *)vmx->vmcs01.msr_bitmap; 405362306a36Sopenharmony_ci u8 mode; 405462306a36Sopenharmony_ci 405562306a36Sopenharmony_ci if (!cpu_has_vmx_msr_bitmap() || WARN_ON_ONCE(!lapic_in_kernel(vcpu))) 405662306a36Sopenharmony_ci return; 405762306a36Sopenharmony_ci 405862306a36Sopenharmony_ci if (cpu_has_secondary_exec_ctrls() && 405962306a36Sopenharmony_ci (secondary_exec_controls_get(vmx) & 406062306a36Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { 406162306a36Sopenharmony_ci mode = MSR_BITMAP_MODE_X2APIC; 406262306a36Sopenharmony_ci if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) 406362306a36Sopenharmony_ci mode |= MSR_BITMAP_MODE_X2APIC_APICV; 406462306a36Sopenharmony_ci } else { 406562306a36Sopenharmony_ci mode = 0; 406662306a36Sopenharmony_ci } 406762306a36Sopenharmony_ci 406862306a36Sopenharmony_ci if (mode == vmx->x2apic_msr_bitmap_mode) 406962306a36Sopenharmony_ci return; 407062306a36Sopenharmony_ci 407162306a36Sopenharmony_ci vmx->x2apic_msr_bitmap_mode = mode; 407262306a36Sopenharmony_ci 407362306a36Sopenharmony_ci /* 407462306a36Sopenharmony_ci * Reset the bitmap for MSRs 0x800 - 0x83f. Leave AMD's uber-extended 407562306a36Sopenharmony_ci * registers (0x840 and above) intercepted, KVM doesn't support them. 407662306a36Sopenharmony_ci * Intercept all writes by default and poke holes as needed. Pass 407762306a36Sopenharmony_ci * through reads for all valid registers by default in x2APIC+APICv 407862306a36Sopenharmony_ci * mode, only the current timer count needs on-demand emulation by KVM. 407962306a36Sopenharmony_ci */ 408062306a36Sopenharmony_ci if (mode & MSR_BITMAP_MODE_X2APIC_APICV) 408162306a36Sopenharmony_ci msr_bitmap[read_idx] = ~kvm_lapic_readable_reg_mask(vcpu->arch.apic); 408262306a36Sopenharmony_ci else 408362306a36Sopenharmony_ci msr_bitmap[read_idx] = ~0ull; 408462306a36Sopenharmony_ci msr_bitmap[write_idx] = ~0ull; 408562306a36Sopenharmony_ci 408662306a36Sopenharmony_ci /* 408762306a36Sopenharmony_ci * TPR reads and writes can be virtualized even if virtual interrupt 408862306a36Sopenharmony_ci * delivery is not in use. 
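	 *
	 * (For reference, APIC_TASKPRI is register offset 0x80, so
	 * X2APIC_MSR(APIC_TASKPRI) below is MSR 0x808, the x2APIC TPR MSR.)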
408962306a36Sopenharmony_ci */ 409062306a36Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW, 409162306a36Sopenharmony_ci !(mode & MSR_BITMAP_MODE_X2APIC)); 409262306a36Sopenharmony_ci 409362306a36Sopenharmony_ci if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { 409462306a36Sopenharmony_ci vmx_enable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_RW); 409562306a36Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); 409662306a36Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); 409762306a36Sopenharmony_ci if (enable_ipiv) 409862306a36Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_ICR), MSR_TYPE_RW); 409962306a36Sopenharmony_ci } 410062306a36Sopenharmony_ci} 410162306a36Sopenharmony_ci 410262306a36Sopenharmony_civoid pt_update_intercept_for_msr(struct kvm_vcpu *vcpu) 410362306a36Sopenharmony_ci{ 410462306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 410562306a36Sopenharmony_ci bool flag = !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN); 410662306a36Sopenharmony_ci u32 i; 410762306a36Sopenharmony_ci 410862306a36Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_STATUS, MSR_TYPE_RW, flag); 410962306a36Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_OUTPUT_BASE, MSR_TYPE_RW, flag); 411062306a36Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_OUTPUT_MASK, MSR_TYPE_RW, flag); 411162306a36Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_CR3_MATCH, MSR_TYPE_RW, flag); 411262306a36Sopenharmony_ci for (i = 0; i < vmx->pt_desc.num_address_ranges; i++) { 411362306a36Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag); 411462306a36Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag); 411562306a36Sopenharmony_ci } 411662306a36Sopenharmony_ci} 411762306a36Sopenharmony_ci 411862306a36Sopenharmony_cistatic bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) 411962306a36Sopenharmony_ci{ 412062306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 412162306a36Sopenharmony_ci void *vapic_page; 412262306a36Sopenharmony_ci u32 vppr; 412362306a36Sopenharmony_ci int rvi; 412462306a36Sopenharmony_ci 412562306a36Sopenharmony_ci if (WARN_ON_ONCE(!is_guest_mode(vcpu)) || 412662306a36Sopenharmony_ci !nested_cpu_has_vid(get_vmcs12(vcpu)) || 412762306a36Sopenharmony_ci WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn)) 412862306a36Sopenharmony_ci return false; 412962306a36Sopenharmony_ci 413062306a36Sopenharmony_ci rvi = vmx_get_rvi(); 413162306a36Sopenharmony_ci 413262306a36Sopenharmony_ci vapic_page = vmx->nested.virtual_apic_map.hva; 413362306a36Sopenharmony_ci vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); 413462306a36Sopenharmony_ci 413562306a36Sopenharmony_ci return ((rvi & 0xf0) > (vppr & 0xf0)); 413662306a36Sopenharmony_ci} 413762306a36Sopenharmony_ci 413862306a36Sopenharmony_cistatic void vmx_msr_filter_changed(struct kvm_vcpu *vcpu) 413962306a36Sopenharmony_ci{ 414062306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 414162306a36Sopenharmony_ci u32 i; 414262306a36Sopenharmony_ci 414362306a36Sopenharmony_ci /* 414462306a36Sopenharmony_ci * Redo intercept permissions for MSRs that KVM is passing through to 414562306a36Sopenharmony_ci * the guest. 
	 * Disabling interception will check the new MSR filter and
	 * ensure that KVM enables interception if userspace wants to filter
	 * the MSR.  MSRs that KVM is already intercepting don't need to be
	 * refreshed since KVM is going to intercept them regardless of what
	 * userspace wants.
	 */
	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
		u32 msr = vmx_possible_passthrough_msrs[i];

		if (!test_bit(i, vmx->shadow_msr_intercept.read))
			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_R);

		if (!test_bit(i, vmx->shadow_msr_intercept.write))
			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_W);
	}

	/* PT MSRs can be passed through iff PT is exposed to the guest. */
	if (vmx_pt_mode_is_host_guest())
		pt_update_intercept_for_msr(vcpu);
}

static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
						     int pi_vec)
{
#ifdef CONFIG_SMP
	if (vcpu->mode == IN_GUEST_MODE) {
		/*
		 * The vector of the virtual interrupt has already been set in
		 * the PIR.  Send a notification event to deliver the virtual
		 * interrupt unless the vCPU is the currently running vCPU,
		 * i.e. the event is being sent from a fastpath VM-Exit
		 * handler, in which case the PIR will be synced to the vIRR
		 * before re-entering the guest.
		 *
		 * When the target is not the running vCPU, the following
		 * possibilities emerge:
		 *
		 * Case 1: vCPU stays in non-root mode. Sending a notification
		 * event posts the interrupt to the vCPU.
		 *
		 * Case 2: vCPU exits to root mode and is still runnable. The
		 * PIR will be synced to the vIRR before re-entering the guest.
		 * Sending a notification event is ok as the host IRQ handler
		 * will ignore the spurious event.
		 *
		 * Case 3: vCPU exits to root mode and is blocked. vcpu_block()
		 * has already synced PIR to vIRR and never blocks the vCPU if
		 * the vIRR is not empty. Therefore, a blocked vCPU here does
		 * not wait for any requested interrupts in PIR, and sending a
		 * notification event also results in a benign, spurious event.
		 */

		if (vcpu != kvm_get_running_vcpu())
			__apic_send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
		return;
	}
#endif
	/*
	 * The vCPU isn't in the guest; wake the vCPU in case it is blocking,
	 * otherwise do nothing as KVM will grab the highest priority pending
	 * IRQ via ->sync_pir_to_irr() in vcpu_enter_guest().
	 */
	kvm_vcpu_wake_up(vcpu);
}

static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
					       int vector)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (is_guest_mode(vcpu) &&
	    vector == vmx->nested.posted_intr_nv) {
		/*
		 * If a posted intr is not recognized by hardware,
		 * we will accomplish it in the next vmentry.
		 */
		vmx->nested.pi_pending = true;
		kvm_make_request(KVM_REQ_EVENT, vcpu);

		/*
		 * This pairs with the smp_mb_*() after setting vcpu->mode in
		 * vcpu_enter_guest() to guarantee the vCPU sees the event
		 * request if triggering a posted interrupt "fails" because
		 * vcpu->mode != IN_GUEST_MODE. The extra barrier is needed as
		 * the smp_wmb() in kvm_make_request() only ensures everything
		 * done before making the request is visible when the request
		 * is visible, it doesn't ensure ordering between the store to
		 * vcpu->requests and the load from vcpu->mode.
		 */
		smp_mb__after_atomic();

		/* the PIR and ON have been set by L1. */
		kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_NESTED_VECTOR);
		return 0;
	}
	return -1;
}

/*
 * Send an interrupt to a vcpu via posted interrupt:
 * 1. If the target vcpu is running (non-root mode), send a posted interrupt
 * notification to the vcpu and hardware will sync the PIR to the vIRR
 * atomically.
 * 2. If the target vcpu isn't running (root mode), kick it to pick up the
 * interrupt from the PIR on the next vmentry.
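 *
 * (For reference, the posted-interrupt descriptor involved here, struct
 * pi_desc, is a 64-byte structure holding a 256-bit PIR bitmap -- one bit
 * per vector -- plus control bits, most notably ON, the "outstanding
 * notification" flag tested via pi_test_and_set_on() below.)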
static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
						int vector)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (is_guest_mode(vcpu) &&
	    vector == vmx->nested.posted_intr_nv) {
		/*
		 * If a posted intr is not recognized by hardware,
		 * we will accomplish it in the next vmentry.
		 */
		vmx->nested.pi_pending = true;
		kvm_make_request(KVM_REQ_EVENT, vcpu);

		/*
		 * This pairs with the smp_mb_*() after setting vcpu->mode in
		 * vcpu_enter_guest() to guarantee the vCPU sees the event
		 * request if triggering a posted interrupt "fails" because
		 * vcpu->mode != IN_GUEST_MODE.  The extra barrier is needed as
		 * the smp_wmb() in kvm_make_request() only ensures everything
		 * done before making the request is visible when the request
		 * is visible, it doesn't ensure ordering between the store to
		 * vcpu->requests and the load from vcpu->mode.
		 */
		smp_mb__after_atomic();

		/* the PIR and ON have been set by L1. */
		kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_NESTED_VECTOR);
		return 0;
	}
	return -1;
}
/*
 * Send an interrupt to a vCPU via posted interrupt:
 * 1. If the target vCPU is running (non-root mode), send a posted interrupt
 * notification and hardware will sync PIR to vIRR atomically.
 * 2. If the target vCPU isn't running (root mode), kick it to pick up the
 * interrupt from PIR on the next VM-Entry.
 */
static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int r;

	r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
	if (!r)
		return 0;

	/* Note, this is called iff the local APIC is in-kernel. */
	if (!vcpu->arch.apic->apicv_active)
		return -1;

	if (pi_test_and_set_pir(vector, &vmx->pi_desc))
		return 0;

	/* If a previous notification has sent the IPI, nothing to do. */
	if (pi_test_and_set_on(&vmx->pi_desc))
		return 0;

	/*
	 * The implied barrier in pi_test_and_set_on() pairs with the smp_mb_*()
	 * after setting vcpu->mode in vcpu_enter_guest(), thus the vCPU is
	 * guaranteed to see PID.ON=1 and sync the PIR to IRR if triggering a
	 * posted interrupt "fails" because vcpu->mode != IN_GUEST_MODE.
	 */
	kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_VECTOR);
	return 0;
}

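/*
 * Deliver an APIC interrupt to the vCPU, preferring posted-interrupt
 * delivery.  If posted delivery isn't possible, e.g. APICv is inactive,
 * fall back to setting the vector in the vIRR and kicking the vCPU so the
 * interrupt is evaluated on the next VM-Entry.
 */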
static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
				  int trig_mode, int vector)
{
	struct kvm_vcpu *vcpu = apic->vcpu;

	if (vmx_deliver_posted_interrupt(vcpu, vector)) {
		kvm_lapic_set_irr(vector, apic);
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
	} else {
		trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
					   trig_mode, vector);
	}
}

/*
 * Set up the vmcs's constant host-state fields, i.e., host-state fields that
 * will not change in the lifetime of the guest.
 * Note that host-state that does change is set elsewhere. E.g., host-state
 * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
 */
void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
{
	u32 low32, high32;
	unsigned long tmpl;
	unsigned long cr0, cr3, cr4;

	cr0 = read_cr0();
	WARN_ON(cr0 & X86_CR0_TS);
	vmcs_writel(HOST_CR0, cr0);  /* 22.2.3 */

	/*
	 * Save the most likely value for this task's CR3 in the VMCS.
	 * We can't use __get_current_cr3_fast() because we're not atomic.
	 */
	cr3 = __read_cr3();
	vmcs_writel(HOST_CR3, cr3);		/* 22.2.3  FIXME: shadow tables */
	vmx->loaded_vmcs->host_state.cr3 = cr3;

	/* Save the most likely value for this task's CR4 in the VMCS. */
	cr4 = cr4_read_shadow();
	vmcs_writel(HOST_CR4, cr4);		/* 22.2.3, 22.2.5 */
	vmx->loaded_vmcs->host_state.cr4 = cr4;

	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
#ifdef CONFIG_X86_64
	/*
	 * Load null selectors, so we can avoid reloading them in
	 * vmx_prepare_switch_to_host(), in case userspace uses
	 * the null selectors too (the expected case).
	 */
	vmcs_write16(HOST_DS_SELECTOR, 0);
	vmcs_write16(HOST_ES_SELECTOR, 0);
#else
	vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
	vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
#endif
	vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
	vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */

	vmcs_writel(HOST_IDTR_BASE, host_idt_base);   /* 22.2.4 */

	vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */

	rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
	vmcs_write32(HOST_IA32_SYSENTER_CS, low32);

	/*
	 * SYSENTER is used for 32-bit system calls on either 32-bit or
	 * 64-bit kernels.  It is always zero if neither is allowed, otherwise
	 * vmx_vcpu_load_vmcs loads it with the per-CPU entry stack (and may
	 * have already done so!).
	 */
	if (!IS_ENABLED(CONFIG_IA32_EMULATION) && !IS_ENABLED(CONFIG_X86_32))
		vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);

	rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
	vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl);   /* 22.2.3 */

	if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
		rdmsr(MSR_IA32_CR_PAT, low32, high32);
		vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
	}

	if (cpu_has_load_ia32_efer())
		vmcs_write64(HOST_IA32_EFER, host_efer);
}

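/*
 * Decide which CR4 bits the guest may own, i.e. write without a VM-Exit.
 * Reserved bits are always intercepted, the TLB-flush and PDPTR bits are
 * intercepted when EPT is disabled, and for L2 any bit in L1's
 * cr4_guest_host_mask is intercepted on L1's behalf.
 */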
void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
{
	struct kvm_vcpu *vcpu = &vmx->vcpu;

	vcpu->arch.cr4_guest_owned_bits = KVM_POSSIBLE_CR4_GUEST_BITS &
					  ~vcpu->arch.cr4_guest_rsvd_bits;
	if (!enable_ept) {
		vcpu->arch.cr4_guest_owned_bits &= ~X86_CR4_TLBFLUSH_BITS;
		vcpu->arch.cr4_guest_owned_bits &= ~X86_CR4_PDPTR_BITS;
	}
	if (is_guest_mode(&vmx->vcpu))
		vcpu->arch.cr4_guest_owned_bits &=
			~get_vmcs12(vcpu)->cr4_guest_host_mask;
	vmcs_writel(CR4_GUEST_HOST_MASK, ~vcpu->arch.cr4_guest_owned_bits);
}

static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
{
	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;

	if (!kvm_vcpu_apicv_active(&vmx->vcpu))
		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;

	if (!enable_vnmi)
		pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;

	if (!enable_preemption_timer)
		pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;

	return pin_based_exec_ctrl;
}

static u32 vmx_vmentry_ctrl(void)
{
	u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;

	if (vmx_pt_mode_is_system())
		vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
				  VM_ENTRY_LOAD_IA32_RTIT_CTL);
	/*
	 * IA32e mode, and loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically.
	 */
	vmentry_ctrl &= ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
			  VM_ENTRY_LOAD_IA32_EFER |
			  VM_ENTRY_IA32E_MODE);

	if (cpu_has_perf_global_ctrl_bug())
		vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;

	return vmentry_ctrl;
}

static u32 vmx_vmexit_ctrl(void)
{
	u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;

	/*
	 * Not used by KVM and never set in vmcs01 or vmcs02, but emulated for
	 * nested virtualization and thus allowed to be set in vmcs12.
	 */
	vmexit_ctrl &= ~(VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER |
			 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);

	if (vmx_pt_mode_is_system())
		vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP |
				 VM_EXIT_CLEAR_IA32_RTIT_CTL);

	if (cpu_has_perf_global_ctrl_bug())
		vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;

	/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
	return vmexit_ctrl &
		~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
}

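/*
 * Re-evaluate the APICv-related execution controls after APICv is activated
 * or inhibited for this vCPU, and refresh the x2APIC MSR bitmap to match.
 * If the vCPU is running L2, only flag vmcs01 for updating; the controls are
 * reloaded on the next nested VM-Exit.
 */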
static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (is_guest_mode(vcpu)) {
		vmx->nested.update_vmcs01_apicv_status = true;
		return;
	}

	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));

	if (kvm_vcpu_apicv_active(vcpu)) {
		secondary_exec_controls_setbit(vmx,
					       SECONDARY_EXEC_APIC_REGISTER_VIRT |
					       SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
		if (enable_ipiv)
			tertiary_exec_controls_setbit(vmx, TERTIARY_EXEC_IPI_VIRT);
	} else {
		secondary_exec_controls_clearbit(vmx,
						 SECONDARY_EXEC_APIC_REGISTER_VIRT |
						 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
		if (enable_ipiv)
			tertiary_exec_controls_clearbit(vmx, TERTIARY_EXEC_IPI_VIRT);
	}

	vmx_update_msr_bitmap_x2apic(vcpu);
}

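/*
 * Compute the primary processor-based VM-execution controls for vmcs01.
 * Start from the base configuration and drop controls that KVM toggles at
 * runtime (IRQ/NMI window exiting) or that depend on how this guest is
 * configured (TPR shadow, CR8/CR3/INVLPG exiting, MWAIT/HLT exiting).
 */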
static u32 vmx_exec_control(struct vcpu_vmx *vmx)
{
	u32 exec_control = vmcs_config.cpu_based_exec_ctrl;

	/*
	 * Not used by KVM, but fully supported for nesting, i.e. are allowed in
	 * vmcs12 and propagated to vmcs02 when set in vmcs12.
	 */
	exec_control &= ~(CPU_BASED_RDTSC_EXITING |
			  CPU_BASED_USE_IO_BITMAPS |
			  CPU_BASED_MONITOR_TRAP_FLAG |
			  CPU_BASED_PAUSE_EXITING);

	/* INTR_WINDOW_EXITING and NMI_WINDOW_EXITING are toggled dynamically */
	exec_control &= ~(CPU_BASED_INTR_WINDOW_EXITING |
			  CPU_BASED_NMI_WINDOW_EXITING);

	if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
		exec_control &= ~CPU_BASED_MOV_DR_EXITING;

	if (!cpu_need_tpr_shadow(&vmx->vcpu))
		exec_control &= ~CPU_BASED_TPR_SHADOW;

#ifdef CONFIG_X86_64
	if (exec_control & CPU_BASED_TPR_SHADOW)
		exec_control &= ~(CPU_BASED_CR8_LOAD_EXITING |
				  CPU_BASED_CR8_STORE_EXITING);
	else
		exec_control |= CPU_BASED_CR8_STORE_EXITING |
				CPU_BASED_CR8_LOAD_EXITING;
#endif
	/* No need to intercept CR3 access or INVLPG when using EPT. */
	if (enable_ept)
		exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
				  CPU_BASED_CR3_STORE_EXITING |
				  CPU_BASED_INVLPG_EXITING);
	if (kvm_mwait_in_guest(vmx->vcpu.kvm))
		exec_control &= ~(CPU_BASED_MWAIT_EXITING |
				  CPU_BASED_MONITOR_EXITING);
	if (kvm_hlt_in_guest(vmx->vcpu.kvm))
		exec_control &= ~CPU_BASED_HLT_EXITING;
	return exec_control;
}

static u64 vmx_tertiary_exec_control(struct vcpu_vmx *vmx)
{
	u64 exec_control = vmcs_config.cpu_based_3rd_exec_ctrl;

	/*
	 * IPI virtualization relies on APICv. Disable IPI virtualization if
	 * APICv is inhibited.
	 */
	if (!enable_ipiv || !kvm_vcpu_apicv_active(&vmx->vcpu))
		exec_control &= ~TERTIARY_EXEC_IPI_VIRT;

	return exec_control;
}

/*
 * Adjust a single secondary execution control bit to intercept/allow an
 * instruction in the guest.  This is usually done based on whether or not a
 * feature has been exposed to the guest in order to correctly emulate faults.
 */
static inline void
vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
				  u32 control, bool enabled, bool exiting)
{
	/*
	 * If the control is for an opt-in feature, clear the control if the
	 * feature is not exposed to the guest, i.e. not enabled.  If the
	 * control is opt-out, i.e. an exiting control, clear the control if
	 * the feature _is_ exposed to the guest, i.e. exiting/interception is
	 * disabled for the associated instruction.  Note, the caller is
	 * responsible for presetting exec_control to set all supported bits.
	 */
	if (enabled == exiting)
		*exec_control &= ~control;

	/*
	 * Update the nested MSR settings so that a nested VMM can/can't set
	 * controls for features that are/aren't exposed to the guest.
	 */
	if (nested) {
		/*
		 * All features that can be added or removed to VMX MSRs must
		 * be supported in the first place for nested virtualization.
		 */
		if (WARN_ON_ONCE(!(vmcs_config.nested.secondary_ctls_high & control)))
			enabled = false;

		if (enabled)
			vmx->nested.msrs.secondary_ctls_high |= control;
		else
			vmx->nested.msrs.secondary_ctls_high &= ~control;
	}
}

/*
 * Wrapper macro for the common case of adjusting a secondary execution control
 * based on a single guest CPUID bit, with a dedicated feature bit.  This also
 * verifies that the control is actually supported by KVM and hardware.
 */
#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting)	\
({												\
	struct kvm_vcpu *__vcpu = &(vmx)->vcpu;							\
	bool __enabled;										\
												\
	if (cpu_has_vmx_##name()) {								\
		if (kvm_is_governed_feature(X86_FEATURE_##feat_name))				\
			__enabled = guest_can_use(__vcpu, X86_FEATURE_##feat_name);		\
		else										\
			__enabled = guest_cpuid_has(__vcpu, X86_FEATURE_##feat_name);		\
		vmx_adjust_secondary_exec_control(vmx, exec_control, SECONDARY_EXEC_##ctrl_name,\
						  __enabled, exiting);				\
	}											\
})

/* More macro magic for ENABLE_/opt-in versus _EXITING/opt-out controls.  */
#define vmx_adjust_sec_exec_feature(vmx, exec_control, lname, uname)	\
	vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, ENABLE_##uname, false)

#define vmx_adjust_sec_exec_exiting(vmx, exec_control, lname, uname)	\
	vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, uname##_EXITING, true)

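/*
 * Compute the secondary processor-based VM-execution controls for vmcs01,
 * dropping controls for features that are unsupported, disabled via module
 * parameter, or not exposed to this particular guest.
 */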
static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
{
	struct kvm_vcpu *vcpu = &vmx->vcpu;

	u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;

	if (vmx_pt_mode_is_system())
		exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX);
	if (!cpu_need_virtualize_apic_accesses(vcpu))
		exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
	if (vmx->vpid == 0)
		exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
	if (!enable_ept) {
		exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
		enable_unrestricted_guest = 0;
	}
	if (!enable_unrestricted_guest)
		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
	if (kvm_pause_in_guest(vmx->vcpu.kvm))
		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
	if (!kvm_vcpu_apicv_active(vcpu))
		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;

	/*
	 * KVM doesn't support VMFUNC for L1, but the control is set in KVM's
	 * base configuration as KVM emulates VMFUNC[EPTP_SWITCHING] for L2.
	 */
	exec_control &= ~SECONDARY_EXEC_ENABLE_VMFUNC;

	/*
	 * SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP,
	 * in vmx_set_cr4.
	 */
	exec_control &= ~SECONDARY_EXEC_DESC;

	/*
	 * SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
	 * (handle_vmptrld).  Shadow VMCS can't be enabled here because there
	 * is no current VMCS12 yet.
	 */
	exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;

	/*
	 * PML is enabled/disabled when dirty logging of memslots changes, but
	 * it needs to be set here when dirty logging is already active, e.g.
	 * if this vCPU was created after dirty logging was enabled.
	 */
	if (!enable_pml || !atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
		exec_control &= ~SECONDARY_EXEC_ENABLE_PML;

	vmx_adjust_sec_exec_feature(vmx, &exec_control, xsaves, XSAVES);

	/*
	 * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
	 * feature is exposed to the guest.  This creates a virtualization hole
	 * if both are supported in hardware but only one is exposed to the
	 * guest, but letting the guest execute RDTSCP or RDPID when either one
	 * is advertised is preferable to emulating the advertised instruction
	 * in KVM on #UD, and obviously better than incorrectly injecting #UD.
	 */
	if (cpu_has_vmx_rdtscp()) {
		bool rdpid_or_rdtscp_enabled =
			guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
			guest_cpuid_has(vcpu, X86_FEATURE_RDPID);

		vmx_adjust_secondary_exec_control(vmx, &exec_control,
						  SECONDARY_EXEC_ENABLE_RDTSCP,
						  rdpid_or_rdtscp_enabled, false);
	}

	vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);

	vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
	vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdseed, RDSEED);

	vmx_adjust_sec_exec_control(vmx, &exec_control, waitpkg, WAITPKG,
				    ENABLE_USR_WAIT_PAUSE, false);

	if (!vcpu->kvm->arch.bus_lock_detection_enabled)
		exec_control &= ~SECONDARY_EXEC_BUS_LOCK_DETECTION;

	if (!kvm_notify_vmexit_enabled(vcpu->kvm))
		exec_control &= ~SECONDARY_EXEC_NOTIFY_VM_EXITING;

	return exec_control;
}

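/*
 * IPI virtualization needs a PID-pointer table with one entry per possible
 * vCPU ID; size the page allocation based on the VM's max_vcpu_ids.
 */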
static inline int vmx_get_pid_table_order(struct kvm *kvm)
{
	return get_order(kvm->arch.max_vcpu_ids * sizeof(*to_kvm_vmx(kvm)->pid_table));
}

static int vmx_alloc_ipiv_pid_table(struct kvm *kvm)
{
	struct page *pages;
	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);

	if (!irqchip_in_kernel(kvm) || !enable_ipiv)
		return 0;

	if (kvm_vmx->pid_table)
		return 0;

	pages = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
			    vmx_get_pid_table_order(kvm));
	if (!pages)
		return -ENOMEM;

	kvm_vmx->pid_table = (void *)page_address(pages);
	return 0;
}

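/*
 * Per-VM setup that must happen before the first vCPU is created; currently
 * that is only the allocation of the IPI-virtualization PID-pointer table.
 */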
static int vmx_vcpu_precreate(struct kvm *kvm)
{
	return vmx_alloc_ipiv_pid_table(kvm);
}

#define VMX_XSS_EXIT_BITMAP 0

static void init_vmcs(struct vcpu_vmx *vmx)
{
	struct kvm *kvm = vmx->vcpu.kvm;
	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);

	if (nested)
		nested_vmx_set_vmcs_shadowing_bitmap();

	if (cpu_has_vmx_msr_bitmap())
		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));

	vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA); /* 22.3.1.5 */

	/* Control */
	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));

	exec_controls_set(vmx, vmx_exec_control(vmx));

	if (cpu_has_secondary_exec_ctrls())
		secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx));

	if (cpu_has_tertiary_exec_ctrls())
		tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx));

	if (enable_apicv && lapic_in_kernel(&vmx->vcpu)) {
		vmcs_write64(EOI_EXIT_BITMAP0, 0);
		vmcs_write64(EOI_EXIT_BITMAP1, 0);
		vmcs_write64(EOI_EXIT_BITMAP2, 0);
		vmcs_write64(EOI_EXIT_BITMAP3, 0);

		vmcs_write16(GUEST_INTR_STATUS, 0);

		vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
	}

	if (vmx_can_use_ipiv(&vmx->vcpu)) {
		vmcs_write64(PID_POINTER_TABLE, __pa(kvm_vmx->pid_table));
		vmcs_write16(LAST_PID_POINTER_INDEX, kvm->arch.max_vcpu_ids - 1);
	}

	if (!kvm_pause_in_guest(kvm)) {
		vmcs_write32(PLE_GAP, ple_gap);
		vmx->ple_window = ple_window;
		vmx->ple_window_dirty = true;
	}

	if (kvm_notify_vmexit_enabled(kvm))
		vmcs_write32(NOTIFY_WINDOW, kvm->arch.notify_window);

	vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
	vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */

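	/*
	 * Host FS/GS selectors and bases are zeroed here; the actual host
	 * values are saved and programmed when the vCPU is loaded and the
	 * state is switched in vmx_prepare_switch_to_guest().
	 */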
	vmcs_write16(HOST_FS_SELECTOR, 0);            /* 22.2.4 */
	vmcs_write16(HOST_GS_SELECTOR, 0);            /* 22.2.4 */
	vmx_set_constant_host_state(vmx);
	vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
	vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */

	if (cpu_has_vmx_vmfunc())
		vmcs_write64(VM_FUNCTION_CONTROL, 0);

	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));

	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);

	vm_exit_controls_set(vmx, vmx_vmexit_ctrl());

	/* 22.2.1, 20.8.1 */
	vm_entry_controls_set(vmx, vmx_vmentry_ctrl());

	vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
	vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);

	set_cr4_guest_host_mask(vmx);

	if (vmx->vpid != 0)
		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);

	if (cpu_has_vmx_xsaves())
		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);

	if (enable_pml) {
		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
	}

	vmx_write_encls_bitmap(&vmx->vcpu, NULL);

	if (vmx_pt_mode_is_host_guest()) {
		memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc));
		/* Bits 6:0 are forced to 1; writes are ignored. */
		vmx->pt_desc.guest.output_mask = 0x7F;
		vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
	}

	vmcs_write32(GUEST_SYSENTER_CS, 0);
	vmcs_writel(GUEST_SYSENTER_ESP, 0);
	vmcs_writel(GUEST_SYSENTER_EIP, 0);
	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);

	if (cpu_has_vmx_tpr_shadow()) {
		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
		if (cpu_need_tpr_shadow(&vmx->vcpu))
			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
				     __pa(vmx->vcpu.arch.apic->regs));
		vmcs_write32(TPR_THRESHOLD, 0);
	}

	vmx_setup_uret_msrs(vmx);
}

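/*
 * One-time vCPU initialization, performed only on the first (non-INIT)
 * reset: build vmcs01 and initialize the nested, SGX and posted-interrupt
 * state that is not re-initialized on INIT.
 */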
static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	init_vmcs(vmx);

	if (nested)
		memcpy(&vmx->nested.msrs, &vmcs_config.nested, sizeof(vmx->nested.msrs));

	vcpu_setup_sgx_lepubkeyhash(vcpu);

	vmx->nested.posted_intr_nv = -1;
	vmx->nested.vmxon_ptr = INVALID_GPA;
	vmx->nested.current_vmptr = INVALID_GPA;
	vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;

	vcpu->arch.microcode_version = 0x100000000ULL;
	vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;

	/*
	 * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
	 * or POSTED_INTR_WAKEUP_VECTOR.
	 */
	vmx->pi_desc.nv = POSTED_INTR_VECTOR;
	vmx->pi_desc.sn = 1;
}

static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!init_event)
		__vmx_vcpu_reset(vcpu);

	vmx->rmode.vm86_active = 0;
	vmx->spec_ctrl = 0;

	vmx->msr_ia32_umwait_control = 0;

	vmx->hv_deadline_tsc = -1;
	kvm_set_cr8(vcpu, 0);

	vmx_segment_cache_clear(vmx);
	kvm_register_mark_available(vcpu, VCPU_EXREG_SEGMENTS);

	seg_setup(VCPU_SREG_CS);
	vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
	vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);

	seg_setup(VCPU_SREG_DS);
	seg_setup(VCPU_SREG_ES);
	seg_setup(VCPU_SREG_FS);
	seg_setup(VCPU_SREG_GS);
	seg_setup(VCPU_SREG_SS);

	vmcs_write16(GUEST_TR_SELECTOR, 0);
	vmcs_writel(GUEST_TR_BASE, 0);
	vmcs_write32(GUEST_TR_LIMIT, 0xffff);
	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);

	vmcs_write16(GUEST_LDTR_SELECTOR, 0);
	vmcs_writel(GUEST_LDTR_BASE, 0);
	vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
	vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);

	vmcs_writel(GUEST_GDTR_BASE, 0);
	vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);

	vmcs_writel(GUEST_IDTR_BASE, 0);
	vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);

	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
	if (kvm_mpx_supported())
		vmcs_write64(GUEST_BNDCFGS, 0);

	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */

	kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);

	vpid_sync_context(vmx->vpid);

	vmx_update_fb_clear_dis(vcpu, vmx);
}

static void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
{
	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
}

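/*
 * Request a VM-Exit as soon as an NMI can be injected.  Without virtual
 * NMIs, or while the guest is under STI blocking, NMI-window exiting isn't
 * usable; fall back to an IRQ window and re-evaluate NMI injection on that
 * exit.
 */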
static void vmx_enable_nmi_window(struct kvm_vcpu *vcpu)
{
	if (!enable_vnmi ||
	    vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
		vmx_enable_irq_window(vcpu);
		return;
	}

	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
}

static void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	uint32_t intr;
	int irq = vcpu->arch.interrupt.nr;

	trace_kvm_inj_virq(irq, vcpu->arch.interrupt.soft, reinjected);

	++vcpu->stat.irq_injections;
	if (vmx->rmode.vm86_active) {
		int inc_eip = 0;
		if (vcpu->arch.interrupt.soft)
			inc_eip = vcpu->arch.event_exit_inst_len;
		kvm_inject_realmode_interrupt(vcpu, irq, inc_eip);
		return;
	}
	intr = irq | INTR_INFO_VALID_MASK;
	if (vcpu->arch.interrupt.soft) {
		intr |= INTR_TYPE_SOFT_INTR;
		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
			     vmx->vcpu.arch.event_exit_inst_len);
	} else
		intr |= INTR_TYPE_EXT_INTR;
	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);

	vmx_clear_hlt(vcpu);
}

static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!enable_vnmi) {
		/*
		 * Tracking the NMI-blocked state in software is built upon
		 * finding the next open IRQ window. This, in turn, depends on
		 * well-behaving guests: They have to keep IRQs disabled at
		 * least as long as the NMI handler runs. Otherwise we may
		 * cause NMI nesting, maybe breaking the guest. But as this is
		 * highly unlikely, we can live with the residual risk.
		 */
		vmx->loaded_vmcs->soft_vnmi_blocked = 1;
		vmx->loaded_vmcs->vnmi_blocked_time = 0;
	}

	++vcpu->stat.nmi_injections;
	vmx->loaded_vmcs->nmi_known_unmasked = false;

	if (vmx->rmode.vm86_active) {
		kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0);
		return;
	}

	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);

	vmx_clear_hlt(vcpu);
}

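/*
 * NMI blocking is tracked in software when virtual NMIs are unavailable;
 * otherwise it is derived from (and cached against) the guest
 * interruptibility-state field in the VMCS.
 */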
bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	bool masked;

	if (!enable_vnmi)
		return vmx->loaded_vmcs->soft_vnmi_blocked;
	if (vmx->loaded_vmcs->nmi_known_unmasked)
		return false;
	masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
	vmx->loaded_vmcs->nmi_known_unmasked = !masked;
	return masked;
}

void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!enable_vnmi) {
		if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
			vmx->loaded_vmcs->soft_vnmi_blocked = masked;
			vmx->loaded_vmcs->vnmi_blocked_time = 0;
		}
	} else {
		vmx->loaded_vmcs->nmi_known_unmasked = !masked;
		if (masked)
			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
				      GUEST_INTR_STATE_NMI);
		else
			vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
					GUEST_INTR_STATE_NMI);
	}
}

bool vmx_nmi_blocked(struct kvm_vcpu *vcpu)
{
	if (is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu))
		return false;

	if (!enable_vnmi && to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
		return true;

	return (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
		(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI |
		 GUEST_INTR_STATE_NMI));
}

static int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
	if (to_vmx(vcpu)->nested.nested_run_pending)
		return -EBUSY;

	/* An NMI must not be injected into L2 if it's supposed to VM-Exit.  */
	if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu))
		return -EBUSY;

	return !vmx_nmi_blocked(vcpu);
}

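/*
 * An IRQ is blocked if RFLAGS.IF is clear or the guest is in an STI/MOV-SS
 * shadow.  For L2, an IRQ that is supposed to cause a VM-Exit to L1 is never
 * considered blocked, as it doesn't need an open window in L2.
 */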
bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu)
{
	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
		return false;

	return !(vmx_get_rflags(vcpu) & X86_EFLAGS_IF) ||
	       (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
		(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
}

static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
	if (to_vmx(vcpu)->nested.nested_run_pending)
		return -EBUSY;

	/*
	 * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
	 * e.g. if the IRQ arrived asynchronously after checking nested events.
	 */
	if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
		return -EBUSY;

	return !vmx_interrupt_blocked(vcpu);
}

static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
	void __user *ret;

	if (enable_unrestricted_guest)
		return 0;

	mutex_lock(&kvm->slots_lock);
	ret = __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
				      PAGE_SIZE * 3);
	mutex_unlock(&kvm->slots_lock);

	if (IS_ERR(ret))
		return PTR_ERR(ret);

	to_kvm_vmx(kvm)->tss_addr = addr;

	return init_rmode_tss(kvm, ret);
}

static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
{
	to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
	return 0;
}

static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
{
	switch (vec) {
	case BP_VECTOR:
		/*
		 * Update instruction length as we may reinject the exception
		 * from user space while in guest debugging mode.
		 */
		to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
			return false;
		fallthrough;
	case DB_VECTOR:
		return !(vcpu->guest_debug &
			 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP));
	case DE_VECTOR:
	case OF_VECTOR:
	case BR_VECTOR:
	case UD_VECTOR:
	case DF_VECTOR:
	case SS_VECTOR:
	case GP_VECTOR:
	case MF_VECTOR:
		return true;
	}
	return false;
}

static int handle_rmode_exception(struct kvm_vcpu *vcpu,
				  int vec, u32 err_code)
{
	/*
	 * An instruction with the address-size override prefix (opcode 0x67)
	 * causes a #SS fault with error code 0 in VM86 mode.
	 */
	if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
		if (kvm_emulate_instruction(vcpu, 0)) {
			if (vcpu->arch.halt_request) {
				vcpu->arch.halt_request = 0;
				return kvm_emulate_halt_noskip(vcpu);
			}
			return 1;
		}
		return 0;
	}

	/*
	 * Forward all other exceptions that are valid in real mode.
	 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
	 *        the required debugging infrastructure rework.
	 */
	kvm_queue_exception(vcpu, vec);
	return 1;
}

static int handle_machine_check(struct kvm_vcpu *vcpu)
{
	/* handled by vmx_vcpu_run() */
	return 1;
}

/*
 * If the host has split lock detection disabled, then #AC is
 * unconditionally injected into the guest, which is the pre-split-lock-
 * detection behaviour.
 *
 * If the host has split lock detection enabled then #AC is
 * only injected into the guest when:
 *  - Guest CPL == 3 (user mode)
 *  - Guest has #AC detection enabled in CR0
 *  - Guest EFLAGS has AC bit set
 */
bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
{
	if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
		return true;

	return vmx_get_cpl(vcpu) == 3 && kvm_is_cr0_bit_set(vcpu, X86_CR0_AM) &&
	       (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
}

static int handle_exception_nmi(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct kvm_run *kvm_run = vcpu->run;
	u32 intr_info, ex_no, error_code;
	unsigned long cr2, dr6;
	u32 vect_info;

	vect_info = vmx->idt_vectoring_info;
	intr_info = vmx_get_intr_info(vcpu);

	/*
	 * Machine checks are handled by handle_exception_irqoff(), or by
	 * vmx_vcpu_run() if a #MC occurs on VM-Entry.  NMIs are handled by
	 * vmx_vcpu_enter_exit().
	 */
	if (is_machine_check(intr_info) || is_nmi(intr_info))
		return 1;

	/*
	 * Queue the exception here instead of in handle_nm_fault_irqoff().
	 * This ensures the nested_vmx check is not skipped so vmexit can
	 * be reflected to L1 (when it intercepts #NM) before reaching this
	 * point.
	 */
	if (is_nm_fault(intr_info)) {
		kvm_queue_exception(vcpu, NM_VECTOR);
		return 1;
	}

	if (is_invalid_opcode(intr_info))
		return handle_ud(vcpu);

	error_code = 0;
	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);

	if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
		WARN_ON_ONCE(!enable_vmware_backdoor);

		/*
		 * VMware backdoor emulation on #GP interception only handles
		 * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero
		 * error code on #GP.
		 */
		if (error_code) {
			kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
			return 1;
		}
		return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
	}

	/*
	 * A #PF with PFEC.RSVD = 1 indicates the guest is accessing
	 * MMIO; it is better to report an internal error.
	 * See the comments in vmx_handle_exit.
	 */
	if ((vect_info & VECTORING_INFO_VALID_MASK) &&
	    !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
		vcpu->run->internal.ndata = 4;
		vcpu->run->internal.data[0] = vect_info;
		vcpu->run->internal.data[1] = intr_info;
		vcpu->run->internal.data[2] = error_code;
		vcpu->run->internal.data[3] = vcpu->arch.last_vmentry_cpu;
		return 0;
	}

	if (is_page_fault(intr_info)) {
		cr2 = vmx_get_exit_qual(vcpu);
		if (enable_ept && !vcpu->arch.apf.host_apf_flags) {
			/*
			 * EPT will cause page fault only if we need to
			 * detect illegal GPAs.
			 */
			WARN_ON_ONCE(!allow_smaller_maxphyaddr);
			kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code);
			return 1;
		} else
			return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
	}

	ex_no = intr_info & INTR_INFO_VECTOR_MASK;

	if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
		return handle_rmode_exception(vcpu, ex_no, error_code);

	switch (ex_no) {
	case DB_VECTOR:
		dr6 = vmx_get_exit_qual(vcpu);
		if (!(vcpu->guest_debug &
		      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
			/*
			 * If the #DB was due to ICEBP, a.k.a. INT1, skip the
			 * instruction.  ICEBP generates a trap-like #DB, but
			 * despite its interception control being tied to #DB,
			 * is an instruction intercept, i.e. the VM-Exit occurs
			 * on the ICEBP itself.  Use the inner "skip" helper to
			 * avoid single-step #DB and MTF updates, as ICEBP is
			 * higher priority.  Note, skipping ICEBP still clears
			 * STI and MOVSS blocking.
			 *
			 * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS
			 * if single-step is enabled in RFLAGS and STI or MOVSS
			 * blocking is active, as the CPU doesn't set the bit
			 * on VM-Exit due to #DB interception.  VM-Entry has a
			 * consistency check that a single-step #DB is pending
			 * in this scenario as the previous instruction cannot
			 * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV
			 * don't modify RFLAGS), therefore the one instruction
			 * delay when activating single-step breakpoints must
			 * have already expired.  Note, the CPU sets/clears BS
			 * as appropriate for all other VM-Exit types.
			 */
			if (is_icebp(intr_info))
				WARN_ON(!skip_emulated_instruction(vcpu));
			else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
				 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
				  (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
				vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
					    vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);

			kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
			return 1;
		}
		kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW;
		kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
		fallthrough;
	case BP_VECTOR:
		/*
		 * Update instruction length as we may reinject #BP from
		 * user space while in guest debugging mode. Reading it for
		 * #DB as well causes no harm; it is not used in that case.
		 */
		vmx->vcpu.arch.event_exit_inst_len =
			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
		kvm_run->exit_reason = KVM_EXIT_DEBUG;
		kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
		kvm_run->debug.arch.exception = ex_no;
		break;
	case AC_VECTOR:
		if (vmx_guest_inject_ac(vcpu)) {
			kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
			return 1;
		}

		/*
		 * Handle split lock. Depending on detection mode this will
		 * either warn and disable split lock detection for this
		 * task or force SIGBUS on it.
		 */
		if (handle_guest_split_lock(kvm_rip_read(vcpu)))
			return 1;
		fallthrough;
	default:
		kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
		kvm_run->ex.exception = ex_no;
		kvm_run->ex.error_code = error_code;
		break;
	}
	return 0;
}

static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu)
{
	++vcpu->stat.irq_exits;
	return 1;
}

static int handle_triple_fault(struct kvm_vcpu *vcpu)
{
	vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
	vcpu->mmio_needed = 0;
	return 0;
}

static int handle_io(struct kvm_vcpu *vcpu)
{
	unsigned long exit_qualification;
	int size, in, string;
	unsigned port;

	exit_qualification = vmx_get_exit_qual(vcpu);
	string = (exit_qualification & 16) != 0;

	++vcpu->stat.io_exits;

	if (string)
		return kvm_emulate_instruction(vcpu, 0);

	port = exit_qualification >> 16;
	size = (exit_qualification & 7) + 1;
	in = (exit_qualification & 8) != 0;

	return kvm_fast_pio(vcpu, size, port, in);
}

static void
vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
{
	/*
	 * Patch in the VMCALL instruction:
	 */
	hypercall[0] = 0x0f;
	hypercall[1] = 0x01;
	hypercall[2] = 0xc1;
}

/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
{
	if (is_guest_mode(vcpu)) {
		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
		unsigned long orig_val = val;

		/*
		 * We get here when L2 changed cr0 in a way that did not change
		 * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
		 * but did change L0 shadowed bits. So we first calculate the
		 * effective cr0 value that L1 would like to write into the
		 * hardware.  It consists of the L2-owned bits from the new
		 * value combined with the L1-owned bits from L1's guest_cr0.
		 */
		val = (val & ~vmcs12->cr0_guest_host_mask) |
			(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);

		if (kvm_set_cr0(vcpu, val))
			return 1;
		vmcs_writel(CR0_READ_SHADOW, orig_val);
		return 0;
	} else {
		return kvm_set_cr0(vcpu, val);
	}
}

static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
{
	if (is_guest_mode(vcpu)) {
		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
		unsigned long orig_val = val;

		/* analogously to handle_set_cr0 */
		val = (val & ~vmcs12->cr4_guest_host_mask) |
			(vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
		if (kvm_set_cr4(vcpu, val))
			return 1;
		vmcs_writel(CR4_READ_SHADOW, orig_val);
		return 0;
	} else
		return kvm_set_cr4(vcpu, val);
}

static int handle_desc(struct kvm_vcpu *vcpu)
{
	/*
	 * UMIP emulation relies on intercepting writes to CR4.UMIP, i.e. this
	 * and other code needs to be updated if UMIP can be guest owned.
	 */
	BUILD_BUG_ON(KVM_POSSIBLE_CR4_GUEST_BITS & X86_CR4_UMIP);

	WARN_ON_ONCE(!kvm_is_cr4_bit_set(vcpu, X86_CR4_UMIP));
	return kvm_emulate_instruction(vcpu, 0);
}

static int handle_cr(struct kvm_vcpu *vcpu)
{
	unsigned long exit_qualification, val;
	int cr;
	int reg;
	int err;
	int ret;

	exit_qualification = vmx_get_exit_qual(vcpu);
	cr = exit_qualification & 15;
	reg = (exit_qualification >> 8) & 15;
	switch ((exit_qualification >> 4) & 3) {
	case 0: /* mov to cr */
		val = kvm_register_read(vcpu, reg);
		trace_kvm_cr_write(cr, val);
		switch (cr) {
		case 0:
			err = handle_set_cr0(vcpu, val);
			return kvm_complete_insn_gp(vcpu, err);
		case 3:
			WARN_ON_ONCE(enable_unrestricted_guest);

			err = kvm_set_cr3(vcpu, val);
			return kvm_complete_insn_gp(vcpu, err);
		case 4:
			err = handle_set_cr4(vcpu, val);
			return kvm_complete_insn_gp(vcpu, err);
		case 8: {
			u8 cr8_prev = kvm_get_cr8(vcpu);
			u8 cr8 = (u8)val;
			err = kvm_set_cr8(vcpu, cr8);
			ret = kvm_complete_insn_gp(vcpu, err);
			if (lapic_in_kernel(vcpu))
				return ret;
			if (cr8_prev <= cr8)
				return ret;
			/*
			 * TODO: we might be squashing a
			 * KVM_GUESTDBG_SINGLESTEP-triggered
			 * KVM_EXIT_DEBUG here.
			 */
			vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
			return 0;
		}
		}
		break;
	case 2: /* clts */
		KVM_BUG(1, vcpu->kvm, "Guest always owns CR0.TS");
		return -EIO;
	case 1: /*mov from cr*/
		switch (cr) {
		case 3:
			WARN_ON_ONCE(enable_unrestricted_guest);

			val = kvm_read_cr3(vcpu);
			kvm_register_write(vcpu, reg, val);
			trace_kvm_cr_read(cr, val);
			return kvm_skip_emulated_instruction(vcpu);
		case 8:
			val = kvm_get_cr8(vcpu);
			kvm_register_write(vcpu, reg, val);
			trace_kvm_cr_read(cr, val);
			return kvm_skip_emulated_instruction(vcpu);
		}
		break;
	case 3: /* lmsw */
		val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
		trace_kvm_cr_write(0, (kvm_read_cr0_bits(vcpu, ~0xful) | val));
		kvm_lmsw(vcpu, val);

		return kvm_skip_emulated_instruction(vcpu);
	default:
		break;
	}
	vcpu->run->exit_reason = 0;
	vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
		    (int)(exit_qualification >> 4) & 3, cr);
	return 0;
}

static int handle_dr(struct kvm_vcpu *vcpu)
{
	unsigned long exit_qualification;
	int dr, dr7, reg;
	int err = 1;

	exit_qualification = vmx_get_exit_qual(vcpu);
	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;

	/* First, if DR does not exist, trigger UD */
	if (!kvm_require_dr(vcpu, dr))
		return 1;

	if (vmx_get_cpl(vcpu) > 0)
		goto out;

	dr7 = vmcs_readl(GUEST_DR7);
	if (dr7 & DR7_GD) {
		/*
		 * As the vm-exit takes precedence over the debug trap, we
		 * need to emulate the latter, either for the host or the
		 * guest debugging itself.
		 */
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
			vcpu->run->debug.arch.dr6 = DR6_BD | DR6_ACTIVE_LOW;
			vcpu->run->debug.arch.dr7 = dr7;
			vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
			vcpu->run->debug.arch.exception = DB_VECTOR;
			vcpu->run->exit_reason = KVM_EXIT_DEBUG;
			return 0;
		} else {
			kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BD);
			return 1;
		}
	}

	if (vcpu->guest_debug == 0) {
		exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);

		/*
		 * No more DR vmexits; force a reload of the debug registers
		 * and reenter on this instruction.  The next vmexit will
		 * retrieve the full state of the debug registers.
		 */
		vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
		return 1;
	}

	reg = DEBUG_REG_ACCESS_REG(exit_qualification);
	if (exit_qualification & TYPE_MOV_FROM_DR) {
		unsigned long val;

		kvm_get_dr(vcpu, dr, &val);
		kvm_register_write(vcpu, reg, val);
		err = 0;
	} else {
		err = kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg));
	}

out:
	return kvm_complete_insn_gp(vcpu, err);
}

static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
	get_debugreg(vcpu->arch.db[0], 0);
	get_debugreg(vcpu->arch.db[1], 1);
	get_debugreg(vcpu->arch.db[2], 2);
	get_debugreg(vcpu->arch.db[3], 3);
	get_debugreg(vcpu->arch.dr6, 6);
	vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);

	vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);

	/*
	 * exc_debug expects dr6 to be cleared after it runs; don't let it see
	 * a stale dr6 from the guest.
	 */
	set_debugreg(DR6_RESERVED, 6);
}

static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
{
	vmcs_writel(GUEST_DR7, val);
}

static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
{
	kvm_apic_update_ppr(vcpu);
	return 1;
}

static int handle_interrupt_window(struct kvm_vcpu *vcpu)
{
	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);

	kvm_make_request(KVM_REQ_EVENT, vcpu);

	++vcpu->stat.irq_window_exits;
	return 1;
}

static int handle_invlpg(struct kvm_vcpu *vcpu)
{
	unsigned long exit_qualification = vmx_get_exit_qual(vcpu);

	kvm_mmu_invlpg(vcpu, exit_qualification);
	return kvm_skip_emulated_instruction(vcpu);
}

static int handle_apic_access(struct kvm_vcpu *vcpu)
{
	if (likely(fasteoi)) {
		unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
		int access_type, offset;

		access_type = exit_qualification & APIC_ACCESS_TYPE;
		offset = exit_qualification & APIC_ACCESS_OFFSET;
		/*
		 * A sane guest uses MOV to write EOI and doesn't care about
		 * the written value, so short-circuit here and avoid heavy
		 * instruction emulation.
		 */
		if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
		    (offset == APIC_EOI)) {
			kvm_lapic_set_eoi(vcpu);
			return kvm_skip_emulated_instruction(vcpu);
		}
	}
	return kvm_emulate_instruction(vcpu, 0);
}

static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
{
	unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
	int vector = exit_qualification & 0xff;

	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
	kvm_apic_set_eoi_accelerated(vcpu, vector);
	return 1;
}

static int handle_apic_write(struct kvm_vcpu *vcpu)
{
	unsigned long exit_qualification = vmx_get_exit_qual(vcpu);

	/*
	 * APIC-write VM-Exit is trap-like, KVM doesn't need to advance RIP and
	 * hardware has done any necessary aliasing, offset adjustments, etc...
	 * for the access.  I.e. the correct value has already been written to
	 * the vAPIC page for the correct 16-byte chunk.  KVM needs only to
	 * retrieve the register value and emulate the access.
	 */
	u32 offset = exit_qualification & 0xff0;

	kvm_apic_write_nodecode(vcpu, offset);
	return 1;
}

static int handle_task_switch(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	unsigned long exit_qualification;
	bool has_error_code = false;
	u32 error_code = 0;
	u16 tss_selector;
	int reason, type, idt_v, idt_index;

	idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
	idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
	type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);

	exit_qualification = vmx_get_exit_qual(vcpu);

	reason = (u32)exit_qualification >> 30;
	if (reason == TASK_SWITCH_GATE && idt_v) {
		switch (type) {
		case INTR_TYPE_NMI_INTR:
			vcpu->arch.nmi_injected = false;
			vmx_set_nmi_mask(vcpu, true);
			break;
		case INTR_TYPE_EXT_INTR:
		case INTR_TYPE_SOFT_INTR:
			kvm_clear_interrupt_queue(vcpu);
			break;
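		/*
		 * A hard exception that was being delivered through the IDT
		 * may have pushed an error code; grab it from the VMCS below
		 * so it can be re-injected along with the emulated task
		 * switch.
		 */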
		case INTR_TYPE_HARD_EXCEPTION:
			if (vmx->idt_vectoring_info &
			    VECTORING_INFO_DELIVER_CODE_MASK) {
				has_error_code = true;
				error_code =
					vmcs_read32(IDT_VECTORING_ERROR_CODE);
			}
			fallthrough;
		case INTR_TYPE_SOFT_EXCEPTION:
			kvm_clear_exception_queue(vcpu);
			break;
		default:
			break;
		}
	}
	tss_selector = exit_qualification;

	if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
		       type != INTR_TYPE_EXT_INTR &&
		       type != INTR_TYPE_NMI_INTR))
		WARN_ON(!skip_emulated_instruction(vcpu));

	/*
	 * TODO: What about debug traps on tss switch?
	 *       Are we supposed to inject them and update dr6?
	 */
	return kvm_task_switch(vcpu, tss_selector,
			       type == INTR_TYPE_SOFT_INTR ? idt_index : -1,
			       reason, has_error_code, error_code);
}

static int handle_ept_violation(struct kvm_vcpu *vcpu)
{
	unsigned long exit_qualification;
	gpa_t gpa;
	u64 error_code;

	exit_qualification = vmx_get_exit_qual(vcpu);

	/*
	 * If the EPT violation happened while executing IRET from NMI, the
	 * "blocked by NMI" bit has to be set before the next VM entry.
	 * There are errata that may cause this bit to not be set:
	 * AAK134, BY25.
	 */
	if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
	    enable_vnmi &&
	    (exit_qualification & INTR_INFO_UNBLOCK_NMI))
		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);

	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
	trace_kvm_page_fault(vcpu, gpa, exit_qualification);

	/* Is it a read fault? */
	error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
		     ? PFERR_USER_MASK : 0;
	/* Is it a write fault? */
	error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
		      ? PFERR_WRITE_MASK : 0;
	/* Is it a fetch fault? */
	error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
		      ? PFERR_FETCH_MASK : 0;
	/* Is the EPT page table entry present? */
	error_code |= (exit_qualification & EPT_VIOLATION_RWX_MASK)
		      ? PFERR_PRESENT_MASK : 0;

	error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ?
		      PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;

	vcpu->arch.exit_qualification = exit_qualification;

	/*
	 * Check that the GPA doesn't exceed physical memory limits, as that is
	 * a guest page fault.  We have to emulate the instruction here, because
	 * if the illegal address is that of a paging structure, then the
	 * EPT_VIOLATION_ACC_WRITE bit is set.  Alternatively, if supported we
	 * would also use advanced VM-exit information for EPT violations to
	 * reconstruct the page fault error code.
	 */
	if (unlikely(allow_smaller_maxphyaddr && kvm_vcpu_is_illegal_gpa(vcpu, gpa)))
		return kvm_emulate_instruction(vcpu, 0);

	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
}

static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
{
	gpa_t gpa;

	if (!vmx_can_emulate_instruction(vcpu, EMULTYPE_PF, NULL, 0))
		return 1;

	/*
	 * A nested guest cannot optimize MMIO vmexits, because we have an
	 * nGPA here instead of the required GPA.
	 */
	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
	if (!is_guest_mode(vcpu) &&
	    !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
		trace_kvm_fast_mmio(gpa);
		return kvm_skip_emulated_instruction(vcpu);
	}

	return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
}

static int handle_nmi_window(struct kvm_vcpu *vcpu)
{
	if (KVM_BUG_ON(!enable_vnmi, vcpu->kvm))
		return -EIO;

	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
	++vcpu->stat.nmi_window_exits;
	kvm_make_request(KVM_REQ_EVENT, vcpu);

	return 1;
}

static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	return vmx->emulation_required && !vmx->rmode.vm86_active &&
	       (kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected);
}

static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	bool intr_window_requested;
	unsigned count = 130;

	intr_window_requested = exec_controls_get(vmx) &
				CPU_BASED_INTR_WINDOW_EXITING;

	while (vmx->emulation_required && count-- != 0) {
		if (intr_window_requested && !vmx_interrupt_blocked(vcpu))
			return handle_interrupt_window(&vmx->vcpu);

		if (kvm_test_request(KVM_REQ_EVENT, vcpu))
			return 1;

		if (!kvm_emulate_instruction(vcpu, 0))
			return 0;

		if (vmx_emulation_required_with_pending_exception(vcpu)) {
			kvm_prepare_emulation_failure_exit(vcpu);
			return 0;
		}

		if (vcpu->arch.halt_request) {
			vcpu->arch.halt_request = 0;
			return kvm_emulate_halt_noskip(vcpu);
		}

		/*
		 * Note, return 1 and not 0; vcpu_run() will invoke
		 * xfer_to_guest_mode() which will create a proper return
		 * code.
		 */
		if (__xfer_to_guest_mode_work_pending())
			return 1;
	}

	return 1;
}

static int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	if (vmx_emulation_required_with_pending_exception(vcpu)) {
		kvm_prepare_emulation_failure_exit(vcpu);
		return 0;
	}

	return 1;
}

static void grow_ple_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	unsigned int old = vmx->ple_window;

	vmx->ple_window = __grow_ple_window(old, ple_window,
					    ple_window_grow,
					    ple_window_max);

	if (vmx->ple_window != old) {
		vmx->ple_window_dirty = true;
		trace_kvm_ple_window_update(vcpu->vcpu_id,
					    vmx->ple_window, old);
	}
}

static void shrink_ple_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	unsigned int old = vmx->ple_window;

	vmx->ple_window = __shrink_ple_window(old, ple_window,
					      ple_window_shrink,
					      ple_window);

	if (vmx->ple_window != old) {
		vmx->ple_window_dirty = true;
		trace_kvm_ple_window_update(vcpu->vcpu_id,
					    vmx->ple_window, old);
	}
}

/*
 * Indicate that a vcpu is busy-waiting on a spinlock.  We don't enable PAUSE
 * exiting, so we only get here on CPUs with PAUSE-loop exiting (PLE).
 */
static int handle_pause(struct kvm_vcpu *vcpu)
{
	if (!kvm_pause_in_guest(vcpu->kvm))
		grow_ple_window(vcpu);

	/*
	 * Intel SDM vol3 ch-25.1.3 says: The "PAUSE-loop exiting"
	 * VM-execution control is ignored if CPL > 0.  OTOH, KVM
	 * never sets PAUSE_EXITING and only sets PLE if supported,
	 * so the vcpu must be at CPL 0 if it gets a PAUSE exit.
	 */
	kvm_vcpu_on_spin(vcpu, true);
	return kvm_skip_emulated_instruction(vcpu);
}

static int handle_monitor_trap(struct kvm_vcpu *vcpu)
{
	return 1;
}

static int handle_invpcid(struct kvm_vcpu *vcpu)
{
	u32 vmx_instruction_info;
	unsigned long type;
	gva_t gva;
	struct {
		u64 pcid;
		u64 gla;
	} operand;
	int gpr_index;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
	gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
	type = kvm_register_read(vcpu, gpr_index);

	/* According to the Intel instruction reference, the memory operand
	 * is read even if it isn't needed (e.g., for type==all)
	 */
	if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
				vmx_instruction_info, false,
				sizeof(operand), &gva))
		return 1;

	return kvm_handle_invpcid(vcpu, type, gva);
}

static int handle_pml_full(struct kvm_vcpu *vcpu)
{
	unsigned long exit_qualification;

	trace_kvm_pml_full(vcpu->vcpu_id);

	exit_qualification = vmx_get_exit_qual(vcpu);

	/*
	 * If the PML buffer became full while executing IRET from NMI, the
	 * "blocked by NMI" bit has to be set before the next VM entry.
	 */
	if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
	    enable_vnmi &&
	    (exit_qualification & INTR_INFO_UNBLOCK_NMI))
		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
			      GUEST_INTR_STATE_NMI);

	/*
	 * The PML buffer was already flushed at the beginning of the VM-exit.
	 * Nothing to do here, and there's no userspace involvement needed for PML.
	 */
	return 1;
}

static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!vmx->req_immediate_exit &&
	    !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) {
		kvm_lapic_expired_hv_timer(vcpu);
		return EXIT_FASTPATH_REENTER_GUEST;
	}

	return EXIT_FASTPATH_NONE;
}

static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{
	handle_fastpath_preemption_timer(vcpu);
	return 1;
}

/*
 * When nested=0, all VMX instruction VM Exits filter here.  The handlers
 * are overwritten by nested_vmx_setup() when nested=1.
 */
static int handle_vmx_instruction(struct kvm_vcpu *vcpu)
{
	kvm_queue_exception(vcpu, UD_VECTOR);
	return 1;
}

#ifndef CONFIG_X86_SGX_KVM
static int handle_encls(struct kvm_vcpu *vcpu)
{
	/*
	 * SGX virtualization is disabled.  There is no software enable bit for
	 * SGX, so KVM intercepts all ENCLS leafs and injects a #UD to prevent
	 * the guest from executing ENCLS (when SGX is supported by hardware).
	 */
	kvm_queue_exception(vcpu, UD_VECTOR);
	return 1;
}
#endif /* CONFIG_X86_SGX_KVM */

static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu)
{
	/*
	 * Hardware may or may not set the BUS_LOCK_DETECTED flag on BUS_LOCK
	 * VM-Exits. Unconditionally set the flag here and leave the handling to
	 * vmx_handle_exit().
	 */
	to_vmx(vcpu)->exit_reason.bus_lock_detected = true;
	return 1;
}

static int handle_notify(struct kvm_vcpu *vcpu)
{
	unsigned long exit_qual = vmx_get_exit_qual(vcpu);
	bool context_invalid = exit_qual & NOTIFY_VM_CONTEXT_INVALID;

	++vcpu->stat.notify_window_exits;

	/*
	 * If the notify VM exit happened while executing IRET from NMI, the
	 * "blocked by NMI" bit has to be set before the next VM entry.
	 */
	if (enable_vnmi && (exit_qual & INTR_INFO_UNBLOCK_NMI))
		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
			      GUEST_INTR_STATE_NMI);

	if (vcpu->kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_USER ||
	    context_invalid) {
		vcpu->run->exit_reason = KVM_EXIT_NOTIFY;
		vcpu->run->notify.flags = context_invalid ?
					  KVM_NOTIFY_CONTEXT_INVALID : 0;
		return 0;
	}

	return 1;
}

/*
 * The exit handlers return 1 if the exit was handled fully and guest execution
 * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
 * to be done to userspace and return 0.
 */
static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
	[EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi,
	[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
	[EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
	[EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
	[EXIT_REASON_IO_INSTRUCTION] = handle_io,
	[EXIT_REASON_CR_ACCESS] = handle_cr,
	[EXIT_REASON_DR_ACCESS] = handle_dr,
	[EXIT_REASON_CPUID] = kvm_emulate_cpuid,
	[EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr,
	[EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr,
	[EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window,
	[EXIT_REASON_HLT] = kvm_emulate_halt,
	[EXIT_REASON_INVD] = kvm_emulate_invd,
	[EXIT_REASON_INVLPG] = handle_invlpg,
	[EXIT_REASON_RDPMC] = kvm_emulate_rdpmc,
	[EXIT_REASON_VMCALL] = kvm_emulate_hypercall,
	[EXIT_REASON_VMCLEAR] = handle_vmx_instruction,
	[EXIT_REASON_VMLAUNCH] = handle_vmx_instruction,
	[EXIT_REASON_VMPTRLD] = handle_vmx_instruction,
	[EXIT_REASON_VMPTRST] = handle_vmx_instruction,
	[EXIT_REASON_VMREAD] = handle_vmx_instruction,
	[EXIT_REASON_VMRESUME] = handle_vmx_instruction,
	[EXIT_REASON_VMWRITE] = handle_vmx_instruction,
	[EXIT_REASON_VMOFF] = handle_vmx_instruction,
	[EXIT_REASON_VMON] = handle_vmx_instruction,
	[EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
	[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
	[EXIT_REASON_APIC_WRITE] = handle_apic_write,
	[EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
	[EXIT_REASON_WBINVD] = kvm_emulate_wbinvd,
	[EXIT_REASON_XSETBV] = kvm_emulate_xsetbv,
	[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
	[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
	[EXIT_REASON_GDTR_IDTR] = handle_desc,
	[EXIT_REASON_LDTR_TR] = handle_desc,
	[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
	[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
	[EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
	[EXIT_REASON_MWAIT_INSTRUCTION] = kvm_emulate_mwait,
	[EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
	[EXIT_REASON_MONITOR_INSTRUCTION] = kvm_emulate_monitor,
	[EXIT_REASON_INVEPT] = handle_vmx_instruction,
	[EXIT_REASON_INVVPID] = handle_vmx_instruction,
	[EXIT_REASON_RDRAND] = kvm_handle_invalid_op,
	[EXIT_REASON_RDSEED] = kvm_handle_invalid_op,
	[EXIT_REASON_PML_FULL] = handle_pml_full,
	[EXIT_REASON_INVPCID] = handle_invpcid,
	[EXIT_REASON_VMFUNC] = handle_vmx_instruction,
	[EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
	[EXIT_REASON_ENCLS] = handle_encls,
	[EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit,
	[EXIT_REASON_NOTIFY] = handle_notify,
};

static const int kvm_vmx_max_exit_handlers =
	ARRAY_SIZE(kvm_vmx_exit_handlers);

static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
			      u64 *info1, u64 *info2,
			      u32 *intr_info, u32 *error_code)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	*reason = vmx->exit_reason.full;
	*info1 = vmx_get_exit_qual(vcpu);
	if (!(vmx->exit_reason.failed_vmentry)) {
		*info2 = vmx->idt_vectoring_info;
		*intr_info = vmx_get_intr_info(vcpu);
		if (is_exception_with_error_code(*intr_info))
			*error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
		else
			*error_code = 0;
	} else {
		*info2 = 0;
		*intr_info = 0;
		*error_code = 0;
	}
}

static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
{
	if (vmx->pml_pg) {
		__free_page(vmx->pml_pg);
		vmx->pml_pg = NULL;
	}
}

static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u64 *pml_buf;
	u16 pml_idx;
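	/*
	 * Hardware fills the PML buffer from the top down: GUEST_PML_INDEX
	 * starts at PML_ENTITY_NUM - 1 and is decremented for each logged
	 * GPA, so the valid entries are pml_buf[pml_idx + 1] through
	 * pml_buf[PML_ENTITY_NUM - 1] (the whole buffer once the index has
	 * underflowed).
	 */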

	pml_idx = vmcs_read16(GUEST_PML_INDEX);

	/* Do nothing if PML buffer is empty */
	if (pml_idx == (PML_ENTITY_NUM - 1))
		return;

	/* PML index always points to next available PML buffer entity */
	if (pml_idx >= PML_ENTITY_NUM)
		pml_idx = 0;
	else
		pml_idx++;

	pml_buf = page_address(vmx->pml_pg);
	for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
		u64 gpa;

		gpa = pml_buf[pml_idx];
		WARN_ON(gpa & (PAGE_SIZE - 1));
		kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
	}

	/* reset PML index */
	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
}

static void vmx_dump_sel(char *name, uint32_t sel)
{
	pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
	       name, vmcs_read16(sel),
	       vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
	       vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
	       vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
}

static void vmx_dump_dtsel(char *name, uint32_t limit)
{
	pr_err("%s limit=0x%08x, base=0x%016lx\n",
	       name, vmcs_read32(limit),
	       vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
}

static void vmx_dump_msrs(char *name, struct vmx_msrs *m)
{
	unsigned int i;
	struct vmx_msr_entry *e;

	pr_err("MSR %s:\n", name);
	for (i = 0, e = m->val; i < m->nr; ++i, ++e)
		pr_err("  %2d: msr=0x%08x value=0x%016llx\n", i, e->index, e->value);
}

void dump_vmcs(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u32 vmentry_ctl, vmexit_ctl;
	u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
	u64 tertiary_exec_control;
	unsigned long cr4;
	int efer_slot;

	if (!dump_invalid_vmcs) {
		pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n");
		return;
	}

	vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
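	/*
	 * The entry/exit controls captured above and below gate which
	 * guest/host fields are dumped later, e.g. EFER, PAT and PerfGlobCtl
	 * are only read from the VMCS when the corresponding load controls
	 * are set.
	 */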
	vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
	cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
	pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
	cr4 = vmcs_readl(GUEST_CR4);

	if (cpu_has_secondary_exec_ctrls())
		secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
	else
		secondary_exec_control = 0;

	if (cpu_has_tertiary_exec_ctrls())
		tertiary_exec_control = vmcs_read64(TERTIARY_VM_EXEC_CONTROL);
	else
		tertiary_exec_control = 0;

	pr_err("VMCS %p, last attempted VM-entry on CPU %d\n",
	       vmx->loaded_vmcs->vmcs, vcpu->arch.last_vmentry_cpu);
	pr_err("*** Guest State ***\n");
	pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
	       vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
	       vmcs_readl(CR0_GUEST_HOST_MASK));
	pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
	       cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
	pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
	if (cpu_has_vmx_ept()) {
		pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n",
		       vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
		pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n",
		       vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
	}
	pr_err("RSP = 0x%016lx RIP = 0x%016lx\n",
	       vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
	pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n",
	       vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
	pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
	       vmcs_readl(GUEST_SYSENTER_ESP),
	       vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
	vmx_dump_sel("CS: ", GUEST_CS_SELECTOR);
	vmx_dump_sel("DS: ", GUEST_DS_SELECTOR);
	vmx_dump_sel("SS: ", GUEST_SS_SELECTOR);
	vmx_dump_sel("ES: ", GUEST_ES_SELECTOR);
	vmx_dump_sel("FS: ", GUEST_FS_SELECTOR);
	vmx_dump_sel("GS: ", GUEST_GS_SELECTOR);
	vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
	vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
	vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
	vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
	efer_slot = vmx_find_loadstore_msr_slot(&vmx->msr_autoload.guest, MSR_EFER);
	if (vmentry_ctl & VM_ENTRY_LOAD_IA32_EFER)
		pr_err("EFER= 0x%016llx\n", vmcs_read64(GUEST_IA32_EFER));
	else if (efer_slot >= 0)
		pr_err("EFER= 0x%016llx (autoload)\n",
		       vmx->msr_autoload.guest.val[efer_slot].value);
	else if (vmentry_ctl & VM_ENTRY_IA32E_MODE)
		pr_err("EFER= 0x%016llx (effective)\n",
		       vcpu->arch.efer | (EFER_LMA | EFER_LME));
	else
		pr_err("EFER= 0x%016llx (effective)\n",
		       vcpu->arch.efer & ~(EFER_LMA | EFER_LME));
	if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PAT)
		pr_err("PAT = 0x%016llx\n", vmcs_read64(GUEST_IA32_PAT));
	pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
	       vmcs_read64(GUEST_IA32_DEBUGCTL),
	       vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
	if (cpu_has_load_perf_global_ctrl() &&
	    vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
		pr_err("PerfGlobCtl = 0x%016llx\n",
		       vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
	if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
		pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
	pr_err("Interruptibility = %08x ActivityState = %08x\n",
	       vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
	       vmcs_read32(GUEST_ACTIVITY_STATE));
	if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
		pr_err("InterruptStatus = %04x\n",
		       vmcs_read16(GUEST_INTR_STATUS));
	if (vmcs_read32(VM_ENTRY_MSR_LOAD_COUNT) > 0)
		vmx_dump_msrs("guest autoload", &vmx->msr_autoload.guest);
	if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0)
		vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest);

	pr_err("*** Host State ***\n");
	pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
	       vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
	pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
	       vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
	       vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
	       vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
	       vmcs_read16(HOST_TR_SELECTOR));
	pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
	       vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
	       vmcs_readl(HOST_TR_BASE));
	pr_err("GDTBase=%016lx IDTBase=%016lx\n",
	       vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
	pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
	       vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
	       vmcs_readl(HOST_CR4));
	pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
	       vmcs_readl(HOST_IA32_SYSENTER_ESP),
	       vmcs_read32(HOST_IA32_SYSENTER_CS),
	       vmcs_readl(HOST_IA32_SYSENTER_EIP));
	if (vmexit_ctl & VM_EXIT_LOAD_IA32_EFER)
		pr_err("EFER= 0x%016llx\n", vmcs_read64(HOST_IA32_EFER));
	if (vmexit_ctl & VM_EXIT_LOAD_IA32_PAT)
		pr_err("PAT = 0x%016llx\n", vmcs_read64(HOST_IA32_PAT));
	if (cpu_has_load_perf_global_ctrl() &&
	    vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
		pr_err("PerfGlobCtl = 0x%016llx\n",
		       vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
	if (vmcs_read32(VM_EXIT_MSR_LOAD_COUNT) > 0)
		vmx_dump_msrs("host autoload", &vmx->msr_autoload.host);

	pr_err("*** Control State ***\n");
	pr_err("CPUBased=0x%08x SecondaryExec=0x%08x TertiaryExec=0x%016llx\n",
	       cpu_based_exec_ctrl, secondary_exec_control, tertiary_exec_control);
	pr_err("PinBased=0x%08x EntryControls=%08x ExitControls=%08x\n",
	       pin_based_exec_ctrl, vmentry_ctl, vmexit_ctl);
	pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
	       vmcs_read32(EXCEPTION_BITMAP),
	       vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
	       vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
	pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
	       vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
	       vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
	       vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
	pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
	       vmcs_read32(VM_EXIT_INTR_INFO),
	       vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
	       vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
	pr_err("        reason=%08x qualification=%016lx\n",
	       vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
	pr_err("IDTVectoring: info=%08x errcode=%08x\n",
	       vmcs_read32(IDT_VECTORING_INFO_FIELD),
	       vmcs_read32(IDT_VECTORING_ERROR_CODE));
	pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
	if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
		pr_err("TSC Multiplier = 0x%016llx\n",
		       vmcs_read64(TSC_MULTIPLIER));
	if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) {
		if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
			u16 status = vmcs_read16(GUEST_INTR_STATUS);

			pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff);
		}
		pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
		if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
			pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR));
		pr_cont("virt-APIC addr = 0x%016llx\n",
			vmcs_read64(VIRTUAL_APIC_PAGE_ADDR));
	}
	if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
		pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
	if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
		pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
	if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
		pr_err("PLE Gap=%08x Window=%08x\n",
		       vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
	if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
		pr_err("Virtual processor ID = 0x%04x\n",
		       vmcs_read16(VIRTUAL_PROCESSOR_ID));
}

/*
 * The guest has exited.  See if we can fix it or if we need userspace
 * assistance.
 */
static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	union vmx_exit_reason exit_reason = vmx->exit_reason;
	u32 vectoring_info = vmx->idt_vectoring_info;
	u16 exit_handler_index;

	/*
	 * Flush the PML buffer of logged GPAs so that dirty_bitmap is as up
	 * to date as possible.  Another benefit is that, in
	 * kvm_vm_ioctl_get_dirty_log, before querying dirty_bitmap, KVM only
	 * needs to kick all vCPUs out of guest mode: once a vCPU is back in
	 * root mode, its PML buffer has already been flushed.  Note, PML is
	 * never enabled in hardware while running L2.
	 */
	if (enable_pml && !is_guest_mode(vcpu))
		vmx_flush_pml_buffer(vcpu);

	/*
	 * KVM should never reach this point with a pending nested VM-Enter.
	 * More specifically, short-circuiting VM-Entry to emulate L2 due to
	 * invalid guest state should never happen as that means KVM knowingly
	 * allowed a nested VM-Enter with an invalid vmcs12.  More below.
	 */
	if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
		return -EIO;

	if (is_guest_mode(vcpu)) {
		/*
		 * PML is never enabled when running L2, bail immediately if a
		 * PML full exit occurs as something is horribly wrong.
		 */
		if (exit_reason.basic == EXIT_REASON_PML_FULL)
			goto unexpected_vmexit;

		/*
		 * The host physical addresses of some pages of guest memory
		 * are loaded into the vmcs02 (e.g.
		 * vmcs12's Virtual APIC
		 * Page).  The CPU may write to these pages via their host
		 * physical address while L2 is running, bypassing any
		 * address-translation-based dirty tracking (e.g. EPT write
		 * protection).
		 *
		 * Mark them dirty on every exit from L2 to prevent them from
		 * getting out of sync with dirty tracking.
		 */
		nested_mark_vmcs12_pages_dirty(vcpu);

		/*
		 * Synthesize a triple fault if L2 state is invalid.  In normal
		 * operation, nested VM-Enter rejects any attempt to enter L2
		 * with invalid state.  However, those checks are skipped if
		 * state is being stuffed via RSM or KVM_SET_NESTED_STATE.  If
		 * L2 state is invalid, it means either L1 modified SMRAM state
		 * or userspace provided bad state.  Synthesize TRIPLE_FAULT as
		 * doing so is architecturally allowed in the RSM case, and is
		 * the least awful solution for the userspace case without
		 * risking false positives.
		 */
		if (vmx->emulation_required) {
			nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
			return 1;
		}

		if (nested_vmx_reflect_vmexit(vcpu))
			return 1;
	}

	/* If guest state is invalid, start emulating.  L2 is handled above. */
	if (vmx->emulation_required)
		return handle_invalid_guest_state(vcpu);

	if (exit_reason.failed_vmentry) {
		dump_vmcs(vcpu);
		vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
		vcpu->run->fail_entry.hardware_entry_failure_reason
			= exit_reason.full;
		vcpu->run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu;
		return 0;
	}

	if (unlikely(vmx->fail)) {
		dump_vmcs(vcpu);
		vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
		vcpu->run->fail_entry.hardware_entry_failure_reason
			= vmcs_read32(VM_INSTRUCTION_ERROR);
		vcpu->run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu;
		return 0;
	}

	/*
	 * Note:
	 * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it was caused by an
	 * event delivery, since that indicates the guest is accessing MMIO.
	 * The VM-exit would be triggered again after returning to the guest,
	 * which would cause an infinite loop.
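	 *
	 * The exit reasons exempted in the check below are the ones KVM can
	 * handle correctly even when the exit occurred during event delivery.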
	 */
	if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
	    (exit_reason.basic != EXIT_REASON_EXCEPTION_NMI &&
	     exit_reason.basic != EXIT_REASON_EPT_VIOLATION &&
	     exit_reason.basic != EXIT_REASON_PML_FULL &&
	     exit_reason.basic != EXIT_REASON_APIC_ACCESS &&
	     exit_reason.basic != EXIT_REASON_TASK_SWITCH &&
	     exit_reason.basic != EXIT_REASON_NOTIFY)) {
		int ndata = 3;

		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
		vcpu->run->internal.data[0] = vectoring_info;
		vcpu->run->internal.data[1] = exit_reason.full;
		vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
		if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) {
			vcpu->run->internal.data[ndata++] =
				vmcs_read64(GUEST_PHYSICAL_ADDRESS);
		}
		vcpu->run->internal.data[ndata++] = vcpu->arch.last_vmentry_cpu;
		vcpu->run->internal.ndata = ndata;
		return 0;
	}

	if (unlikely(!enable_vnmi &&
		     vmx->loaded_vmcs->soft_vnmi_blocked)) {
		if (!vmx_interrupt_blocked(vcpu)) {
			vmx->loaded_vmcs->soft_vnmi_blocked = 0;
		} else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
			   vcpu->arch.nmi_pending) {
			/*
			 * This CPU doesn't help us find the end of an
			 * NMI-blocked window if the guest runs with IRQs
			 * disabled.  So pull the trigger after 1 s of
			 * futile waiting, but inform the user about it.
			 */
			printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
			       "state on VCPU %d after 1 s timeout\n",
			       __func__, vcpu->vcpu_id);
			vmx->loaded_vmcs->soft_vnmi_blocked = 0;
		}
	}

	if (exit_fastpath != EXIT_FASTPATH_NONE)
		return 1;

	if (exit_reason.basic >= kvm_vmx_max_exit_handlers)
		goto unexpected_vmexit;
#ifdef CONFIG_RETPOLINE
	if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
		return kvm_emulate_wrmsr(vcpu);
	else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER)
		return handle_preemption_timer(vcpu);
	else if (exit_reason.basic == EXIT_REASON_INTERRUPT_WINDOW)
		return handle_interrupt_window(vcpu);
	else if (exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
		return handle_external_interrupt(vcpu);
	else if (exit_reason.basic == EXIT_REASON_HLT)
		return kvm_emulate_halt(vcpu);
	else if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG)
		return handle_ept_misconfig(vcpu);
#endif

	exit_handler_index = array_index_nospec((u16)exit_reason.basic,
						kvm_vmx_max_exit_handlers);
	if (!kvm_vmx_exit_handlers[exit_handler_index])
		goto unexpected_vmexit;

	return kvm_vmx_exit_handlers[exit_handler_index](vcpu);

unexpected_vmexit:
	vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
		    exit_reason.full);
	dump_vmcs(vcpu);
	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
	vcpu->run->internal.suberror =
			KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
	vcpu->run->internal.ndata = 2;
	vcpu->run->internal.data[0] = exit_reason.full;
	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
	return 0;
}

static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
	int ret = __vmx_handle_exit(vcpu, exit_fastpath);

	/*
	 * Exit to user space when a bus lock is detected, to inform userspace
	 * that there is a bus lock in the guest.
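	 *
	 * If the exit was already bound for userspace (ret <= 0), keep the
	 * original exit reason and only set the run flag so the bus lock is
	 * still reported.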
	 */
	if (to_vmx(vcpu)->exit_reason.bus_lock_detected) {
		if (ret > 0)
			vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK;

		vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK;
		return 0;
	}
	return ret;
}

/*
 * Software based L1D cache flush which is used when microcode providing
 * the cache control MSR is not loaded.
 *
 * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but
 * flushing it requires reading 64 KiB because the replacement algorithm
 * is not exactly LRU.  This could be sized at runtime via topology
 * information, but as all relevant affected CPUs have a 32 KiB L1D cache
 * there is no point in doing so.
 */
static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
{
	int size = PAGE_SIZE << L1D_CACHE_ORDER;

	/*
	 * This code is only executed when the flush mode is 'cond' or
	 * 'always'.
	 */
	if (static_branch_likely(&vmx_l1d_flush_cond)) {
		bool flush_l1d;

		/*
		 * Clear the per-vcpu flush bit, it gets set again
		 * either from vcpu_run() or from one of the unsafe
		 * VMEXIT handlers.
		 */
		flush_l1d = vcpu->arch.l1tf_flush_l1d;
		vcpu->arch.l1tf_flush_l1d = false;

		/*
		 * Clear the per-cpu flush bit, it gets set again from
		 * the interrupt handlers.
		 */
		flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
		kvm_clear_cpu_l1tf_flush_l1d();

		if (!flush_l1d)
			return;
	}

	vcpu->stat.l1d_flush++;

	if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
		native_wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
		return;
	}

	asm volatile(
		/* First ensure the pages are in the TLB */
		"xorl	%%eax, %%eax\n"
		".Lpopulate_tlb:\n\t"
		"movzbl	(%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
		"addl	$4096, %%eax\n\t"
		"cmpl	%%eax, %[size]\n\t"
		"jne	.Lpopulate_tlb\n\t"
		"xorl	%%eax, %%eax\n\t"
		"cpuid\n\t"
		/* Now fill the cache */
		"xorl	%%eax, %%eax\n"
		".Lfill_cache:\n"
		"movzbl	(%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
		"addl	$64, %%eax\n\t"
		"cmpl	%%eax, %[size]\n\t"
		"jne	.Lfill_cache\n\t"
		"lfence\n"
		:: [flush_pages] "r" (vmx_l1d_flush_pages),
		   [size] "r" (size)
		: "eax", "ebx", "ecx", "edx");
}

static void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	int tpr_threshold;

	if (is_guest_mode(vcpu) &&
	    nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
		return;

	tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr;
	if (is_guest_mode(vcpu))
		to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold;
	else
		vmcs_write32(TPR_THRESHOLD, tpr_threshold);
}

void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u32 sec_exec_control;

	if (!lapic_in_kernel(vcpu))
		return;

	if (!flexpriority_enabled &&
	    !cpu_has_vmx_virtualize_x2apic_mode())
		return;

	/* Postpone execution until vmcs01 is the current VMCS.
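	 * The change is recorded in change_vmcs01_virtual_apic_mode below and
	 * applied once the switch back to vmcs01 happens.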
	 */
	if (is_guest_mode(vcpu)) {
		vmx->nested.change_vmcs01_virtual_apic_mode = true;
		return;
	}

	sec_exec_control = secondary_exec_controls_get(vmx);
	sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
			      SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);

	switch (kvm_get_apic_mode(vcpu)) {
	case LAPIC_MODE_INVALID:
		WARN_ONCE(true, "Invalid local APIC state");
		break;
	case LAPIC_MODE_DISABLED:
		break;
	case LAPIC_MODE_XAPIC:
		if (flexpriority_enabled) {
			sec_exec_control |=
				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
			kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);

			/*
			 * Flush the TLB, reloading the APIC access page will
			 * only do so if its physical address has changed, but
			 * the guest may have inserted a non-APIC mapping into
			 * the TLB while the APIC access page was disabled.
			 */
			kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
		}
		break;
	case LAPIC_MODE_X2APIC:
		if (cpu_has_vmx_virtualize_x2apic_mode())
			sec_exec_control |=
				SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
		break;
	}
	secondary_exec_controls_set(vmx, sec_exec_control);

	vmx_update_msr_bitmap_x2apic(vcpu);
}

static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
{
	const gfn_t gfn = APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *slot;
	unsigned long mmu_seq;
	kvm_pfn_t pfn;

	/* Defer reload until vmcs01 is the current VMCS. */
	if (is_guest_mode(vcpu)) {
		to_vmx(vcpu)->nested.reload_vmcs01_apic_access_page = true;
		return;
	}

	if (!(secondary_exec_controls_get(to_vmx(vcpu)) &
	    SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
		return;

	/*
	 * Grab the memslot so that the hva lookup for the mmu_notifier retry
	 * is guaranteed to use the same memslot as the pfn lookup, i.e.
	 * rely on the pfn lookup's validation of the memslot to ensure a
	 * valid hva is used for the retry check.
	 */
	slot = id_to_memslot(slots, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT);
	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
		return;

	/*
	 * Ensure that the mmu_notifier sequence count is read before KVM
	 * retrieves the pfn from the primary MMU.  Note, the memslot is
	 * protected by SRCU, not the mmu_notifier.  Pairs with the smp_wmb()
	 * in kvm_mmu_invalidate_end().
	 */
	mmu_seq = kvm->mmu_invalidate_seq;
	smp_rmb();

	/*
	 * No need to retry if the memslot does not exist or is invalid.  KVM
	 * controls the APIC-access page memslot, and only deletes the memslot
	 * if APICv is permanently inhibited, i.e. the memslot won't reappear.
	 */
	pfn = gfn_to_pfn_memslot(slot, gfn);
	if (is_error_noslot_pfn(pfn))
		return;

	read_lock(&vcpu->kvm->mmu_lock);
	if (mmu_invalidate_retry_hva(kvm, mmu_seq,
				     gfn_to_hva_memslot(slot, gfn))) {
		kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
		read_unlock(&vcpu->kvm->mmu_lock);
		goto out;
	}

	vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(pfn));
	read_unlock(&vcpu->kvm->mmu_lock);

	/*
	 * No need for a manual TLB flush at this point, KVM has already done a
	 * flush if there were SPTEs pointing at the previous page.
	 */
out:
	/*
	 * Do not pin the APIC access page in memory; the MMU notifier
	 * will call us again if it is migrated or swapped out.
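	 * kvm_release_pfn_clean() below drops the reference taken by
	 * gfn_to_pfn_memslot() above.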
	 */
	kvm_release_pfn_clean(pfn);
}

static void vmx_hwapic_isr_update(int max_isr)
{
	u16 status;
	u8 old;

	if (max_isr == -1)
		max_isr = 0;

	status = vmcs_read16(GUEST_INTR_STATUS);
	old = status >> 8;
	if (max_isr != old) {
		status &= 0xff;
		status |= max_isr << 8;
		vmcs_write16(GUEST_INTR_STATUS, status);
	}
}

static void vmx_set_rvi(int vector)
{
	u16 status;
	u8 old;

	if (vector == -1)
		vector = 0;

	status = vmcs_read16(GUEST_INTR_STATUS);
	old = (u8)status & 0xff;
	if ((u8)vector != old) {
		status &= ~0xff;
		status |= (u8)vector;
		vmcs_write16(GUEST_INTR_STATUS, status);
	}
}

static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
{
	/*
	 * When running L2, updating RVI is only relevant when vmcs12 has
	 * virtual-interrupt-delivery enabled.  However, that can be enabled
	 * only when L1 also intercepts external interrupts, in which case
	 * KVM should not update vmcs02's RVI but instead intercept the
	 * interrupt.  Therefore, do nothing when running L2.
	 */
	if (!is_guest_mode(vcpu))
		vmx_set_rvi(max_irr);
}

static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int max_irr;
	bool got_posted_interrupt;

	if (KVM_BUG_ON(!enable_apicv, vcpu->kvm))
		return -EIO;

	if (pi_test_on(&vmx->pi_desc)) {
		pi_clear_on(&vmx->pi_desc);
		/*
		 * IOMMU can write to PID.ON, so the barrier matters even on UP.
		 * But on x86 this is just a compiler barrier anyway.
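		 *
		 * Clearing ON before PIR is read below ensures that a vector
		 * posted after the read sets ON again and is noticed on a
		 * later sync.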
		 */
		smp_mb__after_atomic();
		got_posted_interrupt =
			kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
	} else {
		max_irr = kvm_lapic_find_highest_irr(vcpu);
		got_posted_interrupt = false;
	}

	/*
	 * Newly recognized interrupts are injected via either virtual interrupt
	 * delivery (RVI) or KVM_REQ_EVENT.  Virtual interrupt delivery is
	 * disabled in two cases:
	 *
	 * 1) If L2 is running and the vCPU has a new pending interrupt.  If L1
	 * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a
	 * VM-Exit to L1.  If L1 doesn't want to exit, the interrupt is injected
	 * into L2, but KVM doesn't use virtual interrupt delivery to inject
	 * interrupts into L2, and so KVM_REQ_EVENT is again needed.
	 *
	 * 2) If APICv is disabled for this vCPU, assigned devices may still
	 * attempt to post interrupts.  The posted interrupt vector will cause
	 * a VM-Exit and the subsequent entry will call sync_pir_to_irr.
	 */
	if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
		vmx_set_rvi(max_irr);
	else if (got_posted_interrupt)
		kvm_make_request(KVM_REQ_EVENT, vcpu);

	return max_irr;
}

static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
	if (!kvm_vcpu_apicv_active(vcpu))
		return;

	vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
	vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
	vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
	vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
}

static void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	pi_clear_on(&vmx->pi_desc);
	memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
}

void vmx_do_interrupt_irqoff(unsigned long entry);
void vmx_do_nmi_irqoff(void);

static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
{
	/*
	 * Save xfd_err to guest_fpu before interrupts are enabled, so the
	 * MSR value is not clobbered by host activity before the guest
	 * has a chance to
	 * consume it.
	 *
	 * Do not blindly read xfd_err here, since this exception might
	 * be caused by L1 interception on a platform which doesn't
	 * support xfd at all.
	 *
	 * Do it conditionally upon guest_fpu::xfd.  xfd_err matters
	 * only when xfd contains a non-zero value.
	 *
	 * Queuing exception is done in vmx_handle_exit.  See comment there.
	 */
	if (vcpu->arch.guest_fpu.fpstate->xfd)
		rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
}

static void handle_exception_irqoff(struct vcpu_vmx *vmx)
{
	u32 intr_info = vmx_get_intr_info(&vmx->vcpu);

	/* if exit due to PF check for async PF */
	if (is_page_fault(intr_info))
		vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
	/* if exit due to NM, handle before interrupts are enabled */
	else if (is_nm_fault(intr_info))
		handle_nm_fault_irqoff(&vmx->vcpu);
	/* Handle machine checks before interrupts are enabled */
	else if (is_machine_check(intr_info))
		kvm_machine_check();
}

static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
{
	u32 intr_info = vmx_get_intr_info(vcpu);
	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
	gate_desc *desc = (gate_desc *)host_idt_base + vector;

	if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
	    "unexpected VM-Exit interrupt info: 0x%x", intr_info))
		return;

	kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
	vmx_do_interrupt_irqoff(gate_offset(desc));
	kvm_after_interrupt(vcpu);

	vcpu->arch.at_instruction_boundary = true;
}

static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (vmx->emulation_required)
		return;

	if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
		handle_external_interrupt_irqoff(vcpu);
	else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
		handle_exception_irqoff(vmx);
}

/*
 * The kvm parameter can be NULL (module initialization, or invocation before
 * VM creation).  Be sure to check the kvm parameter before using it.
 */
static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
{
	switch (index) {
	case MSR_IA32_SMBASE:
		if (!IS_ENABLED(CONFIG_KVM_SMM))
			return false;
		/*
		 * We cannot do SMM unless we can run the guest in big
		 * real mode.
		 */
		return enable_unrestricted_guest || emulate_invalid_guest_state;
	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
		return nested;
	case MSR_AMD64_VIRT_SPEC_CTRL:
	case MSR_AMD64_TSC_RATIO:
		/* This is AMD only. */
		return false;
	default:
		return true;
	}
}

static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
{
	u32 exit_intr_info;
	bool unblock_nmi;
	u8 vector;
	bool idtv_info_valid;

	idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;

	if (enable_vnmi) {
		if (vmx->loaded_vmcs->nmi_known_unmasked)
			return;

		exit_intr_info = vmx_get_intr_info(&vmx->vcpu);
		unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
		vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
		/*
		 * SDM 3: 27.7.1.2 (September 2008)
		 * Re-set bit "block by NMI" before VM entry if vmexit caused by
		 * a guest IRET fault.
		 * SDM 3: 23.2.2 (September 2008)
		 * Bit 12 is undefined in any of the following cases:
		 *  If the VM exit sets the valid bit in the IDT-vectoring
		 *   information field.
		 *  If the VM exit is due to a double fault.
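		 * Hence bit 12 is only honored below when the IDT-vectoring
		 * info is invalid and the exiting vector is not #DF.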
		 */
		if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
		    vector != DF_VECTOR && !idtv_info_valid)
			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
				      GUEST_INTR_STATE_NMI);
		else
			vmx->loaded_vmcs->nmi_known_unmasked =
				!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
				  & GUEST_INTR_STATE_NMI);
	} else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
		vmx->loaded_vmcs->vnmi_blocked_time +=
			ktime_to_ns(ktime_sub(ktime_get(),
					      vmx->loaded_vmcs->entry_time));
}

static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
				      u32 idt_vectoring_info,
				      int instr_len_field,
				      int error_code_field)
{
	u8 vector;
	int type;
	bool idtv_info_valid;

	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;

	vcpu->arch.nmi_injected = false;
	kvm_clear_exception_queue(vcpu);
	kvm_clear_interrupt_queue(vcpu);

	if (!idtv_info_valid)
		return;

	kvm_make_request(KVM_REQ_EVENT, vcpu);

	vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
	type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;

	switch (type) {
	case INTR_TYPE_NMI_INTR:
		vcpu->arch.nmi_injected = true;
		/*
		 * SDM 3: 27.7.1.2 (September 2008)
		 * Clear bit "block by NMI" before VM entry if an NMI
		 * delivery faulted.
		 */
		vmx_set_nmi_mask(vcpu, false);
		break;
	case INTR_TYPE_SOFT_EXCEPTION:
		vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
		fallthrough;
	case INTR_TYPE_HARD_EXCEPTION:
		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
			u32 err = vmcs_read32(error_code_field);
			kvm_requeue_exception_e(vcpu, vector, err);
		} else
			kvm_requeue_exception(vcpu, vector);
		break;
	case INTR_TYPE_SOFT_INTR:
		vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
		fallthrough;
	case INTR_TYPE_EXT_INTR:
		kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
		break;
	default:
		break;
	}
}

static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
{
	__vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
				  VM_EXIT_INSTRUCTION_LEN,
				  IDT_VECTORING_ERROR_CODE);
}

static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
{
	__vmx_complete_interrupts(vcpu,
				  vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
				  VM_ENTRY_INSTRUCTION_LEN,
				  VM_ENTRY_EXCEPTION_ERROR_CODE);

	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
}

static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
{
	int i, nr_msrs;
	struct perf_guest_switch_msr *msrs;
	struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu);

	pmu->host_cross_mapped_mask = 0;
	if (pmu->pebs_enable & pmu->global_ctrl)
		intel_pmu_cross_mapped_check(pmu);

	/* Note, nr_msrs may be garbage if perf_guest_get_msrs() returns NULL.
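	 * The !msrs check below returns before nr_msrs is consumed.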
	 */
	msrs = perf_guest_get_msrs(&nr_msrs, (void *)pmu);
	if (!msrs)
		return;

	for (i = 0; i < nr_msrs; i++)
		if (msrs[i].host == msrs[i].guest)
			clear_atomic_switch_msr(vmx, msrs[i].msr);
		else
			add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
					      msrs[i].host, false);
}

static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u64 tscl;
	u32 delta_tsc;

	if (vmx->req_immediate_exit) {
		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
		vmx->loaded_vmcs->hv_timer_soft_disabled = false;
	} else if (vmx->hv_deadline_tsc != -1) {
		tscl = rdtsc();
		if (vmx->hv_deadline_tsc > tscl)
			/* set_hv_timer ensures the delta fits in 32-bits */
			delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
					  cpu_preemption_timer_multi);
		else
			delta_tsc = 0;

		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
		vmx->loaded_vmcs->hv_timer_soft_disabled = false;
	} else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) {
		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1);
		vmx->loaded_vmcs->hv_timer_soft_disabled = true;
	}
}

void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
{
	if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) {
		vmx->loaded_vmcs->host_state.rsp = host_rsp;
		vmcs_writel(HOST_RSP, host_rsp);
	}
}

void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
					unsigned int flags)
{
	u64 hostval = this_cpu_read(x86_spec_ctrl_current);

	if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
		return;

	if (flags & VMX_RUN_SAVE_SPEC_CTRL)
		vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);

	/*
	 * If the guest/host SPEC_CTRL values differ, restore the host value.
	 *
	 * For legacy IBRS, the IBRS bit always needs to be written after
	 * transitioning from a less privileged predictor mode, regardless of
	 * whether the guest/host values differ.
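	 *
	 * Note, this runs in the noinstr VM-Exit path, hence the raw
	 * __rdmsr()/native_wrmsrl() accessors.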
	 */
	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
	    vmx->spec_ctrl != hostval)
		native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);

	barrier_nospec();
}

static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
	switch (to_vmx(vcpu)->exit_reason.basic) {
	case EXIT_REASON_MSR_WRITE:
		return handle_fastpath_set_msr_irqoff(vcpu);
	case EXIT_REASON_PREEMPTION_TIMER:
		return handle_fastpath_preemption_timer(vcpu);
	default:
		return EXIT_FASTPATH_NONE;
	}
}

static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
					unsigned int flags)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	guest_state_enter_irqoff();

	/*
	 * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
	 * mitigation for MDS is done late in VMentry and is still
	 * executed in spite of L1D Flush. This is because an extra VERW
	 * should not matter much after the big hammer L1D Flush.
	 */
	if (static_branch_unlikely(&vmx_l1d_should_flush))
		vmx_l1d_flush(vcpu);
	else if (static_branch_unlikely(&mmio_stale_data_clear) &&
		 kvm_arch_has_assigned_device(vcpu->kvm))
		mds_clear_cpu_buffers();

	vmx_disable_fb_clear(vmx);

	if (vcpu->arch.cr2 != native_read_cr2())
		native_write_cr2(vcpu->arch.cr2);

	vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
				   flags);

	vcpu->arch.cr2 = native_read_cr2();
	vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;

	vmx->idt_vectoring_info = 0;

	vmx_enable_fb_clear(vmx);

	if (unlikely(vmx->fail)) {
		vmx->exit_reason.full = 0xdead;
		goto out;
	}

	vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
	if (likely(!vmx->exit_reason.failed_vmentry))
		vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);

	if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
	    is_nmi(vmx_get_intr_info(vcpu))) {
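		/*
		 * The NMI vectored to a VM-Exit instead of the host IDT, so
		 * invoke the kernel's NMI handler directly while still in the
		 * IRQs-off, noinstr region.
		 */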
		kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
		vmx_do_nmi_irqoff();
		kvm_after_interrupt(vcpu);
	}

out:
	guest_state_exit_irqoff();
}

static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	unsigned long cr3, cr4;

	/* Record the guest's net vcpu time for enforced NMI injections. */
	if (unlikely(!enable_vnmi &&
		     vmx->loaded_vmcs->soft_vnmi_blocked))
		vmx->loaded_vmcs->entry_time = ktime_get();

	/*
	 * Don't enter VMX if guest state is invalid, let the exit handler
	 * start emulation until we arrive back to a valid state.  Synthesize a
	 * consistency check VM-Exit due to invalid guest state and bail.
	 */
	if (unlikely(vmx->emulation_required)) {
		vmx->fail = 0;

		vmx->exit_reason.full = EXIT_REASON_INVALID_STATE;
		vmx->exit_reason.failed_vmentry = 1;
		kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1);
		vmx->exit_qualification = ENTRY_FAIL_DEFAULT;
		kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2);
		vmx->exit_intr_info = 0;
		return EXIT_FASTPATH_NONE;
	}

	trace_kvm_entry(vcpu);

	if (vmx->ple_window_dirty) {
		vmx->ple_window_dirty = false;
		vmcs_write32(PLE_WINDOW, vmx->ple_window);
	}

	/*
	 * We did this in prepare_switch_to_guest, because it needs to
	 * be within srcu_read_lock.
	 */
	WARN_ON_ONCE(vmx->nested.need_vmcs12_to_shadow_sync);

	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP))
		vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
	vcpu->arch.regs_dirty = 0;

	/*
	 * Refresh vmcs.HOST_CR3 if necessary.  This must be done immediately
	 * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
	 * it switches back to the current->mm, which can occur in KVM context
	 * when switching to a temporary mm to patch kernel code, e.g. if KVM
	 * toggles a static key while handling a VM-Exit.
	 */
	cr3 = __get_current_cr3_fast();
	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
		vmcs_writel(HOST_CR3, cr3);
		vmx->loaded_vmcs->host_state.cr3 = cr3;
	}

	cr4 = cr4_read_shadow();
	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
		vmcs_writel(HOST_CR4, cr4);
		vmx->loaded_vmcs->host_state.cr4 = cr4;
	}

	/* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */
	if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
		set_debugreg(vcpu->arch.dr6, 6);

	/* When single-stepping over STI and MOV SS, we must clear the
	 * corresponding interruptibility bits in the guest state.  Otherwise
	 * vmentry fails as it then expects bit 14 (BS) in pending debug
	 * exceptions being set, but that's not correct for the guest debugging
	 * case. */
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
		vmx_set_interrupt_shadow(vcpu, 0);

	kvm_load_guest_xsave_state(vcpu);

	pt_guest_enter(vmx);

	atomic_switch_perf_msrs(vmx);
	if (intel_pmu_lbr_is_enabled(vcpu))
		vmx_passthrough_lbr_msrs(vcpu);

	if (enable_preemption_timer)
		vmx_update_hv_timer(vcpu);

	kvm_wait_lapic_expire(vcpu);

	/* The actual VMENTER/EXIT is in the .noinstr.text section. */
	vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));

	/* All fields are clean at this point */
	if (kvm_is_using_evmcs()) {
		current_evmcs->hv_clean_fields |=
			HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;

		current_evmcs->hv_vp_id = kvm_hv_get_vpindex(vcpu);
	}

	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
	if (vmx->host_debugctlmsr)
		update_debugctlmsr(vmx->host_debugctlmsr);

#ifndef CONFIG_X86_64
	/*
	 * The sysexit path does not restore ds/es, so we must set them to
	 * a reasonable value ourselves.
	 *
	 * We can't defer this to vmx_prepare_switch_to_host() since that
	 * function may be executed in interrupt context, which saves and
	 * restores segments around it, nullifying its effect.
	 */
	loadsegment(ds, __USER_DS);
	loadsegment(es, __USER_DS);
#endif

	pt_guest_exit(vmx);

	kvm_load_host_xsave_state(vcpu);

	if (is_guest_mode(vcpu)) {
		/*
		 * Track VMLAUNCH/VMRESUME that have made it past guest state
		 * checking.
		 */
		if (vmx->nested.nested_run_pending &&
		    !vmx->exit_reason.failed_vmentry)
			++vcpu->stat.nested_run;

		vmx->nested.nested_run_pending = 0;
	}

	if (unlikely(vmx->fail))
		return EXIT_FASTPATH_NONE;

	if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
		kvm_machine_check();

	trace_kvm_exit(vcpu, KVM_ISA_VMX);

	if (unlikely(vmx->exit_reason.failed_vmentry))
		return EXIT_FASTPATH_NONE;

	vmx->loaded_vmcs->launched = 1;

	vmx_recover_nmi_blocking(vmx);
	vmx_complete_interrupts(vmx);

	if (is_guest_mode(vcpu))
		return EXIT_FASTPATH_NONE;

	return vmx_exit_handlers_fastpath(vcpu);
}

static void vmx_vcpu_free(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (enable_pml)
		vmx_destroy_pml_buffer(vmx);
	free_vpid(vmx->vpid);
	nested_vmx_free_vcpu(vcpu);
	free_loaded_vmcs(vmx->loaded_vmcs);
}

static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
{
	struct vmx_uret_msr *tsx_ctrl;
	struct vcpu_vmx *vmx;
	int i, err;

	BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0);
	vmx = to_vmx(vcpu);

	INIT_LIST_HEAD(&vmx->pi_wakeup_list);

	err = -ENOMEM;

	vmx->vpid = allocate_vpid();

	/*
	 * If PML is turned on, a failure to enable PML simply results in
	 * failure to create the vcpu, so the PML logic can stay simple (no
	 * need to handle cases such as PML being enabled on only some of
	 * the guest's vcpus).
	 */
	if (enable_pml) {
		vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
		if (!vmx->pml_pg)
			goto free_vpid;
	}

	for (i = 0; i < kvm_nr_uret_msrs; ++i)
		vmx->guest_uret_msrs[i].mask = -1ull;
	if (boot_cpu_has(X86_FEATURE_RTM)) {
		/*
		 * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception.
		 * Keep the host value unchanged to avoid changing CPUID bits
		 * under the host kernel's feet.
		 */
		tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
		if (tsx_ctrl)
			tsx_ctrl->mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
	}

	err = alloc_loaded_vmcs(&vmx->vmcs01);
	if (err < 0)
		goto free_pml;

	/*
	 * Use Hyper-V 'Enlightened MSR Bitmap' feature when KVM runs as a
	 * nested (L1) hypervisor and Hyper-V in L0 supports it. Enable the
	 * feature only for vmcs01, KVM currently isn't equipped to realize any
	 * performance benefits from enabling it for vmcs02.
	 */
	if (kvm_is_using_evmcs() &&
	    (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
		struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;

		evmcs->hv_enlightenments_control.msr_bitmap = 1;
	}

	/* The MSR bitmap starts with all ones */
	bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
	bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);

	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
#ifdef CONFIG_X86_64
	vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
	vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
	vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
#endif
	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
	if (kvm_cstate_in_guest(vcpu->kvm)) {
		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
	}

	vmx->loaded_vmcs = &vmx->vmcs01;

	if (cpu_need_virtualize_apic_accesses(vcpu)) {
		err = kvm_alloc_apic_access_page(vcpu->kvm);
		if (err)
			goto free_vmcs;
	}

	if (enable_ept && !enable_unrestricted_guest) {
		err = init_rmode_identity_map(vcpu->kvm);
		if (err)
			goto free_vmcs;
	}

	if (vmx_can_use_ipiv(vcpu))
		WRITE_ONCE(to_kvm_vmx(vcpu->kvm)->pid_table[vcpu->vcpu_id],
			   __pa(&vmx->pi_desc) | PID_TABLE_ENTRY_VALID);

	return 0;

free_vmcs:
	free_loaded_vmcs(vmx->loaded_vmcs);
free_pml:
	vmx_destroy_pml_buffer(vmx);
free_vpid:
	free_vpid(vmx->vpid);
	return err;
}

#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"

static int vmx_vm_init(struct kvm *kvm)
{
	if (!ple_gap)
		kvm->arch.pause_in_guest = true;

	if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
		switch (l1tf_mitigation) {
		case L1TF_MITIGATION_OFF:
		case L1TF_MITIGATION_FLUSH_NOWARN:
			/* 'I explicitly don't care' is set */
			break;
		case L1TF_MITIGATION_FLUSH:
		case L1TF_MITIGATION_FLUSH_NOSMT:
		case L1TF_MITIGATION_FULL:
			/*
			 * Warn upon starting the first VM in a potentially
			 * insecure environment.
			 */
			if (sched_smt_active())
				pr_warn_once(L1TF_MSG_SMT);
			if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
				pr_warn_once(L1TF_MSG_L1D);
			break;
		case L1TF_MITIGATION_FULL_FORCE:
			/* Flush is enforced */
			break;
		}
	}
	return 0;
}

static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
	u8 cache;

	/* We wanted to honor guest CD/MTRR/PAT, but doing so could result in
	 * memory aliases with conflicting memory types and sometimes MCEs.
	 * We have to be careful as to which types are honored and when.
	 *
	 * For MMIO, guest CD/MTRR are ignored.  The EPT memory type is set to
	 * UC.  The effective memory type is UC or WC depending on guest PAT.
	 * This was historically the source of MCEs and we want to be
	 * conservative.
	 *
	 * When there is no need to deal with noncoherent DMA (e.g., no VT-d
	 * or VT-d has snoop control), guest CD/MTRR/PAT are all ignored.  The
	 * EPT memory type is set to WB.  The effective memory type is forced
	 * WB.
	 *
	 * Otherwise, we trust the guest.  Guest CD/MTRR/PAT are all honored.
	 * The EPT memory type is used to emulate guest CD/MTRR.
	 */

	if (is_mmio)
		return MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;

	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
		return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;

	if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) {
		if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
			cache = MTRR_TYPE_WRBACK;
		else
			cache = MTRR_TYPE_UNCACHABLE;

		return (cache << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
	}

	return kvm_mtrr_get_guest_memory_type(vcpu, gfn) << VMX_EPT_MT_EPTE_SHIFT;
}
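/*
 * Illustrative note (not from the original source): worked examples of the
 * EPTE memory-type bits produced by vmx_get_mt_mask() above, using the
 * architectural encodings MTRR_TYPE_UNCACHABLE == 0, MTRR_TYPE_WRBACK == 6,
 * VMX_EPT_MT_EPTE_SHIFT == 3 and VMX_EPT_IPAT_BIT == BIT(6):
 *
 *   MMIO gfn:                  0 << 3            = 0x00  (UC, guest PAT still applies)
 *   no noncoherent DMA:       (6 << 3) | BIT(6)  = 0x70  (WB, guest PAT ignored)
 *   CR0.CD=1, CD/NW quirk on: (6 << 3) | BIT(6)  = 0x70
 *   CR0.CD=1, quirk disabled: (0 << 3) | BIT(6)  = 0x40  (forced UC)
 */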

static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx, u32 new_ctl)
{
	/*
	 * These bits in the secondary execution controls field
	 * are dynamic, the others are mostly based on the hypervisor
	 * architecture and the guest's CPUID. Do not touch the
	 * dynamic bits.
	 */
	u32 mask =
		SECONDARY_EXEC_SHADOW_VMCS |
		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
		SECONDARY_EXEC_DESC;

	u32 cur_ctl = secondary_exec_controls_get(vmx);

	secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask));
}

/*
 * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits
 * (indicating "allowed-1") if they are supported in the guest's CPUID.
 */
static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct kvm_cpuid_entry2 *entry;

	vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
	vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;

#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do {		\
	if (entry && (entry->_reg & (_cpuid_mask)))			\
		vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask);		\
} while (0)

	entry = kvm_find_cpuid_entry(vcpu, 0x1);
	cr4_fixed1_update(X86_CR4_VME, edx, feature_bit(VME));
	cr4_fixed1_update(X86_CR4_PVI, edx, feature_bit(VME));
	cr4_fixed1_update(X86_CR4_TSD, edx, feature_bit(TSC));
	cr4_fixed1_update(X86_CR4_DE, edx, feature_bit(DE));
	cr4_fixed1_update(X86_CR4_PSE, edx, feature_bit(PSE));
	cr4_fixed1_update(X86_CR4_PAE, edx, feature_bit(PAE));
	cr4_fixed1_update(X86_CR4_MCE, edx, feature_bit(MCE));
	cr4_fixed1_update(X86_CR4_PGE, edx, feature_bit(PGE));
	cr4_fixed1_update(X86_CR4_OSFXSR, edx, feature_bit(FXSR));
	cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, feature_bit(XMM));
	cr4_fixed1_update(X86_CR4_VMXE, ecx, feature_bit(VMX));
	cr4_fixed1_update(X86_CR4_SMXE, ecx, feature_bit(SMX));
	cr4_fixed1_update(X86_CR4_PCIDE, ecx, feature_bit(PCID));
	cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, feature_bit(XSAVE));

	entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 0);
	cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, feature_bit(FSGSBASE));
	cr4_fixed1_update(X86_CR4_SMEP, ebx, feature_bit(SMEP));
	cr4_fixed1_update(X86_CR4_SMAP, ebx, feature_bit(SMAP));
	cr4_fixed1_update(X86_CR4_PKE, ecx, feature_bit(PKU));
	cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP));
	cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57));

#undef cr4_fixed1_update
}

static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct kvm_cpuid_entry2 *best = NULL;
	int i;

	for (i = 0; i < PT_CPUID_LEAVES; i++) {
		best = kvm_find_cpuid_entry_index(vcpu, 0x14, i);
		if (!best)
			return;
		vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax;
		vmx->pt_desc.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM] = best->ebx;
		vmx->pt_desc.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM] = best->ecx;
		vmx->pt_desc.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM] = best->edx;
	}

	/* Get the number of configurable Address Ranges for filtering */
	vmx->pt_desc.num_address_ranges = intel_pt_validate_cap(vmx->pt_desc.caps,
						PT_CAP_num_address_ranges);

	/* Initialize the ctl bitmask, clearing the bits that have no CPUID dependency */
	vmx->pt_desc.ctl_bitmask = ~(RTIT_CTL_TRACEEN | RTIT_CTL_OS |
			RTIT_CTL_USR | RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC |
			RTIT_CTL_BRANCH_EN);

	/*
	 * If CPUID.(EAX=14H,ECX=0):EBX[0]=1, CR3Filter can be set; otherwise
	 * setting it will inject a #GP.
	 */
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_cr3_filtering))
		vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_CR3EN;

	/*
	 * If CPUID.(EAX=14H,ECX=0):EBX[1]=1, CYCEn, CycThresh and
	 * PSBFreq can be set.
	 */
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc))
		vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_CYCLEACC |
				RTIT_CTL_CYC_THRESH | RTIT_CTL_PSB_FREQ);

	/* If CPUID.(EAX=14H,ECX=0):EBX[3]=1, MTCEn and MTCFreq can be set */
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc))
		vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_MTC_EN |
				RTIT_CTL_MTC_RANGE);

	/* If CPUID.(EAX=14H,ECX=0):EBX[4]=1, FUPonPTW and PTWEn can be set */
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_ptwrite))
		vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_FUP_ON_PTW |
						RTIT_CTL_PTW_EN);

	/* If CPUID.(EAX=14H,ECX=0):EBX[5]=1, PwrEvEn can be set */
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_power_event_trace))
		vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_PWR_EVT_EN;

	/* If CPUID.(EAX=14H,ECX=0):ECX[0]=1, ToPA can be set */
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_topa_output))
		vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_TOPA;

	/* If CPUID.(EAX=14H,ECX=0):ECX[3]=1, FabricEn can be set */
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_output_subsys))
		vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_FABRIC_EN;

	/* unmask address range configure area */
	for (i = 0; i < vmx->pt_desc.num_address_ranges; i++)
		vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
}
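/*
 * Illustrative note (not from the original source): the RTIT_CTL_ADDRn_CFG
 * fields are 4 bits wide starting at bit 32, so the loop above clears one
 * nibble per supported address range.  E.g. with num_address_ranges == 2:
 *
 *   i == 0: ctl_bitmask &= ~(0xfULL << 32);   // ADDR0_CFG, bits 35:32
 *   i == 1: ctl_bitmask &= ~(0xfULL << 36);   // ADDR1_CFG, bits 39:36
 *
 * leaving those fields writable by the guest in MSR_IA32_RTIT_CTL.
 */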

static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * XSAVES is effectively enabled if and only if XSAVE is also exposed
	 * to the guest.  XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
	 * set if and only if XSAVE is supported.
	 */
	if (boot_cpu_has(X86_FEATURE_XSAVE) &&
	    guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
		kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES);

	kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX);

	vmx_setup_uret_msrs(vmx);

	if (cpu_has_secondary_exec_ctrls())
		vmcs_set_secondary_exec_control(vmx,
						vmx_secondary_exec_control(vmx));

	if (guest_can_use(vcpu, X86_FEATURE_VMX))
		vmx->msr_ia32_feature_control_valid_bits |=
				FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
				FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
	else
		vmx->msr_ia32_feature_control_valid_bits &=
				~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
				  FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);

	if (guest_can_use(vcpu, X86_FEATURE_VMX))
		nested_vmx_cr_fixed1_bits_update(vcpu);

	if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
	    guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT))
		update_intel_pt_cfg(vcpu);

	if (boot_cpu_has(X86_FEATURE_RTM)) {
		struct vmx_uret_msr *msr;

		msr = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
		if (msr) {
			bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM);

			vmx_set_guest_uret_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE);
		}
	}

	if (kvm_cpu_cap_has(X86_FEATURE_XFD))
		vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
					  !guest_cpuid_has(vcpu, X86_FEATURE_XFD));

	if (boot_cpu_has(X86_FEATURE_IBPB))
		vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
					  !guest_has_pred_cmd_msr(vcpu));

	if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
		vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
					  !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));

	set_cr4_guest_host_mask(vmx);

	vmx_write_encls_bitmap(vcpu, NULL);
	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX))
		vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_SGX_ENABLED;
	else
		vmx->msr_ia32_feature_control_valid_bits &= ~FEAT_CTL_SGX_ENABLED;

	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC))
		vmx->msr_ia32_feature_control_valid_bits |=
			FEAT_CTL_SGX_LC_ENABLED;
	else
		vmx->msr_ia32_feature_control_valid_bits &=
			~FEAT_CTL_SGX_LC_ENABLED;

	/* Refresh #PF interception to account for MAXPHYADDR changes. */
	vmx_update_exception_bitmap(vcpu);
}

static u64 vmx_get_perf_capabilities(void)
{
	u64 perf_cap = PMU_CAP_FW_WRITES;
	struct x86_pmu_lbr lbr;
	u64 host_perf_cap = 0;

	if (!enable_pmu)
		return 0;

	if (boot_cpu_has(X86_FEATURE_PDCM))
		rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);

	if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
		x86_perf_get_lbr(&lbr);
		if (lbr.nr)
			perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
	}

	if (vmx_pebs_supported()) {
		perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
		if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4)
			perf_cap &= ~PERF_CAP_PEBS_BASELINE;
	}

	return perf_cap;
}

static __init void vmx_set_cpu_caps(void)
{
	kvm_set_cpu_caps();

	/* CPUID 0x1 */
	if (nested)
		kvm_cpu_cap_set(X86_FEATURE_VMX);

	/* CPUID 0x7 */
	if (kvm_mpx_supported())
		kvm_cpu_cap_check_and_set(X86_FEATURE_MPX);
	if (!cpu_has_vmx_invpcid())
		kvm_cpu_cap_clear(X86_FEATURE_INVPCID);
	if (vmx_pt_mode_is_host_guest())
		kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT);
	if (vmx_pebs_supported()) {
		kvm_cpu_cap_check_and_set(X86_FEATURE_DS);
		kvm_cpu_cap_check_and_set(X86_FEATURE_DTES64);
	}

	if (!enable_pmu)
		kvm_cpu_cap_clear(X86_FEATURE_PDCM);
	kvm_caps.supported_perf_cap = vmx_get_perf_capabilities();

	if (!enable_sgx) {
		kvm_cpu_cap_clear(X86_FEATURE_SGX);
		kvm_cpu_cap_clear(X86_FEATURE_SGX_LC);
		kvm_cpu_cap_clear(X86_FEATURE_SGX1);
		kvm_cpu_cap_clear(X86_FEATURE_SGX2);
	}

	if (vmx_umip_emulated())
		kvm_cpu_cap_set(X86_FEATURE_UMIP);

	/* CPUID 0xD.1 */
	kvm_caps.supported_xss = 0;
	if (!cpu_has_vmx_xsaves())
		kvm_cpu_cap_clear(X86_FEATURE_XSAVES);

	/* CPUID 0x80000001 and 0x7 (RDPID) */
	if (!cpu_has_vmx_rdtscp()) {
		kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
		kvm_cpu_cap_clear(X86_FEATURE_RDPID);
	}

	if (cpu_has_vmx_waitpkg())
		kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
}

static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
{
	to_vmx(vcpu)->req_immediate_exit = true;
}

static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
				  struct x86_instruction_info *info)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	unsigned short port;
	bool intercept;
	int size;

	if (info->intercept == x86_intercept_in ||
	    info->intercept == x86_intercept_ins) {
		port = info->src_val;
		size = info->dst_bytes;
	} else {
		port = info->dst_val;
		size = info->src_bytes;
	}

	/*
	 * If the 'use IO bitmaps' VM-execution control is 0, IO instruction
	 * VM-exits depend on the 'unconditional IO exiting' VM-execution
	 * control.
	 *
	 * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps.
	 */
	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
		intercept = nested_cpu_has(vmcs12,
					   CPU_BASED_UNCOND_IO_EXITING);
	else
		intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);

	/* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
	return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
}
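/*
 * Illustrative note (not from the original source): two hypothetical L1
 * configurations and the resulting decision above for an emulated OUT to
 * port 0x3f8, size 1:
 *
 *   - "use IO bitmaps" clear, "unconditional IO exiting" set
 *       -> intercept = true, emulation is aborted (X86EMUL_UNHANDLEABLE,
 *          see the FIXME above).
 *   - "use IO bitmaps" set
 *       -> intercept = nested_vmx_check_io_bitmaps(vcpu, 0x3f8, 1),
 *          i.e. the bit L1 programmed for that port decides.
 */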

static int vmx_check_intercept(struct kvm_vcpu *vcpu,
			       struct x86_instruction_info *info,
			       enum x86_intercept_stage stage,
			       struct x86_exception *exception)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

	switch (info->intercept) {
	/*
	 * RDPID causes #UD if disabled through secondary execution controls.
	 * Because it is marked as EmulateOnUD, we need to intercept it here.
	 * Note, RDPID is hidden behind ENABLE_RDTSCP.
	 */
	case x86_intercept_rdpid:
		if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) {
			exception->vector = UD_VECTOR;
			exception->error_code_valid = false;
			return X86EMUL_PROPAGATE_FAULT;
		}
		break;

	case x86_intercept_in:
	case x86_intercept_ins:
	case x86_intercept_out:
	case x86_intercept_outs:
		return vmx_check_intercept_io(vcpu, info);

	case x86_intercept_lgdt:
	case x86_intercept_lidt:
	case x86_intercept_lldt:
	case x86_intercept_ltr:
	case x86_intercept_sgdt:
	case x86_intercept_sidt:
	case x86_intercept_sldt:
	case x86_intercept_str:
		if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC))
			return X86EMUL_CONTINUE;

		/* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
		break;

	case x86_intercept_pause:
		/*
		 * PAUSE is a single-byte NOP with a REPE prefix, i.e. collides
		 * with vanilla NOPs in the emulator.  Apply the interception
		 * check only to actual PAUSE instructions.  Don't check
		 * PAUSE-loop-exiting, software can't expect a given PAUSE to
		 * exit, i.e. KVM is within its rights to allow L2 to execute
		 * the PAUSE.
		 */
		if ((info->rep_prefix != REPE_PREFIX) ||
		    !nested_cpu_has2(vmcs12, CPU_BASED_PAUSE_EXITING))
			return X86EMUL_CONTINUE;

		break;

	/* TODO: check more intercepts... */
	default:
		break;
	}

	return X86EMUL_UNHANDLEABLE;
}

#ifdef CONFIG_X86_64
/* (a << shift) / divisor; returns 1 on overflow, otherwise 0 */
static inline int u64_shl_div_u64(u64 a, unsigned int shift,
				  u64 divisor, u64 *result)
{
	u64 low = a << shift, high = a >> (64 - shift);

	/* Avoid overflow on divq */
	if (high >= divisor)
		return 1;

	/* low holds the result, high holds the remainder, which is discarded */
	asm("divq %2\n\t" : "=a" (low), "=d" (high) :
	    "rm" (divisor), "0" (low), "1" (high));
	*result = low;

	return 0;
}
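/*
 * Illustrative note (not from the original source): a worked example of the
 * helper above as it is used by vmx_set_hv_timer() below.  With a
 * hypothetical delta_tsc = 0x1000, tsc_scaling_ratio_frac_bits = 48 and an
 * L1 scaling ratio of 2.0 (0x2 << 48):
 *
 *   low    = 0x1000 << 48
 *   high   = 0x1000 >> 16 = 0
 *   result = (0x1000 << 48) / (0x2 << 48) = 0x800 host TSC cycles
 *
 * i.e. the guest delta is divided by the guest-per-host TSC ratio.  The
 * "high >= divisor" check rejects inputs whose quotient would not fit in
 * 64 bits, which would otherwise make the 128/64 divq fault.
 */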

static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
			    bool *expired)
{
	struct vcpu_vmx *vmx;
	u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
	struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;

	vmx = to_vmx(vcpu);
	tscl = rdtsc();
	guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
	delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
	lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
						    ktimer->timer_advance_ns);

	if (delta_tsc > lapic_timer_advance_cycles)
		delta_tsc -= lapic_timer_advance_cycles;
	else
		delta_tsc = 0;

	/* Convert to host delta tsc if tsc scaling is enabled */
	if (vcpu->arch.l1_tsc_scaling_ratio != kvm_caps.default_tsc_scaling_ratio &&
	    delta_tsc && u64_shl_div_u64(delta_tsc,
				kvm_caps.tsc_scaling_ratio_frac_bits,
				vcpu->arch.l1_tsc_scaling_ratio, &delta_tsc))
		return -ERANGE;

	/*
	 * If the delta tsc can't fit in 32 bits after the multiplier shift,
	 * we can't use the preemption timer.
	 * It might fit on later vmentries, but checking on every vmentry is
	 * costly, so we just use an hrtimer.
	 */
	if (delta_tsc >> (cpu_preemption_timer_multi + 32))
		return -ERANGE;

	vmx->hv_deadline_tsc = tscl + delta_tsc;
	*expired = !delta_tsc;
	return 0;
}

static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
{
	to_vmx(vcpu)->hv_deadline_tsc = -1;
}
#endif

static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
	if (!kvm_pause_in_guest(vcpu->kvm))
		shrink_ple_window(vcpu);
}

void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (WARN_ON_ONCE(!enable_pml))
		return;

	if (is_guest_mode(vcpu)) {
		vmx->nested.update_vmcs01_cpu_dirty_logging = true;
		return;
	}

	/*
	 * Note, nr_memslots_dirty_logging can be changed concurrently with
	 * this code, but in that case another update request will be made and
	 * so the guest will never run with a stale PML value.
	 */
	if (atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
		secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_ENABLE_PML);
	else
		secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML);
}

static void vmx_setup_mce(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.mcg_cap & MCG_LMCE_P)
		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
			FEAT_CTL_LMCE_ENABLED;
	else
		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
			~FEAT_CTL_LMCE_ENABLED;
}

#ifdef CONFIG_KVM_SMM
static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
	/* we need a nested vmexit to enter SMM, postpone if run is pending */
	if (to_vmx(vcpu)->nested.nested_run_pending)
		return -EBUSY;
	return !is_smm(vcpu);
}

static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * TODO: Implement custom flows for forcing the vCPU out/in of L2 on
	 * SMI and RSM.  Using the common VM-Exit + VM-Enter routines is wrong:
	 * SMI and RSM only modify state that is saved and restored via SMRAM.
	 * E.g. most MSRs are left untouched, but many are modified by VM-Exit
	 * and VM-Enter, and thus L2's values may be corrupted on SMI+RSM.
	 */
	vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
	if (vmx->nested.smm.guest_mode)
		nested_vmx_vmexit(vcpu, -1, 0, 0);

	vmx->nested.smm.vmxon = vmx->nested.vmxon;
	vmx->nested.vmxon = false;
	vmx_clear_hlt(vcpu);
	return 0;
}

static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int ret;

	if (vmx->nested.smm.vmxon) {
		vmx->nested.vmxon = true;
		vmx->nested.smm.vmxon = false;
	}

	if (vmx->nested.smm.guest_mode) {
		ret = nested_vmx_enter_non_root_mode(vcpu, false);
		if (ret)
			return ret;

		vmx->nested.nested_run_pending = 1;
		vmx->nested.smm.guest_mode = false;
	}
	return 0;
}

static void vmx_enable_smi_window(struct kvm_vcpu *vcpu)
{
	/* RSM will cause a vmexit anyway. */
}
#endif

static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
{
	return to_vmx(vcpu)->nested.vmxon && !is_guest_mode(vcpu);
}

static void vmx_migrate_timers(struct kvm_vcpu *vcpu)
{
	if (is_guest_mode(vcpu)) {
		struct hrtimer *timer = &to_vmx(vcpu)->nested.preemption_timer;

		if (hrtimer_try_to_cancel(timer) == 1)
			hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
	}
}

static void vmx_hardware_unsetup(void)
{
	kvm_set_posted_intr_wakeup_handler(NULL);

	if (nested)
		nested_vmx_hardware_unsetup();

	free_kvm_area();
}

#define VMX_REQUIRED_APICV_INHIBITS				\
(								\
	BIT(APICV_INHIBIT_REASON_DISABLE) |			\
	BIT(APICV_INHIBIT_REASON_ABSENT) |			\
	BIT(APICV_INHIBIT_REASON_HYPERV) |			\
	BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |			\
	BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) |		\
	BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |		\
	BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED)		\
)

static void vmx_vm_destroy(struct kvm *kvm)
{
	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);

	free_pages((unsigned long)kvm_vmx->pid_table, vmx_get_pid_table_order(kvm));
}

static struct kvm_x86_ops vmx_x86_ops __initdata = {
	.name = KBUILD_MODNAME,

	.check_processor_compatibility = vmx_check_processor_compat,

	.hardware_unsetup = vmx_hardware_unsetup,

	.hardware_enable = vmx_hardware_enable,
	.hardware_disable = vmx_hardware_disable,
	.has_emulated_msr = vmx_has_emulated_msr,

	.vm_size = sizeof(struct kvm_vmx),
	.vm_init = vmx_vm_init,
	.vm_destroy = vmx_vm_destroy,

	.vcpu_precreate = vmx_vcpu_precreate,
	.vcpu_create = vmx_vcpu_create,
	.vcpu_free = vmx_vcpu_free,
	.vcpu_reset = vmx_vcpu_reset,

	.prepare_switch_to_guest = vmx_prepare_switch_to_guest,
	.vcpu_load = vmx_vcpu_load,
	.vcpu_put = vmx_vcpu_put,

	.update_exception_bitmap = vmx_update_exception_bitmap,
	.get_msr_feature = vmx_get_msr_feature,
	.get_msr = vmx_get_msr,
	.set_msr = vmx_set_msr,
	.get_segment_base = vmx_get_segment_base,
	.get_segment = vmx_get_segment,
	.set_segment = vmx_set_segment,
	.get_cpl = vmx_get_cpl,
	.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
	.is_valid_cr0 = vmx_is_valid_cr0,
	.set_cr0 = vmx_set_cr0,
	.is_valid_cr4 = vmx_is_valid_cr4,
	.set_cr4 = vmx_set_cr4,
	.set_efer = vmx_set_efer,
	.get_idt = vmx_get_idt,
	.set_idt = vmx_set_idt,
	.get_gdt = vmx_get_gdt,
	.set_gdt = vmx_set_gdt,
	.set_dr7 = vmx_set_dr7,
	.sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
	.cache_reg = vmx_cache_reg,
	.get_rflags = vmx_get_rflags,
	.set_rflags = vmx_set_rflags,
	.get_if_flag = vmx_get_if_flag,

	.flush_tlb_all = vmx_flush_tlb_all,
	.flush_tlb_current = vmx_flush_tlb_current,
	.flush_tlb_gva = vmx_flush_tlb_gva,
	.flush_tlb_guest = vmx_flush_tlb_guest,

	.vcpu_pre_run = vmx_vcpu_pre_run,
	.vcpu_run = vmx_vcpu_run,
	.handle_exit = vmx_handle_exit,
	.skip_emulated_instruction = vmx_skip_emulated_instruction,
	.update_emulated_instruction = vmx_update_emulated_instruction,
	.set_interrupt_shadow = vmx_set_interrupt_shadow,
	.get_interrupt_shadow = vmx_get_interrupt_shadow,
	.patch_hypercall = vmx_patch_hypercall,
	.inject_irq = vmx_inject_irq,
	.inject_nmi = vmx_inject_nmi,
	.inject_exception = vmx_inject_exception,
	.cancel_injection = vmx_cancel_injection,
	.interrupt_allowed = vmx_interrupt_allowed,
	.nmi_allowed = vmx_nmi_allowed,
	.get_nmi_mask = vmx_get_nmi_mask,
	.set_nmi_mask = vmx_set_nmi_mask,
	.enable_nmi_window = vmx_enable_nmi_window,
	.enable_irq_window = vmx_enable_irq_window,
	.update_cr8_intercept = vmx_update_cr8_intercept,
	.set_virtual_apic_mode = vmx_set_virtual_apic_mode,
	.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
	.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
	.load_eoi_exitmap = vmx_load_eoi_exitmap,
	.apicv_pre_state_restore = vmx_apicv_pre_state_restore,
	.required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
	.hwapic_irr_update = vmx_hwapic_irr_update,
	.hwapic_isr_update = vmx_hwapic_isr_update,
	.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
	.sync_pir_to_irr = vmx_sync_pir_to_irr,
	.deliver_interrupt = vmx_deliver_interrupt,
	.dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,

	.set_tss_addr = vmx_set_tss_addr,
	.set_identity_map_addr = vmx_set_identity_map_addr,
	.get_mt_mask = vmx_get_mt_mask,

	.get_exit_info = vmx_get_exit_info,

	.vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid,

	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,

	.get_l2_tsc_offset = vmx_get_l2_tsc_offset,
	.get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier,
	.write_tsc_offset = vmx_write_tsc_offset,
	.write_tsc_multiplier = vmx_write_tsc_multiplier,

	.load_mmu_pgd = vmx_load_mmu_pgd,

	.check_intercept = vmx_check_intercept,
	.handle_exit_irqoff = vmx_handle_exit_irqoff,

	.request_immediate_exit = vmx_request_immediate_exit,

	.sched_in = vmx_sched_in,

	.cpu_dirty_log_size = PML_ENTITY_NUM,
	.update_cpu_dirty_logging = vmx_update_cpu_dirty_logging,

	.nested_ops = &vmx_nested_ops,

	.pi_update_irte = vmx_pi_update_irte,
	.pi_start_assignment = vmx_pi_start_assignment,

#ifdef CONFIG_X86_64
	.set_hv_timer = vmx_set_hv_timer,
	.cancel_hv_timer = vmx_cancel_hv_timer,
#endif

	.setup_mce = vmx_setup_mce,

#ifdef CONFIG_KVM_SMM
	.smi_allowed = vmx_smi_allowed,
	.enter_smm = vmx_enter_smm,
	.leave_smm = vmx_leave_smm,
	.enable_smi_window = vmx_enable_smi_window,
#endif

	.can_emulate_instruction = vmx_can_emulate_instruction,
	.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
	.migrate_timers = vmx_migrate_timers,

	.msr_filter_changed = vmx_msr_filter_changed,
	.complete_emulated_msr = kvm_complete_insn_gp,

	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
};

static unsigned int vmx_handle_intel_pt_intr(void)
{
	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	/* '0' on failure so that the !PT case can use a RET0 static call. */
	if (!vcpu || !kvm_handling_nmi_from_guest(vcpu))
		return 0;

	kvm_make_request(KVM_REQ_PMI, vcpu);
	__set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
		  (unsigned long *)&vcpu->arch.pmu.global_status);
	return 1;
}

static __init void vmx_setup_user_return_msrs(void)
{
	/*
	 * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
	 * will emulate SYSCALL in legacy mode if the vendor string in guest
	 * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
	 * support this emulation, MSR_STAR is included in the list for i386,
	 * but is never loaded into hardware.  MSR_CSTAR is also never loaded
	 * into hardware and is here purely for emulation purposes.
	 */
	const u32 vmx_uret_msrs_list[] = {
	#ifdef CONFIG_X86_64
		MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
	#endif
		MSR_EFER, MSR_TSC_AUX, MSR_STAR,
		MSR_IA32_TSX_CTRL,
	};
	int i;

	BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);

	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
		kvm_add_user_return_msr(vmx_uret_msrs_list[i]);
}

static void __init vmx_setup_me_spte_mask(void)
{
	u64 me_mask = 0;

	/*
	 * kvm_get_shadow_phys_bits() returns shadow_phys_bits.  Use
	 * the former to avoid exposing shadow_phys_bits.
	 *
	 * On pre-MKTME systems, boot_cpu_data.x86_phys_bits equals
	 * shadow_phys_bits.  On MKTME and/or TDX capable systems,
	 * boot_cpu_data.x86_phys_bits holds the actual physical address
	 * width w/o the KeyID bits, and shadow_phys_bits equals the
	 * MAXPHYADDR reported by CPUID.  The bits in between are the
	 * KeyID bits.
	 */
	if (boot_cpu_data.x86_phys_bits != kvm_get_shadow_phys_bits())
		me_mask = rsvd_bits(boot_cpu_data.x86_phys_bits,
				    kvm_get_shadow_phys_bits() - 1);
	/*
	 * Unlike SME, the host kernel doesn't support setting up any MKTME
	 * KeyID on Intel platforms, so no memory encryption bits should be
	 * included in the SPTE.
	 */
	kvm_mmu_set_me_spte_mask(0, me_mask);
}
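/*
 * Illustrative note (not from the original source): on a hypothetical MKTME
 * system with boot_cpu_data.x86_phys_bits == 46 and a CPUID-reported
 * MAXPHYADDR (shadow_phys_bits) of 52, the code above computes
 *
 *   me_mask = rsvd_bits(46, 51) = GENMASK_ULL(51, 46)
 *
 * so the six KeyID bits (51:46) are excluded from the SPTE reserved-bit
 * checks instead of being flagged as reserved.  On pre-MKTME parts the two
 * values match and me_mask stays 0.
 */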

static struct kvm_x86_init_ops vmx_init_ops __initdata;

static __init int hardware_setup(void)
{
	unsigned long host_bndcfgs;
	struct desc_ptr dt;
	int r;

	store_idt(&dt);
	host_idt_base = dt.address;

	vmx_setup_user_return_msrs();

	if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
		return -EIO;

	if (cpu_has_perf_global_ctrl_bug())
		pr_warn_once("VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
			     "does not work properly. Using workaround\n");

	if (boot_cpu_has(X86_FEATURE_NX))
		kvm_enable_efer_bits(EFER_NX);

	if (boot_cpu_has(X86_FEATURE_MPX)) {
		rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
		WARN_ONCE(host_bndcfgs, "BNDCFGS in host will be lost");
	}

	if (!cpu_has_vmx_mpx())
		kvm_caps.supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS |
					     XFEATURE_MASK_BNDCSR);

	if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
	    !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
		enable_vpid = 0;

	if (!cpu_has_vmx_ept() ||
	    !cpu_has_vmx_ept_4levels() ||
	    !cpu_has_vmx_ept_mt_wb() ||
	    !cpu_has_vmx_invept_global())
		enable_ept = 0;

	/* NX support is required for shadow paging. */
	if (!enable_ept && !boot_cpu_has(X86_FEATURE_NX)) {
		pr_err_ratelimited("NX (Execute Disable) not supported\n");
		return -EOPNOTSUPP;
	}

	if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
		enable_ept_ad_bits = 0;

	if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
		enable_unrestricted_guest = 0;

	if (!cpu_has_vmx_flexpriority())
		flexpriority_enabled = 0;

	if (!cpu_has_virtual_nmis())
		enable_vnmi = 0;

#ifdef CONFIG_X86_SGX_KVM
	if (!cpu_has_vmx_encls_vmexit())
		enable_sgx = false;
#endif

	/*
	 * set_apic_access_page_addr() is used to reload the APIC access
	 * page upon invalidation.  No need to do anything if KVM isn't
	 * using the APIC_ACCESS_ADDR VMCS field.
	 */
	if (!flexpriority_enabled)
		vmx_x86_ops.set_apic_access_page_addr = NULL;

	if (!cpu_has_vmx_tpr_shadow())
		vmx_x86_ops.update_cr8_intercept = NULL;

#if IS_ENABLED(CONFIG_HYPERV)
	if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
	    && enable_ept) {
		vmx_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs;
		vmx_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range;
	}
#endif

	if (!cpu_has_vmx_ple()) {
		ple_gap = 0;
		ple_window = 0;
		ple_window_grow = 0;
		ple_window_max = 0;
		ple_window_shrink = 0;
	}

	if (!cpu_has_vmx_apicv())
		enable_apicv = 0;
	if (!enable_apicv)
		vmx_x86_ops.sync_pir_to_irr = NULL;

	if (!enable_apicv || !cpu_has_vmx_ipiv())
		enable_ipiv = false;

	if (cpu_has_vmx_tsc_scaling())
		kvm_caps.has_tsc_control = true;
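
	/*
	 * The hardware TSC multiplier is a fixed-point value with 48
	 * fractional bits: scaled_tsc = (host_tsc * ratio) >> 48.  For
	 * example (illustrative numbers), a 1:1 ratio is 1ULL << 48, and a
	 * guest clocked at half the host TSC frequency would use a
	 * multiplier of 1ULL << 47.
	 */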
	kvm_caps.max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
	kvm_caps.tsc_scaling_ratio_frac_bits = 48;
	kvm_caps.has_bus_lock_exit = cpu_has_vmx_bus_lock_detection();
	kvm_caps.has_notify_vmexit = cpu_has_notify_vmexit();

	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */

	if (enable_ept)
		kvm_mmu_set_ept_masks(enable_ept_ad_bits,
				      cpu_has_vmx_ept_execute_only());

	/*
	 * Set up shadow_me_value/shadow_me_mask so that the MKTME KeyID
	 * bits are added to shadow_zero_check.
	 */
	vmx_setup_me_spte_mask();

	kvm_configure_mmu(enable_ept, 0, vmx_get_max_ept_level(),
			  ept_caps_to_lpage_level(vmx_capability.ept));

	/*
	 * Only enable PML when hardware supports the PML feature and both
	 * EPT and EPT A/D bits are enabled -- PML depends on them to work.
	 */
	if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
		enable_pml = 0;

	if (!enable_pml)
		vmx_x86_ops.cpu_dirty_log_size = 0;

	if (!cpu_has_vmx_preemption_timer())
		enable_preemption_timer = false;

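	/*
	 * Worked example (illustrative numbers): with a 3 GHz TSC and a
	 * preemption timer rate of 5, use_timer_freq below is 3e9 >> 5,
	 * roughly 93.75 MHz, so the all-ones "disabled" value wraps after
	 * about 45 seconds of guest time -- comfortably slower than the
	 * 0.1 Hz limit, so the timer stays enabled.
	 */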
	if (enable_preemption_timer) {
		u64 use_timer_freq = 5000ULL * 1000 * 1000;

		cpu_preemption_timer_multi =
			vmcs_config.misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;

		if (tsc_khz)
			use_timer_freq = (u64)tsc_khz * 1000;
		use_timer_freq >>= cpu_preemption_timer_multi;

		/*
		 * KVM "disables" the preemption timer by setting it to its max
		 * value.  Don't use the timer if it might cause spurious exits
		 * at a rate faster than 0.1 Hz (of uninterrupted guest time).
		 */
		if (use_timer_freq > 0xffffffffu / 10)
			enable_preemption_timer = false;
	}

	if (!enable_preemption_timer) {
		vmx_x86_ops.set_hv_timer = NULL;
		vmx_x86_ops.cancel_hv_timer = NULL;
		vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
	}

	kvm_caps.supported_mce_cap |= MCG_LMCE_P;
	kvm_caps.supported_mce_cap |= MCG_CMCI_P;

	if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST)
		return -EINVAL;
	if (!enable_ept || !enable_pmu || !cpu_has_vmx_intel_pt())
		pt_mode = PT_MODE_SYSTEM;
	if (pt_mode == PT_MODE_HOST_GUEST)
		vmx_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr;
	else
		vmx_init_ops.handle_intel_pt_intr = NULL;

	setup_default_sgx_lepubkeyhash();

	if (nested) {
		nested_vmx_setup_ctls_msrs(&vmcs_config, vmx_capability.ept);

		r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
		if (r)
			return r;
	}

	vmx_set_cpu_caps();

	r = alloc_kvm_area();
	if (r && nested)
		nested_vmx_hardware_unsetup();

	kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);

	return r;
}
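
/*
 * Handed to kvm_x86_vendor_init() in vmx_init() below.  The expectation
 * (see the common x86 code) is that .hardware_setup runs once during
 * vendor init, after which .runtime_ops and .pmu_ops become the active
 * VMX callbacks; .handle_intel_pt_intr is filled in by hardware_setup()
 * only when Intel PT is exposed to the guest.
 */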
static struct kvm_x86_init_ops vmx_init_ops __initdata = {
	.hardware_setup = hardware_setup,
	.handle_intel_pt_intr = NULL,

	.runtime_ops = &vmx_x86_ops,
	.pmu_ops = &intel_pmu_ops,
};

static void vmx_cleanup_l1d_flush(void)
{
	if (vmx_l1d_flush_pages) {
		free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
		vmx_l1d_flush_pages = NULL;
	}
	/* Restore state so sysfs ignores VMX */
	l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
}

static void __vmx_exit(void)
{
	allow_smaller_maxphyaddr = false;

	cpu_emergency_unregister_virt_callback(vmx_emergency_disable);

	vmx_cleanup_l1d_flush();
}
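
/*
 * Teardown mirrors vmx_init() in reverse: tear down /dev/kvm first (it was
 * exposed to userspace last), then the common vendor state, and finally the
 * VMX-specific bits via __vmx_exit().
 */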
static void vmx_exit(void)
{
	kvm_exit();
	kvm_x86_vendor_exit();

	__vmx_exit();
}
module_exit(vmx_exit);

static int __init vmx_init(void)
{
	int r, cpu;

	if (!kvm_is_vmx_supported())
		return -EOPNOTSUPP;

	/*
	 * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
	 * to unwind if a later step fails.
	 */
	hv_init_evmcs();

	r = kvm_x86_vendor_init(&vmx_init_ops);
	if (r)
		return r;

	/*
	 * Must be called after common x86 init so enable_ept is properly set
	 * up.  Hand in the L1D flush mitigation value that was stashed by the
	 * pre-module-init parameter parser.  If no parameter was given, it
	 * will contain 'auto', which is turned into the default 'cond'
	 * mitigation mode.
	 */
	r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
	if (r)
		goto err_l1d_flush;

	for_each_possible_cpu(cpu) {
		INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));

		pi_init_cpu(cpu);
	}

	cpu_emergency_register_virt_callback(vmx_emergency_disable);

	vmx_check_vmcs12_offsets();

	/*
	 * Shadow paging doesn't have a (further) performance penalty
	 * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR, so enable it
	 * by default.
	 */
	if (!enable_ept)
		allow_smaller_maxphyaddr = true;

	/*
	 * Common KVM initialization _must_ come last; after this, /dev/kvm is
	 * exposed to userspace!
	 */
	r = kvm_init(sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx),
		     THIS_MODULE);
	if (r)
		goto err_kvm_init;

	return 0;

err_kvm_init:
	__vmx_exit();
err_l1d_flush:
	kvm_x86_vendor_exit();
	return r;
}
module_init(vmx_init);