18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Kernel-based Virtual Machine driver for Linux 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * This module enables machines with Intel VT-x extensions to run virtual 68c2ecf20Sopenharmony_ci * machines without emulation or binary translation. 78c2ecf20Sopenharmony_ci * 88c2ecf20Sopenharmony_ci * Copyright (C) 2006 Qumranet, Inc. 98c2ecf20Sopenharmony_ci * Copyright 2010 Red Hat, Inc. and/or its affiliates. 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * Authors: 128c2ecf20Sopenharmony_ci * Avi Kivity <avi@qumranet.com> 138c2ecf20Sopenharmony_ci * Yaniv Kamay <yaniv@qumranet.com> 148c2ecf20Sopenharmony_ci */ 158c2ecf20Sopenharmony_ci 168c2ecf20Sopenharmony_ci#include <linux/highmem.h> 178c2ecf20Sopenharmony_ci#include <linux/hrtimer.h> 188c2ecf20Sopenharmony_ci#include <linux/kernel.h> 198c2ecf20Sopenharmony_ci#include <linux/kvm_host.h> 208c2ecf20Sopenharmony_ci#include <linux/module.h> 218c2ecf20Sopenharmony_ci#include <linux/moduleparam.h> 228c2ecf20Sopenharmony_ci#include <linux/mod_devicetable.h> 238c2ecf20Sopenharmony_ci#include <linux/mm.h> 248c2ecf20Sopenharmony_ci#include <linux/objtool.h> 258c2ecf20Sopenharmony_ci#include <linux/sched.h> 268c2ecf20Sopenharmony_ci#include <linux/sched/smt.h> 278c2ecf20Sopenharmony_ci#include <linux/slab.h> 288c2ecf20Sopenharmony_ci#include <linux/tboot.h> 298c2ecf20Sopenharmony_ci#include <linux/trace_events.h> 308c2ecf20Sopenharmony_ci#include <linux/entry-kvm.h> 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_ci#include <asm/apic.h> 338c2ecf20Sopenharmony_ci#include <asm/asm.h> 348c2ecf20Sopenharmony_ci#include <asm/cpu.h> 358c2ecf20Sopenharmony_ci#include <asm/cpu_device_id.h> 368c2ecf20Sopenharmony_ci#include <asm/debugreg.h> 378c2ecf20Sopenharmony_ci#include <asm/desc.h> 388c2ecf20Sopenharmony_ci#include <asm/fpu/internal.h> 398c2ecf20Sopenharmony_ci#include <asm/idtentry.h> 408c2ecf20Sopenharmony_ci#include <asm/io.h> 418c2ecf20Sopenharmony_ci#include <asm/irq_remapping.h> 428c2ecf20Sopenharmony_ci#include <asm/kexec.h> 438c2ecf20Sopenharmony_ci#include <asm/perf_event.h> 448c2ecf20Sopenharmony_ci#include <asm/mce.h> 458c2ecf20Sopenharmony_ci#include <asm/mmu_context.h> 468c2ecf20Sopenharmony_ci#include <asm/mshyperv.h> 478c2ecf20Sopenharmony_ci#include <asm/mwait.h> 488c2ecf20Sopenharmony_ci#include <asm/spec-ctrl.h> 498c2ecf20Sopenharmony_ci#include <asm/virtext.h> 508c2ecf20Sopenharmony_ci#include <asm/vmx.h> 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci#include "capabilities.h" 538c2ecf20Sopenharmony_ci#include "cpuid.h" 548c2ecf20Sopenharmony_ci#include "evmcs.h" 558c2ecf20Sopenharmony_ci#include "irq.h" 568c2ecf20Sopenharmony_ci#include "kvm_cache_regs.h" 578c2ecf20Sopenharmony_ci#include "lapic.h" 588c2ecf20Sopenharmony_ci#include "mmu.h" 598c2ecf20Sopenharmony_ci#include "nested.h" 608c2ecf20Sopenharmony_ci#include "pmu.h" 618c2ecf20Sopenharmony_ci#include "trace.h" 628c2ecf20Sopenharmony_ci#include "vmcs.h" 638c2ecf20Sopenharmony_ci#include "vmcs12.h" 648c2ecf20Sopenharmony_ci#include "vmx.h" 658c2ecf20Sopenharmony_ci#include "x86.h" 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ciMODULE_AUTHOR("Qumranet"); 688c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_ci#ifdef MODULE 718c2ecf20Sopenharmony_cistatic const struct x86_cpu_id vmx_cpu_id[] = { 728c2ecf20Sopenharmony_ci X86_MATCH_FEATURE(X86_FEATURE_VMX, NULL), 738c2ecf20Sopenharmony_ci {} 748c2ecf20Sopenharmony_ci}; 758c2ecf20Sopenharmony_ciMODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); 768c2ecf20Sopenharmony_ci#endif 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_cibool __read_mostly enable_vpid = 1; 798c2ecf20Sopenharmony_cimodule_param_named(vpid, enable_vpid, bool, 0444); 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_cistatic bool __read_mostly enable_vnmi = 1; 828c2ecf20Sopenharmony_cimodule_param_named(vnmi, enable_vnmi, bool, S_IRUGO); 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_cibool __read_mostly flexpriority_enabled = 1; 858c2ecf20Sopenharmony_cimodule_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_cibool __read_mostly enable_ept = 1; 888c2ecf20Sopenharmony_cimodule_param_named(ept, enable_ept, bool, S_IRUGO); 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_cibool __read_mostly enable_unrestricted_guest = 1; 918c2ecf20Sopenharmony_cimodule_param_named(unrestricted_guest, 928c2ecf20Sopenharmony_ci enable_unrestricted_guest, bool, S_IRUGO); 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_cibool __read_mostly enable_ept_ad_bits = 1; 958c2ecf20Sopenharmony_cimodule_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO); 968c2ecf20Sopenharmony_ci 978c2ecf20Sopenharmony_cistatic bool __read_mostly emulate_invalid_guest_state = true; 988c2ecf20Sopenharmony_cimodule_param(emulate_invalid_guest_state, bool, S_IRUGO); 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_cistatic bool __read_mostly fasteoi = 1; 1018c2ecf20Sopenharmony_cimodule_param(fasteoi, bool, S_IRUGO); 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_cibool __read_mostly enable_apicv = 1; 1048c2ecf20Sopenharmony_cimodule_param(enable_apicv, bool, S_IRUGO); 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_ci/* 1078c2ecf20Sopenharmony_ci * If nested=1, nested virtualization is supported, i.e., guests may use 1088c2ecf20Sopenharmony_ci * VMX and be a hypervisor for its own guests. If nested=0, guests may not 1098c2ecf20Sopenharmony_ci * use VMX instructions. 1108c2ecf20Sopenharmony_ci */ 1118c2ecf20Sopenharmony_cistatic bool __read_mostly nested = 1; 1128c2ecf20Sopenharmony_cimodule_param(nested, bool, S_IRUGO); 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_cibool __read_mostly enable_pml = 1; 1158c2ecf20Sopenharmony_cimodule_param_named(pml, enable_pml, bool, S_IRUGO); 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_cistatic bool __read_mostly dump_invalid_vmcs = 0; 1188c2ecf20Sopenharmony_cimodule_param(dump_invalid_vmcs, bool, 0644); 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci#define MSR_BITMAP_MODE_X2APIC 1 1218c2ecf20Sopenharmony_ci#define MSR_BITMAP_MODE_X2APIC_APICV 2 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci/* Guest_tsc -> host_tsc conversion requires 64-bit division. */ 1268c2ecf20Sopenharmony_cistatic int __read_mostly cpu_preemption_timer_multi; 1278c2ecf20Sopenharmony_cistatic bool __read_mostly enable_preemption_timer = 1; 1288c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 1298c2ecf20Sopenharmony_cimodule_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); 1308c2ecf20Sopenharmony_ci#endif 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ciextern bool __read_mostly allow_smaller_maxphyaddr; 1338c2ecf20Sopenharmony_cimodule_param(allow_smaller_maxphyaddr, bool, S_IRUGO); 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD) 1368c2ecf20Sopenharmony_ci#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE 1378c2ecf20Sopenharmony_ci#define KVM_VM_CR0_ALWAYS_ON \ 1388c2ecf20Sopenharmony_ci (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE 1418c2ecf20Sopenharmony_ci#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) 1428c2ecf20Sopenharmony_ci#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_ci#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_ci#define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \ 1478c2ecf20Sopenharmony_ci RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \ 1488c2ecf20Sopenharmony_ci RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \ 1498c2ecf20Sopenharmony_ci RTIT_STATUS_BYTECNT)) 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_ci/* 1528c2ecf20Sopenharmony_ci * List of MSRs that can be directly passed to the guest. 1538c2ecf20Sopenharmony_ci * In addition to these x2apic and PT MSRs are handled specially. 1548c2ecf20Sopenharmony_ci */ 1558c2ecf20Sopenharmony_cistatic u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = { 1568c2ecf20Sopenharmony_ci MSR_IA32_SPEC_CTRL, 1578c2ecf20Sopenharmony_ci MSR_IA32_PRED_CMD, 1588c2ecf20Sopenharmony_ci MSR_IA32_TSC, 1598c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 1608c2ecf20Sopenharmony_ci MSR_FS_BASE, 1618c2ecf20Sopenharmony_ci MSR_GS_BASE, 1628c2ecf20Sopenharmony_ci MSR_KERNEL_GS_BASE, 1638c2ecf20Sopenharmony_ci#endif 1648c2ecf20Sopenharmony_ci MSR_IA32_SYSENTER_CS, 1658c2ecf20Sopenharmony_ci MSR_IA32_SYSENTER_ESP, 1668c2ecf20Sopenharmony_ci MSR_IA32_SYSENTER_EIP, 1678c2ecf20Sopenharmony_ci MSR_CORE_C1_RES, 1688c2ecf20Sopenharmony_ci MSR_CORE_C3_RESIDENCY, 1698c2ecf20Sopenharmony_ci MSR_CORE_C6_RESIDENCY, 1708c2ecf20Sopenharmony_ci MSR_CORE_C7_RESIDENCY, 1718c2ecf20Sopenharmony_ci}; 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci/* 1748c2ecf20Sopenharmony_ci * These 2 parameters are used to config the controls for Pause-Loop Exiting: 1758c2ecf20Sopenharmony_ci * ple_gap: upper bound on the amount of time between two successive 1768c2ecf20Sopenharmony_ci * executions of PAUSE in a loop. Also indicate if ple enabled. 1778c2ecf20Sopenharmony_ci * According to test, this time is usually smaller than 128 cycles. 1788c2ecf20Sopenharmony_ci * ple_window: upper bound on the amount of time a guest is allowed to execute 1798c2ecf20Sopenharmony_ci * in a PAUSE loop. Tests indicate that most spinlocks are held for 1808c2ecf20Sopenharmony_ci * less than 2^12 cycles 1818c2ecf20Sopenharmony_ci * Time is measured based on a counter that runs at the same rate as the TSC, 1828c2ecf20Sopenharmony_ci * refer SDM volume 3b section 21.6.13 & 22.1.3. 1838c2ecf20Sopenharmony_ci */ 1848c2ecf20Sopenharmony_cistatic unsigned int ple_gap = KVM_DEFAULT_PLE_GAP; 1858c2ecf20Sopenharmony_cimodule_param(ple_gap, uint, 0444); 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_cistatic unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; 1888c2ecf20Sopenharmony_cimodule_param(ple_window, uint, 0444); 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_ci/* Default doubles per-vcpu window every exit. */ 1918c2ecf20Sopenharmony_cistatic unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW; 1928c2ecf20Sopenharmony_cimodule_param(ple_window_grow, uint, 0444); 1938c2ecf20Sopenharmony_ci 1948c2ecf20Sopenharmony_ci/* Default resets per-vcpu window every exit to ple_window. */ 1958c2ecf20Sopenharmony_cistatic unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; 1968c2ecf20Sopenharmony_cimodule_param(ple_window_shrink, uint, 0444); 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci/* Default is to compute the maximum so we can never overflow. */ 1998c2ecf20Sopenharmony_cistatic unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; 2008c2ecf20Sopenharmony_cimodule_param(ple_window_max, uint, 0444); 2018c2ecf20Sopenharmony_ci 2028c2ecf20Sopenharmony_ci/* Default is SYSTEM mode, 1 for host-guest mode */ 2038c2ecf20Sopenharmony_ciint __read_mostly pt_mode = PT_MODE_SYSTEM; 2048c2ecf20Sopenharmony_cimodule_param(pt_mode, int, S_IRUGO); 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_cistatic DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); 2078c2ecf20Sopenharmony_cistatic DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); 2088c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(vmx_l1d_flush_mutex); 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci/* Storage for pre module init parameter parsing */ 2118c2ecf20Sopenharmony_cistatic enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO; 2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_cistatic const struct { 2148c2ecf20Sopenharmony_ci const char *option; 2158c2ecf20Sopenharmony_ci bool for_parse; 2168c2ecf20Sopenharmony_ci} vmentry_l1d_param[] = { 2178c2ecf20Sopenharmony_ci [VMENTER_L1D_FLUSH_AUTO] = {"auto", true}, 2188c2ecf20Sopenharmony_ci [VMENTER_L1D_FLUSH_NEVER] = {"never", true}, 2198c2ecf20Sopenharmony_ci [VMENTER_L1D_FLUSH_COND] = {"cond", true}, 2208c2ecf20Sopenharmony_ci [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true}, 2218c2ecf20Sopenharmony_ci [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false}, 2228c2ecf20Sopenharmony_ci [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false}, 2238c2ecf20Sopenharmony_ci}; 2248c2ecf20Sopenharmony_ci 2258c2ecf20Sopenharmony_ci#define L1D_CACHE_ORDER 4 2268c2ecf20Sopenharmony_cistatic void *vmx_l1d_flush_pages; 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci/* Control for disabling CPU Fill buffer clear */ 2298c2ecf20Sopenharmony_cistatic bool __read_mostly vmx_fb_clear_ctrl_available; 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_cistatic int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) 2328c2ecf20Sopenharmony_ci{ 2338c2ecf20Sopenharmony_ci struct page *page; 2348c2ecf20Sopenharmony_ci unsigned int i; 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ci if (!boot_cpu_has_bug(X86_BUG_L1TF)) { 2378c2ecf20Sopenharmony_ci l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; 2388c2ecf20Sopenharmony_ci return 0; 2398c2ecf20Sopenharmony_ci } 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_ci if (!enable_ept) { 2428c2ecf20Sopenharmony_ci l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; 2438c2ecf20Sopenharmony_ci return 0; 2448c2ecf20Sopenharmony_ci } 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { 2478c2ecf20Sopenharmony_ci u64 msr; 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); 2508c2ecf20Sopenharmony_ci if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { 2518c2ecf20Sopenharmony_ci l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; 2528c2ecf20Sopenharmony_ci return 0; 2538c2ecf20Sopenharmony_ci } 2548c2ecf20Sopenharmony_ci } 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_ci /* If set to auto use the default l1tf mitigation method */ 2578c2ecf20Sopenharmony_ci if (l1tf == VMENTER_L1D_FLUSH_AUTO) { 2588c2ecf20Sopenharmony_ci switch (l1tf_mitigation) { 2598c2ecf20Sopenharmony_ci case L1TF_MITIGATION_OFF: 2608c2ecf20Sopenharmony_ci l1tf = VMENTER_L1D_FLUSH_NEVER; 2618c2ecf20Sopenharmony_ci break; 2628c2ecf20Sopenharmony_ci case L1TF_MITIGATION_FLUSH_NOWARN: 2638c2ecf20Sopenharmony_ci case L1TF_MITIGATION_FLUSH: 2648c2ecf20Sopenharmony_ci case L1TF_MITIGATION_FLUSH_NOSMT: 2658c2ecf20Sopenharmony_ci l1tf = VMENTER_L1D_FLUSH_COND; 2668c2ecf20Sopenharmony_ci break; 2678c2ecf20Sopenharmony_ci case L1TF_MITIGATION_FULL: 2688c2ecf20Sopenharmony_ci case L1TF_MITIGATION_FULL_FORCE: 2698c2ecf20Sopenharmony_ci l1tf = VMENTER_L1D_FLUSH_ALWAYS; 2708c2ecf20Sopenharmony_ci break; 2718c2ecf20Sopenharmony_ci } 2728c2ecf20Sopenharmony_ci } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) { 2738c2ecf20Sopenharmony_ci l1tf = VMENTER_L1D_FLUSH_ALWAYS; 2748c2ecf20Sopenharmony_ci } 2758c2ecf20Sopenharmony_ci 2768c2ecf20Sopenharmony_ci if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && 2778c2ecf20Sopenharmony_ci !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { 2788c2ecf20Sopenharmony_ci /* 2798c2ecf20Sopenharmony_ci * This allocation for vmx_l1d_flush_pages is not tied to a VM 2808c2ecf20Sopenharmony_ci * lifetime and so should not be charged to a memcg. 2818c2ecf20Sopenharmony_ci */ 2828c2ecf20Sopenharmony_ci page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); 2838c2ecf20Sopenharmony_ci if (!page) 2848c2ecf20Sopenharmony_ci return -ENOMEM; 2858c2ecf20Sopenharmony_ci vmx_l1d_flush_pages = page_address(page); 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_ci /* 2888c2ecf20Sopenharmony_ci * Initialize each page with a different pattern in 2898c2ecf20Sopenharmony_ci * order to protect against KSM in the nested 2908c2ecf20Sopenharmony_ci * virtualization case. 2918c2ecf20Sopenharmony_ci */ 2928c2ecf20Sopenharmony_ci for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) { 2938c2ecf20Sopenharmony_ci memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1, 2948c2ecf20Sopenharmony_ci PAGE_SIZE); 2958c2ecf20Sopenharmony_ci } 2968c2ecf20Sopenharmony_ci } 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci l1tf_vmx_mitigation = l1tf; 2998c2ecf20Sopenharmony_ci 3008c2ecf20Sopenharmony_ci if (l1tf != VMENTER_L1D_FLUSH_NEVER) 3018c2ecf20Sopenharmony_ci static_branch_enable(&vmx_l1d_should_flush); 3028c2ecf20Sopenharmony_ci else 3038c2ecf20Sopenharmony_ci static_branch_disable(&vmx_l1d_should_flush); 3048c2ecf20Sopenharmony_ci 3058c2ecf20Sopenharmony_ci if (l1tf == VMENTER_L1D_FLUSH_COND) 3068c2ecf20Sopenharmony_ci static_branch_enable(&vmx_l1d_flush_cond); 3078c2ecf20Sopenharmony_ci else 3088c2ecf20Sopenharmony_ci static_branch_disable(&vmx_l1d_flush_cond); 3098c2ecf20Sopenharmony_ci return 0; 3108c2ecf20Sopenharmony_ci} 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_cistatic int vmentry_l1d_flush_parse(const char *s) 3138c2ecf20Sopenharmony_ci{ 3148c2ecf20Sopenharmony_ci unsigned int i; 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_ci if (s) { 3178c2ecf20Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { 3188c2ecf20Sopenharmony_ci if (vmentry_l1d_param[i].for_parse && 3198c2ecf20Sopenharmony_ci sysfs_streq(s, vmentry_l1d_param[i].option)) 3208c2ecf20Sopenharmony_ci return i; 3218c2ecf20Sopenharmony_ci } 3228c2ecf20Sopenharmony_ci } 3238c2ecf20Sopenharmony_ci return -EINVAL; 3248c2ecf20Sopenharmony_ci} 3258c2ecf20Sopenharmony_ci 3268c2ecf20Sopenharmony_cistatic int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) 3278c2ecf20Sopenharmony_ci{ 3288c2ecf20Sopenharmony_ci int l1tf, ret; 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_ci l1tf = vmentry_l1d_flush_parse(s); 3318c2ecf20Sopenharmony_ci if (l1tf < 0) 3328c2ecf20Sopenharmony_ci return l1tf; 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_ci if (!boot_cpu_has(X86_BUG_L1TF)) 3358c2ecf20Sopenharmony_ci return 0; 3368c2ecf20Sopenharmony_ci 3378c2ecf20Sopenharmony_ci /* 3388c2ecf20Sopenharmony_ci * Has vmx_init() run already? If not then this is the pre init 3398c2ecf20Sopenharmony_ci * parameter parsing. In that case just store the value and let 3408c2ecf20Sopenharmony_ci * vmx_init() do the proper setup after enable_ept has been 3418c2ecf20Sopenharmony_ci * established. 3428c2ecf20Sopenharmony_ci */ 3438c2ecf20Sopenharmony_ci if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) { 3448c2ecf20Sopenharmony_ci vmentry_l1d_flush_param = l1tf; 3458c2ecf20Sopenharmony_ci return 0; 3468c2ecf20Sopenharmony_ci } 3478c2ecf20Sopenharmony_ci 3488c2ecf20Sopenharmony_ci mutex_lock(&vmx_l1d_flush_mutex); 3498c2ecf20Sopenharmony_ci ret = vmx_setup_l1d_flush(l1tf); 3508c2ecf20Sopenharmony_ci mutex_unlock(&vmx_l1d_flush_mutex); 3518c2ecf20Sopenharmony_ci return ret; 3528c2ecf20Sopenharmony_ci} 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_cistatic int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) 3558c2ecf20Sopenharmony_ci{ 3568c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param))) 3578c2ecf20Sopenharmony_ci return sprintf(s, "???\n"); 3588c2ecf20Sopenharmony_ci 3598c2ecf20Sopenharmony_ci return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); 3608c2ecf20Sopenharmony_ci} 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_cistatic void vmx_setup_fb_clear_ctrl(void) 3638c2ecf20Sopenharmony_ci{ 3648c2ecf20Sopenharmony_ci u64 msr; 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_ci if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) && 3678c2ecf20Sopenharmony_ci !boot_cpu_has_bug(X86_BUG_MDS) && 3688c2ecf20Sopenharmony_ci !boot_cpu_has_bug(X86_BUG_TAA)) { 3698c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); 3708c2ecf20Sopenharmony_ci if (msr & ARCH_CAP_FB_CLEAR_CTRL) 3718c2ecf20Sopenharmony_ci vmx_fb_clear_ctrl_available = true; 3728c2ecf20Sopenharmony_ci } 3738c2ecf20Sopenharmony_ci} 3748c2ecf20Sopenharmony_ci 3758c2ecf20Sopenharmony_cistatic __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) 3768c2ecf20Sopenharmony_ci{ 3778c2ecf20Sopenharmony_ci u64 msr; 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_ci if (!vmx->disable_fb_clear) 3808c2ecf20Sopenharmony_ci return; 3818c2ecf20Sopenharmony_ci 3828c2ecf20Sopenharmony_ci msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL); 3838c2ecf20Sopenharmony_ci msr |= FB_CLEAR_DIS; 3848c2ecf20Sopenharmony_ci native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); 3858c2ecf20Sopenharmony_ci /* Cache the MSR value to avoid reading it later */ 3868c2ecf20Sopenharmony_ci vmx->msr_ia32_mcu_opt_ctrl = msr; 3878c2ecf20Sopenharmony_ci} 3888c2ecf20Sopenharmony_ci 3898c2ecf20Sopenharmony_cistatic __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) 3908c2ecf20Sopenharmony_ci{ 3918c2ecf20Sopenharmony_ci if (!vmx->disable_fb_clear) 3928c2ecf20Sopenharmony_ci return; 3938c2ecf20Sopenharmony_ci 3948c2ecf20Sopenharmony_ci vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; 3958c2ecf20Sopenharmony_ci native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); 3968c2ecf20Sopenharmony_ci} 3978c2ecf20Sopenharmony_ci 3988c2ecf20Sopenharmony_cistatic void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) 3998c2ecf20Sopenharmony_ci{ 4008c2ecf20Sopenharmony_ci vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; 4018c2ecf20Sopenharmony_ci 4028c2ecf20Sopenharmony_ci /* 4038c2ecf20Sopenharmony_ci * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS 4048c2ecf20Sopenharmony_ci * at VMEntry. Skip the MSR read/write when a guest has no use case to 4058c2ecf20Sopenharmony_ci * execute VERW. 4068c2ecf20Sopenharmony_ci */ 4078c2ecf20Sopenharmony_ci if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) || 4088c2ecf20Sopenharmony_ci ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) && 4098c2ecf20Sopenharmony_ci (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) && 4108c2ecf20Sopenharmony_ci (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) && 4118c2ecf20Sopenharmony_ci (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) && 4128c2ecf20Sopenharmony_ci (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO))) 4138c2ecf20Sopenharmony_ci vmx->disable_fb_clear = false; 4148c2ecf20Sopenharmony_ci} 4158c2ecf20Sopenharmony_ci 4168c2ecf20Sopenharmony_cistatic const struct kernel_param_ops vmentry_l1d_flush_ops = { 4178c2ecf20Sopenharmony_ci .set = vmentry_l1d_flush_set, 4188c2ecf20Sopenharmony_ci .get = vmentry_l1d_flush_get, 4198c2ecf20Sopenharmony_ci}; 4208c2ecf20Sopenharmony_cimodule_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644); 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_cistatic u32 vmx_segment_access_rights(struct kvm_segment *var); 4238c2ecf20Sopenharmony_cistatic __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, 4248c2ecf20Sopenharmony_ci u32 msr, int type); 4258c2ecf20Sopenharmony_ci 4268c2ecf20Sopenharmony_civoid vmx_vmexit(void); 4278c2ecf20Sopenharmony_ci 4288c2ecf20Sopenharmony_ci#define vmx_insn_failed(fmt...) \ 4298c2ecf20Sopenharmony_cido { \ 4308c2ecf20Sopenharmony_ci WARN_ONCE(1, fmt); \ 4318c2ecf20Sopenharmony_ci pr_warn_ratelimited(fmt); \ 4328c2ecf20Sopenharmony_ci} while (0) 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ciasmlinkage void vmread_error(unsigned long field, bool fault) 4358c2ecf20Sopenharmony_ci{ 4368c2ecf20Sopenharmony_ci if (fault) 4378c2ecf20Sopenharmony_ci kvm_spurious_fault(); 4388c2ecf20Sopenharmony_ci else 4398c2ecf20Sopenharmony_ci vmx_insn_failed("kvm: vmread failed: field=%lx\n", field); 4408c2ecf20Sopenharmony_ci} 4418c2ecf20Sopenharmony_ci 4428c2ecf20Sopenharmony_cinoinline void vmwrite_error(unsigned long field, unsigned long value) 4438c2ecf20Sopenharmony_ci{ 4448c2ecf20Sopenharmony_ci vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n", 4458c2ecf20Sopenharmony_ci field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); 4468c2ecf20Sopenharmony_ci} 4478c2ecf20Sopenharmony_ci 4488c2ecf20Sopenharmony_cinoinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr) 4498c2ecf20Sopenharmony_ci{ 4508c2ecf20Sopenharmony_ci vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr); 4518c2ecf20Sopenharmony_ci} 4528c2ecf20Sopenharmony_ci 4538c2ecf20Sopenharmony_cinoinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr) 4548c2ecf20Sopenharmony_ci{ 4558c2ecf20Sopenharmony_ci vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr); 4568c2ecf20Sopenharmony_ci} 4578c2ecf20Sopenharmony_ci 4588c2ecf20Sopenharmony_cinoinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva) 4598c2ecf20Sopenharmony_ci{ 4608c2ecf20Sopenharmony_ci vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n", 4618c2ecf20Sopenharmony_ci ext, vpid, gva); 4628c2ecf20Sopenharmony_ci} 4638c2ecf20Sopenharmony_ci 4648c2ecf20Sopenharmony_cinoinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa) 4658c2ecf20Sopenharmony_ci{ 4668c2ecf20Sopenharmony_ci vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n", 4678c2ecf20Sopenharmony_ci ext, eptp, gpa); 4688c2ecf20Sopenharmony_ci} 4698c2ecf20Sopenharmony_ci 4708c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(struct vmcs *, vmxarea); 4718c2ecf20Sopenharmony_ciDEFINE_PER_CPU(struct vmcs *, current_vmcs); 4728c2ecf20Sopenharmony_ci/* 4738c2ecf20Sopenharmony_ci * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed 4748c2ecf20Sopenharmony_ci * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it. 4758c2ecf20Sopenharmony_ci */ 4768c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); 4778c2ecf20Sopenharmony_ci 4788c2ecf20Sopenharmony_cistatic DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); 4798c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(vmx_vpid_lock); 4808c2ecf20Sopenharmony_ci 4818c2ecf20Sopenharmony_cistruct vmcs_config vmcs_config; 4828c2ecf20Sopenharmony_cistruct vmx_capability vmx_capability; 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_ci#define VMX_SEGMENT_FIELD(seg) \ 4858c2ecf20Sopenharmony_ci [VCPU_SREG_##seg] = { \ 4868c2ecf20Sopenharmony_ci .selector = GUEST_##seg##_SELECTOR, \ 4878c2ecf20Sopenharmony_ci .base = GUEST_##seg##_BASE, \ 4888c2ecf20Sopenharmony_ci .limit = GUEST_##seg##_LIMIT, \ 4898c2ecf20Sopenharmony_ci .ar_bytes = GUEST_##seg##_AR_BYTES, \ 4908c2ecf20Sopenharmony_ci } 4918c2ecf20Sopenharmony_ci 4928c2ecf20Sopenharmony_cistatic const struct kvm_vmx_segment_field { 4938c2ecf20Sopenharmony_ci unsigned selector; 4948c2ecf20Sopenharmony_ci unsigned base; 4958c2ecf20Sopenharmony_ci unsigned limit; 4968c2ecf20Sopenharmony_ci unsigned ar_bytes; 4978c2ecf20Sopenharmony_ci} kvm_vmx_segment_fields[] = { 4988c2ecf20Sopenharmony_ci VMX_SEGMENT_FIELD(CS), 4998c2ecf20Sopenharmony_ci VMX_SEGMENT_FIELD(DS), 5008c2ecf20Sopenharmony_ci VMX_SEGMENT_FIELD(ES), 5018c2ecf20Sopenharmony_ci VMX_SEGMENT_FIELD(FS), 5028c2ecf20Sopenharmony_ci VMX_SEGMENT_FIELD(GS), 5038c2ecf20Sopenharmony_ci VMX_SEGMENT_FIELD(SS), 5048c2ecf20Sopenharmony_ci VMX_SEGMENT_FIELD(TR), 5058c2ecf20Sopenharmony_ci VMX_SEGMENT_FIELD(LDTR), 5068c2ecf20Sopenharmony_ci}; 5078c2ecf20Sopenharmony_ci 5088c2ecf20Sopenharmony_cistatic inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) 5098c2ecf20Sopenharmony_ci{ 5108c2ecf20Sopenharmony_ci vmx->segment_cache.bitmask = 0; 5118c2ecf20Sopenharmony_ci} 5128c2ecf20Sopenharmony_ci 5138c2ecf20Sopenharmony_cistatic unsigned long host_idt_base; 5148c2ecf20Sopenharmony_ci 5158c2ecf20Sopenharmony_ci/* 5168c2ecf20Sopenharmony_ci * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm 5178c2ecf20Sopenharmony_ci * will emulate SYSCALL in legacy mode if the vendor string in guest 5188c2ecf20Sopenharmony_ci * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To 5198c2ecf20Sopenharmony_ci * support this emulation, IA32_STAR must always be included in 5208c2ecf20Sopenharmony_ci * vmx_uret_msrs_list[], even in i386 builds. 5218c2ecf20Sopenharmony_ci */ 5228c2ecf20Sopenharmony_cistatic const u32 vmx_uret_msrs_list[] = { 5238c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 5248c2ecf20Sopenharmony_ci MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, 5258c2ecf20Sopenharmony_ci#endif 5268c2ecf20Sopenharmony_ci MSR_EFER, MSR_TSC_AUX, MSR_STAR, 5278c2ecf20Sopenharmony_ci MSR_IA32_TSX_CTRL, 5288c2ecf20Sopenharmony_ci}; 5298c2ecf20Sopenharmony_ci 5308c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_HYPERV) 5318c2ecf20Sopenharmony_cistatic bool __read_mostly enlightened_vmcs = true; 5328c2ecf20Sopenharmony_cimodule_param(enlightened_vmcs, bool, 0444); 5338c2ecf20Sopenharmony_ci 5348c2ecf20Sopenharmony_ci/* check_ept_pointer() should be under protection of ept_pointer_lock. */ 5358c2ecf20Sopenharmony_cistatic void check_ept_pointer_match(struct kvm *kvm) 5368c2ecf20Sopenharmony_ci{ 5378c2ecf20Sopenharmony_ci struct kvm_vcpu *vcpu; 5388c2ecf20Sopenharmony_ci u64 tmp_eptp = INVALID_PAGE; 5398c2ecf20Sopenharmony_ci int i; 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_ci kvm_for_each_vcpu(i, vcpu, kvm) { 5428c2ecf20Sopenharmony_ci if (!VALID_PAGE(tmp_eptp)) { 5438c2ecf20Sopenharmony_ci tmp_eptp = to_vmx(vcpu)->ept_pointer; 5448c2ecf20Sopenharmony_ci } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) { 5458c2ecf20Sopenharmony_ci to_kvm_vmx(kvm)->ept_pointers_match 5468c2ecf20Sopenharmony_ci = EPT_POINTERS_MISMATCH; 5478c2ecf20Sopenharmony_ci return; 5488c2ecf20Sopenharmony_ci } 5498c2ecf20Sopenharmony_ci } 5508c2ecf20Sopenharmony_ci 5518c2ecf20Sopenharmony_ci to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH; 5528c2ecf20Sopenharmony_ci} 5538c2ecf20Sopenharmony_ci 5548c2ecf20Sopenharmony_cistatic int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush, 5558c2ecf20Sopenharmony_ci void *data) 5568c2ecf20Sopenharmony_ci{ 5578c2ecf20Sopenharmony_ci struct kvm_tlb_range *range = data; 5588c2ecf20Sopenharmony_ci 5598c2ecf20Sopenharmony_ci return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn, 5608c2ecf20Sopenharmony_ci range->pages); 5618c2ecf20Sopenharmony_ci} 5628c2ecf20Sopenharmony_ci 5638c2ecf20Sopenharmony_cistatic inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm, 5648c2ecf20Sopenharmony_ci struct kvm_vcpu *vcpu, struct kvm_tlb_range *range) 5658c2ecf20Sopenharmony_ci{ 5668c2ecf20Sopenharmony_ci u64 ept_pointer = to_vmx(vcpu)->ept_pointer; 5678c2ecf20Sopenharmony_ci 5688c2ecf20Sopenharmony_ci /* 5698c2ecf20Sopenharmony_ci * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs address 5708c2ecf20Sopenharmony_ci * of the base of EPT PML4 table, strip off EPT configuration 5718c2ecf20Sopenharmony_ci * information. 5728c2ecf20Sopenharmony_ci */ 5738c2ecf20Sopenharmony_ci if (range) 5748c2ecf20Sopenharmony_ci return hyperv_flush_guest_mapping_range(ept_pointer & PAGE_MASK, 5758c2ecf20Sopenharmony_ci kvm_fill_hv_flush_list_func, (void *)range); 5768c2ecf20Sopenharmony_ci else 5778c2ecf20Sopenharmony_ci return hyperv_flush_guest_mapping(ept_pointer & PAGE_MASK); 5788c2ecf20Sopenharmony_ci} 5798c2ecf20Sopenharmony_ci 5808c2ecf20Sopenharmony_cistatic int hv_remote_flush_tlb_with_range(struct kvm *kvm, 5818c2ecf20Sopenharmony_ci struct kvm_tlb_range *range) 5828c2ecf20Sopenharmony_ci{ 5838c2ecf20Sopenharmony_ci struct kvm_vcpu *vcpu; 5848c2ecf20Sopenharmony_ci int ret = 0, i; 5858c2ecf20Sopenharmony_ci 5868c2ecf20Sopenharmony_ci spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); 5878c2ecf20Sopenharmony_ci 5888c2ecf20Sopenharmony_ci if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK) 5898c2ecf20Sopenharmony_ci check_ept_pointer_match(kvm); 5908c2ecf20Sopenharmony_ci 5918c2ecf20Sopenharmony_ci if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) { 5928c2ecf20Sopenharmony_ci kvm_for_each_vcpu(i, vcpu, kvm) { 5938c2ecf20Sopenharmony_ci /* If ept_pointer is invalid pointer, bypass flush request. */ 5948c2ecf20Sopenharmony_ci if (VALID_PAGE(to_vmx(vcpu)->ept_pointer)) 5958c2ecf20Sopenharmony_ci ret |= __hv_remote_flush_tlb_with_range( 5968c2ecf20Sopenharmony_ci kvm, vcpu, range); 5978c2ecf20Sopenharmony_ci } 5988c2ecf20Sopenharmony_ci } else { 5998c2ecf20Sopenharmony_ci ret = __hv_remote_flush_tlb_with_range(kvm, 6008c2ecf20Sopenharmony_ci kvm_get_vcpu(kvm, 0), range); 6018c2ecf20Sopenharmony_ci } 6028c2ecf20Sopenharmony_ci 6038c2ecf20Sopenharmony_ci spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); 6048c2ecf20Sopenharmony_ci return ret; 6058c2ecf20Sopenharmony_ci} 6068c2ecf20Sopenharmony_cistatic int hv_remote_flush_tlb(struct kvm *kvm) 6078c2ecf20Sopenharmony_ci{ 6088c2ecf20Sopenharmony_ci return hv_remote_flush_tlb_with_range(kvm, NULL); 6098c2ecf20Sopenharmony_ci} 6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_cistatic int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) 6128c2ecf20Sopenharmony_ci{ 6138c2ecf20Sopenharmony_ci struct hv_enlightened_vmcs *evmcs; 6148c2ecf20Sopenharmony_ci struct hv_partition_assist_pg **p_hv_pa_pg = 6158c2ecf20Sopenharmony_ci &vcpu->kvm->arch.hyperv.hv_pa_pg; 6168c2ecf20Sopenharmony_ci /* 6178c2ecf20Sopenharmony_ci * Synthetic VM-Exit is not enabled in current code and so All 6188c2ecf20Sopenharmony_ci * evmcs in singe VM shares same assist page. 6198c2ecf20Sopenharmony_ci */ 6208c2ecf20Sopenharmony_ci if (!*p_hv_pa_pg) 6218c2ecf20Sopenharmony_ci *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); 6228c2ecf20Sopenharmony_ci 6238c2ecf20Sopenharmony_ci if (!*p_hv_pa_pg) 6248c2ecf20Sopenharmony_ci return -ENOMEM; 6258c2ecf20Sopenharmony_ci 6268c2ecf20Sopenharmony_ci evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs; 6278c2ecf20Sopenharmony_ci 6288c2ecf20Sopenharmony_ci evmcs->partition_assist_page = 6298c2ecf20Sopenharmony_ci __pa(*p_hv_pa_pg); 6308c2ecf20Sopenharmony_ci evmcs->hv_vm_id = (unsigned long)vcpu->kvm; 6318c2ecf20Sopenharmony_ci evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; 6328c2ecf20Sopenharmony_ci 6338c2ecf20Sopenharmony_ci return 0; 6348c2ecf20Sopenharmony_ci} 6358c2ecf20Sopenharmony_ci 6368c2ecf20Sopenharmony_ci#endif /* IS_ENABLED(CONFIG_HYPERV) */ 6378c2ecf20Sopenharmony_ci 6388c2ecf20Sopenharmony_ci/* 6398c2ecf20Sopenharmony_ci * Comment's format: document - errata name - stepping - processor name. 6408c2ecf20Sopenharmony_ci * Refer from 6418c2ecf20Sopenharmony_ci * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp 6428c2ecf20Sopenharmony_ci */ 6438c2ecf20Sopenharmony_cistatic u32 vmx_preemption_cpu_tfms[] = { 6448c2ecf20Sopenharmony_ci/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */ 6458c2ecf20Sopenharmony_ci0x000206E6, 6468c2ecf20Sopenharmony_ci/* 323056.pdf - AAX65 - C2 - Xeon L3406 */ 6478c2ecf20Sopenharmony_ci/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */ 6488c2ecf20Sopenharmony_ci/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */ 6498c2ecf20Sopenharmony_ci0x00020652, 6508c2ecf20Sopenharmony_ci/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */ 6518c2ecf20Sopenharmony_ci0x00020655, 6528c2ecf20Sopenharmony_ci/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */ 6538c2ecf20Sopenharmony_ci/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */ 6548c2ecf20Sopenharmony_ci/* 6558c2ecf20Sopenharmony_ci * 320767.pdf - AAP86 - B1 - 6568c2ecf20Sopenharmony_ci * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile 6578c2ecf20Sopenharmony_ci */ 6588c2ecf20Sopenharmony_ci0x000106E5, 6598c2ecf20Sopenharmony_ci/* 321333.pdf - AAM126 - C0 - Xeon 3500 */ 6608c2ecf20Sopenharmony_ci0x000106A0, 6618c2ecf20Sopenharmony_ci/* 321333.pdf - AAM126 - C1 - Xeon 3500 */ 6628c2ecf20Sopenharmony_ci0x000106A1, 6638c2ecf20Sopenharmony_ci/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */ 6648c2ecf20Sopenharmony_ci0x000106A4, 6658c2ecf20Sopenharmony_ci /* 321333.pdf - AAM126 - D0 - Xeon 3500 */ 6668c2ecf20Sopenharmony_ci /* 321324.pdf - AAK139 - D0 - Xeon 5500 */ 6678c2ecf20Sopenharmony_ci /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */ 6688c2ecf20Sopenharmony_ci0x000106A5, 6698c2ecf20Sopenharmony_ci /* Xeon E3-1220 V2 */ 6708c2ecf20Sopenharmony_ci0x000306A8, 6718c2ecf20Sopenharmony_ci}; 6728c2ecf20Sopenharmony_ci 6738c2ecf20Sopenharmony_cistatic inline bool cpu_has_broken_vmx_preemption_timer(void) 6748c2ecf20Sopenharmony_ci{ 6758c2ecf20Sopenharmony_ci u32 eax = cpuid_eax(0x00000001), i; 6768c2ecf20Sopenharmony_ci 6778c2ecf20Sopenharmony_ci /* Clear the reserved bits */ 6788c2ecf20Sopenharmony_ci eax &= ~(0x3U << 14 | 0xfU << 28); 6798c2ecf20Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++) 6808c2ecf20Sopenharmony_ci if (eax == vmx_preemption_cpu_tfms[i]) 6818c2ecf20Sopenharmony_ci return true; 6828c2ecf20Sopenharmony_ci 6838c2ecf20Sopenharmony_ci return false; 6848c2ecf20Sopenharmony_ci} 6858c2ecf20Sopenharmony_ci 6868c2ecf20Sopenharmony_cistatic inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu) 6878c2ecf20Sopenharmony_ci{ 6888c2ecf20Sopenharmony_ci return flexpriority_enabled && lapic_in_kernel(vcpu); 6898c2ecf20Sopenharmony_ci} 6908c2ecf20Sopenharmony_ci 6918c2ecf20Sopenharmony_cistatic inline bool report_flexpriority(void) 6928c2ecf20Sopenharmony_ci{ 6938c2ecf20Sopenharmony_ci return flexpriority_enabled; 6948c2ecf20Sopenharmony_ci} 6958c2ecf20Sopenharmony_ci 6968c2ecf20Sopenharmony_cistatic int possible_passthrough_msr_slot(u32 msr) 6978c2ecf20Sopenharmony_ci{ 6988c2ecf20Sopenharmony_ci u32 i; 6998c2ecf20Sopenharmony_ci 7008c2ecf20Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) 7018c2ecf20Sopenharmony_ci if (vmx_possible_passthrough_msrs[i] == msr) 7028c2ecf20Sopenharmony_ci return i; 7038c2ecf20Sopenharmony_ci 7048c2ecf20Sopenharmony_ci return -ENOENT; 7058c2ecf20Sopenharmony_ci} 7068c2ecf20Sopenharmony_ci 7078c2ecf20Sopenharmony_cistatic bool is_valid_passthrough_msr(u32 msr) 7088c2ecf20Sopenharmony_ci{ 7098c2ecf20Sopenharmony_ci bool r; 7108c2ecf20Sopenharmony_ci 7118c2ecf20Sopenharmony_ci switch (msr) { 7128c2ecf20Sopenharmony_ci case 0x800 ... 0x8ff: 7138c2ecf20Sopenharmony_ci /* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */ 7148c2ecf20Sopenharmony_ci return true; 7158c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_STATUS: 7168c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_OUTPUT_BASE: 7178c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_OUTPUT_MASK: 7188c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_CR3_MATCH: 7198c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: 7208c2ecf20Sopenharmony_ci /* PT MSRs. These are handled in pt_update_intercept_for_msr() */ 7218c2ecf20Sopenharmony_ci return true; 7228c2ecf20Sopenharmony_ci } 7238c2ecf20Sopenharmony_ci 7248c2ecf20Sopenharmony_ci r = possible_passthrough_msr_slot(msr) != -ENOENT; 7258c2ecf20Sopenharmony_ci 7268c2ecf20Sopenharmony_ci WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr); 7278c2ecf20Sopenharmony_ci 7288c2ecf20Sopenharmony_ci return r; 7298c2ecf20Sopenharmony_ci} 7308c2ecf20Sopenharmony_ci 7318c2ecf20Sopenharmony_cistatic inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) 7328c2ecf20Sopenharmony_ci{ 7338c2ecf20Sopenharmony_ci int i; 7348c2ecf20Sopenharmony_ci 7358c2ecf20Sopenharmony_ci for (i = 0; i < vmx->nr_uret_msrs; ++i) 7368c2ecf20Sopenharmony_ci if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr) 7378c2ecf20Sopenharmony_ci return i; 7388c2ecf20Sopenharmony_ci return -1; 7398c2ecf20Sopenharmony_ci} 7408c2ecf20Sopenharmony_ci 7418c2ecf20Sopenharmony_cistruct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) 7428c2ecf20Sopenharmony_ci{ 7438c2ecf20Sopenharmony_ci int i; 7448c2ecf20Sopenharmony_ci 7458c2ecf20Sopenharmony_ci i = __vmx_find_uret_msr(vmx, msr); 7468c2ecf20Sopenharmony_ci if (i >= 0) 7478c2ecf20Sopenharmony_ci return &vmx->guest_uret_msrs[i]; 7488c2ecf20Sopenharmony_ci return NULL; 7498c2ecf20Sopenharmony_ci} 7508c2ecf20Sopenharmony_ci 7518c2ecf20Sopenharmony_cistatic int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx, 7528c2ecf20Sopenharmony_ci struct vmx_uret_msr *msr, u64 data) 7538c2ecf20Sopenharmony_ci{ 7548c2ecf20Sopenharmony_ci int ret = 0; 7558c2ecf20Sopenharmony_ci 7568c2ecf20Sopenharmony_ci u64 old_msr_data = msr->data; 7578c2ecf20Sopenharmony_ci msr->data = data; 7588c2ecf20Sopenharmony_ci if (msr - vmx->guest_uret_msrs < vmx->nr_active_uret_msrs) { 7598c2ecf20Sopenharmony_ci preempt_disable(); 7608c2ecf20Sopenharmony_ci ret = kvm_set_user_return_msr(msr->slot, msr->data, msr->mask); 7618c2ecf20Sopenharmony_ci preempt_enable(); 7628c2ecf20Sopenharmony_ci if (ret) 7638c2ecf20Sopenharmony_ci msr->data = old_msr_data; 7648c2ecf20Sopenharmony_ci } 7658c2ecf20Sopenharmony_ci return ret; 7668c2ecf20Sopenharmony_ci} 7678c2ecf20Sopenharmony_ci 7688c2ecf20Sopenharmony_ci#ifdef CONFIG_KEXEC_CORE 7698c2ecf20Sopenharmony_cistatic void crash_vmclear_local_loaded_vmcss(void) 7708c2ecf20Sopenharmony_ci{ 7718c2ecf20Sopenharmony_ci int cpu = raw_smp_processor_id(); 7728c2ecf20Sopenharmony_ci struct loaded_vmcs *v; 7738c2ecf20Sopenharmony_ci 7748c2ecf20Sopenharmony_ci list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), 7758c2ecf20Sopenharmony_ci loaded_vmcss_on_cpu_link) 7768c2ecf20Sopenharmony_ci vmcs_clear(v->vmcs); 7778c2ecf20Sopenharmony_ci} 7788c2ecf20Sopenharmony_ci#endif /* CONFIG_KEXEC_CORE */ 7798c2ecf20Sopenharmony_ci 7808c2ecf20Sopenharmony_cistatic void __loaded_vmcs_clear(void *arg) 7818c2ecf20Sopenharmony_ci{ 7828c2ecf20Sopenharmony_ci struct loaded_vmcs *loaded_vmcs = arg; 7838c2ecf20Sopenharmony_ci int cpu = raw_smp_processor_id(); 7848c2ecf20Sopenharmony_ci 7858c2ecf20Sopenharmony_ci if (loaded_vmcs->cpu != cpu) 7868c2ecf20Sopenharmony_ci return; /* vcpu migration can race with cpu offline */ 7878c2ecf20Sopenharmony_ci if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs) 7888c2ecf20Sopenharmony_ci per_cpu(current_vmcs, cpu) = NULL; 7898c2ecf20Sopenharmony_ci 7908c2ecf20Sopenharmony_ci vmcs_clear(loaded_vmcs->vmcs); 7918c2ecf20Sopenharmony_ci if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) 7928c2ecf20Sopenharmony_ci vmcs_clear(loaded_vmcs->shadow_vmcs); 7938c2ecf20Sopenharmony_ci 7948c2ecf20Sopenharmony_ci list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link); 7958c2ecf20Sopenharmony_ci 7968c2ecf20Sopenharmony_ci /* 7978c2ecf20Sopenharmony_ci * Ensure all writes to loaded_vmcs, including deleting it from its 7988c2ecf20Sopenharmony_ci * current percpu list, complete before setting loaded_vmcs->vcpu to 7998c2ecf20Sopenharmony_ci * -1, otherwise a different cpu can see vcpu == -1 first and add 8008c2ecf20Sopenharmony_ci * loaded_vmcs to its percpu list before it's deleted from this cpu's 8018c2ecf20Sopenharmony_ci * list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs(). 8028c2ecf20Sopenharmony_ci */ 8038c2ecf20Sopenharmony_ci smp_wmb(); 8048c2ecf20Sopenharmony_ci 8058c2ecf20Sopenharmony_ci loaded_vmcs->cpu = -1; 8068c2ecf20Sopenharmony_ci loaded_vmcs->launched = 0; 8078c2ecf20Sopenharmony_ci} 8088c2ecf20Sopenharmony_ci 8098c2ecf20Sopenharmony_civoid loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) 8108c2ecf20Sopenharmony_ci{ 8118c2ecf20Sopenharmony_ci int cpu = loaded_vmcs->cpu; 8128c2ecf20Sopenharmony_ci 8138c2ecf20Sopenharmony_ci if (cpu != -1) 8148c2ecf20Sopenharmony_ci smp_call_function_single(cpu, 8158c2ecf20Sopenharmony_ci __loaded_vmcs_clear, loaded_vmcs, 1); 8168c2ecf20Sopenharmony_ci} 8178c2ecf20Sopenharmony_ci 8188c2ecf20Sopenharmony_cistatic bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, 8198c2ecf20Sopenharmony_ci unsigned field) 8208c2ecf20Sopenharmony_ci{ 8218c2ecf20Sopenharmony_ci bool ret; 8228c2ecf20Sopenharmony_ci u32 mask = 1 << (seg * SEG_FIELD_NR + field); 8238c2ecf20Sopenharmony_ci 8248c2ecf20Sopenharmony_ci if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) { 8258c2ecf20Sopenharmony_ci kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS); 8268c2ecf20Sopenharmony_ci vmx->segment_cache.bitmask = 0; 8278c2ecf20Sopenharmony_ci } 8288c2ecf20Sopenharmony_ci ret = vmx->segment_cache.bitmask & mask; 8298c2ecf20Sopenharmony_ci vmx->segment_cache.bitmask |= mask; 8308c2ecf20Sopenharmony_ci return ret; 8318c2ecf20Sopenharmony_ci} 8328c2ecf20Sopenharmony_ci 8338c2ecf20Sopenharmony_cistatic u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) 8348c2ecf20Sopenharmony_ci{ 8358c2ecf20Sopenharmony_ci u16 *p = &vmx->segment_cache.seg[seg].selector; 8368c2ecf20Sopenharmony_ci 8378c2ecf20Sopenharmony_ci if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) 8388c2ecf20Sopenharmony_ci *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); 8398c2ecf20Sopenharmony_ci return *p; 8408c2ecf20Sopenharmony_ci} 8418c2ecf20Sopenharmony_ci 8428c2ecf20Sopenharmony_cistatic ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) 8438c2ecf20Sopenharmony_ci{ 8448c2ecf20Sopenharmony_ci ulong *p = &vmx->segment_cache.seg[seg].base; 8458c2ecf20Sopenharmony_ci 8468c2ecf20Sopenharmony_ci if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) 8478c2ecf20Sopenharmony_ci *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); 8488c2ecf20Sopenharmony_ci return *p; 8498c2ecf20Sopenharmony_ci} 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_cistatic u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) 8528c2ecf20Sopenharmony_ci{ 8538c2ecf20Sopenharmony_ci u32 *p = &vmx->segment_cache.seg[seg].limit; 8548c2ecf20Sopenharmony_ci 8558c2ecf20Sopenharmony_ci if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) 8568c2ecf20Sopenharmony_ci *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); 8578c2ecf20Sopenharmony_ci return *p; 8588c2ecf20Sopenharmony_ci} 8598c2ecf20Sopenharmony_ci 8608c2ecf20Sopenharmony_cistatic u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) 8618c2ecf20Sopenharmony_ci{ 8628c2ecf20Sopenharmony_ci u32 *p = &vmx->segment_cache.seg[seg].ar; 8638c2ecf20Sopenharmony_ci 8648c2ecf20Sopenharmony_ci if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) 8658c2ecf20Sopenharmony_ci *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); 8668c2ecf20Sopenharmony_ci return *p; 8678c2ecf20Sopenharmony_ci} 8688c2ecf20Sopenharmony_ci 8698c2ecf20Sopenharmony_civoid update_exception_bitmap(struct kvm_vcpu *vcpu) 8708c2ecf20Sopenharmony_ci{ 8718c2ecf20Sopenharmony_ci u32 eb; 8728c2ecf20Sopenharmony_ci 8738c2ecf20Sopenharmony_ci eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | 8748c2ecf20Sopenharmony_ci (1u << DB_VECTOR) | (1u << AC_VECTOR); 8758c2ecf20Sopenharmony_ci /* 8768c2ecf20Sopenharmony_ci * Guest access to VMware backdoor ports could legitimately 8778c2ecf20Sopenharmony_ci * trigger #GP because of TSS I/O permission bitmap. 8788c2ecf20Sopenharmony_ci * We intercept those #GP and allow access to them anyway 8798c2ecf20Sopenharmony_ci * as VMware does. 8808c2ecf20Sopenharmony_ci */ 8818c2ecf20Sopenharmony_ci if (enable_vmware_backdoor) 8828c2ecf20Sopenharmony_ci eb |= (1u << GP_VECTOR); 8838c2ecf20Sopenharmony_ci if ((vcpu->guest_debug & 8848c2ecf20Sopenharmony_ci (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == 8858c2ecf20Sopenharmony_ci (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) 8868c2ecf20Sopenharmony_ci eb |= 1u << BP_VECTOR; 8878c2ecf20Sopenharmony_ci if (to_vmx(vcpu)->rmode.vm86_active) 8888c2ecf20Sopenharmony_ci eb = ~0; 8898c2ecf20Sopenharmony_ci if (!vmx_need_pf_intercept(vcpu)) 8908c2ecf20Sopenharmony_ci eb &= ~(1u << PF_VECTOR); 8918c2ecf20Sopenharmony_ci 8928c2ecf20Sopenharmony_ci /* When we are running a nested L2 guest and L1 specified for it a 8938c2ecf20Sopenharmony_ci * certain exception bitmap, we must trap the same exceptions and pass 8948c2ecf20Sopenharmony_ci * them to L1. When running L2, we will only handle the exceptions 8958c2ecf20Sopenharmony_ci * specified above if L1 did not want them. 8968c2ecf20Sopenharmony_ci */ 8978c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) 8988c2ecf20Sopenharmony_ci eb |= get_vmcs12(vcpu)->exception_bitmap; 8998c2ecf20Sopenharmony_ci else { 9008c2ecf20Sopenharmony_ci /* 9018c2ecf20Sopenharmony_ci * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched 9028c2ecf20Sopenharmony_ci * between guest and host. In that case we only care about present 9038c2ecf20Sopenharmony_ci * faults. For vmcs02, however, PFEC_MASK and PFEC_MATCH are set in 9048c2ecf20Sopenharmony_ci * prepare_vmcs02_rare. 9058c2ecf20Sopenharmony_ci */ 9068c2ecf20Sopenharmony_ci bool selective_pf_trap = enable_ept && (eb & (1u << PF_VECTOR)); 9078c2ecf20Sopenharmony_ci int mask = selective_pf_trap ? PFERR_PRESENT_MASK : 0; 9088c2ecf20Sopenharmony_ci vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, mask); 9098c2ecf20Sopenharmony_ci vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, mask); 9108c2ecf20Sopenharmony_ci } 9118c2ecf20Sopenharmony_ci 9128c2ecf20Sopenharmony_ci vmcs_write32(EXCEPTION_BITMAP, eb); 9138c2ecf20Sopenharmony_ci} 9148c2ecf20Sopenharmony_ci 9158c2ecf20Sopenharmony_ci/* 9168c2ecf20Sopenharmony_ci * Check if MSR is intercepted for currently loaded MSR bitmap. 9178c2ecf20Sopenharmony_ci */ 9188c2ecf20Sopenharmony_cistatic bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) 9198c2ecf20Sopenharmony_ci{ 9208c2ecf20Sopenharmony_ci unsigned long *msr_bitmap; 9218c2ecf20Sopenharmony_ci int f = sizeof(unsigned long); 9228c2ecf20Sopenharmony_ci 9238c2ecf20Sopenharmony_ci if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS)) 9248c2ecf20Sopenharmony_ci return true; 9258c2ecf20Sopenharmony_ci 9268c2ecf20Sopenharmony_ci msr_bitmap = vmx->loaded_vmcs->msr_bitmap; 9278c2ecf20Sopenharmony_ci 9288c2ecf20Sopenharmony_ci if (msr <= 0x1fff) { 9298c2ecf20Sopenharmony_ci return !!test_bit(msr, msr_bitmap + 0x800 / f); 9308c2ecf20Sopenharmony_ci } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { 9318c2ecf20Sopenharmony_ci msr &= 0x1fff; 9328c2ecf20Sopenharmony_ci return !!test_bit(msr, msr_bitmap + 0xc00 / f); 9338c2ecf20Sopenharmony_ci } 9348c2ecf20Sopenharmony_ci 9358c2ecf20Sopenharmony_ci return true; 9368c2ecf20Sopenharmony_ci} 9378c2ecf20Sopenharmony_ci 9388c2ecf20Sopenharmony_ciunsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) 9398c2ecf20Sopenharmony_ci{ 9408c2ecf20Sopenharmony_ci unsigned int flags = 0; 9418c2ecf20Sopenharmony_ci 9428c2ecf20Sopenharmony_ci if (vmx->loaded_vmcs->launched) 9438c2ecf20Sopenharmony_ci flags |= VMX_RUN_VMRESUME; 9448c2ecf20Sopenharmony_ci 9458c2ecf20Sopenharmony_ci /* 9468c2ecf20Sopenharmony_ci * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free 9478c2ecf20Sopenharmony_ci * to change it directly without causing a vmexit. In that case read 9488c2ecf20Sopenharmony_ci * it after vmexit and store it in vmx->spec_ctrl. 9498c2ecf20Sopenharmony_ci */ 9508c2ecf20Sopenharmony_ci if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) 9518c2ecf20Sopenharmony_ci flags |= VMX_RUN_SAVE_SPEC_CTRL; 9528c2ecf20Sopenharmony_ci 9538c2ecf20Sopenharmony_ci return flags; 9548c2ecf20Sopenharmony_ci} 9558c2ecf20Sopenharmony_ci 9568c2ecf20Sopenharmony_cistatic void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, 9578c2ecf20Sopenharmony_ci unsigned long entry, unsigned long exit) 9588c2ecf20Sopenharmony_ci{ 9598c2ecf20Sopenharmony_ci vm_entry_controls_clearbit(vmx, entry); 9608c2ecf20Sopenharmony_ci vm_exit_controls_clearbit(vmx, exit); 9618c2ecf20Sopenharmony_ci} 9628c2ecf20Sopenharmony_ci 9638c2ecf20Sopenharmony_ciint vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr) 9648c2ecf20Sopenharmony_ci{ 9658c2ecf20Sopenharmony_ci unsigned int i; 9668c2ecf20Sopenharmony_ci 9678c2ecf20Sopenharmony_ci for (i = 0; i < m->nr; ++i) { 9688c2ecf20Sopenharmony_ci if (m->val[i].index == msr) 9698c2ecf20Sopenharmony_ci return i; 9708c2ecf20Sopenharmony_ci } 9718c2ecf20Sopenharmony_ci return -ENOENT; 9728c2ecf20Sopenharmony_ci} 9738c2ecf20Sopenharmony_ci 9748c2ecf20Sopenharmony_cistatic void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) 9758c2ecf20Sopenharmony_ci{ 9768c2ecf20Sopenharmony_ci int i; 9778c2ecf20Sopenharmony_ci struct msr_autoload *m = &vmx->msr_autoload; 9788c2ecf20Sopenharmony_ci 9798c2ecf20Sopenharmony_ci switch (msr) { 9808c2ecf20Sopenharmony_ci case MSR_EFER: 9818c2ecf20Sopenharmony_ci if (cpu_has_load_ia32_efer()) { 9828c2ecf20Sopenharmony_ci clear_atomic_switch_msr_special(vmx, 9838c2ecf20Sopenharmony_ci VM_ENTRY_LOAD_IA32_EFER, 9848c2ecf20Sopenharmony_ci VM_EXIT_LOAD_IA32_EFER); 9858c2ecf20Sopenharmony_ci return; 9868c2ecf20Sopenharmony_ci } 9878c2ecf20Sopenharmony_ci break; 9888c2ecf20Sopenharmony_ci case MSR_CORE_PERF_GLOBAL_CTRL: 9898c2ecf20Sopenharmony_ci if (cpu_has_load_perf_global_ctrl()) { 9908c2ecf20Sopenharmony_ci clear_atomic_switch_msr_special(vmx, 9918c2ecf20Sopenharmony_ci VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, 9928c2ecf20Sopenharmony_ci VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); 9938c2ecf20Sopenharmony_ci return; 9948c2ecf20Sopenharmony_ci } 9958c2ecf20Sopenharmony_ci break; 9968c2ecf20Sopenharmony_ci } 9978c2ecf20Sopenharmony_ci i = vmx_find_loadstore_msr_slot(&m->guest, msr); 9988c2ecf20Sopenharmony_ci if (i < 0) 9998c2ecf20Sopenharmony_ci goto skip_guest; 10008c2ecf20Sopenharmony_ci --m->guest.nr; 10018c2ecf20Sopenharmony_ci m->guest.val[i] = m->guest.val[m->guest.nr]; 10028c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); 10038c2ecf20Sopenharmony_ci 10048c2ecf20Sopenharmony_ciskip_guest: 10058c2ecf20Sopenharmony_ci i = vmx_find_loadstore_msr_slot(&m->host, msr); 10068c2ecf20Sopenharmony_ci if (i < 0) 10078c2ecf20Sopenharmony_ci return; 10088c2ecf20Sopenharmony_ci 10098c2ecf20Sopenharmony_ci --m->host.nr; 10108c2ecf20Sopenharmony_ci m->host.val[i] = m->host.val[m->host.nr]; 10118c2ecf20Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); 10128c2ecf20Sopenharmony_ci} 10138c2ecf20Sopenharmony_ci 10148c2ecf20Sopenharmony_cistatic void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, 10158c2ecf20Sopenharmony_ci unsigned long entry, unsigned long exit, 10168c2ecf20Sopenharmony_ci unsigned long guest_val_vmcs, unsigned long host_val_vmcs, 10178c2ecf20Sopenharmony_ci u64 guest_val, u64 host_val) 10188c2ecf20Sopenharmony_ci{ 10198c2ecf20Sopenharmony_ci vmcs_write64(guest_val_vmcs, guest_val); 10208c2ecf20Sopenharmony_ci if (host_val_vmcs != HOST_IA32_EFER) 10218c2ecf20Sopenharmony_ci vmcs_write64(host_val_vmcs, host_val); 10228c2ecf20Sopenharmony_ci vm_entry_controls_setbit(vmx, entry); 10238c2ecf20Sopenharmony_ci vm_exit_controls_setbit(vmx, exit); 10248c2ecf20Sopenharmony_ci} 10258c2ecf20Sopenharmony_ci 10268c2ecf20Sopenharmony_cistatic void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, 10278c2ecf20Sopenharmony_ci u64 guest_val, u64 host_val, bool entry_only) 10288c2ecf20Sopenharmony_ci{ 10298c2ecf20Sopenharmony_ci int i, j = 0; 10308c2ecf20Sopenharmony_ci struct msr_autoload *m = &vmx->msr_autoload; 10318c2ecf20Sopenharmony_ci 10328c2ecf20Sopenharmony_ci switch (msr) { 10338c2ecf20Sopenharmony_ci case MSR_EFER: 10348c2ecf20Sopenharmony_ci if (cpu_has_load_ia32_efer()) { 10358c2ecf20Sopenharmony_ci add_atomic_switch_msr_special(vmx, 10368c2ecf20Sopenharmony_ci VM_ENTRY_LOAD_IA32_EFER, 10378c2ecf20Sopenharmony_ci VM_EXIT_LOAD_IA32_EFER, 10388c2ecf20Sopenharmony_ci GUEST_IA32_EFER, 10398c2ecf20Sopenharmony_ci HOST_IA32_EFER, 10408c2ecf20Sopenharmony_ci guest_val, host_val); 10418c2ecf20Sopenharmony_ci return; 10428c2ecf20Sopenharmony_ci } 10438c2ecf20Sopenharmony_ci break; 10448c2ecf20Sopenharmony_ci case MSR_CORE_PERF_GLOBAL_CTRL: 10458c2ecf20Sopenharmony_ci if (cpu_has_load_perf_global_ctrl()) { 10468c2ecf20Sopenharmony_ci add_atomic_switch_msr_special(vmx, 10478c2ecf20Sopenharmony_ci VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, 10488c2ecf20Sopenharmony_ci VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, 10498c2ecf20Sopenharmony_ci GUEST_IA32_PERF_GLOBAL_CTRL, 10508c2ecf20Sopenharmony_ci HOST_IA32_PERF_GLOBAL_CTRL, 10518c2ecf20Sopenharmony_ci guest_val, host_val); 10528c2ecf20Sopenharmony_ci return; 10538c2ecf20Sopenharmony_ci } 10548c2ecf20Sopenharmony_ci break; 10558c2ecf20Sopenharmony_ci case MSR_IA32_PEBS_ENABLE: 10568c2ecf20Sopenharmony_ci /* PEBS needs a quiescent period after being disabled (to write 10578c2ecf20Sopenharmony_ci * a record). Disabling PEBS through VMX MSR swapping doesn't 10588c2ecf20Sopenharmony_ci * provide that period, so a CPU could write host's record into 10598c2ecf20Sopenharmony_ci * guest's memory. 10608c2ecf20Sopenharmony_ci */ 10618c2ecf20Sopenharmony_ci wrmsrl(MSR_IA32_PEBS_ENABLE, 0); 10628c2ecf20Sopenharmony_ci } 10638c2ecf20Sopenharmony_ci 10648c2ecf20Sopenharmony_ci i = vmx_find_loadstore_msr_slot(&m->guest, msr); 10658c2ecf20Sopenharmony_ci if (!entry_only) 10668c2ecf20Sopenharmony_ci j = vmx_find_loadstore_msr_slot(&m->host, msr); 10678c2ecf20Sopenharmony_ci 10688c2ecf20Sopenharmony_ci if ((i < 0 && m->guest.nr == MAX_NR_LOADSTORE_MSRS) || 10698c2ecf20Sopenharmony_ci (j < 0 && m->host.nr == MAX_NR_LOADSTORE_MSRS)) { 10708c2ecf20Sopenharmony_ci printk_once(KERN_WARNING "Not enough msr switch entries. " 10718c2ecf20Sopenharmony_ci "Can't add msr %x\n", msr); 10728c2ecf20Sopenharmony_ci return; 10738c2ecf20Sopenharmony_ci } 10748c2ecf20Sopenharmony_ci if (i < 0) { 10758c2ecf20Sopenharmony_ci i = m->guest.nr++; 10768c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); 10778c2ecf20Sopenharmony_ci } 10788c2ecf20Sopenharmony_ci m->guest.val[i].index = msr; 10798c2ecf20Sopenharmony_ci m->guest.val[i].value = guest_val; 10808c2ecf20Sopenharmony_ci 10818c2ecf20Sopenharmony_ci if (entry_only) 10828c2ecf20Sopenharmony_ci return; 10838c2ecf20Sopenharmony_ci 10848c2ecf20Sopenharmony_ci if (j < 0) { 10858c2ecf20Sopenharmony_ci j = m->host.nr++; 10868c2ecf20Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); 10878c2ecf20Sopenharmony_ci } 10888c2ecf20Sopenharmony_ci m->host.val[j].index = msr; 10898c2ecf20Sopenharmony_ci m->host.val[j].value = host_val; 10908c2ecf20Sopenharmony_ci} 10918c2ecf20Sopenharmony_ci 10928c2ecf20Sopenharmony_cistatic bool update_transition_efer(struct vcpu_vmx *vmx) 10938c2ecf20Sopenharmony_ci{ 10948c2ecf20Sopenharmony_ci u64 guest_efer = vmx->vcpu.arch.efer; 10958c2ecf20Sopenharmony_ci u64 ignore_bits = 0; 10968c2ecf20Sopenharmony_ci int i; 10978c2ecf20Sopenharmony_ci 10988c2ecf20Sopenharmony_ci /* Shadow paging assumes NX to be available. */ 10998c2ecf20Sopenharmony_ci if (!enable_ept) 11008c2ecf20Sopenharmony_ci guest_efer |= EFER_NX; 11018c2ecf20Sopenharmony_ci 11028c2ecf20Sopenharmony_ci /* 11038c2ecf20Sopenharmony_ci * LMA and LME handled by hardware; SCE meaningless outside long mode. 11048c2ecf20Sopenharmony_ci */ 11058c2ecf20Sopenharmony_ci ignore_bits |= EFER_SCE; 11068c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 11078c2ecf20Sopenharmony_ci ignore_bits |= EFER_LMA | EFER_LME; 11088c2ecf20Sopenharmony_ci /* SCE is meaningful only in long mode on Intel */ 11098c2ecf20Sopenharmony_ci if (guest_efer & EFER_LMA) 11108c2ecf20Sopenharmony_ci ignore_bits &= ~(u64)EFER_SCE; 11118c2ecf20Sopenharmony_ci#endif 11128c2ecf20Sopenharmony_ci 11138c2ecf20Sopenharmony_ci /* 11148c2ecf20Sopenharmony_ci * On EPT, we can't emulate NX, so we must switch EFER atomically. 11158c2ecf20Sopenharmony_ci * On CPUs that support "load IA32_EFER", always switch EFER 11168c2ecf20Sopenharmony_ci * atomically, since it's faster than switching it manually. 11178c2ecf20Sopenharmony_ci */ 11188c2ecf20Sopenharmony_ci if (cpu_has_load_ia32_efer() || 11198c2ecf20Sopenharmony_ci (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) { 11208c2ecf20Sopenharmony_ci if (!(guest_efer & EFER_LMA)) 11218c2ecf20Sopenharmony_ci guest_efer &= ~EFER_LME; 11228c2ecf20Sopenharmony_ci if (guest_efer != host_efer) 11238c2ecf20Sopenharmony_ci add_atomic_switch_msr(vmx, MSR_EFER, 11248c2ecf20Sopenharmony_ci guest_efer, host_efer, false); 11258c2ecf20Sopenharmony_ci else 11268c2ecf20Sopenharmony_ci clear_atomic_switch_msr(vmx, MSR_EFER); 11278c2ecf20Sopenharmony_ci return false; 11288c2ecf20Sopenharmony_ci } 11298c2ecf20Sopenharmony_ci 11308c2ecf20Sopenharmony_ci i = __vmx_find_uret_msr(vmx, MSR_EFER); 11318c2ecf20Sopenharmony_ci if (i < 0) 11328c2ecf20Sopenharmony_ci return false; 11338c2ecf20Sopenharmony_ci 11348c2ecf20Sopenharmony_ci clear_atomic_switch_msr(vmx, MSR_EFER); 11358c2ecf20Sopenharmony_ci 11368c2ecf20Sopenharmony_ci guest_efer &= ~ignore_bits; 11378c2ecf20Sopenharmony_ci guest_efer |= host_efer & ignore_bits; 11388c2ecf20Sopenharmony_ci 11398c2ecf20Sopenharmony_ci vmx->guest_uret_msrs[i].data = guest_efer; 11408c2ecf20Sopenharmony_ci vmx->guest_uret_msrs[i].mask = ~ignore_bits; 11418c2ecf20Sopenharmony_ci 11428c2ecf20Sopenharmony_ci return true; 11438c2ecf20Sopenharmony_ci} 11448c2ecf20Sopenharmony_ci 11458c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_32 11468c2ecf20Sopenharmony_ci/* 11478c2ecf20Sopenharmony_ci * On 32-bit kernels, VM exits still load the FS and GS bases from the 11488c2ecf20Sopenharmony_ci * VMCS rather than the segment table. KVM uses this helper to figure 11498c2ecf20Sopenharmony_ci * out the current bases to poke them into the VMCS before entry. 11508c2ecf20Sopenharmony_ci */ 11518c2ecf20Sopenharmony_cistatic unsigned long segment_base(u16 selector) 11528c2ecf20Sopenharmony_ci{ 11538c2ecf20Sopenharmony_ci struct desc_struct *table; 11548c2ecf20Sopenharmony_ci unsigned long v; 11558c2ecf20Sopenharmony_ci 11568c2ecf20Sopenharmony_ci if (!(selector & ~SEGMENT_RPL_MASK)) 11578c2ecf20Sopenharmony_ci return 0; 11588c2ecf20Sopenharmony_ci 11598c2ecf20Sopenharmony_ci table = get_current_gdt_ro(); 11608c2ecf20Sopenharmony_ci 11618c2ecf20Sopenharmony_ci if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) { 11628c2ecf20Sopenharmony_ci u16 ldt_selector = kvm_read_ldt(); 11638c2ecf20Sopenharmony_ci 11648c2ecf20Sopenharmony_ci if (!(ldt_selector & ~SEGMENT_RPL_MASK)) 11658c2ecf20Sopenharmony_ci return 0; 11668c2ecf20Sopenharmony_ci 11678c2ecf20Sopenharmony_ci table = (struct desc_struct *)segment_base(ldt_selector); 11688c2ecf20Sopenharmony_ci } 11698c2ecf20Sopenharmony_ci v = get_desc_base(&table[selector >> 3]); 11708c2ecf20Sopenharmony_ci return v; 11718c2ecf20Sopenharmony_ci} 11728c2ecf20Sopenharmony_ci#endif 11738c2ecf20Sopenharmony_ci 11748c2ecf20Sopenharmony_cistatic inline bool pt_can_write_msr(struct vcpu_vmx *vmx) 11758c2ecf20Sopenharmony_ci{ 11768c2ecf20Sopenharmony_ci return vmx_pt_mode_is_host_guest() && 11778c2ecf20Sopenharmony_ci !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN); 11788c2ecf20Sopenharmony_ci} 11798c2ecf20Sopenharmony_ci 11808c2ecf20Sopenharmony_cistatic inline bool pt_output_base_valid(struct kvm_vcpu *vcpu, u64 base) 11818c2ecf20Sopenharmony_ci{ 11828c2ecf20Sopenharmony_ci /* The base must be 128-byte aligned and a legal physical address. */ 11838c2ecf20Sopenharmony_ci return !kvm_vcpu_is_illegal_gpa(vcpu, base) && !(base & 0x7f); 11848c2ecf20Sopenharmony_ci} 11858c2ecf20Sopenharmony_ci 11868c2ecf20Sopenharmony_cistatic inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range) 11878c2ecf20Sopenharmony_ci{ 11888c2ecf20Sopenharmony_ci u32 i; 11898c2ecf20Sopenharmony_ci 11908c2ecf20Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status); 11918c2ecf20Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base); 11928c2ecf20Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask); 11938c2ecf20Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match); 11948c2ecf20Sopenharmony_ci for (i = 0; i < addr_range; i++) { 11958c2ecf20Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]); 11968c2ecf20Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]); 11978c2ecf20Sopenharmony_ci } 11988c2ecf20Sopenharmony_ci} 11998c2ecf20Sopenharmony_ci 12008c2ecf20Sopenharmony_cistatic inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range) 12018c2ecf20Sopenharmony_ci{ 12028c2ecf20Sopenharmony_ci u32 i; 12038c2ecf20Sopenharmony_ci 12048c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status); 12058c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base); 12068c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask); 12078c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match); 12088c2ecf20Sopenharmony_ci for (i = 0; i < addr_range; i++) { 12098c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]); 12108c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]); 12118c2ecf20Sopenharmony_ci } 12128c2ecf20Sopenharmony_ci} 12138c2ecf20Sopenharmony_ci 12148c2ecf20Sopenharmony_cistatic void pt_guest_enter(struct vcpu_vmx *vmx) 12158c2ecf20Sopenharmony_ci{ 12168c2ecf20Sopenharmony_ci if (vmx_pt_mode_is_system()) 12178c2ecf20Sopenharmony_ci return; 12188c2ecf20Sopenharmony_ci 12198c2ecf20Sopenharmony_ci /* 12208c2ecf20Sopenharmony_ci * GUEST_IA32_RTIT_CTL is already set in the VMCS. 12218c2ecf20Sopenharmony_ci * Save host state before VM entry. 12228c2ecf20Sopenharmony_ci */ 12238c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); 12248c2ecf20Sopenharmony_ci if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) { 12258c2ecf20Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_CTL, 0); 12268c2ecf20Sopenharmony_ci pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range); 12278c2ecf20Sopenharmony_ci pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range); 12288c2ecf20Sopenharmony_ci } 12298c2ecf20Sopenharmony_ci} 12308c2ecf20Sopenharmony_ci 12318c2ecf20Sopenharmony_cistatic void pt_guest_exit(struct vcpu_vmx *vmx) 12328c2ecf20Sopenharmony_ci{ 12338c2ecf20Sopenharmony_ci if (vmx_pt_mode_is_system()) 12348c2ecf20Sopenharmony_ci return; 12358c2ecf20Sopenharmony_ci 12368c2ecf20Sopenharmony_ci if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) { 12378c2ecf20Sopenharmony_ci pt_save_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range); 12388c2ecf20Sopenharmony_ci pt_load_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range); 12398c2ecf20Sopenharmony_ci } 12408c2ecf20Sopenharmony_ci 12418c2ecf20Sopenharmony_ci /* Reload host state (IA32_RTIT_CTL will be cleared on VM exit). */ 12428c2ecf20Sopenharmony_ci wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); 12438c2ecf20Sopenharmony_ci} 12448c2ecf20Sopenharmony_ci 12458c2ecf20Sopenharmony_civoid vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, 12468c2ecf20Sopenharmony_ci unsigned long fs_base, unsigned long gs_base) 12478c2ecf20Sopenharmony_ci{ 12488c2ecf20Sopenharmony_ci if (unlikely(fs_sel != host->fs_sel)) { 12498c2ecf20Sopenharmony_ci if (!(fs_sel & 7)) 12508c2ecf20Sopenharmony_ci vmcs_write16(HOST_FS_SELECTOR, fs_sel); 12518c2ecf20Sopenharmony_ci else 12528c2ecf20Sopenharmony_ci vmcs_write16(HOST_FS_SELECTOR, 0); 12538c2ecf20Sopenharmony_ci host->fs_sel = fs_sel; 12548c2ecf20Sopenharmony_ci } 12558c2ecf20Sopenharmony_ci if (unlikely(gs_sel != host->gs_sel)) { 12568c2ecf20Sopenharmony_ci if (!(gs_sel & 7)) 12578c2ecf20Sopenharmony_ci vmcs_write16(HOST_GS_SELECTOR, gs_sel); 12588c2ecf20Sopenharmony_ci else 12598c2ecf20Sopenharmony_ci vmcs_write16(HOST_GS_SELECTOR, 0); 12608c2ecf20Sopenharmony_ci host->gs_sel = gs_sel; 12618c2ecf20Sopenharmony_ci } 12628c2ecf20Sopenharmony_ci if (unlikely(fs_base != host->fs_base)) { 12638c2ecf20Sopenharmony_ci vmcs_writel(HOST_FS_BASE, fs_base); 12648c2ecf20Sopenharmony_ci host->fs_base = fs_base; 12658c2ecf20Sopenharmony_ci } 12668c2ecf20Sopenharmony_ci if (unlikely(gs_base != host->gs_base)) { 12678c2ecf20Sopenharmony_ci vmcs_writel(HOST_GS_BASE, gs_base); 12688c2ecf20Sopenharmony_ci host->gs_base = gs_base; 12698c2ecf20Sopenharmony_ci } 12708c2ecf20Sopenharmony_ci} 12718c2ecf20Sopenharmony_ci 12728c2ecf20Sopenharmony_civoid vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) 12738c2ecf20Sopenharmony_ci{ 12748c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 12758c2ecf20Sopenharmony_ci struct vmcs_host_state *host_state; 12768c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 12778c2ecf20Sopenharmony_ci int cpu = raw_smp_processor_id(); 12788c2ecf20Sopenharmony_ci#endif 12798c2ecf20Sopenharmony_ci unsigned long fs_base, gs_base; 12808c2ecf20Sopenharmony_ci u16 fs_sel, gs_sel; 12818c2ecf20Sopenharmony_ci int i; 12828c2ecf20Sopenharmony_ci 12838c2ecf20Sopenharmony_ci vmx->req_immediate_exit = false; 12848c2ecf20Sopenharmony_ci 12858c2ecf20Sopenharmony_ci /* 12868c2ecf20Sopenharmony_ci * Note that guest MSRs to be saved/restored can also be changed 12878c2ecf20Sopenharmony_ci * when guest state is loaded. This happens when guest transitions 12888c2ecf20Sopenharmony_ci * to/from long-mode by setting MSR_EFER.LMA. 12898c2ecf20Sopenharmony_ci */ 12908c2ecf20Sopenharmony_ci if (!vmx->guest_uret_msrs_loaded) { 12918c2ecf20Sopenharmony_ci vmx->guest_uret_msrs_loaded = true; 12928c2ecf20Sopenharmony_ci for (i = 0; i < vmx->nr_active_uret_msrs; ++i) 12938c2ecf20Sopenharmony_ci kvm_set_user_return_msr(vmx->guest_uret_msrs[i].slot, 12948c2ecf20Sopenharmony_ci vmx->guest_uret_msrs[i].data, 12958c2ecf20Sopenharmony_ci vmx->guest_uret_msrs[i].mask); 12968c2ecf20Sopenharmony_ci 12978c2ecf20Sopenharmony_ci } 12988c2ecf20Sopenharmony_ci 12998c2ecf20Sopenharmony_ci if (vmx->nested.need_vmcs12_to_shadow_sync) 13008c2ecf20Sopenharmony_ci nested_sync_vmcs12_to_shadow(vcpu); 13018c2ecf20Sopenharmony_ci 13028c2ecf20Sopenharmony_ci if (vmx->guest_state_loaded) 13038c2ecf20Sopenharmony_ci return; 13048c2ecf20Sopenharmony_ci 13058c2ecf20Sopenharmony_ci host_state = &vmx->loaded_vmcs->host_state; 13068c2ecf20Sopenharmony_ci 13078c2ecf20Sopenharmony_ci /* 13088c2ecf20Sopenharmony_ci * Set host fs and gs selectors. Unfortunately, 22.2.3 does not 13098c2ecf20Sopenharmony_ci * allow segment selectors with cpl > 0 or ti == 1. 13108c2ecf20Sopenharmony_ci */ 13118c2ecf20Sopenharmony_ci host_state->ldt_sel = kvm_read_ldt(); 13128c2ecf20Sopenharmony_ci 13138c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 13148c2ecf20Sopenharmony_ci savesegment(ds, host_state->ds_sel); 13158c2ecf20Sopenharmony_ci savesegment(es, host_state->es_sel); 13168c2ecf20Sopenharmony_ci 13178c2ecf20Sopenharmony_ci gs_base = cpu_kernelmode_gs_base(cpu); 13188c2ecf20Sopenharmony_ci if (likely(is_64bit_mm(current->mm))) { 13198c2ecf20Sopenharmony_ci current_save_fsgs(); 13208c2ecf20Sopenharmony_ci fs_sel = current->thread.fsindex; 13218c2ecf20Sopenharmony_ci gs_sel = current->thread.gsindex; 13228c2ecf20Sopenharmony_ci fs_base = current->thread.fsbase; 13238c2ecf20Sopenharmony_ci vmx->msr_host_kernel_gs_base = current->thread.gsbase; 13248c2ecf20Sopenharmony_ci } else { 13258c2ecf20Sopenharmony_ci savesegment(fs, fs_sel); 13268c2ecf20Sopenharmony_ci savesegment(gs, gs_sel); 13278c2ecf20Sopenharmony_ci fs_base = read_msr(MSR_FS_BASE); 13288c2ecf20Sopenharmony_ci vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); 13298c2ecf20Sopenharmony_ci } 13308c2ecf20Sopenharmony_ci 13318c2ecf20Sopenharmony_ci wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); 13328c2ecf20Sopenharmony_ci#else 13338c2ecf20Sopenharmony_ci savesegment(fs, fs_sel); 13348c2ecf20Sopenharmony_ci savesegment(gs, gs_sel); 13358c2ecf20Sopenharmony_ci fs_base = segment_base(fs_sel); 13368c2ecf20Sopenharmony_ci gs_base = segment_base(gs_sel); 13378c2ecf20Sopenharmony_ci#endif 13388c2ecf20Sopenharmony_ci 13398c2ecf20Sopenharmony_ci vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); 13408c2ecf20Sopenharmony_ci vmx->guest_state_loaded = true; 13418c2ecf20Sopenharmony_ci} 13428c2ecf20Sopenharmony_ci 13438c2ecf20Sopenharmony_cistatic void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) 13448c2ecf20Sopenharmony_ci{ 13458c2ecf20Sopenharmony_ci struct vmcs_host_state *host_state; 13468c2ecf20Sopenharmony_ci 13478c2ecf20Sopenharmony_ci if (!vmx->guest_state_loaded) 13488c2ecf20Sopenharmony_ci return; 13498c2ecf20Sopenharmony_ci 13508c2ecf20Sopenharmony_ci host_state = &vmx->loaded_vmcs->host_state; 13518c2ecf20Sopenharmony_ci 13528c2ecf20Sopenharmony_ci ++vmx->vcpu.stat.host_state_reload; 13538c2ecf20Sopenharmony_ci 13548c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 13558c2ecf20Sopenharmony_ci rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); 13568c2ecf20Sopenharmony_ci#endif 13578c2ecf20Sopenharmony_ci if (host_state->ldt_sel || (host_state->gs_sel & 7)) { 13588c2ecf20Sopenharmony_ci kvm_load_ldt(host_state->ldt_sel); 13598c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 13608c2ecf20Sopenharmony_ci load_gs_index(host_state->gs_sel); 13618c2ecf20Sopenharmony_ci#else 13628c2ecf20Sopenharmony_ci loadsegment(gs, host_state->gs_sel); 13638c2ecf20Sopenharmony_ci#endif 13648c2ecf20Sopenharmony_ci } 13658c2ecf20Sopenharmony_ci if (host_state->fs_sel & 7) 13668c2ecf20Sopenharmony_ci loadsegment(fs, host_state->fs_sel); 13678c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 13688c2ecf20Sopenharmony_ci if (unlikely(host_state->ds_sel | host_state->es_sel)) { 13698c2ecf20Sopenharmony_ci loadsegment(ds, host_state->ds_sel); 13708c2ecf20Sopenharmony_ci loadsegment(es, host_state->es_sel); 13718c2ecf20Sopenharmony_ci } 13728c2ecf20Sopenharmony_ci#endif 13738c2ecf20Sopenharmony_ci invalidate_tss_limit(); 13748c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 13758c2ecf20Sopenharmony_ci wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); 13768c2ecf20Sopenharmony_ci#endif 13778c2ecf20Sopenharmony_ci load_fixmap_gdt(raw_smp_processor_id()); 13788c2ecf20Sopenharmony_ci vmx->guest_state_loaded = false; 13798c2ecf20Sopenharmony_ci vmx->guest_uret_msrs_loaded = false; 13808c2ecf20Sopenharmony_ci} 13818c2ecf20Sopenharmony_ci 13828c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 13838c2ecf20Sopenharmony_cistatic u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) 13848c2ecf20Sopenharmony_ci{ 13858c2ecf20Sopenharmony_ci preempt_disable(); 13868c2ecf20Sopenharmony_ci if (vmx->guest_state_loaded) 13878c2ecf20Sopenharmony_ci rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); 13888c2ecf20Sopenharmony_ci preempt_enable(); 13898c2ecf20Sopenharmony_ci return vmx->msr_guest_kernel_gs_base; 13908c2ecf20Sopenharmony_ci} 13918c2ecf20Sopenharmony_ci 13928c2ecf20Sopenharmony_cistatic void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) 13938c2ecf20Sopenharmony_ci{ 13948c2ecf20Sopenharmony_ci preempt_disable(); 13958c2ecf20Sopenharmony_ci if (vmx->guest_state_loaded) 13968c2ecf20Sopenharmony_ci wrmsrl(MSR_KERNEL_GS_BASE, data); 13978c2ecf20Sopenharmony_ci preempt_enable(); 13988c2ecf20Sopenharmony_ci vmx->msr_guest_kernel_gs_base = data; 13998c2ecf20Sopenharmony_ci} 14008c2ecf20Sopenharmony_ci#endif 14018c2ecf20Sopenharmony_ci 14028c2ecf20Sopenharmony_civoid vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, 14038c2ecf20Sopenharmony_ci struct loaded_vmcs *buddy) 14048c2ecf20Sopenharmony_ci{ 14058c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 14068c2ecf20Sopenharmony_ci bool already_loaded = vmx->loaded_vmcs->cpu == cpu; 14078c2ecf20Sopenharmony_ci struct vmcs *prev; 14088c2ecf20Sopenharmony_ci 14098c2ecf20Sopenharmony_ci if (!already_loaded) { 14108c2ecf20Sopenharmony_ci loaded_vmcs_clear(vmx->loaded_vmcs); 14118c2ecf20Sopenharmony_ci local_irq_disable(); 14128c2ecf20Sopenharmony_ci 14138c2ecf20Sopenharmony_ci /* 14148c2ecf20Sopenharmony_ci * Ensure loaded_vmcs->cpu is read before adding loaded_vmcs to 14158c2ecf20Sopenharmony_ci * this cpu's percpu list, otherwise it may not yet be deleted 14168c2ecf20Sopenharmony_ci * from its previous cpu's percpu list. Pairs with the 14178c2ecf20Sopenharmony_ci * smb_wmb() in __loaded_vmcs_clear(). 14188c2ecf20Sopenharmony_ci */ 14198c2ecf20Sopenharmony_ci smp_rmb(); 14208c2ecf20Sopenharmony_ci 14218c2ecf20Sopenharmony_ci list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, 14228c2ecf20Sopenharmony_ci &per_cpu(loaded_vmcss_on_cpu, cpu)); 14238c2ecf20Sopenharmony_ci local_irq_enable(); 14248c2ecf20Sopenharmony_ci } 14258c2ecf20Sopenharmony_ci 14268c2ecf20Sopenharmony_ci prev = per_cpu(current_vmcs, cpu); 14278c2ecf20Sopenharmony_ci if (prev != vmx->loaded_vmcs->vmcs) { 14288c2ecf20Sopenharmony_ci per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; 14298c2ecf20Sopenharmony_ci vmcs_load(vmx->loaded_vmcs->vmcs); 14308c2ecf20Sopenharmony_ci 14318c2ecf20Sopenharmony_ci /* 14328c2ecf20Sopenharmony_ci * No indirect branch prediction barrier needed when switching 14338c2ecf20Sopenharmony_ci * the active VMCS within a vCPU, unless IBRS is advertised to 14348c2ecf20Sopenharmony_ci * the vCPU. To minimize the number of IBPBs executed, KVM 14358c2ecf20Sopenharmony_ci * performs IBPB on nested VM-Exit (a single nested transition 14368c2ecf20Sopenharmony_ci * may switch the active VMCS multiple times). 14378c2ecf20Sopenharmony_ci */ 14388c2ecf20Sopenharmony_ci if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev)) 14398c2ecf20Sopenharmony_ci indirect_branch_prediction_barrier(); 14408c2ecf20Sopenharmony_ci } 14418c2ecf20Sopenharmony_ci 14428c2ecf20Sopenharmony_ci if (!already_loaded) { 14438c2ecf20Sopenharmony_ci void *gdt = get_current_gdt_ro(); 14448c2ecf20Sopenharmony_ci unsigned long sysenter_esp; 14458c2ecf20Sopenharmony_ci 14468c2ecf20Sopenharmony_ci /* 14478c2ecf20Sopenharmony_ci * Flush all EPTP/VPID contexts, the new pCPU may have stale 14488c2ecf20Sopenharmony_ci * TLB entries from its previous association with the vCPU. 14498c2ecf20Sopenharmony_ci */ 14508c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 14518c2ecf20Sopenharmony_ci 14528c2ecf20Sopenharmony_ci /* 14538c2ecf20Sopenharmony_ci * Linux uses per-cpu TSS and GDT, so set these when switching 14548c2ecf20Sopenharmony_ci * processors. See 22.2.4. 14558c2ecf20Sopenharmony_ci */ 14568c2ecf20Sopenharmony_ci vmcs_writel(HOST_TR_BASE, 14578c2ecf20Sopenharmony_ci (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); 14588c2ecf20Sopenharmony_ci vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ 14598c2ecf20Sopenharmony_ci 14608c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); 14618c2ecf20Sopenharmony_ci vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ 14628c2ecf20Sopenharmony_ci 14638c2ecf20Sopenharmony_ci vmx->loaded_vmcs->cpu = cpu; 14648c2ecf20Sopenharmony_ci } 14658c2ecf20Sopenharmony_ci 14668c2ecf20Sopenharmony_ci /* Setup TSC multiplier */ 14678c2ecf20Sopenharmony_ci if (kvm_has_tsc_control && 14688c2ecf20Sopenharmony_ci vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) 14698c2ecf20Sopenharmony_ci decache_tsc_multiplier(vmx); 14708c2ecf20Sopenharmony_ci} 14718c2ecf20Sopenharmony_ci 14728c2ecf20Sopenharmony_ci/* 14738c2ecf20Sopenharmony_ci * Switches to specified vcpu, until a matching vcpu_put(), but assumes 14748c2ecf20Sopenharmony_ci * vcpu mutex is already taken. 14758c2ecf20Sopenharmony_ci */ 14768c2ecf20Sopenharmony_cistatic void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 14778c2ecf20Sopenharmony_ci{ 14788c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 14798c2ecf20Sopenharmony_ci 14808c2ecf20Sopenharmony_ci vmx_vcpu_load_vmcs(vcpu, cpu, NULL); 14818c2ecf20Sopenharmony_ci 14828c2ecf20Sopenharmony_ci vmx_vcpu_pi_load(vcpu, cpu); 14838c2ecf20Sopenharmony_ci 14848c2ecf20Sopenharmony_ci vmx->host_debugctlmsr = get_debugctlmsr(); 14858c2ecf20Sopenharmony_ci} 14868c2ecf20Sopenharmony_ci 14878c2ecf20Sopenharmony_cistatic void vmx_vcpu_put(struct kvm_vcpu *vcpu) 14888c2ecf20Sopenharmony_ci{ 14898c2ecf20Sopenharmony_ci vmx_vcpu_pi_put(vcpu); 14908c2ecf20Sopenharmony_ci 14918c2ecf20Sopenharmony_ci vmx_prepare_switch_to_host(to_vmx(vcpu)); 14928c2ecf20Sopenharmony_ci} 14938c2ecf20Sopenharmony_ci 14948c2ecf20Sopenharmony_cistatic bool emulation_required(struct kvm_vcpu *vcpu) 14958c2ecf20Sopenharmony_ci{ 14968c2ecf20Sopenharmony_ci return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu); 14978c2ecf20Sopenharmony_ci} 14988c2ecf20Sopenharmony_ci 14998c2ecf20Sopenharmony_ciunsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) 15008c2ecf20Sopenharmony_ci{ 15018c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 15028c2ecf20Sopenharmony_ci unsigned long rflags, save_rflags; 15038c2ecf20Sopenharmony_ci 15048c2ecf20Sopenharmony_ci if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) { 15058c2ecf20Sopenharmony_ci kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); 15068c2ecf20Sopenharmony_ci rflags = vmcs_readl(GUEST_RFLAGS); 15078c2ecf20Sopenharmony_ci if (vmx->rmode.vm86_active) { 15088c2ecf20Sopenharmony_ci rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; 15098c2ecf20Sopenharmony_ci save_rflags = vmx->rmode.save_rflags; 15108c2ecf20Sopenharmony_ci rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; 15118c2ecf20Sopenharmony_ci } 15128c2ecf20Sopenharmony_ci vmx->rflags = rflags; 15138c2ecf20Sopenharmony_ci } 15148c2ecf20Sopenharmony_ci return vmx->rflags; 15158c2ecf20Sopenharmony_ci} 15168c2ecf20Sopenharmony_ci 15178c2ecf20Sopenharmony_civoid vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 15188c2ecf20Sopenharmony_ci{ 15198c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 15208c2ecf20Sopenharmony_ci unsigned long old_rflags; 15218c2ecf20Sopenharmony_ci 15228c2ecf20Sopenharmony_ci /* 15238c2ecf20Sopenharmony_ci * Unlike CR0 and CR4, RFLAGS handling requires checking if the vCPU 15248c2ecf20Sopenharmony_ci * is an unrestricted guest in order to mark L2 as needing emulation 15258c2ecf20Sopenharmony_ci * if L1 runs L2 as a restricted guest. 15268c2ecf20Sopenharmony_ci */ 15278c2ecf20Sopenharmony_ci if (is_unrestricted_guest(vcpu)) { 15288c2ecf20Sopenharmony_ci kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); 15298c2ecf20Sopenharmony_ci vmx->rflags = rflags; 15308c2ecf20Sopenharmony_ci vmcs_writel(GUEST_RFLAGS, rflags); 15318c2ecf20Sopenharmony_ci return; 15328c2ecf20Sopenharmony_ci } 15338c2ecf20Sopenharmony_ci 15348c2ecf20Sopenharmony_ci old_rflags = vmx_get_rflags(vcpu); 15358c2ecf20Sopenharmony_ci vmx->rflags = rflags; 15368c2ecf20Sopenharmony_ci if (vmx->rmode.vm86_active) { 15378c2ecf20Sopenharmony_ci vmx->rmode.save_rflags = rflags; 15388c2ecf20Sopenharmony_ci rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 15398c2ecf20Sopenharmony_ci } 15408c2ecf20Sopenharmony_ci vmcs_writel(GUEST_RFLAGS, rflags); 15418c2ecf20Sopenharmony_ci 15428c2ecf20Sopenharmony_ci if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM) 15438c2ecf20Sopenharmony_ci vmx->emulation_required = emulation_required(vcpu); 15448c2ecf20Sopenharmony_ci} 15458c2ecf20Sopenharmony_ci 15468c2ecf20Sopenharmony_ciu32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) 15478c2ecf20Sopenharmony_ci{ 15488c2ecf20Sopenharmony_ci u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); 15498c2ecf20Sopenharmony_ci int ret = 0; 15508c2ecf20Sopenharmony_ci 15518c2ecf20Sopenharmony_ci if (interruptibility & GUEST_INTR_STATE_STI) 15528c2ecf20Sopenharmony_ci ret |= KVM_X86_SHADOW_INT_STI; 15538c2ecf20Sopenharmony_ci if (interruptibility & GUEST_INTR_STATE_MOV_SS) 15548c2ecf20Sopenharmony_ci ret |= KVM_X86_SHADOW_INT_MOV_SS; 15558c2ecf20Sopenharmony_ci 15568c2ecf20Sopenharmony_ci return ret; 15578c2ecf20Sopenharmony_ci} 15588c2ecf20Sopenharmony_ci 15598c2ecf20Sopenharmony_civoid vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) 15608c2ecf20Sopenharmony_ci{ 15618c2ecf20Sopenharmony_ci u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); 15628c2ecf20Sopenharmony_ci u32 interruptibility = interruptibility_old; 15638c2ecf20Sopenharmony_ci 15648c2ecf20Sopenharmony_ci interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); 15658c2ecf20Sopenharmony_ci 15668c2ecf20Sopenharmony_ci if (mask & KVM_X86_SHADOW_INT_MOV_SS) 15678c2ecf20Sopenharmony_ci interruptibility |= GUEST_INTR_STATE_MOV_SS; 15688c2ecf20Sopenharmony_ci else if (mask & KVM_X86_SHADOW_INT_STI) 15698c2ecf20Sopenharmony_ci interruptibility |= GUEST_INTR_STATE_STI; 15708c2ecf20Sopenharmony_ci 15718c2ecf20Sopenharmony_ci if ((interruptibility != interruptibility_old)) 15728c2ecf20Sopenharmony_ci vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility); 15738c2ecf20Sopenharmony_ci} 15748c2ecf20Sopenharmony_ci 15758c2ecf20Sopenharmony_cistatic int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) 15768c2ecf20Sopenharmony_ci{ 15778c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 15788c2ecf20Sopenharmony_ci unsigned long value; 15798c2ecf20Sopenharmony_ci 15808c2ecf20Sopenharmony_ci /* 15818c2ecf20Sopenharmony_ci * Any MSR write that attempts to change bits marked reserved will 15828c2ecf20Sopenharmony_ci * case a #GP fault. 15838c2ecf20Sopenharmony_ci */ 15848c2ecf20Sopenharmony_ci if (data & vmx->pt_desc.ctl_bitmask) 15858c2ecf20Sopenharmony_ci return 1; 15868c2ecf20Sopenharmony_ci 15878c2ecf20Sopenharmony_ci /* 15888c2ecf20Sopenharmony_ci * Any attempt to modify IA32_RTIT_CTL while TraceEn is set will 15898c2ecf20Sopenharmony_ci * result in a #GP unless the same write also clears TraceEn. 15908c2ecf20Sopenharmony_ci */ 15918c2ecf20Sopenharmony_ci if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) && 15928c2ecf20Sopenharmony_ci ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN)) 15938c2ecf20Sopenharmony_ci return 1; 15948c2ecf20Sopenharmony_ci 15958c2ecf20Sopenharmony_ci /* 15968c2ecf20Sopenharmony_ci * WRMSR to IA32_RTIT_CTL that sets TraceEn but clears this bit 15978c2ecf20Sopenharmony_ci * and FabricEn would cause #GP, if 15988c2ecf20Sopenharmony_ci * CPUID.(EAX=14H, ECX=0):ECX.SNGLRGNOUT[bit 2] = 0 15998c2ecf20Sopenharmony_ci */ 16008c2ecf20Sopenharmony_ci if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) && 16018c2ecf20Sopenharmony_ci !(data & RTIT_CTL_FABRIC_EN) && 16028c2ecf20Sopenharmony_ci !intel_pt_validate_cap(vmx->pt_desc.caps, 16038c2ecf20Sopenharmony_ci PT_CAP_single_range_output)) 16048c2ecf20Sopenharmony_ci return 1; 16058c2ecf20Sopenharmony_ci 16068c2ecf20Sopenharmony_ci /* 16078c2ecf20Sopenharmony_ci * MTCFreq, CycThresh and PSBFreq encodings check, any MSR write that 16088c2ecf20Sopenharmony_ci * utilize encodings marked reserved will casue a #GP fault. 16098c2ecf20Sopenharmony_ci */ 16108c2ecf20Sopenharmony_ci value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods); 16118c2ecf20Sopenharmony_ci if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) && 16128c2ecf20Sopenharmony_ci !test_bit((data & RTIT_CTL_MTC_RANGE) >> 16138c2ecf20Sopenharmony_ci RTIT_CTL_MTC_RANGE_OFFSET, &value)) 16148c2ecf20Sopenharmony_ci return 1; 16158c2ecf20Sopenharmony_ci value = intel_pt_validate_cap(vmx->pt_desc.caps, 16168c2ecf20Sopenharmony_ci PT_CAP_cycle_thresholds); 16178c2ecf20Sopenharmony_ci if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) && 16188c2ecf20Sopenharmony_ci !test_bit((data & RTIT_CTL_CYC_THRESH) >> 16198c2ecf20Sopenharmony_ci RTIT_CTL_CYC_THRESH_OFFSET, &value)) 16208c2ecf20Sopenharmony_ci return 1; 16218c2ecf20Sopenharmony_ci value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods); 16228c2ecf20Sopenharmony_ci if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) && 16238c2ecf20Sopenharmony_ci !test_bit((data & RTIT_CTL_PSB_FREQ) >> 16248c2ecf20Sopenharmony_ci RTIT_CTL_PSB_FREQ_OFFSET, &value)) 16258c2ecf20Sopenharmony_ci return 1; 16268c2ecf20Sopenharmony_ci 16278c2ecf20Sopenharmony_ci /* 16288c2ecf20Sopenharmony_ci * If ADDRx_CFG is reserved or the encodings is >2 will 16298c2ecf20Sopenharmony_ci * cause a #GP fault. 16308c2ecf20Sopenharmony_ci */ 16318c2ecf20Sopenharmony_ci value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET; 16328c2ecf20Sopenharmony_ci if ((value && (vmx->pt_desc.addr_range < 1)) || (value > 2)) 16338c2ecf20Sopenharmony_ci return 1; 16348c2ecf20Sopenharmony_ci value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET; 16358c2ecf20Sopenharmony_ci if ((value && (vmx->pt_desc.addr_range < 2)) || (value > 2)) 16368c2ecf20Sopenharmony_ci return 1; 16378c2ecf20Sopenharmony_ci value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET; 16388c2ecf20Sopenharmony_ci if ((value && (vmx->pt_desc.addr_range < 3)) || (value > 2)) 16398c2ecf20Sopenharmony_ci return 1; 16408c2ecf20Sopenharmony_ci value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET; 16418c2ecf20Sopenharmony_ci if ((value && (vmx->pt_desc.addr_range < 4)) || (value > 2)) 16428c2ecf20Sopenharmony_ci return 1; 16438c2ecf20Sopenharmony_ci 16448c2ecf20Sopenharmony_ci return 0; 16458c2ecf20Sopenharmony_ci} 16468c2ecf20Sopenharmony_ci 16478c2ecf20Sopenharmony_cistatic bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len) 16488c2ecf20Sopenharmony_ci{ 16498c2ecf20Sopenharmony_ci return true; 16508c2ecf20Sopenharmony_ci} 16518c2ecf20Sopenharmony_ci 16528c2ecf20Sopenharmony_cistatic int skip_emulated_instruction(struct kvm_vcpu *vcpu) 16538c2ecf20Sopenharmony_ci{ 16548c2ecf20Sopenharmony_ci unsigned long rip, orig_rip; 16558c2ecf20Sopenharmony_ci 16568c2ecf20Sopenharmony_ci /* 16578c2ecf20Sopenharmony_ci * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on 16588c2ecf20Sopenharmony_ci * undefined behavior: Intel's SDM doesn't mandate the VMCS field be 16598c2ecf20Sopenharmony_ci * set when EPT misconfig occurs. In practice, real hardware updates 16608c2ecf20Sopenharmony_ci * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors 16618c2ecf20Sopenharmony_ci * (namely Hyper-V) don't set it due to it being undefined behavior, 16628c2ecf20Sopenharmony_ci * i.e. we end up advancing IP with some random value. 16638c2ecf20Sopenharmony_ci */ 16648c2ecf20Sopenharmony_ci if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || 16658c2ecf20Sopenharmony_ci to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_EPT_MISCONFIG) { 16668c2ecf20Sopenharmony_ci orig_rip = kvm_rip_read(vcpu); 16678c2ecf20Sopenharmony_ci rip = orig_rip + vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 16688c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 16698c2ecf20Sopenharmony_ci /* 16708c2ecf20Sopenharmony_ci * We need to mask out the high 32 bits of RIP if not in 64-bit 16718c2ecf20Sopenharmony_ci * mode, but just finding out that we are in 64-bit mode is 16728c2ecf20Sopenharmony_ci * quite expensive. Only do it if there was a carry. 16738c2ecf20Sopenharmony_ci */ 16748c2ecf20Sopenharmony_ci if (unlikely(((rip ^ orig_rip) >> 31) == 3) && !is_64_bit_mode(vcpu)) 16758c2ecf20Sopenharmony_ci rip = (u32)rip; 16768c2ecf20Sopenharmony_ci#endif 16778c2ecf20Sopenharmony_ci kvm_rip_write(vcpu, rip); 16788c2ecf20Sopenharmony_ci } else { 16798c2ecf20Sopenharmony_ci if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) 16808c2ecf20Sopenharmony_ci return 0; 16818c2ecf20Sopenharmony_ci } 16828c2ecf20Sopenharmony_ci 16838c2ecf20Sopenharmony_ci /* skipping an emulated instruction also counts */ 16848c2ecf20Sopenharmony_ci vmx_set_interrupt_shadow(vcpu, 0); 16858c2ecf20Sopenharmony_ci 16868c2ecf20Sopenharmony_ci return 1; 16878c2ecf20Sopenharmony_ci} 16888c2ecf20Sopenharmony_ci 16898c2ecf20Sopenharmony_ci/* 16908c2ecf20Sopenharmony_ci * Recognizes a pending MTF VM-exit and records the nested state for later 16918c2ecf20Sopenharmony_ci * delivery. 16928c2ecf20Sopenharmony_ci */ 16938c2ecf20Sopenharmony_cistatic void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu) 16948c2ecf20Sopenharmony_ci{ 16958c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 16968c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 16978c2ecf20Sopenharmony_ci 16988c2ecf20Sopenharmony_ci if (!is_guest_mode(vcpu)) 16998c2ecf20Sopenharmony_ci return; 17008c2ecf20Sopenharmony_ci 17018c2ecf20Sopenharmony_ci /* 17028c2ecf20Sopenharmony_ci * Per the SDM, MTF takes priority over debug-trap exceptions besides 17038c2ecf20Sopenharmony_ci * T-bit traps. As instruction emulation is completed (i.e. at the 17048c2ecf20Sopenharmony_ci * instruction boundary), any #DB exception pending delivery must be a 17058c2ecf20Sopenharmony_ci * debug-trap. Record the pending MTF state to be delivered in 17068c2ecf20Sopenharmony_ci * vmx_check_nested_events(). 17078c2ecf20Sopenharmony_ci */ 17088c2ecf20Sopenharmony_ci if (nested_cpu_has_mtf(vmcs12) && 17098c2ecf20Sopenharmony_ci (!vcpu->arch.exception.pending || 17108c2ecf20Sopenharmony_ci vcpu->arch.exception.nr == DB_VECTOR)) 17118c2ecf20Sopenharmony_ci vmx->nested.mtf_pending = true; 17128c2ecf20Sopenharmony_ci else 17138c2ecf20Sopenharmony_ci vmx->nested.mtf_pending = false; 17148c2ecf20Sopenharmony_ci} 17158c2ecf20Sopenharmony_ci 17168c2ecf20Sopenharmony_cistatic int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu) 17178c2ecf20Sopenharmony_ci{ 17188c2ecf20Sopenharmony_ci vmx_update_emulated_instruction(vcpu); 17198c2ecf20Sopenharmony_ci return skip_emulated_instruction(vcpu); 17208c2ecf20Sopenharmony_ci} 17218c2ecf20Sopenharmony_ci 17228c2ecf20Sopenharmony_cistatic void vmx_clear_hlt(struct kvm_vcpu *vcpu) 17238c2ecf20Sopenharmony_ci{ 17248c2ecf20Sopenharmony_ci /* 17258c2ecf20Sopenharmony_ci * Ensure that we clear the HLT state in the VMCS. We don't need to 17268c2ecf20Sopenharmony_ci * explicitly skip the instruction because if the HLT state is set, 17278c2ecf20Sopenharmony_ci * then the instruction is already executing and RIP has already been 17288c2ecf20Sopenharmony_ci * advanced. 17298c2ecf20Sopenharmony_ci */ 17308c2ecf20Sopenharmony_ci if (kvm_hlt_in_guest(vcpu->kvm) && 17318c2ecf20Sopenharmony_ci vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) 17328c2ecf20Sopenharmony_ci vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); 17338c2ecf20Sopenharmony_ci} 17348c2ecf20Sopenharmony_ci 17358c2ecf20Sopenharmony_cistatic void vmx_queue_exception(struct kvm_vcpu *vcpu) 17368c2ecf20Sopenharmony_ci{ 17378c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 17388c2ecf20Sopenharmony_ci unsigned nr = vcpu->arch.exception.nr; 17398c2ecf20Sopenharmony_ci bool has_error_code = vcpu->arch.exception.has_error_code; 17408c2ecf20Sopenharmony_ci u32 error_code = vcpu->arch.exception.error_code; 17418c2ecf20Sopenharmony_ci u32 intr_info = nr | INTR_INFO_VALID_MASK; 17428c2ecf20Sopenharmony_ci 17438c2ecf20Sopenharmony_ci kvm_deliver_exception_payload(vcpu); 17448c2ecf20Sopenharmony_ci 17458c2ecf20Sopenharmony_ci if (has_error_code) { 17468c2ecf20Sopenharmony_ci /* 17478c2ecf20Sopenharmony_ci * Despite the error code being architecturally defined as 32 17488c2ecf20Sopenharmony_ci * bits, and the VMCS field being 32 bits, Intel CPUs and thus 17498c2ecf20Sopenharmony_ci * VMX don't actually supporting setting bits 31:16. Hardware 17508c2ecf20Sopenharmony_ci * will (should) never provide a bogus error code, but AMD CPUs 17518c2ecf20Sopenharmony_ci * do generate error codes with bits 31:16 set, and so KVM's 17528c2ecf20Sopenharmony_ci * ABI lets userspace shove in arbitrary 32-bit values. Drop 17538c2ecf20Sopenharmony_ci * the upper bits to avoid VM-Fail, losing information that 17548c2ecf20Sopenharmony_ci * does't really exist is preferable to killing the VM. 17558c2ecf20Sopenharmony_ci */ 17568c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, (u16)error_code); 17578c2ecf20Sopenharmony_ci intr_info |= INTR_INFO_DELIVER_CODE_MASK; 17588c2ecf20Sopenharmony_ci } 17598c2ecf20Sopenharmony_ci 17608c2ecf20Sopenharmony_ci if (vmx->rmode.vm86_active) { 17618c2ecf20Sopenharmony_ci int inc_eip = 0; 17628c2ecf20Sopenharmony_ci if (kvm_exception_is_soft(nr)) 17638c2ecf20Sopenharmony_ci inc_eip = vcpu->arch.event_exit_inst_len; 17648c2ecf20Sopenharmony_ci kvm_inject_realmode_interrupt(vcpu, nr, inc_eip); 17658c2ecf20Sopenharmony_ci return; 17668c2ecf20Sopenharmony_ci } 17678c2ecf20Sopenharmony_ci 17688c2ecf20Sopenharmony_ci WARN_ON_ONCE(vmx->emulation_required); 17698c2ecf20Sopenharmony_ci 17708c2ecf20Sopenharmony_ci if (kvm_exception_is_soft(nr)) { 17718c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 17728c2ecf20Sopenharmony_ci vmx->vcpu.arch.event_exit_inst_len); 17738c2ecf20Sopenharmony_ci intr_info |= INTR_TYPE_SOFT_EXCEPTION; 17748c2ecf20Sopenharmony_ci } else 17758c2ecf20Sopenharmony_ci intr_info |= INTR_TYPE_HARD_EXCEPTION; 17768c2ecf20Sopenharmony_ci 17778c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); 17788c2ecf20Sopenharmony_ci 17798c2ecf20Sopenharmony_ci vmx_clear_hlt(vcpu); 17808c2ecf20Sopenharmony_ci} 17818c2ecf20Sopenharmony_ci 17828c2ecf20Sopenharmony_cistatic void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr) 17838c2ecf20Sopenharmony_ci{ 17848c2ecf20Sopenharmony_ci struct vmx_uret_msr tmp; 17858c2ecf20Sopenharmony_ci int from, to; 17868c2ecf20Sopenharmony_ci 17878c2ecf20Sopenharmony_ci from = __vmx_find_uret_msr(vmx, msr); 17888c2ecf20Sopenharmony_ci if (from < 0) 17898c2ecf20Sopenharmony_ci return; 17908c2ecf20Sopenharmony_ci to = vmx->nr_active_uret_msrs++; 17918c2ecf20Sopenharmony_ci 17928c2ecf20Sopenharmony_ci tmp = vmx->guest_uret_msrs[to]; 17938c2ecf20Sopenharmony_ci vmx->guest_uret_msrs[to] = vmx->guest_uret_msrs[from]; 17948c2ecf20Sopenharmony_ci vmx->guest_uret_msrs[from] = tmp; 17958c2ecf20Sopenharmony_ci} 17968c2ecf20Sopenharmony_ci 17978c2ecf20Sopenharmony_ci/* 17988c2ecf20Sopenharmony_ci * Set up the vmcs to automatically save and restore system 17998c2ecf20Sopenharmony_ci * msrs. Don't touch the 64-bit msrs if the guest is in legacy 18008c2ecf20Sopenharmony_ci * mode, as fiddling with msrs is very expensive. 18018c2ecf20Sopenharmony_ci */ 18028c2ecf20Sopenharmony_cistatic void setup_msrs(struct vcpu_vmx *vmx) 18038c2ecf20Sopenharmony_ci{ 18048c2ecf20Sopenharmony_ci vmx->guest_uret_msrs_loaded = false; 18058c2ecf20Sopenharmony_ci vmx->nr_active_uret_msrs = 0; 18068c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 18078c2ecf20Sopenharmony_ci /* 18088c2ecf20Sopenharmony_ci * The SYSCALL MSRs are only needed on long mode guests, and only 18098c2ecf20Sopenharmony_ci * when EFER.SCE is set. 18108c2ecf20Sopenharmony_ci */ 18118c2ecf20Sopenharmony_ci if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) { 18128c2ecf20Sopenharmony_ci vmx_setup_uret_msr(vmx, MSR_STAR); 18138c2ecf20Sopenharmony_ci vmx_setup_uret_msr(vmx, MSR_LSTAR); 18148c2ecf20Sopenharmony_ci vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK); 18158c2ecf20Sopenharmony_ci } 18168c2ecf20Sopenharmony_ci#endif 18178c2ecf20Sopenharmony_ci if (update_transition_efer(vmx)) 18188c2ecf20Sopenharmony_ci vmx_setup_uret_msr(vmx, MSR_EFER); 18198c2ecf20Sopenharmony_ci 18208c2ecf20Sopenharmony_ci if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) 18218c2ecf20Sopenharmony_ci vmx_setup_uret_msr(vmx, MSR_TSC_AUX); 18228c2ecf20Sopenharmony_ci 18238c2ecf20Sopenharmony_ci vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL); 18248c2ecf20Sopenharmony_ci 18258c2ecf20Sopenharmony_ci if (cpu_has_vmx_msr_bitmap()) 18268c2ecf20Sopenharmony_ci vmx_update_msr_bitmap(&vmx->vcpu); 18278c2ecf20Sopenharmony_ci} 18288c2ecf20Sopenharmony_ci 18298c2ecf20Sopenharmony_cistatic u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) 18308c2ecf20Sopenharmony_ci{ 18318c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 18328c2ecf20Sopenharmony_ci u64 g_tsc_offset = 0; 18338c2ecf20Sopenharmony_ci 18348c2ecf20Sopenharmony_ci /* 18358c2ecf20Sopenharmony_ci * We're here if L1 chose not to trap WRMSR to TSC. According 18368c2ecf20Sopenharmony_ci * to the spec, this should set L1's TSC; The offset that L1 18378c2ecf20Sopenharmony_ci * set for L2 remains unchanged, and still needs to be added 18388c2ecf20Sopenharmony_ci * to the newly set TSC to get L2's TSC. 18398c2ecf20Sopenharmony_ci */ 18408c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu) && 18418c2ecf20Sopenharmony_ci (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) 18428c2ecf20Sopenharmony_ci g_tsc_offset = vmcs12->tsc_offset; 18438c2ecf20Sopenharmony_ci 18448c2ecf20Sopenharmony_ci trace_kvm_write_tsc_offset(vcpu->vcpu_id, 18458c2ecf20Sopenharmony_ci vcpu->arch.tsc_offset - g_tsc_offset, 18468c2ecf20Sopenharmony_ci offset); 18478c2ecf20Sopenharmony_ci vmcs_write64(TSC_OFFSET, offset + g_tsc_offset); 18488c2ecf20Sopenharmony_ci return offset + g_tsc_offset; 18498c2ecf20Sopenharmony_ci} 18508c2ecf20Sopenharmony_ci 18518c2ecf20Sopenharmony_ci/* 18528c2ecf20Sopenharmony_ci * nested_vmx_allowed() checks whether a guest should be allowed to use VMX 18538c2ecf20Sopenharmony_ci * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for 18548c2ecf20Sopenharmony_ci * all guests if the "nested" module option is off, and can also be disabled 18558c2ecf20Sopenharmony_ci * for a single guest by disabling its VMX cpuid bit. 18568c2ecf20Sopenharmony_ci */ 18578c2ecf20Sopenharmony_cibool nested_vmx_allowed(struct kvm_vcpu *vcpu) 18588c2ecf20Sopenharmony_ci{ 18598c2ecf20Sopenharmony_ci return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX); 18608c2ecf20Sopenharmony_ci} 18618c2ecf20Sopenharmony_ci 18628c2ecf20Sopenharmony_cistatic inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, 18638c2ecf20Sopenharmony_ci uint64_t val) 18648c2ecf20Sopenharmony_ci{ 18658c2ecf20Sopenharmony_ci uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits; 18668c2ecf20Sopenharmony_ci 18678c2ecf20Sopenharmony_ci return !(val & ~valid_bits); 18688c2ecf20Sopenharmony_ci} 18698c2ecf20Sopenharmony_ci 18708c2ecf20Sopenharmony_cistatic int vmx_get_msr_feature(struct kvm_msr_entry *msr) 18718c2ecf20Sopenharmony_ci{ 18728c2ecf20Sopenharmony_ci switch (msr->index) { 18738c2ecf20Sopenharmony_ci case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: 18748c2ecf20Sopenharmony_ci if (!nested) 18758c2ecf20Sopenharmony_ci return 1; 18768c2ecf20Sopenharmony_ci return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); 18778c2ecf20Sopenharmony_ci case MSR_IA32_PERF_CAPABILITIES: 18788c2ecf20Sopenharmony_ci msr->data = vmx_get_perf_capabilities(); 18798c2ecf20Sopenharmony_ci return 0; 18808c2ecf20Sopenharmony_ci default: 18818c2ecf20Sopenharmony_ci return KVM_MSR_RET_INVALID; 18828c2ecf20Sopenharmony_ci } 18838c2ecf20Sopenharmony_ci} 18848c2ecf20Sopenharmony_ci 18858c2ecf20Sopenharmony_ci/* 18868c2ecf20Sopenharmony_ci * Reads an msr value (of 'msr_index') into 'pdata'. 18878c2ecf20Sopenharmony_ci * Returns 0 on success, non-0 otherwise. 18888c2ecf20Sopenharmony_ci * Assumes vcpu_load() was already called. 18898c2ecf20Sopenharmony_ci */ 18908c2ecf20Sopenharmony_cistatic int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) 18918c2ecf20Sopenharmony_ci{ 18928c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 18938c2ecf20Sopenharmony_ci struct vmx_uret_msr *msr; 18948c2ecf20Sopenharmony_ci u32 index; 18958c2ecf20Sopenharmony_ci 18968c2ecf20Sopenharmony_ci switch (msr_info->index) { 18978c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 18988c2ecf20Sopenharmony_ci case MSR_FS_BASE: 18998c2ecf20Sopenharmony_ci msr_info->data = vmcs_readl(GUEST_FS_BASE); 19008c2ecf20Sopenharmony_ci break; 19018c2ecf20Sopenharmony_ci case MSR_GS_BASE: 19028c2ecf20Sopenharmony_ci msr_info->data = vmcs_readl(GUEST_GS_BASE); 19038c2ecf20Sopenharmony_ci break; 19048c2ecf20Sopenharmony_ci case MSR_KERNEL_GS_BASE: 19058c2ecf20Sopenharmony_ci msr_info->data = vmx_read_guest_kernel_gs_base(vmx); 19068c2ecf20Sopenharmony_ci break; 19078c2ecf20Sopenharmony_ci#endif 19088c2ecf20Sopenharmony_ci case MSR_EFER: 19098c2ecf20Sopenharmony_ci return kvm_get_msr_common(vcpu, msr_info); 19108c2ecf20Sopenharmony_ci case MSR_IA32_TSX_CTRL: 19118c2ecf20Sopenharmony_ci if (!msr_info->host_initiated && 19128c2ecf20Sopenharmony_ci !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) 19138c2ecf20Sopenharmony_ci return 1; 19148c2ecf20Sopenharmony_ci goto find_uret_msr; 19158c2ecf20Sopenharmony_ci case MSR_IA32_UMWAIT_CONTROL: 19168c2ecf20Sopenharmony_ci if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) 19178c2ecf20Sopenharmony_ci return 1; 19188c2ecf20Sopenharmony_ci 19198c2ecf20Sopenharmony_ci msr_info->data = vmx->msr_ia32_umwait_control; 19208c2ecf20Sopenharmony_ci break; 19218c2ecf20Sopenharmony_ci case MSR_IA32_SPEC_CTRL: 19228c2ecf20Sopenharmony_ci if (!msr_info->host_initiated && 19238c2ecf20Sopenharmony_ci !guest_has_spec_ctrl_msr(vcpu)) 19248c2ecf20Sopenharmony_ci return 1; 19258c2ecf20Sopenharmony_ci 19268c2ecf20Sopenharmony_ci msr_info->data = to_vmx(vcpu)->spec_ctrl; 19278c2ecf20Sopenharmony_ci break; 19288c2ecf20Sopenharmony_ci case MSR_IA32_SYSENTER_CS: 19298c2ecf20Sopenharmony_ci msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); 19308c2ecf20Sopenharmony_ci break; 19318c2ecf20Sopenharmony_ci case MSR_IA32_SYSENTER_EIP: 19328c2ecf20Sopenharmony_ci msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP); 19338c2ecf20Sopenharmony_ci break; 19348c2ecf20Sopenharmony_ci case MSR_IA32_SYSENTER_ESP: 19358c2ecf20Sopenharmony_ci msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); 19368c2ecf20Sopenharmony_ci break; 19378c2ecf20Sopenharmony_ci case MSR_IA32_BNDCFGS: 19388c2ecf20Sopenharmony_ci if (!kvm_mpx_supported() || 19398c2ecf20Sopenharmony_ci (!msr_info->host_initiated && 19408c2ecf20Sopenharmony_ci !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) 19418c2ecf20Sopenharmony_ci return 1; 19428c2ecf20Sopenharmony_ci msr_info->data = vmcs_read64(GUEST_BNDCFGS); 19438c2ecf20Sopenharmony_ci break; 19448c2ecf20Sopenharmony_ci case MSR_IA32_MCG_EXT_CTL: 19458c2ecf20Sopenharmony_ci if (!msr_info->host_initiated && 19468c2ecf20Sopenharmony_ci !(vmx->msr_ia32_feature_control & 19478c2ecf20Sopenharmony_ci FEAT_CTL_LMCE_ENABLED)) 19488c2ecf20Sopenharmony_ci return 1; 19498c2ecf20Sopenharmony_ci msr_info->data = vcpu->arch.mcg_ext_ctl; 19508c2ecf20Sopenharmony_ci break; 19518c2ecf20Sopenharmony_ci case MSR_IA32_FEAT_CTL: 19528c2ecf20Sopenharmony_ci msr_info->data = vmx->msr_ia32_feature_control; 19538c2ecf20Sopenharmony_ci break; 19548c2ecf20Sopenharmony_ci case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: 19558c2ecf20Sopenharmony_ci if (!nested_vmx_allowed(vcpu)) 19568c2ecf20Sopenharmony_ci return 1; 19578c2ecf20Sopenharmony_ci if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, 19588c2ecf20Sopenharmony_ci &msr_info->data)) 19598c2ecf20Sopenharmony_ci return 1; 19608c2ecf20Sopenharmony_ci /* 19618c2ecf20Sopenharmony_ci * Enlightened VMCS v1 doesn't have certain VMCS fields but 19628c2ecf20Sopenharmony_ci * instead of just ignoring the features, different Hyper-V 19638c2ecf20Sopenharmony_ci * versions are either trying to use them and fail or do some 19648c2ecf20Sopenharmony_ci * sanity checking and refuse to boot. Filter all unsupported 19658c2ecf20Sopenharmony_ci * features out. 19668c2ecf20Sopenharmony_ci */ 19678c2ecf20Sopenharmony_ci if (!msr_info->host_initiated && 19688c2ecf20Sopenharmony_ci vmx->nested.enlightened_vmcs_enabled) 19698c2ecf20Sopenharmony_ci nested_evmcs_filter_control_msr(msr_info->index, 19708c2ecf20Sopenharmony_ci &msr_info->data); 19718c2ecf20Sopenharmony_ci break; 19728c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_CTL: 19738c2ecf20Sopenharmony_ci if (!vmx_pt_mode_is_host_guest()) 19748c2ecf20Sopenharmony_ci return 1; 19758c2ecf20Sopenharmony_ci msr_info->data = vmx->pt_desc.guest.ctl; 19768c2ecf20Sopenharmony_ci break; 19778c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_STATUS: 19788c2ecf20Sopenharmony_ci if (!vmx_pt_mode_is_host_guest()) 19798c2ecf20Sopenharmony_ci return 1; 19808c2ecf20Sopenharmony_ci msr_info->data = vmx->pt_desc.guest.status; 19818c2ecf20Sopenharmony_ci break; 19828c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_CR3_MATCH: 19838c2ecf20Sopenharmony_ci if (!vmx_pt_mode_is_host_guest() || 19848c2ecf20Sopenharmony_ci !intel_pt_validate_cap(vmx->pt_desc.caps, 19858c2ecf20Sopenharmony_ci PT_CAP_cr3_filtering)) 19868c2ecf20Sopenharmony_ci return 1; 19878c2ecf20Sopenharmony_ci msr_info->data = vmx->pt_desc.guest.cr3_match; 19888c2ecf20Sopenharmony_ci break; 19898c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_OUTPUT_BASE: 19908c2ecf20Sopenharmony_ci if (!vmx_pt_mode_is_host_guest() || 19918c2ecf20Sopenharmony_ci (!intel_pt_validate_cap(vmx->pt_desc.caps, 19928c2ecf20Sopenharmony_ci PT_CAP_topa_output) && 19938c2ecf20Sopenharmony_ci !intel_pt_validate_cap(vmx->pt_desc.caps, 19948c2ecf20Sopenharmony_ci PT_CAP_single_range_output))) 19958c2ecf20Sopenharmony_ci return 1; 19968c2ecf20Sopenharmony_ci msr_info->data = vmx->pt_desc.guest.output_base; 19978c2ecf20Sopenharmony_ci break; 19988c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_OUTPUT_MASK: 19998c2ecf20Sopenharmony_ci if (!vmx_pt_mode_is_host_guest() || 20008c2ecf20Sopenharmony_ci (!intel_pt_validate_cap(vmx->pt_desc.caps, 20018c2ecf20Sopenharmony_ci PT_CAP_topa_output) && 20028c2ecf20Sopenharmony_ci !intel_pt_validate_cap(vmx->pt_desc.caps, 20038c2ecf20Sopenharmony_ci PT_CAP_single_range_output))) 20048c2ecf20Sopenharmony_ci return 1; 20058c2ecf20Sopenharmony_ci msr_info->data = vmx->pt_desc.guest.output_mask; 20068c2ecf20Sopenharmony_ci break; 20078c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: 20088c2ecf20Sopenharmony_ci index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; 20098c2ecf20Sopenharmony_ci if (!vmx_pt_mode_is_host_guest() || 20108c2ecf20Sopenharmony_ci (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, 20118c2ecf20Sopenharmony_ci PT_CAP_num_address_ranges))) 20128c2ecf20Sopenharmony_ci return 1; 20138c2ecf20Sopenharmony_ci if (index % 2) 20148c2ecf20Sopenharmony_ci msr_info->data = vmx->pt_desc.guest.addr_b[index / 2]; 20158c2ecf20Sopenharmony_ci else 20168c2ecf20Sopenharmony_ci msr_info->data = vmx->pt_desc.guest.addr_a[index / 2]; 20178c2ecf20Sopenharmony_ci break; 20188c2ecf20Sopenharmony_ci case MSR_TSC_AUX: 20198c2ecf20Sopenharmony_ci if (!msr_info->host_initiated && 20208c2ecf20Sopenharmony_ci !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) 20218c2ecf20Sopenharmony_ci return 1; 20228c2ecf20Sopenharmony_ci goto find_uret_msr; 20238c2ecf20Sopenharmony_ci default: 20248c2ecf20Sopenharmony_ci find_uret_msr: 20258c2ecf20Sopenharmony_ci msr = vmx_find_uret_msr(vmx, msr_info->index); 20268c2ecf20Sopenharmony_ci if (msr) { 20278c2ecf20Sopenharmony_ci msr_info->data = msr->data; 20288c2ecf20Sopenharmony_ci break; 20298c2ecf20Sopenharmony_ci } 20308c2ecf20Sopenharmony_ci return kvm_get_msr_common(vcpu, msr_info); 20318c2ecf20Sopenharmony_ci } 20328c2ecf20Sopenharmony_ci 20338c2ecf20Sopenharmony_ci return 0; 20348c2ecf20Sopenharmony_ci} 20358c2ecf20Sopenharmony_ci 20368c2ecf20Sopenharmony_cistatic u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu, 20378c2ecf20Sopenharmony_ci u64 data) 20388c2ecf20Sopenharmony_ci{ 20398c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 20408c2ecf20Sopenharmony_ci if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) 20418c2ecf20Sopenharmony_ci return (u32)data; 20428c2ecf20Sopenharmony_ci#endif 20438c2ecf20Sopenharmony_ci return (unsigned long)data; 20448c2ecf20Sopenharmony_ci} 20458c2ecf20Sopenharmony_ci 20468c2ecf20Sopenharmony_ci/* 20478c2ecf20Sopenharmony_ci * Writes msr value into the appropriate "register". 20488c2ecf20Sopenharmony_ci * Returns 0 on success, non-0 otherwise. 20498c2ecf20Sopenharmony_ci * Assumes vcpu_load() was already called. 20508c2ecf20Sopenharmony_ci */ 20518c2ecf20Sopenharmony_cistatic int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) 20528c2ecf20Sopenharmony_ci{ 20538c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 20548c2ecf20Sopenharmony_ci struct vmx_uret_msr *msr; 20558c2ecf20Sopenharmony_ci int ret = 0; 20568c2ecf20Sopenharmony_ci u32 msr_index = msr_info->index; 20578c2ecf20Sopenharmony_ci u64 data = msr_info->data; 20588c2ecf20Sopenharmony_ci u32 index; 20598c2ecf20Sopenharmony_ci 20608c2ecf20Sopenharmony_ci switch (msr_index) { 20618c2ecf20Sopenharmony_ci case MSR_EFER: 20628c2ecf20Sopenharmony_ci ret = kvm_set_msr_common(vcpu, msr_info); 20638c2ecf20Sopenharmony_ci break; 20648c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 20658c2ecf20Sopenharmony_ci case MSR_FS_BASE: 20668c2ecf20Sopenharmony_ci vmx_segment_cache_clear(vmx); 20678c2ecf20Sopenharmony_ci vmcs_writel(GUEST_FS_BASE, data); 20688c2ecf20Sopenharmony_ci break; 20698c2ecf20Sopenharmony_ci case MSR_GS_BASE: 20708c2ecf20Sopenharmony_ci vmx_segment_cache_clear(vmx); 20718c2ecf20Sopenharmony_ci vmcs_writel(GUEST_GS_BASE, data); 20728c2ecf20Sopenharmony_ci break; 20738c2ecf20Sopenharmony_ci case MSR_KERNEL_GS_BASE: 20748c2ecf20Sopenharmony_ci vmx_write_guest_kernel_gs_base(vmx, data); 20758c2ecf20Sopenharmony_ci break; 20768c2ecf20Sopenharmony_ci#endif 20778c2ecf20Sopenharmony_ci case MSR_IA32_SYSENTER_CS: 20788c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) 20798c2ecf20Sopenharmony_ci get_vmcs12(vcpu)->guest_sysenter_cs = data; 20808c2ecf20Sopenharmony_ci vmcs_write32(GUEST_SYSENTER_CS, data); 20818c2ecf20Sopenharmony_ci break; 20828c2ecf20Sopenharmony_ci case MSR_IA32_SYSENTER_EIP: 20838c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) { 20848c2ecf20Sopenharmony_ci data = nested_vmx_truncate_sysenter_addr(vcpu, data); 20858c2ecf20Sopenharmony_ci get_vmcs12(vcpu)->guest_sysenter_eip = data; 20868c2ecf20Sopenharmony_ci } 20878c2ecf20Sopenharmony_ci vmcs_writel(GUEST_SYSENTER_EIP, data); 20888c2ecf20Sopenharmony_ci break; 20898c2ecf20Sopenharmony_ci case MSR_IA32_SYSENTER_ESP: 20908c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) { 20918c2ecf20Sopenharmony_ci data = nested_vmx_truncate_sysenter_addr(vcpu, data); 20928c2ecf20Sopenharmony_ci get_vmcs12(vcpu)->guest_sysenter_esp = data; 20938c2ecf20Sopenharmony_ci } 20948c2ecf20Sopenharmony_ci vmcs_writel(GUEST_SYSENTER_ESP, data); 20958c2ecf20Sopenharmony_ci break; 20968c2ecf20Sopenharmony_ci case MSR_IA32_DEBUGCTLMSR: 20978c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls & 20988c2ecf20Sopenharmony_ci VM_EXIT_SAVE_DEBUG_CONTROLS) 20998c2ecf20Sopenharmony_ci get_vmcs12(vcpu)->guest_ia32_debugctl = data; 21008c2ecf20Sopenharmony_ci 21018c2ecf20Sopenharmony_ci ret = kvm_set_msr_common(vcpu, msr_info); 21028c2ecf20Sopenharmony_ci break; 21038c2ecf20Sopenharmony_ci 21048c2ecf20Sopenharmony_ci case MSR_IA32_BNDCFGS: 21058c2ecf20Sopenharmony_ci if (!kvm_mpx_supported() || 21068c2ecf20Sopenharmony_ci (!msr_info->host_initiated && 21078c2ecf20Sopenharmony_ci !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) 21088c2ecf20Sopenharmony_ci return 1; 21098c2ecf20Sopenharmony_ci if (is_noncanonical_address(data & PAGE_MASK, vcpu) || 21108c2ecf20Sopenharmony_ci (data & MSR_IA32_BNDCFGS_RSVD)) 21118c2ecf20Sopenharmony_ci return 1; 21128c2ecf20Sopenharmony_ci vmcs_write64(GUEST_BNDCFGS, data); 21138c2ecf20Sopenharmony_ci break; 21148c2ecf20Sopenharmony_ci case MSR_IA32_UMWAIT_CONTROL: 21158c2ecf20Sopenharmony_ci if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) 21168c2ecf20Sopenharmony_ci return 1; 21178c2ecf20Sopenharmony_ci 21188c2ecf20Sopenharmony_ci /* The reserved bit 1 and non-32 bit [63:32] should be zero */ 21198c2ecf20Sopenharmony_ci if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) 21208c2ecf20Sopenharmony_ci return 1; 21218c2ecf20Sopenharmony_ci 21228c2ecf20Sopenharmony_ci vmx->msr_ia32_umwait_control = data; 21238c2ecf20Sopenharmony_ci break; 21248c2ecf20Sopenharmony_ci case MSR_IA32_SPEC_CTRL: 21258c2ecf20Sopenharmony_ci if (!msr_info->host_initiated && 21268c2ecf20Sopenharmony_ci !guest_has_spec_ctrl_msr(vcpu)) 21278c2ecf20Sopenharmony_ci return 1; 21288c2ecf20Sopenharmony_ci 21298c2ecf20Sopenharmony_ci if (kvm_spec_ctrl_test_value(data)) 21308c2ecf20Sopenharmony_ci return 1; 21318c2ecf20Sopenharmony_ci 21328c2ecf20Sopenharmony_ci vmx->spec_ctrl = data; 21338c2ecf20Sopenharmony_ci if (!data) 21348c2ecf20Sopenharmony_ci break; 21358c2ecf20Sopenharmony_ci 21368c2ecf20Sopenharmony_ci /* 21378c2ecf20Sopenharmony_ci * For non-nested: 21388c2ecf20Sopenharmony_ci * When it's written (to non-zero) for the first time, pass 21398c2ecf20Sopenharmony_ci * it through. 21408c2ecf20Sopenharmony_ci * 21418c2ecf20Sopenharmony_ci * For nested: 21428c2ecf20Sopenharmony_ci * The handling of the MSR bitmap for L2 guests is done in 21438c2ecf20Sopenharmony_ci * nested_vmx_prepare_msr_bitmap. We should not touch the 21448c2ecf20Sopenharmony_ci * vmcs02.msr_bitmap here since it gets completely overwritten 21458c2ecf20Sopenharmony_ci * in the merging. We update the vmcs01 here for L1 as well 21468c2ecf20Sopenharmony_ci * since it will end up touching the MSR anyway now. 21478c2ecf20Sopenharmony_ci */ 21488c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, 21498c2ecf20Sopenharmony_ci MSR_IA32_SPEC_CTRL, 21508c2ecf20Sopenharmony_ci MSR_TYPE_RW); 21518c2ecf20Sopenharmony_ci break; 21528c2ecf20Sopenharmony_ci case MSR_IA32_TSX_CTRL: 21538c2ecf20Sopenharmony_ci if (!msr_info->host_initiated && 21548c2ecf20Sopenharmony_ci !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) 21558c2ecf20Sopenharmony_ci return 1; 21568c2ecf20Sopenharmony_ci if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR)) 21578c2ecf20Sopenharmony_ci return 1; 21588c2ecf20Sopenharmony_ci goto find_uret_msr; 21598c2ecf20Sopenharmony_ci case MSR_IA32_PRED_CMD: 21608c2ecf20Sopenharmony_ci if (!msr_info->host_initiated && 21618c2ecf20Sopenharmony_ci !guest_has_pred_cmd_msr(vcpu)) 21628c2ecf20Sopenharmony_ci return 1; 21638c2ecf20Sopenharmony_ci 21648c2ecf20Sopenharmony_ci if (data & ~PRED_CMD_IBPB) 21658c2ecf20Sopenharmony_ci return 1; 21668c2ecf20Sopenharmony_ci if (!boot_cpu_has(X86_FEATURE_IBPB)) 21678c2ecf20Sopenharmony_ci return 1; 21688c2ecf20Sopenharmony_ci if (!data) 21698c2ecf20Sopenharmony_ci break; 21708c2ecf20Sopenharmony_ci 21718c2ecf20Sopenharmony_ci wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); 21728c2ecf20Sopenharmony_ci 21738c2ecf20Sopenharmony_ci /* 21748c2ecf20Sopenharmony_ci * For non-nested: 21758c2ecf20Sopenharmony_ci * When it's written (to non-zero) for the first time, pass 21768c2ecf20Sopenharmony_ci * it through. 21778c2ecf20Sopenharmony_ci * 21788c2ecf20Sopenharmony_ci * For nested: 21798c2ecf20Sopenharmony_ci * The handling of the MSR bitmap for L2 guests is done in 21808c2ecf20Sopenharmony_ci * nested_vmx_prepare_msr_bitmap. We should not touch the 21818c2ecf20Sopenharmony_ci * vmcs02.msr_bitmap here since it gets completely overwritten 21828c2ecf20Sopenharmony_ci * in the merging. 21838c2ecf20Sopenharmony_ci */ 21848c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W); 21858c2ecf20Sopenharmony_ci break; 21868c2ecf20Sopenharmony_ci case MSR_IA32_CR_PAT: 21878c2ecf20Sopenharmony_ci if (!kvm_pat_valid(data)) 21888c2ecf20Sopenharmony_ci return 1; 21898c2ecf20Sopenharmony_ci 21908c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu) && 21918c2ecf20Sopenharmony_ci get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) 21928c2ecf20Sopenharmony_ci get_vmcs12(vcpu)->guest_ia32_pat = data; 21938c2ecf20Sopenharmony_ci 21948c2ecf20Sopenharmony_ci if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { 21958c2ecf20Sopenharmony_ci vmcs_write64(GUEST_IA32_PAT, data); 21968c2ecf20Sopenharmony_ci vcpu->arch.pat = data; 21978c2ecf20Sopenharmony_ci break; 21988c2ecf20Sopenharmony_ci } 21998c2ecf20Sopenharmony_ci ret = kvm_set_msr_common(vcpu, msr_info); 22008c2ecf20Sopenharmony_ci break; 22018c2ecf20Sopenharmony_ci case MSR_IA32_TSC_ADJUST: 22028c2ecf20Sopenharmony_ci ret = kvm_set_msr_common(vcpu, msr_info); 22038c2ecf20Sopenharmony_ci break; 22048c2ecf20Sopenharmony_ci case MSR_IA32_MCG_EXT_CTL: 22058c2ecf20Sopenharmony_ci if ((!msr_info->host_initiated && 22068c2ecf20Sopenharmony_ci !(to_vmx(vcpu)->msr_ia32_feature_control & 22078c2ecf20Sopenharmony_ci FEAT_CTL_LMCE_ENABLED)) || 22088c2ecf20Sopenharmony_ci (data & ~MCG_EXT_CTL_LMCE_EN)) 22098c2ecf20Sopenharmony_ci return 1; 22108c2ecf20Sopenharmony_ci vcpu->arch.mcg_ext_ctl = data; 22118c2ecf20Sopenharmony_ci break; 22128c2ecf20Sopenharmony_ci case MSR_IA32_FEAT_CTL: 22138c2ecf20Sopenharmony_ci if (!vmx_feature_control_msr_valid(vcpu, data) || 22148c2ecf20Sopenharmony_ci (to_vmx(vcpu)->msr_ia32_feature_control & 22158c2ecf20Sopenharmony_ci FEAT_CTL_LOCKED && !msr_info->host_initiated)) 22168c2ecf20Sopenharmony_ci return 1; 22178c2ecf20Sopenharmony_ci vmx->msr_ia32_feature_control = data; 22188c2ecf20Sopenharmony_ci if (msr_info->host_initiated && data == 0) 22198c2ecf20Sopenharmony_ci vmx_leave_nested(vcpu); 22208c2ecf20Sopenharmony_ci break; 22218c2ecf20Sopenharmony_ci case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: 22228c2ecf20Sopenharmony_ci if (!msr_info->host_initiated) 22238c2ecf20Sopenharmony_ci return 1; /* they are read-only */ 22248c2ecf20Sopenharmony_ci if (!nested_vmx_allowed(vcpu)) 22258c2ecf20Sopenharmony_ci return 1; 22268c2ecf20Sopenharmony_ci return vmx_set_vmx_msr(vcpu, msr_index, data); 22278c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_CTL: 22288c2ecf20Sopenharmony_ci if (!vmx_pt_mode_is_host_guest() || 22298c2ecf20Sopenharmony_ci vmx_rtit_ctl_check(vcpu, data) || 22308c2ecf20Sopenharmony_ci vmx->nested.vmxon) 22318c2ecf20Sopenharmony_ci return 1; 22328c2ecf20Sopenharmony_ci vmcs_write64(GUEST_IA32_RTIT_CTL, data); 22338c2ecf20Sopenharmony_ci vmx->pt_desc.guest.ctl = data; 22348c2ecf20Sopenharmony_ci pt_update_intercept_for_msr(vcpu); 22358c2ecf20Sopenharmony_ci break; 22368c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_STATUS: 22378c2ecf20Sopenharmony_ci if (!pt_can_write_msr(vmx)) 22388c2ecf20Sopenharmony_ci return 1; 22398c2ecf20Sopenharmony_ci if (data & MSR_IA32_RTIT_STATUS_MASK) 22408c2ecf20Sopenharmony_ci return 1; 22418c2ecf20Sopenharmony_ci vmx->pt_desc.guest.status = data; 22428c2ecf20Sopenharmony_ci break; 22438c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_CR3_MATCH: 22448c2ecf20Sopenharmony_ci if (!pt_can_write_msr(vmx)) 22458c2ecf20Sopenharmony_ci return 1; 22468c2ecf20Sopenharmony_ci if (!intel_pt_validate_cap(vmx->pt_desc.caps, 22478c2ecf20Sopenharmony_ci PT_CAP_cr3_filtering)) 22488c2ecf20Sopenharmony_ci return 1; 22498c2ecf20Sopenharmony_ci vmx->pt_desc.guest.cr3_match = data; 22508c2ecf20Sopenharmony_ci break; 22518c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_OUTPUT_BASE: 22528c2ecf20Sopenharmony_ci if (!pt_can_write_msr(vmx)) 22538c2ecf20Sopenharmony_ci return 1; 22548c2ecf20Sopenharmony_ci if (!intel_pt_validate_cap(vmx->pt_desc.caps, 22558c2ecf20Sopenharmony_ci PT_CAP_topa_output) && 22568c2ecf20Sopenharmony_ci !intel_pt_validate_cap(vmx->pt_desc.caps, 22578c2ecf20Sopenharmony_ci PT_CAP_single_range_output)) 22588c2ecf20Sopenharmony_ci return 1; 22598c2ecf20Sopenharmony_ci if (!pt_output_base_valid(vcpu, data)) 22608c2ecf20Sopenharmony_ci return 1; 22618c2ecf20Sopenharmony_ci vmx->pt_desc.guest.output_base = data; 22628c2ecf20Sopenharmony_ci break; 22638c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_OUTPUT_MASK: 22648c2ecf20Sopenharmony_ci if (!pt_can_write_msr(vmx)) 22658c2ecf20Sopenharmony_ci return 1; 22668c2ecf20Sopenharmony_ci if (!intel_pt_validate_cap(vmx->pt_desc.caps, 22678c2ecf20Sopenharmony_ci PT_CAP_topa_output) && 22688c2ecf20Sopenharmony_ci !intel_pt_validate_cap(vmx->pt_desc.caps, 22698c2ecf20Sopenharmony_ci PT_CAP_single_range_output)) 22708c2ecf20Sopenharmony_ci return 1; 22718c2ecf20Sopenharmony_ci vmx->pt_desc.guest.output_mask = data; 22728c2ecf20Sopenharmony_ci break; 22738c2ecf20Sopenharmony_ci case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: 22748c2ecf20Sopenharmony_ci if (!pt_can_write_msr(vmx)) 22758c2ecf20Sopenharmony_ci return 1; 22768c2ecf20Sopenharmony_ci index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; 22778c2ecf20Sopenharmony_ci if (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, 22788c2ecf20Sopenharmony_ci PT_CAP_num_address_ranges)) 22798c2ecf20Sopenharmony_ci return 1; 22808c2ecf20Sopenharmony_ci if (is_noncanonical_address(data, vcpu)) 22818c2ecf20Sopenharmony_ci return 1; 22828c2ecf20Sopenharmony_ci if (index % 2) 22838c2ecf20Sopenharmony_ci vmx->pt_desc.guest.addr_b[index / 2] = data; 22848c2ecf20Sopenharmony_ci else 22858c2ecf20Sopenharmony_ci vmx->pt_desc.guest.addr_a[index / 2] = data; 22868c2ecf20Sopenharmony_ci break; 22878c2ecf20Sopenharmony_ci case MSR_TSC_AUX: 22888c2ecf20Sopenharmony_ci if (!msr_info->host_initiated && 22898c2ecf20Sopenharmony_ci !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) 22908c2ecf20Sopenharmony_ci return 1; 22918c2ecf20Sopenharmony_ci /* Check reserved bit, higher 32 bits should be zero */ 22928c2ecf20Sopenharmony_ci if ((data >> 32) != 0) 22938c2ecf20Sopenharmony_ci return 1; 22948c2ecf20Sopenharmony_ci goto find_uret_msr; 22958c2ecf20Sopenharmony_ci 22968c2ecf20Sopenharmony_ci default: 22978c2ecf20Sopenharmony_ci find_uret_msr: 22988c2ecf20Sopenharmony_ci msr = vmx_find_uret_msr(vmx, msr_index); 22998c2ecf20Sopenharmony_ci if (msr) 23008c2ecf20Sopenharmony_ci ret = vmx_set_guest_uret_msr(vmx, msr, data); 23018c2ecf20Sopenharmony_ci else 23028c2ecf20Sopenharmony_ci ret = kvm_set_msr_common(vcpu, msr_info); 23038c2ecf20Sopenharmony_ci } 23048c2ecf20Sopenharmony_ci 23058c2ecf20Sopenharmony_ci /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */ 23068c2ecf20Sopenharmony_ci if (msr_index == MSR_IA32_ARCH_CAPABILITIES) 23078c2ecf20Sopenharmony_ci vmx_update_fb_clear_dis(vcpu, vmx); 23088c2ecf20Sopenharmony_ci 23098c2ecf20Sopenharmony_ci return ret; 23108c2ecf20Sopenharmony_ci} 23118c2ecf20Sopenharmony_ci 23128c2ecf20Sopenharmony_cistatic void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) 23138c2ecf20Sopenharmony_ci{ 23148c2ecf20Sopenharmony_ci unsigned long guest_owned_bits; 23158c2ecf20Sopenharmony_ci 23168c2ecf20Sopenharmony_ci kvm_register_mark_available(vcpu, reg); 23178c2ecf20Sopenharmony_ci 23188c2ecf20Sopenharmony_ci switch (reg) { 23198c2ecf20Sopenharmony_ci case VCPU_REGS_RSP: 23208c2ecf20Sopenharmony_ci vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); 23218c2ecf20Sopenharmony_ci break; 23228c2ecf20Sopenharmony_ci case VCPU_REGS_RIP: 23238c2ecf20Sopenharmony_ci vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP); 23248c2ecf20Sopenharmony_ci break; 23258c2ecf20Sopenharmony_ci case VCPU_EXREG_PDPTR: 23268c2ecf20Sopenharmony_ci if (enable_ept) 23278c2ecf20Sopenharmony_ci ept_save_pdptrs(vcpu); 23288c2ecf20Sopenharmony_ci break; 23298c2ecf20Sopenharmony_ci case VCPU_EXREG_CR0: 23308c2ecf20Sopenharmony_ci guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; 23318c2ecf20Sopenharmony_ci 23328c2ecf20Sopenharmony_ci vcpu->arch.cr0 &= ~guest_owned_bits; 23338c2ecf20Sopenharmony_ci vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits; 23348c2ecf20Sopenharmony_ci break; 23358c2ecf20Sopenharmony_ci case VCPU_EXREG_CR3: 23368c2ecf20Sopenharmony_ci if (is_unrestricted_guest(vcpu) || 23378c2ecf20Sopenharmony_ci (enable_ept && is_paging(vcpu))) 23388c2ecf20Sopenharmony_ci vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); 23398c2ecf20Sopenharmony_ci break; 23408c2ecf20Sopenharmony_ci case VCPU_EXREG_CR4: 23418c2ecf20Sopenharmony_ci guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; 23428c2ecf20Sopenharmony_ci 23438c2ecf20Sopenharmony_ci vcpu->arch.cr4 &= ~guest_owned_bits; 23448c2ecf20Sopenharmony_ci vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits; 23458c2ecf20Sopenharmony_ci break; 23468c2ecf20Sopenharmony_ci default: 23478c2ecf20Sopenharmony_ci WARN_ON_ONCE(1); 23488c2ecf20Sopenharmony_ci break; 23498c2ecf20Sopenharmony_ci } 23508c2ecf20Sopenharmony_ci} 23518c2ecf20Sopenharmony_ci 23528c2ecf20Sopenharmony_cistatic __init int cpu_has_kvm_support(void) 23538c2ecf20Sopenharmony_ci{ 23548c2ecf20Sopenharmony_ci return cpu_has_vmx(); 23558c2ecf20Sopenharmony_ci} 23568c2ecf20Sopenharmony_ci 23578c2ecf20Sopenharmony_cistatic __init int vmx_disabled_by_bios(void) 23588c2ecf20Sopenharmony_ci{ 23598c2ecf20Sopenharmony_ci return !boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || 23608c2ecf20Sopenharmony_ci !boot_cpu_has(X86_FEATURE_VMX); 23618c2ecf20Sopenharmony_ci} 23628c2ecf20Sopenharmony_ci 23638c2ecf20Sopenharmony_cistatic int kvm_cpu_vmxon(u64 vmxon_pointer) 23648c2ecf20Sopenharmony_ci{ 23658c2ecf20Sopenharmony_ci u64 msr; 23668c2ecf20Sopenharmony_ci 23678c2ecf20Sopenharmony_ci cr4_set_bits(X86_CR4_VMXE); 23688c2ecf20Sopenharmony_ci intel_pt_handle_vmx(1); 23698c2ecf20Sopenharmony_ci 23708c2ecf20Sopenharmony_ci asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t" 23718c2ecf20Sopenharmony_ci _ASM_EXTABLE(1b, %l[fault]) 23728c2ecf20Sopenharmony_ci : : [vmxon_pointer] "m"(vmxon_pointer) 23738c2ecf20Sopenharmony_ci : : fault); 23748c2ecf20Sopenharmony_ci return 0; 23758c2ecf20Sopenharmony_ci 23768c2ecf20Sopenharmony_cifault: 23778c2ecf20Sopenharmony_ci WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n", 23788c2ecf20Sopenharmony_ci rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr); 23798c2ecf20Sopenharmony_ci intel_pt_handle_vmx(0); 23808c2ecf20Sopenharmony_ci cr4_clear_bits(X86_CR4_VMXE); 23818c2ecf20Sopenharmony_ci 23828c2ecf20Sopenharmony_ci return -EFAULT; 23838c2ecf20Sopenharmony_ci} 23848c2ecf20Sopenharmony_ci 23858c2ecf20Sopenharmony_cistatic int hardware_enable(void) 23868c2ecf20Sopenharmony_ci{ 23878c2ecf20Sopenharmony_ci int cpu = raw_smp_processor_id(); 23888c2ecf20Sopenharmony_ci u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); 23898c2ecf20Sopenharmony_ci int r; 23908c2ecf20Sopenharmony_ci 23918c2ecf20Sopenharmony_ci if (cr4_read_shadow() & X86_CR4_VMXE) 23928c2ecf20Sopenharmony_ci return -EBUSY; 23938c2ecf20Sopenharmony_ci 23948c2ecf20Sopenharmony_ci /* 23958c2ecf20Sopenharmony_ci * This can happen if we hot-added a CPU but failed to allocate 23968c2ecf20Sopenharmony_ci * VP assist page for it. 23978c2ecf20Sopenharmony_ci */ 23988c2ecf20Sopenharmony_ci if (static_branch_unlikely(&enable_evmcs) && 23998c2ecf20Sopenharmony_ci !hv_get_vp_assist_page(cpu)) 24008c2ecf20Sopenharmony_ci return -EFAULT; 24018c2ecf20Sopenharmony_ci 24028c2ecf20Sopenharmony_ci r = kvm_cpu_vmxon(phys_addr); 24038c2ecf20Sopenharmony_ci if (r) 24048c2ecf20Sopenharmony_ci return r; 24058c2ecf20Sopenharmony_ci 24068c2ecf20Sopenharmony_ci if (enable_ept) 24078c2ecf20Sopenharmony_ci ept_sync_global(); 24088c2ecf20Sopenharmony_ci 24098c2ecf20Sopenharmony_ci return 0; 24108c2ecf20Sopenharmony_ci} 24118c2ecf20Sopenharmony_ci 24128c2ecf20Sopenharmony_cistatic void vmclear_local_loaded_vmcss(void) 24138c2ecf20Sopenharmony_ci{ 24148c2ecf20Sopenharmony_ci int cpu = raw_smp_processor_id(); 24158c2ecf20Sopenharmony_ci struct loaded_vmcs *v, *n; 24168c2ecf20Sopenharmony_ci 24178c2ecf20Sopenharmony_ci list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu), 24188c2ecf20Sopenharmony_ci loaded_vmcss_on_cpu_link) 24198c2ecf20Sopenharmony_ci __loaded_vmcs_clear(v); 24208c2ecf20Sopenharmony_ci} 24218c2ecf20Sopenharmony_ci 24228c2ecf20Sopenharmony_ci 24238c2ecf20Sopenharmony_ci/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot() 24248c2ecf20Sopenharmony_ci * tricks. 24258c2ecf20Sopenharmony_ci */ 24268c2ecf20Sopenharmony_cistatic void kvm_cpu_vmxoff(void) 24278c2ecf20Sopenharmony_ci{ 24288c2ecf20Sopenharmony_ci asm volatile (__ex("vmxoff")); 24298c2ecf20Sopenharmony_ci 24308c2ecf20Sopenharmony_ci intel_pt_handle_vmx(0); 24318c2ecf20Sopenharmony_ci cr4_clear_bits(X86_CR4_VMXE); 24328c2ecf20Sopenharmony_ci} 24338c2ecf20Sopenharmony_ci 24348c2ecf20Sopenharmony_cistatic void hardware_disable(void) 24358c2ecf20Sopenharmony_ci{ 24368c2ecf20Sopenharmony_ci vmclear_local_loaded_vmcss(); 24378c2ecf20Sopenharmony_ci kvm_cpu_vmxoff(); 24388c2ecf20Sopenharmony_ci} 24398c2ecf20Sopenharmony_ci 24408c2ecf20Sopenharmony_ci/* 24418c2ecf20Sopenharmony_ci * There is no X86_FEATURE for SGX yet, but anyway we need to query CPUID 24428c2ecf20Sopenharmony_ci * directly instead of going through cpu_has(), to ensure KVM is trapping 24438c2ecf20Sopenharmony_ci * ENCLS whenever it's supported in hardware. It does not matter whether 24448c2ecf20Sopenharmony_ci * the host OS supports or has enabled SGX. 24458c2ecf20Sopenharmony_ci */ 24468c2ecf20Sopenharmony_cistatic bool cpu_has_sgx(void) 24478c2ecf20Sopenharmony_ci{ 24488c2ecf20Sopenharmony_ci return cpuid_eax(0) >= 0x12 && (cpuid_eax(0x12) & BIT(0)); 24498c2ecf20Sopenharmony_ci} 24508c2ecf20Sopenharmony_ci 24518c2ecf20Sopenharmony_cistatic __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, 24528c2ecf20Sopenharmony_ci u32 msr, u32 *result) 24538c2ecf20Sopenharmony_ci{ 24548c2ecf20Sopenharmony_ci u32 vmx_msr_low, vmx_msr_high; 24558c2ecf20Sopenharmony_ci u32 ctl = ctl_min | ctl_opt; 24568c2ecf20Sopenharmony_ci 24578c2ecf20Sopenharmony_ci rdmsr(msr, vmx_msr_low, vmx_msr_high); 24588c2ecf20Sopenharmony_ci 24598c2ecf20Sopenharmony_ci ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */ 24608c2ecf20Sopenharmony_ci ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */ 24618c2ecf20Sopenharmony_ci 24628c2ecf20Sopenharmony_ci /* Ensure minimum (required) set of control bits are supported. */ 24638c2ecf20Sopenharmony_ci if (ctl_min & ~ctl) 24648c2ecf20Sopenharmony_ci return -EIO; 24658c2ecf20Sopenharmony_ci 24668c2ecf20Sopenharmony_ci *result = ctl; 24678c2ecf20Sopenharmony_ci return 0; 24688c2ecf20Sopenharmony_ci} 24698c2ecf20Sopenharmony_ci 24708c2ecf20Sopenharmony_cistatic __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, 24718c2ecf20Sopenharmony_ci struct vmx_capability *vmx_cap) 24728c2ecf20Sopenharmony_ci{ 24738c2ecf20Sopenharmony_ci u32 vmx_msr_low, vmx_msr_high; 24748c2ecf20Sopenharmony_ci u32 min, opt, min2, opt2; 24758c2ecf20Sopenharmony_ci u32 _pin_based_exec_control = 0; 24768c2ecf20Sopenharmony_ci u32 _cpu_based_exec_control = 0; 24778c2ecf20Sopenharmony_ci u32 _cpu_based_2nd_exec_control = 0; 24788c2ecf20Sopenharmony_ci u32 _vmexit_control = 0; 24798c2ecf20Sopenharmony_ci u32 _vmentry_control = 0; 24808c2ecf20Sopenharmony_ci 24818c2ecf20Sopenharmony_ci memset(vmcs_conf, 0, sizeof(*vmcs_conf)); 24828c2ecf20Sopenharmony_ci min = CPU_BASED_HLT_EXITING | 24838c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 24848c2ecf20Sopenharmony_ci CPU_BASED_CR8_LOAD_EXITING | 24858c2ecf20Sopenharmony_ci CPU_BASED_CR8_STORE_EXITING | 24868c2ecf20Sopenharmony_ci#endif 24878c2ecf20Sopenharmony_ci CPU_BASED_CR3_LOAD_EXITING | 24888c2ecf20Sopenharmony_ci CPU_BASED_CR3_STORE_EXITING | 24898c2ecf20Sopenharmony_ci CPU_BASED_UNCOND_IO_EXITING | 24908c2ecf20Sopenharmony_ci CPU_BASED_MOV_DR_EXITING | 24918c2ecf20Sopenharmony_ci CPU_BASED_USE_TSC_OFFSETTING | 24928c2ecf20Sopenharmony_ci CPU_BASED_MWAIT_EXITING | 24938c2ecf20Sopenharmony_ci CPU_BASED_MONITOR_EXITING | 24948c2ecf20Sopenharmony_ci CPU_BASED_INVLPG_EXITING | 24958c2ecf20Sopenharmony_ci CPU_BASED_RDPMC_EXITING; 24968c2ecf20Sopenharmony_ci 24978c2ecf20Sopenharmony_ci opt = CPU_BASED_TPR_SHADOW | 24988c2ecf20Sopenharmony_ci CPU_BASED_USE_MSR_BITMAPS | 24998c2ecf20Sopenharmony_ci CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; 25008c2ecf20Sopenharmony_ci if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, 25018c2ecf20Sopenharmony_ci &_cpu_based_exec_control) < 0) 25028c2ecf20Sopenharmony_ci return -EIO; 25038c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 25048c2ecf20Sopenharmony_ci if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) 25058c2ecf20Sopenharmony_ci _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING & 25068c2ecf20Sopenharmony_ci ~CPU_BASED_CR8_STORE_EXITING; 25078c2ecf20Sopenharmony_ci#endif 25088c2ecf20Sopenharmony_ci if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { 25098c2ecf20Sopenharmony_ci min2 = 0; 25108c2ecf20Sopenharmony_ci opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 25118c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | 25128c2ecf20Sopenharmony_ci SECONDARY_EXEC_WBINVD_EXITING | 25138c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_VPID | 25148c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_EPT | 25158c2ecf20Sopenharmony_ci SECONDARY_EXEC_UNRESTRICTED_GUEST | 25168c2ecf20Sopenharmony_ci SECONDARY_EXEC_PAUSE_LOOP_EXITING | 25178c2ecf20Sopenharmony_ci SECONDARY_EXEC_DESC | 25188c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_RDTSCP | 25198c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_INVPCID | 25208c2ecf20Sopenharmony_ci SECONDARY_EXEC_APIC_REGISTER_VIRT | 25218c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 25228c2ecf20Sopenharmony_ci SECONDARY_EXEC_SHADOW_VMCS | 25238c2ecf20Sopenharmony_ci SECONDARY_EXEC_XSAVES | 25248c2ecf20Sopenharmony_ci SECONDARY_EXEC_RDSEED_EXITING | 25258c2ecf20Sopenharmony_ci SECONDARY_EXEC_RDRAND_EXITING | 25268c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_PML | 25278c2ecf20Sopenharmony_ci SECONDARY_EXEC_TSC_SCALING | 25288c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | 25298c2ecf20Sopenharmony_ci SECONDARY_EXEC_PT_USE_GPA | 25308c2ecf20Sopenharmony_ci SECONDARY_EXEC_PT_CONCEAL_VMX | 25318c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_VMFUNC; 25328c2ecf20Sopenharmony_ci if (cpu_has_sgx()) 25338c2ecf20Sopenharmony_ci opt2 |= SECONDARY_EXEC_ENCLS_EXITING; 25348c2ecf20Sopenharmony_ci if (adjust_vmx_controls(min2, opt2, 25358c2ecf20Sopenharmony_ci MSR_IA32_VMX_PROCBASED_CTLS2, 25368c2ecf20Sopenharmony_ci &_cpu_based_2nd_exec_control) < 0) 25378c2ecf20Sopenharmony_ci return -EIO; 25388c2ecf20Sopenharmony_ci } 25398c2ecf20Sopenharmony_ci#ifndef CONFIG_X86_64 25408c2ecf20Sopenharmony_ci if (!(_cpu_based_2nd_exec_control & 25418c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) 25428c2ecf20Sopenharmony_ci _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; 25438c2ecf20Sopenharmony_ci#endif 25448c2ecf20Sopenharmony_ci 25458c2ecf20Sopenharmony_ci if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) 25468c2ecf20Sopenharmony_ci _cpu_based_2nd_exec_control &= ~( 25478c2ecf20Sopenharmony_ci SECONDARY_EXEC_APIC_REGISTER_VIRT | 25488c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | 25498c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); 25508c2ecf20Sopenharmony_ci 25518c2ecf20Sopenharmony_ci rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, 25528c2ecf20Sopenharmony_ci &vmx_cap->ept, &vmx_cap->vpid); 25538c2ecf20Sopenharmony_ci 25548c2ecf20Sopenharmony_ci if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { 25558c2ecf20Sopenharmony_ci /* CR3 accesses and invlpg don't need to cause VM Exits when EPT 25568c2ecf20Sopenharmony_ci enabled */ 25578c2ecf20Sopenharmony_ci _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING | 25588c2ecf20Sopenharmony_ci CPU_BASED_CR3_STORE_EXITING | 25598c2ecf20Sopenharmony_ci CPU_BASED_INVLPG_EXITING); 25608c2ecf20Sopenharmony_ci } else if (vmx_cap->ept) { 25618c2ecf20Sopenharmony_ci vmx_cap->ept = 0; 25628c2ecf20Sopenharmony_ci pr_warn_once("EPT CAP should not exist if not support " 25638c2ecf20Sopenharmony_ci "1-setting enable EPT VM-execution control\n"); 25648c2ecf20Sopenharmony_ci } 25658c2ecf20Sopenharmony_ci if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) && 25668c2ecf20Sopenharmony_ci vmx_cap->vpid) { 25678c2ecf20Sopenharmony_ci vmx_cap->vpid = 0; 25688c2ecf20Sopenharmony_ci pr_warn_once("VPID CAP should not exist if not support " 25698c2ecf20Sopenharmony_ci "1-setting enable VPID VM-execution control\n"); 25708c2ecf20Sopenharmony_ci } 25718c2ecf20Sopenharmony_ci 25728c2ecf20Sopenharmony_ci min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT; 25738c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 25748c2ecf20Sopenharmony_ci min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; 25758c2ecf20Sopenharmony_ci#endif 25768c2ecf20Sopenharmony_ci opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | 25778c2ecf20Sopenharmony_ci VM_EXIT_LOAD_IA32_PAT | 25788c2ecf20Sopenharmony_ci VM_EXIT_LOAD_IA32_EFER | 25798c2ecf20Sopenharmony_ci VM_EXIT_CLEAR_BNDCFGS | 25808c2ecf20Sopenharmony_ci VM_EXIT_PT_CONCEAL_PIP | 25818c2ecf20Sopenharmony_ci VM_EXIT_CLEAR_IA32_RTIT_CTL; 25828c2ecf20Sopenharmony_ci if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, 25838c2ecf20Sopenharmony_ci &_vmexit_control) < 0) 25848c2ecf20Sopenharmony_ci return -EIO; 25858c2ecf20Sopenharmony_ci 25868c2ecf20Sopenharmony_ci min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; 25878c2ecf20Sopenharmony_ci opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | 25888c2ecf20Sopenharmony_ci PIN_BASED_VMX_PREEMPTION_TIMER; 25898c2ecf20Sopenharmony_ci if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, 25908c2ecf20Sopenharmony_ci &_pin_based_exec_control) < 0) 25918c2ecf20Sopenharmony_ci return -EIO; 25928c2ecf20Sopenharmony_ci 25938c2ecf20Sopenharmony_ci if (cpu_has_broken_vmx_preemption_timer()) 25948c2ecf20Sopenharmony_ci _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; 25958c2ecf20Sopenharmony_ci if (!(_cpu_based_2nd_exec_control & 25968c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)) 25978c2ecf20Sopenharmony_ci _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; 25988c2ecf20Sopenharmony_ci 25998c2ecf20Sopenharmony_ci min = VM_ENTRY_LOAD_DEBUG_CONTROLS; 26008c2ecf20Sopenharmony_ci opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | 26018c2ecf20Sopenharmony_ci VM_ENTRY_LOAD_IA32_PAT | 26028c2ecf20Sopenharmony_ci VM_ENTRY_LOAD_IA32_EFER | 26038c2ecf20Sopenharmony_ci VM_ENTRY_LOAD_BNDCFGS | 26048c2ecf20Sopenharmony_ci VM_ENTRY_PT_CONCEAL_PIP | 26058c2ecf20Sopenharmony_ci VM_ENTRY_LOAD_IA32_RTIT_CTL; 26068c2ecf20Sopenharmony_ci if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, 26078c2ecf20Sopenharmony_ci &_vmentry_control) < 0) 26088c2ecf20Sopenharmony_ci return -EIO; 26098c2ecf20Sopenharmony_ci 26108c2ecf20Sopenharmony_ci /* 26118c2ecf20Sopenharmony_ci * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they 26128c2ecf20Sopenharmony_ci * can't be used due to an errata where VM Exit may incorrectly clear 26138c2ecf20Sopenharmony_ci * IA32_PERF_GLOBAL_CTRL[34:32]. Workaround the errata by using the 26148c2ecf20Sopenharmony_ci * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL. 26158c2ecf20Sopenharmony_ci */ 26168c2ecf20Sopenharmony_ci if (boot_cpu_data.x86 == 0x6) { 26178c2ecf20Sopenharmony_ci switch (boot_cpu_data.x86_model) { 26188c2ecf20Sopenharmony_ci case 26: /* AAK155 */ 26198c2ecf20Sopenharmony_ci case 30: /* AAP115 */ 26208c2ecf20Sopenharmony_ci case 37: /* AAT100 */ 26218c2ecf20Sopenharmony_ci case 44: /* BC86,AAY89,BD102 */ 26228c2ecf20Sopenharmony_ci case 46: /* BA97 */ 26238c2ecf20Sopenharmony_ci _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; 26248c2ecf20Sopenharmony_ci _vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; 26258c2ecf20Sopenharmony_ci pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL " 26268c2ecf20Sopenharmony_ci "does not work properly. Using workaround\n"); 26278c2ecf20Sopenharmony_ci break; 26288c2ecf20Sopenharmony_ci default: 26298c2ecf20Sopenharmony_ci break; 26308c2ecf20Sopenharmony_ci } 26318c2ecf20Sopenharmony_ci } 26328c2ecf20Sopenharmony_ci 26338c2ecf20Sopenharmony_ci 26348c2ecf20Sopenharmony_ci rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); 26358c2ecf20Sopenharmony_ci 26368c2ecf20Sopenharmony_ci /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ 26378c2ecf20Sopenharmony_ci if ((vmx_msr_high & 0x1fff) > PAGE_SIZE) 26388c2ecf20Sopenharmony_ci return -EIO; 26398c2ecf20Sopenharmony_ci 26408c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 26418c2ecf20Sopenharmony_ci /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */ 26428c2ecf20Sopenharmony_ci if (vmx_msr_high & (1u<<16)) 26438c2ecf20Sopenharmony_ci return -EIO; 26448c2ecf20Sopenharmony_ci#endif 26458c2ecf20Sopenharmony_ci 26468c2ecf20Sopenharmony_ci /* Require Write-Back (WB) memory type for VMCS accesses. */ 26478c2ecf20Sopenharmony_ci if (((vmx_msr_high >> 18) & 15) != 6) 26488c2ecf20Sopenharmony_ci return -EIO; 26498c2ecf20Sopenharmony_ci 26508c2ecf20Sopenharmony_ci vmcs_conf->size = vmx_msr_high & 0x1fff; 26518c2ecf20Sopenharmony_ci vmcs_conf->order = get_order(vmcs_conf->size); 26528c2ecf20Sopenharmony_ci vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; 26538c2ecf20Sopenharmony_ci 26548c2ecf20Sopenharmony_ci vmcs_conf->revision_id = vmx_msr_low; 26558c2ecf20Sopenharmony_ci 26568c2ecf20Sopenharmony_ci vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; 26578c2ecf20Sopenharmony_ci vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; 26588c2ecf20Sopenharmony_ci vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control; 26598c2ecf20Sopenharmony_ci vmcs_conf->vmexit_ctrl = _vmexit_control; 26608c2ecf20Sopenharmony_ci vmcs_conf->vmentry_ctrl = _vmentry_control; 26618c2ecf20Sopenharmony_ci 26628c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_HYPERV) 26638c2ecf20Sopenharmony_ci if (enlightened_vmcs) 26648c2ecf20Sopenharmony_ci evmcs_sanitize_exec_ctrls(vmcs_conf); 26658c2ecf20Sopenharmony_ci#endif 26668c2ecf20Sopenharmony_ci 26678c2ecf20Sopenharmony_ci return 0; 26688c2ecf20Sopenharmony_ci} 26698c2ecf20Sopenharmony_ci 26708c2ecf20Sopenharmony_cistruct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) 26718c2ecf20Sopenharmony_ci{ 26728c2ecf20Sopenharmony_ci int node = cpu_to_node(cpu); 26738c2ecf20Sopenharmony_ci struct page *pages; 26748c2ecf20Sopenharmony_ci struct vmcs *vmcs; 26758c2ecf20Sopenharmony_ci 26768c2ecf20Sopenharmony_ci pages = __alloc_pages_node(node, flags, vmcs_config.order); 26778c2ecf20Sopenharmony_ci if (!pages) 26788c2ecf20Sopenharmony_ci return NULL; 26798c2ecf20Sopenharmony_ci vmcs = page_address(pages); 26808c2ecf20Sopenharmony_ci memset(vmcs, 0, vmcs_config.size); 26818c2ecf20Sopenharmony_ci 26828c2ecf20Sopenharmony_ci /* KVM supports Enlightened VMCS v1 only */ 26838c2ecf20Sopenharmony_ci if (static_branch_unlikely(&enable_evmcs)) 26848c2ecf20Sopenharmony_ci vmcs->hdr.revision_id = KVM_EVMCS_VERSION; 26858c2ecf20Sopenharmony_ci else 26868c2ecf20Sopenharmony_ci vmcs->hdr.revision_id = vmcs_config.revision_id; 26878c2ecf20Sopenharmony_ci 26888c2ecf20Sopenharmony_ci if (shadow) 26898c2ecf20Sopenharmony_ci vmcs->hdr.shadow_vmcs = 1; 26908c2ecf20Sopenharmony_ci return vmcs; 26918c2ecf20Sopenharmony_ci} 26928c2ecf20Sopenharmony_ci 26938c2ecf20Sopenharmony_civoid free_vmcs(struct vmcs *vmcs) 26948c2ecf20Sopenharmony_ci{ 26958c2ecf20Sopenharmony_ci free_pages((unsigned long)vmcs, vmcs_config.order); 26968c2ecf20Sopenharmony_ci} 26978c2ecf20Sopenharmony_ci 26988c2ecf20Sopenharmony_ci/* 26998c2ecf20Sopenharmony_ci * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded 27008c2ecf20Sopenharmony_ci */ 27018c2ecf20Sopenharmony_civoid free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) 27028c2ecf20Sopenharmony_ci{ 27038c2ecf20Sopenharmony_ci if (!loaded_vmcs->vmcs) 27048c2ecf20Sopenharmony_ci return; 27058c2ecf20Sopenharmony_ci loaded_vmcs_clear(loaded_vmcs); 27068c2ecf20Sopenharmony_ci free_vmcs(loaded_vmcs->vmcs); 27078c2ecf20Sopenharmony_ci loaded_vmcs->vmcs = NULL; 27088c2ecf20Sopenharmony_ci if (loaded_vmcs->msr_bitmap) 27098c2ecf20Sopenharmony_ci free_page((unsigned long)loaded_vmcs->msr_bitmap); 27108c2ecf20Sopenharmony_ci WARN_ON(loaded_vmcs->shadow_vmcs != NULL); 27118c2ecf20Sopenharmony_ci} 27128c2ecf20Sopenharmony_ci 27138c2ecf20Sopenharmony_ciint alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) 27148c2ecf20Sopenharmony_ci{ 27158c2ecf20Sopenharmony_ci loaded_vmcs->vmcs = alloc_vmcs(false); 27168c2ecf20Sopenharmony_ci if (!loaded_vmcs->vmcs) 27178c2ecf20Sopenharmony_ci return -ENOMEM; 27188c2ecf20Sopenharmony_ci 27198c2ecf20Sopenharmony_ci vmcs_clear(loaded_vmcs->vmcs); 27208c2ecf20Sopenharmony_ci 27218c2ecf20Sopenharmony_ci loaded_vmcs->shadow_vmcs = NULL; 27228c2ecf20Sopenharmony_ci loaded_vmcs->hv_timer_soft_disabled = false; 27238c2ecf20Sopenharmony_ci loaded_vmcs->cpu = -1; 27248c2ecf20Sopenharmony_ci loaded_vmcs->launched = 0; 27258c2ecf20Sopenharmony_ci 27268c2ecf20Sopenharmony_ci if (cpu_has_vmx_msr_bitmap()) { 27278c2ecf20Sopenharmony_ci loaded_vmcs->msr_bitmap = (unsigned long *) 27288c2ecf20Sopenharmony_ci __get_free_page(GFP_KERNEL_ACCOUNT); 27298c2ecf20Sopenharmony_ci if (!loaded_vmcs->msr_bitmap) 27308c2ecf20Sopenharmony_ci goto out_vmcs; 27318c2ecf20Sopenharmony_ci memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); 27328c2ecf20Sopenharmony_ci } 27338c2ecf20Sopenharmony_ci 27348c2ecf20Sopenharmony_ci memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state)); 27358c2ecf20Sopenharmony_ci memset(&loaded_vmcs->controls_shadow, 0, 27368c2ecf20Sopenharmony_ci sizeof(struct vmcs_controls_shadow)); 27378c2ecf20Sopenharmony_ci 27388c2ecf20Sopenharmony_ci return 0; 27398c2ecf20Sopenharmony_ci 27408c2ecf20Sopenharmony_ciout_vmcs: 27418c2ecf20Sopenharmony_ci free_loaded_vmcs(loaded_vmcs); 27428c2ecf20Sopenharmony_ci return -ENOMEM; 27438c2ecf20Sopenharmony_ci} 27448c2ecf20Sopenharmony_ci 27458c2ecf20Sopenharmony_cistatic void free_kvm_area(void) 27468c2ecf20Sopenharmony_ci{ 27478c2ecf20Sopenharmony_ci int cpu; 27488c2ecf20Sopenharmony_ci 27498c2ecf20Sopenharmony_ci for_each_possible_cpu(cpu) { 27508c2ecf20Sopenharmony_ci free_vmcs(per_cpu(vmxarea, cpu)); 27518c2ecf20Sopenharmony_ci per_cpu(vmxarea, cpu) = NULL; 27528c2ecf20Sopenharmony_ci } 27538c2ecf20Sopenharmony_ci} 27548c2ecf20Sopenharmony_ci 27558c2ecf20Sopenharmony_cistatic __init int alloc_kvm_area(void) 27568c2ecf20Sopenharmony_ci{ 27578c2ecf20Sopenharmony_ci int cpu; 27588c2ecf20Sopenharmony_ci 27598c2ecf20Sopenharmony_ci for_each_possible_cpu(cpu) { 27608c2ecf20Sopenharmony_ci struct vmcs *vmcs; 27618c2ecf20Sopenharmony_ci 27628c2ecf20Sopenharmony_ci vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL); 27638c2ecf20Sopenharmony_ci if (!vmcs) { 27648c2ecf20Sopenharmony_ci free_kvm_area(); 27658c2ecf20Sopenharmony_ci return -ENOMEM; 27668c2ecf20Sopenharmony_ci } 27678c2ecf20Sopenharmony_ci 27688c2ecf20Sopenharmony_ci /* 27698c2ecf20Sopenharmony_ci * When eVMCS is enabled, alloc_vmcs_cpu() sets 27708c2ecf20Sopenharmony_ci * vmcs->revision_id to KVM_EVMCS_VERSION instead of 27718c2ecf20Sopenharmony_ci * revision_id reported by MSR_IA32_VMX_BASIC. 27728c2ecf20Sopenharmony_ci * 27738c2ecf20Sopenharmony_ci * However, even though not explicitly documented by 27748c2ecf20Sopenharmony_ci * TLFS, VMXArea passed as VMXON argument should 27758c2ecf20Sopenharmony_ci * still be marked with revision_id reported by 27768c2ecf20Sopenharmony_ci * physical CPU. 27778c2ecf20Sopenharmony_ci */ 27788c2ecf20Sopenharmony_ci if (static_branch_unlikely(&enable_evmcs)) 27798c2ecf20Sopenharmony_ci vmcs->hdr.revision_id = vmcs_config.revision_id; 27808c2ecf20Sopenharmony_ci 27818c2ecf20Sopenharmony_ci per_cpu(vmxarea, cpu) = vmcs; 27828c2ecf20Sopenharmony_ci } 27838c2ecf20Sopenharmony_ci return 0; 27848c2ecf20Sopenharmony_ci} 27858c2ecf20Sopenharmony_ci 27868c2ecf20Sopenharmony_cistatic void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, 27878c2ecf20Sopenharmony_ci struct kvm_segment *save) 27888c2ecf20Sopenharmony_ci{ 27898c2ecf20Sopenharmony_ci if (!emulate_invalid_guest_state) { 27908c2ecf20Sopenharmony_ci /* 27918c2ecf20Sopenharmony_ci * CS and SS RPL should be equal during guest entry according 27928c2ecf20Sopenharmony_ci * to VMX spec, but in reality it is not always so. Since vcpu 27938c2ecf20Sopenharmony_ci * is in the middle of the transition from real mode to 27948c2ecf20Sopenharmony_ci * protected mode it is safe to assume that RPL 0 is a good 27958c2ecf20Sopenharmony_ci * default value. 27968c2ecf20Sopenharmony_ci */ 27978c2ecf20Sopenharmony_ci if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) 27988c2ecf20Sopenharmony_ci save->selector &= ~SEGMENT_RPL_MASK; 27998c2ecf20Sopenharmony_ci save->dpl = save->selector & SEGMENT_RPL_MASK; 28008c2ecf20Sopenharmony_ci save->s = 1; 28018c2ecf20Sopenharmony_ci } 28028c2ecf20Sopenharmony_ci vmx_set_segment(vcpu, save, seg); 28038c2ecf20Sopenharmony_ci} 28048c2ecf20Sopenharmony_ci 28058c2ecf20Sopenharmony_cistatic void enter_pmode(struct kvm_vcpu *vcpu) 28068c2ecf20Sopenharmony_ci{ 28078c2ecf20Sopenharmony_ci unsigned long flags; 28088c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 28098c2ecf20Sopenharmony_ci 28108c2ecf20Sopenharmony_ci /* 28118c2ecf20Sopenharmony_ci * Update real mode segment cache. It may be not up-to-date if sement 28128c2ecf20Sopenharmony_ci * register was written while vcpu was in a guest mode. 28138c2ecf20Sopenharmony_ci */ 28148c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); 28158c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); 28168c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); 28178c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); 28188c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); 28198c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); 28208c2ecf20Sopenharmony_ci 28218c2ecf20Sopenharmony_ci vmx->rmode.vm86_active = 0; 28228c2ecf20Sopenharmony_ci 28238c2ecf20Sopenharmony_ci vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); 28248c2ecf20Sopenharmony_ci 28258c2ecf20Sopenharmony_ci flags = vmcs_readl(GUEST_RFLAGS); 28268c2ecf20Sopenharmony_ci flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; 28278c2ecf20Sopenharmony_ci flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; 28288c2ecf20Sopenharmony_ci vmcs_writel(GUEST_RFLAGS, flags); 28298c2ecf20Sopenharmony_ci 28308c2ecf20Sopenharmony_ci vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | 28318c2ecf20Sopenharmony_ci (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME)); 28328c2ecf20Sopenharmony_ci 28338c2ecf20Sopenharmony_ci update_exception_bitmap(vcpu); 28348c2ecf20Sopenharmony_ci 28358c2ecf20Sopenharmony_ci fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); 28368c2ecf20Sopenharmony_ci fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); 28378c2ecf20Sopenharmony_ci fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); 28388c2ecf20Sopenharmony_ci fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); 28398c2ecf20Sopenharmony_ci fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); 28408c2ecf20Sopenharmony_ci fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); 28418c2ecf20Sopenharmony_ci} 28428c2ecf20Sopenharmony_ci 28438c2ecf20Sopenharmony_cistatic void fix_rmode_seg(int seg, struct kvm_segment *save) 28448c2ecf20Sopenharmony_ci{ 28458c2ecf20Sopenharmony_ci const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 28468c2ecf20Sopenharmony_ci struct kvm_segment var = *save; 28478c2ecf20Sopenharmony_ci 28488c2ecf20Sopenharmony_ci var.dpl = 0x3; 28498c2ecf20Sopenharmony_ci if (seg == VCPU_SREG_CS) 28508c2ecf20Sopenharmony_ci var.type = 0x3; 28518c2ecf20Sopenharmony_ci 28528c2ecf20Sopenharmony_ci if (!emulate_invalid_guest_state) { 28538c2ecf20Sopenharmony_ci var.selector = var.base >> 4; 28548c2ecf20Sopenharmony_ci var.base = var.base & 0xffff0; 28558c2ecf20Sopenharmony_ci var.limit = 0xffff; 28568c2ecf20Sopenharmony_ci var.g = 0; 28578c2ecf20Sopenharmony_ci var.db = 0; 28588c2ecf20Sopenharmony_ci var.present = 1; 28598c2ecf20Sopenharmony_ci var.s = 1; 28608c2ecf20Sopenharmony_ci var.l = 0; 28618c2ecf20Sopenharmony_ci var.unusable = 0; 28628c2ecf20Sopenharmony_ci var.type = 0x3; 28638c2ecf20Sopenharmony_ci var.avl = 0; 28648c2ecf20Sopenharmony_ci if (save->base & 0xf) 28658c2ecf20Sopenharmony_ci printk_once(KERN_WARNING "kvm: segment base is not " 28668c2ecf20Sopenharmony_ci "paragraph aligned when entering " 28678c2ecf20Sopenharmony_ci "protected mode (seg=%d)", seg); 28688c2ecf20Sopenharmony_ci } 28698c2ecf20Sopenharmony_ci 28708c2ecf20Sopenharmony_ci vmcs_write16(sf->selector, var.selector); 28718c2ecf20Sopenharmony_ci vmcs_writel(sf->base, var.base); 28728c2ecf20Sopenharmony_ci vmcs_write32(sf->limit, var.limit); 28738c2ecf20Sopenharmony_ci vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); 28748c2ecf20Sopenharmony_ci} 28758c2ecf20Sopenharmony_ci 28768c2ecf20Sopenharmony_cistatic void enter_rmode(struct kvm_vcpu *vcpu) 28778c2ecf20Sopenharmony_ci{ 28788c2ecf20Sopenharmony_ci unsigned long flags; 28798c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 28808c2ecf20Sopenharmony_ci struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm); 28818c2ecf20Sopenharmony_ci 28828c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); 28838c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); 28848c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); 28858c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); 28868c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); 28878c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); 28888c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); 28898c2ecf20Sopenharmony_ci 28908c2ecf20Sopenharmony_ci vmx->rmode.vm86_active = 1; 28918c2ecf20Sopenharmony_ci 28928c2ecf20Sopenharmony_ci /* 28938c2ecf20Sopenharmony_ci * Very old userspace does not call KVM_SET_TSS_ADDR before entering 28948c2ecf20Sopenharmony_ci * vcpu. Warn the user that an update is overdue. 28958c2ecf20Sopenharmony_ci */ 28968c2ecf20Sopenharmony_ci if (!kvm_vmx->tss_addr) 28978c2ecf20Sopenharmony_ci printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " 28988c2ecf20Sopenharmony_ci "called before entering vcpu\n"); 28998c2ecf20Sopenharmony_ci 29008c2ecf20Sopenharmony_ci vmx_segment_cache_clear(vmx); 29018c2ecf20Sopenharmony_ci 29028c2ecf20Sopenharmony_ci vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr); 29038c2ecf20Sopenharmony_ci vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); 29048c2ecf20Sopenharmony_ci vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); 29058c2ecf20Sopenharmony_ci 29068c2ecf20Sopenharmony_ci flags = vmcs_readl(GUEST_RFLAGS); 29078c2ecf20Sopenharmony_ci vmx->rmode.save_rflags = flags; 29088c2ecf20Sopenharmony_ci 29098c2ecf20Sopenharmony_ci flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 29108c2ecf20Sopenharmony_ci 29118c2ecf20Sopenharmony_ci vmcs_writel(GUEST_RFLAGS, flags); 29128c2ecf20Sopenharmony_ci vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); 29138c2ecf20Sopenharmony_ci update_exception_bitmap(vcpu); 29148c2ecf20Sopenharmony_ci 29158c2ecf20Sopenharmony_ci fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); 29168c2ecf20Sopenharmony_ci fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); 29178c2ecf20Sopenharmony_ci fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); 29188c2ecf20Sopenharmony_ci fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); 29198c2ecf20Sopenharmony_ci fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); 29208c2ecf20Sopenharmony_ci fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); 29218c2ecf20Sopenharmony_ci 29228c2ecf20Sopenharmony_ci kvm_mmu_reset_context(vcpu); 29238c2ecf20Sopenharmony_ci} 29248c2ecf20Sopenharmony_ci 29258c2ecf20Sopenharmony_ciint vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) 29268c2ecf20Sopenharmony_ci{ 29278c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 29288c2ecf20Sopenharmony_ci struct vmx_uret_msr *msr = vmx_find_uret_msr(vmx, MSR_EFER); 29298c2ecf20Sopenharmony_ci 29308c2ecf20Sopenharmony_ci /* Nothing to do if hardware doesn't support EFER. */ 29318c2ecf20Sopenharmony_ci if (!msr) 29328c2ecf20Sopenharmony_ci return 0; 29338c2ecf20Sopenharmony_ci 29348c2ecf20Sopenharmony_ci vcpu->arch.efer = efer; 29358c2ecf20Sopenharmony_ci if (efer & EFER_LMA) { 29368c2ecf20Sopenharmony_ci vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); 29378c2ecf20Sopenharmony_ci msr->data = efer; 29388c2ecf20Sopenharmony_ci } else { 29398c2ecf20Sopenharmony_ci vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); 29408c2ecf20Sopenharmony_ci 29418c2ecf20Sopenharmony_ci msr->data = efer & ~EFER_LME; 29428c2ecf20Sopenharmony_ci } 29438c2ecf20Sopenharmony_ci setup_msrs(vmx); 29448c2ecf20Sopenharmony_ci return 0; 29458c2ecf20Sopenharmony_ci} 29468c2ecf20Sopenharmony_ci 29478c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 29488c2ecf20Sopenharmony_ci 29498c2ecf20Sopenharmony_cistatic void enter_lmode(struct kvm_vcpu *vcpu) 29508c2ecf20Sopenharmony_ci{ 29518c2ecf20Sopenharmony_ci u32 guest_tr_ar; 29528c2ecf20Sopenharmony_ci 29538c2ecf20Sopenharmony_ci vmx_segment_cache_clear(to_vmx(vcpu)); 29548c2ecf20Sopenharmony_ci 29558c2ecf20Sopenharmony_ci guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); 29568c2ecf20Sopenharmony_ci if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) { 29578c2ecf20Sopenharmony_ci pr_debug_ratelimited("%s: tss fixup for long mode. \n", 29588c2ecf20Sopenharmony_ci __func__); 29598c2ecf20Sopenharmony_ci vmcs_write32(GUEST_TR_AR_BYTES, 29608c2ecf20Sopenharmony_ci (guest_tr_ar & ~VMX_AR_TYPE_MASK) 29618c2ecf20Sopenharmony_ci | VMX_AR_TYPE_BUSY_64_TSS); 29628c2ecf20Sopenharmony_ci } 29638c2ecf20Sopenharmony_ci vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA); 29648c2ecf20Sopenharmony_ci} 29658c2ecf20Sopenharmony_ci 29668c2ecf20Sopenharmony_cistatic void exit_lmode(struct kvm_vcpu *vcpu) 29678c2ecf20Sopenharmony_ci{ 29688c2ecf20Sopenharmony_ci vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); 29698c2ecf20Sopenharmony_ci vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); 29708c2ecf20Sopenharmony_ci} 29718c2ecf20Sopenharmony_ci 29728c2ecf20Sopenharmony_ci#endif 29738c2ecf20Sopenharmony_ci 29748c2ecf20Sopenharmony_cistatic void vmx_flush_tlb_all(struct kvm_vcpu *vcpu) 29758c2ecf20Sopenharmony_ci{ 29768c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 29778c2ecf20Sopenharmony_ci 29788c2ecf20Sopenharmony_ci /* 29798c2ecf20Sopenharmony_ci * INVEPT must be issued when EPT is enabled, irrespective of VPID, as 29808c2ecf20Sopenharmony_ci * the CPU is not required to invalidate guest-physical mappings on 29818c2ecf20Sopenharmony_ci * VM-Entry, even if VPID is disabled. Guest-physical mappings are 29828c2ecf20Sopenharmony_ci * associated with the root EPT structure and not any particular VPID 29838c2ecf20Sopenharmony_ci * (INVVPID also isn't required to invalidate guest-physical mappings). 29848c2ecf20Sopenharmony_ci */ 29858c2ecf20Sopenharmony_ci if (enable_ept) { 29868c2ecf20Sopenharmony_ci ept_sync_global(); 29878c2ecf20Sopenharmony_ci } else if (enable_vpid) { 29888c2ecf20Sopenharmony_ci if (cpu_has_vmx_invvpid_global()) { 29898c2ecf20Sopenharmony_ci vpid_sync_vcpu_global(); 29908c2ecf20Sopenharmony_ci } else { 29918c2ecf20Sopenharmony_ci vpid_sync_vcpu_single(vmx->vpid); 29928c2ecf20Sopenharmony_ci vpid_sync_vcpu_single(vmx->nested.vpid02); 29938c2ecf20Sopenharmony_ci } 29948c2ecf20Sopenharmony_ci } 29958c2ecf20Sopenharmony_ci} 29968c2ecf20Sopenharmony_ci 29978c2ecf20Sopenharmony_cistatic inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu) 29988c2ecf20Sopenharmony_ci{ 29998c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) 30008c2ecf20Sopenharmony_ci return nested_get_vpid02(vcpu); 30018c2ecf20Sopenharmony_ci return to_vmx(vcpu)->vpid; 30028c2ecf20Sopenharmony_ci} 30038c2ecf20Sopenharmony_ci 30048c2ecf20Sopenharmony_cistatic void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) 30058c2ecf20Sopenharmony_ci{ 30068c2ecf20Sopenharmony_ci struct kvm_mmu *mmu = vcpu->arch.mmu; 30078c2ecf20Sopenharmony_ci u64 root_hpa = mmu->root_hpa; 30088c2ecf20Sopenharmony_ci 30098c2ecf20Sopenharmony_ci /* No flush required if the current context is invalid. */ 30108c2ecf20Sopenharmony_ci if (!VALID_PAGE(root_hpa)) 30118c2ecf20Sopenharmony_ci return; 30128c2ecf20Sopenharmony_ci 30138c2ecf20Sopenharmony_ci if (enable_ept) 30148c2ecf20Sopenharmony_ci ept_sync_context(construct_eptp(vcpu, root_hpa, 30158c2ecf20Sopenharmony_ci mmu->shadow_root_level)); 30168c2ecf20Sopenharmony_ci else 30178c2ecf20Sopenharmony_ci vpid_sync_context(vmx_get_current_vpid(vcpu)); 30188c2ecf20Sopenharmony_ci} 30198c2ecf20Sopenharmony_ci 30208c2ecf20Sopenharmony_cistatic void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) 30218c2ecf20Sopenharmony_ci{ 30228c2ecf20Sopenharmony_ci /* 30238c2ecf20Sopenharmony_ci * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in 30248c2ecf20Sopenharmony_ci * vmx_flush_tlb_guest() for an explanation of why this is ok. 30258c2ecf20Sopenharmony_ci */ 30268c2ecf20Sopenharmony_ci vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr); 30278c2ecf20Sopenharmony_ci} 30288c2ecf20Sopenharmony_ci 30298c2ecf20Sopenharmony_cistatic void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) 30308c2ecf20Sopenharmony_ci{ 30318c2ecf20Sopenharmony_ci /* 30328c2ecf20Sopenharmony_ci * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a 30338c2ecf20Sopenharmony_ci * vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit are 30348c2ecf20Sopenharmony_ci * required to flush GVA->{G,H}PA mappings from the TLB if vpid is 30358c2ecf20Sopenharmony_ci * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed), 30368c2ecf20Sopenharmony_ci * i.e. no explicit INVVPID is necessary. 30378c2ecf20Sopenharmony_ci */ 30388c2ecf20Sopenharmony_ci vpid_sync_context(vmx_get_current_vpid(vcpu)); 30398c2ecf20Sopenharmony_ci} 30408c2ecf20Sopenharmony_ci 30418c2ecf20Sopenharmony_civoid vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu) 30428c2ecf20Sopenharmony_ci{ 30438c2ecf20Sopenharmony_ci struct kvm_mmu *mmu = vcpu->arch.walk_mmu; 30448c2ecf20Sopenharmony_ci 30458c2ecf20Sopenharmony_ci if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR)) 30468c2ecf20Sopenharmony_ci return; 30478c2ecf20Sopenharmony_ci 30488c2ecf20Sopenharmony_ci if (is_pae_paging(vcpu)) { 30498c2ecf20Sopenharmony_ci vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); 30508c2ecf20Sopenharmony_ci vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); 30518c2ecf20Sopenharmony_ci vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); 30528c2ecf20Sopenharmony_ci vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]); 30538c2ecf20Sopenharmony_ci } 30548c2ecf20Sopenharmony_ci} 30558c2ecf20Sopenharmony_ci 30568c2ecf20Sopenharmony_civoid ept_save_pdptrs(struct kvm_vcpu *vcpu) 30578c2ecf20Sopenharmony_ci{ 30588c2ecf20Sopenharmony_ci struct kvm_mmu *mmu = vcpu->arch.walk_mmu; 30598c2ecf20Sopenharmony_ci 30608c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(!is_pae_paging(vcpu))) 30618c2ecf20Sopenharmony_ci return; 30628c2ecf20Sopenharmony_ci 30638c2ecf20Sopenharmony_ci mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); 30648c2ecf20Sopenharmony_ci mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); 30658c2ecf20Sopenharmony_ci mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); 30668c2ecf20Sopenharmony_ci mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); 30678c2ecf20Sopenharmony_ci 30688c2ecf20Sopenharmony_ci kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); 30698c2ecf20Sopenharmony_ci} 30708c2ecf20Sopenharmony_ci 30718c2ecf20Sopenharmony_ci#define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \ 30728c2ecf20Sopenharmony_ci CPU_BASED_CR3_STORE_EXITING) 30738c2ecf20Sopenharmony_ci 30748c2ecf20Sopenharmony_civoid vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 30758c2ecf20Sopenharmony_ci{ 30768c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 30778c2ecf20Sopenharmony_ci unsigned long hw_cr0; 30788c2ecf20Sopenharmony_ci u32 tmp; 30798c2ecf20Sopenharmony_ci 30808c2ecf20Sopenharmony_ci hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF); 30818c2ecf20Sopenharmony_ci if (enable_unrestricted_guest) 30828c2ecf20Sopenharmony_ci hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; 30838c2ecf20Sopenharmony_ci else { 30848c2ecf20Sopenharmony_ci hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; 30858c2ecf20Sopenharmony_ci if (!enable_ept) 30868c2ecf20Sopenharmony_ci hw_cr0 |= X86_CR0_WP; 30878c2ecf20Sopenharmony_ci 30888c2ecf20Sopenharmony_ci if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) 30898c2ecf20Sopenharmony_ci enter_pmode(vcpu); 30908c2ecf20Sopenharmony_ci 30918c2ecf20Sopenharmony_ci if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) 30928c2ecf20Sopenharmony_ci enter_rmode(vcpu); 30938c2ecf20Sopenharmony_ci } 30948c2ecf20Sopenharmony_ci 30958c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 30968c2ecf20Sopenharmony_ci if (vcpu->arch.efer & EFER_LME) { 30978c2ecf20Sopenharmony_ci if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) 30988c2ecf20Sopenharmony_ci enter_lmode(vcpu); 30998c2ecf20Sopenharmony_ci if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) 31008c2ecf20Sopenharmony_ci exit_lmode(vcpu); 31018c2ecf20Sopenharmony_ci } 31028c2ecf20Sopenharmony_ci#endif 31038c2ecf20Sopenharmony_ci 31048c2ecf20Sopenharmony_ci if (enable_ept && !enable_unrestricted_guest) { 31058c2ecf20Sopenharmony_ci /* 31068c2ecf20Sopenharmony_ci * Ensure KVM has an up-to-date snapshot of the guest's CR3. If 31078c2ecf20Sopenharmony_ci * the below code _enables_ CR3 exiting, vmx_cache_reg() will 31088c2ecf20Sopenharmony_ci * (correctly) stop reading vmcs.GUEST_CR3 because it thinks 31098c2ecf20Sopenharmony_ci * KVM's CR3 is installed. 31108c2ecf20Sopenharmony_ci */ 31118c2ecf20Sopenharmony_ci if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) 31128c2ecf20Sopenharmony_ci vmx_cache_reg(vcpu, VCPU_EXREG_CR3); 31138c2ecf20Sopenharmony_ci 31148c2ecf20Sopenharmony_ci /* 31158c2ecf20Sopenharmony_ci * When running with EPT but not unrestricted guest, KVM must 31168c2ecf20Sopenharmony_ci * intercept CR3 accesses when paging is _disabled_. This is 31178c2ecf20Sopenharmony_ci * necessary because restricted guests can't actually run with 31188c2ecf20Sopenharmony_ci * paging disabled, and so KVM stuffs its own CR3 in order to 31198c2ecf20Sopenharmony_ci * run the guest when identity mapped page tables. 31208c2ecf20Sopenharmony_ci * 31218c2ecf20Sopenharmony_ci * Do _NOT_ check the old CR0.PG, e.g. to optimize away the 31228c2ecf20Sopenharmony_ci * update, it may be stale with respect to CR3 interception, 31238c2ecf20Sopenharmony_ci * e.g. after nested VM-Enter. 31248c2ecf20Sopenharmony_ci * 31258c2ecf20Sopenharmony_ci * Lastly, honor L1's desires, i.e. intercept CR3 loads and/or 31268c2ecf20Sopenharmony_ci * stores to forward them to L1, even if KVM does not need to 31278c2ecf20Sopenharmony_ci * intercept them to preserve its identity mapped page tables. 31288c2ecf20Sopenharmony_ci */ 31298c2ecf20Sopenharmony_ci if (!(cr0 & X86_CR0_PG)) { 31308c2ecf20Sopenharmony_ci exec_controls_setbit(vmx, CR3_EXITING_BITS); 31318c2ecf20Sopenharmony_ci } else if (!is_guest_mode(vcpu)) { 31328c2ecf20Sopenharmony_ci exec_controls_clearbit(vmx, CR3_EXITING_BITS); 31338c2ecf20Sopenharmony_ci } else { 31348c2ecf20Sopenharmony_ci tmp = exec_controls_get(vmx); 31358c2ecf20Sopenharmony_ci tmp &= ~CR3_EXITING_BITS; 31368c2ecf20Sopenharmony_ci tmp |= get_vmcs12(vcpu)->cpu_based_vm_exec_control & CR3_EXITING_BITS; 31378c2ecf20Sopenharmony_ci exec_controls_set(vmx, tmp); 31388c2ecf20Sopenharmony_ci } 31398c2ecf20Sopenharmony_ci 31408c2ecf20Sopenharmony_ci if (!is_paging(vcpu) != !(cr0 & X86_CR0_PG)) { 31418c2ecf20Sopenharmony_ci vcpu->arch.cr0 = cr0; 31428c2ecf20Sopenharmony_ci vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); 31438c2ecf20Sopenharmony_ci } 31448c2ecf20Sopenharmony_ci } 31458c2ecf20Sopenharmony_ci 31468c2ecf20Sopenharmony_ci vmcs_writel(CR0_READ_SHADOW, cr0); 31478c2ecf20Sopenharmony_ci vmcs_writel(GUEST_CR0, hw_cr0); 31488c2ecf20Sopenharmony_ci vcpu->arch.cr0 = cr0; 31498c2ecf20Sopenharmony_ci kvm_register_mark_available(vcpu, VCPU_EXREG_CR0); 31508c2ecf20Sopenharmony_ci 31518c2ecf20Sopenharmony_ci /* depends on vcpu->arch.cr0 to be set to a new value */ 31528c2ecf20Sopenharmony_ci vmx->emulation_required = emulation_required(vcpu); 31538c2ecf20Sopenharmony_ci} 31548c2ecf20Sopenharmony_ci 31558c2ecf20Sopenharmony_cistatic int vmx_get_max_tdp_level(void) 31568c2ecf20Sopenharmony_ci{ 31578c2ecf20Sopenharmony_ci if (cpu_has_vmx_ept_5levels()) 31588c2ecf20Sopenharmony_ci return 5; 31598c2ecf20Sopenharmony_ci return 4; 31608c2ecf20Sopenharmony_ci} 31618c2ecf20Sopenharmony_ci 31628c2ecf20Sopenharmony_ciu64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa, 31638c2ecf20Sopenharmony_ci int root_level) 31648c2ecf20Sopenharmony_ci{ 31658c2ecf20Sopenharmony_ci u64 eptp = VMX_EPTP_MT_WB; 31668c2ecf20Sopenharmony_ci 31678c2ecf20Sopenharmony_ci eptp |= (root_level == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4; 31688c2ecf20Sopenharmony_ci 31698c2ecf20Sopenharmony_ci if (enable_ept_ad_bits && 31708c2ecf20Sopenharmony_ci (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) 31718c2ecf20Sopenharmony_ci eptp |= VMX_EPTP_AD_ENABLE_BIT; 31728c2ecf20Sopenharmony_ci eptp |= (root_hpa & PAGE_MASK); 31738c2ecf20Sopenharmony_ci 31748c2ecf20Sopenharmony_ci return eptp; 31758c2ecf20Sopenharmony_ci} 31768c2ecf20Sopenharmony_ci 31778c2ecf20Sopenharmony_cistatic void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd, 31788c2ecf20Sopenharmony_ci int pgd_level) 31798c2ecf20Sopenharmony_ci{ 31808c2ecf20Sopenharmony_ci struct kvm *kvm = vcpu->kvm; 31818c2ecf20Sopenharmony_ci bool update_guest_cr3 = true; 31828c2ecf20Sopenharmony_ci unsigned long guest_cr3; 31838c2ecf20Sopenharmony_ci u64 eptp; 31848c2ecf20Sopenharmony_ci 31858c2ecf20Sopenharmony_ci if (enable_ept) { 31868c2ecf20Sopenharmony_ci eptp = construct_eptp(vcpu, pgd, pgd_level); 31878c2ecf20Sopenharmony_ci vmcs_write64(EPT_POINTER, eptp); 31888c2ecf20Sopenharmony_ci 31898c2ecf20Sopenharmony_ci if (kvm_x86_ops.tlb_remote_flush) { 31908c2ecf20Sopenharmony_ci spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); 31918c2ecf20Sopenharmony_ci to_vmx(vcpu)->ept_pointer = eptp; 31928c2ecf20Sopenharmony_ci to_kvm_vmx(kvm)->ept_pointers_match 31938c2ecf20Sopenharmony_ci = EPT_POINTERS_CHECK; 31948c2ecf20Sopenharmony_ci spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); 31958c2ecf20Sopenharmony_ci } 31968c2ecf20Sopenharmony_ci 31978c2ecf20Sopenharmony_ci if (!enable_unrestricted_guest && !is_paging(vcpu)) 31988c2ecf20Sopenharmony_ci guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; 31998c2ecf20Sopenharmony_ci else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) 32008c2ecf20Sopenharmony_ci guest_cr3 = vcpu->arch.cr3; 32018c2ecf20Sopenharmony_ci else /* vmcs01.GUEST_CR3 is already up-to-date. */ 32028c2ecf20Sopenharmony_ci update_guest_cr3 = false; 32038c2ecf20Sopenharmony_ci vmx_ept_load_pdptrs(vcpu); 32048c2ecf20Sopenharmony_ci } else { 32058c2ecf20Sopenharmony_ci guest_cr3 = pgd; 32068c2ecf20Sopenharmony_ci } 32078c2ecf20Sopenharmony_ci 32088c2ecf20Sopenharmony_ci if (update_guest_cr3) 32098c2ecf20Sopenharmony_ci vmcs_writel(GUEST_CR3, guest_cr3); 32108c2ecf20Sopenharmony_ci} 32118c2ecf20Sopenharmony_ci 32128c2ecf20Sopenharmony_cistatic bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 32138c2ecf20Sopenharmony_ci{ 32148c2ecf20Sopenharmony_ci /* 32158c2ecf20Sopenharmony_ci * We operate under the default treatment of SMM, so VMX cannot be 32168c2ecf20Sopenharmony_ci * enabled under SMM. Note, whether or not VMXE is allowed at all is 32178c2ecf20Sopenharmony_ci * handled by kvm_valid_cr4(). 32188c2ecf20Sopenharmony_ci */ 32198c2ecf20Sopenharmony_ci if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu)) 32208c2ecf20Sopenharmony_ci return false; 32218c2ecf20Sopenharmony_ci 32228c2ecf20Sopenharmony_ci if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) 32238c2ecf20Sopenharmony_ci return false; 32248c2ecf20Sopenharmony_ci 32258c2ecf20Sopenharmony_ci return true; 32268c2ecf20Sopenharmony_ci} 32278c2ecf20Sopenharmony_ci 32288c2ecf20Sopenharmony_civoid vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 32298c2ecf20Sopenharmony_ci{ 32308c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 32318c2ecf20Sopenharmony_ci /* 32328c2ecf20Sopenharmony_ci * Pass through host's Machine Check Enable value to hw_cr4, which 32338c2ecf20Sopenharmony_ci * is in force while we are in guest mode. Do not let guests control 32348c2ecf20Sopenharmony_ci * this bit, even if host CR4.MCE == 0. 32358c2ecf20Sopenharmony_ci */ 32368c2ecf20Sopenharmony_ci unsigned long hw_cr4; 32378c2ecf20Sopenharmony_ci 32388c2ecf20Sopenharmony_ci hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); 32398c2ecf20Sopenharmony_ci if (enable_unrestricted_guest) 32408c2ecf20Sopenharmony_ci hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; 32418c2ecf20Sopenharmony_ci else if (vmx->rmode.vm86_active) 32428c2ecf20Sopenharmony_ci hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON; 32438c2ecf20Sopenharmony_ci else 32448c2ecf20Sopenharmony_ci hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; 32458c2ecf20Sopenharmony_ci 32468c2ecf20Sopenharmony_ci if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) { 32478c2ecf20Sopenharmony_ci if (cr4 & X86_CR4_UMIP) { 32488c2ecf20Sopenharmony_ci secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC); 32498c2ecf20Sopenharmony_ci hw_cr4 &= ~X86_CR4_UMIP; 32508c2ecf20Sopenharmony_ci } else if (!is_guest_mode(vcpu) || 32518c2ecf20Sopenharmony_ci !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) { 32528c2ecf20Sopenharmony_ci secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC); 32538c2ecf20Sopenharmony_ci } 32548c2ecf20Sopenharmony_ci } 32558c2ecf20Sopenharmony_ci 32568c2ecf20Sopenharmony_ci vcpu->arch.cr4 = cr4; 32578c2ecf20Sopenharmony_ci kvm_register_mark_available(vcpu, VCPU_EXREG_CR4); 32588c2ecf20Sopenharmony_ci 32598c2ecf20Sopenharmony_ci if (!enable_unrestricted_guest) { 32608c2ecf20Sopenharmony_ci if (enable_ept) { 32618c2ecf20Sopenharmony_ci if (!is_paging(vcpu)) { 32628c2ecf20Sopenharmony_ci hw_cr4 &= ~X86_CR4_PAE; 32638c2ecf20Sopenharmony_ci hw_cr4 |= X86_CR4_PSE; 32648c2ecf20Sopenharmony_ci } else if (!(cr4 & X86_CR4_PAE)) { 32658c2ecf20Sopenharmony_ci hw_cr4 &= ~X86_CR4_PAE; 32668c2ecf20Sopenharmony_ci } 32678c2ecf20Sopenharmony_ci } 32688c2ecf20Sopenharmony_ci 32698c2ecf20Sopenharmony_ci /* 32708c2ecf20Sopenharmony_ci * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in 32718c2ecf20Sopenharmony_ci * hardware. To emulate this behavior, SMEP/SMAP/PKU needs 32728c2ecf20Sopenharmony_ci * to be manually disabled when guest switches to non-paging 32738c2ecf20Sopenharmony_ci * mode. 32748c2ecf20Sopenharmony_ci * 32758c2ecf20Sopenharmony_ci * If !enable_unrestricted_guest, the CPU is always running 32768c2ecf20Sopenharmony_ci * with CR0.PG=1 and CR4 needs to be modified. 32778c2ecf20Sopenharmony_ci * If enable_unrestricted_guest, the CPU automatically 32788c2ecf20Sopenharmony_ci * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0. 32798c2ecf20Sopenharmony_ci */ 32808c2ecf20Sopenharmony_ci if (!is_paging(vcpu)) 32818c2ecf20Sopenharmony_ci hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); 32828c2ecf20Sopenharmony_ci } 32838c2ecf20Sopenharmony_ci 32848c2ecf20Sopenharmony_ci vmcs_writel(CR4_READ_SHADOW, cr4); 32858c2ecf20Sopenharmony_ci vmcs_writel(GUEST_CR4, hw_cr4); 32868c2ecf20Sopenharmony_ci} 32878c2ecf20Sopenharmony_ci 32888c2ecf20Sopenharmony_civoid vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) 32898c2ecf20Sopenharmony_ci{ 32908c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 32918c2ecf20Sopenharmony_ci u32 ar; 32928c2ecf20Sopenharmony_ci 32938c2ecf20Sopenharmony_ci if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { 32948c2ecf20Sopenharmony_ci *var = vmx->rmode.segs[seg]; 32958c2ecf20Sopenharmony_ci if (seg == VCPU_SREG_TR 32968c2ecf20Sopenharmony_ci || var->selector == vmx_read_guest_seg_selector(vmx, seg)) 32978c2ecf20Sopenharmony_ci return; 32988c2ecf20Sopenharmony_ci var->base = vmx_read_guest_seg_base(vmx, seg); 32998c2ecf20Sopenharmony_ci var->selector = vmx_read_guest_seg_selector(vmx, seg); 33008c2ecf20Sopenharmony_ci return; 33018c2ecf20Sopenharmony_ci } 33028c2ecf20Sopenharmony_ci var->base = vmx_read_guest_seg_base(vmx, seg); 33038c2ecf20Sopenharmony_ci var->limit = vmx_read_guest_seg_limit(vmx, seg); 33048c2ecf20Sopenharmony_ci var->selector = vmx_read_guest_seg_selector(vmx, seg); 33058c2ecf20Sopenharmony_ci ar = vmx_read_guest_seg_ar(vmx, seg); 33068c2ecf20Sopenharmony_ci var->unusable = (ar >> 16) & 1; 33078c2ecf20Sopenharmony_ci var->type = ar & 15; 33088c2ecf20Sopenharmony_ci var->s = (ar >> 4) & 1; 33098c2ecf20Sopenharmony_ci var->dpl = (ar >> 5) & 3; 33108c2ecf20Sopenharmony_ci /* 33118c2ecf20Sopenharmony_ci * Some userspaces do not preserve unusable property. Since usable 33128c2ecf20Sopenharmony_ci * segment has to be present according to VMX spec we can use present 33138c2ecf20Sopenharmony_ci * property to amend userspace bug by making unusable segment always 33148c2ecf20Sopenharmony_ci * nonpresent. vmx_segment_access_rights() already marks nonpresent 33158c2ecf20Sopenharmony_ci * segment as unusable. 33168c2ecf20Sopenharmony_ci */ 33178c2ecf20Sopenharmony_ci var->present = !var->unusable; 33188c2ecf20Sopenharmony_ci var->avl = (ar >> 12) & 1; 33198c2ecf20Sopenharmony_ci var->l = (ar >> 13) & 1; 33208c2ecf20Sopenharmony_ci var->db = (ar >> 14) & 1; 33218c2ecf20Sopenharmony_ci var->g = (ar >> 15) & 1; 33228c2ecf20Sopenharmony_ci} 33238c2ecf20Sopenharmony_ci 33248c2ecf20Sopenharmony_cistatic u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) 33258c2ecf20Sopenharmony_ci{ 33268c2ecf20Sopenharmony_ci struct kvm_segment s; 33278c2ecf20Sopenharmony_ci 33288c2ecf20Sopenharmony_ci if (to_vmx(vcpu)->rmode.vm86_active) { 33298c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &s, seg); 33308c2ecf20Sopenharmony_ci return s.base; 33318c2ecf20Sopenharmony_ci } 33328c2ecf20Sopenharmony_ci return vmx_read_guest_seg_base(to_vmx(vcpu), seg); 33338c2ecf20Sopenharmony_ci} 33348c2ecf20Sopenharmony_ci 33358c2ecf20Sopenharmony_ciint vmx_get_cpl(struct kvm_vcpu *vcpu) 33368c2ecf20Sopenharmony_ci{ 33378c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 33388c2ecf20Sopenharmony_ci 33398c2ecf20Sopenharmony_ci if (unlikely(vmx->rmode.vm86_active)) 33408c2ecf20Sopenharmony_ci return 0; 33418c2ecf20Sopenharmony_ci else { 33428c2ecf20Sopenharmony_ci int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS); 33438c2ecf20Sopenharmony_ci return VMX_AR_DPL(ar); 33448c2ecf20Sopenharmony_ci } 33458c2ecf20Sopenharmony_ci} 33468c2ecf20Sopenharmony_ci 33478c2ecf20Sopenharmony_cistatic u32 vmx_segment_access_rights(struct kvm_segment *var) 33488c2ecf20Sopenharmony_ci{ 33498c2ecf20Sopenharmony_ci u32 ar; 33508c2ecf20Sopenharmony_ci 33518c2ecf20Sopenharmony_ci ar = var->type & 15; 33528c2ecf20Sopenharmony_ci ar |= (var->s & 1) << 4; 33538c2ecf20Sopenharmony_ci ar |= (var->dpl & 3) << 5; 33548c2ecf20Sopenharmony_ci ar |= (var->present & 1) << 7; 33558c2ecf20Sopenharmony_ci ar |= (var->avl & 1) << 12; 33568c2ecf20Sopenharmony_ci ar |= (var->l & 1) << 13; 33578c2ecf20Sopenharmony_ci ar |= (var->db & 1) << 14; 33588c2ecf20Sopenharmony_ci ar |= (var->g & 1) << 15; 33598c2ecf20Sopenharmony_ci ar |= (var->unusable || !var->present) << 16; 33608c2ecf20Sopenharmony_ci 33618c2ecf20Sopenharmony_ci return ar; 33628c2ecf20Sopenharmony_ci} 33638c2ecf20Sopenharmony_ci 33648c2ecf20Sopenharmony_civoid vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) 33658c2ecf20Sopenharmony_ci{ 33668c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 33678c2ecf20Sopenharmony_ci const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 33688c2ecf20Sopenharmony_ci 33698c2ecf20Sopenharmony_ci vmx_segment_cache_clear(vmx); 33708c2ecf20Sopenharmony_ci 33718c2ecf20Sopenharmony_ci if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { 33728c2ecf20Sopenharmony_ci vmx->rmode.segs[seg] = *var; 33738c2ecf20Sopenharmony_ci if (seg == VCPU_SREG_TR) 33748c2ecf20Sopenharmony_ci vmcs_write16(sf->selector, var->selector); 33758c2ecf20Sopenharmony_ci else if (var->s) 33768c2ecf20Sopenharmony_ci fix_rmode_seg(seg, &vmx->rmode.segs[seg]); 33778c2ecf20Sopenharmony_ci goto out; 33788c2ecf20Sopenharmony_ci } 33798c2ecf20Sopenharmony_ci 33808c2ecf20Sopenharmony_ci vmcs_writel(sf->base, var->base); 33818c2ecf20Sopenharmony_ci vmcs_write32(sf->limit, var->limit); 33828c2ecf20Sopenharmony_ci vmcs_write16(sf->selector, var->selector); 33838c2ecf20Sopenharmony_ci 33848c2ecf20Sopenharmony_ci /* 33858c2ecf20Sopenharmony_ci * Fix the "Accessed" bit in AR field of segment registers for older 33868c2ecf20Sopenharmony_ci * qemu binaries. 33878c2ecf20Sopenharmony_ci * IA32 arch specifies that at the time of processor reset the 33888c2ecf20Sopenharmony_ci * "Accessed" bit in the AR field of segment registers is 1. And qemu 33898c2ecf20Sopenharmony_ci * is setting it to 0 in the userland code. This causes invalid guest 33908c2ecf20Sopenharmony_ci * state vmexit when "unrestricted guest" mode is turned on. 33918c2ecf20Sopenharmony_ci * Fix for this setup issue in cpu_reset is being pushed in the qemu 33928c2ecf20Sopenharmony_ci * tree. Newer qemu binaries with that qemu fix would not need this 33938c2ecf20Sopenharmony_ci * kvm hack. 33948c2ecf20Sopenharmony_ci */ 33958c2ecf20Sopenharmony_ci if (is_unrestricted_guest(vcpu) && (seg != VCPU_SREG_LDTR)) 33968c2ecf20Sopenharmony_ci var->type |= 0x1; /* Accessed */ 33978c2ecf20Sopenharmony_ci 33988c2ecf20Sopenharmony_ci vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); 33998c2ecf20Sopenharmony_ci 34008c2ecf20Sopenharmony_ciout: 34018c2ecf20Sopenharmony_ci vmx->emulation_required = emulation_required(vcpu); 34028c2ecf20Sopenharmony_ci} 34038c2ecf20Sopenharmony_ci 34048c2ecf20Sopenharmony_cistatic void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 34058c2ecf20Sopenharmony_ci{ 34068c2ecf20Sopenharmony_ci u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); 34078c2ecf20Sopenharmony_ci 34088c2ecf20Sopenharmony_ci *db = (ar >> 14) & 1; 34098c2ecf20Sopenharmony_ci *l = (ar >> 13) & 1; 34108c2ecf20Sopenharmony_ci} 34118c2ecf20Sopenharmony_ci 34128c2ecf20Sopenharmony_cistatic void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 34138c2ecf20Sopenharmony_ci{ 34148c2ecf20Sopenharmony_ci dt->size = vmcs_read32(GUEST_IDTR_LIMIT); 34158c2ecf20Sopenharmony_ci dt->address = vmcs_readl(GUEST_IDTR_BASE); 34168c2ecf20Sopenharmony_ci} 34178c2ecf20Sopenharmony_ci 34188c2ecf20Sopenharmony_cistatic void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 34198c2ecf20Sopenharmony_ci{ 34208c2ecf20Sopenharmony_ci vmcs_write32(GUEST_IDTR_LIMIT, dt->size); 34218c2ecf20Sopenharmony_ci vmcs_writel(GUEST_IDTR_BASE, dt->address); 34228c2ecf20Sopenharmony_ci} 34238c2ecf20Sopenharmony_ci 34248c2ecf20Sopenharmony_cistatic void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 34258c2ecf20Sopenharmony_ci{ 34268c2ecf20Sopenharmony_ci dt->size = vmcs_read32(GUEST_GDTR_LIMIT); 34278c2ecf20Sopenharmony_ci dt->address = vmcs_readl(GUEST_GDTR_BASE); 34288c2ecf20Sopenharmony_ci} 34298c2ecf20Sopenharmony_ci 34308c2ecf20Sopenharmony_cistatic void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 34318c2ecf20Sopenharmony_ci{ 34328c2ecf20Sopenharmony_ci vmcs_write32(GUEST_GDTR_LIMIT, dt->size); 34338c2ecf20Sopenharmony_ci vmcs_writel(GUEST_GDTR_BASE, dt->address); 34348c2ecf20Sopenharmony_ci} 34358c2ecf20Sopenharmony_ci 34368c2ecf20Sopenharmony_cistatic bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) 34378c2ecf20Sopenharmony_ci{ 34388c2ecf20Sopenharmony_ci struct kvm_segment var; 34398c2ecf20Sopenharmony_ci u32 ar; 34408c2ecf20Sopenharmony_ci 34418c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &var, seg); 34428c2ecf20Sopenharmony_ci var.dpl = 0x3; 34438c2ecf20Sopenharmony_ci if (seg == VCPU_SREG_CS) 34448c2ecf20Sopenharmony_ci var.type = 0x3; 34458c2ecf20Sopenharmony_ci ar = vmx_segment_access_rights(&var); 34468c2ecf20Sopenharmony_ci 34478c2ecf20Sopenharmony_ci if (var.base != (var.selector << 4)) 34488c2ecf20Sopenharmony_ci return false; 34498c2ecf20Sopenharmony_ci if (var.limit != 0xffff) 34508c2ecf20Sopenharmony_ci return false; 34518c2ecf20Sopenharmony_ci if (ar != 0xf3) 34528c2ecf20Sopenharmony_ci return false; 34538c2ecf20Sopenharmony_ci 34548c2ecf20Sopenharmony_ci return true; 34558c2ecf20Sopenharmony_ci} 34568c2ecf20Sopenharmony_ci 34578c2ecf20Sopenharmony_cistatic bool code_segment_valid(struct kvm_vcpu *vcpu) 34588c2ecf20Sopenharmony_ci{ 34598c2ecf20Sopenharmony_ci struct kvm_segment cs; 34608c2ecf20Sopenharmony_ci unsigned int cs_rpl; 34618c2ecf20Sopenharmony_ci 34628c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); 34638c2ecf20Sopenharmony_ci cs_rpl = cs.selector & SEGMENT_RPL_MASK; 34648c2ecf20Sopenharmony_ci 34658c2ecf20Sopenharmony_ci if (cs.unusable) 34668c2ecf20Sopenharmony_ci return false; 34678c2ecf20Sopenharmony_ci if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK)) 34688c2ecf20Sopenharmony_ci return false; 34698c2ecf20Sopenharmony_ci if (!cs.s) 34708c2ecf20Sopenharmony_ci return false; 34718c2ecf20Sopenharmony_ci if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) { 34728c2ecf20Sopenharmony_ci if (cs.dpl > cs_rpl) 34738c2ecf20Sopenharmony_ci return false; 34748c2ecf20Sopenharmony_ci } else { 34758c2ecf20Sopenharmony_ci if (cs.dpl != cs_rpl) 34768c2ecf20Sopenharmony_ci return false; 34778c2ecf20Sopenharmony_ci } 34788c2ecf20Sopenharmony_ci if (!cs.present) 34798c2ecf20Sopenharmony_ci return false; 34808c2ecf20Sopenharmony_ci 34818c2ecf20Sopenharmony_ci /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */ 34828c2ecf20Sopenharmony_ci return true; 34838c2ecf20Sopenharmony_ci} 34848c2ecf20Sopenharmony_ci 34858c2ecf20Sopenharmony_cistatic bool stack_segment_valid(struct kvm_vcpu *vcpu) 34868c2ecf20Sopenharmony_ci{ 34878c2ecf20Sopenharmony_ci struct kvm_segment ss; 34888c2ecf20Sopenharmony_ci unsigned int ss_rpl; 34898c2ecf20Sopenharmony_ci 34908c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); 34918c2ecf20Sopenharmony_ci ss_rpl = ss.selector & SEGMENT_RPL_MASK; 34928c2ecf20Sopenharmony_ci 34938c2ecf20Sopenharmony_ci if (ss.unusable) 34948c2ecf20Sopenharmony_ci return true; 34958c2ecf20Sopenharmony_ci if (ss.type != 3 && ss.type != 7) 34968c2ecf20Sopenharmony_ci return false; 34978c2ecf20Sopenharmony_ci if (!ss.s) 34988c2ecf20Sopenharmony_ci return false; 34998c2ecf20Sopenharmony_ci if (ss.dpl != ss_rpl) /* DPL != RPL */ 35008c2ecf20Sopenharmony_ci return false; 35018c2ecf20Sopenharmony_ci if (!ss.present) 35028c2ecf20Sopenharmony_ci return false; 35038c2ecf20Sopenharmony_ci 35048c2ecf20Sopenharmony_ci return true; 35058c2ecf20Sopenharmony_ci} 35068c2ecf20Sopenharmony_ci 35078c2ecf20Sopenharmony_cistatic bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) 35088c2ecf20Sopenharmony_ci{ 35098c2ecf20Sopenharmony_ci struct kvm_segment var; 35108c2ecf20Sopenharmony_ci unsigned int rpl; 35118c2ecf20Sopenharmony_ci 35128c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &var, seg); 35138c2ecf20Sopenharmony_ci rpl = var.selector & SEGMENT_RPL_MASK; 35148c2ecf20Sopenharmony_ci 35158c2ecf20Sopenharmony_ci if (var.unusable) 35168c2ecf20Sopenharmony_ci return true; 35178c2ecf20Sopenharmony_ci if (!var.s) 35188c2ecf20Sopenharmony_ci return false; 35198c2ecf20Sopenharmony_ci if (!var.present) 35208c2ecf20Sopenharmony_ci return false; 35218c2ecf20Sopenharmony_ci if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) { 35228c2ecf20Sopenharmony_ci if (var.dpl < rpl) /* DPL < RPL */ 35238c2ecf20Sopenharmony_ci return false; 35248c2ecf20Sopenharmony_ci } 35258c2ecf20Sopenharmony_ci 35268c2ecf20Sopenharmony_ci /* TODO: Add other members to kvm_segment_field to allow checking for other access 35278c2ecf20Sopenharmony_ci * rights flags 35288c2ecf20Sopenharmony_ci */ 35298c2ecf20Sopenharmony_ci return true; 35308c2ecf20Sopenharmony_ci} 35318c2ecf20Sopenharmony_ci 35328c2ecf20Sopenharmony_cistatic bool tr_valid(struct kvm_vcpu *vcpu) 35338c2ecf20Sopenharmony_ci{ 35348c2ecf20Sopenharmony_ci struct kvm_segment tr; 35358c2ecf20Sopenharmony_ci 35368c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); 35378c2ecf20Sopenharmony_ci 35388c2ecf20Sopenharmony_ci if (tr.unusable) 35398c2ecf20Sopenharmony_ci return false; 35408c2ecf20Sopenharmony_ci if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */ 35418c2ecf20Sopenharmony_ci return false; 35428c2ecf20Sopenharmony_ci if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */ 35438c2ecf20Sopenharmony_ci return false; 35448c2ecf20Sopenharmony_ci if (!tr.present) 35458c2ecf20Sopenharmony_ci return false; 35468c2ecf20Sopenharmony_ci 35478c2ecf20Sopenharmony_ci return true; 35488c2ecf20Sopenharmony_ci} 35498c2ecf20Sopenharmony_ci 35508c2ecf20Sopenharmony_cistatic bool ldtr_valid(struct kvm_vcpu *vcpu) 35518c2ecf20Sopenharmony_ci{ 35528c2ecf20Sopenharmony_ci struct kvm_segment ldtr; 35538c2ecf20Sopenharmony_ci 35548c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); 35558c2ecf20Sopenharmony_ci 35568c2ecf20Sopenharmony_ci if (ldtr.unusable) 35578c2ecf20Sopenharmony_ci return true; 35588c2ecf20Sopenharmony_ci if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */ 35598c2ecf20Sopenharmony_ci return false; 35608c2ecf20Sopenharmony_ci if (ldtr.type != 2) 35618c2ecf20Sopenharmony_ci return false; 35628c2ecf20Sopenharmony_ci if (!ldtr.present) 35638c2ecf20Sopenharmony_ci return false; 35648c2ecf20Sopenharmony_ci 35658c2ecf20Sopenharmony_ci return true; 35668c2ecf20Sopenharmony_ci} 35678c2ecf20Sopenharmony_ci 35688c2ecf20Sopenharmony_cistatic bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) 35698c2ecf20Sopenharmony_ci{ 35708c2ecf20Sopenharmony_ci struct kvm_segment cs, ss; 35718c2ecf20Sopenharmony_ci 35728c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); 35738c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); 35748c2ecf20Sopenharmony_ci 35758c2ecf20Sopenharmony_ci return ((cs.selector & SEGMENT_RPL_MASK) == 35768c2ecf20Sopenharmony_ci (ss.selector & SEGMENT_RPL_MASK)); 35778c2ecf20Sopenharmony_ci} 35788c2ecf20Sopenharmony_ci 35798c2ecf20Sopenharmony_ci/* 35808c2ecf20Sopenharmony_ci * Check if guest state is valid. Returns true if valid, false if 35818c2ecf20Sopenharmony_ci * not. 35828c2ecf20Sopenharmony_ci * We assume that registers are always usable 35838c2ecf20Sopenharmony_ci */ 35848c2ecf20Sopenharmony_cibool __vmx_guest_state_valid(struct kvm_vcpu *vcpu) 35858c2ecf20Sopenharmony_ci{ 35868c2ecf20Sopenharmony_ci /* real mode guest state checks */ 35878c2ecf20Sopenharmony_ci if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { 35888c2ecf20Sopenharmony_ci if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) 35898c2ecf20Sopenharmony_ci return false; 35908c2ecf20Sopenharmony_ci if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) 35918c2ecf20Sopenharmony_ci return false; 35928c2ecf20Sopenharmony_ci if (!rmode_segment_valid(vcpu, VCPU_SREG_DS)) 35938c2ecf20Sopenharmony_ci return false; 35948c2ecf20Sopenharmony_ci if (!rmode_segment_valid(vcpu, VCPU_SREG_ES)) 35958c2ecf20Sopenharmony_ci return false; 35968c2ecf20Sopenharmony_ci if (!rmode_segment_valid(vcpu, VCPU_SREG_FS)) 35978c2ecf20Sopenharmony_ci return false; 35988c2ecf20Sopenharmony_ci if (!rmode_segment_valid(vcpu, VCPU_SREG_GS)) 35998c2ecf20Sopenharmony_ci return false; 36008c2ecf20Sopenharmony_ci } else { 36018c2ecf20Sopenharmony_ci /* protected mode guest state checks */ 36028c2ecf20Sopenharmony_ci if (!cs_ss_rpl_check(vcpu)) 36038c2ecf20Sopenharmony_ci return false; 36048c2ecf20Sopenharmony_ci if (!code_segment_valid(vcpu)) 36058c2ecf20Sopenharmony_ci return false; 36068c2ecf20Sopenharmony_ci if (!stack_segment_valid(vcpu)) 36078c2ecf20Sopenharmony_ci return false; 36088c2ecf20Sopenharmony_ci if (!data_segment_valid(vcpu, VCPU_SREG_DS)) 36098c2ecf20Sopenharmony_ci return false; 36108c2ecf20Sopenharmony_ci if (!data_segment_valid(vcpu, VCPU_SREG_ES)) 36118c2ecf20Sopenharmony_ci return false; 36128c2ecf20Sopenharmony_ci if (!data_segment_valid(vcpu, VCPU_SREG_FS)) 36138c2ecf20Sopenharmony_ci return false; 36148c2ecf20Sopenharmony_ci if (!data_segment_valid(vcpu, VCPU_SREG_GS)) 36158c2ecf20Sopenharmony_ci return false; 36168c2ecf20Sopenharmony_ci if (!tr_valid(vcpu)) 36178c2ecf20Sopenharmony_ci return false; 36188c2ecf20Sopenharmony_ci if (!ldtr_valid(vcpu)) 36198c2ecf20Sopenharmony_ci return false; 36208c2ecf20Sopenharmony_ci } 36218c2ecf20Sopenharmony_ci /* TODO: 36228c2ecf20Sopenharmony_ci * - Add checks on RIP 36238c2ecf20Sopenharmony_ci * - Add checks on RFLAGS 36248c2ecf20Sopenharmony_ci */ 36258c2ecf20Sopenharmony_ci 36268c2ecf20Sopenharmony_ci return true; 36278c2ecf20Sopenharmony_ci} 36288c2ecf20Sopenharmony_ci 36298c2ecf20Sopenharmony_cistatic int init_rmode_tss(struct kvm *kvm) 36308c2ecf20Sopenharmony_ci{ 36318c2ecf20Sopenharmony_ci gfn_t fn; 36328c2ecf20Sopenharmony_ci u16 data = 0; 36338c2ecf20Sopenharmony_ci int idx, r; 36348c2ecf20Sopenharmony_ci 36358c2ecf20Sopenharmony_ci idx = srcu_read_lock(&kvm->srcu); 36368c2ecf20Sopenharmony_ci fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT; 36378c2ecf20Sopenharmony_ci r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); 36388c2ecf20Sopenharmony_ci if (r < 0) 36398c2ecf20Sopenharmony_ci goto out; 36408c2ecf20Sopenharmony_ci data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; 36418c2ecf20Sopenharmony_ci r = kvm_write_guest_page(kvm, fn++, &data, 36428c2ecf20Sopenharmony_ci TSS_IOPB_BASE_OFFSET, sizeof(u16)); 36438c2ecf20Sopenharmony_ci if (r < 0) 36448c2ecf20Sopenharmony_ci goto out; 36458c2ecf20Sopenharmony_ci r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); 36468c2ecf20Sopenharmony_ci if (r < 0) 36478c2ecf20Sopenharmony_ci goto out; 36488c2ecf20Sopenharmony_ci r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); 36498c2ecf20Sopenharmony_ci if (r < 0) 36508c2ecf20Sopenharmony_ci goto out; 36518c2ecf20Sopenharmony_ci data = ~0; 36528c2ecf20Sopenharmony_ci r = kvm_write_guest_page(kvm, fn, &data, 36538c2ecf20Sopenharmony_ci RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, 36548c2ecf20Sopenharmony_ci sizeof(u8)); 36558c2ecf20Sopenharmony_ciout: 36568c2ecf20Sopenharmony_ci srcu_read_unlock(&kvm->srcu, idx); 36578c2ecf20Sopenharmony_ci return r; 36588c2ecf20Sopenharmony_ci} 36598c2ecf20Sopenharmony_ci 36608c2ecf20Sopenharmony_cistatic int init_rmode_identity_map(struct kvm *kvm) 36618c2ecf20Sopenharmony_ci{ 36628c2ecf20Sopenharmony_ci struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); 36638c2ecf20Sopenharmony_ci int i, r = 0; 36648c2ecf20Sopenharmony_ci kvm_pfn_t identity_map_pfn; 36658c2ecf20Sopenharmony_ci u32 tmp; 36668c2ecf20Sopenharmony_ci 36678c2ecf20Sopenharmony_ci /* Protect kvm_vmx->ept_identity_pagetable_done. */ 36688c2ecf20Sopenharmony_ci mutex_lock(&kvm->slots_lock); 36698c2ecf20Sopenharmony_ci 36708c2ecf20Sopenharmony_ci if (likely(kvm_vmx->ept_identity_pagetable_done)) 36718c2ecf20Sopenharmony_ci goto out; 36728c2ecf20Sopenharmony_ci 36738c2ecf20Sopenharmony_ci if (!kvm_vmx->ept_identity_map_addr) 36748c2ecf20Sopenharmony_ci kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; 36758c2ecf20Sopenharmony_ci identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT; 36768c2ecf20Sopenharmony_ci 36778c2ecf20Sopenharmony_ci r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 36788c2ecf20Sopenharmony_ci kvm_vmx->ept_identity_map_addr, PAGE_SIZE); 36798c2ecf20Sopenharmony_ci if (r < 0) 36808c2ecf20Sopenharmony_ci goto out; 36818c2ecf20Sopenharmony_ci 36828c2ecf20Sopenharmony_ci r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); 36838c2ecf20Sopenharmony_ci if (r < 0) 36848c2ecf20Sopenharmony_ci goto out; 36858c2ecf20Sopenharmony_ci /* Set up identity-mapping pagetable for EPT in real mode */ 36868c2ecf20Sopenharmony_ci for (i = 0; i < PT32_ENT_PER_PAGE; i++) { 36878c2ecf20Sopenharmony_ci tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | 36888c2ecf20Sopenharmony_ci _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); 36898c2ecf20Sopenharmony_ci r = kvm_write_guest_page(kvm, identity_map_pfn, 36908c2ecf20Sopenharmony_ci &tmp, i * sizeof(tmp), sizeof(tmp)); 36918c2ecf20Sopenharmony_ci if (r < 0) 36928c2ecf20Sopenharmony_ci goto out; 36938c2ecf20Sopenharmony_ci } 36948c2ecf20Sopenharmony_ci kvm_vmx->ept_identity_pagetable_done = true; 36958c2ecf20Sopenharmony_ci 36968c2ecf20Sopenharmony_ciout: 36978c2ecf20Sopenharmony_ci mutex_unlock(&kvm->slots_lock); 36988c2ecf20Sopenharmony_ci return r; 36998c2ecf20Sopenharmony_ci} 37008c2ecf20Sopenharmony_ci 37018c2ecf20Sopenharmony_cistatic void seg_setup(int seg) 37028c2ecf20Sopenharmony_ci{ 37038c2ecf20Sopenharmony_ci const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 37048c2ecf20Sopenharmony_ci unsigned int ar; 37058c2ecf20Sopenharmony_ci 37068c2ecf20Sopenharmony_ci vmcs_write16(sf->selector, 0); 37078c2ecf20Sopenharmony_ci vmcs_writel(sf->base, 0); 37088c2ecf20Sopenharmony_ci vmcs_write32(sf->limit, 0xffff); 37098c2ecf20Sopenharmony_ci ar = 0x93; 37108c2ecf20Sopenharmony_ci if (seg == VCPU_SREG_CS) 37118c2ecf20Sopenharmony_ci ar |= 0x08; /* code segment */ 37128c2ecf20Sopenharmony_ci 37138c2ecf20Sopenharmony_ci vmcs_write32(sf->ar_bytes, ar); 37148c2ecf20Sopenharmony_ci} 37158c2ecf20Sopenharmony_ci 37168c2ecf20Sopenharmony_cistatic int alloc_apic_access_page(struct kvm *kvm) 37178c2ecf20Sopenharmony_ci{ 37188c2ecf20Sopenharmony_ci struct page *page; 37198c2ecf20Sopenharmony_ci int r = 0; 37208c2ecf20Sopenharmony_ci 37218c2ecf20Sopenharmony_ci mutex_lock(&kvm->slots_lock); 37228c2ecf20Sopenharmony_ci if (kvm->arch.apic_access_page_done) 37238c2ecf20Sopenharmony_ci goto out; 37248c2ecf20Sopenharmony_ci r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 37258c2ecf20Sopenharmony_ci APIC_DEFAULT_PHYS_BASE, PAGE_SIZE); 37268c2ecf20Sopenharmony_ci if (r) 37278c2ecf20Sopenharmony_ci goto out; 37288c2ecf20Sopenharmony_ci 37298c2ecf20Sopenharmony_ci page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); 37308c2ecf20Sopenharmony_ci if (is_error_page(page)) { 37318c2ecf20Sopenharmony_ci r = -EFAULT; 37328c2ecf20Sopenharmony_ci goto out; 37338c2ecf20Sopenharmony_ci } 37348c2ecf20Sopenharmony_ci 37358c2ecf20Sopenharmony_ci /* 37368c2ecf20Sopenharmony_ci * Do not pin the page in memory, so that memory hot-unplug 37378c2ecf20Sopenharmony_ci * is able to migrate it. 37388c2ecf20Sopenharmony_ci */ 37398c2ecf20Sopenharmony_ci put_page(page); 37408c2ecf20Sopenharmony_ci kvm->arch.apic_access_page_done = true; 37418c2ecf20Sopenharmony_ciout: 37428c2ecf20Sopenharmony_ci mutex_unlock(&kvm->slots_lock); 37438c2ecf20Sopenharmony_ci return r; 37448c2ecf20Sopenharmony_ci} 37458c2ecf20Sopenharmony_ci 37468c2ecf20Sopenharmony_ciint allocate_vpid(void) 37478c2ecf20Sopenharmony_ci{ 37488c2ecf20Sopenharmony_ci int vpid; 37498c2ecf20Sopenharmony_ci 37508c2ecf20Sopenharmony_ci if (!enable_vpid) 37518c2ecf20Sopenharmony_ci return 0; 37528c2ecf20Sopenharmony_ci spin_lock(&vmx_vpid_lock); 37538c2ecf20Sopenharmony_ci vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); 37548c2ecf20Sopenharmony_ci if (vpid < VMX_NR_VPIDS) 37558c2ecf20Sopenharmony_ci __set_bit(vpid, vmx_vpid_bitmap); 37568c2ecf20Sopenharmony_ci else 37578c2ecf20Sopenharmony_ci vpid = 0; 37588c2ecf20Sopenharmony_ci spin_unlock(&vmx_vpid_lock); 37598c2ecf20Sopenharmony_ci return vpid; 37608c2ecf20Sopenharmony_ci} 37618c2ecf20Sopenharmony_ci 37628c2ecf20Sopenharmony_civoid free_vpid(int vpid) 37638c2ecf20Sopenharmony_ci{ 37648c2ecf20Sopenharmony_ci if (!enable_vpid || vpid == 0) 37658c2ecf20Sopenharmony_ci return; 37668c2ecf20Sopenharmony_ci spin_lock(&vmx_vpid_lock); 37678c2ecf20Sopenharmony_ci __clear_bit(vpid, vmx_vpid_bitmap); 37688c2ecf20Sopenharmony_ci spin_unlock(&vmx_vpid_lock); 37698c2ecf20Sopenharmony_ci} 37708c2ecf20Sopenharmony_ci 37718c2ecf20Sopenharmony_cistatic void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr) 37728c2ecf20Sopenharmony_ci{ 37738c2ecf20Sopenharmony_ci int f = sizeof(unsigned long); 37748c2ecf20Sopenharmony_ci 37758c2ecf20Sopenharmony_ci if (msr <= 0x1fff) 37768c2ecf20Sopenharmony_ci __clear_bit(msr, msr_bitmap + 0x000 / f); 37778c2ecf20Sopenharmony_ci else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) 37788c2ecf20Sopenharmony_ci __clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f); 37798c2ecf20Sopenharmony_ci} 37808c2ecf20Sopenharmony_ci 37818c2ecf20Sopenharmony_cistatic void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr) 37828c2ecf20Sopenharmony_ci{ 37838c2ecf20Sopenharmony_ci int f = sizeof(unsigned long); 37848c2ecf20Sopenharmony_ci 37858c2ecf20Sopenharmony_ci if (msr <= 0x1fff) 37868c2ecf20Sopenharmony_ci __clear_bit(msr, msr_bitmap + 0x800 / f); 37878c2ecf20Sopenharmony_ci else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) 37888c2ecf20Sopenharmony_ci __clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f); 37898c2ecf20Sopenharmony_ci} 37908c2ecf20Sopenharmony_ci 37918c2ecf20Sopenharmony_cistatic void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr) 37928c2ecf20Sopenharmony_ci{ 37938c2ecf20Sopenharmony_ci int f = sizeof(unsigned long); 37948c2ecf20Sopenharmony_ci 37958c2ecf20Sopenharmony_ci if (msr <= 0x1fff) 37968c2ecf20Sopenharmony_ci __set_bit(msr, msr_bitmap + 0x000 / f); 37978c2ecf20Sopenharmony_ci else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) 37988c2ecf20Sopenharmony_ci __set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f); 37998c2ecf20Sopenharmony_ci} 38008c2ecf20Sopenharmony_ci 38018c2ecf20Sopenharmony_cistatic void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr) 38028c2ecf20Sopenharmony_ci{ 38038c2ecf20Sopenharmony_ci int f = sizeof(unsigned long); 38048c2ecf20Sopenharmony_ci 38058c2ecf20Sopenharmony_ci if (msr <= 0x1fff) 38068c2ecf20Sopenharmony_ci __set_bit(msr, msr_bitmap + 0x800 / f); 38078c2ecf20Sopenharmony_ci else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) 38088c2ecf20Sopenharmony_ci __set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f); 38098c2ecf20Sopenharmony_ci} 38108c2ecf20Sopenharmony_ci 38118c2ecf20Sopenharmony_cistatic void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx) 38128c2ecf20Sopenharmony_ci{ 38138c2ecf20Sopenharmony_ci /* 38148c2ecf20Sopenharmony_ci * When KVM is a nested hypervisor on top of Hyper-V and uses 38158c2ecf20Sopenharmony_ci * 'Enlightened MSR Bitmap' feature L0 needs to know that MSR 38168c2ecf20Sopenharmony_ci * bitmap has changed. 38178c2ecf20Sopenharmony_ci */ 38188c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs)) { 38198c2ecf20Sopenharmony_ci struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs; 38208c2ecf20Sopenharmony_ci 38218c2ecf20Sopenharmony_ci if (evmcs->hv_enlightenments_control.msr_bitmap) 38228c2ecf20Sopenharmony_ci evmcs->hv_clean_fields &= 38238c2ecf20Sopenharmony_ci ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; 38248c2ecf20Sopenharmony_ci } 38258c2ecf20Sopenharmony_ci} 38268c2ecf20Sopenharmony_ci 38278c2ecf20Sopenharmony_cistatic __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, 38288c2ecf20Sopenharmony_ci u32 msr, int type) 38298c2ecf20Sopenharmony_ci{ 38308c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 38318c2ecf20Sopenharmony_ci unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; 38328c2ecf20Sopenharmony_ci 38338c2ecf20Sopenharmony_ci if (!cpu_has_vmx_msr_bitmap()) 38348c2ecf20Sopenharmony_ci return; 38358c2ecf20Sopenharmony_ci 38368c2ecf20Sopenharmony_ci vmx_msr_bitmap_l01_changed(vmx); 38378c2ecf20Sopenharmony_ci 38388c2ecf20Sopenharmony_ci /* 38398c2ecf20Sopenharmony_ci * Mark the desired intercept state in shadow bitmap, this is needed 38408c2ecf20Sopenharmony_ci * for resync when the MSR filters change. 38418c2ecf20Sopenharmony_ci */ 38428c2ecf20Sopenharmony_ci if (is_valid_passthrough_msr(msr)) { 38438c2ecf20Sopenharmony_ci int idx = possible_passthrough_msr_slot(msr); 38448c2ecf20Sopenharmony_ci 38458c2ecf20Sopenharmony_ci if (idx != -ENOENT) { 38468c2ecf20Sopenharmony_ci if (type & MSR_TYPE_R) 38478c2ecf20Sopenharmony_ci clear_bit(idx, vmx->shadow_msr_intercept.read); 38488c2ecf20Sopenharmony_ci if (type & MSR_TYPE_W) 38498c2ecf20Sopenharmony_ci clear_bit(idx, vmx->shadow_msr_intercept.write); 38508c2ecf20Sopenharmony_ci } 38518c2ecf20Sopenharmony_ci } 38528c2ecf20Sopenharmony_ci 38538c2ecf20Sopenharmony_ci if ((type & MSR_TYPE_R) && 38548c2ecf20Sopenharmony_ci !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) { 38558c2ecf20Sopenharmony_ci vmx_set_msr_bitmap_read(msr_bitmap, msr); 38568c2ecf20Sopenharmony_ci type &= ~MSR_TYPE_R; 38578c2ecf20Sopenharmony_ci } 38588c2ecf20Sopenharmony_ci 38598c2ecf20Sopenharmony_ci if ((type & MSR_TYPE_W) && 38608c2ecf20Sopenharmony_ci !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE)) { 38618c2ecf20Sopenharmony_ci vmx_set_msr_bitmap_write(msr_bitmap, msr); 38628c2ecf20Sopenharmony_ci type &= ~MSR_TYPE_W; 38638c2ecf20Sopenharmony_ci } 38648c2ecf20Sopenharmony_ci 38658c2ecf20Sopenharmony_ci if (type & MSR_TYPE_R) 38668c2ecf20Sopenharmony_ci vmx_clear_msr_bitmap_read(msr_bitmap, msr); 38678c2ecf20Sopenharmony_ci 38688c2ecf20Sopenharmony_ci if (type & MSR_TYPE_W) 38698c2ecf20Sopenharmony_ci vmx_clear_msr_bitmap_write(msr_bitmap, msr); 38708c2ecf20Sopenharmony_ci} 38718c2ecf20Sopenharmony_ci 38728c2ecf20Sopenharmony_cistatic __always_inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, 38738c2ecf20Sopenharmony_ci u32 msr, int type) 38748c2ecf20Sopenharmony_ci{ 38758c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 38768c2ecf20Sopenharmony_ci unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; 38778c2ecf20Sopenharmony_ci 38788c2ecf20Sopenharmony_ci if (!cpu_has_vmx_msr_bitmap()) 38798c2ecf20Sopenharmony_ci return; 38808c2ecf20Sopenharmony_ci 38818c2ecf20Sopenharmony_ci vmx_msr_bitmap_l01_changed(vmx); 38828c2ecf20Sopenharmony_ci 38838c2ecf20Sopenharmony_ci /* 38848c2ecf20Sopenharmony_ci * Mark the desired intercept state in shadow bitmap, this is needed 38858c2ecf20Sopenharmony_ci * for resync when the MSR filter changes. 38868c2ecf20Sopenharmony_ci */ 38878c2ecf20Sopenharmony_ci if (is_valid_passthrough_msr(msr)) { 38888c2ecf20Sopenharmony_ci int idx = possible_passthrough_msr_slot(msr); 38898c2ecf20Sopenharmony_ci 38908c2ecf20Sopenharmony_ci if (idx != -ENOENT) { 38918c2ecf20Sopenharmony_ci if (type & MSR_TYPE_R) 38928c2ecf20Sopenharmony_ci set_bit(idx, vmx->shadow_msr_intercept.read); 38938c2ecf20Sopenharmony_ci if (type & MSR_TYPE_W) 38948c2ecf20Sopenharmony_ci set_bit(idx, vmx->shadow_msr_intercept.write); 38958c2ecf20Sopenharmony_ci } 38968c2ecf20Sopenharmony_ci } 38978c2ecf20Sopenharmony_ci 38988c2ecf20Sopenharmony_ci if (type & MSR_TYPE_R) 38998c2ecf20Sopenharmony_ci vmx_set_msr_bitmap_read(msr_bitmap, msr); 39008c2ecf20Sopenharmony_ci 39018c2ecf20Sopenharmony_ci if (type & MSR_TYPE_W) 39028c2ecf20Sopenharmony_ci vmx_set_msr_bitmap_write(msr_bitmap, msr); 39038c2ecf20Sopenharmony_ci} 39048c2ecf20Sopenharmony_ci 39058c2ecf20Sopenharmony_cistatic __always_inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, 39068c2ecf20Sopenharmony_ci u32 msr, int type, bool value) 39078c2ecf20Sopenharmony_ci{ 39088c2ecf20Sopenharmony_ci if (value) 39098c2ecf20Sopenharmony_ci vmx_enable_intercept_for_msr(vcpu, msr, type); 39108c2ecf20Sopenharmony_ci else 39118c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, msr, type); 39128c2ecf20Sopenharmony_ci} 39138c2ecf20Sopenharmony_ci 39148c2ecf20Sopenharmony_cistatic u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) 39158c2ecf20Sopenharmony_ci{ 39168c2ecf20Sopenharmony_ci u8 mode = 0; 39178c2ecf20Sopenharmony_ci 39188c2ecf20Sopenharmony_ci if (cpu_has_secondary_exec_ctrls() && 39198c2ecf20Sopenharmony_ci (secondary_exec_controls_get(to_vmx(vcpu)) & 39208c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { 39218c2ecf20Sopenharmony_ci mode |= MSR_BITMAP_MODE_X2APIC; 39228c2ecf20Sopenharmony_ci if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) 39238c2ecf20Sopenharmony_ci mode |= MSR_BITMAP_MODE_X2APIC_APICV; 39248c2ecf20Sopenharmony_ci } 39258c2ecf20Sopenharmony_ci 39268c2ecf20Sopenharmony_ci return mode; 39278c2ecf20Sopenharmony_ci} 39288c2ecf20Sopenharmony_ci 39298c2ecf20Sopenharmony_cistatic void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode) 39308c2ecf20Sopenharmony_ci{ 39318c2ecf20Sopenharmony_ci unsigned long *msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; 39328c2ecf20Sopenharmony_ci unsigned long read_intercept; 39338c2ecf20Sopenharmony_ci int msr; 39348c2ecf20Sopenharmony_ci 39358c2ecf20Sopenharmony_ci read_intercept = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; 39368c2ecf20Sopenharmony_ci 39378c2ecf20Sopenharmony_ci for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { 39388c2ecf20Sopenharmony_ci unsigned int read_idx = msr / BITS_PER_LONG; 39398c2ecf20Sopenharmony_ci unsigned int write_idx = read_idx + (0x800 / sizeof(long)); 39408c2ecf20Sopenharmony_ci 39418c2ecf20Sopenharmony_ci msr_bitmap[read_idx] = read_intercept; 39428c2ecf20Sopenharmony_ci msr_bitmap[write_idx] = ~0ul; 39438c2ecf20Sopenharmony_ci } 39448c2ecf20Sopenharmony_ci} 39458c2ecf20Sopenharmony_ci 39468c2ecf20Sopenharmony_cistatic void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode) 39478c2ecf20Sopenharmony_ci{ 39488c2ecf20Sopenharmony_ci if (!cpu_has_vmx_msr_bitmap()) 39498c2ecf20Sopenharmony_ci return; 39508c2ecf20Sopenharmony_ci 39518c2ecf20Sopenharmony_ci vmx_reset_x2apic_msrs(vcpu, mode); 39528c2ecf20Sopenharmony_ci 39538c2ecf20Sopenharmony_ci /* 39548c2ecf20Sopenharmony_ci * TPR reads and writes can be virtualized even if virtual interrupt 39558c2ecf20Sopenharmony_ci * delivery is not in use. 39568c2ecf20Sopenharmony_ci */ 39578c2ecf20Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW, 39588c2ecf20Sopenharmony_ci !(mode & MSR_BITMAP_MODE_X2APIC)); 39598c2ecf20Sopenharmony_ci 39608c2ecf20Sopenharmony_ci if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { 39618c2ecf20Sopenharmony_ci vmx_enable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_RW); 39628c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); 39638c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); 39648c2ecf20Sopenharmony_ci } 39658c2ecf20Sopenharmony_ci} 39668c2ecf20Sopenharmony_ci 39678c2ecf20Sopenharmony_civoid vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) 39688c2ecf20Sopenharmony_ci{ 39698c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 39708c2ecf20Sopenharmony_ci u8 mode = vmx_msr_bitmap_mode(vcpu); 39718c2ecf20Sopenharmony_ci u8 changed = mode ^ vmx->msr_bitmap_mode; 39728c2ecf20Sopenharmony_ci 39738c2ecf20Sopenharmony_ci if (!changed) 39748c2ecf20Sopenharmony_ci return; 39758c2ecf20Sopenharmony_ci 39768c2ecf20Sopenharmony_ci if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) 39778c2ecf20Sopenharmony_ci vmx_update_msr_bitmap_x2apic(vcpu, mode); 39788c2ecf20Sopenharmony_ci 39798c2ecf20Sopenharmony_ci vmx->msr_bitmap_mode = mode; 39808c2ecf20Sopenharmony_ci} 39818c2ecf20Sopenharmony_ci 39828c2ecf20Sopenharmony_civoid pt_update_intercept_for_msr(struct kvm_vcpu *vcpu) 39838c2ecf20Sopenharmony_ci{ 39848c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 39858c2ecf20Sopenharmony_ci bool flag = !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN); 39868c2ecf20Sopenharmony_ci u32 i; 39878c2ecf20Sopenharmony_ci 39888c2ecf20Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_STATUS, MSR_TYPE_RW, flag); 39898c2ecf20Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_OUTPUT_BASE, MSR_TYPE_RW, flag); 39908c2ecf20Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_OUTPUT_MASK, MSR_TYPE_RW, flag); 39918c2ecf20Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_CR3_MATCH, MSR_TYPE_RW, flag); 39928c2ecf20Sopenharmony_ci for (i = 0; i < vmx->pt_desc.addr_range; i++) { 39938c2ecf20Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag); 39948c2ecf20Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag); 39958c2ecf20Sopenharmony_ci } 39968c2ecf20Sopenharmony_ci} 39978c2ecf20Sopenharmony_ci 39988c2ecf20Sopenharmony_cistatic bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) 39998c2ecf20Sopenharmony_ci{ 40008c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 40018c2ecf20Sopenharmony_ci void *vapic_page; 40028c2ecf20Sopenharmony_ci u32 vppr; 40038c2ecf20Sopenharmony_ci int rvi; 40048c2ecf20Sopenharmony_ci 40058c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(!is_guest_mode(vcpu)) || 40068c2ecf20Sopenharmony_ci !nested_cpu_has_vid(get_vmcs12(vcpu)) || 40078c2ecf20Sopenharmony_ci WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn)) 40088c2ecf20Sopenharmony_ci return false; 40098c2ecf20Sopenharmony_ci 40108c2ecf20Sopenharmony_ci rvi = vmx_get_rvi(); 40118c2ecf20Sopenharmony_ci 40128c2ecf20Sopenharmony_ci vapic_page = vmx->nested.virtual_apic_map.hva; 40138c2ecf20Sopenharmony_ci vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); 40148c2ecf20Sopenharmony_ci 40158c2ecf20Sopenharmony_ci return ((rvi & 0xf0) > (vppr & 0xf0)); 40168c2ecf20Sopenharmony_ci} 40178c2ecf20Sopenharmony_ci 40188c2ecf20Sopenharmony_cistatic void vmx_msr_filter_changed(struct kvm_vcpu *vcpu) 40198c2ecf20Sopenharmony_ci{ 40208c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 40218c2ecf20Sopenharmony_ci u32 i; 40228c2ecf20Sopenharmony_ci 40238c2ecf20Sopenharmony_ci /* 40248c2ecf20Sopenharmony_ci * Set intercept permissions for all potentially passed through MSRs 40258c2ecf20Sopenharmony_ci * again. They will automatically get filtered through the MSR filter, 40268c2ecf20Sopenharmony_ci * so we are back in sync after this. 40278c2ecf20Sopenharmony_ci */ 40288c2ecf20Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) { 40298c2ecf20Sopenharmony_ci u32 msr = vmx_possible_passthrough_msrs[i]; 40308c2ecf20Sopenharmony_ci bool read = test_bit(i, vmx->shadow_msr_intercept.read); 40318c2ecf20Sopenharmony_ci bool write = test_bit(i, vmx->shadow_msr_intercept.write); 40328c2ecf20Sopenharmony_ci 40338c2ecf20Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, msr, MSR_TYPE_R, read); 40348c2ecf20Sopenharmony_ci vmx_set_intercept_for_msr(vcpu, msr, MSR_TYPE_W, write); 40358c2ecf20Sopenharmony_ci } 40368c2ecf20Sopenharmony_ci 40378c2ecf20Sopenharmony_ci pt_update_intercept_for_msr(vcpu); 40388c2ecf20Sopenharmony_ci vmx_update_msr_bitmap_x2apic(vcpu, vmx_msr_bitmap_mode(vcpu)); 40398c2ecf20Sopenharmony_ci} 40408c2ecf20Sopenharmony_ci 40418c2ecf20Sopenharmony_cistatic inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, 40428c2ecf20Sopenharmony_ci bool nested) 40438c2ecf20Sopenharmony_ci{ 40448c2ecf20Sopenharmony_ci#ifdef CONFIG_SMP 40458c2ecf20Sopenharmony_ci int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR; 40468c2ecf20Sopenharmony_ci 40478c2ecf20Sopenharmony_ci if (vcpu->mode == IN_GUEST_MODE) { 40488c2ecf20Sopenharmony_ci /* 40498c2ecf20Sopenharmony_ci * The vector of interrupt to be delivered to vcpu had 40508c2ecf20Sopenharmony_ci * been set in PIR before this function. 40518c2ecf20Sopenharmony_ci * 40528c2ecf20Sopenharmony_ci * Following cases will be reached in this block, and 40538c2ecf20Sopenharmony_ci * we always send a notification event in all cases as 40548c2ecf20Sopenharmony_ci * explained below. 40558c2ecf20Sopenharmony_ci * 40568c2ecf20Sopenharmony_ci * Case 1: vcpu keeps in non-root mode. Sending a 40578c2ecf20Sopenharmony_ci * notification event posts the interrupt to vcpu. 40588c2ecf20Sopenharmony_ci * 40598c2ecf20Sopenharmony_ci * Case 2: vcpu exits to root mode and is still 40608c2ecf20Sopenharmony_ci * runnable. PIR will be synced to vIRR before the 40618c2ecf20Sopenharmony_ci * next vcpu entry. Sending a notification event in 40628c2ecf20Sopenharmony_ci * this case has no effect, as vcpu is not in root 40638c2ecf20Sopenharmony_ci * mode. 40648c2ecf20Sopenharmony_ci * 40658c2ecf20Sopenharmony_ci * Case 3: vcpu exits to root mode and is blocked. 40668c2ecf20Sopenharmony_ci * vcpu_block() has already synced PIR to vIRR and 40678c2ecf20Sopenharmony_ci * never blocks vcpu if vIRR is not cleared. Therefore, 40688c2ecf20Sopenharmony_ci * a blocked vcpu here does not wait for any requested 40698c2ecf20Sopenharmony_ci * interrupts in PIR, and sending a notification event 40708c2ecf20Sopenharmony_ci * which has no effect is safe here. 40718c2ecf20Sopenharmony_ci */ 40728c2ecf20Sopenharmony_ci 40738c2ecf20Sopenharmony_ci apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec); 40748c2ecf20Sopenharmony_ci return true; 40758c2ecf20Sopenharmony_ci } 40768c2ecf20Sopenharmony_ci#endif 40778c2ecf20Sopenharmony_ci return false; 40788c2ecf20Sopenharmony_ci} 40798c2ecf20Sopenharmony_ci 40808c2ecf20Sopenharmony_cistatic int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, 40818c2ecf20Sopenharmony_ci int vector) 40828c2ecf20Sopenharmony_ci{ 40838c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 40848c2ecf20Sopenharmony_ci 40858c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu) && 40868c2ecf20Sopenharmony_ci vector == vmx->nested.posted_intr_nv) { 40878c2ecf20Sopenharmony_ci /* 40888c2ecf20Sopenharmony_ci * If a posted intr is not recognized by hardware, 40898c2ecf20Sopenharmony_ci * we will accomplish it in the next vmentry. 40908c2ecf20Sopenharmony_ci */ 40918c2ecf20Sopenharmony_ci vmx->nested.pi_pending = true; 40928c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_EVENT, vcpu); 40938c2ecf20Sopenharmony_ci /* the PIR and ON have been set by L1. */ 40948c2ecf20Sopenharmony_ci if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true)) 40958c2ecf20Sopenharmony_ci kvm_vcpu_kick(vcpu); 40968c2ecf20Sopenharmony_ci return 0; 40978c2ecf20Sopenharmony_ci } 40988c2ecf20Sopenharmony_ci return -1; 40998c2ecf20Sopenharmony_ci} 41008c2ecf20Sopenharmony_ci/* 41018c2ecf20Sopenharmony_ci * Send interrupt to vcpu via posted interrupt way. 41028c2ecf20Sopenharmony_ci * 1. If target vcpu is running(non-root mode), send posted interrupt 41038c2ecf20Sopenharmony_ci * notification to vcpu and hardware will sync PIR to vIRR atomically. 41048c2ecf20Sopenharmony_ci * 2. If target vcpu isn't running(root mode), kick it to pick up the 41058c2ecf20Sopenharmony_ci * interrupt from PIR in next vmentry. 41068c2ecf20Sopenharmony_ci */ 41078c2ecf20Sopenharmony_cistatic int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) 41088c2ecf20Sopenharmony_ci{ 41098c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 41108c2ecf20Sopenharmony_ci int r; 41118c2ecf20Sopenharmony_ci 41128c2ecf20Sopenharmony_ci r = vmx_deliver_nested_posted_interrupt(vcpu, vector); 41138c2ecf20Sopenharmony_ci if (!r) 41148c2ecf20Sopenharmony_ci return 0; 41158c2ecf20Sopenharmony_ci 41168c2ecf20Sopenharmony_ci if (!vcpu->arch.apicv_active) 41178c2ecf20Sopenharmony_ci return -1; 41188c2ecf20Sopenharmony_ci 41198c2ecf20Sopenharmony_ci if (pi_test_and_set_pir(vector, &vmx->pi_desc)) 41208c2ecf20Sopenharmony_ci return 0; 41218c2ecf20Sopenharmony_ci 41228c2ecf20Sopenharmony_ci /* If a previous notification has sent the IPI, nothing to do. */ 41238c2ecf20Sopenharmony_ci if (pi_test_and_set_on(&vmx->pi_desc)) 41248c2ecf20Sopenharmony_ci return 0; 41258c2ecf20Sopenharmony_ci 41268c2ecf20Sopenharmony_ci if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) 41278c2ecf20Sopenharmony_ci kvm_vcpu_kick(vcpu); 41288c2ecf20Sopenharmony_ci 41298c2ecf20Sopenharmony_ci return 0; 41308c2ecf20Sopenharmony_ci} 41318c2ecf20Sopenharmony_ci 41328c2ecf20Sopenharmony_ci/* 41338c2ecf20Sopenharmony_ci * Set up the vmcs's constant host-state fields, i.e., host-state fields that 41348c2ecf20Sopenharmony_ci * will not change in the lifetime of the guest. 41358c2ecf20Sopenharmony_ci * Note that host-state that does change is set elsewhere. E.g., host-state 41368c2ecf20Sopenharmony_ci * that is set differently for each CPU is set in vmx_vcpu_load(), not here. 41378c2ecf20Sopenharmony_ci */ 41388c2ecf20Sopenharmony_civoid vmx_set_constant_host_state(struct vcpu_vmx *vmx) 41398c2ecf20Sopenharmony_ci{ 41408c2ecf20Sopenharmony_ci u32 low32, high32; 41418c2ecf20Sopenharmony_ci unsigned long tmpl; 41428c2ecf20Sopenharmony_ci unsigned long cr0, cr3, cr4; 41438c2ecf20Sopenharmony_ci 41448c2ecf20Sopenharmony_ci cr0 = read_cr0(); 41458c2ecf20Sopenharmony_ci WARN_ON(cr0 & X86_CR0_TS); 41468c2ecf20Sopenharmony_ci vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */ 41478c2ecf20Sopenharmony_ci 41488c2ecf20Sopenharmony_ci /* 41498c2ecf20Sopenharmony_ci * Save the most likely value for this task's CR3 in the VMCS. 41508c2ecf20Sopenharmony_ci * We can't use __get_current_cr3_fast() because we're not atomic. 41518c2ecf20Sopenharmony_ci */ 41528c2ecf20Sopenharmony_ci cr3 = __read_cr3(); 41538c2ecf20Sopenharmony_ci vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ 41548c2ecf20Sopenharmony_ci vmx->loaded_vmcs->host_state.cr3 = cr3; 41558c2ecf20Sopenharmony_ci 41568c2ecf20Sopenharmony_ci /* Save the most likely value for this task's CR4 in the VMCS. */ 41578c2ecf20Sopenharmony_ci cr4 = cr4_read_shadow(); 41588c2ecf20Sopenharmony_ci vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ 41598c2ecf20Sopenharmony_ci vmx->loaded_vmcs->host_state.cr4 = cr4; 41608c2ecf20Sopenharmony_ci 41618c2ecf20Sopenharmony_ci vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ 41628c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 41638c2ecf20Sopenharmony_ci /* 41648c2ecf20Sopenharmony_ci * Load null selectors, so we can avoid reloading them in 41658c2ecf20Sopenharmony_ci * vmx_prepare_switch_to_host(), in case userspace uses 41668c2ecf20Sopenharmony_ci * the null selectors too (the expected case). 41678c2ecf20Sopenharmony_ci */ 41688c2ecf20Sopenharmony_ci vmcs_write16(HOST_DS_SELECTOR, 0); 41698c2ecf20Sopenharmony_ci vmcs_write16(HOST_ES_SELECTOR, 0); 41708c2ecf20Sopenharmony_ci#else 41718c2ecf20Sopenharmony_ci vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 41728c2ecf20Sopenharmony_ci vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 41738c2ecf20Sopenharmony_ci#endif 41748c2ecf20Sopenharmony_ci vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 41758c2ecf20Sopenharmony_ci vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ 41768c2ecf20Sopenharmony_ci 41778c2ecf20Sopenharmony_ci vmcs_writel(HOST_IDTR_BASE, host_idt_base); /* 22.2.4 */ 41788c2ecf20Sopenharmony_ci 41798c2ecf20Sopenharmony_ci vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */ 41808c2ecf20Sopenharmony_ci 41818c2ecf20Sopenharmony_ci rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); 41828c2ecf20Sopenharmony_ci vmcs_write32(HOST_IA32_SYSENTER_CS, low32); 41838c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl); 41848c2ecf20Sopenharmony_ci vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */ 41858c2ecf20Sopenharmony_ci 41868c2ecf20Sopenharmony_ci if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { 41878c2ecf20Sopenharmony_ci rdmsr(MSR_IA32_CR_PAT, low32, high32); 41888c2ecf20Sopenharmony_ci vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32)); 41898c2ecf20Sopenharmony_ci } 41908c2ecf20Sopenharmony_ci 41918c2ecf20Sopenharmony_ci if (cpu_has_load_ia32_efer()) 41928c2ecf20Sopenharmony_ci vmcs_write64(HOST_IA32_EFER, host_efer); 41938c2ecf20Sopenharmony_ci} 41948c2ecf20Sopenharmony_ci 41958c2ecf20Sopenharmony_civoid set_cr4_guest_host_mask(struct vcpu_vmx *vmx) 41968c2ecf20Sopenharmony_ci{ 41978c2ecf20Sopenharmony_ci struct kvm_vcpu *vcpu = &vmx->vcpu; 41988c2ecf20Sopenharmony_ci 41998c2ecf20Sopenharmony_ci vcpu->arch.cr4_guest_owned_bits = KVM_POSSIBLE_CR4_GUEST_BITS & 42008c2ecf20Sopenharmony_ci ~vcpu->arch.cr4_guest_rsvd_bits; 42018c2ecf20Sopenharmony_ci if (!enable_ept) 42028c2ecf20Sopenharmony_ci vcpu->arch.cr4_guest_owned_bits &= ~X86_CR4_PGE; 42038c2ecf20Sopenharmony_ci if (is_guest_mode(&vmx->vcpu)) 42048c2ecf20Sopenharmony_ci vcpu->arch.cr4_guest_owned_bits &= 42058c2ecf20Sopenharmony_ci ~get_vmcs12(vcpu)->cr4_guest_host_mask; 42068c2ecf20Sopenharmony_ci vmcs_writel(CR4_GUEST_HOST_MASK, ~vcpu->arch.cr4_guest_owned_bits); 42078c2ecf20Sopenharmony_ci} 42088c2ecf20Sopenharmony_ci 42098c2ecf20Sopenharmony_ciu32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) 42108c2ecf20Sopenharmony_ci{ 42118c2ecf20Sopenharmony_ci u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; 42128c2ecf20Sopenharmony_ci 42138c2ecf20Sopenharmony_ci if (!kvm_vcpu_apicv_active(&vmx->vcpu)) 42148c2ecf20Sopenharmony_ci pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; 42158c2ecf20Sopenharmony_ci 42168c2ecf20Sopenharmony_ci if (!enable_vnmi) 42178c2ecf20Sopenharmony_ci pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS; 42188c2ecf20Sopenharmony_ci 42198c2ecf20Sopenharmony_ci if (!enable_preemption_timer) 42208c2ecf20Sopenharmony_ci pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; 42218c2ecf20Sopenharmony_ci 42228c2ecf20Sopenharmony_ci return pin_based_exec_ctrl; 42238c2ecf20Sopenharmony_ci} 42248c2ecf20Sopenharmony_ci 42258c2ecf20Sopenharmony_cistatic void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) 42268c2ecf20Sopenharmony_ci{ 42278c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 42288c2ecf20Sopenharmony_ci 42298c2ecf20Sopenharmony_ci pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); 42308c2ecf20Sopenharmony_ci if (cpu_has_secondary_exec_ctrls()) { 42318c2ecf20Sopenharmony_ci if (kvm_vcpu_apicv_active(vcpu)) 42328c2ecf20Sopenharmony_ci secondary_exec_controls_setbit(vmx, 42338c2ecf20Sopenharmony_ci SECONDARY_EXEC_APIC_REGISTER_VIRT | 42348c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); 42358c2ecf20Sopenharmony_ci else 42368c2ecf20Sopenharmony_ci secondary_exec_controls_clearbit(vmx, 42378c2ecf20Sopenharmony_ci SECONDARY_EXEC_APIC_REGISTER_VIRT | 42388c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); 42398c2ecf20Sopenharmony_ci } 42408c2ecf20Sopenharmony_ci 42418c2ecf20Sopenharmony_ci if (cpu_has_vmx_msr_bitmap()) 42428c2ecf20Sopenharmony_ci vmx_update_msr_bitmap(vcpu); 42438c2ecf20Sopenharmony_ci} 42448c2ecf20Sopenharmony_ci 42458c2ecf20Sopenharmony_ciu32 vmx_exec_control(struct vcpu_vmx *vmx) 42468c2ecf20Sopenharmony_ci{ 42478c2ecf20Sopenharmony_ci u32 exec_control = vmcs_config.cpu_based_exec_ctrl; 42488c2ecf20Sopenharmony_ci 42498c2ecf20Sopenharmony_ci if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT) 42508c2ecf20Sopenharmony_ci exec_control &= ~CPU_BASED_MOV_DR_EXITING; 42518c2ecf20Sopenharmony_ci 42528c2ecf20Sopenharmony_ci if (!cpu_need_tpr_shadow(&vmx->vcpu)) { 42538c2ecf20Sopenharmony_ci exec_control &= ~CPU_BASED_TPR_SHADOW; 42548c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 42558c2ecf20Sopenharmony_ci exec_control |= CPU_BASED_CR8_STORE_EXITING | 42568c2ecf20Sopenharmony_ci CPU_BASED_CR8_LOAD_EXITING; 42578c2ecf20Sopenharmony_ci#endif 42588c2ecf20Sopenharmony_ci } 42598c2ecf20Sopenharmony_ci if (!enable_ept) 42608c2ecf20Sopenharmony_ci exec_control |= CPU_BASED_CR3_STORE_EXITING | 42618c2ecf20Sopenharmony_ci CPU_BASED_CR3_LOAD_EXITING | 42628c2ecf20Sopenharmony_ci CPU_BASED_INVLPG_EXITING; 42638c2ecf20Sopenharmony_ci if (kvm_mwait_in_guest(vmx->vcpu.kvm)) 42648c2ecf20Sopenharmony_ci exec_control &= ~(CPU_BASED_MWAIT_EXITING | 42658c2ecf20Sopenharmony_ci CPU_BASED_MONITOR_EXITING); 42668c2ecf20Sopenharmony_ci if (kvm_hlt_in_guest(vmx->vcpu.kvm)) 42678c2ecf20Sopenharmony_ci exec_control &= ~CPU_BASED_HLT_EXITING; 42688c2ecf20Sopenharmony_ci return exec_control; 42698c2ecf20Sopenharmony_ci} 42708c2ecf20Sopenharmony_ci 42718c2ecf20Sopenharmony_ci/* 42728c2ecf20Sopenharmony_ci * Adjust a single secondary execution control bit to intercept/allow an 42738c2ecf20Sopenharmony_ci * instruction in the guest. This is usually done based on whether or not a 42748c2ecf20Sopenharmony_ci * feature has been exposed to the guest in order to correctly emulate faults. 42758c2ecf20Sopenharmony_ci */ 42768c2ecf20Sopenharmony_cistatic inline void 42778c2ecf20Sopenharmony_civmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control, 42788c2ecf20Sopenharmony_ci u32 control, bool enabled, bool exiting) 42798c2ecf20Sopenharmony_ci{ 42808c2ecf20Sopenharmony_ci /* 42818c2ecf20Sopenharmony_ci * If the control is for an opt-in feature, clear the control if the 42828c2ecf20Sopenharmony_ci * feature is not exposed to the guest, i.e. not enabled. If the 42838c2ecf20Sopenharmony_ci * control is opt-out, i.e. an exiting control, clear the control if 42848c2ecf20Sopenharmony_ci * the feature _is_ exposed to the guest, i.e. exiting/interception is 42858c2ecf20Sopenharmony_ci * disabled for the associated instruction. Note, the caller is 42868c2ecf20Sopenharmony_ci * responsible presetting exec_control to set all supported bits. 42878c2ecf20Sopenharmony_ci */ 42888c2ecf20Sopenharmony_ci if (enabled == exiting) 42898c2ecf20Sopenharmony_ci *exec_control &= ~control; 42908c2ecf20Sopenharmony_ci 42918c2ecf20Sopenharmony_ci /* 42928c2ecf20Sopenharmony_ci * Update the nested MSR settings so that a nested VMM can/can't set 42938c2ecf20Sopenharmony_ci * controls for features that are/aren't exposed to the guest. 42948c2ecf20Sopenharmony_ci */ 42958c2ecf20Sopenharmony_ci if (nested) { 42968c2ecf20Sopenharmony_ci if (enabled) 42978c2ecf20Sopenharmony_ci vmx->nested.msrs.secondary_ctls_high |= control; 42988c2ecf20Sopenharmony_ci else 42998c2ecf20Sopenharmony_ci vmx->nested.msrs.secondary_ctls_high &= ~control; 43008c2ecf20Sopenharmony_ci } 43018c2ecf20Sopenharmony_ci} 43028c2ecf20Sopenharmony_ci 43038c2ecf20Sopenharmony_ci/* 43048c2ecf20Sopenharmony_ci * Wrapper macro for the common case of adjusting a secondary execution control 43058c2ecf20Sopenharmony_ci * based on a single guest CPUID bit, with a dedicated feature bit. This also 43068c2ecf20Sopenharmony_ci * verifies that the control is actually supported by KVM and hardware. 43078c2ecf20Sopenharmony_ci */ 43088c2ecf20Sopenharmony_ci#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting) \ 43098c2ecf20Sopenharmony_ci({ \ 43108c2ecf20Sopenharmony_ci bool __enabled; \ 43118c2ecf20Sopenharmony_ci \ 43128c2ecf20Sopenharmony_ci if (cpu_has_vmx_##name()) { \ 43138c2ecf20Sopenharmony_ci __enabled = guest_cpuid_has(&(vmx)->vcpu, \ 43148c2ecf20Sopenharmony_ci X86_FEATURE_##feat_name); \ 43158c2ecf20Sopenharmony_ci vmx_adjust_secondary_exec_control(vmx, exec_control, \ 43168c2ecf20Sopenharmony_ci SECONDARY_EXEC_##ctrl_name, __enabled, exiting); \ 43178c2ecf20Sopenharmony_ci } \ 43188c2ecf20Sopenharmony_ci}) 43198c2ecf20Sopenharmony_ci 43208c2ecf20Sopenharmony_ci/* More macro magic for ENABLE_/opt-in versus _EXITING/opt-out controls. */ 43218c2ecf20Sopenharmony_ci#define vmx_adjust_sec_exec_feature(vmx, exec_control, lname, uname) \ 43228c2ecf20Sopenharmony_ci vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, ENABLE_##uname, false) 43238c2ecf20Sopenharmony_ci 43248c2ecf20Sopenharmony_ci#define vmx_adjust_sec_exec_exiting(vmx, exec_control, lname, uname) \ 43258c2ecf20Sopenharmony_ci vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, uname##_EXITING, true) 43268c2ecf20Sopenharmony_ci 43278c2ecf20Sopenharmony_cistatic void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) 43288c2ecf20Sopenharmony_ci{ 43298c2ecf20Sopenharmony_ci struct kvm_vcpu *vcpu = &vmx->vcpu; 43308c2ecf20Sopenharmony_ci 43318c2ecf20Sopenharmony_ci u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; 43328c2ecf20Sopenharmony_ci 43338c2ecf20Sopenharmony_ci if (vmx_pt_mode_is_system()) 43348c2ecf20Sopenharmony_ci exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX); 43358c2ecf20Sopenharmony_ci if (!cpu_need_virtualize_apic_accesses(vcpu)) 43368c2ecf20Sopenharmony_ci exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 43378c2ecf20Sopenharmony_ci if (vmx->vpid == 0) 43388c2ecf20Sopenharmony_ci exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; 43398c2ecf20Sopenharmony_ci if (!enable_ept) { 43408c2ecf20Sopenharmony_ci exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; 43418c2ecf20Sopenharmony_ci enable_unrestricted_guest = 0; 43428c2ecf20Sopenharmony_ci } 43438c2ecf20Sopenharmony_ci if (!enable_unrestricted_guest) 43448c2ecf20Sopenharmony_ci exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; 43458c2ecf20Sopenharmony_ci if (kvm_pause_in_guest(vmx->vcpu.kvm)) 43468c2ecf20Sopenharmony_ci exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; 43478c2ecf20Sopenharmony_ci if (!kvm_vcpu_apicv_active(vcpu)) 43488c2ecf20Sopenharmony_ci exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | 43498c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); 43508c2ecf20Sopenharmony_ci exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; 43518c2ecf20Sopenharmony_ci 43528c2ecf20Sopenharmony_ci /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP, 43538c2ecf20Sopenharmony_ci * in vmx_set_cr4. */ 43548c2ecf20Sopenharmony_ci exec_control &= ~SECONDARY_EXEC_DESC; 43558c2ecf20Sopenharmony_ci 43568c2ecf20Sopenharmony_ci /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD 43578c2ecf20Sopenharmony_ci (handle_vmptrld). 43588c2ecf20Sopenharmony_ci We can NOT enable shadow_vmcs here because we don't have yet 43598c2ecf20Sopenharmony_ci a current VMCS12 43608c2ecf20Sopenharmony_ci */ 43618c2ecf20Sopenharmony_ci exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; 43628c2ecf20Sopenharmony_ci 43638c2ecf20Sopenharmony_ci if (!enable_pml) 43648c2ecf20Sopenharmony_ci exec_control &= ~SECONDARY_EXEC_ENABLE_PML; 43658c2ecf20Sopenharmony_ci 43668c2ecf20Sopenharmony_ci if (cpu_has_vmx_xsaves()) { 43678c2ecf20Sopenharmony_ci /* Exposing XSAVES only when XSAVE is exposed */ 43688c2ecf20Sopenharmony_ci bool xsaves_enabled = 43698c2ecf20Sopenharmony_ci boot_cpu_has(X86_FEATURE_XSAVE) && 43708c2ecf20Sopenharmony_ci guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && 43718c2ecf20Sopenharmony_ci guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); 43728c2ecf20Sopenharmony_ci 43738c2ecf20Sopenharmony_ci vcpu->arch.xsaves_enabled = xsaves_enabled; 43748c2ecf20Sopenharmony_ci 43758c2ecf20Sopenharmony_ci vmx_adjust_secondary_exec_control(vmx, &exec_control, 43768c2ecf20Sopenharmony_ci SECONDARY_EXEC_XSAVES, 43778c2ecf20Sopenharmony_ci xsaves_enabled, false); 43788c2ecf20Sopenharmony_ci } 43798c2ecf20Sopenharmony_ci 43808c2ecf20Sopenharmony_ci vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP); 43818c2ecf20Sopenharmony_ci 43828c2ecf20Sopenharmony_ci /* 43838c2ecf20Sopenharmony_ci * Expose INVPCID if and only if PCID is also exposed to the guest. 43848c2ecf20Sopenharmony_ci * INVPCID takes a #UD when it's disabled in the VMCS, but a #GP or #PF 43858c2ecf20Sopenharmony_ci * if CR4.PCIDE=0. Enumerating CPUID.INVPCID=1 would lead to incorrect 43868c2ecf20Sopenharmony_ci * behavior from the guest perspective (it would expect #GP or #PF). 43878c2ecf20Sopenharmony_ci */ 43888c2ecf20Sopenharmony_ci if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID)) 43898c2ecf20Sopenharmony_ci guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID); 43908c2ecf20Sopenharmony_ci vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID); 43918c2ecf20Sopenharmony_ci 43928c2ecf20Sopenharmony_ci 43938c2ecf20Sopenharmony_ci vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND); 43948c2ecf20Sopenharmony_ci vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdseed, RDSEED); 43958c2ecf20Sopenharmony_ci 43968c2ecf20Sopenharmony_ci vmx_adjust_sec_exec_control(vmx, &exec_control, waitpkg, WAITPKG, 43978c2ecf20Sopenharmony_ci ENABLE_USR_WAIT_PAUSE, false); 43988c2ecf20Sopenharmony_ci 43998c2ecf20Sopenharmony_ci vmx->secondary_exec_control = exec_control; 44008c2ecf20Sopenharmony_ci} 44018c2ecf20Sopenharmony_ci 44028c2ecf20Sopenharmony_cistatic void ept_set_mmio_spte_mask(void) 44038c2ecf20Sopenharmony_ci{ 44048c2ecf20Sopenharmony_ci /* 44058c2ecf20Sopenharmony_ci * EPT Misconfigurations can be generated if the value of bits 2:0 44068c2ecf20Sopenharmony_ci * of an EPT paging-structure entry is 110b (write/execute). 44078c2ecf20Sopenharmony_ci */ 44088c2ecf20Sopenharmony_ci kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE, 0); 44098c2ecf20Sopenharmony_ci} 44108c2ecf20Sopenharmony_ci 44118c2ecf20Sopenharmony_ci#define VMX_XSS_EXIT_BITMAP 0 44128c2ecf20Sopenharmony_ci 44138c2ecf20Sopenharmony_ci/* 44148c2ecf20Sopenharmony_ci * Noting that the initialization of Guest-state Area of VMCS is in 44158c2ecf20Sopenharmony_ci * vmx_vcpu_reset(). 44168c2ecf20Sopenharmony_ci */ 44178c2ecf20Sopenharmony_cistatic void init_vmcs(struct vcpu_vmx *vmx) 44188c2ecf20Sopenharmony_ci{ 44198c2ecf20Sopenharmony_ci if (nested) 44208c2ecf20Sopenharmony_ci nested_vmx_set_vmcs_shadowing_bitmap(); 44218c2ecf20Sopenharmony_ci 44228c2ecf20Sopenharmony_ci if (cpu_has_vmx_msr_bitmap()) 44238c2ecf20Sopenharmony_ci vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); 44248c2ecf20Sopenharmony_ci 44258c2ecf20Sopenharmony_ci vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ 44268c2ecf20Sopenharmony_ci 44278c2ecf20Sopenharmony_ci /* Control */ 44288c2ecf20Sopenharmony_ci pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); 44298c2ecf20Sopenharmony_ci 44308c2ecf20Sopenharmony_ci exec_controls_set(vmx, vmx_exec_control(vmx)); 44318c2ecf20Sopenharmony_ci 44328c2ecf20Sopenharmony_ci if (cpu_has_secondary_exec_ctrls()) { 44338c2ecf20Sopenharmony_ci vmx_compute_secondary_exec_control(vmx); 44348c2ecf20Sopenharmony_ci secondary_exec_controls_set(vmx, vmx->secondary_exec_control); 44358c2ecf20Sopenharmony_ci } 44368c2ecf20Sopenharmony_ci 44378c2ecf20Sopenharmony_ci if (kvm_vcpu_apicv_active(&vmx->vcpu)) { 44388c2ecf20Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP0, 0); 44398c2ecf20Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP1, 0); 44408c2ecf20Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP2, 0); 44418c2ecf20Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP3, 0); 44428c2ecf20Sopenharmony_ci 44438c2ecf20Sopenharmony_ci vmcs_write16(GUEST_INTR_STATUS, 0); 44448c2ecf20Sopenharmony_ci 44458c2ecf20Sopenharmony_ci vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); 44468c2ecf20Sopenharmony_ci vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); 44478c2ecf20Sopenharmony_ci } 44488c2ecf20Sopenharmony_ci 44498c2ecf20Sopenharmony_ci if (!kvm_pause_in_guest(vmx->vcpu.kvm)) { 44508c2ecf20Sopenharmony_ci vmcs_write32(PLE_GAP, ple_gap); 44518c2ecf20Sopenharmony_ci vmx->ple_window = ple_window; 44528c2ecf20Sopenharmony_ci vmx->ple_window_dirty = true; 44538c2ecf20Sopenharmony_ci } 44548c2ecf20Sopenharmony_ci 44558c2ecf20Sopenharmony_ci vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); 44568c2ecf20Sopenharmony_ci vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); 44578c2ecf20Sopenharmony_ci vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ 44588c2ecf20Sopenharmony_ci 44598c2ecf20Sopenharmony_ci vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ 44608c2ecf20Sopenharmony_ci vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ 44618c2ecf20Sopenharmony_ci vmx_set_constant_host_state(vmx); 44628c2ecf20Sopenharmony_ci vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */ 44638c2ecf20Sopenharmony_ci vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ 44648c2ecf20Sopenharmony_ci 44658c2ecf20Sopenharmony_ci if (cpu_has_vmx_vmfunc()) 44668c2ecf20Sopenharmony_ci vmcs_write64(VM_FUNCTION_CONTROL, 0); 44678c2ecf20Sopenharmony_ci 44688c2ecf20Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); 44698c2ecf20Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); 44708c2ecf20Sopenharmony_ci vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); 44718c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); 44728c2ecf20Sopenharmony_ci vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); 44738c2ecf20Sopenharmony_ci 44748c2ecf20Sopenharmony_ci if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) 44758c2ecf20Sopenharmony_ci vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); 44768c2ecf20Sopenharmony_ci 44778c2ecf20Sopenharmony_ci vm_exit_controls_set(vmx, vmx_vmexit_ctrl()); 44788c2ecf20Sopenharmony_ci 44798c2ecf20Sopenharmony_ci /* 22.2.1, 20.8.1 */ 44808c2ecf20Sopenharmony_ci vm_entry_controls_set(vmx, vmx_vmentry_ctrl()); 44818c2ecf20Sopenharmony_ci 44828c2ecf20Sopenharmony_ci vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; 44838c2ecf20Sopenharmony_ci vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits); 44848c2ecf20Sopenharmony_ci 44858c2ecf20Sopenharmony_ci set_cr4_guest_host_mask(vmx); 44868c2ecf20Sopenharmony_ci 44878c2ecf20Sopenharmony_ci if (vmx->vpid != 0) 44888c2ecf20Sopenharmony_ci vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); 44898c2ecf20Sopenharmony_ci 44908c2ecf20Sopenharmony_ci if (cpu_has_vmx_xsaves()) 44918c2ecf20Sopenharmony_ci vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); 44928c2ecf20Sopenharmony_ci 44938c2ecf20Sopenharmony_ci if (enable_pml) { 44948c2ecf20Sopenharmony_ci vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); 44958c2ecf20Sopenharmony_ci vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); 44968c2ecf20Sopenharmony_ci } 44978c2ecf20Sopenharmony_ci 44988c2ecf20Sopenharmony_ci if (cpu_has_vmx_encls_vmexit()) 44998c2ecf20Sopenharmony_ci vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); 45008c2ecf20Sopenharmony_ci 45018c2ecf20Sopenharmony_ci if (vmx_pt_mode_is_host_guest()) { 45028c2ecf20Sopenharmony_ci memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc)); 45038c2ecf20Sopenharmony_ci /* Bit[6~0] are forced to 1, writes are ignored. */ 45048c2ecf20Sopenharmony_ci vmx->pt_desc.guest.output_mask = 0x7F; 45058c2ecf20Sopenharmony_ci vmcs_write64(GUEST_IA32_RTIT_CTL, 0); 45068c2ecf20Sopenharmony_ci } 45078c2ecf20Sopenharmony_ci} 45088c2ecf20Sopenharmony_ci 45098c2ecf20Sopenharmony_cistatic void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) 45108c2ecf20Sopenharmony_ci{ 45118c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 45128c2ecf20Sopenharmony_ci struct msr_data apic_base_msr; 45138c2ecf20Sopenharmony_ci u64 cr0; 45148c2ecf20Sopenharmony_ci 45158c2ecf20Sopenharmony_ci vmx->rmode.vm86_active = 0; 45168c2ecf20Sopenharmony_ci vmx->spec_ctrl = 0; 45178c2ecf20Sopenharmony_ci 45188c2ecf20Sopenharmony_ci vmx->msr_ia32_umwait_control = 0; 45198c2ecf20Sopenharmony_ci 45208c2ecf20Sopenharmony_ci vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 45218c2ecf20Sopenharmony_ci vmx->hv_deadline_tsc = -1; 45228c2ecf20Sopenharmony_ci kvm_set_cr8(vcpu, 0); 45238c2ecf20Sopenharmony_ci 45248c2ecf20Sopenharmony_ci if (!init_event) { 45258c2ecf20Sopenharmony_ci apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | 45268c2ecf20Sopenharmony_ci MSR_IA32_APICBASE_ENABLE; 45278c2ecf20Sopenharmony_ci if (kvm_vcpu_is_reset_bsp(vcpu)) 45288c2ecf20Sopenharmony_ci apic_base_msr.data |= MSR_IA32_APICBASE_BSP; 45298c2ecf20Sopenharmony_ci apic_base_msr.host_initiated = true; 45308c2ecf20Sopenharmony_ci kvm_set_apic_base(vcpu, &apic_base_msr); 45318c2ecf20Sopenharmony_ci } 45328c2ecf20Sopenharmony_ci 45338c2ecf20Sopenharmony_ci vmx_segment_cache_clear(vmx); 45348c2ecf20Sopenharmony_ci 45358c2ecf20Sopenharmony_ci seg_setup(VCPU_SREG_CS); 45368c2ecf20Sopenharmony_ci vmcs_write16(GUEST_CS_SELECTOR, 0xf000); 45378c2ecf20Sopenharmony_ci vmcs_writel(GUEST_CS_BASE, 0xffff0000ul); 45388c2ecf20Sopenharmony_ci 45398c2ecf20Sopenharmony_ci seg_setup(VCPU_SREG_DS); 45408c2ecf20Sopenharmony_ci seg_setup(VCPU_SREG_ES); 45418c2ecf20Sopenharmony_ci seg_setup(VCPU_SREG_FS); 45428c2ecf20Sopenharmony_ci seg_setup(VCPU_SREG_GS); 45438c2ecf20Sopenharmony_ci seg_setup(VCPU_SREG_SS); 45448c2ecf20Sopenharmony_ci 45458c2ecf20Sopenharmony_ci vmcs_write16(GUEST_TR_SELECTOR, 0); 45468c2ecf20Sopenharmony_ci vmcs_writel(GUEST_TR_BASE, 0); 45478c2ecf20Sopenharmony_ci vmcs_write32(GUEST_TR_LIMIT, 0xffff); 45488c2ecf20Sopenharmony_ci vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); 45498c2ecf20Sopenharmony_ci 45508c2ecf20Sopenharmony_ci vmcs_write16(GUEST_LDTR_SELECTOR, 0); 45518c2ecf20Sopenharmony_ci vmcs_writel(GUEST_LDTR_BASE, 0); 45528c2ecf20Sopenharmony_ci vmcs_write32(GUEST_LDTR_LIMIT, 0xffff); 45538c2ecf20Sopenharmony_ci vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); 45548c2ecf20Sopenharmony_ci 45558c2ecf20Sopenharmony_ci if (!init_event) { 45568c2ecf20Sopenharmony_ci vmcs_write32(GUEST_SYSENTER_CS, 0); 45578c2ecf20Sopenharmony_ci vmcs_writel(GUEST_SYSENTER_ESP, 0); 45588c2ecf20Sopenharmony_ci vmcs_writel(GUEST_SYSENTER_EIP, 0); 45598c2ecf20Sopenharmony_ci vmcs_write64(GUEST_IA32_DEBUGCTL, 0); 45608c2ecf20Sopenharmony_ci } 45618c2ecf20Sopenharmony_ci 45628c2ecf20Sopenharmony_ci kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); 45638c2ecf20Sopenharmony_ci kvm_rip_write(vcpu, 0xfff0); 45648c2ecf20Sopenharmony_ci 45658c2ecf20Sopenharmony_ci vmcs_writel(GUEST_GDTR_BASE, 0); 45668c2ecf20Sopenharmony_ci vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); 45678c2ecf20Sopenharmony_ci 45688c2ecf20Sopenharmony_ci vmcs_writel(GUEST_IDTR_BASE, 0); 45698c2ecf20Sopenharmony_ci vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); 45708c2ecf20Sopenharmony_ci 45718c2ecf20Sopenharmony_ci vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); 45728c2ecf20Sopenharmony_ci vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); 45738c2ecf20Sopenharmony_ci vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0); 45748c2ecf20Sopenharmony_ci if (kvm_mpx_supported()) 45758c2ecf20Sopenharmony_ci vmcs_write64(GUEST_BNDCFGS, 0); 45768c2ecf20Sopenharmony_ci 45778c2ecf20Sopenharmony_ci setup_msrs(vmx); 45788c2ecf20Sopenharmony_ci 45798c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ 45808c2ecf20Sopenharmony_ci 45818c2ecf20Sopenharmony_ci if (cpu_has_vmx_tpr_shadow() && !init_event) { 45828c2ecf20Sopenharmony_ci vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); 45838c2ecf20Sopenharmony_ci if (cpu_need_tpr_shadow(vcpu)) 45848c2ecf20Sopenharmony_ci vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 45858c2ecf20Sopenharmony_ci __pa(vcpu->arch.apic->regs)); 45868c2ecf20Sopenharmony_ci vmcs_write32(TPR_THRESHOLD, 0); 45878c2ecf20Sopenharmony_ci } 45888c2ecf20Sopenharmony_ci 45898c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); 45908c2ecf20Sopenharmony_ci 45918c2ecf20Sopenharmony_ci cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; 45928c2ecf20Sopenharmony_ci vmx->vcpu.arch.cr0 = cr0; 45938c2ecf20Sopenharmony_ci vmx_set_cr0(vcpu, cr0); /* enter rmode */ 45948c2ecf20Sopenharmony_ci vmx_set_cr4(vcpu, 0); 45958c2ecf20Sopenharmony_ci vmx_set_efer(vcpu, 0); 45968c2ecf20Sopenharmony_ci 45978c2ecf20Sopenharmony_ci update_exception_bitmap(vcpu); 45988c2ecf20Sopenharmony_ci 45998c2ecf20Sopenharmony_ci vpid_sync_context(vmx->vpid); 46008c2ecf20Sopenharmony_ci if (init_event) 46018c2ecf20Sopenharmony_ci vmx_clear_hlt(vcpu); 46028c2ecf20Sopenharmony_ci 46038c2ecf20Sopenharmony_ci vmx_update_fb_clear_dis(vcpu, vmx); 46048c2ecf20Sopenharmony_ci} 46058c2ecf20Sopenharmony_ci 46068c2ecf20Sopenharmony_cistatic void enable_irq_window(struct kvm_vcpu *vcpu) 46078c2ecf20Sopenharmony_ci{ 46088c2ecf20Sopenharmony_ci exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); 46098c2ecf20Sopenharmony_ci} 46108c2ecf20Sopenharmony_ci 46118c2ecf20Sopenharmony_cistatic void enable_nmi_window(struct kvm_vcpu *vcpu) 46128c2ecf20Sopenharmony_ci{ 46138c2ecf20Sopenharmony_ci if (!enable_vnmi || 46148c2ecf20Sopenharmony_ci vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { 46158c2ecf20Sopenharmony_ci enable_irq_window(vcpu); 46168c2ecf20Sopenharmony_ci return; 46178c2ecf20Sopenharmony_ci } 46188c2ecf20Sopenharmony_ci 46198c2ecf20Sopenharmony_ci exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); 46208c2ecf20Sopenharmony_ci} 46218c2ecf20Sopenharmony_ci 46228c2ecf20Sopenharmony_cistatic void vmx_inject_irq(struct kvm_vcpu *vcpu) 46238c2ecf20Sopenharmony_ci{ 46248c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 46258c2ecf20Sopenharmony_ci uint32_t intr; 46268c2ecf20Sopenharmony_ci int irq = vcpu->arch.interrupt.nr; 46278c2ecf20Sopenharmony_ci 46288c2ecf20Sopenharmony_ci trace_kvm_inj_virq(irq); 46298c2ecf20Sopenharmony_ci 46308c2ecf20Sopenharmony_ci ++vcpu->stat.irq_injections; 46318c2ecf20Sopenharmony_ci if (vmx->rmode.vm86_active) { 46328c2ecf20Sopenharmony_ci int inc_eip = 0; 46338c2ecf20Sopenharmony_ci if (vcpu->arch.interrupt.soft) 46348c2ecf20Sopenharmony_ci inc_eip = vcpu->arch.event_exit_inst_len; 46358c2ecf20Sopenharmony_ci kvm_inject_realmode_interrupt(vcpu, irq, inc_eip); 46368c2ecf20Sopenharmony_ci return; 46378c2ecf20Sopenharmony_ci } 46388c2ecf20Sopenharmony_ci intr = irq | INTR_INFO_VALID_MASK; 46398c2ecf20Sopenharmony_ci if (vcpu->arch.interrupt.soft) { 46408c2ecf20Sopenharmony_ci intr |= INTR_TYPE_SOFT_INTR; 46418c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 46428c2ecf20Sopenharmony_ci vmx->vcpu.arch.event_exit_inst_len); 46438c2ecf20Sopenharmony_ci } else 46448c2ecf20Sopenharmony_ci intr |= INTR_TYPE_EXT_INTR; 46458c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); 46468c2ecf20Sopenharmony_ci 46478c2ecf20Sopenharmony_ci vmx_clear_hlt(vcpu); 46488c2ecf20Sopenharmony_ci} 46498c2ecf20Sopenharmony_ci 46508c2ecf20Sopenharmony_cistatic void vmx_inject_nmi(struct kvm_vcpu *vcpu) 46518c2ecf20Sopenharmony_ci{ 46528c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 46538c2ecf20Sopenharmony_ci 46548c2ecf20Sopenharmony_ci if (!enable_vnmi) { 46558c2ecf20Sopenharmony_ci /* 46568c2ecf20Sopenharmony_ci * Tracking the NMI-blocked state in software is built upon 46578c2ecf20Sopenharmony_ci * finding the next open IRQ window. This, in turn, depends on 46588c2ecf20Sopenharmony_ci * well-behaving guests: They have to keep IRQs disabled at 46598c2ecf20Sopenharmony_ci * least as long as the NMI handler runs. Otherwise we may 46608c2ecf20Sopenharmony_ci * cause NMI nesting, maybe breaking the guest. But as this is 46618c2ecf20Sopenharmony_ci * highly unlikely, we can live with the residual risk. 46628c2ecf20Sopenharmony_ci */ 46638c2ecf20Sopenharmony_ci vmx->loaded_vmcs->soft_vnmi_blocked = 1; 46648c2ecf20Sopenharmony_ci vmx->loaded_vmcs->vnmi_blocked_time = 0; 46658c2ecf20Sopenharmony_ci } 46668c2ecf20Sopenharmony_ci 46678c2ecf20Sopenharmony_ci ++vcpu->stat.nmi_injections; 46688c2ecf20Sopenharmony_ci vmx->loaded_vmcs->nmi_known_unmasked = false; 46698c2ecf20Sopenharmony_ci 46708c2ecf20Sopenharmony_ci if (vmx->rmode.vm86_active) { 46718c2ecf20Sopenharmony_ci kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0); 46728c2ecf20Sopenharmony_ci return; 46738c2ecf20Sopenharmony_ci } 46748c2ecf20Sopenharmony_ci 46758c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 46768c2ecf20Sopenharmony_ci INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); 46778c2ecf20Sopenharmony_ci 46788c2ecf20Sopenharmony_ci vmx_clear_hlt(vcpu); 46798c2ecf20Sopenharmony_ci} 46808c2ecf20Sopenharmony_ci 46818c2ecf20Sopenharmony_cibool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) 46828c2ecf20Sopenharmony_ci{ 46838c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 46848c2ecf20Sopenharmony_ci bool masked; 46858c2ecf20Sopenharmony_ci 46868c2ecf20Sopenharmony_ci if (!enable_vnmi) 46878c2ecf20Sopenharmony_ci return vmx->loaded_vmcs->soft_vnmi_blocked; 46888c2ecf20Sopenharmony_ci if (vmx->loaded_vmcs->nmi_known_unmasked) 46898c2ecf20Sopenharmony_ci return false; 46908c2ecf20Sopenharmony_ci masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; 46918c2ecf20Sopenharmony_ci vmx->loaded_vmcs->nmi_known_unmasked = !masked; 46928c2ecf20Sopenharmony_ci return masked; 46938c2ecf20Sopenharmony_ci} 46948c2ecf20Sopenharmony_ci 46958c2ecf20Sopenharmony_civoid vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) 46968c2ecf20Sopenharmony_ci{ 46978c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 46988c2ecf20Sopenharmony_ci 46998c2ecf20Sopenharmony_ci if (!enable_vnmi) { 47008c2ecf20Sopenharmony_ci if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) { 47018c2ecf20Sopenharmony_ci vmx->loaded_vmcs->soft_vnmi_blocked = masked; 47028c2ecf20Sopenharmony_ci vmx->loaded_vmcs->vnmi_blocked_time = 0; 47038c2ecf20Sopenharmony_ci } 47048c2ecf20Sopenharmony_ci } else { 47058c2ecf20Sopenharmony_ci vmx->loaded_vmcs->nmi_known_unmasked = !masked; 47068c2ecf20Sopenharmony_ci if (masked) 47078c2ecf20Sopenharmony_ci vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 47088c2ecf20Sopenharmony_ci GUEST_INTR_STATE_NMI); 47098c2ecf20Sopenharmony_ci else 47108c2ecf20Sopenharmony_ci vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, 47118c2ecf20Sopenharmony_ci GUEST_INTR_STATE_NMI); 47128c2ecf20Sopenharmony_ci } 47138c2ecf20Sopenharmony_ci} 47148c2ecf20Sopenharmony_ci 47158c2ecf20Sopenharmony_cibool vmx_nmi_blocked(struct kvm_vcpu *vcpu) 47168c2ecf20Sopenharmony_ci{ 47178c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu)) 47188c2ecf20Sopenharmony_ci return false; 47198c2ecf20Sopenharmony_ci 47208c2ecf20Sopenharmony_ci if (!enable_vnmi && to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) 47218c2ecf20Sopenharmony_ci return true; 47228c2ecf20Sopenharmony_ci 47238c2ecf20Sopenharmony_ci return (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 47248c2ecf20Sopenharmony_ci (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI | 47258c2ecf20Sopenharmony_ci GUEST_INTR_STATE_NMI)); 47268c2ecf20Sopenharmony_ci} 47278c2ecf20Sopenharmony_ci 47288c2ecf20Sopenharmony_cistatic int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) 47298c2ecf20Sopenharmony_ci{ 47308c2ecf20Sopenharmony_ci if (to_vmx(vcpu)->nested.nested_run_pending) 47318c2ecf20Sopenharmony_ci return -EBUSY; 47328c2ecf20Sopenharmony_ci 47338c2ecf20Sopenharmony_ci /* An NMI must not be injected into L2 if it's supposed to VM-Exit. */ 47348c2ecf20Sopenharmony_ci if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu)) 47358c2ecf20Sopenharmony_ci return -EBUSY; 47368c2ecf20Sopenharmony_ci 47378c2ecf20Sopenharmony_ci return !vmx_nmi_blocked(vcpu); 47388c2ecf20Sopenharmony_ci} 47398c2ecf20Sopenharmony_ci 47408c2ecf20Sopenharmony_cibool vmx_interrupt_blocked(struct kvm_vcpu *vcpu) 47418c2ecf20Sopenharmony_ci{ 47428c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) 47438c2ecf20Sopenharmony_ci return false; 47448c2ecf20Sopenharmony_ci 47458c2ecf20Sopenharmony_ci return !(vmx_get_rflags(vcpu) & X86_EFLAGS_IF) || 47468c2ecf20Sopenharmony_ci (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 47478c2ecf20Sopenharmony_ci (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); 47488c2ecf20Sopenharmony_ci} 47498c2ecf20Sopenharmony_ci 47508c2ecf20Sopenharmony_cistatic int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) 47518c2ecf20Sopenharmony_ci{ 47528c2ecf20Sopenharmony_ci if (to_vmx(vcpu)->nested.nested_run_pending) 47538c2ecf20Sopenharmony_ci return -EBUSY; 47548c2ecf20Sopenharmony_ci 47558c2ecf20Sopenharmony_ci /* 47568c2ecf20Sopenharmony_ci * An IRQ must not be injected into L2 if it's supposed to VM-Exit, 47578c2ecf20Sopenharmony_ci * e.g. if the IRQ arrived asynchronously after checking nested events. 47588c2ecf20Sopenharmony_ci */ 47598c2ecf20Sopenharmony_ci if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) 47608c2ecf20Sopenharmony_ci return -EBUSY; 47618c2ecf20Sopenharmony_ci 47628c2ecf20Sopenharmony_ci return !vmx_interrupt_blocked(vcpu); 47638c2ecf20Sopenharmony_ci} 47648c2ecf20Sopenharmony_ci 47658c2ecf20Sopenharmony_cistatic int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) 47668c2ecf20Sopenharmony_ci{ 47678c2ecf20Sopenharmony_ci int ret; 47688c2ecf20Sopenharmony_ci 47698c2ecf20Sopenharmony_ci if (enable_unrestricted_guest) 47708c2ecf20Sopenharmony_ci return 0; 47718c2ecf20Sopenharmony_ci 47728c2ecf20Sopenharmony_ci mutex_lock(&kvm->slots_lock); 47738c2ecf20Sopenharmony_ci ret = __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, 47748c2ecf20Sopenharmony_ci PAGE_SIZE * 3); 47758c2ecf20Sopenharmony_ci mutex_unlock(&kvm->slots_lock); 47768c2ecf20Sopenharmony_ci 47778c2ecf20Sopenharmony_ci if (ret) 47788c2ecf20Sopenharmony_ci return ret; 47798c2ecf20Sopenharmony_ci to_kvm_vmx(kvm)->tss_addr = addr; 47808c2ecf20Sopenharmony_ci return init_rmode_tss(kvm); 47818c2ecf20Sopenharmony_ci} 47828c2ecf20Sopenharmony_ci 47838c2ecf20Sopenharmony_cistatic int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) 47848c2ecf20Sopenharmony_ci{ 47858c2ecf20Sopenharmony_ci to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr; 47868c2ecf20Sopenharmony_ci return 0; 47878c2ecf20Sopenharmony_ci} 47888c2ecf20Sopenharmony_ci 47898c2ecf20Sopenharmony_cistatic bool rmode_exception(struct kvm_vcpu *vcpu, int vec) 47908c2ecf20Sopenharmony_ci{ 47918c2ecf20Sopenharmony_ci switch (vec) { 47928c2ecf20Sopenharmony_ci case BP_VECTOR: 47938c2ecf20Sopenharmony_ci /* 47948c2ecf20Sopenharmony_ci * Update instruction length as we may reinject the exception 47958c2ecf20Sopenharmony_ci * from user space while in guest debugging mode. 47968c2ecf20Sopenharmony_ci */ 47978c2ecf20Sopenharmony_ci to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = 47988c2ecf20Sopenharmony_ci vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 47998c2ecf20Sopenharmony_ci if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 48008c2ecf20Sopenharmony_ci return false; 48018c2ecf20Sopenharmony_ci fallthrough; 48028c2ecf20Sopenharmony_ci case DB_VECTOR: 48038c2ecf20Sopenharmony_ci return !(vcpu->guest_debug & 48048c2ecf20Sopenharmony_ci (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)); 48058c2ecf20Sopenharmony_ci case DE_VECTOR: 48068c2ecf20Sopenharmony_ci case OF_VECTOR: 48078c2ecf20Sopenharmony_ci case BR_VECTOR: 48088c2ecf20Sopenharmony_ci case UD_VECTOR: 48098c2ecf20Sopenharmony_ci case DF_VECTOR: 48108c2ecf20Sopenharmony_ci case SS_VECTOR: 48118c2ecf20Sopenharmony_ci case GP_VECTOR: 48128c2ecf20Sopenharmony_ci case MF_VECTOR: 48138c2ecf20Sopenharmony_ci return true; 48148c2ecf20Sopenharmony_ci } 48158c2ecf20Sopenharmony_ci return false; 48168c2ecf20Sopenharmony_ci} 48178c2ecf20Sopenharmony_ci 48188c2ecf20Sopenharmony_cistatic int handle_rmode_exception(struct kvm_vcpu *vcpu, 48198c2ecf20Sopenharmony_ci int vec, u32 err_code) 48208c2ecf20Sopenharmony_ci{ 48218c2ecf20Sopenharmony_ci /* 48228c2ecf20Sopenharmony_ci * Instruction with address size override prefix opcode 0x67 48238c2ecf20Sopenharmony_ci * Cause the #SS fault with 0 error code in VM86 mode. 48248c2ecf20Sopenharmony_ci */ 48258c2ecf20Sopenharmony_ci if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { 48268c2ecf20Sopenharmony_ci if (kvm_emulate_instruction(vcpu, 0)) { 48278c2ecf20Sopenharmony_ci if (vcpu->arch.halt_request) { 48288c2ecf20Sopenharmony_ci vcpu->arch.halt_request = 0; 48298c2ecf20Sopenharmony_ci return kvm_vcpu_halt(vcpu); 48308c2ecf20Sopenharmony_ci } 48318c2ecf20Sopenharmony_ci return 1; 48328c2ecf20Sopenharmony_ci } 48338c2ecf20Sopenharmony_ci return 0; 48348c2ecf20Sopenharmony_ci } 48358c2ecf20Sopenharmony_ci 48368c2ecf20Sopenharmony_ci /* 48378c2ecf20Sopenharmony_ci * Forward all other exceptions that are valid in real mode. 48388c2ecf20Sopenharmony_ci * FIXME: Breaks guest debugging in real mode, needs to be fixed with 48398c2ecf20Sopenharmony_ci * the required debugging infrastructure rework. 48408c2ecf20Sopenharmony_ci */ 48418c2ecf20Sopenharmony_ci kvm_queue_exception(vcpu, vec); 48428c2ecf20Sopenharmony_ci return 1; 48438c2ecf20Sopenharmony_ci} 48448c2ecf20Sopenharmony_ci 48458c2ecf20Sopenharmony_ci/* 48468c2ecf20Sopenharmony_ci * Trigger machine check on the host. We assume all the MSRs are already set up 48478c2ecf20Sopenharmony_ci * by the CPU and that we still run on the same CPU as the MCE occurred on. 48488c2ecf20Sopenharmony_ci * We pass a fake environment to the machine check handler because we want 48498c2ecf20Sopenharmony_ci * the guest to be always treated like user space, no matter what context 48508c2ecf20Sopenharmony_ci * it used internally. 48518c2ecf20Sopenharmony_ci */ 48528c2ecf20Sopenharmony_cistatic void kvm_machine_check(void) 48538c2ecf20Sopenharmony_ci{ 48548c2ecf20Sopenharmony_ci#if defined(CONFIG_X86_MCE) 48558c2ecf20Sopenharmony_ci struct pt_regs regs = { 48568c2ecf20Sopenharmony_ci .cs = 3, /* Fake ring 3 no matter what the guest ran on */ 48578c2ecf20Sopenharmony_ci .flags = X86_EFLAGS_IF, 48588c2ecf20Sopenharmony_ci }; 48598c2ecf20Sopenharmony_ci 48608c2ecf20Sopenharmony_ci do_machine_check(®s); 48618c2ecf20Sopenharmony_ci#endif 48628c2ecf20Sopenharmony_ci} 48638c2ecf20Sopenharmony_ci 48648c2ecf20Sopenharmony_cistatic int handle_machine_check(struct kvm_vcpu *vcpu) 48658c2ecf20Sopenharmony_ci{ 48668c2ecf20Sopenharmony_ci /* handled by vmx_vcpu_run() */ 48678c2ecf20Sopenharmony_ci return 1; 48688c2ecf20Sopenharmony_ci} 48698c2ecf20Sopenharmony_ci 48708c2ecf20Sopenharmony_ci/* 48718c2ecf20Sopenharmony_ci * If the host has split lock detection disabled, then #AC is 48728c2ecf20Sopenharmony_ci * unconditionally injected into the guest, which is the pre split lock 48738c2ecf20Sopenharmony_ci * detection behaviour. 48748c2ecf20Sopenharmony_ci * 48758c2ecf20Sopenharmony_ci * If the host has split lock detection enabled then #AC is 48768c2ecf20Sopenharmony_ci * only injected into the guest when: 48778c2ecf20Sopenharmony_ci * - Guest CPL == 3 (user mode) 48788c2ecf20Sopenharmony_ci * - Guest has #AC detection enabled in CR0 48798c2ecf20Sopenharmony_ci * - Guest EFLAGS has AC bit set 48808c2ecf20Sopenharmony_ci */ 48818c2ecf20Sopenharmony_cibool vmx_guest_inject_ac(struct kvm_vcpu *vcpu) 48828c2ecf20Sopenharmony_ci{ 48838c2ecf20Sopenharmony_ci if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) 48848c2ecf20Sopenharmony_ci return true; 48858c2ecf20Sopenharmony_ci 48868c2ecf20Sopenharmony_ci return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) && 48878c2ecf20Sopenharmony_ci (kvm_get_rflags(vcpu) & X86_EFLAGS_AC); 48888c2ecf20Sopenharmony_ci} 48898c2ecf20Sopenharmony_ci 48908c2ecf20Sopenharmony_cistatic int handle_exception_nmi(struct kvm_vcpu *vcpu) 48918c2ecf20Sopenharmony_ci{ 48928c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 48938c2ecf20Sopenharmony_ci struct kvm_run *kvm_run = vcpu->run; 48948c2ecf20Sopenharmony_ci u32 intr_info, ex_no, error_code; 48958c2ecf20Sopenharmony_ci unsigned long cr2, rip, dr6; 48968c2ecf20Sopenharmony_ci u32 vect_info; 48978c2ecf20Sopenharmony_ci 48988c2ecf20Sopenharmony_ci vect_info = vmx->idt_vectoring_info; 48998c2ecf20Sopenharmony_ci intr_info = vmx_get_intr_info(vcpu); 49008c2ecf20Sopenharmony_ci 49018c2ecf20Sopenharmony_ci if (is_machine_check(intr_info) || is_nmi(intr_info)) 49028c2ecf20Sopenharmony_ci return 1; /* handled by handle_exception_nmi_irqoff() */ 49038c2ecf20Sopenharmony_ci 49048c2ecf20Sopenharmony_ci if (is_invalid_opcode(intr_info)) 49058c2ecf20Sopenharmony_ci return handle_ud(vcpu); 49068c2ecf20Sopenharmony_ci 49078c2ecf20Sopenharmony_ci error_code = 0; 49088c2ecf20Sopenharmony_ci if (intr_info & INTR_INFO_DELIVER_CODE_MASK) 49098c2ecf20Sopenharmony_ci error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 49108c2ecf20Sopenharmony_ci 49118c2ecf20Sopenharmony_ci if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { 49128c2ecf20Sopenharmony_ci WARN_ON_ONCE(!enable_vmware_backdoor); 49138c2ecf20Sopenharmony_ci 49148c2ecf20Sopenharmony_ci /* 49158c2ecf20Sopenharmony_ci * VMware backdoor emulation on #GP interception only handles 49168c2ecf20Sopenharmony_ci * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero 49178c2ecf20Sopenharmony_ci * error code on #GP. 49188c2ecf20Sopenharmony_ci */ 49198c2ecf20Sopenharmony_ci if (error_code) { 49208c2ecf20Sopenharmony_ci kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); 49218c2ecf20Sopenharmony_ci return 1; 49228c2ecf20Sopenharmony_ci } 49238c2ecf20Sopenharmony_ci return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); 49248c2ecf20Sopenharmony_ci } 49258c2ecf20Sopenharmony_ci 49268c2ecf20Sopenharmony_ci /* 49278c2ecf20Sopenharmony_ci * The #PF with PFEC.RSVD = 1 indicates the guest is accessing 49288c2ecf20Sopenharmony_ci * MMIO, it is better to report an internal error. 49298c2ecf20Sopenharmony_ci * See the comments in vmx_handle_exit. 49308c2ecf20Sopenharmony_ci */ 49318c2ecf20Sopenharmony_ci if ((vect_info & VECTORING_INFO_VALID_MASK) && 49328c2ecf20Sopenharmony_ci !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) { 49338c2ecf20Sopenharmony_ci vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 49348c2ecf20Sopenharmony_ci vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; 49358c2ecf20Sopenharmony_ci vcpu->run->internal.ndata = 4; 49368c2ecf20Sopenharmony_ci vcpu->run->internal.data[0] = vect_info; 49378c2ecf20Sopenharmony_ci vcpu->run->internal.data[1] = intr_info; 49388c2ecf20Sopenharmony_ci vcpu->run->internal.data[2] = error_code; 49398c2ecf20Sopenharmony_ci vcpu->run->internal.data[3] = vcpu->arch.last_vmentry_cpu; 49408c2ecf20Sopenharmony_ci return 0; 49418c2ecf20Sopenharmony_ci } 49428c2ecf20Sopenharmony_ci 49438c2ecf20Sopenharmony_ci if (is_page_fault(intr_info)) { 49448c2ecf20Sopenharmony_ci cr2 = vmx_get_exit_qual(vcpu); 49458c2ecf20Sopenharmony_ci if (enable_ept && !vcpu->arch.apf.host_apf_flags) { 49468c2ecf20Sopenharmony_ci /* 49478c2ecf20Sopenharmony_ci * EPT will cause page fault only if we need to 49488c2ecf20Sopenharmony_ci * detect illegal GPAs. 49498c2ecf20Sopenharmony_ci */ 49508c2ecf20Sopenharmony_ci WARN_ON_ONCE(!allow_smaller_maxphyaddr); 49518c2ecf20Sopenharmony_ci kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code); 49528c2ecf20Sopenharmony_ci return 1; 49538c2ecf20Sopenharmony_ci } else 49548c2ecf20Sopenharmony_ci return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0); 49558c2ecf20Sopenharmony_ci } 49568c2ecf20Sopenharmony_ci 49578c2ecf20Sopenharmony_ci ex_no = intr_info & INTR_INFO_VECTOR_MASK; 49588c2ecf20Sopenharmony_ci 49598c2ecf20Sopenharmony_ci if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no)) 49608c2ecf20Sopenharmony_ci return handle_rmode_exception(vcpu, ex_no, error_code); 49618c2ecf20Sopenharmony_ci 49628c2ecf20Sopenharmony_ci switch (ex_no) { 49638c2ecf20Sopenharmony_ci case DB_VECTOR: 49648c2ecf20Sopenharmony_ci dr6 = vmx_get_exit_qual(vcpu); 49658c2ecf20Sopenharmony_ci if (!(vcpu->guest_debug & 49668c2ecf20Sopenharmony_ci (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { 49678c2ecf20Sopenharmony_ci /* 49688c2ecf20Sopenharmony_ci * If the #DB was due to ICEBP, a.k.a. INT1, skip the 49698c2ecf20Sopenharmony_ci * instruction. ICEBP generates a trap-like #DB, but 49708c2ecf20Sopenharmony_ci * despite its interception control being tied to #DB, 49718c2ecf20Sopenharmony_ci * is an instruction intercept, i.e. the VM-Exit occurs 49728c2ecf20Sopenharmony_ci * on the ICEBP itself. Note, skipping ICEBP also 49738c2ecf20Sopenharmony_ci * clears STI and MOVSS blocking. 49748c2ecf20Sopenharmony_ci * 49758c2ecf20Sopenharmony_ci * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS 49768c2ecf20Sopenharmony_ci * if single-step is enabled in RFLAGS and STI or MOVSS 49778c2ecf20Sopenharmony_ci * blocking is active, as the CPU doesn't set the bit 49788c2ecf20Sopenharmony_ci * on VM-Exit due to #DB interception. VM-Entry has a 49798c2ecf20Sopenharmony_ci * consistency check that a single-step #DB is pending 49808c2ecf20Sopenharmony_ci * in this scenario as the previous instruction cannot 49818c2ecf20Sopenharmony_ci * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV 49828c2ecf20Sopenharmony_ci * don't modify RFLAGS), therefore the one instruction 49838c2ecf20Sopenharmony_ci * delay when activating single-step breakpoints must 49848c2ecf20Sopenharmony_ci * have already expired. Note, the CPU sets/clears BS 49858c2ecf20Sopenharmony_ci * as appropriate for all other VM-Exits types. 49868c2ecf20Sopenharmony_ci */ 49878c2ecf20Sopenharmony_ci if (is_icebp(intr_info)) 49888c2ecf20Sopenharmony_ci WARN_ON(!skip_emulated_instruction(vcpu)); 49898c2ecf20Sopenharmony_ci else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) && 49908c2ecf20Sopenharmony_ci (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 49918c2ecf20Sopenharmony_ci (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS))) 49928c2ecf20Sopenharmony_ci vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 49938c2ecf20Sopenharmony_ci vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS); 49948c2ecf20Sopenharmony_ci 49958c2ecf20Sopenharmony_ci kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); 49968c2ecf20Sopenharmony_ci return 1; 49978c2ecf20Sopenharmony_ci } 49988c2ecf20Sopenharmony_ci kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; 49998c2ecf20Sopenharmony_ci kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); 50008c2ecf20Sopenharmony_ci fallthrough; 50018c2ecf20Sopenharmony_ci case BP_VECTOR: 50028c2ecf20Sopenharmony_ci /* 50038c2ecf20Sopenharmony_ci * Update instruction length as we may reinject #BP from 50048c2ecf20Sopenharmony_ci * user space while in guest debugging mode. Reading it for 50058c2ecf20Sopenharmony_ci * #DB as well causes no harm, it is not used in that case. 50068c2ecf20Sopenharmony_ci */ 50078c2ecf20Sopenharmony_ci vmx->vcpu.arch.event_exit_inst_len = 50088c2ecf20Sopenharmony_ci vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 50098c2ecf20Sopenharmony_ci kvm_run->exit_reason = KVM_EXIT_DEBUG; 50108c2ecf20Sopenharmony_ci rip = kvm_rip_read(vcpu); 50118c2ecf20Sopenharmony_ci kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; 50128c2ecf20Sopenharmony_ci kvm_run->debug.arch.exception = ex_no; 50138c2ecf20Sopenharmony_ci break; 50148c2ecf20Sopenharmony_ci case AC_VECTOR: 50158c2ecf20Sopenharmony_ci if (vmx_guest_inject_ac(vcpu)) { 50168c2ecf20Sopenharmony_ci kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); 50178c2ecf20Sopenharmony_ci return 1; 50188c2ecf20Sopenharmony_ci } 50198c2ecf20Sopenharmony_ci 50208c2ecf20Sopenharmony_ci /* 50218c2ecf20Sopenharmony_ci * Handle split lock. Depending on detection mode this will 50228c2ecf20Sopenharmony_ci * either warn and disable split lock detection for this 50238c2ecf20Sopenharmony_ci * task or force SIGBUS on it. 50248c2ecf20Sopenharmony_ci */ 50258c2ecf20Sopenharmony_ci if (handle_guest_split_lock(kvm_rip_read(vcpu))) 50268c2ecf20Sopenharmony_ci return 1; 50278c2ecf20Sopenharmony_ci fallthrough; 50288c2ecf20Sopenharmony_ci default: 50298c2ecf20Sopenharmony_ci kvm_run->exit_reason = KVM_EXIT_EXCEPTION; 50308c2ecf20Sopenharmony_ci kvm_run->ex.exception = ex_no; 50318c2ecf20Sopenharmony_ci kvm_run->ex.error_code = error_code; 50328c2ecf20Sopenharmony_ci break; 50338c2ecf20Sopenharmony_ci } 50348c2ecf20Sopenharmony_ci return 0; 50358c2ecf20Sopenharmony_ci} 50368c2ecf20Sopenharmony_ci 50378c2ecf20Sopenharmony_cistatic __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu) 50388c2ecf20Sopenharmony_ci{ 50398c2ecf20Sopenharmony_ci ++vcpu->stat.irq_exits; 50408c2ecf20Sopenharmony_ci return 1; 50418c2ecf20Sopenharmony_ci} 50428c2ecf20Sopenharmony_ci 50438c2ecf20Sopenharmony_cistatic int handle_triple_fault(struct kvm_vcpu *vcpu) 50448c2ecf20Sopenharmony_ci{ 50458c2ecf20Sopenharmony_ci vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; 50468c2ecf20Sopenharmony_ci vcpu->mmio_needed = 0; 50478c2ecf20Sopenharmony_ci return 0; 50488c2ecf20Sopenharmony_ci} 50498c2ecf20Sopenharmony_ci 50508c2ecf20Sopenharmony_cistatic int handle_io(struct kvm_vcpu *vcpu) 50518c2ecf20Sopenharmony_ci{ 50528c2ecf20Sopenharmony_ci unsigned long exit_qualification; 50538c2ecf20Sopenharmony_ci int size, in, string; 50548c2ecf20Sopenharmony_ci unsigned port; 50558c2ecf20Sopenharmony_ci 50568c2ecf20Sopenharmony_ci exit_qualification = vmx_get_exit_qual(vcpu); 50578c2ecf20Sopenharmony_ci string = (exit_qualification & 16) != 0; 50588c2ecf20Sopenharmony_ci 50598c2ecf20Sopenharmony_ci ++vcpu->stat.io_exits; 50608c2ecf20Sopenharmony_ci 50618c2ecf20Sopenharmony_ci if (string) 50628c2ecf20Sopenharmony_ci return kvm_emulate_instruction(vcpu, 0); 50638c2ecf20Sopenharmony_ci 50648c2ecf20Sopenharmony_ci port = exit_qualification >> 16; 50658c2ecf20Sopenharmony_ci size = (exit_qualification & 7) + 1; 50668c2ecf20Sopenharmony_ci in = (exit_qualification & 8) != 0; 50678c2ecf20Sopenharmony_ci 50688c2ecf20Sopenharmony_ci return kvm_fast_pio(vcpu, size, port, in); 50698c2ecf20Sopenharmony_ci} 50708c2ecf20Sopenharmony_ci 50718c2ecf20Sopenharmony_cistatic void 50728c2ecf20Sopenharmony_civmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) 50738c2ecf20Sopenharmony_ci{ 50748c2ecf20Sopenharmony_ci /* 50758c2ecf20Sopenharmony_ci * Patch in the VMCALL instruction: 50768c2ecf20Sopenharmony_ci */ 50778c2ecf20Sopenharmony_ci hypercall[0] = 0x0f; 50788c2ecf20Sopenharmony_ci hypercall[1] = 0x01; 50798c2ecf20Sopenharmony_ci hypercall[2] = 0xc1; 50808c2ecf20Sopenharmony_ci} 50818c2ecf20Sopenharmony_ci 50828c2ecf20Sopenharmony_ci/* called to set cr0 as appropriate for a mov-to-cr0 exit. */ 50838c2ecf20Sopenharmony_cistatic int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) 50848c2ecf20Sopenharmony_ci{ 50858c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) { 50868c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 50878c2ecf20Sopenharmony_ci unsigned long orig_val = val; 50888c2ecf20Sopenharmony_ci 50898c2ecf20Sopenharmony_ci /* 50908c2ecf20Sopenharmony_ci * We get here when L2 changed cr0 in a way that did not change 50918c2ecf20Sopenharmony_ci * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), 50928c2ecf20Sopenharmony_ci * but did change L0 shadowed bits. So we first calculate the 50938c2ecf20Sopenharmony_ci * effective cr0 value that L1 would like to write into the 50948c2ecf20Sopenharmony_ci * hardware. It consists of the L2-owned bits from the new 50958c2ecf20Sopenharmony_ci * value combined with the L1-owned bits from L1's guest_cr0. 50968c2ecf20Sopenharmony_ci */ 50978c2ecf20Sopenharmony_ci val = (val & ~vmcs12->cr0_guest_host_mask) | 50988c2ecf20Sopenharmony_ci (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); 50998c2ecf20Sopenharmony_ci 51008c2ecf20Sopenharmony_ci if (!nested_guest_cr0_valid(vcpu, val)) 51018c2ecf20Sopenharmony_ci return 1; 51028c2ecf20Sopenharmony_ci 51038c2ecf20Sopenharmony_ci if (kvm_set_cr0(vcpu, val)) 51048c2ecf20Sopenharmony_ci return 1; 51058c2ecf20Sopenharmony_ci vmcs_writel(CR0_READ_SHADOW, orig_val); 51068c2ecf20Sopenharmony_ci return 0; 51078c2ecf20Sopenharmony_ci } else { 51088c2ecf20Sopenharmony_ci if (to_vmx(vcpu)->nested.vmxon && 51098c2ecf20Sopenharmony_ci !nested_host_cr0_valid(vcpu, val)) 51108c2ecf20Sopenharmony_ci return 1; 51118c2ecf20Sopenharmony_ci 51128c2ecf20Sopenharmony_ci return kvm_set_cr0(vcpu, val); 51138c2ecf20Sopenharmony_ci } 51148c2ecf20Sopenharmony_ci} 51158c2ecf20Sopenharmony_ci 51168c2ecf20Sopenharmony_cistatic int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) 51178c2ecf20Sopenharmony_ci{ 51188c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) { 51198c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 51208c2ecf20Sopenharmony_ci unsigned long orig_val = val; 51218c2ecf20Sopenharmony_ci 51228c2ecf20Sopenharmony_ci /* analogously to handle_set_cr0 */ 51238c2ecf20Sopenharmony_ci val = (val & ~vmcs12->cr4_guest_host_mask) | 51248c2ecf20Sopenharmony_ci (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask); 51258c2ecf20Sopenharmony_ci if (kvm_set_cr4(vcpu, val)) 51268c2ecf20Sopenharmony_ci return 1; 51278c2ecf20Sopenharmony_ci vmcs_writel(CR4_READ_SHADOW, orig_val); 51288c2ecf20Sopenharmony_ci return 0; 51298c2ecf20Sopenharmony_ci } else 51308c2ecf20Sopenharmony_ci return kvm_set_cr4(vcpu, val); 51318c2ecf20Sopenharmony_ci} 51328c2ecf20Sopenharmony_ci 51338c2ecf20Sopenharmony_cistatic int handle_desc(struct kvm_vcpu *vcpu) 51348c2ecf20Sopenharmony_ci{ 51358c2ecf20Sopenharmony_ci WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); 51368c2ecf20Sopenharmony_ci return kvm_emulate_instruction(vcpu, 0); 51378c2ecf20Sopenharmony_ci} 51388c2ecf20Sopenharmony_ci 51398c2ecf20Sopenharmony_cistatic int handle_cr(struct kvm_vcpu *vcpu) 51408c2ecf20Sopenharmony_ci{ 51418c2ecf20Sopenharmony_ci unsigned long exit_qualification, val; 51428c2ecf20Sopenharmony_ci int cr; 51438c2ecf20Sopenharmony_ci int reg; 51448c2ecf20Sopenharmony_ci int err; 51458c2ecf20Sopenharmony_ci int ret; 51468c2ecf20Sopenharmony_ci 51478c2ecf20Sopenharmony_ci exit_qualification = vmx_get_exit_qual(vcpu); 51488c2ecf20Sopenharmony_ci cr = exit_qualification & 15; 51498c2ecf20Sopenharmony_ci reg = (exit_qualification >> 8) & 15; 51508c2ecf20Sopenharmony_ci switch ((exit_qualification >> 4) & 3) { 51518c2ecf20Sopenharmony_ci case 0: /* mov to cr */ 51528c2ecf20Sopenharmony_ci val = kvm_register_readl(vcpu, reg); 51538c2ecf20Sopenharmony_ci trace_kvm_cr_write(cr, val); 51548c2ecf20Sopenharmony_ci switch (cr) { 51558c2ecf20Sopenharmony_ci case 0: 51568c2ecf20Sopenharmony_ci err = handle_set_cr0(vcpu, val); 51578c2ecf20Sopenharmony_ci return kvm_complete_insn_gp(vcpu, err); 51588c2ecf20Sopenharmony_ci case 3: 51598c2ecf20Sopenharmony_ci WARN_ON_ONCE(enable_unrestricted_guest); 51608c2ecf20Sopenharmony_ci err = kvm_set_cr3(vcpu, val); 51618c2ecf20Sopenharmony_ci return kvm_complete_insn_gp(vcpu, err); 51628c2ecf20Sopenharmony_ci case 4: 51638c2ecf20Sopenharmony_ci err = handle_set_cr4(vcpu, val); 51648c2ecf20Sopenharmony_ci return kvm_complete_insn_gp(vcpu, err); 51658c2ecf20Sopenharmony_ci case 8: { 51668c2ecf20Sopenharmony_ci u8 cr8_prev = kvm_get_cr8(vcpu); 51678c2ecf20Sopenharmony_ci u8 cr8 = (u8)val; 51688c2ecf20Sopenharmony_ci err = kvm_set_cr8(vcpu, cr8); 51698c2ecf20Sopenharmony_ci ret = kvm_complete_insn_gp(vcpu, err); 51708c2ecf20Sopenharmony_ci if (lapic_in_kernel(vcpu)) 51718c2ecf20Sopenharmony_ci return ret; 51728c2ecf20Sopenharmony_ci if (cr8_prev <= cr8) 51738c2ecf20Sopenharmony_ci return ret; 51748c2ecf20Sopenharmony_ci /* 51758c2ecf20Sopenharmony_ci * TODO: we might be squashing a 51768c2ecf20Sopenharmony_ci * KVM_GUESTDBG_SINGLESTEP-triggered 51778c2ecf20Sopenharmony_ci * KVM_EXIT_DEBUG here. 51788c2ecf20Sopenharmony_ci */ 51798c2ecf20Sopenharmony_ci vcpu->run->exit_reason = KVM_EXIT_SET_TPR; 51808c2ecf20Sopenharmony_ci return 0; 51818c2ecf20Sopenharmony_ci } 51828c2ecf20Sopenharmony_ci } 51838c2ecf20Sopenharmony_ci break; 51848c2ecf20Sopenharmony_ci case 2: /* clts */ 51858c2ecf20Sopenharmony_ci WARN_ONCE(1, "Guest should always own CR0.TS"); 51868c2ecf20Sopenharmony_ci vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); 51878c2ecf20Sopenharmony_ci trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); 51888c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 51898c2ecf20Sopenharmony_ci case 1: /*mov from cr*/ 51908c2ecf20Sopenharmony_ci switch (cr) { 51918c2ecf20Sopenharmony_ci case 3: 51928c2ecf20Sopenharmony_ci WARN_ON_ONCE(enable_unrestricted_guest); 51938c2ecf20Sopenharmony_ci val = kvm_read_cr3(vcpu); 51948c2ecf20Sopenharmony_ci kvm_register_write(vcpu, reg, val); 51958c2ecf20Sopenharmony_ci trace_kvm_cr_read(cr, val); 51968c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 51978c2ecf20Sopenharmony_ci case 8: 51988c2ecf20Sopenharmony_ci val = kvm_get_cr8(vcpu); 51998c2ecf20Sopenharmony_ci kvm_register_write(vcpu, reg, val); 52008c2ecf20Sopenharmony_ci trace_kvm_cr_read(cr, val); 52018c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 52028c2ecf20Sopenharmony_ci } 52038c2ecf20Sopenharmony_ci break; 52048c2ecf20Sopenharmony_ci case 3: /* lmsw */ 52058c2ecf20Sopenharmony_ci val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; 52068c2ecf20Sopenharmony_ci trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val); 52078c2ecf20Sopenharmony_ci kvm_lmsw(vcpu, val); 52088c2ecf20Sopenharmony_ci 52098c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 52108c2ecf20Sopenharmony_ci default: 52118c2ecf20Sopenharmony_ci break; 52128c2ecf20Sopenharmony_ci } 52138c2ecf20Sopenharmony_ci vcpu->run->exit_reason = 0; 52148c2ecf20Sopenharmony_ci vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n", 52158c2ecf20Sopenharmony_ci (int)(exit_qualification >> 4) & 3, cr); 52168c2ecf20Sopenharmony_ci return 0; 52178c2ecf20Sopenharmony_ci} 52188c2ecf20Sopenharmony_ci 52198c2ecf20Sopenharmony_cistatic int handle_dr(struct kvm_vcpu *vcpu) 52208c2ecf20Sopenharmony_ci{ 52218c2ecf20Sopenharmony_ci unsigned long exit_qualification; 52228c2ecf20Sopenharmony_ci int dr, dr7, reg; 52238c2ecf20Sopenharmony_ci 52248c2ecf20Sopenharmony_ci exit_qualification = vmx_get_exit_qual(vcpu); 52258c2ecf20Sopenharmony_ci dr = exit_qualification & DEBUG_REG_ACCESS_NUM; 52268c2ecf20Sopenharmony_ci 52278c2ecf20Sopenharmony_ci /* First, if DR does not exist, trigger UD */ 52288c2ecf20Sopenharmony_ci if (!kvm_require_dr(vcpu, dr)) 52298c2ecf20Sopenharmony_ci return 1; 52308c2ecf20Sopenharmony_ci 52318c2ecf20Sopenharmony_ci /* Do not handle if the CPL > 0, will trigger GP on re-entry */ 52328c2ecf20Sopenharmony_ci if (!kvm_require_cpl(vcpu, 0)) 52338c2ecf20Sopenharmony_ci return 1; 52348c2ecf20Sopenharmony_ci dr7 = vmcs_readl(GUEST_DR7); 52358c2ecf20Sopenharmony_ci if (dr7 & DR7_GD) { 52368c2ecf20Sopenharmony_ci /* 52378c2ecf20Sopenharmony_ci * As the vm-exit takes precedence over the debug trap, we 52388c2ecf20Sopenharmony_ci * need to emulate the latter, either for the host or the 52398c2ecf20Sopenharmony_ci * guest debugging itself. 52408c2ecf20Sopenharmony_ci */ 52418c2ecf20Sopenharmony_ci if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { 52428c2ecf20Sopenharmony_ci vcpu->run->debug.arch.dr6 = DR6_BD | DR6_RTM | DR6_FIXED_1; 52438c2ecf20Sopenharmony_ci vcpu->run->debug.arch.dr7 = dr7; 52448c2ecf20Sopenharmony_ci vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu); 52458c2ecf20Sopenharmony_ci vcpu->run->debug.arch.exception = DB_VECTOR; 52468c2ecf20Sopenharmony_ci vcpu->run->exit_reason = KVM_EXIT_DEBUG; 52478c2ecf20Sopenharmony_ci return 0; 52488c2ecf20Sopenharmony_ci } else { 52498c2ecf20Sopenharmony_ci kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BD); 52508c2ecf20Sopenharmony_ci return 1; 52518c2ecf20Sopenharmony_ci } 52528c2ecf20Sopenharmony_ci } 52538c2ecf20Sopenharmony_ci 52548c2ecf20Sopenharmony_ci if (vcpu->guest_debug == 0) { 52558c2ecf20Sopenharmony_ci exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); 52568c2ecf20Sopenharmony_ci 52578c2ecf20Sopenharmony_ci /* 52588c2ecf20Sopenharmony_ci * No more DR vmexits; force a reload of the debug registers 52598c2ecf20Sopenharmony_ci * and reenter on this instruction. The next vmexit will 52608c2ecf20Sopenharmony_ci * retrieve the full state of the debug registers. 52618c2ecf20Sopenharmony_ci */ 52628c2ecf20Sopenharmony_ci vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; 52638c2ecf20Sopenharmony_ci return 1; 52648c2ecf20Sopenharmony_ci } 52658c2ecf20Sopenharmony_ci 52668c2ecf20Sopenharmony_ci reg = DEBUG_REG_ACCESS_REG(exit_qualification); 52678c2ecf20Sopenharmony_ci if (exit_qualification & TYPE_MOV_FROM_DR) { 52688c2ecf20Sopenharmony_ci unsigned long val; 52698c2ecf20Sopenharmony_ci 52708c2ecf20Sopenharmony_ci if (kvm_get_dr(vcpu, dr, &val)) 52718c2ecf20Sopenharmony_ci return 1; 52728c2ecf20Sopenharmony_ci kvm_register_write(vcpu, reg, val); 52738c2ecf20Sopenharmony_ci } else 52748c2ecf20Sopenharmony_ci if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg))) 52758c2ecf20Sopenharmony_ci return 1; 52768c2ecf20Sopenharmony_ci 52778c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 52788c2ecf20Sopenharmony_ci} 52798c2ecf20Sopenharmony_ci 52808c2ecf20Sopenharmony_cistatic void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) 52818c2ecf20Sopenharmony_ci{ 52828c2ecf20Sopenharmony_ci get_debugreg(vcpu->arch.db[0], 0); 52838c2ecf20Sopenharmony_ci get_debugreg(vcpu->arch.db[1], 1); 52848c2ecf20Sopenharmony_ci get_debugreg(vcpu->arch.db[2], 2); 52858c2ecf20Sopenharmony_ci get_debugreg(vcpu->arch.db[3], 3); 52868c2ecf20Sopenharmony_ci get_debugreg(vcpu->arch.dr6, 6); 52878c2ecf20Sopenharmony_ci vcpu->arch.dr7 = vmcs_readl(GUEST_DR7); 52888c2ecf20Sopenharmony_ci 52898c2ecf20Sopenharmony_ci vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; 52908c2ecf20Sopenharmony_ci exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); 52918c2ecf20Sopenharmony_ci} 52928c2ecf20Sopenharmony_ci 52938c2ecf20Sopenharmony_cistatic void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) 52948c2ecf20Sopenharmony_ci{ 52958c2ecf20Sopenharmony_ci vmcs_writel(GUEST_DR7, val); 52968c2ecf20Sopenharmony_ci} 52978c2ecf20Sopenharmony_ci 52988c2ecf20Sopenharmony_cistatic int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) 52998c2ecf20Sopenharmony_ci{ 53008c2ecf20Sopenharmony_ci kvm_apic_update_ppr(vcpu); 53018c2ecf20Sopenharmony_ci return 1; 53028c2ecf20Sopenharmony_ci} 53038c2ecf20Sopenharmony_ci 53048c2ecf20Sopenharmony_cistatic int handle_interrupt_window(struct kvm_vcpu *vcpu) 53058c2ecf20Sopenharmony_ci{ 53068c2ecf20Sopenharmony_ci exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); 53078c2ecf20Sopenharmony_ci 53088c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_EVENT, vcpu); 53098c2ecf20Sopenharmony_ci 53108c2ecf20Sopenharmony_ci ++vcpu->stat.irq_window_exits; 53118c2ecf20Sopenharmony_ci return 1; 53128c2ecf20Sopenharmony_ci} 53138c2ecf20Sopenharmony_ci 53148c2ecf20Sopenharmony_cistatic int handle_vmcall(struct kvm_vcpu *vcpu) 53158c2ecf20Sopenharmony_ci{ 53168c2ecf20Sopenharmony_ci return kvm_emulate_hypercall(vcpu); 53178c2ecf20Sopenharmony_ci} 53188c2ecf20Sopenharmony_ci 53198c2ecf20Sopenharmony_cistatic int handle_invd(struct kvm_vcpu *vcpu) 53208c2ecf20Sopenharmony_ci{ 53218c2ecf20Sopenharmony_ci /* Treat an INVD instruction as a NOP and just skip it. */ 53228c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 53238c2ecf20Sopenharmony_ci} 53248c2ecf20Sopenharmony_ci 53258c2ecf20Sopenharmony_cistatic int handle_invlpg(struct kvm_vcpu *vcpu) 53268c2ecf20Sopenharmony_ci{ 53278c2ecf20Sopenharmony_ci unsigned long exit_qualification = vmx_get_exit_qual(vcpu); 53288c2ecf20Sopenharmony_ci 53298c2ecf20Sopenharmony_ci kvm_mmu_invlpg(vcpu, exit_qualification); 53308c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 53318c2ecf20Sopenharmony_ci} 53328c2ecf20Sopenharmony_ci 53338c2ecf20Sopenharmony_cistatic int handle_rdpmc(struct kvm_vcpu *vcpu) 53348c2ecf20Sopenharmony_ci{ 53358c2ecf20Sopenharmony_ci int err; 53368c2ecf20Sopenharmony_ci 53378c2ecf20Sopenharmony_ci err = kvm_rdpmc(vcpu); 53388c2ecf20Sopenharmony_ci return kvm_complete_insn_gp(vcpu, err); 53398c2ecf20Sopenharmony_ci} 53408c2ecf20Sopenharmony_ci 53418c2ecf20Sopenharmony_cistatic int handle_wbinvd(struct kvm_vcpu *vcpu) 53428c2ecf20Sopenharmony_ci{ 53438c2ecf20Sopenharmony_ci return kvm_emulate_wbinvd(vcpu); 53448c2ecf20Sopenharmony_ci} 53458c2ecf20Sopenharmony_ci 53468c2ecf20Sopenharmony_cistatic int handle_xsetbv(struct kvm_vcpu *vcpu) 53478c2ecf20Sopenharmony_ci{ 53488c2ecf20Sopenharmony_ci u64 new_bv = kvm_read_edx_eax(vcpu); 53498c2ecf20Sopenharmony_ci u32 index = kvm_rcx_read(vcpu); 53508c2ecf20Sopenharmony_ci 53518c2ecf20Sopenharmony_ci if (kvm_set_xcr(vcpu, index, new_bv) == 0) 53528c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 53538c2ecf20Sopenharmony_ci return 1; 53548c2ecf20Sopenharmony_ci} 53558c2ecf20Sopenharmony_ci 53568c2ecf20Sopenharmony_cistatic int handle_apic_access(struct kvm_vcpu *vcpu) 53578c2ecf20Sopenharmony_ci{ 53588c2ecf20Sopenharmony_ci if (likely(fasteoi)) { 53598c2ecf20Sopenharmony_ci unsigned long exit_qualification = vmx_get_exit_qual(vcpu); 53608c2ecf20Sopenharmony_ci int access_type, offset; 53618c2ecf20Sopenharmony_ci 53628c2ecf20Sopenharmony_ci access_type = exit_qualification & APIC_ACCESS_TYPE; 53638c2ecf20Sopenharmony_ci offset = exit_qualification & APIC_ACCESS_OFFSET; 53648c2ecf20Sopenharmony_ci /* 53658c2ecf20Sopenharmony_ci * Sane guest uses MOV to write EOI, with written value 53668c2ecf20Sopenharmony_ci * not cared. So make a short-circuit here by avoiding 53678c2ecf20Sopenharmony_ci * heavy instruction emulation. 53688c2ecf20Sopenharmony_ci */ 53698c2ecf20Sopenharmony_ci if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) && 53708c2ecf20Sopenharmony_ci (offset == APIC_EOI)) { 53718c2ecf20Sopenharmony_ci kvm_lapic_set_eoi(vcpu); 53728c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 53738c2ecf20Sopenharmony_ci } 53748c2ecf20Sopenharmony_ci } 53758c2ecf20Sopenharmony_ci return kvm_emulate_instruction(vcpu, 0); 53768c2ecf20Sopenharmony_ci} 53778c2ecf20Sopenharmony_ci 53788c2ecf20Sopenharmony_cistatic int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) 53798c2ecf20Sopenharmony_ci{ 53808c2ecf20Sopenharmony_ci unsigned long exit_qualification = vmx_get_exit_qual(vcpu); 53818c2ecf20Sopenharmony_ci int vector = exit_qualification & 0xff; 53828c2ecf20Sopenharmony_ci 53838c2ecf20Sopenharmony_ci /* EOI-induced VM exit is trap-like and thus no need to adjust IP */ 53848c2ecf20Sopenharmony_ci kvm_apic_set_eoi_accelerated(vcpu, vector); 53858c2ecf20Sopenharmony_ci return 1; 53868c2ecf20Sopenharmony_ci} 53878c2ecf20Sopenharmony_ci 53888c2ecf20Sopenharmony_cistatic int handle_apic_write(struct kvm_vcpu *vcpu) 53898c2ecf20Sopenharmony_ci{ 53908c2ecf20Sopenharmony_ci unsigned long exit_qualification = vmx_get_exit_qual(vcpu); 53918c2ecf20Sopenharmony_ci u32 offset = exit_qualification & 0xfff; 53928c2ecf20Sopenharmony_ci 53938c2ecf20Sopenharmony_ci /* APIC-write VM exit is trap-like and thus no need to adjust IP */ 53948c2ecf20Sopenharmony_ci kvm_apic_write_nodecode(vcpu, offset); 53958c2ecf20Sopenharmony_ci return 1; 53968c2ecf20Sopenharmony_ci} 53978c2ecf20Sopenharmony_ci 53988c2ecf20Sopenharmony_cistatic int handle_task_switch(struct kvm_vcpu *vcpu) 53998c2ecf20Sopenharmony_ci{ 54008c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 54018c2ecf20Sopenharmony_ci unsigned long exit_qualification; 54028c2ecf20Sopenharmony_ci bool has_error_code = false; 54038c2ecf20Sopenharmony_ci u32 error_code = 0; 54048c2ecf20Sopenharmony_ci u16 tss_selector; 54058c2ecf20Sopenharmony_ci int reason, type, idt_v, idt_index; 54068c2ecf20Sopenharmony_ci 54078c2ecf20Sopenharmony_ci idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); 54088c2ecf20Sopenharmony_ci idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK); 54098c2ecf20Sopenharmony_ci type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); 54108c2ecf20Sopenharmony_ci 54118c2ecf20Sopenharmony_ci exit_qualification = vmx_get_exit_qual(vcpu); 54128c2ecf20Sopenharmony_ci 54138c2ecf20Sopenharmony_ci reason = (u32)exit_qualification >> 30; 54148c2ecf20Sopenharmony_ci if (reason == TASK_SWITCH_GATE && idt_v) { 54158c2ecf20Sopenharmony_ci switch (type) { 54168c2ecf20Sopenharmony_ci case INTR_TYPE_NMI_INTR: 54178c2ecf20Sopenharmony_ci vcpu->arch.nmi_injected = false; 54188c2ecf20Sopenharmony_ci vmx_set_nmi_mask(vcpu, true); 54198c2ecf20Sopenharmony_ci break; 54208c2ecf20Sopenharmony_ci case INTR_TYPE_EXT_INTR: 54218c2ecf20Sopenharmony_ci case INTR_TYPE_SOFT_INTR: 54228c2ecf20Sopenharmony_ci kvm_clear_interrupt_queue(vcpu); 54238c2ecf20Sopenharmony_ci break; 54248c2ecf20Sopenharmony_ci case INTR_TYPE_HARD_EXCEPTION: 54258c2ecf20Sopenharmony_ci if (vmx->idt_vectoring_info & 54268c2ecf20Sopenharmony_ci VECTORING_INFO_DELIVER_CODE_MASK) { 54278c2ecf20Sopenharmony_ci has_error_code = true; 54288c2ecf20Sopenharmony_ci error_code = 54298c2ecf20Sopenharmony_ci vmcs_read32(IDT_VECTORING_ERROR_CODE); 54308c2ecf20Sopenharmony_ci } 54318c2ecf20Sopenharmony_ci fallthrough; 54328c2ecf20Sopenharmony_ci case INTR_TYPE_SOFT_EXCEPTION: 54338c2ecf20Sopenharmony_ci kvm_clear_exception_queue(vcpu); 54348c2ecf20Sopenharmony_ci break; 54358c2ecf20Sopenharmony_ci default: 54368c2ecf20Sopenharmony_ci break; 54378c2ecf20Sopenharmony_ci } 54388c2ecf20Sopenharmony_ci } 54398c2ecf20Sopenharmony_ci tss_selector = exit_qualification; 54408c2ecf20Sopenharmony_ci 54418c2ecf20Sopenharmony_ci if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && 54428c2ecf20Sopenharmony_ci type != INTR_TYPE_EXT_INTR && 54438c2ecf20Sopenharmony_ci type != INTR_TYPE_NMI_INTR)) 54448c2ecf20Sopenharmony_ci WARN_ON(!skip_emulated_instruction(vcpu)); 54458c2ecf20Sopenharmony_ci 54468c2ecf20Sopenharmony_ci /* 54478c2ecf20Sopenharmony_ci * TODO: What about debug traps on tss switch? 54488c2ecf20Sopenharmony_ci * Are we supposed to inject them and update dr6? 54498c2ecf20Sopenharmony_ci */ 54508c2ecf20Sopenharmony_ci return kvm_task_switch(vcpu, tss_selector, 54518c2ecf20Sopenharmony_ci type == INTR_TYPE_SOFT_INTR ? idt_index : -1, 54528c2ecf20Sopenharmony_ci reason, has_error_code, error_code); 54538c2ecf20Sopenharmony_ci} 54548c2ecf20Sopenharmony_ci 54558c2ecf20Sopenharmony_cistatic int handle_ept_violation(struct kvm_vcpu *vcpu) 54568c2ecf20Sopenharmony_ci{ 54578c2ecf20Sopenharmony_ci unsigned long exit_qualification; 54588c2ecf20Sopenharmony_ci gpa_t gpa; 54598c2ecf20Sopenharmony_ci u64 error_code; 54608c2ecf20Sopenharmony_ci 54618c2ecf20Sopenharmony_ci exit_qualification = vmx_get_exit_qual(vcpu); 54628c2ecf20Sopenharmony_ci 54638c2ecf20Sopenharmony_ci /* 54648c2ecf20Sopenharmony_ci * EPT violation happened while executing iret from NMI, 54658c2ecf20Sopenharmony_ci * "blocked by NMI" bit has to be set before next VM entry. 54668c2ecf20Sopenharmony_ci * There are errata that may cause this bit to not be set: 54678c2ecf20Sopenharmony_ci * AAK134, BY25. 54688c2ecf20Sopenharmony_ci */ 54698c2ecf20Sopenharmony_ci if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && 54708c2ecf20Sopenharmony_ci enable_vnmi && 54718c2ecf20Sopenharmony_ci (exit_qualification & INTR_INFO_UNBLOCK_NMI)) 54728c2ecf20Sopenharmony_ci vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); 54738c2ecf20Sopenharmony_ci 54748c2ecf20Sopenharmony_ci gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); 54758c2ecf20Sopenharmony_ci trace_kvm_page_fault(gpa, exit_qualification); 54768c2ecf20Sopenharmony_ci 54778c2ecf20Sopenharmony_ci /* Is it a read fault? */ 54788c2ecf20Sopenharmony_ci error_code = (exit_qualification & EPT_VIOLATION_ACC_READ) 54798c2ecf20Sopenharmony_ci ? PFERR_USER_MASK : 0; 54808c2ecf20Sopenharmony_ci /* Is it a write fault? */ 54818c2ecf20Sopenharmony_ci error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE) 54828c2ecf20Sopenharmony_ci ? PFERR_WRITE_MASK : 0; 54838c2ecf20Sopenharmony_ci /* Is it a fetch fault? */ 54848c2ecf20Sopenharmony_ci error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR) 54858c2ecf20Sopenharmony_ci ? PFERR_FETCH_MASK : 0; 54868c2ecf20Sopenharmony_ci /* ept page table entry is present? */ 54878c2ecf20Sopenharmony_ci error_code |= (exit_qualification & 54888c2ecf20Sopenharmony_ci (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE | 54898c2ecf20Sopenharmony_ci EPT_VIOLATION_EXECUTABLE)) 54908c2ecf20Sopenharmony_ci ? PFERR_PRESENT_MASK : 0; 54918c2ecf20Sopenharmony_ci 54928c2ecf20Sopenharmony_ci error_code |= (exit_qualification & 0x100) != 0 ? 54938c2ecf20Sopenharmony_ci PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; 54948c2ecf20Sopenharmony_ci 54958c2ecf20Sopenharmony_ci vcpu->arch.exit_qualification = exit_qualification; 54968c2ecf20Sopenharmony_ci 54978c2ecf20Sopenharmony_ci /* 54988c2ecf20Sopenharmony_ci * Check that the GPA doesn't exceed physical memory limits, as that is 54998c2ecf20Sopenharmony_ci * a guest page fault. We have to emulate the instruction here, because 55008c2ecf20Sopenharmony_ci * if the illegal address is that of a paging structure, then 55018c2ecf20Sopenharmony_ci * EPT_VIOLATION_ACC_WRITE bit is set. Alternatively, if supported we 55028c2ecf20Sopenharmony_ci * would also use advanced VM-exit information for EPT violations to 55038c2ecf20Sopenharmony_ci * reconstruct the page fault error code. 55048c2ecf20Sopenharmony_ci */ 55058c2ecf20Sopenharmony_ci if (unlikely(allow_smaller_maxphyaddr && kvm_vcpu_is_illegal_gpa(vcpu, gpa))) 55068c2ecf20Sopenharmony_ci return kvm_emulate_instruction(vcpu, 0); 55078c2ecf20Sopenharmony_ci 55088c2ecf20Sopenharmony_ci return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); 55098c2ecf20Sopenharmony_ci} 55108c2ecf20Sopenharmony_ci 55118c2ecf20Sopenharmony_cistatic int handle_ept_misconfig(struct kvm_vcpu *vcpu) 55128c2ecf20Sopenharmony_ci{ 55138c2ecf20Sopenharmony_ci gpa_t gpa; 55148c2ecf20Sopenharmony_ci 55158c2ecf20Sopenharmony_ci /* 55168c2ecf20Sopenharmony_ci * A nested guest cannot optimize MMIO vmexits, because we have an 55178c2ecf20Sopenharmony_ci * nGPA here instead of the required GPA. 55188c2ecf20Sopenharmony_ci */ 55198c2ecf20Sopenharmony_ci gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); 55208c2ecf20Sopenharmony_ci if (!is_guest_mode(vcpu) && 55218c2ecf20Sopenharmony_ci !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { 55228c2ecf20Sopenharmony_ci trace_kvm_fast_mmio(gpa); 55238c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 55248c2ecf20Sopenharmony_ci } 55258c2ecf20Sopenharmony_ci 55268c2ecf20Sopenharmony_ci return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); 55278c2ecf20Sopenharmony_ci} 55288c2ecf20Sopenharmony_ci 55298c2ecf20Sopenharmony_cistatic int handle_nmi_window(struct kvm_vcpu *vcpu) 55308c2ecf20Sopenharmony_ci{ 55318c2ecf20Sopenharmony_ci WARN_ON_ONCE(!enable_vnmi); 55328c2ecf20Sopenharmony_ci exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); 55338c2ecf20Sopenharmony_ci ++vcpu->stat.nmi_window_exits; 55348c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_EVENT, vcpu); 55358c2ecf20Sopenharmony_ci 55368c2ecf20Sopenharmony_ci return 1; 55378c2ecf20Sopenharmony_ci} 55388c2ecf20Sopenharmony_ci 55398c2ecf20Sopenharmony_cistatic int handle_invalid_guest_state(struct kvm_vcpu *vcpu) 55408c2ecf20Sopenharmony_ci{ 55418c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 55428c2ecf20Sopenharmony_ci bool intr_window_requested; 55438c2ecf20Sopenharmony_ci unsigned count = 130; 55448c2ecf20Sopenharmony_ci 55458c2ecf20Sopenharmony_ci intr_window_requested = exec_controls_get(vmx) & 55468c2ecf20Sopenharmony_ci CPU_BASED_INTR_WINDOW_EXITING; 55478c2ecf20Sopenharmony_ci 55488c2ecf20Sopenharmony_ci while (vmx->emulation_required && count-- != 0) { 55498c2ecf20Sopenharmony_ci if (intr_window_requested && !vmx_interrupt_blocked(vcpu)) 55508c2ecf20Sopenharmony_ci return handle_interrupt_window(&vmx->vcpu); 55518c2ecf20Sopenharmony_ci 55528c2ecf20Sopenharmony_ci if (kvm_test_request(KVM_REQ_EVENT, vcpu)) 55538c2ecf20Sopenharmony_ci return 1; 55548c2ecf20Sopenharmony_ci 55558c2ecf20Sopenharmony_ci if (!kvm_emulate_instruction(vcpu, 0)) 55568c2ecf20Sopenharmony_ci return 0; 55578c2ecf20Sopenharmony_ci 55588c2ecf20Sopenharmony_ci if (vmx->emulation_required && !vmx->rmode.vm86_active && 55598c2ecf20Sopenharmony_ci vcpu->arch.exception.pending) { 55608c2ecf20Sopenharmony_ci vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 55618c2ecf20Sopenharmony_ci vcpu->run->internal.suberror = 55628c2ecf20Sopenharmony_ci KVM_INTERNAL_ERROR_EMULATION; 55638c2ecf20Sopenharmony_ci vcpu->run->internal.ndata = 0; 55648c2ecf20Sopenharmony_ci return 0; 55658c2ecf20Sopenharmony_ci } 55668c2ecf20Sopenharmony_ci 55678c2ecf20Sopenharmony_ci if (vcpu->arch.halt_request) { 55688c2ecf20Sopenharmony_ci vcpu->arch.halt_request = 0; 55698c2ecf20Sopenharmony_ci return kvm_vcpu_halt(vcpu); 55708c2ecf20Sopenharmony_ci } 55718c2ecf20Sopenharmony_ci 55728c2ecf20Sopenharmony_ci /* 55738c2ecf20Sopenharmony_ci * Note, return 1 and not 0, vcpu_run() will invoke 55748c2ecf20Sopenharmony_ci * xfer_to_guest_mode() which will create a proper return 55758c2ecf20Sopenharmony_ci * code. 55768c2ecf20Sopenharmony_ci */ 55778c2ecf20Sopenharmony_ci if (__xfer_to_guest_mode_work_pending()) 55788c2ecf20Sopenharmony_ci return 1; 55798c2ecf20Sopenharmony_ci } 55808c2ecf20Sopenharmony_ci 55818c2ecf20Sopenharmony_ci return 1; 55828c2ecf20Sopenharmony_ci} 55838c2ecf20Sopenharmony_ci 55848c2ecf20Sopenharmony_cistatic void grow_ple_window(struct kvm_vcpu *vcpu) 55858c2ecf20Sopenharmony_ci{ 55868c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 55878c2ecf20Sopenharmony_ci unsigned int old = vmx->ple_window; 55888c2ecf20Sopenharmony_ci 55898c2ecf20Sopenharmony_ci vmx->ple_window = __grow_ple_window(old, ple_window, 55908c2ecf20Sopenharmony_ci ple_window_grow, 55918c2ecf20Sopenharmony_ci ple_window_max); 55928c2ecf20Sopenharmony_ci 55938c2ecf20Sopenharmony_ci if (vmx->ple_window != old) { 55948c2ecf20Sopenharmony_ci vmx->ple_window_dirty = true; 55958c2ecf20Sopenharmony_ci trace_kvm_ple_window_update(vcpu->vcpu_id, 55968c2ecf20Sopenharmony_ci vmx->ple_window, old); 55978c2ecf20Sopenharmony_ci } 55988c2ecf20Sopenharmony_ci} 55998c2ecf20Sopenharmony_ci 56008c2ecf20Sopenharmony_cistatic void shrink_ple_window(struct kvm_vcpu *vcpu) 56018c2ecf20Sopenharmony_ci{ 56028c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 56038c2ecf20Sopenharmony_ci unsigned int old = vmx->ple_window; 56048c2ecf20Sopenharmony_ci 56058c2ecf20Sopenharmony_ci vmx->ple_window = __shrink_ple_window(old, ple_window, 56068c2ecf20Sopenharmony_ci ple_window_shrink, 56078c2ecf20Sopenharmony_ci ple_window); 56088c2ecf20Sopenharmony_ci 56098c2ecf20Sopenharmony_ci if (vmx->ple_window != old) { 56108c2ecf20Sopenharmony_ci vmx->ple_window_dirty = true; 56118c2ecf20Sopenharmony_ci trace_kvm_ple_window_update(vcpu->vcpu_id, 56128c2ecf20Sopenharmony_ci vmx->ple_window, old); 56138c2ecf20Sopenharmony_ci } 56148c2ecf20Sopenharmony_ci} 56158c2ecf20Sopenharmony_ci 56168c2ecf20Sopenharmony_cistatic void vmx_enable_tdp(void) 56178c2ecf20Sopenharmony_ci{ 56188c2ecf20Sopenharmony_ci kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, 56198c2ecf20Sopenharmony_ci enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull, 56208c2ecf20Sopenharmony_ci enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull, 56218c2ecf20Sopenharmony_ci 0ull, VMX_EPT_EXECUTABLE_MASK, 56228c2ecf20Sopenharmony_ci cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, 56238c2ecf20Sopenharmony_ci VMX_EPT_RWX_MASK, 0ull); 56248c2ecf20Sopenharmony_ci 56258c2ecf20Sopenharmony_ci ept_set_mmio_spte_mask(); 56268c2ecf20Sopenharmony_ci} 56278c2ecf20Sopenharmony_ci 56288c2ecf20Sopenharmony_ci/* 56298c2ecf20Sopenharmony_ci * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE 56308c2ecf20Sopenharmony_ci * exiting, so only get here on cpu with PAUSE-Loop-Exiting. 56318c2ecf20Sopenharmony_ci */ 56328c2ecf20Sopenharmony_cistatic int handle_pause(struct kvm_vcpu *vcpu) 56338c2ecf20Sopenharmony_ci{ 56348c2ecf20Sopenharmony_ci if (!kvm_pause_in_guest(vcpu->kvm)) 56358c2ecf20Sopenharmony_ci grow_ple_window(vcpu); 56368c2ecf20Sopenharmony_ci 56378c2ecf20Sopenharmony_ci /* 56388c2ecf20Sopenharmony_ci * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting" 56398c2ecf20Sopenharmony_ci * VM-execution control is ignored if CPL > 0. OTOH, KVM 56408c2ecf20Sopenharmony_ci * never set PAUSE_EXITING and just set PLE if supported, 56418c2ecf20Sopenharmony_ci * so the vcpu must be CPL=0 if it gets a PAUSE exit. 56428c2ecf20Sopenharmony_ci */ 56438c2ecf20Sopenharmony_ci kvm_vcpu_on_spin(vcpu, true); 56448c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 56458c2ecf20Sopenharmony_ci} 56468c2ecf20Sopenharmony_ci 56478c2ecf20Sopenharmony_cistatic int handle_nop(struct kvm_vcpu *vcpu) 56488c2ecf20Sopenharmony_ci{ 56498c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 56508c2ecf20Sopenharmony_ci} 56518c2ecf20Sopenharmony_ci 56528c2ecf20Sopenharmony_cistatic int handle_mwait(struct kvm_vcpu *vcpu) 56538c2ecf20Sopenharmony_ci{ 56548c2ecf20Sopenharmony_ci printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n"); 56558c2ecf20Sopenharmony_ci return handle_nop(vcpu); 56568c2ecf20Sopenharmony_ci} 56578c2ecf20Sopenharmony_ci 56588c2ecf20Sopenharmony_cistatic int handle_invalid_op(struct kvm_vcpu *vcpu) 56598c2ecf20Sopenharmony_ci{ 56608c2ecf20Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 56618c2ecf20Sopenharmony_ci return 1; 56628c2ecf20Sopenharmony_ci} 56638c2ecf20Sopenharmony_ci 56648c2ecf20Sopenharmony_cistatic int handle_monitor_trap(struct kvm_vcpu *vcpu) 56658c2ecf20Sopenharmony_ci{ 56668c2ecf20Sopenharmony_ci return 1; 56678c2ecf20Sopenharmony_ci} 56688c2ecf20Sopenharmony_ci 56698c2ecf20Sopenharmony_cistatic int handle_monitor(struct kvm_vcpu *vcpu) 56708c2ecf20Sopenharmony_ci{ 56718c2ecf20Sopenharmony_ci printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); 56728c2ecf20Sopenharmony_ci return handle_nop(vcpu); 56738c2ecf20Sopenharmony_ci} 56748c2ecf20Sopenharmony_ci 56758c2ecf20Sopenharmony_cistatic int handle_invpcid(struct kvm_vcpu *vcpu) 56768c2ecf20Sopenharmony_ci{ 56778c2ecf20Sopenharmony_ci u32 vmx_instruction_info; 56788c2ecf20Sopenharmony_ci unsigned long type; 56798c2ecf20Sopenharmony_ci gva_t gva; 56808c2ecf20Sopenharmony_ci struct { 56818c2ecf20Sopenharmony_ci u64 pcid; 56828c2ecf20Sopenharmony_ci u64 gla; 56838c2ecf20Sopenharmony_ci } operand; 56848c2ecf20Sopenharmony_ci 56858c2ecf20Sopenharmony_ci if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { 56868c2ecf20Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 56878c2ecf20Sopenharmony_ci return 1; 56888c2ecf20Sopenharmony_ci } 56898c2ecf20Sopenharmony_ci 56908c2ecf20Sopenharmony_ci vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); 56918c2ecf20Sopenharmony_ci type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); 56928c2ecf20Sopenharmony_ci 56938c2ecf20Sopenharmony_ci if (type > 3) { 56948c2ecf20Sopenharmony_ci kvm_inject_gp(vcpu, 0); 56958c2ecf20Sopenharmony_ci return 1; 56968c2ecf20Sopenharmony_ci } 56978c2ecf20Sopenharmony_ci 56988c2ecf20Sopenharmony_ci /* According to the Intel instruction reference, the memory operand 56998c2ecf20Sopenharmony_ci * is read even if it isn't needed (e.g., for type==all) 57008c2ecf20Sopenharmony_ci */ 57018c2ecf20Sopenharmony_ci if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), 57028c2ecf20Sopenharmony_ci vmx_instruction_info, false, 57038c2ecf20Sopenharmony_ci sizeof(operand), &gva)) 57048c2ecf20Sopenharmony_ci return 1; 57058c2ecf20Sopenharmony_ci 57068c2ecf20Sopenharmony_ci return kvm_handle_invpcid(vcpu, type, gva); 57078c2ecf20Sopenharmony_ci} 57088c2ecf20Sopenharmony_ci 57098c2ecf20Sopenharmony_cistatic int handle_pml_full(struct kvm_vcpu *vcpu) 57108c2ecf20Sopenharmony_ci{ 57118c2ecf20Sopenharmony_ci unsigned long exit_qualification; 57128c2ecf20Sopenharmony_ci 57138c2ecf20Sopenharmony_ci trace_kvm_pml_full(vcpu->vcpu_id); 57148c2ecf20Sopenharmony_ci 57158c2ecf20Sopenharmony_ci exit_qualification = vmx_get_exit_qual(vcpu); 57168c2ecf20Sopenharmony_ci 57178c2ecf20Sopenharmony_ci /* 57188c2ecf20Sopenharmony_ci * PML buffer FULL happened while executing iret from NMI, 57198c2ecf20Sopenharmony_ci * "blocked by NMI" bit has to be set before next VM entry. 57208c2ecf20Sopenharmony_ci */ 57218c2ecf20Sopenharmony_ci if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && 57228c2ecf20Sopenharmony_ci enable_vnmi && 57238c2ecf20Sopenharmony_ci (exit_qualification & INTR_INFO_UNBLOCK_NMI)) 57248c2ecf20Sopenharmony_ci vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 57258c2ecf20Sopenharmony_ci GUEST_INTR_STATE_NMI); 57268c2ecf20Sopenharmony_ci 57278c2ecf20Sopenharmony_ci /* 57288c2ecf20Sopenharmony_ci * PML buffer already flushed at beginning of VMEXIT. Nothing to do 57298c2ecf20Sopenharmony_ci * here.., and there's no userspace involvement needed for PML. 57308c2ecf20Sopenharmony_ci */ 57318c2ecf20Sopenharmony_ci return 1; 57328c2ecf20Sopenharmony_ci} 57338c2ecf20Sopenharmony_ci 57348c2ecf20Sopenharmony_cistatic fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu) 57358c2ecf20Sopenharmony_ci{ 57368c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 57378c2ecf20Sopenharmony_ci 57388c2ecf20Sopenharmony_ci if (!vmx->req_immediate_exit && 57398c2ecf20Sopenharmony_ci !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) { 57408c2ecf20Sopenharmony_ci kvm_lapic_expired_hv_timer(vcpu); 57418c2ecf20Sopenharmony_ci return EXIT_FASTPATH_REENTER_GUEST; 57428c2ecf20Sopenharmony_ci } 57438c2ecf20Sopenharmony_ci 57448c2ecf20Sopenharmony_ci return EXIT_FASTPATH_NONE; 57458c2ecf20Sopenharmony_ci} 57468c2ecf20Sopenharmony_ci 57478c2ecf20Sopenharmony_cistatic int handle_preemption_timer(struct kvm_vcpu *vcpu) 57488c2ecf20Sopenharmony_ci{ 57498c2ecf20Sopenharmony_ci handle_fastpath_preemption_timer(vcpu); 57508c2ecf20Sopenharmony_ci return 1; 57518c2ecf20Sopenharmony_ci} 57528c2ecf20Sopenharmony_ci 57538c2ecf20Sopenharmony_ci/* 57548c2ecf20Sopenharmony_ci * When nested=0, all VMX instruction VM Exits filter here. The handlers 57558c2ecf20Sopenharmony_ci * are overwritten by nested_vmx_setup() when nested=1. 57568c2ecf20Sopenharmony_ci */ 57578c2ecf20Sopenharmony_cistatic int handle_vmx_instruction(struct kvm_vcpu *vcpu) 57588c2ecf20Sopenharmony_ci{ 57598c2ecf20Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 57608c2ecf20Sopenharmony_ci return 1; 57618c2ecf20Sopenharmony_ci} 57628c2ecf20Sopenharmony_ci 57638c2ecf20Sopenharmony_cistatic int handle_encls(struct kvm_vcpu *vcpu) 57648c2ecf20Sopenharmony_ci{ 57658c2ecf20Sopenharmony_ci /* 57668c2ecf20Sopenharmony_ci * SGX virtualization is not yet supported. There is no software 57678c2ecf20Sopenharmony_ci * enable bit for SGX, so we have to trap ENCLS and inject a #UD 57688c2ecf20Sopenharmony_ci * to prevent the guest from executing ENCLS. 57698c2ecf20Sopenharmony_ci */ 57708c2ecf20Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 57718c2ecf20Sopenharmony_ci return 1; 57728c2ecf20Sopenharmony_ci} 57738c2ecf20Sopenharmony_ci 57748c2ecf20Sopenharmony_ci/* 57758c2ecf20Sopenharmony_ci * The exit handlers return 1 if the exit was handled fully and guest execution 57768c2ecf20Sopenharmony_ci * may resume. Otherwise they set the kvm_run parameter to indicate what needs 57778c2ecf20Sopenharmony_ci * to be done to userspace and return 0. 57788c2ecf20Sopenharmony_ci */ 57798c2ecf20Sopenharmony_cistatic int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { 57808c2ecf20Sopenharmony_ci [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi, 57818c2ecf20Sopenharmony_ci [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, 57828c2ecf20Sopenharmony_ci [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, 57838c2ecf20Sopenharmony_ci [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, 57848c2ecf20Sopenharmony_ci [EXIT_REASON_IO_INSTRUCTION] = handle_io, 57858c2ecf20Sopenharmony_ci [EXIT_REASON_CR_ACCESS] = handle_cr, 57868c2ecf20Sopenharmony_ci [EXIT_REASON_DR_ACCESS] = handle_dr, 57878c2ecf20Sopenharmony_ci [EXIT_REASON_CPUID] = kvm_emulate_cpuid, 57888c2ecf20Sopenharmony_ci [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr, 57898c2ecf20Sopenharmony_ci [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr, 57908c2ecf20Sopenharmony_ci [EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window, 57918c2ecf20Sopenharmony_ci [EXIT_REASON_HLT] = kvm_emulate_halt, 57928c2ecf20Sopenharmony_ci [EXIT_REASON_INVD] = handle_invd, 57938c2ecf20Sopenharmony_ci [EXIT_REASON_INVLPG] = handle_invlpg, 57948c2ecf20Sopenharmony_ci [EXIT_REASON_RDPMC] = handle_rdpmc, 57958c2ecf20Sopenharmony_ci [EXIT_REASON_VMCALL] = handle_vmcall, 57968c2ecf20Sopenharmony_ci [EXIT_REASON_VMCLEAR] = handle_vmx_instruction, 57978c2ecf20Sopenharmony_ci [EXIT_REASON_VMLAUNCH] = handle_vmx_instruction, 57988c2ecf20Sopenharmony_ci [EXIT_REASON_VMPTRLD] = handle_vmx_instruction, 57998c2ecf20Sopenharmony_ci [EXIT_REASON_VMPTRST] = handle_vmx_instruction, 58008c2ecf20Sopenharmony_ci [EXIT_REASON_VMREAD] = handle_vmx_instruction, 58018c2ecf20Sopenharmony_ci [EXIT_REASON_VMRESUME] = handle_vmx_instruction, 58028c2ecf20Sopenharmony_ci [EXIT_REASON_VMWRITE] = handle_vmx_instruction, 58038c2ecf20Sopenharmony_ci [EXIT_REASON_VMOFF] = handle_vmx_instruction, 58048c2ecf20Sopenharmony_ci [EXIT_REASON_VMON] = handle_vmx_instruction, 58058c2ecf20Sopenharmony_ci [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, 58068c2ecf20Sopenharmony_ci [EXIT_REASON_APIC_ACCESS] = handle_apic_access, 58078c2ecf20Sopenharmony_ci [EXIT_REASON_APIC_WRITE] = handle_apic_write, 58088c2ecf20Sopenharmony_ci [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced, 58098c2ecf20Sopenharmony_ci [EXIT_REASON_WBINVD] = handle_wbinvd, 58108c2ecf20Sopenharmony_ci [EXIT_REASON_XSETBV] = handle_xsetbv, 58118c2ecf20Sopenharmony_ci [EXIT_REASON_TASK_SWITCH] = handle_task_switch, 58128c2ecf20Sopenharmony_ci [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, 58138c2ecf20Sopenharmony_ci [EXIT_REASON_GDTR_IDTR] = handle_desc, 58148c2ecf20Sopenharmony_ci [EXIT_REASON_LDTR_TR] = handle_desc, 58158c2ecf20Sopenharmony_ci [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, 58168c2ecf20Sopenharmony_ci [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, 58178c2ecf20Sopenharmony_ci [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, 58188c2ecf20Sopenharmony_ci [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait, 58198c2ecf20Sopenharmony_ci [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap, 58208c2ecf20Sopenharmony_ci [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, 58218c2ecf20Sopenharmony_ci [EXIT_REASON_INVEPT] = handle_vmx_instruction, 58228c2ecf20Sopenharmony_ci [EXIT_REASON_INVVPID] = handle_vmx_instruction, 58238c2ecf20Sopenharmony_ci [EXIT_REASON_RDRAND] = handle_invalid_op, 58248c2ecf20Sopenharmony_ci [EXIT_REASON_RDSEED] = handle_invalid_op, 58258c2ecf20Sopenharmony_ci [EXIT_REASON_PML_FULL] = handle_pml_full, 58268c2ecf20Sopenharmony_ci [EXIT_REASON_INVPCID] = handle_invpcid, 58278c2ecf20Sopenharmony_ci [EXIT_REASON_VMFUNC] = handle_vmx_instruction, 58288c2ecf20Sopenharmony_ci [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, 58298c2ecf20Sopenharmony_ci [EXIT_REASON_ENCLS] = handle_encls, 58308c2ecf20Sopenharmony_ci}; 58318c2ecf20Sopenharmony_ci 58328c2ecf20Sopenharmony_cistatic const int kvm_vmx_max_exit_handlers = 58338c2ecf20Sopenharmony_ci ARRAY_SIZE(kvm_vmx_exit_handlers); 58348c2ecf20Sopenharmony_ci 58358c2ecf20Sopenharmony_cistatic void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2, 58368c2ecf20Sopenharmony_ci u32 *intr_info, u32 *error_code) 58378c2ecf20Sopenharmony_ci{ 58388c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 58398c2ecf20Sopenharmony_ci 58408c2ecf20Sopenharmony_ci *info1 = vmx_get_exit_qual(vcpu); 58418c2ecf20Sopenharmony_ci if (!(vmx->exit_reason.failed_vmentry)) { 58428c2ecf20Sopenharmony_ci *info2 = vmx->idt_vectoring_info; 58438c2ecf20Sopenharmony_ci *intr_info = vmx_get_intr_info(vcpu); 58448c2ecf20Sopenharmony_ci if (is_exception_with_error_code(*intr_info)) 58458c2ecf20Sopenharmony_ci *error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 58468c2ecf20Sopenharmony_ci else 58478c2ecf20Sopenharmony_ci *error_code = 0; 58488c2ecf20Sopenharmony_ci } else { 58498c2ecf20Sopenharmony_ci *info2 = 0; 58508c2ecf20Sopenharmony_ci *intr_info = 0; 58518c2ecf20Sopenharmony_ci *error_code = 0; 58528c2ecf20Sopenharmony_ci } 58538c2ecf20Sopenharmony_ci} 58548c2ecf20Sopenharmony_ci 58558c2ecf20Sopenharmony_cistatic void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) 58568c2ecf20Sopenharmony_ci{ 58578c2ecf20Sopenharmony_ci if (vmx->pml_pg) { 58588c2ecf20Sopenharmony_ci __free_page(vmx->pml_pg); 58598c2ecf20Sopenharmony_ci vmx->pml_pg = NULL; 58608c2ecf20Sopenharmony_ci } 58618c2ecf20Sopenharmony_ci} 58628c2ecf20Sopenharmony_ci 58638c2ecf20Sopenharmony_cistatic void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) 58648c2ecf20Sopenharmony_ci{ 58658c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 58668c2ecf20Sopenharmony_ci u64 *pml_buf; 58678c2ecf20Sopenharmony_ci u16 pml_idx; 58688c2ecf20Sopenharmony_ci 58698c2ecf20Sopenharmony_ci pml_idx = vmcs_read16(GUEST_PML_INDEX); 58708c2ecf20Sopenharmony_ci 58718c2ecf20Sopenharmony_ci /* Do nothing if PML buffer is empty */ 58728c2ecf20Sopenharmony_ci if (pml_idx == (PML_ENTITY_NUM - 1)) 58738c2ecf20Sopenharmony_ci return; 58748c2ecf20Sopenharmony_ci 58758c2ecf20Sopenharmony_ci /* PML index always points to next available PML buffer entity */ 58768c2ecf20Sopenharmony_ci if (pml_idx >= PML_ENTITY_NUM) 58778c2ecf20Sopenharmony_ci pml_idx = 0; 58788c2ecf20Sopenharmony_ci else 58798c2ecf20Sopenharmony_ci pml_idx++; 58808c2ecf20Sopenharmony_ci 58818c2ecf20Sopenharmony_ci pml_buf = page_address(vmx->pml_pg); 58828c2ecf20Sopenharmony_ci for (; pml_idx < PML_ENTITY_NUM; pml_idx++) { 58838c2ecf20Sopenharmony_ci u64 gpa; 58848c2ecf20Sopenharmony_ci 58858c2ecf20Sopenharmony_ci gpa = pml_buf[pml_idx]; 58868c2ecf20Sopenharmony_ci WARN_ON(gpa & (PAGE_SIZE - 1)); 58878c2ecf20Sopenharmony_ci kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); 58888c2ecf20Sopenharmony_ci } 58898c2ecf20Sopenharmony_ci 58908c2ecf20Sopenharmony_ci /* reset PML index */ 58918c2ecf20Sopenharmony_ci vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); 58928c2ecf20Sopenharmony_ci} 58938c2ecf20Sopenharmony_ci 58948c2ecf20Sopenharmony_ci/* 58958c2ecf20Sopenharmony_ci * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap. 58968c2ecf20Sopenharmony_ci * Called before reporting dirty_bitmap to userspace. 58978c2ecf20Sopenharmony_ci */ 58988c2ecf20Sopenharmony_cistatic void kvm_flush_pml_buffers(struct kvm *kvm) 58998c2ecf20Sopenharmony_ci{ 59008c2ecf20Sopenharmony_ci int i; 59018c2ecf20Sopenharmony_ci struct kvm_vcpu *vcpu; 59028c2ecf20Sopenharmony_ci /* 59038c2ecf20Sopenharmony_ci * We only need to kick vcpu out of guest mode here, as PML buffer 59048c2ecf20Sopenharmony_ci * is flushed at beginning of all VMEXITs, and it's obvious that only 59058c2ecf20Sopenharmony_ci * vcpus running in guest are possible to have unflushed GPAs in PML 59068c2ecf20Sopenharmony_ci * buffer. 59078c2ecf20Sopenharmony_ci */ 59088c2ecf20Sopenharmony_ci kvm_for_each_vcpu(i, vcpu, kvm) 59098c2ecf20Sopenharmony_ci kvm_vcpu_kick(vcpu); 59108c2ecf20Sopenharmony_ci} 59118c2ecf20Sopenharmony_ci 59128c2ecf20Sopenharmony_cistatic void vmx_dump_sel(char *name, uint32_t sel) 59138c2ecf20Sopenharmony_ci{ 59148c2ecf20Sopenharmony_ci pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", 59158c2ecf20Sopenharmony_ci name, vmcs_read16(sel), 59168c2ecf20Sopenharmony_ci vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR), 59178c2ecf20Sopenharmony_ci vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR), 59188c2ecf20Sopenharmony_ci vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR)); 59198c2ecf20Sopenharmony_ci} 59208c2ecf20Sopenharmony_ci 59218c2ecf20Sopenharmony_cistatic void vmx_dump_dtsel(char *name, uint32_t limit) 59228c2ecf20Sopenharmony_ci{ 59238c2ecf20Sopenharmony_ci pr_err("%s limit=0x%08x, base=0x%016lx\n", 59248c2ecf20Sopenharmony_ci name, vmcs_read32(limit), 59258c2ecf20Sopenharmony_ci vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT)); 59268c2ecf20Sopenharmony_ci} 59278c2ecf20Sopenharmony_ci 59288c2ecf20Sopenharmony_civoid dump_vmcs(void) 59298c2ecf20Sopenharmony_ci{ 59308c2ecf20Sopenharmony_ci u32 vmentry_ctl, vmexit_ctl; 59318c2ecf20Sopenharmony_ci u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control; 59328c2ecf20Sopenharmony_ci unsigned long cr4; 59338c2ecf20Sopenharmony_ci 59348c2ecf20Sopenharmony_ci if (!dump_invalid_vmcs) { 59358c2ecf20Sopenharmony_ci pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n"); 59368c2ecf20Sopenharmony_ci return; 59378c2ecf20Sopenharmony_ci } 59388c2ecf20Sopenharmony_ci 59398c2ecf20Sopenharmony_ci vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); 59408c2ecf20Sopenharmony_ci vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); 59418c2ecf20Sopenharmony_ci cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 59428c2ecf20Sopenharmony_ci pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); 59438c2ecf20Sopenharmony_ci cr4 = vmcs_readl(GUEST_CR4); 59448c2ecf20Sopenharmony_ci secondary_exec_control = 0; 59458c2ecf20Sopenharmony_ci if (cpu_has_secondary_exec_ctrls()) 59468c2ecf20Sopenharmony_ci secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); 59478c2ecf20Sopenharmony_ci 59488c2ecf20Sopenharmony_ci pr_err("*** Guest State ***\n"); 59498c2ecf20Sopenharmony_ci pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", 59508c2ecf20Sopenharmony_ci vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW), 59518c2ecf20Sopenharmony_ci vmcs_readl(CR0_GUEST_HOST_MASK)); 59528c2ecf20Sopenharmony_ci pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", 59538c2ecf20Sopenharmony_ci cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK)); 59548c2ecf20Sopenharmony_ci pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3)); 59558c2ecf20Sopenharmony_ci if (cpu_has_vmx_ept()) { 59568c2ecf20Sopenharmony_ci pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n", 59578c2ecf20Sopenharmony_ci vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1)); 59588c2ecf20Sopenharmony_ci pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n", 59598c2ecf20Sopenharmony_ci vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3)); 59608c2ecf20Sopenharmony_ci } 59618c2ecf20Sopenharmony_ci pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", 59628c2ecf20Sopenharmony_ci vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); 59638c2ecf20Sopenharmony_ci pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n", 59648c2ecf20Sopenharmony_ci vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7)); 59658c2ecf20Sopenharmony_ci pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", 59668c2ecf20Sopenharmony_ci vmcs_readl(GUEST_SYSENTER_ESP), 59678c2ecf20Sopenharmony_ci vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP)); 59688c2ecf20Sopenharmony_ci vmx_dump_sel("CS: ", GUEST_CS_SELECTOR); 59698c2ecf20Sopenharmony_ci vmx_dump_sel("DS: ", GUEST_DS_SELECTOR); 59708c2ecf20Sopenharmony_ci vmx_dump_sel("SS: ", GUEST_SS_SELECTOR); 59718c2ecf20Sopenharmony_ci vmx_dump_sel("ES: ", GUEST_ES_SELECTOR); 59728c2ecf20Sopenharmony_ci vmx_dump_sel("FS: ", GUEST_FS_SELECTOR); 59738c2ecf20Sopenharmony_ci vmx_dump_sel("GS: ", GUEST_GS_SELECTOR); 59748c2ecf20Sopenharmony_ci vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT); 59758c2ecf20Sopenharmony_ci vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR); 59768c2ecf20Sopenharmony_ci vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT); 59778c2ecf20Sopenharmony_ci vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); 59788c2ecf20Sopenharmony_ci if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || 59798c2ecf20Sopenharmony_ci (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) 59808c2ecf20Sopenharmony_ci pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", 59818c2ecf20Sopenharmony_ci vmcs_read64(GUEST_IA32_EFER), 59828c2ecf20Sopenharmony_ci vmcs_read64(GUEST_IA32_PAT)); 59838c2ecf20Sopenharmony_ci pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", 59848c2ecf20Sopenharmony_ci vmcs_read64(GUEST_IA32_DEBUGCTL), 59858c2ecf20Sopenharmony_ci vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); 59868c2ecf20Sopenharmony_ci if (cpu_has_load_perf_global_ctrl() && 59878c2ecf20Sopenharmony_ci vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) 59888c2ecf20Sopenharmony_ci pr_err("PerfGlobCtl = 0x%016llx\n", 59898c2ecf20Sopenharmony_ci vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); 59908c2ecf20Sopenharmony_ci if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) 59918c2ecf20Sopenharmony_ci pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS)); 59928c2ecf20Sopenharmony_ci pr_err("Interruptibility = %08x ActivityState = %08x\n", 59938c2ecf20Sopenharmony_ci vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), 59948c2ecf20Sopenharmony_ci vmcs_read32(GUEST_ACTIVITY_STATE)); 59958c2ecf20Sopenharmony_ci if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) 59968c2ecf20Sopenharmony_ci pr_err("InterruptStatus = %04x\n", 59978c2ecf20Sopenharmony_ci vmcs_read16(GUEST_INTR_STATUS)); 59988c2ecf20Sopenharmony_ci 59998c2ecf20Sopenharmony_ci pr_err("*** Host State ***\n"); 60008c2ecf20Sopenharmony_ci pr_err("RIP = 0x%016lx RSP = 0x%016lx\n", 60018c2ecf20Sopenharmony_ci vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP)); 60028c2ecf20Sopenharmony_ci pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n", 60038c2ecf20Sopenharmony_ci vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR), 60048c2ecf20Sopenharmony_ci vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR), 60058c2ecf20Sopenharmony_ci vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR), 60068c2ecf20Sopenharmony_ci vmcs_read16(HOST_TR_SELECTOR)); 60078c2ecf20Sopenharmony_ci pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n", 60088c2ecf20Sopenharmony_ci vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE), 60098c2ecf20Sopenharmony_ci vmcs_readl(HOST_TR_BASE)); 60108c2ecf20Sopenharmony_ci pr_err("GDTBase=%016lx IDTBase=%016lx\n", 60118c2ecf20Sopenharmony_ci vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE)); 60128c2ecf20Sopenharmony_ci pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n", 60138c2ecf20Sopenharmony_ci vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3), 60148c2ecf20Sopenharmony_ci vmcs_readl(HOST_CR4)); 60158c2ecf20Sopenharmony_ci pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", 60168c2ecf20Sopenharmony_ci vmcs_readl(HOST_IA32_SYSENTER_ESP), 60178c2ecf20Sopenharmony_ci vmcs_read32(HOST_IA32_SYSENTER_CS), 60188c2ecf20Sopenharmony_ci vmcs_readl(HOST_IA32_SYSENTER_EIP)); 60198c2ecf20Sopenharmony_ci if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) 60208c2ecf20Sopenharmony_ci pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", 60218c2ecf20Sopenharmony_ci vmcs_read64(HOST_IA32_EFER), 60228c2ecf20Sopenharmony_ci vmcs_read64(HOST_IA32_PAT)); 60238c2ecf20Sopenharmony_ci if (cpu_has_load_perf_global_ctrl() && 60248c2ecf20Sopenharmony_ci vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) 60258c2ecf20Sopenharmony_ci pr_err("PerfGlobCtl = 0x%016llx\n", 60268c2ecf20Sopenharmony_ci vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); 60278c2ecf20Sopenharmony_ci 60288c2ecf20Sopenharmony_ci pr_err("*** Control State ***\n"); 60298c2ecf20Sopenharmony_ci pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", 60308c2ecf20Sopenharmony_ci pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control); 60318c2ecf20Sopenharmony_ci pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl); 60328c2ecf20Sopenharmony_ci pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n", 60338c2ecf20Sopenharmony_ci vmcs_read32(EXCEPTION_BITMAP), 60348c2ecf20Sopenharmony_ci vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK), 60358c2ecf20Sopenharmony_ci vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH)); 60368c2ecf20Sopenharmony_ci pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n", 60378c2ecf20Sopenharmony_ci vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), 60388c2ecf20Sopenharmony_ci vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE), 60398c2ecf20Sopenharmony_ci vmcs_read32(VM_ENTRY_INSTRUCTION_LEN)); 60408c2ecf20Sopenharmony_ci pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n", 60418c2ecf20Sopenharmony_ci vmcs_read32(VM_EXIT_INTR_INFO), 60428c2ecf20Sopenharmony_ci vmcs_read32(VM_EXIT_INTR_ERROR_CODE), 60438c2ecf20Sopenharmony_ci vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); 60448c2ecf20Sopenharmony_ci pr_err(" reason=%08x qualification=%016lx\n", 60458c2ecf20Sopenharmony_ci vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION)); 60468c2ecf20Sopenharmony_ci pr_err("IDTVectoring: info=%08x errcode=%08x\n", 60478c2ecf20Sopenharmony_ci vmcs_read32(IDT_VECTORING_INFO_FIELD), 60488c2ecf20Sopenharmony_ci vmcs_read32(IDT_VECTORING_ERROR_CODE)); 60498c2ecf20Sopenharmony_ci pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET)); 60508c2ecf20Sopenharmony_ci if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) 60518c2ecf20Sopenharmony_ci pr_err("TSC Multiplier = 0x%016llx\n", 60528c2ecf20Sopenharmony_ci vmcs_read64(TSC_MULTIPLIER)); 60538c2ecf20Sopenharmony_ci if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) { 60548c2ecf20Sopenharmony_ci if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { 60558c2ecf20Sopenharmony_ci u16 status = vmcs_read16(GUEST_INTR_STATUS); 60568c2ecf20Sopenharmony_ci pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff); 60578c2ecf20Sopenharmony_ci } 60588c2ecf20Sopenharmony_ci pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); 60598c2ecf20Sopenharmony_ci if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) 60608c2ecf20Sopenharmony_ci pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR)); 60618c2ecf20Sopenharmony_ci pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR)); 60628c2ecf20Sopenharmony_ci } 60638c2ecf20Sopenharmony_ci if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) 60648c2ecf20Sopenharmony_ci pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); 60658c2ecf20Sopenharmony_ci if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) 60668c2ecf20Sopenharmony_ci pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER)); 60678c2ecf20Sopenharmony_ci if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) 60688c2ecf20Sopenharmony_ci pr_err("PLE Gap=%08x Window=%08x\n", 60698c2ecf20Sopenharmony_ci vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW)); 60708c2ecf20Sopenharmony_ci if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID) 60718c2ecf20Sopenharmony_ci pr_err("Virtual processor ID = 0x%04x\n", 60728c2ecf20Sopenharmony_ci vmcs_read16(VIRTUAL_PROCESSOR_ID)); 60738c2ecf20Sopenharmony_ci} 60748c2ecf20Sopenharmony_ci 60758c2ecf20Sopenharmony_ci/* 60768c2ecf20Sopenharmony_ci * The guest has exited. See if we can fix it or if we need userspace 60778c2ecf20Sopenharmony_ci * assistance. 60788c2ecf20Sopenharmony_ci */ 60798c2ecf20Sopenharmony_cistatic int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) 60808c2ecf20Sopenharmony_ci{ 60818c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 60828c2ecf20Sopenharmony_ci union vmx_exit_reason exit_reason = vmx->exit_reason; 60838c2ecf20Sopenharmony_ci u32 vectoring_info = vmx->idt_vectoring_info; 60848c2ecf20Sopenharmony_ci u16 exit_handler_index; 60858c2ecf20Sopenharmony_ci 60868c2ecf20Sopenharmony_ci /* 60878c2ecf20Sopenharmony_ci * Flush logged GPAs PML buffer, this will make dirty_bitmap more 60888c2ecf20Sopenharmony_ci * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before 60898c2ecf20Sopenharmony_ci * querying dirty_bitmap, we only need to kick all vcpus out of guest 60908c2ecf20Sopenharmony_ci * mode as if vcpus is in root mode, the PML buffer must has been 60918c2ecf20Sopenharmony_ci * flushed already. 60928c2ecf20Sopenharmony_ci */ 60938c2ecf20Sopenharmony_ci if (enable_pml) 60948c2ecf20Sopenharmony_ci vmx_flush_pml_buffer(vcpu); 60958c2ecf20Sopenharmony_ci 60968c2ecf20Sopenharmony_ci /* 60978c2ecf20Sopenharmony_ci * We should never reach this point with a pending nested VM-Enter, and 60988c2ecf20Sopenharmony_ci * more specifically emulation of L2 due to invalid guest state (see 60998c2ecf20Sopenharmony_ci * below) should never happen as that means we incorrectly allowed a 61008c2ecf20Sopenharmony_ci * nested VM-Enter with an invalid vmcs12. 61018c2ecf20Sopenharmony_ci */ 61028c2ecf20Sopenharmony_ci WARN_ON_ONCE(vmx->nested.nested_run_pending); 61038c2ecf20Sopenharmony_ci 61048c2ecf20Sopenharmony_ci /* If guest state is invalid, start emulating */ 61058c2ecf20Sopenharmony_ci if (vmx->emulation_required) 61068c2ecf20Sopenharmony_ci return handle_invalid_guest_state(vcpu); 61078c2ecf20Sopenharmony_ci 61088c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) { 61098c2ecf20Sopenharmony_ci /* 61108c2ecf20Sopenharmony_ci * The host physical addresses of some pages of guest memory 61118c2ecf20Sopenharmony_ci * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC 61128c2ecf20Sopenharmony_ci * Page). The CPU may write to these pages via their host 61138c2ecf20Sopenharmony_ci * physical address while L2 is running, bypassing any 61148c2ecf20Sopenharmony_ci * address-translation-based dirty tracking (e.g. EPT write 61158c2ecf20Sopenharmony_ci * protection). 61168c2ecf20Sopenharmony_ci * 61178c2ecf20Sopenharmony_ci * Mark them dirty on every exit from L2 to prevent them from 61188c2ecf20Sopenharmony_ci * getting out of sync with dirty tracking. 61198c2ecf20Sopenharmony_ci */ 61208c2ecf20Sopenharmony_ci nested_mark_vmcs12_pages_dirty(vcpu); 61218c2ecf20Sopenharmony_ci 61228c2ecf20Sopenharmony_ci if (nested_vmx_reflect_vmexit(vcpu)) 61238c2ecf20Sopenharmony_ci return 1; 61248c2ecf20Sopenharmony_ci } 61258c2ecf20Sopenharmony_ci 61268c2ecf20Sopenharmony_ci if (exit_reason.failed_vmentry) { 61278c2ecf20Sopenharmony_ci dump_vmcs(); 61288c2ecf20Sopenharmony_ci vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; 61298c2ecf20Sopenharmony_ci vcpu->run->fail_entry.hardware_entry_failure_reason 61308c2ecf20Sopenharmony_ci = exit_reason.full; 61318c2ecf20Sopenharmony_ci vcpu->run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu; 61328c2ecf20Sopenharmony_ci return 0; 61338c2ecf20Sopenharmony_ci } 61348c2ecf20Sopenharmony_ci 61358c2ecf20Sopenharmony_ci if (unlikely(vmx->fail)) { 61368c2ecf20Sopenharmony_ci dump_vmcs(); 61378c2ecf20Sopenharmony_ci vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; 61388c2ecf20Sopenharmony_ci vcpu->run->fail_entry.hardware_entry_failure_reason 61398c2ecf20Sopenharmony_ci = vmcs_read32(VM_INSTRUCTION_ERROR); 61408c2ecf20Sopenharmony_ci vcpu->run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu; 61418c2ecf20Sopenharmony_ci return 0; 61428c2ecf20Sopenharmony_ci } 61438c2ecf20Sopenharmony_ci 61448c2ecf20Sopenharmony_ci /* 61458c2ecf20Sopenharmony_ci * Note: 61468c2ecf20Sopenharmony_ci * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by 61478c2ecf20Sopenharmony_ci * delivery event since it indicates guest is accessing MMIO. 61488c2ecf20Sopenharmony_ci * The vm-exit can be triggered again after return to guest that 61498c2ecf20Sopenharmony_ci * will cause infinite loop. 61508c2ecf20Sopenharmony_ci */ 61518c2ecf20Sopenharmony_ci if ((vectoring_info & VECTORING_INFO_VALID_MASK) && 61528c2ecf20Sopenharmony_ci (exit_reason.basic != EXIT_REASON_EXCEPTION_NMI && 61538c2ecf20Sopenharmony_ci exit_reason.basic != EXIT_REASON_EPT_VIOLATION && 61548c2ecf20Sopenharmony_ci exit_reason.basic != EXIT_REASON_PML_FULL && 61558c2ecf20Sopenharmony_ci exit_reason.basic != EXIT_REASON_APIC_ACCESS && 61568c2ecf20Sopenharmony_ci exit_reason.basic != EXIT_REASON_TASK_SWITCH)) { 61578c2ecf20Sopenharmony_ci int ndata = 3; 61588c2ecf20Sopenharmony_ci 61598c2ecf20Sopenharmony_ci vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 61608c2ecf20Sopenharmony_ci vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; 61618c2ecf20Sopenharmony_ci vcpu->run->internal.data[0] = vectoring_info; 61628c2ecf20Sopenharmony_ci vcpu->run->internal.data[1] = exit_reason.full; 61638c2ecf20Sopenharmony_ci vcpu->run->internal.data[2] = vcpu->arch.exit_qualification; 61648c2ecf20Sopenharmony_ci if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) { 61658c2ecf20Sopenharmony_ci vcpu->run->internal.data[ndata++] = 61668c2ecf20Sopenharmony_ci vmcs_read64(GUEST_PHYSICAL_ADDRESS); 61678c2ecf20Sopenharmony_ci } 61688c2ecf20Sopenharmony_ci vcpu->run->internal.data[ndata++] = vcpu->arch.last_vmentry_cpu; 61698c2ecf20Sopenharmony_ci vcpu->run->internal.ndata = ndata; 61708c2ecf20Sopenharmony_ci return 0; 61718c2ecf20Sopenharmony_ci } 61728c2ecf20Sopenharmony_ci 61738c2ecf20Sopenharmony_ci if (unlikely(!enable_vnmi && 61748c2ecf20Sopenharmony_ci vmx->loaded_vmcs->soft_vnmi_blocked)) { 61758c2ecf20Sopenharmony_ci if (!vmx_interrupt_blocked(vcpu)) { 61768c2ecf20Sopenharmony_ci vmx->loaded_vmcs->soft_vnmi_blocked = 0; 61778c2ecf20Sopenharmony_ci } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL && 61788c2ecf20Sopenharmony_ci vcpu->arch.nmi_pending) { 61798c2ecf20Sopenharmony_ci /* 61808c2ecf20Sopenharmony_ci * This CPU don't support us in finding the end of an 61818c2ecf20Sopenharmony_ci * NMI-blocked window if the guest runs with IRQs 61828c2ecf20Sopenharmony_ci * disabled. So we pull the trigger after 1 s of 61838c2ecf20Sopenharmony_ci * futile waiting, but inform the user about this. 61848c2ecf20Sopenharmony_ci */ 61858c2ecf20Sopenharmony_ci printk(KERN_WARNING "%s: Breaking out of NMI-blocked " 61868c2ecf20Sopenharmony_ci "state on VCPU %d after 1 s timeout\n", 61878c2ecf20Sopenharmony_ci __func__, vcpu->vcpu_id); 61888c2ecf20Sopenharmony_ci vmx->loaded_vmcs->soft_vnmi_blocked = 0; 61898c2ecf20Sopenharmony_ci } 61908c2ecf20Sopenharmony_ci } 61918c2ecf20Sopenharmony_ci 61928c2ecf20Sopenharmony_ci if (exit_fastpath != EXIT_FASTPATH_NONE) 61938c2ecf20Sopenharmony_ci return 1; 61948c2ecf20Sopenharmony_ci 61958c2ecf20Sopenharmony_ci if (exit_reason.basic >= kvm_vmx_max_exit_handlers) 61968c2ecf20Sopenharmony_ci goto unexpected_vmexit; 61978c2ecf20Sopenharmony_ci#ifdef CONFIG_RETPOLINE 61988c2ecf20Sopenharmony_ci if (exit_reason.basic == EXIT_REASON_MSR_WRITE) 61998c2ecf20Sopenharmony_ci return kvm_emulate_wrmsr(vcpu); 62008c2ecf20Sopenharmony_ci else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER) 62018c2ecf20Sopenharmony_ci return handle_preemption_timer(vcpu); 62028c2ecf20Sopenharmony_ci else if (exit_reason.basic == EXIT_REASON_INTERRUPT_WINDOW) 62038c2ecf20Sopenharmony_ci return handle_interrupt_window(vcpu); 62048c2ecf20Sopenharmony_ci else if (exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT) 62058c2ecf20Sopenharmony_ci return handle_external_interrupt(vcpu); 62068c2ecf20Sopenharmony_ci else if (exit_reason.basic == EXIT_REASON_HLT) 62078c2ecf20Sopenharmony_ci return kvm_emulate_halt(vcpu); 62088c2ecf20Sopenharmony_ci else if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) 62098c2ecf20Sopenharmony_ci return handle_ept_misconfig(vcpu); 62108c2ecf20Sopenharmony_ci#endif 62118c2ecf20Sopenharmony_ci 62128c2ecf20Sopenharmony_ci exit_handler_index = array_index_nospec((u16)exit_reason.basic, 62138c2ecf20Sopenharmony_ci kvm_vmx_max_exit_handlers); 62148c2ecf20Sopenharmony_ci if (!kvm_vmx_exit_handlers[exit_handler_index]) 62158c2ecf20Sopenharmony_ci goto unexpected_vmexit; 62168c2ecf20Sopenharmony_ci 62178c2ecf20Sopenharmony_ci return kvm_vmx_exit_handlers[exit_handler_index](vcpu); 62188c2ecf20Sopenharmony_ci 62198c2ecf20Sopenharmony_ciunexpected_vmexit: 62208c2ecf20Sopenharmony_ci vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", 62218c2ecf20Sopenharmony_ci exit_reason.full); 62228c2ecf20Sopenharmony_ci dump_vmcs(); 62238c2ecf20Sopenharmony_ci vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 62248c2ecf20Sopenharmony_ci vcpu->run->internal.suberror = 62258c2ecf20Sopenharmony_ci KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; 62268c2ecf20Sopenharmony_ci vcpu->run->internal.ndata = 2; 62278c2ecf20Sopenharmony_ci vcpu->run->internal.data[0] = exit_reason.full; 62288c2ecf20Sopenharmony_ci vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; 62298c2ecf20Sopenharmony_ci return 0; 62308c2ecf20Sopenharmony_ci} 62318c2ecf20Sopenharmony_ci 62328c2ecf20Sopenharmony_ci/* 62338c2ecf20Sopenharmony_ci * Software based L1D cache flush which is used when microcode providing 62348c2ecf20Sopenharmony_ci * the cache control MSR is not loaded. 62358c2ecf20Sopenharmony_ci * 62368c2ecf20Sopenharmony_ci * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to 62378c2ecf20Sopenharmony_ci * flush it is required to read in 64 KiB because the replacement algorithm 62388c2ecf20Sopenharmony_ci * is not exactly LRU. This could be sized at runtime via topology 62398c2ecf20Sopenharmony_ci * information but as all relevant affected CPUs have 32KiB L1D cache size 62408c2ecf20Sopenharmony_ci * there is no point in doing so. 62418c2ecf20Sopenharmony_ci */ 62428c2ecf20Sopenharmony_cistatic noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu) 62438c2ecf20Sopenharmony_ci{ 62448c2ecf20Sopenharmony_ci int size = PAGE_SIZE << L1D_CACHE_ORDER; 62458c2ecf20Sopenharmony_ci 62468c2ecf20Sopenharmony_ci /* 62478c2ecf20Sopenharmony_ci * This code is only executed when the the flush mode is 'cond' or 62488c2ecf20Sopenharmony_ci * 'always' 62498c2ecf20Sopenharmony_ci */ 62508c2ecf20Sopenharmony_ci if (static_branch_likely(&vmx_l1d_flush_cond)) { 62518c2ecf20Sopenharmony_ci bool flush_l1d; 62528c2ecf20Sopenharmony_ci 62538c2ecf20Sopenharmony_ci /* 62548c2ecf20Sopenharmony_ci * Clear the per-vcpu flush bit, it gets set again 62558c2ecf20Sopenharmony_ci * either from vcpu_run() or from one of the unsafe 62568c2ecf20Sopenharmony_ci * VMEXIT handlers. 62578c2ecf20Sopenharmony_ci */ 62588c2ecf20Sopenharmony_ci flush_l1d = vcpu->arch.l1tf_flush_l1d; 62598c2ecf20Sopenharmony_ci vcpu->arch.l1tf_flush_l1d = false; 62608c2ecf20Sopenharmony_ci 62618c2ecf20Sopenharmony_ci /* 62628c2ecf20Sopenharmony_ci * Clear the per-cpu flush bit, it gets set again from 62638c2ecf20Sopenharmony_ci * the interrupt handlers. 62648c2ecf20Sopenharmony_ci */ 62658c2ecf20Sopenharmony_ci flush_l1d |= kvm_get_cpu_l1tf_flush_l1d(); 62668c2ecf20Sopenharmony_ci kvm_clear_cpu_l1tf_flush_l1d(); 62678c2ecf20Sopenharmony_ci 62688c2ecf20Sopenharmony_ci if (!flush_l1d) 62698c2ecf20Sopenharmony_ci return; 62708c2ecf20Sopenharmony_ci } 62718c2ecf20Sopenharmony_ci 62728c2ecf20Sopenharmony_ci vcpu->stat.l1d_flush++; 62738c2ecf20Sopenharmony_ci 62748c2ecf20Sopenharmony_ci if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) { 62758c2ecf20Sopenharmony_ci native_wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); 62768c2ecf20Sopenharmony_ci return; 62778c2ecf20Sopenharmony_ci } 62788c2ecf20Sopenharmony_ci 62798c2ecf20Sopenharmony_ci asm volatile( 62808c2ecf20Sopenharmony_ci /* First ensure the pages are in the TLB */ 62818c2ecf20Sopenharmony_ci "xorl %%eax, %%eax\n" 62828c2ecf20Sopenharmony_ci ".Lpopulate_tlb:\n\t" 62838c2ecf20Sopenharmony_ci "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" 62848c2ecf20Sopenharmony_ci "addl $4096, %%eax\n\t" 62858c2ecf20Sopenharmony_ci "cmpl %%eax, %[size]\n\t" 62868c2ecf20Sopenharmony_ci "jne .Lpopulate_tlb\n\t" 62878c2ecf20Sopenharmony_ci "xorl %%eax, %%eax\n\t" 62888c2ecf20Sopenharmony_ci "cpuid\n\t" 62898c2ecf20Sopenharmony_ci /* Now fill the cache */ 62908c2ecf20Sopenharmony_ci "xorl %%eax, %%eax\n" 62918c2ecf20Sopenharmony_ci ".Lfill_cache:\n" 62928c2ecf20Sopenharmony_ci "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" 62938c2ecf20Sopenharmony_ci "addl $64, %%eax\n\t" 62948c2ecf20Sopenharmony_ci "cmpl %%eax, %[size]\n\t" 62958c2ecf20Sopenharmony_ci "jne .Lfill_cache\n\t" 62968c2ecf20Sopenharmony_ci "lfence\n" 62978c2ecf20Sopenharmony_ci :: [flush_pages] "r" (vmx_l1d_flush_pages), 62988c2ecf20Sopenharmony_ci [size] "r" (size) 62998c2ecf20Sopenharmony_ci : "eax", "ebx", "ecx", "edx"); 63008c2ecf20Sopenharmony_ci} 63018c2ecf20Sopenharmony_ci 63028c2ecf20Sopenharmony_cistatic void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) 63038c2ecf20Sopenharmony_ci{ 63048c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 63058c2ecf20Sopenharmony_ci int tpr_threshold; 63068c2ecf20Sopenharmony_ci 63078c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu) && 63088c2ecf20Sopenharmony_ci nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) 63098c2ecf20Sopenharmony_ci return; 63108c2ecf20Sopenharmony_ci 63118c2ecf20Sopenharmony_ci tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr; 63128c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) 63138c2ecf20Sopenharmony_ci to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold; 63148c2ecf20Sopenharmony_ci else 63158c2ecf20Sopenharmony_ci vmcs_write32(TPR_THRESHOLD, tpr_threshold); 63168c2ecf20Sopenharmony_ci} 63178c2ecf20Sopenharmony_ci 63188c2ecf20Sopenharmony_civoid vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) 63198c2ecf20Sopenharmony_ci{ 63208c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 63218c2ecf20Sopenharmony_ci u32 sec_exec_control; 63228c2ecf20Sopenharmony_ci 63238c2ecf20Sopenharmony_ci if (!lapic_in_kernel(vcpu)) 63248c2ecf20Sopenharmony_ci return; 63258c2ecf20Sopenharmony_ci 63268c2ecf20Sopenharmony_ci if (!flexpriority_enabled && 63278c2ecf20Sopenharmony_ci !cpu_has_vmx_virtualize_x2apic_mode()) 63288c2ecf20Sopenharmony_ci return; 63298c2ecf20Sopenharmony_ci 63308c2ecf20Sopenharmony_ci /* Postpone execution until vmcs01 is the current VMCS. */ 63318c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) { 63328c2ecf20Sopenharmony_ci vmx->nested.change_vmcs01_virtual_apic_mode = true; 63338c2ecf20Sopenharmony_ci return; 63348c2ecf20Sopenharmony_ci } 63358c2ecf20Sopenharmony_ci 63368c2ecf20Sopenharmony_ci sec_exec_control = secondary_exec_controls_get(vmx); 63378c2ecf20Sopenharmony_ci sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 63388c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); 63398c2ecf20Sopenharmony_ci 63408c2ecf20Sopenharmony_ci switch (kvm_get_apic_mode(vcpu)) { 63418c2ecf20Sopenharmony_ci case LAPIC_MODE_INVALID: 63428c2ecf20Sopenharmony_ci WARN_ONCE(true, "Invalid local APIC state"); 63438c2ecf20Sopenharmony_ci case LAPIC_MODE_DISABLED: 63448c2ecf20Sopenharmony_ci break; 63458c2ecf20Sopenharmony_ci case LAPIC_MODE_XAPIC: 63468c2ecf20Sopenharmony_ci if (flexpriority_enabled) { 63478c2ecf20Sopenharmony_ci sec_exec_control |= 63488c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 63498c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); 63508c2ecf20Sopenharmony_ci 63518c2ecf20Sopenharmony_ci /* 63528c2ecf20Sopenharmony_ci * Flush the TLB, reloading the APIC access page will 63538c2ecf20Sopenharmony_ci * only do so if its physical address has changed, but 63548c2ecf20Sopenharmony_ci * the guest may have inserted a non-APIC mapping into 63558c2ecf20Sopenharmony_ci * the TLB while the APIC access page was disabled. 63568c2ecf20Sopenharmony_ci */ 63578c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); 63588c2ecf20Sopenharmony_ci } 63598c2ecf20Sopenharmony_ci break; 63608c2ecf20Sopenharmony_ci case LAPIC_MODE_X2APIC: 63618c2ecf20Sopenharmony_ci if (cpu_has_vmx_virtualize_x2apic_mode()) 63628c2ecf20Sopenharmony_ci sec_exec_control |= 63638c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; 63648c2ecf20Sopenharmony_ci break; 63658c2ecf20Sopenharmony_ci } 63668c2ecf20Sopenharmony_ci secondary_exec_controls_set(vmx, sec_exec_control); 63678c2ecf20Sopenharmony_ci 63688c2ecf20Sopenharmony_ci vmx_update_msr_bitmap(vcpu); 63698c2ecf20Sopenharmony_ci} 63708c2ecf20Sopenharmony_ci 63718c2ecf20Sopenharmony_cistatic void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) 63728c2ecf20Sopenharmony_ci{ 63738c2ecf20Sopenharmony_ci struct page *page; 63748c2ecf20Sopenharmony_ci 63758c2ecf20Sopenharmony_ci /* Defer reload until vmcs01 is the current VMCS. */ 63768c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) { 63778c2ecf20Sopenharmony_ci to_vmx(vcpu)->nested.reload_vmcs01_apic_access_page = true; 63788c2ecf20Sopenharmony_ci return; 63798c2ecf20Sopenharmony_ci } 63808c2ecf20Sopenharmony_ci 63818c2ecf20Sopenharmony_ci if (!(secondary_exec_controls_get(to_vmx(vcpu)) & 63828c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) 63838c2ecf20Sopenharmony_ci return; 63848c2ecf20Sopenharmony_ci 63858c2ecf20Sopenharmony_ci page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); 63868c2ecf20Sopenharmony_ci if (is_error_page(page)) 63878c2ecf20Sopenharmony_ci return; 63888c2ecf20Sopenharmony_ci 63898c2ecf20Sopenharmony_ci vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page)); 63908c2ecf20Sopenharmony_ci vmx_flush_tlb_current(vcpu); 63918c2ecf20Sopenharmony_ci 63928c2ecf20Sopenharmony_ci /* 63938c2ecf20Sopenharmony_ci * Do not pin apic access page in memory, the MMU notifier 63948c2ecf20Sopenharmony_ci * will call us again if it is migrated or swapped out. 63958c2ecf20Sopenharmony_ci */ 63968c2ecf20Sopenharmony_ci put_page(page); 63978c2ecf20Sopenharmony_ci} 63988c2ecf20Sopenharmony_ci 63998c2ecf20Sopenharmony_cistatic void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) 64008c2ecf20Sopenharmony_ci{ 64018c2ecf20Sopenharmony_ci u16 status; 64028c2ecf20Sopenharmony_ci u8 old; 64038c2ecf20Sopenharmony_ci 64048c2ecf20Sopenharmony_ci if (max_isr == -1) 64058c2ecf20Sopenharmony_ci max_isr = 0; 64068c2ecf20Sopenharmony_ci 64078c2ecf20Sopenharmony_ci status = vmcs_read16(GUEST_INTR_STATUS); 64088c2ecf20Sopenharmony_ci old = status >> 8; 64098c2ecf20Sopenharmony_ci if (max_isr != old) { 64108c2ecf20Sopenharmony_ci status &= 0xff; 64118c2ecf20Sopenharmony_ci status |= max_isr << 8; 64128c2ecf20Sopenharmony_ci vmcs_write16(GUEST_INTR_STATUS, status); 64138c2ecf20Sopenharmony_ci } 64148c2ecf20Sopenharmony_ci} 64158c2ecf20Sopenharmony_ci 64168c2ecf20Sopenharmony_cistatic void vmx_set_rvi(int vector) 64178c2ecf20Sopenharmony_ci{ 64188c2ecf20Sopenharmony_ci u16 status; 64198c2ecf20Sopenharmony_ci u8 old; 64208c2ecf20Sopenharmony_ci 64218c2ecf20Sopenharmony_ci if (vector == -1) 64228c2ecf20Sopenharmony_ci vector = 0; 64238c2ecf20Sopenharmony_ci 64248c2ecf20Sopenharmony_ci status = vmcs_read16(GUEST_INTR_STATUS); 64258c2ecf20Sopenharmony_ci old = (u8)status & 0xff; 64268c2ecf20Sopenharmony_ci if ((u8)vector != old) { 64278c2ecf20Sopenharmony_ci status &= ~0xff; 64288c2ecf20Sopenharmony_ci status |= (u8)vector; 64298c2ecf20Sopenharmony_ci vmcs_write16(GUEST_INTR_STATUS, status); 64308c2ecf20Sopenharmony_ci } 64318c2ecf20Sopenharmony_ci} 64328c2ecf20Sopenharmony_ci 64338c2ecf20Sopenharmony_cistatic void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) 64348c2ecf20Sopenharmony_ci{ 64358c2ecf20Sopenharmony_ci /* 64368c2ecf20Sopenharmony_ci * When running L2, updating RVI is only relevant when 64378c2ecf20Sopenharmony_ci * vmcs12 virtual-interrupt-delivery enabled. 64388c2ecf20Sopenharmony_ci * However, it can be enabled only when L1 also 64398c2ecf20Sopenharmony_ci * intercepts external-interrupts and in that case 64408c2ecf20Sopenharmony_ci * we should not update vmcs02 RVI but instead intercept 64418c2ecf20Sopenharmony_ci * interrupt. Therefore, do nothing when running L2. 64428c2ecf20Sopenharmony_ci */ 64438c2ecf20Sopenharmony_ci if (!is_guest_mode(vcpu)) 64448c2ecf20Sopenharmony_ci vmx_set_rvi(max_irr); 64458c2ecf20Sopenharmony_ci} 64468c2ecf20Sopenharmony_ci 64478c2ecf20Sopenharmony_cistatic int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) 64488c2ecf20Sopenharmony_ci{ 64498c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 64508c2ecf20Sopenharmony_ci int max_irr; 64518c2ecf20Sopenharmony_ci bool max_irr_updated; 64528c2ecf20Sopenharmony_ci 64538c2ecf20Sopenharmony_ci WARN_ON(!vcpu->arch.apicv_active); 64548c2ecf20Sopenharmony_ci if (pi_test_on(&vmx->pi_desc)) { 64558c2ecf20Sopenharmony_ci pi_clear_on(&vmx->pi_desc); 64568c2ecf20Sopenharmony_ci /* 64578c2ecf20Sopenharmony_ci * IOMMU can write to PID.ON, so the barrier matters even on UP. 64588c2ecf20Sopenharmony_ci * But on x86 this is just a compiler barrier anyway. 64598c2ecf20Sopenharmony_ci */ 64608c2ecf20Sopenharmony_ci smp_mb__after_atomic(); 64618c2ecf20Sopenharmony_ci max_irr_updated = 64628c2ecf20Sopenharmony_ci kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr); 64638c2ecf20Sopenharmony_ci 64648c2ecf20Sopenharmony_ci /* 64658c2ecf20Sopenharmony_ci * If we are running L2 and L1 has a new pending interrupt 64668c2ecf20Sopenharmony_ci * which can be injected, this may cause a vmexit or it may 64678c2ecf20Sopenharmony_ci * be injected into L2. Either way, this interrupt will be 64688c2ecf20Sopenharmony_ci * processed via KVM_REQ_EVENT, not RVI, because we do not use 64698c2ecf20Sopenharmony_ci * virtual interrupt delivery to inject L1 interrupts into L2. 64708c2ecf20Sopenharmony_ci */ 64718c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu) && max_irr_updated) 64728c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_EVENT, vcpu); 64738c2ecf20Sopenharmony_ci } else { 64748c2ecf20Sopenharmony_ci max_irr = kvm_lapic_find_highest_irr(vcpu); 64758c2ecf20Sopenharmony_ci } 64768c2ecf20Sopenharmony_ci vmx_hwapic_irr_update(vcpu, max_irr); 64778c2ecf20Sopenharmony_ci return max_irr; 64788c2ecf20Sopenharmony_ci} 64798c2ecf20Sopenharmony_ci 64808c2ecf20Sopenharmony_cistatic void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) 64818c2ecf20Sopenharmony_ci{ 64828c2ecf20Sopenharmony_ci if (!kvm_vcpu_apicv_active(vcpu)) 64838c2ecf20Sopenharmony_ci return; 64848c2ecf20Sopenharmony_ci 64858c2ecf20Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); 64868c2ecf20Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); 64878c2ecf20Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); 64888c2ecf20Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); 64898c2ecf20Sopenharmony_ci} 64908c2ecf20Sopenharmony_ci 64918c2ecf20Sopenharmony_cistatic void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) 64928c2ecf20Sopenharmony_ci{ 64938c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 64948c2ecf20Sopenharmony_ci 64958c2ecf20Sopenharmony_ci pi_clear_on(&vmx->pi_desc); 64968c2ecf20Sopenharmony_ci memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); 64978c2ecf20Sopenharmony_ci} 64988c2ecf20Sopenharmony_ci 64998c2ecf20Sopenharmony_civoid vmx_do_interrupt_nmi_irqoff(unsigned long entry); 65008c2ecf20Sopenharmony_ci 65018c2ecf20Sopenharmony_cistatic void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, 65028c2ecf20Sopenharmony_ci unsigned long entry) 65038c2ecf20Sopenharmony_ci{ 65048c2ecf20Sopenharmony_ci kvm_before_interrupt(vcpu); 65058c2ecf20Sopenharmony_ci vmx_do_interrupt_nmi_irqoff(entry); 65068c2ecf20Sopenharmony_ci kvm_after_interrupt(vcpu); 65078c2ecf20Sopenharmony_ci} 65088c2ecf20Sopenharmony_ci 65098c2ecf20Sopenharmony_cistatic void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx) 65108c2ecf20Sopenharmony_ci{ 65118c2ecf20Sopenharmony_ci const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_noist; 65128c2ecf20Sopenharmony_ci u32 intr_info = vmx_get_intr_info(&vmx->vcpu); 65138c2ecf20Sopenharmony_ci 65148c2ecf20Sopenharmony_ci /* if exit due to PF check for async PF */ 65158c2ecf20Sopenharmony_ci if (is_page_fault(intr_info)) 65168c2ecf20Sopenharmony_ci vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags(); 65178c2ecf20Sopenharmony_ci /* Handle machine checks before interrupts are enabled */ 65188c2ecf20Sopenharmony_ci else if (is_machine_check(intr_info)) 65198c2ecf20Sopenharmony_ci kvm_machine_check(); 65208c2ecf20Sopenharmony_ci /* We need to handle NMIs before interrupts are enabled */ 65218c2ecf20Sopenharmony_ci else if (is_nmi(intr_info)) 65228c2ecf20Sopenharmony_ci handle_interrupt_nmi_irqoff(&vmx->vcpu, nmi_entry); 65238c2ecf20Sopenharmony_ci} 65248c2ecf20Sopenharmony_ci 65258c2ecf20Sopenharmony_cistatic void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) 65268c2ecf20Sopenharmony_ci{ 65278c2ecf20Sopenharmony_ci u32 intr_info = vmx_get_intr_info(vcpu); 65288c2ecf20Sopenharmony_ci unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK; 65298c2ecf20Sopenharmony_ci gate_desc *desc = (gate_desc *)host_idt_base + vector; 65308c2ecf20Sopenharmony_ci 65318c2ecf20Sopenharmony_ci if (WARN_ONCE(!is_external_intr(intr_info), 65328c2ecf20Sopenharmony_ci "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info)) 65338c2ecf20Sopenharmony_ci return; 65348c2ecf20Sopenharmony_ci 65358c2ecf20Sopenharmony_ci handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc)); 65368c2ecf20Sopenharmony_ci vcpu->arch.at_instruction_boundary = true; 65378c2ecf20Sopenharmony_ci} 65388c2ecf20Sopenharmony_ci 65398c2ecf20Sopenharmony_cistatic void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) 65408c2ecf20Sopenharmony_ci{ 65418c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 65428c2ecf20Sopenharmony_ci 65438c2ecf20Sopenharmony_ci if (vmx->emulation_required) 65448c2ecf20Sopenharmony_ci return; 65458c2ecf20Sopenharmony_ci 65468c2ecf20Sopenharmony_ci if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT) 65478c2ecf20Sopenharmony_ci handle_external_interrupt_irqoff(vcpu); 65488c2ecf20Sopenharmony_ci else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI) 65498c2ecf20Sopenharmony_ci handle_exception_nmi_irqoff(vmx); 65508c2ecf20Sopenharmony_ci} 65518c2ecf20Sopenharmony_ci 65528c2ecf20Sopenharmony_cistatic bool vmx_has_emulated_msr(u32 index) 65538c2ecf20Sopenharmony_ci{ 65548c2ecf20Sopenharmony_ci switch (index) { 65558c2ecf20Sopenharmony_ci case MSR_IA32_SMBASE: 65568c2ecf20Sopenharmony_ci /* 65578c2ecf20Sopenharmony_ci * We cannot do SMM unless we can run the guest in big 65588c2ecf20Sopenharmony_ci * real mode. 65598c2ecf20Sopenharmony_ci */ 65608c2ecf20Sopenharmony_ci return enable_unrestricted_guest || emulate_invalid_guest_state; 65618c2ecf20Sopenharmony_ci case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: 65628c2ecf20Sopenharmony_ci return nested; 65638c2ecf20Sopenharmony_ci case MSR_AMD64_VIRT_SPEC_CTRL: 65648c2ecf20Sopenharmony_ci /* This is AMD only. */ 65658c2ecf20Sopenharmony_ci return false; 65668c2ecf20Sopenharmony_ci default: 65678c2ecf20Sopenharmony_ci return true; 65688c2ecf20Sopenharmony_ci } 65698c2ecf20Sopenharmony_ci} 65708c2ecf20Sopenharmony_ci 65718c2ecf20Sopenharmony_cistatic void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) 65728c2ecf20Sopenharmony_ci{ 65738c2ecf20Sopenharmony_ci u32 exit_intr_info; 65748c2ecf20Sopenharmony_ci bool unblock_nmi; 65758c2ecf20Sopenharmony_ci u8 vector; 65768c2ecf20Sopenharmony_ci bool idtv_info_valid; 65778c2ecf20Sopenharmony_ci 65788c2ecf20Sopenharmony_ci idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; 65798c2ecf20Sopenharmony_ci 65808c2ecf20Sopenharmony_ci if (enable_vnmi) { 65818c2ecf20Sopenharmony_ci if (vmx->loaded_vmcs->nmi_known_unmasked) 65828c2ecf20Sopenharmony_ci return; 65838c2ecf20Sopenharmony_ci 65848c2ecf20Sopenharmony_ci exit_intr_info = vmx_get_intr_info(&vmx->vcpu); 65858c2ecf20Sopenharmony_ci unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; 65868c2ecf20Sopenharmony_ci vector = exit_intr_info & INTR_INFO_VECTOR_MASK; 65878c2ecf20Sopenharmony_ci /* 65888c2ecf20Sopenharmony_ci * SDM 3: 27.7.1.2 (September 2008) 65898c2ecf20Sopenharmony_ci * Re-set bit "block by NMI" before VM entry if vmexit caused by 65908c2ecf20Sopenharmony_ci * a guest IRET fault. 65918c2ecf20Sopenharmony_ci * SDM 3: 23.2.2 (September 2008) 65928c2ecf20Sopenharmony_ci * Bit 12 is undefined in any of the following cases: 65938c2ecf20Sopenharmony_ci * If the VM exit sets the valid bit in the IDT-vectoring 65948c2ecf20Sopenharmony_ci * information field. 65958c2ecf20Sopenharmony_ci * If the VM exit is due to a double fault. 65968c2ecf20Sopenharmony_ci */ 65978c2ecf20Sopenharmony_ci if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && 65988c2ecf20Sopenharmony_ci vector != DF_VECTOR && !idtv_info_valid) 65998c2ecf20Sopenharmony_ci vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 66008c2ecf20Sopenharmony_ci GUEST_INTR_STATE_NMI); 66018c2ecf20Sopenharmony_ci else 66028c2ecf20Sopenharmony_ci vmx->loaded_vmcs->nmi_known_unmasked = 66038c2ecf20Sopenharmony_ci !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) 66048c2ecf20Sopenharmony_ci & GUEST_INTR_STATE_NMI); 66058c2ecf20Sopenharmony_ci } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked)) 66068c2ecf20Sopenharmony_ci vmx->loaded_vmcs->vnmi_blocked_time += 66078c2ecf20Sopenharmony_ci ktime_to_ns(ktime_sub(ktime_get(), 66088c2ecf20Sopenharmony_ci vmx->loaded_vmcs->entry_time)); 66098c2ecf20Sopenharmony_ci} 66108c2ecf20Sopenharmony_ci 66118c2ecf20Sopenharmony_cistatic void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, 66128c2ecf20Sopenharmony_ci u32 idt_vectoring_info, 66138c2ecf20Sopenharmony_ci int instr_len_field, 66148c2ecf20Sopenharmony_ci int error_code_field) 66158c2ecf20Sopenharmony_ci{ 66168c2ecf20Sopenharmony_ci u8 vector; 66178c2ecf20Sopenharmony_ci int type; 66188c2ecf20Sopenharmony_ci bool idtv_info_valid; 66198c2ecf20Sopenharmony_ci 66208c2ecf20Sopenharmony_ci idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; 66218c2ecf20Sopenharmony_ci 66228c2ecf20Sopenharmony_ci vcpu->arch.nmi_injected = false; 66238c2ecf20Sopenharmony_ci kvm_clear_exception_queue(vcpu); 66248c2ecf20Sopenharmony_ci kvm_clear_interrupt_queue(vcpu); 66258c2ecf20Sopenharmony_ci 66268c2ecf20Sopenharmony_ci if (!idtv_info_valid) 66278c2ecf20Sopenharmony_ci return; 66288c2ecf20Sopenharmony_ci 66298c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_EVENT, vcpu); 66308c2ecf20Sopenharmony_ci 66318c2ecf20Sopenharmony_ci vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; 66328c2ecf20Sopenharmony_ci type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; 66338c2ecf20Sopenharmony_ci 66348c2ecf20Sopenharmony_ci switch (type) { 66358c2ecf20Sopenharmony_ci case INTR_TYPE_NMI_INTR: 66368c2ecf20Sopenharmony_ci vcpu->arch.nmi_injected = true; 66378c2ecf20Sopenharmony_ci /* 66388c2ecf20Sopenharmony_ci * SDM 3: 27.7.1.2 (September 2008) 66398c2ecf20Sopenharmony_ci * Clear bit "block by NMI" before VM entry if a NMI 66408c2ecf20Sopenharmony_ci * delivery faulted. 66418c2ecf20Sopenharmony_ci */ 66428c2ecf20Sopenharmony_ci vmx_set_nmi_mask(vcpu, false); 66438c2ecf20Sopenharmony_ci break; 66448c2ecf20Sopenharmony_ci case INTR_TYPE_SOFT_EXCEPTION: 66458c2ecf20Sopenharmony_ci vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); 66468c2ecf20Sopenharmony_ci fallthrough; 66478c2ecf20Sopenharmony_ci case INTR_TYPE_HARD_EXCEPTION: 66488c2ecf20Sopenharmony_ci if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { 66498c2ecf20Sopenharmony_ci u32 err = vmcs_read32(error_code_field); 66508c2ecf20Sopenharmony_ci kvm_requeue_exception_e(vcpu, vector, err); 66518c2ecf20Sopenharmony_ci } else 66528c2ecf20Sopenharmony_ci kvm_requeue_exception(vcpu, vector); 66538c2ecf20Sopenharmony_ci break; 66548c2ecf20Sopenharmony_ci case INTR_TYPE_SOFT_INTR: 66558c2ecf20Sopenharmony_ci vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); 66568c2ecf20Sopenharmony_ci fallthrough; 66578c2ecf20Sopenharmony_ci case INTR_TYPE_EXT_INTR: 66588c2ecf20Sopenharmony_ci kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); 66598c2ecf20Sopenharmony_ci break; 66608c2ecf20Sopenharmony_ci default: 66618c2ecf20Sopenharmony_ci break; 66628c2ecf20Sopenharmony_ci } 66638c2ecf20Sopenharmony_ci} 66648c2ecf20Sopenharmony_ci 66658c2ecf20Sopenharmony_cistatic void vmx_complete_interrupts(struct vcpu_vmx *vmx) 66668c2ecf20Sopenharmony_ci{ 66678c2ecf20Sopenharmony_ci __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, 66688c2ecf20Sopenharmony_ci VM_EXIT_INSTRUCTION_LEN, 66698c2ecf20Sopenharmony_ci IDT_VECTORING_ERROR_CODE); 66708c2ecf20Sopenharmony_ci} 66718c2ecf20Sopenharmony_ci 66728c2ecf20Sopenharmony_cistatic void vmx_cancel_injection(struct kvm_vcpu *vcpu) 66738c2ecf20Sopenharmony_ci{ 66748c2ecf20Sopenharmony_ci __vmx_complete_interrupts(vcpu, 66758c2ecf20Sopenharmony_ci vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), 66768c2ecf20Sopenharmony_ci VM_ENTRY_INSTRUCTION_LEN, 66778c2ecf20Sopenharmony_ci VM_ENTRY_EXCEPTION_ERROR_CODE); 66788c2ecf20Sopenharmony_ci 66798c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); 66808c2ecf20Sopenharmony_ci} 66818c2ecf20Sopenharmony_ci 66828c2ecf20Sopenharmony_cistatic void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) 66838c2ecf20Sopenharmony_ci{ 66848c2ecf20Sopenharmony_ci int i, nr_msrs; 66858c2ecf20Sopenharmony_ci struct perf_guest_switch_msr *msrs; 66868c2ecf20Sopenharmony_ci 66878c2ecf20Sopenharmony_ci msrs = perf_guest_get_msrs(&nr_msrs); 66888c2ecf20Sopenharmony_ci 66898c2ecf20Sopenharmony_ci if (!msrs) 66908c2ecf20Sopenharmony_ci return; 66918c2ecf20Sopenharmony_ci 66928c2ecf20Sopenharmony_ci for (i = 0; i < nr_msrs; i++) 66938c2ecf20Sopenharmony_ci if (msrs[i].host == msrs[i].guest) 66948c2ecf20Sopenharmony_ci clear_atomic_switch_msr(vmx, msrs[i].msr); 66958c2ecf20Sopenharmony_ci else 66968c2ecf20Sopenharmony_ci add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest, 66978c2ecf20Sopenharmony_ci msrs[i].host, false); 66988c2ecf20Sopenharmony_ci} 66998c2ecf20Sopenharmony_ci 67008c2ecf20Sopenharmony_cistatic void vmx_update_hv_timer(struct kvm_vcpu *vcpu) 67018c2ecf20Sopenharmony_ci{ 67028c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 67038c2ecf20Sopenharmony_ci u64 tscl; 67048c2ecf20Sopenharmony_ci u32 delta_tsc; 67058c2ecf20Sopenharmony_ci 67068c2ecf20Sopenharmony_ci if (vmx->req_immediate_exit) { 67078c2ecf20Sopenharmony_ci vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0); 67088c2ecf20Sopenharmony_ci vmx->loaded_vmcs->hv_timer_soft_disabled = false; 67098c2ecf20Sopenharmony_ci } else if (vmx->hv_deadline_tsc != -1) { 67108c2ecf20Sopenharmony_ci tscl = rdtsc(); 67118c2ecf20Sopenharmony_ci if (vmx->hv_deadline_tsc > tscl) 67128c2ecf20Sopenharmony_ci /* set_hv_timer ensures the delta fits in 32-bits */ 67138c2ecf20Sopenharmony_ci delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >> 67148c2ecf20Sopenharmony_ci cpu_preemption_timer_multi); 67158c2ecf20Sopenharmony_ci else 67168c2ecf20Sopenharmony_ci delta_tsc = 0; 67178c2ecf20Sopenharmony_ci 67188c2ecf20Sopenharmony_ci vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc); 67198c2ecf20Sopenharmony_ci vmx->loaded_vmcs->hv_timer_soft_disabled = false; 67208c2ecf20Sopenharmony_ci } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) { 67218c2ecf20Sopenharmony_ci vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1); 67228c2ecf20Sopenharmony_ci vmx->loaded_vmcs->hv_timer_soft_disabled = true; 67238c2ecf20Sopenharmony_ci } 67248c2ecf20Sopenharmony_ci} 67258c2ecf20Sopenharmony_ci 67268c2ecf20Sopenharmony_civoid noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) 67278c2ecf20Sopenharmony_ci{ 67288c2ecf20Sopenharmony_ci if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) { 67298c2ecf20Sopenharmony_ci vmx->loaded_vmcs->host_state.rsp = host_rsp; 67308c2ecf20Sopenharmony_ci vmcs_writel(HOST_RSP, host_rsp); 67318c2ecf20Sopenharmony_ci } 67328c2ecf20Sopenharmony_ci} 67338c2ecf20Sopenharmony_ci 67348c2ecf20Sopenharmony_civoid noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, 67358c2ecf20Sopenharmony_ci unsigned int flags) 67368c2ecf20Sopenharmony_ci{ 67378c2ecf20Sopenharmony_ci u64 hostval = this_cpu_read(x86_spec_ctrl_current); 67388c2ecf20Sopenharmony_ci 67398c2ecf20Sopenharmony_ci if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) 67408c2ecf20Sopenharmony_ci return; 67418c2ecf20Sopenharmony_ci 67428c2ecf20Sopenharmony_ci if (flags & VMX_RUN_SAVE_SPEC_CTRL) 67438c2ecf20Sopenharmony_ci vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL); 67448c2ecf20Sopenharmony_ci 67458c2ecf20Sopenharmony_ci /* 67468c2ecf20Sopenharmony_ci * If the guest/host SPEC_CTRL values differ, restore the host value. 67478c2ecf20Sopenharmony_ci * 67488c2ecf20Sopenharmony_ci * For legacy IBRS, the IBRS bit always needs to be written after 67498c2ecf20Sopenharmony_ci * transitioning from a less privileged predictor mode, regardless of 67508c2ecf20Sopenharmony_ci * whether the guest/host values differ. 67518c2ecf20Sopenharmony_ci */ 67528c2ecf20Sopenharmony_ci if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) || 67538c2ecf20Sopenharmony_ci vmx->spec_ctrl != hostval) 67548c2ecf20Sopenharmony_ci native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); 67558c2ecf20Sopenharmony_ci 67568c2ecf20Sopenharmony_ci barrier_nospec(); 67578c2ecf20Sopenharmony_ci} 67588c2ecf20Sopenharmony_ci 67598c2ecf20Sopenharmony_cistatic fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) 67608c2ecf20Sopenharmony_ci{ 67618c2ecf20Sopenharmony_ci switch (to_vmx(vcpu)->exit_reason.basic) { 67628c2ecf20Sopenharmony_ci case EXIT_REASON_MSR_WRITE: 67638c2ecf20Sopenharmony_ci return handle_fastpath_set_msr_irqoff(vcpu); 67648c2ecf20Sopenharmony_ci case EXIT_REASON_PREEMPTION_TIMER: 67658c2ecf20Sopenharmony_ci return handle_fastpath_preemption_timer(vcpu); 67668c2ecf20Sopenharmony_ci default: 67678c2ecf20Sopenharmony_ci return EXIT_FASTPATH_NONE; 67688c2ecf20Sopenharmony_ci } 67698c2ecf20Sopenharmony_ci} 67708c2ecf20Sopenharmony_ci 67718c2ecf20Sopenharmony_cistatic noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, 67728c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx, 67738c2ecf20Sopenharmony_ci unsigned long flags) 67748c2ecf20Sopenharmony_ci{ 67758c2ecf20Sopenharmony_ci /* 67768c2ecf20Sopenharmony_ci * VMENTER enables interrupts (host state), but the kernel state is 67778c2ecf20Sopenharmony_ci * interrupts disabled when this is invoked. Also tell RCU about 67788c2ecf20Sopenharmony_ci * it. This is the same logic as for exit_to_user_mode(). 67798c2ecf20Sopenharmony_ci * 67808c2ecf20Sopenharmony_ci * This ensures that e.g. latency analysis on the host observes 67818c2ecf20Sopenharmony_ci * guest mode as interrupt enabled. 67828c2ecf20Sopenharmony_ci * 67838c2ecf20Sopenharmony_ci * guest_enter_irqoff() informs context tracking about the 67848c2ecf20Sopenharmony_ci * transition to guest mode and if enabled adjusts RCU state 67858c2ecf20Sopenharmony_ci * accordingly. 67868c2ecf20Sopenharmony_ci */ 67878c2ecf20Sopenharmony_ci instrumentation_begin(); 67888c2ecf20Sopenharmony_ci trace_hardirqs_on_prepare(); 67898c2ecf20Sopenharmony_ci lockdep_hardirqs_on_prepare(CALLER_ADDR0); 67908c2ecf20Sopenharmony_ci instrumentation_end(); 67918c2ecf20Sopenharmony_ci 67928c2ecf20Sopenharmony_ci guest_enter_irqoff(); 67938c2ecf20Sopenharmony_ci lockdep_hardirqs_on(CALLER_ADDR0); 67948c2ecf20Sopenharmony_ci 67958c2ecf20Sopenharmony_ci /* L1D Flush includes CPU buffer clear to mitigate MDS */ 67968c2ecf20Sopenharmony_ci if (static_branch_unlikely(&vmx_l1d_should_flush)) 67978c2ecf20Sopenharmony_ci vmx_l1d_flush(vcpu); 67988c2ecf20Sopenharmony_ci else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF)) 67998c2ecf20Sopenharmony_ci mds_clear_cpu_buffers(); 68008c2ecf20Sopenharmony_ci else if (static_branch_unlikely(&mmio_stale_data_clear) && 68018c2ecf20Sopenharmony_ci kvm_arch_has_assigned_device(vcpu->kvm)) 68028c2ecf20Sopenharmony_ci mds_clear_cpu_buffers(); 68038c2ecf20Sopenharmony_ci 68048c2ecf20Sopenharmony_ci vmx_disable_fb_clear(vmx); 68058c2ecf20Sopenharmony_ci 68068c2ecf20Sopenharmony_ci if (vcpu->arch.cr2 != native_read_cr2()) 68078c2ecf20Sopenharmony_ci native_write_cr2(vcpu->arch.cr2); 68088c2ecf20Sopenharmony_ci 68098c2ecf20Sopenharmony_ci vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, 68108c2ecf20Sopenharmony_ci flags); 68118c2ecf20Sopenharmony_ci 68128c2ecf20Sopenharmony_ci vcpu->arch.cr2 = native_read_cr2(); 68138c2ecf20Sopenharmony_ci 68148c2ecf20Sopenharmony_ci vmx_enable_fb_clear(vmx); 68158c2ecf20Sopenharmony_ci 68168c2ecf20Sopenharmony_ci /* 68178c2ecf20Sopenharmony_ci * VMEXIT disables interrupts (host state), but tracing and lockdep 68188c2ecf20Sopenharmony_ci * have them in state 'on' as recorded before entering guest mode. 68198c2ecf20Sopenharmony_ci * Same as enter_from_user_mode(). 68208c2ecf20Sopenharmony_ci * 68218c2ecf20Sopenharmony_ci * context_tracking_guest_exit() restores host context and reinstates 68228c2ecf20Sopenharmony_ci * RCU if enabled and required. 68238c2ecf20Sopenharmony_ci * 68248c2ecf20Sopenharmony_ci * This needs to be done before the below as native_read_msr() 68258c2ecf20Sopenharmony_ci * contains a tracepoint and x86_spec_ctrl_restore_host() calls 68268c2ecf20Sopenharmony_ci * into world and some more. 68278c2ecf20Sopenharmony_ci */ 68288c2ecf20Sopenharmony_ci lockdep_hardirqs_off(CALLER_ADDR0); 68298c2ecf20Sopenharmony_ci context_tracking_guest_exit(); 68308c2ecf20Sopenharmony_ci 68318c2ecf20Sopenharmony_ci instrumentation_begin(); 68328c2ecf20Sopenharmony_ci trace_hardirqs_off_finish(); 68338c2ecf20Sopenharmony_ci instrumentation_end(); 68348c2ecf20Sopenharmony_ci} 68358c2ecf20Sopenharmony_ci 68368c2ecf20Sopenharmony_cistatic fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) 68378c2ecf20Sopenharmony_ci{ 68388c2ecf20Sopenharmony_ci fastpath_t exit_fastpath; 68398c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 68408c2ecf20Sopenharmony_ci unsigned long cr3, cr4; 68418c2ecf20Sopenharmony_ci 68428c2ecf20Sopenharmony_cireenter_guest: 68438c2ecf20Sopenharmony_ci /* Record the guest's net vcpu time for enforced NMI injections. */ 68448c2ecf20Sopenharmony_ci if (unlikely(!enable_vnmi && 68458c2ecf20Sopenharmony_ci vmx->loaded_vmcs->soft_vnmi_blocked)) 68468c2ecf20Sopenharmony_ci vmx->loaded_vmcs->entry_time = ktime_get(); 68478c2ecf20Sopenharmony_ci 68488c2ecf20Sopenharmony_ci /* Don't enter VMX if guest state is invalid, let the exit handler 68498c2ecf20Sopenharmony_ci start emulation until we arrive back to a valid state */ 68508c2ecf20Sopenharmony_ci if (vmx->emulation_required) 68518c2ecf20Sopenharmony_ci return EXIT_FASTPATH_NONE; 68528c2ecf20Sopenharmony_ci 68538c2ecf20Sopenharmony_ci if (vmx->ple_window_dirty) { 68548c2ecf20Sopenharmony_ci vmx->ple_window_dirty = false; 68558c2ecf20Sopenharmony_ci vmcs_write32(PLE_WINDOW, vmx->ple_window); 68568c2ecf20Sopenharmony_ci } 68578c2ecf20Sopenharmony_ci 68588c2ecf20Sopenharmony_ci /* 68598c2ecf20Sopenharmony_ci * We did this in prepare_switch_to_guest, because it needs to 68608c2ecf20Sopenharmony_ci * be within srcu_read_lock. 68618c2ecf20Sopenharmony_ci */ 68628c2ecf20Sopenharmony_ci WARN_ON_ONCE(vmx->nested.need_vmcs12_to_shadow_sync); 68638c2ecf20Sopenharmony_ci 68648c2ecf20Sopenharmony_ci if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP)) 68658c2ecf20Sopenharmony_ci vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); 68668c2ecf20Sopenharmony_ci if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP)) 68678c2ecf20Sopenharmony_ci vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); 68688c2ecf20Sopenharmony_ci 68698c2ecf20Sopenharmony_ci cr3 = __get_current_cr3_fast(); 68708c2ecf20Sopenharmony_ci if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { 68718c2ecf20Sopenharmony_ci vmcs_writel(HOST_CR3, cr3); 68728c2ecf20Sopenharmony_ci vmx->loaded_vmcs->host_state.cr3 = cr3; 68738c2ecf20Sopenharmony_ci } 68748c2ecf20Sopenharmony_ci 68758c2ecf20Sopenharmony_ci cr4 = cr4_read_shadow(); 68768c2ecf20Sopenharmony_ci if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { 68778c2ecf20Sopenharmony_ci vmcs_writel(HOST_CR4, cr4); 68788c2ecf20Sopenharmony_ci vmx->loaded_vmcs->host_state.cr4 = cr4; 68798c2ecf20Sopenharmony_ci } 68808c2ecf20Sopenharmony_ci 68818c2ecf20Sopenharmony_ci /* When single-stepping over STI and MOV SS, we must clear the 68828c2ecf20Sopenharmony_ci * corresponding interruptibility bits in the guest state. Otherwise 68838c2ecf20Sopenharmony_ci * vmentry fails as it then expects bit 14 (BS) in pending debug 68848c2ecf20Sopenharmony_ci * exceptions being set, but that's not correct for the guest debugging 68858c2ecf20Sopenharmony_ci * case. */ 68868c2ecf20Sopenharmony_ci if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 68878c2ecf20Sopenharmony_ci vmx_set_interrupt_shadow(vcpu, 0); 68888c2ecf20Sopenharmony_ci 68898c2ecf20Sopenharmony_ci kvm_load_guest_xsave_state(vcpu); 68908c2ecf20Sopenharmony_ci 68918c2ecf20Sopenharmony_ci pt_guest_enter(vmx); 68928c2ecf20Sopenharmony_ci 68938c2ecf20Sopenharmony_ci atomic_switch_perf_msrs(vmx); 68948c2ecf20Sopenharmony_ci 68958c2ecf20Sopenharmony_ci if (enable_preemption_timer) 68968c2ecf20Sopenharmony_ci vmx_update_hv_timer(vcpu); 68978c2ecf20Sopenharmony_ci 68988c2ecf20Sopenharmony_ci kvm_wait_lapic_expire(vcpu); 68998c2ecf20Sopenharmony_ci 69008c2ecf20Sopenharmony_ci /* 69018c2ecf20Sopenharmony_ci * If this vCPU has touched SPEC_CTRL, restore the guest's value if 69028c2ecf20Sopenharmony_ci * it's non-zero. Since vmentry is serialising on affected CPUs, there 69038c2ecf20Sopenharmony_ci * is no need to worry about the conditional branch over the wrmsr 69048c2ecf20Sopenharmony_ci * being speculatively taken. 69058c2ecf20Sopenharmony_ci */ 69068c2ecf20Sopenharmony_ci x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); 69078c2ecf20Sopenharmony_ci 69088c2ecf20Sopenharmony_ci /* The actual VMENTER/EXIT is in the .noinstr.text section. */ 69098c2ecf20Sopenharmony_ci vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); 69108c2ecf20Sopenharmony_ci 69118c2ecf20Sopenharmony_ci /* All fields are clean at this point */ 69128c2ecf20Sopenharmony_ci if (static_branch_unlikely(&enable_evmcs)) 69138c2ecf20Sopenharmony_ci current_evmcs->hv_clean_fields |= 69148c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; 69158c2ecf20Sopenharmony_ci 69168c2ecf20Sopenharmony_ci if (static_branch_unlikely(&enable_evmcs)) 69178c2ecf20Sopenharmony_ci current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index; 69188c2ecf20Sopenharmony_ci 69198c2ecf20Sopenharmony_ci /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ 69208c2ecf20Sopenharmony_ci if (vmx->host_debugctlmsr) 69218c2ecf20Sopenharmony_ci update_debugctlmsr(vmx->host_debugctlmsr); 69228c2ecf20Sopenharmony_ci 69238c2ecf20Sopenharmony_ci#ifndef CONFIG_X86_64 69248c2ecf20Sopenharmony_ci /* 69258c2ecf20Sopenharmony_ci * The sysexit path does not restore ds/es, so we must set them to 69268c2ecf20Sopenharmony_ci * a reasonable value ourselves. 69278c2ecf20Sopenharmony_ci * 69288c2ecf20Sopenharmony_ci * We can't defer this to vmx_prepare_switch_to_host() since that 69298c2ecf20Sopenharmony_ci * function may be executed in interrupt context, which saves and 69308c2ecf20Sopenharmony_ci * restore segments around it, nullifying its effect. 69318c2ecf20Sopenharmony_ci */ 69328c2ecf20Sopenharmony_ci loadsegment(ds, __USER_DS); 69338c2ecf20Sopenharmony_ci loadsegment(es, __USER_DS); 69348c2ecf20Sopenharmony_ci#endif 69358c2ecf20Sopenharmony_ci 69368c2ecf20Sopenharmony_ci vmx_register_cache_reset(vcpu); 69378c2ecf20Sopenharmony_ci 69388c2ecf20Sopenharmony_ci pt_guest_exit(vmx); 69398c2ecf20Sopenharmony_ci 69408c2ecf20Sopenharmony_ci kvm_load_host_xsave_state(vcpu); 69418c2ecf20Sopenharmony_ci 69428c2ecf20Sopenharmony_ci vmx->nested.nested_run_pending = 0; 69438c2ecf20Sopenharmony_ci vmx->idt_vectoring_info = 0; 69448c2ecf20Sopenharmony_ci 69458c2ecf20Sopenharmony_ci if (unlikely(vmx->fail)) { 69468c2ecf20Sopenharmony_ci vmx->exit_reason.full = 0xdead; 69478c2ecf20Sopenharmony_ci return EXIT_FASTPATH_NONE; 69488c2ecf20Sopenharmony_ci } 69498c2ecf20Sopenharmony_ci 69508c2ecf20Sopenharmony_ci vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON); 69518c2ecf20Sopenharmony_ci if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY)) 69528c2ecf20Sopenharmony_ci kvm_machine_check(); 69538c2ecf20Sopenharmony_ci 69548c2ecf20Sopenharmony_ci trace_kvm_exit(vmx->exit_reason.full, vcpu, KVM_ISA_VMX); 69558c2ecf20Sopenharmony_ci 69568c2ecf20Sopenharmony_ci if (unlikely(vmx->exit_reason.failed_vmentry)) 69578c2ecf20Sopenharmony_ci return EXIT_FASTPATH_NONE; 69588c2ecf20Sopenharmony_ci 69598c2ecf20Sopenharmony_ci vmx->loaded_vmcs->launched = 1; 69608c2ecf20Sopenharmony_ci vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); 69618c2ecf20Sopenharmony_ci 69628c2ecf20Sopenharmony_ci vmx_recover_nmi_blocking(vmx); 69638c2ecf20Sopenharmony_ci vmx_complete_interrupts(vmx); 69648c2ecf20Sopenharmony_ci 69658c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) 69668c2ecf20Sopenharmony_ci return EXIT_FASTPATH_NONE; 69678c2ecf20Sopenharmony_ci 69688c2ecf20Sopenharmony_ci exit_fastpath = vmx_exit_handlers_fastpath(vcpu); 69698c2ecf20Sopenharmony_ci if (exit_fastpath == EXIT_FASTPATH_REENTER_GUEST) { 69708c2ecf20Sopenharmony_ci if (!kvm_vcpu_exit_request(vcpu)) { 69718c2ecf20Sopenharmony_ci /* 69728c2ecf20Sopenharmony_ci * FIXME: this goto should be a loop in vcpu_enter_guest, 69738c2ecf20Sopenharmony_ci * but it would incur the cost of a retpoline for now. 69748c2ecf20Sopenharmony_ci * Revisit once static calls are available. 69758c2ecf20Sopenharmony_ci */ 69768c2ecf20Sopenharmony_ci if (vcpu->arch.apicv_active) 69778c2ecf20Sopenharmony_ci vmx_sync_pir_to_irr(vcpu); 69788c2ecf20Sopenharmony_ci goto reenter_guest; 69798c2ecf20Sopenharmony_ci } 69808c2ecf20Sopenharmony_ci exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED; 69818c2ecf20Sopenharmony_ci } 69828c2ecf20Sopenharmony_ci 69838c2ecf20Sopenharmony_ci return exit_fastpath; 69848c2ecf20Sopenharmony_ci} 69858c2ecf20Sopenharmony_ci 69868c2ecf20Sopenharmony_cistatic void vmx_free_vcpu(struct kvm_vcpu *vcpu) 69878c2ecf20Sopenharmony_ci{ 69888c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 69898c2ecf20Sopenharmony_ci 69908c2ecf20Sopenharmony_ci if (enable_pml) 69918c2ecf20Sopenharmony_ci vmx_destroy_pml_buffer(vmx); 69928c2ecf20Sopenharmony_ci free_vpid(vmx->vpid); 69938c2ecf20Sopenharmony_ci nested_vmx_free_vcpu(vcpu); 69948c2ecf20Sopenharmony_ci free_loaded_vmcs(vmx->loaded_vmcs); 69958c2ecf20Sopenharmony_ci} 69968c2ecf20Sopenharmony_ci 69978c2ecf20Sopenharmony_cistatic int vmx_create_vcpu(struct kvm_vcpu *vcpu) 69988c2ecf20Sopenharmony_ci{ 69998c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx; 70008c2ecf20Sopenharmony_ci int i, cpu, err; 70018c2ecf20Sopenharmony_ci 70028c2ecf20Sopenharmony_ci BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0); 70038c2ecf20Sopenharmony_ci vmx = to_vmx(vcpu); 70048c2ecf20Sopenharmony_ci 70058c2ecf20Sopenharmony_ci err = -ENOMEM; 70068c2ecf20Sopenharmony_ci 70078c2ecf20Sopenharmony_ci vmx->vpid = allocate_vpid(); 70088c2ecf20Sopenharmony_ci 70098c2ecf20Sopenharmony_ci /* 70108c2ecf20Sopenharmony_ci * If PML is turned on, failure on enabling PML just results in failure 70118c2ecf20Sopenharmony_ci * of creating the vcpu, therefore we can simplify PML logic (by 70128c2ecf20Sopenharmony_ci * avoiding dealing with cases, such as enabling PML partially on vcpus 70138c2ecf20Sopenharmony_ci * for the guest), etc. 70148c2ecf20Sopenharmony_ci */ 70158c2ecf20Sopenharmony_ci if (enable_pml) { 70168c2ecf20Sopenharmony_ci vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); 70178c2ecf20Sopenharmony_ci if (!vmx->pml_pg) 70188c2ecf20Sopenharmony_ci goto free_vpid; 70198c2ecf20Sopenharmony_ci } 70208c2ecf20Sopenharmony_ci 70218c2ecf20Sopenharmony_ci BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS); 70228c2ecf20Sopenharmony_ci 70238c2ecf20Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) { 70248c2ecf20Sopenharmony_ci u32 index = vmx_uret_msrs_list[i]; 70258c2ecf20Sopenharmony_ci int j = vmx->nr_uret_msrs; 70268c2ecf20Sopenharmony_ci 70278c2ecf20Sopenharmony_ci if (kvm_probe_user_return_msr(index)) 70288c2ecf20Sopenharmony_ci continue; 70298c2ecf20Sopenharmony_ci 70308c2ecf20Sopenharmony_ci vmx->guest_uret_msrs[j].slot = i; 70318c2ecf20Sopenharmony_ci vmx->guest_uret_msrs[j].data = 0; 70328c2ecf20Sopenharmony_ci switch (index) { 70338c2ecf20Sopenharmony_ci case MSR_IA32_TSX_CTRL: 70348c2ecf20Sopenharmony_ci /* 70358c2ecf20Sopenharmony_ci * TSX_CTRL_CPUID_CLEAR is handled in the CPUID 70368c2ecf20Sopenharmony_ci * interception. Keep the host value unchanged to avoid 70378c2ecf20Sopenharmony_ci * changing CPUID bits under the host kernel's feet. 70388c2ecf20Sopenharmony_ci * 70398c2ecf20Sopenharmony_ci * hle=0, rtm=0, tsx_ctrl=1 can be found with some 70408c2ecf20Sopenharmony_ci * combinations of new kernel and old userspace. If 70418c2ecf20Sopenharmony_ci * those guests run on a tsx=off host, do allow guests 70428c2ecf20Sopenharmony_ci * to use TSX_CTRL, but do not change the value on the 70438c2ecf20Sopenharmony_ci * host so that TSX remains always disabled. 70448c2ecf20Sopenharmony_ci */ 70458c2ecf20Sopenharmony_ci if (boot_cpu_has(X86_FEATURE_RTM)) 70468c2ecf20Sopenharmony_ci vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; 70478c2ecf20Sopenharmony_ci else 70488c2ecf20Sopenharmony_ci vmx->guest_uret_msrs[j].mask = 0; 70498c2ecf20Sopenharmony_ci break; 70508c2ecf20Sopenharmony_ci default: 70518c2ecf20Sopenharmony_ci vmx->guest_uret_msrs[j].mask = -1ull; 70528c2ecf20Sopenharmony_ci break; 70538c2ecf20Sopenharmony_ci } 70548c2ecf20Sopenharmony_ci ++vmx->nr_uret_msrs; 70558c2ecf20Sopenharmony_ci } 70568c2ecf20Sopenharmony_ci 70578c2ecf20Sopenharmony_ci err = alloc_loaded_vmcs(&vmx->vmcs01); 70588c2ecf20Sopenharmony_ci if (err < 0) 70598c2ecf20Sopenharmony_ci goto free_pml; 70608c2ecf20Sopenharmony_ci 70618c2ecf20Sopenharmony_ci /* 70628c2ecf20Sopenharmony_ci * Use Hyper-V 'Enlightened MSR Bitmap' feature when KVM runs as a 70638c2ecf20Sopenharmony_ci * nested (L1) hypervisor and Hyper-V in L0 supports it. Enable the 70648c2ecf20Sopenharmony_ci * feature only for vmcs01, KVM currently isn't equipped to realize any 70658c2ecf20Sopenharmony_ci * performance benefits from enabling it for vmcs02. 70668c2ecf20Sopenharmony_ci */ 70678c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs) && 70688c2ecf20Sopenharmony_ci (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) { 70698c2ecf20Sopenharmony_ci struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs; 70708c2ecf20Sopenharmony_ci 70718c2ecf20Sopenharmony_ci evmcs->hv_enlightenments_control.msr_bitmap = 1; 70728c2ecf20Sopenharmony_ci } 70738c2ecf20Sopenharmony_ci 70748c2ecf20Sopenharmony_ci /* The MSR bitmap starts with all ones */ 70758c2ecf20Sopenharmony_ci bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS); 70768c2ecf20Sopenharmony_ci bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS); 70778c2ecf20Sopenharmony_ci 70788c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R); 70798c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 70808c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW); 70818c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW); 70828c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); 70838c2ecf20Sopenharmony_ci#endif 70848c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); 70858c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); 70868c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); 70878c2ecf20Sopenharmony_ci if (kvm_cstate_in_guest(vcpu->kvm)) { 70888c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R); 70898c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R); 70908c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R); 70918c2ecf20Sopenharmony_ci vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R); 70928c2ecf20Sopenharmony_ci } 70938c2ecf20Sopenharmony_ci vmx->msr_bitmap_mode = 0; 70948c2ecf20Sopenharmony_ci 70958c2ecf20Sopenharmony_ci vmx->loaded_vmcs = &vmx->vmcs01; 70968c2ecf20Sopenharmony_ci cpu = get_cpu(); 70978c2ecf20Sopenharmony_ci vmx_vcpu_load(vcpu, cpu); 70988c2ecf20Sopenharmony_ci vcpu->cpu = cpu; 70998c2ecf20Sopenharmony_ci init_vmcs(vmx); 71008c2ecf20Sopenharmony_ci vmx_vcpu_put(vcpu); 71018c2ecf20Sopenharmony_ci put_cpu(); 71028c2ecf20Sopenharmony_ci if (cpu_need_virtualize_apic_accesses(vcpu)) { 71038c2ecf20Sopenharmony_ci err = alloc_apic_access_page(vcpu->kvm); 71048c2ecf20Sopenharmony_ci if (err) 71058c2ecf20Sopenharmony_ci goto free_vmcs; 71068c2ecf20Sopenharmony_ci } 71078c2ecf20Sopenharmony_ci 71088c2ecf20Sopenharmony_ci if (enable_ept && !enable_unrestricted_guest) { 71098c2ecf20Sopenharmony_ci err = init_rmode_identity_map(vcpu->kvm); 71108c2ecf20Sopenharmony_ci if (err) 71118c2ecf20Sopenharmony_ci goto free_vmcs; 71128c2ecf20Sopenharmony_ci } 71138c2ecf20Sopenharmony_ci 71148c2ecf20Sopenharmony_ci if (nested) 71158c2ecf20Sopenharmony_ci memcpy(&vmx->nested.msrs, &vmcs_config.nested, sizeof(vmx->nested.msrs)); 71168c2ecf20Sopenharmony_ci else 71178c2ecf20Sopenharmony_ci memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs)); 71188c2ecf20Sopenharmony_ci 71198c2ecf20Sopenharmony_ci vmx->nested.posted_intr_nv = -1; 71208c2ecf20Sopenharmony_ci vmx->nested.current_vmptr = -1ull; 71218c2ecf20Sopenharmony_ci 71228c2ecf20Sopenharmony_ci vcpu->arch.microcode_version = 0x100000000ULL; 71238c2ecf20Sopenharmony_ci vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED; 71248c2ecf20Sopenharmony_ci 71258c2ecf20Sopenharmony_ci /* 71268c2ecf20Sopenharmony_ci * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR 71278c2ecf20Sopenharmony_ci * or POSTED_INTR_WAKEUP_VECTOR. 71288c2ecf20Sopenharmony_ci */ 71298c2ecf20Sopenharmony_ci vmx->pi_desc.nv = POSTED_INTR_VECTOR; 71308c2ecf20Sopenharmony_ci vmx->pi_desc.sn = 1; 71318c2ecf20Sopenharmony_ci 71328c2ecf20Sopenharmony_ci vmx->ept_pointer = INVALID_PAGE; 71338c2ecf20Sopenharmony_ci 71348c2ecf20Sopenharmony_ci return 0; 71358c2ecf20Sopenharmony_ci 71368c2ecf20Sopenharmony_cifree_vmcs: 71378c2ecf20Sopenharmony_ci free_loaded_vmcs(vmx->loaded_vmcs); 71388c2ecf20Sopenharmony_cifree_pml: 71398c2ecf20Sopenharmony_ci vmx_destroy_pml_buffer(vmx); 71408c2ecf20Sopenharmony_cifree_vpid: 71418c2ecf20Sopenharmony_ci free_vpid(vmx->vpid); 71428c2ecf20Sopenharmony_ci return err; 71438c2ecf20Sopenharmony_ci} 71448c2ecf20Sopenharmony_ci 71458c2ecf20Sopenharmony_ci#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" 71468c2ecf20Sopenharmony_ci#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" 71478c2ecf20Sopenharmony_ci 71488c2ecf20Sopenharmony_cistatic int vmx_vm_init(struct kvm *kvm) 71498c2ecf20Sopenharmony_ci{ 71508c2ecf20Sopenharmony_ci spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock); 71518c2ecf20Sopenharmony_ci 71528c2ecf20Sopenharmony_ci if (!ple_gap) 71538c2ecf20Sopenharmony_ci kvm->arch.pause_in_guest = true; 71548c2ecf20Sopenharmony_ci 71558c2ecf20Sopenharmony_ci if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) { 71568c2ecf20Sopenharmony_ci switch (l1tf_mitigation) { 71578c2ecf20Sopenharmony_ci case L1TF_MITIGATION_OFF: 71588c2ecf20Sopenharmony_ci case L1TF_MITIGATION_FLUSH_NOWARN: 71598c2ecf20Sopenharmony_ci /* 'I explicitly don't care' is set */ 71608c2ecf20Sopenharmony_ci break; 71618c2ecf20Sopenharmony_ci case L1TF_MITIGATION_FLUSH: 71628c2ecf20Sopenharmony_ci case L1TF_MITIGATION_FLUSH_NOSMT: 71638c2ecf20Sopenharmony_ci case L1TF_MITIGATION_FULL: 71648c2ecf20Sopenharmony_ci /* 71658c2ecf20Sopenharmony_ci * Warn upon starting the first VM in a potentially 71668c2ecf20Sopenharmony_ci * insecure environment. 71678c2ecf20Sopenharmony_ci */ 71688c2ecf20Sopenharmony_ci if (sched_smt_active()) 71698c2ecf20Sopenharmony_ci pr_warn_once(L1TF_MSG_SMT); 71708c2ecf20Sopenharmony_ci if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER) 71718c2ecf20Sopenharmony_ci pr_warn_once(L1TF_MSG_L1D); 71728c2ecf20Sopenharmony_ci break; 71738c2ecf20Sopenharmony_ci case L1TF_MITIGATION_FULL_FORCE: 71748c2ecf20Sopenharmony_ci /* Flush is enforced */ 71758c2ecf20Sopenharmony_ci break; 71768c2ecf20Sopenharmony_ci } 71778c2ecf20Sopenharmony_ci } 71788c2ecf20Sopenharmony_ci kvm_apicv_init(kvm, enable_apicv); 71798c2ecf20Sopenharmony_ci return 0; 71808c2ecf20Sopenharmony_ci} 71818c2ecf20Sopenharmony_ci 71828c2ecf20Sopenharmony_cistatic int __init vmx_check_processor_compat(void) 71838c2ecf20Sopenharmony_ci{ 71848c2ecf20Sopenharmony_ci struct vmcs_config vmcs_conf; 71858c2ecf20Sopenharmony_ci struct vmx_capability vmx_cap; 71868c2ecf20Sopenharmony_ci 71878c2ecf20Sopenharmony_ci if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || 71888c2ecf20Sopenharmony_ci !this_cpu_has(X86_FEATURE_VMX)) { 71898c2ecf20Sopenharmony_ci pr_err("kvm: VMX is disabled on CPU %d\n", smp_processor_id()); 71908c2ecf20Sopenharmony_ci return -EIO; 71918c2ecf20Sopenharmony_ci } 71928c2ecf20Sopenharmony_ci 71938c2ecf20Sopenharmony_ci if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) 71948c2ecf20Sopenharmony_ci return -EIO; 71958c2ecf20Sopenharmony_ci if (nested) 71968c2ecf20Sopenharmony_ci nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept); 71978c2ecf20Sopenharmony_ci if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { 71988c2ecf20Sopenharmony_ci printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", 71998c2ecf20Sopenharmony_ci smp_processor_id()); 72008c2ecf20Sopenharmony_ci return -EIO; 72018c2ecf20Sopenharmony_ci } 72028c2ecf20Sopenharmony_ci return 0; 72038c2ecf20Sopenharmony_ci} 72048c2ecf20Sopenharmony_ci 72058c2ecf20Sopenharmony_cistatic u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) 72068c2ecf20Sopenharmony_ci{ 72078c2ecf20Sopenharmony_ci u8 cache; 72088c2ecf20Sopenharmony_ci u64 ipat = 0; 72098c2ecf20Sopenharmony_ci 72108c2ecf20Sopenharmony_ci /* We wanted to honor guest CD/MTRR/PAT, but doing so could result in 72118c2ecf20Sopenharmony_ci * memory aliases with conflicting memory types and sometimes MCEs. 72128c2ecf20Sopenharmony_ci * We have to be careful as to what are honored and when. 72138c2ecf20Sopenharmony_ci * 72148c2ecf20Sopenharmony_ci * For MMIO, guest CD/MTRR are ignored. The EPT memory type is set to 72158c2ecf20Sopenharmony_ci * UC. The effective memory type is UC or WC depending on guest PAT. 72168c2ecf20Sopenharmony_ci * This was historically the source of MCEs and we want to be 72178c2ecf20Sopenharmony_ci * conservative. 72188c2ecf20Sopenharmony_ci * 72198c2ecf20Sopenharmony_ci * When there is no need to deal with noncoherent DMA (e.g., no VT-d 72208c2ecf20Sopenharmony_ci * or VT-d has snoop control), guest CD/MTRR/PAT are all ignored. The 72218c2ecf20Sopenharmony_ci * EPT memory type is set to WB. The effective memory type is forced 72228c2ecf20Sopenharmony_ci * WB. 72238c2ecf20Sopenharmony_ci * 72248c2ecf20Sopenharmony_ci * Otherwise, we trust guest. Guest CD/MTRR/PAT are all honored. The 72258c2ecf20Sopenharmony_ci * EPT memory type is used to emulate guest CD/MTRR. 72268c2ecf20Sopenharmony_ci */ 72278c2ecf20Sopenharmony_ci 72288c2ecf20Sopenharmony_ci if (is_mmio) { 72298c2ecf20Sopenharmony_ci cache = MTRR_TYPE_UNCACHABLE; 72308c2ecf20Sopenharmony_ci goto exit; 72318c2ecf20Sopenharmony_ci } 72328c2ecf20Sopenharmony_ci 72338c2ecf20Sopenharmony_ci if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { 72348c2ecf20Sopenharmony_ci ipat = VMX_EPT_IPAT_BIT; 72358c2ecf20Sopenharmony_ci cache = MTRR_TYPE_WRBACK; 72368c2ecf20Sopenharmony_ci goto exit; 72378c2ecf20Sopenharmony_ci } 72388c2ecf20Sopenharmony_ci 72398c2ecf20Sopenharmony_ci if (kvm_read_cr0(vcpu) & X86_CR0_CD) { 72408c2ecf20Sopenharmony_ci ipat = VMX_EPT_IPAT_BIT; 72418c2ecf20Sopenharmony_ci if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) 72428c2ecf20Sopenharmony_ci cache = MTRR_TYPE_WRBACK; 72438c2ecf20Sopenharmony_ci else 72448c2ecf20Sopenharmony_ci cache = MTRR_TYPE_UNCACHABLE; 72458c2ecf20Sopenharmony_ci goto exit; 72468c2ecf20Sopenharmony_ci } 72478c2ecf20Sopenharmony_ci 72488c2ecf20Sopenharmony_ci cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn); 72498c2ecf20Sopenharmony_ci 72508c2ecf20Sopenharmony_ciexit: 72518c2ecf20Sopenharmony_ci return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat; 72528c2ecf20Sopenharmony_ci} 72538c2ecf20Sopenharmony_ci 72548c2ecf20Sopenharmony_cistatic void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx) 72558c2ecf20Sopenharmony_ci{ 72568c2ecf20Sopenharmony_ci /* 72578c2ecf20Sopenharmony_ci * These bits in the secondary execution controls field 72588c2ecf20Sopenharmony_ci * are dynamic, the others are mostly based on the hypervisor 72598c2ecf20Sopenharmony_ci * architecture and the guest's CPUID. Do not touch the 72608c2ecf20Sopenharmony_ci * dynamic bits. 72618c2ecf20Sopenharmony_ci */ 72628c2ecf20Sopenharmony_ci u32 mask = 72638c2ecf20Sopenharmony_ci SECONDARY_EXEC_SHADOW_VMCS | 72648c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | 72658c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 72668c2ecf20Sopenharmony_ci SECONDARY_EXEC_DESC; 72678c2ecf20Sopenharmony_ci 72688c2ecf20Sopenharmony_ci u32 new_ctl = vmx->secondary_exec_control; 72698c2ecf20Sopenharmony_ci u32 cur_ctl = secondary_exec_controls_get(vmx); 72708c2ecf20Sopenharmony_ci 72718c2ecf20Sopenharmony_ci secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask)); 72728c2ecf20Sopenharmony_ci} 72738c2ecf20Sopenharmony_ci 72748c2ecf20Sopenharmony_ci/* 72758c2ecf20Sopenharmony_ci * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits 72768c2ecf20Sopenharmony_ci * (indicating "allowed-1") if they are supported in the guest's CPUID. 72778c2ecf20Sopenharmony_ci */ 72788c2ecf20Sopenharmony_cistatic void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) 72798c2ecf20Sopenharmony_ci{ 72808c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 72818c2ecf20Sopenharmony_ci struct kvm_cpuid_entry2 *entry; 72828c2ecf20Sopenharmony_ci 72838c2ecf20Sopenharmony_ci vmx->nested.msrs.cr0_fixed1 = 0xffffffff; 72848c2ecf20Sopenharmony_ci vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE; 72858c2ecf20Sopenharmony_ci 72868c2ecf20Sopenharmony_ci#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \ 72878c2ecf20Sopenharmony_ci if (entry && (entry->_reg & (_cpuid_mask))) \ 72888c2ecf20Sopenharmony_ci vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \ 72898c2ecf20Sopenharmony_ci} while (0) 72908c2ecf20Sopenharmony_ci 72918c2ecf20Sopenharmony_ci entry = kvm_find_cpuid_entry(vcpu, 0x1, 0); 72928c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_VME, edx, feature_bit(VME)); 72938c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_PVI, edx, feature_bit(VME)); 72948c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_TSD, edx, feature_bit(TSC)); 72958c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_DE, edx, feature_bit(DE)); 72968c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_PSE, edx, feature_bit(PSE)); 72978c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_PAE, edx, feature_bit(PAE)); 72988c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_MCE, edx, feature_bit(MCE)); 72998c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_PGE, edx, feature_bit(PGE)); 73008c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_OSFXSR, edx, feature_bit(FXSR)); 73018c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, feature_bit(XMM)); 73028c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_VMXE, ecx, feature_bit(VMX)); 73038c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_SMXE, ecx, feature_bit(SMX)); 73048c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_PCIDE, ecx, feature_bit(PCID)); 73058c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, feature_bit(XSAVE)); 73068c2ecf20Sopenharmony_ci 73078c2ecf20Sopenharmony_ci entry = kvm_find_cpuid_entry(vcpu, 0x7, 0); 73088c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, feature_bit(FSGSBASE)); 73098c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_SMEP, ebx, feature_bit(SMEP)); 73108c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_SMAP, ebx, feature_bit(SMAP)); 73118c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_PKE, ecx, feature_bit(PKU)); 73128c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP)); 73138c2ecf20Sopenharmony_ci cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57)); 73148c2ecf20Sopenharmony_ci 73158c2ecf20Sopenharmony_ci#undef cr4_fixed1_update 73168c2ecf20Sopenharmony_ci} 73178c2ecf20Sopenharmony_ci 73188c2ecf20Sopenharmony_cistatic void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) 73198c2ecf20Sopenharmony_ci{ 73208c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 73218c2ecf20Sopenharmony_ci 73228c2ecf20Sopenharmony_ci if (kvm_mpx_supported()) { 73238c2ecf20Sopenharmony_ci bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX); 73248c2ecf20Sopenharmony_ci 73258c2ecf20Sopenharmony_ci if (mpx_enabled) { 73268c2ecf20Sopenharmony_ci vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; 73278c2ecf20Sopenharmony_ci vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; 73288c2ecf20Sopenharmony_ci } else { 73298c2ecf20Sopenharmony_ci vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS; 73308c2ecf20Sopenharmony_ci vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS; 73318c2ecf20Sopenharmony_ci } 73328c2ecf20Sopenharmony_ci } 73338c2ecf20Sopenharmony_ci} 73348c2ecf20Sopenharmony_ci 73358c2ecf20Sopenharmony_cistatic void update_intel_pt_cfg(struct kvm_vcpu *vcpu) 73368c2ecf20Sopenharmony_ci{ 73378c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 73388c2ecf20Sopenharmony_ci struct kvm_cpuid_entry2 *best = NULL; 73398c2ecf20Sopenharmony_ci int i; 73408c2ecf20Sopenharmony_ci 73418c2ecf20Sopenharmony_ci for (i = 0; i < PT_CPUID_LEAVES; i++) { 73428c2ecf20Sopenharmony_ci best = kvm_find_cpuid_entry(vcpu, 0x14, i); 73438c2ecf20Sopenharmony_ci if (!best) 73448c2ecf20Sopenharmony_ci return; 73458c2ecf20Sopenharmony_ci vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax; 73468c2ecf20Sopenharmony_ci vmx->pt_desc.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM] = best->ebx; 73478c2ecf20Sopenharmony_ci vmx->pt_desc.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM] = best->ecx; 73488c2ecf20Sopenharmony_ci vmx->pt_desc.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM] = best->edx; 73498c2ecf20Sopenharmony_ci } 73508c2ecf20Sopenharmony_ci 73518c2ecf20Sopenharmony_ci /* Get the number of configurable Address Ranges for filtering */ 73528c2ecf20Sopenharmony_ci vmx->pt_desc.addr_range = intel_pt_validate_cap(vmx->pt_desc.caps, 73538c2ecf20Sopenharmony_ci PT_CAP_num_address_ranges); 73548c2ecf20Sopenharmony_ci 73558c2ecf20Sopenharmony_ci /* Initialize and clear the no dependency bits */ 73568c2ecf20Sopenharmony_ci vmx->pt_desc.ctl_bitmask = ~(RTIT_CTL_TRACEEN | RTIT_CTL_OS | 73578c2ecf20Sopenharmony_ci RTIT_CTL_USR | RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC); 73588c2ecf20Sopenharmony_ci 73598c2ecf20Sopenharmony_ci /* 73608c2ecf20Sopenharmony_ci * If CPUID.(EAX=14H,ECX=0):EBX[0]=1 CR3Filter can be set otherwise 73618c2ecf20Sopenharmony_ci * will inject an #GP 73628c2ecf20Sopenharmony_ci */ 73638c2ecf20Sopenharmony_ci if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_cr3_filtering)) 73648c2ecf20Sopenharmony_ci vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_CR3EN; 73658c2ecf20Sopenharmony_ci 73668c2ecf20Sopenharmony_ci /* 73678c2ecf20Sopenharmony_ci * If CPUID.(EAX=14H,ECX=0):EBX[1]=1 CYCEn, CycThresh and 73688c2ecf20Sopenharmony_ci * PSBFreq can be set 73698c2ecf20Sopenharmony_ci */ 73708c2ecf20Sopenharmony_ci if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc)) 73718c2ecf20Sopenharmony_ci vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_CYCLEACC | 73728c2ecf20Sopenharmony_ci RTIT_CTL_CYC_THRESH | RTIT_CTL_PSB_FREQ); 73738c2ecf20Sopenharmony_ci 73748c2ecf20Sopenharmony_ci /* 73758c2ecf20Sopenharmony_ci * If CPUID.(EAX=14H,ECX=0):EBX[3]=1 MTCEn BranchEn and 73768c2ecf20Sopenharmony_ci * MTCFreq can be set 73778c2ecf20Sopenharmony_ci */ 73788c2ecf20Sopenharmony_ci if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc)) 73798c2ecf20Sopenharmony_ci vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_MTC_EN | 73808c2ecf20Sopenharmony_ci RTIT_CTL_BRANCH_EN | RTIT_CTL_MTC_RANGE); 73818c2ecf20Sopenharmony_ci 73828c2ecf20Sopenharmony_ci /* If CPUID.(EAX=14H,ECX=0):EBX[4]=1 FUPonPTW and PTWEn can be set */ 73838c2ecf20Sopenharmony_ci if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_ptwrite)) 73848c2ecf20Sopenharmony_ci vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_FUP_ON_PTW | 73858c2ecf20Sopenharmony_ci RTIT_CTL_PTW_EN); 73868c2ecf20Sopenharmony_ci 73878c2ecf20Sopenharmony_ci /* If CPUID.(EAX=14H,ECX=0):EBX[5]=1 PwrEvEn can be set */ 73888c2ecf20Sopenharmony_ci if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_power_event_trace)) 73898c2ecf20Sopenharmony_ci vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_PWR_EVT_EN; 73908c2ecf20Sopenharmony_ci 73918c2ecf20Sopenharmony_ci /* If CPUID.(EAX=14H,ECX=0):ECX[0]=1 ToPA can be set */ 73928c2ecf20Sopenharmony_ci if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_topa_output)) 73938c2ecf20Sopenharmony_ci vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_TOPA; 73948c2ecf20Sopenharmony_ci 73958c2ecf20Sopenharmony_ci /* If CPUID.(EAX=14H,ECX=0):ECX[3]=1 FabircEn can be set */ 73968c2ecf20Sopenharmony_ci if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_output_subsys)) 73978c2ecf20Sopenharmony_ci vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_FABRIC_EN; 73988c2ecf20Sopenharmony_ci 73998c2ecf20Sopenharmony_ci /* unmask address range configure area */ 74008c2ecf20Sopenharmony_ci for (i = 0; i < vmx->pt_desc.addr_range; i++) 74018c2ecf20Sopenharmony_ci vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4)); 74028c2ecf20Sopenharmony_ci} 74038c2ecf20Sopenharmony_ci 74048c2ecf20Sopenharmony_cistatic void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) 74058c2ecf20Sopenharmony_ci{ 74068c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 74078c2ecf20Sopenharmony_ci 74088c2ecf20Sopenharmony_ci /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */ 74098c2ecf20Sopenharmony_ci vcpu->arch.xsaves_enabled = false; 74108c2ecf20Sopenharmony_ci 74118c2ecf20Sopenharmony_ci if (cpu_has_secondary_exec_ctrls()) { 74128c2ecf20Sopenharmony_ci vmx_compute_secondary_exec_control(vmx); 74138c2ecf20Sopenharmony_ci vmcs_set_secondary_exec_control(vmx); 74148c2ecf20Sopenharmony_ci } 74158c2ecf20Sopenharmony_ci 74168c2ecf20Sopenharmony_ci if (nested_vmx_allowed(vcpu)) 74178c2ecf20Sopenharmony_ci to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= 74188c2ecf20Sopenharmony_ci FEAT_CTL_VMX_ENABLED_INSIDE_SMX | 74198c2ecf20Sopenharmony_ci FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; 74208c2ecf20Sopenharmony_ci else 74218c2ecf20Sopenharmony_ci to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= 74228c2ecf20Sopenharmony_ci ~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX | 74238c2ecf20Sopenharmony_ci FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX); 74248c2ecf20Sopenharmony_ci 74258c2ecf20Sopenharmony_ci if (nested_vmx_allowed(vcpu)) { 74268c2ecf20Sopenharmony_ci nested_vmx_cr_fixed1_bits_update(vcpu); 74278c2ecf20Sopenharmony_ci nested_vmx_entry_exit_ctls_update(vcpu); 74288c2ecf20Sopenharmony_ci } 74298c2ecf20Sopenharmony_ci 74308c2ecf20Sopenharmony_ci if (boot_cpu_has(X86_FEATURE_INTEL_PT) && 74318c2ecf20Sopenharmony_ci guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) 74328c2ecf20Sopenharmony_ci update_intel_pt_cfg(vcpu); 74338c2ecf20Sopenharmony_ci 74348c2ecf20Sopenharmony_ci if (boot_cpu_has(X86_FEATURE_RTM)) { 74358c2ecf20Sopenharmony_ci struct vmx_uret_msr *msr; 74368c2ecf20Sopenharmony_ci msr = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL); 74378c2ecf20Sopenharmony_ci if (msr) { 74388c2ecf20Sopenharmony_ci bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM); 74398c2ecf20Sopenharmony_ci vmx_set_guest_uret_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE); 74408c2ecf20Sopenharmony_ci } 74418c2ecf20Sopenharmony_ci } 74428c2ecf20Sopenharmony_ci 74438c2ecf20Sopenharmony_ci set_cr4_guest_host_mask(vmx); 74448c2ecf20Sopenharmony_ci 74458c2ecf20Sopenharmony_ci /* Refresh #PF interception to account for MAXPHYADDR changes. */ 74468c2ecf20Sopenharmony_ci update_exception_bitmap(vcpu); 74478c2ecf20Sopenharmony_ci} 74488c2ecf20Sopenharmony_ci 74498c2ecf20Sopenharmony_cistatic __init void vmx_set_cpu_caps(void) 74508c2ecf20Sopenharmony_ci{ 74518c2ecf20Sopenharmony_ci kvm_set_cpu_caps(); 74528c2ecf20Sopenharmony_ci 74538c2ecf20Sopenharmony_ci /* CPUID 0x1 */ 74548c2ecf20Sopenharmony_ci if (nested) 74558c2ecf20Sopenharmony_ci kvm_cpu_cap_set(X86_FEATURE_VMX); 74568c2ecf20Sopenharmony_ci 74578c2ecf20Sopenharmony_ci /* CPUID 0x7 */ 74588c2ecf20Sopenharmony_ci if (kvm_mpx_supported()) 74598c2ecf20Sopenharmony_ci kvm_cpu_cap_check_and_set(X86_FEATURE_MPX); 74608c2ecf20Sopenharmony_ci if (cpu_has_vmx_invpcid()) 74618c2ecf20Sopenharmony_ci kvm_cpu_cap_check_and_set(X86_FEATURE_INVPCID); 74628c2ecf20Sopenharmony_ci if (vmx_pt_mode_is_host_guest()) 74638c2ecf20Sopenharmony_ci kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT); 74648c2ecf20Sopenharmony_ci 74658c2ecf20Sopenharmony_ci if (vmx_umip_emulated()) 74668c2ecf20Sopenharmony_ci kvm_cpu_cap_set(X86_FEATURE_UMIP); 74678c2ecf20Sopenharmony_ci 74688c2ecf20Sopenharmony_ci /* CPUID 0xD.1 */ 74698c2ecf20Sopenharmony_ci supported_xss = 0; 74708c2ecf20Sopenharmony_ci if (!cpu_has_vmx_xsaves()) 74718c2ecf20Sopenharmony_ci kvm_cpu_cap_clear(X86_FEATURE_XSAVES); 74728c2ecf20Sopenharmony_ci 74738c2ecf20Sopenharmony_ci /* CPUID 0x80000001 and 0x7 (RDPID) */ 74748c2ecf20Sopenharmony_ci if (!cpu_has_vmx_rdtscp()) { 74758c2ecf20Sopenharmony_ci kvm_cpu_cap_clear(X86_FEATURE_RDTSCP); 74768c2ecf20Sopenharmony_ci kvm_cpu_cap_clear(X86_FEATURE_RDPID); 74778c2ecf20Sopenharmony_ci } 74788c2ecf20Sopenharmony_ci 74798c2ecf20Sopenharmony_ci if (cpu_has_vmx_waitpkg()) 74808c2ecf20Sopenharmony_ci kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG); 74818c2ecf20Sopenharmony_ci} 74828c2ecf20Sopenharmony_ci 74838c2ecf20Sopenharmony_cistatic void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) 74848c2ecf20Sopenharmony_ci{ 74858c2ecf20Sopenharmony_ci to_vmx(vcpu)->req_immediate_exit = true; 74868c2ecf20Sopenharmony_ci} 74878c2ecf20Sopenharmony_ci 74888c2ecf20Sopenharmony_cistatic int vmx_check_intercept_io(struct kvm_vcpu *vcpu, 74898c2ecf20Sopenharmony_ci struct x86_instruction_info *info) 74908c2ecf20Sopenharmony_ci{ 74918c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 74928c2ecf20Sopenharmony_ci unsigned short port; 74938c2ecf20Sopenharmony_ci bool intercept; 74948c2ecf20Sopenharmony_ci int size; 74958c2ecf20Sopenharmony_ci 74968c2ecf20Sopenharmony_ci if (info->intercept == x86_intercept_in || 74978c2ecf20Sopenharmony_ci info->intercept == x86_intercept_ins) { 74988c2ecf20Sopenharmony_ci port = info->src_val; 74998c2ecf20Sopenharmony_ci size = info->dst_bytes; 75008c2ecf20Sopenharmony_ci } else { 75018c2ecf20Sopenharmony_ci port = info->dst_val; 75028c2ecf20Sopenharmony_ci size = info->src_bytes; 75038c2ecf20Sopenharmony_ci } 75048c2ecf20Sopenharmony_ci 75058c2ecf20Sopenharmony_ci /* 75068c2ecf20Sopenharmony_ci * If the 'use IO bitmaps' VM-execution control is 0, IO instruction 75078c2ecf20Sopenharmony_ci * VM-exits depend on the 'unconditional IO exiting' VM-execution 75088c2ecf20Sopenharmony_ci * control. 75098c2ecf20Sopenharmony_ci * 75108c2ecf20Sopenharmony_ci * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps. 75118c2ecf20Sopenharmony_ci */ 75128c2ecf20Sopenharmony_ci if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) 75138c2ecf20Sopenharmony_ci intercept = nested_cpu_has(vmcs12, 75148c2ecf20Sopenharmony_ci CPU_BASED_UNCOND_IO_EXITING); 75158c2ecf20Sopenharmony_ci else 75168c2ecf20Sopenharmony_ci intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); 75178c2ecf20Sopenharmony_ci 75188c2ecf20Sopenharmony_ci /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ 75198c2ecf20Sopenharmony_ci return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; 75208c2ecf20Sopenharmony_ci} 75218c2ecf20Sopenharmony_ci 75228c2ecf20Sopenharmony_cistatic int vmx_check_intercept(struct kvm_vcpu *vcpu, 75238c2ecf20Sopenharmony_ci struct x86_instruction_info *info, 75248c2ecf20Sopenharmony_ci enum x86_intercept_stage stage, 75258c2ecf20Sopenharmony_ci struct x86_exception *exception) 75268c2ecf20Sopenharmony_ci{ 75278c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 75288c2ecf20Sopenharmony_ci 75298c2ecf20Sopenharmony_ci switch (info->intercept) { 75308c2ecf20Sopenharmony_ci /* 75318c2ecf20Sopenharmony_ci * RDPID causes #UD if disabled through secondary execution controls. 75328c2ecf20Sopenharmony_ci * Because it is marked as EmulateOnUD, we need to intercept it here. 75338c2ecf20Sopenharmony_ci * Note, RDPID is hidden behind ENABLE_RDTSCP. 75348c2ecf20Sopenharmony_ci */ 75358c2ecf20Sopenharmony_ci case x86_intercept_rdpid: 75368c2ecf20Sopenharmony_ci if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) { 75378c2ecf20Sopenharmony_ci exception->vector = UD_VECTOR; 75388c2ecf20Sopenharmony_ci exception->error_code_valid = false; 75398c2ecf20Sopenharmony_ci return X86EMUL_PROPAGATE_FAULT; 75408c2ecf20Sopenharmony_ci } 75418c2ecf20Sopenharmony_ci break; 75428c2ecf20Sopenharmony_ci 75438c2ecf20Sopenharmony_ci case x86_intercept_in: 75448c2ecf20Sopenharmony_ci case x86_intercept_ins: 75458c2ecf20Sopenharmony_ci case x86_intercept_out: 75468c2ecf20Sopenharmony_ci case x86_intercept_outs: 75478c2ecf20Sopenharmony_ci return vmx_check_intercept_io(vcpu, info); 75488c2ecf20Sopenharmony_ci 75498c2ecf20Sopenharmony_ci case x86_intercept_lgdt: 75508c2ecf20Sopenharmony_ci case x86_intercept_lidt: 75518c2ecf20Sopenharmony_ci case x86_intercept_lldt: 75528c2ecf20Sopenharmony_ci case x86_intercept_ltr: 75538c2ecf20Sopenharmony_ci case x86_intercept_sgdt: 75548c2ecf20Sopenharmony_ci case x86_intercept_sidt: 75558c2ecf20Sopenharmony_ci case x86_intercept_sldt: 75568c2ecf20Sopenharmony_ci case x86_intercept_str: 75578c2ecf20Sopenharmony_ci if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC)) 75588c2ecf20Sopenharmony_ci return X86EMUL_CONTINUE; 75598c2ecf20Sopenharmony_ci 75608c2ecf20Sopenharmony_ci /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ 75618c2ecf20Sopenharmony_ci break; 75628c2ecf20Sopenharmony_ci 75638c2ecf20Sopenharmony_ci case x86_intercept_pause: 75648c2ecf20Sopenharmony_ci /* 75658c2ecf20Sopenharmony_ci * PAUSE is a single-byte NOP with a REPE prefix, i.e. collides 75668c2ecf20Sopenharmony_ci * with vanilla NOPs in the emulator. Apply the interception 75678c2ecf20Sopenharmony_ci * check only to actual PAUSE instructions. Don't check 75688c2ecf20Sopenharmony_ci * PAUSE-loop-exiting, software can't expect a given PAUSE to 75698c2ecf20Sopenharmony_ci * exit, i.e. KVM is within its rights to allow L2 to execute 75708c2ecf20Sopenharmony_ci * the PAUSE. 75718c2ecf20Sopenharmony_ci */ 75728c2ecf20Sopenharmony_ci if ((info->rep_prefix != REPE_PREFIX) || 75738c2ecf20Sopenharmony_ci !nested_cpu_has2(vmcs12, CPU_BASED_PAUSE_EXITING)) 75748c2ecf20Sopenharmony_ci return X86EMUL_CONTINUE; 75758c2ecf20Sopenharmony_ci 75768c2ecf20Sopenharmony_ci break; 75778c2ecf20Sopenharmony_ci 75788c2ecf20Sopenharmony_ci /* TODO: check more intercepts... */ 75798c2ecf20Sopenharmony_ci default: 75808c2ecf20Sopenharmony_ci break; 75818c2ecf20Sopenharmony_ci } 75828c2ecf20Sopenharmony_ci 75838c2ecf20Sopenharmony_ci return X86EMUL_UNHANDLEABLE; 75848c2ecf20Sopenharmony_ci} 75858c2ecf20Sopenharmony_ci 75868c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 75878c2ecf20Sopenharmony_ci/* (a << shift) / divisor, return 1 if overflow otherwise 0 */ 75888c2ecf20Sopenharmony_cistatic inline int u64_shl_div_u64(u64 a, unsigned int shift, 75898c2ecf20Sopenharmony_ci u64 divisor, u64 *result) 75908c2ecf20Sopenharmony_ci{ 75918c2ecf20Sopenharmony_ci u64 low = a << shift, high = a >> (64 - shift); 75928c2ecf20Sopenharmony_ci 75938c2ecf20Sopenharmony_ci /* To avoid the overflow on divq */ 75948c2ecf20Sopenharmony_ci if (high >= divisor) 75958c2ecf20Sopenharmony_ci return 1; 75968c2ecf20Sopenharmony_ci 75978c2ecf20Sopenharmony_ci /* Low hold the result, high hold rem which is discarded */ 75988c2ecf20Sopenharmony_ci asm("divq %2\n\t" : "=a" (low), "=d" (high) : 75998c2ecf20Sopenharmony_ci "rm" (divisor), "0" (low), "1" (high)); 76008c2ecf20Sopenharmony_ci *result = low; 76018c2ecf20Sopenharmony_ci 76028c2ecf20Sopenharmony_ci return 0; 76038c2ecf20Sopenharmony_ci} 76048c2ecf20Sopenharmony_ci 76058c2ecf20Sopenharmony_cistatic int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, 76068c2ecf20Sopenharmony_ci bool *expired) 76078c2ecf20Sopenharmony_ci{ 76088c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx; 76098c2ecf20Sopenharmony_ci u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; 76108c2ecf20Sopenharmony_ci struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer; 76118c2ecf20Sopenharmony_ci 76128c2ecf20Sopenharmony_ci vmx = to_vmx(vcpu); 76138c2ecf20Sopenharmony_ci tscl = rdtsc(); 76148c2ecf20Sopenharmony_ci guest_tscl = kvm_read_l1_tsc(vcpu, tscl); 76158c2ecf20Sopenharmony_ci delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; 76168c2ecf20Sopenharmony_ci lapic_timer_advance_cycles = nsec_to_cycles(vcpu, 76178c2ecf20Sopenharmony_ci ktimer->timer_advance_ns); 76188c2ecf20Sopenharmony_ci 76198c2ecf20Sopenharmony_ci if (delta_tsc > lapic_timer_advance_cycles) 76208c2ecf20Sopenharmony_ci delta_tsc -= lapic_timer_advance_cycles; 76218c2ecf20Sopenharmony_ci else 76228c2ecf20Sopenharmony_ci delta_tsc = 0; 76238c2ecf20Sopenharmony_ci 76248c2ecf20Sopenharmony_ci /* Convert to host delta tsc if tsc scaling is enabled */ 76258c2ecf20Sopenharmony_ci if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio && 76268c2ecf20Sopenharmony_ci delta_tsc && u64_shl_div_u64(delta_tsc, 76278c2ecf20Sopenharmony_ci kvm_tsc_scaling_ratio_frac_bits, 76288c2ecf20Sopenharmony_ci vcpu->arch.tsc_scaling_ratio, &delta_tsc)) 76298c2ecf20Sopenharmony_ci return -ERANGE; 76308c2ecf20Sopenharmony_ci 76318c2ecf20Sopenharmony_ci /* 76328c2ecf20Sopenharmony_ci * If the delta tsc can't fit in the 32 bit after the multi shift, 76338c2ecf20Sopenharmony_ci * we can't use the preemption timer. 76348c2ecf20Sopenharmony_ci * It's possible that it fits on later vmentries, but checking 76358c2ecf20Sopenharmony_ci * on every vmentry is costly so we just use an hrtimer. 76368c2ecf20Sopenharmony_ci */ 76378c2ecf20Sopenharmony_ci if (delta_tsc >> (cpu_preemption_timer_multi + 32)) 76388c2ecf20Sopenharmony_ci return -ERANGE; 76398c2ecf20Sopenharmony_ci 76408c2ecf20Sopenharmony_ci vmx->hv_deadline_tsc = tscl + delta_tsc; 76418c2ecf20Sopenharmony_ci *expired = !delta_tsc; 76428c2ecf20Sopenharmony_ci return 0; 76438c2ecf20Sopenharmony_ci} 76448c2ecf20Sopenharmony_ci 76458c2ecf20Sopenharmony_cistatic void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) 76468c2ecf20Sopenharmony_ci{ 76478c2ecf20Sopenharmony_ci to_vmx(vcpu)->hv_deadline_tsc = -1; 76488c2ecf20Sopenharmony_ci} 76498c2ecf20Sopenharmony_ci#endif 76508c2ecf20Sopenharmony_ci 76518c2ecf20Sopenharmony_cistatic void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) 76528c2ecf20Sopenharmony_ci{ 76538c2ecf20Sopenharmony_ci if (!kvm_pause_in_guest(vcpu->kvm)) 76548c2ecf20Sopenharmony_ci shrink_ple_window(vcpu); 76558c2ecf20Sopenharmony_ci} 76568c2ecf20Sopenharmony_ci 76578c2ecf20Sopenharmony_cistatic void vmx_slot_enable_log_dirty(struct kvm *kvm, 76588c2ecf20Sopenharmony_ci struct kvm_memory_slot *slot) 76598c2ecf20Sopenharmony_ci{ 76608c2ecf20Sopenharmony_ci if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) 76618c2ecf20Sopenharmony_ci kvm_mmu_slot_leaf_clear_dirty(kvm, slot); 76628c2ecf20Sopenharmony_ci kvm_mmu_slot_largepage_remove_write_access(kvm, slot); 76638c2ecf20Sopenharmony_ci} 76648c2ecf20Sopenharmony_ci 76658c2ecf20Sopenharmony_cistatic void vmx_slot_disable_log_dirty(struct kvm *kvm, 76668c2ecf20Sopenharmony_ci struct kvm_memory_slot *slot) 76678c2ecf20Sopenharmony_ci{ 76688c2ecf20Sopenharmony_ci kvm_mmu_slot_set_dirty(kvm, slot); 76698c2ecf20Sopenharmony_ci} 76708c2ecf20Sopenharmony_ci 76718c2ecf20Sopenharmony_cistatic void vmx_flush_log_dirty(struct kvm *kvm) 76728c2ecf20Sopenharmony_ci{ 76738c2ecf20Sopenharmony_ci kvm_flush_pml_buffers(kvm); 76748c2ecf20Sopenharmony_ci} 76758c2ecf20Sopenharmony_ci 76768c2ecf20Sopenharmony_cistatic void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, 76778c2ecf20Sopenharmony_ci struct kvm_memory_slot *memslot, 76788c2ecf20Sopenharmony_ci gfn_t offset, unsigned long mask) 76798c2ecf20Sopenharmony_ci{ 76808c2ecf20Sopenharmony_ci kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); 76818c2ecf20Sopenharmony_ci} 76828c2ecf20Sopenharmony_ci 76838c2ecf20Sopenharmony_cistatic int vmx_pre_block(struct kvm_vcpu *vcpu) 76848c2ecf20Sopenharmony_ci{ 76858c2ecf20Sopenharmony_ci if (pi_pre_block(vcpu)) 76868c2ecf20Sopenharmony_ci return 1; 76878c2ecf20Sopenharmony_ci 76888c2ecf20Sopenharmony_ci if (kvm_lapic_hv_timer_in_use(vcpu)) 76898c2ecf20Sopenharmony_ci kvm_lapic_switch_to_sw_timer(vcpu); 76908c2ecf20Sopenharmony_ci 76918c2ecf20Sopenharmony_ci return 0; 76928c2ecf20Sopenharmony_ci} 76938c2ecf20Sopenharmony_ci 76948c2ecf20Sopenharmony_cistatic void vmx_post_block(struct kvm_vcpu *vcpu) 76958c2ecf20Sopenharmony_ci{ 76968c2ecf20Sopenharmony_ci if (kvm_x86_ops.set_hv_timer) 76978c2ecf20Sopenharmony_ci kvm_lapic_switch_to_hv_timer(vcpu); 76988c2ecf20Sopenharmony_ci 76998c2ecf20Sopenharmony_ci pi_post_block(vcpu); 77008c2ecf20Sopenharmony_ci} 77018c2ecf20Sopenharmony_ci 77028c2ecf20Sopenharmony_cistatic void vmx_setup_mce(struct kvm_vcpu *vcpu) 77038c2ecf20Sopenharmony_ci{ 77048c2ecf20Sopenharmony_ci if (vcpu->arch.mcg_cap & MCG_LMCE_P) 77058c2ecf20Sopenharmony_ci to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= 77068c2ecf20Sopenharmony_ci FEAT_CTL_LMCE_ENABLED; 77078c2ecf20Sopenharmony_ci else 77088c2ecf20Sopenharmony_ci to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= 77098c2ecf20Sopenharmony_ci ~FEAT_CTL_LMCE_ENABLED; 77108c2ecf20Sopenharmony_ci} 77118c2ecf20Sopenharmony_ci 77128c2ecf20Sopenharmony_cistatic int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) 77138c2ecf20Sopenharmony_ci{ 77148c2ecf20Sopenharmony_ci /* we need a nested vmexit to enter SMM, postpone if run is pending */ 77158c2ecf20Sopenharmony_ci if (to_vmx(vcpu)->nested.nested_run_pending) 77168c2ecf20Sopenharmony_ci return -EBUSY; 77178c2ecf20Sopenharmony_ci return !is_smm(vcpu); 77188c2ecf20Sopenharmony_ci} 77198c2ecf20Sopenharmony_ci 77208c2ecf20Sopenharmony_cistatic int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) 77218c2ecf20Sopenharmony_ci{ 77228c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 77238c2ecf20Sopenharmony_ci 77248c2ecf20Sopenharmony_ci vmx->nested.smm.guest_mode = is_guest_mode(vcpu); 77258c2ecf20Sopenharmony_ci if (vmx->nested.smm.guest_mode) 77268c2ecf20Sopenharmony_ci nested_vmx_vmexit(vcpu, -1, 0, 0); 77278c2ecf20Sopenharmony_ci 77288c2ecf20Sopenharmony_ci vmx->nested.smm.vmxon = vmx->nested.vmxon; 77298c2ecf20Sopenharmony_ci vmx->nested.vmxon = false; 77308c2ecf20Sopenharmony_ci vmx_clear_hlt(vcpu); 77318c2ecf20Sopenharmony_ci return 0; 77328c2ecf20Sopenharmony_ci} 77338c2ecf20Sopenharmony_ci 77348c2ecf20Sopenharmony_cistatic int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) 77358c2ecf20Sopenharmony_ci{ 77368c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 77378c2ecf20Sopenharmony_ci int ret; 77388c2ecf20Sopenharmony_ci 77398c2ecf20Sopenharmony_ci if (vmx->nested.smm.vmxon) { 77408c2ecf20Sopenharmony_ci vmx->nested.vmxon = true; 77418c2ecf20Sopenharmony_ci vmx->nested.smm.vmxon = false; 77428c2ecf20Sopenharmony_ci } 77438c2ecf20Sopenharmony_ci 77448c2ecf20Sopenharmony_ci if (vmx->nested.smm.guest_mode) { 77458c2ecf20Sopenharmony_ci ret = nested_vmx_enter_non_root_mode(vcpu, false); 77468c2ecf20Sopenharmony_ci if (ret) 77478c2ecf20Sopenharmony_ci return ret; 77488c2ecf20Sopenharmony_ci 77498c2ecf20Sopenharmony_ci vmx->nested.smm.guest_mode = false; 77508c2ecf20Sopenharmony_ci } 77518c2ecf20Sopenharmony_ci return 0; 77528c2ecf20Sopenharmony_ci} 77538c2ecf20Sopenharmony_ci 77548c2ecf20Sopenharmony_cistatic void enable_smi_window(struct kvm_vcpu *vcpu) 77558c2ecf20Sopenharmony_ci{ 77568c2ecf20Sopenharmony_ci /* RSM will cause a vmexit anyway. */ 77578c2ecf20Sopenharmony_ci} 77588c2ecf20Sopenharmony_ci 77598c2ecf20Sopenharmony_cistatic bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) 77608c2ecf20Sopenharmony_ci{ 77618c2ecf20Sopenharmony_ci return to_vmx(vcpu)->nested.vmxon; 77628c2ecf20Sopenharmony_ci} 77638c2ecf20Sopenharmony_ci 77648c2ecf20Sopenharmony_cistatic void vmx_migrate_timers(struct kvm_vcpu *vcpu) 77658c2ecf20Sopenharmony_ci{ 77668c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) { 77678c2ecf20Sopenharmony_ci struct hrtimer *timer = &to_vmx(vcpu)->nested.preemption_timer; 77688c2ecf20Sopenharmony_ci 77698c2ecf20Sopenharmony_ci if (hrtimer_try_to_cancel(timer) == 1) 77708c2ecf20Sopenharmony_ci hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); 77718c2ecf20Sopenharmony_ci } 77728c2ecf20Sopenharmony_ci} 77738c2ecf20Sopenharmony_ci 77748c2ecf20Sopenharmony_cistatic void hardware_unsetup(void) 77758c2ecf20Sopenharmony_ci{ 77768c2ecf20Sopenharmony_ci kvm_set_posted_intr_wakeup_handler(NULL); 77778c2ecf20Sopenharmony_ci 77788c2ecf20Sopenharmony_ci if (nested) 77798c2ecf20Sopenharmony_ci nested_vmx_hardware_unsetup(); 77808c2ecf20Sopenharmony_ci 77818c2ecf20Sopenharmony_ci free_kvm_area(); 77828c2ecf20Sopenharmony_ci} 77838c2ecf20Sopenharmony_ci 77848c2ecf20Sopenharmony_cistatic bool vmx_check_apicv_inhibit_reasons(ulong bit) 77858c2ecf20Sopenharmony_ci{ 77868c2ecf20Sopenharmony_ci ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | 77878c2ecf20Sopenharmony_ci BIT(APICV_INHIBIT_REASON_HYPERV); 77888c2ecf20Sopenharmony_ci 77898c2ecf20Sopenharmony_ci return supported & BIT(bit); 77908c2ecf20Sopenharmony_ci} 77918c2ecf20Sopenharmony_ci 77928c2ecf20Sopenharmony_cistatic struct kvm_x86_ops vmx_x86_ops __initdata = { 77938c2ecf20Sopenharmony_ci .hardware_unsetup = hardware_unsetup, 77948c2ecf20Sopenharmony_ci 77958c2ecf20Sopenharmony_ci .hardware_enable = hardware_enable, 77968c2ecf20Sopenharmony_ci .hardware_disable = hardware_disable, 77978c2ecf20Sopenharmony_ci .cpu_has_accelerated_tpr = report_flexpriority, 77988c2ecf20Sopenharmony_ci .has_emulated_msr = vmx_has_emulated_msr, 77998c2ecf20Sopenharmony_ci 78008c2ecf20Sopenharmony_ci .vm_size = sizeof(struct kvm_vmx), 78018c2ecf20Sopenharmony_ci .vm_init = vmx_vm_init, 78028c2ecf20Sopenharmony_ci 78038c2ecf20Sopenharmony_ci .vcpu_create = vmx_create_vcpu, 78048c2ecf20Sopenharmony_ci .vcpu_free = vmx_free_vcpu, 78058c2ecf20Sopenharmony_ci .vcpu_reset = vmx_vcpu_reset, 78068c2ecf20Sopenharmony_ci 78078c2ecf20Sopenharmony_ci .prepare_guest_switch = vmx_prepare_switch_to_guest, 78088c2ecf20Sopenharmony_ci .vcpu_load = vmx_vcpu_load, 78098c2ecf20Sopenharmony_ci .vcpu_put = vmx_vcpu_put, 78108c2ecf20Sopenharmony_ci 78118c2ecf20Sopenharmony_ci .update_exception_bitmap = update_exception_bitmap, 78128c2ecf20Sopenharmony_ci .get_msr_feature = vmx_get_msr_feature, 78138c2ecf20Sopenharmony_ci .get_msr = vmx_get_msr, 78148c2ecf20Sopenharmony_ci .set_msr = vmx_set_msr, 78158c2ecf20Sopenharmony_ci .get_segment_base = vmx_get_segment_base, 78168c2ecf20Sopenharmony_ci .get_segment = vmx_get_segment, 78178c2ecf20Sopenharmony_ci .set_segment = vmx_set_segment, 78188c2ecf20Sopenharmony_ci .get_cpl = vmx_get_cpl, 78198c2ecf20Sopenharmony_ci .get_cs_db_l_bits = vmx_get_cs_db_l_bits, 78208c2ecf20Sopenharmony_ci .set_cr0 = vmx_set_cr0, 78218c2ecf20Sopenharmony_ci .is_valid_cr4 = vmx_is_valid_cr4, 78228c2ecf20Sopenharmony_ci .set_cr4 = vmx_set_cr4, 78238c2ecf20Sopenharmony_ci .set_efer = vmx_set_efer, 78248c2ecf20Sopenharmony_ci .get_idt = vmx_get_idt, 78258c2ecf20Sopenharmony_ci .set_idt = vmx_set_idt, 78268c2ecf20Sopenharmony_ci .get_gdt = vmx_get_gdt, 78278c2ecf20Sopenharmony_ci .set_gdt = vmx_set_gdt, 78288c2ecf20Sopenharmony_ci .set_dr7 = vmx_set_dr7, 78298c2ecf20Sopenharmony_ci .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, 78308c2ecf20Sopenharmony_ci .cache_reg = vmx_cache_reg, 78318c2ecf20Sopenharmony_ci .get_rflags = vmx_get_rflags, 78328c2ecf20Sopenharmony_ci .set_rflags = vmx_set_rflags, 78338c2ecf20Sopenharmony_ci 78348c2ecf20Sopenharmony_ci .tlb_flush_all = vmx_flush_tlb_all, 78358c2ecf20Sopenharmony_ci .tlb_flush_current = vmx_flush_tlb_current, 78368c2ecf20Sopenharmony_ci .tlb_flush_gva = vmx_flush_tlb_gva, 78378c2ecf20Sopenharmony_ci .tlb_flush_guest = vmx_flush_tlb_guest, 78388c2ecf20Sopenharmony_ci 78398c2ecf20Sopenharmony_ci .run = vmx_vcpu_run, 78408c2ecf20Sopenharmony_ci .handle_exit = vmx_handle_exit, 78418c2ecf20Sopenharmony_ci .skip_emulated_instruction = vmx_skip_emulated_instruction, 78428c2ecf20Sopenharmony_ci .update_emulated_instruction = vmx_update_emulated_instruction, 78438c2ecf20Sopenharmony_ci .set_interrupt_shadow = vmx_set_interrupt_shadow, 78448c2ecf20Sopenharmony_ci .get_interrupt_shadow = vmx_get_interrupt_shadow, 78458c2ecf20Sopenharmony_ci .patch_hypercall = vmx_patch_hypercall, 78468c2ecf20Sopenharmony_ci .set_irq = vmx_inject_irq, 78478c2ecf20Sopenharmony_ci .set_nmi = vmx_inject_nmi, 78488c2ecf20Sopenharmony_ci .queue_exception = vmx_queue_exception, 78498c2ecf20Sopenharmony_ci .cancel_injection = vmx_cancel_injection, 78508c2ecf20Sopenharmony_ci .interrupt_allowed = vmx_interrupt_allowed, 78518c2ecf20Sopenharmony_ci .nmi_allowed = vmx_nmi_allowed, 78528c2ecf20Sopenharmony_ci .get_nmi_mask = vmx_get_nmi_mask, 78538c2ecf20Sopenharmony_ci .set_nmi_mask = vmx_set_nmi_mask, 78548c2ecf20Sopenharmony_ci .enable_nmi_window = enable_nmi_window, 78558c2ecf20Sopenharmony_ci .enable_irq_window = enable_irq_window, 78568c2ecf20Sopenharmony_ci .update_cr8_intercept = update_cr8_intercept, 78578c2ecf20Sopenharmony_ci .set_virtual_apic_mode = vmx_set_virtual_apic_mode, 78588c2ecf20Sopenharmony_ci .set_apic_access_page_addr = vmx_set_apic_access_page_addr, 78598c2ecf20Sopenharmony_ci .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, 78608c2ecf20Sopenharmony_ci .load_eoi_exitmap = vmx_load_eoi_exitmap, 78618c2ecf20Sopenharmony_ci .apicv_post_state_restore = vmx_apicv_post_state_restore, 78628c2ecf20Sopenharmony_ci .check_apicv_inhibit_reasons = vmx_check_apicv_inhibit_reasons, 78638c2ecf20Sopenharmony_ci .hwapic_irr_update = vmx_hwapic_irr_update, 78648c2ecf20Sopenharmony_ci .hwapic_isr_update = vmx_hwapic_isr_update, 78658c2ecf20Sopenharmony_ci .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, 78668c2ecf20Sopenharmony_ci .sync_pir_to_irr = vmx_sync_pir_to_irr, 78678c2ecf20Sopenharmony_ci .deliver_posted_interrupt = vmx_deliver_posted_interrupt, 78688c2ecf20Sopenharmony_ci .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt, 78698c2ecf20Sopenharmony_ci 78708c2ecf20Sopenharmony_ci .set_tss_addr = vmx_set_tss_addr, 78718c2ecf20Sopenharmony_ci .set_identity_map_addr = vmx_set_identity_map_addr, 78728c2ecf20Sopenharmony_ci .get_mt_mask = vmx_get_mt_mask, 78738c2ecf20Sopenharmony_ci 78748c2ecf20Sopenharmony_ci .get_exit_info = vmx_get_exit_info, 78758c2ecf20Sopenharmony_ci 78768c2ecf20Sopenharmony_ci .vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid, 78778c2ecf20Sopenharmony_ci 78788c2ecf20Sopenharmony_ci .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, 78798c2ecf20Sopenharmony_ci 78808c2ecf20Sopenharmony_ci .write_l1_tsc_offset = vmx_write_l1_tsc_offset, 78818c2ecf20Sopenharmony_ci 78828c2ecf20Sopenharmony_ci .load_mmu_pgd = vmx_load_mmu_pgd, 78838c2ecf20Sopenharmony_ci 78848c2ecf20Sopenharmony_ci .check_intercept = vmx_check_intercept, 78858c2ecf20Sopenharmony_ci .handle_exit_irqoff = vmx_handle_exit_irqoff, 78868c2ecf20Sopenharmony_ci 78878c2ecf20Sopenharmony_ci .request_immediate_exit = vmx_request_immediate_exit, 78888c2ecf20Sopenharmony_ci 78898c2ecf20Sopenharmony_ci .sched_in = vmx_sched_in, 78908c2ecf20Sopenharmony_ci 78918c2ecf20Sopenharmony_ci .slot_enable_log_dirty = vmx_slot_enable_log_dirty, 78928c2ecf20Sopenharmony_ci .slot_disable_log_dirty = vmx_slot_disable_log_dirty, 78938c2ecf20Sopenharmony_ci .flush_log_dirty = vmx_flush_log_dirty, 78948c2ecf20Sopenharmony_ci .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, 78958c2ecf20Sopenharmony_ci 78968c2ecf20Sopenharmony_ci .pre_block = vmx_pre_block, 78978c2ecf20Sopenharmony_ci .post_block = vmx_post_block, 78988c2ecf20Sopenharmony_ci 78998c2ecf20Sopenharmony_ci .pmu_ops = &intel_pmu_ops, 79008c2ecf20Sopenharmony_ci .nested_ops = &vmx_nested_ops, 79018c2ecf20Sopenharmony_ci 79028c2ecf20Sopenharmony_ci .update_pi_irte = pi_update_irte, 79038c2ecf20Sopenharmony_ci 79048c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 79058c2ecf20Sopenharmony_ci .set_hv_timer = vmx_set_hv_timer, 79068c2ecf20Sopenharmony_ci .cancel_hv_timer = vmx_cancel_hv_timer, 79078c2ecf20Sopenharmony_ci#endif 79088c2ecf20Sopenharmony_ci 79098c2ecf20Sopenharmony_ci .setup_mce = vmx_setup_mce, 79108c2ecf20Sopenharmony_ci 79118c2ecf20Sopenharmony_ci .smi_allowed = vmx_smi_allowed, 79128c2ecf20Sopenharmony_ci .pre_enter_smm = vmx_pre_enter_smm, 79138c2ecf20Sopenharmony_ci .pre_leave_smm = vmx_pre_leave_smm, 79148c2ecf20Sopenharmony_ci .enable_smi_window = enable_smi_window, 79158c2ecf20Sopenharmony_ci 79168c2ecf20Sopenharmony_ci .can_emulate_instruction = vmx_can_emulate_instruction, 79178c2ecf20Sopenharmony_ci .apic_init_signal_blocked = vmx_apic_init_signal_blocked, 79188c2ecf20Sopenharmony_ci .migrate_timers = vmx_migrate_timers, 79198c2ecf20Sopenharmony_ci 79208c2ecf20Sopenharmony_ci .msr_filter_changed = vmx_msr_filter_changed, 79218c2ecf20Sopenharmony_ci}; 79228c2ecf20Sopenharmony_ci 79238c2ecf20Sopenharmony_cistatic __init int hardware_setup(void) 79248c2ecf20Sopenharmony_ci{ 79258c2ecf20Sopenharmony_ci unsigned long host_bndcfgs; 79268c2ecf20Sopenharmony_ci struct desc_ptr dt; 79278c2ecf20Sopenharmony_ci int r, i, ept_lpage_level; 79288c2ecf20Sopenharmony_ci 79298c2ecf20Sopenharmony_ci store_idt(&dt); 79308c2ecf20Sopenharmony_ci host_idt_base = dt.address; 79318c2ecf20Sopenharmony_ci 79328c2ecf20Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) 79338c2ecf20Sopenharmony_ci kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]); 79348c2ecf20Sopenharmony_ci 79358c2ecf20Sopenharmony_ci if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0) 79368c2ecf20Sopenharmony_ci return -EIO; 79378c2ecf20Sopenharmony_ci 79388c2ecf20Sopenharmony_ci if (boot_cpu_has(X86_FEATURE_NX)) 79398c2ecf20Sopenharmony_ci kvm_enable_efer_bits(EFER_NX); 79408c2ecf20Sopenharmony_ci 79418c2ecf20Sopenharmony_ci if (boot_cpu_has(X86_FEATURE_MPX)) { 79428c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs); 79438c2ecf20Sopenharmony_ci WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost"); 79448c2ecf20Sopenharmony_ci } 79458c2ecf20Sopenharmony_ci 79468c2ecf20Sopenharmony_ci if (!cpu_has_vmx_mpx()) 79478c2ecf20Sopenharmony_ci supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | 79488c2ecf20Sopenharmony_ci XFEATURE_MASK_BNDCSR); 79498c2ecf20Sopenharmony_ci 79508c2ecf20Sopenharmony_ci if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || 79518c2ecf20Sopenharmony_ci !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) 79528c2ecf20Sopenharmony_ci enable_vpid = 0; 79538c2ecf20Sopenharmony_ci 79548c2ecf20Sopenharmony_ci if (!cpu_has_vmx_ept() || 79558c2ecf20Sopenharmony_ci !cpu_has_vmx_ept_4levels() || 79568c2ecf20Sopenharmony_ci !cpu_has_vmx_ept_mt_wb() || 79578c2ecf20Sopenharmony_ci !cpu_has_vmx_invept_global()) 79588c2ecf20Sopenharmony_ci enable_ept = 0; 79598c2ecf20Sopenharmony_ci 79608c2ecf20Sopenharmony_ci if (!cpu_has_vmx_ept_ad_bits() || !enable_ept) 79618c2ecf20Sopenharmony_ci enable_ept_ad_bits = 0; 79628c2ecf20Sopenharmony_ci 79638c2ecf20Sopenharmony_ci if (!cpu_has_vmx_unrestricted_guest() || !enable_ept) 79648c2ecf20Sopenharmony_ci enable_unrestricted_guest = 0; 79658c2ecf20Sopenharmony_ci 79668c2ecf20Sopenharmony_ci if (!cpu_has_vmx_flexpriority()) 79678c2ecf20Sopenharmony_ci flexpriority_enabled = 0; 79688c2ecf20Sopenharmony_ci 79698c2ecf20Sopenharmony_ci if (!cpu_has_virtual_nmis()) 79708c2ecf20Sopenharmony_ci enable_vnmi = 0; 79718c2ecf20Sopenharmony_ci 79728c2ecf20Sopenharmony_ci /* 79738c2ecf20Sopenharmony_ci * set_apic_access_page_addr() is used to reload apic access 79748c2ecf20Sopenharmony_ci * page upon invalidation. No need to do anything if not 79758c2ecf20Sopenharmony_ci * using the APIC_ACCESS_ADDR VMCS field. 79768c2ecf20Sopenharmony_ci */ 79778c2ecf20Sopenharmony_ci if (!flexpriority_enabled) 79788c2ecf20Sopenharmony_ci vmx_x86_ops.set_apic_access_page_addr = NULL; 79798c2ecf20Sopenharmony_ci 79808c2ecf20Sopenharmony_ci if (!cpu_has_vmx_tpr_shadow()) 79818c2ecf20Sopenharmony_ci vmx_x86_ops.update_cr8_intercept = NULL; 79828c2ecf20Sopenharmony_ci 79838c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_HYPERV) 79848c2ecf20Sopenharmony_ci if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH 79858c2ecf20Sopenharmony_ci && enable_ept) { 79868c2ecf20Sopenharmony_ci vmx_x86_ops.tlb_remote_flush = hv_remote_flush_tlb; 79878c2ecf20Sopenharmony_ci vmx_x86_ops.tlb_remote_flush_with_range = 79888c2ecf20Sopenharmony_ci hv_remote_flush_tlb_with_range; 79898c2ecf20Sopenharmony_ci } 79908c2ecf20Sopenharmony_ci#endif 79918c2ecf20Sopenharmony_ci 79928c2ecf20Sopenharmony_ci if (!cpu_has_vmx_ple()) { 79938c2ecf20Sopenharmony_ci ple_gap = 0; 79948c2ecf20Sopenharmony_ci ple_window = 0; 79958c2ecf20Sopenharmony_ci ple_window_grow = 0; 79968c2ecf20Sopenharmony_ci ple_window_max = 0; 79978c2ecf20Sopenharmony_ci ple_window_shrink = 0; 79988c2ecf20Sopenharmony_ci } 79998c2ecf20Sopenharmony_ci 80008c2ecf20Sopenharmony_ci if (!cpu_has_vmx_apicv()) { 80018c2ecf20Sopenharmony_ci enable_apicv = 0; 80028c2ecf20Sopenharmony_ci vmx_x86_ops.sync_pir_to_irr = NULL; 80038c2ecf20Sopenharmony_ci } 80048c2ecf20Sopenharmony_ci 80058c2ecf20Sopenharmony_ci if (cpu_has_vmx_tsc_scaling()) { 80068c2ecf20Sopenharmony_ci kvm_has_tsc_control = true; 80078c2ecf20Sopenharmony_ci kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; 80088c2ecf20Sopenharmony_ci kvm_tsc_scaling_ratio_frac_bits = 48; 80098c2ecf20Sopenharmony_ci } 80108c2ecf20Sopenharmony_ci 80118c2ecf20Sopenharmony_ci set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ 80128c2ecf20Sopenharmony_ci 80138c2ecf20Sopenharmony_ci if (enable_ept) 80148c2ecf20Sopenharmony_ci vmx_enable_tdp(); 80158c2ecf20Sopenharmony_ci 80168c2ecf20Sopenharmony_ci if (!enable_ept) 80178c2ecf20Sopenharmony_ci ept_lpage_level = 0; 80188c2ecf20Sopenharmony_ci else if (cpu_has_vmx_ept_1g_page()) 80198c2ecf20Sopenharmony_ci ept_lpage_level = PG_LEVEL_1G; 80208c2ecf20Sopenharmony_ci else if (cpu_has_vmx_ept_2m_page()) 80218c2ecf20Sopenharmony_ci ept_lpage_level = PG_LEVEL_2M; 80228c2ecf20Sopenharmony_ci else 80238c2ecf20Sopenharmony_ci ept_lpage_level = PG_LEVEL_4K; 80248c2ecf20Sopenharmony_ci kvm_configure_mmu(enable_ept, vmx_get_max_tdp_level(), ept_lpage_level); 80258c2ecf20Sopenharmony_ci 80268c2ecf20Sopenharmony_ci /* 80278c2ecf20Sopenharmony_ci * Only enable PML when hardware supports PML feature, and both EPT 80288c2ecf20Sopenharmony_ci * and EPT A/D bit features are enabled -- PML depends on them to work. 80298c2ecf20Sopenharmony_ci */ 80308c2ecf20Sopenharmony_ci if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml()) 80318c2ecf20Sopenharmony_ci enable_pml = 0; 80328c2ecf20Sopenharmony_ci 80338c2ecf20Sopenharmony_ci if (!enable_pml) { 80348c2ecf20Sopenharmony_ci vmx_x86_ops.slot_enable_log_dirty = NULL; 80358c2ecf20Sopenharmony_ci vmx_x86_ops.slot_disable_log_dirty = NULL; 80368c2ecf20Sopenharmony_ci vmx_x86_ops.flush_log_dirty = NULL; 80378c2ecf20Sopenharmony_ci vmx_x86_ops.enable_log_dirty_pt_masked = NULL; 80388c2ecf20Sopenharmony_ci } 80398c2ecf20Sopenharmony_ci 80408c2ecf20Sopenharmony_ci if (!cpu_has_vmx_preemption_timer()) 80418c2ecf20Sopenharmony_ci enable_preemption_timer = false; 80428c2ecf20Sopenharmony_ci 80438c2ecf20Sopenharmony_ci if (enable_preemption_timer) { 80448c2ecf20Sopenharmony_ci u64 use_timer_freq = 5000ULL * 1000 * 1000; 80458c2ecf20Sopenharmony_ci u64 vmx_msr; 80468c2ecf20Sopenharmony_ci 80478c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); 80488c2ecf20Sopenharmony_ci cpu_preemption_timer_multi = 80498c2ecf20Sopenharmony_ci vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; 80508c2ecf20Sopenharmony_ci 80518c2ecf20Sopenharmony_ci if (tsc_khz) 80528c2ecf20Sopenharmony_ci use_timer_freq = (u64)tsc_khz * 1000; 80538c2ecf20Sopenharmony_ci use_timer_freq >>= cpu_preemption_timer_multi; 80548c2ecf20Sopenharmony_ci 80558c2ecf20Sopenharmony_ci /* 80568c2ecf20Sopenharmony_ci * KVM "disables" the preemption timer by setting it to its max 80578c2ecf20Sopenharmony_ci * value. Don't use the timer if it might cause spurious exits 80588c2ecf20Sopenharmony_ci * at a rate faster than 0.1 Hz (of uninterrupted guest time). 80598c2ecf20Sopenharmony_ci */ 80608c2ecf20Sopenharmony_ci if (use_timer_freq > 0xffffffffu / 10) 80618c2ecf20Sopenharmony_ci enable_preemption_timer = false; 80628c2ecf20Sopenharmony_ci } 80638c2ecf20Sopenharmony_ci 80648c2ecf20Sopenharmony_ci if (!enable_preemption_timer) { 80658c2ecf20Sopenharmony_ci vmx_x86_ops.set_hv_timer = NULL; 80668c2ecf20Sopenharmony_ci vmx_x86_ops.cancel_hv_timer = NULL; 80678c2ecf20Sopenharmony_ci vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit; 80688c2ecf20Sopenharmony_ci } 80698c2ecf20Sopenharmony_ci 80708c2ecf20Sopenharmony_ci kvm_mce_cap_supported |= MCG_LMCE_P; 80718c2ecf20Sopenharmony_ci 80728c2ecf20Sopenharmony_ci if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST) 80738c2ecf20Sopenharmony_ci return -EINVAL; 80748c2ecf20Sopenharmony_ci if (!enable_ept || !cpu_has_vmx_intel_pt()) 80758c2ecf20Sopenharmony_ci pt_mode = PT_MODE_SYSTEM; 80768c2ecf20Sopenharmony_ci 80778c2ecf20Sopenharmony_ci if (nested) { 80788c2ecf20Sopenharmony_ci nested_vmx_setup_ctls_msrs(&vmcs_config.nested, 80798c2ecf20Sopenharmony_ci vmx_capability.ept); 80808c2ecf20Sopenharmony_ci 80818c2ecf20Sopenharmony_ci r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers); 80828c2ecf20Sopenharmony_ci if (r) 80838c2ecf20Sopenharmony_ci return r; 80848c2ecf20Sopenharmony_ci } 80858c2ecf20Sopenharmony_ci 80868c2ecf20Sopenharmony_ci vmx_set_cpu_caps(); 80878c2ecf20Sopenharmony_ci 80888c2ecf20Sopenharmony_ci r = alloc_kvm_area(); 80898c2ecf20Sopenharmony_ci if (r) 80908c2ecf20Sopenharmony_ci nested_vmx_hardware_unsetup(); 80918c2ecf20Sopenharmony_ci 80928c2ecf20Sopenharmony_ci kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler); 80938c2ecf20Sopenharmony_ci 80948c2ecf20Sopenharmony_ci return r; 80958c2ecf20Sopenharmony_ci} 80968c2ecf20Sopenharmony_ci 80978c2ecf20Sopenharmony_cistatic struct kvm_x86_init_ops vmx_init_ops __initdata = { 80988c2ecf20Sopenharmony_ci .cpu_has_kvm_support = cpu_has_kvm_support, 80998c2ecf20Sopenharmony_ci .disabled_by_bios = vmx_disabled_by_bios, 81008c2ecf20Sopenharmony_ci .check_processor_compatibility = vmx_check_processor_compat, 81018c2ecf20Sopenharmony_ci .hardware_setup = hardware_setup, 81028c2ecf20Sopenharmony_ci .intel_pt_intr_in_guest = vmx_pt_mode_is_host_guest, 81038c2ecf20Sopenharmony_ci 81048c2ecf20Sopenharmony_ci .runtime_ops = &vmx_x86_ops, 81058c2ecf20Sopenharmony_ci}; 81068c2ecf20Sopenharmony_ci 81078c2ecf20Sopenharmony_cistatic void vmx_cleanup_l1d_flush(void) 81088c2ecf20Sopenharmony_ci{ 81098c2ecf20Sopenharmony_ci if (vmx_l1d_flush_pages) { 81108c2ecf20Sopenharmony_ci free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER); 81118c2ecf20Sopenharmony_ci vmx_l1d_flush_pages = NULL; 81128c2ecf20Sopenharmony_ci } 81138c2ecf20Sopenharmony_ci /* Restore state so sysfs ignores VMX */ 81148c2ecf20Sopenharmony_ci l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; 81158c2ecf20Sopenharmony_ci} 81168c2ecf20Sopenharmony_ci 81178c2ecf20Sopenharmony_cistatic void vmx_exit(void) 81188c2ecf20Sopenharmony_ci{ 81198c2ecf20Sopenharmony_ci#ifdef CONFIG_KEXEC_CORE 81208c2ecf20Sopenharmony_ci RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); 81218c2ecf20Sopenharmony_ci synchronize_rcu(); 81228c2ecf20Sopenharmony_ci#endif 81238c2ecf20Sopenharmony_ci 81248c2ecf20Sopenharmony_ci kvm_exit(); 81258c2ecf20Sopenharmony_ci 81268c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_HYPERV) 81278c2ecf20Sopenharmony_ci if (static_branch_unlikely(&enable_evmcs)) { 81288c2ecf20Sopenharmony_ci int cpu; 81298c2ecf20Sopenharmony_ci struct hv_vp_assist_page *vp_ap; 81308c2ecf20Sopenharmony_ci /* 81318c2ecf20Sopenharmony_ci * Reset everything to support using non-enlightened VMCS 81328c2ecf20Sopenharmony_ci * access later (e.g. when we reload the module with 81338c2ecf20Sopenharmony_ci * enlightened_vmcs=0) 81348c2ecf20Sopenharmony_ci */ 81358c2ecf20Sopenharmony_ci for_each_online_cpu(cpu) { 81368c2ecf20Sopenharmony_ci vp_ap = hv_get_vp_assist_page(cpu); 81378c2ecf20Sopenharmony_ci 81388c2ecf20Sopenharmony_ci if (!vp_ap) 81398c2ecf20Sopenharmony_ci continue; 81408c2ecf20Sopenharmony_ci 81418c2ecf20Sopenharmony_ci vp_ap->nested_control.features.directhypercall = 0; 81428c2ecf20Sopenharmony_ci vp_ap->current_nested_vmcs = 0; 81438c2ecf20Sopenharmony_ci vp_ap->enlighten_vmentry = 0; 81448c2ecf20Sopenharmony_ci } 81458c2ecf20Sopenharmony_ci 81468c2ecf20Sopenharmony_ci static_branch_disable(&enable_evmcs); 81478c2ecf20Sopenharmony_ci } 81488c2ecf20Sopenharmony_ci#endif 81498c2ecf20Sopenharmony_ci vmx_cleanup_l1d_flush(); 81508c2ecf20Sopenharmony_ci} 81518c2ecf20Sopenharmony_cimodule_exit(vmx_exit); 81528c2ecf20Sopenharmony_ci 81538c2ecf20Sopenharmony_cistatic int __init vmx_init(void) 81548c2ecf20Sopenharmony_ci{ 81558c2ecf20Sopenharmony_ci int r, cpu; 81568c2ecf20Sopenharmony_ci 81578c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_HYPERV) 81588c2ecf20Sopenharmony_ci /* 81598c2ecf20Sopenharmony_ci * Enlightened VMCS usage should be recommended and the host needs 81608c2ecf20Sopenharmony_ci * to support eVMCS v1 or above. We can also disable eVMCS support 81618c2ecf20Sopenharmony_ci * with module parameter. 81628c2ecf20Sopenharmony_ci */ 81638c2ecf20Sopenharmony_ci if (enlightened_vmcs && 81648c2ecf20Sopenharmony_ci ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED && 81658c2ecf20Sopenharmony_ci (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >= 81668c2ecf20Sopenharmony_ci KVM_EVMCS_VERSION) { 81678c2ecf20Sopenharmony_ci int cpu; 81688c2ecf20Sopenharmony_ci 81698c2ecf20Sopenharmony_ci /* Check that we have assist pages on all online CPUs */ 81708c2ecf20Sopenharmony_ci for_each_online_cpu(cpu) { 81718c2ecf20Sopenharmony_ci if (!hv_get_vp_assist_page(cpu)) { 81728c2ecf20Sopenharmony_ci enlightened_vmcs = false; 81738c2ecf20Sopenharmony_ci break; 81748c2ecf20Sopenharmony_ci } 81758c2ecf20Sopenharmony_ci } 81768c2ecf20Sopenharmony_ci 81778c2ecf20Sopenharmony_ci if (enlightened_vmcs) { 81788c2ecf20Sopenharmony_ci pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); 81798c2ecf20Sopenharmony_ci static_branch_enable(&enable_evmcs); 81808c2ecf20Sopenharmony_ci } 81818c2ecf20Sopenharmony_ci 81828c2ecf20Sopenharmony_ci if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) 81838c2ecf20Sopenharmony_ci vmx_x86_ops.enable_direct_tlbflush 81848c2ecf20Sopenharmony_ci = hv_enable_direct_tlbflush; 81858c2ecf20Sopenharmony_ci 81868c2ecf20Sopenharmony_ci } else { 81878c2ecf20Sopenharmony_ci enlightened_vmcs = false; 81888c2ecf20Sopenharmony_ci } 81898c2ecf20Sopenharmony_ci#endif 81908c2ecf20Sopenharmony_ci 81918c2ecf20Sopenharmony_ci r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx), 81928c2ecf20Sopenharmony_ci __alignof__(struct vcpu_vmx), THIS_MODULE); 81938c2ecf20Sopenharmony_ci if (r) 81948c2ecf20Sopenharmony_ci return r; 81958c2ecf20Sopenharmony_ci 81968c2ecf20Sopenharmony_ci /* 81978c2ecf20Sopenharmony_ci * Must be called after kvm_init() so enable_ept is properly set 81988c2ecf20Sopenharmony_ci * up. Hand the parameter mitigation value in which was stored in 81998c2ecf20Sopenharmony_ci * the pre module init parser. If no parameter was given, it will 82008c2ecf20Sopenharmony_ci * contain 'auto' which will be turned into the default 'cond' 82018c2ecf20Sopenharmony_ci * mitigation mode. 82028c2ecf20Sopenharmony_ci */ 82038c2ecf20Sopenharmony_ci r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); 82048c2ecf20Sopenharmony_ci if (r) { 82058c2ecf20Sopenharmony_ci vmx_exit(); 82068c2ecf20Sopenharmony_ci return r; 82078c2ecf20Sopenharmony_ci } 82088c2ecf20Sopenharmony_ci 82098c2ecf20Sopenharmony_ci vmx_setup_fb_clear_ctrl(); 82108c2ecf20Sopenharmony_ci 82118c2ecf20Sopenharmony_ci for_each_possible_cpu(cpu) { 82128c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); 82138c2ecf20Sopenharmony_ci 82148c2ecf20Sopenharmony_ci pi_init_cpu(cpu); 82158c2ecf20Sopenharmony_ci } 82168c2ecf20Sopenharmony_ci 82178c2ecf20Sopenharmony_ci#ifdef CONFIG_KEXEC_CORE 82188c2ecf20Sopenharmony_ci rcu_assign_pointer(crash_vmclear_loaded_vmcss, 82198c2ecf20Sopenharmony_ci crash_vmclear_local_loaded_vmcss); 82208c2ecf20Sopenharmony_ci#endif 82218c2ecf20Sopenharmony_ci vmx_check_vmcs12_offsets(); 82228c2ecf20Sopenharmony_ci 82238c2ecf20Sopenharmony_ci /* 82248c2ecf20Sopenharmony_ci * Shadow paging doesn't have a (further) performance penalty 82258c2ecf20Sopenharmony_ci * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable it 82268c2ecf20Sopenharmony_ci * by default 82278c2ecf20Sopenharmony_ci */ 82288c2ecf20Sopenharmony_ci if (!enable_ept) 82298c2ecf20Sopenharmony_ci allow_smaller_maxphyaddr = true; 82308c2ecf20Sopenharmony_ci 82318c2ecf20Sopenharmony_ci return 0; 82328c2ecf20Sopenharmony_ci} 82338c2ecf20Sopenharmony_cimodule_init(vmx_init); 8234