18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci 38c2ecf20Sopenharmony_ci#include <linux/objtool.h> 48c2ecf20Sopenharmony_ci#include <linux/percpu.h> 58c2ecf20Sopenharmony_ci 68c2ecf20Sopenharmony_ci#include <asm/debugreg.h> 78c2ecf20Sopenharmony_ci#include <asm/mmu_context.h> 88c2ecf20Sopenharmony_ci 98c2ecf20Sopenharmony_ci#include "cpuid.h" 108c2ecf20Sopenharmony_ci#include "hyperv.h" 118c2ecf20Sopenharmony_ci#include "mmu.h" 128c2ecf20Sopenharmony_ci#include "nested.h" 138c2ecf20Sopenharmony_ci#include "pmu.h" 148c2ecf20Sopenharmony_ci#include "trace.h" 158c2ecf20Sopenharmony_ci#include "vmx.h" 168c2ecf20Sopenharmony_ci#include "x86.h" 178c2ecf20Sopenharmony_ci 188c2ecf20Sopenharmony_cistatic bool __read_mostly enable_shadow_vmcs = 1; 198c2ecf20Sopenharmony_cimodule_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_cistatic bool __read_mostly nested_early_check = 0; 228c2ecf20Sopenharmony_cimodule_param(nested_early_check, bool, S_IRUGO); 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci#define CC(consistency_check) \ 258c2ecf20Sopenharmony_ci({ \ 268c2ecf20Sopenharmony_ci bool failed = (consistency_check); \ 278c2ecf20Sopenharmony_ci if (failed) \ 288c2ecf20Sopenharmony_ci trace_kvm_nested_vmenter_failed(#consistency_check, 0); \ 298c2ecf20Sopenharmony_ci failed; \ 308c2ecf20Sopenharmony_ci}) 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_ci/* 338c2ecf20Sopenharmony_ci * Hyper-V requires all of these, so mark them as supported even though 348c2ecf20Sopenharmony_ci * they are just treated the same as all-context. 
358c2ecf20Sopenharmony_ci */ 368c2ecf20Sopenharmony_ci#define VMX_VPID_EXTENT_SUPPORTED_MASK \ 378c2ecf20Sopenharmony_ci (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \ 388c2ecf20Sopenharmony_ci VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \ 398c2ecf20Sopenharmony_ci VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \ 408c2ecf20Sopenharmony_ci VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT) 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5 438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_cienum { 458c2ecf20Sopenharmony_ci VMX_VMREAD_BITMAP, 468c2ecf20Sopenharmony_ci VMX_VMWRITE_BITMAP, 478c2ecf20Sopenharmony_ci VMX_BITMAP_NR 488c2ecf20Sopenharmony_ci}; 498c2ecf20Sopenharmony_cistatic unsigned long *vmx_bitmap[VMX_BITMAP_NR]; 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_ci#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) 528c2ecf20Sopenharmony_ci#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_cistruct shadow_vmcs_field { 558c2ecf20Sopenharmony_ci u16 encoding; 568c2ecf20Sopenharmony_ci u16 offset; 578c2ecf20Sopenharmony_ci}; 588c2ecf20Sopenharmony_cistatic struct shadow_vmcs_field shadow_read_only_fields[] = { 598c2ecf20Sopenharmony_ci#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) }, 608c2ecf20Sopenharmony_ci#include "vmcs_shadow_fields.h" 618c2ecf20Sopenharmony_ci}; 628c2ecf20Sopenharmony_cistatic int max_shadow_read_only_fields = 638c2ecf20Sopenharmony_ci ARRAY_SIZE(shadow_read_only_fields); 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_cistatic struct shadow_vmcs_field shadow_read_write_fields[] = { 668c2ecf20Sopenharmony_ci#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) }, 678c2ecf20Sopenharmony_ci#include "vmcs_shadow_fields.h" 688c2ecf20Sopenharmony_ci}; 698c2ecf20Sopenharmony_cistatic int max_shadow_read_write_fields = 708c2ecf20Sopenharmony_ci ARRAY_SIZE(shadow_read_write_fields); 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_cistatic 
void init_vmcs_shadow_fields(void) 738c2ecf20Sopenharmony_ci{ 748c2ecf20Sopenharmony_ci int i, j; 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_ci memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); 778c2ecf20Sopenharmony_ci memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_ci for (i = j = 0; i < max_shadow_read_only_fields; i++) { 808c2ecf20Sopenharmony_ci struct shadow_vmcs_field entry = shadow_read_only_fields[i]; 818c2ecf20Sopenharmony_ci u16 field = entry.encoding; 828c2ecf20Sopenharmony_ci 838c2ecf20Sopenharmony_ci if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && 848c2ecf20Sopenharmony_ci (i + 1 == max_shadow_read_only_fields || 858c2ecf20Sopenharmony_ci shadow_read_only_fields[i + 1].encoding != field + 1)) 868c2ecf20Sopenharmony_ci pr_err("Missing field from shadow_read_only_field %x\n", 878c2ecf20Sopenharmony_ci field + 1); 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_ci clear_bit(field, vmx_vmread_bitmap); 908c2ecf20Sopenharmony_ci if (field & 1) 918c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 928c2ecf20Sopenharmony_ci continue; 938c2ecf20Sopenharmony_ci#else 948c2ecf20Sopenharmony_ci entry.offset += sizeof(u32); 958c2ecf20Sopenharmony_ci#endif 968c2ecf20Sopenharmony_ci shadow_read_only_fields[j++] = entry; 978c2ecf20Sopenharmony_ci } 988c2ecf20Sopenharmony_ci max_shadow_read_only_fields = j; 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci for (i = j = 0; i < max_shadow_read_write_fields; i++) { 1018c2ecf20Sopenharmony_ci struct shadow_vmcs_field entry = shadow_read_write_fields[i]; 1028c2ecf20Sopenharmony_ci u16 field = entry.encoding; 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_ci if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && 1058c2ecf20Sopenharmony_ci (i + 1 == max_shadow_read_write_fields || 1068c2ecf20Sopenharmony_ci shadow_read_write_fields[i + 1].encoding != field + 1)) 1078c2ecf20Sopenharmony_ci pr_err("Missing field from shadow_read_write_field %x\n", 1088c2ecf20Sopenharmony_ci 
field + 1); 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_ci WARN_ONCE(field >= GUEST_ES_AR_BYTES && 1118c2ecf20Sopenharmony_ci field <= GUEST_TR_AR_BYTES, 1128c2ecf20Sopenharmony_ci "Update vmcs12_write_any() to drop reserved bits from AR_BYTES"); 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci /* 1158c2ecf20Sopenharmony_ci * PML and the preemption timer can be emulated, but the 1168c2ecf20Sopenharmony_ci * processor cannot vmwrite to fields that don't exist 1178c2ecf20Sopenharmony_ci * on bare metal. 1188c2ecf20Sopenharmony_ci */ 1198c2ecf20Sopenharmony_ci switch (field) { 1208c2ecf20Sopenharmony_ci case GUEST_PML_INDEX: 1218c2ecf20Sopenharmony_ci if (!cpu_has_vmx_pml()) 1228c2ecf20Sopenharmony_ci continue; 1238c2ecf20Sopenharmony_ci break; 1248c2ecf20Sopenharmony_ci case VMX_PREEMPTION_TIMER_VALUE: 1258c2ecf20Sopenharmony_ci if (!cpu_has_vmx_preemption_timer()) 1268c2ecf20Sopenharmony_ci continue; 1278c2ecf20Sopenharmony_ci break; 1288c2ecf20Sopenharmony_ci case GUEST_INTR_STATUS: 1298c2ecf20Sopenharmony_ci if (!cpu_has_vmx_apicv()) 1308c2ecf20Sopenharmony_ci continue; 1318c2ecf20Sopenharmony_ci break; 1328c2ecf20Sopenharmony_ci default: 1338c2ecf20Sopenharmony_ci break; 1348c2ecf20Sopenharmony_ci } 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci clear_bit(field, vmx_vmwrite_bitmap); 1378c2ecf20Sopenharmony_ci clear_bit(field, vmx_vmread_bitmap); 1388c2ecf20Sopenharmony_ci if (field & 1) 1398c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 1408c2ecf20Sopenharmony_ci continue; 1418c2ecf20Sopenharmony_ci#else 1428c2ecf20Sopenharmony_ci entry.offset += sizeof(u32); 1438c2ecf20Sopenharmony_ci#endif 1448c2ecf20Sopenharmony_ci shadow_read_write_fields[j++] = entry; 1458c2ecf20Sopenharmony_ci } 1468c2ecf20Sopenharmony_ci max_shadow_read_write_fields = j; 1478c2ecf20Sopenharmony_ci} 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci/* 1508c2ecf20Sopenharmony_ci * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), 
1518c2ecf20Sopenharmony_ci * set the success or error code of an emulated VMX instruction (as specified 1528c2ecf20Sopenharmony_ci * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated 1538c2ecf20Sopenharmony_ci * instruction. 1548c2ecf20Sopenharmony_ci */ 1558c2ecf20Sopenharmony_cistatic int nested_vmx_succeed(struct kvm_vcpu *vcpu) 1568c2ecf20Sopenharmony_ci{ 1578c2ecf20Sopenharmony_ci vmx_set_rflags(vcpu, vmx_get_rflags(vcpu) 1588c2ecf20Sopenharmony_ci & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | 1598c2ecf20Sopenharmony_ci X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)); 1608c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 1618c2ecf20Sopenharmony_ci} 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_cistatic int nested_vmx_failInvalid(struct kvm_vcpu *vcpu) 1648c2ecf20Sopenharmony_ci{ 1658c2ecf20Sopenharmony_ci vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) 1668c2ecf20Sopenharmony_ci & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | 1678c2ecf20Sopenharmony_ci X86_EFLAGS_SF | X86_EFLAGS_OF)) 1688c2ecf20Sopenharmony_ci | X86_EFLAGS_CF); 1698c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 1708c2ecf20Sopenharmony_ci} 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_cistatic int nested_vmx_failValid(struct kvm_vcpu *vcpu, 1738c2ecf20Sopenharmony_ci u32 vm_instruction_error) 1748c2ecf20Sopenharmony_ci{ 1758c2ecf20Sopenharmony_ci vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) 1768c2ecf20Sopenharmony_ci & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | 1778c2ecf20Sopenharmony_ci X86_EFLAGS_SF | X86_EFLAGS_OF)) 1788c2ecf20Sopenharmony_ci | X86_EFLAGS_ZF); 1798c2ecf20Sopenharmony_ci get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; 1808c2ecf20Sopenharmony_ci /* 1818c2ecf20Sopenharmony_ci * We don't need to force a shadow sync because 1828c2ecf20Sopenharmony_ci * VM_INSTRUCTION_ERROR is not shadowed 1838c2ecf20Sopenharmony_ci */ 1848c2ecf20Sopenharmony_ci return 
kvm_skip_emulated_instruction(vcpu); 1858c2ecf20Sopenharmony_ci} 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_cistatic int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error) 1888c2ecf20Sopenharmony_ci{ 1898c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_ci /* 1928c2ecf20Sopenharmony_ci * failValid writes the error number to the current VMCS, which 1938c2ecf20Sopenharmony_ci * can't be done if there isn't a current VMCS. 1948c2ecf20Sopenharmony_ci */ 1958c2ecf20Sopenharmony_ci if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs) 1968c2ecf20Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci return nested_vmx_failValid(vcpu, vm_instruction_error); 1998c2ecf20Sopenharmony_ci} 2008c2ecf20Sopenharmony_ci 2018c2ecf20Sopenharmony_cistatic void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator) 2028c2ecf20Sopenharmony_ci{ 2038c2ecf20Sopenharmony_ci /* TODO: not to reset guest simply here. 
*/ 2048c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); 2058c2ecf20Sopenharmony_ci pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator); 2068c2ecf20Sopenharmony_ci} 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_cistatic inline bool vmx_control_verify(u32 control, u32 low, u32 high) 2098c2ecf20Sopenharmony_ci{ 2108c2ecf20Sopenharmony_ci return fixed_bits_valid(control, low, high); 2118c2ecf20Sopenharmony_ci} 2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_cistatic inline u64 vmx_control_msr(u32 low, u32 high) 2148c2ecf20Sopenharmony_ci{ 2158c2ecf20Sopenharmony_ci return low | ((u64)high << 32); 2168c2ecf20Sopenharmony_ci} 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_cistatic void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) 2198c2ecf20Sopenharmony_ci{ 2208c2ecf20Sopenharmony_ci secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); 2218c2ecf20Sopenharmony_ci vmcs_write64(VMCS_LINK_POINTER, -1ull); 2228c2ecf20Sopenharmony_ci vmx->nested.need_vmcs12_to_shadow_sync = false; 2238c2ecf20Sopenharmony_ci} 2248c2ecf20Sopenharmony_ci 2258c2ecf20Sopenharmony_cistatic inline void nested_release_evmcs(struct kvm_vcpu *vcpu) 2268c2ecf20Sopenharmony_ci{ 2278c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_ci if (!vmx->nested.hv_evmcs) 2308c2ecf20Sopenharmony_ci return; 2318c2ecf20Sopenharmony_ci 2328c2ecf20Sopenharmony_ci kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true); 2338c2ecf20Sopenharmony_ci vmx->nested.hv_evmcs_vmptr = 0; 2348c2ecf20Sopenharmony_ci vmx->nested.hv_evmcs = NULL; 2358c2ecf20Sopenharmony_ci} 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_cistatic void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx, 2388c2ecf20Sopenharmony_ci struct loaded_vmcs *prev) 2398c2ecf20Sopenharmony_ci{ 2408c2ecf20Sopenharmony_ci struct vmcs_host_state *dest, *src; 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci if 
(unlikely(!vmx->guest_state_loaded)) 2438c2ecf20Sopenharmony_ci return; 2448c2ecf20Sopenharmony_ci 2458c2ecf20Sopenharmony_ci src = &prev->host_state; 2468c2ecf20Sopenharmony_ci dest = &vmx->loaded_vmcs->host_state; 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_ci vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base); 2498c2ecf20Sopenharmony_ci dest->ldt_sel = src->ldt_sel; 2508c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 2518c2ecf20Sopenharmony_ci dest->ds_sel = src->ds_sel; 2528c2ecf20Sopenharmony_ci dest->es_sel = src->es_sel; 2538c2ecf20Sopenharmony_ci#endif 2548c2ecf20Sopenharmony_ci} 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_cistatic void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) 2578c2ecf20Sopenharmony_ci{ 2588c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 2598c2ecf20Sopenharmony_ci struct loaded_vmcs *prev; 2608c2ecf20Sopenharmony_ci int cpu; 2618c2ecf20Sopenharmony_ci 2628c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(vmx->loaded_vmcs == vmcs)) 2638c2ecf20Sopenharmony_ci return; 2648c2ecf20Sopenharmony_ci 2658c2ecf20Sopenharmony_ci cpu = get_cpu(); 2668c2ecf20Sopenharmony_ci prev = vmx->loaded_vmcs; 2678c2ecf20Sopenharmony_ci vmx->loaded_vmcs = vmcs; 2688c2ecf20Sopenharmony_ci vmx_vcpu_load_vmcs(vcpu, cpu, prev); 2698c2ecf20Sopenharmony_ci vmx_sync_vmcs_host_state(vmx, prev); 2708c2ecf20Sopenharmony_ci put_cpu(); 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ci vmx_register_cache_reset(vcpu); 2738c2ecf20Sopenharmony_ci} 2748c2ecf20Sopenharmony_ci 2758c2ecf20Sopenharmony_ci/* 2768c2ecf20Sopenharmony_ci * Free whatever needs to be freed from vmx->nested when L1 goes down, or 2778c2ecf20Sopenharmony_ci * just stops using VMX. 
2788c2ecf20Sopenharmony_ci */ 2798c2ecf20Sopenharmony_cistatic void free_nested(struct kvm_vcpu *vcpu) 2808c2ecf20Sopenharmony_ci{ 2818c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01)) 2848c2ecf20Sopenharmony_ci vmx_switch_vmcs(vcpu, &vmx->vmcs01); 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) 2878c2ecf20Sopenharmony_ci return; 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_ci vmx->nested.vmxon = false; 2928c2ecf20Sopenharmony_ci vmx->nested.smm.vmxon = false; 2938c2ecf20Sopenharmony_ci free_vpid(vmx->nested.vpid02); 2948c2ecf20Sopenharmony_ci vmx->nested.posted_intr_nv = -1; 2958c2ecf20Sopenharmony_ci vmx->nested.current_vmptr = -1ull; 2968c2ecf20Sopenharmony_ci if (enable_shadow_vmcs) { 2978c2ecf20Sopenharmony_ci vmx_disable_shadow_vmcs(vmx); 2988c2ecf20Sopenharmony_ci vmcs_clear(vmx->vmcs01.shadow_vmcs); 2998c2ecf20Sopenharmony_ci free_vmcs(vmx->vmcs01.shadow_vmcs); 3008c2ecf20Sopenharmony_ci vmx->vmcs01.shadow_vmcs = NULL; 3018c2ecf20Sopenharmony_ci } 3028c2ecf20Sopenharmony_ci kfree(vmx->nested.cached_vmcs12); 3038c2ecf20Sopenharmony_ci vmx->nested.cached_vmcs12 = NULL; 3048c2ecf20Sopenharmony_ci kfree(vmx->nested.cached_shadow_vmcs12); 3058c2ecf20Sopenharmony_ci vmx->nested.cached_shadow_vmcs12 = NULL; 3068c2ecf20Sopenharmony_ci /* Unpin physical memory we referred to in the vmcs02 */ 3078c2ecf20Sopenharmony_ci if (vmx->nested.apic_access_page) { 3088c2ecf20Sopenharmony_ci kvm_release_page_clean(vmx->nested.apic_access_page); 3098c2ecf20Sopenharmony_ci vmx->nested.apic_access_page = NULL; 3108c2ecf20Sopenharmony_ci } 3118c2ecf20Sopenharmony_ci kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); 3128c2ecf20Sopenharmony_ci kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, 
true); 3138c2ecf20Sopenharmony_ci vmx->nested.pi_desc = NULL; 3148c2ecf20Sopenharmony_ci 3158c2ecf20Sopenharmony_ci kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); 3168c2ecf20Sopenharmony_ci 3178c2ecf20Sopenharmony_ci nested_release_evmcs(vcpu); 3188c2ecf20Sopenharmony_ci 3198c2ecf20Sopenharmony_ci free_loaded_vmcs(&vmx->nested.vmcs02); 3208c2ecf20Sopenharmony_ci} 3218c2ecf20Sopenharmony_ci 3228c2ecf20Sopenharmony_ci/* 3238c2ecf20Sopenharmony_ci * Ensure that the current vmcs of the logical processor is the 3248c2ecf20Sopenharmony_ci * vmcs01 of the vcpu before calling free_nested(). 3258c2ecf20Sopenharmony_ci */ 3268c2ecf20Sopenharmony_civoid nested_vmx_free_vcpu(struct kvm_vcpu *vcpu) 3278c2ecf20Sopenharmony_ci{ 3288c2ecf20Sopenharmony_ci vcpu_load(vcpu); 3298c2ecf20Sopenharmony_ci vmx_leave_nested(vcpu); 3308c2ecf20Sopenharmony_ci vcpu_put(vcpu); 3318c2ecf20Sopenharmony_ci} 3328c2ecf20Sopenharmony_ci 3338c2ecf20Sopenharmony_cistatic void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, 3348c2ecf20Sopenharmony_ci struct x86_exception *fault) 3358c2ecf20Sopenharmony_ci{ 3368c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 3378c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 3388c2ecf20Sopenharmony_ci u32 vm_exit_reason; 3398c2ecf20Sopenharmony_ci unsigned long exit_qualification = vcpu->arch.exit_qualification; 3408c2ecf20Sopenharmony_ci 3418c2ecf20Sopenharmony_ci if (vmx->nested.pml_full) { 3428c2ecf20Sopenharmony_ci vm_exit_reason = EXIT_REASON_PML_FULL; 3438c2ecf20Sopenharmony_ci vmx->nested.pml_full = false; 3448c2ecf20Sopenharmony_ci exit_qualification &= INTR_INFO_UNBLOCK_NMI; 3458c2ecf20Sopenharmony_ci } else if (fault->error_code & PFERR_RSVD_MASK) 3468c2ecf20Sopenharmony_ci vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; 3478c2ecf20Sopenharmony_ci else 3488c2ecf20Sopenharmony_ci vm_exit_reason = EXIT_REASON_EPT_VIOLATION; 3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_ci nested_vmx_vmexit(vcpu, 
vm_exit_reason, 0, exit_qualification); 3518c2ecf20Sopenharmony_ci vmcs12->guest_physical_address = fault->address; 3528c2ecf20Sopenharmony_ci} 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_cistatic void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) 3558c2ecf20Sopenharmony_ci{ 3568c2ecf20Sopenharmony_ci WARN_ON(mmu_is_nested(vcpu)); 3578c2ecf20Sopenharmony_ci 3588c2ecf20Sopenharmony_ci vcpu->arch.mmu = &vcpu->arch.guest_mmu; 3598c2ecf20Sopenharmony_ci kvm_init_shadow_ept_mmu(vcpu, 3608c2ecf20Sopenharmony_ci to_vmx(vcpu)->nested.msrs.ept_caps & 3618c2ecf20Sopenharmony_ci VMX_EPT_EXECUTE_ONLY_BIT, 3628c2ecf20Sopenharmony_ci nested_ept_ad_enabled(vcpu), 3638c2ecf20Sopenharmony_ci nested_ept_get_eptp(vcpu)); 3648c2ecf20Sopenharmony_ci vcpu->arch.mmu->get_guest_pgd = nested_ept_get_eptp; 3658c2ecf20Sopenharmony_ci vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault; 3668c2ecf20Sopenharmony_ci vcpu->arch.mmu->get_pdptr = kvm_pdptr_read; 3678c2ecf20Sopenharmony_ci 3688c2ecf20Sopenharmony_ci vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 3698c2ecf20Sopenharmony_ci} 3708c2ecf20Sopenharmony_ci 3718c2ecf20Sopenharmony_cistatic void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) 3728c2ecf20Sopenharmony_ci{ 3738c2ecf20Sopenharmony_ci vcpu->arch.mmu = &vcpu->arch.root_mmu; 3748c2ecf20Sopenharmony_ci vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; 3758c2ecf20Sopenharmony_ci} 3768c2ecf20Sopenharmony_ci 3778c2ecf20Sopenharmony_cistatic bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, 3788c2ecf20Sopenharmony_ci u16 error_code) 3798c2ecf20Sopenharmony_ci{ 3808c2ecf20Sopenharmony_ci bool inequality, bit; 3818c2ecf20Sopenharmony_ci 3828c2ecf20Sopenharmony_ci bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0; 3838c2ecf20Sopenharmony_ci inequality = 3848c2ecf20Sopenharmony_ci (error_code & vmcs12->page_fault_error_code_mask) != 3858c2ecf20Sopenharmony_ci vmcs12->page_fault_error_code_match; 3868c2ecf20Sopenharmony_ci return inequality ^ bit; 
3878c2ecf20Sopenharmony_ci} 3888c2ecf20Sopenharmony_ci 3898c2ecf20Sopenharmony_ci 3908c2ecf20Sopenharmony_ci/* 3918c2ecf20Sopenharmony_ci * KVM wants to inject page-faults which it got to the guest. This function 3928c2ecf20Sopenharmony_ci * checks whether in a nested guest, we need to inject them to L1 or L2. 3938c2ecf20Sopenharmony_ci */ 3948c2ecf20Sopenharmony_cistatic int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual) 3958c2ecf20Sopenharmony_ci{ 3968c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 3978c2ecf20Sopenharmony_ci unsigned int nr = vcpu->arch.exception.nr; 3988c2ecf20Sopenharmony_ci bool has_payload = vcpu->arch.exception.has_payload; 3998c2ecf20Sopenharmony_ci unsigned long payload = vcpu->arch.exception.payload; 4008c2ecf20Sopenharmony_ci 4018c2ecf20Sopenharmony_ci if (nr == PF_VECTOR) { 4028c2ecf20Sopenharmony_ci if (vcpu->arch.exception.nested_apf) { 4038c2ecf20Sopenharmony_ci *exit_qual = vcpu->arch.apf.nested_apf_token; 4048c2ecf20Sopenharmony_ci return 1; 4058c2ecf20Sopenharmony_ci } 4068c2ecf20Sopenharmony_ci if (nested_vmx_is_page_fault_vmexit(vmcs12, 4078c2ecf20Sopenharmony_ci vcpu->arch.exception.error_code)) { 4088c2ecf20Sopenharmony_ci *exit_qual = has_payload ? 
payload : vcpu->arch.cr2; 4098c2ecf20Sopenharmony_ci return 1; 4108c2ecf20Sopenharmony_ci } 4118c2ecf20Sopenharmony_ci } else if (vmcs12->exception_bitmap & (1u << nr)) { 4128c2ecf20Sopenharmony_ci if (nr == DB_VECTOR) { 4138c2ecf20Sopenharmony_ci if (!has_payload) { 4148c2ecf20Sopenharmony_ci payload = vcpu->arch.dr6; 4158c2ecf20Sopenharmony_ci payload &= ~(DR6_FIXED_1 | DR6_BT); 4168c2ecf20Sopenharmony_ci payload ^= DR6_RTM; 4178c2ecf20Sopenharmony_ci } 4188c2ecf20Sopenharmony_ci *exit_qual = payload; 4198c2ecf20Sopenharmony_ci } else 4208c2ecf20Sopenharmony_ci *exit_qual = 0; 4218c2ecf20Sopenharmony_ci return 1; 4228c2ecf20Sopenharmony_ci } 4238c2ecf20Sopenharmony_ci 4248c2ecf20Sopenharmony_ci return 0; 4258c2ecf20Sopenharmony_ci} 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_ci 4288c2ecf20Sopenharmony_cistatic void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, 4298c2ecf20Sopenharmony_ci struct x86_exception *fault) 4308c2ecf20Sopenharmony_ci{ 4318c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 4328c2ecf20Sopenharmony_ci 4338c2ecf20Sopenharmony_ci WARN_ON(!is_guest_mode(vcpu)); 4348c2ecf20Sopenharmony_ci 4358c2ecf20Sopenharmony_ci if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) && 4368c2ecf20Sopenharmony_ci !to_vmx(vcpu)->nested.nested_run_pending) { 4378c2ecf20Sopenharmony_ci vmcs12->vm_exit_intr_error_code = fault->error_code; 4388c2ecf20Sopenharmony_ci nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, 4398c2ecf20Sopenharmony_ci PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | 4408c2ecf20Sopenharmony_ci INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, 4418c2ecf20Sopenharmony_ci fault->address); 4428c2ecf20Sopenharmony_ci } else { 4438c2ecf20Sopenharmony_ci kvm_inject_page_fault(vcpu, fault); 4448c2ecf20Sopenharmony_ci } 4458c2ecf20Sopenharmony_ci} 4468c2ecf20Sopenharmony_ci 4478c2ecf20Sopenharmony_cistatic int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu, 4488c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 
4498c2ecf20Sopenharmony_ci{ 4508c2ecf20Sopenharmony_ci if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) 4518c2ecf20Sopenharmony_ci return 0; 4528c2ecf20Sopenharmony_ci 4538c2ecf20Sopenharmony_ci if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)) || 4548c2ecf20Sopenharmony_ci CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b))) 4558c2ecf20Sopenharmony_ci return -EINVAL; 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ci return 0; 4588c2ecf20Sopenharmony_ci} 4598c2ecf20Sopenharmony_ci 4608c2ecf20Sopenharmony_cistatic int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu, 4618c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 4628c2ecf20Sopenharmony_ci{ 4638c2ecf20Sopenharmony_ci if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) 4648c2ecf20Sopenharmony_ci return 0; 4658c2ecf20Sopenharmony_ci 4668c2ecf20Sopenharmony_ci if (CC(!page_address_valid(vcpu, vmcs12->msr_bitmap))) 4678c2ecf20Sopenharmony_ci return -EINVAL; 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_ci return 0; 4708c2ecf20Sopenharmony_ci} 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_cistatic int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu, 4738c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 4748c2ecf20Sopenharmony_ci{ 4758c2ecf20Sopenharmony_ci if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) 4768c2ecf20Sopenharmony_ci return 0; 4778c2ecf20Sopenharmony_ci 4788c2ecf20Sopenharmony_ci if (CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr))) 4798c2ecf20Sopenharmony_ci return -EINVAL; 4808c2ecf20Sopenharmony_ci 4818c2ecf20Sopenharmony_ci return 0; 4828c2ecf20Sopenharmony_ci} 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_ci/* 4858c2ecf20Sopenharmony_ci * Check if MSR is intercepted for L01 MSR bitmap. 
4868c2ecf20Sopenharmony_ci */ 4878c2ecf20Sopenharmony_cistatic bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) 4888c2ecf20Sopenharmony_ci{ 4898c2ecf20Sopenharmony_ci unsigned long *msr_bitmap; 4908c2ecf20Sopenharmony_ci int f = sizeof(unsigned long); 4918c2ecf20Sopenharmony_ci 4928c2ecf20Sopenharmony_ci if (!cpu_has_vmx_msr_bitmap()) 4938c2ecf20Sopenharmony_ci return true; 4948c2ecf20Sopenharmony_ci 4958c2ecf20Sopenharmony_ci msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci if (msr <= 0x1fff) { 4988c2ecf20Sopenharmony_ci return !!test_bit(msr, msr_bitmap + 0x800 / f); 4998c2ecf20Sopenharmony_ci } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { 5008c2ecf20Sopenharmony_ci msr &= 0x1fff; 5018c2ecf20Sopenharmony_ci return !!test_bit(msr, msr_bitmap + 0xc00 / f); 5028c2ecf20Sopenharmony_ci } 5038c2ecf20Sopenharmony_ci 5048c2ecf20Sopenharmony_ci return true; 5058c2ecf20Sopenharmony_ci} 5068c2ecf20Sopenharmony_ci 5078c2ecf20Sopenharmony_ci/* 5088c2ecf20Sopenharmony_ci * If a msr is allowed by L0, we should check whether it is allowed by L1. 5098c2ecf20Sopenharmony_ci * The corresponding bit will be cleared unless both of L0 and L1 allow it. 5108c2ecf20Sopenharmony_ci */ 5118c2ecf20Sopenharmony_cistatic void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, 5128c2ecf20Sopenharmony_ci unsigned long *msr_bitmap_nested, 5138c2ecf20Sopenharmony_ci u32 msr, int type) 5148c2ecf20Sopenharmony_ci{ 5158c2ecf20Sopenharmony_ci int f = sizeof(unsigned long); 5168c2ecf20Sopenharmony_ci 5178c2ecf20Sopenharmony_ci /* 5188c2ecf20Sopenharmony_ci * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals 5198c2ecf20Sopenharmony_ci * have the write-low and read-high bitmap offsets the wrong way round. 5208c2ecf20Sopenharmony_ci * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. 
5218c2ecf20Sopenharmony_ci */ 5228c2ecf20Sopenharmony_ci if (msr <= 0x1fff) { 5238c2ecf20Sopenharmony_ci if (type & MSR_TYPE_R && 5248c2ecf20Sopenharmony_ci !test_bit(msr, msr_bitmap_l1 + 0x000 / f)) 5258c2ecf20Sopenharmony_ci /* read-low */ 5268c2ecf20Sopenharmony_ci __clear_bit(msr, msr_bitmap_nested + 0x000 / f); 5278c2ecf20Sopenharmony_ci 5288c2ecf20Sopenharmony_ci if (type & MSR_TYPE_W && 5298c2ecf20Sopenharmony_ci !test_bit(msr, msr_bitmap_l1 + 0x800 / f)) 5308c2ecf20Sopenharmony_ci /* write-low */ 5318c2ecf20Sopenharmony_ci __clear_bit(msr, msr_bitmap_nested + 0x800 / f); 5328c2ecf20Sopenharmony_ci 5338c2ecf20Sopenharmony_ci } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { 5348c2ecf20Sopenharmony_ci msr &= 0x1fff; 5358c2ecf20Sopenharmony_ci if (type & MSR_TYPE_R && 5368c2ecf20Sopenharmony_ci !test_bit(msr, msr_bitmap_l1 + 0x400 / f)) 5378c2ecf20Sopenharmony_ci /* read-high */ 5388c2ecf20Sopenharmony_ci __clear_bit(msr, msr_bitmap_nested + 0x400 / f); 5398c2ecf20Sopenharmony_ci 5408c2ecf20Sopenharmony_ci if (type & MSR_TYPE_W && 5418c2ecf20Sopenharmony_ci !test_bit(msr, msr_bitmap_l1 + 0xc00 / f)) 5428c2ecf20Sopenharmony_ci /* write-high */ 5438c2ecf20Sopenharmony_ci __clear_bit(msr, msr_bitmap_nested + 0xc00 / f); 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_ci } 5468c2ecf20Sopenharmony_ci} 5478c2ecf20Sopenharmony_ci 5488c2ecf20Sopenharmony_cistatic inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) 5498c2ecf20Sopenharmony_ci{ 5508c2ecf20Sopenharmony_ci int msr; 5518c2ecf20Sopenharmony_ci 5528c2ecf20Sopenharmony_ci for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { 5538c2ecf20Sopenharmony_ci unsigned word = msr / BITS_PER_LONG; 5548c2ecf20Sopenharmony_ci 5558c2ecf20Sopenharmony_ci msr_bitmap[word] = ~0; 5568c2ecf20Sopenharmony_ci msr_bitmap[word + (0x800 / sizeof(long))] = ~0; 5578c2ecf20Sopenharmony_ci } 5588c2ecf20Sopenharmony_ci} 5598c2ecf20Sopenharmony_ci 5608c2ecf20Sopenharmony_ci/* 5618c2ecf20Sopenharmony_ci * Merge 
L0's and L1's MSR bitmap, return false to indicate that 5628c2ecf20Sopenharmony_ci * we do not use the hardware. 5638c2ecf20Sopenharmony_ci */ 5648c2ecf20Sopenharmony_cistatic inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, 5658c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 5668c2ecf20Sopenharmony_ci{ 5678c2ecf20Sopenharmony_ci int msr; 5688c2ecf20Sopenharmony_ci unsigned long *msr_bitmap_l1; 5698c2ecf20Sopenharmony_ci unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; 5708c2ecf20Sopenharmony_ci struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map; 5718c2ecf20Sopenharmony_ci 5728c2ecf20Sopenharmony_ci /* Nothing to do if the MSR bitmap is not in use. */ 5738c2ecf20Sopenharmony_ci if (!cpu_has_vmx_msr_bitmap() || 5748c2ecf20Sopenharmony_ci !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) 5758c2ecf20Sopenharmony_ci return false; 5768c2ecf20Sopenharmony_ci 5778c2ecf20Sopenharmony_ci if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map)) 5788c2ecf20Sopenharmony_ci return false; 5798c2ecf20Sopenharmony_ci 5808c2ecf20Sopenharmony_ci msr_bitmap_l1 = (unsigned long *)map->hva; 5818c2ecf20Sopenharmony_ci 5828c2ecf20Sopenharmony_ci /* 5838c2ecf20Sopenharmony_ci * To keep the control flow simple, pay eight 8-byte writes (sixteen 5848c2ecf20Sopenharmony_ci * 4-byte writes on 32-bit systems) up front to enable intercepts for 5858c2ecf20Sopenharmony_ci * the x2APIC MSR range and selectively disable them below. 
5868c2ecf20Sopenharmony_ci */ 5878c2ecf20Sopenharmony_ci enable_x2apic_msr_intercepts(msr_bitmap_l0); 5888c2ecf20Sopenharmony_ci 5898c2ecf20Sopenharmony_ci if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { 5908c2ecf20Sopenharmony_ci if (nested_cpu_has_apic_reg_virt(vmcs12)) { 5918c2ecf20Sopenharmony_ci /* 5928c2ecf20Sopenharmony_ci * L0 need not intercept reads for MSRs between 0x800 5938c2ecf20Sopenharmony_ci * and 0x8ff, it just lets the processor take the value 5948c2ecf20Sopenharmony_ci * from the virtual-APIC page; take those 256 bits 5958c2ecf20Sopenharmony_ci * directly from the L1 bitmap. 5968c2ecf20Sopenharmony_ci */ 5978c2ecf20Sopenharmony_ci for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { 5988c2ecf20Sopenharmony_ci unsigned word = msr / BITS_PER_LONG; 5998c2ecf20Sopenharmony_ci 6008c2ecf20Sopenharmony_ci msr_bitmap_l0[word] = msr_bitmap_l1[word]; 6018c2ecf20Sopenharmony_ci } 6028c2ecf20Sopenharmony_ci } 6038c2ecf20Sopenharmony_ci 6048c2ecf20Sopenharmony_ci nested_vmx_disable_intercept_for_msr( 6058c2ecf20Sopenharmony_ci msr_bitmap_l1, msr_bitmap_l0, 6068c2ecf20Sopenharmony_ci X2APIC_MSR(APIC_TASKPRI), 6078c2ecf20Sopenharmony_ci MSR_TYPE_R | MSR_TYPE_W); 6088c2ecf20Sopenharmony_ci 6098c2ecf20Sopenharmony_ci if (nested_cpu_has_vid(vmcs12)) { 6108c2ecf20Sopenharmony_ci nested_vmx_disable_intercept_for_msr( 6118c2ecf20Sopenharmony_ci msr_bitmap_l1, msr_bitmap_l0, 6128c2ecf20Sopenharmony_ci X2APIC_MSR(APIC_EOI), 6138c2ecf20Sopenharmony_ci MSR_TYPE_W); 6148c2ecf20Sopenharmony_ci nested_vmx_disable_intercept_for_msr( 6158c2ecf20Sopenharmony_ci msr_bitmap_l1, msr_bitmap_l0, 6168c2ecf20Sopenharmony_ci X2APIC_MSR(APIC_SELF_IPI), 6178c2ecf20Sopenharmony_ci MSR_TYPE_W); 6188c2ecf20Sopenharmony_ci } 6198c2ecf20Sopenharmony_ci } 6208c2ecf20Sopenharmony_ci 6218c2ecf20Sopenharmony_ci /* KVM unconditionally exposes the FS/GS base MSRs to L1. 
*/ 6228c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 6238c2ecf20Sopenharmony_ci nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, 6248c2ecf20Sopenharmony_ci MSR_FS_BASE, MSR_TYPE_RW); 6258c2ecf20Sopenharmony_ci 6268c2ecf20Sopenharmony_ci nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, 6278c2ecf20Sopenharmony_ci MSR_GS_BASE, MSR_TYPE_RW); 6288c2ecf20Sopenharmony_ci 6298c2ecf20Sopenharmony_ci nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, 6308c2ecf20Sopenharmony_ci MSR_KERNEL_GS_BASE, MSR_TYPE_RW); 6318c2ecf20Sopenharmony_ci#endif 6328c2ecf20Sopenharmony_ci 6338c2ecf20Sopenharmony_ci /* 6348c2ecf20Sopenharmony_ci * Checking the L0->L1 bitmap is trying to verify two things: 6358c2ecf20Sopenharmony_ci * 6368c2ecf20Sopenharmony_ci * 1. L0 gave a permission to L1 to actually passthrough the MSR. This 6378c2ecf20Sopenharmony_ci * ensures that we do not accidentally generate an L02 MSR bitmap 6388c2ecf20Sopenharmony_ci * from the L12 MSR bitmap that is too permissive. 6398c2ecf20Sopenharmony_ci * 2. That L1 or L2s have actually used the MSR. This avoids 6408c2ecf20Sopenharmony_ci * unnecessarily merging of the bitmap if the MSR is unused. This 6418c2ecf20Sopenharmony_ci * works properly because we only update the L01 MSR bitmap lazily. 6428c2ecf20Sopenharmony_ci * So even if L0 should pass L1 these MSRs, the L01 bitmap is only 6438c2ecf20Sopenharmony_ci * updated to reflect this when L1 (or its L2s) actually write to 6448c2ecf20Sopenharmony_ci * the MSR. 
6458c2ecf20Sopenharmony_ci */ 6468c2ecf20Sopenharmony_ci if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL)) 6478c2ecf20Sopenharmony_ci nested_vmx_disable_intercept_for_msr( 6488c2ecf20Sopenharmony_ci msr_bitmap_l1, msr_bitmap_l0, 6498c2ecf20Sopenharmony_ci MSR_IA32_SPEC_CTRL, 6508c2ecf20Sopenharmony_ci MSR_TYPE_R | MSR_TYPE_W); 6518c2ecf20Sopenharmony_ci 6528c2ecf20Sopenharmony_ci if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD)) 6538c2ecf20Sopenharmony_ci nested_vmx_disable_intercept_for_msr( 6548c2ecf20Sopenharmony_ci msr_bitmap_l1, msr_bitmap_l0, 6558c2ecf20Sopenharmony_ci MSR_IA32_PRED_CMD, 6568c2ecf20Sopenharmony_ci MSR_TYPE_W); 6578c2ecf20Sopenharmony_ci 6588c2ecf20Sopenharmony_ci kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false); 6598c2ecf20Sopenharmony_ci 6608c2ecf20Sopenharmony_ci return true; 6618c2ecf20Sopenharmony_ci} 6628c2ecf20Sopenharmony_ci 6638c2ecf20Sopenharmony_cistatic void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu, 6648c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 6658c2ecf20Sopenharmony_ci{ 6668c2ecf20Sopenharmony_ci struct kvm_host_map map; 6678c2ecf20Sopenharmony_ci struct vmcs12 *shadow; 6688c2ecf20Sopenharmony_ci 6698c2ecf20Sopenharmony_ci if (!nested_cpu_has_shadow_vmcs(vmcs12) || 6708c2ecf20Sopenharmony_ci vmcs12->vmcs_link_pointer == -1ull) 6718c2ecf20Sopenharmony_ci return; 6728c2ecf20Sopenharmony_ci 6738c2ecf20Sopenharmony_ci shadow = get_shadow_vmcs12(vcpu); 6748c2ecf20Sopenharmony_ci 6758c2ecf20Sopenharmony_ci if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)) 6768c2ecf20Sopenharmony_ci return; 6778c2ecf20Sopenharmony_ci 6788c2ecf20Sopenharmony_ci memcpy(shadow, map.hva, VMCS12_SIZE); 6798c2ecf20Sopenharmony_ci kvm_vcpu_unmap(vcpu, &map, false); 6808c2ecf20Sopenharmony_ci} 6818c2ecf20Sopenharmony_ci 6828c2ecf20Sopenharmony_cistatic void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu, 6838c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 6848c2ecf20Sopenharmony_ci{ 
6858c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 6868c2ecf20Sopenharmony_ci 6878c2ecf20Sopenharmony_ci if (!nested_cpu_has_shadow_vmcs(vmcs12) || 6888c2ecf20Sopenharmony_ci vmcs12->vmcs_link_pointer == -1ull) 6898c2ecf20Sopenharmony_ci return; 6908c2ecf20Sopenharmony_ci 6918c2ecf20Sopenharmony_ci kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer, 6928c2ecf20Sopenharmony_ci get_shadow_vmcs12(vcpu), VMCS12_SIZE); 6938c2ecf20Sopenharmony_ci} 6948c2ecf20Sopenharmony_ci 6958c2ecf20Sopenharmony_ci/* 6968c2ecf20Sopenharmony_ci * In nested virtualization, check if L1 has set 6978c2ecf20Sopenharmony_ci * VM_EXIT_ACK_INTR_ON_EXIT 6988c2ecf20Sopenharmony_ci */ 6998c2ecf20Sopenharmony_cistatic bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu) 7008c2ecf20Sopenharmony_ci{ 7018c2ecf20Sopenharmony_ci return get_vmcs12(vcpu)->vm_exit_controls & 7028c2ecf20Sopenharmony_ci VM_EXIT_ACK_INTR_ON_EXIT; 7038c2ecf20Sopenharmony_ci} 7048c2ecf20Sopenharmony_ci 7058c2ecf20Sopenharmony_cistatic int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu, 7068c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 7078c2ecf20Sopenharmony_ci{ 7088c2ecf20Sopenharmony_ci if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && 7098c2ecf20Sopenharmony_ci CC(!page_address_valid(vcpu, vmcs12->apic_access_addr))) 7108c2ecf20Sopenharmony_ci return -EINVAL; 7118c2ecf20Sopenharmony_ci else 7128c2ecf20Sopenharmony_ci return 0; 7138c2ecf20Sopenharmony_ci} 7148c2ecf20Sopenharmony_ci 7158c2ecf20Sopenharmony_cistatic int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, 7168c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 7178c2ecf20Sopenharmony_ci{ 7188c2ecf20Sopenharmony_ci if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && 7198c2ecf20Sopenharmony_ci !nested_cpu_has_apic_reg_virt(vmcs12) && 7208c2ecf20Sopenharmony_ci !nested_cpu_has_vid(vmcs12) && 7218c2ecf20Sopenharmony_ci !nested_cpu_has_posted_intr(vmcs12)) 7228c2ecf20Sopenharmony_ci return 0; 7238c2ecf20Sopenharmony_ci 
7248c2ecf20Sopenharmony_ci /* 7258c2ecf20Sopenharmony_ci * If virtualize x2apic mode is enabled, 7268c2ecf20Sopenharmony_ci * virtualize apic access must be disabled. 7278c2ecf20Sopenharmony_ci */ 7288c2ecf20Sopenharmony_ci if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) && 7298c2ecf20Sopenharmony_ci nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))) 7308c2ecf20Sopenharmony_ci return -EINVAL; 7318c2ecf20Sopenharmony_ci 7328c2ecf20Sopenharmony_ci /* 7338c2ecf20Sopenharmony_ci * If virtual interrupt delivery is enabled, 7348c2ecf20Sopenharmony_ci * we must exit on external interrupts. 7358c2ecf20Sopenharmony_ci */ 7368c2ecf20Sopenharmony_ci if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu))) 7378c2ecf20Sopenharmony_ci return -EINVAL; 7388c2ecf20Sopenharmony_ci 7398c2ecf20Sopenharmony_ci /* 7408c2ecf20Sopenharmony_ci * bits 15:8 should be zero in posted_intr_nv, 7418c2ecf20Sopenharmony_ci * the descriptor address has been already checked 7428c2ecf20Sopenharmony_ci * in nested_get_vmcs12_pages. 7438c2ecf20Sopenharmony_ci * 7448c2ecf20Sopenharmony_ci * bits 5:0 of posted_intr_desc_addr should be zero. 7458c2ecf20Sopenharmony_ci */ 7468c2ecf20Sopenharmony_ci if (nested_cpu_has_posted_intr(vmcs12) && 7478c2ecf20Sopenharmony_ci (CC(!nested_cpu_has_vid(vmcs12)) || 7488c2ecf20Sopenharmony_ci CC(!nested_exit_intr_ack_set(vcpu)) || 7498c2ecf20Sopenharmony_ci CC((vmcs12->posted_intr_nv & 0xff00)) || 7508c2ecf20Sopenharmony_ci CC((vmcs12->posted_intr_desc_addr & 0x3f)) || 7518c2ecf20Sopenharmony_ci CC((vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu))))) 7528c2ecf20Sopenharmony_ci return -EINVAL; 7538c2ecf20Sopenharmony_ci 7548c2ecf20Sopenharmony_ci /* tpr shadow is needed by all apicv features. 
*/ 7558c2ecf20Sopenharmony_ci if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))) 7568c2ecf20Sopenharmony_ci return -EINVAL; 7578c2ecf20Sopenharmony_ci 7588c2ecf20Sopenharmony_ci return 0; 7598c2ecf20Sopenharmony_ci} 7608c2ecf20Sopenharmony_ci 7618c2ecf20Sopenharmony_cistatic int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, 7628c2ecf20Sopenharmony_ci u32 count, u64 addr) 7638c2ecf20Sopenharmony_ci{ 7648c2ecf20Sopenharmony_ci int maxphyaddr; 7658c2ecf20Sopenharmony_ci 7668c2ecf20Sopenharmony_ci if (count == 0) 7678c2ecf20Sopenharmony_ci return 0; 7688c2ecf20Sopenharmony_ci maxphyaddr = cpuid_maxphyaddr(vcpu); 7698c2ecf20Sopenharmony_ci if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr || 7708c2ecf20Sopenharmony_ci (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) 7718c2ecf20Sopenharmony_ci return -EINVAL; 7728c2ecf20Sopenharmony_ci 7738c2ecf20Sopenharmony_ci return 0; 7748c2ecf20Sopenharmony_ci} 7758c2ecf20Sopenharmony_ci 7768c2ecf20Sopenharmony_cistatic int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu, 7778c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 7788c2ecf20Sopenharmony_ci{ 7798c2ecf20Sopenharmony_ci if (CC(nested_vmx_check_msr_switch(vcpu, 7808c2ecf20Sopenharmony_ci vmcs12->vm_exit_msr_load_count, 7818c2ecf20Sopenharmony_ci vmcs12->vm_exit_msr_load_addr)) || 7828c2ecf20Sopenharmony_ci CC(nested_vmx_check_msr_switch(vcpu, 7838c2ecf20Sopenharmony_ci vmcs12->vm_exit_msr_store_count, 7848c2ecf20Sopenharmony_ci vmcs12->vm_exit_msr_store_addr))) 7858c2ecf20Sopenharmony_ci return -EINVAL; 7868c2ecf20Sopenharmony_ci 7878c2ecf20Sopenharmony_ci return 0; 7888c2ecf20Sopenharmony_ci} 7898c2ecf20Sopenharmony_ci 7908c2ecf20Sopenharmony_cistatic int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu, 7918c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 7928c2ecf20Sopenharmony_ci{ 7938c2ecf20Sopenharmony_ci if (CC(nested_vmx_check_msr_switch(vcpu, 7948c2ecf20Sopenharmony_ci vmcs12->vm_entry_msr_load_count, 
7958c2ecf20Sopenharmony_ci vmcs12->vm_entry_msr_load_addr))) 7968c2ecf20Sopenharmony_ci return -EINVAL; 7978c2ecf20Sopenharmony_ci 7988c2ecf20Sopenharmony_ci return 0; 7998c2ecf20Sopenharmony_ci} 8008c2ecf20Sopenharmony_ci 8018c2ecf20Sopenharmony_cistatic int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu, 8028c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 8038c2ecf20Sopenharmony_ci{ 8048c2ecf20Sopenharmony_ci if (!nested_cpu_has_pml(vmcs12)) 8058c2ecf20Sopenharmony_ci return 0; 8068c2ecf20Sopenharmony_ci 8078c2ecf20Sopenharmony_ci if (CC(!nested_cpu_has_ept(vmcs12)) || 8088c2ecf20Sopenharmony_ci CC(!page_address_valid(vcpu, vmcs12->pml_address))) 8098c2ecf20Sopenharmony_ci return -EINVAL; 8108c2ecf20Sopenharmony_ci 8118c2ecf20Sopenharmony_ci return 0; 8128c2ecf20Sopenharmony_ci} 8138c2ecf20Sopenharmony_ci 8148c2ecf20Sopenharmony_cistatic int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu, 8158c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 8168c2ecf20Sopenharmony_ci{ 8178c2ecf20Sopenharmony_ci if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) && 8188c2ecf20Sopenharmony_ci !nested_cpu_has_ept(vmcs12))) 8198c2ecf20Sopenharmony_ci return -EINVAL; 8208c2ecf20Sopenharmony_ci return 0; 8218c2ecf20Sopenharmony_ci} 8228c2ecf20Sopenharmony_ci 8238c2ecf20Sopenharmony_cistatic int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu, 8248c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 8258c2ecf20Sopenharmony_ci{ 8268c2ecf20Sopenharmony_ci if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) && 8278c2ecf20Sopenharmony_ci !nested_cpu_has_ept(vmcs12))) 8288c2ecf20Sopenharmony_ci return -EINVAL; 8298c2ecf20Sopenharmony_ci return 0; 8308c2ecf20Sopenharmony_ci} 8318c2ecf20Sopenharmony_ci 8328c2ecf20Sopenharmony_cistatic int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu, 8338c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 8348c2ecf20Sopenharmony_ci{ 8358c2ecf20Sopenharmony_ci if 
(!nested_cpu_has_shadow_vmcs(vmcs12)) 8368c2ecf20Sopenharmony_ci return 0; 8378c2ecf20Sopenharmony_ci 8388c2ecf20Sopenharmony_ci if (CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)) || 8398c2ecf20Sopenharmony_ci CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap))) 8408c2ecf20Sopenharmony_ci return -EINVAL; 8418c2ecf20Sopenharmony_ci 8428c2ecf20Sopenharmony_ci return 0; 8438c2ecf20Sopenharmony_ci} 8448c2ecf20Sopenharmony_ci 8458c2ecf20Sopenharmony_cistatic int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, 8468c2ecf20Sopenharmony_ci struct vmx_msr_entry *e) 8478c2ecf20Sopenharmony_ci{ 8488c2ecf20Sopenharmony_ci /* x2APIC MSR accesses are not allowed */ 8498c2ecf20Sopenharmony_ci if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)) 8508c2ecf20Sopenharmony_ci return -EINVAL; 8518c2ecf20Sopenharmony_ci if (CC(e->index == MSR_IA32_UCODE_WRITE) || /* SDM Table 35-2 */ 8528c2ecf20Sopenharmony_ci CC(e->index == MSR_IA32_UCODE_REV)) 8538c2ecf20Sopenharmony_ci return -EINVAL; 8548c2ecf20Sopenharmony_ci if (CC(e->reserved != 0)) 8558c2ecf20Sopenharmony_ci return -EINVAL; 8568c2ecf20Sopenharmony_ci return 0; 8578c2ecf20Sopenharmony_ci} 8588c2ecf20Sopenharmony_ci 8598c2ecf20Sopenharmony_cistatic int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu, 8608c2ecf20Sopenharmony_ci struct vmx_msr_entry *e) 8618c2ecf20Sopenharmony_ci{ 8628c2ecf20Sopenharmony_ci if (CC(e->index == MSR_FS_BASE) || 8638c2ecf20Sopenharmony_ci CC(e->index == MSR_GS_BASE) || 8648c2ecf20Sopenharmony_ci CC(e->index == MSR_IA32_SMM_MONITOR_CTL) || /* SMM is not supported */ 8658c2ecf20Sopenharmony_ci nested_vmx_msr_check_common(vcpu, e)) 8668c2ecf20Sopenharmony_ci return -EINVAL; 8678c2ecf20Sopenharmony_ci return 0; 8688c2ecf20Sopenharmony_ci} 8698c2ecf20Sopenharmony_ci 8708c2ecf20Sopenharmony_cistatic int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu, 8718c2ecf20Sopenharmony_ci struct vmx_msr_entry *e) 8728c2ecf20Sopenharmony_ci{ 8738c2ecf20Sopenharmony_ci if (CC(e->index == 
MSR_IA32_SMBASE) || /* SMM is not supported */ 8748c2ecf20Sopenharmony_ci nested_vmx_msr_check_common(vcpu, e)) 8758c2ecf20Sopenharmony_ci return -EINVAL; 8768c2ecf20Sopenharmony_ci return 0; 8778c2ecf20Sopenharmony_ci} 8788c2ecf20Sopenharmony_ci 8798c2ecf20Sopenharmony_cistatic u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu) 8808c2ecf20Sopenharmony_ci{ 8818c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 8828c2ecf20Sopenharmony_ci u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, 8838c2ecf20Sopenharmony_ci vmx->nested.msrs.misc_high); 8848c2ecf20Sopenharmony_ci 8858c2ecf20Sopenharmony_ci return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER; 8868c2ecf20Sopenharmony_ci} 8878c2ecf20Sopenharmony_ci 8888c2ecf20Sopenharmony_ci/* 8898c2ecf20Sopenharmony_ci * Load guest's/host's msr at nested entry/exit. 8908c2ecf20Sopenharmony_ci * return 0 for success, entry index for failure. 8918c2ecf20Sopenharmony_ci * 8928c2ecf20Sopenharmony_ci * One of the failure modes for MSR load/store is when a list exceeds the 8938c2ecf20Sopenharmony_ci * virtual hardware's capacity. To maintain compatibility with hardware inasmuch 8948c2ecf20Sopenharmony_ci * as possible, process all valid entries before failing rather than precheck 8958c2ecf20Sopenharmony_ci * for a capacity violation. 
8968c2ecf20Sopenharmony_ci */ 8978c2ecf20Sopenharmony_cistatic u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) 8988c2ecf20Sopenharmony_ci{ 8998c2ecf20Sopenharmony_ci u32 i; 9008c2ecf20Sopenharmony_ci struct vmx_msr_entry e; 9018c2ecf20Sopenharmony_ci u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu); 9028c2ecf20Sopenharmony_ci 9038c2ecf20Sopenharmony_ci for (i = 0; i < count; i++) { 9048c2ecf20Sopenharmony_ci if (unlikely(i >= max_msr_list_size)) 9058c2ecf20Sopenharmony_ci goto fail; 9068c2ecf20Sopenharmony_ci 9078c2ecf20Sopenharmony_ci if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e), 9088c2ecf20Sopenharmony_ci &e, sizeof(e))) { 9098c2ecf20Sopenharmony_ci pr_debug_ratelimited( 9108c2ecf20Sopenharmony_ci "%s cannot read MSR entry (%u, 0x%08llx)\n", 9118c2ecf20Sopenharmony_ci __func__, i, gpa + i * sizeof(e)); 9128c2ecf20Sopenharmony_ci goto fail; 9138c2ecf20Sopenharmony_ci } 9148c2ecf20Sopenharmony_ci if (nested_vmx_load_msr_check(vcpu, &e)) { 9158c2ecf20Sopenharmony_ci pr_debug_ratelimited( 9168c2ecf20Sopenharmony_ci "%s check failed (%u, 0x%x, 0x%x)\n", 9178c2ecf20Sopenharmony_ci __func__, i, e.index, e.reserved); 9188c2ecf20Sopenharmony_ci goto fail; 9198c2ecf20Sopenharmony_ci } 9208c2ecf20Sopenharmony_ci if (kvm_set_msr(vcpu, e.index, e.value)) { 9218c2ecf20Sopenharmony_ci pr_debug_ratelimited( 9228c2ecf20Sopenharmony_ci "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", 9238c2ecf20Sopenharmony_ci __func__, i, e.index, e.value); 9248c2ecf20Sopenharmony_ci goto fail; 9258c2ecf20Sopenharmony_ci } 9268c2ecf20Sopenharmony_ci } 9278c2ecf20Sopenharmony_ci return 0; 9288c2ecf20Sopenharmony_cifail: 9298c2ecf20Sopenharmony_ci /* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. 
*/ 9308c2ecf20Sopenharmony_ci return i + 1; 9318c2ecf20Sopenharmony_ci} 9328c2ecf20Sopenharmony_ci 9338c2ecf20Sopenharmony_cistatic bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu, 9348c2ecf20Sopenharmony_ci u32 msr_index, 9358c2ecf20Sopenharmony_ci u64 *data) 9368c2ecf20Sopenharmony_ci{ 9378c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 9388c2ecf20Sopenharmony_ci 9398c2ecf20Sopenharmony_ci /* 9408c2ecf20Sopenharmony_ci * If the L0 hypervisor stored a more accurate value for the TSC that 9418c2ecf20Sopenharmony_ci * does not include the time taken for emulation of the L2->L1 9428c2ecf20Sopenharmony_ci * VM-exit in L0, use the more accurate value. 9438c2ecf20Sopenharmony_ci */ 9448c2ecf20Sopenharmony_ci if (msr_index == MSR_IA32_TSC) { 9458c2ecf20Sopenharmony_ci int i = vmx_find_loadstore_msr_slot(&vmx->msr_autostore.guest, 9468c2ecf20Sopenharmony_ci MSR_IA32_TSC); 9478c2ecf20Sopenharmony_ci 9488c2ecf20Sopenharmony_ci if (i >= 0) { 9498c2ecf20Sopenharmony_ci u64 val = vmx->msr_autostore.guest.val[i].value; 9508c2ecf20Sopenharmony_ci 9518c2ecf20Sopenharmony_ci *data = kvm_read_l1_tsc(vcpu, val); 9528c2ecf20Sopenharmony_ci return true; 9538c2ecf20Sopenharmony_ci } 9548c2ecf20Sopenharmony_ci } 9558c2ecf20Sopenharmony_ci 9568c2ecf20Sopenharmony_ci if (kvm_get_msr(vcpu, msr_index, data)) { 9578c2ecf20Sopenharmony_ci pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__, 9588c2ecf20Sopenharmony_ci msr_index); 9598c2ecf20Sopenharmony_ci return false; 9608c2ecf20Sopenharmony_ci } 9618c2ecf20Sopenharmony_ci return true; 9628c2ecf20Sopenharmony_ci} 9638c2ecf20Sopenharmony_ci 9648c2ecf20Sopenharmony_cistatic bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i, 9658c2ecf20Sopenharmony_ci struct vmx_msr_entry *e) 9668c2ecf20Sopenharmony_ci{ 9678c2ecf20Sopenharmony_ci if (kvm_vcpu_read_guest(vcpu, 9688c2ecf20Sopenharmony_ci gpa + i * sizeof(*e), 9698c2ecf20Sopenharmony_ci e, 2 * sizeof(u32))) { 9708c2ecf20Sopenharmony_ci 
pr_debug_ratelimited( 9718c2ecf20Sopenharmony_ci "%s cannot read MSR entry (%u, 0x%08llx)\n", 9728c2ecf20Sopenharmony_ci __func__, i, gpa + i * sizeof(*e)); 9738c2ecf20Sopenharmony_ci return false; 9748c2ecf20Sopenharmony_ci } 9758c2ecf20Sopenharmony_ci if (nested_vmx_store_msr_check(vcpu, e)) { 9768c2ecf20Sopenharmony_ci pr_debug_ratelimited( 9778c2ecf20Sopenharmony_ci "%s check failed (%u, 0x%x, 0x%x)\n", 9788c2ecf20Sopenharmony_ci __func__, i, e->index, e->reserved); 9798c2ecf20Sopenharmony_ci return false; 9808c2ecf20Sopenharmony_ci } 9818c2ecf20Sopenharmony_ci return true; 9828c2ecf20Sopenharmony_ci} 9838c2ecf20Sopenharmony_ci 9848c2ecf20Sopenharmony_cistatic int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) 9858c2ecf20Sopenharmony_ci{ 9868c2ecf20Sopenharmony_ci u64 data; 9878c2ecf20Sopenharmony_ci u32 i; 9888c2ecf20Sopenharmony_ci struct vmx_msr_entry e; 9898c2ecf20Sopenharmony_ci u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu); 9908c2ecf20Sopenharmony_ci 9918c2ecf20Sopenharmony_ci for (i = 0; i < count; i++) { 9928c2ecf20Sopenharmony_ci if (unlikely(i >= max_msr_list_size)) 9938c2ecf20Sopenharmony_ci return -EINVAL; 9948c2ecf20Sopenharmony_ci 9958c2ecf20Sopenharmony_ci if (!read_and_check_msr_entry(vcpu, gpa, i, &e)) 9968c2ecf20Sopenharmony_ci return -EINVAL; 9978c2ecf20Sopenharmony_ci 9988c2ecf20Sopenharmony_ci if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data)) 9998c2ecf20Sopenharmony_ci return -EINVAL; 10008c2ecf20Sopenharmony_ci 10018c2ecf20Sopenharmony_ci if (kvm_vcpu_write_guest(vcpu, 10028c2ecf20Sopenharmony_ci gpa + i * sizeof(e) + 10038c2ecf20Sopenharmony_ci offsetof(struct vmx_msr_entry, value), 10048c2ecf20Sopenharmony_ci &data, sizeof(data))) { 10058c2ecf20Sopenharmony_ci pr_debug_ratelimited( 10068c2ecf20Sopenharmony_ci "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", 10078c2ecf20Sopenharmony_ci __func__, i, e.index, data); 10088c2ecf20Sopenharmony_ci return -EINVAL; 10098c2ecf20Sopenharmony_ci } 
10108c2ecf20Sopenharmony_ci } 10118c2ecf20Sopenharmony_ci return 0; 10128c2ecf20Sopenharmony_ci} 10138c2ecf20Sopenharmony_ci 10148c2ecf20Sopenharmony_cistatic bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index) 10158c2ecf20Sopenharmony_ci{ 10168c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 10178c2ecf20Sopenharmony_ci u32 count = vmcs12->vm_exit_msr_store_count; 10188c2ecf20Sopenharmony_ci u64 gpa = vmcs12->vm_exit_msr_store_addr; 10198c2ecf20Sopenharmony_ci struct vmx_msr_entry e; 10208c2ecf20Sopenharmony_ci u32 i; 10218c2ecf20Sopenharmony_ci 10228c2ecf20Sopenharmony_ci for (i = 0; i < count; i++) { 10238c2ecf20Sopenharmony_ci if (!read_and_check_msr_entry(vcpu, gpa, i, &e)) 10248c2ecf20Sopenharmony_ci return false; 10258c2ecf20Sopenharmony_ci 10268c2ecf20Sopenharmony_ci if (e.index == msr_index) 10278c2ecf20Sopenharmony_ci return true; 10288c2ecf20Sopenharmony_ci } 10298c2ecf20Sopenharmony_ci return false; 10308c2ecf20Sopenharmony_ci} 10318c2ecf20Sopenharmony_ci 10328c2ecf20Sopenharmony_cistatic void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu, 10338c2ecf20Sopenharmony_ci u32 msr_index) 10348c2ecf20Sopenharmony_ci{ 10358c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 10368c2ecf20Sopenharmony_ci struct vmx_msrs *autostore = &vmx->msr_autostore.guest; 10378c2ecf20Sopenharmony_ci bool in_vmcs12_store_list; 10388c2ecf20Sopenharmony_ci int msr_autostore_slot; 10398c2ecf20Sopenharmony_ci bool in_autostore_list; 10408c2ecf20Sopenharmony_ci int last; 10418c2ecf20Sopenharmony_ci 10428c2ecf20Sopenharmony_ci msr_autostore_slot = vmx_find_loadstore_msr_slot(autostore, msr_index); 10438c2ecf20Sopenharmony_ci in_autostore_list = msr_autostore_slot >= 0; 10448c2ecf20Sopenharmony_ci in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index); 10458c2ecf20Sopenharmony_ci 10468c2ecf20Sopenharmony_ci if (in_vmcs12_store_list && !in_autostore_list) { 10478c2ecf20Sopenharmony_ci if (autostore->nr == 
MAX_NR_LOADSTORE_MSRS) { 10488c2ecf20Sopenharmony_ci /* 10498c2ecf20Sopenharmony_ci * Emulated VMEntry does not fail here. Instead a less 10508c2ecf20Sopenharmony_ci * accurate value will be returned by 10518c2ecf20Sopenharmony_ci * nested_vmx_get_vmexit_msr_value() using kvm_get_msr() 10528c2ecf20Sopenharmony_ci * instead of reading the value from the vmcs02 VMExit 10538c2ecf20Sopenharmony_ci * MSR-store area. 10548c2ecf20Sopenharmony_ci */ 10558c2ecf20Sopenharmony_ci pr_warn_ratelimited( 10568c2ecf20Sopenharmony_ci "Not enough msr entries in msr_autostore. Can't add msr %x\n", 10578c2ecf20Sopenharmony_ci msr_index); 10588c2ecf20Sopenharmony_ci return; 10598c2ecf20Sopenharmony_ci } 10608c2ecf20Sopenharmony_ci last = autostore->nr++; 10618c2ecf20Sopenharmony_ci autostore->val[last].index = msr_index; 10628c2ecf20Sopenharmony_ci } else if (!in_vmcs12_store_list && in_autostore_list) { 10638c2ecf20Sopenharmony_ci last = --autostore->nr; 10648c2ecf20Sopenharmony_ci autostore->val[msr_autostore_slot] = autostore->val[last]; 10658c2ecf20Sopenharmony_ci } 10668c2ecf20Sopenharmony_ci} 10678c2ecf20Sopenharmony_ci 10688c2ecf20Sopenharmony_cistatic bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val) 10698c2ecf20Sopenharmony_ci{ 10708c2ecf20Sopenharmony_ci unsigned long invalid_mask; 10718c2ecf20Sopenharmony_ci 10728c2ecf20Sopenharmony_ci invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu); 10738c2ecf20Sopenharmony_ci return (val & invalid_mask) == 0; 10748c2ecf20Sopenharmony_ci} 10758c2ecf20Sopenharmony_ci 10768c2ecf20Sopenharmony_ci/* 10778c2ecf20Sopenharmony_ci * Returns true if the MMU needs to be sync'd on nested VM-Enter/VM-Exit. 10788c2ecf20Sopenharmony_ci * tl;dr: the MMU needs a sync if L0 is using shadow paging and L1 didn't 10798c2ecf20Sopenharmony_ci * enable VPID for L2 (implying it expects a TLB flush on VMX transitions). 10808c2ecf20Sopenharmony_ci * Here's why. 
 *
 * If EPT is enabled by L0 a sync is never needed:
 * - if it is disabled by L1, then L0 is not shadowing L1 or L2 PTEs, there
 *   cannot be unsync'd SPTEs for either L1 or L2.
 *
 * - if it is also enabled by L1, then L0 doesn't need to sync on VM-Enter
 *   as VM-Enter isn't required to invalidate guest-physical mappings
 *   (irrespective of VPID), i.e. L1 can't rely on the (virtual) CPU to flush
 *   stale guest-physical mappings for L2 from the TLB.  And as above, L0 isn't
 *   shadowing L1 PTEs so there are no unsync'd SPTEs to sync on VM-Exit.
 *
 * If EPT is disabled by L0:
 * - if VPID is enabled by L1 (for L2), the situation is similar to when L1
 *   enables EPT: L0 doesn't need to sync as VM-Enter and VM-Exit aren't
 *   required to invalidate linear mappings (EPT is disabled so there are
 *   no combined or guest-physical mappings), i.e. L1 can't rely on the
 *   (virtual) CPU to flush stale linear mappings for either L2 or itself (L1).
 *
 * - however if VPID is disabled by L1, then a sync is needed as L1 expects all
 *   linear mappings (EPT is disabled so there are no combined or guest-physical
 *   mappings) to be invalidated on both VM-Enter and VM-Exit.
 *
 * Note, this logic is subtly different than nested_has_guest_tlb_tag(), which
 * additionally checks that L2 has been assigned a VPID (when EPT is disabled).
11058c2ecf20Sopenharmony_ci * Whether or not L2 has been assigned a VPID by L0 is irrelevant with respect 11068c2ecf20Sopenharmony_ci * to L1's expectations, e.g. L0 needs to invalidate hardware TLB entries if L2 11078c2ecf20Sopenharmony_ci * doesn't have a unique VPID to prevent reusing L1's entries (assuming L1 has 11088c2ecf20Sopenharmony_ci * been assigned a VPID), but L0 doesn't need to do a MMU sync because L1 11098c2ecf20Sopenharmony_ci * doesn't expect stale (virtual) TLB entries to be flushed, i.e. L1 doesn't 11108c2ecf20Sopenharmony_ci * know that L0 will flush the TLB and so L1 will do INVVPID as needed to flush 11118c2ecf20Sopenharmony_ci * stale TLB entries, at which point L0 will sync L2's MMU. 11128c2ecf20Sopenharmony_ci */ 11138c2ecf20Sopenharmony_cistatic bool nested_vmx_transition_mmu_sync(struct kvm_vcpu *vcpu) 11148c2ecf20Sopenharmony_ci{ 11158c2ecf20Sopenharmony_ci return !enable_ept && !nested_cpu_has_vpid(get_vmcs12(vcpu)); 11168c2ecf20Sopenharmony_ci} 11178c2ecf20Sopenharmony_ci 11188c2ecf20Sopenharmony_ci/* 11198c2ecf20Sopenharmony_ci * Load guest's/host's cr3 at nested entry/exit. @nested_ept is true if we are 11208c2ecf20Sopenharmony_ci * emulating VM-Entry into a guest with EPT enabled. On failure, the expected 11218c2ecf20Sopenharmony_ci * Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to 11228c2ecf20Sopenharmony_ci * @entry_failure_code. 
11238c2ecf20Sopenharmony_ci */ 11248c2ecf20Sopenharmony_cistatic int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept, 11258c2ecf20Sopenharmony_ci enum vm_entry_failure_code *entry_failure_code) 11268c2ecf20Sopenharmony_ci{ 11278c2ecf20Sopenharmony_ci if (CC(!nested_cr3_valid(vcpu, cr3))) { 11288c2ecf20Sopenharmony_ci *entry_failure_code = ENTRY_FAIL_DEFAULT; 11298c2ecf20Sopenharmony_ci return -EINVAL; 11308c2ecf20Sopenharmony_ci } 11318c2ecf20Sopenharmony_ci 11328c2ecf20Sopenharmony_ci /* 11338c2ecf20Sopenharmony_ci * If PAE paging and EPT are both on, CR3 is not used by the CPU and 11348c2ecf20Sopenharmony_ci * must not be dereferenced. 11358c2ecf20Sopenharmony_ci */ 11368c2ecf20Sopenharmony_ci if (!nested_ept && is_pae_paging(vcpu) && 11378c2ecf20Sopenharmony_ci (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) { 11388c2ecf20Sopenharmony_ci if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) { 11398c2ecf20Sopenharmony_ci *entry_failure_code = ENTRY_FAIL_PDPTE; 11408c2ecf20Sopenharmony_ci return -EINVAL; 11418c2ecf20Sopenharmony_ci } 11428c2ecf20Sopenharmony_ci } 11438c2ecf20Sopenharmony_ci 11448c2ecf20Sopenharmony_ci /* 11458c2ecf20Sopenharmony_ci * Unconditionally skip the TLB flush on fast CR3 switch, all TLB 11468c2ecf20Sopenharmony_ci * flushes are handled by nested_vmx_transition_tlb_flush(). 11478c2ecf20Sopenharmony_ci */ 11488c2ecf20Sopenharmony_ci if (!nested_ept) { 11498c2ecf20Sopenharmony_ci kvm_mmu_new_pgd(vcpu, cr3, true, true); 11508c2ecf20Sopenharmony_ci 11518c2ecf20Sopenharmony_ci /* 11528c2ecf20Sopenharmony_ci * A TLB flush on VM-Enter/VM-Exit flushes all linear mappings 11538c2ecf20Sopenharmony_ci * across all PCIDs, i.e. all PGDs need to be synchronized. 11548c2ecf20Sopenharmony_ci * See nested_vmx_transition_mmu_sync() for more details. 
11558c2ecf20Sopenharmony_ci */ 11568c2ecf20Sopenharmony_ci if (nested_vmx_transition_mmu_sync(vcpu)) 11578c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); 11588c2ecf20Sopenharmony_ci } 11598c2ecf20Sopenharmony_ci 11608c2ecf20Sopenharmony_ci vcpu->arch.cr3 = cr3; 11618c2ecf20Sopenharmony_ci kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); 11628c2ecf20Sopenharmony_ci 11638c2ecf20Sopenharmony_ci kvm_init_mmu(vcpu, false); 11648c2ecf20Sopenharmony_ci 11658c2ecf20Sopenharmony_ci return 0; 11668c2ecf20Sopenharmony_ci} 11678c2ecf20Sopenharmony_ci 11688c2ecf20Sopenharmony_ci/* 11698c2ecf20Sopenharmony_ci * Returns if KVM is able to config CPU to tag TLB entries 11708c2ecf20Sopenharmony_ci * populated by L2 differently than TLB entries populated 11718c2ecf20Sopenharmony_ci * by L1. 11728c2ecf20Sopenharmony_ci * 11738c2ecf20Sopenharmony_ci * If L0 uses EPT, L1 and L2 run with different EPTP because 11748c2ecf20Sopenharmony_ci * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries 11758c2ecf20Sopenharmony_ci * are tagged with different EPTP. 11768c2ecf20Sopenharmony_ci * 11778c2ecf20Sopenharmony_ci * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged 11788c2ecf20Sopenharmony_ci * with different VPID (L1 entries are tagged with vmx->vpid 11798c2ecf20Sopenharmony_ci * while L2 entries are tagged with vmx->nested.vpid02). 
11808c2ecf20Sopenharmony_ci */ 11818c2ecf20Sopenharmony_cistatic bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu) 11828c2ecf20Sopenharmony_ci{ 11838c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 11848c2ecf20Sopenharmony_ci 11858c2ecf20Sopenharmony_ci return enable_ept || 11868c2ecf20Sopenharmony_ci (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02); 11878c2ecf20Sopenharmony_ci} 11888c2ecf20Sopenharmony_ci 11898c2ecf20Sopenharmony_cistatic void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu, 11908c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12, 11918c2ecf20Sopenharmony_ci bool is_vmenter) 11928c2ecf20Sopenharmony_ci{ 11938c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 11948c2ecf20Sopenharmony_ci 11958c2ecf20Sopenharmony_ci /* 11968c2ecf20Sopenharmony_ci * If VPID is disabled, linear and combined mappings are flushed on 11978c2ecf20Sopenharmony_ci * VM-Enter/VM-Exit, and guest-physical mappings are valid only for 11988c2ecf20Sopenharmony_ci * their associated EPTP. 11998c2ecf20Sopenharmony_ci */ 12008c2ecf20Sopenharmony_ci if (!enable_vpid) 12018c2ecf20Sopenharmony_ci return; 12028c2ecf20Sopenharmony_ci 12038c2ecf20Sopenharmony_ci /* 12048c2ecf20Sopenharmony_ci * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings 12058c2ecf20Sopenharmony_ci * for *all* contexts to be flushed on VM-Enter/VM-Exit. 12068c2ecf20Sopenharmony_ci * 12078c2ecf20Sopenharmony_ci * If VPID is enabled and used by vmc12, but L2 does not have a unique 12088c2ecf20Sopenharmony_ci * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate 12098c2ecf20Sopenharmony_ci * a VPID for L2, flush the current context as the effective ASID is 12108c2ecf20Sopenharmony_ci * common to both L1 and L2. 
12118c2ecf20Sopenharmony_ci * 12128c2ecf20Sopenharmony_ci * Defer the flush so that it runs after vmcs02.EPTP has been set by 12138c2ecf20Sopenharmony_ci * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid 12148c2ecf20Sopenharmony_ci * redundant flushes further down the nested pipeline. 12158c2ecf20Sopenharmony_ci * 12168c2ecf20Sopenharmony_ci * If a TLB flush isn't required due to any of the above, and vpid12 is 12178c2ecf20Sopenharmony_ci * changing then the new "virtual" VPID (vpid12) will reuse the same 12188c2ecf20Sopenharmony_ci * "real" VPID (vpid02), and so needs to be sync'd. There is no direct 12198c2ecf20Sopenharmony_ci * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for 12208c2ecf20Sopenharmony_ci * all nested vCPUs. 12218c2ecf20Sopenharmony_ci */ 12228c2ecf20Sopenharmony_ci if (!nested_cpu_has_vpid(vmcs12)) { 12238c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 12248c2ecf20Sopenharmony_ci } else if (!nested_has_guest_tlb_tag(vcpu)) { 12258c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); 12268c2ecf20Sopenharmony_ci } else if (is_vmenter && 12278c2ecf20Sopenharmony_ci vmcs12->virtual_processor_id != vmx->nested.last_vpid) { 12288c2ecf20Sopenharmony_ci vmx->nested.last_vpid = vmcs12->virtual_processor_id; 12298c2ecf20Sopenharmony_ci vpid_sync_context(nested_get_vpid02(vcpu)); 12308c2ecf20Sopenharmony_ci } 12318c2ecf20Sopenharmony_ci} 12328c2ecf20Sopenharmony_ci 12338c2ecf20Sopenharmony_cistatic bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) 12348c2ecf20Sopenharmony_ci{ 12358c2ecf20Sopenharmony_ci superset &= mask; 12368c2ecf20Sopenharmony_ci subset &= mask; 12378c2ecf20Sopenharmony_ci 12388c2ecf20Sopenharmony_ci return (superset | subset) == superset; 12398c2ecf20Sopenharmony_ci} 12408c2ecf20Sopenharmony_ci 12418c2ecf20Sopenharmony_cistatic int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) 12428c2ecf20Sopenharmony_ci{ 12438c2ecf20Sopenharmony_ci const u64 
feature_and_reserved = 12448c2ecf20Sopenharmony_ci /* feature (except bit 48; see below) */ 12458c2ecf20Sopenharmony_ci BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | 12468c2ecf20Sopenharmony_ci /* reserved */ 12478c2ecf20Sopenharmony_ci BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); 12488c2ecf20Sopenharmony_ci u64 vmx_basic = vmcs_config.nested.basic; 12498c2ecf20Sopenharmony_ci 12508c2ecf20Sopenharmony_ci if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) 12518c2ecf20Sopenharmony_ci return -EINVAL; 12528c2ecf20Sopenharmony_ci 12538c2ecf20Sopenharmony_ci /* 12548c2ecf20Sopenharmony_ci * KVM does not emulate a version of VMX that constrains physical 12558c2ecf20Sopenharmony_ci * addresses of VMX structures (e.g. VMCS) to 32-bits. 12568c2ecf20Sopenharmony_ci */ 12578c2ecf20Sopenharmony_ci if (data & BIT_ULL(48)) 12588c2ecf20Sopenharmony_ci return -EINVAL; 12598c2ecf20Sopenharmony_ci 12608c2ecf20Sopenharmony_ci if (vmx_basic_vmcs_revision_id(vmx_basic) != 12618c2ecf20Sopenharmony_ci vmx_basic_vmcs_revision_id(data)) 12628c2ecf20Sopenharmony_ci return -EINVAL; 12638c2ecf20Sopenharmony_ci 12648c2ecf20Sopenharmony_ci if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data)) 12658c2ecf20Sopenharmony_ci return -EINVAL; 12668c2ecf20Sopenharmony_ci 12678c2ecf20Sopenharmony_ci vmx->nested.msrs.basic = data; 12688c2ecf20Sopenharmony_ci return 0; 12698c2ecf20Sopenharmony_ci} 12708c2ecf20Sopenharmony_ci 12718c2ecf20Sopenharmony_cistatic void vmx_get_control_msr(struct nested_vmx_msrs *msrs, u32 msr_index, 12728c2ecf20Sopenharmony_ci u32 **low, u32 **high) 12738c2ecf20Sopenharmony_ci{ 12748c2ecf20Sopenharmony_ci switch (msr_index) { 12758c2ecf20Sopenharmony_ci case MSR_IA32_VMX_TRUE_PINBASED_CTLS: 12768c2ecf20Sopenharmony_ci *low = &msrs->pinbased_ctls_low; 12778c2ecf20Sopenharmony_ci *high = &msrs->pinbased_ctls_high; 12788c2ecf20Sopenharmony_ci break; 12798c2ecf20Sopenharmony_ci case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: 12808c2ecf20Sopenharmony_ci *low = 
&msrs->procbased_ctls_low; 12818c2ecf20Sopenharmony_ci *high = &msrs->procbased_ctls_high; 12828c2ecf20Sopenharmony_ci break; 12838c2ecf20Sopenharmony_ci case MSR_IA32_VMX_TRUE_EXIT_CTLS: 12848c2ecf20Sopenharmony_ci *low = &msrs->exit_ctls_low; 12858c2ecf20Sopenharmony_ci *high = &msrs->exit_ctls_high; 12868c2ecf20Sopenharmony_ci break; 12878c2ecf20Sopenharmony_ci case MSR_IA32_VMX_TRUE_ENTRY_CTLS: 12888c2ecf20Sopenharmony_ci *low = &msrs->entry_ctls_low; 12898c2ecf20Sopenharmony_ci *high = &msrs->entry_ctls_high; 12908c2ecf20Sopenharmony_ci break; 12918c2ecf20Sopenharmony_ci case MSR_IA32_VMX_PROCBASED_CTLS2: 12928c2ecf20Sopenharmony_ci *low = &msrs->secondary_ctls_low; 12938c2ecf20Sopenharmony_ci *high = &msrs->secondary_ctls_high; 12948c2ecf20Sopenharmony_ci break; 12958c2ecf20Sopenharmony_ci default: 12968c2ecf20Sopenharmony_ci BUG(); 12978c2ecf20Sopenharmony_ci } 12988c2ecf20Sopenharmony_ci} 12998c2ecf20Sopenharmony_ci 13008c2ecf20Sopenharmony_cistatic int 13018c2ecf20Sopenharmony_civmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) 13028c2ecf20Sopenharmony_ci{ 13038c2ecf20Sopenharmony_ci u32 *lowp, *highp; 13048c2ecf20Sopenharmony_ci u64 supported; 13058c2ecf20Sopenharmony_ci 13068c2ecf20Sopenharmony_ci vmx_get_control_msr(&vmcs_config.nested, msr_index, &lowp, &highp); 13078c2ecf20Sopenharmony_ci 13088c2ecf20Sopenharmony_ci supported = vmx_control_msr(*lowp, *highp); 13098c2ecf20Sopenharmony_ci 13108c2ecf20Sopenharmony_ci /* Check must-be-1 bits are still 1. */ 13118c2ecf20Sopenharmony_ci if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0))) 13128c2ecf20Sopenharmony_ci return -EINVAL; 13138c2ecf20Sopenharmony_ci 13148c2ecf20Sopenharmony_ci /* Check must-be-0 bits are still 0. 
*/ 13158c2ecf20Sopenharmony_ci if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32))) 13168c2ecf20Sopenharmony_ci return -EINVAL; 13178c2ecf20Sopenharmony_ci 13188c2ecf20Sopenharmony_ci vmx_get_control_msr(&vmx->nested.msrs, msr_index, &lowp, &highp); 13198c2ecf20Sopenharmony_ci *lowp = data; 13208c2ecf20Sopenharmony_ci *highp = data >> 32; 13218c2ecf20Sopenharmony_ci return 0; 13228c2ecf20Sopenharmony_ci} 13238c2ecf20Sopenharmony_ci 13248c2ecf20Sopenharmony_cistatic int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) 13258c2ecf20Sopenharmony_ci{ 13268c2ecf20Sopenharmony_ci const u64 feature_and_reserved_bits = 13278c2ecf20Sopenharmony_ci /* feature */ 13288c2ecf20Sopenharmony_ci BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) | 13298c2ecf20Sopenharmony_ci BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) | 13308c2ecf20Sopenharmony_ci /* reserved */ 13318c2ecf20Sopenharmony_ci GENMASK_ULL(13, 9) | BIT_ULL(31); 13328c2ecf20Sopenharmony_ci u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low, 13338c2ecf20Sopenharmony_ci vmcs_config.nested.misc_high); 13348c2ecf20Sopenharmony_ci 13358c2ecf20Sopenharmony_ci if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) 13368c2ecf20Sopenharmony_ci return -EINVAL; 13378c2ecf20Sopenharmony_ci 13388c2ecf20Sopenharmony_ci if ((vmx->nested.msrs.pinbased_ctls_high & 13398c2ecf20Sopenharmony_ci PIN_BASED_VMX_PREEMPTION_TIMER) && 13408c2ecf20Sopenharmony_ci vmx_misc_preemption_timer_rate(data) != 13418c2ecf20Sopenharmony_ci vmx_misc_preemption_timer_rate(vmx_misc)) 13428c2ecf20Sopenharmony_ci return -EINVAL; 13438c2ecf20Sopenharmony_ci 13448c2ecf20Sopenharmony_ci if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc)) 13458c2ecf20Sopenharmony_ci return -EINVAL; 13468c2ecf20Sopenharmony_ci 13478c2ecf20Sopenharmony_ci if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc)) 13488c2ecf20Sopenharmony_ci return -EINVAL; 13498c2ecf20Sopenharmony_ci 13508c2ecf20Sopenharmony_ci if (vmx_misc_mseg_revid(data) 
!= vmx_misc_mseg_revid(vmx_misc)) 13518c2ecf20Sopenharmony_ci return -EINVAL; 13528c2ecf20Sopenharmony_ci 13538c2ecf20Sopenharmony_ci vmx->nested.msrs.misc_low = data; 13548c2ecf20Sopenharmony_ci vmx->nested.msrs.misc_high = data >> 32; 13558c2ecf20Sopenharmony_ci 13568c2ecf20Sopenharmony_ci return 0; 13578c2ecf20Sopenharmony_ci} 13588c2ecf20Sopenharmony_ci 13598c2ecf20Sopenharmony_cistatic int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data) 13608c2ecf20Sopenharmony_ci{ 13618c2ecf20Sopenharmony_ci u64 vmx_ept_vpid_cap = vmx_control_msr(vmcs_config.nested.ept_caps, 13628c2ecf20Sopenharmony_ci vmcs_config.nested.vpid_caps); 13638c2ecf20Sopenharmony_ci 13648c2ecf20Sopenharmony_ci /* Every bit is either reserved or a feature bit. */ 13658c2ecf20Sopenharmony_ci if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL)) 13668c2ecf20Sopenharmony_ci return -EINVAL; 13678c2ecf20Sopenharmony_ci 13688c2ecf20Sopenharmony_ci vmx->nested.msrs.ept_caps = data; 13698c2ecf20Sopenharmony_ci vmx->nested.msrs.vpid_caps = data >> 32; 13708c2ecf20Sopenharmony_ci return 0; 13718c2ecf20Sopenharmony_ci} 13728c2ecf20Sopenharmony_ci 13738c2ecf20Sopenharmony_cistatic u64 *vmx_get_fixed0_msr(struct nested_vmx_msrs *msrs, u32 msr_index) 13748c2ecf20Sopenharmony_ci{ 13758c2ecf20Sopenharmony_ci switch (msr_index) { 13768c2ecf20Sopenharmony_ci case MSR_IA32_VMX_CR0_FIXED0: 13778c2ecf20Sopenharmony_ci return &msrs->cr0_fixed0; 13788c2ecf20Sopenharmony_ci case MSR_IA32_VMX_CR4_FIXED0: 13798c2ecf20Sopenharmony_ci return &msrs->cr4_fixed0; 13808c2ecf20Sopenharmony_ci default: 13818c2ecf20Sopenharmony_ci BUG(); 13828c2ecf20Sopenharmony_ci } 13838c2ecf20Sopenharmony_ci} 13848c2ecf20Sopenharmony_ci 13858c2ecf20Sopenharmony_cistatic int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) 13868c2ecf20Sopenharmony_ci{ 13878c2ecf20Sopenharmony_ci const u64 *msr = vmx_get_fixed0_msr(&vmcs_config.nested, msr_index); 13888c2ecf20Sopenharmony_ci 13898c2ecf20Sopenharmony_ci /* 
13908c2ecf20Sopenharmony_ci * 1 bits (which indicates bits which "must-be-1" during VMX operation) 13918c2ecf20Sopenharmony_ci * must be 1 in the restored value. 13928c2ecf20Sopenharmony_ci */ 13938c2ecf20Sopenharmony_ci if (!is_bitwise_subset(data, *msr, -1ULL)) 13948c2ecf20Sopenharmony_ci return -EINVAL; 13958c2ecf20Sopenharmony_ci 13968c2ecf20Sopenharmony_ci *vmx_get_fixed0_msr(&vmx->nested.msrs, msr_index) = data; 13978c2ecf20Sopenharmony_ci return 0; 13988c2ecf20Sopenharmony_ci} 13998c2ecf20Sopenharmony_ci 14008c2ecf20Sopenharmony_ci/* 14018c2ecf20Sopenharmony_ci * Called when userspace is restoring VMX MSRs. 14028c2ecf20Sopenharmony_ci * 14038c2ecf20Sopenharmony_ci * Returns 0 on success, non-0 otherwise. 14048c2ecf20Sopenharmony_ci */ 14058c2ecf20Sopenharmony_ciint vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) 14068c2ecf20Sopenharmony_ci{ 14078c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 14088c2ecf20Sopenharmony_ci 14098c2ecf20Sopenharmony_ci /* 14108c2ecf20Sopenharmony_ci * Don't allow changes to the VMX capability MSRs while the vCPU 14118c2ecf20Sopenharmony_ci * is in VMX operation. 14128c2ecf20Sopenharmony_ci */ 14138c2ecf20Sopenharmony_ci if (vmx->nested.vmxon) 14148c2ecf20Sopenharmony_ci return -EBUSY; 14158c2ecf20Sopenharmony_ci 14168c2ecf20Sopenharmony_ci switch (msr_index) { 14178c2ecf20Sopenharmony_ci case MSR_IA32_VMX_BASIC: 14188c2ecf20Sopenharmony_ci return vmx_restore_vmx_basic(vmx, data); 14198c2ecf20Sopenharmony_ci case MSR_IA32_VMX_PINBASED_CTLS: 14208c2ecf20Sopenharmony_ci case MSR_IA32_VMX_PROCBASED_CTLS: 14218c2ecf20Sopenharmony_ci case MSR_IA32_VMX_EXIT_CTLS: 14228c2ecf20Sopenharmony_ci case MSR_IA32_VMX_ENTRY_CTLS: 14238c2ecf20Sopenharmony_ci /* 14248c2ecf20Sopenharmony_ci * The "non-true" VMX capability MSRs are generated from the 14258c2ecf20Sopenharmony_ci * "true" MSRs, so we do not support restoring them directly. 
14268c2ecf20Sopenharmony_ci * 14278c2ecf20Sopenharmony_ci * If userspace wants to emulate VMX_BASIC[55]=0, userspace 14288c2ecf20Sopenharmony_ci * should restore the "true" MSRs with the must-be-1 bits 14298c2ecf20Sopenharmony_ci * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND 14308c2ecf20Sopenharmony_ci * DEFAULT SETTINGS". 14318c2ecf20Sopenharmony_ci */ 14328c2ecf20Sopenharmony_ci return -EINVAL; 14338c2ecf20Sopenharmony_ci case MSR_IA32_VMX_TRUE_PINBASED_CTLS: 14348c2ecf20Sopenharmony_ci case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: 14358c2ecf20Sopenharmony_ci case MSR_IA32_VMX_TRUE_EXIT_CTLS: 14368c2ecf20Sopenharmony_ci case MSR_IA32_VMX_TRUE_ENTRY_CTLS: 14378c2ecf20Sopenharmony_ci case MSR_IA32_VMX_PROCBASED_CTLS2: 14388c2ecf20Sopenharmony_ci return vmx_restore_control_msr(vmx, msr_index, data); 14398c2ecf20Sopenharmony_ci case MSR_IA32_VMX_MISC: 14408c2ecf20Sopenharmony_ci return vmx_restore_vmx_misc(vmx, data); 14418c2ecf20Sopenharmony_ci case MSR_IA32_VMX_CR0_FIXED0: 14428c2ecf20Sopenharmony_ci case MSR_IA32_VMX_CR4_FIXED0: 14438c2ecf20Sopenharmony_ci return vmx_restore_fixed0_msr(vmx, msr_index, data); 14448c2ecf20Sopenharmony_ci case MSR_IA32_VMX_CR0_FIXED1: 14458c2ecf20Sopenharmony_ci case MSR_IA32_VMX_CR4_FIXED1: 14468c2ecf20Sopenharmony_ci /* 14478c2ecf20Sopenharmony_ci * These MSRs are generated based on the vCPU's CPUID, so we 14488c2ecf20Sopenharmony_ci * do not support restoring them directly. 
14498c2ecf20Sopenharmony_ci */ 14508c2ecf20Sopenharmony_ci return -EINVAL; 14518c2ecf20Sopenharmony_ci case MSR_IA32_VMX_EPT_VPID_CAP: 14528c2ecf20Sopenharmony_ci return vmx_restore_vmx_ept_vpid_cap(vmx, data); 14538c2ecf20Sopenharmony_ci case MSR_IA32_VMX_VMCS_ENUM: 14548c2ecf20Sopenharmony_ci vmx->nested.msrs.vmcs_enum = data; 14558c2ecf20Sopenharmony_ci return 0; 14568c2ecf20Sopenharmony_ci case MSR_IA32_VMX_VMFUNC: 14578c2ecf20Sopenharmony_ci if (data & ~vmcs_config.nested.vmfunc_controls) 14588c2ecf20Sopenharmony_ci return -EINVAL; 14598c2ecf20Sopenharmony_ci vmx->nested.msrs.vmfunc_controls = data; 14608c2ecf20Sopenharmony_ci return 0; 14618c2ecf20Sopenharmony_ci default: 14628c2ecf20Sopenharmony_ci /* 14638c2ecf20Sopenharmony_ci * The rest of the VMX capability MSRs do not support restore. 14648c2ecf20Sopenharmony_ci */ 14658c2ecf20Sopenharmony_ci return -EINVAL; 14668c2ecf20Sopenharmony_ci } 14678c2ecf20Sopenharmony_ci} 14688c2ecf20Sopenharmony_ci 14698c2ecf20Sopenharmony_ci/* Returns 0 on success, non-0 otherwise. 
*/ 14708c2ecf20Sopenharmony_ciint vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata) 14718c2ecf20Sopenharmony_ci{ 14728c2ecf20Sopenharmony_ci switch (msr_index) { 14738c2ecf20Sopenharmony_ci case MSR_IA32_VMX_BASIC: 14748c2ecf20Sopenharmony_ci *pdata = msrs->basic; 14758c2ecf20Sopenharmony_ci break; 14768c2ecf20Sopenharmony_ci case MSR_IA32_VMX_TRUE_PINBASED_CTLS: 14778c2ecf20Sopenharmony_ci case MSR_IA32_VMX_PINBASED_CTLS: 14788c2ecf20Sopenharmony_ci *pdata = vmx_control_msr( 14798c2ecf20Sopenharmony_ci msrs->pinbased_ctls_low, 14808c2ecf20Sopenharmony_ci msrs->pinbased_ctls_high); 14818c2ecf20Sopenharmony_ci if (msr_index == MSR_IA32_VMX_PINBASED_CTLS) 14828c2ecf20Sopenharmony_ci *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; 14838c2ecf20Sopenharmony_ci break; 14848c2ecf20Sopenharmony_ci case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: 14858c2ecf20Sopenharmony_ci case MSR_IA32_VMX_PROCBASED_CTLS: 14868c2ecf20Sopenharmony_ci *pdata = vmx_control_msr( 14878c2ecf20Sopenharmony_ci msrs->procbased_ctls_low, 14888c2ecf20Sopenharmony_ci msrs->procbased_ctls_high); 14898c2ecf20Sopenharmony_ci if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS) 14908c2ecf20Sopenharmony_ci *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; 14918c2ecf20Sopenharmony_ci break; 14928c2ecf20Sopenharmony_ci case MSR_IA32_VMX_TRUE_EXIT_CTLS: 14938c2ecf20Sopenharmony_ci case MSR_IA32_VMX_EXIT_CTLS: 14948c2ecf20Sopenharmony_ci *pdata = vmx_control_msr( 14958c2ecf20Sopenharmony_ci msrs->exit_ctls_low, 14968c2ecf20Sopenharmony_ci msrs->exit_ctls_high); 14978c2ecf20Sopenharmony_ci if (msr_index == MSR_IA32_VMX_EXIT_CTLS) 14988c2ecf20Sopenharmony_ci *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; 14998c2ecf20Sopenharmony_ci break; 15008c2ecf20Sopenharmony_ci case MSR_IA32_VMX_TRUE_ENTRY_CTLS: 15018c2ecf20Sopenharmony_ci case MSR_IA32_VMX_ENTRY_CTLS: 15028c2ecf20Sopenharmony_ci *pdata = vmx_control_msr( 15038c2ecf20Sopenharmony_ci msrs->entry_ctls_low, 15048c2ecf20Sopenharmony_ci msrs->entry_ctls_high); 
15058c2ecf20Sopenharmony_ci if (msr_index == MSR_IA32_VMX_ENTRY_CTLS) 15068c2ecf20Sopenharmony_ci *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; 15078c2ecf20Sopenharmony_ci break; 15088c2ecf20Sopenharmony_ci case MSR_IA32_VMX_MISC: 15098c2ecf20Sopenharmony_ci *pdata = vmx_control_msr( 15108c2ecf20Sopenharmony_ci msrs->misc_low, 15118c2ecf20Sopenharmony_ci msrs->misc_high); 15128c2ecf20Sopenharmony_ci break; 15138c2ecf20Sopenharmony_ci case MSR_IA32_VMX_CR0_FIXED0: 15148c2ecf20Sopenharmony_ci *pdata = msrs->cr0_fixed0; 15158c2ecf20Sopenharmony_ci break; 15168c2ecf20Sopenharmony_ci case MSR_IA32_VMX_CR0_FIXED1: 15178c2ecf20Sopenharmony_ci *pdata = msrs->cr0_fixed1; 15188c2ecf20Sopenharmony_ci break; 15198c2ecf20Sopenharmony_ci case MSR_IA32_VMX_CR4_FIXED0: 15208c2ecf20Sopenharmony_ci *pdata = msrs->cr4_fixed0; 15218c2ecf20Sopenharmony_ci break; 15228c2ecf20Sopenharmony_ci case MSR_IA32_VMX_CR4_FIXED1: 15238c2ecf20Sopenharmony_ci *pdata = msrs->cr4_fixed1; 15248c2ecf20Sopenharmony_ci break; 15258c2ecf20Sopenharmony_ci case MSR_IA32_VMX_VMCS_ENUM: 15268c2ecf20Sopenharmony_ci *pdata = msrs->vmcs_enum; 15278c2ecf20Sopenharmony_ci break; 15288c2ecf20Sopenharmony_ci case MSR_IA32_VMX_PROCBASED_CTLS2: 15298c2ecf20Sopenharmony_ci *pdata = vmx_control_msr( 15308c2ecf20Sopenharmony_ci msrs->secondary_ctls_low, 15318c2ecf20Sopenharmony_ci msrs->secondary_ctls_high); 15328c2ecf20Sopenharmony_ci break; 15338c2ecf20Sopenharmony_ci case MSR_IA32_VMX_EPT_VPID_CAP: 15348c2ecf20Sopenharmony_ci *pdata = msrs->ept_caps | 15358c2ecf20Sopenharmony_ci ((u64)msrs->vpid_caps << 32); 15368c2ecf20Sopenharmony_ci break; 15378c2ecf20Sopenharmony_ci case MSR_IA32_VMX_VMFUNC: 15388c2ecf20Sopenharmony_ci *pdata = msrs->vmfunc_controls; 15398c2ecf20Sopenharmony_ci break; 15408c2ecf20Sopenharmony_ci default: 15418c2ecf20Sopenharmony_ci return 1; 15428c2ecf20Sopenharmony_ci } 15438c2ecf20Sopenharmony_ci 15448c2ecf20Sopenharmony_ci return 0; 15458c2ecf20Sopenharmony_ci} 15468c2ecf20Sopenharmony_ci 
15478c2ecf20Sopenharmony_ci/* 15488c2ecf20Sopenharmony_ci * Copy the writable VMCS shadow fields back to the VMCS12, in case they have 15498c2ecf20Sopenharmony_ci * been modified by the L1 guest. Note, "writable" in this context means 15508c2ecf20Sopenharmony_ci * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of 15518c2ecf20Sopenharmony_ci * fields tagged SHADOW_FIELD_RO may or may not align with the "read-only" 15528c2ecf20Sopenharmony_ci * VM-exit information fields (which are actually writable if the vCPU is 15538c2ecf20Sopenharmony_ci * configured to support "VMWRITE to any supported field in the VMCS"). 15548c2ecf20Sopenharmony_ci */ 15558c2ecf20Sopenharmony_cistatic void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) 15568c2ecf20Sopenharmony_ci{ 15578c2ecf20Sopenharmony_ci struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; 15588c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu); 15598c2ecf20Sopenharmony_ci struct shadow_vmcs_field field; 15608c2ecf20Sopenharmony_ci unsigned long val; 15618c2ecf20Sopenharmony_ci int i; 15628c2ecf20Sopenharmony_ci 15638c2ecf20Sopenharmony_ci if (WARN_ON(!shadow_vmcs)) 15648c2ecf20Sopenharmony_ci return; 15658c2ecf20Sopenharmony_ci 15668c2ecf20Sopenharmony_ci preempt_disable(); 15678c2ecf20Sopenharmony_ci 15688c2ecf20Sopenharmony_ci vmcs_load(shadow_vmcs); 15698c2ecf20Sopenharmony_ci 15708c2ecf20Sopenharmony_ci for (i = 0; i < max_shadow_read_write_fields; i++) { 15718c2ecf20Sopenharmony_ci field = shadow_read_write_fields[i]; 15728c2ecf20Sopenharmony_ci val = __vmcs_readl(field.encoding); 15738c2ecf20Sopenharmony_ci vmcs12_write_any(vmcs12, field.encoding, field.offset, val); 15748c2ecf20Sopenharmony_ci } 15758c2ecf20Sopenharmony_ci 15768c2ecf20Sopenharmony_ci vmcs_clear(shadow_vmcs); 15778c2ecf20Sopenharmony_ci vmcs_load(vmx->loaded_vmcs->vmcs); 15788c2ecf20Sopenharmony_ci 15798c2ecf20Sopenharmony_ci preempt_enable(); 15808c2ecf20Sopenharmony_ci} 15818c2ecf20Sopenharmony_ci 
15828c2ecf20Sopenharmony_cistatic void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) 15838c2ecf20Sopenharmony_ci{ 15848c2ecf20Sopenharmony_ci const struct shadow_vmcs_field *fields[] = { 15858c2ecf20Sopenharmony_ci shadow_read_write_fields, 15868c2ecf20Sopenharmony_ci shadow_read_only_fields 15878c2ecf20Sopenharmony_ci }; 15888c2ecf20Sopenharmony_ci const int max_fields[] = { 15898c2ecf20Sopenharmony_ci max_shadow_read_write_fields, 15908c2ecf20Sopenharmony_ci max_shadow_read_only_fields 15918c2ecf20Sopenharmony_ci }; 15928c2ecf20Sopenharmony_ci struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; 15938c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu); 15948c2ecf20Sopenharmony_ci struct shadow_vmcs_field field; 15958c2ecf20Sopenharmony_ci unsigned long val; 15968c2ecf20Sopenharmony_ci int i, q; 15978c2ecf20Sopenharmony_ci 15988c2ecf20Sopenharmony_ci if (WARN_ON(!shadow_vmcs)) 15998c2ecf20Sopenharmony_ci return; 16008c2ecf20Sopenharmony_ci 16018c2ecf20Sopenharmony_ci vmcs_load(shadow_vmcs); 16028c2ecf20Sopenharmony_ci 16038c2ecf20Sopenharmony_ci for (q = 0; q < ARRAY_SIZE(fields); q++) { 16048c2ecf20Sopenharmony_ci for (i = 0; i < max_fields[q]; i++) { 16058c2ecf20Sopenharmony_ci field = fields[q][i]; 16068c2ecf20Sopenharmony_ci val = vmcs12_read_any(vmcs12, field.encoding, 16078c2ecf20Sopenharmony_ci field.offset); 16088c2ecf20Sopenharmony_ci __vmcs_writel(field.encoding, val); 16098c2ecf20Sopenharmony_ci } 16108c2ecf20Sopenharmony_ci } 16118c2ecf20Sopenharmony_ci 16128c2ecf20Sopenharmony_ci vmcs_clear(shadow_vmcs); 16138c2ecf20Sopenharmony_ci vmcs_load(vmx->loaded_vmcs->vmcs); 16148c2ecf20Sopenharmony_ci} 16158c2ecf20Sopenharmony_ci 16168c2ecf20Sopenharmony_cistatic int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx) 16178c2ecf20Sopenharmony_ci{ 16188c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12; 16198c2ecf20Sopenharmony_ci struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; 16208c2ecf20Sopenharmony_ci 
16218c2ecf20Sopenharmony_ci /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */ 16228c2ecf20Sopenharmony_ci vmcs12->tpr_threshold = evmcs->tpr_threshold; 16238c2ecf20Sopenharmony_ci vmcs12->guest_rip = evmcs->guest_rip; 16248c2ecf20Sopenharmony_ci 16258c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 16268c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) { 16278c2ecf20Sopenharmony_ci vmcs12->guest_rsp = evmcs->guest_rsp; 16288c2ecf20Sopenharmony_ci vmcs12->guest_rflags = evmcs->guest_rflags; 16298c2ecf20Sopenharmony_ci vmcs12->guest_interruptibility_info = 16308c2ecf20Sopenharmony_ci evmcs->guest_interruptibility_info; 16318c2ecf20Sopenharmony_ci } 16328c2ecf20Sopenharmony_ci 16338c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 16348c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) { 16358c2ecf20Sopenharmony_ci vmcs12->cpu_based_vm_exec_control = 16368c2ecf20Sopenharmony_ci evmcs->cpu_based_vm_exec_control; 16378c2ecf20Sopenharmony_ci } 16388c2ecf20Sopenharmony_ci 16398c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 16408c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) { 16418c2ecf20Sopenharmony_ci vmcs12->exception_bitmap = evmcs->exception_bitmap; 16428c2ecf20Sopenharmony_ci } 16438c2ecf20Sopenharmony_ci 16448c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 16458c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) { 16468c2ecf20Sopenharmony_ci vmcs12->vm_entry_controls = evmcs->vm_entry_controls; 16478c2ecf20Sopenharmony_ci } 16488c2ecf20Sopenharmony_ci 16498c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 16508c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) { 16518c2ecf20Sopenharmony_ci vmcs12->vm_entry_intr_info_field = 16528c2ecf20Sopenharmony_ci evmcs->vm_entry_intr_info_field; 16538c2ecf20Sopenharmony_ci vmcs12->vm_entry_exception_error_code = 16548c2ecf20Sopenharmony_ci 
evmcs->vm_entry_exception_error_code; 16558c2ecf20Sopenharmony_ci vmcs12->vm_entry_instruction_len = 16568c2ecf20Sopenharmony_ci evmcs->vm_entry_instruction_len; 16578c2ecf20Sopenharmony_ci } 16588c2ecf20Sopenharmony_ci 16598c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 16608c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) { 16618c2ecf20Sopenharmony_ci vmcs12->host_ia32_pat = evmcs->host_ia32_pat; 16628c2ecf20Sopenharmony_ci vmcs12->host_ia32_efer = evmcs->host_ia32_efer; 16638c2ecf20Sopenharmony_ci vmcs12->host_cr0 = evmcs->host_cr0; 16648c2ecf20Sopenharmony_ci vmcs12->host_cr3 = evmcs->host_cr3; 16658c2ecf20Sopenharmony_ci vmcs12->host_cr4 = evmcs->host_cr4; 16668c2ecf20Sopenharmony_ci vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp; 16678c2ecf20Sopenharmony_ci vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip; 16688c2ecf20Sopenharmony_ci vmcs12->host_rip = evmcs->host_rip; 16698c2ecf20Sopenharmony_ci vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs; 16708c2ecf20Sopenharmony_ci vmcs12->host_es_selector = evmcs->host_es_selector; 16718c2ecf20Sopenharmony_ci vmcs12->host_cs_selector = evmcs->host_cs_selector; 16728c2ecf20Sopenharmony_ci vmcs12->host_ss_selector = evmcs->host_ss_selector; 16738c2ecf20Sopenharmony_ci vmcs12->host_ds_selector = evmcs->host_ds_selector; 16748c2ecf20Sopenharmony_ci vmcs12->host_fs_selector = evmcs->host_fs_selector; 16758c2ecf20Sopenharmony_ci vmcs12->host_gs_selector = evmcs->host_gs_selector; 16768c2ecf20Sopenharmony_ci vmcs12->host_tr_selector = evmcs->host_tr_selector; 16778c2ecf20Sopenharmony_ci } 16788c2ecf20Sopenharmony_ci 16798c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 16808c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) { 16818c2ecf20Sopenharmony_ci vmcs12->pin_based_vm_exec_control = 16828c2ecf20Sopenharmony_ci evmcs->pin_based_vm_exec_control; 16838c2ecf20Sopenharmony_ci vmcs12->vm_exit_controls = 
evmcs->vm_exit_controls; 16848c2ecf20Sopenharmony_ci vmcs12->secondary_vm_exec_control = 16858c2ecf20Sopenharmony_ci evmcs->secondary_vm_exec_control; 16868c2ecf20Sopenharmony_ci } 16878c2ecf20Sopenharmony_ci 16888c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 16898c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) { 16908c2ecf20Sopenharmony_ci vmcs12->io_bitmap_a = evmcs->io_bitmap_a; 16918c2ecf20Sopenharmony_ci vmcs12->io_bitmap_b = evmcs->io_bitmap_b; 16928c2ecf20Sopenharmony_ci } 16938c2ecf20Sopenharmony_ci 16948c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 16958c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) { 16968c2ecf20Sopenharmony_ci vmcs12->msr_bitmap = evmcs->msr_bitmap; 16978c2ecf20Sopenharmony_ci } 16988c2ecf20Sopenharmony_ci 16998c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 17008c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) { 17018c2ecf20Sopenharmony_ci vmcs12->guest_es_base = evmcs->guest_es_base; 17028c2ecf20Sopenharmony_ci vmcs12->guest_cs_base = evmcs->guest_cs_base; 17038c2ecf20Sopenharmony_ci vmcs12->guest_ss_base = evmcs->guest_ss_base; 17048c2ecf20Sopenharmony_ci vmcs12->guest_ds_base = evmcs->guest_ds_base; 17058c2ecf20Sopenharmony_ci vmcs12->guest_fs_base = evmcs->guest_fs_base; 17068c2ecf20Sopenharmony_ci vmcs12->guest_gs_base = evmcs->guest_gs_base; 17078c2ecf20Sopenharmony_ci vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base; 17088c2ecf20Sopenharmony_ci vmcs12->guest_tr_base = evmcs->guest_tr_base; 17098c2ecf20Sopenharmony_ci vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base; 17108c2ecf20Sopenharmony_ci vmcs12->guest_idtr_base = evmcs->guest_idtr_base; 17118c2ecf20Sopenharmony_ci vmcs12->guest_es_limit = evmcs->guest_es_limit; 17128c2ecf20Sopenharmony_ci vmcs12->guest_cs_limit = evmcs->guest_cs_limit; 17138c2ecf20Sopenharmony_ci vmcs12->guest_ss_limit = evmcs->guest_ss_limit; 17148c2ecf20Sopenharmony_ci vmcs12->guest_ds_limit = 
evmcs->guest_ds_limit; 17158c2ecf20Sopenharmony_ci vmcs12->guest_fs_limit = evmcs->guest_fs_limit; 17168c2ecf20Sopenharmony_ci vmcs12->guest_gs_limit = evmcs->guest_gs_limit; 17178c2ecf20Sopenharmony_ci vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit; 17188c2ecf20Sopenharmony_ci vmcs12->guest_tr_limit = evmcs->guest_tr_limit; 17198c2ecf20Sopenharmony_ci vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit; 17208c2ecf20Sopenharmony_ci vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit; 17218c2ecf20Sopenharmony_ci vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes; 17228c2ecf20Sopenharmony_ci vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes; 17238c2ecf20Sopenharmony_ci vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes; 17248c2ecf20Sopenharmony_ci vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes; 17258c2ecf20Sopenharmony_ci vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes; 17268c2ecf20Sopenharmony_ci vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes; 17278c2ecf20Sopenharmony_ci vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes; 17288c2ecf20Sopenharmony_ci vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes; 17298c2ecf20Sopenharmony_ci vmcs12->guest_es_selector = evmcs->guest_es_selector; 17308c2ecf20Sopenharmony_ci vmcs12->guest_cs_selector = evmcs->guest_cs_selector; 17318c2ecf20Sopenharmony_ci vmcs12->guest_ss_selector = evmcs->guest_ss_selector; 17328c2ecf20Sopenharmony_ci vmcs12->guest_ds_selector = evmcs->guest_ds_selector; 17338c2ecf20Sopenharmony_ci vmcs12->guest_fs_selector = evmcs->guest_fs_selector; 17348c2ecf20Sopenharmony_ci vmcs12->guest_gs_selector = evmcs->guest_gs_selector; 17358c2ecf20Sopenharmony_ci vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector; 17368c2ecf20Sopenharmony_ci vmcs12->guest_tr_selector = evmcs->guest_tr_selector; 17378c2ecf20Sopenharmony_ci } 17388c2ecf20Sopenharmony_ci 17398c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 17408c2ecf20Sopenharmony_ci 
HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) { 17418c2ecf20Sopenharmony_ci vmcs12->tsc_offset = evmcs->tsc_offset; 17428c2ecf20Sopenharmony_ci vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr; 17438c2ecf20Sopenharmony_ci vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap; 17448c2ecf20Sopenharmony_ci } 17458c2ecf20Sopenharmony_ci 17468c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 17478c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) { 17488c2ecf20Sopenharmony_ci vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask; 17498c2ecf20Sopenharmony_ci vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask; 17508c2ecf20Sopenharmony_ci vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow; 17518c2ecf20Sopenharmony_ci vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow; 17528c2ecf20Sopenharmony_ci vmcs12->guest_cr0 = evmcs->guest_cr0; 17538c2ecf20Sopenharmony_ci vmcs12->guest_cr3 = evmcs->guest_cr3; 17548c2ecf20Sopenharmony_ci vmcs12->guest_cr4 = evmcs->guest_cr4; 17558c2ecf20Sopenharmony_ci vmcs12->guest_dr7 = evmcs->guest_dr7; 17568c2ecf20Sopenharmony_ci } 17578c2ecf20Sopenharmony_ci 17588c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 17598c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) { 17608c2ecf20Sopenharmony_ci vmcs12->host_fs_base = evmcs->host_fs_base; 17618c2ecf20Sopenharmony_ci vmcs12->host_gs_base = evmcs->host_gs_base; 17628c2ecf20Sopenharmony_ci vmcs12->host_tr_base = evmcs->host_tr_base; 17638c2ecf20Sopenharmony_ci vmcs12->host_gdtr_base = evmcs->host_gdtr_base; 17648c2ecf20Sopenharmony_ci vmcs12->host_idtr_base = evmcs->host_idtr_base; 17658c2ecf20Sopenharmony_ci vmcs12->host_rsp = evmcs->host_rsp; 17668c2ecf20Sopenharmony_ci } 17678c2ecf20Sopenharmony_ci 17688c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 17698c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) { 17708c2ecf20Sopenharmony_ci vmcs12->ept_pointer = evmcs->ept_pointer; 
17718c2ecf20Sopenharmony_ci vmcs12->virtual_processor_id = evmcs->virtual_processor_id; 17728c2ecf20Sopenharmony_ci } 17738c2ecf20Sopenharmony_ci 17748c2ecf20Sopenharmony_ci if (unlikely(!(evmcs->hv_clean_fields & 17758c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) { 17768c2ecf20Sopenharmony_ci vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer; 17778c2ecf20Sopenharmony_ci vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl; 17788c2ecf20Sopenharmony_ci vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat; 17798c2ecf20Sopenharmony_ci vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer; 17808c2ecf20Sopenharmony_ci vmcs12->guest_pdptr0 = evmcs->guest_pdptr0; 17818c2ecf20Sopenharmony_ci vmcs12->guest_pdptr1 = evmcs->guest_pdptr1; 17828c2ecf20Sopenharmony_ci vmcs12->guest_pdptr2 = evmcs->guest_pdptr2; 17838c2ecf20Sopenharmony_ci vmcs12->guest_pdptr3 = evmcs->guest_pdptr3; 17848c2ecf20Sopenharmony_ci vmcs12->guest_pending_dbg_exceptions = 17858c2ecf20Sopenharmony_ci evmcs->guest_pending_dbg_exceptions; 17868c2ecf20Sopenharmony_ci vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp; 17878c2ecf20Sopenharmony_ci vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip; 17888c2ecf20Sopenharmony_ci vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs; 17898c2ecf20Sopenharmony_ci vmcs12->guest_activity_state = evmcs->guest_activity_state; 17908c2ecf20Sopenharmony_ci vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs; 17918c2ecf20Sopenharmony_ci } 17928c2ecf20Sopenharmony_ci 17938c2ecf20Sopenharmony_ci /* 17948c2ecf20Sopenharmony_ci * Not used? 
17958c2ecf20Sopenharmony_ci * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr; 17968c2ecf20Sopenharmony_ci * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr; 17978c2ecf20Sopenharmony_ci * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr; 17988c2ecf20Sopenharmony_ci * vmcs12->page_fault_error_code_mask = 17998c2ecf20Sopenharmony_ci * evmcs->page_fault_error_code_mask; 18008c2ecf20Sopenharmony_ci * vmcs12->page_fault_error_code_match = 18018c2ecf20Sopenharmony_ci * evmcs->page_fault_error_code_match; 18028c2ecf20Sopenharmony_ci * vmcs12->cr3_target_count = evmcs->cr3_target_count; 18038c2ecf20Sopenharmony_ci * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count; 18048c2ecf20Sopenharmony_ci * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count; 18058c2ecf20Sopenharmony_ci * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count; 18068c2ecf20Sopenharmony_ci */ 18078c2ecf20Sopenharmony_ci 18088c2ecf20Sopenharmony_ci /* 18098c2ecf20Sopenharmony_ci * Read only fields: 18108c2ecf20Sopenharmony_ci * vmcs12->guest_physical_address = evmcs->guest_physical_address; 18118c2ecf20Sopenharmony_ci * vmcs12->vm_instruction_error = evmcs->vm_instruction_error; 18128c2ecf20Sopenharmony_ci * vmcs12->vm_exit_reason = evmcs->vm_exit_reason; 18138c2ecf20Sopenharmony_ci * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info; 18148c2ecf20Sopenharmony_ci * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code; 18158c2ecf20Sopenharmony_ci * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field; 18168c2ecf20Sopenharmony_ci * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code; 18178c2ecf20Sopenharmony_ci * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len; 18188c2ecf20Sopenharmony_ci * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info; 18198c2ecf20Sopenharmony_ci * vmcs12->exit_qualification = evmcs->exit_qualification; 18208c2ecf20Sopenharmony_ci * 
vmcs12->guest_linear_address = evmcs->guest_linear_address; 18218c2ecf20Sopenharmony_ci * 18228c2ecf20Sopenharmony_ci * Not present in struct vmcs12: 18238c2ecf20Sopenharmony_ci * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx; 18248c2ecf20Sopenharmony_ci * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi; 18258c2ecf20Sopenharmony_ci * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi; 18268c2ecf20Sopenharmony_ci * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip; 18278c2ecf20Sopenharmony_ci */ 18288c2ecf20Sopenharmony_ci 18298c2ecf20Sopenharmony_ci return 0; 18308c2ecf20Sopenharmony_ci} 18318c2ecf20Sopenharmony_ci 18328c2ecf20Sopenharmony_cistatic int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx) 18338c2ecf20Sopenharmony_ci{ 18348c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12; 18358c2ecf20Sopenharmony_ci struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; 18368c2ecf20Sopenharmony_ci 18378c2ecf20Sopenharmony_ci /* 18388c2ecf20Sopenharmony_ci * Should not be changed by KVM: 18398c2ecf20Sopenharmony_ci * 18408c2ecf20Sopenharmony_ci * evmcs->host_es_selector = vmcs12->host_es_selector; 18418c2ecf20Sopenharmony_ci * evmcs->host_cs_selector = vmcs12->host_cs_selector; 18428c2ecf20Sopenharmony_ci * evmcs->host_ss_selector = vmcs12->host_ss_selector; 18438c2ecf20Sopenharmony_ci * evmcs->host_ds_selector = vmcs12->host_ds_selector; 18448c2ecf20Sopenharmony_ci * evmcs->host_fs_selector = vmcs12->host_fs_selector; 18458c2ecf20Sopenharmony_ci * evmcs->host_gs_selector = vmcs12->host_gs_selector; 18468c2ecf20Sopenharmony_ci * evmcs->host_tr_selector = vmcs12->host_tr_selector; 18478c2ecf20Sopenharmony_ci * evmcs->host_ia32_pat = vmcs12->host_ia32_pat; 18488c2ecf20Sopenharmony_ci * evmcs->host_ia32_efer = vmcs12->host_ia32_efer; 18498c2ecf20Sopenharmony_ci * evmcs->host_cr0 = vmcs12->host_cr0; 18508c2ecf20Sopenharmony_ci * evmcs->host_cr3 = vmcs12->host_cr3; 
18518c2ecf20Sopenharmony_ci * evmcs->host_cr4 = vmcs12->host_cr4; 18528c2ecf20Sopenharmony_ci * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp; 18538c2ecf20Sopenharmony_ci * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip; 18548c2ecf20Sopenharmony_ci * evmcs->host_rip = vmcs12->host_rip; 18558c2ecf20Sopenharmony_ci * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs; 18568c2ecf20Sopenharmony_ci * evmcs->host_fs_base = vmcs12->host_fs_base; 18578c2ecf20Sopenharmony_ci * evmcs->host_gs_base = vmcs12->host_gs_base; 18588c2ecf20Sopenharmony_ci * evmcs->host_tr_base = vmcs12->host_tr_base; 18598c2ecf20Sopenharmony_ci * evmcs->host_gdtr_base = vmcs12->host_gdtr_base; 18608c2ecf20Sopenharmony_ci * evmcs->host_idtr_base = vmcs12->host_idtr_base; 18618c2ecf20Sopenharmony_ci * evmcs->host_rsp = vmcs12->host_rsp; 18628c2ecf20Sopenharmony_ci * sync_vmcs02_to_vmcs12() doesn't read these: 18638c2ecf20Sopenharmony_ci * evmcs->io_bitmap_a = vmcs12->io_bitmap_a; 18648c2ecf20Sopenharmony_ci * evmcs->io_bitmap_b = vmcs12->io_bitmap_b; 18658c2ecf20Sopenharmony_ci * evmcs->msr_bitmap = vmcs12->msr_bitmap; 18668c2ecf20Sopenharmony_ci * evmcs->ept_pointer = vmcs12->ept_pointer; 18678c2ecf20Sopenharmony_ci * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap; 18688c2ecf20Sopenharmony_ci * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr; 18698c2ecf20Sopenharmony_ci * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr; 18708c2ecf20Sopenharmony_ci * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr; 18718c2ecf20Sopenharmony_ci * evmcs->tpr_threshold = vmcs12->tpr_threshold; 18728c2ecf20Sopenharmony_ci * evmcs->virtual_processor_id = vmcs12->virtual_processor_id; 18738c2ecf20Sopenharmony_ci * evmcs->exception_bitmap = vmcs12->exception_bitmap; 18748c2ecf20Sopenharmony_ci * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer; 18758c2ecf20Sopenharmony_ci * evmcs->pin_based_vm_exec_control = 
vmcs12->pin_based_vm_exec_control; 18768c2ecf20Sopenharmony_ci * evmcs->vm_exit_controls = vmcs12->vm_exit_controls; 18778c2ecf20Sopenharmony_ci * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control; 18788c2ecf20Sopenharmony_ci * evmcs->page_fault_error_code_mask = 18798c2ecf20Sopenharmony_ci * vmcs12->page_fault_error_code_mask; 18808c2ecf20Sopenharmony_ci * evmcs->page_fault_error_code_match = 18818c2ecf20Sopenharmony_ci * vmcs12->page_fault_error_code_match; 18828c2ecf20Sopenharmony_ci * evmcs->cr3_target_count = vmcs12->cr3_target_count; 18838c2ecf20Sopenharmony_ci * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr; 18848c2ecf20Sopenharmony_ci * evmcs->tsc_offset = vmcs12->tsc_offset; 18858c2ecf20Sopenharmony_ci * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl; 18868c2ecf20Sopenharmony_ci * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask; 18878c2ecf20Sopenharmony_ci * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask; 18888c2ecf20Sopenharmony_ci * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow; 18898c2ecf20Sopenharmony_ci * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow; 18908c2ecf20Sopenharmony_ci * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count; 18918c2ecf20Sopenharmony_ci * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count; 18928c2ecf20Sopenharmony_ci * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count; 18938c2ecf20Sopenharmony_ci * 18948c2ecf20Sopenharmony_ci * Not present in struct vmcs12: 18958c2ecf20Sopenharmony_ci * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx; 18968c2ecf20Sopenharmony_ci * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi; 18978c2ecf20Sopenharmony_ci * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi; 18988c2ecf20Sopenharmony_ci * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip; 18998c2ecf20Sopenharmony_ci */ 19008c2ecf20Sopenharmony_ci 19018c2ecf20Sopenharmony_ci 
evmcs->guest_es_selector = vmcs12->guest_es_selector; 19028c2ecf20Sopenharmony_ci evmcs->guest_cs_selector = vmcs12->guest_cs_selector; 19038c2ecf20Sopenharmony_ci evmcs->guest_ss_selector = vmcs12->guest_ss_selector; 19048c2ecf20Sopenharmony_ci evmcs->guest_ds_selector = vmcs12->guest_ds_selector; 19058c2ecf20Sopenharmony_ci evmcs->guest_fs_selector = vmcs12->guest_fs_selector; 19068c2ecf20Sopenharmony_ci evmcs->guest_gs_selector = vmcs12->guest_gs_selector; 19078c2ecf20Sopenharmony_ci evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector; 19088c2ecf20Sopenharmony_ci evmcs->guest_tr_selector = vmcs12->guest_tr_selector; 19098c2ecf20Sopenharmony_ci 19108c2ecf20Sopenharmony_ci evmcs->guest_es_limit = vmcs12->guest_es_limit; 19118c2ecf20Sopenharmony_ci evmcs->guest_cs_limit = vmcs12->guest_cs_limit; 19128c2ecf20Sopenharmony_ci evmcs->guest_ss_limit = vmcs12->guest_ss_limit; 19138c2ecf20Sopenharmony_ci evmcs->guest_ds_limit = vmcs12->guest_ds_limit; 19148c2ecf20Sopenharmony_ci evmcs->guest_fs_limit = vmcs12->guest_fs_limit; 19158c2ecf20Sopenharmony_ci evmcs->guest_gs_limit = vmcs12->guest_gs_limit; 19168c2ecf20Sopenharmony_ci evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit; 19178c2ecf20Sopenharmony_ci evmcs->guest_tr_limit = vmcs12->guest_tr_limit; 19188c2ecf20Sopenharmony_ci evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit; 19198c2ecf20Sopenharmony_ci evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit; 19208c2ecf20Sopenharmony_ci 19218c2ecf20Sopenharmony_ci evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes; 19228c2ecf20Sopenharmony_ci evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes; 19238c2ecf20Sopenharmony_ci evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes; 19248c2ecf20Sopenharmony_ci evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes; 19258c2ecf20Sopenharmony_ci evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes; 19268c2ecf20Sopenharmony_ci evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes; 19278c2ecf20Sopenharmony_ci 
evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes; 19288c2ecf20Sopenharmony_ci evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes; 19298c2ecf20Sopenharmony_ci 19308c2ecf20Sopenharmony_ci evmcs->guest_es_base = vmcs12->guest_es_base; 19318c2ecf20Sopenharmony_ci evmcs->guest_cs_base = vmcs12->guest_cs_base; 19328c2ecf20Sopenharmony_ci evmcs->guest_ss_base = vmcs12->guest_ss_base; 19338c2ecf20Sopenharmony_ci evmcs->guest_ds_base = vmcs12->guest_ds_base; 19348c2ecf20Sopenharmony_ci evmcs->guest_fs_base = vmcs12->guest_fs_base; 19358c2ecf20Sopenharmony_ci evmcs->guest_gs_base = vmcs12->guest_gs_base; 19368c2ecf20Sopenharmony_ci evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base; 19378c2ecf20Sopenharmony_ci evmcs->guest_tr_base = vmcs12->guest_tr_base; 19388c2ecf20Sopenharmony_ci evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base; 19398c2ecf20Sopenharmony_ci evmcs->guest_idtr_base = vmcs12->guest_idtr_base; 19408c2ecf20Sopenharmony_ci 19418c2ecf20Sopenharmony_ci evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat; 19428c2ecf20Sopenharmony_ci evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer; 19438c2ecf20Sopenharmony_ci 19448c2ecf20Sopenharmony_ci evmcs->guest_pdptr0 = vmcs12->guest_pdptr0; 19458c2ecf20Sopenharmony_ci evmcs->guest_pdptr1 = vmcs12->guest_pdptr1; 19468c2ecf20Sopenharmony_ci evmcs->guest_pdptr2 = vmcs12->guest_pdptr2; 19478c2ecf20Sopenharmony_ci evmcs->guest_pdptr3 = vmcs12->guest_pdptr3; 19488c2ecf20Sopenharmony_ci 19498c2ecf20Sopenharmony_ci evmcs->guest_pending_dbg_exceptions = 19508c2ecf20Sopenharmony_ci vmcs12->guest_pending_dbg_exceptions; 19518c2ecf20Sopenharmony_ci evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp; 19528c2ecf20Sopenharmony_ci evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip; 19538c2ecf20Sopenharmony_ci 19548c2ecf20Sopenharmony_ci evmcs->guest_activity_state = vmcs12->guest_activity_state; 19558c2ecf20Sopenharmony_ci evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs; 19568c2ecf20Sopenharmony_ci 19578c2ecf20Sopenharmony_ci 
evmcs->guest_cr0 = vmcs12->guest_cr0; 19588c2ecf20Sopenharmony_ci evmcs->guest_cr3 = vmcs12->guest_cr3; 19598c2ecf20Sopenharmony_ci evmcs->guest_cr4 = vmcs12->guest_cr4; 19608c2ecf20Sopenharmony_ci evmcs->guest_dr7 = vmcs12->guest_dr7; 19618c2ecf20Sopenharmony_ci 19628c2ecf20Sopenharmony_ci evmcs->guest_physical_address = vmcs12->guest_physical_address; 19638c2ecf20Sopenharmony_ci 19648c2ecf20Sopenharmony_ci evmcs->vm_instruction_error = vmcs12->vm_instruction_error; 19658c2ecf20Sopenharmony_ci evmcs->vm_exit_reason = vmcs12->vm_exit_reason; 19668c2ecf20Sopenharmony_ci evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info; 19678c2ecf20Sopenharmony_ci evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code; 19688c2ecf20Sopenharmony_ci evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field; 19698c2ecf20Sopenharmony_ci evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code; 19708c2ecf20Sopenharmony_ci evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len; 19718c2ecf20Sopenharmony_ci evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info; 19728c2ecf20Sopenharmony_ci 19738c2ecf20Sopenharmony_ci evmcs->exit_qualification = vmcs12->exit_qualification; 19748c2ecf20Sopenharmony_ci 19758c2ecf20Sopenharmony_ci evmcs->guest_linear_address = vmcs12->guest_linear_address; 19768c2ecf20Sopenharmony_ci evmcs->guest_rsp = vmcs12->guest_rsp; 19778c2ecf20Sopenharmony_ci evmcs->guest_rflags = vmcs12->guest_rflags; 19788c2ecf20Sopenharmony_ci 19798c2ecf20Sopenharmony_ci evmcs->guest_interruptibility_info = 19808c2ecf20Sopenharmony_ci vmcs12->guest_interruptibility_info; 19818c2ecf20Sopenharmony_ci evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control; 19828c2ecf20Sopenharmony_ci evmcs->vm_entry_controls = vmcs12->vm_entry_controls; 19838c2ecf20Sopenharmony_ci evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field; 19848c2ecf20Sopenharmony_ci evmcs->vm_entry_exception_error_code = 19858c2ecf20Sopenharmony_ci 
vmcs12->vm_entry_exception_error_code; 19868c2ecf20Sopenharmony_ci evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len; 19878c2ecf20Sopenharmony_ci 19888c2ecf20Sopenharmony_ci evmcs->guest_rip = vmcs12->guest_rip; 19898c2ecf20Sopenharmony_ci 19908c2ecf20Sopenharmony_ci evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs; 19918c2ecf20Sopenharmony_ci 19928c2ecf20Sopenharmony_ci return 0; 19938c2ecf20Sopenharmony_ci} 19948c2ecf20Sopenharmony_ci 19958c2ecf20Sopenharmony_ci/* 19968c2ecf20Sopenharmony_ci * This is an equivalent of the nested hypervisor executing the vmptrld 19978c2ecf20Sopenharmony_ci * instruction. 19988c2ecf20Sopenharmony_ci */ 19998c2ecf20Sopenharmony_cistatic enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld( 20008c2ecf20Sopenharmony_ci struct kvm_vcpu *vcpu, bool from_launch) 20018c2ecf20Sopenharmony_ci{ 20028c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 20038c2ecf20Sopenharmony_ci bool evmcs_gpa_changed = false; 20048c2ecf20Sopenharmony_ci u64 evmcs_gpa; 20058c2ecf20Sopenharmony_ci 20068c2ecf20Sopenharmony_ci if (likely(!vmx->nested.enlightened_vmcs_enabled)) 20078c2ecf20Sopenharmony_ci return EVMPTRLD_DISABLED; 20088c2ecf20Sopenharmony_ci 20098c2ecf20Sopenharmony_ci if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) 20108c2ecf20Sopenharmony_ci return EVMPTRLD_DISABLED; 20118c2ecf20Sopenharmony_ci 20128c2ecf20Sopenharmony_ci if (unlikely(!vmx->nested.hv_evmcs || 20138c2ecf20Sopenharmony_ci evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) { 20148c2ecf20Sopenharmony_ci if (!vmx->nested.hv_evmcs) 20158c2ecf20Sopenharmony_ci vmx->nested.current_vmptr = -1ull; 20168c2ecf20Sopenharmony_ci 20178c2ecf20Sopenharmony_ci nested_release_evmcs(vcpu); 20188c2ecf20Sopenharmony_ci 20198c2ecf20Sopenharmony_ci if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa), 20208c2ecf20Sopenharmony_ci &vmx->nested.hv_evmcs_map)) 20218c2ecf20Sopenharmony_ci return EVMPTRLD_ERROR; 20228c2ecf20Sopenharmony_ci 20238c2ecf20Sopenharmony_ci vmx->nested.hv_evmcs 
= vmx->nested.hv_evmcs_map.hva; 20248c2ecf20Sopenharmony_ci 20258c2ecf20Sopenharmony_ci /* 20268c2ecf20Sopenharmony_ci * Currently, KVM only supports eVMCS version 1 20278c2ecf20Sopenharmony_ci * (== KVM_EVMCS_VERSION) and thus we expect guest to set this 20288c2ecf20Sopenharmony_ci * value to first u32 field of eVMCS which should specify eVMCS 20298c2ecf20Sopenharmony_ci * VersionNumber. 20308c2ecf20Sopenharmony_ci * 20318c2ecf20Sopenharmony_ci * Guest should be aware of supported eVMCS versions by host by 20328c2ecf20Sopenharmony_ci * examining CPUID.0x4000000A.EAX[0:15]. Host userspace VMM is 20338c2ecf20Sopenharmony_ci * expected to set this CPUID leaf according to the value 20348c2ecf20Sopenharmony_ci * returned in vmcs_version from nested_enable_evmcs(). 20358c2ecf20Sopenharmony_ci * 20368c2ecf20Sopenharmony_ci * However, it turns out that Microsoft Hyper-V fails to comply 20378c2ecf20Sopenharmony_ci * to their own invented interface: When Hyper-V use eVMCS, it 20388c2ecf20Sopenharmony_ci * just sets first u32 field of eVMCS to revision_id specified 20398c2ecf20Sopenharmony_ci * in MSR_IA32_VMX_BASIC. Instead of used eVMCS version number 20408c2ecf20Sopenharmony_ci * which is one of the supported versions specified in 20418c2ecf20Sopenharmony_ci * CPUID.0x4000000A.EAX[0:15]. 20428c2ecf20Sopenharmony_ci * 20438c2ecf20Sopenharmony_ci * To overcome Hyper-V bug, we accept here either a supported 20448c2ecf20Sopenharmony_ci * eVMCS version or VMCS12 revision_id as valid values for first 20458c2ecf20Sopenharmony_ci * u32 field of eVMCS. 
20468c2ecf20Sopenharmony_ci */ 20478c2ecf20Sopenharmony_ci if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) && 20488c2ecf20Sopenharmony_ci (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) { 20498c2ecf20Sopenharmony_ci nested_release_evmcs(vcpu); 20508c2ecf20Sopenharmony_ci return EVMPTRLD_VMFAIL; 20518c2ecf20Sopenharmony_ci } 20528c2ecf20Sopenharmony_ci 20538c2ecf20Sopenharmony_ci vmx->nested.dirty_vmcs12 = true; 20548c2ecf20Sopenharmony_ci vmx->nested.hv_evmcs_vmptr = evmcs_gpa; 20558c2ecf20Sopenharmony_ci 20568c2ecf20Sopenharmony_ci evmcs_gpa_changed = true; 20578c2ecf20Sopenharmony_ci /* 20588c2ecf20Sopenharmony_ci * Unlike normal vmcs12, enlightened vmcs12 is not fully 20598c2ecf20Sopenharmony_ci * reloaded from guest's memory (read only fields, fields not 20608c2ecf20Sopenharmony_ci * present in struct hv_enlightened_vmcs, ...). Make sure there 20618c2ecf20Sopenharmony_ci * are no leftovers. 20628c2ecf20Sopenharmony_ci */ 20638c2ecf20Sopenharmony_ci if (from_launch) { 20648c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 20658c2ecf20Sopenharmony_ci memset(vmcs12, 0, sizeof(*vmcs12)); 20668c2ecf20Sopenharmony_ci vmcs12->hdr.revision_id = VMCS12_REVISION; 20678c2ecf20Sopenharmony_ci } 20688c2ecf20Sopenharmony_ci 20698c2ecf20Sopenharmony_ci } 20708c2ecf20Sopenharmony_ci 20718c2ecf20Sopenharmony_ci /* 20728c2ecf20Sopenharmony_ci * Clean fields data can't be used on VMLAUNCH and when we switch 20738c2ecf20Sopenharmony_ci * between different L2 guests as KVM keeps a single VMCS12 per L1. 
20748c2ecf20Sopenharmony_ci */ 20758c2ecf20Sopenharmony_ci if (from_launch || evmcs_gpa_changed) 20768c2ecf20Sopenharmony_ci vmx->nested.hv_evmcs->hv_clean_fields &= 20778c2ecf20Sopenharmony_ci ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; 20788c2ecf20Sopenharmony_ci 20798c2ecf20Sopenharmony_ci return EVMPTRLD_SUCCEEDED; 20808c2ecf20Sopenharmony_ci} 20818c2ecf20Sopenharmony_ci 20828c2ecf20Sopenharmony_civoid nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu) 20838c2ecf20Sopenharmony_ci{ 20848c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 20858c2ecf20Sopenharmony_ci 20868c2ecf20Sopenharmony_ci if (vmx->nested.hv_evmcs) { 20878c2ecf20Sopenharmony_ci copy_vmcs12_to_enlightened(vmx); 20888c2ecf20Sopenharmony_ci /* All fields are clean */ 20898c2ecf20Sopenharmony_ci vmx->nested.hv_evmcs->hv_clean_fields |= 20908c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; 20918c2ecf20Sopenharmony_ci } else { 20928c2ecf20Sopenharmony_ci copy_vmcs12_to_shadow(vmx); 20938c2ecf20Sopenharmony_ci } 20948c2ecf20Sopenharmony_ci 20958c2ecf20Sopenharmony_ci vmx->nested.need_vmcs12_to_shadow_sync = false; 20968c2ecf20Sopenharmony_ci} 20978c2ecf20Sopenharmony_ci 20988c2ecf20Sopenharmony_cistatic enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) 20998c2ecf20Sopenharmony_ci{ 21008c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = 21018c2ecf20Sopenharmony_ci container_of(timer, struct vcpu_vmx, nested.preemption_timer); 21028c2ecf20Sopenharmony_ci 21038c2ecf20Sopenharmony_ci vmx->nested.preemption_timer_expired = true; 21048c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu); 21058c2ecf20Sopenharmony_ci kvm_vcpu_kick(&vmx->vcpu); 21068c2ecf20Sopenharmony_ci 21078c2ecf20Sopenharmony_ci return HRTIMER_NORESTART; 21088c2ecf20Sopenharmony_ci} 21098c2ecf20Sopenharmony_ci 21108c2ecf20Sopenharmony_cistatic u64 vmx_calc_preemption_timer_value(struct kvm_vcpu *vcpu) 21118c2ecf20Sopenharmony_ci{ 21128c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 
21138c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 21148c2ecf20Sopenharmony_ci 21158c2ecf20Sopenharmony_ci u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >> 21168c2ecf20Sopenharmony_ci VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; 21178c2ecf20Sopenharmony_ci 21188c2ecf20Sopenharmony_ci if (!vmx->nested.has_preemption_timer_deadline) { 21198c2ecf20Sopenharmony_ci vmx->nested.preemption_timer_deadline = 21208c2ecf20Sopenharmony_ci vmcs12->vmx_preemption_timer_value + l1_scaled_tsc; 21218c2ecf20Sopenharmony_ci vmx->nested.has_preemption_timer_deadline = true; 21228c2ecf20Sopenharmony_ci } 21238c2ecf20Sopenharmony_ci return vmx->nested.preemption_timer_deadline - l1_scaled_tsc; 21248c2ecf20Sopenharmony_ci} 21258c2ecf20Sopenharmony_ci 21268c2ecf20Sopenharmony_cistatic void vmx_start_preemption_timer(struct kvm_vcpu *vcpu, 21278c2ecf20Sopenharmony_ci u64 preemption_timeout) 21288c2ecf20Sopenharmony_ci{ 21298c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 21308c2ecf20Sopenharmony_ci 21318c2ecf20Sopenharmony_ci /* 21328c2ecf20Sopenharmony_ci * A timer value of zero is architecturally guaranteed to cause 21338c2ecf20Sopenharmony_ci * a VMExit prior to executing any instructions in the guest. 
21348c2ecf20Sopenharmony_ci */ 21358c2ecf20Sopenharmony_ci if (preemption_timeout == 0) { 21368c2ecf20Sopenharmony_ci vmx_preemption_timer_fn(&vmx->nested.preemption_timer); 21378c2ecf20Sopenharmony_ci return; 21388c2ecf20Sopenharmony_ci } 21398c2ecf20Sopenharmony_ci 21408c2ecf20Sopenharmony_ci if (vcpu->arch.virtual_tsc_khz == 0) 21418c2ecf20Sopenharmony_ci return; 21428c2ecf20Sopenharmony_ci 21438c2ecf20Sopenharmony_ci preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; 21448c2ecf20Sopenharmony_ci preemption_timeout *= 1000000; 21458c2ecf20Sopenharmony_ci do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz); 21468c2ecf20Sopenharmony_ci hrtimer_start(&vmx->nested.preemption_timer, 21478c2ecf20Sopenharmony_ci ktime_add_ns(ktime_get(), preemption_timeout), 21488c2ecf20Sopenharmony_ci HRTIMER_MODE_ABS_PINNED); 21498c2ecf20Sopenharmony_ci} 21508c2ecf20Sopenharmony_ci 21518c2ecf20Sopenharmony_cistatic u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) 21528c2ecf20Sopenharmony_ci{ 21538c2ecf20Sopenharmony_ci if (vmx->nested.nested_run_pending && 21548c2ecf20Sopenharmony_ci (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) 21558c2ecf20Sopenharmony_ci return vmcs12->guest_ia32_efer; 21568c2ecf20Sopenharmony_ci else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) 21578c2ecf20Sopenharmony_ci return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME); 21588c2ecf20Sopenharmony_ci else 21598c2ecf20Sopenharmony_ci return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME); 21608c2ecf20Sopenharmony_ci} 21618c2ecf20Sopenharmony_ci 21628c2ecf20Sopenharmony_cistatic void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) 21638c2ecf20Sopenharmony_ci{ 21648c2ecf20Sopenharmony_ci /* 21658c2ecf20Sopenharmony_ci * If vmcs02 hasn't been initialized, set the constant vmcs02 state 21668c2ecf20Sopenharmony_ci * according to L0's settings (vmcs12 is irrelevant here). Host 21678c2ecf20Sopenharmony_ci * fields that come from L0 and are not constant, e.g. 
HOST_CR3, 21688c2ecf20Sopenharmony_ci * will be set as needed prior to VMLAUNCH/VMRESUME. 21698c2ecf20Sopenharmony_ci */ 21708c2ecf20Sopenharmony_ci if (vmx->nested.vmcs02_initialized) 21718c2ecf20Sopenharmony_ci return; 21728c2ecf20Sopenharmony_ci vmx->nested.vmcs02_initialized = true; 21738c2ecf20Sopenharmony_ci 21748c2ecf20Sopenharmony_ci /* 21758c2ecf20Sopenharmony_ci * We don't care what the EPTP value is we just need to guarantee 21768c2ecf20Sopenharmony_ci * it's valid so we don't get a false positive when doing early 21778c2ecf20Sopenharmony_ci * consistency checks. 21788c2ecf20Sopenharmony_ci */ 21798c2ecf20Sopenharmony_ci if (enable_ept && nested_early_check) 21808c2ecf20Sopenharmony_ci vmcs_write64(EPT_POINTER, 21818c2ecf20Sopenharmony_ci construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL)); 21828c2ecf20Sopenharmony_ci 21838c2ecf20Sopenharmony_ci /* All VMFUNCs are currently emulated through L0 vmexits. */ 21848c2ecf20Sopenharmony_ci if (cpu_has_vmx_vmfunc()) 21858c2ecf20Sopenharmony_ci vmcs_write64(VM_FUNCTION_CONTROL, 0); 21868c2ecf20Sopenharmony_ci 21878c2ecf20Sopenharmony_ci if (cpu_has_vmx_posted_intr()) 21888c2ecf20Sopenharmony_ci vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR); 21898c2ecf20Sopenharmony_ci 21908c2ecf20Sopenharmony_ci if (cpu_has_vmx_msr_bitmap()) 21918c2ecf20Sopenharmony_ci vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); 21928c2ecf20Sopenharmony_ci 21938c2ecf20Sopenharmony_ci /* 21948c2ecf20Sopenharmony_ci * The PML address never changes, so it is constant in vmcs02. 21958c2ecf20Sopenharmony_ci * Conceptually we want to copy the PML index from vmcs01 here, 21968c2ecf20Sopenharmony_ci * and then back to vmcs01 on nested vmexit. But since we flush 21978c2ecf20Sopenharmony_ci * the log and reset GUEST_PML_INDEX on each vmexit, the PML 21988c2ecf20Sopenharmony_ci * index is also effectively constant in vmcs02. 
21998c2ecf20Sopenharmony_ci */ 22008c2ecf20Sopenharmony_ci if (enable_pml) { 22018c2ecf20Sopenharmony_ci vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); 22028c2ecf20Sopenharmony_ci vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); 22038c2ecf20Sopenharmony_ci } 22048c2ecf20Sopenharmony_ci 22058c2ecf20Sopenharmony_ci if (cpu_has_vmx_encls_vmexit()) 22068c2ecf20Sopenharmony_ci vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); 22078c2ecf20Sopenharmony_ci 22088c2ecf20Sopenharmony_ci /* 22098c2ecf20Sopenharmony_ci * Set the MSR load/store lists to match L0's settings. Only the 22108c2ecf20Sopenharmony_ci * addresses are constant (for vmcs02), the counts can change based 22118c2ecf20Sopenharmony_ci * on L2's behavior, e.g. switching to/from long mode. 22128c2ecf20Sopenharmony_ci */ 22138c2ecf20Sopenharmony_ci vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val)); 22148c2ecf20Sopenharmony_ci vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); 22158c2ecf20Sopenharmony_ci vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); 22168c2ecf20Sopenharmony_ci 22178c2ecf20Sopenharmony_ci vmx_set_constant_host_state(vmx); 22188c2ecf20Sopenharmony_ci} 22198c2ecf20Sopenharmony_ci 22208c2ecf20Sopenharmony_cistatic void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx, 22218c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 22228c2ecf20Sopenharmony_ci{ 22238c2ecf20Sopenharmony_ci prepare_vmcs02_constant_state(vmx); 22248c2ecf20Sopenharmony_ci 22258c2ecf20Sopenharmony_ci vmcs_write64(VMCS_LINK_POINTER, -1ull); 22268c2ecf20Sopenharmony_ci 22278c2ecf20Sopenharmony_ci if (enable_vpid) { 22288c2ecf20Sopenharmony_ci if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) 22298c2ecf20Sopenharmony_ci vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); 22308c2ecf20Sopenharmony_ci else 22318c2ecf20Sopenharmony_ci vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); 22328c2ecf20Sopenharmony_ci } 22338c2ecf20Sopenharmony_ci} 22348c2ecf20Sopenharmony_ci 
22358c2ecf20Sopenharmony_cistatic void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01, 22368c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 22378c2ecf20Sopenharmony_ci{ 22388c2ecf20Sopenharmony_ci u32 exec_control, vmcs12_exec_ctrl; 22398c2ecf20Sopenharmony_ci u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12); 22408c2ecf20Sopenharmony_ci 22418c2ecf20Sopenharmony_ci if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) 22428c2ecf20Sopenharmony_ci prepare_vmcs02_early_rare(vmx, vmcs12); 22438c2ecf20Sopenharmony_ci 22448c2ecf20Sopenharmony_ci /* 22458c2ecf20Sopenharmony_ci * PIN CONTROLS 22468c2ecf20Sopenharmony_ci */ 22478c2ecf20Sopenharmony_ci exec_control = __pin_controls_get(vmcs01); 22488c2ecf20Sopenharmony_ci exec_control |= (vmcs12->pin_based_vm_exec_control & 22498c2ecf20Sopenharmony_ci ~PIN_BASED_VMX_PREEMPTION_TIMER); 22508c2ecf20Sopenharmony_ci 22518c2ecf20Sopenharmony_ci /* Posted interrupts setting is only taken from vmcs12. */ 22528c2ecf20Sopenharmony_ci vmx->nested.pi_pending = false; 22538c2ecf20Sopenharmony_ci if (nested_cpu_has_posted_intr(vmcs12)) 22548c2ecf20Sopenharmony_ci vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; 22558c2ecf20Sopenharmony_ci else 22568c2ecf20Sopenharmony_ci exec_control &= ~PIN_BASED_POSTED_INTR; 22578c2ecf20Sopenharmony_ci pin_controls_set(vmx, exec_control); 22588c2ecf20Sopenharmony_ci 22598c2ecf20Sopenharmony_ci /* 22608c2ecf20Sopenharmony_ci * EXEC CONTROLS 22618c2ecf20Sopenharmony_ci */ 22628c2ecf20Sopenharmony_ci exec_control = __exec_controls_get(vmcs01); /* L0's desires */ 22638c2ecf20Sopenharmony_ci exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING; 22648c2ecf20Sopenharmony_ci exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING; 22658c2ecf20Sopenharmony_ci exec_control &= ~CPU_BASED_TPR_SHADOW; 22668c2ecf20Sopenharmony_ci exec_control |= vmcs12->cpu_based_vm_exec_control; 22678c2ecf20Sopenharmony_ci 22688c2ecf20Sopenharmony_ci vmx->nested.l1_tpr_threshold = -1; 22698c2ecf20Sopenharmony_ci if 
(exec_control & CPU_BASED_TPR_SHADOW) 22708c2ecf20Sopenharmony_ci vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); 22718c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 22728c2ecf20Sopenharmony_ci else 22738c2ecf20Sopenharmony_ci exec_control |= CPU_BASED_CR8_LOAD_EXITING | 22748c2ecf20Sopenharmony_ci CPU_BASED_CR8_STORE_EXITING; 22758c2ecf20Sopenharmony_ci#endif 22768c2ecf20Sopenharmony_ci 22778c2ecf20Sopenharmony_ci /* 22788c2ecf20Sopenharmony_ci * A vmexit (to either L1 hypervisor or L0 userspace) is always needed 22798c2ecf20Sopenharmony_ci * for I/O port accesses. 22808c2ecf20Sopenharmony_ci */ 22818c2ecf20Sopenharmony_ci exec_control |= CPU_BASED_UNCOND_IO_EXITING; 22828c2ecf20Sopenharmony_ci exec_control &= ~CPU_BASED_USE_IO_BITMAPS; 22838c2ecf20Sopenharmony_ci 22848c2ecf20Sopenharmony_ci /* 22858c2ecf20Sopenharmony_ci * This bit will be computed in nested_get_vmcs12_pages, because 22868c2ecf20Sopenharmony_ci * we do not have access to L1's MSR bitmap yet. For now, keep 22878c2ecf20Sopenharmony_ci * the same bit as before, hoping to avoid multiple VMWRITEs that 22888c2ecf20Sopenharmony_ci * only set/clear this bit. 
22898c2ecf20Sopenharmony_ci */ 22908c2ecf20Sopenharmony_ci exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; 22918c2ecf20Sopenharmony_ci exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS; 22928c2ecf20Sopenharmony_ci 22938c2ecf20Sopenharmony_ci exec_controls_set(vmx, exec_control); 22948c2ecf20Sopenharmony_ci 22958c2ecf20Sopenharmony_ci /* 22968c2ecf20Sopenharmony_ci * SECONDARY EXEC CONTROLS 22978c2ecf20Sopenharmony_ci */ 22988c2ecf20Sopenharmony_ci if (cpu_has_secondary_exec_ctrls()) { 22998c2ecf20Sopenharmony_ci exec_control = __secondary_exec_controls_get(vmcs01); 23008c2ecf20Sopenharmony_ci 23018c2ecf20Sopenharmony_ci /* Take the following fields only from vmcs12 */ 23028c2ecf20Sopenharmony_ci exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 23038c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | 23048c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_INVPCID | 23058c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_RDTSCP | 23068c2ecf20Sopenharmony_ci SECONDARY_EXEC_XSAVES | 23078c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | 23088c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 23098c2ecf20Sopenharmony_ci SECONDARY_EXEC_APIC_REGISTER_VIRT | 23108c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_VMFUNC | 23118c2ecf20Sopenharmony_ci SECONDARY_EXEC_DESC); 23128c2ecf20Sopenharmony_ci 23138c2ecf20Sopenharmony_ci if (nested_cpu_has(vmcs12, 23148c2ecf20Sopenharmony_ci CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) { 23158c2ecf20Sopenharmony_ci vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control & 23168c2ecf20Sopenharmony_ci ~SECONDARY_EXEC_ENABLE_PML; 23178c2ecf20Sopenharmony_ci exec_control |= vmcs12_exec_ctrl; 23188c2ecf20Sopenharmony_ci } 23198c2ecf20Sopenharmony_ci 23208c2ecf20Sopenharmony_ci /* VMCS shadowing for L2 is emulated for now */ 23218c2ecf20Sopenharmony_ci exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; 23228c2ecf20Sopenharmony_ci 23238c2ecf20Sopenharmony_ci /* 23248c2ecf20Sopenharmony_ci * Preset *DT exiting when 
emulating UMIP, so that vmx_set_cr4() 23258c2ecf20Sopenharmony_ci * will not have to rewrite the controls just for this bit. 23268c2ecf20Sopenharmony_ci */ 23278c2ecf20Sopenharmony_ci if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() && 23288c2ecf20Sopenharmony_ci (vmcs12->guest_cr4 & X86_CR4_UMIP)) 23298c2ecf20Sopenharmony_ci exec_control |= SECONDARY_EXEC_DESC; 23308c2ecf20Sopenharmony_ci 23318c2ecf20Sopenharmony_ci if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) 23328c2ecf20Sopenharmony_ci vmcs_write16(GUEST_INTR_STATUS, 23338c2ecf20Sopenharmony_ci vmcs12->guest_intr_status); 23348c2ecf20Sopenharmony_ci 23358c2ecf20Sopenharmony_ci if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) 23368c2ecf20Sopenharmony_ci exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; 23378c2ecf20Sopenharmony_ci 23388c2ecf20Sopenharmony_ci secondary_exec_controls_set(vmx, exec_control); 23398c2ecf20Sopenharmony_ci } 23408c2ecf20Sopenharmony_ci 23418c2ecf20Sopenharmony_ci /* 23428c2ecf20Sopenharmony_ci * ENTRY CONTROLS 23438c2ecf20Sopenharmony_ci * 23448c2ecf20Sopenharmony_ci * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE 23458c2ecf20Sopenharmony_ci * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate 23468c2ecf20Sopenharmony_ci * on the related bits (if supported by the CPU) in the hope that 23478c2ecf20Sopenharmony_ci * we can avoid VMWrites during vmx_set_efer(). 23488c2ecf20Sopenharmony_ci * 23498c2ecf20Sopenharmony_ci * Similarly, take vmcs01's PERF_GLOBAL_CTRL in the hope that if KVM is 23508c2ecf20Sopenharmony_ci * loading PERF_GLOBAL_CTRL via the VMCS for L1, then KVM will want to 23518c2ecf20Sopenharmony_ci * do the same for L2. 
23528c2ecf20Sopenharmony_ci */ 23538c2ecf20Sopenharmony_ci exec_control = __vm_entry_controls_get(vmcs01); 23548c2ecf20Sopenharmony_ci exec_control |= (vmcs12->vm_entry_controls & 23558c2ecf20Sopenharmony_ci ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); 23568c2ecf20Sopenharmony_ci exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER); 23578c2ecf20Sopenharmony_ci if (cpu_has_load_ia32_efer()) { 23588c2ecf20Sopenharmony_ci if (guest_efer & EFER_LMA) 23598c2ecf20Sopenharmony_ci exec_control |= VM_ENTRY_IA32E_MODE; 23608c2ecf20Sopenharmony_ci if (guest_efer != host_efer) 23618c2ecf20Sopenharmony_ci exec_control |= VM_ENTRY_LOAD_IA32_EFER; 23628c2ecf20Sopenharmony_ci } 23638c2ecf20Sopenharmony_ci vm_entry_controls_set(vmx, exec_control); 23648c2ecf20Sopenharmony_ci 23658c2ecf20Sopenharmony_ci /* 23668c2ecf20Sopenharmony_ci * EXIT CONTROLS 23678c2ecf20Sopenharmony_ci * 23688c2ecf20Sopenharmony_ci * L2->L1 exit controls are emulated - the hardware exit is to L0 so 23698c2ecf20Sopenharmony_ci * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER 23708c2ecf20Sopenharmony_ci * bits may be modified by vmx_set_efer() in prepare_vmcs02(). 
23718c2ecf20Sopenharmony_ci */ 23728c2ecf20Sopenharmony_ci exec_control = __vm_exit_controls_get(vmcs01); 23738c2ecf20Sopenharmony_ci if (cpu_has_load_ia32_efer() && guest_efer != host_efer) 23748c2ecf20Sopenharmony_ci exec_control |= VM_EXIT_LOAD_IA32_EFER; 23758c2ecf20Sopenharmony_ci else 23768c2ecf20Sopenharmony_ci exec_control &= ~VM_EXIT_LOAD_IA32_EFER; 23778c2ecf20Sopenharmony_ci vm_exit_controls_set(vmx, exec_control); 23788c2ecf20Sopenharmony_ci 23798c2ecf20Sopenharmony_ci /* 23808c2ecf20Sopenharmony_ci * Interrupt/Exception Fields 23818c2ecf20Sopenharmony_ci */ 23828c2ecf20Sopenharmony_ci if (vmx->nested.nested_run_pending) { 23838c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 23848c2ecf20Sopenharmony_ci vmcs12->vm_entry_intr_info_field); 23858c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 23868c2ecf20Sopenharmony_ci vmcs12->vm_entry_exception_error_code); 23878c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 23888c2ecf20Sopenharmony_ci vmcs12->vm_entry_instruction_len); 23898c2ecf20Sopenharmony_ci vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 23908c2ecf20Sopenharmony_ci vmcs12->guest_interruptibility_info); 23918c2ecf20Sopenharmony_ci vmx->loaded_vmcs->nmi_known_unmasked = 23928c2ecf20Sopenharmony_ci !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI); 23938c2ecf20Sopenharmony_ci } else { 23948c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); 23958c2ecf20Sopenharmony_ci } 23968c2ecf20Sopenharmony_ci} 23978c2ecf20Sopenharmony_ci 23988c2ecf20Sopenharmony_cistatic void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) 23998c2ecf20Sopenharmony_ci{ 24008c2ecf20Sopenharmony_ci struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs; 24018c2ecf20Sopenharmony_ci 24028c2ecf20Sopenharmony_ci if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & 24038c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) { 24048c2ecf20Sopenharmony_ci vmcs_write16(GUEST_ES_SELECTOR, 
vmcs12->guest_es_selector); 24058c2ecf20Sopenharmony_ci vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); 24068c2ecf20Sopenharmony_ci vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector); 24078c2ecf20Sopenharmony_ci vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector); 24088c2ecf20Sopenharmony_ci vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector); 24098c2ecf20Sopenharmony_ci vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector); 24108c2ecf20Sopenharmony_ci vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector); 24118c2ecf20Sopenharmony_ci vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector); 24128c2ecf20Sopenharmony_ci vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit); 24138c2ecf20Sopenharmony_ci vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit); 24148c2ecf20Sopenharmony_ci vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit); 24158c2ecf20Sopenharmony_ci vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit); 24168c2ecf20Sopenharmony_ci vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit); 24178c2ecf20Sopenharmony_ci vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit); 24188c2ecf20Sopenharmony_ci vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit); 24198c2ecf20Sopenharmony_ci vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit); 24208c2ecf20Sopenharmony_ci vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit); 24218c2ecf20Sopenharmony_ci vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit); 24228c2ecf20Sopenharmony_ci vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes); 24238c2ecf20Sopenharmony_ci vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes); 24248c2ecf20Sopenharmony_ci vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes); 24258c2ecf20Sopenharmony_ci vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes); 24268c2ecf20Sopenharmony_ci vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes); 24278c2ecf20Sopenharmony_ci vmcs_write32(GUEST_GS_AR_BYTES, 
vmcs12->guest_gs_ar_bytes); 24288c2ecf20Sopenharmony_ci vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes); 24298c2ecf20Sopenharmony_ci vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes); 24308c2ecf20Sopenharmony_ci vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base); 24318c2ecf20Sopenharmony_ci vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base); 24328c2ecf20Sopenharmony_ci vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base); 24338c2ecf20Sopenharmony_ci vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base); 24348c2ecf20Sopenharmony_ci vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base); 24358c2ecf20Sopenharmony_ci vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base); 24368c2ecf20Sopenharmony_ci vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base); 24378c2ecf20Sopenharmony_ci vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base); 24388c2ecf20Sopenharmony_ci vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); 24398c2ecf20Sopenharmony_ci vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); 24408c2ecf20Sopenharmony_ci 24418c2ecf20Sopenharmony_ci vmx->segment_cache.bitmask = 0; 24428c2ecf20Sopenharmony_ci } 24438c2ecf20Sopenharmony_ci 24448c2ecf20Sopenharmony_ci if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & 24458c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) { 24468c2ecf20Sopenharmony_ci vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); 24478c2ecf20Sopenharmony_ci vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 24488c2ecf20Sopenharmony_ci vmcs12->guest_pending_dbg_exceptions); 24498c2ecf20Sopenharmony_ci vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); 24508c2ecf20Sopenharmony_ci vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip); 24518c2ecf20Sopenharmony_ci 24528c2ecf20Sopenharmony_ci /* 24538c2ecf20Sopenharmony_ci * L1 may access the L2's PDPTR, so save them to construct 24548c2ecf20Sopenharmony_ci * vmcs12 24558c2ecf20Sopenharmony_ci */ 24568c2ecf20Sopenharmony_ci if (enable_ept) { 24578c2ecf20Sopenharmony_ci 
vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); 24588c2ecf20Sopenharmony_ci vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); 24598c2ecf20Sopenharmony_ci vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); 24608c2ecf20Sopenharmony_ci vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); 24618c2ecf20Sopenharmony_ci } 24628c2ecf20Sopenharmony_ci 24638c2ecf20Sopenharmony_ci if (kvm_mpx_supported() && vmx->nested.nested_run_pending && 24648c2ecf20Sopenharmony_ci (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) 24658c2ecf20Sopenharmony_ci vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); 24668c2ecf20Sopenharmony_ci } 24678c2ecf20Sopenharmony_ci 24688c2ecf20Sopenharmony_ci if (nested_cpu_has_xsaves(vmcs12)) 24698c2ecf20Sopenharmony_ci vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap); 24708c2ecf20Sopenharmony_ci 24718c2ecf20Sopenharmony_ci /* 24728c2ecf20Sopenharmony_ci * Whether page-faults are trapped is determined by a combination of 24738c2ecf20Sopenharmony_ci * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. If L0 24748c2ecf20Sopenharmony_ci * doesn't care about page faults then we should set all of these to 24758c2ecf20Sopenharmony_ci * L1's desires. However, if L0 does care about (some) page faults, it 24768c2ecf20Sopenharmony_ci * is not easy (if at all possible?) to merge L0 and L1's desires, we 24778c2ecf20Sopenharmony_ci * simply ask to exit on each and every L2 page fault. This is done by 24788c2ecf20Sopenharmony_ci * setting MASK=MATCH=0 and (see below) EB.PF=1. 24798c2ecf20Sopenharmony_ci * Note that below we don't need special code to set EB.PF beyond the 24808c2ecf20Sopenharmony_ci * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept, 24818c2ecf20Sopenharmony_ci * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when 24828c2ecf20Sopenharmony_ci * !enable_ept, EB.PF is 1, so the "or" will always be 1. 
24838c2ecf20Sopenharmony_ci */ 24848c2ecf20Sopenharmony_ci if (vmx_need_pf_intercept(&vmx->vcpu)) { 24858c2ecf20Sopenharmony_ci /* 24868c2ecf20Sopenharmony_ci * TODO: if both L0 and L1 need the same MASK and MATCH, 24878c2ecf20Sopenharmony_ci * go ahead and use it? 24888c2ecf20Sopenharmony_ci */ 24898c2ecf20Sopenharmony_ci vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); 24908c2ecf20Sopenharmony_ci vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); 24918c2ecf20Sopenharmony_ci } else { 24928c2ecf20Sopenharmony_ci vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, vmcs12->page_fault_error_code_mask); 24938c2ecf20Sopenharmony_ci vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, vmcs12->page_fault_error_code_match); 24948c2ecf20Sopenharmony_ci } 24958c2ecf20Sopenharmony_ci 24968c2ecf20Sopenharmony_ci if (cpu_has_vmx_apicv()) { 24978c2ecf20Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0); 24988c2ecf20Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1); 24998c2ecf20Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2); 25008c2ecf20Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3); 25018c2ecf20Sopenharmony_ci } 25028c2ecf20Sopenharmony_ci 25038c2ecf20Sopenharmony_ci /* 25048c2ecf20Sopenharmony_ci * Make sure the msr_autostore list is up to date before we set the 25058c2ecf20Sopenharmony_ci * count in the vmcs02. 
25068c2ecf20Sopenharmony_ci */ 25078c2ecf20Sopenharmony_ci prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC); 25088c2ecf20Sopenharmony_ci 25098c2ecf20Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr); 25108c2ecf20Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); 25118c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); 25128c2ecf20Sopenharmony_ci 25138c2ecf20Sopenharmony_ci set_cr4_guest_host_mask(vmx); 25148c2ecf20Sopenharmony_ci} 25158c2ecf20Sopenharmony_ci 25168c2ecf20Sopenharmony_ci/* 25178c2ecf20Sopenharmony_ci * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested 25188c2ecf20Sopenharmony_ci * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it 25198c2ecf20Sopenharmony_ci * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2 25208c2ecf20Sopenharmony_ci * guest in a way that will both be appropriate to L1's requests, and our 25218c2ecf20Sopenharmony_ci * needs. In addition to modifying the active vmcs (which is vmcs02), this 25228c2ecf20Sopenharmony_ci * function also has additional necessary side-effects, like setting various 25238c2ecf20Sopenharmony_ci * vcpu->arch fields. 25248c2ecf20Sopenharmony_ci * Returns 0 on success, 1 on failure. Invalid state exit qualification code 25258c2ecf20Sopenharmony_ci * is assigned to entry_failure_code on failure. 
25268c2ecf20Sopenharmony_ci */ 25278c2ecf20Sopenharmony_cistatic int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, 25288c2ecf20Sopenharmony_ci enum vm_entry_failure_code *entry_failure_code) 25298c2ecf20Sopenharmony_ci{ 25308c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 25318c2ecf20Sopenharmony_ci struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs; 25328c2ecf20Sopenharmony_ci bool load_guest_pdptrs_vmcs12 = false; 25338c2ecf20Sopenharmony_ci 25348c2ecf20Sopenharmony_ci if (vmx->nested.dirty_vmcs12 || hv_evmcs) { 25358c2ecf20Sopenharmony_ci prepare_vmcs02_rare(vmx, vmcs12); 25368c2ecf20Sopenharmony_ci vmx->nested.dirty_vmcs12 = false; 25378c2ecf20Sopenharmony_ci 25388c2ecf20Sopenharmony_ci load_guest_pdptrs_vmcs12 = !hv_evmcs || 25398c2ecf20Sopenharmony_ci !(hv_evmcs->hv_clean_fields & 25408c2ecf20Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1); 25418c2ecf20Sopenharmony_ci } 25428c2ecf20Sopenharmony_ci 25438c2ecf20Sopenharmony_ci if (vmx->nested.nested_run_pending && 25448c2ecf20Sopenharmony_ci (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { 25458c2ecf20Sopenharmony_ci kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); 25468c2ecf20Sopenharmony_ci vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); 25478c2ecf20Sopenharmony_ci } else { 25488c2ecf20Sopenharmony_ci kvm_set_dr(vcpu, 7, vcpu->arch.dr7); 25498c2ecf20Sopenharmony_ci vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); 25508c2ecf20Sopenharmony_ci } 25518c2ecf20Sopenharmony_ci if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending || 25528c2ecf20Sopenharmony_ci !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) 25538c2ecf20Sopenharmony_ci vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); 25548c2ecf20Sopenharmony_ci vmx_set_rflags(vcpu, vmcs12->guest_rflags); 25558c2ecf20Sopenharmony_ci 25568c2ecf20Sopenharmony_ci /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the 25578c2ecf20Sopenharmony_ci * 
bitwise-or of what L1 wants to trap for L2, and what we want to 25588c2ecf20Sopenharmony_ci * trap. Note that CR0.TS also needs updating - we do this later. 25598c2ecf20Sopenharmony_ci */ 25608c2ecf20Sopenharmony_ci update_exception_bitmap(vcpu); 25618c2ecf20Sopenharmony_ci vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; 25628c2ecf20Sopenharmony_ci vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); 25638c2ecf20Sopenharmony_ci 25648c2ecf20Sopenharmony_ci if (vmx->nested.nested_run_pending && 25658c2ecf20Sopenharmony_ci (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) { 25668c2ecf20Sopenharmony_ci vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); 25678c2ecf20Sopenharmony_ci vcpu->arch.pat = vmcs12->guest_ia32_pat; 25688c2ecf20Sopenharmony_ci } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { 25698c2ecf20Sopenharmony_ci vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); 25708c2ecf20Sopenharmony_ci } 25718c2ecf20Sopenharmony_ci 25728c2ecf20Sopenharmony_ci vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); 25738c2ecf20Sopenharmony_ci 25748c2ecf20Sopenharmony_ci if (kvm_has_tsc_control) 25758c2ecf20Sopenharmony_ci decache_tsc_multiplier(vmx); 25768c2ecf20Sopenharmony_ci 25778c2ecf20Sopenharmony_ci nested_vmx_transition_tlb_flush(vcpu, vmcs12, true); 25788c2ecf20Sopenharmony_ci 25798c2ecf20Sopenharmony_ci if (nested_cpu_has_ept(vmcs12)) 25808c2ecf20Sopenharmony_ci nested_ept_init_mmu_context(vcpu); 25818c2ecf20Sopenharmony_ci 25828c2ecf20Sopenharmony_ci /* 25838c2ecf20Sopenharmony_ci * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those 25848c2ecf20Sopenharmony_ci * bits which we consider mandatory enabled. 
25858c2ecf20Sopenharmony_ci * The CR0_READ_SHADOW is what L2 should have expected to read given 25868c2ecf20Sopenharmony_ci * the specifications by L1; It's not enough to take 25878c2ecf20Sopenharmony_ci * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we we 25888c2ecf20Sopenharmony_ci * have more bits than L1 expected. 25898c2ecf20Sopenharmony_ci */ 25908c2ecf20Sopenharmony_ci vmx_set_cr0(vcpu, vmcs12->guest_cr0); 25918c2ecf20Sopenharmony_ci vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12)); 25928c2ecf20Sopenharmony_ci 25938c2ecf20Sopenharmony_ci vmx_set_cr4(vcpu, vmcs12->guest_cr4); 25948c2ecf20Sopenharmony_ci vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12)); 25958c2ecf20Sopenharmony_ci 25968c2ecf20Sopenharmony_ci vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12); 25978c2ecf20Sopenharmony_ci /* Note: may modify VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */ 25988c2ecf20Sopenharmony_ci vmx_set_efer(vcpu, vcpu->arch.efer); 25998c2ecf20Sopenharmony_ci 26008c2ecf20Sopenharmony_ci /* 26018c2ecf20Sopenharmony_ci * Guest state is invalid and unrestricted guest is disabled, 26028c2ecf20Sopenharmony_ci * which means L1 attempted VMEntry to L2 with invalid state. 26038c2ecf20Sopenharmony_ci * Fail the VMEntry. 26048c2ecf20Sopenharmony_ci */ 26058c2ecf20Sopenharmony_ci if (CC(!vmx_guest_state_valid(vcpu))) { 26068c2ecf20Sopenharmony_ci *entry_failure_code = ENTRY_FAIL_DEFAULT; 26078c2ecf20Sopenharmony_ci return -EINVAL; 26088c2ecf20Sopenharmony_ci } 26098c2ecf20Sopenharmony_ci 26108c2ecf20Sopenharmony_ci /* Shadow page tables on either EPT or shadow page tables. */ 26118c2ecf20Sopenharmony_ci if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), 26128c2ecf20Sopenharmony_ci entry_failure_code)) 26138c2ecf20Sopenharmony_ci return -EINVAL; 26148c2ecf20Sopenharmony_ci 26158c2ecf20Sopenharmony_ci /* 26168c2ecf20Sopenharmony_ci * Immediately write vmcs02.GUEST_CR3. 
It will be propagated to vmcs12 26178c2ecf20Sopenharmony_ci * on nested VM-Exit, which can occur without actually running L2 and 26188c2ecf20Sopenharmony_ci * thus without hitting vmx_load_mmu_pgd(), e.g. if L1 is entering L2 with 26198c2ecf20Sopenharmony_ci * vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the 26208c2ecf20Sopenharmony_ci * transition to HLT instead of running L2. 26218c2ecf20Sopenharmony_ci */ 26228c2ecf20Sopenharmony_ci if (enable_ept) 26238c2ecf20Sopenharmony_ci vmcs_writel(GUEST_CR3, vmcs12->guest_cr3); 26248c2ecf20Sopenharmony_ci 26258c2ecf20Sopenharmony_ci /* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */ 26268c2ecf20Sopenharmony_ci if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) && 26278c2ecf20Sopenharmony_ci is_pae_paging(vcpu)) { 26288c2ecf20Sopenharmony_ci vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); 26298c2ecf20Sopenharmony_ci vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); 26308c2ecf20Sopenharmony_ci vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); 26318c2ecf20Sopenharmony_ci vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); 26328c2ecf20Sopenharmony_ci } 26338c2ecf20Sopenharmony_ci 26348c2ecf20Sopenharmony_ci if (!enable_ept) 26358c2ecf20Sopenharmony_ci vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; 26368c2ecf20Sopenharmony_ci 26378c2ecf20Sopenharmony_ci if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && 26388c2ecf20Sopenharmony_ci WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, 26398c2ecf20Sopenharmony_ci vmcs12->guest_ia32_perf_global_ctrl))) { 26408c2ecf20Sopenharmony_ci *entry_failure_code = ENTRY_FAIL_DEFAULT; 26418c2ecf20Sopenharmony_ci return -EINVAL; 26428c2ecf20Sopenharmony_ci } 26438c2ecf20Sopenharmony_ci 26448c2ecf20Sopenharmony_ci kvm_rsp_write(vcpu, vmcs12->guest_rsp); 26458c2ecf20Sopenharmony_ci kvm_rip_write(vcpu, vmcs12->guest_rip); 26468c2ecf20Sopenharmony_ci return 0; 26478c2ecf20Sopenharmony_ci} 
26488c2ecf20Sopenharmony_ci 26498c2ecf20Sopenharmony_cistatic int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12) 26508c2ecf20Sopenharmony_ci{ 26518c2ecf20Sopenharmony_ci if (CC(!nested_cpu_has_nmi_exiting(vmcs12) && 26528c2ecf20Sopenharmony_ci nested_cpu_has_virtual_nmis(vmcs12))) 26538c2ecf20Sopenharmony_ci return -EINVAL; 26548c2ecf20Sopenharmony_ci 26558c2ecf20Sopenharmony_ci if (CC(!nested_cpu_has_virtual_nmis(vmcs12) && 26568c2ecf20Sopenharmony_ci nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING))) 26578c2ecf20Sopenharmony_ci return -EINVAL; 26588c2ecf20Sopenharmony_ci 26598c2ecf20Sopenharmony_ci return 0; 26608c2ecf20Sopenharmony_ci} 26618c2ecf20Sopenharmony_ci 26628c2ecf20Sopenharmony_cistatic bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp) 26638c2ecf20Sopenharmony_ci{ 26648c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 26658c2ecf20Sopenharmony_ci int maxphyaddr = cpuid_maxphyaddr(vcpu); 26668c2ecf20Sopenharmony_ci 26678c2ecf20Sopenharmony_ci /* Check for memory type validity */ 26688c2ecf20Sopenharmony_ci switch (new_eptp & VMX_EPTP_MT_MASK) { 26698c2ecf20Sopenharmony_ci case VMX_EPTP_MT_UC: 26708c2ecf20Sopenharmony_ci if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT))) 26718c2ecf20Sopenharmony_ci return false; 26728c2ecf20Sopenharmony_ci break; 26738c2ecf20Sopenharmony_ci case VMX_EPTP_MT_WB: 26748c2ecf20Sopenharmony_ci if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT))) 26758c2ecf20Sopenharmony_ci return false; 26768c2ecf20Sopenharmony_ci break; 26778c2ecf20Sopenharmony_ci default: 26788c2ecf20Sopenharmony_ci return false; 26798c2ecf20Sopenharmony_ci } 26808c2ecf20Sopenharmony_ci 26818c2ecf20Sopenharmony_ci /* Page-walk levels validity. 
*/ 26828c2ecf20Sopenharmony_ci switch (new_eptp & VMX_EPTP_PWL_MASK) { 26838c2ecf20Sopenharmony_ci case VMX_EPTP_PWL_5: 26848c2ecf20Sopenharmony_ci if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT))) 26858c2ecf20Sopenharmony_ci return false; 26868c2ecf20Sopenharmony_ci break; 26878c2ecf20Sopenharmony_ci case VMX_EPTP_PWL_4: 26888c2ecf20Sopenharmony_ci if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT))) 26898c2ecf20Sopenharmony_ci return false; 26908c2ecf20Sopenharmony_ci break; 26918c2ecf20Sopenharmony_ci default: 26928c2ecf20Sopenharmony_ci return false; 26938c2ecf20Sopenharmony_ci } 26948c2ecf20Sopenharmony_ci 26958c2ecf20Sopenharmony_ci /* Reserved bits should not be set */ 26968c2ecf20Sopenharmony_ci if (CC(new_eptp >> maxphyaddr || ((new_eptp >> 7) & 0x1f))) 26978c2ecf20Sopenharmony_ci return false; 26988c2ecf20Sopenharmony_ci 26998c2ecf20Sopenharmony_ci /* AD, if set, should be supported */ 27008c2ecf20Sopenharmony_ci if (new_eptp & VMX_EPTP_AD_ENABLE_BIT) { 27018c2ecf20Sopenharmony_ci if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT))) 27028c2ecf20Sopenharmony_ci return false; 27038c2ecf20Sopenharmony_ci } 27048c2ecf20Sopenharmony_ci 27058c2ecf20Sopenharmony_ci return true; 27068c2ecf20Sopenharmony_ci} 27078c2ecf20Sopenharmony_ci 27088c2ecf20Sopenharmony_ci/* 27098c2ecf20Sopenharmony_ci * Checks related to VM-Execution Control Fields 27108c2ecf20Sopenharmony_ci */ 27118c2ecf20Sopenharmony_cistatic int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu, 27128c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 27138c2ecf20Sopenharmony_ci{ 27148c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 27158c2ecf20Sopenharmony_ci 27168c2ecf20Sopenharmony_ci if (CC(!vmx_control_verify(vmcs12->pin_based_vm_exec_control, 27178c2ecf20Sopenharmony_ci vmx->nested.msrs.pinbased_ctls_low, 27188c2ecf20Sopenharmony_ci vmx->nested.msrs.pinbased_ctls_high)) || 27198c2ecf20Sopenharmony_ci CC(!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, 
27208c2ecf20Sopenharmony_ci vmx->nested.msrs.procbased_ctls_low, 27218c2ecf20Sopenharmony_ci vmx->nested.msrs.procbased_ctls_high))) 27228c2ecf20Sopenharmony_ci return -EINVAL; 27238c2ecf20Sopenharmony_ci 27248c2ecf20Sopenharmony_ci if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && 27258c2ecf20Sopenharmony_ci CC(!vmx_control_verify(vmcs12->secondary_vm_exec_control, 27268c2ecf20Sopenharmony_ci vmx->nested.msrs.secondary_ctls_low, 27278c2ecf20Sopenharmony_ci vmx->nested.msrs.secondary_ctls_high))) 27288c2ecf20Sopenharmony_ci return -EINVAL; 27298c2ecf20Sopenharmony_ci 27308c2ecf20Sopenharmony_ci if (CC(vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) || 27318c2ecf20Sopenharmony_ci nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) || 27328c2ecf20Sopenharmony_ci nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) || 27338c2ecf20Sopenharmony_ci nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) || 27348c2ecf20Sopenharmony_ci nested_vmx_check_apic_access_controls(vcpu, vmcs12) || 27358c2ecf20Sopenharmony_ci nested_vmx_check_apicv_controls(vcpu, vmcs12) || 27368c2ecf20Sopenharmony_ci nested_vmx_check_nmi_controls(vmcs12) || 27378c2ecf20Sopenharmony_ci nested_vmx_check_pml_controls(vcpu, vmcs12) || 27388c2ecf20Sopenharmony_ci nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) || 27398c2ecf20Sopenharmony_ci nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) || 27408c2ecf20Sopenharmony_ci nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) || 27418c2ecf20Sopenharmony_ci CC(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)) 27428c2ecf20Sopenharmony_ci return -EINVAL; 27438c2ecf20Sopenharmony_ci 27448c2ecf20Sopenharmony_ci if (!nested_cpu_has_preemption_timer(vmcs12) && 27458c2ecf20Sopenharmony_ci nested_cpu_has_save_preemption_timer(vmcs12)) 27468c2ecf20Sopenharmony_ci return -EINVAL; 27478c2ecf20Sopenharmony_ci 27488c2ecf20Sopenharmony_ci if (nested_cpu_has_ept(vmcs12) && 27498c2ecf20Sopenharmony_ci 
CC(!nested_vmx_check_eptp(vcpu, vmcs12->ept_pointer))) 27508c2ecf20Sopenharmony_ci return -EINVAL; 27518c2ecf20Sopenharmony_ci 27528c2ecf20Sopenharmony_ci if (nested_cpu_has_vmfunc(vmcs12)) { 27538c2ecf20Sopenharmony_ci if (CC(vmcs12->vm_function_control & 27548c2ecf20Sopenharmony_ci ~vmx->nested.msrs.vmfunc_controls)) 27558c2ecf20Sopenharmony_ci return -EINVAL; 27568c2ecf20Sopenharmony_ci 27578c2ecf20Sopenharmony_ci if (nested_cpu_has_eptp_switching(vmcs12)) { 27588c2ecf20Sopenharmony_ci if (CC(!nested_cpu_has_ept(vmcs12)) || 27598c2ecf20Sopenharmony_ci CC(!page_address_valid(vcpu, vmcs12->eptp_list_address))) 27608c2ecf20Sopenharmony_ci return -EINVAL; 27618c2ecf20Sopenharmony_ci } 27628c2ecf20Sopenharmony_ci } 27638c2ecf20Sopenharmony_ci 27648c2ecf20Sopenharmony_ci return 0; 27658c2ecf20Sopenharmony_ci} 27668c2ecf20Sopenharmony_ci 27678c2ecf20Sopenharmony_ci/* 27688c2ecf20Sopenharmony_ci * Checks related to VM-Exit Control Fields 27698c2ecf20Sopenharmony_ci */ 27708c2ecf20Sopenharmony_cistatic int nested_check_vm_exit_controls(struct kvm_vcpu *vcpu, 27718c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 27728c2ecf20Sopenharmony_ci{ 27738c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 27748c2ecf20Sopenharmony_ci 27758c2ecf20Sopenharmony_ci if (CC(!vmx_control_verify(vmcs12->vm_exit_controls, 27768c2ecf20Sopenharmony_ci vmx->nested.msrs.exit_ctls_low, 27778c2ecf20Sopenharmony_ci vmx->nested.msrs.exit_ctls_high)) || 27788c2ecf20Sopenharmony_ci CC(nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12))) 27798c2ecf20Sopenharmony_ci return -EINVAL; 27808c2ecf20Sopenharmony_ci 27818c2ecf20Sopenharmony_ci return 0; 27828c2ecf20Sopenharmony_ci} 27838c2ecf20Sopenharmony_ci 27848c2ecf20Sopenharmony_ci/* 27858c2ecf20Sopenharmony_ci * Checks related to VM-Entry Control Fields 27868c2ecf20Sopenharmony_ci */ 27878c2ecf20Sopenharmony_cistatic int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, 27888c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 
27898c2ecf20Sopenharmony_ci{ 27908c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 27918c2ecf20Sopenharmony_ci 27928c2ecf20Sopenharmony_ci if (CC(!vmx_control_verify(vmcs12->vm_entry_controls, 27938c2ecf20Sopenharmony_ci vmx->nested.msrs.entry_ctls_low, 27948c2ecf20Sopenharmony_ci vmx->nested.msrs.entry_ctls_high))) 27958c2ecf20Sopenharmony_ci return -EINVAL; 27968c2ecf20Sopenharmony_ci 27978c2ecf20Sopenharmony_ci /* 27988c2ecf20Sopenharmony_ci * From the Intel SDM, volume 3: 27998c2ecf20Sopenharmony_ci * Fields relevant to VM-entry event injection must be set properly. 28008c2ecf20Sopenharmony_ci * These fields are the VM-entry interruption-information field, the 28018c2ecf20Sopenharmony_ci * VM-entry exception error code, and the VM-entry instruction length. 28028c2ecf20Sopenharmony_ci */ 28038c2ecf20Sopenharmony_ci if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) { 28048c2ecf20Sopenharmony_ci u32 intr_info = vmcs12->vm_entry_intr_info_field; 28058c2ecf20Sopenharmony_ci u8 vector = intr_info & INTR_INFO_VECTOR_MASK; 28068c2ecf20Sopenharmony_ci u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK; 28078c2ecf20Sopenharmony_ci bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK; 28088c2ecf20Sopenharmony_ci bool should_have_error_code; 28098c2ecf20Sopenharmony_ci bool urg = nested_cpu_has2(vmcs12, 28108c2ecf20Sopenharmony_ci SECONDARY_EXEC_UNRESTRICTED_GUEST); 28118c2ecf20Sopenharmony_ci bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE; 28128c2ecf20Sopenharmony_ci 28138c2ecf20Sopenharmony_ci /* VM-entry interruption-info field: interruption type */ 28148c2ecf20Sopenharmony_ci if (CC(intr_type == INTR_TYPE_RESERVED) || 28158c2ecf20Sopenharmony_ci CC(intr_type == INTR_TYPE_OTHER_EVENT && 28168c2ecf20Sopenharmony_ci !nested_cpu_supports_monitor_trap_flag(vcpu))) 28178c2ecf20Sopenharmony_ci return -EINVAL; 28188c2ecf20Sopenharmony_ci 28198c2ecf20Sopenharmony_ci /* VM-entry interruption-info field: vector */ 
28208c2ecf20Sopenharmony_ci if (CC(intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) || 28218c2ecf20Sopenharmony_ci CC(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) || 28228c2ecf20Sopenharmony_ci CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0)) 28238c2ecf20Sopenharmony_ci return -EINVAL; 28248c2ecf20Sopenharmony_ci 28258c2ecf20Sopenharmony_ci /* VM-entry interruption-info field: deliver error code */ 28268c2ecf20Sopenharmony_ci should_have_error_code = 28278c2ecf20Sopenharmony_ci intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode && 28288c2ecf20Sopenharmony_ci x86_exception_has_error_code(vector); 28298c2ecf20Sopenharmony_ci if (CC(has_error_code != should_have_error_code)) 28308c2ecf20Sopenharmony_ci return -EINVAL; 28318c2ecf20Sopenharmony_ci 28328c2ecf20Sopenharmony_ci /* VM-entry exception error code */ 28338c2ecf20Sopenharmony_ci if (CC(has_error_code && 28348c2ecf20Sopenharmony_ci vmcs12->vm_entry_exception_error_code & GENMASK(31, 16))) 28358c2ecf20Sopenharmony_ci return -EINVAL; 28368c2ecf20Sopenharmony_ci 28378c2ecf20Sopenharmony_ci /* VM-entry interruption-info field: reserved bits */ 28388c2ecf20Sopenharmony_ci if (CC(intr_info & INTR_INFO_RESVD_BITS_MASK)) 28398c2ecf20Sopenharmony_ci return -EINVAL; 28408c2ecf20Sopenharmony_ci 28418c2ecf20Sopenharmony_ci /* VM-entry instruction length */ 28428c2ecf20Sopenharmony_ci switch (intr_type) { 28438c2ecf20Sopenharmony_ci case INTR_TYPE_SOFT_EXCEPTION: 28448c2ecf20Sopenharmony_ci case INTR_TYPE_SOFT_INTR: 28458c2ecf20Sopenharmony_ci case INTR_TYPE_PRIV_SW_EXCEPTION: 28468c2ecf20Sopenharmony_ci if (CC(vmcs12->vm_entry_instruction_len > 15) || 28478c2ecf20Sopenharmony_ci CC(vmcs12->vm_entry_instruction_len == 0 && 28488c2ecf20Sopenharmony_ci CC(!nested_cpu_has_zero_length_injection(vcpu)))) 28498c2ecf20Sopenharmony_ci return -EINVAL; 28508c2ecf20Sopenharmony_ci } 28518c2ecf20Sopenharmony_ci } 28528c2ecf20Sopenharmony_ci 28538c2ecf20Sopenharmony_ci if 
(nested_vmx_check_entry_msr_switch_controls(vcpu, vmcs12)) 28548c2ecf20Sopenharmony_ci return -EINVAL; 28558c2ecf20Sopenharmony_ci 28568c2ecf20Sopenharmony_ci return 0; 28578c2ecf20Sopenharmony_ci} 28588c2ecf20Sopenharmony_ci 28598c2ecf20Sopenharmony_cistatic int nested_vmx_check_controls(struct kvm_vcpu *vcpu, 28608c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 28618c2ecf20Sopenharmony_ci{ 28628c2ecf20Sopenharmony_ci if (nested_check_vm_execution_controls(vcpu, vmcs12) || 28638c2ecf20Sopenharmony_ci nested_check_vm_exit_controls(vcpu, vmcs12) || 28648c2ecf20Sopenharmony_ci nested_check_vm_entry_controls(vcpu, vmcs12)) 28658c2ecf20Sopenharmony_ci return -EINVAL; 28668c2ecf20Sopenharmony_ci 28678c2ecf20Sopenharmony_ci if (to_vmx(vcpu)->nested.enlightened_vmcs_enabled) 28688c2ecf20Sopenharmony_ci return nested_evmcs_check_controls(vmcs12); 28698c2ecf20Sopenharmony_ci 28708c2ecf20Sopenharmony_ci return 0; 28718c2ecf20Sopenharmony_ci} 28728c2ecf20Sopenharmony_ci 28738c2ecf20Sopenharmony_cistatic int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu, 28748c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 28758c2ecf20Sopenharmony_ci{ 28768c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 28778c2ecf20Sopenharmony_ci if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) != 28788c2ecf20Sopenharmony_ci !!(vcpu->arch.efer & EFER_LMA))) 28798c2ecf20Sopenharmony_ci return -EINVAL; 28808c2ecf20Sopenharmony_ci#endif 28818c2ecf20Sopenharmony_ci return 0; 28828c2ecf20Sopenharmony_ci} 28838c2ecf20Sopenharmony_ci 28848c2ecf20Sopenharmony_cistatic int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, 28858c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 28868c2ecf20Sopenharmony_ci{ 28878c2ecf20Sopenharmony_ci bool ia32e; 28888c2ecf20Sopenharmony_ci 28898c2ecf20Sopenharmony_ci if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) || 28908c2ecf20Sopenharmony_ci CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) || 28918c2ecf20Sopenharmony_ci CC(!nested_cr3_valid(vcpu, 
vmcs12->host_cr3))) 28928c2ecf20Sopenharmony_ci return -EINVAL; 28938c2ecf20Sopenharmony_ci 28948c2ecf20Sopenharmony_ci if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) || 28958c2ecf20Sopenharmony_ci CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu))) 28968c2ecf20Sopenharmony_ci return -EINVAL; 28978c2ecf20Sopenharmony_ci 28988c2ecf20Sopenharmony_ci if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) && 28998c2ecf20Sopenharmony_ci CC(!kvm_pat_valid(vmcs12->host_ia32_pat))) 29008c2ecf20Sopenharmony_ci return -EINVAL; 29018c2ecf20Sopenharmony_ci 29028c2ecf20Sopenharmony_ci if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) && 29038c2ecf20Sopenharmony_ci CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu), 29048c2ecf20Sopenharmony_ci vmcs12->host_ia32_perf_global_ctrl))) 29058c2ecf20Sopenharmony_ci return -EINVAL; 29068c2ecf20Sopenharmony_ci 29078c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 29088c2ecf20Sopenharmony_ci ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE); 29098c2ecf20Sopenharmony_ci#else 29108c2ecf20Sopenharmony_ci ia32e = false; 29118c2ecf20Sopenharmony_ci#endif 29128c2ecf20Sopenharmony_ci 29138c2ecf20Sopenharmony_ci if (ia32e) { 29148c2ecf20Sopenharmony_ci if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE))) 29158c2ecf20Sopenharmony_ci return -EINVAL; 29168c2ecf20Sopenharmony_ci } else { 29178c2ecf20Sopenharmony_ci if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || 29188c2ecf20Sopenharmony_ci CC(vmcs12->host_cr4 & X86_CR4_PCIDE) || 29198c2ecf20Sopenharmony_ci CC((vmcs12->host_rip) >> 32)) 29208c2ecf20Sopenharmony_ci return -EINVAL; 29218c2ecf20Sopenharmony_ci } 29228c2ecf20Sopenharmony_ci 29238c2ecf20Sopenharmony_ci if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || 29248c2ecf20Sopenharmony_ci CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || 29258c2ecf20Sopenharmony_ci CC(vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || 
29268c2ecf20Sopenharmony_ci CC(vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || 29278c2ecf20Sopenharmony_ci CC(vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || 29288c2ecf20Sopenharmony_ci CC(vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || 29298c2ecf20Sopenharmony_ci CC(vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || 29308c2ecf20Sopenharmony_ci CC(vmcs12->host_cs_selector == 0) || 29318c2ecf20Sopenharmony_ci CC(vmcs12->host_tr_selector == 0) || 29328c2ecf20Sopenharmony_ci CC(vmcs12->host_ss_selector == 0 && !ia32e)) 29338c2ecf20Sopenharmony_ci return -EINVAL; 29348c2ecf20Sopenharmony_ci 29358c2ecf20Sopenharmony_ci if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) || 29368c2ecf20Sopenharmony_ci CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) || 29378c2ecf20Sopenharmony_ci CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) || 29388c2ecf20Sopenharmony_ci CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) || 29398c2ecf20Sopenharmony_ci CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) || 29408c2ecf20Sopenharmony_ci CC(is_noncanonical_address(vmcs12->host_rip, vcpu))) 29418c2ecf20Sopenharmony_ci return -EINVAL; 29428c2ecf20Sopenharmony_ci 29438c2ecf20Sopenharmony_ci /* 29448c2ecf20Sopenharmony_ci * If the load IA32_EFER VM-exit control is 1, bits reserved in the 29458c2ecf20Sopenharmony_ci * IA32_EFER MSR must be 0 in the field for that register. In addition, 29468c2ecf20Sopenharmony_ci * the values of the LMA and LME bits in the field must each be that of 29478c2ecf20Sopenharmony_ci * the host address-space size VM-exit control. 
29488c2ecf20Sopenharmony_ci */ 29498c2ecf20Sopenharmony_ci if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { 29508c2ecf20Sopenharmony_ci if (CC(!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer)) || 29518c2ecf20Sopenharmony_ci CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA)) || 29528c2ecf20Sopenharmony_ci CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))) 29538c2ecf20Sopenharmony_ci return -EINVAL; 29548c2ecf20Sopenharmony_ci } 29558c2ecf20Sopenharmony_ci 29568c2ecf20Sopenharmony_ci return 0; 29578c2ecf20Sopenharmony_ci} 29588c2ecf20Sopenharmony_ci 29598c2ecf20Sopenharmony_cistatic int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, 29608c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 29618c2ecf20Sopenharmony_ci{ 29628c2ecf20Sopenharmony_ci int r = 0; 29638c2ecf20Sopenharmony_ci struct vmcs12 *shadow; 29648c2ecf20Sopenharmony_ci struct kvm_host_map map; 29658c2ecf20Sopenharmony_ci 29668c2ecf20Sopenharmony_ci if (vmcs12->vmcs_link_pointer == -1ull) 29678c2ecf20Sopenharmony_ci return 0; 29688c2ecf20Sopenharmony_ci 29698c2ecf20Sopenharmony_ci if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))) 29708c2ecf20Sopenharmony_ci return -EINVAL; 29718c2ecf20Sopenharmony_ci 29728c2ecf20Sopenharmony_ci if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))) 29738c2ecf20Sopenharmony_ci return -EINVAL; 29748c2ecf20Sopenharmony_ci 29758c2ecf20Sopenharmony_ci shadow = map.hva; 29768c2ecf20Sopenharmony_ci 29778c2ecf20Sopenharmony_ci if (CC(shadow->hdr.revision_id != VMCS12_REVISION) || 29788c2ecf20Sopenharmony_ci CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))) 29798c2ecf20Sopenharmony_ci r = -EINVAL; 29808c2ecf20Sopenharmony_ci 29818c2ecf20Sopenharmony_ci kvm_vcpu_unmap(vcpu, &map, false); 29828c2ecf20Sopenharmony_ci return r; 29838c2ecf20Sopenharmony_ci} 29848c2ecf20Sopenharmony_ci 29858c2ecf20Sopenharmony_ci/* 29868c2ecf20Sopenharmony_ci * Checks related to Guest Non-register State 29878c2ecf20Sopenharmony_ci */ 
29888c2ecf20Sopenharmony_cistatic int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) 29898c2ecf20Sopenharmony_ci{ 29908c2ecf20Sopenharmony_ci if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && 29918c2ecf20Sopenharmony_ci vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)) 29928c2ecf20Sopenharmony_ci return -EINVAL; 29938c2ecf20Sopenharmony_ci 29948c2ecf20Sopenharmony_ci return 0; 29958c2ecf20Sopenharmony_ci} 29968c2ecf20Sopenharmony_ci 29978c2ecf20Sopenharmony_cistatic int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, 29988c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12, 29998c2ecf20Sopenharmony_ci enum vm_entry_failure_code *entry_failure_code) 30008c2ecf20Sopenharmony_ci{ 30018c2ecf20Sopenharmony_ci bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE); 30028c2ecf20Sopenharmony_ci 30038c2ecf20Sopenharmony_ci *entry_failure_code = ENTRY_FAIL_DEFAULT; 30048c2ecf20Sopenharmony_ci 30058c2ecf20Sopenharmony_ci if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) || 30068c2ecf20Sopenharmony_ci CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))) 30078c2ecf20Sopenharmony_ci return -EINVAL; 30088c2ecf20Sopenharmony_ci 30098c2ecf20Sopenharmony_ci if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) && 30108c2ecf20Sopenharmony_ci CC(!kvm_dr7_valid(vmcs12->guest_dr7))) 30118c2ecf20Sopenharmony_ci return -EINVAL; 30128c2ecf20Sopenharmony_ci 30138c2ecf20Sopenharmony_ci if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && 30148c2ecf20Sopenharmony_ci CC(!kvm_pat_valid(vmcs12->guest_ia32_pat))) 30158c2ecf20Sopenharmony_ci return -EINVAL; 30168c2ecf20Sopenharmony_ci 30178c2ecf20Sopenharmony_ci if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) { 30188c2ecf20Sopenharmony_ci *entry_failure_code = ENTRY_FAIL_VMCS_LINK_PTR; 30198c2ecf20Sopenharmony_ci return -EINVAL; 30208c2ecf20Sopenharmony_ci } 30218c2ecf20Sopenharmony_ci 30228c2ecf20Sopenharmony_ci if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && 
30238c2ecf20Sopenharmony_ci CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu), 30248c2ecf20Sopenharmony_ci vmcs12->guest_ia32_perf_global_ctrl))) 30258c2ecf20Sopenharmony_ci return -EINVAL; 30268c2ecf20Sopenharmony_ci 30278c2ecf20Sopenharmony_ci if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG)) 30288c2ecf20Sopenharmony_ci return -EINVAL; 30298c2ecf20Sopenharmony_ci 30308c2ecf20Sopenharmony_ci if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) || 30318c2ecf20Sopenharmony_ci CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG))) 30328c2ecf20Sopenharmony_ci return -EINVAL; 30338c2ecf20Sopenharmony_ci 30348c2ecf20Sopenharmony_ci /* 30358c2ecf20Sopenharmony_ci * If the load IA32_EFER VM-entry control is 1, the following checks 30368c2ecf20Sopenharmony_ci * are performed on the field for the IA32_EFER MSR: 30378c2ecf20Sopenharmony_ci * - Bits reserved in the IA32_EFER MSR must be 0. 30388c2ecf20Sopenharmony_ci * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of 30398c2ecf20Sopenharmony_ci * the IA-32e mode guest VM-exit control. It must also be identical 30408c2ecf20Sopenharmony_ci * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to 30418c2ecf20Sopenharmony_ci * CR0.PG) is 1. 
30428c2ecf20Sopenharmony_ci */ 30438c2ecf20Sopenharmony_ci if (to_vmx(vcpu)->nested.nested_run_pending && 30448c2ecf20Sopenharmony_ci (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) { 30458c2ecf20Sopenharmony_ci if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) || 30468c2ecf20Sopenharmony_ci CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) || 30478c2ecf20Sopenharmony_ci CC(((vmcs12->guest_cr0 & X86_CR0_PG) && 30488c2ecf20Sopenharmony_ci ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME)))) 30498c2ecf20Sopenharmony_ci return -EINVAL; 30508c2ecf20Sopenharmony_ci } 30518c2ecf20Sopenharmony_ci 30528c2ecf20Sopenharmony_ci if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && 30538c2ecf20Sopenharmony_ci (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) || 30548c2ecf20Sopenharmony_ci CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))) 30558c2ecf20Sopenharmony_ci return -EINVAL; 30568c2ecf20Sopenharmony_ci 30578c2ecf20Sopenharmony_ci if (nested_check_guest_non_reg_state(vmcs12)) 30588c2ecf20Sopenharmony_ci return -EINVAL; 30598c2ecf20Sopenharmony_ci 30608c2ecf20Sopenharmony_ci return 0; 30618c2ecf20Sopenharmony_ci} 30628c2ecf20Sopenharmony_ci 30638c2ecf20Sopenharmony_cistatic int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) 30648c2ecf20Sopenharmony_ci{ 30658c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 30668c2ecf20Sopenharmony_ci unsigned long cr3, cr4; 30678c2ecf20Sopenharmony_ci bool vm_fail; 30688c2ecf20Sopenharmony_ci 30698c2ecf20Sopenharmony_ci if (!nested_early_check) 30708c2ecf20Sopenharmony_ci return 0; 30718c2ecf20Sopenharmony_ci 30728c2ecf20Sopenharmony_ci if (vmx->msr_autoload.host.nr) 30738c2ecf20Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); 30748c2ecf20Sopenharmony_ci if (vmx->msr_autoload.guest.nr) 30758c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); 30768c2ecf20Sopenharmony_ci 30778c2ecf20Sopenharmony_ci preempt_disable(); 30788c2ecf20Sopenharmony_ci 
30798c2ecf20Sopenharmony_ci vmx_prepare_switch_to_guest(vcpu); 30808c2ecf20Sopenharmony_ci 30818c2ecf20Sopenharmony_ci /* 30828c2ecf20Sopenharmony_ci * Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS, 30838c2ecf20Sopenharmony_ci * which is reserved to '1' by hardware. GUEST_RFLAGS is guaranteed to 30848c2ecf20Sopenharmony_ci * be written (by prepare_vmcs02()) before the "real" VMEnter, i.e. 30858c2ecf20Sopenharmony_ci * there is no need to preserve other bits or save/restore the field. 30868c2ecf20Sopenharmony_ci */ 30878c2ecf20Sopenharmony_ci vmcs_writel(GUEST_RFLAGS, 0); 30888c2ecf20Sopenharmony_ci 30898c2ecf20Sopenharmony_ci cr3 = __get_current_cr3_fast(); 30908c2ecf20Sopenharmony_ci if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { 30918c2ecf20Sopenharmony_ci vmcs_writel(HOST_CR3, cr3); 30928c2ecf20Sopenharmony_ci vmx->loaded_vmcs->host_state.cr3 = cr3; 30938c2ecf20Sopenharmony_ci } 30948c2ecf20Sopenharmony_ci 30958c2ecf20Sopenharmony_ci cr4 = cr4_read_shadow(); 30968c2ecf20Sopenharmony_ci if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { 30978c2ecf20Sopenharmony_ci vmcs_writel(HOST_CR4, cr4); 30988c2ecf20Sopenharmony_ci vmx->loaded_vmcs->host_state.cr4 = cr4; 30998c2ecf20Sopenharmony_ci } 31008c2ecf20Sopenharmony_ci 31018c2ecf20Sopenharmony_ci vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, 31028c2ecf20Sopenharmony_ci __vmx_vcpu_run_flags(vmx)); 31038c2ecf20Sopenharmony_ci 31048c2ecf20Sopenharmony_ci if (vmx->msr_autoload.host.nr) 31058c2ecf20Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); 31068c2ecf20Sopenharmony_ci if (vmx->msr_autoload.guest.nr) 31078c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); 31088c2ecf20Sopenharmony_ci 31098c2ecf20Sopenharmony_ci if (vm_fail) { 31108c2ecf20Sopenharmony_ci u32 error = vmcs_read32(VM_INSTRUCTION_ERROR); 31118c2ecf20Sopenharmony_ci 31128c2ecf20Sopenharmony_ci preempt_enable(); 
31138c2ecf20Sopenharmony_ci 31148c2ecf20Sopenharmony_ci trace_kvm_nested_vmenter_failed( 31158c2ecf20Sopenharmony_ci "early hardware check VM-instruction error: ", error); 31168c2ecf20Sopenharmony_ci WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD); 31178c2ecf20Sopenharmony_ci return 1; 31188c2ecf20Sopenharmony_ci } 31198c2ecf20Sopenharmony_ci 31208c2ecf20Sopenharmony_ci /* 31218c2ecf20Sopenharmony_ci * VMExit clears RFLAGS.IF and DR7, even on a consistency check. 31228c2ecf20Sopenharmony_ci */ 31238c2ecf20Sopenharmony_ci if (hw_breakpoint_active()) 31248c2ecf20Sopenharmony_ci set_debugreg(__this_cpu_read(cpu_dr7), 7); 31258c2ecf20Sopenharmony_ci local_irq_enable(); 31268c2ecf20Sopenharmony_ci preempt_enable(); 31278c2ecf20Sopenharmony_ci 31288c2ecf20Sopenharmony_ci /* 31298c2ecf20Sopenharmony_ci * A non-failing VMEntry means we somehow entered guest mode with 31308c2ecf20Sopenharmony_ci * an illegal RIP, and that's just the tip of the iceberg. There 31318c2ecf20Sopenharmony_ci * is no telling what memory has been modified or what state has 31328c2ecf20Sopenharmony_ci * been exposed to unknown code. Hitting this all but guarantees 31338c2ecf20Sopenharmony_ci * a (very critical) hardware issue. 31348c2ecf20Sopenharmony_ci */ 31358c2ecf20Sopenharmony_ci WARN_ON(!(vmcs_read32(VM_EXIT_REASON) & 31368c2ecf20Sopenharmony_ci VMX_EXIT_REASONS_FAILED_VMENTRY)); 31378c2ecf20Sopenharmony_ci 31388c2ecf20Sopenharmony_ci return 0; 31398c2ecf20Sopenharmony_ci} 31408c2ecf20Sopenharmony_ci 31418c2ecf20Sopenharmony_cistatic bool nested_get_evmcs_page(struct kvm_vcpu *vcpu) 31428c2ecf20Sopenharmony_ci{ 31438c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 31448c2ecf20Sopenharmony_ci 31458c2ecf20Sopenharmony_ci /* 31468c2ecf20Sopenharmony_ci * hv_evmcs may end up being not mapped after migration (when 31478c2ecf20Sopenharmony_ci * L2 was running), map it here to make sure vmcs12 changes are 31488c2ecf20Sopenharmony_ci * properly reflected. 
31498c2ecf20Sopenharmony_ci */ 31508c2ecf20Sopenharmony_ci if (vmx->nested.enlightened_vmcs_enabled && !vmx->nested.hv_evmcs) { 31518c2ecf20Sopenharmony_ci enum nested_evmptrld_status evmptrld_status = 31528c2ecf20Sopenharmony_ci nested_vmx_handle_enlightened_vmptrld(vcpu, false); 31538c2ecf20Sopenharmony_ci 31548c2ecf20Sopenharmony_ci if (evmptrld_status == EVMPTRLD_VMFAIL || 31558c2ecf20Sopenharmony_ci evmptrld_status == EVMPTRLD_ERROR) 31568c2ecf20Sopenharmony_ci return false; 31578c2ecf20Sopenharmony_ci } 31588c2ecf20Sopenharmony_ci 31598c2ecf20Sopenharmony_ci return true; 31608c2ecf20Sopenharmony_ci} 31618c2ecf20Sopenharmony_ci 31628c2ecf20Sopenharmony_cistatic bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) 31638c2ecf20Sopenharmony_ci{ 31648c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 31658c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 31668c2ecf20Sopenharmony_ci struct kvm_host_map *map; 31678c2ecf20Sopenharmony_ci struct page *page; 31688c2ecf20Sopenharmony_ci u64 hpa; 31698c2ecf20Sopenharmony_ci 31708c2ecf20Sopenharmony_ci if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { 31718c2ecf20Sopenharmony_ci /* 31728c2ecf20Sopenharmony_ci * Translate L1 physical address to host physical 31738c2ecf20Sopenharmony_ci * address for vmcs02. Keep the page pinned, so this 31748c2ecf20Sopenharmony_ci * physical address remains valid. We keep a reference 31758c2ecf20Sopenharmony_ci * to it so we can release it later. 
31768c2ecf20Sopenharmony_ci */ 31778c2ecf20Sopenharmony_ci if (vmx->nested.apic_access_page) { /* shouldn't happen */ 31788c2ecf20Sopenharmony_ci kvm_release_page_clean(vmx->nested.apic_access_page); 31798c2ecf20Sopenharmony_ci vmx->nested.apic_access_page = NULL; 31808c2ecf20Sopenharmony_ci } 31818c2ecf20Sopenharmony_ci page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr); 31828c2ecf20Sopenharmony_ci if (!is_error_page(page)) { 31838c2ecf20Sopenharmony_ci vmx->nested.apic_access_page = page; 31848c2ecf20Sopenharmony_ci hpa = page_to_phys(vmx->nested.apic_access_page); 31858c2ecf20Sopenharmony_ci vmcs_write64(APIC_ACCESS_ADDR, hpa); 31868c2ecf20Sopenharmony_ci } else { 31878c2ecf20Sopenharmony_ci pr_debug_ratelimited("%s: no backing 'struct page' for APIC-access address in vmcs12\n", 31888c2ecf20Sopenharmony_ci __func__); 31898c2ecf20Sopenharmony_ci vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 31908c2ecf20Sopenharmony_ci vcpu->run->internal.suberror = 31918c2ecf20Sopenharmony_ci KVM_INTERNAL_ERROR_EMULATION; 31928c2ecf20Sopenharmony_ci vcpu->run->internal.ndata = 0; 31938c2ecf20Sopenharmony_ci return false; 31948c2ecf20Sopenharmony_ci } 31958c2ecf20Sopenharmony_ci } 31968c2ecf20Sopenharmony_ci 31978c2ecf20Sopenharmony_ci if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { 31988c2ecf20Sopenharmony_ci map = &vmx->nested.virtual_apic_map; 31998c2ecf20Sopenharmony_ci 32008c2ecf20Sopenharmony_ci if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) { 32018c2ecf20Sopenharmony_ci vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn)); 32028c2ecf20Sopenharmony_ci } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) && 32038c2ecf20Sopenharmony_ci nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) && 32048c2ecf20Sopenharmony_ci !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { 32058c2ecf20Sopenharmony_ci /* 32068c2ecf20Sopenharmony_ci * The processor will never use the TPR shadow, simply 
32078c2ecf20Sopenharmony_ci * clear the bit from the execution control. Such a 32088c2ecf20Sopenharmony_ci * configuration is useless, but it happens in tests. 32098c2ecf20Sopenharmony_ci * For any other configuration, failing the vm entry is 32108c2ecf20Sopenharmony_ci * _not_ what the processor does but it's basically the 32118c2ecf20Sopenharmony_ci * only possibility we have. 32128c2ecf20Sopenharmony_ci */ 32138c2ecf20Sopenharmony_ci exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW); 32148c2ecf20Sopenharmony_ci } else { 32158c2ecf20Sopenharmony_ci /* 32168c2ecf20Sopenharmony_ci * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR to 32178c2ecf20Sopenharmony_ci * force VM-Entry to fail. 32188c2ecf20Sopenharmony_ci */ 32198c2ecf20Sopenharmony_ci vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull); 32208c2ecf20Sopenharmony_ci } 32218c2ecf20Sopenharmony_ci } 32228c2ecf20Sopenharmony_ci 32238c2ecf20Sopenharmony_ci if (nested_cpu_has_posted_intr(vmcs12)) { 32248c2ecf20Sopenharmony_ci map = &vmx->nested.pi_desc_map; 32258c2ecf20Sopenharmony_ci 32268c2ecf20Sopenharmony_ci if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) { 32278c2ecf20Sopenharmony_ci vmx->nested.pi_desc = 32288c2ecf20Sopenharmony_ci (struct pi_desc *)(((void *)map->hva) + 32298c2ecf20Sopenharmony_ci offset_in_page(vmcs12->posted_intr_desc_addr)); 32308c2ecf20Sopenharmony_ci vmcs_write64(POSTED_INTR_DESC_ADDR, 32318c2ecf20Sopenharmony_ci pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr)); 32328c2ecf20Sopenharmony_ci } 32338c2ecf20Sopenharmony_ci } 32348c2ecf20Sopenharmony_ci if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) 32358c2ecf20Sopenharmony_ci exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS); 32368c2ecf20Sopenharmony_ci else 32378c2ecf20Sopenharmony_ci exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS); 32388c2ecf20Sopenharmony_ci 32398c2ecf20Sopenharmony_ci return true; 32408c2ecf20Sopenharmony_ci} 32418c2ecf20Sopenharmony_ci 
32428c2ecf20Sopenharmony_cistatic bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu) 32438c2ecf20Sopenharmony_ci{ 32448c2ecf20Sopenharmony_ci if (!nested_get_evmcs_page(vcpu)) { 32458c2ecf20Sopenharmony_ci pr_debug_ratelimited("%s: enlightened vmptrld failed\n", 32468c2ecf20Sopenharmony_ci __func__); 32478c2ecf20Sopenharmony_ci vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 32488c2ecf20Sopenharmony_ci vcpu->run->internal.suberror = 32498c2ecf20Sopenharmony_ci KVM_INTERNAL_ERROR_EMULATION; 32508c2ecf20Sopenharmony_ci vcpu->run->internal.ndata = 0; 32518c2ecf20Sopenharmony_ci 32528c2ecf20Sopenharmony_ci return false; 32538c2ecf20Sopenharmony_ci } 32548c2ecf20Sopenharmony_ci 32558c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu)) 32568c2ecf20Sopenharmony_ci return false; 32578c2ecf20Sopenharmony_ci 32588c2ecf20Sopenharmony_ci return true; 32598c2ecf20Sopenharmony_ci} 32608c2ecf20Sopenharmony_ci 32618c2ecf20Sopenharmony_cistatic int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa) 32628c2ecf20Sopenharmony_ci{ 32638c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12; 32648c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 32658c2ecf20Sopenharmony_ci gpa_t dst; 32668c2ecf20Sopenharmony_ci 32678c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(!is_guest_mode(vcpu))) 32688c2ecf20Sopenharmony_ci return 0; 32698c2ecf20Sopenharmony_ci 32708c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(vmx->nested.pml_full)) 32718c2ecf20Sopenharmony_ci return 1; 32728c2ecf20Sopenharmony_ci 32738c2ecf20Sopenharmony_ci /* 32748c2ecf20Sopenharmony_ci * Check if PML is enabled for the nested guest. Whether eptp bit 6 is 32758c2ecf20Sopenharmony_ci * set is already checked as part of A/D emulation. 
32768c2ecf20Sopenharmony_ci */ 32778c2ecf20Sopenharmony_ci vmcs12 = get_vmcs12(vcpu); 32788c2ecf20Sopenharmony_ci if (!nested_cpu_has_pml(vmcs12)) 32798c2ecf20Sopenharmony_ci return 0; 32808c2ecf20Sopenharmony_ci 32818c2ecf20Sopenharmony_ci if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) { 32828c2ecf20Sopenharmony_ci vmx->nested.pml_full = true; 32838c2ecf20Sopenharmony_ci return 1; 32848c2ecf20Sopenharmony_ci } 32858c2ecf20Sopenharmony_ci 32868c2ecf20Sopenharmony_ci gpa &= ~0xFFFull; 32878c2ecf20Sopenharmony_ci dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index; 32888c2ecf20Sopenharmony_ci 32898c2ecf20Sopenharmony_ci if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa, 32908c2ecf20Sopenharmony_ci offset_in_page(dst), sizeof(gpa))) 32918c2ecf20Sopenharmony_ci return 0; 32928c2ecf20Sopenharmony_ci 32938c2ecf20Sopenharmony_ci vmcs12->guest_pml_index--; 32948c2ecf20Sopenharmony_ci 32958c2ecf20Sopenharmony_ci return 0; 32968c2ecf20Sopenharmony_ci} 32978c2ecf20Sopenharmony_ci 32988c2ecf20Sopenharmony_ci/* 32998c2ecf20Sopenharmony_ci * Intel's VMX Instruction Reference specifies a common set of prerequisites 33008c2ecf20Sopenharmony_ci * for running VMX instructions (except VMXON, whose prerequisites are 33018c2ecf20Sopenharmony_ci * slightly different). It also specifies what exception to inject otherwise. 33028c2ecf20Sopenharmony_ci * Note that many of these exceptions have priority over VM exits, so they 33038c2ecf20Sopenharmony_ci * don't have to be checked again here. 
33048c2ecf20Sopenharmony_ci */ 33058c2ecf20Sopenharmony_cistatic int nested_vmx_check_permission(struct kvm_vcpu *vcpu) 33068c2ecf20Sopenharmony_ci{ 33078c2ecf20Sopenharmony_ci if (!to_vmx(vcpu)->nested.vmxon) { 33088c2ecf20Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 33098c2ecf20Sopenharmony_ci return 0; 33108c2ecf20Sopenharmony_ci } 33118c2ecf20Sopenharmony_ci 33128c2ecf20Sopenharmony_ci if (vmx_get_cpl(vcpu)) { 33138c2ecf20Sopenharmony_ci kvm_inject_gp(vcpu, 0); 33148c2ecf20Sopenharmony_ci return 0; 33158c2ecf20Sopenharmony_ci } 33168c2ecf20Sopenharmony_ci 33178c2ecf20Sopenharmony_ci return 1; 33188c2ecf20Sopenharmony_ci} 33198c2ecf20Sopenharmony_ci 33208c2ecf20Sopenharmony_cistatic u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu) 33218c2ecf20Sopenharmony_ci{ 33228c2ecf20Sopenharmony_ci u8 rvi = vmx_get_rvi(); 33238c2ecf20Sopenharmony_ci u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI); 33248c2ecf20Sopenharmony_ci 33258c2ecf20Sopenharmony_ci return ((rvi & 0xf0) > (vppr & 0xf0)); 33268c2ecf20Sopenharmony_ci} 33278c2ecf20Sopenharmony_ci 33288c2ecf20Sopenharmony_cistatic void load_vmcs12_host_state(struct kvm_vcpu *vcpu, 33298c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12); 33308c2ecf20Sopenharmony_ci 33318c2ecf20Sopenharmony_ci/* 33328c2ecf20Sopenharmony_ci * If from_vmentry is false, this is being called from state restore (either RSM 33338c2ecf20Sopenharmony_ci * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume. 
33348c2ecf20Sopenharmony_ci * 33358c2ecf20Sopenharmony_ci * Returns: 33368c2ecf20Sopenharmony_ci * NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode 33378c2ecf20Sopenharmony_ci * NVMX_VMENTRY_VMFAIL: Consistency check VMFail 33388c2ecf20Sopenharmony_ci * NVMX_VMENTRY_VMEXIT: Consistency check VMExit 33398c2ecf20Sopenharmony_ci * NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error 33408c2ecf20Sopenharmony_ci */ 33418c2ecf20Sopenharmony_cienum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, 33428c2ecf20Sopenharmony_ci bool from_vmentry) 33438c2ecf20Sopenharmony_ci{ 33448c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 33458c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 33468c2ecf20Sopenharmony_ci enum vm_entry_failure_code entry_failure_code; 33478c2ecf20Sopenharmony_ci bool evaluate_pending_interrupts; 33488c2ecf20Sopenharmony_ci union vmx_exit_reason exit_reason = { 33498c2ecf20Sopenharmony_ci .basic = EXIT_REASON_INVALID_STATE, 33508c2ecf20Sopenharmony_ci .failed_vmentry = 1, 33518c2ecf20Sopenharmony_ci }; 33528c2ecf20Sopenharmony_ci u32 failed_index; 33538c2ecf20Sopenharmony_ci 33548c2ecf20Sopenharmony_ci if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) 33558c2ecf20Sopenharmony_ci kvm_vcpu_flush_tlb_current(vcpu); 33568c2ecf20Sopenharmony_ci 33578c2ecf20Sopenharmony_ci evaluate_pending_interrupts = exec_controls_get(vmx) & 33588c2ecf20Sopenharmony_ci (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING); 33598c2ecf20Sopenharmony_ci if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) 33608c2ecf20Sopenharmony_ci evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); 33618c2ecf20Sopenharmony_ci 33628c2ecf20Sopenharmony_ci if (!vmx->nested.nested_run_pending || 33638c2ecf20Sopenharmony_ci !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) 33648c2ecf20Sopenharmony_ci vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); 33658c2ecf20Sopenharmony_ci if 
(kvm_mpx_supported() && 33668c2ecf20Sopenharmony_ci (!vmx->nested.nested_run_pending || 33678c2ecf20Sopenharmony_ci !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) 33688c2ecf20Sopenharmony_ci vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); 33698c2ecf20Sopenharmony_ci 33708c2ecf20Sopenharmony_ci /* 33718c2ecf20Sopenharmony_ci * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and* 33728c2ecf20Sopenharmony_ci * nested early checks are disabled. In the event of a "late" VM-Fail, 33738c2ecf20Sopenharmony_ci * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its 33748c2ecf20Sopenharmony_ci * software model to the pre-VMEntry host state. When EPT is disabled, 33758c2ecf20Sopenharmony_ci * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes 33768c2ecf20Sopenharmony_ci * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3. Stuffing 33778c2ecf20Sopenharmony_ci * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to 33788c2ecf20Sopenharmony_ci * the correct value. Smashing vmcs01.GUEST_CR3 is safe because nested 33798c2ecf20Sopenharmony_ci * VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is 33808c2ecf20Sopenharmony_ci * guaranteed to be overwritten with a shadow CR3 prior to re-entering 33818c2ecf20Sopenharmony_ci * L1. Don't stuff vmcs01.GUEST_CR3 when using nested early checks as 33828c2ecf20Sopenharmony_ci * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks 33838c2ecf20Sopenharmony_ci * pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail 33848c2ecf20Sopenharmony_ci * path would need to manually save/restore vmcs01.GUEST_CR3. 
33858c2ecf20Sopenharmony_ci */ 33868c2ecf20Sopenharmony_ci if (!enable_ept && !nested_early_check) 33878c2ecf20Sopenharmony_ci vmcs_writel(GUEST_CR3, vcpu->arch.cr3); 33888c2ecf20Sopenharmony_ci 33898c2ecf20Sopenharmony_ci vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); 33908c2ecf20Sopenharmony_ci 33918c2ecf20Sopenharmony_ci prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12); 33928c2ecf20Sopenharmony_ci 33938c2ecf20Sopenharmony_ci if (from_vmentry) { 33948c2ecf20Sopenharmony_ci if (unlikely(!nested_get_vmcs12_pages(vcpu))) { 33958c2ecf20Sopenharmony_ci vmx_switch_vmcs(vcpu, &vmx->vmcs01); 33968c2ecf20Sopenharmony_ci return NVMX_VMENTRY_KVM_INTERNAL_ERROR; 33978c2ecf20Sopenharmony_ci } 33988c2ecf20Sopenharmony_ci 33998c2ecf20Sopenharmony_ci if (nested_vmx_check_vmentry_hw(vcpu)) { 34008c2ecf20Sopenharmony_ci vmx_switch_vmcs(vcpu, &vmx->vmcs01); 34018c2ecf20Sopenharmony_ci return NVMX_VMENTRY_VMFAIL; 34028c2ecf20Sopenharmony_ci } 34038c2ecf20Sopenharmony_ci 34048c2ecf20Sopenharmony_ci if (nested_vmx_check_guest_state(vcpu, vmcs12, 34058c2ecf20Sopenharmony_ci &entry_failure_code)) { 34068c2ecf20Sopenharmony_ci exit_reason.basic = EXIT_REASON_INVALID_STATE; 34078c2ecf20Sopenharmony_ci vmcs12->exit_qualification = entry_failure_code; 34088c2ecf20Sopenharmony_ci goto vmentry_fail_vmexit; 34098c2ecf20Sopenharmony_ci } 34108c2ecf20Sopenharmony_ci } 34118c2ecf20Sopenharmony_ci 34128c2ecf20Sopenharmony_ci enter_guest_mode(vcpu); 34138c2ecf20Sopenharmony_ci if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) 34148c2ecf20Sopenharmony_ci vcpu->arch.tsc_offset += vmcs12->tsc_offset; 34158c2ecf20Sopenharmony_ci 34168c2ecf20Sopenharmony_ci if (prepare_vmcs02(vcpu, vmcs12, &entry_failure_code)) { 34178c2ecf20Sopenharmony_ci exit_reason.basic = EXIT_REASON_INVALID_STATE; 34188c2ecf20Sopenharmony_ci vmcs12->exit_qualification = entry_failure_code; 34198c2ecf20Sopenharmony_ci goto vmentry_fail_vmexit_guest_mode; 34208c2ecf20Sopenharmony_ci } 34218c2ecf20Sopenharmony_ci 
34228c2ecf20Sopenharmony_ci if (from_vmentry) { 34238c2ecf20Sopenharmony_ci failed_index = nested_vmx_load_msr(vcpu, 34248c2ecf20Sopenharmony_ci vmcs12->vm_entry_msr_load_addr, 34258c2ecf20Sopenharmony_ci vmcs12->vm_entry_msr_load_count); 34268c2ecf20Sopenharmony_ci if (failed_index) { 34278c2ecf20Sopenharmony_ci exit_reason.basic = EXIT_REASON_MSR_LOAD_FAIL; 34288c2ecf20Sopenharmony_ci vmcs12->exit_qualification = failed_index; 34298c2ecf20Sopenharmony_ci goto vmentry_fail_vmexit_guest_mode; 34308c2ecf20Sopenharmony_ci } 34318c2ecf20Sopenharmony_ci } else { 34328c2ecf20Sopenharmony_ci /* 34338c2ecf20Sopenharmony_ci * The MMU is not initialized to point at the right entities yet and 34348c2ecf20Sopenharmony_ci * "get pages" would need to read data from the guest (i.e. we will 34358c2ecf20Sopenharmony_ci * need to perform gpa to hpa translation). Request a call 34368c2ecf20Sopenharmony_ci * to nested_get_vmcs12_pages before the next VM-entry. The MSRs 34378c2ecf20Sopenharmony_ci * have already been set at vmentry time and should not be reset. 34388c2ecf20Sopenharmony_ci */ 34398c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); 34408c2ecf20Sopenharmony_ci } 34418c2ecf20Sopenharmony_ci 34428c2ecf20Sopenharmony_ci /* 34438c2ecf20Sopenharmony_ci * If L1 had a pending IRQ/NMI until it executed 34448c2ecf20Sopenharmony_ci * VMLAUNCH/VMRESUME which wasn't delivered because it was 34458c2ecf20Sopenharmony_ci * disallowed (e.g. interrupts disabled), L0 needs to 34468c2ecf20Sopenharmony_ci * evaluate if this pending event should cause an exit from L2 34478c2ecf20Sopenharmony_ci * to L1 or delivered directly to L2 (e.g. In case L1 don't 34488c2ecf20Sopenharmony_ci * intercept EXTERNAL_INTERRUPT). 
34498c2ecf20Sopenharmony_ci * 34508c2ecf20Sopenharmony_ci * Usually this would be handled by the processor noticing an 34518c2ecf20Sopenharmony_ci * IRQ/NMI window request, or checking RVI during evaluation of 34528c2ecf20Sopenharmony_ci * pending virtual interrupts. However, this setting was done 34538c2ecf20Sopenharmony_ci * on VMCS01 and now VMCS02 is active instead. Thus, we force L0 34548c2ecf20Sopenharmony_ci * to perform pending event evaluation by requesting a KVM_REQ_EVENT. 34558c2ecf20Sopenharmony_ci */ 34568c2ecf20Sopenharmony_ci if (unlikely(evaluate_pending_interrupts)) 34578c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_EVENT, vcpu); 34588c2ecf20Sopenharmony_ci 34598c2ecf20Sopenharmony_ci /* 34608c2ecf20Sopenharmony_ci * Do not start the preemption timer hrtimer until after we know 34618c2ecf20Sopenharmony_ci * we are successful, so that only nested_vmx_vmexit needs to cancel 34628c2ecf20Sopenharmony_ci * the timer. 34638c2ecf20Sopenharmony_ci */ 34648c2ecf20Sopenharmony_ci vmx->nested.preemption_timer_expired = false; 34658c2ecf20Sopenharmony_ci if (nested_cpu_has_preemption_timer(vmcs12)) { 34668c2ecf20Sopenharmony_ci u64 timer_value = vmx_calc_preemption_timer_value(vcpu); 34678c2ecf20Sopenharmony_ci vmx_start_preemption_timer(vcpu, timer_value); 34688c2ecf20Sopenharmony_ci } 34698c2ecf20Sopenharmony_ci 34708c2ecf20Sopenharmony_ci /* 34718c2ecf20Sopenharmony_ci * Note no nested_vmx_succeed or nested_vmx_fail here. At this point 34728c2ecf20Sopenharmony_ci * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet 34738c2ecf20Sopenharmony_ci * returned as far as L1 is concerned. It will only return (and set 34748c2ecf20Sopenharmony_ci * the success flag) when L2 exits (see nested_vmx_vmexit()). 
34758c2ecf20Sopenharmony_ci */ 34768c2ecf20Sopenharmony_ci return NVMX_VMENTRY_SUCCESS; 34778c2ecf20Sopenharmony_ci 34788c2ecf20Sopenharmony_ci /* 34798c2ecf20Sopenharmony_ci * A failed consistency check that leads to a VMExit during L1's 34808c2ecf20Sopenharmony_ci * VMEnter to L2 is a variation of a normal VMexit, as explained in 34818c2ecf20Sopenharmony_ci * 26.7 "VM-entry failures during or after loading guest state". 34828c2ecf20Sopenharmony_ci */ 34838c2ecf20Sopenharmony_civmentry_fail_vmexit_guest_mode: 34848c2ecf20Sopenharmony_ci if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) 34858c2ecf20Sopenharmony_ci vcpu->arch.tsc_offset -= vmcs12->tsc_offset; 34868c2ecf20Sopenharmony_ci leave_guest_mode(vcpu); 34878c2ecf20Sopenharmony_ci 34888c2ecf20Sopenharmony_civmentry_fail_vmexit: 34898c2ecf20Sopenharmony_ci vmx_switch_vmcs(vcpu, &vmx->vmcs01); 34908c2ecf20Sopenharmony_ci 34918c2ecf20Sopenharmony_ci if (!from_vmentry) 34928c2ecf20Sopenharmony_ci return NVMX_VMENTRY_VMEXIT; 34938c2ecf20Sopenharmony_ci 34948c2ecf20Sopenharmony_ci load_vmcs12_host_state(vcpu, vmcs12); 34958c2ecf20Sopenharmony_ci vmcs12->vm_exit_reason = exit_reason.full; 34968c2ecf20Sopenharmony_ci if (enable_shadow_vmcs || vmx->nested.hv_evmcs) 34978c2ecf20Sopenharmony_ci vmx->nested.need_vmcs12_to_shadow_sync = true; 34988c2ecf20Sopenharmony_ci return NVMX_VMENTRY_VMEXIT; 34998c2ecf20Sopenharmony_ci} 35008c2ecf20Sopenharmony_ci 35018c2ecf20Sopenharmony_ci/* 35028c2ecf20Sopenharmony_ci * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1 35038c2ecf20Sopenharmony_ci * for running an L2 nested guest. 
35048c2ecf20Sopenharmony_ci */ 35058c2ecf20Sopenharmony_cistatic int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) 35068c2ecf20Sopenharmony_ci{ 35078c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12; 35088c2ecf20Sopenharmony_ci enum nvmx_vmentry_status status; 35098c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 35108c2ecf20Sopenharmony_ci u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu); 35118c2ecf20Sopenharmony_ci enum nested_evmptrld_status evmptrld_status; 35128c2ecf20Sopenharmony_ci 35138c2ecf20Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 35148c2ecf20Sopenharmony_ci return 1; 35158c2ecf20Sopenharmony_ci 35168c2ecf20Sopenharmony_ci evmptrld_status = nested_vmx_handle_enlightened_vmptrld(vcpu, launch); 35178c2ecf20Sopenharmony_ci if (evmptrld_status == EVMPTRLD_ERROR) { 35188c2ecf20Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 35198c2ecf20Sopenharmony_ci return 1; 35208c2ecf20Sopenharmony_ci } else if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) { 35218c2ecf20Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 35228c2ecf20Sopenharmony_ci } 35238c2ecf20Sopenharmony_ci 35248c2ecf20Sopenharmony_ci if (CC(!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull)) 35258c2ecf20Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 35268c2ecf20Sopenharmony_ci 35278c2ecf20Sopenharmony_ci vmcs12 = get_vmcs12(vcpu); 35288c2ecf20Sopenharmony_ci 35298c2ecf20Sopenharmony_ci /* 35308c2ecf20Sopenharmony_ci * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact 35318c2ecf20Sopenharmony_ci * that there *is* a valid VMCS pointer, RFLAGS.CF is set 35328c2ecf20Sopenharmony_ci * rather than RFLAGS.ZF, and no error number is stored to the 35338c2ecf20Sopenharmony_ci * VM-instruction error field. 
35348c2ecf20Sopenharmony_ci */ 35358c2ecf20Sopenharmony_ci if (CC(vmcs12->hdr.shadow_vmcs)) 35368c2ecf20Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 35378c2ecf20Sopenharmony_ci 35388c2ecf20Sopenharmony_ci if (vmx->nested.hv_evmcs) { 35398c2ecf20Sopenharmony_ci copy_enlightened_to_vmcs12(vmx); 35408c2ecf20Sopenharmony_ci /* Enlightened VMCS doesn't have launch state */ 35418c2ecf20Sopenharmony_ci vmcs12->launch_state = !launch; 35428c2ecf20Sopenharmony_ci } else if (enable_shadow_vmcs) { 35438c2ecf20Sopenharmony_ci copy_shadow_to_vmcs12(vmx); 35448c2ecf20Sopenharmony_ci } 35458c2ecf20Sopenharmony_ci 35468c2ecf20Sopenharmony_ci /* 35478c2ecf20Sopenharmony_ci * The nested entry process starts with enforcing various prerequisites 35488c2ecf20Sopenharmony_ci * on vmcs12 as required by the Intel SDM, and act appropriately when 35498c2ecf20Sopenharmony_ci * they fail: As the SDM explains, some conditions should cause the 35508c2ecf20Sopenharmony_ci * instruction to fail, while others will cause the instruction to seem 35518c2ecf20Sopenharmony_ci * to succeed, but return an EXIT_REASON_INVALID_STATE. 35528c2ecf20Sopenharmony_ci * To speed up the normal (success) code path, we should avoid checking 35538c2ecf20Sopenharmony_ci * for misconfigurations which will anyway be caught by the processor 35548c2ecf20Sopenharmony_ci * when using the merged vmcs02. 35558c2ecf20Sopenharmony_ci */ 35568c2ecf20Sopenharmony_ci if (CC(interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS)) 35578c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS); 35588c2ecf20Sopenharmony_ci 35598c2ecf20Sopenharmony_ci if (CC(vmcs12->launch_state == launch)) 35608c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, 35618c2ecf20Sopenharmony_ci launch ? 
VMXERR_VMLAUNCH_NONCLEAR_VMCS 35628c2ecf20Sopenharmony_ci : VMXERR_VMRESUME_NONLAUNCHED_VMCS); 35638c2ecf20Sopenharmony_ci 35648c2ecf20Sopenharmony_ci if (nested_vmx_check_controls(vcpu, vmcs12)) 35658c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 35668c2ecf20Sopenharmony_ci 35678c2ecf20Sopenharmony_ci if (nested_vmx_check_address_space_size(vcpu, vmcs12)) 35688c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); 35698c2ecf20Sopenharmony_ci 35708c2ecf20Sopenharmony_ci if (nested_vmx_check_host_state(vcpu, vmcs12)) 35718c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); 35728c2ecf20Sopenharmony_ci 35738c2ecf20Sopenharmony_ci /* 35748c2ecf20Sopenharmony_ci * We're finally done with prerequisite checking, and can start with 35758c2ecf20Sopenharmony_ci * the nested entry. 35768c2ecf20Sopenharmony_ci */ 35778c2ecf20Sopenharmony_ci vmx->nested.nested_run_pending = 1; 35788c2ecf20Sopenharmony_ci vmx->nested.has_preemption_timer_deadline = false; 35798c2ecf20Sopenharmony_ci status = nested_vmx_enter_non_root_mode(vcpu, true); 35808c2ecf20Sopenharmony_ci if (unlikely(status != NVMX_VMENTRY_SUCCESS)) 35818c2ecf20Sopenharmony_ci goto vmentry_failed; 35828c2ecf20Sopenharmony_ci 35838c2ecf20Sopenharmony_ci /* Emulate processing of posted interrupts on VM-Enter. */ 35848c2ecf20Sopenharmony_ci if (nested_cpu_has_posted_intr(vmcs12) && 35858c2ecf20Sopenharmony_ci kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) { 35868c2ecf20Sopenharmony_ci vmx->nested.pi_pending = true; 35878c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_EVENT, vcpu); 35888c2ecf20Sopenharmony_ci kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv); 35898c2ecf20Sopenharmony_ci } 35908c2ecf20Sopenharmony_ci 35918c2ecf20Sopenharmony_ci /* Hide L1D cache contents from the nested guest. 
*/ 35928c2ecf20Sopenharmony_ci vmx->vcpu.arch.l1tf_flush_l1d = true; 35938c2ecf20Sopenharmony_ci 35948c2ecf20Sopenharmony_ci /* 35958c2ecf20Sopenharmony_ci * Must happen outside of nested_vmx_enter_non_root_mode() as it will 35968c2ecf20Sopenharmony_ci * also be used as part of restoring nVMX state for 35978c2ecf20Sopenharmony_ci * snapshot restore (migration). 35988c2ecf20Sopenharmony_ci * 35998c2ecf20Sopenharmony_ci * In this flow, it is assumed that vmcs12 cache was 36008c2ecf20Sopenharmony_ci * trasferred as part of captured nVMX state and should 36018c2ecf20Sopenharmony_ci * therefore not be read from guest memory (which may not 36028c2ecf20Sopenharmony_ci * exist on destination host yet). 36038c2ecf20Sopenharmony_ci */ 36048c2ecf20Sopenharmony_ci nested_cache_shadow_vmcs12(vcpu, vmcs12); 36058c2ecf20Sopenharmony_ci 36068c2ecf20Sopenharmony_ci /* 36078c2ecf20Sopenharmony_ci * If we're entering a halted L2 vcpu and the L2 vcpu won't be 36088c2ecf20Sopenharmony_ci * awakened by event injection or by an NMI-window VM-exit or 36098c2ecf20Sopenharmony_ci * by an interrupt-window VM-exit, halt the vcpu. 
36108c2ecf20Sopenharmony_ci */ 36118c2ecf20Sopenharmony_ci if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && 36128c2ecf20Sopenharmony_ci !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) && 36138c2ecf20Sopenharmony_ci !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_NMI_WINDOW_EXITING) && 36148c2ecf20Sopenharmony_ci !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_INTR_WINDOW_EXITING) && 36158c2ecf20Sopenharmony_ci (vmcs12->guest_rflags & X86_EFLAGS_IF))) { 36168c2ecf20Sopenharmony_ci vmx->nested.nested_run_pending = 0; 36178c2ecf20Sopenharmony_ci return kvm_vcpu_halt(vcpu); 36188c2ecf20Sopenharmony_ci } 36198c2ecf20Sopenharmony_ci return 1; 36208c2ecf20Sopenharmony_ci 36218c2ecf20Sopenharmony_civmentry_failed: 36228c2ecf20Sopenharmony_ci vmx->nested.nested_run_pending = 0; 36238c2ecf20Sopenharmony_ci if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR) 36248c2ecf20Sopenharmony_ci return 0; 36258c2ecf20Sopenharmony_ci if (status == NVMX_VMENTRY_VMEXIT) 36268c2ecf20Sopenharmony_ci return 1; 36278c2ecf20Sopenharmony_ci WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL); 36288c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 36298c2ecf20Sopenharmony_ci} 36308c2ecf20Sopenharmony_ci 36318c2ecf20Sopenharmony_ci/* 36328c2ecf20Sopenharmony_ci * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date 36338c2ecf20Sopenharmony_ci * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK). 36348c2ecf20Sopenharmony_ci * This function returns the new value we should put in vmcs12.guest_cr0. 36358c2ecf20Sopenharmony_ci * It's not enough to just return the vmcs02 GUEST_CR0. Rather, 36368c2ecf20Sopenharmony_ci * 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now 36378c2ecf20Sopenharmony_ci * available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0 36388c2ecf20Sopenharmony_ci * didn't trap the bit, because if L1 did, so would L0). 36398c2ecf20Sopenharmony_ci * 2. 
Bits that L1 asked to trap (and therefore L0 also did) could not have 36408c2ecf20Sopenharmony_ci * been modified by L2, and L1 knows it. So just leave the old value of 36418c2ecf20Sopenharmony_ci * the bit from vmcs12.guest_cr0. Note that the bit from vmcs02 GUEST_CR0 36428c2ecf20Sopenharmony_ci * isn't relevant, because if L0 traps this bit it can set it to anything. 36438c2ecf20Sopenharmony_ci * 3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have 36448c2ecf20Sopenharmony_ci * changed these bits, and therefore they need to be updated, but L0 36458c2ecf20Sopenharmony_ci * didn't necessarily allow them to be changed in GUEST_CR0 - and rather 36468c2ecf20Sopenharmony_ci * put them in vmcs02 CR0_READ_SHADOW. So take these bits from there. 36478c2ecf20Sopenharmony_ci */ 36488c2ecf20Sopenharmony_cistatic inline unsigned long 36498c2ecf20Sopenharmony_civmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) 36508c2ecf20Sopenharmony_ci{ 36518c2ecf20Sopenharmony_ci return 36528c2ecf20Sopenharmony_ci /*1*/ (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) | 36538c2ecf20Sopenharmony_ci /*2*/ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) | 36548c2ecf20Sopenharmony_ci /*3*/ (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask | 36558c2ecf20Sopenharmony_ci vcpu->arch.cr0_guest_owned_bits)); 36568c2ecf20Sopenharmony_ci} 36578c2ecf20Sopenharmony_ci 36588c2ecf20Sopenharmony_cistatic inline unsigned long 36598c2ecf20Sopenharmony_civmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) 36608c2ecf20Sopenharmony_ci{ 36618c2ecf20Sopenharmony_ci return 36628c2ecf20Sopenharmony_ci /*1*/ (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) | 36638c2ecf20Sopenharmony_ci /*2*/ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) | 36648c2ecf20Sopenharmony_ci /*3*/ (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask | 36658c2ecf20Sopenharmony_ci vcpu->arch.cr4_guest_owned_bits)); 36668c2ecf20Sopenharmony_ci} 
36678c2ecf20Sopenharmony_ci 36688c2ecf20Sopenharmony_cistatic void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, 36698c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12, 36708c2ecf20Sopenharmony_ci u32 vm_exit_reason, u32 exit_intr_info) 36718c2ecf20Sopenharmony_ci{ 36728c2ecf20Sopenharmony_ci u32 idt_vectoring; 36738c2ecf20Sopenharmony_ci unsigned int nr; 36748c2ecf20Sopenharmony_ci 36758c2ecf20Sopenharmony_ci /* 36768c2ecf20Sopenharmony_ci * Per the SDM, VM-Exits due to double and triple faults are never 36778c2ecf20Sopenharmony_ci * considered to occur during event delivery, even if the double/triple 36788c2ecf20Sopenharmony_ci * fault is the result of an escalating vectoring issue. 36798c2ecf20Sopenharmony_ci * 36808c2ecf20Sopenharmony_ci * Note, the SDM qualifies the double fault behavior with "The original 36818c2ecf20Sopenharmony_ci * event results in a double-fault exception". It's unclear why the 36828c2ecf20Sopenharmony_ci * qualification exists since exits due to double fault can occur only 36838c2ecf20Sopenharmony_ci * while vectoring a different exception (injected events are never 36848c2ecf20Sopenharmony_ci * subject to interception), i.e. there's _always_ an original event. 36858c2ecf20Sopenharmony_ci * 36868c2ecf20Sopenharmony_ci * The SDM also uses NMI as a confusing example for the "original event 36878c2ecf20Sopenharmony_ci * causes the VM exit directly" clause. NMI isn't special in any way, 36888c2ecf20Sopenharmony_ci * the same rule applies to all events that cause an exit directly. 36898c2ecf20Sopenharmony_ci * NMI is an odd choice for the example because NMIs can only occur on 36908c2ecf20Sopenharmony_ci * instruction boundaries, i.e. they _can't_ occur during vectoring. 
36918c2ecf20Sopenharmony_ci */ 36928c2ecf20Sopenharmony_ci if ((u16)vm_exit_reason == EXIT_REASON_TRIPLE_FAULT || 36938c2ecf20Sopenharmony_ci ((u16)vm_exit_reason == EXIT_REASON_EXCEPTION_NMI && 36948c2ecf20Sopenharmony_ci is_double_fault(exit_intr_info))) { 36958c2ecf20Sopenharmony_ci vmcs12->idt_vectoring_info_field = 0; 36968c2ecf20Sopenharmony_ci } else if (vcpu->arch.exception.injected) { 36978c2ecf20Sopenharmony_ci nr = vcpu->arch.exception.nr; 36988c2ecf20Sopenharmony_ci idt_vectoring = nr | VECTORING_INFO_VALID_MASK; 36998c2ecf20Sopenharmony_ci 37008c2ecf20Sopenharmony_ci if (kvm_exception_is_soft(nr)) { 37018c2ecf20Sopenharmony_ci vmcs12->vm_exit_instruction_len = 37028c2ecf20Sopenharmony_ci vcpu->arch.event_exit_inst_len; 37038c2ecf20Sopenharmony_ci idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION; 37048c2ecf20Sopenharmony_ci } else 37058c2ecf20Sopenharmony_ci idt_vectoring |= INTR_TYPE_HARD_EXCEPTION; 37068c2ecf20Sopenharmony_ci 37078c2ecf20Sopenharmony_ci if (vcpu->arch.exception.has_error_code) { 37088c2ecf20Sopenharmony_ci idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK; 37098c2ecf20Sopenharmony_ci vmcs12->idt_vectoring_error_code = 37108c2ecf20Sopenharmony_ci vcpu->arch.exception.error_code; 37118c2ecf20Sopenharmony_ci } 37128c2ecf20Sopenharmony_ci 37138c2ecf20Sopenharmony_ci vmcs12->idt_vectoring_info_field = idt_vectoring; 37148c2ecf20Sopenharmony_ci } else if (vcpu->arch.nmi_injected) { 37158c2ecf20Sopenharmony_ci vmcs12->idt_vectoring_info_field = 37168c2ecf20Sopenharmony_ci INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; 37178c2ecf20Sopenharmony_ci } else if (vcpu->arch.interrupt.injected) { 37188c2ecf20Sopenharmony_ci nr = vcpu->arch.interrupt.nr; 37198c2ecf20Sopenharmony_ci idt_vectoring = nr | VECTORING_INFO_VALID_MASK; 37208c2ecf20Sopenharmony_ci 37218c2ecf20Sopenharmony_ci if (vcpu->arch.interrupt.soft) { 37228c2ecf20Sopenharmony_ci idt_vectoring |= INTR_TYPE_SOFT_INTR; 37238c2ecf20Sopenharmony_ci vmcs12->vm_entry_instruction_len = 
37248c2ecf20Sopenharmony_ci vcpu->arch.event_exit_inst_len; 37258c2ecf20Sopenharmony_ci } else 37268c2ecf20Sopenharmony_ci idt_vectoring |= INTR_TYPE_EXT_INTR; 37278c2ecf20Sopenharmony_ci 37288c2ecf20Sopenharmony_ci vmcs12->idt_vectoring_info_field = idt_vectoring; 37298c2ecf20Sopenharmony_ci } else { 37308c2ecf20Sopenharmony_ci vmcs12->idt_vectoring_info_field = 0; 37318c2ecf20Sopenharmony_ci } 37328c2ecf20Sopenharmony_ci} 37338c2ecf20Sopenharmony_ci 37348c2ecf20Sopenharmony_ci 37358c2ecf20Sopenharmony_civoid nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) 37368c2ecf20Sopenharmony_ci{ 37378c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 37388c2ecf20Sopenharmony_ci gfn_t gfn; 37398c2ecf20Sopenharmony_ci 37408c2ecf20Sopenharmony_ci /* 37418c2ecf20Sopenharmony_ci * Don't need to mark the APIC access page dirty; it is never 37428c2ecf20Sopenharmony_ci * written to by the CPU during APIC virtualization. 37438c2ecf20Sopenharmony_ci */ 37448c2ecf20Sopenharmony_ci 37458c2ecf20Sopenharmony_ci if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { 37468c2ecf20Sopenharmony_ci gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; 37478c2ecf20Sopenharmony_ci kvm_vcpu_mark_page_dirty(vcpu, gfn); 37488c2ecf20Sopenharmony_ci } 37498c2ecf20Sopenharmony_ci 37508c2ecf20Sopenharmony_ci if (nested_cpu_has_posted_intr(vmcs12)) { 37518c2ecf20Sopenharmony_ci gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; 37528c2ecf20Sopenharmony_ci kvm_vcpu_mark_page_dirty(vcpu, gfn); 37538c2ecf20Sopenharmony_ci } 37548c2ecf20Sopenharmony_ci} 37558c2ecf20Sopenharmony_ci 37568c2ecf20Sopenharmony_cistatic void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) 37578c2ecf20Sopenharmony_ci{ 37588c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 37598c2ecf20Sopenharmony_ci int max_irr; 37608c2ecf20Sopenharmony_ci void *vapic_page; 37618c2ecf20Sopenharmony_ci u16 status; 37628c2ecf20Sopenharmony_ci 37638c2ecf20Sopenharmony_ci if (!vmx->nested.pi_desc || 
!vmx->nested.pi_pending) 37648c2ecf20Sopenharmony_ci return; 37658c2ecf20Sopenharmony_ci 37668c2ecf20Sopenharmony_ci vmx->nested.pi_pending = false; 37678c2ecf20Sopenharmony_ci if (!pi_test_and_clear_on(vmx->nested.pi_desc)) 37688c2ecf20Sopenharmony_ci return; 37698c2ecf20Sopenharmony_ci 37708c2ecf20Sopenharmony_ci max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); 37718c2ecf20Sopenharmony_ci if (max_irr != 256) { 37728c2ecf20Sopenharmony_ci vapic_page = vmx->nested.virtual_apic_map.hva; 37738c2ecf20Sopenharmony_ci if (!vapic_page) 37748c2ecf20Sopenharmony_ci return; 37758c2ecf20Sopenharmony_ci 37768c2ecf20Sopenharmony_ci __kvm_apic_update_irr(vmx->nested.pi_desc->pir, 37778c2ecf20Sopenharmony_ci vapic_page, &max_irr); 37788c2ecf20Sopenharmony_ci status = vmcs_read16(GUEST_INTR_STATUS); 37798c2ecf20Sopenharmony_ci if ((u8)max_irr > ((u8)status & 0xff)) { 37808c2ecf20Sopenharmony_ci status &= ~0xff; 37818c2ecf20Sopenharmony_ci status |= (u8)max_irr; 37828c2ecf20Sopenharmony_ci vmcs_write16(GUEST_INTR_STATUS, status); 37838c2ecf20Sopenharmony_ci } 37848c2ecf20Sopenharmony_ci } 37858c2ecf20Sopenharmony_ci 37868c2ecf20Sopenharmony_ci nested_mark_vmcs12_pages_dirty(vcpu); 37878c2ecf20Sopenharmony_ci} 37888c2ecf20Sopenharmony_ci 37898c2ecf20Sopenharmony_cistatic void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, 37908c2ecf20Sopenharmony_ci unsigned long exit_qual) 37918c2ecf20Sopenharmony_ci{ 37928c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 37938c2ecf20Sopenharmony_ci unsigned int nr = vcpu->arch.exception.nr; 37948c2ecf20Sopenharmony_ci u32 intr_info = nr | INTR_INFO_VALID_MASK; 37958c2ecf20Sopenharmony_ci 37968c2ecf20Sopenharmony_ci if (vcpu->arch.exception.has_error_code) { 37978c2ecf20Sopenharmony_ci /* 37988c2ecf20Sopenharmony_ci * Intel CPUs do not generate error codes with bits 31:16 set, 37998c2ecf20Sopenharmony_ci * and more importantly VMX disallows setting bits 31:16 in the 38008c2ecf20Sopenharmony_ci * 
injected error code for VM-Entry. Drop the bits to mimic 38018c2ecf20Sopenharmony_ci * hardware and avoid inducing failure on nested VM-Entry if L1 38028c2ecf20Sopenharmony_ci * chooses to inject the exception back to L2. AMD CPUs _do_ 38038c2ecf20Sopenharmony_ci * generate "full" 32-bit error codes, so KVM allows userspace 38048c2ecf20Sopenharmony_ci * to inject exception error codes with bits 31:16 set. 38058c2ecf20Sopenharmony_ci */ 38068c2ecf20Sopenharmony_ci vmcs12->vm_exit_intr_error_code = (u16)vcpu->arch.exception.error_code; 38078c2ecf20Sopenharmony_ci intr_info |= INTR_INFO_DELIVER_CODE_MASK; 38088c2ecf20Sopenharmony_ci } 38098c2ecf20Sopenharmony_ci 38108c2ecf20Sopenharmony_ci if (kvm_exception_is_soft(nr)) 38118c2ecf20Sopenharmony_ci intr_info |= INTR_TYPE_SOFT_EXCEPTION; 38128c2ecf20Sopenharmony_ci else 38138c2ecf20Sopenharmony_ci intr_info |= INTR_TYPE_HARD_EXCEPTION; 38148c2ecf20Sopenharmony_ci 38158c2ecf20Sopenharmony_ci if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) && 38168c2ecf20Sopenharmony_ci vmx_get_nmi_mask(vcpu)) 38178c2ecf20Sopenharmony_ci intr_info |= INTR_INFO_UNBLOCK_NMI; 38188c2ecf20Sopenharmony_ci 38198c2ecf20Sopenharmony_ci nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual); 38208c2ecf20Sopenharmony_ci} 38218c2ecf20Sopenharmony_ci 38228c2ecf20Sopenharmony_ci/* 38238c2ecf20Sopenharmony_ci * Returns true if a debug trap is pending delivery. 38248c2ecf20Sopenharmony_ci * 38258c2ecf20Sopenharmony_ci * In KVM, debug traps bear an exception payload. As such, the class of a #DB 38268c2ecf20Sopenharmony_ci * exception may be inferred from the presence of an exception payload. 
38278c2ecf20Sopenharmony_ci */ 38288c2ecf20Sopenharmony_cistatic inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu) 38298c2ecf20Sopenharmony_ci{ 38308c2ecf20Sopenharmony_ci return vcpu->arch.exception.pending && 38318c2ecf20Sopenharmony_ci vcpu->arch.exception.nr == DB_VECTOR && 38328c2ecf20Sopenharmony_ci vcpu->arch.exception.payload; 38338c2ecf20Sopenharmony_ci} 38348c2ecf20Sopenharmony_ci 38358c2ecf20Sopenharmony_ci/* 38368c2ecf20Sopenharmony_ci * Certain VM-exits set the 'pending debug exceptions' field to indicate a 38378c2ecf20Sopenharmony_ci * recognized #DB (data or single-step) that has yet to be delivered. Since KVM 38388c2ecf20Sopenharmony_ci * represents these debug traps with a payload that is said to be compatible 38398c2ecf20Sopenharmony_ci * with the 'pending debug exceptions' field, write the payload to the VMCS 38408c2ecf20Sopenharmony_ci * field if a VM-exit is delivered before the debug trap. 38418c2ecf20Sopenharmony_ci */ 38428c2ecf20Sopenharmony_cistatic void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu) 38438c2ecf20Sopenharmony_ci{ 38448c2ecf20Sopenharmony_ci if (vmx_pending_dbg_trap(vcpu)) 38458c2ecf20Sopenharmony_ci vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 38468c2ecf20Sopenharmony_ci vcpu->arch.exception.payload); 38478c2ecf20Sopenharmony_ci} 38488c2ecf20Sopenharmony_ci 38498c2ecf20Sopenharmony_cistatic bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu) 38508c2ecf20Sopenharmony_ci{ 38518c2ecf20Sopenharmony_ci return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && 38528c2ecf20Sopenharmony_ci to_vmx(vcpu)->nested.preemption_timer_expired; 38538c2ecf20Sopenharmony_ci} 38548c2ecf20Sopenharmony_ci 38558c2ecf20Sopenharmony_cistatic int vmx_check_nested_events(struct kvm_vcpu *vcpu) 38568c2ecf20Sopenharmony_ci{ 38578c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 38588c2ecf20Sopenharmony_ci unsigned long exit_qual; 38598c2ecf20Sopenharmony_ci bool block_nested_events = 38608c2ecf20Sopenharmony_ci 
vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); 38618c2ecf20Sopenharmony_ci bool mtf_pending = vmx->nested.mtf_pending; 38628c2ecf20Sopenharmony_ci struct kvm_lapic *apic = vcpu->arch.apic; 38638c2ecf20Sopenharmony_ci 38648c2ecf20Sopenharmony_ci /* 38658c2ecf20Sopenharmony_ci * Clear the MTF state. If a higher priority VM-exit is delivered first, 38668c2ecf20Sopenharmony_ci * this state is discarded. 38678c2ecf20Sopenharmony_ci */ 38688c2ecf20Sopenharmony_ci if (!block_nested_events) 38698c2ecf20Sopenharmony_ci vmx->nested.mtf_pending = false; 38708c2ecf20Sopenharmony_ci 38718c2ecf20Sopenharmony_ci if (lapic_in_kernel(vcpu) && 38728c2ecf20Sopenharmony_ci test_bit(KVM_APIC_INIT, &apic->pending_events)) { 38738c2ecf20Sopenharmony_ci if (block_nested_events) 38748c2ecf20Sopenharmony_ci return -EBUSY; 38758c2ecf20Sopenharmony_ci nested_vmx_update_pending_dbg(vcpu); 38768c2ecf20Sopenharmony_ci clear_bit(KVM_APIC_INIT, &apic->pending_events); 38778c2ecf20Sopenharmony_ci nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); 38788c2ecf20Sopenharmony_ci return 0; 38798c2ecf20Sopenharmony_ci } 38808c2ecf20Sopenharmony_ci 38818c2ecf20Sopenharmony_ci /* 38828c2ecf20Sopenharmony_ci * Process any exceptions that are not debug traps before MTF. 
38838c2ecf20Sopenharmony_ci */ 38848c2ecf20Sopenharmony_ci if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) { 38858c2ecf20Sopenharmony_ci if (block_nested_events) 38868c2ecf20Sopenharmony_ci return -EBUSY; 38878c2ecf20Sopenharmony_ci if (!nested_vmx_check_exception(vcpu, &exit_qual)) 38888c2ecf20Sopenharmony_ci goto no_vmexit; 38898c2ecf20Sopenharmony_ci nested_vmx_inject_exception_vmexit(vcpu, exit_qual); 38908c2ecf20Sopenharmony_ci return 0; 38918c2ecf20Sopenharmony_ci } 38928c2ecf20Sopenharmony_ci 38938c2ecf20Sopenharmony_ci if (mtf_pending) { 38948c2ecf20Sopenharmony_ci if (block_nested_events) 38958c2ecf20Sopenharmony_ci return -EBUSY; 38968c2ecf20Sopenharmony_ci nested_vmx_update_pending_dbg(vcpu); 38978c2ecf20Sopenharmony_ci nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0); 38988c2ecf20Sopenharmony_ci return 0; 38998c2ecf20Sopenharmony_ci } 39008c2ecf20Sopenharmony_ci 39018c2ecf20Sopenharmony_ci if (vcpu->arch.exception.pending) { 39028c2ecf20Sopenharmony_ci if (block_nested_events) 39038c2ecf20Sopenharmony_ci return -EBUSY; 39048c2ecf20Sopenharmony_ci if (!nested_vmx_check_exception(vcpu, &exit_qual)) 39058c2ecf20Sopenharmony_ci goto no_vmexit; 39068c2ecf20Sopenharmony_ci nested_vmx_inject_exception_vmexit(vcpu, exit_qual); 39078c2ecf20Sopenharmony_ci return 0; 39088c2ecf20Sopenharmony_ci } 39098c2ecf20Sopenharmony_ci 39108c2ecf20Sopenharmony_ci if (nested_vmx_preemption_timer_pending(vcpu)) { 39118c2ecf20Sopenharmony_ci if (block_nested_events) 39128c2ecf20Sopenharmony_ci return -EBUSY; 39138c2ecf20Sopenharmony_ci nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0); 39148c2ecf20Sopenharmony_ci return 0; 39158c2ecf20Sopenharmony_ci } 39168c2ecf20Sopenharmony_ci 39178c2ecf20Sopenharmony_ci if (vcpu->arch.smi_pending && !is_smm(vcpu)) { 39188c2ecf20Sopenharmony_ci if (block_nested_events) 39198c2ecf20Sopenharmony_ci return -EBUSY; 39208c2ecf20Sopenharmony_ci goto no_vmexit; 39218c2ecf20Sopenharmony_ci } 

	if (vcpu->arch.nmi_pending && !vmx_nmi_blocked(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		if (!nested_exit_on_nmi(vcpu))
			goto no_vmexit;

		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
				  NMI_VECTOR | INTR_TYPE_NMI_INTR |
				  INTR_INFO_VALID_MASK, 0);
		/*
		 * The NMI-triggered VM exit counts as injection:
		 * clear this one and block further NMIs.
		 */
		vcpu->arch.nmi_pending = 0;
		vmx_set_nmi_mask(vcpu, true);
		return 0;
	}

	if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		if (!nested_exit_on_intr(vcpu))
			goto no_vmexit;
		nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
		return 0;
	}

no_vmexit:
	vmx_complete_nested_posted_interrupt(vcpu);
	return 0;
}

/*
 * Convert the time remaining on the nested preemption hrtimer into the
 * units stored in vmcs12->vmx_preemption_timer_value.
 *
 * The remaining nanoseconds are scaled by virtual_tsc_khz / 10^6 and the
 * result is shifted right by VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE.
 * Returns 0 when the hrtimer has no time left.
 */
static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
{
	ktime_t remaining =
		hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
	u64 value;

	/* Zero or negative remaining time is reported as 0. */
	if (ktime_to_ns(remaining) <= 0)
		return 0;

	value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
	do_div(value, 1000000);
	return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
}

/*
 * Return true if @field is one of the guest-state VMCS fields that
 * sync_vmcs02_to_vmcs12_rare() copies into vmcs12, false otherwise.
 * The case list below mirrors the fields read by that function.
 */
static bool is_vmcs12_ext_field(unsigned long field)
{
	switch (field) {
	case GUEST_ES_SELECTOR:
	case GUEST_CS_SELECTOR:
	case GUEST_SS_SELECTOR:
	case GUEST_DS_SELECTOR:
	case GUEST_FS_SELECTOR:
	case GUEST_GS_SELECTOR:
	case GUEST_LDTR_SELECTOR:
	case GUEST_TR_SELECTOR:
	case GUEST_ES_LIMIT:
	case GUEST_CS_LIMIT:
	case GUEST_SS_LIMIT:
	case GUEST_DS_LIMIT:
	case GUEST_FS_LIMIT:
	case GUEST_GS_LIMIT:
	case GUEST_LDTR_LIMIT:
	case GUEST_TR_LIMIT:
	case GUEST_GDTR_LIMIT:
	case GUEST_IDTR_LIMIT:
	case GUEST_ES_AR_BYTES:
	case GUEST_DS_AR_BYTES:
	case GUEST_FS_AR_BYTES:
	case GUEST_GS_AR_BYTES:
	case GUEST_LDTR_AR_BYTES:
	case GUEST_TR_AR_BYTES:
	case GUEST_ES_BASE:
	case GUEST_CS_BASE:
	case GUEST_SS_BASE:
	case GUEST_DS_BASE:
	case GUEST_FS_BASE:
	case GUEST_GS_BASE:
	case GUEST_LDTR_BASE:
	case GUEST_TR_BASE:
	case GUEST_GDTR_BASE:
	case GUEST_IDTR_BASE:
	case GUEST_PENDING_DBG_EXCEPTIONS:
	case GUEST_BNDCFGS:
		return true;
	default:
		break;
	}

	return false;
}

/*
 * Copy the guest segment selectors/limits/access rights/bases, the
 * GDTR/IDTR limits and bases, the pending debug exceptions and (when
 * kvm_mpx_supported()) BNDCFGS into vmcs12 using vmcs_read*(), then clear
 * nested.need_sync_vmcs02_to_vmcs12_rare.
 *
 * CS and SS access rights are not copied here; only the ES/DS/FS/GS/LDTR/TR
 * access-rights fields are.
 */
static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
				       struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
	vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
	vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
	vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
	vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
	vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
	vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
	vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
	vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
	vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
	vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
	vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
	vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
	vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
	vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
	vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
	vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
	vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
	vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
	vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
	vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
	vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
	vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
	vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
	vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
	vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
	vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
	vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
	vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
	vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
	vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
	vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
	vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
	vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
	vmcs12->guest_pending_dbg_exceptions =
		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
	if (kvm_mpx_supported())
		vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);

	/* The rarely-needed fields are now up to date in vmcs12. */
	vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
}

/*
 * Perform a deferred sync_vmcs02_to_vmcs12_rare() if one is pending.
 *
 * Does nothing unless nested.need_sync_vmcs02_to_vmcs12_rare is set.
 * Otherwise, between get_cpu() and put_cpu(), vmcs02 is made the loaded
 * VMCS, the rare fields are copied to vmcs12, and vmcs01 is loaded again.
 * Warns once if vmcs01 is not the loaded VMCS on entry.
 */
static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
				       struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int cpu;

	if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare)
		return;


	WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01);

	cpu = get_cpu();
	/* Switch to vmcs02 so the vmcs_read*() calls below read L2's state. */
	vmx->loaded_vmcs = &vmx->nested.vmcs02;
	vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01);

	sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);

	/* Switch back to vmcs01 before returning. */
	vmx->loaded_vmcs = &vmx->vmcs01;
	vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02);
	put_cpu();
}

/*
 * Update the guest state fields of vmcs12 to reflect changes that
 * occurred while L2 was running. (The "IA-32e mode guest" bit of the
 * VM-entry controls is also updated, since this is really a guest
 * state bit.)
 */
static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * With an enlightened VMCS in use the rarely-needed fields are
	 * synced immediately; otherwise the sync is only flagged as pending.
	 */
	if (vmx->nested.hv_evmcs)
		sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);

	vmx->nested.need_sync_vmcs02_to_vmcs12_rare = !vmx->nested.hv_evmcs;

	vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
	vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);

	vmcs12->guest_rsp = kvm_rsp_read(vcpu);
	vmcs12->guest_rip = kvm_rip_read(vcpu);
	vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);

	vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
	vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);

	vmcs12->guest_interruptibility_info =
		vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);

	/* Only the HLT and ACTIVE activity states are reported to vmcs12. */
	if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
		vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
	else
		vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;

	/*
	 * Save the remaining preemption timer value only if L1 enabled the
	 * timer, asked for it to be saved on exit, and no nested run is
	 * pending.
	 */
	if (nested_cpu_has_preemption_timer(vmcs12) &&
	    vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER &&
	    !vmx->nested.nested_run_pending)
		vmcs12->vmx_preemption_timer_value =
			vmx_get_preemption_timer_value(vcpu);

	/*
	 * In some cases (usually, nested EPT), L2 is allowed to change its
	 * own CR3 without exiting. If it has changed it, we must keep it.
	 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
	 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
	 *
	 * Additionally, restore L2's PDPTR to vmcs12.
	 */
	if (enable_ept) {
		vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
		if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
			vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
			vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
			vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
			vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
		}
	}

	vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);

	if (nested_cpu_has_vid(vmcs12))
		vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);

	/* Replace only the IA-32e mode bit; all other entry controls stay. */
	vmcs12->vm_entry_controls =
		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
		(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);

	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
		kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);

	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
		vmcs12->guest_ia32_efer = vcpu->arch.efer;
}

/*
 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
 * and this function updates it to reflect the changes to the guest state while
 * L2 was running (and perhaps made some exits which were handled directly by L0
 * without going back to L1), and to reflect the exit reason.
 * Note that we do not have to copy here all VMCS fields, just those that
 * could have changed by the L2 guest or the exit - i.e., the guest-state and
 * exit-information fields only. Other fields are modified by L1 with VMWRITE,
 * which already writes to vmcs12 directly.
 */
static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
			   u32 vm_exit_reason, u32 exit_intr_info,
			   unsigned long exit_qualification)
{
	/* update exit information fields: */
	vmcs12->vm_exit_reason = vm_exit_reason;
	vmcs12->exit_qualification = exit_qualification;

	/*
	 * On VM-Exit due to a failed VM-Entry, the VMCS isn't marked launched
	 * and only EXIT_REASON and EXIT_QUALIFICATION are updated, all other
	 * exit info fields are unmodified.
	 */
	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
		vmcs12->launch_state = 1;

		/* vm_entry_intr_info_field is cleared on exit. Emulate this
		 * instead of reading the real value. */
		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;

		/*
		 * Transfer the event that L0 or L1 may have wanted to inject
		 * into L2 to IDT_VECTORING_INFO_FIELD.
		 */
		vmcs12_save_pending_event(vcpu, vmcs12,
					  vm_exit_reason, exit_intr_info);

		vmcs12->vm_exit_intr_info = exit_intr_info;
		vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
		vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);

		/*
		 * According to spec, there's no need to store the guest's
		 * MSRs if the exit is due to a VM-entry failure that occurs
		 * during or after loading the guest state. Since this exit
		 * does not fall in that category, we need to save the MSRs.
		 */
		if (nested_vmx_store_msr(vcpu,
					 vmcs12->vm_exit_msr_store_addr,
					 vmcs12->vm_exit_msr_store_count))
			nested_vmx_abort(vcpu,
					 VMX_ABORT_SAVE_GUEST_MSR_FAIL);
	}
}

/*
 * A part of what we need to do when the nested L2 guest exits and we want to
 * run its L1 parent, is to reset L1's guest state to the host state specified
 * in vmcs12.
 * This function is to be called not only on normal nested exit, but also on
 * a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry
 * Failures During or After Loading Guest State").
 * This function should be called when the active VMCS is L1's (vmcs01).
 */
static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
				   struct vmcs12 *vmcs12)
{
	enum vm_entry_failure_code ignored;
	struct kvm_segment seg;

	/*
	 * Either take EFER verbatim from vmcs12, or derive LMA/LME from the
	 * "host address-space size" exit control.
	 */
	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
		vcpu->arch.efer = vmcs12->host_ia32_efer;
	else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
		vcpu->arch.efer |= (EFER_LMA | EFER_LME);
	else
		vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
	vmx_set_efer(vcpu, vcpu->arch.efer);

	kvm_rsp_write(vcpu, vmcs12->host_rsp);
	kvm_rip_write(vcpu, vmcs12->host_rip);
	vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
	vmx_set_interrupt_shadow(vcpu, 0);

	/*
	 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
	 * actually changed, because vmx_set_cr0 refers to efer set above.
	 *
	 * CR0_GUEST_HOST_MASK is already set in the original vmcs01
	 * (KVM doesn't change it);
	 */
	vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
	vmx_set_cr0(vcpu, vmcs12->host_cr0);

	/* Same as above - no reason to call set_cr4_guest_host_mask().  */
	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
	vmx_set_cr4(vcpu, vmcs12->host_cr4);

	nested_ept_uninit_mmu_context(vcpu);

	/*
	 * Only PDPTE load can fail as the value of cr3 was checked on entry and
	 * couldn't have changed.
	 */
	if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &ignored))
		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);

	if (!enable_ept)
		vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;

	nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);

	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
	vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
	vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
	vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
	vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);

	/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1.  */
	if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
		vmcs_write64(GUEST_BNDCFGS, 0);

	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
		vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
		vcpu->arch.pat = vmcs12->host_ia32_pat;
	}
	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
		WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
					 vmcs12->host_ia32_perf_global_ctrl));

	/* Set L1 segment info according to Intel SDM
	    27.5.2 Loading Host Segment and Descriptor-Table Registers */
	seg = (struct kvm_segment) {
		.base = 0,
		.limit = 0xFFFFFFFF,
		.selector = vmcs12->host_cs_selector,
		.type = 11,
		.present = 1,
		.s = 1,
		.g = 1
	};
	/* CS is marked 64-bit (L) or 32-bit default (D/B), never both. */
	if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
		seg.l = 1;
	else
		seg.db = 1;
	vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
	/*
	 * One template is reused for DS/ES/SS/FS/GS; only the selector (and,
	 * for FS/GS, the base) is changed between calls.
	 */
	seg = (struct kvm_segment) {
		.base = 0,
		.limit = 0xFFFFFFFF,
		.type = 3,
		.present = 1,
		.s = 1,
		.db = 1,
		.g = 1
	};
	seg.selector = vmcs12->host_ds_selector;
	vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
	seg.selector = vmcs12->host_es_selector;
	vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
	seg.selector = vmcs12->host_ss_selector;
	vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
	seg.selector = vmcs12->host_fs_selector;
	seg.base = vmcs12->host_fs_base;
	vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
	seg.selector = vmcs12->host_gs_selector;
	seg.base = vmcs12->host_gs_base;
	vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
	seg = (struct kvm_segment) {
		.base = vmcs12->host_tr_base,
		.limit = 0x67,
		.selector = vmcs12->host_tr_selector,
		.type = 11,
		.present = 1
	};
	vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);

	kvm_set_dr(vcpu, 7, 0x400);
	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);

	if (cpu_has_vmx_msr_bitmap())
		vmx_update_msr_bitmap(vcpu);

	/* Finally process L1's VM-exit MSR-load list; failure is an abort. */
	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
				vmcs12->vm_exit_msr_load_count))
		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
}

/*
 * Return the EFER value to use for vmcs01's guest. Sources are tried in
 * this order:
 *   1. GUEST_IA32_EFER, if the VM-entry controls load EFER;
 *   2. host_efer, if cpu_has_load_ia32_efer();
 *   3. the MSR_EFER entry of the msr_autoload guest list, if present;
 *   4. the MSR_EFER user-return MSR entry, if present;
 *   5. host_efer otherwise.
 */
static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
{
	struct vmx_uret_msr *efer_msr;
	unsigned int i;

	if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
		return vmcs_read64(GUEST_IA32_EFER);

	if (cpu_has_load_ia32_efer())
		return host_efer;

	for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
		if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
			return vmx->msr_autoload.guest.val[i].value;
	}

	efer_msr = vmx_find_uret_msr(vmx, MSR_EFER);
	if (efer_msr)
		return efer_msr->data;

	return host_efer;
}

/*
 * Bring KVM's software model of the vCPU back in line with the values held
 * in the VMCS fields read below (PAT, DR7, EFER, CR0/CR4 read shadows, CR3
 * and, if needed, the PDPTRs), then undo the effect of the vmcs12 VM-entry
 * MSR-load list as described in the comment above the MSR loop.
 *
 * Any failure while walking the MSR lists ends in nested_vmx_abort() with
 * VMX_ABORT_LOAD_HOST_MSR_FAIL.
 */
static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmx_msr_entry g, h;
	gpa_t gpa;
	u32 i, j;

	vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT);

	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
		/*
		 * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set
		 * as vmcs01.GUEST_DR7 contains a userspace defined value
		 * and vcpu->arch.dr7 is not squirreled away before the
		 * nested VMENTER (not worth adding a variable in nested_vmx).
		 */
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
			kvm_set_dr(vcpu, 7, DR7_FIXED_1);
		else
			WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
	}

	/*
	 * Note that calling vmx_set_{efer,cr0,cr4} is important as they
	 * handle a variety of side effects to KVM's software model.
	 */
	vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));

	vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
	vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));

	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
	vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));

	nested_ept_uninit_mmu_context(vcpu);
	vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);

	/*
	 * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
	 * from vmcs01 (if necessary).  The PDPTRs are not loaded on
	 * VMFail, like everything else we just need to ensure our
	 * software model is up-to-date.
	 */
	if (enable_ept && is_pae_paging(vcpu))
		ept_save_pdptrs(vcpu);

	kvm_mmu_reset_context(vcpu);

	if (cpu_has_vmx_msr_bitmap())
		vmx_update_msr_bitmap(vcpu);

	/*
	 * This nasty bit of open coding is a compromise between blindly
	 * loading L1's MSRs using the exit load lists (incorrect emulation
	 * of VMFail), leaving the nested VM's MSRs in the software model
	 * (incorrect behavior) and snapshotting the modified MSRs (too
	 * expensive since the lists are unbound by hardware).  For each
	 * MSR that was (prematurely) loaded from the nested VMEntry load
	 * list, reload it from the exit load list if it exists and differs
	 * from the guest value.  The intent is to stuff host state as
	 * silently as possible, not to fully process the exit load list.
	 */
	for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
		/* g: i-th entry of the VM-entry MSR-load list. */
		gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
		if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
			pr_debug_ratelimited(
				"%s read MSR index failed (%u, 0x%08llx)\n",
				__func__, i, gpa);
			goto vmabort;
		}

		for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) {
			/* h: j-th entry of the VM-exit MSR-load list. */
			gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h));
			if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) {
				pr_debug_ratelimited(
					"%s read MSR failed (%u, 0x%08llx)\n",
					__func__, j, gpa);
				goto vmabort;
			}
			if (h.index != g.index)
				continue;
			/* Same MSR, same value: nothing to reload for g. */
			if (h.value == g.value)
				break;

			if (nested_vmx_load_msr_check(vcpu, &h)) {
				pr_debug_ratelimited(
					"%s check failed (%u, 0x%x, 0x%x)\n",
					__func__, j, h.index, h.reserved);
				goto vmabort;
			}

			if (kvm_set_msr(vcpu, h.index, h.value)) {
				pr_debug_ratelimited(
					"%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
					__func__, j, h.index, h.value);
				goto vmabort;
			}
		}
	}

	return;

vmabort:
	nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
}

/*
 * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
 * and modify vmcs12 to make it see what it would expect to see there if
 * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
 */
void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
		       u32 exit_intr_info, unsigned long exit_qualification)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

	/* trying to cancel vmlaunch/vmresume is a bug */
	WARN_ON_ONCE(vmx->nested.nested_run_pending);

	if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
		/*
		 * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
		 * Enlightened VMCS after migration and we still need to
		 * do that when something is forcing L2->L1 exit prior to
		 * the first L2 run.
		 */
		(void)nested_get_evmcs_page(vcpu);
	}

	/* Service the TLB flush request for L2 before switching to L1.
*/ 45048c2ecf20Sopenharmony_ci if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) 45058c2ecf20Sopenharmony_ci kvm_vcpu_flush_tlb_current(vcpu); 45068c2ecf20Sopenharmony_ci 45078c2ecf20Sopenharmony_ci /* 45088c2ecf20Sopenharmony_ci * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between 45098c2ecf20Sopenharmony_ci * now and the new vmentry. Ensure that the VMCS02 PDPTR fields are 45108c2ecf20Sopenharmony_ci * up-to-date before switching to L1. 45118c2ecf20Sopenharmony_ci */ 45128c2ecf20Sopenharmony_ci if (enable_ept && is_pae_paging(vcpu)) 45138c2ecf20Sopenharmony_ci vmx_ept_load_pdptrs(vcpu); 45148c2ecf20Sopenharmony_ci 45158c2ecf20Sopenharmony_ci leave_guest_mode(vcpu); 45168c2ecf20Sopenharmony_ci 45178c2ecf20Sopenharmony_ci if (nested_cpu_has_preemption_timer(vmcs12)) 45188c2ecf20Sopenharmony_ci hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer); 45198c2ecf20Sopenharmony_ci 45208c2ecf20Sopenharmony_ci if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) 45218c2ecf20Sopenharmony_ci vcpu->arch.tsc_offset -= vmcs12->tsc_offset; 45228c2ecf20Sopenharmony_ci 45238c2ecf20Sopenharmony_ci if (likely(!vmx->fail)) { 45248c2ecf20Sopenharmony_ci sync_vmcs02_to_vmcs12(vcpu, vmcs12); 45258c2ecf20Sopenharmony_ci 45268c2ecf20Sopenharmony_ci if (vm_exit_reason != -1) 45278c2ecf20Sopenharmony_ci prepare_vmcs12(vcpu, vmcs12, vm_exit_reason, 45288c2ecf20Sopenharmony_ci exit_intr_info, exit_qualification); 45298c2ecf20Sopenharmony_ci 45308c2ecf20Sopenharmony_ci /* 45318c2ecf20Sopenharmony_ci * Must happen outside of sync_vmcs02_to_vmcs12() as it will 45328c2ecf20Sopenharmony_ci * also be used to capture vmcs12 cache as part of 45338c2ecf20Sopenharmony_ci * capturing nVMX state for snapshot (migration). 45348c2ecf20Sopenharmony_ci * 45358c2ecf20Sopenharmony_ci * Otherwise, this flush will dirty guest memory at a 45368c2ecf20Sopenharmony_ci * point it is already assumed by user-space to be 45378c2ecf20Sopenharmony_ci * immutable. 
45388c2ecf20Sopenharmony_ci */ 45398c2ecf20Sopenharmony_ci nested_flush_cached_shadow_vmcs12(vcpu, vmcs12); 45408c2ecf20Sopenharmony_ci } else { 45418c2ecf20Sopenharmony_ci /* 45428c2ecf20Sopenharmony_ci * The only expected VM-instruction error is "VM entry with 45438c2ecf20Sopenharmony_ci * invalid control field(s)." Anything else indicates a 45448c2ecf20Sopenharmony_ci * problem with L0. And we should never get here with a 45458c2ecf20Sopenharmony_ci * VMFail of any type if early consistency checks are enabled. 45468c2ecf20Sopenharmony_ci */ 45478c2ecf20Sopenharmony_ci WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != 45488c2ecf20Sopenharmony_ci VMXERR_ENTRY_INVALID_CONTROL_FIELD); 45498c2ecf20Sopenharmony_ci WARN_ON_ONCE(nested_early_check); 45508c2ecf20Sopenharmony_ci } 45518c2ecf20Sopenharmony_ci 45528c2ecf20Sopenharmony_ci /* 45538c2ecf20Sopenharmony_ci * Drop events/exceptions that were queued for re-injection to L2 45548c2ecf20Sopenharmony_ci * (picked up via vmx_complete_interrupts()), as well as exceptions 45558c2ecf20Sopenharmony_ci * that were pending for L2. Note, this must NOT be hoisted above 45568c2ecf20Sopenharmony_ci * prepare_vmcs12(), events/exceptions queued for re-injection need to 45578c2ecf20Sopenharmony_ci * be captured in vmcs12 (see vmcs12_save_pending_event()). 45588c2ecf20Sopenharmony_ci */ 45598c2ecf20Sopenharmony_ci vcpu->arch.nmi_injected = false; 45608c2ecf20Sopenharmony_ci kvm_clear_exception_queue(vcpu); 45618c2ecf20Sopenharmony_ci kvm_clear_interrupt_queue(vcpu); 45628c2ecf20Sopenharmony_ci 45638c2ecf20Sopenharmony_ci vmx_switch_vmcs(vcpu, &vmx->vmcs01); 45648c2ecf20Sopenharmony_ci 45658c2ecf20Sopenharmony_ci /* 45668c2ecf20Sopenharmony_ci * If IBRS is advertised to the vCPU, KVM must flush the indirect 45678c2ecf20Sopenharmony_ci * branch predictors when transitioning from L2 to L1, as L1 expects 45688c2ecf20Sopenharmony_ci * hardware (KVM in this case) to provide separate predictor modes. 
45698c2ecf20Sopenharmony_ci * Bare metal isolates VMX root (host) from VMX non-root (guest), but 45708c2ecf20Sopenharmony_ci * doesn't isolate different VMCSs, i.e. in this case, doesn't provide 45718c2ecf20Sopenharmony_ci * separate modes for L2 vs L1. 45728c2ecf20Sopenharmony_ci */ 45738c2ecf20Sopenharmony_ci if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) 45748c2ecf20Sopenharmony_ci indirect_branch_prediction_barrier(); 45758c2ecf20Sopenharmony_ci 45768c2ecf20Sopenharmony_ci /* Update any VMCS fields that might have changed while L2 ran */ 45778c2ecf20Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); 45788c2ecf20Sopenharmony_ci vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); 45798c2ecf20Sopenharmony_ci vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); 45808c2ecf20Sopenharmony_ci if (vmx->nested.l1_tpr_threshold != -1) 45818c2ecf20Sopenharmony_ci vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold); 45828c2ecf20Sopenharmony_ci 45838c2ecf20Sopenharmony_ci if (kvm_has_tsc_control) 45848c2ecf20Sopenharmony_ci decache_tsc_multiplier(vmx); 45858c2ecf20Sopenharmony_ci 45868c2ecf20Sopenharmony_ci if (vmx->nested.change_vmcs01_virtual_apic_mode) { 45878c2ecf20Sopenharmony_ci vmx->nested.change_vmcs01_virtual_apic_mode = false; 45888c2ecf20Sopenharmony_ci vmx_set_virtual_apic_mode(vcpu); 45898c2ecf20Sopenharmony_ci } 45908c2ecf20Sopenharmony_ci 45918c2ecf20Sopenharmony_ci /* Unpin physical memory we referred to in vmcs02 */ 45928c2ecf20Sopenharmony_ci if (vmx->nested.apic_access_page) { 45938c2ecf20Sopenharmony_ci kvm_release_page_clean(vmx->nested.apic_access_page); 45948c2ecf20Sopenharmony_ci vmx->nested.apic_access_page = NULL; 45958c2ecf20Sopenharmony_ci } 45968c2ecf20Sopenharmony_ci kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); 45978c2ecf20Sopenharmony_ci kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); 45988c2ecf20Sopenharmony_ci vmx->nested.pi_desc = NULL; 45998c2ecf20Sopenharmony_ci 
46008c2ecf20Sopenharmony_ci if (vmx->nested.reload_vmcs01_apic_access_page) { 46018c2ecf20Sopenharmony_ci vmx->nested.reload_vmcs01_apic_access_page = false; 46028c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); 46038c2ecf20Sopenharmony_ci } 46048c2ecf20Sopenharmony_ci 46058c2ecf20Sopenharmony_ci if ((vm_exit_reason != -1) && 46068c2ecf20Sopenharmony_ci (enable_shadow_vmcs || vmx->nested.hv_evmcs)) 46078c2ecf20Sopenharmony_ci vmx->nested.need_vmcs12_to_shadow_sync = true; 46088c2ecf20Sopenharmony_ci 46098c2ecf20Sopenharmony_ci /* in case we halted in L2 */ 46108c2ecf20Sopenharmony_ci vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 46118c2ecf20Sopenharmony_ci 46128c2ecf20Sopenharmony_ci if (likely(!vmx->fail)) { 46138c2ecf20Sopenharmony_ci if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && 46148c2ecf20Sopenharmony_ci nested_exit_intr_ack_set(vcpu)) { 46158c2ecf20Sopenharmony_ci int irq = kvm_cpu_get_interrupt(vcpu); 46168c2ecf20Sopenharmony_ci WARN_ON(irq < 0); 46178c2ecf20Sopenharmony_ci vmcs12->vm_exit_intr_info = irq | 46188c2ecf20Sopenharmony_ci INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR; 46198c2ecf20Sopenharmony_ci } 46208c2ecf20Sopenharmony_ci 46218c2ecf20Sopenharmony_ci if (vm_exit_reason != -1) 46228c2ecf20Sopenharmony_ci trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, 46238c2ecf20Sopenharmony_ci vmcs12->exit_qualification, 46248c2ecf20Sopenharmony_ci vmcs12->idt_vectoring_info_field, 46258c2ecf20Sopenharmony_ci vmcs12->vm_exit_intr_info, 46268c2ecf20Sopenharmony_ci vmcs12->vm_exit_intr_error_code, 46278c2ecf20Sopenharmony_ci KVM_ISA_VMX); 46288c2ecf20Sopenharmony_ci 46298c2ecf20Sopenharmony_ci load_vmcs12_host_state(vcpu, vmcs12); 46308c2ecf20Sopenharmony_ci 46318c2ecf20Sopenharmony_ci return; 46328c2ecf20Sopenharmony_ci } 46338c2ecf20Sopenharmony_ci 46348c2ecf20Sopenharmony_ci /* 46358c2ecf20Sopenharmony_ci * After an early L2 VM-entry failure, we're now back 46368c2ecf20Sopenharmony_ci * in L1 which thinks it just 
finished a VMLAUNCH or 46378c2ecf20Sopenharmony_ci * VMRESUME instruction, so we need to set the failure 46388c2ecf20Sopenharmony_ci * flag and the VM-instruction error field of the VMCS 46398c2ecf20Sopenharmony_ci * accordingly, and skip the emulated instruction. 46408c2ecf20Sopenharmony_ci */ 46418c2ecf20Sopenharmony_ci (void)nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 46428c2ecf20Sopenharmony_ci 46438c2ecf20Sopenharmony_ci /* 46448c2ecf20Sopenharmony_ci * Restore L1's host state to KVM's software model. We're here 46458c2ecf20Sopenharmony_ci * because a consistency check was caught by hardware, which 46468c2ecf20Sopenharmony_ci * means some amount of guest state has been propagated to KVM's 46478c2ecf20Sopenharmony_ci * model and needs to be unwound to the host's state. 46488c2ecf20Sopenharmony_ci */ 46498c2ecf20Sopenharmony_ci nested_vmx_restore_host_state(vcpu); 46508c2ecf20Sopenharmony_ci 46518c2ecf20Sopenharmony_ci vmx->fail = 0; 46528c2ecf20Sopenharmony_ci} 46538c2ecf20Sopenharmony_ci 46548c2ecf20Sopenharmony_ci/* 46558c2ecf20Sopenharmony_ci * Decode the memory-address operand of a vmx instruction, as recorded on an 46568c2ecf20Sopenharmony_ci * exit caused by such an instruction (run by a guest hypervisor). 46578c2ecf20Sopenharmony_ci * On success, returns 0. When the operand is invalid, returns 1 and throws 46588c2ecf20Sopenharmony_ci * #UD, #GP, or #SS. 46598c2ecf20Sopenharmony_ci */ 46608c2ecf20Sopenharmony_ciint get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, 46618c2ecf20Sopenharmony_ci u32 vmx_instruction_info, bool wr, int len, gva_t *ret) 46628c2ecf20Sopenharmony_ci{ 46638c2ecf20Sopenharmony_ci gva_t off; 46648c2ecf20Sopenharmony_ci bool exn; 46658c2ecf20Sopenharmony_ci struct kvm_segment s; 46668c2ecf20Sopenharmony_ci 46678c2ecf20Sopenharmony_ci /* 46688c2ecf20Sopenharmony_ci * According to Vol. 
3B, "Information for VM Exits Due to Instruction 46698c2ecf20Sopenharmony_ci * Execution", on an exit, vmx_instruction_info holds most of the 46708c2ecf20Sopenharmony_ci * addressing components of the operand. Only the displacement part 46718c2ecf20Sopenharmony_ci * is put in exit_qualification (see 3B, "Basic VM-Exit Information"). 46728c2ecf20Sopenharmony_ci * For how an actual address is calculated from all these components, 46738c2ecf20Sopenharmony_ci * refer to Vol. 1, "Operand Addressing". 46748c2ecf20Sopenharmony_ci */ 46758c2ecf20Sopenharmony_ci int scaling = vmx_instruction_info & 3; 46768c2ecf20Sopenharmony_ci int addr_size = (vmx_instruction_info >> 7) & 7; 46778c2ecf20Sopenharmony_ci bool is_reg = vmx_instruction_info & (1u << 10); 46788c2ecf20Sopenharmony_ci int seg_reg = (vmx_instruction_info >> 15) & 7; 46798c2ecf20Sopenharmony_ci int index_reg = (vmx_instruction_info >> 18) & 0xf; 46808c2ecf20Sopenharmony_ci bool index_is_valid = !(vmx_instruction_info & (1u << 22)); 46818c2ecf20Sopenharmony_ci int base_reg = (vmx_instruction_info >> 23) & 0xf; 46828c2ecf20Sopenharmony_ci bool base_is_valid = !(vmx_instruction_info & (1u << 27)); 46838c2ecf20Sopenharmony_ci 46848c2ecf20Sopenharmony_ci if (is_reg) { 46858c2ecf20Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 46868c2ecf20Sopenharmony_ci return 1; 46878c2ecf20Sopenharmony_ci } 46888c2ecf20Sopenharmony_ci 46898c2ecf20Sopenharmony_ci /* Addr = segment_base + offset */ 46908c2ecf20Sopenharmony_ci /* offset = base + [index * scale] + displacement */ 46918c2ecf20Sopenharmony_ci off = exit_qualification; /* holds the displacement */ 46928c2ecf20Sopenharmony_ci if (addr_size == 1) 46938c2ecf20Sopenharmony_ci off = (gva_t)sign_extend64(off, 31); 46948c2ecf20Sopenharmony_ci else if (addr_size == 0) 46958c2ecf20Sopenharmony_ci off = (gva_t)sign_extend64(off, 15); 46968c2ecf20Sopenharmony_ci if (base_is_valid) 46978c2ecf20Sopenharmony_ci off += kvm_register_readl(vcpu, base_reg); 46988c2ecf20Sopenharmony_ci 
if (index_is_valid) 46998c2ecf20Sopenharmony_ci off += kvm_register_readl(vcpu, index_reg) << scaling; 47008c2ecf20Sopenharmony_ci vmx_get_segment(vcpu, &s, seg_reg); 47018c2ecf20Sopenharmony_ci 47028c2ecf20Sopenharmony_ci /* 47038c2ecf20Sopenharmony_ci * The effective address, i.e. @off, of a memory operand is truncated 47048c2ecf20Sopenharmony_ci * based on the address size of the instruction. Note that this is 47058c2ecf20Sopenharmony_ci * the *effective address*, i.e. the address prior to accounting for 47068c2ecf20Sopenharmony_ci * the segment's base. 47078c2ecf20Sopenharmony_ci */ 47088c2ecf20Sopenharmony_ci if (addr_size == 1) /* 32 bit */ 47098c2ecf20Sopenharmony_ci off &= 0xffffffff; 47108c2ecf20Sopenharmony_ci else if (addr_size == 0) /* 16 bit */ 47118c2ecf20Sopenharmony_ci off &= 0xffff; 47128c2ecf20Sopenharmony_ci 47138c2ecf20Sopenharmony_ci /* Checks for #GP/#SS exceptions. */ 47148c2ecf20Sopenharmony_ci exn = false; 47158c2ecf20Sopenharmony_ci if (is_long_mode(vcpu)) { 47168c2ecf20Sopenharmony_ci /* 47178c2ecf20Sopenharmony_ci * The virtual/linear address is never truncated in 64-bit 47188c2ecf20Sopenharmony_ci * mode, e.g. a 32-bit address size can yield a 64-bit virtual 47198c2ecf20Sopenharmony_ci * address when using FS/GS with a non-zero base. 47208c2ecf20Sopenharmony_ci */ 47218c2ecf20Sopenharmony_ci if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS) 47228c2ecf20Sopenharmony_ci *ret = s.base + off; 47238c2ecf20Sopenharmony_ci else 47248c2ecf20Sopenharmony_ci *ret = off; 47258c2ecf20Sopenharmony_ci 47268c2ecf20Sopenharmony_ci /* Long mode: #GP(0)/#SS(0) if the memory address is in a 47278c2ecf20Sopenharmony_ci * non-canonical form. This is the only check on the memory 47288c2ecf20Sopenharmony_ci * destination for long mode! 
47298c2ecf20Sopenharmony_ci */ 47308c2ecf20Sopenharmony_ci exn = is_noncanonical_address(*ret, vcpu); 47318c2ecf20Sopenharmony_ci } else { 47328c2ecf20Sopenharmony_ci /* 47338c2ecf20Sopenharmony_ci * When not in long mode, the virtual/linear address is 47348c2ecf20Sopenharmony_ci * unconditionally truncated to 32 bits regardless of the 47358c2ecf20Sopenharmony_ci * address size. 47368c2ecf20Sopenharmony_ci */ 47378c2ecf20Sopenharmony_ci *ret = (s.base + off) & 0xffffffff; 47388c2ecf20Sopenharmony_ci 47398c2ecf20Sopenharmony_ci /* Protected mode: apply checks for segment validity in the 47408c2ecf20Sopenharmony_ci * following order: 47418c2ecf20Sopenharmony_ci * - segment type check (#GP(0) may be thrown) 47428c2ecf20Sopenharmony_ci * - usability check (#GP(0)/#SS(0)) 47438c2ecf20Sopenharmony_ci * - limit check (#GP(0)/#SS(0)) 47448c2ecf20Sopenharmony_ci */ 47458c2ecf20Sopenharmony_ci if (wr) 47468c2ecf20Sopenharmony_ci /* #GP(0) if the destination operand is located in a 47478c2ecf20Sopenharmony_ci * read-only data segment or any code segment. 47488c2ecf20Sopenharmony_ci */ 47498c2ecf20Sopenharmony_ci exn = ((s.type & 0xa) == 0 || (s.type & 8)); 47508c2ecf20Sopenharmony_ci else 47518c2ecf20Sopenharmony_ci /* #GP(0) if the source operand is located in an 47528c2ecf20Sopenharmony_ci * execute-only code segment 47538c2ecf20Sopenharmony_ci */ 47548c2ecf20Sopenharmony_ci exn = ((s.type & 0xa) == 8); 47558c2ecf20Sopenharmony_ci if (exn) { 47568c2ecf20Sopenharmony_ci kvm_queue_exception_e(vcpu, GP_VECTOR, 0); 47578c2ecf20Sopenharmony_ci return 1; 47588c2ecf20Sopenharmony_ci } 47598c2ecf20Sopenharmony_ci /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. 47608c2ecf20Sopenharmony_ci */ 47618c2ecf20Sopenharmony_ci exn = (s.unusable != 0); 47628c2ecf20Sopenharmony_ci 47638c2ecf20Sopenharmony_ci /* 47648c2ecf20Sopenharmony_ci * Protected mode: #GP(0)/#SS(0) if the memory operand is 47658c2ecf20Sopenharmony_ci * outside the segment limit. 
All CPUs that support VMX ignore 47668c2ecf20Sopenharmony_ci * limit checks for flat segments, i.e. segments with base==0, 47678c2ecf20Sopenharmony_ci * limit==0xffffffff and of type expand-up data or code. 47688c2ecf20Sopenharmony_ci */ 47698c2ecf20Sopenharmony_ci if (!(s.base == 0 && s.limit == 0xffffffff && 47708c2ecf20Sopenharmony_ci ((s.type & 8) || !(s.type & 4)))) 47718c2ecf20Sopenharmony_ci exn = exn || ((u64)off + len - 1 > s.limit); 47728c2ecf20Sopenharmony_ci } 47738c2ecf20Sopenharmony_ci if (exn) { 47748c2ecf20Sopenharmony_ci kvm_queue_exception_e(vcpu, 47758c2ecf20Sopenharmony_ci seg_reg == VCPU_SREG_SS ? 47768c2ecf20Sopenharmony_ci SS_VECTOR : GP_VECTOR, 47778c2ecf20Sopenharmony_ci 0); 47788c2ecf20Sopenharmony_ci return 1; 47798c2ecf20Sopenharmony_ci } 47808c2ecf20Sopenharmony_ci 47818c2ecf20Sopenharmony_ci return 0; 47828c2ecf20Sopenharmony_ci} 47838c2ecf20Sopenharmony_ci 47848c2ecf20Sopenharmony_civoid nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu) 47858c2ecf20Sopenharmony_ci{ 47868c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx; 47878c2ecf20Sopenharmony_ci 47888c2ecf20Sopenharmony_ci if (!nested_vmx_allowed(vcpu)) 47898c2ecf20Sopenharmony_ci return; 47908c2ecf20Sopenharmony_ci 47918c2ecf20Sopenharmony_ci vmx = to_vmx(vcpu); 47928c2ecf20Sopenharmony_ci if (kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) { 47938c2ecf20Sopenharmony_ci vmx->nested.msrs.entry_ctls_high |= 47948c2ecf20Sopenharmony_ci VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; 47958c2ecf20Sopenharmony_ci vmx->nested.msrs.exit_ctls_high |= 47968c2ecf20Sopenharmony_ci VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; 47978c2ecf20Sopenharmony_ci } else { 47988c2ecf20Sopenharmony_ci vmx->nested.msrs.entry_ctls_high &= 47998c2ecf20Sopenharmony_ci ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; 48008c2ecf20Sopenharmony_ci vmx->nested.msrs.exit_ctls_high &= 48018c2ecf20Sopenharmony_ci ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; 48028c2ecf20Sopenharmony_ci } 48038c2ecf20Sopenharmony_ci} 
48048c2ecf20Sopenharmony_ci 48058c2ecf20Sopenharmony_cistatic int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer, 48068c2ecf20Sopenharmony_ci int *ret) 48078c2ecf20Sopenharmony_ci{ 48088c2ecf20Sopenharmony_ci gva_t gva; 48098c2ecf20Sopenharmony_ci struct x86_exception e; 48108c2ecf20Sopenharmony_ci int r; 48118c2ecf20Sopenharmony_ci 48128c2ecf20Sopenharmony_ci if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), 48138c2ecf20Sopenharmony_ci vmcs_read32(VMX_INSTRUCTION_INFO), false, 48148c2ecf20Sopenharmony_ci sizeof(*vmpointer), &gva)) { 48158c2ecf20Sopenharmony_ci *ret = 1; 48168c2ecf20Sopenharmony_ci return -EINVAL; 48178c2ecf20Sopenharmony_ci } 48188c2ecf20Sopenharmony_ci 48198c2ecf20Sopenharmony_ci r = kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e); 48208c2ecf20Sopenharmony_ci if (r != X86EMUL_CONTINUE) { 48218c2ecf20Sopenharmony_ci *ret = kvm_handle_memory_failure(vcpu, r, &e); 48228c2ecf20Sopenharmony_ci return -EINVAL; 48238c2ecf20Sopenharmony_ci } 48248c2ecf20Sopenharmony_ci 48258c2ecf20Sopenharmony_ci return 0; 48268c2ecf20Sopenharmony_ci} 48278c2ecf20Sopenharmony_ci 48288c2ecf20Sopenharmony_ci/* 48298c2ecf20Sopenharmony_ci * Allocate a shadow VMCS and associate it with the currently loaded 48308c2ecf20Sopenharmony_ci * VMCS, unless such a shadow VMCS already exists. The newly allocated 48318c2ecf20Sopenharmony_ci * VMCS is also VMCLEARed, so that it is ready for use. 48328c2ecf20Sopenharmony_ci */ 48338c2ecf20Sopenharmony_cistatic struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu) 48348c2ecf20Sopenharmony_ci{ 48358c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 48368c2ecf20Sopenharmony_ci struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs; 48378c2ecf20Sopenharmony_ci 48388c2ecf20Sopenharmony_ci /* 48398c2ecf20Sopenharmony_ci * We should allocate a shadow vmcs for vmcs01 only when L1 48408c2ecf20Sopenharmony_ci * executes VMXON and free it when L1 executes VMXOFF. 
48418c2ecf20Sopenharmony_ci * As it is invalid to execute VMXON twice, we shouldn't reach 48428c2ecf20Sopenharmony_ci * here when vmcs01 already have an allocated shadow vmcs. 48438c2ecf20Sopenharmony_ci */ 48448c2ecf20Sopenharmony_ci WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs); 48458c2ecf20Sopenharmony_ci 48468c2ecf20Sopenharmony_ci if (!loaded_vmcs->shadow_vmcs) { 48478c2ecf20Sopenharmony_ci loaded_vmcs->shadow_vmcs = alloc_vmcs(true); 48488c2ecf20Sopenharmony_ci if (loaded_vmcs->shadow_vmcs) 48498c2ecf20Sopenharmony_ci vmcs_clear(loaded_vmcs->shadow_vmcs); 48508c2ecf20Sopenharmony_ci } 48518c2ecf20Sopenharmony_ci return loaded_vmcs->shadow_vmcs; 48528c2ecf20Sopenharmony_ci} 48538c2ecf20Sopenharmony_ci 48548c2ecf20Sopenharmony_cistatic int enter_vmx_operation(struct kvm_vcpu *vcpu) 48558c2ecf20Sopenharmony_ci{ 48568c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 48578c2ecf20Sopenharmony_ci int r; 48588c2ecf20Sopenharmony_ci 48598c2ecf20Sopenharmony_ci r = alloc_loaded_vmcs(&vmx->nested.vmcs02); 48608c2ecf20Sopenharmony_ci if (r < 0) 48618c2ecf20Sopenharmony_ci goto out_vmcs02; 48628c2ecf20Sopenharmony_ci 48638c2ecf20Sopenharmony_ci vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT); 48648c2ecf20Sopenharmony_ci if (!vmx->nested.cached_vmcs12) 48658c2ecf20Sopenharmony_ci goto out_cached_vmcs12; 48668c2ecf20Sopenharmony_ci 48678c2ecf20Sopenharmony_ci vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT); 48688c2ecf20Sopenharmony_ci if (!vmx->nested.cached_shadow_vmcs12) 48698c2ecf20Sopenharmony_ci goto out_cached_shadow_vmcs12; 48708c2ecf20Sopenharmony_ci 48718c2ecf20Sopenharmony_ci if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu)) 48728c2ecf20Sopenharmony_ci goto out_shadow_vmcs; 48738c2ecf20Sopenharmony_ci 48748c2ecf20Sopenharmony_ci hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, 48758c2ecf20Sopenharmony_ci HRTIMER_MODE_ABS_PINNED); 48768c2ecf20Sopenharmony_ci 
vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; 48778c2ecf20Sopenharmony_ci 48788c2ecf20Sopenharmony_ci vmx->nested.vpid02 = allocate_vpid(); 48798c2ecf20Sopenharmony_ci 48808c2ecf20Sopenharmony_ci vmx->nested.vmcs02_initialized = false; 48818c2ecf20Sopenharmony_ci vmx->nested.vmxon = true; 48828c2ecf20Sopenharmony_ci 48838c2ecf20Sopenharmony_ci if (vmx_pt_mode_is_host_guest()) { 48848c2ecf20Sopenharmony_ci vmx->pt_desc.guest.ctl = 0; 48858c2ecf20Sopenharmony_ci pt_update_intercept_for_msr(vcpu); 48868c2ecf20Sopenharmony_ci } 48878c2ecf20Sopenharmony_ci 48888c2ecf20Sopenharmony_ci return 0; 48898c2ecf20Sopenharmony_ci 48908c2ecf20Sopenharmony_ciout_shadow_vmcs: 48918c2ecf20Sopenharmony_ci kfree(vmx->nested.cached_shadow_vmcs12); 48928c2ecf20Sopenharmony_ci 48938c2ecf20Sopenharmony_ciout_cached_shadow_vmcs12: 48948c2ecf20Sopenharmony_ci kfree(vmx->nested.cached_vmcs12); 48958c2ecf20Sopenharmony_ci 48968c2ecf20Sopenharmony_ciout_cached_vmcs12: 48978c2ecf20Sopenharmony_ci free_loaded_vmcs(&vmx->nested.vmcs02); 48988c2ecf20Sopenharmony_ci 48998c2ecf20Sopenharmony_ciout_vmcs02: 49008c2ecf20Sopenharmony_ci return -ENOMEM; 49018c2ecf20Sopenharmony_ci} 49028c2ecf20Sopenharmony_ci 49038c2ecf20Sopenharmony_ci/* 49048c2ecf20Sopenharmony_ci * Emulate the VMXON instruction. 49058c2ecf20Sopenharmony_ci * Currently, we just remember that VMX is active, and do not save or even 49068c2ecf20Sopenharmony_ci * inspect the argument to VMXON (the so-called "VMXON pointer") because we 49078c2ecf20Sopenharmony_ci * do not currently need to store anything in that guest-allocated memory 49088c2ecf20Sopenharmony_ci * region. Consequently, VMCLEAR and VMPTRLD also do not verify that the their 49098c2ecf20Sopenharmony_ci * argument is different from the VMXON pointer (which the spec says they do). 
49108c2ecf20Sopenharmony_ci */ 49118c2ecf20Sopenharmony_cistatic int handle_vmon(struct kvm_vcpu *vcpu) 49128c2ecf20Sopenharmony_ci{ 49138c2ecf20Sopenharmony_ci int ret; 49148c2ecf20Sopenharmony_ci gpa_t vmptr; 49158c2ecf20Sopenharmony_ci uint32_t revision; 49168c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 49178c2ecf20Sopenharmony_ci const u64 VMXON_NEEDED_FEATURES = FEAT_CTL_LOCKED 49188c2ecf20Sopenharmony_ci | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; 49198c2ecf20Sopenharmony_ci 49208c2ecf20Sopenharmony_ci /* 49218c2ecf20Sopenharmony_ci * Manually check CR4.VMXE checks, KVM must force CR4.VMXE=1 to enter 49228c2ecf20Sopenharmony_ci * the guest and so cannot rely on hardware to perform the check, 49238c2ecf20Sopenharmony_ci * which has higher priority than VM-Exit (see Intel SDM's pseudocode 49248c2ecf20Sopenharmony_ci * for VMXON). 49258c2ecf20Sopenharmony_ci * 49268c2ecf20Sopenharmony_ci * Rely on hardware for the other pre-VM-Exit checks, CR0.PE=1, !VM86 49278c2ecf20Sopenharmony_ci * and !COMPATIBILITY modes. For an unrestricted guest, KVM doesn't 49288c2ecf20Sopenharmony_ci * force any of the relevant guest state. For a restricted guest, KVM 49298c2ecf20Sopenharmony_ci * does force CR0.PE=1, but only to also force VM86 in order to emulate 49308c2ecf20Sopenharmony_ci * Real Mode, and so there's no need to check CR0.PE manually. 49318c2ecf20Sopenharmony_ci */ 49328c2ecf20Sopenharmony_ci if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) { 49338c2ecf20Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 49348c2ecf20Sopenharmony_ci return 1; 49358c2ecf20Sopenharmony_ci } 49368c2ecf20Sopenharmony_ci 49378c2ecf20Sopenharmony_ci /* 49388c2ecf20Sopenharmony_ci * The CPL is checked for "not in VMX operation" and for "in VMX root", 49398c2ecf20Sopenharmony_ci * and has higher priority than the VM-Fail due to being post-VMXON, 49408c2ecf20Sopenharmony_ci * i.e. VMXON #GPs outside of VMX non-root if CPL!=0. 
In VMX non-root, 49418c2ecf20Sopenharmony_ci * VMXON causes VM-Exit and KVM unconditionally forwards VMXON VM-Exits 49428c2ecf20Sopenharmony_ci * from L2 to L1, i.e. there's no need to check for the vCPU being in 49438c2ecf20Sopenharmony_ci * VMX non-root. 49448c2ecf20Sopenharmony_ci * 49458c2ecf20Sopenharmony_ci * Forwarding the VM-Exit unconditionally, i.e. without performing the 49468c2ecf20Sopenharmony_ci * #UD checks (see above), is functionally ok because KVM doesn't allow 49478c2ecf20Sopenharmony_ci * L1 to run L2 without CR4.VMXE=0, and because KVM never modifies L2's 49488c2ecf20Sopenharmony_ci * CR0 or CR4, i.e. it's L2's responsibility to emulate #UDs that are 49498c2ecf20Sopenharmony_ci * missed by hardware due to shadowing CR0 and/or CR4. 49508c2ecf20Sopenharmony_ci */ 49518c2ecf20Sopenharmony_ci if (vmx_get_cpl(vcpu)) { 49528c2ecf20Sopenharmony_ci kvm_inject_gp(vcpu, 0); 49538c2ecf20Sopenharmony_ci return 1; 49548c2ecf20Sopenharmony_ci } 49558c2ecf20Sopenharmony_ci 49568c2ecf20Sopenharmony_ci if (vmx->nested.vmxon) 49578c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); 49588c2ecf20Sopenharmony_ci 49598c2ecf20Sopenharmony_ci /* 49608c2ecf20Sopenharmony_ci * Invalid CR0/CR4 generates #GP. These checks are performed if and 49618c2ecf20Sopenharmony_ci * only if the vCPU isn't already in VMX operation, i.e. effectively 49628c2ecf20Sopenharmony_ci * have lower priority than the VM-Fail above. 
49638c2ecf20Sopenharmony_ci */ 49648c2ecf20Sopenharmony_ci if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) || 49658c2ecf20Sopenharmony_ci !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { 49668c2ecf20Sopenharmony_ci kvm_inject_gp(vcpu, 0); 49678c2ecf20Sopenharmony_ci return 1; 49688c2ecf20Sopenharmony_ci } 49698c2ecf20Sopenharmony_ci 49708c2ecf20Sopenharmony_ci if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES) 49718c2ecf20Sopenharmony_ci != VMXON_NEEDED_FEATURES) { 49728c2ecf20Sopenharmony_ci kvm_inject_gp(vcpu, 0); 49738c2ecf20Sopenharmony_ci return 1; 49748c2ecf20Sopenharmony_ci } 49758c2ecf20Sopenharmony_ci 49768c2ecf20Sopenharmony_ci if (nested_vmx_get_vmptr(vcpu, &vmptr, &ret)) 49778c2ecf20Sopenharmony_ci return ret; 49788c2ecf20Sopenharmony_ci 49798c2ecf20Sopenharmony_ci /* 49808c2ecf20Sopenharmony_ci * SDM 3: 24.11.5 49818c2ecf20Sopenharmony_ci * The first 4 bytes of VMXON region contain the supported 49828c2ecf20Sopenharmony_ci * VMCS revision identifier 49838c2ecf20Sopenharmony_ci * 49848c2ecf20Sopenharmony_ci * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case; 49858c2ecf20Sopenharmony_ci * which replaces physical address width with 32 49868c2ecf20Sopenharmony_ci */ 49878c2ecf20Sopenharmony_ci if (!page_address_valid(vcpu, vmptr)) 49888c2ecf20Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 49898c2ecf20Sopenharmony_ci 49908c2ecf20Sopenharmony_ci if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) || 49918c2ecf20Sopenharmony_ci revision != VMCS12_REVISION) 49928c2ecf20Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 49938c2ecf20Sopenharmony_ci 49948c2ecf20Sopenharmony_ci vmx->nested.vmxon_ptr = vmptr; 49958c2ecf20Sopenharmony_ci ret = enter_vmx_operation(vcpu); 49968c2ecf20Sopenharmony_ci if (ret) 49978c2ecf20Sopenharmony_ci return ret; 49988c2ecf20Sopenharmony_ci 49998c2ecf20Sopenharmony_ci return nested_vmx_succeed(vcpu); 50008c2ecf20Sopenharmony_ci} 50018c2ecf20Sopenharmony_ci 
50028c2ecf20Sopenharmony_cistatic inline void nested_release_vmcs12(struct kvm_vcpu *vcpu) 50038c2ecf20Sopenharmony_ci{ 50048c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 50058c2ecf20Sopenharmony_ci 50068c2ecf20Sopenharmony_ci if (vmx->nested.current_vmptr == -1ull) 50078c2ecf20Sopenharmony_ci return; 50088c2ecf20Sopenharmony_ci 50098c2ecf20Sopenharmony_ci copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu)); 50108c2ecf20Sopenharmony_ci 50118c2ecf20Sopenharmony_ci if (enable_shadow_vmcs) { 50128c2ecf20Sopenharmony_ci /* copy to memory all shadowed fields in case 50138c2ecf20Sopenharmony_ci they were modified */ 50148c2ecf20Sopenharmony_ci copy_shadow_to_vmcs12(vmx); 50158c2ecf20Sopenharmony_ci vmx_disable_shadow_vmcs(vmx); 50168c2ecf20Sopenharmony_ci } 50178c2ecf20Sopenharmony_ci vmx->nested.posted_intr_nv = -1; 50188c2ecf20Sopenharmony_ci 50198c2ecf20Sopenharmony_ci /* Flush VMCS12 to guest memory */ 50208c2ecf20Sopenharmony_ci kvm_vcpu_write_guest_page(vcpu, 50218c2ecf20Sopenharmony_ci vmx->nested.current_vmptr >> PAGE_SHIFT, 50228c2ecf20Sopenharmony_ci vmx->nested.cached_vmcs12, 0, VMCS12_SIZE); 50238c2ecf20Sopenharmony_ci 50248c2ecf20Sopenharmony_ci kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); 50258c2ecf20Sopenharmony_ci 50268c2ecf20Sopenharmony_ci vmx->nested.current_vmptr = -1ull; 50278c2ecf20Sopenharmony_ci} 50288c2ecf20Sopenharmony_ci 50298c2ecf20Sopenharmony_ci/* Emulate the VMXOFF instruction */ 50308c2ecf20Sopenharmony_cistatic int handle_vmoff(struct kvm_vcpu *vcpu) 50318c2ecf20Sopenharmony_ci{ 50328c2ecf20Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 50338c2ecf20Sopenharmony_ci return 1; 50348c2ecf20Sopenharmony_ci 50358c2ecf20Sopenharmony_ci free_nested(vcpu); 50368c2ecf20Sopenharmony_ci 50378c2ecf20Sopenharmony_ci /* Process a latched INIT during time CPU was in VMX operation */ 50388c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_EVENT, vcpu); 50398c2ecf20Sopenharmony_ci 50408c2ecf20Sopenharmony_ci return 
nested_vmx_succeed(vcpu); 50418c2ecf20Sopenharmony_ci} 50428c2ecf20Sopenharmony_ci 50438c2ecf20Sopenharmony_ci/* Emulate the VMCLEAR instruction */ 50448c2ecf20Sopenharmony_cistatic int handle_vmclear(struct kvm_vcpu *vcpu) 50458c2ecf20Sopenharmony_ci{ 50468c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 50478c2ecf20Sopenharmony_ci u32 zero = 0; 50488c2ecf20Sopenharmony_ci gpa_t vmptr; 50498c2ecf20Sopenharmony_ci u64 evmcs_gpa; 50508c2ecf20Sopenharmony_ci int r; 50518c2ecf20Sopenharmony_ci 50528c2ecf20Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 50538c2ecf20Sopenharmony_ci return 1; 50548c2ecf20Sopenharmony_ci 50558c2ecf20Sopenharmony_ci if (nested_vmx_get_vmptr(vcpu, &vmptr, &r)) 50568c2ecf20Sopenharmony_ci return r; 50578c2ecf20Sopenharmony_ci 50588c2ecf20Sopenharmony_ci if (!page_address_valid(vcpu, vmptr)) 50598c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); 50608c2ecf20Sopenharmony_ci 50618c2ecf20Sopenharmony_ci if (vmptr == vmx->nested.vmxon_ptr) 50628c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_VMXON_POINTER); 50638c2ecf20Sopenharmony_ci 50648c2ecf20Sopenharmony_ci /* 50658c2ecf20Sopenharmony_ci * When Enlightened VMEntry is enabled on the calling CPU we treat 50668c2ecf20Sopenharmony_ci * memory area pointer by vmptr as Enlightened VMCS (as there's no good 50678c2ecf20Sopenharmony_ci * way to distinguish it from VMCS12) and we must not corrupt it by 50688c2ecf20Sopenharmony_ci * writing to the non-existent 'launch_state' field. The area doesn't 50698c2ecf20Sopenharmony_ci * have to be the currently active EVMCS on the calling CPU and there's 50708c2ecf20Sopenharmony_ci * nothing KVM has to do to transition it from 'active' to 'non-active' 50718c2ecf20Sopenharmony_ci * state. It is possible that the area will stay mapped as 50728c2ecf20Sopenharmony_ci * vmx->nested.hv_evmcs but this shouldn't be a problem. 
50738c2ecf20Sopenharmony_ci */ 50748c2ecf20Sopenharmony_ci if (likely(!vmx->nested.enlightened_vmcs_enabled || 50758c2ecf20Sopenharmony_ci !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) { 50768c2ecf20Sopenharmony_ci if (vmptr == vmx->nested.current_vmptr) 50778c2ecf20Sopenharmony_ci nested_release_vmcs12(vcpu); 50788c2ecf20Sopenharmony_ci 50798c2ecf20Sopenharmony_ci kvm_vcpu_write_guest(vcpu, 50808c2ecf20Sopenharmony_ci vmptr + offsetof(struct vmcs12, 50818c2ecf20Sopenharmony_ci launch_state), 50828c2ecf20Sopenharmony_ci &zero, sizeof(zero)); 50838c2ecf20Sopenharmony_ci } 50848c2ecf20Sopenharmony_ci 50858c2ecf20Sopenharmony_ci return nested_vmx_succeed(vcpu); 50868c2ecf20Sopenharmony_ci} 50878c2ecf20Sopenharmony_ci 50888c2ecf20Sopenharmony_ci/* Emulate the VMLAUNCH instruction */ 50898c2ecf20Sopenharmony_cistatic int handle_vmlaunch(struct kvm_vcpu *vcpu) 50908c2ecf20Sopenharmony_ci{ 50918c2ecf20Sopenharmony_ci return nested_vmx_run(vcpu, true); 50928c2ecf20Sopenharmony_ci} 50938c2ecf20Sopenharmony_ci 50948c2ecf20Sopenharmony_ci/* Emulate the VMRESUME instruction */ 50958c2ecf20Sopenharmony_cistatic int handle_vmresume(struct kvm_vcpu *vcpu) 50968c2ecf20Sopenharmony_ci{ 50978c2ecf20Sopenharmony_ci 50988c2ecf20Sopenharmony_ci return nested_vmx_run(vcpu, false); 50998c2ecf20Sopenharmony_ci} 51008c2ecf20Sopenharmony_ci 51018c2ecf20Sopenharmony_cistatic int handle_vmread(struct kvm_vcpu *vcpu) 51028c2ecf20Sopenharmony_ci{ 51038c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? 
get_shadow_vmcs12(vcpu) 51048c2ecf20Sopenharmony_ci : get_vmcs12(vcpu); 51058c2ecf20Sopenharmony_ci unsigned long exit_qualification = vmx_get_exit_qual(vcpu); 51068c2ecf20Sopenharmony_ci u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); 51078c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 51088c2ecf20Sopenharmony_ci struct x86_exception e; 51098c2ecf20Sopenharmony_ci unsigned long field; 51108c2ecf20Sopenharmony_ci u64 value; 51118c2ecf20Sopenharmony_ci gva_t gva = 0; 51128c2ecf20Sopenharmony_ci short offset; 51138c2ecf20Sopenharmony_ci int len, r; 51148c2ecf20Sopenharmony_ci 51158c2ecf20Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 51168c2ecf20Sopenharmony_ci return 1; 51178c2ecf20Sopenharmony_ci 51188c2ecf20Sopenharmony_ci /* 51198c2ecf20Sopenharmony_ci * In VMX non-root operation, when the VMCS-link pointer is -1ull, 51208c2ecf20Sopenharmony_ci * any VMREAD sets the ALU flags for VMfailInvalid. 51218c2ecf20Sopenharmony_ci */ 51228c2ecf20Sopenharmony_ci if (vmx->nested.current_vmptr == -1ull || 51238c2ecf20Sopenharmony_ci (is_guest_mode(vcpu) && 51248c2ecf20Sopenharmony_ci get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)) 51258c2ecf20Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 51268c2ecf20Sopenharmony_ci 51278c2ecf20Sopenharmony_ci /* Decode instruction info and find the field to read */ 51288c2ecf20Sopenharmony_ci field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf)); 51298c2ecf20Sopenharmony_ci 51308c2ecf20Sopenharmony_ci offset = vmcs_field_to_offset(field); 51318c2ecf20Sopenharmony_ci if (offset < 0) 51328c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); 51338c2ecf20Sopenharmony_ci 51348c2ecf20Sopenharmony_ci if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) 51358c2ecf20Sopenharmony_ci copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); 51368c2ecf20Sopenharmony_ci 51378c2ecf20Sopenharmony_ci /* Read the field, zero-extended to a u64 value */ 51388c2ecf20Sopenharmony_ci value = 
vmcs12_read_any(vmcs12, field, offset); 51398c2ecf20Sopenharmony_ci 51408c2ecf20Sopenharmony_ci /* 51418c2ecf20Sopenharmony_ci * Now copy part of this value to register or memory, as requested. 51428c2ecf20Sopenharmony_ci * Note that the number of bits actually copied is 32 or 64 depending 51438c2ecf20Sopenharmony_ci * on the guest's mode (32 or 64 bit), not on the given field's length. 51448c2ecf20Sopenharmony_ci */ 51458c2ecf20Sopenharmony_ci if (instr_info & BIT(10)) { 51468c2ecf20Sopenharmony_ci kvm_register_writel(vcpu, (((instr_info) >> 3) & 0xf), value); 51478c2ecf20Sopenharmony_ci } else { 51488c2ecf20Sopenharmony_ci len = is_64_bit_mode(vcpu) ? 8 : 4; 51498c2ecf20Sopenharmony_ci if (get_vmx_mem_address(vcpu, exit_qualification, 51508c2ecf20Sopenharmony_ci instr_info, true, len, &gva)) 51518c2ecf20Sopenharmony_ci return 1; 51528c2ecf20Sopenharmony_ci /* _system ok, nested_vmx_check_permission has verified cpl=0 */ 51538c2ecf20Sopenharmony_ci r = kvm_write_guest_virt_system(vcpu, gva, &value, len, &e); 51548c2ecf20Sopenharmony_ci if (r != X86EMUL_CONTINUE) 51558c2ecf20Sopenharmony_ci return kvm_handle_memory_failure(vcpu, r, &e); 51568c2ecf20Sopenharmony_ci } 51578c2ecf20Sopenharmony_ci 51588c2ecf20Sopenharmony_ci return nested_vmx_succeed(vcpu); 51598c2ecf20Sopenharmony_ci} 51608c2ecf20Sopenharmony_ci 51618c2ecf20Sopenharmony_cistatic bool is_shadow_field_rw(unsigned long field) 51628c2ecf20Sopenharmony_ci{ 51638c2ecf20Sopenharmony_ci switch (field) { 51648c2ecf20Sopenharmony_ci#define SHADOW_FIELD_RW(x, y) case x: 51658c2ecf20Sopenharmony_ci#include "vmcs_shadow_fields.h" 51668c2ecf20Sopenharmony_ci return true; 51678c2ecf20Sopenharmony_ci default: 51688c2ecf20Sopenharmony_ci break; 51698c2ecf20Sopenharmony_ci } 51708c2ecf20Sopenharmony_ci return false; 51718c2ecf20Sopenharmony_ci} 51728c2ecf20Sopenharmony_ci 51738c2ecf20Sopenharmony_cistatic bool is_shadow_field_ro(unsigned long field) 51748c2ecf20Sopenharmony_ci{ 51758c2ecf20Sopenharmony_ci switch 
(field) { 51768c2ecf20Sopenharmony_ci#define SHADOW_FIELD_RO(x, y) case x: 51778c2ecf20Sopenharmony_ci#include "vmcs_shadow_fields.h" 51788c2ecf20Sopenharmony_ci return true; 51798c2ecf20Sopenharmony_ci default: 51808c2ecf20Sopenharmony_ci break; 51818c2ecf20Sopenharmony_ci } 51828c2ecf20Sopenharmony_ci return false; 51838c2ecf20Sopenharmony_ci} 51848c2ecf20Sopenharmony_ci 51858c2ecf20Sopenharmony_cistatic int handle_vmwrite(struct kvm_vcpu *vcpu) 51868c2ecf20Sopenharmony_ci{ 51878c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu) 51888c2ecf20Sopenharmony_ci : get_vmcs12(vcpu); 51898c2ecf20Sopenharmony_ci unsigned long exit_qualification = vmx_get_exit_qual(vcpu); 51908c2ecf20Sopenharmony_ci u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); 51918c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 51928c2ecf20Sopenharmony_ci struct x86_exception e; 51938c2ecf20Sopenharmony_ci unsigned long field; 51948c2ecf20Sopenharmony_ci short offset; 51958c2ecf20Sopenharmony_ci gva_t gva; 51968c2ecf20Sopenharmony_ci int len, r; 51978c2ecf20Sopenharmony_ci 51988c2ecf20Sopenharmony_ci /* 51998c2ecf20Sopenharmony_ci * The value to write might be 32 or 64 bits, depending on L1's long 52008c2ecf20Sopenharmony_ci * mode, and eventually we need to write that into a field of several 52018c2ecf20Sopenharmony_ci * possible lengths. The code below first zero-extends the value to 64 52028c2ecf20Sopenharmony_ci * bit (value), and then copies only the appropriate number of 52038c2ecf20Sopenharmony_ci * bits into the vmcs12 field. 
52048c2ecf20Sopenharmony_ci */ 52058c2ecf20Sopenharmony_ci u64 value = 0; 52068c2ecf20Sopenharmony_ci 52078c2ecf20Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 52088c2ecf20Sopenharmony_ci return 1; 52098c2ecf20Sopenharmony_ci 52108c2ecf20Sopenharmony_ci /* 52118c2ecf20Sopenharmony_ci * In VMX non-root operation, when the VMCS-link pointer is -1ull, 52128c2ecf20Sopenharmony_ci * any VMWRITE sets the ALU flags for VMfailInvalid. 52138c2ecf20Sopenharmony_ci */ 52148c2ecf20Sopenharmony_ci if (vmx->nested.current_vmptr == -1ull || 52158c2ecf20Sopenharmony_ci (is_guest_mode(vcpu) && 52168c2ecf20Sopenharmony_ci get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)) 52178c2ecf20Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 52188c2ecf20Sopenharmony_ci 52198c2ecf20Sopenharmony_ci if (instr_info & BIT(10)) 52208c2ecf20Sopenharmony_ci value = kvm_register_readl(vcpu, (((instr_info) >> 3) & 0xf)); 52218c2ecf20Sopenharmony_ci else { 52228c2ecf20Sopenharmony_ci len = is_64_bit_mode(vcpu) ? 8 : 4; 52238c2ecf20Sopenharmony_ci if (get_vmx_mem_address(vcpu, exit_qualification, 52248c2ecf20Sopenharmony_ci instr_info, false, len, &gva)) 52258c2ecf20Sopenharmony_ci return 1; 52268c2ecf20Sopenharmony_ci r = kvm_read_guest_virt(vcpu, gva, &value, len, &e); 52278c2ecf20Sopenharmony_ci if (r != X86EMUL_CONTINUE) 52288c2ecf20Sopenharmony_ci return kvm_handle_memory_failure(vcpu, r, &e); 52298c2ecf20Sopenharmony_ci } 52308c2ecf20Sopenharmony_ci 52318c2ecf20Sopenharmony_ci field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf)); 52328c2ecf20Sopenharmony_ci 52338c2ecf20Sopenharmony_ci offset = vmcs_field_to_offset(field); 52348c2ecf20Sopenharmony_ci if (offset < 0) 52358c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); 52368c2ecf20Sopenharmony_ci 52378c2ecf20Sopenharmony_ci /* 52388c2ecf20Sopenharmony_ci * If the vCPU supports "VMWRITE to any supported field in the 52398c2ecf20Sopenharmony_ci * VMCS," then the "read-only" fields are actually 
read/write. 52408c2ecf20Sopenharmony_ci */ 52418c2ecf20Sopenharmony_ci if (vmcs_field_readonly(field) && 52428c2ecf20Sopenharmony_ci !nested_cpu_has_vmwrite_any_field(vcpu)) 52438c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); 52448c2ecf20Sopenharmony_ci 52458c2ecf20Sopenharmony_ci /* 52468c2ecf20Sopenharmony_ci * Ensure vmcs12 is up-to-date before any VMWRITE that dirties 52478c2ecf20Sopenharmony_ci * vmcs12, else we may crush a field or consume a stale value. 52488c2ecf20Sopenharmony_ci */ 52498c2ecf20Sopenharmony_ci if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) 52508c2ecf20Sopenharmony_ci copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); 52518c2ecf20Sopenharmony_ci 52528c2ecf20Sopenharmony_ci /* 52538c2ecf20Sopenharmony_ci * Some Intel CPUs intentionally drop the reserved bits of the AR byte 52548c2ecf20Sopenharmony_ci * fields on VMWRITE. Emulate this behavior to ensure consistent KVM 52558c2ecf20Sopenharmony_ci * behavior regardless of the underlying hardware, e.g. if an AR_BYTE 52568c2ecf20Sopenharmony_ci * field is intercepted for VMWRITE but not VMREAD (in L1), then VMREAD 52578c2ecf20Sopenharmony_ci * from L1 will return a different value than VMREAD from L2 (L1 sees 52588c2ecf20Sopenharmony_ci * the stripped down value, L2 sees the full value as stored by KVM). 52598c2ecf20Sopenharmony_ci */ 52608c2ecf20Sopenharmony_ci if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES) 52618c2ecf20Sopenharmony_ci value &= 0x1f0ff; 52628c2ecf20Sopenharmony_ci 52638c2ecf20Sopenharmony_ci vmcs12_write_any(vmcs12, field, offset, value); 52648c2ecf20Sopenharmony_ci 52658c2ecf20Sopenharmony_ci /* 52668c2ecf20Sopenharmony_ci * Do not track vmcs12 dirty-state if in guest-mode as we actually 52678c2ecf20Sopenharmony_ci * dirty shadow vmcs12 instead of vmcs12. Fields that can be updated 52688c2ecf20Sopenharmony_ci * by L1 without a vmexit are always updated in the vmcs02, i.e. 
don't 52698c2ecf20Sopenharmony_ci * "dirty" vmcs12, all others go down the prepare_vmcs02() slow path. 52708c2ecf20Sopenharmony_ci */ 52718c2ecf20Sopenharmony_ci if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) { 52728c2ecf20Sopenharmony_ci /* 52738c2ecf20Sopenharmony_ci * L1 can read these fields without exiting, ensure the 52748c2ecf20Sopenharmony_ci * shadow VMCS is up-to-date. 52758c2ecf20Sopenharmony_ci */ 52768c2ecf20Sopenharmony_ci if (enable_shadow_vmcs && is_shadow_field_ro(field)) { 52778c2ecf20Sopenharmony_ci preempt_disable(); 52788c2ecf20Sopenharmony_ci vmcs_load(vmx->vmcs01.shadow_vmcs); 52798c2ecf20Sopenharmony_ci 52808c2ecf20Sopenharmony_ci __vmcs_writel(field, value); 52818c2ecf20Sopenharmony_ci 52828c2ecf20Sopenharmony_ci vmcs_clear(vmx->vmcs01.shadow_vmcs); 52838c2ecf20Sopenharmony_ci vmcs_load(vmx->loaded_vmcs->vmcs); 52848c2ecf20Sopenharmony_ci preempt_enable(); 52858c2ecf20Sopenharmony_ci } 52868c2ecf20Sopenharmony_ci vmx->nested.dirty_vmcs12 = true; 52878c2ecf20Sopenharmony_ci } 52888c2ecf20Sopenharmony_ci 52898c2ecf20Sopenharmony_ci return nested_vmx_succeed(vcpu); 52908c2ecf20Sopenharmony_ci} 52918c2ecf20Sopenharmony_ci 52928c2ecf20Sopenharmony_cistatic void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr) 52938c2ecf20Sopenharmony_ci{ 52948c2ecf20Sopenharmony_ci vmx->nested.current_vmptr = vmptr; 52958c2ecf20Sopenharmony_ci if (enable_shadow_vmcs) { 52968c2ecf20Sopenharmony_ci secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); 52978c2ecf20Sopenharmony_ci vmcs_write64(VMCS_LINK_POINTER, 52988c2ecf20Sopenharmony_ci __pa(vmx->vmcs01.shadow_vmcs)); 52998c2ecf20Sopenharmony_ci vmx->nested.need_vmcs12_to_shadow_sync = true; 53008c2ecf20Sopenharmony_ci } 53018c2ecf20Sopenharmony_ci vmx->nested.dirty_vmcs12 = true; 53028c2ecf20Sopenharmony_ci} 53038c2ecf20Sopenharmony_ci 53048c2ecf20Sopenharmony_ci/* Emulate the VMPTRLD instruction */ 53058c2ecf20Sopenharmony_cistatic int handle_vmptrld(struct kvm_vcpu *vcpu) 
53068c2ecf20Sopenharmony_ci{ 53078c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 53088c2ecf20Sopenharmony_ci gpa_t vmptr; 53098c2ecf20Sopenharmony_ci int r; 53108c2ecf20Sopenharmony_ci 53118c2ecf20Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 53128c2ecf20Sopenharmony_ci return 1; 53138c2ecf20Sopenharmony_ci 53148c2ecf20Sopenharmony_ci if (nested_vmx_get_vmptr(vcpu, &vmptr, &r)) 53158c2ecf20Sopenharmony_ci return r; 53168c2ecf20Sopenharmony_ci 53178c2ecf20Sopenharmony_ci if (!page_address_valid(vcpu, vmptr)) 53188c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); 53198c2ecf20Sopenharmony_ci 53208c2ecf20Sopenharmony_ci if (vmptr == vmx->nested.vmxon_ptr) 53218c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_VMXON_POINTER); 53228c2ecf20Sopenharmony_ci 53238c2ecf20Sopenharmony_ci /* Forbid normal VMPTRLD if Enlightened version was used */ 53248c2ecf20Sopenharmony_ci if (vmx->nested.hv_evmcs) 53258c2ecf20Sopenharmony_ci return 1; 53268c2ecf20Sopenharmony_ci 53278c2ecf20Sopenharmony_ci if (vmx->nested.current_vmptr != vmptr) { 53288c2ecf20Sopenharmony_ci struct kvm_host_map map; 53298c2ecf20Sopenharmony_ci struct vmcs12 *new_vmcs12; 53308c2ecf20Sopenharmony_ci 53318c2ecf20Sopenharmony_ci if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) { 53328c2ecf20Sopenharmony_ci /* 53338c2ecf20Sopenharmony_ci * Reads from an unbacked page return all 1s, 53348c2ecf20Sopenharmony_ci * which means that the 32 bits located at the 53358c2ecf20Sopenharmony_ci * given physical address won't match the required 53368c2ecf20Sopenharmony_ci * VMCS12_REVISION identifier. 
53378c2ecf20Sopenharmony_ci */ 53388c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, 53398c2ecf20Sopenharmony_ci VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); 53408c2ecf20Sopenharmony_ci } 53418c2ecf20Sopenharmony_ci 53428c2ecf20Sopenharmony_ci new_vmcs12 = map.hva; 53438c2ecf20Sopenharmony_ci 53448c2ecf20Sopenharmony_ci if (new_vmcs12->hdr.revision_id != VMCS12_REVISION || 53458c2ecf20Sopenharmony_ci (new_vmcs12->hdr.shadow_vmcs && 53468c2ecf20Sopenharmony_ci !nested_cpu_has_vmx_shadow_vmcs(vcpu))) { 53478c2ecf20Sopenharmony_ci kvm_vcpu_unmap(vcpu, &map, false); 53488c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, 53498c2ecf20Sopenharmony_ci VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); 53508c2ecf20Sopenharmony_ci } 53518c2ecf20Sopenharmony_ci 53528c2ecf20Sopenharmony_ci nested_release_vmcs12(vcpu); 53538c2ecf20Sopenharmony_ci 53548c2ecf20Sopenharmony_ci /* 53558c2ecf20Sopenharmony_ci * Load VMCS12 from guest memory since it is not already 53568c2ecf20Sopenharmony_ci * cached. 53578c2ecf20Sopenharmony_ci */ 53588c2ecf20Sopenharmony_ci memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); 53598c2ecf20Sopenharmony_ci kvm_vcpu_unmap(vcpu, &map, false); 53608c2ecf20Sopenharmony_ci 53618c2ecf20Sopenharmony_ci set_current_vmptr(vmx, vmptr); 53628c2ecf20Sopenharmony_ci } 53638c2ecf20Sopenharmony_ci 53648c2ecf20Sopenharmony_ci return nested_vmx_succeed(vcpu); 53658c2ecf20Sopenharmony_ci} 53668c2ecf20Sopenharmony_ci 53678c2ecf20Sopenharmony_ci/* Emulate the VMPTRST instruction */ 53688c2ecf20Sopenharmony_cistatic int handle_vmptrst(struct kvm_vcpu *vcpu) 53698c2ecf20Sopenharmony_ci{ 53708c2ecf20Sopenharmony_ci unsigned long exit_qual = vmx_get_exit_qual(vcpu); 53718c2ecf20Sopenharmony_ci u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); 53728c2ecf20Sopenharmony_ci gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr; 53738c2ecf20Sopenharmony_ci struct x86_exception e; 53748c2ecf20Sopenharmony_ci gva_t gva; 53758c2ecf20Sopenharmony_ci int r; 
53768c2ecf20Sopenharmony_ci 53778c2ecf20Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 53788c2ecf20Sopenharmony_ci return 1; 53798c2ecf20Sopenharmony_ci 53808c2ecf20Sopenharmony_ci if (unlikely(to_vmx(vcpu)->nested.hv_evmcs)) 53818c2ecf20Sopenharmony_ci return 1; 53828c2ecf20Sopenharmony_ci 53838c2ecf20Sopenharmony_ci if (get_vmx_mem_address(vcpu, exit_qual, instr_info, 53848c2ecf20Sopenharmony_ci true, sizeof(gpa_t), &gva)) 53858c2ecf20Sopenharmony_ci return 1; 53868c2ecf20Sopenharmony_ci /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ 53878c2ecf20Sopenharmony_ci r = kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr, 53888c2ecf20Sopenharmony_ci sizeof(gpa_t), &e); 53898c2ecf20Sopenharmony_ci if (r != X86EMUL_CONTINUE) 53908c2ecf20Sopenharmony_ci return kvm_handle_memory_failure(vcpu, r, &e); 53918c2ecf20Sopenharmony_ci 53928c2ecf20Sopenharmony_ci return nested_vmx_succeed(vcpu); 53938c2ecf20Sopenharmony_ci} 53948c2ecf20Sopenharmony_ci 53958c2ecf20Sopenharmony_ci#define EPTP_PA_MASK GENMASK_ULL(51, 12) 53968c2ecf20Sopenharmony_ci 53978c2ecf20Sopenharmony_cistatic bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp) 53988c2ecf20Sopenharmony_ci{ 53998c2ecf20Sopenharmony_ci return VALID_PAGE(root_hpa) && 54008c2ecf20Sopenharmony_ci ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK)); 54018c2ecf20Sopenharmony_ci} 54028c2ecf20Sopenharmony_ci 54038c2ecf20Sopenharmony_ci/* Emulate the INVEPT instruction */ 54048c2ecf20Sopenharmony_cistatic int handle_invept(struct kvm_vcpu *vcpu) 54058c2ecf20Sopenharmony_ci{ 54068c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 54078c2ecf20Sopenharmony_ci u32 vmx_instruction_info, types; 54088c2ecf20Sopenharmony_ci unsigned long type, roots_to_free; 54098c2ecf20Sopenharmony_ci struct kvm_mmu *mmu; 54108c2ecf20Sopenharmony_ci gva_t gva; 54118c2ecf20Sopenharmony_ci struct x86_exception e; 54128c2ecf20Sopenharmony_ci struct { 54138c2ecf20Sopenharmony_ci u64 eptp, gpa; 
54148c2ecf20Sopenharmony_ci } operand; 54158c2ecf20Sopenharmony_ci int i, r; 54168c2ecf20Sopenharmony_ci 54178c2ecf20Sopenharmony_ci if (!(vmx->nested.msrs.secondary_ctls_high & 54188c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_EPT) || 54198c2ecf20Sopenharmony_ci !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) { 54208c2ecf20Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 54218c2ecf20Sopenharmony_ci return 1; 54228c2ecf20Sopenharmony_ci } 54238c2ecf20Sopenharmony_ci 54248c2ecf20Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 54258c2ecf20Sopenharmony_ci return 1; 54268c2ecf20Sopenharmony_ci 54278c2ecf20Sopenharmony_ci vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); 54288c2ecf20Sopenharmony_ci type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); 54298c2ecf20Sopenharmony_ci 54308c2ecf20Sopenharmony_ci types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; 54318c2ecf20Sopenharmony_ci 54328c2ecf20Sopenharmony_ci if (type >= 32 || !(types & (1 << type))) 54338c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); 54348c2ecf20Sopenharmony_ci 54358c2ecf20Sopenharmony_ci /* According to the Intel VMX instruction reference, the memory 54368c2ecf20Sopenharmony_ci * operand is read even if it isn't needed (e.g., for type==global) 54378c2ecf20Sopenharmony_ci */ 54388c2ecf20Sopenharmony_ci if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), 54398c2ecf20Sopenharmony_ci vmx_instruction_info, false, sizeof(operand), &gva)) 54408c2ecf20Sopenharmony_ci return 1; 54418c2ecf20Sopenharmony_ci r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e); 54428c2ecf20Sopenharmony_ci if (r != X86EMUL_CONTINUE) 54438c2ecf20Sopenharmony_ci return kvm_handle_memory_failure(vcpu, r, &e); 54448c2ecf20Sopenharmony_ci 54458c2ecf20Sopenharmony_ci /* 54468c2ecf20Sopenharmony_ci * Nested EPT roots are always held through guest_mmu, 54478c2ecf20Sopenharmony_ci * not root_mmu. 
54488c2ecf20Sopenharmony_ci */ 54498c2ecf20Sopenharmony_ci mmu = &vcpu->arch.guest_mmu; 54508c2ecf20Sopenharmony_ci 54518c2ecf20Sopenharmony_ci switch (type) { 54528c2ecf20Sopenharmony_ci case VMX_EPT_EXTENT_CONTEXT: 54538c2ecf20Sopenharmony_ci if (!nested_vmx_check_eptp(vcpu, operand.eptp)) 54548c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, 54558c2ecf20Sopenharmony_ci VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); 54568c2ecf20Sopenharmony_ci 54578c2ecf20Sopenharmony_ci roots_to_free = 0; 54588c2ecf20Sopenharmony_ci if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd, 54598c2ecf20Sopenharmony_ci operand.eptp)) 54608c2ecf20Sopenharmony_ci roots_to_free |= KVM_MMU_ROOT_CURRENT; 54618c2ecf20Sopenharmony_ci 54628c2ecf20Sopenharmony_ci for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { 54638c2ecf20Sopenharmony_ci if (nested_ept_root_matches(mmu->prev_roots[i].hpa, 54648c2ecf20Sopenharmony_ci mmu->prev_roots[i].pgd, 54658c2ecf20Sopenharmony_ci operand.eptp)) 54668c2ecf20Sopenharmony_ci roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); 54678c2ecf20Sopenharmony_ci } 54688c2ecf20Sopenharmony_ci break; 54698c2ecf20Sopenharmony_ci case VMX_EPT_EXTENT_GLOBAL: 54708c2ecf20Sopenharmony_ci roots_to_free = KVM_MMU_ROOTS_ALL; 54718c2ecf20Sopenharmony_ci break; 54728c2ecf20Sopenharmony_ci default: 54738c2ecf20Sopenharmony_ci BUG(); 54748c2ecf20Sopenharmony_ci break; 54758c2ecf20Sopenharmony_ci } 54768c2ecf20Sopenharmony_ci 54778c2ecf20Sopenharmony_ci if (roots_to_free) 54788c2ecf20Sopenharmony_ci kvm_mmu_free_roots(vcpu, mmu, roots_to_free); 54798c2ecf20Sopenharmony_ci 54808c2ecf20Sopenharmony_ci return nested_vmx_succeed(vcpu); 54818c2ecf20Sopenharmony_ci} 54828c2ecf20Sopenharmony_ci 54838c2ecf20Sopenharmony_cistatic int handle_invvpid(struct kvm_vcpu *vcpu) 54848c2ecf20Sopenharmony_ci{ 54858c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 54868c2ecf20Sopenharmony_ci u32 vmx_instruction_info; 54878c2ecf20Sopenharmony_ci unsigned long type, types; 54888c2ecf20Sopenharmony_ci 
gva_t gva; 54898c2ecf20Sopenharmony_ci struct x86_exception e; 54908c2ecf20Sopenharmony_ci struct { 54918c2ecf20Sopenharmony_ci u64 vpid; 54928c2ecf20Sopenharmony_ci u64 gla; 54938c2ecf20Sopenharmony_ci } operand; 54948c2ecf20Sopenharmony_ci u16 vpid02; 54958c2ecf20Sopenharmony_ci int r; 54968c2ecf20Sopenharmony_ci 54978c2ecf20Sopenharmony_ci if (!(vmx->nested.msrs.secondary_ctls_high & 54988c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_VPID) || 54998c2ecf20Sopenharmony_ci !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) { 55008c2ecf20Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 55018c2ecf20Sopenharmony_ci return 1; 55028c2ecf20Sopenharmony_ci } 55038c2ecf20Sopenharmony_ci 55048c2ecf20Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 55058c2ecf20Sopenharmony_ci return 1; 55068c2ecf20Sopenharmony_ci 55078c2ecf20Sopenharmony_ci vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); 55088c2ecf20Sopenharmony_ci type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); 55098c2ecf20Sopenharmony_ci 55108c2ecf20Sopenharmony_ci types = (vmx->nested.msrs.vpid_caps & 55118c2ecf20Sopenharmony_ci VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8; 55128c2ecf20Sopenharmony_ci 55138c2ecf20Sopenharmony_ci if (type >= 32 || !(types & (1 << type))) 55148c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, 55158c2ecf20Sopenharmony_ci VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); 55168c2ecf20Sopenharmony_ci 55178c2ecf20Sopenharmony_ci /* according to the intel vmx instruction reference, the memory 55188c2ecf20Sopenharmony_ci * operand is read even if it isn't needed (e.g., for type==global) 55198c2ecf20Sopenharmony_ci */ 55208c2ecf20Sopenharmony_ci if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), 55218c2ecf20Sopenharmony_ci vmx_instruction_info, false, sizeof(operand), &gva)) 55228c2ecf20Sopenharmony_ci return 1; 55238c2ecf20Sopenharmony_ci r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e); 55248c2ecf20Sopenharmony_ci if (r != X86EMUL_CONTINUE) 
55258c2ecf20Sopenharmony_ci return kvm_handle_memory_failure(vcpu, r, &e); 55268c2ecf20Sopenharmony_ci 55278c2ecf20Sopenharmony_ci if (operand.vpid >> 16) 55288c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, 55298c2ecf20Sopenharmony_ci VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); 55308c2ecf20Sopenharmony_ci 55318c2ecf20Sopenharmony_ci vpid02 = nested_get_vpid02(vcpu); 55328c2ecf20Sopenharmony_ci switch (type) { 55338c2ecf20Sopenharmony_ci case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: 55348c2ecf20Sopenharmony_ci if (!operand.vpid || 55358c2ecf20Sopenharmony_ci is_noncanonical_address(operand.gla, vcpu)) 55368c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, 55378c2ecf20Sopenharmony_ci VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); 55388c2ecf20Sopenharmony_ci vpid_sync_vcpu_addr(vpid02, operand.gla); 55398c2ecf20Sopenharmony_ci break; 55408c2ecf20Sopenharmony_ci case VMX_VPID_EXTENT_SINGLE_CONTEXT: 55418c2ecf20Sopenharmony_ci case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL: 55428c2ecf20Sopenharmony_ci if (!operand.vpid) 55438c2ecf20Sopenharmony_ci return nested_vmx_fail(vcpu, 55448c2ecf20Sopenharmony_ci VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); 55458c2ecf20Sopenharmony_ci vpid_sync_context(vpid02); 55468c2ecf20Sopenharmony_ci break; 55478c2ecf20Sopenharmony_ci case VMX_VPID_EXTENT_ALL_CONTEXT: 55488c2ecf20Sopenharmony_ci vpid_sync_context(vpid02); 55498c2ecf20Sopenharmony_ci break; 55508c2ecf20Sopenharmony_ci default: 55518c2ecf20Sopenharmony_ci WARN_ON_ONCE(1); 55528c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 55538c2ecf20Sopenharmony_ci } 55548c2ecf20Sopenharmony_ci 55558c2ecf20Sopenharmony_ci /* 55568c2ecf20Sopenharmony_ci * Sync the shadow page tables if EPT is disabled, L1 is invalidating 55578c2ecf20Sopenharmony_ci * linear mappings for L2 (tagged with L2's VPID). Free all roots as 55588c2ecf20Sopenharmony_ci * VPIDs are not tracked in the MMU role. 
55598c2ecf20Sopenharmony_ci * 55608c2ecf20Sopenharmony_ci * Note, this operates on root_mmu, not guest_mmu, as L1 and L2 share 55618c2ecf20Sopenharmony_ci * an MMU when EPT is disabled. 55628c2ecf20Sopenharmony_ci * 55638c2ecf20Sopenharmony_ci * TODO: sync only the affected SPTEs for INVDIVIDUAL_ADDR. 55648c2ecf20Sopenharmony_ci */ 55658c2ecf20Sopenharmony_ci if (!enable_ept) 55668c2ecf20Sopenharmony_ci kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu, 55678c2ecf20Sopenharmony_ci KVM_MMU_ROOTS_ALL); 55688c2ecf20Sopenharmony_ci 55698c2ecf20Sopenharmony_ci return nested_vmx_succeed(vcpu); 55708c2ecf20Sopenharmony_ci} 55718c2ecf20Sopenharmony_ci 55728c2ecf20Sopenharmony_cistatic int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, 55738c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 55748c2ecf20Sopenharmony_ci{ 55758c2ecf20Sopenharmony_ci u32 index = kvm_rcx_read(vcpu); 55768c2ecf20Sopenharmony_ci u64 new_eptp; 55778c2ecf20Sopenharmony_ci 55788c2ecf20Sopenharmony_ci if (!nested_cpu_has_eptp_switching(vmcs12) || 55798c2ecf20Sopenharmony_ci !nested_cpu_has_ept(vmcs12)) 55808c2ecf20Sopenharmony_ci return 1; 55818c2ecf20Sopenharmony_ci 55828c2ecf20Sopenharmony_ci if (index >= VMFUNC_EPTP_ENTRIES) 55838c2ecf20Sopenharmony_ci return 1; 55848c2ecf20Sopenharmony_ci 55858c2ecf20Sopenharmony_ci if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT, 55868c2ecf20Sopenharmony_ci &new_eptp, index * 8, 8)) 55878c2ecf20Sopenharmony_ci return 1; 55888c2ecf20Sopenharmony_ci 55898c2ecf20Sopenharmony_ci /* 55908c2ecf20Sopenharmony_ci * If the (L2) guest does a vmfunc to the currently 55918c2ecf20Sopenharmony_ci * active ept pointer, we don't have to do anything else 55928c2ecf20Sopenharmony_ci */ 55938c2ecf20Sopenharmony_ci if (vmcs12->ept_pointer != new_eptp) { 55948c2ecf20Sopenharmony_ci if (!nested_vmx_check_eptp(vcpu, new_eptp)) 55958c2ecf20Sopenharmony_ci return 1; 55968c2ecf20Sopenharmony_ci 55978c2ecf20Sopenharmony_ci vmcs12->ept_pointer = new_eptp; 
55988c2ecf20Sopenharmony_ci 55998c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 56008c2ecf20Sopenharmony_ci } 56018c2ecf20Sopenharmony_ci 56028c2ecf20Sopenharmony_ci return 0; 56038c2ecf20Sopenharmony_ci} 56048c2ecf20Sopenharmony_ci 56058c2ecf20Sopenharmony_cistatic int handle_vmfunc(struct kvm_vcpu *vcpu) 56068c2ecf20Sopenharmony_ci{ 56078c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 56088c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12; 56098c2ecf20Sopenharmony_ci u32 function = kvm_rax_read(vcpu); 56108c2ecf20Sopenharmony_ci 56118c2ecf20Sopenharmony_ci /* 56128c2ecf20Sopenharmony_ci * VMFUNC is only supported for nested guests, but we always enable the 56138c2ecf20Sopenharmony_ci * secondary control for simplicity; for non-nested mode, fake that we 56148c2ecf20Sopenharmony_ci * didn't by injecting #UD. 56158c2ecf20Sopenharmony_ci */ 56168c2ecf20Sopenharmony_ci if (!is_guest_mode(vcpu)) { 56178c2ecf20Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 56188c2ecf20Sopenharmony_ci return 1; 56198c2ecf20Sopenharmony_ci } 56208c2ecf20Sopenharmony_ci 56218c2ecf20Sopenharmony_ci vmcs12 = get_vmcs12(vcpu); 56228c2ecf20Sopenharmony_ci if (!(vmcs12->vm_function_control & BIT_ULL(function))) 56238c2ecf20Sopenharmony_ci goto fail; 56248c2ecf20Sopenharmony_ci 56258c2ecf20Sopenharmony_ci switch (function) { 56268c2ecf20Sopenharmony_ci case 0: 56278c2ecf20Sopenharmony_ci if (nested_vmx_eptp_switching(vcpu, vmcs12)) 56288c2ecf20Sopenharmony_ci goto fail; 56298c2ecf20Sopenharmony_ci break; 56308c2ecf20Sopenharmony_ci default: 56318c2ecf20Sopenharmony_ci goto fail; 56328c2ecf20Sopenharmony_ci } 56338c2ecf20Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 56348c2ecf20Sopenharmony_ci 56358c2ecf20Sopenharmony_cifail: 56368c2ecf20Sopenharmony_ci /* 56378c2ecf20Sopenharmony_ci * This is effectively a reflected VM-Exit, as opposed to a synthesized 56388c2ecf20Sopenharmony_ci * nested VM-Exit. Pass the original exit reason, i.e. 
don't hardcode 56398c2ecf20Sopenharmony_ci * EXIT_REASON_VMFUNC as the exit reason. 56408c2ecf20Sopenharmony_ci */ 56418c2ecf20Sopenharmony_ci nested_vmx_vmexit(vcpu, vmx->exit_reason.full, 56428c2ecf20Sopenharmony_ci vmx_get_intr_info(vcpu), 56438c2ecf20Sopenharmony_ci vmx_get_exit_qual(vcpu)); 56448c2ecf20Sopenharmony_ci return 1; 56458c2ecf20Sopenharmony_ci} 56468c2ecf20Sopenharmony_ci 56478c2ecf20Sopenharmony_ci/* 56488c2ecf20Sopenharmony_ci * Return true if an IO instruction with the specified port and size should cause 56498c2ecf20Sopenharmony_ci * a VM-exit into L1. 56508c2ecf20Sopenharmony_ci */ 56518c2ecf20Sopenharmony_cibool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, 56528c2ecf20Sopenharmony_ci int size) 56538c2ecf20Sopenharmony_ci{ 56548c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 56558c2ecf20Sopenharmony_ci gpa_t bitmap, last_bitmap; 56568c2ecf20Sopenharmony_ci u8 b; 56578c2ecf20Sopenharmony_ci 56588c2ecf20Sopenharmony_ci last_bitmap = (gpa_t)-1; 56598c2ecf20Sopenharmony_ci b = -1; 56608c2ecf20Sopenharmony_ci 56618c2ecf20Sopenharmony_ci while (size > 0) { 56628c2ecf20Sopenharmony_ci if (port < 0x8000) 56638c2ecf20Sopenharmony_ci bitmap = vmcs12->io_bitmap_a; 56648c2ecf20Sopenharmony_ci else if (port < 0x10000) 56658c2ecf20Sopenharmony_ci bitmap = vmcs12->io_bitmap_b; 56668c2ecf20Sopenharmony_ci else 56678c2ecf20Sopenharmony_ci return true; 56688c2ecf20Sopenharmony_ci bitmap += (port & 0x7fff) / 8; 56698c2ecf20Sopenharmony_ci 56708c2ecf20Sopenharmony_ci if (last_bitmap != bitmap) 56718c2ecf20Sopenharmony_ci if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1)) 56728c2ecf20Sopenharmony_ci return true; 56738c2ecf20Sopenharmony_ci if (b & (1 << (port & 7))) 56748c2ecf20Sopenharmony_ci return true; 56758c2ecf20Sopenharmony_ci 56768c2ecf20Sopenharmony_ci port++; 56778c2ecf20Sopenharmony_ci size--; 56788c2ecf20Sopenharmony_ci last_bitmap = bitmap; 56798c2ecf20Sopenharmony_ci } 56808c2ecf20Sopenharmony_ci 
56818c2ecf20Sopenharmony_ci return false; 56828c2ecf20Sopenharmony_ci} 56838c2ecf20Sopenharmony_ci 56848c2ecf20Sopenharmony_cistatic bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, 56858c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 56868c2ecf20Sopenharmony_ci{ 56878c2ecf20Sopenharmony_ci unsigned long exit_qualification; 56888c2ecf20Sopenharmony_ci unsigned short port; 56898c2ecf20Sopenharmony_ci int size; 56908c2ecf20Sopenharmony_ci 56918c2ecf20Sopenharmony_ci if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) 56928c2ecf20Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); 56938c2ecf20Sopenharmony_ci 56948c2ecf20Sopenharmony_ci exit_qualification = vmx_get_exit_qual(vcpu); 56958c2ecf20Sopenharmony_ci 56968c2ecf20Sopenharmony_ci port = exit_qualification >> 16; 56978c2ecf20Sopenharmony_ci size = (exit_qualification & 7) + 1; 56988c2ecf20Sopenharmony_ci 56998c2ecf20Sopenharmony_ci return nested_vmx_check_io_bitmaps(vcpu, port, size); 57008c2ecf20Sopenharmony_ci} 57018c2ecf20Sopenharmony_ci 57028c2ecf20Sopenharmony_ci/* 57038c2ecf20Sopenharmony_ci * Return 1 if we should exit from L2 to L1 to handle an MSR access, 57048c2ecf20Sopenharmony_ci * rather than handle it ourselves in L0. I.e., check whether L1 expressed 57058c2ecf20Sopenharmony_ci * disinterest in the current event (read or write a specific MSR) by using an 57068c2ecf20Sopenharmony_ci * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps. 
57078c2ecf20Sopenharmony_ci */ 57088c2ecf20Sopenharmony_cistatic bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, 57098c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12, 57108c2ecf20Sopenharmony_ci union vmx_exit_reason exit_reason) 57118c2ecf20Sopenharmony_ci{ 57128c2ecf20Sopenharmony_ci u32 msr_index = kvm_rcx_read(vcpu); 57138c2ecf20Sopenharmony_ci gpa_t bitmap; 57148c2ecf20Sopenharmony_ci 57158c2ecf20Sopenharmony_ci if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) 57168c2ecf20Sopenharmony_ci return true; 57178c2ecf20Sopenharmony_ci 57188c2ecf20Sopenharmony_ci /* 57198c2ecf20Sopenharmony_ci * The MSR_BITMAP page is divided into four 1024-byte bitmaps, 57208c2ecf20Sopenharmony_ci * for the four combinations of read/write and low/high MSR numbers. 57218c2ecf20Sopenharmony_ci * First we need to figure out which of the four to use: 57228c2ecf20Sopenharmony_ci */ 57238c2ecf20Sopenharmony_ci bitmap = vmcs12->msr_bitmap; 57248c2ecf20Sopenharmony_ci if (exit_reason.basic == EXIT_REASON_MSR_WRITE) 57258c2ecf20Sopenharmony_ci bitmap += 2048; 57268c2ecf20Sopenharmony_ci if (msr_index >= 0xc0000000) { 57278c2ecf20Sopenharmony_ci msr_index -= 0xc0000000; 57288c2ecf20Sopenharmony_ci bitmap += 1024; 57298c2ecf20Sopenharmony_ci } 57308c2ecf20Sopenharmony_ci 57318c2ecf20Sopenharmony_ci /* Then read the msr_index'th bit from this bitmap: */ 57328c2ecf20Sopenharmony_ci if (msr_index < 1024*8) { 57338c2ecf20Sopenharmony_ci unsigned char b; 57348c2ecf20Sopenharmony_ci if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1)) 57358c2ecf20Sopenharmony_ci return true; 57368c2ecf20Sopenharmony_ci return 1 & (b >> (msr_index & 7)); 57378c2ecf20Sopenharmony_ci } else 57388c2ecf20Sopenharmony_ci return true; /* let L1 handle the wrong parameter */ 57398c2ecf20Sopenharmony_ci} 57408c2ecf20Sopenharmony_ci 57418c2ecf20Sopenharmony_ci/* 57428c2ecf20Sopenharmony_ci * Return 1 if we should exit from L2 to L1 to handle a CR access exit, 57438c2ecf20Sopenharmony_ci * rather than handle 
it ourselves in L0. I.e., check if L1 wanted to 57448c2ecf20Sopenharmony_ci * intercept (via guest_host_mask etc.) the current event. 57458c2ecf20Sopenharmony_ci */ 57468c2ecf20Sopenharmony_cistatic bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, 57478c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12) 57488c2ecf20Sopenharmony_ci{ 57498c2ecf20Sopenharmony_ci unsigned long exit_qualification = vmx_get_exit_qual(vcpu); 57508c2ecf20Sopenharmony_ci int cr = exit_qualification & 15; 57518c2ecf20Sopenharmony_ci int reg; 57528c2ecf20Sopenharmony_ci unsigned long val; 57538c2ecf20Sopenharmony_ci 57548c2ecf20Sopenharmony_ci switch ((exit_qualification >> 4) & 3) { 57558c2ecf20Sopenharmony_ci case 0: /* mov to cr */ 57568c2ecf20Sopenharmony_ci reg = (exit_qualification >> 8) & 15; 57578c2ecf20Sopenharmony_ci val = kvm_register_readl(vcpu, reg); 57588c2ecf20Sopenharmony_ci switch (cr) { 57598c2ecf20Sopenharmony_ci case 0: 57608c2ecf20Sopenharmony_ci if (vmcs12->cr0_guest_host_mask & 57618c2ecf20Sopenharmony_ci (val ^ vmcs12->cr0_read_shadow)) 57628c2ecf20Sopenharmony_ci return true; 57638c2ecf20Sopenharmony_ci break; 57648c2ecf20Sopenharmony_ci case 3: 57658c2ecf20Sopenharmony_ci if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING)) 57668c2ecf20Sopenharmony_ci return true; 57678c2ecf20Sopenharmony_ci break; 57688c2ecf20Sopenharmony_ci case 4: 57698c2ecf20Sopenharmony_ci if (vmcs12->cr4_guest_host_mask & 57708c2ecf20Sopenharmony_ci (vmcs12->cr4_read_shadow ^ val)) 57718c2ecf20Sopenharmony_ci return true; 57728c2ecf20Sopenharmony_ci break; 57738c2ecf20Sopenharmony_ci case 8: 57748c2ecf20Sopenharmony_ci if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING)) 57758c2ecf20Sopenharmony_ci return true; 57768c2ecf20Sopenharmony_ci break; 57778c2ecf20Sopenharmony_ci } 57788c2ecf20Sopenharmony_ci break; 57798c2ecf20Sopenharmony_ci case 2: /* clts */ 57808c2ecf20Sopenharmony_ci if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) && 57818c2ecf20Sopenharmony_ci (vmcs12->cr0_read_shadow & 
X86_CR0_TS)) 57828c2ecf20Sopenharmony_ci return true; 57838c2ecf20Sopenharmony_ci break; 57848c2ecf20Sopenharmony_ci case 1: /* mov from cr */ 57858c2ecf20Sopenharmony_ci switch (cr) { 57868c2ecf20Sopenharmony_ci case 3: 57878c2ecf20Sopenharmony_ci if (vmcs12->cpu_based_vm_exec_control & 57888c2ecf20Sopenharmony_ci CPU_BASED_CR3_STORE_EXITING) 57898c2ecf20Sopenharmony_ci return true; 57908c2ecf20Sopenharmony_ci break; 57918c2ecf20Sopenharmony_ci case 8: 57928c2ecf20Sopenharmony_ci if (vmcs12->cpu_based_vm_exec_control & 57938c2ecf20Sopenharmony_ci CPU_BASED_CR8_STORE_EXITING) 57948c2ecf20Sopenharmony_ci return true; 57958c2ecf20Sopenharmony_ci break; 57968c2ecf20Sopenharmony_ci } 57978c2ecf20Sopenharmony_ci break; 57988c2ecf20Sopenharmony_ci case 3: /* lmsw */ 57998c2ecf20Sopenharmony_ci /* 58008c2ecf20Sopenharmony_ci * lmsw can change bits 1..3 of cr0, and only set bit 0 of 58018c2ecf20Sopenharmony_ci * cr0. Other attempted changes are ignored, with no exit. 58028c2ecf20Sopenharmony_ci */ 58038c2ecf20Sopenharmony_ci val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; 58048c2ecf20Sopenharmony_ci if (vmcs12->cr0_guest_host_mask & 0xe & 58058c2ecf20Sopenharmony_ci (val ^ vmcs12->cr0_read_shadow)) 58068c2ecf20Sopenharmony_ci return true; 58078c2ecf20Sopenharmony_ci if ((vmcs12->cr0_guest_host_mask & 0x1) && 58088c2ecf20Sopenharmony_ci !(vmcs12->cr0_read_shadow & 0x1) && 58098c2ecf20Sopenharmony_ci (val & 0x1)) 58108c2ecf20Sopenharmony_ci return true; 58118c2ecf20Sopenharmony_ci break; 58128c2ecf20Sopenharmony_ci } 58138c2ecf20Sopenharmony_ci return false; 58148c2ecf20Sopenharmony_ci} 58158c2ecf20Sopenharmony_ci 58168c2ecf20Sopenharmony_cistatic bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu, 58178c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12, gpa_t bitmap) 58188c2ecf20Sopenharmony_ci{ 58198c2ecf20Sopenharmony_ci u32 vmx_instruction_info; 58208c2ecf20Sopenharmony_ci unsigned long field; 58218c2ecf20Sopenharmony_ci u8 b; 
58228c2ecf20Sopenharmony_ci 58238c2ecf20Sopenharmony_ci if (!nested_cpu_has_shadow_vmcs(vmcs12)) 58248c2ecf20Sopenharmony_ci return true; 58258c2ecf20Sopenharmony_ci 58268c2ecf20Sopenharmony_ci /* Decode instruction info and find the field to access */ 58278c2ecf20Sopenharmony_ci vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); 58288c2ecf20Sopenharmony_ci field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); 58298c2ecf20Sopenharmony_ci 58308c2ecf20Sopenharmony_ci /* Out-of-range fields always cause a VM exit from L2 to L1 */ 58318c2ecf20Sopenharmony_ci if (field >> 15) 58328c2ecf20Sopenharmony_ci return true; 58338c2ecf20Sopenharmony_ci 58348c2ecf20Sopenharmony_ci if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1)) 58358c2ecf20Sopenharmony_ci return true; 58368c2ecf20Sopenharmony_ci 58378c2ecf20Sopenharmony_ci return 1 & (b >> (field & 7)); 58388c2ecf20Sopenharmony_ci} 58398c2ecf20Sopenharmony_ci 58408c2ecf20Sopenharmony_cistatic bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12) 58418c2ecf20Sopenharmony_ci{ 58428c2ecf20Sopenharmony_ci u32 entry_intr_info = vmcs12->vm_entry_intr_info_field; 58438c2ecf20Sopenharmony_ci 58448c2ecf20Sopenharmony_ci if (nested_cpu_has_mtf(vmcs12)) 58458c2ecf20Sopenharmony_ci return true; 58468c2ecf20Sopenharmony_ci 58478c2ecf20Sopenharmony_ci /* 58488c2ecf20Sopenharmony_ci * An MTF VM-exit may be injected into the guest by setting the 58498c2ecf20Sopenharmony_ci * interruption-type to 7 (other event) and the vector field to 0. Such 58508c2ecf20Sopenharmony_ci * is the case regardless of the 'monitor trap flag' VM-execution 58518c2ecf20Sopenharmony_ci * control. 
58528c2ecf20Sopenharmony_ci */ 58538c2ecf20Sopenharmony_ci return entry_intr_info == (INTR_INFO_VALID_MASK 58548c2ecf20Sopenharmony_ci | INTR_TYPE_OTHER_EVENT); 58558c2ecf20Sopenharmony_ci} 58568c2ecf20Sopenharmony_ci 58578c2ecf20Sopenharmony_ci/* 58588c2ecf20Sopenharmony_ci * Return true if L0 wants to handle an exit from L2 regardless of whether or not 58598c2ecf20Sopenharmony_ci * L1 wants the exit. Only call this when in is_guest_mode (L2). 58608c2ecf20Sopenharmony_ci */ 58618c2ecf20Sopenharmony_cistatic bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, 58628c2ecf20Sopenharmony_ci union vmx_exit_reason exit_reason) 58638c2ecf20Sopenharmony_ci{ 58648c2ecf20Sopenharmony_ci u32 intr_info; 58658c2ecf20Sopenharmony_ci 58668c2ecf20Sopenharmony_ci switch ((u16)exit_reason.basic) { 58678c2ecf20Sopenharmony_ci case EXIT_REASON_EXCEPTION_NMI: 58688c2ecf20Sopenharmony_ci intr_info = vmx_get_intr_info(vcpu); 58698c2ecf20Sopenharmony_ci if (is_nmi(intr_info)) 58708c2ecf20Sopenharmony_ci return true; 58718c2ecf20Sopenharmony_ci else if (is_page_fault(intr_info)) 58728c2ecf20Sopenharmony_ci return vcpu->arch.apf.host_apf_flags || 58738c2ecf20Sopenharmony_ci vmx_need_pf_intercept(vcpu); 58748c2ecf20Sopenharmony_ci else if (is_debug(intr_info) && 58758c2ecf20Sopenharmony_ci vcpu->guest_debug & 58768c2ecf20Sopenharmony_ci (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) 58778c2ecf20Sopenharmony_ci return true; 58788c2ecf20Sopenharmony_ci else if (is_breakpoint(intr_info) && 58798c2ecf20Sopenharmony_ci vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 58808c2ecf20Sopenharmony_ci return true; 58818c2ecf20Sopenharmony_ci else if (is_alignment_check(intr_info) && 58828c2ecf20Sopenharmony_ci !vmx_guest_inject_ac(vcpu)) 58838c2ecf20Sopenharmony_ci return true; 58848c2ecf20Sopenharmony_ci return false; 58858c2ecf20Sopenharmony_ci case EXIT_REASON_EXTERNAL_INTERRUPT: 58868c2ecf20Sopenharmony_ci return true; 58878c2ecf20Sopenharmony_ci case EXIT_REASON_MCE_DURING_VMENTRY: 
58888c2ecf20Sopenharmony_ci return true; 58898c2ecf20Sopenharmony_ci case EXIT_REASON_EPT_VIOLATION: 58908c2ecf20Sopenharmony_ci /* 58918c2ecf20Sopenharmony_ci * L0 always deals with the EPT violation. If nested EPT is 58928c2ecf20Sopenharmony_ci * used, and the nested mmu code discovers that the address is 58938c2ecf20Sopenharmony_ci * missing in the guest EPT table (EPT12), the EPT violation 58948c2ecf20Sopenharmony_ci * will be injected with nested_ept_inject_page_fault() 58958c2ecf20Sopenharmony_ci */ 58968c2ecf20Sopenharmony_ci return true; 58978c2ecf20Sopenharmony_ci case EXIT_REASON_EPT_MISCONFIG: 58988c2ecf20Sopenharmony_ci /* 58998c2ecf20Sopenharmony_ci * L2 never uses directly L1's EPT, but rather L0's own EPT 59008c2ecf20Sopenharmony_ci * table (shadow on EPT) or a merged EPT table that L0 built 59018c2ecf20Sopenharmony_ci * (EPT on EPT). So any problems with the structure of the 59028c2ecf20Sopenharmony_ci * table is L0's fault. 59038c2ecf20Sopenharmony_ci */ 59048c2ecf20Sopenharmony_ci return true; 59058c2ecf20Sopenharmony_ci case EXIT_REASON_PREEMPTION_TIMER: 59068c2ecf20Sopenharmony_ci return true; 59078c2ecf20Sopenharmony_ci case EXIT_REASON_PML_FULL: 59088c2ecf20Sopenharmony_ci /* We emulate PML support to L1. */ 59098c2ecf20Sopenharmony_ci return true; 59108c2ecf20Sopenharmony_ci case EXIT_REASON_VMFUNC: 59118c2ecf20Sopenharmony_ci /* VM functions are emulated through L2->L0 vmexits. */ 59128c2ecf20Sopenharmony_ci return true; 59138c2ecf20Sopenharmony_ci case EXIT_REASON_ENCLS: 59148c2ecf20Sopenharmony_ci /* SGX is never exposed to L1 */ 59158c2ecf20Sopenharmony_ci return true; 59168c2ecf20Sopenharmony_ci default: 59178c2ecf20Sopenharmony_ci break; 59188c2ecf20Sopenharmony_ci } 59198c2ecf20Sopenharmony_ci return false; 59208c2ecf20Sopenharmony_ci} 59218c2ecf20Sopenharmony_ci 59228c2ecf20Sopenharmony_ci/* 59238c2ecf20Sopenharmony_ci * Return 1 if L1 wants to intercept an exit from L2. 
Only call this when in 59248c2ecf20Sopenharmony_ci * is_guest_mode (L2). 59258c2ecf20Sopenharmony_ci */ 59268c2ecf20Sopenharmony_cistatic bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, 59278c2ecf20Sopenharmony_ci union vmx_exit_reason exit_reason) 59288c2ecf20Sopenharmony_ci{ 59298c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 59308c2ecf20Sopenharmony_ci u32 intr_info; 59318c2ecf20Sopenharmony_ci 59328c2ecf20Sopenharmony_ci switch ((u16)exit_reason.basic) { 59338c2ecf20Sopenharmony_ci case EXIT_REASON_EXCEPTION_NMI: 59348c2ecf20Sopenharmony_ci intr_info = vmx_get_intr_info(vcpu); 59358c2ecf20Sopenharmony_ci if (is_nmi(intr_info)) 59368c2ecf20Sopenharmony_ci return true; 59378c2ecf20Sopenharmony_ci else if (is_page_fault(intr_info)) 59388c2ecf20Sopenharmony_ci return true; 59398c2ecf20Sopenharmony_ci return vmcs12->exception_bitmap & 59408c2ecf20Sopenharmony_ci (1u << (intr_info & INTR_INFO_VECTOR_MASK)); 59418c2ecf20Sopenharmony_ci case EXIT_REASON_EXTERNAL_INTERRUPT: 59428c2ecf20Sopenharmony_ci return nested_exit_on_intr(vcpu); 59438c2ecf20Sopenharmony_ci case EXIT_REASON_TRIPLE_FAULT: 59448c2ecf20Sopenharmony_ci return true; 59458c2ecf20Sopenharmony_ci case EXIT_REASON_INTERRUPT_WINDOW: 59468c2ecf20Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING); 59478c2ecf20Sopenharmony_ci case EXIT_REASON_NMI_WINDOW: 59488c2ecf20Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING); 59498c2ecf20Sopenharmony_ci case EXIT_REASON_TASK_SWITCH: 59508c2ecf20Sopenharmony_ci return true; 59518c2ecf20Sopenharmony_ci case EXIT_REASON_CPUID: 59528c2ecf20Sopenharmony_ci return true; 59538c2ecf20Sopenharmony_ci case EXIT_REASON_HLT: 59548c2ecf20Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); 59558c2ecf20Sopenharmony_ci case EXIT_REASON_INVD: 59568c2ecf20Sopenharmony_ci return true; 59578c2ecf20Sopenharmony_ci case EXIT_REASON_INVLPG: 59588c2ecf20Sopenharmony_ci return nested_cpu_has(vmcs12, 
CPU_BASED_INVLPG_EXITING); 59598c2ecf20Sopenharmony_ci case EXIT_REASON_RDPMC: 59608c2ecf20Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); 59618c2ecf20Sopenharmony_ci case EXIT_REASON_RDRAND: 59628c2ecf20Sopenharmony_ci return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING); 59638c2ecf20Sopenharmony_ci case EXIT_REASON_RDSEED: 59648c2ecf20Sopenharmony_ci return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING); 59658c2ecf20Sopenharmony_ci case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: 59668c2ecf20Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); 59678c2ecf20Sopenharmony_ci case EXIT_REASON_VMREAD: 59688c2ecf20Sopenharmony_ci return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12, 59698c2ecf20Sopenharmony_ci vmcs12->vmread_bitmap); 59708c2ecf20Sopenharmony_ci case EXIT_REASON_VMWRITE: 59718c2ecf20Sopenharmony_ci return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12, 59728c2ecf20Sopenharmony_ci vmcs12->vmwrite_bitmap); 59738c2ecf20Sopenharmony_ci case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: 59748c2ecf20Sopenharmony_ci case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: 59758c2ecf20Sopenharmony_ci case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME: 59768c2ecf20Sopenharmony_ci case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: 59778c2ecf20Sopenharmony_ci case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID: 59788c2ecf20Sopenharmony_ci /* 59798c2ecf20Sopenharmony_ci * VMX instructions trap unconditionally. This allows L1 to 59808c2ecf20Sopenharmony_ci * emulate them for its L2 guest, i.e., allows 3-level nesting! 
59818c2ecf20Sopenharmony_ci */ 59828c2ecf20Sopenharmony_ci return true; 59838c2ecf20Sopenharmony_ci case EXIT_REASON_CR_ACCESS: 59848c2ecf20Sopenharmony_ci return nested_vmx_exit_handled_cr(vcpu, vmcs12); 59858c2ecf20Sopenharmony_ci case EXIT_REASON_DR_ACCESS: 59868c2ecf20Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING); 59878c2ecf20Sopenharmony_ci case EXIT_REASON_IO_INSTRUCTION: 59888c2ecf20Sopenharmony_ci return nested_vmx_exit_handled_io(vcpu, vmcs12); 59898c2ecf20Sopenharmony_ci case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR: 59908c2ecf20Sopenharmony_ci return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC); 59918c2ecf20Sopenharmony_ci case EXIT_REASON_MSR_READ: 59928c2ecf20Sopenharmony_ci case EXIT_REASON_MSR_WRITE: 59938c2ecf20Sopenharmony_ci return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); 59948c2ecf20Sopenharmony_ci case EXIT_REASON_INVALID_STATE: 59958c2ecf20Sopenharmony_ci return true; 59968c2ecf20Sopenharmony_ci case EXIT_REASON_MWAIT_INSTRUCTION: 59978c2ecf20Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); 59988c2ecf20Sopenharmony_ci case EXIT_REASON_MONITOR_TRAP_FLAG: 59998c2ecf20Sopenharmony_ci return nested_vmx_exit_handled_mtf(vmcs12); 60008c2ecf20Sopenharmony_ci case EXIT_REASON_MONITOR_INSTRUCTION: 60018c2ecf20Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING); 60028c2ecf20Sopenharmony_ci case EXIT_REASON_PAUSE_INSTRUCTION: 60038c2ecf20Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) || 60048c2ecf20Sopenharmony_ci nested_cpu_has2(vmcs12, 60058c2ecf20Sopenharmony_ci SECONDARY_EXEC_PAUSE_LOOP_EXITING); 60068c2ecf20Sopenharmony_ci case EXIT_REASON_MCE_DURING_VMENTRY: 60078c2ecf20Sopenharmony_ci return true; 60088c2ecf20Sopenharmony_ci case EXIT_REASON_TPR_BELOW_THRESHOLD: 60098c2ecf20Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW); 60108c2ecf20Sopenharmony_ci case EXIT_REASON_APIC_ACCESS: 60118c2ecf20Sopenharmony_ci case 
EXIT_REASON_APIC_WRITE: 60128c2ecf20Sopenharmony_ci case EXIT_REASON_EOI_INDUCED: 60138c2ecf20Sopenharmony_ci /* 60148c2ecf20Sopenharmony_ci * The controls for "virtualize APIC accesses," "APIC- 60158c2ecf20Sopenharmony_ci * register virtualization," and "virtual-interrupt 60168c2ecf20Sopenharmony_ci * delivery" only come from vmcs12. 60178c2ecf20Sopenharmony_ci */ 60188c2ecf20Sopenharmony_ci return true; 60198c2ecf20Sopenharmony_ci case EXIT_REASON_INVPCID: 60208c2ecf20Sopenharmony_ci return 60218c2ecf20Sopenharmony_ci nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) && 60228c2ecf20Sopenharmony_ci nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); 60238c2ecf20Sopenharmony_ci case EXIT_REASON_WBINVD: 60248c2ecf20Sopenharmony_ci return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); 60258c2ecf20Sopenharmony_ci case EXIT_REASON_XSETBV: 60268c2ecf20Sopenharmony_ci return true; 60278c2ecf20Sopenharmony_ci case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS: 60288c2ecf20Sopenharmony_ci /* 60298c2ecf20Sopenharmony_ci * This should never happen, since it is not possible to 60308c2ecf20Sopenharmony_ci * set XSS to a non-zero value---neither in L1 nor in L2. 60318c2ecf20Sopenharmony_ci * If if it were, XSS would have to be checked against 60328c2ecf20Sopenharmony_ci * the XSS exit bitmap in vmcs12. 60338c2ecf20Sopenharmony_ci */ 60348c2ecf20Sopenharmony_ci return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); 60358c2ecf20Sopenharmony_ci case EXIT_REASON_UMWAIT: 60368c2ecf20Sopenharmony_ci case EXIT_REASON_TPAUSE: 60378c2ecf20Sopenharmony_ci return nested_cpu_has2(vmcs12, 60388c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE); 60398c2ecf20Sopenharmony_ci default: 60408c2ecf20Sopenharmony_ci return true; 60418c2ecf20Sopenharmony_ci } 60428c2ecf20Sopenharmony_ci} 60438c2ecf20Sopenharmony_ci 60448c2ecf20Sopenharmony_ci/* 60458c2ecf20Sopenharmony_ci * Conditionally reflect a VM-Exit into L1. 
Returns %true if the VM-Exit was 60468c2ecf20Sopenharmony_ci * reflected into L1. 60478c2ecf20Sopenharmony_ci */ 60488c2ecf20Sopenharmony_cibool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu) 60498c2ecf20Sopenharmony_ci{ 60508c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 60518c2ecf20Sopenharmony_ci union vmx_exit_reason exit_reason = vmx->exit_reason; 60528c2ecf20Sopenharmony_ci unsigned long exit_qual; 60538c2ecf20Sopenharmony_ci u32 exit_intr_info; 60548c2ecf20Sopenharmony_ci 60558c2ecf20Sopenharmony_ci WARN_ON_ONCE(vmx->nested.nested_run_pending); 60568c2ecf20Sopenharmony_ci 60578c2ecf20Sopenharmony_ci /* 60588c2ecf20Sopenharmony_ci * Late nested VM-Fail shares the same flow as nested VM-Exit since KVM 60598c2ecf20Sopenharmony_ci * has already loaded L2's state. 60608c2ecf20Sopenharmony_ci */ 60618c2ecf20Sopenharmony_ci if (unlikely(vmx->fail)) { 60628c2ecf20Sopenharmony_ci trace_kvm_nested_vmenter_failed( 60638c2ecf20Sopenharmony_ci "hardware VM-instruction error: ", 60648c2ecf20Sopenharmony_ci vmcs_read32(VM_INSTRUCTION_ERROR)); 60658c2ecf20Sopenharmony_ci exit_intr_info = 0; 60668c2ecf20Sopenharmony_ci exit_qual = 0; 60678c2ecf20Sopenharmony_ci goto reflect_vmexit; 60688c2ecf20Sopenharmony_ci } 60698c2ecf20Sopenharmony_ci 60708c2ecf20Sopenharmony_ci trace_kvm_nested_vmexit(exit_reason.full, vcpu, KVM_ISA_VMX); 60718c2ecf20Sopenharmony_ci 60728c2ecf20Sopenharmony_ci /* If L0 (KVM) wants the exit, it trumps L1's desires. */ 60738c2ecf20Sopenharmony_ci if (nested_vmx_l0_wants_exit(vcpu, exit_reason)) 60748c2ecf20Sopenharmony_ci return false; 60758c2ecf20Sopenharmony_ci 60768c2ecf20Sopenharmony_ci /* If L1 doesn't want the exit, handle it in L0. */ 60778c2ecf20Sopenharmony_ci if (!nested_vmx_l1_wants_exit(vcpu, exit_reason)) 60788c2ecf20Sopenharmony_ci return false; 60798c2ecf20Sopenharmony_ci 60808c2ecf20Sopenharmony_ci /* 60818c2ecf20Sopenharmony_ci * vmcs.VM_EXIT_INTR_INFO is only valid for EXCEPTION_NMI exits. 
For 60828c2ecf20Sopenharmony_ci * EXTERNAL_INTERRUPT, the value for vmcs12->vm_exit_intr_info would 60838c2ecf20Sopenharmony_ci * need to be synthesized by querying the in-kernel LAPIC, but external 60848c2ecf20Sopenharmony_ci * interrupts are never reflected to L1 so it's a non-issue. 60858c2ecf20Sopenharmony_ci */ 60868c2ecf20Sopenharmony_ci exit_intr_info = vmx_get_intr_info(vcpu); 60878c2ecf20Sopenharmony_ci if (is_exception_with_error_code(exit_intr_info)) { 60888c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 60898c2ecf20Sopenharmony_ci 60908c2ecf20Sopenharmony_ci vmcs12->vm_exit_intr_error_code = 60918c2ecf20Sopenharmony_ci vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 60928c2ecf20Sopenharmony_ci } 60938c2ecf20Sopenharmony_ci exit_qual = vmx_get_exit_qual(vcpu); 60948c2ecf20Sopenharmony_ci 60958c2ecf20Sopenharmony_cireflect_vmexit: 60968c2ecf20Sopenharmony_ci nested_vmx_vmexit(vcpu, exit_reason.full, exit_intr_info, exit_qual); 60978c2ecf20Sopenharmony_ci return true; 60988c2ecf20Sopenharmony_ci} 60998c2ecf20Sopenharmony_ci 61008c2ecf20Sopenharmony_cistatic int vmx_get_nested_state(struct kvm_vcpu *vcpu, 61018c2ecf20Sopenharmony_ci struct kvm_nested_state __user *user_kvm_nested_state, 61028c2ecf20Sopenharmony_ci u32 user_data_size) 61038c2ecf20Sopenharmony_ci{ 61048c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx; 61058c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12; 61068c2ecf20Sopenharmony_ci struct kvm_nested_state kvm_state = { 61078c2ecf20Sopenharmony_ci .flags = 0, 61088c2ecf20Sopenharmony_ci .format = KVM_STATE_NESTED_FORMAT_VMX, 61098c2ecf20Sopenharmony_ci .size = sizeof(kvm_state), 61108c2ecf20Sopenharmony_ci .hdr.vmx.flags = 0, 61118c2ecf20Sopenharmony_ci .hdr.vmx.vmxon_pa = -1ull, 61128c2ecf20Sopenharmony_ci .hdr.vmx.vmcs12_pa = -1ull, 61138c2ecf20Sopenharmony_ci .hdr.vmx.preemption_timer_deadline = 0, 61148c2ecf20Sopenharmony_ci }; 61158c2ecf20Sopenharmony_ci struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = 
61168c2ecf20Sopenharmony_ci &user_kvm_nested_state->data.vmx[0]; 61178c2ecf20Sopenharmony_ci 61188c2ecf20Sopenharmony_ci if (!vcpu) 61198c2ecf20Sopenharmony_ci return kvm_state.size + sizeof(*user_vmx_nested_state); 61208c2ecf20Sopenharmony_ci 61218c2ecf20Sopenharmony_ci vmx = to_vmx(vcpu); 61228c2ecf20Sopenharmony_ci vmcs12 = get_vmcs12(vcpu); 61238c2ecf20Sopenharmony_ci 61248c2ecf20Sopenharmony_ci if (nested_vmx_allowed(vcpu) && 61258c2ecf20Sopenharmony_ci (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { 61268c2ecf20Sopenharmony_ci kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr; 61278c2ecf20Sopenharmony_ci kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr; 61288c2ecf20Sopenharmony_ci 61298c2ecf20Sopenharmony_ci if (vmx_has_valid_vmcs12(vcpu)) { 61308c2ecf20Sopenharmony_ci kvm_state.size += sizeof(user_vmx_nested_state->vmcs12); 61318c2ecf20Sopenharmony_ci 61328c2ecf20Sopenharmony_ci if (vmx->nested.hv_evmcs) 61338c2ecf20Sopenharmony_ci kvm_state.flags |= KVM_STATE_NESTED_EVMCS; 61348c2ecf20Sopenharmony_ci 61358c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu) && 61368c2ecf20Sopenharmony_ci nested_cpu_has_shadow_vmcs(vmcs12) && 61378c2ecf20Sopenharmony_ci vmcs12->vmcs_link_pointer != -1ull) 61388c2ecf20Sopenharmony_ci kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12); 61398c2ecf20Sopenharmony_ci } 61408c2ecf20Sopenharmony_ci 61418c2ecf20Sopenharmony_ci if (vmx->nested.smm.vmxon) 61428c2ecf20Sopenharmony_ci kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON; 61438c2ecf20Sopenharmony_ci 61448c2ecf20Sopenharmony_ci if (vmx->nested.smm.guest_mode) 61458c2ecf20Sopenharmony_ci kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE; 61468c2ecf20Sopenharmony_ci 61478c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) { 61488c2ecf20Sopenharmony_ci kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE; 61498c2ecf20Sopenharmony_ci 61508c2ecf20Sopenharmony_ci if (vmx->nested.nested_run_pending) 61518c2ecf20Sopenharmony_ci kvm_state.flags |= 
KVM_STATE_NESTED_RUN_PENDING; 61528c2ecf20Sopenharmony_ci 61538c2ecf20Sopenharmony_ci if (vmx->nested.mtf_pending) 61548c2ecf20Sopenharmony_ci kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING; 61558c2ecf20Sopenharmony_ci 61568c2ecf20Sopenharmony_ci if (nested_cpu_has_preemption_timer(vmcs12) && 61578c2ecf20Sopenharmony_ci vmx->nested.has_preemption_timer_deadline) { 61588c2ecf20Sopenharmony_ci kvm_state.hdr.vmx.flags |= 61598c2ecf20Sopenharmony_ci KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE; 61608c2ecf20Sopenharmony_ci kvm_state.hdr.vmx.preemption_timer_deadline = 61618c2ecf20Sopenharmony_ci vmx->nested.preemption_timer_deadline; 61628c2ecf20Sopenharmony_ci } 61638c2ecf20Sopenharmony_ci } 61648c2ecf20Sopenharmony_ci } 61658c2ecf20Sopenharmony_ci 61668c2ecf20Sopenharmony_ci if (user_data_size < kvm_state.size) 61678c2ecf20Sopenharmony_ci goto out; 61688c2ecf20Sopenharmony_ci 61698c2ecf20Sopenharmony_ci if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state))) 61708c2ecf20Sopenharmony_ci return -EFAULT; 61718c2ecf20Sopenharmony_ci 61728c2ecf20Sopenharmony_ci if (!vmx_has_valid_vmcs12(vcpu)) 61738c2ecf20Sopenharmony_ci goto out; 61748c2ecf20Sopenharmony_ci 61758c2ecf20Sopenharmony_ci /* 61768c2ecf20Sopenharmony_ci * When running L2, the authoritative vmcs12 state is in the 61778c2ecf20Sopenharmony_ci * vmcs02. When running L1, the authoritative vmcs12 state is 61788c2ecf20Sopenharmony_ci * in the shadow or enlightened vmcs linked to vmcs01, unless 61798c2ecf20Sopenharmony_ci * need_vmcs12_to_shadow_sync is set, in which case, the authoritative 61808c2ecf20Sopenharmony_ci * vmcs12 state is in the vmcs12 already. 
61818c2ecf20Sopenharmony_ci */ 61828c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) { 61838c2ecf20Sopenharmony_ci sync_vmcs02_to_vmcs12(vcpu, vmcs12); 61848c2ecf20Sopenharmony_ci sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); 61858c2ecf20Sopenharmony_ci } else { 61868c2ecf20Sopenharmony_ci copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu)); 61878c2ecf20Sopenharmony_ci if (!vmx->nested.need_vmcs12_to_shadow_sync) { 61888c2ecf20Sopenharmony_ci if (vmx->nested.hv_evmcs) 61898c2ecf20Sopenharmony_ci copy_enlightened_to_vmcs12(vmx); 61908c2ecf20Sopenharmony_ci else if (enable_shadow_vmcs) 61918c2ecf20Sopenharmony_ci copy_shadow_to_vmcs12(vmx); 61928c2ecf20Sopenharmony_ci } 61938c2ecf20Sopenharmony_ci } 61948c2ecf20Sopenharmony_ci 61958c2ecf20Sopenharmony_ci BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE); 61968c2ecf20Sopenharmony_ci BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE); 61978c2ecf20Sopenharmony_ci 61988c2ecf20Sopenharmony_ci /* 61998c2ecf20Sopenharmony_ci * Copy over the full allocated size of vmcs12 rather than just the size 62008c2ecf20Sopenharmony_ci * of the struct. 62018c2ecf20Sopenharmony_ci */ 62028c2ecf20Sopenharmony_ci if (copy_to_user(user_vmx_nested_state->vmcs12, vmcs12, VMCS12_SIZE)) 62038c2ecf20Sopenharmony_ci return -EFAULT; 62048c2ecf20Sopenharmony_ci 62058c2ecf20Sopenharmony_ci if (nested_cpu_has_shadow_vmcs(vmcs12) && 62068c2ecf20Sopenharmony_ci vmcs12->vmcs_link_pointer != -1ull) { 62078c2ecf20Sopenharmony_ci if (copy_to_user(user_vmx_nested_state->shadow_vmcs12, 62088c2ecf20Sopenharmony_ci get_shadow_vmcs12(vcpu), VMCS12_SIZE)) 62098c2ecf20Sopenharmony_ci return -EFAULT; 62108c2ecf20Sopenharmony_ci } 62118c2ecf20Sopenharmony_ciout: 62128c2ecf20Sopenharmony_ci return kvm_state.size; 62138c2ecf20Sopenharmony_ci} 62148c2ecf20Sopenharmony_ci 62158c2ecf20Sopenharmony_ci/* 62168c2ecf20Sopenharmony_ci * Forcibly leave nested mode in order to be able to reset the VCPU later on. 
62178c2ecf20Sopenharmony_ci */ 62188c2ecf20Sopenharmony_civoid vmx_leave_nested(struct kvm_vcpu *vcpu) 62198c2ecf20Sopenharmony_ci{ 62208c2ecf20Sopenharmony_ci if (is_guest_mode(vcpu)) { 62218c2ecf20Sopenharmony_ci to_vmx(vcpu)->nested.nested_run_pending = 0; 62228c2ecf20Sopenharmony_ci nested_vmx_vmexit(vcpu, -1, 0, 0); 62238c2ecf20Sopenharmony_ci } 62248c2ecf20Sopenharmony_ci free_nested(vcpu); 62258c2ecf20Sopenharmony_ci} 62268c2ecf20Sopenharmony_ci 62278c2ecf20Sopenharmony_cistatic int vmx_set_nested_state(struct kvm_vcpu *vcpu, 62288c2ecf20Sopenharmony_ci struct kvm_nested_state __user *user_kvm_nested_state, 62298c2ecf20Sopenharmony_ci struct kvm_nested_state *kvm_state) 62308c2ecf20Sopenharmony_ci{ 62318c2ecf20Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 62328c2ecf20Sopenharmony_ci struct vmcs12 *vmcs12; 62338c2ecf20Sopenharmony_ci enum vm_entry_failure_code ignored; 62348c2ecf20Sopenharmony_ci struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = 62358c2ecf20Sopenharmony_ci &user_kvm_nested_state->data.vmx[0]; 62368c2ecf20Sopenharmony_ci int ret; 62378c2ecf20Sopenharmony_ci 62388c2ecf20Sopenharmony_ci if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX) 62398c2ecf20Sopenharmony_ci return -EINVAL; 62408c2ecf20Sopenharmony_ci 62418c2ecf20Sopenharmony_ci if (kvm_state->hdr.vmx.vmxon_pa == -1ull) { 62428c2ecf20Sopenharmony_ci if (kvm_state->hdr.vmx.smm.flags) 62438c2ecf20Sopenharmony_ci return -EINVAL; 62448c2ecf20Sopenharmony_ci 62458c2ecf20Sopenharmony_ci if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) 62468c2ecf20Sopenharmony_ci return -EINVAL; 62478c2ecf20Sopenharmony_ci 62488c2ecf20Sopenharmony_ci /* 62498c2ecf20Sopenharmony_ci * KVM_STATE_NESTED_EVMCS used to signal that KVM should 62508c2ecf20Sopenharmony_ci * enable eVMCS capability on vCPU. However, since then 62518c2ecf20Sopenharmony_ci * code was changed such that flag signals vmcs12 should 62528c2ecf20Sopenharmony_ci * be copied into eVMCS in guest memory. 
62538c2ecf20Sopenharmony_ci * 62548c2ecf20Sopenharmony_ci * To preserve backwards compatability, allow user 62558c2ecf20Sopenharmony_ci * to set this flag even when there is no VMXON region. 62568c2ecf20Sopenharmony_ci */ 62578c2ecf20Sopenharmony_ci if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS) 62588c2ecf20Sopenharmony_ci return -EINVAL; 62598c2ecf20Sopenharmony_ci } else { 62608c2ecf20Sopenharmony_ci if (!nested_vmx_allowed(vcpu)) 62618c2ecf20Sopenharmony_ci return -EINVAL; 62628c2ecf20Sopenharmony_ci 62638c2ecf20Sopenharmony_ci if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa)) 62648c2ecf20Sopenharmony_ci return -EINVAL; 62658c2ecf20Sopenharmony_ci } 62668c2ecf20Sopenharmony_ci 62678c2ecf20Sopenharmony_ci if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && 62688c2ecf20Sopenharmony_ci (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) 62698c2ecf20Sopenharmony_ci return -EINVAL; 62708c2ecf20Sopenharmony_ci 62718c2ecf20Sopenharmony_ci if (kvm_state->hdr.vmx.smm.flags & 62728c2ecf20Sopenharmony_ci ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON)) 62738c2ecf20Sopenharmony_ci return -EINVAL; 62748c2ecf20Sopenharmony_ci 62758c2ecf20Sopenharmony_ci if (kvm_state->hdr.vmx.flags & ~KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) 62768c2ecf20Sopenharmony_ci return -EINVAL; 62778c2ecf20Sopenharmony_ci 62788c2ecf20Sopenharmony_ci /* 62798c2ecf20Sopenharmony_ci * SMM temporarily disables VMX, so we cannot be in guest mode, 62808c2ecf20Sopenharmony_ci * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags 62818c2ecf20Sopenharmony_ci * must be zero. 62828c2ecf20Sopenharmony_ci */ 62838c2ecf20Sopenharmony_ci if (is_smm(vcpu) ? 
62848c2ecf20Sopenharmony_ci (kvm_state->flags & 62858c2ecf20Sopenharmony_ci (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING)) 62868c2ecf20Sopenharmony_ci : kvm_state->hdr.vmx.smm.flags) 62878c2ecf20Sopenharmony_ci return -EINVAL; 62888c2ecf20Sopenharmony_ci 62898c2ecf20Sopenharmony_ci if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && 62908c2ecf20Sopenharmony_ci !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) 62918c2ecf20Sopenharmony_ci return -EINVAL; 62928c2ecf20Sopenharmony_ci 62938c2ecf20Sopenharmony_ci if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) && 62948c2ecf20Sopenharmony_ci (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled)) 62958c2ecf20Sopenharmony_ci return -EINVAL; 62968c2ecf20Sopenharmony_ci 62978c2ecf20Sopenharmony_ci vmx_leave_nested(vcpu); 62988c2ecf20Sopenharmony_ci 62998c2ecf20Sopenharmony_ci if (kvm_state->hdr.vmx.vmxon_pa == -1ull) 63008c2ecf20Sopenharmony_ci return 0; 63018c2ecf20Sopenharmony_ci 63028c2ecf20Sopenharmony_ci vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa; 63038c2ecf20Sopenharmony_ci ret = enter_vmx_operation(vcpu); 63048c2ecf20Sopenharmony_ci if (ret) 63058c2ecf20Sopenharmony_ci return ret; 63068c2ecf20Sopenharmony_ci 63078c2ecf20Sopenharmony_ci /* Empty 'VMXON' state is permitted if no VMCS loaded */ 63088c2ecf20Sopenharmony_ci if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) { 63098c2ecf20Sopenharmony_ci /* See vmx_has_valid_vmcs12. 
*/ 63108c2ecf20Sopenharmony_ci if ((kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE) || 63118c2ecf20Sopenharmony_ci (kvm_state->flags & KVM_STATE_NESTED_EVMCS) || 63128c2ecf20Sopenharmony_ci (kvm_state->hdr.vmx.vmcs12_pa != -1ull)) 63138c2ecf20Sopenharmony_ci return -EINVAL; 63148c2ecf20Sopenharmony_ci else 63158c2ecf20Sopenharmony_ci return 0; 63168c2ecf20Sopenharmony_ci } 63178c2ecf20Sopenharmony_ci 63188c2ecf20Sopenharmony_ci if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) { 63198c2ecf20Sopenharmony_ci if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa || 63208c2ecf20Sopenharmony_ci !page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa)) 63218c2ecf20Sopenharmony_ci return -EINVAL; 63228c2ecf20Sopenharmony_ci 63238c2ecf20Sopenharmony_ci set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa); 63248c2ecf20Sopenharmony_ci } else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) { 63258c2ecf20Sopenharmony_ci /* 63268c2ecf20Sopenharmony_ci * nested_vmx_handle_enlightened_vmptrld() cannot be called 63278c2ecf20Sopenharmony_ci * directly from here as HV_X64_MSR_VP_ASSIST_PAGE may not be 63288c2ecf20Sopenharmony_ci * restored yet. EVMCS will be mapped from 63298c2ecf20Sopenharmony_ci * nested_get_vmcs12_pages(). 
63308c2ecf20Sopenharmony_ci */ 63318c2ecf20Sopenharmony_ci kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); 63328c2ecf20Sopenharmony_ci } else { 63338c2ecf20Sopenharmony_ci return -EINVAL; 63348c2ecf20Sopenharmony_ci } 63358c2ecf20Sopenharmony_ci 63368c2ecf20Sopenharmony_ci if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) { 63378c2ecf20Sopenharmony_ci vmx->nested.smm.vmxon = true; 63388c2ecf20Sopenharmony_ci vmx->nested.vmxon = false; 63398c2ecf20Sopenharmony_ci 63408c2ecf20Sopenharmony_ci if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) 63418c2ecf20Sopenharmony_ci vmx->nested.smm.guest_mode = true; 63428c2ecf20Sopenharmony_ci } 63438c2ecf20Sopenharmony_ci 63448c2ecf20Sopenharmony_ci vmcs12 = get_vmcs12(vcpu); 63458c2ecf20Sopenharmony_ci if (copy_from_user(vmcs12, user_vmx_nested_state->vmcs12, sizeof(*vmcs12))) 63468c2ecf20Sopenharmony_ci return -EFAULT; 63478c2ecf20Sopenharmony_ci 63488c2ecf20Sopenharmony_ci if (vmcs12->hdr.revision_id != VMCS12_REVISION) 63498c2ecf20Sopenharmony_ci return -EINVAL; 63508c2ecf20Sopenharmony_ci 63518c2ecf20Sopenharmony_ci if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) 63528c2ecf20Sopenharmony_ci return 0; 63538c2ecf20Sopenharmony_ci 63548c2ecf20Sopenharmony_ci vmx->nested.nested_run_pending = 63558c2ecf20Sopenharmony_ci !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); 63568c2ecf20Sopenharmony_ci 63578c2ecf20Sopenharmony_ci vmx->nested.mtf_pending = 63588c2ecf20Sopenharmony_ci !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING); 63598c2ecf20Sopenharmony_ci 63608c2ecf20Sopenharmony_ci ret = -EINVAL; 63618c2ecf20Sopenharmony_ci if (nested_cpu_has_shadow_vmcs(vmcs12) && 63628c2ecf20Sopenharmony_ci vmcs12->vmcs_link_pointer != -1ull) { 63638c2ecf20Sopenharmony_ci struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); 63648c2ecf20Sopenharmony_ci 63658c2ecf20Sopenharmony_ci if (kvm_state->size < 63668c2ecf20Sopenharmony_ci sizeof(*kvm_state) + 63678c2ecf20Sopenharmony_ci 
sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12)) 63688c2ecf20Sopenharmony_ci goto error_guest_mode; 63698c2ecf20Sopenharmony_ci 63708c2ecf20Sopenharmony_ci if (copy_from_user(shadow_vmcs12, 63718c2ecf20Sopenharmony_ci user_vmx_nested_state->shadow_vmcs12, 63728c2ecf20Sopenharmony_ci sizeof(*shadow_vmcs12))) { 63738c2ecf20Sopenharmony_ci ret = -EFAULT; 63748c2ecf20Sopenharmony_ci goto error_guest_mode; 63758c2ecf20Sopenharmony_ci } 63768c2ecf20Sopenharmony_ci 63778c2ecf20Sopenharmony_ci if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION || 63788c2ecf20Sopenharmony_ci !shadow_vmcs12->hdr.shadow_vmcs) 63798c2ecf20Sopenharmony_ci goto error_guest_mode; 63808c2ecf20Sopenharmony_ci } 63818c2ecf20Sopenharmony_ci 63828c2ecf20Sopenharmony_ci vmx->nested.has_preemption_timer_deadline = false; 63838c2ecf20Sopenharmony_ci if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) { 63848c2ecf20Sopenharmony_ci vmx->nested.has_preemption_timer_deadline = true; 63858c2ecf20Sopenharmony_ci vmx->nested.preemption_timer_deadline = 63868c2ecf20Sopenharmony_ci kvm_state->hdr.vmx.preemption_timer_deadline; 63878c2ecf20Sopenharmony_ci } 63888c2ecf20Sopenharmony_ci 63898c2ecf20Sopenharmony_ci if (nested_vmx_check_controls(vcpu, vmcs12) || 63908c2ecf20Sopenharmony_ci nested_vmx_check_host_state(vcpu, vmcs12) || 63918c2ecf20Sopenharmony_ci nested_vmx_check_guest_state(vcpu, vmcs12, &ignored)) 63928c2ecf20Sopenharmony_ci goto error_guest_mode; 63938c2ecf20Sopenharmony_ci 63948c2ecf20Sopenharmony_ci vmx->nested.dirty_vmcs12 = true; 63958c2ecf20Sopenharmony_ci ret = nested_vmx_enter_non_root_mode(vcpu, false); 63968c2ecf20Sopenharmony_ci if (ret) 63978c2ecf20Sopenharmony_ci goto error_guest_mode; 63988c2ecf20Sopenharmony_ci 63998c2ecf20Sopenharmony_ci return 0; 64008c2ecf20Sopenharmony_ci 64018c2ecf20Sopenharmony_cierror_guest_mode: 64028c2ecf20Sopenharmony_ci vmx->nested.nested_run_pending = 0; 64038c2ecf20Sopenharmony_ci return ret; 
64048c2ecf20Sopenharmony_ci} 64058c2ecf20Sopenharmony_ci 64068c2ecf20Sopenharmony_civoid nested_vmx_set_vmcs_shadowing_bitmap(void) 64078c2ecf20Sopenharmony_ci{ 64088c2ecf20Sopenharmony_ci if (enable_shadow_vmcs) { 64098c2ecf20Sopenharmony_ci vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); 64108c2ecf20Sopenharmony_ci vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); 64118c2ecf20Sopenharmony_ci } 64128c2ecf20Sopenharmony_ci} 64138c2ecf20Sopenharmony_ci 64148c2ecf20Sopenharmony_ci/* 64158c2ecf20Sopenharmony_ci * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be 64168c2ecf20Sopenharmony_ci * returned for the various VMX controls MSRs when nested VMX is enabled. 64178c2ecf20Sopenharmony_ci * The same values should also be used to verify that vmcs12 control fields are 64188c2ecf20Sopenharmony_ci * valid during nested entry from L1 to L2. 64198c2ecf20Sopenharmony_ci * Each of these control msrs has a low and high 32-bit half: A low bit is on 64208c2ecf20Sopenharmony_ci * if the corresponding bit in the (32-bit) control field *must* be on, and a 64218c2ecf20Sopenharmony_ci * bit in the high half is on if the corresponding bit in the control field 64228c2ecf20Sopenharmony_ci * may be on. See also vmx_control_verify(). 64238c2ecf20Sopenharmony_ci */ 64248c2ecf20Sopenharmony_civoid nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) 64258c2ecf20Sopenharmony_ci{ 64268c2ecf20Sopenharmony_ci /* 64278c2ecf20Sopenharmony_ci * Note that as a general rule, the high half of the MSRs (bits in 64288c2ecf20Sopenharmony_ci * the control fields which may be 1) should be initialized by the 64298c2ecf20Sopenharmony_ci * intersection of the underlying hardware's MSR (i.e., features which 64308c2ecf20Sopenharmony_ci * can be supported) and the list of features we want to expose - 64318c2ecf20Sopenharmony_ci * because they are known to be properly supported in our code. 
64328c2ecf20Sopenharmony_ci * Also, usually, the low half of the MSRs (bits which must be 1) can 64338c2ecf20Sopenharmony_ci * be set to 0, meaning that L1 may turn off any of these bits. The 64348c2ecf20Sopenharmony_ci * reason is that if one of these bits is necessary, it will appear 64358c2ecf20Sopenharmony_ci * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control 64368c2ecf20Sopenharmony_ci * fields of vmcs01 and vmcs02, will turn these bits off - and 64378c2ecf20Sopenharmony_ci * nested_vmx_l1_wants_exit() will not pass related exits to L1. 64388c2ecf20Sopenharmony_ci * These rules have exceptions below. 64398c2ecf20Sopenharmony_ci */ 64408c2ecf20Sopenharmony_ci 64418c2ecf20Sopenharmony_ci /* pin-based controls */ 64428c2ecf20Sopenharmony_ci rdmsr(MSR_IA32_VMX_PINBASED_CTLS, 64438c2ecf20Sopenharmony_ci msrs->pinbased_ctls_low, 64448c2ecf20Sopenharmony_ci msrs->pinbased_ctls_high); 64458c2ecf20Sopenharmony_ci msrs->pinbased_ctls_low |= 64468c2ecf20Sopenharmony_ci PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; 64478c2ecf20Sopenharmony_ci msrs->pinbased_ctls_high &= 64488c2ecf20Sopenharmony_ci PIN_BASED_EXT_INTR_MASK | 64498c2ecf20Sopenharmony_ci PIN_BASED_NMI_EXITING | 64508c2ecf20Sopenharmony_ci PIN_BASED_VIRTUAL_NMIS | 64518c2ecf20Sopenharmony_ci (enable_apicv ? 
PIN_BASED_POSTED_INTR : 0); 64528c2ecf20Sopenharmony_ci msrs->pinbased_ctls_high |= 64538c2ecf20Sopenharmony_ci PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | 64548c2ecf20Sopenharmony_ci PIN_BASED_VMX_PREEMPTION_TIMER; 64558c2ecf20Sopenharmony_ci 64568c2ecf20Sopenharmony_ci /* exit controls */ 64578c2ecf20Sopenharmony_ci rdmsr(MSR_IA32_VMX_EXIT_CTLS, 64588c2ecf20Sopenharmony_ci msrs->exit_ctls_low, 64598c2ecf20Sopenharmony_ci msrs->exit_ctls_high); 64608c2ecf20Sopenharmony_ci msrs->exit_ctls_low = 64618c2ecf20Sopenharmony_ci VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; 64628c2ecf20Sopenharmony_ci 64638c2ecf20Sopenharmony_ci msrs->exit_ctls_high &= 64648c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 64658c2ecf20Sopenharmony_ci VM_EXIT_HOST_ADDR_SPACE_SIZE | 64668c2ecf20Sopenharmony_ci#endif 64678c2ecf20Sopenharmony_ci VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | 64688c2ecf20Sopenharmony_ci VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; 64698c2ecf20Sopenharmony_ci msrs->exit_ctls_high |= 64708c2ecf20Sopenharmony_ci VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | 64718c2ecf20Sopenharmony_ci VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | 64728c2ecf20Sopenharmony_ci VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; 64738c2ecf20Sopenharmony_ci 64748c2ecf20Sopenharmony_ci /* We support free control of debug control saving. 
*/ 64758c2ecf20Sopenharmony_ci msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; 64768c2ecf20Sopenharmony_ci 64778c2ecf20Sopenharmony_ci /* entry controls */ 64788c2ecf20Sopenharmony_ci rdmsr(MSR_IA32_VMX_ENTRY_CTLS, 64798c2ecf20Sopenharmony_ci msrs->entry_ctls_low, 64808c2ecf20Sopenharmony_ci msrs->entry_ctls_high); 64818c2ecf20Sopenharmony_ci msrs->entry_ctls_low = 64828c2ecf20Sopenharmony_ci VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; 64838c2ecf20Sopenharmony_ci msrs->entry_ctls_high &= 64848c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 64858c2ecf20Sopenharmony_ci VM_ENTRY_IA32E_MODE | 64868c2ecf20Sopenharmony_ci#endif 64878c2ecf20Sopenharmony_ci VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS | 64888c2ecf20Sopenharmony_ci VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; 64898c2ecf20Sopenharmony_ci msrs->entry_ctls_high |= 64908c2ecf20Sopenharmony_ci (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); 64918c2ecf20Sopenharmony_ci 64928c2ecf20Sopenharmony_ci /* We support free control of debug control loading. 
*/ 64938c2ecf20Sopenharmony_ci msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; 64948c2ecf20Sopenharmony_ci 64958c2ecf20Sopenharmony_ci /* cpu-based controls */ 64968c2ecf20Sopenharmony_ci rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, 64978c2ecf20Sopenharmony_ci msrs->procbased_ctls_low, 64988c2ecf20Sopenharmony_ci msrs->procbased_ctls_high); 64998c2ecf20Sopenharmony_ci msrs->procbased_ctls_low = 65008c2ecf20Sopenharmony_ci CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; 65018c2ecf20Sopenharmony_ci msrs->procbased_ctls_high &= 65028c2ecf20Sopenharmony_ci CPU_BASED_INTR_WINDOW_EXITING | 65038c2ecf20Sopenharmony_ci CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING | 65048c2ecf20Sopenharmony_ci CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | 65058c2ecf20Sopenharmony_ci CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | 65068c2ecf20Sopenharmony_ci CPU_BASED_CR3_STORE_EXITING | 65078c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 65088c2ecf20Sopenharmony_ci CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING | 65098c2ecf20Sopenharmony_ci#endif 65108c2ecf20Sopenharmony_ci CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | 65118c2ecf20Sopenharmony_ci CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG | 65128c2ecf20Sopenharmony_ci CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING | 65138c2ecf20Sopenharmony_ci CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING | 65148c2ecf20Sopenharmony_ci CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; 65158c2ecf20Sopenharmony_ci /* 65168c2ecf20Sopenharmony_ci * We can allow some features even when not supported by the 65178c2ecf20Sopenharmony_ci * hardware. For example, L1 can specify an MSR bitmap - and we 65188c2ecf20Sopenharmony_ci * can use it to avoid exits to L1 - even when L0 runs L2 65198c2ecf20Sopenharmony_ci * without MSR bitmaps. 
65208c2ecf20Sopenharmony_ci */ 65218c2ecf20Sopenharmony_ci msrs->procbased_ctls_high |= 65228c2ecf20Sopenharmony_ci CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | 65238c2ecf20Sopenharmony_ci CPU_BASED_USE_MSR_BITMAPS; 65248c2ecf20Sopenharmony_ci 65258c2ecf20Sopenharmony_ci /* We support free control of CR3 access interception. */ 65268c2ecf20Sopenharmony_ci msrs->procbased_ctls_low &= 65278c2ecf20Sopenharmony_ci ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); 65288c2ecf20Sopenharmony_ci 65298c2ecf20Sopenharmony_ci /* 65308c2ecf20Sopenharmony_ci * secondary cpu-based controls. Do not include those that 65318c2ecf20Sopenharmony_ci * depend on CPUID bits, they are added later by 65328c2ecf20Sopenharmony_ci * vmx_vcpu_after_set_cpuid. 65338c2ecf20Sopenharmony_ci */ 65348c2ecf20Sopenharmony_ci if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) 65358c2ecf20Sopenharmony_ci rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, 65368c2ecf20Sopenharmony_ci msrs->secondary_ctls_low, 65378c2ecf20Sopenharmony_ci msrs->secondary_ctls_high); 65388c2ecf20Sopenharmony_ci 65398c2ecf20Sopenharmony_ci msrs->secondary_ctls_low = 0; 65408c2ecf20Sopenharmony_ci msrs->secondary_ctls_high &= 65418c2ecf20Sopenharmony_ci SECONDARY_EXEC_DESC | 65428c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_RDTSCP | 65438c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | 65448c2ecf20Sopenharmony_ci SECONDARY_EXEC_WBINVD_EXITING | 65458c2ecf20Sopenharmony_ci SECONDARY_EXEC_APIC_REGISTER_VIRT | 65468c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 65478c2ecf20Sopenharmony_ci SECONDARY_EXEC_RDRAND_EXITING | 65488c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_INVPCID | 65498c2ecf20Sopenharmony_ci SECONDARY_EXEC_RDSEED_EXITING | 65508c2ecf20Sopenharmony_ci SECONDARY_EXEC_XSAVES; 65518c2ecf20Sopenharmony_ci 65528c2ecf20Sopenharmony_ci /* 65538c2ecf20Sopenharmony_ci * We can emulate "VMCS shadowing," even if the hardware 65548c2ecf20Sopenharmony_ci * doesn't support it. 
65558c2ecf20Sopenharmony_ci */ 65568c2ecf20Sopenharmony_ci msrs->secondary_ctls_high |= 65578c2ecf20Sopenharmony_ci SECONDARY_EXEC_SHADOW_VMCS; 65588c2ecf20Sopenharmony_ci 65598c2ecf20Sopenharmony_ci if (enable_ept) { 65608c2ecf20Sopenharmony_ci /* nested EPT: emulate EPT also to L1 */ 65618c2ecf20Sopenharmony_ci msrs->secondary_ctls_high |= 65628c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_EPT; 65638c2ecf20Sopenharmony_ci msrs->ept_caps = 65648c2ecf20Sopenharmony_ci VMX_EPT_PAGE_WALK_4_BIT | 65658c2ecf20Sopenharmony_ci VMX_EPT_PAGE_WALK_5_BIT | 65668c2ecf20Sopenharmony_ci VMX_EPTP_WB_BIT | 65678c2ecf20Sopenharmony_ci VMX_EPT_INVEPT_BIT | 65688c2ecf20Sopenharmony_ci VMX_EPT_EXECUTE_ONLY_BIT; 65698c2ecf20Sopenharmony_ci 65708c2ecf20Sopenharmony_ci msrs->ept_caps &= ept_caps; 65718c2ecf20Sopenharmony_ci msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | 65728c2ecf20Sopenharmony_ci VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT | 65738c2ecf20Sopenharmony_ci VMX_EPT_1GB_PAGE_BIT; 65748c2ecf20Sopenharmony_ci if (enable_ept_ad_bits) { 65758c2ecf20Sopenharmony_ci msrs->secondary_ctls_high |= 65768c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_PML; 65778c2ecf20Sopenharmony_ci msrs->ept_caps |= VMX_EPT_AD_BIT; 65788c2ecf20Sopenharmony_ci } 65798c2ecf20Sopenharmony_ci } 65808c2ecf20Sopenharmony_ci 65818c2ecf20Sopenharmony_ci if (cpu_has_vmx_vmfunc()) { 65828c2ecf20Sopenharmony_ci msrs->secondary_ctls_high |= 65838c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_VMFUNC; 65848c2ecf20Sopenharmony_ci /* 65858c2ecf20Sopenharmony_ci * Advertise EPTP switching unconditionally 65868c2ecf20Sopenharmony_ci * since we emulate it 65878c2ecf20Sopenharmony_ci */ 65888c2ecf20Sopenharmony_ci if (enable_ept) 65898c2ecf20Sopenharmony_ci msrs->vmfunc_controls = 65908c2ecf20Sopenharmony_ci VMX_VMFUNC_EPTP_SWITCHING; 65918c2ecf20Sopenharmony_ci } 65928c2ecf20Sopenharmony_ci 65938c2ecf20Sopenharmony_ci /* 65948c2ecf20Sopenharmony_ci * Old versions of KVM use the single-context version without 
65958c2ecf20Sopenharmony_ci * checking for support, so declare that it is supported even 65968c2ecf20Sopenharmony_ci * though it is treated as global context. The alternative is 65978c2ecf20Sopenharmony_ci * not failing the single-context invvpid, and it is worse. 65988c2ecf20Sopenharmony_ci */ 65998c2ecf20Sopenharmony_ci if (enable_vpid) { 66008c2ecf20Sopenharmony_ci msrs->secondary_ctls_high |= 66018c2ecf20Sopenharmony_ci SECONDARY_EXEC_ENABLE_VPID; 66028c2ecf20Sopenharmony_ci msrs->vpid_caps = VMX_VPID_INVVPID_BIT | 66038c2ecf20Sopenharmony_ci VMX_VPID_EXTENT_SUPPORTED_MASK; 66048c2ecf20Sopenharmony_ci } 66058c2ecf20Sopenharmony_ci 66068c2ecf20Sopenharmony_ci if (enable_unrestricted_guest) 66078c2ecf20Sopenharmony_ci msrs->secondary_ctls_high |= 66088c2ecf20Sopenharmony_ci SECONDARY_EXEC_UNRESTRICTED_GUEST; 66098c2ecf20Sopenharmony_ci 66108c2ecf20Sopenharmony_ci if (flexpriority_enabled) 66118c2ecf20Sopenharmony_ci msrs->secondary_ctls_high |= 66128c2ecf20Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 66138c2ecf20Sopenharmony_ci 66148c2ecf20Sopenharmony_ci /* miscellaneous data */ 66158c2ecf20Sopenharmony_ci rdmsr(MSR_IA32_VMX_MISC, 66168c2ecf20Sopenharmony_ci msrs->misc_low, 66178c2ecf20Sopenharmony_ci msrs->misc_high); 66188c2ecf20Sopenharmony_ci msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA; 66198c2ecf20Sopenharmony_ci msrs->misc_low |= 66208c2ecf20Sopenharmony_ci MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | 66218c2ecf20Sopenharmony_ci VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | 66228c2ecf20Sopenharmony_ci VMX_MISC_ACTIVITY_HLT; 66238c2ecf20Sopenharmony_ci msrs->misc_high = 0; 66248c2ecf20Sopenharmony_ci 66258c2ecf20Sopenharmony_ci /* 66268c2ecf20Sopenharmony_ci * This MSR reports some information about VMX support. 
We 66278c2ecf20Sopenharmony_ci * should return information about the VMX we emulate for the 66288c2ecf20Sopenharmony_ci * guest, and the VMCS structure we give it - not about the 66298c2ecf20Sopenharmony_ci * VMX support of the underlying hardware. 66308c2ecf20Sopenharmony_ci */ 66318c2ecf20Sopenharmony_ci msrs->basic = 66328c2ecf20Sopenharmony_ci VMCS12_REVISION | 66338c2ecf20Sopenharmony_ci VMX_BASIC_TRUE_CTLS | 66348c2ecf20Sopenharmony_ci ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | 66358c2ecf20Sopenharmony_ci (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); 66368c2ecf20Sopenharmony_ci 66378c2ecf20Sopenharmony_ci if (cpu_has_vmx_basic_inout()) 66388c2ecf20Sopenharmony_ci msrs->basic |= VMX_BASIC_INOUT; 66398c2ecf20Sopenharmony_ci 66408c2ecf20Sopenharmony_ci /* 66418c2ecf20Sopenharmony_ci * These MSRs specify bits which the guest must keep fixed on 66428c2ecf20Sopenharmony_ci * while L1 is in VMXON mode (in L1's root mode, or running an L2). 66438c2ecf20Sopenharmony_ci * We picked the standard core2 setting. 66448c2ecf20Sopenharmony_ci */ 66458c2ecf20Sopenharmony_ci#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE) 66468c2ecf20Sopenharmony_ci#define VMXON_CR4_ALWAYSON X86_CR4_VMXE 66478c2ecf20Sopenharmony_ci msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON; 66488c2ecf20Sopenharmony_ci msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON; 66498c2ecf20Sopenharmony_ci 66508c2ecf20Sopenharmony_ci /* These MSRs specify bits which the guest must keep fixed off. 
*/ 66518c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1); 66528c2ecf20Sopenharmony_ci rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1); 66538c2ecf20Sopenharmony_ci 66548c2ecf20Sopenharmony_ci /* highest index: VMX_PREEMPTION_TIMER_VALUE */ 66558c2ecf20Sopenharmony_ci msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1; 66568c2ecf20Sopenharmony_ci} 66578c2ecf20Sopenharmony_ci 66588c2ecf20Sopenharmony_civoid nested_vmx_hardware_unsetup(void) 66598c2ecf20Sopenharmony_ci{ 66608c2ecf20Sopenharmony_ci int i; 66618c2ecf20Sopenharmony_ci 66628c2ecf20Sopenharmony_ci if (enable_shadow_vmcs) { 66638c2ecf20Sopenharmony_ci for (i = 0; i < VMX_BITMAP_NR; i++) 66648c2ecf20Sopenharmony_ci free_page((unsigned long)vmx_bitmap[i]); 66658c2ecf20Sopenharmony_ci } 66668c2ecf20Sopenharmony_ci} 66678c2ecf20Sopenharmony_ci 66688c2ecf20Sopenharmony_ci__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) 66698c2ecf20Sopenharmony_ci{ 66708c2ecf20Sopenharmony_ci int i; 66718c2ecf20Sopenharmony_ci 66728c2ecf20Sopenharmony_ci if (!cpu_has_vmx_shadow_vmcs()) 66738c2ecf20Sopenharmony_ci enable_shadow_vmcs = 0; 66748c2ecf20Sopenharmony_ci if (enable_shadow_vmcs) { 66758c2ecf20Sopenharmony_ci for (i = 0; i < VMX_BITMAP_NR; i++) { 66768c2ecf20Sopenharmony_ci /* 66778c2ecf20Sopenharmony_ci * The vmx_bitmap is not tied to a VM and so should 66788c2ecf20Sopenharmony_ci * not be charged to a memcg. 
66798c2ecf20Sopenharmony_ci */ 66808c2ecf20Sopenharmony_ci vmx_bitmap[i] = (unsigned long *) 66818c2ecf20Sopenharmony_ci __get_free_page(GFP_KERNEL); 66828c2ecf20Sopenharmony_ci if (!vmx_bitmap[i]) { 66838c2ecf20Sopenharmony_ci nested_vmx_hardware_unsetup(); 66848c2ecf20Sopenharmony_ci return -ENOMEM; 66858c2ecf20Sopenharmony_ci } 66868c2ecf20Sopenharmony_ci } 66878c2ecf20Sopenharmony_ci 66888c2ecf20Sopenharmony_ci init_vmcs_shadow_fields(); 66898c2ecf20Sopenharmony_ci } 66908c2ecf20Sopenharmony_ci 66918c2ecf20Sopenharmony_ci exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear; 66928c2ecf20Sopenharmony_ci exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch; 66938c2ecf20Sopenharmony_ci exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld; 66948c2ecf20Sopenharmony_ci exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst; 66958c2ecf20Sopenharmony_ci exit_handlers[EXIT_REASON_VMREAD] = handle_vmread; 66968c2ecf20Sopenharmony_ci exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume; 66978c2ecf20Sopenharmony_ci exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite; 66988c2ecf20Sopenharmony_ci exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff; 66998c2ecf20Sopenharmony_ci exit_handlers[EXIT_REASON_VMON] = handle_vmon; 67008c2ecf20Sopenharmony_ci exit_handlers[EXIT_REASON_INVEPT] = handle_invept; 67018c2ecf20Sopenharmony_ci exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid; 67028c2ecf20Sopenharmony_ci exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc; 67038c2ecf20Sopenharmony_ci 67048c2ecf20Sopenharmony_ci return 0; 67058c2ecf20Sopenharmony_ci} 67068c2ecf20Sopenharmony_ci 67078c2ecf20Sopenharmony_cistruct kvm_x86_nested_ops vmx_nested_ops = { 67088c2ecf20Sopenharmony_ci .leave_nested = vmx_leave_nested, 67098c2ecf20Sopenharmony_ci .check_events = vmx_check_nested_events, 67108c2ecf20Sopenharmony_ci .hv_timer_pending = nested_vmx_preemption_timer_pending, 67118c2ecf20Sopenharmony_ci .get_state = vmx_get_nested_state, 67128c2ecf20Sopenharmony_ci .set_state = 
vmx_set_nested_state, 67138c2ecf20Sopenharmony_ci .get_nested_state_pages = vmx_get_nested_state_pages, 67148c2ecf20Sopenharmony_ci .write_log_dirty = nested_vmx_write_pml_buffer, 67158c2ecf20Sopenharmony_ci .enable_evmcs = nested_enable_evmcs, 67168c2ecf20Sopenharmony_ci .get_evmcs_version = nested_get_evmcs_version, 67178c2ecf20Sopenharmony_ci}; 6718