162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 362306a36Sopenharmony_ci 462306a36Sopenharmony_ci#include <linux/objtool.h> 562306a36Sopenharmony_ci#include <linux/percpu.h> 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci#include <asm/debugreg.h> 862306a36Sopenharmony_ci#include <asm/mmu_context.h> 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include "cpuid.h" 1162306a36Sopenharmony_ci#include "hyperv.h" 1262306a36Sopenharmony_ci#include "mmu.h" 1362306a36Sopenharmony_ci#include "nested.h" 1462306a36Sopenharmony_ci#include "pmu.h" 1562306a36Sopenharmony_ci#include "sgx.h" 1662306a36Sopenharmony_ci#include "trace.h" 1762306a36Sopenharmony_ci#include "vmx.h" 1862306a36Sopenharmony_ci#include "x86.h" 1962306a36Sopenharmony_ci#include "smm.h" 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_cistatic bool __read_mostly enable_shadow_vmcs = 1; 2262306a36Sopenharmony_cimodule_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_cistatic bool __read_mostly nested_early_check = 0; 2562306a36Sopenharmony_cimodule_param(nested_early_check, bool, S_IRUGO); 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci/* 3062306a36Sopenharmony_ci * Hyper-V requires all of these, so mark them as supported even though 3162306a36Sopenharmony_ci * they are just treated the same as all-context. 
3262306a36Sopenharmony_ci */ 3362306a36Sopenharmony_ci#define VMX_VPID_EXTENT_SUPPORTED_MASK \ 3462306a36Sopenharmony_ci (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \ 3562306a36Sopenharmony_ci VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \ 3662306a36Sopenharmony_ci VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \ 3762306a36Sopenharmony_ci VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT) 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_cienum { 4262306a36Sopenharmony_ci VMX_VMREAD_BITMAP, 4362306a36Sopenharmony_ci VMX_VMWRITE_BITMAP, 4462306a36Sopenharmony_ci VMX_BITMAP_NR 4562306a36Sopenharmony_ci}; 4662306a36Sopenharmony_cistatic unsigned long *vmx_bitmap[VMX_BITMAP_NR]; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) 4962306a36Sopenharmony_ci#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_cistruct shadow_vmcs_field { 5262306a36Sopenharmony_ci u16 encoding; 5362306a36Sopenharmony_ci u16 offset; 5462306a36Sopenharmony_ci}; 5562306a36Sopenharmony_cistatic struct shadow_vmcs_field shadow_read_only_fields[] = { 5662306a36Sopenharmony_ci#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) }, 5762306a36Sopenharmony_ci#include "vmcs_shadow_fields.h" 5862306a36Sopenharmony_ci}; 5962306a36Sopenharmony_cistatic int max_shadow_read_only_fields = 6062306a36Sopenharmony_ci ARRAY_SIZE(shadow_read_only_fields); 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_cistatic struct shadow_vmcs_field shadow_read_write_fields[] = { 6362306a36Sopenharmony_ci#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) }, 6462306a36Sopenharmony_ci#include "vmcs_shadow_fields.h" 6562306a36Sopenharmony_ci}; 6662306a36Sopenharmony_cistatic int max_shadow_read_write_fields = 6762306a36Sopenharmony_ci ARRAY_SIZE(shadow_read_write_fields); 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_cistatic 
void init_vmcs_shadow_fields(void) 7062306a36Sopenharmony_ci{ 7162306a36Sopenharmony_ci int i, j; 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_ci memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); 7462306a36Sopenharmony_ci memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci for (i = j = 0; i < max_shadow_read_only_fields; i++) { 7762306a36Sopenharmony_ci struct shadow_vmcs_field entry = shadow_read_only_fields[i]; 7862306a36Sopenharmony_ci u16 field = entry.encoding; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && 8162306a36Sopenharmony_ci (i + 1 == max_shadow_read_only_fields || 8262306a36Sopenharmony_ci shadow_read_only_fields[i + 1].encoding != field + 1)) 8362306a36Sopenharmony_ci pr_err("Missing field from shadow_read_only_field %x\n", 8462306a36Sopenharmony_ci field + 1); 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci clear_bit(field, vmx_vmread_bitmap); 8762306a36Sopenharmony_ci if (field & 1) 8862306a36Sopenharmony_ci#ifdef CONFIG_X86_64 8962306a36Sopenharmony_ci continue; 9062306a36Sopenharmony_ci#else 9162306a36Sopenharmony_ci entry.offset += sizeof(u32); 9262306a36Sopenharmony_ci#endif 9362306a36Sopenharmony_ci shadow_read_only_fields[j++] = entry; 9462306a36Sopenharmony_ci } 9562306a36Sopenharmony_ci max_shadow_read_only_fields = j; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci for (i = j = 0; i < max_shadow_read_write_fields; i++) { 9862306a36Sopenharmony_ci struct shadow_vmcs_field entry = shadow_read_write_fields[i]; 9962306a36Sopenharmony_ci u16 field = entry.encoding; 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && 10262306a36Sopenharmony_ci (i + 1 == max_shadow_read_write_fields || 10362306a36Sopenharmony_ci shadow_read_write_fields[i + 1].encoding != field + 1)) 10462306a36Sopenharmony_ci pr_err("Missing field from shadow_read_write_field %x\n", 10562306a36Sopenharmony_ci 
field + 1); 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci WARN_ONCE(field >= GUEST_ES_AR_BYTES && 10862306a36Sopenharmony_ci field <= GUEST_TR_AR_BYTES, 10962306a36Sopenharmony_ci "Update vmcs12_write_any() to drop reserved bits from AR_BYTES"); 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ci /* 11262306a36Sopenharmony_ci * PML and the preemption timer can be emulated, but the 11362306a36Sopenharmony_ci * processor cannot vmwrite to fields that don't exist 11462306a36Sopenharmony_ci * on bare metal. 11562306a36Sopenharmony_ci */ 11662306a36Sopenharmony_ci switch (field) { 11762306a36Sopenharmony_ci case GUEST_PML_INDEX: 11862306a36Sopenharmony_ci if (!cpu_has_vmx_pml()) 11962306a36Sopenharmony_ci continue; 12062306a36Sopenharmony_ci break; 12162306a36Sopenharmony_ci case VMX_PREEMPTION_TIMER_VALUE: 12262306a36Sopenharmony_ci if (!cpu_has_vmx_preemption_timer()) 12362306a36Sopenharmony_ci continue; 12462306a36Sopenharmony_ci break; 12562306a36Sopenharmony_ci case GUEST_INTR_STATUS: 12662306a36Sopenharmony_ci if (!cpu_has_vmx_apicv()) 12762306a36Sopenharmony_ci continue; 12862306a36Sopenharmony_ci break; 12962306a36Sopenharmony_ci default: 13062306a36Sopenharmony_ci break; 13162306a36Sopenharmony_ci } 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci clear_bit(field, vmx_vmwrite_bitmap); 13462306a36Sopenharmony_ci clear_bit(field, vmx_vmread_bitmap); 13562306a36Sopenharmony_ci if (field & 1) 13662306a36Sopenharmony_ci#ifdef CONFIG_X86_64 13762306a36Sopenharmony_ci continue; 13862306a36Sopenharmony_ci#else 13962306a36Sopenharmony_ci entry.offset += sizeof(u32); 14062306a36Sopenharmony_ci#endif 14162306a36Sopenharmony_ci shadow_read_write_fields[j++] = entry; 14262306a36Sopenharmony_ci } 14362306a36Sopenharmony_ci max_shadow_read_write_fields = j; 14462306a36Sopenharmony_ci} 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_ci/* 14762306a36Sopenharmony_ci * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), 
14862306a36Sopenharmony_ci * set the success or error code of an emulated VMX instruction (as specified 14962306a36Sopenharmony_ci * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated 15062306a36Sopenharmony_ci * instruction. 15162306a36Sopenharmony_ci */ 15262306a36Sopenharmony_cistatic int nested_vmx_succeed(struct kvm_vcpu *vcpu) 15362306a36Sopenharmony_ci{ 15462306a36Sopenharmony_ci vmx_set_rflags(vcpu, vmx_get_rflags(vcpu) 15562306a36Sopenharmony_ci & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | 15662306a36Sopenharmony_ci X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)); 15762306a36Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 15862306a36Sopenharmony_ci} 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_cistatic int nested_vmx_failInvalid(struct kvm_vcpu *vcpu) 16162306a36Sopenharmony_ci{ 16262306a36Sopenharmony_ci vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) 16362306a36Sopenharmony_ci & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | 16462306a36Sopenharmony_ci X86_EFLAGS_SF | X86_EFLAGS_OF)) 16562306a36Sopenharmony_ci | X86_EFLAGS_CF); 16662306a36Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 16762306a36Sopenharmony_ci} 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_cistatic int nested_vmx_failValid(struct kvm_vcpu *vcpu, 17062306a36Sopenharmony_ci u32 vm_instruction_error) 17162306a36Sopenharmony_ci{ 17262306a36Sopenharmony_ci vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) 17362306a36Sopenharmony_ci & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | 17462306a36Sopenharmony_ci X86_EFLAGS_SF | X86_EFLAGS_OF)) 17562306a36Sopenharmony_ci | X86_EFLAGS_ZF); 17662306a36Sopenharmony_ci get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; 17762306a36Sopenharmony_ci /* 17862306a36Sopenharmony_ci * We don't need to force sync to shadow VMCS because 17962306a36Sopenharmony_ci * VM_INSTRUCTION_ERROR is not shadowed. 
Enlightened VMCS 'shadows' all 18062306a36Sopenharmony_ci * fields and thus must be synced. 18162306a36Sopenharmony_ci */ 18262306a36Sopenharmony_ci if (to_vmx(vcpu)->nested.hv_evmcs_vmptr != EVMPTR_INVALID) 18362306a36Sopenharmony_ci to_vmx(vcpu)->nested.need_vmcs12_to_shadow_sync = true; 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 18662306a36Sopenharmony_ci} 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_cistatic int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error) 18962306a36Sopenharmony_ci{ 19062306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci /* 19362306a36Sopenharmony_ci * failValid writes the error number to the current VMCS, which 19462306a36Sopenharmony_ci * can't be done if there isn't a current VMCS. 19562306a36Sopenharmony_ci */ 19662306a36Sopenharmony_ci if (vmx->nested.current_vmptr == INVALID_GPA && 19762306a36Sopenharmony_ci !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) 19862306a36Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci return nested_vmx_failValid(vcpu, vm_instruction_error); 20162306a36Sopenharmony_ci} 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_cistatic void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator) 20462306a36Sopenharmony_ci{ 20562306a36Sopenharmony_ci /* TODO: not to reset guest simply here. 
*/ 20662306a36Sopenharmony_ci kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); 20762306a36Sopenharmony_ci pr_debug_ratelimited("nested vmx abort, indicator %d\n", indicator); 20862306a36Sopenharmony_ci} 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_cistatic inline bool vmx_control_verify(u32 control, u32 low, u32 high) 21162306a36Sopenharmony_ci{ 21262306a36Sopenharmony_ci return fixed_bits_valid(control, low, high); 21362306a36Sopenharmony_ci} 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_cistatic inline u64 vmx_control_msr(u32 low, u32 high) 21662306a36Sopenharmony_ci{ 21762306a36Sopenharmony_ci return low | ((u64)high << 32); 21862306a36Sopenharmony_ci} 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_cistatic void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) 22162306a36Sopenharmony_ci{ 22262306a36Sopenharmony_ci secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); 22362306a36Sopenharmony_ci vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA); 22462306a36Sopenharmony_ci vmx->nested.need_vmcs12_to_shadow_sync = false; 22562306a36Sopenharmony_ci} 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_cistatic inline void nested_release_evmcs(struct kvm_vcpu *vcpu) 22862306a36Sopenharmony_ci{ 22962306a36Sopenharmony_ci struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); 23062306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) { 23362306a36Sopenharmony_ci kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true); 23462306a36Sopenharmony_ci vmx->nested.hv_evmcs = NULL; 23562306a36Sopenharmony_ci } 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_ci vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci if (hv_vcpu) { 24062306a36Sopenharmony_ci hv_vcpu->nested.pa_page_gpa = INVALID_GPA; 24162306a36Sopenharmony_ci hv_vcpu->nested.vm_id = 0; 24262306a36Sopenharmony_ci hv_vcpu->nested.vp_id = 0; 
24362306a36Sopenharmony_ci } 24462306a36Sopenharmony_ci} 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_cistatic void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx, 24762306a36Sopenharmony_ci struct loaded_vmcs *prev) 24862306a36Sopenharmony_ci{ 24962306a36Sopenharmony_ci struct vmcs_host_state *dest, *src; 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci if (unlikely(!vmx->guest_state_loaded)) 25262306a36Sopenharmony_ci return; 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci src = &prev->host_state; 25562306a36Sopenharmony_ci dest = &vmx->loaded_vmcs->host_state; 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base); 25862306a36Sopenharmony_ci dest->ldt_sel = src->ldt_sel; 25962306a36Sopenharmony_ci#ifdef CONFIG_X86_64 26062306a36Sopenharmony_ci dest->ds_sel = src->ds_sel; 26162306a36Sopenharmony_ci dest->es_sel = src->es_sel; 26262306a36Sopenharmony_ci#endif 26362306a36Sopenharmony_ci} 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_cistatic void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) 26662306a36Sopenharmony_ci{ 26762306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 26862306a36Sopenharmony_ci struct loaded_vmcs *prev; 26962306a36Sopenharmony_ci int cpu; 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci if (WARN_ON_ONCE(vmx->loaded_vmcs == vmcs)) 27262306a36Sopenharmony_ci return; 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci cpu = get_cpu(); 27562306a36Sopenharmony_ci prev = vmx->loaded_vmcs; 27662306a36Sopenharmony_ci vmx->loaded_vmcs = vmcs; 27762306a36Sopenharmony_ci vmx_vcpu_load_vmcs(vcpu, cpu, prev); 27862306a36Sopenharmony_ci vmx_sync_vmcs_host_state(vmx, prev); 27962306a36Sopenharmony_ci put_cpu(); 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci vcpu->arch.regs_avail = ~VMX_REGS_LAZY_LOAD_SET; 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci /* 28462306a36Sopenharmony_ci * All lazily updated registers 
will be reloaded from VMCS12 on both 28562306a36Sopenharmony_ci * vmentry and vmexit. 28662306a36Sopenharmony_ci */ 28762306a36Sopenharmony_ci vcpu->arch.regs_dirty = 0; 28862306a36Sopenharmony_ci} 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci/* 29162306a36Sopenharmony_ci * Free whatever needs to be freed from vmx->nested when L1 goes down, or 29262306a36Sopenharmony_ci * just stops using VMX. 29362306a36Sopenharmony_ci */ 29462306a36Sopenharmony_cistatic void free_nested(struct kvm_vcpu *vcpu) 29562306a36Sopenharmony_ci{ 29662306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci if (WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01)) 29962306a36Sopenharmony_ci vmx_switch_vmcs(vcpu, &vmx->vmcs01); 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) 30262306a36Sopenharmony_ci return; 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci vmx->nested.vmxon = false; 30762306a36Sopenharmony_ci vmx->nested.smm.vmxon = false; 30862306a36Sopenharmony_ci vmx->nested.vmxon_ptr = INVALID_GPA; 30962306a36Sopenharmony_ci free_vpid(vmx->nested.vpid02); 31062306a36Sopenharmony_ci vmx->nested.posted_intr_nv = -1; 31162306a36Sopenharmony_ci vmx->nested.current_vmptr = INVALID_GPA; 31262306a36Sopenharmony_ci if (enable_shadow_vmcs) { 31362306a36Sopenharmony_ci vmx_disable_shadow_vmcs(vmx); 31462306a36Sopenharmony_ci vmcs_clear(vmx->vmcs01.shadow_vmcs); 31562306a36Sopenharmony_ci free_vmcs(vmx->vmcs01.shadow_vmcs); 31662306a36Sopenharmony_ci vmx->vmcs01.shadow_vmcs = NULL; 31762306a36Sopenharmony_ci } 31862306a36Sopenharmony_ci kfree(vmx->nested.cached_vmcs12); 31962306a36Sopenharmony_ci vmx->nested.cached_vmcs12 = NULL; 32062306a36Sopenharmony_ci kfree(vmx->nested.cached_shadow_vmcs12); 32162306a36Sopenharmony_ci vmx->nested.cached_shadow_vmcs12 = NULL; 
32262306a36Sopenharmony_ci /* 32362306a36Sopenharmony_ci * Unpin physical memory we referred to in the vmcs02. The APIC access 32462306a36Sopenharmony_ci * page's backing page (yeah, confusing) shouldn't actually be accessed, 32562306a36Sopenharmony_ci * and if it is written, the contents are irrelevant. 32662306a36Sopenharmony_ci */ 32762306a36Sopenharmony_ci kvm_vcpu_unmap(vcpu, &vmx->nested.apic_access_page_map, false); 32862306a36Sopenharmony_ci kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); 32962306a36Sopenharmony_ci kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); 33062306a36Sopenharmony_ci vmx->nested.pi_desc = NULL; 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci kvm_mmu_free_roots(vcpu->kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci nested_release_evmcs(vcpu); 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci free_loaded_vmcs(&vmx->nested.vmcs02); 33762306a36Sopenharmony_ci} 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci/* 34062306a36Sopenharmony_ci * Ensure that the current vmcs of the logical processor is the 34162306a36Sopenharmony_ci * vmcs01 of the vcpu before calling free_nested(). 
34262306a36Sopenharmony_ci */ 34362306a36Sopenharmony_civoid nested_vmx_free_vcpu(struct kvm_vcpu *vcpu) 34462306a36Sopenharmony_ci{ 34562306a36Sopenharmony_ci vcpu_load(vcpu); 34662306a36Sopenharmony_ci vmx_leave_nested(vcpu); 34762306a36Sopenharmony_ci vcpu_put(vcpu); 34862306a36Sopenharmony_ci} 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_ci#define EPTP_PA_MASK GENMASK_ULL(51, 12) 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_cistatic bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp) 35362306a36Sopenharmony_ci{ 35462306a36Sopenharmony_ci return VALID_PAGE(root_hpa) && 35562306a36Sopenharmony_ci ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK)); 35662306a36Sopenharmony_ci} 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_cistatic void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp, 35962306a36Sopenharmony_ci gpa_t addr) 36062306a36Sopenharmony_ci{ 36162306a36Sopenharmony_ci unsigned long roots = 0; 36262306a36Sopenharmony_ci uint i; 36362306a36Sopenharmony_ci struct kvm_mmu_root_info *cached_root; 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci WARN_ON_ONCE(!mmu_is_nested(vcpu)); 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { 36862306a36Sopenharmony_ci cached_root = &vcpu->arch.mmu->prev_roots[i]; 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ci if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd, 37162306a36Sopenharmony_ci eptp)) 37262306a36Sopenharmony_ci roots |= KVM_MMU_ROOT_PREVIOUS(i); 37362306a36Sopenharmony_ci } 37462306a36Sopenharmony_ci if (roots) 37562306a36Sopenharmony_ci kvm_mmu_invalidate_addr(vcpu, vcpu->arch.mmu, addr, roots); 37662306a36Sopenharmony_ci} 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_cistatic void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, 37962306a36Sopenharmony_ci struct x86_exception *fault) 38062306a36Sopenharmony_ci{ 38162306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 
38262306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 38362306a36Sopenharmony_ci u32 vm_exit_reason; 38462306a36Sopenharmony_ci unsigned long exit_qualification = vcpu->arch.exit_qualification; 38562306a36Sopenharmony_ci 38662306a36Sopenharmony_ci if (vmx->nested.pml_full) { 38762306a36Sopenharmony_ci vm_exit_reason = EXIT_REASON_PML_FULL; 38862306a36Sopenharmony_ci vmx->nested.pml_full = false; 38962306a36Sopenharmony_ci exit_qualification &= INTR_INFO_UNBLOCK_NMI; 39062306a36Sopenharmony_ci } else { 39162306a36Sopenharmony_ci if (fault->error_code & PFERR_RSVD_MASK) 39262306a36Sopenharmony_ci vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; 39362306a36Sopenharmony_ci else 39462306a36Sopenharmony_ci vm_exit_reason = EXIT_REASON_EPT_VIOLATION; 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_ci /* 39762306a36Sopenharmony_ci * Although the caller (kvm_inject_emulated_page_fault) would 39862306a36Sopenharmony_ci * have already synced the faulting address in the shadow EPT 39962306a36Sopenharmony_ci * tables for the current EPTP12, we also need to sync it for 40062306a36Sopenharmony_ci * any other cached EPTP02s based on the same EP4TA, since the 40162306a36Sopenharmony_ci * TLB associates mappings to the EP4TA rather than the full EPTP. 
40262306a36Sopenharmony_ci */ 40362306a36Sopenharmony_ci nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer, 40462306a36Sopenharmony_ci fault->address); 40562306a36Sopenharmony_ci } 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification); 40862306a36Sopenharmony_ci vmcs12->guest_physical_address = fault->address; 40962306a36Sopenharmony_ci} 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_cistatic void nested_ept_new_eptp(struct kvm_vcpu *vcpu) 41262306a36Sopenharmony_ci{ 41362306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 41462306a36Sopenharmony_ci bool execonly = vmx->nested.msrs.ept_caps & VMX_EPT_EXECUTE_ONLY_BIT; 41562306a36Sopenharmony_ci int ept_lpage_level = ept_caps_to_lpage_level(vmx->nested.msrs.ept_caps); 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci kvm_init_shadow_ept_mmu(vcpu, execonly, ept_lpage_level, 41862306a36Sopenharmony_ci nested_ept_ad_enabled(vcpu), 41962306a36Sopenharmony_ci nested_ept_get_eptp(vcpu)); 42062306a36Sopenharmony_ci} 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_cistatic void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) 42362306a36Sopenharmony_ci{ 42462306a36Sopenharmony_ci WARN_ON(mmu_is_nested(vcpu)); 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_ci vcpu->arch.mmu = &vcpu->arch.guest_mmu; 42762306a36Sopenharmony_ci nested_ept_new_eptp(vcpu); 42862306a36Sopenharmony_ci vcpu->arch.mmu->get_guest_pgd = nested_ept_get_eptp; 42962306a36Sopenharmony_ci vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault; 43062306a36Sopenharmony_ci vcpu->arch.mmu->get_pdptr = kvm_pdptr_read; 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 43362306a36Sopenharmony_ci} 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_cistatic void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) 43662306a36Sopenharmony_ci{ 43762306a36Sopenharmony_ci vcpu->arch.mmu = &vcpu->arch.root_mmu; 
43862306a36Sopenharmony_ci vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; 43962306a36Sopenharmony_ci} 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_cistatic bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, 44262306a36Sopenharmony_ci u16 error_code) 44362306a36Sopenharmony_ci{ 44462306a36Sopenharmony_ci bool inequality, bit; 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_ci bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0; 44762306a36Sopenharmony_ci inequality = 44862306a36Sopenharmony_ci (error_code & vmcs12->page_fault_error_code_mask) != 44962306a36Sopenharmony_ci vmcs12->page_fault_error_code_match; 45062306a36Sopenharmony_ci return inequality ^ bit; 45162306a36Sopenharmony_ci} 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_cistatic bool nested_vmx_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector, 45462306a36Sopenharmony_ci u32 error_code) 45562306a36Sopenharmony_ci{ 45662306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci /* 45962306a36Sopenharmony_ci * Drop bits 31:16 of the error code when performing the #PF mask+match 46062306a36Sopenharmony_ci * check. All VMCS fields involved are 32 bits, but Intel CPUs never 46162306a36Sopenharmony_ci * set bits 31:16 and VMX disallows setting bits 31:16 in the injected 46262306a36Sopenharmony_ci * error code. Including the to-be-dropped bits in the check might 46362306a36Sopenharmony_ci * result in an "impossible" or missed exit from L1's perspective. 
46462306a36Sopenharmony_ci */ 46562306a36Sopenharmony_ci if (vector == PF_VECTOR) 46662306a36Sopenharmony_ci return nested_vmx_is_page_fault_vmexit(vmcs12, (u16)error_code); 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci return (vmcs12->exception_bitmap & (1u << vector)); 46962306a36Sopenharmony_ci} 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_cistatic int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu, 47262306a36Sopenharmony_ci struct vmcs12 *vmcs12) 47362306a36Sopenharmony_ci{ 47462306a36Sopenharmony_ci if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) 47562306a36Sopenharmony_ci return 0; 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)) || 47862306a36Sopenharmony_ci CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b))) 47962306a36Sopenharmony_ci return -EINVAL; 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci return 0; 48262306a36Sopenharmony_ci} 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_cistatic int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu, 48562306a36Sopenharmony_ci struct vmcs12 *vmcs12) 48662306a36Sopenharmony_ci{ 48762306a36Sopenharmony_ci if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) 48862306a36Sopenharmony_ci return 0; 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci if (CC(!page_address_valid(vcpu, vmcs12->msr_bitmap))) 49162306a36Sopenharmony_ci return -EINVAL; 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_ci return 0; 49462306a36Sopenharmony_ci} 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_cistatic int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu, 49762306a36Sopenharmony_ci struct vmcs12 *vmcs12) 49862306a36Sopenharmony_ci{ 49962306a36Sopenharmony_ci if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) 50062306a36Sopenharmony_ci return 0; 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci if (CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr))) 50362306a36Sopenharmony_ci return 
-EINVAL; 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci return 0; 50662306a36Sopenharmony_ci} 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci/* 50962306a36Sopenharmony_ci * For x2APIC MSRs, ignore the vmcs01 bitmap. L1 can enable x2APIC without L1 51062306a36Sopenharmony_ci * itself utilizing x2APIC. All MSRs were previously set to be intercepted, 51162306a36Sopenharmony_ci * only the "disable intercept" case needs to be handled. 51262306a36Sopenharmony_ci */ 51362306a36Sopenharmony_cistatic void nested_vmx_disable_intercept_for_x2apic_msr(unsigned long *msr_bitmap_l1, 51462306a36Sopenharmony_ci unsigned long *msr_bitmap_l0, 51562306a36Sopenharmony_ci u32 msr, int type) 51662306a36Sopenharmony_ci{ 51762306a36Sopenharmony_ci if (type & MSR_TYPE_R && !vmx_test_msr_bitmap_read(msr_bitmap_l1, msr)) 51862306a36Sopenharmony_ci vmx_clear_msr_bitmap_read(msr_bitmap_l0, msr); 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci if (type & MSR_TYPE_W && !vmx_test_msr_bitmap_write(msr_bitmap_l1, msr)) 52162306a36Sopenharmony_ci vmx_clear_msr_bitmap_write(msr_bitmap_l0, msr); 52262306a36Sopenharmony_ci} 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_cistatic inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) 52562306a36Sopenharmony_ci{ 52662306a36Sopenharmony_ci int msr; 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ci for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { 52962306a36Sopenharmony_ci unsigned word = msr / BITS_PER_LONG; 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci msr_bitmap[word] = ~0; 53262306a36Sopenharmony_ci msr_bitmap[word + (0x800 / sizeof(long))] = ~0; 53362306a36Sopenharmony_ci } 53462306a36Sopenharmony_ci} 53562306a36Sopenharmony_ci 53662306a36Sopenharmony_ci#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw) \ 53762306a36Sopenharmony_cistatic inline \ 53862306a36Sopenharmony_civoid nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx, \ 53962306a36Sopenharmony_ci unsigned long *msr_bitmap_l1, \ 
54062306a36Sopenharmony_ci unsigned long *msr_bitmap_l0, u32 msr) \ 54162306a36Sopenharmony_ci{ \ 54262306a36Sopenharmony_ci if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) || \ 54362306a36Sopenharmony_ci vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr)) \ 54462306a36Sopenharmony_ci vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr); \ 54562306a36Sopenharmony_ci else \ 54662306a36Sopenharmony_ci vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr); \ 54762306a36Sopenharmony_ci} 54862306a36Sopenharmony_ciBUILD_NVMX_MSR_INTERCEPT_HELPER(read) 54962306a36Sopenharmony_ciBUILD_NVMX_MSR_INTERCEPT_HELPER(write) 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_cistatic inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx, 55262306a36Sopenharmony_ci unsigned long *msr_bitmap_l1, 55362306a36Sopenharmony_ci unsigned long *msr_bitmap_l0, 55462306a36Sopenharmony_ci u32 msr, int types) 55562306a36Sopenharmony_ci{ 55662306a36Sopenharmony_ci if (types & MSR_TYPE_R) 55762306a36Sopenharmony_ci nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1, 55862306a36Sopenharmony_ci msr_bitmap_l0, msr); 55962306a36Sopenharmony_ci if (types & MSR_TYPE_W) 56062306a36Sopenharmony_ci nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1, 56162306a36Sopenharmony_ci msr_bitmap_l0, msr); 56262306a36Sopenharmony_ci} 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci/* 56562306a36Sopenharmony_ci * Merge L0's and L1's MSR bitmap, return false to indicate that 56662306a36Sopenharmony_ci * we do not use the hardware. 
56762306a36Sopenharmony_ci */ 56862306a36Sopenharmony_cistatic inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, 56962306a36Sopenharmony_ci struct vmcs12 *vmcs12) 57062306a36Sopenharmony_ci{ 57162306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 57262306a36Sopenharmony_ci int msr; 57362306a36Sopenharmony_ci unsigned long *msr_bitmap_l1; 57462306a36Sopenharmony_ci unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap; 57562306a36Sopenharmony_ci struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; 57662306a36Sopenharmony_ci struct kvm_host_map *map = &vmx->nested.msr_bitmap_map; 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_ci /* Nothing to do if the MSR bitmap is not in use. */ 57962306a36Sopenharmony_ci if (!cpu_has_vmx_msr_bitmap() || 58062306a36Sopenharmony_ci !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) 58162306a36Sopenharmony_ci return false; 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci /* 58462306a36Sopenharmony_ci * MSR bitmap update can be skipped when: 58562306a36Sopenharmony_ci * - MSR bitmap for L1 hasn't changed. 58662306a36Sopenharmony_ci * - Nested hypervisor (L1) is attempting to launch the same L2 as 58762306a36Sopenharmony_ci * before. 58862306a36Sopenharmony_ci * - Nested hypervisor (L1) has enabled 'Enlightened MSR Bitmap' feature 58962306a36Sopenharmony_ci * and tells KVM (L0) there were no changes in MSR bitmap for L2. 
59062306a36Sopenharmony_ci */ 59162306a36Sopenharmony_ci if (!vmx->nested.force_msr_bitmap_recalc && evmcs && 59262306a36Sopenharmony_ci evmcs->hv_enlightenments_control.msr_bitmap && 59362306a36Sopenharmony_ci evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP) 59462306a36Sopenharmony_ci return true; 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map)) 59762306a36Sopenharmony_ci return false; 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_ci msr_bitmap_l1 = (unsigned long *)map->hva; 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci /* 60262306a36Sopenharmony_ci * To keep the control flow simple, pay eight 8-byte writes (sixteen 60362306a36Sopenharmony_ci * 4-byte writes on 32-bit systems) up front to enable intercepts for 60462306a36Sopenharmony_ci * the x2APIC MSR range and selectively toggle those relevant to L2. 60562306a36Sopenharmony_ci */ 60662306a36Sopenharmony_ci enable_x2apic_msr_intercepts(msr_bitmap_l0); 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_ci if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { 60962306a36Sopenharmony_ci if (nested_cpu_has_apic_reg_virt(vmcs12)) { 61062306a36Sopenharmony_ci /* 61162306a36Sopenharmony_ci * L0 need not intercept reads for MSRs between 0x800 61262306a36Sopenharmony_ci * and 0x8ff, it just lets the processor take the value 61362306a36Sopenharmony_ci * from the virtual-APIC page; take those 256 bits 61462306a36Sopenharmony_ci * directly from the L1 bitmap. 
61562306a36Sopenharmony_ci */ 61662306a36Sopenharmony_ci for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { 61762306a36Sopenharmony_ci unsigned word = msr / BITS_PER_LONG; 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci msr_bitmap_l0[word] = msr_bitmap_l1[word]; 62062306a36Sopenharmony_ci } 62162306a36Sopenharmony_ci } 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci nested_vmx_disable_intercept_for_x2apic_msr( 62462306a36Sopenharmony_ci msr_bitmap_l1, msr_bitmap_l0, 62562306a36Sopenharmony_ci X2APIC_MSR(APIC_TASKPRI), 62662306a36Sopenharmony_ci MSR_TYPE_R | MSR_TYPE_W); 62762306a36Sopenharmony_ci 62862306a36Sopenharmony_ci if (nested_cpu_has_vid(vmcs12)) { 62962306a36Sopenharmony_ci nested_vmx_disable_intercept_for_x2apic_msr( 63062306a36Sopenharmony_ci msr_bitmap_l1, msr_bitmap_l0, 63162306a36Sopenharmony_ci X2APIC_MSR(APIC_EOI), 63262306a36Sopenharmony_ci MSR_TYPE_W); 63362306a36Sopenharmony_ci nested_vmx_disable_intercept_for_x2apic_msr( 63462306a36Sopenharmony_ci msr_bitmap_l1, msr_bitmap_l0, 63562306a36Sopenharmony_ci X2APIC_MSR(APIC_SELF_IPI), 63662306a36Sopenharmony_ci MSR_TYPE_W); 63762306a36Sopenharmony_ci } 63862306a36Sopenharmony_ci } 63962306a36Sopenharmony_ci 64062306a36Sopenharmony_ci /* 64162306a36Sopenharmony_ci * Always check vmcs01's bitmap to honor userspace MSR filters and any 64262306a36Sopenharmony_ci * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through. 
64362306a36Sopenharmony_ci */ 64462306a36Sopenharmony_ci#ifdef CONFIG_X86_64 64562306a36Sopenharmony_ci nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, 64662306a36Sopenharmony_ci MSR_FS_BASE, MSR_TYPE_RW); 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, 64962306a36Sopenharmony_ci MSR_GS_BASE, MSR_TYPE_RW); 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ci nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, 65262306a36Sopenharmony_ci MSR_KERNEL_GS_BASE, MSR_TYPE_RW); 65362306a36Sopenharmony_ci#endif 65462306a36Sopenharmony_ci nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, 65562306a36Sopenharmony_ci MSR_IA32_SPEC_CTRL, MSR_TYPE_RW); 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_ci nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, 65862306a36Sopenharmony_ci MSR_IA32_PRED_CMD, MSR_TYPE_W); 65962306a36Sopenharmony_ci 66062306a36Sopenharmony_ci nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, 66162306a36Sopenharmony_ci MSR_IA32_FLUSH_CMD, MSR_TYPE_W); 66262306a36Sopenharmony_ci 66362306a36Sopenharmony_ci kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_ci vmx->nested.force_msr_bitmap_recalc = false; 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_ci return true; 66862306a36Sopenharmony_ci} 66962306a36Sopenharmony_ci 67062306a36Sopenharmony_cistatic void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu, 67162306a36Sopenharmony_ci struct vmcs12 *vmcs12) 67262306a36Sopenharmony_ci{ 67362306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 67462306a36Sopenharmony_ci struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache; 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_ci if (!nested_cpu_has_shadow_vmcs(vmcs12) || 67762306a36Sopenharmony_ci vmcs12->vmcs_link_pointer == INVALID_GPA) 67862306a36Sopenharmony_ci return; 
67962306a36Sopenharmony_ci 68062306a36Sopenharmony_ci if (ghc->gpa != vmcs12->vmcs_link_pointer && 68162306a36Sopenharmony_ci kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, 68262306a36Sopenharmony_ci vmcs12->vmcs_link_pointer, VMCS12_SIZE)) 68362306a36Sopenharmony_ci return; 68462306a36Sopenharmony_ci 68562306a36Sopenharmony_ci kvm_read_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu), 68662306a36Sopenharmony_ci VMCS12_SIZE); 68762306a36Sopenharmony_ci} 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_cistatic void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu, 69062306a36Sopenharmony_ci struct vmcs12 *vmcs12) 69162306a36Sopenharmony_ci{ 69262306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 69362306a36Sopenharmony_ci struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache; 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_ci if (!nested_cpu_has_shadow_vmcs(vmcs12) || 69662306a36Sopenharmony_ci vmcs12->vmcs_link_pointer == INVALID_GPA) 69762306a36Sopenharmony_ci return; 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci if (ghc->gpa != vmcs12->vmcs_link_pointer && 70062306a36Sopenharmony_ci kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, 70162306a36Sopenharmony_ci vmcs12->vmcs_link_pointer, VMCS12_SIZE)) 70262306a36Sopenharmony_ci return; 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_ci kvm_write_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu), 70562306a36Sopenharmony_ci VMCS12_SIZE); 70662306a36Sopenharmony_ci} 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_ci/* 70962306a36Sopenharmony_ci * In nested virtualization, check if L1 has set 71062306a36Sopenharmony_ci * VM_EXIT_ACK_INTR_ON_EXIT 71162306a36Sopenharmony_ci */ 71262306a36Sopenharmony_cistatic bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu) 71362306a36Sopenharmony_ci{ 71462306a36Sopenharmony_ci return get_vmcs12(vcpu)->vm_exit_controls & 71562306a36Sopenharmony_ci VM_EXIT_ACK_INTR_ON_EXIT; 71662306a36Sopenharmony_ci} 71762306a36Sopenharmony_ci 
71862306a36Sopenharmony_cistatic int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu, 71962306a36Sopenharmony_ci struct vmcs12 *vmcs12) 72062306a36Sopenharmony_ci{ 72162306a36Sopenharmony_ci if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && 72262306a36Sopenharmony_ci CC(!page_address_valid(vcpu, vmcs12->apic_access_addr))) 72362306a36Sopenharmony_ci return -EINVAL; 72462306a36Sopenharmony_ci else 72562306a36Sopenharmony_ci return 0; 72662306a36Sopenharmony_ci} 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_cistatic int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, 72962306a36Sopenharmony_ci struct vmcs12 *vmcs12) 73062306a36Sopenharmony_ci{ 73162306a36Sopenharmony_ci if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && 73262306a36Sopenharmony_ci !nested_cpu_has_apic_reg_virt(vmcs12) && 73362306a36Sopenharmony_ci !nested_cpu_has_vid(vmcs12) && 73462306a36Sopenharmony_ci !nested_cpu_has_posted_intr(vmcs12)) 73562306a36Sopenharmony_ci return 0; 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_ci /* 73862306a36Sopenharmony_ci * If virtualize x2apic mode is enabled, 73962306a36Sopenharmony_ci * virtualize apic access must be disabled. 74062306a36Sopenharmony_ci */ 74162306a36Sopenharmony_ci if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) && 74262306a36Sopenharmony_ci nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))) 74362306a36Sopenharmony_ci return -EINVAL; 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci /* 74662306a36Sopenharmony_ci * If virtual interrupt delivery is enabled, 74762306a36Sopenharmony_ci * we must exit on external interrupts. 
74862306a36Sopenharmony_ci */ 74962306a36Sopenharmony_ci if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu))) 75062306a36Sopenharmony_ci return -EINVAL; 75162306a36Sopenharmony_ci 75262306a36Sopenharmony_ci /* 75362306a36Sopenharmony_ci * bits 15:8 should be zero in posted_intr_nv, 75462306a36Sopenharmony_ci * the descriptor address has been already checked 75562306a36Sopenharmony_ci * in nested_get_vmcs12_pages. 75662306a36Sopenharmony_ci * 75762306a36Sopenharmony_ci * bits 5:0 of posted_intr_desc_addr should be zero. 75862306a36Sopenharmony_ci */ 75962306a36Sopenharmony_ci if (nested_cpu_has_posted_intr(vmcs12) && 76062306a36Sopenharmony_ci (CC(!nested_cpu_has_vid(vmcs12)) || 76162306a36Sopenharmony_ci CC(!nested_exit_intr_ack_set(vcpu)) || 76262306a36Sopenharmony_ci CC((vmcs12->posted_intr_nv & 0xff00)) || 76362306a36Sopenharmony_ci CC(!kvm_vcpu_is_legal_aligned_gpa(vcpu, vmcs12->posted_intr_desc_addr, 64)))) 76462306a36Sopenharmony_ci return -EINVAL; 76562306a36Sopenharmony_ci 76662306a36Sopenharmony_ci /* tpr shadow is needed by all apicv features. 
*/ 76762306a36Sopenharmony_ci if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))) 76862306a36Sopenharmony_ci return -EINVAL; 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci return 0; 77162306a36Sopenharmony_ci} 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_cistatic int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, 77462306a36Sopenharmony_ci u32 count, u64 addr) 77562306a36Sopenharmony_ci{ 77662306a36Sopenharmony_ci if (count == 0) 77762306a36Sopenharmony_ci return 0; 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ci if (!kvm_vcpu_is_legal_aligned_gpa(vcpu, addr, 16) || 78062306a36Sopenharmony_ci !kvm_vcpu_is_legal_gpa(vcpu, (addr + count * sizeof(struct vmx_msr_entry) - 1))) 78162306a36Sopenharmony_ci return -EINVAL; 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_ci return 0; 78462306a36Sopenharmony_ci} 78562306a36Sopenharmony_ci 78662306a36Sopenharmony_cistatic int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu, 78762306a36Sopenharmony_ci struct vmcs12 *vmcs12) 78862306a36Sopenharmony_ci{ 78962306a36Sopenharmony_ci if (CC(nested_vmx_check_msr_switch(vcpu, 79062306a36Sopenharmony_ci vmcs12->vm_exit_msr_load_count, 79162306a36Sopenharmony_ci vmcs12->vm_exit_msr_load_addr)) || 79262306a36Sopenharmony_ci CC(nested_vmx_check_msr_switch(vcpu, 79362306a36Sopenharmony_ci vmcs12->vm_exit_msr_store_count, 79462306a36Sopenharmony_ci vmcs12->vm_exit_msr_store_addr))) 79562306a36Sopenharmony_ci return -EINVAL; 79662306a36Sopenharmony_ci 79762306a36Sopenharmony_ci return 0; 79862306a36Sopenharmony_ci} 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_cistatic int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu, 80162306a36Sopenharmony_ci struct vmcs12 *vmcs12) 80262306a36Sopenharmony_ci{ 80362306a36Sopenharmony_ci if (CC(nested_vmx_check_msr_switch(vcpu, 80462306a36Sopenharmony_ci vmcs12->vm_entry_msr_load_count, 80562306a36Sopenharmony_ci vmcs12->vm_entry_msr_load_addr))) 80662306a36Sopenharmony_ci return -EINVAL; 
80762306a36Sopenharmony_ci 80862306a36Sopenharmony_ci return 0; 80962306a36Sopenharmony_ci} 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_cistatic int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu, 81262306a36Sopenharmony_ci struct vmcs12 *vmcs12) 81362306a36Sopenharmony_ci{ 81462306a36Sopenharmony_ci if (!nested_cpu_has_pml(vmcs12)) 81562306a36Sopenharmony_ci return 0; 81662306a36Sopenharmony_ci 81762306a36Sopenharmony_ci if (CC(!nested_cpu_has_ept(vmcs12)) || 81862306a36Sopenharmony_ci CC(!page_address_valid(vcpu, vmcs12->pml_address))) 81962306a36Sopenharmony_ci return -EINVAL; 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_ci return 0; 82262306a36Sopenharmony_ci} 82362306a36Sopenharmony_ci 82462306a36Sopenharmony_cistatic int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu, 82562306a36Sopenharmony_ci struct vmcs12 *vmcs12) 82662306a36Sopenharmony_ci{ 82762306a36Sopenharmony_ci if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) && 82862306a36Sopenharmony_ci !nested_cpu_has_ept(vmcs12))) 82962306a36Sopenharmony_ci return -EINVAL; 83062306a36Sopenharmony_ci return 0; 83162306a36Sopenharmony_ci} 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_cistatic int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu, 83462306a36Sopenharmony_ci struct vmcs12 *vmcs12) 83562306a36Sopenharmony_ci{ 83662306a36Sopenharmony_ci if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) && 83762306a36Sopenharmony_ci !nested_cpu_has_ept(vmcs12))) 83862306a36Sopenharmony_ci return -EINVAL; 83962306a36Sopenharmony_ci return 0; 84062306a36Sopenharmony_ci} 84162306a36Sopenharmony_ci 84262306a36Sopenharmony_cistatic int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu, 84362306a36Sopenharmony_ci struct vmcs12 *vmcs12) 84462306a36Sopenharmony_ci{ 84562306a36Sopenharmony_ci if (!nested_cpu_has_shadow_vmcs(vmcs12)) 84662306a36Sopenharmony_ci return 0; 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci if 
(CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)) || 84962306a36Sopenharmony_ci CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap))) 85062306a36Sopenharmony_ci return -EINVAL; 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci return 0; 85362306a36Sopenharmony_ci} 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_cistatic int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, 85662306a36Sopenharmony_ci struct vmx_msr_entry *e) 85762306a36Sopenharmony_ci{ 85862306a36Sopenharmony_ci /* x2APIC MSR accesses are not allowed */ 85962306a36Sopenharmony_ci if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)) 86062306a36Sopenharmony_ci return -EINVAL; 86162306a36Sopenharmony_ci if (CC(e->index == MSR_IA32_UCODE_WRITE) || /* SDM Table 35-2 */ 86262306a36Sopenharmony_ci CC(e->index == MSR_IA32_UCODE_REV)) 86362306a36Sopenharmony_ci return -EINVAL; 86462306a36Sopenharmony_ci if (CC(e->reserved != 0)) 86562306a36Sopenharmony_ci return -EINVAL; 86662306a36Sopenharmony_ci return 0; 86762306a36Sopenharmony_ci} 86862306a36Sopenharmony_ci 86962306a36Sopenharmony_cistatic int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu, 87062306a36Sopenharmony_ci struct vmx_msr_entry *e) 87162306a36Sopenharmony_ci{ 87262306a36Sopenharmony_ci if (CC(e->index == MSR_FS_BASE) || 87362306a36Sopenharmony_ci CC(e->index == MSR_GS_BASE) || 87462306a36Sopenharmony_ci CC(e->index == MSR_IA32_SMM_MONITOR_CTL) || /* SMM is not supported */ 87562306a36Sopenharmony_ci nested_vmx_msr_check_common(vcpu, e)) 87662306a36Sopenharmony_ci return -EINVAL; 87762306a36Sopenharmony_ci return 0; 87862306a36Sopenharmony_ci} 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_cistatic int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu, 88162306a36Sopenharmony_ci struct vmx_msr_entry *e) 88262306a36Sopenharmony_ci{ 88362306a36Sopenharmony_ci if (CC(e->index == MSR_IA32_SMBASE) || /* SMM is not supported */ 88462306a36Sopenharmony_ci nested_vmx_msr_check_common(vcpu, e)) 88562306a36Sopenharmony_ci 
return -EINVAL; 88662306a36Sopenharmony_ci return 0; 88762306a36Sopenharmony_ci} 88862306a36Sopenharmony_ci 88962306a36Sopenharmony_cistatic u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu) 89062306a36Sopenharmony_ci{ 89162306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 89262306a36Sopenharmony_ci u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, 89362306a36Sopenharmony_ci vmx->nested.msrs.misc_high); 89462306a36Sopenharmony_ci 89562306a36Sopenharmony_ci return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER; 89662306a36Sopenharmony_ci} 89762306a36Sopenharmony_ci 89862306a36Sopenharmony_ci/* 89962306a36Sopenharmony_ci * Load guest's/host's msr at nested entry/exit. 90062306a36Sopenharmony_ci * return 0 for success, entry index for failure. 90162306a36Sopenharmony_ci * 90262306a36Sopenharmony_ci * One of the failure modes for MSR load/store is when a list exceeds the 90362306a36Sopenharmony_ci * virtual hardware's capacity. To maintain compatibility with hardware inasmuch 90462306a36Sopenharmony_ci * as possible, process all valid entries before failing rather than precheck 90562306a36Sopenharmony_ci * for a capacity violation. 
90662306a36Sopenharmony_ci */ 90762306a36Sopenharmony_cistatic u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) 90862306a36Sopenharmony_ci{ 90962306a36Sopenharmony_ci u32 i; 91062306a36Sopenharmony_ci struct vmx_msr_entry e; 91162306a36Sopenharmony_ci u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu); 91262306a36Sopenharmony_ci 91362306a36Sopenharmony_ci for (i = 0; i < count; i++) { 91462306a36Sopenharmony_ci if (unlikely(i >= max_msr_list_size)) 91562306a36Sopenharmony_ci goto fail; 91662306a36Sopenharmony_ci 91762306a36Sopenharmony_ci if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e), 91862306a36Sopenharmony_ci &e, sizeof(e))) { 91962306a36Sopenharmony_ci pr_debug_ratelimited( 92062306a36Sopenharmony_ci "%s cannot read MSR entry (%u, 0x%08llx)\n", 92162306a36Sopenharmony_ci __func__, i, gpa + i * sizeof(e)); 92262306a36Sopenharmony_ci goto fail; 92362306a36Sopenharmony_ci } 92462306a36Sopenharmony_ci if (nested_vmx_load_msr_check(vcpu, &e)) { 92562306a36Sopenharmony_ci pr_debug_ratelimited( 92662306a36Sopenharmony_ci "%s check failed (%u, 0x%x, 0x%x)\n", 92762306a36Sopenharmony_ci __func__, i, e.index, e.reserved); 92862306a36Sopenharmony_ci goto fail; 92962306a36Sopenharmony_ci } 93062306a36Sopenharmony_ci if (kvm_set_msr(vcpu, e.index, e.value)) { 93162306a36Sopenharmony_ci pr_debug_ratelimited( 93262306a36Sopenharmony_ci "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", 93362306a36Sopenharmony_ci __func__, i, e.index, e.value); 93462306a36Sopenharmony_ci goto fail; 93562306a36Sopenharmony_ci } 93662306a36Sopenharmony_ci } 93762306a36Sopenharmony_ci return 0; 93862306a36Sopenharmony_cifail: 93962306a36Sopenharmony_ci /* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. 
*/ 94062306a36Sopenharmony_ci return i + 1; 94162306a36Sopenharmony_ci} 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_cistatic bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu, 94462306a36Sopenharmony_ci u32 msr_index, 94562306a36Sopenharmony_ci u64 *data) 94662306a36Sopenharmony_ci{ 94762306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_ci /* 95062306a36Sopenharmony_ci * If the L0 hypervisor stored a more accurate value for the TSC that 95162306a36Sopenharmony_ci * does not include the time taken for emulation of the L2->L1 95262306a36Sopenharmony_ci * VM-exit in L0, use the more accurate value. 95362306a36Sopenharmony_ci */ 95462306a36Sopenharmony_ci if (msr_index == MSR_IA32_TSC) { 95562306a36Sopenharmony_ci int i = vmx_find_loadstore_msr_slot(&vmx->msr_autostore.guest, 95662306a36Sopenharmony_ci MSR_IA32_TSC); 95762306a36Sopenharmony_ci 95862306a36Sopenharmony_ci if (i >= 0) { 95962306a36Sopenharmony_ci u64 val = vmx->msr_autostore.guest.val[i].value; 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci *data = kvm_read_l1_tsc(vcpu, val); 96262306a36Sopenharmony_ci return true; 96362306a36Sopenharmony_ci } 96462306a36Sopenharmony_ci } 96562306a36Sopenharmony_ci 96662306a36Sopenharmony_ci if (kvm_get_msr(vcpu, msr_index, data)) { 96762306a36Sopenharmony_ci pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__, 96862306a36Sopenharmony_ci msr_index); 96962306a36Sopenharmony_ci return false; 97062306a36Sopenharmony_ci } 97162306a36Sopenharmony_ci return true; 97262306a36Sopenharmony_ci} 97362306a36Sopenharmony_ci 97462306a36Sopenharmony_cistatic bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i, 97562306a36Sopenharmony_ci struct vmx_msr_entry *e) 97662306a36Sopenharmony_ci{ 97762306a36Sopenharmony_ci if (kvm_vcpu_read_guest(vcpu, 97862306a36Sopenharmony_ci gpa + i * sizeof(*e), 97962306a36Sopenharmony_ci e, 2 * sizeof(u32))) { 98062306a36Sopenharmony_ci 
pr_debug_ratelimited( 98162306a36Sopenharmony_ci "%s cannot read MSR entry (%u, 0x%08llx)\n", 98262306a36Sopenharmony_ci __func__, i, gpa + i * sizeof(*e)); 98362306a36Sopenharmony_ci return false; 98462306a36Sopenharmony_ci } 98562306a36Sopenharmony_ci if (nested_vmx_store_msr_check(vcpu, e)) { 98662306a36Sopenharmony_ci pr_debug_ratelimited( 98762306a36Sopenharmony_ci "%s check failed (%u, 0x%x, 0x%x)\n", 98862306a36Sopenharmony_ci __func__, i, e->index, e->reserved); 98962306a36Sopenharmony_ci return false; 99062306a36Sopenharmony_ci } 99162306a36Sopenharmony_ci return true; 99262306a36Sopenharmony_ci} 99362306a36Sopenharmony_ci 99462306a36Sopenharmony_cistatic int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) 99562306a36Sopenharmony_ci{ 99662306a36Sopenharmony_ci u64 data; 99762306a36Sopenharmony_ci u32 i; 99862306a36Sopenharmony_ci struct vmx_msr_entry e; 99962306a36Sopenharmony_ci u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu); 100062306a36Sopenharmony_ci 100162306a36Sopenharmony_ci for (i = 0; i < count; i++) { 100262306a36Sopenharmony_ci if (unlikely(i >= max_msr_list_size)) 100362306a36Sopenharmony_ci return -EINVAL; 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_ci if (!read_and_check_msr_entry(vcpu, gpa, i, &e)) 100662306a36Sopenharmony_ci return -EINVAL; 100762306a36Sopenharmony_ci 100862306a36Sopenharmony_ci if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data)) 100962306a36Sopenharmony_ci return -EINVAL; 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_ci if (kvm_vcpu_write_guest(vcpu, 101262306a36Sopenharmony_ci gpa + i * sizeof(e) + 101362306a36Sopenharmony_ci offsetof(struct vmx_msr_entry, value), 101462306a36Sopenharmony_ci &data, sizeof(data))) { 101562306a36Sopenharmony_ci pr_debug_ratelimited( 101662306a36Sopenharmony_ci "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", 101762306a36Sopenharmony_ci __func__, i, e.index, data); 101862306a36Sopenharmony_ci return -EINVAL; 101962306a36Sopenharmony_ci } 
102062306a36Sopenharmony_ci } 102162306a36Sopenharmony_ci return 0; 102262306a36Sopenharmony_ci} 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_cistatic bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index) 102562306a36Sopenharmony_ci{ 102662306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 102762306a36Sopenharmony_ci u32 count = vmcs12->vm_exit_msr_store_count; 102862306a36Sopenharmony_ci u64 gpa = vmcs12->vm_exit_msr_store_addr; 102962306a36Sopenharmony_ci struct vmx_msr_entry e; 103062306a36Sopenharmony_ci u32 i; 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci for (i = 0; i < count; i++) { 103362306a36Sopenharmony_ci if (!read_and_check_msr_entry(vcpu, gpa, i, &e)) 103462306a36Sopenharmony_ci return false; 103562306a36Sopenharmony_ci 103662306a36Sopenharmony_ci if (e.index == msr_index) 103762306a36Sopenharmony_ci return true; 103862306a36Sopenharmony_ci } 103962306a36Sopenharmony_ci return false; 104062306a36Sopenharmony_ci} 104162306a36Sopenharmony_ci 104262306a36Sopenharmony_cistatic void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu, 104362306a36Sopenharmony_ci u32 msr_index) 104462306a36Sopenharmony_ci{ 104562306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 104662306a36Sopenharmony_ci struct vmx_msrs *autostore = &vmx->msr_autostore.guest; 104762306a36Sopenharmony_ci bool in_vmcs12_store_list; 104862306a36Sopenharmony_ci int msr_autostore_slot; 104962306a36Sopenharmony_ci bool in_autostore_list; 105062306a36Sopenharmony_ci int last; 105162306a36Sopenharmony_ci 105262306a36Sopenharmony_ci msr_autostore_slot = vmx_find_loadstore_msr_slot(autostore, msr_index); 105362306a36Sopenharmony_ci in_autostore_list = msr_autostore_slot >= 0; 105462306a36Sopenharmony_ci in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index); 105562306a36Sopenharmony_ci 105662306a36Sopenharmony_ci if (in_vmcs12_store_list && !in_autostore_list) { 105762306a36Sopenharmony_ci if (autostore->nr == 
MAX_NR_LOADSTORE_MSRS) { 105862306a36Sopenharmony_ci /* 105962306a36Sopenharmony_ci * Emulated VMEntry does not fail here. Instead a less 106062306a36Sopenharmony_ci * accurate value will be returned by 106162306a36Sopenharmony_ci * nested_vmx_get_vmexit_msr_value() using kvm_get_msr() 106262306a36Sopenharmony_ci * instead of reading the value from the vmcs02 VMExit 106362306a36Sopenharmony_ci * MSR-store area. 106462306a36Sopenharmony_ci */ 106562306a36Sopenharmony_ci pr_warn_ratelimited( 106662306a36Sopenharmony_ci "Not enough msr entries in msr_autostore. Can't add msr %x\n", 106762306a36Sopenharmony_ci msr_index); 106862306a36Sopenharmony_ci return; 106962306a36Sopenharmony_ci } 107062306a36Sopenharmony_ci last = autostore->nr++; 107162306a36Sopenharmony_ci autostore->val[last].index = msr_index; 107262306a36Sopenharmony_ci } else if (!in_vmcs12_store_list && in_autostore_list) { 107362306a36Sopenharmony_ci last = --autostore->nr; 107462306a36Sopenharmony_ci autostore->val[msr_autostore_slot] = autostore->val[last]; 107562306a36Sopenharmony_ci } 107662306a36Sopenharmony_ci} 107762306a36Sopenharmony_ci 107862306a36Sopenharmony_ci/* 107962306a36Sopenharmony_ci * Load guest's/host's cr3 at nested entry/exit. @nested_ept is true if we are 108062306a36Sopenharmony_ci * emulating VM-Entry into a guest with EPT enabled. On failure, the expected 108162306a36Sopenharmony_ci * Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to 108262306a36Sopenharmony_ci * @entry_failure_code. 
108362306a36Sopenharmony_ci */ 108462306a36Sopenharmony_cistatic int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, 108562306a36Sopenharmony_ci bool nested_ept, bool reload_pdptrs, 108662306a36Sopenharmony_ci enum vm_entry_failure_code *entry_failure_code) 108762306a36Sopenharmony_ci{ 108862306a36Sopenharmony_ci if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) { 108962306a36Sopenharmony_ci *entry_failure_code = ENTRY_FAIL_DEFAULT; 109062306a36Sopenharmony_ci return -EINVAL; 109162306a36Sopenharmony_ci } 109262306a36Sopenharmony_ci 109362306a36Sopenharmony_ci /* 109462306a36Sopenharmony_ci * If PAE paging and EPT are both on, CR3 is not used by the CPU and 109562306a36Sopenharmony_ci * must not be dereferenced. 109662306a36Sopenharmony_ci */ 109762306a36Sopenharmony_ci if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) && 109862306a36Sopenharmony_ci CC(!load_pdptrs(vcpu, cr3))) { 109962306a36Sopenharmony_ci *entry_failure_code = ENTRY_FAIL_PDPTE; 110062306a36Sopenharmony_ci return -EINVAL; 110162306a36Sopenharmony_ci } 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci vcpu->arch.cr3 = cr3; 110462306a36Sopenharmony_ci kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3); 110562306a36Sopenharmony_ci 110662306a36Sopenharmony_ci /* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */ 110762306a36Sopenharmony_ci kvm_init_mmu(vcpu); 110862306a36Sopenharmony_ci 110962306a36Sopenharmony_ci if (!nested_ept) 111062306a36Sopenharmony_ci kvm_mmu_new_pgd(vcpu, cr3); 111162306a36Sopenharmony_ci 111262306a36Sopenharmony_ci return 0; 111362306a36Sopenharmony_ci} 111462306a36Sopenharmony_ci 111562306a36Sopenharmony_ci/* 111662306a36Sopenharmony_ci * Returns if KVM is able to config CPU to tag TLB entries 111762306a36Sopenharmony_ci * populated by L2 differently than TLB entries populated 111862306a36Sopenharmony_ci * by L1. 
111962306a36Sopenharmony_ci * 112062306a36Sopenharmony_ci * If L0 uses EPT, L1 and L2 run with different EPTP because 112162306a36Sopenharmony_ci * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries 112262306a36Sopenharmony_ci * are tagged with different EPTP. 112362306a36Sopenharmony_ci * 112462306a36Sopenharmony_ci * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged 112562306a36Sopenharmony_ci * with different VPID (L1 entries are tagged with vmx->vpid 112662306a36Sopenharmony_ci * while L2 entries are tagged with vmx->nested.vpid02). 112762306a36Sopenharmony_ci */ 112862306a36Sopenharmony_cistatic bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu) 112962306a36Sopenharmony_ci{ 113062306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci return enable_ept || 113362306a36Sopenharmony_ci (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02); 113462306a36Sopenharmony_ci} 113562306a36Sopenharmony_ci 113662306a36Sopenharmony_cistatic void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu, 113762306a36Sopenharmony_ci struct vmcs12 *vmcs12, 113862306a36Sopenharmony_ci bool is_vmenter) 113962306a36Sopenharmony_ci{ 114062306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 114162306a36Sopenharmony_ci 114262306a36Sopenharmony_ci /* 114362306a36Sopenharmony_ci * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VP_ID or 114462306a36Sopenharmony_ci * L2's VP_ID upon request from the guest. Make sure we check for 114562306a36Sopenharmony_ci * pending entries in the right FIFO upon L1/L2 transition as these 114662306a36Sopenharmony_ci * requests are put by other vCPUs asynchronously. 
114762306a36Sopenharmony_ci */ 114862306a36Sopenharmony_ci if (to_hv_vcpu(vcpu) && enable_ept) 114962306a36Sopenharmony_ci kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu); 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_ci /* 115262306a36Sopenharmony_ci * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings 115362306a36Sopenharmony_ci * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a 115462306a36Sopenharmony_ci * full TLB flush from the guest's perspective. This is required even 115562306a36Sopenharmony_ci * if VPID is disabled in the host as KVM may need to synchronize the 115662306a36Sopenharmony_ci * MMU in response to the guest TLB flush. 115762306a36Sopenharmony_ci * 115862306a36Sopenharmony_ci * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use. 115962306a36Sopenharmony_ci * EPT is a special snowflake, as guest-physical mappings aren't 116062306a36Sopenharmony_ci * flushed on VPID invalidations, including VM-Enter or VM-Exit with 116162306a36Sopenharmony_ci * VPID disabled. As a result, KVM _never_ needs to sync nEPT 116262306a36Sopenharmony_ci * entries on VM-Enter because L1 can't rely on VM-Enter to flush 116362306a36Sopenharmony_ci * those mappings. 116462306a36Sopenharmony_ci */ 116562306a36Sopenharmony_ci if (!nested_cpu_has_vpid(vmcs12)) { 116662306a36Sopenharmony_ci kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); 116762306a36Sopenharmony_ci return; 116862306a36Sopenharmony_ci } 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci /* L2 should never have a VPID if VPID is disabled. */ 117162306a36Sopenharmony_ci WARN_ON(!enable_vpid); 117262306a36Sopenharmony_ci 117362306a36Sopenharmony_ci /* 117462306a36Sopenharmony_ci * VPID is enabled and in use by vmcs12. If vpid12 is changing, then 117562306a36Sopenharmony_ci * emulate a guest TLB flush as KVM does not track vpid12 history nor 117662306a36Sopenharmony_ci * is the VPID incorporated into the MMU context. I.e. 
KVM must assume 117762306a36Sopenharmony_ci * that the new vpid12 has never been used and thus represents a new 117862306a36Sopenharmony_ci * guest ASID that cannot have entries in the TLB. 117962306a36Sopenharmony_ci */ 118062306a36Sopenharmony_ci if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) { 118162306a36Sopenharmony_ci vmx->nested.last_vpid = vmcs12->virtual_processor_id; 118262306a36Sopenharmony_ci kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); 118362306a36Sopenharmony_ci return; 118462306a36Sopenharmony_ci } 118562306a36Sopenharmony_ci 118662306a36Sopenharmony_ci /* 118762306a36Sopenharmony_ci * If VPID is enabled, used by vmc12, and vpid12 is not changing but 118862306a36Sopenharmony_ci * does not have a unique TLB tag (ASID), i.e. EPT is disabled and 118962306a36Sopenharmony_ci * KVM was unable to allocate a VPID for L2, flush the current context 119062306a36Sopenharmony_ci * as the effective ASID is common to both L1 and L2. 119162306a36Sopenharmony_ci */ 119262306a36Sopenharmony_ci if (!nested_has_guest_tlb_tag(vcpu)) 119362306a36Sopenharmony_ci kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); 119462306a36Sopenharmony_ci} 119562306a36Sopenharmony_ci 119662306a36Sopenharmony_cistatic bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) 119762306a36Sopenharmony_ci{ 119862306a36Sopenharmony_ci superset &= mask; 119962306a36Sopenharmony_ci subset &= mask; 120062306a36Sopenharmony_ci 120162306a36Sopenharmony_ci return (superset | subset) == superset; 120262306a36Sopenharmony_ci} 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_cistatic int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) 120562306a36Sopenharmony_ci{ 120662306a36Sopenharmony_ci const u64 feature_and_reserved = 120762306a36Sopenharmony_ci /* feature (except bit 48; see below) */ 120862306a36Sopenharmony_ci BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | 120962306a36Sopenharmony_ci /* reserved */ 121062306a36Sopenharmony_ci BIT_ULL(31) | GENMASK_ULL(47, 45) 
| GENMASK_ULL(63, 56); 121162306a36Sopenharmony_ci u64 vmx_basic = vmcs_config.nested.basic; 121262306a36Sopenharmony_ci 121362306a36Sopenharmony_ci if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) 121462306a36Sopenharmony_ci return -EINVAL; 121562306a36Sopenharmony_ci 121662306a36Sopenharmony_ci /* 121762306a36Sopenharmony_ci * KVM does not emulate a version of VMX that constrains physical 121862306a36Sopenharmony_ci * addresses of VMX structures (e.g. VMCS) to 32-bits. 121962306a36Sopenharmony_ci */ 122062306a36Sopenharmony_ci if (data & BIT_ULL(48)) 122162306a36Sopenharmony_ci return -EINVAL; 122262306a36Sopenharmony_ci 122362306a36Sopenharmony_ci if (vmx_basic_vmcs_revision_id(vmx_basic) != 122462306a36Sopenharmony_ci vmx_basic_vmcs_revision_id(data)) 122562306a36Sopenharmony_ci return -EINVAL; 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_ci if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data)) 122862306a36Sopenharmony_ci return -EINVAL; 122962306a36Sopenharmony_ci 123062306a36Sopenharmony_ci vmx->nested.msrs.basic = data; 123162306a36Sopenharmony_ci return 0; 123262306a36Sopenharmony_ci} 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_cistatic void vmx_get_control_msr(struct nested_vmx_msrs *msrs, u32 msr_index, 123562306a36Sopenharmony_ci u32 **low, u32 **high) 123662306a36Sopenharmony_ci{ 123762306a36Sopenharmony_ci switch (msr_index) { 123862306a36Sopenharmony_ci case MSR_IA32_VMX_TRUE_PINBASED_CTLS: 123962306a36Sopenharmony_ci *low = &msrs->pinbased_ctls_low; 124062306a36Sopenharmony_ci *high = &msrs->pinbased_ctls_high; 124162306a36Sopenharmony_ci break; 124262306a36Sopenharmony_ci case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: 124362306a36Sopenharmony_ci *low = &msrs->procbased_ctls_low; 124462306a36Sopenharmony_ci *high = &msrs->procbased_ctls_high; 124562306a36Sopenharmony_ci break; 124662306a36Sopenharmony_ci case MSR_IA32_VMX_TRUE_EXIT_CTLS: 124762306a36Sopenharmony_ci *low = &msrs->exit_ctls_low; 
124862306a36Sopenharmony_ci *high = &msrs->exit_ctls_high; 124962306a36Sopenharmony_ci break; 125062306a36Sopenharmony_ci case MSR_IA32_VMX_TRUE_ENTRY_CTLS: 125162306a36Sopenharmony_ci *low = &msrs->entry_ctls_low; 125262306a36Sopenharmony_ci *high = &msrs->entry_ctls_high; 125362306a36Sopenharmony_ci break; 125462306a36Sopenharmony_ci case MSR_IA32_VMX_PROCBASED_CTLS2: 125562306a36Sopenharmony_ci *low = &msrs->secondary_ctls_low; 125662306a36Sopenharmony_ci *high = &msrs->secondary_ctls_high; 125762306a36Sopenharmony_ci break; 125862306a36Sopenharmony_ci default: 125962306a36Sopenharmony_ci BUG(); 126062306a36Sopenharmony_ci } 126162306a36Sopenharmony_ci} 126262306a36Sopenharmony_ci 126362306a36Sopenharmony_cistatic int 126462306a36Sopenharmony_civmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) 126562306a36Sopenharmony_ci{ 126662306a36Sopenharmony_ci u32 *lowp, *highp; 126762306a36Sopenharmony_ci u64 supported; 126862306a36Sopenharmony_ci 126962306a36Sopenharmony_ci vmx_get_control_msr(&vmcs_config.nested, msr_index, &lowp, &highp); 127062306a36Sopenharmony_ci 127162306a36Sopenharmony_ci supported = vmx_control_msr(*lowp, *highp); 127262306a36Sopenharmony_ci 127362306a36Sopenharmony_ci /* Check must-be-1 bits are still 1. */ 127462306a36Sopenharmony_ci if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0))) 127562306a36Sopenharmony_ci return -EINVAL; 127662306a36Sopenharmony_ci 127762306a36Sopenharmony_ci /* Check must-be-0 bits are still 0. 
*/ 127862306a36Sopenharmony_ci if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32))) 127962306a36Sopenharmony_ci return -EINVAL; 128062306a36Sopenharmony_ci 128162306a36Sopenharmony_ci vmx_get_control_msr(&vmx->nested.msrs, msr_index, &lowp, &highp); 128262306a36Sopenharmony_ci *lowp = data; 128362306a36Sopenharmony_ci *highp = data >> 32; 128462306a36Sopenharmony_ci return 0; 128562306a36Sopenharmony_ci} 128662306a36Sopenharmony_ci 128762306a36Sopenharmony_cistatic int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) 128862306a36Sopenharmony_ci{ 128962306a36Sopenharmony_ci const u64 feature_and_reserved_bits = 129062306a36Sopenharmony_ci /* feature */ 129162306a36Sopenharmony_ci BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) | 129262306a36Sopenharmony_ci BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) | 129362306a36Sopenharmony_ci /* reserved */ 129462306a36Sopenharmony_ci GENMASK_ULL(13, 9) | BIT_ULL(31); 129562306a36Sopenharmony_ci u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low, 129662306a36Sopenharmony_ci vmcs_config.nested.misc_high); 129762306a36Sopenharmony_ci 129862306a36Sopenharmony_ci if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) 129962306a36Sopenharmony_ci return -EINVAL; 130062306a36Sopenharmony_ci 130162306a36Sopenharmony_ci if ((vmx->nested.msrs.pinbased_ctls_high & 130262306a36Sopenharmony_ci PIN_BASED_VMX_PREEMPTION_TIMER) && 130362306a36Sopenharmony_ci vmx_misc_preemption_timer_rate(data) != 130462306a36Sopenharmony_ci vmx_misc_preemption_timer_rate(vmx_misc)) 130562306a36Sopenharmony_ci return -EINVAL; 130662306a36Sopenharmony_ci 130762306a36Sopenharmony_ci if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc)) 130862306a36Sopenharmony_ci return -EINVAL; 130962306a36Sopenharmony_ci 131062306a36Sopenharmony_ci if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc)) 131162306a36Sopenharmony_ci return -EINVAL; 131262306a36Sopenharmony_ci 131362306a36Sopenharmony_ci if (vmx_misc_mseg_revid(data) 
!= vmx_misc_mseg_revid(vmx_misc)) 131462306a36Sopenharmony_ci return -EINVAL; 131562306a36Sopenharmony_ci 131662306a36Sopenharmony_ci vmx->nested.msrs.misc_low = data; 131762306a36Sopenharmony_ci vmx->nested.msrs.misc_high = data >> 32; 131862306a36Sopenharmony_ci 131962306a36Sopenharmony_ci return 0; 132062306a36Sopenharmony_ci} 132162306a36Sopenharmony_ci 132262306a36Sopenharmony_cistatic int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data) 132362306a36Sopenharmony_ci{ 132462306a36Sopenharmony_ci u64 vmx_ept_vpid_cap = vmx_control_msr(vmcs_config.nested.ept_caps, 132562306a36Sopenharmony_ci vmcs_config.nested.vpid_caps); 132662306a36Sopenharmony_ci 132762306a36Sopenharmony_ci /* Every bit is either reserved or a feature bit. */ 132862306a36Sopenharmony_ci if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL)) 132962306a36Sopenharmony_ci return -EINVAL; 133062306a36Sopenharmony_ci 133162306a36Sopenharmony_ci vmx->nested.msrs.ept_caps = data; 133262306a36Sopenharmony_ci vmx->nested.msrs.vpid_caps = data >> 32; 133362306a36Sopenharmony_ci return 0; 133462306a36Sopenharmony_ci} 133562306a36Sopenharmony_ci 133662306a36Sopenharmony_cistatic u64 *vmx_get_fixed0_msr(struct nested_vmx_msrs *msrs, u32 msr_index) 133762306a36Sopenharmony_ci{ 133862306a36Sopenharmony_ci switch (msr_index) { 133962306a36Sopenharmony_ci case MSR_IA32_VMX_CR0_FIXED0: 134062306a36Sopenharmony_ci return &msrs->cr0_fixed0; 134162306a36Sopenharmony_ci case MSR_IA32_VMX_CR4_FIXED0: 134262306a36Sopenharmony_ci return &msrs->cr4_fixed0; 134362306a36Sopenharmony_ci default: 134462306a36Sopenharmony_ci BUG(); 134562306a36Sopenharmony_ci } 134662306a36Sopenharmony_ci} 134762306a36Sopenharmony_ci 134862306a36Sopenharmony_cistatic int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) 134962306a36Sopenharmony_ci{ 135062306a36Sopenharmony_ci const u64 *msr = vmx_get_fixed0_msr(&vmcs_config.nested, msr_index); 135162306a36Sopenharmony_ci 135262306a36Sopenharmony_ci /* 
135362306a36Sopenharmony_ci * 1 bits (which indicates bits which "must-be-1" during VMX operation) 135462306a36Sopenharmony_ci * must be 1 in the restored value. 135562306a36Sopenharmony_ci */ 135662306a36Sopenharmony_ci if (!is_bitwise_subset(data, *msr, -1ULL)) 135762306a36Sopenharmony_ci return -EINVAL; 135862306a36Sopenharmony_ci 135962306a36Sopenharmony_ci *vmx_get_fixed0_msr(&vmx->nested.msrs, msr_index) = data; 136062306a36Sopenharmony_ci return 0; 136162306a36Sopenharmony_ci} 136262306a36Sopenharmony_ci 136362306a36Sopenharmony_ci/* 136462306a36Sopenharmony_ci * Called when userspace is restoring VMX MSRs. 136562306a36Sopenharmony_ci * 136662306a36Sopenharmony_ci * Returns 0 on success, non-0 otherwise. 136762306a36Sopenharmony_ci */ 136862306a36Sopenharmony_ciint vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) 136962306a36Sopenharmony_ci{ 137062306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 137162306a36Sopenharmony_ci 137262306a36Sopenharmony_ci /* 137362306a36Sopenharmony_ci * Don't allow changes to the VMX capability MSRs while the vCPU 137462306a36Sopenharmony_ci * is in VMX operation. 137562306a36Sopenharmony_ci */ 137662306a36Sopenharmony_ci if (vmx->nested.vmxon) 137762306a36Sopenharmony_ci return -EBUSY; 137862306a36Sopenharmony_ci 137962306a36Sopenharmony_ci switch (msr_index) { 138062306a36Sopenharmony_ci case MSR_IA32_VMX_BASIC: 138162306a36Sopenharmony_ci return vmx_restore_vmx_basic(vmx, data); 138262306a36Sopenharmony_ci case MSR_IA32_VMX_PINBASED_CTLS: 138362306a36Sopenharmony_ci case MSR_IA32_VMX_PROCBASED_CTLS: 138462306a36Sopenharmony_ci case MSR_IA32_VMX_EXIT_CTLS: 138562306a36Sopenharmony_ci case MSR_IA32_VMX_ENTRY_CTLS: 138662306a36Sopenharmony_ci /* 138762306a36Sopenharmony_ci * The "non-true" VMX capability MSRs are generated from the 138862306a36Sopenharmony_ci * "true" MSRs, so we do not support restoring them directly. 
138962306a36Sopenharmony_ci * 139062306a36Sopenharmony_ci * If userspace wants to emulate VMX_BASIC[55]=0, userspace 139162306a36Sopenharmony_ci * should restore the "true" MSRs with the must-be-1 bits 139262306a36Sopenharmony_ci * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND 139362306a36Sopenharmony_ci * DEFAULT SETTINGS". 139462306a36Sopenharmony_ci */ 139562306a36Sopenharmony_ci return -EINVAL; 139662306a36Sopenharmony_ci case MSR_IA32_VMX_TRUE_PINBASED_CTLS: 139762306a36Sopenharmony_ci case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: 139862306a36Sopenharmony_ci case MSR_IA32_VMX_TRUE_EXIT_CTLS: 139962306a36Sopenharmony_ci case MSR_IA32_VMX_TRUE_ENTRY_CTLS: 140062306a36Sopenharmony_ci case MSR_IA32_VMX_PROCBASED_CTLS2: 140162306a36Sopenharmony_ci return vmx_restore_control_msr(vmx, msr_index, data); 140262306a36Sopenharmony_ci case MSR_IA32_VMX_MISC: 140362306a36Sopenharmony_ci return vmx_restore_vmx_misc(vmx, data); 140462306a36Sopenharmony_ci case MSR_IA32_VMX_CR0_FIXED0: 140562306a36Sopenharmony_ci case MSR_IA32_VMX_CR4_FIXED0: 140662306a36Sopenharmony_ci return vmx_restore_fixed0_msr(vmx, msr_index, data); 140762306a36Sopenharmony_ci case MSR_IA32_VMX_CR0_FIXED1: 140862306a36Sopenharmony_ci case MSR_IA32_VMX_CR4_FIXED1: 140962306a36Sopenharmony_ci /* 141062306a36Sopenharmony_ci * These MSRs are generated based on the vCPU's CPUID, so we 141162306a36Sopenharmony_ci * do not support restoring them directly. 
141262306a36Sopenharmony_ci */ 141362306a36Sopenharmony_ci return -EINVAL; 141462306a36Sopenharmony_ci case MSR_IA32_VMX_EPT_VPID_CAP: 141562306a36Sopenharmony_ci return vmx_restore_vmx_ept_vpid_cap(vmx, data); 141662306a36Sopenharmony_ci case MSR_IA32_VMX_VMCS_ENUM: 141762306a36Sopenharmony_ci vmx->nested.msrs.vmcs_enum = data; 141862306a36Sopenharmony_ci return 0; 141962306a36Sopenharmony_ci case MSR_IA32_VMX_VMFUNC: 142062306a36Sopenharmony_ci if (data & ~vmcs_config.nested.vmfunc_controls) 142162306a36Sopenharmony_ci return -EINVAL; 142262306a36Sopenharmony_ci vmx->nested.msrs.vmfunc_controls = data; 142362306a36Sopenharmony_ci return 0; 142462306a36Sopenharmony_ci default: 142562306a36Sopenharmony_ci /* 142662306a36Sopenharmony_ci * The rest of the VMX capability MSRs do not support restore. 142762306a36Sopenharmony_ci */ 142862306a36Sopenharmony_ci return -EINVAL; 142962306a36Sopenharmony_ci } 143062306a36Sopenharmony_ci} 143162306a36Sopenharmony_ci 143262306a36Sopenharmony_ci/* Returns 0 on success, non-0 otherwise. 
*/ 143362306a36Sopenharmony_ciint vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata) 143462306a36Sopenharmony_ci{ 143562306a36Sopenharmony_ci switch (msr_index) { 143662306a36Sopenharmony_ci case MSR_IA32_VMX_BASIC: 143762306a36Sopenharmony_ci *pdata = msrs->basic; 143862306a36Sopenharmony_ci break; 143962306a36Sopenharmony_ci case MSR_IA32_VMX_TRUE_PINBASED_CTLS: 144062306a36Sopenharmony_ci case MSR_IA32_VMX_PINBASED_CTLS: 144162306a36Sopenharmony_ci *pdata = vmx_control_msr( 144262306a36Sopenharmony_ci msrs->pinbased_ctls_low, 144362306a36Sopenharmony_ci msrs->pinbased_ctls_high); 144462306a36Sopenharmony_ci if (msr_index == MSR_IA32_VMX_PINBASED_CTLS) 144562306a36Sopenharmony_ci *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; 144662306a36Sopenharmony_ci break; 144762306a36Sopenharmony_ci case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: 144862306a36Sopenharmony_ci case MSR_IA32_VMX_PROCBASED_CTLS: 144962306a36Sopenharmony_ci *pdata = vmx_control_msr( 145062306a36Sopenharmony_ci msrs->procbased_ctls_low, 145162306a36Sopenharmony_ci msrs->procbased_ctls_high); 145262306a36Sopenharmony_ci if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS) 145362306a36Sopenharmony_ci *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; 145462306a36Sopenharmony_ci break; 145562306a36Sopenharmony_ci case MSR_IA32_VMX_TRUE_EXIT_CTLS: 145662306a36Sopenharmony_ci case MSR_IA32_VMX_EXIT_CTLS: 145762306a36Sopenharmony_ci *pdata = vmx_control_msr( 145862306a36Sopenharmony_ci msrs->exit_ctls_low, 145962306a36Sopenharmony_ci msrs->exit_ctls_high); 146062306a36Sopenharmony_ci if (msr_index == MSR_IA32_VMX_EXIT_CTLS) 146162306a36Sopenharmony_ci *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; 146262306a36Sopenharmony_ci break; 146362306a36Sopenharmony_ci case MSR_IA32_VMX_TRUE_ENTRY_CTLS: 146462306a36Sopenharmony_ci case MSR_IA32_VMX_ENTRY_CTLS: 146562306a36Sopenharmony_ci *pdata = vmx_control_msr( 146662306a36Sopenharmony_ci msrs->entry_ctls_low, 146762306a36Sopenharmony_ci msrs->entry_ctls_high); 
146862306a36Sopenharmony_ci if (msr_index == MSR_IA32_VMX_ENTRY_CTLS) 146962306a36Sopenharmony_ci *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; 147062306a36Sopenharmony_ci break; 147162306a36Sopenharmony_ci case MSR_IA32_VMX_MISC: 147262306a36Sopenharmony_ci *pdata = vmx_control_msr( 147362306a36Sopenharmony_ci msrs->misc_low, 147462306a36Sopenharmony_ci msrs->misc_high); 147562306a36Sopenharmony_ci break; 147662306a36Sopenharmony_ci case MSR_IA32_VMX_CR0_FIXED0: 147762306a36Sopenharmony_ci *pdata = msrs->cr0_fixed0; 147862306a36Sopenharmony_ci break; 147962306a36Sopenharmony_ci case MSR_IA32_VMX_CR0_FIXED1: 148062306a36Sopenharmony_ci *pdata = msrs->cr0_fixed1; 148162306a36Sopenharmony_ci break; 148262306a36Sopenharmony_ci case MSR_IA32_VMX_CR4_FIXED0: 148362306a36Sopenharmony_ci *pdata = msrs->cr4_fixed0; 148462306a36Sopenharmony_ci break; 148562306a36Sopenharmony_ci case MSR_IA32_VMX_CR4_FIXED1: 148662306a36Sopenharmony_ci *pdata = msrs->cr4_fixed1; 148762306a36Sopenharmony_ci break; 148862306a36Sopenharmony_ci case MSR_IA32_VMX_VMCS_ENUM: 148962306a36Sopenharmony_ci *pdata = msrs->vmcs_enum; 149062306a36Sopenharmony_ci break; 149162306a36Sopenharmony_ci case MSR_IA32_VMX_PROCBASED_CTLS2: 149262306a36Sopenharmony_ci *pdata = vmx_control_msr( 149362306a36Sopenharmony_ci msrs->secondary_ctls_low, 149462306a36Sopenharmony_ci msrs->secondary_ctls_high); 149562306a36Sopenharmony_ci break; 149662306a36Sopenharmony_ci case MSR_IA32_VMX_EPT_VPID_CAP: 149762306a36Sopenharmony_ci *pdata = msrs->ept_caps | 149862306a36Sopenharmony_ci ((u64)msrs->vpid_caps << 32); 149962306a36Sopenharmony_ci break; 150062306a36Sopenharmony_ci case MSR_IA32_VMX_VMFUNC: 150162306a36Sopenharmony_ci *pdata = msrs->vmfunc_controls; 150262306a36Sopenharmony_ci break; 150362306a36Sopenharmony_ci default: 150462306a36Sopenharmony_ci return 1; 150562306a36Sopenharmony_ci } 150662306a36Sopenharmony_ci 150762306a36Sopenharmony_ci return 0; 150862306a36Sopenharmony_ci} 150962306a36Sopenharmony_ci 
151062306a36Sopenharmony_ci/* 151162306a36Sopenharmony_ci * Copy the writable VMCS shadow fields back to the VMCS12, in case they have 151262306a36Sopenharmony_ci * been modified by the L1 guest. Note, "writable" in this context means 151362306a36Sopenharmony_ci * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of 151462306a36Sopenharmony_ci * fields tagged SHADOW_FIELD_RO may or may not align with the "read-only" 151562306a36Sopenharmony_ci * VM-exit information fields (which are actually writable if the vCPU is 151662306a36Sopenharmony_ci * configured to support "VMWRITE to any supported field in the VMCS"). 151762306a36Sopenharmony_ci */ 151862306a36Sopenharmony_cistatic void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) 151962306a36Sopenharmony_ci{ 152062306a36Sopenharmony_ci struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; 152162306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu); 152262306a36Sopenharmony_ci struct shadow_vmcs_field field; 152362306a36Sopenharmony_ci unsigned long val; 152462306a36Sopenharmony_ci int i; 152562306a36Sopenharmony_ci 152662306a36Sopenharmony_ci if (WARN_ON(!shadow_vmcs)) 152762306a36Sopenharmony_ci return; 152862306a36Sopenharmony_ci 152962306a36Sopenharmony_ci preempt_disable(); 153062306a36Sopenharmony_ci 153162306a36Sopenharmony_ci vmcs_load(shadow_vmcs); 153262306a36Sopenharmony_ci 153362306a36Sopenharmony_ci for (i = 0; i < max_shadow_read_write_fields; i++) { 153462306a36Sopenharmony_ci field = shadow_read_write_fields[i]; 153562306a36Sopenharmony_ci val = __vmcs_readl(field.encoding); 153662306a36Sopenharmony_ci vmcs12_write_any(vmcs12, field.encoding, field.offset, val); 153762306a36Sopenharmony_ci } 153862306a36Sopenharmony_ci 153962306a36Sopenharmony_ci vmcs_clear(shadow_vmcs); 154062306a36Sopenharmony_ci vmcs_load(vmx->loaded_vmcs->vmcs); 154162306a36Sopenharmony_ci 154262306a36Sopenharmony_ci preempt_enable(); 154362306a36Sopenharmony_ci} 154462306a36Sopenharmony_ci 
154562306a36Sopenharmony_cistatic void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) 154662306a36Sopenharmony_ci{ 154762306a36Sopenharmony_ci const struct shadow_vmcs_field *fields[] = { 154862306a36Sopenharmony_ci shadow_read_write_fields, 154962306a36Sopenharmony_ci shadow_read_only_fields 155062306a36Sopenharmony_ci }; 155162306a36Sopenharmony_ci const int max_fields[] = { 155262306a36Sopenharmony_ci max_shadow_read_write_fields, 155362306a36Sopenharmony_ci max_shadow_read_only_fields 155462306a36Sopenharmony_ci }; 155562306a36Sopenharmony_ci struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; 155662306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu); 155762306a36Sopenharmony_ci struct shadow_vmcs_field field; 155862306a36Sopenharmony_ci unsigned long val; 155962306a36Sopenharmony_ci int i, q; 156062306a36Sopenharmony_ci 156162306a36Sopenharmony_ci if (WARN_ON(!shadow_vmcs)) 156262306a36Sopenharmony_ci return; 156362306a36Sopenharmony_ci 156462306a36Sopenharmony_ci vmcs_load(shadow_vmcs); 156562306a36Sopenharmony_ci 156662306a36Sopenharmony_ci for (q = 0; q < ARRAY_SIZE(fields); q++) { 156762306a36Sopenharmony_ci for (i = 0; i < max_fields[q]; i++) { 156862306a36Sopenharmony_ci field = fields[q][i]; 156962306a36Sopenharmony_ci val = vmcs12_read_any(vmcs12, field.encoding, 157062306a36Sopenharmony_ci field.offset); 157162306a36Sopenharmony_ci __vmcs_writel(field.encoding, val); 157262306a36Sopenharmony_ci } 157362306a36Sopenharmony_ci } 157462306a36Sopenharmony_ci 157562306a36Sopenharmony_ci vmcs_clear(shadow_vmcs); 157662306a36Sopenharmony_ci vmcs_load(vmx->loaded_vmcs->vmcs); 157762306a36Sopenharmony_ci} 157862306a36Sopenharmony_ci 157962306a36Sopenharmony_cistatic void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields) 158062306a36Sopenharmony_ci{ 158162306a36Sopenharmony_ci struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12; 158262306a36Sopenharmony_ci struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; 
158362306a36Sopenharmony_ci struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(&vmx->vcpu); 158462306a36Sopenharmony_ci 158562306a36Sopenharmony_ci /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */ 158662306a36Sopenharmony_ci vmcs12->tpr_threshold = evmcs->tpr_threshold; 158762306a36Sopenharmony_ci vmcs12->guest_rip = evmcs->guest_rip; 158862306a36Sopenharmony_ci 158962306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 159062306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL))) { 159162306a36Sopenharmony_ci hv_vcpu->nested.pa_page_gpa = evmcs->partition_assist_page; 159262306a36Sopenharmony_ci hv_vcpu->nested.vm_id = evmcs->hv_vm_id; 159362306a36Sopenharmony_ci hv_vcpu->nested.vp_id = evmcs->hv_vp_id; 159462306a36Sopenharmony_ci } 159562306a36Sopenharmony_ci 159662306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 159762306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) { 159862306a36Sopenharmony_ci vmcs12->guest_rsp = evmcs->guest_rsp; 159962306a36Sopenharmony_ci vmcs12->guest_rflags = evmcs->guest_rflags; 160062306a36Sopenharmony_ci vmcs12->guest_interruptibility_info = 160162306a36Sopenharmony_ci evmcs->guest_interruptibility_info; 160262306a36Sopenharmony_ci /* 160362306a36Sopenharmony_ci * Not present in struct vmcs12: 160462306a36Sopenharmony_ci * vmcs12->guest_ssp = evmcs->guest_ssp; 160562306a36Sopenharmony_ci */ 160662306a36Sopenharmony_ci } 160762306a36Sopenharmony_ci 160862306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 160962306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) { 161062306a36Sopenharmony_ci vmcs12->cpu_based_vm_exec_control = 161162306a36Sopenharmony_ci evmcs->cpu_based_vm_exec_control; 161262306a36Sopenharmony_ci } 161362306a36Sopenharmony_ci 161462306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 161562306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) { 161662306a36Sopenharmony_ci vmcs12->exception_bitmap = evmcs->exception_bitmap; 
161762306a36Sopenharmony_ci } 161862306a36Sopenharmony_ci 161962306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 162062306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) { 162162306a36Sopenharmony_ci vmcs12->vm_entry_controls = evmcs->vm_entry_controls; 162262306a36Sopenharmony_ci } 162362306a36Sopenharmony_ci 162462306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 162562306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) { 162662306a36Sopenharmony_ci vmcs12->vm_entry_intr_info_field = 162762306a36Sopenharmony_ci evmcs->vm_entry_intr_info_field; 162862306a36Sopenharmony_ci vmcs12->vm_entry_exception_error_code = 162962306a36Sopenharmony_ci evmcs->vm_entry_exception_error_code; 163062306a36Sopenharmony_ci vmcs12->vm_entry_instruction_len = 163162306a36Sopenharmony_ci evmcs->vm_entry_instruction_len; 163262306a36Sopenharmony_ci } 163362306a36Sopenharmony_ci 163462306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 163562306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) { 163662306a36Sopenharmony_ci vmcs12->host_ia32_pat = evmcs->host_ia32_pat; 163762306a36Sopenharmony_ci vmcs12->host_ia32_efer = evmcs->host_ia32_efer; 163862306a36Sopenharmony_ci vmcs12->host_cr0 = evmcs->host_cr0; 163962306a36Sopenharmony_ci vmcs12->host_cr3 = evmcs->host_cr3; 164062306a36Sopenharmony_ci vmcs12->host_cr4 = evmcs->host_cr4; 164162306a36Sopenharmony_ci vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp; 164262306a36Sopenharmony_ci vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip; 164362306a36Sopenharmony_ci vmcs12->host_rip = evmcs->host_rip; 164462306a36Sopenharmony_ci vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs; 164562306a36Sopenharmony_ci vmcs12->host_es_selector = evmcs->host_es_selector; 164662306a36Sopenharmony_ci vmcs12->host_cs_selector = evmcs->host_cs_selector; 164762306a36Sopenharmony_ci vmcs12->host_ss_selector = evmcs->host_ss_selector; 164862306a36Sopenharmony_ci 
vmcs12->host_ds_selector = evmcs->host_ds_selector; 164962306a36Sopenharmony_ci vmcs12->host_fs_selector = evmcs->host_fs_selector; 165062306a36Sopenharmony_ci vmcs12->host_gs_selector = evmcs->host_gs_selector; 165162306a36Sopenharmony_ci vmcs12->host_tr_selector = evmcs->host_tr_selector; 165262306a36Sopenharmony_ci vmcs12->host_ia32_perf_global_ctrl = evmcs->host_ia32_perf_global_ctrl; 165362306a36Sopenharmony_ci /* 165462306a36Sopenharmony_ci * Not present in struct vmcs12: 165562306a36Sopenharmony_ci * vmcs12->host_ia32_s_cet = evmcs->host_ia32_s_cet; 165662306a36Sopenharmony_ci * vmcs12->host_ssp = evmcs->host_ssp; 165762306a36Sopenharmony_ci * vmcs12->host_ia32_int_ssp_table_addr = evmcs->host_ia32_int_ssp_table_addr; 165862306a36Sopenharmony_ci */ 165962306a36Sopenharmony_ci } 166062306a36Sopenharmony_ci 166162306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 166262306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) { 166362306a36Sopenharmony_ci vmcs12->pin_based_vm_exec_control = 166462306a36Sopenharmony_ci evmcs->pin_based_vm_exec_control; 166562306a36Sopenharmony_ci vmcs12->vm_exit_controls = evmcs->vm_exit_controls; 166662306a36Sopenharmony_ci vmcs12->secondary_vm_exec_control = 166762306a36Sopenharmony_ci evmcs->secondary_vm_exec_control; 166862306a36Sopenharmony_ci } 166962306a36Sopenharmony_ci 167062306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 167162306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) { 167262306a36Sopenharmony_ci vmcs12->io_bitmap_a = evmcs->io_bitmap_a; 167362306a36Sopenharmony_ci vmcs12->io_bitmap_b = evmcs->io_bitmap_b; 167462306a36Sopenharmony_ci } 167562306a36Sopenharmony_ci 167662306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 167762306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) { 167862306a36Sopenharmony_ci vmcs12->msr_bitmap = evmcs->msr_bitmap; 167962306a36Sopenharmony_ci } 168062306a36Sopenharmony_ci 168162306a36Sopenharmony_ci if 
(unlikely(!(hv_clean_fields & 168262306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) { 168362306a36Sopenharmony_ci vmcs12->guest_es_base = evmcs->guest_es_base; 168462306a36Sopenharmony_ci vmcs12->guest_cs_base = evmcs->guest_cs_base; 168562306a36Sopenharmony_ci vmcs12->guest_ss_base = evmcs->guest_ss_base; 168662306a36Sopenharmony_ci vmcs12->guest_ds_base = evmcs->guest_ds_base; 168762306a36Sopenharmony_ci vmcs12->guest_fs_base = evmcs->guest_fs_base; 168862306a36Sopenharmony_ci vmcs12->guest_gs_base = evmcs->guest_gs_base; 168962306a36Sopenharmony_ci vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base; 169062306a36Sopenharmony_ci vmcs12->guest_tr_base = evmcs->guest_tr_base; 169162306a36Sopenharmony_ci vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base; 169262306a36Sopenharmony_ci vmcs12->guest_idtr_base = evmcs->guest_idtr_base; 169362306a36Sopenharmony_ci vmcs12->guest_es_limit = evmcs->guest_es_limit; 169462306a36Sopenharmony_ci vmcs12->guest_cs_limit = evmcs->guest_cs_limit; 169562306a36Sopenharmony_ci vmcs12->guest_ss_limit = evmcs->guest_ss_limit; 169662306a36Sopenharmony_ci vmcs12->guest_ds_limit = evmcs->guest_ds_limit; 169762306a36Sopenharmony_ci vmcs12->guest_fs_limit = evmcs->guest_fs_limit; 169862306a36Sopenharmony_ci vmcs12->guest_gs_limit = evmcs->guest_gs_limit; 169962306a36Sopenharmony_ci vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit; 170062306a36Sopenharmony_ci vmcs12->guest_tr_limit = evmcs->guest_tr_limit; 170162306a36Sopenharmony_ci vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit; 170262306a36Sopenharmony_ci vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit; 170362306a36Sopenharmony_ci vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes; 170462306a36Sopenharmony_ci vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes; 170562306a36Sopenharmony_ci vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes; 170662306a36Sopenharmony_ci vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes; 170762306a36Sopenharmony_ci 
vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes; 170862306a36Sopenharmony_ci vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes; 170962306a36Sopenharmony_ci vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes; 171062306a36Sopenharmony_ci vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes; 171162306a36Sopenharmony_ci vmcs12->guest_es_selector = evmcs->guest_es_selector; 171262306a36Sopenharmony_ci vmcs12->guest_cs_selector = evmcs->guest_cs_selector; 171362306a36Sopenharmony_ci vmcs12->guest_ss_selector = evmcs->guest_ss_selector; 171462306a36Sopenharmony_ci vmcs12->guest_ds_selector = evmcs->guest_ds_selector; 171562306a36Sopenharmony_ci vmcs12->guest_fs_selector = evmcs->guest_fs_selector; 171662306a36Sopenharmony_ci vmcs12->guest_gs_selector = evmcs->guest_gs_selector; 171762306a36Sopenharmony_ci vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector; 171862306a36Sopenharmony_ci vmcs12->guest_tr_selector = evmcs->guest_tr_selector; 171962306a36Sopenharmony_ci } 172062306a36Sopenharmony_ci 172162306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 172262306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) { 172362306a36Sopenharmony_ci vmcs12->tsc_offset = evmcs->tsc_offset; 172462306a36Sopenharmony_ci vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr; 172562306a36Sopenharmony_ci vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap; 172662306a36Sopenharmony_ci vmcs12->encls_exiting_bitmap = evmcs->encls_exiting_bitmap; 172762306a36Sopenharmony_ci vmcs12->tsc_multiplier = evmcs->tsc_multiplier; 172862306a36Sopenharmony_ci } 172962306a36Sopenharmony_ci 173062306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 173162306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) { 173262306a36Sopenharmony_ci vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask; 173362306a36Sopenharmony_ci vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask; 173462306a36Sopenharmony_ci vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow; 
173562306a36Sopenharmony_ci vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow; 173662306a36Sopenharmony_ci vmcs12->guest_cr0 = evmcs->guest_cr0; 173762306a36Sopenharmony_ci vmcs12->guest_cr3 = evmcs->guest_cr3; 173862306a36Sopenharmony_ci vmcs12->guest_cr4 = evmcs->guest_cr4; 173962306a36Sopenharmony_ci vmcs12->guest_dr7 = evmcs->guest_dr7; 174062306a36Sopenharmony_ci } 174162306a36Sopenharmony_ci 174262306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 174362306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) { 174462306a36Sopenharmony_ci vmcs12->host_fs_base = evmcs->host_fs_base; 174562306a36Sopenharmony_ci vmcs12->host_gs_base = evmcs->host_gs_base; 174662306a36Sopenharmony_ci vmcs12->host_tr_base = evmcs->host_tr_base; 174762306a36Sopenharmony_ci vmcs12->host_gdtr_base = evmcs->host_gdtr_base; 174862306a36Sopenharmony_ci vmcs12->host_idtr_base = evmcs->host_idtr_base; 174962306a36Sopenharmony_ci vmcs12->host_rsp = evmcs->host_rsp; 175062306a36Sopenharmony_ci } 175162306a36Sopenharmony_ci 175262306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 175362306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) { 175462306a36Sopenharmony_ci vmcs12->ept_pointer = evmcs->ept_pointer; 175562306a36Sopenharmony_ci vmcs12->virtual_processor_id = evmcs->virtual_processor_id; 175662306a36Sopenharmony_ci } 175762306a36Sopenharmony_ci 175862306a36Sopenharmony_ci if (unlikely(!(hv_clean_fields & 175962306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) { 176062306a36Sopenharmony_ci vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer; 176162306a36Sopenharmony_ci vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl; 176262306a36Sopenharmony_ci vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat; 176362306a36Sopenharmony_ci vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer; 176462306a36Sopenharmony_ci vmcs12->guest_pdptr0 = evmcs->guest_pdptr0; 176562306a36Sopenharmony_ci vmcs12->guest_pdptr1 = evmcs->guest_pdptr1; 
176662306a36Sopenharmony_ci vmcs12->guest_pdptr2 = evmcs->guest_pdptr2; 176762306a36Sopenharmony_ci vmcs12->guest_pdptr3 = evmcs->guest_pdptr3; 176862306a36Sopenharmony_ci vmcs12->guest_pending_dbg_exceptions = 176962306a36Sopenharmony_ci evmcs->guest_pending_dbg_exceptions; 177062306a36Sopenharmony_ci vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp; 177162306a36Sopenharmony_ci vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip; 177262306a36Sopenharmony_ci vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs; 177362306a36Sopenharmony_ci vmcs12->guest_activity_state = evmcs->guest_activity_state; 177462306a36Sopenharmony_ci vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs; 177562306a36Sopenharmony_ci vmcs12->guest_ia32_perf_global_ctrl = evmcs->guest_ia32_perf_global_ctrl; 177662306a36Sopenharmony_ci /* 177762306a36Sopenharmony_ci * Not present in struct vmcs12: 177862306a36Sopenharmony_ci * vmcs12->guest_ia32_s_cet = evmcs->guest_ia32_s_cet; 177962306a36Sopenharmony_ci * vmcs12->guest_ia32_lbr_ctl = evmcs->guest_ia32_lbr_ctl; 178062306a36Sopenharmony_ci * vmcs12->guest_ia32_int_ssp_table_addr = evmcs->guest_ia32_int_ssp_table_addr; 178162306a36Sopenharmony_ci */ 178262306a36Sopenharmony_ci } 178362306a36Sopenharmony_ci 178462306a36Sopenharmony_ci /* 178562306a36Sopenharmony_ci * Not used? 
178662306a36Sopenharmony_ci * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr; 178762306a36Sopenharmony_ci * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr; 178862306a36Sopenharmony_ci * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr; 178962306a36Sopenharmony_ci * vmcs12->page_fault_error_code_mask = 179062306a36Sopenharmony_ci * evmcs->page_fault_error_code_mask; 179162306a36Sopenharmony_ci * vmcs12->page_fault_error_code_match = 179262306a36Sopenharmony_ci * evmcs->page_fault_error_code_match; 179362306a36Sopenharmony_ci * vmcs12->cr3_target_count = evmcs->cr3_target_count; 179462306a36Sopenharmony_ci * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count; 179562306a36Sopenharmony_ci * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count; 179662306a36Sopenharmony_ci * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count; 179762306a36Sopenharmony_ci */ 179862306a36Sopenharmony_ci 179962306a36Sopenharmony_ci /* 180062306a36Sopenharmony_ci * Read only fields: 180162306a36Sopenharmony_ci * vmcs12->guest_physical_address = evmcs->guest_physical_address; 180262306a36Sopenharmony_ci * vmcs12->vm_instruction_error = evmcs->vm_instruction_error; 180362306a36Sopenharmony_ci * vmcs12->vm_exit_reason = evmcs->vm_exit_reason; 180462306a36Sopenharmony_ci * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info; 180562306a36Sopenharmony_ci * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code; 180662306a36Sopenharmony_ci * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field; 180762306a36Sopenharmony_ci * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code; 180862306a36Sopenharmony_ci * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len; 180962306a36Sopenharmony_ci * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info; 181062306a36Sopenharmony_ci * vmcs12->exit_qualification = evmcs->exit_qualification; 181162306a36Sopenharmony_ci * 
vmcs12->guest_linear_address = evmcs->guest_linear_address; 181262306a36Sopenharmony_ci * 181362306a36Sopenharmony_ci * Not present in struct vmcs12: 181462306a36Sopenharmony_ci * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx; 181562306a36Sopenharmony_ci * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi; 181662306a36Sopenharmony_ci * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi; 181762306a36Sopenharmony_ci * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip; 181862306a36Sopenharmony_ci */ 181962306a36Sopenharmony_ci 182062306a36Sopenharmony_ci return; 182162306a36Sopenharmony_ci} 182262306a36Sopenharmony_ci 182362306a36Sopenharmony_cistatic void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx) 182462306a36Sopenharmony_ci{ 182562306a36Sopenharmony_ci struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12; 182662306a36Sopenharmony_ci struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; 182762306a36Sopenharmony_ci 182862306a36Sopenharmony_ci /* 182962306a36Sopenharmony_ci * Should not be changed by KVM: 183062306a36Sopenharmony_ci * 183162306a36Sopenharmony_ci * evmcs->host_es_selector = vmcs12->host_es_selector; 183262306a36Sopenharmony_ci * evmcs->host_cs_selector = vmcs12->host_cs_selector; 183362306a36Sopenharmony_ci * evmcs->host_ss_selector = vmcs12->host_ss_selector; 183462306a36Sopenharmony_ci * evmcs->host_ds_selector = vmcs12->host_ds_selector; 183562306a36Sopenharmony_ci * evmcs->host_fs_selector = vmcs12->host_fs_selector; 183662306a36Sopenharmony_ci * evmcs->host_gs_selector = vmcs12->host_gs_selector; 183762306a36Sopenharmony_ci * evmcs->host_tr_selector = vmcs12->host_tr_selector; 183862306a36Sopenharmony_ci * evmcs->host_ia32_pat = vmcs12->host_ia32_pat; 183962306a36Sopenharmony_ci * evmcs->host_ia32_efer = vmcs12->host_ia32_efer; 184062306a36Sopenharmony_ci * evmcs->host_cr0 = vmcs12->host_cr0; 184162306a36Sopenharmony_ci * evmcs->host_cr3 = vmcs12->host_cr3; 
184262306a36Sopenharmony_ci * evmcs->host_cr4 = vmcs12->host_cr4; 184362306a36Sopenharmony_ci * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp; 184462306a36Sopenharmony_ci * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip; 184562306a36Sopenharmony_ci * evmcs->host_rip = vmcs12->host_rip; 184662306a36Sopenharmony_ci * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs; 184762306a36Sopenharmony_ci * evmcs->host_fs_base = vmcs12->host_fs_base; 184862306a36Sopenharmony_ci * evmcs->host_gs_base = vmcs12->host_gs_base; 184962306a36Sopenharmony_ci * evmcs->host_tr_base = vmcs12->host_tr_base; 185062306a36Sopenharmony_ci * evmcs->host_gdtr_base = vmcs12->host_gdtr_base; 185162306a36Sopenharmony_ci * evmcs->host_idtr_base = vmcs12->host_idtr_base; 185262306a36Sopenharmony_ci * evmcs->host_rsp = vmcs12->host_rsp; 185362306a36Sopenharmony_ci * sync_vmcs02_to_vmcs12() doesn't read these: 185462306a36Sopenharmony_ci * evmcs->io_bitmap_a = vmcs12->io_bitmap_a; 185562306a36Sopenharmony_ci * evmcs->io_bitmap_b = vmcs12->io_bitmap_b; 185662306a36Sopenharmony_ci * evmcs->msr_bitmap = vmcs12->msr_bitmap; 185762306a36Sopenharmony_ci * evmcs->ept_pointer = vmcs12->ept_pointer; 185862306a36Sopenharmony_ci * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap; 185962306a36Sopenharmony_ci * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr; 186062306a36Sopenharmony_ci * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr; 186162306a36Sopenharmony_ci * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr; 186262306a36Sopenharmony_ci * evmcs->tpr_threshold = vmcs12->tpr_threshold; 186362306a36Sopenharmony_ci * evmcs->virtual_processor_id = vmcs12->virtual_processor_id; 186462306a36Sopenharmony_ci * evmcs->exception_bitmap = vmcs12->exception_bitmap; 186562306a36Sopenharmony_ci * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer; 186662306a36Sopenharmony_ci * evmcs->pin_based_vm_exec_control = 
vmcs12->pin_based_vm_exec_control; 186762306a36Sopenharmony_ci * evmcs->vm_exit_controls = vmcs12->vm_exit_controls; 186862306a36Sopenharmony_ci * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control; 186962306a36Sopenharmony_ci * evmcs->page_fault_error_code_mask = 187062306a36Sopenharmony_ci * vmcs12->page_fault_error_code_mask; 187162306a36Sopenharmony_ci * evmcs->page_fault_error_code_match = 187262306a36Sopenharmony_ci * vmcs12->page_fault_error_code_match; 187362306a36Sopenharmony_ci * evmcs->cr3_target_count = vmcs12->cr3_target_count; 187462306a36Sopenharmony_ci * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr; 187562306a36Sopenharmony_ci * evmcs->tsc_offset = vmcs12->tsc_offset; 187662306a36Sopenharmony_ci * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl; 187762306a36Sopenharmony_ci * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask; 187862306a36Sopenharmony_ci * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask; 187962306a36Sopenharmony_ci * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow; 188062306a36Sopenharmony_ci * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow; 188162306a36Sopenharmony_ci * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count; 188262306a36Sopenharmony_ci * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count; 188362306a36Sopenharmony_ci * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count; 188462306a36Sopenharmony_ci * evmcs->guest_ia32_perf_global_ctrl = vmcs12->guest_ia32_perf_global_ctrl; 188562306a36Sopenharmony_ci * evmcs->host_ia32_perf_global_ctrl = vmcs12->host_ia32_perf_global_ctrl; 188662306a36Sopenharmony_ci * evmcs->encls_exiting_bitmap = vmcs12->encls_exiting_bitmap; 188762306a36Sopenharmony_ci * evmcs->tsc_multiplier = vmcs12->tsc_multiplier; 188862306a36Sopenharmony_ci * 188962306a36Sopenharmony_ci * Not present in struct vmcs12: 189062306a36Sopenharmony_ci * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx; 
189162306a36Sopenharmony_ci * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi; 189262306a36Sopenharmony_ci * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi; 189362306a36Sopenharmony_ci * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip; 189462306a36Sopenharmony_ci * evmcs->host_ia32_s_cet = vmcs12->host_ia32_s_cet; 189562306a36Sopenharmony_ci * evmcs->host_ssp = vmcs12->host_ssp; 189662306a36Sopenharmony_ci * evmcs->host_ia32_int_ssp_table_addr = vmcs12->host_ia32_int_ssp_table_addr; 189762306a36Sopenharmony_ci * evmcs->guest_ia32_s_cet = vmcs12->guest_ia32_s_cet; 189862306a36Sopenharmony_ci * evmcs->guest_ia32_lbr_ctl = vmcs12->guest_ia32_lbr_ctl; 189962306a36Sopenharmony_ci * evmcs->guest_ia32_int_ssp_table_addr = vmcs12->guest_ia32_int_ssp_table_addr; 190062306a36Sopenharmony_ci * evmcs->guest_ssp = vmcs12->guest_ssp; 190162306a36Sopenharmony_ci */ 190262306a36Sopenharmony_ci 190362306a36Sopenharmony_ci evmcs->guest_es_selector = vmcs12->guest_es_selector; 190462306a36Sopenharmony_ci evmcs->guest_cs_selector = vmcs12->guest_cs_selector; 190562306a36Sopenharmony_ci evmcs->guest_ss_selector = vmcs12->guest_ss_selector; 190662306a36Sopenharmony_ci evmcs->guest_ds_selector = vmcs12->guest_ds_selector; 190762306a36Sopenharmony_ci evmcs->guest_fs_selector = vmcs12->guest_fs_selector; 190862306a36Sopenharmony_ci evmcs->guest_gs_selector = vmcs12->guest_gs_selector; 190962306a36Sopenharmony_ci evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector; 191062306a36Sopenharmony_ci evmcs->guest_tr_selector = vmcs12->guest_tr_selector; 191162306a36Sopenharmony_ci 191262306a36Sopenharmony_ci evmcs->guest_es_limit = vmcs12->guest_es_limit; 191362306a36Sopenharmony_ci evmcs->guest_cs_limit = vmcs12->guest_cs_limit; 191462306a36Sopenharmony_ci evmcs->guest_ss_limit = vmcs12->guest_ss_limit; 191562306a36Sopenharmony_ci evmcs->guest_ds_limit = vmcs12->guest_ds_limit; 191662306a36Sopenharmony_ci evmcs->guest_fs_limit = 
vmcs12->guest_fs_limit; 191762306a36Sopenharmony_ci evmcs->guest_gs_limit = vmcs12->guest_gs_limit; 191862306a36Sopenharmony_ci evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit; 191962306a36Sopenharmony_ci evmcs->guest_tr_limit = vmcs12->guest_tr_limit; 192062306a36Sopenharmony_ci evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit; 192162306a36Sopenharmony_ci evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit; 192262306a36Sopenharmony_ci 192362306a36Sopenharmony_ci evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes; 192462306a36Sopenharmony_ci evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes; 192562306a36Sopenharmony_ci evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes; 192662306a36Sopenharmony_ci evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes; 192762306a36Sopenharmony_ci evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes; 192862306a36Sopenharmony_ci evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes; 192962306a36Sopenharmony_ci evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes; 193062306a36Sopenharmony_ci evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes; 193162306a36Sopenharmony_ci 193262306a36Sopenharmony_ci evmcs->guest_es_base = vmcs12->guest_es_base; 193362306a36Sopenharmony_ci evmcs->guest_cs_base = vmcs12->guest_cs_base; 193462306a36Sopenharmony_ci evmcs->guest_ss_base = vmcs12->guest_ss_base; 193562306a36Sopenharmony_ci evmcs->guest_ds_base = vmcs12->guest_ds_base; 193662306a36Sopenharmony_ci evmcs->guest_fs_base = vmcs12->guest_fs_base; 193762306a36Sopenharmony_ci evmcs->guest_gs_base = vmcs12->guest_gs_base; 193862306a36Sopenharmony_ci evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base; 193962306a36Sopenharmony_ci evmcs->guest_tr_base = vmcs12->guest_tr_base; 194062306a36Sopenharmony_ci evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base; 194162306a36Sopenharmony_ci evmcs->guest_idtr_base = vmcs12->guest_idtr_base; 194262306a36Sopenharmony_ci 194362306a36Sopenharmony_ci evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat; 
194462306a36Sopenharmony_ci evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer; 194562306a36Sopenharmony_ci 194662306a36Sopenharmony_ci evmcs->guest_pdptr0 = vmcs12->guest_pdptr0; 194762306a36Sopenharmony_ci evmcs->guest_pdptr1 = vmcs12->guest_pdptr1; 194862306a36Sopenharmony_ci evmcs->guest_pdptr2 = vmcs12->guest_pdptr2; 194962306a36Sopenharmony_ci evmcs->guest_pdptr3 = vmcs12->guest_pdptr3; 195062306a36Sopenharmony_ci 195162306a36Sopenharmony_ci evmcs->guest_pending_dbg_exceptions = 195262306a36Sopenharmony_ci vmcs12->guest_pending_dbg_exceptions; 195362306a36Sopenharmony_ci evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp; 195462306a36Sopenharmony_ci evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip; 195562306a36Sopenharmony_ci 195662306a36Sopenharmony_ci evmcs->guest_activity_state = vmcs12->guest_activity_state; 195762306a36Sopenharmony_ci evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs; 195862306a36Sopenharmony_ci 195962306a36Sopenharmony_ci evmcs->guest_cr0 = vmcs12->guest_cr0; 196062306a36Sopenharmony_ci evmcs->guest_cr3 = vmcs12->guest_cr3; 196162306a36Sopenharmony_ci evmcs->guest_cr4 = vmcs12->guest_cr4; 196262306a36Sopenharmony_ci evmcs->guest_dr7 = vmcs12->guest_dr7; 196362306a36Sopenharmony_ci 196462306a36Sopenharmony_ci evmcs->guest_physical_address = vmcs12->guest_physical_address; 196562306a36Sopenharmony_ci 196662306a36Sopenharmony_ci evmcs->vm_instruction_error = vmcs12->vm_instruction_error; 196762306a36Sopenharmony_ci evmcs->vm_exit_reason = vmcs12->vm_exit_reason; 196862306a36Sopenharmony_ci evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info; 196962306a36Sopenharmony_ci evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code; 197062306a36Sopenharmony_ci evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field; 197162306a36Sopenharmony_ci evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code; 197262306a36Sopenharmony_ci evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len; 
197362306a36Sopenharmony_ci evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info; 197462306a36Sopenharmony_ci 197562306a36Sopenharmony_ci evmcs->exit_qualification = vmcs12->exit_qualification; 197662306a36Sopenharmony_ci 197762306a36Sopenharmony_ci evmcs->guest_linear_address = vmcs12->guest_linear_address; 197862306a36Sopenharmony_ci evmcs->guest_rsp = vmcs12->guest_rsp; 197962306a36Sopenharmony_ci evmcs->guest_rflags = vmcs12->guest_rflags; 198062306a36Sopenharmony_ci 198162306a36Sopenharmony_ci evmcs->guest_interruptibility_info = 198262306a36Sopenharmony_ci vmcs12->guest_interruptibility_info; 198362306a36Sopenharmony_ci evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control; 198462306a36Sopenharmony_ci evmcs->vm_entry_controls = vmcs12->vm_entry_controls; 198562306a36Sopenharmony_ci evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field; 198662306a36Sopenharmony_ci evmcs->vm_entry_exception_error_code = 198762306a36Sopenharmony_ci vmcs12->vm_entry_exception_error_code; 198862306a36Sopenharmony_ci evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len; 198962306a36Sopenharmony_ci 199062306a36Sopenharmony_ci evmcs->guest_rip = vmcs12->guest_rip; 199162306a36Sopenharmony_ci 199262306a36Sopenharmony_ci evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs; 199362306a36Sopenharmony_ci 199462306a36Sopenharmony_ci return; 199562306a36Sopenharmony_ci} 199662306a36Sopenharmony_ci 199762306a36Sopenharmony_ci/* 199862306a36Sopenharmony_ci * This is an equivalent of the nested hypervisor executing the vmptrld 199962306a36Sopenharmony_ci * instruction. 
200062306a36Sopenharmony_ci */ 200162306a36Sopenharmony_cistatic enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld( 200262306a36Sopenharmony_ci struct kvm_vcpu *vcpu, bool from_launch) 200362306a36Sopenharmony_ci{ 200462306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 200562306a36Sopenharmony_ci bool evmcs_gpa_changed = false; 200662306a36Sopenharmony_ci u64 evmcs_gpa; 200762306a36Sopenharmony_ci 200862306a36Sopenharmony_ci if (likely(!guest_cpuid_has_evmcs(vcpu))) 200962306a36Sopenharmony_ci return EVMPTRLD_DISABLED; 201062306a36Sopenharmony_ci 201162306a36Sopenharmony_ci evmcs_gpa = nested_get_evmptr(vcpu); 201262306a36Sopenharmony_ci if (!evmptr_is_valid(evmcs_gpa)) { 201362306a36Sopenharmony_ci nested_release_evmcs(vcpu); 201462306a36Sopenharmony_ci return EVMPTRLD_DISABLED; 201562306a36Sopenharmony_ci } 201662306a36Sopenharmony_ci 201762306a36Sopenharmony_ci if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) { 201862306a36Sopenharmony_ci vmx->nested.current_vmptr = INVALID_GPA; 201962306a36Sopenharmony_ci 202062306a36Sopenharmony_ci nested_release_evmcs(vcpu); 202162306a36Sopenharmony_ci 202262306a36Sopenharmony_ci if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa), 202362306a36Sopenharmony_ci &vmx->nested.hv_evmcs_map)) 202462306a36Sopenharmony_ci return EVMPTRLD_ERROR; 202562306a36Sopenharmony_ci 202662306a36Sopenharmony_ci vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva; 202762306a36Sopenharmony_ci 202862306a36Sopenharmony_ci /* 202962306a36Sopenharmony_ci * Currently, KVM only supports eVMCS version 1 203062306a36Sopenharmony_ci * (== KVM_EVMCS_VERSION) and thus we expect guest to set this 203162306a36Sopenharmony_ci * value to first u32 field of eVMCS which should specify eVMCS 203262306a36Sopenharmony_ci * VersionNumber. 203362306a36Sopenharmony_ci * 203462306a36Sopenharmony_ci * Guest should be aware of supported eVMCS versions by host by 203562306a36Sopenharmony_ci * examining CPUID.0x4000000A.EAX[0:15]. 
Host userspace VMM is 203662306a36Sopenharmony_ci * expected to set this CPUID leaf according to the value 203762306a36Sopenharmony_ci * returned in vmcs_version from nested_enable_evmcs(). 203862306a36Sopenharmony_ci * 203962306a36Sopenharmony_ci * However, it turns out that Microsoft Hyper-V fails to comply 204062306a36Sopenharmony_ci * to their own invented interface: When Hyper-V use eVMCS, it 204162306a36Sopenharmony_ci * just sets first u32 field of eVMCS to revision_id specified 204262306a36Sopenharmony_ci * in MSR_IA32_VMX_BASIC. Instead of used eVMCS version number 204362306a36Sopenharmony_ci * which is one of the supported versions specified in 204462306a36Sopenharmony_ci * CPUID.0x4000000A.EAX[0:15]. 204562306a36Sopenharmony_ci * 204662306a36Sopenharmony_ci * To overcome Hyper-V bug, we accept here either a supported 204762306a36Sopenharmony_ci * eVMCS version or VMCS12 revision_id as valid values for first 204862306a36Sopenharmony_ci * u32 field of eVMCS. 204962306a36Sopenharmony_ci */ 205062306a36Sopenharmony_ci if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) && 205162306a36Sopenharmony_ci (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) { 205262306a36Sopenharmony_ci nested_release_evmcs(vcpu); 205362306a36Sopenharmony_ci return EVMPTRLD_VMFAIL; 205462306a36Sopenharmony_ci } 205562306a36Sopenharmony_ci 205662306a36Sopenharmony_ci vmx->nested.hv_evmcs_vmptr = evmcs_gpa; 205762306a36Sopenharmony_ci 205862306a36Sopenharmony_ci evmcs_gpa_changed = true; 205962306a36Sopenharmony_ci /* 206062306a36Sopenharmony_ci * Unlike normal vmcs12, enlightened vmcs12 is not fully 206162306a36Sopenharmony_ci * reloaded from guest's memory (read only fields, fields not 206262306a36Sopenharmony_ci * present in struct hv_enlightened_vmcs, ...). Make sure there 206362306a36Sopenharmony_ci * are no leftovers. 
206462306a36Sopenharmony_ci */ 206562306a36Sopenharmony_ci if (from_launch) { 206662306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 206762306a36Sopenharmony_ci memset(vmcs12, 0, sizeof(*vmcs12)); 206862306a36Sopenharmony_ci vmcs12->hdr.revision_id = VMCS12_REVISION; 206962306a36Sopenharmony_ci } 207062306a36Sopenharmony_ci 207162306a36Sopenharmony_ci } 207262306a36Sopenharmony_ci 207362306a36Sopenharmony_ci /* 207462306a36Sopenharmony_ci * Clean fields data can't be used on VMLAUNCH and when we switch 207562306a36Sopenharmony_ci * between different L2 guests as KVM keeps a single VMCS12 per L1. 207662306a36Sopenharmony_ci */ 207762306a36Sopenharmony_ci if (from_launch || evmcs_gpa_changed) { 207862306a36Sopenharmony_ci vmx->nested.hv_evmcs->hv_clean_fields &= 207962306a36Sopenharmony_ci ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; 208062306a36Sopenharmony_ci 208162306a36Sopenharmony_ci vmx->nested.force_msr_bitmap_recalc = true; 208262306a36Sopenharmony_ci } 208362306a36Sopenharmony_ci 208462306a36Sopenharmony_ci return EVMPTRLD_SUCCEEDED; 208562306a36Sopenharmony_ci} 208662306a36Sopenharmony_ci 208762306a36Sopenharmony_civoid nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu) 208862306a36Sopenharmony_ci{ 208962306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 209062306a36Sopenharmony_ci 209162306a36Sopenharmony_ci if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) 209262306a36Sopenharmony_ci copy_vmcs12_to_enlightened(vmx); 209362306a36Sopenharmony_ci else 209462306a36Sopenharmony_ci copy_vmcs12_to_shadow(vmx); 209562306a36Sopenharmony_ci 209662306a36Sopenharmony_ci vmx->nested.need_vmcs12_to_shadow_sync = false; 209762306a36Sopenharmony_ci} 209862306a36Sopenharmony_ci 209962306a36Sopenharmony_cistatic enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) 210062306a36Sopenharmony_ci{ 210162306a36Sopenharmony_ci struct vcpu_vmx *vmx = 210262306a36Sopenharmony_ci container_of(timer, struct vcpu_vmx, nested.preemption_timer); 
210362306a36Sopenharmony_ci 210462306a36Sopenharmony_ci vmx->nested.preemption_timer_expired = true; 210562306a36Sopenharmony_ci kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu); 210662306a36Sopenharmony_ci kvm_vcpu_kick(&vmx->vcpu); 210762306a36Sopenharmony_ci 210862306a36Sopenharmony_ci return HRTIMER_NORESTART; 210962306a36Sopenharmony_ci} 211062306a36Sopenharmony_ci 211162306a36Sopenharmony_cistatic u64 vmx_calc_preemption_timer_value(struct kvm_vcpu *vcpu) 211262306a36Sopenharmony_ci{ 211362306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 211462306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 211562306a36Sopenharmony_ci 211662306a36Sopenharmony_ci u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >> 211762306a36Sopenharmony_ci VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; 211862306a36Sopenharmony_ci 211962306a36Sopenharmony_ci if (!vmx->nested.has_preemption_timer_deadline) { 212062306a36Sopenharmony_ci vmx->nested.preemption_timer_deadline = 212162306a36Sopenharmony_ci vmcs12->vmx_preemption_timer_value + l1_scaled_tsc; 212262306a36Sopenharmony_ci vmx->nested.has_preemption_timer_deadline = true; 212362306a36Sopenharmony_ci } 212462306a36Sopenharmony_ci return vmx->nested.preemption_timer_deadline - l1_scaled_tsc; 212562306a36Sopenharmony_ci} 212662306a36Sopenharmony_ci 212762306a36Sopenharmony_cistatic void vmx_start_preemption_timer(struct kvm_vcpu *vcpu, 212862306a36Sopenharmony_ci u64 preemption_timeout) 212962306a36Sopenharmony_ci{ 213062306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 213162306a36Sopenharmony_ci 213262306a36Sopenharmony_ci /* 213362306a36Sopenharmony_ci * A timer value of zero is architecturally guaranteed to cause 213462306a36Sopenharmony_ci * a VMExit prior to executing any instructions in the guest. 
213562306a36Sopenharmony_ci */ 213662306a36Sopenharmony_ci if (preemption_timeout == 0) { 213762306a36Sopenharmony_ci vmx_preemption_timer_fn(&vmx->nested.preemption_timer); 213862306a36Sopenharmony_ci return; 213962306a36Sopenharmony_ci } 214062306a36Sopenharmony_ci 214162306a36Sopenharmony_ci if (vcpu->arch.virtual_tsc_khz == 0) 214262306a36Sopenharmony_ci return; 214362306a36Sopenharmony_ci 214462306a36Sopenharmony_ci preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; 214562306a36Sopenharmony_ci preemption_timeout *= 1000000; 214662306a36Sopenharmony_ci do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz); 214762306a36Sopenharmony_ci hrtimer_start(&vmx->nested.preemption_timer, 214862306a36Sopenharmony_ci ktime_add_ns(ktime_get(), preemption_timeout), 214962306a36Sopenharmony_ci HRTIMER_MODE_ABS_PINNED); 215062306a36Sopenharmony_ci} 215162306a36Sopenharmony_ci 215262306a36Sopenharmony_cistatic u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) 215362306a36Sopenharmony_ci{ 215462306a36Sopenharmony_ci if (vmx->nested.nested_run_pending && 215562306a36Sopenharmony_ci (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) 215662306a36Sopenharmony_ci return vmcs12->guest_ia32_efer; 215762306a36Sopenharmony_ci else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) 215862306a36Sopenharmony_ci return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME); 215962306a36Sopenharmony_ci else 216062306a36Sopenharmony_ci return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME); 216162306a36Sopenharmony_ci} 216262306a36Sopenharmony_ci 216362306a36Sopenharmony_cistatic void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) 216462306a36Sopenharmony_ci{ 216562306a36Sopenharmony_ci struct kvm *kvm = vmx->vcpu.kvm; 216662306a36Sopenharmony_ci 216762306a36Sopenharmony_ci /* 216862306a36Sopenharmony_ci * If vmcs02 hasn't been initialized, set the constant vmcs02 state 216962306a36Sopenharmony_ci * according to L0's settings (vmcs12 is irrelevant here). 
Host 217062306a36Sopenharmony_ci * fields that come from L0 and are not constant, e.g. HOST_CR3, 217162306a36Sopenharmony_ci * will be set as needed prior to VMLAUNCH/VMRESUME. 217262306a36Sopenharmony_ci */ 217362306a36Sopenharmony_ci if (vmx->nested.vmcs02_initialized) 217462306a36Sopenharmony_ci return; 217562306a36Sopenharmony_ci vmx->nested.vmcs02_initialized = true; 217662306a36Sopenharmony_ci 217762306a36Sopenharmony_ci /* 217862306a36Sopenharmony_ci * We don't care what the EPTP value is we just need to guarantee 217962306a36Sopenharmony_ci * it's valid so we don't get a false positive when doing early 218062306a36Sopenharmony_ci * consistency checks. 218162306a36Sopenharmony_ci */ 218262306a36Sopenharmony_ci if (enable_ept && nested_early_check) 218362306a36Sopenharmony_ci vmcs_write64(EPT_POINTER, 218462306a36Sopenharmony_ci construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL)); 218562306a36Sopenharmony_ci 218662306a36Sopenharmony_ci /* All VMFUNCs are currently emulated through L0 vmexits. */ 218762306a36Sopenharmony_ci if (cpu_has_vmx_vmfunc()) 218862306a36Sopenharmony_ci vmcs_write64(VM_FUNCTION_CONTROL, 0); 218962306a36Sopenharmony_ci 219062306a36Sopenharmony_ci if (cpu_has_vmx_posted_intr()) 219162306a36Sopenharmony_ci vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR); 219262306a36Sopenharmony_ci 219362306a36Sopenharmony_ci if (cpu_has_vmx_msr_bitmap()) 219462306a36Sopenharmony_ci vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); 219562306a36Sopenharmony_ci 219662306a36Sopenharmony_ci /* 219762306a36Sopenharmony_ci * PML is emulated for L2, but never enabled in hardware as the MMU 219862306a36Sopenharmony_ci * handles A/D emulation. Disabling PML for L2 also avoids having to 219962306a36Sopenharmony_ci * deal with filtering out L2 GPAs from the buffer. 
220062306a36Sopenharmony_ci */ 220162306a36Sopenharmony_ci if (enable_pml) { 220262306a36Sopenharmony_ci vmcs_write64(PML_ADDRESS, 0); 220362306a36Sopenharmony_ci vmcs_write16(GUEST_PML_INDEX, -1); 220462306a36Sopenharmony_ci } 220562306a36Sopenharmony_ci 220662306a36Sopenharmony_ci if (cpu_has_vmx_encls_vmexit()) 220762306a36Sopenharmony_ci vmcs_write64(ENCLS_EXITING_BITMAP, INVALID_GPA); 220862306a36Sopenharmony_ci 220962306a36Sopenharmony_ci if (kvm_notify_vmexit_enabled(kvm)) 221062306a36Sopenharmony_ci vmcs_write32(NOTIFY_WINDOW, kvm->arch.notify_window); 221162306a36Sopenharmony_ci 221262306a36Sopenharmony_ci /* 221362306a36Sopenharmony_ci * Set the MSR load/store lists to match L0's settings. Only the 221462306a36Sopenharmony_ci * addresses are constant (for vmcs02), the counts can change based 221562306a36Sopenharmony_ci * on L2's behavior, e.g. switching to/from long mode. 221662306a36Sopenharmony_ci */ 221762306a36Sopenharmony_ci vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val)); 221862306a36Sopenharmony_ci vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); 221962306a36Sopenharmony_ci vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); 222062306a36Sopenharmony_ci 222162306a36Sopenharmony_ci vmx_set_constant_host_state(vmx); 222262306a36Sopenharmony_ci} 222362306a36Sopenharmony_ci 222462306a36Sopenharmony_cistatic void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx, 222562306a36Sopenharmony_ci struct vmcs12 *vmcs12) 222662306a36Sopenharmony_ci{ 222762306a36Sopenharmony_ci prepare_vmcs02_constant_state(vmx); 222862306a36Sopenharmony_ci 222962306a36Sopenharmony_ci vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA); 223062306a36Sopenharmony_ci 223162306a36Sopenharmony_ci if (enable_vpid) { 223262306a36Sopenharmony_ci if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) 223362306a36Sopenharmony_ci vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); 223462306a36Sopenharmony_ci else 
223562306a36Sopenharmony_ci vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); 223662306a36Sopenharmony_ci } 223762306a36Sopenharmony_ci} 223862306a36Sopenharmony_ci 223962306a36Sopenharmony_cistatic void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01, 224062306a36Sopenharmony_ci struct vmcs12 *vmcs12) 224162306a36Sopenharmony_ci{ 224262306a36Sopenharmony_ci u32 exec_control; 224362306a36Sopenharmony_ci u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12); 224462306a36Sopenharmony_ci 224562306a36Sopenharmony_ci if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) 224662306a36Sopenharmony_ci prepare_vmcs02_early_rare(vmx, vmcs12); 224762306a36Sopenharmony_ci 224862306a36Sopenharmony_ci /* 224962306a36Sopenharmony_ci * PIN CONTROLS 225062306a36Sopenharmony_ci */ 225162306a36Sopenharmony_ci exec_control = __pin_controls_get(vmcs01); 225262306a36Sopenharmony_ci exec_control |= (vmcs12->pin_based_vm_exec_control & 225362306a36Sopenharmony_ci ~PIN_BASED_VMX_PREEMPTION_TIMER); 225462306a36Sopenharmony_ci 225562306a36Sopenharmony_ci /* Posted interrupts setting is only taken from vmcs12. 
*/ 225662306a36Sopenharmony_ci vmx->nested.pi_pending = false; 225762306a36Sopenharmony_ci if (nested_cpu_has_posted_intr(vmcs12)) 225862306a36Sopenharmony_ci vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; 225962306a36Sopenharmony_ci else 226062306a36Sopenharmony_ci exec_control &= ~PIN_BASED_POSTED_INTR; 226162306a36Sopenharmony_ci pin_controls_set(vmx, exec_control); 226262306a36Sopenharmony_ci 226362306a36Sopenharmony_ci /* 226462306a36Sopenharmony_ci * EXEC CONTROLS 226562306a36Sopenharmony_ci */ 226662306a36Sopenharmony_ci exec_control = __exec_controls_get(vmcs01); /* L0's desires */ 226762306a36Sopenharmony_ci exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING; 226862306a36Sopenharmony_ci exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING; 226962306a36Sopenharmony_ci exec_control &= ~CPU_BASED_TPR_SHADOW; 227062306a36Sopenharmony_ci exec_control |= vmcs12->cpu_based_vm_exec_control; 227162306a36Sopenharmony_ci 227262306a36Sopenharmony_ci vmx->nested.l1_tpr_threshold = -1; 227362306a36Sopenharmony_ci if (exec_control & CPU_BASED_TPR_SHADOW) 227462306a36Sopenharmony_ci vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); 227562306a36Sopenharmony_ci#ifdef CONFIG_X86_64 227662306a36Sopenharmony_ci else 227762306a36Sopenharmony_ci exec_control |= CPU_BASED_CR8_LOAD_EXITING | 227862306a36Sopenharmony_ci CPU_BASED_CR8_STORE_EXITING; 227962306a36Sopenharmony_ci#endif 228062306a36Sopenharmony_ci 228162306a36Sopenharmony_ci /* 228262306a36Sopenharmony_ci * A vmexit (to either L1 hypervisor or L0 userspace) is always needed 228362306a36Sopenharmony_ci * for I/O port accesses. 228462306a36Sopenharmony_ci */ 228562306a36Sopenharmony_ci exec_control |= CPU_BASED_UNCOND_IO_EXITING; 228662306a36Sopenharmony_ci exec_control &= ~CPU_BASED_USE_IO_BITMAPS; 228762306a36Sopenharmony_ci 228862306a36Sopenharmony_ci /* 228962306a36Sopenharmony_ci * This bit will be computed in nested_get_vmcs12_pages, because 229062306a36Sopenharmony_ci * we do not have access to L1's MSR bitmap yet. 
For now, keep 229162306a36Sopenharmony_ci * the same bit as before, hoping to avoid multiple VMWRITEs that 229262306a36Sopenharmony_ci * only set/clear this bit. 229362306a36Sopenharmony_ci */ 229462306a36Sopenharmony_ci exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; 229562306a36Sopenharmony_ci exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS; 229662306a36Sopenharmony_ci 229762306a36Sopenharmony_ci exec_controls_set(vmx, exec_control); 229862306a36Sopenharmony_ci 229962306a36Sopenharmony_ci /* 230062306a36Sopenharmony_ci * SECONDARY EXEC CONTROLS 230162306a36Sopenharmony_ci */ 230262306a36Sopenharmony_ci if (cpu_has_secondary_exec_ctrls()) { 230362306a36Sopenharmony_ci exec_control = __secondary_exec_controls_get(vmcs01); 230462306a36Sopenharmony_ci 230562306a36Sopenharmony_ci /* Take the following fields only from vmcs12 */ 230662306a36Sopenharmony_ci exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 230762306a36Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | 230862306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_INVPCID | 230962306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_RDTSCP | 231062306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_XSAVES | 231162306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | 231262306a36Sopenharmony_ci SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 231362306a36Sopenharmony_ci SECONDARY_EXEC_APIC_REGISTER_VIRT | 231462306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_VMFUNC | 231562306a36Sopenharmony_ci SECONDARY_EXEC_DESC); 231662306a36Sopenharmony_ci 231762306a36Sopenharmony_ci if (nested_cpu_has(vmcs12, 231862306a36Sopenharmony_ci CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) 231962306a36Sopenharmony_ci exec_control |= vmcs12->secondary_vm_exec_control; 232062306a36Sopenharmony_ci 232162306a36Sopenharmony_ci /* PML is emulated and never enabled in hardware for L2. 
*/ 232262306a36Sopenharmony_ci exec_control &= ~SECONDARY_EXEC_ENABLE_PML; 232362306a36Sopenharmony_ci 232462306a36Sopenharmony_ci /* VMCS shadowing for L2 is emulated for now */ 232562306a36Sopenharmony_ci exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; 232662306a36Sopenharmony_ci 232762306a36Sopenharmony_ci /* 232862306a36Sopenharmony_ci * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4() 232962306a36Sopenharmony_ci * will not have to rewrite the controls just for this bit. 233062306a36Sopenharmony_ci */ 233162306a36Sopenharmony_ci if (vmx_umip_emulated() && (vmcs12->guest_cr4 & X86_CR4_UMIP)) 233262306a36Sopenharmony_ci exec_control |= SECONDARY_EXEC_DESC; 233362306a36Sopenharmony_ci 233462306a36Sopenharmony_ci if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) 233562306a36Sopenharmony_ci vmcs_write16(GUEST_INTR_STATUS, 233662306a36Sopenharmony_ci vmcs12->guest_intr_status); 233762306a36Sopenharmony_ci 233862306a36Sopenharmony_ci if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) 233962306a36Sopenharmony_ci exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; 234062306a36Sopenharmony_ci 234162306a36Sopenharmony_ci if (exec_control & SECONDARY_EXEC_ENCLS_EXITING) 234262306a36Sopenharmony_ci vmx_write_encls_bitmap(&vmx->vcpu, vmcs12); 234362306a36Sopenharmony_ci 234462306a36Sopenharmony_ci secondary_exec_controls_set(vmx, exec_control); 234562306a36Sopenharmony_ci } 234662306a36Sopenharmony_ci 234762306a36Sopenharmony_ci /* 234862306a36Sopenharmony_ci * ENTRY CONTROLS 234962306a36Sopenharmony_ci * 235062306a36Sopenharmony_ci * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE 235162306a36Sopenharmony_ci * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate 235262306a36Sopenharmony_ci * on the related bits (if supported by the CPU) in the hope that 235362306a36Sopenharmony_ci * we can avoid VMWrites during vmx_set_efer(). 
235462306a36Sopenharmony_ci * 235562306a36Sopenharmony_ci * Similarly, take vmcs01's PERF_GLOBAL_CTRL in the hope that if KVM is 235662306a36Sopenharmony_ci * loading PERF_GLOBAL_CTRL via the VMCS for L1, then KVM will want to 235762306a36Sopenharmony_ci * do the same for L2. 235862306a36Sopenharmony_ci */ 235962306a36Sopenharmony_ci exec_control = __vm_entry_controls_get(vmcs01); 236062306a36Sopenharmony_ci exec_control |= (vmcs12->vm_entry_controls & 236162306a36Sopenharmony_ci ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); 236262306a36Sopenharmony_ci exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER); 236362306a36Sopenharmony_ci if (cpu_has_load_ia32_efer()) { 236462306a36Sopenharmony_ci if (guest_efer & EFER_LMA) 236562306a36Sopenharmony_ci exec_control |= VM_ENTRY_IA32E_MODE; 236662306a36Sopenharmony_ci if (guest_efer != host_efer) 236762306a36Sopenharmony_ci exec_control |= VM_ENTRY_LOAD_IA32_EFER; 236862306a36Sopenharmony_ci } 236962306a36Sopenharmony_ci vm_entry_controls_set(vmx, exec_control); 237062306a36Sopenharmony_ci 237162306a36Sopenharmony_ci /* 237262306a36Sopenharmony_ci * EXIT CONTROLS 237362306a36Sopenharmony_ci * 237462306a36Sopenharmony_ci * L2->L1 exit controls are emulated - the hardware exit is to L0 so 237562306a36Sopenharmony_ci * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER 237662306a36Sopenharmony_ci * bits may be modified by vmx_set_efer() in prepare_vmcs02(). 
237762306a36Sopenharmony_ci */ 237862306a36Sopenharmony_ci exec_control = __vm_exit_controls_get(vmcs01); 237962306a36Sopenharmony_ci if (cpu_has_load_ia32_efer() && guest_efer != host_efer) 238062306a36Sopenharmony_ci exec_control |= VM_EXIT_LOAD_IA32_EFER; 238162306a36Sopenharmony_ci else 238262306a36Sopenharmony_ci exec_control &= ~VM_EXIT_LOAD_IA32_EFER; 238362306a36Sopenharmony_ci vm_exit_controls_set(vmx, exec_control); 238462306a36Sopenharmony_ci 238562306a36Sopenharmony_ci /* 238662306a36Sopenharmony_ci * Interrupt/Exception Fields 238762306a36Sopenharmony_ci */ 238862306a36Sopenharmony_ci if (vmx->nested.nested_run_pending) { 238962306a36Sopenharmony_ci vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 239062306a36Sopenharmony_ci vmcs12->vm_entry_intr_info_field); 239162306a36Sopenharmony_ci vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 239262306a36Sopenharmony_ci vmcs12->vm_entry_exception_error_code); 239362306a36Sopenharmony_ci vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 239462306a36Sopenharmony_ci vmcs12->vm_entry_instruction_len); 239562306a36Sopenharmony_ci vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 239662306a36Sopenharmony_ci vmcs12->guest_interruptibility_info); 239762306a36Sopenharmony_ci vmx->loaded_vmcs->nmi_known_unmasked = 239862306a36Sopenharmony_ci !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI); 239962306a36Sopenharmony_ci } else { 240062306a36Sopenharmony_ci vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); 240162306a36Sopenharmony_ci } 240262306a36Sopenharmony_ci} 240362306a36Sopenharmony_ci 240462306a36Sopenharmony_cistatic void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) 240562306a36Sopenharmony_ci{ 240662306a36Sopenharmony_ci struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs; 240762306a36Sopenharmony_ci 240862306a36Sopenharmony_ci if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & 240962306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) { 241062306a36Sopenharmony_ci vmcs_write16(GUEST_ES_SELECTOR, 
vmcs12->guest_es_selector); 241162306a36Sopenharmony_ci vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); 241262306a36Sopenharmony_ci vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector); 241362306a36Sopenharmony_ci vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector); 241462306a36Sopenharmony_ci vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector); 241562306a36Sopenharmony_ci vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector); 241662306a36Sopenharmony_ci vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector); 241762306a36Sopenharmony_ci vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector); 241862306a36Sopenharmony_ci vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit); 241962306a36Sopenharmony_ci vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit); 242062306a36Sopenharmony_ci vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit); 242162306a36Sopenharmony_ci vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit); 242262306a36Sopenharmony_ci vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit); 242362306a36Sopenharmony_ci vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit); 242462306a36Sopenharmony_ci vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit); 242562306a36Sopenharmony_ci vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit); 242662306a36Sopenharmony_ci vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit); 242762306a36Sopenharmony_ci vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit); 242862306a36Sopenharmony_ci vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes); 242962306a36Sopenharmony_ci vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes); 243062306a36Sopenharmony_ci vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes); 243162306a36Sopenharmony_ci vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes); 243262306a36Sopenharmony_ci vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes); 243362306a36Sopenharmony_ci vmcs_write32(GUEST_GS_AR_BYTES, 
vmcs12->guest_gs_ar_bytes); 243462306a36Sopenharmony_ci vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes); 243562306a36Sopenharmony_ci vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes); 243662306a36Sopenharmony_ci vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base); 243762306a36Sopenharmony_ci vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base); 243862306a36Sopenharmony_ci vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base); 243962306a36Sopenharmony_ci vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base); 244062306a36Sopenharmony_ci vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base); 244162306a36Sopenharmony_ci vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base); 244262306a36Sopenharmony_ci vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base); 244362306a36Sopenharmony_ci vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base); 244462306a36Sopenharmony_ci vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); 244562306a36Sopenharmony_ci vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); 244662306a36Sopenharmony_ci 244762306a36Sopenharmony_ci vmx->segment_cache.bitmask = 0; 244862306a36Sopenharmony_ci } 244962306a36Sopenharmony_ci 245062306a36Sopenharmony_ci if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & 245162306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) { 245262306a36Sopenharmony_ci vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); 245362306a36Sopenharmony_ci vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 245462306a36Sopenharmony_ci vmcs12->guest_pending_dbg_exceptions); 245562306a36Sopenharmony_ci vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); 245662306a36Sopenharmony_ci vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip); 245762306a36Sopenharmony_ci 245862306a36Sopenharmony_ci /* 245962306a36Sopenharmony_ci * L1 may access the L2's PDPTR, so save them to construct 246062306a36Sopenharmony_ci * vmcs12 246162306a36Sopenharmony_ci */ 246262306a36Sopenharmony_ci if (enable_ept) { 246362306a36Sopenharmony_ci 
vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); 246462306a36Sopenharmony_ci vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); 246562306a36Sopenharmony_ci vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); 246662306a36Sopenharmony_ci vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); 246762306a36Sopenharmony_ci } 246862306a36Sopenharmony_ci 246962306a36Sopenharmony_ci if (kvm_mpx_supported() && vmx->nested.nested_run_pending && 247062306a36Sopenharmony_ci (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) 247162306a36Sopenharmony_ci vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); 247262306a36Sopenharmony_ci } 247362306a36Sopenharmony_ci 247462306a36Sopenharmony_ci if (nested_cpu_has_xsaves(vmcs12)) 247562306a36Sopenharmony_ci vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap); 247662306a36Sopenharmony_ci 247762306a36Sopenharmony_ci /* 247862306a36Sopenharmony_ci * Whether page-faults are trapped is determined by a combination of 247962306a36Sopenharmony_ci * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. If L0 248062306a36Sopenharmony_ci * doesn't care about page faults then we should set all of these to 248162306a36Sopenharmony_ci * L1's desires. However, if L0 does care about (some) page faults, it 248262306a36Sopenharmony_ci * is not easy (if at all possible?) to merge L0 and L1's desires, we 248362306a36Sopenharmony_ci * simply ask to exit on each and every L2 page fault. This is done by 248462306a36Sopenharmony_ci * setting MASK=MATCH=0 and (see below) EB.PF=1. 248562306a36Sopenharmony_ci * Note that below we don't need special code to set EB.PF beyond the 248662306a36Sopenharmony_ci * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept, 248762306a36Sopenharmony_ci * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when 248862306a36Sopenharmony_ci * !enable_ept, EB.PF is 1, so the "or" will always be 1. 
248962306a36Sopenharmony_ci */ 249062306a36Sopenharmony_ci if (vmx_need_pf_intercept(&vmx->vcpu)) { 249162306a36Sopenharmony_ci /* 249262306a36Sopenharmony_ci * TODO: if both L0 and L1 need the same MASK and MATCH, 249362306a36Sopenharmony_ci * go ahead and use it? 249462306a36Sopenharmony_ci */ 249562306a36Sopenharmony_ci vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); 249662306a36Sopenharmony_ci vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); 249762306a36Sopenharmony_ci } else { 249862306a36Sopenharmony_ci vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, vmcs12->page_fault_error_code_mask); 249962306a36Sopenharmony_ci vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, vmcs12->page_fault_error_code_match); 250062306a36Sopenharmony_ci } 250162306a36Sopenharmony_ci 250262306a36Sopenharmony_ci if (cpu_has_vmx_apicv()) { 250362306a36Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0); 250462306a36Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1); 250562306a36Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2); 250662306a36Sopenharmony_ci vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3); 250762306a36Sopenharmony_ci } 250862306a36Sopenharmony_ci 250962306a36Sopenharmony_ci /* 251062306a36Sopenharmony_ci * Make sure the msr_autostore list is up to date before we set the 251162306a36Sopenharmony_ci * count in the vmcs02. 
251262306a36Sopenharmony_ci */ 251362306a36Sopenharmony_ci prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC); 251462306a36Sopenharmony_ci 251562306a36Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr); 251662306a36Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); 251762306a36Sopenharmony_ci vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); 251862306a36Sopenharmony_ci 251962306a36Sopenharmony_ci set_cr4_guest_host_mask(vmx); 252062306a36Sopenharmony_ci} 252162306a36Sopenharmony_ci 252262306a36Sopenharmony_ci/* 252362306a36Sopenharmony_ci * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested 252462306a36Sopenharmony_ci * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it 252562306a36Sopenharmony_ci * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2 252662306a36Sopenharmony_ci * guest in a way that will both be appropriate to L1's requests, and our 252762306a36Sopenharmony_ci * needs. In addition to modifying the active vmcs (which is vmcs02), this 252862306a36Sopenharmony_ci * function also has additional necessary side-effects, like setting various 252962306a36Sopenharmony_ci * vcpu->arch fields. 253062306a36Sopenharmony_ci * Returns 0 on success, 1 on failure. Invalid state exit qualification code 253162306a36Sopenharmony_ci * is assigned to entry_failure_code on failure. 
253262306a36Sopenharmony_ci */ 253362306a36Sopenharmony_cistatic int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, 253462306a36Sopenharmony_ci bool from_vmentry, 253562306a36Sopenharmony_ci enum vm_entry_failure_code *entry_failure_code) 253662306a36Sopenharmony_ci{ 253762306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 253862306a36Sopenharmony_ci bool load_guest_pdptrs_vmcs12 = false; 253962306a36Sopenharmony_ci 254062306a36Sopenharmony_ci if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) { 254162306a36Sopenharmony_ci prepare_vmcs02_rare(vmx, vmcs12); 254262306a36Sopenharmony_ci vmx->nested.dirty_vmcs12 = false; 254362306a36Sopenharmony_ci 254462306a36Sopenharmony_ci load_guest_pdptrs_vmcs12 = !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) || 254562306a36Sopenharmony_ci !(vmx->nested.hv_evmcs->hv_clean_fields & 254662306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1); 254762306a36Sopenharmony_ci } 254862306a36Sopenharmony_ci 254962306a36Sopenharmony_ci if (vmx->nested.nested_run_pending && 255062306a36Sopenharmony_ci (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { 255162306a36Sopenharmony_ci kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); 255262306a36Sopenharmony_ci vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); 255362306a36Sopenharmony_ci } else { 255462306a36Sopenharmony_ci kvm_set_dr(vcpu, 7, vcpu->arch.dr7); 255562306a36Sopenharmony_ci vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl); 255662306a36Sopenharmony_ci } 255762306a36Sopenharmony_ci if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending || 255862306a36Sopenharmony_ci !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) 255962306a36Sopenharmony_ci vmcs_write64(GUEST_BNDCFGS, vmx->nested.pre_vmenter_bndcfgs); 256062306a36Sopenharmony_ci vmx_set_rflags(vcpu, vmcs12->guest_rflags); 256162306a36Sopenharmony_ci 256262306a36Sopenharmony_ci /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should 
basically be the 256362306a36Sopenharmony_ci * bitwise-or of what L1 wants to trap for L2, and what we want to 256462306a36Sopenharmony_ci * trap. Note that CR0.TS also needs updating - we do this later. 256562306a36Sopenharmony_ci */ 256662306a36Sopenharmony_ci vmx_update_exception_bitmap(vcpu); 256762306a36Sopenharmony_ci vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; 256862306a36Sopenharmony_ci vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); 256962306a36Sopenharmony_ci 257062306a36Sopenharmony_ci if (vmx->nested.nested_run_pending && 257162306a36Sopenharmony_ci (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) { 257262306a36Sopenharmony_ci vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); 257362306a36Sopenharmony_ci vcpu->arch.pat = vmcs12->guest_ia32_pat; 257462306a36Sopenharmony_ci } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { 257562306a36Sopenharmony_ci vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); 257662306a36Sopenharmony_ci } 257762306a36Sopenharmony_ci 257862306a36Sopenharmony_ci vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset( 257962306a36Sopenharmony_ci vcpu->arch.l1_tsc_offset, 258062306a36Sopenharmony_ci vmx_get_l2_tsc_offset(vcpu), 258162306a36Sopenharmony_ci vmx_get_l2_tsc_multiplier(vcpu)); 258262306a36Sopenharmony_ci 258362306a36Sopenharmony_ci vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier( 258462306a36Sopenharmony_ci vcpu->arch.l1_tsc_scaling_ratio, 258562306a36Sopenharmony_ci vmx_get_l2_tsc_multiplier(vcpu)); 258662306a36Sopenharmony_ci 258762306a36Sopenharmony_ci vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); 258862306a36Sopenharmony_ci if (kvm_caps.has_tsc_control) 258962306a36Sopenharmony_ci vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio); 259062306a36Sopenharmony_ci 259162306a36Sopenharmony_ci nested_vmx_transition_tlb_flush(vcpu, vmcs12, true); 259262306a36Sopenharmony_ci 259362306a36Sopenharmony_ci if (nested_cpu_has_ept(vmcs12)) 
259462306a36Sopenharmony_ci nested_ept_init_mmu_context(vcpu); 259562306a36Sopenharmony_ci 259662306a36Sopenharmony_ci /* 259762306a36Sopenharmony_ci * Override the CR0/CR4 read shadows after setting the effective guest 259862306a36Sopenharmony_ci * CR0/CR4. The common helpers also set the shadows, but they don't 259962306a36Sopenharmony_ci * account for vmcs12's cr0/4_guest_host_mask. 260062306a36Sopenharmony_ci */ 260162306a36Sopenharmony_ci vmx_set_cr0(vcpu, vmcs12->guest_cr0); 260262306a36Sopenharmony_ci vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12)); 260362306a36Sopenharmony_ci 260462306a36Sopenharmony_ci vmx_set_cr4(vcpu, vmcs12->guest_cr4); 260562306a36Sopenharmony_ci vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12)); 260662306a36Sopenharmony_ci 260762306a36Sopenharmony_ci vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12); 260862306a36Sopenharmony_ci /* Note: may modify VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */ 260962306a36Sopenharmony_ci vmx_set_efer(vcpu, vcpu->arch.efer); 261062306a36Sopenharmony_ci 261162306a36Sopenharmony_ci /* 261262306a36Sopenharmony_ci * Guest state is invalid and unrestricted guest is disabled, 261362306a36Sopenharmony_ci * which means L1 attempted VMEntry to L2 with invalid state. 261462306a36Sopenharmony_ci * Fail the VMEntry. 
261562306a36Sopenharmony_ci * 261662306a36Sopenharmony_ci * However when force loading the guest state (SMM exit or 261762306a36Sopenharmony_ci * loading nested state after migration, it is possible to 261862306a36Sopenharmony_ci * have invalid guest state now, which will be later fixed by 261962306a36Sopenharmony_ci * restoring L2 register state 262062306a36Sopenharmony_ci */ 262162306a36Sopenharmony_ci if (CC(from_vmentry && !vmx_guest_state_valid(vcpu))) { 262262306a36Sopenharmony_ci *entry_failure_code = ENTRY_FAIL_DEFAULT; 262362306a36Sopenharmony_ci return -EINVAL; 262462306a36Sopenharmony_ci } 262562306a36Sopenharmony_ci 262662306a36Sopenharmony_ci /* Shadow page tables on either EPT or shadow page tables. */ 262762306a36Sopenharmony_ci if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), 262862306a36Sopenharmony_ci from_vmentry, entry_failure_code)) 262962306a36Sopenharmony_ci return -EINVAL; 263062306a36Sopenharmony_ci 263162306a36Sopenharmony_ci /* 263262306a36Sopenharmony_ci * Immediately write vmcs02.GUEST_CR3. It will be propagated to vmcs12 263362306a36Sopenharmony_ci * on nested VM-Exit, which can occur without actually running L2 and 263462306a36Sopenharmony_ci * thus without hitting vmx_load_mmu_pgd(), e.g. if L1 is entering L2 with 263562306a36Sopenharmony_ci * vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the 263662306a36Sopenharmony_ci * transition to HLT instead of running L2. 263762306a36Sopenharmony_ci */ 263862306a36Sopenharmony_ci if (enable_ept) 263962306a36Sopenharmony_ci vmcs_writel(GUEST_CR3, vmcs12->guest_cr3); 264062306a36Sopenharmony_ci 264162306a36Sopenharmony_ci /* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. 
*/ 264262306a36Sopenharmony_ci if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) && 264362306a36Sopenharmony_ci is_pae_paging(vcpu)) { 264462306a36Sopenharmony_ci vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); 264562306a36Sopenharmony_ci vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); 264662306a36Sopenharmony_ci vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); 264762306a36Sopenharmony_ci vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); 264862306a36Sopenharmony_ci } 264962306a36Sopenharmony_ci 265062306a36Sopenharmony_ci if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && 265162306a36Sopenharmony_ci kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) && 265262306a36Sopenharmony_ci WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, 265362306a36Sopenharmony_ci vmcs12->guest_ia32_perf_global_ctrl))) { 265462306a36Sopenharmony_ci *entry_failure_code = ENTRY_FAIL_DEFAULT; 265562306a36Sopenharmony_ci return -EINVAL; 265662306a36Sopenharmony_ci } 265762306a36Sopenharmony_ci 265862306a36Sopenharmony_ci kvm_rsp_write(vcpu, vmcs12->guest_rsp); 265962306a36Sopenharmony_ci kvm_rip_write(vcpu, vmcs12->guest_rip); 266062306a36Sopenharmony_ci 266162306a36Sopenharmony_ci /* 266262306a36Sopenharmony_ci * It was observed that genuine Hyper-V running in L1 doesn't reset 266362306a36Sopenharmony_ci * 'hv_clean_fields' by itself, it only sets the corresponding dirty 266462306a36Sopenharmony_ci * bits when it changes a field in eVMCS. Mark all fields as clean 266562306a36Sopenharmony_ci * here. 
266662306a36Sopenharmony_ci */ 266762306a36Sopenharmony_ci if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) 266862306a36Sopenharmony_ci vmx->nested.hv_evmcs->hv_clean_fields |= 266962306a36Sopenharmony_ci HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; 267062306a36Sopenharmony_ci 267162306a36Sopenharmony_ci return 0; 267262306a36Sopenharmony_ci} 267362306a36Sopenharmony_ci 267462306a36Sopenharmony_cistatic int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12) 267562306a36Sopenharmony_ci{ 267662306a36Sopenharmony_ci if (CC(!nested_cpu_has_nmi_exiting(vmcs12) && 267762306a36Sopenharmony_ci nested_cpu_has_virtual_nmis(vmcs12))) 267862306a36Sopenharmony_ci return -EINVAL; 267962306a36Sopenharmony_ci 268062306a36Sopenharmony_ci if (CC(!nested_cpu_has_virtual_nmis(vmcs12) && 268162306a36Sopenharmony_ci nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING))) 268262306a36Sopenharmony_ci return -EINVAL; 268362306a36Sopenharmony_ci 268462306a36Sopenharmony_ci return 0; 268562306a36Sopenharmony_ci} 268662306a36Sopenharmony_ci 268762306a36Sopenharmony_cistatic bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp) 268862306a36Sopenharmony_ci{ 268962306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 269062306a36Sopenharmony_ci 269162306a36Sopenharmony_ci /* Check for memory type validity */ 269262306a36Sopenharmony_ci switch (new_eptp & VMX_EPTP_MT_MASK) { 269362306a36Sopenharmony_ci case VMX_EPTP_MT_UC: 269462306a36Sopenharmony_ci if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT))) 269562306a36Sopenharmony_ci return false; 269662306a36Sopenharmony_ci break; 269762306a36Sopenharmony_ci case VMX_EPTP_MT_WB: 269862306a36Sopenharmony_ci if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT))) 269962306a36Sopenharmony_ci return false; 270062306a36Sopenharmony_ci break; 270162306a36Sopenharmony_ci default: 270262306a36Sopenharmony_ci return false; 270362306a36Sopenharmony_ci } 270462306a36Sopenharmony_ci 270562306a36Sopenharmony_ci /* Page-walk levels validity. 
*/ 270662306a36Sopenharmony_ci switch (new_eptp & VMX_EPTP_PWL_MASK) { 270762306a36Sopenharmony_ci case VMX_EPTP_PWL_5: 270862306a36Sopenharmony_ci if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT))) 270962306a36Sopenharmony_ci return false; 271062306a36Sopenharmony_ci break; 271162306a36Sopenharmony_ci case VMX_EPTP_PWL_4: 271262306a36Sopenharmony_ci if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT))) 271362306a36Sopenharmony_ci return false; 271462306a36Sopenharmony_ci break; 271562306a36Sopenharmony_ci default: 271662306a36Sopenharmony_ci return false; 271762306a36Sopenharmony_ci } 271862306a36Sopenharmony_ci 271962306a36Sopenharmony_ci /* Reserved bits should not be set */ 272062306a36Sopenharmony_ci if (CC(kvm_vcpu_is_illegal_gpa(vcpu, new_eptp) || ((new_eptp >> 7) & 0x1f))) 272162306a36Sopenharmony_ci return false; 272262306a36Sopenharmony_ci 272362306a36Sopenharmony_ci /* AD, if set, should be supported */ 272462306a36Sopenharmony_ci if (new_eptp & VMX_EPTP_AD_ENABLE_BIT) { 272562306a36Sopenharmony_ci if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT))) 272662306a36Sopenharmony_ci return false; 272762306a36Sopenharmony_ci } 272862306a36Sopenharmony_ci 272962306a36Sopenharmony_ci return true; 273062306a36Sopenharmony_ci} 273162306a36Sopenharmony_ci 273262306a36Sopenharmony_ci/* 273362306a36Sopenharmony_ci * Checks related to VM-Execution Control Fields 273462306a36Sopenharmony_ci */ 273562306a36Sopenharmony_cistatic int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu, 273662306a36Sopenharmony_ci struct vmcs12 *vmcs12) 273762306a36Sopenharmony_ci{ 273862306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 273962306a36Sopenharmony_ci 274062306a36Sopenharmony_ci if (CC(!vmx_control_verify(vmcs12->pin_based_vm_exec_control, 274162306a36Sopenharmony_ci vmx->nested.msrs.pinbased_ctls_low, 274262306a36Sopenharmony_ci vmx->nested.msrs.pinbased_ctls_high)) || 274362306a36Sopenharmony_ci 
CC(!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, 274462306a36Sopenharmony_ci vmx->nested.msrs.procbased_ctls_low, 274562306a36Sopenharmony_ci vmx->nested.msrs.procbased_ctls_high))) 274662306a36Sopenharmony_ci return -EINVAL; 274762306a36Sopenharmony_ci 274862306a36Sopenharmony_ci if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && 274962306a36Sopenharmony_ci CC(!vmx_control_verify(vmcs12->secondary_vm_exec_control, 275062306a36Sopenharmony_ci vmx->nested.msrs.secondary_ctls_low, 275162306a36Sopenharmony_ci vmx->nested.msrs.secondary_ctls_high))) 275262306a36Sopenharmony_ci return -EINVAL; 275362306a36Sopenharmony_ci 275462306a36Sopenharmony_ci if (CC(vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) || 275562306a36Sopenharmony_ci nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) || 275662306a36Sopenharmony_ci nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) || 275762306a36Sopenharmony_ci nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) || 275862306a36Sopenharmony_ci nested_vmx_check_apic_access_controls(vcpu, vmcs12) || 275962306a36Sopenharmony_ci nested_vmx_check_apicv_controls(vcpu, vmcs12) || 276062306a36Sopenharmony_ci nested_vmx_check_nmi_controls(vmcs12) || 276162306a36Sopenharmony_ci nested_vmx_check_pml_controls(vcpu, vmcs12) || 276262306a36Sopenharmony_ci nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) || 276362306a36Sopenharmony_ci nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) || 276462306a36Sopenharmony_ci nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) || 276562306a36Sopenharmony_ci CC(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)) 276662306a36Sopenharmony_ci return -EINVAL; 276762306a36Sopenharmony_ci 276862306a36Sopenharmony_ci if (!nested_cpu_has_preemption_timer(vmcs12) && 276962306a36Sopenharmony_ci nested_cpu_has_save_preemption_timer(vmcs12)) 277062306a36Sopenharmony_ci return -EINVAL; 277162306a36Sopenharmony_ci 277262306a36Sopenharmony_ci if 
(nested_cpu_has_ept(vmcs12) && 277362306a36Sopenharmony_ci CC(!nested_vmx_check_eptp(vcpu, vmcs12->ept_pointer))) 277462306a36Sopenharmony_ci return -EINVAL; 277562306a36Sopenharmony_ci 277662306a36Sopenharmony_ci if (nested_cpu_has_vmfunc(vmcs12)) { 277762306a36Sopenharmony_ci if (CC(vmcs12->vm_function_control & 277862306a36Sopenharmony_ci ~vmx->nested.msrs.vmfunc_controls)) 277962306a36Sopenharmony_ci return -EINVAL; 278062306a36Sopenharmony_ci 278162306a36Sopenharmony_ci if (nested_cpu_has_eptp_switching(vmcs12)) { 278262306a36Sopenharmony_ci if (CC(!nested_cpu_has_ept(vmcs12)) || 278362306a36Sopenharmony_ci CC(!page_address_valid(vcpu, vmcs12->eptp_list_address))) 278462306a36Sopenharmony_ci return -EINVAL; 278562306a36Sopenharmony_ci } 278662306a36Sopenharmony_ci } 278762306a36Sopenharmony_ci 278862306a36Sopenharmony_ci return 0; 278962306a36Sopenharmony_ci} 279062306a36Sopenharmony_ci 279162306a36Sopenharmony_ci/* 279262306a36Sopenharmony_ci * Checks related to VM-Exit Control Fields 279362306a36Sopenharmony_ci */ 279462306a36Sopenharmony_cistatic int nested_check_vm_exit_controls(struct kvm_vcpu *vcpu, 279562306a36Sopenharmony_ci struct vmcs12 *vmcs12) 279662306a36Sopenharmony_ci{ 279762306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 279862306a36Sopenharmony_ci 279962306a36Sopenharmony_ci if (CC(!vmx_control_verify(vmcs12->vm_exit_controls, 280062306a36Sopenharmony_ci vmx->nested.msrs.exit_ctls_low, 280162306a36Sopenharmony_ci vmx->nested.msrs.exit_ctls_high)) || 280262306a36Sopenharmony_ci CC(nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12))) 280362306a36Sopenharmony_ci return -EINVAL; 280462306a36Sopenharmony_ci 280562306a36Sopenharmony_ci return 0; 280662306a36Sopenharmony_ci} 280762306a36Sopenharmony_ci 280862306a36Sopenharmony_ci/* 280962306a36Sopenharmony_ci * Checks related to VM-Entry Control Fields 281062306a36Sopenharmony_ci */ 281162306a36Sopenharmony_cistatic int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, 
281262306a36Sopenharmony_ci struct vmcs12 *vmcs12) 281362306a36Sopenharmony_ci{ 281462306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 281562306a36Sopenharmony_ci 281662306a36Sopenharmony_ci if (CC(!vmx_control_verify(vmcs12->vm_entry_controls, 281762306a36Sopenharmony_ci vmx->nested.msrs.entry_ctls_low, 281862306a36Sopenharmony_ci vmx->nested.msrs.entry_ctls_high))) 281962306a36Sopenharmony_ci return -EINVAL; 282062306a36Sopenharmony_ci 282162306a36Sopenharmony_ci /* 282262306a36Sopenharmony_ci * From the Intel SDM, volume 3: 282362306a36Sopenharmony_ci * Fields relevant to VM-entry event injection must be set properly. 282462306a36Sopenharmony_ci * These fields are the VM-entry interruption-information field, the 282562306a36Sopenharmony_ci * VM-entry exception error code, and the VM-entry instruction length. 282662306a36Sopenharmony_ci */ 282762306a36Sopenharmony_ci if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) { 282862306a36Sopenharmony_ci u32 intr_info = vmcs12->vm_entry_intr_info_field; 282962306a36Sopenharmony_ci u8 vector = intr_info & INTR_INFO_VECTOR_MASK; 283062306a36Sopenharmony_ci u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK; 283162306a36Sopenharmony_ci bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK; 283262306a36Sopenharmony_ci bool should_have_error_code; 283362306a36Sopenharmony_ci bool urg = nested_cpu_has2(vmcs12, 283462306a36Sopenharmony_ci SECONDARY_EXEC_UNRESTRICTED_GUEST); 283562306a36Sopenharmony_ci bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE; 283662306a36Sopenharmony_ci 283762306a36Sopenharmony_ci /* VM-entry interruption-info field: interruption type */ 283862306a36Sopenharmony_ci if (CC(intr_type == INTR_TYPE_RESERVED) || 283962306a36Sopenharmony_ci CC(intr_type == INTR_TYPE_OTHER_EVENT && 284062306a36Sopenharmony_ci !nested_cpu_supports_monitor_trap_flag(vcpu))) 284162306a36Sopenharmony_ci return -EINVAL; 284262306a36Sopenharmony_ci 284362306a36Sopenharmony_ci /* VM-entry 
interruption-info field: vector */ 284462306a36Sopenharmony_ci if (CC(intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) || 284562306a36Sopenharmony_ci CC(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) || 284662306a36Sopenharmony_ci CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0)) 284762306a36Sopenharmony_ci return -EINVAL; 284862306a36Sopenharmony_ci 284962306a36Sopenharmony_ci /* VM-entry interruption-info field: deliver error code */ 285062306a36Sopenharmony_ci should_have_error_code = 285162306a36Sopenharmony_ci intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode && 285262306a36Sopenharmony_ci x86_exception_has_error_code(vector); 285362306a36Sopenharmony_ci if (CC(has_error_code != should_have_error_code)) 285462306a36Sopenharmony_ci return -EINVAL; 285562306a36Sopenharmony_ci 285662306a36Sopenharmony_ci /* VM-entry exception error code */ 285762306a36Sopenharmony_ci if (CC(has_error_code && 285862306a36Sopenharmony_ci vmcs12->vm_entry_exception_error_code & GENMASK(31, 16))) 285962306a36Sopenharmony_ci return -EINVAL; 286062306a36Sopenharmony_ci 286162306a36Sopenharmony_ci /* VM-entry interruption-info field: reserved bits */ 286262306a36Sopenharmony_ci if (CC(intr_info & INTR_INFO_RESVD_BITS_MASK)) 286362306a36Sopenharmony_ci return -EINVAL; 286462306a36Sopenharmony_ci 286562306a36Sopenharmony_ci /* VM-entry instruction length */ 286662306a36Sopenharmony_ci switch (intr_type) { 286762306a36Sopenharmony_ci case INTR_TYPE_SOFT_EXCEPTION: 286862306a36Sopenharmony_ci case INTR_TYPE_SOFT_INTR: 286962306a36Sopenharmony_ci case INTR_TYPE_PRIV_SW_EXCEPTION: 287062306a36Sopenharmony_ci if (CC(vmcs12->vm_entry_instruction_len > 15) || 287162306a36Sopenharmony_ci CC(vmcs12->vm_entry_instruction_len == 0 && 287262306a36Sopenharmony_ci CC(!nested_cpu_has_zero_length_injection(vcpu)))) 287362306a36Sopenharmony_ci return -EINVAL; 287462306a36Sopenharmony_ci } 287562306a36Sopenharmony_ci } 287662306a36Sopenharmony_ci 287762306a36Sopenharmony_ci if 
(nested_vmx_check_entry_msr_switch_controls(vcpu, vmcs12)) 287862306a36Sopenharmony_ci return -EINVAL; 287962306a36Sopenharmony_ci 288062306a36Sopenharmony_ci return 0; 288162306a36Sopenharmony_ci} 288262306a36Sopenharmony_ci 288362306a36Sopenharmony_cistatic int nested_vmx_check_controls(struct kvm_vcpu *vcpu, 288462306a36Sopenharmony_ci struct vmcs12 *vmcs12) 288562306a36Sopenharmony_ci{ 288662306a36Sopenharmony_ci if (nested_check_vm_execution_controls(vcpu, vmcs12) || 288762306a36Sopenharmony_ci nested_check_vm_exit_controls(vcpu, vmcs12) || 288862306a36Sopenharmony_ci nested_check_vm_entry_controls(vcpu, vmcs12)) 288962306a36Sopenharmony_ci return -EINVAL; 289062306a36Sopenharmony_ci 289162306a36Sopenharmony_ci if (guest_cpuid_has_evmcs(vcpu)) 289262306a36Sopenharmony_ci return nested_evmcs_check_controls(vmcs12); 289362306a36Sopenharmony_ci 289462306a36Sopenharmony_ci return 0; 289562306a36Sopenharmony_ci} 289662306a36Sopenharmony_ci 289762306a36Sopenharmony_cistatic int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu, 289862306a36Sopenharmony_ci struct vmcs12 *vmcs12) 289962306a36Sopenharmony_ci{ 290062306a36Sopenharmony_ci#ifdef CONFIG_X86_64 290162306a36Sopenharmony_ci if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) != 290262306a36Sopenharmony_ci !!(vcpu->arch.efer & EFER_LMA))) 290362306a36Sopenharmony_ci return -EINVAL; 290462306a36Sopenharmony_ci#endif 290562306a36Sopenharmony_ci return 0; 290662306a36Sopenharmony_ci} 290762306a36Sopenharmony_ci 290862306a36Sopenharmony_cistatic int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, 290962306a36Sopenharmony_ci struct vmcs12 *vmcs12) 291062306a36Sopenharmony_ci{ 291162306a36Sopenharmony_ci bool ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE); 291262306a36Sopenharmony_ci 291362306a36Sopenharmony_ci if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) || 291462306a36Sopenharmony_ci CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) || 
291562306a36Sopenharmony_ci CC(kvm_vcpu_is_illegal_gpa(vcpu, vmcs12->host_cr3))) 291662306a36Sopenharmony_ci return -EINVAL; 291762306a36Sopenharmony_ci 291862306a36Sopenharmony_ci if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) || 291962306a36Sopenharmony_ci CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu))) 292062306a36Sopenharmony_ci return -EINVAL; 292162306a36Sopenharmony_ci 292262306a36Sopenharmony_ci if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) && 292362306a36Sopenharmony_ci CC(!kvm_pat_valid(vmcs12->host_ia32_pat))) 292462306a36Sopenharmony_ci return -EINVAL; 292562306a36Sopenharmony_ci 292662306a36Sopenharmony_ci if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) && 292762306a36Sopenharmony_ci CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu), 292862306a36Sopenharmony_ci vmcs12->host_ia32_perf_global_ctrl))) 292962306a36Sopenharmony_ci return -EINVAL; 293062306a36Sopenharmony_ci 293162306a36Sopenharmony_ci if (ia32e) { 293262306a36Sopenharmony_ci if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE))) 293362306a36Sopenharmony_ci return -EINVAL; 293462306a36Sopenharmony_ci } else { 293562306a36Sopenharmony_ci if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || 293662306a36Sopenharmony_ci CC(vmcs12->host_cr4 & X86_CR4_PCIDE) || 293762306a36Sopenharmony_ci CC((vmcs12->host_rip) >> 32)) 293862306a36Sopenharmony_ci return -EINVAL; 293962306a36Sopenharmony_ci } 294062306a36Sopenharmony_ci 294162306a36Sopenharmony_ci if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || 294262306a36Sopenharmony_ci CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || 294362306a36Sopenharmony_ci CC(vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || 294462306a36Sopenharmony_ci CC(vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || 294562306a36Sopenharmony_ci CC(vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || 294662306a36Sopenharmony_ci 
CC(vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || 294762306a36Sopenharmony_ci CC(vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || 294862306a36Sopenharmony_ci CC(vmcs12->host_cs_selector == 0) || 294962306a36Sopenharmony_ci CC(vmcs12->host_tr_selector == 0) || 295062306a36Sopenharmony_ci CC(vmcs12->host_ss_selector == 0 && !ia32e)) 295162306a36Sopenharmony_ci return -EINVAL; 295262306a36Sopenharmony_ci 295362306a36Sopenharmony_ci if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) || 295462306a36Sopenharmony_ci CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) || 295562306a36Sopenharmony_ci CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) || 295662306a36Sopenharmony_ci CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) || 295762306a36Sopenharmony_ci CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) || 295862306a36Sopenharmony_ci CC(is_noncanonical_address(vmcs12->host_rip, vcpu))) 295962306a36Sopenharmony_ci return -EINVAL; 296062306a36Sopenharmony_ci 296162306a36Sopenharmony_ci /* 296262306a36Sopenharmony_ci * If the load IA32_EFER VM-exit control is 1, bits reserved in the 296362306a36Sopenharmony_ci * IA32_EFER MSR must be 0 in the field for that register. In addition, 296462306a36Sopenharmony_ci * the values of the LMA and LME bits in the field must each be that of 296562306a36Sopenharmony_ci * the host address-space size VM-exit control. 
296662306a36Sopenharmony_ci */ 296762306a36Sopenharmony_ci if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { 296862306a36Sopenharmony_ci if (CC(!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer)) || 296962306a36Sopenharmony_ci CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA)) || 297062306a36Sopenharmony_ci CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))) 297162306a36Sopenharmony_ci return -EINVAL; 297262306a36Sopenharmony_ci } 297362306a36Sopenharmony_ci 297462306a36Sopenharmony_ci return 0; 297562306a36Sopenharmony_ci} 297662306a36Sopenharmony_ci 297762306a36Sopenharmony_cistatic int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, 297862306a36Sopenharmony_ci struct vmcs12 *vmcs12) 297962306a36Sopenharmony_ci{ 298062306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 298162306a36Sopenharmony_ci struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache; 298262306a36Sopenharmony_ci struct vmcs_hdr hdr; 298362306a36Sopenharmony_ci 298462306a36Sopenharmony_ci if (vmcs12->vmcs_link_pointer == INVALID_GPA) 298562306a36Sopenharmony_ci return 0; 298662306a36Sopenharmony_ci 298762306a36Sopenharmony_ci if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))) 298862306a36Sopenharmony_ci return -EINVAL; 298962306a36Sopenharmony_ci 299062306a36Sopenharmony_ci if (ghc->gpa != vmcs12->vmcs_link_pointer && 299162306a36Sopenharmony_ci CC(kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, 299262306a36Sopenharmony_ci vmcs12->vmcs_link_pointer, VMCS12_SIZE))) 299362306a36Sopenharmony_ci return -EINVAL; 299462306a36Sopenharmony_ci 299562306a36Sopenharmony_ci if (CC(kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr, 299662306a36Sopenharmony_ci offsetof(struct vmcs12, hdr), 299762306a36Sopenharmony_ci sizeof(hdr)))) 299862306a36Sopenharmony_ci return -EINVAL; 299962306a36Sopenharmony_ci 300062306a36Sopenharmony_ci if (CC(hdr.revision_id != VMCS12_REVISION) || 300162306a36Sopenharmony_ci CC(hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))) 
300262306a36Sopenharmony_ci return -EINVAL; 300362306a36Sopenharmony_ci 300462306a36Sopenharmony_ci return 0; 300562306a36Sopenharmony_ci} 300662306a36Sopenharmony_ci 300762306a36Sopenharmony_ci/* 300862306a36Sopenharmony_ci * Checks related to Guest Non-register State 300962306a36Sopenharmony_ci */ 301062306a36Sopenharmony_cistatic int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) 301162306a36Sopenharmony_ci{ 301262306a36Sopenharmony_ci if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && 301362306a36Sopenharmony_ci vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT && 301462306a36Sopenharmony_ci vmcs12->guest_activity_state != GUEST_ACTIVITY_WAIT_SIPI)) 301562306a36Sopenharmony_ci return -EINVAL; 301662306a36Sopenharmony_ci 301762306a36Sopenharmony_ci return 0; 301862306a36Sopenharmony_ci} 301962306a36Sopenharmony_ci 302062306a36Sopenharmony_cistatic int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, 302162306a36Sopenharmony_ci struct vmcs12 *vmcs12, 302262306a36Sopenharmony_ci enum vm_entry_failure_code *entry_failure_code) 302362306a36Sopenharmony_ci{ 302462306a36Sopenharmony_ci bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE); 302562306a36Sopenharmony_ci 302662306a36Sopenharmony_ci *entry_failure_code = ENTRY_FAIL_DEFAULT; 302762306a36Sopenharmony_ci 302862306a36Sopenharmony_ci if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) || 302962306a36Sopenharmony_ci CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))) 303062306a36Sopenharmony_ci return -EINVAL; 303162306a36Sopenharmony_ci 303262306a36Sopenharmony_ci if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) && 303362306a36Sopenharmony_ci CC(!kvm_dr7_valid(vmcs12->guest_dr7))) 303462306a36Sopenharmony_ci return -EINVAL; 303562306a36Sopenharmony_ci 303662306a36Sopenharmony_ci if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && 303762306a36Sopenharmony_ci CC(!kvm_pat_valid(vmcs12->guest_ia32_pat))) 303862306a36Sopenharmony_ci return -EINVAL; 
303962306a36Sopenharmony_ci 304062306a36Sopenharmony_ci if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) { 304162306a36Sopenharmony_ci *entry_failure_code = ENTRY_FAIL_VMCS_LINK_PTR; 304262306a36Sopenharmony_ci return -EINVAL; 304362306a36Sopenharmony_ci } 304462306a36Sopenharmony_ci 304562306a36Sopenharmony_ci if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && 304662306a36Sopenharmony_ci CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu), 304762306a36Sopenharmony_ci vmcs12->guest_ia32_perf_global_ctrl))) 304862306a36Sopenharmony_ci return -EINVAL; 304962306a36Sopenharmony_ci 305062306a36Sopenharmony_ci if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG)) 305162306a36Sopenharmony_ci return -EINVAL; 305262306a36Sopenharmony_ci 305362306a36Sopenharmony_ci if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) || 305462306a36Sopenharmony_ci CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG))) 305562306a36Sopenharmony_ci return -EINVAL; 305662306a36Sopenharmony_ci 305762306a36Sopenharmony_ci /* 305862306a36Sopenharmony_ci * If the load IA32_EFER VM-entry control is 1, the following checks 305962306a36Sopenharmony_ci * are performed on the field for the IA32_EFER MSR: 306062306a36Sopenharmony_ci * - Bits reserved in the IA32_EFER MSR must be 0. 306162306a36Sopenharmony_ci * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of 306262306a36Sopenharmony_ci * the IA-32e mode guest VM-exit control. It must also be identical 306362306a36Sopenharmony_ci * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to 306462306a36Sopenharmony_ci * CR0.PG) is 1. 
306562306a36Sopenharmony_ci */ 306662306a36Sopenharmony_ci if (to_vmx(vcpu)->nested.nested_run_pending && 306762306a36Sopenharmony_ci (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) { 306862306a36Sopenharmony_ci if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) || 306962306a36Sopenharmony_ci CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) || 307062306a36Sopenharmony_ci CC(((vmcs12->guest_cr0 & X86_CR0_PG) && 307162306a36Sopenharmony_ci ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME)))) 307262306a36Sopenharmony_ci return -EINVAL; 307362306a36Sopenharmony_ci } 307462306a36Sopenharmony_ci 307562306a36Sopenharmony_ci if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && 307662306a36Sopenharmony_ci (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) || 307762306a36Sopenharmony_ci CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))) 307862306a36Sopenharmony_ci return -EINVAL; 307962306a36Sopenharmony_ci 308062306a36Sopenharmony_ci if (nested_check_guest_non_reg_state(vmcs12)) 308162306a36Sopenharmony_ci return -EINVAL; 308262306a36Sopenharmony_ci 308362306a36Sopenharmony_ci return 0; 308462306a36Sopenharmony_ci} 308562306a36Sopenharmony_ci 308662306a36Sopenharmony_cistatic int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) 308762306a36Sopenharmony_ci{ 308862306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 308962306a36Sopenharmony_ci unsigned long cr3, cr4; 309062306a36Sopenharmony_ci bool vm_fail; 309162306a36Sopenharmony_ci 309262306a36Sopenharmony_ci if (!nested_early_check) 309362306a36Sopenharmony_ci return 0; 309462306a36Sopenharmony_ci 309562306a36Sopenharmony_ci if (vmx->msr_autoload.host.nr) 309662306a36Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); 309762306a36Sopenharmony_ci if (vmx->msr_autoload.guest.nr) 309862306a36Sopenharmony_ci vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); 309962306a36Sopenharmony_ci 310062306a36Sopenharmony_ci preempt_disable(); 310162306a36Sopenharmony_ci 
310262306a36Sopenharmony_ci vmx_prepare_switch_to_guest(vcpu); 310362306a36Sopenharmony_ci 310462306a36Sopenharmony_ci /* 310562306a36Sopenharmony_ci * Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS, 310662306a36Sopenharmony_ci * which is reserved to '1' by hardware. GUEST_RFLAGS is guaranteed to 310762306a36Sopenharmony_ci * be written (by prepare_vmcs02()) before the "real" VMEnter, i.e. 310862306a36Sopenharmony_ci * there is no need to preserve other bits or save/restore the field. 310962306a36Sopenharmony_ci */ 311062306a36Sopenharmony_ci vmcs_writel(GUEST_RFLAGS, 0); 311162306a36Sopenharmony_ci 311262306a36Sopenharmony_ci cr3 = __get_current_cr3_fast(); 311362306a36Sopenharmony_ci if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { 311462306a36Sopenharmony_ci vmcs_writel(HOST_CR3, cr3); 311562306a36Sopenharmony_ci vmx->loaded_vmcs->host_state.cr3 = cr3; 311662306a36Sopenharmony_ci } 311762306a36Sopenharmony_ci 311862306a36Sopenharmony_ci cr4 = cr4_read_shadow(); 311962306a36Sopenharmony_ci if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { 312062306a36Sopenharmony_ci vmcs_writel(HOST_CR4, cr4); 312162306a36Sopenharmony_ci vmx->loaded_vmcs->host_state.cr4 = cr4; 312262306a36Sopenharmony_ci } 312362306a36Sopenharmony_ci 312462306a36Sopenharmony_ci vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, 312562306a36Sopenharmony_ci __vmx_vcpu_run_flags(vmx)); 312662306a36Sopenharmony_ci 312762306a36Sopenharmony_ci if (vmx->msr_autoload.host.nr) 312862306a36Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); 312962306a36Sopenharmony_ci if (vmx->msr_autoload.guest.nr) 313062306a36Sopenharmony_ci vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); 313162306a36Sopenharmony_ci 313262306a36Sopenharmony_ci if (vm_fail) { 313362306a36Sopenharmony_ci u32 error = vmcs_read32(VM_INSTRUCTION_ERROR); 313462306a36Sopenharmony_ci 313562306a36Sopenharmony_ci preempt_enable(); 
313662306a36Sopenharmony_ci 313762306a36Sopenharmony_ci trace_kvm_nested_vmenter_failed( 313862306a36Sopenharmony_ci "early hardware check VM-instruction error: ", error); 313962306a36Sopenharmony_ci WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD); 314062306a36Sopenharmony_ci return 1; 314162306a36Sopenharmony_ci } 314262306a36Sopenharmony_ci 314362306a36Sopenharmony_ci /* 314462306a36Sopenharmony_ci * VMExit clears RFLAGS.IF and DR7, even on a consistency check. 314562306a36Sopenharmony_ci */ 314662306a36Sopenharmony_ci if (hw_breakpoint_active()) 314762306a36Sopenharmony_ci set_debugreg(__this_cpu_read(cpu_dr7), 7); 314862306a36Sopenharmony_ci local_irq_enable(); 314962306a36Sopenharmony_ci preempt_enable(); 315062306a36Sopenharmony_ci 315162306a36Sopenharmony_ci /* 315262306a36Sopenharmony_ci * A non-failing VMEntry means we somehow entered guest mode with 315362306a36Sopenharmony_ci * an illegal RIP, and that's just the tip of the iceberg. There 315462306a36Sopenharmony_ci * is no telling what memory has been modified or what state has 315562306a36Sopenharmony_ci * been exposed to unknown code. Hitting this all but guarantees 315662306a36Sopenharmony_ci * a (very critical) hardware issue. 315762306a36Sopenharmony_ci */ 315862306a36Sopenharmony_ci WARN_ON(!(vmcs_read32(VM_EXIT_REASON) & 315962306a36Sopenharmony_ci VMX_EXIT_REASONS_FAILED_VMENTRY)); 316062306a36Sopenharmony_ci 316162306a36Sopenharmony_ci return 0; 316262306a36Sopenharmony_ci} 316362306a36Sopenharmony_ci 316462306a36Sopenharmony_cistatic bool nested_get_evmcs_page(struct kvm_vcpu *vcpu) 316562306a36Sopenharmony_ci{ 316662306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 316762306a36Sopenharmony_ci 316862306a36Sopenharmony_ci /* 316962306a36Sopenharmony_ci * hv_evmcs may end up being not mapped after migration (when 317062306a36Sopenharmony_ci * L2 was running), map it here to make sure vmcs12 changes are 317162306a36Sopenharmony_ci * properly reflected. 
317262306a36Sopenharmony_ci */ 317362306a36Sopenharmony_ci if (guest_cpuid_has_evmcs(vcpu) && 317462306a36Sopenharmony_ci vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) { 317562306a36Sopenharmony_ci enum nested_evmptrld_status evmptrld_status = 317662306a36Sopenharmony_ci nested_vmx_handle_enlightened_vmptrld(vcpu, false); 317762306a36Sopenharmony_ci 317862306a36Sopenharmony_ci if (evmptrld_status == EVMPTRLD_VMFAIL || 317962306a36Sopenharmony_ci evmptrld_status == EVMPTRLD_ERROR) 318062306a36Sopenharmony_ci return false; 318162306a36Sopenharmony_ci 318262306a36Sopenharmony_ci /* 318362306a36Sopenharmony_ci * Post migration VMCS12 always provides the most actual 318462306a36Sopenharmony_ci * information, copy it to eVMCS upon entry. 318562306a36Sopenharmony_ci */ 318662306a36Sopenharmony_ci vmx->nested.need_vmcs12_to_shadow_sync = true; 318762306a36Sopenharmony_ci } 318862306a36Sopenharmony_ci 318962306a36Sopenharmony_ci return true; 319062306a36Sopenharmony_ci} 319162306a36Sopenharmony_ci 319262306a36Sopenharmony_cistatic bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) 319362306a36Sopenharmony_ci{ 319462306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 319562306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 319662306a36Sopenharmony_ci struct kvm_host_map *map; 319762306a36Sopenharmony_ci 319862306a36Sopenharmony_ci if (!vcpu->arch.pdptrs_from_userspace && 319962306a36Sopenharmony_ci !nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) { 320062306a36Sopenharmony_ci /* 320162306a36Sopenharmony_ci * Reload the guest's PDPTRs since after a migration 320262306a36Sopenharmony_ci * the guest CR3 might be restored prior to setting the nested 320362306a36Sopenharmony_ci * state which can lead to a load of wrong PDPTRs. 
320462306a36Sopenharmony_ci */ 320562306a36Sopenharmony_ci if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3))) 320662306a36Sopenharmony_ci return false; 320762306a36Sopenharmony_ci } 320862306a36Sopenharmony_ci 320962306a36Sopenharmony_ci 321062306a36Sopenharmony_ci if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { 321162306a36Sopenharmony_ci map = &vmx->nested.apic_access_page_map; 321262306a36Sopenharmony_ci 321362306a36Sopenharmony_ci if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->apic_access_addr), map)) { 321462306a36Sopenharmony_ci vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(map->pfn)); 321562306a36Sopenharmony_ci } else { 321662306a36Sopenharmony_ci pr_debug_ratelimited("%s: no backing for APIC-access address in vmcs12\n", 321762306a36Sopenharmony_ci __func__); 321862306a36Sopenharmony_ci vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 321962306a36Sopenharmony_ci vcpu->run->internal.suberror = 322062306a36Sopenharmony_ci KVM_INTERNAL_ERROR_EMULATION; 322162306a36Sopenharmony_ci vcpu->run->internal.ndata = 0; 322262306a36Sopenharmony_ci return false; 322362306a36Sopenharmony_ci } 322462306a36Sopenharmony_ci } 322562306a36Sopenharmony_ci 322662306a36Sopenharmony_ci if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { 322762306a36Sopenharmony_ci map = &vmx->nested.virtual_apic_map; 322862306a36Sopenharmony_ci 322962306a36Sopenharmony_ci if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) { 323062306a36Sopenharmony_ci vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn)); 323162306a36Sopenharmony_ci } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) && 323262306a36Sopenharmony_ci nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) && 323362306a36Sopenharmony_ci !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { 323462306a36Sopenharmony_ci /* 323562306a36Sopenharmony_ci * The processor will never use the TPR shadow, simply 323662306a36Sopenharmony_ci * clear the bit from the execution control. 
Such a 323762306a36Sopenharmony_ci * configuration is useless, but it happens in tests. 323862306a36Sopenharmony_ci * For any other configuration, failing the vm entry is 323962306a36Sopenharmony_ci * _not_ what the processor does but it's basically the 324062306a36Sopenharmony_ci * only possibility we have. 324162306a36Sopenharmony_ci */ 324262306a36Sopenharmony_ci exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW); 324362306a36Sopenharmony_ci } else { 324462306a36Sopenharmony_ci /* 324562306a36Sopenharmony_ci * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR to 324662306a36Sopenharmony_ci * force VM-Entry to fail. 324762306a36Sopenharmony_ci */ 324862306a36Sopenharmony_ci vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, INVALID_GPA); 324962306a36Sopenharmony_ci } 325062306a36Sopenharmony_ci } 325162306a36Sopenharmony_ci 325262306a36Sopenharmony_ci if (nested_cpu_has_posted_intr(vmcs12)) { 325362306a36Sopenharmony_ci map = &vmx->nested.pi_desc_map; 325462306a36Sopenharmony_ci 325562306a36Sopenharmony_ci if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) { 325662306a36Sopenharmony_ci vmx->nested.pi_desc = 325762306a36Sopenharmony_ci (struct pi_desc *)(((void *)map->hva) + 325862306a36Sopenharmony_ci offset_in_page(vmcs12->posted_intr_desc_addr)); 325962306a36Sopenharmony_ci vmcs_write64(POSTED_INTR_DESC_ADDR, 326062306a36Sopenharmony_ci pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr)); 326162306a36Sopenharmony_ci } else { 326262306a36Sopenharmony_ci /* 326362306a36Sopenharmony_ci * Defer the KVM_INTERNAL_EXIT until KVM tries to 326462306a36Sopenharmony_ci * access the contents of the VMCS12 posted interrupt 326562306a36Sopenharmony_ci * descriptor. (Note that KVM may do this when it 326662306a36Sopenharmony_ci * should not, per the architectural specification.) 
326762306a36Sopenharmony_ci */ 326862306a36Sopenharmony_ci vmx->nested.pi_desc = NULL; 326962306a36Sopenharmony_ci pin_controls_clearbit(vmx, PIN_BASED_POSTED_INTR); 327062306a36Sopenharmony_ci } 327162306a36Sopenharmony_ci } 327262306a36Sopenharmony_ci if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) 327362306a36Sopenharmony_ci exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS); 327462306a36Sopenharmony_ci else 327562306a36Sopenharmony_ci exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS); 327662306a36Sopenharmony_ci 327762306a36Sopenharmony_ci return true; 327862306a36Sopenharmony_ci} 327962306a36Sopenharmony_ci 328062306a36Sopenharmony_cistatic bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu) 328162306a36Sopenharmony_ci{ 328262306a36Sopenharmony_ci /* 328362306a36Sopenharmony_ci * Note: nested_get_evmcs_page() also updates 'vp_assist_page' copy 328462306a36Sopenharmony_ci * in 'struct kvm_vcpu_hv' in case eVMCS is in use, this is mandatory 328562306a36Sopenharmony_ci * to make nested_evmcs_l2_tlb_flush_enabled() work correctly post 328662306a36Sopenharmony_ci * migration. 
328762306a36Sopenharmony_ci */ 328862306a36Sopenharmony_ci if (!nested_get_evmcs_page(vcpu)) { 328962306a36Sopenharmony_ci pr_debug_ratelimited("%s: enlightened vmptrld failed\n", 329062306a36Sopenharmony_ci __func__); 329162306a36Sopenharmony_ci vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 329262306a36Sopenharmony_ci vcpu->run->internal.suberror = 329362306a36Sopenharmony_ci KVM_INTERNAL_ERROR_EMULATION; 329462306a36Sopenharmony_ci vcpu->run->internal.ndata = 0; 329562306a36Sopenharmony_ci 329662306a36Sopenharmony_ci return false; 329762306a36Sopenharmony_ci } 329862306a36Sopenharmony_ci 329962306a36Sopenharmony_ci if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu)) 330062306a36Sopenharmony_ci return false; 330162306a36Sopenharmony_ci 330262306a36Sopenharmony_ci return true; 330362306a36Sopenharmony_ci} 330462306a36Sopenharmony_ci 330562306a36Sopenharmony_cistatic int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa) 330662306a36Sopenharmony_ci{ 330762306a36Sopenharmony_ci struct vmcs12 *vmcs12; 330862306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 330962306a36Sopenharmony_ci gpa_t dst; 331062306a36Sopenharmony_ci 331162306a36Sopenharmony_ci if (WARN_ON_ONCE(!is_guest_mode(vcpu))) 331262306a36Sopenharmony_ci return 0; 331362306a36Sopenharmony_ci 331462306a36Sopenharmony_ci if (WARN_ON_ONCE(vmx->nested.pml_full)) 331562306a36Sopenharmony_ci return 1; 331662306a36Sopenharmony_ci 331762306a36Sopenharmony_ci /* 331862306a36Sopenharmony_ci * Check if PML is enabled for the nested guest. Whether eptp bit 6 is 331962306a36Sopenharmony_ci * set is already checked as part of A/D emulation. 
332062306a36Sopenharmony_ci */ 332162306a36Sopenharmony_ci vmcs12 = get_vmcs12(vcpu); 332262306a36Sopenharmony_ci if (!nested_cpu_has_pml(vmcs12)) 332362306a36Sopenharmony_ci return 0; 332462306a36Sopenharmony_ci 332562306a36Sopenharmony_ci if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) { 332662306a36Sopenharmony_ci vmx->nested.pml_full = true; 332762306a36Sopenharmony_ci return 1; 332862306a36Sopenharmony_ci } 332962306a36Sopenharmony_ci 333062306a36Sopenharmony_ci gpa &= ~0xFFFull; 333162306a36Sopenharmony_ci dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index; 333262306a36Sopenharmony_ci 333362306a36Sopenharmony_ci if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa, 333462306a36Sopenharmony_ci offset_in_page(dst), sizeof(gpa))) 333562306a36Sopenharmony_ci return 0; 333662306a36Sopenharmony_ci 333762306a36Sopenharmony_ci vmcs12->guest_pml_index--; 333862306a36Sopenharmony_ci 333962306a36Sopenharmony_ci return 0; 334062306a36Sopenharmony_ci} 334162306a36Sopenharmony_ci 334262306a36Sopenharmony_ci/* 334362306a36Sopenharmony_ci * Intel's VMX Instruction Reference specifies a common set of prerequisites 334462306a36Sopenharmony_ci * for running VMX instructions (except VMXON, whose prerequisites are 334562306a36Sopenharmony_ci * slightly different). It also specifies what exception to inject otherwise. 334662306a36Sopenharmony_ci * Note that many of these exceptions have priority over VM exits, so they 334762306a36Sopenharmony_ci * don't have to be checked again here. 
334862306a36Sopenharmony_ci */ 334962306a36Sopenharmony_cistatic int nested_vmx_check_permission(struct kvm_vcpu *vcpu) 335062306a36Sopenharmony_ci{ 335162306a36Sopenharmony_ci if (!to_vmx(vcpu)->nested.vmxon) { 335262306a36Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 335362306a36Sopenharmony_ci return 0; 335462306a36Sopenharmony_ci } 335562306a36Sopenharmony_ci 335662306a36Sopenharmony_ci if (vmx_get_cpl(vcpu)) { 335762306a36Sopenharmony_ci kvm_inject_gp(vcpu, 0); 335862306a36Sopenharmony_ci return 0; 335962306a36Sopenharmony_ci } 336062306a36Sopenharmony_ci 336162306a36Sopenharmony_ci return 1; 336262306a36Sopenharmony_ci} 336362306a36Sopenharmony_ci 336462306a36Sopenharmony_cistatic u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu) 336562306a36Sopenharmony_ci{ 336662306a36Sopenharmony_ci u8 rvi = vmx_get_rvi(); 336762306a36Sopenharmony_ci u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI); 336862306a36Sopenharmony_ci 336962306a36Sopenharmony_ci return ((rvi & 0xf0) > (vppr & 0xf0)); 337062306a36Sopenharmony_ci} 337162306a36Sopenharmony_ci 337262306a36Sopenharmony_cistatic void load_vmcs12_host_state(struct kvm_vcpu *vcpu, 337362306a36Sopenharmony_ci struct vmcs12 *vmcs12); 337462306a36Sopenharmony_ci 337562306a36Sopenharmony_ci/* 337662306a36Sopenharmony_ci * If from_vmentry is false, this is being called from state restore (either RSM 337762306a36Sopenharmony_ci * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume. 
337862306a36Sopenharmony_ci * 337962306a36Sopenharmony_ci * Returns: 338062306a36Sopenharmony_ci * NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode 338162306a36Sopenharmony_ci * NVMX_VMENTRY_VMFAIL: Consistency check VMFail 338262306a36Sopenharmony_ci * NVMX_VMENTRY_VMEXIT: Consistency check VMExit 338362306a36Sopenharmony_ci * NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error 338462306a36Sopenharmony_ci */ 338562306a36Sopenharmony_cienum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, 338662306a36Sopenharmony_ci bool from_vmentry) 338762306a36Sopenharmony_ci{ 338862306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 338962306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 339062306a36Sopenharmony_ci enum vm_entry_failure_code entry_failure_code; 339162306a36Sopenharmony_ci bool evaluate_pending_interrupts; 339262306a36Sopenharmony_ci union vmx_exit_reason exit_reason = { 339362306a36Sopenharmony_ci .basic = EXIT_REASON_INVALID_STATE, 339462306a36Sopenharmony_ci .failed_vmentry = 1, 339562306a36Sopenharmony_ci }; 339662306a36Sopenharmony_ci u32 failed_index; 339762306a36Sopenharmony_ci 339862306a36Sopenharmony_ci trace_kvm_nested_vmenter(kvm_rip_read(vcpu), 339962306a36Sopenharmony_ci vmx->nested.current_vmptr, 340062306a36Sopenharmony_ci vmcs12->guest_rip, 340162306a36Sopenharmony_ci vmcs12->guest_intr_status, 340262306a36Sopenharmony_ci vmcs12->vm_entry_intr_info_field, 340362306a36Sopenharmony_ci vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_ENABLE_EPT, 340462306a36Sopenharmony_ci vmcs12->ept_pointer, 340562306a36Sopenharmony_ci vmcs12->guest_cr3, 340662306a36Sopenharmony_ci KVM_ISA_VMX); 340762306a36Sopenharmony_ci 340862306a36Sopenharmony_ci kvm_service_local_tlb_flush_requests(vcpu); 340962306a36Sopenharmony_ci 341062306a36Sopenharmony_ci evaluate_pending_interrupts = exec_controls_get(vmx) & 341162306a36Sopenharmony_ci (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING); 
341262306a36Sopenharmony_ci if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) 341362306a36Sopenharmony_ci evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); 341462306a36Sopenharmony_ci if (!evaluate_pending_interrupts) 341562306a36Sopenharmony_ci evaluate_pending_interrupts |= kvm_apic_has_pending_init_or_sipi(vcpu); 341662306a36Sopenharmony_ci 341762306a36Sopenharmony_ci if (!vmx->nested.nested_run_pending || 341862306a36Sopenharmony_ci !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) 341962306a36Sopenharmony_ci vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); 342062306a36Sopenharmony_ci if (kvm_mpx_supported() && 342162306a36Sopenharmony_ci (!vmx->nested.nested_run_pending || 342262306a36Sopenharmony_ci !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) 342362306a36Sopenharmony_ci vmx->nested.pre_vmenter_bndcfgs = vmcs_read64(GUEST_BNDCFGS); 342462306a36Sopenharmony_ci 342562306a36Sopenharmony_ci /* 342662306a36Sopenharmony_ci * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and* 342762306a36Sopenharmony_ci * nested early checks are disabled. In the event of a "late" VM-Fail, 342862306a36Sopenharmony_ci * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its 342962306a36Sopenharmony_ci * software model to the pre-VMEntry host state. When EPT is disabled, 343062306a36Sopenharmony_ci * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes 343162306a36Sopenharmony_ci * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3. Stuffing 343262306a36Sopenharmony_ci * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to 343362306a36Sopenharmony_ci * the correct value. Smashing vmcs01.GUEST_CR3 is safe because nested 343462306a36Sopenharmony_ci * VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is 343562306a36Sopenharmony_ci * guaranteed to be overwritten with a shadow CR3 prior to re-entering 343662306a36Sopenharmony_ci * L1. 
Don't stuff vmcs01.GUEST_CR3 when using nested early checks as 343762306a36Sopenharmony_ci * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks 343862306a36Sopenharmony_ci * pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail 343962306a36Sopenharmony_ci * path would need to manually save/restore vmcs01.GUEST_CR3. 344062306a36Sopenharmony_ci */ 344162306a36Sopenharmony_ci if (!enable_ept && !nested_early_check) 344262306a36Sopenharmony_ci vmcs_writel(GUEST_CR3, vcpu->arch.cr3); 344362306a36Sopenharmony_ci 344462306a36Sopenharmony_ci vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); 344562306a36Sopenharmony_ci 344662306a36Sopenharmony_ci prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12); 344762306a36Sopenharmony_ci 344862306a36Sopenharmony_ci if (from_vmentry) { 344962306a36Sopenharmony_ci if (unlikely(!nested_get_vmcs12_pages(vcpu))) { 345062306a36Sopenharmony_ci vmx_switch_vmcs(vcpu, &vmx->vmcs01); 345162306a36Sopenharmony_ci return NVMX_VMENTRY_KVM_INTERNAL_ERROR; 345262306a36Sopenharmony_ci } 345362306a36Sopenharmony_ci 345462306a36Sopenharmony_ci if (nested_vmx_check_vmentry_hw(vcpu)) { 345562306a36Sopenharmony_ci vmx_switch_vmcs(vcpu, &vmx->vmcs01); 345662306a36Sopenharmony_ci return NVMX_VMENTRY_VMFAIL; 345762306a36Sopenharmony_ci } 345862306a36Sopenharmony_ci 345962306a36Sopenharmony_ci if (nested_vmx_check_guest_state(vcpu, vmcs12, 346062306a36Sopenharmony_ci &entry_failure_code)) { 346162306a36Sopenharmony_ci exit_reason.basic = EXIT_REASON_INVALID_STATE; 346262306a36Sopenharmony_ci vmcs12->exit_qualification = entry_failure_code; 346362306a36Sopenharmony_ci goto vmentry_fail_vmexit; 346462306a36Sopenharmony_ci } 346562306a36Sopenharmony_ci } 346662306a36Sopenharmony_ci 346762306a36Sopenharmony_ci enter_guest_mode(vcpu); 346862306a36Sopenharmony_ci 346962306a36Sopenharmony_ci if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &entry_failure_code)) { 347062306a36Sopenharmony_ci exit_reason.basic = EXIT_REASON_INVALID_STATE; 
347162306a36Sopenharmony_ci vmcs12->exit_qualification = entry_failure_code; 347262306a36Sopenharmony_ci goto vmentry_fail_vmexit_guest_mode; 347362306a36Sopenharmony_ci } 347462306a36Sopenharmony_ci 347562306a36Sopenharmony_ci if (from_vmentry) { 347662306a36Sopenharmony_ci failed_index = nested_vmx_load_msr(vcpu, 347762306a36Sopenharmony_ci vmcs12->vm_entry_msr_load_addr, 347862306a36Sopenharmony_ci vmcs12->vm_entry_msr_load_count); 347962306a36Sopenharmony_ci if (failed_index) { 348062306a36Sopenharmony_ci exit_reason.basic = EXIT_REASON_MSR_LOAD_FAIL; 348162306a36Sopenharmony_ci vmcs12->exit_qualification = failed_index; 348262306a36Sopenharmony_ci goto vmentry_fail_vmexit_guest_mode; 348362306a36Sopenharmony_ci } 348462306a36Sopenharmony_ci } else { 348562306a36Sopenharmony_ci /* 348662306a36Sopenharmony_ci * The MMU is not initialized to point at the right entities yet and 348762306a36Sopenharmony_ci * "get pages" would need to read data from the guest (i.e. we will 348862306a36Sopenharmony_ci * need to perform gpa to hpa translation). Request a call 348962306a36Sopenharmony_ci * to nested_get_vmcs12_pages before the next VM-entry. The MSRs 349062306a36Sopenharmony_ci * have already been set at vmentry time and should not be reset. 349162306a36Sopenharmony_ci */ 349262306a36Sopenharmony_ci kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); 349362306a36Sopenharmony_ci } 349462306a36Sopenharmony_ci 349562306a36Sopenharmony_ci /* 349662306a36Sopenharmony_ci * Re-evaluate pending events if L1 had a pending IRQ/NMI/INIT/SIPI 349762306a36Sopenharmony_ci * when it executed VMLAUNCH/VMRESUME, as entering non-root mode can 349862306a36Sopenharmony_ci * effectively unblock various events, e.g. INIT/SIPI cause VM-Exit 349962306a36Sopenharmony_ci * unconditionally. 
350062306a36Sopenharmony_ci */ 350162306a36Sopenharmony_ci if (unlikely(evaluate_pending_interrupts)) 350262306a36Sopenharmony_ci kvm_make_request(KVM_REQ_EVENT, vcpu); 350362306a36Sopenharmony_ci 350462306a36Sopenharmony_ci /* 350562306a36Sopenharmony_ci * Do not start the preemption timer hrtimer until after we know 350662306a36Sopenharmony_ci * we are successful, so that only nested_vmx_vmexit needs to cancel 350762306a36Sopenharmony_ci * the timer. 350862306a36Sopenharmony_ci */ 350962306a36Sopenharmony_ci vmx->nested.preemption_timer_expired = false; 351062306a36Sopenharmony_ci if (nested_cpu_has_preemption_timer(vmcs12)) { 351162306a36Sopenharmony_ci u64 timer_value = vmx_calc_preemption_timer_value(vcpu); 351262306a36Sopenharmony_ci vmx_start_preemption_timer(vcpu, timer_value); 351362306a36Sopenharmony_ci } 351462306a36Sopenharmony_ci 351562306a36Sopenharmony_ci /* 351662306a36Sopenharmony_ci * Note no nested_vmx_succeed or nested_vmx_fail here. At this point 351762306a36Sopenharmony_ci * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet 351862306a36Sopenharmony_ci * returned as far as L1 is concerned. It will only return (and set 351962306a36Sopenharmony_ci * the success flag) when L2 exits (see nested_vmx_vmexit()). 352062306a36Sopenharmony_ci */ 352162306a36Sopenharmony_ci return NVMX_VMENTRY_SUCCESS; 352262306a36Sopenharmony_ci 352362306a36Sopenharmony_ci /* 352462306a36Sopenharmony_ci * A failed consistency check that leads to a VMExit during L1's 352562306a36Sopenharmony_ci * VMEnter to L2 is a variation of a normal VMexit, as explained in 352662306a36Sopenharmony_ci * 26.7 "VM-entry failures during or after loading guest state". 
352762306a36Sopenharmony_ci */ 352862306a36Sopenharmony_civmentry_fail_vmexit_guest_mode: 352962306a36Sopenharmony_ci if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) 353062306a36Sopenharmony_ci vcpu->arch.tsc_offset -= vmcs12->tsc_offset; 353162306a36Sopenharmony_ci leave_guest_mode(vcpu); 353262306a36Sopenharmony_ci 353362306a36Sopenharmony_civmentry_fail_vmexit: 353462306a36Sopenharmony_ci vmx_switch_vmcs(vcpu, &vmx->vmcs01); 353562306a36Sopenharmony_ci 353662306a36Sopenharmony_ci if (!from_vmentry) 353762306a36Sopenharmony_ci return NVMX_VMENTRY_VMEXIT; 353862306a36Sopenharmony_ci 353962306a36Sopenharmony_ci load_vmcs12_host_state(vcpu, vmcs12); 354062306a36Sopenharmony_ci vmcs12->vm_exit_reason = exit_reason.full; 354162306a36Sopenharmony_ci if (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) 354262306a36Sopenharmony_ci vmx->nested.need_vmcs12_to_shadow_sync = true; 354362306a36Sopenharmony_ci return NVMX_VMENTRY_VMEXIT; 354462306a36Sopenharmony_ci} 354562306a36Sopenharmony_ci 354662306a36Sopenharmony_ci/* 354762306a36Sopenharmony_ci * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1 354862306a36Sopenharmony_ci * for running an L2 nested guest. 
354962306a36Sopenharmony_ci */ 355062306a36Sopenharmony_cistatic int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) 355162306a36Sopenharmony_ci{ 355262306a36Sopenharmony_ci struct vmcs12 *vmcs12; 355362306a36Sopenharmony_ci enum nvmx_vmentry_status status; 355462306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 355562306a36Sopenharmony_ci u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu); 355662306a36Sopenharmony_ci enum nested_evmptrld_status evmptrld_status; 355762306a36Sopenharmony_ci 355862306a36Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 355962306a36Sopenharmony_ci return 1; 356062306a36Sopenharmony_ci 356162306a36Sopenharmony_ci evmptrld_status = nested_vmx_handle_enlightened_vmptrld(vcpu, launch); 356262306a36Sopenharmony_ci if (evmptrld_status == EVMPTRLD_ERROR) { 356362306a36Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 356462306a36Sopenharmony_ci return 1; 356562306a36Sopenharmony_ci } 356662306a36Sopenharmony_ci 356762306a36Sopenharmony_ci kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); 356862306a36Sopenharmony_ci 356962306a36Sopenharmony_ci if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) 357062306a36Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 357162306a36Sopenharmony_ci 357262306a36Sopenharmony_ci if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) && 357362306a36Sopenharmony_ci vmx->nested.current_vmptr == INVALID_GPA)) 357462306a36Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 357562306a36Sopenharmony_ci 357662306a36Sopenharmony_ci vmcs12 = get_vmcs12(vcpu); 357762306a36Sopenharmony_ci 357862306a36Sopenharmony_ci /* 357962306a36Sopenharmony_ci * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact 358062306a36Sopenharmony_ci * that there *is* a valid VMCS pointer, RFLAGS.CF is set 358162306a36Sopenharmony_ci * rather than RFLAGS.ZF, and no error number is stored to the 358262306a36Sopenharmony_ci * VM-instruction error field. 
358362306a36Sopenharmony_ci */ 358462306a36Sopenharmony_ci if (CC(vmcs12->hdr.shadow_vmcs)) 358562306a36Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 358662306a36Sopenharmony_ci 358762306a36Sopenharmony_ci if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) { 358862306a36Sopenharmony_ci copy_enlightened_to_vmcs12(vmx, vmx->nested.hv_evmcs->hv_clean_fields); 358962306a36Sopenharmony_ci /* Enlightened VMCS doesn't have launch state */ 359062306a36Sopenharmony_ci vmcs12->launch_state = !launch; 359162306a36Sopenharmony_ci } else if (enable_shadow_vmcs) { 359262306a36Sopenharmony_ci copy_shadow_to_vmcs12(vmx); 359362306a36Sopenharmony_ci } 359462306a36Sopenharmony_ci 359562306a36Sopenharmony_ci /* 359662306a36Sopenharmony_ci * The nested entry process starts with enforcing various prerequisites 359762306a36Sopenharmony_ci * on vmcs12 as required by the Intel SDM, and act appropriately when 359862306a36Sopenharmony_ci * they fail: As the SDM explains, some conditions should cause the 359962306a36Sopenharmony_ci * instruction to fail, while others will cause the instruction to seem 360062306a36Sopenharmony_ci * to succeed, but return an EXIT_REASON_INVALID_STATE. 360162306a36Sopenharmony_ci * To speed up the normal (success) code path, we should avoid checking 360262306a36Sopenharmony_ci * for misconfigurations which will anyway be caught by the processor 360362306a36Sopenharmony_ci * when using the merged vmcs02. 360462306a36Sopenharmony_ci */ 360562306a36Sopenharmony_ci if (CC(interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS)) 360662306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS); 360762306a36Sopenharmony_ci 360862306a36Sopenharmony_ci if (CC(vmcs12->launch_state == launch)) 360962306a36Sopenharmony_ci return nested_vmx_fail(vcpu, 361062306a36Sopenharmony_ci launch ? 
VMXERR_VMLAUNCH_NONCLEAR_VMCS 361162306a36Sopenharmony_ci : VMXERR_VMRESUME_NONLAUNCHED_VMCS); 361262306a36Sopenharmony_ci 361362306a36Sopenharmony_ci if (nested_vmx_check_controls(vcpu, vmcs12)) 361462306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 361562306a36Sopenharmony_ci 361662306a36Sopenharmony_ci if (nested_vmx_check_address_space_size(vcpu, vmcs12)) 361762306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); 361862306a36Sopenharmony_ci 361962306a36Sopenharmony_ci if (nested_vmx_check_host_state(vcpu, vmcs12)) 362062306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); 362162306a36Sopenharmony_ci 362262306a36Sopenharmony_ci /* 362362306a36Sopenharmony_ci * We're finally done with prerequisite checking, and can start with 362462306a36Sopenharmony_ci * the nested entry. 362562306a36Sopenharmony_ci */ 362662306a36Sopenharmony_ci vmx->nested.nested_run_pending = 1; 362762306a36Sopenharmony_ci vmx->nested.has_preemption_timer_deadline = false; 362862306a36Sopenharmony_ci status = nested_vmx_enter_non_root_mode(vcpu, true); 362962306a36Sopenharmony_ci if (unlikely(status != NVMX_VMENTRY_SUCCESS)) 363062306a36Sopenharmony_ci goto vmentry_failed; 363162306a36Sopenharmony_ci 363262306a36Sopenharmony_ci /* Emulate processing of posted interrupts on VM-Enter. */ 363362306a36Sopenharmony_ci if (nested_cpu_has_posted_intr(vmcs12) && 363462306a36Sopenharmony_ci kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) { 363562306a36Sopenharmony_ci vmx->nested.pi_pending = true; 363662306a36Sopenharmony_ci kvm_make_request(KVM_REQ_EVENT, vcpu); 363762306a36Sopenharmony_ci kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv); 363862306a36Sopenharmony_ci } 363962306a36Sopenharmony_ci 364062306a36Sopenharmony_ci /* Hide L1D cache contents from the nested guest. 
*/ 364162306a36Sopenharmony_ci vmx->vcpu.arch.l1tf_flush_l1d = true; 364262306a36Sopenharmony_ci 364362306a36Sopenharmony_ci /* 364462306a36Sopenharmony_ci * Must happen outside of nested_vmx_enter_non_root_mode() as it will 364562306a36Sopenharmony_ci * also be used as part of restoring nVMX state for 364662306a36Sopenharmony_ci * snapshot restore (migration). 364762306a36Sopenharmony_ci * 364862306a36Sopenharmony_ci * In this flow, it is assumed that vmcs12 cache was 364962306a36Sopenharmony_ci * transferred as part of captured nVMX state and should 365062306a36Sopenharmony_ci * therefore not be read from guest memory (which may not 365162306a36Sopenharmony_ci * exist on destination host yet). 365262306a36Sopenharmony_ci */ 365362306a36Sopenharmony_ci nested_cache_shadow_vmcs12(vcpu, vmcs12); 365462306a36Sopenharmony_ci 365562306a36Sopenharmony_ci switch (vmcs12->guest_activity_state) { 365662306a36Sopenharmony_ci case GUEST_ACTIVITY_HLT: 365762306a36Sopenharmony_ci /* 365862306a36Sopenharmony_ci * If we're entering a halted L2 vcpu and the L2 vcpu won't be 365962306a36Sopenharmony_ci * awakened by event injection or by an NMI-window VM-exit or 366062306a36Sopenharmony_ci * by an interrupt-window VM-exit, halt the vcpu. 
366162306a36Sopenharmony_ci */ 366262306a36Sopenharmony_ci if (!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) && 366362306a36Sopenharmony_ci !nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) && 366462306a36Sopenharmony_ci !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) && 366562306a36Sopenharmony_ci (vmcs12->guest_rflags & X86_EFLAGS_IF))) { 366662306a36Sopenharmony_ci vmx->nested.nested_run_pending = 0; 366762306a36Sopenharmony_ci return kvm_emulate_halt_noskip(vcpu); 366862306a36Sopenharmony_ci } 366962306a36Sopenharmony_ci break; 367062306a36Sopenharmony_ci case GUEST_ACTIVITY_WAIT_SIPI: 367162306a36Sopenharmony_ci vmx->nested.nested_run_pending = 0; 367262306a36Sopenharmony_ci vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; 367362306a36Sopenharmony_ci break; 367462306a36Sopenharmony_ci default: 367562306a36Sopenharmony_ci break; 367662306a36Sopenharmony_ci } 367762306a36Sopenharmony_ci 367862306a36Sopenharmony_ci return 1; 367962306a36Sopenharmony_ci 368062306a36Sopenharmony_civmentry_failed: 368162306a36Sopenharmony_ci vmx->nested.nested_run_pending = 0; 368262306a36Sopenharmony_ci if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR) 368362306a36Sopenharmony_ci return 0; 368462306a36Sopenharmony_ci if (status == NVMX_VMENTRY_VMEXIT) 368562306a36Sopenharmony_ci return 1; 368662306a36Sopenharmony_ci WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL); 368762306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 368862306a36Sopenharmony_ci} 368962306a36Sopenharmony_ci 369062306a36Sopenharmony_ci/* 369162306a36Sopenharmony_ci * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date 369262306a36Sopenharmony_ci * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK). 369362306a36Sopenharmony_ci * This function returns the new value we should put in vmcs12.guest_cr0. 369462306a36Sopenharmony_ci * It's not enough to just return the vmcs02 GUEST_CR0. 
 * Rather,
 *  1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now
 *     available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0
 *     didn't trap the bit, because if L1 did, so would L0).
 *  2. Bits that L1 asked to trap (and therefore L0 also did) could not have
 *     been modified by L2, and L1 knows it. So just leave the old value of
 *     the bit from vmcs12.guest_cr0. Note that the bit from vmcs02 GUEST_CR0
 *     isn't relevant, because if L0 traps this bit it can set it to anything.
 *  3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have
 *     changed these bits, and therefore they need to be updated, but L0
 *     didn't necessarily allow them to be changed in GUEST_CR0 - and rather
 *     put them in vmcs02 CR0_READ_SHADOW. So take these bits from there.
370662306a36Sopenharmony_ci */ 370762306a36Sopenharmony_cistatic inline unsigned long 370862306a36Sopenharmony_civmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) 370962306a36Sopenharmony_ci{ 371062306a36Sopenharmony_ci return 371162306a36Sopenharmony_ci /*1*/ (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) | 371262306a36Sopenharmony_ci /*2*/ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) | 371362306a36Sopenharmony_ci /*3*/ (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask | 371462306a36Sopenharmony_ci vcpu->arch.cr0_guest_owned_bits)); 371562306a36Sopenharmony_ci} 371662306a36Sopenharmony_ci 371762306a36Sopenharmony_cistatic inline unsigned long 371862306a36Sopenharmony_civmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) 371962306a36Sopenharmony_ci{ 372062306a36Sopenharmony_ci return 372162306a36Sopenharmony_ci /*1*/ (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) | 372262306a36Sopenharmony_ci /*2*/ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) | 372362306a36Sopenharmony_ci /*3*/ (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask | 372462306a36Sopenharmony_ci vcpu->arch.cr4_guest_owned_bits)); 372562306a36Sopenharmony_ci} 372662306a36Sopenharmony_ci 372762306a36Sopenharmony_cistatic void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, 372862306a36Sopenharmony_ci struct vmcs12 *vmcs12, 372962306a36Sopenharmony_ci u32 vm_exit_reason, u32 exit_intr_info) 373062306a36Sopenharmony_ci{ 373162306a36Sopenharmony_ci u32 idt_vectoring; 373262306a36Sopenharmony_ci unsigned int nr; 373362306a36Sopenharmony_ci 373462306a36Sopenharmony_ci /* 373562306a36Sopenharmony_ci * Per the SDM, VM-Exits due to double and triple faults are never 373662306a36Sopenharmony_ci * considered to occur during event delivery, even if the double/triple 373762306a36Sopenharmony_ci * fault is the result of an escalating vectoring issue. 
373862306a36Sopenharmony_ci * 373962306a36Sopenharmony_ci * Note, the SDM qualifies the double fault behavior with "The original 374062306a36Sopenharmony_ci * event results in a double-fault exception". It's unclear why the 374162306a36Sopenharmony_ci * qualification exists since exits due to double fault can occur only 374262306a36Sopenharmony_ci * while vectoring a different exception (injected events are never 374362306a36Sopenharmony_ci * subject to interception), i.e. there's _always_ an original event. 374462306a36Sopenharmony_ci * 374562306a36Sopenharmony_ci * The SDM also uses NMI as a confusing example for the "original event 374662306a36Sopenharmony_ci * causes the VM exit directly" clause. NMI isn't special in any way, 374762306a36Sopenharmony_ci * the same rule applies to all events that cause an exit directly. 374862306a36Sopenharmony_ci * NMI is an odd choice for the example because NMIs can only occur on 374962306a36Sopenharmony_ci * instruction boundaries, i.e. they _can't_ occur during vectoring. 
375062306a36Sopenharmony_ci */ 375162306a36Sopenharmony_ci if ((u16)vm_exit_reason == EXIT_REASON_TRIPLE_FAULT || 375262306a36Sopenharmony_ci ((u16)vm_exit_reason == EXIT_REASON_EXCEPTION_NMI && 375362306a36Sopenharmony_ci is_double_fault(exit_intr_info))) { 375462306a36Sopenharmony_ci vmcs12->idt_vectoring_info_field = 0; 375562306a36Sopenharmony_ci } else if (vcpu->arch.exception.injected) { 375662306a36Sopenharmony_ci nr = vcpu->arch.exception.vector; 375762306a36Sopenharmony_ci idt_vectoring = nr | VECTORING_INFO_VALID_MASK; 375862306a36Sopenharmony_ci 375962306a36Sopenharmony_ci if (kvm_exception_is_soft(nr)) { 376062306a36Sopenharmony_ci vmcs12->vm_exit_instruction_len = 376162306a36Sopenharmony_ci vcpu->arch.event_exit_inst_len; 376262306a36Sopenharmony_ci idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION; 376362306a36Sopenharmony_ci } else 376462306a36Sopenharmony_ci idt_vectoring |= INTR_TYPE_HARD_EXCEPTION; 376562306a36Sopenharmony_ci 376662306a36Sopenharmony_ci if (vcpu->arch.exception.has_error_code) { 376762306a36Sopenharmony_ci idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK; 376862306a36Sopenharmony_ci vmcs12->idt_vectoring_error_code = 376962306a36Sopenharmony_ci vcpu->arch.exception.error_code; 377062306a36Sopenharmony_ci } 377162306a36Sopenharmony_ci 377262306a36Sopenharmony_ci vmcs12->idt_vectoring_info_field = idt_vectoring; 377362306a36Sopenharmony_ci } else if (vcpu->arch.nmi_injected) { 377462306a36Sopenharmony_ci vmcs12->idt_vectoring_info_field = 377562306a36Sopenharmony_ci INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; 377662306a36Sopenharmony_ci } else if (vcpu->arch.interrupt.injected) { 377762306a36Sopenharmony_ci nr = vcpu->arch.interrupt.nr; 377862306a36Sopenharmony_ci idt_vectoring = nr | VECTORING_INFO_VALID_MASK; 377962306a36Sopenharmony_ci 378062306a36Sopenharmony_ci if (vcpu->arch.interrupt.soft) { 378162306a36Sopenharmony_ci idt_vectoring |= INTR_TYPE_SOFT_INTR; 378262306a36Sopenharmony_ci vmcs12->vm_entry_instruction_len = 
378362306a36Sopenharmony_ci vcpu->arch.event_exit_inst_len; 378462306a36Sopenharmony_ci } else 378562306a36Sopenharmony_ci idt_vectoring |= INTR_TYPE_EXT_INTR; 378662306a36Sopenharmony_ci 378762306a36Sopenharmony_ci vmcs12->idt_vectoring_info_field = idt_vectoring; 378862306a36Sopenharmony_ci } else { 378962306a36Sopenharmony_ci vmcs12->idt_vectoring_info_field = 0; 379062306a36Sopenharmony_ci } 379162306a36Sopenharmony_ci} 379262306a36Sopenharmony_ci 379362306a36Sopenharmony_ci 379462306a36Sopenharmony_civoid nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) 379562306a36Sopenharmony_ci{ 379662306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 379762306a36Sopenharmony_ci gfn_t gfn; 379862306a36Sopenharmony_ci 379962306a36Sopenharmony_ci /* 380062306a36Sopenharmony_ci * Don't need to mark the APIC access page dirty; it is never 380162306a36Sopenharmony_ci * written to by the CPU during APIC virtualization. 380262306a36Sopenharmony_ci */ 380362306a36Sopenharmony_ci 380462306a36Sopenharmony_ci if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { 380562306a36Sopenharmony_ci gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; 380662306a36Sopenharmony_ci kvm_vcpu_mark_page_dirty(vcpu, gfn); 380762306a36Sopenharmony_ci } 380862306a36Sopenharmony_ci 380962306a36Sopenharmony_ci if (nested_cpu_has_posted_intr(vmcs12)) { 381062306a36Sopenharmony_ci gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; 381162306a36Sopenharmony_ci kvm_vcpu_mark_page_dirty(vcpu, gfn); 381262306a36Sopenharmony_ci } 381362306a36Sopenharmony_ci} 381462306a36Sopenharmony_ci 381562306a36Sopenharmony_cistatic int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) 381662306a36Sopenharmony_ci{ 381762306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 381862306a36Sopenharmony_ci int max_irr; 381962306a36Sopenharmony_ci void *vapic_page; 382062306a36Sopenharmony_ci u16 status; 382162306a36Sopenharmony_ci 382262306a36Sopenharmony_ci if (!vmx->nested.pi_pending) 
382362306a36Sopenharmony_ci return 0; 382462306a36Sopenharmony_ci 382562306a36Sopenharmony_ci if (!vmx->nested.pi_desc) 382662306a36Sopenharmony_ci goto mmio_needed; 382762306a36Sopenharmony_ci 382862306a36Sopenharmony_ci vmx->nested.pi_pending = false; 382962306a36Sopenharmony_ci 383062306a36Sopenharmony_ci if (!pi_test_and_clear_on(vmx->nested.pi_desc)) 383162306a36Sopenharmony_ci return 0; 383262306a36Sopenharmony_ci 383362306a36Sopenharmony_ci max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); 383462306a36Sopenharmony_ci if (max_irr != 256) { 383562306a36Sopenharmony_ci vapic_page = vmx->nested.virtual_apic_map.hva; 383662306a36Sopenharmony_ci if (!vapic_page) 383762306a36Sopenharmony_ci goto mmio_needed; 383862306a36Sopenharmony_ci 383962306a36Sopenharmony_ci __kvm_apic_update_irr(vmx->nested.pi_desc->pir, 384062306a36Sopenharmony_ci vapic_page, &max_irr); 384162306a36Sopenharmony_ci status = vmcs_read16(GUEST_INTR_STATUS); 384262306a36Sopenharmony_ci if ((u8)max_irr > ((u8)status & 0xff)) { 384362306a36Sopenharmony_ci status &= ~0xff; 384462306a36Sopenharmony_ci status |= (u8)max_irr; 384562306a36Sopenharmony_ci vmcs_write16(GUEST_INTR_STATUS, status); 384662306a36Sopenharmony_ci } 384762306a36Sopenharmony_ci } 384862306a36Sopenharmony_ci 384962306a36Sopenharmony_ci nested_mark_vmcs12_pages_dirty(vcpu); 385062306a36Sopenharmony_ci return 0; 385162306a36Sopenharmony_ci 385262306a36Sopenharmony_cimmio_needed: 385362306a36Sopenharmony_ci kvm_handle_memory_failure(vcpu, X86EMUL_IO_NEEDED, NULL); 385462306a36Sopenharmony_ci return -ENXIO; 385562306a36Sopenharmony_ci} 385662306a36Sopenharmony_ci 385762306a36Sopenharmony_cistatic void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu) 385862306a36Sopenharmony_ci{ 385962306a36Sopenharmony_ci struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit; 386062306a36Sopenharmony_ci u32 intr_info = ex->vector | INTR_INFO_VALID_MASK; 386162306a36Sopenharmony_ci struct vmcs12 *vmcs12 = 
get_vmcs12(vcpu); 386262306a36Sopenharmony_ci unsigned long exit_qual; 386362306a36Sopenharmony_ci 386462306a36Sopenharmony_ci if (ex->has_payload) { 386562306a36Sopenharmony_ci exit_qual = ex->payload; 386662306a36Sopenharmony_ci } else if (ex->vector == PF_VECTOR) { 386762306a36Sopenharmony_ci exit_qual = vcpu->arch.cr2; 386862306a36Sopenharmony_ci } else if (ex->vector == DB_VECTOR) { 386962306a36Sopenharmony_ci exit_qual = vcpu->arch.dr6; 387062306a36Sopenharmony_ci exit_qual &= ~DR6_BT; 387162306a36Sopenharmony_ci exit_qual ^= DR6_ACTIVE_LOW; 387262306a36Sopenharmony_ci } else { 387362306a36Sopenharmony_ci exit_qual = 0; 387462306a36Sopenharmony_ci } 387562306a36Sopenharmony_ci 387662306a36Sopenharmony_ci /* 387762306a36Sopenharmony_ci * Unlike AMD's Paged Real Mode, which reports an error code on #PF 387862306a36Sopenharmony_ci * VM-Exits even if the CPU is in Real Mode, Intel VMX never sets the 387962306a36Sopenharmony_ci * "has error code" flags on VM-Exit if the CPU is in Real Mode. 388062306a36Sopenharmony_ci */ 388162306a36Sopenharmony_ci if (ex->has_error_code && is_protmode(vcpu)) { 388262306a36Sopenharmony_ci /* 388362306a36Sopenharmony_ci * Intel CPUs do not generate error codes with bits 31:16 set, 388462306a36Sopenharmony_ci * and more importantly VMX disallows setting bits 31:16 in the 388562306a36Sopenharmony_ci * injected error code for VM-Entry. Drop the bits to mimic 388662306a36Sopenharmony_ci * hardware and avoid inducing failure on nested VM-Entry if L1 388762306a36Sopenharmony_ci * chooses to inject the exception back to L2. AMD CPUs _do_ 388862306a36Sopenharmony_ci * generate "full" 32-bit error codes, so KVM allows userspace 388962306a36Sopenharmony_ci * to inject exception error codes with bits 31:16 set. 
389062306a36Sopenharmony_ci */ 389162306a36Sopenharmony_ci vmcs12->vm_exit_intr_error_code = (u16)ex->error_code; 389262306a36Sopenharmony_ci intr_info |= INTR_INFO_DELIVER_CODE_MASK; 389362306a36Sopenharmony_ci } 389462306a36Sopenharmony_ci 389562306a36Sopenharmony_ci if (kvm_exception_is_soft(ex->vector)) 389662306a36Sopenharmony_ci intr_info |= INTR_TYPE_SOFT_EXCEPTION; 389762306a36Sopenharmony_ci else 389862306a36Sopenharmony_ci intr_info |= INTR_TYPE_HARD_EXCEPTION; 389962306a36Sopenharmony_ci 390062306a36Sopenharmony_ci if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) && 390162306a36Sopenharmony_ci vmx_get_nmi_mask(vcpu)) 390262306a36Sopenharmony_ci intr_info |= INTR_INFO_UNBLOCK_NMI; 390362306a36Sopenharmony_ci 390462306a36Sopenharmony_ci nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual); 390562306a36Sopenharmony_ci} 390662306a36Sopenharmony_ci 390762306a36Sopenharmony_ci/* 390862306a36Sopenharmony_ci * Returns true if a debug trap is (likely) pending delivery. Infer the class 390962306a36Sopenharmony_ci * of a #DB (trap-like vs. fault-like) from the exception payload (to-be-DR6). 391062306a36Sopenharmony_ci * Using the payload is flawed because code breakpoints (fault-like) and data 391162306a36Sopenharmony_ci * breakpoints (trap-like) set the same bits in DR6 (breakpoint detected), i.e. 391262306a36Sopenharmony_ci * this will return false positives if a to-be-injected code breakpoint #DB is 391362306a36Sopenharmony_ci * pending (from KVM's perspective, but not "pending" across an instruction 391462306a36Sopenharmony_ci * boundary). ICEBP, a.k.a. INT1, is also not reflected here even though it 391562306a36Sopenharmony_ci * too is trap-like. 
391662306a36Sopenharmony_ci * 391762306a36Sopenharmony_ci * KVM "works" despite these flaws as ICEBP isn't currently supported by the 391862306a36Sopenharmony_ci * emulator, Monitor Trap Flag is not marked pending on intercepted #DBs (the 391962306a36Sopenharmony_ci * #DB has already happened), and MTF isn't marked pending on code breakpoints 392062306a36Sopenharmony_ci * from the emulator (because such #DBs are fault-like and thus don't trigger 392162306a36Sopenharmony_ci * actions that fire on instruction retire). 392262306a36Sopenharmony_ci */ 392362306a36Sopenharmony_cistatic unsigned long vmx_get_pending_dbg_trap(struct kvm_queued_exception *ex) 392462306a36Sopenharmony_ci{ 392562306a36Sopenharmony_ci if (!ex->pending || ex->vector != DB_VECTOR) 392662306a36Sopenharmony_ci return 0; 392762306a36Sopenharmony_ci 392862306a36Sopenharmony_ci /* General Detect #DBs are always fault-like. */ 392962306a36Sopenharmony_ci return ex->payload & ~DR6_BD; 393062306a36Sopenharmony_ci} 393162306a36Sopenharmony_ci 393262306a36Sopenharmony_ci/* 393362306a36Sopenharmony_ci * Returns true if there's a pending #DB exception that is lower priority than 393462306a36Sopenharmony_ci * a pending Monitor Trap Flag VM-Exit. TSS T-flag #DBs are not emulated by 393562306a36Sopenharmony_ci * KVM, but could theoretically be injected by userspace. Note, this code is 393662306a36Sopenharmony_ci * imperfect, see above. 393762306a36Sopenharmony_ci */ 393862306a36Sopenharmony_cistatic bool vmx_is_low_priority_db_trap(struct kvm_queued_exception *ex) 393962306a36Sopenharmony_ci{ 394062306a36Sopenharmony_ci return vmx_get_pending_dbg_trap(ex) & ~DR6_BT; 394162306a36Sopenharmony_ci} 394262306a36Sopenharmony_ci 394362306a36Sopenharmony_ci/* 394462306a36Sopenharmony_ci * Certain VM-exits set the 'pending debug exceptions' field to indicate a 394562306a36Sopenharmony_ci * recognized #DB (data or single-step) that has yet to be delivered. 
Since KVM 394662306a36Sopenharmony_ci * represents these debug traps with a payload that is said to be compatible 394762306a36Sopenharmony_ci * with the 'pending debug exceptions' field, write the payload to the VMCS 394862306a36Sopenharmony_ci * field if a VM-exit is delivered before the debug trap. 394962306a36Sopenharmony_ci */ 395062306a36Sopenharmony_cistatic void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu) 395162306a36Sopenharmony_ci{ 395262306a36Sopenharmony_ci unsigned long pending_dbg; 395362306a36Sopenharmony_ci 395462306a36Sopenharmony_ci pending_dbg = vmx_get_pending_dbg_trap(&vcpu->arch.exception); 395562306a36Sopenharmony_ci if (pending_dbg) 395662306a36Sopenharmony_ci vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, pending_dbg); 395762306a36Sopenharmony_ci} 395862306a36Sopenharmony_ci 395962306a36Sopenharmony_cistatic bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu) 396062306a36Sopenharmony_ci{ 396162306a36Sopenharmony_ci return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && 396262306a36Sopenharmony_ci to_vmx(vcpu)->nested.preemption_timer_expired; 396362306a36Sopenharmony_ci} 396462306a36Sopenharmony_ci 396562306a36Sopenharmony_cistatic bool vmx_has_nested_events(struct kvm_vcpu *vcpu) 396662306a36Sopenharmony_ci{ 396762306a36Sopenharmony_ci return nested_vmx_preemption_timer_pending(vcpu) || 396862306a36Sopenharmony_ci to_vmx(vcpu)->nested.mtf_pending; 396962306a36Sopenharmony_ci} 397062306a36Sopenharmony_ci 397162306a36Sopenharmony_ci/* 397262306a36Sopenharmony_ci * Per the Intel SDM's table "Priority Among Concurrent Events", with minor 397362306a36Sopenharmony_ci * edits to fill in missing examples, e.g. #DB due to split-lock accesses, 397462306a36Sopenharmony_ci * and less minor edits to splice in the priority of VMX Non-Root specific 397562306a36Sopenharmony_ci * events, e.g. MTF and NMI/INTR-window exiting. 
 *
 * 1 Hardware Reset and Machine Checks
 *	- RESET
 *	- Machine Check
 *
 * 2 Trap on Task Switch
 *	- T flag in TSS is set (on task switch)
 *
 * 3 External Hardware Interventions
 *	- FLUSH
 *	- STOPCLK
 *	- SMI
 *	- INIT
 *
 * 3.5 Monitor Trap Flag (MTF) VM-exit[1]
 *
 * 4 Traps on Previous Instruction
 *	- Breakpoints
 *	- Trap-class Debug Exceptions (#DB due to TF flag set, data/I-O
 *	  breakpoint, or #DB due to a split-lock access)
 *
 * 4.3	VMX-preemption timer expired VM-exit[2]
 *
 * 4.6	NMI-window exiting VM-exit[3]
 *
 * 5 Nonmaskable Interrupts (NMI)
 *
 * 5.5 Interrupt-window exiting VM-exit and Virtual-interrupt delivery[4]
 *
 * 6 Maskable Hardware Interrupts
 *
 * 7 Code Breakpoint Fault
 *
 * 8 Faults from Fetching Next Instruction
 *	- Code-Segment Limit Violation
 *	- Code Page Fault
 *	- Control protection exception (missing ENDBRANCH at target of indirect
 *					call or jump)
 *
 * 9 Faults
from Decoding Next Instruction 401662306a36Sopenharmony_ci * - Instruction length > 15 bytes 401762306a36Sopenharmony_ci * - Invalid Opcode 401862306a36Sopenharmony_ci * - Coprocessor Not Available 401962306a36Sopenharmony_ci * 402062306a36Sopenharmony_ci *10 Faults on Executing Instruction 402162306a36Sopenharmony_ci * - Overflow 402262306a36Sopenharmony_ci * - Bound error 402362306a36Sopenharmony_ci * - Invalid TSS 402462306a36Sopenharmony_ci * - Segment Not Present 402562306a36Sopenharmony_ci * - Stack fault 402662306a36Sopenharmony_ci * - General Protection 402762306a36Sopenharmony_ci * - Data Page Fault 402862306a36Sopenharmony_ci * - Alignment Check 402962306a36Sopenharmony_ci * - x86 FPU Floating-point exception 403062306a36Sopenharmony_ci * - SIMD floating-point exception 403162306a36Sopenharmony_ci * - Virtualization exception 403262306a36Sopenharmony_ci * - Control protection exception 403362306a36Sopenharmony_ci * 403462306a36Sopenharmony_ci * [1] Per the "Monitor Trap Flag" section: System-management interrupts (SMIs), 403562306a36Sopenharmony_ci * INIT signals, and higher priority events take priority over MTF VM exits. 403662306a36Sopenharmony_ci * MTF VM exits take priority over debug-trap exceptions and lower priority 403762306a36Sopenharmony_ci * events. 403862306a36Sopenharmony_ci * 403962306a36Sopenharmony_ci * [2] Debug-trap exceptions and higher priority events take priority over VM exits 404062306a36Sopenharmony_ci * caused by the VMX-preemption timer. VM exits caused by the VMX-preemption 404162306a36Sopenharmony_ci * timer take priority over VM exits caused by the "NMI-window exiting" 404262306a36Sopenharmony_ci * VM-execution control and lower priority events. 404362306a36Sopenharmony_ci * 404462306a36Sopenharmony_ci * [3] Debug-trap exceptions and higher priority events take priority over VM exits 404562306a36Sopenharmony_ci * caused by "NMI-window exiting". 
VM exits caused by this control take 404662306a36Sopenharmony_ci * priority over non-maskable interrupts (NMIs) and lower priority events. 404762306a36Sopenharmony_ci * 404862306a36Sopenharmony_ci * [4] Virtual-interrupt delivery has the same priority as that of VM exits due to 404962306a36Sopenharmony_ci * the 1-setting of the "interrupt-window exiting" VM-execution control. Thus, 405062306a36Sopenharmony_ci * non-maskable interrupts (NMIs) and higher priority events take priority over 405162306a36Sopenharmony_ci * delivery of a virtual interrupt; delivery of a virtual interrupt takes 405262306a36Sopenharmony_ci * priority over external interrupts and lower priority events. 405362306a36Sopenharmony_ci */ 405462306a36Sopenharmony_cistatic int vmx_check_nested_events(struct kvm_vcpu *vcpu) 405562306a36Sopenharmony_ci{ 405662306a36Sopenharmony_ci struct kvm_lapic *apic = vcpu->arch.apic; 405762306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 405862306a36Sopenharmony_ci /* 405962306a36Sopenharmony_ci * Only a pending nested run blocks a pending exception. If there is a 406062306a36Sopenharmony_ci * previously injected event, the pending exception occurred while said 406162306a36Sopenharmony_ci * event was being delivered and thus needs to be handled. 406262306a36Sopenharmony_ci */ 406362306a36Sopenharmony_ci bool block_nested_exceptions = vmx->nested.nested_run_pending; 406462306a36Sopenharmony_ci /* 406562306a36Sopenharmony_ci * New events (not exceptions) are only recognized at instruction 406662306a36Sopenharmony_ci * boundaries. If an event needs reinjection, then KVM is handling a 406762306a36Sopenharmony_ci * VM-Exit that occurred _during_ instruction execution; new events are 406862306a36Sopenharmony_ci * blocked until the instruction completes. 
406962306a36Sopenharmony_ci */ 407062306a36Sopenharmony_ci bool block_nested_events = block_nested_exceptions || 407162306a36Sopenharmony_ci kvm_event_needs_reinjection(vcpu); 407262306a36Sopenharmony_ci 407362306a36Sopenharmony_ci if (lapic_in_kernel(vcpu) && 407462306a36Sopenharmony_ci test_bit(KVM_APIC_INIT, &apic->pending_events)) { 407562306a36Sopenharmony_ci if (block_nested_events) 407662306a36Sopenharmony_ci return -EBUSY; 407762306a36Sopenharmony_ci nested_vmx_update_pending_dbg(vcpu); 407862306a36Sopenharmony_ci clear_bit(KVM_APIC_INIT, &apic->pending_events); 407962306a36Sopenharmony_ci if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED) 408062306a36Sopenharmony_ci nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); 408162306a36Sopenharmony_ci 408262306a36Sopenharmony_ci /* MTF is discarded if the vCPU is in WFS. */ 408362306a36Sopenharmony_ci vmx->nested.mtf_pending = false; 408462306a36Sopenharmony_ci return 0; 408562306a36Sopenharmony_ci } 408662306a36Sopenharmony_ci 408762306a36Sopenharmony_ci if (lapic_in_kernel(vcpu) && 408862306a36Sopenharmony_ci test_bit(KVM_APIC_SIPI, &apic->pending_events)) { 408962306a36Sopenharmony_ci if (block_nested_events) 409062306a36Sopenharmony_ci return -EBUSY; 409162306a36Sopenharmony_ci 409262306a36Sopenharmony_ci clear_bit(KVM_APIC_SIPI, &apic->pending_events); 409362306a36Sopenharmony_ci if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { 409462306a36Sopenharmony_ci nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0, 409562306a36Sopenharmony_ci apic->sipi_vector & 0xFFUL); 409662306a36Sopenharmony_ci return 0; 409762306a36Sopenharmony_ci } 409862306a36Sopenharmony_ci /* Fallthrough, the SIPI is completely ignored. 
*/ 409962306a36Sopenharmony_ci } 410062306a36Sopenharmony_ci 410162306a36Sopenharmony_ci /* 410262306a36Sopenharmony_ci * Process exceptions that are higher priority than Monitor Trap Flag: 410362306a36Sopenharmony_ci * fault-like exceptions, TSS T flag #DB (not emulated by KVM, but 410462306a36Sopenharmony_ci * could theoretically come in from userspace), and ICEBP (INT1). 410562306a36Sopenharmony_ci * 410662306a36Sopenharmony_ci * TODO: SMIs have higher priority than MTF and trap-like #DBs (except 410762306a36Sopenharmony_ci * for TSS T flag #DBs). KVM also doesn't save/restore pending MTF 410862306a36Sopenharmony_ci * across SMI/RSM as it should; that needs to be addressed in order to 410962306a36Sopenharmony_ci * prioritize SMI over MTF and trap-like #DBs. 411062306a36Sopenharmony_ci */ 411162306a36Sopenharmony_ci if (vcpu->arch.exception_vmexit.pending && 411262306a36Sopenharmony_ci !vmx_is_low_priority_db_trap(&vcpu->arch.exception_vmexit)) { 411362306a36Sopenharmony_ci if (block_nested_exceptions) 411462306a36Sopenharmony_ci return -EBUSY; 411562306a36Sopenharmony_ci 411662306a36Sopenharmony_ci nested_vmx_inject_exception_vmexit(vcpu); 411762306a36Sopenharmony_ci return 0; 411862306a36Sopenharmony_ci } 411962306a36Sopenharmony_ci 412062306a36Sopenharmony_ci if (vcpu->arch.exception.pending && 412162306a36Sopenharmony_ci !vmx_is_low_priority_db_trap(&vcpu->arch.exception)) { 412262306a36Sopenharmony_ci if (block_nested_exceptions) 412362306a36Sopenharmony_ci return -EBUSY; 412462306a36Sopenharmony_ci goto no_vmexit; 412562306a36Sopenharmony_ci } 412662306a36Sopenharmony_ci 412762306a36Sopenharmony_ci if (vmx->nested.mtf_pending) { 412862306a36Sopenharmony_ci if (block_nested_events) 412962306a36Sopenharmony_ci return -EBUSY; 413062306a36Sopenharmony_ci nested_vmx_update_pending_dbg(vcpu); 413162306a36Sopenharmony_ci nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0); 413262306a36Sopenharmony_ci return 0; 413362306a36Sopenharmony_ci } 
413462306a36Sopenharmony_ci 413562306a36Sopenharmony_ci if (vcpu->arch.exception_vmexit.pending) { 413662306a36Sopenharmony_ci if (block_nested_exceptions) 413762306a36Sopenharmony_ci return -EBUSY; 413862306a36Sopenharmony_ci 413962306a36Sopenharmony_ci nested_vmx_inject_exception_vmexit(vcpu); 414062306a36Sopenharmony_ci return 0; 414162306a36Sopenharmony_ci } 414262306a36Sopenharmony_ci 414362306a36Sopenharmony_ci if (vcpu->arch.exception.pending) { 414462306a36Sopenharmony_ci if (block_nested_exceptions) 414562306a36Sopenharmony_ci return -EBUSY; 414662306a36Sopenharmony_ci goto no_vmexit; 414762306a36Sopenharmony_ci } 414862306a36Sopenharmony_ci 414962306a36Sopenharmony_ci if (nested_vmx_preemption_timer_pending(vcpu)) { 415062306a36Sopenharmony_ci if (block_nested_events) 415162306a36Sopenharmony_ci return -EBUSY; 415262306a36Sopenharmony_ci nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0); 415362306a36Sopenharmony_ci return 0; 415462306a36Sopenharmony_ci } 415562306a36Sopenharmony_ci 415662306a36Sopenharmony_ci if (vcpu->arch.smi_pending && !is_smm(vcpu)) { 415762306a36Sopenharmony_ci if (block_nested_events) 415862306a36Sopenharmony_ci return -EBUSY; 415962306a36Sopenharmony_ci goto no_vmexit; 416062306a36Sopenharmony_ci } 416162306a36Sopenharmony_ci 416262306a36Sopenharmony_ci if (vcpu->arch.nmi_pending && !vmx_nmi_blocked(vcpu)) { 416362306a36Sopenharmony_ci if (block_nested_events) 416462306a36Sopenharmony_ci return -EBUSY; 416562306a36Sopenharmony_ci if (!nested_exit_on_nmi(vcpu)) 416662306a36Sopenharmony_ci goto no_vmexit; 416762306a36Sopenharmony_ci 416862306a36Sopenharmony_ci nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, 416962306a36Sopenharmony_ci NMI_VECTOR | INTR_TYPE_NMI_INTR | 417062306a36Sopenharmony_ci INTR_INFO_VALID_MASK, 0); 417162306a36Sopenharmony_ci /* 417262306a36Sopenharmony_ci * The NMI-triggered VM exit counts as injection: 417362306a36Sopenharmony_ci * clear this one and block further NMIs. 
417462306a36Sopenharmony_ci */ 417562306a36Sopenharmony_ci vcpu->arch.nmi_pending = 0; 417662306a36Sopenharmony_ci vmx_set_nmi_mask(vcpu, true); 417762306a36Sopenharmony_ci return 0; 417862306a36Sopenharmony_ci } 417962306a36Sopenharmony_ci 418062306a36Sopenharmony_ci if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) { 418162306a36Sopenharmony_ci if (block_nested_events) 418262306a36Sopenharmony_ci return -EBUSY; 418362306a36Sopenharmony_ci if (!nested_exit_on_intr(vcpu)) 418462306a36Sopenharmony_ci goto no_vmexit; 418562306a36Sopenharmony_ci nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); 418662306a36Sopenharmony_ci return 0; 418762306a36Sopenharmony_ci } 418862306a36Sopenharmony_ci 418962306a36Sopenharmony_cino_vmexit: 419062306a36Sopenharmony_ci return vmx_complete_nested_posted_interrupt(vcpu); 419162306a36Sopenharmony_ci} 419262306a36Sopenharmony_ci 419362306a36Sopenharmony_cistatic u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) 419462306a36Sopenharmony_ci{ 419562306a36Sopenharmony_ci ktime_t remaining = 419662306a36Sopenharmony_ci hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer); 419762306a36Sopenharmony_ci u64 value; 419862306a36Sopenharmony_ci 419962306a36Sopenharmony_ci if (ktime_to_ns(remaining) <= 0) 420062306a36Sopenharmony_ci return 0; 420162306a36Sopenharmony_ci 420262306a36Sopenharmony_ci value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz; 420362306a36Sopenharmony_ci do_div(value, 1000000); 420462306a36Sopenharmony_ci return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; 420562306a36Sopenharmony_ci} 420662306a36Sopenharmony_ci 420762306a36Sopenharmony_cistatic bool is_vmcs12_ext_field(unsigned long field) 420862306a36Sopenharmony_ci{ 420962306a36Sopenharmony_ci switch (field) { 421062306a36Sopenharmony_ci case GUEST_ES_SELECTOR: 421162306a36Sopenharmony_ci case GUEST_CS_SELECTOR: 421262306a36Sopenharmony_ci case GUEST_SS_SELECTOR: 421362306a36Sopenharmony_ci case GUEST_DS_SELECTOR: 
421462306a36Sopenharmony_ci case GUEST_FS_SELECTOR: 421562306a36Sopenharmony_ci case GUEST_GS_SELECTOR: 421662306a36Sopenharmony_ci case GUEST_LDTR_SELECTOR: 421762306a36Sopenharmony_ci case GUEST_TR_SELECTOR: 421862306a36Sopenharmony_ci case GUEST_ES_LIMIT: 421962306a36Sopenharmony_ci case GUEST_CS_LIMIT: 422062306a36Sopenharmony_ci case GUEST_SS_LIMIT: 422162306a36Sopenharmony_ci case GUEST_DS_LIMIT: 422262306a36Sopenharmony_ci case GUEST_FS_LIMIT: 422362306a36Sopenharmony_ci case GUEST_GS_LIMIT: 422462306a36Sopenharmony_ci case GUEST_LDTR_LIMIT: 422562306a36Sopenharmony_ci case GUEST_TR_LIMIT: 422662306a36Sopenharmony_ci case GUEST_GDTR_LIMIT: 422762306a36Sopenharmony_ci case GUEST_IDTR_LIMIT: 422862306a36Sopenharmony_ci case GUEST_ES_AR_BYTES: 422962306a36Sopenharmony_ci case GUEST_DS_AR_BYTES: 423062306a36Sopenharmony_ci case GUEST_FS_AR_BYTES: 423162306a36Sopenharmony_ci case GUEST_GS_AR_BYTES: 423262306a36Sopenharmony_ci case GUEST_LDTR_AR_BYTES: 423362306a36Sopenharmony_ci case GUEST_TR_AR_BYTES: 423462306a36Sopenharmony_ci case GUEST_ES_BASE: 423562306a36Sopenharmony_ci case GUEST_CS_BASE: 423662306a36Sopenharmony_ci case GUEST_SS_BASE: 423762306a36Sopenharmony_ci case GUEST_DS_BASE: 423862306a36Sopenharmony_ci case GUEST_FS_BASE: 423962306a36Sopenharmony_ci case GUEST_GS_BASE: 424062306a36Sopenharmony_ci case GUEST_LDTR_BASE: 424162306a36Sopenharmony_ci case GUEST_TR_BASE: 424262306a36Sopenharmony_ci case GUEST_GDTR_BASE: 424362306a36Sopenharmony_ci case GUEST_IDTR_BASE: 424462306a36Sopenharmony_ci case GUEST_PENDING_DBG_EXCEPTIONS: 424562306a36Sopenharmony_ci case GUEST_BNDCFGS: 424662306a36Sopenharmony_ci return true; 424762306a36Sopenharmony_ci default: 424862306a36Sopenharmony_ci break; 424962306a36Sopenharmony_ci } 425062306a36Sopenharmony_ci 425162306a36Sopenharmony_ci return false; 425262306a36Sopenharmony_ci} 425362306a36Sopenharmony_ci 425462306a36Sopenharmony_cistatic void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu, 
425562306a36Sopenharmony_ci struct vmcs12 *vmcs12) 425662306a36Sopenharmony_ci{ 425762306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 425862306a36Sopenharmony_ci 425962306a36Sopenharmony_ci vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR); 426062306a36Sopenharmony_ci vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR); 426162306a36Sopenharmony_ci vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR); 426262306a36Sopenharmony_ci vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR); 426362306a36Sopenharmony_ci vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR); 426462306a36Sopenharmony_ci vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR); 426562306a36Sopenharmony_ci vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR); 426662306a36Sopenharmony_ci vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR); 426762306a36Sopenharmony_ci vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT); 426862306a36Sopenharmony_ci vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT); 426962306a36Sopenharmony_ci vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT); 427062306a36Sopenharmony_ci vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT); 427162306a36Sopenharmony_ci vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT); 427262306a36Sopenharmony_ci vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT); 427362306a36Sopenharmony_ci vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT); 427462306a36Sopenharmony_ci vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT); 427562306a36Sopenharmony_ci vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT); 427662306a36Sopenharmony_ci vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT); 427762306a36Sopenharmony_ci vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES); 427862306a36Sopenharmony_ci vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES); 427962306a36Sopenharmony_ci vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES); 
428062306a36Sopenharmony_ci vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES); 428162306a36Sopenharmony_ci vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES); 428262306a36Sopenharmony_ci vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES); 428362306a36Sopenharmony_ci vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE); 428462306a36Sopenharmony_ci vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE); 428562306a36Sopenharmony_ci vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE); 428662306a36Sopenharmony_ci vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE); 428762306a36Sopenharmony_ci vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE); 428862306a36Sopenharmony_ci vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE); 428962306a36Sopenharmony_ci vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE); 429062306a36Sopenharmony_ci vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE); 429162306a36Sopenharmony_ci vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); 429262306a36Sopenharmony_ci vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); 429362306a36Sopenharmony_ci vmcs12->guest_pending_dbg_exceptions = 429462306a36Sopenharmony_ci vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); 429562306a36Sopenharmony_ci 429662306a36Sopenharmony_ci vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false; 429762306a36Sopenharmony_ci} 429862306a36Sopenharmony_ci 429962306a36Sopenharmony_cistatic void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu, 430062306a36Sopenharmony_ci struct vmcs12 *vmcs12) 430162306a36Sopenharmony_ci{ 430262306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 430362306a36Sopenharmony_ci int cpu; 430462306a36Sopenharmony_ci 430562306a36Sopenharmony_ci if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare) 430662306a36Sopenharmony_ci return; 430762306a36Sopenharmony_ci 430862306a36Sopenharmony_ci 430962306a36Sopenharmony_ci WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01); 431062306a36Sopenharmony_ci 431162306a36Sopenharmony_ci cpu = get_cpu(); 
431262306a36Sopenharmony_ci vmx->loaded_vmcs = &vmx->nested.vmcs02; 431362306a36Sopenharmony_ci vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01); 431462306a36Sopenharmony_ci 431562306a36Sopenharmony_ci sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); 431662306a36Sopenharmony_ci 431762306a36Sopenharmony_ci vmx->loaded_vmcs = &vmx->vmcs01; 431862306a36Sopenharmony_ci vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02); 431962306a36Sopenharmony_ci put_cpu(); 432062306a36Sopenharmony_ci} 432162306a36Sopenharmony_ci 432262306a36Sopenharmony_ci/* 432362306a36Sopenharmony_ci * Update the guest state fields of vmcs12 to reflect changes that 432462306a36Sopenharmony_ci * occurred while L2 was running. (The "IA-32e mode guest" bit of the 432562306a36Sopenharmony_ci * VM-entry controls is also updated, since this is really a guest 432662306a36Sopenharmony_ci * state bit.) 432762306a36Sopenharmony_ci */ 432862306a36Sopenharmony_cistatic void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) 432962306a36Sopenharmony_ci{ 433062306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 433162306a36Sopenharmony_ci 433262306a36Sopenharmony_ci if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) 433362306a36Sopenharmony_ci sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); 433462306a36Sopenharmony_ci 433562306a36Sopenharmony_ci vmx->nested.need_sync_vmcs02_to_vmcs12_rare = 433662306a36Sopenharmony_ci !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr); 433762306a36Sopenharmony_ci 433862306a36Sopenharmony_ci vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); 433962306a36Sopenharmony_ci vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); 434062306a36Sopenharmony_ci 434162306a36Sopenharmony_ci vmcs12->guest_rsp = kvm_rsp_read(vcpu); 434262306a36Sopenharmony_ci vmcs12->guest_rip = kvm_rip_read(vcpu); 434362306a36Sopenharmony_ci vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); 434462306a36Sopenharmony_ci 434562306a36Sopenharmony_ci vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES); 
434662306a36Sopenharmony_ci vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES); 434762306a36Sopenharmony_ci 434862306a36Sopenharmony_ci vmcs12->guest_interruptibility_info = 434962306a36Sopenharmony_ci vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); 435062306a36Sopenharmony_ci 435162306a36Sopenharmony_ci if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) 435262306a36Sopenharmony_ci vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT; 435362306a36Sopenharmony_ci else if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) 435462306a36Sopenharmony_ci vmcs12->guest_activity_state = GUEST_ACTIVITY_WAIT_SIPI; 435562306a36Sopenharmony_ci else 435662306a36Sopenharmony_ci vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; 435762306a36Sopenharmony_ci 435862306a36Sopenharmony_ci if (nested_cpu_has_preemption_timer(vmcs12) && 435962306a36Sopenharmony_ci vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER && 436062306a36Sopenharmony_ci !vmx->nested.nested_run_pending) 436162306a36Sopenharmony_ci vmcs12->vmx_preemption_timer_value = 436262306a36Sopenharmony_ci vmx_get_preemption_timer_value(vcpu); 436362306a36Sopenharmony_ci 436462306a36Sopenharmony_ci /* 436562306a36Sopenharmony_ci * In some cases (usually, nested EPT), L2 is allowed to change its 436662306a36Sopenharmony_ci * own CR3 without exiting. If it has changed it, we must keep it. 436762306a36Sopenharmony_ci * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined 436862306a36Sopenharmony_ci * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12. 436962306a36Sopenharmony_ci * 437062306a36Sopenharmony_ci * Additionally, restore L2's PDPTR to vmcs12. 
437162306a36Sopenharmony_ci */ 437262306a36Sopenharmony_ci if (enable_ept) { 437362306a36Sopenharmony_ci vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3); 437462306a36Sopenharmony_ci if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) { 437562306a36Sopenharmony_ci vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); 437662306a36Sopenharmony_ci vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); 437762306a36Sopenharmony_ci vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); 437862306a36Sopenharmony_ci vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); 437962306a36Sopenharmony_ci } 438062306a36Sopenharmony_ci } 438162306a36Sopenharmony_ci 438262306a36Sopenharmony_ci vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); 438362306a36Sopenharmony_ci 438462306a36Sopenharmony_ci if (nested_cpu_has_vid(vmcs12)) 438562306a36Sopenharmony_ci vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS); 438662306a36Sopenharmony_ci 438762306a36Sopenharmony_ci vmcs12->vm_entry_controls = 438862306a36Sopenharmony_ci (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | 438962306a36Sopenharmony_ci (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); 439062306a36Sopenharmony_ci 439162306a36Sopenharmony_ci if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) 439262306a36Sopenharmony_ci kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); 439362306a36Sopenharmony_ci 439462306a36Sopenharmony_ci if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) 439562306a36Sopenharmony_ci vmcs12->guest_ia32_efer = vcpu->arch.efer; 439662306a36Sopenharmony_ci} 439762306a36Sopenharmony_ci 439862306a36Sopenharmony_ci/* 439962306a36Sopenharmony_ci * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits 440062306a36Sopenharmony_ci * and we want to prepare to run its L1 parent. 
L1 keeps a vmcs for L2 (vmcs12), 440162306a36Sopenharmony_ci * and this function updates it to reflect the changes to the guest state while 440262306a36Sopenharmony_ci * L2 was running (and perhaps made some exits which were handled directly by L0 440362306a36Sopenharmony_ci * without going back to L1), and to reflect the exit reason. 440462306a36Sopenharmony_ci * Note that we do not have to copy here all VMCS fields, just those that 440562306a36Sopenharmony_ci * could have changed by the L2 guest or the exit - i.e., the guest-state and 440662306a36Sopenharmony_ci * exit-information fields only. Other fields are modified by L1 with VMWRITE, 440762306a36Sopenharmony_ci * which already writes to vmcs12 directly. 440862306a36Sopenharmony_ci */ 440962306a36Sopenharmony_cistatic void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, 441062306a36Sopenharmony_ci u32 vm_exit_reason, u32 exit_intr_info, 441162306a36Sopenharmony_ci unsigned long exit_qualification) 441262306a36Sopenharmony_ci{ 441362306a36Sopenharmony_ci /* update exit information fields: */ 441462306a36Sopenharmony_ci vmcs12->vm_exit_reason = vm_exit_reason; 441562306a36Sopenharmony_ci if (to_vmx(vcpu)->exit_reason.enclave_mode) 441662306a36Sopenharmony_ci vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE; 441762306a36Sopenharmony_ci vmcs12->exit_qualification = exit_qualification; 441862306a36Sopenharmony_ci 441962306a36Sopenharmony_ci /* 442062306a36Sopenharmony_ci * On VM-Exit due to a failed VM-Entry, the VMCS isn't marked launched 442162306a36Sopenharmony_ci * and only EXIT_REASON and EXIT_QUALIFICATION are updated, all other 442262306a36Sopenharmony_ci * exit info fields are unmodified. 442362306a36Sopenharmony_ci */ 442462306a36Sopenharmony_ci if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) { 442562306a36Sopenharmony_ci vmcs12->launch_state = 1; 442662306a36Sopenharmony_ci 442762306a36Sopenharmony_ci /* vm_entry_intr_info_field is cleared on exit. 
Emulate this 442862306a36Sopenharmony_ci * instead of reading the real value. */ 442962306a36Sopenharmony_ci vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK; 443062306a36Sopenharmony_ci 443162306a36Sopenharmony_ci /* 443262306a36Sopenharmony_ci * Transfer the event that L0 or L1 may wanted to inject into 443362306a36Sopenharmony_ci * L2 to IDT_VECTORING_INFO_FIELD. 443462306a36Sopenharmony_ci */ 443562306a36Sopenharmony_ci vmcs12_save_pending_event(vcpu, vmcs12, 443662306a36Sopenharmony_ci vm_exit_reason, exit_intr_info); 443762306a36Sopenharmony_ci 443862306a36Sopenharmony_ci vmcs12->vm_exit_intr_info = exit_intr_info; 443962306a36Sopenharmony_ci vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 444062306a36Sopenharmony_ci vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); 444162306a36Sopenharmony_ci 444262306a36Sopenharmony_ci /* 444362306a36Sopenharmony_ci * According to spec, there's no need to store the guest's 444462306a36Sopenharmony_ci * MSRs if the exit is due to a VM-entry failure that occurs 444562306a36Sopenharmony_ci * during or after loading the guest state. Since this exit 444662306a36Sopenharmony_ci * does not fall in that category, we need to save the MSRs. 444762306a36Sopenharmony_ci */ 444862306a36Sopenharmony_ci if (nested_vmx_store_msr(vcpu, 444962306a36Sopenharmony_ci vmcs12->vm_exit_msr_store_addr, 445062306a36Sopenharmony_ci vmcs12->vm_exit_msr_store_count)) 445162306a36Sopenharmony_ci nested_vmx_abort(vcpu, 445262306a36Sopenharmony_ci VMX_ABORT_SAVE_GUEST_MSR_FAIL); 445362306a36Sopenharmony_ci } 445462306a36Sopenharmony_ci} 445562306a36Sopenharmony_ci 445662306a36Sopenharmony_ci/* 445762306a36Sopenharmony_ci * A part of what we need to when the nested L2 guest exits and we want to 445862306a36Sopenharmony_ci * run its L1 parent, is to reset L1's guest state to the host state specified 445962306a36Sopenharmony_ci * in vmcs12. 
446062306a36Sopenharmony_ci * This function is to be called not only on normal nested exit, but also on 446162306a36Sopenharmony_ci * a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry 446262306a36Sopenharmony_ci * Failures During or After Loading Guest State"). 446362306a36Sopenharmony_ci * This function should be called when the active VMCS is L1's (vmcs01). 446462306a36Sopenharmony_ci */ 446562306a36Sopenharmony_cistatic void load_vmcs12_host_state(struct kvm_vcpu *vcpu, 446662306a36Sopenharmony_ci struct vmcs12 *vmcs12) 446762306a36Sopenharmony_ci{ 446862306a36Sopenharmony_ci enum vm_entry_failure_code ignored; 446962306a36Sopenharmony_ci struct kvm_segment seg; 447062306a36Sopenharmony_ci 447162306a36Sopenharmony_ci if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) 447262306a36Sopenharmony_ci vcpu->arch.efer = vmcs12->host_ia32_efer; 447362306a36Sopenharmony_ci else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) 447462306a36Sopenharmony_ci vcpu->arch.efer |= (EFER_LMA | EFER_LME); 447562306a36Sopenharmony_ci else 447662306a36Sopenharmony_ci vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); 447762306a36Sopenharmony_ci vmx_set_efer(vcpu, vcpu->arch.efer); 447862306a36Sopenharmony_ci 447962306a36Sopenharmony_ci kvm_rsp_write(vcpu, vmcs12->host_rsp); 448062306a36Sopenharmony_ci kvm_rip_write(vcpu, vmcs12->host_rip); 448162306a36Sopenharmony_ci vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); 448262306a36Sopenharmony_ci vmx_set_interrupt_shadow(vcpu, 0); 448362306a36Sopenharmony_ci 448462306a36Sopenharmony_ci /* 448562306a36Sopenharmony_ci * Note that calling vmx_set_cr0 is important, even if cr0 hasn't 448662306a36Sopenharmony_ci * actually changed, because vmx_set_cr0 refers to efer set above. 
448762306a36Sopenharmony_ci * 448862306a36Sopenharmony_ci * CR0_GUEST_HOST_MASK is already set in the original vmcs01 448962306a36Sopenharmony_ci * (KVM doesn't change it); 449062306a36Sopenharmony_ci */ 449162306a36Sopenharmony_ci vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits(); 449262306a36Sopenharmony_ci vmx_set_cr0(vcpu, vmcs12->host_cr0); 449362306a36Sopenharmony_ci 449462306a36Sopenharmony_ci /* Same as above - no reason to call set_cr4_guest_host_mask(). */ 449562306a36Sopenharmony_ci vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); 449662306a36Sopenharmony_ci vmx_set_cr4(vcpu, vmcs12->host_cr4); 449762306a36Sopenharmony_ci 449862306a36Sopenharmony_ci nested_ept_uninit_mmu_context(vcpu); 449962306a36Sopenharmony_ci 450062306a36Sopenharmony_ci /* 450162306a36Sopenharmony_ci * Only PDPTE load can fail as the value of cr3 was checked on entry and 450262306a36Sopenharmony_ci * couldn't have changed. 450362306a36Sopenharmony_ci */ 450462306a36Sopenharmony_ci if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, true, &ignored)) 450562306a36Sopenharmony_ci nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); 450662306a36Sopenharmony_ci 450762306a36Sopenharmony_ci nested_vmx_transition_tlb_flush(vcpu, vmcs12, false); 450862306a36Sopenharmony_ci 450962306a36Sopenharmony_ci vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs); 451062306a36Sopenharmony_ci vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp); 451162306a36Sopenharmony_ci vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip); 451262306a36Sopenharmony_ci vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); 451362306a36Sopenharmony_ci vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); 451462306a36Sopenharmony_ci vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF); 451562306a36Sopenharmony_ci vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF); 451662306a36Sopenharmony_ci 451762306a36Sopenharmony_ci /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to 
L1. */ 451862306a36Sopenharmony_ci if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS) 451962306a36Sopenharmony_ci vmcs_write64(GUEST_BNDCFGS, 0); 452062306a36Sopenharmony_ci 452162306a36Sopenharmony_ci if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { 452262306a36Sopenharmony_ci vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); 452362306a36Sopenharmony_ci vcpu->arch.pat = vmcs12->host_ia32_pat; 452462306a36Sopenharmony_ci } 452562306a36Sopenharmony_ci if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) && 452662306a36Sopenharmony_ci kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu))) 452762306a36Sopenharmony_ci WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, 452862306a36Sopenharmony_ci vmcs12->host_ia32_perf_global_ctrl)); 452962306a36Sopenharmony_ci 453062306a36Sopenharmony_ci /* Set L1 segment info according to Intel SDM 453162306a36Sopenharmony_ci 27.5.2 Loading Host Segment and Descriptor-Table Registers */ 453262306a36Sopenharmony_ci seg = (struct kvm_segment) { 453362306a36Sopenharmony_ci .base = 0, 453462306a36Sopenharmony_ci .limit = 0xFFFFFFFF, 453562306a36Sopenharmony_ci .selector = vmcs12->host_cs_selector, 453662306a36Sopenharmony_ci .type = 11, 453762306a36Sopenharmony_ci .present = 1, 453862306a36Sopenharmony_ci .s = 1, 453962306a36Sopenharmony_ci .g = 1 454062306a36Sopenharmony_ci }; 454162306a36Sopenharmony_ci if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) 454262306a36Sopenharmony_ci seg.l = 1; 454362306a36Sopenharmony_ci else 454462306a36Sopenharmony_ci seg.db = 1; 454562306a36Sopenharmony_ci __vmx_set_segment(vcpu, &seg, VCPU_SREG_CS); 454662306a36Sopenharmony_ci seg = (struct kvm_segment) { 454762306a36Sopenharmony_ci .base = 0, 454862306a36Sopenharmony_ci .limit = 0xFFFFFFFF, 454962306a36Sopenharmony_ci .type = 3, 455062306a36Sopenharmony_ci .present = 1, 455162306a36Sopenharmony_ci .s = 1, 455262306a36Sopenharmony_ci .db = 1, 455362306a36Sopenharmony_ci .g = 1 455462306a36Sopenharmony_ci }; 
455562306a36Sopenharmony_ci seg.selector = vmcs12->host_ds_selector; 455662306a36Sopenharmony_ci __vmx_set_segment(vcpu, &seg, VCPU_SREG_DS); 455762306a36Sopenharmony_ci seg.selector = vmcs12->host_es_selector; 455862306a36Sopenharmony_ci __vmx_set_segment(vcpu, &seg, VCPU_SREG_ES); 455962306a36Sopenharmony_ci seg.selector = vmcs12->host_ss_selector; 456062306a36Sopenharmony_ci __vmx_set_segment(vcpu, &seg, VCPU_SREG_SS); 456162306a36Sopenharmony_ci seg.selector = vmcs12->host_fs_selector; 456262306a36Sopenharmony_ci seg.base = vmcs12->host_fs_base; 456362306a36Sopenharmony_ci __vmx_set_segment(vcpu, &seg, VCPU_SREG_FS); 456462306a36Sopenharmony_ci seg.selector = vmcs12->host_gs_selector; 456562306a36Sopenharmony_ci seg.base = vmcs12->host_gs_base; 456662306a36Sopenharmony_ci __vmx_set_segment(vcpu, &seg, VCPU_SREG_GS); 456762306a36Sopenharmony_ci seg = (struct kvm_segment) { 456862306a36Sopenharmony_ci .base = vmcs12->host_tr_base, 456962306a36Sopenharmony_ci .limit = 0x67, 457062306a36Sopenharmony_ci .selector = vmcs12->host_tr_selector, 457162306a36Sopenharmony_ci .type = 11, 457262306a36Sopenharmony_ci .present = 1 457362306a36Sopenharmony_ci }; 457462306a36Sopenharmony_ci __vmx_set_segment(vcpu, &seg, VCPU_SREG_TR); 457562306a36Sopenharmony_ci 457662306a36Sopenharmony_ci memset(&seg, 0, sizeof(seg)); 457762306a36Sopenharmony_ci seg.unusable = 1; 457862306a36Sopenharmony_ci __vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR); 457962306a36Sopenharmony_ci 458062306a36Sopenharmony_ci kvm_set_dr(vcpu, 7, 0x400); 458162306a36Sopenharmony_ci vmcs_write64(GUEST_IA32_DEBUGCTL, 0); 458262306a36Sopenharmony_ci 458362306a36Sopenharmony_ci if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, 458462306a36Sopenharmony_ci vmcs12->vm_exit_msr_load_count)) 458562306a36Sopenharmony_ci nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL); 458662306a36Sopenharmony_ci 458762306a36Sopenharmony_ci to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu); 
458862306a36Sopenharmony_ci} 458962306a36Sopenharmony_ci 459062306a36Sopenharmony_cistatic inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx) 459162306a36Sopenharmony_ci{ 459262306a36Sopenharmony_ci struct vmx_uret_msr *efer_msr; 459362306a36Sopenharmony_ci unsigned int i; 459462306a36Sopenharmony_ci 459562306a36Sopenharmony_ci if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER) 459662306a36Sopenharmony_ci return vmcs_read64(GUEST_IA32_EFER); 459762306a36Sopenharmony_ci 459862306a36Sopenharmony_ci if (cpu_has_load_ia32_efer()) 459962306a36Sopenharmony_ci return host_efer; 460062306a36Sopenharmony_ci 460162306a36Sopenharmony_ci for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) { 460262306a36Sopenharmony_ci if (vmx->msr_autoload.guest.val[i].index == MSR_EFER) 460362306a36Sopenharmony_ci return vmx->msr_autoload.guest.val[i].value; 460462306a36Sopenharmony_ci } 460562306a36Sopenharmony_ci 460662306a36Sopenharmony_ci efer_msr = vmx_find_uret_msr(vmx, MSR_EFER); 460762306a36Sopenharmony_ci if (efer_msr) 460862306a36Sopenharmony_ci return efer_msr->data; 460962306a36Sopenharmony_ci 461062306a36Sopenharmony_ci return host_efer; 461162306a36Sopenharmony_ci} 461262306a36Sopenharmony_ci 461362306a36Sopenharmony_cistatic void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) 461462306a36Sopenharmony_ci{ 461562306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 461662306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 461762306a36Sopenharmony_ci struct vmx_msr_entry g, h; 461862306a36Sopenharmony_ci gpa_t gpa; 461962306a36Sopenharmony_ci u32 i, j; 462062306a36Sopenharmony_ci 462162306a36Sopenharmony_ci vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT); 462262306a36Sopenharmony_ci 462362306a36Sopenharmony_ci if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { 462462306a36Sopenharmony_ci /* 462562306a36Sopenharmony_ci * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set 462662306a36Sopenharmony_ci * as vmcs01.GUEST_DR7 
contains a userspace defined value 462762306a36Sopenharmony_ci * and vcpu->arch.dr7 is not squirreled away before the 462862306a36Sopenharmony_ci * nested VMENTER (not worth adding a variable in nested_vmx). 462962306a36Sopenharmony_ci */ 463062306a36Sopenharmony_ci if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) 463162306a36Sopenharmony_ci kvm_set_dr(vcpu, 7, DR7_FIXED_1); 463262306a36Sopenharmony_ci else 463362306a36Sopenharmony_ci WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7))); 463462306a36Sopenharmony_ci } 463562306a36Sopenharmony_ci 463662306a36Sopenharmony_ci /* 463762306a36Sopenharmony_ci * Note that calling vmx_set_{efer,cr0,cr4} is important as they 463862306a36Sopenharmony_ci * handle a variety of side effects to KVM's software model. 463962306a36Sopenharmony_ci */ 464062306a36Sopenharmony_ci vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx)); 464162306a36Sopenharmony_ci 464262306a36Sopenharmony_ci vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits(); 464362306a36Sopenharmony_ci vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW)); 464462306a36Sopenharmony_ci 464562306a36Sopenharmony_ci vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); 464662306a36Sopenharmony_ci vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW)); 464762306a36Sopenharmony_ci 464862306a36Sopenharmony_ci nested_ept_uninit_mmu_context(vcpu); 464962306a36Sopenharmony_ci vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); 465062306a36Sopenharmony_ci kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); 465162306a36Sopenharmony_ci 465262306a36Sopenharmony_ci /* 465362306a36Sopenharmony_ci * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs 465462306a36Sopenharmony_ci * from vmcs01 (if necessary). The PDPTRs are not loaded on 465562306a36Sopenharmony_ci * VMFail, like everything else we just need to ensure our 465662306a36Sopenharmony_ci * software model is up-to-date. 
465762306a36Sopenharmony_ci */ 465862306a36Sopenharmony_ci if (enable_ept && is_pae_paging(vcpu)) 465962306a36Sopenharmony_ci ept_save_pdptrs(vcpu); 466062306a36Sopenharmony_ci 466162306a36Sopenharmony_ci kvm_mmu_reset_context(vcpu); 466262306a36Sopenharmony_ci 466362306a36Sopenharmony_ci /* 466462306a36Sopenharmony_ci * This nasty bit of open coding is a compromise between blindly 466562306a36Sopenharmony_ci * loading L1's MSRs using the exit load lists (incorrect emulation 466662306a36Sopenharmony_ci * of VMFail), leaving the nested VM's MSRs in the software model 466762306a36Sopenharmony_ci * (incorrect behavior) and snapshotting the modified MSRs (too 466862306a36Sopenharmony_ci * expensive since the lists are unbound by hardware). For each 466962306a36Sopenharmony_ci * MSR that was (prematurely) loaded from the nested VMEntry load 467062306a36Sopenharmony_ci * list, reload it from the exit load list if it exists and differs 467162306a36Sopenharmony_ci * from the guest value. The intent is to stuff host state as 467262306a36Sopenharmony_ci * silently as possible, not to fully process the exit load list. 
467362306a36Sopenharmony_ci */ 467462306a36Sopenharmony_ci for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) { 467562306a36Sopenharmony_ci gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g)); 467662306a36Sopenharmony_ci if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) { 467762306a36Sopenharmony_ci pr_debug_ratelimited( 467862306a36Sopenharmony_ci "%s read MSR index failed (%u, 0x%08llx)\n", 467962306a36Sopenharmony_ci __func__, i, gpa); 468062306a36Sopenharmony_ci goto vmabort; 468162306a36Sopenharmony_ci } 468262306a36Sopenharmony_ci 468362306a36Sopenharmony_ci for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) { 468462306a36Sopenharmony_ci gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h)); 468562306a36Sopenharmony_ci if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) { 468662306a36Sopenharmony_ci pr_debug_ratelimited( 468762306a36Sopenharmony_ci "%s read MSR failed (%u, 0x%08llx)\n", 468862306a36Sopenharmony_ci __func__, j, gpa); 468962306a36Sopenharmony_ci goto vmabort; 469062306a36Sopenharmony_ci } 469162306a36Sopenharmony_ci if (h.index != g.index) 469262306a36Sopenharmony_ci continue; 469362306a36Sopenharmony_ci if (h.value == g.value) 469462306a36Sopenharmony_ci break; 469562306a36Sopenharmony_ci 469662306a36Sopenharmony_ci if (nested_vmx_load_msr_check(vcpu, &h)) { 469762306a36Sopenharmony_ci pr_debug_ratelimited( 469862306a36Sopenharmony_ci "%s check failed (%u, 0x%x, 0x%x)\n", 469962306a36Sopenharmony_ci __func__, j, h.index, h.reserved); 470062306a36Sopenharmony_ci goto vmabort; 470162306a36Sopenharmony_ci } 470262306a36Sopenharmony_ci 470362306a36Sopenharmony_ci if (kvm_set_msr(vcpu, h.index, h.value)) { 470462306a36Sopenharmony_ci pr_debug_ratelimited( 470562306a36Sopenharmony_ci "%s WRMSR failed (%u, 0x%x, 0x%llx)\n", 470662306a36Sopenharmony_ci __func__, j, h.index, h.value); 470762306a36Sopenharmony_ci goto vmabort; 470862306a36Sopenharmony_ci } 470962306a36Sopenharmony_ci } 471062306a36Sopenharmony_ci } 471162306a36Sopenharmony_ci 
471262306a36Sopenharmony_ci return; 471362306a36Sopenharmony_ci 471462306a36Sopenharmony_civmabort: 471562306a36Sopenharmony_ci nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL); 471662306a36Sopenharmony_ci} 471762306a36Sopenharmony_ci 471862306a36Sopenharmony_ci/* 471962306a36Sopenharmony_ci * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1 472062306a36Sopenharmony_ci * and modify vmcs12 to make it see what it would expect to see there if 472162306a36Sopenharmony_ci * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) 472262306a36Sopenharmony_ci */ 472362306a36Sopenharmony_civoid nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, 472462306a36Sopenharmony_ci u32 exit_intr_info, unsigned long exit_qualification) 472562306a36Sopenharmony_ci{ 472662306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 472762306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 472862306a36Sopenharmony_ci 472962306a36Sopenharmony_ci /* Pending MTF traps are discarded on VM-Exit. */ 473062306a36Sopenharmony_ci vmx->nested.mtf_pending = false; 473162306a36Sopenharmony_ci 473262306a36Sopenharmony_ci /* trying to cancel vmlaunch/vmresume is a bug */ 473362306a36Sopenharmony_ci WARN_ON_ONCE(vmx->nested.nested_run_pending); 473462306a36Sopenharmony_ci 473562306a36Sopenharmony_ci if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { 473662306a36Sopenharmony_ci /* 473762306a36Sopenharmony_ci * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map 473862306a36Sopenharmony_ci * Enlightened VMCS after migration and we still need to 473962306a36Sopenharmony_ci * do that when something is forcing L2->L1 exit prior to 474062306a36Sopenharmony_ci * the first L2 run. 474162306a36Sopenharmony_ci */ 474262306a36Sopenharmony_ci (void)nested_get_evmcs_page(vcpu); 474362306a36Sopenharmony_ci } 474462306a36Sopenharmony_ci 474562306a36Sopenharmony_ci /* Service pending TLB flush requests for L2 before switching to L1. 
*/ 474662306a36Sopenharmony_ci kvm_service_local_tlb_flush_requests(vcpu); 474762306a36Sopenharmony_ci 474862306a36Sopenharmony_ci /* 474962306a36Sopenharmony_ci * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between 475062306a36Sopenharmony_ci * now and the new vmentry. Ensure that the VMCS02 PDPTR fields are 475162306a36Sopenharmony_ci * up-to-date before switching to L1. 475262306a36Sopenharmony_ci */ 475362306a36Sopenharmony_ci if (enable_ept && is_pae_paging(vcpu)) 475462306a36Sopenharmony_ci vmx_ept_load_pdptrs(vcpu); 475562306a36Sopenharmony_ci 475662306a36Sopenharmony_ci leave_guest_mode(vcpu); 475762306a36Sopenharmony_ci 475862306a36Sopenharmony_ci if (nested_cpu_has_preemption_timer(vmcs12)) 475962306a36Sopenharmony_ci hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer); 476062306a36Sopenharmony_ci 476162306a36Sopenharmony_ci if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING)) { 476262306a36Sopenharmony_ci vcpu->arch.tsc_offset = vcpu->arch.l1_tsc_offset; 476362306a36Sopenharmony_ci if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING)) 476462306a36Sopenharmony_ci vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio; 476562306a36Sopenharmony_ci } 476662306a36Sopenharmony_ci 476762306a36Sopenharmony_ci if (likely(!vmx->fail)) { 476862306a36Sopenharmony_ci sync_vmcs02_to_vmcs12(vcpu, vmcs12); 476962306a36Sopenharmony_ci 477062306a36Sopenharmony_ci if (vm_exit_reason != -1) 477162306a36Sopenharmony_ci prepare_vmcs12(vcpu, vmcs12, vm_exit_reason, 477262306a36Sopenharmony_ci exit_intr_info, exit_qualification); 477362306a36Sopenharmony_ci 477462306a36Sopenharmony_ci /* 477562306a36Sopenharmony_ci * Must happen outside of sync_vmcs02_to_vmcs12() as it will 477662306a36Sopenharmony_ci * also be used to capture vmcs12 cache as part of 477762306a36Sopenharmony_ci * capturing nVMX state for snapshot (migration). 
477862306a36Sopenharmony_ci * 477962306a36Sopenharmony_ci * Otherwise, this flush will dirty guest memory at a 478062306a36Sopenharmony_ci * point it is already assumed by user-space to be 478162306a36Sopenharmony_ci * immutable. 478262306a36Sopenharmony_ci */ 478362306a36Sopenharmony_ci nested_flush_cached_shadow_vmcs12(vcpu, vmcs12); 478462306a36Sopenharmony_ci } else { 478562306a36Sopenharmony_ci /* 478662306a36Sopenharmony_ci * The only expected VM-instruction error is "VM entry with 478762306a36Sopenharmony_ci * invalid control field(s)." Anything else indicates a 478862306a36Sopenharmony_ci * problem with L0. And we should never get here with a 478962306a36Sopenharmony_ci * VMFail of any type if early consistency checks are enabled. 479062306a36Sopenharmony_ci */ 479162306a36Sopenharmony_ci WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != 479262306a36Sopenharmony_ci VMXERR_ENTRY_INVALID_CONTROL_FIELD); 479362306a36Sopenharmony_ci WARN_ON_ONCE(nested_early_check); 479462306a36Sopenharmony_ci } 479562306a36Sopenharmony_ci 479662306a36Sopenharmony_ci /* 479762306a36Sopenharmony_ci * Drop events/exceptions that were queued for re-injection to L2 479862306a36Sopenharmony_ci * (picked up via vmx_complete_interrupts()), as well as exceptions 479962306a36Sopenharmony_ci * that were pending for L2. Note, this must NOT be hoisted above 480062306a36Sopenharmony_ci * prepare_vmcs12(), events/exceptions queued for re-injection need to 480162306a36Sopenharmony_ci * be captured in vmcs12 (see vmcs12_save_pending_event()). 
480262306a36Sopenharmony_ci */ 480362306a36Sopenharmony_ci vcpu->arch.nmi_injected = false; 480462306a36Sopenharmony_ci kvm_clear_exception_queue(vcpu); 480562306a36Sopenharmony_ci kvm_clear_interrupt_queue(vcpu); 480662306a36Sopenharmony_ci 480762306a36Sopenharmony_ci vmx_switch_vmcs(vcpu, &vmx->vmcs01); 480862306a36Sopenharmony_ci 480962306a36Sopenharmony_ci /* 481062306a36Sopenharmony_ci * If IBRS is advertised to the vCPU, KVM must flush the indirect 481162306a36Sopenharmony_ci * branch predictors when transitioning from L2 to L1, as L1 expects 481262306a36Sopenharmony_ci * hardware (KVM in this case) to provide separate predictor modes. 481362306a36Sopenharmony_ci * Bare metal isolates VMX root (host) from VMX non-root (guest), but 481462306a36Sopenharmony_ci * doesn't isolate different VMCSs, i.e. in this case, doesn't provide 481562306a36Sopenharmony_ci * separate modes for L2 vs L1. 481662306a36Sopenharmony_ci */ 481762306a36Sopenharmony_ci if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) 481862306a36Sopenharmony_ci indirect_branch_prediction_barrier(); 481962306a36Sopenharmony_ci 482062306a36Sopenharmony_ci /* Update any VMCS fields that might have changed while L2 ran */ 482162306a36Sopenharmony_ci vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); 482262306a36Sopenharmony_ci vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); 482362306a36Sopenharmony_ci vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); 482462306a36Sopenharmony_ci if (kvm_caps.has_tsc_control) 482562306a36Sopenharmony_ci vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio); 482662306a36Sopenharmony_ci 482762306a36Sopenharmony_ci if (vmx->nested.l1_tpr_threshold != -1) 482862306a36Sopenharmony_ci vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold); 482962306a36Sopenharmony_ci 483062306a36Sopenharmony_ci if (vmx->nested.change_vmcs01_virtual_apic_mode) { 483162306a36Sopenharmony_ci vmx->nested.change_vmcs01_virtual_apic_mode = false; 
483262306a36Sopenharmony_ci vmx_set_virtual_apic_mode(vcpu); 483362306a36Sopenharmony_ci } 483462306a36Sopenharmony_ci 483562306a36Sopenharmony_ci if (vmx->nested.update_vmcs01_cpu_dirty_logging) { 483662306a36Sopenharmony_ci vmx->nested.update_vmcs01_cpu_dirty_logging = false; 483762306a36Sopenharmony_ci vmx_update_cpu_dirty_logging(vcpu); 483862306a36Sopenharmony_ci } 483962306a36Sopenharmony_ci 484062306a36Sopenharmony_ci /* Unpin physical memory we referred to in vmcs02 */ 484162306a36Sopenharmony_ci kvm_vcpu_unmap(vcpu, &vmx->nested.apic_access_page_map, false); 484262306a36Sopenharmony_ci kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); 484362306a36Sopenharmony_ci kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); 484462306a36Sopenharmony_ci vmx->nested.pi_desc = NULL; 484562306a36Sopenharmony_ci 484662306a36Sopenharmony_ci if (vmx->nested.reload_vmcs01_apic_access_page) { 484762306a36Sopenharmony_ci vmx->nested.reload_vmcs01_apic_access_page = false; 484862306a36Sopenharmony_ci kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); 484962306a36Sopenharmony_ci } 485062306a36Sopenharmony_ci 485162306a36Sopenharmony_ci if (vmx->nested.update_vmcs01_apicv_status) { 485262306a36Sopenharmony_ci vmx->nested.update_vmcs01_apicv_status = false; 485362306a36Sopenharmony_ci kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); 485462306a36Sopenharmony_ci } 485562306a36Sopenharmony_ci 485662306a36Sopenharmony_ci if ((vm_exit_reason != -1) && 485762306a36Sopenharmony_ci (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))) 485862306a36Sopenharmony_ci vmx->nested.need_vmcs12_to_shadow_sync = true; 485962306a36Sopenharmony_ci 486062306a36Sopenharmony_ci /* in case we halted in L2 */ 486162306a36Sopenharmony_ci vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 486262306a36Sopenharmony_ci 486362306a36Sopenharmony_ci if (likely(!vmx->fail)) { 486462306a36Sopenharmony_ci if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && 486562306a36Sopenharmony_ci 
nested_exit_intr_ack_set(vcpu)) { 486662306a36Sopenharmony_ci int irq = kvm_cpu_get_interrupt(vcpu); 486762306a36Sopenharmony_ci WARN_ON(irq < 0); 486862306a36Sopenharmony_ci vmcs12->vm_exit_intr_info = irq | 486962306a36Sopenharmony_ci INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR; 487062306a36Sopenharmony_ci } 487162306a36Sopenharmony_ci 487262306a36Sopenharmony_ci if (vm_exit_reason != -1) 487362306a36Sopenharmony_ci trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, 487462306a36Sopenharmony_ci vmcs12->exit_qualification, 487562306a36Sopenharmony_ci vmcs12->idt_vectoring_info_field, 487662306a36Sopenharmony_ci vmcs12->vm_exit_intr_info, 487762306a36Sopenharmony_ci vmcs12->vm_exit_intr_error_code, 487862306a36Sopenharmony_ci KVM_ISA_VMX); 487962306a36Sopenharmony_ci 488062306a36Sopenharmony_ci load_vmcs12_host_state(vcpu, vmcs12); 488162306a36Sopenharmony_ci 488262306a36Sopenharmony_ci return; 488362306a36Sopenharmony_ci } 488462306a36Sopenharmony_ci 488562306a36Sopenharmony_ci /* 488662306a36Sopenharmony_ci * After an early L2 VM-entry failure, we're now back 488762306a36Sopenharmony_ci * in L1 which thinks it just finished a VMLAUNCH or 488862306a36Sopenharmony_ci * VMRESUME instruction, so we need to set the failure 488962306a36Sopenharmony_ci * flag and the VM-instruction error field of the VMCS 489062306a36Sopenharmony_ci * accordingly, and skip the emulated instruction. 489162306a36Sopenharmony_ci */ 489262306a36Sopenharmony_ci (void)nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 489362306a36Sopenharmony_ci 489462306a36Sopenharmony_ci /* 489562306a36Sopenharmony_ci * Restore L1's host state to KVM's software model. We're here 489662306a36Sopenharmony_ci * because a consistency check was caught by hardware, which 489762306a36Sopenharmony_ci * means some amount of guest state has been propagated to KVM's 489862306a36Sopenharmony_ci * model and needs to be unwound to the host's state. 
489962306a36Sopenharmony_ci */ 490062306a36Sopenharmony_ci nested_vmx_restore_host_state(vcpu); 490162306a36Sopenharmony_ci 490262306a36Sopenharmony_ci vmx->fail = 0; 490362306a36Sopenharmony_ci} 490462306a36Sopenharmony_ci 490562306a36Sopenharmony_cistatic void nested_vmx_triple_fault(struct kvm_vcpu *vcpu) 490662306a36Sopenharmony_ci{ 490762306a36Sopenharmony_ci kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); 490862306a36Sopenharmony_ci nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0); 490962306a36Sopenharmony_ci} 491062306a36Sopenharmony_ci 491162306a36Sopenharmony_ci/* 491262306a36Sopenharmony_ci * Decode the memory-address operand of a vmx instruction, as recorded on an 491362306a36Sopenharmony_ci * exit caused by such an instruction (run by a guest hypervisor). 491462306a36Sopenharmony_ci * On success, returns 0. When the operand is invalid, returns 1 and throws 491562306a36Sopenharmony_ci * #UD, #GP, or #SS. 491662306a36Sopenharmony_ci */ 491762306a36Sopenharmony_ciint get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, 491862306a36Sopenharmony_ci u32 vmx_instruction_info, bool wr, int len, gva_t *ret) 491962306a36Sopenharmony_ci{ 492062306a36Sopenharmony_ci gva_t off; 492162306a36Sopenharmony_ci bool exn; 492262306a36Sopenharmony_ci struct kvm_segment s; 492362306a36Sopenharmony_ci 492462306a36Sopenharmony_ci /* 492562306a36Sopenharmony_ci * According to Vol. 3B, "Information for VM Exits Due to Instruction 492662306a36Sopenharmony_ci * Execution", on an exit, vmx_instruction_info holds most of the 492762306a36Sopenharmony_ci * addressing components of the operand. Only the displacement part 492862306a36Sopenharmony_ci * is put in exit_qualification (see 3B, "Basic VM-Exit Information"). 492962306a36Sopenharmony_ci * For how an actual address is calculated from all these components, 493062306a36Sopenharmony_ci * refer to Vol. 1, "Operand Addressing". 
493162306a36Sopenharmony_ci */ 493262306a36Sopenharmony_ci int scaling = vmx_instruction_info & 3; 493362306a36Sopenharmony_ci int addr_size = (vmx_instruction_info >> 7) & 7; 493462306a36Sopenharmony_ci bool is_reg = vmx_instruction_info & (1u << 10); 493562306a36Sopenharmony_ci int seg_reg = (vmx_instruction_info >> 15) & 7; 493662306a36Sopenharmony_ci int index_reg = (vmx_instruction_info >> 18) & 0xf; 493762306a36Sopenharmony_ci bool index_is_valid = !(vmx_instruction_info & (1u << 22)); 493862306a36Sopenharmony_ci int base_reg = (vmx_instruction_info >> 23) & 0xf; 493962306a36Sopenharmony_ci bool base_is_valid = !(vmx_instruction_info & (1u << 27)); 494062306a36Sopenharmony_ci 494162306a36Sopenharmony_ci if (is_reg) { 494262306a36Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 494362306a36Sopenharmony_ci return 1; 494462306a36Sopenharmony_ci } 494562306a36Sopenharmony_ci 494662306a36Sopenharmony_ci /* Addr = segment_base + offset */ 494762306a36Sopenharmony_ci /* offset = base + [index * scale] + displacement */ 494862306a36Sopenharmony_ci off = exit_qualification; /* holds the displacement */ 494962306a36Sopenharmony_ci if (addr_size == 1) 495062306a36Sopenharmony_ci off = (gva_t)sign_extend64(off, 31); 495162306a36Sopenharmony_ci else if (addr_size == 0) 495262306a36Sopenharmony_ci off = (gva_t)sign_extend64(off, 15); 495362306a36Sopenharmony_ci if (base_is_valid) 495462306a36Sopenharmony_ci off += kvm_register_read(vcpu, base_reg); 495562306a36Sopenharmony_ci if (index_is_valid) 495662306a36Sopenharmony_ci off += kvm_register_read(vcpu, index_reg) << scaling; 495762306a36Sopenharmony_ci vmx_get_segment(vcpu, &s, seg_reg); 495862306a36Sopenharmony_ci 495962306a36Sopenharmony_ci /* 496062306a36Sopenharmony_ci * The effective address, i.e. @off, of a memory operand is truncated 496162306a36Sopenharmony_ci * based on the address size of the instruction. Note that this is 496262306a36Sopenharmony_ci * the *effective address*, i.e. 
the address prior to accounting for 496362306a36Sopenharmony_ci * the segment's base. 496462306a36Sopenharmony_ci */ 496562306a36Sopenharmony_ci if (addr_size == 1) /* 32 bit */ 496662306a36Sopenharmony_ci off &= 0xffffffff; 496762306a36Sopenharmony_ci else if (addr_size == 0) /* 16 bit */ 496862306a36Sopenharmony_ci off &= 0xffff; 496962306a36Sopenharmony_ci 497062306a36Sopenharmony_ci /* Checks for #GP/#SS exceptions. */ 497162306a36Sopenharmony_ci exn = false; 497262306a36Sopenharmony_ci if (is_long_mode(vcpu)) { 497362306a36Sopenharmony_ci /* 497462306a36Sopenharmony_ci * The virtual/linear address is never truncated in 64-bit 497562306a36Sopenharmony_ci * mode, e.g. a 32-bit address size can yield a 64-bit virtual 497662306a36Sopenharmony_ci * address when using FS/GS with a non-zero base. 497762306a36Sopenharmony_ci */ 497862306a36Sopenharmony_ci if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS) 497962306a36Sopenharmony_ci *ret = s.base + off; 498062306a36Sopenharmony_ci else 498162306a36Sopenharmony_ci *ret = off; 498262306a36Sopenharmony_ci 498362306a36Sopenharmony_ci /* Long mode: #GP(0)/#SS(0) if the memory address is in a 498462306a36Sopenharmony_ci * non-canonical form. This is the only check on the memory 498562306a36Sopenharmony_ci * destination for long mode! 498662306a36Sopenharmony_ci */ 498762306a36Sopenharmony_ci exn = is_noncanonical_address(*ret, vcpu); 498862306a36Sopenharmony_ci } else { 498962306a36Sopenharmony_ci /* 499062306a36Sopenharmony_ci * When not in long mode, the virtual/linear address is 499162306a36Sopenharmony_ci * unconditionally truncated to 32 bits regardless of the 499262306a36Sopenharmony_ci * address size. 
499362306a36Sopenharmony_ci */ 499462306a36Sopenharmony_ci *ret = (s.base + off) & 0xffffffff; 499562306a36Sopenharmony_ci 499662306a36Sopenharmony_ci /* Protected mode: apply checks for segment validity in the 499762306a36Sopenharmony_ci * following order: 499862306a36Sopenharmony_ci * - segment type check (#GP(0) may be thrown) 499962306a36Sopenharmony_ci * - usability check (#GP(0)/#SS(0)) 500062306a36Sopenharmony_ci * - limit check (#GP(0)/#SS(0)) 500162306a36Sopenharmony_ci */ 500262306a36Sopenharmony_ci if (wr) 500362306a36Sopenharmony_ci /* #GP(0) if the destination operand is located in a 500462306a36Sopenharmony_ci * read-only data segment or any code segment. 500562306a36Sopenharmony_ci */ 500662306a36Sopenharmony_ci exn = ((s.type & 0xa) == 0 || (s.type & 8)); 500762306a36Sopenharmony_ci else 500862306a36Sopenharmony_ci /* #GP(0) if the source operand is located in an 500962306a36Sopenharmony_ci * execute-only code segment 501062306a36Sopenharmony_ci */ 501162306a36Sopenharmony_ci exn = ((s.type & 0xa) == 8); 501262306a36Sopenharmony_ci if (exn) { 501362306a36Sopenharmony_ci kvm_queue_exception_e(vcpu, GP_VECTOR, 0); 501462306a36Sopenharmony_ci return 1; 501562306a36Sopenharmony_ci } 501662306a36Sopenharmony_ci /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. 501762306a36Sopenharmony_ci */ 501862306a36Sopenharmony_ci exn = (s.unusable != 0); 501962306a36Sopenharmony_ci 502062306a36Sopenharmony_ci /* 502162306a36Sopenharmony_ci * Protected mode: #GP(0)/#SS(0) if the memory operand is 502262306a36Sopenharmony_ci * outside the segment limit. All CPUs that support VMX ignore 502362306a36Sopenharmony_ci * limit checks for flat segments, i.e. segments with base==0, 502462306a36Sopenharmony_ci * limit==0xffffffff and of type expand-up data or code. 
502562306a36Sopenharmony_ci */ 502662306a36Sopenharmony_ci if (!(s.base == 0 && s.limit == 0xffffffff && 502762306a36Sopenharmony_ci ((s.type & 8) || !(s.type & 4)))) 502862306a36Sopenharmony_ci exn = exn || ((u64)off + len - 1 > s.limit); 502962306a36Sopenharmony_ci } 503062306a36Sopenharmony_ci if (exn) { 503162306a36Sopenharmony_ci kvm_queue_exception_e(vcpu, 503262306a36Sopenharmony_ci seg_reg == VCPU_SREG_SS ? 503362306a36Sopenharmony_ci SS_VECTOR : GP_VECTOR, 503462306a36Sopenharmony_ci 0); 503562306a36Sopenharmony_ci return 1; 503662306a36Sopenharmony_ci } 503762306a36Sopenharmony_ci 503862306a36Sopenharmony_ci return 0; 503962306a36Sopenharmony_ci} 504062306a36Sopenharmony_ci 504162306a36Sopenharmony_cistatic int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer, 504262306a36Sopenharmony_ci int *ret) 504362306a36Sopenharmony_ci{ 504462306a36Sopenharmony_ci gva_t gva; 504562306a36Sopenharmony_ci struct x86_exception e; 504662306a36Sopenharmony_ci int r; 504762306a36Sopenharmony_ci 504862306a36Sopenharmony_ci if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), 504962306a36Sopenharmony_ci vmcs_read32(VMX_INSTRUCTION_INFO), false, 505062306a36Sopenharmony_ci sizeof(*vmpointer), &gva)) { 505162306a36Sopenharmony_ci *ret = 1; 505262306a36Sopenharmony_ci return -EINVAL; 505362306a36Sopenharmony_ci } 505462306a36Sopenharmony_ci 505562306a36Sopenharmony_ci r = kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e); 505662306a36Sopenharmony_ci if (r != X86EMUL_CONTINUE) { 505762306a36Sopenharmony_ci *ret = kvm_handle_memory_failure(vcpu, r, &e); 505862306a36Sopenharmony_ci return -EINVAL; 505962306a36Sopenharmony_ci } 506062306a36Sopenharmony_ci 506162306a36Sopenharmony_ci return 0; 506262306a36Sopenharmony_ci} 506362306a36Sopenharmony_ci 506462306a36Sopenharmony_ci/* 506562306a36Sopenharmony_ci * Allocate a shadow VMCS and associate it with the currently loaded 506662306a36Sopenharmony_ci * VMCS, unless such a shadow VMCS already exists. 
The newly allocated 506762306a36Sopenharmony_ci * VMCS is also VMCLEARed, so that it is ready for use. 506862306a36Sopenharmony_ci */ 506962306a36Sopenharmony_cistatic struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu) 507062306a36Sopenharmony_ci{ 507162306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 507262306a36Sopenharmony_ci struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs; 507362306a36Sopenharmony_ci 507462306a36Sopenharmony_ci /* 507562306a36Sopenharmony_ci * KVM allocates a shadow VMCS only when L1 executes VMXON and frees it 507662306a36Sopenharmony_ci * when L1 executes VMXOFF or the vCPU is forced out of nested 507762306a36Sopenharmony_ci * operation. VMXON faults if the CPU is already post-VMXON, so it 507862306a36Sopenharmony_ci * should be impossible to already have an allocated shadow VMCS. KVM 507962306a36Sopenharmony_ci * doesn't support virtualization of VMCS shadowing, so vmcs01 should 508062306a36Sopenharmony_ci * always be the loaded VMCS. 508162306a36Sopenharmony_ci */ 508262306a36Sopenharmony_ci if (WARN_ON(loaded_vmcs != &vmx->vmcs01 || loaded_vmcs->shadow_vmcs)) 508362306a36Sopenharmony_ci return loaded_vmcs->shadow_vmcs; 508462306a36Sopenharmony_ci 508562306a36Sopenharmony_ci loaded_vmcs->shadow_vmcs = alloc_vmcs(true); 508662306a36Sopenharmony_ci if (loaded_vmcs->shadow_vmcs) 508762306a36Sopenharmony_ci vmcs_clear(loaded_vmcs->shadow_vmcs); 508862306a36Sopenharmony_ci 508962306a36Sopenharmony_ci return loaded_vmcs->shadow_vmcs; 509062306a36Sopenharmony_ci} 509162306a36Sopenharmony_ci 509262306a36Sopenharmony_cistatic int enter_vmx_operation(struct kvm_vcpu *vcpu) 509362306a36Sopenharmony_ci{ 509462306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 509562306a36Sopenharmony_ci int r; 509662306a36Sopenharmony_ci 509762306a36Sopenharmony_ci r = alloc_loaded_vmcs(&vmx->nested.vmcs02); 509862306a36Sopenharmony_ci if (r < 0) 509962306a36Sopenharmony_ci goto out_vmcs02; 510062306a36Sopenharmony_ci 510162306a36Sopenharmony_ci 
vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT); 510262306a36Sopenharmony_ci if (!vmx->nested.cached_vmcs12) 510362306a36Sopenharmony_ci goto out_cached_vmcs12; 510462306a36Sopenharmony_ci 510562306a36Sopenharmony_ci vmx->nested.shadow_vmcs12_cache.gpa = INVALID_GPA; 510662306a36Sopenharmony_ci vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT); 510762306a36Sopenharmony_ci if (!vmx->nested.cached_shadow_vmcs12) 510862306a36Sopenharmony_ci goto out_cached_shadow_vmcs12; 510962306a36Sopenharmony_ci 511062306a36Sopenharmony_ci if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu)) 511162306a36Sopenharmony_ci goto out_shadow_vmcs; 511262306a36Sopenharmony_ci 511362306a36Sopenharmony_ci hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, 511462306a36Sopenharmony_ci HRTIMER_MODE_ABS_PINNED); 511562306a36Sopenharmony_ci vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; 511662306a36Sopenharmony_ci 511762306a36Sopenharmony_ci vmx->nested.vpid02 = allocate_vpid(); 511862306a36Sopenharmony_ci 511962306a36Sopenharmony_ci vmx->nested.vmcs02_initialized = false; 512062306a36Sopenharmony_ci vmx->nested.vmxon = true; 512162306a36Sopenharmony_ci 512262306a36Sopenharmony_ci if (vmx_pt_mode_is_host_guest()) { 512362306a36Sopenharmony_ci vmx->pt_desc.guest.ctl = 0; 512462306a36Sopenharmony_ci pt_update_intercept_for_msr(vcpu); 512562306a36Sopenharmony_ci } 512662306a36Sopenharmony_ci 512762306a36Sopenharmony_ci return 0; 512862306a36Sopenharmony_ci 512962306a36Sopenharmony_ciout_shadow_vmcs: 513062306a36Sopenharmony_ci kfree(vmx->nested.cached_shadow_vmcs12); 513162306a36Sopenharmony_ci 513262306a36Sopenharmony_ciout_cached_shadow_vmcs12: 513362306a36Sopenharmony_ci kfree(vmx->nested.cached_vmcs12); 513462306a36Sopenharmony_ci 513562306a36Sopenharmony_ciout_cached_vmcs12: 513662306a36Sopenharmony_ci free_loaded_vmcs(&vmx->nested.vmcs02); 513762306a36Sopenharmony_ci 513862306a36Sopenharmony_ciout_vmcs02: 
513962306a36Sopenharmony_ci return -ENOMEM; 514062306a36Sopenharmony_ci} 514162306a36Sopenharmony_ci 514262306a36Sopenharmony_ci/* Emulate the VMXON instruction. */ 514362306a36Sopenharmony_cistatic int handle_vmxon(struct kvm_vcpu *vcpu) 514462306a36Sopenharmony_ci{ 514562306a36Sopenharmony_ci int ret; 514662306a36Sopenharmony_ci gpa_t vmptr; 514762306a36Sopenharmony_ci uint32_t revision; 514862306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 514962306a36Sopenharmony_ci const u64 VMXON_NEEDED_FEATURES = FEAT_CTL_LOCKED 515062306a36Sopenharmony_ci | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; 515162306a36Sopenharmony_ci 515262306a36Sopenharmony_ci /* 515362306a36Sopenharmony_ci * Manually check CR4.VMXE checks, KVM must force CR4.VMXE=1 to enter 515462306a36Sopenharmony_ci * the guest and so cannot rely on hardware to perform the check, 515562306a36Sopenharmony_ci * which has higher priority than VM-Exit (see Intel SDM's pseudocode 515662306a36Sopenharmony_ci * for VMXON). 515762306a36Sopenharmony_ci * 515862306a36Sopenharmony_ci * Rely on hardware for the other pre-VM-Exit checks, CR0.PE=1, !VM86 515962306a36Sopenharmony_ci * and !COMPATIBILITY modes. For an unrestricted guest, KVM doesn't 516062306a36Sopenharmony_ci * force any of the relevant guest state. For a restricted guest, KVM 516162306a36Sopenharmony_ci * does force CR0.PE=1, but only to also force VM86 in order to emulate 516262306a36Sopenharmony_ci * Real Mode, and so there's no need to check CR0.PE manually. 
516362306a36Sopenharmony_ci */ 516462306a36Sopenharmony_ci if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_VMXE)) { 516562306a36Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 516662306a36Sopenharmony_ci return 1; 516762306a36Sopenharmony_ci } 516862306a36Sopenharmony_ci 516962306a36Sopenharmony_ci /* 517062306a36Sopenharmony_ci * The CPL is checked for "not in VMX operation" and for "in VMX root", 517162306a36Sopenharmony_ci * and has higher priority than the VM-Fail due to being post-VMXON, 517262306a36Sopenharmony_ci * i.e. VMXON #GPs outside of VMX non-root if CPL!=0. In VMX non-root, 517362306a36Sopenharmony_ci * VMXON causes VM-Exit and KVM unconditionally forwards VMXON VM-Exits 517462306a36Sopenharmony_ci * from L2 to L1, i.e. there's no need to check for the vCPU being in 517562306a36Sopenharmony_ci * VMX non-root. 517662306a36Sopenharmony_ci * 517762306a36Sopenharmony_ci * Forwarding the VM-Exit unconditionally, i.e. without performing the 517862306a36Sopenharmony_ci * #UD checks (see above), is functionally ok because KVM doesn't allow 517962306a36Sopenharmony_ci * L1 to run L2 without CR4.VMXE=0, and because KVM never modifies L2's 518062306a36Sopenharmony_ci * CR0 or CR4, i.e. it's L2's responsibility to emulate #UDs that are 518162306a36Sopenharmony_ci * missed by hardware due to shadowing CR0 and/or CR4. 518262306a36Sopenharmony_ci */ 518362306a36Sopenharmony_ci if (vmx_get_cpl(vcpu)) { 518462306a36Sopenharmony_ci kvm_inject_gp(vcpu, 0); 518562306a36Sopenharmony_ci return 1; 518662306a36Sopenharmony_ci } 518762306a36Sopenharmony_ci 518862306a36Sopenharmony_ci if (vmx->nested.vmxon) 518962306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); 519062306a36Sopenharmony_ci 519162306a36Sopenharmony_ci /* 519262306a36Sopenharmony_ci * Invalid CR0/CR4 generates #GP. These checks are performed if and 519362306a36Sopenharmony_ci * only if the vCPU isn't already in VMX operation, i.e. 
effectively 519462306a36Sopenharmony_ci * have lower priority than the VM-Fail above. 519562306a36Sopenharmony_ci */ 519662306a36Sopenharmony_ci if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) || 519762306a36Sopenharmony_ci !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { 519862306a36Sopenharmony_ci kvm_inject_gp(vcpu, 0); 519962306a36Sopenharmony_ci return 1; 520062306a36Sopenharmony_ci } 520162306a36Sopenharmony_ci 520262306a36Sopenharmony_ci if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES) 520362306a36Sopenharmony_ci != VMXON_NEEDED_FEATURES) { 520462306a36Sopenharmony_ci kvm_inject_gp(vcpu, 0); 520562306a36Sopenharmony_ci return 1; 520662306a36Sopenharmony_ci } 520762306a36Sopenharmony_ci 520862306a36Sopenharmony_ci if (nested_vmx_get_vmptr(vcpu, &vmptr, &ret)) 520962306a36Sopenharmony_ci return ret; 521062306a36Sopenharmony_ci 521162306a36Sopenharmony_ci /* 521262306a36Sopenharmony_ci * SDM 3: 24.11.5 521362306a36Sopenharmony_ci * The first 4 bytes of VMXON region contain the supported 521462306a36Sopenharmony_ci * VMCS revision identifier 521562306a36Sopenharmony_ci * 521662306a36Sopenharmony_ci * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case; 521762306a36Sopenharmony_ci * which replaces physical address width with 32 521862306a36Sopenharmony_ci */ 521962306a36Sopenharmony_ci if (!page_address_valid(vcpu, vmptr)) 522062306a36Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 522162306a36Sopenharmony_ci 522262306a36Sopenharmony_ci if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) || 522362306a36Sopenharmony_ci revision != VMCS12_REVISION) 522462306a36Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 522562306a36Sopenharmony_ci 522662306a36Sopenharmony_ci vmx->nested.vmxon_ptr = vmptr; 522762306a36Sopenharmony_ci ret = enter_vmx_operation(vcpu); 522862306a36Sopenharmony_ci if (ret) 522962306a36Sopenharmony_ci return ret; 523062306a36Sopenharmony_ci 523162306a36Sopenharmony_ci return 
nested_vmx_succeed(vcpu); 523262306a36Sopenharmony_ci} 523362306a36Sopenharmony_ci 523462306a36Sopenharmony_cistatic inline void nested_release_vmcs12(struct kvm_vcpu *vcpu) 523562306a36Sopenharmony_ci{ 523662306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 523762306a36Sopenharmony_ci 523862306a36Sopenharmony_ci if (vmx->nested.current_vmptr == INVALID_GPA) 523962306a36Sopenharmony_ci return; 524062306a36Sopenharmony_ci 524162306a36Sopenharmony_ci copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu)); 524262306a36Sopenharmony_ci 524362306a36Sopenharmony_ci if (enable_shadow_vmcs) { 524462306a36Sopenharmony_ci /* copy to memory all shadowed fields in case 524562306a36Sopenharmony_ci they were modified */ 524662306a36Sopenharmony_ci copy_shadow_to_vmcs12(vmx); 524762306a36Sopenharmony_ci vmx_disable_shadow_vmcs(vmx); 524862306a36Sopenharmony_ci } 524962306a36Sopenharmony_ci vmx->nested.posted_intr_nv = -1; 525062306a36Sopenharmony_ci 525162306a36Sopenharmony_ci /* Flush VMCS12 to guest memory */ 525262306a36Sopenharmony_ci kvm_vcpu_write_guest_page(vcpu, 525362306a36Sopenharmony_ci vmx->nested.current_vmptr >> PAGE_SHIFT, 525462306a36Sopenharmony_ci vmx->nested.cached_vmcs12, 0, VMCS12_SIZE); 525562306a36Sopenharmony_ci 525662306a36Sopenharmony_ci kvm_mmu_free_roots(vcpu->kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); 525762306a36Sopenharmony_ci 525862306a36Sopenharmony_ci vmx->nested.current_vmptr = INVALID_GPA; 525962306a36Sopenharmony_ci} 526062306a36Sopenharmony_ci 526162306a36Sopenharmony_ci/* Emulate the VMXOFF instruction */ 526262306a36Sopenharmony_cistatic int handle_vmxoff(struct kvm_vcpu *vcpu) 526362306a36Sopenharmony_ci{ 526462306a36Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 526562306a36Sopenharmony_ci return 1; 526662306a36Sopenharmony_ci 526762306a36Sopenharmony_ci free_nested(vcpu); 526862306a36Sopenharmony_ci 526962306a36Sopenharmony_ci if (kvm_apic_has_pending_init_or_sipi(vcpu)) 527062306a36Sopenharmony_ci 
kvm_make_request(KVM_REQ_EVENT, vcpu); 527162306a36Sopenharmony_ci 527262306a36Sopenharmony_ci return nested_vmx_succeed(vcpu); 527362306a36Sopenharmony_ci} 527462306a36Sopenharmony_ci 527562306a36Sopenharmony_ci/* Emulate the VMCLEAR instruction */ 527662306a36Sopenharmony_cistatic int handle_vmclear(struct kvm_vcpu *vcpu) 527762306a36Sopenharmony_ci{ 527862306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 527962306a36Sopenharmony_ci u32 zero = 0; 528062306a36Sopenharmony_ci gpa_t vmptr; 528162306a36Sopenharmony_ci int r; 528262306a36Sopenharmony_ci 528362306a36Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 528462306a36Sopenharmony_ci return 1; 528562306a36Sopenharmony_ci 528662306a36Sopenharmony_ci if (nested_vmx_get_vmptr(vcpu, &vmptr, &r)) 528762306a36Sopenharmony_ci return r; 528862306a36Sopenharmony_ci 528962306a36Sopenharmony_ci if (!page_address_valid(vcpu, vmptr)) 529062306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); 529162306a36Sopenharmony_ci 529262306a36Sopenharmony_ci if (vmptr == vmx->nested.vmxon_ptr) 529362306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_VMXON_POINTER); 529462306a36Sopenharmony_ci 529562306a36Sopenharmony_ci /* 529662306a36Sopenharmony_ci * When Enlightened VMEntry is enabled on the calling CPU we treat 529762306a36Sopenharmony_ci * memory area pointer by vmptr as Enlightened VMCS (as there's no good 529862306a36Sopenharmony_ci * way to distinguish it from VMCS12) and we must not corrupt it by 529962306a36Sopenharmony_ci * writing to the non-existent 'launch_state' field. The area doesn't 530062306a36Sopenharmony_ci * have to be the currently active EVMCS on the calling CPU and there's 530162306a36Sopenharmony_ci * nothing KVM has to do to transition it from 'active' to 'non-active' 530262306a36Sopenharmony_ci * state. It is possible that the area will stay mapped as 530362306a36Sopenharmony_ci * vmx->nested.hv_evmcs but this shouldn't be a problem. 
530462306a36Sopenharmony_ci */ 530562306a36Sopenharmony_ci if (likely(!guest_cpuid_has_evmcs(vcpu) || 530662306a36Sopenharmony_ci !evmptr_is_valid(nested_get_evmptr(vcpu)))) { 530762306a36Sopenharmony_ci if (vmptr == vmx->nested.current_vmptr) 530862306a36Sopenharmony_ci nested_release_vmcs12(vcpu); 530962306a36Sopenharmony_ci 531062306a36Sopenharmony_ci /* 531162306a36Sopenharmony_ci * Silently ignore memory errors on VMCLEAR, Intel's pseudocode 531262306a36Sopenharmony_ci * for VMCLEAR includes a "ensure that data for VMCS referenced 531362306a36Sopenharmony_ci * by the operand is in memory" clause that guards writes to 531462306a36Sopenharmony_ci * memory, i.e. doing nothing for I/O is architecturally valid. 531562306a36Sopenharmony_ci * 531662306a36Sopenharmony_ci * FIXME: Suppress failures if and only if no memslot is found, 531762306a36Sopenharmony_ci * i.e. exit to userspace if __copy_to_user() fails. 531862306a36Sopenharmony_ci */ 531962306a36Sopenharmony_ci (void)kvm_vcpu_write_guest(vcpu, 532062306a36Sopenharmony_ci vmptr + offsetof(struct vmcs12, 532162306a36Sopenharmony_ci launch_state), 532262306a36Sopenharmony_ci &zero, sizeof(zero)); 532362306a36Sopenharmony_ci } else if (vmx->nested.hv_evmcs && vmptr == vmx->nested.hv_evmcs_vmptr) { 532462306a36Sopenharmony_ci nested_release_evmcs(vcpu); 532562306a36Sopenharmony_ci } 532662306a36Sopenharmony_ci 532762306a36Sopenharmony_ci return nested_vmx_succeed(vcpu); 532862306a36Sopenharmony_ci} 532962306a36Sopenharmony_ci 533062306a36Sopenharmony_ci/* Emulate the VMLAUNCH instruction */ 533162306a36Sopenharmony_cistatic int handle_vmlaunch(struct kvm_vcpu *vcpu) 533262306a36Sopenharmony_ci{ 533362306a36Sopenharmony_ci return nested_vmx_run(vcpu, true); 533462306a36Sopenharmony_ci} 533562306a36Sopenharmony_ci 533662306a36Sopenharmony_ci/* Emulate the VMRESUME instruction */ 533762306a36Sopenharmony_cistatic int handle_vmresume(struct kvm_vcpu *vcpu) 533862306a36Sopenharmony_ci{ 533962306a36Sopenharmony_ci 
534062306a36Sopenharmony_ci return nested_vmx_run(vcpu, false); 534162306a36Sopenharmony_ci} 534262306a36Sopenharmony_ci 534362306a36Sopenharmony_cistatic int handle_vmread(struct kvm_vcpu *vcpu) 534462306a36Sopenharmony_ci{ 534562306a36Sopenharmony_ci struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu) 534662306a36Sopenharmony_ci : get_vmcs12(vcpu); 534762306a36Sopenharmony_ci unsigned long exit_qualification = vmx_get_exit_qual(vcpu); 534862306a36Sopenharmony_ci u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); 534962306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 535062306a36Sopenharmony_ci struct x86_exception e; 535162306a36Sopenharmony_ci unsigned long field; 535262306a36Sopenharmony_ci u64 value; 535362306a36Sopenharmony_ci gva_t gva = 0; 535462306a36Sopenharmony_ci short offset; 535562306a36Sopenharmony_ci int len, r; 535662306a36Sopenharmony_ci 535762306a36Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 535862306a36Sopenharmony_ci return 1; 535962306a36Sopenharmony_ci 536062306a36Sopenharmony_ci /* Decode instruction info and find the field to read */ 536162306a36Sopenharmony_ci field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf)); 536262306a36Sopenharmony_ci 536362306a36Sopenharmony_ci if (!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) { 536462306a36Sopenharmony_ci /* 536562306a36Sopenharmony_ci * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA, 536662306a36Sopenharmony_ci * any VMREAD sets the ALU flags for VMfailInvalid. 
536762306a36Sopenharmony_ci */ 536862306a36Sopenharmony_ci if (vmx->nested.current_vmptr == INVALID_GPA || 536962306a36Sopenharmony_ci (is_guest_mode(vcpu) && 537062306a36Sopenharmony_ci get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA)) 537162306a36Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 537262306a36Sopenharmony_ci 537362306a36Sopenharmony_ci offset = get_vmcs12_field_offset(field); 537462306a36Sopenharmony_ci if (offset < 0) 537562306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); 537662306a36Sopenharmony_ci 537762306a36Sopenharmony_ci if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) 537862306a36Sopenharmony_ci copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); 537962306a36Sopenharmony_ci 538062306a36Sopenharmony_ci /* Read the field, zero-extended to a u64 value */ 538162306a36Sopenharmony_ci value = vmcs12_read_any(vmcs12, field, offset); 538262306a36Sopenharmony_ci } else { 538362306a36Sopenharmony_ci /* 538462306a36Sopenharmony_ci * Hyper-V TLFS (as of 6.0b) explicitly states, that while an 538562306a36Sopenharmony_ci * enlightened VMCS is active VMREAD/VMWRITE instructions are 538662306a36Sopenharmony_ci * unsupported. Unfortunately, certain versions of Windows 11 538762306a36Sopenharmony_ci * don't comply with this requirement which is not enforced in 538862306a36Sopenharmony_ci * genuine Hyper-V. Allow VMREAD from an enlightened VMCS as a 538962306a36Sopenharmony_ci * workaround, as misbehaving guests will panic on VM-Fail. 539062306a36Sopenharmony_ci * Note, enlightened VMCS is incompatible with shadow VMCS so 539162306a36Sopenharmony_ci * all VMREADs from L2 should go to L1. 
539262306a36Sopenharmony_ci */ 539362306a36Sopenharmony_ci if (WARN_ON_ONCE(is_guest_mode(vcpu))) 539462306a36Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 539562306a36Sopenharmony_ci 539662306a36Sopenharmony_ci offset = evmcs_field_offset(field, NULL); 539762306a36Sopenharmony_ci if (offset < 0) 539862306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); 539962306a36Sopenharmony_ci 540062306a36Sopenharmony_ci /* Read the field, zero-extended to a u64 value */ 540162306a36Sopenharmony_ci value = evmcs_read_any(vmx->nested.hv_evmcs, field, offset); 540262306a36Sopenharmony_ci } 540362306a36Sopenharmony_ci 540462306a36Sopenharmony_ci /* 540562306a36Sopenharmony_ci * Now copy part of this value to register or memory, as requested. 540662306a36Sopenharmony_ci * Note that the number of bits actually copied is 32 or 64 depending 540762306a36Sopenharmony_ci * on the guest's mode (32 or 64 bit), not on the given field's length. 540862306a36Sopenharmony_ci */ 540962306a36Sopenharmony_ci if (instr_info & BIT(10)) { 541062306a36Sopenharmony_ci kvm_register_write(vcpu, (((instr_info) >> 3) & 0xf), value); 541162306a36Sopenharmony_ci } else { 541262306a36Sopenharmony_ci len = is_64_bit_mode(vcpu) ? 
8 : 4; 541362306a36Sopenharmony_ci if (get_vmx_mem_address(vcpu, exit_qualification, 541462306a36Sopenharmony_ci instr_info, true, len, &gva)) 541562306a36Sopenharmony_ci return 1; 541662306a36Sopenharmony_ci /* _system ok, nested_vmx_check_permission has verified cpl=0 */ 541762306a36Sopenharmony_ci r = kvm_write_guest_virt_system(vcpu, gva, &value, len, &e); 541862306a36Sopenharmony_ci if (r != X86EMUL_CONTINUE) 541962306a36Sopenharmony_ci return kvm_handle_memory_failure(vcpu, r, &e); 542062306a36Sopenharmony_ci } 542162306a36Sopenharmony_ci 542262306a36Sopenharmony_ci return nested_vmx_succeed(vcpu); 542362306a36Sopenharmony_ci} 542462306a36Sopenharmony_ci 542562306a36Sopenharmony_cistatic bool is_shadow_field_rw(unsigned long field) 542662306a36Sopenharmony_ci{ 542762306a36Sopenharmony_ci switch (field) { 542862306a36Sopenharmony_ci#define SHADOW_FIELD_RW(x, y) case x: 542962306a36Sopenharmony_ci#include "vmcs_shadow_fields.h" 543062306a36Sopenharmony_ci return true; 543162306a36Sopenharmony_ci default: 543262306a36Sopenharmony_ci break; 543362306a36Sopenharmony_ci } 543462306a36Sopenharmony_ci return false; 543562306a36Sopenharmony_ci} 543662306a36Sopenharmony_ci 543762306a36Sopenharmony_cistatic bool is_shadow_field_ro(unsigned long field) 543862306a36Sopenharmony_ci{ 543962306a36Sopenharmony_ci switch (field) { 544062306a36Sopenharmony_ci#define SHADOW_FIELD_RO(x, y) case x: 544162306a36Sopenharmony_ci#include "vmcs_shadow_fields.h" 544262306a36Sopenharmony_ci return true; 544362306a36Sopenharmony_ci default: 544462306a36Sopenharmony_ci break; 544562306a36Sopenharmony_ci } 544662306a36Sopenharmony_ci return false; 544762306a36Sopenharmony_ci} 544862306a36Sopenharmony_ci 544962306a36Sopenharmony_cistatic int handle_vmwrite(struct kvm_vcpu *vcpu) 545062306a36Sopenharmony_ci{ 545162306a36Sopenharmony_ci struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? 
get_shadow_vmcs12(vcpu) 545262306a36Sopenharmony_ci : get_vmcs12(vcpu); 545362306a36Sopenharmony_ci unsigned long exit_qualification = vmx_get_exit_qual(vcpu); 545462306a36Sopenharmony_ci u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); 545562306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 545662306a36Sopenharmony_ci struct x86_exception e; 545762306a36Sopenharmony_ci unsigned long field; 545862306a36Sopenharmony_ci short offset; 545962306a36Sopenharmony_ci gva_t gva; 546062306a36Sopenharmony_ci int len, r; 546162306a36Sopenharmony_ci 546262306a36Sopenharmony_ci /* 546362306a36Sopenharmony_ci * The value to write might be 32 or 64 bits, depending on L1's long 546462306a36Sopenharmony_ci * mode, and eventually we need to write that into a field of several 546562306a36Sopenharmony_ci * possible lengths. The code below first zero-extends the value to 64 546662306a36Sopenharmony_ci * bit (value), and then copies only the appropriate number of 546762306a36Sopenharmony_ci * bits into the vmcs12 field. 546862306a36Sopenharmony_ci */ 546962306a36Sopenharmony_ci u64 value = 0; 547062306a36Sopenharmony_ci 547162306a36Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 547262306a36Sopenharmony_ci return 1; 547362306a36Sopenharmony_ci 547462306a36Sopenharmony_ci /* 547562306a36Sopenharmony_ci * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA, 547662306a36Sopenharmony_ci * any VMWRITE sets the ALU flags for VMfailInvalid. 
547762306a36Sopenharmony_ci */ 547862306a36Sopenharmony_ci if (vmx->nested.current_vmptr == INVALID_GPA || 547962306a36Sopenharmony_ci (is_guest_mode(vcpu) && 548062306a36Sopenharmony_ci get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA)) 548162306a36Sopenharmony_ci return nested_vmx_failInvalid(vcpu); 548262306a36Sopenharmony_ci 548362306a36Sopenharmony_ci if (instr_info & BIT(10)) 548462306a36Sopenharmony_ci value = kvm_register_read(vcpu, (((instr_info) >> 3) & 0xf)); 548562306a36Sopenharmony_ci else { 548662306a36Sopenharmony_ci len = is_64_bit_mode(vcpu) ? 8 : 4; 548762306a36Sopenharmony_ci if (get_vmx_mem_address(vcpu, exit_qualification, 548862306a36Sopenharmony_ci instr_info, false, len, &gva)) 548962306a36Sopenharmony_ci return 1; 549062306a36Sopenharmony_ci r = kvm_read_guest_virt(vcpu, gva, &value, len, &e); 549162306a36Sopenharmony_ci if (r != X86EMUL_CONTINUE) 549262306a36Sopenharmony_ci return kvm_handle_memory_failure(vcpu, r, &e); 549362306a36Sopenharmony_ci } 549462306a36Sopenharmony_ci 549562306a36Sopenharmony_ci field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf)); 549662306a36Sopenharmony_ci 549762306a36Sopenharmony_ci offset = get_vmcs12_field_offset(field); 549862306a36Sopenharmony_ci if (offset < 0) 549962306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); 550062306a36Sopenharmony_ci 550162306a36Sopenharmony_ci /* 550262306a36Sopenharmony_ci * If the vCPU supports "VMWRITE to any supported field in the 550362306a36Sopenharmony_ci * VMCS," then the "read-only" fields are actually read/write. 
550462306a36Sopenharmony_ci */ 550562306a36Sopenharmony_ci if (vmcs_field_readonly(field) && 550662306a36Sopenharmony_ci !nested_cpu_has_vmwrite_any_field(vcpu)) 550762306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); 550862306a36Sopenharmony_ci 550962306a36Sopenharmony_ci /* 551062306a36Sopenharmony_ci * Ensure vmcs12 is up-to-date before any VMWRITE that dirties 551162306a36Sopenharmony_ci * vmcs12, else we may crush a field or consume a stale value. 551262306a36Sopenharmony_ci */ 551362306a36Sopenharmony_ci if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) 551462306a36Sopenharmony_ci copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); 551562306a36Sopenharmony_ci 551662306a36Sopenharmony_ci /* 551762306a36Sopenharmony_ci * Some Intel CPUs intentionally drop the reserved bits of the AR byte 551862306a36Sopenharmony_ci * fields on VMWRITE. Emulate this behavior to ensure consistent KVM 551962306a36Sopenharmony_ci * behavior regardless of the underlying hardware, e.g. if an AR_BYTE 552062306a36Sopenharmony_ci * field is intercepted for VMWRITE but not VMREAD (in L1), then VMREAD 552162306a36Sopenharmony_ci * from L1 will return a different value than VMREAD from L2 (L1 sees 552262306a36Sopenharmony_ci * the stripped down value, L2 sees the full value as stored by KVM). 552362306a36Sopenharmony_ci */ 552462306a36Sopenharmony_ci if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES) 552562306a36Sopenharmony_ci value &= 0x1f0ff; 552662306a36Sopenharmony_ci 552762306a36Sopenharmony_ci vmcs12_write_any(vmcs12, field, offset, value); 552862306a36Sopenharmony_ci 552962306a36Sopenharmony_ci /* 553062306a36Sopenharmony_ci * Do not track vmcs12 dirty-state if in guest-mode as we actually 553162306a36Sopenharmony_ci * dirty shadow vmcs12 instead of vmcs12. Fields that can be updated 553262306a36Sopenharmony_ci * by L1 without a vmexit are always updated in the vmcs02, i.e. 
don't 553362306a36Sopenharmony_ci * "dirty" vmcs12, all others go down the prepare_vmcs02() slow path. 553462306a36Sopenharmony_ci */ 553562306a36Sopenharmony_ci if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) { 553662306a36Sopenharmony_ci /* 553762306a36Sopenharmony_ci * L1 can read these fields without exiting, ensure the 553862306a36Sopenharmony_ci * shadow VMCS is up-to-date. 553962306a36Sopenharmony_ci */ 554062306a36Sopenharmony_ci if (enable_shadow_vmcs && is_shadow_field_ro(field)) { 554162306a36Sopenharmony_ci preempt_disable(); 554262306a36Sopenharmony_ci vmcs_load(vmx->vmcs01.shadow_vmcs); 554362306a36Sopenharmony_ci 554462306a36Sopenharmony_ci __vmcs_writel(field, value); 554562306a36Sopenharmony_ci 554662306a36Sopenharmony_ci vmcs_clear(vmx->vmcs01.shadow_vmcs); 554762306a36Sopenharmony_ci vmcs_load(vmx->loaded_vmcs->vmcs); 554862306a36Sopenharmony_ci preempt_enable(); 554962306a36Sopenharmony_ci } 555062306a36Sopenharmony_ci vmx->nested.dirty_vmcs12 = true; 555162306a36Sopenharmony_ci } 555262306a36Sopenharmony_ci 555362306a36Sopenharmony_ci return nested_vmx_succeed(vcpu); 555462306a36Sopenharmony_ci} 555562306a36Sopenharmony_ci 555662306a36Sopenharmony_cistatic void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr) 555762306a36Sopenharmony_ci{ 555862306a36Sopenharmony_ci vmx->nested.current_vmptr = vmptr; 555962306a36Sopenharmony_ci if (enable_shadow_vmcs) { 556062306a36Sopenharmony_ci secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); 556162306a36Sopenharmony_ci vmcs_write64(VMCS_LINK_POINTER, 556262306a36Sopenharmony_ci __pa(vmx->vmcs01.shadow_vmcs)); 556362306a36Sopenharmony_ci vmx->nested.need_vmcs12_to_shadow_sync = true; 556462306a36Sopenharmony_ci } 556562306a36Sopenharmony_ci vmx->nested.dirty_vmcs12 = true; 556662306a36Sopenharmony_ci vmx->nested.force_msr_bitmap_recalc = true; 556762306a36Sopenharmony_ci} 556862306a36Sopenharmony_ci 556962306a36Sopenharmony_ci/* Emulate the VMPTRLD instruction */ 
557062306a36Sopenharmony_cistatic int handle_vmptrld(struct kvm_vcpu *vcpu) 557162306a36Sopenharmony_ci{ 557262306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 557362306a36Sopenharmony_ci gpa_t vmptr; 557462306a36Sopenharmony_ci int r; 557562306a36Sopenharmony_ci 557662306a36Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 557762306a36Sopenharmony_ci return 1; 557862306a36Sopenharmony_ci 557962306a36Sopenharmony_ci if (nested_vmx_get_vmptr(vcpu, &vmptr, &r)) 558062306a36Sopenharmony_ci return r; 558162306a36Sopenharmony_ci 558262306a36Sopenharmony_ci if (!page_address_valid(vcpu, vmptr)) 558362306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); 558462306a36Sopenharmony_ci 558562306a36Sopenharmony_ci if (vmptr == vmx->nested.vmxon_ptr) 558662306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_VMXON_POINTER); 558762306a36Sopenharmony_ci 558862306a36Sopenharmony_ci /* Forbid normal VMPTRLD if Enlightened version was used */ 558962306a36Sopenharmony_ci if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) 559062306a36Sopenharmony_ci return 1; 559162306a36Sopenharmony_ci 559262306a36Sopenharmony_ci if (vmx->nested.current_vmptr != vmptr) { 559362306a36Sopenharmony_ci struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache; 559462306a36Sopenharmony_ci struct vmcs_hdr hdr; 559562306a36Sopenharmony_ci 559662306a36Sopenharmony_ci if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) { 559762306a36Sopenharmony_ci /* 559862306a36Sopenharmony_ci * Reads from an unbacked page return all 1s, 559962306a36Sopenharmony_ci * which means that the 32 bits located at the 560062306a36Sopenharmony_ci * given physical address won't match the required 560162306a36Sopenharmony_ci * VMCS12_REVISION identifier. 
560262306a36Sopenharmony_ci */ 560362306a36Sopenharmony_ci return nested_vmx_fail(vcpu, 560462306a36Sopenharmony_ci VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); 560562306a36Sopenharmony_ci } 560662306a36Sopenharmony_ci 560762306a36Sopenharmony_ci if (kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr, 560862306a36Sopenharmony_ci offsetof(struct vmcs12, hdr), 560962306a36Sopenharmony_ci sizeof(hdr))) { 561062306a36Sopenharmony_ci return nested_vmx_fail(vcpu, 561162306a36Sopenharmony_ci VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); 561262306a36Sopenharmony_ci } 561362306a36Sopenharmony_ci 561462306a36Sopenharmony_ci if (hdr.revision_id != VMCS12_REVISION || 561562306a36Sopenharmony_ci (hdr.shadow_vmcs && 561662306a36Sopenharmony_ci !nested_cpu_has_vmx_shadow_vmcs(vcpu))) { 561762306a36Sopenharmony_ci return nested_vmx_fail(vcpu, 561862306a36Sopenharmony_ci VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); 561962306a36Sopenharmony_ci } 562062306a36Sopenharmony_ci 562162306a36Sopenharmony_ci nested_release_vmcs12(vcpu); 562262306a36Sopenharmony_ci 562362306a36Sopenharmony_ci /* 562462306a36Sopenharmony_ci * Load VMCS12 from guest memory since it is not already 562562306a36Sopenharmony_ci * cached. 
562662306a36Sopenharmony_ci */ 562762306a36Sopenharmony_ci if (kvm_read_guest_cached(vcpu->kvm, ghc, vmx->nested.cached_vmcs12, 562862306a36Sopenharmony_ci VMCS12_SIZE)) { 562962306a36Sopenharmony_ci return nested_vmx_fail(vcpu, 563062306a36Sopenharmony_ci VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); 563162306a36Sopenharmony_ci } 563262306a36Sopenharmony_ci 563362306a36Sopenharmony_ci set_current_vmptr(vmx, vmptr); 563462306a36Sopenharmony_ci } 563562306a36Sopenharmony_ci 563662306a36Sopenharmony_ci return nested_vmx_succeed(vcpu); 563762306a36Sopenharmony_ci} 563862306a36Sopenharmony_ci 563962306a36Sopenharmony_ci/* Emulate the VMPTRST instruction */ 564062306a36Sopenharmony_cistatic int handle_vmptrst(struct kvm_vcpu *vcpu) 564162306a36Sopenharmony_ci{ 564262306a36Sopenharmony_ci unsigned long exit_qual = vmx_get_exit_qual(vcpu); 564362306a36Sopenharmony_ci u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); 564462306a36Sopenharmony_ci gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr; 564562306a36Sopenharmony_ci struct x86_exception e; 564662306a36Sopenharmony_ci gva_t gva; 564762306a36Sopenharmony_ci int r; 564862306a36Sopenharmony_ci 564962306a36Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 565062306a36Sopenharmony_ci return 1; 565162306a36Sopenharmony_ci 565262306a36Sopenharmony_ci if (unlikely(evmptr_is_valid(to_vmx(vcpu)->nested.hv_evmcs_vmptr))) 565362306a36Sopenharmony_ci return 1; 565462306a36Sopenharmony_ci 565562306a36Sopenharmony_ci if (get_vmx_mem_address(vcpu, exit_qual, instr_info, 565662306a36Sopenharmony_ci true, sizeof(gpa_t), &gva)) 565762306a36Sopenharmony_ci return 1; 565862306a36Sopenharmony_ci /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ 565962306a36Sopenharmony_ci r = kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr, 566062306a36Sopenharmony_ci sizeof(gpa_t), &e); 566162306a36Sopenharmony_ci if (r != X86EMUL_CONTINUE) 566262306a36Sopenharmony_ci return kvm_handle_memory_failure(vcpu, r, &e); 
566362306a36Sopenharmony_ci 566462306a36Sopenharmony_ci return nested_vmx_succeed(vcpu); 566562306a36Sopenharmony_ci} 566662306a36Sopenharmony_ci 566762306a36Sopenharmony_ci/* Emulate the INVEPT instruction */ 566862306a36Sopenharmony_cistatic int handle_invept(struct kvm_vcpu *vcpu) 566962306a36Sopenharmony_ci{ 567062306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 567162306a36Sopenharmony_ci u32 vmx_instruction_info, types; 567262306a36Sopenharmony_ci unsigned long type, roots_to_free; 567362306a36Sopenharmony_ci struct kvm_mmu *mmu; 567462306a36Sopenharmony_ci gva_t gva; 567562306a36Sopenharmony_ci struct x86_exception e; 567662306a36Sopenharmony_ci struct { 567762306a36Sopenharmony_ci u64 eptp, gpa; 567862306a36Sopenharmony_ci } operand; 567962306a36Sopenharmony_ci int i, r, gpr_index; 568062306a36Sopenharmony_ci 568162306a36Sopenharmony_ci if (!(vmx->nested.msrs.secondary_ctls_high & 568262306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_EPT) || 568362306a36Sopenharmony_ci !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) { 568462306a36Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 568562306a36Sopenharmony_ci return 1; 568662306a36Sopenharmony_ci } 568762306a36Sopenharmony_ci 568862306a36Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 568962306a36Sopenharmony_ci return 1; 569062306a36Sopenharmony_ci 569162306a36Sopenharmony_ci vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); 569262306a36Sopenharmony_ci gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info); 569362306a36Sopenharmony_ci type = kvm_register_read(vcpu, gpr_index); 569462306a36Sopenharmony_ci 569562306a36Sopenharmony_ci types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; 569662306a36Sopenharmony_ci 569762306a36Sopenharmony_ci if (type >= 32 || !(types & (1 << type))) 569862306a36Sopenharmony_ci return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); 569962306a36Sopenharmony_ci 570062306a36Sopenharmony_ci /* According to the Intel VMX 
instruction reference, the memory 570162306a36Sopenharmony_ci * operand is read even if it isn't needed (e.g., for type==global) 570262306a36Sopenharmony_ci */ 570362306a36Sopenharmony_ci if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), 570462306a36Sopenharmony_ci vmx_instruction_info, false, sizeof(operand), &gva)) 570562306a36Sopenharmony_ci return 1; 570662306a36Sopenharmony_ci r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e); 570762306a36Sopenharmony_ci if (r != X86EMUL_CONTINUE) 570862306a36Sopenharmony_ci return kvm_handle_memory_failure(vcpu, r, &e); 570962306a36Sopenharmony_ci 571062306a36Sopenharmony_ci /* 571162306a36Sopenharmony_ci * Nested EPT roots are always held through guest_mmu, 571262306a36Sopenharmony_ci * not root_mmu. 571362306a36Sopenharmony_ci */ 571462306a36Sopenharmony_ci mmu = &vcpu->arch.guest_mmu; 571562306a36Sopenharmony_ci 571662306a36Sopenharmony_ci switch (type) { 571762306a36Sopenharmony_ci case VMX_EPT_EXTENT_CONTEXT: 571862306a36Sopenharmony_ci if (!nested_vmx_check_eptp(vcpu, operand.eptp)) 571962306a36Sopenharmony_ci return nested_vmx_fail(vcpu, 572062306a36Sopenharmony_ci VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); 572162306a36Sopenharmony_ci 572262306a36Sopenharmony_ci roots_to_free = 0; 572362306a36Sopenharmony_ci if (nested_ept_root_matches(mmu->root.hpa, mmu->root.pgd, 572462306a36Sopenharmony_ci operand.eptp)) 572562306a36Sopenharmony_ci roots_to_free |= KVM_MMU_ROOT_CURRENT; 572662306a36Sopenharmony_ci 572762306a36Sopenharmony_ci for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { 572862306a36Sopenharmony_ci if (nested_ept_root_matches(mmu->prev_roots[i].hpa, 572962306a36Sopenharmony_ci mmu->prev_roots[i].pgd, 573062306a36Sopenharmony_ci operand.eptp)) 573162306a36Sopenharmony_ci roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); 573262306a36Sopenharmony_ci } 573362306a36Sopenharmony_ci break; 573462306a36Sopenharmony_ci case VMX_EPT_EXTENT_GLOBAL: 573562306a36Sopenharmony_ci roots_to_free = KVM_MMU_ROOTS_ALL; 
573662306a36Sopenharmony_ci break; 573762306a36Sopenharmony_ci default: 573862306a36Sopenharmony_ci BUG(); 573962306a36Sopenharmony_ci break; 574062306a36Sopenharmony_ci } 574162306a36Sopenharmony_ci 574262306a36Sopenharmony_ci if (roots_to_free) 574362306a36Sopenharmony_ci kvm_mmu_free_roots(vcpu->kvm, mmu, roots_to_free); 574462306a36Sopenharmony_ci 574562306a36Sopenharmony_ci return nested_vmx_succeed(vcpu); 574662306a36Sopenharmony_ci} 574762306a36Sopenharmony_ci 574862306a36Sopenharmony_cistatic int handle_invvpid(struct kvm_vcpu *vcpu) 574962306a36Sopenharmony_ci{ 575062306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 575162306a36Sopenharmony_ci u32 vmx_instruction_info; 575262306a36Sopenharmony_ci unsigned long type, types; 575362306a36Sopenharmony_ci gva_t gva; 575462306a36Sopenharmony_ci struct x86_exception e; 575562306a36Sopenharmony_ci struct { 575662306a36Sopenharmony_ci u64 vpid; 575762306a36Sopenharmony_ci u64 gla; 575862306a36Sopenharmony_ci } operand; 575962306a36Sopenharmony_ci u16 vpid02; 576062306a36Sopenharmony_ci int r, gpr_index; 576162306a36Sopenharmony_ci 576262306a36Sopenharmony_ci if (!(vmx->nested.msrs.secondary_ctls_high & 576362306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_VPID) || 576462306a36Sopenharmony_ci !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) { 576562306a36Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 576662306a36Sopenharmony_ci return 1; 576762306a36Sopenharmony_ci } 576862306a36Sopenharmony_ci 576962306a36Sopenharmony_ci if (!nested_vmx_check_permission(vcpu)) 577062306a36Sopenharmony_ci return 1; 577162306a36Sopenharmony_ci 577262306a36Sopenharmony_ci vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); 577362306a36Sopenharmony_ci gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info); 577462306a36Sopenharmony_ci type = kvm_register_read(vcpu, gpr_index); 577562306a36Sopenharmony_ci 577662306a36Sopenharmony_ci types = (vmx->nested.msrs.vpid_caps & 577762306a36Sopenharmony_ci 
VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8; 577862306a36Sopenharmony_ci 577962306a36Sopenharmony_ci if (type >= 32 || !(types & (1 << type))) 578062306a36Sopenharmony_ci return nested_vmx_fail(vcpu, 578162306a36Sopenharmony_ci VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); 578262306a36Sopenharmony_ci 578362306a36Sopenharmony_ci /* according to the intel vmx instruction reference, the memory 578462306a36Sopenharmony_ci * operand is read even if it isn't needed (e.g., for type==global) 578562306a36Sopenharmony_ci */ 578662306a36Sopenharmony_ci if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), 578762306a36Sopenharmony_ci vmx_instruction_info, false, sizeof(operand), &gva)) 578862306a36Sopenharmony_ci return 1; 578962306a36Sopenharmony_ci r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e); 579062306a36Sopenharmony_ci if (r != X86EMUL_CONTINUE) 579162306a36Sopenharmony_ci return kvm_handle_memory_failure(vcpu, r, &e); 579262306a36Sopenharmony_ci 579362306a36Sopenharmony_ci if (operand.vpid >> 16) 579462306a36Sopenharmony_ci return nested_vmx_fail(vcpu, 579562306a36Sopenharmony_ci VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); 579662306a36Sopenharmony_ci 579762306a36Sopenharmony_ci vpid02 = nested_get_vpid02(vcpu); 579862306a36Sopenharmony_ci switch (type) { 579962306a36Sopenharmony_ci case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: 580062306a36Sopenharmony_ci if (!operand.vpid || 580162306a36Sopenharmony_ci is_noncanonical_address(operand.gla, vcpu)) 580262306a36Sopenharmony_ci return nested_vmx_fail(vcpu, 580362306a36Sopenharmony_ci VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); 580462306a36Sopenharmony_ci vpid_sync_vcpu_addr(vpid02, operand.gla); 580562306a36Sopenharmony_ci break; 580662306a36Sopenharmony_ci case VMX_VPID_EXTENT_SINGLE_CONTEXT: 580762306a36Sopenharmony_ci case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL: 580862306a36Sopenharmony_ci if (!operand.vpid) 580962306a36Sopenharmony_ci return nested_vmx_fail(vcpu, 581062306a36Sopenharmony_ci 
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); 581162306a36Sopenharmony_ci vpid_sync_context(vpid02); 581262306a36Sopenharmony_ci break; 581362306a36Sopenharmony_ci case VMX_VPID_EXTENT_ALL_CONTEXT: 581462306a36Sopenharmony_ci vpid_sync_context(vpid02); 581562306a36Sopenharmony_ci break; 581662306a36Sopenharmony_ci default: 581762306a36Sopenharmony_ci WARN_ON_ONCE(1); 581862306a36Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 581962306a36Sopenharmony_ci } 582062306a36Sopenharmony_ci 582162306a36Sopenharmony_ci /* 582262306a36Sopenharmony_ci * Sync the shadow page tables if EPT is disabled, L1 is invalidating 582362306a36Sopenharmony_ci * linear mappings for L2 (tagged with L2's VPID). Free all guest 582462306a36Sopenharmony_ci * roots as VPIDs are not tracked in the MMU role. 582562306a36Sopenharmony_ci * 582662306a36Sopenharmony_ci * Note, this operates on root_mmu, not guest_mmu, as L1 and L2 share 582762306a36Sopenharmony_ci * an MMU when EPT is disabled. 582862306a36Sopenharmony_ci * 582962306a36Sopenharmony_ci * TODO: sync only the affected SPTEs for INVDIVIDUAL_ADDR. 
583062306a36Sopenharmony_ci */ 583162306a36Sopenharmony_ci if (!enable_ept) 583262306a36Sopenharmony_ci kvm_mmu_free_guest_mode_roots(vcpu->kvm, &vcpu->arch.root_mmu); 583362306a36Sopenharmony_ci 583462306a36Sopenharmony_ci return nested_vmx_succeed(vcpu); 583562306a36Sopenharmony_ci} 583662306a36Sopenharmony_ci 583762306a36Sopenharmony_cistatic int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, 583862306a36Sopenharmony_ci struct vmcs12 *vmcs12) 583962306a36Sopenharmony_ci{ 584062306a36Sopenharmony_ci u32 index = kvm_rcx_read(vcpu); 584162306a36Sopenharmony_ci u64 new_eptp; 584262306a36Sopenharmony_ci 584362306a36Sopenharmony_ci if (WARN_ON_ONCE(!nested_cpu_has_ept(vmcs12))) 584462306a36Sopenharmony_ci return 1; 584562306a36Sopenharmony_ci if (index >= VMFUNC_EPTP_ENTRIES) 584662306a36Sopenharmony_ci return 1; 584762306a36Sopenharmony_ci 584862306a36Sopenharmony_ci if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT, 584962306a36Sopenharmony_ci &new_eptp, index * 8, 8)) 585062306a36Sopenharmony_ci return 1; 585162306a36Sopenharmony_ci 585262306a36Sopenharmony_ci /* 585362306a36Sopenharmony_ci * If the (L2) guest does a vmfunc to the currently 585462306a36Sopenharmony_ci * active ept pointer, we don't have to do anything else 585562306a36Sopenharmony_ci */ 585662306a36Sopenharmony_ci if (vmcs12->ept_pointer != new_eptp) { 585762306a36Sopenharmony_ci if (!nested_vmx_check_eptp(vcpu, new_eptp)) 585862306a36Sopenharmony_ci return 1; 585962306a36Sopenharmony_ci 586062306a36Sopenharmony_ci vmcs12->ept_pointer = new_eptp; 586162306a36Sopenharmony_ci nested_ept_new_eptp(vcpu); 586262306a36Sopenharmony_ci 586362306a36Sopenharmony_ci if (!nested_cpu_has_vpid(vmcs12)) 586462306a36Sopenharmony_ci kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); 586562306a36Sopenharmony_ci } 586662306a36Sopenharmony_ci 586762306a36Sopenharmony_ci return 0; 586862306a36Sopenharmony_ci} 586962306a36Sopenharmony_ci 587062306a36Sopenharmony_cistatic int handle_vmfunc(struct 
kvm_vcpu *vcpu) 587162306a36Sopenharmony_ci{ 587262306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 587362306a36Sopenharmony_ci struct vmcs12 *vmcs12; 587462306a36Sopenharmony_ci u32 function = kvm_rax_read(vcpu); 587562306a36Sopenharmony_ci 587662306a36Sopenharmony_ci /* 587762306a36Sopenharmony_ci * VMFUNC should never execute cleanly while L1 is active; KVM supports 587862306a36Sopenharmony_ci * VMFUNC for nested VMs, but not for L1. 587962306a36Sopenharmony_ci */ 588062306a36Sopenharmony_ci if (WARN_ON_ONCE(!is_guest_mode(vcpu))) { 588162306a36Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 588262306a36Sopenharmony_ci return 1; 588362306a36Sopenharmony_ci } 588462306a36Sopenharmony_ci 588562306a36Sopenharmony_ci vmcs12 = get_vmcs12(vcpu); 588662306a36Sopenharmony_ci 588762306a36Sopenharmony_ci /* 588862306a36Sopenharmony_ci * #UD on out-of-bounds function has priority over VM-Exit, and VMFUNC 588962306a36Sopenharmony_ci * is enabled in vmcs02 if and only if it's enabled in vmcs12. 
589062306a36Sopenharmony_ci */ 589162306a36Sopenharmony_ci if (WARN_ON_ONCE((function > 63) || !nested_cpu_has_vmfunc(vmcs12))) { 589262306a36Sopenharmony_ci kvm_queue_exception(vcpu, UD_VECTOR); 589362306a36Sopenharmony_ci return 1; 589462306a36Sopenharmony_ci } 589562306a36Sopenharmony_ci 589662306a36Sopenharmony_ci if (!(vmcs12->vm_function_control & BIT_ULL(function))) 589762306a36Sopenharmony_ci goto fail; 589862306a36Sopenharmony_ci 589962306a36Sopenharmony_ci switch (function) { 590062306a36Sopenharmony_ci case 0: 590162306a36Sopenharmony_ci if (nested_vmx_eptp_switching(vcpu, vmcs12)) 590262306a36Sopenharmony_ci goto fail; 590362306a36Sopenharmony_ci break; 590462306a36Sopenharmony_ci default: 590562306a36Sopenharmony_ci goto fail; 590662306a36Sopenharmony_ci } 590762306a36Sopenharmony_ci return kvm_skip_emulated_instruction(vcpu); 590862306a36Sopenharmony_ci 590962306a36Sopenharmony_cifail: 591062306a36Sopenharmony_ci /* 591162306a36Sopenharmony_ci * This is effectively a reflected VM-Exit, as opposed to a synthesized 591262306a36Sopenharmony_ci * nested VM-Exit. Pass the original exit reason, i.e. don't hardcode 591362306a36Sopenharmony_ci * EXIT_REASON_VMFUNC as the exit reason. 591462306a36Sopenharmony_ci */ 591562306a36Sopenharmony_ci nested_vmx_vmexit(vcpu, vmx->exit_reason.full, 591662306a36Sopenharmony_ci vmx_get_intr_info(vcpu), 591762306a36Sopenharmony_ci vmx_get_exit_qual(vcpu)); 591862306a36Sopenharmony_ci return 1; 591962306a36Sopenharmony_ci} 592062306a36Sopenharmony_ci 592162306a36Sopenharmony_ci/* 592262306a36Sopenharmony_ci * Return true if an IO instruction with the specified port and size should cause 592362306a36Sopenharmony_ci * a VM-exit into L1. 
592462306a36Sopenharmony_ci */ 592562306a36Sopenharmony_cibool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, 592662306a36Sopenharmony_ci int size) 592762306a36Sopenharmony_ci{ 592862306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 592962306a36Sopenharmony_ci gpa_t bitmap, last_bitmap; 593062306a36Sopenharmony_ci u8 b; 593162306a36Sopenharmony_ci 593262306a36Sopenharmony_ci last_bitmap = INVALID_GPA; 593362306a36Sopenharmony_ci b = -1; 593462306a36Sopenharmony_ci 593562306a36Sopenharmony_ci while (size > 0) { 593662306a36Sopenharmony_ci if (port < 0x8000) 593762306a36Sopenharmony_ci bitmap = vmcs12->io_bitmap_a; 593862306a36Sopenharmony_ci else if (port < 0x10000) 593962306a36Sopenharmony_ci bitmap = vmcs12->io_bitmap_b; 594062306a36Sopenharmony_ci else 594162306a36Sopenharmony_ci return true; 594262306a36Sopenharmony_ci bitmap += (port & 0x7fff) / 8; 594362306a36Sopenharmony_ci 594462306a36Sopenharmony_ci if (last_bitmap != bitmap) 594562306a36Sopenharmony_ci if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1)) 594662306a36Sopenharmony_ci return true; 594762306a36Sopenharmony_ci if (b & (1 << (port & 7))) 594862306a36Sopenharmony_ci return true; 594962306a36Sopenharmony_ci 595062306a36Sopenharmony_ci port++; 595162306a36Sopenharmony_ci size--; 595262306a36Sopenharmony_ci last_bitmap = bitmap; 595362306a36Sopenharmony_ci } 595462306a36Sopenharmony_ci 595562306a36Sopenharmony_ci return false; 595662306a36Sopenharmony_ci} 595762306a36Sopenharmony_ci 595862306a36Sopenharmony_cistatic bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, 595962306a36Sopenharmony_ci struct vmcs12 *vmcs12) 596062306a36Sopenharmony_ci{ 596162306a36Sopenharmony_ci unsigned long exit_qualification; 596262306a36Sopenharmony_ci unsigned short port; 596362306a36Sopenharmony_ci int size; 596462306a36Sopenharmony_ci 596562306a36Sopenharmony_ci if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) 596662306a36Sopenharmony_ci return nested_cpu_has(vmcs12, 
CPU_BASED_UNCOND_IO_EXITING); 596762306a36Sopenharmony_ci 596862306a36Sopenharmony_ci exit_qualification = vmx_get_exit_qual(vcpu); 596962306a36Sopenharmony_ci 597062306a36Sopenharmony_ci port = exit_qualification >> 16; 597162306a36Sopenharmony_ci size = (exit_qualification & 7) + 1; 597262306a36Sopenharmony_ci 597362306a36Sopenharmony_ci return nested_vmx_check_io_bitmaps(vcpu, port, size); 597462306a36Sopenharmony_ci} 597562306a36Sopenharmony_ci 597662306a36Sopenharmony_ci/* 597762306a36Sopenharmony_ci * Return 1 if we should exit from L2 to L1 to handle an MSR access, 597862306a36Sopenharmony_ci * rather than handle it ourselves in L0. I.e., check whether L1 expressed 597962306a36Sopenharmony_ci * disinterest in the current event (read or write a specific MSR) by using an 598062306a36Sopenharmony_ci * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps. 598162306a36Sopenharmony_ci */ 598262306a36Sopenharmony_cistatic bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, 598362306a36Sopenharmony_ci struct vmcs12 *vmcs12, 598462306a36Sopenharmony_ci union vmx_exit_reason exit_reason) 598562306a36Sopenharmony_ci{ 598662306a36Sopenharmony_ci u32 msr_index = kvm_rcx_read(vcpu); 598762306a36Sopenharmony_ci gpa_t bitmap; 598862306a36Sopenharmony_ci 598962306a36Sopenharmony_ci if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) 599062306a36Sopenharmony_ci return true; 599162306a36Sopenharmony_ci 599262306a36Sopenharmony_ci /* 599362306a36Sopenharmony_ci * The MSR_BITMAP page is divided into four 1024-byte bitmaps, 599462306a36Sopenharmony_ci * for the four combinations of read/write and low/high MSR numbers. 
599562306a36Sopenharmony_ci * First we need to figure out which of the four to use: 599662306a36Sopenharmony_ci */ 599762306a36Sopenharmony_ci bitmap = vmcs12->msr_bitmap; 599862306a36Sopenharmony_ci if (exit_reason.basic == EXIT_REASON_MSR_WRITE) 599962306a36Sopenharmony_ci bitmap += 2048; 600062306a36Sopenharmony_ci if (msr_index >= 0xc0000000) { 600162306a36Sopenharmony_ci msr_index -= 0xc0000000; 600262306a36Sopenharmony_ci bitmap += 1024; 600362306a36Sopenharmony_ci } 600462306a36Sopenharmony_ci 600562306a36Sopenharmony_ci /* Then read the msr_index'th bit from this bitmap: */ 600662306a36Sopenharmony_ci if (msr_index < 1024*8) { 600762306a36Sopenharmony_ci unsigned char b; 600862306a36Sopenharmony_ci if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1)) 600962306a36Sopenharmony_ci return true; 601062306a36Sopenharmony_ci return 1 & (b >> (msr_index & 7)); 601162306a36Sopenharmony_ci } else 601262306a36Sopenharmony_ci return true; /* let L1 handle the wrong parameter */ 601362306a36Sopenharmony_ci} 601462306a36Sopenharmony_ci 601562306a36Sopenharmony_ci/* 601662306a36Sopenharmony_ci * Return 1 if we should exit from L2 to L1 to handle a CR access exit, 601762306a36Sopenharmony_ci * rather than handle it ourselves in L0. I.e., check if L1 wanted to 601862306a36Sopenharmony_ci * intercept (via guest_host_mask etc.) the current event. 
601962306a36Sopenharmony_ci */ 602062306a36Sopenharmony_cistatic bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, 602162306a36Sopenharmony_ci struct vmcs12 *vmcs12) 602262306a36Sopenharmony_ci{ 602362306a36Sopenharmony_ci unsigned long exit_qualification = vmx_get_exit_qual(vcpu); 602462306a36Sopenharmony_ci int cr = exit_qualification & 15; 602562306a36Sopenharmony_ci int reg; 602662306a36Sopenharmony_ci unsigned long val; 602762306a36Sopenharmony_ci 602862306a36Sopenharmony_ci switch ((exit_qualification >> 4) & 3) { 602962306a36Sopenharmony_ci case 0: /* mov to cr */ 603062306a36Sopenharmony_ci reg = (exit_qualification >> 8) & 15; 603162306a36Sopenharmony_ci val = kvm_register_read(vcpu, reg); 603262306a36Sopenharmony_ci switch (cr) { 603362306a36Sopenharmony_ci case 0: 603462306a36Sopenharmony_ci if (vmcs12->cr0_guest_host_mask & 603562306a36Sopenharmony_ci (val ^ vmcs12->cr0_read_shadow)) 603662306a36Sopenharmony_ci return true; 603762306a36Sopenharmony_ci break; 603862306a36Sopenharmony_ci case 3: 603962306a36Sopenharmony_ci if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING)) 604062306a36Sopenharmony_ci return true; 604162306a36Sopenharmony_ci break; 604262306a36Sopenharmony_ci case 4: 604362306a36Sopenharmony_ci if (vmcs12->cr4_guest_host_mask & 604462306a36Sopenharmony_ci (vmcs12->cr4_read_shadow ^ val)) 604562306a36Sopenharmony_ci return true; 604662306a36Sopenharmony_ci break; 604762306a36Sopenharmony_ci case 8: 604862306a36Sopenharmony_ci if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING)) 604962306a36Sopenharmony_ci return true; 605062306a36Sopenharmony_ci break; 605162306a36Sopenharmony_ci } 605262306a36Sopenharmony_ci break; 605362306a36Sopenharmony_ci case 2: /* clts */ 605462306a36Sopenharmony_ci if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) && 605562306a36Sopenharmony_ci (vmcs12->cr0_read_shadow & X86_CR0_TS)) 605662306a36Sopenharmony_ci return true; 605762306a36Sopenharmony_ci break; 605862306a36Sopenharmony_ci case 1: /* mov from 
cr */ 605962306a36Sopenharmony_ci switch (cr) { 606062306a36Sopenharmony_ci case 3: 606162306a36Sopenharmony_ci if (vmcs12->cpu_based_vm_exec_control & 606262306a36Sopenharmony_ci CPU_BASED_CR3_STORE_EXITING) 606362306a36Sopenharmony_ci return true; 606462306a36Sopenharmony_ci break; 606562306a36Sopenharmony_ci case 8: 606662306a36Sopenharmony_ci if (vmcs12->cpu_based_vm_exec_control & 606762306a36Sopenharmony_ci CPU_BASED_CR8_STORE_EXITING) 606862306a36Sopenharmony_ci return true; 606962306a36Sopenharmony_ci break; 607062306a36Sopenharmony_ci } 607162306a36Sopenharmony_ci break; 607262306a36Sopenharmony_ci case 3: /* lmsw */ 607362306a36Sopenharmony_ci /* 607462306a36Sopenharmony_ci * lmsw can change bits 1..3 of cr0, and only set bit 0 of 607562306a36Sopenharmony_ci * cr0. Other attempted changes are ignored, with no exit. 607662306a36Sopenharmony_ci */ 607762306a36Sopenharmony_ci val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; 607862306a36Sopenharmony_ci if (vmcs12->cr0_guest_host_mask & 0xe & 607962306a36Sopenharmony_ci (val ^ vmcs12->cr0_read_shadow)) 608062306a36Sopenharmony_ci return true; 608162306a36Sopenharmony_ci if ((vmcs12->cr0_guest_host_mask & 0x1) && 608262306a36Sopenharmony_ci !(vmcs12->cr0_read_shadow & 0x1) && 608362306a36Sopenharmony_ci (val & 0x1)) 608462306a36Sopenharmony_ci return true; 608562306a36Sopenharmony_ci break; 608662306a36Sopenharmony_ci } 608762306a36Sopenharmony_ci return false; 608862306a36Sopenharmony_ci} 608962306a36Sopenharmony_ci 609062306a36Sopenharmony_cistatic bool nested_vmx_exit_handled_encls(struct kvm_vcpu *vcpu, 609162306a36Sopenharmony_ci struct vmcs12 *vmcs12) 609262306a36Sopenharmony_ci{ 609362306a36Sopenharmony_ci u32 encls_leaf; 609462306a36Sopenharmony_ci 609562306a36Sopenharmony_ci if (!guest_cpuid_has(vcpu, X86_FEATURE_SGX) || 609662306a36Sopenharmony_ci !nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING)) 609762306a36Sopenharmony_ci return false; 609862306a36Sopenharmony_ci 
609962306a36Sopenharmony_ci encls_leaf = kvm_rax_read(vcpu); 610062306a36Sopenharmony_ci if (encls_leaf > 62) 610162306a36Sopenharmony_ci encls_leaf = 63; 610262306a36Sopenharmony_ci return vmcs12->encls_exiting_bitmap & BIT_ULL(encls_leaf); 610362306a36Sopenharmony_ci} 610462306a36Sopenharmony_ci 610562306a36Sopenharmony_cistatic bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu, 610662306a36Sopenharmony_ci struct vmcs12 *vmcs12, gpa_t bitmap) 610762306a36Sopenharmony_ci{ 610862306a36Sopenharmony_ci u32 vmx_instruction_info; 610962306a36Sopenharmony_ci unsigned long field; 611062306a36Sopenharmony_ci u8 b; 611162306a36Sopenharmony_ci 611262306a36Sopenharmony_ci if (!nested_cpu_has_shadow_vmcs(vmcs12)) 611362306a36Sopenharmony_ci return true; 611462306a36Sopenharmony_ci 611562306a36Sopenharmony_ci /* Decode instruction info and find the field to access */ 611662306a36Sopenharmony_ci vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); 611762306a36Sopenharmony_ci field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); 611862306a36Sopenharmony_ci 611962306a36Sopenharmony_ci /* Out-of-range fields always cause a VM exit from L2 to L1 */ 612062306a36Sopenharmony_ci if (field >> 15) 612162306a36Sopenharmony_ci return true; 612262306a36Sopenharmony_ci 612362306a36Sopenharmony_ci if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1)) 612462306a36Sopenharmony_ci return true; 612562306a36Sopenharmony_ci 612662306a36Sopenharmony_ci return 1 & (b >> (field & 7)); 612762306a36Sopenharmony_ci} 612862306a36Sopenharmony_ci 612962306a36Sopenharmony_cistatic bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12) 613062306a36Sopenharmony_ci{ 613162306a36Sopenharmony_ci u32 entry_intr_info = vmcs12->vm_entry_intr_info_field; 613262306a36Sopenharmony_ci 613362306a36Sopenharmony_ci if (nested_cpu_has_mtf(vmcs12)) 613462306a36Sopenharmony_ci return true; 613562306a36Sopenharmony_ci 613662306a36Sopenharmony_ci /* 613762306a36Sopenharmony_ci * An MTF 
VM-exit may be injected into the guest by setting the 613862306a36Sopenharmony_ci * interruption-type to 7 (other event) and the vector field to 0. Such 613962306a36Sopenharmony_ci * is the case regardless of the 'monitor trap flag' VM-execution 614062306a36Sopenharmony_ci * control. 614162306a36Sopenharmony_ci */ 614262306a36Sopenharmony_ci return entry_intr_info == (INTR_INFO_VALID_MASK 614362306a36Sopenharmony_ci | INTR_TYPE_OTHER_EVENT); 614462306a36Sopenharmony_ci} 614562306a36Sopenharmony_ci 614662306a36Sopenharmony_ci/* 614762306a36Sopenharmony_ci * Return true if L0 wants to handle an exit from L2 regardless of whether or not 614862306a36Sopenharmony_ci * L1 wants the exit. Only call this when in is_guest_mode (L2). 614962306a36Sopenharmony_ci */ 615062306a36Sopenharmony_cistatic bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, 615162306a36Sopenharmony_ci union vmx_exit_reason exit_reason) 615262306a36Sopenharmony_ci{ 615362306a36Sopenharmony_ci u32 intr_info; 615462306a36Sopenharmony_ci 615562306a36Sopenharmony_ci switch ((u16)exit_reason.basic) { 615662306a36Sopenharmony_ci case EXIT_REASON_EXCEPTION_NMI: 615762306a36Sopenharmony_ci intr_info = vmx_get_intr_info(vcpu); 615862306a36Sopenharmony_ci if (is_nmi(intr_info)) 615962306a36Sopenharmony_ci return true; 616062306a36Sopenharmony_ci else if (is_page_fault(intr_info)) 616162306a36Sopenharmony_ci return vcpu->arch.apf.host_apf_flags || 616262306a36Sopenharmony_ci vmx_need_pf_intercept(vcpu); 616362306a36Sopenharmony_ci else if (is_debug(intr_info) && 616462306a36Sopenharmony_ci vcpu->guest_debug & 616562306a36Sopenharmony_ci (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) 616662306a36Sopenharmony_ci return true; 616762306a36Sopenharmony_ci else if (is_breakpoint(intr_info) && 616862306a36Sopenharmony_ci vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 616962306a36Sopenharmony_ci return true; 617062306a36Sopenharmony_ci else if (is_alignment_check(intr_info) && 617162306a36Sopenharmony_ci 
!vmx_guest_inject_ac(vcpu)) 617262306a36Sopenharmony_ci return true; 617362306a36Sopenharmony_ci return false; 617462306a36Sopenharmony_ci case EXIT_REASON_EXTERNAL_INTERRUPT: 617562306a36Sopenharmony_ci return true; 617662306a36Sopenharmony_ci case EXIT_REASON_MCE_DURING_VMENTRY: 617762306a36Sopenharmony_ci return true; 617862306a36Sopenharmony_ci case EXIT_REASON_EPT_VIOLATION: 617962306a36Sopenharmony_ci /* 618062306a36Sopenharmony_ci * L0 always deals with the EPT violation. If nested EPT is 618162306a36Sopenharmony_ci * used, and the nested mmu code discovers that the address is 618262306a36Sopenharmony_ci * missing in the guest EPT table (EPT12), the EPT violation 618362306a36Sopenharmony_ci * will be injected with nested_ept_inject_page_fault() 618462306a36Sopenharmony_ci */ 618562306a36Sopenharmony_ci return true; 618662306a36Sopenharmony_ci case EXIT_REASON_EPT_MISCONFIG: 618762306a36Sopenharmony_ci /* 618862306a36Sopenharmony_ci * L2 never uses directly L1's EPT, but rather L0's own EPT 618962306a36Sopenharmony_ci * table (shadow on EPT) or a merged EPT table that L0 built 619062306a36Sopenharmony_ci * (EPT on EPT). So any problems with the structure of the 619162306a36Sopenharmony_ci * table is L0's fault. 619262306a36Sopenharmony_ci */ 619362306a36Sopenharmony_ci return true; 619462306a36Sopenharmony_ci case EXIT_REASON_PREEMPTION_TIMER: 619562306a36Sopenharmony_ci return true; 619662306a36Sopenharmony_ci case EXIT_REASON_PML_FULL: 619762306a36Sopenharmony_ci /* 619862306a36Sopenharmony_ci * PML is emulated for an L1 VMM and should never be enabled in 619962306a36Sopenharmony_ci * vmcs02, always "handle" PML_FULL by exiting to userspace. 620062306a36Sopenharmony_ci */ 620162306a36Sopenharmony_ci return true; 620262306a36Sopenharmony_ci case EXIT_REASON_VMFUNC: 620362306a36Sopenharmony_ci /* VM functions are emulated through L2->L0 vmexits. 
*/ 620462306a36Sopenharmony_ci return true; 620562306a36Sopenharmony_ci case EXIT_REASON_BUS_LOCK: 620662306a36Sopenharmony_ci /* 620762306a36Sopenharmony_ci * At present, bus lock VM exit is never exposed to L1. 620862306a36Sopenharmony_ci * Handle L2's bus locks in L0 directly. 620962306a36Sopenharmony_ci */ 621062306a36Sopenharmony_ci return true; 621162306a36Sopenharmony_ci case EXIT_REASON_VMCALL: 621262306a36Sopenharmony_ci /* Hyper-V L2 TLB flush hypercall is handled by L0 */ 621362306a36Sopenharmony_ci return guest_hv_cpuid_has_l2_tlb_flush(vcpu) && 621462306a36Sopenharmony_ci nested_evmcs_l2_tlb_flush_enabled(vcpu) && 621562306a36Sopenharmony_ci kvm_hv_is_tlb_flush_hcall(vcpu); 621662306a36Sopenharmony_ci default: 621762306a36Sopenharmony_ci break; 621862306a36Sopenharmony_ci } 621962306a36Sopenharmony_ci return false; 622062306a36Sopenharmony_ci} 622162306a36Sopenharmony_ci 622262306a36Sopenharmony_ci/* 622362306a36Sopenharmony_ci * Return 1 if L1 wants to intercept an exit from L2. Only call this when in 622462306a36Sopenharmony_ci * is_guest_mode (L2). 
622562306a36Sopenharmony_ci */ 622662306a36Sopenharmony_cistatic bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, 622762306a36Sopenharmony_ci union vmx_exit_reason exit_reason) 622862306a36Sopenharmony_ci{ 622962306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 623062306a36Sopenharmony_ci u32 intr_info; 623162306a36Sopenharmony_ci 623262306a36Sopenharmony_ci switch ((u16)exit_reason.basic) { 623362306a36Sopenharmony_ci case EXIT_REASON_EXCEPTION_NMI: 623462306a36Sopenharmony_ci intr_info = vmx_get_intr_info(vcpu); 623562306a36Sopenharmony_ci if (is_nmi(intr_info)) 623662306a36Sopenharmony_ci return true; 623762306a36Sopenharmony_ci else if (is_page_fault(intr_info)) 623862306a36Sopenharmony_ci return true; 623962306a36Sopenharmony_ci return vmcs12->exception_bitmap & 624062306a36Sopenharmony_ci (1u << (intr_info & INTR_INFO_VECTOR_MASK)); 624162306a36Sopenharmony_ci case EXIT_REASON_EXTERNAL_INTERRUPT: 624262306a36Sopenharmony_ci return nested_exit_on_intr(vcpu); 624362306a36Sopenharmony_ci case EXIT_REASON_TRIPLE_FAULT: 624462306a36Sopenharmony_ci return true; 624562306a36Sopenharmony_ci case EXIT_REASON_INTERRUPT_WINDOW: 624662306a36Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING); 624762306a36Sopenharmony_ci case EXIT_REASON_NMI_WINDOW: 624862306a36Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING); 624962306a36Sopenharmony_ci case EXIT_REASON_TASK_SWITCH: 625062306a36Sopenharmony_ci return true; 625162306a36Sopenharmony_ci case EXIT_REASON_CPUID: 625262306a36Sopenharmony_ci return true; 625362306a36Sopenharmony_ci case EXIT_REASON_HLT: 625462306a36Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); 625562306a36Sopenharmony_ci case EXIT_REASON_INVD: 625662306a36Sopenharmony_ci return true; 625762306a36Sopenharmony_ci case EXIT_REASON_INVLPG: 625862306a36Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); 625962306a36Sopenharmony_ci case 
EXIT_REASON_RDPMC: 626062306a36Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); 626162306a36Sopenharmony_ci case EXIT_REASON_RDRAND: 626262306a36Sopenharmony_ci return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING); 626362306a36Sopenharmony_ci case EXIT_REASON_RDSEED: 626462306a36Sopenharmony_ci return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING); 626562306a36Sopenharmony_ci case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: 626662306a36Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); 626762306a36Sopenharmony_ci case EXIT_REASON_VMREAD: 626862306a36Sopenharmony_ci return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12, 626962306a36Sopenharmony_ci vmcs12->vmread_bitmap); 627062306a36Sopenharmony_ci case EXIT_REASON_VMWRITE: 627162306a36Sopenharmony_ci return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12, 627262306a36Sopenharmony_ci vmcs12->vmwrite_bitmap); 627362306a36Sopenharmony_ci case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: 627462306a36Sopenharmony_ci case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: 627562306a36Sopenharmony_ci case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME: 627662306a36Sopenharmony_ci case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: 627762306a36Sopenharmony_ci case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID: 627862306a36Sopenharmony_ci /* 627962306a36Sopenharmony_ci * VMX instructions trap unconditionally. This allows L1 to 628062306a36Sopenharmony_ci * emulate them for its L2 guest, i.e., allows 3-level nesting! 
628162306a36Sopenharmony_ci */ 628262306a36Sopenharmony_ci return true; 628362306a36Sopenharmony_ci case EXIT_REASON_CR_ACCESS: 628462306a36Sopenharmony_ci return nested_vmx_exit_handled_cr(vcpu, vmcs12); 628562306a36Sopenharmony_ci case EXIT_REASON_DR_ACCESS: 628662306a36Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING); 628762306a36Sopenharmony_ci case EXIT_REASON_IO_INSTRUCTION: 628862306a36Sopenharmony_ci return nested_vmx_exit_handled_io(vcpu, vmcs12); 628962306a36Sopenharmony_ci case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR: 629062306a36Sopenharmony_ci return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC); 629162306a36Sopenharmony_ci case EXIT_REASON_MSR_READ: 629262306a36Sopenharmony_ci case EXIT_REASON_MSR_WRITE: 629362306a36Sopenharmony_ci return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); 629462306a36Sopenharmony_ci case EXIT_REASON_INVALID_STATE: 629562306a36Sopenharmony_ci return true; 629662306a36Sopenharmony_ci case EXIT_REASON_MWAIT_INSTRUCTION: 629762306a36Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); 629862306a36Sopenharmony_ci case EXIT_REASON_MONITOR_TRAP_FLAG: 629962306a36Sopenharmony_ci return nested_vmx_exit_handled_mtf(vmcs12); 630062306a36Sopenharmony_ci case EXIT_REASON_MONITOR_INSTRUCTION: 630162306a36Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING); 630262306a36Sopenharmony_ci case EXIT_REASON_PAUSE_INSTRUCTION: 630362306a36Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) || 630462306a36Sopenharmony_ci nested_cpu_has2(vmcs12, 630562306a36Sopenharmony_ci SECONDARY_EXEC_PAUSE_LOOP_EXITING); 630662306a36Sopenharmony_ci case EXIT_REASON_MCE_DURING_VMENTRY: 630762306a36Sopenharmony_ci return true; 630862306a36Sopenharmony_ci case EXIT_REASON_TPR_BELOW_THRESHOLD: 630962306a36Sopenharmony_ci return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW); 631062306a36Sopenharmony_ci case EXIT_REASON_APIC_ACCESS: 631162306a36Sopenharmony_ci case 
EXIT_REASON_APIC_WRITE: 631262306a36Sopenharmony_ci case EXIT_REASON_EOI_INDUCED: 631362306a36Sopenharmony_ci /* 631462306a36Sopenharmony_ci * The controls for "virtualize APIC accesses," "APIC- 631562306a36Sopenharmony_ci * register virtualization," and "virtual-interrupt 631662306a36Sopenharmony_ci * delivery" only come from vmcs12. 631762306a36Sopenharmony_ci */ 631862306a36Sopenharmony_ci return true; 631962306a36Sopenharmony_ci case EXIT_REASON_INVPCID: 632062306a36Sopenharmony_ci return 632162306a36Sopenharmony_ci nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) && 632262306a36Sopenharmony_ci nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); 632362306a36Sopenharmony_ci case EXIT_REASON_WBINVD: 632462306a36Sopenharmony_ci return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); 632562306a36Sopenharmony_ci case EXIT_REASON_XSETBV: 632662306a36Sopenharmony_ci return true; 632762306a36Sopenharmony_ci case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS: 632862306a36Sopenharmony_ci /* 632962306a36Sopenharmony_ci * This should never happen, since it is not possible to 633062306a36Sopenharmony_ci * set XSS to a non-zero value---neither in L1 nor in L2. 633162306a36Sopenharmony_ci * If it were, XSS would have to be checked against 633262306a36Sopenharmony_ci * the XSS exit bitmap in vmcs12. 
633362306a36Sopenharmony_ci */ 633462306a36Sopenharmony_ci return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES); 633562306a36Sopenharmony_ci case EXIT_REASON_UMWAIT: 633662306a36Sopenharmony_ci case EXIT_REASON_TPAUSE: 633762306a36Sopenharmony_ci return nested_cpu_has2(vmcs12, 633862306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE); 633962306a36Sopenharmony_ci case EXIT_REASON_ENCLS: 634062306a36Sopenharmony_ci return nested_vmx_exit_handled_encls(vcpu, vmcs12); 634162306a36Sopenharmony_ci case EXIT_REASON_NOTIFY: 634262306a36Sopenharmony_ci /* Notify VM exit is not exposed to L1 */ 634362306a36Sopenharmony_ci return false; 634462306a36Sopenharmony_ci default: 634562306a36Sopenharmony_ci return true; 634662306a36Sopenharmony_ci } 634762306a36Sopenharmony_ci} 634862306a36Sopenharmony_ci 634962306a36Sopenharmony_ci/* 635062306a36Sopenharmony_ci * Conditionally reflect a VM-Exit into L1. Returns %true if the VM-Exit was 635162306a36Sopenharmony_ci * reflected into L1. 635262306a36Sopenharmony_ci */ 635362306a36Sopenharmony_cibool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu) 635462306a36Sopenharmony_ci{ 635562306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 635662306a36Sopenharmony_ci union vmx_exit_reason exit_reason = vmx->exit_reason; 635762306a36Sopenharmony_ci unsigned long exit_qual; 635862306a36Sopenharmony_ci u32 exit_intr_info; 635962306a36Sopenharmony_ci 636062306a36Sopenharmony_ci WARN_ON_ONCE(vmx->nested.nested_run_pending); 636162306a36Sopenharmony_ci 636262306a36Sopenharmony_ci /* 636362306a36Sopenharmony_ci * Late nested VM-Fail shares the same flow as nested VM-Exit since KVM 636462306a36Sopenharmony_ci * has already loaded L2's state. 
636562306a36Sopenharmony_ci */ 636662306a36Sopenharmony_ci if (unlikely(vmx->fail)) { 636762306a36Sopenharmony_ci trace_kvm_nested_vmenter_failed( 636862306a36Sopenharmony_ci "hardware VM-instruction error: ", 636962306a36Sopenharmony_ci vmcs_read32(VM_INSTRUCTION_ERROR)); 637062306a36Sopenharmony_ci exit_intr_info = 0; 637162306a36Sopenharmony_ci exit_qual = 0; 637262306a36Sopenharmony_ci goto reflect_vmexit; 637362306a36Sopenharmony_ci } 637462306a36Sopenharmony_ci 637562306a36Sopenharmony_ci trace_kvm_nested_vmexit(vcpu, KVM_ISA_VMX); 637662306a36Sopenharmony_ci 637762306a36Sopenharmony_ci /* If L0 (KVM) wants the exit, it trumps L1's desires. */ 637862306a36Sopenharmony_ci if (nested_vmx_l0_wants_exit(vcpu, exit_reason)) 637962306a36Sopenharmony_ci return false; 638062306a36Sopenharmony_ci 638162306a36Sopenharmony_ci /* If L1 doesn't want the exit, handle it in L0. */ 638262306a36Sopenharmony_ci if (!nested_vmx_l1_wants_exit(vcpu, exit_reason)) 638362306a36Sopenharmony_ci return false; 638462306a36Sopenharmony_ci 638562306a36Sopenharmony_ci /* 638662306a36Sopenharmony_ci * vmcs.VM_EXIT_INTR_INFO is only valid for EXCEPTION_NMI exits. For 638762306a36Sopenharmony_ci * EXTERNAL_INTERRUPT, the value for vmcs12->vm_exit_intr_info would 638862306a36Sopenharmony_ci * need to be synthesized by querying the in-kernel LAPIC, but external 638962306a36Sopenharmony_ci * interrupts are never reflected to L1 so it's a non-issue. 
639062306a36Sopenharmony_ci */ 639162306a36Sopenharmony_ci exit_intr_info = vmx_get_intr_info(vcpu); 639262306a36Sopenharmony_ci if (is_exception_with_error_code(exit_intr_info)) { 639362306a36Sopenharmony_ci struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 639462306a36Sopenharmony_ci 639562306a36Sopenharmony_ci vmcs12->vm_exit_intr_error_code = 639662306a36Sopenharmony_ci vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 639762306a36Sopenharmony_ci } 639862306a36Sopenharmony_ci exit_qual = vmx_get_exit_qual(vcpu); 639962306a36Sopenharmony_ci 640062306a36Sopenharmony_cireflect_vmexit: 640162306a36Sopenharmony_ci nested_vmx_vmexit(vcpu, exit_reason.full, exit_intr_info, exit_qual); 640262306a36Sopenharmony_ci return true; 640362306a36Sopenharmony_ci} 640462306a36Sopenharmony_ci 640562306a36Sopenharmony_cistatic int vmx_get_nested_state(struct kvm_vcpu *vcpu, 640662306a36Sopenharmony_ci struct kvm_nested_state __user *user_kvm_nested_state, 640762306a36Sopenharmony_ci u32 user_data_size) 640862306a36Sopenharmony_ci{ 640962306a36Sopenharmony_ci struct vcpu_vmx *vmx; 641062306a36Sopenharmony_ci struct vmcs12 *vmcs12; 641162306a36Sopenharmony_ci struct kvm_nested_state kvm_state = { 641262306a36Sopenharmony_ci .flags = 0, 641362306a36Sopenharmony_ci .format = KVM_STATE_NESTED_FORMAT_VMX, 641462306a36Sopenharmony_ci .size = sizeof(kvm_state), 641562306a36Sopenharmony_ci .hdr.vmx.flags = 0, 641662306a36Sopenharmony_ci .hdr.vmx.vmxon_pa = INVALID_GPA, 641762306a36Sopenharmony_ci .hdr.vmx.vmcs12_pa = INVALID_GPA, 641862306a36Sopenharmony_ci .hdr.vmx.preemption_timer_deadline = 0, 641962306a36Sopenharmony_ci }; 642062306a36Sopenharmony_ci struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = 642162306a36Sopenharmony_ci &user_kvm_nested_state->data.vmx[0]; 642262306a36Sopenharmony_ci 642362306a36Sopenharmony_ci if (!vcpu) 642462306a36Sopenharmony_ci return kvm_state.size + sizeof(*user_vmx_nested_state); 642562306a36Sopenharmony_ci 642662306a36Sopenharmony_ci vmx = to_vmx(vcpu); 
642762306a36Sopenharmony_ci vmcs12 = get_vmcs12(vcpu); 642862306a36Sopenharmony_ci 642962306a36Sopenharmony_ci if (guest_can_use(vcpu, X86_FEATURE_VMX) && 643062306a36Sopenharmony_ci (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { 643162306a36Sopenharmony_ci kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr; 643262306a36Sopenharmony_ci kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr; 643362306a36Sopenharmony_ci 643462306a36Sopenharmony_ci if (vmx_has_valid_vmcs12(vcpu)) { 643562306a36Sopenharmony_ci kvm_state.size += sizeof(user_vmx_nested_state->vmcs12); 643662306a36Sopenharmony_ci 643762306a36Sopenharmony_ci /* 'hv_evmcs_vmptr' can also be EVMPTR_MAP_PENDING here */ 643862306a36Sopenharmony_ci if (vmx->nested.hv_evmcs_vmptr != EVMPTR_INVALID) 643962306a36Sopenharmony_ci kvm_state.flags |= KVM_STATE_NESTED_EVMCS; 644062306a36Sopenharmony_ci 644162306a36Sopenharmony_ci if (is_guest_mode(vcpu) && 644262306a36Sopenharmony_ci nested_cpu_has_shadow_vmcs(vmcs12) && 644362306a36Sopenharmony_ci vmcs12->vmcs_link_pointer != INVALID_GPA) 644462306a36Sopenharmony_ci kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12); 644562306a36Sopenharmony_ci } 644662306a36Sopenharmony_ci 644762306a36Sopenharmony_ci if (vmx->nested.smm.vmxon) 644862306a36Sopenharmony_ci kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON; 644962306a36Sopenharmony_ci 645062306a36Sopenharmony_ci if (vmx->nested.smm.guest_mode) 645162306a36Sopenharmony_ci kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE; 645262306a36Sopenharmony_ci 645362306a36Sopenharmony_ci if (is_guest_mode(vcpu)) { 645462306a36Sopenharmony_ci kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE; 645562306a36Sopenharmony_ci 645662306a36Sopenharmony_ci if (vmx->nested.nested_run_pending) 645762306a36Sopenharmony_ci kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING; 645862306a36Sopenharmony_ci 645962306a36Sopenharmony_ci if (vmx->nested.mtf_pending) 646062306a36Sopenharmony_ci kvm_state.flags |= 
KVM_STATE_NESTED_MTF_PENDING; 646162306a36Sopenharmony_ci 646262306a36Sopenharmony_ci if (nested_cpu_has_preemption_timer(vmcs12) && 646362306a36Sopenharmony_ci vmx->nested.has_preemption_timer_deadline) { 646462306a36Sopenharmony_ci kvm_state.hdr.vmx.flags |= 646562306a36Sopenharmony_ci KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE; 646662306a36Sopenharmony_ci kvm_state.hdr.vmx.preemption_timer_deadline = 646762306a36Sopenharmony_ci vmx->nested.preemption_timer_deadline; 646862306a36Sopenharmony_ci } 646962306a36Sopenharmony_ci } 647062306a36Sopenharmony_ci } 647162306a36Sopenharmony_ci 647262306a36Sopenharmony_ci if (user_data_size < kvm_state.size) 647362306a36Sopenharmony_ci goto out; 647462306a36Sopenharmony_ci 647562306a36Sopenharmony_ci if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state))) 647662306a36Sopenharmony_ci return -EFAULT; 647762306a36Sopenharmony_ci 647862306a36Sopenharmony_ci if (!vmx_has_valid_vmcs12(vcpu)) 647962306a36Sopenharmony_ci goto out; 648062306a36Sopenharmony_ci 648162306a36Sopenharmony_ci /* 648262306a36Sopenharmony_ci * When running L2, the authoritative vmcs12 state is in the 648362306a36Sopenharmony_ci * vmcs02. When running L1, the authoritative vmcs12 state is 648462306a36Sopenharmony_ci * in the shadow or enlightened vmcs linked to vmcs01, unless 648562306a36Sopenharmony_ci * need_vmcs12_to_shadow_sync is set, in which case, the authoritative 648662306a36Sopenharmony_ci * vmcs12 state is in the vmcs12 already. 
648762306a36Sopenharmony_ci */ 648862306a36Sopenharmony_ci if (is_guest_mode(vcpu)) { 648962306a36Sopenharmony_ci sync_vmcs02_to_vmcs12(vcpu, vmcs12); 649062306a36Sopenharmony_ci sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); 649162306a36Sopenharmony_ci } else { 649262306a36Sopenharmony_ci copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu)); 649362306a36Sopenharmony_ci if (!vmx->nested.need_vmcs12_to_shadow_sync) { 649462306a36Sopenharmony_ci if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) 649562306a36Sopenharmony_ci /* 649662306a36Sopenharmony_ci * L1 hypervisor is not obliged to keep eVMCS 649762306a36Sopenharmony_ci * clean fields data always up-to-date while 649862306a36Sopenharmony_ci * not in guest mode, 'hv_clean_fields' is only 649962306a36Sopenharmony_ci * supposed to be actual upon vmentry so we need 650062306a36Sopenharmony_ci * to ignore it here and do full copy. 650162306a36Sopenharmony_ci */ 650262306a36Sopenharmony_ci copy_enlightened_to_vmcs12(vmx, 0); 650362306a36Sopenharmony_ci else if (enable_shadow_vmcs) 650462306a36Sopenharmony_ci copy_shadow_to_vmcs12(vmx); 650562306a36Sopenharmony_ci } 650662306a36Sopenharmony_ci } 650762306a36Sopenharmony_ci 650862306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE); 650962306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE); 651062306a36Sopenharmony_ci 651162306a36Sopenharmony_ci /* 651262306a36Sopenharmony_ci * Copy over the full allocated size of vmcs12 rather than just the size 651362306a36Sopenharmony_ci * of the struct. 
651462306a36Sopenharmony_ci */ 651562306a36Sopenharmony_ci if (copy_to_user(user_vmx_nested_state->vmcs12, vmcs12, VMCS12_SIZE)) 651662306a36Sopenharmony_ci return -EFAULT; 651762306a36Sopenharmony_ci 651862306a36Sopenharmony_ci if (nested_cpu_has_shadow_vmcs(vmcs12) && 651962306a36Sopenharmony_ci vmcs12->vmcs_link_pointer != INVALID_GPA) { 652062306a36Sopenharmony_ci if (copy_to_user(user_vmx_nested_state->shadow_vmcs12, 652162306a36Sopenharmony_ci get_shadow_vmcs12(vcpu), VMCS12_SIZE)) 652262306a36Sopenharmony_ci return -EFAULT; 652362306a36Sopenharmony_ci } 652462306a36Sopenharmony_ciout: 652562306a36Sopenharmony_ci return kvm_state.size; 652662306a36Sopenharmony_ci} 652762306a36Sopenharmony_ci 652862306a36Sopenharmony_civoid vmx_leave_nested(struct kvm_vcpu *vcpu) 652962306a36Sopenharmony_ci{ 653062306a36Sopenharmony_ci if (is_guest_mode(vcpu)) { 653162306a36Sopenharmony_ci to_vmx(vcpu)->nested.nested_run_pending = 0; 653262306a36Sopenharmony_ci nested_vmx_vmexit(vcpu, -1, 0, 0); 653362306a36Sopenharmony_ci } 653462306a36Sopenharmony_ci free_nested(vcpu); 653562306a36Sopenharmony_ci} 653662306a36Sopenharmony_ci 653762306a36Sopenharmony_cistatic int vmx_set_nested_state(struct kvm_vcpu *vcpu, 653862306a36Sopenharmony_ci struct kvm_nested_state __user *user_kvm_nested_state, 653962306a36Sopenharmony_ci struct kvm_nested_state *kvm_state) 654062306a36Sopenharmony_ci{ 654162306a36Sopenharmony_ci struct vcpu_vmx *vmx = to_vmx(vcpu); 654262306a36Sopenharmony_ci struct vmcs12 *vmcs12; 654362306a36Sopenharmony_ci enum vm_entry_failure_code ignored; 654462306a36Sopenharmony_ci struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = 654562306a36Sopenharmony_ci &user_kvm_nested_state->data.vmx[0]; 654662306a36Sopenharmony_ci int ret; 654762306a36Sopenharmony_ci 654862306a36Sopenharmony_ci if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX) 654962306a36Sopenharmony_ci return -EINVAL; 655062306a36Sopenharmony_ci 655162306a36Sopenharmony_ci if 
(kvm_state->hdr.vmx.vmxon_pa == INVALID_GPA) { 655262306a36Sopenharmony_ci if (kvm_state->hdr.vmx.smm.flags) 655362306a36Sopenharmony_ci return -EINVAL; 655462306a36Sopenharmony_ci 655562306a36Sopenharmony_ci if (kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA) 655662306a36Sopenharmony_ci return -EINVAL; 655762306a36Sopenharmony_ci 655862306a36Sopenharmony_ci /* 655962306a36Sopenharmony_ci * KVM_STATE_NESTED_EVMCS used to signal that KVM should 656062306a36Sopenharmony_ci * enable eVMCS capability on vCPU. However, since then 656162306a36Sopenharmony_ci * code was changed such that flag signals vmcs12 should 656262306a36Sopenharmony_ci * be copied into eVMCS in guest memory. 656362306a36Sopenharmony_ci * 656462306a36Sopenharmony_ci * To preserve backwards compatibility, allow user 656562306a36Sopenharmony_ci * to set this flag even when there is no VMXON region. 656662306a36Sopenharmony_ci */ 656762306a36Sopenharmony_ci if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS) 656862306a36Sopenharmony_ci return -EINVAL; 656962306a36Sopenharmony_ci } else { 657062306a36Sopenharmony_ci if (!guest_can_use(vcpu, X86_FEATURE_VMX)) 657162306a36Sopenharmony_ci return -EINVAL; 657262306a36Sopenharmony_ci 657362306a36Sopenharmony_ci if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa)) 657462306a36Sopenharmony_ci return -EINVAL; 657562306a36Sopenharmony_ci } 657662306a36Sopenharmony_ci 657762306a36Sopenharmony_ci if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && 657862306a36Sopenharmony_ci (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) 657962306a36Sopenharmony_ci return -EINVAL; 658062306a36Sopenharmony_ci 658162306a36Sopenharmony_ci if (kvm_state->hdr.vmx.smm.flags & 658262306a36Sopenharmony_ci ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON)) 658362306a36Sopenharmony_ci return -EINVAL; 658462306a36Sopenharmony_ci 658562306a36Sopenharmony_ci if (kvm_state->hdr.vmx.flags & ~KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) 658662306a36Sopenharmony_ci 
return -EINVAL; 658762306a36Sopenharmony_ci 658862306a36Sopenharmony_ci /* 658962306a36Sopenharmony_ci * SMM temporarily disables VMX, so we cannot be in guest mode, 659062306a36Sopenharmony_ci * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags 659162306a36Sopenharmony_ci * must be zero. 659262306a36Sopenharmony_ci */ 659362306a36Sopenharmony_ci if (is_smm(vcpu) ? 659462306a36Sopenharmony_ci (kvm_state->flags & 659562306a36Sopenharmony_ci (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING)) 659662306a36Sopenharmony_ci : kvm_state->hdr.vmx.smm.flags) 659762306a36Sopenharmony_ci return -EINVAL; 659862306a36Sopenharmony_ci 659962306a36Sopenharmony_ci if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && 660062306a36Sopenharmony_ci !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) 660162306a36Sopenharmony_ci return -EINVAL; 660262306a36Sopenharmony_ci 660362306a36Sopenharmony_ci if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) && 660462306a36Sopenharmony_ci (!guest_can_use(vcpu, X86_FEATURE_VMX) || 660562306a36Sopenharmony_ci !vmx->nested.enlightened_vmcs_enabled)) 660662306a36Sopenharmony_ci return -EINVAL; 660762306a36Sopenharmony_ci 660862306a36Sopenharmony_ci vmx_leave_nested(vcpu); 660962306a36Sopenharmony_ci 661062306a36Sopenharmony_ci if (kvm_state->hdr.vmx.vmxon_pa == INVALID_GPA) 661162306a36Sopenharmony_ci return 0; 661262306a36Sopenharmony_ci 661362306a36Sopenharmony_ci vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa; 661462306a36Sopenharmony_ci ret = enter_vmx_operation(vcpu); 661562306a36Sopenharmony_ci if (ret) 661662306a36Sopenharmony_ci return ret; 661762306a36Sopenharmony_ci 661862306a36Sopenharmony_ci /* Empty 'VMXON' state is permitted if no VMCS loaded */ 661962306a36Sopenharmony_ci if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) { 662062306a36Sopenharmony_ci /* See vmx_has_valid_vmcs12. 
*/ 662162306a36Sopenharmony_ci if ((kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE) || 662262306a36Sopenharmony_ci (kvm_state->flags & KVM_STATE_NESTED_EVMCS) || 662362306a36Sopenharmony_ci (kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA)) 662462306a36Sopenharmony_ci return -EINVAL; 662562306a36Sopenharmony_ci else 662662306a36Sopenharmony_ci return 0; 662762306a36Sopenharmony_ci } 662862306a36Sopenharmony_ci 662962306a36Sopenharmony_ci if (kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA) { 663062306a36Sopenharmony_ci if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa || 663162306a36Sopenharmony_ci !page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa)) 663262306a36Sopenharmony_ci return -EINVAL; 663362306a36Sopenharmony_ci 663462306a36Sopenharmony_ci set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa); 663562306a36Sopenharmony_ci } else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) { 663662306a36Sopenharmony_ci /* 663762306a36Sopenharmony_ci * nested_vmx_handle_enlightened_vmptrld() cannot be called 663862306a36Sopenharmony_ci * directly from here as HV_X64_MSR_VP_ASSIST_PAGE may not be 663962306a36Sopenharmony_ci * restored yet. EVMCS will be mapped from 664062306a36Sopenharmony_ci * nested_get_vmcs12_pages(). 
664162306a36Sopenharmony_ci */ 664262306a36Sopenharmony_ci vmx->nested.hv_evmcs_vmptr = EVMPTR_MAP_PENDING; 664362306a36Sopenharmony_ci kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); 664462306a36Sopenharmony_ci } else { 664562306a36Sopenharmony_ci return -EINVAL; 664662306a36Sopenharmony_ci } 664762306a36Sopenharmony_ci 664862306a36Sopenharmony_ci if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) { 664962306a36Sopenharmony_ci vmx->nested.smm.vmxon = true; 665062306a36Sopenharmony_ci vmx->nested.vmxon = false; 665162306a36Sopenharmony_ci 665262306a36Sopenharmony_ci if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) 665362306a36Sopenharmony_ci vmx->nested.smm.guest_mode = true; 665462306a36Sopenharmony_ci } 665562306a36Sopenharmony_ci 665662306a36Sopenharmony_ci vmcs12 = get_vmcs12(vcpu); 665762306a36Sopenharmony_ci if (copy_from_user(vmcs12, user_vmx_nested_state->vmcs12, sizeof(*vmcs12))) 665862306a36Sopenharmony_ci return -EFAULT; 665962306a36Sopenharmony_ci 666062306a36Sopenharmony_ci if (vmcs12->hdr.revision_id != VMCS12_REVISION) 666162306a36Sopenharmony_ci return -EINVAL; 666262306a36Sopenharmony_ci 666362306a36Sopenharmony_ci if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) 666462306a36Sopenharmony_ci return 0; 666562306a36Sopenharmony_ci 666662306a36Sopenharmony_ci vmx->nested.nested_run_pending = 666762306a36Sopenharmony_ci !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); 666862306a36Sopenharmony_ci 666962306a36Sopenharmony_ci vmx->nested.mtf_pending = 667062306a36Sopenharmony_ci !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING); 667162306a36Sopenharmony_ci 667262306a36Sopenharmony_ci ret = -EINVAL; 667362306a36Sopenharmony_ci if (nested_cpu_has_shadow_vmcs(vmcs12) && 667462306a36Sopenharmony_ci vmcs12->vmcs_link_pointer != INVALID_GPA) { 667562306a36Sopenharmony_ci struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); 667662306a36Sopenharmony_ci 667762306a36Sopenharmony_ci if (kvm_state->size < 
667862306a36Sopenharmony_ci sizeof(*kvm_state) + 667962306a36Sopenharmony_ci sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12)) 668062306a36Sopenharmony_ci goto error_guest_mode; 668162306a36Sopenharmony_ci 668262306a36Sopenharmony_ci if (copy_from_user(shadow_vmcs12, 668362306a36Sopenharmony_ci user_vmx_nested_state->shadow_vmcs12, 668462306a36Sopenharmony_ci sizeof(*shadow_vmcs12))) { 668562306a36Sopenharmony_ci ret = -EFAULT; 668662306a36Sopenharmony_ci goto error_guest_mode; 668762306a36Sopenharmony_ci } 668862306a36Sopenharmony_ci 668962306a36Sopenharmony_ci if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION || 669062306a36Sopenharmony_ci !shadow_vmcs12->hdr.shadow_vmcs) 669162306a36Sopenharmony_ci goto error_guest_mode; 669262306a36Sopenharmony_ci } 669362306a36Sopenharmony_ci 669462306a36Sopenharmony_ci vmx->nested.has_preemption_timer_deadline = false; 669562306a36Sopenharmony_ci if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) { 669662306a36Sopenharmony_ci vmx->nested.has_preemption_timer_deadline = true; 669762306a36Sopenharmony_ci vmx->nested.preemption_timer_deadline = 669862306a36Sopenharmony_ci kvm_state->hdr.vmx.preemption_timer_deadline; 669962306a36Sopenharmony_ci } 670062306a36Sopenharmony_ci 670162306a36Sopenharmony_ci if (nested_vmx_check_controls(vcpu, vmcs12) || 670262306a36Sopenharmony_ci nested_vmx_check_host_state(vcpu, vmcs12) || 670362306a36Sopenharmony_ci nested_vmx_check_guest_state(vcpu, vmcs12, &ignored)) 670462306a36Sopenharmony_ci goto error_guest_mode; 670562306a36Sopenharmony_ci 670662306a36Sopenharmony_ci vmx->nested.dirty_vmcs12 = true; 670762306a36Sopenharmony_ci vmx->nested.force_msr_bitmap_recalc = true; 670862306a36Sopenharmony_ci ret = nested_vmx_enter_non_root_mode(vcpu, false); 670962306a36Sopenharmony_ci if (ret) 671062306a36Sopenharmony_ci goto error_guest_mode; 671162306a36Sopenharmony_ci 671262306a36Sopenharmony_ci if (vmx->nested.mtf_pending) 671362306a36Sopenharmony_ci 
kvm_make_request(KVM_REQ_EVENT, vcpu); 671462306a36Sopenharmony_ci 671562306a36Sopenharmony_ci return 0; 671662306a36Sopenharmony_ci 671762306a36Sopenharmony_cierror_guest_mode: 671862306a36Sopenharmony_ci vmx->nested.nested_run_pending = 0; 671962306a36Sopenharmony_ci return ret; 672062306a36Sopenharmony_ci} 672162306a36Sopenharmony_ci 672262306a36Sopenharmony_civoid nested_vmx_set_vmcs_shadowing_bitmap(void) 672362306a36Sopenharmony_ci{ 672462306a36Sopenharmony_ci if (enable_shadow_vmcs) { 672562306a36Sopenharmony_ci vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); 672662306a36Sopenharmony_ci vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); 672762306a36Sopenharmony_ci } 672862306a36Sopenharmony_ci} 672962306a36Sopenharmony_ci 673062306a36Sopenharmony_ci/* 673162306a36Sopenharmony_ci * Indexing into the vmcs12 uses the VMCS encoding rotated left by 6. Undo 673262306a36Sopenharmony_ci * that madness to get the encoding for comparison. 673362306a36Sopenharmony_ci */ 673462306a36Sopenharmony_ci#define VMCS12_IDX_TO_ENC(idx) ((u16)(((u16)(idx) >> 6) | ((u16)(idx) << 10))) 673562306a36Sopenharmony_ci 673662306a36Sopenharmony_cistatic u64 nested_vmx_calc_vmcs_enum_msr(void) 673762306a36Sopenharmony_ci{ 673862306a36Sopenharmony_ci /* 673962306a36Sopenharmony_ci * Note these are the so called "index" of the VMCS field encoding, not 674062306a36Sopenharmony_ci * the index into vmcs12. 674162306a36Sopenharmony_ci */ 674262306a36Sopenharmony_ci unsigned int max_idx, idx; 674362306a36Sopenharmony_ci int i; 674462306a36Sopenharmony_ci 674562306a36Sopenharmony_ci /* 674662306a36Sopenharmony_ci * For better or worse, KVM allows VMREAD/VMWRITE to all fields in 674762306a36Sopenharmony_ci * vmcs12, regardless of whether or not the associated feature is 674862306a36Sopenharmony_ci * exposed to L1. Simply find the field with the highest index. 
674962306a36Sopenharmony_ci */ 675062306a36Sopenharmony_ci max_idx = 0; 675162306a36Sopenharmony_ci for (i = 0; i < nr_vmcs12_fields; i++) { 675262306a36Sopenharmony_ci /* The vmcs12 table is very, very sparsely populated. */ 675362306a36Sopenharmony_ci if (!vmcs12_field_offsets[i]) 675462306a36Sopenharmony_ci continue; 675562306a36Sopenharmony_ci 675662306a36Sopenharmony_ci idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i)); 675762306a36Sopenharmony_ci if (idx > max_idx) 675862306a36Sopenharmony_ci max_idx = idx; 675962306a36Sopenharmony_ci } 676062306a36Sopenharmony_ci 676162306a36Sopenharmony_ci return (u64)max_idx << VMCS_FIELD_INDEX_SHIFT; 676262306a36Sopenharmony_ci} 676362306a36Sopenharmony_ci 676462306a36Sopenharmony_cistatic void nested_vmx_setup_pinbased_ctls(struct vmcs_config *vmcs_conf, 676562306a36Sopenharmony_ci struct nested_vmx_msrs *msrs) 676662306a36Sopenharmony_ci{ 676762306a36Sopenharmony_ci msrs->pinbased_ctls_low = 676862306a36Sopenharmony_ci PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; 676962306a36Sopenharmony_ci 677062306a36Sopenharmony_ci msrs->pinbased_ctls_high = vmcs_conf->pin_based_exec_ctrl; 677162306a36Sopenharmony_ci msrs->pinbased_ctls_high &= 677262306a36Sopenharmony_ci PIN_BASED_EXT_INTR_MASK | 677362306a36Sopenharmony_ci PIN_BASED_NMI_EXITING | 677462306a36Sopenharmony_ci PIN_BASED_VIRTUAL_NMIS | 677562306a36Sopenharmony_ci (enable_apicv ? 
PIN_BASED_POSTED_INTR : 0); 677662306a36Sopenharmony_ci msrs->pinbased_ctls_high |= 677762306a36Sopenharmony_ci PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | 677862306a36Sopenharmony_ci PIN_BASED_VMX_PREEMPTION_TIMER; 677962306a36Sopenharmony_ci} 678062306a36Sopenharmony_ci 678162306a36Sopenharmony_cistatic void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf, 678262306a36Sopenharmony_ci struct nested_vmx_msrs *msrs) 678362306a36Sopenharmony_ci{ 678462306a36Sopenharmony_ci msrs->exit_ctls_low = 678562306a36Sopenharmony_ci VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; 678662306a36Sopenharmony_ci 678762306a36Sopenharmony_ci msrs->exit_ctls_high = vmcs_conf->vmexit_ctrl; 678862306a36Sopenharmony_ci msrs->exit_ctls_high &= 678962306a36Sopenharmony_ci#ifdef CONFIG_X86_64 679062306a36Sopenharmony_ci VM_EXIT_HOST_ADDR_SPACE_SIZE | 679162306a36Sopenharmony_ci#endif 679262306a36Sopenharmony_ci VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | 679362306a36Sopenharmony_ci VM_EXIT_CLEAR_BNDCFGS; 679462306a36Sopenharmony_ci msrs->exit_ctls_high |= 679562306a36Sopenharmony_ci VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | 679662306a36Sopenharmony_ci VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | 679762306a36Sopenharmony_ci VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT | 679862306a36Sopenharmony_ci VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; 679962306a36Sopenharmony_ci 680062306a36Sopenharmony_ci /* We support free control of debug control saving. 
*/ 680162306a36Sopenharmony_ci msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; 680262306a36Sopenharmony_ci} 680362306a36Sopenharmony_ci 680462306a36Sopenharmony_cistatic void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf, 680562306a36Sopenharmony_ci struct nested_vmx_msrs *msrs) 680662306a36Sopenharmony_ci{ 680762306a36Sopenharmony_ci msrs->entry_ctls_low = 680862306a36Sopenharmony_ci VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; 680962306a36Sopenharmony_ci 681062306a36Sopenharmony_ci msrs->entry_ctls_high = vmcs_conf->vmentry_ctrl; 681162306a36Sopenharmony_ci msrs->entry_ctls_high &= 681262306a36Sopenharmony_ci#ifdef CONFIG_X86_64 681362306a36Sopenharmony_ci VM_ENTRY_IA32E_MODE | 681462306a36Sopenharmony_ci#endif 681562306a36Sopenharmony_ci VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS; 681662306a36Sopenharmony_ci msrs->entry_ctls_high |= 681762306a36Sopenharmony_ci (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER | 681862306a36Sopenharmony_ci VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); 681962306a36Sopenharmony_ci 682062306a36Sopenharmony_ci /* We support free control of debug control loading. 
*/ 682162306a36Sopenharmony_ci msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; 682262306a36Sopenharmony_ci} 682362306a36Sopenharmony_ci 682462306a36Sopenharmony_cistatic void nested_vmx_setup_cpubased_ctls(struct vmcs_config *vmcs_conf, 682562306a36Sopenharmony_ci struct nested_vmx_msrs *msrs) 682662306a36Sopenharmony_ci{ 682762306a36Sopenharmony_ci msrs->procbased_ctls_low = 682862306a36Sopenharmony_ci CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; 682962306a36Sopenharmony_ci 683062306a36Sopenharmony_ci msrs->procbased_ctls_high = vmcs_conf->cpu_based_exec_ctrl; 683162306a36Sopenharmony_ci msrs->procbased_ctls_high &= 683262306a36Sopenharmony_ci CPU_BASED_INTR_WINDOW_EXITING | 683362306a36Sopenharmony_ci CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING | 683462306a36Sopenharmony_ci CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | 683562306a36Sopenharmony_ci CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | 683662306a36Sopenharmony_ci CPU_BASED_CR3_STORE_EXITING | 683762306a36Sopenharmony_ci#ifdef CONFIG_X86_64 683862306a36Sopenharmony_ci CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING | 683962306a36Sopenharmony_ci#endif 684062306a36Sopenharmony_ci CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | 684162306a36Sopenharmony_ci CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG | 684262306a36Sopenharmony_ci CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING | 684362306a36Sopenharmony_ci CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING | 684462306a36Sopenharmony_ci CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; 684562306a36Sopenharmony_ci /* 684662306a36Sopenharmony_ci * We can allow some features even when not supported by the 684762306a36Sopenharmony_ci * hardware. For example, L1 can specify an MSR bitmap - and we 684862306a36Sopenharmony_ci * can use it to avoid exits to L1 - even when L0 runs L2 684962306a36Sopenharmony_ci * without MSR bitmaps. 
685062306a36Sopenharmony_ci */ 685162306a36Sopenharmony_ci msrs->procbased_ctls_high |= 685262306a36Sopenharmony_ci CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | 685362306a36Sopenharmony_ci CPU_BASED_USE_MSR_BITMAPS; 685462306a36Sopenharmony_ci 685562306a36Sopenharmony_ci /* We support free control of CR3 access interception. */ 685662306a36Sopenharmony_ci msrs->procbased_ctls_low &= 685762306a36Sopenharmony_ci ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); 685862306a36Sopenharmony_ci} 685962306a36Sopenharmony_ci 686062306a36Sopenharmony_cistatic void nested_vmx_setup_secondary_ctls(u32 ept_caps, 686162306a36Sopenharmony_ci struct vmcs_config *vmcs_conf, 686262306a36Sopenharmony_ci struct nested_vmx_msrs *msrs) 686362306a36Sopenharmony_ci{ 686462306a36Sopenharmony_ci msrs->secondary_ctls_low = 0; 686562306a36Sopenharmony_ci 686662306a36Sopenharmony_ci msrs->secondary_ctls_high = vmcs_conf->cpu_based_2nd_exec_ctrl; 686762306a36Sopenharmony_ci msrs->secondary_ctls_high &= 686862306a36Sopenharmony_ci SECONDARY_EXEC_DESC | 686962306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_RDTSCP | 687062306a36Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | 687162306a36Sopenharmony_ci SECONDARY_EXEC_WBINVD_EXITING | 687262306a36Sopenharmony_ci SECONDARY_EXEC_APIC_REGISTER_VIRT | 687362306a36Sopenharmony_ci SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 687462306a36Sopenharmony_ci SECONDARY_EXEC_RDRAND_EXITING | 687562306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_INVPCID | 687662306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_VMFUNC | 687762306a36Sopenharmony_ci SECONDARY_EXEC_RDSEED_EXITING | 687862306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_XSAVES | 687962306a36Sopenharmony_ci SECONDARY_EXEC_TSC_SCALING | 688062306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; 688162306a36Sopenharmony_ci 688262306a36Sopenharmony_ci /* 688362306a36Sopenharmony_ci * We can emulate "VMCS shadowing," even if the hardware 688462306a36Sopenharmony_ci * doesn't support it. 
688562306a36Sopenharmony_ci */ 688662306a36Sopenharmony_ci msrs->secondary_ctls_high |= 688762306a36Sopenharmony_ci SECONDARY_EXEC_SHADOW_VMCS; 688862306a36Sopenharmony_ci 688962306a36Sopenharmony_ci if (enable_ept) { 689062306a36Sopenharmony_ci /* nested EPT: emulate EPT also to L1 */ 689162306a36Sopenharmony_ci msrs->secondary_ctls_high |= 689262306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_EPT; 689362306a36Sopenharmony_ci msrs->ept_caps = 689462306a36Sopenharmony_ci VMX_EPT_PAGE_WALK_4_BIT | 689562306a36Sopenharmony_ci VMX_EPT_PAGE_WALK_5_BIT | 689662306a36Sopenharmony_ci VMX_EPTP_WB_BIT | 689762306a36Sopenharmony_ci VMX_EPT_INVEPT_BIT | 689862306a36Sopenharmony_ci VMX_EPT_EXECUTE_ONLY_BIT; 689962306a36Sopenharmony_ci 690062306a36Sopenharmony_ci msrs->ept_caps &= ept_caps; 690162306a36Sopenharmony_ci msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | 690262306a36Sopenharmony_ci VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT | 690362306a36Sopenharmony_ci VMX_EPT_1GB_PAGE_BIT; 690462306a36Sopenharmony_ci if (enable_ept_ad_bits) { 690562306a36Sopenharmony_ci msrs->secondary_ctls_high |= 690662306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_PML; 690762306a36Sopenharmony_ci msrs->ept_caps |= VMX_EPT_AD_BIT; 690862306a36Sopenharmony_ci } 690962306a36Sopenharmony_ci 691062306a36Sopenharmony_ci /* 691162306a36Sopenharmony_ci * Advertise EPTP switching irrespective of hardware support, 691262306a36Sopenharmony_ci * KVM emulates it in software so long as VMFUNC is supported. 691362306a36Sopenharmony_ci */ 691462306a36Sopenharmony_ci if (cpu_has_vmx_vmfunc()) 691562306a36Sopenharmony_ci msrs->vmfunc_controls = VMX_VMFUNC_EPTP_SWITCHING; 691662306a36Sopenharmony_ci } 691762306a36Sopenharmony_ci 691862306a36Sopenharmony_ci /* 691962306a36Sopenharmony_ci * Old versions of KVM use the single-context version without 692062306a36Sopenharmony_ci * checking for support, so declare that it is supported even 692162306a36Sopenharmony_ci * though it is treated as global context. 
The alternative is 692262306a36Sopenharmony_ci * not failing the single-context invvpid, and it is worse. 692362306a36Sopenharmony_ci */ 692462306a36Sopenharmony_ci if (enable_vpid) { 692562306a36Sopenharmony_ci msrs->secondary_ctls_high |= 692662306a36Sopenharmony_ci SECONDARY_EXEC_ENABLE_VPID; 692762306a36Sopenharmony_ci msrs->vpid_caps = VMX_VPID_INVVPID_BIT | 692862306a36Sopenharmony_ci VMX_VPID_EXTENT_SUPPORTED_MASK; 692962306a36Sopenharmony_ci } 693062306a36Sopenharmony_ci 693162306a36Sopenharmony_ci if (enable_unrestricted_guest) 693262306a36Sopenharmony_ci msrs->secondary_ctls_high |= 693362306a36Sopenharmony_ci SECONDARY_EXEC_UNRESTRICTED_GUEST; 693462306a36Sopenharmony_ci 693562306a36Sopenharmony_ci if (flexpriority_enabled) 693662306a36Sopenharmony_ci msrs->secondary_ctls_high |= 693762306a36Sopenharmony_ci SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 693862306a36Sopenharmony_ci 693962306a36Sopenharmony_ci if (enable_sgx) 694062306a36Sopenharmony_ci msrs->secondary_ctls_high |= SECONDARY_EXEC_ENCLS_EXITING; 694162306a36Sopenharmony_ci} 694262306a36Sopenharmony_ci 694362306a36Sopenharmony_cistatic void nested_vmx_setup_misc_data(struct vmcs_config *vmcs_conf, 694462306a36Sopenharmony_ci struct nested_vmx_msrs *msrs) 694562306a36Sopenharmony_ci{ 694662306a36Sopenharmony_ci msrs->misc_low = (u32)vmcs_conf->misc & VMX_MISC_SAVE_EFER_LMA; 694762306a36Sopenharmony_ci msrs->misc_low |= 694862306a36Sopenharmony_ci MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | 694962306a36Sopenharmony_ci VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | 695062306a36Sopenharmony_ci VMX_MISC_ACTIVITY_HLT | 695162306a36Sopenharmony_ci VMX_MISC_ACTIVITY_WAIT_SIPI; 695262306a36Sopenharmony_ci msrs->misc_high = 0; 695362306a36Sopenharmony_ci} 695462306a36Sopenharmony_ci 695562306a36Sopenharmony_cistatic void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs) 695662306a36Sopenharmony_ci{ 695762306a36Sopenharmony_ci /* 695862306a36Sopenharmony_ci * This MSR reports some information about VMX 
support. We 695962306a36Sopenharmony_ci * should return information about the VMX we emulate for the 696062306a36Sopenharmony_ci * guest, and the VMCS structure we give it - not about the 696162306a36Sopenharmony_ci * VMX support of the underlying hardware. 696262306a36Sopenharmony_ci */ 696362306a36Sopenharmony_ci msrs->basic = 696462306a36Sopenharmony_ci VMCS12_REVISION | 696562306a36Sopenharmony_ci VMX_BASIC_TRUE_CTLS | 696662306a36Sopenharmony_ci ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | 696762306a36Sopenharmony_ci (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); 696862306a36Sopenharmony_ci 696962306a36Sopenharmony_ci if (cpu_has_vmx_basic_inout()) 697062306a36Sopenharmony_ci msrs->basic |= VMX_BASIC_INOUT; 697162306a36Sopenharmony_ci} 697262306a36Sopenharmony_ci 697362306a36Sopenharmony_cistatic void nested_vmx_setup_cr_fixed(struct nested_vmx_msrs *msrs) 697462306a36Sopenharmony_ci{ 697562306a36Sopenharmony_ci /* 697662306a36Sopenharmony_ci * These MSRs specify bits which the guest must keep fixed on 697762306a36Sopenharmony_ci * while L1 is in VMXON mode (in L1's root mode, or running an L2). 697862306a36Sopenharmony_ci * We picked the standard core2 setting. 697962306a36Sopenharmony_ci */ 698062306a36Sopenharmony_ci#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE) 698162306a36Sopenharmony_ci#define VMXON_CR4_ALWAYSON X86_CR4_VMXE 698262306a36Sopenharmony_ci msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON; 698362306a36Sopenharmony_ci msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON; 698462306a36Sopenharmony_ci 698562306a36Sopenharmony_ci /* These MSRs specify bits which the guest must keep fixed off. 
*/ 698662306a36Sopenharmony_ci rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1); 698762306a36Sopenharmony_ci rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1); 698862306a36Sopenharmony_ci 698962306a36Sopenharmony_ci if (vmx_umip_emulated()) 699062306a36Sopenharmony_ci msrs->cr4_fixed1 |= X86_CR4_UMIP; 699162306a36Sopenharmony_ci} 699262306a36Sopenharmony_ci 699362306a36Sopenharmony_ci/* 699462306a36Sopenharmony_ci * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be 699562306a36Sopenharmony_ci * returned for the various VMX controls MSRs when nested VMX is enabled. 699662306a36Sopenharmony_ci * The same values should also be used to verify that vmcs12 control fields are 699762306a36Sopenharmony_ci * valid during nested entry from L1 to L2. 699862306a36Sopenharmony_ci * Each of these control msrs has a low and high 32-bit half: A low bit is on 699962306a36Sopenharmony_ci * if the corresponding bit in the (32-bit) control field *must* be on, and a 700062306a36Sopenharmony_ci * bit in the high half is on if the corresponding bit in the control field 700162306a36Sopenharmony_ci * may be on. See also vmx_control_verify(). 700262306a36Sopenharmony_ci */ 700362306a36Sopenharmony_civoid nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) 700462306a36Sopenharmony_ci{ 700562306a36Sopenharmony_ci struct nested_vmx_msrs *msrs = &vmcs_conf->nested; 700662306a36Sopenharmony_ci 700762306a36Sopenharmony_ci /* 700862306a36Sopenharmony_ci * Note that as a general rule, the high half of the MSRs (bits in 700962306a36Sopenharmony_ci * the control fields which may be 1) should be initialized by the 701062306a36Sopenharmony_ci * intersection of the underlying hardware's MSR (i.e., features which 701162306a36Sopenharmony_ci * can be supported) and the list of features we want to expose - 701262306a36Sopenharmony_ci * because they are known to be properly supported in our code. 
701362306a36Sopenharmony_ci * Also, usually, the low half of the MSRs (bits which must be 1) can 701462306a36Sopenharmony_ci * be set to 0, meaning that L1 may turn off any of these bits. The 701562306a36Sopenharmony_ci * reason is that if one of these bits is necessary, it will appear 701662306a36Sopenharmony_ci * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control 701762306a36Sopenharmony_ci * fields of vmcs01 and vmcs02, will turn these bits off - and 701862306a36Sopenharmony_ci * nested_vmx_l1_wants_exit() will not pass related exits to L1. 701962306a36Sopenharmony_ci * These rules have exceptions below. 702062306a36Sopenharmony_ci */ 702162306a36Sopenharmony_ci nested_vmx_setup_pinbased_ctls(vmcs_conf, msrs); 702262306a36Sopenharmony_ci 702362306a36Sopenharmony_ci nested_vmx_setup_exit_ctls(vmcs_conf, msrs); 702462306a36Sopenharmony_ci 702562306a36Sopenharmony_ci nested_vmx_setup_entry_ctls(vmcs_conf, msrs); 702662306a36Sopenharmony_ci 702762306a36Sopenharmony_ci nested_vmx_setup_cpubased_ctls(vmcs_conf, msrs); 702862306a36Sopenharmony_ci 702962306a36Sopenharmony_ci nested_vmx_setup_secondary_ctls(ept_caps, vmcs_conf, msrs); 703062306a36Sopenharmony_ci 703162306a36Sopenharmony_ci nested_vmx_setup_misc_data(vmcs_conf, msrs); 703262306a36Sopenharmony_ci 703362306a36Sopenharmony_ci nested_vmx_setup_basic(msrs); 703462306a36Sopenharmony_ci 703562306a36Sopenharmony_ci nested_vmx_setup_cr_fixed(msrs); 703662306a36Sopenharmony_ci 703762306a36Sopenharmony_ci msrs->vmcs_enum = nested_vmx_calc_vmcs_enum_msr(); 703862306a36Sopenharmony_ci} 703962306a36Sopenharmony_ci 704062306a36Sopenharmony_civoid nested_vmx_hardware_unsetup(void) 704162306a36Sopenharmony_ci{ 704262306a36Sopenharmony_ci int i; 704362306a36Sopenharmony_ci 704462306a36Sopenharmony_ci if (enable_shadow_vmcs) { 704562306a36Sopenharmony_ci for (i = 0; i < VMX_BITMAP_NR; i++) 704662306a36Sopenharmony_ci free_page((unsigned long)vmx_bitmap[i]); 704762306a36Sopenharmony_ci } 
704862306a36Sopenharmony_ci} 704962306a36Sopenharmony_ci 705062306a36Sopenharmony_ci__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) 705162306a36Sopenharmony_ci{ 705262306a36Sopenharmony_ci int i; 705362306a36Sopenharmony_ci 705462306a36Sopenharmony_ci if (!cpu_has_vmx_shadow_vmcs()) 705562306a36Sopenharmony_ci enable_shadow_vmcs = 0; 705662306a36Sopenharmony_ci if (enable_shadow_vmcs) { 705762306a36Sopenharmony_ci for (i = 0; i < VMX_BITMAP_NR; i++) { 705862306a36Sopenharmony_ci /* 705962306a36Sopenharmony_ci * The vmx_bitmap is not tied to a VM and so should 706062306a36Sopenharmony_ci * not be charged to a memcg. 706162306a36Sopenharmony_ci */ 706262306a36Sopenharmony_ci vmx_bitmap[i] = (unsigned long *) 706362306a36Sopenharmony_ci __get_free_page(GFP_KERNEL); 706462306a36Sopenharmony_ci if (!vmx_bitmap[i]) { 706562306a36Sopenharmony_ci nested_vmx_hardware_unsetup(); 706662306a36Sopenharmony_ci return -ENOMEM; 706762306a36Sopenharmony_ci } 706862306a36Sopenharmony_ci } 706962306a36Sopenharmony_ci 707062306a36Sopenharmony_ci init_vmcs_shadow_fields(); 707162306a36Sopenharmony_ci } 707262306a36Sopenharmony_ci 707362306a36Sopenharmony_ci exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear; 707462306a36Sopenharmony_ci exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch; 707562306a36Sopenharmony_ci exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld; 707662306a36Sopenharmony_ci exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst; 707762306a36Sopenharmony_ci exit_handlers[EXIT_REASON_VMREAD] = handle_vmread; 707862306a36Sopenharmony_ci exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume; 707962306a36Sopenharmony_ci exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite; 708062306a36Sopenharmony_ci exit_handlers[EXIT_REASON_VMOFF] = handle_vmxoff; 708162306a36Sopenharmony_ci exit_handlers[EXIT_REASON_VMON] = handle_vmxon; 708262306a36Sopenharmony_ci exit_handlers[EXIT_REASON_INVEPT] = handle_invept; 708362306a36Sopenharmony_ci 
exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid; 708462306a36Sopenharmony_ci exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc; 708562306a36Sopenharmony_ci 708662306a36Sopenharmony_ci return 0; 708762306a36Sopenharmony_ci} 708862306a36Sopenharmony_ci 708962306a36Sopenharmony_cistruct kvm_x86_nested_ops vmx_nested_ops = { 709062306a36Sopenharmony_ci .leave_nested = vmx_leave_nested, 709162306a36Sopenharmony_ci .is_exception_vmexit = nested_vmx_is_exception_vmexit, 709262306a36Sopenharmony_ci .check_events = vmx_check_nested_events, 709362306a36Sopenharmony_ci .has_events = vmx_has_nested_events, 709462306a36Sopenharmony_ci .triple_fault = nested_vmx_triple_fault, 709562306a36Sopenharmony_ci .get_state = vmx_get_nested_state, 709662306a36Sopenharmony_ci .set_state = vmx_set_nested_state, 709762306a36Sopenharmony_ci .get_nested_state_pages = vmx_get_nested_state_pages, 709862306a36Sopenharmony_ci .write_log_dirty = nested_vmx_write_pml_buffer, 709962306a36Sopenharmony_ci .enable_evmcs = nested_enable_evmcs, 710062306a36Sopenharmony_ci .get_evmcs_version = nested_get_evmcs_version, 710162306a36Sopenharmony_ci .hv_inject_synthetic_vmexit_post_tlb_flush = vmx_hv_inject_synthetic_vmexit_post_tlb_flush, 710262306a36Sopenharmony_ci}; 7103