162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
362306a36Sopenharmony_ci
462306a36Sopenharmony_ci#include <linux/objtool.h>
562306a36Sopenharmony_ci#include <linux/percpu.h>
662306a36Sopenharmony_ci
762306a36Sopenharmony_ci#include <asm/debugreg.h>
862306a36Sopenharmony_ci#include <asm/mmu_context.h>
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include "cpuid.h"
1162306a36Sopenharmony_ci#include "hyperv.h"
1262306a36Sopenharmony_ci#include "mmu.h"
1362306a36Sopenharmony_ci#include "nested.h"
1462306a36Sopenharmony_ci#include "pmu.h"
1562306a36Sopenharmony_ci#include "sgx.h"
1662306a36Sopenharmony_ci#include "trace.h"
1762306a36Sopenharmony_ci#include "vmx.h"
1862306a36Sopenharmony_ci#include "x86.h"
1962306a36Sopenharmony_ci#include "smm.h"
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_cistatic bool __read_mostly enable_shadow_vmcs = 1;
2262306a36Sopenharmony_cimodule_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_cistatic bool __read_mostly nested_early_check = 0;
2562306a36Sopenharmony_cimodule_param(nested_early_check, bool, S_IRUGO);
2662306a36Sopenharmony_ci
/* File-local shorthand wrapped around each nested VM-Enter consistency check. */
#define CC	KVM_NESTED_VMENTER_CONSISTENCY_CHECK

/*
 * VPID invalidation extents advertised as supported.  Hyper-V requires every
 * one of them, so all are reported even though each is simply handled the
 * same way as all-context.
 */
#define VMX_VPID_EXTENT_SUPPORTED_MASK			\
	(VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT |		\
	 VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |		\
	 VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT |		\
	 VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)

/*
 * Rate value used for the emulated VMX preemption timer.
 * NOTE(review): presumably a shift relative to the TSC - confirm at the users.
 */
#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE	5
4062306a36Sopenharmony_ci
/* Slots of vmx_bitmap[]; VMX_BITMAP_NR is the slot count, not a bitmap. */
enum {
	VMX_VMREAD_BITMAP = 0,
	VMX_VMWRITE_BITMAP,
	VMX_BITMAP_NR
};

/* One bitmap pointer per slot above. */
static unsigned long *vmx_bitmap[VMX_BITMAP_NR];

/* Named accessors for the individual bitmap slots. */
#define vmx_vmread_bitmap	(vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap	(vmx_bitmap[VMX_VMWRITE_BITMAP])
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_cistruct shadow_vmcs_field {
5262306a36Sopenharmony_ci	u16	encoding;
5362306a36Sopenharmony_ci	u16	offset;
5462306a36Sopenharmony_ci};
5562306a36Sopenharmony_cistatic struct shadow_vmcs_field shadow_read_only_fields[] = {
5662306a36Sopenharmony_ci#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) },
5762306a36Sopenharmony_ci#include "vmcs_shadow_fields.h"
5862306a36Sopenharmony_ci};
5962306a36Sopenharmony_cistatic int max_shadow_read_only_fields =
6062306a36Sopenharmony_ci	ARRAY_SIZE(shadow_read_only_fields);
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_cistatic struct shadow_vmcs_field shadow_read_write_fields[] = {
6362306a36Sopenharmony_ci#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) },
6462306a36Sopenharmony_ci#include "vmcs_shadow_fields.h"
6562306a36Sopenharmony_ci};
6662306a36Sopenharmony_cistatic int max_shadow_read_write_fields =
6762306a36Sopenharmony_ci	ARRAY_SIZE(shadow_read_write_fields);
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_cistatic void init_vmcs_shadow_fields(void)
7062306a36Sopenharmony_ci{
7162306a36Sopenharmony_ci	int i, j;
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
7462306a36Sopenharmony_ci	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci	for (i = j = 0; i < max_shadow_read_only_fields; i++) {
7762306a36Sopenharmony_ci		struct shadow_vmcs_field entry = shadow_read_only_fields[i];
7862306a36Sopenharmony_ci		u16 field = entry.encoding;
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci		if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
8162306a36Sopenharmony_ci		    (i + 1 == max_shadow_read_only_fields ||
8262306a36Sopenharmony_ci		     shadow_read_only_fields[i + 1].encoding != field + 1))
8362306a36Sopenharmony_ci			pr_err("Missing field from shadow_read_only_field %x\n",
8462306a36Sopenharmony_ci			       field + 1);
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_ci		clear_bit(field, vmx_vmread_bitmap);
8762306a36Sopenharmony_ci		if (field & 1)
8862306a36Sopenharmony_ci#ifdef CONFIG_X86_64
8962306a36Sopenharmony_ci			continue;
9062306a36Sopenharmony_ci#else
9162306a36Sopenharmony_ci			entry.offset += sizeof(u32);
9262306a36Sopenharmony_ci#endif
9362306a36Sopenharmony_ci		shadow_read_only_fields[j++] = entry;
9462306a36Sopenharmony_ci	}
9562306a36Sopenharmony_ci	max_shadow_read_only_fields = j;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci	for (i = j = 0; i < max_shadow_read_write_fields; i++) {
9862306a36Sopenharmony_ci		struct shadow_vmcs_field entry = shadow_read_write_fields[i];
9962306a36Sopenharmony_ci		u16 field = entry.encoding;
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci		if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
10262306a36Sopenharmony_ci		    (i + 1 == max_shadow_read_write_fields ||
10362306a36Sopenharmony_ci		     shadow_read_write_fields[i + 1].encoding != field + 1))
10462306a36Sopenharmony_ci			pr_err("Missing field from shadow_read_write_field %x\n",
10562306a36Sopenharmony_ci			       field + 1);
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci		WARN_ONCE(field >= GUEST_ES_AR_BYTES &&
10862306a36Sopenharmony_ci			  field <= GUEST_TR_AR_BYTES,
10962306a36Sopenharmony_ci			  "Update vmcs12_write_any() to drop reserved bits from AR_BYTES");
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci		/*
11262306a36Sopenharmony_ci		 * PML and the preemption timer can be emulated, but the
11362306a36Sopenharmony_ci		 * processor cannot vmwrite to fields that don't exist
11462306a36Sopenharmony_ci		 * on bare metal.
11562306a36Sopenharmony_ci		 */
11662306a36Sopenharmony_ci		switch (field) {
11762306a36Sopenharmony_ci		case GUEST_PML_INDEX:
11862306a36Sopenharmony_ci			if (!cpu_has_vmx_pml())
11962306a36Sopenharmony_ci				continue;
12062306a36Sopenharmony_ci			break;
12162306a36Sopenharmony_ci		case VMX_PREEMPTION_TIMER_VALUE:
12262306a36Sopenharmony_ci			if (!cpu_has_vmx_preemption_timer())
12362306a36Sopenharmony_ci				continue;
12462306a36Sopenharmony_ci			break;
12562306a36Sopenharmony_ci		case GUEST_INTR_STATUS:
12662306a36Sopenharmony_ci			if (!cpu_has_vmx_apicv())
12762306a36Sopenharmony_ci				continue;
12862306a36Sopenharmony_ci			break;
12962306a36Sopenharmony_ci		default:
13062306a36Sopenharmony_ci			break;
13162306a36Sopenharmony_ci		}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci		clear_bit(field, vmx_vmwrite_bitmap);
13462306a36Sopenharmony_ci		clear_bit(field, vmx_vmread_bitmap);
13562306a36Sopenharmony_ci		if (field & 1)
13662306a36Sopenharmony_ci#ifdef CONFIG_X86_64
13762306a36Sopenharmony_ci			continue;
13862306a36Sopenharmony_ci#else
13962306a36Sopenharmony_ci			entry.offset += sizeof(u32);
14062306a36Sopenharmony_ci#endif
14162306a36Sopenharmony_ci		shadow_read_write_fields[j++] = entry;
14262306a36Sopenharmony_ci	}
14362306a36Sopenharmony_ci	max_shadow_read_write_fields = j;
14462306a36Sopenharmony_ci}
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci/*
14762306a36Sopenharmony_ci * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
14862306a36Sopenharmony_ci * set the success or error code of an emulated VMX instruction (as specified
14962306a36Sopenharmony_ci * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated
15062306a36Sopenharmony_ci * instruction.
15162306a36Sopenharmony_ci */
15262306a36Sopenharmony_cistatic int nested_vmx_succeed(struct kvm_vcpu *vcpu)
15362306a36Sopenharmony_ci{
15462306a36Sopenharmony_ci	vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
15562306a36Sopenharmony_ci			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
15662306a36Sopenharmony_ci			    X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
15762306a36Sopenharmony_ci	return kvm_skip_emulated_instruction(vcpu);
15862306a36Sopenharmony_ci}
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_cistatic int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
16162306a36Sopenharmony_ci{
16262306a36Sopenharmony_ci	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
16362306a36Sopenharmony_ci			& ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
16462306a36Sopenharmony_ci			    X86_EFLAGS_SF | X86_EFLAGS_OF))
16562306a36Sopenharmony_ci			| X86_EFLAGS_CF);
16662306a36Sopenharmony_ci	return kvm_skip_emulated_instruction(vcpu);
16762306a36Sopenharmony_ci}
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_cistatic int nested_vmx_failValid(struct kvm_vcpu *vcpu,
17062306a36Sopenharmony_ci				u32 vm_instruction_error)
17162306a36Sopenharmony_ci{
17262306a36Sopenharmony_ci	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
17362306a36Sopenharmony_ci			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
17462306a36Sopenharmony_ci			    X86_EFLAGS_SF | X86_EFLAGS_OF))
17562306a36Sopenharmony_ci			| X86_EFLAGS_ZF);
17662306a36Sopenharmony_ci	get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
17762306a36Sopenharmony_ci	/*
17862306a36Sopenharmony_ci	 * We don't need to force sync to shadow VMCS because
17962306a36Sopenharmony_ci	 * VM_INSTRUCTION_ERROR is not shadowed. Enlightened VMCS 'shadows' all
18062306a36Sopenharmony_ci	 * fields and thus must be synced.
18162306a36Sopenharmony_ci	 */
18262306a36Sopenharmony_ci	if (to_vmx(vcpu)->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
18362306a36Sopenharmony_ci		to_vmx(vcpu)->nested.need_vmcs12_to_shadow_sync = true;
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	return kvm_skip_emulated_instruction(vcpu);
18662306a36Sopenharmony_ci}
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_cistatic int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error)
18962306a36Sopenharmony_ci{
19062306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	/*
19362306a36Sopenharmony_ci	 * failValid writes the error number to the current VMCS, which
19462306a36Sopenharmony_ci	 * can't be done if there isn't a current VMCS.
19562306a36Sopenharmony_ci	 */
19662306a36Sopenharmony_ci	if (vmx->nested.current_vmptr == INVALID_GPA &&
19762306a36Sopenharmony_ci	    !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
19862306a36Sopenharmony_ci		return nested_vmx_failInvalid(vcpu);
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	return nested_vmx_failValid(vcpu, vm_instruction_error);
20162306a36Sopenharmony_ci}
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_cistatic void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
20462306a36Sopenharmony_ci{
20562306a36Sopenharmony_ci	/* TODO: not to reset guest simply here. */
20662306a36Sopenharmony_ci	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
20762306a36Sopenharmony_ci	pr_debug_ratelimited("nested vmx abort, indicator %d\n", indicator);
20862306a36Sopenharmony_ci}
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_cistatic inline bool vmx_control_verify(u32 control, u32 low, u32 high)
21162306a36Sopenharmony_ci{
21262306a36Sopenharmony_ci	return fixed_bits_valid(control, low, high);
21362306a36Sopenharmony_ci}
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_cistatic inline u64 vmx_control_msr(u32 low, u32 high)
21662306a36Sopenharmony_ci{
21762306a36Sopenharmony_ci	return low | ((u64)high << 32);
21862306a36Sopenharmony_ci}
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_cistatic void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
22162306a36Sopenharmony_ci{
22262306a36Sopenharmony_ci	secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
22362306a36Sopenharmony_ci	vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA);
22462306a36Sopenharmony_ci	vmx->nested.need_vmcs12_to_shadow_sync = false;
22562306a36Sopenharmony_ci}
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_cistatic inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
22862306a36Sopenharmony_ci{
22962306a36Sopenharmony_ci	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
23062306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
23362306a36Sopenharmony_ci		kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
23462306a36Sopenharmony_ci		vmx->nested.hv_evmcs = NULL;
23562306a36Sopenharmony_ci	}
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci	vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	if (hv_vcpu) {
24062306a36Sopenharmony_ci		hv_vcpu->nested.pa_page_gpa = INVALID_GPA;
24162306a36Sopenharmony_ci		hv_vcpu->nested.vm_id = 0;
24262306a36Sopenharmony_ci		hv_vcpu->nested.vp_id = 0;
24362306a36Sopenharmony_ci	}
24462306a36Sopenharmony_ci}
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_cistatic void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
24762306a36Sopenharmony_ci				     struct loaded_vmcs *prev)
24862306a36Sopenharmony_ci{
24962306a36Sopenharmony_ci	struct vmcs_host_state *dest, *src;
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci	if (unlikely(!vmx->guest_state_loaded))
25262306a36Sopenharmony_ci		return;
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	src = &prev->host_state;
25562306a36Sopenharmony_ci	dest = &vmx->loaded_vmcs->host_state;
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_ci	vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
25862306a36Sopenharmony_ci	dest->ldt_sel = src->ldt_sel;
25962306a36Sopenharmony_ci#ifdef CONFIG_X86_64
26062306a36Sopenharmony_ci	dest->ds_sel = src->ds_sel;
26162306a36Sopenharmony_ci	dest->es_sel = src->es_sel;
26262306a36Sopenharmony_ci#endif
26362306a36Sopenharmony_ci}
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_cistatic void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
26662306a36Sopenharmony_ci{
26762306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
26862306a36Sopenharmony_ci	struct loaded_vmcs *prev;
26962306a36Sopenharmony_ci	int cpu;
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	if (WARN_ON_ONCE(vmx->loaded_vmcs == vmcs))
27262306a36Sopenharmony_ci		return;
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci	cpu = get_cpu();
27562306a36Sopenharmony_ci	prev = vmx->loaded_vmcs;
27662306a36Sopenharmony_ci	vmx->loaded_vmcs = vmcs;
27762306a36Sopenharmony_ci	vmx_vcpu_load_vmcs(vcpu, cpu, prev);
27862306a36Sopenharmony_ci	vmx_sync_vmcs_host_state(vmx, prev);
27962306a36Sopenharmony_ci	put_cpu();
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci	vcpu->arch.regs_avail = ~VMX_REGS_LAZY_LOAD_SET;
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci	/*
28462306a36Sopenharmony_ci	 * All lazily updated registers will be reloaded from VMCS12 on both
28562306a36Sopenharmony_ci	 * vmentry and vmexit.
28662306a36Sopenharmony_ci	 */
28762306a36Sopenharmony_ci	vcpu->arch.regs_dirty = 0;
28862306a36Sopenharmony_ci}
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci/*
29162306a36Sopenharmony_ci * Free whatever needs to be freed from vmx->nested when L1 goes down, or
29262306a36Sopenharmony_ci * just stops using VMX.
29362306a36Sopenharmony_ci */
29462306a36Sopenharmony_cistatic void free_nested(struct kvm_vcpu *vcpu)
29562306a36Sopenharmony_ci{
29662306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	if (WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01))
29962306a36Sopenharmony_ci		vmx_switch_vmcs(vcpu, &vmx->vmcs01);
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
30262306a36Sopenharmony_ci		return;
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_ci	vmx->nested.vmxon = false;
30762306a36Sopenharmony_ci	vmx->nested.smm.vmxon = false;
30862306a36Sopenharmony_ci	vmx->nested.vmxon_ptr = INVALID_GPA;
30962306a36Sopenharmony_ci	free_vpid(vmx->nested.vpid02);
31062306a36Sopenharmony_ci	vmx->nested.posted_intr_nv = -1;
31162306a36Sopenharmony_ci	vmx->nested.current_vmptr = INVALID_GPA;
31262306a36Sopenharmony_ci	if (enable_shadow_vmcs) {
31362306a36Sopenharmony_ci		vmx_disable_shadow_vmcs(vmx);
31462306a36Sopenharmony_ci		vmcs_clear(vmx->vmcs01.shadow_vmcs);
31562306a36Sopenharmony_ci		free_vmcs(vmx->vmcs01.shadow_vmcs);
31662306a36Sopenharmony_ci		vmx->vmcs01.shadow_vmcs = NULL;
31762306a36Sopenharmony_ci	}
31862306a36Sopenharmony_ci	kfree(vmx->nested.cached_vmcs12);
31962306a36Sopenharmony_ci	vmx->nested.cached_vmcs12 = NULL;
32062306a36Sopenharmony_ci	kfree(vmx->nested.cached_shadow_vmcs12);
32162306a36Sopenharmony_ci	vmx->nested.cached_shadow_vmcs12 = NULL;
32262306a36Sopenharmony_ci	/*
32362306a36Sopenharmony_ci	 * Unpin physical memory we referred to in the vmcs02.  The APIC access
32462306a36Sopenharmony_ci	 * page's backing page (yeah, confusing) shouldn't actually be accessed,
32562306a36Sopenharmony_ci	 * and if it is written, the contents are irrelevant.
32662306a36Sopenharmony_ci	 */
32762306a36Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &vmx->nested.apic_access_page_map, false);
32862306a36Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
32962306a36Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
33062306a36Sopenharmony_ci	vmx->nested.pi_desc = NULL;
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_ci	kvm_mmu_free_roots(vcpu->kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci	nested_release_evmcs(vcpu);
33562306a36Sopenharmony_ci
33662306a36Sopenharmony_ci	free_loaded_vmcs(&vmx->nested.vmcs02);
33762306a36Sopenharmony_ci}
33862306a36Sopenharmony_ci
/*
 * Leave nested operation for @vcpu with the vCPU loaded.
 *
 * The vcpu_load()/vcpu_put() bracket ensures that the current vmcs of the
 * logical processor is the vmcs01 of the vcpu before free_nested() runs.
 * NOTE(review): free_nested() is presumably reached through
 * vmx_leave_nested() - confirm in its definition.
 */
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);
	vmx_leave_nested(vcpu);
	vcpu_put(vcpu);
}
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_ci#define EPTP_PA_MASK   GENMASK_ULL(51, 12)
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_cistatic bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
35362306a36Sopenharmony_ci{
35462306a36Sopenharmony_ci	return VALID_PAGE(root_hpa) &&
35562306a36Sopenharmony_ci	       ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
35662306a36Sopenharmony_ci}
35762306a36Sopenharmony_ci
35862306a36Sopenharmony_cistatic void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
35962306a36Sopenharmony_ci				       gpa_t addr)
36062306a36Sopenharmony_ci{
36162306a36Sopenharmony_ci	unsigned long roots = 0;
36262306a36Sopenharmony_ci	uint i;
36362306a36Sopenharmony_ci	struct kvm_mmu_root_info *cached_root;
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_ci	WARN_ON_ONCE(!mmu_is_nested(vcpu));
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
36862306a36Sopenharmony_ci		cached_root = &vcpu->arch.mmu->prev_roots[i];
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci		if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd,
37162306a36Sopenharmony_ci					    eptp))
37262306a36Sopenharmony_ci			roots |= KVM_MMU_ROOT_PREVIOUS(i);
37362306a36Sopenharmony_ci	}
37462306a36Sopenharmony_ci	if (roots)
37562306a36Sopenharmony_ci		kvm_mmu_invalidate_addr(vcpu, vcpu->arch.mmu, addr, roots);
37662306a36Sopenharmony_ci}
37762306a36Sopenharmony_ci
37862306a36Sopenharmony_cistatic void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
37962306a36Sopenharmony_ci		struct x86_exception *fault)
38062306a36Sopenharmony_ci{
38162306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
38262306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
38362306a36Sopenharmony_ci	u32 vm_exit_reason;
38462306a36Sopenharmony_ci	unsigned long exit_qualification = vcpu->arch.exit_qualification;
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci	if (vmx->nested.pml_full) {
38762306a36Sopenharmony_ci		vm_exit_reason = EXIT_REASON_PML_FULL;
38862306a36Sopenharmony_ci		vmx->nested.pml_full = false;
38962306a36Sopenharmony_ci		exit_qualification &= INTR_INFO_UNBLOCK_NMI;
39062306a36Sopenharmony_ci	} else {
39162306a36Sopenharmony_ci		if (fault->error_code & PFERR_RSVD_MASK)
39262306a36Sopenharmony_ci			vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
39362306a36Sopenharmony_ci		else
39462306a36Sopenharmony_ci			vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_ci		/*
39762306a36Sopenharmony_ci		 * Although the caller (kvm_inject_emulated_page_fault) would
39862306a36Sopenharmony_ci		 * have already synced the faulting address in the shadow EPT
39962306a36Sopenharmony_ci		 * tables for the current EPTP12, we also need to sync it for
40062306a36Sopenharmony_ci		 * any other cached EPTP02s based on the same EP4TA, since the
40162306a36Sopenharmony_ci		 * TLB associates mappings to the EP4TA rather than the full EPTP.
40262306a36Sopenharmony_ci		 */
40362306a36Sopenharmony_ci		nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
40462306a36Sopenharmony_ci					   fault->address);
40562306a36Sopenharmony_ci	}
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci	nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
40862306a36Sopenharmony_ci	vmcs12->guest_physical_address = fault->address;
40962306a36Sopenharmony_ci}
41062306a36Sopenharmony_ci
41162306a36Sopenharmony_cistatic void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
41262306a36Sopenharmony_ci{
41362306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
41462306a36Sopenharmony_ci	bool execonly = vmx->nested.msrs.ept_caps & VMX_EPT_EXECUTE_ONLY_BIT;
41562306a36Sopenharmony_ci	int ept_lpage_level = ept_caps_to_lpage_level(vmx->nested.msrs.ept_caps);
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci	kvm_init_shadow_ept_mmu(vcpu, execonly, ept_lpage_level,
41862306a36Sopenharmony_ci				nested_ept_ad_enabled(vcpu),
41962306a36Sopenharmony_ci				nested_ept_get_eptp(vcpu));
42062306a36Sopenharmony_ci}
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_cistatic void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
42362306a36Sopenharmony_ci{
42462306a36Sopenharmony_ci	WARN_ON(mmu_is_nested(vcpu));
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
42762306a36Sopenharmony_ci	nested_ept_new_eptp(vcpu);
42862306a36Sopenharmony_ci	vcpu->arch.mmu->get_guest_pgd     = nested_ept_get_eptp;
42962306a36Sopenharmony_ci	vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
43062306a36Sopenharmony_ci	vcpu->arch.mmu->get_pdptr         = kvm_pdptr_read;
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
43362306a36Sopenharmony_ci}
43462306a36Sopenharmony_ci
43562306a36Sopenharmony_cistatic void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
43662306a36Sopenharmony_ci{
43762306a36Sopenharmony_ci	vcpu->arch.mmu = &vcpu->arch.root_mmu;
43862306a36Sopenharmony_ci	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
43962306a36Sopenharmony_ci}
44062306a36Sopenharmony_ci
44162306a36Sopenharmony_cistatic bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
44262306a36Sopenharmony_ci					    u16 error_code)
44362306a36Sopenharmony_ci{
44462306a36Sopenharmony_ci	bool inequality, bit;
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_ci	bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
44762306a36Sopenharmony_ci	inequality =
44862306a36Sopenharmony_ci		(error_code & vmcs12->page_fault_error_code_mask) !=
44962306a36Sopenharmony_ci		 vmcs12->page_fault_error_code_match;
45062306a36Sopenharmony_ci	return inequality ^ bit;
45162306a36Sopenharmony_ci}
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_cistatic bool nested_vmx_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector,
45462306a36Sopenharmony_ci					   u32 error_code)
45562306a36Sopenharmony_ci{
45662306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci	/*
45962306a36Sopenharmony_ci	 * Drop bits 31:16 of the error code when performing the #PF mask+match
46062306a36Sopenharmony_ci	 * check.  All VMCS fields involved are 32 bits, but Intel CPUs never
46162306a36Sopenharmony_ci	 * set bits 31:16 and VMX disallows setting bits 31:16 in the injected
46262306a36Sopenharmony_ci	 * error code.  Including the to-be-dropped bits in the check might
46362306a36Sopenharmony_ci	 * result in an "impossible" or missed exit from L1's perspective.
46462306a36Sopenharmony_ci	 */
46562306a36Sopenharmony_ci	if (vector == PF_VECTOR)
46662306a36Sopenharmony_ci		return nested_vmx_is_page_fault_vmexit(vmcs12, (u16)error_code);
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	return (vmcs12->exception_bitmap & (1u << vector));
46962306a36Sopenharmony_ci}
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_cistatic int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
47262306a36Sopenharmony_ci					       struct vmcs12 *vmcs12)
47362306a36Sopenharmony_ci{
47462306a36Sopenharmony_ci	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
47562306a36Sopenharmony_ci		return 0;
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci	if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)) ||
47862306a36Sopenharmony_ci	    CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b)))
47962306a36Sopenharmony_ci		return -EINVAL;
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	return 0;
48262306a36Sopenharmony_ci}
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_cistatic int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
48562306a36Sopenharmony_ci						struct vmcs12 *vmcs12)
48662306a36Sopenharmony_ci{
48762306a36Sopenharmony_ci	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
48862306a36Sopenharmony_ci		return 0;
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_ci	if (CC(!page_address_valid(vcpu, vmcs12->msr_bitmap)))
49162306a36Sopenharmony_ci		return -EINVAL;
49262306a36Sopenharmony_ci
49362306a36Sopenharmony_ci	return 0;
49462306a36Sopenharmony_ci}
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_cistatic int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
49762306a36Sopenharmony_ci						struct vmcs12 *vmcs12)
49862306a36Sopenharmony_ci{
49962306a36Sopenharmony_ci	if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
50062306a36Sopenharmony_ci		return 0;
50162306a36Sopenharmony_ci
50262306a36Sopenharmony_ci	if (CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)))
50362306a36Sopenharmony_ci		return -EINVAL;
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_ci	return 0;
50662306a36Sopenharmony_ci}
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_ci/*
50962306a36Sopenharmony_ci * For x2APIC MSRs, ignore the vmcs01 bitmap.  L1 can enable x2APIC without L1
51062306a36Sopenharmony_ci * itself utilizing x2APIC.  All MSRs were previously set to be intercepted,
51162306a36Sopenharmony_ci * only the "disable intercept" case needs to be handled.
51262306a36Sopenharmony_ci */
51362306a36Sopenharmony_cistatic void nested_vmx_disable_intercept_for_x2apic_msr(unsigned long *msr_bitmap_l1,
51462306a36Sopenharmony_ci							unsigned long *msr_bitmap_l0,
51562306a36Sopenharmony_ci							u32 msr, int type)
51662306a36Sopenharmony_ci{
51762306a36Sopenharmony_ci	if (type & MSR_TYPE_R && !vmx_test_msr_bitmap_read(msr_bitmap_l1, msr))
51862306a36Sopenharmony_ci		vmx_clear_msr_bitmap_read(msr_bitmap_l0, msr);
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci	if (type & MSR_TYPE_W && !vmx_test_msr_bitmap_write(msr_bitmap_l1, msr))
52162306a36Sopenharmony_ci		vmx_clear_msr_bitmap_write(msr_bitmap_l0, msr);
52262306a36Sopenharmony_ci}
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_cistatic inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
52562306a36Sopenharmony_ci{
52662306a36Sopenharmony_ci	int msr;
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_ci	for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
52962306a36Sopenharmony_ci		unsigned word = msr / BITS_PER_LONG;
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci		msr_bitmap[word] = ~0;
53262306a36Sopenharmony_ci		msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
53362306a36Sopenharmony_ci	}
53462306a36Sopenharmony_ci}
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_ci#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw)					\
53762306a36Sopenharmony_cistatic inline									\
53862306a36Sopenharmony_civoid nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx,			\
53962306a36Sopenharmony_ci					 unsigned long *msr_bitmap_l1,		\
54062306a36Sopenharmony_ci					 unsigned long *msr_bitmap_l0, u32 msr)	\
54162306a36Sopenharmony_ci{										\
54262306a36Sopenharmony_ci	if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) ||		\
54362306a36Sopenharmony_ci	    vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr))			\
54462306a36Sopenharmony_ci		vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr);			\
54562306a36Sopenharmony_ci	else									\
54662306a36Sopenharmony_ci		vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr);			\
54762306a36Sopenharmony_ci}
54862306a36Sopenharmony_ciBUILD_NVMX_MSR_INTERCEPT_HELPER(read)
54962306a36Sopenharmony_ciBUILD_NVMX_MSR_INTERCEPT_HELPER(write)
55062306a36Sopenharmony_ci
55162306a36Sopenharmony_cistatic inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx,
55262306a36Sopenharmony_ci						    unsigned long *msr_bitmap_l1,
55362306a36Sopenharmony_ci						    unsigned long *msr_bitmap_l0,
55462306a36Sopenharmony_ci						    u32 msr, int types)
55562306a36Sopenharmony_ci{
55662306a36Sopenharmony_ci	if (types & MSR_TYPE_R)
55762306a36Sopenharmony_ci		nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1,
55862306a36Sopenharmony_ci						  msr_bitmap_l0, msr);
55962306a36Sopenharmony_ci	if (types & MSR_TYPE_W)
56062306a36Sopenharmony_ci		nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1,
56162306a36Sopenharmony_ci						   msr_bitmap_l0, msr);
56262306a36Sopenharmony_ci}
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci/*
56562306a36Sopenharmony_ci * Merge L0's and L1's MSR bitmap, return false to indicate that
56662306a36Sopenharmony_ci * we do not use the hardware.
56762306a36Sopenharmony_ci */
56862306a36Sopenharmony_cistatic inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
56962306a36Sopenharmony_ci						 struct vmcs12 *vmcs12)
57062306a36Sopenharmony_ci{
57162306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
57262306a36Sopenharmony_ci	int msr;
57362306a36Sopenharmony_ci	unsigned long *msr_bitmap_l1;
57462306a36Sopenharmony_ci	unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
57562306a36Sopenharmony_ci	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
57662306a36Sopenharmony_ci	struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
57762306a36Sopenharmony_ci
57862306a36Sopenharmony_ci	/* Nothing to do if the MSR bitmap is not in use.  */
57962306a36Sopenharmony_ci	if (!cpu_has_vmx_msr_bitmap() ||
58062306a36Sopenharmony_ci	    !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
58162306a36Sopenharmony_ci		return false;
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci	/*
58462306a36Sopenharmony_ci	 * MSR bitmap update can be skipped when:
58562306a36Sopenharmony_ci	 * - MSR bitmap for L1 hasn't changed.
58662306a36Sopenharmony_ci	 * - Nested hypervisor (L1) is attempting to launch the same L2 as
58762306a36Sopenharmony_ci	 *   before.
58862306a36Sopenharmony_ci	 * - Nested hypervisor (L1) has enabled 'Enlightened MSR Bitmap' feature
58962306a36Sopenharmony_ci	 *   and tells KVM (L0) there were no changes in MSR bitmap for L2.
59062306a36Sopenharmony_ci	 */
59162306a36Sopenharmony_ci	if (!vmx->nested.force_msr_bitmap_recalc && evmcs &&
59262306a36Sopenharmony_ci	    evmcs->hv_enlightenments_control.msr_bitmap &&
59362306a36Sopenharmony_ci	    evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP)
59462306a36Sopenharmony_ci		return true;
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
59762306a36Sopenharmony_ci		return false;
59862306a36Sopenharmony_ci
59962306a36Sopenharmony_ci	msr_bitmap_l1 = (unsigned long *)map->hva;
60062306a36Sopenharmony_ci
60162306a36Sopenharmony_ci	/*
60262306a36Sopenharmony_ci	 * To keep the control flow simple, pay eight 8-byte writes (sixteen
60362306a36Sopenharmony_ci	 * 4-byte writes on 32-bit systems) up front to enable intercepts for
60462306a36Sopenharmony_ci	 * the x2APIC MSR range and selectively toggle those relevant to L2.
60562306a36Sopenharmony_ci	 */
60662306a36Sopenharmony_ci	enable_x2apic_msr_intercepts(msr_bitmap_l0);
60762306a36Sopenharmony_ci
60862306a36Sopenharmony_ci	if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
60962306a36Sopenharmony_ci		if (nested_cpu_has_apic_reg_virt(vmcs12)) {
61062306a36Sopenharmony_ci			/*
61162306a36Sopenharmony_ci			 * L0 need not intercept reads for MSRs between 0x800
61262306a36Sopenharmony_ci			 * and 0x8ff, it just lets the processor take the value
61362306a36Sopenharmony_ci			 * from the virtual-APIC page; take those 256 bits
61462306a36Sopenharmony_ci			 * directly from the L1 bitmap.
61562306a36Sopenharmony_ci			 */
61662306a36Sopenharmony_ci			for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
61762306a36Sopenharmony_ci				unsigned word = msr / BITS_PER_LONG;
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci				msr_bitmap_l0[word] = msr_bitmap_l1[word];
62062306a36Sopenharmony_ci			}
62162306a36Sopenharmony_ci		}
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ci		nested_vmx_disable_intercept_for_x2apic_msr(
62462306a36Sopenharmony_ci			msr_bitmap_l1, msr_bitmap_l0,
62562306a36Sopenharmony_ci			X2APIC_MSR(APIC_TASKPRI),
62662306a36Sopenharmony_ci			MSR_TYPE_R | MSR_TYPE_W);
62762306a36Sopenharmony_ci
62862306a36Sopenharmony_ci		if (nested_cpu_has_vid(vmcs12)) {
62962306a36Sopenharmony_ci			nested_vmx_disable_intercept_for_x2apic_msr(
63062306a36Sopenharmony_ci				msr_bitmap_l1, msr_bitmap_l0,
63162306a36Sopenharmony_ci				X2APIC_MSR(APIC_EOI),
63262306a36Sopenharmony_ci				MSR_TYPE_W);
63362306a36Sopenharmony_ci			nested_vmx_disable_intercept_for_x2apic_msr(
63462306a36Sopenharmony_ci				msr_bitmap_l1, msr_bitmap_l0,
63562306a36Sopenharmony_ci				X2APIC_MSR(APIC_SELF_IPI),
63662306a36Sopenharmony_ci				MSR_TYPE_W);
63762306a36Sopenharmony_ci		}
63862306a36Sopenharmony_ci	}
63962306a36Sopenharmony_ci
64062306a36Sopenharmony_ci	/*
64162306a36Sopenharmony_ci	 * Always check vmcs01's bitmap to honor userspace MSR filters and any
64262306a36Sopenharmony_ci	 * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through.
64362306a36Sopenharmony_ci	 */
64462306a36Sopenharmony_ci#ifdef CONFIG_X86_64
64562306a36Sopenharmony_ci	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
64662306a36Sopenharmony_ci					 MSR_FS_BASE, MSR_TYPE_RW);
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
64962306a36Sopenharmony_ci					 MSR_GS_BASE, MSR_TYPE_RW);
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_ci	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
65262306a36Sopenharmony_ci					 MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
65362306a36Sopenharmony_ci#endif
65462306a36Sopenharmony_ci	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
65562306a36Sopenharmony_ci					 MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_ci	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
65862306a36Sopenharmony_ci					 MSR_IA32_PRED_CMD, MSR_TYPE_W);
65962306a36Sopenharmony_ci
66062306a36Sopenharmony_ci	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
66162306a36Sopenharmony_ci					 MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
66262306a36Sopenharmony_ci
66362306a36Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
66462306a36Sopenharmony_ci
66562306a36Sopenharmony_ci	vmx->nested.force_msr_bitmap_recalc = false;
66662306a36Sopenharmony_ci
66762306a36Sopenharmony_ci	return true;
66862306a36Sopenharmony_ci}
66962306a36Sopenharmony_ci
67062306a36Sopenharmony_cistatic void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
67162306a36Sopenharmony_ci				       struct vmcs12 *vmcs12)
67262306a36Sopenharmony_ci{
67362306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
67462306a36Sopenharmony_ci	struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_ci	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
67762306a36Sopenharmony_ci	    vmcs12->vmcs_link_pointer == INVALID_GPA)
67862306a36Sopenharmony_ci		return;
67962306a36Sopenharmony_ci
68062306a36Sopenharmony_ci	if (ghc->gpa != vmcs12->vmcs_link_pointer &&
68162306a36Sopenharmony_ci	    kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
68262306a36Sopenharmony_ci				      vmcs12->vmcs_link_pointer, VMCS12_SIZE))
68362306a36Sopenharmony_ci		return;
68462306a36Sopenharmony_ci
68562306a36Sopenharmony_ci	kvm_read_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
68662306a36Sopenharmony_ci			      VMCS12_SIZE);
68762306a36Sopenharmony_ci}
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_cistatic void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
69062306a36Sopenharmony_ci					      struct vmcs12 *vmcs12)
69162306a36Sopenharmony_ci{
69262306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
69362306a36Sopenharmony_ci	struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
69462306a36Sopenharmony_ci
69562306a36Sopenharmony_ci	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
69662306a36Sopenharmony_ci	    vmcs12->vmcs_link_pointer == INVALID_GPA)
69762306a36Sopenharmony_ci		return;
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci	if (ghc->gpa != vmcs12->vmcs_link_pointer &&
70062306a36Sopenharmony_ci	    kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
70162306a36Sopenharmony_ci				      vmcs12->vmcs_link_pointer, VMCS12_SIZE))
70262306a36Sopenharmony_ci		return;
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ci	kvm_write_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
70562306a36Sopenharmony_ci			       VMCS12_SIZE);
70662306a36Sopenharmony_ci}
70762306a36Sopenharmony_ci
70862306a36Sopenharmony_ci/*
70962306a36Sopenharmony_ci * In nested virtualization, check if L1 has set
71062306a36Sopenharmony_ci * VM_EXIT_ACK_INTR_ON_EXIT
71162306a36Sopenharmony_ci */
71262306a36Sopenharmony_cistatic bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
71362306a36Sopenharmony_ci{
71462306a36Sopenharmony_ci	return get_vmcs12(vcpu)->vm_exit_controls &
71562306a36Sopenharmony_ci		VM_EXIT_ACK_INTR_ON_EXIT;
71662306a36Sopenharmony_ci}
71762306a36Sopenharmony_ci
71862306a36Sopenharmony_cistatic int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
71962306a36Sopenharmony_ci					  struct vmcs12 *vmcs12)
72062306a36Sopenharmony_ci{
72162306a36Sopenharmony_ci	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
72262306a36Sopenharmony_ci	    CC(!page_address_valid(vcpu, vmcs12->apic_access_addr)))
72362306a36Sopenharmony_ci		return -EINVAL;
72462306a36Sopenharmony_ci	else
72562306a36Sopenharmony_ci		return 0;
72662306a36Sopenharmony_ci}
72762306a36Sopenharmony_ci
72862306a36Sopenharmony_cistatic int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
72962306a36Sopenharmony_ci					   struct vmcs12 *vmcs12)
73062306a36Sopenharmony_ci{
73162306a36Sopenharmony_ci	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
73262306a36Sopenharmony_ci	    !nested_cpu_has_apic_reg_virt(vmcs12) &&
73362306a36Sopenharmony_ci	    !nested_cpu_has_vid(vmcs12) &&
73462306a36Sopenharmony_ci	    !nested_cpu_has_posted_intr(vmcs12))
73562306a36Sopenharmony_ci		return 0;
73662306a36Sopenharmony_ci
73762306a36Sopenharmony_ci	/*
73862306a36Sopenharmony_ci	 * If virtualize x2apic mode is enabled,
73962306a36Sopenharmony_ci	 * virtualize apic access must be disabled.
74062306a36Sopenharmony_ci	 */
74162306a36Sopenharmony_ci	if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) &&
74262306a36Sopenharmony_ci	       nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)))
74362306a36Sopenharmony_ci		return -EINVAL;
74462306a36Sopenharmony_ci
74562306a36Sopenharmony_ci	/*
74662306a36Sopenharmony_ci	 * If virtual interrupt delivery is enabled,
74762306a36Sopenharmony_ci	 * we must exit on external interrupts.
74862306a36Sopenharmony_ci	 */
74962306a36Sopenharmony_ci	if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu)))
75062306a36Sopenharmony_ci		return -EINVAL;
75162306a36Sopenharmony_ci
75262306a36Sopenharmony_ci	/*
75362306a36Sopenharmony_ci	 * bits 15:8 should be zero in posted_intr_nv,
75462306a36Sopenharmony_ci	 * the descriptor address has been already checked
75562306a36Sopenharmony_ci	 * in nested_get_vmcs12_pages.
75662306a36Sopenharmony_ci	 *
75762306a36Sopenharmony_ci	 * bits 5:0 of posted_intr_desc_addr should be zero.
75862306a36Sopenharmony_ci	 */
75962306a36Sopenharmony_ci	if (nested_cpu_has_posted_intr(vmcs12) &&
76062306a36Sopenharmony_ci	   (CC(!nested_cpu_has_vid(vmcs12)) ||
76162306a36Sopenharmony_ci	    CC(!nested_exit_intr_ack_set(vcpu)) ||
76262306a36Sopenharmony_ci	    CC((vmcs12->posted_intr_nv & 0xff00)) ||
76362306a36Sopenharmony_ci	    CC(!kvm_vcpu_is_legal_aligned_gpa(vcpu, vmcs12->posted_intr_desc_addr, 64))))
76462306a36Sopenharmony_ci		return -EINVAL;
76562306a36Sopenharmony_ci
76662306a36Sopenharmony_ci	/* tpr shadow is needed by all apicv features. */
76762306a36Sopenharmony_ci	if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)))
76862306a36Sopenharmony_ci		return -EINVAL;
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_ci	return 0;
77162306a36Sopenharmony_ci}
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_cistatic int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
77462306a36Sopenharmony_ci				       u32 count, u64 addr)
77562306a36Sopenharmony_ci{
77662306a36Sopenharmony_ci	if (count == 0)
77762306a36Sopenharmony_ci		return 0;
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_ci	if (!kvm_vcpu_is_legal_aligned_gpa(vcpu, addr, 16) ||
78062306a36Sopenharmony_ci	    !kvm_vcpu_is_legal_gpa(vcpu, (addr + count * sizeof(struct vmx_msr_entry) - 1)))
78162306a36Sopenharmony_ci		return -EINVAL;
78262306a36Sopenharmony_ci
78362306a36Sopenharmony_ci	return 0;
78462306a36Sopenharmony_ci}
78562306a36Sopenharmony_ci
78662306a36Sopenharmony_cistatic int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu,
78762306a36Sopenharmony_ci						     struct vmcs12 *vmcs12)
78862306a36Sopenharmony_ci{
78962306a36Sopenharmony_ci	if (CC(nested_vmx_check_msr_switch(vcpu,
79062306a36Sopenharmony_ci					   vmcs12->vm_exit_msr_load_count,
79162306a36Sopenharmony_ci					   vmcs12->vm_exit_msr_load_addr)) ||
79262306a36Sopenharmony_ci	    CC(nested_vmx_check_msr_switch(vcpu,
79362306a36Sopenharmony_ci					   vmcs12->vm_exit_msr_store_count,
79462306a36Sopenharmony_ci					   vmcs12->vm_exit_msr_store_addr)))
79562306a36Sopenharmony_ci		return -EINVAL;
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci	return 0;
79862306a36Sopenharmony_ci}
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_cistatic int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu,
80162306a36Sopenharmony_ci                                                      struct vmcs12 *vmcs12)
80262306a36Sopenharmony_ci{
80362306a36Sopenharmony_ci	if (CC(nested_vmx_check_msr_switch(vcpu,
80462306a36Sopenharmony_ci					   vmcs12->vm_entry_msr_load_count,
80562306a36Sopenharmony_ci					   vmcs12->vm_entry_msr_load_addr)))
80662306a36Sopenharmony_ci                return -EINVAL;
80762306a36Sopenharmony_ci
80862306a36Sopenharmony_ci	return 0;
80962306a36Sopenharmony_ci}
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_cistatic int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
81262306a36Sopenharmony_ci					 struct vmcs12 *vmcs12)
81362306a36Sopenharmony_ci{
81462306a36Sopenharmony_ci	if (!nested_cpu_has_pml(vmcs12))
81562306a36Sopenharmony_ci		return 0;
81662306a36Sopenharmony_ci
81762306a36Sopenharmony_ci	if (CC(!nested_cpu_has_ept(vmcs12)) ||
81862306a36Sopenharmony_ci	    CC(!page_address_valid(vcpu, vmcs12->pml_address)))
81962306a36Sopenharmony_ci		return -EINVAL;
82062306a36Sopenharmony_ci
82162306a36Sopenharmony_ci	return 0;
82262306a36Sopenharmony_ci}
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_cistatic int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu,
82562306a36Sopenharmony_ci							struct vmcs12 *vmcs12)
82662306a36Sopenharmony_ci{
82762306a36Sopenharmony_ci	if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) &&
82862306a36Sopenharmony_ci	       !nested_cpu_has_ept(vmcs12)))
82962306a36Sopenharmony_ci		return -EINVAL;
83062306a36Sopenharmony_ci	return 0;
83162306a36Sopenharmony_ci}
83262306a36Sopenharmony_ci
83362306a36Sopenharmony_cistatic int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu,
83462306a36Sopenharmony_ci							 struct vmcs12 *vmcs12)
83562306a36Sopenharmony_ci{
83662306a36Sopenharmony_ci	if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) &&
83762306a36Sopenharmony_ci	       !nested_cpu_has_ept(vmcs12)))
83862306a36Sopenharmony_ci		return -EINVAL;
83962306a36Sopenharmony_ci	return 0;
84062306a36Sopenharmony_ci}
84162306a36Sopenharmony_ci
84262306a36Sopenharmony_cistatic int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
84362306a36Sopenharmony_ci						 struct vmcs12 *vmcs12)
84462306a36Sopenharmony_ci{
84562306a36Sopenharmony_ci	if (!nested_cpu_has_shadow_vmcs(vmcs12))
84662306a36Sopenharmony_ci		return 0;
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ci	if (CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)) ||
84962306a36Sopenharmony_ci	    CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap)))
85062306a36Sopenharmony_ci		return -EINVAL;
85162306a36Sopenharmony_ci
85262306a36Sopenharmony_ci	return 0;
85362306a36Sopenharmony_ci}
85462306a36Sopenharmony_ci
85562306a36Sopenharmony_cistatic int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
85662306a36Sopenharmony_ci				       struct vmx_msr_entry *e)
85762306a36Sopenharmony_ci{
85862306a36Sopenharmony_ci	/* x2APIC MSR accesses are not allowed */
85962306a36Sopenharmony_ci	if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8))
86062306a36Sopenharmony_ci		return -EINVAL;
86162306a36Sopenharmony_ci	if (CC(e->index == MSR_IA32_UCODE_WRITE) || /* SDM Table 35-2 */
86262306a36Sopenharmony_ci	    CC(e->index == MSR_IA32_UCODE_REV))
86362306a36Sopenharmony_ci		return -EINVAL;
86462306a36Sopenharmony_ci	if (CC(e->reserved != 0))
86562306a36Sopenharmony_ci		return -EINVAL;
86662306a36Sopenharmony_ci	return 0;
86762306a36Sopenharmony_ci}
86862306a36Sopenharmony_ci
86962306a36Sopenharmony_cistatic int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
87062306a36Sopenharmony_ci				     struct vmx_msr_entry *e)
87162306a36Sopenharmony_ci{
87262306a36Sopenharmony_ci	if (CC(e->index == MSR_FS_BASE) ||
87362306a36Sopenharmony_ci	    CC(e->index == MSR_GS_BASE) ||
87462306a36Sopenharmony_ci	    CC(e->index == MSR_IA32_SMM_MONITOR_CTL) || /* SMM is not supported */
87562306a36Sopenharmony_ci	    nested_vmx_msr_check_common(vcpu, e))
87662306a36Sopenharmony_ci		return -EINVAL;
87762306a36Sopenharmony_ci	return 0;
87862306a36Sopenharmony_ci}
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_cistatic int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
88162306a36Sopenharmony_ci				      struct vmx_msr_entry *e)
88262306a36Sopenharmony_ci{
88362306a36Sopenharmony_ci	if (CC(e->index == MSR_IA32_SMBASE) || /* SMM is not supported */
88462306a36Sopenharmony_ci	    nested_vmx_msr_check_common(vcpu, e))
88562306a36Sopenharmony_ci		return -EINVAL;
88662306a36Sopenharmony_ci	return 0;
88762306a36Sopenharmony_ci}
88862306a36Sopenharmony_ci
88962306a36Sopenharmony_cistatic u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
89062306a36Sopenharmony_ci{
89162306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
89262306a36Sopenharmony_ci	u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
89362306a36Sopenharmony_ci				       vmx->nested.msrs.misc_high);
89462306a36Sopenharmony_ci
89562306a36Sopenharmony_ci	return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
89662306a36Sopenharmony_ci}
89762306a36Sopenharmony_ci
89862306a36Sopenharmony_ci/*
89962306a36Sopenharmony_ci * Load guest's/host's msr at nested entry/exit.
90062306a36Sopenharmony_ci * return 0 for success, entry index for failure.
90162306a36Sopenharmony_ci *
90262306a36Sopenharmony_ci * One of the failure modes for MSR load/store is when a list exceeds the
90362306a36Sopenharmony_ci * virtual hardware's capacity. To maintain compatibility with hardware inasmuch
90462306a36Sopenharmony_ci * as possible, process all valid entries before failing rather than precheck
90562306a36Sopenharmony_ci * for a capacity violation.
90662306a36Sopenharmony_ci */
90762306a36Sopenharmony_cistatic u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
90862306a36Sopenharmony_ci{
90962306a36Sopenharmony_ci	u32 i;
91062306a36Sopenharmony_ci	struct vmx_msr_entry e;
91162306a36Sopenharmony_ci	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
91262306a36Sopenharmony_ci
91362306a36Sopenharmony_ci	for (i = 0; i < count; i++) {
91462306a36Sopenharmony_ci		if (unlikely(i >= max_msr_list_size))
91562306a36Sopenharmony_ci			goto fail;
91662306a36Sopenharmony_ci
91762306a36Sopenharmony_ci		if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
91862306a36Sopenharmony_ci					&e, sizeof(e))) {
91962306a36Sopenharmony_ci			pr_debug_ratelimited(
92062306a36Sopenharmony_ci				"%s cannot read MSR entry (%u, 0x%08llx)\n",
92162306a36Sopenharmony_ci				__func__, i, gpa + i * sizeof(e));
92262306a36Sopenharmony_ci			goto fail;
92362306a36Sopenharmony_ci		}
92462306a36Sopenharmony_ci		if (nested_vmx_load_msr_check(vcpu, &e)) {
92562306a36Sopenharmony_ci			pr_debug_ratelimited(
92662306a36Sopenharmony_ci				"%s check failed (%u, 0x%x, 0x%x)\n",
92762306a36Sopenharmony_ci				__func__, i, e.index, e.reserved);
92862306a36Sopenharmony_ci			goto fail;
92962306a36Sopenharmony_ci		}
93062306a36Sopenharmony_ci		if (kvm_set_msr(vcpu, e.index, e.value)) {
93162306a36Sopenharmony_ci			pr_debug_ratelimited(
93262306a36Sopenharmony_ci				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
93362306a36Sopenharmony_ci				__func__, i, e.index, e.value);
93462306a36Sopenharmony_ci			goto fail;
93562306a36Sopenharmony_ci		}
93662306a36Sopenharmony_ci	}
93762306a36Sopenharmony_ci	return 0;
93862306a36Sopenharmony_cifail:
93962306a36Sopenharmony_ci	/* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. */
94062306a36Sopenharmony_ci	return i + 1;
94162306a36Sopenharmony_ci}
94262306a36Sopenharmony_ci
94362306a36Sopenharmony_cistatic bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
94462306a36Sopenharmony_ci					    u32 msr_index,
94562306a36Sopenharmony_ci					    u64 *data)
94662306a36Sopenharmony_ci{
94762306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
94862306a36Sopenharmony_ci
94962306a36Sopenharmony_ci	/*
95062306a36Sopenharmony_ci	 * If the L0 hypervisor stored a more accurate value for the TSC that
95162306a36Sopenharmony_ci	 * does not include the time taken for emulation of the L2->L1
95262306a36Sopenharmony_ci	 * VM-exit in L0, use the more accurate value.
95362306a36Sopenharmony_ci	 */
95462306a36Sopenharmony_ci	if (msr_index == MSR_IA32_TSC) {
95562306a36Sopenharmony_ci		int i = vmx_find_loadstore_msr_slot(&vmx->msr_autostore.guest,
95662306a36Sopenharmony_ci						    MSR_IA32_TSC);
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ci		if (i >= 0) {
95962306a36Sopenharmony_ci			u64 val = vmx->msr_autostore.guest.val[i].value;
96062306a36Sopenharmony_ci
96162306a36Sopenharmony_ci			*data = kvm_read_l1_tsc(vcpu, val);
96262306a36Sopenharmony_ci			return true;
96362306a36Sopenharmony_ci		}
96462306a36Sopenharmony_ci	}
96562306a36Sopenharmony_ci
96662306a36Sopenharmony_ci	if (kvm_get_msr(vcpu, msr_index, data)) {
96762306a36Sopenharmony_ci		pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
96862306a36Sopenharmony_ci			msr_index);
96962306a36Sopenharmony_ci		return false;
97062306a36Sopenharmony_ci	}
97162306a36Sopenharmony_ci	return true;
97262306a36Sopenharmony_ci}
97362306a36Sopenharmony_ci
97462306a36Sopenharmony_cistatic bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i,
97562306a36Sopenharmony_ci				     struct vmx_msr_entry *e)
97662306a36Sopenharmony_ci{
97762306a36Sopenharmony_ci	if (kvm_vcpu_read_guest(vcpu,
97862306a36Sopenharmony_ci				gpa + i * sizeof(*e),
97962306a36Sopenharmony_ci				e, 2 * sizeof(u32))) {
98062306a36Sopenharmony_ci		pr_debug_ratelimited(
98162306a36Sopenharmony_ci			"%s cannot read MSR entry (%u, 0x%08llx)\n",
98262306a36Sopenharmony_ci			__func__, i, gpa + i * sizeof(*e));
98362306a36Sopenharmony_ci		return false;
98462306a36Sopenharmony_ci	}
98562306a36Sopenharmony_ci	if (nested_vmx_store_msr_check(vcpu, e)) {
98662306a36Sopenharmony_ci		pr_debug_ratelimited(
98762306a36Sopenharmony_ci			"%s check failed (%u, 0x%x, 0x%x)\n",
98862306a36Sopenharmony_ci			__func__, i, e->index, e->reserved);
98962306a36Sopenharmony_ci		return false;
99062306a36Sopenharmony_ci	}
99162306a36Sopenharmony_ci	return true;
99262306a36Sopenharmony_ci}
99362306a36Sopenharmony_ci
99462306a36Sopenharmony_cistatic int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
99562306a36Sopenharmony_ci{
99662306a36Sopenharmony_ci	u64 data;
99762306a36Sopenharmony_ci	u32 i;
99862306a36Sopenharmony_ci	struct vmx_msr_entry e;
99962306a36Sopenharmony_ci	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
100062306a36Sopenharmony_ci
100162306a36Sopenharmony_ci	for (i = 0; i < count; i++) {
100262306a36Sopenharmony_ci		if (unlikely(i >= max_msr_list_size))
100362306a36Sopenharmony_ci			return -EINVAL;
100462306a36Sopenharmony_ci
100562306a36Sopenharmony_ci		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
100662306a36Sopenharmony_ci			return -EINVAL;
100762306a36Sopenharmony_ci
100862306a36Sopenharmony_ci		if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data))
100962306a36Sopenharmony_ci			return -EINVAL;
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ci		if (kvm_vcpu_write_guest(vcpu,
101262306a36Sopenharmony_ci					 gpa + i * sizeof(e) +
101362306a36Sopenharmony_ci					     offsetof(struct vmx_msr_entry, value),
101462306a36Sopenharmony_ci					 &data, sizeof(data))) {
101562306a36Sopenharmony_ci			pr_debug_ratelimited(
101662306a36Sopenharmony_ci				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
101762306a36Sopenharmony_ci				__func__, i, e.index, data);
101862306a36Sopenharmony_ci			return -EINVAL;
101962306a36Sopenharmony_ci		}
102062306a36Sopenharmony_ci	}
102162306a36Sopenharmony_ci	return 0;
102262306a36Sopenharmony_ci}
102362306a36Sopenharmony_ci
102462306a36Sopenharmony_cistatic bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index)
102562306a36Sopenharmony_ci{
102662306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
102762306a36Sopenharmony_ci	u32 count = vmcs12->vm_exit_msr_store_count;
102862306a36Sopenharmony_ci	u64 gpa = vmcs12->vm_exit_msr_store_addr;
102962306a36Sopenharmony_ci	struct vmx_msr_entry e;
103062306a36Sopenharmony_ci	u32 i;
103162306a36Sopenharmony_ci
103262306a36Sopenharmony_ci	for (i = 0; i < count; i++) {
103362306a36Sopenharmony_ci		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
103462306a36Sopenharmony_ci			return false;
103562306a36Sopenharmony_ci
103662306a36Sopenharmony_ci		if (e.index == msr_index)
103762306a36Sopenharmony_ci			return true;
103862306a36Sopenharmony_ci	}
103962306a36Sopenharmony_ci	return false;
104062306a36Sopenharmony_ci}
104162306a36Sopenharmony_ci
104262306a36Sopenharmony_cistatic void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
104362306a36Sopenharmony_ci					   u32 msr_index)
104462306a36Sopenharmony_ci{
104562306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
104662306a36Sopenharmony_ci	struct vmx_msrs *autostore = &vmx->msr_autostore.guest;
104762306a36Sopenharmony_ci	bool in_vmcs12_store_list;
104862306a36Sopenharmony_ci	int msr_autostore_slot;
104962306a36Sopenharmony_ci	bool in_autostore_list;
105062306a36Sopenharmony_ci	int last;
105162306a36Sopenharmony_ci
105262306a36Sopenharmony_ci	msr_autostore_slot = vmx_find_loadstore_msr_slot(autostore, msr_index);
105362306a36Sopenharmony_ci	in_autostore_list = msr_autostore_slot >= 0;
105462306a36Sopenharmony_ci	in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index);
105562306a36Sopenharmony_ci
105662306a36Sopenharmony_ci	if (in_vmcs12_store_list && !in_autostore_list) {
105762306a36Sopenharmony_ci		if (autostore->nr == MAX_NR_LOADSTORE_MSRS) {
105862306a36Sopenharmony_ci			/*
105962306a36Sopenharmony_ci			 * Emulated VMEntry does not fail here.  Instead a less
106062306a36Sopenharmony_ci			 * accurate value will be returned by
106162306a36Sopenharmony_ci			 * nested_vmx_get_vmexit_msr_value() using kvm_get_msr()
106262306a36Sopenharmony_ci			 * instead of reading the value from the vmcs02 VMExit
106362306a36Sopenharmony_ci			 * MSR-store area.
106462306a36Sopenharmony_ci			 */
106562306a36Sopenharmony_ci			pr_warn_ratelimited(
106662306a36Sopenharmony_ci				"Not enough msr entries in msr_autostore.  Can't add msr %x\n",
106762306a36Sopenharmony_ci				msr_index);
106862306a36Sopenharmony_ci			return;
106962306a36Sopenharmony_ci		}
107062306a36Sopenharmony_ci		last = autostore->nr++;
107162306a36Sopenharmony_ci		autostore->val[last].index = msr_index;
107262306a36Sopenharmony_ci	} else if (!in_vmcs12_store_list && in_autostore_list) {
107362306a36Sopenharmony_ci		last = --autostore->nr;
107462306a36Sopenharmony_ci		autostore->val[msr_autostore_slot] = autostore->val[last];
107562306a36Sopenharmony_ci	}
107662306a36Sopenharmony_ci}
107762306a36Sopenharmony_ci
107862306a36Sopenharmony_ci/*
107962306a36Sopenharmony_ci * Load guest's/host's cr3 at nested entry/exit.  @nested_ept is true if we are
108062306a36Sopenharmony_ci * emulating VM-Entry into a guest with EPT enabled.  On failure, the expected
108162306a36Sopenharmony_ci * Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to
108262306a36Sopenharmony_ci * @entry_failure_code.
108362306a36Sopenharmony_ci */
108462306a36Sopenharmony_cistatic int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
108562306a36Sopenharmony_ci			       bool nested_ept, bool reload_pdptrs,
108662306a36Sopenharmony_ci			       enum vm_entry_failure_code *entry_failure_code)
108762306a36Sopenharmony_ci{
108862306a36Sopenharmony_ci	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) {
108962306a36Sopenharmony_ci		*entry_failure_code = ENTRY_FAIL_DEFAULT;
109062306a36Sopenharmony_ci		return -EINVAL;
109162306a36Sopenharmony_ci	}
109262306a36Sopenharmony_ci
109362306a36Sopenharmony_ci	/*
109462306a36Sopenharmony_ci	 * If PAE paging and EPT are both on, CR3 is not used by the CPU and
109562306a36Sopenharmony_ci	 * must not be dereferenced.
109662306a36Sopenharmony_ci	 */
109762306a36Sopenharmony_ci	if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) &&
109862306a36Sopenharmony_ci	    CC(!load_pdptrs(vcpu, cr3))) {
109962306a36Sopenharmony_ci		*entry_failure_code = ENTRY_FAIL_PDPTE;
110062306a36Sopenharmony_ci		return -EINVAL;
110162306a36Sopenharmony_ci	}
110262306a36Sopenharmony_ci
110362306a36Sopenharmony_ci	vcpu->arch.cr3 = cr3;
110462306a36Sopenharmony_ci	kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
110562306a36Sopenharmony_ci
110662306a36Sopenharmony_ci	/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
110762306a36Sopenharmony_ci	kvm_init_mmu(vcpu);
110862306a36Sopenharmony_ci
110962306a36Sopenharmony_ci	if (!nested_ept)
111062306a36Sopenharmony_ci		kvm_mmu_new_pgd(vcpu, cr3);
111162306a36Sopenharmony_ci
111262306a36Sopenharmony_ci	return 0;
111362306a36Sopenharmony_ci}
111462306a36Sopenharmony_ci
111562306a36Sopenharmony_ci/*
111662306a36Sopenharmony_ci * Returns if KVM is able to config CPU to tag TLB entries
111762306a36Sopenharmony_ci * populated by L2 differently than TLB entries populated
111862306a36Sopenharmony_ci * by L1.
111962306a36Sopenharmony_ci *
112062306a36Sopenharmony_ci * If L0 uses EPT, L1 and L2 run with different EPTP because
112162306a36Sopenharmony_ci * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries
112262306a36Sopenharmony_ci * are tagged with different EPTP.
112362306a36Sopenharmony_ci *
112462306a36Sopenharmony_ci * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
112562306a36Sopenharmony_ci * with different VPID (L1 entries are tagged with vmx->vpid
112662306a36Sopenharmony_ci * while L2 entries are tagged with vmx->nested.vpid02).
112762306a36Sopenharmony_ci */
112862306a36Sopenharmony_cistatic bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
112962306a36Sopenharmony_ci{
113062306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
113162306a36Sopenharmony_ci
113262306a36Sopenharmony_ci	return enable_ept ||
113362306a36Sopenharmony_ci	       (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
113462306a36Sopenharmony_ci}
113562306a36Sopenharmony_ci
113662306a36Sopenharmony_cistatic void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
113762306a36Sopenharmony_ci					    struct vmcs12 *vmcs12,
113862306a36Sopenharmony_ci					    bool is_vmenter)
113962306a36Sopenharmony_ci{
114062306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
114162306a36Sopenharmony_ci
114262306a36Sopenharmony_ci	/*
114362306a36Sopenharmony_ci	 * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VP_ID or
114462306a36Sopenharmony_ci	 * L2's VP_ID upon request from the guest. Make sure we check for
114562306a36Sopenharmony_ci	 * pending entries in the right FIFO upon L1/L2 transition as these
114662306a36Sopenharmony_ci	 * requests are put by other vCPUs asynchronously.
114762306a36Sopenharmony_ci	 */
114862306a36Sopenharmony_ci	if (to_hv_vcpu(vcpu) && enable_ept)
114962306a36Sopenharmony_ci		kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
115062306a36Sopenharmony_ci
115162306a36Sopenharmony_ci	/*
115262306a36Sopenharmony_ci	 * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
115362306a36Sopenharmony_ci	 * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
115462306a36Sopenharmony_ci	 * full TLB flush from the guest's perspective.  This is required even
115562306a36Sopenharmony_ci	 * if VPID is disabled in the host as KVM may need to synchronize the
115662306a36Sopenharmony_ci	 * MMU in response to the guest TLB flush.
115762306a36Sopenharmony_ci	 *
115862306a36Sopenharmony_ci	 * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use.
115962306a36Sopenharmony_ci	 * EPT is a special snowflake, as guest-physical mappings aren't
116062306a36Sopenharmony_ci	 * flushed on VPID invalidations, including VM-Enter or VM-Exit with
116162306a36Sopenharmony_ci	 * VPID disabled.  As a result, KVM _never_ needs to sync nEPT
116262306a36Sopenharmony_ci	 * entries on VM-Enter because L1 can't rely on VM-Enter to flush
116362306a36Sopenharmony_ci	 * those mappings.
116462306a36Sopenharmony_ci	 */
116562306a36Sopenharmony_ci	if (!nested_cpu_has_vpid(vmcs12)) {
116662306a36Sopenharmony_ci		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
116762306a36Sopenharmony_ci		return;
116862306a36Sopenharmony_ci	}
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci	/* L2 should never have a VPID if VPID is disabled. */
117162306a36Sopenharmony_ci	WARN_ON(!enable_vpid);
117262306a36Sopenharmony_ci
117362306a36Sopenharmony_ci	/*
117462306a36Sopenharmony_ci	 * VPID is enabled and in use by vmcs12.  If vpid12 is changing, then
117562306a36Sopenharmony_ci	 * emulate a guest TLB flush as KVM does not track vpid12 history nor
117662306a36Sopenharmony_ci	 * is the VPID incorporated into the MMU context.  I.e. KVM must assume
117762306a36Sopenharmony_ci	 * that the new vpid12 has never been used and thus represents a new
117862306a36Sopenharmony_ci	 * guest ASID that cannot have entries in the TLB.
117962306a36Sopenharmony_ci	 */
118062306a36Sopenharmony_ci	if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
118162306a36Sopenharmony_ci		vmx->nested.last_vpid = vmcs12->virtual_processor_id;
118262306a36Sopenharmony_ci		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
118362306a36Sopenharmony_ci		return;
118462306a36Sopenharmony_ci	}
118562306a36Sopenharmony_ci
118662306a36Sopenharmony_ci	/*
	 * If VPID is enabled, used by vmcs12, and vpid12 is not changing but
118862306a36Sopenharmony_ci	 * does not have a unique TLB tag (ASID), i.e. EPT is disabled and
118962306a36Sopenharmony_ci	 * KVM was unable to allocate a VPID for L2, flush the current context
119062306a36Sopenharmony_ci	 * as the effective ASID is common to both L1 and L2.
119162306a36Sopenharmony_ci	 */
119262306a36Sopenharmony_ci	if (!nested_has_guest_tlb_tag(vcpu))
119362306a36Sopenharmony_ci		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
119462306a36Sopenharmony_ci}
119562306a36Sopenharmony_ci
/*
 * Return true if, considering only the bits selected by @mask, every bit
 * that is set in @subset is also set in @superset.
 */
static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
{
	/* A masked bit present in @subset but missing from @superset fails. */
	return !(subset & ~superset & mask);
}
120362306a36Sopenharmony_ci
120462306a36Sopenharmony_cistatic int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
120562306a36Sopenharmony_ci{
120662306a36Sopenharmony_ci	const u64 feature_and_reserved =
120762306a36Sopenharmony_ci		/* feature (except bit 48; see below) */
120862306a36Sopenharmony_ci		BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
120962306a36Sopenharmony_ci		/* reserved */
121062306a36Sopenharmony_ci		BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
121162306a36Sopenharmony_ci	u64 vmx_basic = vmcs_config.nested.basic;
121262306a36Sopenharmony_ci
121362306a36Sopenharmony_ci	if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
121462306a36Sopenharmony_ci		return -EINVAL;
121562306a36Sopenharmony_ci
121662306a36Sopenharmony_ci	/*
121762306a36Sopenharmony_ci	 * KVM does not emulate a version of VMX that constrains physical
121862306a36Sopenharmony_ci	 * addresses of VMX structures (e.g. VMCS) to 32-bits.
121962306a36Sopenharmony_ci	 */
122062306a36Sopenharmony_ci	if (data & BIT_ULL(48))
122162306a36Sopenharmony_ci		return -EINVAL;
122262306a36Sopenharmony_ci
122362306a36Sopenharmony_ci	if (vmx_basic_vmcs_revision_id(vmx_basic) !=
122462306a36Sopenharmony_ci	    vmx_basic_vmcs_revision_id(data))
122562306a36Sopenharmony_ci		return -EINVAL;
122662306a36Sopenharmony_ci
122762306a36Sopenharmony_ci	if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
122862306a36Sopenharmony_ci		return -EINVAL;
122962306a36Sopenharmony_ci
123062306a36Sopenharmony_ci	vmx->nested.msrs.basic = data;
123162306a36Sopenharmony_ci	return 0;
123262306a36Sopenharmony_ci}
123362306a36Sopenharmony_ci
123462306a36Sopenharmony_cistatic void vmx_get_control_msr(struct nested_vmx_msrs *msrs, u32 msr_index,
123562306a36Sopenharmony_ci				u32 **low, u32 **high)
123662306a36Sopenharmony_ci{
123762306a36Sopenharmony_ci	switch (msr_index) {
123862306a36Sopenharmony_ci	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
123962306a36Sopenharmony_ci		*low = &msrs->pinbased_ctls_low;
124062306a36Sopenharmony_ci		*high = &msrs->pinbased_ctls_high;
124162306a36Sopenharmony_ci		break;
124262306a36Sopenharmony_ci	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
124362306a36Sopenharmony_ci		*low = &msrs->procbased_ctls_low;
124462306a36Sopenharmony_ci		*high = &msrs->procbased_ctls_high;
124562306a36Sopenharmony_ci		break;
124662306a36Sopenharmony_ci	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
124762306a36Sopenharmony_ci		*low = &msrs->exit_ctls_low;
124862306a36Sopenharmony_ci		*high = &msrs->exit_ctls_high;
124962306a36Sopenharmony_ci		break;
125062306a36Sopenharmony_ci	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
125162306a36Sopenharmony_ci		*low = &msrs->entry_ctls_low;
125262306a36Sopenharmony_ci		*high = &msrs->entry_ctls_high;
125362306a36Sopenharmony_ci		break;
125462306a36Sopenharmony_ci	case MSR_IA32_VMX_PROCBASED_CTLS2:
125562306a36Sopenharmony_ci		*low = &msrs->secondary_ctls_low;
125662306a36Sopenharmony_ci		*high = &msrs->secondary_ctls_high;
125762306a36Sopenharmony_ci		break;
125862306a36Sopenharmony_ci	default:
125962306a36Sopenharmony_ci		BUG();
126062306a36Sopenharmony_ci	}
126162306a36Sopenharmony_ci}
126262306a36Sopenharmony_ci
126362306a36Sopenharmony_cistatic int
126462306a36Sopenharmony_civmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
126562306a36Sopenharmony_ci{
126662306a36Sopenharmony_ci	u32 *lowp, *highp;
126762306a36Sopenharmony_ci	u64 supported;
126862306a36Sopenharmony_ci
126962306a36Sopenharmony_ci	vmx_get_control_msr(&vmcs_config.nested, msr_index, &lowp, &highp);
127062306a36Sopenharmony_ci
127162306a36Sopenharmony_ci	supported = vmx_control_msr(*lowp, *highp);
127262306a36Sopenharmony_ci
127362306a36Sopenharmony_ci	/* Check must-be-1 bits are still 1. */
127462306a36Sopenharmony_ci	if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
127562306a36Sopenharmony_ci		return -EINVAL;
127662306a36Sopenharmony_ci
127762306a36Sopenharmony_ci	/* Check must-be-0 bits are still 0. */
127862306a36Sopenharmony_ci	if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
127962306a36Sopenharmony_ci		return -EINVAL;
128062306a36Sopenharmony_ci
128162306a36Sopenharmony_ci	vmx_get_control_msr(&vmx->nested.msrs, msr_index, &lowp, &highp);
128262306a36Sopenharmony_ci	*lowp = data;
128362306a36Sopenharmony_ci	*highp = data >> 32;
128462306a36Sopenharmony_ci	return 0;
128562306a36Sopenharmony_ci}
128662306a36Sopenharmony_ci
128762306a36Sopenharmony_cistatic int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
128862306a36Sopenharmony_ci{
128962306a36Sopenharmony_ci	const u64 feature_and_reserved_bits =
129062306a36Sopenharmony_ci		/* feature */
129162306a36Sopenharmony_ci		BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
129262306a36Sopenharmony_ci		BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
129362306a36Sopenharmony_ci		/* reserved */
129462306a36Sopenharmony_ci		GENMASK_ULL(13, 9) | BIT_ULL(31);
129562306a36Sopenharmony_ci	u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low,
129662306a36Sopenharmony_ci				       vmcs_config.nested.misc_high);
129762306a36Sopenharmony_ci
129862306a36Sopenharmony_ci	if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
129962306a36Sopenharmony_ci		return -EINVAL;
130062306a36Sopenharmony_ci
130162306a36Sopenharmony_ci	if ((vmx->nested.msrs.pinbased_ctls_high &
130262306a36Sopenharmony_ci	     PIN_BASED_VMX_PREEMPTION_TIMER) &&
130362306a36Sopenharmony_ci	    vmx_misc_preemption_timer_rate(data) !=
130462306a36Sopenharmony_ci	    vmx_misc_preemption_timer_rate(vmx_misc))
130562306a36Sopenharmony_ci		return -EINVAL;
130662306a36Sopenharmony_ci
130762306a36Sopenharmony_ci	if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
130862306a36Sopenharmony_ci		return -EINVAL;
130962306a36Sopenharmony_ci
131062306a36Sopenharmony_ci	if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
131162306a36Sopenharmony_ci		return -EINVAL;
131262306a36Sopenharmony_ci
131362306a36Sopenharmony_ci	if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
131462306a36Sopenharmony_ci		return -EINVAL;
131562306a36Sopenharmony_ci
131662306a36Sopenharmony_ci	vmx->nested.msrs.misc_low = data;
131762306a36Sopenharmony_ci	vmx->nested.msrs.misc_high = data >> 32;
131862306a36Sopenharmony_ci
131962306a36Sopenharmony_ci	return 0;
132062306a36Sopenharmony_ci}
132162306a36Sopenharmony_ci
132262306a36Sopenharmony_cistatic int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
132362306a36Sopenharmony_ci{
132462306a36Sopenharmony_ci	u64 vmx_ept_vpid_cap = vmx_control_msr(vmcs_config.nested.ept_caps,
132562306a36Sopenharmony_ci					       vmcs_config.nested.vpid_caps);
132662306a36Sopenharmony_ci
132762306a36Sopenharmony_ci	/* Every bit is either reserved or a feature bit. */
132862306a36Sopenharmony_ci	if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
132962306a36Sopenharmony_ci		return -EINVAL;
133062306a36Sopenharmony_ci
133162306a36Sopenharmony_ci	vmx->nested.msrs.ept_caps = data;
133262306a36Sopenharmony_ci	vmx->nested.msrs.vpid_caps = data >> 32;
133362306a36Sopenharmony_ci	return 0;
133462306a36Sopenharmony_ci}
133562306a36Sopenharmony_ci
133662306a36Sopenharmony_cistatic u64 *vmx_get_fixed0_msr(struct nested_vmx_msrs *msrs, u32 msr_index)
133762306a36Sopenharmony_ci{
133862306a36Sopenharmony_ci	switch (msr_index) {
133962306a36Sopenharmony_ci	case MSR_IA32_VMX_CR0_FIXED0:
134062306a36Sopenharmony_ci		return &msrs->cr0_fixed0;
134162306a36Sopenharmony_ci	case MSR_IA32_VMX_CR4_FIXED0:
134262306a36Sopenharmony_ci		return &msrs->cr4_fixed0;
134362306a36Sopenharmony_ci	default:
134462306a36Sopenharmony_ci		BUG();
134562306a36Sopenharmony_ci	}
134662306a36Sopenharmony_ci}
134762306a36Sopenharmony_ci
134862306a36Sopenharmony_cistatic int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
134962306a36Sopenharmony_ci{
135062306a36Sopenharmony_ci	const u64 *msr = vmx_get_fixed0_msr(&vmcs_config.nested, msr_index);
135162306a36Sopenharmony_ci
135262306a36Sopenharmony_ci	/*
135362306a36Sopenharmony_ci	 * 1 bits (which indicates bits which "must-be-1" during VMX operation)
135462306a36Sopenharmony_ci	 * must be 1 in the restored value.
135562306a36Sopenharmony_ci	 */
135662306a36Sopenharmony_ci	if (!is_bitwise_subset(data, *msr, -1ULL))
135762306a36Sopenharmony_ci		return -EINVAL;
135862306a36Sopenharmony_ci
135962306a36Sopenharmony_ci	*vmx_get_fixed0_msr(&vmx->nested.msrs, msr_index) = data;
136062306a36Sopenharmony_ci	return 0;
136162306a36Sopenharmony_ci}
136262306a36Sopenharmony_ci
136362306a36Sopenharmony_ci/*
136462306a36Sopenharmony_ci * Called when userspace is restoring VMX MSRs.
136562306a36Sopenharmony_ci *
136662306a36Sopenharmony_ci * Returns 0 on success, non-0 otherwise.
136762306a36Sopenharmony_ci */
136862306a36Sopenharmony_ciint vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
136962306a36Sopenharmony_ci{
137062306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
137162306a36Sopenharmony_ci
137262306a36Sopenharmony_ci	/*
137362306a36Sopenharmony_ci	 * Don't allow changes to the VMX capability MSRs while the vCPU
137462306a36Sopenharmony_ci	 * is in VMX operation.
137562306a36Sopenharmony_ci	 */
137662306a36Sopenharmony_ci	if (vmx->nested.vmxon)
137762306a36Sopenharmony_ci		return -EBUSY;
137862306a36Sopenharmony_ci
137962306a36Sopenharmony_ci	switch (msr_index) {
138062306a36Sopenharmony_ci	case MSR_IA32_VMX_BASIC:
138162306a36Sopenharmony_ci		return vmx_restore_vmx_basic(vmx, data);
138262306a36Sopenharmony_ci	case MSR_IA32_VMX_PINBASED_CTLS:
138362306a36Sopenharmony_ci	case MSR_IA32_VMX_PROCBASED_CTLS:
138462306a36Sopenharmony_ci	case MSR_IA32_VMX_EXIT_CTLS:
138562306a36Sopenharmony_ci	case MSR_IA32_VMX_ENTRY_CTLS:
138662306a36Sopenharmony_ci		/*
138762306a36Sopenharmony_ci		 * The "non-true" VMX capability MSRs are generated from the
138862306a36Sopenharmony_ci		 * "true" MSRs, so we do not support restoring them directly.
138962306a36Sopenharmony_ci		 *
139062306a36Sopenharmony_ci		 * If userspace wants to emulate VMX_BASIC[55]=0, userspace
139162306a36Sopenharmony_ci		 * should restore the "true" MSRs with the must-be-1 bits
139262306a36Sopenharmony_ci		 * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND
139362306a36Sopenharmony_ci		 * DEFAULT SETTINGS".
139462306a36Sopenharmony_ci		 */
139562306a36Sopenharmony_ci		return -EINVAL;
139662306a36Sopenharmony_ci	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
139762306a36Sopenharmony_ci	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
139862306a36Sopenharmony_ci	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
139962306a36Sopenharmony_ci	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
140062306a36Sopenharmony_ci	case MSR_IA32_VMX_PROCBASED_CTLS2:
140162306a36Sopenharmony_ci		return vmx_restore_control_msr(vmx, msr_index, data);
140262306a36Sopenharmony_ci	case MSR_IA32_VMX_MISC:
140362306a36Sopenharmony_ci		return vmx_restore_vmx_misc(vmx, data);
140462306a36Sopenharmony_ci	case MSR_IA32_VMX_CR0_FIXED0:
140562306a36Sopenharmony_ci	case MSR_IA32_VMX_CR4_FIXED0:
140662306a36Sopenharmony_ci		return vmx_restore_fixed0_msr(vmx, msr_index, data);
140762306a36Sopenharmony_ci	case MSR_IA32_VMX_CR0_FIXED1:
140862306a36Sopenharmony_ci	case MSR_IA32_VMX_CR4_FIXED1:
140962306a36Sopenharmony_ci		/*
141062306a36Sopenharmony_ci		 * These MSRs are generated based on the vCPU's CPUID, so we
141162306a36Sopenharmony_ci		 * do not support restoring them directly.
141262306a36Sopenharmony_ci		 */
141362306a36Sopenharmony_ci		return -EINVAL;
141462306a36Sopenharmony_ci	case MSR_IA32_VMX_EPT_VPID_CAP:
141562306a36Sopenharmony_ci		return vmx_restore_vmx_ept_vpid_cap(vmx, data);
141662306a36Sopenharmony_ci	case MSR_IA32_VMX_VMCS_ENUM:
141762306a36Sopenharmony_ci		vmx->nested.msrs.vmcs_enum = data;
141862306a36Sopenharmony_ci		return 0;
141962306a36Sopenharmony_ci	case MSR_IA32_VMX_VMFUNC:
142062306a36Sopenharmony_ci		if (data & ~vmcs_config.nested.vmfunc_controls)
142162306a36Sopenharmony_ci			return -EINVAL;
142262306a36Sopenharmony_ci		vmx->nested.msrs.vmfunc_controls = data;
142362306a36Sopenharmony_ci		return 0;
142462306a36Sopenharmony_ci	default:
142562306a36Sopenharmony_ci		/*
142662306a36Sopenharmony_ci		 * The rest of the VMX capability MSRs do not support restore.
142762306a36Sopenharmony_ci		 */
142862306a36Sopenharmony_ci		return -EINVAL;
142962306a36Sopenharmony_ci	}
143062306a36Sopenharmony_ci}
143162306a36Sopenharmony_ci
143262306a36Sopenharmony_ci/* Returns 0 on success, non-0 otherwise. */
143362306a36Sopenharmony_ciint vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
143462306a36Sopenharmony_ci{
143562306a36Sopenharmony_ci	switch (msr_index) {
143662306a36Sopenharmony_ci	case MSR_IA32_VMX_BASIC:
143762306a36Sopenharmony_ci		*pdata = msrs->basic;
143862306a36Sopenharmony_ci		break;
143962306a36Sopenharmony_ci	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
144062306a36Sopenharmony_ci	case MSR_IA32_VMX_PINBASED_CTLS:
144162306a36Sopenharmony_ci		*pdata = vmx_control_msr(
144262306a36Sopenharmony_ci			msrs->pinbased_ctls_low,
144362306a36Sopenharmony_ci			msrs->pinbased_ctls_high);
144462306a36Sopenharmony_ci		if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
144562306a36Sopenharmony_ci			*pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
144662306a36Sopenharmony_ci		break;
144762306a36Sopenharmony_ci	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
144862306a36Sopenharmony_ci	case MSR_IA32_VMX_PROCBASED_CTLS:
144962306a36Sopenharmony_ci		*pdata = vmx_control_msr(
145062306a36Sopenharmony_ci			msrs->procbased_ctls_low,
145162306a36Sopenharmony_ci			msrs->procbased_ctls_high);
145262306a36Sopenharmony_ci		if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
145362306a36Sopenharmony_ci			*pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
145462306a36Sopenharmony_ci		break;
145562306a36Sopenharmony_ci	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
145662306a36Sopenharmony_ci	case MSR_IA32_VMX_EXIT_CTLS:
145762306a36Sopenharmony_ci		*pdata = vmx_control_msr(
145862306a36Sopenharmony_ci			msrs->exit_ctls_low,
145962306a36Sopenharmony_ci			msrs->exit_ctls_high);
146062306a36Sopenharmony_ci		if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
146162306a36Sopenharmony_ci			*pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
146262306a36Sopenharmony_ci		break;
146362306a36Sopenharmony_ci	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
146462306a36Sopenharmony_ci	case MSR_IA32_VMX_ENTRY_CTLS:
146562306a36Sopenharmony_ci		*pdata = vmx_control_msr(
146662306a36Sopenharmony_ci			msrs->entry_ctls_low,
146762306a36Sopenharmony_ci			msrs->entry_ctls_high);
146862306a36Sopenharmony_ci		if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
146962306a36Sopenharmony_ci			*pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
147062306a36Sopenharmony_ci		break;
147162306a36Sopenharmony_ci	case MSR_IA32_VMX_MISC:
147262306a36Sopenharmony_ci		*pdata = vmx_control_msr(
147362306a36Sopenharmony_ci			msrs->misc_low,
147462306a36Sopenharmony_ci			msrs->misc_high);
147562306a36Sopenharmony_ci		break;
147662306a36Sopenharmony_ci	case MSR_IA32_VMX_CR0_FIXED0:
147762306a36Sopenharmony_ci		*pdata = msrs->cr0_fixed0;
147862306a36Sopenharmony_ci		break;
147962306a36Sopenharmony_ci	case MSR_IA32_VMX_CR0_FIXED1:
148062306a36Sopenharmony_ci		*pdata = msrs->cr0_fixed1;
148162306a36Sopenharmony_ci		break;
148262306a36Sopenharmony_ci	case MSR_IA32_VMX_CR4_FIXED0:
148362306a36Sopenharmony_ci		*pdata = msrs->cr4_fixed0;
148462306a36Sopenharmony_ci		break;
148562306a36Sopenharmony_ci	case MSR_IA32_VMX_CR4_FIXED1:
148662306a36Sopenharmony_ci		*pdata = msrs->cr4_fixed1;
148762306a36Sopenharmony_ci		break;
148862306a36Sopenharmony_ci	case MSR_IA32_VMX_VMCS_ENUM:
148962306a36Sopenharmony_ci		*pdata = msrs->vmcs_enum;
149062306a36Sopenharmony_ci		break;
149162306a36Sopenharmony_ci	case MSR_IA32_VMX_PROCBASED_CTLS2:
149262306a36Sopenharmony_ci		*pdata = vmx_control_msr(
149362306a36Sopenharmony_ci			msrs->secondary_ctls_low,
149462306a36Sopenharmony_ci			msrs->secondary_ctls_high);
149562306a36Sopenharmony_ci		break;
149662306a36Sopenharmony_ci	case MSR_IA32_VMX_EPT_VPID_CAP:
149762306a36Sopenharmony_ci		*pdata = msrs->ept_caps |
149862306a36Sopenharmony_ci			((u64)msrs->vpid_caps << 32);
149962306a36Sopenharmony_ci		break;
150062306a36Sopenharmony_ci	case MSR_IA32_VMX_VMFUNC:
150162306a36Sopenharmony_ci		*pdata = msrs->vmfunc_controls;
150262306a36Sopenharmony_ci		break;
150362306a36Sopenharmony_ci	default:
150462306a36Sopenharmony_ci		return 1;
150562306a36Sopenharmony_ci	}
150662306a36Sopenharmony_ci
150762306a36Sopenharmony_ci	return 0;
150862306a36Sopenharmony_ci}
150962306a36Sopenharmony_ci
151062306a36Sopenharmony_ci/*
151162306a36Sopenharmony_ci * Copy the writable VMCS shadow fields back to the VMCS12, in case they have
151262306a36Sopenharmony_ci * been modified by the L1 guest.  Note, "writable" in this context means
151362306a36Sopenharmony_ci * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of
151462306a36Sopenharmony_ci * fields tagged SHADOW_FIELD_RO may or may not align with the "read-only"
151562306a36Sopenharmony_ci * VM-exit information fields (which are actually writable if the vCPU is
151662306a36Sopenharmony_ci * configured to support "VMWRITE to any supported field in the VMCS").
151762306a36Sopenharmony_ci */
151862306a36Sopenharmony_cistatic void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
151962306a36Sopenharmony_ci{
152062306a36Sopenharmony_ci	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
152162306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
152262306a36Sopenharmony_ci	struct shadow_vmcs_field field;
152362306a36Sopenharmony_ci	unsigned long val;
152462306a36Sopenharmony_ci	int i;
152562306a36Sopenharmony_ci
152662306a36Sopenharmony_ci	if (WARN_ON(!shadow_vmcs))
152762306a36Sopenharmony_ci		return;
152862306a36Sopenharmony_ci
152962306a36Sopenharmony_ci	preempt_disable();
153062306a36Sopenharmony_ci
153162306a36Sopenharmony_ci	vmcs_load(shadow_vmcs);
153262306a36Sopenharmony_ci
153362306a36Sopenharmony_ci	for (i = 0; i < max_shadow_read_write_fields; i++) {
153462306a36Sopenharmony_ci		field = shadow_read_write_fields[i];
153562306a36Sopenharmony_ci		val = __vmcs_readl(field.encoding);
153662306a36Sopenharmony_ci		vmcs12_write_any(vmcs12, field.encoding, field.offset, val);
153762306a36Sopenharmony_ci	}
153862306a36Sopenharmony_ci
153962306a36Sopenharmony_ci	vmcs_clear(shadow_vmcs);
154062306a36Sopenharmony_ci	vmcs_load(vmx->loaded_vmcs->vmcs);
154162306a36Sopenharmony_ci
154262306a36Sopenharmony_ci	preempt_enable();
154362306a36Sopenharmony_ci}
154462306a36Sopenharmony_ci
154562306a36Sopenharmony_cistatic void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
154662306a36Sopenharmony_ci{
154762306a36Sopenharmony_ci	const struct shadow_vmcs_field *fields[] = {
154862306a36Sopenharmony_ci		shadow_read_write_fields,
154962306a36Sopenharmony_ci		shadow_read_only_fields
155062306a36Sopenharmony_ci	};
155162306a36Sopenharmony_ci	const int max_fields[] = {
155262306a36Sopenharmony_ci		max_shadow_read_write_fields,
155362306a36Sopenharmony_ci		max_shadow_read_only_fields
155462306a36Sopenharmony_ci	};
155562306a36Sopenharmony_ci	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
155662306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
155762306a36Sopenharmony_ci	struct shadow_vmcs_field field;
155862306a36Sopenharmony_ci	unsigned long val;
155962306a36Sopenharmony_ci	int i, q;
156062306a36Sopenharmony_ci
156162306a36Sopenharmony_ci	if (WARN_ON(!shadow_vmcs))
156262306a36Sopenharmony_ci		return;
156362306a36Sopenharmony_ci
156462306a36Sopenharmony_ci	vmcs_load(shadow_vmcs);
156562306a36Sopenharmony_ci
156662306a36Sopenharmony_ci	for (q = 0; q < ARRAY_SIZE(fields); q++) {
156762306a36Sopenharmony_ci		for (i = 0; i < max_fields[q]; i++) {
156862306a36Sopenharmony_ci			field = fields[q][i];
156962306a36Sopenharmony_ci			val = vmcs12_read_any(vmcs12, field.encoding,
157062306a36Sopenharmony_ci					      field.offset);
157162306a36Sopenharmony_ci			__vmcs_writel(field.encoding, val);
157262306a36Sopenharmony_ci		}
157362306a36Sopenharmony_ci	}
157462306a36Sopenharmony_ci
157562306a36Sopenharmony_ci	vmcs_clear(shadow_vmcs);
157662306a36Sopenharmony_ci	vmcs_load(vmx->loaded_vmcs->vmcs);
157762306a36Sopenharmony_ci}
157862306a36Sopenharmony_ci
157962306a36Sopenharmony_cistatic void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields)
158062306a36Sopenharmony_ci{
158162306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
158262306a36Sopenharmony_ci	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
158362306a36Sopenharmony_ci	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(&vmx->vcpu);
158462306a36Sopenharmony_ci
158562306a36Sopenharmony_ci	/* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
158662306a36Sopenharmony_ci	vmcs12->tpr_threshold = evmcs->tpr_threshold;
158762306a36Sopenharmony_ci	vmcs12->guest_rip = evmcs->guest_rip;
158862306a36Sopenharmony_ci
158962306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
159062306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL))) {
159162306a36Sopenharmony_ci		hv_vcpu->nested.pa_page_gpa = evmcs->partition_assist_page;
159262306a36Sopenharmony_ci		hv_vcpu->nested.vm_id = evmcs->hv_vm_id;
159362306a36Sopenharmony_ci		hv_vcpu->nested.vp_id = evmcs->hv_vp_id;
159462306a36Sopenharmony_ci	}
159562306a36Sopenharmony_ci
159662306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
159762306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
159862306a36Sopenharmony_ci		vmcs12->guest_rsp = evmcs->guest_rsp;
159962306a36Sopenharmony_ci		vmcs12->guest_rflags = evmcs->guest_rflags;
160062306a36Sopenharmony_ci		vmcs12->guest_interruptibility_info =
160162306a36Sopenharmony_ci			evmcs->guest_interruptibility_info;
160262306a36Sopenharmony_ci		/*
160362306a36Sopenharmony_ci		 * Not present in struct vmcs12:
160462306a36Sopenharmony_ci		 * vmcs12->guest_ssp = evmcs->guest_ssp;
160562306a36Sopenharmony_ci		 */
160662306a36Sopenharmony_ci	}
160762306a36Sopenharmony_ci
160862306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
160962306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
161062306a36Sopenharmony_ci		vmcs12->cpu_based_vm_exec_control =
161162306a36Sopenharmony_ci			evmcs->cpu_based_vm_exec_control;
161262306a36Sopenharmony_ci	}
161362306a36Sopenharmony_ci
161462306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
161562306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
161662306a36Sopenharmony_ci		vmcs12->exception_bitmap = evmcs->exception_bitmap;
161762306a36Sopenharmony_ci	}
161862306a36Sopenharmony_ci
161962306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
162062306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
162162306a36Sopenharmony_ci		vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
162262306a36Sopenharmony_ci	}
162362306a36Sopenharmony_ci
162462306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
162562306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
162662306a36Sopenharmony_ci		vmcs12->vm_entry_intr_info_field =
162762306a36Sopenharmony_ci			evmcs->vm_entry_intr_info_field;
162862306a36Sopenharmony_ci		vmcs12->vm_entry_exception_error_code =
162962306a36Sopenharmony_ci			evmcs->vm_entry_exception_error_code;
163062306a36Sopenharmony_ci		vmcs12->vm_entry_instruction_len =
163162306a36Sopenharmony_ci			evmcs->vm_entry_instruction_len;
163262306a36Sopenharmony_ci	}
163362306a36Sopenharmony_ci
163462306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
163562306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
163662306a36Sopenharmony_ci		vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
163762306a36Sopenharmony_ci		vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
163862306a36Sopenharmony_ci		vmcs12->host_cr0 = evmcs->host_cr0;
163962306a36Sopenharmony_ci		vmcs12->host_cr3 = evmcs->host_cr3;
164062306a36Sopenharmony_ci		vmcs12->host_cr4 = evmcs->host_cr4;
164162306a36Sopenharmony_ci		vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
164262306a36Sopenharmony_ci		vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
164362306a36Sopenharmony_ci		vmcs12->host_rip = evmcs->host_rip;
164462306a36Sopenharmony_ci		vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
164562306a36Sopenharmony_ci		vmcs12->host_es_selector = evmcs->host_es_selector;
164662306a36Sopenharmony_ci		vmcs12->host_cs_selector = evmcs->host_cs_selector;
164762306a36Sopenharmony_ci		vmcs12->host_ss_selector = evmcs->host_ss_selector;
164862306a36Sopenharmony_ci		vmcs12->host_ds_selector = evmcs->host_ds_selector;
164962306a36Sopenharmony_ci		vmcs12->host_fs_selector = evmcs->host_fs_selector;
165062306a36Sopenharmony_ci		vmcs12->host_gs_selector = evmcs->host_gs_selector;
165162306a36Sopenharmony_ci		vmcs12->host_tr_selector = evmcs->host_tr_selector;
165262306a36Sopenharmony_ci		vmcs12->host_ia32_perf_global_ctrl = evmcs->host_ia32_perf_global_ctrl;
165362306a36Sopenharmony_ci		/*
165462306a36Sopenharmony_ci		 * Not present in struct vmcs12:
165562306a36Sopenharmony_ci		 * vmcs12->host_ia32_s_cet = evmcs->host_ia32_s_cet;
165662306a36Sopenharmony_ci		 * vmcs12->host_ssp = evmcs->host_ssp;
165762306a36Sopenharmony_ci		 * vmcs12->host_ia32_int_ssp_table_addr = evmcs->host_ia32_int_ssp_table_addr;
165862306a36Sopenharmony_ci		 */
165962306a36Sopenharmony_ci	}
166062306a36Sopenharmony_ci
166162306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
166262306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
166362306a36Sopenharmony_ci		vmcs12->pin_based_vm_exec_control =
166462306a36Sopenharmony_ci			evmcs->pin_based_vm_exec_control;
166562306a36Sopenharmony_ci		vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
166662306a36Sopenharmony_ci		vmcs12->secondary_vm_exec_control =
166762306a36Sopenharmony_ci			evmcs->secondary_vm_exec_control;
166862306a36Sopenharmony_ci	}
166962306a36Sopenharmony_ci
167062306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
167162306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
167262306a36Sopenharmony_ci		vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
167362306a36Sopenharmony_ci		vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
167462306a36Sopenharmony_ci	}
167562306a36Sopenharmony_ci
167662306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
167762306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
167862306a36Sopenharmony_ci		vmcs12->msr_bitmap = evmcs->msr_bitmap;
167962306a36Sopenharmony_ci	}
168062306a36Sopenharmony_ci
168162306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
168262306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
168362306a36Sopenharmony_ci		vmcs12->guest_es_base = evmcs->guest_es_base;
168462306a36Sopenharmony_ci		vmcs12->guest_cs_base = evmcs->guest_cs_base;
168562306a36Sopenharmony_ci		vmcs12->guest_ss_base = evmcs->guest_ss_base;
168662306a36Sopenharmony_ci		vmcs12->guest_ds_base = evmcs->guest_ds_base;
168762306a36Sopenharmony_ci		vmcs12->guest_fs_base = evmcs->guest_fs_base;
168862306a36Sopenharmony_ci		vmcs12->guest_gs_base = evmcs->guest_gs_base;
168962306a36Sopenharmony_ci		vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
169062306a36Sopenharmony_ci		vmcs12->guest_tr_base = evmcs->guest_tr_base;
169162306a36Sopenharmony_ci		vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
169262306a36Sopenharmony_ci		vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
169362306a36Sopenharmony_ci		vmcs12->guest_es_limit = evmcs->guest_es_limit;
169462306a36Sopenharmony_ci		vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
169562306a36Sopenharmony_ci		vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
169662306a36Sopenharmony_ci		vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
169762306a36Sopenharmony_ci		vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
169862306a36Sopenharmony_ci		vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
169962306a36Sopenharmony_ci		vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
170062306a36Sopenharmony_ci		vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
170162306a36Sopenharmony_ci		vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
170262306a36Sopenharmony_ci		vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
170362306a36Sopenharmony_ci		vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
170462306a36Sopenharmony_ci		vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
170562306a36Sopenharmony_ci		vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
170662306a36Sopenharmony_ci		vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
170762306a36Sopenharmony_ci		vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
170862306a36Sopenharmony_ci		vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
170962306a36Sopenharmony_ci		vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
171062306a36Sopenharmony_ci		vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
171162306a36Sopenharmony_ci		vmcs12->guest_es_selector = evmcs->guest_es_selector;
171262306a36Sopenharmony_ci		vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
171362306a36Sopenharmony_ci		vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
171462306a36Sopenharmony_ci		vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
171562306a36Sopenharmony_ci		vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
171662306a36Sopenharmony_ci		vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
171762306a36Sopenharmony_ci		vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
171862306a36Sopenharmony_ci		vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
171962306a36Sopenharmony_ci	}
172062306a36Sopenharmony_ci
172162306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
172262306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
172362306a36Sopenharmony_ci		vmcs12->tsc_offset = evmcs->tsc_offset;
172462306a36Sopenharmony_ci		vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
172562306a36Sopenharmony_ci		vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
172662306a36Sopenharmony_ci		vmcs12->encls_exiting_bitmap = evmcs->encls_exiting_bitmap;
172762306a36Sopenharmony_ci		vmcs12->tsc_multiplier = evmcs->tsc_multiplier;
172862306a36Sopenharmony_ci	}
172962306a36Sopenharmony_ci
173062306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
173162306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
173262306a36Sopenharmony_ci		vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
173362306a36Sopenharmony_ci		vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
173462306a36Sopenharmony_ci		vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
173562306a36Sopenharmony_ci		vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
173662306a36Sopenharmony_ci		vmcs12->guest_cr0 = evmcs->guest_cr0;
173762306a36Sopenharmony_ci		vmcs12->guest_cr3 = evmcs->guest_cr3;
173862306a36Sopenharmony_ci		vmcs12->guest_cr4 = evmcs->guest_cr4;
173962306a36Sopenharmony_ci		vmcs12->guest_dr7 = evmcs->guest_dr7;
174062306a36Sopenharmony_ci	}
174162306a36Sopenharmony_ci
174262306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
174362306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
174462306a36Sopenharmony_ci		vmcs12->host_fs_base = evmcs->host_fs_base;
174562306a36Sopenharmony_ci		vmcs12->host_gs_base = evmcs->host_gs_base;
174662306a36Sopenharmony_ci		vmcs12->host_tr_base = evmcs->host_tr_base;
174762306a36Sopenharmony_ci		vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
174862306a36Sopenharmony_ci		vmcs12->host_idtr_base = evmcs->host_idtr_base;
174962306a36Sopenharmony_ci		vmcs12->host_rsp = evmcs->host_rsp;
175062306a36Sopenharmony_ci	}
175162306a36Sopenharmony_ci
175262306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
175362306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
175462306a36Sopenharmony_ci		vmcs12->ept_pointer = evmcs->ept_pointer;
175562306a36Sopenharmony_ci		vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
175662306a36Sopenharmony_ci	}
175762306a36Sopenharmony_ci
175862306a36Sopenharmony_ci	if (unlikely(!(hv_clean_fields &
175962306a36Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
176062306a36Sopenharmony_ci		vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
176162306a36Sopenharmony_ci		vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
176262306a36Sopenharmony_ci		vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
176362306a36Sopenharmony_ci		vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
176462306a36Sopenharmony_ci		vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
176562306a36Sopenharmony_ci		vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
176662306a36Sopenharmony_ci		vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
176762306a36Sopenharmony_ci		vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
176862306a36Sopenharmony_ci		vmcs12->guest_pending_dbg_exceptions =
176962306a36Sopenharmony_ci			evmcs->guest_pending_dbg_exceptions;
177062306a36Sopenharmony_ci		vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
177162306a36Sopenharmony_ci		vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
177262306a36Sopenharmony_ci		vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
177362306a36Sopenharmony_ci		vmcs12->guest_activity_state = evmcs->guest_activity_state;
177462306a36Sopenharmony_ci		vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
177562306a36Sopenharmony_ci		vmcs12->guest_ia32_perf_global_ctrl = evmcs->guest_ia32_perf_global_ctrl;
177662306a36Sopenharmony_ci		/*
177762306a36Sopenharmony_ci		 * Not present in struct vmcs12:
177862306a36Sopenharmony_ci		 * vmcs12->guest_ia32_s_cet = evmcs->guest_ia32_s_cet;
177962306a36Sopenharmony_ci		 * vmcs12->guest_ia32_lbr_ctl = evmcs->guest_ia32_lbr_ctl;
178062306a36Sopenharmony_ci		 * vmcs12->guest_ia32_int_ssp_table_addr = evmcs->guest_ia32_int_ssp_table_addr;
178162306a36Sopenharmony_ci		 */
178262306a36Sopenharmony_ci	}
178362306a36Sopenharmony_ci
178462306a36Sopenharmony_ci	/*
178562306a36Sopenharmony_ci	 * Not used?
178662306a36Sopenharmony_ci	 * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
178762306a36Sopenharmony_ci	 * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
178862306a36Sopenharmony_ci	 * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
178962306a36Sopenharmony_ci	 * vmcs12->page_fault_error_code_mask =
179062306a36Sopenharmony_ci	 *		evmcs->page_fault_error_code_mask;
179162306a36Sopenharmony_ci	 * vmcs12->page_fault_error_code_match =
179262306a36Sopenharmony_ci	 *		evmcs->page_fault_error_code_match;
179362306a36Sopenharmony_ci	 * vmcs12->cr3_target_count = evmcs->cr3_target_count;
179462306a36Sopenharmony_ci	 * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count;
179562306a36Sopenharmony_ci	 * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count;
179662306a36Sopenharmony_ci	 * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count;
179762306a36Sopenharmony_ci	 */
179862306a36Sopenharmony_ci
179962306a36Sopenharmony_ci	/*
180062306a36Sopenharmony_ci	 * Read only fields:
180162306a36Sopenharmony_ci	 * vmcs12->guest_physical_address = evmcs->guest_physical_address;
180262306a36Sopenharmony_ci	 * vmcs12->vm_instruction_error = evmcs->vm_instruction_error;
180362306a36Sopenharmony_ci	 * vmcs12->vm_exit_reason = evmcs->vm_exit_reason;
180462306a36Sopenharmony_ci	 * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info;
180562306a36Sopenharmony_ci	 * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code;
180662306a36Sopenharmony_ci	 * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field;
180762306a36Sopenharmony_ci	 * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code;
180862306a36Sopenharmony_ci	 * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len;
180962306a36Sopenharmony_ci	 * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info;
181062306a36Sopenharmony_ci	 * vmcs12->exit_qualification = evmcs->exit_qualification;
181162306a36Sopenharmony_ci	 * vmcs12->guest_linear_address = evmcs->guest_linear_address;
181262306a36Sopenharmony_ci	 *
181362306a36Sopenharmony_ci	 * Not present in struct vmcs12:
181462306a36Sopenharmony_ci	 * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx;
181562306a36Sopenharmony_ci	 * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi;
181662306a36Sopenharmony_ci	 * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi;
181762306a36Sopenharmony_ci	 * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
181862306a36Sopenharmony_ci	 */
181962306a36Sopenharmony_ci
182062306a36Sopenharmony_ci	return;
182162306a36Sopenharmony_ci}
182262306a36Sopenharmony_ci
182362306a36Sopenharmony_cistatic void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
182462306a36Sopenharmony_ci{
182562306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
182662306a36Sopenharmony_ci	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
182762306a36Sopenharmony_ci
182862306a36Sopenharmony_ci	/*
182962306a36Sopenharmony_ci	 * Should not be changed by KVM:
183062306a36Sopenharmony_ci	 *
183162306a36Sopenharmony_ci	 * evmcs->host_es_selector = vmcs12->host_es_selector;
183262306a36Sopenharmony_ci	 * evmcs->host_cs_selector = vmcs12->host_cs_selector;
183362306a36Sopenharmony_ci	 * evmcs->host_ss_selector = vmcs12->host_ss_selector;
183462306a36Sopenharmony_ci	 * evmcs->host_ds_selector = vmcs12->host_ds_selector;
183562306a36Sopenharmony_ci	 * evmcs->host_fs_selector = vmcs12->host_fs_selector;
183662306a36Sopenharmony_ci	 * evmcs->host_gs_selector = vmcs12->host_gs_selector;
183762306a36Sopenharmony_ci	 * evmcs->host_tr_selector = vmcs12->host_tr_selector;
183862306a36Sopenharmony_ci	 * evmcs->host_ia32_pat = vmcs12->host_ia32_pat;
183962306a36Sopenharmony_ci	 * evmcs->host_ia32_efer = vmcs12->host_ia32_efer;
184062306a36Sopenharmony_ci	 * evmcs->host_cr0 = vmcs12->host_cr0;
184162306a36Sopenharmony_ci	 * evmcs->host_cr3 = vmcs12->host_cr3;
184262306a36Sopenharmony_ci	 * evmcs->host_cr4 = vmcs12->host_cr4;
184362306a36Sopenharmony_ci	 * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp;
184462306a36Sopenharmony_ci	 * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip;
184562306a36Sopenharmony_ci	 * evmcs->host_rip = vmcs12->host_rip;
184662306a36Sopenharmony_ci	 * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs;
184762306a36Sopenharmony_ci	 * evmcs->host_fs_base = vmcs12->host_fs_base;
184862306a36Sopenharmony_ci	 * evmcs->host_gs_base = vmcs12->host_gs_base;
184962306a36Sopenharmony_ci	 * evmcs->host_tr_base = vmcs12->host_tr_base;
185062306a36Sopenharmony_ci	 * evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
185162306a36Sopenharmony_ci	 * evmcs->host_idtr_base = vmcs12->host_idtr_base;
185262306a36Sopenharmony_ci	 * evmcs->host_rsp = vmcs12->host_rsp;
185362306a36Sopenharmony_ci	 * sync_vmcs02_to_vmcs12() doesn't read these:
185462306a36Sopenharmony_ci	 * evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
185562306a36Sopenharmony_ci	 * evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
185662306a36Sopenharmony_ci	 * evmcs->msr_bitmap = vmcs12->msr_bitmap;
185762306a36Sopenharmony_ci	 * evmcs->ept_pointer = vmcs12->ept_pointer;
185862306a36Sopenharmony_ci	 * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap;
185962306a36Sopenharmony_ci	 * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
186062306a36Sopenharmony_ci	 * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
186162306a36Sopenharmony_ci	 * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
186262306a36Sopenharmony_ci	 * evmcs->tpr_threshold = vmcs12->tpr_threshold;
186362306a36Sopenharmony_ci	 * evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
186462306a36Sopenharmony_ci	 * evmcs->exception_bitmap = vmcs12->exception_bitmap;
186562306a36Sopenharmony_ci	 * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer;
186662306a36Sopenharmony_ci	 * evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control;
186762306a36Sopenharmony_ci	 * evmcs->vm_exit_controls = vmcs12->vm_exit_controls;
186862306a36Sopenharmony_ci	 * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control;
186962306a36Sopenharmony_ci	 * evmcs->page_fault_error_code_mask =
187062306a36Sopenharmony_ci	 *		vmcs12->page_fault_error_code_mask;
187162306a36Sopenharmony_ci	 * evmcs->page_fault_error_code_match =
187262306a36Sopenharmony_ci	 *		vmcs12->page_fault_error_code_match;
187362306a36Sopenharmony_ci	 * evmcs->cr3_target_count = vmcs12->cr3_target_count;
187462306a36Sopenharmony_ci	 * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr;
187562306a36Sopenharmony_ci	 * evmcs->tsc_offset = vmcs12->tsc_offset;
187662306a36Sopenharmony_ci	 * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl;
187762306a36Sopenharmony_ci	 * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask;
187862306a36Sopenharmony_ci	 * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask;
187962306a36Sopenharmony_ci	 * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow;
188062306a36Sopenharmony_ci	 * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow;
188162306a36Sopenharmony_ci	 * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
188262306a36Sopenharmony_ci	 * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
188362306a36Sopenharmony_ci	 * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
188462306a36Sopenharmony_ci	 * evmcs->guest_ia32_perf_global_ctrl = vmcs12->guest_ia32_perf_global_ctrl;
188562306a36Sopenharmony_ci	 * evmcs->host_ia32_perf_global_ctrl = vmcs12->host_ia32_perf_global_ctrl;
188662306a36Sopenharmony_ci	 * evmcs->encls_exiting_bitmap = vmcs12->encls_exiting_bitmap;
188762306a36Sopenharmony_ci	 * evmcs->tsc_multiplier = vmcs12->tsc_multiplier;
188862306a36Sopenharmony_ci	 *
188962306a36Sopenharmony_ci	 * Not present in struct vmcs12:
189062306a36Sopenharmony_ci	 * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
189162306a36Sopenharmony_ci	 * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
189262306a36Sopenharmony_ci	 * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
189362306a36Sopenharmony_ci	 * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
189462306a36Sopenharmony_ci	 * evmcs->host_ia32_s_cet = vmcs12->host_ia32_s_cet;
189562306a36Sopenharmony_ci	 * evmcs->host_ssp = vmcs12->host_ssp;
189662306a36Sopenharmony_ci	 * evmcs->host_ia32_int_ssp_table_addr = vmcs12->host_ia32_int_ssp_table_addr;
189762306a36Sopenharmony_ci	 * evmcs->guest_ia32_s_cet = vmcs12->guest_ia32_s_cet;
189862306a36Sopenharmony_ci	 * evmcs->guest_ia32_lbr_ctl = vmcs12->guest_ia32_lbr_ctl;
189962306a36Sopenharmony_ci	 * evmcs->guest_ia32_int_ssp_table_addr = vmcs12->guest_ia32_int_ssp_table_addr;
190062306a36Sopenharmony_ci	 * evmcs->guest_ssp = vmcs12->guest_ssp;
190162306a36Sopenharmony_ci	 */
190262306a36Sopenharmony_ci
190362306a36Sopenharmony_ci	evmcs->guest_es_selector = vmcs12->guest_es_selector;
190462306a36Sopenharmony_ci	evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
190562306a36Sopenharmony_ci	evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
190662306a36Sopenharmony_ci	evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
190762306a36Sopenharmony_ci	evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
190862306a36Sopenharmony_ci	evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
190962306a36Sopenharmony_ci	evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
191062306a36Sopenharmony_ci	evmcs->guest_tr_selector = vmcs12->guest_tr_selector;
191162306a36Sopenharmony_ci
191262306a36Sopenharmony_ci	evmcs->guest_es_limit = vmcs12->guest_es_limit;
191362306a36Sopenharmony_ci	evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
191462306a36Sopenharmony_ci	evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
191562306a36Sopenharmony_ci	evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
191662306a36Sopenharmony_ci	evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
191762306a36Sopenharmony_ci	evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
191862306a36Sopenharmony_ci	evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
191962306a36Sopenharmony_ci	evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
192062306a36Sopenharmony_ci	evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
192162306a36Sopenharmony_ci	evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;
192262306a36Sopenharmony_ci
192362306a36Sopenharmony_ci	evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
192462306a36Sopenharmony_ci	evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
192562306a36Sopenharmony_ci	evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
192662306a36Sopenharmony_ci	evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
192762306a36Sopenharmony_ci	evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
192862306a36Sopenharmony_ci	evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
192962306a36Sopenharmony_ci	evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
193062306a36Sopenharmony_ci	evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;
193162306a36Sopenharmony_ci
193262306a36Sopenharmony_ci	evmcs->guest_es_base = vmcs12->guest_es_base;
193362306a36Sopenharmony_ci	evmcs->guest_cs_base = vmcs12->guest_cs_base;
193462306a36Sopenharmony_ci	evmcs->guest_ss_base = vmcs12->guest_ss_base;
193562306a36Sopenharmony_ci	evmcs->guest_ds_base = vmcs12->guest_ds_base;
193662306a36Sopenharmony_ci	evmcs->guest_fs_base = vmcs12->guest_fs_base;
193762306a36Sopenharmony_ci	evmcs->guest_gs_base = vmcs12->guest_gs_base;
193862306a36Sopenharmony_ci	evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
193962306a36Sopenharmony_ci	evmcs->guest_tr_base = vmcs12->guest_tr_base;
194062306a36Sopenharmony_ci	evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
194162306a36Sopenharmony_ci	evmcs->guest_idtr_base = vmcs12->guest_idtr_base;
194262306a36Sopenharmony_ci
194362306a36Sopenharmony_ci	evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
194462306a36Sopenharmony_ci	evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;
194562306a36Sopenharmony_ci
194662306a36Sopenharmony_ci	evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
194762306a36Sopenharmony_ci	evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
194862306a36Sopenharmony_ci	evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
194962306a36Sopenharmony_ci	evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;
195062306a36Sopenharmony_ci
195162306a36Sopenharmony_ci	evmcs->guest_pending_dbg_exceptions =
195262306a36Sopenharmony_ci		vmcs12->guest_pending_dbg_exceptions;
195362306a36Sopenharmony_ci	evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
195462306a36Sopenharmony_ci	evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;
195562306a36Sopenharmony_ci
195662306a36Sopenharmony_ci	evmcs->guest_activity_state = vmcs12->guest_activity_state;
195762306a36Sopenharmony_ci	evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;
195862306a36Sopenharmony_ci
195962306a36Sopenharmony_ci	evmcs->guest_cr0 = vmcs12->guest_cr0;
196062306a36Sopenharmony_ci	evmcs->guest_cr3 = vmcs12->guest_cr3;
196162306a36Sopenharmony_ci	evmcs->guest_cr4 = vmcs12->guest_cr4;
196262306a36Sopenharmony_ci	evmcs->guest_dr7 = vmcs12->guest_dr7;
196362306a36Sopenharmony_ci
196462306a36Sopenharmony_ci	evmcs->guest_physical_address = vmcs12->guest_physical_address;
196562306a36Sopenharmony_ci
196662306a36Sopenharmony_ci	evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
196762306a36Sopenharmony_ci	evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
196862306a36Sopenharmony_ci	evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
196962306a36Sopenharmony_ci	evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
197062306a36Sopenharmony_ci	evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
197162306a36Sopenharmony_ci	evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
197262306a36Sopenharmony_ci	evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
197362306a36Sopenharmony_ci	evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;
197462306a36Sopenharmony_ci
197562306a36Sopenharmony_ci	evmcs->exit_qualification = vmcs12->exit_qualification;
197662306a36Sopenharmony_ci
197762306a36Sopenharmony_ci	evmcs->guest_linear_address = vmcs12->guest_linear_address;
197862306a36Sopenharmony_ci	evmcs->guest_rsp = vmcs12->guest_rsp;
197962306a36Sopenharmony_ci	evmcs->guest_rflags = vmcs12->guest_rflags;
198062306a36Sopenharmony_ci
198162306a36Sopenharmony_ci	evmcs->guest_interruptibility_info =
198262306a36Sopenharmony_ci		vmcs12->guest_interruptibility_info;
198362306a36Sopenharmony_ci	evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
198462306a36Sopenharmony_ci	evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
198562306a36Sopenharmony_ci	evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
198662306a36Sopenharmony_ci	evmcs->vm_entry_exception_error_code =
198762306a36Sopenharmony_ci		vmcs12->vm_entry_exception_error_code;
198862306a36Sopenharmony_ci	evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;
198962306a36Sopenharmony_ci
199062306a36Sopenharmony_ci	evmcs->guest_rip = vmcs12->guest_rip;
199162306a36Sopenharmony_ci
199262306a36Sopenharmony_ci	evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
199362306a36Sopenharmony_ci
199462306a36Sopenharmony_ci	return;
199562306a36Sopenharmony_ci}
199662306a36Sopenharmony_ci
199762306a36Sopenharmony_ci/*
199862306a36Sopenharmony_ci * This is an equivalent of the nested hypervisor executing the vmptrld
199962306a36Sopenharmony_ci * instruction.
200062306a36Sopenharmony_ci */
200162306a36Sopenharmony_cistatic enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
200262306a36Sopenharmony_ci	struct kvm_vcpu *vcpu, bool from_launch)
200362306a36Sopenharmony_ci{
200462306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
200562306a36Sopenharmony_ci	bool evmcs_gpa_changed = false;
200662306a36Sopenharmony_ci	u64 evmcs_gpa;
200762306a36Sopenharmony_ci
200862306a36Sopenharmony_ci	if (likely(!guest_cpuid_has_evmcs(vcpu)))
200962306a36Sopenharmony_ci		return EVMPTRLD_DISABLED;
201062306a36Sopenharmony_ci
201162306a36Sopenharmony_ci	evmcs_gpa = nested_get_evmptr(vcpu);
201262306a36Sopenharmony_ci	if (!evmptr_is_valid(evmcs_gpa)) {
201362306a36Sopenharmony_ci		nested_release_evmcs(vcpu);
201462306a36Sopenharmony_ci		return EVMPTRLD_DISABLED;
201562306a36Sopenharmony_ci	}
201662306a36Sopenharmony_ci
201762306a36Sopenharmony_ci	if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
201862306a36Sopenharmony_ci		vmx->nested.current_vmptr = INVALID_GPA;
201962306a36Sopenharmony_ci
202062306a36Sopenharmony_ci		nested_release_evmcs(vcpu);
202162306a36Sopenharmony_ci
202262306a36Sopenharmony_ci		if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa),
202362306a36Sopenharmony_ci				 &vmx->nested.hv_evmcs_map))
202462306a36Sopenharmony_ci			return EVMPTRLD_ERROR;
202562306a36Sopenharmony_ci
202662306a36Sopenharmony_ci		vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva;
202762306a36Sopenharmony_ci
202862306a36Sopenharmony_ci		/*
202962306a36Sopenharmony_ci		 * Currently, KVM only supports eVMCS version 1
203062306a36Sopenharmony_ci		 * (== KVM_EVMCS_VERSION) and thus we expect guest to set this
203162306a36Sopenharmony_ci		 * value to first u32 field of eVMCS which should specify eVMCS
203262306a36Sopenharmony_ci		 * VersionNumber.
203362306a36Sopenharmony_ci		 *
203462306a36Sopenharmony_ci		 * Guest should be aware of supported eVMCS versions by host by
203562306a36Sopenharmony_ci		 * examining CPUID.0x4000000A.EAX[0:15]. Host userspace VMM is
203662306a36Sopenharmony_ci		 * expected to set this CPUID leaf according to the value
203762306a36Sopenharmony_ci		 * returned in vmcs_version from nested_enable_evmcs().
203862306a36Sopenharmony_ci		 *
203962306a36Sopenharmony_ci		 * However, it turns out that Microsoft Hyper-V fails to comply
204062306a36Sopenharmony_ci		 * to their own invented interface: When Hyper-V use eVMCS, it
204162306a36Sopenharmony_ci		 * just sets first u32 field of eVMCS to revision_id specified
204262306a36Sopenharmony_ci		 * in MSR_IA32_VMX_BASIC. Instead of used eVMCS version number
204362306a36Sopenharmony_ci		 * which is one of the supported versions specified in
204462306a36Sopenharmony_ci		 * CPUID.0x4000000A.EAX[0:15].
204562306a36Sopenharmony_ci		 *
204662306a36Sopenharmony_ci		 * To overcome Hyper-V bug, we accept here either a supported
204762306a36Sopenharmony_ci		 * eVMCS version or VMCS12 revision_id as valid values for first
204862306a36Sopenharmony_ci		 * u32 field of eVMCS.
204962306a36Sopenharmony_ci		 */
205062306a36Sopenharmony_ci		if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) &&
205162306a36Sopenharmony_ci		    (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) {
205262306a36Sopenharmony_ci			nested_release_evmcs(vcpu);
205362306a36Sopenharmony_ci			return EVMPTRLD_VMFAIL;
205462306a36Sopenharmony_ci		}
205562306a36Sopenharmony_ci
205662306a36Sopenharmony_ci		vmx->nested.hv_evmcs_vmptr = evmcs_gpa;
205762306a36Sopenharmony_ci
205862306a36Sopenharmony_ci		evmcs_gpa_changed = true;
205962306a36Sopenharmony_ci		/*
206062306a36Sopenharmony_ci		 * Unlike normal vmcs12, enlightened vmcs12 is not fully
206162306a36Sopenharmony_ci		 * reloaded from guest's memory (read only fields, fields not
206262306a36Sopenharmony_ci		 * present in struct hv_enlightened_vmcs, ...). Make sure there
206362306a36Sopenharmony_ci		 * are no leftovers.
206462306a36Sopenharmony_ci		 */
206562306a36Sopenharmony_ci		if (from_launch) {
206662306a36Sopenharmony_ci			struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
206762306a36Sopenharmony_ci			memset(vmcs12, 0, sizeof(*vmcs12));
206862306a36Sopenharmony_ci			vmcs12->hdr.revision_id = VMCS12_REVISION;
206962306a36Sopenharmony_ci		}
207062306a36Sopenharmony_ci
207162306a36Sopenharmony_ci	}
207262306a36Sopenharmony_ci
207362306a36Sopenharmony_ci	/*
207462306a36Sopenharmony_ci	 * Clean fields data can't be used on VMLAUNCH and when we switch
207562306a36Sopenharmony_ci	 * between different L2 guests as KVM keeps a single VMCS12 per L1.
207662306a36Sopenharmony_ci	 */
207762306a36Sopenharmony_ci	if (from_launch || evmcs_gpa_changed) {
207862306a36Sopenharmony_ci		vmx->nested.hv_evmcs->hv_clean_fields &=
207962306a36Sopenharmony_ci			~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
208062306a36Sopenharmony_ci
208162306a36Sopenharmony_ci		vmx->nested.force_msr_bitmap_recalc = true;
208262306a36Sopenharmony_ci	}
208362306a36Sopenharmony_ci
208462306a36Sopenharmony_ci	return EVMPTRLD_SUCCEEDED;
208562306a36Sopenharmony_ci}
208662306a36Sopenharmony_ci
208762306a36Sopenharmony_civoid nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
208862306a36Sopenharmony_ci{
208962306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
209062306a36Sopenharmony_ci
209162306a36Sopenharmony_ci	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
209262306a36Sopenharmony_ci		copy_vmcs12_to_enlightened(vmx);
209362306a36Sopenharmony_ci	else
209462306a36Sopenharmony_ci		copy_vmcs12_to_shadow(vmx);
209562306a36Sopenharmony_ci
209662306a36Sopenharmony_ci	vmx->nested.need_vmcs12_to_shadow_sync = false;
209762306a36Sopenharmony_ci}
209862306a36Sopenharmony_ci
209962306a36Sopenharmony_cistatic enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
210062306a36Sopenharmony_ci{
210162306a36Sopenharmony_ci	struct vcpu_vmx *vmx =
210262306a36Sopenharmony_ci		container_of(timer, struct vcpu_vmx, nested.preemption_timer);
210362306a36Sopenharmony_ci
210462306a36Sopenharmony_ci	vmx->nested.preemption_timer_expired = true;
210562306a36Sopenharmony_ci	kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
210662306a36Sopenharmony_ci	kvm_vcpu_kick(&vmx->vcpu);
210762306a36Sopenharmony_ci
210862306a36Sopenharmony_ci	return HRTIMER_NORESTART;
210962306a36Sopenharmony_ci}
211062306a36Sopenharmony_ci
211162306a36Sopenharmony_cistatic u64 vmx_calc_preemption_timer_value(struct kvm_vcpu *vcpu)
211262306a36Sopenharmony_ci{
211362306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
211462306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
211562306a36Sopenharmony_ci
211662306a36Sopenharmony_ci	u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >>
211762306a36Sopenharmony_ci			    VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
211862306a36Sopenharmony_ci
211962306a36Sopenharmony_ci	if (!vmx->nested.has_preemption_timer_deadline) {
212062306a36Sopenharmony_ci		vmx->nested.preemption_timer_deadline =
212162306a36Sopenharmony_ci			vmcs12->vmx_preemption_timer_value + l1_scaled_tsc;
212262306a36Sopenharmony_ci		vmx->nested.has_preemption_timer_deadline = true;
212362306a36Sopenharmony_ci	}
212462306a36Sopenharmony_ci	return vmx->nested.preemption_timer_deadline - l1_scaled_tsc;
212562306a36Sopenharmony_ci}
212662306a36Sopenharmony_ci
212762306a36Sopenharmony_cistatic void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
212862306a36Sopenharmony_ci					u64 preemption_timeout)
212962306a36Sopenharmony_ci{
213062306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
213162306a36Sopenharmony_ci
213262306a36Sopenharmony_ci	/*
213362306a36Sopenharmony_ci	 * A timer value of zero is architecturally guaranteed to cause
213462306a36Sopenharmony_ci	 * a VMExit prior to executing any instructions in the guest.
213562306a36Sopenharmony_ci	 */
213662306a36Sopenharmony_ci	if (preemption_timeout == 0) {
213762306a36Sopenharmony_ci		vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
213862306a36Sopenharmony_ci		return;
213962306a36Sopenharmony_ci	}
214062306a36Sopenharmony_ci
214162306a36Sopenharmony_ci	if (vcpu->arch.virtual_tsc_khz == 0)
214262306a36Sopenharmony_ci		return;
214362306a36Sopenharmony_ci
214462306a36Sopenharmony_ci	preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
214562306a36Sopenharmony_ci	preemption_timeout *= 1000000;
214662306a36Sopenharmony_ci	do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
214762306a36Sopenharmony_ci	hrtimer_start(&vmx->nested.preemption_timer,
214862306a36Sopenharmony_ci		      ktime_add_ns(ktime_get(), preemption_timeout),
214962306a36Sopenharmony_ci		      HRTIMER_MODE_ABS_PINNED);
215062306a36Sopenharmony_ci}
215162306a36Sopenharmony_ci
215262306a36Sopenharmony_cistatic u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
215362306a36Sopenharmony_ci{
215462306a36Sopenharmony_ci	if (vmx->nested.nested_run_pending &&
215562306a36Sopenharmony_ci	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
215662306a36Sopenharmony_ci		return vmcs12->guest_ia32_efer;
215762306a36Sopenharmony_ci	else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
215862306a36Sopenharmony_ci		return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME);
215962306a36Sopenharmony_ci	else
216062306a36Sopenharmony_ci		return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME);
216162306a36Sopenharmony_ci}
216262306a36Sopenharmony_ci
216362306a36Sopenharmony_cistatic void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
216462306a36Sopenharmony_ci{
216562306a36Sopenharmony_ci	struct kvm *kvm = vmx->vcpu.kvm;
216662306a36Sopenharmony_ci
216762306a36Sopenharmony_ci	/*
216862306a36Sopenharmony_ci	 * If vmcs02 hasn't been initialized, set the constant vmcs02 state
216962306a36Sopenharmony_ci	 * according to L0's settings (vmcs12 is irrelevant here).  Host
217062306a36Sopenharmony_ci	 * fields that come from L0 and are not constant, e.g. HOST_CR3,
217162306a36Sopenharmony_ci	 * will be set as needed prior to VMLAUNCH/VMRESUME.
217262306a36Sopenharmony_ci	 */
217362306a36Sopenharmony_ci	if (vmx->nested.vmcs02_initialized)
217462306a36Sopenharmony_ci		return;
217562306a36Sopenharmony_ci	vmx->nested.vmcs02_initialized = true;
217662306a36Sopenharmony_ci
217762306a36Sopenharmony_ci	/*
217862306a36Sopenharmony_ci	 * We don't care what the EPTP value is we just need to guarantee
217962306a36Sopenharmony_ci	 * it's valid so we don't get a false positive when doing early
218062306a36Sopenharmony_ci	 * consistency checks.
218162306a36Sopenharmony_ci	 */
218262306a36Sopenharmony_ci	if (enable_ept && nested_early_check)
218362306a36Sopenharmony_ci		vmcs_write64(EPT_POINTER,
218462306a36Sopenharmony_ci			     construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL));
218562306a36Sopenharmony_ci
218662306a36Sopenharmony_ci	/* All VMFUNCs are currently emulated through L0 vmexits.  */
218762306a36Sopenharmony_ci	if (cpu_has_vmx_vmfunc())
218862306a36Sopenharmony_ci		vmcs_write64(VM_FUNCTION_CONTROL, 0);
218962306a36Sopenharmony_ci
219062306a36Sopenharmony_ci	if (cpu_has_vmx_posted_intr())
219162306a36Sopenharmony_ci		vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
219262306a36Sopenharmony_ci
219362306a36Sopenharmony_ci	if (cpu_has_vmx_msr_bitmap())
219462306a36Sopenharmony_ci		vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
219562306a36Sopenharmony_ci
219662306a36Sopenharmony_ci	/*
219762306a36Sopenharmony_ci	 * PML is emulated for L2, but never enabled in hardware as the MMU
219862306a36Sopenharmony_ci	 * handles A/D emulation.  Disabling PML for L2 also avoids having to
219962306a36Sopenharmony_ci	 * deal with filtering out L2 GPAs from the buffer.
220062306a36Sopenharmony_ci	 */
220162306a36Sopenharmony_ci	if (enable_pml) {
220262306a36Sopenharmony_ci		vmcs_write64(PML_ADDRESS, 0);
220362306a36Sopenharmony_ci		vmcs_write16(GUEST_PML_INDEX, -1);
220462306a36Sopenharmony_ci	}
220562306a36Sopenharmony_ci
220662306a36Sopenharmony_ci	if (cpu_has_vmx_encls_vmexit())
220762306a36Sopenharmony_ci		vmcs_write64(ENCLS_EXITING_BITMAP, INVALID_GPA);
220862306a36Sopenharmony_ci
220962306a36Sopenharmony_ci	if (kvm_notify_vmexit_enabled(kvm))
221062306a36Sopenharmony_ci		vmcs_write32(NOTIFY_WINDOW, kvm->arch.notify_window);
221162306a36Sopenharmony_ci
221262306a36Sopenharmony_ci	/*
221362306a36Sopenharmony_ci	 * Set the MSR load/store lists to match L0's settings.  Only the
221462306a36Sopenharmony_ci	 * addresses are constant (for vmcs02), the counts can change based
221562306a36Sopenharmony_ci	 * on L2's behavior, e.g. switching to/from long mode.
221662306a36Sopenharmony_ci	 */
221762306a36Sopenharmony_ci	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val));
221862306a36Sopenharmony_ci	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
221962306a36Sopenharmony_ci	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
222062306a36Sopenharmony_ci
222162306a36Sopenharmony_ci	vmx_set_constant_host_state(vmx);
222262306a36Sopenharmony_ci}
222362306a36Sopenharmony_ci
222462306a36Sopenharmony_cistatic void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
222562306a36Sopenharmony_ci				      struct vmcs12 *vmcs12)
222662306a36Sopenharmony_ci{
222762306a36Sopenharmony_ci	prepare_vmcs02_constant_state(vmx);
222862306a36Sopenharmony_ci
222962306a36Sopenharmony_ci	vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA);
223062306a36Sopenharmony_ci
223162306a36Sopenharmony_ci	if (enable_vpid) {
223262306a36Sopenharmony_ci		if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
223362306a36Sopenharmony_ci			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
223462306a36Sopenharmony_ci		else
223562306a36Sopenharmony_ci			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
223662306a36Sopenharmony_ci	}
223762306a36Sopenharmony_ci}
223862306a36Sopenharmony_ci
223962306a36Sopenharmony_cistatic void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01,
224062306a36Sopenharmony_ci				 struct vmcs12 *vmcs12)
224162306a36Sopenharmony_ci{
224262306a36Sopenharmony_ci	u32 exec_control;
224362306a36Sopenharmony_ci	u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
224462306a36Sopenharmony_ci
224562306a36Sopenharmony_ci	if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
224662306a36Sopenharmony_ci		prepare_vmcs02_early_rare(vmx, vmcs12);
224762306a36Sopenharmony_ci
224862306a36Sopenharmony_ci	/*
224962306a36Sopenharmony_ci	 * PIN CONTROLS
225062306a36Sopenharmony_ci	 */
225162306a36Sopenharmony_ci	exec_control = __pin_controls_get(vmcs01);
225262306a36Sopenharmony_ci	exec_control |= (vmcs12->pin_based_vm_exec_control &
225362306a36Sopenharmony_ci			 ~PIN_BASED_VMX_PREEMPTION_TIMER);
225462306a36Sopenharmony_ci
225562306a36Sopenharmony_ci	/* Posted interrupts setting is only taken from vmcs12.  */
225662306a36Sopenharmony_ci	vmx->nested.pi_pending = false;
225762306a36Sopenharmony_ci	if (nested_cpu_has_posted_intr(vmcs12))
225862306a36Sopenharmony_ci		vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
225962306a36Sopenharmony_ci	else
226062306a36Sopenharmony_ci		exec_control &= ~PIN_BASED_POSTED_INTR;
226162306a36Sopenharmony_ci	pin_controls_set(vmx, exec_control);
226262306a36Sopenharmony_ci
226362306a36Sopenharmony_ci	/*
226462306a36Sopenharmony_ci	 * EXEC CONTROLS
226562306a36Sopenharmony_ci	 */
226662306a36Sopenharmony_ci	exec_control = __exec_controls_get(vmcs01); /* L0's desires */
226762306a36Sopenharmony_ci	exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
226862306a36Sopenharmony_ci	exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
226962306a36Sopenharmony_ci	exec_control &= ~CPU_BASED_TPR_SHADOW;
227062306a36Sopenharmony_ci	exec_control |= vmcs12->cpu_based_vm_exec_control;
227162306a36Sopenharmony_ci
227262306a36Sopenharmony_ci	vmx->nested.l1_tpr_threshold = -1;
227362306a36Sopenharmony_ci	if (exec_control & CPU_BASED_TPR_SHADOW)
227462306a36Sopenharmony_ci		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
227562306a36Sopenharmony_ci#ifdef CONFIG_X86_64
227662306a36Sopenharmony_ci	else
227762306a36Sopenharmony_ci		exec_control |= CPU_BASED_CR8_LOAD_EXITING |
227862306a36Sopenharmony_ci				CPU_BASED_CR8_STORE_EXITING;
227962306a36Sopenharmony_ci#endif
228062306a36Sopenharmony_ci
228162306a36Sopenharmony_ci	/*
228262306a36Sopenharmony_ci	 * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
228362306a36Sopenharmony_ci	 * for I/O port accesses.
228462306a36Sopenharmony_ci	 */
228562306a36Sopenharmony_ci	exec_control |= CPU_BASED_UNCOND_IO_EXITING;
228662306a36Sopenharmony_ci	exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
228762306a36Sopenharmony_ci
228862306a36Sopenharmony_ci	/*
228962306a36Sopenharmony_ci	 * This bit will be computed in nested_get_vmcs12_pages, because
229062306a36Sopenharmony_ci	 * we do not have access to L1's MSR bitmap yet.  For now, keep
229162306a36Sopenharmony_ci	 * the same bit as before, hoping to avoid multiple VMWRITEs that
229262306a36Sopenharmony_ci	 * only set/clear this bit.
229362306a36Sopenharmony_ci	 */
229462306a36Sopenharmony_ci	exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
229562306a36Sopenharmony_ci	exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS;
229662306a36Sopenharmony_ci
229762306a36Sopenharmony_ci	exec_controls_set(vmx, exec_control);
229862306a36Sopenharmony_ci
229962306a36Sopenharmony_ci	/*
230062306a36Sopenharmony_ci	 * SECONDARY EXEC CONTROLS
230162306a36Sopenharmony_ci	 */
230262306a36Sopenharmony_ci	if (cpu_has_secondary_exec_ctrls()) {
230362306a36Sopenharmony_ci		exec_control = __secondary_exec_controls_get(vmcs01);
230462306a36Sopenharmony_ci
230562306a36Sopenharmony_ci		/* Take the following fields only from vmcs12 */
230662306a36Sopenharmony_ci		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
230762306a36Sopenharmony_ci				  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
230862306a36Sopenharmony_ci				  SECONDARY_EXEC_ENABLE_INVPCID |
230962306a36Sopenharmony_ci				  SECONDARY_EXEC_ENABLE_RDTSCP |
231062306a36Sopenharmony_ci				  SECONDARY_EXEC_ENABLE_XSAVES |
231162306a36Sopenharmony_ci				  SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
231262306a36Sopenharmony_ci				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
231362306a36Sopenharmony_ci				  SECONDARY_EXEC_APIC_REGISTER_VIRT |
231462306a36Sopenharmony_ci				  SECONDARY_EXEC_ENABLE_VMFUNC |
231562306a36Sopenharmony_ci				  SECONDARY_EXEC_DESC);
231662306a36Sopenharmony_ci
231762306a36Sopenharmony_ci		if (nested_cpu_has(vmcs12,
231862306a36Sopenharmony_ci				   CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
231962306a36Sopenharmony_ci			exec_control |= vmcs12->secondary_vm_exec_control;
232062306a36Sopenharmony_ci
232162306a36Sopenharmony_ci		/* PML is emulated and never enabled in hardware for L2. */
232262306a36Sopenharmony_ci		exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
232362306a36Sopenharmony_ci
232462306a36Sopenharmony_ci		/* VMCS shadowing for L2 is emulated for now */
232562306a36Sopenharmony_ci		exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
232662306a36Sopenharmony_ci
232762306a36Sopenharmony_ci		/*
232862306a36Sopenharmony_ci		 * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4()
232962306a36Sopenharmony_ci		 * will not have to rewrite the controls just for this bit.
233062306a36Sopenharmony_ci		 */
233162306a36Sopenharmony_ci		if (vmx_umip_emulated() && (vmcs12->guest_cr4 & X86_CR4_UMIP))
233262306a36Sopenharmony_ci			exec_control |= SECONDARY_EXEC_DESC;
233362306a36Sopenharmony_ci
233462306a36Sopenharmony_ci		if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
233562306a36Sopenharmony_ci			vmcs_write16(GUEST_INTR_STATUS,
233662306a36Sopenharmony_ci				vmcs12->guest_intr_status);
233762306a36Sopenharmony_ci
233862306a36Sopenharmony_ci		if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
233962306a36Sopenharmony_ci		    exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
234062306a36Sopenharmony_ci
234162306a36Sopenharmony_ci		if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
234262306a36Sopenharmony_ci			vmx_write_encls_bitmap(&vmx->vcpu, vmcs12);
234362306a36Sopenharmony_ci
234462306a36Sopenharmony_ci		secondary_exec_controls_set(vmx, exec_control);
234562306a36Sopenharmony_ci	}
234662306a36Sopenharmony_ci
234762306a36Sopenharmony_ci	/*
234862306a36Sopenharmony_ci	 * ENTRY CONTROLS
234962306a36Sopenharmony_ci	 *
235062306a36Sopenharmony_ci	 * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
235162306a36Sopenharmony_ci	 * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
235262306a36Sopenharmony_ci	 * on the related bits (if supported by the CPU) in the hope that
235362306a36Sopenharmony_ci	 * we can avoid VMWrites during vmx_set_efer().
235462306a36Sopenharmony_ci	 *
235562306a36Sopenharmony_ci	 * Similarly, take vmcs01's PERF_GLOBAL_CTRL in the hope that if KVM is
235662306a36Sopenharmony_ci	 * loading PERF_GLOBAL_CTRL via the VMCS for L1, then KVM will want to
235762306a36Sopenharmony_ci	 * do the same for L2.
235862306a36Sopenharmony_ci	 */
235962306a36Sopenharmony_ci	exec_control = __vm_entry_controls_get(vmcs01);
236062306a36Sopenharmony_ci	exec_control |= (vmcs12->vm_entry_controls &
236162306a36Sopenharmony_ci			 ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL);
236262306a36Sopenharmony_ci	exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER);
236362306a36Sopenharmony_ci	if (cpu_has_load_ia32_efer()) {
236462306a36Sopenharmony_ci		if (guest_efer & EFER_LMA)
236562306a36Sopenharmony_ci			exec_control |= VM_ENTRY_IA32E_MODE;
236662306a36Sopenharmony_ci		if (guest_efer != host_efer)
236762306a36Sopenharmony_ci			exec_control |= VM_ENTRY_LOAD_IA32_EFER;
236862306a36Sopenharmony_ci	}
236962306a36Sopenharmony_ci	vm_entry_controls_set(vmx, exec_control);
237062306a36Sopenharmony_ci
237162306a36Sopenharmony_ci	/*
237262306a36Sopenharmony_ci	 * EXIT CONTROLS
237362306a36Sopenharmony_ci	 *
237462306a36Sopenharmony_ci	 * L2->L1 exit controls are emulated - the hardware exit is to L0 so
237562306a36Sopenharmony_ci	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
237662306a36Sopenharmony_ci	 * bits may be modified by vmx_set_efer() in prepare_vmcs02().
237762306a36Sopenharmony_ci	 */
237862306a36Sopenharmony_ci	exec_control = __vm_exit_controls_get(vmcs01);
237962306a36Sopenharmony_ci	if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
238062306a36Sopenharmony_ci		exec_control |= VM_EXIT_LOAD_IA32_EFER;
238162306a36Sopenharmony_ci	else
238262306a36Sopenharmony_ci		exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
238362306a36Sopenharmony_ci	vm_exit_controls_set(vmx, exec_control);
238462306a36Sopenharmony_ci
238562306a36Sopenharmony_ci	/*
238662306a36Sopenharmony_ci	 * Interrupt/Exception Fields
238762306a36Sopenharmony_ci	 */
238862306a36Sopenharmony_ci	if (vmx->nested.nested_run_pending) {
238962306a36Sopenharmony_ci		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
239062306a36Sopenharmony_ci			     vmcs12->vm_entry_intr_info_field);
239162306a36Sopenharmony_ci		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
239262306a36Sopenharmony_ci			     vmcs12->vm_entry_exception_error_code);
239362306a36Sopenharmony_ci		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
239462306a36Sopenharmony_ci			     vmcs12->vm_entry_instruction_len);
239562306a36Sopenharmony_ci		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
239662306a36Sopenharmony_ci			     vmcs12->guest_interruptibility_info);
239762306a36Sopenharmony_ci		vmx->loaded_vmcs->nmi_known_unmasked =
239862306a36Sopenharmony_ci			!(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
239962306a36Sopenharmony_ci	} else {
240062306a36Sopenharmony_ci		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
240162306a36Sopenharmony_ci	}
240262306a36Sopenharmony_ci}
240362306a36Sopenharmony_ci
240462306a36Sopenharmony_cistatic void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
240562306a36Sopenharmony_ci{
240662306a36Sopenharmony_ci	struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
240762306a36Sopenharmony_ci
240862306a36Sopenharmony_ci	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
240962306a36Sopenharmony_ci			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
241062306a36Sopenharmony_ci		vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
241162306a36Sopenharmony_ci		vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
241262306a36Sopenharmony_ci		vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
241362306a36Sopenharmony_ci		vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
241462306a36Sopenharmony_ci		vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
241562306a36Sopenharmony_ci		vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
241662306a36Sopenharmony_ci		vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
241762306a36Sopenharmony_ci		vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
241862306a36Sopenharmony_ci		vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
241962306a36Sopenharmony_ci		vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
242062306a36Sopenharmony_ci		vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
242162306a36Sopenharmony_ci		vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
242262306a36Sopenharmony_ci		vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
242362306a36Sopenharmony_ci		vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
242462306a36Sopenharmony_ci		vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
242562306a36Sopenharmony_ci		vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
242662306a36Sopenharmony_ci		vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
242762306a36Sopenharmony_ci		vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
242862306a36Sopenharmony_ci		vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
242962306a36Sopenharmony_ci		vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
243062306a36Sopenharmony_ci		vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
243162306a36Sopenharmony_ci		vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
243262306a36Sopenharmony_ci		vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
243362306a36Sopenharmony_ci		vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
243462306a36Sopenharmony_ci		vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
243562306a36Sopenharmony_ci		vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
243662306a36Sopenharmony_ci		vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
243762306a36Sopenharmony_ci		vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
243862306a36Sopenharmony_ci		vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
243962306a36Sopenharmony_ci		vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
244062306a36Sopenharmony_ci		vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
244162306a36Sopenharmony_ci		vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
244262306a36Sopenharmony_ci		vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
244362306a36Sopenharmony_ci		vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
244462306a36Sopenharmony_ci		vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
244562306a36Sopenharmony_ci		vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
244662306a36Sopenharmony_ci
244762306a36Sopenharmony_ci		vmx->segment_cache.bitmask = 0;
244862306a36Sopenharmony_ci	}
244962306a36Sopenharmony_ci
245062306a36Sopenharmony_ci	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
245162306a36Sopenharmony_ci			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
245262306a36Sopenharmony_ci		vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
245362306a36Sopenharmony_ci		vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
245462306a36Sopenharmony_ci			    vmcs12->guest_pending_dbg_exceptions);
245562306a36Sopenharmony_ci		vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
245662306a36Sopenharmony_ci		vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
245762306a36Sopenharmony_ci
245862306a36Sopenharmony_ci		/*
245962306a36Sopenharmony_ci		 * L1 may access the L2's PDPTR, so save them to construct
246062306a36Sopenharmony_ci		 * vmcs12
246162306a36Sopenharmony_ci		 */
246262306a36Sopenharmony_ci		if (enable_ept) {
246362306a36Sopenharmony_ci			vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
246462306a36Sopenharmony_ci			vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
246562306a36Sopenharmony_ci			vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
246662306a36Sopenharmony_ci			vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
246762306a36Sopenharmony_ci		}
246862306a36Sopenharmony_ci
246962306a36Sopenharmony_ci		if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
247062306a36Sopenharmony_ci		    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
247162306a36Sopenharmony_ci			vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
247262306a36Sopenharmony_ci	}
247362306a36Sopenharmony_ci
247462306a36Sopenharmony_ci	if (nested_cpu_has_xsaves(vmcs12))
247562306a36Sopenharmony_ci		vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
247662306a36Sopenharmony_ci
247762306a36Sopenharmony_ci	/*
247862306a36Sopenharmony_ci	 * Whether page-faults are trapped is determined by a combination of
247962306a36Sopenharmony_ci	 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.  If L0
248062306a36Sopenharmony_ci	 * doesn't care about page faults then we should set all of these to
248162306a36Sopenharmony_ci	 * L1's desires. However, if L0 does care about (some) page faults, it
248262306a36Sopenharmony_ci	 * is not easy (if at all possible?) to merge L0 and L1's desires, we
248362306a36Sopenharmony_ci	 * simply ask to exit on each and every L2 page fault. This is done by
248462306a36Sopenharmony_ci	 * setting MASK=MATCH=0 and (see below) EB.PF=1.
248562306a36Sopenharmony_ci	 * Note that below we don't need special code to set EB.PF beyond the
248662306a36Sopenharmony_ci	 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
248762306a36Sopenharmony_ci	 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
248862306a36Sopenharmony_ci	 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
248962306a36Sopenharmony_ci	 */
249062306a36Sopenharmony_ci	if (vmx_need_pf_intercept(&vmx->vcpu)) {
249162306a36Sopenharmony_ci		/*
249262306a36Sopenharmony_ci		 * TODO: if both L0 and L1 need the same MASK and MATCH,
249362306a36Sopenharmony_ci		 * go ahead and use it?
249462306a36Sopenharmony_ci		 */
249562306a36Sopenharmony_ci		vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
249662306a36Sopenharmony_ci		vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
249762306a36Sopenharmony_ci	} else {
249862306a36Sopenharmony_ci		vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, vmcs12->page_fault_error_code_mask);
249962306a36Sopenharmony_ci		vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, vmcs12->page_fault_error_code_match);
250062306a36Sopenharmony_ci	}
250162306a36Sopenharmony_ci
250262306a36Sopenharmony_ci	if (cpu_has_vmx_apicv()) {
250362306a36Sopenharmony_ci		vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
250462306a36Sopenharmony_ci		vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
250562306a36Sopenharmony_ci		vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
250662306a36Sopenharmony_ci		vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
250762306a36Sopenharmony_ci	}
250862306a36Sopenharmony_ci
250962306a36Sopenharmony_ci	/*
251062306a36Sopenharmony_ci	 * Make sure the msr_autostore list is up to date before we set the
251162306a36Sopenharmony_ci	 * count in the vmcs02.
251262306a36Sopenharmony_ci	 */
251362306a36Sopenharmony_ci	prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC);
251462306a36Sopenharmony_ci
251562306a36Sopenharmony_ci	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr);
251662306a36Sopenharmony_ci	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
251762306a36Sopenharmony_ci	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
251862306a36Sopenharmony_ci
251962306a36Sopenharmony_ci	set_cr4_guest_host_mask(vmx);
252062306a36Sopenharmony_ci}
252162306a36Sopenharmony_ci
252262306a36Sopenharmony_ci/*
252362306a36Sopenharmony_ci * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
252462306a36Sopenharmony_ci * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
252562306a36Sopenharmony_ci * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
252662306a36Sopenharmony_ci * guest in a way that will both be appropriate to L1's requests, and our
252762306a36Sopenharmony_ci * needs. In addition to modifying the active vmcs (which is vmcs02), this
252862306a36Sopenharmony_ci * function also has additional necessary side-effects, like setting various
252962306a36Sopenharmony_ci * vcpu->arch fields.
253062306a36Sopenharmony_ci * Returns 0 on success, 1 on failure. Invalid state exit qualification code
253162306a36Sopenharmony_ci * is assigned to entry_failure_code on failure.
253262306a36Sopenharmony_ci */
253362306a36Sopenharmony_cistatic int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
253462306a36Sopenharmony_ci			  bool from_vmentry,
253562306a36Sopenharmony_ci			  enum vm_entry_failure_code *entry_failure_code)
253662306a36Sopenharmony_ci{
253762306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
253862306a36Sopenharmony_ci	bool load_guest_pdptrs_vmcs12 = false;
253962306a36Sopenharmony_ci
254062306a36Sopenharmony_ci	if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
254162306a36Sopenharmony_ci		prepare_vmcs02_rare(vmx, vmcs12);
254262306a36Sopenharmony_ci		vmx->nested.dirty_vmcs12 = false;
254362306a36Sopenharmony_ci
254462306a36Sopenharmony_ci		load_guest_pdptrs_vmcs12 = !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) ||
254562306a36Sopenharmony_ci			!(vmx->nested.hv_evmcs->hv_clean_fields &
254662306a36Sopenharmony_ci			  HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
254762306a36Sopenharmony_ci	}
254862306a36Sopenharmony_ci
254962306a36Sopenharmony_ci	if (vmx->nested.nested_run_pending &&
255062306a36Sopenharmony_ci	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
255162306a36Sopenharmony_ci		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
255262306a36Sopenharmony_ci		vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
255362306a36Sopenharmony_ci	} else {
255462306a36Sopenharmony_ci		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
255562306a36Sopenharmony_ci		vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl);
255662306a36Sopenharmony_ci	}
255762306a36Sopenharmony_ci	if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
255862306a36Sopenharmony_ci	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
255962306a36Sopenharmony_ci		vmcs_write64(GUEST_BNDCFGS, vmx->nested.pre_vmenter_bndcfgs);
256062306a36Sopenharmony_ci	vmx_set_rflags(vcpu, vmcs12->guest_rflags);
256162306a36Sopenharmony_ci
256262306a36Sopenharmony_ci	/* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
256362306a36Sopenharmony_ci	 * bitwise-or of what L1 wants to trap for L2, and what we want to
256462306a36Sopenharmony_ci	 * trap. Note that CR0.TS also needs updating - we do this later.
256562306a36Sopenharmony_ci	 */
256662306a36Sopenharmony_ci	vmx_update_exception_bitmap(vcpu);
256762306a36Sopenharmony_ci	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
256862306a36Sopenharmony_ci	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
256962306a36Sopenharmony_ci
257062306a36Sopenharmony_ci	if (vmx->nested.nested_run_pending &&
257162306a36Sopenharmony_ci	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
257262306a36Sopenharmony_ci		vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
257362306a36Sopenharmony_ci		vcpu->arch.pat = vmcs12->guest_ia32_pat;
257462306a36Sopenharmony_ci	} else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
257562306a36Sopenharmony_ci		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
257662306a36Sopenharmony_ci	}
257762306a36Sopenharmony_ci
257862306a36Sopenharmony_ci	vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
257962306a36Sopenharmony_ci			vcpu->arch.l1_tsc_offset,
258062306a36Sopenharmony_ci			vmx_get_l2_tsc_offset(vcpu),
258162306a36Sopenharmony_ci			vmx_get_l2_tsc_multiplier(vcpu));
258262306a36Sopenharmony_ci
258362306a36Sopenharmony_ci	vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(
258462306a36Sopenharmony_ci			vcpu->arch.l1_tsc_scaling_ratio,
258562306a36Sopenharmony_ci			vmx_get_l2_tsc_multiplier(vcpu));
258662306a36Sopenharmony_ci
258762306a36Sopenharmony_ci	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
258862306a36Sopenharmony_ci	if (kvm_caps.has_tsc_control)
258962306a36Sopenharmony_ci		vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
259062306a36Sopenharmony_ci
259162306a36Sopenharmony_ci	nested_vmx_transition_tlb_flush(vcpu, vmcs12, true);
259262306a36Sopenharmony_ci
259362306a36Sopenharmony_ci	if (nested_cpu_has_ept(vmcs12))
259462306a36Sopenharmony_ci		nested_ept_init_mmu_context(vcpu);
259562306a36Sopenharmony_ci
259662306a36Sopenharmony_ci	/*
259762306a36Sopenharmony_ci	 * Override the CR0/CR4 read shadows after setting the effective guest
259862306a36Sopenharmony_ci	 * CR0/CR4.  The common helpers also set the shadows, but they don't
259962306a36Sopenharmony_ci	 * account for vmcs12's cr0/4_guest_host_mask.
260062306a36Sopenharmony_ci	 */
260162306a36Sopenharmony_ci	vmx_set_cr0(vcpu, vmcs12->guest_cr0);
260262306a36Sopenharmony_ci	vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
260362306a36Sopenharmony_ci
260462306a36Sopenharmony_ci	vmx_set_cr4(vcpu, vmcs12->guest_cr4);
260562306a36Sopenharmony_ci	vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
260662306a36Sopenharmony_ci
260762306a36Sopenharmony_ci	vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12);
260862306a36Sopenharmony_ci	/* Note: may modify VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
260962306a36Sopenharmony_ci	vmx_set_efer(vcpu, vcpu->arch.efer);
261062306a36Sopenharmony_ci
261162306a36Sopenharmony_ci	/*
261262306a36Sopenharmony_ci	 * Guest state is invalid and unrestricted guest is disabled,
261362306a36Sopenharmony_ci	 * which means L1 attempted VMEntry to L2 with invalid state.
261462306a36Sopenharmony_ci	 * Fail the VMEntry.
261562306a36Sopenharmony_ci	 *
261662306a36Sopenharmony_ci	 * However when force loading the guest state (SMM exit or
261762306a36Sopenharmony_ci	 * loading nested state after migration, it is possible to
261862306a36Sopenharmony_ci	 * have invalid guest state now, which will be later fixed by
261962306a36Sopenharmony_ci	 * restoring L2 register state
262062306a36Sopenharmony_ci	 */
262162306a36Sopenharmony_ci	if (CC(from_vmentry && !vmx_guest_state_valid(vcpu))) {
262262306a36Sopenharmony_ci		*entry_failure_code = ENTRY_FAIL_DEFAULT;
262362306a36Sopenharmony_ci		return -EINVAL;
262462306a36Sopenharmony_ci	}
262562306a36Sopenharmony_ci
262662306a36Sopenharmony_ci	/* Shadow page tables on either EPT or shadow page tables. */
262762306a36Sopenharmony_ci	if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
262862306a36Sopenharmony_ci				from_vmentry, entry_failure_code))
262962306a36Sopenharmony_ci		return -EINVAL;
263062306a36Sopenharmony_ci
263162306a36Sopenharmony_ci	/*
263262306a36Sopenharmony_ci	 * Immediately write vmcs02.GUEST_CR3.  It will be propagated to vmcs12
263362306a36Sopenharmony_ci	 * on nested VM-Exit, which can occur without actually running L2 and
263462306a36Sopenharmony_ci	 * thus without hitting vmx_load_mmu_pgd(), e.g. if L1 is entering L2 with
263562306a36Sopenharmony_ci	 * vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the
263662306a36Sopenharmony_ci	 * transition to HLT instead of running L2.
263762306a36Sopenharmony_ci	 */
263862306a36Sopenharmony_ci	if (enable_ept)
263962306a36Sopenharmony_ci		vmcs_writel(GUEST_CR3, vmcs12->guest_cr3);
264062306a36Sopenharmony_ci
264162306a36Sopenharmony_ci	/* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
264262306a36Sopenharmony_ci	if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
264362306a36Sopenharmony_ci	    is_pae_paging(vcpu)) {
264462306a36Sopenharmony_ci		vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
264562306a36Sopenharmony_ci		vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
264662306a36Sopenharmony_ci		vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
264762306a36Sopenharmony_ci		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
264862306a36Sopenharmony_ci	}
264962306a36Sopenharmony_ci
265062306a36Sopenharmony_ci	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
265162306a36Sopenharmony_ci	    kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) &&
265262306a36Sopenharmony_ci	    WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
265362306a36Sopenharmony_ci				     vmcs12->guest_ia32_perf_global_ctrl))) {
265462306a36Sopenharmony_ci		*entry_failure_code = ENTRY_FAIL_DEFAULT;
265562306a36Sopenharmony_ci		return -EINVAL;
265662306a36Sopenharmony_ci	}
265762306a36Sopenharmony_ci
265862306a36Sopenharmony_ci	kvm_rsp_write(vcpu, vmcs12->guest_rsp);
265962306a36Sopenharmony_ci	kvm_rip_write(vcpu, vmcs12->guest_rip);
266062306a36Sopenharmony_ci
266162306a36Sopenharmony_ci	/*
266262306a36Sopenharmony_ci	 * It was observed that genuine Hyper-V running in L1 doesn't reset
266362306a36Sopenharmony_ci	 * 'hv_clean_fields' by itself, it only sets the corresponding dirty
266462306a36Sopenharmony_ci	 * bits when it changes a field in eVMCS. Mark all fields as clean
266562306a36Sopenharmony_ci	 * here.
266662306a36Sopenharmony_ci	 */
266762306a36Sopenharmony_ci	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
266862306a36Sopenharmony_ci		vmx->nested.hv_evmcs->hv_clean_fields |=
266962306a36Sopenharmony_ci			HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
267062306a36Sopenharmony_ci
267162306a36Sopenharmony_ci	return 0;
267262306a36Sopenharmony_ci}
267362306a36Sopenharmony_ci
267462306a36Sopenharmony_cistatic int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
267562306a36Sopenharmony_ci{
267662306a36Sopenharmony_ci	if (CC(!nested_cpu_has_nmi_exiting(vmcs12) &&
267762306a36Sopenharmony_ci	       nested_cpu_has_virtual_nmis(vmcs12)))
267862306a36Sopenharmony_ci		return -EINVAL;
267962306a36Sopenharmony_ci
268062306a36Sopenharmony_ci	if (CC(!nested_cpu_has_virtual_nmis(vmcs12) &&
268162306a36Sopenharmony_ci	       nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING)))
268262306a36Sopenharmony_ci		return -EINVAL;
268362306a36Sopenharmony_ci
268462306a36Sopenharmony_ci	return 0;
268562306a36Sopenharmony_ci}
268662306a36Sopenharmony_ci
268762306a36Sopenharmony_cistatic bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
268862306a36Sopenharmony_ci{
268962306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
269062306a36Sopenharmony_ci
269162306a36Sopenharmony_ci	/* Check for memory type validity */
269262306a36Sopenharmony_ci	switch (new_eptp & VMX_EPTP_MT_MASK) {
269362306a36Sopenharmony_ci	case VMX_EPTP_MT_UC:
269462306a36Sopenharmony_ci		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)))
269562306a36Sopenharmony_ci			return false;
269662306a36Sopenharmony_ci		break;
269762306a36Sopenharmony_ci	case VMX_EPTP_MT_WB:
269862306a36Sopenharmony_ci		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)))
269962306a36Sopenharmony_ci			return false;
270062306a36Sopenharmony_ci		break;
270162306a36Sopenharmony_ci	default:
270262306a36Sopenharmony_ci		return false;
270362306a36Sopenharmony_ci	}
270462306a36Sopenharmony_ci
270562306a36Sopenharmony_ci	/* Page-walk levels validity. */
270662306a36Sopenharmony_ci	switch (new_eptp & VMX_EPTP_PWL_MASK) {
270762306a36Sopenharmony_ci	case VMX_EPTP_PWL_5:
270862306a36Sopenharmony_ci		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT)))
270962306a36Sopenharmony_ci			return false;
271062306a36Sopenharmony_ci		break;
271162306a36Sopenharmony_ci	case VMX_EPTP_PWL_4:
271262306a36Sopenharmony_ci		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT)))
271362306a36Sopenharmony_ci			return false;
271462306a36Sopenharmony_ci		break;
271562306a36Sopenharmony_ci	default:
271662306a36Sopenharmony_ci		return false;
271762306a36Sopenharmony_ci	}
271862306a36Sopenharmony_ci
271962306a36Sopenharmony_ci	/* Reserved bits should not be set */
272062306a36Sopenharmony_ci	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, new_eptp) || ((new_eptp >> 7) & 0x1f)))
272162306a36Sopenharmony_ci		return false;
272262306a36Sopenharmony_ci
272362306a36Sopenharmony_ci	/* AD, if set, should be supported */
272462306a36Sopenharmony_ci	if (new_eptp & VMX_EPTP_AD_ENABLE_BIT) {
272562306a36Sopenharmony_ci		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)))
272662306a36Sopenharmony_ci			return false;
272762306a36Sopenharmony_ci	}
272862306a36Sopenharmony_ci
272962306a36Sopenharmony_ci	return true;
273062306a36Sopenharmony_ci}
273162306a36Sopenharmony_ci
273262306a36Sopenharmony_ci/*
273362306a36Sopenharmony_ci * Checks related to VM-Execution Control Fields
273462306a36Sopenharmony_ci */
273562306a36Sopenharmony_cistatic int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
273662306a36Sopenharmony_ci                                              struct vmcs12 *vmcs12)
273762306a36Sopenharmony_ci{
273862306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
273962306a36Sopenharmony_ci
274062306a36Sopenharmony_ci	if (CC(!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
274162306a36Sopenharmony_ci				   vmx->nested.msrs.pinbased_ctls_low,
274262306a36Sopenharmony_ci				   vmx->nested.msrs.pinbased_ctls_high)) ||
274362306a36Sopenharmony_ci	    CC(!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
274462306a36Sopenharmony_ci				   vmx->nested.msrs.procbased_ctls_low,
274562306a36Sopenharmony_ci				   vmx->nested.msrs.procbased_ctls_high)))
274662306a36Sopenharmony_ci		return -EINVAL;
274762306a36Sopenharmony_ci
274862306a36Sopenharmony_ci	if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
274962306a36Sopenharmony_ci	    CC(!vmx_control_verify(vmcs12->secondary_vm_exec_control,
275062306a36Sopenharmony_ci				   vmx->nested.msrs.secondary_ctls_low,
275162306a36Sopenharmony_ci				   vmx->nested.msrs.secondary_ctls_high)))
275262306a36Sopenharmony_ci		return -EINVAL;
275362306a36Sopenharmony_ci
275462306a36Sopenharmony_ci	if (CC(vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) ||
275562306a36Sopenharmony_ci	    nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) ||
275662306a36Sopenharmony_ci	    nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) ||
275762306a36Sopenharmony_ci	    nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) ||
275862306a36Sopenharmony_ci	    nested_vmx_check_apic_access_controls(vcpu, vmcs12) ||
275962306a36Sopenharmony_ci	    nested_vmx_check_apicv_controls(vcpu, vmcs12) ||
276062306a36Sopenharmony_ci	    nested_vmx_check_nmi_controls(vmcs12) ||
276162306a36Sopenharmony_ci	    nested_vmx_check_pml_controls(vcpu, vmcs12) ||
276262306a36Sopenharmony_ci	    nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) ||
276362306a36Sopenharmony_ci	    nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) ||
276462306a36Sopenharmony_ci	    nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) ||
276562306a36Sopenharmony_ci	    CC(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
276662306a36Sopenharmony_ci		return -EINVAL;
276762306a36Sopenharmony_ci
276862306a36Sopenharmony_ci	if (!nested_cpu_has_preemption_timer(vmcs12) &&
276962306a36Sopenharmony_ci	    nested_cpu_has_save_preemption_timer(vmcs12))
277062306a36Sopenharmony_ci		return -EINVAL;
277162306a36Sopenharmony_ci
277262306a36Sopenharmony_ci	if (nested_cpu_has_ept(vmcs12) &&
277362306a36Sopenharmony_ci	    CC(!nested_vmx_check_eptp(vcpu, vmcs12->ept_pointer)))
277462306a36Sopenharmony_ci		return -EINVAL;
277562306a36Sopenharmony_ci
277662306a36Sopenharmony_ci	if (nested_cpu_has_vmfunc(vmcs12)) {
277762306a36Sopenharmony_ci		if (CC(vmcs12->vm_function_control &
277862306a36Sopenharmony_ci		       ~vmx->nested.msrs.vmfunc_controls))
277962306a36Sopenharmony_ci			return -EINVAL;
278062306a36Sopenharmony_ci
278162306a36Sopenharmony_ci		if (nested_cpu_has_eptp_switching(vmcs12)) {
278262306a36Sopenharmony_ci			if (CC(!nested_cpu_has_ept(vmcs12)) ||
278362306a36Sopenharmony_ci			    CC(!page_address_valid(vcpu, vmcs12->eptp_list_address)))
278462306a36Sopenharmony_ci				return -EINVAL;
278562306a36Sopenharmony_ci		}
278662306a36Sopenharmony_ci	}
278762306a36Sopenharmony_ci
278862306a36Sopenharmony_ci	return 0;
278962306a36Sopenharmony_ci}
279062306a36Sopenharmony_ci
279162306a36Sopenharmony_ci/*
279262306a36Sopenharmony_ci * Checks related to VM-Exit Control Fields
279362306a36Sopenharmony_ci */
279462306a36Sopenharmony_cistatic int nested_check_vm_exit_controls(struct kvm_vcpu *vcpu,
279562306a36Sopenharmony_ci                                         struct vmcs12 *vmcs12)
279662306a36Sopenharmony_ci{
279762306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
279862306a36Sopenharmony_ci
279962306a36Sopenharmony_ci	if (CC(!vmx_control_verify(vmcs12->vm_exit_controls,
280062306a36Sopenharmony_ci				    vmx->nested.msrs.exit_ctls_low,
280162306a36Sopenharmony_ci				    vmx->nested.msrs.exit_ctls_high)) ||
280262306a36Sopenharmony_ci	    CC(nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12)))
280362306a36Sopenharmony_ci		return -EINVAL;
280462306a36Sopenharmony_ci
280562306a36Sopenharmony_ci	return 0;
280662306a36Sopenharmony_ci}
280762306a36Sopenharmony_ci
280862306a36Sopenharmony_ci/*
280962306a36Sopenharmony_ci * Checks related to VM-Entry Control Fields
281062306a36Sopenharmony_ci */
281162306a36Sopenharmony_cistatic int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
281262306a36Sopenharmony_ci					  struct vmcs12 *vmcs12)
281362306a36Sopenharmony_ci{
281462306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
281562306a36Sopenharmony_ci
281662306a36Sopenharmony_ci	if (CC(!vmx_control_verify(vmcs12->vm_entry_controls,
281762306a36Sopenharmony_ci				    vmx->nested.msrs.entry_ctls_low,
281862306a36Sopenharmony_ci				    vmx->nested.msrs.entry_ctls_high)))
281962306a36Sopenharmony_ci		return -EINVAL;
282062306a36Sopenharmony_ci
282162306a36Sopenharmony_ci	/*
282262306a36Sopenharmony_ci	 * From the Intel SDM, volume 3:
282362306a36Sopenharmony_ci	 * Fields relevant to VM-entry event injection must be set properly.
282462306a36Sopenharmony_ci	 * These fields are the VM-entry interruption-information field, the
282562306a36Sopenharmony_ci	 * VM-entry exception error code, and the VM-entry instruction length.
282662306a36Sopenharmony_ci	 */
282762306a36Sopenharmony_ci	if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
282862306a36Sopenharmony_ci		u32 intr_info = vmcs12->vm_entry_intr_info_field;
282962306a36Sopenharmony_ci		u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
283062306a36Sopenharmony_ci		u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
283162306a36Sopenharmony_ci		bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
283262306a36Sopenharmony_ci		bool should_have_error_code;
283362306a36Sopenharmony_ci		bool urg = nested_cpu_has2(vmcs12,
283462306a36Sopenharmony_ci					   SECONDARY_EXEC_UNRESTRICTED_GUEST);
283562306a36Sopenharmony_ci		bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
283662306a36Sopenharmony_ci
283762306a36Sopenharmony_ci		/* VM-entry interruption-info field: interruption type */
283862306a36Sopenharmony_ci		if (CC(intr_type == INTR_TYPE_RESERVED) ||
283962306a36Sopenharmony_ci		    CC(intr_type == INTR_TYPE_OTHER_EVENT &&
284062306a36Sopenharmony_ci		       !nested_cpu_supports_monitor_trap_flag(vcpu)))
284162306a36Sopenharmony_ci			return -EINVAL;
284262306a36Sopenharmony_ci
284362306a36Sopenharmony_ci		/* VM-entry interruption-info field: vector */
284462306a36Sopenharmony_ci		if (CC(intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
284562306a36Sopenharmony_ci		    CC(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
284662306a36Sopenharmony_ci		    CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
284762306a36Sopenharmony_ci			return -EINVAL;
284862306a36Sopenharmony_ci
284962306a36Sopenharmony_ci		/* VM-entry interruption-info field: deliver error code */
285062306a36Sopenharmony_ci		should_have_error_code =
285162306a36Sopenharmony_ci			intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
285262306a36Sopenharmony_ci			x86_exception_has_error_code(vector);
285362306a36Sopenharmony_ci		if (CC(has_error_code != should_have_error_code))
285462306a36Sopenharmony_ci			return -EINVAL;
285562306a36Sopenharmony_ci
285662306a36Sopenharmony_ci		/* VM-entry exception error code */
285762306a36Sopenharmony_ci		if (CC(has_error_code &&
285862306a36Sopenharmony_ci		       vmcs12->vm_entry_exception_error_code & GENMASK(31, 16)))
285962306a36Sopenharmony_ci			return -EINVAL;
286062306a36Sopenharmony_ci
286162306a36Sopenharmony_ci		/* VM-entry interruption-info field: reserved bits */
286262306a36Sopenharmony_ci		if (CC(intr_info & INTR_INFO_RESVD_BITS_MASK))
286362306a36Sopenharmony_ci			return -EINVAL;
286462306a36Sopenharmony_ci
286562306a36Sopenharmony_ci		/* VM-entry instruction length */
286662306a36Sopenharmony_ci		switch (intr_type) {
286762306a36Sopenharmony_ci		case INTR_TYPE_SOFT_EXCEPTION:
286862306a36Sopenharmony_ci		case INTR_TYPE_SOFT_INTR:
286962306a36Sopenharmony_ci		case INTR_TYPE_PRIV_SW_EXCEPTION:
287062306a36Sopenharmony_ci			if (CC(vmcs12->vm_entry_instruction_len > 15) ||
287162306a36Sopenharmony_ci			    CC(vmcs12->vm_entry_instruction_len == 0 &&
287262306a36Sopenharmony_ci			    CC(!nested_cpu_has_zero_length_injection(vcpu))))
287362306a36Sopenharmony_ci				return -EINVAL;
287462306a36Sopenharmony_ci		}
287562306a36Sopenharmony_ci	}
287662306a36Sopenharmony_ci
287762306a36Sopenharmony_ci	if (nested_vmx_check_entry_msr_switch_controls(vcpu, vmcs12))
287862306a36Sopenharmony_ci		return -EINVAL;
287962306a36Sopenharmony_ci
288062306a36Sopenharmony_ci	return 0;
288162306a36Sopenharmony_ci}
288262306a36Sopenharmony_ci
288362306a36Sopenharmony_cistatic int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
288462306a36Sopenharmony_ci				     struct vmcs12 *vmcs12)
288562306a36Sopenharmony_ci{
288662306a36Sopenharmony_ci	if (nested_check_vm_execution_controls(vcpu, vmcs12) ||
288762306a36Sopenharmony_ci	    nested_check_vm_exit_controls(vcpu, vmcs12) ||
288862306a36Sopenharmony_ci	    nested_check_vm_entry_controls(vcpu, vmcs12))
288962306a36Sopenharmony_ci		return -EINVAL;
289062306a36Sopenharmony_ci
289162306a36Sopenharmony_ci	if (guest_cpuid_has_evmcs(vcpu))
289262306a36Sopenharmony_ci		return nested_evmcs_check_controls(vmcs12);
289362306a36Sopenharmony_ci
289462306a36Sopenharmony_ci	return 0;
289562306a36Sopenharmony_ci}
289662306a36Sopenharmony_ci
/*
 * On 64-bit builds, require the "host address-space size" VM-exit control
 * to match the vCPU's current EFER.LMA.  On other builds nothing is checked.
 *
 * Returns 0 on success, -EINVAL on a mismatch.
 */
static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
					       struct vmcs12 *vmcs12)
{
	int ret = 0;

#ifdef CONFIG_X86_64
	if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) !=
		!!(vcpu->arch.efer & EFER_LMA)))
		ret = -EINVAL;
#endif
	return ret;
}
290762306a36Sopenharmony_ci
290862306a36Sopenharmony_cistatic int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
290962306a36Sopenharmony_ci				       struct vmcs12 *vmcs12)
291062306a36Sopenharmony_ci{
291162306a36Sopenharmony_ci	bool ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
291262306a36Sopenharmony_ci
291362306a36Sopenharmony_ci	if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
291462306a36Sopenharmony_ci	    CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
291562306a36Sopenharmony_ci	    CC(kvm_vcpu_is_illegal_gpa(vcpu, vmcs12->host_cr3)))
291662306a36Sopenharmony_ci		return -EINVAL;
291762306a36Sopenharmony_ci
291862306a36Sopenharmony_ci	if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
291962306a36Sopenharmony_ci	    CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
292062306a36Sopenharmony_ci		return -EINVAL;
292162306a36Sopenharmony_ci
292262306a36Sopenharmony_ci	if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
292362306a36Sopenharmony_ci	    CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
292462306a36Sopenharmony_ci		return -EINVAL;
292562306a36Sopenharmony_ci
292662306a36Sopenharmony_ci	if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
292762306a36Sopenharmony_ci	    CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
292862306a36Sopenharmony_ci					   vmcs12->host_ia32_perf_global_ctrl)))
292962306a36Sopenharmony_ci		return -EINVAL;
293062306a36Sopenharmony_ci
293162306a36Sopenharmony_ci	if (ia32e) {
293262306a36Sopenharmony_ci		if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
293362306a36Sopenharmony_ci			return -EINVAL;
293462306a36Sopenharmony_ci	} else {
293562306a36Sopenharmony_ci		if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
293662306a36Sopenharmony_ci		    CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
293762306a36Sopenharmony_ci		    CC((vmcs12->host_rip) >> 32))
293862306a36Sopenharmony_ci			return -EINVAL;
293962306a36Sopenharmony_ci	}
294062306a36Sopenharmony_ci
294162306a36Sopenharmony_ci	if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
294262306a36Sopenharmony_ci	    CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
294362306a36Sopenharmony_ci	    CC(vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
294462306a36Sopenharmony_ci	    CC(vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
294562306a36Sopenharmony_ci	    CC(vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
294662306a36Sopenharmony_ci	    CC(vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
294762306a36Sopenharmony_ci	    CC(vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
294862306a36Sopenharmony_ci	    CC(vmcs12->host_cs_selector == 0) ||
294962306a36Sopenharmony_ci	    CC(vmcs12->host_tr_selector == 0) ||
295062306a36Sopenharmony_ci	    CC(vmcs12->host_ss_selector == 0 && !ia32e))
295162306a36Sopenharmony_ci		return -EINVAL;
295262306a36Sopenharmony_ci
295362306a36Sopenharmony_ci	if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) ||
295462306a36Sopenharmony_ci	    CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
295562306a36Sopenharmony_ci	    CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
295662306a36Sopenharmony_ci	    CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
295762306a36Sopenharmony_ci	    CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
295862306a36Sopenharmony_ci	    CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
295962306a36Sopenharmony_ci		return -EINVAL;
296062306a36Sopenharmony_ci
296162306a36Sopenharmony_ci	/*
296262306a36Sopenharmony_ci	 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
296362306a36Sopenharmony_ci	 * IA32_EFER MSR must be 0 in the field for that register. In addition,
296462306a36Sopenharmony_ci	 * the values of the LMA and LME bits in the field must each be that of
296562306a36Sopenharmony_ci	 * the host address-space size VM-exit control.
296662306a36Sopenharmony_ci	 */
296762306a36Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
296862306a36Sopenharmony_ci		if (CC(!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer)) ||
296962306a36Sopenharmony_ci		    CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA)) ||
297062306a36Sopenharmony_ci		    CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)))
297162306a36Sopenharmony_ci			return -EINVAL;
297262306a36Sopenharmony_ci	}
297362306a36Sopenharmony_ci
297462306a36Sopenharmony_ci	return 0;
297562306a36Sopenharmony_ci}
297662306a36Sopenharmony_ci
297762306a36Sopenharmony_cistatic int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
297862306a36Sopenharmony_ci					  struct vmcs12 *vmcs12)
297962306a36Sopenharmony_ci{
298062306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
298162306a36Sopenharmony_ci	struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
298262306a36Sopenharmony_ci	struct vmcs_hdr hdr;
298362306a36Sopenharmony_ci
298462306a36Sopenharmony_ci	if (vmcs12->vmcs_link_pointer == INVALID_GPA)
298562306a36Sopenharmony_ci		return 0;
298662306a36Sopenharmony_ci
298762306a36Sopenharmony_ci	if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
298862306a36Sopenharmony_ci		return -EINVAL;
298962306a36Sopenharmony_ci
299062306a36Sopenharmony_ci	if (ghc->gpa != vmcs12->vmcs_link_pointer &&
299162306a36Sopenharmony_ci	    CC(kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
299262306a36Sopenharmony_ci					 vmcs12->vmcs_link_pointer, VMCS12_SIZE)))
299362306a36Sopenharmony_ci                return -EINVAL;
299462306a36Sopenharmony_ci
299562306a36Sopenharmony_ci	if (CC(kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
299662306a36Sopenharmony_ci					    offsetof(struct vmcs12, hdr),
299762306a36Sopenharmony_ci					    sizeof(hdr))))
299862306a36Sopenharmony_ci		return -EINVAL;
299962306a36Sopenharmony_ci
300062306a36Sopenharmony_ci	if (CC(hdr.revision_id != VMCS12_REVISION) ||
300162306a36Sopenharmony_ci	    CC(hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
300262306a36Sopenharmony_ci		return -EINVAL;
300362306a36Sopenharmony_ci
300462306a36Sopenharmony_ci	return 0;
300562306a36Sopenharmony_ci}
300662306a36Sopenharmony_ci
300762306a36Sopenharmony_ci/*
300862306a36Sopenharmony_ci * Checks related to Guest Non-register State
300962306a36Sopenharmony_ci */
301062306a36Sopenharmony_cistatic int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
301162306a36Sopenharmony_ci{
301262306a36Sopenharmony_ci	if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
301362306a36Sopenharmony_ci	       vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT &&
301462306a36Sopenharmony_ci	       vmcs12->guest_activity_state != GUEST_ACTIVITY_WAIT_SIPI))
301562306a36Sopenharmony_ci		return -EINVAL;
301662306a36Sopenharmony_ci
301762306a36Sopenharmony_ci	return 0;
301862306a36Sopenharmony_ci}
301962306a36Sopenharmony_ci
302062306a36Sopenharmony_cistatic int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
302162306a36Sopenharmony_ci					struct vmcs12 *vmcs12,
302262306a36Sopenharmony_ci					enum vm_entry_failure_code *entry_failure_code)
302362306a36Sopenharmony_ci{
302462306a36Sopenharmony_ci	bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE);
302562306a36Sopenharmony_ci
302662306a36Sopenharmony_ci	*entry_failure_code = ENTRY_FAIL_DEFAULT;
302762306a36Sopenharmony_ci
302862306a36Sopenharmony_ci	if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) ||
302962306a36Sopenharmony_ci	    CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
303062306a36Sopenharmony_ci		return -EINVAL;
303162306a36Sopenharmony_ci
303262306a36Sopenharmony_ci	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
303362306a36Sopenharmony_ci	    CC(!kvm_dr7_valid(vmcs12->guest_dr7)))
303462306a36Sopenharmony_ci		return -EINVAL;
303562306a36Sopenharmony_ci
303662306a36Sopenharmony_ci	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
303762306a36Sopenharmony_ci	    CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
303862306a36Sopenharmony_ci		return -EINVAL;
303962306a36Sopenharmony_ci
304062306a36Sopenharmony_ci	if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
304162306a36Sopenharmony_ci		*entry_failure_code = ENTRY_FAIL_VMCS_LINK_PTR;
304262306a36Sopenharmony_ci		return -EINVAL;
304362306a36Sopenharmony_ci	}
304462306a36Sopenharmony_ci
304562306a36Sopenharmony_ci	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
304662306a36Sopenharmony_ci	    CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
304762306a36Sopenharmony_ci					   vmcs12->guest_ia32_perf_global_ctrl)))
304862306a36Sopenharmony_ci		return -EINVAL;
304962306a36Sopenharmony_ci
305062306a36Sopenharmony_ci	if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG))
305162306a36Sopenharmony_ci		return -EINVAL;
305262306a36Sopenharmony_ci
305362306a36Sopenharmony_ci	if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) ||
305462306a36Sopenharmony_ci	    CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG)))
305562306a36Sopenharmony_ci		return -EINVAL;
305662306a36Sopenharmony_ci
305762306a36Sopenharmony_ci	/*
305862306a36Sopenharmony_ci	 * If the load IA32_EFER VM-entry control is 1, the following checks
305962306a36Sopenharmony_ci	 * are performed on the field for the IA32_EFER MSR:
306062306a36Sopenharmony_ci	 * - Bits reserved in the IA32_EFER MSR must be 0.
306162306a36Sopenharmony_ci	 * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
306262306a36Sopenharmony_ci	 *   the IA-32e mode guest VM-exit control. It must also be identical
306362306a36Sopenharmony_ci	 *   to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
306462306a36Sopenharmony_ci	 *   CR0.PG) is 1.
306562306a36Sopenharmony_ci	 */
306662306a36Sopenharmony_ci	if (to_vmx(vcpu)->nested.nested_run_pending &&
306762306a36Sopenharmony_ci	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
306862306a36Sopenharmony_ci		if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
306962306a36Sopenharmony_ci		    CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
307062306a36Sopenharmony_ci		    CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
307162306a36Sopenharmony_ci		     ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))))
307262306a36Sopenharmony_ci			return -EINVAL;
307362306a36Sopenharmony_ci	}
307462306a36Sopenharmony_ci
307562306a36Sopenharmony_ci	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
307662306a36Sopenharmony_ci	    (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
307762306a36Sopenharmony_ci	     CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
307862306a36Sopenharmony_ci		return -EINVAL;
307962306a36Sopenharmony_ci
308062306a36Sopenharmony_ci	if (nested_check_guest_non_reg_state(vmcs12))
308162306a36Sopenharmony_ci		return -EINVAL;
308262306a36Sopenharmony_ci
308362306a36Sopenharmony_ci	return 0;
308462306a36Sopenharmony_ci}
308562306a36Sopenharmony_ci
308662306a36Sopenharmony_cistatic int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
308762306a36Sopenharmony_ci{
308862306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
308962306a36Sopenharmony_ci	unsigned long cr3, cr4;
309062306a36Sopenharmony_ci	bool vm_fail;
309162306a36Sopenharmony_ci
309262306a36Sopenharmony_ci	if (!nested_early_check)
309362306a36Sopenharmony_ci		return 0;
309462306a36Sopenharmony_ci
309562306a36Sopenharmony_ci	if (vmx->msr_autoload.host.nr)
309662306a36Sopenharmony_ci		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
309762306a36Sopenharmony_ci	if (vmx->msr_autoload.guest.nr)
309862306a36Sopenharmony_ci		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
309962306a36Sopenharmony_ci
310062306a36Sopenharmony_ci	preempt_disable();
310162306a36Sopenharmony_ci
310262306a36Sopenharmony_ci	vmx_prepare_switch_to_guest(vcpu);
310362306a36Sopenharmony_ci
310462306a36Sopenharmony_ci	/*
310562306a36Sopenharmony_ci	 * Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS,
310662306a36Sopenharmony_ci	 * which is reserved to '1' by hardware.  GUEST_RFLAGS is guaranteed to
310762306a36Sopenharmony_ci	 * be written (by prepare_vmcs02()) before the "real" VMEnter, i.e.
310862306a36Sopenharmony_ci	 * there is no need to preserve other bits or save/restore the field.
310962306a36Sopenharmony_ci	 */
311062306a36Sopenharmony_ci	vmcs_writel(GUEST_RFLAGS, 0);
311162306a36Sopenharmony_ci
311262306a36Sopenharmony_ci	cr3 = __get_current_cr3_fast();
311362306a36Sopenharmony_ci	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
311462306a36Sopenharmony_ci		vmcs_writel(HOST_CR3, cr3);
311562306a36Sopenharmony_ci		vmx->loaded_vmcs->host_state.cr3 = cr3;
311662306a36Sopenharmony_ci	}
311762306a36Sopenharmony_ci
311862306a36Sopenharmony_ci	cr4 = cr4_read_shadow();
311962306a36Sopenharmony_ci	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
312062306a36Sopenharmony_ci		vmcs_writel(HOST_CR4, cr4);
312162306a36Sopenharmony_ci		vmx->loaded_vmcs->host_state.cr4 = cr4;
312262306a36Sopenharmony_ci	}
312362306a36Sopenharmony_ci
312462306a36Sopenharmony_ci	vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
312562306a36Sopenharmony_ci				 __vmx_vcpu_run_flags(vmx));
312662306a36Sopenharmony_ci
312762306a36Sopenharmony_ci	if (vmx->msr_autoload.host.nr)
312862306a36Sopenharmony_ci		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
312962306a36Sopenharmony_ci	if (vmx->msr_autoload.guest.nr)
313062306a36Sopenharmony_ci		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
313162306a36Sopenharmony_ci
313262306a36Sopenharmony_ci	if (vm_fail) {
313362306a36Sopenharmony_ci		u32 error = vmcs_read32(VM_INSTRUCTION_ERROR);
313462306a36Sopenharmony_ci
313562306a36Sopenharmony_ci		preempt_enable();
313662306a36Sopenharmony_ci
313762306a36Sopenharmony_ci		trace_kvm_nested_vmenter_failed(
313862306a36Sopenharmony_ci			"early hardware check VM-instruction error: ", error);
313962306a36Sopenharmony_ci		WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD);
314062306a36Sopenharmony_ci		return 1;
314162306a36Sopenharmony_ci	}
314262306a36Sopenharmony_ci
314362306a36Sopenharmony_ci	/*
314462306a36Sopenharmony_ci	 * VMExit clears RFLAGS.IF and DR7, even on a consistency check.
314562306a36Sopenharmony_ci	 */
314662306a36Sopenharmony_ci	if (hw_breakpoint_active())
314762306a36Sopenharmony_ci		set_debugreg(__this_cpu_read(cpu_dr7), 7);
314862306a36Sopenharmony_ci	local_irq_enable();
314962306a36Sopenharmony_ci	preempt_enable();
315062306a36Sopenharmony_ci
315162306a36Sopenharmony_ci	/*
315262306a36Sopenharmony_ci	 * A non-failing VMEntry means we somehow entered guest mode with
315362306a36Sopenharmony_ci	 * an illegal RIP, and that's just the tip of the iceberg.  There
315462306a36Sopenharmony_ci	 * is no telling what memory has been modified or what state has
315562306a36Sopenharmony_ci	 * been exposed to unknown code.  Hitting this all but guarantees
315662306a36Sopenharmony_ci	 * a (very critical) hardware issue.
315762306a36Sopenharmony_ci	 */
315862306a36Sopenharmony_ci	WARN_ON(!(vmcs_read32(VM_EXIT_REASON) &
315962306a36Sopenharmony_ci		VMX_EXIT_REASONS_FAILED_VMENTRY));
316062306a36Sopenharmony_ci
316162306a36Sopenharmony_ci	return 0;
316262306a36Sopenharmony_ci}
316362306a36Sopenharmony_ci
316462306a36Sopenharmony_cistatic bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
316562306a36Sopenharmony_ci{
316662306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
316762306a36Sopenharmony_ci
316862306a36Sopenharmony_ci	/*
316962306a36Sopenharmony_ci	 * hv_evmcs may end up being not mapped after migration (when
317062306a36Sopenharmony_ci	 * L2 was running), map it here to make sure vmcs12 changes are
317162306a36Sopenharmony_ci	 * properly reflected.
317262306a36Sopenharmony_ci	 */
317362306a36Sopenharmony_ci	if (guest_cpuid_has_evmcs(vcpu) &&
317462306a36Sopenharmony_ci	    vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) {
317562306a36Sopenharmony_ci		enum nested_evmptrld_status evmptrld_status =
317662306a36Sopenharmony_ci			nested_vmx_handle_enlightened_vmptrld(vcpu, false);
317762306a36Sopenharmony_ci
317862306a36Sopenharmony_ci		if (evmptrld_status == EVMPTRLD_VMFAIL ||
317962306a36Sopenharmony_ci		    evmptrld_status == EVMPTRLD_ERROR)
318062306a36Sopenharmony_ci			return false;
318162306a36Sopenharmony_ci
318262306a36Sopenharmony_ci		/*
318362306a36Sopenharmony_ci		 * Post migration VMCS12 always provides the most actual
318462306a36Sopenharmony_ci		 * information, copy it to eVMCS upon entry.
318562306a36Sopenharmony_ci		 */
318662306a36Sopenharmony_ci		vmx->nested.need_vmcs12_to_shadow_sync = true;
318762306a36Sopenharmony_ci	}
318862306a36Sopenharmony_ci
318962306a36Sopenharmony_ci	return true;
319062306a36Sopenharmony_ci}
319162306a36Sopenharmony_ci
319262306a36Sopenharmony_cistatic bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
319362306a36Sopenharmony_ci{
319462306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
319562306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
319662306a36Sopenharmony_ci	struct kvm_host_map *map;
319762306a36Sopenharmony_ci
319862306a36Sopenharmony_ci	if (!vcpu->arch.pdptrs_from_userspace &&
319962306a36Sopenharmony_ci	    !nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
320062306a36Sopenharmony_ci		/*
320162306a36Sopenharmony_ci		 * Reload the guest's PDPTRs since after a migration
320262306a36Sopenharmony_ci		 * the guest CR3 might be restored prior to setting the nested
320362306a36Sopenharmony_ci		 * state which can lead to a load of wrong PDPTRs.
320462306a36Sopenharmony_ci		 */
320562306a36Sopenharmony_ci		if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
320662306a36Sopenharmony_ci			return false;
320762306a36Sopenharmony_ci	}
320862306a36Sopenharmony_ci
320962306a36Sopenharmony_ci
321062306a36Sopenharmony_ci	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
321162306a36Sopenharmony_ci		map = &vmx->nested.apic_access_page_map;
321262306a36Sopenharmony_ci
321362306a36Sopenharmony_ci		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->apic_access_addr), map)) {
321462306a36Sopenharmony_ci			vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(map->pfn));
321562306a36Sopenharmony_ci		} else {
321662306a36Sopenharmony_ci			pr_debug_ratelimited("%s: no backing for APIC-access address in vmcs12\n",
321762306a36Sopenharmony_ci					     __func__);
321862306a36Sopenharmony_ci			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
321962306a36Sopenharmony_ci			vcpu->run->internal.suberror =
322062306a36Sopenharmony_ci				KVM_INTERNAL_ERROR_EMULATION;
322162306a36Sopenharmony_ci			vcpu->run->internal.ndata = 0;
322262306a36Sopenharmony_ci			return false;
322362306a36Sopenharmony_ci		}
322462306a36Sopenharmony_ci	}
322562306a36Sopenharmony_ci
322662306a36Sopenharmony_ci	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
322762306a36Sopenharmony_ci		map = &vmx->nested.virtual_apic_map;
322862306a36Sopenharmony_ci
322962306a36Sopenharmony_ci		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
323062306a36Sopenharmony_ci			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
323162306a36Sopenharmony_ci		} else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
323262306a36Sopenharmony_ci		           nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
323362306a36Sopenharmony_ci			   !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
323462306a36Sopenharmony_ci			/*
323562306a36Sopenharmony_ci			 * The processor will never use the TPR shadow, simply
323662306a36Sopenharmony_ci			 * clear the bit from the execution control.  Such a
323762306a36Sopenharmony_ci			 * configuration is useless, but it happens in tests.
323862306a36Sopenharmony_ci			 * For any other configuration, failing the vm entry is
323962306a36Sopenharmony_ci			 * _not_ what the processor does but it's basically the
324062306a36Sopenharmony_ci			 * only possibility we have.
324162306a36Sopenharmony_ci			 */
324262306a36Sopenharmony_ci			exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW);
324362306a36Sopenharmony_ci		} else {
324462306a36Sopenharmony_ci			/*
324562306a36Sopenharmony_ci			 * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR to
324662306a36Sopenharmony_ci			 * force VM-Entry to fail.
324762306a36Sopenharmony_ci			 */
324862306a36Sopenharmony_ci			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, INVALID_GPA);
324962306a36Sopenharmony_ci		}
325062306a36Sopenharmony_ci	}
325162306a36Sopenharmony_ci
325262306a36Sopenharmony_ci	if (nested_cpu_has_posted_intr(vmcs12)) {
325362306a36Sopenharmony_ci		map = &vmx->nested.pi_desc_map;
325462306a36Sopenharmony_ci
325562306a36Sopenharmony_ci		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
325662306a36Sopenharmony_ci			vmx->nested.pi_desc =
325762306a36Sopenharmony_ci				(struct pi_desc *)(((void *)map->hva) +
325862306a36Sopenharmony_ci				offset_in_page(vmcs12->posted_intr_desc_addr));
325962306a36Sopenharmony_ci			vmcs_write64(POSTED_INTR_DESC_ADDR,
326062306a36Sopenharmony_ci				     pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
326162306a36Sopenharmony_ci		} else {
326262306a36Sopenharmony_ci			/*
326362306a36Sopenharmony_ci			 * Defer the KVM_INTERNAL_EXIT until KVM tries to
326462306a36Sopenharmony_ci			 * access the contents of the VMCS12 posted interrupt
326562306a36Sopenharmony_ci			 * descriptor. (Note that KVM may do this when it
326662306a36Sopenharmony_ci			 * should not, per the architectural specification.)
326762306a36Sopenharmony_ci			 */
326862306a36Sopenharmony_ci			vmx->nested.pi_desc = NULL;
326962306a36Sopenharmony_ci			pin_controls_clearbit(vmx, PIN_BASED_POSTED_INTR);
327062306a36Sopenharmony_ci		}
327162306a36Sopenharmony_ci	}
327262306a36Sopenharmony_ci	if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
327362306a36Sopenharmony_ci		exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
327462306a36Sopenharmony_ci	else
327562306a36Sopenharmony_ci		exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
327662306a36Sopenharmony_ci
327762306a36Sopenharmony_ci	return true;
327862306a36Sopenharmony_ci}
327962306a36Sopenharmony_ci
328062306a36Sopenharmony_cistatic bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
328162306a36Sopenharmony_ci{
328262306a36Sopenharmony_ci	/*
328362306a36Sopenharmony_ci	 * Note: nested_get_evmcs_page() also updates 'vp_assist_page' copy
328462306a36Sopenharmony_ci	 * in 'struct kvm_vcpu_hv' in case eVMCS is in use, this is mandatory
328562306a36Sopenharmony_ci	 * to make nested_evmcs_l2_tlb_flush_enabled() work correctly post
328662306a36Sopenharmony_ci	 * migration.
328762306a36Sopenharmony_ci	 */
328862306a36Sopenharmony_ci	if (!nested_get_evmcs_page(vcpu)) {
328962306a36Sopenharmony_ci		pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
329062306a36Sopenharmony_ci				     __func__);
329162306a36Sopenharmony_ci		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
329262306a36Sopenharmony_ci		vcpu->run->internal.suberror =
329362306a36Sopenharmony_ci			KVM_INTERNAL_ERROR_EMULATION;
329462306a36Sopenharmony_ci		vcpu->run->internal.ndata = 0;
329562306a36Sopenharmony_ci
329662306a36Sopenharmony_ci		return false;
329762306a36Sopenharmony_ci	}
329862306a36Sopenharmony_ci
329962306a36Sopenharmony_ci	if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
330062306a36Sopenharmony_ci		return false;
330162306a36Sopenharmony_ci
330262306a36Sopenharmony_ci	return true;
330362306a36Sopenharmony_ci}
330462306a36Sopenharmony_ci
330562306a36Sopenharmony_cistatic int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
330662306a36Sopenharmony_ci{
330762306a36Sopenharmony_ci	struct vmcs12 *vmcs12;
330862306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
330962306a36Sopenharmony_ci	gpa_t dst;
331062306a36Sopenharmony_ci
331162306a36Sopenharmony_ci	if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
331262306a36Sopenharmony_ci		return 0;
331362306a36Sopenharmony_ci
331462306a36Sopenharmony_ci	if (WARN_ON_ONCE(vmx->nested.pml_full))
331562306a36Sopenharmony_ci		return 1;
331662306a36Sopenharmony_ci
331762306a36Sopenharmony_ci	/*
331862306a36Sopenharmony_ci	 * Check if PML is enabled for the nested guest. Whether eptp bit 6 is
331962306a36Sopenharmony_ci	 * set is already checked as part of A/D emulation.
332062306a36Sopenharmony_ci	 */
332162306a36Sopenharmony_ci	vmcs12 = get_vmcs12(vcpu);
332262306a36Sopenharmony_ci	if (!nested_cpu_has_pml(vmcs12))
332362306a36Sopenharmony_ci		return 0;
332462306a36Sopenharmony_ci
332562306a36Sopenharmony_ci	if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
332662306a36Sopenharmony_ci		vmx->nested.pml_full = true;
332762306a36Sopenharmony_ci		return 1;
332862306a36Sopenharmony_ci	}
332962306a36Sopenharmony_ci
333062306a36Sopenharmony_ci	gpa &= ~0xFFFull;
333162306a36Sopenharmony_ci	dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
333262306a36Sopenharmony_ci
333362306a36Sopenharmony_ci	if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
333462306a36Sopenharmony_ci				 offset_in_page(dst), sizeof(gpa)))
333562306a36Sopenharmony_ci		return 0;
333662306a36Sopenharmony_ci
333762306a36Sopenharmony_ci	vmcs12->guest_pml_index--;
333862306a36Sopenharmony_ci
333962306a36Sopenharmony_ci	return 0;
334062306a36Sopenharmony_ci}
334162306a36Sopenharmony_ci
334262306a36Sopenharmony_ci/*
334362306a36Sopenharmony_ci * Intel's VMX Instruction Reference specifies a common set of prerequisites
334462306a36Sopenharmony_ci * for running VMX instructions (except VMXON, whose prerequisites are
334562306a36Sopenharmony_ci * slightly different). It also specifies what exception to inject otherwise.
334662306a36Sopenharmony_ci * Note that many of these exceptions have priority over VM exits, so they
334762306a36Sopenharmony_ci * don't have to be checked again here.
334862306a36Sopenharmony_ci */
334962306a36Sopenharmony_cistatic int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
335062306a36Sopenharmony_ci{
335162306a36Sopenharmony_ci	if (!to_vmx(vcpu)->nested.vmxon) {
335262306a36Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
335362306a36Sopenharmony_ci		return 0;
335462306a36Sopenharmony_ci	}
335562306a36Sopenharmony_ci
335662306a36Sopenharmony_ci	if (vmx_get_cpl(vcpu)) {
335762306a36Sopenharmony_ci		kvm_inject_gp(vcpu, 0);
335862306a36Sopenharmony_ci		return 0;
335962306a36Sopenharmony_ci	}
336062306a36Sopenharmony_ci
336162306a36Sopenharmony_ci	return 1;
336262306a36Sopenharmony_ci}
336362306a36Sopenharmony_ci
336462306a36Sopenharmony_cistatic u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
336562306a36Sopenharmony_ci{
336662306a36Sopenharmony_ci	u8 rvi = vmx_get_rvi();
336762306a36Sopenharmony_ci	u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
336862306a36Sopenharmony_ci
336962306a36Sopenharmony_ci	return ((rvi & 0xf0) > (vppr & 0xf0));
337062306a36Sopenharmony_ci}
337162306a36Sopenharmony_ci
337262306a36Sopenharmony_cistatic void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
337362306a36Sopenharmony_ci				   struct vmcs12 *vmcs12);
337462306a36Sopenharmony_ci
337562306a36Sopenharmony_ci/*
337662306a36Sopenharmony_ci * If from_vmentry is false, this is being called from state restore (either RSM
337762306a36Sopenharmony_ci * or KVM_SET_NESTED_STATE).  Otherwise it's called from vmlaunch/vmresume.
337862306a36Sopenharmony_ci *
337962306a36Sopenharmony_ci * Returns:
338062306a36Sopenharmony_ci *	NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode
338162306a36Sopenharmony_ci *	NVMX_VMENTRY_VMFAIL:  Consistency check VMFail
338262306a36Sopenharmony_ci *	NVMX_VMENTRY_VMEXIT:  Consistency check VMExit
338362306a36Sopenharmony_ci *	NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error
338462306a36Sopenharmony_ci */
338562306a36Sopenharmony_cienum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
338662306a36Sopenharmony_ci							bool from_vmentry)
338762306a36Sopenharmony_ci{
338862306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
338962306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
339062306a36Sopenharmony_ci	enum vm_entry_failure_code entry_failure_code;
339162306a36Sopenharmony_ci	bool evaluate_pending_interrupts;
339262306a36Sopenharmony_ci	union vmx_exit_reason exit_reason = {
339362306a36Sopenharmony_ci		.basic = EXIT_REASON_INVALID_STATE,
339462306a36Sopenharmony_ci		.failed_vmentry = 1,
339562306a36Sopenharmony_ci	};
339662306a36Sopenharmony_ci	u32 failed_index;
339762306a36Sopenharmony_ci
339862306a36Sopenharmony_ci	trace_kvm_nested_vmenter(kvm_rip_read(vcpu),
339962306a36Sopenharmony_ci				 vmx->nested.current_vmptr,
340062306a36Sopenharmony_ci				 vmcs12->guest_rip,
340162306a36Sopenharmony_ci				 vmcs12->guest_intr_status,
340262306a36Sopenharmony_ci				 vmcs12->vm_entry_intr_info_field,
340362306a36Sopenharmony_ci				 vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_ENABLE_EPT,
340462306a36Sopenharmony_ci				 vmcs12->ept_pointer,
340562306a36Sopenharmony_ci				 vmcs12->guest_cr3,
340662306a36Sopenharmony_ci				 KVM_ISA_VMX);
340762306a36Sopenharmony_ci
340862306a36Sopenharmony_ci	kvm_service_local_tlb_flush_requests(vcpu);
340962306a36Sopenharmony_ci
341062306a36Sopenharmony_ci	evaluate_pending_interrupts = exec_controls_get(vmx) &
341162306a36Sopenharmony_ci		(CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
341262306a36Sopenharmony_ci	if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
341362306a36Sopenharmony_ci		evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
341462306a36Sopenharmony_ci	if (!evaluate_pending_interrupts)
341562306a36Sopenharmony_ci		evaluate_pending_interrupts |= kvm_apic_has_pending_init_or_sipi(vcpu);
341662306a36Sopenharmony_ci
341762306a36Sopenharmony_ci	if (!vmx->nested.nested_run_pending ||
341862306a36Sopenharmony_ci	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
341962306a36Sopenharmony_ci		vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
342062306a36Sopenharmony_ci	if (kvm_mpx_supported() &&
342162306a36Sopenharmony_ci	    (!vmx->nested.nested_run_pending ||
342262306a36Sopenharmony_ci	     !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
342362306a36Sopenharmony_ci		vmx->nested.pre_vmenter_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
342462306a36Sopenharmony_ci
342562306a36Sopenharmony_ci	/*
342662306a36Sopenharmony_ci	 * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
342762306a36Sopenharmony_ci	 * nested early checks are disabled.  In the event of a "late" VM-Fail,
342862306a36Sopenharmony_ci	 * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its
342962306a36Sopenharmony_ci	 * software model to the pre-VMEntry host state.  When EPT is disabled,
343062306a36Sopenharmony_ci	 * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes
343162306a36Sopenharmony_ci	 * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3.  Stuffing
343262306a36Sopenharmony_ci	 * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to
343362306a36Sopenharmony_ci	 * the correct value.  Smashing vmcs01.GUEST_CR3 is safe because nested
343462306a36Sopenharmony_ci	 * VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is
343562306a36Sopenharmony_ci	 * guaranteed to be overwritten with a shadow CR3 prior to re-entering
343662306a36Sopenharmony_ci	 * L1.  Don't stuff vmcs01.GUEST_CR3 when using nested early checks as
343762306a36Sopenharmony_ci	 * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks
343862306a36Sopenharmony_ci	 * pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail
343962306a36Sopenharmony_ci	 * path would need to manually save/restore vmcs01.GUEST_CR3.
344062306a36Sopenharmony_ci	 */
344162306a36Sopenharmony_ci	if (!enable_ept && !nested_early_check)
344262306a36Sopenharmony_ci		vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
344362306a36Sopenharmony_ci
344462306a36Sopenharmony_ci	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
344562306a36Sopenharmony_ci
344662306a36Sopenharmony_ci	prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12);
344762306a36Sopenharmony_ci
344862306a36Sopenharmony_ci	if (from_vmentry) {
344962306a36Sopenharmony_ci		if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
345062306a36Sopenharmony_ci			vmx_switch_vmcs(vcpu, &vmx->vmcs01);
345162306a36Sopenharmony_ci			return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
345262306a36Sopenharmony_ci		}
345362306a36Sopenharmony_ci
345462306a36Sopenharmony_ci		if (nested_vmx_check_vmentry_hw(vcpu)) {
345562306a36Sopenharmony_ci			vmx_switch_vmcs(vcpu, &vmx->vmcs01);
345662306a36Sopenharmony_ci			return NVMX_VMENTRY_VMFAIL;
345762306a36Sopenharmony_ci		}
345862306a36Sopenharmony_ci
345962306a36Sopenharmony_ci		if (nested_vmx_check_guest_state(vcpu, vmcs12,
346062306a36Sopenharmony_ci						 &entry_failure_code)) {
346162306a36Sopenharmony_ci			exit_reason.basic = EXIT_REASON_INVALID_STATE;
346262306a36Sopenharmony_ci			vmcs12->exit_qualification = entry_failure_code;
346362306a36Sopenharmony_ci			goto vmentry_fail_vmexit;
346462306a36Sopenharmony_ci		}
346562306a36Sopenharmony_ci	}
346662306a36Sopenharmony_ci
346762306a36Sopenharmony_ci	enter_guest_mode(vcpu);
346862306a36Sopenharmony_ci
346962306a36Sopenharmony_ci	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &entry_failure_code)) {
347062306a36Sopenharmony_ci		exit_reason.basic = EXIT_REASON_INVALID_STATE;
347162306a36Sopenharmony_ci		vmcs12->exit_qualification = entry_failure_code;
347262306a36Sopenharmony_ci		goto vmentry_fail_vmexit_guest_mode;
347362306a36Sopenharmony_ci	}
347462306a36Sopenharmony_ci
347562306a36Sopenharmony_ci	if (from_vmentry) {
347662306a36Sopenharmony_ci		failed_index = nested_vmx_load_msr(vcpu,
347762306a36Sopenharmony_ci						   vmcs12->vm_entry_msr_load_addr,
347862306a36Sopenharmony_ci						   vmcs12->vm_entry_msr_load_count);
347962306a36Sopenharmony_ci		if (failed_index) {
348062306a36Sopenharmony_ci			exit_reason.basic = EXIT_REASON_MSR_LOAD_FAIL;
348162306a36Sopenharmony_ci			vmcs12->exit_qualification = failed_index;
348262306a36Sopenharmony_ci			goto vmentry_fail_vmexit_guest_mode;
348362306a36Sopenharmony_ci		}
348462306a36Sopenharmony_ci	} else {
348562306a36Sopenharmony_ci		/*
348662306a36Sopenharmony_ci		 * The MMU is not initialized to point at the right entities yet and
348762306a36Sopenharmony_ci		 * "get pages" would need to read data from the guest (i.e. we will
348862306a36Sopenharmony_ci		 * need to perform gpa to hpa translation). Request a call
348962306a36Sopenharmony_ci		 * to nested_get_vmcs12_pages before the next VM-entry.  The MSRs
349062306a36Sopenharmony_ci		 * have already been set at vmentry time and should not be reset.
349162306a36Sopenharmony_ci		 */
349262306a36Sopenharmony_ci		kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
349362306a36Sopenharmony_ci	}
349462306a36Sopenharmony_ci
349562306a36Sopenharmony_ci	/*
349662306a36Sopenharmony_ci	 * Re-evaluate pending events if L1 had a pending IRQ/NMI/INIT/SIPI
349762306a36Sopenharmony_ci	 * when it executed VMLAUNCH/VMRESUME, as entering non-root mode can
349862306a36Sopenharmony_ci	 * effectively unblock various events, e.g. INIT/SIPI cause VM-Exit
349962306a36Sopenharmony_ci	 * unconditionally.
350062306a36Sopenharmony_ci	 */
350162306a36Sopenharmony_ci	if (unlikely(evaluate_pending_interrupts))
350262306a36Sopenharmony_ci		kvm_make_request(KVM_REQ_EVENT, vcpu);
350362306a36Sopenharmony_ci
350462306a36Sopenharmony_ci	/*
350562306a36Sopenharmony_ci	 * Do not start the preemption timer hrtimer until after we know
350662306a36Sopenharmony_ci	 * we are successful, so that only nested_vmx_vmexit needs to cancel
350762306a36Sopenharmony_ci	 * the timer.
350862306a36Sopenharmony_ci	 */
350962306a36Sopenharmony_ci	vmx->nested.preemption_timer_expired = false;
351062306a36Sopenharmony_ci	if (nested_cpu_has_preemption_timer(vmcs12)) {
351162306a36Sopenharmony_ci		u64 timer_value = vmx_calc_preemption_timer_value(vcpu);
351262306a36Sopenharmony_ci		vmx_start_preemption_timer(vcpu, timer_value);
351362306a36Sopenharmony_ci	}
351462306a36Sopenharmony_ci
351562306a36Sopenharmony_ci	/*
351662306a36Sopenharmony_ci	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
351762306a36Sopenharmony_ci	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
351862306a36Sopenharmony_ci	 * returned as far as L1 is concerned. It will only return (and set
351962306a36Sopenharmony_ci	 * the success flag) when L2 exits (see nested_vmx_vmexit()).
352062306a36Sopenharmony_ci	 */
352162306a36Sopenharmony_ci	return NVMX_VMENTRY_SUCCESS;
352262306a36Sopenharmony_ci
352362306a36Sopenharmony_ci	/*
352462306a36Sopenharmony_ci	 * A failed consistency check that leads to a VMExit during L1's
352562306a36Sopenharmony_ci	 * VMEnter to L2 is a variation of a normal VMexit, as explained in
352662306a36Sopenharmony_ci	 * 26.7 "VM-entry failures during or after loading guest state".
352762306a36Sopenharmony_ci	 */
352862306a36Sopenharmony_civmentry_fail_vmexit_guest_mode:
352962306a36Sopenharmony_ci	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
353062306a36Sopenharmony_ci		vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
353162306a36Sopenharmony_ci	leave_guest_mode(vcpu);
353262306a36Sopenharmony_ci
353362306a36Sopenharmony_civmentry_fail_vmexit:
353462306a36Sopenharmony_ci	vmx_switch_vmcs(vcpu, &vmx->vmcs01);
353562306a36Sopenharmony_ci
353662306a36Sopenharmony_ci	if (!from_vmentry)
353762306a36Sopenharmony_ci		return NVMX_VMENTRY_VMEXIT;
353862306a36Sopenharmony_ci
353962306a36Sopenharmony_ci	load_vmcs12_host_state(vcpu, vmcs12);
354062306a36Sopenharmony_ci	vmcs12->vm_exit_reason = exit_reason.full;
354162306a36Sopenharmony_ci	if (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
354262306a36Sopenharmony_ci		vmx->nested.need_vmcs12_to_shadow_sync = true;
354362306a36Sopenharmony_ci	return NVMX_VMENTRY_VMEXIT;
354462306a36Sopenharmony_ci}
354562306a36Sopenharmony_ci
354662306a36Sopenharmony_ci/*
354762306a36Sopenharmony_ci * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
354862306a36Sopenharmony_ci * for running an L2 nested guest.
354962306a36Sopenharmony_ci */
355062306a36Sopenharmony_cistatic int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
355162306a36Sopenharmony_ci{
355262306a36Sopenharmony_ci	struct vmcs12 *vmcs12;
355362306a36Sopenharmony_ci	enum nvmx_vmentry_status status;
355462306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
355562306a36Sopenharmony_ci	u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
355662306a36Sopenharmony_ci	enum nested_evmptrld_status evmptrld_status;
355762306a36Sopenharmony_ci
355862306a36Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
355962306a36Sopenharmony_ci		return 1;
356062306a36Sopenharmony_ci
356162306a36Sopenharmony_ci	evmptrld_status = nested_vmx_handle_enlightened_vmptrld(vcpu, launch);
356262306a36Sopenharmony_ci	if (evmptrld_status == EVMPTRLD_ERROR) {
356362306a36Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
356462306a36Sopenharmony_ci		return 1;
356562306a36Sopenharmony_ci	}
356662306a36Sopenharmony_ci
356762306a36Sopenharmony_ci	kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
356862306a36Sopenharmony_ci
356962306a36Sopenharmony_ci	if (CC(evmptrld_status == EVMPTRLD_VMFAIL))
357062306a36Sopenharmony_ci		return nested_vmx_failInvalid(vcpu);
357162306a36Sopenharmony_ci
357262306a36Sopenharmony_ci	if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) &&
357362306a36Sopenharmony_ci	       vmx->nested.current_vmptr == INVALID_GPA))
357462306a36Sopenharmony_ci		return nested_vmx_failInvalid(vcpu);
357562306a36Sopenharmony_ci
357662306a36Sopenharmony_ci	vmcs12 = get_vmcs12(vcpu);
357762306a36Sopenharmony_ci
357862306a36Sopenharmony_ci	/*
357962306a36Sopenharmony_ci	 * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact
358062306a36Sopenharmony_ci	 * that there *is* a valid VMCS pointer, RFLAGS.CF is set
358162306a36Sopenharmony_ci	 * rather than RFLAGS.ZF, and no error number is stored to the
358262306a36Sopenharmony_ci	 * VM-instruction error field.
358362306a36Sopenharmony_ci	 */
358462306a36Sopenharmony_ci	if (CC(vmcs12->hdr.shadow_vmcs))
358562306a36Sopenharmony_ci		return nested_vmx_failInvalid(vcpu);
358662306a36Sopenharmony_ci
358762306a36Sopenharmony_ci	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
358862306a36Sopenharmony_ci		copy_enlightened_to_vmcs12(vmx, vmx->nested.hv_evmcs->hv_clean_fields);
358962306a36Sopenharmony_ci		/* Enlightened VMCS doesn't have launch state */
359062306a36Sopenharmony_ci		vmcs12->launch_state = !launch;
359162306a36Sopenharmony_ci	} else if (enable_shadow_vmcs) {
359262306a36Sopenharmony_ci		copy_shadow_to_vmcs12(vmx);
359362306a36Sopenharmony_ci	}
359462306a36Sopenharmony_ci
359562306a36Sopenharmony_ci	/*
359662306a36Sopenharmony_ci	 * The nested entry process starts with enforcing various prerequisites
359762306a36Sopenharmony_ci	 * on vmcs12 as required by the Intel SDM, and act appropriately when
359862306a36Sopenharmony_ci	 * they fail: As the SDM explains, some conditions should cause the
359962306a36Sopenharmony_ci	 * instruction to fail, while others will cause the instruction to seem
360062306a36Sopenharmony_ci	 * to succeed, but return an EXIT_REASON_INVALID_STATE.
360162306a36Sopenharmony_ci	 * To speed up the normal (success) code path, we should avoid checking
360262306a36Sopenharmony_ci	 * for misconfigurations which will anyway be caught by the processor
360362306a36Sopenharmony_ci	 * when using the merged vmcs02.
360462306a36Sopenharmony_ci	 */
360562306a36Sopenharmony_ci	if (CC(interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS))
360662306a36Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
360762306a36Sopenharmony_ci
360862306a36Sopenharmony_ci	if (CC(vmcs12->launch_state == launch))
360962306a36Sopenharmony_ci		return nested_vmx_fail(vcpu,
361062306a36Sopenharmony_ci			launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
361162306a36Sopenharmony_ci			       : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
361262306a36Sopenharmony_ci
361362306a36Sopenharmony_ci	if (nested_vmx_check_controls(vcpu, vmcs12))
361462306a36Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
361562306a36Sopenharmony_ci
361662306a36Sopenharmony_ci	if (nested_vmx_check_address_space_size(vcpu, vmcs12))
361762306a36Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
361862306a36Sopenharmony_ci
361962306a36Sopenharmony_ci	if (nested_vmx_check_host_state(vcpu, vmcs12))
362062306a36Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
362162306a36Sopenharmony_ci
362262306a36Sopenharmony_ci	/*
362362306a36Sopenharmony_ci	 * We're finally done with prerequisite checking, and can start with
362462306a36Sopenharmony_ci	 * the nested entry.
362562306a36Sopenharmony_ci	 */
362662306a36Sopenharmony_ci	vmx->nested.nested_run_pending = 1;
362762306a36Sopenharmony_ci	vmx->nested.has_preemption_timer_deadline = false;
362862306a36Sopenharmony_ci	status = nested_vmx_enter_non_root_mode(vcpu, true);
362962306a36Sopenharmony_ci	if (unlikely(status != NVMX_VMENTRY_SUCCESS))
363062306a36Sopenharmony_ci		goto vmentry_failed;
363162306a36Sopenharmony_ci
363262306a36Sopenharmony_ci	/* Emulate processing of posted interrupts on VM-Enter. */
363362306a36Sopenharmony_ci	if (nested_cpu_has_posted_intr(vmcs12) &&
363462306a36Sopenharmony_ci	    kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) {
363562306a36Sopenharmony_ci		vmx->nested.pi_pending = true;
363662306a36Sopenharmony_ci		kvm_make_request(KVM_REQ_EVENT, vcpu);
363762306a36Sopenharmony_ci		kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv);
363862306a36Sopenharmony_ci	}
363962306a36Sopenharmony_ci
364062306a36Sopenharmony_ci	/* Hide L1D cache contents from the nested guest.  */
364162306a36Sopenharmony_ci	vmx->vcpu.arch.l1tf_flush_l1d = true;
364262306a36Sopenharmony_ci
364362306a36Sopenharmony_ci	/*
364462306a36Sopenharmony_ci	 * Must happen outside of nested_vmx_enter_non_root_mode() as it will
364562306a36Sopenharmony_ci	 * also be used as part of restoring nVMX state for
364662306a36Sopenharmony_ci	 * snapshot restore (migration).
364762306a36Sopenharmony_ci	 *
364862306a36Sopenharmony_ci	 * In this flow, it is assumed that vmcs12 cache was
364962306a36Sopenharmony_ci	 * transferred as part of captured nVMX state and should
365062306a36Sopenharmony_ci	 * therefore not be read from guest memory (which may not
365162306a36Sopenharmony_ci	 * exist on destination host yet).
365262306a36Sopenharmony_ci	 */
365362306a36Sopenharmony_ci	nested_cache_shadow_vmcs12(vcpu, vmcs12);
365462306a36Sopenharmony_ci
365562306a36Sopenharmony_ci	switch (vmcs12->guest_activity_state) {
365662306a36Sopenharmony_ci	case GUEST_ACTIVITY_HLT:
365762306a36Sopenharmony_ci		/*
365862306a36Sopenharmony_ci		 * If we're entering a halted L2 vcpu and the L2 vcpu won't be
365962306a36Sopenharmony_ci		 * awakened by event injection or by an NMI-window VM-exit or
366062306a36Sopenharmony_ci		 * by an interrupt-window VM-exit, halt the vcpu.
366162306a36Sopenharmony_ci		 */
366262306a36Sopenharmony_ci		if (!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
366362306a36Sopenharmony_ci		    !nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) &&
366462306a36Sopenharmony_ci		    !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) &&
366562306a36Sopenharmony_ci		      (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
366662306a36Sopenharmony_ci			vmx->nested.nested_run_pending = 0;
366762306a36Sopenharmony_ci			return kvm_emulate_halt_noskip(vcpu);
366862306a36Sopenharmony_ci		}
366962306a36Sopenharmony_ci		break;
367062306a36Sopenharmony_ci	case GUEST_ACTIVITY_WAIT_SIPI:
367162306a36Sopenharmony_ci		vmx->nested.nested_run_pending = 0;
367262306a36Sopenharmony_ci		vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
367362306a36Sopenharmony_ci		break;
367462306a36Sopenharmony_ci	default:
367562306a36Sopenharmony_ci		break;
367662306a36Sopenharmony_ci	}
367762306a36Sopenharmony_ci
367862306a36Sopenharmony_ci	return 1;
367962306a36Sopenharmony_ci
368062306a36Sopenharmony_civmentry_failed:
368162306a36Sopenharmony_ci	vmx->nested.nested_run_pending = 0;
368262306a36Sopenharmony_ci	if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR)
368362306a36Sopenharmony_ci		return 0;
368462306a36Sopenharmony_ci	if (status == NVMX_VMENTRY_VMEXIT)
368562306a36Sopenharmony_ci		return 1;
368662306a36Sopenharmony_ci	WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL);
368762306a36Sopenharmony_ci	return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
368862306a36Sopenharmony_ci}
368962306a36Sopenharmony_ci
369062306a36Sopenharmony_ci/*
369162306a36Sopenharmony_ci * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
369262306a36Sopenharmony_ci * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK).
369362306a36Sopenharmony_ci * This function returns the new value we should put in vmcs12.guest_cr0.
369462306a36Sopenharmony_ci * It's not enough to just return the vmcs02 GUEST_CR0. Rather,
369562306a36Sopenharmony_ci *  1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now
369662306a36Sopenharmony_ci *     available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0
369762306a36Sopenharmony_ci *     didn't trap the bit, because if L1 did, so would L0).
369862306a36Sopenharmony_ci *  2. Bits that L1 asked to trap (and therefore L0 also did) could not have
369962306a36Sopenharmony_ci *     been modified by L2, and L1 knows it. So just leave the old value of
370062306a36Sopenharmony_ci *     the bit from vmcs12.guest_cr0. Note that the bit from vmcs02 GUEST_CR0
370162306a36Sopenharmony_ci *     isn't relevant, because if L0 traps this bit it can set it to anything.
370262306a36Sopenharmony_ci *  3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have
370362306a36Sopenharmony_ci *     changed these bits, and therefore they need to be updated, but L0
370462306a36Sopenharmony_ci *     didn't necessarily allow them to be changed in GUEST_CR0 - and rather
370562306a36Sopenharmony_ci *     put them in vmcs02 CR0_READ_SHADOW. So take these bits from there.
370662306a36Sopenharmony_ci */
370762306a36Sopenharmony_cistatic inline unsigned long
370862306a36Sopenharmony_civmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
370962306a36Sopenharmony_ci{
371062306a36Sopenharmony_ci	return
371162306a36Sopenharmony_ci	/*1*/	(vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
371262306a36Sopenharmony_ci	/*2*/	(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
371362306a36Sopenharmony_ci	/*3*/	(vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
371462306a36Sopenharmony_ci			vcpu->arch.cr0_guest_owned_bits));
371562306a36Sopenharmony_ci}
371662306a36Sopenharmony_ci
371762306a36Sopenharmony_cistatic inline unsigned long
371862306a36Sopenharmony_civmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
371962306a36Sopenharmony_ci{
372062306a36Sopenharmony_ci	return
372162306a36Sopenharmony_ci	/*1*/	(vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
372262306a36Sopenharmony_ci	/*2*/	(vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
372362306a36Sopenharmony_ci	/*3*/	(vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
372462306a36Sopenharmony_ci			vcpu->arch.cr4_guest_owned_bits));
372562306a36Sopenharmony_ci}
372662306a36Sopenharmony_ci
372762306a36Sopenharmony_cistatic void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
372862306a36Sopenharmony_ci				      struct vmcs12 *vmcs12,
372962306a36Sopenharmony_ci				      u32 vm_exit_reason, u32 exit_intr_info)
373062306a36Sopenharmony_ci{
373162306a36Sopenharmony_ci	u32 idt_vectoring;
373262306a36Sopenharmony_ci	unsigned int nr;
373362306a36Sopenharmony_ci
373462306a36Sopenharmony_ci	/*
373562306a36Sopenharmony_ci	 * Per the SDM, VM-Exits due to double and triple faults are never
373662306a36Sopenharmony_ci	 * considered to occur during event delivery, even if the double/triple
373762306a36Sopenharmony_ci	 * fault is the result of an escalating vectoring issue.
373862306a36Sopenharmony_ci	 *
373962306a36Sopenharmony_ci	 * Note, the SDM qualifies the double fault behavior with "The original
374062306a36Sopenharmony_ci	 * event results in a double-fault exception".  It's unclear why the
374162306a36Sopenharmony_ci	 * qualification exists since exits due to double fault can occur only
374262306a36Sopenharmony_ci	 * while vectoring a different exception (injected events are never
374362306a36Sopenharmony_ci	 * subject to interception), i.e. there's _always_ an original event.
374462306a36Sopenharmony_ci	 *
374562306a36Sopenharmony_ci	 * The SDM also uses NMI as a confusing example for the "original event
374662306a36Sopenharmony_ci	 * causes the VM exit directly" clause.  NMI isn't special in any way,
374762306a36Sopenharmony_ci	 * the same rule applies to all events that cause an exit directly.
374862306a36Sopenharmony_ci	 * NMI is an odd choice for the example because NMIs can only occur on
374962306a36Sopenharmony_ci	 * instruction boundaries, i.e. they _can't_ occur during vectoring.
375062306a36Sopenharmony_ci	 */
375162306a36Sopenharmony_ci	if ((u16)vm_exit_reason == EXIT_REASON_TRIPLE_FAULT ||
375262306a36Sopenharmony_ci	    ((u16)vm_exit_reason == EXIT_REASON_EXCEPTION_NMI &&
375362306a36Sopenharmony_ci	     is_double_fault(exit_intr_info))) {
375462306a36Sopenharmony_ci		vmcs12->idt_vectoring_info_field = 0;
375562306a36Sopenharmony_ci	} else if (vcpu->arch.exception.injected) {
375662306a36Sopenharmony_ci		nr = vcpu->arch.exception.vector;
375762306a36Sopenharmony_ci		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
375862306a36Sopenharmony_ci
375962306a36Sopenharmony_ci		if (kvm_exception_is_soft(nr)) {
376062306a36Sopenharmony_ci			vmcs12->vm_exit_instruction_len =
376162306a36Sopenharmony_ci				vcpu->arch.event_exit_inst_len;
376262306a36Sopenharmony_ci			idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
376362306a36Sopenharmony_ci		} else
376462306a36Sopenharmony_ci			idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
376562306a36Sopenharmony_ci
376662306a36Sopenharmony_ci		if (vcpu->arch.exception.has_error_code) {
376762306a36Sopenharmony_ci			idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
376862306a36Sopenharmony_ci			vmcs12->idt_vectoring_error_code =
376962306a36Sopenharmony_ci				vcpu->arch.exception.error_code;
377062306a36Sopenharmony_ci		}
377162306a36Sopenharmony_ci
377262306a36Sopenharmony_ci		vmcs12->idt_vectoring_info_field = idt_vectoring;
377362306a36Sopenharmony_ci	} else if (vcpu->arch.nmi_injected) {
377462306a36Sopenharmony_ci		vmcs12->idt_vectoring_info_field =
377562306a36Sopenharmony_ci			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
377662306a36Sopenharmony_ci	} else if (vcpu->arch.interrupt.injected) {
377762306a36Sopenharmony_ci		nr = vcpu->arch.interrupt.nr;
377862306a36Sopenharmony_ci		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
377962306a36Sopenharmony_ci
378062306a36Sopenharmony_ci		if (vcpu->arch.interrupt.soft) {
378162306a36Sopenharmony_ci			idt_vectoring |= INTR_TYPE_SOFT_INTR;
378262306a36Sopenharmony_ci			vmcs12->vm_entry_instruction_len =
378362306a36Sopenharmony_ci				vcpu->arch.event_exit_inst_len;
378462306a36Sopenharmony_ci		} else
378562306a36Sopenharmony_ci			idt_vectoring |= INTR_TYPE_EXT_INTR;
378662306a36Sopenharmony_ci
378762306a36Sopenharmony_ci		vmcs12->idt_vectoring_info_field = idt_vectoring;
378862306a36Sopenharmony_ci	} else {
378962306a36Sopenharmony_ci		vmcs12->idt_vectoring_info_field = 0;
379062306a36Sopenharmony_ci	}
379162306a36Sopenharmony_ci}
379262306a36Sopenharmony_ci
379362306a36Sopenharmony_ci
379462306a36Sopenharmony_civoid nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
379562306a36Sopenharmony_ci{
379662306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
379762306a36Sopenharmony_ci	gfn_t gfn;
379862306a36Sopenharmony_ci
379962306a36Sopenharmony_ci	/*
380062306a36Sopenharmony_ci	 * Don't need to mark the APIC access page dirty; it is never
380162306a36Sopenharmony_ci	 * written to by the CPU during APIC virtualization.
380262306a36Sopenharmony_ci	 */
380362306a36Sopenharmony_ci
380462306a36Sopenharmony_ci	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
380562306a36Sopenharmony_ci		gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
380662306a36Sopenharmony_ci		kvm_vcpu_mark_page_dirty(vcpu, gfn);
380762306a36Sopenharmony_ci	}
380862306a36Sopenharmony_ci
380962306a36Sopenharmony_ci	if (nested_cpu_has_posted_intr(vmcs12)) {
381062306a36Sopenharmony_ci		gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
381162306a36Sopenharmony_ci		kvm_vcpu_mark_page_dirty(vcpu, gfn);
381262306a36Sopenharmony_ci	}
381362306a36Sopenharmony_ci}
381462306a36Sopenharmony_ci
381562306a36Sopenharmony_cistatic int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
381662306a36Sopenharmony_ci{
381762306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
381862306a36Sopenharmony_ci	int max_irr;
381962306a36Sopenharmony_ci	void *vapic_page;
382062306a36Sopenharmony_ci	u16 status;
382162306a36Sopenharmony_ci
382262306a36Sopenharmony_ci	if (!vmx->nested.pi_pending)
382362306a36Sopenharmony_ci		return 0;
382462306a36Sopenharmony_ci
382562306a36Sopenharmony_ci	if (!vmx->nested.pi_desc)
382662306a36Sopenharmony_ci		goto mmio_needed;
382762306a36Sopenharmony_ci
382862306a36Sopenharmony_ci	vmx->nested.pi_pending = false;
382962306a36Sopenharmony_ci
383062306a36Sopenharmony_ci	if (!pi_test_and_clear_on(vmx->nested.pi_desc))
383162306a36Sopenharmony_ci		return 0;
383262306a36Sopenharmony_ci
383362306a36Sopenharmony_ci	max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
383462306a36Sopenharmony_ci	if (max_irr != 256) {
383562306a36Sopenharmony_ci		vapic_page = vmx->nested.virtual_apic_map.hva;
383662306a36Sopenharmony_ci		if (!vapic_page)
383762306a36Sopenharmony_ci			goto mmio_needed;
383862306a36Sopenharmony_ci
383962306a36Sopenharmony_ci		__kvm_apic_update_irr(vmx->nested.pi_desc->pir,
384062306a36Sopenharmony_ci			vapic_page, &max_irr);
384162306a36Sopenharmony_ci		status = vmcs_read16(GUEST_INTR_STATUS);
384262306a36Sopenharmony_ci		if ((u8)max_irr > ((u8)status & 0xff)) {
384362306a36Sopenharmony_ci			status &= ~0xff;
384462306a36Sopenharmony_ci			status |= (u8)max_irr;
384562306a36Sopenharmony_ci			vmcs_write16(GUEST_INTR_STATUS, status);
384662306a36Sopenharmony_ci		}
384762306a36Sopenharmony_ci	}
384862306a36Sopenharmony_ci
384962306a36Sopenharmony_ci	nested_mark_vmcs12_pages_dirty(vcpu);
385062306a36Sopenharmony_ci	return 0;
385162306a36Sopenharmony_ci
385262306a36Sopenharmony_cimmio_needed:
385362306a36Sopenharmony_ci	kvm_handle_memory_failure(vcpu, X86EMUL_IO_NEEDED, NULL);
385462306a36Sopenharmony_ci	return -ENXIO;
385562306a36Sopenharmony_ci}
385662306a36Sopenharmony_ci
385762306a36Sopenharmony_cistatic void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu)
385862306a36Sopenharmony_ci{
385962306a36Sopenharmony_ci	struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
386062306a36Sopenharmony_ci	u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
386162306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
386262306a36Sopenharmony_ci	unsigned long exit_qual;
386362306a36Sopenharmony_ci
386462306a36Sopenharmony_ci	if (ex->has_payload) {
386562306a36Sopenharmony_ci		exit_qual = ex->payload;
386662306a36Sopenharmony_ci	} else if (ex->vector == PF_VECTOR) {
386762306a36Sopenharmony_ci		exit_qual = vcpu->arch.cr2;
386862306a36Sopenharmony_ci	} else if (ex->vector == DB_VECTOR) {
386962306a36Sopenharmony_ci		exit_qual = vcpu->arch.dr6;
387062306a36Sopenharmony_ci		exit_qual &= ~DR6_BT;
387162306a36Sopenharmony_ci		exit_qual ^= DR6_ACTIVE_LOW;
387262306a36Sopenharmony_ci	} else {
387362306a36Sopenharmony_ci		exit_qual = 0;
387462306a36Sopenharmony_ci	}
387562306a36Sopenharmony_ci
387662306a36Sopenharmony_ci	/*
387762306a36Sopenharmony_ci	 * Unlike AMD's Paged Real Mode, which reports an error code on #PF
387862306a36Sopenharmony_ci	 * VM-Exits even if the CPU is in Real Mode, Intel VMX never sets the
387962306a36Sopenharmony_ci	 * "has error code" flags on VM-Exit if the CPU is in Real Mode.
388062306a36Sopenharmony_ci	 */
388162306a36Sopenharmony_ci	if (ex->has_error_code && is_protmode(vcpu)) {
388262306a36Sopenharmony_ci		/*
388362306a36Sopenharmony_ci		 * Intel CPUs do not generate error codes with bits 31:16 set,
388462306a36Sopenharmony_ci		 * and more importantly VMX disallows setting bits 31:16 in the
388562306a36Sopenharmony_ci		 * injected error code for VM-Entry.  Drop the bits to mimic
388662306a36Sopenharmony_ci		 * hardware and avoid inducing failure on nested VM-Entry if L1
388762306a36Sopenharmony_ci		 * chooses to inject the exception back to L2.  AMD CPUs _do_
388862306a36Sopenharmony_ci		 * generate "full" 32-bit error codes, so KVM allows userspace
388962306a36Sopenharmony_ci		 * to inject exception error codes with bits 31:16 set.
389062306a36Sopenharmony_ci		 */
389162306a36Sopenharmony_ci		vmcs12->vm_exit_intr_error_code = (u16)ex->error_code;
389262306a36Sopenharmony_ci		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
389362306a36Sopenharmony_ci	}
389462306a36Sopenharmony_ci
389562306a36Sopenharmony_ci	if (kvm_exception_is_soft(ex->vector))
389662306a36Sopenharmony_ci		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
389762306a36Sopenharmony_ci	else
389862306a36Sopenharmony_ci		intr_info |= INTR_TYPE_HARD_EXCEPTION;
389962306a36Sopenharmony_ci
390062306a36Sopenharmony_ci	if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
390162306a36Sopenharmony_ci	    vmx_get_nmi_mask(vcpu))
390262306a36Sopenharmony_ci		intr_info |= INTR_INFO_UNBLOCK_NMI;
390362306a36Sopenharmony_ci
390462306a36Sopenharmony_ci	nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
390562306a36Sopenharmony_ci}
390662306a36Sopenharmony_ci
390762306a36Sopenharmony_ci/*
390862306a36Sopenharmony_ci * Returns true if a debug trap is (likely) pending delivery.  Infer the class
390962306a36Sopenharmony_ci * of a #DB (trap-like vs. fault-like) from the exception payload (to-be-DR6).
391062306a36Sopenharmony_ci * Using the payload is flawed because code breakpoints (fault-like) and data
391162306a36Sopenharmony_ci * breakpoints (trap-like) set the same bits in DR6 (breakpoint detected), i.e.
391262306a36Sopenharmony_ci * this will return false positives if a to-be-injected code breakpoint #DB is
391362306a36Sopenharmony_ci * pending (from KVM's perspective, but not "pending" across an instruction
391462306a36Sopenharmony_ci * boundary).  ICEBP, a.k.a. INT1, is also not reflected here even though it
391562306a36Sopenharmony_ci * too is trap-like.
391662306a36Sopenharmony_ci *
391762306a36Sopenharmony_ci * KVM "works" despite these flaws as ICEBP isn't currently supported by the
391862306a36Sopenharmony_ci * emulator, Monitor Trap Flag is not marked pending on intercepted #DBs (the
391962306a36Sopenharmony_ci * #DB has already happened), and MTF isn't marked pending on code breakpoints
392062306a36Sopenharmony_ci * from the emulator (because such #DBs are fault-like and thus don't trigger
392162306a36Sopenharmony_ci * actions that fire on instruction retire).
392262306a36Sopenharmony_ci */
392362306a36Sopenharmony_cistatic unsigned long vmx_get_pending_dbg_trap(struct kvm_queued_exception *ex)
392462306a36Sopenharmony_ci{
392562306a36Sopenharmony_ci	if (!ex->pending || ex->vector != DB_VECTOR)
392662306a36Sopenharmony_ci		return 0;
392762306a36Sopenharmony_ci
392862306a36Sopenharmony_ci	/* General Detect #DBs are always fault-like. */
392962306a36Sopenharmony_ci	return ex->payload & ~DR6_BD;
393062306a36Sopenharmony_ci}
393162306a36Sopenharmony_ci
393262306a36Sopenharmony_ci/*
393362306a36Sopenharmony_ci * Returns true if there's a pending #DB exception that is lower priority than
393462306a36Sopenharmony_ci * a pending Monitor Trap Flag VM-Exit.  TSS T-flag #DBs are not emulated by
393562306a36Sopenharmony_ci * KVM, but could theoretically be injected by userspace.  Note, this code is
393662306a36Sopenharmony_ci * imperfect, see above.
393762306a36Sopenharmony_ci */
393862306a36Sopenharmony_cistatic bool vmx_is_low_priority_db_trap(struct kvm_queued_exception *ex)
393962306a36Sopenharmony_ci{
394062306a36Sopenharmony_ci	return vmx_get_pending_dbg_trap(ex) & ~DR6_BT;
394162306a36Sopenharmony_ci}
394262306a36Sopenharmony_ci
394362306a36Sopenharmony_ci/*
394462306a36Sopenharmony_ci * Certain VM-exits set the 'pending debug exceptions' field to indicate a
394562306a36Sopenharmony_ci * recognized #DB (data or single-step) that has yet to be delivered. Since KVM
394662306a36Sopenharmony_ci * represents these debug traps with a payload that is said to be compatible
394762306a36Sopenharmony_ci * with the 'pending debug exceptions' field, write the payload to the VMCS
394862306a36Sopenharmony_ci * field if a VM-exit is delivered before the debug trap.
394962306a36Sopenharmony_ci */
395062306a36Sopenharmony_cistatic void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
395162306a36Sopenharmony_ci{
395262306a36Sopenharmony_ci	unsigned long pending_dbg;
395362306a36Sopenharmony_ci
395462306a36Sopenharmony_ci	pending_dbg = vmx_get_pending_dbg_trap(&vcpu->arch.exception);
395562306a36Sopenharmony_ci	if (pending_dbg)
395662306a36Sopenharmony_ci		vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, pending_dbg);
395762306a36Sopenharmony_ci}
395862306a36Sopenharmony_ci
395962306a36Sopenharmony_cistatic bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
396062306a36Sopenharmony_ci{
396162306a36Sopenharmony_ci	return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
396262306a36Sopenharmony_ci	       to_vmx(vcpu)->nested.preemption_timer_expired;
396362306a36Sopenharmony_ci}
396462306a36Sopenharmony_ci
396562306a36Sopenharmony_cistatic bool vmx_has_nested_events(struct kvm_vcpu *vcpu)
396662306a36Sopenharmony_ci{
396762306a36Sopenharmony_ci	return nested_vmx_preemption_timer_pending(vcpu) ||
396862306a36Sopenharmony_ci	       to_vmx(vcpu)->nested.mtf_pending;
396962306a36Sopenharmony_ci}
397062306a36Sopenharmony_ci
397162306a36Sopenharmony_ci/*
397262306a36Sopenharmony_ci * Per the Intel SDM's table "Priority Among Concurrent Events", with minor
397362306a36Sopenharmony_ci * edits to fill in missing examples, e.g. #DB due to split-lock accesses,
397462306a36Sopenharmony_ci * and less minor edits to splice in the priority of VMX Non-Root specific
397562306a36Sopenharmony_ci * events, e.g. MTF and NMI/INTR-window exiting.
397662306a36Sopenharmony_ci *
397762306a36Sopenharmony_ci * 1 Hardware Reset and Machine Checks
397862306a36Sopenharmony_ci *	- RESET
397962306a36Sopenharmony_ci *	- Machine Check
398062306a36Sopenharmony_ci *
398162306a36Sopenharmony_ci * 2 Trap on Task Switch
398262306a36Sopenharmony_ci *	- T flag in TSS is set (on task switch)
398362306a36Sopenharmony_ci *
398462306a36Sopenharmony_ci * 3 External Hardware Interventions
398562306a36Sopenharmony_ci *	- FLUSH
398662306a36Sopenharmony_ci *	- STOPCLK
398762306a36Sopenharmony_ci *	- SMI
398862306a36Sopenharmony_ci *	- INIT
398962306a36Sopenharmony_ci *
399062306a36Sopenharmony_ci * 3.5 Monitor Trap Flag (MTF) VM-exit[1]
399162306a36Sopenharmony_ci *
399262306a36Sopenharmony_ci * 4 Traps on Previous Instruction
399362306a36Sopenharmony_ci *	- Breakpoints
399462306a36Sopenharmony_ci *	- Trap-class Debug Exceptions (#DB due to TF flag set, data/I-O
399562306a36Sopenharmony_ci *	  breakpoint, or #DB due to a split-lock access)
399662306a36Sopenharmony_ci *
 * 4.3	VMX-preemption timer expired VM-exit[2]
 *
 * 4.6	NMI-window exiting VM-exit[3]
 *
 * 5 Nonmaskable Interrupts (NMI)
 *
 * 5.5 Interrupt-window exiting VM-exit and Virtual-interrupt delivery[4]
400462306a36Sopenharmony_ci *
400562306a36Sopenharmony_ci * 6 Maskable Hardware Interrupts
400662306a36Sopenharmony_ci *
400762306a36Sopenharmony_ci * 7 Code Breakpoint Fault
400862306a36Sopenharmony_ci *
400962306a36Sopenharmony_ci * 8 Faults from Fetching Next Instruction
401062306a36Sopenharmony_ci *	- Code-Segment Limit Violation
401162306a36Sopenharmony_ci *	- Code Page Fault
401262306a36Sopenharmony_ci *	- Control protection exception (missing ENDBRANCH at target of indirect
401362306a36Sopenharmony_ci *					call or jump)
401462306a36Sopenharmony_ci *
401562306a36Sopenharmony_ci * 9 Faults from Decoding Next Instruction
401662306a36Sopenharmony_ci *	- Instruction length > 15 bytes
401762306a36Sopenharmony_ci *	- Invalid Opcode
401862306a36Sopenharmony_ci *	- Coprocessor Not Available
401962306a36Sopenharmony_ci *
402062306a36Sopenharmony_ci *10 Faults on Executing Instruction
402162306a36Sopenharmony_ci *	- Overflow
402262306a36Sopenharmony_ci *	- Bound error
402362306a36Sopenharmony_ci *	- Invalid TSS
402462306a36Sopenharmony_ci *	- Segment Not Present
402562306a36Sopenharmony_ci *	- Stack fault
402662306a36Sopenharmony_ci *	- General Protection
402762306a36Sopenharmony_ci *	- Data Page Fault
402862306a36Sopenharmony_ci *	- Alignment Check
402962306a36Sopenharmony_ci *	- x86 FPU Floating-point exception
403062306a36Sopenharmony_ci *	- SIMD floating-point exception
403162306a36Sopenharmony_ci *	- Virtualization exception
403262306a36Sopenharmony_ci *	- Control protection exception
403362306a36Sopenharmony_ci *
403462306a36Sopenharmony_ci * [1] Per the "Monitor Trap Flag" section: System-management interrupts (SMIs),
403562306a36Sopenharmony_ci *     INIT signals, and higher priority events take priority over MTF VM exits.
403662306a36Sopenharmony_ci *     MTF VM exits take priority over debug-trap exceptions and lower priority
403762306a36Sopenharmony_ci *     events.
403862306a36Sopenharmony_ci *
403962306a36Sopenharmony_ci * [2] Debug-trap exceptions and higher priority events take priority over VM exits
404062306a36Sopenharmony_ci *     caused by the VMX-preemption timer.  VM exits caused by the VMX-preemption
404162306a36Sopenharmony_ci *     timer take priority over VM exits caused by the "NMI-window exiting"
404262306a36Sopenharmony_ci *     VM-execution control and lower priority events.
404362306a36Sopenharmony_ci *
404462306a36Sopenharmony_ci * [3] Debug-trap exceptions and higher priority events take priority over VM exits
404562306a36Sopenharmony_ci *     caused by "NMI-window exiting".  VM exits caused by this control take
404662306a36Sopenharmony_ci *     priority over non-maskable interrupts (NMIs) and lower priority events.
404762306a36Sopenharmony_ci *
404862306a36Sopenharmony_ci * [4] Virtual-interrupt delivery has the same priority as that of VM exits due to
404962306a36Sopenharmony_ci *     the 1-setting of the "interrupt-window exiting" VM-execution control.  Thus,
405062306a36Sopenharmony_ci *     non-maskable interrupts (NMIs) and higher priority events take priority over
405162306a36Sopenharmony_ci *     delivery of a virtual interrupt; delivery of a virtual interrupt takes
405262306a36Sopenharmony_ci *     priority over external interrupts and lower priority events.
405362306a36Sopenharmony_ci */
405462306a36Sopenharmony_cistatic int vmx_check_nested_events(struct kvm_vcpu *vcpu)
405562306a36Sopenharmony_ci{
405662306a36Sopenharmony_ci	struct kvm_lapic *apic = vcpu->arch.apic;
405762306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
405862306a36Sopenharmony_ci	/*
405962306a36Sopenharmony_ci	 * Only a pending nested run blocks a pending exception.  If there is a
406062306a36Sopenharmony_ci	 * previously injected event, the pending exception occurred while said
406162306a36Sopenharmony_ci	 * event was being delivered and thus needs to be handled.
406262306a36Sopenharmony_ci	 */
406362306a36Sopenharmony_ci	bool block_nested_exceptions = vmx->nested.nested_run_pending;
406462306a36Sopenharmony_ci	/*
406562306a36Sopenharmony_ci	 * New events (not exceptions) are only recognized at instruction
406662306a36Sopenharmony_ci	 * boundaries.  If an event needs reinjection, then KVM is handling a
406762306a36Sopenharmony_ci	 * VM-Exit that occurred _during_ instruction execution; new events are
406862306a36Sopenharmony_ci	 * blocked until the instruction completes.
406962306a36Sopenharmony_ci	 */
407062306a36Sopenharmony_ci	bool block_nested_events = block_nested_exceptions ||
407162306a36Sopenharmony_ci				   kvm_event_needs_reinjection(vcpu);
407262306a36Sopenharmony_ci
407362306a36Sopenharmony_ci	if (lapic_in_kernel(vcpu) &&
407462306a36Sopenharmony_ci		test_bit(KVM_APIC_INIT, &apic->pending_events)) {
407562306a36Sopenharmony_ci		if (block_nested_events)
407662306a36Sopenharmony_ci			return -EBUSY;
407762306a36Sopenharmony_ci		nested_vmx_update_pending_dbg(vcpu);
407862306a36Sopenharmony_ci		clear_bit(KVM_APIC_INIT, &apic->pending_events);
407962306a36Sopenharmony_ci		if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED)
408062306a36Sopenharmony_ci			nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
408162306a36Sopenharmony_ci
408262306a36Sopenharmony_ci		/* MTF is discarded if the vCPU is in WFS. */
408362306a36Sopenharmony_ci		vmx->nested.mtf_pending = false;
408462306a36Sopenharmony_ci		return 0;
408562306a36Sopenharmony_ci	}
408662306a36Sopenharmony_ci
408762306a36Sopenharmony_ci	if (lapic_in_kernel(vcpu) &&
408862306a36Sopenharmony_ci	    test_bit(KVM_APIC_SIPI, &apic->pending_events)) {
408962306a36Sopenharmony_ci		if (block_nested_events)
409062306a36Sopenharmony_ci			return -EBUSY;
409162306a36Sopenharmony_ci
409262306a36Sopenharmony_ci		clear_bit(KVM_APIC_SIPI, &apic->pending_events);
409362306a36Sopenharmony_ci		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
409462306a36Sopenharmony_ci			nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0,
409562306a36Sopenharmony_ci						apic->sipi_vector & 0xFFUL);
409662306a36Sopenharmony_ci			return 0;
409762306a36Sopenharmony_ci		}
409862306a36Sopenharmony_ci		/* Fallthrough, the SIPI is completely ignored. */
409962306a36Sopenharmony_ci	}
410062306a36Sopenharmony_ci
410162306a36Sopenharmony_ci	/*
410262306a36Sopenharmony_ci	 * Process exceptions that are higher priority than Monitor Trap Flag:
410362306a36Sopenharmony_ci	 * fault-like exceptions, TSS T flag #DB (not emulated by KVM, but
410462306a36Sopenharmony_ci	 * could theoretically come in from userspace), and ICEBP (INT1).
410562306a36Sopenharmony_ci	 *
410662306a36Sopenharmony_ci	 * TODO: SMIs have higher priority than MTF and trap-like #DBs (except
410762306a36Sopenharmony_ci	 * for TSS T flag #DBs).  KVM also doesn't save/restore pending MTF
410862306a36Sopenharmony_ci	 * across SMI/RSM as it should; that needs to be addressed in order to
410962306a36Sopenharmony_ci	 * prioritize SMI over MTF and trap-like #DBs.
411062306a36Sopenharmony_ci	 */
411162306a36Sopenharmony_ci	if (vcpu->arch.exception_vmexit.pending &&
411262306a36Sopenharmony_ci	    !vmx_is_low_priority_db_trap(&vcpu->arch.exception_vmexit)) {
411362306a36Sopenharmony_ci		if (block_nested_exceptions)
411462306a36Sopenharmony_ci			return -EBUSY;
411562306a36Sopenharmony_ci
411662306a36Sopenharmony_ci		nested_vmx_inject_exception_vmexit(vcpu);
411762306a36Sopenharmony_ci		return 0;
411862306a36Sopenharmony_ci	}
411962306a36Sopenharmony_ci
412062306a36Sopenharmony_ci	if (vcpu->arch.exception.pending &&
412162306a36Sopenharmony_ci	    !vmx_is_low_priority_db_trap(&vcpu->arch.exception)) {
412262306a36Sopenharmony_ci		if (block_nested_exceptions)
412362306a36Sopenharmony_ci			return -EBUSY;
412462306a36Sopenharmony_ci		goto no_vmexit;
412562306a36Sopenharmony_ci	}
412662306a36Sopenharmony_ci
412762306a36Sopenharmony_ci	if (vmx->nested.mtf_pending) {
412862306a36Sopenharmony_ci		if (block_nested_events)
412962306a36Sopenharmony_ci			return -EBUSY;
413062306a36Sopenharmony_ci		nested_vmx_update_pending_dbg(vcpu);
413162306a36Sopenharmony_ci		nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
413262306a36Sopenharmony_ci		return 0;
413362306a36Sopenharmony_ci	}
413462306a36Sopenharmony_ci
413562306a36Sopenharmony_ci	if (vcpu->arch.exception_vmexit.pending) {
413662306a36Sopenharmony_ci		if (block_nested_exceptions)
413762306a36Sopenharmony_ci			return -EBUSY;
413862306a36Sopenharmony_ci
413962306a36Sopenharmony_ci		nested_vmx_inject_exception_vmexit(vcpu);
414062306a36Sopenharmony_ci		return 0;
414162306a36Sopenharmony_ci	}
414262306a36Sopenharmony_ci
414362306a36Sopenharmony_ci	if (vcpu->arch.exception.pending) {
414462306a36Sopenharmony_ci		if (block_nested_exceptions)
414562306a36Sopenharmony_ci			return -EBUSY;
414662306a36Sopenharmony_ci		goto no_vmexit;
414762306a36Sopenharmony_ci	}
414862306a36Sopenharmony_ci
414962306a36Sopenharmony_ci	if (nested_vmx_preemption_timer_pending(vcpu)) {
415062306a36Sopenharmony_ci		if (block_nested_events)
415162306a36Sopenharmony_ci			return -EBUSY;
415262306a36Sopenharmony_ci		nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
415362306a36Sopenharmony_ci		return 0;
415462306a36Sopenharmony_ci	}
415562306a36Sopenharmony_ci
415662306a36Sopenharmony_ci	if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
415762306a36Sopenharmony_ci		if (block_nested_events)
415862306a36Sopenharmony_ci			return -EBUSY;
415962306a36Sopenharmony_ci		goto no_vmexit;
416062306a36Sopenharmony_ci	}
416162306a36Sopenharmony_ci
416262306a36Sopenharmony_ci	if (vcpu->arch.nmi_pending && !vmx_nmi_blocked(vcpu)) {
416362306a36Sopenharmony_ci		if (block_nested_events)
416462306a36Sopenharmony_ci			return -EBUSY;
416562306a36Sopenharmony_ci		if (!nested_exit_on_nmi(vcpu))
416662306a36Sopenharmony_ci			goto no_vmexit;
416762306a36Sopenharmony_ci
416862306a36Sopenharmony_ci		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
416962306a36Sopenharmony_ci				  NMI_VECTOR | INTR_TYPE_NMI_INTR |
417062306a36Sopenharmony_ci				  INTR_INFO_VALID_MASK, 0);
417162306a36Sopenharmony_ci		/*
417262306a36Sopenharmony_ci		 * The NMI-triggered VM exit counts as injection:
417362306a36Sopenharmony_ci		 * clear this one and block further NMIs.
417462306a36Sopenharmony_ci		 */
417562306a36Sopenharmony_ci		vcpu->arch.nmi_pending = 0;
417662306a36Sopenharmony_ci		vmx_set_nmi_mask(vcpu, true);
417762306a36Sopenharmony_ci		return 0;
417862306a36Sopenharmony_ci	}
417962306a36Sopenharmony_ci
418062306a36Sopenharmony_ci	if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) {
418162306a36Sopenharmony_ci		if (block_nested_events)
418262306a36Sopenharmony_ci			return -EBUSY;
418362306a36Sopenharmony_ci		if (!nested_exit_on_intr(vcpu))
418462306a36Sopenharmony_ci			goto no_vmexit;
418562306a36Sopenharmony_ci		nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
418662306a36Sopenharmony_ci		return 0;
418762306a36Sopenharmony_ci	}
418862306a36Sopenharmony_ci
418962306a36Sopenharmony_cino_vmexit:
419062306a36Sopenharmony_ci	return vmx_complete_nested_posted_interrupt(vcpu);
419162306a36Sopenharmony_ci}
419262306a36Sopenharmony_ci
419362306a36Sopenharmony_cistatic u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
419462306a36Sopenharmony_ci{
419562306a36Sopenharmony_ci	ktime_t remaining =
419662306a36Sopenharmony_ci		hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
419762306a36Sopenharmony_ci	u64 value;
419862306a36Sopenharmony_ci
419962306a36Sopenharmony_ci	if (ktime_to_ns(remaining) <= 0)
420062306a36Sopenharmony_ci		return 0;
420162306a36Sopenharmony_ci
420262306a36Sopenharmony_ci	value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
420362306a36Sopenharmony_ci	do_div(value, 1000000);
420462306a36Sopenharmony_ci	return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
420562306a36Sopenharmony_ci}
420662306a36Sopenharmony_ci
420762306a36Sopenharmony_cistatic bool is_vmcs12_ext_field(unsigned long field)
420862306a36Sopenharmony_ci{
420962306a36Sopenharmony_ci	switch (field) {
421062306a36Sopenharmony_ci	case GUEST_ES_SELECTOR:
421162306a36Sopenharmony_ci	case GUEST_CS_SELECTOR:
421262306a36Sopenharmony_ci	case GUEST_SS_SELECTOR:
421362306a36Sopenharmony_ci	case GUEST_DS_SELECTOR:
421462306a36Sopenharmony_ci	case GUEST_FS_SELECTOR:
421562306a36Sopenharmony_ci	case GUEST_GS_SELECTOR:
421662306a36Sopenharmony_ci	case GUEST_LDTR_SELECTOR:
421762306a36Sopenharmony_ci	case GUEST_TR_SELECTOR:
421862306a36Sopenharmony_ci	case GUEST_ES_LIMIT:
421962306a36Sopenharmony_ci	case GUEST_CS_LIMIT:
422062306a36Sopenharmony_ci	case GUEST_SS_LIMIT:
422162306a36Sopenharmony_ci	case GUEST_DS_LIMIT:
422262306a36Sopenharmony_ci	case GUEST_FS_LIMIT:
422362306a36Sopenharmony_ci	case GUEST_GS_LIMIT:
422462306a36Sopenharmony_ci	case GUEST_LDTR_LIMIT:
422562306a36Sopenharmony_ci	case GUEST_TR_LIMIT:
422662306a36Sopenharmony_ci	case GUEST_GDTR_LIMIT:
422762306a36Sopenharmony_ci	case GUEST_IDTR_LIMIT:
422862306a36Sopenharmony_ci	case GUEST_ES_AR_BYTES:
422962306a36Sopenharmony_ci	case GUEST_DS_AR_BYTES:
423062306a36Sopenharmony_ci	case GUEST_FS_AR_BYTES:
423162306a36Sopenharmony_ci	case GUEST_GS_AR_BYTES:
423262306a36Sopenharmony_ci	case GUEST_LDTR_AR_BYTES:
423362306a36Sopenharmony_ci	case GUEST_TR_AR_BYTES:
423462306a36Sopenharmony_ci	case GUEST_ES_BASE:
423562306a36Sopenharmony_ci	case GUEST_CS_BASE:
423662306a36Sopenharmony_ci	case GUEST_SS_BASE:
423762306a36Sopenharmony_ci	case GUEST_DS_BASE:
423862306a36Sopenharmony_ci	case GUEST_FS_BASE:
423962306a36Sopenharmony_ci	case GUEST_GS_BASE:
424062306a36Sopenharmony_ci	case GUEST_LDTR_BASE:
424162306a36Sopenharmony_ci	case GUEST_TR_BASE:
424262306a36Sopenharmony_ci	case GUEST_GDTR_BASE:
424362306a36Sopenharmony_ci	case GUEST_IDTR_BASE:
424462306a36Sopenharmony_ci	case GUEST_PENDING_DBG_EXCEPTIONS:
424562306a36Sopenharmony_ci	case GUEST_BNDCFGS:
424662306a36Sopenharmony_ci		return true;
424762306a36Sopenharmony_ci	default:
424862306a36Sopenharmony_ci		break;
424962306a36Sopenharmony_ci	}
425062306a36Sopenharmony_ci
425162306a36Sopenharmony_ci	return false;
425262306a36Sopenharmony_ci}
425362306a36Sopenharmony_ci
425462306a36Sopenharmony_cistatic void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
425562306a36Sopenharmony_ci				       struct vmcs12 *vmcs12)
425662306a36Sopenharmony_ci{
425762306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
425862306a36Sopenharmony_ci
425962306a36Sopenharmony_ci	vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
426062306a36Sopenharmony_ci	vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
426162306a36Sopenharmony_ci	vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
426262306a36Sopenharmony_ci	vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
426362306a36Sopenharmony_ci	vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
426462306a36Sopenharmony_ci	vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
426562306a36Sopenharmony_ci	vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
426662306a36Sopenharmony_ci	vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
426762306a36Sopenharmony_ci	vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
426862306a36Sopenharmony_ci	vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
426962306a36Sopenharmony_ci	vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
427062306a36Sopenharmony_ci	vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
427162306a36Sopenharmony_ci	vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
427262306a36Sopenharmony_ci	vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
427362306a36Sopenharmony_ci	vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
427462306a36Sopenharmony_ci	vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
427562306a36Sopenharmony_ci	vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
427662306a36Sopenharmony_ci	vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
427762306a36Sopenharmony_ci	vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
427862306a36Sopenharmony_ci	vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
427962306a36Sopenharmony_ci	vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
428062306a36Sopenharmony_ci	vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
428162306a36Sopenharmony_ci	vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
428262306a36Sopenharmony_ci	vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
428362306a36Sopenharmony_ci	vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
428462306a36Sopenharmony_ci	vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
428562306a36Sopenharmony_ci	vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
428662306a36Sopenharmony_ci	vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
428762306a36Sopenharmony_ci	vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
428862306a36Sopenharmony_ci	vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
428962306a36Sopenharmony_ci	vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
429062306a36Sopenharmony_ci	vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
429162306a36Sopenharmony_ci	vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
429262306a36Sopenharmony_ci	vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
429362306a36Sopenharmony_ci	vmcs12->guest_pending_dbg_exceptions =
429462306a36Sopenharmony_ci		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
429562306a36Sopenharmony_ci
429662306a36Sopenharmony_ci	vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
429762306a36Sopenharmony_ci}
429862306a36Sopenharmony_ci
429962306a36Sopenharmony_cistatic void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
430062306a36Sopenharmony_ci				       struct vmcs12 *vmcs12)
430162306a36Sopenharmony_ci{
430262306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
430362306a36Sopenharmony_ci	int cpu;
430462306a36Sopenharmony_ci
430562306a36Sopenharmony_ci	if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare)
430662306a36Sopenharmony_ci		return;
430762306a36Sopenharmony_ci
430862306a36Sopenharmony_ci
430962306a36Sopenharmony_ci	WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01);
431062306a36Sopenharmony_ci
431162306a36Sopenharmony_ci	cpu = get_cpu();
431262306a36Sopenharmony_ci	vmx->loaded_vmcs = &vmx->nested.vmcs02;
431362306a36Sopenharmony_ci	vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01);
431462306a36Sopenharmony_ci
431562306a36Sopenharmony_ci	sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
431662306a36Sopenharmony_ci
431762306a36Sopenharmony_ci	vmx->loaded_vmcs = &vmx->vmcs01;
431862306a36Sopenharmony_ci	vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02);
431962306a36Sopenharmony_ci	put_cpu();
432062306a36Sopenharmony_ci}
432162306a36Sopenharmony_ci
432262306a36Sopenharmony_ci/*
432362306a36Sopenharmony_ci * Update the guest state fields of vmcs12 to reflect changes that
432462306a36Sopenharmony_ci * occurred while L2 was running. (The "IA-32e mode guest" bit of the
432562306a36Sopenharmony_ci * VM-entry controls is also updated, since this is really a guest
432662306a36Sopenharmony_ci * state bit.)
 *
 * The fields handled by sync_vmcs02_to_vmcs12_rare() are copied here only
 * when the enlightened VMCS pointer is valid; otherwise they are flagged
 * through need_sync_vmcs02_to_vmcs12_rare and left for a later sync.
432762306a36Sopenharmony_ci */
432862306a36Sopenharmony_cistatic void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
432962306a36Sopenharmony_ci{
433062306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
433162306a36Sopenharmony_ci
	/* Valid eVMCS pointer: sync the rare fields right away. */
433262306a36Sopenharmony_ci	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
433362306a36Sopenharmony_ci		sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
433462306a36Sopenharmony_ci
	/* Otherwise remember that the rare fields still need to be synced. */
433562306a36Sopenharmony_ci	vmx->nested.need_sync_vmcs02_to_vmcs12_rare =
433662306a36Sopenharmony_ci		!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr);
433762306a36Sopenharmony_ci
433862306a36Sopenharmony_ci	vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
433962306a36Sopenharmony_ci	vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
434062306a36Sopenharmony_ci
434162306a36Sopenharmony_ci	vmcs12->guest_rsp = kvm_rsp_read(vcpu);
434262306a36Sopenharmony_ci	vmcs12->guest_rip = kvm_rip_read(vcpu);
434362306a36Sopenharmony_ci	vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
434462306a36Sopenharmony_ci
434562306a36Sopenharmony_ci	vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
434662306a36Sopenharmony_ci	vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
434762306a36Sopenharmony_ci
434862306a36Sopenharmony_ci	vmcs12->guest_interruptibility_info =
434962306a36Sopenharmony_ci		vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
435062306a36Sopenharmony_ci
	/* vmcs12's activity state is derived from the vCPU's mp_state. */
435162306a36Sopenharmony_ci	if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
435262306a36Sopenharmony_ci		vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
435362306a36Sopenharmony_ci	else if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
435462306a36Sopenharmony_ci		vmcs12->guest_activity_state = GUEST_ACTIVITY_WAIT_SIPI;
435562306a36Sopenharmony_ci	else
435662306a36Sopenharmony_ci		vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
435762306a36Sopenharmony_ci
	/*
	 * Save the preemption timer value only when vmcs12 enables the
	 * timer, vmcs12 asks for it to be saved on exit, and no nested run
	 * is pending.
	 */
435862306a36Sopenharmony_ci	if (nested_cpu_has_preemption_timer(vmcs12) &&
435962306a36Sopenharmony_ci	    vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER &&
436062306a36Sopenharmony_ci	    !vmx->nested.nested_run_pending)
436162306a36Sopenharmony_ci		vmcs12->vmx_preemption_timer_value =
436262306a36Sopenharmony_ci			vmx_get_preemption_timer_value(vcpu);
436362306a36Sopenharmony_ci
436462306a36Sopenharmony_ci	/*
436562306a36Sopenharmony_ci	 * In some cases (usually, nested EPT), L2 is allowed to change its
436662306a36Sopenharmony_ci	 * own CR3 without exiting. If it has changed it, we must keep it.
436762306a36Sopenharmony_ci	 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
436862306a36Sopenharmony_ci	 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
436962306a36Sopenharmony_ci	 *
437062306a36Sopenharmony_ci	 * Additionally, restore L2's PDPTR to vmcs12.
437162306a36Sopenharmony_ci	 */
437262306a36Sopenharmony_ci	if (enable_ept) {
437362306a36Sopenharmony_ci		vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
437462306a36Sopenharmony_ci		if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
437562306a36Sopenharmony_ci			vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
437662306a36Sopenharmony_ci			vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
437762306a36Sopenharmony_ci			vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
437862306a36Sopenharmony_ci			vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
437962306a36Sopenharmony_ci		}
438062306a36Sopenharmony_ci	}
438162306a36Sopenharmony_ci
438262306a36Sopenharmony_ci	vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
438362306a36Sopenharmony_ci
438462306a36Sopenharmony_ci	if (nested_cpu_has_vid(vmcs12))
438562306a36Sopenharmony_ci		vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
438662306a36Sopenharmony_ci
	/*
	 * Replace only the IA32E_MODE bit of vmcs12's VM-entry controls with
	 * the bit from the current VM-entry controls; all other vmcs12 bits
	 * are preserved.
	 */
438762306a36Sopenharmony_ci	vmcs12->vm_entry_controls =
438862306a36Sopenharmony_ci		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
438962306a36Sopenharmony_ci		(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
439062306a36Sopenharmony_ci
	/* DR7 and EFER are saved only if vmcs12's exit controls request it. */
439162306a36Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
439262306a36Sopenharmony_ci		kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
439362306a36Sopenharmony_ci
439462306a36Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
439562306a36Sopenharmony_ci		vmcs12->guest_ia32_efer = vcpu->arch.efer;
439662306a36Sopenharmony_ci}
439762306a36Sopenharmony_ci
439862306a36Sopenharmony_ci/*
439962306a36Sopenharmony_ci * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
440062306a36Sopenharmony_ci * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
440162306a36Sopenharmony_ci * and this function updates it to reflect the changes to the guest state while
440262306a36Sopenharmony_ci * L2 was running (and perhaps made some exits which were handled directly by L0
440362306a36Sopenharmony_ci * without going back to L1), and to reflect the exit reason.
440462306a36Sopenharmony_ci * Note that we do not have to copy here all VMCS fields, just those that
440562306a36Sopenharmony_ci * could have changed by the L2 guest or the exit - i.e., the guest-state and
440662306a36Sopenharmony_ci * exit-information fields only. Other fields are modified by L1 with VMWRITE,
440762306a36Sopenharmony_ci * which already writes to vmcs12 directly.
440862306a36Sopenharmony_ci */
440962306a36Sopenharmony_cistatic void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
441062306a36Sopenharmony_ci			   u32 vm_exit_reason, u32 exit_intr_info,
441162306a36Sopenharmony_ci			   unsigned long exit_qualification)
441262306a36Sopenharmony_ci{
441362306a36Sopenharmony_ci	/* update exit information fields: */
441462306a36Sopenharmony_ci	vmcs12->vm_exit_reason = vm_exit_reason;
441562306a36Sopenharmony_ci	if (to_vmx(vcpu)->exit_reason.enclave_mode)
441662306a36Sopenharmony_ci		vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE;
441762306a36Sopenharmony_ci	vmcs12->exit_qualification = exit_qualification;
441862306a36Sopenharmony_ci
441962306a36Sopenharmony_ci	/*
442062306a36Sopenharmony_ci	 * On VM-Exit due to a failed VM-Entry, the VMCS isn't marked launched
442162306a36Sopenharmony_ci	 * and only EXIT_REASON and EXIT_QUALIFICATION are updated, all other
442262306a36Sopenharmony_ci	 * exit info fields are unmodified.
442362306a36Sopenharmony_ci	 */
442462306a36Sopenharmony_ci	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
442562306a36Sopenharmony_ci		vmcs12->launch_state = 1;
442662306a36Sopenharmony_ci
442762306a36Sopenharmony_ci		/* vm_entry_intr_info_field is cleared on exit. Emulate this
442862306a36Sopenharmony_ci		 * instead of reading the real value. */
442962306a36Sopenharmony_ci		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
443062306a36Sopenharmony_ci
443162306a36Sopenharmony_ci		/*
443262306a36Sopenharmony_ci		 * Transfer the event that L0 or L1 may wanted to inject into
443362306a36Sopenharmony_ci		 * L2 to IDT_VECTORING_INFO_FIELD.
443462306a36Sopenharmony_ci		 */
443562306a36Sopenharmony_ci		vmcs12_save_pending_event(vcpu, vmcs12,
443662306a36Sopenharmony_ci					  vm_exit_reason, exit_intr_info);
443762306a36Sopenharmony_ci
443862306a36Sopenharmony_ci		vmcs12->vm_exit_intr_info = exit_intr_info;
443962306a36Sopenharmony_ci		vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
444062306a36Sopenharmony_ci		vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
444162306a36Sopenharmony_ci
444262306a36Sopenharmony_ci		/*
444362306a36Sopenharmony_ci		 * According to spec, there's no need to store the guest's
444462306a36Sopenharmony_ci		 * MSRs if the exit is due to a VM-entry failure that occurs
444562306a36Sopenharmony_ci		 * during or after loading the guest state. Since this exit
444662306a36Sopenharmony_ci		 * does not fall in that category, we need to save the MSRs.
444762306a36Sopenharmony_ci		 */
444862306a36Sopenharmony_ci		if (nested_vmx_store_msr(vcpu,
444962306a36Sopenharmony_ci					 vmcs12->vm_exit_msr_store_addr,
445062306a36Sopenharmony_ci					 vmcs12->vm_exit_msr_store_count))
445162306a36Sopenharmony_ci			nested_vmx_abort(vcpu,
445262306a36Sopenharmony_ci					 VMX_ABORT_SAVE_GUEST_MSR_FAIL);
445362306a36Sopenharmony_ci	}
445462306a36Sopenharmony_ci}
445562306a36Sopenharmony_ci
445662306a36Sopenharmony_ci/*
445762306a36Sopenharmony_ci * A part of what we need to do when the nested L2 guest exits and we want to
445862306a36Sopenharmony_ci * run its L1 parent, is to reset L1's guest state to the host state specified
445962306a36Sopenharmony_ci * in vmcs12.
446062306a36Sopenharmony_ci * This function is to be called not only on normal nested exit, but also on
446162306a36Sopenharmony_ci * a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry
446262306a36Sopenharmony_ci * Failures During or After Loading Guest State").
446362306a36Sopenharmony_ci * This function should be called when the active VMCS is L1's (vmcs01).
446462306a36Sopenharmony_ci */
446562306a36Sopenharmony_cistatic void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
446662306a36Sopenharmony_ci				   struct vmcs12 *vmcs12)
446762306a36Sopenharmony_ci{
	/* Receives the failure code from nested_vmx_load_cr3(); never read. */
446862306a36Sopenharmony_ci	enum vm_entry_failure_code ignored;
446962306a36Sopenharmony_ci	struct kvm_segment seg;
447062306a36Sopenharmony_ci
	/*
	 * Pick L1's EFER: take vmcs12's host value when L1 asked for it to be
	 * loaded on exit; otherwise only set or clear LMA/LME according to
	 * the host address-space size control.
	 */
447162306a36Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
447262306a36Sopenharmony_ci		vcpu->arch.efer = vmcs12->host_ia32_efer;
447362306a36Sopenharmony_ci	else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
447462306a36Sopenharmony_ci		vcpu->arch.efer |= (EFER_LMA | EFER_LME);
447562306a36Sopenharmony_ci	else
447662306a36Sopenharmony_ci		vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
447762306a36Sopenharmony_ci	vmx_set_efer(vcpu, vcpu->arch.efer);
447862306a36Sopenharmony_ci
447962306a36Sopenharmony_ci	kvm_rsp_write(vcpu, vmcs12->host_rsp);
448062306a36Sopenharmony_ci	kvm_rip_write(vcpu, vmcs12->host_rip);
448162306a36Sopenharmony_ci	vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
448262306a36Sopenharmony_ci	vmx_set_interrupt_shadow(vcpu, 0);
448362306a36Sopenharmony_ci
448462306a36Sopenharmony_ci	/*
448562306a36Sopenharmony_ci	 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
448662306a36Sopenharmony_ci	 * actually changed, because vmx_set_cr0 refers to efer set above.
448762306a36Sopenharmony_ci	 *
448862306a36Sopenharmony_ci	 * CR0_GUEST_HOST_MASK is already set in the original vmcs01
448962306a36Sopenharmony_ci	 * (KVM doesn't change it);
449062306a36Sopenharmony_ci	 */
449162306a36Sopenharmony_ci	vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
449262306a36Sopenharmony_ci	vmx_set_cr0(vcpu, vmcs12->host_cr0);
449362306a36Sopenharmony_ci
449462306a36Sopenharmony_ci	/* Same as above - no reason to call set_cr4_guest_host_mask().  */
449562306a36Sopenharmony_ci	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
449662306a36Sopenharmony_ci	vmx_set_cr4(vcpu, vmcs12->host_cr4);
449762306a36Sopenharmony_ci
449862306a36Sopenharmony_ci	nested_ept_uninit_mmu_context(vcpu);
449962306a36Sopenharmony_ci
450062306a36Sopenharmony_ci	/*
450162306a36Sopenharmony_ci	 * Only PDPTE load can fail as the value of cr3 was checked on entry and
450262306a36Sopenharmony_ci	 * couldn't have changed.
450362306a36Sopenharmony_ci	 */
450462306a36Sopenharmony_ci	if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, true, &ignored))
450562306a36Sopenharmony_ci		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
450662306a36Sopenharmony_ci
450762306a36Sopenharmony_ci	nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
450862306a36Sopenharmony_ci
	/*
	 * vmcs12's host SYSENTER MSRs and descriptor-table bases become the
	 * guest fields of the current VMCS; both table limits are set to
	 * 0xFFFF.
	 */
450962306a36Sopenharmony_ci	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
451062306a36Sopenharmony_ci	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
451162306a36Sopenharmony_ci	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
451262306a36Sopenharmony_ci	vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
451362306a36Sopenharmony_ci	vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
451462306a36Sopenharmony_ci	vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
451562306a36Sopenharmony_ci	vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
451662306a36Sopenharmony_ci
451762306a36Sopenharmony_ci	/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1.  */
451862306a36Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
451962306a36Sopenharmony_ci		vmcs_write64(GUEST_BNDCFGS, 0);
452062306a36Sopenharmony_ci
	/* PAT is written to both the VMCS and the cached vcpu->arch.pat. */
452162306a36Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
452262306a36Sopenharmony_ci		vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
452362306a36Sopenharmony_ci		vcpu->arch.pat = vmcs12->host_ia32_pat;
452462306a36Sopenharmony_ci	}
452562306a36Sopenharmony_ci	if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
452662306a36Sopenharmony_ci	    kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
452762306a36Sopenharmony_ci		WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
452862306a36Sopenharmony_ci					 vmcs12->host_ia32_perf_global_ctrl));
452962306a36Sopenharmony_ci
453062306a36Sopenharmony_ci	/* Set L1 segment info according to Intel SDM
453162306a36Sopenharmony_ci	    27.5.2 Loading Host Segment and Descriptor-Table Registers */
453262306a36Sopenharmony_ci	seg = (struct kvm_segment) {
453362306a36Sopenharmony_ci		.base = 0,
453462306a36Sopenharmony_ci		.limit = 0xFFFFFFFF,
453562306a36Sopenharmony_ci		.selector = vmcs12->host_cs_selector,
453662306a36Sopenharmony_ci		.type = 11,
453762306a36Sopenharmony_ci		.present = 1,
453862306a36Sopenharmony_ci		.s = 1,
453962306a36Sopenharmony_ci		.g = 1
454062306a36Sopenharmony_ci	};
	/* CS gets L=1 for a 64-bit host address space, D/B=1 otherwise. */
454162306a36Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
454262306a36Sopenharmony_ci		seg.l = 1;
454362306a36Sopenharmony_ci	else
454462306a36Sopenharmony_ci		seg.db = 1;
454562306a36Sopenharmony_ci	__vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
	/*
	 * DS, ES, SS, FS and GS share the template below; only the selector
	 * (and, for FS and GS, the base) is changed between calls.
	 */
454662306a36Sopenharmony_ci	seg = (struct kvm_segment) {
454762306a36Sopenharmony_ci		.base = 0,
454862306a36Sopenharmony_ci		.limit = 0xFFFFFFFF,
454962306a36Sopenharmony_ci		.type = 3,
455062306a36Sopenharmony_ci		.present = 1,
455162306a36Sopenharmony_ci		.s = 1,
455262306a36Sopenharmony_ci		.db = 1,
455362306a36Sopenharmony_ci		.g = 1
455462306a36Sopenharmony_ci	};
455562306a36Sopenharmony_ci	seg.selector = vmcs12->host_ds_selector;
455662306a36Sopenharmony_ci	__vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
455762306a36Sopenharmony_ci	seg.selector = vmcs12->host_es_selector;
455862306a36Sopenharmony_ci	__vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
455962306a36Sopenharmony_ci	seg.selector = vmcs12->host_ss_selector;
456062306a36Sopenharmony_ci	__vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
456162306a36Sopenharmony_ci	seg.selector = vmcs12->host_fs_selector;
456262306a36Sopenharmony_ci	seg.base = vmcs12->host_fs_base;
456362306a36Sopenharmony_ci	__vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
456462306a36Sopenharmony_ci	seg.selector = vmcs12->host_gs_selector;
456562306a36Sopenharmony_ci	seg.base = vmcs12->host_gs_base;
456662306a36Sopenharmony_ci	__vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
	/* TR uses vmcs12's host base and selector with a fixed limit of 0x67. */
456762306a36Sopenharmony_ci	seg = (struct kvm_segment) {
456862306a36Sopenharmony_ci		.base = vmcs12->host_tr_base,
456962306a36Sopenharmony_ci		.limit = 0x67,
457062306a36Sopenharmony_ci		.selector = vmcs12->host_tr_selector,
457162306a36Sopenharmony_ci		.type = 11,
457262306a36Sopenharmony_ci		.present = 1
457362306a36Sopenharmony_ci	};
457462306a36Sopenharmony_ci	__vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
457562306a36Sopenharmony_ci
	/* LDTR is zeroed and marked unusable. */
457662306a36Sopenharmony_ci	memset(&seg, 0, sizeof(seg));
457762306a36Sopenharmony_ci	seg.unusable = 1;
457862306a36Sopenharmony_ci	__vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR);
457962306a36Sopenharmony_ci
	/* DR7 is set to 0x400 and IA32_DEBUGCTL is cleared. */
458062306a36Sopenharmony_ci	kvm_set_dr(vcpu, 7, 0x400);
458162306a36Sopenharmony_ci	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
458262306a36Sopenharmony_ci
	/* Process vmcs12's VM-exit MSR-load list; a failure aborts. */
458362306a36Sopenharmony_ci	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
458462306a36Sopenharmony_ci				vmcs12->vm_exit_msr_load_count))
458562306a36Sopenharmony_ci		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
458662306a36Sopenharmony_ci
	/* Re-evaluate emulation_required now that the new state is in place. */
458762306a36Sopenharmony_ci	to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
458862306a36Sopenharmony_ci}
458962306a36Sopenharmony_ci
459062306a36Sopenharmony_cistatic inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
459162306a36Sopenharmony_ci{
459262306a36Sopenharmony_ci	struct vmx_uret_msr *efer_msr;
459362306a36Sopenharmony_ci	unsigned int i;
459462306a36Sopenharmony_ci
459562306a36Sopenharmony_ci	if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
459662306a36Sopenharmony_ci		return vmcs_read64(GUEST_IA32_EFER);
459762306a36Sopenharmony_ci
459862306a36Sopenharmony_ci	if (cpu_has_load_ia32_efer())
459962306a36Sopenharmony_ci		return host_efer;
460062306a36Sopenharmony_ci
460162306a36Sopenharmony_ci	for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
460262306a36Sopenharmony_ci		if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
460362306a36Sopenharmony_ci			return vmx->msr_autoload.guest.val[i].value;
460462306a36Sopenharmony_ci	}
460562306a36Sopenharmony_ci
460662306a36Sopenharmony_ci	efer_msr = vmx_find_uret_msr(vmx, MSR_EFER);
460762306a36Sopenharmony_ci	if (efer_msr)
460862306a36Sopenharmony_ci		return efer_msr->data;
460962306a36Sopenharmony_ci
461062306a36Sopenharmony_ci	return host_efer;
461162306a36Sopenharmony_ci}
461262306a36Sopenharmony_ci
/*
 * Refresh KVM's software model of the vCPU (PAT, DR7, EFER, CR0, CR4, CR3,
 * PDPTRs, MMU context) from values read back out of the current VMCS, which
 * the comments below refer to as vmcs01, and then selectively reload MSRs
 * that appear in both vmcs12's VM-entry and VM-exit MSR-load lists.
 *
 * Any failure while walking the MSR lists ends in
 * nested_vmx_abort(VMX_ABORT_LOAD_HOST_MSR_FAIL).
 */
461362306a36Sopenharmony_cistatic void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
461462306a36Sopenharmony_ci{
461562306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
461662306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
	/* g: entry from the VM-entry load list, h: entry from the VM-exit load list. */
461762306a36Sopenharmony_ci	struct vmx_msr_entry g, h;
461862306a36Sopenharmony_ci	gpa_t gpa;
461962306a36Sopenharmony_ci	u32 i, j;
462062306a36Sopenharmony_ci
462162306a36Sopenharmony_ci	vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT);
462262306a36Sopenharmony_ci
462362306a36Sopenharmony_ci	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
462462306a36Sopenharmony_ci		/*
462562306a36Sopenharmony_ci		 * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set
462662306a36Sopenharmony_ci		 * as vmcs01.GUEST_DR7 contains a userspace defined value
462762306a36Sopenharmony_ci		 * and vcpu->arch.dr7 is not squirreled away before the
462862306a36Sopenharmony_ci		 * nested VMENTER (not worth adding a variable in nested_vmx).
462962306a36Sopenharmony_ci		 */
463062306a36Sopenharmony_ci		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
463162306a36Sopenharmony_ci			kvm_set_dr(vcpu, 7, DR7_FIXED_1);
463262306a36Sopenharmony_ci		else
463362306a36Sopenharmony_ci			WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
463462306a36Sopenharmony_ci	}
463562306a36Sopenharmony_ci
463662306a36Sopenharmony_ci	/*
463762306a36Sopenharmony_ci	 * Note that calling vmx_set_{efer,cr0,cr4} is important as they
463862306a36Sopenharmony_ci	 * handle a variety of side effects to KVM's software model.
463962306a36Sopenharmony_ci	 */
464062306a36Sopenharmony_ci	vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
464162306a36Sopenharmony_ci
464262306a36Sopenharmony_ci	vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
464362306a36Sopenharmony_ci	vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
464462306a36Sopenharmony_ci
464562306a36Sopenharmony_ci	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
464662306a36Sopenharmony_ci	vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
464762306a36Sopenharmony_ci
	/* CR3 is taken straight from the VMCS and marked available. */
464862306a36Sopenharmony_ci	nested_ept_uninit_mmu_context(vcpu);
464962306a36Sopenharmony_ci	vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
465062306a36Sopenharmony_ci	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
465162306a36Sopenharmony_ci
465262306a36Sopenharmony_ci	/*
465362306a36Sopenharmony_ci	 * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
465462306a36Sopenharmony_ci	 * from vmcs01 (if necessary).  The PDPTRs are not loaded on
465562306a36Sopenharmony_ci	 * VMFail, like everything else we just need to ensure our
465662306a36Sopenharmony_ci	 * software model is up-to-date.
465762306a36Sopenharmony_ci	 */
465862306a36Sopenharmony_ci	if (enable_ept && is_pae_paging(vcpu))
465962306a36Sopenharmony_ci		ept_save_pdptrs(vcpu);
466062306a36Sopenharmony_ci
466162306a36Sopenharmony_ci	kvm_mmu_reset_context(vcpu);
466262306a36Sopenharmony_ci
466362306a36Sopenharmony_ci	/*
466462306a36Sopenharmony_ci	 * This nasty bit of open coding is a compromise between blindly
466562306a36Sopenharmony_ci	 * loading L1's MSRs using the exit load lists (incorrect emulation
466662306a36Sopenharmony_ci	 * of VMFail), leaving the nested VM's MSRs in the software model
466762306a36Sopenharmony_ci	 * (incorrect behavior) and snapshotting the modified MSRs (too
466862306a36Sopenharmony_ci	 * expensive since the lists are unbound by hardware).  For each
466962306a36Sopenharmony_ci	 * MSR that was (prematurely) loaded from the nested VMEntry load
467062306a36Sopenharmony_ci	 * list, reload it from the exit load list if it exists and differs
467162306a36Sopenharmony_ci	 * from the guest value.  The intent is to stuff host state as
467262306a36Sopenharmony_ci	 * silently as possible, not to fully process the exit load list.
467362306a36Sopenharmony_ci	 */
	/* Outer loop: every entry of vmcs12's VM-entry MSR-load list. */
467462306a36Sopenharmony_ci	for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
467562306a36Sopenharmony_ci		gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
467662306a36Sopenharmony_ci		if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
467762306a36Sopenharmony_ci			pr_debug_ratelimited(
467862306a36Sopenharmony_ci				"%s read MSR index failed (%u, 0x%08llx)\n",
467962306a36Sopenharmony_ci				__func__, i, gpa);
468062306a36Sopenharmony_ci			goto vmabort;
468162306a36Sopenharmony_ci		}
468262306a36Sopenharmony_ci
		/*
		 * Inner loop: scan the VM-exit MSR-load list for entries with
		 * the same MSR index as g.
		 */
468362306a36Sopenharmony_ci		for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) {
468462306a36Sopenharmony_ci			gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h));
468562306a36Sopenharmony_ci			if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) {
468662306a36Sopenharmony_ci				pr_debug_ratelimited(
468762306a36Sopenharmony_ci					"%s read MSR failed (%u, 0x%08llx)\n",
468862306a36Sopenharmony_ci					__func__, j, gpa);
468962306a36Sopenharmony_ci				goto vmabort;
469062306a36Sopenharmony_ci			}
469162306a36Sopenharmony_ci			if (h.index != g.index)
469262306a36Sopenharmony_ci				continue;
			/* Same MSR, same value: stop scanning for this g. */
469362306a36Sopenharmony_ci			if (h.value == g.value)
469462306a36Sopenharmony_ci				break;
469562306a36Sopenharmony_ci
469662306a36Sopenharmony_ci			if (nested_vmx_load_msr_check(vcpu, &h)) {
469762306a36Sopenharmony_ci				pr_debug_ratelimited(
469862306a36Sopenharmony_ci					"%s check failed (%u, 0x%x, 0x%x)\n",
469962306a36Sopenharmony_ci					__func__, j, h.index, h.reserved);
470062306a36Sopenharmony_ci				goto vmabort;
470162306a36Sopenharmony_ci			}
470262306a36Sopenharmony_ci
			/*
			 * No break after a successful write: later exit-list
			 * entries with the same index are processed as well.
			 */
470362306a36Sopenharmony_ci			if (kvm_set_msr(vcpu, h.index, h.value)) {
470462306a36Sopenharmony_ci				pr_debug_ratelimited(
470562306a36Sopenharmony_ci					"%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
470662306a36Sopenharmony_ci					__func__, j, h.index, h.value);
470762306a36Sopenharmony_ci				goto vmabort;
470862306a36Sopenharmony_ci			}
470962306a36Sopenharmony_ci		}
471062306a36Sopenharmony_ci	}
471162306a36Sopenharmony_ci
471262306a36Sopenharmony_ci	return;
471362306a36Sopenharmony_ci
	/* Shared failure exit for every guest-memory read, check or WRMSR error. */
471462306a36Sopenharmony_civmabort:
471562306a36Sopenharmony_ci	nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
471662306a36Sopenharmony_ci}
471762306a36Sopenharmony_ci
471862306a36Sopenharmony_ci/*
471962306a36Sopenharmony_ci * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
472062306a36Sopenharmony_ci * and modify vmcs12 to make it see what it would expect to see there if
472162306a36Sopenharmony_ci * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
472262306a36Sopenharmony_ci */
472362306a36Sopenharmony_civoid nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
472462306a36Sopenharmony_ci		       u32 exit_intr_info, unsigned long exit_qualification)
472562306a36Sopenharmony_ci{
472662306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
472762306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
472862306a36Sopenharmony_ci
472962306a36Sopenharmony_ci	/* Pending MTF traps are discarded on VM-Exit. */
473062306a36Sopenharmony_ci	vmx->nested.mtf_pending = false;
473162306a36Sopenharmony_ci
473262306a36Sopenharmony_ci	/* trying to cancel vmlaunch/vmresume is a bug */
473362306a36Sopenharmony_ci	WARN_ON_ONCE(vmx->nested.nested_run_pending);
473462306a36Sopenharmony_ci
473562306a36Sopenharmony_ci	if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
473662306a36Sopenharmony_ci		/*
473762306a36Sopenharmony_ci		 * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
473862306a36Sopenharmony_ci		 * Enlightened VMCS after migration and we still need to
473962306a36Sopenharmony_ci		 * do that when something is forcing L2->L1 exit prior to
474062306a36Sopenharmony_ci		 * the first L2 run.
474162306a36Sopenharmony_ci		 */
474262306a36Sopenharmony_ci		(void)nested_get_evmcs_page(vcpu);
474362306a36Sopenharmony_ci	}
474462306a36Sopenharmony_ci
474562306a36Sopenharmony_ci	/* Service pending TLB flush requests for L2 before switching to L1. */
474662306a36Sopenharmony_ci	kvm_service_local_tlb_flush_requests(vcpu);
474762306a36Sopenharmony_ci
474862306a36Sopenharmony_ci	/*
474962306a36Sopenharmony_ci	 * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
475062306a36Sopenharmony_ci	 * now and the new vmentry.  Ensure that the VMCS02 PDPTR fields are
475162306a36Sopenharmony_ci	 * up-to-date before switching to L1.
475262306a36Sopenharmony_ci	 */
475362306a36Sopenharmony_ci	if (enable_ept && is_pae_paging(vcpu))
475462306a36Sopenharmony_ci		vmx_ept_load_pdptrs(vcpu);
475562306a36Sopenharmony_ci
475662306a36Sopenharmony_ci	leave_guest_mode(vcpu);
475762306a36Sopenharmony_ci
475862306a36Sopenharmony_ci	if (nested_cpu_has_preemption_timer(vmcs12))
475962306a36Sopenharmony_ci		hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
476062306a36Sopenharmony_ci
476162306a36Sopenharmony_ci	if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING)) {
476262306a36Sopenharmony_ci		vcpu->arch.tsc_offset = vcpu->arch.l1_tsc_offset;
476362306a36Sopenharmony_ci		if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
476462306a36Sopenharmony_ci			vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
476562306a36Sopenharmony_ci	}
476662306a36Sopenharmony_ci
476762306a36Sopenharmony_ci	if (likely(!vmx->fail)) {
476862306a36Sopenharmony_ci		sync_vmcs02_to_vmcs12(vcpu, vmcs12);
476962306a36Sopenharmony_ci
477062306a36Sopenharmony_ci		if (vm_exit_reason != -1)
477162306a36Sopenharmony_ci			prepare_vmcs12(vcpu, vmcs12, vm_exit_reason,
477262306a36Sopenharmony_ci				       exit_intr_info, exit_qualification);
477362306a36Sopenharmony_ci
477462306a36Sopenharmony_ci		/*
477562306a36Sopenharmony_ci		 * Must happen outside of sync_vmcs02_to_vmcs12() as it will
477662306a36Sopenharmony_ci		 * also be used to capture vmcs12 cache as part of
477762306a36Sopenharmony_ci		 * capturing nVMX state for snapshot (migration).
477862306a36Sopenharmony_ci		 *
477962306a36Sopenharmony_ci		 * Otherwise, this flush will dirty guest memory at a
478062306a36Sopenharmony_ci		 * point it is already assumed by user-space to be
478162306a36Sopenharmony_ci		 * immutable.
478262306a36Sopenharmony_ci		 */
478362306a36Sopenharmony_ci		nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
478462306a36Sopenharmony_ci	} else {
478562306a36Sopenharmony_ci		/*
478662306a36Sopenharmony_ci		 * The only expected VM-instruction error is "VM entry with
478762306a36Sopenharmony_ci		 * invalid control field(s)." Anything else indicates a
478862306a36Sopenharmony_ci		 * problem with L0.  And we should never get here with a
478962306a36Sopenharmony_ci		 * VMFail of any type if early consistency checks are enabled.
479062306a36Sopenharmony_ci		 */
479162306a36Sopenharmony_ci		WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
479262306a36Sopenharmony_ci			     VMXERR_ENTRY_INVALID_CONTROL_FIELD);
479362306a36Sopenharmony_ci		WARN_ON_ONCE(nested_early_check);
479462306a36Sopenharmony_ci	}
479562306a36Sopenharmony_ci
479662306a36Sopenharmony_ci	/*
479762306a36Sopenharmony_ci	 * Drop events/exceptions that were queued for re-injection to L2
479862306a36Sopenharmony_ci	 * (picked up via vmx_complete_interrupts()), as well as exceptions
479962306a36Sopenharmony_ci	 * that were pending for L2.  Note, this must NOT be hoisted above
480062306a36Sopenharmony_ci	 * prepare_vmcs12(), events/exceptions queued for re-injection need to
480162306a36Sopenharmony_ci	 * be captured in vmcs12 (see vmcs12_save_pending_event()).
480262306a36Sopenharmony_ci	 */
480362306a36Sopenharmony_ci	vcpu->arch.nmi_injected = false;
480462306a36Sopenharmony_ci	kvm_clear_exception_queue(vcpu);
480562306a36Sopenharmony_ci	kvm_clear_interrupt_queue(vcpu);
480662306a36Sopenharmony_ci
480762306a36Sopenharmony_ci	vmx_switch_vmcs(vcpu, &vmx->vmcs01);
480862306a36Sopenharmony_ci
480962306a36Sopenharmony_ci	/*
481062306a36Sopenharmony_ci	 * If IBRS is advertised to the vCPU, KVM must flush the indirect
481162306a36Sopenharmony_ci	 * branch predictors when transitioning from L2 to L1, as L1 expects
481262306a36Sopenharmony_ci	 * hardware (KVM in this case) to provide separate predictor modes.
481362306a36Sopenharmony_ci	 * Bare metal isolates VMX root (host) from VMX non-root (guest), but
481462306a36Sopenharmony_ci	 * doesn't isolate different VMCSs, i.e. in this case, doesn't provide
481562306a36Sopenharmony_ci	 * separate modes for L2 vs L1.
481662306a36Sopenharmony_ci	 */
481762306a36Sopenharmony_ci	if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
481862306a36Sopenharmony_ci		indirect_branch_prediction_barrier();
481962306a36Sopenharmony_ci
482062306a36Sopenharmony_ci	/* Update any VMCS fields that might have changed while L2 ran */
482162306a36Sopenharmony_ci	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
482262306a36Sopenharmony_ci	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
482362306a36Sopenharmony_ci	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
482462306a36Sopenharmony_ci	if (kvm_caps.has_tsc_control)
482562306a36Sopenharmony_ci		vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
482662306a36Sopenharmony_ci
482762306a36Sopenharmony_ci	if (vmx->nested.l1_tpr_threshold != -1)
482862306a36Sopenharmony_ci		vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
482962306a36Sopenharmony_ci
483062306a36Sopenharmony_ci	if (vmx->nested.change_vmcs01_virtual_apic_mode) {
483162306a36Sopenharmony_ci		vmx->nested.change_vmcs01_virtual_apic_mode = false;
483262306a36Sopenharmony_ci		vmx_set_virtual_apic_mode(vcpu);
483362306a36Sopenharmony_ci	}
483462306a36Sopenharmony_ci
483562306a36Sopenharmony_ci	if (vmx->nested.update_vmcs01_cpu_dirty_logging) {
483662306a36Sopenharmony_ci		vmx->nested.update_vmcs01_cpu_dirty_logging = false;
483762306a36Sopenharmony_ci		vmx_update_cpu_dirty_logging(vcpu);
483862306a36Sopenharmony_ci	}
483962306a36Sopenharmony_ci
484062306a36Sopenharmony_ci	/* Unpin physical memory we referred to in vmcs02 */
484162306a36Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &vmx->nested.apic_access_page_map, false);
484262306a36Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
484362306a36Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
484462306a36Sopenharmony_ci	vmx->nested.pi_desc = NULL;
484562306a36Sopenharmony_ci
484662306a36Sopenharmony_ci	if (vmx->nested.reload_vmcs01_apic_access_page) {
484762306a36Sopenharmony_ci		vmx->nested.reload_vmcs01_apic_access_page = false;
484862306a36Sopenharmony_ci		kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
484962306a36Sopenharmony_ci	}
485062306a36Sopenharmony_ci
485162306a36Sopenharmony_ci	if (vmx->nested.update_vmcs01_apicv_status) {
485262306a36Sopenharmony_ci		vmx->nested.update_vmcs01_apicv_status = false;
485362306a36Sopenharmony_ci		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
485462306a36Sopenharmony_ci	}
485562306a36Sopenharmony_ci
485662306a36Sopenharmony_ci	if ((vm_exit_reason != -1) &&
485762306a36Sopenharmony_ci	    (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
485862306a36Sopenharmony_ci		vmx->nested.need_vmcs12_to_shadow_sync = true;
485962306a36Sopenharmony_ci
486062306a36Sopenharmony_ci	/* in case we halted in L2 */
486162306a36Sopenharmony_ci	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
486262306a36Sopenharmony_ci
486362306a36Sopenharmony_ci	if (likely(!vmx->fail)) {
486462306a36Sopenharmony_ci		if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
486562306a36Sopenharmony_ci		    nested_exit_intr_ack_set(vcpu)) {
486662306a36Sopenharmony_ci			int irq = kvm_cpu_get_interrupt(vcpu);
486762306a36Sopenharmony_ci			WARN_ON(irq < 0);
486862306a36Sopenharmony_ci			vmcs12->vm_exit_intr_info = irq |
486962306a36Sopenharmony_ci				INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
487062306a36Sopenharmony_ci		}
487162306a36Sopenharmony_ci
487262306a36Sopenharmony_ci		if (vm_exit_reason != -1)
487362306a36Sopenharmony_ci			trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
487462306a36Sopenharmony_ci						       vmcs12->exit_qualification,
487562306a36Sopenharmony_ci						       vmcs12->idt_vectoring_info_field,
487662306a36Sopenharmony_ci						       vmcs12->vm_exit_intr_info,
487762306a36Sopenharmony_ci						       vmcs12->vm_exit_intr_error_code,
487862306a36Sopenharmony_ci						       KVM_ISA_VMX);
487962306a36Sopenharmony_ci
488062306a36Sopenharmony_ci		load_vmcs12_host_state(vcpu, vmcs12);
488162306a36Sopenharmony_ci
488262306a36Sopenharmony_ci		return;
488362306a36Sopenharmony_ci	}
488462306a36Sopenharmony_ci
488562306a36Sopenharmony_ci	/*
488662306a36Sopenharmony_ci	 * After an early L2 VM-entry failure, we're now back
488762306a36Sopenharmony_ci	 * in L1 which thinks it just finished a VMLAUNCH or
488862306a36Sopenharmony_ci	 * VMRESUME instruction, so we need to set the failure
488962306a36Sopenharmony_ci	 * flag and the VM-instruction error field of the VMCS
489062306a36Sopenharmony_ci	 * accordingly, and skip the emulated instruction.
489162306a36Sopenharmony_ci	 */
489262306a36Sopenharmony_ci	(void)nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
489362306a36Sopenharmony_ci
489462306a36Sopenharmony_ci	/*
489562306a36Sopenharmony_ci	 * Restore L1's host state to KVM's software model.  We're here
489662306a36Sopenharmony_ci	 * because a consistency check was caught by hardware, which
489762306a36Sopenharmony_ci	 * means some amount of guest state has been propagated to KVM's
489862306a36Sopenharmony_ci	 * model and needs to be unwound to the host's state.
489962306a36Sopenharmony_ci	 */
490062306a36Sopenharmony_ci	nested_vmx_restore_host_state(vcpu);
490162306a36Sopenharmony_ci
490262306a36Sopenharmony_ci	vmx->fail = 0;
490362306a36Sopenharmony_ci}
490462306a36Sopenharmony_ci
/*
 * Clear the pending KVM_REQ_TRIPLE_FAULT request on @vcpu and then emulate a
 * nested VM-Exit with exit reason EXIT_REASON_TRIPLE_FAULT.  The two
 * remaining nested_vmx_vmexit() arguments are passed as zero.
 */
static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu)
{
	kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
	nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
}
491062306a36Sopenharmony_ci
/*
 * Decode the memory-address operand of a vmx instruction, as recorded on an
 * exit caused by such an instruction (run by a guest hypervisor).
 *
 * @vcpu:                 vCPU whose registers and segments are consulted
 * @exit_qualification:   exit qualification; holds the displacement
 * @vmx_instruction_info: instruction-information field; holds scaling,
 *                        address size, segment, index and base selection
 * @wr:                   true if the operand is a destination, false if it
 *                        is a source; only used for the segment type check
 *                        performed outside of long mode
 * @len:                  operand size in bytes; only used for the segment
 *                        limit check performed outside of long mode
 * @ret:                  receives the computed linear address; note that it
 *                        may already have been written when 1 is returned
 *
 * On success, returns 0. When the operand is invalid, returns 1 and queues
 * #UD, #GP, or #SS.
 */
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
			u32 vmx_instruction_info, bool wr, int len, gva_t *ret)
{
	gva_t off;
	bool exn;
	struct kvm_segment s;

	/*
	 * According to Vol. 3B, "Information for VM Exits Due to Instruction
	 * Execution", on an exit, vmx_instruction_info holds most of the
	 * addressing components of the operand. Only the displacement part
	 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
	 * For how an actual address is calculated from all these components,
	 * refer to Vol. 1, "Operand Addressing".
	 *
	 * Field layout as decoded below: scaling in bits 1:0, address size in
	 * bits 9:7, register-operand flag in bit 10, segment register in bits
	 * 17:15, index register in bits 21:18 (invalid if bit 22 is set) and
	 * base register in bits 26:23 (invalid if bit 27 is set).
	 */
	int  scaling = vmx_instruction_info & 3;
	int  addr_size = (vmx_instruction_info >> 7) & 7;
	bool is_reg = vmx_instruction_info & (1u << 10);
	int  seg_reg = (vmx_instruction_info >> 15) & 7;
	int  index_reg = (vmx_instruction_info >> 18) & 0xf;
	bool index_is_valid = !(vmx_instruction_info & (1u << 22));
	int  base_reg       = (vmx_instruction_info >> 23) & 0xf;
	bool base_is_valid  = !(vmx_instruction_info & (1u << 27));

	/* A register operand has no memory address to decode: queue #UD. */
	if (is_reg) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	/* Addr = segment_base + offset */
	/* offset = base + [index * scale] + displacement */
	off = exit_qualification; /* holds the displacement */
	/*
	 * Sign-extend the displacement from bit 31 (addr_size 1, 32 bit) or
	 * bit 15 (addr_size 0, 16 bit); other address sizes use it as is.
	 */
	if (addr_size == 1)
		off = (gva_t)sign_extend64(off, 31);
	else if (addr_size == 0)
		off = (gva_t)sign_extend64(off, 15);
	if (base_is_valid)
		off += kvm_register_read(vcpu, base_reg);
	if (index_is_valid)
		off += kvm_register_read(vcpu, index_reg) << scaling;
	vmx_get_segment(vcpu, &s, seg_reg);

	/*
	 * The effective address, i.e. @off, of a memory operand is truncated
	 * based on the address size of the instruction.  Note that this is
	 * the *effective address*, i.e. the address prior to accounting for
	 * the segment's base.
	 */
	if (addr_size == 1) /* 32 bit */
		off &= 0xffffffff;
	else if (addr_size == 0) /* 16 bit */
		off &= 0xffff;

	/* Checks for #GP/#SS exceptions. */
	exn = false;
	if (is_long_mode(vcpu)) {
		/*
		 * The virtual/linear address is never truncated in 64-bit
		 * mode, e.g. a 32-bit address size can yield a 64-bit virtual
		 * address when using FS/GS with a non-zero base.
		 *
		 * Only the FS and GS bases are added here; for every other
		 * segment the linear address is the effective address.
		 */
		if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS)
			*ret = s.base + off;
		else
			*ret = off;

		/* Long mode: #GP(0)/#SS(0) if the memory address is in a
		 * non-canonical form. This is the only check on the memory
		 * destination for long mode!
		 */
		exn = is_noncanonical_address(*ret, vcpu);
	} else {
		/*
		 * When not in long mode, the virtual/linear address is
		 * unconditionally truncated to 32 bits regardless of the
		 * address size.
		 */
		*ret = (s.base + off) & 0xffffffff;

		/* Protected mode: apply checks for segment validity in the
		 * following order:
		 * - segment type check (#GP(0) may be thrown)
		 * - usability check (#GP(0)/#SS(0))
		 * - limit check (#GP(0)/#SS(0))
		 */
		if (wr)
			/* #GP(0) if the destination operand is located in a
			 * read-only data segment or any code segment.
			 */
			exn = ((s.type & 0xa) == 0 || (s.type & 8));
		else
			/* #GP(0) if the source operand is located in an
			 * execute-only code segment
			 */
			exn = ((s.type & 0xa) == 8);
		/*
		 * A failed type check always queues #GP(0), i.e. unlike the
		 * checks below it never turns into #SS for the SS segment.
		 */
		if (exn) {
			kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
			return 1;
		}
		/* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
		 */
		exn = (s.unusable != 0);

		/*
		 * Protected mode: #GP(0)/#SS(0) if the memory operand is
		 * outside the segment limit.  All CPUs that support VMX ignore
		 * limit checks for flat segments, i.e. segments with base==0,
		 * limit==0xffffffff and of type expand-up data or code.
		 */
		if (!(s.base == 0 && s.limit == 0xffffffff &&
		     ((s.type & 8) || !(s.type & 4))))
			exn = exn || ((u64)off + len - 1 > s.limit);
	}
	/* Usability, limit and canonical failures: #SS(0) for SS, else #GP(0). */
	if (exn) {
		kvm_queue_exception_e(vcpu,
				      seg_reg == VCPU_SREG_SS ?
						SS_VECTOR : GP_VECTOR,
				      0);
		return 1;
	}

	return 0;
}
504062306a36Sopenharmony_ci
504162306a36Sopenharmony_cistatic int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer,
504262306a36Sopenharmony_ci				int *ret)
504362306a36Sopenharmony_ci{
504462306a36Sopenharmony_ci	gva_t gva;
504562306a36Sopenharmony_ci	struct x86_exception e;
504662306a36Sopenharmony_ci	int r;
504762306a36Sopenharmony_ci
504862306a36Sopenharmony_ci	if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
504962306a36Sopenharmony_ci				vmcs_read32(VMX_INSTRUCTION_INFO), false,
505062306a36Sopenharmony_ci				sizeof(*vmpointer), &gva)) {
505162306a36Sopenharmony_ci		*ret = 1;
505262306a36Sopenharmony_ci		return -EINVAL;
505362306a36Sopenharmony_ci	}
505462306a36Sopenharmony_ci
505562306a36Sopenharmony_ci	r = kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e);
505662306a36Sopenharmony_ci	if (r != X86EMUL_CONTINUE) {
505762306a36Sopenharmony_ci		*ret = kvm_handle_memory_failure(vcpu, r, &e);
505862306a36Sopenharmony_ci		return -EINVAL;
505962306a36Sopenharmony_ci	}
506062306a36Sopenharmony_ci
506162306a36Sopenharmony_ci	return 0;
506262306a36Sopenharmony_ci}
506362306a36Sopenharmony_ci
506462306a36Sopenharmony_ci/*
506562306a36Sopenharmony_ci * Allocate a shadow VMCS and associate it with the currently loaded
506662306a36Sopenharmony_ci * VMCS, unless such a shadow VMCS already exists. The newly allocated
506762306a36Sopenharmony_ci * VMCS is also VMCLEARed, so that it is ready for use.
506862306a36Sopenharmony_ci */
506962306a36Sopenharmony_cistatic struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
507062306a36Sopenharmony_ci{
507162306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
507262306a36Sopenharmony_ci	struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
507362306a36Sopenharmony_ci
507462306a36Sopenharmony_ci	/*
507562306a36Sopenharmony_ci	 * KVM allocates a shadow VMCS only when L1 executes VMXON and frees it
507662306a36Sopenharmony_ci	 * when L1 executes VMXOFF or the vCPU is forced out of nested
507762306a36Sopenharmony_ci	 * operation.  VMXON faults if the CPU is already post-VMXON, so it
507862306a36Sopenharmony_ci	 * should be impossible to already have an allocated shadow VMCS.  KVM
507962306a36Sopenharmony_ci	 * doesn't support virtualization of VMCS shadowing, so vmcs01 should
508062306a36Sopenharmony_ci	 * always be the loaded VMCS.
508162306a36Sopenharmony_ci	 */
508262306a36Sopenharmony_ci	if (WARN_ON(loaded_vmcs != &vmx->vmcs01 || loaded_vmcs->shadow_vmcs))
508362306a36Sopenharmony_ci		return loaded_vmcs->shadow_vmcs;
508462306a36Sopenharmony_ci
508562306a36Sopenharmony_ci	loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
508662306a36Sopenharmony_ci	if (loaded_vmcs->shadow_vmcs)
508762306a36Sopenharmony_ci		vmcs_clear(loaded_vmcs->shadow_vmcs);
508862306a36Sopenharmony_ci
508962306a36Sopenharmony_ci	return loaded_vmcs->shadow_vmcs;
509062306a36Sopenharmony_ci}
509162306a36Sopenharmony_ci
509262306a36Sopenharmony_cistatic int enter_vmx_operation(struct kvm_vcpu *vcpu)
509362306a36Sopenharmony_ci{
509462306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
509562306a36Sopenharmony_ci	int r;
509662306a36Sopenharmony_ci
509762306a36Sopenharmony_ci	r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
509862306a36Sopenharmony_ci	if (r < 0)
509962306a36Sopenharmony_ci		goto out_vmcs02;
510062306a36Sopenharmony_ci
510162306a36Sopenharmony_ci	vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
510262306a36Sopenharmony_ci	if (!vmx->nested.cached_vmcs12)
510362306a36Sopenharmony_ci		goto out_cached_vmcs12;
510462306a36Sopenharmony_ci
510562306a36Sopenharmony_ci	vmx->nested.shadow_vmcs12_cache.gpa = INVALID_GPA;
510662306a36Sopenharmony_ci	vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
510762306a36Sopenharmony_ci	if (!vmx->nested.cached_shadow_vmcs12)
510862306a36Sopenharmony_ci		goto out_cached_shadow_vmcs12;
510962306a36Sopenharmony_ci
511062306a36Sopenharmony_ci	if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
511162306a36Sopenharmony_ci		goto out_shadow_vmcs;
511262306a36Sopenharmony_ci
511362306a36Sopenharmony_ci	hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
511462306a36Sopenharmony_ci		     HRTIMER_MODE_ABS_PINNED);
511562306a36Sopenharmony_ci	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
511662306a36Sopenharmony_ci
511762306a36Sopenharmony_ci	vmx->nested.vpid02 = allocate_vpid();
511862306a36Sopenharmony_ci
511962306a36Sopenharmony_ci	vmx->nested.vmcs02_initialized = false;
512062306a36Sopenharmony_ci	vmx->nested.vmxon = true;
512162306a36Sopenharmony_ci
512262306a36Sopenharmony_ci	if (vmx_pt_mode_is_host_guest()) {
512362306a36Sopenharmony_ci		vmx->pt_desc.guest.ctl = 0;
512462306a36Sopenharmony_ci		pt_update_intercept_for_msr(vcpu);
512562306a36Sopenharmony_ci	}
512662306a36Sopenharmony_ci
512762306a36Sopenharmony_ci	return 0;
512862306a36Sopenharmony_ci
512962306a36Sopenharmony_ciout_shadow_vmcs:
513062306a36Sopenharmony_ci	kfree(vmx->nested.cached_shadow_vmcs12);
513162306a36Sopenharmony_ci
513262306a36Sopenharmony_ciout_cached_shadow_vmcs12:
513362306a36Sopenharmony_ci	kfree(vmx->nested.cached_vmcs12);
513462306a36Sopenharmony_ci
513562306a36Sopenharmony_ciout_cached_vmcs12:
513662306a36Sopenharmony_ci	free_loaded_vmcs(&vmx->nested.vmcs02);
513762306a36Sopenharmony_ci
513862306a36Sopenharmony_ciout_vmcs02:
513962306a36Sopenharmony_ci	return -ENOMEM;
514062306a36Sopenharmony_ci}
514162306a36Sopenharmony_ci
/*
 * Emulate the VMXON instruction.
 *
 * The checks are performed in the following order, and the first one that
 * fails determines the outcome:
 *  - CR4.VMXE clear: queue #UD
 *  - CPL != 0: inject #GP(0)
 *  - already in VMX operation: VM-Fail with
 *    VMXERR_VMXON_IN_VMX_ROOT_OPERATION
 *  - invalid CR0/CR4, or the feature-control MSR lacking the locked and
 *    "VMX enabled outside SMX" bits: inject #GP(0)
 *  - failure to decode/read the VMXON pointer operand
 *  - invalid VMXON region address or revision: VMfailInvalid
 *
 * Returns 1 after queueing/injecting an exception, the value provided by
 * nested_vmx_get_vmptr() if fetching the operand fails, the (negative)
 * error from enter_vmx_operation() if it fails, and otherwise the result of
 * the nested_vmx_fail*()/nested_vmx_succeed() helper that was invoked.
 */
static int handle_vmxon(struct kvm_vcpu *vcpu)
{
	int ret;
	gpa_t vmptr;
	uint32_t revision;
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	const u64 VMXON_NEEDED_FEATURES = FEAT_CTL_LOCKED
		| FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;

	/*
	 * Manually check CR4.VMXE, KVM must force CR4.VMXE=1 to enter
	 * the guest and so cannot rely on hardware to perform the check,
	 * which has higher priority than VM-Exit (see Intel SDM's pseudocode
	 * for VMXON).
	 *
	 * Rely on hardware for the other pre-VM-Exit checks, CR0.PE=1, !VM86
	 * and !COMPATIBILITY modes.  For an unrestricted guest, KVM doesn't
	 * force any of the relevant guest state.  For a restricted guest, KVM
	 * does force CR0.PE=1, but only to also force VM86 in order to emulate
	 * Real Mode, and so there's no need to check CR0.PE manually.
	 */
	if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_VMXE)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	/*
	 * The CPL is checked for "not in VMX operation" and for "in VMX root",
	 * and has higher priority than the VM-Fail due to being post-VMXON,
	 * i.e. VMXON #GPs outside of VMX non-root if CPL!=0.  In VMX non-root,
	 * VMXON causes VM-Exit and KVM unconditionally forwards VMXON VM-Exits
	 * from L2 to L1, i.e. there's no need to check for the vCPU being in
	 * VMX non-root.
	 *
	 * Forwarding the VM-Exit unconditionally, i.e. without performing the
	 * #UD checks (see above), is functionally ok because KVM doesn't allow
	 * L1 to run L2 without CR4.VMXE=0, and because KVM never modifies L2's
	 * CR0 or CR4, i.e. it's L2's responsibility to emulate #UDs that are
	 * missed by hardware due to shadowing CR0 and/or CR4.
	 *
	 * NOTE(review): "without CR4.VMXE=0" reads like a double negative;
	 * presumably "with CR4.VMXE=0" is meant -- confirm.
	 */
	if (vmx_get_cpl(vcpu)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/* VMXON while already in VMX operation is a VM-Fail, not a fault. */
	if (vmx->nested.vmxon)
		return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);

	/*
	 * Invalid CR0/CR4 generates #GP.  These checks are performed if and
	 * only if the vCPU isn't already in VMX operation, i.e. effectively
	 * have lower priority than the VM-Fail above.
	 */
	if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
	    !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/* #GP(0) unless every bit of VMXON_NEEDED_FEATURES is set. */
	if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
			!= VMXON_NEEDED_FEATURES) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/* On failure, @ret already holds the value this handler must return. */
	if (nested_vmx_get_vmptr(vcpu, &vmptr, &ret))
		return ret;

	/*
	 * SDM 3: 24.11.5
	 * The first 4 bytes of VMXON region contain the supported
	 * VMCS revision identifier
	 *
	 * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
	 * which replaces physical address width with 32
	 */
	if (!page_address_valid(vcpu, vmptr))
		return nested_vmx_failInvalid(vcpu);

	/* Fail if the region is unreadable or holds an unexpected revision. */
	if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) ||
	    revision != VMCS12_REVISION)
		return nested_vmx_failInvalid(vcpu);

	vmx->nested.vmxon_ptr = vmptr;
	ret = enter_vmx_operation(vcpu);
	if (ret)
		return ret;

	return nested_vmx_succeed(vcpu);
}
523362306a36Sopenharmony_ci
523462306a36Sopenharmony_cistatic inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
523562306a36Sopenharmony_ci{
523662306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
523762306a36Sopenharmony_ci
523862306a36Sopenharmony_ci	if (vmx->nested.current_vmptr == INVALID_GPA)
523962306a36Sopenharmony_ci		return;
524062306a36Sopenharmony_ci
524162306a36Sopenharmony_ci	copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
524262306a36Sopenharmony_ci
524362306a36Sopenharmony_ci	if (enable_shadow_vmcs) {
524462306a36Sopenharmony_ci		/* copy to memory all shadowed fields in case
524562306a36Sopenharmony_ci		   they were modified */
524662306a36Sopenharmony_ci		copy_shadow_to_vmcs12(vmx);
524762306a36Sopenharmony_ci		vmx_disable_shadow_vmcs(vmx);
524862306a36Sopenharmony_ci	}
524962306a36Sopenharmony_ci	vmx->nested.posted_intr_nv = -1;
525062306a36Sopenharmony_ci
525162306a36Sopenharmony_ci	/* Flush VMCS12 to guest memory */
525262306a36Sopenharmony_ci	kvm_vcpu_write_guest_page(vcpu,
525362306a36Sopenharmony_ci				  vmx->nested.current_vmptr >> PAGE_SHIFT,
525462306a36Sopenharmony_ci				  vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
525562306a36Sopenharmony_ci
525662306a36Sopenharmony_ci	kvm_mmu_free_roots(vcpu->kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
525762306a36Sopenharmony_ci
525862306a36Sopenharmony_ci	vmx->nested.current_vmptr = INVALID_GPA;
525962306a36Sopenharmony_ci}
526062306a36Sopenharmony_ci
/*
 * Emulate the VMXOFF instruction.
 *
 * Returns 1 if nested_vmx_check_permission() rejects the instruction
 * (presumably after queueing the appropriate exception -- confirm against
 * that helper).  Otherwise tears down nested state via free_nested() and
 * returns the result of nested_vmx_succeed().
 */
static int handle_vmxoff(struct kvm_vcpu *vcpu)
{
	if (!nested_vmx_check_permission(vcpu))
		return 1;

	free_nested(vcpu);

	/*
	 * If an INIT or SIPI is pending in the local APIC, request event
	 * processing so that it is re-evaluated now that nested state is gone.
	 */
	if (kvm_apic_has_pending_init_or_sipi(vcpu))
		kvm_make_request(KVM_REQ_EVENT, vcpu);

	return nested_vmx_succeed(vcpu);
}
527462306a36Sopenharmony_ci
527562306a36Sopenharmony_ci/* Emulate the VMCLEAR instruction */
527662306a36Sopenharmony_cistatic int handle_vmclear(struct kvm_vcpu *vcpu)
527762306a36Sopenharmony_ci{
527862306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
527962306a36Sopenharmony_ci	u32 zero = 0;
528062306a36Sopenharmony_ci	gpa_t vmptr;
528162306a36Sopenharmony_ci	int r;
528262306a36Sopenharmony_ci
528362306a36Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
528462306a36Sopenharmony_ci		return 1;
528562306a36Sopenharmony_ci
528662306a36Sopenharmony_ci	if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
528762306a36Sopenharmony_ci		return r;
528862306a36Sopenharmony_ci
528962306a36Sopenharmony_ci	if (!page_address_valid(vcpu, vmptr))
529062306a36Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
529162306a36Sopenharmony_ci
529262306a36Sopenharmony_ci	if (vmptr == vmx->nested.vmxon_ptr)
529362306a36Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
529462306a36Sopenharmony_ci
529562306a36Sopenharmony_ci	/*
529662306a36Sopenharmony_ci	 * When Enlightened VMEntry is enabled on the calling CPU we treat
529762306a36Sopenharmony_ci	 * memory area pointer by vmptr as Enlightened VMCS (as there's no good
529862306a36Sopenharmony_ci	 * way to distinguish it from VMCS12) and we must not corrupt it by
529962306a36Sopenharmony_ci	 * writing to the non-existent 'launch_state' field. The area doesn't
530062306a36Sopenharmony_ci	 * have to be the currently active EVMCS on the calling CPU and there's
530162306a36Sopenharmony_ci	 * nothing KVM has to do to transition it from 'active' to 'non-active'
530262306a36Sopenharmony_ci	 * state. It is possible that the area will stay mapped as
530362306a36Sopenharmony_ci	 * vmx->nested.hv_evmcs but this shouldn't be a problem.
530462306a36Sopenharmony_ci	 */
530562306a36Sopenharmony_ci	if (likely(!guest_cpuid_has_evmcs(vcpu) ||
530662306a36Sopenharmony_ci		   !evmptr_is_valid(nested_get_evmptr(vcpu)))) {
530762306a36Sopenharmony_ci		if (vmptr == vmx->nested.current_vmptr)
530862306a36Sopenharmony_ci			nested_release_vmcs12(vcpu);
530962306a36Sopenharmony_ci
531062306a36Sopenharmony_ci		/*
531162306a36Sopenharmony_ci		 * Silently ignore memory errors on VMCLEAR, Intel's pseudocode
531262306a36Sopenharmony_ci		 * for VMCLEAR includes a "ensure that data for VMCS referenced
531362306a36Sopenharmony_ci		 * by the operand is in memory" clause that guards writes to
531462306a36Sopenharmony_ci		 * memory, i.e. doing nothing for I/O is architecturally valid.
531562306a36Sopenharmony_ci		 *
531662306a36Sopenharmony_ci		 * FIXME: Suppress failures if and only if no memslot is found,
531762306a36Sopenharmony_ci		 * i.e. exit to userspace if __copy_to_user() fails.
531862306a36Sopenharmony_ci		 */
531962306a36Sopenharmony_ci		(void)kvm_vcpu_write_guest(vcpu,
532062306a36Sopenharmony_ci					   vmptr + offsetof(struct vmcs12,
532162306a36Sopenharmony_ci							    launch_state),
532262306a36Sopenharmony_ci					   &zero, sizeof(zero));
532362306a36Sopenharmony_ci	} else if (vmx->nested.hv_evmcs && vmptr == vmx->nested.hv_evmcs_vmptr) {
532462306a36Sopenharmony_ci		nested_release_evmcs(vcpu);
532562306a36Sopenharmony_ci	}
532662306a36Sopenharmony_ci
532762306a36Sopenharmony_ci	return nested_vmx_succeed(vcpu);
532862306a36Sopenharmony_ci}
532962306a36Sopenharmony_ci
/*
 * Emulate the VMLAUNCH instruction by delegating to nested_vmx_run() with
 * its second argument set to true; returns whatever nested_vmx_run() returns.
 */
static int handle_vmlaunch(struct kvm_vcpu *vcpu)
{
	return nested_vmx_run(vcpu, true);
}
533562306a36Sopenharmony_ci
/*
 * Emulate the VMRESUME instruction by delegating to nested_vmx_run() with
 * its second argument set to false (handle_vmlaunch() passes true); returns
 * whatever nested_vmx_run() returns.
 */
static int handle_vmresume(struct kvm_vcpu *vcpu)
{

	return nested_vmx_run(vcpu, false);
}
534262306a36Sopenharmony_ci
/*
 * Emulate the VMREAD instruction.
 *
 * The field encoding is taken from the register selected by bits 31:28 of
 * the instruction-information field.  The value is read from the cached
 * vmcs12 (or the shadow vmcs12 when the vCPU is in guest mode), or from the
 * enlightened VMCS when one is active, and is then stored to the register
 * selected by bits 6:3 or to the decoded memory operand, depending on bit 10.
 *
 * Returns 1 if the permission check or the memory-operand decode fails, the
 * result of kvm_handle_memory_failure() if the store to guest memory fails,
 * and otherwise the result of the nested_vmx_fail*()/nested_vmx_succeed()
 * helper that was invoked.
 */
static int handle_vmread(struct kvm_vcpu *vcpu)
{
	struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
						    : get_vmcs12(vcpu);
	unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
	u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct x86_exception e;
	unsigned long field;
	u64 value;
	gva_t gva = 0;
	short offset;
	int len, r;

	if (!nested_vmx_check_permission(vcpu))
		return 1;

	/* Decode instruction info and find the field to read */
	field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));

	if (!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
		/*
		 * VMfailInvalid if there is no current VMCS.  Likewise, in
		 * VMX non-root operation, when the VMCS-link pointer is
		 * INVALID_GPA, any VMREAD sets the ALU flags for
		 * VMfailInvalid.
		 */
		if (vmx->nested.current_vmptr == INVALID_GPA ||
		    (is_guest_mode(vcpu) &&
		     get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
			return nested_vmx_failInvalid(vcpu);

		/* A negative offset means the field has no vmcs12 slot. */
		offset = get_vmcs12_field_offset(field);
		if (offset < 0)
			return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);

		/*
		 * Outside of guest mode, refresh the rarely synced fields from
		 * vmcs02 before reading one of them.
		 */
		if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
			copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);

		/* Read the field, zero-extended to a u64 value */
		value = vmcs12_read_any(vmcs12, field, offset);
	} else {
		/*
		 * Hyper-V TLFS (as of 6.0b) explicitly states, that while an
		 * enlightened VMCS is active VMREAD/VMWRITE instructions are
		 * unsupported. Unfortunately, certain versions of Windows 11
		 * don't comply with this requirement which is not enforced in
		 * genuine Hyper-V. Allow VMREAD from an enlightened VMCS as a
		 * workaround, as misbehaving guests will panic on VM-Fail.
		 * Note, enlightened VMCS is incompatible with shadow VMCS so
		 * all VMREADs from L2 should go to L1.
		 */
		if (WARN_ON_ONCE(is_guest_mode(vcpu)))
			return nested_vmx_failInvalid(vcpu);

		/* A negative offset means the field has no eVMCS slot. */
		offset = evmcs_field_offset(field, NULL);
		if (offset < 0)
			return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);

		/* Read the field, zero-extended to a u64 value */
		value = evmcs_read_any(vmx->nested.hv_evmcs, field, offset);
	}

	/*
	 * Now copy part of this value to register or memory, as requested.
	 * Note that the number of bits actually copied is 32 or 64 depending
	 * on the guest's mode (32 or 64 bit), not on the given field's length.
	 */
	if (instr_info & BIT(10)) {
		/* Bit 10 set: the destination is a register. */
		kvm_register_write(vcpu, (((instr_info) >> 3) & 0xf), value);
	} else {
		/* The memory operand is a destination, hence wr == true. */
		len = is_64_bit_mode(vcpu) ? 8 : 4;
		if (get_vmx_mem_address(vcpu, exit_qualification,
					instr_info, true, len, &gva))
			return 1;
		/* _system ok, nested_vmx_check_permission has verified cpl=0 */
		r = kvm_write_guest_virt_system(vcpu, gva, &value, len, &e);
		if (r != X86EMUL_CONTINUE)
			return kvm_handle_memory_failure(vcpu, r, &e);
	}

	return nested_vmx_succeed(vcpu);
}
542462306a36Sopenharmony_ci
542562306a36Sopenharmony_cistatic bool is_shadow_field_rw(unsigned long field)
542662306a36Sopenharmony_ci{
542762306a36Sopenharmony_ci	switch (field) {
542862306a36Sopenharmony_ci#define SHADOW_FIELD_RW(x, y) case x:
542962306a36Sopenharmony_ci#include "vmcs_shadow_fields.h"
543062306a36Sopenharmony_ci		return true;
543162306a36Sopenharmony_ci	default:
543262306a36Sopenharmony_ci		break;
543362306a36Sopenharmony_ci	}
543462306a36Sopenharmony_ci	return false;
543562306a36Sopenharmony_ci}
543662306a36Sopenharmony_ci
543762306a36Sopenharmony_cistatic bool is_shadow_field_ro(unsigned long field)
543862306a36Sopenharmony_ci{
543962306a36Sopenharmony_ci	switch (field) {
544062306a36Sopenharmony_ci#define SHADOW_FIELD_RO(x, y) case x:
544162306a36Sopenharmony_ci#include "vmcs_shadow_fields.h"
544262306a36Sopenharmony_ci		return true;
544362306a36Sopenharmony_ci	default:
544462306a36Sopenharmony_ci		break;
544562306a36Sopenharmony_ci	}
544662306a36Sopenharmony_ci	return false;
544762306a36Sopenharmony_ci}
544862306a36Sopenharmony_ci
544962306a36Sopenharmony_cistatic int handle_vmwrite(struct kvm_vcpu *vcpu)
545062306a36Sopenharmony_ci{
545162306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
545262306a36Sopenharmony_ci						    : get_vmcs12(vcpu);
545362306a36Sopenharmony_ci	unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
545462306a36Sopenharmony_ci	u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
545562306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
545662306a36Sopenharmony_ci	struct x86_exception e;
545762306a36Sopenharmony_ci	unsigned long field;
545862306a36Sopenharmony_ci	short offset;
545962306a36Sopenharmony_ci	gva_t gva;
546062306a36Sopenharmony_ci	int len, r;
546162306a36Sopenharmony_ci
546262306a36Sopenharmony_ci	/*
546362306a36Sopenharmony_ci	 * The value to write might be 32 or 64 bits, depending on L1's long
546462306a36Sopenharmony_ci	 * mode, and eventually we need to write that into a field of several
546562306a36Sopenharmony_ci	 * possible lengths. The code below first zero-extends the value to 64
546662306a36Sopenharmony_ci	 * bit (value), and then copies only the appropriate number of
546762306a36Sopenharmony_ci	 * bits into the vmcs12 field.
546862306a36Sopenharmony_ci	 */
546962306a36Sopenharmony_ci	u64 value = 0;
547062306a36Sopenharmony_ci
547162306a36Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
547262306a36Sopenharmony_ci		return 1;
547362306a36Sopenharmony_ci
547462306a36Sopenharmony_ci	/*
547562306a36Sopenharmony_ci	 * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
547662306a36Sopenharmony_ci	 * any VMWRITE sets the ALU flags for VMfailInvalid.
547762306a36Sopenharmony_ci	 */
547862306a36Sopenharmony_ci	if (vmx->nested.current_vmptr == INVALID_GPA ||
547962306a36Sopenharmony_ci	    (is_guest_mode(vcpu) &&
548062306a36Sopenharmony_ci	     get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
548162306a36Sopenharmony_ci		return nested_vmx_failInvalid(vcpu);
548262306a36Sopenharmony_ci
548362306a36Sopenharmony_ci	if (instr_info & BIT(10))
548462306a36Sopenharmony_ci		value = kvm_register_read(vcpu, (((instr_info) >> 3) & 0xf));
548562306a36Sopenharmony_ci	else {
548662306a36Sopenharmony_ci		len = is_64_bit_mode(vcpu) ? 8 : 4;
548762306a36Sopenharmony_ci		if (get_vmx_mem_address(vcpu, exit_qualification,
548862306a36Sopenharmony_ci					instr_info, false, len, &gva))
548962306a36Sopenharmony_ci			return 1;
549062306a36Sopenharmony_ci		r = kvm_read_guest_virt(vcpu, gva, &value, len, &e);
549162306a36Sopenharmony_ci		if (r != X86EMUL_CONTINUE)
549262306a36Sopenharmony_ci			return kvm_handle_memory_failure(vcpu, r, &e);
549362306a36Sopenharmony_ci	}
549462306a36Sopenharmony_ci
549562306a36Sopenharmony_ci	field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
549662306a36Sopenharmony_ci
549762306a36Sopenharmony_ci	offset = get_vmcs12_field_offset(field);
549862306a36Sopenharmony_ci	if (offset < 0)
549962306a36Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
550062306a36Sopenharmony_ci
550162306a36Sopenharmony_ci	/*
550262306a36Sopenharmony_ci	 * If the vCPU supports "VMWRITE to any supported field in the
550362306a36Sopenharmony_ci	 * VMCS," then the "read-only" fields are actually read/write.
550462306a36Sopenharmony_ci	 */
550562306a36Sopenharmony_ci	if (vmcs_field_readonly(field) &&
550662306a36Sopenharmony_ci	    !nested_cpu_has_vmwrite_any_field(vcpu))
550762306a36Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
550862306a36Sopenharmony_ci
550962306a36Sopenharmony_ci	/*
551062306a36Sopenharmony_ci	 * Ensure vmcs12 is up-to-date before any VMWRITE that dirties
551162306a36Sopenharmony_ci	 * vmcs12, else we may crush a field or consume a stale value.
551262306a36Sopenharmony_ci	 */
551362306a36Sopenharmony_ci	if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field))
551462306a36Sopenharmony_ci		copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
551562306a36Sopenharmony_ci
551662306a36Sopenharmony_ci	/*
551762306a36Sopenharmony_ci	 * Some Intel CPUs intentionally drop the reserved bits of the AR byte
551862306a36Sopenharmony_ci	 * fields on VMWRITE.  Emulate this behavior to ensure consistent KVM
551962306a36Sopenharmony_ci	 * behavior regardless of the underlying hardware, e.g. if an AR_BYTE
552062306a36Sopenharmony_ci	 * field is intercepted for VMWRITE but not VMREAD (in L1), then VMREAD
552162306a36Sopenharmony_ci	 * from L1 will return a different value than VMREAD from L2 (L1 sees
552262306a36Sopenharmony_ci	 * the stripped down value, L2 sees the full value as stored by KVM).
552362306a36Sopenharmony_ci	 */
552462306a36Sopenharmony_ci	if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES)
552562306a36Sopenharmony_ci		value &= 0x1f0ff;
552662306a36Sopenharmony_ci
552762306a36Sopenharmony_ci	vmcs12_write_any(vmcs12, field, offset, value);
552862306a36Sopenharmony_ci
552962306a36Sopenharmony_ci	/*
553062306a36Sopenharmony_ci	 * Do not track vmcs12 dirty-state if in guest-mode as we actually
553162306a36Sopenharmony_ci	 * dirty shadow vmcs12 instead of vmcs12.  Fields that can be updated
553262306a36Sopenharmony_ci	 * by L1 without a vmexit are always updated in the vmcs02, i.e. don't
553362306a36Sopenharmony_ci	 * "dirty" vmcs12, all others go down the prepare_vmcs02() slow path.
553462306a36Sopenharmony_ci	 */
553562306a36Sopenharmony_ci	if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) {
553662306a36Sopenharmony_ci		/*
553762306a36Sopenharmony_ci		 * L1 can read these fields without exiting, ensure the
553862306a36Sopenharmony_ci		 * shadow VMCS is up-to-date.
553962306a36Sopenharmony_ci		 */
554062306a36Sopenharmony_ci		if (enable_shadow_vmcs && is_shadow_field_ro(field)) {
554162306a36Sopenharmony_ci			preempt_disable();
554262306a36Sopenharmony_ci			vmcs_load(vmx->vmcs01.shadow_vmcs);
554362306a36Sopenharmony_ci
554462306a36Sopenharmony_ci			__vmcs_writel(field, value);
554562306a36Sopenharmony_ci
554662306a36Sopenharmony_ci			vmcs_clear(vmx->vmcs01.shadow_vmcs);
554762306a36Sopenharmony_ci			vmcs_load(vmx->loaded_vmcs->vmcs);
554862306a36Sopenharmony_ci			preempt_enable();
554962306a36Sopenharmony_ci		}
555062306a36Sopenharmony_ci		vmx->nested.dirty_vmcs12 = true;
555162306a36Sopenharmony_ci	}
555262306a36Sopenharmony_ci
555362306a36Sopenharmony_ci	return nested_vmx_succeed(vcpu);
555462306a36Sopenharmony_ci}
555562306a36Sopenharmony_ci
555662306a36Sopenharmony_cistatic void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
555762306a36Sopenharmony_ci{
555862306a36Sopenharmony_ci	vmx->nested.current_vmptr = vmptr;
555962306a36Sopenharmony_ci	if (enable_shadow_vmcs) {
556062306a36Sopenharmony_ci		secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
556162306a36Sopenharmony_ci		vmcs_write64(VMCS_LINK_POINTER,
556262306a36Sopenharmony_ci			     __pa(vmx->vmcs01.shadow_vmcs));
556362306a36Sopenharmony_ci		vmx->nested.need_vmcs12_to_shadow_sync = true;
556462306a36Sopenharmony_ci	}
556562306a36Sopenharmony_ci	vmx->nested.dirty_vmcs12 = true;
556662306a36Sopenharmony_ci	vmx->nested.force_msr_bitmap_recalc = true;
556762306a36Sopenharmony_ci}
556862306a36Sopenharmony_ci
556962306a36Sopenharmony_ci/* Emulate the VMPTRLD instruction */
557062306a36Sopenharmony_cistatic int handle_vmptrld(struct kvm_vcpu *vcpu)
557162306a36Sopenharmony_ci{
557262306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
557362306a36Sopenharmony_ci	gpa_t vmptr;
557462306a36Sopenharmony_ci	int r;
557562306a36Sopenharmony_ci
557662306a36Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
557762306a36Sopenharmony_ci		return 1;
557862306a36Sopenharmony_ci
557962306a36Sopenharmony_ci	if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
558062306a36Sopenharmony_ci		return r;
558162306a36Sopenharmony_ci
558262306a36Sopenharmony_ci	if (!page_address_valid(vcpu, vmptr))
558362306a36Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
558462306a36Sopenharmony_ci
558562306a36Sopenharmony_ci	if (vmptr == vmx->nested.vmxon_ptr)
558662306a36Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
558762306a36Sopenharmony_ci
558862306a36Sopenharmony_ci	/* Forbid normal VMPTRLD if Enlightened version was used */
558962306a36Sopenharmony_ci	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
559062306a36Sopenharmony_ci		return 1;
559162306a36Sopenharmony_ci
559262306a36Sopenharmony_ci	if (vmx->nested.current_vmptr != vmptr) {
559362306a36Sopenharmony_ci		struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache;
559462306a36Sopenharmony_ci		struct vmcs_hdr hdr;
559562306a36Sopenharmony_ci
559662306a36Sopenharmony_ci		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
559762306a36Sopenharmony_ci			/*
559862306a36Sopenharmony_ci			 * Reads from an unbacked page return all 1s,
559962306a36Sopenharmony_ci			 * which means that the 32 bits located at the
560062306a36Sopenharmony_ci			 * given physical address won't match the required
560162306a36Sopenharmony_ci			 * VMCS12_REVISION identifier.
560262306a36Sopenharmony_ci			 */
560362306a36Sopenharmony_ci			return nested_vmx_fail(vcpu,
560462306a36Sopenharmony_ci				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
560562306a36Sopenharmony_ci		}
560662306a36Sopenharmony_ci
560762306a36Sopenharmony_ci		if (kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
560862306a36Sopenharmony_ci						 offsetof(struct vmcs12, hdr),
560962306a36Sopenharmony_ci						 sizeof(hdr))) {
561062306a36Sopenharmony_ci			return nested_vmx_fail(vcpu,
561162306a36Sopenharmony_ci				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
561262306a36Sopenharmony_ci		}
561362306a36Sopenharmony_ci
561462306a36Sopenharmony_ci		if (hdr.revision_id != VMCS12_REVISION ||
561562306a36Sopenharmony_ci		    (hdr.shadow_vmcs &&
561662306a36Sopenharmony_ci		     !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
561762306a36Sopenharmony_ci			return nested_vmx_fail(vcpu,
561862306a36Sopenharmony_ci				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
561962306a36Sopenharmony_ci		}
562062306a36Sopenharmony_ci
562162306a36Sopenharmony_ci		nested_release_vmcs12(vcpu);
562262306a36Sopenharmony_ci
562362306a36Sopenharmony_ci		/*
562462306a36Sopenharmony_ci		 * Load VMCS12 from guest memory since it is not already
562562306a36Sopenharmony_ci		 * cached.
562662306a36Sopenharmony_ci		 */
562762306a36Sopenharmony_ci		if (kvm_read_guest_cached(vcpu->kvm, ghc, vmx->nested.cached_vmcs12,
562862306a36Sopenharmony_ci					  VMCS12_SIZE)) {
562962306a36Sopenharmony_ci			return nested_vmx_fail(vcpu,
563062306a36Sopenharmony_ci				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
563162306a36Sopenharmony_ci		}
563262306a36Sopenharmony_ci
563362306a36Sopenharmony_ci		set_current_vmptr(vmx, vmptr);
563462306a36Sopenharmony_ci	}
563562306a36Sopenharmony_ci
563662306a36Sopenharmony_ci	return nested_vmx_succeed(vcpu);
563762306a36Sopenharmony_ci}
563862306a36Sopenharmony_ci
563962306a36Sopenharmony_ci/* Emulate the VMPTRST instruction */
564062306a36Sopenharmony_cistatic int handle_vmptrst(struct kvm_vcpu *vcpu)
564162306a36Sopenharmony_ci{
564262306a36Sopenharmony_ci	unsigned long exit_qual = vmx_get_exit_qual(vcpu);
564362306a36Sopenharmony_ci	u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
564462306a36Sopenharmony_ci	gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
564562306a36Sopenharmony_ci	struct x86_exception e;
564662306a36Sopenharmony_ci	gva_t gva;
564762306a36Sopenharmony_ci	int r;
564862306a36Sopenharmony_ci
564962306a36Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
565062306a36Sopenharmony_ci		return 1;
565162306a36Sopenharmony_ci
565262306a36Sopenharmony_ci	if (unlikely(evmptr_is_valid(to_vmx(vcpu)->nested.hv_evmcs_vmptr)))
565362306a36Sopenharmony_ci		return 1;
565462306a36Sopenharmony_ci
565562306a36Sopenharmony_ci	if (get_vmx_mem_address(vcpu, exit_qual, instr_info,
565662306a36Sopenharmony_ci				true, sizeof(gpa_t), &gva))
565762306a36Sopenharmony_ci		return 1;
565862306a36Sopenharmony_ci	/* *_system ok, nested_vmx_check_permission has verified cpl=0 */
565962306a36Sopenharmony_ci	r = kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
566062306a36Sopenharmony_ci					sizeof(gpa_t), &e);
566162306a36Sopenharmony_ci	if (r != X86EMUL_CONTINUE)
566262306a36Sopenharmony_ci		return kvm_handle_memory_failure(vcpu, r, &e);
566362306a36Sopenharmony_ci
566462306a36Sopenharmony_ci	return nested_vmx_succeed(vcpu);
566562306a36Sopenharmony_ci}
566662306a36Sopenharmony_ci
566762306a36Sopenharmony_ci/* Emulate the INVEPT instruction */
566862306a36Sopenharmony_cistatic int handle_invept(struct kvm_vcpu *vcpu)
566962306a36Sopenharmony_ci{
567062306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
567162306a36Sopenharmony_ci	u32 vmx_instruction_info, types;
567262306a36Sopenharmony_ci	unsigned long type, roots_to_free;
567362306a36Sopenharmony_ci	struct kvm_mmu *mmu;
567462306a36Sopenharmony_ci	gva_t gva;
567562306a36Sopenharmony_ci	struct x86_exception e;
567662306a36Sopenharmony_ci	struct {
567762306a36Sopenharmony_ci		u64 eptp, gpa;
567862306a36Sopenharmony_ci	} operand;
567962306a36Sopenharmony_ci	int i, r, gpr_index;
568062306a36Sopenharmony_ci
568162306a36Sopenharmony_ci	if (!(vmx->nested.msrs.secondary_ctls_high &
568262306a36Sopenharmony_ci	      SECONDARY_EXEC_ENABLE_EPT) ||
568362306a36Sopenharmony_ci	    !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
568462306a36Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
568562306a36Sopenharmony_ci		return 1;
568662306a36Sopenharmony_ci	}
568762306a36Sopenharmony_ci
568862306a36Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
568962306a36Sopenharmony_ci		return 1;
569062306a36Sopenharmony_ci
569162306a36Sopenharmony_ci	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
569262306a36Sopenharmony_ci	gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
569362306a36Sopenharmony_ci	type = kvm_register_read(vcpu, gpr_index);
569462306a36Sopenharmony_ci
569562306a36Sopenharmony_ci	types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
569662306a36Sopenharmony_ci
569762306a36Sopenharmony_ci	if (type >= 32 || !(types & (1 << type)))
569862306a36Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
569962306a36Sopenharmony_ci
570062306a36Sopenharmony_ci	/* According to the Intel VMX instruction reference, the memory
570162306a36Sopenharmony_ci	 * operand is read even if it isn't needed (e.g., for type==global)
570262306a36Sopenharmony_ci	 */
570362306a36Sopenharmony_ci	if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
570462306a36Sopenharmony_ci			vmx_instruction_info, false, sizeof(operand), &gva))
570562306a36Sopenharmony_ci		return 1;
570662306a36Sopenharmony_ci	r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
570762306a36Sopenharmony_ci	if (r != X86EMUL_CONTINUE)
570862306a36Sopenharmony_ci		return kvm_handle_memory_failure(vcpu, r, &e);
570962306a36Sopenharmony_ci
571062306a36Sopenharmony_ci	/*
571162306a36Sopenharmony_ci	 * Nested EPT roots are always held through guest_mmu,
571262306a36Sopenharmony_ci	 * not root_mmu.
571362306a36Sopenharmony_ci	 */
571462306a36Sopenharmony_ci	mmu = &vcpu->arch.guest_mmu;
571562306a36Sopenharmony_ci
571662306a36Sopenharmony_ci	switch (type) {
571762306a36Sopenharmony_ci	case VMX_EPT_EXTENT_CONTEXT:
571862306a36Sopenharmony_ci		if (!nested_vmx_check_eptp(vcpu, operand.eptp))
571962306a36Sopenharmony_ci			return nested_vmx_fail(vcpu,
572062306a36Sopenharmony_ci				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
572162306a36Sopenharmony_ci
572262306a36Sopenharmony_ci		roots_to_free = 0;
572362306a36Sopenharmony_ci		if (nested_ept_root_matches(mmu->root.hpa, mmu->root.pgd,
572462306a36Sopenharmony_ci					    operand.eptp))
572562306a36Sopenharmony_ci			roots_to_free |= KVM_MMU_ROOT_CURRENT;
572662306a36Sopenharmony_ci
572762306a36Sopenharmony_ci		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
572862306a36Sopenharmony_ci			if (nested_ept_root_matches(mmu->prev_roots[i].hpa,
572962306a36Sopenharmony_ci						    mmu->prev_roots[i].pgd,
573062306a36Sopenharmony_ci						    operand.eptp))
573162306a36Sopenharmony_ci				roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
573262306a36Sopenharmony_ci		}
573362306a36Sopenharmony_ci		break;
573462306a36Sopenharmony_ci	case VMX_EPT_EXTENT_GLOBAL:
573562306a36Sopenharmony_ci		roots_to_free = KVM_MMU_ROOTS_ALL;
573662306a36Sopenharmony_ci		break;
573762306a36Sopenharmony_ci	default:
573862306a36Sopenharmony_ci		BUG();
573962306a36Sopenharmony_ci		break;
574062306a36Sopenharmony_ci	}
574162306a36Sopenharmony_ci
574262306a36Sopenharmony_ci	if (roots_to_free)
574362306a36Sopenharmony_ci		kvm_mmu_free_roots(vcpu->kvm, mmu, roots_to_free);
574462306a36Sopenharmony_ci
574562306a36Sopenharmony_ci	return nested_vmx_succeed(vcpu);
574662306a36Sopenharmony_ci}
574762306a36Sopenharmony_ci
574862306a36Sopenharmony_cistatic int handle_invvpid(struct kvm_vcpu *vcpu)
574962306a36Sopenharmony_ci{
575062306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
575162306a36Sopenharmony_ci	u32 vmx_instruction_info;
575262306a36Sopenharmony_ci	unsigned long type, types;
575362306a36Sopenharmony_ci	gva_t gva;
575462306a36Sopenharmony_ci	struct x86_exception e;
575562306a36Sopenharmony_ci	struct {
575662306a36Sopenharmony_ci		u64 vpid;
575762306a36Sopenharmony_ci		u64 gla;
575862306a36Sopenharmony_ci	} operand;
575962306a36Sopenharmony_ci	u16 vpid02;
576062306a36Sopenharmony_ci	int r, gpr_index;
576162306a36Sopenharmony_ci
576262306a36Sopenharmony_ci	if (!(vmx->nested.msrs.secondary_ctls_high &
576362306a36Sopenharmony_ci	      SECONDARY_EXEC_ENABLE_VPID) ||
576462306a36Sopenharmony_ci			!(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
576562306a36Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
576662306a36Sopenharmony_ci		return 1;
576762306a36Sopenharmony_ci	}
576862306a36Sopenharmony_ci
576962306a36Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
577062306a36Sopenharmony_ci		return 1;
577162306a36Sopenharmony_ci
577262306a36Sopenharmony_ci	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
577362306a36Sopenharmony_ci	gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
577462306a36Sopenharmony_ci	type = kvm_register_read(vcpu, gpr_index);
577562306a36Sopenharmony_ci
577662306a36Sopenharmony_ci	types = (vmx->nested.msrs.vpid_caps &
577762306a36Sopenharmony_ci			VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
577862306a36Sopenharmony_ci
577962306a36Sopenharmony_ci	if (type >= 32 || !(types & (1 << type)))
578062306a36Sopenharmony_ci		return nested_vmx_fail(vcpu,
578162306a36Sopenharmony_ci			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
578262306a36Sopenharmony_ci
578362306a36Sopenharmony_ci	/* according to the intel vmx instruction reference, the memory
578462306a36Sopenharmony_ci	 * operand is read even if it isn't needed (e.g., for type==global)
578562306a36Sopenharmony_ci	 */
578662306a36Sopenharmony_ci	if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
578762306a36Sopenharmony_ci			vmx_instruction_info, false, sizeof(operand), &gva))
578862306a36Sopenharmony_ci		return 1;
578962306a36Sopenharmony_ci	r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
579062306a36Sopenharmony_ci	if (r != X86EMUL_CONTINUE)
579162306a36Sopenharmony_ci		return kvm_handle_memory_failure(vcpu, r, &e);
579262306a36Sopenharmony_ci
579362306a36Sopenharmony_ci	if (operand.vpid >> 16)
579462306a36Sopenharmony_ci		return nested_vmx_fail(vcpu,
579562306a36Sopenharmony_ci			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
579662306a36Sopenharmony_ci
579762306a36Sopenharmony_ci	vpid02 = nested_get_vpid02(vcpu);
579862306a36Sopenharmony_ci	switch (type) {
579962306a36Sopenharmony_ci	case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
580062306a36Sopenharmony_ci		if (!operand.vpid ||
580162306a36Sopenharmony_ci		    is_noncanonical_address(operand.gla, vcpu))
580262306a36Sopenharmony_ci			return nested_vmx_fail(vcpu,
580362306a36Sopenharmony_ci				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
580462306a36Sopenharmony_ci		vpid_sync_vcpu_addr(vpid02, operand.gla);
580562306a36Sopenharmony_ci		break;
580662306a36Sopenharmony_ci	case VMX_VPID_EXTENT_SINGLE_CONTEXT:
580762306a36Sopenharmony_ci	case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
580862306a36Sopenharmony_ci		if (!operand.vpid)
580962306a36Sopenharmony_ci			return nested_vmx_fail(vcpu,
581062306a36Sopenharmony_ci				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
581162306a36Sopenharmony_ci		vpid_sync_context(vpid02);
581262306a36Sopenharmony_ci		break;
581362306a36Sopenharmony_ci	case VMX_VPID_EXTENT_ALL_CONTEXT:
581462306a36Sopenharmony_ci		vpid_sync_context(vpid02);
581562306a36Sopenharmony_ci		break;
581662306a36Sopenharmony_ci	default:
581762306a36Sopenharmony_ci		WARN_ON_ONCE(1);
581862306a36Sopenharmony_ci		return kvm_skip_emulated_instruction(vcpu);
581962306a36Sopenharmony_ci	}
582062306a36Sopenharmony_ci
582162306a36Sopenharmony_ci	/*
582262306a36Sopenharmony_ci	 * Sync the shadow page tables if EPT is disabled, L1 is invalidating
582362306a36Sopenharmony_ci	 * linear mappings for L2 (tagged with L2's VPID).  Free all guest
582462306a36Sopenharmony_ci	 * roots as VPIDs are not tracked in the MMU role.
582562306a36Sopenharmony_ci	 *
582662306a36Sopenharmony_ci	 * Note, this operates on root_mmu, not guest_mmu, as L1 and L2 share
582762306a36Sopenharmony_ci	 * an MMU when EPT is disabled.
582862306a36Sopenharmony_ci	 *
582962306a36Sopenharmony_ci	 * TODO: sync only the affected SPTEs for INVDIVIDUAL_ADDR.
583062306a36Sopenharmony_ci	 */
583162306a36Sopenharmony_ci	if (!enable_ept)
583262306a36Sopenharmony_ci		kvm_mmu_free_guest_mode_roots(vcpu->kvm, &vcpu->arch.root_mmu);
583362306a36Sopenharmony_ci
583462306a36Sopenharmony_ci	return nested_vmx_succeed(vcpu);
583562306a36Sopenharmony_ci}
583662306a36Sopenharmony_ci
583762306a36Sopenharmony_cistatic int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
583862306a36Sopenharmony_ci				     struct vmcs12 *vmcs12)
583962306a36Sopenharmony_ci{
584062306a36Sopenharmony_ci	u32 index = kvm_rcx_read(vcpu);
584162306a36Sopenharmony_ci	u64 new_eptp;
584262306a36Sopenharmony_ci
584362306a36Sopenharmony_ci	if (WARN_ON_ONCE(!nested_cpu_has_ept(vmcs12)))
584462306a36Sopenharmony_ci		return 1;
584562306a36Sopenharmony_ci	if (index >= VMFUNC_EPTP_ENTRIES)
584662306a36Sopenharmony_ci		return 1;
584762306a36Sopenharmony_ci
584862306a36Sopenharmony_ci	if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
584962306a36Sopenharmony_ci				     &new_eptp, index * 8, 8))
585062306a36Sopenharmony_ci		return 1;
585162306a36Sopenharmony_ci
585262306a36Sopenharmony_ci	/*
585362306a36Sopenharmony_ci	 * If the (L2) guest does a vmfunc to the currently
585462306a36Sopenharmony_ci	 * active ept pointer, we don't have to do anything else
585562306a36Sopenharmony_ci	 */
585662306a36Sopenharmony_ci	if (vmcs12->ept_pointer != new_eptp) {
585762306a36Sopenharmony_ci		if (!nested_vmx_check_eptp(vcpu, new_eptp))
585862306a36Sopenharmony_ci			return 1;
585962306a36Sopenharmony_ci
586062306a36Sopenharmony_ci		vmcs12->ept_pointer = new_eptp;
586162306a36Sopenharmony_ci		nested_ept_new_eptp(vcpu);
586262306a36Sopenharmony_ci
586362306a36Sopenharmony_ci		if (!nested_cpu_has_vpid(vmcs12))
586462306a36Sopenharmony_ci			kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
586562306a36Sopenharmony_ci	}
586662306a36Sopenharmony_ci
586762306a36Sopenharmony_ci	return 0;
586862306a36Sopenharmony_ci}
586962306a36Sopenharmony_ci
587062306a36Sopenharmony_cistatic int handle_vmfunc(struct kvm_vcpu *vcpu)
587162306a36Sopenharmony_ci{
587262306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
587362306a36Sopenharmony_ci	struct vmcs12 *vmcs12;
587462306a36Sopenharmony_ci	u32 function = kvm_rax_read(vcpu);
587562306a36Sopenharmony_ci
587662306a36Sopenharmony_ci	/*
587762306a36Sopenharmony_ci	 * VMFUNC should never execute cleanly while L1 is active; KVM supports
587862306a36Sopenharmony_ci	 * VMFUNC for nested VMs, but not for L1.
587962306a36Sopenharmony_ci	 */
588062306a36Sopenharmony_ci	if (WARN_ON_ONCE(!is_guest_mode(vcpu))) {
588162306a36Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
588262306a36Sopenharmony_ci		return 1;
588362306a36Sopenharmony_ci	}
588462306a36Sopenharmony_ci
588562306a36Sopenharmony_ci	vmcs12 = get_vmcs12(vcpu);
588662306a36Sopenharmony_ci
588762306a36Sopenharmony_ci	/*
588862306a36Sopenharmony_ci	 * #UD on out-of-bounds function has priority over VM-Exit, and VMFUNC
588962306a36Sopenharmony_ci	 * is enabled in vmcs02 if and only if it's enabled in vmcs12.
589062306a36Sopenharmony_ci	 */
589162306a36Sopenharmony_ci	if (WARN_ON_ONCE((function > 63) || !nested_cpu_has_vmfunc(vmcs12))) {
589262306a36Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
589362306a36Sopenharmony_ci		return 1;
589462306a36Sopenharmony_ci	}
589562306a36Sopenharmony_ci
589662306a36Sopenharmony_ci	if (!(vmcs12->vm_function_control & BIT_ULL(function)))
589762306a36Sopenharmony_ci		goto fail;
589862306a36Sopenharmony_ci
589962306a36Sopenharmony_ci	switch (function) {
590062306a36Sopenharmony_ci	case 0:
590162306a36Sopenharmony_ci		if (nested_vmx_eptp_switching(vcpu, vmcs12))
590262306a36Sopenharmony_ci			goto fail;
590362306a36Sopenharmony_ci		break;
590462306a36Sopenharmony_ci	default:
590562306a36Sopenharmony_ci		goto fail;
590662306a36Sopenharmony_ci	}
590762306a36Sopenharmony_ci	return kvm_skip_emulated_instruction(vcpu);
590862306a36Sopenharmony_ci
590962306a36Sopenharmony_cifail:
591062306a36Sopenharmony_ci	/*
591162306a36Sopenharmony_ci	 * This is effectively a reflected VM-Exit, as opposed to a synthesized
591262306a36Sopenharmony_ci	 * nested VM-Exit.  Pass the original exit reason, i.e. don't hardcode
591362306a36Sopenharmony_ci	 * EXIT_REASON_VMFUNC as the exit reason.
591462306a36Sopenharmony_ci	 */
591562306a36Sopenharmony_ci	nested_vmx_vmexit(vcpu, vmx->exit_reason.full,
591662306a36Sopenharmony_ci			  vmx_get_intr_info(vcpu),
591762306a36Sopenharmony_ci			  vmx_get_exit_qual(vcpu));
591862306a36Sopenharmony_ci	return 1;
591962306a36Sopenharmony_ci}
592062306a36Sopenharmony_ci
592162306a36Sopenharmony_ci/*
592262306a36Sopenharmony_ci * Return true if an IO instruction with the specified port and size should cause
592362306a36Sopenharmony_ci * a VM-exit into L1.
592462306a36Sopenharmony_ci */
592562306a36Sopenharmony_cibool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
592662306a36Sopenharmony_ci				 int size)
592762306a36Sopenharmony_ci{
592862306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
592962306a36Sopenharmony_ci	gpa_t bitmap, last_bitmap;
593062306a36Sopenharmony_ci	u8 b;
593162306a36Sopenharmony_ci
593262306a36Sopenharmony_ci	last_bitmap = INVALID_GPA;
593362306a36Sopenharmony_ci	b = -1;
593462306a36Sopenharmony_ci
593562306a36Sopenharmony_ci	while (size > 0) {
593662306a36Sopenharmony_ci		if (port < 0x8000)
593762306a36Sopenharmony_ci			bitmap = vmcs12->io_bitmap_a;
593862306a36Sopenharmony_ci		else if (port < 0x10000)
593962306a36Sopenharmony_ci			bitmap = vmcs12->io_bitmap_b;
594062306a36Sopenharmony_ci		else
594162306a36Sopenharmony_ci			return true;
594262306a36Sopenharmony_ci		bitmap += (port & 0x7fff) / 8;
594362306a36Sopenharmony_ci
594462306a36Sopenharmony_ci		if (last_bitmap != bitmap)
594562306a36Sopenharmony_ci			if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
594662306a36Sopenharmony_ci				return true;
594762306a36Sopenharmony_ci		if (b & (1 << (port & 7)))
594862306a36Sopenharmony_ci			return true;
594962306a36Sopenharmony_ci
595062306a36Sopenharmony_ci		port++;
595162306a36Sopenharmony_ci		size--;
595262306a36Sopenharmony_ci		last_bitmap = bitmap;
595362306a36Sopenharmony_ci	}
595462306a36Sopenharmony_ci
595562306a36Sopenharmony_ci	return false;
595662306a36Sopenharmony_ci}
595762306a36Sopenharmony_ci
595862306a36Sopenharmony_cistatic bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
595962306a36Sopenharmony_ci				       struct vmcs12 *vmcs12)
596062306a36Sopenharmony_ci{
596162306a36Sopenharmony_ci	unsigned long exit_qualification;
596262306a36Sopenharmony_ci	unsigned short port;
596362306a36Sopenharmony_ci	int size;
596462306a36Sopenharmony_ci
596562306a36Sopenharmony_ci	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
596662306a36Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
596762306a36Sopenharmony_ci
596862306a36Sopenharmony_ci	exit_qualification = vmx_get_exit_qual(vcpu);
596962306a36Sopenharmony_ci
597062306a36Sopenharmony_ci	port = exit_qualification >> 16;
597162306a36Sopenharmony_ci	size = (exit_qualification & 7) + 1;
597262306a36Sopenharmony_ci
597362306a36Sopenharmony_ci	return nested_vmx_check_io_bitmaps(vcpu, port, size);
597462306a36Sopenharmony_ci}
597562306a36Sopenharmony_ci
597662306a36Sopenharmony_ci/*
597762306a36Sopenharmony_ci * Return 1 if we should exit from L2 to L1 to handle an MSR access,
597862306a36Sopenharmony_ci * rather than handle it ourselves in L0. I.e., check whether L1 expressed
597962306a36Sopenharmony_ci * disinterest in the current event (read or write a specific MSR) by using an
598062306a36Sopenharmony_ci * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
598162306a36Sopenharmony_ci */
598262306a36Sopenharmony_cistatic bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
598362306a36Sopenharmony_ci					struct vmcs12 *vmcs12,
598462306a36Sopenharmony_ci					union vmx_exit_reason exit_reason)
598562306a36Sopenharmony_ci{
598662306a36Sopenharmony_ci	u32 msr_index = kvm_rcx_read(vcpu);
598762306a36Sopenharmony_ci	gpa_t bitmap;
598862306a36Sopenharmony_ci
598962306a36Sopenharmony_ci	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
599062306a36Sopenharmony_ci		return true;
599162306a36Sopenharmony_ci
599262306a36Sopenharmony_ci	/*
599362306a36Sopenharmony_ci	 * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
599462306a36Sopenharmony_ci	 * for the four combinations of read/write and low/high MSR numbers.
599562306a36Sopenharmony_ci	 * First we need to figure out which of the four to use:
599662306a36Sopenharmony_ci	 */
599762306a36Sopenharmony_ci	bitmap = vmcs12->msr_bitmap;
599862306a36Sopenharmony_ci	if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
599962306a36Sopenharmony_ci		bitmap += 2048;
600062306a36Sopenharmony_ci	if (msr_index >= 0xc0000000) {
600162306a36Sopenharmony_ci		msr_index -= 0xc0000000;
600262306a36Sopenharmony_ci		bitmap += 1024;
600362306a36Sopenharmony_ci	}
600462306a36Sopenharmony_ci
600562306a36Sopenharmony_ci	/* Then read the msr_index'th bit from this bitmap: */
600662306a36Sopenharmony_ci	if (msr_index < 1024*8) {
600762306a36Sopenharmony_ci		unsigned char b;
600862306a36Sopenharmony_ci		if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
600962306a36Sopenharmony_ci			return true;
601062306a36Sopenharmony_ci		return 1 & (b >> (msr_index & 7));
601162306a36Sopenharmony_ci	} else
601262306a36Sopenharmony_ci		return true; /* let L1 handle the wrong parameter */
601362306a36Sopenharmony_ci}
601462306a36Sopenharmony_ci
601562306a36Sopenharmony_ci/*
601662306a36Sopenharmony_ci * Return 1 if we should exit from L2 to L1 to handle a CR access exit,
601762306a36Sopenharmony_ci * rather than handle it ourselves in L0. I.e., check if L1 wanted to
601862306a36Sopenharmony_ci * intercept (via guest_host_mask etc.) the current event.
601962306a36Sopenharmony_ci */
602062306a36Sopenharmony_cistatic bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
602162306a36Sopenharmony_ci	struct vmcs12 *vmcs12)
602262306a36Sopenharmony_ci{
602362306a36Sopenharmony_ci	unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
602462306a36Sopenharmony_ci	int cr = exit_qualification & 15;
602562306a36Sopenharmony_ci	int reg;
602662306a36Sopenharmony_ci	unsigned long val;
602762306a36Sopenharmony_ci
602862306a36Sopenharmony_ci	switch ((exit_qualification >> 4) & 3) {
602962306a36Sopenharmony_ci	case 0: /* mov to cr */
603062306a36Sopenharmony_ci		reg = (exit_qualification >> 8) & 15;
603162306a36Sopenharmony_ci		val = kvm_register_read(vcpu, reg);
603262306a36Sopenharmony_ci		switch (cr) {
603362306a36Sopenharmony_ci		case 0:
603462306a36Sopenharmony_ci			if (vmcs12->cr0_guest_host_mask &
603562306a36Sopenharmony_ci			    (val ^ vmcs12->cr0_read_shadow))
603662306a36Sopenharmony_ci				return true;
603762306a36Sopenharmony_ci			break;
603862306a36Sopenharmony_ci		case 3:
603962306a36Sopenharmony_ci			if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
604062306a36Sopenharmony_ci				return true;
604162306a36Sopenharmony_ci			break;
604262306a36Sopenharmony_ci		case 4:
604362306a36Sopenharmony_ci			if (vmcs12->cr4_guest_host_mask &
604462306a36Sopenharmony_ci			    (vmcs12->cr4_read_shadow ^ val))
604562306a36Sopenharmony_ci				return true;
604662306a36Sopenharmony_ci			break;
604762306a36Sopenharmony_ci		case 8:
604862306a36Sopenharmony_ci			if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
604962306a36Sopenharmony_ci				return true;
605062306a36Sopenharmony_ci			break;
605162306a36Sopenharmony_ci		}
605262306a36Sopenharmony_ci		break;
605362306a36Sopenharmony_ci	case 2: /* clts */
605462306a36Sopenharmony_ci		if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
605562306a36Sopenharmony_ci		    (vmcs12->cr0_read_shadow & X86_CR0_TS))
605662306a36Sopenharmony_ci			return true;
605762306a36Sopenharmony_ci		break;
605862306a36Sopenharmony_ci	case 1: /* mov from cr */
605962306a36Sopenharmony_ci		switch (cr) {
606062306a36Sopenharmony_ci		case 3:
606162306a36Sopenharmony_ci			if (vmcs12->cpu_based_vm_exec_control &
606262306a36Sopenharmony_ci			    CPU_BASED_CR3_STORE_EXITING)
606362306a36Sopenharmony_ci				return true;
606462306a36Sopenharmony_ci			break;
606562306a36Sopenharmony_ci		case 8:
606662306a36Sopenharmony_ci			if (vmcs12->cpu_based_vm_exec_control &
606762306a36Sopenharmony_ci			    CPU_BASED_CR8_STORE_EXITING)
606862306a36Sopenharmony_ci				return true;
606962306a36Sopenharmony_ci			break;
607062306a36Sopenharmony_ci		}
607162306a36Sopenharmony_ci		break;
607262306a36Sopenharmony_ci	case 3: /* lmsw */
607362306a36Sopenharmony_ci		/*
607462306a36Sopenharmony_ci		 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
607562306a36Sopenharmony_ci		 * cr0. Other attempted changes are ignored, with no exit.
607662306a36Sopenharmony_ci		 */
607762306a36Sopenharmony_ci		val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
607862306a36Sopenharmony_ci		if (vmcs12->cr0_guest_host_mask & 0xe &
607962306a36Sopenharmony_ci		    (val ^ vmcs12->cr0_read_shadow))
608062306a36Sopenharmony_ci			return true;
608162306a36Sopenharmony_ci		if ((vmcs12->cr0_guest_host_mask & 0x1) &&
608262306a36Sopenharmony_ci		    !(vmcs12->cr0_read_shadow & 0x1) &&
608362306a36Sopenharmony_ci		    (val & 0x1))
608462306a36Sopenharmony_ci			return true;
608562306a36Sopenharmony_ci		break;
608662306a36Sopenharmony_ci	}
608762306a36Sopenharmony_ci	return false;
608862306a36Sopenharmony_ci}
608962306a36Sopenharmony_ci
609062306a36Sopenharmony_cistatic bool nested_vmx_exit_handled_encls(struct kvm_vcpu *vcpu,
609162306a36Sopenharmony_ci					  struct vmcs12 *vmcs12)
609262306a36Sopenharmony_ci{
609362306a36Sopenharmony_ci	u32 encls_leaf;
609462306a36Sopenharmony_ci
609562306a36Sopenharmony_ci	if (!guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
609662306a36Sopenharmony_ci	    !nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING))
609762306a36Sopenharmony_ci		return false;
609862306a36Sopenharmony_ci
609962306a36Sopenharmony_ci	encls_leaf = kvm_rax_read(vcpu);
610062306a36Sopenharmony_ci	if (encls_leaf > 62)
610162306a36Sopenharmony_ci		encls_leaf = 63;
610262306a36Sopenharmony_ci	return vmcs12->encls_exiting_bitmap & BIT_ULL(encls_leaf);
610362306a36Sopenharmony_ci}
610462306a36Sopenharmony_ci
610562306a36Sopenharmony_cistatic bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
610662306a36Sopenharmony_ci	struct vmcs12 *vmcs12, gpa_t bitmap)
610762306a36Sopenharmony_ci{
610862306a36Sopenharmony_ci	u32 vmx_instruction_info;
610962306a36Sopenharmony_ci	unsigned long field;
611062306a36Sopenharmony_ci	u8 b;
611162306a36Sopenharmony_ci
611262306a36Sopenharmony_ci	if (!nested_cpu_has_shadow_vmcs(vmcs12))
611362306a36Sopenharmony_ci		return true;
611462306a36Sopenharmony_ci
611562306a36Sopenharmony_ci	/* Decode instruction info and find the field to access */
611662306a36Sopenharmony_ci	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
611762306a36Sopenharmony_ci	field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
611862306a36Sopenharmony_ci
611962306a36Sopenharmony_ci	/* Out-of-range fields always cause a VM exit from L2 to L1 */
612062306a36Sopenharmony_ci	if (field >> 15)
612162306a36Sopenharmony_ci		return true;
612262306a36Sopenharmony_ci
612362306a36Sopenharmony_ci	if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1))
612462306a36Sopenharmony_ci		return true;
612562306a36Sopenharmony_ci
612662306a36Sopenharmony_ci	return 1 & (b >> (field & 7));
612762306a36Sopenharmony_ci}
612862306a36Sopenharmony_ci
612962306a36Sopenharmony_cistatic bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12)
613062306a36Sopenharmony_ci{
613162306a36Sopenharmony_ci	u32 entry_intr_info = vmcs12->vm_entry_intr_info_field;
613262306a36Sopenharmony_ci
613362306a36Sopenharmony_ci	if (nested_cpu_has_mtf(vmcs12))
613462306a36Sopenharmony_ci		return true;
613562306a36Sopenharmony_ci
613662306a36Sopenharmony_ci	/*
613762306a36Sopenharmony_ci	 * An MTF VM-exit may be injected into the guest by setting the
613862306a36Sopenharmony_ci	 * interruption-type to 7 (other event) and the vector field to 0. Such
613962306a36Sopenharmony_ci	 * is the case regardless of the 'monitor trap flag' VM-execution
614062306a36Sopenharmony_ci	 * control.
614162306a36Sopenharmony_ci	 */
614262306a36Sopenharmony_ci	return entry_intr_info == (INTR_INFO_VALID_MASK
614362306a36Sopenharmony_ci				   | INTR_TYPE_OTHER_EVENT);
614462306a36Sopenharmony_ci}
614562306a36Sopenharmony_ci
614662306a36Sopenharmony_ci/*
614762306a36Sopenharmony_ci * Return true if L0 wants to handle an exit from L2 regardless of whether or not
614862306a36Sopenharmony_ci * L1 wants the exit.  Only call this when in is_guest_mode (L2).
614962306a36Sopenharmony_ci */
615062306a36Sopenharmony_cistatic bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
615162306a36Sopenharmony_ci				     union vmx_exit_reason exit_reason)
615262306a36Sopenharmony_ci{
615362306a36Sopenharmony_ci	u32 intr_info;
615462306a36Sopenharmony_ci
615562306a36Sopenharmony_ci	switch ((u16)exit_reason.basic) {
615662306a36Sopenharmony_ci	case EXIT_REASON_EXCEPTION_NMI:
615762306a36Sopenharmony_ci		intr_info = vmx_get_intr_info(vcpu);
615862306a36Sopenharmony_ci		if (is_nmi(intr_info))
615962306a36Sopenharmony_ci			return true;
616062306a36Sopenharmony_ci		else if (is_page_fault(intr_info))
616162306a36Sopenharmony_ci			return vcpu->arch.apf.host_apf_flags ||
616262306a36Sopenharmony_ci			       vmx_need_pf_intercept(vcpu);
616362306a36Sopenharmony_ci		else if (is_debug(intr_info) &&
616462306a36Sopenharmony_ci			 vcpu->guest_debug &
616562306a36Sopenharmony_ci			 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
616662306a36Sopenharmony_ci			return true;
616762306a36Sopenharmony_ci		else if (is_breakpoint(intr_info) &&
616862306a36Sopenharmony_ci			 vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
616962306a36Sopenharmony_ci			return true;
617062306a36Sopenharmony_ci		else if (is_alignment_check(intr_info) &&
617162306a36Sopenharmony_ci			 !vmx_guest_inject_ac(vcpu))
617262306a36Sopenharmony_ci			return true;
617362306a36Sopenharmony_ci		return false;
617462306a36Sopenharmony_ci	case EXIT_REASON_EXTERNAL_INTERRUPT:
617562306a36Sopenharmony_ci		return true;
617662306a36Sopenharmony_ci	case EXIT_REASON_MCE_DURING_VMENTRY:
617762306a36Sopenharmony_ci		return true;
617862306a36Sopenharmony_ci	case EXIT_REASON_EPT_VIOLATION:
617962306a36Sopenharmony_ci		/*
618062306a36Sopenharmony_ci		 * L0 always deals with the EPT violation. If nested EPT is
618162306a36Sopenharmony_ci		 * used, and the nested mmu code discovers that the address is
618262306a36Sopenharmony_ci		 * missing in the guest EPT table (EPT12), the EPT violation
618362306a36Sopenharmony_ci		 * will be injected with nested_ept_inject_page_fault()
618462306a36Sopenharmony_ci		 */
618562306a36Sopenharmony_ci		return true;
618662306a36Sopenharmony_ci	case EXIT_REASON_EPT_MISCONFIG:
618762306a36Sopenharmony_ci		/*
618862306a36Sopenharmony_ci		 * L2 never uses directly L1's EPT, but rather L0's own EPT
618962306a36Sopenharmony_ci		 * table (shadow on EPT) or a merged EPT table that L0 built
619062306a36Sopenharmony_ci		 * (EPT on EPT). So any problems with the structure of the
619162306a36Sopenharmony_ci		 * table is L0's fault.
619262306a36Sopenharmony_ci		 */
619362306a36Sopenharmony_ci		return true;
619462306a36Sopenharmony_ci	case EXIT_REASON_PREEMPTION_TIMER:
619562306a36Sopenharmony_ci		return true;
619662306a36Sopenharmony_ci	case EXIT_REASON_PML_FULL:
619762306a36Sopenharmony_ci		/*
619862306a36Sopenharmony_ci		 * PML is emulated for an L1 VMM and should never be enabled in
619962306a36Sopenharmony_ci		 * vmcs02, always "handle" PML_FULL by exiting to userspace.
620062306a36Sopenharmony_ci		 */
620162306a36Sopenharmony_ci		return true;
620262306a36Sopenharmony_ci	case EXIT_REASON_VMFUNC:
620362306a36Sopenharmony_ci		/* VM functions are emulated through L2->L0 vmexits. */
620462306a36Sopenharmony_ci		return true;
620562306a36Sopenharmony_ci	case EXIT_REASON_BUS_LOCK:
620662306a36Sopenharmony_ci		/*
620762306a36Sopenharmony_ci		 * At present, bus lock VM exit is never exposed to L1.
620862306a36Sopenharmony_ci		 * Handle L2's bus locks in L0 directly.
620962306a36Sopenharmony_ci		 */
621062306a36Sopenharmony_ci		return true;
621162306a36Sopenharmony_ci	case EXIT_REASON_VMCALL:
621262306a36Sopenharmony_ci		/* Hyper-V L2 TLB flush hypercall is handled by L0 */
621362306a36Sopenharmony_ci		return guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
621462306a36Sopenharmony_ci			nested_evmcs_l2_tlb_flush_enabled(vcpu) &&
621562306a36Sopenharmony_ci			kvm_hv_is_tlb_flush_hcall(vcpu);
621662306a36Sopenharmony_ci	default:
621762306a36Sopenharmony_ci		break;
621862306a36Sopenharmony_ci	}
621962306a36Sopenharmony_ci	return false;
622062306a36Sopenharmony_ci}
622162306a36Sopenharmony_ci
622262306a36Sopenharmony_ci/*
622362306a36Sopenharmony_ci * Return 1 if L1 wants to intercept an exit from L2.  Only call this when in
622462306a36Sopenharmony_ci * is_guest_mode (L2).
622562306a36Sopenharmony_ci */
622662306a36Sopenharmony_cistatic bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
622762306a36Sopenharmony_ci				     union vmx_exit_reason exit_reason)
622862306a36Sopenharmony_ci{
622962306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
623062306a36Sopenharmony_ci	u32 intr_info;
623162306a36Sopenharmony_ci
623262306a36Sopenharmony_ci	switch ((u16)exit_reason.basic) {
623362306a36Sopenharmony_ci	case EXIT_REASON_EXCEPTION_NMI:
623462306a36Sopenharmony_ci		intr_info = vmx_get_intr_info(vcpu);
623562306a36Sopenharmony_ci		if (is_nmi(intr_info))
623662306a36Sopenharmony_ci			return true;
623762306a36Sopenharmony_ci		else if (is_page_fault(intr_info))
623862306a36Sopenharmony_ci			return true;
623962306a36Sopenharmony_ci		return vmcs12->exception_bitmap &
624062306a36Sopenharmony_ci				(1u << (intr_info & INTR_INFO_VECTOR_MASK));
624162306a36Sopenharmony_ci	case EXIT_REASON_EXTERNAL_INTERRUPT:
624262306a36Sopenharmony_ci		return nested_exit_on_intr(vcpu);
624362306a36Sopenharmony_ci	case EXIT_REASON_TRIPLE_FAULT:
624462306a36Sopenharmony_ci		return true;
624562306a36Sopenharmony_ci	case EXIT_REASON_INTERRUPT_WINDOW:
624662306a36Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING);
624762306a36Sopenharmony_ci	case EXIT_REASON_NMI_WINDOW:
624862306a36Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING);
624962306a36Sopenharmony_ci	case EXIT_REASON_TASK_SWITCH:
625062306a36Sopenharmony_ci		return true;
625162306a36Sopenharmony_ci	case EXIT_REASON_CPUID:
625262306a36Sopenharmony_ci		return true;
625362306a36Sopenharmony_ci	case EXIT_REASON_HLT:
625462306a36Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
625562306a36Sopenharmony_ci	case EXIT_REASON_INVD:
625662306a36Sopenharmony_ci		return true;
625762306a36Sopenharmony_ci	case EXIT_REASON_INVLPG:
625862306a36Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
625962306a36Sopenharmony_ci	case EXIT_REASON_RDPMC:
626062306a36Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
626162306a36Sopenharmony_ci	case EXIT_REASON_RDRAND:
626262306a36Sopenharmony_ci		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
626362306a36Sopenharmony_ci	case EXIT_REASON_RDSEED:
626462306a36Sopenharmony_ci		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
626562306a36Sopenharmony_ci	case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
626662306a36Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
626762306a36Sopenharmony_ci	case EXIT_REASON_VMREAD:
626862306a36Sopenharmony_ci		return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
626962306a36Sopenharmony_ci			vmcs12->vmread_bitmap);
627062306a36Sopenharmony_ci	case EXIT_REASON_VMWRITE:
627162306a36Sopenharmony_ci		return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
627262306a36Sopenharmony_ci			vmcs12->vmwrite_bitmap);
627362306a36Sopenharmony_ci	case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
627462306a36Sopenharmony_ci	case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
627562306a36Sopenharmony_ci	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
627662306a36Sopenharmony_ci	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
627762306a36Sopenharmony_ci	case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
627862306a36Sopenharmony_ci		/*
627962306a36Sopenharmony_ci		 * VMX instructions trap unconditionally. This allows L1 to
628062306a36Sopenharmony_ci		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
628162306a36Sopenharmony_ci		 */
628262306a36Sopenharmony_ci		return true;
628362306a36Sopenharmony_ci	case EXIT_REASON_CR_ACCESS:
628462306a36Sopenharmony_ci		return nested_vmx_exit_handled_cr(vcpu, vmcs12);
628562306a36Sopenharmony_ci	case EXIT_REASON_DR_ACCESS:
628662306a36Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
628762306a36Sopenharmony_ci	case EXIT_REASON_IO_INSTRUCTION:
628862306a36Sopenharmony_ci		return nested_vmx_exit_handled_io(vcpu, vmcs12);
628962306a36Sopenharmony_ci	case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
629062306a36Sopenharmony_ci		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
629162306a36Sopenharmony_ci	case EXIT_REASON_MSR_READ:
629262306a36Sopenharmony_ci	case EXIT_REASON_MSR_WRITE:
629362306a36Sopenharmony_ci		return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
629462306a36Sopenharmony_ci	case EXIT_REASON_INVALID_STATE:
629562306a36Sopenharmony_ci		return true;
629662306a36Sopenharmony_ci	case EXIT_REASON_MWAIT_INSTRUCTION:
629762306a36Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
629862306a36Sopenharmony_ci	case EXIT_REASON_MONITOR_TRAP_FLAG:
629962306a36Sopenharmony_ci		return nested_vmx_exit_handled_mtf(vmcs12);
630062306a36Sopenharmony_ci	case EXIT_REASON_MONITOR_INSTRUCTION:
630162306a36Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
630262306a36Sopenharmony_ci	case EXIT_REASON_PAUSE_INSTRUCTION:
630362306a36Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
630462306a36Sopenharmony_ci			nested_cpu_has2(vmcs12,
630562306a36Sopenharmony_ci				SECONDARY_EXEC_PAUSE_LOOP_EXITING);
630662306a36Sopenharmony_ci	case EXIT_REASON_MCE_DURING_VMENTRY:
630762306a36Sopenharmony_ci		return true;
630862306a36Sopenharmony_ci	case EXIT_REASON_TPR_BELOW_THRESHOLD:
630962306a36Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
631062306a36Sopenharmony_ci	case EXIT_REASON_APIC_ACCESS:
631162306a36Sopenharmony_ci	case EXIT_REASON_APIC_WRITE:
631262306a36Sopenharmony_ci	case EXIT_REASON_EOI_INDUCED:
631362306a36Sopenharmony_ci		/*
631462306a36Sopenharmony_ci		 * The controls for "virtualize APIC accesses," "APIC-
631562306a36Sopenharmony_ci		 * register virtualization," and "virtual-interrupt
631662306a36Sopenharmony_ci		 * delivery" only come from vmcs12.
631762306a36Sopenharmony_ci		 */
631862306a36Sopenharmony_ci		return true;
631962306a36Sopenharmony_ci	case EXIT_REASON_INVPCID:
632062306a36Sopenharmony_ci		return
632162306a36Sopenharmony_ci			nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
632262306a36Sopenharmony_ci			nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
632362306a36Sopenharmony_ci	case EXIT_REASON_WBINVD:
632462306a36Sopenharmony_ci		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
632562306a36Sopenharmony_ci	case EXIT_REASON_XSETBV:
632662306a36Sopenharmony_ci		return true;
632762306a36Sopenharmony_ci	case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
632862306a36Sopenharmony_ci		/*
632962306a36Sopenharmony_ci		 * This should never happen, since it is not possible to
633062306a36Sopenharmony_ci		 * set XSS to a non-zero value---neither in L1 nor in L2.
633162306a36Sopenharmony_ci		 * If if it were, XSS would have to be checked against
633262306a36Sopenharmony_ci		 * the XSS exit bitmap in vmcs12.
633362306a36Sopenharmony_ci		 */
633462306a36Sopenharmony_ci		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
633562306a36Sopenharmony_ci	case EXIT_REASON_UMWAIT:
633662306a36Sopenharmony_ci	case EXIT_REASON_TPAUSE:
633762306a36Sopenharmony_ci		return nested_cpu_has2(vmcs12,
633862306a36Sopenharmony_ci			SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE);
633962306a36Sopenharmony_ci	case EXIT_REASON_ENCLS:
634062306a36Sopenharmony_ci		return nested_vmx_exit_handled_encls(vcpu, vmcs12);
634162306a36Sopenharmony_ci	case EXIT_REASON_NOTIFY:
634262306a36Sopenharmony_ci		/* Notify VM exit is not exposed to L1 */
634362306a36Sopenharmony_ci		return false;
634462306a36Sopenharmony_ci	default:
634562306a36Sopenharmony_ci		return true;
634662306a36Sopenharmony_ci	}
634762306a36Sopenharmony_ci}
634862306a36Sopenharmony_ci
634962306a36Sopenharmony_ci/*
635062306a36Sopenharmony_ci * Conditionally reflect a VM-Exit into L1.  Returns %true if the VM-Exit was
635162306a36Sopenharmony_ci * reflected into L1.
635262306a36Sopenharmony_ci */
635362306a36Sopenharmony_cibool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
635462306a36Sopenharmony_ci{
635562306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
635662306a36Sopenharmony_ci	union vmx_exit_reason exit_reason = vmx->exit_reason;
635762306a36Sopenharmony_ci	unsigned long exit_qual;
635862306a36Sopenharmony_ci	u32 exit_intr_info;
635962306a36Sopenharmony_ci
636062306a36Sopenharmony_ci	WARN_ON_ONCE(vmx->nested.nested_run_pending);
636162306a36Sopenharmony_ci
636262306a36Sopenharmony_ci	/*
636362306a36Sopenharmony_ci	 * Late nested VM-Fail shares the same flow as nested VM-Exit since KVM
636462306a36Sopenharmony_ci	 * has already loaded L2's state.
636562306a36Sopenharmony_ci	 */
636662306a36Sopenharmony_ci	if (unlikely(vmx->fail)) {
636762306a36Sopenharmony_ci		trace_kvm_nested_vmenter_failed(
636862306a36Sopenharmony_ci			"hardware VM-instruction error: ",
636962306a36Sopenharmony_ci			vmcs_read32(VM_INSTRUCTION_ERROR));
637062306a36Sopenharmony_ci		exit_intr_info = 0;
637162306a36Sopenharmony_ci		exit_qual = 0;
637262306a36Sopenharmony_ci		goto reflect_vmexit;
637362306a36Sopenharmony_ci	}
637462306a36Sopenharmony_ci
637562306a36Sopenharmony_ci	trace_kvm_nested_vmexit(vcpu, KVM_ISA_VMX);
637662306a36Sopenharmony_ci
637762306a36Sopenharmony_ci	/* If L0 (KVM) wants the exit, it trumps L1's desires. */
637862306a36Sopenharmony_ci	if (nested_vmx_l0_wants_exit(vcpu, exit_reason))
637962306a36Sopenharmony_ci		return false;
638062306a36Sopenharmony_ci
638162306a36Sopenharmony_ci	/* If L1 doesn't want the exit, handle it in L0. */
638262306a36Sopenharmony_ci	if (!nested_vmx_l1_wants_exit(vcpu, exit_reason))
638362306a36Sopenharmony_ci		return false;
638462306a36Sopenharmony_ci
638562306a36Sopenharmony_ci	/*
638662306a36Sopenharmony_ci	 * vmcs.VM_EXIT_INTR_INFO is only valid for EXCEPTION_NMI exits.  For
638762306a36Sopenharmony_ci	 * EXTERNAL_INTERRUPT, the value for vmcs12->vm_exit_intr_info would
638862306a36Sopenharmony_ci	 * need to be synthesized by querying the in-kernel LAPIC, but external
638962306a36Sopenharmony_ci	 * interrupts are never reflected to L1 so it's a non-issue.
639062306a36Sopenharmony_ci	 */
639162306a36Sopenharmony_ci	exit_intr_info = vmx_get_intr_info(vcpu);
639262306a36Sopenharmony_ci	if (is_exception_with_error_code(exit_intr_info)) {
639362306a36Sopenharmony_ci		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
639462306a36Sopenharmony_ci
639562306a36Sopenharmony_ci		vmcs12->vm_exit_intr_error_code =
639662306a36Sopenharmony_ci			vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
639762306a36Sopenharmony_ci	}
639862306a36Sopenharmony_ci	exit_qual = vmx_get_exit_qual(vcpu);
639962306a36Sopenharmony_ci
640062306a36Sopenharmony_cireflect_vmexit:
640162306a36Sopenharmony_ci	nested_vmx_vmexit(vcpu, exit_reason.full, exit_intr_info, exit_qual);
640262306a36Sopenharmony_ci	return true;
640362306a36Sopenharmony_ci}
640462306a36Sopenharmony_ci
640562306a36Sopenharmony_cistatic int vmx_get_nested_state(struct kvm_vcpu *vcpu,
640662306a36Sopenharmony_ci				struct kvm_nested_state __user *user_kvm_nested_state,
640762306a36Sopenharmony_ci				u32 user_data_size)
640862306a36Sopenharmony_ci{
640962306a36Sopenharmony_ci	struct vcpu_vmx *vmx;
641062306a36Sopenharmony_ci	struct vmcs12 *vmcs12;
641162306a36Sopenharmony_ci	struct kvm_nested_state kvm_state = {
641262306a36Sopenharmony_ci		.flags = 0,
641362306a36Sopenharmony_ci		.format = KVM_STATE_NESTED_FORMAT_VMX,
641462306a36Sopenharmony_ci		.size = sizeof(kvm_state),
641562306a36Sopenharmony_ci		.hdr.vmx.flags = 0,
641662306a36Sopenharmony_ci		.hdr.vmx.vmxon_pa = INVALID_GPA,
641762306a36Sopenharmony_ci		.hdr.vmx.vmcs12_pa = INVALID_GPA,
641862306a36Sopenharmony_ci		.hdr.vmx.preemption_timer_deadline = 0,
641962306a36Sopenharmony_ci	};
642062306a36Sopenharmony_ci	struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
642162306a36Sopenharmony_ci		&user_kvm_nested_state->data.vmx[0];
642262306a36Sopenharmony_ci
642362306a36Sopenharmony_ci	if (!vcpu)
642462306a36Sopenharmony_ci		return kvm_state.size + sizeof(*user_vmx_nested_state);
642562306a36Sopenharmony_ci
642662306a36Sopenharmony_ci	vmx = to_vmx(vcpu);
642762306a36Sopenharmony_ci	vmcs12 = get_vmcs12(vcpu);
642862306a36Sopenharmony_ci
642962306a36Sopenharmony_ci	if (guest_can_use(vcpu, X86_FEATURE_VMX) &&
643062306a36Sopenharmony_ci	    (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
643162306a36Sopenharmony_ci		kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
643262306a36Sopenharmony_ci		kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
643362306a36Sopenharmony_ci
643462306a36Sopenharmony_ci		if (vmx_has_valid_vmcs12(vcpu)) {
643562306a36Sopenharmony_ci			kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
643662306a36Sopenharmony_ci
643762306a36Sopenharmony_ci			/* 'hv_evmcs_vmptr' can also be EVMPTR_MAP_PENDING here */
643862306a36Sopenharmony_ci			if (vmx->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
643962306a36Sopenharmony_ci				kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
644062306a36Sopenharmony_ci
644162306a36Sopenharmony_ci			if (is_guest_mode(vcpu) &&
644262306a36Sopenharmony_ci			    nested_cpu_has_shadow_vmcs(vmcs12) &&
644362306a36Sopenharmony_ci			    vmcs12->vmcs_link_pointer != INVALID_GPA)
644462306a36Sopenharmony_ci				kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12);
644562306a36Sopenharmony_ci		}
644662306a36Sopenharmony_ci
644762306a36Sopenharmony_ci		if (vmx->nested.smm.vmxon)
644862306a36Sopenharmony_ci			kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
644962306a36Sopenharmony_ci
645062306a36Sopenharmony_ci		if (vmx->nested.smm.guest_mode)
645162306a36Sopenharmony_ci			kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
645262306a36Sopenharmony_ci
645362306a36Sopenharmony_ci		if (is_guest_mode(vcpu)) {
645462306a36Sopenharmony_ci			kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
645562306a36Sopenharmony_ci
645662306a36Sopenharmony_ci			if (vmx->nested.nested_run_pending)
645762306a36Sopenharmony_ci				kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
645862306a36Sopenharmony_ci
645962306a36Sopenharmony_ci			if (vmx->nested.mtf_pending)
646062306a36Sopenharmony_ci				kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
646162306a36Sopenharmony_ci
646262306a36Sopenharmony_ci			if (nested_cpu_has_preemption_timer(vmcs12) &&
646362306a36Sopenharmony_ci			    vmx->nested.has_preemption_timer_deadline) {
646462306a36Sopenharmony_ci				kvm_state.hdr.vmx.flags |=
646562306a36Sopenharmony_ci					KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE;
646662306a36Sopenharmony_ci				kvm_state.hdr.vmx.preemption_timer_deadline =
646762306a36Sopenharmony_ci					vmx->nested.preemption_timer_deadline;
646862306a36Sopenharmony_ci			}
646962306a36Sopenharmony_ci		}
647062306a36Sopenharmony_ci	}
647162306a36Sopenharmony_ci
647262306a36Sopenharmony_ci	if (user_data_size < kvm_state.size)
647362306a36Sopenharmony_ci		goto out;
647462306a36Sopenharmony_ci
647562306a36Sopenharmony_ci	if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
647662306a36Sopenharmony_ci		return -EFAULT;
647762306a36Sopenharmony_ci
647862306a36Sopenharmony_ci	if (!vmx_has_valid_vmcs12(vcpu))
647962306a36Sopenharmony_ci		goto out;
648062306a36Sopenharmony_ci
648162306a36Sopenharmony_ci	/*
648262306a36Sopenharmony_ci	 * When running L2, the authoritative vmcs12 state is in the
648362306a36Sopenharmony_ci	 * vmcs02. When running L1, the authoritative vmcs12 state is
648462306a36Sopenharmony_ci	 * in the shadow or enlightened vmcs linked to vmcs01, unless
648562306a36Sopenharmony_ci	 * need_vmcs12_to_shadow_sync is set, in which case, the authoritative
648662306a36Sopenharmony_ci	 * vmcs12 state is in the vmcs12 already.
648762306a36Sopenharmony_ci	 */
648862306a36Sopenharmony_ci	if (is_guest_mode(vcpu)) {
648962306a36Sopenharmony_ci		sync_vmcs02_to_vmcs12(vcpu, vmcs12);
649062306a36Sopenharmony_ci		sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
649162306a36Sopenharmony_ci	} else  {
649262306a36Sopenharmony_ci		copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
649362306a36Sopenharmony_ci		if (!vmx->nested.need_vmcs12_to_shadow_sync) {
649462306a36Sopenharmony_ci			if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
649562306a36Sopenharmony_ci				/*
649662306a36Sopenharmony_ci				 * L1 hypervisor is not obliged to keep eVMCS
649762306a36Sopenharmony_ci				 * clean fields data always up-to-date while
649862306a36Sopenharmony_ci				 * not in guest mode, 'hv_clean_fields' is only
649962306a36Sopenharmony_ci				 * supposed to be actual upon vmentry so we need
650062306a36Sopenharmony_ci				 * to ignore it here and do full copy.
650162306a36Sopenharmony_ci				 */
650262306a36Sopenharmony_ci				copy_enlightened_to_vmcs12(vmx, 0);
650362306a36Sopenharmony_ci			else if (enable_shadow_vmcs)
650462306a36Sopenharmony_ci				copy_shadow_to_vmcs12(vmx);
650562306a36Sopenharmony_ci		}
650662306a36Sopenharmony_ci	}
650762306a36Sopenharmony_ci
650862306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
650962306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
651062306a36Sopenharmony_ci
651162306a36Sopenharmony_ci	/*
651262306a36Sopenharmony_ci	 * Copy over the full allocated size of vmcs12 rather than just the size
651362306a36Sopenharmony_ci	 * of the struct.
651462306a36Sopenharmony_ci	 */
651562306a36Sopenharmony_ci	if (copy_to_user(user_vmx_nested_state->vmcs12, vmcs12, VMCS12_SIZE))
651662306a36Sopenharmony_ci		return -EFAULT;
651762306a36Sopenharmony_ci
651862306a36Sopenharmony_ci	if (nested_cpu_has_shadow_vmcs(vmcs12) &&
651962306a36Sopenharmony_ci	    vmcs12->vmcs_link_pointer != INVALID_GPA) {
652062306a36Sopenharmony_ci		if (copy_to_user(user_vmx_nested_state->shadow_vmcs12,
652162306a36Sopenharmony_ci				 get_shadow_vmcs12(vcpu), VMCS12_SIZE))
652262306a36Sopenharmony_ci			return -EFAULT;
652362306a36Sopenharmony_ci	}
652462306a36Sopenharmony_ciout:
652562306a36Sopenharmony_ci	return kvm_state.size;
652662306a36Sopenharmony_ci}
652762306a36Sopenharmony_ci
652862306a36Sopenharmony_civoid vmx_leave_nested(struct kvm_vcpu *vcpu)
652962306a36Sopenharmony_ci{
653062306a36Sopenharmony_ci	if (is_guest_mode(vcpu)) {
653162306a36Sopenharmony_ci		to_vmx(vcpu)->nested.nested_run_pending = 0;
653262306a36Sopenharmony_ci		nested_vmx_vmexit(vcpu, -1, 0, 0);
653362306a36Sopenharmony_ci	}
653462306a36Sopenharmony_ci	free_nested(vcpu);
653562306a36Sopenharmony_ci}
653662306a36Sopenharmony_ci
/*
 * Replace the vCPU's nested VMX state with the state described by a
 * userspace-provided kvm_nested_state.
 *
 * @vcpu: vCPU whose nested state is being set.
 * @user_kvm_nested_state: userspace pointer to the state blob; only the VMX
 *	data area (vmcs12 and shadow vmcs12) is read through it, via
 *	copy_from_user().
 * @kvm_state: header (format, flags, size, hdr.vmx) that drives all of the
 *	validation below.
 *	NOTE(review): presumably a kernel copy made by the caller - confirm.
 *
 * Returns 0 on success, -EINVAL if the header or the vmcs12 contents are
 * rejected, -EFAULT if a copy from userspace fails, or the non-zero value
 * returned by enter_vmx_operation() / nested_vmx_enter_non_root_mode().
 *
 * Every header check precedes vmx_leave_nested(); any failure after that
 * call returns with the previous nested state already torn down.
 */
static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
				struct kvm_nested_state __user *user_kvm_nested_state,
				struct kvm_nested_state *kvm_state)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmcs12 *vmcs12;
	enum vm_entry_failure_code ignored;
	struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
		&user_kvm_nested_state->data.vmx[0];
	int ret;

	/* Only the VMX flavour of the nested state layout is understood. */
	if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX)
		return -EINVAL;

	if (kvm_state->hdr.vmx.vmxon_pa == INVALID_GPA) {
		/*
		 * No VMXON region: SMM flags and a vmcs12 address make no
		 * sense, and the only top-level flag tolerated is EVMCS.
		 */
		if (kvm_state->hdr.vmx.smm.flags)
			return -EINVAL;

		if (kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA)
			return -EINVAL;

		/*
		 * KVM_STATE_NESTED_EVMCS used to signal that KVM should
		 * enable eVMCS capability on vCPU. However, since then
		 * code was changed such that flag signals vmcs12 should
		 * be copied into eVMCS in guest memory.
		 *
		 * To preserve backwards compatibility, allow user
		 * to set this flag even when there is no VMXON region.
		 */
		if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
			return -EINVAL;
	} else {
		/* A VMXON region requires VMX for the guest and a valid address. */
		if (!guest_can_use(vcpu, X86_FEATURE_VMX))
			return -EINVAL;

		if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
			return -EINVAL;
	}

	/* GUEST_MODE in the SMM flags and in the top-level flags are exclusive. */
	if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
	    (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
		return -EINVAL;

	/* Reject SMM flag bits other than the two known ones. */
	if (kvm_state->hdr.vmx.smm.flags &
	    ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
		return -EINVAL;

	/* Reject VMX header flag bits other than the preemption timer deadline. */
	if (kvm_state->hdr.vmx.flags & ~KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE)
		return -EINVAL;

	/*
	 * SMM temporarily disables VMX, so we cannot be in guest mode,
	 * nor can VMLAUNCH/VMRESUME be pending.  Outside SMM, SMM flags
	 * must be zero.
	 */
	if (is_smm(vcpu) ?
		(kvm_state->flags &
		 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
		: kvm_state->hdr.vmx.smm.flags)
		return -EINVAL;

	/* SMM_GUEST_MODE is only accepted together with SMM_VMXON. */
	if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
	    !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
		return -EINVAL;

	/* EVMCS needs both VMX for the guest and eVMCS enabled on this vCPU. */
	if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
	    (!guest_can_use(vcpu, X86_FEATURE_VMX) ||
	     !vmx->nested.enlightened_vmcs_enabled))
			return -EINVAL;

	/* Header accepted: discard whatever nested state the vCPU had. */
	vmx_leave_nested(vcpu);

	/* No VMXON region means there is nothing further to restore. */
	if (kvm_state->hdr.vmx.vmxon_pa == INVALID_GPA)
		return 0;

	vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa;
	ret = enter_vmx_operation(vcpu);
	if (ret)
		return ret;

	/* Empty 'VMXON' state is permitted if no VMCS loaded */
	if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) {
		/* See vmx_has_valid_vmcs12.  */
		if ((kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE) ||
		    (kvm_state->flags & KVM_STATE_NESTED_EVMCS) ||
		    (kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA))
			return -EINVAL;
		else
			return 0;
	}

	/*
	 * The blob is large enough to carry a vmcs12, so it must be
	 * identified either by a vmcs12 address or by the EVMCS flag.
	 */
	if (kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA) {
		/* The vmcs12 may not alias the VMXON region. */
		if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa ||
		    !page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa))
			return -EINVAL;

		set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa);
	} else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
		/*
		 * nested_vmx_handle_enlightened_vmptrld() cannot be called
		 * directly from here as HV_X64_MSR_VP_ASSIST_PAGE may not be
		 * restored yet. EVMCS will be mapped from
		 * nested_get_vmcs12_pages().
		 */
		vmx->nested.hv_evmcs_vmptr = EVMPTR_MAP_PENDING;
		kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
	} else {
		return -EINVAL;
	}

	/*
	 * With SMM_VMXON set, VMXON is recorded under nested.smm and the
	 * live vmxon flag is cleared.
	 * NOTE(review): presumably undoes what enter_vmx_operation() set
	 * above - confirm.
	 */
	if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
		vmx->nested.smm.vmxon = true;
		vmx->nested.vmxon = false;

		if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
			vmx->nested.smm.guest_mode = true;
	}

	/* Load the cached vmcs12 straight from the userspace blob. */
	vmcs12 = get_vmcs12(vcpu);
	if (copy_from_user(vmcs12, user_vmx_nested_state->vmcs12, sizeof(*vmcs12)))
		return -EFAULT;

	if (vmcs12->hdr.revision_id != VMCS12_REVISION)
		return -EINVAL;

	/* Not in guest mode: the vmcs12 is loaded and nothing else is needed. */
	if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
		return 0;

	vmx->nested.nested_run_pending =
		!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);

	vmx->nested.mtf_pending =
		!!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);

	/* Default error for the 'goto error_guest_mode' checks below. */
	ret = -EINVAL;
	if (nested_cpu_has_shadow_vmcs(vmcs12) &&
	    vmcs12->vmcs_link_pointer != INVALID_GPA) {
		struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);

		/* The blob must also be large enough for the shadow vmcs12. */
		if (kvm_state->size <
		    sizeof(*kvm_state) +
		    sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12))
			goto error_guest_mode;

		if (copy_from_user(shadow_vmcs12,
				   user_vmx_nested_state->shadow_vmcs12,
				   sizeof(*shadow_vmcs12))) {
			ret = -EFAULT;
			goto error_guest_mode;
		}

		/* It must carry the right revision and be marked as a shadow. */
		if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
		    !shadow_vmcs12->hdr.shadow_vmcs)
			goto error_guest_mode;
	}

	/* Take the preemption timer deadline from the header, if provided. */
	vmx->nested.has_preemption_timer_deadline = false;
	if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) {
		vmx->nested.has_preemption_timer_deadline = true;
		vmx->nested.preemption_timer_deadline =
			kvm_state->hdr.vmx.preemption_timer_deadline;
	}

	/* The userspace-supplied vmcs12 must pass the nested entry checks. */
	if (nested_vmx_check_controls(vcpu, vmcs12) ||
	    nested_vmx_check_host_state(vcpu, vmcs12) ||
	    nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
		goto error_guest_mode;

	vmx->nested.dirty_vmcs12 = true;
	vmx->nested.force_msr_bitmap_recalc = true;
	ret = nested_vmx_enter_non_root_mode(vcpu, false);
	if (ret)
		goto error_guest_mode;

	/* Have a pending MTF picked up by event processing. */
	if (vmx->nested.mtf_pending)
		kvm_make_request(KVM_REQ_EVENT, vcpu);

	return 0;

	/*
	 * Only nested_run_pending is reset here.
	 * NOTE(review): mtf_pending and the preemption timer deadline set
	 * above are not rolled back on this path - confirm that is intended.
	 */
error_guest_mode:
	vmx->nested.nested_run_pending = 0;
	return ret;
}
672162306a36Sopenharmony_ci
672262306a36Sopenharmony_civoid nested_vmx_set_vmcs_shadowing_bitmap(void)
672362306a36Sopenharmony_ci{
672462306a36Sopenharmony_ci	if (enable_shadow_vmcs) {
672562306a36Sopenharmony_ci		vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
672662306a36Sopenharmony_ci		vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
672762306a36Sopenharmony_ci	}
672862306a36Sopenharmony_ci}
672962306a36Sopenharmony_ci
/*
 * vmcs12 is indexed by the VMCS field encoding rotated left by 6 bits.
 * Rotating the 16-bit index right by 6 (i.e. left by 10) undoes that and
 * yields the encoding again, so that it can be used for comparison.
 * Note that @idx is evaluated twice.
 */
#define VMCS12_IDX_TO_ENC(idx) \
	((u16)((((u16)(idx)) << 10) | (((u16)(idx)) >> 6)))
673562306a36Sopenharmony_ci
673662306a36Sopenharmony_cistatic u64 nested_vmx_calc_vmcs_enum_msr(void)
673762306a36Sopenharmony_ci{
673862306a36Sopenharmony_ci	/*
673962306a36Sopenharmony_ci	 * Note these are the so called "index" of the VMCS field encoding, not
674062306a36Sopenharmony_ci	 * the index into vmcs12.
674162306a36Sopenharmony_ci	 */
674262306a36Sopenharmony_ci	unsigned int max_idx, idx;
674362306a36Sopenharmony_ci	int i;
674462306a36Sopenharmony_ci
674562306a36Sopenharmony_ci	/*
674662306a36Sopenharmony_ci	 * For better or worse, KVM allows VMREAD/VMWRITE to all fields in
674762306a36Sopenharmony_ci	 * vmcs12, regardless of whether or not the associated feature is
674862306a36Sopenharmony_ci	 * exposed to L1.  Simply find the field with the highest index.
674962306a36Sopenharmony_ci	 */
675062306a36Sopenharmony_ci	max_idx = 0;
675162306a36Sopenharmony_ci	for (i = 0; i < nr_vmcs12_fields; i++) {
675262306a36Sopenharmony_ci		/* The vmcs12 table is very, very sparsely populated. */
675362306a36Sopenharmony_ci		if (!vmcs12_field_offsets[i])
675462306a36Sopenharmony_ci			continue;
675562306a36Sopenharmony_ci
675662306a36Sopenharmony_ci		idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i));
675762306a36Sopenharmony_ci		if (idx > max_idx)
675862306a36Sopenharmony_ci			max_idx = idx;
675962306a36Sopenharmony_ci	}
676062306a36Sopenharmony_ci
676162306a36Sopenharmony_ci	return (u64)max_idx << VMCS_FIELD_INDEX_SHIFT;
676262306a36Sopenharmony_ci}
676362306a36Sopenharmony_ci
676462306a36Sopenharmony_cistatic void nested_vmx_setup_pinbased_ctls(struct vmcs_config *vmcs_conf,
676562306a36Sopenharmony_ci					   struct nested_vmx_msrs *msrs)
676662306a36Sopenharmony_ci{
676762306a36Sopenharmony_ci	msrs->pinbased_ctls_low =
676862306a36Sopenharmony_ci		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
676962306a36Sopenharmony_ci
677062306a36Sopenharmony_ci	msrs->pinbased_ctls_high = vmcs_conf->pin_based_exec_ctrl;
677162306a36Sopenharmony_ci	msrs->pinbased_ctls_high &=
677262306a36Sopenharmony_ci		PIN_BASED_EXT_INTR_MASK |
677362306a36Sopenharmony_ci		PIN_BASED_NMI_EXITING |
677462306a36Sopenharmony_ci		PIN_BASED_VIRTUAL_NMIS |
677562306a36Sopenharmony_ci		(enable_apicv ? PIN_BASED_POSTED_INTR : 0);
677662306a36Sopenharmony_ci	msrs->pinbased_ctls_high |=
677762306a36Sopenharmony_ci		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
677862306a36Sopenharmony_ci		PIN_BASED_VMX_PREEMPTION_TIMER;
677962306a36Sopenharmony_ci}
678062306a36Sopenharmony_ci
678162306a36Sopenharmony_cistatic void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf,
678262306a36Sopenharmony_ci				       struct nested_vmx_msrs *msrs)
678362306a36Sopenharmony_ci{
678462306a36Sopenharmony_ci	msrs->exit_ctls_low =
678562306a36Sopenharmony_ci		VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
678662306a36Sopenharmony_ci
678762306a36Sopenharmony_ci	msrs->exit_ctls_high = vmcs_conf->vmexit_ctrl;
678862306a36Sopenharmony_ci	msrs->exit_ctls_high &=
678962306a36Sopenharmony_ci#ifdef CONFIG_X86_64
679062306a36Sopenharmony_ci		VM_EXIT_HOST_ADDR_SPACE_SIZE |
679162306a36Sopenharmony_ci#endif
679262306a36Sopenharmony_ci		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
679362306a36Sopenharmony_ci		VM_EXIT_CLEAR_BNDCFGS;
679462306a36Sopenharmony_ci	msrs->exit_ctls_high |=
679562306a36Sopenharmony_ci		VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
679662306a36Sopenharmony_ci		VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
679762306a36Sopenharmony_ci		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT |
679862306a36Sopenharmony_ci		VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
679962306a36Sopenharmony_ci
680062306a36Sopenharmony_ci	/* We support free control of debug control saving. */
680162306a36Sopenharmony_ci	msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
680262306a36Sopenharmony_ci}
680362306a36Sopenharmony_ci
680462306a36Sopenharmony_cistatic void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf,
680562306a36Sopenharmony_ci					struct nested_vmx_msrs *msrs)
680662306a36Sopenharmony_ci{
680762306a36Sopenharmony_ci	msrs->entry_ctls_low =
680862306a36Sopenharmony_ci		VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
680962306a36Sopenharmony_ci
681062306a36Sopenharmony_ci	msrs->entry_ctls_high = vmcs_conf->vmentry_ctrl;
681162306a36Sopenharmony_ci	msrs->entry_ctls_high &=
681262306a36Sopenharmony_ci#ifdef CONFIG_X86_64
681362306a36Sopenharmony_ci		VM_ENTRY_IA32E_MODE |
681462306a36Sopenharmony_ci#endif
681562306a36Sopenharmony_ci		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
681662306a36Sopenharmony_ci	msrs->entry_ctls_high |=
681762306a36Sopenharmony_ci		(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER |
681862306a36Sopenharmony_ci		 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL);
681962306a36Sopenharmony_ci
682062306a36Sopenharmony_ci	/* We support free control of debug control loading. */
682162306a36Sopenharmony_ci	msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
682262306a36Sopenharmony_ci}
682362306a36Sopenharmony_ci
682462306a36Sopenharmony_cistatic void nested_vmx_setup_cpubased_ctls(struct vmcs_config *vmcs_conf,
682562306a36Sopenharmony_ci					   struct nested_vmx_msrs *msrs)
682662306a36Sopenharmony_ci{
682762306a36Sopenharmony_ci	msrs->procbased_ctls_low =
682862306a36Sopenharmony_ci		CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
682962306a36Sopenharmony_ci
683062306a36Sopenharmony_ci	msrs->procbased_ctls_high = vmcs_conf->cpu_based_exec_ctrl;
683162306a36Sopenharmony_ci	msrs->procbased_ctls_high &=
683262306a36Sopenharmony_ci		CPU_BASED_INTR_WINDOW_EXITING |
683362306a36Sopenharmony_ci		CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING |
683462306a36Sopenharmony_ci		CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
683562306a36Sopenharmony_ci		CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
683662306a36Sopenharmony_ci		CPU_BASED_CR3_STORE_EXITING |
683762306a36Sopenharmony_ci#ifdef CONFIG_X86_64
683862306a36Sopenharmony_ci		CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
683962306a36Sopenharmony_ci#endif
684062306a36Sopenharmony_ci		CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
684162306a36Sopenharmony_ci		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
684262306a36Sopenharmony_ci		CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
684362306a36Sopenharmony_ci		CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
684462306a36Sopenharmony_ci		CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
684562306a36Sopenharmony_ci	/*
684662306a36Sopenharmony_ci	 * We can allow some features even when not supported by the
684762306a36Sopenharmony_ci	 * hardware. For example, L1 can specify an MSR bitmap - and we
684862306a36Sopenharmony_ci	 * can use it to avoid exits to L1 - even when L0 runs L2
684962306a36Sopenharmony_ci	 * without MSR bitmaps.
685062306a36Sopenharmony_ci	 */
685162306a36Sopenharmony_ci	msrs->procbased_ctls_high |=
685262306a36Sopenharmony_ci		CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
685362306a36Sopenharmony_ci		CPU_BASED_USE_MSR_BITMAPS;
685462306a36Sopenharmony_ci
685562306a36Sopenharmony_ci	/* We support free control of CR3 access interception. */
685662306a36Sopenharmony_ci	msrs->procbased_ctls_low &=
685762306a36Sopenharmony_ci		~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
685862306a36Sopenharmony_ci}
685962306a36Sopenharmony_ci
686062306a36Sopenharmony_cistatic void nested_vmx_setup_secondary_ctls(u32 ept_caps,
686162306a36Sopenharmony_ci					    struct vmcs_config *vmcs_conf,
686262306a36Sopenharmony_ci					    struct nested_vmx_msrs *msrs)
686362306a36Sopenharmony_ci{
686462306a36Sopenharmony_ci	msrs->secondary_ctls_low = 0;
686562306a36Sopenharmony_ci
686662306a36Sopenharmony_ci	msrs->secondary_ctls_high = vmcs_conf->cpu_based_2nd_exec_ctrl;
686762306a36Sopenharmony_ci	msrs->secondary_ctls_high &=
686862306a36Sopenharmony_ci		SECONDARY_EXEC_DESC |
686962306a36Sopenharmony_ci		SECONDARY_EXEC_ENABLE_RDTSCP |
687062306a36Sopenharmony_ci		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
687162306a36Sopenharmony_ci		SECONDARY_EXEC_WBINVD_EXITING |
687262306a36Sopenharmony_ci		SECONDARY_EXEC_APIC_REGISTER_VIRT |
687362306a36Sopenharmony_ci		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
687462306a36Sopenharmony_ci		SECONDARY_EXEC_RDRAND_EXITING |
687562306a36Sopenharmony_ci		SECONDARY_EXEC_ENABLE_INVPCID |
687662306a36Sopenharmony_ci		SECONDARY_EXEC_ENABLE_VMFUNC |
687762306a36Sopenharmony_ci		SECONDARY_EXEC_RDSEED_EXITING |
687862306a36Sopenharmony_ci		SECONDARY_EXEC_ENABLE_XSAVES |
687962306a36Sopenharmony_ci		SECONDARY_EXEC_TSC_SCALING |
688062306a36Sopenharmony_ci		SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
688162306a36Sopenharmony_ci
688262306a36Sopenharmony_ci	/*
688362306a36Sopenharmony_ci	 * We can emulate "VMCS shadowing," even if the hardware
688462306a36Sopenharmony_ci	 * doesn't support it.
688562306a36Sopenharmony_ci	 */
688662306a36Sopenharmony_ci	msrs->secondary_ctls_high |=
688762306a36Sopenharmony_ci		SECONDARY_EXEC_SHADOW_VMCS;
688862306a36Sopenharmony_ci
688962306a36Sopenharmony_ci	if (enable_ept) {
689062306a36Sopenharmony_ci		/* nested EPT: emulate EPT also to L1 */
689162306a36Sopenharmony_ci		msrs->secondary_ctls_high |=
689262306a36Sopenharmony_ci			SECONDARY_EXEC_ENABLE_EPT;
689362306a36Sopenharmony_ci		msrs->ept_caps =
689462306a36Sopenharmony_ci			VMX_EPT_PAGE_WALK_4_BIT |
689562306a36Sopenharmony_ci			VMX_EPT_PAGE_WALK_5_BIT |
689662306a36Sopenharmony_ci			VMX_EPTP_WB_BIT |
689762306a36Sopenharmony_ci			VMX_EPT_INVEPT_BIT |
689862306a36Sopenharmony_ci			VMX_EPT_EXECUTE_ONLY_BIT;
689962306a36Sopenharmony_ci
690062306a36Sopenharmony_ci		msrs->ept_caps &= ept_caps;
690162306a36Sopenharmony_ci		msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
690262306a36Sopenharmony_ci			VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
690362306a36Sopenharmony_ci			VMX_EPT_1GB_PAGE_BIT;
690462306a36Sopenharmony_ci		if (enable_ept_ad_bits) {
690562306a36Sopenharmony_ci			msrs->secondary_ctls_high |=
690662306a36Sopenharmony_ci				SECONDARY_EXEC_ENABLE_PML;
690762306a36Sopenharmony_ci			msrs->ept_caps |= VMX_EPT_AD_BIT;
690862306a36Sopenharmony_ci		}
690962306a36Sopenharmony_ci
691062306a36Sopenharmony_ci		/*
691162306a36Sopenharmony_ci		 * Advertise EPTP switching irrespective of hardware support,
691262306a36Sopenharmony_ci		 * KVM emulates it in software so long as VMFUNC is supported.
691362306a36Sopenharmony_ci		 */
691462306a36Sopenharmony_ci		if (cpu_has_vmx_vmfunc())
691562306a36Sopenharmony_ci			msrs->vmfunc_controls = VMX_VMFUNC_EPTP_SWITCHING;
691662306a36Sopenharmony_ci	}
691762306a36Sopenharmony_ci
691862306a36Sopenharmony_ci	/*
691962306a36Sopenharmony_ci	 * Old versions of KVM use the single-context version without
692062306a36Sopenharmony_ci	 * checking for support, so declare that it is supported even
692162306a36Sopenharmony_ci	 * though it is treated as global context.  The alternative is
692262306a36Sopenharmony_ci	 * not failing the single-context invvpid, and it is worse.
692362306a36Sopenharmony_ci	 */
692462306a36Sopenharmony_ci	if (enable_vpid) {
692562306a36Sopenharmony_ci		msrs->secondary_ctls_high |=
692662306a36Sopenharmony_ci			SECONDARY_EXEC_ENABLE_VPID;
692762306a36Sopenharmony_ci		msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
692862306a36Sopenharmony_ci			VMX_VPID_EXTENT_SUPPORTED_MASK;
692962306a36Sopenharmony_ci	}
693062306a36Sopenharmony_ci
693162306a36Sopenharmony_ci	if (enable_unrestricted_guest)
693262306a36Sopenharmony_ci		msrs->secondary_ctls_high |=
693362306a36Sopenharmony_ci			SECONDARY_EXEC_UNRESTRICTED_GUEST;
693462306a36Sopenharmony_ci
693562306a36Sopenharmony_ci	if (flexpriority_enabled)
693662306a36Sopenharmony_ci		msrs->secondary_ctls_high |=
693762306a36Sopenharmony_ci			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
693862306a36Sopenharmony_ci
693962306a36Sopenharmony_ci	if (enable_sgx)
694062306a36Sopenharmony_ci		msrs->secondary_ctls_high |= SECONDARY_EXEC_ENCLS_EXITING;
694162306a36Sopenharmony_ci}
694262306a36Sopenharmony_ci
694362306a36Sopenharmony_cistatic void nested_vmx_setup_misc_data(struct vmcs_config *vmcs_conf,
694462306a36Sopenharmony_ci				       struct nested_vmx_msrs *msrs)
694562306a36Sopenharmony_ci{
694662306a36Sopenharmony_ci	msrs->misc_low = (u32)vmcs_conf->misc & VMX_MISC_SAVE_EFER_LMA;
694762306a36Sopenharmony_ci	msrs->misc_low |=
694862306a36Sopenharmony_ci		MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
694962306a36Sopenharmony_ci		VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
695062306a36Sopenharmony_ci		VMX_MISC_ACTIVITY_HLT |
695162306a36Sopenharmony_ci		VMX_MISC_ACTIVITY_WAIT_SIPI;
695262306a36Sopenharmony_ci	msrs->misc_high = 0;
695362306a36Sopenharmony_ci}
695462306a36Sopenharmony_ci
695562306a36Sopenharmony_cistatic void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs)
695662306a36Sopenharmony_ci{
695762306a36Sopenharmony_ci	/*
695862306a36Sopenharmony_ci	 * This MSR reports some information about VMX support. We
695962306a36Sopenharmony_ci	 * should return information about the VMX we emulate for the
696062306a36Sopenharmony_ci	 * guest, and the VMCS structure we give it - not about the
696162306a36Sopenharmony_ci	 * VMX support of the underlying hardware.
696262306a36Sopenharmony_ci	 */
696362306a36Sopenharmony_ci	msrs->basic =
696462306a36Sopenharmony_ci		VMCS12_REVISION |
696562306a36Sopenharmony_ci		VMX_BASIC_TRUE_CTLS |
696662306a36Sopenharmony_ci		((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
696762306a36Sopenharmony_ci		(VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
696862306a36Sopenharmony_ci
696962306a36Sopenharmony_ci	if (cpu_has_vmx_basic_inout())
697062306a36Sopenharmony_ci		msrs->basic |= VMX_BASIC_INOUT;
697162306a36Sopenharmony_ci}
697262306a36Sopenharmony_ci
697362306a36Sopenharmony_cistatic void nested_vmx_setup_cr_fixed(struct nested_vmx_msrs *msrs)
697462306a36Sopenharmony_ci{
697562306a36Sopenharmony_ci	/*
697662306a36Sopenharmony_ci	 * These MSRs specify bits which the guest must keep fixed on
697762306a36Sopenharmony_ci	 * while L1 is in VMXON mode (in L1's root mode, or running an L2).
697862306a36Sopenharmony_ci	 * We picked the standard core2 setting.
697962306a36Sopenharmony_ci	 */
698062306a36Sopenharmony_ci#define VMXON_CR0_ALWAYSON     (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
698162306a36Sopenharmony_ci#define VMXON_CR4_ALWAYSON     X86_CR4_VMXE
698262306a36Sopenharmony_ci	msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
698362306a36Sopenharmony_ci	msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
698462306a36Sopenharmony_ci
698562306a36Sopenharmony_ci	/* These MSRs specify bits which the guest must keep fixed off. */
698662306a36Sopenharmony_ci	rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
698762306a36Sopenharmony_ci	rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
698862306a36Sopenharmony_ci
698962306a36Sopenharmony_ci	if (vmx_umip_emulated())
699062306a36Sopenharmony_ci		msrs->cr4_fixed1 |= X86_CR4_UMIP;
699162306a36Sopenharmony_ci}
699262306a36Sopenharmony_ci
699362306a36Sopenharmony_ci/*
699462306a36Sopenharmony_ci * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
699562306a36Sopenharmony_ci * returned for the various VMX controls MSRs when nested VMX is enabled.
699662306a36Sopenharmony_ci * The same values should also be used to verify that vmcs12 control fields are
699762306a36Sopenharmony_ci * valid during nested entry from L1 to L2.
699862306a36Sopenharmony_ci * Each of these control msrs has a low and high 32-bit half: A low bit is on
699962306a36Sopenharmony_ci * if the corresponding bit in the (32-bit) control field *must* be on, and a
700062306a36Sopenharmony_ci * bit in the high half is on if the corresponding bit in the control field
700162306a36Sopenharmony_ci * may be on. See also vmx_control_verify().
700262306a36Sopenharmony_ci */
700362306a36Sopenharmony_civoid nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
700462306a36Sopenharmony_ci{
700562306a36Sopenharmony_ci	struct nested_vmx_msrs *msrs = &vmcs_conf->nested;
700662306a36Sopenharmony_ci
700762306a36Sopenharmony_ci	/*
700862306a36Sopenharmony_ci	 * Note that as a general rule, the high half of the MSRs (bits in
700962306a36Sopenharmony_ci	 * the control fields which may be 1) should be initialized by the
701062306a36Sopenharmony_ci	 * intersection of the underlying hardware's MSR (i.e., features which
701162306a36Sopenharmony_ci	 * can be supported) and the list of features we want to expose -
701262306a36Sopenharmony_ci	 * because they are known to be properly supported in our code.
701362306a36Sopenharmony_ci	 * Also, usually, the low half of the MSRs (bits which must be 1) can
701462306a36Sopenharmony_ci	 * be set to 0, meaning that L1 may turn off any of these bits. The
701562306a36Sopenharmony_ci	 * reason is that if one of these bits is necessary, it will appear
701662306a36Sopenharmony_ci	 * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
701762306a36Sopenharmony_ci	 * fields of vmcs01 and vmcs02, will turn these bits off - and
701862306a36Sopenharmony_ci	 * nested_vmx_l1_wants_exit() will not pass related exits to L1.
701962306a36Sopenharmony_ci	 * These rules have exceptions below.
702062306a36Sopenharmony_ci	 */
702162306a36Sopenharmony_ci	nested_vmx_setup_pinbased_ctls(vmcs_conf, msrs);
702262306a36Sopenharmony_ci
702362306a36Sopenharmony_ci	nested_vmx_setup_exit_ctls(vmcs_conf, msrs);
702462306a36Sopenharmony_ci
702562306a36Sopenharmony_ci	nested_vmx_setup_entry_ctls(vmcs_conf, msrs);
702662306a36Sopenharmony_ci
702762306a36Sopenharmony_ci	nested_vmx_setup_cpubased_ctls(vmcs_conf, msrs);
702862306a36Sopenharmony_ci
702962306a36Sopenharmony_ci	nested_vmx_setup_secondary_ctls(ept_caps, vmcs_conf, msrs);
703062306a36Sopenharmony_ci
703162306a36Sopenharmony_ci	nested_vmx_setup_misc_data(vmcs_conf, msrs);
703262306a36Sopenharmony_ci
703362306a36Sopenharmony_ci	nested_vmx_setup_basic(msrs);
703462306a36Sopenharmony_ci
703562306a36Sopenharmony_ci	nested_vmx_setup_cr_fixed(msrs);
703662306a36Sopenharmony_ci
703762306a36Sopenharmony_ci	msrs->vmcs_enum = nested_vmx_calc_vmcs_enum_msr();
703862306a36Sopenharmony_ci}
703962306a36Sopenharmony_ci
704062306a36Sopenharmony_civoid nested_vmx_hardware_unsetup(void)
704162306a36Sopenharmony_ci{
704262306a36Sopenharmony_ci	int i;
704362306a36Sopenharmony_ci
704462306a36Sopenharmony_ci	if (enable_shadow_vmcs) {
704562306a36Sopenharmony_ci		for (i = 0; i < VMX_BITMAP_NR; i++)
704662306a36Sopenharmony_ci			free_page((unsigned long)vmx_bitmap[i]);
704762306a36Sopenharmony_ci	}
704862306a36Sopenharmony_ci}
704962306a36Sopenharmony_ci
705062306a36Sopenharmony_ci__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
705162306a36Sopenharmony_ci{
705262306a36Sopenharmony_ci	int i;
705362306a36Sopenharmony_ci
705462306a36Sopenharmony_ci	if (!cpu_has_vmx_shadow_vmcs())
705562306a36Sopenharmony_ci		enable_shadow_vmcs = 0;
705662306a36Sopenharmony_ci	if (enable_shadow_vmcs) {
705762306a36Sopenharmony_ci		for (i = 0; i < VMX_BITMAP_NR; i++) {
705862306a36Sopenharmony_ci			/*
705962306a36Sopenharmony_ci			 * The vmx_bitmap is not tied to a VM and so should
706062306a36Sopenharmony_ci			 * not be charged to a memcg.
706162306a36Sopenharmony_ci			 */
706262306a36Sopenharmony_ci			vmx_bitmap[i] = (unsigned long *)
706362306a36Sopenharmony_ci				__get_free_page(GFP_KERNEL);
706462306a36Sopenharmony_ci			if (!vmx_bitmap[i]) {
706562306a36Sopenharmony_ci				nested_vmx_hardware_unsetup();
706662306a36Sopenharmony_ci				return -ENOMEM;
706762306a36Sopenharmony_ci			}
706862306a36Sopenharmony_ci		}
706962306a36Sopenharmony_ci
707062306a36Sopenharmony_ci		init_vmcs_shadow_fields();
707162306a36Sopenharmony_ci	}
707262306a36Sopenharmony_ci
707362306a36Sopenharmony_ci	exit_handlers[EXIT_REASON_VMCLEAR]	= handle_vmclear;
707462306a36Sopenharmony_ci	exit_handlers[EXIT_REASON_VMLAUNCH]	= handle_vmlaunch;
707562306a36Sopenharmony_ci	exit_handlers[EXIT_REASON_VMPTRLD]	= handle_vmptrld;
707662306a36Sopenharmony_ci	exit_handlers[EXIT_REASON_VMPTRST]	= handle_vmptrst;
707762306a36Sopenharmony_ci	exit_handlers[EXIT_REASON_VMREAD]	= handle_vmread;
707862306a36Sopenharmony_ci	exit_handlers[EXIT_REASON_VMRESUME]	= handle_vmresume;
707962306a36Sopenharmony_ci	exit_handlers[EXIT_REASON_VMWRITE]	= handle_vmwrite;
708062306a36Sopenharmony_ci	exit_handlers[EXIT_REASON_VMOFF]	= handle_vmxoff;
708162306a36Sopenharmony_ci	exit_handlers[EXIT_REASON_VMON]		= handle_vmxon;
708262306a36Sopenharmony_ci	exit_handlers[EXIT_REASON_INVEPT]	= handle_invept;
708362306a36Sopenharmony_ci	exit_handlers[EXIT_REASON_INVVPID]	= handle_invvpid;
708462306a36Sopenharmony_ci	exit_handlers[EXIT_REASON_VMFUNC]	= handle_vmfunc;
708562306a36Sopenharmony_ci
708662306a36Sopenharmony_ci	return 0;
708762306a36Sopenharmony_ci}
708862306a36Sopenharmony_ci
708962306a36Sopenharmony_cistruct kvm_x86_nested_ops vmx_nested_ops = {
709062306a36Sopenharmony_ci	.leave_nested = vmx_leave_nested,
709162306a36Sopenharmony_ci	.is_exception_vmexit = nested_vmx_is_exception_vmexit,
709262306a36Sopenharmony_ci	.check_events = vmx_check_nested_events,
709362306a36Sopenharmony_ci	.has_events = vmx_has_nested_events,
709462306a36Sopenharmony_ci	.triple_fault = nested_vmx_triple_fault,
709562306a36Sopenharmony_ci	.get_state = vmx_get_nested_state,
709662306a36Sopenharmony_ci	.set_state = vmx_set_nested_state,
709762306a36Sopenharmony_ci	.get_nested_state_pages = vmx_get_nested_state_pages,
709862306a36Sopenharmony_ci	.write_log_dirty = nested_vmx_write_pml_buffer,
709962306a36Sopenharmony_ci	.enable_evmcs = nested_enable_evmcs,
710062306a36Sopenharmony_ci	.get_evmcs_version = nested_get_evmcs_version,
710162306a36Sopenharmony_ci	.hv_inject_synthetic_vmexit_post_tlb_flush = vmx_hv_inject_synthetic_vmexit_post_tlb_flush,
710262306a36Sopenharmony_ci};
7103