18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci
38c2ecf20Sopenharmony_ci#include <linux/objtool.h>
48c2ecf20Sopenharmony_ci#include <linux/percpu.h>
58c2ecf20Sopenharmony_ci
68c2ecf20Sopenharmony_ci#include <asm/debugreg.h>
78c2ecf20Sopenharmony_ci#include <asm/mmu_context.h>
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci#include "cpuid.h"
108c2ecf20Sopenharmony_ci#include "hyperv.h"
118c2ecf20Sopenharmony_ci#include "mmu.h"
128c2ecf20Sopenharmony_ci#include "nested.h"
138c2ecf20Sopenharmony_ci#include "pmu.h"
148c2ecf20Sopenharmony_ci#include "trace.h"
158c2ecf20Sopenharmony_ci#include "vmx.h"
168c2ecf20Sopenharmony_ci#include "x86.h"
178c2ecf20Sopenharmony_ci
188c2ecf20Sopenharmony_cistatic bool __read_mostly enable_shadow_vmcs = 1;
198c2ecf20Sopenharmony_cimodule_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_cistatic bool __read_mostly nested_early_check = 0;
228c2ecf20Sopenharmony_cimodule_param(nested_early_check, bool, S_IRUGO);
238c2ecf20Sopenharmony_ci
/*
 * Evaluate a consistency check exactly once.  When the check fails (the
 * expression evaluates to true), fire the kvm_nested_vmenter_failed
 * tracepoint with the stringified expression.  The value of the macro is
 * the result of the check.
 */
#define CC(consistency_check)						\
({									\
	bool cc_failed = (consistency_check);				\
									\
	if (cc_failed)							\
		trace_kvm_nested_vmenter_failed(#consistency_check, 0);	\
	cc_failed;							\
})
318c2ecf20Sopenharmony_ci
/*
 * VPID extent types reported as supported.  Hyper-V requires all of them,
 * so each one is advertised even though every type is handled exactly like
 * an all-context operation.
 */
#define VMX_VPID_EXTENT_SUPPORTED_MASK			\
	(VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT |		\
	 VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |		\
	 VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT |		\
	 VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)

/*
 * NOTE(review): presumably the preemption-timer rate advertised for the
 * emulated timer -- confirm against its users.
 */
#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE	5
438c2ecf20Sopenharmony_ci
/* Indices into vmx_bitmap[]: one slot for VMREAD, one for VMWRITE. */
enum {
	VMX_VMREAD_BITMAP,
	VMX_VMWRITE_BITMAP,
	VMX_BITMAP_NR,
};

/* Page-sized bitmaps, (re)initialized by init_vmcs_shadow_fields(). */
static unsigned long *vmx_bitmap[VMX_BITMAP_NR];

/* Convenience accessors for the two bitmap slots. */
#define vmx_vmread_bitmap	(vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap	(vmx_bitmap[VMX_VMWRITE_BITMAP])
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_cistruct shadow_vmcs_field {
558c2ecf20Sopenharmony_ci	u16	encoding;
568c2ecf20Sopenharmony_ci	u16	offset;
578c2ecf20Sopenharmony_ci};
588c2ecf20Sopenharmony_cistatic struct shadow_vmcs_field shadow_read_only_fields[] = {
598c2ecf20Sopenharmony_ci#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) },
608c2ecf20Sopenharmony_ci#include "vmcs_shadow_fields.h"
618c2ecf20Sopenharmony_ci};
628c2ecf20Sopenharmony_cistatic int max_shadow_read_only_fields =
638c2ecf20Sopenharmony_ci	ARRAY_SIZE(shadow_read_only_fields);
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_cistatic struct shadow_vmcs_field shadow_read_write_fields[] = {
668c2ecf20Sopenharmony_ci#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) },
678c2ecf20Sopenharmony_ci#include "vmcs_shadow_fields.h"
688c2ecf20Sopenharmony_ci};
698c2ecf20Sopenharmony_cistatic int max_shadow_read_write_fields =
708c2ecf20Sopenharmony_ci	ARRAY_SIZE(shadow_read_write_fields);
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_cistatic void init_vmcs_shadow_fields(void)
738c2ecf20Sopenharmony_ci{
748c2ecf20Sopenharmony_ci	int i, j;
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
778c2ecf20Sopenharmony_ci	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_ci	for (i = j = 0; i < max_shadow_read_only_fields; i++) {
808c2ecf20Sopenharmony_ci		struct shadow_vmcs_field entry = shadow_read_only_fields[i];
818c2ecf20Sopenharmony_ci		u16 field = entry.encoding;
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_ci		if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
848c2ecf20Sopenharmony_ci		    (i + 1 == max_shadow_read_only_fields ||
858c2ecf20Sopenharmony_ci		     shadow_read_only_fields[i + 1].encoding != field + 1))
868c2ecf20Sopenharmony_ci			pr_err("Missing field from shadow_read_only_field %x\n",
878c2ecf20Sopenharmony_ci			       field + 1);
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_ci		clear_bit(field, vmx_vmread_bitmap);
908c2ecf20Sopenharmony_ci		if (field & 1)
918c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
928c2ecf20Sopenharmony_ci			continue;
938c2ecf20Sopenharmony_ci#else
948c2ecf20Sopenharmony_ci			entry.offset += sizeof(u32);
958c2ecf20Sopenharmony_ci#endif
968c2ecf20Sopenharmony_ci		shadow_read_only_fields[j++] = entry;
978c2ecf20Sopenharmony_ci	}
988c2ecf20Sopenharmony_ci	max_shadow_read_only_fields = j;
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci	for (i = j = 0; i < max_shadow_read_write_fields; i++) {
1018c2ecf20Sopenharmony_ci		struct shadow_vmcs_field entry = shadow_read_write_fields[i];
1028c2ecf20Sopenharmony_ci		u16 field = entry.encoding;
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci		if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
1058c2ecf20Sopenharmony_ci		    (i + 1 == max_shadow_read_write_fields ||
1068c2ecf20Sopenharmony_ci		     shadow_read_write_fields[i + 1].encoding != field + 1))
1078c2ecf20Sopenharmony_ci			pr_err("Missing field from shadow_read_write_field %x\n",
1088c2ecf20Sopenharmony_ci			       field + 1);
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci		WARN_ONCE(field >= GUEST_ES_AR_BYTES &&
1118c2ecf20Sopenharmony_ci			  field <= GUEST_TR_AR_BYTES,
1128c2ecf20Sopenharmony_ci			  "Update vmcs12_write_any() to drop reserved bits from AR_BYTES");
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_ci		/*
1158c2ecf20Sopenharmony_ci		 * PML and the preemption timer can be emulated, but the
1168c2ecf20Sopenharmony_ci		 * processor cannot vmwrite to fields that don't exist
1178c2ecf20Sopenharmony_ci		 * on bare metal.
1188c2ecf20Sopenharmony_ci		 */
1198c2ecf20Sopenharmony_ci		switch (field) {
1208c2ecf20Sopenharmony_ci		case GUEST_PML_INDEX:
1218c2ecf20Sopenharmony_ci			if (!cpu_has_vmx_pml())
1228c2ecf20Sopenharmony_ci				continue;
1238c2ecf20Sopenharmony_ci			break;
1248c2ecf20Sopenharmony_ci		case VMX_PREEMPTION_TIMER_VALUE:
1258c2ecf20Sopenharmony_ci			if (!cpu_has_vmx_preemption_timer())
1268c2ecf20Sopenharmony_ci				continue;
1278c2ecf20Sopenharmony_ci			break;
1288c2ecf20Sopenharmony_ci		case GUEST_INTR_STATUS:
1298c2ecf20Sopenharmony_ci			if (!cpu_has_vmx_apicv())
1308c2ecf20Sopenharmony_ci				continue;
1318c2ecf20Sopenharmony_ci			break;
1328c2ecf20Sopenharmony_ci		default:
1338c2ecf20Sopenharmony_ci			break;
1348c2ecf20Sopenharmony_ci		}
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci		clear_bit(field, vmx_vmwrite_bitmap);
1378c2ecf20Sopenharmony_ci		clear_bit(field, vmx_vmread_bitmap);
1388c2ecf20Sopenharmony_ci		if (field & 1)
1398c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
1408c2ecf20Sopenharmony_ci			continue;
1418c2ecf20Sopenharmony_ci#else
1428c2ecf20Sopenharmony_ci			entry.offset += sizeof(u32);
1438c2ecf20Sopenharmony_ci#endif
1448c2ecf20Sopenharmony_ci		shadow_read_write_fields[j++] = entry;
1458c2ecf20Sopenharmony_ci	}
1468c2ecf20Sopenharmony_ci	max_shadow_read_write_fields = j;
1478c2ecf20Sopenharmony_ci}
1488c2ecf20Sopenharmony_ci
1498c2ecf20Sopenharmony_ci/*
1508c2ecf20Sopenharmony_ci * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
1518c2ecf20Sopenharmony_ci * set the success or error code of an emulated VMX instruction (as specified
1528c2ecf20Sopenharmony_ci * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated
1538c2ecf20Sopenharmony_ci * instruction.
1548c2ecf20Sopenharmony_ci */
1558c2ecf20Sopenharmony_cistatic int nested_vmx_succeed(struct kvm_vcpu *vcpu)
1568c2ecf20Sopenharmony_ci{
1578c2ecf20Sopenharmony_ci	vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
1588c2ecf20Sopenharmony_ci			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1598c2ecf20Sopenharmony_ci			    X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
1608c2ecf20Sopenharmony_ci	return kvm_skip_emulated_instruction(vcpu);
1618c2ecf20Sopenharmony_ci}
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_cistatic int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
1648c2ecf20Sopenharmony_ci{
1658c2ecf20Sopenharmony_ci	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
1668c2ecf20Sopenharmony_ci			& ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
1678c2ecf20Sopenharmony_ci			    X86_EFLAGS_SF | X86_EFLAGS_OF))
1688c2ecf20Sopenharmony_ci			| X86_EFLAGS_CF);
1698c2ecf20Sopenharmony_ci	return kvm_skip_emulated_instruction(vcpu);
1708c2ecf20Sopenharmony_ci}
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_cistatic int nested_vmx_failValid(struct kvm_vcpu *vcpu,
1738c2ecf20Sopenharmony_ci				u32 vm_instruction_error)
1748c2ecf20Sopenharmony_ci{
1758c2ecf20Sopenharmony_ci	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
1768c2ecf20Sopenharmony_ci			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1778c2ecf20Sopenharmony_ci			    X86_EFLAGS_SF | X86_EFLAGS_OF))
1788c2ecf20Sopenharmony_ci			| X86_EFLAGS_ZF);
1798c2ecf20Sopenharmony_ci	get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
1808c2ecf20Sopenharmony_ci	/*
1818c2ecf20Sopenharmony_ci	 * We don't need to force a shadow sync because
1828c2ecf20Sopenharmony_ci	 * VM_INSTRUCTION_ERROR is not shadowed
1838c2ecf20Sopenharmony_ci	 */
1848c2ecf20Sopenharmony_ci	return kvm_skip_emulated_instruction(vcpu);
1858c2ecf20Sopenharmony_ci}
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_cistatic int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error)
1888c2ecf20Sopenharmony_ci{
1898c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci	/*
1928c2ecf20Sopenharmony_ci	 * failValid writes the error number to the current VMCS, which
1938c2ecf20Sopenharmony_ci	 * can't be done if there isn't a current VMCS.
1948c2ecf20Sopenharmony_ci	 */
1958c2ecf20Sopenharmony_ci	if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs)
1968c2ecf20Sopenharmony_ci		return nested_vmx_failInvalid(vcpu);
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci	return nested_vmx_failValid(vcpu, vm_instruction_error);
1998c2ecf20Sopenharmony_ci}
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_cistatic void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
2028c2ecf20Sopenharmony_ci{
2038c2ecf20Sopenharmony_ci	/* TODO: not to reset guest simply here. */
2048c2ecf20Sopenharmony_ci	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2058c2ecf20Sopenharmony_ci	pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
2068c2ecf20Sopenharmony_ci}
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_cistatic inline bool vmx_control_verify(u32 control, u32 low, u32 high)
2098c2ecf20Sopenharmony_ci{
2108c2ecf20Sopenharmony_ci	return fixed_bits_valid(control, low, high);
2118c2ecf20Sopenharmony_ci}
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_cistatic inline u64 vmx_control_msr(u32 low, u32 high)
2148c2ecf20Sopenharmony_ci{
2158c2ecf20Sopenharmony_ci	return low | ((u64)high << 32);
2168c2ecf20Sopenharmony_ci}
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_cistatic void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
2198c2ecf20Sopenharmony_ci{
2208c2ecf20Sopenharmony_ci	secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
2218c2ecf20Sopenharmony_ci	vmcs_write64(VMCS_LINK_POINTER, -1ull);
2228c2ecf20Sopenharmony_ci	vmx->nested.need_vmcs12_to_shadow_sync = false;
2238c2ecf20Sopenharmony_ci}
2248c2ecf20Sopenharmony_ci
2258c2ecf20Sopenharmony_cistatic inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
2268c2ecf20Sopenharmony_ci{
2278c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
2288c2ecf20Sopenharmony_ci
2298c2ecf20Sopenharmony_ci	if (!vmx->nested.hv_evmcs)
2308c2ecf20Sopenharmony_ci		return;
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
2338c2ecf20Sopenharmony_ci	vmx->nested.hv_evmcs_vmptr = 0;
2348c2ecf20Sopenharmony_ci	vmx->nested.hv_evmcs = NULL;
2358c2ecf20Sopenharmony_ci}
2368c2ecf20Sopenharmony_ci
2378c2ecf20Sopenharmony_cistatic void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
2388c2ecf20Sopenharmony_ci				     struct loaded_vmcs *prev)
2398c2ecf20Sopenharmony_ci{
2408c2ecf20Sopenharmony_ci	struct vmcs_host_state *dest, *src;
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	if (unlikely(!vmx->guest_state_loaded))
2438c2ecf20Sopenharmony_ci		return;
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_ci	src = &prev->host_state;
2468c2ecf20Sopenharmony_ci	dest = &vmx->loaded_vmcs->host_state;
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_ci	vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
2498c2ecf20Sopenharmony_ci	dest->ldt_sel = src->ldt_sel;
2508c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
2518c2ecf20Sopenharmony_ci	dest->ds_sel = src->ds_sel;
2528c2ecf20Sopenharmony_ci	dest->es_sel = src->es_sel;
2538c2ecf20Sopenharmony_ci#endif
2548c2ecf20Sopenharmony_ci}
2558c2ecf20Sopenharmony_ci
2568c2ecf20Sopenharmony_cistatic void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
2578c2ecf20Sopenharmony_ci{
2588c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
2598c2ecf20Sopenharmony_ci	struct loaded_vmcs *prev;
2608c2ecf20Sopenharmony_ci	int cpu;
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(vmx->loaded_vmcs == vmcs))
2638c2ecf20Sopenharmony_ci		return;
2648c2ecf20Sopenharmony_ci
2658c2ecf20Sopenharmony_ci	cpu = get_cpu();
2668c2ecf20Sopenharmony_ci	prev = vmx->loaded_vmcs;
2678c2ecf20Sopenharmony_ci	vmx->loaded_vmcs = vmcs;
2688c2ecf20Sopenharmony_ci	vmx_vcpu_load_vmcs(vcpu, cpu, prev);
2698c2ecf20Sopenharmony_ci	vmx_sync_vmcs_host_state(vmx, prev);
2708c2ecf20Sopenharmony_ci	put_cpu();
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_ci	vmx_register_cache_reset(vcpu);
2738c2ecf20Sopenharmony_ci}
2748c2ecf20Sopenharmony_ci
2758c2ecf20Sopenharmony_ci/*
2768c2ecf20Sopenharmony_ci * Free whatever needs to be freed from vmx->nested when L1 goes down, or
2778c2ecf20Sopenharmony_ci * just stops using VMX.
2788c2ecf20Sopenharmony_ci */
2798c2ecf20Sopenharmony_cistatic void free_nested(struct kvm_vcpu *vcpu)
2808c2ecf20Sopenharmony_ci{
2818c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01))
2848c2ecf20Sopenharmony_ci		vmx_switch_vmcs(vcpu, &vmx->vmcs01);
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_ci	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
2878c2ecf20Sopenharmony_ci		return;
2888c2ecf20Sopenharmony_ci
2898c2ecf20Sopenharmony_ci	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_ci	vmx->nested.vmxon = false;
2928c2ecf20Sopenharmony_ci	vmx->nested.smm.vmxon = false;
2938c2ecf20Sopenharmony_ci	free_vpid(vmx->nested.vpid02);
2948c2ecf20Sopenharmony_ci	vmx->nested.posted_intr_nv = -1;
2958c2ecf20Sopenharmony_ci	vmx->nested.current_vmptr = -1ull;
2968c2ecf20Sopenharmony_ci	if (enable_shadow_vmcs) {
2978c2ecf20Sopenharmony_ci		vmx_disable_shadow_vmcs(vmx);
2988c2ecf20Sopenharmony_ci		vmcs_clear(vmx->vmcs01.shadow_vmcs);
2998c2ecf20Sopenharmony_ci		free_vmcs(vmx->vmcs01.shadow_vmcs);
3008c2ecf20Sopenharmony_ci		vmx->vmcs01.shadow_vmcs = NULL;
3018c2ecf20Sopenharmony_ci	}
3028c2ecf20Sopenharmony_ci	kfree(vmx->nested.cached_vmcs12);
3038c2ecf20Sopenharmony_ci	vmx->nested.cached_vmcs12 = NULL;
3048c2ecf20Sopenharmony_ci	kfree(vmx->nested.cached_shadow_vmcs12);
3058c2ecf20Sopenharmony_ci	vmx->nested.cached_shadow_vmcs12 = NULL;
3068c2ecf20Sopenharmony_ci	/* Unpin physical memory we referred to in the vmcs02 */
3078c2ecf20Sopenharmony_ci	if (vmx->nested.apic_access_page) {
3088c2ecf20Sopenharmony_ci		kvm_release_page_clean(vmx->nested.apic_access_page);
3098c2ecf20Sopenharmony_ci		vmx->nested.apic_access_page = NULL;
3108c2ecf20Sopenharmony_ci	}
3118c2ecf20Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
3128c2ecf20Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
3138c2ecf20Sopenharmony_ci	vmx->nested.pi_desc = NULL;
3148c2ecf20Sopenharmony_ci
3158c2ecf20Sopenharmony_ci	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
3168c2ecf20Sopenharmony_ci
3178c2ecf20Sopenharmony_ci	nested_release_evmcs(vcpu);
3188c2ecf20Sopenharmony_ci
3198c2ecf20Sopenharmony_ci	free_loaded_vmcs(&vmx->nested.vmcs02);
3208c2ecf20Sopenharmony_ci}
3218c2ecf20Sopenharmony_ci
/*
 * Leave nested operation for @vcpu, with the vCPU loaded for the duration
 * of the call.  The current vmcs of the logical processor must be the
 * vmcs01 of the vcpu before free_nested() is called.
 */
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);

	vmx_leave_nested(vcpu);

	vcpu_put(vcpu);
}
3328c2ecf20Sopenharmony_ci
3338c2ecf20Sopenharmony_cistatic void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
3348c2ecf20Sopenharmony_ci		struct x86_exception *fault)
3358c2ecf20Sopenharmony_ci{
3368c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3378c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
3388c2ecf20Sopenharmony_ci	u32 vm_exit_reason;
3398c2ecf20Sopenharmony_ci	unsigned long exit_qualification = vcpu->arch.exit_qualification;
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ci	if (vmx->nested.pml_full) {
3428c2ecf20Sopenharmony_ci		vm_exit_reason = EXIT_REASON_PML_FULL;
3438c2ecf20Sopenharmony_ci		vmx->nested.pml_full = false;
3448c2ecf20Sopenharmony_ci		exit_qualification &= INTR_INFO_UNBLOCK_NMI;
3458c2ecf20Sopenharmony_ci	} else if (fault->error_code & PFERR_RSVD_MASK)
3468c2ecf20Sopenharmony_ci		vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
3478c2ecf20Sopenharmony_ci	else
3488c2ecf20Sopenharmony_ci		vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci	nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
3518c2ecf20Sopenharmony_ci	vmcs12->guest_physical_address = fault->address;
3528c2ecf20Sopenharmony_ci}
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_cistatic void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
3558c2ecf20Sopenharmony_ci{
3568c2ecf20Sopenharmony_ci	WARN_ON(mmu_is_nested(vcpu));
3578c2ecf20Sopenharmony_ci
3588c2ecf20Sopenharmony_ci	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
3598c2ecf20Sopenharmony_ci	kvm_init_shadow_ept_mmu(vcpu,
3608c2ecf20Sopenharmony_ci			to_vmx(vcpu)->nested.msrs.ept_caps &
3618c2ecf20Sopenharmony_ci			VMX_EPT_EXECUTE_ONLY_BIT,
3628c2ecf20Sopenharmony_ci			nested_ept_ad_enabled(vcpu),
3638c2ecf20Sopenharmony_ci			nested_ept_get_eptp(vcpu));
3648c2ecf20Sopenharmony_ci	vcpu->arch.mmu->get_guest_pgd     = nested_ept_get_eptp;
3658c2ecf20Sopenharmony_ci	vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
3668c2ecf20Sopenharmony_ci	vcpu->arch.mmu->get_pdptr         = kvm_pdptr_read;
3678c2ecf20Sopenharmony_ci
3688c2ecf20Sopenharmony_ci	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
3698c2ecf20Sopenharmony_ci}
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_cistatic void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
3728c2ecf20Sopenharmony_ci{
3738c2ecf20Sopenharmony_ci	vcpu->arch.mmu = &vcpu->arch.root_mmu;
3748c2ecf20Sopenharmony_ci	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
3758c2ecf20Sopenharmony_ci}
3768c2ecf20Sopenharmony_ci
3778c2ecf20Sopenharmony_cistatic bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
3788c2ecf20Sopenharmony_ci					    u16 error_code)
3798c2ecf20Sopenharmony_ci{
3808c2ecf20Sopenharmony_ci	bool inequality, bit;
3818c2ecf20Sopenharmony_ci
3828c2ecf20Sopenharmony_ci	bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
3838c2ecf20Sopenharmony_ci	inequality =
3848c2ecf20Sopenharmony_ci		(error_code & vmcs12->page_fault_error_code_mask) !=
3858c2ecf20Sopenharmony_ci		 vmcs12->page_fault_error_code_match;
3868c2ecf20Sopenharmony_ci	return inequality ^ bit;
3878c2ecf20Sopenharmony_ci}
3888c2ecf20Sopenharmony_ci
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci/*
3918c2ecf20Sopenharmony_ci * KVM wants to inject page-faults which it got to the guest. This function
3928c2ecf20Sopenharmony_ci * checks whether in a nested guest, we need to inject them to L1 or L2.
3938c2ecf20Sopenharmony_ci */
3948c2ecf20Sopenharmony_cistatic int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
3958c2ecf20Sopenharmony_ci{
3968c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3978c2ecf20Sopenharmony_ci	unsigned int nr = vcpu->arch.exception.nr;
3988c2ecf20Sopenharmony_ci	bool has_payload = vcpu->arch.exception.has_payload;
3998c2ecf20Sopenharmony_ci	unsigned long payload = vcpu->arch.exception.payload;
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_ci	if (nr == PF_VECTOR) {
4028c2ecf20Sopenharmony_ci		if (vcpu->arch.exception.nested_apf) {
4038c2ecf20Sopenharmony_ci			*exit_qual = vcpu->arch.apf.nested_apf_token;
4048c2ecf20Sopenharmony_ci			return 1;
4058c2ecf20Sopenharmony_ci		}
4068c2ecf20Sopenharmony_ci		if (nested_vmx_is_page_fault_vmexit(vmcs12,
4078c2ecf20Sopenharmony_ci						    vcpu->arch.exception.error_code)) {
4088c2ecf20Sopenharmony_ci			*exit_qual = has_payload ? payload : vcpu->arch.cr2;
4098c2ecf20Sopenharmony_ci			return 1;
4108c2ecf20Sopenharmony_ci		}
4118c2ecf20Sopenharmony_ci	} else if (vmcs12->exception_bitmap & (1u << nr)) {
4128c2ecf20Sopenharmony_ci		if (nr == DB_VECTOR) {
4138c2ecf20Sopenharmony_ci			if (!has_payload) {
4148c2ecf20Sopenharmony_ci				payload = vcpu->arch.dr6;
4158c2ecf20Sopenharmony_ci				payload &= ~(DR6_FIXED_1 | DR6_BT);
4168c2ecf20Sopenharmony_ci				payload ^= DR6_RTM;
4178c2ecf20Sopenharmony_ci			}
4188c2ecf20Sopenharmony_ci			*exit_qual = payload;
4198c2ecf20Sopenharmony_ci		} else
4208c2ecf20Sopenharmony_ci			*exit_qual = 0;
4218c2ecf20Sopenharmony_ci		return 1;
4228c2ecf20Sopenharmony_ci	}
4238c2ecf20Sopenharmony_ci
4248c2ecf20Sopenharmony_ci	return 0;
4258c2ecf20Sopenharmony_ci}
4268c2ecf20Sopenharmony_ci
4278c2ecf20Sopenharmony_ci
4288c2ecf20Sopenharmony_cistatic void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
4298c2ecf20Sopenharmony_ci		struct x86_exception *fault)
4308c2ecf20Sopenharmony_ci{
4318c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4328c2ecf20Sopenharmony_ci
4338c2ecf20Sopenharmony_ci	WARN_ON(!is_guest_mode(vcpu));
4348c2ecf20Sopenharmony_ci
4358c2ecf20Sopenharmony_ci	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
4368c2ecf20Sopenharmony_ci		!to_vmx(vcpu)->nested.nested_run_pending) {
4378c2ecf20Sopenharmony_ci		vmcs12->vm_exit_intr_error_code = fault->error_code;
4388c2ecf20Sopenharmony_ci		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
4398c2ecf20Sopenharmony_ci				  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
4408c2ecf20Sopenharmony_ci				  INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
4418c2ecf20Sopenharmony_ci				  fault->address);
4428c2ecf20Sopenharmony_ci	} else {
4438c2ecf20Sopenharmony_ci		kvm_inject_page_fault(vcpu, fault);
4448c2ecf20Sopenharmony_ci	}
4458c2ecf20Sopenharmony_ci}
4468c2ecf20Sopenharmony_ci
4478c2ecf20Sopenharmony_cistatic int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
4488c2ecf20Sopenharmony_ci					       struct vmcs12 *vmcs12)
4498c2ecf20Sopenharmony_ci{
4508c2ecf20Sopenharmony_ci	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
4518c2ecf20Sopenharmony_ci		return 0;
4528c2ecf20Sopenharmony_ci
4538c2ecf20Sopenharmony_ci	if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)) ||
4548c2ecf20Sopenharmony_ci	    CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b)))
4558c2ecf20Sopenharmony_ci		return -EINVAL;
4568c2ecf20Sopenharmony_ci
4578c2ecf20Sopenharmony_ci	return 0;
4588c2ecf20Sopenharmony_ci}
4598c2ecf20Sopenharmony_ci
4608c2ecf20Sopenharmony_cistatic int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
4618c2ecf20Sopenharmony_ci						struct vmcs12 *vmcs12)
4628c2ecf20Sopenharmony_ci{
4638c2ecf20Sopenharmony_ci	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
4648c2ecf20Sopenharmony_ci		return 0;
4658c2ecf20Sopenharmony_ci
4668c2ecf20Sopenharmony_ci	if (CC(!page_address_valid(vcpu, vmcs12->msr_bitmap)))
4678c2ecf20Sopenharmony_ci		return -EINVAL;
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci	return 0;
4708c2ecf20Sopenharmony_ci}
4718c2ecf20Sopenharmony_ci
4728c2ecf20Sopenharmony_cistatic int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
4738c2ecf20Sopenharmony_ci						struct vmcs12 *vmcs12)
4748c2ecf20Sopenharmony_ci{
4758c2ecf20Sopenharmony_ci	if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
4768c2ecf20Sopenharmony_ci		return 0;
4778c2ecf20Sopenharmony_ci
4788c2ecf20Sopenharmony_ci	if (CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)))
4798c2ecf20Sopenharmony_ci		return -EINVAL;
4808c2ecf20Sopenharmony_ci
4818c2ecf20Sopenharmony_ci	return 0;
4828c2ecf20Sopenharmony_ci}
4838c2ecf20Sopenharmony_ci
4848c2ecf20Sopenharmony_ci/*
4858c2ecf20Sopenharmony_ci * Check if MSR is intercepted for L01 MSR bitmap.
4868c2ecf20Sopenharmony_ci */
4878c2ecf20Sopenharmony_cistatic bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
4888c2ecf20Sopenharmony_ci{
4898c2ecf20Sopenharmony_ci	unsigned long *msr_bitmap;
4908c2ecf20Sopenharmony_ci	int f = sizeof(unsigned long);
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_ci	if (!cpu_has_vmx_msr_bitmap())
4938c2ecf20Sopenharmony_ci		return true;
4948c2ecf20Sopenharmony_ci
4958c2ecf20Sopenharmony_ci	msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
4968c2ecf20Sopenharmony_ci
4978c2ecf20Sopenharmony_ci	if (msr <= 0x1fff) {
4988c2ecf20Sopenharmony_ci		return !!test_bit(msr, msr_bitmap + 0x800 / f);
4998c2ecf20Sopenharmony_ci	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
5008c2ecf20Sopenharmony_ci		msr &= 0x1fff;
5018c2ecf20Sopenharmony_ci		return !!test_bit(msr, msr_bitmap + 0xc00 / f);
5028c2ecf20Sopenharmony_ci	}
5038c2ecf20Sopenharmony_ci
5048c2ecf20Sopenharmony_ci	return true;
5058c2ecf20Sopenharmony_ci}
5068c2ecf20Sopenharmony_ci
5078c2ecf20Sopenharmony_ci/*
5088c2ecf20Sopenharmony_ci * If a msr is allowed by L0, we should check whether it is allowed by L1.
5098c2ecf20Sopenharmony_ci * The corresponding bit will be cleared unless both of L0 and L1 allow it.
5108c2ecf20Sopenharmony_ci */
5118c2ecf20Sopenharmony_cistatic void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
5128c2ecf20Sopenharmony_ci					       unsigned long *msr_bitmap_nested,
5138c2ecf20Sopenharmony_ci					       u32 msr, int type)
5148c2ecf20Sopenharmony_ci{
5158c2ecf20Sopenharmony_ci	int f = sizeof(unsigned long);
5168c2ecf20Sopenharmony_ci
5178c2ecf20Sopenharmony_ci	/*
5188c2ecf20Sopenharmony_ci	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
5198c2ecf20Sopenharmony_ci	 * have the write-low and read-high bitmap offsets the wrong way round.
5208c2ecf20Sopenharmony_ci	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
5218c2ecf20Sopenharmony_ci	 */
5228c2ecf20Sopenharmony_ci	if (msr <= 0x1fff) {
5238c2ecf20Sopenharmony_ci		if (type & MSR_TYPE_R &&
5248c2ecf20Sopenharmony_ci		   !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
5258c2ecf20Sopenharmony_ci			/* read-low */
5268c2ecf20Sopenharmony_ci			__clear_bit(msr, msr_bitmap_nested + 0x000 / f);
5278c2ecf20Sopenharmony_ci
5288c2ecf20Sopenharmony_ci		if (type & MSR_TYPE_W &&
5298c2ecf20Sopenharmony_ci		   !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
5308c2ecf20Sopenharmony_ci			/* write-low */
5318c2ecf20Sopenharmony_ci			__clear_bit(msr, msr_bitmap_nested + 0x800 / f);
5328c2ecf20Sopenharmony_ci
5338c2ecf20Sopenharmony_ci	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
5348c2ecf20Sopenharmony_ci		msr &= 0x1fff;
5358c2ecf20Sopenharmony_ci		if (type & MSR_TYPE_R &&
5368c2ecf20Sopenharmony_ci		   !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
5378c2ecf20Sopenharmony_ci			/* read-high */
5388c2ecf20Sopenharmony_ci			__clear_bit(msr, msr_bitmap_nested + 0x400 / f);
5398c2ecf20Sopenharmony_ci
5408c2ecf20Sopenharmony_ci		if (type & MSR_TYPE_W &&
5418c2ecf20Sopenharmony_ci		   !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
5428c2ecf20Sopenharmony_ci			/* write-high */
5438c2ecf20Sopenharmony_ci			__clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
5448c2ecf20Sopenharmony_ci
5458c2ecf20Sopenharmony_ci	}
5468c2ecf20Sopenharmony_ci}
5478c2ecf20Sopenharmony_ci
5488c2ecf20Sopenharmony_cistatic inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
5498c2ecf20Sopenharmony_ci{
5508c2ecf20Sopenharmony_ci	int msr;
5518c2ecf20Sopenharmony_ci
5528c2ecf20Sopenharmony_ci	for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
5538c2ecf20Sopenharmony_ci		unsigned word = msr / BITS_PER_LONG;
5548c2ecf20Sopenharmony_ci
5558c2ecf20Sopenharmony_ci		msr_bitmap[word] = ~0;
5568c2ecf20Sopenharmony_ci		msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
5578c2ecf20Sopenharmony_ci	}
5588c2ecf20Sopenharmony_ci}
5598c2ecf20Sopenharmony_ci
5608c2ecf20Sopenharmony_ci/*
5618c2ecf20Sopenharmony_ci * Merge L0's and L1's MSR bitmap, return false to indicate that
5628c2ecf20Sopenharmony_ci * we do not use the hardware.
5638c2ecf20Sopenharmony_ci */
5648c2ecf20Sopenharmony_cistatic inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
5658c2ecf20Sopenharmony_ci						 struct vmcs12 *vmcs12)
5668c2ecf20Sopenharmony_ci{
5678c2ecf20Sopenharmony_ci	int msr;
5688c2ecf20Sopenharmony_ci	unsigned long *msr_bitmap_l1;
5698c2ecf20Sopenharmony_ci	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
5708c2ecf20Sopenharmony_ci	struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
5718c2ecf20Sopenharmony_ci
5728c2ecf20Sopenharmony_ci	/* Nothing to do if the MSR bitmap is not in use.  */
5738c2ecf20Sopenharmony_ci	if (!cpu_has_vmx_msr_bitmap() ||
5748c2ecf20Sopenharmony_ci	    !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
5758c2ecf20Sopenharmony_ci		return false;
5768c2ecf20Sopenharmony_ci
5778c2ecf20Sopenharmony_ci	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
5788c2ecf20Sopenharmony_ci		return false;
5798c2ecf20Sopenharmony_ci
5808c2ecf20Sopenharmony_ci	msr_bitmap_l1 = (unsigned long *)map->hva;
5818c2ecf20Sopenharmony_ci
5828c2ecf20Sopenharmony_ci	/*
5838c2ecf20Sopenharmony_ci	 * To keep the control flow simple, pay eight 8-byte writes (sixteen
5848c2ecf20Sopenharmony_ci	 * 4-byte writes on 32-bit systems) up front to enable intercepts for
5858c2ecf20Sopenharmony_ci	 * the x2APIC MSR range and selectively disable them below.
5868c2ecf20Sopenharmony_ci	 */
5878c2ecf20Sopenharmony_ci	enable_x2apic_msr_intercepts(msr_bitmap_l0);
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ci	if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
5908c2ecf20Sopenharmony_ci		if (nested_cpu_has_apic_reg_virt(vmcs12)) {
5918c2ecf20Sopenharmony_ci			/*
5928c2ecf20Sopenharmony_ci			 * L0 need not intercept reads for MSRs between 0x800
5938c2ecf20Sopenharmony_ci			 * and 0x8ff, it just lets the processor take the value
5948c2ecf20Sopenharmony_ci			 * from the virtual-APIC page; take those 256 bits
5958c2ecf20Sopenharmony_ci			 * directly from the L1 bitmap.
5968c2ecf20Sopenharmony_ci			 */
5978c2ecf20Sopenharmony_ci			for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
5988c2ecf20Sopenharmony_ci				unsigned word = msr / BITS_PER_LONG;
5998c2ecf20Sopenharmony_ci
6008c2ecf20Sopenharmony_ci				msr_bitmap_l0[word] = msr_bitmap_l1[word];
6018c2ecf20Sopenharmony_ci			}
6028c2ecf20Sopenharmony_ci		}
6038c2ecf20Sopenharmony_ci
6048c2ecf20Sopenharmony_ci		nested_vmx_disable_intercept_for_msr(
6058c2ecf20Sopenharmony_ci			msr_bitmap_l1, msr_bitmap_l0,
6068c2ecf20Sopenharmony_ci			X2APIC_MSR(APIC_TASKPRI),
6078c2ecf20Sopenharmony_ci			MSR_TYPE_R | MSR_TYPE_W);
6088c2ecf20Sopenharmony_ci
6098c2ecf20Sopenharmony_ci		if (nested_cpu_has_vid(vmcs12)) {
6108c2ecf20Sopenharmony_ci			nested_vmx_disable_intercept_for_msr(
6118c2ecf20Sopenharmony_ci				msr_bitmap_l1, msr_bitmap_l0,
6128c2ecf20Sopenharmony_ci				X2APIC_MSR(APIC_EOI),
6138c2ecf20Sopenharmony_ci				MSR_TYPE_W);
6148c2ecf20Sopenharmony_ci			nested_vmx_disable_intercept_for_msr(
6158c2ecf20Sopenharmony_ci				msr_bitmap_l1, msr_bitmap_l0,
6168c2ecf20Sopenharmony_ci				X2APIC_MSR(APIC_SELF_IPI),
6178c2ecf20Sopenharmony_ci				MSR_TYPE_W);
6188c2ecf20Sopenharmony_ci		}
6198c2ecf20Sopenharmony_ci	}
6208c2ecf20Sopenharmony_ci
6218c2ecf20Sopenharmony_ci	/* KVM unconditionally exposes the FS/GS base MSRs to L1. */
6228c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
6238c2ecf20Sopenharmony_ci	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
6248c2ecf20Sopenharmony_ci					     MSR_FS_BASE, MSR_TYPE_RW);
6258c2ecf20Sopenharmony_ci
6268c2ecf20Sopenharmony_ci	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
6278c2ecf20Sopenharmony_ci					     MSR_GS_BASE, MSR_TYPE_RW);
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
6308c2ecf20Sopenharmony_ci					     MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
6318c2ecf20Sopenharmony_ci#endif
6328c2ecf20Sopenharmony_ci
6338c2ecf20Sopenharmony_ci	/*
6348c2ecf20Sopenharmony_ci	 * Checking the L0->L1 bitmap is trying to verify two things:
6358c2ecf20Sopenharmony_ci	 *
6368c2ecf20Sopenharmony_ci	 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
6378c2ecf20Sopenharmony_ci	 *    ensures that we do not accidentally generate an L02 MSR bitmap
6388c2ecf20Sopenharmony_ci	 *    from the L12 MSR bitmap that is too permissive.
6398c2ecf20Sopenharmony_ci	 * 2. That L1 or L2s have actually used the MSR. This avoids
6408c2ecf20Sopenharmony_ci	 *    unnecessarily merging of the bitmap if the MSR is unused. This
6418c2ecf20Sopenharmony_ci	 *    works properly because we only update the L01 MSR bitmap lazily.
6428c2ecf20Sopenharmony_ci	 *    So even if L0 should pass L1 these MSRs, the L01 bitmap is only
6438c2ecf20Sopenharmony_ci	 *    updated to reflect this when L1 (or its L2s) actually write to
6448c2ecf20Sopenharmony_ci	 *    the MSR.
6458c2ecf20Sopenharmony_ci	 */
6468c2ecf20Sopenharmony_ci	if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
6478c2ecf20Sopenharmony_ci		nested_vmx_disable_intercept_for_msr(
6488c2ecf20Sopenharmony_ci					msr_bitmap_l1, msr_bitmap_l0,
6498c2ecf20Sopenharmony_ci					MSR_IA32_SPEC_CTRL,
6508c2ecf20Sopenharmony_ci					MSR_TYPE_R | MSR_TYPE_W);
6518c2ecf20Sopenharmony_ci
6528c2ecf20Sopenharmony_ci	if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
6538c2ecf20Sopenharmony_ci		nested_vmx_disable_intercept_for_msr(
6548c2ecf20Sopenharmony_ci					msr_bitmap_l1, msr_bitmap_l0,
6558c2ecf20Sopenharmony_ci					MSR_IA32_PRED_CMD,
6568c2ecf20Sopenharmony_ci					MSR_TYPE_W);
6578c2ecf20Sopenharmony_ci
6588c2ecf20Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);
6598c2ecf20Sopenharmony_ci
6608c2ecf20Sopenharmony_ci	return true;
6618c2ecf20Sopenharmony_ci}
6628c2ecf20Sopenharmony_ci
6638c2ecf20Sopenharmony_cistatic void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
6648c2ecf20Sopenharmony_ci				       struct vmcs12 *vmcs12)
6658c2ecf20Sopenharmony_ci{
6668c2ecf20Sopenharmony_ci	struct kvm_host_map map;
6678c2ecf20Sopenharmony_ci	struct vmcs12 *shadow;
6688c2ecf20Sopenharmony_ci
6698c2ecf20Sopenharmony_ci	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
6708c2ecf20Sopenharmony_ci	    vmcs12->vmcs_link_pointer == -1ull)
6718c2ecf20Sopenharmony_ci		return;
6728c2ecf20Sopenharmony_ci
6738c2ecf20Sopenharmony_ci	shadow = get_shadow_vmcs12(vcpu);
6748c2ecf20Sopenharmony_ci
6758c2ecf20Sopenharmony_ci	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
6768c2ecf20Sopenharmony_ci		return;
6778c2ecf20Sopenharmony_ci
6788c2ecf20Sopenharmony_ci	memcpy(shadow, map.hva, VMCS12_SIZE);
6798c2ecf20Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &map, false);
6808c2ecf20Sopenharmony_ci}
6818c2ecf20Sopenharmony_ci
6828c2ecf20Sopenharmony_cistatic void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
6838c2ecf20Sopenharmony_ci					      struct vmcs12 *vmcs12)
6848c2ecf20Sopenharmony_ci{
6858c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
6868c2ecf20Sopenharmony_ci
6878c2ecf20Sopenharmony_ci	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
6888c2ecf20Sopenharmony_ci	    vmcs12->vmcs_link_pointer == -1ull)
6898c2ecf20Sopenharmony_ci		return;
6908c2ecf20Sopenharmony_ci
6918c2ecf20Sopenharmony_ci	kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
6928c2ecf20Sopenharmony_ci			get_shadow_vmcs12(vcpu), VMCS12_SIZE);
6938c2ecf20Sopenharmony_ci}
6948c2ecf20Sopenharmony_ci
6958c2ecf20Sopenharmony_ci/*
6968c2ecf20Sopenharmony_ci * In nested virtualization, check if L1 has set
6978c2ecf20Sopenharmony_ci * VM_EXIT_ACK_INTR_ON_EXIT
6988c2ecf20Sopenharmony_ci */
6998c2ecf20Sopenharmony_cistatic bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
7008c2ecf20Sopenharmony_ci{
7018c2ecf20Sopenharmony_ci	return get_vmcs12(vcpu)->vm_exit_controls &
7028c2ecf20Sopenharmony_ci		VM_EXIT_ACK_INTR_ON_EXIT;
7038c2ecf20Sopenharmony_ci}
7048c2ecf20Sopenharmony_ci
7058c2ecf20Sopenharmony_cistatic int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
7068c2ecf20Sopenharmony_ci					  struct vmcs12 *vmcs12)
7078c2ecf20Sopenharmony_ci{
7088c2ecf20Sopenharmony_ci	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
7098c2ecf20Sopenharmony_ci	    CC(!page_address_valid(vcpu, vmcs12->apic_access_addr)))
7108c2ecf20Sopenharmony_ci		return -EINVAL;
7118c2ecf20Sopenharmony_ci	else
7128c2ecf20Sopenharmony_ci		return 0;
7138c2ecf20Sopenharmony_ci}
7148c2ecf20Sopenharmony_ci
7158c2ecf20Sopenharmony_cistatic int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
7168c2ecf20Sopenharmony_ci					   struct vmcs12 *vmcs12)
7178c2ecf20Sopenharmony_ci{
7188c2ecf20Sopenharmony_ci	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
7198c2ecf20Sopenharmony_ci	    !nested_cpu_has_apic_reg_virt(vmcs12) &&
7208c2ecf20Sopenharmony_ci	    !nested_cpu_has_vid(vmcs12) &&
7218c2ecf20Sopenharmony_ci	    !nested_cpu_has_posted_intr(vmcs12))
7228c2ecf20Sopenharmony_ci		return 0;
7238c2ecf20Sopenharmony_ci
7248c2ecf20Sopenharmony_ci	/*
7258c2ecf20Sopenharmony_ci	 * If virtualize x2apic mode is enabled,
7268c2ecf20Sopenharmony_ci	 * virtualize apic access must be disabled.
7278c2ecf20Sopenharmony_ci	 */
7288c2ecf20Sopenharmony_ci	if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) &&
7298c2ecf20Sopenharmony_ci	       nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)))
7308c2ecf20Sopenharmony_ci		return -EINVAL;
7318c2ecf20Sopenharmony_ci
7328c2ecf20Sopenharmony_ci	/*
7338c2ecf20Sopenharmony_ci	 * If virtual interrupt delivery is enabled,
7348c2ecf20Sopenharmony_ci	 * we must exit on external interrupts.
7358c2ecf20Sopenharmony_ci	 */
7368c2ecf20Sopenharmony_ci	if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu)))
7378c2ecf20Sopenharmony_ci		return -EINVAL;
7388c2ecf20Sopenharmony_ci
7398c2ecf20Sopenharmony_ci	/*
7408c2ecf20Sopenharmony_ci	 * bits 15:8 should be zero in posted_intr_nv,
7418c2ecf20Sopenharmony_ci	 * the descriptor address has been already checked
7428c2ecf20Sopenharmony_ci	 * in nested_get_vmcs12_pages.
7438c2ecf20Sopenharmony_ci	 *
7448c2ecf20Sopenharmony_ci	 * bits 5:0 of posted_intr_desc_addr should be zero.
7458c2ecf20Sopenharmony_ci	 */
7468c2ecf20Sopenharmony_ci	if (nested_cpu_has_posted_intr(vmcs12) &&
7478c2ecf20Sopenharmony_ci	   (CC(!nested_cpu_has_vid(vmcs12)) ||
7488c2ecf20Sopenharmony_ci	    CC(!nested_exit_intr_ack_set(vcpu)) ||
7498c2ecf20Sopenharmony_ci	    CC((vmcs12->posted_intr_nv & 0xff00)) ||
7508c2ecf20Sopenharmony_ci	    CC((vmcs12->posted_intr_desc_addr & 0x3f)) ||
7518c2ecf20Sopenharmony_ci	    CC((vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu)))))
7528c2ecf20Sopenharmony_ci		return -EINVAL;
7538c2ecf20Sopenharmony_ci
7548c2ecf20Sopenharmony_ci	/* tpr shadow is needed by all apicv features. */
7558c2ecf20Sopenharmony_ci	if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)))
7568c2ecf20Sopenharmony_ci		return -EINVAL;
7578c2ecf20Sopenharmony_ci
7588c2ecf20Sopenharmony_ci	return 0;
7598c2ecf20Sopenharmony_ci}
7608c2ecf20Sopenharmony_ci
7618c2ecf20Sopenharmony_cistatic int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
7628c2ecf20Sopenharmony_ci				       u32 count, u64 addr)
7638c2ecf20Sopenharmony_ci{
7648c2ecf20Sopenharmony_ci	int maxphyaddr;
7658c2ecf20Sopenharmony_ci
7668c2ecf20Sopenharmony_ci	if (count == 0)
7678c2ecf20Sopenharmony_ci		return 0;
7688c2ecf20Sopenharmony_ci	maxphyaddr = cpuid_maxphyaddr(vcpu);
7698c2ecf20Sopenharmony_ci	if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
7708c2ecf20Sopenharmony_ci	    (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr)
7718c2ecf20Sopenharmony_ci		return -EINVAL;
7728c2ecf20Sopenharmony_ci
7738c2ecf20Sopenharmony_ci	return 0;
7748c2ecf20Sopenharmony_ci}
7758c2ecf20Sopenharmony_ci
7768c2ecf20Sopenharmony_cistatic int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu,
7778c2ecf20Sopenharmony_ci						     struct vmcs12 *vmcs12)
7788c2ecf20Sopenharmony_ci{
7798c2ecf20Sopenharmony_ci	if (CC(nested_vmx_check_msr_switch(vcpu,
7808c2ecf20Sopenharmony_ci					   vmcs12->vm_exit_msr_load_count,
7818c2ecf20Sopenharmony_ci					   vmcs12->vm_exit_msr_load_addr)) ||
7828c2ecf20Sopenharmony_ci	    CC(nested_vmx_check_msr_switch(vcpu,
7838c2ecf20Sopenharmony_ci					   vmcs12->vm_exit_msr_store_count,
7848c2ecf20Sopenharmony_ci					   vmcs12->vm_exit_msr_store_addr)))
7858c2ecf20Sopenharmony_ci		return -EINVAL;
7868c2ecf20Sopenharmony_ci
7878c2ecf20Sopenharmony_ci	return 0;
7888c2ecf20Sopenharmony_ci}
7898c2ecf20Sopenharmony_ci
7908c2ecf20Sopenharmony_cistatic int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu,
7918c2ecf20Sopenharmony_ci                                                      struct vmcs12 *vmcs12)
7928c2ecf20Sopenharmony_ci{
7938c2ecf20Sopenharmony_ci	if (CC(nested_vmx_check_msr_switch(vcpu,
7948c2ecf20Sopenharmony_ci					   vmcs12->vm_entry_msr_load_count,
7958c2ecf20Sopenharmony_ci					   vmcs12->vm_entry_msr_load_addr)))
7968c2ecf20Sopenharmony_ci                return -EINVAL;
7978c2ecf20Sopenharmony_ci
7988c2ecf20Sopenharmony_ci	return 0;
7998c2ecf20Sopenharmony_ci}
8008c2ecf20Sopenharmony_ci
8018c2ecf20Sopenharmony_cistatic int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
8028c2ecf20Sopenharmony_ci					 struct vmcs12 *vmcs12)
8038c2ecf20Sopenharmony_ci{
8048c2ecf20Sopenharmony_ci	if (!nested_cpu_has_pml(vmcs12))
8058c2ecf20Sopenharmony_ci		return 0;
8068c2ecf20Sopenharmony_ci
8078c2ecf20Sopenharmony_ci	if (CC(!nested_cpu_has_ept(vmcs12)) ||
8088c2ecf20Sopenharmony_ci	    CC(!page_address_valid(vcpu, vmcs12->pml_address)))
8098c2ecf20Sopenharmony_ci		return -EINVAL;
8108c2ecf20Sopenharmony_ci
8118c2ecf20Sopenharmony_ci	return 0;
8128c2ecf20Sopenharmony_ci}
8138c2ecf20Sopenharmony_ci
8148c2ecf20Sopenharmony_cistatic int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu,
8158c2ecf20Sopenharmony_ci							struct vmcs12 *vmcs12)
8168c2ecf20Sopenharmony_ci{
8178c2ecf20Sopenharmony_ci	if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) &&
8188c2ecf20Sopenharmony_ci	       !nested_cpu_has_ept(vmcs12)))
8198c2ecf20Sopenharmony_ci		return -EINVAL;
8208c2ecf20Sopenharmony_ci	return 0;
8218c2ecf20Sopenharmony_ci}
8228c2ecf20Sopenharmony_ci
8238c2ecf20Sopenharmony_cistatic int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu,
8248c2ecf20Sopenharmony_ci							 struct vmcs12 *vmcs12)
8258c2ecf20Sopenharmony_ci{
8268c2ecf20Sopenharmony_ci	if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) &&
8278c2ecf20Sopenharmony_ci	       !nested_cpu_has_ept(vmcs12)))
8288c2ecf20Sopenharmony_ci		return -EINVAL;
8298c2ecf20Sopenharmony_ci	return 0;
8308c2ecf20Sopenharmony_ci}
8318c2ecf20Sopenharmony_ci
8328c2ecf20Sopenharmony_cistatic int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
8338c2ecf20Sopenharmony_ci						 struct vmcs12 *vmcs12)
8348c2ecf20Sopenharmony_ci{
8358c2ecf20Sopenharmony_ci	if (!nested_cpu_has_shadow_vmcs(vmcs12))
8368c2ecf20Sopenharmony_ci		return 0;
8378c2ecf20Sopenharmony_ci
8388c2ecf20Sopenharmony_ci	if (CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)) ||
8398c2ecf20Sopenharmony_ci	    CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap)))
8408c2ecf20Sopenharmony_ci		return -EINVAL;
8418c2ecf20Sopenharmony_ci
8428c2ecf20Sopenharmony_ci	return 0;
8438c2ecf20Sopenharmony_ci}
8448c2ecf20Sopenharmony_ci
8458c2ecf20Sopenharmony_cistatic int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
8468c2ecf20Sopenharmony_ci				       struct vmx_msr_entry *e)
8478c2ecf20Sopenharmony_ci{
8488c2ecf20Sopenharmony_ci	/* x2APIC MSR accesses are not allowed */
8498c2ecf20Sopenharmony_ci	if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8))
8508c2ecf20Sopenharmony_ci		return -EINVAL;
8518c2ecf20Sopenharmony_ci	if (CC(e->index == MSR_IA32_UCODE_WRITE) || /* SDM Table 35-2 */
8528c2ecf20Sopenharmony_ci	    CC(e->index == MSR_IA32_UCODE_REV))
8538c2ecf20Sopenharmony_ci		return -EINVAL;
8548c2ecf20Sopenharmony_ci	if (CC(e->reserved != 0))
8558c2ecf20Sopenharmony_ci		return -EINVAL;
8568c2ecf20Sopenharmony_ci	return 0;
8578c2ecf20Sopenharmony_ci}
8588c2ecf20Sopenharmony_ci
8598c2ecf20Sopenharmony_cistatic int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
8608c2ecf20Sopenharmony_ci				     struct vmx_msr_entry *e)
8618c2ecf20Sopenharmony_ci{
8628c2ecf20Sopenharmony_ci	if (CC(e->index == MSR_FS_BASE) ||
8638c2ecf20Sopenharmony_ci	    CC(e->index == MSR_GS_BASE) ||
8648c2ecf20Sopenharmony_ci	    CC(e->index == MSR_IA32_SMM_MONITOR_CTL) || /* SMM is not supported */
8658c2ecf20Sopenharmony_ci	    nested_vmx_msr_check_common(vcpu, e))
8668c2ecf20Sopenharmony_ci		return -EINVAL;
8678c2ecf20Sopenharmony_ci	return 0;
8688c2ecf20Sopenharmony_ci}
8698c2ecf20Sopenharmony_ci
8708c2ecf20Sopenharmony_cistatic int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
8718c2ecf20Sopenharmony_ci				      struct vmx_msr_entry *e)
8728c2ecf20Sopenharmony_ci{
8738c2ecf20Sopenharmony_ci	if (CC(e->index == MSR_IA32_SMBASE) || /* SMM is not supported */
8748c2ecf20Sopenharmony_ci	    nested_vmx_msr_check_common(vcpu, e))
8758c2ecf20Sopenharmony_ci		return -EINVAL;
8768c2ecf20Sopenharmony_ci	return 0;
8778c2ecf20Sopenharmony_ci}
8788c2ecf20Sopenharmony_ci
8798c2ecf20Sopenharmony_cistatic u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
8808c2ecf20Sopenharmony_ci{
8818c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
8828c2ecf20Sopenharmony_ci	u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
8838c2ecf20Sopenharmony_ci				       vmx->nested.msrs.misc_high);
8848c2ecf20Sopenharmony_ci
8858c2ecf20Sopenharmony_ci	return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
8868c2ecf20Sopenharmony_ci}
8878c2ecf20Sopenharmony_ci
8888c2ecf20Sopenharmony_ci/*
8898c2ecf20Sopenharmony_ci * Load guest's/host's msr at nested entry/exit.
8908c2ecf20Sopenharmony_ci * return 0 for success, entry index for failure.
8918c2ecf20Sopenharmony_ci *
8928c2ecf20Sopenharmony_ci * One of the failure modes for MSR load/store is when a list exceeds the
8938c2ecf20Sopenharmony_ci * virtual hardware's capacity. To maintain compatibility with hardware inasmuch
8948c2ecf20Sopenharmony_ci * as possible, process all valid entries before failing rather than precheck
8958c2ecf20Sopenharmony_ci * for a capacity violation.
8968c2ecf20Sopenharmony_ci */
8978c2ecf20Sopenharmony_cistatic u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
8988c2ecf20Sopenharmony_ci{
8998c2ecf20Sopenharmony_ci	u32 i;
9008c2ecf20Sopenharmony_ci	struct vmx_msr_entry e;
9018c2ecf20Sopenharmony_ci	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
9028c2ecf20Sopenharmony_ci
9038c2ecf20Sopenharmony_ci	for (i = 0; i < count; i++) {
9048c2ecf20Sopenharmony_ci		if (unlikely(i >= max_msr_list_size))
9058c2ecf20Sopenharmony_ci			goto fail;
9068c2ecf20Sopenharmony_ci
9078c2ecf20Sopenharmony_ci		if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
9088c2ecf20Sopenharmony_ci					&e, sizeof(e))) {
9098c2ecf20Sopenharmony_ci			pr_debug_ratelimited(
9108c2ecf20Sopenharmony_ci				"%s cannot read MSR entry (%u, 0x%08llx)\n",
9118c2ecf20Sopenharmony_ci				__func__, i, gpa + i * sizeof(e));
9128c2ecf20Sopenharmony_ci			goto fail;
9138c2ecf20Sopenharmony_ci		}
9148c2ecf20Sopenharmony_ci		if (nested_vmx_load_msr_check(vcpu, &e)) {
9158c2ecf20Sopenharmony_ci			pr_debug_ratelimited(
9168c2ecf20Sopenharmony_ci				"%s check failed (%u, 0x%x, 0x%x)\n",
9178c2ecf20Sopenharmony_ci				__func__, i, e.index, e.reserved);
9188c2ecf20Sopenharmony_ci			goto fail;
9198c2ecf20Sopenharmony_ci		}
9208c2ecf20Sopenharmony_ci		if (kvm_set_msr(vcpu, e.index, e.value)) {
9218c2ecf20Sopenharmony_ci			pr_debug_ratelimited(
9228c2ecf20Sopenharmony_ci				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
9238c2ecf20Sopenharmony_ci				__func__, i, e.index, e.value);
9248c2ecf20Sopenharmony_ci			goto fail;
9258c2ecf20Sopenharmony_ci		}
9268c2ecf20Sopenharmony_ci	}
9278c2ecf20Sopenharmony_ci	return 0;
9288c2ecf20Sopenharmony_cifail:
9298c2ecf20Sopenharmony_ci	/* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. */
9308c2ecf20Sopenharmony_ci	return i + 1;
9318c2ecf20Sopenharmony_ci}
9328c2ecf20Sopenharmony_ci
9338c2ecf20Sopenharmony_cistatic bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
9348c2ecf20Sopenharmony_ci					    u32 msr_index,
9358c2ecf20Sopenharmony_ci					    u64 *data)
9368c2ecf20Sopenharmony_ci{
9378c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
9388c2ecf20Sopenharmony_ci
9398c2ecf20Sopenharmony_ci	/*
9408c2ecf20Sopenharmony_ci	 * If the L0 hypervisor stored a more accurate value for the TSC that
9418c2ecf20Sopenharmony_ci	 * does not include the time taken for emulation of the L2->L1
9428c2ecf20Sopenharmony_ci	 * VM-exit in L0, use the more accurate value.
9438c2ecf20Sopenharmony_ci	 */
9448c2ecf20Sopenharmony_ci	if (msr_index == MSR_IA32_TSC) {
9458c2ecf20Sopenharmony_ci		int i = vmx_find_loadstore_msr_slot(&vmx->msr_autostore.guest,
9468c2ecf20Sopenharmony_ci						    MSR_IA32_TSC);
9478c2ecf20Sopenharmony_ci
9488c2ecf20Sopenharmony_ci		if (i >= 0) {
9498c2ecf20Sopenharmony_ci			u64 val = vmx->msr_autostore.guest.val[i].value;
9508c2ecf20Sopenharmony_ci
9518c2ecf20Sopenharmony_ci			*data = kvm_read_l1_tsc(vcpu, val);
9528c2ecf20Sopenharmony_ci			return true;
9538c2ecf20Sopenharmony_ci		}
9548c2ecf20Sopenharmony_ci	}
9558c2ecf20Sopenharmony_ci
9568c2ecf20Sopenharmony_ci	if (kvm_get_msr(vcpu, msr_index, data)) {
9578c2ecf20Sopenharmony_ci		pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
9588c2ecf20Sopenharmony_ci			msr_index);
9598c2ecf20Sopenharmony_ci		return false;
9608c2ecf20Sopenharmony_ci	}
9618c2ecf20Sopenharmony_ci	return true;
9628c2ecf20Sopenharmony_ci}
9638c2ecf20Sopenharmony_ci
9648c2ecf20Sopenharmony_cistatic bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i,
9658c2ecf20Sopenharmony_ci				     struct vmx_msr_entry *e)
9668c2ecf20Sopenharmony_ci{
9678c2ecf20Sopenharmony_ci	if (kvm_vcpu_read_guest(vcpu,
9688c2ecf20Sopenharmony_ci				gpa + i * sizeof(*e),
9698c2ecf20Sopenharmony_ci				e, 2 * sizeof(u32))) {
9708c2ecf20Sopenharmony_ci		pr_debug_ratelimited(
9718c2ecf20Sopenharmony_ci			"%s cannot read MSR entry (%u, 0x%08llx)\n",
9728c2ecf20Sopenharmony_ci			__func__, i, gpa + i * sizeof(*e));
9738c2ecf20Sopenharmony_ci		return false;
9748c2ecf20Sopenharmony_ci	}
9758c2ecf20Sopenharmony_ci	if (nested_vmx_store_msr_check(vcpu, e)) {
9768c2ecf20Sopenharmony_ci		pr_debug_ratelimited(
9778c2ecf20Sopenharmony_ci			"%s check failed (%u, 0x%x, 0x%x)\n",
9788c2ecf20Sopenharmony_ci			__func__, i, e->index, e->reserved);
9798c2ecf20Sopenharmony_ci		return false;
9808c2ecf20Sopenharmony_ci	}
9818c2ecf20Sopenharmony_ci	return true;
9828c2ecf20Sopenharmony_ci}
9838c2ecf20Sopenharmony_ci
9848c2ecf20Sopenharmony_cistatic int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
9858c2ecf20Sopenharmony_ci{
9868c2ecf20Sopenharmony_ci	u64 data;
9878c2ecf20Sopenharmony_ci	u32 i;
9888c2ecf20Sopenharmony_ci	struct vmx_msr_entry e;
9898c2ecf20Sopenharmony_ci	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
9908c2ecf20Sopenharmony_ci
9918c2ecf20Sopenharmony_ci	for (i = 0; i < count; i++) {
9928c2ecf20Sopenharmony_ci		if (unlikely(i >= max_msr_list_size))
9938c2ecf20Sopenharmony_ci			return -EINVAL;
9948c2ecf20Sopenharmony_ci
9958c2ecf20Sopenharmony_ci		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
9968c2ecf20Sopenharmony_ci			return -EINVAL;
9978c2ecf20Sopenharmony_ci
9988c2ecf20Sopenharmony_ci		if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data))
9998c2ecf20Sopenharmony_ci			return -EINVAL;
10008c2ecf20Sopenharmony_ci
10018c2ecf20Sopenharmony_ci		if (kvm_vcpu_write_guest(vcpu,
10028c2ecf20Sopenharmony_ci					 gpa + i * sizeof(e) +
10038c2ecf20Sopenharmony_ci					     offsetof(struct vmx_msr_entry, value),
10048c2ecf20Sopenharmony_ci					 &data, sizeof(data))) {
10058c2ecf20Sopenharmony_ci			pr_debug_ratelimited(
10068c2ecf20Sopenharmony_ci				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
10078c2ecf20Sopenharmony_ci				__func__, i, e.index, data);
10088c2ecf20Sopenharmony_ci			return -EINVAL;
10098c2ecf20Sopenharmony_ci		}
10108c2ecf20Sopenharmony_ci	}
10118c2ecf20Sopenharmony_ci	return 0;
10128c2ecf20Sopenharmony_ci}
10138c2ecf20Sopenharmony_ci
10148c2ecf20Sopenharmony_cistatic bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index)
10158c2ecf20Sopenharmony_ci{
10168c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
10178c2ecf20Sopenharmony_ci	u32 count = vmcs12->vm_exit_msr_store_count;
10188c2ecf20Sopenharmony_ci	u64 gpa = vmcs12->vm_exit_msr_store_addr;
10198c2ecf20Sopenharmony_ci	struct vmx_msr_entry e;
10208c2ecf20Sopenharmony_ci	u32 i;
10218c2ecf20Sopenharmony_ci
10228c2ecf20Sopenharmony_ci	for (i = 0; i < count; i++) {
10238c2ecf20Sopenharmony_ci		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
10248c2ecf20Sopenharmony_ci			return false;
10258c2ecf20Sopenharmony_ci
10268c2ecf20Sopenharmony_ci		if (e.index == msr_index)
10278c2ecf20Sopenharmony_ci			return true;
10288c2ecf20Sopenharmony_ci	}
10298c2ecf20Sopenharmony_ci	return false;
10308c2ecf20Sopenharmony_ci}
10318c2ecf20Sopenharmony_ci
10328c2ecf20Sopenharmony_cistatic void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
10338c2ecf20Sopenharmony_ci					   u32 msr_index)
10348c2ecf20Sopenharmony_ci{
10358c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
10368c2ecf20Sopenharmony_ci	struct vmx_msrs *autostore = &vmx->msr_autostore.guest;
10378c2ecf20Sopenharmony_ci	bool in_vmcs12_store_list;
10388c2ecf20Sopenharmony_ci	int msr_autostore_slot;
10398c2ecf20Sopenharmony_ci	bool in_autostore_list;
10408c2ecf20Sopenharmony_ci	int last;
10418c2ecf20Sopenharmony_ci
10428c2ecf20Sopenharmony_ci	msr_autostore_slot = vmx_find_loadstore_msr_slot(autostore, msr_index);
10438c2ecf20Sopenharmony_ci	in_autostore_list = msr_autostore_slot >= 0;
10448c2ecf20Sopenharmony_ci	in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index);
10458c2ecf20Sopenharmony_ci
10468c2ecf20Sopenharmony_ci	if (in_vmcs12_store_list && !in_autostore_list) {
10478c2ecf20Sopenharmony_ci		if (autostore->nr == MAX_NR_LOADSTORE_MSRS) {
10488c2ecf20Sopenharmony_ci			/*
10498c2ecf20Sopenharmony_ci			 * Emulated VMEntry does not fail here.  Instead a less
10508c2ecf20Sopenharmony_ci			 * accurate value will be returned by
10518c2ecf20Sopenharmony_ci			 * nested_vmx_get_vmexit_msr_value() using kvm_get_msr()
10528c2ecf20Sopenharmony_ci			 * instead of reading the value from the vmcs02 VMExit
10538c2ecf20Sopenharmony_ci			 * MSR-store area.
10548c2ecf20Sopenharmony_ci			 */
10558c2ecf20Sopenharmony_ci			pr_warn_ratelimited(
10568c2ecf20Sopenharmony_ci				"Not enough msr entries in msr_autostore.  Can't add msr %x\n",
10578c2ecf20Sopenharmony_ci				msr_index);
10588c2ecf20Sopenharmony_ci			return;
10598c2ecf20Sopenharmony_ci		}
10608c2ecf20Sopenharmony_ci		last = autostore->nr++;
10618c2ecf20Sopenharmony_ci		autostore->val[last].index = msr_index;
10628c2ecf20Sopenharmony_ci	} else if (!in_vmcs12_store_list && in_autostore_list) {
10638c2ecf20Sopenharmony_ci		last = --autostore->nr;
10648c2ecf20Sopenharmony_ci		autostore->val[msr_autostore_slot] = autostore->val[last];
10658c2ecf20Sopenharmony_ci	}
10668c2ecf20Sopenharmony_ci}
10678c2ecf20Sopenharmony_ci
10688c2ecf20Sopenharmony_cistatic bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
10698c2ecf20Sopenharmony_ci{
10708c2ecf20Sopenharmony_ci	unsigned long invalid_mask;
10718c2ecf20Sopenharmony_ci
10728c2ecf20Sopenharmony_ci	invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
10738c2ecf20Sopenharmony_ci	return (val & invalid_mask) == 0;
10748c2ecf20Sopenharmony_ci}
10758c2ecf20Sopenharmony_ci
10768c2ecf20Sopenharmony_ci/*
10778c2ecf20Sopenharmony_ci * Returns true if the MMU needs to be sync'd on nested VM-Enter/VM-Exit.
10788c2ecf20Sopenharmony_ci * tl;dr: the MMU needs a sync if L0 is using shadow paging and L1 didn't
10798c2ecf20Sopenharmony_ci * enable VPID for L2 (implying it expects a TLB flush on VMX transitions).
10808c2ecf20Sopenharmony_ci * Here's why.
10818c2ecf20Sopenharmony_ci *
10828c2ecf20Sopenharmony_ci * If EPT is enabled by L0 a sync is never needed:
10838c2ecf20Sopenharmony_ci * - if it is disabled by L1, then L0 is not shadowing L1 or L2 PTEs, there
10848c2ecf20Sopenharmony_ci *   cannot be unsync'd SPTEs for either L1 or L2.
10858c2ecf20Sopenharmony_ci *
10868c2ecf20Sopenharmony_ci * - if it is also enabled by L1, then L0 doesn't need to sync on VM-Enter
10878c2ecf20Sopenharmony_ci *   VM-Enter as VM-Enter isn't required to invalidate guest-physical mappings
10888c2ecf20Sopenharmony_ci *   (irrespective of VPID), i.e. L1 can't rely on the (virtual) CPU to flush
10898c2ecf20Sopenharmony_ci *   stale guest-physical mappings for L2 from the TLB.  And as above, L0 isn't
10908c2ecf20Sopenharmony_ci *   shadowing L1 PTEs so there are no unsync'd SPTEs to sync on VM-Exit.
10918c2ecf20Sopenharmony_ci *
10928c2ecf20Sopenharmony_ci * If EPT is disabled by L0:
10938c2ecf20Sopenharmony_ci * - if VPID is enabled by L1 (for L2), the situation is similar to when L1
10948c2ecf20Sopenharmony_ci *   enables EPT: L0 doesn't need to sync as VM-Enter and VM-Exit aren't
10958c2ecf20Sopenharmony_ci *   required to invalidate linear mappings (EPT is disabled so there are
10968c2ecf20Sopenharmony_ci *   no combined or guest-physical mappings), i.e. L1 can't rely on the
10978c2ecf20Sopenharmony_ci *   (virtual) CPU to flush stale linear mappings for either L2 or itself (L1).
10988c2ecf20Sopenharmony_ci *
10998c2ecf20Sopenharmony_ci * - however if VPID is disabled by L1, then a sync is needed as L1 expects all
11008c2ecf20Sopenharmony_ci *   linear mappings (EPT is disabled so there are no combined or guest-physical
11018c2ecf20Sopenharmony_ci *   mappings) to be invalidated on both VM-Enter and VM-Exit.
11028c2ecf20Sopenharmony_ci *
11038c2ecf20Sopenharmony_ci * Note, this logic is subtly different than nested_has_guest_tlb_tag(), which
11048c2ecf20Sopenharmony_ci * additionally checks that L2 has been assigned a VPID (when EPT is disabled).
11058c2ecf20Sopenharmony_ci * Whether or not L2 has been assigned a VPID by L0 is irrelevant with respect
11068c2ecf20Sopenharmony_ci * to L1's expectations, e.g. L0 needs to invalidate hardware TLB entries if L2
11078c2ecf20Sopenharmony_ci * doesn't have a unique VPID to prevent reusing L1's entries (assuming L1 has
11088c2ecf20Sopenharmony_ci * been assigned a VPID), but L0 doesn't need to do a MMU sync because L1
11098c2ecf20Sopenharmony_ci * doesn't expect stale (virtual) TLB entries to be flushed, i.e. L1 doesn't
11108c2ecf20Sopenharmony_ci * know that L0 will flush the TLB and so L1 will do INVVPID as needed to flush
11118c2ecf20Sopenharmony_ci * stale TLB entries, at which point L0 will sync L2's MMU.
11128c2ecf20Sopenharmony_ci */
11138c2ecf20Sopenharmony_cistatic bool nested_vmx_transition_mmu_sync(struct kvm_vcpu *vcpu)
11148c2ecf20Sopenharmony_ci{
11158c2ecf20Sopenharmony_ci	return !enable_ept && !nested_cpu_has_vpid(get_vmcs12(vcpu));
11168c2ecf20Sopenharmony_ci}
11178c2ecf20Sopenharmony_ci
11188c2ecf20Sopenharmony_ci/*
11198c2ecf20Sopenharmony_ci * Load guest's/host's cr3 at nested entry/exit.  @nested_ept is true if we are
11208c2ecf20Sopenharmony_ci * emulating VM-Entry into a guest with EPT enabled.  On failure, the expected
11218c2ecf20Sopenharmony_ci * Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to
11228c2ecf20Sopenharmony_ci * @entry_failure_code.
11238c2ecf20Sopenharmony_ci */
11248c2ecf20Sopenharmony_cistatic int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
11258c2ecf20Sopenharmony_ci			       enum vm_entry_failure_code *entry_failure_code)
11268c2ecf20Sopenharmony_ci{
11278c2ecf20Sopenharmony_ci	if (CC(!nested_cr3_valid(vcpu, cr3))) {
11288c2ecf20Sopenharmony_ci		*entry_failure_code = ENTRY_FAIL_DEFAULT;
11298c2ecf20Sopenharmony_ci		return -EINVAL;
11308c2ecf20Sopenharmony_ci	}
11318c2ecf20Sopenharmony_ci
11328c2ecf20Sopenharmony_ci	/*
11338c2ecf20Sopenharmony_ci	 * If PAE paging and EPT are both on, CR3 is not used by the CPU and
11348c2ecf20Sopenharmony_ci	 * must not be dereferenced.
11358c2ecf20Sopenharmony_ci	 */
11368c2ecf20Sopenharmony_ci	if (!nested_ept && is_pae_paging(vcpu) &&
11378c2ecf20Sopenharmony_ci	    (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) {
11388c2ecf20Sopenharmony_ci		if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
11398c2ecf20Sopenharmony_ci			*entry_failure_code = ENTRY_FAIL_PDPTE;
11408c2ecf20Sopenharmony_ci			return -EINVAL;
11418c2ecf20Sopenharmony_ci		}
11428c2ecf20Sopenharmony_ci	}
11438c2ecf20Sopenharmony_ci
11448c2ecf20Sopenharmony_ci	/*
11458c2ecf20Sopenharmony_ci	 * Unconditionally skip the TLB flush on fast CR3 switch, all TLB
11468c2ecf20Sopenharmony_ci	 * flushes are handled by nested_vmx_transition_tlb_flush().
11478c2ecf20Sopenharmony_ci	 */
11488c2ecf20Sopenharmony_ci	if (!nested_ept) {
11498c2ecf20Sopenharmony_ci		kvm_mmu_new_pgd(vcpu, cr3, true, true);
11508c2ecf20Sopenharmony_ci
11518c2ecf20Sopenharmony_ci		/*
11528c2ecf20Sopenharmony_ci		 * A TLB flush on VM-Enter/VM-Exit flushes all linear mappings
11538c2ecf20Sopenharmony_ci		 * across all PCIDs, i.e. all PGDs need to be synchronized.
11548c2ecf20Sopenharmony_ci		 * See nested_vmx_transition_mmu_sync() for more details.
11558c2ecf20Sopenharmony_ci		 */
11568c2ecf20Sopenharmony_ci		if (nested_vmx_transition_mmu_sync(vcpu))
11578c2ecf20Sopenharmony_ci			kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
11588c2ecf20Sopenharmony_ci	}
11598c2ecf20Sopenharmony_ci
11608c2ecf20Sopenharmony_ci	vcpu->arch.cr3 = cr3;
11618c2ecf20Sopenharmony_ci	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
11628c2ecf20Sopenharmony_ci
11638c2ecf20Sopenharmony_ci	kvm_init_mmu(vcpu, false);
11648c2ecf20Sopenharmony_ci
11658c2ecf20Sopenharmony_ci	return 0;
11668c2ecf20Sopenharmony_ci}
11678c2ecf20Sopenharmony_ci
11688c2ecf20Sopenharmony_ci/*
11698c2ecf20Sopenharmony_ci * Returns if KVM is able to config CPU to tag TLB entries
11708c2ecf20Sopenharmony_ci * populated by L2 differently than TLB entries populated
11718c2ecf20Sopenharmony_ci * by L1.
11728c2ecf20Sopenharmony_ci *
11738c2ecf20Sopenharmony_ci * If L0 uses EPT, L1 and L2 run with different EPTP because
11748c2ecf20Sopenharmony_ci * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries
11758c2ecf20Sopenharmony_ci * are tagged with different EPTP.
11768c2ecf20Sopenharmony_ci *
11778c2ecf20Sopenharmony_ci * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
11788c2ecf20Sopenharmony_ci * with different VPID (L1 entries are tagged with vmx->vpid
11798c2ecf20Sopenharmony_ci * while L2 entries are tagged with vmx->nested.vpid02).
11808c2ecf20Sopenharmony_ci */
11818c2ecf20Sopenharmony_cistatic bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
11828c2ecf20Sopenharmony_ci{
11838c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
11848c2ecf20Sopenharmony_ci
11858c2ecf20Sopenharmony_ci	return enable_ept ||
11868c2ecf20Sopenharmony_ci	       (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
11878c2ecf20Sopenharmony_ci}
11888c2ecf20Sopenharmony_ci
11898c2ecf20Sopenharmony_cistatic void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
11908c2ecf20Sopenharmony_ci					    struct vmcs12 *vmcs12,
11918c2ecf20Sopenharmony_ci					    bool is_vmenter)
11928c2ecf20Sopenharmony_ci{
11938c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
11948c2ecf20Sopenharmony_ci
11958c2ecf20Sopenharmony_ci	/*
11968c2ecf20Sopenharmony_ci	 * If VPID is disabled, linear and combined mappings are flushed on
11978c2ecf20Sopenharmony_ci	 * VM-Enter/VM-Exit, and guest-physical mappings are valid only for
11988c2ecf20Sopenharmony_ci	 * their associated EPTP.
11998c2ecf20Sopenharmony_ci	 */
12008c2ecf20Sopenharmony_ci	if (!enable_vpid)
12018c2ecf20Sopenharmony_ci		return;
12028c2ecf20Sopenharmony_ci
12038c2ecf20Sopenharmony_ci	/*
12048c2ecf20Sopenharmony_ci	 * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
12058c2ecf20Sopenharmony_ci	 * for *all* contexts to be flushed on VM-Enter/VM-Exit.
12068c2ecf20Sopenharmony_ci	 *
12078c2ecf20Sopenharmony_ci	 * If VPID is enabled and used by vmc12, but L2 does not have a unique
12088c2ecf20Sopenharmony_ci	 * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
12098c2ecf20Sopenharmony_ci	 * a VPID for L2, flush the current context as the effective ASID is
12108c2ecf20Sopenharmony_ci	 * common to both L1 and L2.
12118c2ecf20Sopenharmony_ci	 *
12128c2ecf20Sopenharmony_ci	 * Defer the flush so that it runs after vmcs02.EPTP has been set by
12138c2ecf20Sopenharmony_ci	 * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid
12148c2ecf20Sopenharmony_ci	 * redundant flushes further down the nested pipeline.
12158c2ecf20Sopenharmony_ci	 *
12168c2ecf20Sopenharmony_ci	 * If a TLB flush isn't required due to any of the above, and vpid12 is
12178c2ecf20Sopenharmony_ci	 * changing then the new "virtual" VPID (vpid12) will reuse the same
12188c2ecf20Sopenharmony_ci	 * "real" VPID (vpid02), and so needs to be sync'd.  There is no direct
12198c2ecf20Sopenharmony_ci	 * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
12208c2ecf20Sopenharmony_ci	 * all nested vCPUs.
12218c2ecf20Sopenharmony_ci	 */
12228c2ecf20Sopenharmony_ci	if (!nested_cpu_has_vpid(vmcs12)) {
12238c2ecf20Sopenharmony_ci		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
12248c2ecf20Sopenharmony_ci	} else if (!nested_has_guest_tlb_tag(vcpu)) {
12258c2ecf20Sopenharmony_ci		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
12268c2ecf20Sopenharmony_ci	} else if (is_vmenter &&
12278c2ecf20Sopenharmony_ci		   vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
12288c2ecf20Sopenharmony_ci		vmx->nested.last_vpid = vmcs12->virtual_processor_id;
12298c2ecf20Sopenharmony_ci		vpid_sync_context(nested_get_vpid02(vcpu));
12308c2ecf20Sopenharmony_ci	}
12318c2ecf20Sopenharmony_ci}
12328c2ecf20Sopenharmony_ci
/*
 * Return true if, within the bits selected by @mask, every bit set in
 * @subset is also set in @superset.  Bits outside @mask are ignored.
 */
static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
{
	/* Any masked bit set in @subset but clear in @superset is a violation. */
	u64 stray = subset & mask & ~superset;

	return stray == 0;
}
12408c2ecf20Sopenharmony_ci
12418c2ecf20Sopenharmony_cistatic int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
12428c2ecf20Sopenharmony_ci{
12438c2ecf20Sopenharmony_ci	const u64 feature_and_reserved =
12448c2ecf20Sopenharmony_ci		/* feature (except bit 48; see below) */
12458c2ecf20Sopenharmony_ci		BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
12468c2ecf20Sopenharmony_ci		/* reserved */
12478c2ecf20Sopenharmony_ci		BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
12488c2ecf20Sopenharmony_ci	u64 vmx_basic = vmcs_config.nested.basic;
12498c2ecf20Sopenharmony_ci
12508c2ecf20Sopenharmony_ci	if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
12518c2ecf20Sopenharmony_ci		return -EINVAL;
12528c2ecf20Sopenharmony_ci
12538c2ecf20Sopenharmony_ci	/*
12548c2ecf20Sopenharmony_ci	 * KVM does not emulate a version of VMX that constrains physical
12558c2ecf20Sopenharmony_ci	 * addresses of VMX structures (e.g. VMCS) to 32-bits.
12568c2ecf20Sopenharmony_ci	 */
12578c2ecf20Sopenharmony_ci	if (data & BIT_ULL(48))
12588c2ecf20Sopenharmony_ci		return -EINVAL;
12598c2ecf20Sopenharmony_ci
12608c2ecf20Sopenharmony_ci	if (vmx_basic_vmcs_revision_id(vmx_basic) !=
12618c2ecf20Sopenharmony_ci	    vmx_basic_vmcs_revision_id(data))
12628c2ecf20Sopenharmony_ci		return -EINVAL;
12638c2ecf20Sopenharmony_ci
12648c2ecf20Sopenharmony_ci	if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
12658c2ecf20Sopenharmony_ci		return -EINVAL;
12668c2ecf20Sopenharmony_ci
12678c2ecf20Sopenharmony_ci	vmx->nested.msrs.basic = data;
12688c2ecf20Sopenharmony_ci	return 0;
12698c2ecf20Sopenharmony_ci}
12708c2ecf20Sopenharmony_ci
12718c2ecf20Sopenharmony_cistatic void vmx_get_control_msr(struct nested_vmx_msrs *msrs, u32 msr_index,
12728c2ecf20Sopenharmony_ci				u32 **low, u32 **high)
12738c2ecf20Sopenharmony_ci{
12748c2ecf20Sopenharmony_ci	switch (msr_index) {
12758c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
12768c2ecf20Sopenharmony_ci		*low = &msrs->pinbased_ctls_low;
12778c2ecf20Sopenharmony_ci		*high = &msrs->pinbased_ctls_high;
12788c2ecf20Sopenharmony_ci		break;
12798c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
12808c2ecf20Sopenharmony_ci		*low = &msrs->procbased_ctls_low;
12818c2ecf20Sopenharmony_ci		*high = &msrs->procbased_ctls_high;
12828c2ecf20Sopenharmony_ci		break;
12838c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
12848c2ecf20Sopenharmony_ci		*low = &msrs->exit_ctls_low;
12858c2ecf20Sopenharmony_ci		*high = &msrs->exit_ctls_high;
12868c2ecf20Sopenharmony_ci		break;
12878c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
12888c2ecf20Sopenharmony_ci		*low = &msrs->entry_ctls_low;
12898c2ecf20Sopenharmony_ci		*high = &msrs->entry_ctls_high;
12908c2ecf20Sopenharmony_ci		break;
12918c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_PROCBASED_CTLS2:
12928c2ecf20Sopenharmony_ci		*low = &msrs->secondary_ctls_low;
12938c2ecf20Sopenharmony_ci		*high = &msrs->secondary_ctls_high;
12948c2ecf20Sopenharmony_ci		break;
12958c2ecf20Sopenharmony_ci	default:
12968c2ecf20Sopenharmony_ci		BUG();
12978c2ecf20Sopenharmony_ci	}
12988c2ecf20Sopenharmony_ci}
12998c2ecf20Sopenharmony_ci
13008c2ecf20Sopenharmony_cistatic int
13018c2ecf20Sopenharmony_civmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
13028c2ecf20Sopenharmony_ci{
13038c2ecf20Sopenharmony_ci	u32 *lowp, *highp;
13048c2ecf20Sopenharmony_ci	u64 supported;
13058c2ecf20Sopenharmony_ci
13068c2ecf20Sopenharmony_ci	vmx_get_control_msr(&vmcs_config.nested, msr_index, &lowp, &highp);
13078c2ecf20Sopenharmony_ci
13088c2ecf20Sopenharmony_ci	supported = vmx_control_msr(*lowp, *highp);
13098c2ecf20Sopenharmony_ci
13108c2ecf20Sopenharmony_ci	/* Check must-be-1 bits are still 1. */
13118c2ecf20Sopenharmony_ci	if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
13128c2ecf20Sopenharmony_ci		return -EINVAL;
13138c2ecf20Sopenharmony_ci
13148c2ecf20Sopenharmony_ci	/* Check must-be-0 bits are still 0. */
13158c2ecf20Sopenharmony_ci	if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
13168c2ecf20Sopenharmony_ci		return -EINVAL;
13178c2ecf20Sopenharmony_ci
13188c2ecf20Sopenharmony_ci	vmx_get_control_msr(&vmx->nested.msrs, msr_index, &lowp, &highp);
13198c2ecf20Sopenharmony_ci	*lowp = data;
13208c2ecf20Sopenharmony_ci	*highp = data >> 32;
13218c2ecf20Sopenharmony_ci	return 0;
13228c2ecf20Sopenharmony_ci}
13238c2ecf20Sopenharmony_ci
13248c2ecf20Sopenharmony_cistatic int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
13258c2ecf20Sopenharmony_ci{
13268c2ecf20Sopenharmony_ci	const u64 feature_and_reserved_bits =
13278c2ecf20Sopenharmony_ci		/* feature */
13288c2ecf20Sopenharmony_ci		BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
13298c2ecf20Sopenharmony_ci		BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
13308c2ecf20Sopenharmony_ci		/* reserved */
13318c2ecf20Sopenharmony_ci		GENMASK_ULL(13, 9) | BIT_ULL(31);
13328c2ecf20Sopenharmony_ci	u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low,
13338c2ecf20Sopenharmony_ci				       vmcs_config.nested.misc_high);
13348c2ecf20Sopenharmony_ci
13358c2ecf20Sopenharmony_ci	if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
13368c2ecf20Sopenharmony_ci		return -EINVAL;
13378c2ecf20Sopenharmony_ci
13388c2ecf20Sopenharmony_ci	if ((vmx->nested.msrs.pinbased_ctls_high &
13398c2ecf20Sopenharmony_ci	     PIN_BASED_VMX_PREEMPTION_TIMER) &&
13408c2ecf20Sopenharmony_ci	    vmx_misc_preemption_timer_rate(data) !=
13418c2ecf20Sopenharmony_ci	    vmx_misc_preemption_timer_rate(vmx_misc))
13428c2ecf20Sopenharmony_ci		return -EINVAL;
13438c2ecf20Sopenharmony_ci
13448c2ecf20Sopenharmony_ci	if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
13458c2ecf20Sopenharmony_ci		return -EINVAL;
13468c2ecf20Sopenharmony_ci
13478c2ecf20Sopenharmony_ci	if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
13488c2ecf20Sopenharmony_ci		return -EINVAL;
13498c2ecf20Sopenharmony_ci
13508c2ecf20Sopenharmony_ci	if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
13518c2ecf20Sopenharmony_ci		return -EINVAL;
13528c2ecf20Sopenharmony_ci
13538c2ecf20Sopenharmony_ci	vmx->nested.msrs.misc_low = data;
13548c2ecf20Sopenharmony_ci	vmx->nested.msrs.misc_high = data >> 32;
13558c2ecf20Sopenharmony_ci
13568c2ecf20Sopenharmony_ci	return 0;
13578c2ecf20Sopenharmony_ci}
13588c2ecf20Sopenharmony_ci
13598c2ecf20Sopenharmony_cistatic int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
13608c2ecf20Sopenharmony_ci{
13618c2ecf20Sopenharmony_ci	u64 vmx_ept_vpid_cap = vmx_control_msr(vmcs_config.nested.ept_caps,
13628c2ecf20Sopenharmony_ci					       vmcs_config.nested.vpid_caps);
13638c2ecf20Sopenharmony_ci
13648c2ecf20Sopenharmony_ci	/* Every bit is either reserved or a feature bit. */
13658c2ecf20Sopenharmony_ci	if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
13668c2ecf20Sopenharmony_ci		return -EINVAL;
13678c2ecf20Sopenharmony_ci
13688c2ecf20Sopenharmony_ci	vmx->nested.msrs.ept_caps = data;
13698c2ecf20Sopenharmony_ci	vmx->nested.msrs.vpid_caps = data >> 32;
13708c2ecf20Sopenharmony_ci	return 0;
13718c2ecf20Sopenharmony_ci}
13728c2ecf20Sopenharmony_ci
13738c2ecf20Sopenharmony_cistatic u64 *vmx_get_fixed0_msr(struct nested_vmx_msrs *msrs, u32 msr_index)
13748c2ecf20Sopenharmony_ci{
13758c2ecf20Sopenharmony_ci	switch (msr_index) {
13768c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_CR0_FIXED0:
13778c2ecf20Sopenharmony_ci		return &msrs->cr0_fixed0;
13788c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_CR4_FIXED0:
13798c2ecf20Sopenharmony_ci		return &msrs->cr4_fixed0;
13808c2ecf20Sopenharmony_ci	default:
13818c2ecf20Sopenharmony_ci		BUG();
13828c2ecf20Sopenharmony_ci	}
13838c2ecf20Sopenharmony_ci}
13848c2ecf20Sopenharmony_ci
13858c2ecf20Sopenharmony_cistatic int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
13868c2ecf20Sopenharmony_ci{
13878c2ecf20Sopenharmony_ci	const u64 *msr = vmx_get_fixed0_msr(&vmcs_config.nested, msr_index);
13888c2ecf20Sopenharmony_ci
13898c2ecf20Sopenharmony_ci	/*
13908c2ecf20Sopenharmony_ci	 * 1 bits (which indicates bits which "must-be-1" during VMX operation)
13918c2ecf20Sopenharmony_ci	 * must be 1 in the restored value.
13928c2ecf20Sopenharmony_ci	 */
13938c2ecf20Sopenharmony_ci	if (!is_bitwise_subset(data, *msr, -1ULL))
13948c2ecf20Sopenharmony_ci		return -EINVAL;
13958c2ecf20Sopenharmony_ci
13968c2ecf20Sopenharmony_ci	*vmx_get_fixed0_msr(&vmx->nested.msrs, msr_index) = data;
13978c2ecf20Sopenharmony_ci	return 0;
13988c2ecf20Sopenharmony_ci}
13998c2ecf20Sopenharmony_ci
14008c2ecf20Sopenharmony_ci/*
14018c2ecf20Sopenharmony_ci * Called when userspace is restoring VMX MSRs.
14028c2ecf20Sopenharmony_ci *
14038c2ecf20Sopenharmony_ci * Returns 0 on success, non-0 otherwise.
14048c2ecf20Sopenharmony_ci */
14058c2ecf20Sopenharmony_ciint vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
14068c2ecf20Sopenharmony_ci{
14078c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
14088c2ecf20Sopenharmony_ci
14098c2ecf20Sopenharmony_ci	/*
14108c2ecf20Sopenharmony_ci	 * Don't allow changes to the VMX capability MSRs while the vCPU
14118c2ecf20Sopenharmony_ci	 * is in VMX operation.
14128c2ecf20Sopenharmony_ci	 */
14138c2ecf20Sopenharmony_ci	if (vmx->nested.vmxon)
14148c2ecf20Sopenharmony_ci		return -EBUSY;
14158c2ecf20Sopenharmony_ci
14168c2ecf20Sopenharmony_ci	switch (msr_index) {
14178c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_BASIC:
14188c2ecf20Sopenharmony_ci		return vmx_restore_vmx_basic(vmx, data);
14198c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_PINBASED_CTLS:
14208c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_PROCBASED_CTLS:
14218c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_EXIT_CTLS:
14228c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_ENTRY_CTLS:
14238c2ecf20Sopenharmony_ci		/*
14248c2ecf20Sopenharmony_ci		 * The "non-true" VMX capability MSRs are generated from the
14258c2ecf20Sopenharmony_ci		 * "true" MSRs, so we do not support restoring them directly.
14268c2ecf20Sopenharmony_ci		 *
14278c2ecf20Sopenharmony_ci		 * If userspace wants to emulate VMX_BASIC[55]=0, userspace
14288c2ecf20Sopenharmony_ci		 * should restore the "true" MSRs with the must-be-1 bits
14298c2ecf20Sopenharmony_ci		 * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND
14308c2ecf20Sopenharmony_ci		 * DEFAULT SETTINGS".
14318c2ecf20Sopenharmony_ci		 */
14328c2ecf20Sopenharmony_ci		return -EINVAL;
14338c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
14348c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
14358c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
14368c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
14378c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_PROCBASED_CTLS2:
14388c2ecf20Sopenharmony_ci		return vmx_restore_control_msr(vmx, msr_index, data);
14398c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_MISC:
14408c2ecf20Sopenharmony_ci		return vmx_restore_vmx_misc(vmx, data);
14418c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_CR0_FIXED0:
14428c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_CR4_FIXED0:
14438c2ecf20Sopenharmony_ci		return vmx_restore_fixed0_msr(vmx, msr_index, data);
14448c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_CR0_FIXED1:
14458c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_CR4_FIXED1:
14468c2ecf20Sopenharmony_ci		/*
14478c2ecf20Sopenharmony_ci		 * These MSRs are generated based on the vCPU's CPUID, so we
14488c2ecf20Sopenharmony_ci		 * do not support restoring them directly.
14498c2ecf20Sopenharmony_ci		 */
14508c2ecf20Sopenharmony_ci		return -EINVAL;
14518c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_EPT_VPID_CAP:
14528c2ecf20Sopenharmony_ci		return vmx_restore_vmx_ept_vpid_cap(vmx, data);
14538c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_VMCS_ENUM:
14548c2ecf20Sopenharmony_ci		vmx->nested.msrs.vmcs_enum = data;
14558c2ecf20Sopenharmony_ci		return 0;
14568c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_VMFUNC:
14578c2ecf20Sopenharmony_ci		if (data & ~vmcs_config.nested.vmfunc_controls)
14588c2ecf20Sopenharmony_ci			return -EINVAL;
14598c2ecf20Sopenharmony_ci		vmx->nested.msrs.vmfunc_controls = data;
14608c2ecf20Sopenharmony_ci		return 0;
14618c2ecf20Sopenharmony_ci	default:
14628c2ecf20Sopenharmony_ci		/*
14638c2ecf20Sopenharmony_ci		 * The rest of the VMX capability MSRs do not support restore.
14648c2ecf20Sopenharmony_ci		 */
14658c2ecf20Sopenharmony_ci		return -EINVAL;
14668c2ecf20Sopenharmony_ci	}
14678c2ecf20Sopenharmony_ci}
14688c2ecf20Sopenharmony_ci
14698c2ecf20Sopenharmony_ci/* Returns 0 on success, non-0 otherwise. */
14708c2ecf20Sopenharmony_ciint vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
14718c2ecf20Sopenharmony_ci{
14728c2ecf20Sopenharmony_ci	switch (msr_index) {
14738c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_BASIC:
14748c2ecf20Sopenharmony_ci		*pdata = msrs->basic;
14758c2ecf20Sopenharmony_ci		break;
14768c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
14778c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_PINBASED_CTLS:
14788c2ecf20Sopenharmony_ci		*pdata = vmx_control_msr(
14798c2ecf20Sopenharmony_ci			msrs->pinbased_ctls_low,
14808c2ecf20Sopenharmony_ci			msrs->pinbased_ctls_high);
14818c2ecf20Sopenharmony_ci		if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
14828c2ecf20Sopenharmony_ci			*pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
14838c2ecf20Sopenharmony_ci		break;
14848c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
14858c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_PROCBASED_CTLS:
14868c2ecf20Sopenharmony_ci		*pdata = vmx_control_msr(
14878c2ecf20Sopenharmony_ci			msrs->procbased_ctls_low,
14888c2ecf20Sopenharmony_ci			msrs->procbased_ctls_high);
14898c2ecf20Sopenharmony_ci		if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
14908c2ecf20Sopenharmony_ci			*pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
14918c2ecf20Sopenharmony_ci		break;
14928c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
14938c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_EXIT_CTLS:
14948c2ecf20Sopenharmony_ci		*pdata = vmx_control_msr(
14958c2ecf20Sopenharmony_ci			msrs->exit_ctls_low,
14968c2ecf20Sopenharmony_ci			msrs->exit_ctls_high);
14978c2ecf20Sopenharmony_ci		if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
14988c2ecf20Sopenharmony_ci			*pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
14998c2ecf20Sopenharmony_ci		break;
15008c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
15018c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_ENTRY_CTLS:
15028c2ecf20Sopenharmony_ci		*pdata = vmx_control_msr(
15038c2ecf20Sopenharmony_ci			msrs->entry_ctls_low,
15048c2ecf20Sopenharmony_ci			msrs->entry_ctls_high);
15058c2ecf20Sopenharmony_ci		if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
15068c2ecf20Sopenharmony_ci			*pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
15078c2ecf20Sopenharmony_ci		break;
15088c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_MISC:
15098c2ecf20Sopenharmony_ci		*pdata = vmx_control_msr(
15108c2ecf20Sopenharmony_ci			msrs->misc_low,
15118c2ecf20Sopenharmony_ci			msrs->misc_high);
15128c2ecf20Sopenharmony_ci		break;
15138c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_CR0_FIXED0:
15148c2ecf20Sopenharmony_ci		*pdata = msrs->cr0_fixed0;
15158c2ecf20Sopenharmony_ci		break;
15168c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_CR0_FIXED1:
15178c2ecf20Sopenharmony_ci		*pdata = msrs->cr0_fixed1;
15188c2ecf20Sopenharmony_ci		break;
15198c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_CR4_FIXED0:
15208c2ecf20Sopenharmony_ci		*pdata = msrs->cr4_fixed0;
15218c2ecf20Sopenharmony_ci		break;
15228c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_CR4_FIXED1:
15238c2ecf20Sopenharmony_ci		*pdata = msrs->cr4_fixed1;
15248c2ecf20Sopenharmony_ci		break;
15258c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_VMCS_ENUM:
15268c2ecf20Sopenharmony_ci		*pdata = msrs->vmcs_enum;
15278c2ecf20Sopenharmony_ci		break;
15288c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_PROCBASED_CTLS2:
15298c2ecf20Sopenharmony_ci		*pdata = vmx_control_msr(
15308c2ecf20Sopenharmony_ci			msrs->secondary_ctls_low,
15318c2ecf20Sopenharmony_ci			msrs->secondary_ctls_high);
15328c2ecf20Sopenharmony_ci		break;
15338c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_EPT_VPID_CAP:
15348c2ecf20Sopenharmony_ci		*pdata = msrs->ept_caps |
15358c2ecf20Sopenharmony_ci			((u64)msrs->vpid_caps << 32);
15368c2ecf20Sopenharmony_ci		break;
15378c2ecf20Sopenharmony_ci	case MSR_IA32_VMX_VMFUNC:
15388c2ecf20Sopenharmony_ci		*pdata = msrs->vmfunc_controls;
15398c2ecf20Sopenharmony_ci		break;
15408c2ecf20Sopenharmony_ci	default:
15418c2ecf20Sopenharmony_ci		return 1;
15428c2ecf20Sopenharmony_ci	}
15438c2ecf20Sopenharmony_ci
15448c2ecf20Sopenharmony_ci	return 0;
15458c2ecf20Sopenharmony_ci}
15468c2ecf20Sopenharmony_ci
15478c2ecf20Sopenharmony_ci/*
15488c2ecf20Sopenharmony_ci * Copy the writable VMCS shadow fields back to the VMCS12, in case they have
15498c2ecf20Sopenharmony_ci * been modified by the L1 guest.  Note, "writable" in this context means
15508c2ecf20Sopenharmony_ci * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of
15518c2ecf20Sopenharmony_ci * fields tagged SHADOW_FIELD_RO may or may not align with the "read-only"
15528c2ecf20Sopenharmony_ci * VM-exit information fields (which are actually writable if the vCPU is
15538c2ecf20Sopenharmony_ci * configured to support "VMWRITE to any supported field in the VMCS").
15548c2ecf20Sopenharmony_ci */
15558c2ecf20Sopenharmony_cistatic void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
15568c2ecf20Sopenharmony_ci{
15578c2ecf20Sopenharmony_ci	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
15588c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
15598c2ecf20Sopenharmony_ci	struct shadow_vmcs_field field;
15608c2ecf20Sopenharmony_ci	unsigned long val;
15618c2ecf20Sopenharmony_ci	int i;
15628c2ecf20Sopenharmony_ci
15638c2ecf20Sopenharmony_ci	if (WARN_ON(!shadow_vmcs))
15648c2ecf20Sopenharmony_ci		return;
15658c2ecf20Sopenharmony_ci
15668c2ecf20Sopenharmony_ci	preempt_disable();
15678c2ecf20Sopenharmony_ci
15688c2ecf20Sopenharmony_ci	vmcs_load(shadow_vmcs);
15698c2ecf20Sopenharmony_ci
15708c2ecf20Sopenharmony_ci	for (i = 0; i < max_shadow_read_write_fields; i++) {
15718c2ecf20Sopenharmony_ci		field = shadow_read_write_fields[i];
15728c2ecf20Sopenharmony_ci		val = __vmcs_readl(field.encoding);
15738c2ecf20Sopenharmony_ci		vmcs12_write_any(vmcs12, field.encoding, field.offset, val);
15748c2ecf20Sopenharmony_ci	}
15758c2ecf20Sopenharmony_ci
15768c2ecf20Sopenharmony_ci	vmcs_clear(shadow_vmcs);
15778c2ecf20Sopenharmony_ci	vmcs_load(vmx->loaded_vmcs->vmcs);
15788c2ecf20Sopenharmony_ci
15798c2ecf20Sopenharmony_ci	preempt_enable();
15808c2ecf20Sopenharmony_ci}
15818c2ecf20Sopenharmony_ci
15828c2ecf20Sopenharmony_cistatic void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
15838c2ecf20Sopenharmony_ci{
15848c2ecf20Sopenharmony_ci	const struct shadow_vmcs_field *fields[] = {
15858c2ecf20Sopenharmony_ci		shadow_read_write_fields,
15868c2ecf20Sopenharmony_ci		shadow_read_only_fields
15878c2ecf20Sopenharmony_ci	};
15888c2ecf20Sopenharmony_ci	const int max_fields[] = {
15898c2ecf20Sopenharmony_ci		max_shadow_read_write_fields,
15908c2ecf20Sopenharmony_ci		max_shadow_read_only_fields
15918c2ecf20Sopenharmony_ci	};
15928c2ecf20Sopenharmony_ci	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
15938c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
15948c2ecf20Sopenharmony_ci	struct shadow_vmcs_field field;
15958c2ecf20Sopenharmony_ci	unsigned long val;
15968c2ecf20Sopenharmony_ci	int i, q;
15978c2ecf20Sopenharmony_ci
15988c2ecf20Sopenharmony_ci	if (WARN_ON(!shadow_vmcs))
15998c2ecf20Sopenharmony_ci		return;
16008c2ecf20Sopenharmony_ci
16018c2ecf20Sopenharmony_ci	vmcs_load(shadow_vmcs);
16028c2ecf20Sopenharmony_ci
16038c2ecf20Sopenharmony_ci	for (q = 0; q < ARRAY_SIZE(fields); q++) {
16048c2ecf20Sopenharmony_ci		for (i = 0; i < max_fields[q]; i++) {
16058c2ecf20Sopenharmony_ci			field = fields[q][i];
16068c2ecf20Sopenharmony_ci			val = vmcs12_read_any(vmcs12, field.encoding,
16078c2ecf20Sopenharmony_ci					      field.offset);
16088c2ecf20Sopenharmony_ci			__vmcs_writel(field.encoding, val);
16098c2ecf20Sopenharmony_ci		}
16108c2ecf20Sopenharmony_ci	}
16118c2ecf20Sopenharmony_ci
16128c2ecf20Sopenharmony_ci	vmcs_clear(shadow_vmcs);
16138c2ecf20Sopenharmony_ci	vmcs_load(vmx->loaded_vmcs->vmcs);
16148c2ecf20Sopenharmony_ci}
16158c2ecf20Sopenharmony_ci
16168c2ecf20Sopenharmony_cistatic int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
16178c2ecf20Sopenharmony_ci{
16188c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
16198c2ecf20Sopenharmony_ci	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
16208c2ecf20Sopenharmony_ci
16218c2ecf20Sopenharmony_ci	/* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
16228c2ecf20Sopenharmony_ci	vmcs12->tpr_threshold = evmcs->tpr_threshold;
16238c2ecf20Sopenharmony_ci	vmcs12->guest_rip = evmcs->guest_rip;
16248c2ecf20Sopenharmony_ci
16258c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
16268c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
16278c2ecf20Sopenharmony_ci		vmcs12->guest_rsp = evmcs->guest_rsp;
16288c2ecf20Sopenharmony_ci		vmcs12->guest_rflags = evmcs->guest_rflags;
16298c2ecf20Sopenharmony_ci		vmcs12->guest_interruptibility_info =
16308c2ecf20Sopenharmony_ci			evmcs->guest_interruptibility_info;
16318c2ecf20Sopenharmony_ci	}
16328c2ecf20Sopenharmony_ci
16338c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
16348c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
16358c2ecf20Sopenharmony_ci		vmcs12->cpu_based_vm_exec_control =
16368c2ecf20Sopenharmony_ci			evmcs->cpu_based_vm_exec_control;
16378c2ecf20Sopenharmony_ci	}
16388c2ecf20Sopenharmony_ci
16398c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
16408c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
16418c2ecf20Sopenharmony_ci		vmcs12->exception_bitmap = evmcs->exception_bitmap;
16428c2ecf20Sopenharmony_ci	}
16438c2ecf20Sopenharmony_ci
16448c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
16458c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
16468c2ecf20Sopenharmony_ci		vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
16478c2ecf20Sopenharmony_ci	}
16488c2ecf20Sopenharmony_ci
16498c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
16508c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
16518c2ecf20Sopenharmony_ci		vmcs12->vm_entry_intr_info_field =
16528c2ecf20Sopenharmony_ci			evmcs->vm_entry_intr_info_field;
16538c2ecf20Sopenharmony_ci		vmcs12->vm_entry_exception_error_code =
16548c2ecf20Sopenharmony_ci			evmcs->vm_entry_exception_error_code;
16558c2ecf20Sopenharmony_ci		vmcs12->vm_entry_instruction_len =
16568c2ecf20Sopenharmony_ci			evmcs->vm_entry_instruction_len;
16578c2ecf20Sopenharmony_ci	}
16588c2ecf20Sopenharmony_ci
16598c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
16608c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
16618c2ecf20Sopenharmony_ci		vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
16628c2ecf20Sopenharmony_ci		vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
16638c2ecf20Sopenharmony_ci		vmcs12->host_cr0 = evmcs->host_cr0;
16648c2ecf20Sopenharmony_ci		vmcs12->host_cr3 = evmcs->host_cr3;
16658c2ecf20Sopenharmony_ci		vmcs12->host_cr4 = evmcs->host_cr4;
16668c2ecf20Sopenharmony_ci		vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
16678c2ecf20Sopenharmony_ci		vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
16688c2ecf20Sopenharmony_ci		vmcs12->host_rip = evmcs->host_rip;
16698c2ecf20Sopenharmony_ci		vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
16708c2ecf20Sopenharmony_ci		vmcs12->host_es_selector = evmcs->host_es_selector;
16718c2ecf20Sopenharmony_ci		vmcs12->host_cs_selector = evmcs->host_cs_selector;
16728c2ecf20Sopenharmony_ci		vmcs12->host_ss_selector = evmcs->host_ss_selector;
16738c2ecf20Sopenharmony_ci		vmcs12->host_ds_selector = evmcs->host_ds_selector;
16748c2ecf20Sopenharmony_ci		vmcs12->host_fs_selector = evmcs->host_fs_selector;
16758c2ecf20Sopenharmony_ci		vmcs12->host_gs_selector = evmcs->host_gs_selector;
16768c2ecf20Sopenharmony_ci		vmcs12->host_tr_selector = evmcs->host_tr_selector;
16778c2ecf20Sopenharmony_ci	}
16788c2ecf20Sopenharmony_ci
16798c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
16808c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
16818c2ecf20Sopenharmony_ci		vmcs12->pin_based_vm_exec_control =
16828c2ecf20Sopenharmony_ci			evmcs->pin_based_vm_exec_control;
16838c2ecf20Sopenharmony_ci		vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
16848c2ecf20Sopenharmony_ci		vmcs12->secondary_vm_exec_control =
16858c2ecf20Sopenharmony_ci			evmcs->secondary_vm_exec_control;
16868c2ecf20Sopenharmony_ci	}
16878c2ecf20Sopenharmony_ci
16888c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
16898c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
16908c2ecf20Sopenharmony_ci		vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
16918c2ecf20Sopenharmony_ci		vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
16928c2ecf20Sopenharmony_ci	}
16938c2ecf20Sopenharmony_ci
16948c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
16958c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
16968c2ecf20Sopenharmony_ci		vmcs12->msr_bitmap = evmcs->msr_bitmap;
16978c2ecf20Sopenharmony_ci	}
16988c2ecf20Sopenharmony_ci
16998c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
17008c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
17018c2ecf20Sopenharmony_ci		vmcs12->guest_es_base = evmcs->guest_es_base;
17028c2ecf20Sopenharmony_ci		vmcs12->guest_cs_base = evmcs->guest_cs_base;
17038c2ecf20Sopenharmony_ci		vmcs12->guest_ss_base = evmcs->guest_ss_base;
17048c2ecf20Sopenharmony_ci		vmcs12->guest_ds_base = evmcs->guest_ds_base;
17058c2ecf20Sopenharmony_ci		vmcs12->guest_fs_base = evmcs->guest_fs_base;
17068c2ecf20Sopenharmony_ci		vmcs12->guest_gs_base = evmcs->guest_gs_base;
17078c2ecf20Sopenharmony_ci		vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
17088c2ecf20Sopenharmony_ci		vmcs12->guest_tr_base = evmcs->guest_tr_base;
17098c2ecf20Sopenharmony_ci		vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
17108c2ecf20Sopenharmony_ci		vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
17118c2ecf20Sopenharmony_ci		vmcs12->guest_es_limit = evmcs->guest_es_limit;
17128c2ecf20Sopenharmony_ci		vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
17138c2ecf20Sopenharmony_ci		vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
17148c2ecf20Sopenharmony_ci		vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
17158c2ecf20Sopenharmony_ci		vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
17168c2ecf20Sopenharmony_ci		vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
17178c2ecf20Sopenharmony_ci		vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
17188c2ecf20Sopenharmony_ci		vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
17198c2ecf20Sopenharmony_ci		vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
17208c2ecf20Sopenharmony_ci		vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
17218c2ecf20Sopenharmony_ci		vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
17228c2ecf20Sopenharmony_ci		vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
17238c2ecf20Sopenharmony_ci		vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
17248c2ecf20Sopenharmony_ci		vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
17258c2ecf20Sopenharmony_ci		vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
17268c2ecf20Sopenharmony_ci		vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
17278c2ecf20Sopenharmony_ci		vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
17288c2ecf20Sopenharmony_ci		vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
17298c2ecf20Sopenharmony_ci		vmcs12->guest_es_selector = evmcs->guest_es_selector;
17308c2ecf20Sopenharmony_ci		vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
17318c2ecf20Sopenharmony_ci		vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
17328c2ecf20Sopenharmony_ci		vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
17338c2ecf20Sopenharmony_ci		vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
17348c2ecf20Sopenharmony_ci		vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
17358c2ecf20Sopenharmony_ci		vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
17368c2ecf20Sopenharmony_ci		vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
17378c2ecf20Sopenharmony_ci	}
17388c2ecf20Sopenharmony_ci
17398c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
17408c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
17418c2ecf20Sopenharmony_ci		vmcs12->tsc_offset = evmcs->tsc_offset;
17428c2ecf20Sopenharmony_ci		vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
17438c2ecf20Sopenharmony_ci		vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
17448c2ecf20Sopenharmony_ci	}
17458c2ecf20Sopenharmony_ci
17468c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
17478c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
17488c2ecf20Sopenharmony_ci		vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
17498c2ecf20Sopenharmony_ci		vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
17508c2ecf20Sopenharmony_ci		vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
17518c2ecf20Sopenharmony_ci		vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
17528c2ecf20Sopenharmony_ci		vmcs12->guest_cr0 = evmcs->guest_cr0;
17538c2ecf20Sopenharmony_ci		vmcs12->guest_cr3 = evmcs->guest_cr3;
17548c2ecf20Sopenharmony_ci		vmcs12->guest_cr4 = evmcs->guest_cr4;
17558c2ecf20Sopenharmony_ci		vmcs12->guest_dr7 = evmcs->guest_dr7;
17568c2ecf20Sopenharmony_ci	}
17578c2ecf20Sopenharmony_ci
17588c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
17598c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
17608c2ecf20Sopenharmony_ci		vmcs12->host_fs_base = evmcs->host_fs_base;
17618c2ecf20Sopenharmony_ci		vmcs12->host_gs_base = evmcs->host_gs_base;
17628c2ecf20Sopenharmony_ci		vmcs12->host_tr_base = evmcs->host_tr_base;
17638c2ecf20Sopenharmony_ci		vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
17648c2ecf20Sopenharmony_ci		vmcs12->host_idtr_base = evmcs->host_idtr_base;
17658c2ecf20Sopenharmony_ci		vmcs12->host_rsp = evmcs->host_rsp;
17668c2ecf20Sopenharmony_ci	}
17678c2ecf20Sopenharmony_ci
17688c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
17698c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
17708c2ecf20Sopenharmony_ci		vmcs12->ept_pointer = evmcs->ept_pointer;
17718c2ecf20Sopenharmony_ci		vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
17728c2ecf20Sopenharmony_ci	}
17738c2ecf20Sopenharmony_ci
17748c2ecf20Sopenharmony_ci	if (unlikely(!(evmcs->hv_clean_fields &
17758c2ecf20Sopenharmony_ci		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
17768c2ecf20Sopenharmony_ci		vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
17778c2ecf20Sopenharmony_ci		vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
17788c2ecf20Sopenharmony_ci		vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
17798c2ecf20Sopenharmony_ci		vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
17808c2ecf20Sopenharmony_ci		vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
17818c2ecf20Sopenharmony_ci		vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
17828c2ecf20Sopenharmony_ci		vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
17838c2ecf20Sopenharmony_ci		vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
17848c2ecf20Sopenharmony_ci		vmcs12->guest_pending_dbg_exceptions =
17858c2ecf20Sopenharmony_ci			evmcs->guest_pending_dbg_exceptions;
17868c2ecf20Sopenharmony_ci		vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
17878c2ecf20Sopenharmony_ci		vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
17888c2ecf20Sopenharmony_ci		vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
17898c2ecf20Sopenharmony_ci		vmcs12->guest_activity_state = evmcs->guest_activity_state;
17908c2ecf20Sopenharmony_ci		vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
17918c2ecf20Sopenharmony_ci	}
17928c2ecf20Sopenharmony_ci
17938c2ecf20Sopenharmony_ci	/*
17948c2ecf20Sopenharmony_ci	 * Not used?
17958c2ecf20Sopenharmony_ci	 * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
17968c2ecf20Sopenharmony_ci	 * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
17978c2ecf20Sopenharmony_ci	 * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
17988c2ecf20Sopenharmony_ci	 * vmcs12->page_fault_error_code_mask =
17998c2ecf20Sopenharmony_ci	 *		evmcs->page_fault_error_code_mask;
18008c2ecf20Sopenharmony_ci	 * vmcs12->page_fault_error_code_match =
18018c2ecf20Sopenharmony_ci	 *		evmcs->page_fault_error_code_match;
18028c2ecf20Sopenharmony_ci	 * vmcs12->cr3_target_count = evmcs->cr3_target_count;
18038c2ecf20Sopenharmony_ci	 * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count;
18048c2ecf20Sopenharmony_ci	 * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count;
18058c2ecf20Sopenharmony_ci	 * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count;
18068c2ecf20Sopenharmony_ci	 */
18078c2ecf20Sopenharmony_ci
18088c2ecf20Sopenharmony_ci	/*
18098c2ecf20Sopenharmony_ci	 * Read only fields:
18108c2ecf20Sopenharmony_ci	 * vmcs12->guest_physical_address = evmcs->guest_physical_address;
18118c2ecf20Sopenharmony_ci	 * vmcs12->vm_instruction_error = evmcs->vm_instruction_error;
18128c2ecf20Sopenharmony_ci	 * vmcs12->vm_exit_reason = evmcs->vm_exit_reason;
18138c2ecf20Sopenharmony_ci	 * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info;
18148c2ecf20Sopenharmony_ci	 * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code;
18158c2ecf20Sopenharmony_ci	 * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field;
18168c2ecf20Sopenharmony_ci	 * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code;
18178c2ecf20Sopenharmony_ci	 * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len;
18188c2ecf20Sopenharmony_ci	 * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info;
18198c2ecf20Sopenharmony_ci	 * vmcs12->exit_qualification = evmcs->exit_qualification;
18208c2ecf20Sopenharmony_ci	 * vmcs12->guest_linear_address = evmcs->guest_linear_address;
18218c2ecf20Sopenharmony_ci	 *
18228c2ecf20Sopenharmony_ci	 * Not present in struct vmcs12:
18238c2ecf20Sopenharmony_ci	 * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx;
18248c2ecf20Sopenharmony_ci	 * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi;
18258c2ecf20Sopenharmony_ci	 * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi;
18268c2ecf20Sopenharmony_ci	 * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
18278c2ecf20Sopenharmony_ci	 */
18288c2ecf20Sopenharmony_ci
18298c2ecf20Sopenharmony_ci	return 0;
18308c2ecf20Sopenharmony_ci}
18318c2ecf20Sopenharmony_ci
18328c2ecf20Sopenharmony_cistatic int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
18338c2ecf20Sopenharmony_ci{
18348c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
18358c2ecf20Sopenharmony_ci	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
18368c2ecf20Sopenharmony_ci
18378c2ecf20Sopenharmony_ci	/*
18388c2ecf20Sopenharmony_ci	 * Should not be changed by KVM:
18398c2ecf20Sopenharmony_ci	 *
18408c2ecf20Sopenharmony_ci	 * evmcs->host_es_selector = vmcs12->host_es_selector;
18418c2ecf20Sopenharmony_ci	 * evmcs->host_cs_selector = vmcs12->host_cs_selector;
18428c2ecf20Sopenharmony_ci	 * evmcs->host_ss_selector = vmcs12->host_ss_selector;
18438c2ecf20Sopenharmony_ci	 * evmcs->host_ds_selector = vmcs12->host_ds_selector;
18448c2ecf20Sopenharmony_ci	 * evmcs->host_fs_selector = vmcs12->host_fs_selector;
18458c2ecf20Sopenharmony_ci	 * evmcs->host_gs_selector = vmcs12->host_gs_selector;
18468c2ecf20Sopenharmony_ci	 * evmcs->host_tr_selector = vmcs12->host_tr_selector;
18478c2ecf20Sopenharmony_ci	 * evmcs->host_ia32_pat = vmcs12->host_ia32_pat;
18488c2ecf20Sopenharmony_ci	 * evmcs->host_ia32_efer = vmcs12->host_ia32_efer;
18498c2ecf20Sopenharmony_ci	 * evmcs->host_cr0 = vmcs12->host_cr0;
18508c2ecf20Sopenharmony_ci	 * evmcs->host_cr3 = vmcs12->host_cr3;
18518c2ecf20Sopenharmony_ci	 * evmcs->host_cr4 = vmcs12->host_cr4;
18528c2ecf20Sopenharmony_ci	 * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp;
18538c2ecf20Sopenharmony_ci	 * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip;
18548c2ecf20Sopenharmony_ci	 * evmcs->host_rip = vmcs12->host_rip;
18558c2ecf20Sopenharmony_ci	 * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs;
18568c2ecf20Sopenharmony_ci	 * evmcs->host_fs_base = vmcs12->host_fs_base;
18578c2ecf20Sopenharmony_ci	 * evmcs->host_gs_base = vmcs12->host_gs_base;
18588c2ecf20Sopenharmony_ci	 * evmcs->host_tr_base = vmcs12->host_tr_base;
18598c2ecf20Sopenharmony_ci	 * evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
18608c2ecf20Sopenharmony_ci	 * evmcs->host_idtr_base = vmcs12->host_idtr_base;
18618c2ecf20Sopenharmony_ci	 * evmcs->host_rsp = vmcs12->host_rsp;
18628c2ecf20Sopenharmony_ci	 * sync_vmcs02_to_vmcs12() doesn't read these:
18638c2ecf20Sopenharmony_ci	 * evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
18648c2ecf20Sopenharmony_ci	 * evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
18658c2ecf20Sopenharmony_ci	 * evmcs->msr_bitmap = vmcs12->msr_bitmap;
18668c2ecf20Sopenharmony_ci	 * evmcs->ept_pointer = vmcs12->ept_pointer;
18678c2ecf20Sopenharmony_ci	 * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap;
18688c2ecf20Sopenharmony_ci	 * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
18698c2ecf20Sopenharmony_ci	 * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
18708c2ecf20Sopenharmony_ci	 * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
18718c2ecf20Sopenharmony_ci	 * evmcs->tpr_threshold = vmcs12->tpr_threshold;
18728c2ecf20Sopenharmony_ci	 * evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
18738c2ecf20Sopenharmony_ci	 * evmcs->exception_bitmap = vmcs12->exception_bitmap;
18748c2ecf20Sopenharmony_ci	 * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer;
18758c2ecf20Sopenharmony_ci	 * evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control;
18768c2ecf20Sopenharmony_ci	 * evmcs->vm_exit_controls = vmcs12->vm_exit_controls;
18778c2ecf20Sopenharmony_ci	 * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control;
18788c2ecf20Sopenharmony_ci	 * evmcs->page_fault_error_code_mask =
18798c2ecf20Sopenharmony_ci	 *		vmcs12->page_fault_error_code_mask;
18808c2ecf20Sopenharmony_ci	 * evmcs->page_fault_error_code_match =
18818c2ecf20Sopenharmony_ci	 *		vmcs12->page_fault_error_code_match;
18828c2ecf20Sopenharmony_ci	 * evmcs->cr3_target_count = vmcs12->cr3_target_count;
18838c2ecf20Sopenharmony_ci	 * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr;
18848c2ecf20Sopenharmony_ci	 * evmcs->tsc_offset = vmcs12->tsc_offset;
18858c2ecf20Sopenharmony_ci	 * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl;
18868c2ecf20Sopenharmony_ci	 * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask;
18878c2ecf20Sopenharmony_ci	 * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask;
18888c2ecf20Sopenharmony_ci	 * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow;
18898c2ecf20Sopenharmony_ci	 * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow;
18908c2ecf20Sopenharmony_ci	 * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
18918c2ecf20Sopenharmony_ci	 * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
18928c2ecf20Sopenharmony_ci	 * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
18938c2ecf20Sopenharmony_ci	 *
18948c2ecf20Sopenharmony_ci	 * Not present in struct vmcs12:
18958c2ecf20Sopenharmony_ci	 * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
18968c2ecf20Sopenharmony_ci	 * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
18978c2ecf20Sopenharmony_ci	 * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
18988c2ecf20Sopenharmony_ci	 * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
18998c2ecf20Sopenharmony_ci	 */
19008c2ecf20Sopenharmony_ci
19018c2ecf20Sopenharmony_ci	evmcs->guest_es_selector = vmcs12->guest_es_selector;
19028c2ecf20Sopenharmony_ci	evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
19038c2ecf20Sopenharmony_ci	evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
19048c2ecf20Sopenharmony_ci	evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
19058c2ecf20Sopenharmony_ci	evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
19068c2ecf20Sopenharmony_ci	evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
19078c2ecf20Sopenharmony_ci	evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
19088c2ecf20Sopenharmony_ci	evmcs->guest_tr_selector = vmcs12->guest_tr_selector;
19098c2ecf20Sopenharmony_ci
19108c2ecf20Sopenharmony_ci	evmcs->guest_es_limit = vmcs12->guest_es_limit;
19118c2ecf20Sopenharmony_ci	evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
19128c2ecf20Sopenharmony_ci	evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
19138c2ecf20Sopenharmony_ci	evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
19148c2ecf20Sopenharmony_ci	evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
19158c2ecf20Sopenharmony_ci	evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
19168c2ecf20Sopenharmony_ci	evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
19178c2ecf20Sopenharmony_ci	evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
19188c2ecf20Sopenharmony_ci	evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
19198c2ecf20Sopenharmony_ci	evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;
19208c2ecf20Sopenharmony_ci
19218c2ecf20Sopenharmony_ci	evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
19228c2ecf20Sopenharmony_ci	evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
19238c2ecf20Sopenharmony_ci	evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
19248c2ecf20Sopenharmony_ci	evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
19258c2ecf20Sopenharmony_ci	evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
19268c2ecf20Sopenharmony_ci	evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
19278c2ecf20Sopenharmony_ci	evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
19288c2ecf20Sopenharmony_ci	evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;
19298c2ecf20Sopenharmony_ci
19308c2ecf20Sopenharmony_ci	evmcs->guest_es_base = vmcs12->guest_es_base;
19318c2ecf20Sopenharmony_ci	evmcs->guest_cs_base = vmcs12->guest_cs_base;
19328c2ecf20Sopenharmony_ci	evmcs->guest_ss_base = vmcs12->guest_ss_base;
19338c2ecf20Sopenharmony_ci	evmcs->guest_ds_base = vmcs12->guest_ds_base;
19348c2ecf20Sopenharmony_ci	evmcs->guest_fs_base = vmcs12->guest_fs_base;
19358c2ecf20Sopenharmony_ci	evmcs->guest_gs_base = vmcs12->guest_gs_base;
19368c2ecf20Sopenharmony_ci	evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
19378c2ecf20Sopenharmony_ci	evmcs->guest_tr_base = vmcs12->guest_tr_base;
19388c2ecf20Sopenharmony_ci	evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
19398c2ecf20Sopenharmony_ci	evmcs->guest_idtr_base = vmcs12->guest_idtr_base;
19408c2ecf20Sopenharmony_ci
19418c2ecf20Sopenharmony_ci	evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
19428c2ecf20Sopenharmony_ci	evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;
19438c2ecf20Sopenharmony_ci
19448c2ecf20Sopenharmony_ci	evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
19458c2ecf20Sopenharmony_ci	evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
19468c2ecf20Sopenharmony_ci	evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
19478c2ecf20Sopenharmony_ci	evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;
19488c2ecf20Sopenharmony_ci
19498c2ecf20Sopenharmony_ci	evmcs->guest_pending_dbg_exceptions =
19508c2ecf20Sopenharmony_ci		vmcs12->guest_pending_dbg_exceptions;
19518c2ecf20Sopenharmony_ci	evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
19528c2ecf20Sopenharmony_ci	evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;
19538c2ecf20Sopenharmony_ci
19548c2ecf20Sopenharmony_ci	evmcs->guest_activity_state = vmcs12->guest_activity_state;
19558c2ecf20Sopenharmony_ci	evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;
19568c2ecf20Sopenharmony_ci
19578c2ecf20Sopenharmony_ci	evmcs->guest_cr0 = vmcs12->guest_cr0;
19588c2ecf20Sopenharmony_ci	evmcs->guest_cr3 = vmcs12->guest_cr3;
19598c2ecf20Sopenharmony_ci	evmcs->guest_cr4 = vmcs12->guest_cr4;
19608c2ecf20Sopenharmony_ci	evmcs->guest_dr7 = vmcs12->guest_dr7;
19618c2ecf20Sopenharmony_ci
19628c2ecf20Sopenharmony_ci	evmcs->guest_physical_address = vmcs12->guest_physical_address;
19638c2ecf20Sopenharmony_ci
19648c2ecf20Sopenharmony_ci	evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
19658c2ecf20Sopenharmony_ci	evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
19668c2ecf20Sopenharmony_ci	evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
19678c2ecf20Sopenharmony_ci	evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
19688c2ecf20Sopenharmony_ci	evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
19698c2ecf20Sopenharmony_ci	evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
19708c2ecf20Sopenharmony_ci	evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
19718c2ecf20Sopenharmony_ci	evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;
19728c2ecf20Sopenharmony_ci
19738c2ecf20Sopenharmony_ci	evmcs->exit_qualification = vmcs12->exit_qualification;
19748c2ecf20Sopenharmony_ci
19758c2ecf20Sopenharmony_ci	evmcs->guest_linear_address = vmcs12->guest_linear_address;
19768c2ecf20Sopenharmony_ci	evmcs->guest_rsp = vmcs12->guest_rsp;
19778c2ecf20Sopenharmony_ci	evmcs->guest_rflags = vmcs12->guest_rflags;
19788c2ecf20Sopenharmony_ci
19798c2ecf20Sopenharmony_ci	evmcs->guest_interruptibility_info =
19808c2ecf20Sopenharmony_ci		vmcs12->guest_interruptibility_info;
19818c2ecf20Sopenharmony_ci	evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
19828c2ecf20Sopenharmony_ci	evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
19838c2ecf20Sopenharmony_ci	evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
19848c2ecf20Sopenharmony_ci	evmcs->vm_entry_exception_error_code =
19858c2ecf20Sopenharmony_ci		vmcs12->vm_entry_exception_error_code;
19868c2ecf20Sopenharmony_ci	evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;
19878c2ecf20Sopenharmony_ci
19888c2ecf20Sopenharmony_ci	evmcs->guest_rip = vmcs12->guest_rip;
19898c2ecf20Sopenharmony_ci
19908c2ecf20Sopenharmony_ci	evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
19918c2ecf20Sopenharmony_ci
19928c2ecf20Sopenharmony_ci	return 0;
19938c2ecf20Sopenharmony_ci}
19948c2ecf20Sopenharmony_ci
19958c2ecf20Sopenharmony_ci/*
19968c2ecf20Sopenharmony_ci * This is an equivalent of the nested hypervisor executing the vmptrld
19978c2ecf20Sopenharmony_ci * instruction.
19988c2ecf20Sopenharmony_ci */
19998c2ecf20Sopenharmony_cistatic enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
20008c2ecf20Sopenharmony_ci	struct kvm_vcpu *vcpu, bool from_launch)
20018c2ecf20Sopenharmony_ci{
20028c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
20038c2ecf20Sopenharmony_ci	bool evmcs_gpa_changed = false;
20048c2ecf20Sopenharmony_ci	u64 evmcs_gpa;
20058c2ecf20Sopenharmony_ci
20068c2ecf20Sopenharmony_ci	if (likely(!vmx->nested.enlightened_vmcs_enabled))
20078c2ecf20Sopenharmony_ci		return EVMPTRLD_DISABLED;
20088c2ecf20Sopenharmony_ci
20098c2ecf20Sopenharmony_ci	if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa))
20108c2ecf20Sopenharmony_ci		return EVMPTRLD_DISABLED;
20118c2ecf20Sopenharmony_ci
20128c2ecf20Sopenharmony_ci	if (unlikely(!vmx->nested.hv_evmcs ||
20138c2ecf20Sopenharmony_ci		     evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
20148c2ecf20Sopenharmony_ci		if (!vmx->nested.hv_evmcs)
20158c2ecf20Sopenharmony_ci			vmx->nested.current_vmptr = -1ull;
20168c2ecf20Sopenharmony_ci
20178c2ecf20Sopenharmony_ci		nested_release_evmcs(vcpu);
20188c2ecf20Sopenharmony_ci
20198c2ecf20Sopenharmony_ci		if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa),
20208c2ecf20Sopenharmony_ci				 &vmx->nested.hv_evmcs_map))
20218c2ecf20Sopenharmony_ci			return EVMPTRLD_ERROR;
20228c2ecf20Sopenharmony_ci
20238c2ecf20Sopenharmony_ci		vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva;
20248c2ecf20Sopenharmony_ci
20258c2ecf20Sopenharmony_ci		/*
20268c2ecf20Sopenharmony_ci		 * Currently, KVM only supports eVMCS version 1
20278c2ecf20Sopenharmony_ci		 * (== KVM_EVMCS_VERSION) and thus we expect guest to set this
20288c2ecf20Sopenharmony_ci		 * value to first u32 field of eVMCS which should specify eVMCS
20298c2ecf20Sopenharmony_ci		 * VersionNumber.
20308c2ecf20Sopenharmony_ci		 *
20318c2ecf20Sopenharmony_ci		 * Guest should be aware of supported eVMCS versions by host by
20328c2ecf20Sopenharmony_ci		 * examining CPUID.0x4000000A.EAX[0:15]. Host userspace VMM is
20338c2ecf20Sopenharmony_ci		 * expected to set this CPUID leaf according to the value
20348c2ecf20Sopenharmony_ci		 * returned in vmcs_version from nested_enable_evmcs().
20358c2ecf20Sopenharmony_ci		 *
20368c2ecf20Sopenharmony_ci		 * However, it turns out that Microsoft Hyper-V fails to comply
20378c2ecf20Sopenharmony_ci		 * to their own invented interface: When Hyper-V use eVMCS, it
20388c2ecf20Sopenharmony_ci		 * just sets first u32 field of eVMCS to revision_id specified
20398c2ecf20Sopenharmony_ci		 * in MSR_IA32_VMX_BASIC. Instead of used eVMCS version number
20408c2ecf20Sopenharmony_ci		 * which is one of the supported versions specified in
20418c2ecf20Sopenharmony_ci		 * CPUID.0x4000000A.EAX[0:15].
20428c2ecf20Sopenharmony_ci		 *
20438c2ecf20Sopenharmony_ci		 * To overcome Hyper-V bug, we accept here either a supported
20448c2ecf20Sopenharmony_ci		 * eVMCS version or VMCS12 revision_id as valid values for first
20458c2ecf20Sopenharmony_ci		 * u32 field of eVMCS.
20468c2ecf20Sopenharmony_ci		 */
20478c2ecf20Sopenharmony_ci		if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) &&
20488c2ecf20Sopenharmony_ci		    (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) {
20498c2ecf20Sopenharmony_ci			nested_release_evmcs(vcpu);
20508c2ecf20Sopenharmony_ci			return EVMPTRLD_VMFAIL;
20518c2ecf20Sopenharmony_ci		}
20528c2ecf20Sopenharmony_ci
20538c2ecf20Sopenharmony_ci		vmx->nested.dirty_vmcs12 = true;
20548c2ecf20Sopenharmony_ci		vmx->nested.hv_evmcs_vmptr = evmcs_gpa;
20558c2ecf20Sopenharmony_ci
20568c2ecf20Sopenharmony_ci		evmcs_gpa_changed = true;
20578c2ecf20Sopenharmony_ci		/*
20588c2ecf20Sopenharmony_ci		 * Unlike normal vmcs12, enlightened vmcs12 is not fully
20598c2ecf20Sopenharmony_ci		 * reloaded from guest's memory (read only fields, fields not
20608c2ecf20Sopenharmony_ci		 * present in struct hv_enlightened_vmcs, ...). Make sure there
20618c2ecf20Sopenharmony_ci		 * are no leftovers.
20628c2ecf20Sopenharmony_ci		 */
20638c2ecf20Sopenharmony_ci		if (from_launch) {
20648c2ecf20Sopenharmony_ci			struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
20658c2ecf20Sopenharmony_ci			memset(vmcs12, 0, sizeof(*vmcs12));
20668c2ecf20Sopenharmony_ci			vmcs12->hdr.revision_id = VMCS12_REVISION;
20678c2ecf20Sopenharmony_ci		}
20688c2ecf20Sopenharmony_ci
20698c2ecf20Sopenharmony_ci	}
20708c2ecf20Sopenharmony_ci
20718c2ecf20Sopenharmony_ci	/*
20728c2ecf20Sopenharmony_ci	 * Clean fields data can't be used on VMLAUNCH and when we switch
20738c2ecf20Sopenharmony_ci	 * between different L2 guests as KVM keeps a single VMCS12 per L1.
20748c2ecf20Sopenharmony_ci	 */
20758c2ecf20Sopenharmony_ci	if (from_launch || evmcs_gpa_changed)
20768c2ecf20Sopenharmony_ci		vmx->nested.hv_evmcs->hv_clean_fields &=
20778c2ecf20Sopenharmony_ci			~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
20788c2ecf20Sopenharmony_ci
20798c2ecf20Sopenharmony_ci	return EVMPTRLD_SUCCEEDED;
20808c2ecf20Sopenharmony_ci}
20818c2ecf20Sopenharmony_ci
20828c2ecf20Sopenharmony_civoid nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
20838c2ecf20Sopenharmony_ci{
20848c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
20858c2ecf20Sopenharmony_ci
20868c2ecf20Sopenharmony_ci	if (vmx->nested.hv_evmcs) {
20878c2ecf20Sopenharmony_ci		copy_vmcs12_to_enlightened(vmx);
20888c2ecf20Sopenharmony_ci		/* All fields are clean */
20898c2ecf20Sopenharmony_ci		vmx->nested.hv_evmcs->hv_clean_fields |=
20908c2ecf20Sopenharmony_ci			HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
20918c2ecf20Sopenharmony_ci	} else {
20928c2ecf20Sopenharmony_ci		copy_vmcs12_to_shadow(vmx);
20938c2ecf20Sopenharmony_ci	}
20948c2ecf20Sopenharmony_ci
20958c2ecf20Sopenharmony_ci	vmx->nested.need_vmcs12_to_shadow_sync = false;
20968c2ecf20Sopenharmony_ci}
20978c2ecf20Sopenharmony_ci
20988c2ecf20Sopenharmony_cistatic enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
20998c2ecf20Sopenharmony_ci{
21008c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx =
21018c2ecf20Sopenharmony_ci		container_of(timer, struct vcpu_vmx, nested.preemption_timer);
21028c2ecf20Sopenharmony_ci
21038c2ecf20Sopenharmony_ci	vmx->nested.preemption_timer_expired = true;
21048c2ecf20Sopenharmony_ci	kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
21058c2ecf20Sopenharmony_ci	kvm_vcpu_kick(&vmx->vcpu);
21068c2ecf20Sopenharmony_ci
21078c2ecf20Sopenharmony_ci	return HRTIMER_NORESTART;
21088c2ecf20Sopenharmony_ci}
21098c2ecf20Sopenharmony_ci
21108c2ecf20Sopenharmony_cistatic u64 vmx_calc_preemption_timer_value(struct kvm_vcpu *vcpu)
21118c2ecf20Sopenharmony_ci{
21128c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
21138c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
21148c2ecf20Sopenharmony_ci
21158c2ecf20Sopenharmony_ci	u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >>
21168c2ecf20Sopenharmony_ci			    VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
21178c2ecf20Sopenharmony_ci
21188c2ecf20Sopenharmony_ci	if (!vmx->nested.has_preemption_timer_deadline) {
21198c2ecf20Sopenharmony_ci		vmx->nested.preemption_timer_deadline =
21208c2ecf20Sopenharmony_ci			vmcs12->vmx_preemption_timer_value + l1_scaled_tsc;
21218c2ecf20Sopenharmony_ci		vmx->nested.has_preemption_timer_deadline = true;
21228c2ecf20Sopenharmony_ci	}
21238c2ecf20Sopenharmony_ci	return vmx->nested.preemption_timer_deadline - l1_scaled_tsc;
21248c2ecf20Sopenharmony_ci}
21258c2ecf20Sopenharmony_ci
21268c2ecf20Sopenharmony_cistatic void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
21278c2ecf20Sopenharmony_ci					u64 preemption_timeout)
21288c2ecf20Sopenharmony_ci{
21298c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
21308c2ecf20Sopenharmony_ci
21318c2ecf20Sopenharmony_ci	/*
21328c2ecf20Sopenharmony_ci	 * A timer value of zero is architecturally guaranteed to cause
21338c2ecf20Sopenharmony_ci	 * a VMExit prior to executing any instructions in the guest.
21348c2ecf20Sopenharmony_ci	 */
21358c2ecf20Sopenharmony_ci	if (preemption_timeout == 0) {
21368c2ecf20Sopenharmony_ci		vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
21378c2ecf20Sopenharmony_ci		return;
21388c2ecf20Sopenharmony_ci	}
21398c2ecf20Sopenharmony_ci
21408c2ecf20Sopenharmony_ci	if (vcpu->arch.virtual_tsc_khz == 0)
21418c2ecf20Sopenharmony_ci		return;
21428c2ecf20Sopenharmony_ci
21438c2ecf20Sopenharmony_ci	preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
21448c2ecf20Sopenharmony_ci	preemption_timeout *= 1000000;
21458c2ecf20Sopenharmony_ci	do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
21468c2ecf20Sopenharmony_ci	hrtimer_start(&vmx->nested.preemption_timer,
21478c2ecf20Sopenharmony_ci		      ktime_add_ns(ktime_get(), preemption_timeout),
21488c2ecf20Sopenharmony_ci		      HRTIMER_MODE_ABS_PINNED);
21498c2ecf20Sopenharmony_ci}
21508c2ecf20Sopenharmony_ci
21518c2ecf20Sopenharmony_cistatic u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
21528c2ecf20Sopenharmony_ci{
21538c2ecf20Sopenharmony_ci	if (vmx->nested.nested_run_pending &&
21548c2ecf20Sopenharmony_ci	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
21558c2ecf20Sopenharmony_ci		return vmcs12->guest_ia32_efer;
21568c2ecf20Sopenharmony_ci	else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
21578c2ecf20Sopenharmony_ci		return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME);
21588c2ecf20Sopenharmony_ci	else
21598c2ecf20Sopenharmony_ci		return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME);
21608c2ecf20Sopenharmony_ci}
21618c2ecf20Sopenharmony_ci
21628c2ecf20Sopenharmony_cistatic void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
21638c2ecf20Sopenharmony_ci{
21648c2ecf20Sopenharmony_ci	/*
21658c2ecf20Sopenharmony_ci	 * If vmcs02 hasn't been initialized, set the constant vmcs02 state
21668c2ecf20Sopenharmony_ci	 * according to L0's settings (vmcs12 is irrelevant here).  Host
21678c2ecf20Sopenharmony_ci	 * fields that come from L0 and are not constant, e.g. HOST_CR3,
21688c2ecf20Sopenharmony_ci	 * will be set as needed prior to VMLAUNCH/VMRESUME.
21698c2ecf20Sopenharmony_ci	 */
21708c2ecf20Sopenharmony_ci	if (vmx->nested.vmcs02_initialized)
21718c2ecf20Sopenharmony_ci		return;
21728c2ecf20Sopenharmony_ci	vmx->nested.vmcs02_initialized = true;
21738c2ecf20Sopenharmony_ci
21748c2ecf20Sopenharmony_ci	/*
21758c2ecf20Sopenharmony_ci	 * We don't care what the EPTP value is we just need to guarantee
21768c2ecf20Sopenharmony_ci	 * it's valid so we don't get a false positive when doing early
21778c2ecf20Sopenharmony_ci	 * consistency checks.
21788c2ecf20Sopenharmony_ci	 */
21798c2ecf20Sopenharmony_ci	if (enable_ept && nested_early_check)
21808c2ecf20Sopenharmony_ci		vmcs_write64(EPT_POINTER,
21818c2ecf20Sopenharmony_ci			     construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL));
21828c2ecf20Sopenharmony_ci
21838c2ecf20Sopenharmony_ci	/* All VMFUNCs are currently emulated through L0 vmexits.  */
21848c2ecf20Sopenharmony_ci	if (cpu_has_vmx_vmfunc())
21858c2ecf20Sopenharmony_ci		vmcs_write64(VM_FUNCTION_CONTROL, 0);
21868c2ecf20Sopenharmony_ci
21878c2ecf20Sopenharmony_ci	if (cpu_has_vmx_posted_intr())
21888c2ecf20Sopenharmony_ci		vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
21898c2ecf20Sopenharmony_ci
21908c2ecf20Sopenharmony_ci	if (cpu_has_vmx_msr_bitmap())
21918c2ecf20Sopenharmony_ci		vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
21928c2ecf20Sopenharmony_ci
21938c2ecf20Sopenharmony_ci	/*
21948c2ecf20Sopenharmony_ci	 * The PML address never changes, so it is constant in vmcs02.
21958c2ecf20Sopenharmony_ci	 * Conceptually we want to copy the PML index from vmcs01 here,
21968c2ecf20Sopenharmony_ci	 * and then back to vmcs01 on nested vmexit.  But since we flush
21978c2ecf20Sopenharmony_ci	 * the log and reset GUEST_PML_INDEX on each vmexit, the PML
21988c2ecf20Sopenharmony_ci	 * index is also effectively constant in vmcs02.
21998c2ecf20Sopenharmony_ci	 */
22008c2ecf20Sopenharmony_ci	if (enable_pml) {
22018c2ecf20Sopenharmony_ci		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
22028c2ecf20Sopenharmony_ci		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
22038c2ecf20Sopenharmony_ci	}
22048c2ecf20Sopenharmony_ci
22058c2ecf20Sopenharmony_ci	if (cpu_has_vmx_encls_vmexit())
22068c2ecf20Sopenharmony_ci		vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
22078c2ecf20Sopenharmony_ci
22088c2ecf20Sopenharmony_ci	/*
22098c2ecf20Sopenharmony_ci	 * Set the MSR load/store lists to match L0's settings.  Only the
22108c2ecf20Sopenharmony_ci	 * addresses are constant (for vmcs02), the counts can change based
22118c2ecf20Sopenharmony_ci	 * on L2's behavior, e.g. switching to/from long mode.
22128c2ecf20Sopenharmony_ci	 */
22138c2ecf20Sopenharmony_ci	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val));
22148c2ecf20Sopenharmony_ci	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
22158c2ecf20Sopenharmony_ci	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
22168c2ecf20Sopenharmony_ci
22178c2ecf20Sopenharmony_ci	vmx_set_constant_host_state(vmx);
22188c2ecf20Sopenharmony_ci}
22198c2ecf20Sopenharmony_ci
22208c2ecf20Sopenharmony_cistatic void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
22218c2ecf20Sopenharmony_ci				      struct vmcs12 *vmcs12)
22228c2ecf20Sopenharmony_ci{
22238c2ecf20Sopenharmony_ci	prepare_vmcs02_constant_state(vmx);
22248c2ecf20Sopenharmony_ci
22258c2ecf20Sopenharmony_ci	vmcs_write64(VMCS_LINK_POINTER, -1ull);
22268c2ecf20Sopenharmony_ci
22278c2ecf20Sopenharmony_ci	if (enable_vpid) {
22288c2ecf20Sopenharmony_ci		if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
22298c2ecf20Sopenharmony_ci			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
22308c2ecf20Sopenharmony_ci		else
22318c2ecf20Sopenharmony_ci			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
22328c2ecf20Sopenharmony_ci	}
22338c2ecf20Sopenharmony_ci}
22348c2ecf20Sopenharmony_ci
22358c2ecf20Sopenharmony_cistatic void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01,
22368c2ecf20Sopenharmony_ci				 struct vmcs12 *vmcs12)
22378c2ecf20Sopenharmony_ci{
22388c2ecf20Sopenharmony_ci	u32 exec_control, vmcs12_exec_ctrl;
22398c2ecf20Sopenharmony_ci	u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
22408c2ecf20Sopenharmony_ci
22418c2ecf20Sopenharmony_ci	if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
22428c2ecf20Sopenharmony_ci		prepare_vmcs02_early_rare(vmx, vmcs12);
22438c2ecf20Sopenharmony_ci
22448c2ecf20Sopenharmony_ci	/*
22458c2ecf20Sopenharmony_ci	 * PIN CONTROLS
22468c2ecf20Sopenharmony_ci	 */
22478c2ecf20Sopenharmony_ci	exec_control = __pin_controls_get(vmcs01);
22488c2ecf20Sopenharmony_ci	exec_control |= (vmcs12->pin_based_vm_exec_control &
22498c2ecf20Sopenharmony_ci			 ~PIN_BASED_VMX_PREEMPTION_TIMER);
22508c2ecf20Sopenharmony_ci
22518c2ecf20Sopenharmony_ci	/* Posted interrupts setting is only taken from vmcs12.  */
22528c2ecf20Sopenharmony_ci	vmx->nested.pi_pending = false;
22538c2ecf20Sopenharmony_ci	if (nested_cpu_has_posted_intr(vmcs12))
22548c2ecf20Sopenharmony_ci		vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
22558c2ecf20Sopenharmony_ci	else
22568c2ecf20Sopenharmony_ci		exec_control &= ~PIN_BASED_POSTED_INTR;
22578c2ecf20Sopenharmony_ci	pin_controls_set(vmx, exec_control);
22588c2ecf20Sopenharmony_ci
22598c2ecf20Sopenharmony_ci	/*
22608c2ecf20Sopenharmony_ci	 * EXEC CONTROLS
22618c2ecf20Sopenharmony_ci	 */
22628c2ecf20Sopenharmony_ci	exec_control = __exec_controls_get(vmcs01); /* L0's desires */
22638c2ecf20Sopenharmony_ci	exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
22648c2ecf20Sopenharmony_ci	exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
22658c2ecf20Sopenharmony_ci	exec_control &= ~CPU_BASED_TPR_SHADOW;
22668c2ecf20Sopenharmony_ci	exec_control |= vmcs12->cpu_based_vm_exec_control;
22678c2ecf20Sopenharmony_ci
22688c2ecf20Sopenharmony_ci	vmx->nested.l1_tpr_threshold = -1;
22698c2ecf20Sopenharmony_ci	if (exec_control & CPU_BASED_TPR_SHADOW)
22708c2ecf20Sopenharmony_ci		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
22718c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
22728c2ecf20Sopenharmony_ci	else
22738c2ecf20Sopenharmony_ci		exec_control |= CPU_BASED_CR8_LOAD_EXITING |
22748c2ecf20Sopenharmony_ci				CPU_BASED_CR8_STORE_EXITING;
22758c2ecf20Sopenharmony_ci#endif
22768c2ecf20Sopenharmony_ci
22778c2ecf20Sopenharmony_ci	/*
22788c2ecf20Sopenharmony_ci	 * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
22798c2ecf20Sopenharmony_ci	 * for I/O port accesses.
22808c2ecf20Sopenharmony_ci	 */
22818c2ecf20Sopenharmony_ci	exec_control |= CPU_BASED_UNCOND_IO_EXITING;
22828c2ecf20Sopenharmony_ci	exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
22838c2ecf20Sopenharmony_ci
22848c2ecf20Sopenharmony_ci	/*
22858c2ecf20Sopenharmony_ci	 * This bit will be computed in nested_get_vmcs12_pages, because
22868c2ecf20Sopenharmony_ci	 * we do not have access to L1's MSR bitmap yet.  For now, keep
22878c2ecf20Sopenharmony_ci	 * the same bit as before, hoping to avoid multiple VMWRITEs that
22888c2ecf20Sopenharmony_ci	 * only set/clear this bit.
22898c2ecf20Sopenharmony_ci	 */
22908c2ecf20Sopenharmony_ci	exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
22918c2ecf20Sopenharmony_ci	exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS;
22928c2ecf20Sopenharmony_ci
22938c2ecf20Sopenharmony_ci	exec_controls_set(vmx, exec_control);
22948c2ecf20Sopenharmony_ci
22958c2ecf20Sopenharmony_ci	/*
22968c2ecf20Sopenharmony_ci	 * SECONDARY EXEC CONTROLS
22978c2ecf20Sopenharmony_ci	 */
22988c2ecf20Sopenharmony_ci	if (cpu_has_secondary_exec_ctrls()) {
22998c2ecf20Sopenharmony_ci		exec_control = __secondary_exec_controls_get(vmcs01);
23008c2ecf20Sopenharmony_ci
23018c2ecf20Sopenharmony_ci		/* Take the following fields only from vmcs12 */
23028c2ecf20Sopenharmony_ci		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
23038c2ecf20Sopenharmony_ci				  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
23048c2ecf20Sopenharmony_ci				  SECONDARY_EXEC_ENABLE_INVPCID |
23058c2ecf20Sopenharmony_ci				  SECONDARY_EXEC_ENABLE_RDTSCP |
23068c2ecf20Sopenharmony_ci				  SECONDARY_EXEC_XSAVES |
23078c2ecf20Sopenharmony_ci				  SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
23088c2ecf20Sopenharmony_ci				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
23098c2ecf20Sopenharmony_ci				  SECONDARY_EXEC_APIC_REGISTER_VIRT |
23108c2ecf20Sopenharmony_ci				  SECONDARY_EXEC_ENABLE_VMFUNC |
23118c2ecf20Sopenharmony_ci				  SECONDARY_EXEC_DESC);
23128c2ecf20Sopenharmony_ci
23138c2ecf20Sopenharmony_ci		if (nested_cpu_has(vmcs12,
23148c2ecf20Sopenharmony_ci				   CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
23158c2ecf20Sopenharmony_ci			vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
23168c2ecf20Sopenharmony_ci				~SECONDARY_EXEC_ENABLE_PML;
23178c2ecf20Sopenharmony_ci			exec_control |= vmcs12_exec_ctrl;
23188c2ecf20Sopenharmony_ci		}
23198c2ecf20Sopenharmony_ci
23208c2ecf20Sopenharmony_ci		/* VMCS shadowing for L2 is emulated for now */
23218c2ecf20Sopenharmony_ci		exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
23228c2ecf20Sopenharmony_ci
23238c2ecf20Sopenharmony_ci		/*
23248c2ecf20Sopenharmony_ci		 * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4()
23258c2ecf20Sopenharmony_ci		 * will not have to rewrite the controls just for this bit.
23268c2ecf20Sopenharmony_ci		 */
23278c2ecf20Sopenharmony_ci		if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() &&
23288c2ecf20Sopenharmony_ci		    (vmcs12->guest_cr4 & X86_CR4_UMIP))
23298c2ecf20Sopenharmony_ci			exec_control |= SECONDARY_EXEC_DESC;
23308c2ecf20Sopenharmony_ci
23318c2ecf20Sopenharmony_ci		if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
23328c2ecf20Sopenharmony_ci			vmcs_write16(GUEST_INTR_STATUS,
23338c2ecf20Sopenharmony_ci				vmcs12->guest_intr_status);
23348c2ecf20Sopenharmony_ci
23358c2ecf20Sopenharmony_ci		if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
23368c2ecf20Sopenharmony_ci		    exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
23378c2ecf20Sopenharmony_ci
23388c2ecf20Sopenharmony_ci		secondary_exec_controls_set(vmx, exec_control);
23398c2ecf20Sopenharmony_ci	}
23408c2ecf20Sopenharmony_ci
23418c2ecf20Sopenharmony_ci	/*
23428c2ecf20Sopenharmony_ci	 * ENTRY CONTROLS
23438c2ecf20Sopenharmony_ci	 *
23448c2ecf20Sopenharmony_ci	 * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
23458c2ecf20Sopenharmony_ci	 * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
23468c2ecf20Sopenharmony_ci	 * on the related bits (if supported by the CPU) in the hope that
23478c2ecf20Sopenharmony_ci	 * we can avoid VMWrites during vmx_set_efer().
23488c2ecf20Sopenharmony_ci	 *
23498c2ecf20Sopenharmony_ci	 * Similarly, take vmcs01's PERF_GLOBAL_CTRL in the hope that if KVM is
23508c2ecf20Sopenharmony_ci	 * loading PERF_GLOBAL_CTRL via the VMCS for L1, then KVM will want to
23518c2ecf20Sopenharmony_ci	 * do the same for L2.
23528c2ecf20Sopenharmony_ci	 */
23538c2ecf20Sopenharmony_ci	exec_control = __vm_entry_controls_get(vmcs01);
23548c2ecf20Sopenharmony_ci	exec_control |= (vmcs12->vm_entry_controls &
23558c2ecf20Sopenharmony_ci			 ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL);
23568c2ecf20Sopenharmony_ci	exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER);
23578c2ecf20Sopenharmony_ci	if (cpu_has_load_ia32_efer()) {
23588c2ecf20Sopenharmony_ci		if (guest_efer & EFER_LMA)
23598c2ecf20Sopenharmony_ci			exec_control |= VM_ENTRY_IA32E_MODE;
23608c2ecf20Sopenharmony_ci		if (guest_efer != host_efer)
23618c2ecf20Sopenharmony_ci			exec_control |= VM_ENTRY_LOAD_IA32_EFER;
23628c2ecf20Sopenharmony_ci	}
23638c2ecf20Sopenharmony_ci	vm_entry_controls_set(vmx, exec_control);
23648c2ecf20Sopenharmony_ci
23658c2ecf20Sopenharmony_ci	/*
23668c2ecf20Sopenharmony_ci	 * EXIT CONTROLS
23678c2ecf20Sopenharmony_ci	 *
23688c2ecf20Sopenharmony_ci	 * L2->L1 exit controls are emulated - the hardware exit is to L0 so
23698c2ecf20Sopenharmony_ci	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
23708c2ecf20Sopenharmony_ci	 * bits may be modified by vmx_set_efer() in prepare_vmcs02().
23718c2ecf20Sopenharmony_ci	 */
23728c2ecf20Sopenharmony_ci	exec_control = __vm_exit_controls_get(vmcs01);
23738c2ecf20Sopenharmony_ci	if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
23748c2ecf20Sopenharmony_ci		exec_control |= VM_EXIT_LOAD_IA32_EFER;
23758c2ecf20Sopenharmony_ci	else
23768c2ecf20Sopenharmony_ci		exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
23778c2ecf20Sopenharmony_ci	vm_exit_controls_set(vmx, exec_control);
23788c2ecf20Sopenharmony_ci
23798c2ecf20Sopenharmony_ci	/*
23808c2ecf20Sopenharmony_ci	 * Interrupt/Exception Fields
23818c2ecf20Sopenharmony_ci	 */
23828c2ecf20Sopenharmony_ci	if (vmx->nested.nested_run_pending) {
23838c2ecf20Sopenharmony_ci		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
23848c2ecf20Sopenharmony_ci			     vmcs12->vm_entry_intr_info_field);
23858c2ecf20Sopenharmony_ci		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
23868c2ecf20Sopenharmony_ci			     vmcs12->vm_entry_exception_error_code);
23878c2ecf20Sopenharmony_ci		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
23888c2ecf20Sopenharmony_ci			     vmcs12->vm_entry_instruction_len);
23898c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
23908c2ecf20Sopenharmony_ci			     vmcs12->guest_interruptibility_info);
23918c2ecf20Sopenharmony_ci		vmx->loaded_vmcs->nmi_known_unmasked =
23928c2ecf20Sopenharmony_ci			!(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
23938c2ecf20Sopenharmony_ci	} else {
23948c2ecf20Sopenharmony_ci		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
23958c2ecf20Sopenharmony_ci	}
23968c2ecf20Sopenharmony_ci}
23978c2ecf20Sopenharmony_ci
23988c2ecf20Sopenharmony_cistatic void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
23998c2ecf20Sopenharmony_ci{
24008c2ecf20Sopenharmony_ci	struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
24018c2ecf20Sopenharmony_ci
24028c2ecf20Sopenharmony_ci	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
24038c2ecf20Sopenharmony_ci			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
24048c2ecf20Sopenharmony_ci		vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
24058c2ecf20Sopenharmony_ci		vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
24068c2ecf20Sopenharmony_ci		vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
24078c2ecf20Sopenharmony_ci		vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
24088c2ecf20Sopenharmony_ci		vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
24098c2ecf20Sopenharmony_ci		vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
24108c2ecf20Sopenharmony_ci		vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
24118c2ecf20Sopenharmony_ci		vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
24128c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
24138c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
24148c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
24158c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
24168c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
24178c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
24188c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
24198c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
24208c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
24218c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
24228c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
24238c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
24248c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
24258c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
24268c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
24278c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
24288c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
24298c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
24308c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
24318c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
24328c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
24338c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
24348c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
24358c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
24368c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
24378c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
24388c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
24398c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
24408c2ecf20Sopenharmony_ci
24418c2ecf20Sopenharmony_ci		vmx->segment_cache.bitmask = 0;
24428c2ecf20Sopenharmony_ci	}
24438c2ecf20Sopenharmony_ci
24448c2ecf20Sopenharmony_ci	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
24458c2ecf20Sopenharmony_ci			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
24468c2ecf20Sopenharmony_ci		vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
24478c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
24488c2ecf20Sopenharmony_ci			    vmcs12->guest_pending_dbg_exceptions);
24498c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
24508c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
24518c2ecf20Sopenharmony_ci
24528c2ecf20Sopenharmony_ci		/*
24538c2ecf20Sopenharmony_ci		 * L1 may access the L2's PDPTR, so save them to construct
24548c2ecf20Sopenharmony_ci		 * vmcs12
24558c2ecf20Sopenharmony_ci		 */
24568c2ecf20Sopenharmony_ci		if (enable_ept) {
24578c2ecf20Sopenharmony_ci			vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
24588c2ecf20Sopenharmony_ci			vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
24598c2ecf20Sopenharmony_ci			vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
24608c2ecf20Sopenharmony_ci			vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
24618c2ecf20Sopenharmony_ci		}
24628c2ecf20Sopenharmony_ci
24638c2ecf20Sopenharmony_ci		if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
24648c2ecf20Sopenharmony_ci		    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
24658c2ecf20Sopenharmony_ci			vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
24668c2ecf20Sopenharmony_ci	}
24678c2ecf20Sopenharmony_ci
24688c2ecf20Sopenharmony_ci	if (nested_cpu_has_xsaves(vmcs12))
24698c2ecf20Sopenharmony_ci		vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
24708c2ecf20Sopenharmony_ci
24718c2ecf20Sopenharmony_ci	/*
24728c2ecf20Sopenharmony_ci	 * Whether page-faults are trapped is determined by a combination of
24738c2ecf20Sopenharmony_ci	 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.  If L0
24748c2ecf20Sopenharmony_ci	 * doesn't care about page faults then we should set all of these to
24758c2ecf20Sopenharmony_ci	 * L1's desires. However, if L0 does care about (some) page faults, it
24768c2ecf20Sopenharmony_ci	 * is not easy (if at all possible?) to merge L0 and L1's desires, we
24778c2ecf20Sopenharmony_ci	 * simply ask to exit on each and every L2 page fault. This is done by
24788c2ecf20Sopenharmony_ci	 * setting MASK=MATCH=0 and (see below) EB.PF=1.
24798c2ecf20Sopenharmony_ci	 * Note that below we don't need special code to set EB.PF beyond the
24808c2ecf20Sopenharmony_ci	 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
24818c2ecf20Sopenharmony_ci	 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
24828c2ecf20Sopenharmony_ci	 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
24838c2ecf20Sopenharmony_ci	 */
24848c2ecf20Sopenharmony_ci	if (vmx_need_pf_intercept(&vmx->vcpu)) {
24858c2ecf20Sopenharmony_ci		/*
24868c2ecf20Sopenharmony_ci		 * TODO: if both L0 and L1 need the same MASK and MATCH,
24878c2ecf20Sopenharmony_ci		 * go ahead and use it?
24888c2ecf20Sopenharmony_ci		 */
24898c2ecf20Sopenharmony_ci		vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
24908c2ecf20Sopenharmony_ci		vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
24918c2ecf20Sopenharmony_ci	} else {
24928c2ecf20Sopenharmony_ci		vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, vmcs12->page_fault_error_code_mask);
24938c2ecf20Sopenharmony_ci		vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, vmcs12->page_fault_error_code_match);
24948c2ecf20Sopenharmony_ci	}
24958c2ecf20Sopenharmony_ci
24968c2ecf20Sopenharmony_ci	if (cpu_has_vmx_apicv()) {
24978c2ecf20Sopenharmony_ci		vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
24988c2ecf20Sopenharmony_ci		vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
24998c2ecf20Sopenharmony_ci		vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
25008c2ecf20Sopenharmony_ci		vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
25018c2ecf20Sopenharmony_ci	}
25028c2ecf20Sopenharmony_ci
25038c2ecf20Sopenharmony_ci	/*
25048c2ecf20Sopenharmony_ci	 * Make sure the msr_autostore list is up to date before we set the
25058c2ecf20Sopenharmony_ci	 * count in the vmcs02.
25068c2ecf20Sopenharmony_ci	 */
25078c2ecf20Sopenharmony_ci	prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC);
25088c2ecf20Sopenharmony_ci
25098c2ecf20Sopenharmony_ci	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr);
25108c2ecf20Sopenharmony_ci	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
25118c2ecf20Sopenharmony_ci	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
25128c2ecf20Sopenharmony_ci
25138c2ecf20Sopenharmony_ci	set_cr4_guest_host_mask(vmx);
25148c2ecf20Sopenharmony_ci}
25158c2ecf20Sopenharmony_ci
25168c2ecf20Sopenharmony_ci/*
25178c2ecf20Sopenharmony_ci * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
25188c2ecf20Sopenharmony_ci * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
25198c2ecf20Sopenharmony_ci * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
25208c2ecf20Sopenharmony_ci * guest in a way that will both be appropriate to L1's requests, and our
25218c2ecf20Sopenharmony_ci * needs. In addition to modifying the active vmcs (which is vmcs02), this
25228c2ecf20Sopenharmony_ci * function also has additional necessary side-effects, like setting various
25238c2ecf20Sopenharmony_ci * vcpu->arch fields.
25248c2ecf20Sopenharmony_ci * Returns 0 on success, 1 on failure. Invalid state exit qualification code
25258c2ecf20Sopenharmony_ci * is assigned to entry_failure_code on failure.
25268c2ecf20Sopenharmony_ci */
25278c2ecf20Sopenharmony_cistatic int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
25288c2ecf20Sopenharmony_ci			  enum vm_entry_failure_code *entry_failure_code)
25298c2ecf20Sopenharmony_ci{
25308c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
25318c2ecf20Sopenharmony_ci	struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
25328c2ecf20Sopenharmony_ci	bool load_guest_pdptrs_vmcs12 = false;
25338c2ecf20Sopenharmony_ci
25348c2ecf20Sopenharmony_ci	if (vmx->nested.dirty_vmcs12 || hv_evmcs) {
25358c2ecf20Sopenharmony_ci		prepare_vmcs02_rare(vmx, vmcs12);
25368c2ecf20Sopenharmony_ci		vmx->nested.dirty_vmcs12 = false;
25378c2ecf20Sopenharmony_ci
25388c2ecf20Sopenharmony_ci		load_guest_pdptrs_vmcs12 = !hv_evmcs ||
25398c2ecf20Sopenharmony_ci			!(hv_evmcs->hv_clean_fields &
25408c2ecf20Sopenharmony_ci			  HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
25418c2ecf20Sopenharmony_ci	}
25428c2ecf20Sopenharmony_ci
25438c2ecf20Sopenharmony_ci	if (vmx->nested.nested_run_pending &&
25448c2ecf20Sopenharmony_ci	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
25458c2ecf20Sopenharmony_ci		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
25468c2ecf20Sopenharmony_ci		vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
25478c2ecf20Sopenharmony_ci	} else {
25488c2ecf20Sopenharmony_ci		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
25498c2ecf20Sopenharmony_ci		vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
25508c2ecf20Sopenharmony_ci	}
25518c2ecf20Sopenharmony_ci	if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
25528c2ecf20Sopenharmony_ci	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
25538c2ecf20Sopenharmony_ci		vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
25548c2ecf20Sopenharmony_ci	vmx_set_rflags(vcpu, vmcs12->guest_rflags);
25558c2ecf20Sopenharmony_ci
25568c2ecf20Sopenharmony_ci	/* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
25578c2ecf20Sopenharmony_ci	 * bitwise-or of what L1 wants to trap for L2, and what we want to
25588c2ecf20Sopenharmony_ci	 * trap. Note that CR0.TS also needs updating - we do this later.
25598c2ecf20Sopenharmony_ci	 */
25608c2ecf20Sopenharmony_ci	update_exception_bitmap(vcpu);
25618c2ecf20Sopenharmony_ci	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
25628c2ecf20Sopenharmony_ci	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
25638c2ecf20Sopenharmony_ci
25648c2ecf20Sopenharmony_ci	if (vmx->nested.nested_run_pending &&
25658c2ecf20Sopenharmony_ci	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
25668c2ecf20Sopenharmony_ci		vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
25678c2ecf20Sopenharmony_ci		vcpu->arch.pat = vmcs12->guest_ia32_pat;
25688c2ecf20Sopenharmony_ci	} else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
25698c2ecf20Sopenharmony_ci		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
25708c2ecf20Sopenharmony_ci	}
25718c2ecf20Sopenharmony_ci
25728c2ecf20Sopenharmony_ci	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
25738c2ecf20Sopenharmony_ci
25748c2ecf20Sopenharmony_ci	if (kvm_has_tsc_control)
25758c2ecf20Sopenharmony_ci		decache_tsc_multiplier(vmx);
25768c2ecf20Sopenharmony_ci
25778c2ecf20Sopenharmony_ci	nested_vmx_transition_tlb_flush(vcpu, vmcs12, true);
25788c2ecf20Sopenharmony_ci
25798c2ecf20Sopenharmony_ci	if (nested_cpu_has_ept(vmcs12))
25808c2ecf20Sopenharmony_ci		nested_ept_init_mmu_context(vcpu);
25818c2ecf20Sopenharmony_ci
25828c2ecf20Sopenharmony_ci	/*
25838c2ecf20Sopenharmony_ci	 * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
25848c2ecf20Sopenharmony_ci	 * bits which we consider mandatory enabled.
25858c2ecf20Sopenharmony_ci	 * The CR0_READ_SHADOW is what L2 should have expected to read given
25868c2ecf20Sopenharmony_ci	 * the specifications by L1; It's not enough to take
25878c2ecf20Sopenharmony_ci	 * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we we
25888c2ecf20Sopenharmony_ci	 * have more bits than L1 expected.
25898c2ecf20Sopenharmony_ci	 */
25908c2ecf20Sopenharmony_ci	vmx_set_cr0(vcpu, vmcs12->guest_cr0);
25918c2ecf20Sopenharmony_ci	vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
25928c2ecf20Sopenharmony_ci
25938c2ecf20Sopenharmony_ci	vmx_set_cr4(vcpu, vmcs12->guest_cr4);
25948c2ecf20Sopenharmony_ci	vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
25958c2ecf20Sopenharmony_ci
25968c2ecf20Sopenharmony_ci	vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12);
25978c2ecf20Sopenharmony_ci	/* Note: may modify VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
25988c2ecf20Sopenharmony_ci	vmx_set_efer(vcpu, vcpu->arch.efer);
25998c2ecf20Sopenharmony_ci
26008c2ecf20Sopenharmony_ci	/*
26018c2ecf20Sopenharmony_ci	 * Guest state is invalid and unrestricted guest is disabled,
26028c2ecf20Sopenharmony_ci	 * which means L1 attempted VMEntry to L2 with invalid state.
26038c2ecf20Sopenharmony_ci	 * Fail the VMEntry.
26048c2ecf20Sopenharmony_ci	 */
26058c2ecf20Sopenharmony_ci	if (CC(!vmx_guest_state_valid(vcpu))) {
26068c2ecf20Sopenharmony_ci		*entry_failure_code = ENTRY_FAIL_DEFAULT;
26078c2ecf20Sopenharmony_ci		return -EINVAL;
26088c2ecf20Sopenharmony_ci	}
26098c2ecf20Sopenharmony_ci
26108c2ecf20Sopenharmony_ci	/* Shadow page tables on either EPT or shadow page tables. */
26118c2ecf20Sopenharmony_ci	if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
26128c2ecf20Sopenharmony_ci				entry_failure_code))
26138c2ecf20Sopenharmony_ci		return -EINVAL;
26148c2ecf20Sopenharmony_ci
26158c2ecf20Sopenharmony_ci	/*
26168c2ecf20Sopenharmony_ci	 * Immediately write vmcs02.GUEST_CR3.  It will be propagated to vmcs12
26178c2ecf20Sopenharmony_ci	 * on nested VM-Exit, which can occur without actually running L2 and
26188c2ecf20Sopenharmony_ci	 * thus without hitting vmx_load_mmu_pgd(), e.g. if L1 is entering L2 with
26198c2ecf20Sopenharmony_ci	 * vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the
26208c2ecf20Sopenharmony_ci	 * transition to HLT instead of running L2.
26218c2ecf20Sopenharmony_ci	 */
26228c2ecf20Sopenharmony_ci	if (enable_ept)
26238c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_CR3, vmcs12->guest_cr3);
26248c2ecf20Sopenharmony_ci
26258c2ecf20Sopenharmony_ci	/* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
26268c2ecf20Sopenharmony_ci	if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
26278c2ecf20Sopenharmony_ci	    is_pae_paging(vcpu)) {
26288c2ecf20Sopenharmony_ci		vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
26298c2ecf20Sopenharmony_ci		vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
26308c2ecf20Sopenharmony_ci		vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
26318c2ecf20Sopenharmony_ci		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
26328c2ecf20Sopenharmony_ci	}
26338c2ecf20Sopenharmony_ci
26348c2ecf20Sopenharmony_ci	if (!enable_ept)
26358c2ecf20Sopenharmony_ci		vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
26368c2ecf20Sopenharmony_ci
26378c2ecf20Sopenharmony_ci	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
26388c2ecf20Sopenharmony_ci	    WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
26398c2ecf20Sopenharmony_ci				     vmcs12->guest_ia32_perf_global_ctrl))) {
26408c2ecf20Sopenharmony_ci		*entry_failure_code = ENTRY_FAIL_DEFAULT;
26418c2ecf20Sopenharmony_ci		return -EINVAL;
26428c2ecf20Sopenharmony_ci	}
26438c2ecf20Sopenharmony_ci
26448c2ecf20Sopenharmony_ci	kvm_rsp_write(vcpu, vmcs12->guest_rsp);
26458c2ecf20Sopenharmony_ci	kvm_rip_write(vcpu, vmcs12->guest_rip);
26468c2ecf20Sopenharmony_ci	return 0;
26478c2ecf20Sopenharmony_ci}
26488c2ecf20Sopenharmony_ci
26498c2ecf20Sopenharmony_cistatic int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
26508c2ecf20Sopenharmony_ci{
26518c2ecf20Sopenharmony_ci	if (CC(!nested_cpu_has_nmi_exiting(vmcs12) &&
26528c2ecf20Sopenharmony_ci	       nested_cpu_has_virtual_nmis(vmcs12)))
26538c2ecf20Sopenharmony_ci		return -EINVAL;
26548c2ecf20Sopenharmony_ci
26558c2ecf20Sopenharmony_ci	if (CC(!nested_cpu_has_virtual_nmis(vmcs12) &&
26568c2ecf20Sopenharmony_ci	       nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING)))
26578c2ecf20Sopenharmony_ci		return -EINVAL;
26588c2ecf20Sopenharmony_ci
26598c2ecf20Sopenharmony_ci	return 0;
26608c2ecf20Sopenharmony_ci}
26618c2ecf20Sopenharmony_ci
26628c2ecf20Sopenharmony_cistatic bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
26638c2ecf20Sopenharmony_ci{
26648c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
26658c2ecf20Sopenharmony_ci	int maxphyaddr = cpuid_maxphyaddr(vcpu);
26668c2ecf20Sopenharmony_ci
26678c2ecf20Sopenharmony_ci	/* Check for memory type validity */
26688c2ecf20Sopenharmony_ci	switch (new_eptp & VMX_EPTP_MT_MASK) {
26698c2ecf20Sopenharmony_ci	case VMX_EPTP_MT_UC:
26708c2ecf20Sopenharmony_ci		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)))
26718c2ecf20Sopenharmony_ci			return false;
26728c2ecf20Sopenharmony_ci		break;
26738c2ecf20Sopenharmony_ci	case VMX_EPTP_MT_WB:
26748c2ecf20Sopenharmony_ci		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)))
26758c2ecf20Sopenharmony_ci			return false;
26768c2ecf20Sopenharmony_ci		break;
26778c2ecf20Sopenharmony_ci	default:
26788c2ecf20Sopenharmony_ci		return false;
26798c2ecf20Sopenharmony_ci	}
26808c2ecf20Sopenharmony_ci
26818c2ecf20Sopenharmony_ci	/* Page-walk levels validity. */
26828c2ecf20Sopenharmony_ci	switch (new_eptp & VMX_EPTP_PWL_MASK) {
26838c2ecf20Sopenharmony_ci	case VMX_EPTP_PWL_5:
26848c2ecf20Sopenharmony_ci		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT)))
26858c2ecf20Sopenharmony_ci			return false;
26868c2ecf20Sopenharmony_ci		break;
26878c2ecf20Sopenharmony_ci	case VMX_EPTP_PWL_4:
26888c2ecf20Sopenharmony_ci		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT)))
26898c2ecf20Sopenharmony_ci			return false;
26908c2ecf20Sopenharmony_ci		break;
26918c2ecf20Sopenharmony_ci	default:
26928c2ecf20Sopenharmony_ci		return false;
26938c2ecf20Sopenharmony_ci	}
26948c2ecf20Sopenharmony_ci
26958c2ecf20Sopenharmony_ci	/* Reserved bits should not be set */
26968c2ecf20Sopenharmony_ci	if (CC(new_eptp >> maxphyaddr || ((new_eptp >> 7) & 0x1f)))
26978c2ecf20Sopenharmony_ci		return false;
26988c2ecf20Sopenharmony_ci
26998c2ecf20Sopenharmony_ci	/* AD, if set, should be supported */
27008c2ecf20Sopenharmony_ci	if (new_eptp & VMX_EPTP_AD_ENABLE_BIT) {
27018c2ecf20Sopenharmony_ci		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)))
27028c2ecf20Sopenharmony_ci			return false;
27038c2ecf20Sopenharmony_ci	}
27048c2ecf20Sopenharmony_ci
27058c2ecf20Sopenharmony_ci	return true;
27068c2ecf20Sopenharmony_ci}
27078c2ecf20Sopenharmony_ci
27088c2ecf20Sopenharmony_ci/*
27098c2ecf20Sopenharmony_ci * Checks related to VM-Execution Control Fields
27108c2ecf20Sopenharmony_ci */
27118c2ecf20Sopenharmony_cistatic int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
27128c2ecf20Sopenharmony_ci                                              struct vmcs12 *vmcs12)
27138c2ecf20Sopenharmony_ci{
27148c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
27158c2ecf20Sopenharmony_ci
27168c2ecf20Sopenharmony_ci	if (CC(!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
27178c2ecf20Sopenharmony_ci				   vmx->nested.msrs.pinbased_ctls_low,
27188c2ecf20Sopenharmony_ci				   vmx->nested.msrs.pinbased_ctls_high)) ||
27198c2ecf20Sopenharmony_ci	    CC(!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
27208c2ecf20Sopenharmony_ci				   vmx->nested.msrs.procbased_ctls_low,
27218c2ecf20Sopenharmony_ci				   vmx->nested.msrs.procbased_ctls_high)))
27228c2ecf20Sopenharmony_ci		return -EINVAL;
27238c2ecf20Sopenharmony_ci
27248c2ecf20Sopenharmony_ci	if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
27258c2ecf20Sopenharmony_ci	    CC(!vmx_control_verify(vmcs12->secondary_vm_exec_control,
27268c2ecf20Sopenharmony_ci				   vmx->nested.msrs.secondary_ctls_low,
27278c2ecf20Sopenharmony_ci				   vmx->nested.msrs.secondary_ctls_high)))
27288c2ecf20Sopenharmony_ci		return -EINVAL;
27298c2ecf20Sopenharmony_ci
27308c2ecf20Sopenharmony_ci	if (CC(vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) ||
27318c2ecf20Sopenharmony_ci	    nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) ||
27328c2ecf20Sopenharmony_ci	    nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) ||
27338c2ecf20Sopenharmony_ci	    nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) ||
27348c2ecf20Sopenharmony_ci	    nested_vmx_check_apic_access_controls(vcpu, vmcs12) ||
27358c2ecf20Sopenharmony_ci	    nested_vmx_check_apicv_controls(vcpu, vmcs12) ||
27368c2ecf20Sopenharmony_ci	    nested_vmx_check_nmi_controls(vmcs12) ||
27378c2ecf20Sopenharmony_ci	    nested_vmx_check_pml_controls(vcpu, vmcs12) ||
27388c2ecf20Sopenharmony_ci	    nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) ||
27398c2ecf20Sopenharmony_ci	    nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) ||
27408c2ecf20Sopenharmony_ci	    nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) ||
27418c2ecf20Sopenharmony_ci	    CC(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
27428c2ecf20Sopenharmony_ci		return -EINVAL;
27438c2ecf20Sopenharmony_ci
27448c2ecf20Sopenharmony_ci	if (!nested_cpu_has_preemption_timer(vmcs12) &&
27458c2ecf20Sopenharmony_ci	    nested_cpu_has_save_preemption_timer(vmcs12))
27468c2ecf20Sopenharmony_ci		return -EINVAL;
27478c2ecf20Sopenharmony_ci
27488c2ecf20Sopenharmony_ci	if (nested_cpu_has_ept(vmcs12) &&
27498c2ecf20Sopenharmony_ci	    CC(!nested_vmx_check_eptp(vcpu, vmcs12->ept_pointer)))
27508c2ecf20Sopenharmony_ci		return -EINVAL;
27518c2ecf20Sopenharmony_ci
27528c2ecf20Sopenharmony_ci	if (nested_cpu_has_vmfunc(vmcs12)) {
27538c2ecf20Sopenharmony_ci		if (CC(vmcs12->vm_function_control &
27548c2ecf20Sopenharmony_ci		       ~vmx->nested.msrs.vmfunc_controls))
27558c2ecf20Sopenharmony_ci			return -EINVAL;
27568c2ecf20Sopenharmony_ci
27578c2ecf20Sopenharmony_ci		if (nested_cpu_has_eptp_switching(vmcs12)) {
27588c2ecf20Sopenharmony_ci			if (CC(!nested_cpu_has_ept(vmcs12)) ||
27598c2ecf20Sopenharmony_ci			    CC(!page_address_valid(vcpu, vmcs12->eptp_list_address)))
27608c2ecf20Sopenharmony_ci				return -EINVAL;
27618c2ecf20Sopenharmony_ci		}
27628c2ecf20Sopenharmony_ci	}
27638c2ecf20Sopenharmony_ci
27648c2ecf20Sopenharmony_ci	return 0;
27658c2ecf20Sopenharmony_ci}
27668c2ecf20Sopenharmony_ci
27678c2ecf20Sopenharmony_ci/*
27688c2ecf20Sopenharmony_ci * Checks related to VM-Exit Control Fields
27698c2ecf20Sopenharmony_ci */
27708c2ecf20Sopenharmony_cistatic int nested_check_vm_exit_controls(struct kvm_vcpu *vcpu,
27718c2ecf20Sopenharmony_ci                                         struct vmcs12 *vmcs12)
27728c2ecf20Sopenharmony_ci{
27738c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
27748c2ecf20Sopenharmony_ci
27758c2ecf20Sopenharmony_ci	if (CC(!vmx_control_verify(vmcs12->vm_exit_controls,
27768c2ecf20Sopenharmony_ci				    vmx->nested.msrs.exit_ctls_low,
27778c2ecf20Sopenharmony_ci				    vmx->nested.msrs.exit_ctls_high)) ||
27788c2ecf20Sopenharmony_ci	    CC(nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12)))
27798c2ecf20Sopenharmony_ci		return -EINVAL;
27808c2ecf20Sopenharmony_ci
27818c2ecf20Sopenharmony_ci	return 0;
27828c2ecf20Sopenharmony_ci}
27838c2ecf20Sopenharmony_ci
27848c2ecf20Sopenharmony_ci/*
27858c2ecf20Sopenharmony_ci * Checks related to VM-Entry Control Fields
27868c2ecf20Sopenharmony_ci */
27878c2ecf20Sopenharmony_cistatic int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
27888c2ecf20Sopenharmony_ci					  struct vmcs12 *vmcs12)
27898c2ecf20Sopenharmony_ci{
27908c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
27918c2ecf20Sopenharmony_ci
27928c2ecf20Sopenharmony_ci	if (CC(!vmx_control_verify(vmcs12->vm_entry_controls,
27938c2ecf20Sopenharmony_ci				    vmx->nested.msrs.entry_ctls_low,
27948c2ecf20Sopenharmony_ci				    vmx->nested.msrs.entry_ctls_high)))
27958c2ecf20Sopenharmony_ci		return -EINVAL;
27968c2ecf20Sopenharmony_ci
27978c2ecf20Sopenharmony_ci	/*
27988c2ecf20Sopenharmony_ci	 * From the Intel SDM, volume 3:
27998c2ecf20Sopenharmony_ci	 * Fields relevant to VM-entry event injection must be set properly.
28008c2ecf20Sopenharmony_ci	 * These fields are the VM-entry interruption-information field, the
28018c2ecf20Sopenharmony_ci	 * VM-entry exception error code, and the VM-entry instruction length.
28028c2ecf20Sopenharmony_ci	 */
28038c2ecf20Sopenharmony_ci	if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
28048c2ecf20Sopenharmony_ci		u32 intr_info = vmcs12->vm_entry_intr_info_field;
28058c2ecf20Sopenharmony_ci		u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
28068c2ecf20Sopenharmony_ci		u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
28078c2ecf20Sopenharmony_ci		bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
28088c2ecf20Sopenharmony_ci		bool should_have_error_code;
28098c2ecf20Sopenharmony_ci		bool urg = nested_cpu_has2(vmcs12,
28108c2ecf20Sopenharmony_ci					   SECONDARY_EXEC_UNRESTRICTED_GUEST);
28118c2ecf20Sopenharmony_ci		bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
28128c2ecf20Sopenharmony_ci
28138c2ecf20Sopenharmony_ci		/* VM-entry interruption-info field: interruption type */
28148c2ecf20Sopenharmony_ci		if (CC(intr_type == INTR_TYPE_RESERVED) ||
28158c2ecf20Sopenharmony_ci		    CC(intr_type == INTR_TYPE_OTHER_EVENT &&
28168c2ecf20Sopenharmony_ci		       !nested_cpu_supports_monitor_trap_flag(vcpu)))
28178c2ecf20Sopenharmony_ci			return -EINVAL;
28188c2ecf20Sopenharmony_ci
28198c2ecf20Sopenharmony_ci		/* VM-entry interruption-info field: vector */
28208c2ecf20Sopenharmony_ci		if (CC(intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
28218c2ecf20Sopenharmony_ci		    CC(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
28228c2ecf20Sopenharmony_ci		    CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
28238c2ecf20Sopenharmony_ci			return -EINVAL;
28248c2ecf20Sopenharmony_ci
28258c2ecf20Sopenharmony_ci		/* VM-entry interruption-info field: deliver error code */
28268c2ecf20Sopenharmony_ci		should_have_error_code =
28278c2ecf20Sopenharmony_ci			intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
28288c2ecf20Sopenharmony_ci			x86_exception_has_error_code(vector);
28298c2ecf20Sopenharmony_ci		if (CC(has_error_code != should_have_error_code))
28308c2ecf20Sopenharmony_ci			return -EINVAL;
28318c2ecf20Sopenharmony_ci
28328c2ecf20Sopenharmony_ci		/* VM-entry exception error code */
28338c2ecf20Sopenharmony_ci		if (CC(has_error_code &&
28348c2ecf20Sopenharmony_ci		       vmcs12->vm_entry_exception_error_code & GENMASK(31, 16)))
28358c2ecf20Sopenharmony_ci			return -EINVAL;
28368c2ecf20Sopenharmony_ci
28378c2ecf20Sopenharmony_ci		/* VM-entry interruption-info field: reserved bits */
28388c2ecf20Sopenharmony_ci		if (CC(intr_info & INTR_INFO_RESVD_BITS_MASK))
28398c2ecf20Sopenharmony_ci			return -EINVAL;
28408c2ecf20Sopenharmony_ci
28418c2ecf20Sopenharmony_ci		/* VM-entry instruction length */
28428c2ecf20Sopenharmony_ci		switch (intr_type) {
28438c2ecf20Sopenharmony_ci		case INTR_TYPE_SOFT_EXCEPTION:
28448c2ecf20Sopenharmony_ci		case INTR_TYPE_SOFT_INTR:
28458c2ecf20Sopenharmony_ci		case INTR_TYPE_PRIV_SW_EXCEPTION:
28468c2ecf20Sopenharmony_ci			if (CC(vmcs12->vm_entry_instruction_len > 15) ||
28478c2ecf20Sopenharmony_ci			    CC(vmcs12->vm_entry_instruction_len == 0 &&
28488c2ecf20Sopenharmony_ci			    CC(!nested_cpu_has_zero_length_injection(vcpu))))
28498c2ecf20Sopenharmony_ci				return -EINVAL;
28508c2ecf20Sopenharmony_ci		}
28518c2ecf20Sopenharmony_ci	}
28528c2ecf20Sopenharmony_ci
28538c2ecf20Sopenharmony_ci	if (nested_vmx_check_entry_msr_switch_controls(vcpu, vmcs12))
28548c2ecf20Sopenharmony_ci		return -EINVAL;
28558c2ecf20Sopenharmony_ci
28568c2ecf20Sopenharmony_ci	return 0;
28578c2ecf20Sopenharmony_ci}
28588c2ecf20Sopenharmony_ci
28598c2ecf20Sopenharmony_cistatic int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
28608c2ecf20Sopenharmony_ci				     struct vmcs12 *vmcs12)
28618c2ecf20Sopenharmony_ci{
28628c2ecf20Sopenharmony_ci	if (nested_check_vm_execution_controls(vcpu, vmcs12) ||
28638c2ecf20Sopenharmony_ci	    nested_check_vm_exit_controls(vcpu, vmcs12) ||
28648c2ecf20Sopenharmony_ci	    nested_check_vm_entry_controls(vcpu, vmcs12))
28658c2ecf20Sopenharmony_ci		return -EINVAL;
28668c2ecf20Sopenharmony_ci
28678c2ecf20Sopenharmony_ci	if (to_vmx(vcpu)->nested.enlightened_vmcs_enabled)
28688c2ecf20Sopenharmony_ci		return nested_evmcs_check_controls(vmcs12);
28698c2ecf20Sopenharmony_ci
28708c2ecf20Sopenharmony_ci	return 0;
28718c2ecf20Sopenharmony_ci}
28728c2ecf20Sopenharmony_ci
/*
 * On 64-bit builds, require the "host address-space size" VM-exit control
 * in vmcs12 to agree with EFER.LMA of the vCPU.  On other builds there is
 * nothing to check.
 *
 * Returns 0 on success, -EINVAL on mismatch.
 */
static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
					       struct vmcs12 *vmcs12)
{
	int ret = 0;

#ifdef CONFIG_X86_64
	if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) !=
		!!(vcpu->arch.efer & EFER_LMA)))
		ret = -EINVAL;
#endif
	return ret;
}
28838c2ecf20Sopenharmony_ci
28848c2ecf20Sopenharmony_cistatic int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
28858c2ecf20Sopenharmony_ci				       struct vmcs12 *vmcs12)
28868c2ecf20Sopenharmony_ci{
28878c2ecf20Sopenharmony_ci	bool ia32e;
28888c2ecf20Sopenharmony_ci
28898c2ecf20Sopenharmony_ci	if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
28908c2ecf20Sopenharmony_ci	    CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
28918c2ecf20Sopenharmony_ci	    CC(!nested_cr3_valid(vcpu, vmcs12->host_cr3)))
28928c2ecf20Sopenharmony_ci		return -EINVAL;
28938c2ecf20Sopenharmony_ci
28948c2ecf20Sopenharmony_ci	if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
28958c2ecf20Sopenharmony_ci	    CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
28968c2ecf20Sopenharmony_ci		return -EINVAL;
28978c2ecf20Sopenharmony_ci
28988c2ecf20Sopenharmony_ci	if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
28998c2ecf20Sopenharmony_ci	    CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
29008c2ecf20Sopenharmony_ci		return -EINVAL;
29018c2ecf20Sopenharmony_ci
29028c2ecf20Sopenharmony_ci	if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
29038c2ecf20Sopenharmony_ci	    CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
29048c2ecf20Sopenharmony_ci					   vmcs12->host_ia32_perf_global_ctrl)))
29058c2ecf20Sopenharmony_ci		return -EINVAL;
29068c2ecf20Sopenharmony_ci
29078c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
29088c2ecf20Sopenharmony_ci	ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
29098c2ecf20Sopenharmony_ci#else
29108c2ecf20Sopenharmony_ci	ia32e = false;
29118c2ecf20Sopenharmony_ci#endif
29128c2ecf20Sopenharmony_ci
29138c2ecf20Sopenharmony_ci	if (ia32e) {
29148c2ecf20Sopenharmony_ci		if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
29158c2ecf20Sopenharmony_ci			return -EINVAL;
29168c2ecf20Sopenharmony_ci	} else {
29178c2ecf20Sopenharmony_ci		if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
29188c2ecf20Sopenharmony_ci		    CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
29198c2ecf20Sopenharmony_ci		    CC((vmcs12->host_rip) >> 32))
29208c2ecf20Sopenharmony_ci			return -EINVAL;
29218c2ecf20Sopenharmony_ci	}
29228c2ecf20Sopenharmony_ci
29238c2ecf20Sopenharmony_ci	if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
29248c2ecf20Sopenharmony_ci	    CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
29258c2ecf20Sopenharmony_ci	    CC(vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
29268c2ecf20Sopenharmony_ci	    CC(vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
29278c2ecf20Sopenharmony_ci	    CC(vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
29288c2ecf20Sopenharmony_ci	    CC(vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
29298c2ecf20Sopenharmony_ci	    CC(vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
29308c2ecf20Sopenharmony_ci	    CC(vmcs12->host_cs_selector == 0) ||
29318c2ecf20Sopenharmony_ci	    CC(vmcs12->host_tr_selector == 0) ||
29328c2ecf20Sopenharmony_ci	    CC(vmcs12->host_ss_selector == 0 && !ia32e))
29338c2ecf20Sopenharmony_ci		return -EINVAL;
29348c2ecf20Sopenharmony_ci
29358c2ecf20Sopenharmony_ci	if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) ||
29368c2ecf20Sopenharmony_ci	    CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
29378c2ecf20Sopenharmony_ci	    CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
29388c2ecf20Sopenharmony_ci	    CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
29398c2ecf20Sopenharmony_ci	    CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
29408c2ecf20Sopenharmony_ci	    CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
29418c2ecf20Sopenharmony_ci		return -EINVAL;
29428c2ecf20Sopenharmony_ci
29438c2ecf20Sopenharmony_ci	/*
29448c2ecf20Sopenharmony_ci	 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
29458c2ecf20Sopenharmony_ci	 * IA32_EFER MSR must be 0 in the field for that register. In addition,
29468c2ecf20Sopenharmony_ci	 * the values of the LMA and LME bits in the field must each be that of
29478c2ecf20Sopenharmony_ci	 * the host address-space size VM-exit control.
29488c2ecf20Sopenharmony_ci	 */
29498c2ecf20Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
29508c2ecf20Sopenharmony_ci		if (CC(!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer)) ||
29518c2ecf20Sopenharmony_ci		    CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA)) ||
29528c2ecf20Sopenharmony_ci		    CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)))
29538c2ecf20Sopenharmony_ci			return -EINVAL;
29548c2ecf20Sopenharmony_ci	}
29558c2ecf20Sopenharmony_ci
29568c2ecf20Sopenharmony_ci	return 0;
29578c2ecf20Sopenharmony_ci}
29588c2ecf20Sopenharmony_ci
29598c2ecf20Sopenharmony_cistatic int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
29608c2ecf20Sopenharmony_ci					  struct vmcs12 *vmcs12)
29618c2ecf20Sopenharmony_ci{
29628c2ecf20Sopenharmony_ci	int r = 0;
29638c2ecf20Sopenharmony_ci	struct vmcs12 *shadow;
29648c2ecf20Sopenharmony_ci	struct kvm_host_map map;
29658c2ecf20Sopenharmony_ci
29668c2ecf20Sopenharmony_ci	if (vmcs12->vmcs_link_pointer == -1ull)
29678c2ecf20Sopenharmony_ci		return 0;
29688c2ecf20Sopenharmony_ci
29698c2ecf20Sopenharmony_ci	if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
29708c2ecf20Sopenharmony_ci		return -EINVAL;
29718c2ecf20Sopenharmony_ci
29728c2ecf20Sopenharmony_ci	if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)))
29738c2ecf20Sopenharmony_ci		return -EINVAL;
29748c2ecf20Sopenharmony_ci
29758c2ecf20Sopenharmony_ci	shadow = map.hva;
29768c2ecf20Sopenharmony_ci
29778c2ecf20Sopenharmony_ci	if (CC(shadow->hdr.revision_id != VMCS12_REVISION) ||
29788c2ecf20Sopenharmony_ci	    CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
29798c2ecf20Sopenharmony_ci		r = -EINVAL;
29808c2ecf20Sopenharmony_ci
29818c2ecf20Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &map, false);
29828c2ecf20Sopenharmony_ci	return r;
29838c2ecf20Sopenharmony_ci}
29848c2ecf20Sopenharmony_ci
29858c2ecf20Sopenharmony_ci/*
29868c2ecf20Sopenharmony_ci * Checks related to Guest Non-register State
29878c2ecf20Sopenharmony_ci */
29888c2ecf20Sopenharmony_cistatic int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
29898c2ecf20Sopenharmony_ci{
29908c2ecf20Sopenharmony_ci	if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
29918c2ecf20Sopenharmony_ci	       vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT))
29928c2ecf20Sopenharmony_ci		return -EINVAL;
29938c2ecf20Sopenharmony_ci
29948c2ecf20Sopenharmony_ci	return 0;
29958c2ecf20Sopenharmony_ci}
29968c2ecf20Sopenharmony_ci
29978c2ecf20Sopenharmony_cistatic int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
29988c2ecf20Sopenharmony_ci					struct vmcs12 *vmcs12,
29998c2ecf20Sopenharmony_ci					enum vm_entry_failure_code *entry_failure_code)
30008c2ecf20Sopenharmony_ci{
30018c2ecf20Sopenharmony_ci	bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE);
30028c2ecf20Sopenharmony_ci
30038c2ecf20Sopenharmony_ci	*entry_failure_code = ENTRY_FAIL_DEFAULT;
30048c2ecf20Sopenharmony_ci
30058c2ecf20Sopenharmony_ci	if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) ||
30068c2ecf20Sopenharmony_ci	    CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
30078c2ecf20Sopenharmony_ci		return -EINVAL;
30088c2ecf20Sopenharmony_ci
30098c2ecf20Sopenharmony_ci	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
30108c2ecf20Sopenharmony_ci	    CC(!kvm_dr7_valid(vmcs12->guest_dr7)))
30118c2ecf20Sopenharmony_ci		return -EINVAL;
30128c2ecf20Sopenharmony_ci
30138c2ecf20Sopenharmony_ci	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
30148c2ecf20Sopenharmony_ci	    CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
30158c2ecf20Sopenharmony_ci		return -EINVAL;
30168c2ecf20Sopenharmony_ci
30178c2ecf20Sopenharmony_ci	if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
30188c2ecf20Sopenharmony_ci		*entry_failure_code = ENTRY_FAIL_VMCS_LINK_PTR;
30198c2ecf20Sopenharmony_ci		return -EINVAL;
30208c2ecf20Sopenharmony_ci	}
30218c2ecf20Sopenharmony_ci
30228c2ecf20Sopenharmony_ci	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
30238c2ecf20Sopenharmony_ci	    CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
30248c2ecf20Sopenharmony_ci					   vmcs12->guest_ia32_perf_global_ctrl)))
30258c2ecf20Sopenharmony_ci		return -EINVAL;
30268c2ecf20Sopenharmony_ci
30278c2ecf20Sopenharmony_ci	if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG))
30288c2ecf20Sopenharmony_ci		return -EINVAL;
30298c2ecf20Sopenharmony_ci
30308c2ecf20Sopenharmony_ci	if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) ||
30318c2ecf20Sopenharmony_ci	    CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG)))
30328c2ecf20Sopenharmony_ci		return -EINVAL;
30338c2ecf20Sopenharmony_ci
30348c2ecf20Sopenharmony_ci	/*
30358c2ecf20Sopenharmony_ci	 * If the load IA32_EFER VM-entry control is 1, the following checks
30368c2ecf20Sopenharmony_ci	 * are performed on the field for the IA32_EFER MSR:
30378c2ecf20Sopenharmony_ci	 * - Bits reserved in the IA32_EFER MSR must be 0.
30388c2ecf20Sopenharmony_ci	 * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
30398c2ecf20Sopenharmony_ci	 *   the IA-32e mode guest VM-exit control. It must also be identical
30408c2ecf20Sopenharmony_ci	 *   to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
30418c2ecf20Sopenharmony_ci	 *   CR0.PG) is 1.
30428c2ecf20Sopenharmony_ci	 */
30438c2ecf20Sopenharmony_ci	if (to_vmx(vcpu)->nested.nested_run_pending &&
30448c2ecf20Sopenharmony_ci	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
30458c2ecf20Sopenharmony_ci		if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
30468c2ecf20Sopenharmony_ci		    CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
30478c2ecf20Sopenharmony_ci		    CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
30488c2ecf20Sopenharmony_ci		     ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))))
30498c2ecf20Sopenharmony_ci			return -EINVAL;
30508c2ecf20Sopenharmony_ci	}
30518c2ecf20Sopenharmony_ci
30528c2ecf20Sopenharmony_ci	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
30538c2ecf20Sopenharmony_ci	    (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
30548c2ecf20Sopenharmony_ci	     CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
30558c2ecf20Sopenharmony_ci		return -EINVAL;
30568c2ecf20Sopenharmony_ci
30578c2ecf20Sopenharmony_ci	if (nested_check_guest_non_reg_state(vmcs12))
30588c2ecf20Sopenharmony_ci		return -EINVAL;
30598c2ecf20Sopenharmony_ci
30608c2ecf20Sopenharmony_ci	return 0;
30618c2ecf20Sopenharmony_ci}
30628c2ecf20Sopenharmony_ci
30638c2ecf20Sopenharmony_cistatic int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
30648c2ecf20Sopenharmony_ci{
30658c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
30668c2ecf20Sopenharmony_ci	unsigned long cr3, cr4;
30678c2ecf20Sopenharmony_ci	bool vm_fail;
30688c2ecf20Sopenharmony_ci
30698c2ecf20Sopenharmony_ci	if (!nested_early_check)
30708c2ecf20Sopenharmony_ci		return 0;
30718c2ecf20Sopenharmony_ci
30728c2ecf20Sopenharmony_ci	if (vmx->msr_autoload.host.nr)
30738c2ecf20Sopenharmony_ci		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
30748c2ecf20Sopenharmony_ci	if (vmx->msr_autoload.guest.nr)
30758c2ecf20Sopenharmony_ci		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
30768c2ecf20Sopenharmony_ci
30778c2ecf20Sopenharmony_ci	preempt_disable();
30788c2ecf20Sopenharmony_ci
30798c2ecf20Sopenharmony_ci	vmx_prepare_switch_to_guest(vcpu);
30808c2ecf20Sopenharmony_ci
30818c2ecf20Sopenharmony_ci	/*
30828c2ecf20Sopenharmony_ci	 * Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS,
30838c2ecf20Sopenharmony_ci	 * which is reserved to '1' by hardware.  GUEST_RFLAGS is guaranteed to
30848c2ecf20Sopenharmony_ci	 * be written (by prepare_vmcs02()) before the "real" VMEnter, i.e.
30858c2ecf20Sopenharmony_ci	 * there is no need to preserve other bits or save/restore the field.
30868c2ecf20Sopenharmony_ci	 */
30878c2ecf20Sopenharmony_ci	vmcs_writel(GUEST_RFLAGS, 0);
30888c2ecf20Sopenharmony_ci
30898c2ecf20Sopenharmony_ci	cr3 = __get_current_cr3_fast();
30908c2ecf20Sopenharmony_ci	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
30918c2ecf20Sopenharmony_ci		vmcs_writel(HOST_CR3, cr3);
30928c2ecf20Sopenharmony_ci		vmx->loaded_vmcs->host_state.cr3 = cr3;
30938c2ecf20Sopenharmony_ci	}
30948c2ecf20Sopenharmony_ci
30958c2ecf20Sopenharmony_ci	cr4 = cr4_read_shadow();
30968c2ecf20Sopenharmony_ci	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
30978c2ecf20Sopenharmony_ci		vmcs_writel(HOST_CR4, cr4);
30988c2ecf20Sopenharmony_ci		vmx->loaded_vmcs->host_state.cr4 = cr4;
30998c2ecf20Sopenharmony_ci	}
31008c2ecf20Sopenharmony_ci
31018c2ecf20Sopenharmony_ci	vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
31028c2ecf20Sopenharmony_ci				 __vmx_vcpu_run_flags(vmx));
31038c2ecf20Sopenharmony_ci
31048c2ecf20Sopenharmony_ci	if (vmx->msr_autoload.host.nr)
31058c2ecf20Sopenharmony_ci		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
31068c2ecf20Sopenharmony_ci	if (vmx->msr_autoload.guest.nr)
31078c2ecf20Sopenharmony_ci		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
31088c2ecf20Sopenharmony_ci
31098c2ecf20Sopenharmony_ci	if (vm_fail) {
31108c2ecf20Sopenharmony_ci		u32 error = vmcs_read32(VM_INSTRUCTION_ERROR);
31118c2ecf20Sopenharmony_ci
31128c2ecf20Sopenharmony_ci		preempt_enable();
31138c2ecf20Sopenharmony_ci
31148c2ecf20Sopenharmony_ci		trace_kvm_nested_vmenter_failed(
31158c2ecf20Sopenharmony_ci			"early hardware check VM-instruction error: ", error);
31168c2ecf20Sopenharmony_ci		WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD);
31178c2ecf20Sopenharmony_ci		return 1;
31188c2ecf20Sopenharmony_ci	}
31198c2ecf20Sopenharmony_ci
31208c2ecf20Sopenharmony_ci	/*
31218c2ecf20Sopenharmony_ci	 * VMExit clears RFLAGS.IF and DR7, even on a consistency check.
31228c2ecf20Sopenharmony_ci	 */
31238c2ecf20Sopenharmony_ci	if (hw_breakpoint_active())
31248c2ecf20Sopenharmony_ci		set_debugreg(__this_cpu_read(cpu_dr7), 7);
31258c2ecf20Sopenharmony_ci	local_irq_enable();
31268c2ecf20Sopenharmony_ci	preempt_enable();
31278c2ecf20Sopenharmony_ci
31288c2ecf20Sopenharmony_ci	/*
31298c2ecf20Sopenharmony_ci	 * A non-failing VMEntry means we somehow entered guest mode with
31308c2ecf20Sopenharmony_ci	 * an illegal RIP, and that's just the tip of the iceberg.  There
31318c2ecf20Sopenharmony_ci	 * is no telling what memory has been modified or what state has
31328c2ecf20Sopenharmony_ci	 * been exposed to unknown code.  Hitting this all but guarantees
31338c2ecf20Sopenharmony_ci	 * a (very critical) hardware issue.
31348c2ecf20Sopenharmony_ci	 */
31358c2ecf20Sopenharmony_ci	WARN_ON(!(vmcs_read32(VM_EXIT_REASON) &
31368c2ecf20Sopenharmony_ci		VMX_EXIT_REASONS_FAILED_VMENTRY));
31378c2ecf20Sopenharmony_ci
31388c2ecf20Sopenharmony_ci	return 0;
31398c2ecf20Sopenharmony_ci}
31408c2ecf20Sopenharmony_ci
31418c2ecf20Sopenharmony_cistatic bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
31428c2ecf20Sopenharmony_ci{
31438c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
31448c2ecf20Sopenharmony_ci
31458c2ecf20Sopenharmony_ci	/*
31468c2ecf20Sopenharmony_ci	 * hv_evmcs may end up being not mapped after migration (when
31478c2ecf20Sopenharmony_ci	 * L2 was running), map it here to make sure vmcs12 changes are
31488c2ecf20Sopenharmony_ci	 * properly reflected.
31498c2ecf20Sopenharmony_ci	 */
31508c2ecf20Sopenharmony_ci	if (vmx->nested.enlightened_vmcs_enabled && !vmx->nested.hv_evmcs) {
31518c2ecf20Sopenharmony_ci		enum nested_evmptrld_status evmptrld_status =
31528c2ecf20Sopenharmony_ci			nested_vmx_handle_enlightened_vmptrld(vcpu, false);
31538c2ecf20Sopenharmony_ci
31548c2ecf20Sopenharmony_ci		if (evmptrld_status == EVMPTRLD_VMFAIL ||
31558c2ecf20Sopenharmony_ci		    evmptrld_status == EVMPTRLD_ERROR)
31568c2ecf20Sopenharmony_ci			return false;
31578c2ecf20Sopenharmony_ci	}
31588c2ecf20Sopenharmony_ci
31598c2ecf20Sopenharmony_ci	return true;
31608c2ecf20Sopenharmony_ci}
31618c2ecf20Sopenharmony_ci
31628c2ecf20Sopenharmony_cistatic bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
31638c2ecf20Sopenharmony_ci{
31648c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
31658c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
31668c2ecf20Sopenharmony_ci	struct kvm_host_map *map;
31678c2ecf20Sopenharmony_ci	struct page *page;
31688c2ecf20Sopenharmony_ci	u64 hpa;
31698c2ecf20Sopenharmony_ci
31708c2ecf20Sopenharmony_ci	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
31718c2ecf20Sopenharmony_ci		/*
31728c2ecf20Sopenharmony_ci		 * Translate L1 physical address to host physical
31738c2ecf20Sopenharmony_ci		 * address for vmcs02. Keep the page pinned, so this
31748c2ecf20Sopenharmony_ci		 * physical address remains valid. We keep a reference
31758c2ecf20Sopenharmony_ci		 * to it so we can release it later.
31768c2ecf20Sopenharmony_ci		 */
31778c2ecf20Sopenharmony_ci		if (vmx->nested.apic_access_page) { /* shouldn't happen */
31788c2ecf20Sopenharmony_ci			kvm_release_page_clean(vmx->nested.apic_access_page);
31798c2ecf20Sopenharmony_ci			vmx->nested.apic_access_page = NULL;
31808c2ecf20Sopenharmony_ci		}
31818c2ecf20Sopenharmony_ci		page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
31828c2ecf20Sopenharmony_ci		if (!is_error_page(page)) {
31838c2ecf20Sopenharmony_ci			vmx->nested.apic_access_page = page;
31848c2ecf20Sopenharmony_ci			hpa = page_to_phys(vmx->nested.apic_access_page);
31858c2ecf20Sopenharmony_ci			vmcs_write64(APIC_ACCESS_ADDR, hpa);
31868c2ecf20Sopenharmony_ci		} else {
31878c2ecf20Sopenharmony_ci			pr_debug_ratelimited("%s: no backing 'struct page' for APIC-access address in vmcs12\n",
31888c2ecf20Sopenharmony_ci					     __func__);
31898c2ecf20Sopenharmony_ci			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
31908c2ecf20Sopenharmony_ci			vcpu->run->internal.suberror =
31918c2ecf20Sopenharmony_ci				KVM_INTERNAL_ERROR_EMULATION;
31928c2ecf20Sopenharmony_ci			vcpu->run->internal.ndata = 0;
31938c2ecf20Sopenharmony_ci			return false;
31948c2ecf20Sopenharmony_ci		}
31958c2ecf20Sopenharmony_ci	}
31968c2ecf20Sopenharmony_ci
31978c2ecf20Sopenharmony_ci	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
31988c2ecf20Sopenharmony_ci		map = &vmx->nested.virtual_apic_map;
31998c2ecf20Sopenharmony_ci
32008c2ecf20Sopenharmony_ci		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
32018c2ecf20Sopenharmony_ci			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
32028c2ecf20Sopenharmony_ci		} else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
32038c2ecf20Sopenharmony_ci		           nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
32048c2ecf20Sopenharmony_ci			   !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
32058c2ecf20Sopenharmony_ci			/*
32068c2ecf20Sopenharmony_ci			 * The processor will never use the TPR shadow, simply
32078c2ecf20Sopenharmony_ci			 * clear the bit from the execution control.  Such a
32088c2ecf20Sopenharmony_ci			 * configuration is useless, but it happens in tests.
32098c2ecf20Sopenharmony_ci			 * For any other configuration, failing the vm entry is
32108c2ecf20Sopenharmony_ci			 * _not_ what the processor does but it's basically the
32118c2ecf20Sopenharmony_ci			 * only possibility we have.
32128c2ecf20Sopenharmony_ci			 */
32138c2ecf20Sopenharmony_ci			exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW);
32148c2ecf20Sopenharmony_ci		} else {
32158c2ecf20Sopenharmony_ci			/*
32168c2ecf20Sopenharmony_ci			 * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR to
32178c2ecf20Sopenharmony_ci			 * force VM-Entry to fail.
32188c2ecf20Sopenharmony_ci			 */
32198c2ecf20Sopenharmony_ci			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
32208c2ecf20Sopenharmony_ci		}
32218c2ecf20Sopenharmony_ci	}
32228c2ecf20Sopenharmony_ci
32238c2ecf20Sopenharmony_ci	if (nested_cpu_has_posted_intr(vmcs12)) {
32248c2ecf20Sopenharmony_ci		map = &vmx->nested.pi_desc_map;
32258c2ecf20Sopenharmony_ci
32268c2ecf20Sopenharmony_ci		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
32278c2ecf20Sopenharmony_ci			vmx->nested.pi_desc =
32288c2ecf20Sopenharmony_ci				(struct pi_desc *)(((void *)map->hva) +
32298c2ecf20Sopenharmony_ci				offset_in_page(vmcs12->posted_intr_desc_addr));
32308c2ecf20Sopenharmony_ci			vmcs_write64(POSTED_INTR_DESC_ADDR,
32318c2ecf20Sopenharmony_ci				     pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
32328c2ecf20Sopenharmony_ci		}
32338c2ecf20Sopenharmony_ci	}
32348c2ecf20Sopenharmony_ci	if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
32358c2ecf20Sopenharmony_ci		exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
32368c2ecf20Sopenharmony_ci	else
32378c2ecf20Sopenharmony_ci		exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
32388c2ecf20Sopenharmony_ci
32398c2ecf20Sopenharmony_ci	return true;
32408c2ecf20Sopenharmony_ci}
32418c2ecf20Sopenharmony_ci
32428c2ecf20Sopenharmony_cistatic bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
32438c2ecf20Sopenharmony_ci{
32448c2ecf20Sopenharmony_ci	if (!nested_get_evmcs_page(vcpu)) {
32458c2ecf20Sopenharmony_ci		pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
32468c2ecf20Sopenharmony_ci				     __func__);
32478c2ecf20Sopenharmony_ci		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
32488c2ecf20Sopenharmony_ci		vcpu->run->internal.suberror =
32498c2ecf20Sopenharmony_ci			KVM_INTERNAL_ERROR_EMULATION;
32508c2ecf20Sopenharmony_ci		vcpu->run->internal.ndata = 0;
32518c2ecf20Sopenharmony_ci
32528c2ecf20Sopenharmony_ci		return false;
32538c2ecf20Sopenharmony_ci	}
32548c2ecf20Sopenharmony_ci
32558c2ecf20Sopenharmony_ci	if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
32568c2ecf20Sopenharmony_ci		return false;
32578c2ecf20Sopenharmony_ci
32588c2ecf20Sopenharmony_ci	return true;
32598c2ecf20Sopenharmony_ci}
32608c2ecf20Sopenharmony_ci
32618c2ecf20Sopenharmony_cistatic int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
32628c2ecf20Sopenharmony_ci{
32638c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12;
32648c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
32658c2ecf20Sopenharmony_ci	gpa_t dst;
32668c2ecf20Sopenharmony_ci
32678c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
32688c2ecf20Sopenharmony_ci		return 0;
32698c2ecf20Sopenharmony_ci
32708c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(vmx->nested.pml_full))
32718c2ecf20Sopenharmony_ci		return 1;
32728c2ecf20Sopenharmony_ci
32738c2ecf20Sopenharmony_ci	/*
32748c2ecf20Sopenharmony_ci	 * Check if PML is enabled for the nested guest. Whether eptp bit 6 is
32758c2ecf20Sopenharmony_ci	 * set is already checked as part of A/D emulation.
32768c2ecf20Sopenharmony_ci	 */
32778c2ecf20Sopenharmony_ci	vmcs12 = get_vmcs12(vcpu);
32788c2ecf20Sopenharmony_ci	if (!nested_cpu_has_pml(vmcs12))
32798c2ecf20Sopenharmony_ci		return 0;
32808c2ecf20Sopenharmony_ci
32818c2ecf20Sopenharmony_ci	if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
32828c2ecf20Sopenharmony_ci		vmx->nested.pml_full = true;
32838c2ecf20Sopenharmony_ci		return 1;
32848c2ecf20Sopenharmony_ci	}
32858c2ecf20Sopenharmony_ci
32868c2ecf20Sopenharmony_ci	gpa &= ~0xFFFull;
32878c2ecf20Sopenharmony_ci	dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
32888c2ecf20Sopenharmony_ci
32898c2ecf20Sopenharmony_ci	if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
32908c2ecf20Sopenharmony_ci				 offset_in_page(dst), sizeof(gpa)))
32918c2ecf20Sopenharmony_ci		return 0;
32928c2ecf20Sopenharmony_ci
32938c2ecf20Sopenharmony_ci	vmcs12->guest_pml_index--;
32948c2ecf20Sopenharmony_ci
32958c2ecf20Sopenharmony_ci	return 0;
32968c2ecf20Sopenharmony_ci}
32978c2ecf20Sopenharmony_ci
32988c2ecf20Sopenharmony_ci/*
32998c2ecf20Sopenharmony_ci * Intel's VMX Instruction Reference specifies a common set of prerequisites
33008c2ecf20Sopenharmony_ci * for running VMX instructions (except VMXON, whose prerequisites are
33018c2ecf20Sopenharmony_ci * slightly different). It also specifies what exception to inject otherwise.
33028c2ecf20Sopenharmony_ci * Note that many of these exceptions have priority over VM exits, so they
33038c2ecf20Sopenharmony_ci * don't have to be checked again here.
33048c2ecf20Sopenharmony_ci */
33058c2ecf20Sopenharmony_cistatic int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
33068c2ecf20Sopenharmony_ci{
33078c2ecf20Sopenharmony_ci	if (!to_vmx(vcpu)->nested.vmxon) {
33088c2ecf20Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
33098c2ecf20Sopenharmony_ci		return 0;
33108c2ecf20Sopenharmony_ci	}
33118c2ecf20Sopenharmony_ci
33128c2ecf20Sopenharmony_ci	if (vmx_get_cpl(vcpu)) {
33138c2ecf20Sopenharmony_ci		kvm_inject_gp(vcpu, 0);
33148c2ecf20Sopenharmony_ci		return 0;
33158c2ecf20Sopenharmony_ci	}
33168c2ecf20Sopenharmony_ci
33178c2ecf20Sopenharmony_ci	return 1;
33188c2ecf20Sopenharmony_ci}
33198c2ecf20Sopenharmony_ci
33208c2ecf20Sopenharmony_cistatic u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
33218c2ecf20Sopenharmony_ci{
33228c2ecf20Sopenharmony_ci	u8 rvi = vmx_get_rvi();
33238c2ecf20Sopenharmony_ci	u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
33248c2ecf20Sopenharmony_ci
33258c2ecf20Sopenharmony_ci	return ((rvi & 0xf0) > (vppr & 0xf0));
33268c2ecf20Sopenharmony_ci}
33278c2ecf20Sopenharmony_ci
33288c2ecf20Sopenharmony_cistatic void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
33298c2ecf20Sopenharmony_ci				   struct vmcs12 *vmcs12);
33308c2ecf20Sopenharmony_ci
33318c2ecf20Sopenharmony_ci/*
33328c2ecf20Sopenharmony_ci * If from_vmentry is false, this is being called from state restore (either RSM
33338c2ecf20Sopenharmony_ci * or KVM_SET_NESTED_STATE).  Otherwise it's called from vmlaunch/vmresume.
33348c2ecf20Sopenharmony_ci *
33358c2ecf20Sopenharmony_ci * Returns:
33368c2ecf20Sopenharmony_ci *	NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode
33378c2ecf20Sopenharmony_ci *	NVMX_VMENTRY_VMFAIL:  Consistency check VMFail
33388c2ecf20Sopenharmony_ci *	NVMX_VMENTRY_VMEXIT:  Consistency check VMExit
33398c2ecf20Sopenharmony_ci *	NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error
33408c2ecf20Sopenharmony_ci */
33418c2ecf20Sopenharmony_cienum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
33428c2ecf20Sopenharmony_ci							bool from_vmentry)
33438c2ecf20Sopenharmony_ci{
33448c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
33458c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
33468c2ecf20Sopenharmony_ci	enum vm_entry_failure_code entry_failure_code;
33478c2ecf20Sopenharmony_ci	bool evaluate_pending_interrupts;
33488c2ecf20Sopenharmony_ci	union vmx_exit_reason exit_reason = {
33498c2ecf20Sopenharmony_ci		.basic = EXIT_REASON_INVALID_STATE,
33508c2ecf20Sopenharmony_ci		.failed_vmentry = 1,
33518c2ecf20Sopenharmony_ci	};
33528c2ecf20Sopenharmony_ci	u32 failed_index;
33538c2ecf20Sopenharmony_ci
33548c2ecf20Sopenharmony_ci	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
33558c2ecf20Sopenharmony_ci		kvm_vcpu_flush_tlb_current(vcpu);
33568c2ecf20Sopenharmony_ci
33578c2ecf20Sopenharmony_ci	evaluate_pending_interrupts = exec_controls_get(vmx) &
33588c2ecf20Sopenharmony_ci		(CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
33598c2ecf20Sopenharmony_ci	if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
33608c2ecf20Sopenharmony_ci		evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
33618c2ecf20Sopenharmony_ci
33628c2ecf20Sopenharmony_ci	if (!vmx->nested.nested_run_pending ||
33638c2ecf20Sopenharmony_ci	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
33648c2ecf20Sopenharmony_ci		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
33658c2ecf20Sopenharmony_ci	if (kvm_mpx_supported() &&
33668c2ecf20Sopenharmony_ci	    (!vmx->nested.nested_run_pending ||
33678c2ecf20Sopenharmony_ci	     !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
33688c2ecf20Sopenharmony_ci		vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
33698c2ecf20Sopenharmony_ci
33708c2ecf20Sopenharmony_ci	/*
33718c2ecf20Sopenharmony_ci	 * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
33728c2ecf20Sopenharmony_ci	 * nested early checks are disabled.  In the event of a "late" VM-Fail,
33738c2ecf20Sopenharmony_ci	 * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its
33748c2ecf20Sopenharmony_ci	 * software model to the pre-VMEntry host state.  When EPT is disabled,
33758c2ecf20Sopenharmony_ci	 * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes
33768c2ecf20Sopenharmony_ci	 * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3.  Stuffing
33778c2ecf20Sopenharmony_ci	 * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to
33788c2ecf20Sopenharmony_ci	 * the correct value.  Smashing vmcs01.GUEST_CR3 is safe because nested
33798c2ecf20Sopenharmony_ci	 * VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is
33808c2ecf20Sopenharmony_ci	 * guaranteed to be overwritten with a shadow CR3 prior to re-entering
33818c2ecf20Sopenharmony_ci	 * L1.  Don't stuff vmcs01.GUEST_CR3 when using nested early checks as
33828c2ecf20Sopenharmony_ci	 * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks
33838c2ecf20Sopenharmony_ci	 * pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail
33848c2ecf20Sopenharmony_ci	 * path would need to manually save/restore vmcs01.GUEST_CR3.
33858c2ecf20Sopenharmony_ci	 */
33868c2ecf20Sopenharmony_ci	if (!enable_ept && !nested_early_check)
33878c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
33888c2ecf20Sopenharmony_ci
33898c2ecf20Sopenharmony_ci	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
33908c2ecf20Sopenharmony_ci
33918c2ecf20Sopenharmony_ci	prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12);
33928c2ecf20Sopenharmony_ci
33938c2ecf20Sopenharmony_ci	if (from_vmentry) {
33948c2ecf20Sopenharmony_ci		if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
33958c2ecf20Sopenharmony_ci			vmx_switch_vmcs(vcpu, &vmx->vmcs01);
33968c2ecf20Sopenharmony_ci			return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
33978c2ecf20Sopenharmony_ci		}
33988c2ecf20Sopenharmony_ci
33998c2ecf20Sopenharmony_ci		if (nested_vmx_check_vmentry_hw(vcpu)) {
34008c2ecf20Sopenharmony_ci			vmx_switch_vmcs(vcpu, &vmx->vmcs01);
34018c2ecf20Sopenharmony_ci			return NVMX_VMENTRY_VMFAIL;
34028c2ecf20Sopenharmony_ci		}
34038c2ecf20Sopenharmony_ci
34048c2ecf20Sopenharmony_ci		if (nested_vmx_check_guest_state(vcpu, vmcs12,
34058c2ecf20Sopenharmony_ci						 &entry_failure_code)) {
34068c2ecf20Sopenharmony_ci			exit_reason.basic = EXIT_REASON_INVALID_STATE;
34078c2ecf20Sopenharmony_ci			vmcs12->exit_qualification = entry_failure_code;
34088c2ecf20Sopenharmony_ci			goto vmentry_fail_vmexit;
34098c2ecf20Sopenharmony_ci		}
34108c2ecf20Sopenharmony_ci	}
34118c2ecf20Sopenharmony_ci
34128c2ecf20Sopenharmony_ci	enter_guest_mode(vcpu);
34138c2ecf20Sopenharmony_ci	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
34148c2ecf20Sopenharmony_ci		vcpu->arch.tsc_offset += vmcs12->tsc_offset;
34158c2ecf20Sopenharmony_ci
34168c2ecf20Sopenharmony_ci	if (prepare_vmcs02(vcpu, vmcs12, &entry_failure_code)) {
34178c2ecf20Sopenharmony_ci		exit_reason.basic = EXIT_REASON_INVALID_STATE;
34188c2ecf20Sopenharmony_ci		vmcs12->exit_qualification = entry_failure_code;
34198c2ecf20Sopenharmony_ci		goto vmentry_fail_vmexit_guest_mode;
34208c2ecf20Sopenharmony_ci	}
34218c2ecf20Sopenharmony_ci
34228c2ecf20Sopenharmony_ci	if (from_vmentry) {
34238c2ecf20Sopenharmony_ci		failed_index = nested_vmx_load_msr(vcpu,
34248c2ecf20Sopenharmony_ci						   vmcs12->vm_entry_msr_load_addr,
34258c2ecf20Sopenharmony_ci						   vmcs12->vm_entry_msr_load_count);
34268c2ecf20Sopenharmony_ci		if (failed_index) {
34278c2ecf20Sopenharmony_ci			exit_reason.basic = EXIT_REASON_MSR_LOAD_FAIL;
34288c2ecf20Sopenharmony_ci			vmcs12->exit_qualification = failed_index;
34298c2ecf20Sopenharmony_ci			goto vmentry_fail_vmexit_guest_mode;
34308c2ecf20Sopenharmony_ci		}
34318c2ecf20Sopenharmony_ci	} else {
34328c2ecf20Sopenharmony_ci		/*
34338c2ecf20Sopenharmony_ci		 * The MMU is not initialized to point at the right entities yet and
34348c2ecf20Sopenharmony_ci		 * "get pages" would need to read data from the guest (i.e. we will
34358c2ecf20Sopenharmony_ci		 * need to perform gpa to hpa translation). Request a call
34368c2ecf20Sopenharmony_ci		 * to nested_get_vmcs12_pages before the next VM-entry.  The MSRs
34378c2ecf20Sopenharmony_ci		 * have already been set at vmentry time and should not be reset.
34388c2ecf20Sopenharmony_ci		 */
34398c2ecf20Sopenharmony_ci		kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
34408c2ecf20Sopenharmony_ci	}
34418c2ecf20Sopenharmony_ci
34428c2ecf20Sopenharmony_ci	/*
34438c2ecf20Sopenharmony_ci	 * If L1 had a pending IRQ/NMI until it executed
34448c2ecf20Sopenharmony_ci	 * VMLAUNCH/VMRESUME which wasn't delivered because it was
34458c2ecf20Sopenharmony_ci	 * disallowed (e.g. interrupts disabled), L0 needs to
34468c2ecf20Sopenharmony_ci	 * evaluate if this pending event should cause an exit from L2
34478c2ecf20Sopenharmony_ci	 * to L1 or delivered directly to L2 (e.g. In case L1 don't
34488c2ecf20Sopenharmony_ci	 * intercept EXTERNAL_INTERRUPT).
34498c2ecf20Sopenharmony_ci	 *
34508c2ecf20Sopenharmony_ci	 * Usually this would be handled by the processor noticing an
34518c2ecf20Sopenharmony_ci	 * IRQ/NMI window request, or checking RVI during evaluation of
34528c2ecf20Sopenharmony_ci	 * pending virtual interrupts.  However, this setting was done
34538c2ecf20Sopenharmony_ci	 * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
34548c2ecf20Sopenharmony_ci	 * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
34558c2ecf20Sopenharmony_ci	 */
34568c2ecf20Sopenharmony_ci	if (unlikely(evaluate_pending_interrupts))
34578c2ecf20Sopenharmony_ci		kvm_make_request(KVM_REQ_EVENT, vcpu);
34588c2ecf20Sopenharmony_ci
34598c2ecf20Sopenharmony_ci	/*
34608c2ecf20Sopenharmony_ci	 * Do not start the preemption timer hrtimer until after we know
34618c2ecf20Sopenharmony_ci	 * we are successful, so that only nested_vmx_vmexit needs to cancel
34628c2ecf20Sopenharmony_ci	 * the timer.
34638c2ecf20Sopenharmony_ci	 */
34648c2ecf20Sopenharmony_ci	vmx->nested.preemption_timer_expired = false;
34658c2ecf20Sopenharmony_ci	if (nested_cpu_has_preemption_timer(vmcs12)) {
34668c2ecf20Sopenharmony_ci		u64 timer_value = vmx_calc_preemption_timer_value(vcpu);
34678c2ecf20Sopenharmony_ci		vmx_start_preemption_timer(vcpu, timer_value);
34688c2ecf20Sopenharmony_ci	}
34698c2ecf20Sopenharmony_ci
34708c2ecf20Sopenharmony_ci	/*
34718c2ecf20Sopenharmony_ci	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
34728c2ecf20Sopenharmony_ci	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
34738c2ecf20Sopenharmony_ci	 * returned as far as L1 is concerned. It will only return (and set
34748c2ecf20Sopenharmony_ci	 * the success flag) when L2 exits (see nested_vmx_vmexit()).
34758c2ecf20Sopenharmony_ci	 */
34768c2ecf20Sopenharmony_ci	return NVMX_VMENTRY_SUCCESS;
34778c2ecf20Sopenharmony_ci
34788c2ecf20Sopenharmony_ci	/*
34798c2ecf20Sopenharmony_ci	 * A failed consistency check that leads to a VMExit during L1's
34808c2ecf20Sopenharmony_ci	 * VMEnter to L2 is a variation of a normal VMexit, as explained in
34818c2ecf20Sopenharmony_ci	 * 26.7 "VM-entry failures during or after loading guest state".
34828c2ecf20Sopenharmony_ci	 */
34838c2ecf20Sopenharmony_civmentry_fail_vmexit_guest_mode:
34848c2ecf20Sopenharmony_ci	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
34858c2ecf20Sopenharmony_ci		vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
34868c2ecf20Sopenharmony_ci	leave_guest_mode(vcpu);
34878c2ecf20Sopenharmony_ci
34888c2ecf20Sopenharmony_civmentry_fail_vmexit:
34898c2ecf20Sopenharmony_ci	vmx_switch_vmcs(vcpu, &vmx->vmcs01);
34908c2ecf20Sopenharmony_ci
34918c2ecf20Sopenharmony_ci	if (!from_vmentry)
34928c2ecf20Sopenharmony_ci		return NVMX_VMENTRY_VMEXIT;
34938c2ecf20Sopenharmony_ci
34948c2ecf20Sopenharmony_ci	load_vmcs12_host_state(vcpu, vmcs12);
34958c2ecf20Sopenharmony_ci	vmcs12->vm_exit_reason = exit_reason.full;
34968c2ecf20Sopenharmony_ci	if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
34978c2ecf20Sopenharmony_ci		vmx->nested.need_vmcs12_to_shadow_sync = true;
34988c2ecf20Sopenharmony_ci	return NVMX_VMENTRY_VMEXIT;
34998c2ecf20Sopenharmony_ci}
35008c2ecf20Sopenharmony_ci
35018c2ecf20Sopenharmony_ci/*
35028c2ecf20Sopenharmony_ci * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
35038c2ecf20Sopenharmony_ci * for running an L2 nested guest.
35048c2ecf20Sopenharmony_ci */
35058c2ecf20Sopenharmony_cistatic int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
35068c2ecf20Sopenharmony_ci{
35078c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12;
35088c2ecf20Sopenharmony_ci	enum nvmx_vmentry_status status;
35098c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
35108c2ecf20Sopenharmony_ci	u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
35118c2ecf20Sopenharmony_ci	enum nested_evmptrld_status evmptrld_status;
35128c2ecf20Sopenharmony_ci
35138c2ecf20Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
35148c2ecf20Sopenharmony_ci		return 1;
35158c2ecf20Sopenharmony_ci
35168c2ecf20Sopenharmony_ci	evmptrld_status = nested_vmx_handle_enlightened_vmptrld(vcpu, launch);
35178c2ecf20Sopenharmony_ci	if (evmptrld_status == EVMPTRLD_ERROR) {
35188c2ecf20Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
35198c2ecf20Sopenharmony_ci		return 1;
35208c2ecf20Sopenharmony_ci	} else if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) {
35218c2ecf20Sopenharmony_ci		return nested_vmx_failInvalid(vcpu);
35228c2ecf20Sopenharmony_ci	}
35238c2ecf20Sopenharmony_ci
35248c2ecf20Sopenharmony_ci	if (CC(!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull))
35258c2ecf20Sopenharmony_ci		return nested_vmx_failInvalid(vcpu);
35268c2ecf20Sopenharmony_ci
35278c2ecf20Sopenharmony_ci	vmcs12 = get_vmcs12(vcpu);
35288c2ecf20Sopenharmony_ci
35298c2ecf20Sopenharmony_ci	/*
35308c2ecf20Sopenharmony_ci	 * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact
35318c2ecf20Sopenharmony_ci	 * that there *is* a valid VMCS pointer, RFLAGS.CF is set
35328c2ecf20Sopenharmony_ci	 * rather than RFLAGS.ZF, and no error number is stored to the
35338c2ecf20Sopenharmony_ci	 * VM-instruction error field.
35348c2ecf20Sopenharmony_ci	 */
35358c2ecf20Sopenharmony_ci	if (CC(vmcs12->hdr.shadow_vmcs))
35368c2ecf20Sopenharmony_ci		return nested_vmx_failInvalid(vcpu);
35378c2ecf20Sopenharmony_ci
35388c2ecf20Sopenharmony_ci	if (vmx->nested.hv_evmcs) {
35398c2ecf20Sopenharmony_ci		copy_enlightened_to_vmcs12(vmx);
35408c2ecf20Sopenharmony_ci		/* Enlightened VMCS doesn't have launch state */
35418c2ecf20Sopenharmony_ci		vmcs12->launch_state = !launch;
35428c2ecf20Sopenharmony_ci	} else if (enable_shadow_vmcs) {
35438c2ecf20Sopenharmony_ci		copy_shadow_to_vmcs12(vmx);
35448c2ecf20Sopenharmony_ci	}
35458c2ecf20Sopenharmony_ci
35468c2ecf20Sopenharmony_ci	/*
35478c2ecf20Sopenharmony_ci	 * The nested entry process starts with enforcing various prerequisites
35488c2ecf20Sopenharmony_ci	 * on vmcs12 as required by the Intel SDM, and act appropriately when
35498c2ecf20Sopenharmony_ci	 * they fail: As the SDM explains, some conditions should cause the
35508c2ecf20Sopenharmony_ci	 * instruction to fail, while others will cause the instruction to seem
35518c2ecf20Sopenharmony_ci	 * to succeed, but return an EXIT_REASON_INVALID_STATE.
35528c2ecf20Sopenharmony_ci	 * To speed up the normal (success) code path, we should avoid checking
35538c2ecf20Sopenharmony_ci	 * for misconfigurations which will anyway be caught by the processor
35548c2ecf20Sopenharmony_ci	 * when using the merged vmcs02.
35558c2ecf20Sopenharmony_ci	 */
35568c2ecf20Sopenharmony_ci	if (CC(interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS))
35578c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
35588c2ecf20Sopenharmony_ci
35598c2ecf20Sopenharmony_ci	if (CC(vmcs12->launch_state == launch))
35608c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu,
35618c2ecf20Sopenharmony_ci			launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
35628c2ecf20Sopenharmony_ci			       : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
35638c2ecf20Sopenharmony_ci
35648c2ecf20Sopenharmony_ci	if (nested_vmx_check_controls(vcpu, vmcs12))
35658c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
35668c2ecf20Sopenharmony_ci
35678c2ecf20Sopenharmony_ci	if (nested_vmx_check_address_space_size(vcpu, vmcs12))
35688c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
35698c2ecf20Sopenharmony_ci
35708c2ecf20Sopenharmony_ci	if (nested_vmx_check_host_state(vcpu, vmcs12))
35718c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
35728c2ecf20Sopenharmony_ci
35738c2ecf20Sopenharmony_ci	/*
35748c2ecf20Sopenharmony_ci	 * We're finally done with prerequisite checking, and can start with
35758c2ecf20Sopenharmony_ci	 * the nested entry.
35768c2ecf20Sopenharmony_ci	 */
35778c2ecf20Sopenharmony_ci	vmx->nested.nested_run_pending = 1;
35788c2ecf20Sopenharmony_ci	vmx->nested.has_preemption_timer_deadline = false;
35798c2ecf20Sopenharmony_ci	status = nested_vmx_enter_non_root_mode(vcpu, true);
35808c2ecf20Sopenharmony_ci	if (unlikely(status != NVMX_VMENTRY_SUCCESS))
35818c2ecf20Sopenharmony_ci		goto vmentry_failed;
35828c2ecf20Sopenharmony_ci
35838c2ecf20Sopenharmony_ci	/* Emulate processing of posted interrupts on VM-Enter. */
35848c2ecf20Sopenharmony_ci	if (nested_cpu_has_posted_intr(vmcs12) &&
35858c2ecf20Sopenharmony_ci	    kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) {
35868c2ecf20Sopenharmony_ci		vmx->nested.pi_pending = true;
35878c2ecf20Sopenharmony_ci		kvm_make_request(KVM_REQ_EVENT, vcpu);
35888c2ecf20Sopenharmony_ci		kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv);
35898c2ecf20Sopenharmony_ci	}
35908c2ecf20Sopenharmony_ci
35918c2ecf20Sopenharmony_ci	/* Hide L1D cache contents from the nested guest.  */
35928c2ecf20Sopenharmony_ci	vmx->vcpu.arch.l1tf_flush_l1d = true;
35938c2ecf20Sopenharmony_ci
35948c2ecf20Sopenharmony_ci	/*
35958c2ecf20Sopenharmony_ci	 * Must happen outside of nested_vmx_enter_non_root_mode() as it will
35968c2ecf20Sopenharmony_ci	 * also be used as part of restoring nVMX state for
35978c2ecf20Sopenharmony_ci	 * snapshot restore (migration).
35988c2ecf20Sopenharmony_ci	 *
35998c2ecf20Sopenharmony_ci	 * In this flow, it is assumed that vmcs12 cache was
36008c2ecf20Sopenharmony_ci	 * trasferred as part of captured nVMX state and should
36018c2ecf20Sopenharmony_ci	 * therefore not be read from guest memory (which may not
36028c2ecf20Sopenharmony_ci	 * exist on destination host yet).
36038c2ecf20Sopenharmony_ci	 */
36048c2ecf20Sopenharmony_ci	nested_cache_shadow_vmcs12(vcpu, vmcs12);
36058c2ecf20Sopenharmony_ci
36068c2ecf20Sopenharmony_ci	/*
36078c2ecf20Sopenharmony_ci	 * If we're entering a halted L2 vcpu and the L2 vcpu won't be
36088c2ecf20Sopenharmony_ci	 * awakened by event injection or by an NMI-window VM-exit or
36098c2ecf20Sopenharmony_ci	 * by an interrupt-window VM-exit, halt the vcpu.
36108c2ecf20Sopenharmony_ci	 */
36118c2ecf20Sopenharmony_ci	if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
36128c2ecf20Sopenharmony_ci	    !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
36138c2ecf20Sopenharmony_ci	    !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_NMI_WINDOW_EXITING) &&
36148c2ecf20Sopenharmony_ci	    !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_INTR_WINDOW_EXITING) &&
36158c2ecf20Sopenharmony_ci	      (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
36168c2ecf20Sopenharmony_ci		vmx->nested.nested_run_pending = 0;
36178c2ecf20Sopenharmony_ci		return kvm_vcpu_halt(vcpu);
36188c2ecf20Sopenharmony_ci	}
36198c2ecf20Sopenharmony_ci	return 1;
36208c2ecf20Sopenharmony_ci
36218c2ecf20Sopenharmony_civmentry_failed:
36228c2ecf20Sopenharmony_ci	vmx->nested.nested_run_pending = 0;
36238c2ecf20Sopenharmony_ci	if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR)
36248c2ecf20Sopenharmony_ci		return 0;
36258c2ecf20Sopenharmony_ci	if (status == NVMX_VMENTRY_VMEXIT)
36268c2ecf20Sopenharmony_ci		return 1;
36278c2ecf20Sopenharmony_ci	WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL);
36288c2ecf20Sopenharmony_ci	return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
36298c2ecf20Sopenharmony_ci}
36308c2ecf20Sopenharmony_ci
36318c2ecf20Sopenharmony_ci/*
36328c2ecf20Sopenharmony_ci * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
36338c2ecf20Sopenharmony_ci * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK).
36348c2ecf20Sopenharmony_ci * This function returns the new value we should put in vmcs12.guest_cr0.
36358c2ecf20Sopenharmony_ci * It's not enough to just return the vmcs02 GUEST_CR0. Rather,
36368c2ecf20Sopenharmony_ci *  1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now
36378c2ecf20Sopenharmony_ci *     available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0
36388c2ecf20Sopenharmony_ci *     didn't trap the bit, because if L1 did, so would L0).
36398c2ecf20Sopenharmony_ci *  2. Bits that L1 asked to trap (and therefore L0 also did) could not have
36408c2ecf20Sopenharmony_ci *     been modified by L2, and L1 knows it. So just leave the old value of
36418c2ecf20Sopenharmony_ci *     the bit from vmcs12.guest_cr0. Note that the bit from vmcs02 GUEST_CR0
36428c2ecf20Sopenharmony_ci *     isn't relevant, because if L0 traps this bit it can set it to anything.
36438c2ecf20Sopenharmony_ci *  3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have
36448c2ecf20Sopenharmony_ci *     changed these bits, and therefore they need to be updated, but L0
36458c2ecf20Sopenharmony_ci *     didn't necessarily allow them to be changed in GUEST_CR0 - and rather
36468c2ecf20Sopenharmony_ci *     put them in vmcs02 CR0_READ_SHADOW. So take these bits from there.
36478c2ecf20Sopenharmony_ci */
36488c2ecf20Sopenharmony_cistatic inline unsigned long
36498c2ecf20Sopenharmony_civmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
36508c2ecf20Sopenharmony_ci{
36518c2ecf20Sopenharmony_ci	return
36528c2ecf20Sopenharmony_ci	/*1*/	(vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
36538c2ecf20Sopenharmony_ci	/*2*/	(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
36548c2ecf20Sopenharmony_ci	/*3*/	(vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
36558c2ecf20Sopenharmony_ci			vcpu->arch.cr0_guest_owned_bits));
36568c2ecf20Sopenharmony_ci}
36578c2ecf20Sopenharmony_ci
36588c2ecf20Sopenharmony_cistatic inline unsigned long
36598c2ecf20Sopenharmony_civmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
36608c2ecf20Sopenharmony_ci{
36618c2ecf20Sopenharmony_ci	return
36628c2ecf20Sopenharmony_ci	/*1*/	(vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
36638c2ecf20Sopenharmony_ci	/*2*/	(vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
36648c2ecf20Sopenharmony_ci	/*3*/	(vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
36658c2ecf20Sopenharmony_ci			vcpu->arch.cr4_guest_owned_bits));
36668c2ecf20Sopenharmony_ci}
36678c2ecf20Sopenharmony_ci
36688c2ecf20Sopenharmony_cistatic void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
36698c2ecf20Sopenharmony_ci				      struct vmcs12 *vmcs12,
36708c2ecf20Sopenharmony_ci				      u32 vm_exit_reason, u32 exit_intr_info)
36718c2ecf20Sopenharmony_ci{
36728c2ecf20Sopenharmony_ci	u32 idt_vectoring;
36738c2ecf20Sopenharmony_ci	unsigned int nr;
36748c2ecf20Sopenharmony_ci
36758c2ecf20Sopenharmony_ci	/*
36768c2ecf20Sopenharmony_ci	 * Per the SDM, VM-Exits due to double and triple faults are never
36778c2ecf20Sopenharmony_ci	 * considered to occur during event delivery, even if the double/triple
36788c2ecf20Sopenharmony_ci	 * fault is the result of an escalating vectoring issue.
36798c2ecf20Sopenharmony_ci	 *
36808c2ecf20Sopenharmony_ci	 * Note, the SDM qualifies the double fault behavior with "The original
36818c2ecf20Sopenharmony_ci	 * event results in a double-fault exception".  It's unclear why the
36828c2ecf20Sopenharmony_ci	 * qualification exists since exits due to double fault can occur only
36838c2ecf20Sopenharmony_ci	 * while vectoring a different exception (injected events are never
36848c2ecf20Sopenharmony_ci	 * subject to interception), i.e. there's _always_ an original event.
36858c2ecf20Sopenharmony_ci	 *
36868c2ecf20Sopenharmony_ci	 * The SDM also uses NMI as a confusing example for the "original event
36878c2ecf20Sopenharmony_ci	 * causes the VM exit directly" clause.  NMI isn't special in any way,
36888c2ecf20Sopenharmony_ci	 * the same rule applies to all events that cause an exit directly.
36898c2ecf20Sopenharmony_ci	 * NMI is an odd choice for the example because NMIs can only occur on
36908c2ecf20Sopenharmony_ci	 * instruction boundaries, i.e. they _can't_ occur during vectoring.
36918c2ecf20Sopenharmony_ci	 */
36928c2ecf20Sopenharmony_ci	if ((u16)vm_exit_reason == EXIT_REASON_TRIPLE_FAULT ||
36938c2ecf20Sopenharmony_ci	    ((u16)vm_exit_reason == EXIT_REASON_EXCEPTION_NMI &&
36948c2ecf20Sopenharmony_ci	     is_double_fault(exit_intr_info))) {
36958c2ecf20Sopenharmony_ci		vmcs12->idt_vectoring_info_field = 0;
36968c2ecf20Sopenharmony_ci	} else if (vcpu->arch.exception.injected) {
36978c2ecf20Sopenharmony_ci		nr = vcpu->arch.exception.nr;
36988c2ecf20Sopenharmony_ci		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
36998c2ecf20Sopenharmony_ci
37008c2ecf20Sopenharmony_ci		if (kvm_exception_is_soft(nr)) {
37018c2ecf20Sopenharmony_ci			vmcs12->vm_exit_instruction_len =
37028c2ecf20Sopenharmony_ci				vcpu->arch.event_exit_inst_len;
37038c2ecf20Sopenharmony_ci			idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
37048c2ecf20Sopenharmony_ci		} else
37058c2ecf20Sopenharmony_ci			idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
37068c2ecf20Sopenharmony_ci
37078c2ecf20Sopenharmony_ci		if (vcpu->arch.exception.has_error_code) {
37088c2ecf20Sopenharmony_ci			idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
37098c2ecf20Sopenharmony_ci			vmcs12->idt_vectoring_error_code =
37108c2ecf20Sopenharmony_ci				vcpu->arch.exception.error_code;
37118c2ecf20Sopenharmony_ci		}
37128c2ecf20Sopenharmony_ci
37138c2ecf20Sopenharmony_ci		vmcs12->idt_vectoring_info_field = idt_vectoring;
37148c2ecf20Sopenharmony_ci	} else if (vcpu->arch.nmi_injected) {
37158c2ecf20Sopenharmony_ci		vmcs12->idt_vectoring_info_field =
37168c2ecf20Sopenharmony_ci			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
37178c2ecf20Sopenharmony_ci	} else if (vcpu->arch.interrupt.injected) {
37188c2ecf20Sopenharmony_ci		nr = vcpu->arch.interrupt.nr;
37198c2ecf20Sopenharmony_ci		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
37208c2ecf20Sopenharmony_ci
37218c2ecf20Sopenharmony_ci		if (vcpu->arch.interrupt.soft) {
37228c2ecf20Sopenharmony_ci			idt_vectoring |= INTR_TYPE_SOFT_INTR;
37238c2ecf20Sopenharmony_ci			vmcs12->vm_entry_instruction_len =
37248c2ecf20Sopenharmony_ci				vcpu->arch.event_exit_inst_len;
37258c2ecf20Sopenharmony_ci		} else
37268c2ecf20Sopenharmony_ci			idt_vectoring |= INTR_TYPE_EXT_INTR;
37278c2ecf20Sopenharmony_ci
37288c2ecf20Sopenharmony_ci		vmcs12->idt_vectoring_info_field = idt_vectoring;
37298c2ecf20Sopenharmony_ci	} else {
37308c2ecf20Sopenharmony_ci		vmcs12->idt_vectoring_info_field = 0;
37318c2ecf20Sopenharmony_ci	}
37328c2ecf20Sopenharmony_ci}
37338c2ecf20Sopenharmony_ci
37348c2ecf20Sopenharmony_ci
37358c2ecf20Sopenharmony_civoid nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
37368c2ecf20Sopenharmony_ci{
37378c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
37388c2ecf20Sopenharmony_ci	gfn_t gfn;
37398c2ecf20Sopenharmony_ci
37408c2ecf20Sopenharmony_ci	/*
37418c2ecf20Sopenharmony_ci	 * Don't need to mark the APIC access page dirty; it is never
37428c2ecf20Sopenharmony_ci	 * written to by the CPU during APIC virtualization.
37438c2ecf20Sopenharmony_ci	 */
37448c2ecf20Sopenharmony_ci
37458c2ecf20Sopenharmony_ci	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
37468c2ecf20Sopenharmony_ci		gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
37478c2ecf20Sopenharmony_ci		kvm_vcpu_mark_page_dirty(vcpu, gfn);
37488c2ecf20Sopenharmony_ci	}
37498c2ecf20Sopenharmony_ci
37508c2ecf20Sopenharmony_ci	if (nested_cpu_has_posted_intr(vmcs12)) {
37518c2ecf20Sopenharmony_ci		gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
37528c2ecf20Sopenharmony_ci		kvm_vcpu_mark_page_dirty(vcpu, gfn);
37538c2ecf20Sopenharmony_ci	}
37548c2ecf20Sopenharmony_ci}
37558c2ecf20Sopenharmony_ci
37568c2ecf20Sopenharmony_cistatic void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
37578c2ecf20Sopenharmony_ci{
37588c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
37598c2ecf20Sopenharmony_ci	int max_irr;
37608c2ecf20Sopenharmony_ci	void *vapic_page;
37618c2ecf20Sopenharmony_ci	u16 status;
37628c2ecf20Sopenharmony_ci
37638c2ecf20Sopenharmony_ci	if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
37648c2ecf20Sopenharmony_ci		return;
37658c2ecf20Sopenharmony_ci
37668c2ecf20Sopenharmony_ci	vmx->nested.pi_pending = false;
37678c2ecf20Sopenharmony_ci	if (!pi_test_and_clear_on(vmx->nested.pi_desc))
37688c2ecf20Sopenharmony_ci		return;
37698c2ecf20Sopenharmony_ci
37708c2ecf20Sopenharmony_ci	max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
37718c2ecf20Sopenharmony_ci	if (max_irr != 256) {
37728c2ecf20Sopenharmony_ci		vapic_page = vmx->nested.virtual_apic_map.hva;
37738c2ecf20Sopenharmony_ci		if (!vapic_page)
37748c2ecf20Sopenharmony_ci			return;
37758c2ecf20Sopenharmony_ci
37768c2ecf20Sopenharmony_ci		__kvm_apic_update_irr(vmx->nested.pi_desc->pir,
37778c2ecf20Sopenharmony_ci			vapic_page, &max_irr);
37788c2ecf20Sopenharmony_ci		status = vmcs_read16(GUEST_INTR_STATUS);
37798c2ecf20Sopenharmony_ci		if ((u8)max_irr > ((u8)status & 0xff)) {
37808c2ecf20Sopenharmony_ci			status &= ~0xff;
37818c2ecf20Sopenharmony_ci			status |= (u8)max_irr;
37828c2ecf20Sopenharmony_ci			vmcs_write16(GUEST_INTR_STATUS, status);
37838c2ecf20Sopenharmony_ci		}
37848c2ecf20Sopenharmony_ci	}
37858c2ecf20Sopenharmony_ci
37868c2ecf20Sopenharmony_ci	nested_mark_vmcs12_pages_dirty(vcpu);
37878c2ecf20Sopenharmony_ci}
37888c2ecf20Sopenharmony_ci
37898c2ecf20Sopenharmony_cistatic void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
37908c2ecf20Sopenharmony_ci					       unsigned long exit_qual)
37918c2ecf20Sopenharmony_ci{
37928c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
37938c2ecf20Sopenharmony_ci	unsigned int nr = vcpu->arch.exception.nr;
37948c2ecf20Sopenharmony_ci	u32 intr_info = nr | INTR_INFO_VALID_MASK;
37958c2ecf20Sopenharmony_ci
37968c2ecf20Sopenharmony_ci	if (vcpu->arch.exception.has_error_code) {
37978c2ecf20Sopenharmony_ci		/*
37988c2ecf20Sopenharmony_ci		 * Intel CPUs do not generate error codes with bits 31:16 set,
37998c2ecf20Sopenharmony_ci		 * and more importantly VMX disallows setting bits 31:16 in the
38008c2ecf20Sopenharmony_ci		 * injected error code for VM-Entry.  Drop the bits to mimic
38018c2ecf20Sopenharmony_ci		 * hardware and avoid inducing failure on nested VM-Entry if L1
38028c2ecf20Sopenharmony_ci		 * chooses to inject the exception back to L2.  AMD CPUs _do_
38038c2ecf20Sopenharmony_ci		 * generate "full" 32-bit error codes, so KVM allows userspace
38048c2ecf20Sopenharmony_ci		 * to inject exception error codes with bits 31:16 set.
38058c2ecf20Sopenharmony_ci		 */
38068c2ecf20Sopenharmony_ci		vmcs12->vm_exit_intr_error_code = (u16)vcpu->arch.exception.error_code;
38078c2ecf20Sopenharmony_ci		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
38088c2ecf20Sopenharmony_ci	}
38098c2ecf20Sopenharmony_ci
38108c2ecf20Sopenharmony_ci	if (kvm_exception_is_soft(nr))
38118c2ecf20Sopenharmony_ci		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
38128c2ecf20Sopenharmony_ci	else
38138c2ecf20Sopenharmony_ci		intr_info |= INTR_TYPE_HARD_EXCEPTION;
38148c2ecf20Sopenharmony_ci
38158c2ecf20Sopenharmony_ci	if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
38168c2ecf20Sopenharmony_ci	    vmx_get_nmi_mask(vcpu))
38178c2ecf20Sopenharmony_ci		intr_info |= INTR_INFO_UNBLOCK_NMI;
38188c2ecf20Sopenharmony_ci
38198c2ecf20Sopenharmony_ci	nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
38208c2ecf20Sopenharmony_ci}
38218c2ecf20Sopenharmony_ci
38228c2ecf20Sopenharmony_ci/*
38238c2ecf20Sopenharmony_ci * Returns true if a debug trap is pending delivery.
38248c2ecf20Sopenharmony_ci *
38258c2ecf20Sopenharmony_ci * In KVM, debug traps bear an exception payload. As such, the class of a #DB
38268c2ecf20Sopenharmony_ci * exception may be inferred from the presence of an exception payload.
38278c2ecf20Sopenharmony_ci */
38288c2ecf20Sopenharmony_cistatic inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
38298c2ecf20Sopenharmony_ci{
38308c2ecf20Sopenharmony_ci	return vcpu->arch.exception.pending &&
38318c2ecf20Sopenharmony_ci			vcpu->arch.exception.nr == DB_VECTOR &&
38328c2ecf20Sopenharmony_ci			vcpu->arch.exception.payload;
38338c2ecf20Sopenharmony_ci}
38348c2ecf20Sopenharmony_ci
38358c2ecf20Sopenharmony_ci/*
38368c2ecf20Sopenharmony_ci * Certain VM-exits set the 'pending debug exceptions' field to indicate a
38378c2ecf20Sopenharmony_ci * recognized #DB (data or single-step) that has yet to be delivered. Since KVM
38388c2ecf20Sopenharmony_ci * represents these debug traps with a payload that is said to be compatible
38398c2ecf20Sopenharmony_ci * with the 'pending debug exceptions' field, write the payload to the VMCS
38408c2ecf20Sopenharmony_ci * field if a VM-exit is delivered before the debug trap.
38418c2ecf20Sopenharmony_ci */
38428c2ecf20Sopenharmony_cistatic void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
38438c2ecf20Sopenharmony_ci{
38448c2ecf20Sopenharmony_ci	if (vmx_pending_dbg_trap(vcpu))
38458c2ecf20Sopenharmony_ci		vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
38468c2ecf20Sopenharmony_ci			    vcpu->arch.exception.payload);
38478c2ecf20Sopenharmony_ci}
38488c2ecf20Sopenharmony_ci
38498c2ecf20Sopenharmony_cistatic bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
38508c2ecf20Sopenharmony_ci{
38518c2ecf20Sopenharmony_ci	return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
38528c2ecf20Sopenharmony_ci	       to_vmx(vcpu)->nested.preemption_timer_expired;
38538c2ecf20Sopenharmony_ci}
38548c2ecf20Sopenharmony_ci
38558c2ecf20Sopenharmony_cistatic int vmx_check_nested_events(struct kvm_vcpu *vcpu)
38568c2ecf20Sopenharmony_ci{
38578c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
38588c2ecf20Sopenharmony_ci	unsigned long exit_qual;
38598c2ecf20Sopenharmony_ci	bool block_nested_events =
38608c2ecf20Sopenharmony_ci	    vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
38618c2ecf20Sopenharmony_ci	bool mtf_pending = vmx->nested.mtf_pending;
38628c2ecf20Sopenharmony_ci	struct kvm_lapic *apic = vcpu->arch.apic;
38638c2ecf20Sopenharmony_ci
38648c2ecf20Sopenharmony_ci	/*
38658c2ecf20Sopenharmony_ci	 * Clear the MTF state. If a higher priority VM-exit is delivered first,
38668c2ecf20Sopenharmony_ci	 * this state is discarded.
38678c2ecf20Sopenharmony_ci	 */
38688c2ecf20Sopenharmony_ci	if (!block_nested_events)
38698c2ecf20Sopenharmony_ci		vmx->nested.mtf_pending = false;
38708c2ecf20Sopenharmony_ci
38718c2ecf20Sopenharmony_ci	if (lapic_in_kernel(vcpu) &&
38728c2ecf20Sopenharmony_ci		test_bit(KVM_APIC_INIT, &apic->pending_events)) {
38738c2ecf20Sopenharmony_ci		if (block_nested_events)
38748c2ecf20Sopenharmony_ci			return -EBUSY;
38758c2ecf20Sopenharmony_ci		nested_vmx_update_pending_dbg(vcpu);
38768c2ecf20Sopenharmony_ci		clear_bit(KVM_APIC_INIT, &apic->pending_events);
38778c2ecf20Sopenharmony_ci		nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
38788c2ecf20Sopenharmony_ci		return 0;
38798c2ecf20Sopenharmony_ci	}
38808c2ecf20Sopenharmony_ci
38818c2ecf20Sopenharmony_ci	/*
38828c2ecf20Sopenharmony_ci	 * Process any exceptions that are not debug traps before MTF.
38838c2ecf20Sopenharmony_ci	 */
38848c2ecf20Sopenharmony_ci	if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) {
38858c2ecf20Sopenharmony_ci		if (block_nested_events)
38868c2ecf20Sopenharmony_ci			return -EBUSY;
38878c2ecf20Sopenharmony_ci		if (!nested_vmx_check_exception(vcpu, &exit_qual))
38888c2ecf20Sopenharmony_ci			goto no_vmexit;
38898c2ecf20Sopenharmony_ci		nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
38908c2ecf20Sopenharmony_ci		return 0;
38918c2ecf20Sopenharmony_ci	}
38928c2ecf20Sopenharmony_ci
38938c2ecf20Sopenharmony_ci	if (mtf_pending) {
38948c2ecf20Sopenharmony_ci		if (block_nested_events)
38958c2ecf20Sopenharmony_ci			return -EBUSY;
38968c2ecf20Sopenharmony_ci		nested_vmx_update_pending_dbg(vcpu);
38978c2ecf20Sopenharmony_ci		nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
38988c2ecf20Sopenharmony_ci		return 0;
38998c2ecf20Sopenharmony_ci	}
39008c2ecf20Sopenharmony_ci
39018c2ecf20Sopenharmony_ci	if (vcpu->arch.exception.pending) {
39028c2ecf20Sopenharmony_ci		if (block_nested_events)
39038c2ecf20Sopenharmony_ci			return -EBUSY;
39048c2ecf20Sopenharmony_ci		if (!nested_vmx_check_exception(vcpu, &exit_qual))
39058c2ecf20Sopenharmony_ci			goto no_vmexit;
39068c2ecf20Sopenharmony_ci		nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
39078c2ecf20Sopenharmony_ci		return 0;
39088c2ecf20Sopenharmony_ci	}
39098c2ecf20Sopenharmony_ci
39108c2ecf20Sopenharmony_ci	if (nested_vmx_preemption_timer_pending(vcpu)) {
39118c2ecf20Sopenharmony_ci		if (block_nested_events)
39128c2ecf20Sopenharmony_ci			return -EBUSY;
39138c2ecf20Sopenharmony_ci		nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
39148c2ecf20Sopenharmony_ci		return 0;
39158c2ecf20Sopenharmony_ci	}
39168c2ecf20Sopenharmony_ci
39178c2ecf20Sopenharmony_ci	if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
39188c2ecf20Sopenharmony_ci		if (block_nested_events)
39198c2ecf20Sopenharmony_ci			return -EBUSY;
39208c2ecf20Sopenharmony_ci		goto no_vmexit;
39218c2ecf20Sopenharmony_ci	}
39228c2ecf20Sopenharmony_ci
39238c2ecf20Sopenharmony_ci	if (vcpu->arch.nmi_pending && !vmx_nmi_blocked(vcpu)) {
39248c2ecf20Sopenharmony_ci		if (block_nested_events)
39258c2ecf20Sopenharmony_ci			return -EBUSY;
39268c2ecf20Sopenharmony_ci		if (!nested_exit_on_nmi(vcpu))
39278c2ecf20Sopenharmony_ci			goto no_vmexit;
39288c2ecf20Sopenharmony_ci
39298c2ecf20Sopenharmony_ci		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
39308c2ecf20Sopenharmony_ci				  NMI_VECTOR | INTR_TYPE_NMI_INTR |
39318c2ecf20Sopenharmony_ci				  INTR_INFO_VALID_MASK, 0);
39328c2ecf20Sopenharmony_ci		/*
39338c2ecf20Sopenharmony_ci		 * The NMI-triggered VM exit counts as injection:
39348c2ecf20Sopenharmony_ci		 * clear this one and block further NMIs.
39358c2ecf20Sopenharmony_ci		 */
39368c2ecf20Sopenharmony_ci		vcpu->arch.nmi_pending = 0;
39378c2ecf20Sopenharmony_ci		vmx_set_nmi_mask(vcpu, true);
39388c2ecf20Sopenharmony_ci		return 0;
39398c2ecf20Sopenharmony_ci	}
39408c2ecf20Sopenharmony_ci
39418c2ecf20Sopenharmony_ci	if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) {
39428c2ecf20Sopenharmony_ci		if (block_nested_events)
39438c2ecf20Sopenharmony_ci			return -EBUSY;
39448c2ecf20Sopenharmony_ci		if (!nested_exit_on_intr(vcpu))
39458c2ecf20Sopenharmony_ci			goto no_vmexit;
39468c2ecf20Sopenharmony_ci		nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
39478c2ecf20Sopenharmony_ci		return 0;
39488c2ecf20Sopenharmony_ci	}
39498c2ecf20Sopenharmony_ci
39508c2ecf20Sopenharmony_cino_vmexit:
39518c2ecf20Sopenharmony_ci	vmx_complete_nested_posted_interrupt(vcpu);
39528c2ecf20Sopenharmony_ci	return 0;
39538c2ecf20Sopenharmony_ci}
39548c2ecf20Sopenharmony_ci
39558c2ecf20Sopenharmony_cistatic u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
39568c2ecf20Sopenharmony_ci{
39578c2ecf20Sopenharmony_ci	ktime_t remaining =
39588c2ecf20Sopenharmony_ci		hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
39598c2ecf20Sopenharmony_ci	u64 value;
39608c2ecf20Sopenharmony_ci
39618c2ecf20Sopenharmony_ci	if (ktime_to_ns(remaining) <= 0)
39628c2ecf20Sopenharmony_ci		return 0;
39638c2ecf20Sopenharmony_ci
39648c2ecf20Sopenharmony_ci	value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
39658c2ecf20Sopenharmony_ci	do_div(value, 1000000);
39668c2ecf20Sopenharmony_ci	return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
39678c2ecf20Sopenharmony_ci}
39688c2ecf20Sopenharmony_ci
39698c2ecf20Sopenharmony_cistatic bool is_vmcs12_ext_field(unsigned long field)
39708c2ecf20Sopenharmony_ci{
39718c2ecf20Sopenharmony_ci	switch (field) {
39728c2ecf20Sopenharmony_ci	case GUEST_ES_SELECTOR:
39738c2ecf20Sopenharmony_ci	case GUEST_CS_SELECTOR:
39748c2ecf20Sopenharmony_ci	case GUEST_SS_SELECTOR:
39758c2ecf20Sopenharmony_ci	case GUEST_DS_SELECTOR:
39768c2ecf20Sopenharmony_ci	case GUEST_FS_SELECTOR:
39778c2ecf20Sopenharmony_ci	case GUEST_GS_SELECTOR:
39788c2ecf20Sopenharmony_ci	case GUEST_LDTR_SELECTOR:
39798c2ecf20Sopenharmony_ci	case GUEST_TR_SELECTOR:
39808c2ecf20Sopenharmony_ci	case GUEST_ES_LIMIT:
39818c2ecf20Sopenharmony_ci	case GUEST_CS_LIMIT:
39828c2ecf20Sopenharmony_ci	case GUEST_SS_LIMIT:
39838c2ecf20Sopenharmony_ci	case GUEST_DS_LIMIT:
39848c2ecf20Sopenharmony_ci	case GUEST_FS_LIMIT:
39858c2ecf20Sopenharmony_ci	case GUEST_GS_LIMIT:
39868c2ecf20Sopenharmony_ci	case GUEST_LDTR_LIMIT:
39878c2ecf20Sopenharmony_ci	case GUEST_TR_LIMIT:
39888c2ecf20Sopenharmony_ci	case GUEST_GDTR_LIMIT:
39898c2ecf20Sopenharmony_ci	case GUEST_IDTR_LIMIT:
39908c2ecf20Sopenharmony_ci	case GUEST_ES_AR_BYTES:
39918c2ecf20Sopenharmony_ci	case GUEST_DS_AR_BYTES:
39928c2ecf20Sopenharmony_ci	case GUEST_FS_AR_BYTES:
39938c2ecf20Sopenharmony_ci	case GUEST_GS_AR_BYTES:
39948c2ecf20Sopenharmony_ci	case GUEST_LDTR_AR_BYTES:
39958c2ecf20Sopenharmony_ci	case GUEST_TR_AR_BYTES:
39968c2ecf20Sopenharmony_ci	case GUEST_ES_BASE:
39978c2ecf20Sopenharmony_ci	case GUEST_CS_BASE:
39988c2ecf20Sopenharmony_ci	case GUEST_SS_BASE:
39998c2ecf20Sopenharmony_ci	case GUEST_DS_BASE:
40008c2ecf20Sopenharmony_ci	case GUEST_FS_BASE:
40018c2ecf20Sopenharmony_ci	case GUEST_GS_BASE:
40028c2ecf20Sopenharmony_ci	case GUEST_LDTR_BASE:
40038c2ecf20Sopenharmony_ci	case GUEST_TR_BASE:
40048c2ecf20Sopenharmony_ci	case GUEST_GDTR_BASE:
40058c2ecf20Sopenharmony_ci	case GUEST_IDTR_BASE:
40068c2ecf20Sopenharmony_ci	case GUEST_PENDING_DBG_EXCEPTIONS:
40078c2ecf20Sopenharmony_ci	case GUEST_BNDCFGS:
40088c2ecf20Sopenharmony_ci		return true;
40098c2ecf20Sopenharmony_ci	default:
40108c2ecf20Sopenharmony_ci		break;
40118c2ecf20Sopenharmony_ci	}
40128c2ecf20Sopenharmony_ci
40138c2ecf20Sopenharmony_ci	return false;
40148c2ecf20Sopenharmony_ci}
40158c2ecf20Sopenharmony_ci
40168c2ecf20Sopenharmony_cistatic void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
40178c2ecf20Sopenharmony_ci				       struct vmcs12 *vmcs12)
40188c2ecf20Sopenharmony_ci{
40198c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
40208c2ecf20Sopenharmony_ci
40218c2ecf20Sopenharmony_ci	vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
40228c2ecf20Sopenharmony_ci	vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
40238c2ecf20Sopenharmony_ci	vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
40248c2ecf20Sopenharmony_ci	vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
40258c2ecf20Sopenharmony_ci	vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
40268c2ecf20Sopenharmony_ci	vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
40278c2ecf20Sopenharmony_ci	vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
40288c2ecf20Sopenharmony_ci	vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
40298c2ecf20Sopenharmony_ci	vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
40308c2ecf20Sopenharmony_ci	vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
40318c2ecf20Sopenharmony_ci	vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
40328c2ecf20Sopenharmony_ci	vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
40338c2ecf20Sopenharmony_ci	vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
40348c2ecf20Sopenharmony_ci	vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
40358c2ecf20Sopenharmony_ci	vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
40368c2ecf20Sopenharmony_ci	vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
40378c2ecf20Sopenharmony_ci	vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
40388c2ecf20Sopenharmony_ci	vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
40398c2ecf20Sopenharmony_ci	vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
40408c2ecf20Sopenharmony_ci	vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
40418c2ecf20Sopenharmony_ci	vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
40428c2ecf20Sopenharmony_ci	vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
40438c2ecf20Sopenharmony_ci	vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
40448c2ecf20Sopenharmony_ci	vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
40458c2ecf20Sopenharmony_ci	vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
40468c2ecf20Sopenharmony_ci	vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
40478c2ecf20Sopenharmony_ci	vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
40488c2ecf20Sopenharmony_ci	vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
40498c2ecf20Sopenharmony_ci	vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
40508c2ecf20Sopenharmony_ci	vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
40518c2ecf20Sopenharmony_ci	vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
40528c2ecf20Sopenharmony_ci	vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
40538c2ecf20Sopenharmony_ci	vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
40548c2ecf20Sopenharmony_ci	vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
40558c2ecf20Sopenharmony_ci	vmcs12->guest_pending_dbg_exceptions =
40568c2ecf20Sopenharmony_ci		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
40578c2ecf20Sopenharmony_ci	if (kvm_mpx_supported())
40588c2ecf20Sopenharmony_ci		vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
40598c2ecf20Sopenharmony_ci
40608c2ecf20Sopenharmony_ci	vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
40618c2ecf20Sopenharmony_ci}
40628c2ecf20Sopenharmony_ci
40638c2ecf20Sopenharmony_cistatic void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
40648c2ecf20Sopenharmony_ci				       struct vmcs12 *vmcs12)
40658c2ecf20Sopenharmony_ci{
40668c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
40678c2ecf20Sopenharmony_ci	int cpu;
40688c2ecf20Sopenharmony_ci
40698c2ecf20Sopenharmony_ci	if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare)
40708c2ecf20Sopenharmony_ci		return;
40718c2ecf20Sopenharmony_ci
40728c2ecf20Sopenharmony_ci
40738c2ecf20Sopenharmony_ci	WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01);
40748c2ecf20Sopenharmony_ci
40758c2ecf20Sopenharmony_ci	cpu = get_cpu();
40768c2ecf20Sopenharmony_ci	vmx->loaded_vmcs = &vmx->nested.vmcs02;
40778c2ecf20Sopenharmony_ci	vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01);
40788c2ecf20Sopenharmony_ci
40798c2ecf20Sopenharmony_ci	sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
40808c2ecf20Sopenharmony_ci
40818c2ecf20Sopenharmony_ci	vmx->loaded_vmcs = &vmx->vmcs01;
40828c2ecf20Sopenharmony_ci	vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02);
40838c2ecf20Sopenharmony_ci	put_cpu();
40848c2ecf20Sopenharmony_ci}
40858c2ecf20Sopenharmony_ci
40868c2ecf20Sopenharmony_ci/*
40878c2ecf20Sopenharmony_ci * Update the guest state fields of vmcs12 to reflect changes that
40888c2ecf20Sopenharmony_ci * occurred while L2 was running. (The "IA-32e mode guest" bit of the
40898c2ecf20Sopenharmony_ci * VM-entry controls is also updated, since this is really a guest
40908c2ecf20Sopenharmony_ci * state bit.)
40918c2ecf20Sopenharmony_ci */
40928c2ecf20Sopenharmony_cistatic void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
40938c2ecf20Sopenharmony_ci{
40948c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
40958c2ecf20Sopenharmony_ci
40968c2ecf20Sopenharmony_ci	if (vmx->nested.hv_evmcs)
40978c2ecf20Sopenharmony_ci		sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
40988c2ecf20Sopenharmony_ci
40998c2ecf20Sopenharmony_ci	vmx->nested.need_sync_vmcs02_to_vmcs12_rare = !vmx->nested.hv_evmcs;
41008c2ecf20Sopenharmony_ci
41018c2ecf20Sopenharmony_ci	vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
41028c2ecf20Sopenharmony_ci	vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
41038c2ecf20Sopenharmony_ci
41048c2ecf20Sopenharmony_ci	vmcs12->guest_rsp = kvm_rsp_read(vcpu);
41058c2ecf20Sopenharmony_ci	vmcs12->guest_rip = kvm_rip_read(vcpu);
41068c2ecf20Sopenharmony_ci	vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
41078c2ecf20Sopenharmony_ci
41088c2ecf20Sopenharmony_ci	vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
41098c2ecf20Sopenharmony_ci	vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
41108c2ecf20Sopenharmony_ci
41118c2ecf20Sopenharmony_ci	vmcs12->guest_interruptibility_info =
41128c2ecf20Sopenharmony_ci		vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
41138c2ecf20Sopenharmony_ci
41148c2ecf20Sopenharmony_ci	if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
41158c2ecf20Sopenharmony_ci		vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
41168c2ecf20Sopenharmony_ci	else
41178c2ecf20Sopenharmony_ci		vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
41188c2ecf20Sopenharmony_ci
41198c2ecf20Sopenharmony_ci	if (nested_cpu_has_preemption_timer(vmcs12) &&
41208c2ecf20Sopenharmony_ci	    vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER &&
41218c2ecf20Sopenharmony_ci	    !vmx->nested.nested_run_pending)
41228c2ecf20Sopenharmony_ci		vmcs12->vmx_preemption_timer_value =
41238c2ecf20Sopenharmony_ci			vmx_get_preemption_timer_value(vcpu);
41248c2ecf20Sopenharmony_ci
41258c2ecf20Sopenharmony_ci	/*
41268c2ecf20Sopenharmony_ci	 * In some cases (usually, nested EPT), L2 is allowed to change its
41278c2ecf20Sopenharmony_ci	 * own CR3 without exiting. If it has changed it, we must keep it.
41288c2ecf20Sopenharmony_ci	 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
41298c2ecf20Sopenharmony_ci	 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
41308c2ecf20Sopenharmony_ci	 *
41318c2ecf20Sopenharmony_ci	 * Additionally, restore L2's PDPTR to vmcs12.
41328c2ecf20Sopenharmony_ci	 */
41338c2ecf20Sopenharmony_ci	if (enable_ept) {
41348c2ecf20Sopenharmony_ci		vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
41358c2ecf20Sopenharmony_ci		if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
41368c2ecf20Sopenharmony_ci			vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
41378c2ecf20Sopenharmony_ci			vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
41388c2ecf20Sopenharmony_ci			vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
41398c2ecf20Sopenharmony_ci			vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
41408c2ecf20Sopenharmony_ci		}
41418c2ecf20Sopenharmony_ci	}
41428c2ecf20Sopenharmony_ci
41438c2ecf20Sopenharmony_ci	vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
41448c2ecf20Sopenharmony_ci
41458c2ecf20Sopenharmony_ci	if (nested_cpu_has_vid(vmcs12))
41468c2ecf20Sopenharmony_ci		vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
41478c2ecf20Sopenharmony_ci
41488c2ecf20Sopenharmony_ci	vmcs12->vm_entry_controls =
41498c2ecf20Sopenharmony_ci		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
41508c2ecf20Sopenharmony_ci		(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
41518c2ecf20Sopenharmony_ci
41528c2ecf20Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
41538c2ecf20Sopenharmony_ci		kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
41548c2ecf20Sopenharmony_ci
41558c2ecf20Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
41568c2ecf20Sopenharmony_ci		vmcs12->guest_ia32_efer = vcpu->arch.efer;
41578c2ecf20Sopenharmony_ci}
41588c2ecf20Sopenharmony_ci
41598c2ecf20Sopenharmony_ci/*
41608c2ecf20Sopenharmony_ci * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
41618c2ecf20Sopenharmony_ci * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
41628c2ecf20Sopenharmony_ci * and this function updates it to reflect the changes to the guest state while
41638c2ecf20Sopenharmony_ci * L2 was running (and perhaps made some exits which were handled directly by L0
41648c2ecf20Sopenharmony_ci * without going back to L1), and to reflect the exit reason.
41658c2ecf20Sopenharmony_ci * Note that we do not have to copy here all VMCS fields, just those that
41668c2ecf20Sopenharmony_ci * could have changed by the L2 guest or the exit - i.e., the guest-state and
41678c2ecf20Sopenharmony_ci * exit-information fields only. Other fields are modified by L1 with VMWRITE,
41688c2ecf20Sopenharmony_ci * which already writes to vmcs12 directly.
41698c2ecf20Sopenharmony_ci */
41708c2ecf20Sopenharmony_cistatic void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
41718c2ecf20Sopenharmony_ci			   u32 vm_exit_reason, u32 exit_intr_info,
41728c2ecf20Sopenharmony_ci			   unsigned long exit_qualification)
41738c2ecf20Sopenharmony_ci{
41748c2ecf20Sopenharmony_ci	/* update exit information fields: */
41758c2ecf20Sopenharmony_ci	vmcs12->vm_exit_reason = vm_exit_reason;
41768c2ecf20Sopenharmony_ci	vmcs12->exit_qualification = exit_qualification;
41778c2ecf20Sopenharmony_ci
41788c2ecf20Sopenharmony_ci	/*
41798c2ecf20Sopenharmony_ci	 * On VM-Exit due to a failed VM-Entry, the VMCS isn't marked launched
41808c2ecf20Sopenharmony_ci	 * and only EXIT_REASON and EXIT_QUALIFICATION are updated, all other
41818c2ecf20Sopenharmony_ci	 * exit info fields are unmodified.
41828c2ecf20Sopenharmony_ci	 */
41838c2ecf20Sopenharmony_ci	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
41848c2ecf20Sopenharmony_ci		vmcs12->launch_state = 1;
41858c2ecf20Sopenharmony_ci
41868c2ecf20Sopenharmony_ci		/* vm_entry_intr_info_field is cleared on exit. Emulate this
41878c2ecf20Sopenharmony_ci		 * instead of reading the real value. */
41888c2ecf20Sopenharmony_ci		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
41898c2ecf20Sopenharmony_ci
41908c2ecf20Sopenharmony_ci		/*
41918c2ecf20Sopenharmony_ci		 * Transfer the event that L0 or L1 may wanted to inject into
41928c2ecf20Sopenharmony_ci		 * L2 to IDT_VECTORING_INFO_FIELD.
41938c2ecf20Sopenharmony_ci		 */
41948c2ecf20Sopenharmony_ci		vmcs12_save_pending_event(vcpu, vmcs12,
41958c2ecf20Sopenharmony_ci					  vm_exit_reason, exit_intr_info);
41968c2ecf20Sopenharmony_ci
41978c2ecf20Sopenharmony_ci		vmcs12->vm_exit_intr_info = exit_intr_info;
41988c2ecf20Sopenharmony_ci		vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
41998c2ecf20Sopenharmony_ci		vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
42008c2ecf20Sopenharmony_ci
42018c2ecf20Sopenharmony_ci		/*
42028c2ecf20Sopenharmony_ci		 * According to spec, there's no need to store the guest's
42038c2ecf20Sopenharmony_ci		 * MSRs if the exit is due to a VM-entry failure that occurs
42048c2ecf20Sopenharmony_ci		 * during or after loading the guest state. Since this exit
42058c2ecf20Sopenharmony_ci		 * does not fall in that category, we need to save the MSRs.
42068c2ecf20Sopenharmony_ci		 */
42078c2ecf20Sopenharmony_ci		if (nested_vmx_store_msr(vcpu,
42088c2ecf20Sopenharmony_ci					 vmcs12->vm_exit_msr_store_addr,
42098c2ecf20Sopenharmony_ci					 vmcs12->vm_exit_msr_store_count))
42108c2ecf20Sopenharmony_ci			nested_vmx_abort(vcpu,
42118c2ecf20Sopenharmony_ci					 VMX_ABORT_SAVE_GUEST_MSR_FAIL);
42128c2ecf20Sopenharmony_ci	}
42138c2ecf20Sopenharmony_ci}
42148c2ecf20Sopenharmony_ci
42158c2ecf20Sopenharmony_ci/*
42168c2ecf20Sopenharmony_ci * A part of what we need to when the nested L2 guest exits and we want to
42178c2ecf20Sopenharmony_ci * run its L1 parent, is to reset L1's guest state to the host state specified
42188c2ecf20Sopenharmony_ci * in vmcs12.
42198c2ecf20Sopenharmony_ci * This function is to be called not only on normal nested exit, but also on
42208c2ecf20Sopenharmony_ci * a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry
42218c2ecf20Sopenharmony_ci * Failures During or After Loading Guest State").
42228c2ecf20Sopenharmony_ci * This function should be called when the active VMCS is L1's (vmcs01).
42238c2ecf20Sopenharmony_ci */
42248c2ecf20Sopenharmony_cistatic void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
42258c2ecf20Sopenharmony_ci				   struct vmcs12 *vmcs12)
42268c2ecf20Sopenharmony_ci{
42278c2ecf20Sopenharmony_ci	enum vm_entry_failure_code ignored;
42288c2ecf20Sopenharmony_ci	struct kvm_segment seg;
42298c2ecf20Sopenharmony_ci
42308c2ecf20Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
42318c2ecf20Sopenharmony_ci		vcpu->arch.efer = vmcs12->host_ia32_efer;
42328c2ecf20Sopenharmony_ci	else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
42338c2ecf20Sopenharmony_ci		vcpu->arch.efer |= (EFER_LMA | EFER_LME);
42348c2ecf20Sopenharmony_ci	else
42358c2ecf20Sopenharmony_ci		vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
42368c2ecf20Sopenharmony_ci	vmx_set_efer(vcpu, vcpu->arch.efer);
42378c2ecf20Sopenharmony_ci
42388c2ecf20Sopenharmony_ci	kvm_rsp_write(vcpu, vmcs12->host_rsp);
42398c2ecf20Sopenharmony_ci	kvm_rip_write(vcpu, vmcs12->host_rip);
42408c2ecf20Sopenharmony_ci	vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
42418c2ecf20Sopenharmony_ci	vmx_set_interrupt_shadow(vcpu, 0);
42428c2ecf20Sopenharmony_ci
42438c2ecf20Sopenharmony_ci	/*
42448c2ecf20Sopenharmony_ci	 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
42458c2ecf20Sopenharmony_ci	 * actually changed, because vmx_set_cr0 refers to efer set above.
42468c2ecf20Sopenharmony_ci	 *
42478c2ecf20Sopenharmony_ci	 * CR0_GUEST_HOST_MASK is already set in the original vmcs01
42488c2ecf20Sopenharmony_ci	 * (KVM doesn't change it);
42498c2ecf20Sopenharmony_ci	 */
42508c2ecf20Sopenharmony_ci	vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
42518c2ecf20Sopenharmony_ci	vmx_set_cr0(vcpu, vmcs12->host_cr0);
42528c2ecf20Sopenharmony_ci
42538c2ecf20Sopenharmony_ci	/* Same as above - no reason to call set_cr4_guest_host_mask().  */
42548c2ecf20Sopenharmony_ci	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
42558c2ecf20Sopenharmony_ci	vmx_set_cr4(vcpu, vmcs12->host_cr4);
42568c2ecf20Sopenharmony_ci
42578c2ecf20Sopenharmony_ci	nested_ept_uninit_mmu_context(vcpu);
42588c2ecf20Sopenharmony_ci
42598c2ecf20Sopenharmony_ci	/*
42608c2ecf20Sopenharmony_ci	 * Only PDPTE load can fail as the value of cr3 was checked on entry and
42618c2ecf20Sopenharmony_ci	 * couldn't have changed.
42628c2ecf20Sopenharmony_ci	 */
42638c2ecf20Sopenharmony_ci	if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &ignored))
42648c2ecf20Sopenharmony_ci		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
42658c2ecf20Sopenharmony_ci
42668c2ecf20Sopenharmony_ci	if (!enable_ept)
42678c2ecf20Sopenharmony_ci		vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
42688c2ecf20Sopenharmony_ci
42698c2ecf20Sopenharmony_ci	nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
42708c2ecf20Sopenharmony_ci
42718c2ecf20Sopenharmony_ci	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
42728c2ecf20Sopenharmony_ci	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
42738c2ecf20Sopenharmony_ci	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
42748c2ecf20Sopenharmony_ci	vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
42758c2ecf20Sopenharmony_ci	vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
42768c2ecf20Sopenharmony_ci	vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
42778c2ecf20Sopenharmony_ci	vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
42788c2ecf20Sopenharmony_ci
42798c2ecf20Sopenharmony_ci	/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1.  */
42808c2ecf20Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
42818c2ecf20Sopenharmony_ci		vmcs_write64(GUEST_BNDCFGS, 0);
42828c2ecf20Sopenharmony_ci
42838c2ecf20Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
42848c2ecf20Sopenharmony_ci		vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
42858c2ecf20Sopenharmony_ci		vcpu->arch.pat = vmcs12->host_ia32_pat;
42868c2ecf20Sopenharmony_ci	}
42878c2ecf20Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
42888c2ecf20Sopenharmony_ci		WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
42898c2ecf20Sopenharmony_ci					 vmcs12->host_ia32_perf_global_ctrl));
42908c2ecf20Sopenharmony_ci
42918c2ecf20Sopenharmony_ci	/* Set L1 segment info according to Intel SDM
42928c2ecf20Sopenharmony_ci	    27.5.2 Loading Host Segment and Descriptor-Table Registers */
42938c2ecf20Sopenharmony_ci	seg = (struct kvm_segment) {
42948c2ecf20Sopenharmony_ci		.base = 0,
42958c2ecf20Sopenharmony_ci		.limit = 0xFFFFFFFF,
42968c2ecf20Sopenharmony_ci		.selector = vmcs12->host_cs_selector,
42978c2ecf20Sopenharmony_ci		.type = 11,
42988c2ecf20Sopenharmony_ci		.present = 1,
42998c2ecf20Sopenharmony_ci		.s = 1,
43008c2ecf20Sopenharmony_ci		.g = 1
43018c2ecf20Sopenharmony_ci	};
43028c2ecf20Sopenharmony_ci	if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
43038c2ecf20Sopenharmony_ci		seg.l = 1;
43048c2ecf20Sopenharmony_ci	else
43058c2ecf20Sopenharmony_ci		seg.db = 1;
43068c2ecf20Sopenharmony_ci	vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
43078c2ecf20Sopenharmony_ci	seg = (struct kvm_segment) {
43088c2ecf20Sopenharmony_ci		.base = 0,
43098c2ecf20Sopenharmony_ci		.limit = 0xFFFFFFFF,
43108c2ecf20Sopenharmony_ci		.type = 3,
43118c2ecf20Sopenharmony_ci		.present = 1,
43128c2ecf20Sopenharmony_ci		.s = 1,
43138c2ecf20Sopenharmony_ci		.db = 1,
43148c2ecf20Sopenharmony_ci		.g = 1
43158c2ecf20Sopenharmony_ci	};
43168c2ecf20Sopenharmony_ci	seg.selector = vmcs12->host_ds_selector;
43178c2ecf20Sopenharmony_ci	vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
43188c2ecf20Sopenharmony_ci	seg.selector = vmcs12->host_es_selector;
43198c2ecf20Sopenharmony_ci	vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
43208c2ecf20Sopenharmony_ci	seg.selector = vmcs12->host_ss_selector;
43218c2ecf20Sopenharmony_ci	vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
43228c2ecf20Sopenharmony_ci	seg.selector = vmcs12->host_fs_selector;
43238c2ecf20Sopenharmony_ci	seg.base = vmcs12->host_fs_base;
43248c2ecf20Sopenharmony_ci	vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
43258c2ecf20Sopenharmony_ci	seg.selector = vmcs12->host_gs_selector;
43268c2ecf20Sopenharmony_ci	seg.base = vmcs12->host_gs_base;
43278c2ecf20Sopenharmony_ci	vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
43288c2ecf20Sopenharmony_ci	seg = (struct kvm_segment) {
43298c2ecf20Sopenharmony_ci		.base = vmcs12->host_tr_base,
43308c2ecf20Sopenharmony_ci		.limit = 0x67,
43318c2ecf20Sopenharmony_ci		.selector = vmcs12->host_tr_selector,
43328c2ecf20Sopenharmony_ci		.type = 11,
43338c2ecf20Sopenharmony_ci		.present = 1
43348c2ecf20Sopenharmony_ci	};
43358c2ecf20Sopenharmony_ci	vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
43368c2ecf20Sopenharmony_ci
43378c2ecf20Sopenharmony_ci	kvm_set_dr(vcpu, 7, 0x400);
43388c2ecf20Sopenharmony_ci	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
43398c2ecf20Sopenharmony_ci
43408c2ecf20Sopenharmony_ci	if (cpu_has_vmx_msr_bitmap())
43418c2ecf20Sopenharmony_ci		vmx_update_msr_bitmap(vcpu);
43428c2ecf20Sopenharmony_ci
43438c2ecf20Sopenharmony_ci	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
43448c2ecf20Sopenharmony_ci				vmcs12->vm_exit_msr_load_count))
43458c2ecf20Sopenharmony_ci		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
43468c2ecf20Sopenharmony_ci}
43478c2ecf20Sopenharmony_ci
43488c2ecf20Sopenharmony_cistatic inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
43498c2ecf20Sopenharmony_ci{
43508c2ecf20Sopenharmony_ci	struct vmx_uret_msr *efer_msr;
43518c2ecf20Sopenharmony_ci	unsigned int i;
43528c2ecf20Sopenharmony_ci
43538c2ecf20Sopenharmony_ci	if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
43548c2ecf20Sopenharmony_ci		return vmcs_read64(GUEST_IA32_EFER);
43558c2ecf20Sopenharmony_ci
43568c2ecf20Sopenharmony_ci	if (cpu_has_load_ia32_efer())
43578c2ecf20Sopenharmony_ci		return host_efer;
43588c2ecf20Sopenharmony_ci
43598c2ecf20Sopenharmony_ci	for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
43608c2ecf20Sopenharmony_ci		if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
43618c2ecf20Sopenharmony_ci			return vmx->msr_autoload.guest.val[i].value;
43628c2ecf20Sopenharmony_ci	}
43638c2ecf20Sopenharmony_ci
43648c2ecf20Sopenharmony_ci	efer_msr = vmx_find_uret_msr(vmx, MSR_EFER);
43658c2ecf20Sopenharmony_ci	if (efer_msr)
43668c2ecf20Sopenharmony_ci		return efer_msr->data;
43678c2ecf20Sopenharmony_ci
43688c2ecf20Sopenharmony_ci	return host_efer;
43698c2ecf20Sopenharmony_ci}
43708c2ecf20Sopenharmony_ci
43718c2ecf20Sopenharmony_cistatic void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
43728c2ecf20Sopenharmony_ci{
43738c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
43748c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
43758c2ecf20Sopenharmony_ci	struct vmx_msr_entry g, h;
43768c2ecf20Sopenharmony_ci	gpa_t gpa;
43778c2ecf20Sopenharmony_ci	u32 i, j;
43788c2ecf20Sopenharmony_ci
43798c2ecf20Sopenharmony_ci	vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT);
43808c2ecf20Sopenharmony_ci
43818c2ecf20Sopenharmony_ci	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
43828c2ecf20Sopenharmony_ci		/*
43838c2ecf20Sopenharmony_ci		 * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set
43848c2ecf20Sopenharmony_ci		 * as vmcs01.GUEST_DR7 contains a userspace defined value
43858c2ecf20Sopenharmony_ci		 * and vcpu->arch.dr7 is not squirreled away before the
43868c2ecf20Sopenharmony_ci		 * nested VMENTER (not worth adding a variable in nested_vmx).
43878c2ecf20Sopenharmony_ci		 */
43888c2ecf20Sopenharmony_ci		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
43898c2ecf20Sopenharmony_ci			kvm_set_dr(vcpu, 7, DR7_FIXED_1);
43908c2ecf20Sopenharmony_ci		else
43918c2ecf20Sopenharmony_ci			WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
43928c2ecf20Sopenharmony_ci	}
43938c2ecf20Sopenharmony_ci
43948c2ecf20Sopenharmony_ci	/*
43958c2ecf20Sopenharmony_ci	 * Note that calling vmx_set_{efer,cr0,cr4} is important as they
43968c2ecf20Sopenharmony_ci	 * handle a variety of side effects to KVM's software model.
43978c2ecf20Sopenharmony_ci	 */
43988c2ecf20Sopenharmony_ci	vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
43998c2ecf20Sopenharmony_ci
44008c2ecf20Sopenharmony_ci	vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
44018c2ecf20Sopenharmony_ci	vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
44028c2ecf20Sopenharmony_ci
44038c2ecf20Sopenharmony_ci	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
44048c2ecf20Sopenharmony_ci	vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
44058c2ecf20Sopenharmony_ci
44068c2ecf20Sopenharmony_ci	nested_ept_uninit_mmu_context(vcpu);
44078c2ecf20Sopenharmony_ci	vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
44088c2ecf20Sopenharmony_ci	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
44098c2ecf20Sopenharmony_ci
44108c2ecf20Sopenharmony_ci	/*
44118c2ecf20Sopenharmony_ci	 * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
44128c2ecf20Sopenharmony_ci	 * from vmcs01 (if necessary).  The PDPTRs are not loaded on
44138c2ecf20Sopenharmony_ci	 * VMFail, like everything else we just need to ensure our
44148c2ecf20Sopenharmony_ci	 * software model is up-to-date.
44158c2ecf20Sopenharmony_ci	 */
44168c2ecf20Sopenharmony_ci	if (enable_ept && is_pae_paging(vcpu))
44178c2ecf20Sopenharmony_ci		ept_save_pdptrs(vcpu);
44188c2ecf20Sopenharmony_ci
44198c2ecf20Sopenharmony_ci	kvm_mmu_reset_context(vcpu);
44208c2ecf20Sopenharmony_ci
44218c2ecf20Sopenharmony_ci	if (cpu_has_vmx_msr_bitmap())
44228c2ecf20Sopenharmony_ci		vmx_update_msr_bitmap(vcpu);
44238c2ecf20Sopenharmony_ci
44248c2ecf20Sopenharmony_ci	/*
44258c2ecf20Sopenharmony_ci	 * This nasty bit of open coding is a compromise between blindly
44268c2ecf20Sopenharmony_ci	 * loading L1's MSRs using the exit load lists (incorrect emulation
44278c2ecf20Sopenharmony_ci	 * of VMFail), leaving the nested VM's MSRs in the software model
44288c2ecf20Sopenharmony_ci	 * (incorrect behavior) and snapshotting the modified MSRs (too
44298c2ecf20Sopenharmony_ci	 * expensive since the lists are unbound by hardware).  For each
44308c2ecf20Sopenharmony_ci	 * MSR that was (prematurely) loaded from the nested VMEntry load
44318c2ecf20Sopenharmony_ci	 * list, reload it from the exit load list if it exists and differs
44328c2ecf20Sopenharmony_ci	 * from the guest value.  The intent is to stuff host state as
44338c2ecf20Sopenharmony_ci	 * silently as possible, not to fully process the exit load list.
44348c2ecf20Sopenharmony_ci	 */
44358c2ecf20Sopenharmony_ci	for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
44368c2ecf20Sopenharmony_ci		gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
44378c2ecf20Sopenharmony_ci		if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
44388c2ecf20Sopenharmony_ci			pr_debug_ratelimited(
44398c2ecf20Sopenharmony_ci				"%s read MSR index failed (%u, 0x%08llx)\n",
44408c2ecf20Sopenharmony_ci				__func__, i, gpa);
44418c2ecf20Sopenharmony_ci			goto vmabort;
44428c2ecf20Sopenharmony_ci		}
44438c2ecf20Sopenharmony_ci
44448c2ecf20Sopenharmony_ci		for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) {
44458c2ecf20Sopenharmony_ci			gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h));
44468c2ecf20Sopenharmony_ci			if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) {
44478c2ecf20Sopenharmony_ci				pr_debug_ratelimited(
44488c2ecf20Sopenharmony_ci					"%s read MSR failed (%u, 0x%08llx)\n",
44498c2ecf20Sopenharmony_ci					__func__, j, gpa);
44508c2ecf20Sopenharmony_ci				goto vmabort;
44518c2ecf20Sopenharmony_ci			}
44528c2ecf20Sopenharmony_ci			if (h.index != g.index)
44538c2ecf20Sopenharmony_ci				continue;
44548c2ecf20Sopenharmony_ci			if (h.value == g.value)
44558c2ecf20Sopenharmony_ci				break;
44568c2ecf20Sopenharmony_ci
44578c2ecf20Sopenharmony_ci			if (nested_vmx_load_msr_check(vcpu, &h)) {
44588c2ecf20Sopenharmony_ci				pr_debug_ratelimited(
44598c2ecf20Sopenharmony_ci					"%s check failed (%u, 0x%x, 0x%x)\n",
44608c2ecf20Sopenharmony_ci					__func__, j, h.index, h.reserved);
44618c2ecf20Sopenharmony_ci				goto vmabort;
44628c2ecf20Sopenharmony_ci			}
44638c2ecf20Sopenharmony_ci
44648c2ecf20Sopenharmony_ci			if (kvm_set_msr(vcpu, h.index, h.value)) {
44658c2ecf20Sopenharmony_ci				pr_debug_ratelimited(
44668c2ecf20Sopenharmony_ci					"%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
44678c2ecf20Sopenharmony_ci					__func__, j, h.index, h.value);
44688c2ecf20Sopenharmony_ci				goto vmabort;
44698c2ecf20Sopenharmony_ci			}
44708c2ecf20Sopenharmony_ci		}
44718c2ecf20Sopenharmony_ci	}
44728c2ecf20Sopenharmony_ci
44738c2ecf20Sopenharmony_ci	return;
44748c2ecf20Sopenharmony_ci
44758c2ecf20Sopenharmony_civmabort:
44768c2ecf20Sopenharmony_ci	nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
44778c2ecf20Sopenharmony_ci}
44788c2ecf20Sopenharmony_ci
44798c2ecf20Sopenharmony_ci/*
44808c2ecf20Sopenharmony_ci * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
44818c2ecf20Sopenharmony_ci * and modify vmcs12 to make it see what it would expect to see there if
44828c2ecf20Sopenharmony_ci * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
44838c2ecf20Sopenharmony_ci */
44848c2ecf20Sopenharmony_civoid nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
44858c2ecf20Sopenharmony_ci		       u32 exit_intr_info, unsigned long exit_qualification)
44868c2ecf20Sopenharmony_ci{
44878c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
44888c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
44898c2ecf20Sopenharmony_ci
44908c2ecf20Sopenharmony_ci	/* trying to cancel vmlaunch/vmresume is a bug */
44918c2ecf20Sopenharmony_ci	WARN_ON_ONCE(vmx->nested.nested_run_pending);
44928c2ecf20Sopenharmony_ci
44938c2ecf20Sopenharmony_ci	if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
44948c2ecf20Sopenharmony_ci		/*
44958c2ecf20Sopenharmony_ci		 * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
44968c2ecf20Sopenharmony_ci		 * Enlightened VMCS after migration and we still need to
44978c2ecf20Sopenharmony_ci		 * do that when something is forcing L2->L1 exit prior to
44988c2ecf20Sopenharmony_ci		 * the first L2 run.
44998c2ecf20Sopenharmony_ci		 */
45008c2ecf20Sopenharmony_ci		(void)nested_get_evmcs_page(vcpu);
45018c2ecf20Sopenharmony_ci	}
45028c2ecf20Sopenharmony_ci
45038c2ecf20Sopenharmony_ci	/* Service the TLB flush request for L2 before switching to L1. */
45048c2ecf20Sopenharmony_ci	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
45058c2ecf20Sopenharmony_ci		kvm_vcpu_flush_tlb_current(vcpu);
45068c2ecf20Sopenharmony_ci
45078c2ecf20Sopenharmony_ci	/*
45088c2ecf20Sopenharmony_ci	 * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
45098c2ecf20Sopenharmony_ci	 * now and the new vmentry.  Ensure that the VMCS02 PDPTR fields are
45108c2ecf20Sopenharmony_ci	 * up-to-date before switching to L1.
45118c2ecf20Sopenharmony_ci	 */
45128c2ecf20Sopenharmony_ci	if (enable_ept && is_pae_paging(vcpu))
45138c2ecf20Sopenharmony_ci		vmx_ept_load_pdptrs(vcpu);
45148c2ecf20Sopenharmony_ci
45158c2ecf20Sopenharmony_ci	leave_guest_mode(vcpu);
45168c2ecf20Sopenharmony_ci
45178c2ecf20Sopenharmony_ci	if (nested_cpu_has_preemption_timer(vmcs12))
45188c2ecf20Sopenharmony_ci		hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
45198c2ecf20Sopenharmony_ci
45208c2ecf20Sopenharmony_ci	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
45218c2ecf20Sopenharmony_ci		vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
45228c2ecf20Sopenharmony_ci
45238c2ecf20Sopenharmony_ci	if (likely(!vmx->fail)) {
45248c2ecf20Sopenharmony_ci		sync_vmcs02_to_vmcs12(vcpu, vmcs12);
45258c2ecf20Sopenharmony_ci
45268c2ecf20Sopenharmony_ci		if (vm_exit_reason != -1)
45278c2ecf20Sopenharmony_ci			prepare_vmcs12(vcpu, vmcs12, vm_exit_reason,
45288c2ecf20Sopenharmony_ci				       exit_intr_info, exit_qualification);
45298c2ecf20Sopenharmony_ci
45308c2ecf20Sopenharmony_ci		/*
45318c2ecf20Sopenharmony_ci		 * Must happen outside of sync_vmcs02_to_vmcs12() as it will
45328c2ecf20Sopenharmony_ci		 * also be used to capture vmcs12 cache as part of
45338c2ecf20Sopenharmony_ci		 * capturing nVMX state for snapshot (migration).
45348c2ecf20Sopenharmony_ci		 *
45358c2ecf20Sopenharmony_ci		 * Otherwise, this flush will dirty guest memory at a
45368c2ecf20Sopenharmony_ci		 * point it is already assumed by user-space to be
45378c2ecf20Sopenharmony_ci		 * immutable.
45388c2ecf20Sopenharmony_ci		 */
45398c2ecf20Sopenharmony_ci		nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
45408c2ecf20Sopenharmony_ci	} else {
45418c2ecf20Sopenharmony_ci		/*
45428c2ecf20Sopenharmony_ci		 * The only expected VM-instruction error is "VM entry with
45438c2ecf20Sopenharmony_ci		 * invalid control field(s)." Anything else indicates a
45448c2ecf20Sopenharmony_ci		 * problem with L0.  And we should never get here with a
45458c2ecf20Sopenharmony_ci		 * VMFail of any type if early consistency checks are enabled.
45468c2ecf20Sopenharmony_ci		 */
45478c2ecf20Sopenharmony_ci		WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
45488c2ecf20Sopenharmony_ci			     VMXERR_ENTRY_INVALID_CONTROL_FIELD);
45498c2ecf20Sopenharmony_ci		WARN_ON_ONCE(nested_early_check);
45508c2ecf20Sopenharmony_ci	}
45518c2ecf20Sopenharmony_ci
45528c2ecf20Sopenharmony_ci	/*
45538c2ecf20Sopenharmony_ci	 * Drop events/exceptions that were queued for re-injection to L2
45548c2ecf20Sopenharmony_ci	 * (picked up via vmx_complete_interrupts()), as well as exceptions
45558c2ecf20Sopenharmony_ci	 * that were pending for L2.  Note, this must NOT be hoisted above
45568c2ecf20Sopenharmony_ci	 * prepare_vmcs12(), events/exceptions queued for re-injection need to
45578c2ecf20Sopenharmony_ci	 * be captured in vmcs12 (see vmcs12_save_pending_event()).
45588c2ecf20Sopenharmony_ci	 */
45598c2ecf20Sopenharmony_ci	vcpu->arch.nmi_injected = false;
45608c2ecf20Sopenharmony_ci	kvm_clear_exception_queue(vcpu);
45618c2ecf20Sopenharmony_ci	kvm_clear_interrupt_queue(vcpu);
45628c2ecf20Sopenharmony_ci
45638c2ecf20Sopenharmony_ci	vmx_switch_vmcs(vcpu, &vmx->vmcs01);
45648c2ecf20Sopenharmony_ci
45658c2ecf20Sopenharmony_ci	/*
45668c2ecf20Sopenharmony_ci	 * If IBRS is advertised to the vCPU, KVM must flush the indirect
45678c2ecf20Sopenharmony_ci	 * branch predictors when transitioning from L2 to L1, as L1 expects
45688c2ecf20Sopenharmony_ci	 * hardware (KVM in this case) to provide separate predictor modes.
45698c2ecf20Sopenharmony_ci	 * Bare metal isolates VMX root (host) from VMX non-root (guest), but
45708c2ecf20Sopenharmony_ci	 * doesn't isolate different VMCSs, i.e. in this case, doesn't provide
45718c2ecf20Sopenharmony_ci	 * separate modes for L2 vs L1.
45728c2ecf20Sopenharmony_ci	 */
45738c2ecf20Sopenharmony_ci	if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
45748c2ecf20Sopenharmony_ci		indirect_branch_prediction_barrier();
45758c2ecf20Sopenharmony_ci
45768c2ecf20Sopenharmony_ci	/* Update any VMCS fields that might have changed while L2 ran */
45778c2ecf20Sopenharmony_ci	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
45788c2ecf20Sopenharmony_ci	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
45798c2ecf20Sopenharmony_ci	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
45808c2ecf20Sopenharmony_ci	if (vmx->nested.l1_tpr_threshold != -1)
45818c2ecf20Sopenharmony_ci		vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
45828c2ecf20Sopenharmony_ci
45838c2ecf20Sopenharmony_ci	if (kvm_has_tsc_control)
45848c2ecf20Sopenharmony_ci		decache_tsc_multiplier(vmx);
45858c2ecf20Sopenharmony_ci
45868c2ecf20Sopenharmony_ci	if (vmx->nested.change_vmcs01_virtual_apic_mode) {
45878c2ecf20Sopenharmony_ci		vmx->nested.change_vmcs01_virtual_apic_mode = false;
45888c2ecf20Sopenharmony_ci		vmx_set_virtual_apic_mode(vcpu);
45898c2ecf20Sopenharmony_ci	}
45908c2ecf20Sopenharmony_ci
45918c2ecf20Sopenharmony_ci	/* Unpin physical memory we referred to in vmcs02 */
45928c2ecf20Sopenharmony_ci	if (vmx->nested.apic_access_page) {
45938c2ecf20Sopenharmony_ci		kvm_release_page_clean(vmx->nested.apic_access_page);
45948c2ecf20Sopenharmony_ci		vmx->nested.apic_access_page = NULL;
45958c2ecf20Sopenharmony_ci	}
45968c2ecf20Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
45978c2ecf20Sopenharmony_ci	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
45988c2ecf20Sopenharmony_ci	vmx->nested.pi_desc = NULL;
45998c2ecf20Sopenharmony_ci
46008c2ecf20Sopenharmony_ci	if (vmx->nested.reload_vmcs01_apic_access_page) {
46018c2ecf20Sopenharmony_ci		vmx->nested.reload_vmcs01_apic_access_page = false;
46028c2ecf20Sopenharmony_ci		kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
46038c2ecf20Sopenharmony_ci	}
46048c2ecf20Sopenharmony_ci
46058c2ecf20Sopenharmony_ci	if ((vm_exit_reason != -1) &&
46068c2ecf20Sopenharmony_ci	    (enable_shadow_vmcs || vmx->nested.hv_evmcs))
46078c2ecf20Sopenharmony_ci		vmx->nested.need_vmcs12_to_shadow_sync = true;
46088c2ecf20Sopenharmony_ci
46098c2ecf20Sopenharmony_ci	/* in case we halted in L2 */
46108c2ecf20Sopenharmony_ci	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
46118c2ecf20Sopenharmony_ci
46128c2ecf20Sopenharmony_ci	if (likely(!vmx->fail)) {
46138c2ecf20Sopenharmony_ci		if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
46148c2ecf20Sopenharmony_ci		    nested_exit_intr_ack_set(vcpu)) {
46158c2ecf20Sopenharmony_ci			int irq = kvm_cpu_get_interrupt(vcpu);
46168c2ecf20Sopenharmony_ci			WARN_ON(irq < 0);
46178c2ecf20Sopenharmony_ci			vmcs12->vm_exit_intr_info = irq |
46188c2ecf20Sopenharmony_ci				INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
46198c2ecf20Sopenharmony_ci		}
46208c2ecf20Sopenharmony_ci
46218c2ecf20Sopenharmony_ci		if (vm_exit_reason != -1)
46228c2ecf20Sopenharmony_ci			trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
46238c2ecf20Sopenharmony_ci						       vmcs12->exit_qualification,
46248c2ecf20Sopenharmony_ci						       vmcs12->idt_vectoring_info_field,
46258c2ecf20Sopenharmony_ci						       vmcs12->vm_exit_intr_info,
46268c2ecf20Sopenharmony_ci						       vmcs12->vm_exit_intr_error_code,
46278c2ecf20Sopenharmony_ci						       KVM_ISA_VMX);
46288c2ecf20Sopenharmony_ci
46298c2ecf20Sopenharmony_ci		load_vmcs12_host_state(vcpu, vmcs12);
46308c2ecf20Sopenharmony_ci
46318c2ecf20Sopenharmony_ci		return;
46328c2ecf20Sopenharmony_ci	}
46338c2ecf20Sopenharmony_ci
46348c2ecf20Sopenharmony_ci	/*
46358c2ecf20Sopenharmony_ci	 * After an early L2 VM-entry failure, we're now back
46368c2ecf20Sopenharmony_ci	 * in L1 which thinks it just finished a VMLAUNCH or
46378c2ecf20Sopenharmony_ci	 * VMRESUME instruction, so we need to set the failure
46388c2ecf20Sopenharmony_ci	 * flag and the VM-instruction error field of the VMCS
46398c2ecf20Sopenharmony_ci	 * accordingly, and skip the emulated instruction.
46408c2ecf20Sopenharmony_ci	 */
46418c2ecf20Sopenharmony_ci	(void)nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
46428c2ecf20Sopenharmony_ci
46438c2ecf20Sopenharmony_ci	/*
46448c2ecf20Sopenharmony_ci	 * Restore L1's host state to KVM's software model.  We're here
46458c2ecf20Sopenharmony_ci	 * because a consistency check was caught by hardware, which
46468c2ecf20Sopenharmony_ci	 * means some amount of guest state has been propagated to KVM's
46478c2ecf20Sopenharmony_ci	 * model and needs to be unwound to the host's state.
46488c2ecf20Sopenharmony_ci	 */
46498c2ecf20Sopenharmony_ci	nested_vmx_restore_host_state(vcpu);
46508c2ecf20Sopenharmony_ci
46518c2ecf20Sopenharmony_ci	vmx->fail = 0;
46528c2ecf20Sopenharmony_ci}
46538c2ecf20Sopenharmony_ci
46548c2ecf20Sopenharmony_ci/*
46558c2ecf20Sopenharmony_ci * Decode the memory-address operand of a vmx instruction, as recorded on an
46568c2ecf20Sopenharmony_ci * exit caused by such an instruction (run by a guest hypervisor).
46578c2ecf20Sopenharmony_ci * On success, returns 0. When the operand is invalid, returns 1 and throws
46588c2ecf20Sopenharmony_ci * #UD, #GP, or #SS.
46598c2ecf20Sopenharmony_ci */
46608c2ecf20Sopenharmony_ciint get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
46618c2ecf20Sopenharmony_ci			u32 vmx_instruction_info, bool wr, int len, gva_t *ret)
46628c2ecf20Sopenharmony_ci{
46638c2ecf20Sopenharmony_ci	gva_t off;
46648c2ecf20Sopenharmony_ci	bool exn;
46658c2ecf20Sopenharmony_ci	struct kvm_segment s;
46668c2ecf20Sopenharmony_ci
46678c2ecf20Sopenharmony_ci	/*
46688c2ecf20Sopenharmony_ci	 * According to Vol. 3B, "Information for VM Exits Due to Instruction
46698c2ecf20Sopenharmony_ci	 * Execution", on an exit, vmx_instruction_info holds most of the
46708c2ecf20Sopenharmony_ci	 * addressing components of the operand. Only the displacement part
46718c2ecf20Sopenharmony_ci	 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
46728c2ecf20Sopenharmony_ci	 * For how an actual address is calculated from all these components,
46738c2ecf20Sopenharmony_ci	 * refer to Vol. 1, "Operand Addressing".
46748c2ecf20Sopenharmony_ci	 */
46758c2ecf20Sopenharmony_ci	int  scaling = vmx_instruction_info & 3;
46768c2ecf20Sopenharmony_ci	int  addr_size = (vmx_instruction_info >> 7) & 7;
46778c2ecf20Sopenharmony_ci	bool is_reg = vmx_instruction_info & (1u << 10);
46788c2ecf20Sopenharmony_ci	int  seg_reg = (vmx_instruction_info >> 15) & 7;
46798c2ecf20Sopenharmony_ci	int  index_reg = (vmx_instruction_info >> 18) & 0xf;
46808c2ecf20Sopenharmony_ci	bool index_is_valid = !(vmx_instruction_info & (1u << 22));
46818c2ecf20Sopenharmony_ci	int  base_reg       = (vmx_instruction_info >> 23) & 0xf;
46828c2ecf20Sopenharmony_ci	bool base_is_valid  = !(vmx_instruction_info & (1u << 27));
46838c2ecf20Sopenharmony_ci
46848c2ecf20Sopenharmony_ci	if (is_reg) {
46858c2ecf20Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
46868c2ecf20Sopenharmony_ci		return 1;
46878c2ecf20Sopenharmony_ci	}
46888c2ecf20Sopenharmony_ci
46898c2ecf20Sopenharmony_ci	/* Addr = segment_base + offset */
46908c2ecf20Sopenharmony_ci	/* offset = base + [index * scale] + displacement */
46918c2ecf20Sopenharmony_ci	off = exit_qualification; /* holds the displacement */
46928c2ecf20Sopenharmony_ci	if (addr_size == 1)
46938c2ecf20Sopenharmony_ci		off = (gva_t)sign_extend64(off, 31);
46948c2ecf20Sopenharmony_ci	else if (addr_size == 0)
46958c2ecf20Sopenharmony_ci		off = (gva_t)sign_extend64(off, 15);
46968c2ecf20Sopenharmony_ci	if (base_is_valid)
46978c2ecf20Sopenharmony_ci		off += kvm_register_readl(vcpu, base_reg);
46988c2ecf20Sopenharmony_ci	if (index_is_valid)
46998c2ecf20Sopenharmony_ci		off += kvm_register_readl(vcpu, index_reg) << scaling;
47008c2ecf20Sopenharmony_ci	vmx_get_segment(vcpu, &s, seg_reg);
47018c2ecf20Sopenharmony_ci
47028c2ecf20Sopenharmony_ci	/*
47038c2ecf20Sopenharmony_ci	 * The effective address, i.e. @off, of a memory operand is truncated
47048c2ecf20Sopenharmony_ci	 * based on the address size of the instruction.  Note that this is
47058c2ecf20Sopenharmony_ci	 * the *effective address*, i.e. the address prior to accounting for
47068c2ecf20Sopenharmony_ci	 * the segment's base.
47078c2ecf20Sopenharmony_ci	 */
47088c2ecf20Sopenharmony_ci	if (addr_size == 1) /* 32 bit */
47098c2ecf20Sopenharmony_ci		off &= 0xffffffff;
47108c2ecf20Sopenharmony_ci	else if (addr_size == 0) /* 16 bit */
47118c2ecf20Sopenharmony_ci		off &= 0xffff;
47128c2ecf20Sopenharmony_ci
47138c2ecf20Sopenharmony_ci	/* Checks for #GP/#SS exceptions. */
47148c2ecf20Sopenharmony_ci	exn = false;
47158c2ecf20Sopenharmony_ci	if (is_long_mode(vcpu)) {
47168c2ecf20Sopenharmony_ci		/*
47178c2ecf20Sopenharmony_ci		 * The virtual/linear address is never truncated in 64-bit
47188c2ecf20Sopenharmony_ci		 * mode, e.g. a 32-bit address size can yield a 64-bit virtual
47198c2ecf20Sopenharmony_ci		 * address when using FS/GS with a non-zero base.
47208c2ecf20Sopenharmony_ci		 */
47218c2ecf20Sopenharmony_ci		if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS)
47228c2ecf20Sopenharmony_ci			*ret = s.base + off;
47238c2ecf20Sopenharmony_ci		else
47248c2ecf20Sopenharmony_ci			*ret = off;
47258c2ecf20Sopenharmony_ci
47268c2ecf20Sopenharmony_ci		/* Long mode: #GP(0)/#SS(0) if the memory address is in a
47278c2ecf20Sopenharmony_ci		 * non-canonical form. This is the only check on the memory
47288c2ecf20Sopenharmony_ci		 * destination for long mode!
47298c2ecf20Sopenharmony_ci		 */
47308c2ecf20Sopenharmony_ci		exn = is_noncanonical_address(*ret, vcpu);
47318c2ecf20Sopenharmony_ci	} else {
47328c2ecf20Sopenharmony_ci		/*
47338c2ecf20Sopenharmony_ci		 * When not in long mode, the virtual/linear address is
47348c2ecf20Sopenharmony_ci		 * unconditionally truncated to 32 bits regardless of the
47358c2ecf20Sopenharmony_ci		 * address size.
47368c2ecf20Sopenharmony_ci		 */
47378c2ecf20Sopenharmony_ci		*ret = (s.base + off) & 0xffffffff;
47388c2ecf20Sopenharmony_ci
47398c2ecf20Sopenharmony_ci		/* Protected mode: apply checks for segment validity in the
47408c2ecf20Sopenharmony_ci		 * following order:
47418c2ecf20Sopenharmony_ci		 * - segment type check (#GP(0) may be thrown)
47428c2ecf20Sopenharmony_ci		 * - usability check (#GP(0)/#SS(0))
47438c2ecf20Sopenharmony_ci		 * - limit check (#GP(0)/#SS(0))
47448c2ecf20Sopenharmony_ci		 */
47458c2ecf20Sopenharmony_ci		if (wr)
47468c2ecf20Sopenharmony_ci			/* #GP(0) if the destination operand is located in a
47478c2ecf20Sopenharmony_ci			 * read-only data segment or any code segment.
47488c2ecf20Sopenharmony_ci			 */
47498c2ecf20Sopenharmony_ci			exn = ((s.type & 0xa) == 0 || (s.type & 8));
47508c2ecf20Sopenharmony_ci		else
47518c2ecf20Sopenharmony_ci			/* #GP(0) if the source operand is located in an
47528c2ecf20Sopenharmony_ci			 * execute-only code segment
47538c2ecf20Sopenharmony_ci			 */
47548c2ecf20Sopenharmony_ci			exn = ((s.type & 0xa) == 8);
47558c2ecf20Sopenharmony_ci		if (exn) {
47568c2ecf20Sopenharmony_ci			kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
47578c2ecf20Sopenharmony_ci			return 1;
47588c2ecf20Sopenharmony_ci		}
47598c2ecf20Sopenharmony_ci		/* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
47608c2ecf20Sopenharmony_ci		 */
47618c2ecf20Sopenharmony_ci		exn = (s.unusable != 0);
47628c2ecf20Sopenharmony_ci
47638c2ecf20Sopenharmony_ci		/*
47648c2ecf20Sopenharmony_ci		 * Protected mode: #GP(0)/#SS(0) if the memory operand is
47658c2ecf20Sopenharmony_ci		 * outside the segment limit.  All CPUs that support VMX ignore
47668c2ecf20Sopenharmony_ci		 * limit checks for flat segments, i.e. segments with base==0,
47678c2ecf20Sopenharmony_ci		 * limit==0xffffffff and of type expand-up data or code.
47688c2ecf20Sopenharmony_ci		 */
47698c2ecf20Sopenharmony_ci		if (!(s.base == 0 && s.limit == 0xffffffff &&
47708c2ecf20Sopenharmony_ci		     ((s.type & 8) || !(s.type & 4))))
47718c2ecf20Sopenharmony_ci			exn = exn || ((u64)off + len - 1 > s.limit);
47728c2ecf20Sopenharmony_ci	}
47738c2ecf20Sopenharmony_ci	if (exn) {
47748c2ecf20Sopenharmony_ci		kvm_queue_exception_e(vcpu,
47758c2ecf20Sopenharmony_ci				      seg_reg == VCPU_SREG_SS ?
47768c2ecf20Sopenharmony_ci						SS_VECTOR : GP_VECTOR,
47778c2ecf20Sopenharmony_ci				      0);
47788c2ecf20Sopenharmony_ci		return 1;
47798c2ecf20Sopenharmony_ci	}
47808c2ecf20Sopenharmony_ci
47818c2ecf20Sopenharmony_ci	return 0;
47828c2ecf20Sopenharmony_ci}
47838c2ecf20Sopenharmony_ci
47848c2ecf20Sopenharmony_civoid nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
47858c2ecf20Sopenharmony_ci{
47868c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx;
47878c2ecf20Sopenharmony_ci
47888c2ecf20Sopenharmony_ci	if (!nested_vmx_allowed(vcpu))
47898c2ecf20Sopenharmony_ci		return;
47908c2ecf20Sopenharmony_ci
47918c2ecf20Sopenharmony_ci	vmx = to_vmx(vcpu);
47928c2ecf20Sopenharmony_ci	if (kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
47938c2ecf20Sopenharmony_ci		vmx->nested.msrs.entry_ctls_high |=
47948c2ecf20Sopenharmony_ci				VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
47958c2ecf20Sopenharmony_ci		vmx->nested.msrs.exit_ctls_high |=
47968c2ecf20Sopenharmony_ci				VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
47978c2ecf20Sopenharmony_ci	} else {
47988c2ecf20Sopenharmony_ci		vmx->nested.msrs.entry_ctls_high &=
47998c2ecf20Sopenharmony_ci				~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
48008c2ecf20Sopenharmony_ci		vmx->nested.msrs.exit_ctls_high &=
48018c2ecf20Sopenharmony_ci				~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
48028c2ecf20Sopenharmony_ci	}
48038c2ecf20Sopenharmony_ci}
48048c2ecf20Sopenharmony_ci
48058c2ecf20Sopenharmony_cistatic int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer,
48068c2ecf20Sopenharmony_ci				int *ret)
48078c2ecf20Sopenharmony_ci{
48088c2ecf20Sopenharmony_ci	gva_t gva;
48098c2ecf20Sopenharmony_ci	struct x86_exception e;
48108c2ecf20Sopenharmony_ci	int r;
48118c2ecf20Sopenharmony_ci
48128c2ecf20Sopenharmony_ci	if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
48138c2ecf20Sopenharmony_ci				vmcs_read32(VMX_INSTRUCTION_INFO), false,
48148c2ecf20Sopenharmony_ci				sizeof(*vmpointer), &gva)) {
48158c2ecf20Sopenharmony_ci		*ret = 1;
48168c2ecf20Sopenharmony_ci		return -EINVAL;
48178c2ecf20Sopenharmony_ci	}
48188c2ecf20Sopenharmony_ci
48198c2ecf20Sopenharmony_ci	r = kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e);
48208c2ecf20Sopenharmony_ci	if (r != X86EMUL_CONTINUE) {
48218c2ecf20Sopenharmony_ci		*ret = kvm_handle_memory_failure(vcpu, r, &e);
48228c2ecf20Sopenharmony_ci		return -EINVAL;
48238c2ecf20Sopenharmony_ci	}
48248c2ecf20Sopenharmony_ci
48258c2ecf20Sopenharmony_ci	return 0;
48268c2ecf20Sopenharmony_ci}
48278c2ecf20Sopenharmony_ci
48288c2ecf20Sopenharmony_ci/*
48298c2ecf20Sopenharmony_ci * Allocate a shadow VMCS and associate it with the currently loaded
48308c2ecf20Sopenharmony_ci * VMCS, unless such a shadow VMCS already exists. The newly allocated
48318c2ecf20Sopenharmony_ci * VMCS is also VMCLEARed, so that it is ready for use.
48328c2ecf20Sopenharmony_ci */
48338c2ecf20Sopenharmony_cistatic struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
48348c2ecf20Sopenharmony_ci{
48358c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
48368c2ecf20Sopenharmony_ci	struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
48378c2ecf20Sopenharmony_ci
48388c2ecf20Sopenharmony_ci	/*
48398c2ecf20Sopenharmony_ci	 * We should allocate a shadow vmcs for vmcs01 only when L1
48408c2ecf20Sopenharmony_ci	 * executes VMXON and free it when L1 executes VMXOFF.
48418c2ecf20Sopenharmony_ci	 * As it is invalid to execute VMXON twice, we shouldn't reach
48428c2ecf20Sopenharmony_ci	 * here when vmcs01 already have an allocated shadow vmcs.
48438c2ecf20Sopenharmony_ci	 */
48448c2ecf20Sopenharmony_ci	WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
48458c2ecf20Sopenharmony_ci
48468c2ecf20Sopenharmony_ci	if (!loaded_vmcs->shadow_vmcs) {
48478c2ecf20Sopenharmony_ci		loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
48488c2ecf20Sopenharmony_ci		if (loaded_vmcs->shadow_vmcs)
48498c2ecf20Sopenharmony_ci			vmcs_clear(loaded_vmcs->shadow_vmcs);
48508c2ecf20Sopenharmony_ci	}
48518c2ecf20Sopenharmony_ci	return loaded_vmcs->shadow_vmcs;
48528c2ecf20Sopenharmony_ci}
48538c2ecf20Sopenharmony_ci
48548c2ecf20Sopenharmony_cistatic int enter_vmx_operation(struct kvm_vcpu *vcpu)
48558c2ecf20Sopenharmony_ci{
48568c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
48578c2ecf20Sopenharmony_ci	int r;
48588c2ecf20Sopenharmony_ci
48598c2ecf20Sopenharmony_ci	r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
48608c2ecf20Sopenharmony_ci	if (r < 0)
48618c2ecf20Sopenharmony_ci		goto out_vmcs02;
48628c2ecf20Sopenharmony_ci
48638c2ecf20Sopenharmony_ci	vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
48648c2ecf20Sopenharmony_ci	if (!vmx->nested.cached_vmcs12)
48658c2ecf20Sopenharmony_ci		goto out_cached_vmcs12;
48668c2ecf20Sopenharmony_ci
48678c2ecf20Sopenharmony_ci	vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
48688c2ecf20Sopenharmony_ci	if (!vmx->nested.cached_shadow_vmcs12)
48698c2ecf20Sopenharmony_ci		goto out_cached_shadow_vmcs12;
48708c2ecf20Sopenharmony_ci
48718c2ecf20Sopenharmony_ci	if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
48728c2ecf20Sopenharmony_ci		goto out_shadow_vmcs;
48738c2ecf20Sopenharmony_ci
48748c2ecf20Sopenharmony_ci	hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
48758c2ecf20Sopenharmony_ci		     HRTIMER_MODE_ABS_PINNED);
48768c2ecf20Sopenharmony_ci	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
48778c2ecf20Sopenharmony_ci
48788c2ecf20Sopenharmony_ci	vmx->nested.vpid02 = allocate_vpid();
48798c2ecf20Sopenharmony_ci
48808c2ecf20Sopenharmony_ci	vmx->nested.vmcs02_initialized = false;
48818c2ecf20Sopenharmony_ci	vmx->nested.vmxon = true;
48828c2ecf20Sopenharmony_ci
48838c2ecf20Sopenharmony_ci	if (vmx_pt_mode_is_host_guest()) {
48848c2ecf20Sopenharmony_ci		vmx->pt_desc.guest.ctl = 0;
48858c2ecf20Sopenharmony_ci		pt_update_intercept_for_msr(vcpu);
48868c2ecf20Sopenharmony_ci	}
48878c2ecf20Sopenharmony_ci
48888c2ecf20Sopenharmony_ci	return 0;
48898c2ecf20Sopenharmony_ci
48908c2ecf20Sopenharmony_ciout_shadow_vmcs:
48918c2ecf20Sopenharmony_ci	kfree(vmx->nested.cached_shadow_vmcs12);
48928c2ecf20Sopenharmony_ci
48938c2ecf20Sopenharmony_ciout_cached_shadow_vmcs12:
48948c2ecf20Sopenharmony_ci	kfree(vmx->nested.cached_vmcs12);
48958c2ecf20Sopenharmony_ci
48968c2ecf20Sopenharmony_ciout_cached_vmcs12:
48978c2ecf20Sopenharmony_ci	free_loaded_vmcs(&vmx->nested.vmcs02);
48988c2ecf20Sopenharmony_ci
48998c2ecf20Sopenharmony_ciout_vmcs02:
49008c2ecf20Sopenharmony_ci	return -ENOMEM;
49018c2ecf20Sopenharmony_ci}
49028c2ecf20Sopenharmony_ci
49038c2ecf20Sopenharmony_ci/*
49048c2ecf20Sopenharmony_ci * Emulate the VMXON instruction.
49058c2ecf20Sopenharmony_ci * Currently, we just remember that VMX is active, and do not save or even
49068c2ecf20Sopenharmony_ci * inspect the argument to VMXON (the so-called "VMXON pointer") because we
49078c2ecf20Sopenharmony_ci * do not currently need to store anything in that guest-allocated memory
49088c2ecf20Sopenharmony_ci * region. Consequently, VMCLEAR and VMPTRLD also do not verify that the their
49098c2ecf20Sopenharmony_ci * argument is different from the VMXON pointer (which the spec says they do).
49108c2ecf20Sopenharmony_ci */
49118c2ecf20Sopenharmony_cistatic int handle_vmon(struct kvm_vcpu *vcpu)
49128c2ecf20Sopenharmony_ci{
49138c2ecf20Sopenharmony_ci	int ret;
49148c2ecf20Sopenharmony_ci	gpa_t vmptr;
49158c2ecf20Sopenharmony_ci	uint32_t revision;
49168c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
49178c2ecf20Sopenharmony_ci	const u64 VMXON_NEEDED_FEATURES = FEAT_CTL_LOCKED
49188c2ecf20Sopenharmony_ci		| FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
49198c2ecf20Sopenharmony_ci
49208c2ecf20Sopenharmony_ci	/*
49218c2ecf20Sopenharmony_ci	 * Manually check CR4.VMXE checks, KVM must force CR4.VMXE=1 to enter
49228c2ecf20Sopenharmony_ci	 * the guest and so cannot rely on hardware to perform the check,
49238c2ecf20Sopenharmony_ci	 * which has higher priority than VM-Exit (see Intel SDM's pseudocode
49248c2ecf20Sopenharmony_ci	 * for VMXON).
49258c2ecf20Sopenharmony_ci	 *
49268c2ecf20Sopenharmony_ci	 * Rely on hardware for the other pre-VM-Exit checks, CR0.PE=1, !VM86
49278c2ecf20Sopenharmony_ci	 * and !COMPATIBILITY modes.  For an unrestricted guest, KVM doesn't
49288c2ecf20Sopenharmony_ci	 * force any of the relevant guest state.  For a restricted guest, KVM
49298c2ecf20Sopenharmony_ci	 * does force CR0.PE=1, but only to also force VM86 in order to emulate
49308c2ecf20Sopenharmony_ci	 * Real Mode, and so there's no need to check CR0.PE manually.
49318c2ecf20Sopenharmony_ci	 */
49328c2ecf20Sopenharmony_ci	if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
49338c2ecf20Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
49348c2ecf20Sopenharmony_ci		return 1;
49358c2ecf20Sopenharmony_ci	}
49368c2ecf20Sopenharmony_ci
49378c2ecf20Sopenharmony_ci	/*
49388c2ecf20Sopenharmony_ci	 * The CPL is checked for "not in VMX operation" and for "in VMX root",
49398c2ecf20Sopenharmony_ci	 * and has higher priority than the VM-Fail due to being post-VMXON,
49408c2ecf20Sopenharmony_ci	 * i.e. VMXON #GPs outside of VMX non-root if CPL!=0.  In VMX non-root,
49418c2ecf20Sopenharmony_ci	 * VMXON causes VM-Exit and KVM unconditionally forwards VMXON VM-Exits
49428c2ecf20Sopenharmony_ci	 * from L2 to L1, i.e. there's no need to check for the vCPU being in
49438c2ecf20Sopenharmony_ci	 * VMX non-root.
49448c2ecf20Sopenharmony_ci	 *
49458c2ecf20Sopenharmony_ci	 * Forwarding the VM-Exit unconditionally, i.e. without performing the
49468c2ecf20Sopenharmony_ci	 * #UD checks (see above), is functionally ok because KVM doesn't allow
49478c2ecf20Sopenharmony_ci	 * L1 to run L2 without CR4.VMXE=0, and because KVM never modifies L2's
49488c2ecf20Sopenharmony_ci	 * CR0 or CR4, i.e. it's L2's responsibility to emulate #UDs that are
49498c2ecf20Sopenharmony_ci	 * missed by hardware due to shadowing CR0 and/or CR4.
49508c2ecf20Sopenharmony_ci	 */
49518c2ecf20Sopenharmony_ci	if (vmx_get_cpl(vcpu)) {
49528c2ecf20Sopenharmony_ci		kvm_inject_gp(vcpu, 0);
49538c2ecf20Sopenharmony_ci		return 1;
49548c2ecf20Sopenharmony_ci	}
49558c2ecf20Sopenharmony_ci
49568c2ecf20Sopenharmony_ci	if (vmx->nested.vmxon)
49578c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
49588c2ecf20Sopenharmony_ci
49598c2ecf20Sopenharmony_ci	/*
49608c2ecf20Sopenharmony_ci	 * Invalid CR0/CR4 generates #GP.  These checks are performed if and
49618c2ecf20Sopenharmony_ci	 * only if the vCPU isn't already in VMX operation, i.e. effectively
49628c2ecf20Sopenharmony_ci	 * have lower priority than the VM-Fail above.
49638c2ecf20Sopenharmony_ci	 */
49648c2ecf20Sopenharmony_ci	if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
49658c2ecf20Sopenharmony_ci	    !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
49668c2ecf20Sopenharmony_ci		kvm_inject_gp(vcpu, 0);
49678c2ecf20Sopenharmony_ci		return 1;
49688c2ecf20Sopenharmony_ci	}
49698c2ecf20Sopenharmony_ci
49708c2ecf20Sopenharmony_ci	if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
49718c2ecf20Sopenharmony_ci			!= VMXON_NEEDED_FEATURES) {
49728c2ecf20Sopenharmony_ci		kvm_inject_gp(vcpu, 0);
49738c2ecf20Sopenharmony_ci		return 1;
49748c2ecf20Sopenharmony_ci	}
49758c2ecf20Sopenharmony_ci
49768c2ecf20Sopenharmony_ci	if (nested_vmx_get_vmptr(vcpu, &vmptr, &ret))
49778c2ecf20Sopenharmony_ci		return ret;
49788c2ecf20Sopenharmony_ci
49798c2ecf20Sopenharmony_ci	/*
49808c2ecf20Sopenharmony_ci	 * SDM 3: 24.11.5
49818c2ecf20Sopenharmony_ci	 * The first 4 bytes of VMXON region contain the supported
49828c2ecf20Sopenharmony_ci	 * VMCS revision identifier
49838c2ecf20Sopenharmony_ci	 *
49848c2ecf20Sopenharmony_ci	 * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
49858c2ecf20Sopenharmony_ci	 * which replaces physical address width with 32
49868c2ecf20Sopenharmony_ci	 */
49878c2ecf20Sopenharmony_ci	if (!page_address_valid(vcpu, vmptr))
49888c2ecf20Sopenharmony_ci		return nested_vmx_failInvalid(vcpu);
49898c2ecf20Sopenharmony_ci
49908c2ecf20Sopenharmony_ci	if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) ||
49918c2ecf20Sopenharmony_ci	    revision != VMCS12_REVISION)
49928c2ecf20Sopenharmony_ci		return nested_vmx_failInvalid(vcpu);
49938c2ecf20Sopenharmony_ci
49948c2ecf20Sopenharmony_ci	vmx->nested.vmxon_ptr = vmptr;
49958c2ecf20Sopenharmony_ci	ret = enter_vmx_operation(vcpu);
49968c2ecf20Sopenharmony_ci	if (ret)
49978c2ecf20Sopenharmony_ci		return ret;
49988c2ecf20Sopenharmony_ci
49998c2ecf20Sopenharmony_ci	return nested_vmx_succeed(vcpu);
50008c2ecf20Sopenharmony_ci}
50018c2ecf20Sopenharmony_ci
50028c2ecf20Sopenharmony_cistatic inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
50038c2ecf20Sopenharmony_ci{
50048c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
50058c2ecf20Sopenharmony_ci
50068c2ecf20Sopenharmony_ci	if (vmx->nested.current_vmptr == -1ull)
50078c2ecf20Sopenharmony_ci		return;
50088c2ecf20Sopenharmony_ci
50098c2ecf20Sopenharmony_ci	copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
50108c2ecf20Sopenharmony_ci
50118c2ecf20Sopenharmony_ci	if (enable_shadow_vmcs) {
50128c2ecf20Sopenharmony_ci		/* copy to memory all shadowed fields in case
50138c2ecf20Sopenharmony_ci		   they were modified */
50148c2ecf20Sopenharmony_ci		copy_shadow_to_vmcs12(vmx);
50158c2ecf20Sopenharmony_ci		vmx_disable_shadow_vmcs(vmx);
50168c2ecf20Sopenharmony_ci	}
50178c2ecf20Sopenharmony_ci	vmx->nested.posted_intr_nv = -1;
50188c2ecf20Sopenharmony_ci
50198c2ecf20Sopenharmony_ci	/* Flush VMCS12 to guest memory */
50208c2ecf20Sopenharmony_ci	kvm_vcpu_write_guest_page(vcpu,
50218c2ecf20Sopenharmony_ci				  vmx->nested.current_vmptr >> PAGE_SHIFT,
50228c2ecf20Sopenharmony_ci				  vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
50238c2ecf20Sopenharmony_ci
50248c2ecf20Sopenharmony_ci	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
50258c2ecf20Sopenharmony_ci
50268c2ecf20Sopenharmony_ci	vmx->nested.current_vmptr = -1ull;
50278c2ecf20Sopenharmony_ci}
50288c2ecf20Sopenharmony_ci
50298c2ecf20Sopenharmony_ci/* Emulate the VMXOFF instruction */
50308c2ecf20Sopenharmony_cistatic int handle_vmoff(struct kvm_vcpu *vcpu)
50318c2ecf20Sopenharmony_ci{
50328c2ecf20Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
50338c2ecf20Sopenharmony_ci		return 1;
50348c2ecf20Sopenharmony_ci
50358c2ecf20Sopenharmony_ci	free_nested(vcpu);
50368c2ecf20Sopenharmony_ci
50378c2ecf20Sopenharmony_ci	/* Process a latched INIT during time CPU was in VMX operation */
50388c2ecf20Sopenharmony_ci	kvm_make_request(KVM_REQ_EVENT, vcpu);
50398c2ecf20Sopenharmony_ci
50408c2ecf20Sopenharmony_ci	return nested_vmx_succeed(vcpu);
50418c2ecf20Sopenharmony_ci}
50428c2ecf20Sopenharmony_ci
50438c2ecf20Sopenharmony_ci/* Emulate the VMCLEAR instruction */
50448c2ecf20Sopenharmony_cistatic int handle_vmclear(struct kvm_vcpu *vcpu)
50458c2ecf20Sopenharmony_ci{
50468c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
50478c2ecf20Sopenharmony_ci	u32 zero = 0;
50488c2ecf20Sopenharmony_ci	gpa_t vmptr;
50498c2ecf20Sopenharmony_ci	u64 evmcs_gpa;
50508c2ecf20Sopenharmony_ci	int r;
50518c2ecf20Sopenharmony_ci
50528c2ecf20Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
50538c2ecf20Sopenharmony_ci		return 1;
50548c2ecf20Sopenharmony_ci
50558c2ecf20Sopenharmony_ci	if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
50568c2ecf20Sopenharmony_ci		return r;
50578c2ecf20Sopenharmony_ci
50588c2ecf20Sopenharmony_ci	if (!page_address_valid(vcpu, vmptr))
50598c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
50608c2ecf20Sopenharmony_ci
50618c2ecf20Sopenharmony_ci	if (vmptr == vmx->nested.vmxon_ptr)
50628c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
50638c2ecf20Sopenharmony_ci
50648c2ecf20Sopenharmony_ci	/*
50658c2ecf20Sopenharmony_ci	 * When Enlightened VMEntry is enabled on the calling CPU we treat
50668c2ecf20Sopenharmony_ci	 * memory area pointer by vmptr as Enlightened VMCS (as there's no good
50678c2ecf20Sopenharmony_ci	 * way to distinguish it from VMCS12) and we must not corrupt it by
50688c2ecf20Sopenharmony_ci	 * writing to the non-existent 'launch_state' field. The area doesn't
50698c2ecf20Sopenharmony_ci	 * have to be the currently active EVMCS on the calling CPU and there's
50708c2ecf20Sopenharmony_ci	 * nothing KVM has to do to transition it from 'active' to 'non-active'
50718c2ecf20Sopenharmony_ci	 * state. It is possible that the area will stay mapped as
50728c2ecf20Sopenharmony_ci	 * vmx->nested.hv_evmcs but this shouldn't be a problem.
50738c2ecf20Sopenharmony_ci	 */
50748c2ecf20Sopenharmony_ci	if (likely(!vmx->nested.enlightened_vmcs_enabled ||
50758c2ecf20Sopenharmony_ci		   !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) {
50768c2ecf20Sopenharmony_ci		if (vmptr == vmx->nested.current_vmptr)
50778c2ecf20Sopenharmony_ci			nested_release_vmcs12(vcpu);
50788c2ecf20Sopenharmony_ci
50798c2ecf20Sopenharmony_ci		kvm_vcpu_write_guest(vcpu,
50808c2ecf20Sopenharmony_ci				     vmptr + offsetof(struct vmcs12,
50818c2ecf20Sopenharmony_ci						      launch_state),
50828c2ecf20Sopenharmony_ci				     &zero, sizeof(zero));
50838c2ecf20Sopenharmony_ci	}
50848c2ecf20Sopenharmony_ci
50858c2ecf20Sopenharmony_ci	return nested_vmx_succeed(vcpu);
50868c2ecf20Sopenharmony_ci}
50878c2ecf20Sopenharmony_ci
50888c2ecf20Sopenharmony_ci/* Emulate the VMLAUNCH instruction */
50898c2ecf20Sopenharmony_cistatic int handle_vmlaunch(struct kvm_vcpu *vcpu)
50908c2ecf20Sopenharmony_ci{
50918c2ecf20Sopenharmony_ci	return nested_vmx_run(vcpu, true);
50928c2ecf20Sopenharmony_ci}
50938c2ecf20Sopenharmony_ci
50948c2ecf20Sopenharmony_ci/* Emulate the VMRESUME instruction */
50958c2ecf20Sopenharmony_cistatic int handle_vmresume(struct kvm_vcpu *vcpu)
50968c2ecf20Sopenharmony_ci{
50978c2ecf20Sopenharmony_ci
50988c2ecf20Sopenharmony_ci	return nested_vmx_run(vcpu, false);
50998c2ecf20Sopenharmony_ci}
51008c2ecf20Sopenharmony_ci
51018c2ecf20Sopenharmony_cistatic int handle_vmread(struct kvm_vcpu *vcpu)
51028c2ecf20Sopenharmony_ci{
51038c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
51048c2ecf20Sopenharmony_ci						    : get_vmcs12(vcpu);
51058c2ecf20Sopenharmony_ci	unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
51068c2ecf20Sopenharmony_ci	u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
51078c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
51088c2ecf20Sopenharmony_ci	struct x86_exception e;
51098c2ecf20Sopenharmony_ci	unsigned long field;
51108c2ecf20Sopenharmony_ci	u64 value;
51118c2ecf20Sopenharmony_ci	gva_t gva = 0;
51128c2ecf20Sopenharmony_ci	short offset;
51138c2ecf20Sopenharmony_ci	int len, r;
51148c2ecf20Sopenharmony_ci
51158c2ecf20Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
51168c2ecf20Sopenharmony_ci		return 1;
51178c2ecf20Sopenharmony_ci
51188c2ecf20Sopenharmony_ci	/*
51198c2ecf20Sopenharmony_ci	 * In VMX non-root operation, when the VMCS-link pointer is -1ull,
51208c2ecf20Sopenharmony_ci	 * any VMREAD sets the ALU flags for VMfailInvalid.
51218c2ecf20Sopenharmony_ci	 */
51228c2ecf20Sopenharmony_ci	if (vmx->nested.current_vmptr == -1ull ||
51238c2ecf20Sopenharmony_ci	    (is_guest_mode(vcpu) &&
51248c2ecf20Sopenharmony_ci	     get_vmcs12(vcpu)->vmcs_link_pointer == -1ull))
51258c2ecf20Sopenharmony_ci		return nested_vmx_failInvalid(vcpu);
51268c2ecf20Sopenharmony_ci
51278c2ecf20Sopenharmony_ci	/* Decode instruction info and find the field to read */
51288c2ecf20Sopenharmony_ci	field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf));
51298c2ecf20Sopenharmony_ci
51308c2ecf20Sopenharmony_ci	offset = vmcs_field_to_offset(field);
51318c2ecf20Sopenharmony_ci	if (offset < 0)
51328c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
51338c2ecf20Sopenharmony_ci
51348c2ecf20Sopenharmony_ci	if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
51358c2ecf20Sopenharmony_ci		copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
51368c2ecf20Sopenharmony_ci
51378c2ecf20Sopenharmony_ci	/* Read the field, zero-extended to a u64 value */
51388c2ecf20Sopenharmony_ci	value = vmcs12_read_any(vmcs12, field, offset);
51398c2ecf20Sopenharmony_ci
51408c2ecf20Sopenharmony_ci	/*
51418c2ecf20Sopenharmony_ci	 * Now copy part of this value to register or memory, as requested.
51428c2ecf20Sopenharmony_ci	 * Note that the number of bits actually copied is 32 or 64 depending
51438c2ecf20Sopenharmony_ci	 * on the guest's mode (32 or 64 bit), not on the given field's length.
51448c2ecf20Sopenharmony_ci	 */
51458c2ecf20Sopenharmony_ci	if (instr_info & BIT(10)) {
51468c2ecf20Sopenharmony_ci		kvm_register_writel(vcpu, (((instr_info) >> 3) & 0xf), value);
51478c2ecf20Sopenharmony_ci	} else {
51488c2ecf20Sopenharmony_ci		len = is_64_bit_mode(vcpu) ? 8 : 4;
51498c2ecf20Sopenharmony_ci		if (get_vmx_mem_address(vcpu, exit_qualification,
51508c2ecf20Sopenharmony_ci					instr_info, true, len, &gva))
51518c2ecf20Sopenharmony_ci			return 1;
51528c2ecf20Sopenharmony_ci		/* _system ok, nested_vmx_check_permission has verified cpl=0 */
51538c2ecf20Sopenharmony_ci		r = kvm_write_guest_virt_system(vcpu, gva, &value, len, &e);
51548c2ecf20Sopenharmony_ci		if (r != X86EMUL_CONTINUE)
51558c2ecf20Sopenharmony_ci			return kvm_handle_memory_failure(vcpu, r, &e);
51568c2ecf20Sopenharmony_ci	}
51578c2ecf20Sopenharmony_ci
51588c2ecf20Sopenharmony_ci	return nested_vmx_succeed(vcpu);
51598c2ecf20Sopenharmony_ci}
51608c2ecf20Sopenharmony_ci
51618c2ecf20Sopenharmony_cistatic bool is_shadow_field_rw(unsigned long field)
51628c2ecf20Sopenharmony_ci{
51638c2ecf20Sopenharmony_ci	switch (field) {
51648c2ecf20Sopenharmony_ci#define SHADOW_FIELD_RW(x, y) case x:
51658c2ecf20Sopenharmony_ci#include "vmcs_shadow_fields.h"
51668c2ecf20Sopenharmony_ci		return true;
51678c2ecf20Sopenharmony_ci	default:
51688c2ecf20Sopenharmony_ci		break;
51698c2ecf20Sopenharmony_ci	}
51708c2ecf20Sopenharmony_ci	return false;
51718c2ecf20Sopenharmony_ci}
51728c2ecf20Sopenharmony_ci
51738c2ecf20Sopenharmony_cistatic bool is_shadow_field_ro(unsigned long field)
51748c2ecf20Sopenharmony_ci{
51758c2ecf20Sopenharmony_ci	switch (field) {
51768c2ecf20Sopenharmony_ci#define SHADOW_FIELD_RO(x, y) case x:
51778c2ecf20Sopenharmony_ci#include "vmcs_shadow_fields.h"
51788c2ecf20Sopenharmony_ci		return true;
51798c2ecf20Sopenharmony_ci	default:
51808c2ecf20Sopenharmony_ci		break;
51818c2ecf20Sopenharmony_ci	}
51828c2ecf20Sopenharmony_ci	return false;
51838c2ecf20Sopenharmony_ci}
51848c2ecf20Sopenharmony_ci
51858c2ecf20Sopenharmony_cistatic int handle_vmwrite(struct kvm_vcpu *vcpu)
51868c2ecf20Sopenharmony_ci{
51878c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
51888c2ecf20Sopenharmony_ci						    : get_vmcs12(vcpu);
51898c2ecf20Sopenharmony_ci	unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
51908c2ecf20Sopenharmony_ci	u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
51918c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
51928c2ecf20Sopenharmony_ci	struct x86_exception e;
51938c2ecf20Sopenharmony_ci	unsigned long field;
51948c2ecf20Sopenharmony_ci	short offset;
51958c2ecf20Sopenharmony_ci	gva_t gva;
51968c2ecf20Sopenharmony_ci	int len, r;
51978c2ecf20Sopenharmony_ci
51988c2ecf20Sopenharmony_ci	/*
51998c2ecf20Sopenharmony_ci	 * The value to write might be 32 or 64 bits, depending on L1's long
52008c2ecf20Sopenharmony_ci	 * mode, and eventually we need to write that into a field of several
52018c2ecf20Sopenharmony_ci	 * possible lengths. The code below first zero-extends the value to 64
52028c2ecf20Sopenharmony_ci	 * bit (value), and then copies only the appropriate number of
52038c2ecf20Sopenharmony_ci	 * bits into the vmcs12 field.
52048c2ecf20Sopenharmony_ci	 */
52058c2ecf20Sopenharmony_ci	u64 value = 0;
52068c2ecf20Sopenharmony_ci
52078c2ecf20Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
52088c2ecf20Sopenharmony_ci		return 1;
52098c2ecf20Sopenharmony_ci
52108c2ecf20Sopenharmony_ci	/*
52118c2ecf20Sopenharmony_ci	 * In VMX non-root operation, when the VMCS-link pointer is -1ull,
52128c2ecf20Sopenharmony_ci	 * any VMWRITE sets the ALU flags for VMfailInvalid.
52138c2ecf20Sopenharmony_ci	 */
52148c2ecf20Sopenharmony_ci	if (vmx->nested.current_vmptr == -1ull ||
52158c2ecf20Sopenharmony_ci	    (is_guest_mode(vcpu) &&
52168c2ecf20Sopenharmony_ci	     get_vmcs12(vcpu)->vmcs_link_pointer == -1ull))
52178c2ecf20Sopenharmony_ci		return nested_vmx_failInvalid(vcpu);
52188c2ecf20Sopenharmony_ci
52198c2ecf20Sopenharmony_ci	if (instr_info & BIT(10))
52208c2ecf20Sopenharmony_ci		value = kvm_register_readl(vcpu, (((instr_info) >> 3) & 0xf));
52218c2ecf20Sopenharmony_ci	else {
52228c2ecf20Sopenharmony_ci		len = is_64_bit_mode(vcpu) ? 8 : 4;
52238c2ecf20Sopenharmony_ci		if (get_vmx_mem_address(vcpu, exit_qualification,
52248c2ecf20Sopenharmony_ci					instr_info, false, len, &gva))
52258c2ecf20Sopenharmony_ci			return 1;
52268c2ecf20Sopenharmony_ci		r = kvm_read_guest_virt(vcpu, gva, &value, len, &e);
52278c2ecf20Sopenharmony_ci		if (r != X86EMUL_CONTINUE)
52288c2ecf20Sopenharmony_ci			return kvm_handle_memory_failure(vcpu, r, &e);
52298c2ecf20Sopenharmony_ci	}
52308c2ecf20Sopenharmony_ci
52318c2ecf20Sopenharmony_ci	field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf));
52328c2ecf20Sopenharmony_ci
52338c2ecf20Sopenharmony_ci	offset = vmcs_field_to_offset(field);
52348c2ecf20Sopenharmony_ci	if (offset < 0)
52358c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
52368c2ecf20Sopenharmony_ci
52378c2ecf20Sopenharmony_ci	/*
52388c2ecf20Sopenharmony_ci	 * If the vCPU supports "VMWRITE to any supported field in the
52398c2ecf20Sopenharmony_ci	 * VMCS," then the "read-only" fields are actually read/write.
52408c2ecf20Sopenharmony_ci	 */
52418c2ecf20Sopenharmony_ci	if (vmcs_field_readonly(field) &&
52428c2ecf20Sopenharmony_ci	    !nested_cpu_has_vmwrite_any_field(vcpu))
52438c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
52448c2ecf20Sopenharmony_ci
52458c2ecf20Sopenharmony_ci	/*
52468c2ecf20Sopenharmony_ci	 * Ensure vmcs12 is up-to-date before any VMWRITE that dirties
52478c2ecf20Sopenharmony_ci	 * vmcs12, else we may crush a field or consume a stale value.
52488c2ecf20Sopenharmony_ci	 */
52498c2ecf20Sopenharmony_ci	if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field))
52508c2ecf20Sopenharmony_ci		copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
52518c2ecf20Sopenharmony_ci
52528c2ecf20Sopenharmony_ci	/*
52538c2ecf20Sopenharmony_ci	 * Some Intel CPUs intentionally drop the reserved bits of the AR byte
52548c2ecf20Sopenharmony_ci	 * fields on VMWRITE.  Emulate this behavior to ensure consistent KVM
52558c2ecf20Sopenharmony_ci	 * behavior regardless of the underlying hardware, e.g. if an AR_BYTE
52568c2ecf20Sopenharmony_ci	 * field is intercepted for VMWRITE but not VMREAD (in L1), then VMREAD
52578c2ecf20Sopenharmony_ci	 * from L1 will return a different value than VMREAD from L2 (L1 sees
52588c2ecf20Sopenharmony_ci	 * the stripped down value, L2 sees the full value as stored by KVM).
52598c2ecf20Sopenharmony_ci	 */
52608c2ecf20Sopenharmony_ci	if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES)
52618c2ecf20Sopenharmony_ci		value &= 0x1f0ff;
52628c2ecf20Sopenharmony_ci
52638c2ecf20Sopenharmony_ci	vmcs12_write_any(vmcs12, field, offset, value);
52648c2ecf20Sopenharmony_ci
52658c2ecf20Sopenharmony_ci	/*
52668c2ecf20Sopenharmony_ci	 * Do not track vmcs12 dirty-state if in guest-mode as we actually
52678c2ecf20Sopenharmony_ci	 * dirty shadow vmcs12 instead of vmcs12.  Fields that can be updated
52688c2ecf20Sopenharmony_ci	 * by L1 without a vmexit are always updated in the vmcs02, i.e. don't
52698c2ecf20Sopenharmony_ci	 * "dirty" vmcs12, all others go down the prepare_vmcs02() slow path.
52708c2ecf20Sopenharmony_ci	 */
52718c2ecf20Sopenharmony_ci	if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) {
52728c2ecf20Sopenharmony_ci		/*
52738c2ecf20Sopenharmony_ci		 * L1 can read these fields without exiting, ensure the
52748c2ecf20Sopenharmony_ci		 * shadow VMCS is up-to-date.
52758c2ecf20Sopenharmony_ci		 */
52768c2ecf20Sopenharmony_ci		if (enable_shadow_vmcs && is_shadow_field_ro(field)) {
52778c2ecf20Sopenharmony_ci			preempt_disable();
52788c2ecf20Sopenharmony_ci			vmcs_load(vmx->vmcs01.shadow_vmcs);
52798c2ecf20Sopenharmony_ci
52808c2ecf20Sopenharmony_ci			__vmcs_writel(field, value);
52818c2ecf20Sopenharmony_ci
52828c2ecf20Sopenharmony_ci			vmcs_clear(vmx->vmcs01.shadow_vmcs);
52838c2ecf20Sopenharmony_ci			vmcs_load(vmx->loaded_vmcs->vmcs);
52848c2ecf20Sopenharmony_ci			preempt_enable();
52858c2ecf20Sopenharmony_ci		}
52868c2ecf20Sopenharmony_ci		vmx->nested.dirty_vmcs12 = true;
52878c2ecf20Sopenharmony_ci	}
52888c2ecf20Sopenharmony_ci
52898c2ecf20Sopenharmony_ci	return nested_vmx_succeed(vcpu);
52908c2ecf20Sopenharmony_ci}
52918c2ecf20Sopenharmony_ci
52928c2ecf20Sopenharmony_cistatic void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
52938c2ecf20Sopenharmony_ci{
52948c2ecf20Sopenharmony_ci	vmx->nested.current_vmptr = vmptr;
52958c2ecf20Sopenharmony_ci	if (enable_shadow_vmcs) {
52968c2ecf20Sopenharmony_ci		secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
52978c2ecf20Sopenharmony_ci		vmcs_write64(VMCS_LINK_POINTER,
52988c2ecf20Sopenharmony_ci			     __pa(vmx->vmcs01.shadow_vmcs));
52998c2ecf20Sopenharmony_ci		vmx->nested.need_vmcs12_to_shadow_sync = true;
53008c2ecf20Sopenharmony_ci	}
53018c2ecf20Sopenharmony_ci	vmx->nested.dirty_vmcs12 = true;
53028c2ecf20Sopenharmony_ci}
53038c2ecf20Sopenharmony_ci
53048c2ecf20Sopenharmony_ci/* Emulate the VMPTRLD instruction */
53058c2ecf20Sopenharmony_cistatic int handle_vmptrld(struct kvm_vcpu *vcpu)
53068c2ecf20Sopenharmony_ci{
53078c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
53088c2ecf20Sopenharmony_ci	gpa_t vmptr;
53098c2ecf20Sopenharmony_ci	int r;
53108c2ecf20Sopenharmony_ci
53118c2ecf20Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
53128c2ecf20Sopenharmony_ci		return 1;
53138c2ecf20Sopenharmony_ci
53148c2ecf20Sopenharmony_ci	if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
53158c2ecf20Sopenharmony_ci		return r;
53168c2ecf20Sopenharmony_ci
53178c2ecf20Sopenharmony_ci	if (!page_address_valid(vcpu, vmptr))
53188c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
53198c2ecf20Sopenharmony_ci
53208c2ecf20Sopenharmony_ci	if (vmptr == vmx->nested.vmxon_ptr)
53218c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
53228c2ecf20Sopenharmony_ci
53238c2ecf20Sopenharmony_ci	/* Forbid normal VMPTRLD if Enlightened version was used */
53248c2ecf20Sopenharmony_ci	if (vmx->nested.hv_evmcs)
53258c2ecf20Sopenharmony_ci		return 1;
53268c2ecf20Sopenharmony_ci
53278c2ecf20Sopenharmony_ci	if (vmx->nested.current_vmptr != vmptr) {
53288c2ecf20Sopenharmony_ci		struct kvm_host_map map;
53298c2ecf20Sopenharmony_ci		struct vmcs12 *new_vmcs12;
53308c2ecf20Sopenharmony_ci
53318c2ecf20Sopenharmony_ci		if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) {
53328c2ecf20Sopenharmony_ci			/*
53338c2ecf20Sopenharmony_ci			 * Reads from an unbacked page return all 1s,
53348c2ecf20Sopenharmony_ci			 * which means that the 32 bits located at the
53358c2ecf20Sopenharmony_ci			 * given physical address won't match the required
53368c2ecf20Sopenharmony_ci			 * VMCS12_REVISION identifier.
53378c2ecf20Sopenharmony_ci			 */
53388c2ecf20Sopenharmony_ci			return nested_vmx_fail(vcpu,
53398c2ecf20Sopenharmony_ci				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
53408c2ecf20Sopenharmony_ci		}
53418c2ecf20Sopenharmony_ci
53428c2ecf20Sopenharmony_ci		new_vmcs12 = map.hva;
53438c2ecf20Sopenharmony_ci
53448c2ecf20Sopenharmony_ci		if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
53458c2ecf20Sopenharmony_ci		    (new_vmcs12->hdr.shadow_vmcs &&
53468c2ecf20Sopenharmony_ci		     !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
53478c2ecf20Sopenharmony_ci			kvm_vcpu_unmap(vcpu, &map, false);
53488c2ecf20Sopenharmony_ci			return nested_vmx_fail(vcpu,
53498c2ecf20Sopenharmony_ci				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
53508c2ecf20Sopenharmony_ci		}
53518c2ecf20Sopenharmony_ci
53528c2ecf20Sopenharmony_ci		nested_release_vmcs12(vcpu);
53538c2ecf20Sopenharmony_ci
53548c2ecf20Sopenharmony_ci		/*
53558c2ecf20Sopenharmony_ci		 * Load VMCS12 from guest memory since it is not already
53568c2ecf20Sopenharmony_ci		 * cached.
53578c2ecf20Sopenharmony_ci		 */
53588c2ecf20Sopenharmony_ci		memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
53598c2ecf20Sopenharmony_ci		kvm_vcpu_unmap(vcpu, &map, false);
53608c2ecf20Sopenharmony_ci
53618c2ecf20Sopenharmony_ci		set_current_vmptr(vmx, vmptr);
53628c2ecf20Sopenharmony_ci	}
53638c2ecf20Sopenharmony_ci
53648c2ecf20Sopenharmony_ci	return nested_vmx_succeed(vcpu);
53658c2ecf20Sopenharmony_ci}
53668c2ecf20Sopenharmony_ci
53678c2ecf20Sopenharmony_ci/* Emulate the VMPTRST instruction */
53688c2ecf20Sopenharmony_cistatic int handle_vmptrst(struct kvm_vcpu *vcpu)
53698c2ecf20Sopenharmony_ci{
53708c2ecf20Sopenharmony_ci	unsigned long exit_qual = vmx_get_exit_qual(vcpu);
53718c2ecf20Sopenharmony_ci	u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
53728c2ecf20Sopenharmony_ci	gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
53738c2ecf20Sopenharmony_ci	struct x86_exception e;
53748c2ecf20Sopenharmony_ci	gva_t gva;
53758c2ecf20Sopenharmony_ci	int r;
53768c2ecf20Sopenharmony_ci
53778c2ecf20Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
53788c2ecf20Sopenharmony_ci		return 1;
53798c2ecf20Sopenharmony_ci
53808c2ecf20Sopenharmony_ci	if (unlikely(to_vmx(vcpu)->nested.hv_evmcs))
53818c2ecf20Sopenharmony_ci		return 1;
53828c2ecf20Sopenharmony_ci
53838c2ecf20Sopenharmony_ci	if (get_vmx_mem_address(vcpu, exit_qual, instr_info,
53848c2ecf20Sopenharmony_ci				true, sizeof(gpa_t), &gva))
53858c2ecf20Sopenharmony_ci		return 1;
53868c2ecf20Sopenharmony_ci	/* *_system ok, nested_vmx_check_permission has verified cpl=0 */
53878c2ecf20Sopenharmony_ci	r = kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
53888c2ecf20Sopenharmony_ci					sizeof(gpa_t), &e);
53898c2ecf20Sopenharmony_ci	if (r != X86EMUL_CONTINUE)
53908c2ecf20Sopenharmony_ci		return kvm_handle_memory_failure(vcpu, r, &e);
53918c2ecf20Sopenharmony_ci
53928c2ecf20Sopenharmony_ci	return nested_vmx_succeed(vcpu);
53938c2ecf20Sopenharmony_ci}
53948c2ecf20Sopenharmony_ci
53958c2ecf20Sopenharmony_ci#define EPTP_PA_MASK   GENMASK_ULL(51, 12)
53968c2ecf20Sopenharmony_ci
53978c2ecf20Sopenharmony_cistatic bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
53988c2ecf20Sopenharmony_ci{
53998c2ecf20Sopenharmony_ci	return VALID_PAGE(root_hpa) &&
54008c2ecf20Sopenharmony_ci		((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
54018c2ecf20Sopenharmony_ci}
54028c2ecf20Sopenharmony_ci
54038c2ecf20Sopenharmony_ci/* Emulate the INVEPT instruction */
54048c2ecf20Sopenharmony_cistatic int handle_invept(struct kvm_vcpu *vcpu)
54058c2ecf20Sopenharmony_ci{
54068c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
54078c2ecf20Sopenharmony_ci	u32 vmx_instruction_info, types;
54088c2ecf20Sopenharmony_ci	unsigned long type, roots_to_free;
54098c2ecf20Sopenharmony_ci	struct kvm_mmu *mmu;
54108c2ecf20Sopenharmony_ci	gva_t gva;
54118c2ecf20Sopenharmony_ci	struct x86_exception e;
54128c2ecf20Sopenharmony_ci	struct {
54138c2ecf20Sopenharmony_ci		u64 eptp, gpa;
54148c2ecf20Sopenharmony_ci	} operand;
54158c2ecf20Sopenharmony_ci	int i, r;
54168c2ecf20Sopenharmony_ci
54178c2ecf20Sopenharmony_ci	if (!(vmx->nested.msrs.secondary_ctls_high &
54188c2ecf20Sopenharmony_ci	      SECONDARY_EXEC_ENABLE_EPT) ||
54198c2ecf20Sopenharmony_ci	    !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
54208c2ecf20Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
54218c2ecf20Sopenharmony_ci		return 1;
54228c2ecf20Sopenharmony_ci	}
54238c2ecf20Sopenharmony_ci
54248c2ecf20Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
54258c2ecf20Sopenharmony_ci		return 1;
54268c2ecf20Sopenharmony_ci
54278c2ecf20Sopenharmony_ci	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
54288c2ecf20Sopenharmony_ci	type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
54298c2ecf20Sopenharmony_ci
54308c2ecf20Sopenharmony_ci	types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
54318c2ecf20Sopenharmony_ci
54328c2ecf20Sopenharmony_ci	if (type >= 32 || !(types & (1 << type)))
54338c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
54348c2ecf20Sopenharmony_ci
54358c2ecf20Sopenharmony_ci	/* According to the Intel VMX instruction reference, the memory
54368c2ecf20Sopenharmony_ci	 * operand is read even if it isn't needed (e.g., for type==global)
54378c2ecf20Sopenharmony_ci	 */
54388c2ecf20Sopenharmony_ci	if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
54398c2ecf20Sopenharmony_ci			vmx_instruction_info, false, sizeof(operand), &gva))
54408c2ecf20Sopenharmony_ci		return 1;
54418c2ecf20Sopenharmony_ci	r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
54428c2ecf20Sopenharmony_ci	if (r != X86EMUL_CONTINUE)
54438c2ecf20Sopenharmony_ci		return kvm_handle_memory_failure(vcpu, r, &e);
54448c2ecf20Sopenharmony_ci
54458c2ecf20Sopenharmony_ci	/*
54468c2ecf20Sopenharmony_ci	 * Nested EPT roots are always held through guest_mmu,
54478c2ecf20Sopenharmony_ci	 * not root_mmu.
54488c2ecf20Sopenharmony_ci	 */
54498c2ecf20Sopenharmony_ci	mmu = &vcpu->arch.guest_mmu;
54508c2ecf20Sopenharmony_ci
54518c2ecf20Sopenharmony_ci	switch (type) {
54528c2ecf20Sopenharmony_ci	case VMX_EPT_EXTENT_CONTEXT:
54538c2ecf20Sopenharmony_ci		if (!nested_vmx_check_eptp(vcpu, operand.eptp))
54548c2ecf20Sopenharmony_ci			return nested_vmx_fail(vcpu,
54558c2ecf20Sopenharmony_ci				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
54568c2ecf20Sopenharmony_ci
54578c2ecf20Sopenharmony_ci		roots_to_free = 0;
54588c2ecf20Sopenharmony_ci		if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd,
54598c2ecf20Sopenharmony_ci					    operand.eptp))
54608c2ecf20Sopenharmony_ci			roots_to_free |= KVM_MMU_ROOT_CURRENT;
54618c2ecf20Sopenharmony_ci
54628c2ecf20Sopenharmony_ci		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
54638c2ecf20Sopenharmony_ci			if (nested_ept_root_matches(mmu->prev_roots[i].hpa,
54648c2ecf20Sopenharmony_ci						    mmu->prev_roots[i].pgd,
54658c2ecf20Sopenharmony_ci						    operand.eptp))
54668c2ecf20Sopenharmony_ci				roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
54678c2ecf20Sopenharmony_ci		}
54688c2ecf20Sopenharmony_ci		break;
54698c2ecf20Sopenharmony_ci	case VMX_EPT_EXTENT_GLOBAL:
54708c2ecf20Sopenharmony_ci		roots_to_free = KVM_MMU_ROOTS_ALL;
54718c2ecf20Sopenharmony_ci		break;
54728c2ecf20Sopenharmony_ci	default:
54738c2ecf20Sopenharmony_ci		BUG();
54748c2ecf20Sopenharmony_ci		break;
54758c2ecf20Sopenharmony_ci	}
54768c2ecf20Sopenharmony_ci
54778c2ecf20Sopenharmony_ci	if (roots_to_free)
54788c2ecf20Sopenharmony_ci		kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
54798c2ecf20Sopenharmony_ci
54808c2ecf20Sopenharmony_ci	return nested_vmx_succeed(vcpu);
54818c2ecf20Sopenharmony_ci}
54828c2ecf20Sopenharmony_ci
54838c2ecf20Sopenharmony_cistatic int handle_invvpid(struct kvm_vcpu *vcpu)
54848c2ecf20Sopenharmony_ci{
54858c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
54868c2ecf20Sopenharmony_ci	u32 vmx_instruction_info;
54878c2ecf20Sopenharmony_ci	unsigned long type, types;
54888c2ecf20Sopenharmony_ci	gva_t gva;
54898c2ecf20Sopenharmony_ci	struct x86_exception e;
54908c2ecf20Sopenharmony_ci	struct {
54918c2ecf20Sopenharmony_ci		u64 vpid;
54928c2ecf20Sopenharmony_ci		u64 gla;
54938c2ecf20Sopenharmony_ci	} operand;
54948c2ecf20Sopenharmony_ci	u16 vpid02;
54958c2ecf20Sopenharmony_ci	int r;
54968c2ecf20Sopenharmony_ci
54978c2ecf20Sopenharmony_ci	if (!(vmx->nested.msrs.secondary_ctls_high &
54988c2ecf20Sopenharmony_ci	      SECONDARY_EXEC_ENABLE_VPID) ||
54998c2ecf20Sopenharmony_ci			!(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
55008c2ecf20Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
55018c2ecf20Sopenharmony_ci		return 1;
55028c2ecf20Sopenharmony_ci	}
55038c2ecf20Sopenharmony_ci
55048c2ecf20Sopenharmony_ci	if (!nested_vmx_check_permission(vcpu))
55058c2ecf20Sopenharmony_ci		return 1;
55068c2ecf20Sopenharmony_ci
55078c2ecf20Sopenharmony_ci	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
55088c2ecf20Sopenharmony_ci	type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
55098c2ecf20Sopenharmony_ci
55108c2ecf20Sopenharmony_ci	types = (vmx->nested.msrs.vpid_caps &
55118c2ecf20Sopenharmony_ci			VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
55128c2ecf20Sopenharmony_ci
55138c2ecf20Sopenharmony_ci	if (type >= 32 || !(types & (1 << type)))
55148c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu,
55158c2ecf20Sopenharmony_ci			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
55168c2ecf20Sopenharmony_ci
55178c2ecf20Sopenharmony_ci	/* according to the intel vmx instruction reference, the memory
55188c2ecf20Sopenharmony_ci	 * operand is read even if it isn't needed (e.g., for type==global)
55198c2ecf20Sopenharmony_ci	 */
55208c2ecf20Sopenharmony_ci	if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
55218c2ecf20Sopenharmony_ci			vmx_instruction_info, false, sizeof(operand), &gva))
55228c2ecf20Sopenharmony_ci		return 1;
55238c2ecf20Sopenharmony_ci	r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
55248c2ecf20Sopenharmony_ci	if (r != X86EMUL_CONTINUE)
55258c2ecf20Sopenharmony_ci		return kvm_handle_memory_failure(vcpu, r, &e);
55268c2ecf20Sopenharmony_ci
55278c2ecf20Sopenharmony_ci	if (operand.vpid >> 16)
55288c2ecf20Sopenharmony_ci		return nested_vmx_fail(vcpu,
55298c2ecf20Sopenharmony_ci			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
55308c2ecf20Sopenharmony_ci
55318c2ecf20Sopenharmony_ci	vpid02 = nested_get_vpid02(vcpu);
55328c2ecf20Sopenharmony_ci	switch (type) {
55338c2ecf20Sopenharmony_ci	case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
55348c2ecf20Sopenharmony_ci		if (!operand.vpid ||
55358c2ecf20Sopenharmony_ci		    is_noncanonical_address(operand.gla, vcpu))
55368c2ecf20Sopenharmony_ci			return nested_vmx_fail(vcpu,
55378c2ecf20Sopenharmony_ci				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
55388c2ecf20Sopenharmony_ci		vpid_sync_vcpu_addr(vpid02, operand.gla);
55398c2ecf20Sopenharmony_ci		break;
55408c2ecf20Sopenharmony_ci	case VMX_VPID_EXTENT_SINGLE_CONTEXT:
55418c2ecf20Sopenharmony_ci	case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
55428c2ecf20Sopenharmony_ci		if (!operand.vpid)
55438c2ecf20Sopenharmony_ci			return nested_vmx_fail(vcpu,
55448c2ecf20Sopenharmony_ci				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
55458c2ecf20Sopenharmony_ci		vpid_sync_context(vpid02);
55468c2ecf20Sopenharmony_ci		break;
55478c2ecf20Sopenharmony_ci	case VMX_VPID_EXTENT_ALL_CONTEXT:
55488c2ecf20Sopenharmony_ci		vpid_sync_context(vpid02);
55498c2ecf20Sopenharmony_ci		break;
55508c2ecf20Sopenharmony_ci	default:
55518c2ecf20Sopenharmony_ci		WARN_ON_ONCE(1);
55528c2ecf20Sopenharmony_ci		return kvm_skip_emulated_instruction(vcpu);
55538c2ecf20Sopenharmony_ci	}
55548c2ecf20Sopenharmony_ci
55558c2ecf20Sopenharmony_ci	/*
55568c2ecf20Sopenharmony_ci	 * Sync the shadow page tables if EPT is disabled, L1 is invalidating
55578c2ecf20Sopenharmony_ci	 * linear mappings for L2 (tagged with L2's VPID).  Free all roots as
55588c2ecf20Sopenharmony_ci	 * VPIDs are not tracked in the MMU role.
55598c2ecf20Sopenharmony_ci	 *
55608c2ecf20Sopenharmony_ci	 * Note, this operates on root_mmu, not guest_mmu, as L1 and L2 share
55618c2ecf20Sopenharmony_ci	 * an MMU when EPT is disabled.
55628c2ecf20Sopenharmony_ci	 *
55638c2ecf20Sopenharmony_ci	 * TODO: sync only the affected SPTEs for INVDIVIDUAL_ADDR.
55648c2ecf20Sopenharmony_ci	 */
55658c2ecf20Sopenharmony_ci	if (!enable_ept)
55668c2ecf20Sopenharmony_ci		kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu,
55678c2ecf20Sopenharmony_ci				   KVM_MMU_ROOTS_ALL);
55688c2ecf20Sopenharmony_ci
55698c2ecf20Sopenharmony_ci	return nested_vmx_succeed(vcpu);
55708c2ecf20Sopenharmony_ci}
55718c2ecf20Sopenharmony_ci
55728c2ecf20Sopenharmony_cistatic int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
55738c2ecf20Sopenharmony_ci				     struct vmcs12 *vmcs12)
55748c2ecf20Sopenharmony_ci{
55758c2ecf20Sopenharmony_ci	u32 index = kvm_rcx_read(vcpu);
55768c2ecf20Sopenharmony_ci	u64 new_eptp;
55778c2ecf20Sopenharmony_ci
55788c2ecf20Sopenharmony_ci	if (!nested_cpu_has_eptp_switching(vmcs12) ||
55798c2ecf20Sopenharmony_ci	    !nested_cpu_has_ept(vmcs12))
55808c2ecf20Sopenharmony_ci		return 1;
55818c2ecf20Sopenharmony_ci
55828c2ecf20Sopenharmony_ci	if (index >= VMFUNC_EPTP_ENTRIES)
55838c2ecf20Sopenharmony_ci		return 1;
55848c2ecf20Sopenharmony_ci
55858c2ecf20Sopenharmony_ci	if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
55868c2ecf20Sopenharmony_ci				     &new_eptp, index * 8, 8))
55878c2ecf20Sopenharmony_ci		return 1;
55888c2ecf20Sopenharmony_ci
55898c2ecf20Sopenharmony_ci	/*
55908c2ecf20Sopenharmony_ci	 * If the (L2) guest does a vmfunc to the currently
55918c2ecf20Sopenharmony_ci	 * active ept pointer, we don't have to do anything else
55928c2ecf20Sopenharmony_ci	 */
55938c2ecf20Sopenharmony_ci	if (vmcs12->ept_pointer != new_eptp) {
55948c2ecf20Sopenharmony_ci		if (!nested_vmx_check_eptp(vcpu, new_eptp))
55958c2ecf20Sopenharmony_ci			return 1;
55968c2ecf20Sopenharmony_ci
55978c2ecf20Sopenharmony_ci		vmcs12->ept_pointer = new_eptp;
55988c2ecf20Sopenharmony_ci
55998c2ecf20Sopenharmony_ci		kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
56008c2ecf20Sopenharmony_ci	}
56018c2ecf20Sopenharmony_ci
56028c2ecf20Sopenharmony_ci	return 0;
56038c2ecf20Sopenharmony_ci}
56048c2ecf20Sopenharmony_ci
56058c2ecf20Sopenharmony_cistatic int handle_vmfunc(struct kvm_vcpu *vcpu)
56068c2ecf20Sopenharmony_ci{
56078c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
56088c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12;
56098c2ecf20Sopenharmony_ci	u32 function = kvm_rax_read(vcpu);
56108c2ecf20Sopenharmony_ci
56118c2ecf20Sopenharmony_ci	/*
56128c2ecf20Sopenharmony_ci	 * VMFUNC is only supported for nested guests, but we always enable the
56138c2ecf20Sopenharmony_ci	 * secondary control for simplicity; for non-nested mode, fake that we
56148c2ecf20Sopenharmony_ci	 * didn't by injecting #UD.
56158c2ecf20Sopenharmony_ci	 */
56168c2ecf20Sopenharmony_ci	if (!is_guest_mode(vcpu)) {
56178c2ecf20Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
56188c2ecf20Sopenharmony_ci		return 1;
56198c2ecf20Sopenharmony_ci	}
56208c2ecf20Sopenharmony_ci
56218c2ecf20Sopenharmony_ci	vmcs12 = get_vmcs12(vcpu);
56228c2ecf20Sopenharmony_ci	if (!(vmcs12->vm_function_control & BIT_ULL(function)))
56238c2ecf20Sopenharmony_ci		goto fail;
56248c2ecf20Sopenharmony_ci
56258c2ecf20Sopenharmony_ci	switch (function) {
56268c2ecf20Sopenharmony_ci	case 0:
56278c2ecf20Sopenharmony_ci		if (nested_vmx_eptp_switching(vcpu, vmcs12))
56288c2ecf20Sopenharmony_ci			goto fail;
56298c2ecf20Sopenharmony_ci		break;
56308c2ecf20Sopenharmony_ci	default:
56318c2ecf20Sopenharmony_ci		goto fail;
56328c2ecf20Sopenharmony_ci	}
56338c2ecf20Sopenharmony_ci	return kvm_skip_emulated_instruction(vcpu);
56348c2ecf20Sopenharmony_ci
56358c2ecf20Sopenharmony_cifail:
56368c2ecf20Sopenharmony_ci	/*
56378c2ecf20Sopenharmony_ci	 * This is effectively a reflected VM-Exit, as opposed to a synthesized
56388c2ecf20Sopenharmony_ci	 * nested VM-Exit.  Pass the original exit reason, i.e. don't hardcode
56398c2ecf20Sopenharmony_ci	 * EXIT_REASON_VMFUNC as the exit reason.
56408c2ecf20Sopenharmony_ci	 */
56418c2ecf20Sopenharmony_ci	nested_vmx_vmexit(vcpu, vmx->exit_reason.full,
56428c2ecf20Sopenharmony_ci			  vmx_get_intr_info(vcpu),
56438c2ecf20Sopenharmony_ci			  vmx_get_exit_qual(vcpu));
56448c2ecf20Sopenharmony_ci	return 1;
56458c2ecf20Sopenharmony_ci}
56468c2ecf20Sopenharmony_ci
56478c2ecf20Sopenharmony_ci/*
56488c2ecf20Sopenharmony_ci * Return true if an IO instruction with the specified port and size should cause
56498c2ecf20Sopenharmony_ci * a VM-exit into L1.
56508c2ecf20Sopenharmony_ci */
56518c2ecf20Sopenharmony_cibool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
56528c2ecf20Sopenharmony_ci				 int size)
56538c2ecf20Sopenharmony_ci{
56548c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
56558c2ecf20Sopenharmony_ci	gpa_t bitmap, last_bitmap;
56568c2ecf20Sopenharmony_ci	u8 b;
56578c2ecf20Sopenharmony_ci
56588c2ecf20Sopenharmony_ci	last_bitmap = (gpa_t)-1;
56598c2ecf20Sopenharmony_ci	b = -1;
56608c2ecf20Sopenharmony_ci
56618c2ecf20Sopenharmony_ci	while (size > 0) {
56628c2ecf20Sopenharmony_ci		if (port < 0x8000)
56638c2ecf20Sopenharmony_ci			bitmap = vmcs12->io_bitmap_a;
56648c2ecf20Sopenharmony_ci		else if (port < 0x10000)
56658c2ecf20Sopenharmony_ci			bitmap = vmcs12->io_bitmap_b;
56668c2ecf20Sopenharmony_ci		else
56678c2ecf20Sopenharmony_ci			return true;
56688c2ecf20Sopenharmony_ci		bitmap += (port & 0x7fff) / 8;
56698c2ecf20Sopenharmony_ci
56708c2ecf20Sopenharmony_ci		if (last_bitmap != bitmap)
56718c2ecf20Sopenharmony_ci			if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
56728c2ecf20Sopenharmony_ci				return true;
56738c2ecf20Sopenharmony_ci		if (b & (1 << (port & 7)))
56748c2ecf20Sopenharmony_ci			return true;
56758c2ecf20Sopenharmony_ci
56768c2ecf20Sopenharmony_ci		port++;
56778c2ecf20Sopenharmony_ci		size--;
56788c2ecf20Sopenharmony_ci		last_bitmap = bitmap;
56798c2ecf20Sopenharmony_ci	}
56808c2ecf20Sopenharmony_ci
56818c2ecf20Sopenharmony_ci	return false;
56828c2ecf20Sopenharmony_ci}
56838c2ecf20Sopenharmony_ci
56848c2ecf20Sopenharmony_cistatic bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
56858c2ecf20Sopenharmony_ci				       struct vmcs12 *vmcs12)
56868c2ecf20Sopenharmony_ci{
56878c2ecf20Sopenharmony_ci	unsigned long exit_qualification;
56888c2ecf20Sopenharmony_ci	unsigned short port;
56898c2ecf20Sopenharmony_ci	int size;
56908c2ecf20Sopenharmony_ci
56918c2ecf20Sopenharmony_ci	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
56928c2ecf20Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
56938c2ecf20Sopenharmony_ci
56948c2ecf20Sopenharmony_ci	exit_qualification = vmx_get_exit_qual(vcpu);
56958c2ecf20Sopenharmony_ci
56968c2ecf20Sopenharmony_ci	port = exit_qualification >> 16;
56978c2ecf20Sopenharmony_ci	size = (exit_qualification & 7) + 1;
56988c2ecf20Sopenharmony_ci
56998c2ecf20Sopenharmony_ci	return nested_vmx_check_io_bitmaps(vcpu, port, size);
57008c2ecf20Sopenharmony_ci}
57018c2ecf20Sopenharmony_ci
57028c2ecf20Sopenharmony_ci/*
57038c2ecf20Sopenharmony_ci * Return 1 if we should exit from L2 to L1 to handle an MSR access,
57048c2ecf20Sopenharmony_ci * rather than handle it ourselves in L0. I.e., check whether L1 expressed
57058c2ecf20Sopenharmony_ci * disinterest in the current event (read or write a specific MSR) by using an
57068c2ecf20Sopenharmony_ci * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
57078c2ecf20Sopenharmony_ci */
57088c2ecf20Sopenharmony_cistatic bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
57098c2ecf20Sopenharmony_ci					struct vmcs12 *vmcs12,
57108c2ecf20Sopenharmony_ci					union vmx_exit_reason exit_reason)
57118c2ecf20Sopenharmony_ci{
57128c2ecf20Sopenharmony_ci	u32 msr_index = kvm_rcx_read(vcpu);
57138c2ecf20Sopenharmony_ci	gpa_t bitmap;
57148c2ecf20Sopenharmony_ci
57158c2ecf20Sopenharmony_ci	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
57168c2ecf20Sopenharmony_ci		return true;
57178c2ecf20Sopenharmony_ci
57188c2ecf20Sopenharmony_ci	/*
57198c2ecf20Sopenharmony_ci	 * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
57208c2ecf20Sopenharmony_ci	 * for the four combinations of read/write and low/high MSR numbers.
57218c2ecf20Sopenharmony_ci	 * First we need to figure out which of the four to use:
57228c2ecf20Sopenharmony_ci	 */
57238c2ecf20Sopenharmony_ci	bitmap = vmcs12->msr_bitmap;
57248c2ecf20Sopenharmony_ci	if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
57258c2ecf20Sopenharmony_ci		bitmap += 2048;
57268c2ecf20Sopenharmony_ci	if (msr_index >= 0xc0000000) {
57278c2ecf20Sopenharmony_ci		msr_index -= 0xc0000000;
57288c2ecf20Sopenharmony_ci		bitmap += 1024;
57298c2ecf20Sopenharmony_ci	}
57308c2ecf20Sopenharmony_ci
57318c2ecf20Sopenharmony_ci	/* Then read the msr_index'th bit from this bitmap: */
57328c2ecf20Sopenharmony_ci	if (msr_index < 1024*8) {
57338c2ecf20Sopenharmony_ci		unsigned char b;
57348c2ecf20Sopenharmony_ci		if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
57358c2ecf20Sopenharmony_ci			return true;
57368c2ecf20Sopenharmony_ci		return 1 & (b >> (msr_index & 7));
57378c2ecf20Sopenharmony_ci	} else
57388c2ecf20Sopenharmony_ci		return true; /* let L1 handle the wrong parameter */
57398c2ecf20Sopenharmony_ci}
57408c2ecf20Sopenharmony_ci
57418c2ecf20Sopenharmony_ci/*
57428c2ecf20Sopenharmony_ci * Return 1 if we should exit from L2 to L1 to handle a CR access exit,
57438c2ecf20Sopenharmony_ci * rather than handle it ourselves in L0. I.e., check if L1 wanted to
57448c2ecf20Sopenharmony_ci * intercept (via guest_host_mask etc.) the current event.
57458c2ecf20Sopenharmony_ci */
57468c2ecf20Sopenharmony_cistatic bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
57478c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12)
57488c2ecf20Sopenharmony_ci{
57498c2ecf20Sopenharmony_ci	unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
57508c2ecf20Sopenharmony_ci	int cr = exit_qualification & 15;
57518c2ecf20Sopenharmony_ci	int reg;
57528c2ecf20Sopenharmony_ci	unsigned long val;
57538c2ecf20Sopenharmony_ci
57548c2ecf20Sopenharmony_ci	switch ((exit_qualification >> 4) & 3) {
57558c2ecf20Sopenharmony_ci	case 0: /* mov to cr */
57568c2ecf20Sopenharmony_ci		reg = (exit_qualification >> 8) & 15;
57578c2ecf20Sopenharmony_ci		val = kvm_register_readl(vcpu, reg);
57588c2ecf20Sopenharmony_ci		switch (cr) {
57598c2ecf20Sopenharmony_ci		case 0:
57608c2ecf20Sopenharmony_ci			if (vmcs12->cr0_guest_host_mask &
57618c2ecf20Sopenharmony_ci			    (val ^ vmcs12->cr0_read_shadow))
57628c2ecf20Sopenharmony_ci				return true;
57638c2ecf20Sopenharmony_ci			break;
57648c2ecf20Sopenharmony_ci		case 3:
57658c2ecf20Sopenharmony_ci			if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
57668c2ecf20Sopenharmony_ci				return true;
57678c2ecf20Sopenharmony_ci			break;
57688c2ecf20Sopenharmony_ci		case 4:
57698c2ecf20Sopenharmony_ci			if (vmcs12->cr4_guest_host_mask &
57708c2ecf20Sopenharmony_ci			    (vmcs12->cr4_read_shadow ^ val))
57718c2ecf20Sopenharmony_ci				return true;
57728c2ecf20Sopenharmony_ci			break;
57738c2ecf20Sopenharmony_ci		case 8:
57748c2ecf20Sopenharmony_ci			if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
57758c2ecf20Sopenharmony_ci				return true;
57768c2ecf20Sopenharmony_ci			break;
57778c2ecf20Sopenharmony_ci		}
57788c2ecf20Sopenharmony_ci		break;
57798c2ecf20Sopenharmony_ci	case 2: /* clts */
57808c2ecf20Sopenharmony_ci		if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
57818c2ecf20Sopenharmony_ci		    (vmcs12->cr0_read_shadow & X86_CR0_TS))
57828c2ecf20Sopenharmony_ci			return true;
57838c2ecf20Sopenharmony_ci		break;
57848c2ecf20Sopenharmony_ci	case 1: /* mov from cr */
57858c2ecf20Sopenharmony_ci		switch (cr) {
57868c2ecf20Sopenharmony_ci		case 3:
57878c2ecf20Sopenharmony_ci			if (vmcs12->cpu_based_vm_exec_control &
57888c2ecf20Sopenharmony_ci			    CPU_BASED_CR3_STORE_EXITING)
57898c2ecf20Sopenharmony_ci				return true;
57908c2ecf20Sopenharmony_ci			break;
57918c2ecf20Sopenharmony_ci		case 8:
57928c2ecf20Sopenharmony_ci			if (vmcs12->cpu_based_vm_exec_control &
57938c2ecf20Sopenharmony_ci			    CPU_BASED_CR8_STORE_EXITING)
57948c2ecf20Sopenharmony_ci				return true;
57958c2ecf20Sopenharmony_ci			break;
57968c2ecf20Sopenharmony_ci		}
57978c2ecf20Sopenharmony_ci		break;
57988c2ecf20Sopenharmony_ci	case 3: /* lmsw */
57998c2ecf20Sopenharmony_ci		/*
58008c2ecf20Sopenharmony_ci		 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
58018c2ecf20Sopenharmony_ci		 * cr0. Other attempted changes are ignored, with no exit.
58028c2ecf20Sopenharmony_ci		 */
58038c2ecf20Sopenharmony_ci		val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
58048c2ecf20Sopenharmony_ci		if (vmcs12->cr0_guest_host_mask & 0xe &
58058c2ecf20Sopenharmony_ci		    (val ^ vmcs12->cr0_read_shadow))
58068c2ecf20Sopenharmony_ci			return true;
58078c2ecf20Sopenharmony_ci		if ((vmcs12->cr0_guest_host_mask & 0x1) &&
58088c2ecf20Sopenharmony_ci		    !(vmcs12->cr0_read_shadow & 0x1) &&
58098c2ecf20Sopenharmony_ci		    (val & 0x1))
58108c2ecf20Sopenharmony_ci			return true;
58118c2ecf20Sopenharmony_ci		break;
58128c2ecf20Sopenharmony_ci	}
58138c2ecf20Sopenharmony_ci	return false;
58148c2ecf20Sopenharmony_ci}
58158c2ecf20Sopenharmony_ci
58168c2ecf20Sopenharmony_cistatic bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
58178c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12, gpa_t bitmap)
58188c2ecf20Sopenharmony_ci{
58198c2ecf20Sopenharmony_ci	u32 vmx_instruction_info;
58208c2ecf20Sopenharmony_ci	unsigned long field;
58218c2ecf20Sopenharmony_ci	u8 b;
58228c2ecf20Sopenharmony_ci
58238c2ecf20Sopenharmony_ci	if (!nested_cpu_has_shadow_vmcs(vmcs12))
58248c2ecf20Sopenharmony_ci		return true;
58258c2ecf20Sopenharmony_ci
58268c2ecf20Sopenharmony_ci	/* Decode instruction info and find the field to access */
58278c2ecf20Sopenharmony_ci	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
58288c2ecf20Sopenharmony_ci	field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
58298c2ecf20Sopenharmony_ci
58308c2ecf20Sopenharmony_ci	/* Out-of-range fields always cause a VM exit from L2 to L1 */
58318c2ecf20Sopenharmony_ci	if (field >> 15)
58328c2ecf20Sopenharmony_ci		return true;
58338c2ecf20Sopenharmony_ci
58348c2ecf20Sopenharmony_ci	if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1))
58358c2ecf20Sopenharmony_ci		return true;
58368c2ecf20Sopenharmony_ci
58378c2ecf20Sopenharmony_ci	return 1 & (b >> (field & 7));
58388c2ecf20Sopenharmony_ci}
58398c2ecf20Sopenharmony_ci
58408c2ecf20Sopenharmony_cistatic bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12)
58418c2ecf20Sopenharmony_ci{
58428c2ecf20Sopenharmony_ci	u32 entry_intr_info = vmcs12->vm_entry_intr_info_field;
58438c2ecf20Sopenharmony_ci
58448c2ecf20Sopenharmony_ci	if (nested_cpu_has_mtf(vmcs12))
58458c2ecf20Sopenharmony_ci		return true;
58468c2ecf20Sopenharmony_ci
58478c2ecf20Sopenharmony_ci	/*
58488c2ecf20Sopenharmony_ci	 * An MTF VM-exit may be injected into the guest by setting the
58498c2ecf20Sopenharmony_ci	 * interruption-type to 7 (other event) and the vector field to 0. Such
58508c2ecf20Sopenharmony_ci	 * is the case regardless of the 'monitor trap flag' VM-execution
58518c2ecf20Sopenharmony_ci	 * control.
58528c2ecf20Sopenharmony_ci	 */
58538c2ecf20Sopenharmony_ci	return entry_intr_info == (INTR_INFO_VALID_MASK
58548c2ecf20Sopenharmony_ci				   | INTR_TYPE_OTHER_EVENT);
58558c2ecf20Sopenharmony_ci}
58568c2ecf20Sopenharmony_ci
58578c2ecf20Sopenharmony_ci/*
58588c2ecf20Sopenharmony_ci * Return true if L0 wants to handle an exit from L2 regardless of whether or not
58598c2ecf20Sopenharmony_ci * L1 wants the exit.  Only call this when in is_guest_mode (L2).
58608c2ecf20Sopenharmony_ci */
58618c2ecf20Sopenharmony_cistatic bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
58628c2ecf20Sopenharmony_ci				     union vmx_exit_reason exit_reason)
58638c2ecf20Sopenharmony_ci{
58648c2ecf20Sopenharmony_ci	u32 intr_info;
58658c2ecf20Sopenharmony_ci
58668c2ecf20Sopenharmony_ci	switch ((u16)exit_reason.basic) {
58678c2ecf20Sopenharmony_ci	case EXIT_REASON_EXCEPTION_NMI:
58688c2ecf20Sopenharmony_ci		intr_info = vmx_get_intr_info(vcpu);
58698c2ecf20Sopenharmony_ci		if (is_nmi(intr_info))
58708c2ecf20Sopenharmony_ci			return true;
58718c2ecf20Sopenharmony_ci		else if (is_page_fault(intr_info))
58728c2ecf20Sopenharmony_ci			return vcpu->arch.apf.host_apf_flags ||
58738c2ecf20Sopenharmony_ci			       vmx_need_pf_intercept(vcpu);
58748c2ecf20Sopenharmony_ci		else if (is_debug(intr_info) &&
58758c2ecf20Sopenharmony_ci			 vcpu->guest_debug &
58768c2ecf20Sopenharmony_ci			 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
58778c2ecf20Sopenharmony_ci			return true;
58788c2ecf20Sopenharmony_ci		else if (is_breakpoint(intr_info) &&
58798c2ecf20Sopenharmony_ci			 vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
58808c2ecf20Sopenharmony_ci			return true;
58818c2ecf20Sopenharmony_ci		else if (is_alignment_check(intr_info) &&
58828c2ecf20Sopenharmony_ci			 !vmx_guest_inject_ac(vcpu))
58838c2ecf20Sopenharmony_ci			return true;
58848c2ecf20Sopenharmony_ci		return false;
58858c2ecf20Sopenharmony_ci	case EXIT_REASON_EXTERNAL_INTERRUPT:
58868c2ecf20Sopenharmony_ci		return true;
58878c2ecf20Sopenharmony_ci	case EXIT_REASON_MCE_DURING_VMENTRY:
58888c2ecf20Sopenharmony_ci		return true;
58898c2ecf20Sopenharmony_ci	case EXIT_REASON_EPT_VIOLATION:
58908c2ecf20Sopenharmony_ci		/*
58918c2ecf20Sopenharmony_ci		 * L0 always deals with the EPT violation. If nested EPT is
58928c2ecf20Sopenharmony_ci		 * used, and the nested mmu code discovers that the address is
58938c2ecf20Sopenharmony_ci		 * missing in the guest EPT table (EPT12), the EPT violation
58948c2ecf20Sopenharmony_ci		 * will be injected with nested_ept_inject_page_fault()
58958c2ecf20Sopenharmony_ci		 */
58968c2ecf20Sopenharmony_ci		return true;
58978c2ecf20Sopenharmony_ci	case EXIT_REASON_EPT_MISCONFIG:
58988c2ecf20Sopenharmony_ci		/*
58998c2ecf20Sopenharmony_ci		 * L2 never uses directly L1's EPT, but rather L0's own EPT
59008c2ecf20Sopenharmony_ci		 * table (shadow on EPT) or a merged EPT table that L0 built
59018c2ecf20Sopenharmony_ci		 * (EPT on EPT). So any problems with the structure of the
59028c2ecf20Sopenharmony_ci		 * table is L0's fault.
59038c2ecf20Sopenharmony_ci		 */
59048c2ecf20Sopenharmony_ci		return true;
59058c2ecf20Sopenharmony_ci	case EXIT_REASON_PREEMPTION_TIMER:
59068c2ecf20Sopenharmony_ci		return true;
59078c2ecf20Sopenharmony_ci	case EXIT_REASON_PML_FULL:
59088c2ecf20Sopenharmony_ci		/* We emulate PML support to L1. */
59098c2ecf20Sopenharmony_ci		return true;
59108c2ecf20Sopenharmony_ci	case EXIT_REASON_VMFUNC:
59118c2ecf20Sopenharmony_ci		/* VM functions are emulated through L2->L0 vmexits. */
59128c2ecf20Sopenharmony_ci		return true;
59138c2ecf20Sopenharmony_ci	case EXIT_REASON_ENCLS:
59148c2ecf20Sopenharmony_ci		/* SGX is never exposed to L1 */
59158c2ecf20Sopenharmony_ci		return true;
59168c2ecf20Sopenharmony_ci	default:
59178c2ecf20Sopenharmony_ci		break;
59188c2ecf20Sopenharmony_ci	}
59198c2ecf20Sopenharmony_ci	return false;
59208c2ecf20Sopenharmony_ci}
59218c2ecf20Sopenharmony_ci
59228c2ecf20Sopenharmony_ci/*
59238c2ecf20Sopenharmony_ci * Return 1 if L1 wants to intercept an exit from L2.  Only call this when in
59248c2ecf20Sopenharmony_ci * is_guest_mode (L2).
59258c2ecf20Sopenharmony_ci */
59268c2ecf20Sopenharmony_cistatic bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
59278c2ecf20Sopenharmony_ci				     union vmx_exit_reason exit_reason)
59288c2ecf20Sopenharmony_ci{
59298c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
59308c2ecf20Sopenharmony_ci	u32 intr_info;
59318c2ecf20Sopenharmony_ci
59328c2ecf20Sopenharmony_ci	switch ((u16)exit_reason.basic) {
59338c2ecf20Sopenharmony_ci	case EXIT_REASON_EXCEPTION_NMI:
59348c2ecf20Sopenharmony_ci		intr_info = vmx_get_intr_info(vcpu);
59358c2ecf20Sopenharmony_ci		if (is_nmi(intr_info))
59368c2ecf20Sopenharmony_ci			return true;
59378c2ecf20Sopenharmony_ci		else if (is_page_fault(intr_info))
59388c2ecf20Sopenharmony_ci			return true;
59398c2ecf20Sopenharmony_ci		return vmcs12->exception_bitmap &
59408c2ecf20Sopenharmony_ci				(1u << (intr_info & INTR_INFO_VECTOR_MASK));
59418c2ecf20Sopenharmony_ci	case EXIT_REASON_EXTERNAL_INTERRUPT:
59428c2ecf20Sopenharmony_ci		return nested_exit_on_intr(vcpu);
59438c2ecf20Sopenharmony_ci	case EXIT_REASON_TRIPLE_FAULT:
59448c2ecf20Sopenharmony_ci		return true;
59458c2ecf20Sopenharmony_ci	case EXIT_REASON_INTERRUPT_WINDOW:
59468c2ecf20Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING);
59478c2ecf20Sopenharmony_ci	case EXIT_REASON_NMI_WINDOW:
59488c2ecf20Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING);
59498c2ecf20Sopenharmony_ci	case EXIT_REASON_TASK_SWITCH:
59508c2ecf20Sopenharmony_ci		return true;
59518c2ecf20Sopenharmony_ci	case EXIT_REASON_CPUID:
59528c2ecf20Sopenharmony_ci		return true;
59538c2ecf20Sopenharmony_ci	case EXIT_REASON_HLT:
59548c2ecf20Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
59558c2ecf20Sopenharmony_ci	case EXIT_REASON_INVD:
59568c2ecf20Sopenharmony_ci		return true;
59578c2ecf20Sopenharmony_ci	case EXIT_REASON_INVLPG:
59588c2ecf20Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
59598c2ecf20Sopenharmony_ci	case EXIT_REASON_RDPMC:
59608c2ecf20Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
59618c2ecf20Sopenharmony_ci	case EXIT_REASON_RDRAND:
59628c2ecf20Sopenharmony_ci		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
59638c2ecf20Sopenharmony_ci	case EXIT_REASON_RDSEED:
59648c2ecf20Sopenharmony_ci		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
59658c2ecf20Sopenharmony_ci	case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
59668c2ecf20Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
59678c2ecf20Sopenharmony_ci	case EXIT_REASON_VMREAD:
59688c2ecf20Sopenharmony_ci		return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
59698c2ecf20Sopenharmony_ci			vmcs12->vmread_bitmap);
59708c2ecf20Sopenharmony_ci	case EXIT_REASON_VMWRITE:
59718c2ecf20Sopenharmony_ci		return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
59728c2ecf20Sopenharmony_ci			vmcs12->vmwrite_bitmap);
59738c2ecf20Sopenharmony_ci	case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
59748c2ecf20Sopenharmony_ci	case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
59758c2ecf20Sopenharmony_ci	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
59768c2ecf20Sopenharmony_ci	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
59778c2ecf20Sopenharmony_ci	case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
59788c2ecf20Sopenharmony_ci		/*
59798c2ecf20Sopenharmony_ci		 * VMX instructions trap unconditionally. This allows L1 to
59808c2ecf20Sopenharmony_ci		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
59818c2ecf20Sopenharmony_ci		 */
59828c2ecf20Sopenharmony_ci		return true;
59838c2ecf20Sopenharmony_ci	case EXIT_REASON_CR_ACCESS:
59848c2ecf20Sopenharmony_ci		return nested_vmx_exit_handled_cr(vcpu, vmcs12);
59858c2ecf20Sopenharmony_ci	case EXIT_REASON_DR_ACCESS:
59868c2ecf20Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
59878c2ecf20Sopenharmony_ci	case EXIT_REASON_IO_INSTRUCTION:
59888c2ecf20Sopenharmony_ci		return nested_vmx_exit_handled_io(vcpu, vmcs12);
59898c2ecf20Sopenharmony_ci	case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
59908c2ecf20Sopenharmony_ci		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
59918c2ecf20Sopenharmony_ci	case EXIT_REASON_MSR_READ:
59928c2ecf20Sopenharmony_ci	case EXIT_REASON_MSR_WRITE:
59938c2ecf20Sopenharmony_ci		return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
59948c2ecf20Sopenharmony_ci	case EXIT_REASON_INVALID_STATE:
59958c2ecf20Sopenharmony_ci		return true;
59968c2ecf20Sopenharmony_ci	case EXIT_REASON_MWAIT_INSTRUCTION:
59978c2ecf20Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
59988c2ecf20Sopenharmony_ci	case EXIT_REASON_MONITOR_TRAP_FLAG:
59998c2ecf20Sopenharmony_ci		return nested_vmx_exit_handled_mtf(vmcs12);
60008c2ecf20Sopenharmony_ci	case EXIT_REASON_MONITOR_INSTRUCTION:
60018c2ecf20Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
60028c2ecf20Sopenharmony_ci	case EXIT_REASON_PAUSE_INSTRUCTION:
60038c2ecf20Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
60048c2ecf20Sopenharmony_ci			nested_cpu_has2(vmcs12,
60058c2ecf20Sopenharmony_ci				SECONDARY_EXEC_PAUSE_LOOP_EXITING);
60068c2ecf20Sopenharmony_ci	case EXIT_REASON_MCE_DURING_VMENTRY:
60078c2ecf20Sopenharmony_ci		return true;
60088c2ecf20Sopenharmony_ci	case EXIT_REASON_TPR_BELOW_THRESHOLD:
60098c2ecf20Sopenharmony_ci		return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
60108c2ecf20Sopenharmony_ci	case EXIT_REASON_APIC_ACCESS:
60118c2ecf20Sopenharmony_ci	case EXIT_REASON_APIC_WRITE:
60128c2ecf20Sopenharmony_ci	case EXIT_REASON_EOI_INDUCED:
60138c2ecf20Sopenharmony_ci		/*
60148c2ecf20Sopenharmony_ci		 * The controls for "virtualize APIC accesses," "APIC-
60158c2ecf20Sopenharmony_ci		 * register virtualization," and "virtual-interrupt
60168c2ecf20Sopenharmony_ci		 * delivery" only come from vmcs12.
60178c2ecf20Sopenharmony_ci		 */
60188c2ecf20Sopenharmony_ci		return true;
60198c2ecf20Sopenharmony_ci	case EXIT_REASON_INVPCID:
60208c2ecf20Sopenharmony_ci		return
60218c2ecf20Sopenharmony_ci			nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
60228c2ecf20Sopenharmony_ci			nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
60238c2ecf20Sopenharmony_ci	case EXIT_REASON_WBINVD:
60248c2ecf20Sopenharmony_ci		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
60258c2ecf20Sopenharmony_ci	case EXIT_REASON_XSETBV:
60268c2ecf20Sopenharmony_ci		return true;
60278c2ecf20Sopenharmony_ci	case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
60288c2ecf20Sopenharmony_ci		/*
60298c2ecf20Sopenharmony_ci		 * This should never happen, since it is not possible to
60308c2ecf20Sopenharmony_ci		 * set XSS to a non-zero value---neither in L1 nor in L2.
60318c2ecf20Sopenharmony_ci		 * If if it were, XSS would have to be checked against
60328c2ecf20Sopenharmony_ci		 * the XSS exit bitmap in vmcs12.
60338c2ecf20Sopenharmony_ci		 */
60348c2ecf20Sopenharmony_ci		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
60358c2ecf20Sopenharmony_ci	case EXIT_REASON_UMWAIT:
60368c2ecf20Sopenharmony_ci	case EXIT_REASON_TPAUSE:
60378c2ecf20Sopenharmony_ci		return nested_cpu_has2(vmcs12,
60388c2ecf20Sopenharmony_ci			SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE);
60398c2ecf20Sopenharmony_ci	default:
60408c2ecf20Sopenharmony_ci		return true;
60418c2ecf20Sopenharmony_ci	}
60428c2ecf20Sopenharmony_ci}
60438c2ecf20Sopenharmony_ci
60448c2ecf20Sopenharmony_ci/*
60458c2ecf20Sopenharmony_ci * Conditionally reflect a VM-Exit into L1.  Returns %true if the VM-Exit was
60468c2ecf20Sopenharmony_ci * reflected into L1.
60478c2ecf20Sopenharmony_ci */
60488c2ecf20Sopenharmony_cibool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
60498c2ecf20Sopenharmony_ci{
60508c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
60518c2ecf20Sopenharmony_ci	union vmx_exit_reason exit_reason = vmx->exit_reason;
60528c2ecf20Sopenharmony_ci	unsigned long exit_qual;
60538c2ecf20Sopenharmony_ci	u32 exit_intr_info;
60548c2ecf20Sopenharmony_ci
60558c2ecf20Sopenharmony_ci	WARN_ON_ONCE(vmx->nested.nested_run_pending);
60568c2ecf20Sopenharmony_ci
60578c2ecf20Sopenharmony_ci	/*
60588c2ecf20Sopenharmony_ci	 * Late nested VM-Fail shares the same flow as nested VM-Exit since KVM
60598c2ecf20Sopenharmony_ci	 * has already loaded L2's state.
60608c2ecf20Sopenharmony_ci	 */
60618c2ecf20Sopenharmony_ci	if (unlikely(vmx->fail)) {
60628c2ecf20Sopenharmony_ci		trace_kvm_nested_vmenter_failed(
60638c2ecf20Sopenharmony_ci			"hardware VM-instruction error: ",
60648c2ecf20Sopenharmony_ci			vmcs_read32(VM_INSTRUCTION_ERROR));
60658c2ecf20Sopenharmony_ci		exit_intr_info = 0;
60668c2ecf20Sopenharmony_ci		exit_qual = 0;
60678c2ecf20Sopenharmony_ci		goto reflect_vmexit;
60688c2ecf20Sopenharmony_ci	}
60698c2ecf20Sopenharmony_ci
60708c2ecf20Sopenharmony_ci	trace_kvm_nested_vmexit(exit_reason.full, vcpu, KVM_ISA_VMX);
60718c2ecf20Sopenharmony_ci
60728c2ecf20Sopenharmony_ci	/* If L0 (KVM) wants the exit, it trumps L1's desires. */
60738c2ecf20Sopenharmony_ci	if (nested_vmx_l0_wants_exit(vcpu, exit_reason))
60748c2ecf20Sopenharmony_ci		return false;
60758c2ecf20Sopenharmony_ci
60768c2ecf20Sopenharmony_ci	/* If L1 doesn't want the exit, handle it in L0. */
60778c2ecf20Sopenharmony_ci	if (!nested_vmx_l1_wants_exit(vcpu, exit_reason))
60788c2ecf20Sopenharmony_ci		return false;
60798c2ecf20Sopenharmony_ci
60808c2ecf20Sopenharmony_ci	/*
60818c2ecf20Sopenharmony_ci	 * vmcs.VM_EXIT_INTR_INFO is only valid for EXCEPTION_NMI exits.  For
60828c2ecf20Sopenharmony_ci	 * EXTERNAL_INTERRUPT, the value for vmcs12->vm_exit_intr_info would
60838c2ecf20Sopenharmony_ci	 * need to be synthesized by querying the in-kernel LAPIC, but external
60848c2ecf20Sopenharmony_ci	 * interrupts are never reflected to L1 so it's a non-issue.
60858c2ecf20Sopenharmony_ci	 */
60868c2ecf20Sopenharmony_ci	exit_intr_info = vmx_get_intr_info(vcpu);
60878c2ecf20Sopenharmony_ci	if (is_exception_with_error_code(exit_intr_info)) {
60888c2ecf20Sopenharmony_ci		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
60898c2ecf20Sopenharmony_ci
60908c2ecf20Sopenharmony_ci		vmcs12->vm_exit_intr_error_code =
60918c2ecf20Sopenharmony_ci			vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
60928c2ecf20Sopenharmony_ci	}
60938c2ecf20Sopenharmony_ci	exit_qual = vmx_get_exit_qual(vcpu);
60948c2ecf20Sopenharmony_ci
60958c2ecf20Sopenharmony_cireflect_vmexit:
60968c2ecf20Sopenharmony_ci	nested_vmx_vmexit(vcpu, exit_reason.full, exit_intr_info, exit_qual);
60978c2ecf20Sopenharmony_ci	return true;
60988c2ecf20Sopenharmony_ci}
60998c2ecf20Sopenharmony_ci
61008c2ecf20Sopenharmony_cistatic int vmx_get_nested_state(struct kvm_vcpu *vcpu,
61018c2ecf20Sopenharmony_ci				struct kvm_nested_state __user *user_kvm_nested_state,
61028c2ecf20Sopenharmony_ci				u32 user_data_size)
61038c2ecf20Sopenharmony_ci{
61048c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx;
61058c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12;
61068c2ecf20Sopenharmony_ci	struct kvm_nested_state kvm_state = {
61078c2ecf20Sopenharmony_ci		.flags = 0,
61088c2ecf20Sopenharmony_ci		.format = KVM_STATE_NESTED_FORMAT_VMX,
61098c2ecf20Sopenharmony_ci		.size = sizeof(kvm_state),
61108c2ecf20Sopenharmony_ci		.hdr.vmx.flags = 0,
61118c2ecf20Sopenharmony_ci		.hdr.vmx.vmxon_pa = -1ull,
61128c2ecf20Sopenharmony_ci		.hdr.vmx.vmcs12_pa = -1ull,
61138c2ecf20Sopenharmony_ci		.hdr.vmx.preemption_timer_deadline = 0,
61148c2ecf20Sopenharmony_ci	};
61158c2ecf20Sopenharmony_ci	struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
61168c2ecf20Sopenharmony_ci		&user_kvm_nested_state->data.vmx[0];
61178c2ecf20Sopenharmony_ci
61188c2ecf20Sopenharmony_ci	if (!vcpu)
61198c2ecf20Sopenharmony_ci		return kvm_state.size + sizeof(*user_vmx_nested_state);
61208c2ecf20Sopenharmony_ci
61218c2ecf20Sopenharmony_ci	vmx = to_vmx(vcpu);
61228c2ecf20Sopenharmony_ci	vmcs12 = get_vmcs12(vcpu);
61238c2ecf20Sopenharmony_ci
61248c2ecf20Sopenharmony_ci	if (nested_vmx_allowed(vcpu) &&
61258c2ecf20Sopenharmony_ci	    (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
61268c2ecf20Sopenharmony_ci		kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
61278c2ecf20Sopenharmony_ci		kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
61288c2ecf20Sopenharmony_ci
61298c2ecf20Sopenharmony_ci		if (vmx_has_valid_vmcs12(vcpu)) {
61308c2ecf20Sopenharmony_ci			kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
61318c2ecf20Sopenharmony_ci
61328c2ecf20Sopenharmony_ci			if (vmx->nested.hv_evmcs)
61338c2ecf20Sopenharmony_ci				kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
61348c2ecf20Sopenharmony_ci
61358c2ecf20Sopenharmony_ci			if (is_guest_mode(vcpu) &&
61368c2ecf20Sopenharmony_ci			    nested_cpu_has_shadow_vmcs(vmcs12) &&
61378c2ecf20Sopenharmony_ci			    vmcs12->vmcs_link_pointer != -1ull)
61388c2ecf20Sopenharmony_ci				kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12);
61398c2ecf20Sopenharmony_ci		}
61408c2ecf20Sopenharmony_ci
61418c2ecf20Sopenharmony_ci		if (vmx->nested.smm.vmxon)
61428c2ecf20Sopenharmony_ci			kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
61438c2ecf20Sopenharmony_ci
61448c2ecf20Sopenharmony_ci		if (vmx->nested.smm.guest_mode)
61458c2ecf20Sopenharmony_ci			kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
61468c2ecf20Sopenharmony_ci
61478c2ecf20Sopenharmony_ci		if (is_guest_mode(vcpu)) {
61488c2ecf20Sopenharmony_ci			kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
61498c2ecf20Sopenharmony_ci
61508c2ecf20Sopenharmony_ci			if (vmx->nested.nested_run_pending)
61518c2ecf20Sopenharmony_ci				kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
61528c2ecf20Sopenharmony_ci
61538c2ecf20Sopenharmony_ci			if (vmx->nested.mtf_pending)
61548c2ecf20Sopenharmony_ci				kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
61558c2ecf20Sopenharmony_ci
61568c2ecf20Sopenharmony_ci			if (nested_cpu_has_preemption_timer(vmcs12) &&
61578c2ecf20Sopenharmony_ci			    vmx->nested.has_preemption_timer_deadline) {
61588c2ecf20Sopenharmony_ci				kvm_state.hdr.vmx.flags |=
61598c2ecf20Sopenharmony_ci					KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE;
61608c2ecf20Sopenharmony_ci				kvm_state.hdr.vmx.preemption_timer_deadline =
61618c2ecf20Sopenharmony_ci					vmx->nested.preemption_timer_deadline;
61628c2ecf20Sopenharmony_ci			}
61638c2ecf20Sopenharmony_ci		}
61648c2ecf20Sopenharmony_ci	}
61658c2ecf20Sopenharmony_ci
61668c2ecf20Sopenharmony_ci	if (user_data_size < kvm_state.size)
61678c2ecf20Sopenharmony_ci		goto out;
61688c2ecf20Sopenharmony_ci
61698c2ecf20Sopenharmony_ci	if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
61708c2ecf20Sopenharmony_ci		return -EFAULT;
61718c2ecf20Sopenharmony_ci
61728c2ecf20Sopenharmony_ci	if (!vmx_has_valid_vmcs12(vcpu))
61738c2ecf20Sopenharmony_ci		goto out;
61748c2ecf20Sopenharmony_ci
61758c2ecf20Sopenharmony_ci	/*
61768c2ecf20Sopenharmony_ci	 * When running L2, the authoritative vmcs12 state is in the
61778c2ecf20Sopenharmony_ci	 * vmcs02. When running L1, the authoritative vmcs12 state is
61788c2ecf20Sopenharmony_ci	 * in the shadow or enlightened vmcs linked to vmcs01, unless
61798c2ecf20Sopenharmony_ci	 * need_vmcs12_to_shadow_sync is set, in which case, the authoritative
61808c2ecf20Sopenharmony_ci	 * vmcs12 state is in the vmcs12 already.
61818c2ecf20Sopenharmony_ci	 */
61828c2ecf20Sopenharmony_ci	if (is_guest_mode(vcpu)) {
61838c2ecf20Sopenharmony_ci		sync_vmcs02_to_vmcs12(vcpu, vmcs12);
61848c2ecf20Sopenharmony_ci		sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
61858c2ecf20Sopenharmony_ci	} else  {
61868c2ecf20Sopenharmony_ci		copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
61878c2ecf20Sopenharmony_ci		if (!vmx->nested.need_vmcs12_to_shadow_sync) {
61888c2ecf20Sopenharmony_ci			if (vmx->nested.hv_evmcs)
61898c2ecf20Sopenharmony_ci				copy_enlightened_to_vmcs12(vmx);
61908c2ecf20Sopenharmony_ci			else if (enable_shadow_vmcs)
61918c2ecf20Sopenharmony_ci				copy_shadow_to_vmcs12(vmx);
61928c2ecf20Sopenharmony_ci		}
61938c2ecf20Sopenharmony_ci	}
61948c2ecf20Sopenharmony_ci
61958c2ecf20Sopenharmony_ci	BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
61968c2ecf20Sopenharmony_ci	BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
61978c2ecf20Sopenharmony_ci
61988c2ecf20Sopenharmony_ci	/*
61998c2ecf20Sopenharmony_ci	 * Copy over the full allocated size of vmcs12 rather than just the size
62008c2ecf20Sopenharmony_ci	 * of the struct.
62018c2ecf20Sopenharmony_ci	 */
62028c2ecf20Sopenharmony_ci	if (copy_to_user(user_vmx_nested_state->vmcs12, vmcs12, VMCS12_SIZE))
62038c2ecf20Sopenharmony_ci		return -EFAULT;
62048c2ecf20Sopenharmony_ci
62058c2ecf20Sopenharmony_ci	if (nested_cpu_has_shadow_vmcs(vmcs12) &&
62068c2ecf20Sopenharmony_ci	    vmcs12->vmcs_link_pointer != -1ull) {
62078c2ecf20Sopenharmony_ci		if (copy_to_user(user_vmx_nested_state->shadow_vmcs12,
62088c2ecf20Sopenharmony_ci				 get_shadow_vmcs12(vcpu), VMCS12_SIZE))
62098c2ecf20Sopenharmony_ci			return -EFAULT;
62108c2ecf20Sopenharmony_ci	}
62118c2ecf20Sopenharmony_ciout:
62128c2ecf20Sopenharmony_ci	return kvm_state.size;
62138c2ecf20Sopenharmony_ci}
62148c2ecf20Sopenharmony_ci
62158c2ecf20Sopenharmony_ci/*
62168c2ecf20Sopenharmony_ci * Forcibly leave nested mode in order to be able to reset the VCPU later on.
62178c2ecf20Sopenharmony_ci */
62188c2ecf20Sopenharmony_civoid vmx_leave_nested(struct kvm_vcpu *vcpu)
62198c2ecf20Sopenharmony_ci{
62208c2ecf20Sopenharmony_ci	if (is_guest_mode(vcpu)) {
62218c2ecf20Sopenharmony_ci		to_vmx(vcpu)->nested.nested_run_pending = 0;
62228c2ecf20Sopenharmony_ci		nested_vmx_vmexit(vcpu, -1, 0, 0);
62238c2ecf20Sopenharmony_ci	}
62248c2ecf20Sopenharmony_ci	free_nested(vcpu);
62258c2ecf20Sopenharmony_ci}
62268c2ecf20Sopenharmony_ci
62278c2ecf20Sopenharmony_cistatic int vmx_set_nested_state(struct kvm_vcpu *vcpu,
62288c2ecf20Sopenharmony_ci				struct kvm_nested_state __user *user_kvm_nested_state,
62298c2ecf20Sopenharmony_ci				struct kvm_nested_state *kvm_state)
62308c2ecf20Sopenharmony_ci{
62318c2ecf20Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
62328c2ecf20Sopenharmony_ci	struct vmcs12 *vmcs12;
62338c2ecf20Sopenharmony_ci	enum vm_entry_failure_code ignored;
62348c2ecf20Sopenharmony_ci	struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
62358c2ecf20Sopenharmony_ci		&user_kvm_nested_state->data.vmx[0];
62368c2ecf20Sopenharmony_ci	int ret;
62378c2ecf20Sopenharmony_ci
62388c2ecf20Sopenharmony_ci	if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX)
62398c2ecf20Sopenharmony_ci		return -EINVAL;
62408c2ecf20Sopenharmony_ci
62418c2ecf20Sopenharmony_ci	if (kvm_state->hdr.vmx.vmxon_pa == -1ull) {
62428c2ecf20Sopenharmony_ci		if (kvm_state->hdr.vmx.smm.flags)
62438c2ecf20Sopenharmony_ci			return -EINVAL;
62448c2ecf20Sopenharmony_ci
62458c2ecf20Sopenharmony_ci		if (kvm_state->hdr.vmx.vmcs12_pa != -1ull)
62468c2ecf20Sopenharmony_ci			return -EINVAL;
62478c2ecf20Sopenharmony_ci
62488c2ecf20Sopenharmony_ci		/*
62498c2ecf20Sopenharmony_ci		 * KVM_STATE_NESTED_EVMCS used to signal that KVM should
62508c2ecf20Sopenharmony_ci		 * enable eVMCS capability on vCPU. However, since then
62518c2ecf20Sopenharmony_ci		 * code was changed such that flag signals vmcs12 should
62528c2ecf20Sopenharmony_ci		 * be copied into eVMCS in guest memory.
62538c2ecf20Sopenharmony_ci		 *
62548c2ecf20Sopenharmony_ci		 * To preserve backwards compatability, allow user
62558c2ecf20Sopenharmony_ci		 * to set this flag even when there is no VMXON region.
62568c2ecf20Sopenharmony_ci		 */
62578c2ecf20Sopenharmony_ci		if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
62588c2ecf20Sopenharmony_ci			return -EINVAL;
62598c2ecf20Sopenharmony_ci	} else {
62608c2ecf20Sopenharmony_ci		if (!nested_vmx_allowed(vcpu))
62618c2ecf20Sopenharmony_ci			return -EINVAL;
62628c2ecf20Sopenharmony_ci
62638c2ecf20Sopenharmony_ci		if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
62648c2ecf20Sopenharmony_ci			return -EINVAL;
62658c2ecf20Sopenharmony_ci	}
62668c2ecf20Sopenharmony_ci
62678c2ecf20Sopenharmony_ci	if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
62688c2ecf20Sopenharmony_ci	    (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
62698c2ecf20Sopenharmony_ci		return -EINVAL;
62708c2ecf20Sopenharmony_ci
62718c2ecf20Sopenharmony_ci	if (kvm_state->hdr.vmx.smm.flags &
62728c2ecf20Sopenharmony_ci	    ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
62738c2ecf20Sopenharmony_ci		return -EINVAL;
62748c2ecf20Sopenharmony_ci
62758c2ecf20Sopenharmony_ci	if (kvm_state->hdr.vmx.flags & ~KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE)
62768c2ecf20Sopenharmony_ci		return -EINVAL;
62778c2ecf20Sopenharmony_ci
62788c2ecf20Sopenharmony_ci	/*
62798c2ecf20Sopenharmony_ci	 * SMM temporarily disables VMX, so we cannot be in guest mode,
62808c2ecf20Sopenharmony_ci	 * nor can VMLAUNCH/VMRESUME be pending.  Outside SMM, SMM flags
62818c2ecf20Sopenharmony_ci	 * must be zero.
62828c2ecf20Sopenharmony_ci	 */
62838c2ecf20Sopenharmony_ci	if (is_smm(vcpu) ?
62848c2ecf20Sopenharmony_ci		(kvm_state->flags &
62858c2ecf20Sopenharmony_ci		 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
62868c2ecf20Sopenharmony_ci		: kvm_state->hdr.vmx.smm.flags)
62878c2ecf20Sopenharmony_ci		return -EINVAL;
62888c2ecf20Sopenharmony_ci
62898c2ecf20Sopenharmony_ci	if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
62908c2ecf20Sopenharmony_ci	    !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
62918c2ecf20Sopenharmony_ci		return -EINVAL;
62928c2ecf20Sopenharmony_ci
62938c2ecf20Sopenharmony_ci	if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
62948c2ecf20Sopenharmony_ci		(!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
62958c2ecf20Sopenharmony_ci			return -EINVAL;
62968c2ecf20Sopenharmony_ci
62978c2ecf20Sopenharmony_ci	vmx_leave_nested(vcpu);
62988c2ecf20Sopenharmony_ci
62998c2ecf20Sopenharmony_ci	if (kvm_state->hdr.vmx.vmxon_pa == -1ull)
63008c2ecf20Sopenharmony_ci		return 0;
63018c2ecf20Sopenharmony_ci
63028c2ecf20Sopenharmony_ci	vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa;
63038c2ecf20Sopenharmony_ci	ret = enter_vmx_operation(vcpu);
63048c2ecf20Sopenharmony_ci	if (ret)
63058c2ecf20Sopenharmony_ci		return ret;
63068c2ecf20Sopenharmony_ci
63078c2ecf20Sopenharmony_ci	/* Empty 'VMXON' state is permitted if no VMCS loaded */
63088c2ecf20Sopenharmony_ci	if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) {
63098c2ecf20Sopenharmony_ci		/* See vmx_has_valid_vmcs12.  */
63108c2ecf20Sopenharmony_ci		if ((kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE) ||
63118c2ecf20Sopenharmony_ci		    (kvm_state->flags & KVM_STATE_NESTED_EVMCS) ||
63128c2ecf20Sopenharmony_ci		    (kvm_state->hdr.vmx.vmcs12_pa != -1ull))
63138c2ecf20Sopenharmony_ci			return -EINVAL;
63148c2ecf20Sopenharmony_ci		else
63158c2ecf20Sopenharmony_ci			return 0;
63168c2ecf20Sopenharmony_ci	}
63178c2ecf20Sopenharmony_ci
63188c2ecf20Sopenharmony_ci	if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) {
63198c2ecf20Sopenharmony_ci		if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa ||
63208c2ecf20Sopenharmony_ci		    !page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa))
63218c2ecf20Sopenharmony_ci			return -EINVAL;
63228c2ecf20Sopenharmony_ci
63238c2ecf20Sopenharmony_ci		set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa);
63248c2ecf20Sopenharmony_ci	} else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
63258c2ecf20Sopenharmony_ci		/*
63268c2ecf20Sopenharmony_ci		 * nested_vmx_handle_enlightened_vmptrld() cannot be called
63278c2ecf20Sopenharmony_ci		 * directly from here as HV_X64_MSR_VP_ASSIST_PAGE may not be
63288c2ecf20Sopenharmony_ci		 * restored yet. EVMCS will be mapped from
63298c2ecf20Sopenharmony_ci		 * nested_get_vmcs12_pages().
63308c2ecf20Sopenharmony_ci		 */
63318c2ecf20Sopenharmony_ci		kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
63328c2ecf20Sopenharmony_ci	} else {
63338c2ecf20Sopenharmony_ci		return -EINVAL;
63348c2ecf20Sopenharmony_ci	}
63358c2ecf20Sopenharmony_ci
63368c2ecf20Sopenharmony_ci	if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
63378c2ecf20Sopenharmony_ci		vmx->nested.smm.vmxon = true;
63388c2ecf20Sopenharmony_ci		vmx->nested.vmxon = false;
63398c2ecf20Sopenharmony_ci
63408c2ecf20Sopenharmony_ci		if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
63418c2ecf20Sopenharmony_ci			vmx->nested.smm.guest_mode = true;
63428c2ecf20Sopenharmony_ci	}
63438c2ecf20Sopenharmony_ci
63448c2ecf20Sopenharmony_ci	vmcs12 = get_vmcs12(vcpu);
63458c2ecf20Sopenharmony_ci	if (copy_from_user(vmcs12, user_vmx_nested_state->vmcs12, sizeof(*vmcs12)))
63468c2ecf20Sopenharmony_ci		return -EFAULT;
63478c2ecf20Sopenharmony_ci
63488c2ecf20Sopenharmony_ci	if (vmcs12->hdr.revision_id != VMCS12_REVISION)
63498c2ecf20Sopenharmony_ci		return -EINVAL;
63508c2ecf20Sopenharmony_ci
63518c2ecf20Sopenharmony_ci	if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
63528c2ecf20Sopenharmony_ci		return 0;
63538c2ecf20Sopenharmony_ci
63548c2ecf20Sopenharmony_ci	vmx->nested.nested_run_pending =
63558c2ecf20Sopenharmony_ci		!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
63568c2ecf20Sopenharmony_ci
63578c2ecf20Sopenharmony_ci	vmx->nested.mtf_pending =
63588c2ecf20Sopenharmony_ci		!!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
63598c2ecf20Sopenharmony_ci
63608c2ecf20Sopenharmony_ci	ret = -EINVAL;
63618c2ecf20Sopenharmony_ci	if (nested_cpu_has_shadow_vmcs(vmcs12) &&
63628c2ecf20Sopenharmony_ci	    vmcs12->vmcs_link_pointer != -1ull) {
63638c2ecf20Sopenharmony_ci		struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
63648c2ecf20Sopenharmony_ci
63658c2ecf20Sopenharmony_ci		if (kvm_state->size <
63668c2ecf20Sopenharmony_ci		    sizeof(*kvm_state) +
63678c2ecf20Sopenharmony_ci		    sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12))
63688c2ecf20Sopenharmony_ci			goto error_guest_mode;
63698c2ecf20Sopenharmony_ci
63708c2ecf20Sopenharmony_ci		if (copy_from_user(shadow_vmcs12,
63718c2ecf20Sopenharmony_ci				   user_vmx_nested_state->shadow_vmcs12,
63728c2ecf20Sopenharmony_ci				   sizeof(*shadow_vmcs12))) {
63738c2ecf20Sopenharmony_ci			ret = -EFAULT;
63748c2ecf20Sopenharmony_ci			goto error_guest_mode;
63758c2ecf20Sopenharmony_ci		}
63768c2ecf20Sopenharmony_ci
63778c2ecf20Sopenharmony_ci		if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
63788c2ecf20Sopenharmony_ci		    !shadow_vmcs12->hdr.shadow_vmcs)
63798c2ecf20Sopenharmony_ci			goto error_guest_mode;
63808c2ecf20Sopenharmony_ci	}
63818c2ecf20Sopenharmony_ci
63828c2ecf20Sopenharmony_ci	vmx->nested.has_preemption_timer_deadline = false;
63838c2ecf20Sopenharmony_ci	if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) {
63848c2ecf20Sopenharmony_ci		vmx->nested.has_preemption_timer_deadline = true;
63858c2ecf20Sopenharmony_ci		vmx->nested.preemption_timer_deadline =
63868c2ecf20Sopenharmony_ci			kvm_state->hdr.vmx.preemption_timer_deadline;
63878c2ecf20Sopenharmony_ci	}
63888c2ecf20Sopenharmony_ci
63898c2ecf20Sopenharmony_ci	if (nested_vmx_check_controls(vcpu, vmcs12) ||
63908c2ecf20Sopenharmony_ci	    nested_vmx_check_host_state(vcpu, vmcs12) ||
63918c2ecf20Sopenharmony_ci	    nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
63928c2ecf20Sopenharmony_ci		goto error_guest_mode;
63938c2ecf20Sopenharmony_ci
63948c2ecf20Sopenharmony_ci	vmx->nested.dirty_vmcs12 = true;
63958c2ecf20Sopenharmony_ci	ret = nested_vmx_enter_non_root_mode(vcpu, false);
63968c2ecf20Sopenharmony_ci	if (ret)
63978c2ecf20Sopenharmony_ci		goto error_guest_mode;
63988c2ecf20Sopenharmony_ci
63998c2ecf20Sopenharmony_ci	return 0;
64008c2ecf20Sopenharmony_ci
64018c2ecf20Sopenharmony_cierror_guest_mode:
64028c2ecf20Sopenharmony_ci	vmx->nested.nested_run_pending = 0;
64038c2ecf20Sopenharmony_ci	return ret;
64048c2ecf20Sopenharmony_ci}
64058c2ecf20Sopenharmony_ci
64068c2ecf20Sopenharmony_civoid nested_vmx_set_vmcs_shadowing_bitmap(void)
64078c2ecf20Sopenharmony_ci{
64088c2ecf20Sopenharmony_ci	if (enable_shadow_vmcs) {
64098c2ecf20Sopenharmony_ci		vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
64108c2ecf20Sopenharmony_ci		vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
64118c2ecf20Sopenharmony_ci	}
64128c2ecf20Sopenharmony_ci}
64138c2ecf20Sopenharmony_ci
64148c2ecf20Sopenharmony_ci/*
64158c2ecf20Sopenharmony_ci * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
64168c2ecf20Sopenharmony_ci * returned for the various VMX controls MSRs when nested VMX is enabled.
64178c2ecf20Sopenharmony_ci * The same values should also be used to verify that vmcs12 control fields are
64188c2ecf20Sopenharmony_ci * valid during nested entry from L1 to L2.
64198c2ecf20Sopenharmony_ci * Each of these control msrs has a low and high 32-bit half: A low bit is on
64208c2ecf20Sopenharmony_ci * if the corresponding bit in the (32-bit) control field *must* be on, and a
64218c2ecf20Sopenharmony_ci * bit in the high half is on if the corresponding bit in the control field
64228c2ecf20Sopenharmony_ci * may be on. See also vmx_control_verify().
64238c2ecf20Sopenharmony_ci */
64248c2ecf20Sopenharmony_civoid nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
64258c2ecf20Sopenharmony_ci{
64268c2ecf20Sopenharmony_ci	/*
64278c2ecf20Sopenharmony_ci	 * Note that as a general rule, the high half of the MSRs (bits in
64288c2ecf20Sopenharmony_ci	 * the control fields which may be 1) should be initialized by the
64298c2ecf20Sopenharmony_ci	 * intersection of the underlying hardware's MSR (i.e., features which
64308c2ecf20Sopenharmony_ci	 * can be supported) and the list of features we want to expose -
64318c2ecf20Sopenharmony_ci	 * because they are known to be properly supported in our code.
64328c2ecf20Sopenharmony_ci	 * Also, usually, the low half of the MSRs (bits which must be 1) can
64338c2ecf20Sopenharmony_ci	 * be set to 0, meaning that L1 may turn off any of these bits. The
64348c2ecf20Sopenharmony_ci	 * reason is that if one of these bits is necessary, it will appear
	 * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
	 * fields of vmcs01 and vmcs12, will turn these bits on - and
	 * nested_vmx_l1_wants_exit() will not pass related exits to L1.
64388c2ecf20Sopenharmony_ci	 * These rules have exceptions below.
64398c2ecf20Sopenharmony_ci	 */
64408c2ecf20Sopenharmony_ci
64418c2ecf20Sopenharmony_ci	/* pin-based controls */
64428c2ecf20Sopenharmony_ci	rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
64438c2ecf20Sopenharmony_ci		msrs->pinbased_ctls_low,
64448c2ecf20Sopenharmony_ci		msrs->pinbased_ctls_high);
64458c2ecf20Sopenharmony_ci	msrs->pinbased_ctls_low |=
64468c2ecf20Sopenharmony_ci		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
64478c2ecf20Sopenharmony_ci	msrs->pinbased_ctls_high &=
64488c2ecf20Sopenharmony_ci		PIN_BASED_EXT_INTR_MASK |
64498c2ecf20Sopenharmony_ci		PIN_BASED_NMI_EXITING |
64508c2ecf20Sopenharmony_ci		PIN_BASED_VIRTUAL_NMIS |
64518c2ecf20Sopenharmony_ci		(enable_apicv ? PIN_BASED_POSTED_INTR : 0);
64528c2ecf20Sopenharmony_ci	msrs->pinbased_ctls_high |=
64538c2ecf20Sopenharmony_ci		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
64548c2ecf20Sopenharmony_ci		PIN_BASED_VMX_PREEMPTION_TIMER;
64558c2ecf20Sopenharmony_ci
64568c2ecf20Sopenharmony_ci	/* exit controls */
64578c2ecf20Sopenharmony_ci	rdmsr(MSR_IA32_VMX_EXIT_CTLS,
64588c2ecf20Sopenharmony_ci		msrs->exit_ctls_low,
64598c2ecf20Sopenharmony_ci		msrs->exit_ctls_high);
64608c2ecf20Sopenharmony_ci	msrs->exit_ctls_low =
64618c2ecf20Sopenharmony_ci		VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
64628c2ecf20Sopenharmony_ci
64638c2ecf20Sopenharmony_ci	msrs->exit_ctls_high &=
64648c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
64658c2ecf20Sopenharmony_ci		VM_EXIT_HOST_ADDR_SPACE_SIZE |
64668c2ecf20Sopenharmony_ci#endif
64678c2ecf20Sopenharmony_ci		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
64688c2ecf20Sopenharmony_ci		VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
64698c2ecf20Sopenharmony_ci	msrs->exit_ctls_high |=
64708c2ecf20Sopenharmony_ci		VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
64718c2ecf20Sopenharmony_ci		VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
64728c2ecf20Sopenharmony_ci		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
64738c2ecf20Sopenharmony_ci
64748c2ecf20Sopenharmony_ci	/* We support free control of debug control saving. */
64758c2ecf20Sopenharmony_ci	msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
64768c2ecf20Sopenharmony_ci
64778c2ecf20Sopenharmony_ci	/* entry controls */
64788c2ecf20Sopenharmony_ci	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
64798c2ecf20Sopenharmony_ci		msrs->entry_ctls_low,
64808c2ecf20Sopenharmony_ci		msrs->entry_ctls_high);
64818c2ecf20Sopenharmony_ci	msrs->entry_ctls_low =
64828c2ecf20Sopenharmony_ci		VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
64838c2ecf20Sopenharmony_ci	msrs->entry_ctls_high &=
64848c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
64858c2ecf20Sopenharmony_ci		VM_ENTRY_IA32E_MODE |
64868c2ecf20Sopenharmony_ci#endif
64878c2ecf20Sopenharmony_ci		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS |
64888c2ecf20Sopenharmony_ci		VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
64898c2ecf20Sopenharmony_ci	msrs->entry_ctls_high |=
64908c2ecf20Sopenharmony_ci		(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
64918c2ecf20Sopenharmony_ci
64928c2ecf20Sopenharmony_ci	/* We support free control of debug control loading. */
64938c2ecf20Sopenharmony_ci	msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
64948c2ecf20Sopenharmony_ci
64958c2ecf20Sopenharmony_ci	/* cpu-based controls */
64968c2ecf20Sopenharmony_ci	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
64978c2ecf20Sopenharmony_ci		msrs->procbased_ctls_low,
64988c2ecf20Sopenharmony_ci		msrs->procbased_ctls_high);
64998c2ecf20Sopenharmony_ci	msrs->procbased_ctls_low =
65008c2ecf20Sopenharmony_ci		CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
65018c2ecf20Sopenharmony_ci	msrs->procbased_ctls_high &=
65028c2ecf20Sopenharmony_ci		CPU_BASED_INTR_WINDOW_EXITING |
65038c2ecf20Sopenharmony_ci		CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING |
65048c2ecf20Sopenharmony_ci		CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
65058c2ecf20Sopenharmony_ci		CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
65068c2ecf20Sopenharmony_ci		CPU_BASED_CR3_STORE_EXITING |
65078c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
65088c2ecf20Sopenharmony_ci		CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
65098c2ecf20Sopenharmony_ci#endif
65108c2ecf20Sopenharmony_ci		CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
65118c2ecf20Sopenharmony_ci		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
65128c2ecf20Sopenharmony_ci		CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
65138c2ecf20Sopenharmony_ci		CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
65148c2ecf20Sopenharmony_ci		CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
65158c2ecf20Sopenharmony_ci	/*
65168c2ecf20Sopenharmony_ci	 * We can allow some features even when not supported by the
65178c2ecf20Sopenharmony_ci	 * hardware. For example, L1 can specify an MSR bitmap - and we
65188c2ecf20Sopenharmony_ci	 * can use it to avoid exits to L1 - even when L0 runs L2
65198c2ecf20Sopenharmony_ci	 * without MSR bitmaps.
65208c2ecf20Sopenharmony_ci	 */
65218c2ecf20Sopenharmony_ci	msrs->procbased_ctls_high |=
65228c2ecf20Sopenharmony_ci		CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
65238c2ecf20Sopenharmony_ci		CPU_BASED_USE_MSR_BITMAPS;
65248c2ecf20Sopenharmony_ci
65258c2ecf20Sopenharmony_ci	/* We support free control of CR3 access interception. */
65268c2ecf20Sopenharmony_ci	msrs->procbased_ctls_low &=
65278c2ecf20Sopenharmony_ci		~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
65288c2ecf20Sopenharmony_ci
65298c2ecf20Sopenharmony_ci	/*
65308c2ecf20Sopenharmony_ci	 * secondary cpu-based controls.  Do not include those that
65318c2ecf20Sopenharmony_ci	 * depend on CPUID bits, they are added later by
65328c2ecf20Sopenharmony_ci	 * vmx_vcpu_after_set_cpuid.
65338c2ecf20Sopenharmony_ci	 */
65348c2ecf20Sopenharmony_ci	if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
65358c2ecf20Sopenharmony_ci		rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
65368c2ecf20Sopenharmony_ci		      msrs->secondary_ctls_low,
65378c2ecf20Sopenharmony_ci		      msrs->secondary_ctls_high);
65388c2ecf20Sopenharmony_ci
65398c2ecf20Sopenharmony_ci	msrs->secondary_ctls_low = 0;
65408c2ecf20Sopenharmony_ci	msrs->secondary_ctls_high &=
65418c2ecf20Sopenharmony_ci		SECONDARY_EXEC_DESC |
65428c2ecf20Sopenharmony_ci		SECONDARY_EXEC_ENABLE_RDTSCP |
65438c2ecf20Sopenharmony_ci		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
65448c2ecf20Sopenharmony_ci		SECONDARY_EXEC_WBINVD_EXITING |
65458c2ecf20Sopenharmony_ci		SECONDARY_EXEC_APIC_REGISTER_VIRT |
65468c2ecf20Sopenharmony_ci		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
65478c2ecf20Sopenharmony_ci		SECONDARY_EXEC_RDRAND_EXITING |
65488c2ecf20Sopenharmony_ci		SECONDARY_EXEC_ENABLE_INVPCID |
65498c2ecf20Sopenharmony_ci		SECONDARY_EXEC_RDSEED_EXITING |
65508c2ecf20Sopenharmony_ci		SECONDARY_EXEC_XSAVES;
65518c2ecf20Sopenharmony_ci
65528c2ecf20Sopenharmony_ci	/*
65538c2ecf20Sopenharmony_ci	 * We can emulate "VMCS shadowing," even if the hardware
65548c2ecf20Sopenharmony_ci	 * doesn't support it.
65558c2ecf20Sopenharmony_ci	 */
65568c2ecf20Sopenharmony_ci	msrs->secondary_ctls_high |=
65578c2ecf20Sopenharmony_ci		SECONDARY_EXEC_SHADOW_VMCS;
65588c2ecf20Sopenharmony_ci
65598c2ecf20Sopenharmony_ci	if (enable_ept) {
65608c2ecf20Sopenharmony_ci		/* nested EPT: emulate EPT also to L1 */
65618c2ecf20Sopenharmony_ci		msrs->secondary_ctls_high |=
65628c2ecf20Sopenharmony_ci			SECONDARY_EXEC_ENABLE_EPT;
65638c2ecf20Sopenharmony_ci		msrs->ept_caps =
65648c2ecf20Sopenharmony_ci			VMX_EPT_PAGE_WALK_4_BIT |
65658c2ecf20Sopenharmony_ci			VMX_EPT_PAGE_WALK_5_BIT |
65668c2ecf20Sopenharmony_ci			VMX_EPTP_WB_BIT |
65678c2ecf20Sopenharmony_ci			VMX_EPT_INVEPT_BIT |
65688c2ecf20Sopenharmony_ci			VMX_EPT_EXECUTE_ONLY_BIT;
65698c2ecf20Sopenharmony_ci
65708c2ecf20Sopenharmony_ci		msrs->ept_caps &= ept_caps;
65718c2ecf20Sopenharmony_ci		msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
65728c2ecf20Sopenharmony_ci			VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
65738c2ecf20Sopenharmony_ci			VMX_EPT_1GB_PAGE_BIT;
65748c2ecf20Sopenharmony_ci		if (enable_ept_ad_bits) {
65758c2ecf20Sopenharmony_ci			msrs->secondary_ctls_high |=
65768c2ecf20Sopenharmony_ci				SECONDARY_EXEC_ENABLE_PML;
65778c2ecf20Sopenharmony_ci			msrs->ept_caps |= VMX_EPT_AD_BIT;
65788c2ecf20Sopenharmony_ci		}
65798c2ecf20Sopenharmony_ci	}
65808c2ecf20Sopenharmony_ci
65818c2ecf20Sopenharmony_ci	if (cpu_has_vmx_vmfunc()) {
65828c2ecf20Sopenharmony_ci		msrs->secondary_ctls_high |=
65838c2ecf20Sopenharmony_ci			SECONDARY_EXEC_ENABLE_VMFUNC;
65848c2ecf20Sopenharmony_ci		/*
65858c2ecf20Sopenharmony_ci		 * Advertise EPTP switching unconditionally
65868c2ecf20Sopenharmony_ci		 * since we emulate it
65878c2ecf20Sopenharmony_ci		 */
65888c2ecf20Sopenharmony_ci		if (enable_ept)
65898c2ecf20Sopenharmony_ci			msrs->vmfunc_controls =
65908c2ecf20Sopenharmony_ci				VMX_VMFUNC_EPTP_SWITCHING;
65918c2ecf20Sopenharmony_ci	}
65928c2ecf20Sopenharmony_ci
65938c2ecf20Sopenharmony_ci	/*
65948c2ecf20Sopenharmony_ci	 * Old versions of KVM use the single-context version without
65958c2ecf20Sopenharmony_ci	 * checking for support, so declare that it is supported even
65968c2ecf20Sopenharmony_ci	 * though it is treated as global context.  The alternative is
65978c2ecf20Sopenharmony_ci	 * not failing the single-context invvpid, and it is worse.
65988c2ecf20Sopenharmony_ci	 */
65998c2ecf20Sopenharmony_ci	if (enable_vpid) {
66008c2ecf20Sopenharmony_ci		msrs->secondary_ctls_high |=
66018c2ecf20Sopenharmony_ci			SECONDARY_EXEC_ENABLE_VPID;
66028c2ecf20Sopenharmony_ci		msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
66038c2ecf20Sopenharmony_ci			VMX_VPID_EXTENT_SUPPORTED_MASK;
66048c2ecf20Sopenharmony_ci	}
66058c2ecf20Sopenharmony_ci
66068c2ecf20Sopenharmony_ci	if (enable_unrestricted_guest)
66078c2ecf20Sopenharmony_ci		msrs->secondary_ctls_high |=
66088c2ecf20Sopenharmony_ci			SECONDARY_EXEC_UNRESTRICTED_GUEST;
66098c2ecf20Sopenharmony_ci
66108c2ecf20Sopenharmony_ci	if (flexpriority_enabled)
66118c2ecf20Sopenharmony_ci		msrs->secondary_ctls_high |=
66128c2ecf20Sopenharmony_ci			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
66138c2ecf20Sopenharmony_ci
66148c2ecf20Sopenharmony_ci	/* miscellaneous data */
66158c2ecf20Sopenharmony_ci	rdmsr(MSR_IA32_VMX_MISC,
66168c2ecf20Sopenharmony_ci		msrs->misc_low,
66178c2ecf20Sopenharmony_ci		msrs->misc_high);
66188c2ecf20Sopenharmony_ci	msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
66198c2ecf20Sopenharmony_ci	msrs->misc_low |=
66208c2ecf20Sopenharmony_ci		MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
66218c2ecf20Sopenharmony_ci		VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
66228c2ecf20Sopenharmony_ci		VMX_MISC_ACTIVITY_HLT;
66238c2ecf20Sopenharmony_ci	msrs->misc_high = 0;
66248c2ecf20Sopenharmony_ci
66258c2ecf20Sopenharmony_ci	/*
66268c2ecf20Sopenharmony_ci	 * This MSR reports some information about VMX support. We
66278c2ecf20Sopenharmony_ci	 * should return information about the VMX we emulate for the
66288c2ecf20Sopenharmony_ci	 * guest, and the VMCS structure we give it - not about the
66298c2ecf20Sopenharmony_ci	 * VMX support of the underlying hardware.
66308c2ecf20Sopenharmony_ci	 */
66318c2ecf20Sopenharmony_ci	msrs->basic =
66328c2ecf20Sopenharmony_ci		VMCS12_REVISION |
66338c2ecf20Sopenharmony_ci		VMX_BASIC_TRUE_CTLS |
66348c2ecf20Sopenharmony_ci		((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
66358c2ecf20Sopenharmony_ci		(VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
66368c2ecf20Sopenharmony_ci
66378c2ecf20Sopenharmony_ci	if (cpu_has_vmx_basic_inout())
66388c2ecf20Sopenharmony_ci		msrs->basic |= VMX_BASIC_INOUT;
66398c2ecf20Sopenharmony_ci
66408c2ecf20Sopenharmony_ci	/*
66418c2ecf20Sopenharmony_ci	 * These MSRs specify bits which the guest must keep fixed on
66428c2ecf20Sopenharmony_ci	 * while L1 is in VMXON mode (in L1's root mode, or running an L2).
66438c2ecf20Sopenharmony_ci	 * We picked the standard core2 setting.
66448c2ecf20Sopenharmony_ci	 */
66458c2ecf20Sopenharmony_ci#define VMXON_CR0_ALWAYSON     (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
66468c2ecf20Sopenharmony_ci#define VMXON_CR4_ALWAYSON     X86_CR4_VMXE
66478c2ecf20Sopenharmony_ci	msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
66488c2ecf20Sopenharmony_ci	msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
66498c2ecf20Sopenharmony_ci
66508c2ecf20Sopenharmony_ci	/* These MSRs specify bits which the guest must keep fixed off. */
66518c2ecf20Sopenharmony_ci	rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
66528c2ecf20Sopenharmony_ci	rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
66538c2ecf20Sopenharmony_ci
66548c2ecf20Sopenharmony_ci	/* highest index: VMX_PREEMPTION_TIMER_VALUE */
66558c2ecf20Sopenharmony_ci	msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
66568c2ecf20Sopenharmony_ci}
66578c2ecf20Sopenharmony_ci
66588c2ecf20Sopenharmony_civoid nested_vmx_hardware_unsetup(void)
66598c2ecf20Sopenharmony_ci{
66608c2ecf20Sopenharmony_ci	int i;
66618c2ecf20Sopenharmony_ci
66628c2ecf20Sopenharmony_ci	if (enable_shadow_vmcs) {
66638c2ecf20Sopenharmony_ci		for (i = 0; i < VMX_BITMAP_NR; i++)
66648c2ecf20Sopenharmony_ci			free_page((unsigned long)vmx_bitmap[i]);
66658c2ecf20Sopenharmony_ci	}
66668c2ecf20Sopenharmony_ci}
66678c2ecf20Sopenharmony_ci
66688c2ecf20Sopenharmony_ci__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
66698c2ecf20Sopenharmony_ci{
66708c2ecf20Sopenharmony_ci	int i;
66718c2ecf20Sopenharmony_ci
66728c2ecf20Sopenharmony_ci	if (!cpu_has_vmx_shadow_vmcs())
66738c2ecf20Sopenharmony_ci		enable_shadow_vmcs = 0;
66748c2ecf20Sopenharmony_ci	if (enable_shadow_vmcs) {
66758c2ecf20Sopenharmony_ci		for (i = 0; i < VMX_BITMAP_NR; i++) {
66768c2ecf20Sopenharmony_ci			/*
66778c2ecf20Sopenharmony_ci			 * The vmx_bitmap is not tied to a VM and so should
66788c2ecf20Sopenharmony_ci			 * not be charged to a memcg.
66798c2ecf20Sopenharmony_ci			 */
66808c2ecf20Sopenharmony_ci			vmx_bitmap[i] = (unsigned long *)
66818c2ecf20Sopenharmony_ci				__get_free_page(GFP_KERNEL);
66828c2ecf20Sopenharmony_ci			if (!vmx_bitmap[i]) {
66838c2ecf20Sopenharmony_ci				nested_vmx_hardware_unsetup();
66848c2ecf20Sopenharmony_ci				return -ENOMEM;
66858c2ecf20Sopenharmony_ci			}
66868c2ecf20Sopenharmony_ci		}
66878c2ecf20Sopenharmony_ci
66888c2ecf20Sopenharmony_ci		init_vmcs_shadow_fields();
66898c2ecf20Sopenharmony_ci	}
66908c2ecf20Sopenharmony_ci
66918c2ecf20Sopenharmony_ci	exit_handlers[EXIT_REASON_VMCLEAR]	= handle_vmclear;
66928c2ecf20Sopenharmony_ci	exit_handlers[EXIT_REASON_VMLAUNCH]	= handle_vmlaunch;
66938c2ecf20Sopenharmony_ci	exit_handlers[EXIT_REASON_VMPTRLD]	= handle_vmptrld;
66948c2ecf20Sopenharmony_ci	exit_handlers[EXIT_REASON_VMPTRST]	= handle_vmptrst;
66958c2ecf20Sopenharmony_ci	exit_handlers[EXIT_REASON_VMREAD]	= handle_vmread;
66968c2ecf20Sopenharmony_ci	exit_handlers[EXIT_REASON_VMRESUME]	= handle_vmresume;
66978c2ecf20Sopenharmony_ci	exit_handlers[EXIT_REASON_VMWRITE]	= handle_vmwrite;
66988c2ecf20Sopenharmony_ci	exit_handlers[EXIT_REASON_VMOFF]	= handle_vmoff;
66998c2ecf20Sopenharmony_ci	exit_handlers[EXIT_REASON_VMON]		= handle_vmon;
67008c2ecf20Sopenharmony_ci	exit_handlers[EXIT_REASON_INVEPT]	= handle_invept;
67018c2ecf20Sopenharmony_ci	exit_handlers[EXIT_REASON_INVVPID]	= handle_invvpid;
67028c2ecf20Sopenharmony_ci	exit_handlers[EXIT_REASON_VMFUNC]	= handle_vmfunc;
67038c2ecf20Sopenharmony_ci
67048c2ecf20Sopenharmony_ci	return 0;
67058c2ecf20Sopenharmony_ci}
67068c2ecf20Sopenharmony_ci
67078c2ecf20Sopenharmony_cistruct kvm_x86_nested_ops vmx_nested_ops = {
67088c2ecf20Sopenharmony_ci	.leave_nested = vmx_leave_nested,
67098c2ecf20Sopenharmony_ci	.check_events = vmx_check_nested_events,
67108c2ecf20Sopenharmony_ci	.hv_timer_pending = nested_vmx_preemption_timer_pending,
67118c2ecf20Sopenharmony_ci	.get_state = vmx_get_nested_state,
67128c2ecf20Sopenharmony_ci	.set_state = vmx_set_nested_state,
67138c2ecf20Sopenharmony_ci	.get_nested_state_pages = vmx_get_nested_state_pages,
67148c2ecf20Sopenharmony_ci	.write_log_dirty = nested_vmx_write_pml_buffer,
67158c2ecf20Sopenharmony_ci	.enable_evmcs = nested_enable_evmcs,
67168c2ecf20Sopenharmony_ci	.get_evmcs_version = nested_get_evmcs_version,
67178c2ecf20Sopenharmony_ci};
6718