162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Kernel-based Virtual Machine driver for Linux
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * This module enables machines with Intel VT-x extensions to run virtual
662306a36Sopenharmony_ci * machines without emulation or binary translation.
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci * Copyright (C) 2006 Qumranet, Inc.
962306a36Sopenharmony_ci * Copyright 2010 Red Hat, Inc. and/or its affiliates.
1062306a36Sopenharmony_ci *
1162306a36Sopenharmony_ci * Authors:
1262306a36Sopenharmony_ci *   Avi Kivity   <avi@qumranet.com>
1362306a36Sopenharmony_ci *   Yaniv Kamay  <yaniv@qumranet.com>
1462306a36Sopenharmony_ci */
1562306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
1662306a36Sopenharmony_ci
1762306a36Sopenharmony_ci#include <linux/highmem.h>
1862306a36Sopenharmony_ci#include <linux/hrtimer.h>
1962306a36Sopenharmony_ci#include <linux/kernel.h>
2062306a36Sopenharmony_ci#include <linux/kvm_host.h>
2162306a36Sopenharmony_ci#include <linux/module.h>
2262306a36Sopenharmony_ci#include <linux/moduleparam.h>
2362306a36Sopenharmony_ci#include <linux/mod_devicetable.h>
2462306a36Sopenharmony_ci#include <linux/mm.h>
2562306a36Sopenharmony_ci#include <linux/objtool.h>
2662306a36Sopenharmony_ci#include <linux/sched.h>
2762306a36Sopenharmony_ci#include <linux/sched/smt.h>
2862306a36Sopenharmony_ci#include <linux/slab.h>
2962306a36Sopenharmony_ci#include <linux/tboot.h>
3062306a36Sopenharmony_ci#include <linux/trace_events.h>
3162306a36Sopenharmony_ci#include <linux/entry-kvm.h>
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci#include <asm/apic.h>
3462306a36Sopenharmony_ci#include <asm/asm.h>
3562306a36Sopenharmony_ci#include <asm/cpu.h>
3662306a36Sopenharmony_ci#include <asm/cpu_device_id.h>
3762306a36Sopenharmony_ci#include <asm/debugreg.h>
3862306a36Sopenharmony_ci#include <asm/desc.h>
3962306a36Sopenharmony_ci#include <asm/fpu/api.h>
4062306a36Sopenharmony_ci#include <asm/fpu/xstate.h>
4162306a36Sopenharmony_ci#include <asm/idtentry.h>
4262306a36Sopenharmony_ci#include <asm/io.h>
4362306a36Sopenharmony_ci#include <asm/irq_remapping.h>
4462306a36Sopenharmony_ci#include <asm/reboot.h>
4562306a36Sopenharmony_ci#include <asm/perf_event.h>
4662306a36Sopenharmony_ci#include <asm/mmu_context.h>
4762306a36Sopenharmony_ci#include <asm/mshyperv.h>
4862306a36Sopenharmony_ci#include <asm/mwait.h>
4962306a36Sopenharmony_ci#include <asm/spec-ctrl.h>
5062306a36Sopenharmony_ci#include <asm/vmx.h>
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci#include "capabilities.h"
5362306a36Sopenharmony_ci#include "cpuid.h"
5462306a36Sopenharmony_ci#include "hyperv.h"
5562306a36Sopenharmony_ci#include "kvm_onhyperv.h"
5662306a36Sopenharmony_ci#include "irq.h"
5762306a36Sopenharmony_ci#include "kvm_cache_regs.h"
5862306a36Sopenharmony_ci#include "lapic.h"
5962306a36Sopenharmony_ci#include "mmu.h"
6062306a36Sopenharmony_ci#include "nested.h"
6162306a36Sopenharmony_ci#include "pmu.h"
6262306a36Sopenharmony_ci#include "sgx.h"
6362306a36Sopenharmony_ci#include "trace.h"
6462306a36Sopenharmony_ci#include "vmcs.h"
6562306a36Sopenharmony_ci#include "vmcs12.h"
6662306a36Sopenharmony_ci#include "vmx.h"
6762306a36Sopenharmony_ci#include "x86.h"
6862306a36Sopenharmony_ci#include "smm.h"
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ciMODULE_AUTHOR("Qumranet");
7162306a36Sopenharmony_ciMODULE_LICENSE("GPL");
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci#ifdef MODULE
7462306a36Sopenharmony_cistatic const struct x86_cpu_id vmx_cpu_id[] = {
7562306a36Sopenharmony_ci	X86_MATCH_FEATURE(X86_FEATURE_VMX, NULL),
7662306a36Sopenharmony_ci	{}
7762306a36Sopenharmony_ci};
7862306a36Sopenharmony_ciMODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
7962306a36Sopenharmony_ci#endif
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_cibool __read_mostly enable_vpid = 1;
8262306a36Sopenharmony_cimodule_param_named(vpid, enable_vpid, bool, 0444);
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_cistatic bool __read_mostly enable_vnmi = 1;
8562306a36Sopenharmony_cimodule_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_cibool __read_mostly flexpriority_enabled = 1;
8862306a36Sopenharmony_cimodule_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_cibool __read_mostly enable_ept = 1;
9162306a36Sopenharmony_cimodule_param_named(ept, enable_ept, bool, S_IRUGO);
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_cibool __read_mostly enable_unrestricted_guest = 1;
9462306a36Sopenharmony_cimodule_param_named(unrestricted_guest,
9562306a36Sopenharmony_ci			enable_unrestricted_guest, bool, S_IRUGO);
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_cibool __read_mostly enable_ept_ad_bits = 1;
9862306a36Sopenharmony_cimodule_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_cistatic bool __read_mostly emulate_invalid_guest_state = true;
10162306a36Sopenharmony_cimodule_param(emulate_invalid_guest_state, bool, S_IRUGO);
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_cistatic bool __read_mostly fasteoi = 1;
10462306a36Sopenharmony_cimodule_param(fasteoi, bool, S_IRUGO);
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_cimodule_param(enable_apicv, bool, S_IRUGO);
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_cibool __read_mostly enable_ipiv = true;
10962306a36Sopenharmony_cimodule_param(enable_ipiv, bool, 0444);
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci/*
11262306a36Sopenharmony_ci * If nested=1, nested virtualization is supported, i.e., guests may use
11362306a36Sopenharmony_ci * VMX and be a hypervisor for its own guests. If nested=0, guests may not
11462306a36Sopenharmony_ci * use VMX instructions.
11562306a36Sopenharmony_ci */
11662306a36Sopenharmony_cistatic bool __read_mostly nested = 1;
11762306a36Sopenharmony_cimodule_param(nested, bool, S_IRUGO);
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_cibool __read_mostly enable_pml = 1;
12062306a36Sopenharmony_cimodule_param_named(pml, enable_pml, bool, S_IRUGO);
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_cistatic bool __read_mostly error_on_inconsistent_vmcs_config = true;
12362306a36Sopenharmony_cimodule_param(error_on_inconsistent_vmcs_config, bool, 0444);
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_cistatic bool __read_mostly dump_invalid_vmcs = 0;
12662306a36Sopenharmony_cimodule_param(dump_invalid_vmcs, bool, 0644);
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci#define MSR_BITMAP_MODE_X2APIC		1
12962306a36Sopenharmony_ci#define MSR_BITMAP_MODE_X2APIC_APICV	2
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci#define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci/* Guest_tsc -> host_tsc conversion requires 64-bit division.  */
13462306a36Sopenharmony_cistatic int __read_mostly cpu_preemption_timer_multi;
13562306a36Sopenharmony_cistatic bool __read_mostly enable_preemption_timer = 1;
13662306a36Sopenharmony_ci#ifdef CONFIG_X86_64
13762306a36Sopenharmony_cimodule_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
13862306a36Sopenharmony_ci#endif
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ciextern bool __read_mostly allow_smaller_maxphyaddr;
14162306a36Sopenharmony_cimodule_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
14262306a36Sopenharmony_ci
/*
 * CR0 bit masks.  Going by the names, these are the bits kept clear and the
 * bits kept set for a guest; the unrestricted-guest variant only contains NE.
 */
#define KVM_VM_CR0_ALWAYS_OFF			(X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST	X86_CR0_NE
#define KVM_VM_CR0_ALWAYS_ON						\
	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)

/* CR4 counterparts, with separate masks for the PMODE and RMODE cases. */
#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST	X86_CR4_VMXE
#define KVM_PMODE_VM_CR4_ALWAYS_ON		(X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON					\
	(X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)

/* Every EFLAGS bit except IOPL and VM. */
#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))

/* Complement of the RTIT_STATUS bits listed below. */
#define MSR_IA32_RTIT_STATUS_MASK					\
	(~(RTIT_STATUS_FILTEREN | RTIT_STATUS_CONTEXTEN |		\
	   RTIT_STATUS_TRIGGEREN | RTIT_STATUS_ERROR |			\
	   RTIT_STATUS_STOPPED | RTIT_STATUS_BYTECNT))
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci/*
16062306a36Sopenharmony_ci * List of MSRs that can be directly passed to the guest.
16162306a36Sopenharmony_ci * In addition to these x2apic and PT MSRs are handled specially.
16262306a36Sopenharmony_ci */
16362306a36Sopenharmony_cistatic u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
16462306a36Sopenharmony_ci	MSR_IA32_SPEC_CTRL,
16562306a36Sopenharmony_ci	MSR_IA32_PRED_CMD,
16662306a36Sopenharmony_ci	MSR_IA32_FLUSH_CMD,
16762306a36Sopenharmony_ci	MSR_IA32_TSC,
16862306a36Sopenharmony_ci#ifdef CONFIG_X86_64
16962306a36Sopenharmony_ci	MSR_FS_BASE,
17062306a36Sopenharmony_ci	MSR_GS_BASE,
17162306a36Sopenharmony_ci	MSR_KERNEL_GS_BASE,
17262306a36Sopenharmony_ci	MSR_IA32_XFD,
17362306a36Sopenharmony_ci	MSR_IA32_XFD_ERR,
17462306a36Sopenharmony_ci#endif
17562306a36Sopenharmony_ci	MSR_IA32_SYSENTER_CS,
17662306a36Sopenharmony_ci	MSR_IA32_SYSENTER_ESP,
17762306a36Sopenharmony_ci	MSR_IA32_SYSENTER_EIP,
17862306a36Sopenharmony_ci	MSR_CORE_C1_RES,
17962306a36Sopenharmony_ci	MSR_CORE_C3_RESIDENCY,
18062306a36Sopenharmony_ci	MSR_CORE_C6_RESIDENCY,
18162306a36Sopenharmony_ci	MSR_CORE_C7_RESIDENCY,
18262306a36Sopenharmony_ci};
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci/*
18562306a36Sopenharmony_ci * These 2 parameters are used to config the controls for Pause-Loop Exiting:
18662306a36Sopenharmony_ci * ple_gap:    upper bound on the amount of time between two successive
18762306a36Sopenharmony_ci *             executions of PAUSE in a loop. Also indicate if ple enabled.
18862306a36Sopenharmony_ci *             According to test, this time is usually smaller than 128 cycles.
18962306a36Sopenharmony_ci * ple_window: upper bound on the amount of time a guest is allowed to execute
19062306a36Sopenharmony_ci *             in a PAUSE loop. Tests indicate that most spinlocks are held for
19162306a36Sopenharmony_ci *             less than 2^12 cycles
19262306a36Sopenharmony_ci * Time is measured based on a counter that runs at the same rate as the TSC,
19362306a36Sopenharmony_ci * refer SDM volume 3b section 21.6.13 & 22.1.3.
19462306a36Sopenharmony_ci */
19562306a36Sopenharmony_cistatic unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
19662306a36Sopenharmony_cimodule_param(ple_gap, uint, 0444);
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_cistatic unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
19962306a36Sopenharmony_cimodule_param(ple_window, uint, 0444);
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci/* Default doubles per-vcpu window every exit. */
20262306a36Sopenharmony_cistatic unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
20362306a36Sopenharmony_cimodule_param(ple_window_grow, uint, 0444);
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci/* Default resets per-vcpu window every exit to ple_window. */
20662306a36Sopenharmony_cistatic unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
20762306a36Sopenharmony_cimodule_param(ple_window_shrink, uint, 0444);
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_ci/* Default is to compute the maximum so we can never overflow. */
21062306a36Sopenharmony_cistatic unsigned int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
21162306a36Sopenharmony_cimodule_param(ple_window_max, uint, 0444);
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci/* Default is SYSTEM mode, 1 for host-guest mode */
21462306a36Sopenharmony_ciint __read_mostly pt_mode = PT_MODE_SYSTEM;
21562306a36Sopenharmony_cimodule_param(pt_mode, int, S_IRUGO);
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_cistatic DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
21862306a36Sopenharmony_cistatic DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
21962306a36Sopenharmony_cistatic DEFINE_MUTEX(vmx_l1d_flush_mutex);
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci/* Storage for pre module init parameter parsing */
22262306a36Sopenharmony_cistatic enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_cistatic const struct {
22562306a36Sopenharmony_ci	const char *option;
22662306a36Sopenharmony_ci	bool for_parse;
22762306a36Sopenharmony_ci} vmentry_l1d_param[] = {
22862306a36Sopenharmony_ci	[VMENTER_L1D_FLUSH_AUTO]	 = {"auto", true},
22962306a36Sopenharmony_ci	[VMENTER_L1D_FLUSH_NEVER]	 = {"never", true},
23062306a36Sopenharmony_ci	[VMENTER_L1D_FLUSH_COND]	 = {"cond", true},
23162306a36Sopenharmony_ci	[VMENTER_L1D_FLUSH_ALWAYS]	 = {"always", true},
23262306a36Sopenharmony_ci	[VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
23362306a36Sopenharmony_ci	[VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
23462306a36Sopenharmony_ci};
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci#define L1D_CACHE_ORDER 4
23762306a36Sopenharmony_cistatic void *vmx_l1d_flush_pages;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_cistatic int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
24062306a36Sopenharmony_ci{
24162306a36Sopenharmony_ci	struct page *page;
24262306a36Sopenharmony_ci	unsigned int i;
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci	if (!boot_cpu_has_bug(X86_BUG_L1TF)) {
24562306a36Sopenharmony_ci		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
24662306a36Sopenharmony_ci		return 0;
24762306a36Sopenharmony_ci	}
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	if (!enable_ept) {
25062306a36Sopenharmony_ci		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
25162306a36Sopenharmony_ci		return 0;
25262306a36Sopenharmony_ci	}
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	if (host_arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
25562306a36Sopenharmony_ci		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
25662306a36Sopenharmony_ci		return 0;
25762306a36Sopenharmony_ci	}
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci	/* If set to auto use the default l1tf mitigation method */
26062306a36Sopenharmony_ci	if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
26162306a36Sopenharmony_ci		switch (l1tf_mitigation) {
26262306a36Sopenharmony_ci		case L1TF_MITIGATION_OFF:
26362306a36Sopenharmony_ci			l1tf = VMENTER_L1D_FLUSH_NEVER;
26462306a36Sopenharmony_ci			break;
26562306a36Sopenharmony_ci		case L1TF_MITIGATION_FLUSH_NOWARN:
26662306a36Sopenharmony_ci		case L1TF_MITIGATION_FLUSH:
26762306a36Sopenharmony_ci		case L1TF_MITIGATION_FLUSH_NOSMT:
26862306a36Sopenharmony_ci			l1tf = VMENTER_L1D_FLUSH_COND;
26962306a36Sopenharmony_ci			break;
27062306a36Sopenharmony_ci		case L1TF_MITIGATION_FULL:
27162306a36Sopenharmony_ci		case L1TF_MITIGATION_FULL_FORCE:
27262306a36Sopenharmony_ci			l1tf = VMENTER_L1D_FLUSH_ALWAYS;
27362306a36Sopenharmony_ci			break;
27462306a36Sopenharmony_ci		}
27562306a36Sopenharmony_ci	} else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) {
27662306a36Sopenharmony_ci		l1tf = VMENTER_L1D_FLUSH_ALWAYS;
27762306a36Sopenharmony_ci	}
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci	if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
28062306a36Sopenharmony_ci	    !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
28162306a36Sopenharmony_ci		/*
28262306a36Sopenharmony_ci		 * This allocation for vmx_l1d_flush_pages is not tied to a VM
28362306a36Sopenharmony_ci		 * lifetime and so should not be charged to a memcg.
28462306a36Sopenharmony_ci		 */
28562306a36Sopenharmony_ci		page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
28662306a36Sopenharmony_ci		if (!page)
28762306a36Sopenharmony_ci			return -ENOMEM;
28862306a36Sopenharmony_ci		vmx_l1d_flush_pages = page_address(page);
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci		/*
29162306a36Sopenharmony_ci		 * Initialize each page with a different pattern in
29262306a36Sopenharmony_ci		 * order to protect against KSM in the nested
29362306a36Sopenharmony_ci		 * virtualization case.
29462306a36Sopenharmony_ci		 */
29562306a36Sopenharmony_ci		for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
29662306a36Sopenharmony_ci			memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1,
29762306a36Sopenharmony_ci			       PAGE_SIZE);
29862306a36Sopenharmony_ci		}
29962306a36Sopenharmony_ci	}
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci	l1tf_vmx_mitigation = l1tf;
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_ci	if (l1tf != VMENTER_L1D_FLUSH_NEVER)
30462306a36Sopenharmony_ci		static_branch_enable(&vmx_l1d_should_flush);
30562306a36Sopenharmony_ci	else
30662306a36Sopenharmony_ci		static_branch_disable(&vmx_l1d_should_flush);
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci	if (l1tf == VMENTER_L1D_FLUSH_COND)
30962306a36Sopenharmony_ci		static_branch_enable(&vmx_l1d_flush_cond);
31062306a36Sopenharmony_ci	else
31162306a36Sopenharmony_ci		static_branch_disable(&vmx_l1d_flush_cond);
31262306a36Sopenharmony_ci	return 0;
31362306a36Sopenharmony_ci}
31462306a36Sopenharmony_ci
31562306a36Sopenharmony_cistatic int vmentry_l1d_flush_parse(const char *s)
31662306a36Sopenharmony_ci{
31762306a36Sopenharmony_ci	unsigned int i;
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci	if (s) {
32062306a36Sopenharmony_ci		for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
32162306a36Sopenharmony_ci			if (vmentry_l1d_param[i].for_parse &&
32262306a36Sopenharmony_ci			    sysfs_streq(s, vmentry_l1d_param[i].option))
32362306a36Sopenharmony_ci				return i;
32462306a36Sopenharmony_ci		}
32562306a36Sopenharmony_ci	}
32662306a36Sopenharmony_ci	return -EINVAL;
32762306a36Sopenharmony_ci}
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_cistatic int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
33062306a36Sopenharmony_ci{
33162306a36Sopenharmony_ci	int l1tf, ret;
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci	l1tf = vmentry_l1d_flush_parse(s);
33462306a36Sopenharmony_ci	if (l1tf < 0)
33562306a36Sopenharmony_ci		return l1tf;
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_ci	if (!boot_cpu_has(X86_BUG_L1TF))
33862306a36Sopenharmony_ci		return 0;
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci	/*
34162306a36Sopenharmony_ci	 * Has vmx_init() run already? If not then this is the pre init
34262306a36Sopenharmony_ci	 * parameter parsing. In that case just store the value and let
34362306a36Sopenharmony_ci	 * vmx_init() do the proper setup after enable_ept has been
34462306a36Sopenharmony_ci	 * established.
34562306a36Sopenharmony_ci	 */
34662306a36Sopenharmony_ci	if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) {
34762306a36Sopenharmony_ci		vmentry_l1d_flush_param = l1tf;
34862306a36Sopenharmony_ci		return 0;
34962306a36Sopenharmony_ci	}
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_ci	mutex_lock(&vmx_l1d_flush_mutex);
35262306a36Sopenharmony_ci	ret = vmx_setup_l1d_flush(l1tf);
35362306a36Sopenharmony_ci	mutex_unlock(&vmx_l1d_flush_mutex);
35462306a36Sopenharmony_ci	return ret;
35562306a36Sopenharmony_ci}
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_cistatic int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
35862306a36Sopenharmony_ci{
35962306a36Sopenharmony_ci	if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
36062306a36Sopenharmony_ci		return sysfs_emit(s, "???\n");
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci	return sysfs_emit(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
36362306a36Sopenharmony_ci}
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_cistatic __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
36662306a36Sopenharmony_ci{
36762306a36Sopenharmony_ci	u64 msr;
36862306a36Sopenharmony_ci
36962306a36Sopenharmony_ci	if (!vmx->disable_fb_clear)
37062306a36Sopenharmony_ci		return;
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci	msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
37362306a36Sopenharmony_ci	msr |= FB_CLEAR_DIS;
37462306a36Sopenharmony_ci	native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
37562306a36Sopenharmony_ci	/* Cache the MSR value to avoid reading it later */
37662306a36Sopenharmony_ci	vmx->msr_ia32_mcu_opt_ctrl = msr;
37762306a36Sopenharmony_ci}
37862306a36Sopenharmony_ci
37962306a36Sopenharmony_cistatic __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
38062306a36Sopenharmony_ci{
38162306a36Sopenharmony_ci	if (!vmx->disable_fb_clear)
38262306a36Sopenharmony_ci		return;
38362306a36Sopenharmony_ci
38462306a36Sopenharmony_ci	vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
38562306a36Sopenharmony_ci	native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
38662306a36Sopenharmony_ci}
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_cistatic void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
38962306a36Sopenharmony_ci{
39062306a36Sopenharmony_ci	/*
39162306a36Sopenharmony_ci	 * Disable VERW's behavior of clearing CPU buffers for the guest if the
39262306a36Sopenharmony_ci	 * CPU isn't affected by MDS/TAA, and the host hasn't forcefully enabled
39362306a36Sopenharmony_ci	 * the mitigation. Disabling the clearing behavior provides a
39462306a36Sopenharmony_ci	 * performance boost for guests that aren't aware that manually clearing
39562306a36Sopenharmony_ci	 * CPU buffers is unnecessary, at the cost of MSR accesses on VM-Entry
39662306a36Sopenharmony_ci	 * and VM-Exit.
39762306a36Sopenharmony_ci	 */
39862306a36Sopenharmony_ci	vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
39962306a36Sopenharmony_ci				(host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
40062306a36Sopenharmony_ci				!boot_cpu_has_bug(X86_BUG_MDS) &&
40162306a36Sopenharmony_ci				!boot_cpu_has_bug(X86_BUG_TAA);
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci	/*
40462306a36Sopenharmony_ci	 * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
40562306a36Sopenharmony_ci	 * at VMEntry. Skip the MSR read/write when a guest has no use case to
40662306a36Sopenharmony_ci	 * execute VERW.
40762306a36Sopenharmony_ci	 */
40862306a36Sopenharmony_ci	if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) ||
40962306a36Sopenharmony_ci	   ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) &&
41062306a36Sopenharmony_ci	    (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) &&
41162306a36Sopenharmony_ci	    (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) &&
41262306a36Sopenharmony_ci	    (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) &&
41362306a36Sopenharmony_ci	    (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO)))
41462306a36Sopenharmony_ci		vmx->disable_fb_clear = false;
41562306a36Sopenharmony_ci}
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_cistatic const struct kernel_param_ops vmentry_l1d_flush_ops = {
41862306a36Sopenharmony_ci	.set = vmentry_l1d_flush_set,
41962306a36Sopenharmony_ci	.get = vmentry_l1d_flush_get,
42062306a36Sopenharmony_ci};
42162306a36Sopenharmony_cimodule_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_cistatic u32 vmx_segment_access_rights(struct kvm_segment *var);
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_civoid vmx_vmexit(void);
42662306a36Sopenharmony_ci
/*
 * Report a failed VMX instruction: a one-time WARN plus a rate-limited
 * warning message.  The arguments are a printf-style format string followed
 * by its parameters, and they are expanded into both calls.
 */
#define vmx_insn_failed(...)				\
do {							\
	WARN_ONCE(1, __VA_ARGS__);			\
	pr_warn_ratelimited(__VA_ARGS__);		\
} while (0)
43262306a36Sopenharmony_ci
43362306a36Sopenharmony_cinoinline void vmread_error(unsigned long field)
43462306a36Sopenharmony_ci{
43562306a36Sopenharmony_ci	vmx_insn_failed("vmread failed: field=%lx\n", field);
43662306a36Sopenharmony_ci}
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_ci#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
43962306a36Sopenharmony_cinoinstr void vmread_error_trampoline2(unsigned long field, bool fault)
44062306a36Sopenharmony_ci{
44162306a36Sopenharmony_ci	if (fault) {
44262306a36Sopenharmony_ci		kvm_spurious_fault();
44362306a36Sopenharmony_ci	} else {
44462306a36Sopenharmony_ci		instrumentation_begin();
44562306a36Sopenharmony_ci		vmread_error(field);
44662306a36Sopenharmony_ci		instrumentation_end();
44762306a36Sopenharmony_ci	}
44862306a36Sopenharmony_ci}
44962306a36Sopenharmony_ci#endif
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_cinoinline void vmwrite_error(unsigned long field, unsigned long value)
45262306a36Sopenharmony_ci{
45362306a36Sopenharmony_ci	vmx_insn_failed("vmwrite failed: field=%lx val=%lx err=%u\n",
45462306a36Sopenharmony_ci			field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
45562306a36Sopenharmony_ci}
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_cinoinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
45862306a36Sopenharmony_ci{
45962306a36Sopenharmony_ci	vmx_insn_failed("vmclear failed: %p/%llx err=%u\n",
46062306a36Sopenharmony_ci			vmcs, phys_addr, vmcs_read32(VM_INSTRUCTION_ERROR));
46162306a36Sopenharmony_ci}
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_cinoinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
46462306a36Sopenharmony_ci{
46562306a36Sopenharmony_ci	vmx_insn_failed("vmptrld failed: %p/%llx err=%u\n",
46662306a36Sopenharmony_ci			vmcs, phys_addr, vmcs_read32(VM_INSTRUCTION_ERROR));
46762306a36Sopenharmony_ci}
46862306a36Sopenharmony_ci
46962306a36Sopenharmony_cinoinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
47062306a36Sopenharmony_ci{
47162306a36Sopenharmony_ci	vmx_insn_failed("invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
47262306a36Sopenharmony_ci			ext, vpid, gva);
47362306a36Sopenharmony_ci}
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_cinoinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
47662306a36Sopenharmony_ci{
47762306a36Sopenharmony_ci	vmx_insn_failed("invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
47862306a36Sopenharmony_ci			ext, eptp, gpa);
47962306a36Sopenharmony_ci}
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct vmcs *, vmxarea);
48262306a36Sopenharmony_ciDEFINE_PER_CPU(struct vmcs *, current_vmcs);
48362306a36Sopenharmony_ci/*
48462306a36Sopenharmony_ci * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
48562306a36Sopenharmony_ci * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
48662306a36Sopenharmony_ci */
48762306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
48862306a36Sopenharmony_ci
48962306a36Sopenharmony_cistatic DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
49062306a36Sopenharmony_cistatic DEFINE_SPINLOCK(vmx_vpid_lock);
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_cistruct vmcs_config vmcs_config __ro_after_init;
49362306a36Sopenharmony_cistruct vmx_capability vmx_capability __ro_after_init;
49462306a36Sopenharmony_ci
49562306a36Sopenharmony_ci#define VMX_SEGMENT_FIELD(seg)					\
49662306a36Sopenharmony_ci	[VCPU_SREG_##seg] = {                                   \
49762306a36Sopenharmony_ci		.selector = GUEST_##seg##_SELECTOR,		\
49862306a36Sopenharmony_ci		.base = GUEST_##seg##_BASE,		   	\
49962306a36Sopenharmony_ci		.limit = GUEST_##seg##_LIMIT,		   	\
50062306a36Sopenharmony_ci		.ar_bytes = GUEST_##seg##_AR_BYTES,	   	\
50162306a36Sopenharmony_ci	}
50262306a36Sopenharmony_ci
50362306a36Sopenharmony_cistatic const struct kvm_vmx_segment_field {
50462306a36Sopenharmony_ci	unsigned selector;
50562306a36Sopenharmony_ci	unsigned base;
50662306a36Sopenharmony_ci	unsigned limit;
50762306a36Sopenharmony_ci	unsigned ar_bytes;
50862306a36Sopenharmony_ci} kvm_vmx_segment_fields[] = {
50962306a36Sopenharmony_ci	VMX_SEGMENT_FIELD(CS),
51062306a36Sopenharmony_ci	VMX_SEGMENT_FIELD(DS),
51162306a36Sopenharmony_ci	VMX_SEGMENT_FIELD(ES),
51262306a36Sopenharmony_ci	VMX_SEGMENT_FIELD(FS),
51362306a36Sopenharmony_ci	VMX_SEGMENT_FIELD(GS),
51462306a36Sopenharmony_ci	VMX_SEGMENT_FIELD(SS),
51562306a36Sopenharmony_ci	VMX_SEGMENT_FIELD(TR),
51662306a36Sopenharmony_ci	VMX_SEGMENT_FIELD(LDTR),
51762306a36Sopenharmony_ci};
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_cistatic inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
52062306a36Sopenharmony_ci{
52162306a36Sopenharmony_ci	vmx->segment_cache.bitmask = 0;
52262306a36Sopenharmony_ci}
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_cistatic unsigned long host_idt_base;
52562306a36Sopenharmony_ci
52662306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_HYPERV)
52762306a36Sopenharmony_cistatic struct kvm_x86_ops vmx_x86_ops __initdata;
52862306a36Sopenharmony_ci
52962306a36Sopenharmony_cistatic bool __read_mostly enlightened_vmcs = true;
53062306a36Sopenharmony_cimodule_param(enlightened_vmcs, bool, 0444);
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_cistatic int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
53362306a36Sopenharmony_ci{
53462306a36Sopenharmony_ci	struct hv_enlightened_vmcs *evmcs;
53562306a36Sopenharmony_ci	struct hv_partition_assist_pg **p_hv_pa_pg =
53662306a36Sopenharmony_ci			&to_kvm_hv(vcpu->kvm)->hv_pa_pg;
53762306a36Sopenharmony_ci	/*
53862306a36Sopenharmony_ci	 * Synthetic VM-Exit is not enabled in current code and so All
53962306a36Sopenharmony_ci	 * evmcs in singe VM shares same assist page.
54062306a36Sopenharmony_ci	 */
54162306a36Sopenharmony_ci	if (!*p_hv_pa_pg)
54262306a36Sopenharmony_ci		*p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
54362306a36Sopenharmony_ci
54462306a36Sopenharmony_ci	if (!*p_hv_pa_pg)
54562306a36Sopenharmony_ci		return -ENOMEM;
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ci	evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_ci	evmcs->partition_assist_page =
55062306a36Sopenharmony_ci		__pa(*p_hv_pa_pg);
55162306a36Sopenharmony_ci	evmcs->hv_vm_id = (unsigned long)vcpu->kvm;
55262306a36Sopenharmony_ci	evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
55362306a36Sopenharmony_ci
55462306a36Sopenharmony_ci	return 0;
55562306a36Sopenharmony_ci}
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_cistatic __init void hv_init_evmcs(void)
55862306a36Sopenharmony_ci{
55962306a36Sopenharmony_ci	int cpu;
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_ci	if (!enlightened_vmcs)
56262306a36Sopenharmony_ci		return;
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	/*
56562306a36Sopenharmony_ci	 * Enlightened VMCS usage should be recommended and the host needs
56662306a36Sopenharmony_ci	 * to support eVMCS v1 or above.
56762306a36Sopenharmony_ci	 */
56862306a36Sopenharmony_ci	if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
56962306a36Sopenharmony_ci	    (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
57062306a36Sopenharmony_ci	     KVM_EVMCS_VERSION) {
57162306a36Sopenharmony_ci
57262306a36Sopenharmony_ci		/* Check that we have assist pages on all online CPUs */
57362306a36Sopenharmony_ci		for_each_online_cpu(cpu) {
57462306a36Sopenharmony_ci			if (!hv_get_vp_assist_page(cpu)) {
57562306a36Sopenharmony_ci				enlightened_vmcs = false;
57662306a36Sopenharmony_ci				break;
57762306a36Sopenharmony_ci			}
57862306a36Sopenharmony_ci		}
57962306a36Sopenharmony_ci
58062306a36Sopenharmony_ci		if (enlightened_vmcs) {
58162306a36Sopenharmony_ci			pr_info("Using Hyper-V Enlightened VMCS\n");
58262306a36Sopenharmony_ci			static_branch_enable(&__kvm_is_using_evmcs);
58362306a36Sopenharmony_ci		}
58462306a36Sopenharmony_ci
58562306a36Sopenharmony_ci		if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
58662306a36Sopenharmony_ci			vmx_x86_ops.enable_l2_tlb_flush
58762306a36Sopenharmony_ci				= hv_enable_l2_tlb_flush;
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci	} else {
59062306a36Sopenharmony_ci		enlightened_vmcs = false;
59162306a36Sopenharmony_ci	}
59262306a36Sopenharmony_ci}
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_cistatic void hv_reset_evmcs(void)
59562306a36Sopenharmony_ci{
59662306a36Sopenharmony_ci	struct hv_vp_assist_page *vp_ap;
59762306a36Sopenharmony_ci
59862306a36Sopenharmony_ci	if (!kvm_is_using_evmcs())
59962306a36Sopenharmony_ci		return;
60062306a36Sopenharmony_ci
60162306a36Sopenharmony_ci	/*
60262306a36Sopenharmony_ci	 * KVM should enable eVMCS if and only if all CPUs have a VP assist
60362306a36Sopenharmony_ci	 * page, and should reject CPU onlining if eVMCS is enabled the CPU
60462306a36Sopenharmony_ci	 * doesn't have a VP assist page allocated.
60562306a36Sopenharmony_ci	 */
60662306a36Sopenharmony_ci	vp_ap = hv_get_vp_assist_page(smp_processor_id());
60762306a36Sopenharmony_ci	if (WARN_ON_ONCE(!vp_ap))
60862306a36Sopenharmony_ci		return;
60962306a36Sopenharmony_ci
61062306a36Sopenharmony_ci	/*
61162306a36Sopenharmony_ci	 * Reset everything to support using non-enlightened VMCS access later
61262306a36Sopenharmony_ci	 * (e.g. when we reload the module with enlightened_vmcs=0)
61362306a36Sopenharmony_ci	 */
61462306a36Sopenharmony_ci	vp_ap->nested_control.features.directhypercall = 0;
61562306a36Sopenharmony_ci	vp_ap->current_nested_vmcs = 0;
61662306a36Sopenharmony_ci	vp_ap->enlighten_vmentry = 0;
61762306a36Sopenharmony_ci}
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci#else /* IS_ENABLED(CONFIG_HYPERV) */
/* Stub used when Hyper-V support is not built in: nothing to set up. */
static void hv_init_evmcs(void)
{
}
/* Stub used when Hyper-V support is not built in: nothing to reset. */
static void hv_reset_evmcs(void)
{
}
62262306a36Sopenharmony_ci#endif /* IS_ENABLED(CONFIG_HYPERV) */
62362306a36Sopenharmony_ci
62462306a36Sopenharmony_ci/*
62562306a36Sopenharmony_ci * Comment's format: document - errata name - stepping - processor name.
62662306a36Sopenharmony_ci * Refer from
62762306a36Sopenharmony_ci * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
62862306a36Sopenharmony_ci */
62962306a36Sopenharmony_cistatic u32 vmx_preemption_cpu_tfms[] = {
63062306a36Sopenharmony_ci/* 323344.pdf - BA86   - D0 - Xeon 7500 Series */
63162306a36Sopenharmony_ci0x000206E6,
63262306a36Sopenharmony_ci/* 323056.pdf - AAX65  - C2 - Xeon L3406 */
63362306a36Sopenharmony_ci/* 322814.pdf - AAT59  - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
63462306a36Sopenharmony_ci/* 322911.pdf - AAU65  - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
63562306a36Sopenharmony_ci0x00020652,
63662306a36Sopenharmony_ci/* 322911.pdf - AAU65  - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
63762306a36Sopenharmony_ci0x00020655,
63862306a36Sopenharmony_ci/* 322373.pdf - AAO95  - B1 - Xeon 3400 Series */
63962306a36Sopenharmony_ci/* 322166.pdf - AAN92  - B1 - i7-800 and i5-700 Desktop */
64062306a36Sopenharmony_ci/*
64162306a36Sopenharmony_ci * 320767.pdf - AAP86  - B1 -
64262306a36Sopenharmony_ci * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
64362306a36Sopenharmony_ci */
64462306a36Sopenharmony_ci0x000106E5,
64562306a36Sopenharmony_ci/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
64662306a36Sopenharmony_ci0x000106A0,
64762306a36Sopenharmony_ci/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
64862306a36Sopenharmony_ci0x000106A1,
64962306a36Sopenharmony_ci/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
65062306a36Sopenharmony_ci0x000106A4,
65162306a36Sopenharmony_ci /* 321333.pdf - AAM126 - D0 - Xeon 3500 */
65262306a36Sopenharmony_ci /* 321324.pdf - AAK139 - D0 - Xeon 5500 */
65362306a36Sopenharmony_ci /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
65462306a36Sopenharmony_ci0x000106A5,
65562306a36Sopenharmony_ci /* Xeon E3-1220 V2 */
65662306a36Sopenharmony_ci0x000306A8,
65762306a36Sopenharmony_ci};
65862306a36Sopenharmony_ci
65962306a36Sopenharmony_cistatic inline bool cpu_has_broken_vmx_preemption_timer(void)
66062306a36Sopenharmony_ci{
66162306a36Sopenharmony_ci	u32 eax = cpuid_eax(0x00000001), i;
66262306a36Sopenharmony_ci
66362306a36Sopenharmony_ci	/* Clear the reserved bits */
66462306a36Sopenharmony_ci	eax &= ~(0x3U << 14 | 0xfU << 28);
66562306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
66662306a36Sopenharmony_ci		if (eax == vmx_preemption_cpu_tfms[i])
66762306a36Sopenharmony_ci			return true;
66862306a36Sopenharmony_ci
66962306a36Sopenharmony_ci	return false;
67062306a36Sopenharmony_ci}
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_cistatic inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
67362306a36Sopenharmony_ci{
67462306a36Sopenharmony_ci	return flexpriority_enabled && lapic_in_kernel(vcpu);
67562306a36Sopenharmony_ci}
67662306a36Sopenharmony_ci
67762306a36Sopenharmony_cistatic int possible_passthrough_msr_slot(u32 msr)
67862306a36Sopenharmony_ci{
67962306a36Sopenharmony_ci	u32 i;
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++)
68262306a36Sopenharmony_ci		if (vmx_possible_passthrough_msrs[i] == msr)
68362306a36Sopenharmony_ci			return i;
68462306a36Sopenharmony_ci
68562306a36Sopenharmony_ci	return -ENOENT;
68662306a36Sopenharmony_ci}
68762306a36Sopenharmony_ci
68862306a36Sopenharmony_cistatic bool is_valid_passthrough_msr(u32 msr)
68962306a36Sopenharmony_ci{
69062306a36Sopenharmony_ci	bool r;
69162306a36Sopenharmony_ci
69262306a36Sopenharmony_ci	switch (msr) {
69362306a36Sopenharmony_ci	case 0x800 ... 0x8ff:
69462306a36Sopenharmony_ci		/* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
69562306a36Sopenharmony_ci		return true;
69662306a36Sopenharmony_ci	case MSR_IA32_RTIT_STATUS:
69762306a36Sopenharmony_ci	case MSR_IA32_RTIT_OUTPUT_BASE:
69862306a36Sopenharmony_ci	case MSR_IA32_RTIT_OUTPUT_MASK:
69962306a36Sopenharmony_ci	case MSR_IA32_RTIT_CR3_MATCH:
70062306a36Sopenharmony_ci	case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
70162306a36Sopenharmony_ci		/* PT MSRs. These are handled in pt_update_intercept_for_msr() */
70262306a36Sopenharmony_ci	case MSR_LBR_SELECT:
70362306a36Sopenharmony_ci	case MSR_LBR_TOS:
70462306a36Sopenharmony_ci	case MSR_LBR_INFO_0 ... MSR_LBR_INFO_0 + 31:
70562306a36Sopenharmony_ci	case MSR_LBR_NHM_FROM ... MSR_LBR_NHM_FROM + 31:
70662306a36Sopenharmony_ci	case MSR_LBR_NHM_TO ... MSR_LBR_NHM_TO + 31:
70762306a36Sopenharmony_ci	case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
70862306a36Sopenharmony_ci	case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
70962306a36Sopenharmony_ci		/* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
71062306a36Sopenharmony_ci		return true;
71162306a36Sopenharmony_ci	}
71262306a36Sopenharmony_ci
71362306a36Sopenharmony_ci	r = possible_passthrough_msr_slot(msr) != -ENOENT;
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci	WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	return r;
71862306a36Sopenharmony_ci}
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_cistruct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
72162306a36Sopenharmony_ci{
72262306a36Sopenharmony_ci	int i;
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci	i = kvm_find_user_return_msr(msr);
72562306a36Sopenharmony_ci	if (i >= 0)
72662306a36Sopenharmony_ci		return &vmx->guest_uret_msrs[i];
72762306a36Sopenharmony_ci	return NULL;
72862306a36Sopenharmony_ci}
72962306a36Sopenharmony_ci
73062306a36Sopenharmony_cistatic int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
73162306a36Sopenharmony_ci				  struct vmx_uret_msr *msr, u64 data)
73262306a36Sopenharmony_ci{
73362306a36Sopenharmony_ci	unsigned int slot = msr - vmx->guest_uret_msrs;
73462306a36Sopenharmony_ci	int ret = 0;
73562306a36Sopenharmony_ci
73662306a36Sopenharmony_ci	if (msr->load_into_hardware) {
73762306a36Sopenharmony_ci		preempt_disable();
73862306a36Sopenharmony_ci		ret = kvm_set_user_return_msr(slot, data, msr->mask);
73962306a36Sopenharmony_ci		preempt_enable();
74062306a36Sopenharmony_ci	}
74162306a36Sopenharmony_ci	if (!ret)
74262306a36Sopenharmony_ci		msr->data = data;
74362306a36Sopenharmony_ci	return ret;
74462306a36Sopenharmony_ci}
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ci/*
74762306a36Sopenharmony_ci * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
74862306a36Sopenharmony_ci *
74962306a36Sopenharmony_ci * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
75062306a36Sopenharmony_ci * atomically track post-VMXON state, e.g. this may be called in NMI context.
75162306a36Sopenharmony_ci * Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
75262306a36Sopenharmony_ci * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
75362306a36Sopenharmony_ci * magically in RM, VM86, compat mode, or at CPL>0.
75462306a36Sopenharmony_ci */
75562306a36Sopenharmony_cistatic int kvm_cpu_vmxoff(void)
75662306a36Sopenharmony_ci{
75762306a36Sopenharmony_ci	asm goto("1: vmxoff\n\t"
75862306a36Sopenharmony_ci			  _ASM_EXTABLE(1b, %l[fault])
75962306a36Sopenharmony_ci			  ::: "cc", "memory" : fault);
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_ci	cr4_clear_bits(X86_CR4_VMXE);
76262306a36Sopenharmony_ci	return 0;
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_cifault:
76562306a36Sopenharmony_ci	cr4_clear_bits(X86_CR4_VMXE);
76662306a36Sopenharmony_ci	return -EIO;
76762306a36Sopenharmony_ci}
76862306a36Sopenharmony_ci
76962306a36Sopenharmony_cistatic void vmx_emergency_disable(void)
77062306a36Sopenharmony_ci{
77162306a36Sopenharmony_ci	int cpu = raw_smp_processor_id();
77262306a36Sopenharmony_ci	struct loaded_vmcs *v;
77362306a36Sopenharmony_ci
77462306a36Sopenharmony_ci	kvm_rebooting = true;
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ci	/*
77762306a36Sopenharmony_ci	 * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
77862306a36Sopenharmony_ci	 * set in task context.  If this races with VMX is disabled by an NMI,
77962306a36Sopenharmony_ci	 * VMCLEAR and VMXOFF may #UD, but KVM will eat those faults due to
78062306a36Sopenharmony_ci	 * kvm_rebooting set.
78162306a36Sopenharmony_ci	 */
78262306a36Sopenharmony_ci	if (!(__read_cr4() & X86_CR4_VMXE))
78362306a36Sopenharmony_ci		return;
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci	list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
78662306a36Sopenharmony_ci			    loaded_vmcss_on_cpu_link)
78762306a36Sopenharmony_ci		vmcs_clear(v->vmcs);
78862306a36Sopenharmony_ci
78962306a36Sopenharmony_ci	kvm_cpu_vmxoff();
79062306a36Sopenharmony_ci}
79162306a36Sopenharmony_ci
79262306a36Sopenharmony_cistatic void __loaded_vmcs_clear(void *arg)
79362306a36Sopenharmony_ci{
79462306a36Sopenharmony_ci	struct loaded_vmcs *loaded_vmcs = arg;
79562306a36Sopenharmony_ci	int cpu = raw_smp_processor_id();
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci	if (loaded_vmcs->cpu != cpu)
79862306a36Sopenharmony_ci		return; /* vcpu migration can race with cpu offline */
79962306a36Sopenharmony_ci	if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
80062306a36Sopenharmony_ci		per_cpu(current_vmcs, cpu) = NULL;
80162306a36Sopenharmony_ci
80262306a36Sopenharmony_ci	vmcs_clear(loaded_vmcs->vmcs);
80362306a36Sopenharmony_ci	if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
80462306a36Sopenharmony_ci		vmcs_clear(loaded_vmcs->shadow_vmcs);
80562306a36Sopenharmony_ci
80662306a36Sopenharmony_ci	list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
80762306a36Sopenharmony_ci
80862306a36Sopenharmony_ci	/*
80962306a36Sopenharmony_ci	 * Ensure all writes to loaded_vmcs, including deleting it from its
81062306a36Sopenharmony_ci	 * current percpu list, complete before setting loaded_vmcs->cpu to
81162306a36Sopenharmony_ci	 * -1, otherwise a different cpu can see loaded_vmcs->cpu == -1 first
81262306a36Sopenharmony_ci	 * and add loaded_vmcs to its percpu list before it's deleted from this
81362306a36Sopenharmony_ci	 * cpu's list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs().
81462306a36Sopenharmony_ci	 */
81562306a36Sopenharmony_ci	smp_wmb();
81662306a36Sopenharmony_ci
81762306a36Sopenharmony_ci	loaded_vmcs->cpu = -1;
81862306a36Sopenharmony_ci	loaded_vmcs->launched = 0;
81962306a36Sopenharmony_ci}
82062306a36Sopenharmony_ci
82162306a36Sopenharmony_civoid loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
82262306a36Sopenharmony_ci{
82362306a36Sopenharmony_ci	int cpu = loaded_vmcs->cpu;
82462306a36Sopenharmony_ci
82562306a36Sopenharmony_ci	if (cpu != -1)
82662306a36Sopenharmony_ci		smp_call_function_single(cpu,
82762306a36Sopenharmony_ci			 __loaded_vmcs_clear, loaded_vmcs, 1);
82862306a36Sopenharmony_ci}
82962306a36Sopenharmony_ci
83062306a36Sopenharmony_cistatic bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
83162306a36Sopenharmony_ci				       unsigned field)
83262306a36Sopenharmony_ci{
83362306a36Sopenharmony_ci	bool ret;
83462306a36Sopenharmony_ci	u32 mask = 1 << (seg * SEG_FIELD_NR + field);
83562306a36Sopenharmony_ci
83662306a36Sopenharmony_ci	if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) {
83762306a36Sopenharmony_ci		kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS);
83862306a36Sopenharmony_ci		vmx->segment_cache.bitmask = 0;
83962306a36Sopenharmony_ci	}
84062306a36Sopenharmony_ci	ret = vmx->segment_cache.bitmask & mask;
84162306a36Sopenharmony_ci	vmx->segment_cache.bitmask |= mask;
84262306a36Sopenharmony_ci	return ret;
84362306a36Sopenharmony_ci}
84462306a36Sopenharmony_ci
84562306a36Sopenharmony_cistatic u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
84662306a36Sopenharmony_ci{
84762306a36Sopenharmony_ci	u16 *p = &vmx->segment_cache.seg[seg].selector;
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
85062306a36Sopenharmony_ci		*p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
85162306a36Sopenharmony_ci	return *p;
85262306a36Sopenharmony_ci}
85362306a36Sopenharmony_ci
85462306a36Sopenharmony_cistatic ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
85562306a36Sopenharmony_ci{
85662306a36Sopenharmony_ci	ulong *p = &vmx->segment_cache.seg[seg].base;
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_ci	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
85962306a36Sopenharmony_ci		*p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
86062306a36Sopenharmony_ci	return *p;
86162306a36Sopenharmony_ci}
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_cistatic u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
86462306a36Sopenharmony_ci{
86562306a36Sopenharmony_ci	u32 *p = &vmx->segment_cache.seg[seg].limit;
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_ci	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
86862306a36Sopenharmony_ci		*p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
86962306a36Sopenharmony_ci	return *p;
87062306a36Sopenharmony_ci}
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_cistatic u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
87362306a36Sopenharmony_ci{
87462306a36Sopenharmony_ci	u32 *p = &vmx->segment_cache.seg[seg].ar;
87562306a36Sopenharmony_ci
87662306a36Sopenharmony_ci	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
87762306a36Sopenharmony_ci		*p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
87862306a36Sopenharmony_ci	return *p;
87962306a36Sopenharmony_ci}
88062306a36Sopenharmony_ci
88162306a36Sopenharmony_civoid vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
88262306a36Sopenharmony_ci{
88362306a36Sopenharmony_ci	u32 eb;
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
88662306a36Sopenharmony_ci	     (1u << DB_VECTOR) | (1u << AC_VECTOR);
88762306a36Sopenharmony_ci	/*
88862306a36Sopenharmony_ci	 * Guest access to VMware backdoor ports could legitimately
88962306a36Sopenharmony_ci	 * trigger #GP because of TSS I/O permission bitmap.
89062306a36Sopenharmony_ci	 * We intercept those #GP and allow access to them anyway
89162306a36Sopenharmony_ci	 * as VMware does.
89262306a36Sopenharmony_ci	 */
89362306a36Sopenharmony_ci	if (enable_vmware_backdoor)
89462306a36Sopenharmony_ci		eb |= (1u << GP_VECTOR);
89562306a36Sopenharmony_ci	if ((vcpu->guest_debug &
89662306a36Sopenharmony_ci	     (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
89762306a36Sopenharmony_ci	    (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
89862306a36Sopenharmony_ci		eb |= 1u << BP_VECTOR;
89962306a36Sopenharmony_ci	if (to_vmx(vcpu)->rmode.vm86_active)
90062306a36Sopenharmony_ci		eb = ~0;
90162306a36Sopenharmony_ci	if (!vmx_need_pf_intercept(vcpu))
90262306a36Sopenharmony_ci		eb &= ~(1u << PF_VECTOR);
90362306a36Sopenharmony_ci
90462306a36Sopenharmony_ci	/* When we are running a nested L2 guest and L1 specified for it a
90562306a36Sopenharmony_ci	 * certain exception bitmap, we must trap the same exceptions and pass
90662306a36Sopenharmony_ci	 * them to L1. When running L2, we will only handle the exceptions
90762306a36Sopenharmony_ci	 * specified above if L1 did not want them.
90862306a36Sopenharmony_ci	 */
90962306a36Sopenharmony_ci	if (is_guest_mode(vcpu))
91062306a36Sopenharmony_ci		eb |= get_vmcs12(vcpu)->exception_bitmap;
91162306a36Sopenharmony_ci	else {
91262306a36Sopenharmony_ci		int mask = 0, match = 0;
91362306a36Sopenharmony_ci
91462306a36Sopenharmony_ci		if (enable_ept && (eb & (1u << PF_VECTOR))) {
91562306a36Sopenharmony_ci			/*
91662306a36Sopenharmony_ci			 * If EPT is enabled, #PF is currently only intercepted
91762306a36Sopenharmony_ci			 * if MAXPHYADDR is smaller on the guest than on the
91862306a36Sopenharmony_ci			 * host.  In that case we only care about present,
91962306a36Sopenharmony_ci			 * non-reserved faults.  For vmcs02, however, PFEC_MASK
92062306a36Sopenharmony_ci			 * and PFEC_MATCH are set in prepare_vmcs02_rare.
92162306a36Sopenharmony_ci			 */
92262306a36Sopenharmony_ci			mask = PFERR_PRESENT_MASK | PFERR_RSVD_MASK;
92362306a36Sopenharmony_ci			match = PFERR_PRESENT_MASK;
92462306a36Sopenharmony_ci		}
92562306a36Sopenharmony_ci		vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, mask);
92662306a36Sopenharmony_ci		vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, match);
92762306a36Sopenharmony_ci	}
92862306a36Sopenharmony_ci
92962306a36Sopenharmony_ci	/*
93062306a36Sopenharmony_ci	 * Disabling xfd interception indicates that dynamic xfeatures
93162306a36Sopenharmony_ci	 * might be used in the guest. Always trap #NM in this case
93262306a36Sopenharmony_ci	 * to save guest xfd_err timely.
93362306a36Sopenharmony_ci	 */
93462306a36Sopenharmony_ci	if (vcpu->arch.xfd_no_write_intercept)
93562306a36Sopenharmony_ci		eb |= (1u << NM_VECTOR);
93662306a36Sopenharmony_ci
93762306a36Sopenharmony_ci	vmcs_write32(EXCEPTION_BITMAP, eb);
93862306a36Sopenharmony_ci}
93962306a36Sopenharmony_ci
94062306a36Sopenharmony_ci/*
94162306a36Sopenharmony_ci * Check if MSR is intercepted for currently loaded MSR bitmap.
94262306a36Sopenharmony_ci */
94362306a36Sopenharmony_cistatic bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
94462306a36Sopenharmony_ci{
94562306a36Sopenharmony_ci	if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
94662306a36Sopenharmony_ci		return true;
94762306a36Sopenharmony_ci
94862306a36Sopenharmony_ci	return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr);
94962306a36Sopenharmony_ci}
95062306a36Sopenharmony_ci
95162306a36Sopenharmony_ciunsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
95262306a36Sopenharmony_ci{
95362306a36Sopenharmony_ci	unsigned int flags = 0;
95462306a36Sopenharmony_ci
95562306a36Sopenharmony_ci	if (vmx->loaded_vmcs->launched)
95662306a36Sopenharmony_ci		flags |= VMX_RUN_VMRESUME;
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ci	/*
95962306a36Sopenharmony_ci	 * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free
96062306a36Sopenharmony_ci	 * to change it directly without causing a vmexit.  In that case read
96162306a36Sopenharmony_ci	 * it after vmexit and store it in vmx->spec_ctrl.
96262306a36Sopenharmony_ci	 */
96362306a36Sopenharmony_ci	if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
96462306a36Sopenharmony_ci		flags |= VMX_RUN_SAVE_SPEC_CTRL;
96562306a36Sopenharmony_ci
96662306a36Sopenharmony_ci	return flags;
96762306a36Sopenharmony_ci}
96862306a36Sopenharmony_ci
96962306a36Sopenharmony_cistatic __always_inline void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
97062306a36Sopenharmony_ci		unsigned long entry, unsigned long exit)
97162306a36Sopenharmony_ci{
97262306a36Sopenharmony_ci	vm_entry_controls_clearbit(vmx, entry);
97362306a36Sopenharmony_ci	vm_exit_controls_clearbit(vmx, exit);
97462306a36Sopenharmony_ci}
97562306a36Sopenharmony_ci
97662306a36Sopenharmony_ciint vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr)
97762306a36Sopenharmony_ci{
97862306a36Sopenharmony_ci	unsigned int i;
97962306a36Sopenharmony_ci
98062306a36Sopenharmony_ci	for (i = 0; i < m->nr; ++i) {
98162306a36Sopenharmony_ci		if (m->val[i].index == msr)
98262306a36Sopenharmony_ci			return i;
98362306a36Sopenharmony_ci	}
98462306a36Sopenharmony_ci	return -ENOENT;
98562306a36Sopenharmony_ci}
98662306a36Sopenharmony_ci
98762306a36Sopenharmony_cistatic void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
98862306a36Sopenharmony_ci{
98962306a36Sopenharmony_ci	int i;
99062306a36Sopenharmony_ci	struct msr_autoload *m = &vmx->msr_autoload;
99162306a36Sopenharmony_ci
99262306a36Sopenharmony_ci	switch (msr) {
99362306a36Sopenharmony_ci	case MSR_EFER:
99462306a36Sopenharmony_ci		if (cpu_has_load_ia32_efer()) {
99562306a36Sopenharmony_ci			clear_atomic_switch_msr_special(vmx,
99662306a36Sopenharmony_ci					VM_ENTRY_LOAD_IA32_EFER,
99762306a36Sopenharmony_ci					VM_EXIT_LOAD_IA32_EFER);
99862306a36Sopenharmony_ci			return;
99962306a36Sopenharmony_ci		}
100062306a36Sopenharmony_ci		break;
100162306a36Sopenharmony_ci	case MSR_CORE_PERF_GLOBAL_CTRL:
100262306a36Sopenharmony_ci		if (cpu_has_load_perf_global_ctrl()) {
100362306a36Sopenharmony_ci			clear_atomic_switch_msr_special(vmx,
100462306a36Sopenharmony_ci					VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
100562306a36Sopenharmony_ci					VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
100662306a36Sopenharmony_ci			return;
100762306a36Sopenharmony_ci		}
100862306a36Sopenharmony_ci		break;
100962306a36Sopenharmony_ci	}
101062306a36Sopenharmony_ci	i = vmx_find_loadstore_msr_slot(&m->guest, msr);
101162306a36Sopenharmony_ci	if (i < 0)
101262306a36Sopenharmony_ci		goto skip_guest;
101362306a36Sopenharmony_ci	--m->guest.nr;
101462306a36Sopenharmony_ci	m->guest.val[i] = m->guest.val[m->guest.nr];
101562306a36Sopenharmony_ci	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
101662306a36Sopenharmony_ci
101762306a36Sopenharmony_ciskip_guest:
101862306a36Sopenharmony_ci	i = vmx_find_loadstore_msr_slot(&m->host, msr);
101962306a36Sopenharmony_ci	if (i < 0)
102062306a36Sopenharmony_ci		return;
102162306a36Sopenharmony_ci
102262306a36Sopenharmony_ci	--m->host.nr;
102362306a36Sopenharmony_ci	m->host.val[i] = m->host.val[m->host.nr];
102462306a36Sopenharmony_ci	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
102562306a36Sopenharmony_ci}
102662306a36Sopenharmony_ci
102762306a36Sopenharmony_cistatic __always_inline void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
102862306a36Sopenharmony_ci		unsigned long entry, unsigned long exit,
102962306a36Sopenharmony_ci		unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
103062306a36Sopenharmony_ci		u64 guest_val, u64 host_val)
103162306a36Sopenharmony_ci{
103262306a36Sopenharmony_ci	vmcs_write64(guest_val_vmcs, guest_val);
103362306a36Sopenharmony_ci	if (host_val_vmcs != HOST_IA32_EFER)
103462306a36Sopenharmony_ci		vmcs_write64(host_val_vmcs, host_val);
103562306a36Sopenharmony_ci	vm_entry_controls_setbit(vmx, entry);
103662306a36Sopenharmony_ci	vm_exit_controls_setbit(vmx, exit);
103762306a36Sopenharmony_ci}
103862306a36Sopenharmony_ci
103962306a36Sopenharmony_cistatic void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
104062306a36Sopenharmony_ci				  u64 guest_val, u64 host_val, bool entry_only)
104162306a36Sopenharmony_ci{
104262306a36Sopenharmony_ci	int i, j = 0;
104362306a36Sopenharmony_ci	struct msr_autoload *m = &vmx->msr_autoload;
104462306a36Sopenharmony_ci
104562306a36Sopenharmony_ci	switch (msr) {
104662306a36Sopenharmony_ci	case MSR_EFER:
104762306a36Sopenharmony_ci		if (cpu_has_load_ia32_efer()) {
104862306a36Sopenharmony_ci			add_atomic_switch_msr_special(vmx,
104962306a36Sopenharmony_ci					VM_ENTRY_LOAD_IA32_EFER,
105062306a36Sopenharmony_ci					VM_EXIT_LOAD_IA32_EFER,
105162306a36Sopenharmony_ci					GUEST_IA32_EFER,
105262306a36Sopenharmony_ci					HOST_IA32_EFER,
105362306a36Sopenharmony_ci					guest_val, host_val);
105462306a36Sopenharmony_ci			return;
105562306a36Sopenharmony_ci		}
105662306a36Sopenharmony_ci		break;
105762306a36Sopenharmony_ci	case MSR_CORE_PERF_GLOBAL_CTRL:
105862306a36Sopenharmony_ci		if (cpu_has_load_perf_global_ctrl()) {
105962306a36Sopenharmony_ci			add_atomic_switch_msr_special(vmx,
106062306a36Sopenharmony_ci					VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
106162306a36Sopenharmony_ci					VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
106262306a36Sopenharmony_ci					GUEST_IA32_PERF_GLOBAL_CTRL,
106362306a36Sopenharmony_ci					HOST_IA32_PERF_GLOBAL_CTRL,
106462306a36Sopenharmony_ci					guest_val, host_val);
106562306a36Sopenharmony_ci			return;
106662306a36Sopenharmony_ci		}
106762306a36Sopenharmony_ci		break;
106862306a36Sopenharmony_ci	case MSR_IA32_PEBS_ENABLE:
106962306a36Sopenharmony_ci		/* PEBS needs a quiescent period after being disabled (to write
107062306a36Sopenharmony_ci		 * a record).  Disabling PEBS through VMX MSR swapping doesn't
107162306a36Sopenharmony_ci		 * provide that period, so a CPU could write host's record into
107262306a36Sopenharmony_ci		 * guest's memory.
107362306a36Sopenharmony_ci		 */
107462306a36Sopenharmony_ci		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
107562306a36Sopenharmony_ci	}
107662306a36Sopenharmony_ci
107762306a36Sopenharmony_ci	i = vmx_find_loadstore_msr_slot(&m->guest, msr);
107862306a36Sopenharmony_ci	if (!entry_only)
107962306a36Sopenharmony_ci		j = vmx_find_loadstore_msr_slot(&m->host, msr);
108062306a36Sopenharmony_ci
108162306a36Sopenharmony_ci	if ((i < 0 && m->guest.nr == MAX_NR_LOADSTORE_MSRS) ||
108262306a36Sopenharmony_ci	    (j < 0 &&  m->host.nr == MAX_NR_LOADSTORE_MSRS)) {
108362306a36Sopenharmony_ci		printk_once(KERN_WARNING "Not enough msr switch entries. "
108462306a36Sopenharmony_ci				"Can't add msr %x\n", msr);
108562306a36Sopenharmony_ci		return;
108662306a36Sopenharmony_ci	}
108762306a36Sopenharmony_ci	if (i < 0) {
108862306a36Sopenharmony_ci		i = m->guest.nr++;
108962306a36Sopenharmony_ci		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
109062306a36Sopenharmony_ci	}
109162306a36Sopenharmony_ci	m->guest.val[i].index = msr;
109262306a36Sopenharmony_ci	m->guest.val[i].value = guest_val;
109362306a36Sopenharmony_ci
109462306a36Sopenharmony_ci	if (entry_only)
109562306a36Sopenharmony_ci		return;
109662306a36Sopenharmony_ci
109762306a36Sopenharmony_ci	if (j < 0) {
109862306a36Sopenharmony_ci		j = m->host.nr++;
109962306a36Sopenharmony_ci		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
110062306a36Sopenharmony_ci	}
110162306a36Sopenharmony_ci	m->host.val[j].index = msr;
110262306a36Sopenharmony_ci	m->host.val[j].value = host_val;
110362306a36Sopenharmony_ci}
110462306a36Sopenharmony_ci
110562306a36Sopenharmony_cistatic bool update_transition_efer(struct vcpu_vmx *vmx)
110662306a36Sopenharmony_ci{
110762306a36Sopenharmony_ci	u64 guest_efer = vmx->vcpu.arch.efer;
110862306a36Sopenharmony_ci	u64 ignore_bits = 0;
110962306a36Sopenharmony_ci	int i;
111062306a36Sopenharmony_ci
111162306a36Sopenharmony_ci	/* Shadow paging assumes NX to be available.  */
111262306a36Sopenharmony_ci	if (!enable_ept)
111362306a36Sopenharmony_ci		guest_efer |= EFER_NX;
111462306a36Sopenharmony_ci
111562306a36Sopenharmony_ci	/*
111662306a36Sopenharmony_ci	 * LMA and LME handled by hardware; SCE meaningless outside long mode.
111762306a36Sopenharmony_ci	 */
111862306a36Sopenharmony_ci	ignore_bits |= EFER_SCE;
111962306a36Sopenharmony_ci#ifdef CONFIG_X86_64
112062306a36Sopenharmony_ci	ignore_bits |= EFER_LMA | EFER_LME;
112162306a36Sopenharmony_ci	/* SCE is meaningful only in long mode on Intel */
112262306a36Sopenharmony_ci	if (guest_efer & EFER_LMA)
112362306a36Sopenharmony_ci		ignore_bits &= ~(u64)EFER_SCE;
112462306a36Sopenharmony_ci#endif
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_ci	/*
112762306a36Sopenharmony_ci	 * On EPT, we can't emulate NX, so we must switch EFER atomically.
112862306a36Sopenharmony_ci	 * On CPUs that support "load IA32_EFER", always switch EFER
112962306a36Sopenharmony_ci	 * atomically, since it's faster than switching it manually.
113062306a36Sopenharmony_ci	 */
113162306a36Sopenharmony_ci	if (cpu_has_load_ia32_efer() ||
113262306a36Sopenharmony_ci	    (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
113362306a36Sopenharmony_ci		if (!(guest_efer & EFER_LMA))
113462306a36Sopenharmony_ci			guest_efer &= ~EFER_LME;
113562306a36Sopenharmony_ci		if (guest_efer != host_efer)
113662306a36Sopenharmony_ci			add_atomic_switch_msr(vmx, MSR_EFER,
113762306a36Sopenharmony_ci					      guest_efer, host_efer, false);
113862306a36Sopenharmony_ci		else
113962306a36Sopenharmony_ci			clear_atomic_switch_msr(vmx, MSR_EFER);
114062306a36Sopenharmony_ci		return false;
114162306a36Sopenharmony_ci	}
114262306a36Sopenharmony_ci
114362306a36Sopenharmony_ci	i = kvm_find_user_return_msr(MSR_EFER);
114462306a36Sopenharmony_ci	if (i < 0)
114562306a36Sopenharmony_ci		return false;
114662306a36Sopenharmony_ci
114762306a36Sopenharmony_ci	clear_atomic_switch_msr(vmx, MSR_EFER);
114862306a36Sopenharmony_ci
114962306a36Sopenharmony_ci	guest_efer &= ~ignore_bits;
115062306a36Sopenharmony_ci	guest_efer |= host_efer & ignore_bits;
115162306a36Sopenharmony_ci
115262306a36Sopenharmony_ci	vmx->guest_uret_msrs[i].data = guest_efer;
115362306a36Sopenharmony_ci	vmx->guest_uret_msrs[i].mask = ~ignore_bits;
115462306a36Sopenharmony_ci
115562306a36Sopenharmony_ci	return true;
115662306a36Sopenharmony_ci}
115762306a36Sopenharmony_ci
115862306a36Sopenharmony_ci#ifdef CONFIG_X86_32
115962306a36Sopenharmony_ci/*
116062306a36Sopenharmony_ci * On 32-bit kernels, VM exits still load the FS and GS bases from the
116162306a36Sopenharmony_ci * VMCS rather than the segment table.  KVM uses this helper to figure
116262306a36Sopenharmony_ci * out the current bases to poke them into the VMCS before entry.
116362306a36Sopenharmony_ci */
116462306a36Sopenharmony_cistatic unsigned long segment_base(u16 selector)
116562306a36Sopenharmony_ci{
116662306a36Sopenharmony_ci	struct desc_struct *table;
116762306a36Sopenharmony_ci	unsigned long v;
116862306a36Sopenharmony_ci
116962306a36Sopenharmony_ci	if (!(selector & ~SEGMENT_RPL_MASK))
117062306a36Sopenharmony_ci		return 0;
117162306a36Sopenharmony_ci
117262306a36Sopenharmony_ci	table = get_current_gdt_ro();
117362306a36Sopenharmony_ci
117462306a36Sopenharmony_ci	if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
117562306a36Sopenharmony_ci		u16 ldt_selector = kvm_read_ldt();
117662306a36Sopenharmony_ci
117762306a36Sopenharmony_ci		if (!(ldt_selector & ~SEGMENT_RPL_MASK))
117862306a36Sopenharmony_ci			return 0;
117962306a36Sopenharmony_ci
118062306a36Sopenharmony_ci		table = (struct desc_struct *)segment_base(ldt_selector);
118162306a36Sopenharmony_ci	}
118262306a36Sopenharmony_ci	v = get_desc_base(&table[selector >> 3]);
118362306a36Sopenharmony_ci	return v;
118462306a36Sopenharmony_ci}
118562306a36Sopenharmony_ci#endif
118662306a36Sopenharmony_ci
118762306a36Sopenharmony_cistatic inline bool pt_can_write_msr(struct vcpu_vmx *vmx)
118862306a36Sopenharmony_ci{
118962306a36Sopenharmony_ci	return vmx_pt_mode_is_host_guest() &&
119062306a36Sopenharmony_ci	       !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
119162306a36Sopenharmony_ci}
119262306a36Sopenharmony_ci
119362306a36Sopenharmony_cistatic inline bool pt_output_base_valid(struct kvm_vcpu *vcpu, u64 base)
119462306a36Sopenharmony_ci{
119562306a36Sopenharmony_ci	/* The base must be 128-byte aligned and a legal physical address. */
119662306a36Sopenharmony_ci	return kvm_vcpu_is_legal_aligned_gpa(vcpu, base, 128);
119762306a36Sopenharmony_ci}
119862306a36Sopenharmony_ci
119962306a36Sopenharmony_cistatic inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
120062306a36Sopenharmony_ci{
120162306a36Sopenharmony_ci	u32 i;
120262306a36Sopenharmony_ci
120362306a36Sopenharmony_ci	wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
120462306a36Sopenharmony_ci	wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
120562306a36Sopenharmony_ci	wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
120662306a36Sopenharmony_ci	wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
120762306a36Sopenharmony_ci	for (i = 0; i < addr_range; i++) {
120862306a36Sopenharmony_ci		wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
120962306a36Sopenharmony_ci		wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
121062306a36Sopenharmony_ci	}
121162306a36Sopenharmony_ci}
121262306a36Sopenharmony_ci
121362306a36Sopenharmony_cistatic inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range)
121462306a36Sopenharmony_ci{
121562306a36Sopenharmony_ci	u32 i;
121662306a36Sopenharmony_ci
121762306a36Sopenharmony_ci	rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
121862306a36Sopenharmony_ci	rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
121962306a36Sopenharmony_ci	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
122062306a36Sopenharmony_ci	rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
122162306a36Sopenharmony_ci	for (i = 0; i < addr_range; i++) {
122262306a36Sopenharmony_ci		rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
122362306a36Sopenharmony_ci		rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
122462306a36Sopenharmony_ci	}
122562306a36Sopenharmony_ci}
122662306a36Sopenharmony_ci
122762306a36Sopenharmony_cistatic void pt_guest_enter(struct vcpu_vmx *vmx)
122862306a36Sopenharmony_ci{
122962306a36Sopenharmony_ci	if (vmx_pt_mode_is_system())
123062306a36Sopenharmony_ci		return;
123162306a36Sopenharmony_ci
123262306a36Sopenharmony_ci	/*
123362306a36Sopenharmony_ci	 * GUEST_IA32_RTIT_CTL is already set in the VMCS.
123462306a36Sopenharmony_ci	 * Save host state before VM entry.
123562306a36Sopenharmony_ci	 */
123662306a36Sopenharmony_ci	rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
123762306a36Sopenharmony_ci	if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
123862306a36Sopenharmony_ci		wrmsrl(MSR_IA32_RTIT_CTL, 0);
123962306a36Sopenharmony_ci		pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.num_address_ranges);
124062306a36Sopenharmony_ci		pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.num_address_ranges);
124162306a36Sopenharmony_ci	}
124262306a36Sopenharmony_ci}
124362306a36Sopenharmony_ci
124462306a36Sopenharmony_cistatic void pt_guest_exit(struct vcpu_vmx *vmx)
124562306a36Sopenharmony_ci{
124662306a36Sopenharmony_ci	if (vmx_pt_mode_is_system())
124762306a36Sopenharmony_ci		return;
124862306a36Sopenharmony_ci
124962306a36Sopenharmony_ci	if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
125062306a36Sopenharmony_ci		pt_save_msr(&vmx->pt_desc.guest, vmx->pt_desc.num_address_ranges);
125162306a36Sopenharmony_ci		pt_load_msr(&vmx->pt_desc.host, vmx->pt_desc.num_address_ranges);
125262306a36Sopenharmony_ci	}
125362306a36Sopenharmony_ci
125462306a36Sopenharmony_ci	/*
125562306a36Sopenharmony_ci	 * KVM requires VM_EXIT_CLEAR_IA32_RTIT_CTL to expose PT to the guest,
125662306a36Sopenharmony_ci	 * i.e. RTIT_CTL is always cleared on VM-Exit.  Restore it if necessary.
125762306a36Sopenharmony_ci	 */
125862306a36Sopenharmony_ci	if (vmx->pt_desc.host.ctl)
125962306a36Sopenharmony_ci		wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
126062306a36Sopenharmony_ci}
126162306a36Sopenharmony_ci
126262306a36Sopenharmony_civoid vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
126362306a36Sopenharmony_ci			unsigned long fs_base, unsigned long gs_base)
126462306a36Sopenharmony_ci{
126562306a36Sopenharmony_ci	if (unlikely(fs_sel != host->fs_sel)) {
126662306a36Sopenharmony_ci		if (!(fs_sel & 7))
126762306a36Sopenharmony_ci			vmcs_write16(HOST_FS_SELECTOR, fs_sel);
126862306a36Sopenharmony_ci		else
126962306a36Sopenharmony_ci			vmcs_write16(HOST_FS_SELECTOR, 0);
127062306a36Sopenharmony_ci		host->fs_sel = fs_sel;
127162306a36Sopenharmony_ci	}
127262306a36Sopenharmony_ci	if (unlikely(gs_sel != host->gs_sel)) {
127362306a36Sopenharmony_ci		if (!(gs_sel & 7))
127462306a36Sopenharmony_ci			vmcs_write16(HOST_GS_SELECTOR, gs_sel);
127562306a36Sopenharmony_ci		else
127662306a36Sopenharmony_ci			vmcs_write16(HOST_GS_SELECTOR, 0);
127762306a36Sopenharmony_ci		host->gs_sel = gs_sel;
127862306a36Sopenharmony_ci	}
127962306a36Sopenharmony_ci	if (unlikely(fs_base != host->fs_base)) {
128062306a36Sopenharmony_ci		vmcs_writel(HOST_FS_BASE, fs_base);
128162306a36Sopenharmony_ci		host->fs_base = fs_base;
128262306a36Sopenharmony_ci	}
128362306a36Sopenharmony_ci	if (unlikely(gs_base != host->gs_base)) {
128462306a36Sopenharmony_ci		vmcs_writel(HOST_GS_BASE, gs_base);
128562306a36Sopenharmony_ci		host->gs_base = gs_base;
128662306a36Sopenharmony_ci	}
128762306a36Sopenharmony_ci}
128862306a36Sopenharmony_ci
/*
 * Prepare host-side state for running @vcpu's guest.
 *
 * Each call clears req_immediate_exit, loads the guest's user-return MSRs
 * into hardware if that has not been done since the last switch to host,
 * and syncs vmcs12 to the shadow VMCS when flagged.
 *
 * If guest state is not already loaded, it additionally records the host's
 * LDT/DS/ES/FS/GS selectors and FS/GS bases, installs the guest's
 * MSR_KERNEL_GS_BASE (64-bit only), updates the host FS/GS fields in the
 * VMCS, and marks guest state as loaded.
 */
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmcs_host_state *host_state;
#ifdef CONFIG_X86_64
	int cpu = raw_smp_processor_id();
#endif
	unsigned long fs_base, gs_base;
	u16 fs_sel, gs_sel;
	int i;

	vmx->req_immediate_exit = false;

	/*
	 * Note that guest MSRs to be saved/restored can also be changed
	 * when guest state is loaded. This happens when guest transitions
	 * to/from long-mode by setting MSR_EFER.LMA.
	 */
	if (!vmx->guest_uret_msrs_loaded) {
		vmx->guest_uret_msrs_loaded = true;
		for (i = 0; i < kvm_nr_uret_msrs; ++i) {
			/* Skip slots not flagged for loading into hardware. */
			if (!vmx->guest_uret_msrs[i].load_into_hardware)
				continue;

			kvm_set_user_return_msr(i,
						vmx->guest_uret_msrs[i].data,
						vmx->guest_uret_msrs[i].mask);
		}
	}

	if (vmx->nested.need_vmcs12_to_shadow_sync)
		nested_sync_vmcs12_to_shadow(vcpu);

	/* Everything below only needs doing once per guest-state load. */
	if (vmx->guest_state_loaded)
		return;

	host_state = &vmx->loaded_vmcs->host_state;

	/*
	 * Set host fs and gs selectors.  Unfortunately, 22.2.3 does not
	 * allow segment selectors with cpl > 0 or ti == 1.
	 */
	host_state->ldt_sel = kvm_read_ldt();

#ifdef CONFIG_X86_64
	savesegment(ds, host_state->ds_sel);
	savesegment(es, host_state->es_sel);

	/*
	 * HOST_GS_BASE gets this CPU's kernel-mode GS base.  The value that
	 * vmx_prepare_switch_to_host() later writes back to MSR_KERNEL_GS_BASE
	 * is stashed in msr_host_kernel_gs_base.
	 */
	gs_base = cpu_kernelmode_gs_base(cpu);
	if (likely(is_64bit_mm(current->mm))) {
		/* Take the FS/GS selectors and bases from current->thread. */
		current_save_fsgs();
		fs_sel = current->thread.fsindex;
		gs_sel = current->thread.gsindex;
		fs_base = current->thread.fsbase;
		vmx->msr_host_kernel_gs_base = current->thread.gsbase;
	} else {
		/* Otherwise read selectors and bases directly from hardware. */
		savesegment(fs, fs_sel);
		savesegment(gs, gs_sel);
		fs_base = read_msr(MSR_FS_BASE);
		vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
	}

	/* Host value is saved above; now install the guest's value. */
	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#else
	savesegment(fs, fs_sel);
	savesegment(gs, gs_sel);
	/* 32-bit: derive the bases from the descriptor tables. */
	fs_base = segment_base(fs_sel);
	gs_base = segment_base(gs_sel);
#endif

	vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
	vmx->guest_state_loaded = true;
}
136262306a36Sopenharmony_ci
/*
 * Undo vmx_prepare_switch_to_guest(): restore the host segment and MSR
 * state that was recorded in the loaded VMCS's host_state, and mark both
 * the guest state and the guest user-return MSRs as no longer loaded.
 * Does nothing if guest state is not currently loaded.
 */
static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
{
	struct vmcs_host_state *host_state;

	if (!vmx->guest_state_loaded)
		return;

	host_state = &vmx->loaded_vmcs->host_state;

	++vmx->vcpu.stat.host_state_reload;

#ifdef CONFIG_X86_64
	/* Capture the guest's current value before the host's is restored. */
	rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#endif
	/*
	 * Selectors with any of the low three bits set were written to the
	 * VMCS as 0 by vmx_set_host_fs_gs(), so they are reloaded by hand
	 * here.  GS is reloaded together with the LDT.
	 */
	if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
		kvm_load_ldt(host_state->ldt_sel);
#ifdef CONFIG_X86_64
		load_gs_index(host_state->gs_sel);
#else
		loadsegment(gs, host_state->gs_sel);
#endif
	}
	if (host_state->fs_sel & 7)
		loadsegment(fs, host_state->fs_sel);
#ifdef CONFIG_X86_64
	/* DS/ES only need reloading if either saved selector is non-zero. */
	if (unlikely(host_state->ds_sel | host_state->es_sel)) {
		loadsegment(ds, host_state->ds_sel);
		loadsegment(es, host_state->es_sel);
	}
#endif
	invalidate_tss_limit();
#ifdef CONFIG_X86_64
	/* Put back the host value saved in vmx_prepare_switch_to_guest(). */
	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
	load_fixmap_gdt(raw_smp_processor_id());
	vmx->guest_state_loaded = false;
	vmx->guest_uret_msrs_loaded = false;
}
140162306a36Sopenharmony_ci
140262306a36Sopenharmony_ci#ifdef CONFIG_X86_64
140362306a36Sopenharmony_cistatic u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
140462306a36Sopenharmony_ci{
140562306a36Sopenharmony_ci	preempt_disable();
140662306a36Sopenharmony_ci	if (vmx->guest_state_loaded)
140762306a36Sopenharmony_ci		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
140862306a36Sopenharmony_ci	preempt_enable();
140962306a36Sopenharmony_ci	return vmx->msr_guest_kernel_gs_base;
141062306a36Sopenharmony_ci}
141162306a36Sopenharmony_ci
141262306a36Sopenharmony_cistatic void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
141362306a36Sopenharmony_ci{
141462306a36Sopenharmony_ci	preempt_disable();
141562306a36Sopenharmony_ci	if (vmx->guest_state_loaded)
141662306a36Sopenharmony_ci		wrmsrl(MSR_KERNEL_GS_BASE, data);
141762306a36Sopenharmony_ci	preempt_enable();
141862306a36Sopenharmony_ci	vmx->msr_guest_kernel_gs_base = data;
141962306a36Sopenharmony_ci}
142062306a36Sopenharmony_ci#endif
142162306a36Sopenharmony_ci
/*
 * Make @vcpu's loaded_vmcs the current VMCS on @cpu.
 *
 * If the VMCS was last associated with a different CPU, it is cleared,
 * added to @cpu's list of loaded VMCSs, a TLB flush is requested and the
 * per-CPU host fields (TR base, GDTR base and, where applicable, SYSENTER
 * ESP) are rewritten before recording @cpu in the loaded_vmcs.
 *
 * @buddy: the loaded_vmcs expected to have been current on @cpu before this
 *         call, or NULL.  An indirect branch prediction barrier is issued
 *         when the current VMCS changes and @buddy is NULL or does not match
 *         the previously current VMCS (the latter also triggers a WARN).
 */
void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
			struct loaded_vmcs *buddy)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
	struct vmcs *prev;

	if (!already_loaded) {
		loaded_vmcs_clear(vmx->loaded_vmcs);
		/* IRQs stay off while the per-CPU list is modified. */
		local_irq_disable();

		/*
		 * Ensure loaded_vmcs->cpu is read before adding loaded_vmcs to
		 * this cpu's percpu list, otherwise it may not yet be deleted
		 * from its previous cpu's percpu list.  Pairs with the
		 * smp_wmb() in __loaded_vmcs_clear().
		 */
		smp_rmb();

		list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
			 &per_cpu(loaded_vmcss_on_cpu, cpu));
		local_irq_enable();
	}

	/* Only issue a VMCS load if a different VMCS is current on @cpu. */
	prev = per_cpu(current_vmcs, cpu);
	if (prev != vmx->loaded_vmcs->vmcs) {
		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
		vmcs_load(vmx->loaded_vmcs->vmcs);

		/*
		 * No indirect branch prediction barrier needed when switching
		 * the active VMCS within a vCPU, unless IBRS is advertised to
		 * the vCPU.  To minimize the number of IBPBs executed, KVM
		 * performs IBPB on nested VM-Exit (a single nested transition
		 * may switch the active VMCS multiple times).
		 */
		if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev))
			indirect_branch_prediction_barrier();
	}

	if (!already_loaded) {
		void *gdt = get_current_gdt_ro();

		/*
		 * Flush all EPTP/VPID contexts, the new pCPU may have stale
		 * TLB entries from its previous association with the vCPU.
		 */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

		/*
		 * Linux uses per-cpu TSS and GDT, so set these when switching
		 * processors.  See 22.2.4.
		 */
		vmcs_writel(HOST_TR_BASE,
			    (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
		vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */

		/* SYSENTER ESP is written only for 32-bit or IA32-emulation builds. */
		if (IS_ENABLED(CONFIG_IA32_EMULATION) || IS_ENABLED(CONFIG_X86_32)) {
			/* 22.2.3 */
			vmcs_writel(HOST_IA32_SYSENTER_ESP,
				    (unsigned long)(cpu_entry_stack(cpu) + 1));
		}

		/* Record the new association last. */
		vmx->loaded_vmcs->cpu = cpu;
	}
}
148862306a36Sopenharmony_ci
148962306a36Sopenharmony_ci/*
149062306a36Sopenharmony_ci * Switches to specified vcpu, until a matching vcpu_put(), but assumes
149162306a36Sopenharmony_ci * vcpu mutex is already taken.
149262306a36Sopenharmony_ci */
149362306a36Sopenharmony_cistatic void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
149462306a36Sopenharmony_ci{
149562306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
149662306a36Sopenharmony_ci
149762306a36Sopenharmony_ci	vmx_vcpu_load_vmcs(vcpu, cpu, NULL);
149862306a36Sopenharmony_ci
149962306a36Sopenharmony_ci	vmx_vcpu_pi_load(vcpu, cpu);
150062306a36Sopenharmony_ci
150162306a36Sopenharmony_ci	vmx->host_debugctlmsr = get_debugctlmsr();
150262306a36Sopenharmony_ci}
150362306a36Sopenharmony_ci
/*
 * Counterpart of vmx_vcpu_load(): run the posted-interrupt put hook and
 * then restore host state if guest state is still loaded.
 */
static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	vmx_vcpu_pi_put(vcpu);
	vmx_prepare_switch_to_host(vmx);
}
151062306a36Sopenharmony_ci
151162306a36Sopenharmony_cibool vmx_emulation_required(struct kvm_vcpu *vcpu)
151262306a36Sopenharmony_ci{
151362306a36Sopenharmony_ci	return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu);
151462306a36Sopenharmony_ci}
151562306a36Sopenharmony_ci
151662306a36Sopenharmony_ciunsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
151762306a36Sopenharmony_ci{
151862306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
151962306a36Sopenharmony_ci	unsigned long rflags, save_rflags;
152062306a36Sopenharmony_ci
152162306a36Sopenharmony_ci	if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) {
152262306a36Sopenharmony_ci		kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
152362306a36Sopenharmony_ci		rflags = vmcs_readl(GUEST_RFLAGS);
152462306a36Sopenharmony_ci		if (vmx->rmode.vm86_active) {
152562306a36Sopenharmony_ci			rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
152662306a36Sopenharmony_ci			save_rflags = vmx->rmode.save_rflags;
152762306a36Sopenharmony_ci			rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
152862306a36Sopenharmony_ci		}
152962306a36Sopenharmony_ci		vmx->rflags = rflags;
153062306a36Sopenharmony_ci	}
153162306a36Sopenharmony_ci	return vmx->rflags;
153262306a36Sopenharmony_ci}
153362306a36Sopenharmony_ci
153462306a36Sopenharmony_civoid vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
153562306a36Sopenharmony_ci{
153662306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
153762306a36Sopenharmony_ci	unsigned long old_rflags;
153862306a36Sopenharmony_ci
153962306a36Sopenharmony_ci	/*
154062306a36Sopenharmony_ci	 * Unlike CR0 and CR4, RFLAGS handling requires checking if the vCPU
154162306a36Sopenharmony_ci	 * is an unrestricted guest in order to mark L2 as needing emulation
154262306a36Sopenharmony_ci	 * if L1 runs L2 as a restricted guest.
154362306a36Sopenharmony_ci	 */
154462306a36Sopenharmony_ci	if (is_unrestricted_guest(vcpu)) {
154562306a36Sopenharmony_ci		kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
154662306a36Sopenharmony_ci		vmx->rflags = rflags;
154762306a36Sopenharmony_ci		vmcs_writel(GUEST_RFLAGS, rflags);
154862306a36Sopenharmony_ci		return;
154962306a36Sopenharmony_ci	}
155062306a36Sopenharmony_ci
155162306a36Sopenharmony_ci	old_rflags = vmx_get_rflags(vcpu);
155262306a36Sopenharmony_ci	vmx->rflags = rflags;
155362306a36Sopenharmony_ci	if (vmx->rmode.vm86_active) {
155462306a36Sopenharmony_ci		vmx->rmode.save_rflags = rflags;
155562306a36Sopenharmony_ci		rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
155662306a36Sopenharmony_ci	}
155762306a36Sopenharmony_ci	vmcs_writel(GUEST_RFLAGS, rflags);
155862306a36Sopenharmony_ci
155962306a36Sopenharmony_ci	if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM)
156062306a36Sopenharmony_ci		vmx->emulation_required = vmx_emulation_required(vcpu);
156162306a36Sopenharmony_ci}
156262306a36Sopenharmony_ci
156362306a36Sopenharmony_cistatic bool vmx_get_if_flag(struct kvm_vcpu *vcpu)
156462306a36Sopenharmony_ci{
156562306a36Sopenharmony_ci	return vmx_get_rflags(vcpu) & X86_EFLAGS_IF;
156662306a36Sopenharmony_ci}
156762306a36Sopenharmony_ci
156862306a36Sopenharmony_ciu32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
156962306a36Sopenharmony_ci{
157062306a36Sopenharmony_ci	u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
157162306a36Sopenharmony_ci	int ret = 0;
157262306a36Sopenharmony_ci
157362306a36Sopenharmony_ci	if (interruptibility & GUEST_INTR_STATE_STI)
157462306a36Sopenharmony_ci		ret |= KVM_X86_SHADOW_INT_STI;
157562306a36Sopenharmony_ci	if (interruptibility & GUEST_INTR_STATE_MOV_SS)
157662306a36Sopenharmony_ci		ret |= KVM_X86_SHADOW_INT_MOV_SS;
157762306a36Sopenharmony_ci
157862306a36Sopenharmony_ci	return ret;
157962306a36Sopenharmony_ci}
158062306a36Sopenharmony_ci
158162306a36Sopenharmony_civoid vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
158262306a36Sopenharmony_ci{
158362306a36Sopenharmony_ci	u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
158462306a36Sopenharmony_ci	u32 interruptibility = interruptibility_old;
158562306a36Sopenharmony_ci
158662306a36Sopenharmony_ci	interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
158762306a36Sopenharmony_ci
158862306a36Sopenharmony_ci	if (mask & KVM_X86_SHADOW_INT_MOV_SS)
158962306a36Sopenharmony_ci		interruptibility |= GUEST_INTR_STATE_MOV_SS;
159062306a36Sopenharmony_ci	else if (mask & KVM_X86_SHADOW_INT_STI)
159162306a36Sopenharmony_ci		interruptibility |= GUEST_INTR_STATE_STI;
159262306a36Sopenharmony_ci
159362306a36Sopenharmony_ci	if ((interruptibility != interruptibility_old))
159462306a36Sopenharmony_ci		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
159562306a36Sopenharmony_ci}
159662306a36Sopenharmony_ci
159762306a36Sopenharmony_cistatic int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
159862306a36Sopenharmony_ci{
159962306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
160062306a36Sopenharmony_ci	unsigned long value;
160162306a36Sopenharmony_ci
160262306a36Sopenharmony_ci	/*
160362306a36Sopenharmony_ci	 * Any MSR write that attempts to change bits marked reserved will
160462306a36Sopenharmony_ci	 * case a #GP fault.
160562306a36Sopenharmony_ci	 */
160662306a36Sopenharmony_ci	if (data & vmx->pt_desc.ctl_bitmask)
160762306a36Sopenharmony_ci		return 1;
160862306a36Sopenharmony_ci
160962306a36Sopenharmony_ci	/*
161062306a36Sopenharmony_ci	 * Any attempt to modify IA32_RTIT_CTL while TraceEn is set will
161162306a36Sopenharmony_ci	 * result in a #GP unless the same write also clears TraceEn.
161262306a36Sopenharmony_ci	 */
161362306a36Sopenharmony_ci	if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) &&
161462306a36Sopenharmony_ci		((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN))
161562306a36Sopenharmony_ci		return 1;
161662306a36Sopenharmony_ci
161762306a36Sopenharmony_ci	/*
161862306a36Sopenharmony_ci	 * WRMSR to IA32_RTIT_CTL that sets TraceEn but clears this bit
161962306a36Sopenharmony_ci	 * and FabricEn would cause #GP, if
162062306a36Sopenharmony_ci	 * CPUID.(EAX=14H, ECX=0):ECX.SNGLRGNOUT[bit 2] = 0
162162306a36Sopenharmony_ci	 */
162262306a36Sopenharmony_ci	if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) &&
162362306a36Sopenharmony_ci		!(data & RTIT_CTL_FABRIC_EN) &&
162462306a36Sopenharmony_ci		!intel_pt_validate_cap(vmx->pt_desc.caps,
162562306a36Sopenharmony_ci					PT_CAP_single_range_output))
162662306a36Sopenharmony_ci		return 1;
162762306a36Sopenharmony_ci
162862306a36Sopenharmony_ci	/*
162962306a36Sopenharmony_ci	 * MTCFreq, CycThresh and PSBFreq encodings check, any MSR write that
163062306a36Sopenharmony_ci	 * utilize encodings marked reserved will cause a #GP fault.
163162306a36Sopenharmony_ci	 */
163262306a36Sopenharmony_ci	value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods);
163362306a36Sopenharmony_ci	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) &&
163462306a36Sopenharmony_ci			!test_bit((data & RTIT_CTL_MTC_RANGE) >>
163562306a36Sopenharmony_ci			RTIT_CTL_MTC_RANGE_OFFSET, &value))
163662306a36Sopenharmony_ci		return 1;
163762306a36Sopenharmony_ci	value = intel_pt_validate_cap(vmx->pt_desc.caps,
163862306a36Sopenharmony_ci						PT_CAP_cycle_thresholds);
163962306a36Sopenharmony_ci	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
164062306a36Sopenharmony_ci			!test_bit((data & RTIT_CTL_CYC_THRESH) >>
164162306a36Sopenharmony_ci			RTIT_CTL_CYC_THRESH_OFFSET, &value))
164262306a36Sopenharmony_ci		return 1;
164362306a36Sopenharmony_ci	value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods);
164462306a36Sopenharmony_ci	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
164562306a36Sopenharmony_ci			!test_bit((data & RTIT_CTL_PSB_FREQ) >>
164662306a36Sopenharmony_ci			RTIT_CTL_PSB_FREQ_OFFSET, &value))
164762306a36Sopenharmony_ci		return 1;
164862306a36Sopenharmony_ci
164962306a36Sopenharmony_ci	/*
165062306a36Sopenharmony_ci	 * If ADDRx_CFG is reserved or the encodings is >2 will
165162306a36Sopenharmony_ci	 * cause a #GP fault.
165262306a36Sopenharmony_ci	 */
165362306a36Sopenharmony_ci	value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET;
165462306a36Sopenharmony_ci	if ((value && (vmx->pt_desc.num_address_ranges < 1)) || (value > 2))
165562306a36Sopenharmony_ci		return 1;
165662306a36Sopenharmony_ci	value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET;
165762306a36Sopenharmony_ci	if ((value && (vmx->pt_desc.num_address_ranges < 2)) || (value > 2))
165862306a36Sopenharmony_ci		return 1;
165962306a36Sopenharmony_ci	value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET;
166062306a36Sopenharmony_ci	if ((value && (vmx->pt_desc.num_address_ranges < 3)) || (value > 2))
166162306a36Sopenharmony_ci		return 1;
166262306a36Sopenharmony_ci	value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET;
166362306a36Sopenharmony_ci	if ((value && (vmx->pt_desc.num_address_ranges < 4)) || (value > 2))
166462306a36Sopenharmony_ci		return 1;
166562306a36Sopenharmony_ci
166662306a36Sopenharmony_ci	return 0;
166762306a36Sopenharmony_ci}
166862306a36Sopenharmony_ci
166962306a36Sopenharmony_cistatic bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
167062306a36Sopenharmony_ci					void *insn, int insn_len)
167162306a36Sopenharmony_ci{
167262306a36Sopenharmony_ci	/*
167362306a36Sopenharmony_ci	 * Emulation of instructions in SGX enclaves is impossible as RIP does
167462306a36Sopenharmony_ci	 * not point at the failing instruction, and even if it did, the code
167562306a36Sopenharmony_ci	 * stream is inaccessible.  Inject #UD instead of exiting to userspace
167662306a36Sopenharmony_ci	 * so that guest userspace can't DoS the guest simply by triggering
167762306a36Sopenharmony_ci	 * emulation (enclaves are CPL3 only).
167862306a36Sopenharmony_ci	 */
167962306a36Sopenharmony_ci	if (to_vmx(vcpu)->exit_reason.enclave_mode) {
168062306a36Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
168162306a36Sopenharmony_ci		return false;
168262306a36Sopenharmony_ci	}
168362306a36Sopenharmony_ci	return true;
168462306a36Sopenharmony_ci}
168562306a36Sopenharmony_ci
168662306a36Sopenharmony_cistatic int skip_emulated_instruction(struct kvm_vcpu *vcpu)
168762306a36Sopenharmony_ci{
168862306a36Sopenharmony_ci	union vmx_exit_reason exit_reason = to_vmx(vcpu)->exit_reason;
168962306a36Sopenharmony_ci	unsigned long rip, orig_rip;
169062306a36Sopenharmony_ci	u32 instr_len;
169162306a36Sopenharmony_ci
169262306a36Sopenharmony_ci	/*
169362306a36Sopenharmony_ci	 * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
169462306a36Sopenharmony_ci	 * undefined behavior: Intel's SDM doesn't mandate the VMCS field be
169562306a36Sopenharmony_ci	 * set when EPT misconfig occurs.  In practice, real hardware updates
169662306a36Sopenharmony_ci	 * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors
169762306a36Sopenharmony_ci	 * (namely Hyper-V) don't set it due to it being undefined behavior,
169862306a36Sopenharmony_ci	 * i.e. we end up advancing IP with some random value.
169962306a36Sopenharmony_ci	 */
170062306a36Sopenharmony_ci	if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
170162306a36Sopenharmony_ci	    exit_reason.basic != EXIT_REASON_EPT_MISCONFIG) {
170262306a36Sopenharmony_ci		instr_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
170362306a36Sopenharmony_ci
170462306a36Sopenharmony_ci		/*
170562306a36Sopenharmony_ci		 * Emulating an enclave's instructions isn't supported as KVM
170662306a36Sopenharmony_ci		 * cannot access the enclave's memory or its true RIP, e.g. the
170762306a36Sopenharmony_ci		 * vmcs.GUEST_RIP points at the exit point of the enclave, not
170862306a36Sopenharmony_ci		 * the RIP that actually triggered the VM-Exit.  But, because
170962306a36Sopenharmony_ci		 * most instructions that cause VM-Exit will #UD in an enclave,
171062306a36Sopenharmony_ci		 * most instruction-based VM-Exits simply do not occur.
171162306a36Sopenharmony_ci		 *
171262306a36Sopenharmony_ci		 * There are a few exceptions, notably the debug instructions
171362306a36Sopenharmony_ci		 * INT1ICEBRK and INT3, as they are allowed in debug enclaves
171462306a36Sopenharmony_ci		 * and generate #DB/#BP as expected, which KVM might intercept.
171562306a36Sopenharmony_ci		 * But again, the CPU does the dirty work and saves an instr
171662306a36Sopenharmony_ci		 * length of zero so VMMs don't shoot themselves in the foot.
171762306a36Sopenharmony_ci		 * WARN if KVM tries to skip a non-zero length instruction on
171862306a36Sopenharmony_ci		 * a VM-Exit from an enclave.
171962306a36Sopenharmony_ci		 */
172062306a36Sopenharmony_ci		if (!instr_len)
172162306a36Sopenharmony_ci			goto rip_updated;
172262306a36Sopenharmony_ci
172362306a36Sopenharmony_ci		WARN_ONCE(exit_reason.enclave_mode,
172462306a36Sopenharmony_ci			  "skipping instruction after SGX enclave VM-Exit");
172562306a36Sopenharmony_ci
172662306a36Sopenharmony_ci		orig_rip = kvm_rip_read(vcpu);
172762306a36Sopenharmony_ci		rip = orig_rip + instr_len;
172862306a36Sopenharmony_ci#ifdef CONFIG_X86_64
172962306a36Sopenharmony_ci		/*
173062306a36Sopenharmony_ci		 * We need to mask out the high 32 bits of RIP if not in 64-bit
173162306a36Sopenharmony_ci		 * mode, but just finding out that we are in 64-bit mode is
173262306a36Sopenharmony_ci		 * quite expensive.  Only do it if there was a carry.
173362306a36Sopenharmony_ci		 */
173462306a36Sopenharmony_ci		if (unlikely(((rip ^ orig_rip) >> 31) == 3) && !is_64_bit_mode(vcpu))
173562306a36Sopenharmony_ci			rip = (u32)rip;
173662306a36Sopenharmony_ci#endif
173762306a36Sopenharmony_ci		kvm_rip_write(vcpu, rip);
173862306a36Sopenharmony_ci	} else {
173962306a36Sopenharmony_ci		if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
174062306a36Sopenharmony_ci			return 0;
174162306a36Sopenharmony_ci	}
174262306a36Sopenharmony_ci
174362306a36Sopenharmony_cirip_updated:
174462306a36Sopenharmony_ci	/* skipping an emulated instruction also counts */
174562306a36Sopenharmony_ci	vmx_set_interrupt_shadow(vcpu, 0);
174662306a36Sopenharmony_ci
174762306a36Sopenharmony_ci	return 1;
174862306a36Sopenharmony_ci}
174962306a36Sopenharmony_ci
175062306a36Sopenharmony_ci/*
175162306a36Sopenharmony_ci * Recognizes a pending MTF VM-exit and records the nested state for later
175262306a36Sopenharmony_ci * delivery.
175362306a36Sopenharmony_ci */
175462306a36Sopenharmony_cistatic void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
175562306a36Sopenharmony_ci{
175662306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
175762306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
175862306a36Sopenharmony_ci
175962306a36Sopenharmony_ci	if (!is_guest_mode(vcpu))
176062306a36Sopenharmony_ci		return;
176162306a36Sopenharmony_ci
176262306a36Sopenharmony_ci	/*
176362306a36Sopenharmony_ci	 * Per the SDM, MTF takes priority over debug-trap exceptions besides
176462306a36Sopenharmony_ci	 * TSS T-bit traps and ICEBP (INT1).  KVM doesn't emulate T-bit traps
176562306a36Sopenharmony_ci	 * or ICEBP (in the emulator proper), and skipping of ICEBP after an
176662306a36Sopenharmony_ci	 * intercepted #DB deliberately avoids single-step #DB and MTF updates
176762306a36Sopenharmony_ci	 * as ICEBP is higher priority than both.  As instruction emulation is
176862306a36Sopenharmony_ci	 * completed at this point (i.e. KVM is at the instruction boundary),
176962306a36Sopenharmony_ci	 * any #DB exception pending delivery must be a debug-trap of lower
177062306a36Sopenharmony_ci	 * priority than MTF.  Record the pending MTF state to be delivered in
177162306a36Sopenharmony_ci	 * vmx_check_nested_events().
177262306a36Sopenharmony_ci	 */
177362306a36Sopenharmony_ci	if (nested_cpu_has_mtf(vmcs12) &&
177462306a36Sopenharmony_ci	    (!vcpu->arch.exception.pending ||
177562306a36Sopenharmony_ci	     vcpu->arch.exception.vector == DB_VECTOR) &&
177662306a36Sopenharmony_ci	    (!vcpu->arch.exception_vmexit.pending ||
177762306a36Sopenharmony_ci	     vcpu->arch.exception_vmexit.vector == DB_VECTOR)) {
177862306a36Sopenharmony_ci		vmx->nested.mtf_pending = true;
177962306a36Sopenharmony_ci		kvm_make_request(KVM_REQ_EVENT, vcpu);
178062306a36Sopenharmony_ci	} else {
178162306a36Sopenharmony_ci		vmx->nested.mtf_pending = false;
178262306a36Sopenharmony_ci	}
178362306a36Sopenharmony_ci}
178462306a36Sopenharmony_ci
/*
 * Refresh the pending-MTF state for the instruction being skipped, then
 * advance past it.  Returns the result of skip_emulated_instruction().
 */
static int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
	int skipped;

	vmx_update_emulated_instruction(vcpu);
	skipped = skip_emulated_instruction(vcpu);

	return skipped;
}
179062306a36Sopenharmony_ci
179162306a36Sopenharmony_cistatic void vmx_clear_hlt(struct kvm_vcpu *vcpu)
179262306a36Sopenharmony_ci{
179362306a36Sopenharmony_ci	/*
179462306a36Sopenharmony_ci	 * Ensure that we clear the HLT state in the VMCS.  We don't need to
179562306a36Sopenharmony_ci	 * explicitly skip the instruction because if the HLT state is set,
179662306a36Sopenharmony_ci	 * then the instruction is already executing and RIP has already been
179762306a36Sopenharmony_ci	 * advanced.
179862306a36Sopenharmony_ci	 */
179962306a36Sopenharmony_ci	if (kvm_hlt_in_guest(vcpu->kvm) &&
180062306a36Sopenharmony_ci			vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
180162306a36Sopenharmony_ci		vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
180262306a36Sopenharmony_ci}
180362306a36Sopenharmony_ci
180462306a36Sopenharmony_cistatic void vmx_inject_exception(struct kvm_vcpu *vcpu)
180562306a36Sopenharmony_ci{
180662306a36Sopenharmony_ci	struct kvm_queued_exception *ex = &vcpu->arch.exception;
180762306a36Sopenharmony_ci	u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
180862306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
180962306a36Sopenharmony_ci
181062306a36Sopenharmony_ci	kvm_deliver_exception_payload(vcpu, ex);
181162306a36Sopenharmony_ci
181262306a36Sopenharmony_ci	if (ex->has_error_code) {
181362306a36Sopenharmony_ci		/*
181462306a36Sopenharmony_ci		 * Despite the error code being architecturally defined as 32
181562306a36Sopenharmony_ci		 * bits, and the VMCS field being 32 bits, Intel CPUs and thus
181662306a36Sopenharmony_ci		 * VMX don't actually supporting setting bits 31:16.  Hardware
181762306a36Sopenharmony_ci		 * will (should) never provide a bogus error code, but AMD CPUs
181862306a36Sopenharmony_ci		 * do generate error codes with bits 31:16 set, and so KVM's
181962306a36Sopenharmony_ci		 * ABI lets userspace shove in arbitrary 32-bit values.  Drop
182062306a36Sopenharmony_ci		 * the upper bits to avoid VM-Fail, losing information that
182162306a36Sopenharmony_ci		 * does't really exist is preferable to killing the VM.
182262306a36Sopenharmony_ci		 */
182362306a36Sopenharmony_ci		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, (u16)ex->error_code);
182462306a36Sopenharmony_ci		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
182562306a36Sopenharmony_ci	}
182662306a36Sopenharmony_ci
182762306a36Sopenharmony_ci	if (vmx->rmode.vm86_active) {
182862306a36Sopenharmony_ci		int inc_eip = 0;
182962306a36Sopenharmony_ci		if (kvm_exception_is_soft(ex->vector))
183062306a36Sopenharmony_ci			inc_eip = vcpu->arch.event_exit_inst_len;
183162306a36Sopenharmony_ci		kvm_inject_realmode_interrupt(vcpu, ex->vector, inc_eip);
183262306a36Sopenharmony_ci		return;
183362306a36Sopenharmony_ci	}
183462306a36Sopenharmony_ci
183562306a36Sopenharmony_ci	WARN_ON_ONCE(vmx->emulation_required);
183662306a36Sopenharmony_ci
183762306a36Sopenharmony_ci	if (kvm_exception_is_soft(ex->vector)) {
183862306a36Sopenharmony_ci		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
183962306a36Sopenharmony_ci			     vmx->vcpu.arch.event_exit_inst_len);
184062306a36Sopenharmony_ci		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
184162306a36Sopenharmony_ci	} else
184262306a36Sopenharmony_ci		intr_info |= INTR_TYPE_HARD_EXCEPTION;
184362306a36Sopenharmony_ci
184462306a36Sopenharmony_ci	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
184562306a36Sopenharmony_ci
184662306a36Sopenharmony_ci	vmx_clear_hlt(vcpu);
184762306a36Sopenharmony_ci}
184862306a36Sopenharmony_ci
184962306a36Sopenharmony_cistatic void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,
185062306a36Sopenharmony_ci			       bool load_into_hardware)
185162306a36Sopenharmony_ci{
185262306a36Sopenharmony_ci	struct vmx_uret_msr *uret_msr;
185362306a36Sopenharmony_ci
185462306a36Sopenharmony_ci	uret_msr = vmx_find_uret_msr(vmx, msr);
185562306a36Sopenharmony_ci	if (!uret_msr)
185662306a36Sopenharmony_ci		return;
185762306a36Sopenharmony_ci
185862306a36Sopenharmony_ci	uret_msr->load_into_hardware = load_into_hardware;
185962306a36Sopenharmony_ci}
186062306a36Sopenharmony_ci
186162306a36Sopenharmony_ci/*
186262306a36Sopenharmony_ci * Configuring user return MSRs to automatically save, load, and restore MSRs
186362306a36Sopenharmony_ci * that need to be shoved into hardware when running the guest.  Note, omitting
186462306a36Sopenharmony_ci * an MSR here does _NOT_ mean it's not emulated, only that it will not be
186562306a36Sopenharmony_ci * loaded into hardware when running the guest.
186662306a36Sopenharmony_ci */
186762306a36Sopenharmony_cistatic void vmx_setup_uret_msrs(struct vcpu_vmx *vmx)
186862306a36Sopenharmony_ci{
186962306a36Sopenharmony_ci#ifdef CONFIG_X86_64
187062306a36Sopenharmony_ci	bool load_syscall_msrs;
187162306a36Sopenharmony_ci
187262306a36Sopenharmony_ci	/*
187362306a36Sopenharmony_ci	 * The SYSCALL MSRs are only needed on long mode guests, and only
187462306a36Sopenharmony_ci	 * when EFER.SCE is set.
187562306a36Sopenharmony_ci	 */
187662306a36Sopenharmony_ci	load_syscall_msrs = is_long_mode(&vmx->vcpu) &&
187762306a36Sopenharmony_ci			    (vmx->vcpu.arch.efer & EFER_SCE);
187862306a36Sopenharmony_ci
187962306a36Sopenharmony_ci	vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs);
188062306a36Sopenharmony_ci	vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs);
188162306a36Sopenharmony_ci	vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs);
188262306a36Sopenharmony_ci#endif
188362306a36Sopenharmony_ci	vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx));
188462306a36Sopenharmony_ci
188562306a36Sopenharmony_ci	vmx_setup_uret_msr(vmx, MSR_TSC_AUX,
188662306a36Sopenharmony_ci			   guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
188762306a36Sopenharmony_ci			   guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));
188862306a36Sopenharmony_ci
188962306a36Sopenharmony_ci	/*
189062306a36Sopenharmony_ci	 * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new
189162306a36Sopenharmony_ci	 * kernel and old userspace.  If those guests run on a tsx=off host, do
189262306a36Sopenharmony_ci	 * allow guests to use TSX_CTRL, but don't change the value in hardware
189362306a36Sopenharmony_ci	 * so that TSX remains always disabled.
189462306a36Sopenharmony_ci	 */
189562306a36Sopenharmony_ci	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM));
189662306a36Sopenharmony_ci
189762306a36Sopenharmony_ci	/*
189862306a36Sopenharmony_ci	 * The set of MSRs to load may have changed, reload MSRs before the
189962306a36Sopenharmony_ci	 * next VM-Enter.
190062306a36Sopenharmony_ci	 */
190162306a36Sopenharmony_ci	vmx->guest_uret_msrs_loaded = false;
190262306a36Sopenharmony_ci}
190362306a36Sopenharmony_ci
190462306a36Sopenharmony_ciu64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
190562306a36Sopenharmony_ci{
190662306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
190762306a36Sopenharmony_ci
190862306a36Sopenharmony_ci	if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING))
190962306a36Sopenharmony_ci		return vmcs12->tsc_offset;
191062306a36Sopenharmony_ci
191162306a36Sopenharmony_ci	return 0;
191262306a36Sopenharmony_ci}
191362306a36Sopenharmony_ci
191462306a36Sopenharmony_ciu64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
191562306a36Sopenharmony_ci{
191662306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
191762306a36Sopenharmony_ci
191862306a36Sopenharmony_ci	if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING) &&
191962306a36Sopenharmony_ci	    nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
192062306a36Sopenharmony_ci		return vmcs12->tsc_multiplier;
192162306a36Sopenharmony_ci
192262306a36Sopenharmony_ci	return kvm_caps.default_tsc_scaling_ratio;
192362306a36Sopenharmony_ci}
192462306a36Sopenharmony_ci
192562306a36Sopenharmony_cistatic void vmx_write_tsc_offset(struct kvm_vcpu *vcpu)
192662306a36Sopenharmony_ci{
192762306a36Sopenharmony_ci	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
192862306a36Sopenharmony_ci}
192962306a36Sopenharmony_ci
193062306a36Sopenharmony_cistatic void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu)
193162306a36Sopenharmony_ci{
193262306a36Sopenharmony_ci	vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
193362306a36Sopenharmony_ci}
193462306a36Sopenharmony_ci
/*
 * All IA32_FEATURE_CONTROL bits that userspace may set, regardless of guest
 * CPUID.  KVM allows userspace to set "VMX in SMX" to maintain backwards
 * compatibility even though KVM doesn't support emulating SMX.  Because
 * userspace can set "VMX in SMX", the guest must be allowed to set it too,
 * e.g. if the MSR is left unlocked and the guest does a RMW operation.
 */
#define KVM_SUPPORTED_FEATURE_CONTROL					\
	(FEAT_CTL_LOCKED |						\
	 FEAT_CTL_VMX_ENABLED_INSIDE_SMX |				\
	 FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |				\
	 FEAT_CTL_SGX_LC_ENABLED |					\
	 FEAT_CTL_SGX_ENABLED |						\
	 FEAT_CTL_LMCE_ENABLED)
194862306a36Sopenharmony_ci
194962306a36Sopenharmony_cistatic inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
195062306a36Sopenharmony_ci						    struct msr_data *msr)
195162306a36Sopenharmony_ci{
195262306a36Sopenharmony_ci	uint64_t valid_bits;
195362306a36Sopenharmony_ci
195462306a36Sopenharmony_ci	/*
195562306a36Sopenharmony_ci	 * Ensure KVM_SUPPORTED_FEATURE_CONTROL is updated when new bits are
195662306a36Sopenharmony_ci	 * exposed to the guest.
195762306a36Sopenharmony_ci	 */
195862306a36Sopenharmony_ci	WARN_ON_ONCE(vmx->msr_ia32_feature_control_valid_bits &
195962306a36Sopenharmony_ci		     ~KVM_SUPPORTED_FEATURE_CONTROL);
196062306a36Sopenharmony_ci
196162306a36Sopenharmony_ci	if (!msr->host_initiated &&
196262306a36Sopenharmony_ci	    (vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED))
196362306a36Sopenharmony_ci		return false;
196462306a36Sopenharmony_ci
196562306a36Sopenharmony_ci	if (msr->host_initiated)
196662306a36Sopenharmony_ci		valid_bits = KVM_SUPPORTED_FEATURE_CONTROL;
196762306a36Sopenharmony_ci	else
196862306a36Sopenharmony_ci		valid_bits = vmx->msr_ia32_feature_control_valid_bits;
196962306a36Sopenharmony_ci
197062306a36Sopenharmony_ci	return !(msr->data & ~valid_bits);
197162306a36Sopenharmony_ci}
197262306a36Sopenharmony_ci
197362306a36Sopenharmony_cistatic int vmx_get_msr_feature(struct kvm_msr_entry *msr)
197462306a36Sopenharmony_ci{
197562306a36Sopenharmony_ci	switch (msr->index) {
197662306a36Sopenharmony_ci	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
197762306a36Sopenharmony_ci		if (!nested)
197862306a36Sopenharmony_ci			return 1;
197962306a36Sopenharmony_ci		return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
198062306a36Sopenharmony_ci	default:
198162306a36Sopenharmony_ci		return KVM_MSR_RET_INVALID;
198262306a36Sopenharmony_ci	}
198362306a36Sopenharmony_ci}
198462306a36Sopenharmony_ci
198562306a36Sopenharmony_ci/*
198662306a36Sopenharmony_ci * Reads an msr value (of 'msr_info->index') into 'msr_info->data'.
198762306a36Sopenharmony_ci * Returns 0 on success, non-0 otherwise.
198862306a36Sopenharmony_ci * Assumes vcpu_load() was already called.
198962306a36Sopenharmony_ci */
199062306a36Sopenharmony_cistatic int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
199162306a36Sopenharmony_ci{
199262306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
199362306a36Sopenharmony_ci	struct vmx_uret_msr *msr;
199462306a36Sopenharmony_ci	u32 index;
199562306a36Sopenharmony_ci
199662306a36Sopenharmony_ci	switch (msr_info->index) {
199762306a36Sopenharmony_ci#ifdef CONFIG_X86_64
199862306a36Sopenharmony_ci	case MSR_FS_BASE:
199962306a36Sopenharmony_ci		msr_info->data = vmcs_readl(GUEST_FS_BASE);
200062306a36Sopenharmony_ci		break;
200162306a36Sopenharmony_ci	case MSR_GS_BASE:
200262306a36Sopenharmony_ci		msr_info->data = vmcs_readl(GUEST_GS_BASE);
200362306a36Sopenharmony_ci		break;
200462306a36Sopenharmony_ci	case MSR_KERNEL_GS_BASE:
200562306a36Sopenharmony_ci		msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
200662306a36Sopenharmony_ci		break;
200762306a36Sopenharmony_ci#endif
200862306a36Sopenharmony_ci	case MSR_EFER:
200962306a36Sopenharmony_ci		return kvm_get_msr_common(vcpu, msr_info);
201062306a36Sopenharmony_ci	case MSR_IA32_TSX_CTRL:
201162306a36Sopenharmony_ci		if (!msr_info->host_initiated &&
201262306a36Sopenharmony_ci		    !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
201362306a36Sopenharmony_ci			return 1;
201462306a36Sopenharmony_ci		goto find_uret_msr;
201562306a36Sopenharmony_ci	case MSR_IA32_UMWAIT_CONTROL:
201662306a36Sopenharmony_ci		if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
201762306a36Sopenharmony_ci			return 1;
201862306a36Sopenharmony_ci
201962306a36Sopenharmony_ci		msr_info->data = vmx->msr_ia32_umwait_control;
202062306a36Sopenharmony_ci		break;
202162306a36Sopenharmony_ci	case MSR_IA32_SPEC_CTRL:
202262306a36Sopenharmony_ci		if (!msr_info->host_initiated &&
202362306a36Sopenharmony_ci		    !guest_has_spec_ctrl_msr(vcpu))
202462306a36Sopenharmony_ci			return 1;
202562306a36Sopenharmony_ci
202662306a36Sopenharmony_ci		msr_info->data = to_vmx(vcpu)->spec_ctrl;
202762306a36Sopenharmony_ci		break;
202862306a36Sopenharmony_ci	case MSR_IA32_SYSENTER_CS:
202962306a36Sopenharmony_ci		msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
203062306a36Sopenharmony_ci		break;
203162306a36Sopenharmony_ci	case MSR_IA32_SYSENTER_EIP:
203262306a36Sopenharmony_ci		msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
203362306a36Sopenharmony_ci		break;
203462306a36Sopenharmony_ci	case MSR_IA32_SYSENTER_ESP:
203562306a36Sopenharmony_ci		msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
203662306a36Sopenharmony_ci		break;
203762306a36Sopenharmony_ci	case MSR_IA32_BNDCFGS:
203862306a36Sopenharmony_ci		if (!kvm_mpx_supported() ||
203962306a36Sopenharmony_ci		    (!msr_info->host_initiated &&
204062306a36Sopenharmony_ci		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
204162306a36Sopenharmony_ci			return 1;
204262306a36Sopenharmony_ci		msr_info->data = vmcs_read64(GUEST_BNDCFGS);
204362306a36Sopenharmony_ci		break;
204462306a36Sopenharmony_ci	case MSR_IA32_MCG_EXT_CTL:
204562306a36Sopenharmony_ci		if (!msr_info->host_initiated &&
204662306a36Sopenharmony_ci		    !(vmx->msr_ia32_feature_control &
204762306a36Sopenharmony_ci		      FEAT_CTL_LMCE_ENABLED))
204862306a36Sopenharmony_ci			return 1;
204962306a36Sopenharmony_ci		msr_info->data = vcpu->arch.mcg_ext_ctl;
205062306a36Sopenharmony_ci		break;
205162306a36Sopenharmony_ci	case MSR_IA32_FEAT_CTL:
205262306a36Sopenharmony_ci		msr_info->data = vmx->msr_ia32_feature_control;
205362306a36Sopenharmony_ci		break;
205462306a36Sopenharmony_ci	case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3:
205562306a36Sopenharmony_ci		if (!msr_info->host_initiated &&
205662306a36Sopenharmony_ci		    !guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC))
205762306a36Sopenharmony_ci			return 1;
205862306a36Sopenharmony_ci		msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash
205962306a36Sopenharmony_ci			[msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
206062306a36Sopenharmony_ci		break;
206162306a36Sopenharmony_ci	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
206262306a36Sopenharmony_ci		if (!guest_can_use(vcpu, X86_FEATURE_VMX))
206362306a36Sopenharmony_ci			return 1;
206462306a36Sopenharmony_ci		if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
206562306a36Sopenharmony_ci				    &msr_info->data))
206662306a36Sopenharmony_ci			return 1;
206762306a36Sopenharmony_ci		/*
206862306a36Sopenharmony_ci		 * Enlightened VMCS v1 doesn't have certain VMCS fields but
206962306a36Sopenharmony_ci		 * instead of just ignoring the features, different Hyper-V
207062306a36Sopenharmony_ci		 * versions are either trying to use them and fail or do some
207162306a36Sopenharmony_ci		 * sanity checking and refuse to boot. Filter all unsupported
207262306a36Sopenharmony_ci		 * features out.
207362306a36Sopenharmony_ci		 */
207462306a36Sopenharmony_ci		if (!msr_info->host_initiated && guest_cpuid_has_evmcs(vcpu))
207562306a36Sopenharmony_ci			nested_evmcs_filter_control_msr(vcpu, msr_info->index,
207662306a36Sopenharmony_ci							&msr_info->data);
207762306a36Sopenharmony_ci		break;
207862306a36Sopenharmony_ci	case MSR_IA32_RTIT_CTL:
207962306a36Sopenharmony_ci		if (!vmx_pt_mode_is_host_guest())
208062306a36Sopenharmony_ci			return 1;
208162306a36Sopenharmony_ci		msr_info->data = vmx->pt_desc.guest.ctl;
208262306a36Sopenharmony_ci		break;
208362306a36Sopenharmony_ci	case MSR_IA32_RTIT_STATUS:
208462306a36Sopenharmony_ci		if (!vmx_pt_mode_is_host_guest())
208562306a36Sopenharmony_ci			return 1;
208662306a36Sopenharmony_ci		msr_info->data = vmx->pt_desc.guest.status;
208762306a36Sopenharmony_ci		break;
208862306a36Sopenharmony_ci	case MSR_IA32_RTIT_CR3_MATCH:
208962306a36Sopenharmony_ci		if (!vmx_pt_mode_is_host_guest() ||
209062306a36Sopenharmony_ci			!intel_pt_validate_cap(vmx->pt_desc.caps,
209162306a36Sopenharmony_ci						PT_CAP_cr3_filtering))
209262306a36Sopenharmony_ci			return 1;
209362306a36Sopenharmony_ci		msr_info->data = vmx->pt_desc.guest.cr3_match;
209462306a36Sopenharmony_ci		break;
209562306a36Sopenharmony_ci	case MSR_IA32_RTIT_OUTPUT_BASE:
209662306a36Sopenharmony_ci		if (!vmx_pt_mode_is_host_guest() ||
209762306a36Sopenharmony_ci			(!intel_pt_validate_cap(vmx->pt_desc.caps,
209862306a36Sopenharmony_ci					PT_CAP_topa_output) &&
209962306a36Sopenharmony_ci			 !intel_pt_validate_cap(vmx->pt_desc.caps,
210062306a36Sopenharmony_ci					PT_CAP_single_range_output)))
210162306a36Sopenharmony_ci			return 1;
210262306a36Sopenharmony_ci		msr_info->data = vmx->pt_desc.guest.output_base;
210362306a36Sopenharmony_ci		break;
210462306a36Sopenharmony_ci	case MSR_IA32_RTIT_OUTPUT_MASK:
210562306a36Sopenharmony_ci		if (!vmx_pt_mode_is_host_guest() ||
210662306a36Sopenharmony_ci			(!intel_pt_validate_cap(vmx->pt_desc.caps,
210762306a36Sopenharmony_ci					PT_CAP_topa_output) &&
210862306a36Sopenharmony_ci			 !intel_pt_validate_cap(vmx->pt_desc.caps,
210962306a36Sopenharmony_ci					PT_CAP_single_range_output)))
211062306a36Sopenharmony_ci			return 1;
211162306a36Sopenharmony_ci		msr_info->data = vmx->pt_desc.guest.output_mask;
211262306a36Sopenharmony_ci		break;
211362306a36Sopenharmony_ci	case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
211462306a36Sopenharmony_ci		index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
211562306a36Sopenharmony_ci		if (!vmx_pt_mode_is_host_guest() ||
211662306a36Sopenharmony_ci		    (index >= 2 * vmx->pt_desc.num_address_ranges))
211762306a36Sopenharmony_ci			return 1;
211862306a36Sopenharmony_ci		if (index % 2)
211962306a36Sopenharmony_ci			msr_info->data = vmx->pt_desc.guest.addr_b[index / 2];
212062306a36Sopenharmony_ci		else
212162306a36Sopenharmony_ci			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
212262306a36Sopenharmony_ci		break;
212362306a36Sopenharmony_ci	case MSR_IA32_DEBUGCTLMSR:
212462306a36Sopenharmony_ci		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
212562306a36Sopenharmony_ci		break;
212662306a36Sopenharmony_ci	default:
212762306a36Sopenharmony_ci	find_uret_msr:
212862306a36Sopenharmony_ci		msr = vmx_find_uret_msr(vmx, msr_info->index);
212962306a36Sopenharmony_ci		if (msr) {
213062306a36Sopenharmony_ci			msr_info->data = msr->data;
213162306a36Sopenharmony_ci			break;
213262306a36Sopenharmony_ci		}
213362306a36Sopenharmony_ci		return kvm_get_msr_common(vcpu, msr_info);
213462306a36Sopenharmony_ci	}
213562306a36Sopenharmony_ci
213662306a36Sopenharmony_ci	return 0;
213762306a36Sopenharmony_ci}
213862306a36Sopenharmony_ci
213962306a36Sopenharmony_cistatic u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu,
214062306a36Sopenharmony_ci						    u64 data)
214162306a36Sopenharmony_ci{
214262306a36Sopenharmony_ci#ifdef CONFIG_X86_64
214362306a36Sopenharmony_ci	if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
214462306a36Sopenharmony_ci		return (u32)data;
214562306a36Sopenharmony_ci#endif
214662306a36Sopenharmony_ci	return (unsigned long)data;
214762306a36Sopenharmony_ci}
214862306a36Sopenharmony_ci
214962306a36Sopenharmony_cistatic u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated)
215062306a36Sopenharmony_ci{
215162306a36Sopenharmony_ci	u64 debugctl = 0;
215262306a36Sopenharmony_ci
215362306a36Sopenharmony_ci	if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
215462306a36Sopenharmony_ci	    (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)))
215562306a36Sopenharmony_ci		debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
215662306a36Sopenharmony_ci
215762306a36Sopenharmony_ci	if ((kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT) &&
215862306a36Sopenharmony_ci	    (host_initiated || intel_pmu_lbr_is_enabled(vcpu)))
215962306a36Sopenharmony_ci		debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
216062306a36Sopenharmony_ci
216162306a36Sopenharmony_ci	return debugctl;
216262306a36Sopenharmony_ci}
216362306a36Sopenharmony_ci
216462306a36Sopenharmony_ci/*
216562306a36Sopenharmony_ci * Writes msr value into the appropriate "register".
216662306a36Sopenharmony_ci * Returns 0 on success, non-0 otherwise.
216762306a36Sopenharmony_ci * Assumes vcpu_load() was already called.
216862306a36Sopenharmony_ci */
216962306a36Sopenharmony_cistatic int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
217062306a36Sopenharmony_ci{
217162306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
217262306a36Sopenharmony_ci	struct vmx_uret_msr *msr;
217362306a36Sopenharmony_ci	int ret = 0;
217462306a36Sopenharmony_ci	u32 msr_index = msr_info->index;
217562306a36Sopenharmony_ci	u64 data = msr_info->data;
217662306a36Sopenharmony_ci	u32 index;
217762306a36Sopenharmony_ci
217862306a36Sopenharmony_ci	switch (msr_index) {
217962306a36Sopenharmony_ci	case MSR_EFER:
218062306a36Sopenharmony_ci		ret = kvm_set_msr_common(vcpu, msr_info);
218162306a36Sopenharmony_ci		break;
218262306a36Sopenharmony_ci#ifdef CONFIG_X86_64
218362306a36Sopenharmony_ci	case MSR_FS_BASE:
218462306a36Sopenharmony_ci		vmx_segment_cache_clear(vmx);
218562306a36Sopenharmony_ci		vmcs_writel(GUEST_FS_BASE, data);
218662306a36Sopenharmony_ci		break;
218762306a36Sopenharmony_ci	case MSR_GS_BASE:
218862306a36Sopenharmony_ci		vmx_segment_cache_clear(vmx);
218962306a36Sopenharmony_ci		vmcs_writel(GUEST_GS_BASE, data);
219062306a36Sopenharmony_ci		break;
219162306a36Sopenharmony_ci	case MSR_KERNEL_GS_BASE:
219262306a36Sopenharmony_ci		vmx_write_guest_kernel_gs_base(vmx, data);
219362306a36Sopenharmony_ci		break;
219462306a36Sopenharmony_ci	case MSR_IA32_XFD:
219562306a36Sopenharmony_ci		ret = kvm_set_msr_common(vcpu, msr_info);
219662306a36Sopenharmony_ci		/*
219762306a36Sopenharmony_ci		 * Always intercepting WRMSR could incur non-negligible
219862306a36Sopenharmony_ci		 * overhead given xfd might be changed frequently in
219962306a36Sopenharmony_ci		 * guest context switch. Disable write interception
220062306a36Sopenharmony_ci		 * upon the first write with a non-zero value (indicating
220162306a36Sopenharmony_ci		 * potential usage on dynamic xfeatures). Also update
220262306a36Sopenharmony_ci		 * exception bitmap to trap #NM for proper virtualization
220362306a36Sopenharmony_ci		 * of guest xfd_err.
220462306a36Sopenharmony_ci		 */
220562306a36Sopenharmony_ci		if (!ret && data) {
220662306a36Sopenharmony_ci			vmx_disable_intercept_for_msr(vcpu, MSR_IA32_XFD,
220762306a36Sopenharmony_ci						      MSR_TYPE_RW);
220862306a36Sopenharmony_ci			vcpu->arch.xfd_no_write_intercept = true;
220962306a36Sopenharmony_ci			vmx_update_exception_bitmap(vcpu);
221062306a36Sopenharmony_ci		}
221162306a36Sopenharmony_ci		break;
221262306a36Sopenharmony_ci#endif
221362306a36Sopenharmony_ci	case MSR_IA32_SYSENTER_CS:
221462306a36Sopenharmony_ci		if (is_guest_mode(vcpu))
221562306a36Sopenharmony_ci			get_vmcs12(vcpu)->guest_sysenter_cs = data;
221662306a36Sopenharmony_ci		vmcs_write32(GUEST_SYSENTER_CS, data);
221762306a36Sopenharmony_ci		break;
221862306a36Sopenharmony_ci	case MSR_IA32_SYSENTER_EIP:
221962306a36Sopenharmony_ci		if (is_guest_mode(vcpu)) {
222062306a36Sopenharmony_ci			data = nested_vmx_truncate_sysenter_addr(vcpu, data);
222162306a36Sopenharmony_ci			get_vmcs12(vcpu)->guest_sysenter_eip = data;
222262306a36Sopenharmony_ci		}
222362306a36Sopenharmony_ci		vmcs_writel(GUEST_SYSENTER_EIP, data);
222462306a36Sopenharmony_ci		break;
222562306a36Sopenharmony_ci	case MSR_IA32_SYSENTER_ESP:
222662306a36Sopenharmony_ci		if (is_guest_mode(vcpu)) {
222762306a36Sopenharmony_ci			data = nested_vmx_truncate_sysenter_addr(vcpu, data);
222862306a36Sopenharmony_ci			get_vmcs12(vcpu)->guest_sysenter_esp = data;
222962306a36Sopenharmony_ci		}
223062306a36Sopenharmony_ci		vmcs_writel(GUEST_SYSENTER_ESP, data);
223162306a36Sopenharmony_ci		break;
223262306a36Sopenharmony_ci	case MSR_IA32_DEBUGCTLMSR: {
223362306a36Sopenharmony_ci		u64 invalid;
223462306a36Sopenharmony_ci
223562306a36Sopenharmony_ci		invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated);
223662306a36Sopenharmony_ci		if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) {
223762306a36Sopenharmony_ci			kvm_pr_unimpl_wrmsr(vcpu, msr_index, data);
223862306a36Sopenharmony_ci			data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
223962306a36Sopenharmony_ci			invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
224062306a36Sopenharmony_ci		}
224162306a36Sopenharmony_ci
224262306a36Sopenharmony_ci		if (invalid)
224362306a36Sopenharmony_ci			return 1;
224462306a36Sopenharmony_ci
224562306a36Sopenharmony_ci		if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
224662306a36Sopenharmony_ci						VM_EXIT_SAVE_DEBUG_CONTROLS)
224762306a36Sopenharmony_ci			get_vmcs12(vcpu)->guest_ia32_debugctl = data;
224862306a36Sopenharmony_ci
224962306a36Sopenharmony_ci		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
225062306a36Sopenharmony_ci		if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
225162306a36Sopenharmony_ci		    (data & DEBUGCTLMSR_LBR))
225262306a36Sopenharmony_ci			intel_pmu_create_guest_lbr_event(vcpu);
225362306a36Sopenharmony_ci		return 0;
225462306a36Sopenharmony_ci	}
225562306a36Sopenharmony_ci	case MSR_IA32_BNDCFGS:
225662306a36Sopenharmony_ci		if (!kvm_mpx_supported() ||
225762306a36Sopenharmony_ci		    (!msr_info->host_initiated &&
225862306a36Sopenharmony_ci		     !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
225962306a36Sopenharmony_ci			return 1;
226062306a36Sopenharmony_ci		if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
226162306a36Sopenharmony_ci		    (data & MSR_IA32_BNDCFGS_RSVD))
226262306a36Sopenharmony_ci			return 1;
226362306a36Sopenharmony_ci
226462306a36Sopenharmony_ci		if (is_guest_mode(vcpu) &&
226562306a36Sopenharmony_ci		    ((vmx->nested.msrs.entry_ctls_high & VM_ENTRY_LOAD_BNDCFGS) ||
226662306a36Sopenharmony_ci		     (vmx->nested.msrs.exit_ctls_high & VM_EXIT_CLEAR_BNDCFGS)))
226762306a36Sopenharmony_ci			get_vmcs12(vcpu)->guest_bndcfgs = data;
226862306a36Sopenharmony_ci
226962306a36Sopenharmony_ci		vmcs_write64(GUEST_BNDCFGS, data);
227062306a36Sopenharmony_ci		break;
227162306a36Sopenharmony_ci	case MSR_IA32_UMWAIT_CONTROL:
227262306a36Sopenharmony_ci		if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
227362306a36Sopenharmony_ci			return 1;
227462306a36Sopenharmony_ci
227562306a36Sopenharmony_ci		/* The reserved bit 1 and non-32 bit [63:32] should be zero */
227662306a36Sopenharmony_ci		if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32)))
227762306a36Sopenharmony_ci			return 1;
227862306a36Sopenharmony_ci
227962306a36Sopenharmony_ci		vmx->msr_ia32_umwait_control = data;
228062306a36Sopenharmony_ci		break;
228162306a36Sopenharmony_ci	case MSR_IA32_SPEC_CTRL:
228262306a36Sopenharmony_ci		if (!msr_info->host_initiated &&
228362306a36Sopenharmony_ci		    !guest_has_spec_ctrl_msr(vcpu))
228462306a36Sopenharmony_ci			return 1;
228562306a36Sopenharmony_ci
228662306a36Sopenharmony_ci		if (kvm_spec_ctrl_test_value(data))
228762306a36Sopenharmony_ci			return 1;
228862306a36Sopenharmony_ci
228962306a36Sopenharmony_ci		vmx->spec_ctrl = data;
229062306a36Sopenharmony_ci		if (!data)
229162306a36Sopenharmony_ci			break;
229262306a36Sopenharmony_ci
229362306a36Sopenharmony_ci		/*
229462306a36Sopenharmony_ci		 * For non-nested:
229562306a36Sopenharmony_ci		 * When it's written (to non-zero) for the first time, pass
229662306a36Sopenharmony_ci		 * it through.
229762306a36Sopenharmony_ci		 *
229862306a36Sopenharmony_ci		 * For nested:
229962306a36Sopenharmony_ci		 * The handling of the MSR bitmap for L2 guests is done in
230062306a36Sopenharmony_ci		 * nested_vmx_prepare_msr_bitmap. We should not touch the
230162306a36Sopenharmony_ci		 * vmcs02.msr_bitmap here since it gets completely overwritten
230262306a36Sopenharmony_ci		 * in the merging. We update the vmcs01 here for L1 as well
230362306a36Sopenharmony_ci		 * since it will end up touching the MSR anyway now.
230462306a36Sopenharmony_ci		 */
230562306a36Sopenharmony_ci		vmx_disable_intercept_for_msr(vcpu,
230662306a36Sopenharmony_ci					      MSR_IA32_SPEC_CTRL,
230762306a36Sopenharmony_ci					      MSR_TYPE_RW);
230862306a36Sopenharmony_ci		break;
230962306a36Sopenharmony_ci	case MSR_IA32_TSX_CTRL:
231062306a36Sopenharmony_ci		if (!msr_info->host_initiated &&
231162306a36Sopenharmony_ci		    !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
231262306a36Sopenharmony_ci			return 1;
231362306a36Sopenharmony_ci		if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
231462306a36Sopenharmony_ci			return 1;
231562306a36Sopenharmony_ci		goto find_uret_msr;
231662306a36Sopenharmony_ci	case MSR_IA32_CR_PAT:
231762306a36Sopenharmony_ci		ret = kvm_set_msr_common(vcpu, msr_info);
231862306a36Sopenharmony_ci		if (ret)
231962306a36Sopenharmony_ci			break;
232062306a36Sopenharmony_ci
232162306a36Sopenharmony_ci		if (is_guest_mode(vcpu) &&
232262306a36Sopenharmony_ci		    get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
232362306a36Sopenharmony_ci			get_vmcs12(vcpu)->guest_ia32_pat = data;
232462306a36Sopenharmony_ci
232562306a36Sopenharmony_ci		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
232662306a36Sopenharmony_ci			vmcs_write64(GUEST_IA32_PAT, data);
232762306a36Sopenharmony_ci		break;
232862306a36Sopenharmony_ci	case MSR_IA32_MCG_EXT_CTL:
232962306a36Sopenharmony_ci		if ((!msr_info->host_initiated &&
233062306a36Sopenharmony_ci		     !(to_vmx(vcpu)->msr_ia32_feature_control &
233162306a36Sopenharmony_ci		       FEAT_CTL_LMCE_ENABLED)) ||
233262306a36Sopenharmony_ci		    (data & ~MCG_EXT_CTL_LMCE_EN))
233362306a36Sopenharmony_ci			return 1;
233462306a36Sopenharmony_ci		vcpu->arch.mcg_ext_ctl = data;
233562306a36Sopenharmony_ci		break;
233662306a36Sopenharmony_ci	case MSR_IA32_FEAT_CTL:
233762306a36Sopenharmony_ci		if (!is_vmx_feature_control_msr_valid(vmx, msr_info))
233862306a36Sopenharmony_ci			return 1;
233962306a36Sopenharmony_ci
234062306a36Sopenharmony_ci		vmx->msr_ia32_feature_control = data;
234162306a36Sopenharmony_ci		if (msr_info->host_initiated && data == 0)
234262306a36Sopenharmony_ci			vmx_leave_nested(vcpu);
234362306a36Sopenharmony_ci
234462306a36Sopenharmony_ci		/* SGX may be enabled/disabled by guest's firmware */
234562306a36Sopenharmony_ci		vmx_write_encls_bitmap(vcpu, NULL);
234662306a36Sopenharmony_ci		break;
234762306a36Sopenharmony_ci	case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3:
234862306a36Sopenharmony_ci		/*
234962306a36Sopenharmony_ci		 * On real hardware, the LE hash MSRs are writable before
235062306a36Sopenharmony_ci		 * the firmware sets bit 0 in MSR 0x7a ("activating" SGX),
235162306a36Sopenharmony_ci		 * at which point SGX related bits in IA32_FEATURE_CONTROL
235262306a36Sopenharmony_ci		 * become writable.
235362306a36Sopenharmony_ci		 *
235462306a36Sopenharmony_ci		 * KVM does not emulate SGX activation for simplicity, so
235562306a36Sopenharmony_ci		 * allow writes to the LE hash MSRs if IA32_FEATURE_CONTROL
235662306a36Sopenharmony_ci		 * is unlocked.  This is technically not architectural
235762306a36Sopenharmony_ci		 * behavior, but it's close enough.
235862306a36Sopenharmony_ci		 */
235962306a36Sopenharmony_ci		if (!msr_info->host_initiated &&
236062306a36Sopenharmony_ci		    (!guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC) ||
236162306a36Sopenharmony_ci		    ((vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED) &&
236262306a36Sopenharmony_ci		    !(vmx->msr_ia32_feature_control & FEAT_CTL_SGX_LC_ENABLED))))
236362306a36Sopenharmony_ci			return 1;
236462306a36Sopenharmony_ci		vmx->msr_ia32_sgxlepubkeyhash
236562306a36Sopenharmony_ci			[msr_index - MSR_IA32_SGXLEPUBKEYHASH0] = data;
236662306a36Sopenharmony_ci		break;
236762306a36Sopenharmony_ci	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
236862306a36Sopenharmony_ci		if (!msr_info->host_initiated)
236962306a36Sopenharmony_ci			return 1; /* they are read-only */
237062306a36Sopenharmony_ci		if (!guest_can_use(vcpu, X86_FEATURE_VMX))
237162306a36Sopenharmony_ci			return 1;
237262306a36Sopenharmony_ci		return vmx_set_vmx_msr(vcpu, msr_index, data);
237362306a36Sopenharmony_ci	case MSR_IA32_RTIT_CTL:
237462306a36Sopenharmony_ci		if (!vmx_pt_mode_is_host_guest() ||
237562306a36Sopenharmony_ci			vmx_rtit_ctl_check(vcpu, data) ||
237662306a36Sopenharmony_ci			vmx->nested.vmxon)
237762306a36Sopenharmony_ci			return 1;
237862306a36Sopenharmony_ci		vmcs_write64(GUEST_IA32_RTIT_CTL, data);
237962306a36Sopenharmony_ci		vmx->pt_desc.guest.ctl = data;
238062306a36Sopenharmony_ci		pt_update_intercept_for_msr(vcpu);
238162306a36Sopenharmony_ci		break;
238262306a36Sopenharmony_ci	case MSR_IA32_RTIT_STATUS:
238362306a36Sopenharmony_ci		if (!pt_can_write_msr(vmx))
238462306a36Sopenharmony_ci			return 1;
238562306a36Sopenharmony_ci		if (data & MSR_IA32_RTIT_STATUS_MASK)
238662306a36Sopenharmony_ci			return 1;
238762306a36Sopenharmony_ci		vmx->pt_desc.guest.status = data;
238862306a36Sopenharmony_ci		break;
238962306a36Sopenharmony_ci	case MSR_IA32_RTIT_CR3_MATCH:
239062306a36Sopenharmony_ci		if (!pt_can_write_msr(vmx))
239162306a36Sopenharmony_ci			return 1;
239262306a36Sopenharmony_ci		if (!intel_pt_validate_cap(vmx->pt_desc.caps,
239362306a36Sopenharmony_ci					   PT_CAP_cr3_filtering))
239462306a36Sopenharmony_ci			return 1;
239562306a36Sopenharmony_ci		vmx->pt_desc.guest.cr3_match = data;
239662306a36Sopenharmony_ci		break;
239762306a36Sopenharmony_ci	case MSR_IA32_RTIT_OUTPUT_BASE:
239862306a36Sopenharmony_ci		if (!pt_can_write_msr(vmx))
239962306a36Sopenharmony_ci			return 1;
240062306a36Sopenharmony_ci		if (!intel_pt_validate_cap(vmx->pt_desc.caps,
240162306a36Sopenharmony_ci					   PT_CAP_topa_output) &&
240262306a36Sopenharmony_ci		    !intel_pt_validate_cap(vmx->pt_desc.caps,
240362306a36Sopenharmony_ci					   PT_CAP_single_range_output))
240462306a36Sopenharmony_ci			return 1;
240562306a36Sopenharmony_ci		if (!pt_output_base_valid(vcpu, data))
240662306a36Sopenharmony_ci			return 1;
240762306a36Sopenharmony_ci		vmx->pt_desc.guest.output_base = data;
240862306a36Sopenharmony_ci		break;
240962306a36Sopenharmony_ci	case MSR_IA32_RTIT_OUTPUT_MASK:
241062306a36Sopenharmony_ci		if (!pt_can_write_msr(vmx))
241162306a36Sopenharmony_ci			return 1;
241262306a36Sopenharmony_ci		if (!intel_pt_validate_cap(vmx->pt_desc.caps,
241362306a36Sopenharmony_ci					   PT_CAP_topa_output) &&
241462306a36Sopenharmony_ci		    !intel_pt_validate_cap(vmx->pt_desc.caps,
241562306a36Sopenharmony_ci					   PT_CAP_single_range_output))
241662306a36Sopenharmony_ci			return 1;
241762306a36Sopenharmony_ci		vmx->pt_desc.guest.output_mask = data;
241862306a36Sopenharmony_ci		break;
241962306a36Sopenharmony_ci	case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
242062306a36Sopenharmony_ci		if (!pt_can_write_msr(vmx))
242162306a36Sopenharmony_ci			return 1;
242262306a36Sopenharmony_ci		index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
242362306a36Sopenharmony_ci		if (index >= 2 * vmx->pt_desc.num_address_ranges)
242462306a36Sopenharmony_ci			return 1;
242562306a36Sopenharmony_ci		if (is_noncanonical_address(data, vcpu))
242662306a36Sopenharmony_ci			return 1;
242762306a36Sopenharmony_ci		if (index % 2)
242862306a36Sopenharmony_ci			vmx->pt_desc.guest.addr_b[index / 2] = data;
242962306a36Sopenharmony_ci		else
243062306a36Sopenharmony_ci			vmx->pt_desc.guest.addr_a[index / 2] = data;
243162306a36Sopenharmony_ci		break;
243262306a36Sopenharmony_ci	case MSR_IA32_PERF_CAPABILITIES:
243362306a36Sopenharmony_ci		if (data && !vcpu_to_pmu(vcpu)->version)
243462306a36Sopenharmony_ci			return 1;
243562306a36Sopenharmony_ci		if (data & PMU_CAP_LBR_FMT) {
243662306a36Sopenharmony_ci			if ((data & PMU_CAP_LBR_FMT) !=
243762306a36Sopenharmony_ci			    (kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT))
243862306a36Sopenharmony_ci				return 1;
243962306a36Sopenharmony_ci			if (!cpuid_model_is_consistent(vcpu))
244062306a36Sopenharmony_ci				return 1;
244162306a36Sopenharmony_ci		}
244262306a36Sopenharmony_ci		if (data & PERF_CAP_PEBS_FORMAT) {
244362306a36Sopenharmony_ci			if ((data & PERF_CAP_PEBS_MASK) !=
244462306a36Sopenharmony_ci			    (kvm_caps.supported_perf_cap & PERF_CAP_PEBS_MASK))
244562306a36Sopenharmony_ci				return 1;
244662306a36Sopenharmony_ci			if (!guest_cpuid_has(vcpu, X86_FEATURE_DS))
244762306a36Sopenharmony_ci				return 1;
244862306a36Sopenharmony_ci			if (!guest_cpuid_has(vcpu, X86_FEATURE_DTES64))
244962306a36Sopenharmony_ci				return 1;
245062306a36Sopenharmony_ci			if (!cpuid_model_is_consistent(vcpu))
245162306a36Sopenharmony_ci				return 1;
245262306a36Sopenharmony_ci		}
245362306a36Sopenharmony_ci		ret = kvm_set_msr_common(vcpu, msr_info);
245462306a36Sopenharmony_ci		break;
245562306a36Sopenharmony_ci
245662306a36Sopenharmony_ci	default:
245762306a36Sopenharmony_ci	find_uret_msr:
245862306a36Sopenharmony_ci		msr = vmx_find_uret_msr(vmx, msr_index);
245962306a36Sopenharmony_ci		if (msr)
246062306a36Sopenharmony_ci			ret = vmx_set_guest_uret_msr(vmx, msr, data);
246162306a36Sopenharmony_ci		else
246262306a36Sopenharmony_ci			ret = kvm_set_msr_common(vcpu, msr_info);
246362306a36Sopenharmony_ci	}
246462306a36Sopenharmony_ci
246562306a36Sopenharmony_ci	/* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */
246662306a36Sopenharmony_ci	if (msr_index == MSR_IA32_ARCH_CAPABILITIES)
246762306a36Sopenharmony_ci		vmx_update_fb_clear_dis(vcpu, vmx);
246862306a36Sopenharmony_ci
246962306a36Sopenharmony_ci	return ret;
247062306a36Sopenharmony_ci}
247162306a36Sopenharmony_ci
247262306a36Sopenharmony_cistatic void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
247362306a36Sopenharmony_ci{
247462306a36Sopenharmony_ci	unsigned long guest_owned_bits;
247562306a36Sopenharmony_ci
247662306a36Sopenharmony_ci	kvm_register_mark_available(vcpu, reg);
247762306a36Sopenharmony_ci
247862306a36Sopenharmony_ci	switch (reg) {
247962306a36Sopenharmony_ci	case VCPU_REGS_RSP:
248062306a36Sopenharmony_ci		vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
248162306a36Sopenharmony_ci		break;
248262306a36Sopenharmony_ci	case VCPU_REGS_RIP:
248362306a36Sopenharmony_ci		vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
248462306a36Sopenharmony_ci		break;
248562306a36Sopenharmony_ci	case VCPU_EXREG_PDPTR:
248662306a36Sopenharmony_ci		if (enable_ept)
248762306a36Sopenharmony_ci			ept_save_pdptrs(vcpu);
248862306a36Sopenharmony_ci		break;
248962306a36Sopenharmony_ci	case VCPU_EXREG_CR0:
249062306a36Sopenharmony_ci		guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
249162306a36Sopenharmony_ci
249262306a36Sopenharmony_ci		vcpu->arch.cr0 &= ~guest_owned_bits;
249362306a36Sopenharmony_ci		vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits;
249462306a36Sopenharmony_ci		break;
249562306a36Sopenharmony_ci	case VCPU_EXREG_CR3:
249662306a36Sopenharmony_ci		/*
249762306a36Sopenharmony_ci		 * When intercepting CR3 loads, e.g. for shadowing paging, KVM's
249862306a36Sopenharmony_ci		 * CR3 is loaded into hardware, not the guest's CR3.
249962306a36Sopenharmony_ci		 */
250062306a36Sopenharmony_ci		if (!(exec_controls_get(to_vmx(vcpu)) & CPU_BASED_CR3_LOAD_EXITING))
250162306a36Sopenharmony_ci			vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
250262306a36Sopenharmony_ci		break;
250362306a36Sopenharmony_ci	case VCPU_EXREG_CR4:
250462306a36Sopenharmony_ci		guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
250562306a36Sopenharmony_ci
250662306a36Sopenharmony_ci		vcpu->arch.cr4 &= ~guest_owned_bits;
250762306a36Sopenharmony_ci		vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits;
250862306a36Sopenharmony_ci		break;
250962306a36Sopenharmony_ci	default:
251062306a36Sopenharmony_ci		KVM_BUG_ON(1, vcpu->kvm);
251162306a36Sopenharmony_ci		break;
251262306a36Sopenharmony_ci	}
251362306a36Sopenharmony_ci}
251462306a36Sopenharmony_ci
251562306a36Sopenharmony_ci/*
251662306a36Sopenharmony_ci * There is no X86_FEATURE for SGX yet, but anyway we need to query CPUID
251762306a36Sopenharmony_ci * directly instead of going through cpu_has(), to ensure KVM is trapping
251862306a36Sopenharmony_ci * ENCLS whenever it's supported in hardware.  It does not matter whether
251962306a36Sopenharmony_ci * the host OS supports or has enabled SGX.
252062306a36Sopenharmony_ci */
252162306a36Sopenharmony_cistatic bool cpu_has_sgx(void)
252262306a36Sopenharmony_ci{
252362306a36Sopenharmony_ci	return cpuid_eax(0) >= 0x12 && (cpuid_eax(0x12) & BIT(0));
252462306a36Sopenharmony_ci}
252562306a36Sopenharmony_ci
252662306a36Sopenharmony_ci/*
252762306a36Sopenharmony_ci * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
252862306a36Sopenharmony_ci * can't be used due to errata where VM Exit may incorrectly clear
252962306a36Sopenharmony_ci * IA32_PERF_GLOBAL_CTRL[34:32]. Work around the errata by using the
253062306a36Sopenharmony_ci * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL.
253162306a36Sopenharmony_ci */
253262306a36Sopenharmony_cistatic bool cpu_has_perf_global_ctrl_bug(void)
253362306a36Sopenharmony_ci{
253462306a36Sopenharmony_ci	if (boot_cpu_data.x86 == 0x6) {
253562306a36Sopenharmony_ci		switch (boot_cpu_data.x86_model) {
253662306a36Sopenharmony_ci		case INTEL_FAM6_NEHALEM_EP:	/* AAK155 */
253762306a36Sopenharmony_ci		case INTEL_FAM6_NEHALEM:	/* AAP115 */
253862306a36Sopenharmony_ci		case INTEL_FAM6_WESTMERE:	/* AAT100 */
253962306a36Sopenharmony_ci		case INTEL_FAM6_WESTMERE_EP:	/* BC86,AAY89,BD102 */
254062306a36Sopenharmony_ci		case INTEL_FAM6_NEHALEM_EX:	/* BA97 */
254162306a36Sopenharmony_ci			return true;
254262306a36Sopenharmony_ci		default:
254362306a36Sopenharmony_ci			break;
254462306a36Sopenharmony_ci		}
254562306a36Sopenharmony_ci	}
254662306a36Sopenharmony_ci
254762306a36Sopenharmony_ci	return false;
254862306a36Sopenharmony_ci}
254962306a36Sopenharmony_ci
255062306a36Sopenharmony_cistatic int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr, u32 *result)
255162306a36Sopenharmony_ci{
255262306a36Sopenharmony_ci	u32 vmx_msr_low, vmx_msr_high;
255362306a36Sopenharmony_ci	u32 ctl = ctl_min | ctl_opt;
255462306a36Sopenharmony_ci
255562306a36Sopenharmony_ci	rdmsr(msr, vmx_msr_low, vmx_msr_high);
255662306a36Sopenharmony_ci
255762306a36Sopenharmony_ci	ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
255862306a36Sopenharmony_ci	ctl |= vmx_msr_low;  /* bit == 1 in low word  ==> must be one  */
255962306a36Sopenharmony_ci
256062306a36Sopenharmony_ci	/* Ensure minimum (required) set of control bits are supported. */
256162306a36Sopenharmony_ci	if (ctl_min & ~ctl)
256262306a36Sopenharmony_ci		return -EIO;
256362306a36Sopenharmony_ci
256462306a36Sopenharmony_ci	*result = ctl;
256562306a36Sopenharmony_ci	return 0;
256662306a36Sopenharmony_ci}
256762306a36Sopenharmony_ci
256862306a36Sopenharmony_cistatic u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
256962306a36Sopenharmony_ci{
257062306a36Sopenharmony_ci	u64 allowed;
257162306a36Sopenharmony_ci
257262306a36Sopenharmony_ci	rdmsrl(msr, allowed);
257362306a36Sopenharmony_ci
257462306a36Sopenharmony_ci	return  ctl_opt & allowed;
257562306a36Sopenharmony_ci}
257662306a36Sopenharmony_ci
257762306a36Sopenharmony_cistatic int setup_vmcs_config(struct vmcs_config *vmcs_conf,
257862306a36Sopenharmony_ci			     struct vmx_capability *vmx_cap)
257962306a36Sopenharmony_ci{
258062306a36Sopenharmony_ci	u32 vmx_msr_low, vmx_msr_high;
258162306a36Sopenharmony_ci	u32 _pin_based_exec_control = 0;
258262306a36Sopenharmony_ci	u32 _cpu_based_exec_control = 0;
258362306a36Sopenharmony_ci	u32 _cpu_based_2nd_exec_control = 0;
258462306a36Sopenharmony_ci	u64 _cpu_based_3rd_exec_control = 0;
258562306a36Sopenharmony_ci	u32 _vmexit_control = 0;
258662306a36Sopenharmony_ci	u32 _vmentry_control = 0;
258762306a36Sopenharmony_ci	u64 misc_msr;
258862306a36Sopenharmony_ci	int i;
258962306a36Sopenharmony_ci
259062306a36Sopenharmony_ci	/*
259162306a36Sopenharmony_ci	 * LOAD/SAVE_DEBUG_CONTROLS are absent because both are mandatory.
259262306a36Sopenharmony_ci	 * SAVE_IA32_PAT and SAVE_IA32_EFER are absent because KVM always
259362306a36Sopenharmony_ci	 * intercepts writes to PAT and EFER, i.e. never enables those controls.
259462306a36Sopenharmony_ci	 */
259562306a36Sopenharmony_ci	struct {
259662306a36Sopenharmony_ci		u32 entry_control;
259762306a36Sopenharmony_ci		u32 exit_control;
259862306a36Sopenharmony_ci	} const vmcs_entry_exit_pairs[] = {
259962306a36Sopenharmony_ci		{ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,	VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL },
260062306a36Sopenharmony_ci		{ VM_ENTRY_LOAD_IA32_PAT,		VM_EXIT_LOAD_IA32_PAT },
260162306a36Sopenharmony_ci		{ VM_ENTRY_LOAD_IA32_EFER,		VM_EXIT_LOAD_IA32_EFER },
260262306a36Sopenharmony_ci		{ VM_ENTRY_LOAD_BNDCFGS,		VM_EXIT_CLEAR_BNDCFGS },
260362306a36Sopenharmony_ci		{ VM_ENTRY_LOAD_IA32_RTIT_CTL,		VM_EXIT_CLEAR_IA32_RTIT_CTL },
260462306a36Sopenharmony_ci	};
260562306a36Sopenharmony_ci
260662306a36Sopenharmony_ci	memset(vmcs_conf, 0, sizeof(*vmcs_conf));
260762306a36Sopenharmony_ci
260862306a36Sopenharmony_ci	if (adjust_vmx_controls(KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL,
260962306a36Sopenharmony_ci				KVM_OPTIONAL_VMX_CPU_BASED_VM_EXEC_CONTROL,
261062306a36Sopenharmony_ci				MSR_IA32_VMX_PROCBASED_CTLS,
261162306a36Sopenharmony_ci				&_cpu_based_exec_control))
261262306a36Sopenharmony_ci		return -EIO;
261362306a36Sopenharmony_ci	if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
261462306a36Sopenharmony_ci		if (adjust_vmx_controls(KVM_REQUIRED_VMX_SECONDARY_VM_EXEC_CONTROL,
261562306a36Sopenharmony_ci					KVM_OPTIONAL_VMX_SECONDARY_VM_EXEC_CONTROL,
261662306a36Sopenharmony_ci					MSR_IA32_VMX_PROCBASED_CTLS2,
261762306a36Sopenharmony_ci					&_cpu_based_2nd_exec_control))
261862306a36Sopenharmony_ci			return -EIO;
261962306a36Sopenharmony_ci	}
262062306a36Sopenharmony_ci#ifndef CONFIG_X86_64
262162306a36Sopenharmony_ci	if (!(_cpu_based_2nd_exec_control &
262262306a36Sopenharmony_ci				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
262362306a36Sopenharmony_ci		_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
262462306a36Sopenharmony_ci#endif
262562306a36Sopenharmony_ci
262662306a36Sopenharmony_ci	if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
262762306a36Sopenharmony_ci		_cpu_based_2nd_exec_control &= ~(
262862306a36Sopenharmony_ci				SECONDARY_EXEC_APIC_REGISTER_VIRT |
262962306a36Sopenharmony_ci				SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
263062306a36Sopenharmony_ci				SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
263162306a36Sopenharmony_ci
263262306a36Sopenharmony_ci	rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
263362306a36Sopenharmony_ci		&vmx_cap->ept, &vmx_cap->vpid);
263462306a36Sopenharmony_ci
263562306a36Sopenharmony_ci	if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
263662306a36Sopenharmony_ci	    vmx_cap->ept) {
263762306a36Sopenharmony_ci		pr_warn_once("EPT CAP should not exist if not support "
263862306a36Sopenharmony_ci				"1-setting enable EPT VM-execution control\n");
263962306a36Sopenharmony_ci
264062306a36Sopenharmony_ci		if (error_on_inconsistent_vmcs_config)
264162306a36Sopenharmony_ci			return -EIO;
264262306a36Sopenharmony_ci
264362306a36Sopenharmony_ci		vmx_cap->ept = 0;
264462306a36Sopenharmony_ci	}
264562306a36Sopenharmony_ci	if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
264662306a36Sopenharmony_ci	    vmx_cap->vpid) {
264762306a36Sopenharmony_ci		pr_warn_once("VPID CAP should not exist if not support "
264862306a36Sopenharmony_ci				"1-setting enable VPID VM-execution control\n");
264962306a36Sopenharmony_ci
265062306a36Sopenharmony_ci		if (error_on_inconsistent_vmcs_config)
265162306a36Sopenharmony_ci			return -EIO;
265262306a36Sopenharmony_ci
265362306a36Sopenharmony_ci		vmx_cap->vpid = 0;
265462306a36Sopenharmony_ci	}
265562306a36Sopenharmony_ci
265662306a36Sopenharmony_ci	if (!cpu_has_sgx())
265762306a36Sopenharmony_ci		_cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_ENCLS_EXITING;
265862306a36Sopenharmony_ci
265962306a36Sopenharmony_ci	if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS)
266062306a36Sopenharmony_ci		_cpu_based_3rd_exec_control =
266162306a36Sopenharmony_ci			adjust_vmx_controls64(KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL,
266262306a36Sopenharmony_ci					      MSR_IA32_VMX_PROCBASED_CTLS3);
266362306a36Sopenharmony_ci
266462306a36Sopenharmony_ci	if (adjust_vmx_controls(KVM_REQUIRED_VMX_VM_EXIT_CONTROLS,
266562306a36Sopenharmony_ci				KVM_OPTIONAL_VMX_VM_EXIT_CONTROLS,
266662306a36Sopenharmony_ci				MSR_IA32_VMX_EXIT_CTLS,
266762306a36Sopenharmony_ci				&_vmexit_control))
266862306a36Sopenharmony_ci		return -EIO;
266962306a36Sopenharmony_ci
267062306a36Sopenharmony_ci	if (adjust_vmx_controls(KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL,
267162306a36Sopenharmony_ci				KVM_OPTIONAL_VMX_PIN_BASED_VM_EXEC_CONTROL,
267262306a36Sopenharmony_ci				MSR_IA32_VMX_PINBASED_CTLS,
267362306a36Sopenharmony_ci				&_pin_based_exec_control))
267462306a36Sopenharmony_ci		return -EIO;
267562306a36Sopenharmony_ci
267662306a36Sopenharmony_ci	if (cpu_has_broken_vmx_preemption_timer())
267762306a36Sopenharmony_ci		_pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
267862306a36Sopenharmony_ci	if (!(_cpu_based_2nd_exec_control &
267962306a36Sopenharmony_ci		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
268062306a36Sopenharmony_ci		_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
268162306a36Sopenharmony_ci
268262306a36Sopenharmony_ci	if (adjust_vmx_controls(KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS,
268362306a36Sopenharmony_ci				KVM_OPTIONAL_VMX_VM_ENTRY_CONTROLS,
268462306a36Sopenharmony_ci				MSR_IA32_VMX_ENTRY_CTLS,
268562306a36Sopenharmony_ci				&_vmentry_control))
268662306a36Sopenharmony_ci		return -EIO;
268762306a36Sopenharmony_ci
268862306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) {
268962306a36Sopenharmony_ci		u32 n_ctrl = vmcs_entry_exit_pairs[i].entry_control;
269062306a36Sopenharmony_ci		u32 x_ctrl = vmcs_entry_exit_pairs[i].exit_control;
269162306a36Sopenharmony_ci
269262306a36Sopenharmony_ci		if (!(_vmentry_control & n_ctrl) == !(_vmexit_control & x_ctrl))
269362306a36Sopenharmony_ci			continue;
269462306a36Sopenharmony_ci
269562306a36Sopenharmony_ci		pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, entry = %x, exit = %x\n",
269662306a36Sopenharmony_ci			     _vmentry_control & n_ctrl, _vmexit_control & x_ctrl);
269762306a36Sopenharmony_ci
269862306a36Sopenharmony_ci		if (error_on_inconsistent_vmcs_config)
269962306a36Sopenharmony_ci			return -EIO;
270062306a36Sopenharmony_ci
270162306a36Sopenharmony_ci		_vmentry_control &= ~n_ctrl;
270262306a36Sopenharmony_ci		_vmexit_control &= ~x_ctrl;
270362306a36Sopenharmony_ci	}
270462306a36Sopenharmony_ci
270562306a36Sopenharmony_ci	rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
270662306a36Sopenharmony_ci
270762306a36Sopenharmony_ci	/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
270862306a36Sopenharmony_ci	if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
270962306a36Sopenharmony_ci		return -EIO;
271062306a36Sopenharmony_ci
271162306a36Sopenharmony_ci#ifdef CONFIG_X86_64
271262306a36Sopenharmony_ci	/* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
271362306a36Sopenharmony_ci	if (vmx_msr_high & (1u<<16))
271462306a36Sopenharmony_ci		return -EIO;
271562306a36Sopenharmony_ci#endif
271662306a36Sopenharmony_ci
271762306a36Sopenharmony_ci	/* Require Write-Back (WB) memory type for VMCS accesses. */
271862306a36Sopenharmony_ci	if (((vmx_msr_high >> 18) & 15) != 6)
271962306a36Sopenharmony_ci		return -EIO;
272062306a36Sopenharmony_ci
272162306a36Sopenharmony_ci	rdmsrl(MSR_IA32_VMX_MISC, misc_msr);
272262306a36Sopenharmony_ci
272362306a36Sopenharmony_ci	vmcs_conf->size = vmx_msr_high & 0x1fff;
272462306a36Sopenharmony_ci	vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
272562306a36Sopenharmony_ci
272662306a36Sopenharmony_ci	vmcs_conf->revision_id = vmx_msr_low;
272762306a36Sopenharmony_ci
272862306a36Sopenharmony_ci	vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
272962306a36Sopenharmony_ci	vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
273062306a36Sopenharmony_ci	vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
273162306a36Sopenharmony_ci	vmcs_conf->cpu_based_3rd_exec_ctrl = _cpu_based_3rd_exec_control;
273262306a36Sopenharmony_ci	vmcs_conf->vmexit_ctrl         = _vmexit_control;
273362306a36Sopenharmony_ci	vmcs_conf->vmentry_ctrl        = _vmentry_control;
273462306a36Sopenharmony_ci	vmcs_conf->misc	= misc_msr;
273562306a36Sopenharmony_ci
273662306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_HYPERV)
273762306a36Sopenharmony_ci	if (enlightened_vmcs)
273862306a36Sopenharmony_ci		evmcs_sanitize_exec_ctrls(vmcs_conf);
273962306a36Sopenharmony_ci#endif
274062306a36Sopenharmony_ci
274162306a36Sopenharmony_ci	return 0;
274262306a36Sopenharmony_ci}
274362306a36Sopenharmony_ci
274462306a36Sopenharmony_cistatic bool __kvm_is_vmx_supported(void)
274562306a36Sopenharmony_ci{
274662306a36Sopenharmony_ci	int cpu = smp_processor_id();
274762306a36Sopenharmony_ci
274862306a36Sopenharmony_ci	if (!(cpuid_ecx(1) & feature_bit(VMX))) {
274962306a36Sopenharmony_ci		pr_err("VMX not supported by CPU %d\n", cpu);
275062306a36Sopenharmony_ci		return false;
275162306a36Sopenharmony_ci	}
275262306a36Sopenharmony_ci
275362306a36Sopenharmony_ci	if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
275462306a36Sopenharmony_ci	    !this_cpu_has(X86_FEATURE_VMX)) {
275562306a36Sopenharmony_ci		pr_err("VMX not enabled (by BIOS) in MSR_IA32_FEAT_CTL on CPU %d\n", cpu);
275662306a36Sopenharmony_ci		return false;
275762306a36Sopenharmony_ci	}
275862306a36Sopenharmony_ci
275962306a36Sopenharmony_ci	return true;
276062306a36Sopenharmony_ci}
276162306a36Sopenharmony_ci
276262306a36Sopenharmony_cistatic bool kvm_is_vmx_supported(void)
276362306a36Sopenharmony_ci{
276462306a36Sopenharmony_ci	bool supported;
276562306a36Sopenharmony_ci
276662306a36Sopenharmony_ci	migrate_disable();
276762306a36Sopenharmony_ci	supported = __kvm_is_vmx_supported();
276862306a36Sopenharmony_ci	migrate_enable();
276962306a36Sopenharmony_ci
277062306a36Sopenharmony_ci	return supported;
277162306a36Sopenharmony_ci}
277262306a36Sopenharmony_ci
277362306a36Sopenharmony_cistatic int vmx_check_processor_compat(void)
277462306a36Sopenharmony_ci{
277562306a36Sopenharmony_ci	int cpu = raw_smp_processor_id();
277662306a36Sopenharmony_ci	struct vmcs_config vmcs_conf;
277762306a36Sopenharmony_ci	struct vmx_capability vmx_cap;
277862306a36Sopenharmony_ci
277962306a36Sopenharmony_ci	if (!__kvm_is_vmx_supported())
278062306a36Sopenharmony_ci		return -EIO;
278162306a36Sopenharmony_ci
278262306a36Sopenharmony_ci	if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) {
278362306a36Sopenharmony_ci		pr_err("Failed to setup VMCS config on CPU %d\n", cpu);
278462306a36Sopenharmony_ci		return -EIO;
278562306a36Sopenharmony_ci	}
278662306a36Sopenharmony_ci	if (nested)
278762306a36Sopenharmony_ci		nested_vmx_setup_ctls_msrs(&vmcs_conf, vmx_cap.ept);
278862306a36Sopenharmony_ci	if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config))) {
278962306a36Sopenharmony_ci		pr_err("Inconsistent VMCS config on CPU %d\n", cpu);
279062306a36Sopenharmony_ci		return -EIO;
279162306a36Sopenharmony_ci	}
279262306a36Sopenharmony_ci	return 0;
279362306a36Sopenharmony_ci}
279462306a36Sopenharmony_ci
279562306a36Sopenharmony_cistatic int kvm_cpu_vmxon(u64 vmxon_pointer)
279662306a36Sopenharmony_ci{
279762306a36Sopenharmony_ci	u64 msr;
279862306a36Sopenharmony_ci
279962306a36Sopenharmony_ci	cr4_set_bits(X86_CR4_VMXE);
280062306a36Sopenharmony_ci
280162306a36Sopenharmony_ci	asm goto("1: vmxon %[vmxon_pointer]\n\t"
280262306a36Sopenharmony_ci			  _ASM_EXTABLE(1b, %l[fault])
280362306a36Sopenharmony_ci			  : : [vmxon_pointer] "m"(vmxon_pointer)
280462306a36Sopenharmony_ci			  : : fault);
280562306a36Sopenharmony_ci	return 0;
280662306a36Sopenharmony_ci
280762306a36Sopenharmony_cifault:
280862306a36Sopenharmony_ci	WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
280962306a36Sopenharmony_ci		  rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
281062306a36Sopenharmony_ci	cr4_clear_bits(X86_CR4_VMXE);
281162306a36Sopenharmony_ci
281262306a36Sopenharmony_ci	return -EFAULT;
281362306a36Sopenharmony_ci}
281462306a36Sopenharmony_ci
281562306a36Sopenharmony_cistatic int vmx_hardware_enable(void)
281662306a36Sopenharmony_ci{
281762306a36Sopenharmony_ci	int cpu = raw_smp_processor_id();
281862306a36Sopenharmony_ci	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
281962306a36Sopenharmony_ci	int r;
282062306a36Sopenharmony_ci
282162306a36Sopenharmony_ci	if (cr4_read_shadow() & X86_CR4_VMXE)
282262306a36Sopenharmony_ci		return -EBUSY;
282362306a36Sopenharmony_ci
282462306a36Sopenharmony_ci	/*
282562306a36Sopenharmony_ci	 * This can happen if we hot-added a CPU but failed to allocate
282662306a36Sopenharmony_ci	 * VP assist page for it.
282762306a36Sopenharmony_ci	 */
282862306a36Sopenharmony_ci	if (kvm_is_using_evmcs() && !hv_get_vp_assist_page(cpu))
282962306a36Sopenharmony_ci		return -EFAULT;
283062306a36Sopenharmony_ci
283162306a36Sopenharmony_ci	intel_pt_handle_vmx(1);
283262306a36Sopenharmony_ci
283362306a36Sopenharmony_ci	r = kvm_cpu_vmxon(phys_addr);
283462306a36Sopenharmony_ci	if (r) {
283562306a36Sopenharmony_ci		intel_pt_handle_vmx(0);
283662306a36Sopenharmony_ci		return r;
283762306a36Sopenharmony_ci	}
283862306a36Sopenharmony_ci
283962306a36Sopenharmony_ci	if (enable_ept)
284062306a36Sopenharmony_ci		ept_sync_global();
284162306a36Sopenharmony_ci
284262306a36Sopenharmony_ci	return 0;
284362306a36Sopenharmony_ci}
284462306a36Sopenharmony_ci
284562306a36Sopenharmony_cistatic void vmclear_local_loaded_vmcss(void)
284662306a36Sopenharmony_ci{
284762306a36Sopenharmony_ci	int cpu = raw_smp_processor_id();
284862306a36Sopenharmony_ci	struct loaded_vmcs *v, *n;
284962306a36Sopenharmony_ci
285062306a36Sopenharmony_ci	list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
285162306a36Sopenharmony_ci				 loaded_vmcss_on_cpu_link)
285262306a36Sopenharmony_ci		__loaded_vmcs_clear(v);
285362306a36Sopenharmony_ci}
285462306a36Sopenharmony_ci
/*
 * Leave VMX operation on the current CPU: clear every VMCS loaded here,
 * execute VMXOFF, reset the enlightened VMCS state and notify Intel PT.
 */
static void vmx_hardware_disable(void)
{
	vmclear_local_loaded_vmcss();

	/* A failing VMXOFF is reported as a spurious fault. */
	if (kvm_cpu_vmxoff() != 0)
		kvm_spurious_fault();

	hv_reset_evmcs();

	intel_pt_handle_vmx(0);
}
286662306a36Sopenharmony_ci
286762306a36Sopenharmony_cistruct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
286862306a36Sopenharmony_ci{
286962306a36Sopenharmony_ci	int node = cpu_to_node(cpu);
287062306a36Sopenharmony_ci	struct page *pages;
287162306a36Sopenharmony_ci	struct vmcs *vmcs;
287262306a36Sopenharmony_ci
287362306a36Sopenharmony_ci	pages = __alloc_pages_node(node, flags, 0);
287462306a36Sopenharmony_ci	if (!pages)
287562306a36Sopenharmony_ci		return NULL;
287662306a36Sopenharmony_ci	vmcs = page_address(pages);
287762306a36Sopenharmony_ci	memset(vmcs, 0, vmcs_config.size);
287862306a36Sopenharmony_ci
287962306a36Sopenharmony_ci	/* KVM supports Enlightened VMCS v1 only */
288062306a36Sopenharmony_ci	if (kvm_is_using_evmcs())
288162306a36Sopenharmony_ci		vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
288262306a36Sopenharmony_ci	else
288362306a36Sopenharmony_ci		vmcs->hdr.revision_id = vmcs_config.revision_id;
288462306a36Sopenharmony_ci
288562306a36Sopenharmony_ci	if (shadow)
288662306a36Sopenharmony_ci		vmcs->hdr.shadow_vmcs = 1;
288762306a36Sopenharmony_ci	return vmcs;
288862306a36Sopenharmony_ci}
288962306a36Sopenharmony_ci
/* Release the page backing @vmcs. */
void free_vmcs(struct vmcs *vmcs)
{
	unsigned long addr = (unsigned long)vmcs;

	free_page(addr);
}
289462306a36Sopenharmony_ci
289562306a36Sopenharmony_ci/*
289662306a36Sopenharmony_ci * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
289762306a36Sopenharmony_ci */
289862306a36Sopenharmony_civoid free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
289962306a36Sopenharmony_ci{
290062306a36Sopenharmony_ci	if (!loaded_vmcs->vmcs)
290162306a36Sopenharmony_ci		return;
290262306a36Sopenharmony_ci	loaded_vmcs_clear(loaded_vmcs);
290362306a36Sopenharmony_ci	free_vmcs(loaded_vmcs->vmcs);
290462306a36Sopenharmony_ci	loaded_vmcs->vmcs = NULL;
290562306a36Sopenharmony_ci	if (loaded_vmcs->msr_bitmap)
290662306a36Sopenharmony_ci		free_page((unsigned long)loaded_vmcs->msr_bitmap);
290762306a36Sopenharmony_ci	WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
290862306a36Sopenharmony_ci}
290962306a36Sopenharmony_ci
291062306a36Sopenharmony_ciint alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
291162306a36Sopenharmony_ci{
291262306a36Sopenharmony_ci	loaded_vmcs->vmcs = alloc_vmcs(false);
291362306a36Sopenharmony_ci	if (!loaded_vmcs->vmcs)
291462306a36Sopenharmony_ci		return -ENOMEM;
291562306a36Sopenharmony_ci
291662306a36Sopenharmony_ci	vmcs_clear(loaded_vmcs->vmcs);
291762306a36Sopenharmony_ci
291862306a36Sopenharmony_ci	loaded_vmcs->shadow_vmcs = NULL;
291962306a36Sopenharmony_ci	loaded_vmcs->hv_timer_soft_disabled = false;
292062306a36Sopenharmony_ci	loaded_vmcs->cpu = -1;
292162306a36Sopenharmony_ci	loaded_vmcs->launched = 0;
292262306a36Sopenharmony_ci
292362306a36Sopenharmony_ci	if (cpu_has_vmx_msr_bitmap()) {
292462306a36Sopenharmony_ci		loaded_vmcs->msr_bitmap = (unsigned long *)
292562306a36Sopenharmony_ci				__get_free_page(GFP_KERNEL_ACCOUNT);
292662306a36Sopenharmony_ci		if (!loaded_vmcs->msr_bitmap)
292762306a36Sopenharmony_ci			goto out_vmcs;
292862306a36Sopenharmony_ci		memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
292962306a36Sopenharmony_ci	}
293062306a36Sopenharmony_ci
293162306a36Sopenharmony_ci	memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
293262306a36Sopenharmony_ci	memset(&loaded_vmcs->controls_shadow, 0,
293362306a36Sopenharmony_ci		sizeof(struct vmcs_controls_shadow));
293462306a36Sopenharmony_ci
293562306a36Sopenharmony_ci	return 0;
293662306a36Sopenharmony_ci
293762306a36Sopenharmony_ciout_vmcs:
293862306a36Sopenharmony_ci	free_loaded_vmcs(loaded_vmcs);
293962306a36Sopenharmony_ci	return -ENOMEM;
294062306a36Sopenharmony_ci}
294162306a36Sopenharmony_ci
294262306a36Sopenharmony_cistatic void free_kvm_area(void)
294362306a36Sopenharmony_ci{
294462306a36Sopenharmony_ci	int cpu;
294562306a36Sopenharmony_ci
294662306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
294762306a36Sopenharmony_ci		free_vmcs(per_cpu(vmxarea, cpu));
294862306a36Sopenharmony_ci		per_cpu(vmxarea, cpu) = NULL;
294962306a36Sopenharmony_ci	}
295062306a36Sopenharmony_ci}
295162306a36Sopenharmony_ci
295262306a36Sopenharmony_cistatic __init int alloc_kvm_area(void)
295362306a36Sopenharmony_ci{
295462306a36Sopenharmony_ci	int cpu;
295562306a36Sopenharmony_ci
295662306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
295762306a36Sopenharmony_ci		struct vmcs *vmcs;
295862306a36Sopenharmony_ci
295962306a36Sopenharmony_ci		vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
296062306a36Sopenharmony_ci		if (!vmcs) {
296162306a36Sopenharmony_ci			free_kvm_area();
296262306a36Sopenharmony_ci			return -ENOMEM;
296362306a36Sopenharmony_ci		}
296462306a36Sopenharmony_ci
296562306a36Sopenharmony_ci		/*
296662306a36Sopenharmony_ci		 * When eVMCS is enabled, alloc_vmcs_cpu() sets
296762306a36Sopenharmony_ci		 * vmcs->revision_id to KVM_EVMCS_VERSION instead of
296862306a36Sopenharmony_ci		 * revision_id reported by MSR_IA32_VMX_BASIC.
296962306a36Sopenharmony_ci		 *
297062306a36Sopenharmony_ci		 * However, even though not explicitly documented by
297162306a36Sopenharmony_ci		 * TLFS, VMXArea passed as VMXON argument should
297262306a36Sopenharmony_ci		 * still be marked with revision_id reported by
297362306a36Sopenharmony_ci		 * physical CPU.
297462306a36Sopenharmony_ci		 */
297562306a36Sopenharmony_ci		if (kvm_is_using_evmcs())
297662306a36Sopenharmony_ci			vmcs->hdr.revision_id = vmcs_config.revision_id;
297762306a36Sopenharmony_ci
297862306a36Sopenharmony_ci		per_cpu(vmxarea, cpu) = vmcs;
297962306a36Sopenharmony_ci	}
298062306a36Sopenharmony_ci	return 0;
298162306a36Sopenharmony_ci}
298262306a36Sopenharmony_ci
298362306a36Sopenharmony_cistatic void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
298462306a36Sopenharmony_ci		struct kvm_segment *save)
298562306a36Sopenharmony_ci{
298662306a36Sopenharmony_ci	if (!emulate_invalid_guest_state) {
298762306a36Sopenharmony_ci		/*
298862306a36Sopenharmony_ci		 * CS and SS RPL should be equal during guest entry according
298962306a36Sopenharmony_ci		 * to VMX spec, but in reality it is not always so. Since vcpu
299062306a36Sopenharmony_ci		 * is in the middle of the transition from real mode to
299162306a36Sopenharmony_ci		 * protected mode it is safe to assume that RPL 0 is a good
299262306a36Sopenharmony_ci		 * default value.
299362306a36Sopenharmony_ci		 */
299462306a36Sopenharmony_ci		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
299562306a36Sopenharmony_ci			save->selector &= ~SEGMENT_RPL_MASK;
299662306a36Sopenharmony_ci		save->dpl = save->selector & SEGMENT_RPL_MASK;
299762306a36Sopenharmony_ci		save->s = 1;
299862306a36Sopenharmony_ci	}
299962306a36Sopenharmony_ci	__vmx_set_segment(vcpu, save, seg);
300062306a36Sopenharmony_ci}
300162306a36Sopenharmony_ci
300262306a36Sopenharmony_cistatic void enter_pmode(struct kvm_vcpu *vcpu)
300362306a36Sopenharmony_ci{
300462306a36Sopenharmony_ci	unsigned long flags;
300562306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
300662306a36Sopenharmony_ci
300762306a36Sopenharmony_ci	/*
300862306a36Sopenharmony_ci	 * Update real mode segment cache. It may be not up-to-date if segment
300962306a36Sopenharmony_ci	 * register was written while vcpu was in a guest mode.
301062306a36Sopenharmony_ci	 */
301162306a36Sopenharmony_ci	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
301262306a36Sopenharmony_ci	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
301362306a36Sopenharmony_ci	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
301462306a36Sopenharmony_ci	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
301562306a36Sopenharmony_ci	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
301662306a36Sopenharmony_ci	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
301762306a36Sopenharmony_ci
301862306a36Sopenharmony_ci	vmx->rmode.vm86_active = 0;
301962306a36Sopenharmony_ci
302062306a36Sopenharmony_ci	__vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
302162306a36Sopenharmony_ci
302262306a36Sopenharmony_ci	flags = vmcs_readl(GUEST_RFLAGS);
302362306a36Sopenharmony_ci	flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
302462306a36Sopenharmony_ci	flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
302562306a36Sopenharmony_ci	vmcs_writel(GUEST_RFLAGS, flags);
302662306a36Sopenharmony_ci
302762306a36Sopenharmony_ci	vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
302862306a36Sopenharmony_ci			(vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
302962306a36Sopenharmony_ci
303062306a36Sopenharmony_ci	vmx_update_exception_bitmap(vcpu);
303162306a36Sopenharmony_ci
303262306a36Sopenharmony_ci	fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
303362306a36Sopenharmony_ci	fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
303462306a36Sopenharmony_ci	fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
303562306a36Sopenharmony_ci	fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
303662306a36Sopenharmony_ci	fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
303762306a36Sopenharmony_ci	fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
303862306a36Sopenharmony_ci}
303962306a36Sopenharmony_ci
304062306a36Sopenharmony_cistatic void fix_rmode_seg(int seg, struct kvm_segment *save)
304162306a36Sopenharmony_ci{
304262306a36Sopenharmony_ci	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
304362306a36Sopenharmony_ci	struct kvm_segment var = *save;
304462306a36Sopenharmony_ci
304562306a36Sopenharmony_ci	var.dpl = 0x3;
304662306a36Sopenharmony_ci	if (seg == VCPU_SREG_CS)
304762306a36Sopenharmony_ci		var.type = 0x3;
304862306a36Sopenharmony_ci
304962306a36Sopenharmony_ci	if (!emulate_invalid_guest_state) {
305062306a36Sopenharmony_ci		var.selector = var.base >> 4;
305162306a36Sopenharmony_ci		var.base = var.base & 0xffff0;
305262306a36Sopenharmony_ci		var.limit = 0xffff;
305362306a36Sopenharmony_ci		var.g = 0;
305462306a36Sopenharmony_ci		var.db = 0;
305562306a36Sopenharmony_ci		var.present = 1;
305662306a36Sopenharmony_ci		var.s = 1;
305762306a36Sopenharmony_ci		var.l = 0;
305862306a36Sopenharmony_ci		var.unusable = 0;
305962306a36Sopenharmony_ci		var.type = 0x3;
306062306a36Sopenharmony_ci		var.avl = 0;
306162306a36Sopenharmony_ci		if (save->base & 0xf)
306262306a36Sopenharmony_ci			pr_warn_once("segment base is not paragraph aligned "
306362306a36Sopenharmony_ci				     "when entering protected mode (seg=%d)", seg);
306462306a36Sopenharmony_ci	}
306562306a36Sopenharmony_ci
306662306a36Sopenharmony_ci	vmcs_write16(sf->selector, var.selector);
306762306a36Sopenharmony_ci	vmcs_writel(sf->base, var.base);
306862306a36Sopenharmony_ci	vmcs_write32(sf->limit, var.limit);
306962306a36Sopenharmony_ci	vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
307062306a36Sopenharmony_ci}
307162306a36Sopenharmony_ci
307262306a36Sopenharmony_cistatic void enter_rmode(struct kvm_vcpu *vcpu)
307362306a36Sopenharmony_ci{
307462306a36Sopenharmony_ci	unsigned long flags;
307562306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
307662306a36Sopenharmony_ci	struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
307762306a36Sopenharmony_ci
307862306a36Sopenharmony_ci	/*
307962306a36Sopenharmony_ci	 * KVM should never use VM86 to virtualize Real Mode when L2 is active,
308062306a36Sopenharmony_ci	 * as using VM86 is unnecessary if unrestricted guest is enabled, and
308162306a36Sopenharmony_ci	 * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0
308262306a36Sopenharmony_ci	 * should VM-Fail and KVM should reject userspace attempts to stuff
308362306a36Sopenharmony_ci	 * CR0.PG=0 when L2 is active.
308462306a36Sopenharmony_ci	 */
308562306a36Sopenharmony_ci	WARN_ON_ONCE(is_guest_mode(vcpu));
308662306a36Sopenharmony_ci
308762306a36Sopenharmony_ci	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
308862306a36Sopenharmony_ci	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
308962306a36Sopenharmony_ci	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
309062306a36Sopenharmony_ci	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
309162306a36Sopenharmony_ci	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
309262306a36Sopenharmony_ci	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
309362306a36Sopenharmony_ci	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
309462306a36Sopenharmony_ci
309562306a36Sopenharmony_ci	vmx->rmode.vm86_active = 1;
309662306a36Sopenharmony_ci
309762306a36Sopenharmony_ci	vmx_segment_cache_clear(vmx);
309862306a36Sopenharmony_ci
309962306a36Sopenharmony_ci	vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
310062306a36Sopenharmony_ci	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
310162306a36Sopenharmony_ci	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
310262306a36Sopenharmony_ci
310362306a36Sopenharmony_ci	flags = vmcs_readl(GUEST_RFLAGS);
310462306a36Sopenharmony_ci	vmx->rmode.save_rflags = flags;
310562306a36Sopenharmony_ci
310662306a36Sopenharmony_ci	flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
310762306a36Sopenharmony_ci
310862306a36Sopenharmony_ci	vmcs_writel(GUEST_RFLAGS, flags);
310962306a36Sopenharmony_ci	vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
311062306a36Sopenharmony_ci	vmx_update_exception_bitmap(vcpu);
311162306a36Sopenharmony_ci
311262306a36Sopenharmony_ci	fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
311362306a36Sopenharmony_ci	fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
311462306a36Sopenharmony_ci	fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
311562306a36Sopenharmony_ci	fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
311662306a36Sopenharmony_ci	fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
311762306a36Sopenharmony_ci	fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
311862306a36Sopenharmony_ci}
311962306a36Sopenharmony_ci
312062306a36Sopenharmony_ciint vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
312162306a36Sopenharmony_ci{
312262306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
312362306a36Sopenharmony_ci
312462306a36Sopenharmony_ci	/* Nothing to do if hardware doesn't support EFER. */
312562306a36Sopenharmony_ci	if (!vmx_find_uret_msr(vmx, MSR_EFER))
312662306a36Sopenharmony_ci		return 0;
312762306a36Sopenharmony_ci
312862306a36Sopenharmony_ci	vcpu->arch.efer = efer;
312962306a36Sopenharmony_ci#ifdef CONFIG_X86_64
313062306a36Sopenharmony_ci	if (efer & EFER_LMA)
313162306a36Sopenharmony_ci		vm_entry_controls_setbit(vmx, VM_ENTRY_IA32E_MODE);
313262306a36Sopenharmony_ci	else
313362306a36Sopenharmony_ci		vm_entry_controls_clearbit(vmx, VM_ENTRY_IA32E_MODE);
313462306a36Sopenharmony_ci#else
313562306a36Sopenharmony_ci	if (KVM_BUG_ON(efer & EFER_LMA, vcpu->kvm))
313662306a36Sopenharmony_ci		return 1;
313762306a36Sopenharmony_ci#endif
313862306a36Sopenharmony_ci
313962306a36Sopenharmony_ci	vmx_setup_uret_msrs(vmx);
314062306a36Sopenharmony_ci	return 0;
314162306a36Sopenharmony_ci}
314262306a36Sopenharmony_ci
314362306a36Sopenharmony_ci#ifdef CONFIG_X86_64
314462306a36Sopenharmony_ci
314562306a36Sopenharmony_cistatic void enter_lmode(struct kvm_vcpu *vcpu)
314662306a36Sopenharmony_ci{
314762306a36Sopenharmony_ci	u32 guest_tr_ar;
314862306a36Sopenharmony_ci
314962306a36Sopenharmony_ci	vmx_segment_cache_clear(to_vmx(vcpu));
315062306a36Sopenharmony_ci
315162306a36Sopenharmony_ci	guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
315262306a36Sopenharmony_ci	if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
315362306a36Sopenharmony_ci		pr_debug_ratelimited("%s: tss fixup for long mode. \n",
315462306a36Sopenharmony_ci				     __func__);
315562306a36Sopenharmony_ci		vmcs_write32(GUEST_TR_AR_BYTES,
315662306a36Sopenharmony_ci			     (guest_tr_ar & ~VMX_AR_TYPE_MASK)
315762306a36Sopenharmony_ci			     | VMX_AR_TYPE_BUSY_64_TSS);
315862306a36Sopenharmony_ci	}
315962306a36Sopenharmony_ci	vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
316062306a36Sopenharmony_ci}
316162306a36Sopenharmony_ci
316262306a36Sopenharmony_cistatic void exit_lmode(struct kvm_vcpu *vcpu)
316362306a36Sopenharmony_ci{
316462306a36Sopenharmony_ci	vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
316562306a36Sopenharmony_ci}
316662306a36Sopenharmony_ci
316762306a36Sopenharmony_ci#endif
316862306a36Sopenharmony_ci
316962306a36Sopenharmony_cistatic void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
317062306a36Sopenharmony_ci{
317162306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
317262306a36Sopenharmony_ci
317362306a36Sopenharmony_ci	/*
317462306a36Sopenharmony_ci	 * INVEPT must be issued when EPT is enabled, irrespective of VPID, as
317562306a36Sopenharmony_ci	 * the CPU is not required to invalidate guest-physical mappings on
317662306a36Sopenharmony_ci	 * VM-Entry, even if VPID is disabled.  Guest-physical mappings are
317762306a36Sopenharmony_ci	 * associated with the root EPT structure and not any particular VPID
317862306a36Sopenharmony_ci	 * (INVVPID also isn't required to invalidate guest-physical mappings).
317962306a36Sopenharmony_ci	 */
318062306a36Sopenharmony_ci	if (enable_ept) {
318162306a36Sopenharmony_ci		ept_sync_global();
318262306a36Sopenharmony_ci	} else if (enable_vpid) {
318362306a36Sopenharmony_ci		if (cpu_has_vmx_invvpid_global()) {
318462306a36Sopenharmony_ci			vpid_sync_vcpu_global();
318562306a36Sopenharmony_ci		} else {
318662306a36Sopenharmony_ci			vpid_sync_vcpu_single(vmx->vpid);
318762306a36Sopenharmony_ci			vpid_sync_vcpu_single(vmx->nested.vpid02);
318862306a36Sopenharmony_ci		}
318962306a36Sopenharmony_ci	}
319062306a36Sopenharmony_ci}
319162306a36Sopenharmony_ci
319262306a36Sopenharmony_cistatic inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
319362306a36Sopenharmony_ci{
319462306a36Sopenharmony_ci	if (is_guest_mode(vcpu))
319562306a36Sopenharmony_ci		return nested_get_vpid02(vcpu);
319662306a36Sopenharmony_ci	return to_vmx(vcpu)->vpid;
319762306a36Sopenharmony_ci}
319862306a36Sopenharmony_ci
319962306a36Sopenharmony_cistatic void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
320062306a36Sopenharmony_ci{
320162306a36Sopenharmony_ci	struct kvm_mmu *mmu = vcpu->arch.mmu;
320262306a36Sopenharmony_ci	u64 root_hpa = mmu->root.hpa;
320362306a36Sopenharmony_ci
320462306a36Sopenharmony_ci	/* No flush required if the current context is invalid. */
320562306a36Sopenharmony_ci	if (!VALID_PAGE(root_hpa))
320662306a36Sopenharmony_ci		return;
320762306a36Sopenharmony_ci
320862306a36Sopenharmony_ci	if (enable_ept)
320962306a36Sopenharmony_ci		ept_sync_context(construct_eptp(vcpu, root_hpa,
321062306a36Sopenharmony_ci						mmu->root_role.level));
321162306a36Sopenharmony_ci	else
321262306a36Sopenharmony_ci		vpid_sync_context(vmx_get_current_vpid(vcpu));
321362306a36Sopenharmony_ci}
321462306a36Sopenharmony_ci
321562306a36Sopenharmony_cistatic void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
321662306a36Sopenharmony_ci{
321762306a36Sopenharmony_ci	/*
321862306a36Sopenharmony_ci	 * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in
321962306a36Sopenharmony_ci	 * vmx_flush_tlb_guest() for an explanation of why this is ok.
322062306a36Sopenharmony_ci	 */
322162306a36Sopenharmony_ci	vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr);
322262306a36Sopenharmony_ci}
322362306a36Sopenharmony_ci
/* Flush the guest's own (GVA-based) translations for the current VPID. */
static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
{
	int vpid = vmx_get_current_vpid(vcpu);

	/*
	 * vpid_sync_context() does nothing when vpid==0, e.g. when
	 * enable_vpid==0 or no vpid could be allocated for this vCPU.  With
	 * vpid disabled, VM-Enter and VM-Exit are required to flush
	 * GVA->{G,H}PA mappings from the TLB (VM-Enter with vpid enabled and
	 * vpid==0 is disallowed), so no explicit INVVPID is needed then.
	 */
	vpid_sync_context(vpid);
}
323562306a36Sopenharmony_ci
323662306a36Sopenharmony_civoid vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
323762306a36Sopenharmony_ci{
323862306a36Sopenharmony_ci	struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
323962306a36Sopenharmony_ci
324062306a36Sopenharmony_ci	if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR))
324162306a36Sopenharmony_ci		return;
324262306a36Sopenharmony_ci
324362306a36Sopenharmony_ci	if (is_pae_paging(vcpu)) {
324462306a36Sopenharmony_ci		vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
324562306a36Sopenharmony_ci		vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
324662306a36Sopenharmony_ci		vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
324762306a36Sopenharmony_ci		vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
324862306a36Sopenharmony_ci	}
324962306a36Sopenharmony_ci}
325062306a36Sopenharmony_ci
325162306a36Sopenharmony_civoid ept_save_pdptrs(struct kvm_vcpu *vcpu)
325262306a36Sopenharmony_ci{
325362306a36Sopenharmony_ci	struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
325462306a36Sopenharmony_ci
325562306a36Sopenharmony_ci	if (WARN_ON_ONCE(!is_pae_paging(vcpu)))
325662306a36Sopenharmony_ci		return;
325762306a36Sopenharmony_ci
325862306a36Sopenharmony_ci	mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
325962306a36Sopenharmony_ci	mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
326062306a36Sopenharmony_ci	mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
326162306a36Sopenharmony_ci	mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
326262306a36Sopenharmony_ci
326362306a36Sopenharmony_ci	kvm_register_mark_available(vcpu, VCPU_EXREG_PDPTR);
326462306a36Sopenharmony_ci}
326562306a36Sopenharmony_ci
/* Both CR3 exiting controls (load and store), handled as a single mask. */
#define CR3_EXITING_BITS \
	(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING)
326862306a36Sopenharmony_ci
326962306a36Sopenharmony_cistatic bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
327062306a36Sopenharmony_ci{
327162306a36Sopenharmony_ci	if (is_guest_mode(vcpu))
327262306a36Sopenharmony_ci		return nested_guest_cr0_valid(vcpu, cr0);
327362306a36Sopenharmony_ci
327462306a36Sopenharmony_ci	if (to_vmx(vcpu)->nested.vmxon)
327562306a36Sopenharmony_ci		return nested_host_cr0_valid(vcpu, cr0);
327662306a36Sopenharmony_ci
327762306a36Sopenharmony_ci	return true;
327862306a36Sopenharmony_ci}
327962306a36Sopenharmony_ci
328062306a36Sopenharmony_civoid vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
328162306a36Sopenharmony_ci{
328262306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
328362306a36Sopenharmony_ci	unsigned long hw_cr0, old_cr0_pg;
328462306a36Sopenharmony_ci	u32 tmp;
328562306a36Sopenharmony_ci
328662306a36Sopenharmony_ci	old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG);
328762306a36Sopenharmony_ci
328862306a36Sopenharmony_ci	hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
328962306a36Sopenharmony_ci	if (enable_unrestricted_guest)
329062306a36Sopenharmony_ci		hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
329162306a36Sopenharmony_ci	else {
329262306a36Sopenharmony_ci		hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
329362306a36Sopenharmony_ci		if (!enable_ept)
329462306a36Sopenharmony_ci			hw_cr0 |= X86_CR0_WP;
329562306a36Sopenharmony_ci
329662306a36Sopenharmony_ci		if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
329762306a36Sopenharmony_ci			enter_pmode(vcpu);
329862306a36Sopenharmony_ci
329962306a36Sopenharmony_ci		if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
330062306a36Sopenharmony_ci			enter_rmode(vcpu);
330162306a36Sopenharmony_ci	}
330262306a36Sopenharmony_ci
330362306a36Sopenharmony_ci	vmcs_writel(CR0_READ_SHADOW, cr0);
330462306a36Sopenharmony_ci	vmcs_writel(GUEST_CR0, hw_cr0);
330562306a36Sopenharmony_ci	vcpu->arch.cr0 = cr0;
330662306a36Sopenharmony_ci	kvm_register_mark_available(vcpu, VCPU_EXREG_CR0);
330762306a36Sopenharmony_ci
330862306a36Sopenharmony_ci#ifdef CONFIG_X86_64
330962306a36Sopenharmony_ci	if (vcpu->arch.efer & EFER_LME) {
331062306a36Sopenharmony_ci		if (!old_cr0_pg && (cr0 & X86_CR0_PG))
331162306a36Sopenharmony_ci			enter_lmode(vcpu);
331262306a36Sopenharmony_ci		else if (old_cr0_pg && !(cr0 & X86_CR0_PG))
331362306a36Sopenharmony_ci			exit_lmode(vcpu);
331462306a36Sopenharmony_ci	}
331562306a36Sopenharmony_ci#endif
331662306a36Sopenharmony_ci
331762306a36Sopenharmony_ci	if (enable_ept && !enable_unrestricted_guest) {
331862306a36Sopenharmony_ci		/*
331962306a36Sopenharmony_ci		 * Ensure KVM has an up-to-date snapshot of the guest's CR3.  If
332062306a36Sopenharmony_ci		 * the below code _enables_ CR3 exiting, vmx_cache_reg() will
332162306a36Sopenharmony_ci		 * (correctly) stop reading vmcs.GUEST_CR3 because it thinks
332262306a36Sopenharmony_ci		 * KVM's CR3 is installed.
332362306a36Sopenharmony_ci		 */
332462306a36Sopenharmony_ci		if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
332562306a36Sopenharmony_ci			vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
332662306a36Sopenharmony_ci
332762306a36Sopenharmony_ci		/*
332862306a36Sopenharmony_ci		 * When running with EPT but not unrestricted guest, KVM must
332962306a36Sopenharmony_ci		 * intercept CR3 accesses when paging is _disabled_.  This is
333062306a36Sopenharmony_ci		 * necessary because restricted guests can't actually run with
333162306a36Sopenharmony_ci		 * paging disabled, and so KVM stuffs its own CR3 in order to
333262306a36Sopenharmony_ci		 * run the guest when identity mapped page tables.
333362306a36Sopenharmony_ci		 *
333462306a36Sopenharmony_ci		 * Do _NOT_ check the old CR0.PG, e.g. to optimize away the
333562306a36Sopenharmony_ci		 * update, it may be stale with respect to CR3 interception,
333662306a36Sopenharmony_ci		 * e.g. after nested VM-Enter.
333762306a36Sopenharmony_ci		 *
333862306a36Sopenharmony_ci		 * Lastly, honor L1's desires, i.e. intercept CR3 loads and/or
333962306a36Sopenharmony_ci		 * stores to forward them to L1, even if KVM does not need to
334062306a36Sopenharmony_ci		 * intercept them to preserve its identity mapped page tables.
334162306a36Sopenharmony_ci		 */
334262306a36Sopenharmony_ci		if (!(cr0 & X86_CR0_PG)) {
334362306a36Sopenharmony_ci			exec_controls_setbit(vmx, CR3_EXITING_BITS);
334462306a36Sopenharmony_ci		} else if (!is_guest_mode(vcpu)) {
334562306a36Sopenharmony_ci			exec_controls_clearbit(vmx, CR3_EXITING_BITS);
334662306a36Sopenharmony_ci		} else {
334762306a36Sopenharmony_ci			tmp = exec_controls_get(vmx);
334862306a36Sopenharmony_ci			tmp &= ~CR3_EXITING_BITS;
334962306a36Sopenharmony_ci			tmp |= get_vmcs12(vcpu)->cpu_based_vm_exec_control & CR3_EXITING_BITS;
335062306a36Sopenharmony_ci			exec_controls_set(vmx, tmp);
335162306a36Sopenharmony_ci		}
335262306a36Sopenharmony_ci
335362306a36Sopenharmony_ci		/* Note, vmx_set_cr4() consumes the new vcpu->arch.cr0. */
335462306a36Sopenharmony_ci		if ((old_cr0_pg ^ cr0) & X86_CR0_PG)
335562306a36Sopenharmony_ci			vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
335662306a36Sopenharmony_ci
335762306a36Sopenharmony_ci		/*
335862306a36Sopenharmony_ci		 * When !CR0_PG -> CR0_PG, vcpu->arch.cr3 becomes active, but
335962306a36Sopenharmony_ci		 * GUEST_CR3 is still vmx->ept_identity_map_addr if EPT + !URG.
336062306a36Sopenharmony_ci		 */
336162306a36Sopenharmony_ci		if (!(old_cr0_pg & X86_CR0_PG) && (cr0 & X86_CR0_PG))
336262306a36Sopenharmony_ci			kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
336362306a36Sopenharmony_ci	}
336462306a36Sopenharmony_ci
336562306a36Sopenharmony_ci	/* depends on vcpu->arch.cr0 to be set to a new value */
336662306a36Sopenharmony_ci	vmx->emulation_required = vmx_emulation_required(vcpu);
336762306a36Sopenharmony_ci}
336862306a36Sopenharmony_ci
/* Return 5 if the CPU supports 5-level EPT, otherwise 4. */
static int vmx_get_max_ept_level(void)
{
	return cpu_has_vmx_ept_5levels() ? 5 : 4;
}
337562306a36Sopenharmony_ci
337662306a36Sopenharmony_ciu64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level)
337762306a36Sopenharmony_ci{
337862306a36Sopenharmony_ci	u64 eptp = VMX_EPTP_MT_WB;
337962306a36Sopenharmony_ci
338062306a36Sopenharmony_ci	eptp |= (root_level == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
338162306a36Sopenharmony_ci
338262306a36Sopenharmony_ci	if (enable_ept_ad_bits &&
338362306a36Sopenharmony_ci	    (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
338462306a36Sopenharmony_ci		eptp |= VMX_EPTP_AD_ENABLE_BIT;
338562306a36Sopenharmony_ci	eptp |= root_hpa;
338662306a36Sopenharmony_ci
338762306a36Sopenharmony_ci	return eptp;
338862306a36Sopenharmony_ci}
338962306a36Sopenharmony_ci
339062306a36Sopenharmony_cistatic void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
339162306a36Sopenharmony_ci			     int root_level)
339262306a36Sopenharmony_ci{
339362306a36Sopenharmony_ci	struct kvm *kvm = vcpu->kvm;
339462306a36Sopenharmony_ci	bool update_guest_cr3 = true;
339562306a36Sopenharmony_ci	unsigned long guest_cr3;
339662306a36Sopenharmony_ci	u64 eptp;
339762306a36Sopenharmony_ci
339862306a36Sopenharmony_ci	if (enable_ept) {
339962306a36Sopenharmony_ci		eptp = construct_eptp(vcpu, root_hpa, root_level);
340062306a36Sopenharmony_ci		vmcs_write64(EPT_POINTER, eptp);
340162306a36Sopenharmony_ci
340262306a36Sopenharmony_ci		hv_track_root_tdp(vcpu, root_hpa);
340362306a36Sopenharmony_ci
340462306a36Sopenharmony_ci		if (!enable_unrestricted_guest && !is_paging(vcpu))
340562306a36Sopenharmony_ci			guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
340662306a36Sopenharmony_ci		else if (kvm_register_is_dirty(vcpu, VCPU_EXREG_CR3))
340762306a36Sopenharmony_ci			guest_cr3 = vcpu->arch.cr3;
340862306a36Sopenharmony_ci		else /* vmcs.GUEST_CR3 is already up-to-date. */
340962306a36Sopenharmony_ci			update_guest_cr3 = false;
341062306a36Sopenharmony_ci		vmx_ept_load_pdptrs(vcpu);
341162306a36Sopenharmony_ci	} else {
341262306a36Sopenharmony_ci		guest_cr3 = root_hpa | kvm_get_active_pcid(vcpu);
341362306a36Sopenharmony_ci	}
341462306a36Sopenharmony_ci
341562306a36Sopenharmony_ci	if (update_guest_cr3)
341662306a36Sopenharmony_ci		vmcs_writel(GUEST_CR3, guest_cr3);
341762306a36Sopenharmony_ci}
341862306a36Sopenharmony_ci
341962306a36Sopenharmony_ci
342062306a36Sopenharmony_cistatic bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
342162306a36Sopenharmony_ci{
342262306a36Sopenharmony_ci	/*
342362306a36Sopenharmony_ci	 * We operate under the default treatment of SMM, so VMX cannot be
342462306a36Sopenharmony_ci	 * enabled under SMM.  Note, whether or not VMXE is allowed at all,
342562306a36Sopenharmony_ci	 * i.e. is a reserved bit, is handled by common x86 code.
342662306a36Sopenharmony_ci	 */
342762306a36Sopenharmony_ci	if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu))
342862306a36Sopenharmony_ci		return false;
342962306a36Sopenharmony_ci
343062306a36Sopenharmony_ci	if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
343162306a36Sopenharmony_ci		return false;
343262306a36Sopenharmony_ci
343362306a36Sopenharmony_ci	return true;
343462306a36Sopenharmony_ci}
343562306a36Sopenharmony_ci
343662306a36Sopenharmony_civoid vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
343762306a36Sopenharmony_ci{
343862306a36Sopenharmony_ci	unsigned long old_cr4 = kvm_read_cr4(vcpu);
343962306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
344062306a36Sopenharmony_ci	unsigned long hw_cr4;
344162306a36Sopenharmony_ci
344262306a36Sopenharmony_ci	/*
344362306a36Sopenharmony_ci	 * Pass through host's Machine Check Enable value to hw_cr4, which
344462306a36Sopenharmony_ci	 * is in force while we are in guest mode.  Do not let guests control
344562306a36Sopenharmony_ci	 * this bit, even if host CR4.MCE == 0.
344662306a36Sopenharmony_ci	 */
344762306a36Sopenharmony_ci	hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
344862306a36Sopenharmony_ci	if (enable_unrestricted_guest)
344962306a36Sopenharmony_ci		hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
345062306a36Sopenharmony_ci	else if (vmx->rmode.vm86_active)
345162306a36Sopenharmony_ci		hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
345262306a36Sopenharmony_ci	else
345362306a36Sopenharmony_ci		hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
345462306a36Sopenharmony_ci
345562306a36Sopenharmony_ci	if (vmx_umip_emulated()) {
345662306a36Sopenharmony_ci		if (cr4 & X86_CR4_UMIP) {
345762306a36Sopenharmony_ci			secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
345862306a36Sopenharmony_ci			hw_cr4 &= ~X86_CR4_UMIP;
345962306a36Sopenharmony_ci		} else if (!is_guest_mode(vcpu) ||
346062306a36Sopenharmony_ci			!nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) {
346162306a36Sopenharmony_ci			secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
346262306a36Sopenharmony_ci		}
346362306a36Sopenharmony_ci	}
346462306a36Sopenharmony_ci
346562306a36Sopenharmony_ci	vcpu->arch.cr4 = cr4;
346662306a36Sopenharmony_ci	kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
346762306a36Sopenharmony_ci
346862306a36Sopenharmony_ci	if (!enable_unrestricted_guest) {
346962306a36Sopenharmony_ci		if (enable_ept) {
347062306a36Sopenharmony_ci			if (!is_paging(vcpu)) {
347162306a36Sopenharmony_ci				hw_cr4 &= ~X86_CR4_PAE;
347262306a36Sopenharmony_ci				hw_cr4 |= X86_CR4_PSE;
347362306a36Sopenharmony_ci			} else if (!(cr4 & X86_CR4_PAE)) {
347462306a36Sopenharmony_ci				hw_cr4 &= ~X86_CR4_PAE;
347562306a36Sopenharmony_ci			}
347662306a36Sopenharmony_ci		}
347762306a36Sopenharmony_ci
347862306a36Sopenharmony_ci		/*
347962306a36Sopenharmony_ci		 * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in
348062306a36Sopenharmony_ci		 * hardware.  To emulate this behavior, SMEP/SMAP/PKU needs
348162306a36Sopenharmony_ci		 * to be manually disabled when guest switches to non-paging
348262306a36Sopenharmony_ci		 * mode.
348362306a36Sopenharmony_ci		 *
348462306a36Sopenharmony_ci		 * If !enable_unrestricted_guest, the CPU is always running
348562306a36Sopenharmony_ci		 * with CR0.PG=1 and CR4 needs to be modified.
348662306a36Sopenharmony_ci		 * If enable_unrestricted_guest, the CPU automatically
348762306a36Sopenharmony_ci		 * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
348862306a36Sopenharmony_ci		 */
348962306a36Sopenharmony_ci		if (!is_paging(vcpu))
349062306a36Sopenharmony_ci			hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
349162306a36Sopenharmony_ci	}
349262306a36Sopenharmony_ci
349362306a36Sopenharmony_ci	vmcs_writel(CR4_READ_SHADOW, cr4);
349462306a36Sopenharmony_ci	vmcs_writel(GUEST_CR4, hw_cr4);
349562306a36Sopenharmony_ci
349662306a36Sopenharmony_ci	if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
349762306a36Sopenharmony_ci		kvm_update_cpuid_runtime(vcpu);
349862306a36Sopenharmony_ci}
349962306a36Sopenharmony_ci
350062306a36Sopenharmony_civoid vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
350162306a36Sopenharmony_ci{
350262306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
350362306a36Sopenharmony_ci	u32 ar;
350462306a36Sopenharmony_ci
350562306a36Sopenharmony_ci	if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
350662306a36Sopenharmony_ci		*var = vmx->rmode.segs[seg];
350762306a36Sopenharmony_ci		if (seg == VCPU_SREG_TR
350862306a36Sopenharmony_ci		    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
350962306a36Sopenharmony_ci			return;
351062306a36Sopenharmony_ci		var->base = vmx_read_guest_seg_base(vmx, seg);
351162306a36Sopenharmony_ci		var->selector = vmx_read_guest_seg_selector(vmx, seg);
351262306a36Sopenharmony_ci		return;
351362306a36Sopenharmony_ci	}
351462306a36Sopenharmony_ci	var->base = vmx_read_guest_seg_base(vmx, seg);
351562306a36Sopenharmony_ci	var->limit = vmx_read_guest_seg_limit(vmx, seg);
351662306a36Sopenharmony_ci	var->selector = vmx_read_guest_seg_selector(vmx, seg);
351762306a36Sopenharmony_ci	ar = vmx_read_guest_seg_ar(vmx, seg);
351862306a36Sopenharmony_ci	var->unusable = (ar >> 16) & 1;
351962306a36Sopenharmony_ci	var->type = ar & 15;
352062306a36Sopenharmony_ci	var->s = (ar >> 4) & 1;
352162306a36Sopenharmony_ci	var->dpl = (ar >> 5) & 3;
352262306a36Sopenharmony_ci	/*
352362306a36Sopenharmony_ci	 * Some userspaces do not preserve unusable property. Since usable
352462306a36Sopenharmony_ci	 * segment has to be present according to VMX spec we can use present
352562306a36Sopenharmony_ci	 * property to amend userspace bug by making unusable segment always
352662306a36Sopenharmony_ci	 * nonpresent. vmx_segment_access_rights() already marks nonpresent
352762306a36Sopenharmony_ci	 * segment as unusable.
352862306a36Sopenharmony_ci	 */
352962306a36Sopenharmony_ci	var->present = !var->unusable;
353062306a36Sopenharmony_ci	var->avl = (ar >> 12) & 1;
353162306a36Sopenharmony_ci	var->l = (ar >> 13) & 1;
353262306a36Sopenharmony_ci	var->db = (ar >> 14) & 1;
353362306a36Sopenharmony_ci	var->g = (ar >> 15) & 1;
353462306a36Sopenharmony_ci}
353562306a36Sopenharmony_ci
353662306a36Sopenharmony_cistatic u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
353762306a36Sopenharmony_ci{
353862306a36Sopenharmony_ci	struct kvm_segment s;
353962306a36Sopenharmony_ci
354062306a36Sopenharmony_ci	if (to_vmx(vcpu)->rmode.vm86_active) {
354162306a36Sopenharmony_ci		vmx_get_segment(vcpu, &s, seg);
354262306a36Sopenharmony_ci		return s.base;
354362306a36Sopenharmony_ci	}
354462306a36Sopenharmony_ci	return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
354562306a36Sopenharmony_ci}
354662306a36Sopenharmony_ci
354762306a36Sopenharmony_ciint vmx_get_cpl(struct kvm_vcpu *vcpu)
354862306a36Sopenharmony_ci{
354962306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
355062306a36Sopenharmony_ci
355162306a36Sopenharmony_ci	if (unlikely(vmx->rmode.vm86_active))
355262306a36Sopenharmony_ci		return 0;
355362306a36Sopenharmony_ci	else {
355462306a36Sopenharmony_ci		int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
355562306a36Sopenharmony_ci		return VMX_AR_DPL(ar);
355662306a36Sopenharmony_ci	}
355762306a36Sopenharmony_ci}
355862306a36Sopenharmony_ci
355962306a36Sopenharmony_cistatic u32 vmx_segment_access_rights(struct kvm_segment *var)
356062306a36Sopenharmony_ci{
356162306a36Sopenharmony_ci	u32 ar;
356262306a36Sopenharmony_ci
356362306a36Sopenharmony_ci	ar = var->type & 15;
356462306a36Sopenharmony_ci	ar |= (var->s & 1) << 4;
356562306a36Sopenharmony_ci	ar |= (var->dpl & 3) << 5;
356662306a36Sopenharmony_ci	ar |= (var->present & 1) << 7;
356762306a36Sopenharmony_ci	ar |= (var->avl & 1) << 12;
356862306a36Sopenharmony_ci	ar |= (var->l & 1) << 13;
356962306a36Sopenharmony_ci	ar |= (var->db & 1) << 14;
357062306a36Sopenharmony_ci	ar |= (var->g & 1) << 15;
357162306a36Sopenharmony_ci	ar |= (var->unusable || !var->present) << 16;
357262306a36Sopenharmony_ci
357362306a36Sopenharmony_ci	return ar;
357462306a36Sopenharmony_ci}
357562306a36Sopenharmony_ci
357662306a36Sopenharmony_civoid __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
357762306a36Sopenharmony_ci{
357862306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
357962306a36Sopenharmony_ci	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
358062306a36Sopenharmony_ci
358162306a36Sopenharmony_ci	vmx_segment_cache_clear(vmx);
358262306a36Sopenharmony_ci
358362306a36Sopenharmony_ci	if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
358462306a36Sopenharmony_ci		vmx->rmode.segs[seg] = *var;
358562306a36Sopenharmony_ci		if (seg == VCPU_SREG_TR)
358662306a36Sopenharmony_ci			vmcs_write16(sf->selector, var->selector);
358762306a36Sopenharmony_ci		else if (var->s)
358862306a36Sopenharmony_ci			fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
358962306a36Sopenharmony_ci		return;
359062306a36Sopenharmony_ci	}
359162306a36Sopenharmony_ci
359262306a36Sopenharmony_ci	vmcs_writel(sf->base, var->base);
359362306a36Sopenharmony_ci	vmcs_write32(sf->limit, var->limit);
359462306a36Sopenharmony_ci	vmcs_write16(sf->selector, var->selector);
359562306a36Sopenharmony_ci
359662306a36Sopenharmony_ci	/*
359762306a36Sopenharmony_ci	 *   Fix the "Accessed" bit in AR field of segment registers for older
359862306a36Sopenharmony_ci	 * qemu binaries.
359962306a36Sopenharmony_ci	 *   IA32 arch specifies that at the time of processor reset the
360062306a36Sopenharmony_ci	 * "Accessed" bit in the AR field of segment registers is 1. And qemu
360162306a36Sopenharmony_ci	 * is setting it to 0 in the userland code. This causes invalid guest
360262306a36Sopenharmony_ci	 * state vmexit when "unrestricted guest" mode is turned on.
360362306a36Sopenharmony_ci	 *    Fix for this setup issue in cpu_reset is being pushed in the qemu
360462306a36Sopenharmony_ci	 * tree. Newer qemu binaries with that qemu fix would not need this
360562306a36Sopenharmony_ci	 * kvm hack.
360662306a36Sopenharmony_ci	 */
360762306a36Sopenharmony_ci	if (is_unrestricted_guest(vcpu) && (seg != VCPU_SREG_LDTR))
360862306a36Sopenharmony_ci		var->type |= 0x1; /* Accessed */
360962306a36Sopenharmony_ci
361062306a36Sopenharmony_ci	vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
361162306a36Sopenharmony_ci}
361262306a36Sopenharmony_ci
361362306a36Sopenharmony_cistatic void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
361462306a36Sopenharmony_ci{
361562306a36Sopenharmony_ci	__vmx_set_segment(vcpu, var, seg);
361662306a36Sopenharmony_ci
361762306a36Sopenharmony_ci	to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
361862306a36Sopenharmony_ci}
361962306a36Sopenharmony_ci
362062306a36Sopenharmony_cistatic void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
362162306a36Sopenharmony_ci{
362262306a36Sopenharmony_ci	u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
362362306a36Sopenharmony_ci
362462306a36Sopenharmony_ci	*db = (ar >> 14) & 1;
362562306a36Sopenharmony_ci	*l = (ar >> 13) & 1;
362662306a36Sopenharmony_ci}
362762306a36Sopenharmony_ci
362862306a36Sopenharmony_cistatic void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
362962306a36Sopenharmony_ci{
363062306a36Sopenharmony_ci	dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
363162306a36Sopenharmony_ci	dt->address = vmcs_readl(GUEST_IDTR_BASE);
363262306a36Sopenharmony_ci}
363362306a36Sopenharmony_ci
363462306a36Sopenharmony_cistatic void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
363562306a36Sopenharmony_ci{
363662306a36Sopenharmony_ci	vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
363762306a36Sopenharmony_ci	vmcs_writel(GUEST_IDTR_BASE, dt->address);
363862306a36Sopenharmony_ci}
363962306a36Sopenharmony_ci
364062306a36Sopenharmony_cistatic void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
364162306a36Sopenharmony_ci{
364262306a36Sopenharmony_ci	dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
364362306a36Sopenharmony_ci	dt->address = vmcs_readl(GUEST_GDTR_BASE);
364462306a36Sopenharmony_ci}
364562306a36Sopenharmony_ci
364662306a36Sopenharmony_cistatic void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
364762306a36Sopenharmony_ci{
364862306a36Sopenharmony_ci	vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
364962306a36Sopenharmony_ci	vmcs_writel(GUEST_GDTR_BASE, dt->address);
365062306a36Sopenharmony_ci}
365162306a36Sopenharmony_ci
365262306a36Sopenharmony_cistatic bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
365362306a36Sopenharmony_ci{
365462306a36Sopenharmony_ci	struct kvm_segment var;
365562306a36Sopenharmony_ci	u32 ar;
365662306a36Sopenharmony_ci
365762306a36Sopenharmony_ci	vmx_get_segment(vcpu, &var, seg);
365862306a36Sopenharmony_ci	var.dpl = 0x3;
365962306a36Sopenharmony_ci	if (seg == VCPU_SREG_CS)
366062306a36Sopenharmony_ci		var.type = 0x3;
366162306a36Sopenharmony_ci	ar = vmx_segment_access_rights(&var);
366262306a36Sopenharmony_ci
366362306a36Sopenharmony_ci	if (var.base != (var.selector << 4))
366462306a36Sopenharmony_ci		return false;
366562306a36Sopenharmony_ci	if (var.limit != 0xffff)
366662306a36Sopenharmony_ci		return false;
366762306a36Sopenharmony_ci	if (ar != 0xf3)
366862306a36Sopenharmony_ci		return false;
366962306a36Sopenharmony_ci
367062306a36Sopenharmony_ci	return true;
367162306a36Sopenharmony_ci}
367262306a36Sopenharmony_ci
367362306a36Sopenharmony_cistatic bool code_segment_valid(struct kvm_vcpu *vcpu)
367462306a36Sopenharmony_ci{
367562306a36Sopenharmony_ci	struct kvm_segment cs;
367662306a36Sopenharmony_ci	unsigned int cs_rpl;
367762306a36Sopenharmony_ci
367862306a36Sopenharmony_ci	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
367962306a36Sopenharmony_ci	cs_rpl = cs.selector & SEGMENT_RPL_MASK;
368062306a36Sopenharmony_ci
368162306a36Sopenharmony_ci	if (cs.unusable)
368262306a36Sopenharmony_ci		return false;
368362306a36Sopenharmony_ci	if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
368462306a36Sopenharmony_ci		return false;
368562306a36Sopenharmony_ci	if (!cs.s)
368662306a36Sopenharmony_ci		return false;
368762306a36Sopenharmony_ci	if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
368862306a36Sopenharmony_ci		if (cs.dpl > cs_rpl)
368962306a36Sopenharmony_ci			return false;
369062306a36Sopenharmony_ci	} else {
369162306a36Sopenharmony_ci		if (cs.dpl != cs_rpl)
369262306a36Sopenharmony_ci			return false;
369362306a36Sopenharmony_ci	}
369462306a36Sopenharmony_ci	if (!cs.present)
369562306a36Sopenharmony_ci		return false;
369662306a36Sopenharmony_ci
369762306a36Sopenharmony_ci	/* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */
369862306a36Sopenharmony_ci	return true;
369962306a36Sopenharmony_ci}
370062306a36Sopenharmony_ci
370162306a36Sopenharmony_cistatic bool stack_segment_valid(struct kvm_vcpu *vcpu)
370262306a36Sopenharmony_ci{
370362306a36Sopenharmony_ci	struct kvm_segment ss;
370462306a36Sopenharmony_ci	unsigned int ss_rpl;
370562306a36Sopenharmony_ci
370662306a36Sopenharmony_ci	vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
370762306a36Sopenharmony_ci	ss_rpl = ss.selector & SEGMENT_RPL_MASK;
370862306a36Sopenharmony_ci
370962306a36Sopenharmony_ci	if (ss.unusable)
371062306a36Sopenharmony_ci		return true;
371162306a36Sopenharmony_ci	if (ss.type != 3 && ss.type != 7)
371262306a36Sopenharmony_ci		return false;
371362306a36Sopenharmony_ci	if (!ss.s)
371462306a36Sopenharmony_ci		return false;
371562306a36Sopenharmony_ci	if (ss.dpl != ss_rpl) /* DPL != RPL */
371662306a36Sopenharmony_ci		return false;
371762306a36Sopenharmony_ci	if (!ss.present)
371862306a36Sopenharmony_ci		return false;
371962306a36Sopenharmony_ci
372062306a36Sopenharmony_ci	return true;
372162306a36Sopenharmony_ci}
372262306a36Sopenharmony_ci
372362306a36Sopenharmony_cistatic bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
372462306a36Sopenharmony_ci{
372562306a36Sopenharmony_ci	struct kvm_segment var;
372662306a36Sopenharmony_ci	unsigned int rpl;
372762306a36Sopenharmony_ci
372862306a36Sopenharmony_ci	vmx_get_segment(vcpu, &var, seg);
372962306a36Sopenharmony_ci	rpl = var.selector & SEGMENT_RPL_MASK;
373062306a36Sopenharmony_ci
373162306a36Sopenharmony_ci	if (var.unusable)
373262306a36Sopenharmony_ci		return true;
373362306a36Sopenharmony_ci	if (!var.s)
373462306a36Sopenharmony_ci		return false;
373562306a36Sopenharmony_ci	if (!var.present)
373662306a36Sopenharmony_ci		return false;
373762306a36Sopenharmony_ci	if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
373862306a36Sopenharmony_ci		if (var.dpl < rpl) /* DPL < RPL */
373962306a36Sopenharmony_ci			return false;
374062306a36Sopenharmony_ci	}
374162306a36Sopenharmony_ci
374262306a36Sopenharmony_ci	/* TODO: Add other members to kvm_segment_field to allow checking for other access
374362306a36Sopenharmony_ci	 * rights flags
374462306a36Sopenharmony_ci	 */
374562306a36Sopenharmony_ci	return true;
374662306a36Sopenharmony_ci}
374762306a36Sopenharmony_ci
374862306a36Sopenharmony_cistatic bool tr_valid(struct kvm_vcpu *vcpu)
374962306a36Sopenharmony_ci{
375062306a36Sopenharmony_ci	struct kvm_segment tr;
375162306a36Sopenharmony_ci
375262306a36Sopenharmony_ci	vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
375362306a36Sopenharmony_ci
375462306a36Sopenharmony_ci	if (tr.unusable)
375562306a36Sopenharmony_ci		return false;
375662306a36Sopenharmony_ci	if (tr.selector & SEGMENT_TI_MASK)	/* TI = 1 */
375762306a36Sopenharmony_ci		return false;
375862306a36Sopenharmony_ci	if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
375962306a36Sopenharmony_ci		return false;
376062306a36Sopenharmony_ci	if (!tr.present)
376162306a36Sopenharmony_ci		return false;
376262306a36Sopenharmony_ci
376362306a36Sopenharmony_ci	return true;
376462306a36Sopenharmony_ci}
376562306a36Sopenharmony_ci
376662306a36Sopenharmony_cistatic bool ldtr_valid(struct kvm_vcpu *vcpu)
376762306a36Sopenharmony_ci{
376862306a36Sopenharmony_ci	struct kvm_segment ldtr;
376962306a36Sopenharmony_ci
377062306a36Sopenharmony_ci	vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
377162306a36Sopenharmony_ci
377262306a36Sopenharmony_ci	if (ldtr.unusable)
377362306a36Sopenharmony_ci		return true;
377462306a36Sopenharmony_ci	if (ldtr.selector & SEGMENT_TI_MASK)	/* TI = 1 */
377562306a36Sopenharmony_ci		return false;
377662306a36Sopenharmony_ci	if (ldtr.type != 2)
377762306a36Sopenharmony_ci		return false;
377862306a36Sopenharmony_ci	if (!ldtr.present)
377962306a36Sopenharmony_ci		return false;
378062306a36Sopenharmony_ci
378162306a36Sopenharmony_ci	return true;
378262306a36Sopenharmony_ci}
378362306a36Sopenharmony_ci
378462306a36Sopenharmony_cistatic bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
378562306a36Sopenharmony_ci{
378662306a36Sopenharmony_ci	struct kvm_segment cs, ss;
378762306a36Sopenharmony_ci
378862306a36Sopenharmony_ci	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
378962306a36Sopenharmony_ci	vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
379062306a36Sopenharmony_ci
379162306a36Sopenharmony_ci	return ((cs.selector & SEGMENT_RPL_MASK) ==
379262306a36Sopenharmony_ci		 (ss.selector & SEGMENT_RPL_MASK));
379362306a36Sopenharmony_ci}
379462306a36Sopenharmony_ci
379562306a36Sopenharmony_ci/*
379662306a36Sopenharmony_ci * Check if guest state is valid. Returns true if valid, false if
379762306a36Sopenharmony_ci * not.
379862306a36Sopenharmony_ci * We assume that registers are always usable
379962306a36Sopenharmony_ci */
380062306a36Sopenharmony_cibool __vmx_guest_state_valid(struct kvm_vcpu *vcpu)
380162306a36Sopenharmony_ci{
380262306a36Sopenharmony_ci	/* real mode guest state checks */
380362306a36Sopenharmony_ci	if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
380462306a36Sopenharmony_ci		if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
380562306a36Sopenharmony_ci			return false;
380662306a36Sopenharmony_ci		if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
380762306a36Sopenharmony_ci			return false;
380862306a36Sopenharmony_ci		if (!rmode_segment_valid(vcpu, VCPU_SREG_DS))
380962306a36Sopenharmony_ci			return false;
381062306a36Sopenharmony_ci		if (!rmode_segment_valid(vcpu, VCPU_SREG_ES))
381162306a36Sopenharmony_ci			return false;
381262306a36Sopenharmony_ci		if (!rmode_segment_valid(vcpu, VCPU_SREG_FS))
381362306a36Sopenharmony_ci			return false;
381462306a36Sopenharmony_ci		if (!rmode_segment_valid(vcpu, VCPU_SREG_GS))
381562306a36Sopenharmony_ci			return false;
381662306a36Sopenharmony_ci	} else {
381762306a36Sopenharmony_ci	/* protected mode guest state checks */
381862306a36Sopenharmony_ci		if (!cs_ss_rpl_check(vcpu))
381962306a36Sopenharmony_ci			return false;
382062306a36Sopenharmony_ci		if (!code_segment_valid(vcpu))
382162306a36Sopenharmony_ci			return false;
382262306a36Sopenharmony_ci		if (!stack_segment_valid(vcpu))
382362306a36Sopenharmony_ci			return false;
382462306a36Sopenharmony_ci		if (!data_segment_valid(vcpu, VCPU_SREG_DS))
382562306a36Sopenharmony_ci			return false;
382662306a36Sopenharmony_ci		if (!data_segment_valid(vcpu, VCPU_SREG_ES))
382762306a36Sopenharmony_ci			return false;
382862306a36Sopenharmony_ci		if (!data_segment_valid(vcpu, VCPU_SREG_FS))
382962306a36Sopenharmony_ci			return false;
383062306a36Sopenharmony_ci		if (!data_segment_valid(vcpu, VCPU_SREG_GS))
383162306a36Sopenharmony_ci			return false;
383262306a36Sopenharmony_ci		if (!tr_valid(vcpu))
383362306a36Sopenharmony_ci			return false;
383462306a36Sopenharmony_ci		if (!ldtr_valid(vcpu))
383562306a36Sopenharmony_ci			return false;
383662306a36Sopenharmony_ci	}
383762306a36Sopenharmony_ci	/* TODO:
383862306a36Sopenharmony_ci	 * - Add checks on RIP
383962306a36Sopenharmony_ci	 * - Add checks on RFLAGS
384062306a36Sopenharmony_ci	 */
384162306a36Sopenharmony_ci
384262306a36Sopenharmony_ci	return true;
384362306a36Sopenharmony_ci}
384462306a36Sopenharmony_ci
384562306a36Sopenharmony_cistatic int init_rmode_tss(struct kvm *kvm, void __user *ua)
384662306a36Sopenharmony_ci{
384762306a36Sopenharmony_ci	const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
384862306a36Sopenharmony_ci	u16 data;
384962306a36Sopenharmony_ci	int i;
385062306a36Sopenharmony_ci
385162306a36Sopenharmony_ci	for (i = 0; i < 3; i++) {
385262306a36Sopenharmony_ci		if (__copy_to_user(ua + PAGE_SIZE * i, zero_page, PAGE_SIZE))
385362306a36Sopenharmony_ci			return -EFAULT;
385462306a36Sopenharmony_ci	}
385562306a36Sopenharmony_ci
385662306a36Sopenharmony_ci	data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
385762306a36Sopenharmony_ci	if (__copy_to_user(ua + TSS_IOPB_BASE_OFFSET, &data, sizeof(u16)))
385862306a36Sopenharmony_ci		return -EFAULT;
385962306a36Sopenharmony_ci
386062306a36Sopenharmony_ci	data = ~0;
386162306a36Sopenharmony_ci	if (__copy_to_user(ua + RMODE_TSS_SIZE - 1, &data, sizeof(u8)))
386262306a36Sopenharmony_ci		return -EFAULT;
386362306a36Sopenharmony_ci
386462306a36Sopenharmony_ci	return 0;
386562306a36Sopenharmony_ci}
386662306a36Sopenharmony_ci
386762306a36Sopenharmony_cistatic int init_rmode_identity_map(struct kvm *kvm)
386862306a36Sopenharmony_ci{
386962306a36Sopenharmony_ci	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
387062306a36Sopenharmony_ci	int i, r = 0;
387162306a36Sopenharmony_ci	void __user *uaddr;
387262306a36Sopenharmony_ci	u32 tmp;
387362306a36Sopenharmony_ci
387462306a36Sopenharmony_ci	/* Protect kvm_vmx->ept_identity_pagetable_done. */
387562306a36Sopenharmony_ci	mutex_lock(&kvm->slots_lock);
387662306a36Sopenharmony_ci
387762306a36Sopenharmony_ci	if (likely(kvm_vmx->ept_identity_pagetable_done))
387862306a36Sopenharmony_ci		goto out;
387962306a36Sopenharmony_ci
388062306a36Sopenharmony_ci	if (!kvm_vmx->ept_identity_map_addr)
388162306a36Sopenharmony_ci		kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
388262306a36Sopenharmony_ci
388362306a36Sopenharmony_ci	uaddr = __x86_set_memory_region(kvm,
388462306a36Sopenharmony_ci					IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
388562306a36Sopenharmony_ci					kvm_vmx->ept_identity_map_addr,
388662306a36Sopenharmony_ci					PAGE_SIZE);
388762306a36Sopenharmony_ci	if (IS_ERR(uaddr)) {
388862306a36Sopenharmony_ci		r = PTR_ERR(uaddr);
388962306a36Sopenharmony_ci		goto out;
389062306a36Sopenharmony_ci	}
389162306a36Sopenharmony_ci
389262306a36Sopenharmony_ci	/* Set up identity-mapping pagetable for EPT in real mode */
389362306a36Sopenharmony_ci	for (i = 0; i < (PAGE_SIZE / sizeof(tmp)); i++) {
389462306a36Sopenharmony_ci		tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
389562306a36Sopenharmony_ci			_PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
389662306a36Sopenharmony_ci		if (__copy_to_user(uaddr + i * sizeof(tmp), &tmp, sizeof(tmp))) {
389762306a36Sopenharmony_ci			r = -EFAULT;
389862306a36Sopenharmony_ci			goto out;
389962306a36Sopenharmony_ci		}
390062306a36Sopenharmony_ci	}
390162306a36Sopenharmony_ci	kvm_vmx->ept_identity_pagetable_done = true;
390262306a36Sopenharmony_ci
390362306a36Sopenharmony_ciout:
390462306a36Sopenharmony_ci	mutex_unlock(&kvm->slots_lock);
390562306a36Sopenharmony_ci	return r;
390662306a36Sopenharmony_ci}
390762306a36Sopenharmony_ci
390862306a36Sopenharmony_cistatic void seg_setup(int seg)
390962306a36Sopenharmony_ci{
391062306a36Sopenharmony_ci	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
391162306a36Sopenharmony_ci	unsigned int ar;
391262306a36Sopenharmony_ci
391362306a36Sopenharmony_ci	vmcs_write16(sf->selector, 0);
391462306a36Sopenharmony_ci	vmcs_writel(sf->base, 0);
391562306a36Sopenharmony_ci	vmcs_write32(sf->limit, 0xffff);
391662306a36Sopenharmony_ci	ar = 0x93;
391762306a36Sopenharmony_ci	if (seg == VCPU_SREG_CS)
391862306a36Sopenharmony_ci		ar |= 0x08; /* code segment */
391962306a36Sopenharmony_ci
392062306a36Sopenharmony_ci	vmcs_write32(sf->ar_bytes, ar);
392162306a36Sopenharmony_ci}
392262306a36Sopenharmony_ci
392362306a36Sopenharmony_ciint allocate_vpid(void)
392462306a36Sopenharmony_ci{
392562306a36Sopenharmony_ci	int vpid;
392662306a36Sopenharmony_ci
392762306a36Sopenharmony_ci	if (!enable_vpid)
392862306a36Sopenharmony_ci		return 0;
392962306a36Sopenharmony_ci	spin_lock(&vmx_vpid_lock);
393062306a36Sopenharmony_ci	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
393162306a36Sopenharmony_ci	if (vpid < VMX_NR_VPIDS)
393262306a36Sopenharmony_ci		__set_bit(vpid, vmx_vpid_bitmap);
393362306a36Sopenharmony_ci	else
393462306a36Sopenharmony_ci		vpid = 0;
393562306a36Sopenharmony_ci	spin_unlock(&vmx_vpid_lock);
393662306a36Sopenharmony_ci	return vpid;
393762306a36Sopenharmony_ci}
393862306a36Sopenharmony_ci
393962306a36Sopenharmony_civoid free_vpid(int vpid)
394062306a36Sopenharmony_ci{
394162306a36Sopenharmony_ci	if (!enable_vpid || vpid == 0)
394262306a36Sopenharmony_ci		return;
394362306a36Sopenharmony_ci	spin_lock(&vmx_vpid_lock);
394462306a36Sopenharmony_ci	__clear_bit(vpid, vmx_vpid_bitmap);
394562306a36Sopenharmony_ci	spin_unlock(&vmx_vpid_lock);
394662306a36Sopenharmony_ci}
394762306a36Sopenharmony_ci
394862306a36Sopenharmony_cistatic void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
394962306a36Sopenharmony_ci{
395062306a36Sopenharmony_ci	/*
395162306a36Sopenharmony_ci	 * When KVM is a nested hypervisor on top of Hyper-V and uses
395262306a36Sopenharmony_ci	 * 'Enlightened MSR Bitmap' feature L0 needs to know that MSR
395362306a36Sopenharmony_ci	 * bitmap has changed.
395462306a36Sopenharmony_ci	 */
395562306a36Sopenharmony_ci	if (kvm_is_using_evmcs()) {
395662306a36Sopenharmony_ci		struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;
395762306a36Sopenharmony_ci
395862306a36Sopenharmony_ci		if (evmcs->hv_enlightenments_control.msr_bitmap)
395962306a36Sopenharmony_ci			evmcs->hv_clean_fields &=
396062306a36Sopenharmony_ci				~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
396162306a36Sopenharmony_ci	}
396262306a36Sopenharmony_ci
396362306a36Sopenharmony_ci	vmx->nested.force_msr_bitmap_recalc = true;
396462306a36Sopenharmony_ci}
396562306a36Sopenharmony_ci
396662306a36Sopenharmony_civoid vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
396762306a36Sopenharmony_ci{
396862306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
396962306a36Sopenharmony_ci	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
397062306a36Sopenharmony_ci
397162306a36Sopenharmony_ci	if (!cpu_has_vmx_msr_bitmap())
397262306a36Sopenharmony_ci		return;
397362306a36Sopenharmony_ci
397462306a36Sopenharmony_ci	vmx_msr_bitmap_l01_changed(vmx);
397562306a36Sopenharmony_ci
397662306a36Sopenharmony_ci	/*
397762306a36Sopenharmony_ci	 * Mark the desired intercept state in shadow bitmap, this is needed
397862306a36Sopenharmony_ci	 * for resync when the MSR filters change.
397962306a36Sopenharmony_ci	*/
398062306a36Sopenharmony_ci	if (is_valid_passthrough_msr(msr)) {
398162306a36Sopenharmony_ci		int idx = possible_passthrough_msr_slot(msr);
398262306a36Sopenharmony_ci
398362306a36Sopenharmony_ci		if (idx != -ENOENT) {
398462306a36Sopenharmony_ci			if (type & MSR_TYPE_R)
398562306a36Sopenharmony_ci				clear_bit(idx, vmx->shadow_msr_intercept.read);
398662306a36Sopenharmony_ci			if (type & MSR_TYPE_W)
398762306a36Sopenharmony_ci				clear_bit(idx, vmx->shadow_msr_intercept.write);
398862306a36Sopenharmony_ci		}
398962306a36Sopenharmony_ci	}
399062306a36Sopenharmony_ci
399162306a36Sopenharmony_ci	if ((type & MSR_TYPE_R) &&
399262306a36Sopenharmony_ci	    !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) {
399362306a36Sopenharmony_ci		vmx_set_msr_bitmap_read(msr_bitmap, msr);
399462306a36Sopenharmony_ci		type &= ~MSR_TYPE_R;
399562306a36Sopenharmony_ci	}
399662306a36Sopenharmony_ci
399762306a36Sopenharmony_ci	if ((type & MSR_TYPE_W) &&
399862306a36Sopenharmony_ci	    !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE)) {
399962306a36Sopenharmony_ci		vmx_set_msr_bitmap_write(msr_bitmap, msr);
400062306a36Sopenharmony_ci		type &= ~MSR_TYPE_W;
400162306a36Sopenharmony_ci	}
400262306a36Sopenharmony_ci
400362306a36Sopenharmony_ci	if (type & MSR_TYPE_R)
400462306a36Sopenharmony_ci		vmx_clear_msr_bitmap_read(msr_bitmap, msr);
400562306a36Sopenharmony_ci
400662306a36Sopenharmony_ci	if (type & MSR_TYPE_W)
400762306a36Sopenharmony_ci		vmx_clear_msr_bitmap_write(msr_bitmap, msr);
400862306a36Sopenharmony_ci}
400962306a36Sopenharmony_ci
401062306a36Sopenharmony_civoid vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
401162306a36Sopenharmony_ci{
401262306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
401362306a36Sopenharmony_ci	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
401462306a36Sopenharmony_ci
401562306a36Sopenharmony_ci	if (!cpu_has_vmx_msr_bitmap())
401662306a36Sopenharmony_ci		return;
401762306a36Sopenharmony_ci
401862306a36Sopenharmony_ci	vmx_msr_bitmap_l01_changed(vmx);
401962306a36Sopenharmony_ci
402062306a36Sopenharmony_ci	/*
402162306a36Sopenharmony_ci	 * Mark the desired intercept state in shadow bitmap, this is needed
402262306a36Sopenharmony_ci	 * for resync when the MSR filter changes.
402362306a36Sopenharmony_ci	*/
402462306a36Sopenharmony_ci	if (is_valid_passthrough_msr(msr)) {
402562306a36Sopenharmony_ci		int idx = possible_passthrough_msr_slot(msr);
402662306a36Sopenharmony_ci
402762306a36Sopenharmony_ci		if (idx != -ENOENT) {
402862306a36Sopenharmony_ci			if (type & MSR_TYPE_R)
402962306a36Sopenharmony_ci				set_bit(idx, vmx->shadow_msr_intercept.read);
403062306a36Sopenharmony_ci			if (type & MSR_TYPE_W)
403162306a36Sopenharmony_ci				set_bit(idx, vmx->shadow_msr_intercept.write);
403262306a36Sopenharmony_ci		}
403362306a36Sopenharmony_ci	}
403462306a36Sopenharmony_ci
403562306a36Sopenharmony_ci	if (type & MSR_TYPE_R)
403662306a36Sopenharmony_ci		vmx_set_msr_bitmap_read(msr_bitmap, msr);
403762306a36Sopenharmony_ci
403862306a36Sopenharmony_ci	if (type & MSR_TYPE_W)
403962306a36Sopenharmony_ci		vmx_set_msr_bitmap_write(msr_bitmap, msr);
404062306a36Sopenharmony_ci}
404162306a36Sopenharmony_ci
404262306a36Sopenharmony_cistatic void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
404362306a36Sopenharmony_ci{
404462306a36Sopenharmony_ci	/*
404562306a36Sopenharmony_ci	 * x2APIC indices for 64-bit accesses into the RDMSR and WRMSR halves
404662306a36Sopenharmony_ci	 * of the MSR bitmap.  KVM emulates APIC registers up through 0x3f0,
404762306a36Sopenharmony_ci	 * i.e. MSR 0x83f, and so only needs to dynamically manipulate 64 bits.
404862306a36Sopenharmony_ci	 */
404962306a36Sopenharmony_ci	const int read_idx = APIC_BASE_MSR / BITS_PER_LONG_LONG;
405062306a36Sopenharmony_ci	const int write_idx = read_idx + (0x800 / sizeof(u64));
405162306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
405262306a36Sopenharmony_ci	u64 *msr_bitmap = (u64 *)vmx->vmcs01.msr_bitmap;
405362306a36Sopenharmony_ci	u8 mode;
405462306a36Sopenharmony_ci
405562306a36Sopenharmony_ci	if (!cpu_has_vmx_msr_bitmap() || WARN_ON_ONCE(!lapic_in_kernel(vcpu)))
405662306a36Sopenharmony_ci		return;
405762306a36Sopenharmony_ci
405862306a36Sopenharmony_ci	if (cpu_has_secondary_exec_ctrls() &&
405962306a36Sopenharmony_ci	    (secondary_exec_controls_get(vmx) &
406062306a36Sopenharmony_ci	     SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
406162306a36Sopenharmony_ci		mode = MSR_BITMAP_MODE_X2APIC;
406262306a36Sopenharmony_ci		if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
406362306a36Sopenharmony_ci			mode |= MSR_BITMAP_MODE_X2APIC_APICV;
406462306a36Sopenharmony_ci	} else {
406562306a36Sopenharmony_ci		mode = 0;
406662306a36Sopenharmony_ci	}
406762306a36Sopenharmony_ci
406862306a36Sopenharmony_ci	if (mode == vmx->x2apic_msr_bitmap_mode)
406962306a36Sopenharmony_ci		return;
407062306a36Sopenharmony_ci
407162306a36Sopenharmony_ci	vmx->x2apic_msr_bitmap_mode = mode;
407262306a36Sopenharmony_ci
407362306a36Sopenharmony_ci	/*
407462306a36Sopenharmony_ci	 * Reset the bitmap for MSRs 0x800 - 0x83f.  Leave AMD's uber-extended
407562306a36Sopenharmony_ci	 * registers (0x840 and above) intercepted, KVM doesn't support them.
407662306a36Sopenharmony_ci	 * Intercept all writes by default and poke holes as needed.  Pass
407762306a36Sopenharmony_ci	 * through reads for all valid registers by default in x2APIC+APICv
407862306a36Sopenharmony_ci	 * mode, only the current timer count needs on-demand emulation by KVM.
407962306a36Sopenharmony_ci	 */
408062306a36Sopenharmony_ci	if (mode & MSR_BITMAP_MODE_X2APIC_APICV)
408162306a36Sopenharmony_ci		msr_bitmap[read_idx] = ~kvm_lapic_readable_reg_mask(vcpu->arch.apic);
408262306a36Sopenharmony_ci	else
408362306a36Sopenharmony_ci		msr_bitmap[read_idx] = ~0ull;
408462306a36Sopenharmony_ci	msr_bitmap[write_idx] = ~0ull;
408562306a36Sopenharmony_ci
408662306a36Sopenharmony_ci	/*
408762306a36Sopenharmony_ci	 * TPR reads and writes can be virtualized even if virtual interrupt
408862306a36Sopenharmony_ci	 * delivery is not in use.
408962306a36Sopenharmony_ci	 */
409062306a36Sopenharmony_ci	vmx_set_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW,
409162306a36Sopenharmony_ci				  !(mode & MSR_BITMAP_MODE_X2APIC));
409262306a36Sopenharmony_ci
409362306a36Sopenharmony_ci	if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
409462306a36Sopenharmony_ci		vmx_enable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_RW);
409562306a36Sopenharmony_ci		vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
409662306a36Sopenharmony_ci		vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
409762306a36Sopenharmony_ci		if (enable_ipiv)
409862306a36Sopenharmony_ci			vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_ICR), MSR_TYPE_RW);
409962306a36Sopenharmony_ci	}
410062306a36Sopenharmony_ci}
410162306a36Sopenharmony_ci
410262306a36Sopenharmony_civoid pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
410362306a36Sopenharmony_ci{
410462306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
410562306a36Sopenharmony_ci	bool flag = !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
410662306a36Sopenharmony_ci	u32 i;
410762306a36Sopenharmony_ci
410862306a36Sopenharmony_ci	vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_STATUS, MSR_TYPE_RW, flag);
410962306a36Sopenharmony_ci	vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_OUTPUT_BASE, MSR_TYPE_RW, flag);
411062306a36Sopenharmony_ci	vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_OUTPUT_MASK, MSR_TYPE_RW, flag);
411162306a36Sopenharmony_ci	vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_CR3_MATCH, MSR_TYPE_RW, flag);
411262306a36Sopenharmony_ci	for (i = 0; i < vmx->pt_desc.num_address_ranges; i++) {
411362306a36Sopenharmony_ci		vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag);
411462306a36Sopenharmony_ci		vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag);
411562306a36Sopenharmony_ci	}
411662306a36Sopenharmony_ci}
411762306a36Sopenharmony_ci
411862306a36Sopenharmony_cistatic bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
411962306a36Sopenharmony_ci{
412062306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
412162306a36Sopenharmony_ci	void *vapic_page;
412262306a36Sopenharmony_ci	u32 vppr;
412362306a36Sopenharmony_ci	int rvi;
412462306a36Sopenharmony_ci
412562306a36Sopenharmony_ci	if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
412662306a36Sopenharmony_ci		!nested_cpu_has_vid(get_vmcs12(vcpu)) ||
412762306a36Sopenharmony_ci		WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn))
412862306a36Sopenharmony_ci		return false;
412962306a36Sopenharmony_ci
413062306a36Sopenharmony_ci	rvi = vmx_get_rvi();
413162306a36Sopenharmony_ci
413262306a36Sopenharmony_ci	vapic_page = vmx->nested.virtual_apic_map.hva;
413362306a36Sopenharmony_ci	vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
413462306a36Sopenharmony_ci
413562306a36Sopenharmony_ci	return ((rvi & 0xf0) > (vppr & 0xf0));
413662306a36Sopenharmony_ci}
413762306a36Sopenharmony_ci
413862306a36Sopenharmony_cistatic void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
413962306a36Sopenharmony_ci{
414062306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
414162306a36Sopenharmony_ci	u32 i;
414262306a36Sopenharmony_ci
414362306a36Sopenharmony_ci	/*
414462306a36Sopenharmony_ci	 * Redo intercept permissions for MSRs that KVM is passing through to
414562306a36Sopenharmony_ci	 * the guest.  Disabling interception will check the new MSR filter and
414662306a36Sopenharmony_ci	 * ensure that KVM enables interception if usersepace wants to filter
414762306a36Sopenharmony_ci	 * the MSR.  MSRs that KVM is already intercepting don't need to be
414862306a36Sopenharmony_ci	 * refreshed since KVM is going to intercept them regardless of what
414962306a36Sopenharmony_ci	 * userspace wants.
415062306a36Sopenharmony_ci	 */
415162306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
415262306a36Sopenharmony_ci		u32 msr = vmx_possible_passthrough_msrs[i];
415362306a36Sopenharmony_ci
415462306a36Sopenharmony_ci		if (!test_bit(i, vmx->shadow_msr_intercept.read))
415562306a36Sopenharmony_ci			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_R);
415662306a36Sopenharmony_ci
415762306a36Sopenharmony_ci		if (!test_bit(i, vmx->shadow_msr_intercept.write))
415862306a36Sopenharmony_ci			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_W);
415962306a36Sopenharmony_ci	}
416062306a36Sopenharmony_ci
416162306a36Sopenharmony_ci	/* PT MSRs can be passed through iff PT is exposed to the guest. */
416262306a36Sopenharmony_ci	if (vmx_pt_mode_is_host_guest())
416362306a36Sopenharmony_ci		pt_update_intercept_for_msr(vcpu);
416462306a36Sopenharmony_ci}
416562306a36Sopenharmony_ci
/*
 * Make @vcpu notice a pending posted interrupt: send the notification IPI
 * @pi_vec if the vCPU is marked as being in guest mode (SMP builds only),
 * otherwise wake the vCPU.
 */
static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
						     int pi_vec)
{
#ifdef CONFIG_SMP
	if (vcpu->mode == IN_GUEST_MODE) {
		/*
		 * The virtual interrupt's vector has already been set in the
		 * PIR.  No notification is needed if the target is the
		 * currently running vCPU, i.e. the event is being sent from a
		 * fastpath VM-Exit handler, as the PIR will be synced to the
		 * vIRR before re-entering the guest.
		 */
		if (kvm_get_running_vcpu() == vcpu)
			return;

		/*
		 * The target is some other vCPU, send a notification event.
		 * Three outcomes are possible:
		 *
		 * Case 1: the vCPU stays in non-root mode.  The notification
		 * event posts the interrupt to the vCPU.
		 *
		 * Case 2: the vCPU exits to root mode and is still runnable.
		 * The PIR will be synced to the vIRR before re-entering the
		 * guest.  The notification event is harmless as the host IRQ
		 * handler ignores the spurious event.
		 *
		 * Case 3: the vCPU exits to root mode and is blocked.
		 * vcpu_block() has already synced PIR to vIRR and never
		 * blocks the vCPU if the vIRR is not empty, so a blocked vCPU
		 * is not waiting on any interrupt requested in the PIR and
		 * the notification event is again a benign, spurious event.
		 */
		__apic_send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
		return;
	}
#endif
	/*
	 * The vCPU isn't in the guest.  Wake it in case it is blocking; if it
	 * is not, nothing else is required as KVM will grab the highest
	 * priority pending IRQ via ->sync_pir_to_irr() in vcpu_enter_guest().
	 */
	kvm_vcpu_wake_up(vcpu);
}
420962306a36Sopenharmony_ci
421062306a36Sopenharmony_cistatic int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
421162306a36Sopenharmony_ci						int vector)
421262306a36Sopenharmony_ci{
421362306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
421462306a36Sopenharmony_ci
421562306a36Sopenharmony_ci	if (is_guest_mode(vcpu) &&
421662306a36Sopenharmony_ci	    vector == vmx->nested.posted_intr_nv) {
421762306a36Sopenharmony_ci		/*
421862306a36Sopenharmony_ci		 * If a posted intr is not recognized by hardware,
421962306a36Sopenharmony_ci		 * we will accomplish it in the next vmentry.
422062306a36Sopenharmony_ci		 */
422162306a36Sopenharmony_ci		vmx->nested.pi_pending = true;
422262306a36Sopenharmony_ci		kvm_make_request(KVM_REQ_EVENT, vcpu);
422362306a36Sopenharmony_ci
422462306a36Sopenharmony_ci		/*
422562306a36Sopenharmony_ci		 * This pairs with the smp_mb_*() after setting vcpu->mode in
422662306a36Sopenharmony_ci		 * vcpu_enter_guest() to guarantee the vCPU sees the event
422762306a36Sopenharmony_ci		 * request if triggering a posted interrupt "fails" because
422862306a36Sopenharmony_ci		 * vcpu->mode != IN_GUEST_MODE.  The extra barrier is needed as
422962306a36Sopenharmony_ci		 * the smb_wmb() in kvm_make_request() only ensures everything
423062306a36Sopenharmony_ci		 * done before making the request is visible when the request
423162306a36Sopenharmony_ci		 * is visible, it doesn't ensure ordering between the store to
423262306a36Sopenharmony_ci		 * vcpu->requests and the load from vcpu->mode.
423362306a36Sopenharmony_ci		 */
423462306a36Sopenharmony_ci		smp_mb__after_atomic();
423562306a36Sopenharmony_ci
423662306a36Sopenharmony_ci		/* the PIR and ON have been set by L1. */
423762306a36Sopenharmony_ci		kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_NESTED_VECTOR);
423862306a36Sopenharmony_ci		return 0;
423962306a36Sopenharmony_ci	}
424062306a36Sopenharmony_ci	return -1;
424162306a36Sopenharmony_ci}
424262306a36Sopenharmony_ci/*
424362306a36Sopenharmony_ci * Send interrupt to vcpu via posted interrupt way.
424462306a36Sopenharmony_ci * 1. If target vcpu is running(non-root mode), send posted interrupt
424562306a36Sopenharmony_ci * notification to vcpu and hardware will sync PIR to vIRR atomically.
424662306a36Sopenharmony_ci * 2. If target vcpu isn't running(root mode), kick it to pick up the
424762306a36Sopenharmony_ci * interrupt from PIR in next vmentry.
424862306a36Sopenharmony_ci */
424962306a36Sopenharmony_cistatic int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
425062306a36Sopenharmony_ci{
425162306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
425262306a36Sopenharmony_ci	int r;
425362306a36Sopenharmony_ci
425462306a36Sopenharmony_ci	r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
425562306a36Sopenharmony_ci	if (!r)
425662306a36Sopenharmony_ci		return 0;
425762306a36Sopenharmony_ci
425862306a36Sopenharmony_ci	/* Note, this is called iff the local APIC is in-kernel. */
425962306a36Sopenharmony_ci	if (!vcpu->arch.apic->apicv_active)
426062306a36Sopenharmony_ci		return -1;
426162306a36Sopenharmony_ci
426262306a36Sopenharmony_ci	if (pi_test_and_set_pir(vector, &vmx->pi_desc))
426362306a36Sopenharmony_ci		return 0;
426462306a36Sopenharmony_ci
426562306a36Sopenharmony_ci	/* If a previous notification has sent the IPI, nothing to do.  */
426662306a36Sopenharmony_ci	if (pi_test_and_set_on(&vmx->pi_desc))
426762306a36Sopenharmony_ci		return 0;
426862306a36Sopenharmony_ci
426962306a36Sopenharmony_ci	/*
427062306a36Sopenharmony_ci	 * The implied barrier in pi_test_and_set_on() pairs with the smp_mb_*()
427162306a36Sopenharmony_ci	 * after setting vcpu->mode in vcpu_enter_guest(), thus the vCPU is
427262306a36Sopenharmony_ci	 * guaranteed to see PID.ON=1 and sync the PIR to IRR if triggering a
427362306a36Sopenharmony_ci	 * posted interrupt "fails" because vcpu->mode != IN_GUEST_MODE.
427462306a36Sopenharmony_ci	 */
427562306a36Sopenharmony_ci	kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_VECTOR);
427662306a36Sopenharmony_ci	return 0;
427762306a36Sopenharmony_ci}
427862306a36Sopenharmony_ci
427962306a36Sopenharmony_cistatic void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
428062306a36Sopenharmony_ci				  int trig_mode, int vector)
428162306a36Sopenharmony_ci{
428262306a36Sopenharmony_ci	struct kvm_vcpu *vcpu = apic->vcpu;
428362306a36Sopenharmony_ci
428462306a36Sopenharmony_ci	if (vmx_deliver_posted_interrupt(vcpu, vector)) {
428562306a36Sopenharmony_ci		kvm_lapic_set_irr(vector, apic);
428662306a36Sopenharmony_ci		kvm_make_request(KVM_REQ_EVENT, vcpu);
428762306a36Sopenharmony_ci		kvm_vcpu_kick(vcpu);
428862306a36Sopenharmony_ci	} else {
428962306a36Sopenharmony_ci		trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
429062306a36Sopenharmony_ci					   trig_mode, vector);
429162306a36Sopenharmony_ci	}
429262306a36Sopenharmony_ci}
429362306a36Sopenharmony_ci
429462306a36Sopenharmony_ci/*
429562306a36Sopenharmony_ci * Set up the vmcs's constant host-state fields, i.e., host-state fields that
429662306a36Sopenharmony_ci * will not change in the lifetime of the guest.
429762306a36Sopenharmony_ci * Note that host-state that does change is set elsewhere. E.g., host-state
429862306a36Sopenharmony_ci * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
429962306a36Sopenharmony_ci */
430062306a36Sopenharmony_civoid vmx_set_constant_host_state(struct vcpu_vmx *vmx)
430162306a36Sopenharmony_ci{
430262306a36Sopenharmony_ci	u32 low32, high32;
430362306a36Sopenharmony_ci	unsigned long tmpl;
430462306a36Sopenharmony_ci	unsigned long cr0, cr3, cr4;
430562306a36Sopenharmony_ci
430662306a36Sopenharmony_ci	cr0 = read_cr0();
430762306a36Sopenharmony_ci	WARN_ON(cr0 & X86_CR0_TS);
430862306a36Sopenharmony_ci	vmcs_writel(HOST_CR0, cr0);  /* 22.2.3 */
430962306a36Sopenharmony_ci
431062306a36Sopenharmony_ci	/*
431162306a36Sopenharmony_ci	 * Save the most likely value for this task's CR3 in the VMCS.
431262306a36Sopenharmony_ci	 * We can't use __get_current_cr3_fast() because we're not atomic.
431362306a36Sopenharmony_ci	 */
431462306a36Sopenharmony_ci	cr3 = __read_cr3();
431562306a36Sopenharmony_ci	vmcs_writel(HOST_CR3, cr3);		/* 22.2.3  FIXME: shadow tables */
431662306a36Sopenharmony_ci	vmx->loaded_vmcs->host_state.cr3 = cr3;
431762306a36Sopenharmony_ci
431862306a36Sopenharmony_ci	/* Save the most likely value for this task's CR4 in the VMCS. */
431962306a36Sopenharmony_ci	cr4 = cr4_read_shadow();
432062306a36Sopenharmony_ci	vmcs_writel(HOST_CR4, cr4);			/* 22.2.3, 22.2.5 */
432162306a36Sopenharmony_ci	vmx->loaded_vmcs->host_state.cr4 = cr4;
432262306a36Sopenharmony_ci
432362306a36Sopenharmony_ci	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
432462306a36Sopenharmony_ci#ifdef CONFIG_X86_64
432562306a36Sopenharmony_ci	/*
432662306a36Sopenharmony_ci	 * Load null selectors, so we can avoid reloading them in
432762306a36Sopenharmony_ci	 * vmx_prepare_switch_to_host(), in case userspace uses
432862306a36Sopenharmony_ci	 * the null selectors too (the expected case).
432962306a36Sopenharmony_ci	 */
433062306a36Sopenharmony_ci	vmcs_write16(HOST_DS_SELECTOR, 0);
433162306a36Sopenharmony_ci	vmcs_write16(HOST_ES_SELECTOR, 0);
433262306a36Sopenharmony_ci#else
433362306a36Sopenharmony_ci	vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
433462306a36Sopenharmony_ci	vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
433562306a36Sopenharmony_ci#endif
433662306a36Sopenharmony_ci	vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
433762306a36Sopenharmony_ci	vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
433862306a36Sopenharmony_ci
433962306a36Sopenharmony_ci	vmcs_writel(HOST_IDTR_BASE, host_idt_base);   /* 22.2.4 */
434062306a36Sopenharmony_ci
434162306a36Sopenharmony_ci	vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */
434262306a36Sopenharmony_ci
434362306a36Sopenharmony_ci	rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
434462306a36Sopenharmony_ci	vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
434562306a36Sopenharmony_ci
434662306a36Sopenharmony_ci	/*
434762306a36Sopenharmony_ci	 * SYSENTER is used for 32-bit system calls on either 32-bit or
434862306a36Sopenharmony_ci	 * 64-bit kernels.  It is always zero If neither is allowed, otherwise
434962306a36Sopenharmony_ci	 * vmx_vcpu_load_vmcs loads it with the per-CPU entry stack (and may
435062306a36Sopenharmony_ci	 * have already done so!).
435162306a36Sopenharmony_ci	 */
435262306a36Sopenharmony_ci	if (!IS_ENABLED(CONFIG_IA32_EMULATION) && !IS_ENABLED(CONFIG_X86_32))
435362306a36Sopenharmony_ci		vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);
435462306a36Sopenharmony_ci
435562306a36Sopenharmony_ci	rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
435662306a36Sopenharmony_ci	vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl);   /* 22.2.3 */
435762306a36Sopenharmony_ci
435862306a36Sopenharmony_ci	if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
435962306a36Sopenharmony_ci		rdmsr(MSR_IA32_CR_PAT, low32, high32);
436062306a36Sopenharmony_ci		vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
436162306a36Sopenharmony_ci	}
436262306a36Sopenharmony_ci
436362306a36Sopenharmony_ci	if (cpu_has_load_ia32_efer())
436462306a36Sopenharmony_ci		vmcs_write64(HOST_IA32_EFER, host_efer);
436562306a36Sopenharmony_ci}
436662306a36Sopenharmony_ci
436762306a36Sopenharmony_civoid set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
436862306a36Sopenharmony_ci{
436962306a36Sopenharmony_ci	struct kvm_vcpu *vcpu = &vmx->vcpu;
437062306a36Sopenharmony_ci
437162306a36Sopenharmony_ci	vcpu->arch.cr4_guest_owned_bits = KVM_POSSIBLE_CR4_GUEST_BITS &
437262306a36Sopenharmony_ci					  ~vcpu->arch.cr4_guest_rsvd_bits;
437362306a36Sopenharmony_ci	if (!enable_ept) {
437462306a36Sopenharmony_ci		vcpu->arch.cr4_guest_owned_bits &= ~X86_CR4_TLBFLUSH_BITS;
437562306a36Sopenharmony_ci		vcpu->arch.cr4_guest_owned_bits &= ~X86_CR4_PDPTR_BITS;
437662306a36Sopenharmony_ci	}
437762306a36Sopenharmony_ci	if (is_guest_mode(&vmx->vcpu))
437862306a36Sopenharmony_ci		vcpu->arch.cr4_guest_owned_bits &=
437962306a36Sopenharmony_ci			~get_vmcs12(vcpu)->cr4_guest_host_mask;
438062306a36Sopenharmony_ci	vmcs_writel(CR4_GUEST_HOST_MASK, ~vcpu->arch.cr4_guest_owned_bits);
438162306a36Sopenharmony_ci}
438262306a36Sopenharmony_ci
438362306a36Sopenharmony_cistatic u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
438462306a36Sopenharmony_ci{
438562306a36Sopenharmony_ci	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
438662306a36Sopenharmony_ci
438762306a36Sopenharmony_ci	if (!kvm_vcpu_apicv_active(&vmx->vcpu))
438862306a36Sopenharmony_ci		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
438962306a36Sopenharmony_ci
439062306a36Sopenharmony_ci	if (!enable_vnmi)
439162306a36Sopenharmony_ci		pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
439262306a36Sopenharmony_ci
439362306a36Sopenharmony_ci	if (!enable_preemption_timer)
439462306a36Sopenharmony_ci		pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
439562306a36Sopenharmony_ci
439662306a36Sopenharmony_ci	return pin_based_exec_ctrl;
439762306a36Sopenharmony_ci}
439862306a36Sopenharmony_ci
439962306a36Sopenharmony_cistatic u32 vmx_vmentry_ctrl(void)
440062306a36Sopenharmony_ci{
440162306a36Sopenharmony_ci	u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
440262306a36Sopenharmony_ci
440362306a36Sopenharmony_ci	if (vmx_pt_mode_is_system())
440462306a36Sopenharmony_ci		vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
440562306a36Sopenharmony_ci				  VM_ENTRY_LOAD_IA32_RTIT_CTL);
440662306a36Sopenharmony_ci	/*
440762306a36Sopenharmony_ci	 * IA32e mode, and loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically.
440862306a36Sopenharmony_ci	 */
440962306a36Sopenharmony_ci	vmentry_ctrl &= ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
441062306a36Sopenharmony_ci			  VM_ENTRY_LOAD_IA32_EFER |
441162306a36Sopenharmony_ci			  VM_ENTRY_IA32E_MODE);
441262306a36Sopenharmony_ci
441362306a36Sopenharmony_ci	if (cpu_has_perf_global_ctrl_bug())
441462306a36Sopenharmony_ci		vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
441562306a36Sopenharmony_ci
441662306a36Sopenharmony_ci	return vmentry_ctrl;
441762306a36Sopenharmony_ci}
441862306a36Sopenharmony_ci
441962306a36Sopenharmony_cistatic u32 vmx_vmexit_ctrl(void)
442062306a36Sopenharmony_ci{
442162306a36Sopenharmony_ci	u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
442262306a36Sopenharmony_ci
442362306a36Sopenharmony_ci	/*
442462306a36Sopenharmony_ci	 * Not used by KVM and never set in vmcs01 or vmcs02, but emulated for
442562306a36Sopenharmony_ci	 * nested virtualization and thus allowed to be set in vmcs12.
442662306a36Sopenharmony_ci	 */
442762306a36Sopenharmony_ci	vmexit_ctrl &= ~(VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER |
442862306a36Sopenharmony_ci			 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);
442962306a36Sopenharmony_ci
443062306a36Sopenharmony_ci	if (vmx_pt_mode_is_system())
443162306a36Sopenharmony_ci		vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP |
443262306a36Sopenharmony_ci				 VM_EXIT_CLEAR_IA32_RTIT_CTL);
443362306a36Sopenharmony_ci
443462306a36Sopenharmony_ci	if (cpu_has_perf_global_ctrl_bug())
443562306a36Sopenharmony_ci		vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
443662306a36Sopenharmony_ci
443762306a36Sopenharmony_ci	/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
443862306a36Sopenharmony_ci	return vmexit_ctrl &
443962306a36Sopenharmony_ci		~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
444062306a36Sopenharmony_ci}
444162306a36Sopenharmony_ci
444262306a36Sopenharmony_cistatic void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
444362306a36Sopenharmony_ci{
444462306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
444562306a36Sopenharmony_ci
444662306a36Sopenharmony_ci	if (is_guest_mode(vcpu)) {
444762306a36Sopenharmony_ci		vmx->nested.update_vmcs01_apicv_status = true;
444862306a36Sopenharmony_ci		return;
444962306a36Sopenharmony_ci	}
445062306a36Sopenharmony_ci
445162306a36Sopenharmony_ci	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
445262306a36Sopenharmony_ci
445362306a36Sopenharmony_ci	if (kvm_vcpu_apicv_active(vcpu)) {
445462306a36Sopenharmony_ci		secondary_exec_controls_setbit(vmx,
445562306a36Sopenharmony_ci					       SECONDARY_EXEC_APIC_REGISTER_VIRT |
445662306a36Sopenharmony_ci					       SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
445762306a36Sopenharmony_ci		if (enable_ipiv)
445862306a36Sopenharmony_ci			tertiary_exec_controls_setbit(vmx, TERTIARY_EXEC_IPI_VIRT);
445962306a36Sopenharmony_ci	} else {
446062306a36Sopenharmony_ci		secondary_exec_controls_clearbit(vmx,
446162306a36Sopenharmony_ci						 SECONDARY_EXEC_APIC_REGISTER_VIRT |
446262306a36Sopenharmony_ci						 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
446362306a36Sopenharmony_ci		if (enable_ipiv)
446462306a36Sopenharmony_ci			tertiary_exec_controls_clearbit(vmx, TERTIARY_EXEC_IPI_VIRT);
446562306a36Sopenharmony_ci	}
446662306a36Sopenharmony_ci
446762306a36Sopenharmony_ci	vmx_update_msr_bitmap_x2apic(vcpu);
446862306a36Sopenharmony_ci}
446962306a36Sopenharmony_ci
447062306a36Sopenharmony_cistatic u32 vmx_exec_control(struct vcpu_vmx *vmx)
447162306a36Sopenharmony_ci{
447262306a36Sopenharmony_ci	u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
447362306a36Sopenharmony_ci
447462306a36Sopenharmony_ci	/*
447562306a36Sopenharmony_ci	 * Not used by KVM, but fully supported for nesting, i.e. are allowed in
447662306a36Sopenharmony_ci	 * vmcs12 and propagated to vmcs02 when set in vmcs12.
447762306a36Sopenharmony_ci	 */
447862306a36Sopenharmony_ci	exec_control &= ~(CPU_BASED_RDTSC_EXITING |
447962306a36Sopenharmony_ci			  CPU_BASED_USE_IO_BITMAPS |
448062306a36Sopenharmony_ci			  CPU_BASED_MONITOR_TRAP_FLAG |
448162306a36Sopenharmony_ci			  CPU_BASED_PAUSE_EXITING);
448262306a36Sopenharmony_ci
448362306a36Sopenharmony_ci	/* INTR_WINDOW_EXITING and NMI_WINDOW_EXITING are toggled dynamically */
448462306a36Sopenharmony_ci	exec_control &= ~(CPU_BASED_INTR_WINDOW_EXITING |
448562306a36Sopenharmony_ci			  CPU_BASED_NMI_WINDOW_EXITING);
448662306a36Sopenharmony_ci
448762306a36Sopenharmony_ci	if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
448862306a36Sopenharmony_ci		exec_control &= ~CPU_BASED_MOV_DR_EXITING;
448962306a36Sopenharmony_ci
449062306a36Sopenharmony_ci	if (!cpu_need_tpr_shadow(&vmx->vcpu))
449162306a36Sopenharmony_ci		exec_control &= ~CPU_BASED_TPR_SHADOW;
449262306a36Sopenharmony_ci
449362306a36Sopenharmony_ci#ifdef CONFIG_X86_64
449462306a36Sopenharmony_ci	if (exec_control & CPU_BASED_TPR_SHADOW)
449562306a36Sopenharmony_ci		exec_control &= ~(CPU_BASED_CR8_LOAD_EXITING |
449662306a36Sopenharmony_ci				  CPU_BASED_CR8_STORE_EXITING);
449762306a36Sopenharmony_ci	else
449862306a36Sopenharmony_ci		exec_control |= CPU_BASED_CR8_STORE_EXITING |
449962306a36Sopenharmony_ci				CPU_BASED_CR8_LOAD_EXITING;
450062306a36Sopenharmony_ci#endif
450162306a36Sopenharmony_ci	/* No need to intercept CR3 access or INVPLG when using EPT. */
450262306a36Sopenharmony_ci	if (enable_ept)
450362306a36Sopenharmony_ci		exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
450462306a36Sopenharmony_ci				  CPU_BASED_CR3_STORE_EXITING |
450562306a36Sopenharmony_ci				  CPU_BASED_INVLPG_EXITING);
450662306a36Sopenharmony_ci	if (kvm_mwait_in_guest(vmx->vcpu.kvm))
450762306a36Sopenharmony_ci		exec_control &= ~(CPU_BASED_MWAIT_EXITING |
450862306a36Sopenharmony_ci				CPU_BASED_MONITOR_EXITING);
450962306a36Sopenharmony_ci	if (kvm_hlt_in_guest(vmx->vcpu.kvm))
451062306a36Sopenharmony_ci		exec_control &= ~CPU_BASED_HLT_EXITING;
451162306a36Sopenharmony_ci	return exec_control;
451262306a36Sopenharmony_ci}
451362306a36Sopenharmony_ci
451462306a36Sopenharmony_cistatic u64 vmx_tertiary_exec_control(struct vcpu_vmx *vmx)
451562306a36Sopenharmony_ci{
451662306a36Sopenharmony_ci	u64 exec_control = vmcs_config.cpu_based_3rd_exec_ctrl;
451762306a36Sopenharmony_ci
451862306a36Sopenharmony_ci	/*
451962306a36Sopenharmony_ci	 * IPI virtualization relies on APICv. Disable IPI virtualization if
452062306a36Sopenharmony_ci	 * APICv is inhibited.
452162306a36Sopenharmony_ci	 */
452262306a36Sopenharmony_ci	if (!enable_ipiv || !kvm_vcpu_apicv_active(&vmx->vcpu))
452362306a36Sopenharmony_ci		exec_control &= ~TERTIARY_EXEC_IPI_VIRT;
452462306a36Sopenharmony_ci
452562306a36Sopenharmony_ci	return exec_control;
452662306a36Sopenharmony_ci}
452762306a36Sopenharmony_ci
452862306a36Sopenharmony_ci/*
452962306a36Sopenharmony_ci * Adjust a single secondary execution control bit to intercept/allow an
453062306a36Sopenharmony_ci * instruction in the guest.  This is usually done based on whether or not a
453162306a36Sopenharmony_ci * feature has been exposed to the guest in order to correctly emulate faults.
453262306a36Sopenharmony_ci */
453362306a36Sopenharmony_cistatic inline void
453462306a36Sopenharmony_civmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
453562306a36Sopenharmony_ci				  u32 control, bool enabled, bool exiting)
453662306a36Sopenharmony_ci{
453762306a36Sopenharmony_ci	/*
453862306a36Sopenharmony_ci	 * If the control is for an opt-in feature, clear the control if the
453962306a36Sopenharmony_ci	 * feature is not exposed to the guest, i.e. not enabled.  If the
454062306a36Sopenharmony_ci	 * control is opt-out, i.e. an exiting control, clear the control if
454162306a36Sopenharmony_ci	 * the feature _is_ exposed to the guest, i.e. exiting/interception is
454262306a36Sopenharmony_ci	 * disabled for the associated instruction.  Note, the caller is
454362306a36Sopenharmony_ci	 * responsible presetting exec_control to set all supported bits.
454462306a36Sopenharmony_ci	 */
454562306a36Sopenharmony_ci	if (enabled == exiting)
454662306a36Sopenharmony_ci		*exec_control &= ~control;
454762306a36Sopenharmony_ci
454862306a36Sopenharmony_ci	/*
454962306a36Sopenharmony_ci	 * Update the nested MSR settings so that a nested VMM can/can't set
455062306a36Sopenharmony_ci	 * controls for features that are/aren't exposed to the guest.
455162306a36Sopenharmony_ci	 */
455262306a36Sopenharmony_ci	if (nested) {
455362306a36Sopenharmony_ci		/*
455462306a36Sopenharmony_ci		 * All features that can be added or removed to VMX MSRs must
455562306a36Sopenharmony_ci		 * be supported in the first place for nested virtualization.
455662306a36Sopenharmony_ci		 */
455762306a36Sopenharmony_ci		if (WARN_ON_ONCE(!(vmcs_config.nested.secondary_ctls_high & control)))
455862306a36Sopenharmony_ci			enabled = false;
455962306a36Sopenharmony_ci
456062306a36Sopenharmony_ci		if (enabled)
456162306a36Sopenharmony_ci			vmx->nested.msrs.secondary_ctls_high |= control;
456262306a36Sopenharmony_ci		else
456362306a36Sopenharmony_ci			vmx->nested.msrs.secondary_ctls_high &= ~control;
456462306a36Sopenharmony_ci	}
456562306a36Sopenharmony_ci}
456662306a36Sopenharmony_ci
/*
 * Convenience wrapper for the common case where a secondary execution
 * control follows a single guest CPUID bit that has a dedicated feature bit.
 * The adjustment is done only if cpu_has_vmx_<name>() is true, i.e. only if
 * the control is actually supported by KVM and hardware.  Governed features
 * are queried with guest_can_use(), all others with guest_cpuid_has().
 */
#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting)	\
({												\
	struct kvm_vcpu *__v = &(vmx)->vcpu;							\
	bool __exposed;										\
												\
	if (cpu_has_vmx_##name()) {								\
		if (kvm_is_governed_feature(X86_FEATURE_##feat_name))				\
			__exposed = guest_can_use(__v, X86_FEATURE_##feat_name);		\
		else										\
			__exposed = guest_cpuid_has(__v, X86_FEATURE_##feat_name);		\
												\
		vmx_adjust_secondary_exec_control(vmx, exec_control,				\
						  SECONDARY_EXEC_##ctrl_name,			\
						  __exposed, exiting);				\
	}											\
})
458662306a36Sopenharmony_ci
/*
 * Opt-in flavor: the control is named SECONDARY_EXEC_ENABLE_<upper> and is
 * adjusted with exiting == false.
 */
#define vmx_adjust_sec_exec_feature(vmx, exec_control, lower, upper) \
	vmx_adjust_sec_exec_control(vmx, exec_control, lower, upper, ENABLE_##upper, false)

/*
 * Opt-out flavor: the control is named SECONDARY_EXEC_<upper>_EXITING and is
 * adjusted with exiting == true.
 */
#define vmx_adjust_sec_exec_exiting(vmx, exec_control, lower, upper) \
	vmx_adjust_sec_exec_control(vmx, exec_control, lower, upper, upper##_EXITING, true)
459362306a36Sopenharmony_ci
459462306a36Sopenharmony_cistatic u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
459562306a36Sopenharmony_ci{
459662306a36Sopenharmony_ci	struct kvm_vcpu *vcpu = &vmx->vcpu;
459762306a36Sopenharmony_ci
459862306a36Sopenharmony_ci	u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
459962306a36Sopenharmony_ci
460062306a36Sopenharmony_ci	if (vmx_pt_mode_is_system())
460162306a36Sopenharmony_ci		exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX);
460262306a36Sopenharmony_ci	if (!cpu_need_virtualize_apic_accesses(vcpu))
460362306a36Sopenharmony_ci		exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
460462306a36Sopenharmony_ci	if (vmx->vpid == 0)
460562306a36Sopenharmony_ci		exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
460662306a36Sopenharmony_ci	if (!enable_ept) {
460762306a36Sopenharmony_ci		exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
460862306a36Sopenharmony_ci		enable_unrestricted_guest = 0;
460962306a36Sopenharmony_ci	}
461062306a36Sopenharmony_ci	if (!enable_unrestricted_guest)
461162306a36Sopenharmony_ci		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
461262306a36Sopenharmony_ci	if (kvm_pause_in_guest(vmx->vcpu.kvm))
461362306a36Sopenharmony_ci		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
461462306a36Sopenharmony_ci	if (!kvm_vcpu_apicv_active(vcpu))
461562306a36Sopenharmony_ci		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
461662306a36Sopenharmony_ci				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
461762306a36Sopenharmony_ci	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
461862306a36Sopenharmony_ci
461962306a36Sopenharmony_ci	/*
462062306a36Sopenharmony_ci	 * KVM doesn't support VMFUNC for L1, but the control is set in KVM's
462162306a36Sopenharmony_ci	 * base configuration as KVM emulates VMFUNC[EPTP_SWITCHING] for L2.
462262306a36Sopenharmony_ci	 */
462362306a36Sopenharmony_ci	exec_control &= ~SECONDARY_EXEC_ENABLE_VMFUNC;
462462306a36Sopenharmony_ci
462562306a36Sopenharmony_ci	/* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP,
462662306a36Sopenharmony_ci	 * in vmx_set_cr4.  */
462762306a36Sopenharmony_ci	exec_control &= ~SECONDARY_EXEC_DESC;
462862306a36Sopenharmony_ci
462962306a36Sopenharmony_ci	/* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
463062306a36Sopenharmony_ci	   (handle_vmptrld).
463162306a36Sopenharmony_ci	   We can NOT enable shadow_vmcs here because we don't have yet
463262306a36Sopenharmony_ci	   a current VMCS12
463362306a36Sopenharmony_ci	*/
463462306a36Sopenharmony_ci	exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
463562306a36Sopenharmony_ci
463662306a36Sopenharmony_ci	/*
463762306a36Sopenharmony_ci	 * PML is enabled/disabled when dirty logging of memslots changes, but
463862306a36Sopenharmony_ci	 * it needs to be set here when dirty logging is already active, e.g.
463962306a36Sopenharmony_ci	 * if this vCPU was created after dirty logging was enabled.
464062306a36Sopenharmony_ci	 */
464162306a36Sopenharmony_ci	if (!enable_pml || !atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
464262306a36Sopenharmony_ci		exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
464362306a36Sopenharmony_ci
464462306a36Sopenharmony_ci	vmx_adjust_sec_exec_feature(vmx, &exec_control, xsaves, XSAVES);
464562306a36Sopenharmony_ci
464662306a36Sopenharmony_ci	/*
464762306a36Sopenharmony_ci	 * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
464862306a36Sopenharmony_ci	 * feature is exposed to the guest.  This creates a virtualization hole
464962306a36Sopenharmony_ci	 * if both are supported in hardware but only one is exposed to the
465062306a36Sopenharmony_ci	 * guest, but letting the guest execute RDTSCP or RDPID when either one
465162306a36Sopenharmony_ci	 * is advertised is preferable to emulating the advertised instruction
465262306a36Sopenharmony_ci	 * in KVM on #UD, and obviously better than incorrectly injecting #UD.
465362306a36Sopenharmony_ci	 */
465462306a36Sopenharmony_ci	if (cpu_has_vmx_rdtscp()) {
465562306a36Sopenharmony_ci		bool rdpid_or_rdtscp_enabled =
465662306a36Sopenharmony_ci			guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
465762306a36Sopenharmony_ci			guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
465862306a36Sopenharmony_ci
465962306a36Sopenharmony_ci		vmx_adjust_secondary_exec_control(vmx, &exec_control,
466062306a36Sopenharmony_ci						  SECONDARY_EXEC_ENABLE_RDTSCP,
466162306a36Sopenharmony_ci						  rdpid_or_rdtscp_enabled, false);
466262306a36Sopenharmony_ci	}
466362306a36Sopenharmony_ci
466462306a36Sopenharmony_ci	vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
466562306a36Sopenharmony_ci
466662306a36Sopenharmony_ci	vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
466762306a36Sopenharmony_ci	vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdseed, RDSEED);
466862306a36Sopenharmony_ci
466962306a36Sopenharmony_ci	vmx_adjust_sec_exec_control(vmx, &exec_control, waitpkg, WAITPKG,
467062306a36Sopenharmony_ci				    ENABLE_USR_WAIT_PAUSE, false);
467162306a36Sopenharmony_ci
467262306a36Sopenharmony_ci	if (!vcpu->kvm->arch.bus_lock_detection_enabled)
467362306a36Sopenharmony_ci		exec_control &= ~SECONDARY_EXEC_BUS_LOCK_DETECTION;
467462306a36Sopenharmony_ci
467562306a36Sopenharmony_ci	if (!kvm_notify_vmexit_enabled(vcpu->kvm))
467662306a36Sopenharmony_ci		exec_control &= ~SECONDARY_EXEC_NOTIFY_VM_EXITING;
467762306a36Sopenharmony_ci
467862306a36Sopenharmony_ci	return exec_control;
467962306a36Sopenharmony_ci}
468062306a36Sopenharmony_ci
468162306a36Sopenharmony_cistatic inline int vmx_get_pid_table_order(struct kvm *kvm)
468262306a36Sopenharmony_ci{
468362306a36Sopenharmony_ci	return get_order(kvm->arch.max_vcpu_ids * sizeof(*to_kvm_vmx(kvm)->pid_table));
468462306a36Sopenharmony_ci}
468562306a36Sopenharmony_ci
468662306a36Sopenharmony_cistatic int vmx_alloc_ipiv_pid_table(struct kvm *kvm)
468762306a36Sopenharmony_ci{
468862306a36Sopenharmony_ci	struct page *pages;
468962306a36Sopenharmony_ci	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
469062306a36Sopenharmony_ci
469162306a36Sopenharmony_ci	if (!irqchip_in_kernel(kvm) || !enable_ipiv)
469262306a36Sopenharmony_ci		return 0;
469362306a36Sopenharmony_ci
469462306a36Sopenharmony_ci	if (kvm_vmx->pid_table)
469562306a36Sopenharmony_ci		return 0;
469662306a36Sopenharmony_ci
469762306a36Sopenharmony_ci	pages = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
469862306a36Sopenharmony_ci			    vmx_get_pid_table_order(kvm));
469962306a36Sopenharmony_ci	if (!pages)
470062306a36Sopenharmony_ci		return -ENOMEM;
470162306a36Sopenharmony_ci
470262306a36Sopenharmony_ci	kvm_vmx->pid_table = (void *)page_address(pages);
470362306a36Sopenharmony_ci	return 0;
470462306a36Sopenharmony_ci}
470562306a36Sopenharmony_ci
/*
 * Forward to vmx_alloc_ipiv_pid_table() and hand its result (0 or a
 * negative errno) back to the caller.
 */
static int vmx_vcpu_precreate(struct kvm *kvm)
{
	int ret = vmx_alloc_ipiv_pid_table(kvm);

	return ret;
}
471062306a36Sopenharmony_ci
471162306a36Sopenharmony_ci#define VMX_XSS_EXIT_BITMAP 0
471262306a36Sopenharmony_ci
471362306a36Sopenharmony_cistatic void init_vmcs(struct vcpu_vmx *vmx)
471462306a36Sopenharmony_ci{
471562306a36Sopenharmony_ci	struct kvm *kvm = vmx->vcpu.kvm;
471662306a36Sopenharmony_ci	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
471762306a36Sopenharmony_ci
471862306a36Sopenharmony_ci	if (nested)
471962306a36Sopenharmony_ci		nested_vmx_set_vmcs_shadowing_bitmap();
472062306a36Sopenharmony_ci
472162306a36Sopenharmony_ci	if (cpu_has_vmx_msr_bitmap())
472262306a36Sopenharmony_ci		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
472362306a36Sopenharmony_ci
472462306a36Sopenharmony_ci	vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA); /* 22.3.1.5 */
472562306a36Sopenharmony_ci
472662306a36Sopenharmony_ci	/* Control */
472762306a36Sopenharmony_ci	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
472862306a36Sopenharmony_ci
472962306a36Sopenharmony_ci	exec_controls_set(vmx, vmx_exec_control(vmx));
473062306a36Sopenharmony_ci
473162306a36Sopenharmony_ci	if (cpu_has_secondary_exec_ctrls())
473262306a36Sopenharmony_ci		secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx));
473362306a36Sopenharmony_ci
473462306a36Sopenharmony_ci	if (cpu_has_tertiary_exec_ctrls())
473562306a36Sopenharmony_ci		tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx));
473662306a36Sopenharmony_ci
473762306a36Sopenharmony_ci	if (enable_apicv && lapic_in_kernel(&vmx->vcpu)) {
473862306a36Sopenharmony_ci		vmcs_write64(EOI_EXIT_BITMAP0, 0);
473962306a36Sopenharmony_ci		vmcs_write64(EOI_EXIT_BITMAP1, 0);
474062306a36Sopenharmony_ci		vmcs_write64(EOI_EXIT_BITMAP2, 0);
474162306a36Sopenharmony_ci		vmcs_write64(EOI_EXIT_BITMAP3, 0);
474262306a36Sopenharmony_ci
474362306a36Sopenharmony_ci		vmcs_write16(GUEST_INTR_STATUS, 0);
474462306a36Sopenharmony_ci
474562306a36Sopenharmony_ci		vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
474662306a36Sopenharmony_ci		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
474762306a36Sopenharmony_ci	}
474862306a36Sopenharmony_ci
474962306a36Sopenharmony_ci	if (vmx_can_use_ipiv(&vmx->vcpu)) {
475062306a36Sopenharmony_ci		vmcs_write64(PID_POINTER_TABLE, __pa(kvm_vmx->pid_table));
475162306a36Sopenharmony_ci		vmcs_write16(LAST_PID_POINTER_INDEX, kvm->arch.max_vcpu_ids - 1);
475262306a36Sopenharmony_ci	}
475362306a36Sopenharmony_ci
475462306a36Sopenharmony_ci	if (!kvm_pause_in_guest(kvm)) {
475562306a36Sopenharmony_ci		vmcs_write32(PLE_GAP, ple_gap);
475662306a36Sopenharmony_ci		vmx->ple_window = ple_window;
475762306a36Sopenharmony_ci		vmx->ple_window_dirty = true;
475862306a36Sopenharmony_ci	}
475962306a36Sopenharmony_ci
476062306a36Sopenharmony_ci	if (kvm_notify_vmexit_enabled(kvm))
476162306a36Sopenharmony_ci		vmcs_write32(NOTIFY_WINDOW, kvm->arch.notify_window);
476262306a36Sopenharmony_ci
476362306a36Sopenharmony_ci	vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
476462306a36Sopenharmony_ci	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
476562306a36Sopenharmony_ci	vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */
476662306a36Sopenharmony_ci
476762306a36Sopenharmony_ci	vmcs_write16(HOST_FS_SELECTOR, 0);            /* 22.2.4 */
476862306a36Sopenharmony_ci	vmcs_write16(HOST_GS_SELECTOR, 0);            /* 22.2.4 */
476962306a36Sopenharmony_ci	vmx_set_constant_host_state(vmx);
477062306a36Sopenharmony_ci	vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
477162306a36Sopenharmony_ci	vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
477262306a36Sopenharmony_ci
477362306a36Sopenharmony_ci	if (cpu_has_vmx_vmfunc())
477462306a36Sopenharmony_ci		vmcs_write64(VM_FUNCTION_CONTROL, 0);
477562306a36Sopenharmony_ci
477662306a36Sopenharmony_ci	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
477762306a36Sopenharmony_ci	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
477862306a36Sopenharmony_ci	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
477962306a36Sopenharmony_ci	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
478062306a36Sopenharmony_ci	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
478162306a36Sopenharmony_ci
478262306a36Sopenharmony_ci	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
478362306a36Sopenharmony_ci		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
478462306a36Sopenharmony_ci
478562306a36Sopenharmony_ci	vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
478662306a36Sopenharmony_ci
478762306a36Sopenharmony_ci	/* 22.2.1, 20.8.1 */
478862306a36Sopenharmony_ci	vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
478962306a36Sopenharmony_ci
479062306a36Sopenharmony_ci	vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
479162306a36Sopenharmony_ci	vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);
479262306a36Sopenharmony_ci
479362306a36Sopenharmony_ci	set_cr4_guest_host_mask(vmx);
479462306a36Sopenharmony_ci
479562306a36Sopenharmony_ci	if (vmx->vpid != 0)
479662306a36Sopenharmony_ci		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
479762306a36Sopenharmony_ci
479862306a36Sopenharmony_ci	if (cpu_has_vmx_xsaves())
479962306a36Sopenharmony_ci		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
480062306a36Sopenharmony_ci
480162306a36Sopenharmony_ci	if (enable_pml) {
480262306a36Sopenharmony_ci		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
480362306a36Sopenharmony_ci		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
480462306a36Sopenharmony_ci	}
480562306a36Sopenharmony_ci
480662306a36Sopenharmony_ci	vmx_write_encls_bitmap(&vmx->vcpu, NULL);
480762306a36Sopenharmony_ci
480862306a36Sopenharmony_ci	if (vmx_pt_mode_is_host_guest()) {
480962306a36Sopenharmony_ci		memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc));
481062306a36Sopenharmony_ci		/* Bit[6~0] are forced to 1, writes are ignored. */
481162306a36Sopenharmony_ci		vmx->pt_desc.guest.output_mask = 0x7F;
481262306a36Sopenharmony_ci		vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
481362306a36Sopenharmony_ci	}
481462306a36Sopenharmony_ci
481562306a36Sopenharmony_ci	vmcs_write32(GUEST_SYSENTER_CS, 0);
481662306a36Sopenharmony_ci	vmcs_writel(GUEST_SYSENTER_ESP, 0);
481762306a36Sopenharmony_ci	vmcs_writel(GUEST_SYSENTER_EIP, 0);
481862306a36Sopenharmony_ci	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
481962306a36Sopenharmony_ci
482062306a36Sopenharmony_ci	if (cpu_has_vmx_tpr_shadow()) {
482162306a36Sopenharmony_ci		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
482262306a36Sopenharmony_ci		if (cpu_need_tpr_shadow(&vmx->vcpu))
482362306a36Sopenharmony_ci			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
482462306a36Sopenharmony_ci				     __pa(vmx->vcpu.arch.apic->regs));
482562306a36Sopenharmony_ci		vmcs_write32(TPR_THRESHOLD, 0);
482662306a36Sopenharmony_ci	}
482762306a36Sopenharmony_ci
482862306a36Sopenharmony_ci	vmx_setup_uret_msrs(vmx);
482962306a36Sopenharmony_ci}
483062306a36Sopenharmony_ci
483162306a36Sopenharmony_cistatic void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
483262306a36Sopenharmony_ci{
483362306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
483462306a36Sopenharmony_ci
483562306a36Sopenharmony_ci	init_vmcs(vmx);
483662306a36Sopenharmony_ci
483762306a36Sopenharmony_ci	if (nested)
483862306a36Sopenharmony_ci		memcpy(&vmx->nested.msrs, &vmcs_config.nested, sizeof(vmx->nested.msrs));
483962306a36Sopenharmony_ci
484062306a36Sopenharmony_ci	vcpu_setup_sgx_lepubkeyhash(vcpu);
484162306a36Sopenharmony_ci
484262306a36Sopenharmony_ci	vmx->nested.posted_intr_nv = -1;
484362306a36Sopenharmony_ci	vmx->nested.vmxon_ptr = INVALID_GPA;
484462306a36Sopenharmony_ci	vmx->nested.current_vmptr = INVALID_GPA;
484562306a36Sopenharmony_ci	vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
484662306a36Sopenharmony_ci
484762306a36Sopenharmony_ci	vcpu->arch.microcode_version = 0x100000000ULL;
484862306a36Sopenharmony_ci	vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
484962306a36Sopenharmony_ci
485062306a36Sopenharmony_ci	/*
485162306a36Sopenharmony_ci	 * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
485262306a36Sopenharmony_ci	 * or POSTED_INTR_WAKEUP_VECTOR.
485362306a36Sopenharmony_ci	 */
485462306a36Sopenharmony_ci	vmx->pi_desc.nv = POSTED_INTR_VECTOR;
485562306a36Sopenharmony_ci	vmx->pi_desc.sn = 1;
485662306a36Sopenharmony_ci}
485762306a36Sopenharmony_ci
485862306a36Sopenharmony_cistatic void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
485962306a36Sopenharmony_ci{
486062306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
486162306a36Sopenharmony_ci
486262306a36Sopenharmony_ci	if (!init_event)
486362306a36Sopenharmony_ci		__vmx_vcpu_reset(vcpu);
486462306a36Sopenharmony_ci
486562306a36Sopenharmony_ci	vmx->rmode.vm86_active = 0;
486662306a36Sopenharmony_ci	vmx->spec_ctrl = 0;
486762306a36Sopenharmony_ci
486862306a36Sopenharmony_ci	vmx->msr_ia32_umwait_control = 0;
486962306a36Sopenharmony_ci
487062306a36Sopenharmony_ci	vmx->hv_deadline_tsc = -1;
487162306a36Sopenharmony_ci	kvm_set_cr8(vcpu, 0);
487262306a36Sopenharmony_ci
487362306a36Sopenharmony_ci	vmx_segment_cache_clear(vmx);
487462306a36Sopenharmony_ci	kvm_register_mark_available(vcpu, VCPU_EXREG_SEGMENTS);
487562306a36Sopenharmony_ci
487662306a36Sopenharmony_ci	seg_setup(VCPU_SREG_CS);
487762306a36Sopenharmony_ci	vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
487862306a36Sopenharmony_ci	vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
487962306a36Sopenharmony_ci
488062306a36Sopenharmony_ci	seg_setup(VCPU_SREG_DS);
488162306a36Sopenharmony_ci	seg_setup(VCPU_SREG_ES);
488262306a36Sopenharmony_ci	seg_setup(VCPU_SREG_FS);
488362306a36Sopenharmony_ci	seg_setup(VCPU_SREG_GS);
488462306a36Sopenharmony_ci	seg_setup(VCPU_SREG_SS);
488562306a36Sopenharmony_ci
488662306a36Sopenharmony_ci	vmcs_write16(GUEST_TR_SELECTOR, 0);
488762306a36Sopenharmony_ci	vmcs_writel(GUEST_TR_BASE, 0);
488862306a36Sopenharmony_ci	vmcs_write32(GUEST_TR_LIMIT, 0xffff);
488962306a36Sopenharmony_ci	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
489062306a36Sopenharmony_ci
489162306a36Sopenharmony_ci	vmcs_write16(GUEST_LDTR_SELECTOR, 0);
489262306a36Sopenharmony_ci	vmcs_writel(GUEST_LDTR_BASE, 0);
489362306a36Sopenharmony_ci	vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
489462306a36Sopenharmony_ci	vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
489562306a36Sopenharmony_ci
489662306a36Sopenharmony_ci	vmcs_writel(GUEST_GDTR_BASE, 0);
489762306a36Sopenharmony_ci	vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
489862306a36Sopenharmony_ci
489962306a36Sopenharmony_ci	vmcs_writel(GUEST_IDTR_BASE, 0);
490062306a36Sopenharmony_ci	vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
490162306a36Sopenharmony_ci
490262306a36Sopenharmony_ci	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
490362306a36Sopenharmony_ci	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
490462306a36Sopenharmony_ci	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
490562306a36Sopenharmony_ci	if (kvm_mpx_supported())
490662306a36Sopenharmony_ci		vmcs_write64(GUEST_BNDCFGS, 0);
490762306a36Sopenharmony_ci
490862306a36Sopenharmony_ci	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */
490962306a36Sopenharmony_ci
491062306a36Sopenharmony_ci	kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
491162306a36Sopenharmony_ci
491262306a36Sopenharmony_ci	vpid_sync_context(vmx->vpid);
491362306a36Sopenharmony_ci
491462306a36Sopenharmony_ci	vmx_update_fb_clear_dis(vcpu, vmx);
491562306a36Sopenharmony_ci}
491662306a36Sopenharmony_ci
491762306a36Sopenharmony_cistatic void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
491862306a36Sopenharmony_ci{
491962306a36Sopenharmony_ci	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
492062306a36Sopenharmony_ci}
492162306a36Sopenharmony_ci
492262306a36Sopenharmony_cistatic void vmx_enable_nmi_window(struct kvm_vcpu *vcpu)
492362306a36Sopenharmony_ci{
492462306a36Sopenharmony_ci	if (!enable_vnmi ||
492562306a36Sopenharmony_ci	    vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
492662306a36Sopenharmony_ci		vmx_enable_irq_window(vcpu);
492762306a36Sopenharmony_ci		return;
492862306a36Sopenharmony_ci	}
492962306a36Sopenharmony_ci
493062306a36Sopenharmony_ci	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
493162306a36Sopenharmony_ci}
493262306a36Sopenharmony_ci
493362306a36Sopenharmony_cistatic void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
493462306a36Sopenharmony_ci{
493562306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
493662306a36Sopenharmony_ci	uint32_t intr;
493762306a36Sopenharmony_ci	int irq = vcpu->arch.interrupt.nr;
493862306a36Sopenharmony_ci
493962306a36Sopenharmony_ci	trace_kvm_inj_virq(irq, vcpu->arch.interrupt.soft, reinjected);
494062306a36Sopenharmony_ci
494162306a36Sopenharmony_ci	++vcpu->stat.irq_injections;
494262306a36Sopenharmony_ci	if (vmx->rmode.vm86_active) {
494362306a36Sopenharmony_ci		int inc_eip = 0;
494462306a36Sopenharmony_ci		if (vcpu->arch.interrupt.soft)
494562306a36Sopenharmony_ci			inc_eip = vcpu->arch.event_exit_inst_len;
494662306a36Sopenharmony_ci		kvm_inject_realmode_interrupt(vcpu, irq, inc_eip);
494762306a36Sopenharmony_ci		return;
494862306a36Sopenharmony_ci	}
494962306a36Sopenharmony_ci	intr = irq | INTR_INFO_VALID_MASK;
495062306a36Sopenharmony_ci	if (vcpu->arch.interrupt.soft) {
495162306a36Sopenharmony_ci		intr |= INTR_TYPE_SOFT_INTR;
495262306a36Sopenharmony_ci		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
495362306a36Sopenharmony_ci			     vmx->vcpu.arch.event_exit_inst_len);
495462306a36Sopenharmony_ci	} else
495562306a36Sopenharmony_ci		intr |= INTR_TYPE_EXT_INTR;
495662306a36Sopenharmony_ci	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
495762306a36Sopenharmony_ci
495862306a36Sopenharmony_ci	vmx_clear_hlt(vcpu);
495962306a36Sopenharmony_ci}
496062306a36Sopenharmony_ci
496162306a36Sopenharmony_cistatic void vmx_inject_nmi(struct kvm_vcpu *vcpu)
496262306a36Sopenharmony_ci{
496362306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
496462306a36Sopenharmony_ci
496562306a36Sopenharmony_ci	if (!enable_vnmi) {
496662306a36Sopenharmony_ci		/*
496762306a36Sopenharmony_ci		 * Tracking the NMI-blocked state in software is built upon
496862306a36Sopenharmony_ci		 * finding the next open IRQ window. This, in turn, depends on
496962306a36Sopenharmony_ci		 * well-behaving guests: They have to keep IRQs disabled at
497062306a36Sopenharmony_ci		 * least as long as the NMI handler runs. Otherwise we may
497162306a36Sopenharmony_ci		 * cause NMI nesting, maybe breaking the guest. But as this is
497262306a36Sopenharmony_ci		 * highly unlikely, we can live with the residual risk.
497362306a36Sopenharmony_ci		 */
497462306a36Sopenharmony_ci		vmx->loaded_vmcs->soft_vnmi_blocked = 1;
497562306a36Sopenharmony_ci		vmx->loaded_vmcs->vnmi_blocked_time = 0;
497662306a36Sopenharmony_ci	}
497762306a36Sopenharmony_ci
497862306a36Sopenharmony_ci	++vcpu->stat.nmi_injections;
497962306a36Sopenharmony_ci	vmx->loaded_vmcs->nmi_known_unmasked = false;
498062306a36Sopenharmony_ci
498162306a36Sopenharmony_ci	if (vmx->rmode.vm86_active) {
498262306a36Sopenharmony_ci		kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0);
498362306a36Sopenharmony_ci		return;
498462306a36Sopenharmony_ci	}
498562306a36Sopenharmony_ci
498662306a36Sopenharmony_ci	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
498762306a36Sopenharmony_ci			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
498862306a36Sopenharmony_ci
498962306a36Sopenharmony_ci	vmx_clear_hlt(vcpu);
499062306a36Sopenharmony_ci}
499162306a36Sopenharmony_ci
499262306a36Sopenharmony_cibool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
499362306a36Sopenharmony_ci{
499462306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
499562306a36Sopenharmony_ci	bool masked;
499662306a36Sopenharmony_ci
499762306a36Sopenharmony_ci	if (!enable_vnmi)
499862306a36Sopenharmony_ci		return vmx->loaded_vmcs->soft_vnmi_blocked;
499962306a36Sopenharmony_ci	if (vmx->loaded_vmcs->nmi_known_unmasked)
500062306a36Sopenharmony_ci		return false;
500162306a36Sopenharmony_ci	masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
500262306a36Sopenharmony_ci	vmx->loaded_vmcs->nmi_known_unmasked = !masked;
500362306a36Sopenharmony_ci	return masked;
500462306a36Sopenharmony_ci}
500562306a36Sopenharmony_ci
500662306a36Sopenharmony_civoid vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
500762306a36Sopenharmony_ci{
500862306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
500962306a36Sopenharmony_ci
501062306a36Sopenharmony_ci	if (!enable_vnmi) {
501162306a36Sopenharmony_ci		if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
501262306a36Sopenharmony_ci			vmx->loaded_vmcs->soft_vnmi_blocked = masked;
501362306a36Sopenharmony_ci			vmx->loaded_vmcs->vnmi_blocked_time = 0;
501462306a36Sopenharmony_ci		}
501562306a36Sopenharmony_ci	} else {
501662306a36Sopenharmony_ci		vmx->loaded_vmcs->nmi_known_unmasked = !masked;
501762306a36Sopenharmony_ci		if (masked)
501862306a36Sopenharmony_ci			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
501962306a36Sopenharmony_ci				      GUEST_INTR_STATE_NMI);
502062306a36Sopenharmony_ci		else
502162306a36Sopenharmony_ci			vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
502262306a36Sopenharmony_ci					GUEST_INTR_STATE_NMI);
502362306a36Sopenharmony_ci	}
502462306a36Sopenharmony_ci}
502562306a36Sopenharmony_ci
502662306a36Sopenharmony_cibool vmx_nmi_blocked(struct kvm_vcpu *vcpu)
502762306a36Sopenharmony_ci{
502862306a36Sopenharmony_ci	if (is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu))
502962306a36Sopenharmony_ci		return false;
503062306a36Sopenharmony_ci
503162306a36Sopenharmony_ci	if (!enable_vnmi && to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
503262306a36Sopenharmony_ci		return true;
503362306a36Sopenharmony_ci
503462306a36Sopenharmony_ci	return (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
503562306a36Sopenharmony_ci		(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI |
503662306a36Sopenharmony_ci		 GUEST_INTR_STATE_NMI));
503762306a36Sopenharmony_ci}
503862306a36Sopenharmony_ci
503962306a36Sopenharmony_cistatic int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
504062306a36Sopenharmony_ci{
504162306a36Sopenharmony_ci	if (to_vmx(vcpu)->nested.nested_run_pending)
504262306a36Sopenharmony_ci		return -EBUSY;
504362306a36Sopenharmony_ci
504462306a36Sopenharmony_ci	/* An NMI must not be injected into L2 if it's supposed to VM-Exit.  */
504562306a36Sopenharmony_ci	if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu))
504662306a36Sopenharmony_ci		return -EBUSY;
504762306a36Sopenharmony_ci
504862306a36Sopenharmony_ci	return !vmx_nmi_blocked(vcpu);
504962306a36Sopenharmony_ci}
505062306a36Sopenharmony_ci
505162306a36Sopenharmony_cibool vmx_interrupt_blocked(struct kvm_vcpu *vcpu)
505262306a36Sopenharmony_ci{
505362306a36Sopenharmony_ci	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
505462306a36Sopenharmony_ci		return false;
505562306a36Sopenharmony_ci
505662306a36Sopenharmony_ci	return !(vmx_get_rflags(vcpu) & X86_EFLAGS_IF) ||
505762306a36Sopenharmony_ci	       (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
505862306a36Sopenharmony_ci		(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
505962306a36Sopenharmony_ci}
506062306a36Sopenharmony_ci
506162306a36Sopenharmony_cistatic int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
506262306a36Sopenharmony_ci{
506362306a36Sopenharmony_ci	if (to_vmx(vcpu)->nested.nested_run_pending)
506462306a36Sopenharmony_ci		return -EBUSY;
506562306a36Sopenharmony_ci
506662306a36Sopenharmony_ci	/*
506762306a36Sopenharmony_ci	 * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
506862306a36Sopenharmony_ci	 * e.g. if the IRQ arrived asynchronously after checking nested events.
506962306a36Sopenharmony_ci	 */
507062306a36Sopenharmony_ci	if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
507162306a36Sopenharmony_ci		return -EBUSY;
507262306a36Sopenharmony_ci
507362306a36Sopenharmony_ci	return !vmx_interrupt_blocked(vcpu);
507462306a36Sopenharmony_ci}
507562306a36Sopenharmony_ci
507662306a36Sopenharmony_cistatic int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
507762306a36Sopenharmony_ci{
507862306a36Sopenharmony_ci	void __user *ret;
507962306a36Sopenharmony_ci
508062306a36Sopenharmony_ci	if (enable_unrestricted_guest)
508162306a36Sopenharmony_ci		return 0;
508262306a36Sopenharmony_ci
508362306a36Sopenharmony_ci	mutex_lock(&kvm->slots_lock);
508462306a36Sopenharmony_ci	ret = __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
508562306a36Sopenharmony_ci				      PAGE_SIZE * 3);
508662306a36Sopenharmony_ci	mutex_unlock(&kvm->slots_lock);
508762306a36Sopenharmony_ci
508862306a36Sopenharmony_ci	if (IS_ERR(ret))
508962306a36Sopenharmony_ci		return PTR_ERR(ret);
509062306a36Sopenharmony_ci
509162306a36Sopenharmony_ci	to_kvm_vmx(kvm)->tss_addr = addr;
509262306a36Sopenharmony_ci
509362306a36Sopenharmony_ci	return init_rmode_tss(kvm, ret);
509462306a36Sopenharmony_ci}
509562306a36Sopenharmony_ci
509662306a36Sopenharmony_cistatic int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
509762306a36Sopenharmony_ci{
509862306a36Sopenharmony_ci	to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
509962306a36Sopenharmony_ci	return 0;
510062306a36Sopenharmony_ci}
510162306a36Sopenharmony_ci
510262306a36Sopenharmony_cistatic bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
510362306a36Sopenharmony_ci{
510462306a36Sopenharmony_ci	switch (vec) {
510562306a36Sopenharmony_ci	case BP_VECTOR:
510662306a36Sopenharmony_ci		/*
510762306a36Sopenharmony_ci		 * Update instruction length as we may reinject the exception
510862306a36Sopenharmony_ci		 * from user space while in guest debugging mode.
510962306a36Sopenharmony_ci		 */
511062306a36Sopenharmony_ci		to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
511162306a36Sopenharmony_ci			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
511262306a36Sopenharmony_ci		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
511362306a36Sopenharmony_ci			return false;
511462306a36Sopenharmony_ci		fallthrough;
511562306a36Sopenharmony_ci	case DB_VECTOR:
511662306a36Sopenharmony_ci		return !(vcpu->guest_debug &
511762306a36Sopenharmony_ci			(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP));
511862306a36Sopenharmony_ci	case DE_VECTOR:
511962306a36Sopenharmony_ci	case OF_VECTOR:
512062306a36Sopenharmony_ci	case BR_VECTOR:
512162306a36Sopenharmony_ci	case UD_VECTOR:
512262306a36Sopenharmony_ci	case DF_VECTOR:
512362306a36Sopenharmony_ci	case SS_VECTOR:
512462306a36Sopenharmony_ci	case GP_VECTOR:
512562306a36Sopenharmony_ci	case MF_VECTOR:
512662306a36Sopenharmony_ci		return true;
512762306a36Sopenharmony_ci	}
512862306a36Sopenharmony_ci	return false;
512962306a36Sopenharmony_ci}
513062306a36Sopenharmony_ci
513162306a36Sopenharmony_cistatic int handle_rmode_exception(struct kvm_vcpu *vcpu,
513262306a36Sopenharmony_ci				  int vec, u32 err_code)
513362306a36Sopenharmony_ci{
513462306a36Sopenharmony_ci	/*
513562306a36Sopenharmony_ci	 * Instruction with address size override prefix opcode 0x67
513662306a36Sopenharmony_ci	 * Cause the #SS fault with 0 error code in VM86 mode.
513762306a36Sopenharmony_ci	 */
513862306a36Sopenharmony_ci	if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
513962306a36Sopenharmony_ci		if (kvm_emulate_instruction(vcpu, 0)) {
514062306a36Sopenharmony_ci			if (vcpu->arch.halt_request) {
514162306a36Sopenharmony_ci				vcpu->arch.halt_request = 0;
514262306a36Sopenharmony_ci				return kvm_emulate_halt_noskip(vcpu);
514362306a36Sopenharmony_ci			}
514462306a36Sopenharmony_ci			return 1;
514562306a36Sopenharmony_ci		}
514662306a36Sopenharmony_ci		return 0;
514762306a36Sopenharmony_ci	}
514862306a36Sopenharmony_ci
514962306a36Sopenharmony_ci	/*
515062306a36Sopenharmony_ci	 * Forward all other exceptions that are valid in real mode.
515162306a36Sopenharmony_ci	 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
515262306a36Sopenharmony_ci	 *        the required debugging infrastructure rework.
515362306a36Sopenharmony_ci	 */
515462306a36Sopenharmony_ci	kvm_queue_exception(vcpu, vec);
515562306a36Sopenharmony_ci	return 1;
515662306a36Sopenharmony_ci}
515762306a36Sopenharmony_ci
/*
 * Exit handler for machine checks.  The actual work is done by
 * vmx_vcpu_run(), so this only returns 1.
 */
static int handle_machine_check(struct kvm_vcpu *vcpu)
{
	return 1;
}
516362306a36Sopenharmony_ci
516462306a36Sopenharmony_ci/*
516562306a36Sopenharmony_ci * If the host has split lock detection disabled, then #AC is
516662306a36Sopenharmony_ci * unconditionally injected into the guest, which is the pre split lock
516762306a36Sopenharmony_ci * detection behaviour.
516862306a36Sopenharmony_ci *
516962306a36Sopenharmony_ci * If the host has split lock detection enabled then #AC is
517062306a36Sopenharmony_ci * only injected into the guest when:
517162306a36Sopenharmony_ci *  - Guest CPL == 3 (user mode)
517262306a36Sopenharmony_ci *  - Guest has #AC detection enabled in CR0
517362306a36Sopenharmony_ci *  - Guest EFLAGS has AC bit set
517462306a36Sopenharmony_ci */
517562306a36Sopenharmony_cibool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
517662306a36Sopenharmony_ci{
517762306a36Sopenharmony_ci	if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
517862306a36Sopenharmony_ci		return true;
517962306a36Sopenharmony_ci
518062306a36Sopenharmony_ci	return vmx_get_cpl(vcpu) == 3 && kvm_is_cr0_bit_set(vcpu, X86_CR0_AM) &&
518162306a36Sopenharmony_ci	       (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
518262306a36Sopenharmony_ci}
518362306a36Sopenharmony_ci
518462306a36Sopenharmony_cistatic int handle_exception_nmi(struct kvm_vcpu *vcpu)
518562306a36Sopenharmony_ci{
518662306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
518762306a36Sopenharmony_ci	struct kvm_run *kvm_run = vcpu->run;
518862306a36Sopenharmony_ci	u32 intr_info, ex_no, error_code;
518962306a36Sopenharmony_ci	unsigned long cr2, dr6;
519062306a36Sopenharmony_ci	u32 vect_info;
519162306a36Sopenharmony_ci
519262306a36Sopenharmony_ci	vect_info = vmx->idt_vectoring_info;
519362306a36Sopenharmony_ci	intr_info = vmx_get_intr_info(vcpu);
519462306a36Sopenharmony_ci
519562306a36Sopenharmony_ci	/*
519662306a36Sopenharmony_ci	 * Machine checks are handled by handle_exception_irqoff(), or by
519762306a36Sopenharmony_ci	 * vmx_vcpu_run() if a #MC occurs on VM-Entry.  NMIs are handled by
519862306a36Sopenharmony_ci	 * vmx_vcpu_enter_exit().
519962306a36Sopenharmony_ci	 */
520062306a36Sopenharmony_ci	if (is_machine_check(intr_info) || is_nmi(intr_info))
520162306a36Sopenharmony_ci		return 1;
520262306a36Sopenharmony_ci
520362306a36Sopenharmony_ci	/*
520462306a36Sopenharmony_ci	 * Queue the exception here instead of in handle_nm_fault_irqoff().
520562306a36Sopenharmony_ci	 * This ensures the nested_vmx check is not skipped so vmexit can
520662306a36Sopenharmony_ci	 * be reflected to L1 (when it intercepts #NM) before reaching this
520762306a36Sopenharmony_ci	 * point.
520862306a36Sopenharmony_ci	 */
520962306a36Sopenharmony_ci	if (is_nm_fault(intr_info)) {
521062306a36Sopenharmony_ci		kvm_queue_exception(vcpu, NM_VECTOR);
521162306a36Sopenharmony_ci		return 1;
521262306a36Sopenharmony_ci	}
521362306a36Sopenharmony_ci
521462306a36Sopenharmony_ci	if (is_invalid_opcode(intr_info))
521562306a36Sopenharmony_ci		return handle_ud(vcpu);
521662306a36Sopenharmony_ci
521762306a36Sopenharmony_ci	error_code = 0;
521862306a36Sopenharmony_ci	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
521962306a36Sopenharmony_ci		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
522062306a36Sopenharmony_ci
522162306a36Sopenharmony_ci	if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
522262306a36Sopenharmony_ci		WARN_ON_ONCE(!enable_vmware_backdoor);
522362306a36Sopenharmony_ci
522462306a36Sopenharmony_ci		/*
522562306a36Sopenharmony_ci		 * VMware backdoor emulation on #GP interception only handles
522662306a36Sopenharmony_ci		 * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero
522762306a36Sopenharmony_ci		 * error code on #GP.
522862306a36Sopenharmony_ci		 */
522962306a36Sopenharmony_ci		if (error_code) {
523062306a36Sopenharmony_ci			kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
523162306a36Sopenharmony_ci			return 1;
523262306a36Sopenharmony_ci		}
523362306a36Sopenharmony_ci		return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
523462306a36Sopenharmony_ci	}
523562306a36Sopenharmony_ci
523662306a36Sopenharmony_ci	/*
523762306a36Sopenharmony_ci	 * The #PF with PFEC.RSVD = 1 indicates the guest is accessing
523862306a36Sopenharmony_ci	 * MMIO, it is better to report an internal error.
523962306a36Sopenharmony_ci	 * See the comments in vmx_handle_exit.
524062306a36Sopenharmony_ci	 */
524162306a36Sopenharmony_ci	if ((vect_info & VECTORING_INFO_VALID_MASK) &&
524262306a36Sopenharmony_ci	    !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
524362306a36Sopenharmony_ci		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
524462306a36Sopenharmony_ci		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
524562306a36Sopenharmony_ci		vcpu->run->internal.ndata = 4;
524662306a36Sopenharmony_ci		vcpu->run->internal.data[0] = vect_info;
524762306a36Sopenharmony_ci		vcpu->run->internal.data[1] = intr_info;
524862306a36Sopenharmony_ci		vcpu->run->internal.data[2] = error_code;
524962306a36Sopenharmony_ci		vcpu->run->internal.data[3] = vcpu->arch.last_vmentry_cpu;
525062306a36Sopenharmony_ci		return 0;
525162306a36Sopenharmony_ci	}
525262306a36Sopenharmony_ci
525362306a36Sopenharmony_ci	if (is_page_fault(intr_info)) {
525462306a36Sopenharmony_ci		cr2 = vmx_get_exit_qual(vcpu);
525562306a36Sopenharmony_ci		if (enable_ept && !vcpu->arch.apf.host_apf_flags) {
525662306a36Sopenharmony_ci			/*
525762306a36Sopenharmony_ci			 * EPT will cause page fault only if we need to
525862306a36Sopenharmony_ci			 * detect illegal GPAs.
525962306a36Sopenharmony_ci			 */
526062306a36Sopenharmony_ci			WARN_ON_ONCE(!allow_smaller_maxphyaddr);
526162306a36Sopenharmony_ci			kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code);
526262306a36Sopenharmony_ci			return 1;
526362306a36Sopenharmony_ci		} else
526462306a36Sopenharmony_ci			return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
526562306a36Sopenharmony_ci	}
526662306a36Sopenharmony_ci
526762306a36Sopenharmony_ci	ex_no = intr_info & INTR_INFO_VECTOR_MASK;
526862306a36Sopenharmony_ci
526962306a36Sopenharmony_ci	if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
527062306a36Sopenharmony_ci		return handle_rmode_exception(vcpu, ex_no, error_code);
527162306a36Sopenharmony_ci
527262306a36Sopenharmony_ci	switch (ex_no) {
527362306a36Sopenharmony_ci	case DB_VECTOR:
527462306a36Sopenharmony_ci		dr6 = vmx_get_exit_qual(vcpu);
527562306a36Sopenharmony_ci		if (!(vcpu->guest_debug &
527662306a36Sopenharmony_ci		      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
527762306a36Sopenharmony_ci			/*
527862306a36Sopenharmony_ci			 * If the #DB was due to ICEBP, a.k.a. INT1, skip the
527962306a36Sopenharmony_ci			 * instruction.  ICEBP generates a trap-like #DB, but
528062306a36Sopenharmony_ci			 * despite its interception control being tied to #DB,
528162306a36Sopenharmony_ci			 * is an instruction intercept, i.e. the VM-Exit occurs
528262306a36Sopenharmony_ci			 * on the ICEBP itself.  Use the inner "skip" helper to
528362306a36Sopenharmony_ci			 * avoid single-step #DB and MTF updates, as ICEBP is
528462306a36Sopenharmony_ci			 * higher priority.  Note, skipping ICEBP still clears
528562306a36Sopenharmony_ci			 * STI and MOVSS blocking.
528662306a36Sopenharmony_ci			 *
528762306a36Sopenharmony_ci			 * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS
528862306a36Sopenharmony_ci			 * if single-step is enabled in RFLAGS and STI or MOVSS
528962306a36Sopenharmony_ci			 * blocking is active, as the CPU doesn't set the bit
529062306a36Sopenharmony_ci			 * on VM-Exit due to #DB interception.  VM-Entry has a
529162306a36Sopenharmony_ci			 * consistency check that a single-step #DB is pending
529262306a36Sopenharmony_ci			 * in this scenario as the previous instruction cannot
529362306a36Sopenharmony_ci			 * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV
529462306a36Sopenharmony_ci			 * don't modify RFLAGS), therefore the one instruction
529562306a36Sopenharmony_ci			 * delay when activating single-step breakpoints must
529662306a36Sopenharmony_ci			 * have already expired.  Note, the CPU sets/clears BS
529762306a36Sopenharmony_ci			 * as appropriate for all other VM-Exits types.
529862306a36Sopenharmony_ci			 */
529962306a36Sopenharmony_ci			if (is_icebp(intr_info))
530062306a36Sopenharmony_ci				WARN_ON(!skip_emulated_instruction(vcpu));
530162306a36Sopenharmony_ci			else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
530262306a36Sopenharmony_ci				 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
530362306a36Sopenharmony_ci				  (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
530462306a36Sopenharmony_ci				vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
530562306a36Sopenharmony_ci					    vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
530662306a36Sopenharmony_ci
530762306a36Sopenharmony_ci			kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
530862306a36Sopenharmony_ci			return 1;
530962306a36Sopenharmony_ci		}
531062306a36Sopenharmony_ci		kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW;
531162306a36Sopenharmony_ci		kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
531262306a36Sopenharmony_ci		fallthrough;
531362306a36Sopenharmony_ci	case BP_VECTOR:
531462306a36Sopenharmony_ci		/*
531562306a36Sopenharmony_ci		 * Update instruction length as we may reinject #BP from
531662306a36Sopenharmony_ci		 * user space while in guest debugging mode. Reading it for
531762306a36Sopenharmony_ci		 * #DB as well causes no harm, it is not used in that case.
531862306a36Sopenharmony_ci		 */
531962306a36Sopenharmony_ci		vmx->vcpu.arch.event_exit_inst_len =
532062306a36Sopenharmony_ci			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
532162306a36Sopenharmony_ci		kvm_run->exit_reason = KVM_EXIT_DEBUG;
532262306a36Sopenharmony_ci		kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
532362306a36Sopenharmony_ci		kvm_run->debug.arch.exception = ex_no;
532462306a36Sopenharmony_ci		break;
532562306a36Sopenharmony_ci	case AC_VECTOR:
532662306a36Sopenharmony_ci		if (vmx_guest_inject_ac(vcpu)) {
532762306a36Sopenharmony_ci			kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
532862306a36Sopenharmony_ci			return 1;
532962306a36Sopenharmony_ci		}
533062306a36Sopenharmony_ci
533162306a36Sopenharmony_ci		/*
533262306a36Sopenharmony_ci		 * Handle split lock. Depending on detection mode this will
533362306a36Sopenharmony_ci		 * either warn and disable split lock detection for this
533462306a36Sopenharmony_ci		 * task or force SIGBUS on it.
533562306a36Sopenharmony_ci		 */
533662306a36Sopenharmony_ci		if (handle_guest_split_lock(kvm_rip_read(vcpu)))
533762306a36Sopenharmony_ci			return 1;
533862306a36Sopenharmony_ci		fallthrough;
533962306a36Sopenharmony_ci	default:
534062306a36Sopenharmony_ci		kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
534162306a36Sopenharmony_ci		kvm_run->ex.exception = ex_no;
534262306a36Sopenharmony_ci		kvm_run->ex.error_code = error_code;
534362306a36Sopenharmony_ci		break;
534462306a36Sopenharmony_ci	}
534562306a36Sopenharmony_ci	return 0;
534662306a36Sopenharmony_ci}
534762306a36Sopenharmony_ci
534862306a36Sopenharmony_cistatic __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu)
534962306a36Sopenharmony_ci{
535062306a36Sopenharmony_ci	++vcpu->stat.irq_exits;
535162306a36Sopenharmony_ci	return 1;
535262306a36Sopenharmony_ci}
535362306a36Sopenharmony_ci
535462306a36Sopenharmony_cistatic int handle_triple_fault(struct kvm_vcpu *vcpu)
535562306a36Sopenharmony_ci{
535662306a36Sopenharmony_ci	vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
535762306a36Sopenharmony_ci	vcpu->mmio_needed = 0;
535862306a36Sopenharmony_ci	return 0;
535962306a36Sopenharmony_ci}
536062306a36Sopenharmony_ci
536162306a36Sopenharmony_cistatic int handle_io(struct kvm_vcpu *vcpu)
536262306a36Sopenharmony_ci{
536362306a36Sopenharmony_ci	unsigned long exit_qualification;
536462306a36Sopenharmony_ci	int size, in, string;
536562306a36Sopenharmony_ci	unsigned port;
536662306a36Sopenharmony_ci
536762306a36Sopenharmony_ci	exit_qualification = vmx_get_exit_qual(vcpu);
536862306a36Sopenharmony_ci	string = (exit_qualification & 16) != 0;
536962306a36Sopenharmony_ci
537062306a36Sopenharmony_ci	++vcpu->stat.io_exits;
537162306a36Sopenharmony_ci
537262306a36Sopenharmony_ci	if (string)
537362306a36Sopenharmony_ci		return kvm_emulate_instruction(vcpu, 0);
537462306a36Sopenharmony_ci
537562306a36Sopenharmony_ci	port = exit_qualification >> 16;
537662306a36Sopenharmony_ci	size = (exit_qualification & 7) + 1;
537762306a36Sopenharmony_ci	in = (exit_qualification & 8) != 0;
537862306a36Sopenharmony_ci
537962306a36Sopenharmony_ci	return kvm_fast_pio(vcpu, size, port, in);
538062306a36Sopenharmony_ci}
538162306a36Sopenharmony_ci
/*
 * Write the three-byte VMCALL opcode (0f 01 c1) to the start of @hypercall.
 * @vcpu is not used.
 */
static void
vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
{
	static const unsigned char vmcall_insn[] = { 0x0f, 0x01, 0xc1 };
	unsigned int i;

	for (i = 0; i < sizeof(vmcall_insn); i++)
		hypercall[i] = vmcall_insn[i];
}
539262306a36Sopenharmony_ci
539362306a36Sopenharmony_ci/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
539462306a36Sopenharmony_cistatic int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
539562306a36Sopenharmony_ci{
539662306a36Sopenharmony_ci	if (is_guest_mode(vcpu)) {
539762306a36Sopenharmony_ci		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
539862306a36Sopenharmony_ci		unsigned long orig_val = val;
539962306a36Sopenharmony_ci
540062306a36Sopenharmony_ci		/*
540162306a36Sopenharmony_ci		 * We get here when L2 changed cr0 in a way that did not change
540262306a36Sopenharmony_ci		 * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
540362306a36Sopenharmony_ci		 * but did change L0 shadowed bits. So we first calculate the
540462306a36Sopenharmony_ci		 * effective cr0 value that L1 would like to write into the
540562306a36Sopenharmony_ci		 * hardware. It consists of the L2-owned bits from the new
540662306a36Sopenharmony_ci		 * value combined with the L1-owned bits from L1's guest_cr0.
540762306a36Sopenharmony_ci		 */
540862306a36Sopenharmony_ci		val = (val & ~vmcs12->cr0_guest_host_mask) |
540962306a36Sopenharmony_ci			(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
541062306a36Sopenharmony_ci
541162306a36Sopenharmony_ci		if (kvm_set_cr0(vcpu, val))
541262306a36Sopenharmony_ci			return 1;
541362306a36Sopenharmony_ci		vmcs_writel(CR0_READ_SHADOW, orig_val);
541462306a36Sopenharmony_ci		return 0;
541562306a36Sopenharmony_ci	} else {
541662306a36Sopenharmony_ci		return kvm_set_cr0(vcpu, val);
541762306a36Sopenharmony_ci	}
541862306a36Sopenharmony_ci}
541962306a36Sopenharmony_ci
542062306a36Sopenharmony_cistatic int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
542162306a36Sopenharmony_ci{
542262306a36Sopenharmony_ci	if (is_guest_mode(vcpu)) {
542362306a36Sopenharmony_ci		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
542462306a36Sopenharmony_ci		unsigned long orig_val = val;
542562306a36Sopenharmony_ci
542662306a36Sopenharmony_ci		/* analogously to handle_set_cr0 */
542762306a36Sopenharmony_ci		val = (val & ~vmcs12->cr4_guest_host_mask) |
542862306a36Sopenharmony_ci			(vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
542962306a36Sopenharmony_ci		if (kvm_set_cr4(vcpu, val))
543062306a36Sopenharmony_ci			return 1;
543162306a36Sopenharmony_ci		vmcs_writel(CR4_READ_SHADOW, orig_val);
543262306a36Sopenharmony_ci		return 0;
543362306a36Sopenharmony_ci	} else
543462306a36Sopenharmony_ci		return kvm_set_cr4(vcpu, val);
543562306a36Sopenharmony_ci}
543662306a36Sopenharmony_ci
543762306a36Sopenharmony_cistatic int handle_desc(struct kvm_vcpu *vcpu)
543862306a36Sopenharmony_ci{
543962306a36Sopenharmony_ci	/*
544062306a36Sopenharmony_ci	 * UMIP emulation relies on intercepting writes to CR4.UMIP, i.e. this
544162306a36Sopenharmony_ci	 * and other code needs to be updated if UMIP can be guest owned.
544262306a36Sopenharmony_ci	 */
544362306a36Sopenharmony_ci	BUILD_BUG_ON(KVM_POSSIBLE_CR4_GUEST_BITS & X86_CR4_UMIP);
544462306a36Sopenharmony_ci
544562306a36Sopenharmony_ci	WARN_ON_ONCE(!kvm_is_cr4_bit_set(vcpu, X86_CR4_UMIP));
544662306a36Sopenharmony_ci	return kvm_emulate_instruction(vcpu, 0);
544762306a36Sopenharmony_ci}
544862306a36Sopenharmony_ci
544962306a36Sopenharmony_cistatic int handle_cr(struct kvm_vcpu *vcpu)
545062306a36Sopenharmony_ci{
545162306a36Sopenharmony_ci	unsigned long exit_qualification, val;
545262306a36Sopenharmony_ci	int cr;
545362306a36Sopenharmony_ci	int reg;
545462306a36Sopenharmony_ci	int err;
545562306a36Sopenharmony_ci	int ret;
545662306a36Sopenharmony_ci
545762306a36Sopenharmony_ci	exit_qualification = vmx_get_exit_qual(vcpu);
545862306a36Sopenharmony_ci	cr = exit_qualification & 15;
545962306a36Sopenharmony_ci	reg = (exit_qualification >> 8) & 15;
546062306a36Sopenharmony_ci	switch ((exit_qualification >> 4) & 3) {
546162306a36Sopenharmony_ci	case 0: /* mov to cr */
546262306a36Sopenharmony_ci		val = kvm_register_read(vcpu, reg);
546362306a36Sopenharmony_ci		trace_kvm_cr_write(cr, val);
546462306a36Sopenharmony_ci		switch (cr) {
546562306a36Sopenharmony_ci		case 0:
546662306a36Sopenharmony_ci			err = handle_set_cr0(vcpu, val);
546762306a36Sopenharmony_ci			return kvm_complete_insn_gp(vcpu, err);
546862306a36Sopenharmony_ci		case 3:
546962306a36Sopenharmony_ci			WARN_ON_ONCE(enable_unrestricted_guest);
547062306a36Sopenharmony_ci
547162306a36Sopenharmony_ci			err = kvm_set_cr3(vcpu, val);
547262306a36Sopenharmony_ci			return kvm_complete_insn_gp(vcpu, err);
547362306a36Sopenharmony_ci		case 4:
547462306a36Sopenharmony_ci			err = handle_set_cr4(vcpu, val);
547562306a36Sopenharmony_ci			return kvm_complete_insn_gp(vcpu, err);
547662306a36Sopenharmony_ci		case 8: {
547762306a36Sopenharmony_ci				u8 cr8_prev = kvm_get_cr8(vcpu);
547862306a36Sopenharmony_ci				u8 cr8 = (u8)val;
547962306a36Sopenharmony_ci				err = kvm_set_cr8(vcpu, cr8);
548062306a36Sopenharmony_ci				ret = kvm_complete_insn_gp(vcpu, err);
548162306a36Sopenharmony_ci				if (lapic_in_kernel(vcpu))
548262306a36Sopenharmony_ci					return ret;
548362306a36Sopenharmony_ci				if (cr8_prev <= cr8)
548462306a36Sopenharmony_ci					return ret;
548562306a36Sopenharmony_ci				/*
548662306a36Sopenharmony_ci				 * TODO: we might be squashing a
548762306a36Sopenharmony_ci				 * KVM_GUESTDBG_SINGLESTEP-triggered
548862306a36Sopenharmony_ci				 * KVM_EXIT_DEBUG here.
548962306a36Sopenharmony_ci				 */
549062306a36Sopenharmony_ci				vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
549162306a36Sopenharmony_ci				return 0;
549262306a36Sopenharmony_ci			}
549362306a36Sopenharmony_ci		}
549462306a36Sopenharmony_ci		break;
549562306a36Sopenharmony_ci	case 2: /* clts */
549662306a36Sopenharmony_ci		KVM_BUG(1, vcpu->kvm, "Guest always owns CR0.TS");
549762306a36Sopenharmony_ci		return -EIO;
549862306a36Sopenharmony_ci	case 1: /*mov from cr*/
549962306a36Sopenharmony_ci		switch (cr) {
550062306a36Sopenharmony_ci		case 3:
550162306a36Sopenharmony_ci			WARN_ON_ONCE(enable_unrestricted_guest);
550262306a36Sopenharmony_ci
550362306a36Sopenharmony_ci			val = kvm_read_cr3(vcpu);
550462306a36Sopenharmony_ci			kvm_register_write(vcpu, reg, val);
550562306a36Sopenharmony_ci			trace_kvm_cr_read(cr, val);
550662306a36Sopenharmony_ci			return kvm_skip_emulated_instruction(vcpu);
550762306a36Sopenharmony_ci		case 8:
550862306a36Sopenharmony_ci			val = kvm_get_cr8(vcpu);
550962306a36Sopenharmony_ci			kvm_register_write(vcpu, reg, val);
551062306a36Sopenharmony_ci			trace_kvm_cr_read(cr, val);
551162306a36Sopenharmony_ci			return kvm_skip_emulated_instruction(vcpu);
551262306a36Sopenharmony_ci		}
551362306a36Sopenharmony_ci		break;
551462306a36Sopenharmony_ci	case 3: /* lmsw */
551562306a36Sopenharmony_ci		val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
551662306a36Sopenharmony_ci		trace_kvm_cr_write(0, (kvm_read_cr0_bits(vcpu, ~0xful) | val));
551762306a36Sopenharmony_ci		kvm_lmsw(vcpu, val);
551862306a36Sopenharmony_ci
551962306a36Sopenharmony_ci		return kvm_skip_emulated_instruction(vcpu);
552062306a36Sopenharmony_ci	default:
552162306a36Sopenharmony_ci		break;
552262306a36Sopenharmony_ci	}
552362306a36Sopenharmony_ci	vcpu->run->exit_reason = 0;
552462306a36Sopenharmony_ci	vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
552562306a36Sopenharmony_ci	       (int)(exit_qualification >> 4) & 3, cr);
552662306a36Sopenharmony_ci	return 0;
552762306a36Sopenharmony_ci}
552862306a36Sopenharmony_ci
552962306a36Sopenharmony_cistatic int handle_dr(struct kvm_vcpu *vcpu)
553062306a36Sopenharmony_ci{
553162306a36Sopenharmony_ci	unsigned long exit_qualification;
553262306a36Sopenharmony_ci	int dr, dr7, reg;
553362306a36Sopenharmony_ci	int err = 1;
553462306a36Sopenharmony_ci
553562306a36Sopenharmony_ci	exit_qualification = vmx_get_exit_qual(vcpu);
553662306a36Sopenharmony_ci	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
553762306a36Sopenharmony_ci
553862306a36Sopenharmony_ci	/* First, if DR does not exist, trigger UD */
553962306a36Sopenharmony_ci	if (!kvm_require_dr(vcpu, dr))
554062306a36Sopenharmony_ci		return 1;
554162306a36Sopenharmony_ci
554262306a36Sopenharmony_ci	if (vmx_get_cpl(vcpu) > 0)
554362306a36Sopenharmony_ci		goto out;
554462306a36Sopenharmony_ci
554562306a36Sopenharmony_ci	dr7 = vmcs_readl(GUEST_DR7);
554662306a36Sopenharmony_ci	if (dr7 & DR7_GD) {
554762306a36Sopenharmony_ci		/*
554862306a36Sopenharmony_ci		 * As the vm-exit takes precedence over the debug trap, we
554962306a36Sopenharmony_ci		 * need to emulate the latter, either for the host or the
555062306a36Sopenharmony_ci		 * guest debugging itself.
555162306a36Sopenharmony_ci		 */
555262306a36Sopenharmony_ci		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
555362306a36Sopenharmony_ci			vcpu->run->debug.arch.dr6 = DR6_BD | DR6_ACTIVE_LOW;
555462306a36Sopenharmony_ci			vcpu->run->debug.arch.dr7 = dr7;
555562306a36Sopenharmony_ci			vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
555662306a36Sopenharmony_ci			vcpu->run->debug.arch.exception = DB_VECTOR;
555762306a36Sopenharmony_ci			vcpu->run->exit_reason = KVM_EXIT_DEBUG;
555862306a36Sopenharmony_ci			return 0;
555962306a36Sopenharmony_ci		} else {
556062306a36Sopenharmony_ci			kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BD);
556162306a36Sopenharmony_ci			return 1;
556262306a36Sopenharmony_ci		}
556362306a36Sopenharmony_ci	}
556462306a36Sopenharmony_ci
556562306a36Sopenharmony_ci	if (vcpu->guest_debug == 0) {
556662306a36Sopenharmony_ci		exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
556762306a36Sopenharmony_ci
556862306a36Sopenharmony_ci		/*
556962306a36Sopenharmony_ci		 * No more DR vmexits; force a reload of the debug registers
557062306a36Sopenharmony_ci		 * and reenter on this instruction.  The next vmexit will
557162306a36Sopenharmony_ci		 * retrieve the full state of the debug registers.
557262306a36Sopenharmony_ci		 */
557362306a36Sopenharmony_ci		vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
557462306a36Sopenharmony_ci		return 1;
557562306a36Sopenharmony_ci	}
557662306a36Sopenharmony_ci
557762306a36Sopenharmony_ci	reg = DEBUG_REG_ACCESS_REG(exit_qualification);
557862306a36Sopenharmony_ci	if (exit_qualification & TYPE_MOV_FROM_DR) {
557962306a36Sopenharmony_ci		unsigned long val;
558062306a36Sopenharmony_ci
558162306a36Sopenharmony_ci		kvm_get_dr(vcpu, dr, &val);
558262306a36Sopenharmony_ci		kvm_register_write(vcpu, reg, val);
558362306a36Sopenharmony_ci		err = 0;
558462306a36Sopenharmony_ci	} else {
558562306a36Sopenharmony_ci		err = kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg));
558662306a36Sopenharmony_ci	}
558762306a36Sopenharmony_ci
558862306a36Sopenharmony_ciout:
558962306a36Sopenharmony_ci	return kvm_complete_insn_gp(vcpu, err);
559062306a36Sopenharmony_ci}
559162306a36Sopenharmony_ci
559262306a36Sopenharmony_cistatic void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
559362306a36Sopenharmony_ci{
559462306a36Sopenharmony_ci	get_debugreg(vcpu->arch.db[0], 0);
559562306a36Sopenharmony_ci	get_debugreg(vcpu->arch.db[1], 1);
559662306a36Sopenharmony_ci	get_debugreg(vcpu->arch.db[2], 2);
559762306a36Sopenharmony_ci	get_debugreg(vcpu->arch.db[3], 3);
559862306a36Sopenharmony_ci	get_debugreg(vcpu->arch.dr6, 6);
559962306a36Sopenharmony_ci	vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
560062306a36Sopenharmony_ci
560162306a36Sopenharmony_ci	vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
560262306a36Sopenharmony_ci	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
560362306a36Sopenharmony_ci
560462306a36Sopenharmony_ci	/*
560562306a36Sopenharmony_ci	 * exc_debug expects dr6 to be cleared after it runs, avoid that it sees
560662306a36Sopenharmony_ci	 * a stale dr6 from the guest.
560762306a36Sopenharmony_ci	 */
560862306a36Sopenharmony_ci	set_debugreg(DR6_RESERVED, 6);
560962306a36Sopenharmony_ci}
561062306a36Sopenharmony_ci
561162306a36Sopenharmony_cistatic void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
561262306a36Sopenharmony_ci{
561362306a36Sopenharmony_ci	vmcs_writel(GUEST_DR7, val);
561462306a36Sopenharmony_ci}
561562306a36Sopenharmony_ci
/*
 * Exit handler for TPR-below-threshold VM-Exits: have the local APIC code
 * update its PPR via kvm_apic_update_ppr().  Always returns 1.
 */
static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
{
	kvm_apic_update_ppr(vcpu);

	return 1;
}
562162306a36Sopenharmony_ci
562262306a36Sopenharmony_cistatic int handle_interrupt_window(struct kvm_vcpu *vcpu)
562362306a36Sopenharmony_ci{
562462306a36Sopenharmony_ci	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
562562306a36Sopenharmony_ci
562662306a36Sopenharmony_ci	kvm_make_request(KVM_REQ_EVENT, vcpu);
562762306a36Sopenharmony_ci
562862306a36Sopenharmony_ci	++vcpu->stat.irq_window_exits;
562962306a36Sopenharmony_ci	return 1;
563062306a36Sopenharmony_ci}
563162306a36Sopenharmony_ci
/*
 * Exit handler for INVLPG VM-Exits: pass the exit qualification to
 * kvm_mmu_invlpg(), then skip the instruction.
 */
static int handle_invlpg(struct kvm_vcpu *vcpu)
{
	kvm_mmu_invlpg(vcpu, vmx_get_exit_qual(vcpu));

	return kvm_skip_emulated_instruction(vcpu);
}
563962306a36Sopenharmony_ci
564062306a36Sopenharmony_cistatic int handle_apic_access(struct kvm_vcpu *vcpu)
564162306a36Sopenharmony_ci{
564262306a36Sopenharmony_ci	if (likely(fasteoi)) {
564362306a36Sopenharmony_ci		unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
564462306a36Sopenharmony_ci		int access_type, offset;
564562306a36Sopenharmony_ci
564662306a36Sopenharmony_ci		access_type = exit_qualification & APIC_ACCESS_TYPE;
564762306a36Sopenharmony_ci		offset = exit_qualification & APIC_ACCESS_OFFSET;
564862306a36Sopenharmony_ci		/*
564962306a36Sopenharmony_ci		 * Sane guest uses MOV to write EOI, with written value
565062306a36Sopenharmony_ci		 * not cared. So make a short-circuit here by avoiding
565162306a36Sopenharmony_ci		 * heavy instruction emulation.
565262306a36Sopenharmony_ci		 */
565362306a36Sopenharmony_ci		if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
565462306a36Sopenharmony_ci		    (offset == APIC_EOI)) {
565562306a36Sopenharmony_ci			kvm_lapic_set_eoi(vcpu);
565662306a36Sopenharmony_ci			return kvm_skip_emulated_instruction(vcpu);
565762306a36Sopenharmony_ci		}
565862306a36Sopenharmony_ci	}
565962306a36Sopenharmony_ci	return kvm_emulate_instruction(vcpu, 0);
566062306a36Sopenharmony_ci}
566162306a36Sopenharmony_ci
/*
 * Exit handler for EOI-induced VM-Exits.  The vector is the low byte of the
 * exit qualification.  Always returns 1.
 */
static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
{
	int eoi_vector = vmx_get_exit_qual(vcpu) & 0xff;

	/* The exit is trap-like, so the instruction pointer is left alone. */
	kvm_apic_set_eoi_accelerated(vcpu, eoi_vector);

	return 1;
}
567162306a36Sopenharmony_ci
567262306a36Sopenharmony_cistatic int handle_apic_write(struct kvm_vcpu *vcpu)
567362306a36Sopenharmony_ci{
567462306a36Sopenharmony_ci	unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
567562306a36Sopenharmony_ci
567662306a36Sopenharmony_ci	/*
567762306a36Sopenharmony_ci	 * APIC-write VM-Exit is trap-like, KVM doesn't need to advance RIP and
567862306a36Sopenharmony_ci	 * hardware has done any necessary aliasing, offset adjustments, etc...
567962306a36Sopenharmony_ci	 * for the access.  I.e. the correct value has already been  written to
568062306a36Sopenharmony_ci	 * the vAPIC page for the correct 16-byte chunk.  KVM needs only to
568162306a36Sopenharmony_ci	 * retrieve the register value and emulate the access.
568262306a36Sopenharmony_ci	 */
568362306a36Sopenharmony_ci	u32 offset = exit_qualification & 0xff0;
568462306a36Sopenharmony_ci
568562306a36Sopenharmony_ci	kvm_apic_write_nodecode(vcpu, offset);
568662306a36Sopenharmony_ci	return 1;
568762306a36Sopenharmony_ci}
568862306a36Sopenharmony_ci
568962306a36Sopenharmony_cistatic int handle_task_switch(struct kvm_vcpu *vcpu)
569062306a36Sopenharmony_ci{
569162306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
569262306a36Sopenharmony_ci	unsigned long exit_qualification;
569362306a36Sopenharmony_ci	bool has_error_code = false;
569462306a36Sopenharmony_ci	u32 error_code = 0;
569562306a36Sopenharmony_ci	u16 tss_selector;
569662306a36Sopenharmony_ci	int reason, type, idt_v, idt_index;
569762306a36Sopenharmony_ci
569862306a36Sopenharmony_ci	idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
569962306a36Sopenharmony_ci	idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
570062306a36Sopenharmony_ci	type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
570162306a36Sopenharmony_ci
570262306a36Sopenharmony_ci	exit_qualification = vmx_get_exit_qual(vcpu);
570362306a36Sopenharmony_ci
570462306a36Sopenharmony_ci	reason = (u32)exit_qualification >> 30;
570562306a36Sopenharmony_ci	if (reason == TASK_SWITCH_GATE && idt_v) {
570662306a36Sopenharmony_ci		switch (type) {
570762306a36Sopenharmony_ci		case INTR_TYPE_NMI_INTR:
570862306a36Sopenharmony_ci			vcpu->arch.nmi_injected = false;
570962306a36Sopenharmony_ci			vmx_set_nmi_mask(vcpu, true);
571062306a36Sopenharmony_ci			break;
571162306a36Sopenharmony_ci		case INTR_TYPE_EXT_INTR:
571262306a36Sopenharmony_ci		case INTR_TYPE_SOFT_INTR:
571362306a36Sopenharmony_ci			kvm_clear_interrupt_queue(vcpu);
571462306a36Sopenharmony_ci			break;
571562306a36Sopenharmony_ci		case INTR_TYPE_HARD_EXCEPTION:
571662306a36Sopenharmony_ci			if (vmx->idt_vectoring_info &
571762306a36Sopenharmony_ci			    VECTORING_INFO_DELIVER_CODE_MASK) {
571862306a36Sopenharmony_ci				has_error_code = true;
571962306a36Sopenharmony_ci				error_code =
572062306a36Sopenharmony_ci					vmcs_read32(IDT_VECTORING_ERROR_CODE);
572162306a36Sopenharmony_ci			}
572262306a36Sopenharmony_ci			fallthrough;
572362306a36Sopenharmony_ci		case INTR_TYPE_SOFT_EXCEPTION:
572462306a36Sopenharmony_ci			kvm_clear_exception_queue(vcpu);
572562306a36Sopenharmony_ci			break;
572662306a36Sopenharmony_ci		default:
572762306a36Sopenharmony_ci			break;
572862306a36Sopenharmony_ci		}
572962306a36Sopenharmony_ci	}
573062306a36Sopenharmony_ci	tss_selector = exit_qualification;
573162306a36Sopenharmony_ci
573262306a36Sopenharmony_ci	if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
573362306a36Sopenharmony_ci		       type != INTR_TYPE_EXT_INTR &&
573462306a36Sopenharmony_ci		       type != INTR_TYPE_NMI_INTR))
573562306a36Sopenharmony_ci		WARN_ON(!skip_emulated_instruction(vcpu));
573662306a36Sopenharmony_ci
573762306a36Sopenharmony_ci	/*
573862306a36Sopenharmony_ci	 * TODO: What about debug traps on tss switch?
573962306a36Sopenharmony_ci	 *       Are we supposed to inject them and update dr6?
574062306a36Sopenharmony_ci	 */
574162306a36Sopenharmony_ci	return kvm_task_switch(vcpu, tss_selector,
574262306a36Sopenharmony_ci			       type == INTR_TYPE_SOFT_INTR ? idt_index : -1,
574362306a36Sopenharmony_ci			       reason, has_error_code, error_code);
574462306a36Sopenharmony_ci}
574562306a36Sopenharmony_ci
574662306a36Sopenharmony_cistatic int handle_ept_violation(struct kvm_vcpu *vcpu)
574762306a36Sopenharmony_ci{
574862306a36Sopenharmony_ci	unsigned long exit_qualification;
574962306a36Sopenharmony_ci	gpa_t gpa;
575062306a36Sopenharmony_ci	u64 error_code;
575162306a36Sopenharmony_ci
575262306a36Sopenharmony_ci	exit_qualification = vmx_get_exit_qual(vcpu);
575362306a36Sopenharmony_ci
575462306a36Sopenharmony_ci	/*
575562306a36Sopenharmony_ci	 * EPT violation happened while executing iret from NMI,
575662306a36Sopenharmony_ci	 * "blocked by NMI" bit has to be set before next VM entry.
575762306a36Sopenharmony_ci	 * There are errata that may cause this bit to not be set:
575862306a36Sopenharmony_ci	 * AAK134, BY25.
575962306a36Sopenharmony_ci	 */
576062306a36Sopenharmony_ci	if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
576162306a36Sopenharmony_ci			enable_vnmi &&
576262306a36Sopenharmony_ci			(exit_qualification & INTR_INFO_UNBLOCK_NMI))
576362306a36Sopenharmony_ci		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
576462306a36Sopenharmony_ci
576562306a36Sopenharmony_ci	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
576662306a36Sopenharmony_ci	trace_kvm_page_fault(vcpu, gpa, exit_qualification);
576762306a36Sopenharmony_ci
576862306a36Sopenharmony_ci	/* Is it a read fault? */
576962306a36Sopenharmony_ci	error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
577062306a36Sopenharmony_ci		     ? PFERR_USER_MASK : 0;
577162306a36Sopenharmony_ci	/* Is it a write fault? */
577262306a36Sopenharmony_ci	error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
577362306a36Sopenharmony_ci		      ? PFERR_WRITE_MASK : 0;
577462306a36Sopenharmony_ci	/* Is it a fetch fault? */
577562306a36Sopenharmony_ci	error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
577662306a36Sopenharmony_ci		      ? PFERR_FETCH_MASK : 0;
577762306a36Sopenharmony_ci	/* ept page table entry is present? */
577862306a36Sopenharmony_ci	error_code |= (exit_qualification & EPT_VIOLATION_RWX_MASK)
577962306a36Sopenharmony_ci		      ? PFERR_PRESENT_MASK : 0;
578062306a36Sopenharmony_ci
578162306a36Sopenharmony_ci	error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ?
578262306a36Sopenharmony_ci	       PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
578362306a36Sopenharmony_ci
578462306a36Sopenharmony_ci	vcpu->arch.exit_qualification = exit_qualification;
578562306a36Sopenharmony_ci
578662306a36Sopenharmony_ci	/*
578762306a36Sopenharmony_ci	 * Check that the GPA doesn't exceed physical memory limits, as that is
578862306a36Sopenharmony_ci	 * a guest page fault.  We have to emulate the instruction here, because
578962306a36Sopenharmony_ci	 * if the illegal address is that of a paging structure, then
579062306a36Sopenharmony_ci	 * EPT_VIOLATION_ACC_WRITE bit is set.  Alternatively, if supported we
579162306a36Sopenharmony_ci	 * would also use advanced VM-exit information for EPT violations to
579262306a36Sopenharmony_ci	 * reconstruct the page fault error code.
579362306a36Sopenharmony_ci	 */
579462306a36Sopenharmony_ci	if (unlikely(allow_smaller_maxphyaddr && kvm_vcpu_is_illegal_gpa(vcpu, gpa)))
579562306a36Sopenharmony_ci		return kvm_emulate_instruction(vcpu, 0);
579662306a36Sopenharmony_ci
579762306a36Sopenharmony_ci	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
579862306a36Sopenharmony_ci}
579962306a36Sopenharmony_ci
580062306a36Sopenharmony_cistatic int handle_ept_misconfig(struct kvm_vcpu *vcpu)
580162306a36Sopenharmony_ci{
580262306a36Sopenharmony_ci	gpa_t gpa;
580362306a36Sopenharmony_ci
580462306a36Sopenharmony_ci	if (!vmx_can_emulate_instruction(vcpu, EMULTYPE_PF, NULL, 0))
580562306a36Sopenharmony_ci		return 1;
580662306a36Sopenharmony_ci
580762306a36Sopenharmony_ci	/*
580862306a36Sopenharmony_ci	 * A nested guest cannot optimize MMIO vmexits, because we have an
580962306a36Sopenharmony_ci	 * nGPA here instead of the required GPA.
581062306a36Sopenharmony_ci	 */
581162306a36Sopenharmony_ci	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
581262306a36Sopenharmony_ci	if (!is_guest_mode(vcpu) &&
581362306a36Sopenharmony_ci	    !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
581462306a36Sopenharmony_ci		trace_kvm_fast_mmio(gpa);
581562306a36Sopenharmony_ci		return kvm_skip_emulated_instruction(vcpu);
581662306a36Sopenharmony_ci	}
581762306a36Sopenharmony_ci
581862306a36Sopenharmony_ci	return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
581962306a36Sopenharmony_ci}
582062306a36Sopenharmony_ci
582162306a36Sopenharmony_cistatic int handle_nmi_window(struct kvm_vcpu *vcpu)
582262306a36Sopenharmony_ci{
582362306a36Sopenharmony_ci	if (KVM_BUG_ON(!enable_vnmi, vcpu->kvm))
582462306a36Sopenharmony_ci		return -EIO;
582562306a36Sopenharmony_ci
582662306a36Sopenharmony_ci	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
582762306a36Sopenharmony_ci	++vcpu->stat.nmi_window_exits;
582862306a36Sopenharmony_ci	kvm_make_request(KVM_REQ_EVENT, vcpu);
582962306a36Sopenharmony_ci
583062306a36Sopenharmony_ci	return 1;
583162306a36Sopenharmony_ci}
583262306a36Sopenharmony_ci
583362306a36Sopenharmony_cistatic bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
583462306a36Sopenharmony_ci{
583562306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
583662306a36Sopenharmony_ci
583762306a36Sopenharmony_ci	return vmx->emulation_required && !vmx->rmode.vm86_active &&
583862306a36Sopenharmony_ci	       (kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected);
583962306a36Sopenharmony_ci}
584062306a36Sopenharmony_ci
584162306a36Sopenharmony_cistatic int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
584262306a36Sopenharmony_ci{
584362306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
584462306a36Sopenharmony_ci	bool intr_window_requested;
584562306a36Sopenharmony_ci	unsigned count = 130;
584662306a36Sopenharmony_ci
584762306a36Sopenharmony_ci	intr_window_requested = exec_controls_get(vmx) &
584862306a36Sopenharmony_ci				CPU_BASED_INTR_WINDOW_EXITING;
584962306a36Sopenharmony_ci
585062306a36Sopenharmony_ci	while (vmx->emulation_required && count-- != 0) {
585162306a36Sopenharmony_ci		if (intr_window_requested && !vmx_interrupt_blocked(vcpu))
585262306a36Sopenharmony_ci			return handle_interrupt_window(&vmx->vcpu);
585362306a36Sopenharmony_ci
585462306a36Sopenharmony_ci		if (kvm_test_request(KVM_REQ_EVENT, vcpu))
585562306a36Sopenharmony_ci			return 1;
585662306a36Sopenharmony_ci
585762306a36Sopenharmony_ci		if (!kvm_emulate_instruction(vcpu, 0))
585862306a36Sopenharmony_ci			return 0;
585962306a36Sopenharmony_ci
586062306a36Sopenharmony_ci		if (vmx_emulation_required_with_pending_exception(vcpu)) {
586162306a36Sopenharmony_ci			kvm_prepare_emulation_failure_exit(vcpu);
586262306a36Sopenharmony_ci			return 0;
586362306a36Sopenharmony_ci		}
586462306a36Sopenharmony_ci
586562306a36Sopenharmony_ci		if (vcpu->arch.halt_request) {
586662306a36Sopenharmony_ci			vcpu->arch.halt_request = 0;
586762306a36Sopenharmony_ci			return kvm_emulate_halt_noskip(vcpu);
586862306a36Sopenharmony_ci		}
586962306a36Sopenharmony_ci
587062306a36Sopenharmony_ci		/*
587162306a36Sopenharmony_ci		 * Note, return 1 and not 0, vcpu_run() will invoke
587262306a36Sopenharmony_ci		 * xfer_to_guest_mode() which will create a proper return
587362306a36Sopenharmony_ci		 * code.
587462306a36Sopenharmony_ci		 */
587562306a36Sopenharmony_ci		if (__xfer_to_guest_mode_work_pending())
587662306a36Sopenharmony_ci			return 1;
587762306a36Sopenharmony_ci	}
587862306a36Sopenharmony_ci
587962306a36Sopenharmony_ci	return 1;
588062306a36Sopenharmony_ci}
588162306a36Sopenharmony_ci
/*
 * Pre-run check: returns 1 unless emulation is required while an exception
 * is pending or injected, in which case an emulation-failure exit is
 * prepared and 0 is returned.
 */
static int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	if (!vmx_emulation_required_with_pending_exception(vcpu))
		return 1;

	kvm_prepare_emulation_failure_exit(vcpu);
	return 0;
}
589162306a36Sopenharmony_ci
589262306a36Sopenharmony_cistatic void grow_ple_window(struct kvm_vcpu *vcpu)
589362306a36Sopenharmony_ci{
589462306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
589562306a36Sopenharmony_ci	unsigned int old = vmx->ple_window;
589662306a36Sopenharmony_ci
589762306a36Sopenharmony_ci	vmx->ple_window = __grow_ple_window(old, ple_window,
589862306a36Sopenharmony_ci					    ple_window_grow,
589962306a36Sopenharmony_ci					    ple_window_max);
590062306a36Sopenharmony_ci
590162306a36Sopenharmony_ci	if (vmx->ple_window != old) {
590262306a36Sopenharmony_ci		vmx->ple_window_dirty = true;
590362306a36Sopenharmony_ci		trace_kvm_ple_window_update(vcpu->vcpu_id,
590462306a36Sopenharmony_ci					    vmx->ple_window, old);
590562306a36Sopenharmony_ci	}
590662306a36Sopenharmony_ci}
590762306a36Sopenharmony_ci
590862306a36Sopenharmony_cistatic void shrink_ple_window(struct kvm_vcpu *vcpu)
590962306a36Sopenharmony_ci{
591062306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
591162306a36Sopenharmony_ci	unsigned int old = vmx->ple_window;
591262306a36Sopenharmony_ci
591362306a36Sopenharmony_ci	vmx->ple_window = __shrink_ple_window(old, ple_window,
591462306a36Sopenharmony_ci					      ple_window_shrink,
591562306a36Sopenharmony_ci					      ple_window);
591662306a36Sopenharmony_ci
591762306a36Sopenharmony_ci	if (vmx->ple_window != old) {
591862306a36Sopenharmony_ci		vmx->ple_window_dirty = true;
591962306a36Sopenharmony_ci		trace_kvm_ple_window_update(vcpu->vcpu_id,
592062306a36Sopenharmony_ci					    vmx->ple_window, old);
592162306a36Sopenharmony_ci	}
592262306a36Sopenharmony_ci}
592362306a36Sopenharmony_ci
592462306a36Sopenharmony_ci/*
592562306a36Sopenharmony_ci * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
592662306a36Sopenharmony_ci * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
592762306a36Sopenharmony_ci */
592862306a36Sopenharmony_cistatic int handle_pause(struct kvm_vcpu *vcpu)
592962306a36Sopenharmony_ci{
593062306a36Sopenharmony_ci	if (!kvm_pause_in_guest(vcpu->kvm))
593162306a36Sopenharmony_ci		grow_ple_window(vcpu);
593262306a36Sopenharmony_ci
593362306a36Sopenharmony_ci	/*
593462306a36Sopenharmony_ci	 * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting"
593562306a36Sopenharmony_ci	 * VM-execution control is ignored if CPL > 0. OTOH, KVM
593662306a36Sopenharmony_ci	 * never set PAUSE_EXITING and just set PLE if supported,
593762306a36Sopenharmony_ci	 * so the vcpu must be CPL=0 if it gets a PAUSE exit.
593862306a36Sopenharmony_ci	 */
593962306a36Sopenharmony_ci	kvm_vcpu_on_spin(vcpu, true);
594062306a36Sopenharmony_ci	return kvm_skip_emulated_instruction(vcpu);
594162306a36Sopenharmony_ci}
594262306a36Sopenharmony_ci
/* Monitor-trap-flag exit: no work is done here, always returns 1. */
static int handle_monitor_trap(struct kvm_vcpu *vcpu)
{
	return 1;
}
594762306a36Sopenharmony_ci
594862306a36Sopenharmony_cistatic int handle_invpcid(struct kvm_vcpu *vcpu)
594962306a36Sopenharmony_ci{
595062306a36Sopenharmony_ci	u32 vmx_instruction_info;
595162306a36Sopenharmony_ci	unsigned long type;
595262306a36Sopenharmony_ci	gva_t gva;
595362306a36Sopenharmony_ci	struct {
595462306a36Sopenharmony_ci		u64 pcid;
595562306a36Sopenharmony_ci		u64 gla;
595662306a36Sopenharmony_ci	} operand;
595762306a36Sopenharmony_ci	int gpr_index;
595862306a36Sopenharmony_ci
595962306a36Sopenharmony_ci	if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
596062306a36Sopenharmony_ci		kvm_queue_exception(vcpu, UD_VECTOR);
596162306a36Sopenharmony_ci		return 1;
596262306a36Sopenharmony_ci	}
596362306a36Sopenharmony_ci
596462306a36Sopenharmony_ci	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
596562306a36Sopenharmony_ci	gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
596662306a36Sopenharmony_ci	type = kvm_register_read(vcpu, gpr_index);
596762306a36Sopenharmony_ci
596862306a36Sopenharmony_ci	/* According to the Intel instruction reference, the memory operand
596962306a36Sopenharmony_ci	 * is read even if it isn't needed (e.g., for type==all)
597062306a36Sopenharmony_ci	 */
597162306a36Sopenharmony_ci	if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
597262306a36Sopenharmony_ci				vmx_instruction_info, false,
597362306a36Sopenharmony_ci				sizeof(operand), &gva))
597462306a36Sopenharmony_ci		return 1;
597562306a36Sopenharmony_ci
597662306a36Sopenharmony_ci	return kvm_handle_invpcid(vcpu, type, gva);
597762306a36Sopenharmony_ci}
597862306a36Sopenharmony_ci
597962306a36Sopenharmony_cistatic int handle_pml_full(struct kvm_vcpu *vcpu)
598062306a36Sopenharmony_ci{
598162306a36Sopenharmony_ci	unsigned long exit_qualification;
598262306a36Sopenharmony_ci
598362306a36Sopenharmony_ci	trace_kvm_pml_full(vcpu->vcpu_id);
598462306a36Sopenharmony_ci
598562306a36Sopenharmony_ci	exit_qualification = vmx_get_exit_qual(vcpu);
598662306a36Sopenharmony_ci
598762306a36Sopenharmony_ci	/*
598862306a36Sopenharmony_ci	 * PML buffer FULL happened while executing iret from NMI,
598962306a36Sopenharmony_ci	 * "blocked by NMI" bit has to be set before next VM entry.
599062306a36Sopenharmony_ci	 */
599162306a36Sopenharmony_ci	if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
599262306a36Sopenharmony_ci			enable_vnmi &&
599362306a36Sopenharmony_ci			(exit_qualification & INTR_INFO_UNBLOCK_NMI))
599462306a36Sopenharmony_ci		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
599562306a36Sopenharmony_ci				GUEST_INTR_STATE_NMI);
599662306a36Sopenharmony_ci
599762306a36Sopenharmony_ci	/*
599862306a36Sopenharmony_ci	 * PML buffer already flushed at beginning of VMEXIT. Nothing to do
599962306a36Sopenharmony_ci	 * here.., and there's no userspace involvement needed for PML.
600062306a36Sopenharmony_ci	 */
600162306a36Sopenharmony_ci	return 1;
600262306a36Sopenharmony_ci}
600362306a36Sopenharmony_ci
600462306a36Sopenharmony_cistatic fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu)
600562306a36Sopenharmony_ci{
600662306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
600762306a36Sopenharmony_ci
600862306a36Sopenharmony_ci	if (!vmx->req_immediate_exit &&
600962306a36Sopenharmony_ci	    !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) {
601062306a36Sopenharmony_ci		kvm_lapic_expired_hv_timer(vcpu);
601162306a36Sopenharmony_ci		return EXIT_FASTPATH_REENTER_GUEST;
601262306a36Sopenharmony_ci	}
601362306a36Sopenharmony_ci
601462306a36Sopenharmony_ci	return EXIT_FASTPATH_NONE;
601562306a36Sopenharmony_ci}
601662306a36Sopenharmony_ci
/*
 * Preemption-timer exit: run the fastpath handler, discard its verdict, and
 * always return 1.
 */
static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{
	(void)handle_fastpath_preemption_timer(vcpu);

	return 1;
}
602262306a36Sopenharmony_ci
602362306a36Sopenharmony_ci/*
602462306a36Sopenharmony_ci * When nested=0, all VMX instruction VM Exits filter here.  The handlers
602562306a36Sopenharmony_ci * are overwritten by nested_vmx_setup() when nested=1.
602662306a36Sopenharmony_ci */
602762306a36Sopenharmony_cistatic int handle_vmx_instruction(struct kvm_vcpu *vcpu)
602862306a36Sopenharmony_ci{
602962306a36Sopenharmony_ci	kvm_queue_exception(vcpu, UD_VECTOR);
603062306a36Sopenharmony_ci	return 1;
603162306a36Sopenharmony_ci}
603262306a36Sopenharmony_ci
603362306a36Sopenharmony_ci#ifndef CONFIG_X86_SGX_KVM
603462306a36Sopenharmony_cistatic int handle_encls(struct kvm_vcpu *vcpu)
603562306a36Sopenharmony_ci{
603662306a36Sopenharmony_ci	/*
603762306a36Sopenharmony_ci	 * SGX virtualization is disabled.  There is no software enable bit for
603862306a36Sopenharmony_ci	 * SGX, so KVM intercepts all ENCLS leafs and injects a #UD to prevent
603962306a36Sopenharmony_ci	 * the guest from executing ENCLS (when SGX is supported by hardware).
604062306a36Sopenharmony_ci	 */
604162306a36Sopenharmony_ci	kvm_queue_exception(vcpu, UD_VECTOR);
604262306a36Sopenharmony_ci	return 1;
604362306a36Sopenharmony_ci}
604462306a36Sopenharmony_ci#endif /* CONFIG_X86_SGX_KVM */
604562306a36Sopenharmony_ci
604662306a36Sopenharmony_cistatic int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu)
604762306a36Sopenharmony_ci{
604862306a36Sopenharmony_ci	/*
604962306a36Sopenharmony_ci	 * Hardware may or may not set the BUS_LOCK_DETECTED flag on BUS_LOCK
605062306a36Sopenharmony_ci	 * VM-Exits. Unconditionally set the flag here and leave the handling to
605162306a36Sopenharmony_ci	 * vmx_handle_exit().
605262306a36Sopenharmony_ci	 */
605362306a36Sopenharmony_ci	to_vmx(vcpu)->exit_reason.bus_lock_detected = true;
605462306a36Sopenharmony_ci	return 1;
605562306a36Sopenharmony_ci}
605662306a36Sopenharmony_ci
605762306a36Sopenharmony_cistatic int handle_notify(struct kvm_vcpu *vcpu)
605862306a36Sopenharmony_ci{
605962306a36Sopenharmony_ci	unsigned long exit_qual = vmx_get_exit_qual(vcpu);
606062306a36Sopenharmony_ci	bool context_invalid = exit_qual & NOTIFY_VM_CONTEXT_INVALID;
606162306a36Sopenharmony_ci
606262306a36Sopenharmony_ci	++vcpu->stat.notify_window_exits;
606362306a36Sopenharmony_ci
606462306a36Sopenharmony_ci	/*
606562306a36Sopenharmony_ci	 * Notify VM exit happened while executing iret from NMI,
606662306a36Sopenharmony_ci	 * "blocked by NMI" bit has to be set before next VM entry.
606762306a36Sopenharmony_ci	 */
606862306a36Sopenharmony_ci	if (enable_vnmi && (exit_qual & INTR_INFO_UNBLOCK_NMI))
606962306a36Sopenharmony_ci		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
607062306a36Sopenharmony_ci			      GUEST_INTR_STATE_NMI);
607162306a36Sopenharmony_ci
607262306a36Sopenharmony_ci	if (vcpu->kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_USER ||
607362306a36Sopenharmony_ci	    context_invalid) {
607462306a36Sopenharmony_ci		vcpu->run->exit_reason = KVM_EXIT_NOTIFY;
607562306a36Sopenharmony_ci		vcpu->run->notify.flags = context_invalid ?
607662306a36Sopenharmony_ci					  KVM_NOTIFY_CONTEXT_INVALID : 0;
607762306a36Sopenharmony_ci		return 0;
607862306a36Sopenharmony_ci	}
607962306a36Sopenharmony_ci
608062306a36Sopenharmony_ci	return 1;
608162306a36Sopenharmony_ci}
608262306a36Sopenharmony_ci
608362306a36Sopenharmony_ci/*
608462306a36Sopenharmony_ci * The exit handlers return 1 if the exit was handled fully and guest execution
608562306a36Sopenharmony_ci * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
608662306a36Sopenharmony_ci * to be done to userspace and return 0.
608762306a36Sopenharmony_ci */
608862306a36Sopenharmony_cistatic int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
608962306a36Sopenharmony_ci	[EXIT_REASON_EXCEPTION_NMI]           = handle_exception_nmi,
609062306a36Sopenharmony_ci	[EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
609162306a36Sopenharmony_ci	[EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
609262306a36Sopenharmony_ci	[EXIT_REASON_NMI_WINDOW]	      = handle_nmi_window,
609362306a36Sopenharmony_ci	[EXIT_REASON_IO_INSTRUCTION]          = handle_io,
609462306a36Sopenharmony_ci	[EXIT_REASON_CR_ACCESS]               = handle_cr,
609562306a36Sopenharmony_ci	[EXIT_REASON_DR_ACCESS]               = handle_dr,
609662306a36Sopenharmony_ci	[EXIT_REASON_CPUID]                   = kvm_emulate_cpuid,
609762306a36Sopenharmony_ci	[EXIT_REASON_MSR_READ]                = kvm_emulate_rdmsr,
609862306a36Sopenharmony_ci	[EXIT_REASON_MSR_WRITE]               = kvm_emulate_wrmsr,
609962306a36Sopenharmony_ci	[EXIT_REASON_INTERRUPT_WINDOW]        = handle_interrupt_window,
610062306a36Sopenharmony_ci	[EXIT_REASON_HLT]                     = kvm_emulate_halt,
610162306a36Sopenharmony_ci	[EXIT_REASON_INVD]		      = kvm_emulate_invd,
610262306a36Sopenharmony_ci	[EXIT_REASON_INVLPG]		      = handle_invlpg,
610362306a36Sopenharmony_ci	[EXIT_REASON_RDPMC]                   = kvm_emulate_rdpmc,
610462306a36Sopenharmony_ci	[EXIT_REASON_VMCALL]                  = kvm_emulate_hypercall,
610562306a36Sopenharmony_ci	[EXIT_REASON_VMCLEAR]		      = handle_vmx_instruction,
610662306a36Sopenharmony_ci	[EXIT_REASON_VMLAUNCH]		      = handle_vmx_instruction,
610762306a36Sopenharmony_ci	[EXIT_REASON_VMPTRLD]		      = handle_vmx_instruction,
610862306a36Sopenharmony_ci	[EXIT_REASON_VMPTRST]		      = handle_vmx_instruction,
610962306a36Sopenharmony_ci	[EXIT_REASON_VMREAD]		      = handle_vmx_instruction,
611062306a36Sopenharmony_ci	[EXIT_REASON_VMRESUME]		      = handle_vmx_instruction,
611162306a36Sopenharmony_ci	[EXIT_REASON_VMWRITE]		      = handle_vmx_instruction,
611262306a36Sopenharmony_ci	[EXIT_REASON_VMOFF]		      = handle_vmx_instruction,
611362306a36Sopenharmony_ci	[EXIT_REASON_VMON]		      = handle_vmx_instruction,
611462306a36Sopenharmony_ci	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
611562306a36Sopenharmony_ci	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
611662306a36Sopenharmony_ci	[EXIT_REASON_APIC_WRITE]              = handle_apic_write,
611762306a36Sopenharmony_ci	[EXIT_REASON_EOI_INDUCED]             = handle_apic_eoi_induced,
611862306a36Sopenharmony_ci	[EXIT_REASON_WBINVD]                  = kvm_emulate_wbinvd,
611962306a36Sopenharmony_ci	[EXIT_REASON_XSETBV]                  = kvm_emulate_xsetbv,
612062306a36Sopenharmony_ci	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
612162306a36Sopenharmony_ci	[EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
612262306a36Sopenharmony_ci	[EXIT_REASON_GDTR_IDTR]		      = handle_desc,
612362306a36Sopenharmony_ci	[EXIT_REASON_LDTR_TR]		      = handle_desc,
612462306a36Sopenharmony_ci	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
612562306a36Sopenharmony_ci	[EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
612662306a36Sopenharmony_ci	[EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
612762306a36Sopenharmony_ci	[EXIT_REASON_MWAIT_INSTRUCTION]	      = kvm_emulate_mwait,
612862306a36Sopenharmony_ci	[EXIT_REASON_MONITOR_TRAP_FLAG]       = handle_monitor_trap,
612962306a36Sopenharmony_ci	[EXIT_REASON_MONITOR_INSTRUCTION]     = kvm_emulate_monitor,
613062306a36Sopenharmony_ci	[EXIT_REASON_INVEPT]                  = handle_vmx_instruction,
613162306a36Sopenharmony_ci	[EXIT_REASON_INVVPID]                 = handle_vmx_instruction,
613262306a36Sopenharmony_ci	[EXIT_REASON_RDRAND]                  = kvm_handle_invalid_op,
613362306a36Sopenharmony_ci	[EXIT_REASON_RDSEED]                  = kvm_handle_invalid_op,
613462306a36Sopenharmony_ci	[EXIT_REASON_PML_FULL]		      = handle_pml_full,
613562306a36Sopenharmony_ci	[EXIT_REASON_INVPCID]                 = handle_invpcid,
613662306a36Sopenharmony_ci	[EXIT_REASON_VMFUNC]		      = handle_vmx_instruction,
613762306a36Sopenharmony_ci	[EXIT_REASON_PREEMPTION_TIMER]	      = handle_preemption_timer,
613862306a36Sopenharmony_ci	[EXIT_REASON_ENCLS]		      = handle_encls,
613962306a36Sopenharmony_ci	[EXIT_REASON_BUS_LOCK]                = handle_bus_lock_vmexit,
614062306a36Sopenharmony_ci	[EXIT_REASON_NOTIFY]		      = handle_notify,
614162306a36Sopenharmony_ci};
614262306a36Sopenharmony_ci
614362306a36Sopenharmony_cistatic const int kvm_vmx_max_exit_handlers =
614462306a36Sopenharmony_ci	ARRAY_SIZE(kvm_vmx_exit_handlers);
614562306a36Sopenharmony_ci
614662306a36Sopenharmony_cistatic void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
614762306a36Sopenharmony_ci			      u64 *info1, u64 *info2,
614862306a36Sopenharmony_ci			      u32 *intr_info, u32 *error_code)
614962306a36Sopenharmony_ci{
615062306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
615162306a36Sopenharmony_ci
615262306a36Sopenharmony_ci	*reason = vmx->exit_reason.full;
615362306a36Sopenharmony_ci	*info1 = vmx_get_exit_qual(vcpu);
615462306a36Sopenharmony_ci	if (!(vmx->exit_reason.failed_vmentry)) {
615562306a36Sopenharmony_ci		*info2 = vmx->idt_vectoring_info;
615662306a36Sopenharmony_ci		*intr_info = vmx_get_intr_info(vcpu);
615762306a36Sopenharmony_ci		if (is_exception_with_error_code(*intr_info))
615862306a36Sopenharmony_ci			*error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
615962306a36Sopenharmony_ci		else
616062306a36Sopenharmony_ci			*error_code = 0;
616162306a36Sopenharmony_ci	} else {
616262306a36Sopenharmony_ci		*info2 = 0;
616362306a36Sopenharmony_ci		*intr_info = 0;
616462306a36Sopenharmony_ci		*error_code = 0;
616562306a36Sopenharmony_ci	}
616662306a36Sopenharmony_ci}
616762306a36Sopenharmony_ci
616862306a36Sopenharmony_cistatic void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
616962306a36Sopenharmony_ci{
617062306a36Sopenharmony_ci	if (vmx->pml_pg) {
617162306a36Sopenharmony_ci		__free_page(vmx->pml_pg);
617262306a36Sopenharmony_ci		vmx->pml_pg = NULL;
617362306a36Sopenharmony_ci	}
617462306a36Sopenharmony_ci}
617562306a36Sopenharmony_ci
617662306a36Sopenharmony_cistatic void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
617762306a36Sopenharmony_ci{
617862306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
617962306a36Sopenharmony_ci	u64 *pml_buf;
618062306a36Sopenharmony_ci	u16 pml_idx;
618162306a36Sopenharmony_ci
618262306a36Sopenharmony_ci	pml_idx = vmcs_read16(GUEST_PML_INDEX);
618362306a36Sopenharmony_ci
618462306a36Sopenharmony_ci	/* Do nothing if PML buffer is empty */
618562306a36Sopenharmony_ci	if (pml_idx == (PML_ENTITY_NUM - 1))
618662306a36Sopenharmony_ci		return;
618762306a36Sopenharmony_ci
618862306a36Sopenharmony_ci	/* PML index always points to next available PML buffer entity */
618962306a36Sopenharmony_ci	if (pml_idx >= PML_ENTITY_NUM)
619062306a36Sopenharmony_ci		pml_idx = 0;
619162306a36Sopenharmony_ci	else
619262306a36Sopenharmony_ci		pml_idx++;
619362306a36Sopenharmony_ci
619462306a36Sopenharmony_ci	pml_buf = page_address(vmx->pml_pg);
619562306a36Sopenharmony_ci	for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
619662306a36Sopenharmony_ci		u64 gpa;
619762306a36Sopenharmony_ci
619862306a36Sopenharmony_ci		gpa = pml_buf[pml_idx];
619962306a36Sopenharmony_ci		WARN_ON(gpa & (PAGE_SIZE - 1));
620062306a36Sopenharmony_ci		kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
620162306a36Sopenharmony_ci	}
620262306a36Sopenharmony_ci
620362306a36Sopenharmony_ci	/* reset PML index */
620462306a36Sopenharmony_ci	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
620562306a36Sopenharmony_ci}
620662306a36Sopenharmony_ci
620762306a36Sopenharmony_cistatic void vmx_dump_sel(char *name, uint32_t sel)
620862306a36Sopenharmony_ci{
620962306a36Sopenharmony_ci	pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
621062306a36Sopenharmony_ci	       name, vmcs_read16(sel),
621162306a36Sopenharmony_ci	       vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
621262306a36Sopenharmony_ci	       vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
621362306a36Sopenharmony_ci	       vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
621462306a36Sopenharmony_ci}
621562306a36Sopenharmony_ci
621662306a36Sopenharmony_cistatic void vmx_dump_dtsel(char *name, uint32_t limit)
621762306a36Sopenharmony_ci{
621862306a36Sopenharmony_ci	pr_err("%s                           limit=0x%08x, base=0x%016lx\n",
621962306a36Sopenharmony_ci	       name, vmcs_read32(limit),
622062306a36Sopenharmony_ci	       vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
622162306a36Sopenharmony_ci}
622262306a36Sopenharmony_ci
622362306a36Sopenharmony_cistatic void vmx_dump_msrs(char *name, struct vmx_msrs *m)
622462306a36Sopenharmony_ci{
622562306a36Sopenharmony_ci	unsigned int i;
622662306a36Sopenharmony_ci	struct vmx_msr_entry *e;
622762306a36Sopenharmony_ci
622862306a36Sopenharmony_ci	pr_err("MSR %s:\n", name);
622962306a36Sopenharmony_ci	for (i = 0, e = m->val; i < m->nr; ++i, ++e)
623062306a36Sopenharmony_ci		pr_err("  %2d: msr=0x%08x value=0x%016llx\n", i, e->index, e->value);
623162306a36Sopenharmony_ci}
623262306a36Sopenharmony_ci
623362306a36Sopenharmony_civoid dump_vmcs(struct kvm_vcpu *vcpu)
623462306a36Sopenharmony_ci{
623562306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
623662306a36Sopenharmony_ci	u32 vmentry_ctl, vmexit_ctl;
623762306a36Sopenharmony_ci	u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
623862306a36Sopenharmony_ci	u64 tertiary_exec_control;
623962306a36Sopenharmony_ci	unsigned long cr4;
624062306a36Sopenharmony_ci	int efer_slot;
624162306a36Sopenharmony_ci
624262306a36Sopenharmony_ci	if (!dump_invalid_vmcs) {
624362306a36Sopenharmony_ci		pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n");
624462306a36Sopenharmony_ci		return;
624562306a36Sopenharmony_ci	}
624662306a36Sopenharmony_ci
624762306a36Sopenharmony_ci	vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
624862306a36Sopenharmony_ci	vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
624962306a36Sopenharmony_ci	cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
625062306a36Sopenharmony_ci	pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
625162306a36Sopenharmony_ci	cr4 = vmcs_readl(GUEST_CR4);
625262306a36Sopenharmony_ci
625362306a36Sopenharmony_ci	if (cpu_has_secondary_exec_ctrls())
625462306a36Sopenharmony_ci		secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
625562306a36Sopenharmony_ci	else
625662306a36Sopenharmony_ci		secondary_exec_control = 0;
625762306a36Sopenharmony_ci
625862306a36Sopenharmony_ci	if (cpu_has_tertiary_exec_ctrls())
625962306a36Sopenharmony_ci		tertiary_exec_control = vmcs_read64(TERTIARY_VM_EXEC_CONTROL);
626062306a36Sopenharmony_ci	else
626162306a36Sopenharmony_ci		tertiary_exec_control = 0;
626262306a36Sopenharmony_ci
626362306a36Sopenharmony_ci	pr_err("VMCS %p, last attempted VM-entry on CPU %d\n",
626462306a36Sopenharmony_ci	       vmx->loaded_vmcs->vmcs, vcpu->arch.last_vmentry_cpu);
626562306a36Sopenharmony_ci	pr_err("*** Guest State ***\n");
626662306a36Sopenharmony_ci	pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
626762306a36Sopenharmony_ci	       vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
626862306a36Sopenharmony_ci	       vmcs_readl(CR0_GUEST_HOST_MASK));
626962306a36Sopenharmony_ci	pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
627062306a36Sopenharmony_ci	       cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
627162306a36Sopenharmony_ci	pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
627262306a36Sopenharmony_ci	if (cpu_has_vmx_ept()) {
627362306a36Sopenharmony_ci		pr_err("PDPTR0 = 0x%016llx  PDPTR1 = 0x%016llx\n",
627462306a36Sopenharmony_ci		       vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
627562306a36Sopenharmony_ci		pr_err("PDPTR2 = 0x%016llx  PDPTR3 = 0x%016llx\n",
627662306a36Sopenharmony_ci		       vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
627762306a36Sopenharmony_ci	}
627862306a36Sopenharmony_ci	pr_err("RSP = 0x%016lx  RIP = 0x%016lx\n",
627962306a36Sopenharmony_ci	       vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
628062306a36Sopenharmony_ci	pr_err("RFLAGS=0x%08lx         DR7 = 0x%016lx\n",
628162306a36Sopenharmony_ci	       vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
628262306a36Sopenharmony_ci	pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
628362306a36Sopenharmony_ci	       vmcs_readl(GUEST_SYSENTER_ESP),
628462306a36Sopenharmony_ci	       vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
628562306a36Sopenharmony_ci	vmx_dump_sel("CS:  ", GUEST_CS_SELECTOR);
628662306a36Sopenharmony_ci	vmx_dump_sel("DS:  ", GUEST_DS_SELECTOR);
628762306a36Sopenharmony_ci	vmx_dump_sel("SS:  ", GUEST_SS_SELECTOR);
628862306a36Sopenharmony_ci	vmx_dump_sel("ES:  ", GUEST_ES_SELECTOR);
628962306a36Sopenharmony_ci	vmx_dump_sel("FS:  ", GUEST_FS_SELECTOR);
629062306a36Sopenharmony_ci	vmx_dump_sel("GS:  ", GUEST_GS_SELECTOR);
629162306a36Sopenharmony_ci	vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
629262306a36Sopenharmony_ci	vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
629362306a36Sopenharmony_ci	vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
629462306a36Sopenharmony_ci	vmx_dump_sel("TR:  ", GUEST_TR_SELECTOR);
629562306a36Sopenharmony_ci	efer_slot = vmx_find_loadstore_msr_slot(&vmx->msr_autoload.guest, MSR_EFER);
629662306a36Sopenharmony_ci	if (vmentry_ctl & VM_ENTRY_LOAD_IA32_EFER)
629762306a36Sopenharmony_ci		pr_err("EFER= 0x%016llx\n", vmcs_read64(GUEST_IA32_EFER));
629862306a36Sopenharmony_ci	else if (efer_slot >= 0)
629962306a36Sopenharmony_ci		pr_err("EFER= 0x%016llx (autoload)\n",
630062306a36Sopenharmony_ci		       vmx->msr_autoload.guest.val[efer_slot].value);
630162306a36Sopenharmony_ci	else if (vmentry_ctl & VM_ENTRY_IA32E_MODE)
630262306a36Sopenharmony_ci		pr_err("EFER= 0x%016llx (effective)\n",
630362306a36Sopenharmony_ci		       vcpu->arch.efer | (EFER_LMA | EFER_LME));
630462306a36Sopenharmony_ci	else
630562306a36Sopenharmony_ci		pr_err("EFER= 0x%016llx (effective)\n",
630662306a36Sopenharmony_ci		       vcpu->arch.efer & ~(EFER_LMA | EFER_LME));
630762306a36Sopenharmony_ci	if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PAT)
630862306a36Sopenharmony_ci		pr_err("PAT = 0x%016llx\n", vmcs_read64(GUEST_IA32_PAT));
630962306a36Sopenharmony_ci	pr_err("DebugCtl = 0x%016llx  DebugExceptions = 0x%016lx\n",
631062306a36Sopenharmony_ci	       vmcs_read64(GUEST_IA32_DEBUGCTL),
631162306a36Sopenharmony_ci	       vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
631262306a36Sopenharmony_ci	if (cpu_has_load_perf_global_ctrl() &&
631362306a36Sopenharmony_ci	    vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
631462306a36Sopenharmony_ci		pr_err("PerfGlobCtl = 0x%016llx\n",
631562306a36Sopenharmony_ci		       vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
631662306a36Sopenharmony_ci	if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
631762306a36Sopenharmony_ci		pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
631862306a36Sopenharmony_ci	pr_err("Interruptibility = %08x  ActivityState = %08x\n",
631962306a36Sopenharmony_ci	       vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
632062306a36Sopenharmony_ci	       vmcs_read32(GUEST_ACTIVITY_STATE));
632162306a36Sopenharmony_ci	if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
632262306a36Sopenharmony_ci		pr_err("InterruptStatus = %04x\n",
632362306a36Sopenharmony_ci		       vmcs_read16(GUEST_INTR_STATUS));
632462306a36Sopenharmony_ci	if (vmcs_read32(VM_ENTRY_MSR_LOAD_COUNT) > 0)
632562306a36Sopenharmony_ci		vmx_dump_msrs("guest autoload", &vmx->msr_autoload.guest);
632662306a36Sopenharmony_ci	if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0)
632762306a36Sopenharmony_ci		vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest);
632862306a36Sopenharmony_ci
632962306a36Sopenharmony_ci	pr_err("*** Host State ***\n");
633062306a36Sopenharmony_ci	pr_err("RIP = 0x%016lx  RSP = 0x%016lx\n",
633162306a36Sopenharmony_ci	       vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
633262306a36Sopenharmony_ci	pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
633362306a36Sopenharmony_ci	       vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
633462306a36Sopenharmony_ci	       vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
633562306a36Sopenharmony_ci	       vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
633662306a36Sopenharmony_ci	       vmcs_read16(HOST_TR_SELECTOR));
633762306a36Sopenharmony_ci	pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
633862306a36Sopenharmony_ci	       vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
633962306a36Sopenharmony_ci	       vmcs_readl(HOST_TR_BASE));
634062306a36Sopenharmony_ci	pr_err("GDTBase=%016lx IDTBase=%016lx\n",
634162306a36Sopenharmony_ci	       vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
634262306a36Sopenharmony_ci	pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
634362306a36Sopenharmony_ci	       vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
634462306a36Sopenharmony_ci	       vmcs_readl(HOST_CR4));
634562306a36Sopenharmony_ci	pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
634662306a36Sopenharmony_ci	       vmcs_readl(HOST_IA32_SYSENTER_ESP),
634762306a36Sopenharmony_ci	       vmcs_read32(HOST_IA32_SYSENTER_CS),
634862306a36Sopenharmony_ci	       vmcs_readl(HOST_IA32_SYSENTER_EIP));
634962306a36Sopenharmony_ci	if (vmexit_ctl & VM_EXIT_LOAD_IA32_EFER)
635062306a36Sopenharmony_ci		pr_err("EFER= 0x%016llx\n", vmcs_read64(HOST_IA32_EFER));
635162306a36Sopenharmony_ci	if (vmexit_ctl & VM_EXIT_LOAD_IA32_PAT)
635262306a36Sopenharmony_ci		pr_err("PAT = 0x%016llx\n", vmcs_read64(HOST_IA32_PAT));
635362306a36Sopenharmony_ci	if (cpu_has_load_perf_global_ctrl() &&
635462306a36Sopenharmony_ci	    vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
635562306a36Sopenharmony_ci		pr_err("PerfGlobCtl = 0x%016llx\n",
635662306a36Sopenharmony_ci		       vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
635762306a36Sopenharmony_ci	if (vmcs_read32(VM_EXIT_MSR_LOAD_COUNT) > 0)
635862306a36Sopenharmony_ci		vmx_dump_msrs("host autoload", &vmx->msr_autoload.host);
635962306a36Sopenharmony_ci
636062306a36Sopenharmony_ci	pr_err("*** Control State ***\n");
636162306a36Sopenharmony_ci	pr_err("CPUBased=0x%08x SecondaryExec=0x%08x TertiaryExec=0x%016llx\n",
636262306a36Sopenharmony_ci	       cpu_based_exec_ctrl, secondary_exec_control, tertiary_exec_control);
636362306a36Sopenharmony_ci	pr_err("PinBased=0x%08x EntryControls=%08x ExitControls=%08x\n",
636462306a36Sopenharmony_ci	       pin_based_exec_ctrl, vmentry_ctl, vmexit_ctl);
636562306a36Sopenharmony_ci	pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
636662306a36Sopenharmony_ci	       vmcs_read32(EXCEPTION_BITMAP),
636762306a36Sopenharmony_ci	       vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
636862306a36Sopenharmony_ci	       vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
636962306a36Sopenharmony_ci	pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
637062306a36Sopenharmony_ci	       vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
637162306a36Sopenharmony_ci	       vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
637262306a36Sopenharmony_ci	       vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
637362306a36Sopenharmony_ci	pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
637462306a36Sopenharmony_ci	       vmcs_read32(VM_EXIT_INTR_INFO),
637562306a36Sopenharmony_ci	       vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
637662306a36Sopenharmony_ci	       vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
637762306a36Sopenharmony_ci	pr_err("        reason=%08x qualification=%016lx\n",
637862306a36Sopenharmony_ci	       vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
637962306a36Sopenharmony_ci	pr_err("IDTVectoring: info=%08x errcode=%08x\n",
638062306a36Sopenharmony_ci	       vmcs_read32(IDT_VECTORING_INFO_FIELD),
638162306a36Sopenharmony_ci	       vmcs_read32(IDT_VECTORING_ERROR_CODE));
638262306a36Sopenharmony_ci	pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
638362306a36Sopenharmony_ci	if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
638462306a36Sopenharmony_ci		pr_err("TSC Multiplier = 0x%016llx\n",
638562306a36Sopenharmony_ci		       vmcs_read64(TSC_MULTIPLIER));
638662306a36Sopenharmony_ci	if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) {
638762306a36Sopenharmony_ci		if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
638862306a36Sopenharmony_ci			u16 status = vmcs_read16(GUEST_INTR_STATUS);
638962306a36Sopenharmony_ci			pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff);
639062306a36Sopenharmony_ci		}
639162306a36Sopenharmony_ci		pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
639262306a36Sopenharmony_ci		if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
639362306a36Sopenharmony_ci			pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR));
639462306a36Sopenharmony_ci		pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR));
639562306a36Sopenharmony_ci	}
639662306a36Sopenharmony_ci	if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
639762306a36Sopenharmony_ci		pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
639862306a36Sopenharmony_ci	if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
639962306a36Sopenharmony_ci		pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
640062306a36Sopenharmony_ci	if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
640162306a36Sopenharmony_ci		pr_err("PLE Gap=%08x Window=%08x\n",
640262306a36Sopenharmony_ci		       vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
640362306a36Sopenharmony_ci	if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
640462306a36Sopenharmony_ci		pr_err("Virtual processor ID = 0x%04x\n",
640562306a36Sopenharmony_ci		       vmcs_read16(VIRTUAL_PROCESSOR_ID));
640662306a36Sopenharmony_ci}
640762306a36Sopenharmony_ci
640862306a36Sopenharmony_ci/*
640962306a36Sopenharmony_ci * The guest has exited.  See if we can fix it or if we need userspace
641062306a36Sopenharmony_ci * assistance.
641162306a36Sopenharmony_ci */
641262306a36Sopenharmony_cistatic int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
641362306a36Sopenharmony_ci{
641462306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
641562306a36Sopenharmony_ci	union vmx_exit_reason exit_reason = vmx->exit_reason;
641662306a36Sopenharmony_ci	u32 vectoring_info = vmx->idt_vectoring_info;
641762306a36Sopenharmony_ci	u16 exit_handler_index;
641862306a36Sopenharmony_ci
641962306a36Sopenharmony_ci	/*
642062306a36Sopenharmony_ci	 * Flush logged GPAs PML buffer, this will make dirty_bitmap more
642162306a36Sopenharmony_ci	 * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before
642262306a36Sopenharmony_ci	 * querying dirty_bitmap, we only need to kick all vcpus out of guest
642362306a36Sopenharmony_ci	 * mode as if vcpus is in root mode, the PML buffer must has been
642462306a36Sopenharmony_ci	 * flushed already.  Note, PML is never enabled in hardware while
642562306a36Sopenharmony_ci	 * running L2.
642662306a36Sopenharmony_ci	 */
642762306a36Sopenharmony_ci	if (enable_pml && !is_guest_mode(vcpu))
642862306a36Sopenharmony_ci		vmx_flush_pml_buffer(vcpu);
642962306a36Sopenharmony_ci
643062306a36Sopenharmony_ci	/*
643162306a36Sopenharmony_ci	 * KVM should never reach this point with a pending nested VM-Enter.
643262306a36Sopenharmony_ci	 * More specifically, short-circuiting VM-Entry to emulate L2 due to
643362306a36Sopenharmony_ci	 * invalid guest state should never happen as that means KVM knowingly
643462306a36Sopenharmony_ci	 * allowed a nested VM-Enter with an invalid vmcs12.  More below.
643562306a36Sopenharmony_ci	 */
643662306a36Sopenharmony_ci	if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
643762306a36Sopenharmony_ci		return -EIO;
643862306a36Sopenharmony_ci
643962306a36Sopenharmony_ci	if (is_guest_mode(vcpu)) {
644062306a36Sopenharmony_ci		/*
644162306a36Sopenharmony_ci		 * PML is never enabled when running L2, bail immediately if a
644262306a36Sopenharmony_ci		 * PML full exit occurs as something is horribly wrong.
644362306a36Sopenharmony_ci		 */
644462306a36Sopenharmony_ci		if (exit_reason.basic == EXIT_REASON_PML_FULL)
644562306a36Sopenharmony_ci			goto unexpected_vmexit;
644662306a36Sopenharmony_ci
644762306a36Sopenharmony_ci		/*
644862306a36Sopenharmony_ci		 * The host physical addresses of some pages of guest memory
644962306a36Sopenharmony_ci		 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
645062306a36Sopenharmony_ci		 * Page). The CPU may write to these pages via their host
645162306a36Sopenharmony_ci		 * physical address while L2 is running, bypassing any
645262306a36Sopenharmony_ci		 * address-translation-based dirty tracking (e.g. EPT write
645362306a36Sopenharmony_ci		 * protection).
645462306a36Sopenharmony_ci		 *
645562306a36Sopenharmony_ci		 * Mark them dirty on every exit from L2 to prevent them from
645662306a36Sopenharmony_ci		 * getting out of sync with dirty tracking.
645762306a36Sopenharmony_ci		 */
645862306a36Sopenharmony_ci		nested_mark_vmcs12_pages_dirty(vcpu);
645962306a36Sopenharmony_ci
646062306a36Sopenharmony_ci		/*
646162306a36Sopenharmony_ci		 * Synthesize a triple fault if L2 state is invalid.  In normal
646262306a36Sopenharmony_ci		 * operation, nested VM-Enter rejects any attempt to enter L2
646362306a36Sopenharmony_ci		 * with invalid state.  However, those checks are skipped if
646462306a36Sopenharmony_ci		 * state is being stuffed via RSM or KVM_SET_NESTED_STATE.  If
646562306a36Sopenharmony_ci		 * L2 state is invalid, it means either L1 modified SMRAM state
646662306a36Sopenharmony_ci		 * or userspace provided bad state.  Synthesize TRIPLE_FAULT as
646762306a36Sopenharmony_ci		 * doing so is architecturally allowed in the RSM case, and is
646862306a36Sopenharmony_ci		 * the least awful solution for the userspace case without
646962306a36Sopenharmony_ci		 * risking false positives.
647062306a36Sopenharmony_ci		 */
647162306a36Sopenharmony_ci		if (vmx->emulation_required) {
647262306a36Sopenharmony_ci			nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
647362306a36Sopenharmony_ci			return 1;
647462306a36Sopenharmony_ci		}
647562306a36Sopenharmony_ci
647662306a36Sopenharmony_ci		if (nested_vmx_reflect_vmexit(vcpu))
647762306a36Sopenharmony_ci			return 1;
647862306a36Sopenharmony_ci	}
647962306a36Sopenharmony_ci
648062306a36Sopenharmony_ci	/* If guest state is invalid, start emulating.  L2 is handled above. */
648162306a36Sopenharmony_ci	if (vmx->emulation_required)
648262306a36Sopenharmony_ci		return handle_invalid_guest_state(vcpu);
648362306a36Sopenharmony_ci
648462306a36Sopenharmony_ci	if (exit_reason.failed_vmentry) {
648562306a36Sopenharmony_ci		dump_vmcs(vcpu);
648662306a36Sopenharmony_ci		vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
648762306a36Sopenharmony_ci		vcpu->run->fail_entry.hardware_entry_failure_reason
648862306a36Sopenharmony_ci			= exit_reason.full;
648962306a36Sopenharmony_ci		vcpu->run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu;
649062306a36Sopenharmony_ci		return 0;
649162306a36Sopenharmony_ci	}
649262306a36Sopenharmony_ci
649362306a36Sopenharmony_ci	if (unlikely(vmx->fail)) {
649462306a36Sopenharmony_ci		dump_vmcs(vcpu);
649562306a36Sopenharmony_ci		vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
649662306a36Sopenharmony_ci		vcpu->run->fail_entry.hardware_entry_failure_reason
649762306a36Sopenharmony_ci			= vmcs_read32(VM_INSTRUCTION_ERROR);
649862306a36Sopenharmony_ci		vcpu->run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu;
649962306a36Sopenharmony_ci		return 0;
650062306a36Sopenharmony_ci	}
650162306a36Sopenharmony_ci
650262306a36Sopenharmony_ci	/*
650362306a36Sopenharmony_ci	 * Note:
650462306a36Sopenharmony_ci	 * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by
650562306a36Sopenharmony_ci	 * delivery event since it indicates guest is accessing MMIO.
650662306a36Sopenharmony_ci	 * The vm-exit can be triggered again after return to guest that
650762306a36Sopenharmony_ci	 * will cause infinite loop.
650862306a36Sopenharmony_ci	 */
650962306a36Sopenharmony_ci	if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
651062306a36Sopenharmony_ci	    (exit_reason.basic != EXIT_REASON_EXCEPTION_NMI &&
651162306a36Sopenharmony_ci	     exit_reason.basic != EXIT_REASON_EPT_VIOLATION &&
651262306a36Sopenharmony_ci	     exit_reason.basic != EXIT_REASON_PML_FULL &&
651362306a36Sopenharmony_ci	     exit_reason.basic != EXIT_REASON_APIC_ACCESS &&
651462306a36Sopenharmony_ci	     exit_reason.basic != EXIT_REASON_TASK_SWITCH &&
651562306a36Sopenharmony_ci	     exit_reason.basic != EXIT_REASON_NOTIFY)) {
651662306a36Sopenharmony_ci		int ndata = 3;
651762306a36Sopenharmony_ci
651862306a36Sopenharmony_ci		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
651962306a36Sopenharmony_ci		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
652062306a36Sopenharmony_ci		vcpu->run->internal.data[0] = vectoring_info;
652162306a36Sopenharmony_ci		vcpu->run->internal.data[1] = exit_reason.full;
652262306a36Sopenharmony_ci		vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
652362306a36Sopenharmony_ci		if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) {
652462306a36Sopenharmony_ci			vcpu->run->internal.data[ndata++] =
652562306a36Sopenharmony_ci				vmcs_read64(GUEST_PHYSICAL_ADDRESS);
652662306a36Sopenharmony_ci		}
652762306a36Sopenharmony_ci		vcpu->run->internal.data[ndata++] = vcpu->arch.last_vmentry_cpu;
652862306a36Sopenharmony_ci		vcpu->run->internal.ndata = ndata;
652962306a36Sopenharmony_ci		return 0;
653062306a36Sopenharmony_ci	}
653162306a36Sopenharmony_ci
653262306a36Sopenharmony_ci	if (unlikely(!enable_vnmi &&
653362306a36Sopenharmony_ci		     vmx->loaded_vmcs->soft_vnmi_blocked)) {
653462306a36Sopenharmony_ci		if (!vmx_interrupt_blocked(vcpu)) {
653562306a36Sopenharmony_ci			vmx->loaded_vmcs->soft_vnmi_blocked = 0;
653662306a36Sopenharmony_ci		} else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
653762306a36Sopenharmony_ci			   vcpu->arch.nmi_pending) {
653862306a36Sopenharmony_ci			/*
653962306a36Sopenharmony_ci			 * This CPU don't support us in finding the end of an
654062306a36Sopenharmony_ci			 * NMI-blocked window if the guest runs with IRQs
654162306a36Sopenharmony_ci			 * disabled. So we pull the trigger after 1 s of
654262306a36Sopenharmony_ci			 * futile waiting, but inform the user about this.
654362306a36Sopenharmony_ci			 */
654462306a36Sopenharmony_ci			printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
654562306a36Sopenharmony_ci			       "state on VCPU %d after 1 s timeout\n",
654662306a36Sopenharmony_ci			       __func__, vcpu->vcpu_id);
654762306a36Sopenharmony_ci			vmx->loaded_vmcs->soft_vnmi_blocked = 0;
654862306a36Sopenharmony_ci		}
654962306a36Sopenharmony_ci	}
655062306a36Sopenharmony_ci
655162306a36Sopenharmony_ci	if (exit_fastpath != EXIT_FASTPATH_NONE)
655262306a36Sopenharmony_ci		return 1;
655362306a36Sopenharmony_ci
655462306a36Sopenharmony_ci	if (exit_reason.basic >= kvm_vmx_max_exit_handlers)
655562306a36Sopenharmony_ci		goto unexpected_vmexit;
655662306a36Sopenharmony_ci#ifdef CONFIG_RETPOLINE
655762306a36Sopenharmony_ci	if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
655862306a36Sopenharmony_ci		return kvm_emulate_wrmsr(vcpu);
655962306a36Sopenharmony_ci	else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER)
656062306a36Sopenharmony_ci		return handle_preemption_timer(vcpu);
656162306a36Sopenharmony_ci	else if (exit_reason.basic == EXIT_REASON_INTERRUPT_WINDOW)
656262306a36Sopenharmony_ci		return handle_interrupt_window(vcpu);
656362306a36Sopenharmony_ci	else if (exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
656462306a36Sopenharmony_ci		return handle_external_interrupt(vcpu);
656562306a36Sopenharmony_ci	else if (exit_reason.basic == EXIT_REASON_HLT)
656662306a36Sopenharmony_ci		return kvm_emulate_halt(vcpu);
656762306a36Sopenharmony_ci	else if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG)
656862306a36Sopenharmony_ci		return handle_ept_misconfig(vcpu);
656962306a36Sopenharmony_ci#endif
657062306a36Sopenharmony_ci
657162306a36Sopenharmony_ci	exit_handler_index = array_index_nospec((u16)exit_reason.basic,
657262306a36Sopenharmony_ci						kvm_vmx_max_exit_handlers);
657362306a36Sopenharmony_ci	if (!kvm_vmx_exit_handlers[exit_handler_index])
657462306a36Sopenharmony_ci		goto unexpected_vmexit;
657562306a36Sopenharmony_ci
657662306a36Sopenharmony_ci	return kvm_vmx_exit_handlers[exit_handler_index](vcpu);
657762306a36Sopenharmony_ci
657862306a36Sopenharmony_ciunexpected_vmexit:
657962306a36Sopenharmony_ci	vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
658062306a36Sopenharmony_ci		    exit_reason.full);
658162306a36Sopenharmony_ci	dump_vmcs(vcpu);
658262306a36Sopenharmony_ci	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
658362306a36Sopenharmony_ci	vcpu->run->internal.suberror =
658462306a36Sopenharmony_ci			KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
658562306a36Sopenharmony_ci	vcpu->run->internal.ndata = 2;
658662306a36Sopenharmony_ci	vcpu->run->internal.data[0] = exit_reason.full;
658762306a36Sopenharmony_ci	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
658862306a36Sopenharmony_ci	return 0;
658962306a36Sopenharmony_ci}
659062306a36Sopenharmony_ci
659162306a36Sopenharmony_cistatic int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
659262306a36Sopenharmony_ci{
659362306a36Sopenharmony_ci	int ret = __vmx_handle_exit(vcpu, exit_fastpath);
659462306a36Sopenharmony_ci
659562306a36Sopenharmony_ci	/*
659662306a36Sopenharmony_ci	 * Exit to user space when bus lock detected to inform that there is
659762306a36Sopenharmony_ci	 * a bus lock in guest.
659862306a36Sopenharmony_ci	 */
659962306a36Sopenharmony_ci	if (to_vmx(vcpu)->exit_reason.bus_lock_detected) {
660062306a36Sopenharmony_ci		if (ret > 0)
660162306a36Sopenharmony_ci			vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK;
660262306a36Sopenharmony_ci
660362306a36Sopenharmony_ci		vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK;
660462306a36Sopenharmony_ci		return 0;
660562306a36Sopenharmony_ci	}
660662306a36Sopenharmony_ci	return ret;
660762306a36Sopenharmony_ci}
660862306a36Sopenharmony_ci
/*
 * Flush the L1D cache, either through MSR_IA32_FLUSH_CMD when the CPU
 * advertises X86_FEATURE_FLUSH_L1D, or in software when microcode providing
 * the cache control MSR is not loaded.
 *
 * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to
 * flush it is required to read in 64 KiB because the replacement algorithm
 * is not exactly LRU. This could be sized at runtime via topology
 * information but as all relevant affected CPUs have 32KiB L1D cache size
 * there is no point in doing so.
 *
 * In 'cond' mode the flush is skipped unless the per-vCPU or the per-CPU
 * "flush needed" bit was set; both bits are consumed (cleared) here.
 */
static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
{
	/* Number of bytes of vmx_l1d_flush_pages walked by the software flush. */
	int size = PAGE_SIZE << L1D_CACHE_ORDER;

	/*
	 * This code is only executed when the flush mode is 'cond' or
	 * 'always'
	 */
	if (static_branch_likely(&vmx_l1d_flush_cond)) {
		bool flush_l1d;

		/*
		 * Clear the per-vcpu flush bit, it gets set again
		 * either from vcpu_run() or from one of the unsafe
		 * VMEXIT handlers.
		 */
		flush_l1d = vcpu->arch.l1tf_flush_l1d;
		vcpu->arch.l1tf_flush_l1d = false;

		/*
		 * Clear the per-cpu flush bit, it gets set again from
		 * the interrupt handlers.
		 */
		flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
		kvm_clear_cpu_l1tf_flush_l1d();

		/* Neither bit was set: nothing to flush in 'cond' mode. */
		if (!flush_l1d)
			return;
	}

	/* Counted for both the MSR based and the software flush. */
	vcpu->stat.l1d_flush++;

	/* Preferred path: let the CPU do the flush via the command MSR. */
	if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
		native_wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
		return;
	}

	/*
	 * Software fallback: two passes over 'size' bytes of
	 * vmx_l1d_flush_pages, using eax as the running byte offset and ecx
	 * as a scratch destination for the loads.  cpuid writes
	 * eax/ebx/ecx/edx, which is why all four are in the clobber list.
	 * NOTE(review): cpuid is presumably used as a serializing instruction
	 * between the two passes - confirm.
	 */
	asm volatile(
		/* First ensure the pages are in the TLB: one load per 4096 bytes */
		"xorl	%%eax, %%eax\n"
		".Lpopulate_tlb:\n\t"
		"movzbl	(%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
		"addl	$4096, %%eax\n\t"
		"cmpl	%%eax, %[size]\n\t"
		"jne	.Lpopulate_tlb\n\t"
		"xorl	%%eax, %%eax\n\t"
		"cpuid\n\t"
		/* Now fill the cache: one load per 64 bytes */
		"xorl	%%eax, %%eax\n"
		".Lfill_cache:\n"
		"movzbl	(%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
		"addl	$64, %%eax\n\t"
		"cmpl	%%eax, %[size]\n\t"
		"jne	.Lfill_cache\n\t"
		"lfence\n"
		:: [flush_pages] "r" (vmx_l1d_flush_pages),
		    [size] "r" (size)
		: "eax", "ebx", "ecx", "edx");
}
667862306a36Sopenharmony_ci
667962306a36Sopenharmony_cistatic void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
668062306a36Sopenharmony_ci{
668162306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
668262306a36Sopenharmony_ci	int tpr_threshold;
668362306a36Sopenharmony_ci
668462306a36Sopenharmony_ci	if (is_guest_mode(vcpu) &&
668562306a36Sopenharmony_ci		nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
668662306a36Sopenharmony_ci		return;
668762306a36Sopenharmony_ci
668862306a36Sopenharmony_ci	tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr;
668962306a36Sopenharmony_ci	if (is_guest_mode(vcpu))
669062306a36Sopenharmony_ci		to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold;
669162306a36Sopenharmony_ci	else
669262306a36Sopenharmony_ci		vmcs_write32(TPR_THRESHOLD, tpr_threshold);
669362306a36Sopenharmony_ci}
669462306a36Sopenharmony_ci
669562306a36Sopenharmony_civoid vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
669662306a36Sopenharmony_ci{
669762306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
669862306a36Sopenharmony_ci	u32 sec_exec_control;
669962306a36Sopenharmony_ci
670062306a36Sopenharmony_ci	if (!lapic_in_kernel(vcpu))
670162306a36Sopenharmony_ci		return;
670262306a36Sopenharmony_ci
670362306a36Sopenharmony_ci	if (!flexpriority_enabled &&
670462306a36Sopenharmony_ci	    !cpu_has_vmx_virtualize_x2apic_mode())
670562306a36Sopenharmony_ci		return;
670662306a36Sopenharmony_ci
670762306a36Sopenharmony_ci	/* Postpone execution until vmcs01 is the current VMCS. */
670862306a36Sopenharmony_ci	if (is_guest_mode(vcpu)) {
670962306a36Sopenharmony_ci		vmx->nested.change_vmcs01_virtual_apic_mode = true;
671062306a36Sopenharmony_ci		return;
671162306a36Sopenharmony_ci	}
671262306a36Sopenharmony_ci
671362306a36Sopenharmony_ci	sec_exec_control = secondary_exec_controls_get(vmx);
671462306a36Sopenharmony_ci	sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
671562306a36Sopenharmony_ci			      SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
671662306a36Sopenharmony_ci
671762306a36Sopenharmony_ci	switch (kvm_get_apic_mode(vcpu)) {
671862306a36Sopenharmony_ci	case LAPIC_MODE_INVALID:
671962306a36Sopenharmony_ci		WARN_ONCE(true, "Invalid local APIC state");
672062306a36Sopenharmony_ci		break;
672162306a36Sopenharmony_ci	case LAPIC_MODE_DISABLED:
672262306a36Sopenharmony_ci		break;
672362306a36Sopenharmony_ci	case LAPIC_MODE_XAPIC:
672462306a36Sopenharmony_ci		if (flexpriority_enabled) {
672562306a36Sopenharmony_ci			sec_exec_control |=
672662306a36Sopenharmony_ci				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
672762306a36Sopenharmony_ci			kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
672862306a36Sopenharmony_ci
672962306a36Sopenharmony_ci			/*
673062306a36Sopenharmony_ci			 * Flush the TLB, reloading the APIC access page will
673162306a36Sopenharmony_ci			 * only do so if its physical address has changed, but
673262306a36Sopenharmony_ci			 * the guest may have inserted a non-APIC mapping into
673362306a36Sopenharmony_ci			 * the TLB while the APIC access page was disabled.
673462306a36Sopenharmony_ci			 */
673562306a36Sopenharmony_ci			kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
673662306a36Sopenharmony_ci		}
673762306a36Sopenharmony_ci		break;
673862306a36Sopenharmony_ci	case LAPIC_MODE_X2APIC:
673962306a36Sopenharmony_ci		if (cpu_has_vmx_virtualize_x2apic_mode())
674062306a36Sopenharmony_ci			sec_exec_control |=
674162306a36Sopenharmony_ci				SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
674262306a36Sopenharmony_ci		break;
674362306a36Sopenharmony_ci	}
674462306a36Sopenharmony_ci	secondary_exec_controls_set(vmx, sec_exec_control);
674562306a36Sopenharmony_ci
674662306a36Sopenharmony_ci	vmx_update_msr_bitmap_x2apic(vcpu);
674762306a36Sopenharmony_ci}
674862306a36Sopenharmony_ci
674962306a36Sopenharmony_cistatic void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
675062306a36Sopenharmony_ci{
675162306a36Sopenharmony_ci	const gfn_t gfn = APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT;
675262306a36Sopenharmony_ci	struct kvm *kvm = vcpu->kvm;
675362306a36Sopenharmony_ci	struct kvm_memslots *slots = kvm_memslots(kvm);
675462306a36Sopenharmony_ci	struct kvm_memory_slot *slot;
675562306a36Sopenharmony_ci	unsigned long mmu_seq;
675662306a36Sopenharmony_ci	kvm_pfn_t pfn;
675762306a36Sopenharmony_ci
675862306a36Sopenharmony_ci	/* Defer reload until vmcs01 is the current VMCS. */
675962306a36Sopenharmony_ci	if (is_guest_mode(vcpu)) {
676062306a36Sopenharmony_ci		to_vmx(vcpu)->nested.reload_vmcs01_apic_access_page = true;
676162306a36Sopenharmony_ci		return;
676262306a36Sopenharmony_ci	}
676362306a36Sopenharmony_ci
676462306a36Sopenharmony_ci	if (!(secondary_exec_controls_get(to_vmx(vcpu)) &
676562306a36Sopenharmony_ci	    SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
676662306a36Sopenharmony_ci		return;
676762306a36Sopenharmony_ci
676862306a36Sopenharmony_ci	/*
676962306a36Sopenharmony_ci	 * Grab the memslot so that the hva lookup for the mmu_notifier retry
677062306a36Sopenharmony_ci	 * is guaranteed to use the same memslot as the pfn lookup, i.e. rely
677162306a36Sopenharmony_ci	 * on the pfn lookup's validation of the memslot to ensure a valid hva
677262306a36Sopenharmony_ci	 * is used for the retry check.
677362306a36Sopenharmony_ci	 */
677462306a36Sopenharmony_ci	slot = id_to_memslot(slots, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT);
677562306a36Sopenharmony_ci	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
677662306a36Sopenharmony_ci		return;
677762306a36Sopenharmony_ci
677862306a36Sopenharmony_ci	/*
677962306a36Sopenharmony_ci	 * Ensure that the mmu_notifier sequence count is read before KVM
678062306a36Sopenharmony_ci	 * retrieves the pfn from the primary MMU.  Note, the memslot is
678162306a36Sopenharmony_ci	 * protected by SRCU, not the mmu_notifier.  Pairs with the smp_wmb()
678262306a36Sopenharmony_ci	 * in kvm_mmu_invalidate_end().
678362306a36Sopenharmony_ci	 */
678462306a36Sopenharmony_ci	mmu_seq = kvm->mmu_invalidate_seq;
678562306a36Sopenharmony_ci	smp_rmb();
678662306a36Sopenharmony_ci
678762306a36Sopenharmony_ci	/*
678862306a36Sopenharmony_ci	 * No need to retry if the memslot does not exist or is invalid.  KVM
678962306a36Sopenharmony_ci	 * controls the APIC-access page memslot, and only deletes the memslot
679062306a36Sopenharmony_ci	 * if APICv is permanently inhibited, i.e. the memslot won't reappear.
679162306a36Sopenharmony_ci	 */
679262306a36Sopenharmony_ci	pfn = gfn_to_pfn_memslot(slot, gfn);
679362306a36Sopenharmony_ci	if (is_error_noslot_pfn(pfn))
679462306a36Sopenharmony_ci		return;
679562306a36Sopenharmony_ci
679662306a36Sopenharmony_ci	read_lock(&vcpu->kvm->mmu_lock);
679762306a36Sopenharmony_ci	if (mmu_invalidate_retry_hva(kvm, mmu_seq,
679862306a36Sopenharmony_ci				     gfn_to_hva_memslot(slot, gfn))) {
679962306a36Sopenharmony_ci		kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
680062306a36Sopenharmony_ci		read_unlock(&vcpu->kvm->mmu_lock);
680162306a36Sopenharmony_ci		goto out;
680262306a36Sopenharmony_ci	}
680362306a36Sopenharmony_ci
680462306a36Sopenharmony_ci	vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(pfn));
680562306a36Sopenharmony_ci	read_unlock(&vcpu->kvm->mmu_lock);
680662306a36Sopenharmony_ci
680762306a36Sopenharmony_ci	/*
680862306a36Sopenharmony_ci	 * No need for a manual TLB flush at this point, KVM has already done a
680962306a36Sopenharmony_ci	 * flush if there were SPTEs pointing at the previous page.
681062306a36Sopenharmony_ci	 */
681162306a36Sopenharmony_ciout:
681262306a36Sopenharmony_ci	/*
681362306a36Sopenharmony_ci	 * Do not pin apic access page in memory, the MMU notifier
681462306a36Sopenharmony_ci	 * will call us again if it is migrated or swapped out.
681562306a36Sopenharmony_ci	 */
681662306a36Sopenharmony_ci	kvm_release_pfn_clean(pfn);
681762306a36Sopenharmony_ci}
681862306a36Sopenharmony_ci
681962306a36Sopenharmony_cistatic void vmx_hwapic_isr_update(int max_isr)
682062306a36Sopenharmony_ci{
682162306a36Sopenharmony_ci	u16 status;
682262306a36Sopenharmony_ci	u8 old;
682362306a36Sopenharmony_ci
682462306a36Sopenharmony_ci	if (max_isr == -1)
682562306a36Sopenharmony_ci		max_isr = 0;
682662306a36Sopenharmony_ci
682762306a36Sopenharmony_ci	status = vmcs_read16(GUEST_INTR_STATUS);
682862306a36Sopenharmony_ci	old = status >> 8;
682962306a36Sopenharmony_ci	if (max_isr != old) {
683062306a36Sopenharmony_ci		status &= 0xff;
683162306a36Sopenharmony_ci		status |= max_isr << 8;
683262306a36Sopenharmony_ci		vmcs_write16(GUEST_INTR_STATUS, status);
683362306a36Sopenharmony_ci	}
683462306a36Sopenharmony_ci}
683562306a36Sopenharmony_ci
683662306a36Sopenharmony_cistatic void vmx_set_rvi(int vector)
683762306a36Sopenharmony_ci{
683862306a36Sopenharmony_ci	u16 status;
683962306a36Sopenharmony_ci	u8 old;
684062306a36Sopenharmony_ci
684162306a36Sopenharmony_ci	if (vector == -1)
684262306a36Sopenharmony_ci		vector = 0;
684362306a36Sopenharmony_ci
684462306a36Sopenharmony_ci	status = vmcs_read16(GUEST_INTR_STATUS);
684562306a36Sopenharmony_ci	old = (u8)status & 0xff;
684662306a36Sopenharmony_ci	if ((u8)vector != old) {
684762306a36Sopenharmony_ci		status &= ~0xff;
684862306a36Sopenharmony_ci		status |= (u8)vector;
684962306a36Sopenharmony_ci		vmcs_write16(GUEST_INTR_STATUS, status);
685062306a36Sopenharmony_ci	}
685162306a36Sopenharmony_ci}
685262306a36Sopenharmony_ci
/* Propagate the highest pending IRR vector to RVI, unless L2 is running. */
static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
{
	/*
	 * While L2 runs, RVI would only matter if vmcs12 has
	 * virtual-interrupt delivery enabled.  That in turn is only possible
	 * when L1 also intercepts external interrupts, in which case the
	 * interrupt must be intercepted rather than written into vmcs02's
	 * RVI.  Hence there is nothing to do in guest mode.
	 */
	if (is_guest_mode(vcpu))
		return;

	vmx_set_rvi(max_irr);
}
686662306a36Sopenharmony_ci
686762306a36Sopenharmony_cistatic int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
686862306a36Sopenharmony_ci{
686962306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
687062306a36Sopenharmony_ci	int max_irr;
687162306a36Sopenharmony_ci	bool got_posted_interrupt;
687262306a36Sopenharmony_ci
687362306a36Sopenharmony_ci	if (KVM_BUG_ON(!enable_apicv, vcpu->kvm))
687462306a36Sopenharmony_ci		return -EIO;
687562306a36Sopenharmony_ci
687662306a36Sopenharmony_ci	if (pi_test_on(&vmx->pi_desc)) {
687762306a36Sopenharmony_ci		pi_clear_on(&vmx->pi_desc);
687862306a36Sopenharmony_ci		/*
687962306a36Sopenharmony_ci		 * IOMMU can write to PID.ON, so the barrier matters even on UP.
688062306a36Sopenharmony_ci		 * But on x86 this is just a compiler barrier anyway.
688162306a36Sopenharmony_ci		 */
688262306a36Sopenharmony_ci		smp_mb__after_atomic();
688362306a36Sopenharmony_ci		got_posted_interrupt =
688462306a36Sopenharmony_ci			kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
688562306a36Sopenharmony_ci	} else {
688662306a36Sopenharmony_ci		max_irr = kvm_lapic_find_highest_irr(vcpu);
688762306a36Sopenharmony_ci		got_posted_interrupt = false;
688862306a36Sopenharmony_ci	}
688962306a36Sopenharmony_ci
689062306a36Sopenharmony_ci	/*
689162306a36Sopenharmony_ci	 * Newly recognized interrupts are injected via either virtual interrupt
689262306a36Sopenharmony_ci	 * delivery (RVI) or KVM_REQ_EVENT.  Virtual interrupt delivery is
689362306a36Sopenharmony_ci	 * disabled in two cases:
689462306a36Sopenharmony_ci	 *
689562306a36Sopenharmony_ci	 * 1) If L2 is running and the vCPU has a new pending interrupt.  If L1
689662306a36Sopenharmony_ci	 * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a
689762306a36Sopenharmony_ci	 * VM-Exit to L1.  If L1 doesn't want to exit, the interrupt is injected
689862306a36Sopenharmony_ci	 * into L2, but KVM doesn't use virtual interrupt delivery to inject
689962306a36Sopenharmony_ci	 * interrupts into L2, and so KVM_REQ_EVENT is again needed.
690062306a36Sopenharmony_ci	 *
690162306a36Sopenharmony_ci	 * 2) If APICv is disabled for this vCPU, assigned devices may still
690262306a36Sopenharmony_ci	 * attempt to post interrupts.  The posted interrupt vector will cause
690362306a36Sopenharmony_ci	 * a VM-Exit and the subsequent entry will call sync_pir_to_irr.
690462306a36Sopenharmony_ci	 */
690562306a36Sopenharmony_ci	if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
690662306a36Sopenharmony_ci		vmx_set_rvi(max_irr);
690762306a36Sopenharmony_ci	else if (got_posted_interrupt)
690862306a36Sopenharmony_ci		kvm_make_request(KVM_REQ_EVENT, vcpu);
690962306a36Sopenharmony_ci
691062306a36Sopenharmony_ci	return max_irr;
691162306a36Sopenharmony_ci}
691262306a36Sopenharmony_ci
691362306a36Sopenharmony_cistatic void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
691462306a36Sopenharmony_ci{
691562306a36Sopenharmony_ci	if (!kvm_vcpu_apicv_active(vcpu))
691662306a36Sopenharmony_ci		return;
691762306a36Sopenharmony_ci
691862306a36Sopenharmony_ci	vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
691962306a36Sopenharmony_ci	vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
692062306a36Sopenharmony_ci	vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
692162306a36Sopenharmony_ci	vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
692262306a36Sopenharmony_ci}
692362306a36Sopenharmony_ci
692462306a36Sopenharmony_cistatic void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu)
692562306a36Sopenharmony_ci{
692662306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
692762306a36Sopenharmony_ci
692862306a36Sopenharmony_ci	pi_clear_on(&vmx->pi_desc);
692962306a36Sopenharmony_ci	memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
693062306a36Sopenharmony_ci}
693162306a36Sopenharmony_ci
693262306a36Sopenharmony_civoid vmx_do_interrupt_irqoff(unsigned long entry);
693362306a36Sopenharmony_civoid vmx_do_nmi_irqoff(void);
693462306a36Sopenharmony_ci
693562306a36Sopenharmony_cistatic void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
693662306a36Sopenharmony_ci{
693762306a36Sopenharmony_ci	/*
693862306a36Sopenharmony_ci	 * Save xfd_err to guest_fpu before interrupt is enabled, so the
693962306a36Sopenharmony_ci	 * MSR value is not clobbered by the host activity before the guest
694062306a36Sopenharmony_ci	 * has chance to consume it.
694162306a36Sopenharmony_ci	 *
694262306a36Sopenharmony_ci	 * Do not blindly read xfd_err here, since this exception might
694362306a36Sopenharmony_ci	 * be caused by L1 interception on a platform which doesn't
694462306a36Sopenharmony_ci	 * support xfd at all.
694562306a36Sopenharmony_ci	 *
694662306a36Sopenharmony_ci	 * Do it conditionally upon guest_fpu::xfd. xfd_err matters
694762306a36Sopenharmony_ci	 * only when xfd contains a non-zero value.
694862306a36Sopenharmony_ci	 *
694962306a36Sopenharmony_ci	 * Queuing exception is done in vmx_handle_exit. See comment there.
695062306a36Sopenharmony_ci	 */
695162306a36Sopenharmony_ci	if (vcpu->arch.guest_fpu.fpstate->xfd)
695262306a36Sopenharmony_ci		rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
695362306a36Sopenharmony_ci}
695462306a36Sopenharmony_ci
695562306a36Sopenharmony_cistatic void handle_exception_irqoff(struct vcpu_vmx *vmx)
695662306a36Sopenharmony_ci{
695762306a36Sopenharmony_ci	u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
695862306a36Sopenharmony_ci
695962306a36Sopenharmony_ci	/* if exit due to PF check for async PF */
696062306a36Sopenharmony_ci	if (is_page_fault(intr_info))
696162306a36Sopenharmony_ci		vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
696262306a36Sopenharmony_ci	/* if exit due to NM, handle before interrupts are enabled */
696362306a36Sopenharmony_ci	else if (is_nm_fault(intr_info))
696462306a36Sopenharmony_ci		handle_nm_fault_irqoff(&vmx->vcpu);
696562306a36Sopenharmony_ci	/* Handle machine checks before interrupts are enabled */
696662306a36Sopenharmony_ci	else if (is_machine_check(intr_info))
696762306a36Sopenharmony_ci		kvm_machine_check();
696862306a36Sopenharmony_ci}
696962306a36Sopenharmony_ci
697062306a36Sopenharmony_cistatic void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
697162306a36Sopenharmony_ci{
697262306a36Sopenharmony_ci	u32 intr_info = vmx_get_intr_info(vcpu);
697362306a36Sopenharmony_ci	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
697462306a36Sopenharmony_ci	gate_desc *desc = (gate_desc *)host_idt_base + vector;
697562306a36Sopenharmony_ci
697662306a36Sopenharmony_ci	if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
697762306a36Sopenharmony_ci	    "unexpected VM-Exit interrupt info: 0x%x", intr_info))
697862306a36Sopenharmony_ci		return;
697962306a36Sopenharmony_ci
698062306a36Sopenharmony_ci	kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
698162306a36Sopenharmony_ci	vmx_do_interrupt_irqoff(gate_offset(desc));
698262306a36Sopenharmony_ci	kvm_after_interrupt(vcpu);
698362306a36Sopenharmony_ci
698462306a36Sopenharmony_ci	vcpu->arch.at_instruction_boundary = true;
698562306a36Sopenharmony_ci}
698662306a36Sopenharmony_ci
698762306a36Sopenharmony_cistatic void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
698862306a36Sopenharmony_ci{
698962306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
699062306a36Sopenharmony_ci
699162306a36Sopenharmony_ci	if (vmx->emulation_required)
699262306a36Sopenharmony_ci		return;
699362306a36Sopenharmony_ci
699462306a36Sopenharmony_ci	if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
699562306a36Sopenharmony_ci		handle_external_interrupt_irqoff(vcpu);
699662306a36Sopenharmony_ci	else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
699762306a36Sopenharmony_ci		handle_exception_irqoff(vmx);
699862306a36Sopenharmony_ci}
699962306a36Sopenharmony_ci
700062306a36Sopenharmony_ci/*
700162306a36Sopenharmony_ci * The kvm parameter can be NULL (module initialization, or invocation before
700262306a36Sopenharmony_ci * VM creation). Be sure to check the kvm parameter before using it.
700362306a36Sopenharmony_ci */
700462306a36Sopenharmony_cistatic bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
700562306a36Sopenharmony_ci{
700662306a36Sopenharmony_ci	switch (index) {
700762306a36Sopenharmony_ci	case MSR_IA32_SMBASE:
700862306a36Sopenharmony_ci		if (!IS_ENABLED(CONFIG_KVM_SMM))
700962306a36Sopenharmony_ci			return false;
701062306a36Sopenharmony_ci		/*
701162306a36Sopenharmony_ci		 * We cannot do SMM unless we can run the guest in big
701262306a36Sopenharmony_ci		 * real mode.
701362306a36Sopenharmony_ci		 */
701462306a36Sopenharmony_ci		return enable_unrestricted_guest || emulate_invalid_guest_state;
701562306a36Sopenharmony_ci	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
701662306a36Sopenharmony_ci		return nested;
701762306a36Sopenharmony_ci	case MSR_AMD64_VIRT_SPEC_CTRL:
701862306a36Sopenharmony_ci	case MSR_AMD64_TSC_RATIO:
701962306a36Sopenharmony_ci		/* This is AMD only.  */
702062306a36Sopenharmony_ci		return false;
702162306a36Sopenharmony_ci	default:
702262306a36Sopenharmony_ci		return true;
702362306a36Sopenharmony_ci	}
702462306a36Sopenharmony_ci}
702562306a36Sopenharmony_ci
702662306a36Sopenharmony_cistatic void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
702762306a36Sopenharmony_ci{
702862306a36Sopenharmony_ci	u32 exit_intr_info;
702962306a36Sopenharmony_ci	bool unblock_nmi;
703062306a36Sopenharmony_ci	u8 vector;
703162306a36Sopenharmony_ci	bool idtv_info_valid;
703262306a36Sopenharmony_ci
703362306a36Sopenharmony_ci	idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
703462306a36Sopenharmony_ci
703562306a36Sopenharmony_ci	if (enable_vnmi) {
703662306a36Sopenharmony_ci		if (vmx->loaded_vmcs->nmi_known_unmasked)
703762306a36Sopenharmony_ci			return;
703862306a36Sopenharmony_ci
703962306a36Sopenharmony_ci		exit_intr_info = vmx_get_intr_info(&vmx->vcpu);
704062306a36Sopenharmony_ci		unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
704162306a36Sopenharmony_ci		vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
704262306a36Sopenharmony_ci		/*
704362306a36Sopenharmony_ci		 * SDM 3: 27.7.1.2 (September 2008)
704462306a36Sopenharmony_ci		 * Re-set bit "block by NMI" before VM entry if vmexit caused by
704562306a36Sopenharmony_ci		 * a guest IRET fault.
704662306a36Sopenharmony_ci		 * SDM 3: 23.2.2 (September 2008)
704762306a36Sopenharmony_ci		 * Bit 12 is undefined in any of the following cases:
704862306a36Sopenharmony_ci		 *  If the VM exit sets the valid bit in the IDT-vectoring
704962306a36Sopenharmony_ci		 *   information field.
705062306a36Sopenharmony_ci		 *  If the VM exit is due to a double fault.
705162306a36Sopenharmony_ci		 */
705262306a36Sopenharmony_ci		if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
705362306a36Sopenharmony_ci		    vector != DF_VECTOR && !idtv_info_valid)
705462306a36Sopenharmony_ci			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
705562306a36Sopenharmony_ci				      GUEST_INTR_STATE_NMI);
705662306a36Sopenharmony_ci		else
705762306a36Sopenharmony_ci			vmx->loaded_vmcs->nmi_known_unmasked =
705862306a36Sopenharmony_ci				!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
705962306a36Sopenharmony_ci				  & GUEST_INTR_STATE_NMI);
706062306a36Sopenharmony_ci	} else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
706162306a36Sopenharmony_ci		vmx->loaded_vmcs->vnmi_blocked_time +=
706262306a36Sopenharmony_ci			ktime_to_ns(ktime_sub(ktime_get(),
706362306a36Sopenharmony_ci					      vmx->loaded_vmcs->entry_time));
706462306a36Sopenharmony_ci}
706562306a36Sopenharmony_ci
706662306a36Sopenharmony_cistatic void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
706762306a36Sopenharmony_ci				      u32 idt_vectoring_info,
706862306a36Sopenharmony_ci				      int instr_len_field,
706962306a36Sopenharmony_ci				      int error_code_field)
707062306a36Sopenharmony_ci{
707162306a36Sopenharmony_ci	u8 vector;
707262306a36Sopenharmony_ci	int type;
707362306a36Sopenharmony_ci	bool idtv_info_valid;
707462306a36Sopenharmony_ci
707562306a36Sopenharmony_ci	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
707662306a36Sopenharmony_ci
707762306a36Sopenharmony_ci	vcpu->arch.nmi_injected = false;
707862306a36Sopenharmony_ci	kvm_clear_exception_queue(vcpu);
707962306a36Sopenharmony_ci	kvm_clear_interrupt_queue(vcpu);
708062306a36Sopenharmony_ci
708162306a36Sopenharmony_ci	if (!idtv_info_valid)
708262306a36Sopenharmony_ci		return;
708362306a36Sopenharmony_ci
708462306a36Sopenharmony_ci	kvm_make_request(KVM_REQ_EVENT, vcpu);
708562306a36Sopenharmony_ci
708662306a36Sopenharmony_ci	vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
708762306a36Sopenharmony_ci	type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
708862306a36Sopenharmony_ci
708962306a36Sopenharmony_ci	switch (type) {
709062306a36Sopenharmony_ci	case INTR_TYPE_NMI_INTR:
709162306a36Sopenharmony_ci		vcpu->arch.nmi_injected = true;
709262306a36Sopenharmony_ci		/*
709362306a36Sopenharmony_ci		 * SDM 3: 27.7.1.2 (September 2008)
709462306a36Sopenharmony_ci		 * Clear bit "block by NMI" before VM entry if a NMI
709562306a36Sopenharmony_ci		 * delivery faulted.
709662306a36Sopenharmony_ci		 */
709762306a36Sopenharmony_ci		vmx_set_nmi_mask(vcpu, false);
709862306a36Sopenharmony_ci		break;
709962306a36Sopenharmony_ci	case INTR_TYPE_SOFT_EXCEPTION:
710062306a36Sopenharmony_ci		vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
710162306a36Sopenharmony_ci		fallthrough;
710262306a36Sopenharmony_ci	case INTR_TYPE_HARD_EXCEPTION:
710362306a36Sopenharmony_ci		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
710462306a36Sopenharmony_ci			u32 err = vmcs_read32(error_code_field);
710562306a36Sopenharmony_ci			kvm_requeue_exception_e(vcpu, vector, err);
710662306a36Sopenharmony_ci		} else
710762306a36Sopenharmony_ci			kvm_requeue_exception(vcpu, vector);
710862306a36Sopenharmony_ci		break;
710962306a36Sopenharmony_ci	case INTR_TYPE_SOFT_INTR:
711062306a36Sopenharmony_ci		vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
711162306a36Sopenharmony_ci		fallthrough;
711262306a36Sopenharmony_ci	case INTR_TYPE_EXT_INTR:
711362306a36Sopenharmony_ci		kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
711462306a36Sopenharmony_ci		break;
711562306a36Sopenharmony_ci	default:
711662306a36Sopenharmony_ci		break;
711762306a36Sopenharmony_ci	}
711862306a36Sopenharmony_ci}
711962306a36Sopenharmony_ci
712062306a36Sopenharmony_cistatic void vmx_complete_interrupts(struct vcpu_vmx *vmx)
712162306a36Sopenharmony_ci{
712262306a36Sopenharmony_ci	__vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
712362306a36Sopenharmony_ci				  VM_EXIT_INSTRUCTION_LEN,
712462306a36Sopenharmony_ci				  IDT_VECTORING_ERROR_CODE);
712562306a36Sopenharmony_ci}
712662306a36Sopenharmony_ci
712762306a36Sopenharmony_cistatic void vmx_cancel_injection(struct kvm_vcpu *vcpu)
712862306a36Sopenharmony_ci{
712962306a36Sopenharmony_ci	__vmx_complete_interrupts(vcpu,
713062306a36Sopenharmony_ci				  vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
713162306a36Sopenharmony_ci				  VM_ENTRY_INSTRUCTION_LEN,
713262306a36Sopenharmony_ci				  VM_ENTRY_EXCEPTION_ERROR_CODE);
713362306a36Sopenharmony_ci
713462306a36Sopenharmony_ci	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
713562306a36Sopenharmony_ci}
713662306a36Sopenharmony_ci
713762306a36Sopenharmony_cistatic void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
713862306a36Sopenharmony_ci{
713962306a36Sopenharmony_ci	int i, nr_msrs;
714062306a36Sopenharmony_ci	struct perf_guest_switch_msr *msrs;
714162306a36Sopenharmony_ci	struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu);
714262306a36Sopenharmony_ci
714362306a36Sopenharmony_ci	pmu->host_cross_mapped_mask = 0;
714462306a36Sopenharmony_ci	if (pmu->pebs_enable & pmu->global_ctrl)
714562306a36Sopenharmony_ci		intel_pmu_cross_mapped_check(pmu);
714662306a36Sopenharmony_ci
714762306a36Sopenharmony_ci	/* Note, nr_msrs may be garbage if perf_guest_get_msrs() returns NULL. */
714862306a36Sopenharmony_ci	msrs = perf_guest_get_msrs(&nr_msrs, (void *)pmu);
714962306a36Sopenharmony_ci	if (!msrs)
715062306a36Sopenharmony_ci		return;
715162306a36Sopenharmony_ci
715262306a36Sopenharmony_ci	for (i = 0; i < nr_msrs; i++)
715362306a36Sopenharmony_ci		if (msrs[i].host == msrs[i].guest)
715462306a36Sopenharmony_ci			clear_atomic_switch_msr(vmx, msrs[i].msr);
715562306a36Sopenharmony_ci		else
715662306a36Sopenharmony_ci			add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
715762306a36Sopenharmony_ci					msrs[i].host, false);
715862306a36Sopenharmony_ci}
715962306a36Sopenharmony_ci
716062306a36Sopenharmony_cistatic void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
716162306a36Sopenharmony_ci{
716262306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
716362306a36Sopenharmony_ci	u64 tscl;
716462306a36Sopenharmony_ci	u32 delta_tsc;
716562306a36Sopenharmony_ci
716662306a36Sopenharmony_ci	if (vmx->req_immediate_exit) {
716762306a36Sopenharmony_ci		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
716862306a36Sopenharmony_ci		vmx->loaded_vmcs->hv_timer_soft_disabled = false;
716962306a36Sopenharmony_ci	} else if (vmx->hv_deadline_tsc != -1) {
717062306a36Sopenharmony_ci		tscl = rdtsc();
717162306a36Sopenharmony_ci		if (vmx->hv_deadline_tsc > tscl)
717262306a36Sopenharmony_ci			/* set_hv_timer ensures the delta fits in 32-bits */
717362306a36Sopenharmony_ci			delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
717462306a36Sopenharmony_ci				cpu_preemption_timer_multi);
717562306a36Sopenharmony_ci		else
717662306a36Sopenharmony_ci			delta_tsc = 0;
717762306a36Sopenharmony_ci
717862306a36Sopenharmony_ci		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
717962306a36Sopenharmony_ci		vmx->loaded_vmcs->hv_timer_soft_disabled = false;
718062306a36Sopenharmony_ci	} else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) {
718162306a36Sopenharmony_ci		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1);
718262306a36Sopenharmony_ci		vmx->loaded_vmcs->hv_timer_soft_disabled = true;
718362306a36Sopenharmony_ci	}
718462306a36Sopenharmony_ci}
718562306a36Sopenharmony_ci
718662306a36Sopenharmony_civoid noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
718762306a36Sopenharmony_ci{
718862306a36Sopenharmony_ci	if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) {
718962306a36Sopenharmony_ci		vmx->loaded_vmcs->host_state.rsp = host_rsp;
719062306a36Sopenharmony_ci		vmcs_writel(HOST_RSP, host_rsp);
719162306a36Sopenharmony_ci	}
719262306a36Sopenharmony_ci}
719362306a36Sopenharmony_ci
719462306a36Sopenharmony_civoid noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
719562306a36Sopenharmony_ci					unsigned int flags)
719662306a36Sopenharmony_ci{
719762306a36Sopenharmony_ci	u64 hostval = this_cpu_read(x86_spec_ctrl_current);
719862306a36Sopenharmony_ci
719962306a36Sopenharmony_ci	if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
720062306a36Sopenharmony_ci		return;
720162306a36Sopenharmony_ci
720262306a36Sopenharmony_ci	if (flags & VMX_RUN_SAVE_SPEC_CTRL)
720362306a36Sopenharmony_ci		vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
720462306a36Sopenharmony_ci
720562306a36Sopenharmony_ci	/*
720662306a36Sopenharmony_ci	 * If the guest/host SPEC_CTRL values differ, restore the host value.
720762306a36Sopenharmony_ci	 *
720862306a36Sopenharmony_ci	 * For legacy IBRS, the IBRS bit always needs to be written after
720962306a36Sopenharmony_ci	 * transitioning from a less privileged predictor mode, regardless of
721062306a36Sopenharmony_ci	 * whether the guest/host values differ.
721162306a36Sopenharmony_ci	 */
721262306a36Sopenharmony_ci	if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
721362306a36Sopenharmony_ci	    vmx->spec_ctrl != hostval)
721462306a36Sopenharmony_ci		native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
721562306a36Sopenharmony_ci
721662306a36Sopenharmony_ci	barrier_nospec();
721762306a36Sopenharmony_ci}
721862306a36Sopenharmony_ci
721962306a36Sopenharmony_cistatic fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
722062306a36Sopenharmony_ci{
722162306a36Sopenharmony_ci	switch (to_vmx(vcpu)->exit_reason.basic) {
722262306a36Sopenharmony_ci	case EXIT_REASON_MSR_WRITE:
722362306a36Sopenharmony_ci		return handle_fastpath_set_msr_irqoff(vcpu);
722462306a36Sopenharmony_ci	case EXIT_REASON_PREEMPTION_TIMER:
722562306a36Sopenharmony_ci		return handle_fastpath_preemption_timer(vcpu);
722662306a36Sopenharmony_ci	default:
722762306a36Sopenharmony_ci		return EXIT_FASTPATH_NONE;
722862306a36Sopenharmony_ci	}
722962306a36Sopenharmony_ci}
723062306a36Sopenharmony_ci
723162306a36Sopenharmony_cistatic noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
723262306a36Sopenharmony_ci					unsigned int flags)
723362306a36Sopenharmony_ci{
723462306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
723562306a36Sopenharmony_ci
723662306a36Sopenharmony_ci	guest_state_enter_irqoff();
723762306a36Sopenharmony_ci
723862306a36Sopenharmony_ci	/*
723962306a36Sopenharmony_ci	 * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
724062306a36Sopenharmony_ci	 * mitigation for MDS is done late in VMentry and is still
724162306a36Sopenharmony_ci	 * executed in spite of L1D Flush. This is because an extra VERW
724262306a36Sopenharmony_ci	 * should not matter much after the big hammer L1D Flush.
724362306a36Sopenharmony_ci	 */
724462306a36Sopenharmony_ci	if (static_branch_unlikely(&vmx_l1d_should_flush))
724562306a36Sopenharmony_ci		vmx_l1d_flush(vcpu);
724662306a36Sopenharmony_ci	else if (static_branch_unlikely(&mmio_stale_data_clear) &&
724762306a36Sopenharmony_ci		 kvm_arch_has_assigned_device(vcpu->kvm))
724862306a36Sopenharmony_ci		mds_clear_cpu_buffers();
724962306a36Sopenharmony_ci
725062306a36Sopenharmony_ci	vmx_disable_fb_clear(vmx);
725162306a36Sopenharmony_ci
725262306a36Sopenharmony_ci	if (vcpu->arch.cr2 != native_read_cr2())
725362306a36Sopenharmony_ci		native_write_cr2(vcpu->arch.cr2);
725462306a36Sopenharmony_ci
725562306a36Sopenharmony_ci	vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
725662306a36Sopenharmony_ci				   flags);
725762306a36Sopenharmony_ci
725862306a36Sopenharmony_ci	vcpu->arch.cr2 = native_read_cr2();
725962306a36Sopenharmony_ci	vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
726062306a36Sopenharmony_ci
726162306a36Sopenharmony_ci	vmx->idt_vectoring_info = 0;
726262306a36Sopenharmony_ci
726362306a36Sopenharmony_ci	vmx_enable_fb_clear(vmx);
726462306a36Sopenharmony_ci
726562306a36Sopenharmony_ci	if (unlikely(vmx->fail)) {
726662306a36Sopenharmony_ci		vmx->exit_reason.full = 0xdead;
726762306a36Sopenharmony_ci		goto out;
726862306a36Sopenharmony_ci	}
726962306a36Sopenharmony_ci
727062306a36Sopenharmony_ci	vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
727162306a36Sopenharmony_ci	if (likely(!vmx->exit_reason.failed_vmentry))
727262306a36Sopenharmony_ci		vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
727362306a36Sopenharmony_ci
727462306a36Sopenharmony_ci	if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
727562306a36Sopenharmony_ci	    is_nmi(vmx_get_intr_info(vcpu))) {
727662306a36Sopenharmony_ci		kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
727762306a36Sopenharmony_ci		vmx_do_nmi_irqoff();
727862306a36Sopenharmony_ci		kvm_after_interrupt(vcpu);
727962306a36Sopenharmony_ci	}
728062306a36Sopenharmony_ci
728162306a36Sopenharmony_ciout:
728262306a36Sopenharmony_ci	guest_state_exit_irqoff();
728362306a36Sopenharmony_ci}
728462306a36Sopenharmony_ci
728562306a36Sopenharmony_cistatic fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
728662306a36Sopenharmony_ci{
728762306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
728862306a36Sopenharmony_ci	unsigned long cr3, cr4;
728962306a36Sopenharmony_ci
729062306a36Sopenharmony_ci	/* Record the guest's net vcpu time for enforced NMI injections. */
729162306a36Sopenharmony_ci	if (unlikely(!enable_vnmi &&
729262306a36Sopenharmony_ci		     vmx->loaded_vmcs->soft_vnmi_blocked))
729362306a36Sopenharmony_ci		vmx->loaded_vmcs->entry_time = ktime_get();
729462306a36Sopenharmony_ci
729562306a36Sopenharmony_ci	/*
729662306a36Sopenharmony_ci	 * Don't enter VMX if guest state is invalid, let the exit handler
729762306a36Sopenharmony_ci	 * start emulation until we arrive back to a valid state.  Synthesize a
729862306a36Sopenharmony_ci	 * consistency check VM-Exit due to invalid guest state and bail.
729962306a36Sopenharmony_ci	 */
730062306a36Sopenharmony_ci	if (unlikely(vmx->emulation_required)) {
730162306a36Sopenharmony_ci		vmx->fail = 0;
730262306a36Sopenharmony_ci
730362306a36Sopenharmony_ci		vmx->exit_reason.full = EXIT_REASON_INVALID_STATE;
730462306a36Sopenharmony_ci		vmx->exit_reason.failed_vmentry = 1;
730562306a36Sopenharmony_ci		kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1);
730662306a36Sopenharmony_ci		vmx->exit_qualification = ENTRY_FAIL_DEFAULT;
730762306a36Sopenharmony_ci		kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2);
730862306a36Sopenharmony_ci		vmx->exit_intr_info = 0;
730962306a36Sopenharmony_ci		return EXIT_FASTPATH_NONE;
731062306a36Sopenharmony_ci	}
731162306a36Sopenharmony_ci
731262306a36Sopenharmony_ci	trace_kvm_entry(vcpu);
731362306a36Sopenharmony_ci
731462306a36Sopenharmony_ci	if (vmx->ple_window_dirty) {
731562306a36Sopenharmony_ci		vmx->ple_window_dirty = false;
731662306a36Sopenharmony_ci		vmcs_write32(PLE_WINDOW, vmx->ple_window);
731762306a36Sopenharmony_ci	}
731862306a36Sopenharmony_ci
731962306a36Sopenharmony_ci	/*
732062306a36Sopenharmony_ci	 * We did this in prepare_switch_to_guest, because it needs to
732162306a36Sopenharmony_ci	 * be within srcu_read_lock.
732262306a36Sopenharmony_ci	 */
732362306a36Sopenharmony_ci	WARN_ON_ONCE(vmx->nested.need_vmcs12_to_shadow_sync);
732462306a36Sopenharmony_ci
732562306a36Sopenharmony_ci	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP))
732662306a36Sopenharmony_ci		vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
732762306a36Sopenharmony_ci	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
732862306a36Sopenharmony_ci		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
732962306a36Sopenharmony_ci	vcpu->arch.regs_dirty = 0;
733062306a36Sopenharmony_ci
733162306a36Sopenharmony_ci	/*
733262306a36Sopenharmony_ci	 * Refresh vmcs.HOST_CR3 if necessary.  This must be done immediately
733362306a36Sopenharmony_ci	 * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
733462306a36Sopenharmony_ci	 * it switches back to the current->mm, which can occur in KVM context
733562306a36Sopenharmony_ci	 * when switching to a temporary mm to patch kernel code, e.g. if KVM
733662306a36Sopenharmony_ci	 * toggles a static key while handling a VM-Exit.
733762306a36Sopenharmony_ci	 */
733862306a36Sopenharmony_ci	cr3 = __get_current_cr3_fast();
733962306a36Sopenharmony_ci	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
734062306a36Sopenharmony_ci		vmcs_writel(HOST_CR3, cr3);
734162306a36Sopenharmony_ci		vmx->loaded_vmcs->host_state.cr3 = cr3;
734262306a36Sopenharmony_ci	}
734362306a36Sopenharmony_ci
734462306a36Sopenharmony_ci	cr4 = cr4_read_shadow();
734562306a36Sopenharmony_ci	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
734662306a36Sopenharmony_ci		vmcs_writel(HOST_CR4, cr4);
734762306a36Sopenharmony_ci		vmx->loaded_vmcs->host_state.cr4 = cr4;
734862306a36Sopenharmony_ci	}
734962306a36Sopenharmony_ci
735062306a36Sopenharmony_ci	/* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */
735162306a36Sopenharmony_ci	if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
735262306a36Sopenharmony_ci		set_debugreg(vcpu->arch.dr6, 6);
735362306a36Sopenharmony_ci
735462306a36Sopenharmony_ci	/* When single-stepping over STI and MOV SS, we must clear the
735562306a36Sopenharmony_ci	 * corresponding interruptibility bits in the guest state. Otherwise
735662306a36Sopenharmony_ci	 * vmentry fails as it then expects bit 14 (BS) in pending debug
735762306a36Sopenharmony_ci	 * exceptions being set, but that's not correct for the guest debugging
735862306a36Sopenharmony_ci	 * case. */
735962306a36Sopenharmony_ci	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
736062306a36Sopenharmony_ci		vmx_set_interrupt_shadow(vcpu, 0);
736162306a36Sopenharmony_ci
736262306a36Sopenharmony_ci	kvm_load_guest_xsave_state(vcpu);
736362306a36Sopenharmony_ci
736462306a36Sopenharmony_ci	pt_guest_enter(vmx);
736562306a36Sopenharmony_ci
736662306a36Sopenharmony_ci	atomic_switch_perf_msrs(vmx);
736762306a36Sopenharmony_ci	if (intel_pmu_lbr_is_enabled(vcpu))
736862306a36Sopenharmony_ci		vmx_passthrough_lbr_msrs(vcpu);
736962306a36Sopenharmony_ci
737062306a36Sopenharmony_ci	if (enable_preemption_timer)
737162306a36Sopenharmony_ci		vmx_update_hv_timer(vcpu);
737262306a36Sopenharmony_ci
737362306a36Sopenharmony_ci	kvm_wait_lapic_expire(vcpu);
737462306a36Sopenharmony_ci
737562306a36Sopenharmony_ci	/* The actual VMENTER/EXIT is in the .noinstr.text section. */
737662306a36Sopenharmony_ci	vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx));
737762306a36Sopenharmony_ci
737862306a36Sopenharmony_ci	/* All fields are clean at this point */
737962306a36Sopenharmony_ci	if (kvm_is_using_evmcs()) {
738062306a36Sopenharmony_ci		current_evmcs->hv_clean_fields |=
738162306a36Sopenharmony_ci			HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
738262306a36Sopenharmony_ci
738362306a36Sopenharmony_ci		current_evmcs->hv_vp_id = kvm_hv_get_vpindex(vcpu);
738462306a36Sopenharmony_ci	}
738562306a36Sopenharmony_ci
738662306a36Sopenharmony_ci	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
738762306a36Sopenharmony_ci	if (vmx->host_debugctlmsr)
738862306a36Sopenharmony_ci		update_debugctlmsr(vmx->host_debugctlmsr);
738962306a36Sopenharmony_ci
739062306a36Sopenharmony_ci#ifndef CONFIG_X86_64
739162306a36Sopenharmony_ci	/*
739262306a36Sopenharmony_ci	 * The sysexit path does not restore ds/es, so we must set them to
739362306a36Sopenharmony_ci	 * a reasonable value ourselves.
739462306a36Sopenharmony_ci	 *
739562306a36Sopenharmony_ci	 * We can't defer this to vmx_prepare_switch_to_host() since that
739662306a36Sopenharmony_ci	 * function may be executed in interrupt context, which saves and
739762306a36Sopenharmony_ci	 * restore segments around it, nullifying its effect.
739862306a36Sopenharmony_ci	 */
739962306a36Sopenharmony_ci	loadsegment(ds, __USER_DS);
740062306a36Sopenharmony_ci	loadsegment(es, __USER_DS);
740162306a36Sopenharmony_ci#endif
740262306a36Sopenharmony_ci
740362306a36Sopenharmony_ci	pt_guest_exit(vmx);
740462306a36Sopenharmony_ci
740562306a36Sopenharmony_ci	kvm_load_host_xsave_state(vcpu);
740662306a36Sopenharmony_ci
740762306a36Sopenharmony_ci	if (is_guest_mode(vcpu)) {
740862306a36Sopenharmony_ci		/*
740962306a36Sopenharmony_ci		 * Track VMLAUNCH/VMRESUME that have made past guest state
741062306a36Sopenharmony_ci		 * checking.
741162306a36Sopenharmony_ci		 */
741262306a36Sopenharmony_ci		if (vmx->nested.nested_run_pending &&
741362306a36Sopenharmony_ci		    !vmx->exit_reason.failed_vmentry)
741462306a36Sopenharmony_ci			++vcpu->stat.nested_run;
741562306a36Sopenharmony_ci
741662306a36Sopenharmony_ci		vmx->nested.nested_run_pending = 0;
741762306a36Sopenharmony_ci	}
741862306a36Sopenharmony_ci
741962306a36Sopenharmony_ci	if (unlikely(vmx->fail))
742062306a36Sopenharmony_ci		return EXIT_FASTPATH_NONE;
742162306a36Sopenharmony_ci
742262306a36Sopenharmony_ci	if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
742362306a36Sopenharmony_ci		kvm_machine_check();
742462306a36Sopenharmony_ci
742562306a36Sopenharmony_ci	trace_kvm_exit(vcpu, KVM_ISA_VMX);
742662306a36Sopenharmony_ci
742762306a36Sopenharmony_ci	if (unlikely(vmx->exit_reason.failed_vmentry))
742862306a36Sopenharmony_ci		return EXIT_FASTPATH_NONE;
742962306a36Sopenharmony_ci
743062306a36Sopenharmony_ci	vmx->loaded_vmcs->launched = 1;
743162306a36Sopenharmony_ci
743262306a36Sopenharmony_ci	vmx_recover_nmi_blocking(vmx);
743362306a36Sopenharmony_ci	vmx_complete_interrupts(vmx);
743462306a36Sopenharmony_ci
743562306a36Sopenharmony_ci	if (is_guest_mode(vcpu))
743662306a36Sopenharmony_ci		return EXIT_FASTPATH_NONE;
743762306a36Sopenharmony_ci
743862306a36Sopenharmony_ci	return vmx_exit_handlers_fastpath(vcpu);
743962306a36Sopenharmony_ci}
744062306a36Sopenharmony_ci
744162306a36Sopenharmony_cistatic void vmx_vcpu_free(struct kvm_vcpu *vcpu)
744262306a36Sopenharmony_ci{
744362306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
744462306a36Sopenharmony_ci
744562306a36Sopenharmony_ci	if (enable_pml)
744662306a36Sopenharmony_ci		vmx_destroy_pml_buffer(vmx);
744762306a36Sopenharmony_ci	free_vpid(vmx->vpid);
744862306a36Sopenharmony_ci	nested_vmx_free_vcpu(vcpu);
744962306a36Sopenharmony_ci	free_loaded_vmcs(vmx->loaded_vmcs);
745062306a36Sopenharmony_ci}
745162306a36Sopenharmony_ci
/*
 * Set up the VMX-specific state of a vCPU that is being created.
 *
 * Allocates a VPID, the PML page (only when enable_pml is set) and vmcs01,
 * initializes the user-return MSR masks and the shadow MSR intercept bitmaps,
 * disables interception for a fixed list of MSRs and makes vmcs01 the
 * currently loaded VMCS.
 *
 * Returns 0 on success.  On failure the resources acquired so far are
 * released through the unwind labels at the bottom and the error is returned:
 * -ENOMEM if the PML page cannot be allocated, otherwise whatever the failing
 * helper returned.
 */
static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
{
	struct vmx_uret_msr *tsx_ctrl;
	struct vcpu_vmx *vmx;
	int i, err;

	/*
	 * Compile-time check that 'vcpu' is the first member of vcpu_vmx.
	 * NOTE(review): presumably to_vmx() or the allocation of the vCPU
	 * relies on this layout - confirm.
	 */
	BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0);
	vmx = to_vmx(vcpu);

	INIT_LIST_HEAD(&vmx->pi_wakeup_list);

	/* Default error for the allocation below that sets no code itself. */
	err = -ENOMEM;

	/*
	 * NOTE(review): the result of allocate_vpid() is not checked here;
	 * presumably every value it can return is usable - confirm.
	 */
	vmx->vpid = allocate_vpid();

	/*
	 * If PML is turned on, failure on enabling PML just results in failure
	 * of creating the vcpu, therefore we can simplify PML logic (by
	 * avoiding dealing with cases, such as enabling PML partially on vcpus
	 * for the guest), etc.
	 */
	if (enable_pml) {
		vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
		if (!vmx->pml_pg)
			goto free_vpid;
	}

	/* Start with every bit of every user-return MSR mask set. */
	for (i = 0; i < kvm_nr_uret_msrs; ++i)
		vmx->guest_uret_msrs[i].mask = -1ull;
	if (boot_cpu_has(X86_FEATURE_RTM)) {
		/*
		 * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception.
		 * Keep the host value unchanged to avoid changing CPUID bits
		 * under the host kernel's feet.
		 */
		tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
		if (tsx_ctrl)
			tsx_ctrl->mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
	}

	err = alloc_loaded_vmcs(&vmx->vmcs01);
	if (err < 0)
		goto free_pml;

	/*
	 * Use Hyper-V 'Enlightened MSR Bitmap' feature when KVM runs as a
	 * nested (L1) hypervisor and Hyper-V in L0 supports it. Enable the
	 * feature only for vmcs01, KVM currently isn't equipped to realize any
	 * performance benefits from enabling it for vmcs02.
	 */
	if (kvm_is_using_evmcs() &&
	    (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
		struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;

		evmcs->hv_enlightenments_control.msr_bitmap = 1;
	}

	/* The MSR bitmap starts with all ones */
	bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
	bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);

	/* MSRs whose interception is disabled from vCPU creation onwards. */
	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
#ifdef CONFIG_X86_64
	vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
	vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
	vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
#endif
	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
	/* C-state residency MSRs are readable only if the VM has C-states in guest. */
	if (kvm_cstate_in_guest(vcpu->kvm)) {
		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
	}

	/* From here on the free_vmcs label frees vmcs01 via ->loaded_vmcs. */
	vmx->loaded_vmcs = &vmx->vmcs01;

	if (cpu_need_virtualize_apic_accesses(vcpu)) {
		err = kvm_alloc_apic_access_page(vcpu->kvm);
		if (err)
			goto free_vmcs;
	}

	if (enable_ept && !enable_unrestricted_guest) {
		err = init_rmode_identity_map(vcpu->kvm);
		if (err)
			goto free_vmcs;
	}

	/*
	 * Publish the physical address of this vCPU's posted-interrupt
	 * descriptor, tagged as valid, in the VM's PID table slot indexed by
	 * vcpu_id.
	 */
	if (vmx_can_use_ipiv(vcpu))
		WRITE_ONCE(to_kvm_vmx(vcpu->kvm)->pid_table[vcpu->vcpu_id],
			   __pa(&vmx->pi_desc) | PID_TABLE_ENTRY_VALID);

	return 0;

	/* Unwind in reverse order of acquisition. */
free_vmcs:
	free_loaded_vmcs(vmx->loaded_vmcs);
free_pml:
	/*
	 * Called unconditionally, i.e. also when enable_pml is clear.
	 * NOTE(review): presumably vmx_destroy_pml_buffer() tolerates a NULL
	 * pml_pg - confirm.
	 */
	vmx_destroy_pml_buffer(vmx);
free_vpid:
	free_vpid(vmx->vpid);
	return err;
}
755762306a36Sopenharmony_ci
/*
 * One-time warnings printed by vmx_vm_init() on hosts with the L1TF bug:
 * L1TF_MSG_SMT when SMT is active, L1TF_MSG_L1D when the VMX L1D flush
 * mitigation is set to "never".
 */
#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
756062306a36Sopenharmony_ci
756162306a36Sopenharmony_cistatic int vmx_vm_init(struct kvm *kvm)
756262306a36Sopenharmony_ci{
756362306a36Sopenharmony_ci	if (!ple_gap)
756462306a36Sopenharmony_ci		kvm->arch.pause_in_guest = true;
756562306a36Sopenharmony_ci
756662306a36Sopenharmony_ci	if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
756762306a36Sopenharmony_ci		switch (l1tf_mitigation) {
756862306a36Sopenharmony_ci		case L1TF_MITIGATION_OFF:
756962306a36Sopenharmony_ci		case L1TF_MITIGATION_FLUSH_NOWARN:
757062306a36Sopenharmony_ci			/* 'I explicitly don't care' is set */
757162306a36Sopenharmony_ci			break;
757262306a36Sopenharmony_ci		case L1TF_MITIGATION_FLUSH:
757362306a36Sopenharmony_ci		case L1TF_MITIGATION_FLUSH_NOSMT:
757462306a36Sopenharmony_ci		case L1TF_MITIGATION_FULL:
757562306a36Sopenharmony_ci			/*
757662306a36Sopenharmony_ci			 * Warn upon starting the first VM in a potentially
757762306a36Sopenharmony_ci			 * insecure environment.
757862306a36Sopenharmony_ci			 */
757962306a36Sopenharmony_ci			if (sched_smt_active())
758062306a36Sopenharmony_ci				pr_warn_once(L1TF_MSG_SMT);
758162306a36Sopenharmony_ci			if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
758262306a36Sopenharmony_ci				pr_warn_once(L1TF_MSG_L1D);
758362306a36Sopenharmony_ci			break;
758462306a36Sopenharmony_ci		case L1TF_MITIGATION_FULL_FORCE:
758562306a36Sopenharmony_ci			/* Flush is enforced */
758662306a36Sopenharmony_ci			break;
758762306a36Sopenharmony_ci		}
758862306a36Sopenharmony_ci	}
758962306a36Sopenharmony_ci	return 0;
759062306a36Sopenharmony_ci}
759162306a36Sopenharmony_ci
759262306a36Sopenharmony_cistatic u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
759362306a36Sopenharmony_ci{
759462306a36Sopenharmony_ci	u8 cache;
759562306a36Sopenharmony_ci
759662306a36Sopenharmony_ci	/* We wanted to honor guest CD/MTRR/PAT, but doing so could result in
759762306a36Sopenharmony_ci	 * memory aliases with conflicting memory types and sometimes MCEs.
759862306a36Sopenharmony_ci	 * We have to be careful as to what are honored and when.
759962306a36Sopenharmony_ci	 *
760062306a36Sopenharmony_ci	 * For MMIO, guest CD/MTRR are ignored.  The EPT memory type is set to
760162306a36Sopenharmony_ci	 * UC.  The effective memory type is UC or WC depending on guest PAT.
760262306a36Sopenharmony_ci	 * This was historically the source of MCEs and we want to be
760362306a36Sopenharmony_ci	 * conservative.
760462306a36Sopenharmony_ci	 *
760562306a36Sopenharmony_ci	 * When there is no need to deal with noncoherent DMA (e.g., no VT-d
760662306a36Sopenharmony_ci	 * or VT-d has snoop control), guest CD/MTRR/PAT are all ignored.  The
760762306a36Sopenharmony_ci	 * EPT memory type is set to WB.  The effective memory type is forced
760862306a36Sopenharmony_ci	 * WB.
760962306a36Sopenharmony_ci	 *
761062306a36Sopenharmony_ci	 * Otherwise, we trust guest.  Guest CD/MTRR/PAT are all honored.  The
761162306a36Sopenharmony_ci	 * EPT memory type is used to emulate guest CD/MTRR.
761262306a36Sopenharmony_ci	 */
761362306a36Sopenharmony_ci
761462306a36Sopenharmony_ci	if (is_mmio)
761562306a36Sopenharmony_ci		return MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
761662306a36Sopenharmony_ci
761762306a36Sopenharmony_ci	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
761862306a36Sopenharmony_ci		return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
761962306a36Sopenharmony_ci
762062306a36Sopenharmony_ci	if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) {
762162306a36Sopenharmony_ci		if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
762262306a36Sopenharmony_ci			cache = MTRR_TYPE_WRBACK;
762362306a36Sopenharmony_ci		else
762462306a36Sopenharmony_ci			cache = MTRR_TYPE_UNCACHABLE;
762562306a36Sopenharmony_ci
762662306a36Sopenharmony_ci		return (cache << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
762762306a36Sopenharmony_ci	}
762862306a36Sopenharmony_ci
762962306a36Sopenharmony_ci	return kvm_mtrr_get_guest_memory_type(vcpu, gfn) << VMX_EPT_MT_EPTE_SHIFT;
763062306a36Sopenharmony_ci}
763162306a36Sopenharmony_ci
763262306a36Sopenharmony_cistatic void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx, u32 new_ctl)
763362306a36Sopenharmony_ci{
763462306a36Sopenharmony_ci	/*
763562306a36Sopenharmony_ci	 * These bits in the secondary execution controls field
763662306a36Sopenharmony_ci	 * are dynamic, the others are mostly based on the hypervisor
763762306a36Sopenharmony_ci	 * architecture and the guest's CPUID.  Do not touch the
763862306a36Sopenharmony_ci	 * dynamic bits.
763962306a36Sopenharmony_ci	 */
764062306a36Sopenharmony_ci	u32 mask =
764162306a36Sopenharmony_ci		SECONDARY_EXEC_SHADOW_VMCS |
764262306a36Sopenharmony_ci		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
764362306a36Sopenharmony_ci		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
764462306a36Sopenharmony_ci		SECONDARY_EXEC_DESC;
764562306a36Sopenharmony_ci
764662306a36Sopenharmony_ci	u32 cur_ctl = secondary_exec_controls_get(vmx);
764762306a36Sopenharmony_ci
764862306a36Sopenharmony_ci	secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask));
764962306a36Sopenharmony_ci}
765062306a36Sopenharmony_ci
/*
 * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits
 * (indicating "allowed-1") if they are supported in the guest's CPUID.
 *
 * cr0_fixed1 is set to a constant.  cr4_fixed1 starts out as X86_CR4_PCE and
 * gains one CR4 bit per matching feature bit found in the guest's CPUID
 * leaf 0x1 and leaf 0x7 (index 0).
 */
static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct kvm_cpuid_entry2 *entry;

	vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
	vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;

/*
 * OR @_cr4_mask into cr4_fixed1 if the current 'entry' exists and has
 * @_cpuid_mask set in register @_reg.  Uses the locals 'entry' and 'vmx';
 * a NULL 'entry' (CPUID leaf not present) leaves cr4_fixed1 untouched.
 */
#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do {		\
	if (entry && (entry->_reg & (_cpuid_mask)))			\
		vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask);	\
} while (0)

	/* CPUID leaf 0x1.  Note that both CR4.VME and CR4.PVI key off VME. */
	entry = kvm_find_cpuid_entry(vcpu, 0x1);
	cr4_fixed1_update(X86_CR4_VME,        edx, feature_bit(VME));
	cr4_fixed1_update(X86_CR4_PVI,        edx, feature_bit(VME));
	cr4_fixed1_update(X86_CR4_TSD,        edx, feature_bit(TSC));
	cr4_fixed1_update(X86_CR4_DE,         edx, feature_bit(DE));
	cr4_fixed1_update(X86_CR4_PSE,        edx, feature_bit(PSE));
	cr4_fixed1_update(X86_CR4_PAE,        edx, feature_bit(PAE));
	cr4_fixed1_update(X86_CR4_MCE,        edx, feature_bit(MCE));
	cr4_fixed1_update(X86_CR4_PGE,        edx, feature_bit(PGE));
	cr4_fixed1_update(X86_CR4_OSFXSR,     edx, feature_bit(FXSR));
	cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, feature_bit(XMM));
	cr4_fixed1_update(X86_CR4_VMXE,       ecx, feature_bit(VMX));
	cr4_fixed1_update(X86_CR4_SMXE,       ecx, feature_bit(SMX));
	cr4_fixed1_update(X86_CR4_PCIDE,      ecx, feature_bit(PCID));
	cr4_fixed1_update(X86_CR4_OSXSAVE,    ecx, feature_bit(XSAVE));

	/* CPUID leaf 0x7, index 0. */
	entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 0);
	cr4_fixed1_update(X86_CR4_FSGSBASE,   ebx, feature_bit(FSGSBASE));
	cr4_fixed1_update(X86_CR4_SMEP,       ebx, feature_bit(SMEP));
	cr4_fixed1_update(X86_CR4_SMAP,       ebx, feature_bit(SMAP));
	cr4_fixed1_update(X86_CR4_PKE,        ecx, feature_bit(PKU));
	cr4_fixed1_update(X86_CR4_UMIP,       ecx, feature_bit(UMIP));
	cr4_fixed1_update(X86_CR4_LA57,       ecx, feature_bit(LA57));

#undef cr4_fixed1_update
}
769462306a36Sopenharmony_ci
/*
 * Rebuild the vCPU's Intel PT descriptor from the guest's CPUID leaf 0x14.
 *
 * Copies EAX/EBX/ECX/EDX of each of the PT_CPUID_LEAVES sub-leaves into
 * pt_desc.caps, then derives pt_desc.num_address_ranges and
 * pt_desc.ctl_bitmask from those capabilities.  Every step that follows the
 * initial assignment of ctl_bitmask only clears bits from it.
 *
 * If any sub-leaf is missing the function returns immediately: sub-leaves
 * copied before that point stay in pt_desc.caps, while num_address_ranges
 * and ctl_bitmask are left unmodified.
 */
static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct kvm_cpuid_entry2 *best = NULL;
	int i;

	/* caps[] holds PT_CPUID_REGS_NUM registers per sub-leaf, back to back. */
	for (i = 0; i < PT_CPUID_LEAVES; i++) {
		best = kvm_find_cpuid_entry_index(vcpu, 0x14, i);
		if (!best)
			return;
		vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax;
		vmx->pt_desc.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM] = best->ebx;
		vmx->pt_desc.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM] = best->ecx;
		vmx->pt_desc.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM] = best->edx;
	}

	/* Get the number of configurable Address Ranges for filtering */
	vmx->pt_desc.num_address_ranges = intel_pt_validate_cap(vmx->pt_desc.caps,
						PT_CAP_num_address_ranges);

	/* Initialize and clear the no dependency bits */
	vmx->pt_desc.ctl_bitmask = ~(RTIT_CTL_TRACEEN | RTIT_CTL_OS |
			RTIT_CTL_USR | RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC |
			RTIT_CTL_BRANCH_EN);

	/*
	 * If CPUID.(EAX=14H,ECX=0):EBX[0]=1 CR3Filter can be set otherwise
	 * will inject an #GP
	 */
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_cr3_filtering))
		vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_CR3EN;

	/*
	 * If CPUID.(EAX=14H,ECX=0):EBX[1]=1 CYCEn, CycThresh and
	 * PSBFreq can be set
	 */
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc))
		vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_CYCLEACC |
				RTIT_CTL_CYC_THRESH | RTIT_CTL_PSB_FREQ);

	/*
	 * If CPUID.(EAX=14H,ECX=0):EBX[3]=1 MTCEn and MTCFreq can be set
	 */
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc))
		vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_MTC_EN |
					      RTIT_CTL_MTC_RANGE);

	/* If CPUID.(EAX=14H,ECX=0):EBX[4]=1 FUPonPTW and PTWEn can be set */
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_ptwrite))
		vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_FUP_ON_PTW |
							RTIT_CTL_PTW_EN);

	/* If CPUID.(EAX=14H,ECX=0):EBX[5]=1 PwrEvEn can be set */
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_power_event_trace))
		vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_PWR_EVT_EN;

	/* If CPUID.(EAX=14H,ECX=0):ECX[0]=1 ToPA can be set */
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_topa_output))
		vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_TOPA;

	/* If CPUID.(EAX=14H,ECX=0):ECX[3]=1 FabricEn can be set */
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_output_subsys))
		vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_FABRIC_EN;

	/*
	 * Unmask the address range configuration area: one 4-bit field per
	 * supported range, the first one starting at bit 32.
	 */
	for (i = 0; i < vmx->pt_desc.num_address_ranges; i++)
		vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
}
776362306a36Sopenharmony_ci
/*
 * Refresh the VMX state of @vcpu that is derived from the guest's CPUID.
 * NOTE(review): presumably invoked after the vCPU's CPUID has been updated -
 * confirm against the caller.
 *
 * In order: updates the governed XSAVES and VMX features, re-runs the
 * user-return MSR setup, recomputes the secondary execution controls,
 * adjusts the valid bits of the feature control MSR, refreshes the nested
 * CR fixed1 bits and the Intel PT configuration, updates the guest value of
 * TSX_CTRL, sets up interception of a few CPUID-dependent MSRs, and finally
 * refreshes the CR4 guest/host mask, the ENCLS bitmap and the exception
 * bitmap.
 */
static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * XSAVES is effectively enabled if and only if XSAVE is also exposed
	 * to the guest.  XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
	 * set if and only if XSAVE is supported.
	 */
	if (boot_cpu_has(X86_FEATURE_XSAVE) &&
	    guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
		kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES);

	/* Must precede the guest_can_use(X86_FEATURE_VMX) checks below. */
	kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX);

	vmx_setup_uret_msrs(vmx);

	if (cpu_has_secondary_exec_ctrls())
		vmcs_set_secondary_exec_control(vmx,
						vmx_secondary_exec_control(vmx));

	/* The VMX enable bits of the feature control MSR are valid only with VMX. */
	if (guest_can_use(vcpu, X86_FEATURE_VMX))
		vmx->msr_ia32_feature_control_valid_bits |=
			FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
			FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
	else
		vmx->msr_ia32_feature_control_valid_bits &=
			~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
			  FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);

	if (guest_can_use(vcpu, X86_FEATURE_VMX))
		nested_vmx_cr_fixed1_bits_update(vcpu);

	if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
			guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT))
		update_intel_pt_cfg(vcpu);

	/*
	 * Set the guest's TSX_CTRL value to 0 when RTM is exposed to the
	 * guest, and to TSX_CTRL_RTM_DISABLE when it is not.
	 */
	if (boot_cpu_has(X86_FEATURE_RTM)) {
		struct vmx_uret_msr *msr;
		msr = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
		if (msr) {
			bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM);
			vmx_set_guest_uret_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE);
		}
	}

	/*
	 * For each of the MSRs below, intercept the access unless the guest's
	 * CPUID advertises the corresponding feature.
	 */
	if (kvm_cpu_cap_has(X86_FEATURE_XFD))
		vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
					  !guest_cpuid_has(vcpu, X86_FEATURE_XFD));

	if (boot_cpu_has(X86_FEATURE_IBPB))
		vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
					  !guest_has_pred_cmd_msr(vcpu));

	if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
		vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
					  !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));

	set_cr4_guest_host_mask(vmx);

	vmx_write_encls_bitmap(vcpu, NULL);
	/* The SGX bits of the feature control MSR follow the guest's SGX CPUID. */
	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX))
		vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_SGX_ENABLED;
	else
		vmx->msr_ia32_feature_control_valid_bits &= ~FEAT_CTL_SGX_ENABLED;

	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC))
		vmx->msr_ia32_feature_control_valid_bits |=
			FEAT_CTL_SGX_LC_ENABLED;
	else
		vmx->msr_ia32_feature_control_valid_bits &=
			~FEAT_CTL_SGX_LC_ENABLED;

	/* Refresh #PF interception to account for MAXPHYADDR changes. */
	vmx_update_exception_bitmap(vcpu);
}
784062306a36Sopenharmony_ci
784162306a36Sopenharmony_cistatic u64 vmx_get_perf_capabilities(void)
784262306a36Sopenharmony_ci{
784362306a36Sopenharmony_ci	u64 perf_cap = PMU_CAP_FW_WRITES;
784462306a36Sopenharmony_ci	struct x86_pmu_lbr lbr;
784562306a36Sopenharmony_ci	u64 host_perf_cap = 0;
784662306a36Sopenharmony_ci
784762306a36Sopenharmony_ci	if (!enable_pmu)
784862306a36Sopenharmony_ci		return 0;
784962306a36Sopenharmony_ci
785062306a36Sopenharmony_ci	if (boot_cpu_has(X86_FEATURE_PDCM))
785162306a36Sopenharmony_ci		rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
785262306a36Sopenharmony_ci
785362306a36Sopenharmony_ci	if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
785462306a36Sopenharmony_ci		x86_perf_get_lbr(&lbr);
785562306a36Sopenharmony_ci		if (lbr.nr)
785662306a36Sopenharmony_ci			perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
785762306a36Sopenharmony_ci	}
785862306a36Sopenharmony_ci
785962306a36Sopenharmony_ci	if (vmx_pebs_supported()) {
786062306a36Sopenharmony_ci		perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
786162306a36Sopenharmony_ci		if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4)
786262306a36Sopenharmony_ci			perf_cap &= ~PERF_CAP_PEBS_BASELINE;
786362306a36Sopenharmony_ci	}
786462306a36Sopenharmony_ci
786562306a36Sopenharmony_ci	return perf_cap;
786662306a36Sopenharmony_ci}
786762306a36Sopenharmony_ci
/*
 * Adjust KVM's CPU capabilities for VMX.
 *
 * Starts from kvm_set_cpu_caps() and then sets or clears individual feature
 * bits depending on module parameters (nested, enable_pmu, enable_sgx) and
 * on the VMX capability helpers called below.  Also initializes
 * kvm_caps.supported_perf_cap and kvm_caps.supported_xss.
 */
static __init void vmx_set_cpu_caps(void)
{
	kvm_set_cpu_caps();

	/* CPUID 0x1 */
	if (nested)
		kvm_cpu_cap_set(X86_FEATURE_VMX);

	/* CPUID 0x7 */
	if (kvm_mpx_supported())
		kvm_cpu_cap_check_and_set(X86_FEATURE_MPX);
	if (!cpu_has_vmx_invpcid())
		kvm_cpu_cap_clear(X86_FEATURE_INVPCID);
	if (vmx_pt_mode_is_host_guest())
		kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT);
	if (vmx_pebs_supported()) {
		kvm_cpu_cap_check_and_set(X86_FEATURE_DS);
		kvm_cpu_cap_check_and_set(X86_FEATURE_DTES64);
	}

	/* Without a PMU, hide PDCM; vmx_get_perf_capabilities() then returns 0. */
	if (!enable_pmu)
		kvm_cpu_cap_clear(X86_FEATURE_PDCM);
	kvm_caps.supported_perf_cap = vmx_get_perf_capabilities();

	/* With SGX disabled, clear every SGX-related capability. */
	if (!enable_sgx) {
		kvm_cpu_cap_clear(X86_FEATURE_SGX);
		kvm_cpu_cap_clear(X86_FEATURE_SGX_LC);
		kvm_cpu_cap_clear(X86_FEATURE_SGX1);
		kvm_cpu_cap_clear(X86_FEATURE_SGX2);
	}

	/* Set unconditionally (no check_and_set) when vmx_umip_emulated(). */
	if (vmx_umip_emulated())
		kvm_cpu_cap_set(X86_FEATURE_UMIP);

	/* CPUID 0xD.1 */
	kvm_caps.supported_xss = 0;
	if (!cpu_has_vmx_xsaves())
		kvm_cpu_cap_clear(X86_FEATURE_XSAVES);

	/* CPUID 0x80000001 and 0x7 (RDPID) */
	if (!cpu_has_vmx_rdtscp()) {
		kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
		kvm_cpu_cap_clear(X86_FEATURE_RDPID);
	}

	if (cpu_has_vmx_waitpkg())
		kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
}
791662306a36Sopenharmony_ci
791762306a36Sopenharmony_cistatic void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
791862306a36Sopenharmony_ci{
791962306a36Sopenharmony_ci	to_vmx(vcpu)->req_immediate_exit = true;
792062306a36Sopenharmony_ci}
792162306a36Sopenharmony_ci
792262306a36Sopenharmony_cistatic int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
792362306a36Sopenharmony_ci				  struct x86_instruction_info *info)
792462306a36Sopenharmony_ci{
792562306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
792662306a36Sopenharmony_ci	unsigned short port;
792762306a36Sopenharmony_ci	bool intercept;
792862306a36Sopenharmony_ci	int size;
792962306a36Sopenharmony_ci
793062306a36Sopenharmony_ci	if (info->intercept == x86_intercept_in ||
793162306a36Sopenharmony_ci	    info->intercept == x86_intercept_ins) {
793262306a36Sopenharmony_ci		port = info->src_val;
793362306a36Sopenharmony_ci		size = info->dst_bytes;
793462306a36Sopenharmony_ci	} else {
793562306a36Sopenharmony_ci		port = info->dst_val;
793662306a36Sopenharmony_ci		size = info->src_bytes;
793762306a36Sopenharmony_ci	}
793862306a36Sopenharmony_ci
793962306a36Sopenharmony_ci	/*
794062306a36Sopenharmony_ci	 * If the 'use IO bitmaps' VM-execution control is 0, IO instruction
794162306a36Sopenharmony_ci	 * VM-exits depend on the 'unconditional IO exiting' VM-execution
794262306a36Sopenharmony_ci	 * control.
794362306a36Sopenharmony_ci	 *
794462306a36Sopenharmony_ci	 * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps.
794562306a36Sopenharmony_ci	 */
794662306a36Sopenharmony_ci	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
794762306a36Sopenharmony_ci		intercept = nested_cpu_has(vmcs12,
794862306a36Sopenharmony_ci					   CPU_BASED_UNCOND_IO_EXITING);
794962306a36Sopenharmony_ci	else
795062306a36Sopenharmony_ci		intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);
795162306a36Sopenharmony_ci
795262306a36Sopenharmony_ci	/* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED.  */
795362306a36Sopenharmony_ci	return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
795462306a36Sopenharmony_ci}
795562306a36Sopenharmony_ci
795662306a36Sopenharmony_cistatic int vmx_check_intercept(struct kvm_vcpu *vcpu,
795762306a36Sopenharmony_ci			       struct x86_instruction_info *info,
795862306a36Sopenharmony_ci			       enum x86_intercept_stage stage,
795962306a36Sopenharmony_ci			       struct x86_exception *exception)
796062306a36Sopenharmony_ci{
796162306a36Sopenharmony_ci	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
796262306a36Sopenharmony_ci
796362306a36Sopenharmony_ci	switch (info->intercept) {
796462306a36Sopenharmony_ci	/*
796562306a36Sopenharmony_ci	 * RDPID causes #UD if disabled through secondary execution controls.
796662306a36Sopenharmony_ci	 * Because it is marked as EmulateOnUD, we need to intercept it here.
796762306a36Sopenharmony_ci	 * Note, RDPID is hidden behind ENABLE_RDTSCP.
796862306a36Sopenharmony_ci	 */
796962306a36Sopenharmony_ci	case x86_intercept_rdpid:
797062306a36Sopenharmony_ci		if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) {
797162306a36Sopenharmony_ci			exception->vector = UD_VECTOR;
797262306a36Sopenharmony_ci			exception->error_code_valid = false;
797362306a36Sopenharmony_ci			return X86EMUL_PROPAGATE_FAULT;
797462306a36Sopenharmony_ci		}
797562306a36Sopenharmony_ci		break;
797662306a36Sopenharmony_ci
797762306a36Sopenharmony_ci	case x86_intercept_in:
797862306a36Sopenharmony_ci	case x86_intercept_ins:
797962306a36Sopenharmony_ci	case x86_intercept_out:
798062306a36Sopenharmony_ci	case x86_intercept_outs:
798162306a36Sopenharmony_ci		return vmx_check_intercept_io(vcpu, info);
798262306a36Sopenharmony_ci
798362306a36Sopenharmony_ci	case x86_intercept_lgdt:
798462306a36Sopenharmony_ci	case x86_intercept_lidt:
798562306a36Sopenharmony_ci	case x86_intercept_lldt:
798662306a36Sopenharmony_ci	case x86_intercept_ltr:
798762306a36Sopenharmony_ci	case x86_intercept_sgdt:
798862306a36Sopenharmony_ci	case x86_intercept_sidt:
798962306a36Sopenharmony_ci	case x86_intercept_sldt:
799062306a36Sopenharmony_ci	case x86_intercept_str:
799162306a36Sopenharmony_ci		if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC))
799262306a36Sopenharmony_ci			return X86EMUL_CONTINUE;
799362306a36Sopenharmony_ci
799462306a36Sopenharmony_ci		/* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED.  */
799562306a36Sopenharmony_ci		break;
799662306a36Sopenharmony_ci
799762306a36Sopenharmony_ci	case x86_intercept_pause:
799862306a36Sopenharmony_ci		/*
799962306a36Sopenharmony_ci		 * PAUSE is a single-byte NOP with a REPE prefix, i.e. collides
800062306a36Sopenharmony_ci		 * with vanilla NOPs in the emulator.  Apply the interception
800162306a36Sopenharmony_ci		 * check only to actual PAUSE instructions.  Don't check
800262306a36Sopenharmony_ci		 * PAUSE-loop-exiting, software can't expect a given PAUSE to
800362306a36Sopenharmony_ci		 * exit, i.e. KVM is within its rights to allow L2 to execute
800462306a36Sopenharmony_ci		 * the PAUSE.
800562306a36Sopenharmony_ci		 */
800662306a36Sopenharmony_ci		if ((info->rep_prefix != REPE_PREFIX) ||
800762306a36Sopenharmony_ci		    !nested_cpu_has2(vmcs12, CPU_BASED_PAUSE_EXITING))
800862306a36Sopenharmony_ci			return X86EMUL_CONTINUE;
800962306a36Sopenharmony_ci
801062306a36Sopenharmony_ci		break;
801162306a36Sopenharmony_ci
801262306a36Sopenharmony_ci	/* TODO: check more intercepts... */
801362306a36Sopenharmony_ci	default:
801462306a36Sopenharmony_ci		break;
801562306a36Sopenharmony_ci	}
801662306a36Sopenharmony_ci
801762306a36Sopenharmony_ci	return X86EMUL_UNHANDLEABLE;
801862306a36Sopenharmony_ci}
801962306a36Sopenharmony_ci
802062306a36Sopenharmony_ci#ifdef CONFIG_X86_64
802162306a36Sopenharmony_ci/* (a << shift) / divisor, return 1 if overflow otherwise 0 */
802262306a36Sopenharmony_cistatic inline int u64_shl_div_u64(u64 a, unsigned int shift,
802362306a36Sopenharmony_ci				  u64 divisor, u64 *result)
802462306a36Sopenharmony_ci{
802562306a36Sopenharmony_ci	u64 low = a << shift, high = a >> (64 - shift);
802662306a36Sopenharmony_ci
802762306a36Sopenharmony_ci	/* To avoid the overflow on divq */
802862306a36Sopenharmony_ci	if (high >= divisor)
802962306a36Sopenharmony_ci		return 1;
803062306a36Sopenharmony_ci
803162306a36Sopenharmony_ci	/* Low hold the result, high hold rem which is discarded */
803262306a36Sopenharmony_ci	asm("divq %2\n\t" : "=a" (low), "=d" (high) :
803362306a36Sopenharmony_ci	    "rm" (divisor), "0" (low), "1" (high));
803462306a36Sopenharmony_ci	*result = low;
803562306a36Sopenharmony_ci
803662306a36Sopenharmony_ci	return 0;
803762306a36Sopenharmony_ci}
803862306a36Sopenharmony_ci
803962306a36Sopenharmony_cistatic int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
804062306a36Sopenharmony_ci			    bool *expired)
804162306a36Sopenharmony_ci{
804262306a36Sopenharmony_ci	struct vcpu_vmx *vmx;
804362306a36Sopenharmony_ci	u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
804462306a36Sopenharmony_ci	struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
804562306a36Sopenharmony_ci
804662306a36Sopenharmony_ci	vmx = to_vmx(vcpu);
804762306a36Sopenharmony_ci	tscl = rdtsc();
804862306a36Sopenharmony_ci	guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
804962306a36Sopenharmony_ci	delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
805062306a36Sopenharmony_ci	lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
805162306a36Sopenharmony_ci						    ktimer->timer_advance_ns);
805262306a36Sopenharmony_ci
805362306a36Sopenharmony_ci	if (delta_tsc > lapic_timer_advance_cycles)
805462306a36Sopenharmony_ci		delta_tsc -= lapic_timer_advance_cycles;
805562306a36Sopenharmony_ci	else
805662306a36Sopenharmony_ci		delta_tsc = 0;
805762306a36Sopenharmony_ci
805862306a36Sopenharmony_ci	/* Convert to host delta tsc if tsc scaling is enabled */
805962306a36Sopenharmony_ci	if (vcpu->arch.l1_tsc_scaling_ratio != kvm_caps.default_tsc_scaling_ratio &&
806062306a36Sopenharmony_ci	    delta_tsc && u64_shl_div_u64(delta_tsc,
806162306a36Sopenharmony_ci				kvm_caps.tsc_scaling_ratio_frac_bits,
806262306a36Sopenharmony_ci				vcpu->arch.l1_tsc_scaling_ratio, &delta_tsc))
806362306a36Sopenharmony_ci		return -ERANGE;
806462306a36Sopenharmony_ci
806562306a36Sopenharmony_ci	/*
806662306a36Sopenharmony_ci	 * If the delta tsc can't fit in the 32 bit after the multi shift,
806762306a36Sopenharmony_ci	 * we can't use the preemption timer.
806862306a36Sopenharmony_ci	 * It's possible that it fits on later vmentries, but checking
806962306a36Sopenharmony_ci	 * on every vmentry is costly so we just use an hrtimer.
807062306a36Sopenharmony_ci	 */
807162306a36Sopenharmony_ci	if (delta_tsc >> (cpu_preemption_timer_multi + 32))
807262306a36Sopenharmony_ci		return -ERANGE;
807362306a36Sopenharmony_ci
807462306a36Sopenharmony_ci	vmx->hv_deadline_tsc = tscl + delta_tsc;
807562306a36Sopenharmony_ci	*expired = !delta_tsc;
807662306a36Sopenharmony_ci	return 0;
807762306a36Sopenharmony_ci}
807862306a36Sopenharmony_ci
807962306a36Sopenharmony_cistatic void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
808062306a36Sopenharmony_ci{
808162306a36Sopenharmony_ci	to_vmx(vcpu)->hv_deadline_tsc = -1;
808262306a36Sopenharmony_ci}
808362306a36Sopenharmony_ci#endif
808462306a36Sopenharmony_ci
808562306a36Sopenharmony_cistatic void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
808662306a36Sopenharmony_ci{
808762306a36Sopenharmony_ci	if (!kvm_pause_in_guest(vcpu->kvm))
808862306a36Sopenharmony_ci		shrink_ple_window(vcpu);
808962306a36Sopenharmony_ci}
809062306a36Sopenharmony_ci
809162306a36Sopenharmony_civoid vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
809262306a36Sopenharmony_ci{
809362306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
809462306a36Sopenharmony_ci
809562306a36Sopenharmony_ci	if (WARN_ON_ONCE(!enable_pml))
809662306a36Sopenharmony_ci		return;
809762306a36Sopenharmony_ci
809862306a36Sopenharmony_ci	if (is_guest_mode(vcpu)) {
809962306a36Sopenharmony_ci		vmx->nested.update_vmcs01_cpu_dirty_logging = true;
810062306a36Sopenharmony_ci		return;
810162306a36Sopenharmony_ci	}
810262306a36Sopenharmony_ci
810362306a36Sopenharmony_ci	/*
810462306a36Sopenharmony_ci	 * Note, nr_memslots_dirty_logging can be changed concurrent with this
810562306a36Sopenharmony_ci	 * code, but in that case another update request will be made and so
810662306a36Sopenharmony_ci	 * the guest will never run with a stale PML value.
810762306a36Sopenharmony_ci	 */
810862306a36Sopenharmony_ci	if (atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
810962306a36Sopenharmony_ci		secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_ENABLE_PML);
811062306a36Sopenharmony_ci	else
811162306a36Sopenharmony_ci		secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML);
811262306a36Sopenharmony_ci}
811362306a36Sopenharmony_ci
811462306a36Sopenharmony_cistatic void vmx_setup_mce(struct kvm_vcpu *vcpu)
811562306a36Sopenharmony_ci{
811662306a36Sopenharmony_ci	if (vcpu->arch.mcg_cap & MCG_LMCE_P)
811762306a36Sopenharmony_ci		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
811862306a36Sopenharmony_ci			FEAT_CTL_LMCE_ENABLED;
811962306a36Sopenharmony_ci	else
812062306a36Sopenharmony_ci		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
812162306a36Sopenharmony_ci			~FEAT_CTL_LMCE_ENABLED;
812262306a36Sopenharmony_ci}
812362306a36Sopenharmony_ci
812462306a36Sopenharmony_ci#ifdef CONFIG_KVM_SMM
812562306a36Sopenharmony_cistatic int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
812662306a36Sopenharmony_ci{
812762306a36Sopenharmony_ci	/* we need a nested vmexit to enter SMM, postpone if run is pending */
812862306a36Sopenharmony_ci	if (to_vmx(vcpu)->nested.nested_run_pending)
812962306a36Sopenharmony_ci		return -EBUSY;
813062306a36Sopenharmony_ci	return !is_smm(vcpu);
813162306a36Sopenharmony_ci}
813262306a36Sopenharmony_ci
813362306a36Sopenharmony_cistatic int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
813462306a36Sopenharmony_ci{
813562306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
813662306a36Sopenharmony_ci
813762306a36Sopenharmony_ci	/*
813862306a36Sopenharmony_ci	 * TODO: Implement custom flows for forcing the vCPU out/in of L2 on
813962306a36Sopenharmony_ci	 * SMI and RSM.  Using the common VM-Exit + VM-Enter routines is wrong
814062306a36Sopenharmony_ci	 * SMI and RSM only modify state that is saved and restored via SMRAM.
814162306a36Sopenharmony_ci	 * E.g. most MSRs are left untouched, but many are modified by VM-Exit
814262306a36Sopenharmony_ci	 * and VM-Enter, and thus L2's values may be corrupted on SMI+RSM.
814362306a36Sopenharmony_ci	 */
814462306a36Sopenharmony_ci	vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
814562306a36Sopenharmony_ci	if (vmx->nested.smm.guest_mode)
814662306a36Sopenharmony_ci		nested_vmx_vmexit(vcpu, -1, 0, 0);
814762306a36Sopenharmony_ci
814862306a36Sopenharmony_ci	vmx->nested.smm.vmxon = vmx->nested.vmxon;
814962306a36Sopenharmony_ci	vmx->nested.vmxon = false;
815062306a36Sopenharmony_ci	vmx_clear_hlt(vcpu);
815162306a36Sopenharmony_ci	return 0;
815262306a36Sopenharmony_ci}
815362306a36Sopenharmony_ci
815462306a36Sopenharmony_cistatic int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
815562306a36Sopenharmony_ci{
815662306a36Sopenharmony_ci	struct vcpu_vmx *vmx = to_vmx(vcpu);
815762306a36Sopenharmony_ci	int ret;
815862306a36Sopenharmony_ci
815962306a36Sopenharmony_ci	if (vmx->nested.smm.vmxon) {
816062306a36Sopenharmony_ci		vmx->nested.vmxon = true;
816162306a36Sopenharmony_ci		vmx->nested.smm.vmxon = false;
816262306a36Sopenharmony_ci	}
816362306a36Sopenharmony_ci
816462306a36Sopenharmony_ci	if (vmx->nested.smm.guest_mode) {
816562306a36Sopenharmony_ci		ret = nested_vmx_enter_non_root_mode(vcpu, false);
816662306a36Sopenharmony_ci		if (ret)
816762306a36Sopenharmony_ci			return ret;
816862306a36Sopenharmony_ci
816962306a36Sopenharmony_ci		vmx->nested.nested_run_pending = 1;
817062306a36Sopenharmony_ci		vmx->nested.smm.guest_mode = false;
817162306a36Sopenharmony_ci	}
817262306a36Sopenharmony_ci	return 0;
817362306a36Sopenharmony_ci}
817462306a36Sopenharmony_ci
/* Intentionally empty: RSM triggers a vmexit on its own, nothing to arm. */
static void vmx_enable_smi_window(struct kvm_vcpu *vcpu)
{
}
817962306a36Sopenharmony_ci#endif
818062306a36Sopenharmony_ci
818162306a36Sopenharmony_cistatic bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
818262306a36Sopenharmony_ci{
818362306a36Sopenharmony_ci	return to_vmx(vcpu)->nested.vmxon && !is_guest_mode(vcpu);
818462306a36Sopenharmony_ci}
818562306a36Sopenharmony_ci
818662306a36Sopenharmony_cistatic void vmx_migrate_timers(struct kvm_vcpu *vcpu)
818762306a36Sopenharmony_ci{
818862306a36Sopenharmony_ci	if (is_guest_mode(vcpu)) {
818962306a36Sopenharmony_ci		struct hrtimer *timer = &to_vmx(vcpu)->nested.preemption_timer;
819062306a36Sopenharmony_ci
819162306a36Sopenharmony_ci		if (hrtimer_try_to_cancel(timer) == 1)
819262306a36Sopenharmony_ci			hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
819362306a36Sopenharmony_ci	}
819462306a36Sopenharmony_ci}
819562306a36Sopenharmony_ci
819662306a36Sopenharmony_cistatic void vmx_hardware_unsetup(void)
819762306a36Sopenharmony_ci{
819862306a36Sopenharmony_ci	kvm_set_posted_intr_wakeup_handler(NULL);
819962306a36Sopenharmony_ci
820062306a36Sopenharmony_ci	if (nested)
820162306a36Sopenharmony_ci		nested_vmx_hardware_unsetup();
820262306a36Sopenharmony_ci
820362306a36Sopenharmony_ci	free_kvm_area();
820462306a36Sopenharmony_ci}
820562306a36Sopenharmony_ci
/* Bitmask of APICv inhibit reasons, published as .required_apicv_inhibits. */
#define VMX_REQUIRED_APICV_INHIBITS				\
	(BIT(APICV_INHIBIT_REASON_DISABLE) |			\
	 BIT(APICV_INHIBIT_REASON_ABSENT) |			\
	 BIT(APICV_INHIBIT_REASON_HYPERV) |			\
	 BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |			\
	 BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) |	\
	 BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |		\
	 BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED))
821662306a36Sopenharmony_ci
821762306a36Sopenharmony_cistatic void vmx_vm_destroy(struct kvm *kvm)
821862306a36Sopenharmony_ci{
821962306a36Sopenharmony_ci	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
822062306a36Sopenharmony_ci
822162306a36Sopenharmony_ci	free_pages((unsigned long)kvm_vmx->pid_table, vmx_get_pid_table_order(kvm));
822262306a36Sopenharmony_ci}
822362306a36Sopenharmony_ci
822462306a36Sopenharmony_cistatic struct kvm_x86_ops vmx_x86_ops __initdata = {
822562306a36Sopenharmony_ci	.name = KBUILD_MODNAME,
822662306a36Sopenharmony_ci
822762306a36Sopenharmony_ci	.check_processor_compatibility = vmx_check_processor_compat,
822862306a36Sopenharmony_ci
822962306a36Sopenharmony_ci	.hardware_unsetup = vmx_hardware_unsetup,
823062306a36Sopenharmony_ci
823162306a36Sopenharmony_ci	.hardware_enable = vmx_hardware_enable,
823262306a36Sopenharmony_ci	.hardware_disable = vmx_hardware_disable,
823362306a36Sopenharmony_ci	.has_emulated_msr = vmx_has_emulated_msr,
823462306a36Sopenharmony_ci
823562306a36Sopenharmony_ci	.vm_size = sizeof(struct kvm_vmx),
823662306a36Sopenharmony_ci	.vm_init = vmx_vm_init,
823762306a36Sopenharmony_ci	.vm_destroy = vmx_vm_destroy,
823862306a36Sopenharmony_ci
823962306a36Sopenharmony_ci	.vcpu_precreate = vmx_vcpu_precreate,
824062306a36Sopenharmony_ci	.vcpu_create = vmx_vcpu_create,
824162306a36Sopenharmony_ci	.vcpu_free = vmx_vcpu_free,
824262306a36Sopenharmony_ci	.vcpu_reset = vmx_vcpu_reset,
824362306a36Sopenharmony_ci
824462306a36Sopenharmony_ci	.prepare_switch_to_guest = vmx_prepare_switch_to_guest,
824562306a36Sopenharmony_ci	.vcpu_load = vmx_vcpu_load,
824662306a36Sopenharmony_ci	.vcpu_put = vmx_vcpu_put,
824762306a36Sopenharmony_ci
824862306a36Sopenharmony_ci	.update_exception_bitmap = vmx_update_exception_bitmap,
824962306a36Sopenharmony_ci	.get_msr_feature = vmx_get_msr_feature,
825062306a36Sopenharmony_ci	.get_msr = vmx_get_msr,
825162306a36Sopenharmony_ci	.set_msr = vmx_set_msr,
825262306a36Sopenharmony_ci	.get_segment_base = vmx_get_segment_base,
825362306a36Sopenharmony_ci	.get_segment = vmx_get_segment,
825462306a36Sopenharmony_ci	.set_segment = vmx_set_segment,
825562306a36Sopenharmony_ci	.get_cpl = vmx_get_cpl,
825662306a36Sopenharmony_ci	.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
825762306a36Sopenharmony_ci	.is_valid_cr0 = vmx_is_valid_cr0,
825862306a36Sopenharmony_ci	.set_cr0 = vmx_set_cr0,
825962306a36Sopenharmony_ci	.is_valid_cr4 = vmx_is_valid_cr4,
826062306a36Sopenharmony_ci	.set_cr4 = vmx_set_cr4,
826162306a36Sopenharmony_ci	.set_efer = vmx_set_efer,
826262306a36Sopenharmony_ci	.get_idt = vmx_get_idt,
826362306a36Sopenharmony_ci	.set_idt = vmx_set_idt,
826462306a36Sopenharmony_ci	.get_gdt = vmx_get_gdt,
826562306a36Sopenharmony_ci	.set_gdt = vmx_set_gdt,
826662306a36Sopenharmony_ci	.set_dr7 = vmx_set_dr7,
826762306a36Sopenharmony_ci	.sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
826862306a36Sopenharmony_ci	.cache_reg = vmx_cache_reg,
826962306a36Sopenharmony_ci	.get_rflags = vmx_get_rflags,
827062306a36Sopenharmony_ci	.set_rflags = vmx_set_rflags,
827162306a36Sopenharmony_ci	.get_if_flag = vmx_get_if_flag,
827262306a36Sopenharmony_ci
827362306a36Sopenharmony_ci	.flush_tlb_all = vmx_flush_tlb_all,
827462306a36Sopenharmony_ci	.flush_tlb_current = vmx_flush_tlb_current,
827562306a36Sopenharmony_ci	.flush_tlb_gva = vmx_flush_tlb_gva,
827662306a36Sopenharmony_ci	.flush_tlb_guest = vmx_flush_tlb_guest,
827762306a36Sopenharmony_ci
827862306a36Sopenharmony_ci	.vcpu_pre_run = vmx_vcpu_pre_run,
827962306a36Sopenharmony_ci	.vcpu_run = vmx_vcpu_run,
828062306a36Sopenharmony_ci	.handle_exit = vmx_handle_exit,
828162306a36Sopenharmony_ci	.skip_emulated_instruction = vmx_skip_emulated_instruction,
828262306a36Sopenharmony_ci	.update_emulated_instruction = vmx_update_emulated_instruction,
828362306a36Sopenharmony_ci	.set_interrupt_shadow = vmx_set_interrupt_shadow,
828462306a36Sopenharmony_ci	.get_interrupt_shadow = vmx_get_interrupt_shadow,
828562306a36Sopenharmony_ci	.patch_hypercall = vmx_patch_hypercall,
828662306a36Sopenharmony_ci	.inject_irq = vmx_inject_irq,
828762306a36Sopenharmony_ci	.inject_nmi = vmx_inject_nmi,
828862306a36Sopenharmony_ci	.inject_exception = vmx_inject_exception,
828962306a36Sopenharmony_ci	.cancel_injection = vmx_cancel_injection,
829062306a36Sopenharmony_ci	.interrupt_allowed = vmx_interrupt_allowed,
829162306a36Sopenharmony_ci	.nmi_allowed = vmx_nmi_allowed,
829262306a36Sopenharmony_ci	.get_nmi_mask = vmx_get_nmi_mask,
829362306a36Sopenharmony_ci	.set_nmi_mask = vmx_set_nmi_mask,
829462306a36Sopenharmony_ci	.enable_nmi_window = vmx_enable_nmi_window,
829562306a36Sopenharmony_ci	.enable_irq_window = vmx_enable_irq_window,
829662306a36Sopenharmony_ci	.update_cr8_intercept = vmx_update_cr8_intercept,
829762306a36Sopenharmony_ci	.set_virtual_apic_mode = vmx_set_virtual_apic_mode,
829862306a36Sopenharmony_ci	.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
829962306a36Sopenharmony_ci	.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
830062306a36Sopenharmony_ci	.load_eoi_exitmap = vmx_load_eoi_exitmap,
830162306a36Sopenharmony_ci	.apicv_pre_state_restore = vmx_apicv_pre_state_restore,
830262306a36Sopenharmony_ci	.required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
830362306a36Sopenharmony_ci	.hwapic_irr_update = vmx_hwapic_irr_update,
830462306a36Sopenharmony_ci	.hwapic_isr_update = vmx_hwapic_isr_update,
830562306a36Sopenharmony_ci	.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
830662306a36Sopenharmony_ci	.sync_pir_to_irr = vmx_sync_pir_to_irr,
830762306a36Sopenharmony_ci	.deliver_interrupt = vmx_deliver_interrupt,
830862306a36Sopenharmony_ci	.dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
830962306a36Sopenharmony_ci
831062306a36Sopenharmony_ci	.set_tss_addr = vmx_set_tss_addr,
831162306a36Sopenharmony_ci	.set_identity_map_addr = vmx_set_identity_map_addr,
831262306a36Sopenharmony_ci	.get_mt_mask = vmx_get_mt_mask,
831362306a36Sopenharmony_ci
831462306a36Sopenharmony_ci	.get_exit_info = vmx_get_exit_info,
831562306a36Sopenharmony_ci
831662306a36Sopenharmony_ci	.vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid,
831762306a36Sopenharmony_ci
831862306a36Sopenharmony_ci	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
831962306a36Sopenharmony_ci
832062306a36Sopenharmony_ci	.get_l2_tsc_offset = vmx_get_l2_tsc_offset,
832162306a36Sopenharmony_ci	.get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier,
832262306a36Sopenharmony_ci	.write_tsc_offset = vmx_write_tsc_offset,
832362306a36Sopenharmony_ci	.write_tsc_multiplier = vmx_write_tsc_multiplier,
832462306a36Sopenharmony_ci
832562306a36Sopenharmony_ci	.load_mmu_pgd = vmx_load_mmu_pgd,
832662306a36Sopenharmony_ci
832762306a36Sopenharmony_ci	.check_intercept = vmx_check_intercept,
832862306a36Sopenharmony_ci	.handle_exit_irqoff = vmx_handle_exit_irqoff,
832962306a36Sopenharmony_ci
833062306a36Sopenharmony_ci	.request_immediate_exit = vmx_request_immediate_exit,
833162306a36Sopenharmony_ci
833262306a36Sopenharmony_ci	.sched_in = vmx_sched_in,
833362306a36Sopenharmony_ci
833462306a36Sopenharmony_ci	.cpu_dirty_log_size = PML_ENTITY_NUM,
833562306a36Sopenharmony_ci	.update_cpu_dirty_logging = vmx_update_cpu_dirty_logging,
833662306a36Sopenharmony_ci
833762306a36Sopenharmony_ci	.nested_ops = &vmx_nested_ops,
833862306a36Sopenharmony_ci
833962306a36Sopenharmony_ci	.pi_update_irte = vmx_pi_update_irte,
834062306a36Sopenharmony_ci	.pi_start_assignment = vmx_pi_start_assignment,
834162306a36Sopenharmony_ci
834262306a36Sopenharmony_ci#ifdef CONFIG_X86_64
834362306a36Sopenharmony_ci	.set_hv_timer = vmx_set_hv_timer,
834462306a36Sopenharmony_ci	.cancel_hv_timer = vmx_cancel_hv_timer,
834562306a36Sopenharmony_ci#endif
834662306a36Sopenharmony_ci
834762306a36Sopenharmony_ci	.setup_mce = vmx_setup_mce,
834862306a36Sopenharmony_ci
834962306a36Sopenharmony_ci#ifdef CONFIG_KVM_SMM
835062306a36Sopenharmony_ci	.smi_allowed = vmx_smi_allowed,
835162306a36Sopenharmony_ci	.enter_smm = vmx_enter_smm,
835262306a36Sopenharmony_ci	.leave_smm = vmx_leave_smm,
835362306a36Sopenharmony_ci	.enable_smi_window = vmx_enable_smi_window,
835462306a36Sopenharmony_ci#endif
835562306a36Sopenharmony_ci
835662306a36Sopenharmony_ci	.can_emulate_instruction = vmx_can_emulate_instruction,
835762306a36Sopenharmony_ci	.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
835862306a36Sopenharmony_ci	.migrate_timers = vmx_migrate_timers,
835962306a36Sopenharmony_ci
836062306a36Sopenharmony_ci	.msr_filter_changed = vmx_msr_filter_changed,
836162306a36Sopenharmony_ci	.complete_emulated_msr = kvm_complete_insn_gp,
836262306a36Sopenharmony_ci
836362306a36Sopenharmony_ci	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
836462306a36Sopenharmony_ci};
836562306a36Sopenharmony_ci
836662306a36Sopenharmony_cistatic unsigned int vmx_handle_intel_pt_intr(void)
836762306a36Sopenharmony_ci{
836862306a36Sopenharmony_ci	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
836962306a36Sopenharmony_ci
837062306a36Sopenharmony_ci	/* '0' on failure so that the !PT case can use a RET0 static call. */
837162306a36Sopenharmony_ci	if (!vcpu || !kvm_handling_nmi_from_guest(vcpu))
837262306a36Sopenharmony_ci		return 0;
837362306a36Sopenharmony_ci
837462306a36Sopenharmony_ci	kvm_make_request(KVM_REQ_PMI, vcpu);
837562306a36Sopenharmony_ci	__set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
837662306a36Sopenharmony_ci		  (unsigned long *)&vcpu->arch.pmu.global_status);
837762306a36Sopenharmony_ci	return 1;
837862306a36Sopenharmony_ci}
837962306a36Sopenharmony_ci
838062306a36Sopenharmony_cistatic __init void vmx_setup_user_return_msrs(void)
838162306a36Sopenharmony_ci{
838262306a36Sopenharmony_ci
838362306a36Sopenharmony_ci	/*
838462306a36Sopenharmony_ci	 * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
838562306a36Sopenharmony_ci	 * will emulate SYSCALL in legacy mode if the vendor string in guest
838662306a36Sopenharmony_ci	 * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
838762306a36Sopenharmony_ci	 * support this emulation, MSR_STAR is included in the list for i386,
838862306a36Sopenharmony_ci	 * but is never loaded into hardware.  MSR_CSTAR is also never loaded
838962306a36Sopenharmony_ci	 * into hardware and is here purely for emulation purposes.
839062306a36Sopenharmony_ci	 */
839162306a36Sopenharmony_ci	const u32 vmx_uret_msrs_list[] = {
839262306a36Sopenharmony_ci	#ifdef CONFIG_X86_64
839362306a36Sopenharmony_ci		MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
839462306a36Sopenharmony_ci	#endif
839562306a36Sopenharmony_ci		MSR_EFER, MSR_TSC_AUX, MSR_STAR,
839662306a36Sopenharmony_ci		MSR_IA32_TSX_CTRL,
839762306a36Sopenharmony_ci	};
839862306a36Sopenharmony_ci	int i;
839962306a36Sopenharmony_ci
840062306a36Sopenharmony_ci	BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
840162306a36Sopenharmony_ci
840262306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
840362306a36Sopenharmony_ci		kvm_add_user_return_msr(vmx_uret_msrs_list[i]);
840462306a36Sopenharmony_ci}
840562306a36Sopenharmony_ci
840662306a36Sopenharmony_cistatic void __init vmx_setup_me_spte_mask(void)
840762306a36Sopenharmony_ci{
840862306a36Sopenharmony_ci	u64 me_mask = 0;
840962306a36Sopenharmony_ci
841062306a36Sopenharmony_ci	/*
841162306a36Sopenharmony_ci	 * kvm_get_shadow_phys_bits() returns shadow_phys_bits.  Use
841262306a36Sopenharmony_ci	 * the former to avoid exposing shadow_phys_bits.
841362306a36Sopenharmony_ci	 *
841462306a36Sopenharmony_ci	 * On pre-MKTME system, boot_cpu_data.x86_phys_bits equals to
841562306a36Sopenharmony_ci	 * shadow_phys_bits.  On MKTME and/or TDX capable systems,
841662306a36Sopenharmony_ci	 * boot_cpu_data.x86_phys_bits holds the actual physical address
841762306a36Sopenharmony_ci	 * w/o the KeyID bits, and shadow_phys_bits equals to MAXPHYADDR
841862306a36Sopenharmony_ci	 * reported by CPUID.  Those bits between are KeyID bits.
841962306a36Sopenharmony_ci	 */
842062306a36Sopenharmony_ci	if (boot_cpu_data.x86_phys_bits != kvm_get_shadow_phys_bits())
842162306a36Sopenharmony_ci		me_mask = rsvd_bits(boot_cpu_data.x86_phys_bits,
842262306a36Sopenharmony_ci			kvm_get_shadow_phys_bits() - 1);
842362306a36Sopenharmony_ci	/*
842462306a36Sopenharmony_ci	 * Unlike SME, host kernel doesn't support setting up any
842562306a36Sopenharmony_ci	 * MKTME KeyID on Intel platforms.  No memory encryption
842662306a36Sopenharmony_ci	 * bits should be included into the SPTE.
842762306a36Sopenharmony_ci	 */
842862306a36Sopenharmony_ci	kvm_mmu_set_me_spte_mask(0, me_mask);
842962306a36Sopenharmony_ci}
843062306a36Sopenharmony_ci
843162306a36Sopenharmony_cistatic struct kvm_x86_init_ops vmx_init_ops __initdata;
843262306a36Sopenharmony_ci
843362306a36Sopenharmony_cistatic __init int hardware_setup(void)
843462306a36Sopenharmony_ci{
843562306a36Sopenharmony_ci	unsigned long host_bndcfgs;
843662306a36Sopenharmony_ci	struct desc_ptr dt;
843762306a36Sopenharmony_ci	int r;
843862306a36Sopenharmony_ci
843962306a36Sopenharmony_ci	store_idt(&dt);
844062306a36Sopenharmony_ci	host_idt_base = dt.address;
844162306a36Sopenharmony_ci
844262306a36Sopenharmony_ci	vmx_setup_user_return_msrs();
844362306a36Sopenharmony_ci
844462306a36Sopenharmony_ci	if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
844562306a36Sopenharmony_ci		return -EIO;
844662306a36Sopenharmony_ci
844762306a36Sopenharmony_ci	if (cpu_has_perf_global_ctrl_bug())
844862306a36Sopenharmony_ci		pr_warn_once("VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
844962306a36Sopenharmony_ci			     "does not work properly. Using workaround\n");
845062306a36Sopenharmony_ci
845162306a36Sopenharmony_ci	if (boot_cpu_has(X86_FEATURE_NX))
845262306a36Sopenharmony_ci		kvm_enable_efer_bits(EFER_NX);
845362306a36Sopenharmony_ci
845462306a36Sopenharmony_ci	if (boot_cpu_has(X86_FEATURE_MPX)) {
845562306a36Sopenharmony_ci		rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
845662306a36Sopenharmony_ci		WARN_ONCE(host_bndcfgs, "BNDCFGS in host will be lost");
845762306a36Sopenharmony_ci	}
845862306a36Sopenharmony_ci
845962306a36Sopenharmony_ci	if (!cpu_has_vmx_mpx())
846062306a36Sopenharmony_ci		kvm_caps.supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS |
846162306a36Sopenharmony_ci					     XFEATURE_MASK_BNDCSR);
846262306a36Sopenharmony_ci
846362306a36Sopenharmony_ci	if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
846462306a36Sopenharmony_ci	    !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
846562306a36Sopenharmony_ci		enable_vpid = 0;
846662306a36Sopenharmony_ci
846762306a36Sopenharmony_ci	if (!cpu_has_vmx_ept() ||
846862306a36Sopenharmony_ci	    !cpu_has_vmx_ept_4levels() ||
846962306a36Sopenharmony_ci	    !cpu_has_vmx_ept_mt_wb() ||
847062306a36Sopenharmony_ci	    !cpu_has_vmx_invept_global())
847162306a36Sopenharmony_ci		enable_ept = 0;
847262306a36Sopenharmony_ci
847362306a36Sopenharmony_ci	/* NX support is required for shadow paging. */
847462306a36Sopenharmony_ci	if (!enable_ept && !boot_cpu_has(X86_FEATURE_NX)) {
847562306a36Sopenharmony_ci		pr_err_ratelimited("NX (Execute Disable) not supported\n");
847662306a36Sopenharmony_ci		return -EOPNOTSUPP;
847762306a36Sopenharmony_ci	}
847862306a36Sopenharmony_ci
847962306a36Sopenharmony_ci	if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
848062306a36Sopenharmony_ci		enable_ept_ad_bits = 0;
848162306a36Sopenharmony_ci
848262306a36Sopenharmony_ci	if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
848362306a36Sopenharmony_ci		enable_unrestricted_guest = 0;
848462306a36Sopenharmony_ci
848562306a36Sopenharmony_ci	if (!cpu_has_vmx_flexpriority())
848662306a36Sopenharmony_ci		flexpriority_enabled = 0;
848762306a36Sopenharmony_ci
848862306a36Sopenharmony_ci	if (!cpu_has_virtual_nmis())
848962306a36Sopenharmony_ci		enable_vnmi = 0;
849062306a36Sopenharmony_ci
849162306a36Sopenharmony_ci#ifdef CONFIG_X86_SGX_KVM
849262306a36Sopenharmony_ci	if (!cpu_has_vmx_encls_vmexit())
849362306a36Sopenharmony_ci		enable_sgx = false;
849462306a36Sopenharmony_ci#endif
849562306a36Sopenharmony_ci
849662306a36Sopenharmony_ci	/*
849762306a36Sopenharmony_ci	 * set_apic_access_page_addr() is used to reload apic access
849862306a36Sopenharmony_ci	 * page upon invalidation.  No need to do anything if not
849962306a36Sopenharmony_ci	 * using the APIC_ACCESS_ADDR VMCS field.
850062306a36Sopenharmony_ci	 */
850162306a36Sopenharmony_ci	if (!flexpriority_enabled)
850262306a36Sopenharmony_ci		vmx_x86_ops.set_apic_access_page_addr = NULL;
850362306a36Sopenharmony_ci
850462306a36Sopenharmony_ci	if (!cpu_has_vmx_tpr_shadow())
850562306a36Sopenharmony_ci		vmx_x86_ops.update_cr8_intercept = NULL;
850662306a36Sopenharmony_ci
850762306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_HYPERV)
850862306a36Sopenharmony_ci	if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
850962306a36Sopenharmony_ci	    && enable_ept) {
851062306a36Sopenharmony_ci		vmx_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs;
851162306a36Sopenharmony_ci		vmx_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range;
851262306a36Sopenharmony_ci	}
851362306a36Sopenharmony_ci#endif
851462306a36Sopenharmony_ci
851562306a36Sopenharmony_ci	if (!cpu_has_vmx_ple()) {
851662306a36Sopenharmony_ci		ple_gap = 0;
851762306a36Sopenharmony_ci		ple_window = 0;
851862306a36Sopenharmony_ci		ple_window_grow = 0;
851962306a36Sopenharmony_ci		ple_window_max = 0;
852062306a36Sopenharmony_ci		ple_window_shrink = 0;
852162306a36Sopenharmony_ci	}
852262306a36Sopenharmony_ci
852362306a36Sopenharmony_ci	if (!cpu_has_vmx_apicv())
852462306a36Sopenharmony_ci		enable_apicv = 0;
852562306a36Sopenharmony_ci	if (!enable_apicv)
852662306a36Sopenharmony_ci		vmx_x86_ops.sync_pir_to_irr = NULL;
852762306a36Sopenharmony_ci
852862306a36Sopenharmony_ci	if (!enable_apicv || !cpu_has_vmx_ipiv())
852962306a36Sopenharmony_ci		enable_ipiv = false;
853062306a36Sopenharmony_ci
853162306a36Sopenharmony_ci	if (cpu_has_vmx_tsc_scaling())
853262306a36Sopenharmony_ci		kvm_caps.has_tsc_control = true;
853362306a36Sopenharmony_ci
853462306a36Sopenharmony_ci	kvm_caps.max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
853562306a36Sopenharmony_ci	kvm_caps.tsc_scaling_ratio_frac_bits = 48;
853662306a36Sopenharmony_ci	kvm_caps.has_bus_lock_exit = cpu_has_vmx_bus_lock_detection();
853762306a36Sopenharmony_ci	kvm_caps.has_notify_vmexit = cpu_has_notify_vmexit();
853862306a36Sopenharmony_ci
853962306a36Sopenharmony_ci	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
854062306a36Sopenharmony_ci
854162306a36Sopenharmony_ci	if (enable_ept)
854262306a36Sopenharmony_ci		kvm_mmu_set_ept_masks(enable_ept_ad_bits,
854362306a36Sopenharmony_ci				      cpu_has_vmx_ept_execute_only());
854462306a36Sopenharmony_ci
854562306a36Sopenharmony_ci	/*
854662306a36Sopenharmony_ci	 * Setup shadow_me_value/shadow_me_mask to include MKTME KeyID
854762306a36Sopenharmony_ci	 * bits to shadow_zero_check.
854862306a36Sopenharmony_ci	 */
854962306a36Sopenharmony_ci	vmx_setup_me_spte_mask();
855062306a36Sopenharmony_ci
855162306a36Sopenharmony_ci	kvm_configure_mmu(enable_ept, 0, vmx_get_max_ept_level(),
855262306a36Sopenharmony_ci			  ept_caps_to_lpage_level(vmx_capability.ept));
855362306a36Sopenharmony_ci
855462306a36Sopenharmony_ci	/*
855562306a36Sopenharmony_ci	 * Only enable PML when hardware supports PML feature, and both EPT
855662306a36Sopenharmony_ci	 * and EPT A/D bit features are enabled -- PML depends on them to work.
855762306a36Sopenharmony_ci	 */
855862306a36Sopenharmony_ci	if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
855962306a36Sopenharmony_ci		enable_pml = 0;
856062306a36Sopenharmony_ci
856162306a36Sopenharmony_ci	if (!enable_pml)
856262306a36Sopenharmony_ci		vmx_x86_ops.cpu_dirty_log_size = 0;
856362306a36Sopenharmony_ci
856462306a36Sopenharmony_ci	if (!cpu_has_vmx_preemption_timer())
856562306a36Sopenharmony_ci		enable_preemption_timer = false;
856662306a36Sopenharmony_ci
856762306a36Sopenharmony_ci	if (enable_preemption_timer) {
856862306a36Sopenharmony_ci		u64 use_timer_freq = 5000ULL * 1000 * 1000;
856962306a36Sopenharmony_ci
857062306a36Sopenharmony_ci		cpu_preemption_timer_multi =
857162306a36Sopenharmony_ci			vmcs_config.misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
857262306a36Sopenharmony_ci
857362306a36Sopenharmony_ci		if (tsc_khz)
857462306a36Sopenharmony_ci			use_timer_freq = (u64)tsc_khz * 1000;
857562306a36Sopenharmony_ci		use_timer_freq >>= cpu_preemption_timer_multi;
857662306a36Sopenharmony_ci
857762306a36Sopenharmony_ci		/*
857862306a36Sopenharmony_ci		 * KVM "disables" the preemption timer by setting it to its max
857962306a36Sopenharmony_ci		 * value.  Don't use the timer if it might cause spurious exits
858062306a36Sopenharmony_ci		 * at a rate faster than 0.1 Hz (of uninterrupted guest time).
858162306a36Sopenharmony_ci		 */
858262306a36Sopenharmony_ci		if (use_timer_freq > 0xffffffffu / 10)
858362306a36Sopenharmony_ci			enable_preemption_timer = false;
858462306a36Sopenharmony_ci	}
858562306a36Sopenharmony_ci
858662306a36Sopenharmony_ci	if (!enable_preemption_timer) {
858762306a36Sopenharmony_ci		vmx_x86_ops.set_hv_timer = NULL;
858862306a36Sopenharmony_ci		vmx_x86_ops.cancel_hv_timer = NULL;
858962306a36Sopenharmony_ci		vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
859062306a36Sopenharmony_ci	}
859162306a36Sopenharmony_ci
859262306a36Sopenharmony_ci	kvm_caps.supported_mce_cap |= MCG_LMCE_P;
859362306a36Sopenharmony_ci	kvm_caps.supported_mce_cap |= MCG_CMCI_P;
859462306a36Sopenharmony_ci
859562306a36Sopenharmony_ci	if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST)
859662306a36Sopenharmony_ci		return -EINVAL;
859762306a36Sopenharmony_ci	if (!enable_ept || !enable_pmu || !cpu_has_vmx_intel_pt())
859862306a36Sopenharmony_ci		pt_mode = PT_MODE_SYSTEM;
859962306a36Sopenharmony_ci	if (pt_mode == PT_MODE_HOST_GUEST)
860062306a36Sopenharmony_ci		vmx_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr;
860162306a36Sopenharmony_ci	else
860262306a36Sopenharmony_ci		vmx_init_ops.handle_intel_pt_intr = NULL;
860362306a36Sopenharmony_ci
860462306a36Sopenharmony_ci	setup_default_sgx_lepubkeyhash();
860562306a36Sopenharmony_ci
860662306a36Sopenharmony_ci	if (nested) {
860762306a36Sopenharmony_ci		nested_vmx_setup_ctls_msrs(&vmcs_config, vmx_capability.ept);
860862306a36Sopenharmony_ci
860962306a36Sopenharmony_ci		r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
861062306a36Sopenharmony_ci		if (r)
861162306a36Sopenharmony_ci			return r;
861262306a36Sopenharmony_ci	}
861362306a36Sopenharmony_ci
861462306a36Sopenharmony_ci	vmx_set_cpu_caps();
861562306a36Sopenharmony_ci
861662306a36Sopenharmony_ci	r = alloc_kvm_area();
861762306a36Sopenharmony_ci	if (r && nested)
861862306a36Sopenharmony_ci		nested_vmx_hardware_unsetup();
861962306a36Sopenharmony_ci
862062306a36Sopenharmony_ci	kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
862162306a36Sopenharmony_ci
862262306a36Sopenharmony_ci	return r;
862362306a36Sopenharmony_ci}
862462306a36Sopenharmony_ci
862562306a36Sopenharmony_cistatic struct kvm_x86_init_ops vmx_init_ops __initdata = {
862662306a36Sopenharmony_ci	.hardware_setup = hardware_setup,
862762306a36Sopenharmony_ci	.handle_intel_pt_intr = NULL,
862862306a36Sopenharmony_ci
862962306a36Sopenharmony_ci	.runtime_ops = &vmx_x86_ops,
863062306a36Sopenharmony_ci	.pmu_ops = &intel_pmu_ops,
863162306a36Sopenharmony_ci};
863262306a36Sopenharmony_ci
863362306a36Sopenharmony_cistatic void vmx_cleanup_l1d_flush(void)
863462306a36Sopenharmony_ci{
863562306a36Sopenharmony_ci	if (vmx_l1d_flush_pages) {
863662306a36Sopenharmony_ci		free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
863762306a36Sopenharmony_ci		vmx_l1d_flush_pages = NULL;
863862306a36Sopenharmony_ci	}
863962306a36Sopenharmony_ci	/* Restore state so sysfs ignores VMX */
864062306a36Sopenharmony_ci	l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
864162306a36Sopenharmony_ci}
864262306a36Sopenharmony_ci
864362306a36Sopenharmony_cistatic void __vmx_exit(void)
864462306a36Sopenharmony_ci{
864562306a36Sopenharmony_ci	allow_smaller_maxphyaddr = false;
864662306a36Sopenharmony_ci
864762306a36Sopenharmony_ci	cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
864862306a36Sopenharmony_ci
864962306a36Sopenharmony_ci	vmx_cleanup_l1d_flush();
865062306a36Sopenharmony_ci}
865162306a36Sopenharmony_ci
/*
 * Module unload entry point.  Undoes vmx_init(): kvm_init() was the last
 * step there, so kvm_exit() runs first here, followed by the x86 vendor
 * teardown and finally the VMX-local cleanup.
 */
static void vmx_exit(void)
{
	kvm_exit();
	kvm_x86_vendor_exit();

	__vmx_exit();
}
module_exit(vmx_exit);
866062306a36Sopenharmony_ci
866162306a36Sopenharmony_cistatic int __init vmx_init(void)
866262306a36Sopenharmony_ci{
866362306a36Sopenharmony_ci	int r, cpu;
866462306a36Sopenharmony_ci
866562306a36Sopenharmony_ci	if (!kvm_is_vmx_supported())
866662306a36Sopenharmony_ci		return -EOPNOTSUPP;
866762306a36Sopenharmony_ci
866862306a36Sopenharmony_ci	/*
866962306a36Sopenharmony_ci	 * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
867062306a36Sopenharmony_ci	 * to unwind if a later step fails.
867162306a36Sopenharmony_ci	 */
867262306a36Sopenharmony_ci	hv_init_evmcs();
867362306a36Sopenharmony_ci
867462306a36Sopenharmony_ci	r = kvm_x86_vendor_init(&vmx_init_ops);
867562306a36Sopenharmony_ci	if (r)
867662306a36Sopenharmony_ci		return r;
867762306a36Sopenharmony_ci
867862306a36Sopenharmony_ci	/*
867962306a36Sopenharmony_ci	 * Must be called after common x86 init so enable_ept is properly set
868062306a36Sopenharmony_ci	 * up. Hand the parameter mitigation value in which was stored in
868162306a36Sopenharmony_ci	 * the pre module init parser. If no parameter was given, it will
868262306a36Sopenharmony_ci	 * contain 'auto' which will be turned into the default 'cond'
868362306a36Sopenharmony_ci	 * mitigation mode.
868462306a36Sopenharmony_ci	 */
868562306a36Sopenharmony_ci	r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
868662306a36Sopenharmony_ci	if (r)
868762306a36Sopenharmony_ci		goto err_l1d_flush;
868862306a36Sopenharmony_ci
868962306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
869062306a36Sopenharmony_ci		INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
869162306a36Sopenharmony_ci
869262306a36Sopenharmony_ci		pi_init_cpu(cpu);
869362306a36Sopenharmony_ci	}
869462306a36Sopenharmony_ci
869562306a36Sopenharmony_ci	cpu_emergency_register_virt_callback(vmx_emergency_disable);
869662306a36Sopenharmony_ci
869762306a36Sopenharmony_ci	vmx_check_vmcs12_offsets();
869862306a36Sopenharmony_ci
869962306a36Sopenharmony_ci	/*
870062306a36Sopenharmony_ci	 * Shadow paging doesn't have a (further) performance penalty
870162306a36Sopenharmony_ci	 * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable it
870262306a36Sopenharmony_ci	 * by default
870362306a36Sopenharmony_ci	 */
870462306a36Sopenharmony_ci	if (!enable_ept)
870562306a36Sopenharmony_ci		allow_smaller_maxphyaddr = true;
870662306a36Sopenharmony_ci
870762306a36Sopenharmony_ci	/*
870862306a36Sopenharmony_ci	 * Common KVM initialization _must_ come last, after this, /dev/kvm is
870962306a36Sopenharmony_ci	 * exposed to userspace!
871062306a36Sopenharmony_ci	 */
871162306a36Sopenharmony_ci	r = kvm_init(sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx),
871262306a36Sopenharmony_ci		     THIS_MODULE);
871362306a36Sopenharmony_ci	if (r)
871462306a36Sopenharmony_ci		goto err_kvm_init;
871562306a36Sopenharmony_ci
871662306a36Sopenharmony_ci	return 0;
871762306a36Sopenharmony_ci
871862306a36Sopenharmony_cierr_kvm_init:
871962306a36Sopenharmony_ci	__vmx_exit();
872062306a36Sopenharmony_cierr_l1d_flush:
872162306a36Sopenharmony_ci	kvm_x86_vendor_exit();
872262306a36Sopenharmony_ci	return r;
872362306a36Sopenharmony_ci}
872462306a36Sopenharmony_cimodule_init(vmx_init);
8725