/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __KVM_X86_VMX_H
#define __KVM_X86_VMX_H

#include <linux/kvm_host.h>

#include <asm/kvm.h>
#include <asm/intel_pt.h>
#include <asm/perf_event.h>

#include "capabilities.h"
#include "../kvm_cache_regs.h"
#include "posted_intr.h"
#include "vmcs.h"
#include "vmx_ops.h"
#include "../cpuid.h"
#include "run_flags.h"

#define MSR_TYPE_R	1
#define MSR_TYPE_W	2
#define MSR_TYPE_RW	3

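/*
 * Convert the MMIO offset of an xAPIC register (e.g. APIC_TASKPRI) to its
 * x2APIC MSR index: x2APIC MSRs start at APIC_BASE_MSR (0x800), one MSR per
 * 16-byte xAPIC register.
 */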
#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))

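/*
 * Maximum number of "user return" MSR slots, i.e. MSRs whose host values are
 * restored when KVM returns to userspace.  64-bit hosts need additional slots
 * for MSRs that only matter in 64-bit mode, e.g. the 64-bit SYSCALL MSRs.
 */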
#ifdef CONFIG_X86_64
#define MAX_NR_USER_RETURN_MSRS	7
#else
#define MAX_NR_USER_RETURN_MSRS	4
#endif

#define MAX_NR_LOADSTORE_MSRS	8

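/*
 * In-memory format of the lists backing the VMCS VM-Entry/VM-Exit MSR load
 * and MSR store areas (see the msr_autoload/msr_autostore members of
 * struct vcpu_vmx).
 */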
struct vmx_msrs {
	unsigned int		nr;
	struct vmx_msr_entry	val[MAX_NR_LOADSTORE_MSRS];
};

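/*
 * Shadow state for one user-return MSR slot: whether the value must be loaded
 * into hardware while the vCPU runs, the guest's value, and a mask of the
 * bits that are switched to the guest's value (the remaining bits keep the
 * host's value).
 */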
struct vmx_uret_msr {
	bool load_into_hardware;
	u64 data;
	u64 mask;
};

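/* Fields tracked by the per-segment register cache (see segment_cache below). */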
enum segment_cache_field {
	SEG_FIELD_SEL = 0,
	SEG_FIELD_BASE = 1,
	SEG_FIELD_LIMIT = 2,
	SEG_FIELD_AR = 3,

	SEG_FIELD_NR = 4
};

#define RTIT_ADDR_RANGE		4

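/* Host or guest snapshot of the Intel PT (RTIT) trace-control MSRs. */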
struct pt_ctx {
	u64 ctl;
	u64 status;
	u64 output_base;
	u64 output_mask;
	u64 cr3_match;
	u64 addr_a[RTIT_ADDR_RANGE];
	u64 addr_b[RTIT_ADDR_RANGE];
};

struct pt_desc {
	u64 ctl_bitmask;
	u32 num_address_ranges;
	u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
	struct pt_ctx host;
	struct pt_ctx guest;
};

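/*
 * Decomposition of the 32-bit VM-Exit reason VMCS field: the basic exit
 * reason in bits 15:0 plus the modifier bits defined by the SDM, e.g.
 * "VM-entry failure" in bit 31.
 */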
union vmx_exit_reason {
	struct {
		u32	basic			: 16;
		u32	reserved16		: 1;
		u32	reserved17		: 1;
		u32	reserved18		: 1;
		u32	reserved19		: 1;
		u32	reserved20		: 1;
		u32	reserved21		: 1;
		u32	reserved22		: 1;
		u32	reserved23		: 1;
		u32	reserved24		: 1;
		u32	reserved25		: 1;
		u32	bus_lock_detected	: 1;
		u32	enclave_mode		: 1;
		u32	smi_pending_mtf		: 1;
		u32	smi_from_vmx_root	: 1;
		u32	reserved30		: 1;
		u32	failed_vmentry		: 1;
	};
	u32 full;
};

struct lbr_desc {
	/* Basic info about guest LBR records. */
	struct x86_pmu_lbr records;

	/*
	 * Emulate LBR feature via passthrough LBR registers when the
	 * per-vcpu guest LBR event is scheduled on the current pcpu.
	 *
	 * The records may be inaccurate if the host reclaims the LBR.
	 */
	struct perf_event *event;

	/* True if LBRs are marked as not intercepted in the MSR bitmap */
	bool msr_passthrough;
};

/*
 * The nested_vmx structure is part of vcpu_vmx, and holds information we need
 * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
 */
struct nested_vmx {
	/* Has the level1 guest done vmxon? */
	bool vmxon;
	gpa_t vmxon_ptr;
	bool pml_full;

	/* The guest-physical address of the current VMCS L1 keeps for L2 */
	gpa_t current_vmptr;
	/*
	 * Cache of the guest's VMCS, existing outside of guest memory.
	 * Loaded from guest memory during VMPTRLD. Flushed to guest
	 * memory during VMCLEAR and VMPTRLD.
	 */
	struct vmcs12 *cached_vmcs12;
	/*
	 * Cache of the guest's shadow VMCS, existing outside of guest
	 * memory. Loaded from guest memory during VM entry. Flushed
	 * to guest memory during VM exit.
	 */
	struct vmcs12 *cached_shadow_vmcs12;

	/*
	 * GPA to HVA cache for accessing vmcs12->vmcs_link_pointer
	 */
	struct gfn_to_hva_cache shadow_vmcs12_cache;

	/*
	 * GPA to HVA cache for VMCS12
	 */
	struct gfn_to_hva_cache vmcs12_cache;

	/*
	 * Indicates if the shadow vmcs or enlightened vmcs must be updated
	 * with the data held by struct vmcs12.
	 */
	bool need_vmcs12_to_shadow_sync;
	bool dirty_vmcs12;

	/*
	 * Indicates whether MSR bitmap for L2 needs to be rebuilt due to
	 * changes in MSR bitmap for L1 or switching to a different L2. Note,
	 * this flag can only be used reliably in conjunction with a paravirt L1
	 * which informs L0 whether any changes to MSR bitmap for L2 were done
	 * on its side.
	 */
	bool force_msr_bitmap_recalc;

	/*
	 * Indicates lazily loaded guest state has not yet been decached from
	 * vmcs02.
	 */
	bool need_sync_vmcs02_to_vmcs12_rare;

	/*
	 * vmcs02 has been initialized, i.e. state that is constant for
	 * vmcs02 has been written to the backing VMCS.  Initialization
	 * is delayed until L1 actually attempts to run a nested VM.
	 */
	bool vmcs02_initialized;

	bool change_vmcs01_virtual_apic_mode;
	bool reload_vmcs01_apic_access_page;
	bool update_vmcs01_cpu_dirty_logging;
	bool update_vmcs01_apicv_status;

	/*
	 * Enlightened VMCS has been enabled. It does not mean that L1 has to
	 * use it. However, VMX features available to L1 will be limited based
	 * on what the enlightened VMCS supports.
	 */
	bool enlightened_vmcs_enabled;

	/* L2 must run next, and mustn't decide to exit to L1. */
	bool nested_run_pending;

	/* Pending MTF VM-exit into L1.  */
	bool mtf_pending;

	struct loaded_vmcs vmcs02;

	/*
	 * Guest pages referred to in the vmcs02 with host-physical
	 * pointers, so we must keep them pinned while L2 runs.
	 */
	struct kvm_host_map apic_access_page_map;
	struct kvm_host_map virtual_apic_map;
	struct kvm_host_map pi_desc_map;

	struct kvm_host_map msr_bitmap_map;

	struct pi_desc *pi_desc;
	bool pi_pending;
	u16 posted_intr_nv;

	struct hrtimer preemption_timer;
	u64 preemption_timer_deadline;
	bool has_preemption_timer_deadline;
	bool preemption_timer_expired;

	/*
	 * Used to snapshot MSRs that are conditionally loaded on VM-Enter in
	 * order to propagate the guest's pre-VM-Enter value into vmcs02.  For
	 * emulation of VMLAUNCH/VMRESUME, the snapshot will be of L1's value.
	 * For KVM_SET_NESTED_STATE, the snapshot is of L2's value, _if_
	 * userspace restores MSRs before nested state.  If userspace restores
	 * MSRs after nested state, the snapshot holds garbage, but KVM can't
	 * detect that, and the garbage value in vmcs02 will be overwritten by
	 * MSR restoration in any case.
	 */
	u64 pre_vmenter_debugctl;
	u64 pre_vmenter_bndcfgs;

	/* to migrate it to L1 if L2 writes to L1's CR8 directly */
	int l1_tpr_threshold;

	u16 vpid02;
	u16 last_vpid;

	struct nested_vmx_msrs msrs;

	/* SMM related state */
	struct {
		/* in VMX operation on SMM entry? */
		bool vmxon;
		/* in guest mode on SMM entry? */
		bool guest_mode;
	} smm;

	gpa_t hv_evmcs_vmptr;
	struct kvm_host_map hv_evmcs_map;
	struct hv_enlightened_vmcs *hv_evmcs;
};

struct vcpu_vmx {
	struct kvm_vcpu       vcpu;
	u8                    fail;
	u8		      x2apic_msr_bitmap_mode;

	/*
	 * If true, host state has been stored in vmx->loaded_vmcs for
	 * the CPU registers that only need to be switched when transitioning
	 * to/from the kernel, and the registers have been loaded with guest
	 * values.  If false, host state is loaded in the CPU registers
	 * and vmx->loaded_vmcs->host_state is invalid.
	 */
	bool		      guest_state_loaded;

	unsigned long         exit_qualification;
	u32                   exit_intr_info;
	u32                   idt_vectoring_info;
	ulong                 rflags;

	/*
	 * User return MSRs are always emulated when enabled in the guest, but
	 * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
	 * of 64-bit mode or if EFER.SCE=0, so the SYSCALL MSRs don't need to
	 * be loaded into hardware unless the guest can actually use SYSCALL.
	 */
	struct vmx_uret_msr   guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
	bool                  guest_uret_msrs_loaded;
#ifdef CONFIG_X86_64
	u64		      msr_host_kernel_gs_base;
	u64		      msr_guest_kernel_gs_base;
#endif

	u64		      spec_ctrl;
	u32		      msr_ia32_umwait_control;

	/*
	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
	 * non-nested (L1) guest, it always points to vmcs01. For a nested
	 * guest (L2), it points to a different VMCS.
	 */
	struct loaded_vmcs    vmcs01;
	struct loaded_vmcs   *loaded_vmcs;

	struct msr_autoload {
		struct vmx_msrs guest;
		struct vmx_msrs host;
	} msr_autoload;

	struct msr_autostore {
		struct vmx_msrs guest;
	} msr_autostore;

	struct {
		int vm86_active;
		ulong save_rflags;
		struct kvm_segment segs[8];
	} rmode;
	struct {
		u32 bitmask; /* 4 bits per segment (1 bit per field) */
		struct kvm_save_segment {
			u16 selector;
			unsigned long base;
			u32 limit;
			u32 ar;
		} seg[8];
	} segment_cache;
	int vpid;
	bool emulation_required;

	union vmx_exit_reason exit_reason;

	/* Posted interrupt descriptor */
	struct pi_desc pi_desc;

	/* Used if this vCPU is waiting for PI notification wakeup. */
	struct list_head pi_wakeup_list;

	/* Support for a guest hypervisor (nested VMX) */
	struct nested_vmx nested;

	/* Dynamic PLE window. */
	unsigned int ple_window;
	bool ple_window_dirty;

	bool req_immediate_exit;

	/* Support for PML */
#define PML_ENTITY_NUM		512
	struct page *pml_pg;

	/* apic deadline value in host tsc */
	u64 hv_deadline_tsc;

	unsigned long host_debugctlmsr;

	/*
	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
	 * msr_ia32_feature_control. FEAT_CTL_LOCKED is always included
	 * in msr_ia32_feature_control_valid_bits.
	 */
	u64 msr_ia32_feature_control;
	u64 msr_ia32_feature_control_valid_bits;
	/* SGX Launch Control public key hash */
	u64 msr_ia32_sgxlepubkeyhash[4];
	u64 msr_ia32_mcu_opt_ctrl;
	bool disable_fb_clear;

	struct pt_desc pt_desc;
	struct lbr_desc lbr_desc;

	/* Save desired MSR intercept (read: pass-through) state */
#define MAX_POSSIBLE_PASSTHROUGH_MSRS	16
	struct {
		DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
		DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
	} shadow_msr_intercept;
};

struct kvm_vmx {
	struct kvm kvm;

	unsigned int tss_addr;
	bool ept_identity_pagetable_done;
	gpa_t ept_identity_map_addr;
	/* Posted Interrupt Descriptor (PID) table for IPI virtualization */
	u64 *pid_table;
};

void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
			struct loaded_vmcs *buddy);
int allocate_vpid(void);
void free_vpid(int vpid);
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
			unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu);
bool vmx_emulation_required(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);
void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask);
int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer);
void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
void set_cr4_guest_host_mask(struct vcpu_vmx *vmx);
void ept_save_pdptrs(struct kvm_vcpu *vcpu);
void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);

bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu);
void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
bool vmx_nmi_blocked(struct kvm_vcpu *vcpu);
bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu);
bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
		    unsigned int flags);
int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);

void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);

u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);

static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
					     int type, bool value)
{
	if (value)
		vmx_enable_intercept_for_msr(vcpu, msr, type);
	else
		vmx_disable_intercept_for_msr(vcpu, msr, type);
}

void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);

/*
 * Note, early Intel manuals have the write-low and read-high bitmap offsets
 * the wrong way round.  The bitmaps control MSRs 0x00000000-0x00001fff and
 * 0xc0000000-0xc0001fff.  The former (low) uses bytes 0-0x3ff for reads and
 * 0x800-0xbff for writes.  The latter (high) uses 0x400-0x7ff for reads and
 * 0xc00-0xfff for writes.  MSRs not covered by either of the ranges always
 * VM-Exit.
 */
#define __BUILD_VMX_MSR_BITMAP_HELPER(rtype, action, bitop, access, base)      \
static inline rtype vmx_##action##_msr_bitmap_##access(unsigned long *bitmap,  \
						       u32 msr)		       \
{									       \
	int f = sizeof(unsigned long);					       \
									       \
	if (msr <= 0x1fff)						       \
		return bitop##_bit(msr, bitmap + base / f);		       \
	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))		       \
		return bitop##_bit(msr & 0x1fff, bitmap + (base + 0x400) / f); \
	return (rtype)true;						       \
}
#define BUILD_VMX_MSR_BITMAP_HELPERS(ret_type, action, bitop)		       \
	__BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, read,  0x0)     \
	__BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 0x800)

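/*
 * The instantiations below generate vmx_{test,clear,set}_msr_bitmap_read()
 * and vmx_{test,clear,set}_msr_bitmap_write().  A '1' bit in the bitmap means
 * the corresponding MSR access is intercepted (causes a VM-Exit), e.g. to let
 * the guest access an MSR directly:
 *
 *	vmx_clear_msr_bitmap_read(msr_bitmap, MSR_IA32_SPEC_CTRL);
 *	vmx_clear_msr_bitmap_write(msr_bitmap, MSR_IA32_SPEC_CTRL);
 */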
BUILD_VMX_MSR_BITMAP_HELPERS(bool, test, test)
BUILD_VMX_MSR_BITMAP_HELPERS(void, clear, __clear)
BUILD_VMX_MSR_BITMAP_HELPERS(void, set, __set)

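/*
 * Requesting Virtual Interrupt (RVI) is the low byte of the Guest Interrupt
 * Status VMCS field and holds the vector of the highest-priority pending
 * virtualized interrupt.
 */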
static inline u8 vmx_get_rvi(void)
{
	return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
}

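/*
 * VM-Entry/VM-Exit/execution controls are split into "required" controls,
 * which KVM depends on unconditionally and expects the CPU to support, and
 * "optional" controls, which KVM enables opportunistically when available.
 * The BUILD_BUG_ON()s in BUILD_CONTROLS_SHADOW below reject toggling any
 * control bit that isn't in either set.
 */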
#define __KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS				\
	(VM_ENTRY_LOAD_DEBUG_CONTROLS)
#ifdef CONFIG_X86_64
	#define KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS			\
		(__KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS |			\
		 VM_ENTRY_IA32E_MODE)
#else
	#define KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS			\
		__KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS
#endif
#define KVM_OPTIONAL_VMX_VM_ENTRY_CONTROLS				\
	(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |				\
	 VM_ENTRY_LOAD_IA32_PAT |					\
	 VM_ENTRY_LOAD_IA32_EFER |					\
	 VM_ENTRY_LOAD_BNDCFGS |					\
	 VM_ENTRY_PT_CONCEAL_PIP |					\
	 VM_ENTRY_LOAD_IA32_RTIT_CTL)

#define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS				\
	(VM_EXIT_SAVE_DEBUG_CONTROLS |					\
	 VM_EXIT_ACK_INTR_ON_EXIT)
#ifdef CONFIG_X86_64
	#define KVM_REQUIRED_VMX_VM_EXIT_CONTROLS			\
		(__KVM_REQUIRED_VMX_VM_EXIT_CONTROLS |			\
		 VM_EXIT_HOST_ADDR_SPACE_SIZE)
#else
	#define KVM_REQUIRED_VMX_VM_EXIT_CONTROLS			\
		__KVM_REQUIRED_VMX_VM_EXIT_CONTROLS
#endif
#define KVM_OPTIONAL_VMX_VM_EXIT_CONTROLS				\
	      (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |			\
	       VM_EXIT_SAVE_IA32_PAT |					\
	       VM_EXIT_LOAD_IA32_PAT |					\
	       VM_EXIT_SAVE_IA32_EFER |					\
	       VM_EXIT_SAVE_VMX_PREEMPTION_TIMER |			\
	       VM_EXIT_LOAD_IA32_EFER |					\
	       VM_EXIT_CLEAR_BNDCFGS |					\
	       VM_EXIT_PT_CONCEAL_PIP |					\
	       VM_EXIT_CLEAR_IA32_RTIT_CTL)

#define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL			\
	(PIN_BASED_EXT_INTR_MASK |					\
	 PIN_BASED_NMI_EXITING)
#define KVM_OPTIONAL_VMX_PIN_BASED_VM_EXEC_CONTROL			\
	(PIN_BASED_VIRTUAL_NMIS |					\
	 PIN_BASED_POSTED_INTR |					\
	 PIN_BASED_VMX_PREEMPTION_TIMER)

#define __KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL			\
	(CPU_BASED_HLT_EXITING |					\
	 CPU_BASED_CR3_LOAD_EXITING |					\
	 CPU_BASED_CR3_STORE_EXITING |					\
	 CPU_BASED_UNCOND_IO_EXITING |					\
	 CPU_BASED_MOV_DR_EXITING |					\
	 CPU_BASED_USE_TSC_OFFSETTING |					\
	 CPU_BASED_MWAIT_EXITING |					\
	 CPU_BASED_MONITOR_EXITING |					\
	 CPU_BASED_INVLPG_EXITING |					\
	 CPU_BASED_RDPMC_EXITING |					\
	 CPU_BASED_INTR_WINDOW_EXITING)

#ifdef CONFIG_X86_64
	#define KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL		\
		(__KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL |		\
		 CPU_BASED_CR8_LOAD_EXITING |				\
		 CPU_BASED_CR8_STORE_EXITING)
#else
	#define KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL		\
		__KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL
#endif

#define KVM_OPTIONAL_VMX_CPU_BASED_VM_EXEC_CONTROL			\
	(CPU_BASED_RDTSC_EXITING |					\
	 CPU_BASED_TPR_SHADOW |						\
	 CPU_BASED_USE_IO_BITMAPS |					\
	 CPU_BASED_MONITOR_TRAP_FLAG |					\
	 CPU_BASED_USE_MSR_BITMAPS |					\
	 CPU_BASED_NMI_WINDOW_EXITING |					\
	 CPU_BASED_PAUSE_EXITING |					\
	 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS |			\
	 CPU_BASED_ACTIVATE_TERTIARY_CONTROLS)

#define KVM_REQUIRED_VMX_SECONDARY_VM_EXEC_CONTROL 0
#define KVM_OPTIONAL_VMX_SECONDARY_VM_EXEC_CONTROL			\
	(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |			\
	 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |			\
	 SECONDARY_EXEC_WBINVD_EXITING |				\
	 SECONDARY_EXEC_ENABLE_VPID |					\
	 SECONDARY_EXEC_ENABLE_EPT |					\
	 SECONDARY_EXEC_UNRESTRICTED_GUEST |				\
	 SECONDARY_EXEC_PAUSE_LOOP_EXITING |				\
	 SECONDARY_EXEC_DESC |						\
	 SECONDARY_EXEC_ENABLE_RDTSCP |					\
	 SECONDARY_EXEC_ENABLE_INVPCID |				\
	 SECONDARY_EXEC_APIC_REGISTER_VIRT |				\
	 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |				\
	 SECONDARY_EXEC_SHADOW_VMCS |					\
	 SECONDARY_EXEC_ENABLE_XSAVES |					\
	 SECONDARY_EXEC_RDSEED_EXITING |				\
	 SECONDARY_EXEC_RDRAND_EXITING |				\
	 SECONDARY_EXEC_ENABLE_PML |					\
	 SECONDARY_EXEC_TSC_SCALING |					\
	 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |				\
	 SECONDARY_EXEC_PT_USE_GPA |					\
	 SECONDARY_EXEC_PT_CONCEAL_VMX |				\
	 SECONDARY_EXEC_ENABLE_VMFUNC |					\
	 SECONDARY_EXEC_BUS_LOCK_DETECTION |				\
	 SECONDARY_EXEC_NOTIFY_VM_EXITING |				\
	 SECONDARY_EXEC_ENCLS_EXITING)

#define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL 0
#define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL			\
	(TERTIARY_EXEC_IPI_VIRT)

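/*
 * Generate cached accessors for a VMCS control field: <name>_controls_get(),
 * _set(), _setbit() and _clearbit().  Writes go through a shadow value in the
 * loaded_vmcs so that redundant VMWRITEs are skipped, e.g.:
 *
 *	exec_controls_setbit(vmx, CPU_BASED_MONITOR_TRAP_FLAG);
 */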
#define BUILD_CONTROLS_SHADOW(lname, uname, bits)						\
static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val)			\
{												\
	if (vmx->loaded_vmcs->controls_shadow.lname != val) {					\
		vmcs_write##bits(uname, val);							\
		vmx->loaded_vmcs->controls_shadow.lname = val;					\
	}											\
}												\
static inline u##bits __##lname##_controls_get(struct loaded_vmcs *vmcs)			\
{												\
	return vmcs->controls_shadow.lname;							\
}												\
static inline u##bits lname##_controls_get(struct vcpu_vmx *vmx)				\
{												\
	return __##lname##_controls_get(vmx->loaded_vmcs);					\
}												\
static __always_inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u##bits val)		\
{												\
	BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname)));		\
	lname##_controls_set(vmx, lname##_controls_get(vmx) | val);				\
}												\
static __always_inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##bits val)	\
{												\
	BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname)));		\
	lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val);				\
}
BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32)
BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32)
BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL, 32)
BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL, 32)
BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL, 32)
BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)

/*
 * VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the
 * cache on demand.  Other registers not listed here are synced to
 * the cache immediately after VM-Exit.
 */
#define VMX_REGS_LAZY_LOAD_SET	((1 << VCPU_REGS_RIP) |         \
				(1 << VCPU_REGS_RSP) |          \
				(1 << VCPU_EXREG_RFLAGS) |      \
				(1 << VCPU_EXREG_PDPTR) |       \
				(1 << VCPU_EXREG_SEGMENTS) |    \
				(1 << VCPU_EXREG_CR0) |         \
				(1 << VCPU_EXREG_CR3) |         \
				(1 << VCPU_EXREG_CR4) |         \
				(1 << VCPU_EXREG_EXIT_INFO_1) | \
				(1 << VCPU_EXREG_EXIT_INFO_2))

static inline unsigned long vmx_l1_guest_owned_cr0_bits(void)
{
	unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS;

	/*
	 * CR0.WP needs to be intercepted when KVM is shadowing legacy paging
	 * in order to construct shadow PTEs with the correct protections.
	 * Note!  CR0.WP technically can be passed through to the guest if
	 * paging is disabled, but checking CR0.PG would generate a cyclical
	 * dependency of sorts due to forcing the caller to ensure CR0 holds
	 * the correct value prior to determining which CR0 bits can be owned
	 * by L1.  Keep it simple and limit the optimization to EPT.
	 */
	if (!enable_ept)
		bits &= ~X86_CR0_WP;
	return bits;
}

static __always_inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
	return container_of(kvm, struct kvm_vmx, kvm);
}

static __always_inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
	return container_of(vcpu, struct vcpu_vmx, vcpu);
}

static inline struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu)
{
	return &to_vmx(vcpu)->lbr_desc;
}

static inline struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu)
{
	return &vcpu_to_lbr_desc(vcpu)->records;
}

static inline bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
{
	return !!vcpu_to_lbr_records(vcpu)->nr;
}

void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu);

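/*
 * EXIT_QUALIFICATION and VM_EXIT_INTR_INFO are read from the VMCS lazily: the
 * first access after a VM-Exit reads the field and marks the corresponding
 * VCPU_EXREG_EXIT_INFO_* register as available, subsequent accesses use the
 * cached value.
 */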
static __always_inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1))
		vmx->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);

	return vmx->exit_qualification;
}

static __always_inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2))
		vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);

	return vmx->exit_intr_info;
}

struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags);
void free_vmcs(struct vmcs *vmcs);
int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs);

static inline struct vmcs *alloc_vmcs(bool shadow)
{
	return alloc_vmcs_cpu(shadow, raw_smp_processor_id(),
			      GFP_KERNEL_ACCOUNT);
}

static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
{
	return secondary_exec_controls_get(vmx) &
		SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
}

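/*
 * Without EPT, #PF must always be intercepted so KVM can maintain shadow page
 * tables.  With EPT, #PF only needs to be intercepted to emulate a guest
 * MAXPHYADDR that is smaller than the host's.
 */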
static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
{
	if (!enable_ept)
		return true;

	return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
}

static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu)
{
	return enable_unrestricted_guest && (!is_guest_mode(vcpu) ||
	    (secondary_exec_controls_get(to_vmx(vcpu)) &
	    SECONDARY_EXEC_UNRESTRICTED_GUEST));
}

bool __vmx_guest_state_valid(struct kvm_vcpu *vcpu);
static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu)
{
	return is_unrestricted_guest(vcpu) || __vmx_guest_state_valid(vcpu);
}

void dump_vmcs(struct kvm_vcpu *vcpu);

static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
{
	return (vmx_instr_info >> 28) & 0xf;
}

static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu)
{
	return lapic_in_kernel(vcpu) && enable_ipiv;
}

static inline bool guest_cpuid_has_evmcs(struct kvm_vcpu *vcpu)
{
	/*
	 * eVMCS is exposed to the guest if Hyper-V is enabled in CPUID and
	 * eVMCS has been explicitly enabled by userspace.
	 */
	return vcpu->arch.hyperv_enabled &&
	       to_vmx(vcpu)->nested.enlightened_vmcs_enabled;
}

#endif /* __KVM_X86_VMX_H */