xref: /kernel/linux/linux-6.6/arch/x86/kvm/smm.c (revision 62306a36)
/* SPDX-License-Identifier: GPL-2.0 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include "x86.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "smm.h"
#include "cpuid.h"
#include "trace.h"

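/*
 * Both SMRAM layouts below describe the 512-byte state-save area that
 * KVM reads and writes at smbase + 0xFE00 (see enter_smm() and
 * emulator_leave_smm()), so each assertion expresses the field's offset
 * relative to 0xFE00.
 */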
#define CHECK_SMRAM32_OFFSET(field, offset) \
	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)

#define CHECK_SMRAM64_OFFSET(field, offset) \
	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)

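/*
 * Compile-time sanity check of the SMRAM image layouts: every field of
 * struct kvm_smram_state_32 and struct kvm_smram_state_64 must sit at
 * its expected offset, and union kvm_smram must be exactly 512 bytes.
 */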
static void check_smram_offsets(void)
{
	/* 32 bit SMRAM image */
	CHECK_SMRAM32_OFFSET(reserved1,			0xFE00);
	CHECK_SMRAM32_OFFSET(smbase,			0xFEF8);
	CHECK_SMRAM32_OFFSET(smm_revision,		0xFEFC);
	CHECK_SMRAM32_OFFSET(io_inst_restart,		0xFF00);
	CHECK_SMRAM32_OFFSET(auto_hlt_restart,		0xFF02);
	CHECK_SMRAM32_OFFSET(io_restart_rdi,		0xFF04);
	CHECK_SMRAM32_OFFSET(io_restart_rcx,		0xFF08);
	CHECK_SMRAM32_OFFSET(io_restart_rsi,		0xFF0C);
	CHECK_SMRAM32_OFFSET(io_restart_rip,		0xFF10);
	CHECK_SMRAM32_OFFSET(cr4,			0xFF14);
	CHECK_SMRAM32_OFFSET(reserved2,			0xFF18);
	CHECK_SMRAM32_OFFSET(int_shadow,		0xFF1A);
	CHECK_SMRAM32_OFFSET(reserved3,			0xFF1B);
	CHECK_SMRAM32_OFFSET(ds,			0xFF2C);
	CHECK_SMRAM32_OFFSET(fs,			0xFF38);
	CHECK_SMRAM32_OFFSET(gs,			0xFF44);
	CHECK_SMRAM32_OFFSET(idtr,			0xFF50);
	CHECK_SMRAM32_OFFSET(tr,			0xFF5C);
	CHECK_SMRAM32_OFFSET(gdtr,			0xFF6C);
	CHECK_SMRAM32_OFFSET(ldtr,			0xFF78);
	CHECK_SMRAM32_OFFSET(es,			0xFF84);
	CHECK_SMRAM32_OFFSET(cs,			0xFF90);
	CHECK_SMRAM32_OFFSET(ss,			0xFF9C);
	CHECK_SMRAM32_OFFSET(es_sel,			0xFFA8);
	CHECK_SMRAM32_OFFSET(cs_sel,			0xFFAC);
	CHECK_SMRAM32_OFFSET(ss_sel,			0xFFB0);
	CHECK_SMRAM32_OFFSET(ds_sel,			0xFFB4);
	CHECK_SMRAM32_OFFSET(fs_sel,			0xFFB8);
	CHECK_SMRAM32_OFFSET(gs_sel,			0xFFBC);
	CHECK_SMRAM32_OFFSET(ldtr_sel,			0xFFC0);
	CHECK_SMRAM32_OFFSET(tr_sel,			0xFFC4);
	CHECK_SMRAM32_OFFSET(dr7,			0xFFC8);
	CHECK_SMRAM32_OFFSET(dr6,			0xFFCC);
	CHECK_SMRAM32_OFFSET(gprs,			0xFFD0);
	CHECK_SMRAM32_OFFSET(eip,			0xFFF0);
	CHECK_SMRAM32_OFFSET(eflags,			0xFFF4);
	CHECK_SMRAM32_OFFSET(cr3,			0xFFF8);
	CHECK_SMRAM32_OFFSET(cr0,			0xFFFC);

	/* 64 bit SMRAM image */
	CHECK_SMRAM64_OFFSET(es,			0xFE00);
	CHECK_SMRAM64_OFFSET(cs,			0xFE10);
	CHECK_SMRAM64_OFFSET(ss,			0xFE20);
	CHECK_SMRAM64_OFFSET(ds,			0xFE30);
	CHECK_SMRAM64_OFFSET(fs,			0xFE40);
	CHECK_SMRAM64_OFFSET(gs,			0xFE50);
	CHECK_SMRAM64_OFFSET(gdtr,			0xFE60);
	CHECK_SMRAM64_OFFSET(ldtr,			0xFE70);
	CHECK_SMRAM64_OFFSET(idtr,			0xFE80);
	CHECK_SMRAM64_OFFSET(tr,			0xFE90);
	CHECK_SMRAM64_OFFSET(io_restart_rip,		0xFEA0);
	CHECK_SMRAM64_OFFSET(io_restart_rcx,		0xFEA8);
	CHECK_SMRAM64_OFFSET(io_restart_rsi,		0xFEB0);
	CHECK_SMRAM64_OFFSET(io_restart_rdi,		0xFEB8);
	CHECK_SMRAM64_OFFSET(io_restart_dword,		0xFEC0);
	CHECK_SMRAM64_OFFSET(reserved1,			0xFEC4);
	CHECK_SMRAM64_OFFSET(io_inst_restart,		0xFEC8);
	CHECK_SMRAM64_OFFSET(auto_hlt_restart,		0xFEC9);
	CHECK_SMRAM64_OFFSET(amd_nmi_mask,		0xFECA);
	CHECK_SMRAM64_OFFSET(int_shadow,		0xFECB);
	CHECK_SMRAM64_OFFSET(reserved2,			0xFECC);
	CHECK_SMRAM64_OFFSET(efer,			0xFED0);
	CHECK_SMRAM64_OFFSET(svm_guest_flag,		0xFED8);
	CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa,	0xFEE0);
	CHECK_SMRAM64_OFFSET(svm_guest_virtual_int,	0xFEE8);
	CHECK_SMRAM64_OFFSET(reserved3,			0xFEF0);
	CHECK_SMRAM64_OFFSET(smm_revison,		0xFEFC);
	CHECK_SMRAM64_OFFSET(smbase,			0xFF00);
	CHECK_SMRAM64_OFFSET(reserved4,			0xFF04);
	CHECK_SMRAM64_OFFSET(ssp,			0xFF18);
	CHECK_SMRAM64_OFFSET(svm_guest_pat,		0xFF20);
	CHECK_SMRAM64_OFFSET(svm_host_efer,		0xFF28);
	CHECK_SMRAM64_OFFSET(svm_host_cr4,		0xFF30);
	CHECK_SMRAM64_OFFSET(svm_host_cr3,		0xFF38);
	CHECK_SMRAM64_OFFSET(svm_host_cr0,		0xFF40);
	CHECK_SMRAM64_OFFSET(cr4,			0xFF48);
	CHECK_SMRAM64_OFFSET(cr3,			0xFF50);
	CHECK_SMRAM64_OFFSET(cr0,			0xFF58);
	CHECK_SMRAM64_OFFSET(dr7,			0xFF60);
	CHECK_SMRAM64_OFFSET(dr6,			0xFF68);
	CHECK_SMRAM64_OFFSET(rflags,			0xFF70);
	CHECK_SMRAM64_OFFSET(rip,			0xFF78);
	CHECK_SMRAM64_OFFSET(gprs,			0xFF80);

	BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
}

#undef CHECK_SMRAM64_OFFSET
#undef CHECK_SMRAM32_OFFSET

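/*
 * Update the vCPU's SMM bookkeeping when it enters or leaves SMM.
 * Entering only sets HF_SMM_MASK; leaving also clears
 * HF_SMM_INSIDE_NMI_MASK, requests event processing so that a latched
 * INIT or SMI can be delivered, and forces the PDPTRs to be reloaded
 * from guest memory.  In both directions the MMU context is reset,
 * since the MMU role depends on the SMM flag.
 */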
void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
{
	trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);

	if (entering_smm) {
		vcpu->arch.hflags |= HF_SMM_MASK;
	} else {
		vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);

		/* Process a latched INIT or SMI, if any.  */
		kvm_make_request(KVM_REQ_EVENT, vcpu);

		/*
		 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
		 * on SMM exit we still need to reload them from
		 * guest memory
		 */
		vcpu->arch.pdptrs_from_userspace = false;
	}

	kvm_mmu_reset_context(vcpu);
}

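/*
 * Record a pending SMI and request event processing so that it gets
 * injected on the next vCPU run.
 */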
void process_smi(struct kvm_vcpu *vcpu)
{
	vcpu->arch.smi_pending = true;
	kvm_make_request(KVM_REQ_EVENT, vcpu);
}

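/*
 * Pack the attribute bits of a struct kvm_segment into the 32-bit
 * "flags" word stored in the 32-bit SMRAM segment entries (the 64-bit
 * entries store the same value shifted right by 8 as a 16-bit
 * attribute field):
 *
 *   bit  23     G
 *   bit  22     D/B
 *   bit  21     L
 *   bit  20     AVL
 *   bit  15     P
 *   bits 14:13  DPL
 *   bit  12     S
 *   bits 11:8   type
 *
 * All other bits are left zero.
 */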
static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
{
	u32 flags = 0;
	flags |= seg->g       << 23;
	flags |= seg->db      << 22;
	flags |= seg->l       << 21;
	flags |= seg->avl     << 20;
	flags |= seg->present << 15;
	flags |= seg->dpl     << 13;
	flags |= seg->s       << 12;
	flags |= seg->type    << 8;
	return flags;
}

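/*
 * Capture one segment register into the 32-bit SMRAM format (selector
 * stored separately) or, below, into the 64-bit format (selector and
 * packed attributes stored inline).
 */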
static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
				  struct kvm_smm_seg_state_32 *state,
				  u32 *selector, int n)
{
	struct kvm_segment seg;

	kvm_get_segment(vcpu, &seg, n);
	*selector = seg.selector;
	state->base = seg.base;
	state->limit = seg.limit;
	state->flags = enter_smm_get_segment_flags(&seg);
}

#ifdef CONFIG_X86_64
static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
				  struct kvm_smm_seg_state_64 *state,
				  int n)
{
	struct kvm_segment seg;

	kvm_get_segment(vcpu, &seg, n);
	state->selector = seg.selector;
	state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
	state->limit = seg.limit;
	state->base = seg.base;
}
#endif

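/*
 * Fill in the 32-bit SMRAM image from the current vCPU state: GPRs,
 * EIP/EFLAGS, CR0/CR3/CR4, DR6/DR7, descriptor tables, segment
 * registers, SMBASE, the SMM revision and the interrupt shadow.
 */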
static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
				    struct kvm_smram_state_32 *smram)
{
	struct desc_ptr dt;
	unsigned long val;
	int i;

	smram->cr0     = kvm_read_cr0(vcpu);
	smram->cr3     = kvm_read_cr3(vcpu);
	smram->eflags  = kvm_get_rflags(vcpu);
	smram->eip     = kvm_rip_read(vcpu);

	for (i = 0; i < 8; i++)
		smram->gprs[i] = kvm_register_read_raw(vcpu, i);

	kvm_get_dr(vcpu, 6, &val);
	smram->dr6     = (u32)val;
	kvm_get_dr(vcpu, 7, &val);
	smram->dr7     = (u32)val;

	enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
	enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);

	static_call(kvm_x86_get_gdt)(vcpu, &dt);
	smram->gdtr.base = dt.address;
	smram->gdtr.limit = dt.size;

	static_call(kvm_x86_get_idt)(vcpu, &dt);
	smram->idtr.base = dt.address;
	smram->idtr.limit = dt.size;

	enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
	enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
	enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);

	enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
	enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
	enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);

	smram->cr4 = kvm_read_cr4(vcpu);
	smram->smm_revision = 0x00020000;
	smram->smbase = vcpu->arch.smbase;

	smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
}

#ifdef CONFIG_X86_64
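/*
 * 64-bit counterpart of the above.  Note that the GPRs are stored in
 * reverse order (gprs[15 - i]) and that EFER and a different SMM
 * revision value (0x00020064; the struct field is spelled smm_revison)
 * are recorded as well.
 */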
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
				    struct kvm_smram_state_64 *smram)
{
	struct desc_ptr dt;
	unsigned long val;
	int i;

	for (i = 0; i < 16; i++)
		smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);

	smram->rip    = kvm_rip_read(vcpu);
	smram->rflags = kvm_get_rflags(vcpu);

	kvm_get_dr(vcpu, 6, &val);
	smram->dr6 = val;
	kvm_get_dr(vcpu, 7, &val);
	smram->dr7 = val;

	smram->cr0 = kvm_read_cr0(vcpu);
	smram->cr3 = kvm_read_cr3(vcpu);
	smram->cr4 = kvm_read_cr4(vcpu);

	smram->smbase = vcpu->arch.smbase;
	smram->smm_revison = 0x00020064;

	smram->efer = vcpu->arch.efer;

	enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);

	static_call(kvm_x86_get_idt)(vcpu, &dt);
	smram->idtr.limit = dt.size;
	smram->idtr.base = dt.address;

	enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);

	static_call(kvm_x86_get_gdt)(vcpu, &dt);
	smram->gdtr.limit = dt.size;
	smram->gdtr.base = dt.address;

	enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
	enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
	enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
	enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
	enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
	enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);

	smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
}
#endif

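/*
 * Emulate delivery of an SMI: snapshot the current state into a local
 * SMRAM image, let vendor code adjust it, write it to guest memory at
 * smbase + 0xFE00 and switch the vCPU into the SMM environment
 * (RIP = 0x8000, CS based at smbase, flat 4GB data segments, paging
 * and protection disabled, NMIs masked).  Any failure kills the VM.
 */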
void enter_smm(struct kvm_vcpu *vcpu)
{
	struct kvm_segment cs, ds;
	struct desc_ptr dt;
	unsigned long cr0;
	union kvm_smram smram;

	check_smram_offsets();

	memset(smram.bytes, 0, sizeof(smram.bytes));

#ifdef CONFIG_X86_64
	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
		enter_smm_save_state_64(vcpu, &smram.smram64);
	else
#endif
		enter_smm_save_state_32(vcpu, &smram.smram32);

	/*
	 * Give enter_smm() a chance to make ISA-specific changes to the vCPU
	 * state (e.g. leave guest mode) after we've saved the state into the
	 * SMM state-save area.
	 *
	 * Kill the VM in the unlikely case of failure, because the VM
	 * may be left in an undefined state.
	 */
	if (static_call(kvm_x86_enter_smm)(vcpu, &smram))
		goto error;

	kvm_smm_changed(vcpu, true);

	if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
		goto error;

	if (static_call(kvm_x86_get_nmi_mask)(vcpu))
		vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
	else
		static_call(kvm_x86_set_nmi_mask)(vcpu, true);

	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
	kvm_rip_write(vcpu, 0x8000);

	static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);

	cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
	static_call(kvm_x86_set_cr0)(vcpu, cr0);
	vcpu->arch.cr0 = cr0;

	static_call(kvm_x86_set_cr4)(vcpu, 0);

	/* Undocumented: IDT limit is set to zero on entry to SMM.  */
	dt.address = dt.size = 0;
	static_call(kvm_x86_set_idt)(vcpu, &dt);

	if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
		goto error;

	cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
	cs.base = vcpu->arch.smbase;

	ds.selector = 0;
	ds.base = 0;

	cs.limit    = ds.limit = 0xffffffff;
	cs.type     = ds.type = 0x3;
	cs.dpl      = ds.dpl = 0;
	cs.db       = ds.db = 0;
	cs.s        = ds.s = 1;
	cs.l        = ds.l = 0;
	cs.g        = ds.g = 1;
	cs.avl      = ds.avl = 0;
	cs.present  = ds.present = 1;
	cs.unusable = ds.unusable = 0;
	cs.padding  = ds.padding = 0;

	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);

#ifdef CONFIG_X86_64
	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
		if (static_call(kvm_x86_set_efer)(vcpu, 0))
			goto error;
#endif

	kvm_update_cpuid_runtime(vcpu);
	kvm_mmu_reset_context(vcpu);
	return;
error:
	kvm_vm_dead(vcpu->kvm);
}

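/*
 * Inverse of enter_smm_get_segment_flags(): unpack the attribute bits
 * from an SMRAM flags word back into a struct kvm_segment.  A segment
 * whose present bit is clear is marked unusable.
 */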
static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
	desc->g    = (flags >> 23) & 1;
	desc->db   = (flags >> 22) & 1;
	desc->l    = (flags >> 21) & 1;
	desc->avl  = (flags >> 20) & 1;
	desc->present = (flags >> 15) & 1;
	desc->dpl  = (flags >> 13) & 3;
	desc->s    = (flags >> 12) & 1;
	desc->type = (flags >>  8) & 15;

	desc->unusable = !desc->present;
	desc->padding = 0;
}

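/*
 * Counterparts of the enter_smm_save_seg_* helpers: rebuild a segment
 * register from its 32-bit or 64-bit SMRAM image entry and install it
 * in the vCPU.
 */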
static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
			   const struct kvm_smm_seg_state_32 *state,
			   u16 selector, int n)
{
	struct kvm_segment desc;

	desc.selector =           selector;
	desc.base =               state->base;
	desc.limit =              state->limit;
	rsm_set_desc_flags(&desc, state->flags);
	kvm_set_segment(vcpu, &desc, n);
	return X86EMUL_CONTINUE;
}

#ifdef CONFIG_X86_64

static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
			   const struct kvm_smm_seg_state_64 *state,
			   int n)
{
	struct kvm_segment desc;

	desc.selector =           state->selector;
	rsm_set_desc_flags(&desc, state->attributes << 8);
	desc.limit =              state->limit;
	desc.base =               state->base;
	kvm_set_segment(vcpu, &desc, n);
	return X86EMUL_CONTINUE;
}
#endif

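/*
 * Restore CR0/CR3/CR4 from the SMRAM image in an order that is always
 * valid: CR3 first (with the PCID field masked off), then CR4 without
 * PCIDE, then CR0, and only afterwards PCIDE together with the saved
 * PCID.  Returns X86EMUL_UNHANDLEABLE if any of the writes is rejected.
 */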
static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
				    u64 cr0, u64 cr3, u64 cr4)
{
	int bad;
	u64 pcid;

	/* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
	pcid = 0;
	if (cr4 & X86_CR4_PCIDE) {
		pcid = cr3 & 0xfff;
		cr3 &= ~0xfff;
	}

	bad = kvm_set_cr3(vcpu, cr3);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	/*
	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
	 * Then enable protected mode.  However, PCID cannot be enabled
	 * if EFER.LMA=0, so set it separately.
	 */
	bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	bad = kvm_set_cr0(vcpu, cr0);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	if (cr4 & X86_CR4_PCIDE) {
		bad = kvm_set_cr4(vcpu, cr4);
		if (bad)
			return X86EMUL_UNHANDLEABLE;
		if (pcid) {
			bad = kvm_set_cr3(vcpu, cr3 | pcid);
			if (bad)
				return X86EMUL_UNHANDLEABLE;
		}
	}

	return X86EMUL_CONTINUE;
}

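/*
 * Restore the vCPU from a 32-bit SMRAM image: GPRs, EIP/EFLAGS, debug
 * registers, descriptor tables, segment registers and SMBASE, then
 * CR0/CR3/CR4 via rsm_enter_protected_mode(), and finally the
 * interrupt shadow.
 */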
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
			     const struct kvm_smram_state_32 *smstate)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct desc_ptr dt;
	int i, r;

	ctxt->eflags =  smstate->eflags | X86_EFLAGS_FIXED;
	ctxt->_eip =  smstate->eip;

	for (i = 0; i < 8; i++)
		*reg_write(ctxt, i) = smstate->gprs[i];

	if (kvm_set_dr(vcpu, 6, smstate->dr6))
		return X86EMUL_UNHANDLEABLE;
	if (kvm_set_dr(vcpu, 7, smstate->dr7))
		return X86EMUL_UNHANDLEABLE;

	rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
	rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);

	dt.address =               smstate->gdtr.base;
	dt.size =                  smstate->gdtr.limit;
	static_call(kvm_x86_set_gdt)(vcpu, &dt);

	dt.address =               smstate->idtr.base;
	dt.size =                  smstate->idtr.limit;
	static_call(kvm_x86_set_idt)(vcpu, &dt);

	rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
	rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
	rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);

	rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
	rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
	rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);

	vcpu->arch.smbase = smstate->smbase;

	r = rsm_enter_protected_mode(vcpu, smstate->cr0,
					smstate->cr3, smstate->cr4);

	if (r != X86EMUL_CONTINUE)
		return r;

	static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
	ctxt->interruptibility = (u8)smstate->int_shadow;

	return r;
}

#ifdef CONFIG_X86_64
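/*
 * 64-bit counterpart of the above.  EFER is restored with the LMA bit
 * masked off, and the segment registers are loaded only after
 * CR0/CR3/CR4 have been put back in place.
 */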
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
			     const struct kvm_smram_state_64 *smstate)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct desc_ptr dt;
	int i, r;

	for (i = 0; i < 16; i++)
		*reg_write(ctxt, i) = smstate->gprs[15 - i];

	ctxt->_eip   = smstate->rip;
	ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;

	if (kvm_set_dr(vcpu, 6, smstate->dr6))
		return X86EMUL_UNHANDLEABLE;
	if (kvm_set_dr(vcpu, 7, smstate->dr7))
		return X86EMUL_UNHANDLEABLE;

	vcpu->arch.smbase =         smstate->smbase;

	if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
		return X86EMUL_UNHANDLEABLE;

	rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);

	dt.size =                   smstate->idtr.limit;
	dt.address =                smstate->idtr.base;
	static_call(kvm_x86_set_idt)(vcpu, &dt);

	rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);

	dt.size =                   smstate->gdtr.limit;
	dt.address =                smstate->gdtr.base;
	static_call(kvm_x86_set_gdt)(vcpu, &dt);

	r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4);
	if (r != X86EMUL_CONTINUE)
		return r;

	rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
	rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
	rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
	rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
	rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
	rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);

	static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
	ctxt->interruptibility = (u8)smstate->int_shadow;

	return X86EMUL_CONTINUE;
}
#endif

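/*
 * Emulate the RSM instruction: read the 512-byte state-save area back
 * from guest memory, unmask NMIs unless the SMI arrived while an NMI
 * handler was running, step the vCPU down to a CR0/CR4/EFER state from
 * which the saved image can be loaded safely, let vendor code react,
 * and finally restore the 64-bit or 32-bit image.
 */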
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	unsigned long cr0;
	union kvm_smram smram;
	u64 smbase;
	int ret;

	smbase = vcpu->arch.smbase;

	ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
	if (ret < 0)
		return X86EMUL_UNHANDLEABLE;

	if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
		static_call(kvm_x86_set_nmi_mask)(vcpu, false);

	kvm_smm_changed(vcpu, false);

	/*
	 * Get back to real mode, to prepare a safe state in which to load
	 * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
	 * supports long mode.
	 */
#ifdef CONFIG_X86_64
	if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
		struct kvm_segment cs_desc;
		unsigned long cr4;

		/* Zero CR4.PCIDE before CR0.PG.  */
		cr4 = kvm_read_cr4(vcpu);
		if (cr4 & X86_CR4_PCIDE)
			kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);

		/* A 32-bit code segment is required to clear EFER.LMA.  */
		memset(&cs_desc, 0, sizeof(cs_desc));
		cs_desc.type = 0xb;
		cs_desc.s = cs_desc.g = cs_desc.present = 1;
		kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
	}
#endif

	/* For the 64-bit case, this will clear EFER.LMA.  */
	cr0 = kvm_read_cr0(vcpu);
	if (cr0 & X86_CR0_PE)
		kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

#ifdef CONFIG_X86_64
	if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
		unsigned long cr4, efer;

		/* Clear CR4.PAE before clearing EFER.LME. */
		cr4 = kvm_read_cr4(vcpu);
		if (cr4 & X86_CR4_PAE)
			kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);

		/* And finally go back to 32-bit mode.  */
		efer = 0;
		kvm_set_msr(vcpu, MSR_EFER, efer);
	}
#endif

	/*
	 * Give leave_smm() a chance to make ISA-specific changes to the vCPU
	 * state (e.g. enter guest mode) before loading state from the SMM
	 * state-save area.
	 */
	if (static_call(kvm_x86_leave_smm)(vcpu, &smram))
		return X86EMUL_UNHANDLEABLE;

#ifdef CONFIG_X86_64
	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
		return rsm_load_state_64(ctxt, &smram.smram64);
	else
#endif
		return rsm_load_state_32(ctxt, &smram.smram32);
}