// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
 *
 * Authors:
 *     Anup Patel <anup.patel@wdc.com>
 */

#include <linux/bitops.h>
#include <linux/entry-kvm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/vmalloc.h>
#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/kvm_host.h>
#include <asm/csr.h>
#include <asm/cacheflush.h>
#include <asm/kvm_vcpu_vector.h>

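/*
 * Per-VCPU statistics exposed to user space through KVM's binary stats
 * interface: the descriptor table lists every counter and the header
 * describes the layout of the stats data that follows it.
 */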
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
	STATS_DESC_COUNTER(VCPU, csr_exit_user),
	STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
	STATS_DESC_COUNTER(VCPU, signal_exits),
	STATS_DESC_COUNTER(VCPU, exits)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

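/*
 * Bring the VCPU back to its initial state: the guest CSRs and CPU context
 * are restored from their reset copies, FP/vector/timer/AIA/PMU state is
 * re-initialized, and any pending interrupts and queued HFENCEs are
 * discarded. Used at VCPU creation and for KVM_REQ_VCPU_RESET.
 */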
static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
	struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
	bool loaded;

	/*
	 * Preemption must be disabled here because this races with
	 * kvm_sched_out()/kvm_sched_in() (called from preempt notifiers),
	 * which also call vcpu_load()/vcpu_put().
	 */
	get_cpu();
	loaded = (vcpu->cpu != -1);
	if (loaded)
		kvm_arch_vcpu_put(vcpu);

	vcpu->arch.last_exit_cpu = -1;

	memcpy(csr, reset_csr, sizeof(*csr));

	memcpy(cntx, reset_cntx, sizeof(*cntx));

	kvm_riscv_vcpu_fp_reset(vcpu);

	kvm_riscv_vcpu_vector_reset(vcpu);

	kvm_riscv_vcpu_timer_reset(vcpu);

	kvm_riscv_vcpu_aia_reset(vcpu);

	bitmap_zero(vcpu->arch.irqs_pending, KVM_RISCV_VCPU_NR_IRQS);
	bitmap_zero(vcpu->arch.irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS);

	kvm_riscv_vcpu_pmu_reset(vcpu);

	vcpu->arch.hfence_head = 0;
	vcpu->arch.hfence_tail = 0;
	memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));

	/* Reset the guest CSRs for the hotplug use case */
	if (loaded)
		kvm_arch_vcpu_load(vcpu, smp_processor_id());
	put_cpu();
}

int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	return 0;
}

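/*
 * Allocate and initialize the architecture-specific state of a newly
 * created VCPU: ISA features, vendor/arch/implementation IDs, the reset
 * context, vector context, timer, PMU, and AIA, followed by an initial
 * reset of the VCPU.
 */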
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	int rc;
	struct kvm_cpu_context *cntx;
	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;

	/* Mark this VCPU never ran */
	vcpu->arch.ran_atleast_once = false;
	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
	bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);

	/* Setup ISA features available to VCPU */
	kvm_riscv_vcpu_setup_isa(vcpu);

	/* Setup vendor, arch, and implementation details */
	vcpu->arch.mvendorid = sbi_get_mvendorid();
	vcpu->arch.marchid = sbi_get_marchid();
	vcpu->arch.mimpid = sbi_get_mimpid();

	/* Setup VCPU hfence queue */
	spin_lock_init(&vcpu->arch.hfence_lock);

	/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
	cntx = &vcpu->arch.guest_reset_context;
	cntx->sstatus = SR_SPP | SR_SPIE;
	cntx->hstatus = 0;
	cntx->hstatus |= HSTATUS_VTW;
	cntx->hstatus |= HSTATUS_SPVP;
	cntx->hstatus |= HSTATUS_SPV;

	if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx))
		return -ENOMEM;

	/* By default, make CY, TM, and IR counters accessible in VU mode */
	reset_csr->scounteren = 0x7;

	/* Setup VCPU timer */
	kvm_riscv_vcpu_timer_init(vcpu);

	/* Setup performance monitoring */
	kvm_riscv_vcpu_pmu_init(vcpu);

	/* Setup VCPU AIA */
	rc = kvm_riscv_vcpu_aia_init(vcpu);
	if (rc)
		return rc;

	/* Reset VCPU */
	kvm_riscv_reset_vcpu(vcpu);

	return 0;
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	/*
	 * The VCPU with id 0 is the designated boot CPU. Keep all VCPUs
	 * with a non-zero id in the powered-off state so that they can be
	 * brought up using the SBI HSM extension.
	 */
	if (vcpu->vcpu_idx != 0)
		kvm_riscv_vcpu_power_off(vcpu);
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	/* Cleanup VCPU AIA context */
	kvm_riscv_vcpu_aia_deinit(vcpu);

	/* Cleanup VCPU timer */
	kvm_riscv_vcpu_timer_deinit(vcpu);

	kvm_riscv_vcpu_pmu_deinit(vcpu);

	/* Free unused pages pre-allocated for G-stage page table mappings */
	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);

	/* Free vector context space for host and guest kernel */
	kvm_riscv_vcpu_free_vector_context(vcpu);
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return kvm_riscv_vcpu_timer_pending(vcpu);
}

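/*
 * While a VCPU is blocked waiting for an interrupt, enable wake-up on AIA
 * host guest-external interrupts (HGEI) so that an incoming guest external
 * interrupt can unblock it; disable the wake-up again once the VCPU
 * unblocks.
 */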
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	kvm_riscv_aia_wakeon_hgei(vcpu, true);
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	kvm_riscv_aia_wakeon_hgei(vcpu, false);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
		!vcpu->arch.power_off && !vcpu->arch.pause);
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}

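/*
 * KVM_INTERRUPT is handled as an asynchronous ioctl, i.e. without taking
 * the vcpu mutex: it simply sets or clears the VS-mode external interrupt
 * (IRQ_VS_EXT) depending on whether user space passed KVM_INTERRUPT_SET.
 */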
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	if (ioctl == KVM_INTERRUPT) {
		struct kvm_interrupt irq;

		if (copy_from_user(&irq, argp, sizeof(irq)))
			return -EFAULT;

		if (irq.irq == KVM_INTERRUPT_SET)
			return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
		else
			return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
	}

	return -ENOIOCTLCMD;
}

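/*
 * Synchronous VCPU ioctls: KVM_SET_ONE_REG/KVM_GET_ONE_REG access a single
 * architecture-specific register, and KVM_GET_REG_LIST enumerates the
 * registers that user space may access through the ONE_REG interface.
 */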
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	long r = -EINVAL;

	switch (ioctl) {
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;

		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_riscv_vcpu_set_reg(vcpu, &reg);
		else
			r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
		break;
	}
	case KVM_GET_REG_LIST: {
		struct kvm_reg_list __user *user_list = argp;
		struct kvm_reg_list reg_list;
		unsigned int n;

		r = -EFAULT;
		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
			break;
		n = reg_list.n;
		reg_list.n = kvm_riscv_vcpu_num_regs(vcpu);
		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
			break;
		r = -E2BIG;
		if (n < reg_list.n)
			break;
		r = kvm_riscv_vcpu_copy_reg_indices(vcpu, user_list->reg);
		break;
	}
	default:
		break;
	}

	return r;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}

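/*
 * Transfer interrupt state that was updated asynchronously into the shadow
 * HVIP value used on guest entry: irqs_pending_mask records which bits
 * changed and irqs_pending holds their new value.
 */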
void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	unsigned long mask, val;

	if (READ_ONCE(vcpu->arch.irqs_pending_mask[0])) {
		mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[0], 0);
		val = READ_ONCE(vcpu->arch.irqs_pending[0]) & mask;

		csr->hvip &= ~mask;
		csr->hvip |= val;
	}

	/* Flush AIA high interrupts */
	kvm_riscv_vcpu_aia_flush_interrupts(vcpu);
}

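/*
 * Read back interrupt state that the guest may have changed in hardware
 * (VSIE and the HVIP.VSSIP bit) so that the software view stays consistent
 * after a guest exit.
 */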
void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
{
	unsigned long hvip;
	struct kvm_vcpu_arch *v = &vcpu->arch;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	/* Read current HVIP and VSIE CSRs */
	csr->vsie = csr_read(CSR_VSIE);

	/* Sync up HVIP.VSSIP bit changes done by the guest */
	hvip = csr_read(CSR_HVIP);
	if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
		if (hvip & (1UL << IRQ_VS_SOFT)) {
			if (!test_and_set_bit(IRQ_VS_SOFT,
					      v->irqs_pending_mask))
				set_bit(IRQ_VS_SOFT, v->irqs_pending);
		} else {
			if (!test_and_set_bit(IRQ_VS_SOFT,
					      v->irqs_pending_mask))
				clear_bit(IRQ_VS_SOFT, v->irqs_pending);
		}
	}

	/* Sync-up AIA high interrupts */
	kvm_riscv_vcpu_aia_sync_interrupts(vcpu);

	/* Sync-up timer CSRs */
	kvm_riscv_vcpu_timer_sync(vcpu);
}

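/*
 * Interrupt injection protocol: update irqs_pending with the new state and
 * then, after a barrier, set the corresponding bit in irqs_pending_mask so
 * that kvm_riscv_vcpu_flush_interrupts() picks up the change on the next
 * guest entry.
 */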
int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
	/*
	 * We only allow VS-mode software, timer, and external
	 * interrupts when irq is one of the local interrupts
	 * defined by the RISC-V privilege specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
	    irq != IRQ_VS_EXT)
		return -EINVAL;

	set_bit(irq, vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, vcpu->arch.irqs_pending_mask);

	kvm_vcpu_kick(vcpu);

	return 0;
}

int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
	/*
	 * We only allow VS-mode software, timer, and external
	 * interrupts when irq is one of the local interrupts
	 * defined by the RISC-V privilege specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
	    irq != IRQ_VS_EXT)
		return -EINVAL;

	clear_bit(irq, vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, vcpu->arch.irqs_pending_mask);

	return 0;
}

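/*
 * Return true if the VCPU has a pending local interrupt that is both
 * enabled by the guest (per VSIE) and included in @mask; AIA high
 * interrupts are checked separately.
 */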
bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{
	unsigned long ie;

	ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
		<< VSIP_TO_HVIP_SHIFT) & (unsigned long)mask;
	ie |= vcpu->arch.guest_csr.vsie & ~IRQ_LOCAL_MASK &
		(unsigned long)mask;
	if (READ_ONCE(vcpu->arch.irqs_pending[0]) & ie)
		return true;

	/* Check AIA high interrupts */
	return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
}

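/*
 * Power off the VCPU: mark it powered off, request that it go to sleep via
 * KVM_REQ_SLEEP, and kick it out of guest mode. kvm_riscv_vcpu_power_on()
 * reverses this and wakes the VCPU up.
 */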
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	vcpu->arch.power_off = true;
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
}

void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
	vcpu->arch.power_off = false;
	kvm_vcpu_wake_up(vcpu);
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	if (vcpu->arch.power_off)
		mp_state->mp_state = KVM_MP_STATE_STOPPED;
	else
		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;

	return 0;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret = 0;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_RUNNABLE:
		vcpu->arch.power_off = false;
		break;
	case KVM_MP_STATE_STOPPED:
		kvm_riscv_vcpu_power_off(vcpu);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	/* TODO: To be implemented later. */
	return -EINVAL;
}

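/*
 * Program the hypervisor environment configuration (henvcfg) according to
 * the ISA extensions available to this VCPU: Svpbmt, Sstc, Zicbom, and
 * Zicboz each enable their corresponding ENVCFG bits for the guest.
 */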
static void kvm_riscv_vcpu_update_config(const unsigned long *isa)
{
	u64 henvcfg = 0;

	if (riscv_isa_extension_available(isa, SVPBMT))
		henvcfg |= ENVCFG_PBMTE;

	if (riscv_isa_extension_available(isa, SSTC))
		henvcfg |= ENVCFG_STCE;

	if (riscv_isa_extension_available(isa, ZICBOM))
		henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE);

	if (riscv_isa_extension_available(isa, ZICBOZ))
		henvcfg |= ENVCFG_CBZE;

	csr_write(CSR_HENVCFG, henvcfg);
#ifdef CONFIG_32BIT
	csr_write(CSR_HENVCFGH, henvcfg >> 32);
#endif
}

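/*
 * Called when the VCPU is scheduled in on a host CPU: restore the guest's
 * VS-level CSRs, henvcfg, G-stage page-table pointer, timer, FP, vector,
 * and AIA state, then record which physical CPU the VCPU now runs on.
 */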
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	csr_write(CSR_VSSTATUS, csr->vsstatus);
	csr_write(CSR_VSIE, csr->vsie);
	csr_write(CSR_VSTVEC, csr->vstvec);
	csr_write(CSR_VSSCRATCH, csr->vsscratch);
	csr_write(CSR_VSEPC, csr->vsepc);
	csr_write(CSR_VSCAUSE, csr->vscause);
	csr_write(CSR_VSTVAL, csr->vstval);
	csr_write(CSR_HVIP, csr->hvip);
	csr_write(CSR_VSATP, csr->vsatp);

	kvm_riscv_vcpu_update_config(vcpu->arch.isa);

	kvm_riscv_gstage_update_hgatp(vcpu);

	kvm_riscv_vcpu_timer_restore(vcpu);

	kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
					vcpu->arch.isa);
	kvm_riscv_vcpu_host_vector_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context,
					    vcpu->arch.isa);

	kvm_riscv_vcpu_aia_load(vcpu, cpu);

	vcpu->cpu = cpu;
}

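/*
 * Called when the VCPU is scheduled out: save the guest's FP, vector,
 * timer, AIA, and VS-level CSR state back into the VCPU structure and mark
 * the VCPU as no longer loaded on any physical CPU.
 */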
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	vcpu->cpu = -1;

	kvm_riscv_vcpu_aia_put(vcpu);

	kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
				     vcpu->arch.isa);
	kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);

	kvm_riscv_vcpu_timer_save(vcpu);
	kvm_riscv_vcpu_guest_vector_save(&vcpu->arch.guest_context,
					 vcpu->arch.isa);
	kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context);

	csr->vsstatus = csr_read(CSR_VSSTATUS);
	csr->vsie = csr_read(CSR_VSIE);
	csr->vstvec = csr_read(CSR_VSTVEC);
	csr->vsscratch = csr_read(CSR_VSSCRATCH);
	csr->vsepc = csr_read(CSR_VSEPC);
	csr->vscause = csr_read(CSR_VSCAUSE);
	csr->vstval = csr_read(CSR_VSTVAL);
	csr->hvip = csr_read(CSR_HVIP);
	csr->vsatp = csr_read(CSR_VSATP);
}

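/*
 * Process pending VCPU requests before entering the guest: sleep while the
 * VCPU is powered off or paused, and handle reset, HGATP update, and the
 * various fence/TLB-flush requests.
 */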
static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
{
	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);

	if (kvm_request_pending(vcpu)) {
		if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
			kvm_vcpu_srcu_read_unlock(vcpu);
			rcuwait_wait_event(wait,
				(!vcpu->arch.power_off) && (!vcpu->arch.pause),
				TASK_INTERRUPTIBLE);
			kvm_vcpu_srcu_read_lock(vcpu);

			if (vcpu->arch.power_off || vcpu->arch.pause) {
				/*
				 * Awakened to handle a signal; request to
				 * sleep again later.
				 */
				kvm_make_request(KVM_REQ_SLEEP, vcpu);
			}
		}

		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_riscv_reset_vcpu(vcpu);

		if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
			kvm_riscv_gstage_update_hgatp(vcpu);

		if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
			kvm_riscv_fence_i_process(vcpu);

		/*
		 * The generic KVM_REQ_TLB_FLUSH is the same as
		 * KVM_REQ_HFENCE_GVMA_VMID_ALL.
		 */
		if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu))
			kvm_riscv_hfence_gvma_vmid_all_process(vcpu);

		if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
			kvm_riscv_hfence_vvma_all_process(vcpu);

		if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
			kvm_riscv_hfence_process(vcpu);
	}
}

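/*
 * Write the software view of pending guest interrupts (HVIP plus the AIA
 * equivalents) into the hardware CSRs of the current host CPU just before
 * entering the guest.
 */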
static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	csr_write(CSR_HVIP, csr->hvip);
	kvm_riscv_vcpu_aia_update_hvip(vcpu);
}

/*
 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
 * the vCPU is running.
 *
 * This must be noinstr as instrumentation may make use of RCU, and this is not
 * safe during the EQS.
 */
static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu)
{
	guest_state_enter_irqoff();
	__kvm_riscv_switch_to(&vcpu->arch);
	vcpu->arch.last_exit_cpu = vcpu->cpu;
	guest_state_exit_irqoff();
}

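/*
 * Main VCPU run loop for the KVM_RUN ioctl: first complete any in-progress
 * MMIO, SBI, or CSR exit, then repeatedly enter the guest until user-space
 * intervention is required, an error occurs, or work/signals are pending.
 */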
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	int ret;
	struct kvm_cpu_trap trap;
	struct kvm_run *run = vcpu->run;

	/* Mark this VCPU ran at least once */
	vcpu->arch.ran_atleast_once = true;

	kvm_vcpu_srcu_read_lock(vcpu);

	switch (run->exit_reason) {
	case KVM_EXIT_MMIO:
		/* Process MMIO value returned from user-space */
		ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_SBI:
		/* Process SBI value returned from user-space */
		ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_CSR:
		/* Process CSR value returned from user-space */
		ret = kvm_riscv_vcpu_csr_return(vcpu, vcpu->run);
		break;
	default:
		ret = 0;
		break;
	}
	if (ret) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return ret;
	}

	if (run->immediate_exit) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return -EINTR;
	}

	vcpu_load(vcpu);

	kvm_sigset_activate(vcpu);

	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	while (ret > 0) {
		/* Check conditions before entering the guest */
		ret = xfer_to_guest_mode_handle_work(vcpu);
		if (ret)
			continue;
		ret = 1;

		kvm_riscv_gstage_vmid_update(vcpu);

		kvm_riscv_check_vcpu_requests(vcpu);

		preempt_disable();

		/* Update AIA HW state before entering guest */
		ret = kvm_riscv_vcpu_aia_update(vcpu);
		if (ret <= 0) {
			preempt_enable();
			continue;
		}

		local_irq_disable();

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virt/kvm/vcpu-requests.rst
		 */
		vcpu->mode = IN_GUEST_MODE;

		kvm_vcpu_srcu_read_unlock(vcpu);
		smp_mb__after_srcu_read_unlock();

		/*
		 * VCPU interrupts might have been updated asynchronously,
		 * so update them in HW.
		 */
		kvm_riscv_vcpu_flush_interrupts(vcpu);

		/* Update HVIP CSR for current CPU */
		kvm_riscv_update_hvip(vcpu);

		if (ret <= 0 ||
		    kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
		    kvm_request_pending(vcpu) ||
		    xfer_to_guest_mode_work_pending()) {
			vcpu->mode = OUTSIDE_GUEST_MODE;
			local_irq_enable();
			preempt_enable();
			kvm_vcpu_srcu_read_lock(vcpu);
			continue;
		}

		/*
		 * Clean up stale TLB entries.
		 *
		 * Note: This should be done after the G-stage VMID has been
		 * updated using kvm_riscv_gstage_vmid_ver_changed().
		 */
		kvm_riscv_local_tlb_sanitize(vcpu);

		guest_timing_enter_irqoff();

		kvm_riscv_vcpu_enter_exit(vcpu);

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;

		/*
		 * Save SCAUSE, STVAL, HTVAL, and HTINST because we might
		 * get an interrupt between __kvm_riscv_switch_to() and
		 * local_irq_enable() which can potentially change CSRs.
		 */
		trap.sepc = vcpu->arch.guest_context.sepc;
		trap.scause = csr_read(CSR_SCAUSE);
		trap.stval = csr_read(CSR_STVAL);
		trap.htval = csr_read(CSR_HTVAL);
		trap.htinst = csr_read(CSR_HTINST);

		/* Sync up interrupt state with HW */
		kvm_riscv_vcpu_sync_interrupts(vcpu);

		/*
		 * We must ensure that any pending interrupts are taken before
		 * we exit guest timing so that timer ticks are accounted as
		 * guest time. Transiently unmask interrupts so that any
		 * pending interrupts are taken.
		 *
		 * There's no barrier which ensures that pending interrupts are
		 * recognised, so we just hope that the CPU takes any pending
		 * interrupts between the enable and disable.
		 */
		local_irq_enable();
		local_irq_disable();

		guest_timing_exit_irqoff();

		local_irq_enable();

		preempt_enable();

		kvm_vcpu_srcu_read_lock(vcpu);

		ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
	}

	kvm_sigset_deactivate(vcpu);

	vcpu_put(vcpu);

	kvm_vcpu_srcu_read_unlock(vcpu);

	return ret;
}