xref: /kernel/linux/linux-5.10/arch/arm64/kvm/reset.c (revision 8c2ecf20)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012,2013 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 *
 * Derived from arch/arm/kvm/reset.c
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/hw_breakpoint.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

#include <kvm/arm_arch_timer.h>

#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/fpsimd.h>
#include <asm/ptrace.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/virt.h>

/* Maximum phys_shift supported for any VM on this host */
static u32 kvm_ipa_limit;

/*
 * ARMv8 Reset Values
 */
#define VCPU_RESET_PSTATE_EL1	(PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
				 PSR_F_BIT | PSR_D_BIT)

#define VCPU_RESET_PSTATE_SVC	(PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
				 PSR_AA32_I_BIT | PSR_AA32_F_BIT)

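/*
 * KVM only exposes pointer authentication when both the address and the
 * generic flavours are supported by the host.
 */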
static bool system_has_full_ptr_auth(void)
{
	return system_supports_address_auth() && system_supports_generic_auth();
}

/**
 * kvm_arch_vm_ioctl_check_extension - check for arm64-specific KVM capabilities
 * @kvm: the VM being queried
 * @ext: the KVM_CAP_* capability to check
 *
 * We currently assume that the number of HW debug registers is uniform
 * across all CPUs (see cpuinfo_sanity_check).
 */
int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_ARM_EL1_32BIT:
		r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1);
		break;
	case KVM_CAP_GUEST_DEBUG_HW_BPS:
		r = get_num_brps();
		break;
	case KVM_CAP_GUEST_DEBUG_HW_WPS:
		r = get_num_wrps();
		break;
	case KVM_CAP_ARM_PMU_V3:
		r = kvm_arm_support_pmu_v3();
		break;
	case KVM_CAP_ARM_INJECT_SERROR_ESR:
		r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
		break;
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_VCPU_ATTRIBUTES:
		r = 1;
		break;
	case KVM_CAP_ARM_VM_IPA_SIZE:
		r = kvm_ipa_limit;
		break;
	case KVM_CAP_ARM_SVE:
		r = system_supports_sve();
		break;
	case KVM_CAP_ARM_PTRAUTH_ADDRESS:
	case KVM_CAP_ARM_PTRAUTH_GENERIC:
		r = system_has_full_ptr_auth();
		break;
	default:
		r = 0;
	}

	return r;
}
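
/*
 * Illustrative sketch (not part of the kernel sources): a VMM would
 * typically probe the capabilities handled above with KVM_CHECK_EXTENSION,
 * e.g. to size the IPA space before creating a VM. kvm_fd is assumed to be
 * an open file descriptor for /dev/kvm:
 *
 *	int max_ipa = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);
 *	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM,
 *			  max_ipa ? KVM_VM_TYPE_ARM_IPA_SIZE(max_ipa) : 0);
 */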

unsigned int kvm_sve_max_vl;

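/*
 * Record, at init time, the largest SVE vector length that can be safely
 * virtualised on every CPU in the system.
 */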
int kvm_arm_init_sve(void)
{
	if (system_supports_sve()) {
		kvm_sve_max_vl = sve_max_virtualisable_vl;

		/*
		 * The get_sve_reg()/set_sve_reg() ioctl interface will need
		 * to be extended with multiple register slice support in
		 * order to support vector lengths greater than
		 * SVE_VL_ARCH_MAX:
		 */
		if (WARN_ON(kvm_sve_max_vl > SVE_VL_ARCH_MAX))
			kvm_sve_max_vl = SVE_VL_ARCH_MAX;

		/*
		 * Don't even try to make use of vector lengths that
		 * aren't available on all CPUs, for now:
		 */
		if (kvm_sve_max_vl < sve_max_vl)
			pr_warn("KVM: SVE vector length for guests limited to %u bytes\n",
				kvm_sve_max_vl);
	}

	return 0;
}

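/*
 * Mark the vcpu as SVE-capable and record the maximum vector length it may
 * use; the backing storage is only allocated later, by
 * kvm_vcpu_finalize_sve().
 */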
static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
{
	if (!system_supports_sve())
		return -EINVAL;

	/* Verify that KVM startup enforced this when SVE was detected: */
	if (WARN_ON(!has_vhe()))
		return -EINVAL;

	vcpu->arch.sve_max_vl = kvm_sve_max_vl;

	/*
	 * Userspace can still customize the vector lengths by writing
	 * KVM_REG_ARM64_SVE_VLS.  Allocation is deferred until
	 * kvm_arm_vcpu_finalize(), which freezes the configuration.
	 */
	vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;

	return 0;
}

/*
 * Finalize vcpu's maximum SVE vector length, allocating
 * vcpu->arch.sve_state as necessary.
 */
static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
{
	void *buf;
	unsigned int vl;

	vl = vcpu->arch.sve_max_vl;

	/*
	 * Responsibility for these properties is shared between
	 * kvm_arm_init_sve(), kvm_vcpu_enable_sve() and set_sve_vls().
	 * Double-check here just to be sure:
	 */
	if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl ||
		    vl > SVE_VL_ARCH_MAX))
		return -EIO;

	buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vcpu->arch.sve_state = buf;
	vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED;
	return 0;
}

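/*
 * Handle the KVM_ARM_VCPU_FINALIZE ioctl for a single feature. As an
 * illustrative sketch (not taken from this file), a VMM enabling SVE is
 * expected to issue roughly this sequence on the vcpu fd:
 *
 *	KVM_ARM_VCPU_INIT	(with KVM_ARM_VCPU_SVE set in features[])
 *	KVM_SET_ONE_REG		(optionally, to write KVM_REG_ARM64_SVE_VLS)
 *	KVM_ARM_VCPU_FINALIZE	(with feature = KVM_ARM_VCPU_SVE)
 *
 * after which the SVE configuration is frozen.
 */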
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature)
{
	switch (feature) {
	case KVM_ARM_VCPU_SVE:
		if (!vcpu_has_sve(vcpu))
			return -EINVAL;

		if (kvm_arm_vcpu_sve_finalized(vcpu))
			return -EPERM;

		return kvm_vcpu_finalize_sve(vcpu);
	}

	return -EINVAL;
}

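/*
 * Check whether every feature that requires explicit finalization
 * (currently only SVE) has been finalized for this vcpu.
 */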
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
{
	if (vcpu_has_sve(vcpu) && !kvm_arm_vcpu_sve_finalized(vcpu))
		return false;

	return true;
}

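/* Free the SVE register storage, if any, when the vcpu is destroyed. */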
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	kfree(vcpu->arch.sve_state);
}

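/* Zero the SVE register storage on vcpu reset, if SVE is enabled. */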
static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
{
	if (vcpu_has_sve(vcpu))
		memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu));
}

static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
{
	/*
	 * For now, require that userspace requests the address and generic
	 * pointer authentication features together, and that the system
	 * supports both.
	 */
	if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
	    !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features) ||
	    !system_has_full_ptr_auth())
		return -EINVAL;

	vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH;
	return 0;
}

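/*
 * A 32bit EL1 guest is only allowed if the host supports AArch32 at EL1,
 * and all vcpus of a VM must be configured with the same register width.
 */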
static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu *tmp;
	bool is32bit;
	int i;

	is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
	if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
		return false;

	/* Check that the vcpus are either all 32bit or all 64bit */
	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
		if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
			return false;
	}

	return true;
}

/**
 * kvm_reset_vcpu - sets core registers and sys_regs to reset value
 * @vcpu: The VCPU pointer
 *
 * This function sets the registers on the virtual CPU struct to their
 * architecturally defined reset values, except for registers whose reset is
 * deferred until kvm_arm_vcpu_finalize().
 *
 * Note: This function can be called from two paths: the KVM_ARM_VCPU_INIT
 * ioctl or as part of handling a request issued by another VCPU in the PSCI
 * handling code.  In the first case, the VCPU will not be loaded, and in the
 * second case the VCPU will be loaded.  Because this function operates purely
 * on the memory-backed values of system registers, we want to do a full put if
 * we were loaded (handling a request) and load the values back at the end of
 * the function.  Otherwise we leave the state alone.  In both cases, we
 * disable preemption around the vcpu reset as we would otherwise race with
 * preempt notifiers which also call put/load.
 */
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
	struct vcpu_reset_state reset_state;
	int ret;
	bool loaded;
	u32 pstate;

	mutex_lock(&vcpu->kvm->lock);
	reset_state = vcpu->arch.reset_state;
	WRITE_ONCE(vcpu->arch.reset_state.reset, false);
	mutex_unlock(&vcpu->kvm->lock);

	/* Reset PMU outside of the non-preemptible section */
	kvm_pmu_vcpu_reset(vcpu);

	preempt_disable();
	loaded = (vcpu->cpu != -1);
	if (loaded)
		kvm_arch_vcpu_put(vcpu);

	if (!kvm_arm_vcpu_sve_finalized(vcpu)) {
		if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
			ret = kvm_vcpu_enable_sve(vcpu);
			if (ret)
				goto out;
		}
	} else {
		kvm_vcpu_reset_sve(vcpu);
	}

	if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
	    test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) {
		if (kvm_vcpu_enable_ptrauth(vcpu)) {
			ret = -EINVAL;
			goto out;
		}
	}

	if (!vcpu_allowed_register_width(vcpu)) {
		ret = -EINVAL;
		goto out;
	}

	switch (vcpu->arch.target) {
	default:
		if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
			pstate = VCPU_RESET_PSTATE_SVC;
		} else {
			pstate = VCPU_RESET_PSTATE_EL1;
		}

		break;
	}

	/* Reset core registers */
	memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
	memset(&vcpu->arch.ctxt.fp_regs, 0, sizeof(vcpu->arch.ctxt.fp_regs));
	vcpu->arch.ctxt.spsr_abt = 0;
	vcpu->arch.ctxt.spsr_und = 0;
	vcpu->arch.ctxt.spsr_irq = 0;
	vcpu->arch.ctxt.spsr_fiq = 0;
	vcpu_gp_regs(vcpu)->pstate = pstate;

	/* Reset system registers */
	kvm_reset_sys_regs(vcpu);

	/*
	 * Additional reset state handling that PSCI may have imposed on us.
	 * Must be done after all the sys_reg reset.
	 */
	if (reset_state.reset) {
		unsigned long target_pc = reset_state.pc;

		/* Gracefully handle Thumb2 entry point */
		if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
			target_pc &= ~1UL;
			vcpu_set_thumb(vcpu);
		}

		/* Propagate caller endianness */
		if (reset_state.be)
			kvm_vcpu_set_be(vcpu);

		*vcpu_pc(vcpu) = target_pc;
		vcpu_set_reg(vcpu, 0, reset_state.r0);
	}

	/* Reset timer */
	ret = kvm_timer_vcpu_reset(vcpu);
out:
	if (loaded)
		kvm_arch_vcpu_load(vcpu, smp_processor_id());
	preempt_enable();
	return ret;
}

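/* Report the host-wide IPA size limit computed by kvm_set_ipa_limit(). */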
u32 get_kvm_ipa_limit(void)
{
	return kvm_ipa_limit;
}

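/*
 * Derive the host IPA size limit from the sanitised ID_AA64MMFR0_EL1 value
 * and check that the current page size is supported at Stage-2.
 */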
int kvm_set_ipa_limit(void)
{
	unsigned int parange, tgran_2;
	u64 mmfr0;

	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	parange = cpuid_feature_extract_unsigned_field(mmfr0,
				ID_AA64MMFR0_PARANGE_SHIFT);
	/*
	 * An IPA size beyond 48 bits cannot be supported with either
	 * the 4K or the 16K page size. Hence cap it to 48 bits, in
	 * case the system reports a larger value.
	 */
	if (PAGE_SIZE != SZ_64K)
		parange = min(parange, (unsigned int)ID_AA64MMFR0_PARANGE_48);

	/*
	 * Check with ARMv8.5-GTG that our PAGE_SIZE is supported at
	 * Stage-2. If not, things will stop very quickly.
	 */
	switch (PAGE_SIZE) {
	default:
	case SZ_4K:
		tgran_2 = ID_AA64MMFR0_TGRAN4_2_SHIFT;
		break;
	case SZ_16K:
		tgran_2 = ID_AA64MMFR0_TGRAN16_2_SHIFT;
		break;
	case SZ_64K:
		tgran_2 = ID_AA64MMFR0_TGRAN64_2_SHIFT;
		break;
	}

	switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) {
	case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE:
		kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n");
		return -EINVAL;
	case ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT:
		kvm_debug("PAGE_SIZE supported at Stage-2 (default)\n");
		break;
	case ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN ... ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX:
		kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n");
		break;
	default:
		kvm_err("Unsupported value for TGRAN_2, giving up\n");
		return -EINVAL;
	}

	kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange);
	kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit,
		 ((kvm_ipa_limit < KVM_PHYS_SHIFT) ?
		  " (Reduced IPA size, limited VM/VMM compatibility)" : ""));

	return 0;
}

/*
 * Configure the VTCR_EL2 for this VM. The VTCR value is common
 * across all the physical CPUs on the system. We use system-wide
 * sanitised values to fill in the different fields, except for
 * Hardware Management of the Access Flag. The HA flag is set
 * unconditionally on all CPUs, as it is safe to run with or without
 * the feature and the bit is RES0 on CPUs that don't support it.
 */
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
{
	u64 vtcr = VTCR_EL2_FLAGS, mmfr0;
	u32 parange, phys_shift;
	u8 lvls;

	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
		return -EINVAL;

	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
	if (phys_shift) {
		if (phys_shift > kvm_ipa_limit ||
		    phys_shift < 32)
			return -EINVAL;
	} else {
		phys_shift = KVM_PHYS_SHIFT;
		if (phys_shift > kvm_ipa_limit) {
			pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
				     current->comm);
			return -EINVAL;
		}
	}

	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	parange = cpuid_feature_extract_unsigned_field(mmfr0,
				ID_AA64MMFR0_PARANGE_SHIFT);
	if (parange > ID_AA64MMFR0_PARANGE_MAX)
		parange = ID_AA64MMFR0_PARANGE_MAX;
	vtcr |= parange << VTCR_EL2_PS_SHIFT;

	vtcr |= VTCR_EL2_T0SZ(phys_shift);
	/*
	 * Use a minimum 2 level page table to prevent splitting
	 * host PMD huge pages at stage2.
	 */
	lvls = stage2_pgtable_levels(phys_shift);
	if (lvls < 2)
		lvls = 2;
	vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);

	/*
	 * Enable Hardware Access Flag management unconditionally on all
	 * CPUs. The feature is RES0 on CPUs that don't support it and
	 * must be ignored by them.
	 */
	vtcr |= VTCR_EL2_HA;

	/* Set the vmid bits */
	vtcr |= (kvm_get_vmid_bits() == 16) ?
		VTCR_EL2_VS_16BIT :
		VTCR_EL2_VS_8BIT;
	kvm->arch.vtcr = vtcr;
	return 0;
}