// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */
662306a36Sopenharmony_ci
762306a36Sopenharmony_ci#include <linux/init.h>
862306a36Sopenharmony_ci#include <linux/kmemleak.h>
962306a36Sopenharmony_ci#include <linux/kvm_host.h>
1062306a36Sopenharmony_ci#include <linux/memblock.h>
1162306a36Sopenharmony_ci#include <linux/mutex.h>
1262306a36Sopenharmony_ci#include <linux/sort.h>
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci#include <asm/kvm_pkvm.h>
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ci#include "hyp_constants.h"
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ciDEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_cistatic struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
2162306a36Sopenharmony_cistatic unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_ciphys_addr_t hyp_mem_base;
2462306a36Sopenharmony_ciphys_addr_t hyp_mem_size;
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_cistatic int cmp_hyp_memblock(const void *p1, const void *p2)
2762306a36Sopenharmony_ci{
2862306a36Sopenharmony_ci	const struct memblock_region *r1 = p1;
2962306a36Sopenharmony_ci	const struct memblock_region *r2 = p2;
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci	return r1->base < r2->base ? -1 : (r1->base > r2->base);
3262306a36Sopenharmony_ci}
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_cistatic void __init sort_memblock_regions(void)
3562306a36Sopenharmony_ci{
3662306a36Sopenharmony_ci	sort(hyp_memory,
3762306a36Sopenharmony_ci	     *hyp_memblock_nr_ptr,
3862306a36Sopenharmony_ci	     sizeof(struct memblock_region),
3962306a36Sopenharmony_ci	     cmp_hyp_memblock,
4062306a36Sopenharmony_ci	     NULL);
4162306a36Sopenharmony_ci}
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_cistatic int __init register_memblock_regions(void)
4462306a36Sopenharmony_ci{
4562306a36Sopenharmony_ci	struct memblock_region *reg;
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci	for_each_mem_region(reg) {
4862306a36Sopenharmony_ci		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
4962306a36Sopenharmony_ci			return -ENOMEM;
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
5262306a36Sopenharmony_ci		(*hyp_memblock_nr_ptr)++;
5362306a36Sopenharmony_ci	}
5462306a36Sopenharmony_ci	sort_memblock_regions();
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci	return 0;
5762306a36Sopenharmony_ci}
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_civoid __init kvm_hyp_reserve(void)
6062306a36Sopenharmony_ci{
6162306a36Sopenharmony_ci	u64 hyp_mem_pages = 0;
6262306a36Sopenharmony_ci	int ret;
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
6562306a36Sopenharmony_ci		return;
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci	if (kvm_get_mode() != KVM_MODE_PROTECTED)
6862306a36Sopenharmony_ci		return;
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci	ret = register_memblock_regions();
7162306a36Sopenharmony_ci	if (ret) {
7262306a36Sopenharmony_ci		*hyp_memblock_nr_ptr = 0;
7362306a36Sopenharmony_ci		kvm_err("Failed to register hyp memblocks: %d\n", ret);
7462306a36Sopenharmony_ci		return;
7562306a36Sopenharmony_ci	}
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci	hyp_mem_pages += hyp_s1_pgtable_pages();
7862306a36Sopenharmony_ci	hyp_mem_pages += host_s2_pgtable_pages();
7962306a36Sopenharmony_ci	hyp_mem_pages += hyp_vm_table_pages();
8062306a36Sopenharmony_ci	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
8162306a36Sopenharmony_ci	hyp_mem_pages += hyp_ffa_proxy_pages();
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	/*
8462306a36Sopenharmony_ci	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
8562306a36Sopenharmony_ci	 * this is unmapped from the host stage-2, and fallback to PAGE_SIZE.
8662306a36Sopenharmony_ci	 */
8762306a36Sopenharmony_ci	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
8862306a36Sopenharmony_ci	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
8962306a36Sopenharmony_ci					   PMD_SIZE);
9062306a36Sopenharmony_ci	if (!hyp_mem_base)
9162306a36Sopenharmony_ci		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
9262306a36Sopenharmony_ci	else
9362306a36Sopenharmony_ci		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci	if (!hyp_mem_base) {
9662306a36Sopenharmony_ci		kvm_err("Failed to reserve hyp memory\n");
9762306a36Sopenharmony_ci		return;
9862306a36Sopenharmony_ci	}
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
10162306a36Sopenharmony_ci		 hyp_mem_base);
10262306a36Sopenharmony_ci}
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_cistatic void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
10562306a36Sopenharmony_ci{
10662306a36Sopenharmony_ci	if (host_kvm->arch.pkvm.handle) {
10762306a36Sopenharmony_ci		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
10862306a36Sopenharmony_ci					  host_kvm->arch.pkvm.handle));
10962306a36Sopenharmony_ci	}
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	host_kvm->arch.pkvm.handle = 0;
11262306a36Sopenharmony_ci	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
11362306a36Sopenharmony_ci}
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci/*
11662306a36Sopenharmony_ci * Allocates and donates memory for hypervisor VM structs at EL2.
11762306a36Sopenharmony_ci *
11862306a36Sopenharmony_ci * Allocates space for the VM state, which includes the hyp vm as well as
11962306a36Sopenharmony_ci * the hyp vcpus.
12062306a36Sopenharmony_ci *
12162306a36Sopenharmony_ci * Stores an opaque handler in the kvm struct for future reference.
12262306a36Sopenharmony_ci *
12362306a36Sopenharmony_ci * Return 0 on success, negative error code on failure.
12462306a36Sopenharmony_ci */
12562306a36Sopenharmony_cistatic int __pkvm_create_hyp_vm(struct kvm *host_kvm)
12662306a36Sopenharmony_ci{
12762306a36Sopenharmony_ci	size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz;
12862306a36Sopenharmony_ci	struct kvm_vcpu *host_vcpu;
12962306a36Sopenharmony_ci	pkvm_handle_t handle;
13062306a36Sopenharmony_ci	void *pgd, *hyp_vm;
13162306a36Sopenharmony_ci	unsigned long idx;
13262306a36Sopenharmony_ci	int ret;
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	if (host_kvm->created_vcpus < 1)
13562306a36Sopenharmony_ci		return -EINVAL;
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	/*
14062306a36Sopenharmony_ci	 * The PGD pages will be reclaimed using a hyp_memcache which implies
14162306a36Sopenharmony_ci	 * page granularity. So, use alloc_pages_exact() to get individual
14262306a36Sopenharmony_ci	 * refcounts.
14362306a36Sopenharmony_ci	 */
14462306a36Sopenharmony_ci	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
14562306a36Sopenharmony_ci	if (!pgd)
14662306a36Sopenharmony_ci		return -ENOMEM;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
14962306a36Sopenharmony_ci	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
15062306a36Sopenharmony_ci					size_mul(sizeof(void *),
15162306a36Sopenharmony_ci						 host_kvm->created_vcpus)));
15262306a36Sopenharmony_ci	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
15362306a36Sopenharmony_ci	if (!hyp_vm) {
15462306a36Sopenharmony_ci		ret = -ENOMEM;
15562306a36Sopenharmony_ci		goto free_pgd;
15662306a36Sopenharmony_ci	}
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci	/* Donate the VM memory to hyp and let hyp initialize it. */
15962306a36Sopenharmony_ci	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
16062306a36Sopenharmony_ci	if (ret < 0)
16162306a36Sopenharmony_ci		goto free_vm;
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci	handle = ret;
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_ci	host_kvm->arch.pkvm.handle = handle;
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ci	/* Donate memory for the vcpus at hyp and initialize it. */
16862306a36Sopenharmony_ci	hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
16962306a36Sopenharmony_ci	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
17062306a36Sopenharmony_ci		void *hyp_vcpu;
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci		/* Indexing of the vcpus to be sequential starting at 0. */
17362306a36Sopenharmony_ci		if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
17462306a36Sopenharmony_ci			ret = -EINVAL;
17562306a36Sopenharmony_ci			goto destroy_vm;
17662306a36Sopenharmony_ci		}
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci		hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
17962306a36Sopenharmony_ci		if (!hyp_vcpu) {
18062306a36Sopenharmony_ci			ret = -ENOMEM;
18162306a36Sopenharmony_ci			goto destroy_vm;
18262306a36Sopenharmony_ci		}
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci		ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
18562306a36Sopenharmony_ci					hyp_vcpu);
18662306a36Sopenharmony_ci		if (ret) {
18762306a36Sopenharmony_ci			free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
18862306a36Sopenharmony_ci			goto destroy_vm;
18962306a36Sopenharmony_ci		}
19062306a36Sopenharmony_ci	}
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	return 0;
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_cidestroy_vm:
19562306a36Sopenharmony_ci	__pkvm_destroy_hyp_vm(host_kvm);
19662306a36Sopenharmony_ci	return ret;
19762306a36Sopenharmony_cifree_vm:
19862306a36Sopenharmony_ci	free_pages_exact(hyp_vm, hyp_vm_sz);
19962306a36Sopenharmony_cifree_pgd:
20062306a36Sopenharmony_ci	free_pages_exact(pgd, pgd_sz);
20162306a36Sopenharmony_ci	return ret;
20262306a36Sopenharmony_ci}
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ciint pkvm_create_hyp_vm(struct kvm *host_kvm)
20562306a36Sopenharmony_ci{
20662306a36Sopenharmony_ci	int ret = 0;
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	mutex_lock(&host_kvm->arch.config_lock);
20962306a36Sopenharmony_ci	if (!host_kvm->arch.pkvm.handle)
21062306a36Sopenharmony_ci		ret = __pkvm_create_hyp_vm(host_kvm);
21162306a36Sopenharmony_ci	mutex_unlock(&host_kvm->arch.config_lock);
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci	return ret;
21462306a36Sopenharmony_ci}
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_civoid pkvm_destroy_hyp_vm(struct kvm *host_kvm)
21762306a36Sopenharmony_ci{
21862306a36Sopenharmony_ci	mutex_lock(&host_kvm->arch.config_lock);
21962306a36Sopenharmony_ci	__pkvm_destroy_hyp_vm(host_kvm);
22062306a36Sopenharmony_ci	mutex_unlock(&host_kvm->arch.config_lock);
22162306a36Sopenharmony_ci}
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_ciint pkvm_init_host_vm(struct kvm *host_kvm)
22462306a36Sopenharmony_ci{
22562306a36Sopenharmony_ci	mutex_init(&host_kvm->lock);
22662306a36Sopenharmony_ci	return 0;
22762306a36Sopenharmony_ci}
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_cistatic void __init _kvm_host_prot_finalize(void *arg)
23062306a36Sopenharmony_ci{
23162306a36Sopenharmony_ci	int *err = arg;
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
23462306a36Sopenharmony_ci		WRITE_ONCE(*err, -EINVAL);
23562306a36Sopenharmony_ci}
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_cistatic int __init pkvm_drop_host_privileges(void)
23862306a36Sopenharmony_ci{
23962306a36Sopenharmony_ci	int ret = 0;
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	/*
24262306a36Sopenharmony_ci	 * Flip the static key upfront as that may no longer be possible
24362306a36Sopenharmony_ci	 * once the host stage 2 is installed.
24462306a36Sopenharmony_ci	 */
24562306a36Sopenharmony_ci	static_branch_enable(&kvm_protected_mode_initialized);
24662306a36Sopenharmony_ci	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
24762306a36Sopenharmony_ci	return ret;
24862306a36Sopenharmony_ci}
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_cistatic int __init finalize_pkvm(void)
25162306a36Sopenharmony_ci{
25262306a36Sopenharmony_ci	int ret;
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
25562306a36Sopenharmony_ci		return 0;
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_ci	/*
25862306a36Sopenharmony_ci	 * Exclude HYP sections from kmemleak so that they don't get peeked
25962306a36Sopenharmony_ci	 * at, which would end badly once inaccessible.
26062306a36Sopenharmony_ci	 */
26162306a36Sopenharmony_ci	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
26262306a36Sopenharmony_ci	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	ret = pkvm_drop_host_privileges();
26562306a36Sopenharmony_ci	if (ret)
26662306a36Sopenharmony_ci		pr_err("Failed to finalize Hyp protection: %d\n", ret);
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci	return ret;
26962306a36Sopenharmony_ci}
27062306a36Sopenharmony_cidevice_initcall_sync(finalize_pkvm);
271