// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
#include <linux/sched/signal.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_ras.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/virt.h>

#include "trace.h"

static struct kvm_pgtable *hyp_pgtable;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

static unsigned long __ro_after_init hyp_idmap_start;
static unsigned long __ro_after_init hyp_idmap_end;
static phys_addr_t __ro_after_init hyp_idmap_vector;

static unsigned long __ro_after_init io_map_base;

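/*
 * Return the end of the current walk chunk starting at @addr: the next
 * @size-aligned boundary, clamped to @end. The "- 1" comparison keeps this
 * correct when @end wraps to 0 at the top of the address space.
 */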
static phys_addr_t __stage2_range_addr_end(phys_addr_t addr, phys_addr_t end,
					   phys_addr_t size)
{
	phys_addr_t boundary = ALIGN_DOWN(addr + size, size);

	return (boundary - 1 < end - 1) ? boundary : end;
}

static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t size = kvm_granule_size(KVM_PGTABLE_MIN_BLOCK_LEVEL);

	return __stage2_range_addr_end(addr, end, size);
}

/*
 * Release kvm_mmu_lock periodically if the memory region is large. Otherwise,
 * we may see kernel panics with CONFIG_DETECT_HUNG_TASK,
 * CONFIG_LOCKUP_DETECTOR or CONFIG_LOCKDEP. Additionally, holding the lock
 * for too long will starve other vCPUs. We also have to make sure that the
 * page tables are not freed while the lock is dropped.
 */
static int stage2_apply_range(struct kvm_s2_mmu *mmu, phys_addr_t addr,
			      phys_addr_t end,
			      int (*fn)(struct kvm_pgtable *, u64, u64),
			      bool resched)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
	int ret;
	u64 next;

	do {
		struct kvm_pgtable *pgt = mmu->pgt;
		if (!pgt)
			return -EINVAL;

		next = stage2_range_addr_end(addr, end);
		ret = fn(pgt, addr, next - addr);
		if (ret)
			break;

		if (resched && next != end)
			cond_resched_rwlock_write(&kvm->mmu_lock);
	} while (addr = next, addr != end);

	return ret;
}

#define stage2_apply_range_resched(mmu, addr, end, fn)			\
	stage2_apply_range(mmu, addr, end, fn, true)

/*
 * Get the maximum number of page-table pages needed to split a range
 * of blocks into PAGE_SIZE PTEs. It assumes the range is already
 * mapped at level 2, or at level 1 if allowed.
 */
static int kvm_mmu_split_nr_page_tables(u64 range)
{
	int n = 0;

	if (KVM_PGTABLE_MIN_BLOCK_LEVEL < 2)
		n += DIV_ROUND_UP(range, PUD_SIZE);
	n += DIV_ROUND_UP(range, PMD_SIZE);
	return n;
}

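/*
 * Decide whether eager page splitting should pause: either because we should
 * reschedule (or drop a contended mmu_lock), or because the split-page memory
 * cache no longer holds enough pages to split the next chunk.
 */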
static bool need_split_memcache_topup_or_resched(struct kvm *kvm)
{
	struct kvm_mmu_memory_cache *cache;
	u64 chunk_size, min;

	if (need_resched() || rwlock_needbreak(&kvm->mmu_lock))
		return true;

	chunk_size = kvm->arch.mmu.split_page_chunk_size;
	min = kvm_mmu_split_nr_page_tables(chunk_size);
	cache = &kvm->arch.mmu.split_page_cache;
	return kvm_mmu_memory_cache_nr_free_objects(cache) < min;
}

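/*
 * Eagerly split the block mappings in [addr, end) down to PAGE_SIZE PTEs,
 * one split_page_chunk_size chunk at a time, dropping the mmu_lock to top up
 * the split-page memory cache or reschedule whenever needed. Must be called
 * with the mmu_lock held for write.
 */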
static int kvm_mmu_split_huge_pages(struct kvm *kvm, phys_addr_t addr,
				    phys_addr_t end)
{
	struct kvm_mmu_memory_cache *cache;
	struct kvm_pgtable *pgt;
	int ret, cache_capacity;
	u64 next, chunk_size;

	lockdep_assert_held_write(&kvm->mmu_lock);

	chunk_size = kvm->arch.mmu.split_page_chunk_size;
	cache_capacity = kvm_mmu_split_nr_page_tables(chunk_size);

	if (chunk_size == 0)
		return 0;

	cache = &kvm->arch.mmu.split_page_cache;

	do {
		if (need_split_memcache_topup_or_resched(kvm)) {
			write_unlock(&kvm->mmu_lock);
			cond_resched();
			/* Eager page splitting is best-effort. */
			ret = __kvm_mmu_topup_memory_cache(cache,
							   cache_capacity,
							   cache_capacity);
			write_lock(&kvm->mmu_lock);
			if (ret)
				break;
		}

		pgt = kvm->arch.mmu.pgt;
		if (!pgt)
			return -EINVAL;

		next = __stage2_range_addr_end(addr, end, chunk_size);
		ret = kvm_pgtable_stage2_split(pgt, addr, next - addr, cache);
		if (ret)
			break;
	} while (addr = next, addr != end);

	return ret;
}

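/* Dirty logging is in effect for writable memslots that have a dirty bitmap. */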
static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
}

/**
 * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries
 * @kvm:	pointer to kvm structure.
 *
 * Interface to the HYP function that flushes all TLB entries for this VM.
 */
int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
	return 0;
}

int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
				      gfn_t gfn, u64 nr_pages)
{
	kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
				gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
	return 0;
}

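/* A pfn is treated as device memory if it is not covered by the linear map. */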
static bool kvm_is_device_pfn(unsigned long pfn)
{
	return !pfn_is_map_memory(pfn);
}

static void *stage2_memcache_zalloc_page(void *arg)
{
	struct kvm_mmu_memory_cache *mc = arg;
	void *virt;

	/* Allocated with __GFP_ZERO, so no need to zero */
	virt = kvm_mmu_memory_cache_alloc(mc);
	if (virt)
		kvm_account_pgtable_pages(virt, 1);
	return virt;
}

static void *kvm_host_zalloc_pages_exact(size_t size)
{
	return alloc_pages_exact(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
}

static void *kvm_s2_zalloc_pages_exact(size_t size)
{
	void *virt = kvm_host_zalloc_pages_exact(size);

	if (virt)
		kvm_account_pgtable_pages(virt, (size >> PAGE_SHIFT));
	return virt;
}

static void kvm_s2_free_pages_exact(void *virt, size_t size)
{
	kvm_account_pgtable_pages(virt, -(size >> PAGE_SHIFT));
	free_pages_exact(virt, size);
}

static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;

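/*
 * Unlinked stage-2 tables are freed via RCU: the table's level is stashed in
 * the page's private field so the callback can walk and free it once all
 * walkers that might still observe the old entries have finished.
 */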
static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head)
{
	struct page *page = container_of(head, struct page, rcu_head);
	void *pgtable = page_to_virt(page);
	u32 level = page_private(page);

	kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level);
}

static void stage2_free_unlinked_table(void *addr, u32 level)
{
	struct page *page = virt_to_page(addr);

	set_page_private(page, (unsigned long)level);
	call_rcu(&page->rcu_head, stage2_free_unlinked_table_rcu_cb);
}

static void kvm_host_get_page(void *addr)
{
	get_page(virt_to_page(addr));
}

static void kvm_host_put_page(void *addr)
{
	put_page(virt_to_page(addr));
}

static void kvm_s2_put_page(void *addr)
{
	struct page *p = virt_to_page(addr);
	/* Dropping last refcount, the page will be freed */
	if (page_count(p) == 1)
		kvm_account_pgtable_pages(addr, -1);
	put_page(p);
}

static int kvm_host_page_count(void *addr)
{
	return page_count(virt_to_page(addr));
}

static phys_addr_t kvm_host_pa(void *addr)
{
	return __pa(addr);
}

static void *kvm_host_va(phys_addr_t phys)
{
	return __va(phys);
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__clean_dcache_guest_page(va, size);
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__invalidate_icache_guest_page(va, size);
}

/*
 * Unmapping vs dcache management:
 *
 * If a guest maps certain memory pages as uncached, all writes will
 * bypass the data cache and go directly to RAM.  However, the CPUs
 * can still speculate reads (not writes) and fill cache lines with
 * data.
 *
 * Those cache lines will be *clean* cache lines though, so a
 * clean+invalidate operation is equivalent to an invalidate
 * operation, because no cache lines are marked dirty.
 *
 * Those clean cache lines could be filled prior to an uncached write
 * by the guest, and the cache coherent IO subsystem would therefore
 * end up writing old data to disk.
 *
 * This is why right after unmapping a page/section and invalidating
 * the corresponding TLBs, we flush to make sure the IO subsystem will
 * never hit in the cache.
 *
 * This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as
 * we then fully enforce cacheability of RAM, no matter what the guest
 * does.
 */
/**
 * __unmap_stage2_range -- Clear stage2 page table entries to unmap a range
 * @mmu:   The KVM stage-2 MMU pointer
 * @start: The intermediate physical base address of the range to unmap
 * @size:  The size of the area to unmap
 * @may_block: Whether or not we are permitted to block
 *
 * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
 * be called while holding mmu_lock (except when freeing the stage2 pgd before
 * destroying the VM), otherwise another faulting VCPU may come in and mess
 * with things behind our backs.
 */
static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
				 bool may_block)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
	phys_addr_t end = start + size;

	lockdep_assert_held_write(&kvm->mmu_lock);
	WARN_ON(size & ~PAGE_MASK);
	WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap,
				   may_block));
}

static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
{
	__unmap_stage2_range(mmu, start, size, true);
}

static void stage2_flush_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;

	stage2_apply_range_resched(&kvm->arch.mmu, addr, end, kvm_pgtable_stage2_flush);
}

/**
 * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
 * @kvm: The struct kvm pointer
 *
 * Go through the stage 2 page tables and invalidate any cache lines
 * backing memory already mapped to the VM.
 */
static void stage2_flush_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx, bkt;

	idx = srcu_read_lock(&kvm->srcu);
	write_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, bkt, slots)
		stage2_flush_memslot(kvm, memslot);

	write_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
}

/**
 * free_hyp_pgds - free Hyp-mode page tables
 */
void __init free_hyp_pgds(void)
{
	mutex_lock(&kvm_hyp_pgd_mutex);
	if (hyp_pgtable) {
		kvm_pgtable_hyp_destroy(hyp_pgtable);
		kfree(hyp_pgtable);
		hyp_pgtable = NULL;
	}
	mutex_unlock(&kvm_hyp_pgd_mutex);
}

static bool kvm_host_owns_hyp_mappings(void)
{
	if (is_kernel_in_hyp_mode())
		return false;

	if (static_branch_likely(&kvm_protected_mode_initialized))
		return false;

	/*
	 * This can happen at boot time when __create_hyp_mappings() is called
	 * after the hyp protection has been enabled, but the static key has
	 * not been flipped yet.
	 */
	if (!hyp_pgtable && is_protected_kvm_enabled())
		return false;

	WARN_ON(!hyp_pgtable);

	return true;
}

int __create_hyp_mappings(unsigned long start, unsigned long size,
			  unsigned long phys, enum kvm_pgtable_prot prot)
{
	int err;

	if (WARN_ON(!kvm_host_owns_hyp_mappings()))
		return -EINVAL;

	mutex_lock(&kvm_hyp_pgd_mutex);
	err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot);
	mutex_unlock(&kvm_hyp_pgd_mutex);

	return err;
}

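/* Translate a kernel VA (linear map or vmalloc) to its physical address. */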
static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
{
	if (!is_vmalloc_addr(kaddr)) {
		BUG_ON(!virt_addr_valid(kaddr));
		return __pa(kaddr);
	} else {
		return page_to_phys(vmalloc_to_page(kaddr)) +
		       offset_in_page(kaddr);
	}
}

struct hyp_shared_pfn {
	u64 pfn;
	int count;
	struct rb_node node;
};

static DEFINE_MUTEX(hyp_shared_pfns_lock);
static struct rb_root hyp_shared_pfns = RB_ROOT;

static struct hyp_shared_pfn *find_shared_pfn(u64 pfn, struct rb_node ***node,
					      struct rb_node **parent)
{
	struct hyp_shared_pfn *this;

	*node = &hyp_shared_pfns.rb_node;
	*parent = NULL;
	while (**node) {
		this = container_of(**node, struct hyp_shared_pfn, node);
		*parent = **node;
		if (this->pfn < pfn)
			*node = &((**node)->rb_left);
		else if (this->pfn > pfn)
			*node = &((**node)->rb_right);
		else
			return this;
	}

	return NULL;
}

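/*
 * Shared pfns are refcounted in the rb-tree above: the __pkvm_host_share_hyp
 * and __pkvm_host_unshare_hyp hypercalls are only issued on the first share
 * and the last unshare of a given pfn.
 */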
static int share_pfn_hyp(u64 pfn)
{
	struct rb_node **node, *parent;
	struct hyp_shared_pfn *this;
	int ret = 0;

	mutex_lock(&hyp_shared_pfns_lock);
	this = find_shared_pfn(pfn, &node, &parent);
	if (this) {
		this->count++;
		goto unlock;
	}

	this = kzalloc(sizeof(*this), GFP_KERNEL);
	if (!this) {
		ret = -ENOMEM;
		goto unlock;
	}

	this->pfn = pfn;
	this->count = 1;
	rb_link_node(&this->node, parent, node);
	rb_insert_color(&this->node, &hyp_shared_pfns);
	ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1);
unlock:
	mutex_unlock(&hyp_shared_pfns_lock);

	return ret;
}

static int unshare_pfn_hyp(u64 pfn)
{
	struct rb_node **node, *parent;
	struct hyp_shared_pfn *this;
	int ret = 0;

	mutex_lock(&hyp_shared_pfns_lock);
	this = find_shared_pfn(pfn, &node, &parent);
	if (WARN_ON(!this)) {
		ret = -ENOENT;
		goto unlock;
	}

	this->count--;
	if (this->count)
		goto unlock;

	rb_erase(&this->node, &hyp_shared_pfns);
	kfree(this);
	ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1);
unlock:
	mutex_unlock(&hyp_shared_pfns_lock);

	return ret;
}

int kvm_share_hyp(void *from, void *to)
{
	phys_addr_t start, end, cur;
	u64 pfn;
	int ret;

	if (is_kernel_in_hyp_mode())
		return 0;

	/*
	 * The share hcall maps things in the 'fixed-offset' region of the hyp
	 * VA space, so we can only share physically contiguous data-structures
	 * for now.
	 */
	if (is_vmalloc_or_module_addr(from) || is_vmalloc_or_module_addr(to))
		return -EINVAL;

	if (kvm_host_owns_hyp_mappings())
		return create_hyp_mappings(from, to, PAGE_HYP);

	start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
	end = PAGE_ALIGN(__pa(to));
	for (cur = start; cur < end; cur += PAGE_SIZE) {
		pfn = __phys_to_pfn(cur);
		ret = share_pfn_hyp(pfn);
		if (ret)
			return ret;
	}

	return 0;
}

void kvm_unshare_hyp(void *from, void *to)
{
	phys_addr_t start, end, cur;
	u64 pfn;

	if (is_kernel_in_hyp_mode() || kvm_host_owns_hyp_mappings() || !from)
		return;

	start = ALIGN_DOWN(__pa(from), PAGE_SIZE);
	end = PAGE_ALIGN(__pa(to));
	for (cur = start; cur < end; cur += PAGE_SIZE) {
		pfn = __phys_to_pfn(cur);
		WARN_ON(unshare_pfn_hyp(pfn));
	}
}

/**
 * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
 * @from:	The virtual kernel start address of the range
 * @to:		The virtual kernel end address of the range (exclusive)
 * @prot:	The protection to be applied to this range
 *
 * The same virtual address as the kernel virtual address is also used in
 * Hyp-mode (modulo HYP_PAGE_OFFSET) to map the same underlying physical
 * pages.
 */
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
{
	phys_addr_t phys_addr;
	unsigned long virt_addr;
	unsigned long start = kern_hyp_va((unsigned long)from);
	unsigned long end = kern_hyp_va((unsigned long)to);

	if (is_kernel_in_hyp_mode())
		return 0;

	if (!kvm_host_owns_hyp_mappings())
		return -EPERM;

	start = start & PAGE_MASK;
	end = PAGE_ALIGN(end);

	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
		int err;

		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
		err = __create_hyp_mappings(virt_addr, PAGE_SIZE, phys_addr,
					    prot);
		if (err)
			return err;
	}

	return 0;
}

static int __hyp_alloc_private_va_range(unsigned long base)
{
	lockdep_assert_held(&kvm_hyp_pgd_mutex);

	if (!PAGE_ALIGNED(base))
		return -EINVAL;

	/*
	 * Verify that BIT(VA_BITS - 1) hasn't been flipped by
	 * allocating the new area, as it would indicate we've
	 * overflowed the idmap/IO address range.
	 */
	if ((base ^ io_map_base) & BIT(VA_BITS - 1))
		return -ENOMEM;

	io_map_base = base;

	return 0;
}

/**
 * hyp_alloc_private_va_range - Allocates a private VA range.
 * @size:	The size of the VA range to reserve.
 * @haddr:	The hypervisor virtual start address of the allocation.
 *
 * The private virtual address (VA) range is allocated below io_map_base
 * and aligned based on the order of @size.
 *
 * Return: 0 on success or negative error code on failure.
 */
int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
{
	unsigned long base;
	int ret = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);

	/*
	 * This assumes that we have enough space below the idmap
	 * page to allocate our VAs. If not, the check in
	 * __hyp_alloc_private_va_range() will kick in. A potential
	 * alternative would be to detect that overflow and switch
	 * to an allocation above the idmap.
	 *
	 * The allocated size is always a multiple of PAGE_SIZE.
	 */
	size = PAGE_ALIGN(size);
	base = io_map_base - size;
	ret = __hyp_alloc_private_va_range(base);

	mutex_unlock(&kvm_hyp_pgd_mutex);

	if (!ret)
		*haddr = base;

	return ret;
}

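/*
 * Map @size bytes at @phys_addr into the hyp private VA range. In protected
 * mode this is delegated to the hypervisor via __pkvm_create_private_mapping;
 * otherwise a private VA range is allocated and mapped by the host.
 */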
static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
					unsigned long *haddr,
					enum kvm_pgtable_prot prot)
{
	unsigned long addr;
	int ret = 0;

	if (!kvm_host_owns_hyp_mappings()) {
		addr = kvm_call_hyp_nvhe(__pkvm_create_private_mapping,
					 phys_addr, size, prot);
		if (IS_ERR_VALUE(addr))
			return addr;
		*haddr = addr;

		return 0;
	}

	size = PAGE_ALIGN(size + offset_in_page(phys_addr));
	ret = hyp_alloc_private_va_range(size, &addr);
	if (ret)
		return ret;

	ret = __create_hyp_mappings(addr, size, phys_addr, prot);
	if (ret)
		return ret;

	*haddr = addr + offset_in_page(phys_addr);
	return ret;
}

int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
{
	unsigned long base;
	size_t size;
	int ret;

	mutex_lock(&kvm_hyp_pgd_mutex);
	/*
	 * Efficient stack verification using the PAGE_SHIFT bit implies
	 * an alignment of our allocation on the order of the size.
	 */
	size = PAGE_SIZE * 2;
	base = ALIGN_DOWN(io_map_base - size, size);

	ret = __hyp_alloc_private_va_range(base);

	mutex_unlock(&kvm_hyp_pgd_mutex);

	if (ret) {
		kvm_err("Cannot allocate hyp stack guard page\n");
		return ret;
	}

	/*
	 * Since the stack grows downwards, map the stack to the page
	 * at the higher address and leave the lower guard page
	 * unbacked.
	 *
	 * Any valid stack address now has the PAGE_SHIFT bit as 1
	 * and addresses corresponding to the guard page have the
	 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
	 */
	ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
				    PAGE_HYP);
	if (ret)
		kvm_err("Cannot map hyp stack\n");

	*haddr = base + size;

	return ret;
}

/**
 * create_hyp_io_mappings - Map IO into both kernel and HYP
 * @phys_addr:	The physical start address which gets mapped
 * @size:	Size of the region being mapped
 * @kaddr:	Kernel VA for this mapping
 * @haddr:	HYP VA for this mapping
 */
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
			   void __iomem **kaddr,
			   void __iomem **haddr)
{
	unsigned long addr;
	int ret;

	if (is_protected_kvm_enabled())
		return -EPERM;

	*kaddr = ioremap(phys_addr, size);
	if (!*kaddr)
		return -ENOMEM;

	if (is_kernel_in_hyp_mode()) {
		*haddr = *kaddr;
		return 0;
	}

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_DEVICE);
	if (ret) {
		iounmap(*kaddr);
		*kaddr = NULL;
		*haddr = NULL;
		return ret;
	}

	*haddr = (void __iomem *)addr;
	return 0;
}

/**
 * create_hyp_exec_mappings - Map an executable range into HYP
 * @phys_addr:	The physical start address which gets mapped
 * @size:	Size of the region being mapped
 * @haddr:	HYP VA for this mapping
 */
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
			     void **haddr)
{
	unsigned long addr;
	int ret;

	BUG_ON(is_kernel_in_hyp_mode());

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_EXEC);
	if (ret) {
		*haddr = NULL;
		return ret;
	}

	*haddr = (void *)addr;
	return 0;
}

static struct kvm_pgtable_mm_ops kvm_user_mm_ops = {
	/* We shouldn't need any other callback to walk the PT */
	.phys_to_virt		= kvm_host_va,
};

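/*
 * Walk the current task's userspace page tables (wrapped in a temporary
 * kvm_pgtable) to find the size of the mapping covering @addr. Returns a
 * negative error code if the walk fails or the PTE is no longer valid.
 */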
static int get_user_mapping_size(struct kvm *kvm, u64 addr)
{
	struct kvm_pgtable pgt = {
		.pgd		= (kvm_pteref_t)kvm->mm->pgd,
		.ia_bits	= vabits_actual,
		.start_level	= (KVM_PGTABLE_MAX_LEVELS -
				   CONFIG_PGTABLE_LEVELS),
		.mm_ops		= &kvm_user_mm_ops,
	};
	unsigned long flags;
	kvm_pte_t pte = 0;	/* Keep GCC quiet... */
	u32 level = ~0;
	int ret;

	/*
	 * Disable IRQs so that we hazard against a concurrent
	 * teardown of the userspace page tables (which relies on
	 * IPI-ing threads).
	 */
	local_irq_save(flags);
	ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
	local_irq_restore(flags);

	if (ret)
		return ret;

	/*
	 * Not seeing an error, but not updating level? Something went
	 * deeply wrong...
	 */
	if (WARN_ON(level >= KVM_PGTABLE_MAX_LEVELS))
		return -EFAULT;

	/* Oops, the userspace PTs are gone... Replay the fault */
	if (!kvm_pte_valid(pte))
		return -EAGAIN;

	return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
}

static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
	.zalloc_page		= stage2_memcache_zalloc_page,
	.zalloc_pages_exact	= kvm_s2_zalloc_pages_exact,
	.free_pages_exact	= kvm_s2_free_pages_exact,
	.free_unlinked_table	= stage2_free_unlinked_table,
	.get_page		= kvm_host_get_page,
	.put_page		= kvm_s2_put_page,
	.page_count		= kvm_host_page_count,
	.phys_to_virt		= kvm_host_va,
	.virt_to_phys		= kvm_host_pa,
	.dcache_clean_inval_poc	= clean_dcache_guest_page,
	.icache_inval_pou	= invalidate_icache_guest_page,
};

/**
 * kvm_init_stage2_mmu - Initialise a S2 MMU structure
 * @kvm:	The pointer to the KVM structure
 * @mmu:	The pointer to the s2 MMU structure
 * @type:	The machine type of the virtual machine
 *
 * Allocates only the stage-2 HW PGD level table(s).
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type)
{
	u32 kvm_ipa_limit = get_kvm_ipa_limit();
	int cpu, err;
	struct kvm_pgtable *pgt;
	u64 mmfr0, mmfr1;
	u32 phys_shift;

	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
		return -EINVAL;

	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
	if (is_protected_kvm_enabled()) {
		phys_shift = kvm_ipa_limit;
	} else if (phys_shift) {
		if (phys_shift > kvm_ipa_limit ||
		    phys_shift < ARM64_MIN_PARANGE_BITS)
			return -EINVAL;
	} else {
		phys_shift = KVM_PHYS_SHIFT;
		if (phys_shift > kvm_ipa_limit) {
			pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
				     current->comm);
			return -EINVAL;
		}
	}

	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);

	if (mmu->pgt != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	pgt = kzalloc(sizeof(*pgt), GFP_KERNEL_ACCOUNT);
	if (!pgt)
		return -ENOMEM;

	mmu->arch = &kvm->arch;
	err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops);
	if (err)
		goto out_free_pgtable;

	mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
	if (!mmu->last_vcpu_ran) {
		err = -ENOMEM;
		goto out_destroy_pgtable;
	}

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;

	/* The eager page splitting is disabled by default */
	mmu->split_page_chunk_size = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT;
	mmu->split_page_cache.gfp_zero = __GFP_ZERO;

	mmu->pgt = pgt;
	mmu->pgd_phys = __pa(pgt->pgd);
	return 0;

out_destroy_pgtable:
	kvm_pgtable_stage2_destroy(pgt);
out_free_pgtable:
	kfree(pgt);
	return err;
}

void kvm_uninit_stage2_mmu(struct kvm *kvm)
{
	kvm_free_stage2_pgd(&kvm->arch.mmu);
	kvm_mmu_free_memory_cache(&kvm->arch.mmu.split_page_cache);
}

static void stage2_unmap_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	hva_t hva = memslot->userspace_addr;
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = PAGE_SIZE * memslot->npages;
	hva_t reg_end = hva + size;

	/*
	 * A memory region could potentially cover multiple VMAs, and any holes
	 * between them, so iterate over all of them to find out if we should
	 * unmap any of them.
	 *
	 *     +--------------------------------------------+
	 * +---------------+----------------+   +----------------+
	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
	 * +---------------+----------------+   +----------------+
	 *     |               memory region                |
	 *     +--------------------------------------------+
	 */
	do {
		struct vm_area_struct *vma;
		hva_t vm_start, vm_end;

		vma = find_vma_intersection(current->mm, hva, reg_end);
		if (!vma)
			break;

		/*
		 * Take the intersection of this VMA with the memory region
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (!(vma->vm_flags & VM_PFNMAP)) {
			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
			unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
		}
		hva = vm_end;
	} while (hva < reg_end);
}

/**
 * stage2_unmap_vm - Unmap Stage-2 RAM mappings
 * @kvm: The struct kvm pointer
 *
 * Go through the memregions and unmap any regular RAM
 * backing memory already mapped to the VM.
 */
void stage2_unmap_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx, bkt;

	idx = srcu_read_lock(&kvm->srcu);
	mmap_read_lock(current->mm);
	write_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, bkt, slots)
		stage2_unmap_memslot(kvm, memslot);

	write_unlock(&kvm->mmu_lock);
	mmap_read_unlock(current->mm);
	srcu_read_unlock(&kvm->srcu, idx);
}

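/*
 * Tear down the stage-2 page tables for this MMU: detach them (and free the
 * last_vcpu_ran tracking) under the mmu_lock so faulting vCPUs can no longer
 * see them, then destroy the detached page-table structure outside the lock.
 */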
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
	struct kvm_pgtable *pgt = NULL;

	write_lock(&kvm->mmu_lock);
	pgt = mmu->pgt;
	if (pgt) {
		mmu->pgd_phys = 0;
		mmu->pgt = NULL;
		free_percpu(mmu->last_vcpu_ran);
	}
	write_unlock(&kvm->mmu_lock);

	if (pgt) {
		kvm_pgtable_stage2_destroy(pgt);
		kfree(pgt);
	}
}

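/*
 * In protected mode the hypervisor's memcache is topped up and drained with
 * whole pages, handed to and from EL2 by physical address (kvm_host_pa/va).
 */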
static void hyp_mc_free_fn(void *addr, void *unused)
{
	free_page((unsigned long)addr);
}

static void *hyp_mc_alloc_fn(void *unused)
{
	return (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
}

void free_hyp_memcache(struct kvm_hyp_memcache *mc)
{
	if (is_protected_kvm_enabled())
		__free_hyp_memcache(mc, hyp_mc_free_fn,
				    kvm_host_va, NULL);
}

int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages)
{
	if (!is_protected_kvm_enabled())
		return 0;

	return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn,
				    kvm_host_pa, NULL);
}

/**
 * kvm_phys_addr_ioremap - map a device range to guest IPA
 *
 * @kvm:	The KVM pointer
 * @guest_ipa:	The IPA at which to insert the mapping
 * @pa:		The physical address of the device
 * @size:	The size of the mapping
 * @writable:   Whether or not to create a writable mapping
 */
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size, bool writable)
{
	phys_addr_t addr;
	int ret = 0;
	struct kvm_mmu_memory_cache cache = { .gfp_zero = __GFP_ZERO };
	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
				     KVM_PGTABLE_PROT_R |
				     (writable ? KVM_PGTABLE_PROT_W : 0);

	if (is_protected_kvm_enabled())
		return -EPERM;

	size += offset_in_page(guest_ipa);
	guest_ipa &= PAGE_MASK;

	for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) {
		ret = kvm_mmu_topup_memory_cache(&cache,
						 kvm_mmu_cache_min_pages(kvm));
		if (ret)
			break;

		write_lock(&kvm->mmu_lock);
		ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
					     &cache, 0);
		write_unlock(&kvm->mmu_lock);
		if (ret)
			break;

		pa += PAGE_SIZE;
	}

	kvm_mmu_free_memory_cache(&cache);
	return ret;
}

/**
 * stage2_wp_range() - write protect stage2 memory region range
 * @mmu:        The KVM stage-2 MMU pointer
 * @addr:	Start address of range
 * @end:	End address of range
 */
static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
{
	stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_wrprotect);
}

/**
 * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
 * @kvm:	The KVM pointer
 * @slot:	The memory slot to write protect
 *
 * Called to start logging dirty pages after the memory region has been set
 * up with KVM_MEM_LOG_DIRTY_PAGES. After this function returns, all present
 * PUD, PMD and PTE entries in the memory region are write protected.
 * Afterwards, the dirty page log can be read.
 *
 * Acquires kvm->mmu_lock. Called with kvm->slots_lock mutex acquired,
 * serializing operations for VM memory regions.
 */
112562306a36Sopenharmony_cistatic void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
112662306a36Sopenharmony_ci{
112762306a36Sopenharmony_ci	struct kvm_memslots *slots = kvm_memslots(kvm);
112862306a36Sopenharmony_ci	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
112962306a36Sopenharmony_ci	phys_addr_t start, end;
113062306a36Sopenharmony_ci
113162306a36Sopenharmony_ci	if (WARN_ON_ONCE(!memslot))
113262306a36Sopenharmony_ci		return;
113362306a36Sopenharmony_ci
113462306a36Sopenharmony_ci	start = memslot->base_gfn << PAGE_SHIFT;
113562306a36Sopenharmony_ci	end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
113662306a36Sopenharmony_ci
113762306a36Sopenharmony_ci	write_lock(&kvm->mmu_lock);
113862306a36Sopenharmony_ci	stage2_wp_range(&kvm->arch.mmu, start, end);
113962306a36Sopenharmony_ci	write_unlock(&kvm->mmu_lock);
114062306a36Sopenharmony_ci	kvm_flush_remote_tlbs_memslot(kvm, memslot);
114162306a36Sopenharmony_ci}
114262306a36Sopenharmony_ci
114362306a36Sopenharmony_ci/**
114462306a36Sopenharmony_ci * kvm_mmu_split_memory_region() - split the stage 2 blocks into PAGE_SIZE
114562306a36Sopenharmony_ci *				   pages for memory slot
114662306a36Sopenharmony_ci * @kvm:	The KVM pointer
114762306a36Sopenharmony_ci * @slot:	The memory slot to split
114862306a36Sopenharmony_ci *
114962306a36Sopenharmony_ci * Acquires kvm->mmu_lock. Called with kvm->slots_lock mutex acquired,
115062306a36Sopenharmony_ci * serializing operations for VM memory regions.
115162306a36Sopenharmony_ci */
115262306a36Sopenharmony_cistatic void kvm_mmu_split_memory_region(struct kvm *kvm, int slot)
115362306a36Sopenharmony_ci{
115462306a36Sopenharmony_ci	struct kvm_memslots *slots;
115562306a36Sopenharmony_ci	struct kvm_memory_slot *memslot;
115662306a36Sopenharmony_ci	phys_addr_t start, end;
115762306a36Sopenharmony_ci
115862306a36Sopenharmony_ci	lockdep_assert_held(&kvm->slots_lock);
115962306a36Sopenharmony_ci
116062306a36Sopenharmony_ci	slots = kvm_memslots(kvm);
116162306a36Sopenharmony_ci	memslot = id_to_memslot(slots, slot);
116262306a36Sopenharmony_ci
116362306a36Sopenharmony_ci	start = memslot->base_gfn << PAGE_SHIFT;
116462306a36Sopenharmony_ci	end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
116562306a36Sopenharmony_ci
116662306a36Sopenharmony_ci	write_lock(&kvm->mmu_lock);
116762306a36Sopenharmony_ci	kvm_mmu_split_huge_pages(kvm, start, end);
116862306a36Sopenharmony_ci	write_unlock(&kvm->mmu_lock);
116962306a36Sopenharmony_ci}
117062306a36Sopenharmony_ci
117162306a36Sopenharmony_ci/**
117262306a36Sopenharmony_ci * kvm_arch_mmu_enable_log_dirty_pt_masked() - enable dirty logging for selected pages.
117362306a36Sopenharmony_ci * @kvm:	The KVM pointer
117462306a36Sopenharmony_ci * @slot:	The memory slot associated with mask
117562306a36Sopenharmony_ci * @gfn_offset:	The gfn offset in memory slot
117662306a36Sopenharmony_ci * @mask:	The mask of pages at offset 'gfn_offset' in this memory
117762306a36Sopenharmony_ci *		slot to enable dirty logging on
117862306a36Sopenharmony_ci *
117962306a36Sopenharmony_ci * Write-protects the selected pages to enable dirty logging, and then
118062306a36Sopenharmony_ci * splits them to PAGE_SIZE. The caller must hold kvm->mmu_lock.
118162306a36Sopenharmony_ci */
118262306a36Sopenharmony_civoid kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
118362306a36Sopenharmony_ci		struct kvm_memory_slot *slot,
118462306a36Sopenharmony_ci		gfn_t gfn_offset, unsigned long mask)
118562306a36Sopenharmony_ci{
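	/*
	 * The mask covers at most 64 pages starting at gfn_offset within
	 * the memslot; __ffs() and __fls() give its first and last set
	 * bits, which bound the GPA range to be write protected below.
	 */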
118662306a36Sopenharmony_ci	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
118762306a36Sopenharmony_ci	phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
118862306a36Sopenharmony_ci	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
118962306a36Sopenharmony_ci
119062306a36Sopenharmony_ci	lockdep_assert_held_write(&kvm->mmu_lock);
119162306a36Sopenharmony_ci
119262306a36Sopenharmony_ci	stage2_wp_range(&kvm->arch.mmu, start, end);
119362306a36Sopenharmony_ci
119462306a36Sopenharmony_ci	/*
119562306a36Sopenharmony_ci	 * Eager-splitting is done when manual-protect is set.  We
119662306a36Sopenharmony_ci	 * also check for initially-all-set because eager-splitting
119762306a36Sopenharmony_ci	 * can be skipped when initially-all-set is false: in that
119862306a36Sopenharmony_ci	 * case the huge pages were already split when dirty logging
119962306a36Sopenharmony_ci	 * was enabled, so there is no need to do it again.
120162306a36Sopenharmony_ci	 */
120262306a36Sopenharmony_ci	if (kvm_dirty_log_manual_protect_and_init_set(kvm))
120362306a36Sopenharmony_ci		kvm_mmu_split_huge_pages(kvm, start, end);
120462306a36Sopenharmony_ci}
120562306a36Sopenharmony_ci
120662306a36Sopenharmony_cistatic void kvm_send_hwpoison_signal(unsigned long address, short lsb)
120762306a36Sopenharmony_ci{
120862306a36Sopenharmony_ci	send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
120962306a36Sopenharmony_ci}
121062306a36Sopenharmony_ci
121162306a36Sopenharmony_cistatic bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
121262306a36Sopenharmony_ci					       unsigned long hva,
121362306a36Sopenharmony_ci					       unsigned long map_size)
121462306a36Sopenharmony_ci{
121562306a36Sopenharmony_ci	gpa_t gpa_start;
121662306a36Sopenharmony_ci	hva_t uaddr_start, uaddr_end;
121762306a36Sopenharmony_ci	size_t size;
121862306a36Sopenharmony_ci
121962306a36Sopenharmony_ci	/* The memslot and the VMA are guaranteed to be aligned to PAGE_SIZE */
122062306a36Sopenharmony_ci	if (map_size == PAGE_SIZE)
122162306a36Sopenharmony_ci		return true;
122262306a36Sopenharmony_ci
122362306a36Sopenharmony_ci	size = memslot->npages * PAGE_SIZE;
122462306a36Sopenharmony_ci
122562306a36Sopenharmony_ci	gpa_start = memslot->base_gfn << PAGE_SHIFT;
122662306a36Sopenharmony_ci
122762306a36Sopenharmony_ci	uaddr_start = memslot->userspace_addr;
122862306a36Sopenharmony_ci	uaddr_end = uaddr_start + size;
122962306a36Sopenharmony_ci
123062306a36Sopenharmony_ci	/*
123162306a36Sopenharmony_ci	 * Pages belonging to memslots that don't have the same alignment
123262306a36Sopenharmony_ci	 * within a PMD/PUD for userspace and IPA cannot be mapped with stage-2
123362306a36Sopenharmony_ci	 * PMD/PUD entries, because we'll end up mapping the wrong pages.
123462306a36Sopenharmony_ci	 *
123562306a36Sopenharmony_ci	 * Consider a layout like the following:
123662306a36Sopenharmony_ci	 *
123762306a36Sopenharmony_ci	 *    memslot->userspace_addr:
123862306a36Sopenharmony_ci	 *    +-----+--------------------+--------------------+---+
123962306a36Sopenharmony_ci	 *    |abcde|fgh  Stage-1 block  |    Stage-1 block tv|xyz|
124062306a36Sopenharmony_ci	 *    +-----+--------------------+--------------------+---+
124162306a36Sopenharmony_ci	 *
124262306a36Sopenharmony_ci	 *    memslot->base_gfn << PAGE_SHIFT:
124362306a36Sopenharmony_ci	 *      +---+--------------------+--------------------+-----+
124462306a36Sopenharmony_ci	 *      |abc|def  Stage-2 block  |    Stage-2 block   |tvxyz|
124562306a36Sopenharmony_ci	 *      +---+--------------------+--------------------+-----+
124662306a36Sopenharmony_ci	 *
124762306a36Sopenharmony_ci	 * If we create those stage-2 blocks, we'll end up with this incorrect
124862306a36Sopenharmony_ci	 * mapping:
124962306a36Sopenharmony_ci	 *   d -> f
125062306a36Sopenharmony_ci	 *   e -> g
125162306a36Sopenharmony_ci	 *   f -> h
125262306a36Sopenharmony_ci	 */
125362306a36Sopenharmony_ci	if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
125462306a36Sopenharmony_ci		return false;
125562306a36Sopenharmony_ci
125662306a36Sopenharmony_ci	/*
125762306a36Sopenharmony_ci	 * Next, let's make sure we're not trying to map anything not covered
125862306a36Sopenharmony_ci	 * by the memslot. This means we have to prohibit block size mappings
125962306a36Sopenharmony_ci	 * for the beginning and end of a non-block aligned and non-block sized
126062306a36Sopenharmony_ci	 * memory slot (illustrated by the head and tail parts of the
126162306a36Sopenharmony_ci	 * userspace view above containing pages 'abcde' and 'xyz',
126262306a36Sopenharmony_ci	 * respectively).
126362306a36Sopenharmony_ci	 *
126462306a36Sopenharmony_ci	 * Note that it doesn't matter if we do the check using the
126562306a36Sopenharmony_ci	 * userspace_addr or the base_gfn, as both are equally aligned (per
126662306a36Sopenharmony_ci	 * the check above) and equally sized.
126762306a36Sopenharmony_ci	 */
126862306a36Sopenharmony_ci	return (hva & ~(map_size - 1)) >= uaddr_start &&
126962306a36Sopenharmony_ci	       (hva & ~(map_size - 1)) + map_size <= uaddr_end;
127062306a36Sopenharmony_ci}
127162306a36Sopenharmony_ci
127262306a36Sopenharmony_ci/*
127362306a36Sopenharmony_ci * Check if the given hva is backed by a transparent huge page (THP) and
127462306a36Sopenharmony_ci * whether it can be mapped using block mapping in stage2. If so, adjust
127562306a36Sopenharmony_ci * the stage2 PFN and IPA accordingly. Only PMD_SIZE THPs are currently
127662306a36Sopenharmony_ci * supported. This will need to be updated to support other THP sizes.
127762306a36Sopenharmony_ci *
127862306a36Sopenharmony_ci * Returns the size of the mapping.
127962306a36Sopenharmony_ci */
128062306a36Sopenharmony_cistatic long
128162306a36Sopenharmony_citransparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
128262306a36Sopenharmony_ci			    unsigned long hva, kvm_pfn_t *pfnp,
128362306a36Sopenharmony_ci			    phys_addr_t *ipap)
128462306a36Sopenharmony_ci{
128562306a36Sopenharmony_ci	kvm_pfn_t pfn = *pfnp;
128662306a36Sopenharmony_ci
128762306a36Sopenharmony_ci	/*
128862306a36Sopenharmony_ci	 * Make sure the adjustment is done only for THP pages. Also make
128962306a36Sopenharmony_ci	 * sure that the HVA and IPA are sufficiently aligned and that the
129062306a36Sopenharmony_ci	 * block map is contained within the memslot.
129162306a36Sopenharmony_ci	 */
129262306a36Sopenharmony_ci	if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
129362306a36Sopenharmony_ci		int sz = get_user_mapping_size(kvm, hva);
129462306a36Sopenharmony_ci
129562306a36Sopenharmony_ci		if (sz < 0)
129662306a36Sopenharmony_ci			return sz;
129762306a36Sopenharmony_ci
129862306a36Sopenharmony_ci		if (sz < PMD_SIZE)
129962306a36Sopenharmony_ci			return PAGE_SIZE;
130062306a36Sopenharmony_ci
130162306a36Sopenharmony_ci		/*
130262306a36Sopenharmony_ci		 * The address we faulted on is backed by a transparent huge
130362306a36Sopenharmony_ci		 * page.  However, because we map the compound huge page and
130462306a36Sopenharmony_ci		 * not the individual tail page, we need to transfer the
130562306a36Sopenharmony_ci		 * refcount to the head page.  We have to be careful that the
130662306a36Sopenharmony_ci		 * THP doesn't start to split while we are adjusting the
130762306a36Sopenharmony_ci		 * refcounts.
130862306a36Sopenharmony_ci		 *
130962306a36Sopenharmony_ci		 * We are sure this doesn't happen, because mmu_invalidate_retry
131062306a36Sopenharmony_ci		 * was successful and we are holding the mmu_lock, so if this
131162306a36Sopenharmony_ci		 * THP is trying to split, it will be blocked in the mmu
131262306a36Sopenharmony_ci		 * notifier before touching any of the pages, specifically
131362306a36Sopenharmony_ci		 * before being able to call __split_huge_page_refcount().
131462306a36Sopenharmony_ci		 *
131562306a36Sopenharmony_ci		 * We can therefore safely transfer the refcount from PG_tail
131662306a36Sopenharmony_ci		 * to PG_head and switch the pfn from a tail page to the head
131762306a36Sopenharmony_ci		 * page accordingly.
131862306a36Sopenharmony_ci		 */
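		/*
		 * Round the IPA and the pfn down to the start of the
		 * PMD-sized block backing the THP.
		 */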
131962306a36Sopenharmony_ci		*ipap &= PMD_MASK;
132062306a36Sopenharmony_ci		kvm_release_pfn_clean(pfn);
132162306a36Sopenharmony_ci		pfn &= ~(PTRS_PER_PMD - 1);
132262306a36Sopenharmony_ci		get_page(pfn_to_page(pfn));
132362306a36Sopenharmony_ci		*pfnp = pfn;
132462306a36Sopenharmony_ci
132562306a36Sopenharmony_ci		return PMD_SIZE;
132662306a36Sopenharmony_ci	}
132762306a36Sopenharmony_ci
132862306a36Sopenharmony_ci	/* Use page mapping if we cannot use block mapping. */
132962306a36Sopenharmony_ci	return PAGE_SIZE;
133062306a36Sopenharmony_ci}
133162306a36Sopenharmony_ci
133262306a36Sopenharmony_cistatic int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
133362306a36Sopenharmony_ci{
133462306a36Sopenharmony_ci	unsigned long pa;
133562306a36Sopenharmony_ci
133662306a36Sopenharmony_ci	if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
133762306a36Sopenharmony_ci		return huge_page_shift(hstate_vma(vma));
133862306a36Sopenharmony_ci
133962306a36Sopenharmony_ci	if (!(vma->vm_flags & VM_PFNMAP))
134062306a36Sopenharmony_ci		return PAGE_SHIFT;
134162306a36Sopenharmony_ci
134262306a36Sopenharmony_ci	VM_BUG_ON(is_vm_hugetlb_page(vma));
134362306a36Sopenharmony_ci
134462306a36Sopenharmony_ci	pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
134562306a36Sopenharmony_ci
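	/*
	 * For a VM_PFNMAP VMA, only use a PUD/PMD mapping if the hva and
	 * the host PA share the same offset within the block and the whole
	 * block lies inside the VMA.
	 */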
134662306a36Sopenharmony_ci#ifndef __PAGETABLE_PMD_FOLDED
134762306a36Sopenharmony_ci	if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
134862306a36Sopenharmony_ci	    ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
134962306a36Sopenharmony_ci	    ALIGN(hva, PUD_SIZE) <= vma->vm_end)
135062306a36Sopenharmony_ci		return PUD_SHIFT;
135162306a36Sopenharmony_ci#endif
135262306a36Sopenharmony_ci
135362306a36Sopenharmony_ci	if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
135462306a36Sopenharmony_ci	    ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
135562306a36Sopenharmony_ci	    ALIGN(hva, PMD_SIZE) <= vma->vm_end)
135662306a36Sopenharmony_ci		return PMD_SHIFT;
135762306a36Sopenharmony_ci
135862306a36Sopenharmony_ci	return PAGE_SHIFT;
135962306a36Sopenharmony_ci}
136062306a36Sopenharmony_ci
136162306a36Sopenharmony_ci/*
136262306a36Sopenharmony_ci * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be
136362306a36Sopenharmony_ci * able to see the page's tags and therefore they must be initialised first. If
136462306a36Sopenharmony_ci * PG_mte_tagged is set, tags have already been initialised.
136562306a36Sopenharmony_ci *
136662306a36Sopenharmony_ci * The race in the test/set of the PG_mte_tagged flag is handled by:
136762306a36Sopenharmony_ci * - preventing VM_SHARED mappings in a memslot with MTE, which prevents
136862306a36Sopenharmony_ci *   two VMs from racing to sanitise the same page
136962306a36Sopenharmony_ci * - mmap_lock protects between a VM faulting a page in and the VMM performing
137062306a36Sopenharmony_ci *   an mprotect() to add VM_MTE
137162306a36Sopenharmony_ci */
137262306a36Sopenharmony_cistatic void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
137362306a36Sopenharmony_ci			      unsigned long size)
137462306a36Sopenharmony_ci{
137562306a36Sopenharmony_ci	unsigned long i, nr_pages = size >> PAGE_SHIFT;
137662306a36Sopenharmony_ci	struct page *page = pfn_to_page(pfn);
137762306a36Sopenharmony_ci
137862306a36Sopenharmony_ci	if (!kvm_has_mte(kvm))
137962306a36Sopenharmony_ci		return;
138062306a36Sopenharmony_ci
138162306a36Sopenharmony_ci	for (i = 0; i < nr_pages; i++, page++) {
138262306a36Sopenharmony_ci		if (try_page_mte_tagging(page)) {
138362306a36Sopenharmony_ci			mte_clear_page_tags(page_address(page));
138462306a36Sopenharmony_ci			set_page_mte_tagged(page);
138562306a36Sopenharmony_ci		}
138662306a36Sopenharmony_ci	}
138762306a36Sopenharmony_ci}
138862306a36Sopenharmony_ci
138962306a36Sopenharmony_cistatic bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
139062306a36Sopenharmony_ci{
139162306a36Sopenharmony_ci	return vma->vm_flags & VM_MTE_ALLOWED;
139262306a36Sopenharmony_ci}
139362306a36Sopenharmony_ci
139462306a36Sopenharmony_cistatic int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
139562306a36Sopenharmony_ci			  struct kvm_memory_slot *memslot, unsigned long hva,
139662306a36Sopenharmony_ci			  unsigned long fault_status)
139762306a36Sopenharmony_ci{
139862306a36Sopenharmony_ci	int ret = 0;
139962306a36Sopenharmony_ci	bool write_fault, writable, force_pte = false;
140062306a36Sopenharmony_ci	bool exec_fault, mte_allowed;
140162306a36Sopenharmony_ci	bool device = false;
140262306a36Sopenharmony_ci	unsigned long mmu_seq;
140362306a36Sopenharmony_ci	struct kvm *kvm = vcpu->kvm;
140462306a36Sopenharmony_ci	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
140562306a36Sopenharmony_ci	struct vm_area_struct *vma;
140662306a36Sopenharmony_ci	short vma_shift;
140762306a36Sopenharmony_ci	gfn_t gfn;
140862306a36Sopenharmony_ci	kvm_pfn_t pfn;
140962306a36Sopenharmony_ci	bool logging_active = memslot_is_logging(memslot);
141062306a36Sopenharmony_ci	unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
141162306a36Sopenharmony_ci	long vma_pagesize, fault_granule;
141262306a36Sopenharmony_ci	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
141362306a36Sopenharmony_ci	struct kvm_pgtable *pgt;
141462306a36Sopenharmony_ci
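	/*
	 * fault_granule is the size of the stage-2 granule (page or block)
	 * at the level where the fault was taken, as reported by the
	 * hardware fault level.
	 */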
141562306a36Sopenharmony_ci	fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
141662306a36Sopenharmony_ci	write_fault = kvm_is_write_fault(vcpu);
141762306a36Sopenharmony_ci	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
141862306a36Sopenharmony_ci	VM_BUG_ON(write_fault && exec_fault);
141962306a36Sopenharmony_ci
142062306a36Sopenharmony_ci	if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
142162306a36Sopenharmony_ci		kvm_err("Unexpected L2 read permission error\n");
142262306a36Sopenharmony_ci		return -EFAULT;
142362306a36Sopenharmony_ci	}
142462306a36Sopenharmony_ci
142562306a36Sopenharmony_ci	/*
142662306a36Sopenharmony_ci	 * Permission faults just need to update the existing leaf entry,
142762306a36Sopenharmony_ci	 * and so normally don't require allocations from the memcache. The
142862306a36Sopenharmony_ci	 * only exception to this is when dirty logging is enabled at runtime
142962306a36Sopenharmony_ci	 * and a write fault needs to collapse a block entry into a table.
143062306a36Sopenharmony_ci	 */
143162306a36Sopenharmony_ci	if (fault_status != ESR_ELx_FSC_PERM ||
143262306a36Sopenharmony_ci	    (logging_active && write_fault)) {
143362306a36Sopenharmony_ci		ret = kvm_mmu_topup_memory_cache(memcache,
143462306a36Sopenharmony_ci						 kvm_mmu_cache_min_pages(kvm));
143562306a36Sopenharmony_ci		if (ret)
143662306a36Sopenharmony_ci			return ret;
143762306a36Sopenharmony_ci	}
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_ci	/*
144062306a36Sopenharmony_ci	 * Let's check if we will get back a huge page backed by hugetlbfs, or
144162306a36Sopenharmony_ci	 * get block mapping for device MMIO region.
144262306a36Sopenharmony_ci	 */
144362306a36Sopenharmony_ci	mmap_read_lock(current->mm);
144462306a36Sopenharmony_ci	vma = vma_lookup(current->mm, hva);
144562306a36Sopenharmony_ci	if (unlikely(!vma)) {
144662306a36Sopenharmony_ci		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
144762306a36Sopenharmony_ci		mmap_read_unlock(current->mm);
144862306a36Sopenharmony_ci		return -EFAULT;
144962306a36Sopenharmony_ci	}
145062306a36Sopenharmony_ci
145162306a36Sopenharmony_ci	/*
145262306a36Sopenharmony_ci	 * logging_active is guaranteed to never be true for VM_PFNMAP
145362306a36Sopenharmony_ci	 * memslots.
145462306a36Sopenharmony_ci	 */
145562306a36Sopenharmony_ci	if (logging_active) {
145662306a36Sopenharmony_ci		force_pte = true;
145762306a36Sopenharmony_ci		vma_shift = PAGE_SHIFT;
145862306a36Sopenharmony_ci	} else {
145962306a36Sopenharmony_ci		vma_shift = get_vma_page_shift(vma, hva);
146062306a36Sopenharmony_ci	}
146162306a36Sopenharmony_ci
146262306a36Sopenharmony_ci	switch (vma_shift) {
146362306a36Sopenharmony_ci#ifndef __PAGETABLE_PMD_FOLDED
146462306a36Sopenharmony_ci	case PUD_SHIFT:
146562306a36Sopenharmony_ci		if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
146662306a36Sopenharmony_ci			break;
146762306a36Sopenharmony_ci		fallthrough;
146862306a36Sopenharmony_ci#endif
146962306a36Sopenharmony_ci	case CONT_PMD_SHIFT:
147062306a36Sopenharmony_ci		vma_shift = PMD_SHIFT;
147162306a36Sopenharmony_ci		fallthrough;
147262306a36Sopenharmony_ci	case PMD_SHIFT:
147362306a36Sopenharmony_ci		if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE))
147462306a36Sopenharmony_ci			break;
147562306a36Sopenharmony_ci		fallthrough;
147662306a36Sopenharmony_ci	case CONT_PTE_SHIFT:
147762306a36Sopenharmony_ci		vma_shift = PAGE_SHIFT;
147862306a36Sopenharmony_ci		force_pte = true;
147962306a36Sopenharmony_ci		fallthrough;
148062306a36Sopenharmony_ci	case PAGE_SHIFT:
148162306a36Sopenharmony_ci		break;
148262306a36Sopenharmony_ci	default:
148362306a36Sopenharmony_ci		WARN_ONCE(1, "Unknown vma_shift %d", vma_shift);
148462306a36Sopenharmony_ci	}
148562306a36Sopenharmony_ci
148662306a36Sopenharmony_ci	vma_pagesize = 1UL << vma_shift;
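	/*
	 * For block mappings, align the faulting IPA down to the block
	 * boundary so that the stage-2 entry covers the whole block.
	 */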
148762306a36Sopenharmony_ci	if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE)
148862306a36Sopenharmony_ci		fault_ipa &= ~(vma_pagesize - 1);
148962306a36Sopenharmony_ci
149062306a36Sopenharmony_ci	gfn = fault_ipa >> PAGE_SHIFT;
149162306a36Sopenharmony_ci	mte_allowed = kvm_vma_mte_allowed(vma);
149262306a36Sopenharmony_ci
149362306a36Sopenharmony_ci	/* Don't use the VMA after the unlock -- it may have vanished */
149462306a36Sopenharmony_ci	vma = NULL;
149562306a36Sopenharmony_ci
149662306a36Sopenharmony_ci	/*
149762306a36Sopenharmony_ci	 * Read mmu_invalidate_seq so that KVM can detect if the results of
149862306a36Sopenharmony_ci	 * vma_lookup() or __gfn_to_pfn_memslot() become stale prior to
149962306a36Sopenharmony_ci	 * acquiring kvm->mmu_lock.
150062306a36Sopenharmony_ci	 *
150162306a36Sopenharmony_ci	 * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
150262306a36Sopenharmony_ci	 * with the smp_wmb() in kvm_mmu_invalidate_end().
150362306a36Sopenharmony_ci	 */
150462306a36Sopenharmony_ci	mmu_seq = vcpu->kvm->mmu_invalidate_seq;
150562306a36Sopenharmony_ci	mmap_read_unlock(current->mm);
150662306a36Sopenharmony_ci
150762306a36Sopenharmony_ci	pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
150862306a36Sopenharmony_ci				   write_fault, &writable, NULL);
150962306a36Sopenharmony_ci	if (pfn == KVM_PFN_ERR_HWPOISON) {
151062306a36Sopenharmony_ci		kvm_send_hwpoison_signal(hva, vma_shift);
151162306a36Sopenharmony_ci		return 0;
151262306a36Sopenharmony_ci	}
151362306a36Sopenharmony_ci	if (is_error_noslot_pfn(pfn))
151462306a36Sopenharmony_ci		return -EFAULT;
151562306a36Sopenharmony_ci
151662306a36Sopenharmony_ci	if (kvm_is_device_pfn(pfn)) {
151762306a36Sopenharmony_ci		/*
151862306a36Sopenharmony_ci		 * If the page was identified as device early by looking at
151962306a36Sopenharmony_ci		 * the VMA flags, vma_pagesize is already representing the
152062306a36Sopenharmony_ci		 * largest quantity we can map.  If instead it was mapped
152162306a36Sopenharmony_ci		 * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE
152262306a36Sopenharmony_ci		 * and must not be upgraded.
152362306a36Sopenharmony_ci		 *
152462306a36Sopenharmony_ci		 * In both cases, we don't let transparent_hugepage_adjust()
152562306a36Sopenharmony_ci		 * change things at the last minute.
152662306a36Sopenharmony_ci		 */
152762306a36Sopenharmony_ci		device = true;
152862306a36Sopenharmony_ci	} else if (logging_active && !write_fault) {
152962306a36Sopenharmony_ci		/*
153062306a36Sopenharmony_ci		 * Only actually map the page as writable if this was a write
153162306a36Sopenharmony_ci		 * fault.
153262306a36Sopenharmony_ci		 */
153362306a36Sopenharmony_ci		writable = false;
153462306a36Sopenharmony_ci	}
153562306a36Sopenharmony_ci
153662306a36Sopenharmony_ci	if (exec_fault && device)
153762306a36Sopenharmony_ci		return -ENOEXEC;
153862306a36Sopenharmony_ci
153962306a36Sopenharmony_ci	read_lock(&kvm->mmu_lock);
154062306a36Sopenharmony_ci	pgt = vcpu->arch.hw_mmu->pgt;
154162306a36Sopenharmony_ci	if (mmu_invalidate_retry(kvm, mmu_seq))
154262306a36Sopenharmony_ci		goto out_unlock;
154362306a36Sopenharmony_ci
154462306a36Sopenharmony_ci	/*
154562306a36Sopenharmony_ci	 * If we are not forced to use page mapping, check if we are
154662306a36Sopenharmony_ci	 * backed by a THP and thus use block mapping if possible.
154762306a36Sopenharmony_ci	 */
154862306a36Sopenharmony_ci	if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
154962306a36Sopenharmony_ci		if (fault_status == ESR_ELx_FSC_PERM &&
155062306a36Sopenharmony_ci		    fault_granule > PAGE_SIZE)
155162306a36Sopenharmony_ci			vma_pagesize = fault_granule;
155262306a36Sopenharmony_ci		else
155362306a36Sopenharmony_ci			vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
155462306a36Sopenharmony_ci								   hva, &pfn,
155562306a36Sopenharmony_ci								   &fault_ipa);
155662306a36Sopenharmony_ci
155762306a36Sopenharmony_ci		if (vma_pagesize < 0) {
155862306a36Sopenharmony_ci			ret = vma_pagesize;
155962306a36Sopenharmony_ci			goto out_unlock;
156062306a36Sopenharmony_ci		}
156162306a36Sopenharmony_ci	}
156262306a36Sopenharmony_ci
156362306a36Sopenharmony_ci	if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
156462306a36Sopenharmony_ci		/* Check the VMM hasn't introduced a new disallowed VMA */
156562306a36Sopenharmony_ci		if (mte_allowed) {
156662306a36Sopenharmony_ci			sanitise_mte_tags(kvm, pfn, vma_pagesize);
156762306a36Sopenharmony_ci		} else {
156862306a36Sopenharmony_ci			ret = -EFAULT;
156962306a36Sopenharmony_ci			goto out_unlock;
157062306a36Sopenharmony_ci		}
157162306a36Sopenharmony_ci	}
157262306a36Sopenharmony_ci
157362306a36Sopenharmony_ci	if (writable)
157462306a36Sopenharmony_ci		prot |= KVM_PGTABLE_PROT_W;
157562306a36Sopenharmony_ci
157662306a36Sopenharmony_ci	if (exec_fault)
157762306a36Sopenharmony_ci		prot |= KVM_PGTABLE_PROT_X;
157862306a36Sopenharmony_ci
157962306a36Sopenharmony_ci	if (device)
158062306a36Sopenharmony_ci		prot |= KVM_PGTABLE_PROT_DEVICE;
158162306a36Sopenharmony_ci	else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
158262306a36Sopenharmony_ci		prot |= KVM_PGTABLE_PROT_X;
158362306a36Sopenharmony_ci
158462306a36Sopenharmony_ci	/*
158562306a36Sopenharmony_ci	 * For an FSC_PERM fault we only need to relax permissions if
158662306a36Sopenharmony_ci	 * vma_pagesize equals fault_granule. Otherwise,
158762306a36Sopenharmony_ci	 * kvm_pgtable_stage2_map() should be called to change the block size.
158862306a36Sopenharmony_ci	 */
158962306a36Sopenharmony_ci	if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
159062306a36Sopenharmony_ci		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
159162306a36Sopenharmony_ci	else
159262306a36Sopenharmony_ci		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
159362306a36Sopenharmony_ci					     __pfn_to_phys(pfn), prot,
159462306a36Sopenharmony_ci					     memcache,
159562306a36Sopenharmony_ci					     KVM_PGTABLE_WALK_HANDLE_FAULT |
159662306a36Sopenharmony_ci					     KVM_PGTABLE_WALK_SHARED);
159762306a36Sopenharmony_ci
159862306a36Sopenharmony_ci	/* Mark the page dirty only if the fault is handled successfully */
159962306a36Sopenharmony_ci	if (writable && !ret) {
160062306a36Sopenharmony_ci		kvm_set_pfn_dirty(pfn);
160162306a36Sopenharmony_ci		mark_page_dirty_in_slot(kvm, memslot, gfn);
160262306a36Sopenharmony_ci	}
160362306a36Sopenharmony_ci
160462306a36Sopenharmony_ciout_unlock:
160562306a36Sopenharmony_ci	read_unlock(&kvm->mmu_lock);
160662306a36Sopenharmony_ci	kvm_release_pfn_clean(pfn);
160762306a36Sopenharmony_ci	return ret != -EAGAIN ? ret : 0;
160862306a36Sopenharmony_ci}
160962306a36Sopenharmony_ci
161062306a36Sopenharmony_ci/* Resolve the access fault by making the page young again. */
161162306a36Sopenharmony_cistatic void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
161262306a36Sopenharmony_ci{
161362306a36Sopenharmony_ci	kvm_pte_t pte;
161462306a36Sopenharmony_ci	struct kvm_s2_mmu *mmu;
161562306a36Sopenharmony_ci
161662306a36Sopenharmony_ci	trace_kvm_access_fault(fault_ipa);
161762306a36Sopenharmony_ci
161862306a36Sopenharmony_ci	read_lock(&vcpu->kvm->mmu_lock);
161962306a36Sopenharmony_ci	mmu = vcpu->arch.hw_mmu;
162062306a36Sopenharmony_ci	pte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
162162306a36Sopenharmony_ci	read_unlock(&vcpu->kvm->mmu_lock);
162262306a36Sopenharmony_ci
162362306a36Sopenharmony_ci	if (kvm_pte_valid(pte))
162462306a36Sopenharmony_ci		kvm_set_pfn_accessed(kvm_pte_to_pfn(pte));
162562306a36Sopenharmony_ci}
162662306a36Sopenharmony_ci
162762306a36Sopenharmony_ci/**
162862306a36Sopenharmony_ci * kvm_handle_guest_abort - handles all 2nd stage aborts
162962306a36Sopenharmony_ci * @vcpu:	the VCPU pointer
163062306a36Sopenharmony_ci *
163162306a36Sopenharmony_ci * Any abort that gets to the host is almost guaranteed to be caused by a
163262306a36Sopenharmony_ci * missing second stage translation table entry. This can mean either that the
163362306a36Sopenharmony_ci * guest simply needs more memory and we must allocate an appropriate page, or
163462306a36Sopenharmony_ci * that the guest tried to access I/O memory, which is emulated by user
163562306a36Sopenharmony_ci * space. The distinction is based on the IPA causing the fault and whether this
163662306a36Sopenharmony_ci * memory region has been registered as standard RAM by user space.
163762306a36Sopenharmony_ci */
163862306a36Sopenharmony_ciint kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
163962306a36Sopenharmony_ci{
164062306a36Sopenharmony_ci	unsigned long fault_status;
164162306a36Sopenharmony_ci	phys_addr_t fault_ipa;
164262306a36Sopenharmony_ci	struct kvm_memory_slot *memslot;
164362306a36Sopenharmony_ci	unsigned long hva;
164462306a36Sopenharmony_ci	bool is_iabt, write_fault, writable;
164562306a36Sopenharmony_ci	gfn_t gfn;
164662306a36Sopenharmony_ci	int ret, idx;
164762306a36Sopenharmony_ci
164862306a36Sopenharmony_ci	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
164962306a36Sopenharmony_ci
165062306a36Sopenharmony_ci	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
165162306a36Sopenharmony_ci	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
165262306a36Sopenharmony_ci
165362306a36Sopenharmony_ci	if (fault_status == ESR_ELx_FSC_FAULT) {
165462306a36Sopenharmony_ci		/* Beyond sanitised PARange (which is the IPA limit) */
165562306a36Sopenharmony_ci		if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
165662306a36Sopenharmony_ci			kvm_inject_size_fault(vcpu);
165762306a36Sopenharmony_ci			return 1;
165862306a36Sopenharmony_ci		}
165962306a36Sopenharmony_ci
166062306a36Sopenharmony_ci		/* Falls between the IPA range and the PARange? */
166162306a36Sopenharmony_ci		if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) {
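			/*
			 * HPFAR only reports IPA[MAX:12]; take the bottom 12
			 * bits of the faulting VA from HFAR so the injected
			 * abort carries the full address.
			 */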
166262306a36Sopenharmony_ci			fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
166362306a36Sopenharmony_ci
166462306a36Sopenharmony_ci			if (is_iabt)
166562306a36Sopenharmony_ci				kvm_inject_pabt(vcpu, fault_ipa);
166662306a36Sopenharmony_ci			else
166762306a36Sopenharmony_ci				kvm_inject_dabt(vcpu, fault_ipa);
166862306a36Sopenharmony_ci			return 1;
166962306a36Sopenharmony_ci		}
167062306a36Sopenharmony_ci	}
167162306a36Sopenharmony_ci
167262306a36Sopenharmony_ci	/* Synchronous External Abort? */
167362306a36Sopenharmony_ci	if (kvm_vcpu_abt_issea(vcpu)) {
167462306a36Sopenharmony_ci		/*
167562306a36Sopenharmony_ci		 * For RAS the host kernel may handle this abort.
167662306a36Sopenharmony_ci		 * There is no need to pass the error into the guest.
167762306a36Sopenharmony_ci		 */
167862306a36Sopenharmony_ci		if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu)))
167962306a36Sopenharmony_ci			kvm_inject_vabt(vcpu);
168062306a36Sopenharmony_ci
168162306a36Sopenharmony_ci		return 1;
168262306a36Sopenharmony_ci	}
168362306a36Sopenharmony_ci
168462306a36Sopenharmony_ci	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
168562306a36Sopenharmony_ci			      kvm_vcpu_get_hfar(vcpu), fault_ipa);
168662306a36Sopenharmony_ci
168762306a36Sopenharmony_ci	/* Check the stage-2 fault is a translation, permission or access fault */
168862306a36Sopenharmony_ci	if (fault_status != ESR_ELx_FSC_FAULT &&
168962306a36Sopenharmony_ci	    fault_status != ESR_ELx_FSC_PERM &&
169062306a36Sopenharmony_ci	    fault_status != ESR_ELx_FSC_ACCESS) {
169162306a36Sopenharmony_ci		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
169262306a36Sopenharmony_ci			kvm_vcpu_trap_get_class(vcpu),
169362306a36Sopenharmony_ci			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
169462306a36Sopenharmony_ci			(unsigned long)kvm_vcpu_get_esr(vcpu));
169562306a36Sopenharmony_ci		return -EFAULT;
169662306a36Sopenharmony_ci	}
169762306a36Sopenharmony_ci
169862306a36Sopenharmony_ci	idx = srcu_read_lock(&vcpu->kvm->srcu);
169962306a36Sopenharmony_ci
170062306a36Sopenharmony_ci	gfn = fault_ipa >> PAGE_SHIFT;
170162306a36Sopenharmony_ci	memslot = gfn_to_memslot(vcpu->kvm, gfn);
170262306a36Sopenharmony_ci	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
170362306a36Sopenharmony_ci	write_fault = kvm_is_write_fault(vcpu);
170462306a36Sopenharmony_ci	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
170562306a36Sopenharmony_ci		/*
170662306a36Sopenharmony_ci		 * The guest has put either its instructions or its page-tables
170762306a36Sopenharmony_ci		 * somewhere it shouldn't have. Userspace won't be able to do
170862306a36Sopenharmony_ci		 * anything about this (there's no syndrome for a start), so
170962306a36Sopenharmony_ci		 * re-inject the abort back into the guest.
171062306a36Sopenharmony_ci		 */
171162306a36Sopenharmony_ci		if (is_iabt) {
171262306a36Sopenharmony_ci			ret = -ENOEXEC;
171362306a36Sopenharmony_ci			goto out;
171462306a36Sopenharmony_ci		}
171562306a36Sopenharmony_ci
171662306a36Sopenharmony_ci		if (kvm_vcpu_abt_iss1tw(vcpu)) {
171762306a36Sopenharmony_ci			kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
171862306a36Sopenharmony_ci			ret = 1;
171962306a36Sopenharmony_ci			goto out_unlock;
172062306a36Sopenharmony_ci		}
172162306a36Sopenharmony_ci
172262306a36Sopenharmony_ci		/*
172362306a36Sopenharmony_ci		 * Check for a cache maintenance operation. Since we
172462306a36Sopenharmony_ci		 * ended-up here, we know it is outside of any memory
172562306a36Sopenharmony_ci		 * slot. But we can't find out if that is for a device,
172662306a36Sopenharmony_ci		 * or if the guest is just being stupid. The only thing
172762306a36Sopenharmony_ci		 * we know for sure is that this range cannot be cached.
172862306a36Sopenharmony_ci		 *
172962306a36Sopenharmony_ci		 * So let's assume that the guest is just being
173062306a36Sopenharmony_ci		 * cautious, and skip the instruction.
173162306a36Sopenharmony_ci		 */
173262306a36Sopenharmony_ci		if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) {
173362306a36Sopenharmony_ci			kvm_incr_pc(vcpu);
173462306a36Sopenharmony_ci			ret = 1;
173562306a36Sopenharmony_ci			goto out_unlock;
173662306a36Sopenharmony_ci		}
173762306a36Sopenharmony_ci
173862306a36Sopenharmony_ci		/*
173962306a36Sopenharmony_ci		 * The IPA is reported as [MAX:12], so we need to
174062306a36Sopenharmony_ci		 * complement it with the bottom 12 bits from the
174162306a36Sopenharmony_ci		 * faulting VA. This is always 12 bits, irrespective
174262306a36Sopenharmony_ci		 * of the page size.
174362306a36Sopenharmony_ci		 */
174462306a36Sopenharmony_ci		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
174562306a36Sopenharmony_ci		ret = io_mem_abort(vcpu, fault_ipa);
174662306a36Sopenharmony_ci		goto out_unlock;
174762306a36Sopenharmony_ci	}
174862306a36Sopenharmony_ci
174962306a36Sopenharmony_ci	/* Userspace should not be able to register out-of-bounds IPAs */
175062306a36Sopenharmony_ci	VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
175162306a36Sopenharmony_ci
175262306a36Sopenharmony_ci	if (fault_status == ESR_ELx_FSC_ACCESS) {
175362306a36Sopenharmony_ci		handle_access_fault(vcpu, fault_ipa);
175462306a36Sopenharmony_ci		ret = 1;
175562306a36Sopenharmony_ci		goto out_unlock;
175662306a36Sopenharmony_ci	}
175762306a36Sopenharmony_ci
175862306a36Sopenharmony_ci	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
175962306a36Sopenharmony_ci	if (ret == 0)
176062306a36Sopenharmony_ci		ret = 1;
176162306a36Sopenharmony_ciout:
176262306a36Sopenharmony_ci	if (ret == -ENOEXEC) {
176362306a36Sopenharmony_ci		kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
176462306a36Sopenharmony_ci		ret = 1;
176562306a36Sopenharmony_ci	}
176662306a36Sopenharmony_ciout_unlock:
176762306a36Sopenharmony_ci	srcu_read_unlock(&vcpu->kvm->srcu, idx);
176862306a36Sopenharmony_ci	return ret;
176962306a36Sopenharmony_ci}
177062306a36Sopenharmony_ci
177162306a36Sopenharmony_cibool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
177262306a36Sopenharmony_ci{
177362306a36Sopenharmony_ci	if (!kvm->arch.mmu.pgt)
177462306a36Sopenharmony_ci		return false;
177562306a36Sopenharmony_ci
177662306a36Sopenharmony_ci	__unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT,
177762306a36Sopenharmony_ci			     (range->end - range->start) << PAGE_SHIFT,
177862306a36Sopenharmony_ci			     range->may_block);
177962306a36Sopenharmony_ci
178062306a36Sopenharmony_ci	return false;
178162306a36Sopenharmony_ci}
178262306a36Sopenharmony_ci
178362306a36Sopenharmony_cibool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
178462306a36Sopenharmony_ci{
178562306a36Sopenharmony_ci	kvm_pfn_t pfn = pte_pfn(range->arg.pte);
178662306a36Sopenharmony_ci
178762306a36Sopenharmony_ci	if (!kvm->arch.mmu.pgt)
178862306a36Sopenharmony_ci		return false;
178962306a36Sopenharmony_ci
179062306a36Sopenharmony_ci	WARN_ON(range->end - range->start != 1);
179162306a36Sopenharmony_ci
179262306a36Sopenharmony_ci	/*
179362306a36Sopenharmony_ci	 * If the page isn't tagged, defer to user_mem_abort() for sanitising
179462306a36Sopenharmony_ci	 * the MTE tags. The S2 pte should have been unmapped by
179562306a36Sopenharmony_ci	 * mmu_notifier_invalidate_range_end().
179662306a36Sopenharmony_ci	 */
179762306a36Sopenharmony_ci	if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn)))
179862306a36Sopenharmony_ci		return false;
179962306a36Sopenharmony_ci
180062306a36Sopenharmony_ci	/*
180162306a36Sopenharmony_ci	 * We've moved a page around, probably through CoW, so let's treat
180262306a36Sopenharmony_ci	 * it just like a translation fault and the map handler will clean
180362306a36Sopenharmony_ci	 * the cache to the PoC.
180462306a36Sopenharmony_ci	 *
180562306a36Sopenharmony_ci	 * The MMU notifiers will have unmapped a huge PMD before calling
180662306a36Sopenharmony_ci	 * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
180762306a36Sopenharmony_ci	 * therefore we never need to clear out a huge PMD through this
180862306a36Sopenharmony_ci	 * calling path and a memcache is not required.
180962306a36Sopenharmony_ci	 */
181062306a36Sopenharmony_ci	kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
181162306a36Sopenharmony_ci			       PAGE_SIZE, __pfn_to_phys(pfn),
181262306a36Sopenharmony_ci			       KVM_PGTABLE_PROT_R, NULL, 0);
181362306a36Sopenharmony_ci
181462306a36Sopenharmony_ci	return false;
181562306a36Sopenharmony_ci}
181662306a36Sopenharmony_ci
181762306a36Sopenharmony_cibool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
181862306a36Sopenharmony_ci{
181962306a36Sopenharmony_ci	u64 size = (range->end - range->start) << PAGE_SHIFT;
182062306a36Sopenharmony_ci
182162306a36Sopenharmony_ci	if (!kvm->arch.mmu.pgt)
182262306a36Sopenharmony_ci		return false;
182362306a36Sopenharmony_ci
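	/*
	 * Clear the access flag on young entries in the range and report
	 * whether any of them were young.
	 */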
182462306a36Sopenharmony_ci	return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
182562306a36Sopenharmony_ci						   range->start << PAGE_SHIFT,
182662306a36Sopenharmony_ci						   size, true);
182762306a36Sopenharmony_ci}
182862306a36Sopenharmony_ci
182962306a36Sopenharmony_cibool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
183062306a36Sopenharmony_ci{
183162306a36Sopenharmony_ci	u64 size = (range->end - range->start) << PAGE_SHIFT;
183262306a36Sopenharmony_ci
183362306a36Sopenharmony_ci	if (!kvm->arch.mmu.pgt)
183462306a36Sopenharmony_ci		return false;
183562306a36Sopenharmony_ci
183662306a36Sopenharmony_ci	return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
183762306a36Sopenharmony_ci						   range->start << PAGE_SHIFT,
183862306a36Sopenharmony_ci						   size, false);
183962306a36Sopenharmony_ci}
184062306a36Sopenharmony_ci
184162306a36Sopenharmony_ciphys_addr_t kvm_mmu_get_httbr(void)
184262306a36Sopenharmony_ci{
184362306a36Sopenharmony_ci	return __pa(hyp_pgtable->pgd);
184462306a36Sopenharmony_ci}
184562306a36Sopenharmony_ci
184662306a36Sopenharmony_ciphys_addr_t kvm_get_idmap_vector(void)
184762306a36Sopenharmony_ci{
184862306a36Sopenharmony_ci	return hyp_idmap_vector;
184962306a36Sopenharmony_ci}
185062306a36Sopenharmony_ci
185162306a36Sopenharmony_cistatic int kvm_map_idmap_text(void)
185262306a36Sopenharmony_ci{
185362306a36Sopenharmony_ci	unsigned long size = hyp_idmap_end - hyp_idmap_start;
185462306a36Sopenharmony_ci	int err = __create_hyp_mappings(hyp_idmap_start, size, hyp_idmap_start,
185562306a36Sopenharmony_ci					PAGE_HYP_EXEC);
185662306a36Sopenharmony_ci	if (err)
185762306a36Sopenharmony_ci		kvm_err("Failed to idmap %lx-%lx\n",
185862306a36Sopenharmony_ci			hyp_idmap_start, hyp_idmap_end);
185962306a36Sopenharmony_ci
186062306a36Sopenharmony_ci	return err;
186162306a36Sopenharmony_ci}
186262306a36Sopenharmony_ci
186362306a36Sopenharmony_cistatic void *kvm_hyp_zalloc_page(void *arg)
186462306a36Sopenharmony_ci{
186562306a36Sopenharmony_ci	return (void *)get_zeroed_page(GFP_KERNEL);
186662306a36Sopenharmony_ci}
186762306a36Sopenharmony_ci
186862306a36Sopenharmony_cistatic struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = {
186962306a36Sopenharmony_ci	.zalloc_page		= kvm_hyp_zalloc_page,
187062306a36Sopenharmony_ci	.get_page		= kvm_host_get_page,
187162306a36Sopenharmony_ci	.put_page		= kvm_host_put_page,
187262306a36Sopenharmony_ci	.phys_to_virt		= kvm_host_va,
187362306a36Sopenharmony_ci	.virt_to_phys		= kvm_host_pa,
187462306a36Sopenharmony_ci};
187562306a36Sopenharmony_ci
187662306a36Sopenharmony_ciint __init kvm_mmu_init(u32 *hyp_va_bits)
187762306a36Sopenharmony_ci{
187862306a36Sopenharmony_ci	int err;
187962306a36Sopenharmony_ci	u32 idmap_bits;
188062306a36Sopenharmony_ci	u32 kernel_bits;
188162306a36Sopenharmony_ci
188262306a36Sopenharmony_ci	hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
188362306a36Sopenharmony_ci	hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
188462306a36Sopenharmony_ci	hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end);
188562306a36Sopenharmony_ci	hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
188662306a36Sopenharmony_ci	hyp_idmap_vector = __pa_symbol(__kvm_hyp_init);
188762306a36Sopenharmony_ci
188862306a36Sopenharmony_ci	/*
188962306a36Sopenharmony_ci	 * We rely on the linker script to ensure at build time that the HYP
189062306a36Sopenharmony_ci	 * init code does not cross a page boundary.
189162306a36Sopenharmony_ci	 */
189262306a36Sopenharmony_ci	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
189362306a36Sopenharmony_ci
189462306a36Sopenharmony_ci	/*
189562306a36Sopenharmony_ci	 * The ID map may be configured to use an extended virtual address
189662306a36Sopenharmony_ci	 * range. This is only the case if system RAM is out of range for the
189762306a36Sopenharmony_ci	 * currently configured page size and VA_BITS_MIN, in which case we will
189862306a36Sopenharmony_ci	 * also need the extended virtual range for the HYP ID map, or we won't
189962306a36Sopenharmony_ci	 * be able to enable the EL2 MMU.
190062306a36Sopenharmony_ci	 *
190162306a36Sopenharmony_ci	 * However, in some cases the ID map may be configured for fewer than
190262306a36Sopenharmony_ci	 * the number of VA bits used by the regular kernel stage 1. This
190362306a36Sopenharmony_ci	 * happens when VA_BITS=52 and the kernel image is placed in PA space
190462306a36Sopenharmony_ci	 * below 48 bits.
190562306a36Sopenharmony_ci	 *
190662306a36Sopenharmony_ci	 * At EL2, there is only one TTBR register, and we can't switch between
190762306a36Sopenharmony_ci	 * translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom
190862306a36Sopenharmony_ci	 * line: we need to use the extended range with *both* our translation
190962306a36Sopenharmony_ci	 * tables.
191062306a36Sopenharmony_ci	 *
191162306a36Sopenharmony_ci	 * So use the maximum of the idmap VA bits and the regular kernel stage
191262306a36Sopenharmony_ci	 * 1 VA bits to assure that the hypervisor can both ID map its code page
191362306a36Sopenharmony_ci	 * and map any kernel memory.
191462306a36Sopenharmony_ci	 */
191562306a36Sopenharmony_ci	idmap_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
191662306a36Sopenharmony_ci	kernel_bits = vabits_actual;
191762306a36Sopenharmony_ci	*hyp_va_bits = max(idmap_bits, kernel_bits);
191862306a36Sopenharmony_ci
191962306a36Sopenharmony_ci	kvm_debug("Using %u-bit virtual addresses at EL2\n", *hyp_va_bits);
192062306a36Sopenharmony_ci	kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
192162306a36Sopenharmony_ci	kvm_debug("HYP VA range: %lx:%lx\n",
192262306a36Sopenharmony_ci		  kern_hyp_va(PAGE_OFFSET),
192362306a36Sopenharmony_ci		  kern_hyp_va((unsigned long)high_memory - 1));
192462306a36Sopenharmony_ci
192562306a36Sopenharmony_ci	if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
192662306a36Sopenharmony_ci	    hyp_idmap_start <  kern_hyp_va((unsigned long)high_memory - 1) &&
192762306a36Sopenharmony_ci	    hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
192862306a36Sopenharmony_ci		/*
192962306a36Sopenharmony_ci		 * The idmap page intersects with the hyp VA space;
193062306a36Sopenharmony_ci		 * it is not safe to continue further.
193162306a36Sopenharmony_ci		 */
193262306a36Sopenharmony_ci		kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
193362306a36Sopenharmony_ci		err = -EINVAL;
193462306a36Sopenharmony_ci		goto out;
193562306a36Sopenharmony_ci	}
193662306a36Sopenharmony_ci
193762306a36Sopenharmony_ci	hyp_pgtable = kzalloc(sizeof(*hyp_pgtable), GFP_KERNEL);
193862306a36Sopenharmony_ci	if (!hyp_pgtable) {
193962306a36Sopenharmony_ci		kvm_err("Hyp mode page-table not allocated\n");
194062306a36Sopenharmony_ci		err = -ENOMEM;
194162306a36Sopenharmony_ci		goto out;
194262306a36Sopenharmony_ci	}
194362306a36Sopenharmony_ci
194462306a36Sopenharmony_ci	err = kvm_pgtable_hyp_init(hyp_pgtable, *hyp_va_bits, &kvm_hyp_mm_ops);
194562306a36Sopenharmony_ci	if (err)
194662306a36Sopenharmony_ci		goto out_free_pgtable;
194762306a36Sopenharmony_ci
194862306a36Sopenharmony_ci	err = kvm_map_idmap_text();
194962306a36Sopenharmony_ci	if (err)
195062306a36Sopenharmony_ci		goto out_destroy_pgtable;
195162306a36Sopenharmony_ci
195262306a36Sopenharmony_ci	io_map_base = hyp_idmap_start;
195362306a36Sopenharmony_ci	return 0;
195462306a36Sopenharmony_ci
195562306a36Sopenharmony_ciout_destroy_pgtable:
195662306a36Sopenharmony_ci	kvm_pgtable_hyp_destroy(hyp_pgtable);
195762306a36Sopenharmony_ciout_free_pgtable:
195862306a36Sopenharmony_ci	kfree(hyp_pgtable);
195962306a36Sopenharmony_ci	hyp_pgtable = NULL;
196062306a36Sopenharmony_ciout:
196162306a36Sopenharmony_ci	return err;
196262306a36Sopenharmony_ci}
196362306a36Sopenharmony_ci
196462306a36Sopenharmony_civoid kvm_arch_commit_memory_region(struct kvm *kvm,
196562306a36Sopenharmony_ci				   struct kvm_memory_slot *old,
196662306a36Sopenharmony_ci				   const struct kvm_memory_slot *new,
196762306a36Sopenharmony_ci				   enum kvm_mr_change change)
196862306a36Sopenharmony_ci{
196962306a36Sopenharmony_ci	bool log_dirty_pages = new && new->flags & KVM_MEM_LOG_DIRTY_PAGES;
197062306a36Sopenharmony_ci
197162306a36Sopenharmony_ci	/*
197262306a36Sopenharmony_ci	 * At this point the memslot has been committed and the
197362306a36Sopenharmony_ci	 * dirty_bitmap[] has been allocated; dirty pages will be tracked
197462306a36Sopenharmony_ci	 * while the memory slot is write protected.
197562306a36Sopenharmony_ci	 */
197662306a36Sopenharmony_ci	if (log_dirty_pages) {
197762306a36Sopenharmony_ci
197862306a36Sopenharmony_ci		if (change == KVM_MR_DELETE)
197962306a36Sopenharmony_ci			return;
198062306a36Sopenharmony_ci
198162306a36Sopenharmony_ci		/*
198262306a36Sopenharmony_ci		 * Huge and normal pages are write-protected and split
198362306a36Sopenharmony_ci		 * in either of these two cases:
198462306a36Sopenharmony_ci		 *
198562306a36Sopenharmony_ci		 * 1. with initial-all-set: gradually with CLEAR ioctls,
198662306a36Sopenharmony_ci		 */
198762306a36Sopenharmony_ci		if (kvm_dirty_log_manual_protect_and_init_set(kvm))
198862306a36Sopenharmony_ci			return;
198962306a36Sopenharmony_ci		/*
199062306a36Sopenharmony_ci		 * or
199162306a36Sopenharmony_ci		 * 2. without initial-all-set: all in one shot when
199262306a36Sopenharmony_ci		 *    enabling dirty logging.
199362306a36Sopenharmony_ci		 */
199462306a36Sopenharmony_ci		kvm_mmu_wp_memory_region(kvm, new->id);
199562306a36Sopenharmony_ci		kvm_mmu_split_memory_region(kvm, new->id);
199662306a36Sopenharmony_ci	} else {
199762306a36Sopenharmony_ci		/*
199862306a36Sopenharmony_ci		 * Free any leftovers from the eager page splitting cache. Do
199962306a36Sopenharmony_ci		 * this when deleting, moving, disabling dirty logging, or
200062306a36Sopenharmony_ci		 * creating the memslot (a nop). Doing it for deletes makes
200162306a36Sopenharmony_ci		 * sure we don't leak memory, and there's no need to keep the
200262306a36Sopenharmony_ci		 * cache around for any of the other cases.
200362306a36Sopenharmony_ci		 */
200462306a36Sopenharmony_ci		kvm_mmu_free_memory_cache(&kvm->arch.mmu.split_page_cache);
200562306a36Sopenharmony_ci	}
200662306a36Sopenharmony_ci}
200762306a36Sopenharmony_ci
200862306a36Sopenharmony_ciint kvm_arch_prepare_memory_region(struct kvm *kvm,
200962306a36Sopenharmony_ci				   const struct kvm_memory_slot *old,
201062306a36Sopenharmony_ci				   struct kvm_memory_slot *new,
201162306a36Sopenharmony_ci				   enum kvm_mr_change change)
201262306a36Sopenharmony_ci{
201362306a36Sopenharmony_ci	hva_t hva, reg_end;
201462306a36Sopenharmony_ci	int ret = 0;
201562306a36Sopenharmony_ci
201662306a36Sopenharmony_ci	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
201762306a36Sopenharmony_ci			change != KVM_MR_FLAGS_ONLY)
201862306a36Sopenharmony_ci		return 0;
201962306a36Sopenharmony_ci
202062306a36Sopenharmony_ci	/*
202162306a36Sopenharmony_ci	 * Prevent userspace from creating a memory region outside of the
202262306a36Sopenharmony_ci	 * IPA space addressable by the KVM guest.
202362306a36Sopenharmony_ci	 */
202462306a36Sopenharmony_ci	if ((new->base_gfn + new->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
202562306a36Sopenharmony_ci		return -EFAULT;
202662306a36Sopenharmony_ci
202762306a36Sopenharmony_ci	hva = new->userspace_addr;
202862306a36Sopenharmony_ci	reg_end = hva + (new->npages << PAGE_SHIFT);
202962306a36Sopenharmony_ci
203062306a36Sopenharmony_ci	mmap_read_lock(current->mm);
203162306a36Sopenharmony_ci	/*
203262306a36Sopenharmony_ci	 * A memory region could potentially cover multiple VMAs, and any holes
203362306a36Sopenharmony_ci	 * between them, so iterate over all of them.
203462306a36Sopenharmony_ci	 *
203562306a36Sopenharmony_ci	 *     +--------------------------------------------+
203662306a36Sopenharmony_ci	 * +---------------+----------------+   +----------------+
203762306a36Sopenharmony_ci	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
203862306a36Sopenharmony_ci	 * +---------------+----------------+   +----------------+
203962306a36Sopenharmony_ci	 *     |               memory region                |
204062306a36Sopenharmony_ci	 *     +--------------------------------------------+
204162306a36Sopenharmony_ci	 */
204262306a36Sopenharmony_ci	do {
204362306a36Sopenharmony_ci		struct vm_area_struct *vma;
204462306a36Sopenharmony_ci
204562306a36Sopenharmony_ci		vma = find_vma_intersection(current->mm, hva, reg_end);
204662306a36Sopenharmony_ci		if (!vma)
204762306a36Sopenharmony_ci			break;
204862306a36Sopenharmony_ci
204962306a36Sopenharmony_ci		if (kvm_has_mte(kvm) && !kvm_vma_mte_allowed(vma)) {
205062306a36Sopenharmony_ci			ret = -EINVAL;
205162306a36Sopenharmony_ci			break;
205262306a36Sopenharmony_ci		}
205362306a36Sopenharmony_ci
205462306a36Sopenharmony_ci		if (vma->vm_flags & VM_PFNMAP) {
205562306a36Sopenharmony_ci			/* IO region dirty page logging not allowed */
205662306a36Sopenharmony_ci			if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
205762306a36Sopenharmony_ci				ret = -EINVAL;
205862306a36Sopenharmony_ci				break;
205962306a36Sopenharmony_ci			}
206062306a36Sopenharmony_ci		}
206162306a36Sopenharmony_ci		hva = min(reg_end, vma->vm_end);
206262306a36Sopenharmony_ci	} while (hva < reg_end);
206362306a36Sopenharmony_ci
206462306a36Sopenharmony_ci	mmap_read_unlock(current->mm);
206562306a36Sopenharmony_ci	return ret;
206662306a36Sopenharmony_ci}
206762306a36Sopenharmony_ci
206862306a36Sopenharmony_civoid kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
206962306a36Sopenharmony_ci{
207062306a36Sopenharmony_ci}
207162306a36Sopenharmony_ci
207262306a36Sopenharmony_civoid kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
207362306a36Sopenharmony_ci{
207462306a36Sopenharmony_ci}
207562306a36Sopenharmony_ci
207662306a36Sopenharmony_civoid kvm_arch_flush_shadow_all(struct kvm *kvm)
207762306a36Sopenharmony_ci{
207862306a36Sopenharmony_ci	kvm_uninit_stage2_mmu(kvm);
207962306a36Sopenharmony_ci}
208062306a36Sopenharmony_ci
208162306a36Sopenharmony_civoid kvm_arch_flush_shadow_memslot(struct kvm *kvm,
208262306a36Sopenharmony_ci				   struct kvm_memory_slot *slot)
208362306a36Sopenharmony_ci{
208462306a36Sopenharmony_ci	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
208562306a36Sopenharmony_ci	phys_addr_t size = slot->npages << PAGE_SHIFT;
208662306a36Sopenharmony_ci
208762306a36Sopenharmony_ci	write_lock(&kvm->mmu_lock);
208862306a36Sopenharmony_ci	unmap_stage2_range(&kvm->arch.mmu, gpa, size);
208962306a36Sopenharmony_ci	write_unlock(&kvm->mmu_lock);
209062306a36Sopenharmony_ci}
209162306a36Sopenharmony_ci
209262306a36Sopenharmony_ci/*
209362306a36Sopenharmony_ci * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
209462306a36Sopenharmony_ci *
209562306a36Sopenharmony_ci * Main problems:
209662306a36Sopenharmony_ci * - S/W ops are local to a CPU (not broadcast)
209762306a36Sopenharmony_ci * - We have line migration behind our back (speculation)
209862306a36Sopenharmony_ci * - System caches don't support S/W at all (damn!)
209962306a36Sopenharmony_ci *
210062306a36Sopenharmony_ci * In the face of the above, the best we can do is to try and convert
210162306a36Sopenharmony_ci * S/W ops to VA ops. Because the guest is not allowed to infer the
210262306a36Sopenharmony_ci * S/W to PA mapping, it can only use S/W to nuke the whole cache,
210362306a36Sopenharmony_ci * which is a rather good thing for us.
210462306a36Sopenharmony_ci *
210562306a36Sopenharmony_ci * Also, it is only used when turning caches on/off ("The expected
210662306a36Sopenharmony_ci * usage of the cache maintenance instructions that operate by set/way
210762306a36Sopenharmony_ci * is associated with the cache maintenance instructions associated
210862306a36Sopenharmony_ci * with the powerdown and powerup of caches, if this is required by
210962306a36Sopenharmony_ci * the implementation.").
211062306a36Sopenharmony_ci *
211162306a36Sopenharmony_ci * We use the following policy:
211262306a36Sopenharmony_ci *
211362306a36Sopenharmony_ci * - If we trap a S/W operation, we enable VM trapping to detect
211462306a36Sopenharmony_ci *   caches being turned on/off, and do a full clean.
211562306a36Sopenharmony_ci *
211662306a36Sopenharmony_ci * - We flush the caches on both caches being turned on and off.
211762306a36Sopenharmony_ci *
211862306a36Sopenharmony_ci * - Once the caches are enabled, we stop trapping VM ops.
211962306a36Sopenharmony_ci */
212062306a36Sopenharmony_civoid kvm_set_way_flush(struct kvm_vcpu *vcpu)
212162306a36Sopenharmony_ci{
212262306a36Sopenharmony_ci	unsigned long hcr = *vcpu_hcr(vcpu);
212362306a36Sopenharmony_ci
212462306a36Sopenharmony_ci	/*
212562306a36Sopenharmony_ci	 * If this is the first time we do a S/W operation
212662306a36Sopenharmony_ci	 * (i.e. HCR_TVM not set), flush the whole memory and enable
212762306a36Sopenharmony_ci	 * VM trapping.
212862306a36Sopenharmony_ci	 *
212962306a36Sopenharmony_ci	 * Otherwise, rely on the VM trapping to wait for the MMU +
213062306a36Sopenharmony_ci	 * Caches to be turned off. At that point, we'll be able to
213162306a36Sopenharmony_ci	 * clean the caches again.
213262306a36Sopenharmony_ci	 */
213362306a36Sopenharmony_ci	if (!(hcr & HCR_TVM)) {
213462306a36Sopenharmony_ci		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
213562306a36Sopenharmony_ci					vcpu_has_cache_enabled(vcpu));
213662306a36Sopenharmony_ci		stage2_flush_vm(vcpu->kvm);
213762306a36Sopenharmony_ci		*vcpu_hcr(vcpu) = hcr | HCR_TVM;
213862306a36Sopenharmony_ci	}
213962306a36Sopenharmony_ci}
214062306a36Sopenharmony_ci
214162306a36Sopenharmony_civoid kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
214262306a36Sopenharmony_ci{
214362306a36Sopenharmony_ci	bool now_enabled = vcpu_has_cache_enabled(vcpu);
214462306a36Sopenharmony_ci
214562306a36Sopenharmony_ci	/*
214662306a36Sopenharmony_ci	 * If switching the MMU+caches on, need to invalidate the caches.
214762306a36Sopenharmony_ci	 * If switching it off, need to clean the caches.
214862306a36Sopenharmony_ci	 * Clean + invalidate does the trick always.
214962306a36Sopenharmony_ci	 */
215062306a36Sopenharmony_ci	if (now_enabled != was_enabled)
215162306a36Sopenharmony_ci		stage2_flush_vm(vcpu->kvm);
215262306a36Sopenharmony_ci
215362306a36Sopenharmony_ci	/* Caches are now on, stop trapping VM ops (until a S/W op) */
215462306a36Sopenharmony_ci	if (now_enabled)
215562306a36Sopenharmony_ci		*vcpu_hcr(vcpu) &= ~HCR_TVM;
215662306a36Sopenharmony_ci
215762306a36Sopenharmony_ci	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
215862306a36Sopenharmony_ci}