xref: /kernel/linux/linux-6.6/arch/arm64/kvm/hyp/pgtable.c (revision 62306a36)
162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
462306a36Sopenharmony_ci * No bombay mix was harmed in the writing of this file.
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * Copyright (C) 2020 Google LLC
762306a36Sopenharmony_ci * Author: Will Deacon <will@kernel.org>
862306a36Sopenharmony_ci */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/bitfield.h>
1162306a36Sopenharmony_ci#include <asm/kvm_pgtable.h>
1262306a36Sopenharmony_ci#include <asm/stage2_pgtable.h>
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci
/*
 * Descriptor type, held in bit 1 of a valid entry. PAGE and TABLE share the
 * same encoding; which one applies depends on the level of the entry.
 */
#define KVM_PTE_TYPE			BIT(1)
#define KVM_PTE_TYPE_BLOCK		0
#define KVM_PTE_TYPE_PAGE		1
#define KVM_PTE_TYPE_TABLE		1

/* Lower attribute bits of a leaf entry */
#define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)

/*
 * Stage-1 lower attributes. The AP read-only/read-write encodings are chosen
 * at run time according to the ARM64_KVM_HVHE capability.
 */
#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP	GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO	(cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW	(cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF	BIT(10)

/* Stage-2 lower attributes */
#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR	GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R	BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W	BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF	BIT(10)

/* Upper attribute bits of a leaf entry, including the software-defined bits */
#define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 50)
#define KVM_PTE_LEAF_ATTR_HI_SW		GENMASK(58, 55)
#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)
#define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)
#define KVM_PTE_LEAF_ATTR_HI_S1_GP	BIT(50)

/* Every stage-2 permission bit: read, write and execute-never */
#define KVM_PTE_LEAF_ATTR_S2_PERMS	(KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
					 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
					 KVM_PTE_LEAF_ATTR_HI_S2_XN)

/* Field of an invalid entry that records the owner id */
#define KVM_INVALID_PTE_OWNER_MASK	GENMASK(9, 2)
#define KVM_MAX_OWNER_ID		1

/*
 * Marks an invalid entry for which a 'break-before-make' sequence is
 * currently in progress.
 */
#define KVM_INVALID_PTE_LOCKED		BIT(10)
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_cistruct kvm_pgtable_walk_data {
6362306a36Sopenharmony_ci	struct kvm_pgtable_walker	*walker;
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci	const u64			start;
6662306a36Sopenharmony_ci	u64				addr;
6762306a36Sopenharmony_ci	const u64			end;
6862306a36Sopenharmony_ci};
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_cistatic bool kvm_pgtable_walk_skip_bbm_tlbi(const struct kvm_pgtable_visit_ctx *ctx)
7162306a36Sopenharmony_ci{
7262306a36Sopenharmony_ci	return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_BBM_TLBI);
7362306a36Sopenharmony_ci}
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_cistatic bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx)
7662306a36Sopenharmony_ci{
7762306a36Sopenharmony_ci	return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_CMO);
7862306a36Sopenharmony_ci}
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_cistatic bool kvm_phys_is_valid(u64 phys)
8162306a36Sopenharmony_ci{
8262306a36Sopenharmony_ci	return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
8362306a36Sopenharmony_ci}
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_cistatic bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys)
8662306a36Sopenharmony_ci{
8762306a36Sopenharmony_ci	u64 granule = kvm_granule_size(ctx->level);
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	if (!kvm_level_supports_block_mapping(ctx->level))
9062306a36Sopenharmony_ci		return false;
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_ci	if (granule > (ctx->end - ctx->addr))
9362306a36Sopenharmony_ci		return false;
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci	if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule))
9662306a36Sopenharmony_ci		return false;
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	return IS_ALIGNED(ctx->addr, granule);
9962306a36Sopenharmony_ci}
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_cistatic u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
10262306a36Sopenharmony_ci{
10362306a36Sopenharmony_ci	u64 shift = kvm_granule_shift(level);
10462306a36Sopenharmony_ci	u64 mask = BIT(PAGE_SHIFT - 3) - 1;
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci	return (data->addr >> shift) & mask;
10762306a36Sopenharmony_ci}
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_cistatic u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
11062306a36Sopenharmony_ci{
11162306a36Sopenharmony_ci	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
11262306a36Sopenharmony_ci	u64 mask = BIT(pgt->ia_bits) - 1;
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci	return (addr & mask) >> shift;
11562306a36Sopenharmony_ci}
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_cistatic u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
11862306a36Sopenharmony_ci{
11962306a36Sopenharmony_ci	struct kvm_pgtable pgt = {
12062306a36Sopenharmony_ci		.ia_bits	= ia_bits,
12162306a36Sopenharmony_ci		.start_level	= start_level,
12262306a36Sopenharmony_ci	};
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci	return kvm_pgd_page_idx(&pgt, -1ULL) + 1;
12562306a36Sopenharmony_ci}
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_cistatic bool kvm_pte_table(kvm_pte_t pte, u32 level)
12862306a36Sopenharmony_ci{
12962306a36Sopenharmony_ci	if (level == KVM_PGTABLE_MAX_LEVELS - 1)
13062306a36Sopenharmony_ci		return false;
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci	if (!kvm_pte_valid(pte))
13362306a36Sopenharmony_ci		return false;
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
13662306a36Sopenharmony_ci}
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_cistatic kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops)
13962306a36Sopenharmony_ci{
14062306a36Sopenharmony_ci	return mm_ops->phys_to_virt(kvm_pte_to_phys(pte));
14162306a36Sopenharmony_ci}
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_cistatic void kvm_clear_pte(kvm_pte_t *ptep)
14462306a36Sopenharmony_ci{
14562306a36Sopenharmony_ci	WRITE_ONCE(*ptep, 0);
14662306a36Sopenharmony_ci}
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_cistatic kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops *mm_ops)
14962306a36Sopenharmony_ci{
15062306a36Sopenharmony_ci	kvm_pte_t pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp));
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci	pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
15362306a36Sopenharmony_ci	pte |= KVM_PTE_VALID;
15462306a36Sopenharmony_ci	return pte;
15562306a36Sopenharmony_ci}
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_cistatic kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
15862306a36Sopenharmony_ci{
15962306a36Sopenharmony_ci	kvm_pte_t pte = kvm_phys_to_pte(pa);
16062306a36Sopenharmony_ci	u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
16162306a36Sopenharmony_ci							   KVM_PTE_TYPE_BLOCK;
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci	pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
16462306a36Sopenharmony_ci	pte |= FIELD_PREP(KVM_PTE_TYPE, type);
16562306a36Sopenharmony_ci	pte |= KVM_PTE_VALID;
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ci	return pte;
16862306a36Sopenharmony_ci}
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_cistatic kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
17162306a36Sopenharmony_ci{
17262306a36Sopenharmony_ci	return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
17362306a36Sopenharmony_ci}
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_cistatic int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data,
17662306a36Sopenharmony_ci				  const struct kvm_pgtable_visit_ctx *ctx,
17762306a36Sopenharmony_ci				  enum kvm_pgtable_walk_flags visit)
17862306a36Sopenharmony_ci{
17962306a36Sopenharmony_ci	struct kvm_pgtable_walker *walker = data->walker;
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	/* Ensure the appropriate lock is held (e.g. RCU lock for stage-2 MMU) */
18262306a36Sopenharmony_ci	WARN_ON_ONCE(kvm_pgtable_walk_shared(ctx) && !kvm_pgtable_walk_lock_held());
18362306a36Sopenharmony_ci	return walker->cb(ctx, visit);
18462306a36Sopenharmony_ci}
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_cistatic bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker,
18762306a36Sopenharmony_ci				      int r)
18862306a36Sopenharmony_ci{
18962306a36Sopenharmony_ci	/*
19062306a36Sopenharmony_ci	 * Visitor callbacks return EAGAIN when the conditions that led to a
19162306a36Sopenharmony_ci	 * fault are no longer reflected in the page tables due to a race to
19262306a36Sopenharmony_ci	 * update a PTE. In the context of a fault handler this is interpreted
19362306a36Sopenharmony_ci	 * as a signal to retry guest execution.
19462306a36Sopenharmony_ci	 *
19562306a36Sopenharmony_ci	 * Ignore the return code altogether for walkers outside a fault handler
19662306a36Sopenharmony_ci	 * (e.g. write protecting a range of memory) and chug along with the
19762306a36Sopenharmony_ci	 * page table walk.
19862306a36Sopenharmony_ci	 */
19962306a36Sopenharmony_ci	if (r == -EAGAIN)
20062306a36Sopenharmony_ci		return !(walker->flags & KVM_PGTABLE_WALK_HANDLE_FAULT);
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	return !r;
20362306a36Sopenharmony_ci}
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_cistatic int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
20662306a36Sopenharmony_ci			      struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level);
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_cistatic inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
20962306a36Sopenharmony_ci				      struct kvm_pgtable_mm_ops *mm_ops,
21062306a36Sopenharmony_ci				      kvm_pteref_t pteref, u32 level)
21162306a36Sopenharmony_ci{
21262306a36Sopenharmony_ci	enum kvm_pgtable_walk_flags flags = data->walker->flags;
21362306a36Sopenharmony_ci	kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref);
21462306a36Sopenharmony_ci	struct kvm_pgtable_visit_ctx ctx = {
21562306a36Sopenharmony_ci		.ptep	= ptep,
21662306a36Sopenharmony_ci		.old	= READ_ONCE(*ptep),
21762306a36Sopenharmony_ci		.arg	= data->walker->arg,
21862306a36Sopenharmony_ci		.mm_ops	= mm_ops,
21962306a36Sopenharmony_ci		.start	= data->start,
22062306a36Sopenharmony_ci		.addr	= data->addr,
22162306a36Sopenharmony_ci		.end	= data->end,
22262306a36Sopenharmony_ci		.level	= level,
22362306a36Sopenharmony_ci		.flags	= flags,
22462306a36Sopenharmony_ci	};
22562306a36Sopenharmony_ci	int ret = 0;
22662306a36Sopenharmony_ci	bool reload = false;
22762306a36Sopenharmony_ci	kvm_pteref_t childp;
22862306a36Sopenharmony_ci	bool table = kvm_pte_table(ctx.old, level);
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci	if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
23162306a36Sopenharmony_ci		ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE);
23262306a36Sopenharmony_ci		reload = true;
23362306a36Sopenharmony_ci	}
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) {
23662306a36Sopenharmony_ci		ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF);
23762306a36Sopenharmony_ci		reload = true;
23862306a36Sopenharmony_ci	}
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	/*
24162306a36Sopenharmony_ci	 * Reload the page table after invoking the walker callback for leaf
24262306a36Sopenharmony_ci	 * entries or after pre-order traversal, to allow the walker to descend
24362306a36Sopenharmony_ci	 * into a newly installed or replaced table.
24462306a36Sopenharmony_ci	 */
24562306a36Sopenharmony_ci	if (reload) {
24662306a36Sopenharmony_ci		ctx.old = READ_ONCE(*ptep);
24762306a36Sopenharmony_ci		table = kvm_pte_table(ctx.old, level);
24862306a36Sopenharmony_ci	}
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci	if (!kvm_pgtable_walk_continue(data->walker, ret))
25162306a36Sopenharmony_ci		goto out;
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci	if (!table) {
25462306a36Sopenharmony_ci		data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
25562306a36Sopenharmony_ci		data->addr += kvm_granule_size(level);
25662306a36Sopenharmony_ci		goto out;
25762306a36Sopenharmony_ci	}
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci	childp = (kvm_pteref_t)kvm_pte_follow(ctx.old, mm_ops);
26062306a36Sopenharmony_ci	ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1);
26162306a36Sopenharmony_ci	if (!kvm_pgtable_walk_continue(data->walker, ret))
26262306a36Sopenharmony_ci		goto out;
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	if (ctx.flags & KVM_PGTABLE_WALK_TABLE_POST)
26562306a36Sopenharmony_ci		ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_POST);
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ciout:
26862306a36Sopenharmony_ci	if (kvm_pgtable_walk_continue(data->walker, ret))
26962306a36Sopenharmony_ci		return 0;
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	return ret;
27262306a36Sopenharmony_ci}
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_cistatic int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
27562306a36Sopenharmony_ci			      struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level)
27662306a36Sopenharmony_ci{
27762306a36Sopenharmony_ci	u32 idx;
27862306a36Sopenharmony_ci	int ret = 0;
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_ci	if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
28162306a36Sopenharmony_ci		return -EINVAL;
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci	for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
28462306a36Sopenharmony_ci		kvm_pteref_t pteref = &pgtable[idx];
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_ci		if (data->addr >= data->end)
28762306a36Sopenharmony_ci			break;
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci		ret = __kvm_pgtable_visit(data, mm_ops, pteref, level);
29062306a36Sopenharmony_ci		if (ret)
29162306a36Sopenharmony_ci			break;
29262306a36Sopenharmony_ci	}
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci	return ret;
29562306a36Sopenharmony_ci}
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_cistatic int _kvm_pgtable_walk(struct kvm_pgtable *pgt, struct kvm_pgtable_walk_data *data)
29862306a36Sopenharmony_ci{
29962306a36Sopenharmony_ci	u32 idx;
30062306a36Sopenharmony_ci	int ret = 0;
30162306a36Sopenharmony_ci	u64 limit = BIT(pgt->ia_bits);
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_ci	if (data->addr > limit || data->end > limit)
30462306a36Sopenharmony_ci		return -ERANGE;
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_ci	if (!pgt->pgd)
30762306a36Sopenharmony_ci		return -EINVAL;
30862306a36Sopenharmony_ci
30962306a36Sopenharmony_ci	for (idx = kvm_pgd_page_idx(pgt, data->addr); data->addr < data->end; ++idx) {
31062306a36Sopenharmony_ci		kvm_pteref_t pteref = &pgt->pgd[idx * PTRS_PER_PTE];
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ci		ret = __kvm_pgtable_walk(data, pgt->mm_ops, pteref, pgt->start_level);
31362306a36Sopenharmony_ci		if (ret)
31462306a36Sopenharmony_ci			break;
31562306a36Sopenharmony_ci	}
31662306a36Sopenharmony_ci
31762306a36Sopenharmony_ci	return ret;
31862306a36Sopenharmony_ci}
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ciint kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
32162306a36Sopenharmony_ci		     struct kvm_pgtable_walker *walker)
32262306a36Sopenharmony_ci{
32362306a36Sopenharmony_ci	struct kvm_pgtable_walk_data walk_data = {
32462306a36Sopenharmony_ci		.start	= ALIGN_DOWN(addr, PAGE_SIZE),
32562306a36Sopenharmony_ci		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
32662306a36Sopenharmony_ci		.end	= PAGE_ALIGN(walk_data.addr + size),
32762306a36Sopenharmony_ci		.walker	= walker,
32862306a36Sopenharmony_ci	};
32962306a36Sopenharmony_ci	int r;
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci	r = kvm_pgtable_walk_begin(walker);
33262306a36Sopenharmony_ci	if (r)
33362306a36Sopenharmony_ci		return r;
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci	r = _kvm_pgtable_walk(pgt, &walk_data);
33662306a36Sopenharmony_ci	kvm_pgtable_walk_end(walker);
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci	return r;
33962306a36Sopenharmony_ci}
34062306a36Sopenharmony_ci
34162306a36Sopenharmony_cistruct leaf_walk_data {
34262306a36Sopenharmony_ci	kvm_pte_t	pte;
34362306a36Sopenharmony_ci	u32		level;
34462306a36Sopenharmony_ci};
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_cistatic int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
34762306a36Sopenharmony_ci		       enum kvm_pgtable_walk_flags visit)
34862306a36Sopenharmony_ci{
34962306a36Sopenharmony_ci	struct leaf_walk_data *data = ctx->arg;
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_ci	data->pte   = ctx->old;
35262306a36Sopenharmony_ci	data->level = ctx->level;
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci	return 0;
35562306a36Sopenharmony_ci}
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_ciint kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
35862306a36Sopenharmony_ci			 kvm_pte_t *ptep, u32 *level)
35962306a36Sopenharmony_ci{
36062306a36Sopenharmony_ci	struct leaf_walk_data data;
36162306a36Sopenharmony_ci	struct kvm_pgtable_walker walker = {
36262306a36Sopenharmony_ci		.cb	= leaf_walker,
36362306a36Sopenharmony_ci		.flags	= KVM_PGTABLE_WALK_LEAF,
36462306a36Sopenharmony_ci		.arg	= &data,
36562306a36Sopenharmony_ci	};
36662306a36Sopenharmony_ci	int ret;
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ci	ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
36962306a36Sopenharmony_ci			       PAGE_SIZE, &walker);
37062306a36Sopenharmony_ci	if (!ret) {
37162306a36Sopenharmony_ci		if (ptep)
37262306a36Sopenharmony_ci			*ptep  = data.pte;
37362306a36Sopenharmony_ci		if (level)
37462306a36Sopenharmony_ci			*level = data.level;
37562306a36Sopenharmony_ci	}
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci	return ret;
37862306a36Sopenharmony_ci}
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_cistruct hyp_map_data {
38162306a36Sopenharmony_ci	const u64			phys;
38262306a36Sopenharmony_ci	kvm_pte_t			attr;
38362306a36Sopenharmony_ci};
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_cistatic int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
38662306a36Sopenharmony_ci{
38762306a36Sopenharmony_ci	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
38862306a36Sopenharmony_ci	u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
38962306a36Sopenharmony_ci	kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
39062306a36Sopenharmony_ci	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
39162306a36Sopenharmony_ci	u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
39262306a36Sopenharmony_ci					       KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci	if (!(prot & KVM_PGTABLE_PROT_R))
39562306a36Sopenharmony_ci		return -EINVAL;
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci	if (prot & KVM_PGTABLE_PROT_X) {
39862306a36Sopenharmony_ci		if (prot & KVM_PGTABLE_PROT_W)
39962306a36Sopenharmony_ci			return -EINVAL;
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci		if (device)
40262306a36Sopenharmony_ci			return -EINVAL;
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci		if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
40562306a36Sopenharmony_ci			attr |= KVM_PTE_LEAF_ATTR_HI_S1_GP;
40662306a36Sopenharmony_ci	} else {
40762306a36Sopenharmony_ci		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
40862306a36Sopenharmony_ci	}
40962306a36Sopenharmony_ci
41062306a36Sopenharmony_ci	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
41162306a36Sopenharmony_ci	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
41262306a36Sopenharmony_ci	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
41362306a36Sopenharmony_ci	attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
41462306a36Sopenharmony_ci	*ptep = attr;
41562306a36Sopenharmony_ci
41662306a36Sopenharmony_ci	return 0;
41762306a36Sopenharmony_ci}
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_cienum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
42062306a36Sopenharmony_ci{
42162306a36Sopenharmony_ci	enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
42262306a36Sopenharmony_ci	u32 ap;
42362306a36Sopenharmony_ci
42462306a36Sopenharmony_ci	if (!kvm_pte_valid(pte))
42562306a36Sopenharmony_ci		return prot;
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_ci	if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN))
42862306a36Sopenharmony_ci		prot |= KVM_PGTABLE_PROT_X;
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci	ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte);
43162306a36Sopenharmony_ci	if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO)
43262306a36Sopenharmony_ci		prot |= KVM_PGTABLE_PROT_R;
43362306a36Sopenharmony_ci	else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW)
43462306a36Sopenharmony_ci		prot |= KVM_PGTABLE_PROT_RW;
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci	return prot;
43762306a36Sopenharmony_ci}
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_cistatic bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
44062306a36Sopenharmony_ci				    struct hyp_map_data *data)
44162306a36Sopenharmony_ci{
44262306a36Sopenharmony_ci	u64 phys = data->phys + (ctx->addr - ctx->start);
44362306a36Sopenharmony_ci	kvm_pte_t new;
44462306a36Sopenharmony_ci
44562306a36Sopenharmony_ci	if (!kvm_block_mapping_supported(ctx, phys))
44662306a36Sopenharmony_ci		return false;
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_ci	new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level);
44962306a36Sopenharmony_ci	if (ctx->old == new)
45062306a36Sopenharmony_ci		return true;
45162306a36Sopenharmony_ci	if (!kvm_pte_valid(ctx->old))
45262306a36Sopenharmony_ci		ctx->mm_ops->get_page(ctx->ptep);
45362306a36Sopenharmony_ci	else if (WARN_ON((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
45462306a36Sopenharmony_ci		return false;
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci	smp_store_release(ctx->ptep, new);
45762306a36Sopenharmony_ci	return true;
45862306a36Sopenharmony_ci}
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_cistatic int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
46162306a36Sopenharmony_ci			  enum kvm_pgtable_walk_flags visit)
46262306a36Sopenharmony_ci{
46362306a36Sopenharmony_ci	kvm_pte_t *childp, new;
46462306a36Sopenharmony_ci	struct hyp_map_data *data = ctx->arg;
46562306a36Sopenharmony_ci	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci	if (hyp_map_walker_try_leaf(ctx, data))
46862306a36Sopenharmony_ci		return 0;
46962306a36Sopenharmony_ci
47062306a36Sopenharmony_ci	if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1))
47162306a36Sopenharmony_ci		return -EINVAL;
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_ci	childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
47462306a36Sopenharmony_ci	if (!childp)
47562306a36Sopenharmony_ci		return -ENOMEM;
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci	new = kvm_init_table_pte(childp, mm_ops);
47862306a36Sopenharmony_ci	mm_ops->get_page(ctx->ptep);
47962306a36Sopenharmony_ci	smp_store_release(ctx->ptep, new);
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	return 0;
48262306a36Sopenharmony_ci}
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_ciint kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
48562306a36Sopenharmony_ci			enum kvm_pgtable_prot prot)
48662306a36Sopenharmony_ci{
48762306a36Sopenharmony_ci	int ret;
48862306a36Sopenharmony_ci	struct hyp_map_data map_data = {
48962306a36Sopenharmony_ci		.phys	= ALIGN_DOWN(phys, PAGE_SIZE),
49062306a36Sopenharmony_ci	};
49162306a36Sopenharmony_ci	struct kvm_pgtable_walker walker = {
49262306a36Sopenharmony_ci		.cb	= hyp_map_walker,
49362306a36Sopenharmony_ci		.flags	= KVM_PGTABLE_WALK_LEAF,
49462306a36Sopenharmony_ci		.arg	= &map_data,
49562306a36Sopenharmony_ci	};
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_ci	ret = hyp_set_prot_attr(prot, &map_data.attr);
49862306a36Sopenharmony_ci	if (ret)
49962306a36Sopenharmony_ci		return ret;
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
50262306a36Sopenharmony_ci	dsb(ishst);
50362306a36Sopenharmony_ci	isb();
50462306a36Sopenharmony_ci	return ret;
50562306a36Sopenharmony_ci}
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_cistatic int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
50862306a36Sopenharmony_ci			    enum kvm_pgtable_walk_flags visit)
50962306a36Sopenharmony_ci{
51062306a36Sopenharmony_ci	kvm_pte_t *childp = NULL;
51162306a36Sopenharmony_ci	u64 granule = kvm_granule_size(ctx->level);
51262306a36Sopenharmony_ci	u64 *unmapped = ctx->arg;
51362306a36Sopenharmony_ci	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	if (!kvm_pte_valid(ctx->old))
51662306a36Sopenharmony_ci		return -EINVAL;
51762306a36Sopenharmony_ci
51862306a36Sopenharmony_ci	if (kvm_pte_table(ctx->old, ctx->level)) {
51962306a36Sopenharmony_ci		childp = kvm_pte_follow(ctx->old, mm_ops);
52062306a36Sopenharmony_ci
52162306a36Sopenharmony_ci		if (mm_ops->page_count(childp) != 1)
52262306a36Sopenharmony_ci			return 0;
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci		kvm_clear_pte(ctx->ptep);
52562306a36Sopenharmony_ci		dsb(ishst);
52662306a36Sopenharmony_ci		__tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
52762306a36Sopenharmony_ci	} else {
52862306a36Sopenharmony_ci		if (ctx->end - ctx->addr < granule)
52962306a36Sopenharmony_ci			return -EINVAL;
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci		kvm_clear_pte(ctx->ptep);
53262306a36Sopenharmony_ci		dsb(ishst);
53362306a36Sopenharmony_ci		__tlbi_level(vale2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
53462306a36Sopenharmony_ci		*unmapped += granule;
53562306a36Sopenharmony_ci	}
53662306a36Sopenharmony_ci
53762306a36Sopenharmony_ci	dsb(ish);
53862306a36Sopenharmony_ci	isb();
53962306a36Sopenharmony_ci	mm_ops->put_page(ctx->ptep);
54062306a36Sopenharmony_ci
54162306a36Sopenharmony_ci	if (childp)
54262306a36Sopenharmony_ci		mm_ops->put_page(childp);
54362306a36Sopenharmony_ci
54462306a36Sopenharmony_ci	return 0;
54562306a36Sopenharmony_ci}
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ciu64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
54862306a36Sopenharmony_ci{
54962306a36Sopenharmony_ci	u64 unmapped = 0;
55062306a36Sopenharmony_ci	struct kvm_pgtable_walker walker = {
55162306a36Sopenharmony_ci		.cb	= hyp_unmap_walker,
55262306a36Sopenharmony_ci		.arg	= &unmapped,
55362306a36Sopenharmony_ci		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
55462306a36Sopenharmony_ci	};
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_ci	if (!pgt->mm_ops->page_count)
55762306a36Sopenharmony_ci		return 0;
55862306a36Sopenharmony_ci
55962306a36Sopenharmony_ci	kvm_pgtable_walk(pgt, addr, size, &walker);
56062306a36Sopenharmony_ci	return unmapped;
56162306a36Sopenharmony_ci}
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ciint kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
56462306a36Sopenharmony_ci			 struct kvm_pgtable_mm_ops *mm_ops)
56562306a36Sopenharmony_ci{
56662306a36Sopenharmony_ci	u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);
56762306a36Sopenharmony_ci
56862306a36Sopenharmony_ci	pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL);
56962306a36Sopenharmony_ci	if (!pgt->pgd)
57062306a36Sopenharmony_ci		return -ENOMEM;
57162306a36Sopenharmony_ci
57262306a36Sopenharmony_ci	pgt->ia_bits		= va_bits;
57362306a36Sopenharmony_ci	pgt->start_level	= KVM_PGTABLE_MAX_LEVELS - levels;
57462306a36Sopenharmony_ci	pgt->mm_ops		= mm_ops;
57562306a36Sopenharmony_ci	pgt->mmu		= NULL;
57662306a36Sopenharmony_ci	pgt->force_pte_cb	= NULL;
57762306a36Sopenharmony_ci
57862306a36Sopenharmony_ci	return 0;
57962306a36Sopenharmony_ci}
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_cistatic int hyp_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
58262306a36Sopenharmony_ci			   enum kvm_pgtable_walk_flags visit)
58362306a36Sopenharmony_ci{
58462306a36Sopenharmony_ci	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci	if (!kvm_pte_valid(ctx->old))
58762306a36Sopenharmony_ci		return 0;
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci	mm_ops->put_page(ctx->ptep);
59062306a36Sopenharmony_ci
59162306a36Sopenharmony_ci	if (kvm_pte_table(ctx->old, ctx->level))
59262306a36Sopenharmony_ci		mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops));
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_ci	return 0;
59562306a36Sopenharmony_ci}
59662306a36Sopenharmony_ci
59762306a36Sopenharmony_civoid kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
59862306a36Sopenharmony_ci{
59962306a36Sopenharmony_ci	struct kvm_pgtable_walker walker = {
60062306a36Sopenharmony_ci		.cb	= hyp_free_walker,
60162306a36Sopenharmony_ci		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
60262306a36Sopenharmony_ci	};
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_ci	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
60562306a36Sopenharmony_ci	pgt->mm_ops->put_page(kvm_dereference_pteref(&walker, pgt->pgd));
60662306a36Sopenharmony_ci	pgt->pgd = NULL;
60762306a36Sopenharmony_ci}
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_cistruct stage2_map_data {
61062306a36Sopenharmony_ci	const u64			phys;
61162306a36Sopenharmony_ci	kvm_pte_t			attr;
61262306a36Sopenharmony_ci	u8				owner_id;
61362306a36Sopenharmony_ci
61462306a36Sopenharmony_ci	kvm_pte_t			*anchor;
61562306a36Sopenharmony_ci	kvm_pte_t			*childp;
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_ci	struct kvm_s2_mmu		*mmu;
61862306a36Sopenharmony_ci	void				*memcache;
61962306a36Sopenharmony_ci
62062306a36Sopenharmony_ci	/* Force mappings to page granularity */
62162306a36Sopenharmony_ci	bool				force_pte;
62262306a36Sopenharmony_ci};
62362306a36Sopenharmony_ci
62462306a36Sopenharmony_ciu64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
62562306a36Sopenharmony_ci{
62662306a36Sopenharmony_ci	u64 vtcr = VTCR_EL2_FLAGS;
62762306a36Sopenharmony_ci	u8 lvls;
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_ci	vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
63062306a36Sopenharmony_ci	vtcr |= VTCR_EL2_T0SZ(phys_shift);
63162306a36Sopenharmony_ci	/*
63262306a36Sopenharmony_ci	 * Use a minimum 2 level page table to prevent splitting
63362306a36Sopenharmony_ci	 * host PMD huge pages at stage2.
63462306a36Sopenharmony_ci	 */
63562306a36Sopenharmony_ci	lvls = stage2_pgtable_levels(phys_shift);
63662306a36Sopenharmony_ci	if (lvls < 2)
63762306a36Sopenharmony_ci		lvls = 2;
63862306a36Sopenharmony_ci	vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
63962306a36Sopenharmony_ci
64062306a36Sopenharmony_ci#ifdef CONFIG_ARM64_HW_AFDBM
64162306a36Sopenharmony_ci	/*
64262306a36Sopenharmony_ci	 * Enable the Hardware Access Flag management, unconditionally
64362306a36Sopenharmony_ci	 * on all CPUs. In systems that have asymmetric support for the feature
64462306a36Sopenharmony_ci	 * this allows KVM to leverage hardware support on the subset of cores
64562306a36Sopenharmony_ci	 * that implement the feature.
64662306a36Sopenharmony_ci	 *
64762306a36Sopenharmony_ci	 * The architecture requires VTCR_EL2.HA to be RES0 (thus ignored by
64862306a36Sopenharmony_ci	 * hardware) on implementations that do not advertise support for the
64962306a36Sopenharmony_ci	 * feature. As such, setting HA unconditionally is safe, unless you
65062306a36Sopenharmony_ci	 * happen to be running on a design that has unadvertised support for
65162306a36Sopenharmony_ci	 * HAFDBS. Here be dragons.
65262306a36Sopenharmony_ci	 */
65362306a36Sopenharmony_ci	if (!cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
65462306a36Sopenharmony_ci		vtcr |= VTCR_EL2_HA;
65562306a36Sopenharmony_ci#endif /* CONFIG_ARM64_HW_AFDBM */
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_ci	/* Set the vmid bits */
65862306a36Sopenharmony_ci	vtcr |= (get_vmid_bits(mmfr1) == 16) ?
65962306a36Sopenharmony_ci		VTCR_EL2_VS_16BIT :
66062306a36Sopenharmony_ci		VTCR_EL2_VS_8BIT;
66162306a36Sopenharmony_ci
66262306a36Sopenharmony_ci	return vtcr;
66362306a36Sopenharmony_ci}
66462306a36Sopenharmony_ci
66562306a36Sopenharmony_cistatic bool stage2_has_fwb(struct kvm_pgtable *pgt)
66662306a36Sopenharmony_ci{
66762306a36Sopenharmony_ci	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
66862306a36Sopenharmony_ci		return false;
66962306a36Sopenharmony_ci
67062306a36Sopenharmony_ci	return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
67162306a36Sopenharmony_ci}
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_civoid kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
67462306a36Sopenharmony_ci				phys_addr_t addr, size_t size)
67562306a36Sopenharmony_ci{
67662306a36Sopenharmony_ci	unsigned long pages, inval_pages;
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci	if (!system_supports_tlb_range()) {
67962306a36Sopenharmony_ci		kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
68062306a36Sopenharmony_ci		return;
68162306a36Sopenharmony_ci	}
68262306a36Sopenharmony_ci
68362306a36Sopenharmony_ci	pages = size >> PAGE_SHIFT;
68462306a36Sopenharmony_ci	while (pages > 0) {
68562306a36Sopenharmony_ci		inval_pages = min(pages, MAX_TLBI_RANGE_PAGES);
68662306a36Sopenharmony_ci		kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages);
68762306a36Sopenharmony_ci
68862306a36Sopenharmony_ci		addr += inval_pages << PAGE_SHIFT;
68962306a36Sopenharmony_ci		pages -= inval_pages;
69062306a36Sopenharmony_ci	}
69162306a36Sopenharmony_ci}
69262306a36Sopenharmony_ci
/*
 * Stage-2 memory-attribute encoding for @attr (e.g. NORMAL, DEVICE_nGnRE),
 * selected according to whether @pgt uses FWB. @attr is forwarded verbatim
 * to PAGE_S2_MEMATTR().
 */
#define KVM_S2_MEMATTR(pgt, attr)					\
	PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
69462306a36Sopenharmony_ci
69562306a36Sopenharmony_cistatic int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
69662306a36Sopenharmony_ci				kvm_pte_t *ptep)
69762306a36Sopenharmony_ci{
69862306a36Sopenharmony_ci	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
69962306a36Sopenharmony_ci	kvm_pte_t attr = device ? KVM_S2_MEMATTR(pgt, DEVICE_nGnRE) :
70062306a36Sopenharmony_ci			    KVM_S2_MEMATTR(pgt, NORMAL);
70162306a36Sopenharmony_ci	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
70262306a36Sopenharmony_ci
70362306a36Sopenharmony_ci	if (!(prot & KVM_PGTABLE_PROT_X))
70462306a36Sopenharmony_ci		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
70562306a36Sopenharmony_ci	else if (device)
70662306a36Sopenharmony_ci		return -EINVAL;
70762306a36Sopenharmony_ci
70862306a36Sopenharmony_ci	if (prot & KVM_PGTABLE_PROT_R)
70962306a36Sopenharmony_ci		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_ci	if (prot & KVM_PGTABLE_PROT_W)
71262306a36Sopenharmony_ci		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
71362306a36Sopenharmony_ci
71462306a36Sopenharmony_ci	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
71562306a36Sopenharmony_ci	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
71662306a36Sopenharmony_ci	attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
71762306a36Sopenharmony_ci	*ptep = attr;
71862306a36Sopenharmony_ci
71962306a36Sopenharmony_ci	return 0;
72062306a36Sopenharmony_ci}
72162306a36Sopenharmony_ci
72262306a36Sopenharmony_cienum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
72362306a36Sopenharmony_ci{
72462306a36Sopenharmony_ci	enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
72562306a36Sopenharmony_ci
72662306a36Sopenharmony_ci	if (!kvm_pte_valid(pte))
72762306a36Sopenharmony_ci		return prot;
72862306a36Sopenharmony_ci
72962306a36Sopenharmony_ci	if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R)
73062306a36Sopenharmony_ci		prot |= KVM_PGTABLE_PROT_R;
73162306a36Sopenharmony_ci	if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
73262306a36Sopenharmony_ci		prot |= KVM_PGTABLE_PROT_W;
73362306a36Sopenharmony_ci	if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
73462306a36Sopenharmony_ci		prot |= KVM_PGTABLE_PROT_X;
73562306a36Sopenharmony_ci
73662306a36Sopenharmony_ci	return prot;
73762306a36Sopenharmony_ci}
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_cistatic bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
74062306a36Sopenharmony_ci{
74162306a36Sopenharmony_ci	if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
74262306a36Sopenharmony_ci		return true;
74362306a36Sopenharmony_ci
74462306a36Sopenharmony_ci	return ((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS));
74562306a36Sopenharmony_ci}
74662306a36Sopenharmony_ci
74762306a36Sopenharmony_cistatic bool stage2_pte_is_counted(kvm_pte_t pte)
74862306a36Sopenharmony_ci{
74962306a36Sopenharmony_ci	/*
75062306a36Sopenharmony_ci	 * The refcount tracks valid entries as well as invalid entries if they
75162306a36Sopenharmony_ci	 * encode ownership of a page to another entity than the page-table
75262306a36Sopenharmony_ci	 * owner, whose id is 0.
75362306a36Sopenharmony_ci	 */
75462306a36Sopenharmony_ci	return !!pte;
75562306a36Sopenharmony_ci}
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_cistatic bool stage2_pte_is_locked(kvm_pte_t pte)
75862306a36Sopenharmony_ci{
75962306a36Sopenharmony_ci	return !kvm_pte_valid(pte) && (pte & KVM_INVALID_PTE_LOCKED);
76062306a36Sopenharmony_ci}
76162306a36Sopenharmony_ci
76262306a36Sopenharmony_cistatic bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new)
76362306a36Sopenharmony_ci{
76462306a36Sopenharmony_ci	if (!kvm_pgtable_walk_shared(ctx)) {
76562306a36Sopenharmony_ci		WRITE_ONCE(*ctx->ptep, new);
76662306a36Sopenharmony_ci		return true;
76762306a36Sopenharmony_ci	}
76862306a36Sopenharmony_ci
76962306a36Sopenharmony_ci	return cmpxchg(ctx->ptep, ctx->old, new) == ctx->old;
77062306a36Sopenharmony_ci}
77162306a36Sopenharmony_ci
77262306a36Sopenharmony_ci/**
77362306a36Sopenharmony_ci * stage2_try_break_pte() - Invalidates a pte according to the
77462306a36Sopenharmony_ci *			    'break-before-make' requirements of the
77562306a36Sopenharmony_ci *			    architecture.
77662306a36Sopenharmony_ci *
77762306a36Sopenharmony_ci * @ctx: context of the visited pte.
77862306a36Sopenharmony_ci * @mmu: stage-2 mmu
77962306a36Sopenharmony_ci *
78062306a36Sopenharmony_ci * Returns: true if the pte was successfully broken.
78162306a36Sopenharmony_ci *
78262306a36Sopenharmony_ci * If the removed pte was valid, performs the necessary serialization and TLB
78362306a36Sopenharmony_ci * invalidation for the old value. For counted ptes, drops the reference count
78462306a36Sopenharmony_ci * on the containing table page.
78562306a36Sopenharmony_ci */
78662306a36Sopenharmony_cistatic bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
78762306a36Sopenharmony_ci				 struct kvm_s2_mmu *mmu)
78862306a36Sopenharmony_ci{
78962306a36Sopenharmony_ci	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
79062306a36Sopenharmony_ci
79162306a36Sopenharmony_ci	if (stage2_pte_is_locked(ctx->old)) {
79262306a36Sopenharmony_ci		/*
79362306a36Sopenharmony_ci		 * Should never occur if this walker has exclusive access to the
79462306a36Sopenharmony_ci		 * page tables.
79562306a36Sopenharmony_ci		 */
79662306a36Sopenharmony_ci		WARN_ON(!kvm_pgtable_walk_shared(ctx));
79762306a36Sopenharmony_ci		return false;
79862306a36Sopenharmony_ci	}
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_ci	if (!stage2_try_set_pte(ctx, KVM_INVALID_PTE_LOCKED))
80162306a36Sopenharmony_ci		return false;
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci	if (!kvm_pgtable_walk_skip_bbm_tlbi(ctx)) {
80462306a36Sopenharmony_ci		/*
80562306a36Sopenharmony_ci		 * Perform the appropriate TLB invalidation based on the
80662306a36Sopenharmony_ci		 * evicted pte value (if any).
80762306a36Sopenharmony_ci		 */
80862306a36Sopenharmony_ci		if (kvm_pte_table(ctx->old, ctx->level))
80962306a36Sopenharmony_ci			kvm_tlb_flush_vmid_range(mmu, ctx->addr,
81062306a36Sopenharmony_ci						kvm_granule_size(ctx->level));
81162306a36Sopenharmony_ci		else if (kvm_pte_valid(ctx->old))
81262306a36Sopenharmony_ci			kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
81362306a36Sopenharmony_ci				     ctx->addr, ctx->level);
81462306a36Sopenharmony_ci	}
81562306a36Sopenharmony_ci
81662306a36Sopenharmony_ci	if (stage2_pte_is_counted(ctx->old))
81762306a36Sopenharmony_ci		mm_ops->put_page(ctx->ptep);
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_ci	return true;
82062306a36Sopenharmony_ci}
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_cistatic void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new)
82362306a36Sopenharmony_ci{
82462306a36Sopenharmony_ci	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
82562306a36Sopenharmony_ci
82662306a36Sopenharmony_ci	WARN_ON(!stage2_pte_is_locked(*ctx->ptep));
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci	if (stage2_pte_is_counted(new))
82962306a36Sopenharmony_ci		mm_ops->get_page(ctx->ptep);
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_ci	smp_store_release(ctx->ptep, new);
83262306a36Sopenharmony_ci}
83362306a36Sopenharmony_ci
83462306a36Sopenharmony_cistatic bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
83562306a36Sopenharmony_ci{
83662306a36Sopenharmony_ci	/*
83762306a36Sopenharmony_ci	 * If FEAT_TLBIRANGE is implemented, defer the individual
83862306a36Sopenharmony_ci	 * TLB invalidations until the entire walk is finished, and
83962306a36Sopenharmony_ci	 * then use the range-based TLBI instructions to do the
84062306a36Sopenharmony_ci	 * invalidations. Condition deferred TLB invalidation on the
84162306a36Sopenharmony_ci	 * system supporting FWB as the optimization is entirely
84262306a36Sopenharmony_ci	 * pointless when the unmap walker needs to perform CMOs.
84362306a36Sopenharmony_ci	 */
84462306a36Sopenharmony_ci	return system_supports_tlb_range() && stage2_has_fwb(pgt);
84562306a36Sopenharmony_ci}
84662306a36Sopenharmony_ci
84762306a36Sopenharmony_cistatic void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
84862306a36Sopenharmony_ci				struct kvm_s2_mmu *mmu,
84962306a36Sopenharmony_ci				struct kvm_pgtable_mm_ops *mm_ops)
85062306a36Sopenharmony_ci{
85162306a36Sopenharmony_ci	struct kvm_pgtable *pgt = ctx->arg;
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_ci	/*
85462306a36Sopenharmony_ci	 * Clear the existing PTE, and perform break-before-make if it was
85562306a36Sopenharmony_ci	 * valid. Depending on the system support, defer the TLB maintenance
85662306a36Sopenharmony_ci	 * for the same until the entire unmap walk is completed.
85762306a36Sopenharmony_ci	 */
85862306a36Sopenharmony_ci	if (kvm_pte_valid(ctx->old)) {
85962306a36Sopenharmony_ci		kvm_clear_pte(ctx->ptep);
86062306a36Sopenharmony_ci
86162306a36Sopenharmony_ci		if (!stage2_unmap_defer_tlb_flush(pgt))
86262306a36Sopenharmony_ci			kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
86362306a36Sopenharmony_ci					ctx->addr, ctx->level);
86462306a36Sopenharmony_ci	}
86562306a36Sopenharmony_ci
86662306a36Sopenharmony_ci	mm_ops->put_page(ctx->ptep);
86762306a36Sopenharmony_ci}
86862306a36Sopenharmony_ci
86962306a36Sopenharmony_cistatic bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
87062306a36Sopenharmony_ci{
87162306a36Sopenharmony_ci	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
87262306a36Sopenharmony_ci	return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
87362306a36Sopenharmony_ci}
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_cistatic bool stage2_pte_executable(kvm_pte_t pte)
87662306a36Sopenharmony_ci{
87762306a36Sopenharmony_ci	return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
87862306a36Sopenharmony_ci}
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_cistatic u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
88162306a36Sopenharmony_ci				       const struct stage2_map_data *data)
88262306a36Sopenharmony_ci{
88362306a36Sopenharmony_ci	u64 phys = data->phys;
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci	/*
88662306a36Sopenharmony_ci	 * Stage-2 walks to update ownership data are communicated to the map
88762306a36Sopenharmony_ci	 * walker using an invalid PA. Avoid offsetting an already invalid PA,
88862306a36Sopenharmony_ci	 * which could overflow and make the address valid again.
88962306a36Sopenharmony_ci	 */
89062306a36Sopenharmony_ci	if (!kvm_phys_is_valid(phys))
89162306a36Sopenharmony_ci		return phys;
89262306a36Sopenharmony_ci
89362306a36Sopenharmony_ci	/*
89462306a36Sopenharmony_ci	 * Otherwise, work out the correct PA based on how far the walk has
89562306a36Sopenharmony_ci	 * gotten.
89662306a36Sopenharmony_ci	 */
89762306a36Sopenharmony_ci	return phys + (ctx->addr - ctx->start);
89862306a36Sopenharmony_ci}
89962306a36Sopenharmony_ci
90062306a36Sopenharmony_cistatic bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
90162306a36Sopenharmony_ci					struct stage2_map_data *data)
90262306a36Sopenharmony_ci{
90362306a36Sopenharmony_ci	u64 phys = stage2_map_walker_phys_addr(ctx, data);
90462306a36Sopenharmony_ci
90562306a36Sopenharmony_ci	if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
90662306a36Sopenharmony_ci		return false;
90762306a36Sopenharmony_ci
90862306a36Sopenharmony_ci	return kvm_block_mapping_supported(ctx, phys);
90962306a36Sopenharmony_ci}
91062306a36Sopenharmony_ci
91162306a36Sopenharmony_cistatic int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
91262306a36Sopenharmony_ci				      struct stage2_map_data *data)
91362306a36Sopenharmony_ci{
91462306a36Sopenharmony_ci	kvm_pte_t new;
91562306a36Sopenharmony_ci	u64 phys = stage2_map_walker_phys_addr(ctx, data);
91662306a36Sopenharmony_ci	u64 granule = kvm_granule_size(ctx->level);
91762306a36Sopenharmony_ci	struct kvm_pgtable *pgt = data->mmu->pgt;
91862306a36Sopenharmony_ci	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
91962306a36Sopenharmony_ci
92062306a36Sopenharmony_ci	if (!stage2_leaf_mapping_allowed(ctx, data))
92162306a36Sopenharmony_ci		return -E2BIG;
92262306a36Sopenharmony_ci
92362306a36Sopenharmony_ci	if (kvm_phys_is_valid(phys))
92462306a36Sopenharmony_ci		new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level);
92562306a36Sopenharmony_ci	else
92662306a36Sopenharmony_ci		new = kvm_init_invalid_leaf_owner(data->owner_id);
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci	/*
92962306a36Sopenharmony_ci	 * Skip updating the PTE if we are trying to recreate the exact
93062306a36Sopenharmony_ci	 * same mapping or only change the access permissions. Instead,
93162306a36Sopenharmony_ci	 * the vCPU will exit one more time from guest if still needed
93262306a36Sopenharmony_ci	 * and then go through the path of relaxing permissions.
93362306a36Sopenharmony_ci	 */
93462306a36Sopenharmony_ci	if (!stage2_pte_needs_update(ctx->old, new))
93562306a36Sopenharmony_ci		return -EAGAIN;
93662306a36Sopenharmony_ci
93762306a36Sopenharmony_ci	if (!stage2_try_break_pte(ctx, data->mmu))
93862306a36Sopenharmony_ci		return -EAGAIN;
93962306a36Sopenharmony_ci
94062306a36Sopenharmony_ci	/* Perform CMOs before installation of the guest stage-2 PTE */
94162306a36Sopenharmony_ci	if (!kvm_pgtable_walk_skip_cmo(ctx) && mm_ops->dcache_clean_inval_poc &&
94262306a36Sopenharmony_ci	    stage2_pte_cacheable(pgt, new))
94362306a36Sopenharmony_ci		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
94462306a36Sopenharmony_ci					       granule);
94562306a36Sopenharmony_ci
94662306a36Sopenharmony_ci	if (!kvm_pgtable_walk_skip_cmo(ctx) && mm_ops->icache_inval_pou &&
94762306a36Sopenharmony_ci	    stage2_pte_executable(new))
94862306a36Sopenharmony_ci		mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
94962306a36Sopenharmony_ci
95062306a36Sopenharmony_ci	stage2_make_pte(ctx, new);
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_ci	return 0;
95362306a36Sopenharmony_ci}
95462306a36Sopenharmony_ci
95562306a36Sopenharmony_cistatic int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
95662306a36Sopenharmony_ci				     struct stage2_map_data *data)
95762306a36Sopenharmony_ci{
95862306a36Sopenharmony_ci	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
95962306a36Sopenharmony_ci	kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops);
96062306a36Sopenharmony_ci	int ret;
96162306a36Sopenharmony_ci
96262306a36Sopenharmony_ci	if (!stage2_leaf_mapping_allowed(ctx, data))
96362306a36Sopenharmony_ci		return 0;
96462306a36Sopenharmony_ci
96562306a36Sopenharmony_ci	ret = stage2_map_walker_try_leaf(ctx, data);
96662306a36Sopenharmony_ci	if (ret)
96762306a36Sopenharmony_ci		return ret;
96862306a36Sopenharmony_ci
96962306a36Sopenharmony_ci	mm_ops->free_unlinked_table(childp, ctx->level);
97062306a36Sopenharmony_ci	return 0;
97162306a36Sopenharmony_ci}
97262306a36Sopenharmony_ci
97362306a36Sopenharmony_cistatic int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
97462306a36Sopenharmony_ci				struct stage2_map_data *data)
97562306a36Sopenharmony_ci{
97662306a36Sopenharmony_ci	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
97762306a36Sopenharmony_ci	kvm_pte_t *childp, new;
97862306a36Sopenharmony_ci	int ret;
97962306a36Sopenharmony_ci
98062306a36Sopenharmony_ci	ret = stage2_map_walker_try_leaf(ctx, data);
98162306a36Sopenharmony_ci	if (ret != -E2BIG)
98262306a36Sopenharmony_ci		return ret;
98362306a36Sopenharmony_ci
98462306a36Sopenharmony_ci	if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1))
98562306a36Sopenharmony_ci		return -EINVAL;
98662306a36Sopenharmony_ci
98762306a36Sopenharmony_ci	if (!data->memcache)
98862306a36Sopenharmony_ci		return -ENOMEM;
98962306a36Sopenharmony_ci
99062306a36Sopenharmony_ci	childp = mm_ops->zalloc_page(data->memcache);
99162306a36Sopenharmony_ci	if (!childp)
99262306a36Sopenharmony_ci		return -ENOMEM;
99362306a36Sopenharmony_ci
99462306a36Sopenharmony_ci	if (!stage2_try_break_pte(ctx, data->mmu)) {
99562306a36Sopenharmony_ci		mm_ops->put_page(childp);
99662306a36Sopenharmony_ci		return -EAGAIN;
99762306a36Sopenharmony_ci	}
99862306a36Sopenharmony_ci
99962306a36Sopenharmony_ci	/*
100062306a36Sopenharmony_ci	 * If we've run into an existing block mapping then replace it with
100162306a36Sopenharmony_ci	 * a table. Accesses beyond 'end' that fall within the new table
100262306a36Sopenharmony_ci	 * will be mapped lazily.
100362306a36Sopenharmony_ci	 */
100462306a36Sopenharmony_ci	new = kvm_init_table_pte(childp, mm_ops);
100562306a36Sopenharmony_ci	stage2_make_pte(ctx, new);
100662306a36Sopenharmony_ci
100762306a36Sopenharmony_ci	return 0;
100862306a36Sopenharmony_ci}
100962306a36Sopenharmony_ci
101062306a36Sopenharmony_ci/*
101162306a36Sopenharmony_ci * The TABLE_PRE callback runs for table entries on the way down, looking
101262306a36Sopenharmony_ci * for table entries which we could conceivably replace with a block entry
101362306a36Sopenharmony_ci * for this mapping. If it finds one it replaces the entry and calls
101462306a36Sopenharmony_ci * kvm_pgtable_mm_ops::free_unlinked_table() to tear down the detached table.
101562306a36Sopenharmony_ci *
101662306a36Sopenharmony_ci * Otherwise, the LEAF callback performs the mapping at the existing leaves
101762306a36Sopenharmony_ci * instead.
101862306a36Sopenharmony_ci */
101962306a36Sopenharmony_cistatic int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
102062306a36Sopenharmony_ci			     enum kvm_pgtable_walk_flags visit)
102162306a36Sopenharmony_ci{
102262306a36Sopenharmony_ci	struct stage2_map_data *data = ctx->arg;
102362306a36Sopenharmony_ci
102462306a36Sopenharmony_ci	switch (visit) {
102562306a36Sopenharmony_ci	case KVM_PGTABLE_WALK_TABLE_PRE:
102662306a36Sopenharmony_ci		return stage2_map_walk_table_pre(ctx, data);
102762306a36Sopenharmony_ci	case KVM_PGTABLE_WALK_LEAF:
102862306a36Sopenharmony_ci		return stage2_map_walk_leaf(ctx, data);
102962306a36Sopenharmony_ci	default:
103062306a36Sopenharmony_ci		return -EINVAL;
103162306a36Sopenharmony_ci	}
103262306a36Sopenharmony_ci}
103362306a36Sopenharmony_ci
103462306a36Sopenharmony_ciint kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
103562306a36Sopenharmony_ci			   u64 phys, enum kvm_pgtable_prot prot,
103662306a36Sopenharmony_ci			   void *mc, enum kvm_pgtable_walk_flags flags)
103762306a36Sopenharmony_ci{
103862306a36Sopenharmony_ci	int ret;
103962306a36Sopenharmony_ci	struct stage2_map_data map_data = {
104062306a36Sopenharmony_ci		.phys		= ALIGN_DOWN(phys, PAGE_SIZE),
104162306a36Sopenharmony_ci		.mmu		= pgt->mmu,
104262306a36Sopenharmony_ci		.memcache	= mc,
104362306a36Sopenharmony_ci		.force_pte	= pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot),
104462306a36Sopenharmony_ci	};
104562306a36Sopenharmony_ci	struct kvm_pgtable_walker walker = {
104662306a36Sopenharmony_ci		.cb		= stage2_map_walker,
104762306a36Sopenharmony_ci		.flags		= flags |
104862306a36Sopenharmony_ci				  KVM_PGTABLE_WALK_TABLE_PRE |
104962306a36Sopenharmony_ci				  KVM_PGTABLE_WALK_LEAF,
105062306a36Sopenharmony_ci		.arg		= &map_data,
105162306a36Sopenharmony_ci	};
105262306a36Sopenharmony_ci
105362306a36Sopenharmony_ci	if (WARN_ON((pgt->flags & KVM_PGTABLE_S2_IDMAP) && (addr != phys)))
105462306a36Sopenharmony_ci		return -EINVAL;
105562306a36Sopenharmony_ci
105662306a36Sopenharmony_ci	ret = stage2_set_prot_attr(pgt, prot, &map_data.attr);
105762306a36Sopenharmony_ci	if (ret)
105862306a36Sopenharmony_ci		return ret;
105962306a36Sopenharmony_ci
106062306a36Sopenharmony_ci	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
106162306a36Sopenharmony_ci	dsb(ishst);
106262306a36Sopenharmony_ci	return ret;
106362306a36Sopenharmony_ci}
106462306a36Sopenharmony_ci
106562306a36Sopenharmony_ciint kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
106662306a36Sopenharmony_ci				 void *mc, u8 owner_id)
106762306a36Sopenharmony_ci{
106862306a36Sopenharmony_ci	int ret;
106962306a36Sopenharmony_ci	struct stage2_map_data map_data = {
107062306a36Sopenharmony_ci		.phys		= KVM_PHYS_INVALID,
107162306a36Sopenharmony_ci		.mmu		= pgt->mmu,
107262306a36Sopenharmony_ci		.memcache	= mc,
107362306a36Sopenharmony_ci		.owner_id	= owner_id,
107462306a36Sopenharmony_ci		.force_pte	= true,
107562306a36Sopenharmony_ci	};
107662306a36Sopenharmony_ci	struct kvm_pgtable_walker walker = {
107762306a36Sopenharmony_ci		.cb		= stage2_map_walker,
107862306a36Sopenharmony_ci		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
107962306a36Sopenharmony_ci				  KVM_PGTABLE_WALK_LEAF,
108062306a36Sopenharmony_ci		.arg		= &map_data,
108162306a36Sopenharmony_ci	};
108262306a36Sopenharmony_ci
108362306a36Sopenharmony_ci	if (owner_id > KVM_MAX_OWNER_ID)
108462306a36Sopenharmony_ci		return -EINVAL;
108562306a36Sopenharmony_ci
108662306a36Sopenharmony_ci	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
108762306a36Sopenharmony_ci	return ret;
108862306a36Sopenharmony_ci}
108962306a36Sopenharmony_ci
109062306a36Sopenharmony_cistatic int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
109162306a36Sopenharmony_ci			       enum kvm_pgtable_walk_flags visit)
109262306a36Sopenharmony_ci{
109362306a36Sopenharmony_ci	struct kvm_pgtable *pgt = ctx->arg;
109462306a36Sopenharmony_ci	struct kvm_s2_mmu *mmu = pgt->mmu;
109562306a36Sopenharmony_ci	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
109662306a36Sopenharmony_ci	kvm_pte_t *childp = NULL;
109762306a36Sopenharmony_ci	bool need_flush = false;
109862306a36Sopenharmony_ci
109962306a36Sopenharmony_ci	if (!kvm_pte_valid(ctx->old)) {
110062306a36Sopenharmony_ci		if (stage2_pte_is_counted(ctx->old)) {
110162306a36Sopenharmony_ci			kvm_clear_pte(ctx->ptep);
110262306a36Sopenharmony_ci			mm_ops->put_page(ctx->ptep);
110362306a36Sopenharmony_ci		}
110462306a36Sopenharmony_ci		return 0;
110562306a36Sopenharmony_ci	}
110662306a36Sopenharmony_ci
110762306a36Sopenharmony_ci	if (kvm_pte_table(ctx->old, ctx->level)) {
110862306a36Sopenharmony_ci		childp = kvm_pte_follow(ctx->old, mm_ops);
110962306a36Sopenharmony_ci
111062306a36Sopenharmony_ci		if (mm_ops->page_count(childp) != 1)
111162306a36Sopenharmony_ci			return 0;
111262306a36Sopenharmony_ci	} else if (stage2_pte_cacheable(pgt, ctx->old)) {
111362306a36Sopenharmony_ci		need_flush = !stage2_has_fwb(pgt);
111462306a36Sopenharmony_ci	}
111562306a36Sopenharmony_ci
111662306a36Sopenharmony_ci	/*
111762306a36Sopenharmony_ci	 * This is similar to the map() path in that we unmap the entire
111862306a36Sopenharmony_ci	 * block entry and rely on the remaining portions being faulted
111962306a36Sopenharmony_ci	 * back lazily.
112062306a36Sopenharmony_ci	 */
112162306a36Sopenharmony_ci	stage2_unmap_put_pte(ctx, mmu, mm_ops);
112262306a36Sopenharmony_ci
112362306a36Sopenharmony_ci	if (need_flush && mm_ops->dcache_clean_inval_poc)
112462306a36Sopenharmony_ci		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
112562306a36Sopenharmony_ci					       kvm_granule_size(ctx->level));
112662306a36Sopenharmony_ci
112762306a36Sopenharmony_ci	if (childp)
112862306a36Sopenharmony_ci		mm_ops->put_page(childp);
112962306a36Sopenharmony_ci
113062306a36Sopenharmony_ci	return 0;
113162306a36Sopenharmony_ci}
113262306a36Sopenharmony_ci
113362306a36Sopenharmony_ciint kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
113462306a36Sopenharmony_ci{
113562306a36Sopenharmony_ci	int ret;
113662306a36Sopenharmony_ci	struct kvm_pgtable_walker walker = {
113762306a36Sopenharmony_ci		.cb	= stage2_unmap_walker,
113862306a36Sopenharmony_ci		.arg	= pgt,
113962306a36Sopenharmony_ci		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
114062306a36Sopenharmony_ci	};
114162306a36Sopenharmony_ci
114262306a36Sopenharmony_ci	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
114362306a36Sopenharmony_ci	if (stage2_unmap_defer_tlb_flush(pgt))
114462306a36Sopenharmony_ci		/* Perform the deferred TLB invalidations */
114562306a36Sopenharmony_ci		kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
114662306a36Sopenharmony_ci
114762306a36Sopenharmony_ci	return ret;
114862306a36Sopenharmony_ci}
114962306a36Sopenharmony_ci
115062306a36Sopenharmony_cistruct stage2_attr_data {
115162306a36Sopenharmony_ci	kvm_pte_t			attr_set;
115262306a36Sopenharmony_ci	kvm_pte_t			attr_clr;
115362306a36Sopenharmony_ci	kvm_pte_t			pte;
115462306a36Sopenharmony_ci	u32				level;
115562306a36Sopenharmony_ci};
115662306a36Sopenharmony_ci
115762306a36Sopenharmony_cistatic int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
115862306a36Sopenharmony_ci			      enum kvm_pgtable_walk_flags visit)
115962306a36Sopenharmony_ci{
116062306a36Sopenharmony_ci	kvm_pte_t pte = ctx->old;
116162306a36Sopenharmony_ci	struct stage2_attr_data *data = ctx->arg;
116262306a36Sopenharmony_ci	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
116362306a36Sopenharmony_ci
116462306a36Sopenharmony_ci	if (!kvm_pte_valid(ctx->old))
116562306a36Sopenharmony_ci		return -EAGAIN;
116662306a36Sopenharmony_ci
116762306a36Sopenharmony_ci	data->level = ctx->level;
116862306a36Sopenharmony_ci	data->pte = pte;
116962306a36Sopenharmony_ci	pte &= ~data->attr_clr;
117062306a36Sopenharmony_ci	pte |= data->attr_set;
117162306a36Sopenharmony_ci
117262306a36Sopenharmony_ci	/*
117362306a36Sopenharmony_ci	 * We may race with the CPU trying to set the access flag here,
117462306a36Sopenharmony_ci	 * but worst-case the access flag update gets lost and will be
117562306a36Sopenharmony_ci	 * set on the next access instead.
117662306a36Sopenharmony_ci	 */
117762306a36Sopenharmony_ci	if (data->pte != pte) {
117862306a36Sopenharmony_ci		/*
117962306a36Sopenharmony_ci		 * Invalidate instruction cache before updating the guest
118062306a36Sopenharmony_ci		 * stage-2 PTE if we are going to add executable permission.
118162306a36Sopenharmony_ci		 */
118262306a36Sopenharmony_ci		if (mm_ops->icache_inval_pou &&
118362306a36Sopenharmony_ci		    stage2_pte_executable(pte) && !stage2_pte_executable(ctx->old))
118462306a36Sopenharmony_ci			mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
118562306a36Sopenharmony_ci						  kvm_granule_size(ctx->level));
118662306a36Sopenharmony_ci
118762306a36Sopenharmony_ci		if (!stage2_try_set_pte(ctx, pte))
118862306a36Sopenharmony_ci			return -EAGAIN;
118962306a36Sopenharmony_ci	}
119062306a36Sopenharmony_ci
119162306a36Sopenharmony_ci	return 0;
119262306a36Sopenharmony_ci}
119362306a36Sopenharmony_ci
119462306a36Sopenharmony_cistatic int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
119562306a36Sopenharmony_ci				    u64 size, kvm_pte_t attr_set,
119662306a36Sopenharmony_ci				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
119762306a36Sopenharmony_ci				    u32 *level, enum kvm_pgtable_walk_flags flags)
119862306a36Sopenharmony_ci{
119962306a36Sopenharmony_ci	int ret;
120062306a36Sopenharmony_ci	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
120162306a36Sopenharmony_ci	struct stage2_attr_data data = {
120262306a36Sopenharmony_ci		.attr_set	= attr_set & attr_mask,
120362306a36Sopenharmony_ci		.attr_clr	= attr_clr & attr_mask,
120462306a36Sopenharmony_ci	};
120562306a36Sopenharmony_ci	struct kvm_pgtable_walker walker = {
120662306a36Sopenharmony_ci		.cb		= stage2_attr_walker,
120762306a36Sopenharmony_ci		.arg		= &data,
120862306a36Sopenharmony_ci		.flags		= flags | KVM_PGTABLE_WALK_LEAF,
120962306a36Sopenharmony_ci	};
121062306a36Sopenharmony_ci
121162306a36Sopenharmony_ci	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
121262306a36Sopenharmony_ci	if (ret)
121362306a36Sopenharmony_ci		return ret;
121462306a36Sopenharmony_ci
121562306a36Sopenharmony_ci	if (orig_pte)
121662306a36Sopenharmony_ci		*orig_pte = data.pte;
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ci	if (level)
121962306a36Sopenharmony_ci		*level = data.level;
122062306a36Sopenharmony_ci	return 0;
122162306a36Sopenharmony_ci}
122262306a36Sopenharmony_ci
122362306a36Sopenharmony_ciint kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
122462306a36Sopenharmony_ci{
122562306a36Sopenharmony_ci	return stage2_update_leaf_attrs(pgt, addr, size, 0,
122662306a36Sopenharmony_ci					KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
122762306a36Sopenharmony_ci					NULL, NULL, 0);
122862306a36Sopenharmony_ci}
122962306a36Sopenharmony_ci
123062306a36Sopenharmony_cikvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
123162306a36Sopenharmony_ci{
123262306a36Sopenharmony_ci	kvm_pte_t pte = 0;
123362306a36Sopenharmony_ci	int ret;
123462306a36Sopenharmony_ci
123562306a36Sopenharmony_ci	ret = stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
123662306a36Sopenharmony_ci				       &pte, NULL,
123762306a36Sopenharmony_ci				       KVM_PGTABLE_WALK_HANDLE_FAULT |
123862306a36Sopenharmony_ci				       KVM_PGTABLE_WALK_SHARED);
123962306a36Sopenharmony_ci	if (!ret)
124062306a36Sopenharmony_ci		dsb(ishst);
124162306a36Sopenharmony_ci
124262306a36Sopenharmony_ci	return pte;
124362306a36Sopenharmony_ci}
124462306a36Sopenharmony_ci
124562306a36Sopenharmony_cistruct stage2_age_data {
124662306a36Sopenharmony_ci	bool	mkold;
124762306a36Sopenharmony_ci	bool	young;
124862306a36Sopenharmony_ci};
124962306a36Sopenharmony_ci
125062306a36Sopenharmony_cistatic int stage2_age_walker(const struct kvm_pgtable_visit_ctx *ctx,
125162306a36Sopenharmony_ci			     enum kvm_pgtable_walk_flags visit)
125262306a36Sopenharmony_ci{
125362306a36Sopenharmony_ci	kvm_pte_t new = ctx->old & ~KVM_PTE_LEAF_ATTR_LO_S2_AF;
125462306a36Sopenharmony_ci	struct stage2_age_data *data = ctx->arg;
125562306a36Sopenharmony_ci
125662306a36Sopenharmony_ci	if (!kvm_pte_valid(ctx->old) || new == ctx->old)
125762306a36Sopenharmony_ci		return 0;
125862306a36Sopenharmony_ci
125962306a36Sopenharmony_ci	data->young = true;
126062306a36Sopenharmony_ci
126162306a36Sopenharmony_ci	/*
126262306a36Sopenharmony_ci	 * stage2_age_walker() is always called while holding the MMU lock for
126362306a36Sopenharmony_ci	 * write, so this will always succeed. Nonetheless, this deliberately
126462306a36Sopenharmony_ci	 * follows the race detection pattern of the other stage-2 walkers in
126562306a36Sopenharmony_ci	 * case the locking mechanics of the MMU notifiers is ever changed.
126662306a36Sopenharmony_ci	 */
126762306a36Sopenharmony_ci	if (data->mkold && !stage2_try_set_pte(ctx, new))
126862306a36Sopenharmony_ci		return -EAGAIN;
126962306a36Sopenharmony_ci
127062306a36Sopenharmony_ci	/*
127162306a36Sopenharmony_ci	 * "But where's the TLBI?!", you scream.
127262306a36Sopenharmony_ci	 * "Over in the core code", I sigh.
127362306a36Sopenharmony_ci	 *
127462306a36Sopenharmony_ci	 * See the '->clear_flush_young()' callback on the KVM mmu notifier.
127562306a36Sopenharmony_ci	 */
127662306a36Sopenharmony_ci	return 0;
127762306a36Sopenharmony_ci}
127862306a36Sopenharmony_ci
127962306a36Sopenharmony_cibool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
128062306a36Sopenharmony_ci					 u64 size, bool mkold)
128162306a36Sopenharmony_ci{
128262306a36Sopenharmony_ci	struct stage2_age_data data = {
128362306a36Sopenharmony_ci		.mkold		= mkold,
128462306a36Sopenharmony_ci	};
128562306a36Sopenharmony_ci	struct kvm_pgtable_walker walker = {
128662306a36Sopenharmony_ci		.cb		= stage2_age_walker,
128762306a36Sopenharmony_ci		.arg		= &data,
128862306a36Sopenharmony_ci		.flags		= KVM_PGTABLE_WALK_LEAF,
128962306a36Sopenharmony_ci	};
129062306a36Sopenharmony_ci
129162306a36Sopenharmony_ci	WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
129262306a36Sopenharmony_ci	return data.young;
129362306a36Sopenharmony_ci}
129462306a36Sopenharmony_ci
129562306a36Sopenharmony_ciint kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
129662306a36Sopenharmony_ci				   enum kvm_pgtable_prot prot)
129762306a36Sopenharmony_ci{
129862306a36Sopenharmony_ci	int ret;
129962306a36Sopenharmony_ci	u32 level;
130062306a36Sopenharmony_ci	kvm_pte_t set = 0, clr = 0;
130162306a36Sopenharmony_ci
130262306a36Sopenharmony_ci	if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
130362306a36Sopenharmony_ci		return -EINVAL;
130462306a36Sopenharmony_ci
130562306a36Sopenharmony_ci	if (prot & KVM_PGTABLE_PROT_R)
130662306a36Sopenharmony_ci		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
130762306a36Sopenharmony_ci
130862306a36Sopenharmony_ci	if (prot & KVM_PGTABLE_PROT_W)
130962306a36Sopenharmony_ci		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
131062306a36Sopenharmony_ci
131162306a36Sopenharmony_ci	if (prot & KVM_PGTABLE_PROT_X)
131262306a36Sopenharmony_ci		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
131362306a36Sopenharmony_ci
131462306a36Sopenharmony_ci	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level,
131562306a36Sopenharmony_ci				       KVM_PGTABLE_WALK_HANDLE_FAULT |
131662306a36Sopenharmony_ci				       KVM_PGTABLE_WALK_SHARED);
131762306a36Sopenharmony_ci	if (!ret)
131862306a36Sopenharmony_ci		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level);
131962306a36Sopenharmony_ci	return ret;
132062306a36Sopenharmony_ci}
132162306a36Sopenharmony_ci
132262306a36Sopenharmony_cistatic int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx,
132362306a36Sopenharmony_ci			       enum kvm_pgtable_walk_flags visit)
132462306a36Sopenharmony_ci{
132562306a36Sopenharmony_ci	struct kvm_pgtable *pgt = ctx->arg;
132662306a36Sopenharmony_ci	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
132762306a36Sopenharmony_ci
132862306a36Sopenharmony_ci	if (!kvm_pte_valid(ctx->old) || !stage2_pte_cacheable(pgt, ctx->old))
132962306a36Sopenharmony_ci		return 0;
133062306a36Sopenharmony_ci
133162306a36Sopenharmony_ci	if (mm_ops->dcache_clean_inval_poc)
133262306a36Sopenharmony_ci		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
133362306a36Sopenharmony_ci					       kvm_granule_size(ctx->level));
133462306a36Sopenharmony_ci	return 0;
133562306a36Sopenharmony_ci}
133662306a36Sopenharmony_ci
133762306a36Sopenharmony_ciint kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
133862306a36Sopenharmony_ci{
133962306a36Sopenharmony_ci	struct kvm_pgtable_walker walker = {
134062306a36Sopenharmony_ci		.cb	= stage2_flush_walker,
134162306a36Sopenharmony_ci		.flags	= KVM_PGTABLE_WALK_LEAF,
134262306a36Sopenharmony_ci		.arg	= pgt,
134362306a36Sopenharmony_ci	};
134462306a36Sopenharmony_ci
134562306a36Sopenharmony_ci	if (stage2_has_fwb(pgt))
134662306a36Sopenharmony_ci		return 0;
134762306a36Sopenharmony_ci
134862306a36Sopenharmony_ci	return kvm_pgtable_walk(pgt, addr, size, &walker);
134962306a36Sopenharmony_ci}
135062306a36Sopenharmony_ci
135162306a36Sopenharmony_cikvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
135262306a36Sopenharmony_ci					      u64 phys, u32 level,
135362306a36Sopenharmony_ci					      enum kvm_pgtable_prot prot,
135462306a36Sopenharmony_ci					      void *mc, bool force_pte)
135562306a36Sopenharmony_ci{
135662306a36Sopenharmony_ci	struct stage2_map_data map_data = {
135762306a36Sopenharmony_ci		.phys		= phys,
135862306a36Sopenharmony_ci		.mmu		= pgt->mmu,
135962306a36Sopenharmony_ci		.memcache	= mc,
136062306a36Sopenharmony_ci		.force_pte	= force_pte,
136162306a36Sopenharmony_ci	};
136262306a36Sopenharmony_ci	struct kvm_pgtable_walker walker = {
136362306a36Sopenharmony_ci		.cb		= stage2_map_walker,
136462306a36Sopenharmony_ci		.flags		= KVM_PGTABLE_WALK_LEAF |
136562306a36Sopenharmony_ci				  KVM_PGTABLE_WALK_SKIP_BBM_TLBI |
136662306a36Sopenharmony_ci				  KVM_PGTABLE_WALK_SKIP_CMO,
136762306a36Sopenharmony_ci		.arg		= &map_data,
136862306a36Sopenharmony_ci	};
136962306a36Sopenharmony_ci	/*
137062306a36Sopenharmony_ci	 * The input address (.addr) is irrelevant for walking an
137162306a36Sopenharmony_ci	 * unlinked table. Construct an ambiguous IA range to map
137262306a36Sopenharmony_ci	 * kvm_granule_size(level) worth of memory.
137362306a36Sopenharmony_ci	 */
137462306a36Sopenharmony_ci	struct kvm_pgtable_walk_data data = {
137562306a36Sopenharmony_ci		.walker	= &walker,
137662306a36Sopenharmony_ci		.addr	= 0,
137762306a36Sopenharmony_ci		.end	= kvm_granule_size(level),
137862306a36Sopenharmony_ci	};
137962306a36Sopenharmony_ci	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
138062306a36Sopenharmony_ci	kvm_pte_t *pgtable;
138162306a36Sopenharmony_ci	int ret;
138262306a36Sopenharmony_ci
138362306a36Sopenharmony_ci	if (!IS_ALIGNED(phys, kvm_granule_size(level)))
138462306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
138562306a36Sopenharmony_ci
138662306a36Sopenharmony_ci	ret = stage2_set_prot_attr(pgt, prot, &map_data.attr);
138762306a36Sopenharmony_ci	if (ret)
138862306a36Sopenharmony_ci		return ERR_PTR(ret);
138962306a36Sopenharmony_ci
139062306a36Sopenharmony_ci	pgtable = mm_ops->zalloc_page(mc);
139162306a36Sopenharmony_ci	if (!pgtable)
139262306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
139362306a36Sopenharmony_ci
139462306a36Sopenharmony_ci	ret = __kvm_pgtable_walk(&data, mm_ops, (kvm_pteref_t)pgtable,
139562306a36Sopenharmony_ci				 level + 1);
139662306a36Sopenharmony_ci	if (ret) {
139762306a36Sopenharmony_ci		kvm_pgtable_stage2_free_unlinked(mm_ops, pgtable, level);
139862306a36Sopenharmony_ci		mm_ops->put_page(pgtable);
139962306a36Sopenharmony_ci		return ERR_PTR(ret);
140062306a36Sopenharmony_ci	}
140162306a36Sopenharmony_ci
140262306a36Sopenharmony_ci	return pgtable;
140362306a36Sopenharmony_ci}
140462306a36Sopenharmony_ci
140562306a36Sopenharmony_ci/*
140662306a36Sopenharmony_ci * Get the number of page-tables needed to replace a block with a
140762306a36Sopenharmony_ci * fully populated tree up to the PTE entries. Note that @level is
140862306a36Sopenharmony_ci * interpreted as in "level @level entry".
140962306a36Sopenharmony_ci */
141062306a36Sopenharmony_cistatic int stage2_block_get_nr_page_tables(u32 level)
141162306a36Sopenharmony_ci{
141262306a36Sopenharmony_ci	switch (level) {
141362306a36Sopenharmony_ci	case 1:
141462306a36Sopenharmony_ci		return PTRS_PER_PTE + 1;
141562306a36Sopenharmony_ci	case 2:
141662306a36Sopenharmony_ci		return 1;
141762306a36Sopenharmony_ci	case 3:
141862306a36Sopenharmony_ci		return 0;
141962306a36Sopenharmony_ci	default:
142062306a36Sopenharmony_ci		WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL ||
142162306a36Sopenharmony_ci			     level >= KVM_PGTABLE_MAX_LEVELS);
142262306a36Sopenharmony_ci		return -EINVAL;
142362306a36Sopenharmony_ci	};
142462306a36Sopenharmony_ci}
142562306a36Sopenharmony_ci
142662306a36Sopenharmony_cistatic int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
142762306a36Sopenharmony_ci			       enum kvm_pgtable_walk_flags visit)
142862306a36Sopenharmony_ci{
142962306a36Sopenharmony_ci	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
143062306a36Sopenharmony_ci	struct kvm_mmu_memory_cache *mc = ctx->arg;
143162306a36Sopenharmony_ci	struct kvm_s2_mmu *mmu;
143262306a36Sopenharmony_ci	kvm_pte_t pte = ctx->old, new, *childp;
143362306a36Sopenharmony_ci	enum kvm_pgtable_prot prot;
143462306a36Sopenharmony_ci	u32 level = ctx->level;
143562306a36Sopenharmony_ci	bool force_pte;
143662306a36Sopenharmony_ci	int nr_pages;
143762306a36Sopenharmony_ci	u64 phys;
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_ci	/* No huge-pages exist at the last level */
144062306a36Sopenharmony_ci	if (level == KVM_PGTABLE_MAX_LEVELS - 1)
144162306a36Sopenharmony_ci		return 0;
144262306a36Sopenharmony_ci
144362306a36Sopenharmony_ci	/* We only split valid block mappings */
144462306a36Sopenharmony_ci	if (!kvm_pte_valid(pte))
144562306a36Sopenharmony_ci		return 0;
144662306a36Sopenharmony_ci
144762306a36Sopenharmony_ci	nr_pages = stage2_block_get_nr_page_tables(level);
144862306a36Sopenharmony_ci	if (nr_pages < 0)
144962306a36Sopenharmony_ci		return nr_pages;
145062306a36Sopenharmony_ci
145162306a36Sopenharmony_ci	if (mc->nobjs >= nr_pages) {
145262306a36Sopenharmony_ci		/* Build a tree mapped down to the PTE granularity. */
145362306a36Sopenharmony_ci		force_pte = true;
145462306a36Sopenharmony_ci	} else {
145562306a36Sopenharmony_ci		/*
145662306a36Sopenharmony_ci		 * Don't force PTEs, so create_unlinked() below does
145762306a36Sopenharmony_ci		 * not populate the tree up to the PTE level. The
145862306a36Sopenharmony_ci		 * consequence is that the call will require a single
145962306a36Sopenharmony_ci		 * page of level 2 entries at level 1, or a single
146062306a36Sopenharmony_ci		 * page of PTEs at level 2. If we are at level 1, the
146162306a36Sopenharmony_ci		 * PTEs will be created recursively.
146262306a36Sopenharmony_ci		 */
146362306a36Sopenharmony_ci		force_pte = false;
146462306a36Sopenharmony_ci		nr_pages = 1;
146562306a36Sopenharmony_ci	}
146662306a36Sopenharmony_ci
146762306a36Sopenharmony_ci	if (mc->nobjs < nr_pages)
146862306a36Sopenharmony_ci		return -ENOMEM;
146962306a36Sopenharmony_ci
147062306a36Sopenharmony_ci	mmu = container_of(mc, struct kvm_s2_mmu, split_page_cache);
147162306a36Sopenharmony_ci	phys = kvm_pte_to_phys(pte);
147262306a36Sopenharmony_ci	prot = kvm_pgtable_stage2_pte_prot(pte);
147362306a36Sopenharmony_ci
147462306a36Sopenharmony_ci	childp = kvm_pgtable_stage2_create_unlinked(mmu->pgt, phys,
147562306a36Sopenharmony_ci						    level, prot, mc, force_pte);
147662306a36Sopenharmony_ci	if (IS_ERR(childp))
147762306a36Sopenharmony_ci		return PTR_ERR(childp);
147862306a36Sopenharmony_ci
147962306a36Sopenharmony_ci	if (!stage2_try_break_pte(ctx, mmu)) {
148062306a36Sopenharmony_ci		kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level);
148162306a36Sopenharmony_ci		mm_ops->put_page(childp);
148262306a36Sopenharmony_ci		return -EAGAIN;
148362306a36Sopenharmony_ci	}
148462306a36Sopenharmony_ci
148562306a36Sopenharmony_ci	/*
148662306a36Sopenharmony_ci	 * Note, the contents of the page table are guaranteed to be made
148762306a36Sopenharmony_ci	 * visible before the new PTE is assigned because stage2_make_pte()
148862306a36Sopenharmony_ci	 * writes the PTE using smp_store_release().
148962306a36Sopenharmony_ci	 */
149062306a36Sopenharmony_ci	new = kvm_init_table_pte(childp, mm_ops);
149162306a36Sopenharmony_ci	stage2_make_pte(ctx, new);
149262306a36Sopenharmony_ci	dsb(ishst);
149362306a36Sopenharmony_ci	return 0;
149462306a36Sopenharmony_ci}
149562306a36Sopenharmony_ci
149662306a36Sopenharmony_ciint kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
149762306a36Sopenharmony_ci			     struct kvm_mmu_memory_cache *mc)
149862306a36Sopenharmony_ci{
149962306a36Sopenharmony_ci	struct kvm_pgtable_walker walker = {
150062306a36Sopenharmony_ci		.cb	= stage2_split_walker,
150162306a36Sopenharmony_ci		.flags	= KVM_PGTABLE_WALK_LEAF,
150262306a36Sopenharmony_ci		.arg	= mc,
150362306a36Sopenharmony_ci	};
150462306a36Sopenharmony_ci
150562306a36Sopenharmony_ci	return kvm_pgtable_walk(pgt, addr, size, &walker);
150662306a36Sopenharmony_ci}
150762306a36Sopenharmony_ci
150862306a36Sopenharmony_ciint __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
150962306a36Sopenharmony_ci			      struct kvm_pgtable_mm_ops *mm_ops,
151062306a36Sopenharmony_ci			      enum kvm_pgtable_stage2_flags flags,
151162306a36Sopenharmony_ci			      kvm_pgtable_force_pte_cb_t force_pte_cb)
151262306a36Sopenharmony_ci{
151362306a36Sopenharmony_ci	size_t pgd_sz;
151462306a36Sopenharmony_ci	u64 vtcr = mmu->arch->vtcr;
151562306a36Sopenharmony_ci	u32 ia_bits = VTCR_EL2_IPA(vtcr);
151662306a36Sopenharmony_ci	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
151762306a36Sopenharmony_ci	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
151862306a36Sopenharmony_ci
151962306a36Sopenharmony_ci	pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
152062306a36Sopenharmony_ci	pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz);
152162306a36Sopenharmony_ci	if (!pgt->pgd)
152262306a36Sopenharmony_ci		return -ENOMEM;
152362306a36Sopenharmony_ci
152462306a36Sopenharmony_ci	pgt->ia_bits		= ia_bits;
152562306a36Sopenharmony_ci	pgt->start_level	= start_level;
152662306a36Sopenharmony_ci	pgt->mm_ops		= mm_ops;
152762306a36Sopenharmony_ci	pgt->mmu		= mmu;
152862306a36Sopenharmony_ci	pgt->flags		= flags;
152962306a36Sopenharmony_ci	pgt->force_pte_cb	= force_pte_cb;
153062306a36Sopenharmony_ci
153162306a36Sopenharmony_ci	/* Ensure zeroed PGD pages are visible to the hardware walker */
153262306a36Sopenharmony_ci	dsb(ishst);
153362306a36Sopenharmony_ci	return 0;
153462306a36Sopenharmony_ci}
153562306a36Sopenharmony_ci
153662306a36Sopenharmony_cisize_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
153762306a36Sopenharmony_ci{
153862306a36Sopenharmony_ci	u32 ia_bits = VTCR_EL2_IPA(vtcr);
153962306a36Sopenharmony_ci	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
154062306a36Sopenharmony_ci	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
154162306a36Sopenharmony_ci
154262306a36Sopenharmony_ci	return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
154362306a36Sopenharmony_ci}
154462306a36Sopenharmony_ci
154562306a36Sopenharmony_cistatic int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
154662306a36Sopenharmony_ci			      enum kvm_pgtable_walk_flags visit)
154762306a36Sopenharmony_ci{
154862306a36Sopenharmony_ci	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
154962306a36Sopenharmony_ci
155062306a36Sopenharmony_ci	if (!stage2_pte_is_counted(ctx->old))
155162306a36Sopenharmony_ci		return 0;
155262306a36Sopenharmony_ci
155362306a36Sopenharmony_ci	mm_ops->put_page(ctx->ptep);
155462306a36Sopenharmony_ci
155562306a36Sopenharmony_ci	if (kvm_pte_table(ctx->old, ctx->level))
155662306a36Sopenharmony_ci		mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops));
155762306a36Sopenharmony_ci
155862306a36Sopenharmony_ci	return 0;
155962306a36Sopenharmony_ci}
156062306a36Sopenharmony_ci
156162306a36Sopenharmony_civoid kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
156262306a36Sopenharmony_ci{
156362306a36Sopenharmony_ci	size_t pgd_sz;
156462306a36Sopenharmony_ci	struct kvm_pgtable_walker walker = {
156562306a36Sopenharmony_ci		.cb	= stage2_free_walker,
156662306a36Sopenharmony_ci		.flags	= KVM_PGTABLE_WALK_LEAF |
156762306a36Sopenharmony_ci			  KVM_PGTABLE_WALK_TABLE_POST,
156862306a36Sopenharmony_ci	};
156962306a36Sopenharmony_ci
157062306a36Sopenharmony_ci	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
157162306a36Sopenharmony_ci	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
157262306a36Sopenharmony_ci	pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz);
157362306a36Sopenharmony_ci	pgt->pgd = NULL;
157462306a36Sopenharmony_ci}
157562306a36Sopenharmony_ci
157662306a36Sopenharmony_civoid kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
157762306a36Sopenharmony_ci{
157862306a36Sopenharmony_ci	kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
157962306a36Sopenharmony_ci	struct kvm_pgtable_walker walker = {
158062306a36Sopenharmony_ci		.cb	= stage2_free_walker,
158162306a36Sopenharmony_ci		.flags	= KVM_PGTABLE_WALK_LEAF |
158262306a36Sopenharmony_ci			  KVM_PGTABLE_WALK_TABLE_POST,
158362306a36Sopenharmony_ci	};
158462306a36Sopenharmony_ci	struct kvm_pgtable_walk_data data = {
158562306a36Sopenharmony_ci		.walker	= &walker,
158662306a36Sopenharmony_ci
158762306a36Sopenharmony_ci		/*
158862306a36Sopenharmony_ci		 * At this point the IPA really doesn't matter, as the page
158962306a36Sopenharmony_ci		 * table being traversed has already been removed from the stage
159062306a36Sopenharmony_ci		 * 2. Set an appropriate range to cover the entire page table.
159162306a36Sopenharmony_ci		 */
159262306a36Sopenharmony_ci		.addr	= 0,
159362306a36Sopenharmony_ci		.end	= kvm_granule_size(level),
159462306a36Sopenharmony_ci	};
159562306a36Sopenharmony_ci
159662306a36Sopenharmony_ci	WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1));
159762306a36Sopenharmony_ci
159862306a36Sopenharmony_ci	WARN_ON(mm_ops->page_count(pgtable) != 1);
159962306a36Sopenharmony_ci	mm_ops->put_page(pgtable);
160062306a36Sopenharmony_ci}
1601