162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci
362306a36Sopenharmony_ci#ifndef __KVM_X86_MMU_TDP_ITER_H
462306a36Sopenharmony_ci#define __KVM_X86_MMU_TDP_ITER_H
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/kvm_host.h>
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include "mmu.h"
962306a36Sopenharmony_ci#include "spte.h"
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci/*
1262306a36Sopenharmony_ci * TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs)
1362306a36Sopenharmony_ci * to be zapped while holding mmu_lock for read, and to allow TLB flushes to be
1462306a36Sopenharmony_ci * batched without having to collect the list of zapped SPs.  Flows that can
1562306a36Sopenharmony_ci * remove SPs must service pending TLB flushes prior to dropping RCU protection.
1662306a36Sopenharmony_ci */
1762306a36Sopenharmony_cistatic inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
1862306a36Sopenharmony_ci{
1962306a36Sopenharmony_ci	return READ_ONCE(*rcu_dereference(sptep));
2062306a36Sopenharmony_ci}
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_cistatic inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
2362306a36Sopenharmony_ci{
2462306a36Sopenharmony_ci	return xchg(rcu_dereference(sptep), new_spte);
2562306a36Sopenharmony_ci}
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_cistatic inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
2862306a36Sopenharmony_ci{
2962306a36Sopenharmony_ci	WRITE_ONCE(*rcu_dereference(sptep), new_spte);
3062306a36Sopenharmony_ci}
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_ci/*
3362306a36Sopenharmony_ci * SPTEs must be modified atomically if they are shadow-present, leaf
3462306a36Sopenharmony_ci * SPTEs, and have volatile bits, i.e. has bits that can be set outside
3562306a36Sopenharmony_ci * of mmu_lock.  The Writable bit can be set by KVM's fast page fault
3662306a36Sopenharmony_ci * handler, and Accessed and Dirty bits can be set by the CPU.
3762306a36Sopenharmony_ci *
3862306a36Sopenharmony_ci * Note, non-leaf SPTEs do have Accessed bits and those bits are
3962306a36Sopenharmony_ci * technically volatile, but KVM doesn't consume the Accessed bit of
4062306a36Sopenharmony_ci * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit.  This
4162306a36Sopenharmony_ci * logic needs to be reassessed if KVM were to use non-leaf Accessed
4262306a36Sopenharmony_ci * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs.
4362306a36Sopenharmony_ci */
4462306a36Sopenharmony_cistatic inline bool kvm_tdp_mmu_spte_need_atomic_write(u64 old_spte, int level)
4562306a36Sopenharmony_ci{
4662306a36Sopenharmony_ci	return is_shadow_present_pte(old_spte) &&
4762306a36Sopenharmony_ci	       is_last_spte(old_spte, level) &&
4862306a36Sopenharmony_ci	       spte_has_volatile_bits(old_spte);
4962306a36Sopenharmony_ci}
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_cistatic inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
5262306a36Sopenharmony_ci					 u64 new_spte, int level)
5362306a36Sopenharmony_ci{
5462306a36Sopenharmony_ci	if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level))
5562306a36Sopenharmony_ci		return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte);
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci	__kvm_tdp_mmu_write_spte(sptep, new_spte);
5862306a36Sopenharmony_ci	return old_spte;
5962306a36Sopenharmony_ci}
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_cistatic inline u64 tdp_mmu_clear_spte_bits(tdp_ptep_t sptep, u64 old_spte,
6262306a36Sopenharmony_ci					  u64 mask, int level)
6362306a36Sopenharmony_ci{
6462306a36Sopenharmony_ci	atomic64_t *sptep_atomic;
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci	if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level)) {
6762306a36Sopenharmony_ci		sptep_atomic = (atomic64_t *)rcu_dereference(sptep);
6862306a36Sopenharmony_ci		return (u64)atomic64_fetch_and(~mask, sptep_atomic);
6962306a36Sopenharmony_ci	}
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci	__kvm_tdp_mmu_write_spte(sptep, old_spte & ~mask);
7262306a36Sopenharmony_ci	return old_spte;
7362306a36Sopenharmony_ci}
7462306a36Sopenharmony_ci
/*
 * A TDP iterator performs a pre-order walk over a TDP paging structure.
 *
 * struct tdp_iter carries the state of one such walk: the target GFN, the
 * page tables traversed so far, the current SPTE together with its level and
 * GFN, and the bookkeeping used when the walk yields.
 */
struct tdp_iter {
	/*
	 * The iterator will traverse the paging structure towards the mapping
	 * for this GFN.
	 */
	gfn_t next_last_level_gfn;
	/*
	 * The next_last_level_gfn at the time when the thread last
	 * yielded. Only yielding when the next_last_level_gfn !=
	 * yielded_gfn helps ensure forward progress.
	 */
	gfn_t yielded_gfn;
	/* Pointers to the page tables traversed to reach the current SPTE */
	tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL];
	/* A pointer to the current SPTE */
	tdp_ptep_t sptep;
	/* The lowest GFN mapped by the current SPTE */
	gfn_t gfn;
	/* The level of the root page given to the iterator */
	int root_level;
	/* The lowest level the iterator should traverse to */
	int min_level;
	/* The iterator's current level within the paging structure */
	int level;
	/* The address space ID, i.e. SMM vs. regular. */
	int as_id;
	/* A snapshot of the value at sptep */
	u64 old_spte;
	/*
	 * Whether the iterator has a valid state. This will be false if the
	 * iterator walks off the end of the paging structure.
	 */
	bool valid;
	/*
	 * True if KVM dropped mmu_lock and yielded in the middle of a walk, in
	 * which case tdp_iter_next() needs to restart the walk at the root
	 * level instead of advancing to the next entry.
	 */
	bool yielded;
};
11862306a36Sopenharmony_ci
/*
 * Iterates over every SPTE mapping the GFN range [start, end) in a
 * preorder traversal.  @min_level is handed to tdp_iter_start() as the
 * lowest level the walk should traverse to.
 *
 * @iter must be a struct tdp_iter lvalue (not a pointer).  @iter and @end
 * are parenthesized wherever they are expanded inside an expression so that
 * compound arguments (e.g. "a & b", "c ? x : y", "*iterp") keep their
 * intended grouping.  @iter is expanded several times and @end is
 * re-evaluated on every loop iteration, so neither should have side effects.
 */
#define for_each_tdp_pte_min_level(iter, root, min_level, start, end) \
	for (tdp_iter_start(&(iter), root, min_level, start);	      \
	     (iter).valid && (iter).gfn < (end);		      \
	     tdp_iter_next(&(iter)))
12762306a36Sopenharmony_ci
/*
 * Same walk as for_each_tdp_pte_min_level() over the GFN range [start, end),
 * with the minimum level fixed to PG_LEVEL_4K.
 */
#define for_each_tdp_pte(iter, root, start, end) \
	for_each_tdp_pte_min_level(iter, root, PG_LEVEL_4K, start, end)
13062306a36Sopenharmony_ci
/*
 * NOTE(review): defined outside this header; presumably returns the child
 * page table referenced by the non-leaf SPTE @pte at @level - confirm against
 * the definition.
 */
tdp_ptep_t spte_to_child_pt(u64 pte, int level);

/*
 * Iterator entry points.  tdp_iter_start() and tdp_iter_next() are the init
 * and advance steps of the for_each_tdp_pte*() loops; per the struct tdp_iter
 * documentation, tdp_iter_next() restarts the walk at the root level when
 * iter->yielded is set instead of advancing to the next entry.
 * NOTE(review): tdp_iter_restart() is presumably what performs that restart -
 * confirm against the definition.
 */
void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
		    int min_level, gfn_t next_last_level_gfn);
void tdp_iter_next(struct tdp_iter *iter);
void tdp_iter_restart(struct tdp_iter *iter);
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci#endif /* __KVM_X86_MMU_TDP_ITER_H */
139