162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci#ifndef __KVM_X86_MMU_TDP_ITER_H 462306a36Sopenharmony_ci#define __KVM_X86_MMU_TDP_ITER_H 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/kvm_host.h> 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include "mmu.h" 962306a36Sopenharmony_ci#include "spte.h" 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci/* 1262306a36Sopenharmony_ci * TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs) 1362306a36Sopenharmony_ci * to be zapped while holding mmu_lock for read, and to allow TLB flushes to be 1462306a36Sopenharmony_ci * batched without having to collect the list of zapped SPs. Flows that can 1562306a36Sopenharmony_ci * remove SPs must service pending TLB flushes prior to dropping RCU protection. 1662306a36Sopenharmony_ci */ 1762306a36Sopenharmony_cistatic inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep) 1862306a36Sopenharmony_ci{ 1962306a36Sopenharmony_ci return READ_ONCE(*rcu_dereference(sptep)); 2062306a36Sopenharmony_ci} 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_cistatic inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte) 2362306a36Sopenharmony_ci{ 2462306a36Sopenharmony_ci return xchg(rcu_dereference(sptep), new_spte); 2562306a36Sopenharmony_ci} 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_cistatic inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte) 2862306a36Sopenharmony_ci{ 2962306a36Sopenharmony_ci WRITE_ONCE(*rcu_dereference(sptep), new_spte); 3062306a36Sopenharmony_ci} 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_ci/* 3362306a36Sopenharmony_ci * SPTEs must be modified atomically if they are shadow-present, leaf 3462306a36Sopenharmony_ci * SPTEs, and have volatile bits, i.e. has bits that can be set outside 3562306a36Sopenharmony_ci * of mmu_lock. The Writable bit can be set by KVM's fast page fault 3662306a36Sopenharmony_ci * handler, and Accessed and Dirty bits can be set by the CPU. 3762306a36Sopenharmony_ci * 3862306a36Sopenharmony_ci * Note, non-leaf SPTEs do have Accessed bits and those bits are 3962306a36Sopenharmony_ci * technically volatile, but KVM doesn't consume the Accessed bit of 4062306a36Sopenharmony_ci * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit. This 4162306a36Sopenharmony_ci * logic needs to be reassessed if KVM were to use non-leaf Accessed 4262306a36Sopenharmony_ci * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs. 4362306a36Sopenharmony_ci */ 4462306a36Sopenharmony_cistatic inline bool kvm_tdp_mmu_spte_need_atomic_write(u64 old_spte, int level) 4562306a36Sopenharmony_ci{ 4662306a36Sopenharmony_ci return is_shadow_present_pte(old_spte) && 4762306a36Sopenharmony_ci is_last_spte(old_spte, level) && 4862306a36Sopenharmony_ci spte_has_volatile_bits(old_spte); 4962306a36Sopenharmony_ci} 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_cistatic inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte, 5262306a36Sopenharmony_ci u64 new_spte, int level) 5362306a36Sopenharmony_ci{ 5462306a36Sopenharmony_ci if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level)) 5562306a36Sopenharmony_ci return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte); 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci __kvm_tdp_mmu_write_spte(sptep, new_spte); 5862306a36Sopenharmony_ci return old_spte; 5962306a36Sopenharmony_ci} 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_cistatic inline u64 tdp_mmu_clear_spte_bits(tdp_ptep_t sptep, u64 old_spte, 6262306a36Sopenharmony_ci u64 mask, int level) 6362306a36Sopenharmony_ci{ 6462306a36Sopenharmony_ci atomic64_t *sptep_atomic; 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level)) { 6762306a36Sopenharmony_ci sptep_atomic = (atomic64_t *)rcu_dereference(sptep); 6862306a36Sopenharmony_ci return (u64)atomic64_fetch_and(~mask, sptep_atomic); 6962306a36Sopenharmony_ci } 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci __kvm_tdp_mmu_write_spte(sptep, old_spte & ~mask); 7262306a36Sopenharmony_ci return old_spte; 7362306a36Sopenharmony_ci} 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci/* 7662306a36Sopenharmony_ci * A TDP iterator performs a pre-order walk over a TDP paging structure. 7762306a36Sopenharmony_ci */ 7862306a36Sopenharmony_cistruct tdp_iter { 7962306a36Sopenharmony_ci /* 8062306a36Sopenharmony_ci * The iterator will traverse the paging structure towards the mapping 8162306a36Sopenharmony_ci * for this GFN. 8262306a36Sopenharmony_ci */ 8362306a36Sopenharmony_ci gfn_t next_last_level_gfn; 8462306a36Sopenharmony_ci /* 8562306a36Sopenharmony_ci * The next_last_level_gfn at the time when the thread last 8662306a36Sopenharmony_ci * yielded. Only yielding when the next_last_level_gfn != 8762306a36Sopenharmony_ci * yielded_gfn helps ensure forward progress. 8862306a36Sopenharmony_ci */ 8962306a36Sopenharmony_ci gfn_t yielded_gfn; 9062306a36Sopenharmony_ci /* Pointers to the page tables traversed to reach the current SPTE */ 9162306a36Sopenharmony_ci tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL]; 9262306a36Sopenharmony_ci /* A pointer to the current SPTE */ 9362306a36Sopenharmony_ci tdp_ptep_t sptep; 9462306a36Sopenharmony_ci /* The lowest GFN mapped by the current SPTE */ 9562306a36Sopenharmony_ci gfn_t gfn; 9662306a36Sopenharmony_ci /* The level of the root page given to the iterator */ 9762306a36Sopenharmony_ci int root_level; 9862306a36Sopenharmony_ci /* The lowest level the iterator should traverse to */ 9962306a36Sopenharmony_ci int min_level; 10062306a36Sopenharmony_ci /* The iterator's current level within the paging structure */ 10162306a36Sopenharmony_ci int level; 10262306a36Sopenharmony_ci /* The address space ID, i.e. SMM vs. regular. */ 10362306a36Sopenharmony_ci int as_id; 10462306a36Sopenharmony_ci /* A snapshot of the value at sptep */ 10562306a36Sopenharmony_ci u64 old_spte; 10662306a36Sopenharmony_ci /* 10762306a36Sopenharmony_ci * Whether the iterator has a valid state. This will be false if the 10862306a36Sopenharmony_ci * iterator walks off the end of the paging structure. 10962306a36Sopenharmony_ci */ 11062306a36Sopenharmony_ci bool valid; 11162306a36Sopenharmony_ci /* 11262306a36Sopenharmony_ci * True if KVM dropped mmu_lock and yielded in the middle of a walk, in 11362306a36Sopenharmony_ci * which case tdp_iter_next() needs to restart the walk at the root 11462306a36Sopenharmony_ci * level instead of advancing to the next entry. 11562306a36Sopenharmony_ci */ 11662306a36Sopenharmony_ci bool yielded; 11762306a36Sopenharmony_ci}; 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci/* 12062306a36Sopenharmony_ci * Iterates over every SPTE mapping the GFN range [start, end) in a 12162306a36Sopenharmony_ci * preorder traversal. 12262306a36Sopenharmony_ci */ 12362306a36Sopenharmony_ci#define for_each_tdp_pte_min_level(iter, root, min_level, start, end) \ 12462306a36Sopenharmony_ci for (tdp_iter_start(&iter, root, min_level, start); \ 12562306a36Sopenharmony_ci iter.valid && iter.gfn < end; \ 12662306a36Sopenharmony_ci tdp_iter_next(&iter)) 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci#define for_each_tdp_pte(iter, root, start, end) \ 12962306a36Sopenharmony_ci for_each_tdp_pte_min_level(iter, root, PG_LEVEL_4K, start, end) 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_citdp_ptep_t spte_to_child_pt(u64 pte, int level); 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_civoid tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root, 13462306a36Sopenharmony_ci int min_level, gfn_t next_last_level_gfn); 13562306a36Sopenharmony_civoid tdp_iter_next(struct tdp_iter *iter); 13662306a36Sopenharmony_civoid tdp_iter_restart(struct tdp_iter *iter); 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci#endif /* __KVM_X86_MMU_TDP_ITER_H */ 139