18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci 38c2ecf20Sopenharmony_ci#include "mmu.h" 48c2ecf20Sopenharmony_ci#include "mmu_internal.h" 58c2ecf20Sopenharmony_ci#include "mmutrace.h" 68c2ecf20Sopenharmony_ci#include "tdp_iter.h" 78c2ecf20Sopenharmony_ci#include "tdp_mmu.h" 88c2ecf20Sopenharmony_ci#include "spte.h" 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 118c2ecf20Sopenharmony_cistatic bool __read_mostly tdp_mmu_enabled = false; 128c2ecf20Sopenharmony_cimodule_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644); 138c2ecf20Sopenharmony_ci#endif 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_cistatic bool is_tdp_mmu_enabled(void) 168c2ecf20Sopenharmony_ci{ 178c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 188c2ecf20Sopenharmony_ci return tdp_enabled && READ_ONCE(tdp_mmu_enabled); 198c2ecf20Sopenharmony_ci#else 208c2ecf20Sopenharmony_ci return false; 218c2ecf20Sopenharmony_ci#endif /* CONFIG_X86_64 */ 228c2ecf20Sopenharmony_ci} 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci/* Initializes the TDP MMU for the VM, if enabled. */ 258c2ecf20Sopenharmony_civoid kvm_mmu_init_tdp_mmu(struct kvm *kvm) 268c2ecf20Sopenharmony_ci{ 278c2ecf20Sopenharmony_ci if (!is_tdp_mmu_enabled()) 288c2ecf20Sopenharmony_ci return; 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ci /* This should not be changed for the lifetime of the VM. 
*/ 318c2ecf20Sopenharmony_ci kvm->arch.tdp_mmu_enabled = true; 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); 348c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages); 358c2ecf20Sopenharmony_ci} 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_civoid kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) 388c2ecf20Sopenharmony_ci{ 398c2ecf20Sopenharmony_ci if (!kvm->arch.tdp_mmu_enabled) 408c2ecf20Sopenharmony_ci return; 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); 438c2ecf20Sopenharmony_ci} 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_cistatic void tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root) 468c2ecf20Sopenharmony_ci{ 478c2ecf20Sopenharmony_ci if (kvm_mmu_put_root(kvm, root)) 488c2ecf20Sopenharmony_ci kvm_tdp_mmu_free_root(kvm, root); 498c2ecf20Sopenharmony_ci} 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_cistatic inline bool tdp_mmu_next_root_valid(struct kvm *kvm, 528c2ecf20Sopenharmony_ci struct kvm_mmu_page *root) 538c2ecf20Sopenharmony_ci{ 548c2ecf20Sopenharmony_ci lockdep_assert_held(&kvm->mmu_lock); 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci if (list_entry_is_head(root, &kvm->arch.tdp_mmu_roots, link)) 578c2ecf20Sopenharmony_ci return false; 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci kvm_mmu_get_root(kvm, root); 608c2ecf20Sopenharmony_ci return true; 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci} 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_cistatic inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, 658c2ecf20Sopenharmony_ci struct kvm_mmu_page *root) 668c2ecf20Sopenharmony_ci{ 678c2ecf20Sopenharmony_ci struct kvm_mmu_page *next_root; 688c2ecf20Sopenharmony_ci 698c2ecf20Sopenharmony_ci next_root = list_next_entry(root, link); 708c2ecf20Sopenharmony_ci tdp_mmu_put_root(kvm, root); 718c2ecf20Sopenharmony_ci return next_root; 728c2ecf20Sopenharmony_ci} 738c2ecf20Sopenharmony_ci 
/*
 * Note: this iterator gets and puts references to the roots it iterates over.
 * This makes it safe to release the MMU lock and yield within the loop, but
 * if exiting the loop early, the caller must drop the reference to the most
 * recent root. (Unless keeping a live reference is desirable.)
 *
 * The reference is taken by tdp_mmu_next_root_valid() in the loop condition
 * and dropped by tdp_mmu_next_root() in the loop step.
 */
#define for_each_tdp_mmu_root_yield_safe(_kvm, _root)				\
	for (_root = list_first_entry(&_kvm->arch.tdp_mmu_roots,	\
				      typeof(*_root), link);		\
	     tdp_mmu_next_root_valid(_kvm, _root);			\
	     _root = tdp_mmu_next_root(_kvm, _root))

/*
 * Plain walk over the VM's list of TDP MMU roots.  Unlike the yield-safe
 * variant above, this takes no references on the roots it visits.
 */
#define for_each_tdp_mmu_root(_kvm, _root)				\
	list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)

/*
 * Returns true if @hpa is the address of a TDP MMU root page, i.e. its shadow
 * page is flagged tdp_mmu_page and has a non-zero root_count.  Returns false
 * if the TDP MMU is not enabled for the VM, and warns and returns false if
 * @hpa is invalid or has no shadow page.
 */
bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
{
	struct kvm_mmu_page *sp;

	if (!kvm->arch.tdp_mmu_enabled)
		return false;
	if (WARN_ON(!VALID_PAGE(hpa)))
		return false;

	sp = to_shadow_page(hpa);
	if (WARN_ON(!sp))
		return false;

	return sp->tdp_mmu_page && sp->root_count;
}

static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
			  gfn_t start, gfn_t end, bool can_yield, bool flush);

/*
 * Free a TDP MMU root: unlink it from its list, zap every GFN below max_gfn
 * without yielding, then release the page table page and the page header.
 * Must be called with mmu_lock held; warns if the root still has references
 * or is not a TDP MMU page.
 */
void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
{
	gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);

	lockdep_assert_held(&kvm->mmu_lock);

	WARN_ON(root->root_count);
	WARN_ON(!root->tdp_mmu_page);

	list_del(&root->link);

	/* The returned "flush needed" indication is not used here. */
	zap_gfn_range(kvm, root, 0, max_gfn, false, false);

	free_page((unsigned long)root->spt);
	kmem_cache_free(mmu_page_header_cache, root);
}

/*
 * Build the page role for a TDP MMU page at @level: start from the vCPU MMU's
 * base role and override level, direct, gpte_is_8_bytes and access.
 */
static union kvm_mmu_page_role page_role_for_level(struct kvm_vcpu *vcpu,
						   int level)
{
	union kvm_mmu_page_role role;

	role = vcpu->arch.mmu->mmu_role.base;
	role.level = level;
	role.direct = true;
	role.gpte_is_8_bytes = true;
	role.access = ACC_ALL;

	return role;
}

/*
 * Allocate a TDP MMU page (header plus page table page) from the vCPU's
 * memory caches and initialize its role, gfn and tdp_mmu_page flag.  The
 * header pointer is stored in the page table page's private field.
 *
 * NOTE(review): neither allocation result is checked here; presumably the
 * caches are topped up by the caller beforehand -- confirm.
 */
static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
					       int level)
{
	struct kvm_mmu_page *sp;

	sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
	sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);

	sp->role.word = page_role_for_level(vcpu, level).word;
	sp->gfn = gfn;
	sp->tdp_mmu_page = true;

	return sp;
}

/*
 * Find or create the TDP MMU root matching the vCPU's current role.  Under
 * mmu_lock, an existing root with an identical role word is reused (with an
 * extra reference taken); otherwise a new root is allocated with a
 * root_count of 1 and added to the VM's root list.
 */
static struct kvm_mmu_page *get_tdp_mmu_vcpu_root(struct kvm_vcpu *vcpu)
{
	union kvm_mmu_page_role role;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_page *root;

	role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level);

	spin_lock(&kvm->mmu_lock);

	/* Check for an existing root before allocating a new one. */
	for_each_tdp_mmu_root(kvm, root) {
		if (root->role.word == role.word) {
			kvm_mmu_get_root(kvm, root);
			spin_unlock(&kvm->mmu_lock);
			return root;
		}
	}

	root = alloc_tdp_mmu_page(vcpu, 0, vcpu->arch.mmu->shadow_root_level);
	root->root_count = 1;

	list_add(&root->link, &kvm->arch.tdp_mmu_roots);

	spin_unlock(&kvm->mmu_lock);

	return root;
}

/*
 * Return the physical address of the page table page of the vCPU's TDP MMU
 * root, or INVALID_PAGE if no root was obtained.
 */
hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
{
	struct kvm_mmu_page *root;

	root = get_tdp_mmu_vcpu_root(vcpu);
	if (!root)
		return INVALID_PAGE;

	return __pa(root->spt);
}

static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				u64 old_spte, u64 new_spte, int level);

/* Address space id of a shadow page: 1 if its role has smm set, else 0. */
static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
{
	return sp->role.smm ? 1 : 0;
}

/*
 * Accessed-state bookkeeping for an SPTE change.  Only acts when the old SPTE
 * was a present leaf: if it was accessed and the new SPTE either is not
 * accessed or maps a different PFN, the old PFN is marked accessed.
 */
static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level)
{
	bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);

	if (!is_shadow_present_pte(old_spte) || !is_last_spte(old_spte, level))
		return;

	if (is_accessed_spte(old_spte) &&
	    (!is_accessed_spte(new_spte) || pfn_changed))
		kvm_set_pfn_accessed(spte_to_pfn(old_spte));
}

/*
 * Dirty-log bookkeeping for an SPTE change.  Only 4K-level SPTEs are
 * considered: when the new SPTE is writable and the old one either was not
 * writable or mapped a different PFN, @gfn is marked dirty in its memslot.
 */
static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
					  u64 old_spte, u64 new_spte, int level)
{
	bool pfn_changed;
	struct kvm_memory_slot *slot;

	if (level > PG_LEVEL_4K)
		return;

	pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);

	if ((!is_writable_pte(old_spte) || pfn_changed) &&
	    is_writable_pte(new_spte)) {
		slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn);
		mark_page_dirty_in_slot(slot, gfn);
	}
}

/**
 * __handle_changed_spte - handle bookkeeping associated with an SPTE change
 * @kvm: kvm instance
 * @as_id: the address space of the paging structure the SPTE was a part of
 * @gfn: the base GFN that was mapped by the SPTE
 * @old_spte: The value of the SPTE before the change
 * @new_spte: The value of the SPTE after the change
 * @level: the level of the PT the SPTE is part of in the paging structure
 *
 * Handle bookkeeping that might result from the modification of a SPTE.
 * This function must be called for all TDP SPTE modifications.
 */
static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				u64 old_spte, u64 new_spte, int level)
{
	bool was_present = is_shadow_present_pte(old_spte);
	bool is_present = is_shadow_present_pte(new_spte);
	bool was_leaf = was_present && is_last_spte(old_spte, level);
	bool is_leaf = is_present && is_last_spte(new_spte, level);
	bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
	u64 *pt;
	struct kvm_mmu_page *sp;
	u64 old_child_spte;
	int i;

	/* Sanity checks: level in range and @gfn aligned to that level. */
	WARN_ON(level > PT64_ROOT_MAX_LEVEL);
	WARN_ON(level < PG_LEVEL_4K);
	WARN_ON(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));

	/*
	 * If this warning were to trigger it would indicate that there was a
	 * missing MMU notifier or a race with some notifier handler.
	 * A present, leaf SPTE should never be directly replaced with another
	 * present leaf SPTE pointing to a different PFN. A notifier handler
	 * should be zapping the SPTE before the main MM's page table is
	 * changed, or the SPTE should be zeroed, and the TLBs flushed by the
	 * thread before replacement.
	 */
	if (was_leaf && is_leaf && pfn_changed) {
		/* NOTE(review): the format string has no trailing newline. */
		pr_err("Invalid SPTE change: cannot replace a present leaf\n"
		       "SPTE with another present leaf SPTE mapping a\n"
		       "different PFN!\n"
		       "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
		       as_id, gfn, old_spte, new_spte, level);

		/*
		 * Crash the host to prevent error propagation and guest data
		 * corruption.
		 */
		BUG();
	}

	/* Nothing changed, so there is no bookkeeping to do. */
	if (old_spte == new_spte)
		return;

	/*
	 * The only times a SPTE should be changed from a non-present to
	 * non-present state is when an MMIO entry is installed/modified/
	 * removed. In that case, there is nothing to do here.
	 */
	if (!was_present && !is_present) {
		/*
		 * If this change does not involve a MMIO SPTE, it is
		 * unexpected. Log the change, though it should not impact the
		 * guest since both the former and current SPTEs are nonpresent.
		 */
		if (WARN_ON(!is_mmio_spte(old_spte) && !is_mmio_spte(new_spte)))
			pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
			       "should not be replaced with another,\n"
			       "different nonpresent SPTE, unless one or both\n"
			       "are MMIO SPTEs.\n"
			       "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
			       as_id, gfn, old_spte, new_spte, level);
		return;
	}


	/*
	 * A dirty leaf that is being made clean, or is having its PFN
	 * replaced, has its old PFN marked dirty.
	 */
	if (was_leaf && is_dirty_spte(old_spte) &&
	    (!is_dirty_spte(new_spte) || pfn_changed))
		kvm_set_pfn_dirty(spte_to_pfn(old_spte));

	/*
	 * Recursively handle child PTs if the change removed a subtree from
	 * the paging structure.
	 */
	if (was_present && !was_leaf && (pfn_changed || !is_present)) {
		pt = spte_to_child_pt(old_spte, level);
		sp = sptep_to_sp(pt);

		list_del(&sp->link);

		if (sp->lpage_disallowed)
			unaccount_huge_nx_page(kvm, sp);

		/*
		 * Clear every entry of the removed page table and run the full
		 * bookkeeping (including accessed/dirty-log tracking) on each
		 * old child value, one level down.
		 */
		for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
			old_child_spte = READ_ONCE(*(pt + i));
			WRITE_ONCE(*(pt + i), 0);
			handle_changed_spte(kvm, as_id,
				gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
				old_child_spte, 0, level - 1);
		}

		/* Flush the range covered by the old SPTE before freeing. */
		kvm_flush_remote_tlbs_with_address(kvm, gfn,
						   KVM_PAGES_PER_HPAGE(level));

		free_page((unsigned long)pt);
		kmem_cache_free(mmu_page_header_cache, sp);
	}
}

/*
 * Full bookkeeping for an SPTE change: structural handling, then accessed
 * tracking, then dirty logging.
 */
static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				u64 old_spte, u64 new_spte, int level)
{
	__handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level);
	handle_changed_spte_acc_track(old_spte, new_spte, level);
	handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte,
				      new_spte, level);
}

/*
 * Write @new_spte to the SPTE the iterator currently points at, then run the
 * bookkeeping for the change from iter->old_spte.  @record_acc_track and
 * @record_dirty_log select whether accessed tracking and dirty logging are
 * performed.  The address space id is taken from the root page of the walk.
 */
static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
				      u64 new_spte, bool record_acc_track,
				      bool record_dirty_log)
{
	u64 *root_pt = tdp_iter_root_pt(iter);
	struct kvm_mmu_page *root = sptep_to_sp(root_pt);
	int as_id = kvm_mmu_page_as_id(root);

	WRITE_ONCE(*iter->sptep, new_spte);

	__handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
			      iter->level);
	if (record_acc_track)
		handle_changed_spte_acc_track(iter->old_spte, new_spte,
					      iter->level);
	if (record_dirty_log)
		handle_changed_spte_dirty_log(kvm, as_id, iter->gfn,
					      iter->old_spte, new_spte,
					      iter->level);
}

/* Set an SPTE with both accessed tracking and dirty logging. */
static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
				    u64 new_spte)
{
	__tdp_mmu_set_spte(kvm, iter, new_spte, true, true);
}

/* Set an SPTE with dirty logging but without accessed tracking. */
static inline void tdp_mmu_set_spte_no_acc_track(struct kvm *kvm,
						 struct tdp_iter *iter,
						 u64 new_spte)
{
	__tdp_mmu_set_spte(kvm, iter, new_spte, false, true);
}

/* Set an SPTE with accessed tracking but without dirty logging. */
static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
						 struct tdp_iter *iter,
						 u64 new_spte)
{
	__tdp_mmu_set_spte(kvm, iter, new_spte, true, false);
}

/* Walk the SPTEs under @_root for the GFN range given by _start and _end. */
#define tdp_root_for_each_pte(_iter, _root, _start, _end) \
	for_each_tdp_pte(_iter, _root->spt, _root->role.level, _start, _end)

/*
 * Same walk as tdp_root_for_each_pte(), but the loop body only runs for
 * present, last-level SPTEs; all other entries are skipped.
 */
#define tdp_root_for_each_leaf_pte(_iter, _root, _start, _end)	\
	tdp_root_for_each_pte(_iter, _root, _start, _end)		\
		if (!is_shadow_present_pte(_iter.old_spte) ||		\
		    !is_last_spte(_iter.old_spte, _iter.level))		\
			continue;					\
		else

/* Walk the SPTEs under the root referenced by @_mmu's root_hpa. */
#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end)		\
	for_each_tdp_pte(_iter, __va(_mmu->root_hpa),		\
			 _mmu->shadow_root_level, _start, _end)

/*
 * Yield if the MMU lock is contended or this thread needs to return control
 * to the scheduler.
 *
 * If this function should yield and flush is set, it will perform a remote
 * TLB flush before yielding.
 *
 * If this function yields, it will also reset the tdp_iter's walk over the
 * paging structure and the calling function should skip to the next
 * iteration to allow the iterator to continue its traversal from the
 * paging structure root.
 *
 * Return true if this function yielded and the iterator's traversal was reset.
 * Return false if a yield was not needed.
 */
static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
					     struct tdp_iter *iter, bool flush)
{
	/* Ensure forward progress has been made before yielding. */
	if (iter->next_last_level_gfn == iter->yielded_gfn)
		return false;

	if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
		if (flush)
			kvm_flush_remote_tlbs(kvm);

		cond_resched_lock(&kvm->mmu_lock);

		WARN_ON(iter->gfn > iter->next_last_level_gfn);

		/* Restart the walk from the root at next_last_level_gfn. */
		tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
			       iter->root_level, iter->min_level,
			       iter->next_last_level_gfn);

		return true;
	}

	return false;
}

/*
 * Tears down the mappings for the range of gfns, [start, end), and frees the
 * non-root pages mapping GFNs strictly within that range. Returns true if
 * SPTEs have been cleared and a TLB flush is needed before releasing the
 * MMU lock.
 * If can_yield is true, will release the MMU lock and reschedule if the
 * scheduler needs the CPU or there is contention on the MMU lock. If this
 * function cannot yield, it will not release the MMU lock or reschedule and
 * the caller must ensure it does not supply too large a GFN range, or the
 * operation can cause a soft lockup. Note, in some use cases a flush may be
 * required by prior actions. Ensure the pending flush is performed prior to
 * yielding.
 */
static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
			  gfn_t start, gfn_t end, bool can_yield, bool flush)
{
	struct tdp_iter iter;

	tdp_root_for_each_pte(iter, root, start, end) {
		if (can_yield &&
		    tdp_mmu_iter_cond_resched(kvm, &iter, flush)) {
			/* Any pending flush was performed before yielding. */
			flush = false;
			continue;
		}

		if (!is_shadow_present_pte(iter.old_spte))
			continue;

		/*
		 * If this is a non-last-level SPTE that covers a larger range
		 * than should be zapped, continue, and zap the mappings at a
		 * lower level.
		 */
		if ((iter.gfn < start ||
		     iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) &&
		    !is_last_spte(iter.old_spte, iter.level))
			continue;

		tdp_mmu_set_spte(kvm, &iter, 0);
		flush = true;
	}

	return flush;
}

/*
 * Tears down the mappings for the range of gfns, [start, end), and frees the
 * non-root pages mapping GFNs strictly within that range. Returns true if
 * SPTEs have been cleared and a TLB flush is needed before releasing the
 * MMU lock.
 *
 * The range is zapped in every TDP MMU root of the VM; the flush state is
 * carried from one root to the next.
 */
bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
				 bool can_yield)
{
	struct kvm_mmu_page *root;
	bool flush = false;

	for_each_tdp_mmu_root_yield_safe(kvm, root)
		flush = zap_gfn_range(kvm, root, start, end, can_yield, flush);

	return flush;
}

/*
 * Zap every GFN below max_gfn in all TDP MMU roots and flush remote TLBs if
 * any SPTE was cleared.
 * NOTE(review): kvm_tdp_mmu_zap_gfn_range() is not defined in this part of
 * the file; presumably a wrapper around __kvm_tdp_mmu_zap_gfn_range() --
 * confirm.
 */
void kvm_tdp_mmu_zap_all(struct kvm *kvm)
{
	gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
	bool flush;

	flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);
	if (flush)
		kvm_flush_remote_tlbs(kvm);
}

/*
 * Installs a last-level SPTE to handle a TDP page fault.
 * (NPT/EPT violation/misconfiguration)
 *
 * Returns RET_PF_FIXED by default, RET_PF_SPURIOUS if the computed SPTE equals
 * the existing one, and RET_PF_EMULATE for a write fault on a write-protected
 * page or when an MMIO SPTE is installed.
 */
static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
					  int map_writable,
					  struct tdp_iter *iter,
					  kvm_pfn_t pfn, bool prefault)
{
	u64 new_spte;
	int ret = RET_PF_FIXED;
	int make_spte_ret = 0;

	/* new_spte is filled in by exactly one of the two branches below. */
	if (unlikely(is_noslot_pfn(pfn))) {
		new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
		trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte);
	} else
		make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn,
					 pfn, iter->old_spte, prefault, true,
					 map_writable, !shadow_accessed_mask,
					 &new_spte);

	if (new_spte == iter->old_spte)
		ret = RET_PF_SPURIOUS;
	else
		tdp_mmu_set_spte(vcpu->kvm, iter, new_spte);

	/*
	 * If the page fault was caused by a write but the page is write
	 * protected, emulation is needed. If the emulation was skipped,
	 * the vCPU would have the same fault again.
	 */
	if (make_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
		if (write)
			ret = RET_PF_EMULATE;
		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
	}

	/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
	if (unlikely(is_mmio_spte(new_spte)))
		ret = RET_PF_EMULATE;

	trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
	/*
	 * NOTE(review): pf_fixed is bumped for every non-prefault call,
	 * including the spurious and emulate outcomes -- confirm intended.
	 */
	if (!prefault)
		vcpu->stat.pf_fixed++;

	return ret;
}

/*
 * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing
 * page tables and SPTEs to translate the faulting guest physical address.
5728c2ecf20Sopenharmony_ci */ 5738c2ecf20Sopenharmony_ciint kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, 5748c2ecf20Sopenharmony_ci int map_writable, int max_level, kvm_pfn_t pfn, 5758c2ecf20Sopenharmony_ci bool prefault) 5768c2ecf20Sopenharmony_ci{ 5778c2ecf20Sopenharmony_ci bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled(); 5788c2ecf20Sopenharmony_ci bool write = error_code & PFERR_WRITE_MASK; 5798c2ecf20Sopenharmony_ci bool exec = error_code & PFERR_FETCH_MASK; 5808c2ecf20Sopenharmony_ci bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled; 5818c2ecf20Sopenharmony_ci struct kvm_mmu *mmu = vcpu->arch.mmu; 5828c2ecf20Sopenharmony_ci struct tdp_iter iter; 5838c2ecf20Sopenharmony_ci struct kvm_mmu_page *sp; 5848c2ecf20Sopenharmony_ci u64 *child_pt; 5858c2ecf20Sopenharmony_ci u64 new_spte; 5868c2ecf20Sopenharmony_ci int ret; 5878c2ecf20Sopenharmony_ci gfn_t gfn = gpa >> PAGE_SHIFT; 5888c2ecf20Sopenharmony_ci int level; 5898c2ecf20Sopenharmony_ci int req_level; 5908c2ecf20Sopenharmony_ci 5918c2ecf20Sopenharmony_ci if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa))) 5928c2ecf20Sopenharmony_ci return RET_PF_RETRY; 5938c2ecf20Sopenharmony_ci if (WARN_ON(!is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))) 5948c2ecf20Sopenharmony_ci return RET_PF_RETRY; 5958c2ecf20Sopenharmony_ci 5968c2ecf20Sopenharmony_ci level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn, 5978c2ecf20Sopenharmony_ci huge_page_disallowed, &req_level); 5988c2ecf20Sopenharmony_ci 5998c2ecf20Sopenharmony_ci trace_kvm_mmu_spte_requested(gpa, level, pfn); 6008c2ecf20Sopenharmony_ci tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { 6018c2ecf20Sopenharmony_ci if (nx_huge_page_workaround_enabled) 6028c2ecf20Sopenharmony_ci disallowed_hugepage_adjust(iter.old_spte, gfn, 6038c2ecf20Sopenharmony_ci iter.level, &pfn, &level); 6048c2ecf20Sopenharmony_ci 6058c2ecf20Sopenharmony_ci if (iter.level == level) 6068c2ecf20Sopenharmony_ci break; 
6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci /* 6098c2ecf20Sopenharmony_ci * If there is an SPTE mapping a large page at a higher level 6108c2ecf20Sopenharmony_ci * than the target, that SPTE must be cleared and replaced 6118c2ecf20Sopenharmony_ci * with a non-leaf SPTE. 6128c2ecf20Sopenharmony_ci */ 6138c2ecf20Sopenharmony_ci if (is_shadow_present_pte(iter.old_spte) && 6148c2ecf20Sopenharmony_ci is_large_pte(iter.old_spte)) { 6158c2ecf20Sopenharmony_ci tdp_mmu_set_spte(vcpu->kvm, &iter, 0); 6168c2ecf20Sopenharmony_ci 6178c2ecf20Sopenharmony_ci kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn, 6188c2ecf20Sopenharmony_ci KVM_PAGES_PER_HPAGE(iter.level)); 6198c2ecf20Sopenharmony_ci 6208c2ecf20Sopenharmony_ci /* 6218c2ecf20Sopenharmony_ci * The iter must explicitly re-read the spte here 6228c2ecf20Sopenharmony_ci * because the new value informs the !present 6238c2ecf20Sopenharmony_ci * path below. 6248c2ecf20Sopenharmony_ci */ 6258c2ecf20Sopenharmony_ci iter.old_spte = READ_ONCE(*iter.sptep); 6268c2ecf20Sopenharmony_ci } 6278c2ecf20Sopenharmony_ci 6288c2ecf20Sopenharmony_ci if (!is_shadow_present_pte(iter.old_spte)) { 6298c2ecf20Sopenharmony_ci sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level); 6308c2ecf20Sopenharmony_ci list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages); 6318c2ecf20Sopenharmony_ci child_pt = sp->spt; 6328c2ecf20Sopenharmony_ci clear_page(child_pt); 6338c2ecf20Sopenharmony_ci new_spte = make_nonleaf_spte(child_pt, 6348c2ecf20Sopenharmony_ci !shadow_accessed_mask); 6358c2ecf20Sopenharmony_ci 6368c2ecf20Sopenharmony_ci trace_kvm_mmu_get_page(sp, true); 6378c2ecf20Sopenharmony_ci if (huge_page_disallowed && req_level >= iter.level) 6388c2ecf20Sopenharmony_ci account_huge_nx_page(vcpu->kvm, sp); 6398c2ecf20Sopenharmony_ci 6408c2ecf20Sopenharmony_ci tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte); 6418c2ecf20Sopenharmony_ci } 6428c2ecf20Sopenharmony_ci } 6438c2ecf20Sopenharmony_ci 6448c2ecf20Sopenharmony_ci if (WARN_ON(iter.level != level)) 
6458c2ecf20Sopenharmony_ci return RET_PF_RETRY; 6468c2ecf20Sopenharmony_ci 6478c2ecf20Sopenharmony_ci ret = tdp_mmu_map_handle_target_level(vcpu, write, map_writable, &iter, 6488c2ecf20Sopenharmony_ci pfn, prefault); 6498c2ecf20Sopenharmony_ci 6508c2ecf20Sopenharmony_ci return ret; 6518c2ecf20Sopenharmony_ci} 6528c2ecf20Sopenharmony_ci 6538c2ecf20Sopenharmony_cistatic int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start, 6548c2ecf20Sopenharmony_ci unsigned long end, unsigned long data, 6558c2ecf20Sopenharmony_ci int (*handler)(struct kvm *kvm, struct kvm_memory_slot *slot, 6568c2ecf20Sopenharmony_ci struct kvm_mmu_page *root, gfn_t start, 6578c2ecf20Sopenharmony_ci gfn_t end, unsigned long data)) 6588c2ecf20Sopenharmony_ci{ 6598c2ecf20Sopenharmony_ci struct kvm_memslots *slots; 6608c2ecf20Sopenharmony_ci struct kvm_memory_slot *memslot; 6618c2ecf20Sopenharmony_ci struct kvm_mmu_page *root; 6628c2ecf20Sopenharmony_ci int ret = 0; 6638c2ecf20Sopenharmony_ci int as_id; 6648c2ecf20Sopenharmony_ci 6658c2ecf20Sopenharmony_ci for_each_tdp_mmu_root_yield_safe(kvm, root) { 6668c2ecf20Sopenharmony_ci as_id = kvm_mmu_page_as_id(root); 6678c2ecf20Sopenharmony_ci slots = __kvm_memslots(kvm, as_id); 6688c2ecf20Sopenharmony_ci kvm_for_each_memslot(memslot, slots) { 6698c2ecf20Sopenharmony_ci unsigned long hva_start, hva_end; 6708c2ecf20Sopenharmony_ci gfn_t gfn_start, gfn_end; 6718c2ecf20Sopenharmony_ci 6728c2ecf20Sopenharmony_ci hva_start = max(start, memslot->userspace_addr); 6738c2ecf20Sopenharmony_ci hva_end = min(end, memslot->userspace_addr + 6748c2ecf20Sopenharmony_ci (memslot->npages << PAGE_SHIFT)); 6758c2ecf20Sopenharmony_ci if (hva_start >= hva_end) 6768c2ecf20Sopenharmony_ci continue; 6778c2ecf20Sopenharmony_ci /* 6788c2ecf20Sopenharmony_ci * {gfn(page) | page intersects with [hva_start, hva_end)} = 6798c2ecf20Sopenharmony_ci * {gfn_start, gfn_start+1, ..., gfn_end-1}. 
6808c2ecf20Sopenharmony_ci */ 6818c2ecf20Sopenharmony_ci gfn_start = hva_to_gfn_memslot(hva_start, memslot); 6828c2ecf20Sopenharmony_ci gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); 6838c2ecf20Sopenharmony_ci 6848c2ecf20Sopenharmony_ci ret |= handler(kvm, memslot, root, gfn_start, 6858c2ecf20Sopenharmony_ci gfn_end, data); 6868c2ecf20Sopenharmony_ci } 6878c2ecf20Sopenharmony_ci } 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_ci return ret; 6908c2ecf20Sopenharmony_ci} 6918c2ecf20Sopenharmony_ci 6928c2ecf20Sopenharmony_cistatic int zap_gfn_range_hva_wrapper(struct kvm *kvm, 6938c2ecf20Sopenharmony_ci struct kvm_memory_slot *slot, 6948c2ecf20Sopenharmony_ci struct kvm_mmu_page *root, gfn_t start, 6958c2ecf20Sopenharmony_ci gfn_t end, unsigned long unused) 6968c2ecf20Sopenharmony_ci{ 6978c2ecf20Sopenharmony_ci return zap_gfn_range(kvm, root, start, end, false, false); 6988c2ecf20Sopenharmony_ci} 6998c2ecf20Sopenharmony_ci 7008c2ecf20Sopenharmony_ciint kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start, 7018c2ecf20Sopenharmony_ci unsigned long end) 7028c2ecf20Sopenharmony_ci{ 7038c2ecf20Sopenharmony_ci return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0, 7048c2ecf20Sopenharmony_ci zap_gfn_range_hva_wrapper); 7058c2ecf20Sopenharmony_ci} 7068c2ecf20Sopenharmony_ci 7078c2ecf20Sopenharmony_ci/* 7088c2ecf20Sopenharmony_ci * Mark the SPTEs range of GFNs [start, end) unaccessed and return non-zero 7098c2ecf20Sopenharmony_ci * if any of the GFNs in the range have been accessed. 
7108c2ecf20Sopenharmony_ci */ 7118c2ecf20Sopenharmony_cistatic int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot, 7128c2ecf20Sopenharmony_ci struct kvm_mmu_page *root, gfn_t start, gfn_t end, 7138c2ecf20Sopenharmony_ci unsigned long unused) 7148c2ecf20Sopenharmony_ci{ 7158c2ecf20Sopenharmony_ci struct tdp_iter iter; 7168c2ecf20Sopenharmony_ci int young = 0; 7178c2ecf20Sopenharmony_ci u64 new_spte = 0; 7188c2ecf20Sopenharmony_ci 7198c2ecf20Sopenharmony_ci tdp_root_for_each_leaf_pte(iter, root, start, end) { 7208c2ecf20Sopenharmony_ci /* 7218c2ecf20Sopenharmony_ci * If we have a non-accessed entry we don't need to change the 7228c2ecf20Sopenharmony_ci * pte. 7238c2ecf20Sopenharmony_ci */ 7248c2ecf20Sopenharmony_ci if (!is_accessed_spte(iter.old_spte)) 7258c2ecf20Sopenharmony_ci continue; 7268c2ecf20Sopenharmony_ci 7278c2ecf20Sopenharmony_ci new_spte = iter.old_spte; 7288c2ecf20Sopenharmony_ci 7298c2ecf20Sopenharmony_ci if (spte_ad_enabled(new_spte)) { 7308c2ecf20Sopenharmony_ci clear_bit((ffs(shadow_accessed_mask) - 1), 7318c2ecf20Sopenharmony_ci (unsigned long *)&new_spte); 7328c2ecf20Sopenharmony_ci } else { 7338c2ecf20Sopenharmony_ci /* 7348c2ecf20Sopenharmony_ci * Capture the dirty status of the page, so that it doesn't get 7358c2ecf20Sopenharmony_ci * lost when the SPTE is marked for access tracking. 
7368c2ecf20Sopenharmony_ci */ 7378c2ecf20Sopenharmony_ci if (is_writable_pte(new_spte)) 7388c2ecf20Sopenharmony_ci kvm_set_pfn_dirty(spte_to_pfn(new_spte)); 7398c2ecf20Sopenharmony_ci 7408c2ecf20Sopenharmony_ci new_spte = mark_spte_for_access_track(new_spte); 7418c2ecf20Sopenharmony_ci } 7428c2ecf20Sopenharmony_ci new_spte &= ~shadow_dirty_mask; 7438c2ecf20Sopenharmony_ci 7448c2ecf20Sopenharmony_ci tdp_mmu_set_spte_no_acc_track(kvm, &iter, new_spte); 7458c2ecf20Sopenharmony_ci young = 1; 7468c2ecf20Sopenharmony_ci } 7478c2ecf20Sopenharmony_ci 7488c2ecf20Sopenharmony_ci return young; 7498c2ecf20Sopenharmony_ci} 7508c2ecf20Sopenharmony_ci 7518c2ecf20Sopenharmony_ciint kvm_tdp_mmu_age_hva_range(struct kvm *kvm, unsigned long start, 7528c2ecf20Sopenharmony_ci unsigned long end) 7538c2ecf20Sopenharmony_ci{ 7548c2ecf20Sopenharmony_ci return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0, 7558c2ecf20Sopenharmony_ci age_gfn_range); 7568c2ecf20Sopenharmony_ci} 7578c2ecf20Sopenharmony_ci 7588c2ecf20Sopenharmony_cistatic int test_age_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, 7598c2ecf20Sopenharmony_ci struct kvm_mmu_page *root, gfn_t gfn, gfn_t unused, 7608c2ecf20Sopenharmony_ci unsigned long unused2) 7618c2ecf20Sopenharmony_ci{ 7628c2ecf20Sopenharmony_ci struct tdp_iter iter; 7638c2ecf20Sopenharmony_ci 7648c2ecf20Sopenharmony_ci tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1) 7658c2ecf20Sopenharmony_ci if (is_accessed_spte(iter.old_spte)) 7668c2ecf20Sopenharmony_ci return 1; 7678c2ecf20Sopenharmony_ci 7688c2ecf20Sopenharmony_ci return 0; 7698c2ecf20Sopenharmony_ci} 7708c2ecf20Sopenharmony_ci 7718c2ecf20Sopenharmony_ciint kvm_tdp_mmu_test_age_hva(struct kvm *kvm, unsigned long hva) 7728c2ecf20Sopenharmony_ci{ 7738c2ecf20Sopenharmony_ci return kvm_tdp_mmu_handle_hva_range(kvm, hva, hva + 1, 0, 7748c2ecf20Sopenharmony_ci test_age_gfn); 7758c2ecf20Sopenharmony_ci} 7768c2ecf20Sopenharmony_ci 7778c2ecf20Sopenharmony_ci/* 7788c2ecf20Sopenharmony_ci * Handle the 
changed_pte MMU notifier for the TDP MMU. 7798c2ecf20Sopenharmony_ci * data is a pointer to the new pte_t mapping the HVA specified by the MMU 7808c2ecf20Sopenharmony_ci * notifier. 7818c2ecf20Sopenharmony_ci * Returns non-zero if a flush is needed before releasing the MMU lock. 7828c2ecf20Sopenharmony_ci */ 7838c2ecf20Sopenharmony_cistatic int set_tdp_spte(struct kvm *kvm, struct kvm_memory_slot *slot, 7848c2ecf20Sopenharmony_ci struct kvm_mmu_page *root, gfn_t gfn, gfn_t unused, 7858c2ecf20Sopenharmony_ci unsigned long data) 7868c2ecf20Sopenharmony_ci{ 7878c2ecf20Sopenharmony_ci struct tdp_iter iter; 7888c2ecf20Sopenharmony_ci pte_t *ptep = (pte_t *)data; 7898c2ecf20Sopenharmony_ci kvm_pfn_t new_pfn; 7908c2ecf20Sopenharmony_ci u64 new_spte; 7918c2ecf20Sopenharmony_ci int need_flush = 0; 7928c2ecf20Sopenharmony_ci 7938c2ecf20Sopenharmony_ci WARN_ON(pte_huge(*ptep)); 7948c2ecf20Sopenharmony_ci 7958c2ecf20Sopenharmony_ci new_pfn = pte_pfn(*ptep); 7968c2ecf20Sopenharmony_ci 7978c2ecf20Sopenharmony_ci tdp_root_for_each_pte(iter, root, gfn, gfn + 1) { 7988c2ecf20Sopenharmony_ci if (iter.level != PG_LEVEL_4K) 7998c2ecf20Sopenharmony_ci continue; 8008c2ecf20Sopenharmony_ci 8018c2ecf20Sopenharmony_ci if (!is_shadow_present_pte(iter.old_spte)) 8028c2ecf20Sopenharmony_ci break; 8038c2ecf20Sopenharmony_ci 8048c2ecf20Sopenharmony_ci tdp_mmu_set_spte(kvm, &iter, 0); 8058c2ecf20Sopenharmony_ci 8068c2ecf20Sopenharmony_ci kvm_flush_remote_tlbs_with_address(kvm, iter.gfn, 1); 8078c2ecf20Sopenharmony_ci 8088c2ecf20Sopenharmony_ci if (!pte_write(*ptep)) { 8098c2ecf20Sopenharmony_ci new_spte = kvm_mmu_changed_pte_notifier_make_spte( 8108c2ecf20Sopenharmony_ci iter.old_spte, new_pfn); 8118c2ecf20Sopenharmony_ci 8128c2ecf20Sopenharmony_ci tdp_mmu_set_spte(kvm, &iter, new_spte); 8138c2ecf20Sopenharmony_ci } 8148c2ecf20Sopenharmony_ci 8158c2ecf20Sopenharmony_ci need_flush = 1; 8168c2ecf20Sopenharmony_ci } 8178c2ecf20Sopenharmony_ci 8188c2ecf20Sopenharmony_ci if (need_flush) 
8198c2ecf20Sopenharmony_ci kvm_flush_remote_tlbs_with_address(kvm, gfn, 1); 8208c2ecf20Sopenharmony_ci 8218c2ecf20Sopenharmony_ci return 0; 8228c2ecf20Sopenharmony_ci} 8238c2ecf20Sopenharmony_ci 8248c2ecf20Sopenharmony_ciint kvm_tdp_mmu_set_spte_hva(struct kvm *kvm, unsigned long address, 8258c2ecf20Sopenharmony_ci pte_t *host_ptep) 8268c2ecf20Sopenharmony_ci{ 8278c2ecf20Sopenharmony_ci return kvm_tdp_mmu_handle_hva_range(kvm, address, address + 1, 8288c2ecf20Sopenharmony_ci (unsigned long)host_ptep, 8298c2ecf20Sopenharmony_ci set_tdp_spte); 8308c2ecf20Sopenharmony_ci} 8318c2ecf20Sopenharmony_ci 8328c2ecf20Sopenharmony_ci/* 8338c2ecf20Sopenharmony_ci * Remove write access from all the SPTEs mapping GFNs [start, end). If 8348c2ecf20Sopenharmony_ci * skip_4k is set, SPTEs that map 4k pages, will not be write-protected. 8358c2ecf20Sopenharmony_ci * Returns true if an SPTE has been changed and the TLBs need to be flushed. 8368c2ecf20Sopenharmony_ci */ 8378c2ecf20Sopenharmony_cistatic bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, 8388c2ecf20Sopenharmony_ci gfn_t start, gfn_t end, int min_level) 8398c2ecf20Sopenharmony_ci{ 8408c2ecf20Sopenharmony_ci struct tdp_iter iter; 8418c2ecf20Sopenharmony_ci u64 new_spte; 8428c2ecf20Sopenharmony_ci bool spte_set = false; 8438c2ecf20Sopenharmony_ci 8448c2ecf20Sopenharmony_ci BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL); 8458c2ecf20Sopenharmony_ci 8468c2ecf20Sopenharmony_ci for_each_tdp_pte_min_level(iter, root->spt, root->role.level, 8478c2ecf20Sopenharmony_ci min_level, start, end) { 8488c2ecf20Sopenharmony_ci if (tdp_mmu_iter_cond_resched(kvm, &iter, false)) 8498c2ecf20Sopenharmony_ci continue; 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_ci if (!is_shadow_present_pte(iter.old_spte) || 8528c2ecf20Sopenharmony_ci !is_last_spte(iter.old_spte, iter.level)) 8538c2ecf20Sopenharmony_ci continue; 8548c2ecf20Sopenharmony_ci 8558c2ecf20Sopenharmony_ci new_spte = iter.old_spte & ~PT_WRITABLE_MASK; 
8568c2ecf20Sopenharmony_ci 8578c2ecf20Sopenharmony_ci tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte); 8588c2ecf20Sopenharmony_ci spte_set = true; 8598c2ecf20Sopenharmony_ci } 8608c2ecf20Sopenharmony_ci return spte_set; 8618c2ecf20Sopenharmony_ci} 8628c2ecf20Sopenharmony_ci 8638c2ecf20Sopenharmony_ci/* 8648c2ecf20Sopenharmony_ci * Remove write access from all the SPTEs mapping GFNs in the memslot. Will 8658c2ecf20Sopenharmony_ci * only affect leaf SPTEs down to min_level. 8668c2ecf20Sopenharmony_ci * Returns true if an SPTE has been changed and the TLBs need to be flushed. 8678c2ecf20Sopenharmony_ci */ 8688c2ecf20Sopenharmony_cibool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot, 8698c2ecf20Sopenharmony_ci int min_level) 8708c2ecf20Sopenharmony_ci{ 8718c2ecf20Sopenharmony_ci struct kvm_mmu_page *root; 8728c2ecf20Sopenharmony_ci int root_as_id; 8738c2ecf20Sopenharmony_ci bool spte_set = false; 8748c2ecf20Sopenharmony_ci 8758c2ecf20Sopenharmony_ci for_each_tdp_mmu_root_yield_safe(kvm, root) { 8768c2ecf20Sopenharmony_ci root_as_id = kvm_mmu_page_as_id(root); 8778c2ecf20Sopenharmony_ci if (root_as_id != slot->as_id) 8788c2ecf20Sopenharmony_ci continue; 8798c2ecf20Sopenharmony_ci 8808c2ecf20Sopenharmony_ci spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn, 8818c2ecf20Sopenharmony_ci slot->base_gfn + slot->npages, min_level); 8828c2ecf20Sopenharmony_ci } 8838c2ecf20Sopenharmony_ci 8848c2ecf20Sopenharmony_ci return spte_set; 8858c2ecf20Sopenharmony_ci} 8868c2ecf20Sopenharmony_ci 8878c2ecf20Sopenharmony_ci/* 8888c2ecf20Sopenharmony_ci * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If 8898c2ecf20Sopenharmony_ci * AD bits are enabled, this will involve clearing the dirty bit on each SPTE. 8908c2ecf20Sopenharmony_ci * If AD bits are not enabled, this will require clearing the writable bit on 8918c2ecf20Sopenharmony_ci * each SPTE. 
Returns true if an SPTE has been changed and the TLBs need to 8928c2ecf20Sopenharmony_ci * be flushed. 8938c2ecf20Sopenharmony_ci */ 8948c2ecf20Sopenharmony_cistatic bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, 8958c2ecf20Sopenharmony_ci gfn_t start, gfn_t end) 8968c2ecf20Sopenharmony_ci{ 8978c2ecf20Sopenharmony_ci struct tdp_iter iter; 8988c2ecf20Sopenharmony_ci u64 new_spte; 8998c2ecf20Sopenharmony_ci bool spte_set = false; 9008c2ecf20Sopenharmony_ci 9018c2ecf20Sopenharmony_ci tdp_root_for_each_leaf_pte(iter, root, start, end) { 9028c2ecf20Sopenharmony_ci if (tdp_mmu_iter_cond_resched(kvm, &iter, false)) 9038c2ecf20Sopenharmony_ci continue; 9048c2ecf20Sopenharmony_ci 9058c2ecf20Sopenharmony_ci if (!is_shadow_present_pte(iter.old_spte)) 9068c2ecf20Sopenharmony_ci continue; 9078c2ecf20Sopenharmony_ci 9088c2ecf20Sopenharmony_ci if (spte_ad_need_write_protect(iter.old_spte)) { 9098c2ecf20Sopenharmony_ci if (is_writable_pte(iter.old_spte)) 9108c2ecf20Sopenharmony_ci new_spte = iter.old_spte & ~PT_WRITABLE_MASK; 9118c2ecf20Sopenharmony_ci else 9128c2ecf20Sopenharmony_ci continue; 9138c2ecf20Sopenharmony_ci } else { 9148c2ecf20Sopenharmony_ci if (iter.old_spte & shadow_dirty_mask) 9158c2ecf20Sopenharmony_ci new_spte = iter.old_spte & ~shadow_dirty_mask; 9168c2ecf20Sopenharmony_ci else 9178c2ecf20Sopenharmony_ci continue; 9188c2ecf20Sopenharmony_ci } 9198c2ecf20Sopenharmony_ci 9208c2ecf20Sopenharmony_ci tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte); 9218c2ecf20Sopenharmony_ci spte_set = true; 9228c2ecf20Sopenharmony_ci } 9238c2ecf20Sopenharmony_ci return spte_set; 9248c2ecf20Sopenharmony_ci} 9258c2ecf20Sopenharmony_ci 9268c2ecf20Sopenharmony_ci/* 9278c2ecf20Sopenharmony_ci * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If 9288c2ecf20Sopenharmony_ci * AD bits are enabled, this will involve clearing the dirty bit on each SPTE. 
9298c2ecf20Sopenharmony_ci * If AD bits are not enabled, this will require clearing the writable bit on 9308c2ecf20Sopenharmony_ci * each SPTE. Returns true if an SPTE has been changed and the TLBs need to 9318c2ecf20Sopenharmony_ci * be flushed. 9328c2ecf20Sopenharmony_ci */ 9338c2ecf20Sopenharmony_cibool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot) 9348c2ecf20Sopenharmony_ci{ 9358c2ecf20Sopenharmony_ci struct kvm_mmu_page *root; 9368c2ecf20Sopenharmony_ci int root_as_id; 9378c2ecf20Sopenharmony_ci bool spte_set = false; 9388c2ecf20Sopenharmony_ci 9398c2ecf20Sopenharmony_ci for_each_tdp_mmu_root_yield_safe(kvm, root) { 9408c2ecf20Sopenharmony_ci root_as_id = kvm_mmu_page_as_id(root); 9418c2ecf20Sopenharmony_ci if (root_as_id != slot->as_id) 9428c2ecf20Sopenharmony_ci continue; 9438c2ecf20Sopenharmony_ci 9448c2ecf20Sopenharmony_ci spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn, 9458c2ecf20Sopenharmony_ci slot->base_gfn + slot->npages); 9468c2ecf20Sopenharmony_ci } 9478c2ecf20Sopenharmony_ci 9488c2ecf20Sopenharmony_ci return spte_set; 9498c2ecf20Sopenharmony_ci} 9508c2ecf20Sopenharmony_ci 9518c2ecf20Sopenharmony_ci/* 9528c2ecf20Sopenharmony_ci * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is 9538c2ecf20Sopenharmony_ci * set in mask, starting at gfn. The given memslot is expected to contain all 9548c2ecf20Sopenharmony_ci * the GFNs represented by set bits in the mask. If AD bits are enabled, 9558c2ecf20Sopenharmony_ci * clearing the dirty status will involve clearing the dirty bit on each SPTE 9568c2ecf20Sopenharmony_ci * or, if AD bits are not enabled, clearing the writable bit on each SPTE. 
9578c2ecf20Sopenharmony_ci */ 9588c2ecf20Sopenharmony_cistatic void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, 9598c2ecf20Sopenharmony_ci gfn_t gfn, unsigned long mask, bool wrprot) 9608c2ecf20Sopenharmony_ci{ 9618c2ecf20Sopenharmony_ci struct tdp_iter iter; 9628c2ecf20Sopenharmony_ci u64 new_spte; 9638c2ecf20Sopenharmony_ci 9648c2ecf20Sopenharmony_ci tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask), 9658c2ecf20Sopenharmony_ci gfn + BITS_PER_LONG) { 9668c2ecf20Sopenharmony_ci if (!mask) 9678c2ecf20Sopenharmony_ci break; 9688c2ecf20Sopenharmony_ci 9698c2ecf20Sopenharmony_ci if (iter.level > PG_LEVEL_4K || 9708c2ecf20Sopenharmony_ci !(mask & (1UL << (iter.gfn - gfn)))) 9718c2ecf20Sopenharmony_ci continue; 9728c2ecf20Sopenharmony_ci 9738c2ecf20Sopenharmony_ci if (wrprot || spte_ad_need_write_protect(iter.old_spte)) { 9748c2ecf20Sopenharmony_ci if (is_writable_pte(iter.old_spte)) 9758c2ecf20Sopenharmony_ci new_spte = iter.old_spte & ~PT_WRITABLE_MASK; 9768c2ecf20Sopenharmony_ci else 9778c2ecf20Sopenharmony_ci continue; 9788c2ecf20Sopenharmony_ci } else { 9798c2ecf20Sopenharmony_ci if (iter.old_spte & shadow_dirty_mask) 9808c2ecf20Sopenharmony_ci new_spte = iter.old_spte & ~shadow_dirty_mask; 9818c2ecf20Sopenharmony_ci else 9828c2ecf20Sopenharmony_ci continue; 9838c2ecf20Sopenharmony_ci } 9848c2ecf20Sopenharmony_ci 9858c2ecf20Sopenharmony_ci tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte); 9868c2ecf20Sopenharmony_ci 9878c2ecf20Sopenharmony_ci mask &= ~(1UL << (iter.gfn - gfn)); 9888c2ecf20Sopenharmony_ci } 9898c2ecf20Sopenharmony_ci} 9908c2ecf20Sopenharmony_ci 9918c2ecf20Sopenharmony_ci/* 9928c2ecf20Sopenharmony_ci * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is 9938c2ecf20Sopenharmony_ci * set in mask, starting at gfn. The given memslot is expected to contain all 9948c2ecf20Sopenharmony_ci * the GFNs represented by set bits in the mask. 
If AD bits are enabled, 9958c2ecf20Sopenharmony_ci * clearing the dirty status will involve clearing the dirty bit on each SPTE 9968c2ecf20Sopenharmony_ci * or, if AD bits are not enabled, clearing the writable bit on each SPTE. 9978c2ecf20Sopenharmony_ci */ 9988c2ecf20Sopenharmony_civoid kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm, 9998c2ecf20Sopenharmony_ci struct kvm_memory_slot *slot, 10008c2ecf20Sopenharmony_ci gfn_t gfn, unsigned long mask, 10018c2ecf20Sopenharmony_ci bool wrprot) 10028c2ecf20Sopenharmony_ci{ 10038c2ecf20Sopenharmony_ci struct kvm_mmu_page *root; 10048c2ecf20Sopenharmony_ci int root_as_id; 10058c2ecf20Sopenharmony_ci 10068c2ecf20Sopenharmony_ci lockdep_assert_held(&kvm->mmu_lock); 10078c2ecf20Sopenharmony_ci for_each_tdp_mmu_root(kvm, root) { 10088c2ecf20Sopenharmony_ci root_as_id = kvm_mmu_page_as_id(root); 10098c2ecf20Sopenharmony_ci if (root_as_id != slot->as_id) 10108c2ecf20Sopenharmony_ci continue; 10118c2ecf20Sopenharmony_ci 10128c2ecf20Sopenharmony_ci clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot); 10138c2ecf20Sopenharmony_ci } 10148c2ecf20Sopenharmony_ci} 10158c2ecf20Sopenharmony_ci 10168c2ecf20Sopenharmony_ci/* 10178c2ecf20Sopenharmony_ci * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is 10188c2ecf20Sopenharmony_ci * only used for PML, and so will involve setting the dirty bit on each SPTE. 10198c2ecf20Sopenharmony_ci * Returns true if an SPTE has been changed and the TLBs need to be flushed. 
10208c2ecf20Sopenharmony_ci */ 10218c2ecf20Sopenharmony_cistatic bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, 10228c2ecf20Sopenharmony_ci gfn_t start, gfn_t end) 10238c2ecf20Sopenharmony_ci{ 10248c2ecf20Sopenharmony_ci struct tdp_iter iter; 10258c2ecf20Sopenharmony_ci u64 new_spte; 10268c2ecf20Sopenharmony_ci bool spte_set = false; 10278c2ecf20Sopenharmony_ci 10288c2ecf20Sopenharmony_ci tdp_root_for_each_pte(iter, root, start, end) { 10298c2ecf20Sopenharmony_ci if (tdp_mmu_iter_cond_resched(kvm, &iter, false)) 10308c2ecf20Sopenharmony_ci continue; 10318c2ecf20Sopenharmony_ci 10328c2ecf20Sopenharmony_ci if (!is_shadow_present_pte(iter.old_spte)) 10338c2ecf20Sopenharmony_ci continue; 10348c2ecf20Sopenharmony_ci 10358c2ecf20Sopenharmony_ci new_spte = iter.old_spte | shadow_dirty_mask; 10368c2ecf20Sopenharmony_ci 10378c2ecf20Sopenharmony_ci tdp_mmu_set_spte(kvm, &iter, new_spte); 10388c2ecf20Sopenharmony_ci spte_set = true; 10398c2ecf20Sopenharmony_ci } 10408c2ecf20Sopenharmony_ci 10418c2ecf20Sopenharmony_ci return spte_set; 10428c2ecf20Sopenharmony_ci} 10438c2ecf20Sopenharmony_ci 10448c2ecf20Sopenharmony_ci/* 10458c2ecf20Sopenharmony_ci * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is 10468c2ecf20Sopenharmony_ci * only used for PML, and so will involve setting the dirty bit on each SPTE. 10478c2ecf20Sopenharmony_ci * Returns true if an SPTE has been changed and the TLBs need to be flushed. 
10488c2ecf20Sopenharmony_ci */ 10498c2ecf20Sopenharmony_cibool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot) 10508c2ecf20Sopenharmony_ci{ 10518c2ecf20Sopenharmony_ci struct kvm_mmu_page *root; 10528c2ecf20Sopenharmony_ci int root_as_id; 10538c2ecf20Sopenharmony_ci bool spte_set = false; 10548c2ecf20Sopenharmony_ci 10558c2ecf20Sopenharmony_ci for_each_tdp_mmu_root_yield_safe(kvm, root) { 10568c2ecf20Sopenharmony_ci root_as_id = kvm_mmu_page_as_id(root); 10578c2ecf20Sopenharmony_ci if (root_as_id != slot->as_id) 10588c2ecf20Sopenharmony_ci continue; 10598c2ecf20Sopenharmony_ci 10608c2ecf20Sopenharmony_ci spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn, 10618c2ecf20Sopenharmony_ci slot->base_gfn + slot->npages); 10628c2ecf20Sopenharmony_ci } 10638c2ecf20Sopenharmony_ci return spte_set; 10648c2ecf20Sopenharmony_ci} 10658c2ecf20Sopenharmony_ci 10668c2ecf20Sopenharmony_ci/* 10678c2ecf20Sopenharmony_ci * Clear leaf entries which could be replaced by large mappings, for 10688c2ecf20Sopenharmony_ci * GFNs within the slot. 
10698c2ecf20Sopenharmony_ci */ 10708c2ecf20Sopenharmony_cistatic void zap_collapsible_spte_range(struct kvm *kvm, 10718c2ecf20Sopenharmony_ci struct kvm_mmu_page *root, 10728c2ecf20Sopenharmony_ci gfn_t start, gfn_t end) 10738c2ecf20Sopenharmony_ci{ 10748c2ecf20Sopenharmony_ci struct tdp_iter iter; 10758c2ecf20Sopenharmony_ci kvm_pfn_t pfn; 10768c2ecf20Sopenharmony_ci bool spte_set = false; 10778c2ecf20Sopenharmony_ci 10788c2ecf20Sopenharmony_ci tdp_root_for_each_pte(iter, root, start, end) { 10798c2ecf20Sopenharmony_ci if (tdp_mmu_iter_cond_resched(kvm, &iter, spte_set)) { 10808c2ecf20Sopenharmony_ci spte_set = false; 10818c2ecf20Sopenharmony_ci continue; 10828c2ecf20Sopenharmony_ci } 10838c2ecf20Sopenharmony_ci 10848c2ecf20Sopenharmony_ci if (!is_shadow_present_pte(iter.old_spte) || 10858c2ecf20Sopenharmony_ci !is_last_spte(iter.old_spte, iter.level)) 10868c2ecf20Sopenharmony_ci continue; 10878c2ecf20Sopenharmony_ci 10888c2ecf20Sopenharmony_ci pfn = spte_to_pfn(iter.old_spte); 10898c2ecf20Sopenharmony_ci if (kvm_is_reserved_pfn(pfn) || 10908c2ecf20Sopenharmony_ci (!PageCompound(pfn_to_page(pfn)) && 10918c2ecf20Sopenharmony_ci !kvm_is_zone_device_pfn(pfn))) 10928c2ecf20Sopenharmony_ci continue; 10938c2ecf20Sopenharmony_ci 10948c2ecf20Sopenharmony_ci tdp_mmu_set_spte(kvm, &iter, 0); 10958c2ecf20Sopenharmony_ci 10968c2ecf20Sopenharmony_ci spte_set = true; 10978c2ecf20Sopenharmony_ci } 10988c2ecf20Sopenharmony_ci 10998c2ecf20Sopenharmony_ci if (spte_set) 11008c2ecf20Sopenharmony_ci kvm_flush_remote_tlbs(kvm); 11018c2ecf20Sopenharmony_ci} 11028c2ecf20Sopenharmony_ci 11038c2ecf20Sopenharmony_ci/* 11048c2ecf20Sopenharmony_ci * Clear non-leaf entries (and free associated page tables) which could 11058c2ecf20Sopenharmony_ci * be replaced by large mappings, for GFNs within the slot. 
11068c2ecf20Sopenharmony_ci */ 11078c2ecf20Sopenharmony_civoid kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, 11088c2ecf20Sopenharmony_ci const struct kvm_memory_slot *slot) 11098c2ecf20Sopenharmony_ci{ 11108c2ecf20Sopenharmony_ci struct kvm_mmu_page *root; 11118c2ecf20Sopenharmony_ci int root_as_id; 11128c2ecf20Sopenharmony_ci 11138c2ecf20Sopenharmony_ci for_each_tdp_mmu_root_yield_safe(kvm, root) { 11148c2ecf20Sopenharmony_ci root_as_id = kvm_mmu_page_as_id(root); 11158c2ecf20Sopenharmony_ci if (root_as_id != slot->as_id) 11168c2ecf20Sopenharmony_ci continue; 11178c2ecf20Sopenharmony_ci 11188c2ecf20Sopenharmony_ci zap_collapsible_spte_range(kvm, root, slot->base_gfn, 11198c2ecf20Sopenharmony_ci slot->base_gfn + slot->npages); 11208c2ecf20Sopenharmony_ci } 11218c2ecf20Sopenharmony_ci} 11228c2ecf20Sopenharmony_ci 11238c2ecf20Sopenharmony_ci/* 11248c2ecf20Sopenharmony_ci * Removes write access on the last level SPTE mapping this GFN and unsets the 11258c2ecf20Sopenharmony_ci * SPTE_MMU_WRITABLE bit to ensure future writes continue to be intercepted. 11268c2ecf20Sopenharmony_ci * Returns true if an SPTE was set and a TLB flush is needed. 
11278c2ecf20Sopenharmony_ci */ 11288c2ecf20Sopenharmony_cistatic bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root, 11298c2ecf20Sopenharmony_ci gfn_t gfn) 11308c2ecf20Sopenharmony_ci{ 11318c2ecf20Sopenharmony_ci struct tdp_iter iter; 11328c2ecf20Sopenharmony_ci u64 new_spte; 11338c2ecf20Sopenharmony_ci bool spte_set = false; 11348c2ecf20Sopenharmony_ci 11358c2ecf20Sopenharmony_ci tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1) { 11368c2ecf20Sopenharmony_ci new_spte = iter.old_spte & 11378c2ecf20Sopenharmony_ci ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE); 11388c2ecf20Sopenharmony_ci 11398c2ecf20Sopenharmony_ci if (new_spte == iter.old_spte) 11408c2ecf20Sopenharmony_ci break; 11418c2ecf20Sopenharmony_ci 11428c2ecf20Sopenharmony_ci tdp_mmu_set_spte(kvm, &iter, new_spte); 11438c2ecf20Sopenharmony_ci spte_set = true; 11448c2ecf20Sopenharmony_ci } 11458c2ecf20Sopenharmony_ci 11468c2ecf20Sopenharmony_ci return spte_set; 11478c2ecf20Sopenharmony_ci} 11488c2ecf20Sopenharmony_ci 11498c2ecf20Sopenharmony_ci/* 11508c2ecf20Sopenharmony_ci * Removes write access on the last level SPTE mapping this GFN and unsets the 11518c2ecf20Sopenharmony_ci * SPTE_MMU_WRITABLE bit to ensure future writes continue to be intercepted. 11528c2ecf20Sopenharmony_ci * Returns true if an SPTE was set and a TLB flush is needed. 
11538c2ecf20Sopenharmony_ci */ 11548c2ecf20Sopenharmony_cibool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, 11558c2ecf20Sopenharmony_ci struct kvm_memory_slot *slot, gfn_t gfn) 11568c2ecf20Sopenharmony_ci{ 11578c2ecf20Sopenharmony_ci struct kvm_mmu_page *root; 11588c2ecf20Sopenharmony_ci int root_as_id; 11598c2ecf20Sopenharmony_ci bool spte_set = false; 11608c2ecf20Sopenharmony_ci 11618c2ecf20Sopenharmony_ci lockdep_assert_held(&kvm->mmu_lock); 11628c2ecf20Sopenharmony_ci for_each_tdp_mmu_root(kvm, root) { 11638c2ecf20Sopenharmony_ci root_as_id = kvm_mmu_page_as_id(root); 11648c2ecf20Sopenharmony_ci if (root_as_id != slot->as_id) 11658c2ecf20Sopenharmony_ci continue; 11668c2ecf20Sopenharmony_ci 11678c2ecf20Sopenharmony_ci spte_set |= write_protect_gfn(kvm, root, gfn); 11688c2ecf20Sopenharmony_ci } 11698c2ecf20Sopenharmony_ci return spte_set; 11708c2ecf20Sopenharmony_ci} 11718c2ecf20Sopenharmony_ci 11728c2ecf20Sopenharmony_ci/* 11738c2ecf20Sopenharmony_ci * Return the level of the lowest level SPTE added to sptes. 11748c2ecf20Sopenharmony_ci * That SPTE may be non-present. 11758c2ecf20Sopenharmony_ci */ 11768c2ecf20Sopenharmony_ciint kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, 11778c2ecf20Sopenharmony_ci int *root_level) 11788c2ecf20Sopenharmony_ci{ 11798c2ecf20Sopenharmony_ci struct tdp_iter iter; 11808c2ecf20Sopenharmony_ci struct kvm_mmu *mmu = vcpu->arch.mmu; 11818c2ecf20Sopenharmony_ci gfn_t gfn = addr >> PAGE_SHIFT; 11828c2ecf20Sopenharmony_ci int leaf = -1; 11838c2ecf20Sopenharmony_ci 11848c2ecf20Sopenharmony_ci *root_level = vcpu->arch.mmu->shadow_root_level; 11858c2ecf20Sopenharmony_ci 11868c2ecf20Sopenharmony_ci tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { 11878c2ecf20Sopenharmony_ci leaf = iter.level; 11888c2ecf20Sopenharmony_ci sptes[leaf - 1] = iter.old_spte; 11898c2ecf20Sopenharmony_ci } 11908c2ecf20Sopenharmony_ci 11918c2ecf20Sopenharmony_ci return leaf; 11928c2ecf20Sopenharmony_ci} 1193