162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Support KVM gust page tracking 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * This feature allows us to track page access in guest. Currently, only 662306a36Sopenharmony_ci * write access is tracked. 762306a36Sopenharmony_ci * 862306a36Sopenharmony_ci * Copyright(C) 2015 Intel Corporation. 962306a36Sopenharmony_ci * 1062306a36Sopenharmony_ci * Author: 1162306a36Sopenharmony_ci * Xiao Guangrong <guangrong.xiao@linux.intel.com> 1262306a36Sopenharmony_ci */ 1362306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci#include <linux/lockdep.h> 1662306a36Sopenharmony_ci#include <linux/kvm_host.h> 1762306a36Sopenharmony_ci#include <linux/rculist.h> 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci#include "mmu.h" 2062306a36Sopenharmony_ci#include "mmu_internal.h" 2162306a36Sopenharmony_ci#include "page_track.h" 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_cibool kvm_page_track_write_tracking_enabled(struct kvm *kvm) 2462306a36Sopenharmony_ci{ 2562306a36Sopenharmony_ci return IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING) || 2662306a36Sopenharmony_ci !tdp_enabled || kvm_shadow_root_allocated(kvm); 2762306a36Sopenharmony_ci} 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_civoid kvm_page_track_free_memslot(struct kvm_memory_slot *slot) 3062306a36Sopenharmony_ci{ 3162306a36Sopenharmony_ci kvfree(slot->arch.gfn_write_track); 3262306a36Sopenharmony_ci slot->arch.gfn_write_track = NULL; 3362306a36Sopenharmony_ci} 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_cistatic int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot, 3662306a36Sopenharmony_ci unsigned long npages) 3762306a36Sopenharmony_ci{ 3862306a36Sopenharmony_ci const size_t size = sizeof(*slot->arch.gfn_write_track); 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_ci if (!slot->arch.gfn_write_track) 4162306a36Sopenharmony_ci slot->arch.gfn_write_track = __vcalloc(npages, size, 4262306a36Sopenharmony_ci GFP_KERNEL_ACCOUNT); 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci return slot->arch.gfn_write_track ? 0 : -ENOMEM; 4562306a36Sopenharmony_ci} 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_ciint kvm_page_track_create_memslot(struct kvm *kvm, 4862306a36Sopenharmony_ci struct kvm_memory_slot *slot, 4962306a36Sopenharmony_ci unsigned long npages) 5062306a36Sopenharmony_ci{ 5162306a36Sopenharmony_ci if (!kvm_page_track_write_tracking_enabled(kvm)) 5262306a36Sopenharmony_ci return 0; 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_ci return __kvm_page_track_write_tracking_alloc(slot, npages); 5562306a36Sopenharmony_ci} 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ciint kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot) 5862306a36Sopenharmony_ci{ 5962306a36Sopenharmony_ci return __kvm_page_track_write_tracking_alloc(slot, slot->npages); 6062306a36Sopenharmony_ci} 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_cistatic void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn, 6362306a36Sopenharmony_ci short count) 6462306a36Sopenharmony_ci{ 6562306a36Sopenharmony_ci int index, val; 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci val = slot->arch.gfn_write_track[index]; 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX)) 7262306a36Sopenharmony_ci return; 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci slot->arch.gfn_write_track[index] += count; 7562306a36Sopenharmony_ci} 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_civoid __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, 7862306a36Sopenharmony_ci gfn_t gfn) 7962306a36Sopenharmony_ci{ 8062306a36Sopenharmony_ci lockdep_assert_held_write(&kvm->mmu_lock); 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || 8362306a36Sopenharmony_ci srcu_read_lock_held(&kvm->srcu)); 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) 8662306a36Sopenharmony_ci return; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci update_gfn_write_track(slot, gfn, 1); 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci /* 9162306a36Sopenharmony_ci * new track stops large page mapping for the 9262306a36Sopenharmony_ci * tracked page. 9362306a36Sopenharmony_ci */ 9462306a36Sopenharmony_ci kvm_mmu_gfn_disallow_lpage(slot, gfn); 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) 9762306a36Sopenharmony_ci kvm_flush_remote_tlbs(kvm); 9862306a36Sopenharmony_ci} 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_civoid __kvm_write_track_remove_gfn(struct kvm *kvm, 10162306a36Sopenharmony_ci struct kvm_memory_slot *slot, gfn_t gfn) 10262306a36Sopenharmony_ci{ 10362306a36Sopenharmony_ci lockdep_assert_held_write(&kvm->mmu_lock); 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || 10662306a36Sopenharmony_ci srcu_read_lock_held(&kvm->srcu)); 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) 10962306a36Sopenharmony_ci return; 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ci update_gfn_write_track(slot, gfn, -1); 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci /* 11462306a36Sopenharmony_ci * allow large page mapping for the tracked page 11562306a36Sopenharmony_ci * after the tracker is gone. 11662306a36Sopenharmony_ci */ 11762306a36Sopenharmony_ci kvm_mmu_gfn_allow_lpage(slot, gfn); 11862306a36Sopenharmony_ci} 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci/* 12162306a36Sopenharmony_ci * check if the corresponding access on the specified guest page is tracked. 12262306a36Sopenharmony_ci */ 12362306a36Sopenharmony_cibool kvm_gfn_is_write_tracked(struct kvm *kvm, 12462306a36Sopenharmony_ci const struct kvm_memory_slot *slot, gfn_t gfn) 12562306a36Sopenharmony_ci{ 12662306a36Sopenharmony_ci int index; 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci if (!slot) 12962306a36Sopenharmony_ci return false; 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci if (!kvm_page_track_write_tracking_enabled(kvm)) 13262306a36Sopenharmony_ci return false; 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); 13562306a36Sopenharmony_ci return !!READ_ONCE(slot->arch.gfn_write_track[index]); 13662306a36Sopenharmony_ci} 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING 13962306a36Sopenharmony_civoid kvm_page_track_cleanup(struct kvm *kvm) 14062306a36Sopenharmony_ci{ 14162306a36Sopenharmony_ci struct kvm_page_track_notifier_head *head; 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci head = &kvm->arch.track_notifier_head; 14462306a36Sopenharmony_ci cleanup_srcu_struct(&head->track_srcu); 14562306a36Sopenharmony_ci} 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ciint kvm_page_track_init(struct kvm *kvm) 14862306a36Sopenharmony_ci{ 14962306a36Sopenharmony_ci struct kvm_page_track_notifier_head *head; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci head = &kvm->arch.track_notifier_head; 15262306a36Sopenharmony_ci INIT_HLIST_HEAD(&head->track_notifier_list); 15362306a36Sopenharmony_ci return init_srcu_struct(&head->track_srcu); 15462306a36Sopenharmony_ci} 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci/* 15762306a36Sopenharmony_ci * register the notifier so that event interception for the tracked guest 15862306a36Sopenharmony_ci * pages can be received. 15962306a36Sopenharmony_ci */ 16062306a36Sopenharmony_ciint kvm_page_track_register_notifier(struct kvm *kvm, 16162306a36Sopenharmony_ci struct kvm_page_track_notifier_node *n) 16262306a36Sopenharmony_ci{ 16362306a36Sopenharmony_ci struct kvm_page_track_notifier_head *head; 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci if (!kvm || kvm->mm != current->mm) 16662306a36Sopenharmony_ci return -ESRCH; 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci kvm_get_kvm(kvm); 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci head = &kvm->arch.track_notifier_head; 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci write_lock(&kvm->mmu_lock); 17362306a36Sopenharmony_ci hlist_add_head_rcu(&n->node, &head->track_notifier_list); 17462306a36Sopenharmony_ci write_unlock(&kvm->mmu_lock); 17562306a36Sopenharmony_ci return 0; 17662306a36Sopenharmony_ci} 17762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_page_track_register_notifier); 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci/* 18062306a36Sopenharmony_ci * stop receiving the event interception. It is the opposed operation of 18162306a36Sopenharmony_ci * kvm_page_track_register_notifier(). 18262306a36Sopenharmony_ci */ 18362306a36Sopenharmony_civoid kvm_page_track_unregister_notifier(struct kvm *kvm, 18462306a36Sopenharmony_ci struct kvm_page_track_notifier_node *n) 18562306a36Sopenharmony_ci{ 18662306a36Sopenharmony_ci struct kvm_page_track_notifier_head *head; 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci head = &kvm->arch.track_notifier_head; 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_ci write_lock(&kvm->mmu_lock); 19162306a36Sopenharmony_ci hlist_del_rcu(&n->node); 19262306a36Sopenharmony_ci write_unlock(&kvm->mmu_lock); 19362306a36Sopenharmony_ci synchronize_srcu(&head->track_srcu); 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci kvm_put_kvm(kvm); 19662306a36Sopenharmony_ci} 19762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier); 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci/* 20062306a36Sopenharmony_ci * Notify the node that write access is intercepted and write emulation is 20162306a36Sopenharmony_ci * finished at this time. 20262306a36Sopenharmony_ci * 20362306a36Sopenharmony_ci * The node should figure out if the written page is the one that node is 20462306a36Sopenharmony_ci * interested in by itself. 20562306a36Sopenharmony_ci */ 20662306a36Sopenharmony_civoid __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes) 20762306a36Sopenharmony_ci{ 20862306a36Sopenharmony_ci struct kvm_page_track_notifier_head *head; 20962306a36Sopenharmony_ci struct kvm_page_track_notifier_node *n; 21062306a36Sopenharmony_ci int idx; 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci head = &kvm->arch.track_notifier_head; 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci if (hlist_empty(&head->track_notifier_list)) 21562306a36Sopenharmony_ci return; 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci idx = srcu_read_lock(&head->track_srcu); 21862306a36Sopenharmony_ci hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, 21962306a36Sopenharmony_ci srcu_read_lock_held(&head->track_srcu)) 22062306a36Sopenharmony_ci if (n->track_write) 22162306a36Sopenharmony_ci n->track_write(gpa, new, bytes, n); 22262306a36Sopenharmony_ci srcu_read_unlock(&head->track_srcu, idx); 22362306a36Sopenharmony_ci} 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci/* 22662306a36Sopenharmony_ci * Notify external page track nodes that a memory region is being removed from 22762306a36Sopenharmony_ci * the VM, e.g. so that users can free any associated metadata. 22862306a36Sopenharmony_ci */ 22962306a36Sopenharmony_civoid kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot) 23062306a36Sopenharmony_ci{ 23162306a36Sopenharmony_ci struct kvm_page_track_notifier_head *head; 23262306a36Sopenharmony_ci struct kvm_page_track_notifier_node *n; 23362306a36Sopenharmony_ci int idx; 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci head = &kvm->arch.track_notifier_head; 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_ci if (hlist_empty(&head->track_notifier_list)) 23862306a36Sopenharmony_ci return; 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci idx = srcu_read_lock(&head->track_srcu); 24162306a36Sopenharmony_ci hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, 24262306a36Sopenharmony_ci srcu_read_lock_held(&head->track_srcu)) 24362306a36Sopenharmony_ci if (n->track_remove_region) 24462306a36Sopenharmony_ci n->track_remove_region(slot->base_gfn, slot->npages, n); 24562306a36Sopenharmony_ci srcu_read_unlock(&head->track_srcu, idx); 24662306a36Sopenharmony_ci} 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci/* 24962306a36Sopenharmony_ci * add guest page to the tracking pool so that corresponding access on that 25062306a36Sopenharmony_ci * page will be intercepted. 25162306a36Sopenharmony_ci * 25262306a36Sopenharmony_ci * @kvm: the guest instance we are interested in. 25362306a36Sopenharmony_ci * @gfn: the guest page. 25462306a36Sopenharmony_ci */ 25562306a36Sopenharmony_ciint kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn) 25662306a36Sopenharmony_ci{ 25762306a36Sopenharmony_ci struct kvm_memory_slot *slot; 25862306a36Sopenharmony_ci int idx; 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci idx = srcu_read_lock(&kvm->srcu); 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci slot = gfn_to_memslot(kvm, gfn); 26362306a36Sopenharmony_ci if (!slot) { 26462306a36Sopenharmony_ci srcu_read_unlock(&kvm->srcu, idx); 26562306a36Sopenharmony_ci return -EINVAL; 26662306a36Sopenharmony_ci } 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ci write_lock(&kvm->mmu_lock); 26962306a36Sopenharmony_ci __kvm_write_track_add_gfn(kvm, slot, gfn); 27062306a36Sopenharmony_ci write_unlock(&kvm->mmu_lock); 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci srcu_read_unlock(&kvm->srcu, idx); 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci return 0; 27562306a36Sopenharmony_ci} 27662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_write_track_add_gfn); 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci/* 27962306a36Sopenharmony_ci * remove the guest page from the tracking pool which stops the interception 28062306a36Sopenharmony_ci * of corresponding access on that page. 28162306a36Sopenharmony_ci * 28262306a36Sopenharmony_ci * @kvm: the guest instance we are interested in. 28362306a36Sopenharmony_ci * @gfn: the guest page. 28462306a36Sopenharmony_ci */ 28562306a36Sopenharmony_ciint kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn) 28662306a36Sopenharmony_ci{ 28762306a36Sopenharmony_ci struct kvm_memory_slot *slot; 28862306a36Sopenharmony_ci int idx; 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci idx = srcu_read_lock(&kvm->srcu); 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci slot = gfn_to_memslot(kvm, gfn); 29362306a36Sopenharmony_ci if (!slot) { 29462306a36Sopenharmony_ci srcu_read_unlock(&kvm->srcu, idx); 29562306a36Sopenharmony_ci return -EINVAL; 29662306a36Sopenharmony_ci } 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci write_lock(&kvm->mmu_lock); 29962306a36Sopenharmony_ci __kvm_write_track_remove_gfn(kvm, slot, gfn); 30062306a36Sopenharmony_ci write_unlock(&kvm->mmu_lock); 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci srcu_read_unlock(&kvm->srcu, idx); 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci return 0; 30562306a36Sopenharmony_ci} 30662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn); 30762306a36Sopenharmony_ci#endif 308