18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Support KVM gust page tracking 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * This feature allows us to track page access in guest. Currently, only 68c2ecf20Sopenharmony_ci * write access is tracked. 78c2ecf20Sopenharmony_ci * 88c2ecf20Sopenharmony_ci * Copyright(C) 2015 Intel Corporation. 98c2ecf20Sopenharmony_ci * 108c2ecf20Sopenharmony_ci * Author: 118c2ecf20Sopenharmony_ci * Xiao Guangrong <guangrong.xiao@linux.intel.com> 128c2ecf20Sopenharmony_ci */ 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci#include <linux/kvm_host.h> 158c2ecf20Sopenharmony_ci#include <linux/rculist.h> 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ci#include <asm/kvm_page_track.h> 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci#include "mmu_internal.h" 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_civoid kvm_page_track_free_memslot(struct kvm_memory_slot *slot) 228c2ecf20Sopenharmony_ci{ 238c2ecf20Sopenharmony_ci int i; 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { 268c2ecf20Sopenharmony_ci kvfree(slot->arch.gfn_track[i]); 278c2ecf20Sopenharmony_ci slot->arch.gfn_track[i] = NULL; 288c2ecf20Sopenharmony_ci } 298c2ecf20Sopenharmony_ci} 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ciint kvm_page_track_create_memslot(struct kvm_memory_slot *slot, 328c2ecf20Sopenharmony_ci unsigned long npages) 338c2ecf20Sopenharmony_ci{ 348c2ecf20Sopenharmony_ci int i; 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_ci for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { 378c2ecf20Sopenharmony_ci slot->arch.gfn_track[i] = 388c2ecf20Sopenharmony_ci __vcalloc(npages, sizeof(*slot->arch.gfn_track[i]), 398c2ecf20Sopenharmony_ci GFP_KERNEL_ACCOUNT); 408c2ecf20Sopenharmony_ci if (!slot->arch.gfn_track[i]) 418c2ecf20Sopenharmony_ci goto track_free; 428c2ecf20Sopenharmony_ci } 438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_ci return 0; 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_citrack_free: 478c2ecf20Sopenharmony_ci kvm_page_track_free_memslot(slot); 488c2ecf20Sopenharmony_ci return -ENOMEM; 498c2ecf20Sopenharmony_ci} 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_cistatic inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode) 528c2ecf20Sopenharmony_ci{ 538c2ecf20Sopenharmony_ci if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX) 548c2ecf20Sopenharmony_ci return false; 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci return true; 578c2ecf20Sopenharmony_ci} 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_cistatic void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn, 608c2ecf20Sopenharmony_ci enum kvm_page_track_mode mode, short count) 618c2ecf20Sopenharmony_ci{ 628c2ecf20Sopenharmony_ci int index, val; 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci val = slot->arch.gfn_track[mode][index]; 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci if (WARN_ON(val + count < 0 || val + count > USHRT_MAX)) 698c2ecf20Sopenharmony_ci return; 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci slot->arch.gfn_track[mode][index] += count; 728c2ecf20Sopenharmony_ci} 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_ci/* 758c2ecf20Sopenharmony_ci * add guest page to the tracking pool so that corresponding access on that 768c2ecf20Sopenharmony_ci * page will be intercepted. 778c2ecf20Sopenharmony_ci * 788c2ecf20Sopenharmony_ci * It should be called under the protection both of mmu-lock and kvm->srcu 798c2ecf20Sopenharmony_ci * or kvm->slots_lock. 808c2ecf20Sopenharmony_ci * 818c2ecf20Sopenharmony_ci * @kvm: the guest instance we are interested in. 828c2ecf20Sopenharmony_ci * @slot: the @gfn belongs to. 838c2ecf20Sopenharmony_ci * @gfn: the guest page. 848c2ecf20Sopenharmony_ci * @mode: tracking mode, currently only write track is supported. 858c2ecf20Sopenharmony_ci */ 868c2ecf20Sopenharmony_civoid kvm_slot_page_track_add_page(struct kvm *kvm, 878c2ecf20Sopenharmony_ci struct kvm_memory_slot *slot, gfn_t gfn, 888c2ecf20Sopenharmony_ci enum kvm_page_track_mode mode) 898c2ecf20Sopenharmony_ci{ 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci if (WARN_ON(!page_track_mode_is_valid(mode))) 928c2ecf20Sopenharmony_ci return; 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci update_gfn_track(slot, gfn, mode, 1); 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci /* 978c2ecf20Sopenharmony_ci * new track stops large page mapping for the 988c2ecf20Sopenharmony_ci * tracked page. 998c2ecf20Sopenharmony_ci */ 1008c2ecf20Sopenharmony_ci kvm_mmu_gfn_disallow_lpage(slot, gfn); 1018c2ecf20Sopenharmony_ci 1028c2ecf20Sopenharmony_ci if (mode == KVM_PAGE_TRACK_WRITE) 1038c2ecf20Sopenharmony_ci if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn)) 1048c2ecf20Sopenharmony_ci kvm_flush_remote_tlbs(kvm); 1058c2ecf20Sopenharmony_ci} 1068c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page); 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci/* 1098c2ecf20Sopenharmony_ci * remove the guest page from the tracking pool which stops the interception 1108c2ecf20Sopenharmony_ci * of corresponding access on that page. It is the opposed operation of 1118c2ecf20Sopenharmony_ci * kvm_slot_page_track_add_page(). 1128c2ecf20Sopenharmony_ci * 1138c2ecf20Sopenharmony_ci * It should be called under the protection both of mmu-lock and kvm->srcu 1148c2ecf20Sopenharmony_ci * or kvm->slots_lock. 1158c2ecf20Sopenharmony_ci * 1168c2ecf20Sopenharmony_ci * @kvm: the guest instance we are interested in. 1178c2ecf20Sopenharmony_ci * @slot: the @gfn belongs to. 1188c2ecf20Sopenharmony_ci * @gfn: the guest page. 1198c2ecf20Sopenharmony_ci * @mode: tracking mode, currently only write track is supported. 1208c2ecf20Sopenharmony_ci */ 1218c2ecf20Sopenharmony_civoid kvm_slot_page_track_remove_page(struct kvm *kvm, 1228c2ecf20Sopenharmony_ci struct kvm_memory_slot *slot, gfn_t gfn, 1238c2ecf20Sopenharmony_ci enum kvm_page_track_mode mode) 1248c2ecf20Sopenharmony_ci{ 1258c2ecf20Sopenharmony_ci if (WARN_ON(!page_track_mode_is_valid(mode))) 1268c2ecf20Sopenharmony_ci return; 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci update_gfn_track(slot, gfn, mode, -1); 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci /* 1318c2ecf20Sopenharmony_ci * allow large page mapping for the tracked page 1328c2ecf20Sopenharmony_ci * after the tracker is gone. 1338c2ecf20Sopenharmony_ci */ 1348c2ecf20Sopenharmony_ci kvm_mmu_gfn_allow_lpage(slot, gfn); 1358c2ecf20Sopenharmony_ci} 1368c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page); 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_ci/* 1398c2ecf20Sopenharmony_ci * check if the corresponding access on the specified guest page is tracked. 1408c2ecf20Sopenharmony_ci */ 1418c2ecf20Sopenharmony_cibool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, 1428c2ecf20Sopenharmony_ci enum kvm_page_track_mode mode) 1438c2ecf20Sopenharmony_ci{ 1448c2ecf20Sopenharmony_ci struct kvm_memory_slot *slot; 1458c2ecf20Sopenharmony_ci int index; 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci if (WARN_ON(!page_track_mode_is_valid(mode))) 1488c2ecf20Sopenharmony_ci return false; 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_ci slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); 1518c2ecf20Sopenharmony_ci if (!slot) 1528c2ecf20Sopenharmony_ci return false; 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_ci index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); 1558c2ecf20Sopenharmony_ci return !!READ_ONCE(slot->arch.gfn_track[mode][index]); 1568c2ecf20Sopenharmony_ci} 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_civoid kvm_page_track_cleanup(struct kvm *kvm) 1598c2ecf20Sopenharmony_ci{ 1608c2ecf20Sopenharmony_ci struct kvm_page_track_notifier_head *head; 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci head = &kvm->arch.track_notifier_head; 1638c2ecf20Sopenharmony_ci cleanup_srcu_struct(&head->track_srcu); 1648c2ecf20Sopenharmony_ci} 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ciint kvm_page_track_init(struct kvm *kvm) 1678c2ecf20Sopenharmony_ci{ 1688c2ecf20Sopenharmony_ci struct kvm_page_track_notifier_head *head; 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_ci head = &kvm->arch.track_notifier_head; 1718c2ecf20Sopenharmony_ci INIT_HLIST_HEAD(&head->track_notifier_list); 1728c2ecf20Sopenharmony_ci return init_srcu_struct(&head->track_srcu); 1738c2ecf20Sopenharmony_ci} 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci/* 1768c2ecf20Sopenharmony_ci * register the notifier so that event interception for the tracked guest 1778c2ecf20Sopenharmony_ci * pages can be received. 1788c2ecf20Sopenharmony_ci */ 1798c2ecf20Sopenharmony_civoid 1808c2ecf20Sopenharmony_cikvm_page_track_register_notifier(struct kvm *kvm, 1818c2ecf20Sopenharmony_ci struct kvm_page_track_notifier_node *n) 1828c2ecf20Sopenharmony_ci{ 1838c2ecf20Sopenharmony_ci struct kvm_page_track_notifier_head *head; 1848c2ecf20Sopenharmony_ci 1858c2ecf20Sopenharmony_ci head = &kvm->arch.track_notifier_head; 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci spin_lock(&kvm->mmu_lock); 1888c2ecf20Sopenharmony_ci hlist_add_head_rcu(&n->node, &head->track_notifier_list); 1898c2ecf20Sopenharmony_ci spin_unlock(&kvm->mmu_lock); 1908c2ecf20Sopenharmony_ci} 1918c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_page_track_register_notifier); 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci/* 1948c2ecf20Sopenharmony_ci * stop receiving the event interception. It is the opposed operation of 1958c2ecf20Sopenharmony_ci * kvm_page_track_register_notifier(). 1968c2ecf20Sopenharmony_ci */ 1978c2ecf20Sopenharmony_civoid 1988c2ecf20Sopenharmony_cikvm_page_track_unregister_notifier(struct kvm *kvm, 1998c2ecf20Sopenharmony_ci struct kvm_page_track_notifier_node *n) 2008c2ecf20Sopenharmony_ci{ 2018c2ecf20Sopenharmony_ci struct kvm_page_track_notifier_head *head; 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci head = &kvm->arch.track_notifier_head; 2048c2ecf20Sopenharmony_ci 2058c2ecf20Sopenharmony_ci spin_lock(&kvm->mmu_lock); 2068c2ecf20Sopenharmony_ci hlist_del_rcu(&n->node); 2078c2ecf20Sopenharmony_ci spin_unlock(&kvm->mmu_lock); 2088c2ecf20Sopenharmony_ci synchronize_srcu(&head->track_srcu); 2098c2ecf20Sopenharmony_ci} 2108c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier); 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_ci/* 2138c2ecf20Sopenharmony_ci * Notify the node that write access is intercepted and write emulation is 2148c2ecf20Sopenharmony_ci * finished at this time. 2158c2ecf20Sopenharmony_ci * 2168c2ecf20Sopenharmony_ci * The node should figure out if the written page is the one that node is 2178c2ecf20Sopenharmony_ci * interested in by itself. 2188c2ecf20Sopenharmony_ci */ 2198c2ecf20Sopenharmony_civoid kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, 2208c2ecf20Sopenharmony_ci int bytes) 2218c2ecf20Sopenharmony_ci{ 2228c2ecf20Sopenharmony_ci struct kvm_page_track_notifier_head *head; 2238c2ecf20Sopenharmony_ci struct kvm_page_track_notifier_node *n; 2248c2ecf20Sopenharmony_ci int idx; 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci head = &vcpu->kvm->arch.track_notifier_head; 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci if (hlist_empty(&head->track_notifier_list)) 2298c2ecf20Sopenharmony_ci return; 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci idx = srcu_read_lock(&head->track_srcu); 2328c2ecf20Sopenharmony_ci hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, 2338c2ecf20Sopenharmony_ci srcu_read_lock_held(&head->track_srcu)) 2348c2ecf20Sopenharmony_ci if (n->track_write) 2358c2ecf20Sopenharmony_ci n->track_write(vcpu, gpa, new, bytes, n); 2368c2ecf20Sopenharmony_ci srcu_read_unlock(&head->track_srcu, idx); 2378c2ecf20Sopenharmony_ci} 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci/* 2408c2ecf20Sopenharmony_ci * Notify the node that memory slot is being removed or moved so that it can 2418c2ecf20Sopenharmony_ci * drop write-protection for the pages in the memory slot. 2428c2ecf20Sopenharmony_ci * 2438c2ecf20Sopenharmony_ci * The node should figure out it has any write-protected pages in this slot 2448c2ecf20Sopenharmony_ci * by itself. 2458c2ecf20Sopenharmony_ci */ 2468c2ecf20Sopenharmony_civoid kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot) 2478c2ecf20Sopenharmony_ci{ 2488c2ecf20Sopenharmony_ci struct kvm_page_track_notifier_head *head; 2498c2ecf20Sopenharmony_ci struct kvm_page_track_notifier_node *n; 2508c2ecf20Sopenharmony_ci int idx; 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_ci head = &kvm->arch.track_notifier_head; 2538c2ecf20Sopenharmony_ci 2548c2ecf20Sopenharmony_ci if (hlist_empty(&head->track_notifier_list)) 2558c2ecf20Sopenharmony_ci return; 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci idx = srcu_read_lock(&head->track_srcu); 2588c2ecf20Sopenharmony_ci hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, 2598c2ecf20Sopenharmony_ci srcu_read_lock_held(&head->track_srcu)) 2608c2ecf20Sopenharmony_ci if (n->track_flush_slot) 2618c2ecf20Sopenharmony_ci n->track_flush_slot(kvm, slot, n); 2628c2ecf20Sopenharmony_ci srcu_read_unlock(&head->track_srcu, idx); 2638c2ecf20Sopenharmony_ci} 264