18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
 * Support KVM guest page tracking
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * This feature allows us to track page access in guest. Currently, only
68c2ecf20Sopenharmony_ci * write access is tracked.
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci * Copyright(C) 2015 Intel Corporation.
98c2ecf20Sopenharmony_ci *
108c2ecf20Sopenharmony_ci * Author:
118c2ecf20Sopenharmony_ci *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
128c2ecf20Sopenharmony_ci */
138c2ecf20Sopenharmony_ci
148c2ecf20Sopenharmony_ci#include <linux/kvm_host.h>
158c2ecf20Sopenharmony_ci#include <linux/rculist.h>
168c2ecf20Sopenharmony_ci
178c2ecf20Sopenharmony_ci#include <asm/kvm_page_track.h>
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ci#include "mmu_internal.h"
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_civoid kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
228c2ecf20Sopenharmony_ci{
238c2ecf20Sopenharmony_ci	int i;
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci	for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
268c2ecf20Sopenharmony_ci		kvfree(slot->arch.gfn_track[i]);
278c2ecf20Sopenharmony_ci		slot->arch.gfn_track[i] = NULL;
288c2ecf20Sopenharmony_ci	}
298c2ecf20Sopenharmony_ci}
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ciint kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
328c2ecf20Sopenharmony_ci				  unsigned long npages)
338c2ecf20Sopenharmony_ci{
348c2ecf20Sopenharmony_ci	int  i;
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_ci	for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
378c2ecf20Sopenharmony_ci		slot->arch.gfn_track[i] =
388c2ecf20Sopenharmony_ci			__vcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
398c2ecf20Sopenharmony_ci				 GFP_KERNEL_ACCOUNT);
408c2ecf20Sopenharmony_ci		if (!slot->arch.gfn_track[i])
418c2ecf20Sopenharmony_ci			goto track_free;
428c2ecf20Sopenharmony_ci	}
438c2ecf20Sopenharmony_ci
448c2ecf20Sopenharmony_ci	return 0;
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_citrack_free:
478c2ecf20Sopenharmony_ci	kvm_page_track_free_memslot(slot);
488c2ecf20Sopenharmony_ci	return -ENOMEM;
498c2ecf20Sopenharmony_ci}
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_cistatic inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
528c2ecf20Sopenharmony_ci{
538c2ecf20Sopenharmony_ci	if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
548c2ecf20Sopenharmony_ci		return false;
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_ci	return true;
578c2ecf20Sopenharmony_ci}
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_cistatic void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
608c2ecf20Sopenharmony_ci			     enum kvm_page_track_mode mode, short count)
618c2ecf20Sopenharmony_ci{
628c2ecf20Sopenharmony_ci	int index, val;
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	val = slot->arch.gfn_track[mode][index];
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci	if (WARN_ON(val + count < 0 || val + count > USHRT_MAX))
698c2ecf20Sopenharmony_ci		return;
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci	slot->arch.gfn_track[mode][index] += count;
728c2ecf20Sopenharmony_ci}
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_ci/*
758c2ecf20Sopenharmony_ci * add guest page to the tracking pool so that corresponding access on that
768c2ecf20Sopenharmony_ci * page will be intercepted.
778c2ecf20Sopenharmony_ci *
788c2ecf20Sopenharmony_ci * It should be called under the protection both of mmu-lock and kvm->srcu
798c2ecf20Sopenharmony_ci * or kvm->slots_lock.
808c2ecf20Sopenharmony_ci *
818c2ecf20Sopenharmony_ci * @kvm: the guest instance we are interested in.
828c2ecf20Sopenharmony_ci * @slot: the @gfn belongs to.
838c2ecf20Sopenharmony_ci * @gfn: the guest page.
848c2ecf20Sopenharmony_ci * @mode: tracking mode, currently only write track is supported.
858c2ecf20Sopenharmony_ci */
868c2ecf20Sopenharmony_civoid kvm_slot_page_track_add_page(struct kvm *kvm,
878c2ecf20Sopenharmony_ci				  struct kvm_memory_slot *slot, gfn_t gfn,
888c2ecf20Sopenharmony_ci				  enum kvm_page_track_mode mode)
898c2ecf20Sopenharmony_ci{
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_ci	if (WARN_ON(!page_track_mode_is_valid(mode)))
928c2ecf20Sopenharmony_ci		return;
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	update_gfn_track(slot, gfn, mode, 1);
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_ci	/*
978c2ecf20Sopenharmony_ci	 * new track stops large page mapping for the
988c2ecf20Sopenharmony_ci	 * tracked page.
998c2ecf20Sopenharmony_ci	 */
1008c2ecf20Sopenharmony_ci	kvm_mmu_gfn_disallow_lpage(slot, gfn);
1018c2ecf20Sopenharmony_ci
1028c2ecf20Sopenharmony_ci	if (mode == KVM_PAGE_TRACK_WRITE)
1038c2ecf20Sopenharmony_ci		if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
1048c2ecf20Sopenharmony_ci			kvm_flush_remote_tlbs(kvm);
1058c2ecf20Sopenharmony_ci}
1068c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_ci/*
1098c2ecf20Sopenharmony_ci * remove the guest page from the tracking pool which stops the interception
1108c2ecf20Sopenharmony_ci * of corresponding access on that page. It is the opposed operation of
1118c2ecf20Sopenharmony_ci * kvm_slot_page_track_add_page().
1128c2ecf20Sopenharmony_ci *
1138c2ecf20Sopenharmony_ci * It should be called under the protection both of mmu-lock and kvm->srcu
1148c2ecf20Sopenharmony_ci * or kvm->slots_lock.
1158c2ecf20Sopenharmony_ci *
1168c2ecf20Sopenharmony_ci * @kvm: the guest instance we are interested in.
1178c2ecf20Sopenharmony_ci * @slot: the @gfn belongs to.
1188c2ecf20Sopenharmony_ci * @gfn: the guest page.
1198c2ecf20Sopenharmony_ci * @mode: tracking mode, currently only write track is supported.
1208c2ecf20Sopenharmony_ci */
1218c2ecf20Sopenharmony_civoid kvm_slot_page_track_remove_page(struct kvm *kvm,
1228c2ecf20Sopenharmony_ci				     struct kvm_memory_slot *slot, gfn_t gfn,
1238c2ecf20Sopenharmony_ci				     enum kvm_page_track_mode mode)
1248c2ecf20Sopenharmony_ci{
1258c2ecf20Sopenharmony_ci	if (WARN_ON(!page_track_mode_is_valid(mode)))
1268c2ecf20Sopenharmony_ci		return;
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_ci	update_gfn_track(slot, gfn, mode, -1);
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci	/*
1318c2ecf20Sopenharmony_ci	 * allow large page mapping for the tracked page
1328c2ecf20Sopenharmony_ci	 * after the tracker is gone.
1338c2ecf20Sopenharmony_ci	 */
1348c2ecf20Sopenharmony_ci	kvm_mmu_gfn_allow_lpage(slot, gfn);
1358c2ecf20Sopenharmony_ci}
1368c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci/*
1398c2ecf20Sopenharmony_ci * check if the corresponding access on the specified guest page is tracked.
1408c2ecf20Sopenharmony_ci */
1418c2ecf20Sopenharmony_cibool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
1428c2ecf20Sopenharmony_ci			      enum kvm_page_track_mode mode)
1438c2ecf20Sopenharmony_ci{
1448c2ecf20Sopenharmony_ci	struct kvm_memory_slot *slot;
1458c2ecf20Sopenharmony_ci	int index;
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci	if (WARN_ON(!page_track_mode_is_valid(mode)))
1488c2ecf20Sopenharmony_ci		return false;
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
1518c2ecf20Sopenharmony_ci	if (!slot)
1528c2ecf20Sopenharmony_ci		return false;
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_ci	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
1558c2ecf20Sopenharmony_ci	return !!READ_ONCE(slot->arch.gfn_track[mode][index]);
1568c2ecf20Sopenharmony_ci}
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_civoid kvm_page_track_cleanup(struct kvm *kvm)
1598c2ecf20Sopenharmony_ci{
1608c2ecf20Sopenharmony_ci	struct kvm_page_track_notifier_head *head;
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci	head = &kvm->arch.track_notifier_head;
1638c2ecf20Sopenharmony_ci	cleanup_srcu_struct(&head->track_srcu);
1648c2ecf20Sopenharmony_ci}
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_ciint kvm_page_track_init(struct kvm *kvm)
1678c2ecf20Sopenharmony_ci{
1688c2ecf20Sopenharmony_ci	struct kvm_page_track_notifier_head *head;
1698c2ecf20Sopenharmony_ci
1708c2ecf20Sopenharmony_ci	head = &kvm->arch.track_notifier_head;
1718c2ecf20Sopenharmony_ci	INIT_HLIST_HEAD(&head->track_notifier_list);
1728c2ecf20Sopenharmony_ci	return init_srcu_struct(&head->track_srcu);
1738c2ecf20Sopenharmony_ci}
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci/*
1768c2ecf20Sopenharmony_ci * register the notifier so that event interception for the tracked guest
1778c2ecf20Sopenharmony_ci * pages can be received.
1788c2ecf20Sopenharmony_ci */
1798c2ecf20Sopenharmony_civoid
1808c2ecf20Sopenharmony_cikvm_page_track_register_notifier(struct kvm *kvm,
1818c2ecf20Sopenharmony_ci				 struct kvm_page_track_notifier_node *n)
1828c2ecf20Sopenharmony_ci{
1838c2ecf20Sopenharmony_ci	struct kvm_page_track_notifier_head *head;
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_ci	head = &kvm->arch.track_notifier_head;
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci	spin_lock(&kvm->mmu_lock);
1888c2ecf20Sopenharmony_ci	hlist_add_head_rcu(&n->node, &head->track_notifier_list);
1898c2ecf20Sopenharmony_ci	spin_unlock(&kvm->mmu_lock);
1908c2ecf20Sopenharmony_ci}
1918c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_ci/*
1948c2ecf20Sopenharmony_ci * stop receiving the event interception. It is the opposed operation of
1958c2ecf20Sopenharmony_ci * kvm_page_track_register_notifier().
1968c2ecf20Sopenharmony_ci */
1978c2ecf20Sopenharmony_civoid
1988c2ecf20Sopenharmony_cikvm_page_track_unregister_notifier(struct kvm *kvm,
1998c2ecf20Sopenharmony_ci				   struct kvm_page_track_notifier_node *n)
2008c2ecf20Sopenharmony_ci{
2018c2ecf20Sopenharmony_ci	struct kvm_page_track_notifier_head *head;
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ci	head = &kvm->arch.track_notifier_head;
2048c2ecf20Sopenharmony_ci
2058c2ecf20Sopenharmony_ci	spin_lock(&kvm->mmu_lock);
2068c2ecf20Sopenharmony_ci	hlist_del_rcu(&n->node);
2078c2ecf20Sopenharmony_ci	spin_unlock(&kvm->mmu_lock);
2088c2ecf20Sopenharmony_ci	synchronize_srcu(&head->track_srcu);
2098c2ecf20Sopenharmony_ci}
2108c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
2118c2ecf20Sopenharmony_ci
2128c2ecf20Sopenharmony_ci/*
2138c2ecf20Sopenharmony_ci * Notify the node that write access is intercepted and write emulation is
2148c2ecf20Sopenharmony_ci * finished at this time.
2158c2ecf20Sopenharmony_ci *
2168c2ecf20Sopenharmony_ci * The node should figure out if the written page is the one that node is
2178c2ecf20Sopenharmony_ci * interested in by itself.
2188c2ecf20Sopenharmony_ci */
2198c2ecf20Sopenharmony_civoid kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
2208c2ecf20Sopenharmony_ci			  int bytes)
2218c2ecf20Sopenharmony_ci{
2228c2ecf20Sopenharmony_ci	struct kvm_page_track_notifier_head *head;
2238c2ecf20Sopenharmony_ci	struct kvm_page_track_notifier_node *n;
2248c2ecf20Sopenharmony_ci	int idx;
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	head = &vcpu->kvm->arch.track_notifier_head;
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci	if (hlist_empty(&head->track_notifier_list))
2298c2ecf20Sopenharmony_ci		return;
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci	idx = srcu_read_lock(&head->track_srcu);
2328c2ecf20Sopenharmony_ci	hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
2338c2ecf20Sopenharmony_ci				srcu_read_lock_held(&head->track_srcu))
2348c2ecf20Sopenharmony_ci		if (n->track_write)
2358c2ecf20Sopenharmony_ci			n->track_write(vcpu, gpa, new, bytes, n);
2368c2ecf20Sopenharmony_ci	srcu_read_unlock(&head->track_srcu, idx);
2378c2ecf20Sopenharmony_ci}
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci/*
2408c2ecf20Sopenharmony_ci * Notify the node that memory slot is being removed or moved so that it can
2418c2ecf20Sopenharmony_ci * drop write-protection for the pages in the memory slot.
2428c2ecf20Sopenharmony_ci *
2438c2ecf20Sopenharmony_ci * The node should figure out it has any write-protected pages in this slot
2448c2ecf20Sopenharmony_ci * by itself.
2458c2ecf20Sopenharmony_ci */
2468c2ecf20Sopenharmony_civoid kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
2478c2ecf20Sopenharmony_ci{
2488c2ecf20Sopenharmony_ci	struct kvm_page_track_notifier_head *head;
2498c2ecf20Sopenharmony_ci	struct kvm_page_track_notifier_node *n;
2508c2ecf20Sopenharmony_ci	int idx;
2518c2ecf20Sopenharmony_ci
2528c2ecf20Sopenharmony_ci	head = &kvm->arch.track_notifier_head;
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_ci	if (hlist_empty(&head->track_notifier_list))
2558c2ecf20Sopenharmony_ci		return;
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	idx = srcu_read_lock(&head->track_srcu);
2588c2ecf20Sopenharmony_ci	hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
2598c2ecf20Sopenharmony_ci				srcu_read_lock_held(&head->track_srcu))
2608c2ecf20Sopenharmony_ci		if (n->track_flush_slot)
2618c2ecf20Sopenharmony_ci			n->track_flush_slot(kvm, slot, n);
2628c2ecf20Sopenharmony_ci	srcu_read_unlock(&head->track_srcu, idx);
2638c2ecf20Sopenharmony_ci}
264