// SPDX-License-Identifier: GPL-2.0-only
/*
 * Support KVM guest page tracking
 *
 * This feature allows us to track page access in the guest. Currently, only
 * write access is tracked.
 *
 * Copyright(C) 2015 Intel Corporation.
 *
 * Author:
 *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
 */
1362306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_ci#include <linux/lockdep.h>
1662306a36Sopenharmony_ci#include <linux/kvm_host.h>
1762306a36Sopenharmony_ci#include <linux/rculist.h>
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci#include "mmu.h"
2062306a36Sopenharmony_ci#include "mmu_internal.h"
2162306a36Sopenharmony_ci#include "page_track.h"
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_cibool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
2462306a36Sopenharmony_ci{
2562306a36Sopenharmony_ci	return IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING) ||
2662306a36Sopenharmony_ci	       !tdp_enabled || kvm_shadow_root_allocated(kvm);
2762306a36Sopenharmony_ci}
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_civoid kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
3062306a36Sopenharmony_ci{
3162306a36Sopenharmony_ci	kvfree(slot->arch.gfn_write_track);
3262306a36Sopenharmony_ci	slot->arch.gfn_write_track = NULL;
3362306a36Sopenharmony_ci}
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_cistatic int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
3662306a36Sopenharmony_ci						 unsigned long npages)
3762306a36Sopenharmony_ci{
3862306a36Sopenharmony_ci	const size_t size = sizeof(*slot->arch.gfn_write_track);
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci	if (!slot->arch.gfn_write_track)
4162306a36Sopenharmony_ci		slot->arch.gfn_write_track = __vcalloc(npages, size,
4262306a36Sopenharmony_ci						       GFP_KERNEL_ACCOUNT);
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci	return slot->arch.gfn_write_track ? 0 : -ENOMEM;
4562306a36Sopenharmony_ci}
4662306a36Sopenharmony_ci
/*
 * Set up write tracking for a new memslot.
 *
 * The write-track array is only allocated when write tracking is enabled
 * for @kvm; otherwise nothing is done and 0 is returned.  When allocating,
 * returns the result of the allocation helper (0 or -ENOMEM).
 */
int kvm_page_track_create_memslot(struct kvm *kvm,
				  struct kvm_memory_slot *slot,
				  unsigned long npages)
{
	if (kvm_page_track_write_tracking_enabled(kvm))
		return __kvm_page_track_write_tracking_alloc(slot, npages);

	return 0;
}
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ciint kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
5862306a36Sopenharmony_ci{
5962306a36Sopenharmony_ci	return __kvm_page_track_write_tracking_alloc(slot, slot->npages);
6062306a36Sopenharmony_ci}
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_cistatic void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
6362306a36Sopenharmony_ci				   short count)
6462306a36Sopenharmony_ci{
6562306a36Sopenharmony_ci	int index, val;
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci	val = slot->arch.gfn_write_track[index];
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci	if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX))
7262306a36Sopenharmony_ci		return;
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	slot->arch.gfn_write_track[index] += count;
7562306a36Sopenharmony_ci}
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_civoid __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
7862306a36Sopenharmony_ci			       gfn_t gfn)
7962306a36Sopenharmony_ci{
8062306a36Sopenharmony_ci	lockdep_assert_held_write(&kvm->mmu_lock);
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
8362306a36Sopenharmony_ci			    srcu_read_lock_held(&kvm->srcu));
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
8662306a36Sopenharmony_ci		return;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	update_gfn_write_track(slot, gfn, 1);
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	/*
9162306a36Sopenharmony_ci	 * new track stops large page mapping for the
9262306a36Sopenharmony_ci	 * tracked page.
9362306a36Sopenharmony_ci	 */
9462306a36Sopenharmony_ci	kvm_mmu_gfn_disallow_lpage(slot, gfn);
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_ci	if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
9762306a36Sopenharmony_ci		kvm_flush_remote_tlbs(kvm);
9862306a36Sopenharmony_ci}
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_civoid __kvm_write_track_remove_gfn(struct kvm *kvm,
10162306a36Sopenharmony_ci				  struct kvm_memory_slot *slot, gfn_t gfn)
10262306a36Sopenharmony_ci{
10362306a36Sopenharmony_ci	lockdep_assert_held_write(&kvm->mmu_lock);
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci	lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
10662306a36Sopenharmony_ci			    srcu_read_lock_held(&kvm->srcu));
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci	if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
10962306a36Sopenharmony_ci		return;
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	update_gfn_write_track(slot, gfn, -1);
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	/*
11462306a36Sopenharmony_ci	 * allow large page mapping for the tracked page
11562306a36Sopenharmony_ci	 * after the tracker is gone.
11662306a36Sopenharmony_ci	 */
11762306a36Sopenharmony_ci	kvm_mmu_gfn_allow_lpage(slot, gfn);
11862306a36Sopenharmony_ci}
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci/*
12162306a36Sopenharmony_ci * check if the corresponding access on the specified guest page is tracked.
12262306a36Sopenharmony_ci */
12362306a36Sopenharmony_cibool kvm_gfn_is_write_tracked(struct kvm *kvm,
12462306a36Sopenharmony_ci			      const struct kvm_memory_slot *slot, gfn_t gfn)
12562306a36Sopenharmony_ci{
12662306a36Sopenharmony_ci	int index;
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci	if (!slot)
12962306a36Sopenharmony_ci		return false;
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci	if (!kvm_page_track_write_tracking_enabled(kvm))
13262306a36Sopenharmony_ci		return false;
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
13562306a36Sopenharmony_ci	return !!READ_ONCE(slot->arch.gfn_write_track[index]);
13662306a36Sopenharmony_ci}
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
13962306a36Sopenharmony_civoid kvm_page_track_cleanup(struct kvm *kvm)
14062306a36Sopenharmony_ci{
14162306a36Sopenharmony_ci	struct kvm_page_track_notifier_head *head;
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	head = &kvm->arch.track_notifier_head;
14462306a36Sopenharmony_ci	cleanup_srcu_struct(&head->track_srcu);
14562306a36Sopenharmony_ci}
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ciint kvm_page_track_init(struct kvm *kvm)
14862306a36Sopenharmony_ci{
14962306a36Sopenharmony_ci	struct kvm_page_track_notifier_head *head;
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	head = &kvm->arch.track_notifier_head;
15262306a36Sopenharmony_ci	INIT_HLIST_HEAD(&head->track_notifier_list);
15362306a36Sopenharmony_ci	return init_srcu_struct(&head->track_srcu);
15462306a36Sopenharmony_ci}
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci/*
15762306a36Sopenharmony_ci * register the notifier so that event interception for the tracked guest
15862306a36Sopenharmony_ci * pages can be received.
15962306a36Sopenharmony_ci */
16062306a36Sopenharmony_ciint kvm_page_track_register_notifier(struct kvm *kvm,
16162306a36Sopenharmony_ci				     struct kvm_page_track_notifier_node *n)
16262306a36Sopenharmony_ci{
16362306a36Sopenharmony_ci	struct kvm_page_track_notifier_head *head;
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_ci	if (!kvm || kvm->mm != current->mm)
16662306a36Sopenharmony_ci		return -ESRCH;
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	kvm_get_kvm(kvm);
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	head = &kvm->arch.track_notifier_head;
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci	write_lock(&kvm->mmu_lock);
17362306a36Sopenharmony_ci	hlist_add_head_rcu(&n->node, &head->track_notifier_list);
17462306a36Sopenharmony_ci	write_unlock(&kvm->mmu_lock);
17562306a36Sopenharmony_ci	return 0;
17662306a36Sopenharmony_ci}
17762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci/*
18062306a36Sopenharmony_ci * stop receiving the event interception. It is the opposed operation of
18162306a36Sopenharmony_ci * kvm_page_track_register_notifier().
18262306a36Sopenharmony_ci */
18362306a36Sopenharmony_civoid kvm_page_track_unregister_notifier(struct kvm *kvm,
18462306a36Sopenharmony_ci					struct kvm_page_track_notifier_node *n)
18562306a36Sopenharmony_ci{
18662306a36Sopenharmony_ci	struct kvm_page_track_notifier_head *head;
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	head = &kvm->arch.track_notifier_head;
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci	write_lock(&kvm->mmu_lock);
19162306a36Sopenharmony_ci	hlist_del_rcu(&n->node);
19262306a36Sopenharmony_ci	write_unlock(&kvm->mmu_lock);
19362306a36Sopenharmony_ci	synchronize_srcu(&head->track_srcu);
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	kvm_put_kvm(kvm);
19662306a36Sopenharmony_ci}
19762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci/*
20062306a36Sopenharmony_ci * Notify the node that write access is intercepted and write emulation is
20162306a36Sopenharmony_ci * finished at this time.
20262306a36Sopenharmony_ci *
20362306a36Sopenharmony_ci * The node should figure out if the written page is the one that node is
20462306a36Sopenharmony_ci * interested in by itself.
20562306a36Sopenharmony_ci */
20662306a36Sopenharmony_civoid __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes)
20762306a36Sopenharmony_ci{
20862306a36Sopenharmony_ci	struct kvm_page_track_notifier_head *head;
20962306a36Sopenharmony_ci	struct kvm_page_track_notifier_node *n;
21062306a36Sopenharmony_ci	int idx;
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci	head = &kvm->arch.track_notifier_head;
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	if (hlist_empty(&head->track_notifier_list))
21562306a36Sopenharmony_ci		return;
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	idx = srcu_read_lock(&head->track_srcu);
21862306a36Sopenharmony_ci	hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
21962306a36Sopenharmony_ci				  srcu_read_lock_held(&head->track_srcu))
22062306a36Sopenharmony_ci		if (n->track_write)
22162306a36Sopenharmony_ci			n->track_write(gpa, new, bytes, n);
22262306a36Sopenharmony_ci	srcu_read_unlock(&head->track_srcu, idx);
22362306a36Sopenharmony_ci}
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci/*
22662306a36Sopenharmony_ci * Notify external page track nodes that a memory region is being removed from
22762306a36Sopenharmony_ci * the VM, e.g. so that users can free any associated metadata.
22862306a36Sopenharmony_ci */
22962306a36Sopenharmony_civoid kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
23062306a36Sopenharmony_ci{
23162306a36Sopenharmony_ci	struct kvm_page_track_notifier_head *head;
23262306a36Sopenharmony_ci	struct kvm_page_track_notifier_node *n;
23362306a36Sopenharmony_ci	int idx;
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	head = &kvm->arch.track_notifier_head;
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci	if (hlist_empty(&head->track_notifier_list))
23862306a36Sopenharmony_ci		return;
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	idx = srcu_read_lock(&head->track_srcu);
24162306a36Sopenharmony_ci	hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
24262306a36Sopenharmony_ci				  srcu_read_lock_held(&head->track_srcu))
24362306a36Sopenharmony_ci		if (n->track_remove_region)
24462306a36Sopenharmony_ci			n->track_remove_region(slot->base_gfn, slot->npages, n);
24562306a36Sopenharmony_ci	srcu_read_unlock(&head->track_srcu, idx);
24662306a36Sopenharmony_ci}
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci/*
24962306a36Sopenharmony_ci * add guest page to the tracking pool so that corresponding access on that
25062306a36Sopenharmony_ci * page will be intercepted.
25162306a36Sopenharmony_ci *
25262306a36Sopenharmony_ci * @kvm: the guest instance we are interested in.
25362306a36Sopenharmony_ci * @gfn: the guest page.
25462306a36Sopenharmony_ci */
25562306a36Sopenharmony_ciint kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
25662306a36Sopenharmony_ci{
25762306a36Sopenharmony_ci	struct kvm_memory_slot *slot;
25862306a36Sopenharmony_ci	int idx;
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci	idx = srcu_read_lock(&kvm->srcu);
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_ci	slot = gfn_to_memslot(kvm, gfn);
26362306a36Sopenharmony_ci	if (!slot) {
26462306a36Sopenharmony_ci		srcu_read_unlock(&kvm->srcu, idx);
26562306a36Sopenharmony_ci		return -EINVAL;
26662306a36Sopenharmony_ci	}
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci	write_lock(&kvm->mmu_lock);
26962306a36Sopenharmony_ci	__kvm_write_track_add_gfn(kvm, slot, gfn);
27062306a36Sopenharmony_ci	write_unlock(&kvm->mmu_lock);
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci	srcu_read_unlock(&kvm->srcu, idx);
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci	return 0;
27562306a36Sopenharmony_ci}
27662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci/*
27962306a36Sopenharmony_ci * remove the guest page from the tracking pool which stops the interception
28062306a36Sopenharmony_ci * of corresponding access on that page.
28162306a36Sopenharmony_ci *
28262306a36Sopenharmony_ci * @kvm: the guest instance we are interested in.
28362306a36Sopenharmony_ci * @gfn: the guest page.
28462306a36Sopenharmony_ci */
28562306a36Sopenharmony_ciint kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
28662306a36Sopenharmony_ci{
28762306a36Sopenharmony_ci	struct kvm_memory_slot *slot;
28862306a36Sopenharmony_ci	int idx;
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci	idx = srcu_read_lock(&kvm->srcu);
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	slot = gfn_to_memslot(kvm, gfn);
29362306a36Sopenharmony_ci	if (!slot) {
29462306a36Sopenharmony_ci		srcu_read_unlock(&kvm->srcu, idx);
29562306a36Sopenharmony_ci		return -EINVAL;
29662306a36Sopenharmony_ci	}
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	write_lock(&kvm->mmu_lock);
29962306a36Sopenharmony_ci	__kvm_write_track_remove_gfn(kvm, slot, gfn);
30062306a36Sopenharmony_ci	write_unlock(&kvm->mmu_lock);
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci	srcu_read_unlock(&kvm->srcu, idx);
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	return 0;
30562306a36Sopenharmony_ci}
30662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
30762306a36Sopenharmony_ci#endif
308