// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 */

#include <kvm/iodev.h>

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/vmalloc.h>
#include <linux/reboot.h>
#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/syscore_ops.h>
#include <linux/cpu.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/sched/stat.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/anon_inodes.h>
#include <linux/profile.h>
#include <linux/kvm_para.h>
#include <linux/pagemap.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/compat.h>
#include <linux/srcu.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include <linux/bsearch.h>
#include <linux/io.h>
#include <linux/lockdep.h>
#include <linux/kthread.h>
#include <linux/suspend.h>

#include <asm/processor.h>
#include <asm/ioctl.h>
#include <linux/uaccess.h>

#include "coalesced_mmio.h"
#include "async_pf.h"
#include "kvm_mm.h"
#include "vfio.h"

#include <trace/events/ipi.h>

#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>

#include <linux/kvm_dirty_ring.h>


/* Worst case buffer size needed for holding an integer. */
#define ITOA_MAX_LEN 12

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

/* Architectures should define their poll value according to the halt latency */
unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
module_param(halt_poll_ns, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns);

/* Default doubles per-vcpu halt_poll_ns. */
unsigned int halt_poll_ns_grow = 2;
module_param(halt_poll_ns_grow, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns_grow);

/* The start value to grow halt_poll_ns from */
unsigned int halt_poll_ns_grow_start = 10000; /* 10us */
module_param(halt_poll_ns_grow_start, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns_grow_start);

/* Default resets per-vcpu halt_poll_ns. */
unsigned int halt_poll_ns_shrink;
module_param(halt_poll_ns_shrink, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);

/*
 * Ordering of locks:
 *
 *	kvm->lock --> kvm->slots_lock --> kvm->irq_lock
 */

DEFINE_MUTEX(kvm_lock);
LIST_HEAD(vm_list);

static struct kmem_cache *kvm_vcpu_cache;

static __read_mostly struct preempt_ops kvm_preempt_ops;
static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_running_vcpu);

struct dentry *kvm_debugfs_dir;
EXPORT_SYMBOL_GPL(kvm_debugfs_dir);

static const struct file_operations stat_fops_per_vm;

static struct file_operations kvm_chardev_ops;

static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
			   unsigned long arg);
#ifdef CONFIG_KVM_COMPAT
static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
				  unsigned long arg);
#define KVM_COMPAT(c)	.compat_ioctl	= (c)
#else
/*
 * For architectures that don't implement a compat infrastructure,
 * adopt a double line of defense:
 * - Prevent a compat task from opening /dev/kvm
 * - If the open has been done by a 64bit task, and the KVM fd
 *   passed to a compat task, let the ioctls fail.
 */
static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl,
				unsigned long arg) { return -EINVAL; }

static int kvm_no_compat_open(struct inode *inode, struct file *file)
{
	return is_compat_task() ? -ENODEV : 0;
}
#define KVM_COMPAT(c)	.compat_ioctl	= kvm_no_compat_ioctl,	\
			.open		= kvm_no_compat_open
#endif
static int hardware_enable_all(void);
static void hardware_disable_all(void);

static void kvm_io_bus_destroy(struct kvm_io_bus *bus);

#define KVM_EVENT_CREATE_VM 0
#define KVM_EVENT_DESTROY_VM 1
static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
static unsigned long long kvm_createvm_count;
static unsigned long long kvm_active_vms;

static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask);

__weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
{
}

bool kvm_is_zone_device_page(struct page *page)
{
	/*
	 * The metadata used by is_zone_device_page() to determine whether or
	 * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
	 * the device has been pinned, e.g. by get_user_pages().  WARN if the
	 * page_count() is zero to help detect bad usage of this helper.
	 */
	if (WARN_ON_ONCE(!page_count(page)))
		return false;

	return is_zone_device_page(page);
}

/*
 * Returns a 'struct page' if the pfn is "valid" and backed by a refcounted
 * page, NULL otherwise.  Note, the list of refcounted PG_reserved page types
 * is likely incomplete, it has been compiled purely through people wanting to
 * back guest with a certain type of memory and encountering issues.
 */
struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn)
{
	struct page *page;

	if (!pfn_valid(pfn))
		return NULL;

	page = pfn_to_page(pfn);
	if (!PageReserved(page))
		return page;

	/* The ZERO_PAGE(s) is marked PG_reserved, but is refcounted. */
	if (is_zero_pfn(pfn))
		return page;

	/*
	 * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
	 * perspective they are "normal" pages, albeit with slightly different
	 * usage rules.
	 */
	if (kvm_is_zone_device_page(page))
		return page;

	return NULL;
}

/*
 * Switches to specified vcpu, until a matching vcpu_put()
 */
void vcpu_load(struct kvm_vcpu *vcpu)
{
	int cpu = get_cpu();

	__this_cpu_write(kvm_running_vcpu, vcpu);
	preempt_notifier_register(&vcpu->preempt_notifier);
	kvm_arch_vcpu_load(vcpu, cpu);
	put_cpu();
}
EXPORT_SYMBOL_GPL(vcpu_load);

void vcpu_put(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	kvm_arch_vcpu_put(vcpu);
	preempt_notifier_unregister(&vcpu->preempt_notifier);
	__this_cpu_write(kvm_running_vcpu, NULL);
	preempt_enable();
}
EXPORT_SYMBOL_GPL(vcpu_put);
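
/*
 * Decide whether a request needs to be followed up with an IPI to the target
 * vCPU's physical CPU.  Requests flagged KVM_REQUEST_WAIT must also reach
 * vCPUs in READING_SHADOW_PAGE_TABLES mode; all other requests only need to
 * kick vCPUs that are currently running in guest mode.
 */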
/* TODO: merge with kvm_arch_vcpu_should_kick */
static bool kvm_request_needs_ipi(struct kvm_vcpu *vcpu, unsigned req)
{
	int mode = kvm_vcpu_exiting_guest_mode(vcpu);

	/*
	 * We need to wait for the VCPU to reenable interrupts and get out of
	 * READING_SHADOW_PAGE_TABLES mode.
	 */
	if (req & KVM_REQUEST_WAIT)
		return mode != OUTSIDE_GUEST_MODE;

	/*
	 * Need to kick a running VCPU, but otherwise there is nothing to do.
	 */
	return mode == IN_GUEST_MODE;
}

static void ack_kick(void *_completed)
{
}

static inline bool kvm_kick_many_cpus(struct cpumask *cpus, bool wait)
{
	if (cpumask_empty(cpus))
		return false;

	smp_call_function_many(cpus, ack_kick, NULL, wait);
	return true;
}
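
/*
 * Post @req on @vcpu and nudge the vCPU so it notices: wake it if it is
 * blocking, and if an IPI is needed to force it out of guest mode, record
 * its current pCPU in @tmp so the caller can send one batched IPI for all
 * targeted vCPUs.
 */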
static void kvm_make_vcpu_request(struct kvm_vcpu *vcpu, unsigned int req,
				  struct cpumask *tmp, int current_cpu)
{
	int cpu;

	if (likely(!(req & KVM_REQUEST_NO_ACTION)))
		__kvm_make_request(req, vcpu);

	if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
		return;

	/*
	 * Note, the vCPU could get migrated to a different pCPU at any point
	 * after kvm_request_needs_ipi(), which could result in sending an IPI
	 * to the previous pCPU.  But, that's OK because the purpose of the IPI
	 * is to ensure the vCPU returns to OUTSIDE_GUEST_MODE, which is
	 * satisfied if the vCPU migrates. Entering READING_SHADOW_PAGE_TABLES
	 * after this point is also OK, as the requirement is only that KVM wait
	 * for vCPUs that were reading SPTEs _before_ any changes were
	 * finalized. See kvm_vcpu_kick() for more details on handling requests.
	 */
	if (kvm_request_needs_ipi(vcpu, req)) {
		cpu = READ_ONCE(vcpu->cpu);
		if (cpu != -1 && cpu != current_cpu)
			__cpumask_set_cpu(cpu, tmp);
	}
}

bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
				 unsigned long *vcpu_bitmap)
{
	struct kvm_vcpu *vcpu;
	struct cpumask *cpus;
	int i, me;
	bool called;

	me = get_cpu();

	cpus = this_cpu_cpumask_var_ptr(cpu_kick_mask);
	cpumask_clear(cpus);

	for_each_set_bit(i, vcpu_bitmap, KVM_MAX_VCPUS) {
		vcpu = kvm_get_vcpu(kvm, i);
		if (!vcpu)
			continue;
		kvm_make_vcpu_request(vcpu, req, cpus, me);
	}

	called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
	put_cpu();

	return called;
}

bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
				      struct kvm_vcpu *except)
{
	struct kvm_vcpu *vcpu;
	struct cpumask *cpus;
	unsigned long i;
	bool called;
	int me;

	me = get_cpu();

	cpus = this_cpu_cpumask_var_ptr(cpu_kick_mask);
	cpumask_clear(cpus);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (vcpu == except)
			continue;
		kvm_make_vcpu_request(vcpu, req, cpus, me);
	}

	called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
	put_cpu();

	return called;
}

bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
{
	return kvm_make_all_cpus_request_except(kvm, req, NULL);
}
EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
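
/*
 * Flush the TLBs of all vCPUs: use the arch-provided remote flush when it is
 * available and succeeds, otherwise fall back to posting KVM_REQ_TLB_FLUSH
 * on every vCPU.
 */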
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	++kvm->stat.generic.remote_tlb_flush_requests;

	/*
	 * We want to publish modifications to the page tables before reading
	 * mode. Pairs with a memory barrier in arch-specific code.
	 * - x86: smp_mb__after_srcu_read_unlock in vcpu_enter_guest
	 *   and smp_mb in walk_shadow_page_lockless_begin/end.
	 * - powerpc: smp_mb in kvmppc_prepare_to_enter.
	 *
	 * There is already an smp_mb__after_atomic() before
	 * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
	 * barrier here.
	 */
	if (!kvm_arch_flush_remote_tlbs(kvm)
	    || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
		++kvm->stat.generic.remote_tlb_flush;
}
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);

void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
{
	if (!kvm_arch_flush_remote_tlbs_range(kvm, gfn, nr_pages))
		return;

	/*
	 * Fall back to flushing the entire TLB if the architecture's
	 * range-based TLB invalidation is unsupported or can't be performed
	 * for whatever reason.
	 */
	kvm_flush_remote_tlbs(kvm);
}

void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
				   const struct kvm_memory_slot *memslot)
{
	/*
	 * All current use cases for flushing the TLBs for a specific memslot
	 * are related to dirty logging, and many do the TLB flush out of
	 * mmu_lock. The interaction between the various operations on memslot
	 * must be serialized by slots_lock to ensure the TLB flush from one
	 * operation is observed by any other operation on the same memslot.
	 */
	lockdep_assert_held(&kvm->slots_lock);
	kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
}

static void kvm_flush_shadow_all(struct kvm *kvm)
{
	kvm_arch_flush_shadow_all(kvm);
	kvm_arch_guest_memory_reclaimed(kvm);
}

#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
					       gfp_t gfp_flags)
{
	gfp_flags |= mc->gfp_zero;

	if (mc->kmem_cache)
		return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
	else
		return (void *)__get_free_page(gfp_flags);
}
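
/*
 * Pre-fill @mc with objects so that later allocations under mmu_lock cannot
 * sleep.  Returns 0 once at least @min objects are cached, -ENOMEM if even
 * @min objects could not be allocated.  The capacity is fixed by the first
 * topup; asking for a different capacity later is a bug and fails with -EIO.
 */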
int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
{
	gfp_t gfp = mc->gfp_custom ? mc->gfp_custom : GFP_KERNEL_ACCOUNT;
	void *obj;

	if (mc->nobjs >= min)
		return 0;

	if (unlikely(!mc->objects)) {
		if (WARN_ON_ONCE(!capacity))
			return -EIO;

		mc->objects = kvmalloc_array(sizeof(void *), capacity, gfp);
		if (!mc->objects)
			return -ENOMEM;

		mc->capacity = capacity;
	}

	/* It is illegal to request a different capacity across topups. */
	if (WARN_ON_ONCE(mc->capacity != capacity))
		return -EIO;

	while (mc->nobjs < mc->capacity) {
		obj = mmu_memory_cache_alloc_obj(mc, gfp);
		if (!obj)
			return mc->nobjs >= min ? 0 : -ENOMEM;
		mc->objects[mc->nobjs++] = obj;
	}
	return 0;
}

int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
{
	return __kvm_mmu_topup_memory_cache(mc, KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE, min);
}

int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc)
{
	return mc->nobjs;
}

void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs) {
		if (mc->kmem_cache)
			kmem_cache_free(mc->kmem_cache, mc->objects[--mc->nobjs]);
		else
			free_page((unsigned long)mc->objects[--mc->nobjs]);
	}

	kvfree(mc->objects);

	mc->objects = NULL;
	mc->capacity = 0;
}

void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
	void *p;

	if (WARN_ON(!mc->nobjs))
		p = mmu_memory_cache_alloc_obj(mc, GFP_ATOMIC | __GFP_ACCOUNT);
	else
		p = mc->objects[--mc->nobjs];
	BUG_ON(!p);
	return p;
}
#endif
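
/*
 * One-time setup of the arch-neutral fields of a freshly allocated vCPU:
 * locks, IDs, halt-polling/spin-loop bookkeeping, the preempt notifier and
 * the stats id string used for debugfs/binary stats.
 */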
static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
	mutex_init(&vcpu->mutex);
	vcpu->cpu = -1;
	vcpu->kvm = kvm;
	vcpu->vcpu_id = id;
	vcpu->pid = NULL;
#ifndef __KVM_HAVE_ARCH_WQP
	rcuwait_init(&vcpu->wait);
#endif
	kvm_async_pf_vcpu_init(vcpu);

	kvm_vcpu_set_in_spin_loop(vcpu, false);
	kvm_vcpu_set_dy_eligible(vcpu, false);
	vcpu->preempted = false;
	vcpu->ready = false;
	preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
	vcpu->last_used_slot = NULL;

	/* Fill the stats id string for the vcpu */
	snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
		 task_pid_nr(current), id);
}

static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_arch_vcpu_destroy(vcpu);
	kvm_dirty_ring_free(&vcpu->dirty_ring);

	/*
	 * No need for rcu_read_lock as VCPU_RUN is the only place that changes
	 * the vcpu->pid pointer, and at destruction time all file descriptors
	 * are already gone.
	 */
	put_pid(rcu_dereference_protected(vcpu->pid, 1));

	free_page((unsigned long)vcpu->run);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

void kvm_destroy_vcpus(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_vcpu_destroy(vcpu);
		xa_erase(&kvm->vcpu_array, i);
	}

	atomic_set(&kvm->online_vcpus, 0);
}
EXPORT_SYMBOL_GPL(kvm_destroy_vcpus);

#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
{
	return container_of(mn, struct kvm, mmu_notifier);
}

typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);

typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
			     unsigned long end);

typedef void (*on_unlock_fn_t)(struct kvm *kvm);
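
/*
 * Describes one mmu_notifier event to be applied to KVM's memslots: the hva
 * range being operated on, an optional argument for the per-gfn-range
 * handler, the handler itself, hooks invoked when mmu_lock is taken and
 * released, whether a TLB flush is needed if the handler returns true, and
 * whether the operation is allowed to block.
 */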
struct kvm_hva_range {
	unsigned long start;
	unsigned long end;
	union kvm_mmu_notifier_arg arg;
	hva_handler_t handler;
	on_lock_fn_t on_lock;
	on_unlock_fn_t on_unlock;
	bool flush_on_ret;
	bool may_block;
};

/*
 * Use a dedicated stub instead of NULL to indicate that there is no callback
 * function/handler.  The compiler technically can't guarantee that a real
 * function will have a non-zero address, and so it will generate code to
 * check for !NULL, whereas comparing against a stub will be elided at compile
 * time (unless the compiler is getting long in the tooth, e.g. gcc 4.9).
 */
static void kvm_null_fn(void)
{

}
#define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn)

static const union kvm_mmu_notifier_arg KVM_MMU_NOTIFIER_NO_ARG;

/* Iterate over each memslot intersecting [start, last] (inclusive) range */
#define kvm_for_each_memslot_in_hva_range(node, slots, start, last)	     \
	for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
	     node;							     \
	     node = interval_tree_iter_next(node, start, last))
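
/*
 * Core mmu_notifier dispatch: walk every memslot (in every address space)
 * that overlaps the hva range, convert each overlap to a gfn range and feed
 * it to range->handler.  mmu_lock is only taken once a relevant memslot is
 * found, at which point the optional on_lock()/on_unlock() hooks are run.
 * The OR of all handler return values is returned to the caller; if
 * flush_on_ret is set, a non-zero result also triggers a remote TLB flush
 * before returning.
 */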
static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
						  const struct kvm_hva_range *range)
{
	bool ret = false, locked = false;
	struct kvm_gfn_range gfn_range;
	struct kvm_memory_slot *slot;
	struct kvm_memslots *slots;
	int i, idx;

	if (WARN_ON_ONCE(range->end <= range->start))
		return 0;

	/* A null handler is allowed if and only if on_lock() is provided. */
	if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) &&
			 IS_KVM_NULL_FN(range->handler)))
		return 0;

	idx = srcu_read_lock(&kvm->srcu);

	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
		struct interval_tree_node *node;

		slots = __kvm_memslots(kvm, i);
		kvm_for_each_memslot_in_hva_range(node, slots,
						  range->start, range->end - 1) {
			unsigned long hva_start, hva_end;

			slot = container_of(node, struct kvm_memory_slot, hva_node[slots->node_idx]);
			hva_start = max(range->start, slot->userspace_addr);
			hva_end = min(range->end, slot->userspace_addr +
						  (slot->npages << PAGE_SHIFT));

			/*
			 * To optimize for the likely case where the address
			 * range is covered by zero or one memslots, don't
			 * bother making these conditional (to avoid writes on
			 * the second or later invocation of the handler).
			 */
			gfn_range.arg = range->arg;
			gfn_range.may_block = range->may_block;

			/*
			 * {gfn(page) | page intersects with [hva_start, hva_end)} =
			 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
			 */
			gfn_range.start = hva_to_gfn_memslot(hva_start, slot);
			gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot);
			gfn_range.slot = slot;

			if (!locked) {
				locked = true;
				KVM_MMU_LOCK(kvm);
				if (!IS_KVM_NULL_FN(range->on_lock))
					range->on_lock(kvm, range->start, range->end);
				if (IS_KVM_NULL_FN(range->handler))
					break;
			}
			ret |= range->handler(kvm, &gfn_range);
		}
	}

	if (range->flush_on_ret && ret)
		kvm_flush_remote_tlbs(kvm);

	if (locked) {
		KVM_MMU_UNLOCK(kvm);
		if (!IS_KVM_NULL_FN(range->on_unlock))
			range->on_unlock(kvm);
	}

	srcu_read_unlock(&kvm->srcu, idx);

	/* The notifiers are averse to booleans. :-( */
	return (int)ret;
}

static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
						unsigned long start,
						unsigned long end,
						union kvm_mmu_notifier_arg arg,
						hva_handler_t handler)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	const struct kvm_hva_range range = {
		.start		= start,
		.end		= end,
		.arg		= arg,
		.handler	= handler,
		.on_lock	= (void *)kvm_null_fn,
		.on_unlock	= (void *)kvm_null_fn,
		.flush_on_ret	= true,
		.may_block	= false,
	};

	return __kvm_handle_hva_range(kvm, &range);
}

static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn,
							 unsigned long start,
							 unsigned long end,
							 hva_handler_t handler)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	const struct kvm_hva_range range = {
		.start		= start,
		.end		= end,
		.handler	= handler,
		.on_lock	= (void *)kvm_null_fn,
		.on_unlock	= (void *)kvm_null_fn,
		.flush_on_ret	= false,
		.may_block	= false,
	};

	return __kvm_handle_hva_range(kvm, &range);
}

static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	/*
	 * Skipping invalid memslots is correct if and only if change_pte() is
	 * surrounded by invalidate_range_{start,end}(), which is currently
	 * guaranteed by the primary MMU.  If that ever changes, KVM needs to
	 * unmap the memslot instead of skipping the memslot to ensure that KVM
	 * doesn't hold references to the old PFN.
	 */
	WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));

	if (range->slot->flags & KVM_MEMSLOT_INVALID)
		return false;

	return kvm_set_spte_gfn(kvm, range);
}

static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
					struct mm_struct *mm,
					unsigned long address,
					pte_t pte)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	const union kvm_mmu_notifier_arg arg = { .pte = pte };

	trace_kvm_set_spte_hva(address);

	/*
	 * .change_pte() must be surrounded by .invalidate_range_{start,end}().
	 * If mmu_invalidate_in_progress is zero, then no in-progress
	 * invalidations, including this one, found a relevant memslot at
	 * start(); rechecking memslots here is unnecessary.  Note, a false
	 * positive (count elevated by a different invalidation) is sub-optimal
	 * but functionally ok.
	 */
	WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
	if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
		return;

	kvm_handle_hva_range(mn, address, address + 1, arg, kvm_change_spte_gfn);
}
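
/*
 * Called with mmu_lock held (via the on_lock hook) when an mmu_notifier
 * invalidation starts: bumps mmu_invalidate_in_progress and records the
 * union of all in-flight invalidation ranges so that page faults racing
 * with the invalidation can detect the conflict and retry.
 */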
void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
			      unsigned long end)
{
	/*
	 * The count increase must become visible at unlock time as no
	 * spte can be established without taking the mmu_lock and
	 * count is also read inside the mmu_lock critical section.
	 */
	kvm->mmu_invalidate_in_progress++;
	if (likely(kvm->mmu_invalidate_in_progress == 1)) {
		kvm->mmu_invalidate_range_start = start;
		kvm->mmu_invalidate_range_end = end;
	} else {
		/*
		 * Fully tracking multiple concurrent ranges has diminishing
		 * returns. Keep things simple and just find the minimal range
		 * which includes the current and new ranges. As there won't be
		 * enough information to subtract a range after its invalidate
		 * completes, any ranges invalidated concurrently will
		 * accumulate and persist until all outstanding invalidates
		 * complete.
		 */
		kvm->mmu_invalidate_range_start =
			min(kvm->mmu_invalidate_range_start, start);
		kvm->mmu_invalidate_range_end =
			max(kvm->mmu_invalidate_range_end, end);
	}
}

static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
					const struct mmu_notifier_range *range)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	const struct kvm_hva_range hva_range = {
		.start		= range->start,
		.end		= range->end,
		.handler	= kvm_unmap_gfn_range,
		.on_lock	= kvm_mmu_invalidate_begin,
		.on_unlock	= kvm_arch_guest_memory_reclaimed,
		.flush_on_ret	= true,
		.may_block	= mmu_notifier_range_blockable(range),
	};

	trace_kvm_unmap_hva_range(range->start, range->end);

	/*
	 * Prevent memslot modification between range_start() and range_end()
	 * so that conditionally locking provides the same result in both
	 * functions.  Without that guarantee, the mmu_invalidate_in_progress
	 * adjustments will be imbalanced.
	 *
	 * Pairs with the decrement in range_end().
	 */
	spin_lock(&kvm->mn_invalidate_lock);
	kvm->mn_active_invalidate_count++;
	spin_unlock(&kvm->mn_invalidate_lock);

	/*
	 * Invalidate pfn caches _before_ invalidating the secondary MMUs, i.e.
	 * before acquiring mmu_lock, to avoid holding mmu_lock while acquiring
	 * each cache's lock.  There are relatively few caches in existence at
	 * any given time, and the caches themselves can check for hva overlap,
	 * i.e. don't need to rely on memslot overlap checks for performance.
	 * Because this runs without holding mmu_lock, the pfn caches must use
	 * mn_active_invalidate_count (see above) instead of
	 * mmu_invalidate_in_progress.
	 */
	gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
					  hva_range.may_block);

	__kvm_handle_hva_range(kvm, &hva_range);

	return 0;
}

void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
			    unsigned long end)
{
	/*
	 * This sequence increase will notify the kvm page fault that
	 * the page that is going to be mapped in the spte could have
	 * been freed.
	 */
	kvm->mmu_invalidate_seq++;
	smp_wmb();
	/*
	 * The above sequence increase must be visible before the
	 * below count decrease, which is ensured by the smp_wmb above
	 * in conjunction with the smp_rmb in mmu_invalidate_retry().
	 */
	kvm->mmu_invalidate_in_progress--;
}
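
/*
 * Counterpart to invalidate_range_start(): via the on_lock hook this bumps
 * mmu_invalidate_seq and drops mmu_invalidate_in_progress, then decrements
 * mn_active_invalidate_count and wakes up a memslot update that may be
 * waiting for all in-flight invalidations to finish.
 */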
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
					const struct mmu_notifier_range *range)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	const struct kvm_hva_range hva_range = {
		.start		= range->start,
		.end		= range->end,
		.handler	= (void *)kvm_null_fn,
		.on_lock	= kvm_mmu_invalidate_end,
		.on_unlock	= (void *)kvm_null_fn,
		.flush_on_ret	= false,
		.may_block	= mmu_notifier_range_blockable(range),
	};
	bool wake;

	__kvm_handle_hva_range(kvm, &hva_range);

	/* Pairs with the increment in range_start(). */
	spin_lock(&kvm->mn_invalidate_lock);
	wake = (--kvm->mn_active_invalidate_count == 0);
	spin_unlock(&kvm->mn_invalidate_lock);

	/*
	 * There can only be one waiter, since the wait happens under
	 * slots_lock.
	 */
	if (wake)
		rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait);

	BUG_ON(kvm->mmu_invalidate_in_progress < 0);
}

static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
					      struct mm_struct *mm,
					      unsigned long start,
					      unsigned long end)
{
	trace_kvm_age_hva(start, end);

	return kvm_handle_hva_range(mn, start, end, KVM_MMU_NOTIFIER_NO_ARG,
				    kvm_age_gfn);
}

static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
					struct mm_struct *mm,
					unsigned long start,
					unsigned long end)
{
	trace_kvm_age_hva(start, end);

	/*
	 * Even though we do not flush TLB, this will still adversely
	 * affect performance on pre-Haswell Intel EPT, where there is
	 * no EPT Access Bit to clear so that we have to tear down EPT
	 * tables instead. If we find this unacceptable, we can always
	 * add a parameter to kvm_age_hva so that it effectively doesn't
	 * do anything on clear_young.
	 *
	 * Also note that currently we never issue secondary TLB flushes
	 * from clear_young, leaving this job up to the regular system
	 * cadence. If we find this inaccurate, we might come up with a
	 * more sophisticated heuristic later.
	 */
	return kvm_handle_hva_range_no_flush(mn, start, end, kvm_age_gfn);
}

static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
				       struct mm_struct *mm,
				       unsigned long address)
{
	trace_kvm_test_age_hva(address);

	return kvm_handle_hva_range_no_flush(mn, address, address + 1,
					     kvm_test_age_gfn);
}

static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
				     struct mm_struct *mm)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	kvm_flush_shadow_all(kvm);
	srcu_read_unlock(&kvm->srcu, idx);
}

static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
	.invalidate_range_start	= kvm_mmu_notifier_invalidate_range_start,
	.invalidate_range_end	= kvm_mmu_notifier_invalidate_range_end,
	.clear_flush_young	= kvm_mmu_notifier_clear_flush_young,
	.clear_young		= kvm_mmu_notifier_clear_young,
	.test_young		= kvm_mmu_notifier_test_young,
	.change_pte		= kvm_mmu_notifier_change_pte,
	.release		= kvm_mmu_notifier_release,
};

static int kvm_init_mmu_notifier(struct kvm *kvm)
{
	kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
	return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
}

#else  /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */

static int kvm_init_mmu_notifier(struct kvm *kvm)
{
	return 0;
}

#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */

#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
static int kvm_pm_notifier_call(struct notifier_block *bl,
				unsigned long state,
				void *unused)
{
	struct kvm *kvm = container_of(bl, struct kvm, pm_notifier);

	return kvm_arch_pm_notifier(kvm, state);
}

static void kvm_init_pm_notifier(struct kvm *kvm)
{
	kvm->pm_notifier.notifier_call = kvm_pm_notifier_call;
	/* Suspend KVM before we suspend ftrace, RCU, etc. */
	kvm->pm_notifier.priority = INT_MAX;
	register_pm_notifier(&kvm->pm_notifier);
}

static void kvm_destroy_pm_notifier(struct kvm *kvm)
{
	unregister_pm_notifier(&kvm->pm_notifier);
}
#else  /* !CONFIG_HAVE_KVM_PM_NOTIFIER */
static void kvm_init_pm_notifier(struct kvm *kvm)
{
}

static void kvm_destroy_pm_notifier(struct kvm *kvm)
{
}
#endif /* CONFIG_HAVE_KVM_PM_NOTIFIER */

static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
{
	if (!memslot->dirty_bitmap)
		return;

	kvfree(memslot->dirty_bitmap);
	memslot->dirty_bitmap = NULL;
}

/* This does not remove the slot from struct kvm_memslots data structures */
static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
	kvm_destroy_dirty_bitmap(slot);

	kvm_arch_free_memslot(kvm, slot);

	kfree(slot);
}

static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
{
	struct hlist_node *idnode;
	struct kvm_memory_slot *memslot;
	int bkt;

	/*
	 * The same memslot objects live in both active and inactive sets,
	 * arbitrarily free using index '1' so the second invocation of this
	 * function isn't operating over a structure with dangling pointers
	 * (even though this function isn't actually touching them).
	 */
	if (!slots->node_idx)
		return;

	hash_for_each_safe(slots->id_hash, bkt, idnode, memslot, id_node[1])
		kvm_free_memslot(kvm, memslot);
}

static umode_t kvm_stats_debugfs_mode(const struct _kvm_stats_desc *pdesc)
{
	switch (pdesc->desc.flags & KVM_STATS_TYPE_MASK) {
	case KVM_STATS_TYPE_INSTANT:
		return 0444;
	case KVM_STATS_TYPE_CUMULATIVE:
	case KVM_STATS_TYPE_PEAK:
	default:
		return 0644;
	}
}


static void kvm_destroy_vm_debugfs(struct kvm *kvm)
{
	int i;
	int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
				      kvm_vcpu_stats_header.num_desc;

	if (IS_ERR(kvm->debugfs_dentry))
		return;

	debugfs_remove_recursive(kvm->debugfs_dentry);

	if (kvm->debugfs_stat_data) {
		for (i = 0; i < kvm_debugfs_num_entries; i++)
			kfree(kvm->debugfs_stat_data[i]);
		kfree(kvm->debugfs_stat_data);
	}
}
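
/*
 * Create the per-VM debugfs directory ("<pid>-<fd>" under the kvm debugfs
 * root) along with one file per VM and per vCPU stat descriptor.  Problems
 * creating the directory itself are tolerated (the function returns 0 and
 * the VM simply runs without debugfs stats); only allocation failures and
 * errors from the arch hook are reported to the caller.
 */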
= kcalloc(kvm_debugfs_num_entries, 107762306a36Sopenharmony_ci sizeof(*kvm->debugfs_stat_data), 107862306a36Sopenharmony_ci GFP_KERNEL_ACCOUNT); 107962306a36Sopenharmony_ci if (!kvm->debugfs_stat_data) 108062306a36Sopenharmony_ci goto out_err; 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_ci for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) { 108362306a36Sopenharmony_ci pdesc = &kvm_vm_stats_desc[i]; 108462306a36Sopenharmony_ci stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT); 108562306a36Sopenharmony_ci if (!stat_data) 108662306a36Sopenharmony_ci goto out_err; 108762306a36Sopenharmony_ci 108862306a36Sopenharmony_ci stat_data->kvm = kvm; 108962306a36Sopenharmony_ci stat_data->desc = pdesc; 109062306a36Sopenharmony_ci stat_data->kind = KVM_STAT_VM; 109162306a36Sopenharmony_ci kvm->debugfs_stat_data[i] = stat_data; 109262306a36Sopenharmony_ci debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), 109362306a36Sopenharmony_ci kvm->debugfs_dentry, stat_data, 109462306a36Sopenharmony_ci &stat_fops_per_vm); 109562306a36Sopenharmony_ci } 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_ci for (i = 0; i < kvm_vcpu_stats_header.num_desc; ++i) { 109862306a36Sopenharmony_ci pdesc = &kvm_vcpu_stats_desc[i]; 109962306a36Sopenharmony_ci stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT); 110062306a36Sopenharmony_ci if (!stat_data) 110162306a36Sopenharmony_ci goto out_err; 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci stat_data->kvm = kvm; 110462306a36Sopenharmony_ci stat_data->desc = pdesc; 110562306a36Sopenharmony_ci stat_data->kind = KVM_STAT_VCPU; 110662306a36Sopenharmony_ci kvm->debugfs_stat_data[i + kvm_vm_stats_header.num_desc] = stat_data; 110762306a36Sopenharmony_ci debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), 110862306a36Sopenharmony_ci kvm->debugfs_dentry, stat_data, 110962306a36Sopenharmony_ci &stat_fops_per_vm); 111062306a36Sopenharmony_ci } 111162306a36Sopenharmony_ci 111262306a36Sopenharmony_ci ret = kvm_arch_create_vm_debugfs(kvm); 111362306a36Sopenharmony_ci if (ret) 111462306a36Sopenharmony_ci goto out_err; 111562306a36Sopenharmony_ci 111662306a36Sopenharmony_ci return 0; 111762306a36Sopenharmony_ciout_err: 111862306a36Sopenharmony_ci kvm_destroy_vm_debugfs(kvm); 111962306a36Sopenharmony_ci return ret; 112062306a36Sopenharmony_ci} 112162306a36Sopenharmony_ci 112262306a36Sopenharmony_ci/* 112362306a36Sopenharmony_ci * Called after the VM is otherwise initialized, but just before adding it to 112462306a36Sopenharmony_ci * the vm_list. 112562306a36Sopenharmony_ci */ 112662306a36Sopenharmony_ciint __weak kvm_arch_post_init_vm(struct kvm *kvm) 112762306a36Sopenharmony_ci{ 112862306a36Sopenharmony_ci return 0; 112962306a36Sopenharmony_ci} 113062306a36Sopenharmony_ci 113162306a36Sopenharmony_ci/* 113262306a36Sopenharmony_ci * Called just after removing the VM from the vm_list, but before doing any 113362306a36Sopenharmony_ci * other destruction. 113462306a36Sopenharmony_ci */ 113562306a36Sopenharmony_civoid __weak kvm_arch_pre_destroy_vm(struct kvm *kvm) 113662306a36Sopenharmony_ci{ 113762306a36Sopenharmony_ci} 113862306a36Sopenharmony_ci 113962306a36Sopenharmony_ci/* 114062306a36Sopenharmony_ci * Called after per-vm debugfs created. When called kvm->debugfs_dentry should 114162306a36Sopenharmony_ci * be setup already, so we can create arch-specific debugfs entries under it. 
114262306a36Sopenharmony_ci * Cleanup should be automatically done in kvm_destroy_vm_debugfs() recursively, so 114362306a36Sopenharmony_ci * a per-arch destroy interface is not needed. 114462306a36Sopenharmony_ci */ 114562306a36Sopenharmony_ciint __weak kvm_arch_create_vm_debugfs(struct kvm *kvm) 114662306a36Sopenharmony_ci{ 114762306a36Sopenharmony_ci return 0; 114862306a36Sopenharmony_ci} 114962306a36Sopenharmony_ci 115062306a36Sopenharmony_cistatic struct kvm *kvm_create_vm(unsigned long type, const char *fdname) 115162306a36Sopenharmony_ci{ 115262306a36Sopenharmony_ci struct kvm *kvm = kvm_arch_alloc_vm(); 115362306a36Sopenharmony_ci struct kvm_memslots *slots; 115462306a36Sopenharmony_ci int r = -ENOMEM; 115562306a36Sopenharmony_ci int i, j; 115662306a36Sopenharmony_ci 115762306a36Sopenharmony_ci if (!kvm) 115862306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 115962306a36Sopenharmony_ci 116062306a36Sopenharmony_ci /* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */ 116162306a36Sopenharmony_ci __module_get(kvm_chardev_ops.owner); 116262306a36Sopenharmony_ci 116362306a36Sopenharmony_ci KVM_MMU_LOCK_INIT(kvm); 116462306a36Sopenharmony_ci mmgrab(current->mm); 116562306a36Sopenharmony_ci kvm->mm = current->mm; 116662306a36Sopenharmony_ci kvm_eventfd_init(kvm); 116762306a36Sopenharmony_ci mutex_init(&kvm->lock); 116862306a36Sopenharmony_ci mutex_init(&kvm->irq_lock); 116962306a36Sopenharmony_ci mutex_init(&kvm->slots_lock); 117062306a36Sopenharmony_ci mutex_init(&kvm->slots_arch_lock); 117162306a36Sopenharmony_ci spin_lock_init(&kvm->mn_invalidate_lock); 117262306a36Sopenharmony_ci rcuwait_init(&kvm->mn_memslots_update_rcuwait); 117362306a36Sopenharmony_ci xa_init(&kvm->vcpu_array); 117462306a36Sopenharmony_ci 117562306a36Sopenharmony_ci INIT_LIST_HEAD(&kvm->gpc_list); 117662306a36Sopenharmony_ci spin_lock_init(&kvm->gpc_lock); 117762306a36Sopenharmony_ci 117862306a36Sopenharmony_ci INIT_LIST_HEAD(&kvm->devices); 117962306a36Sopenharmony_ci kvm->max_vcpus = KVM_MAX_VCPUS; 118062306a36Sopenharmony_ci 118162306a36Sopenharmony_ci BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ci /* 118462306a36Sopenharmony_ci * Force subsequent debugfs file creations to fail if the VM directory 118562306a36Sopenharmony_ci * is not created (by kvm_create_vm_debugfs()).
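	 * Roughly speaking, an ERR_PTR parent makes later debugfs_create_file()
	 * calls fail outright, whereas a NULL parent would quietly create the
	 * per-VM stat files in the debugfs root instead.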
118662306a36Sopenharmony_ci */ 118762306a36Sopenharmony_ci kvm->debugfs_dentry = ERR_PTR(-ENOENT); 118862306a36Sopenharmony_ci 118962306a36Sopenharmony_ci snprintf(kvm->stats_id, sizeof(kvm->stats_id), "kvm-%d", 119062306a36Sopenharmony_ci task_pid_nr(current)); 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ci if (init_srcu_struct(&kvm->srcu)) 119362306a36Sopenharmony_ci goto out_err_no_srcu; 119462306a36Sopenharmony_ci if (init_srcu_struct(&kvm->irq_srcu)) 119562306a36Sopenharmony_ci goto out_err_no_irq_srcu; 119662306a36Sopenharmony_ci 119762306a36Sopenharmony_ci refcount_set(&kvm->users_count, 1); 119862306a36Sopenharmony_ci for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { 119962306a36Sopenharmony_ci for (j = 0; j < 2; j++) { 120062306a36Sopenharmony_ci slots = &kvm->__memslots[i][j]; 120162306a36Sopenharmony_ci 120262306a36Sopenharmony_ci atomic_long_set(&slots->last_used_slot, (unsigned long)NULL); 120362306a36Sopenharmony_ci slots->hva_tree = RB_ROOT_CACHED; 120462306a36Sopenharmony_ci slots->gfn_tree = RB_ROOT; 120562306a36Sopenharmony_ci hash_init(slots->id_hash); 120662306a36Sopenharmony_ci slots->node_idx = j; 120762306a36Sopenharmony_ci 120862306a36Sopenharmony_ci /* Generations must be different for each address space. */ 120962306a36Sopenharmony_ci slots->generation = i; 121062306a36Sopenharmony_ci } 121162306a36Sopenharmony_ci 121262306a36Sopenharmony_ci rcu_assign_pointer(kvm->memslots[i], &kvm->__memslots[i][0]); 121362306a36Sopenharmony_ci } 121462306a36Sopenharmony_ci 121562306a36Sopenharmony_ci for (i = 0; i < KVM_NR_BUSES; i++) { 121662306a36Sopenharmony_ci rcu_assign_pointer(kvm->buses[i], 121762306a36Sopenharmony_ci kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT)); 121862306a36Sopenharmony_ci if (!kvm->buses[i]) 121962306a36Sopenharmony_ci goto out_err_no_arch_destroy_vm; 122062306a36Sopenharmony_ci } 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci r = kvm_arch_init_vm(kvm, type); 122362306a36Sopenharmony_ci if (r) 122462306a36Sopenharmony_ci goto out_err_no_arch_destroy_vm; 122562306a36Sopenharmony_ci 122662306a36Sopenharmony_ci r = hardware_enable_all(); 122762306a36Sopenharmony_ci if (r) 122862306a36Sopenharmony_ci goto out_err_no_disable; 122962306a36Sopenharmony_ci 123062306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM_IRQFD 123162306a36Sopenharmony_ci INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); 123262306a36Sopenharmony_ci#endif 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_ci r = kvm_init_mmu_notifier(kvm); 123562306a36Sopenharmony_ci if (r) 123662306a36Sopenharmony_ci goto out_err_no_mmu_notifier; 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci r = kvm_coalesced_mmio_init(kvm); 123962306a36Sopenharmony_ci if (r < 0) 124062306a36Sopenharmony_ci goto out_no_coalesced_mmio; 124162306a36Sopenharmony_ci 124262306a36Sopenharmony_ci r = kvm_create_vm_debugfs(kvm, fdname); 124362306a36Sopenharmony_ci if (r) 124462306a36Sopenharmony_ci goto out_err_no_debugfs; 124562306a36Sopenharmony_ci 124662306a36Sopenharmony_ci r = kvm_arch_post_init_vm(kvm); 124762306a36Sopenharmony_ci if (r) 124862306a36Sopenharmony_ci goto out_err; 124962306a36Sopenharmony_ci 125062306a36Sopenharmony_ci mutex_lock(&kvm_lock); 125162306a36Sopenharmony_ci list_add(&kvm->vm_list, &vm_list); 125262306a36Sopenharmony_ci mutex_unlock(&kvm_lock); 125362306a36Sopenharmony_ci 125462306a36Sopenharmony_ci preempt_notifier_inc(); 125562306a36Sopenharmony_ci kvm_init_pm_notifier(kvm); 125662306a36Sopenharmony_ci 125762306a36Sopenharmony_ci return kvm; 125862306a36Sopenharmony_ci 
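	/*
	 * The labels below unwind the setup above in reverse order: debugfs,
	 * coalesced MMIO, the MMU notifier, hardware enablement, arch state,
	 * the I/O buses and SRCU structs, and finally the mm and module
	 * references taken at the top of the function.
	 */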
125962306a36Sopenharmony_ciout_err: 126062306a36Sopenharmony_ci kvm_destroy_vm_debugfs(kvm); 126162306a36Sopenharmony_ciout_err_no_debugfs: 126262306a36Sopenharmony_ci kvm_coalesced_mmio_free(kvm); 126362306a36Sopenharmony_ciout_no_coalesced_mmio: 126462306a36Sopenharmony_ci#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 126562306a36Sopenharmony_ci if (kvm->mmu_notifier.ops) 126662306a36Sopenharmony_ci mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); 126762306a36Sopenharmony_ci#endif 126862306a36Sopenharmony_ciout_err_no_mmu_notifier: 126962306a36Sopenharmony_ci hardware_disable_all(); 127062306a36Sopenharmony_ciout_err_no_disable: 127162306a36Sopenharmony_ci kvm_arch_destroy_vm(kvm); 127262306a36Sopenharmony_ciout_err_no_arch_destroy_vm: 127362306a36Sopenharmony_ci WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); 127462306a36Sopenharmony_ci for (i = 0; i < KVM_NR_BUSES; i++) 127562306a36Sopenharmony_ci kfree(kvm_get_bus(kvm, i)); 127662306a36Sopenharmony_ci cleanup_srcu_struct(&kvm->irq_srcu); 127762306a36Sopenharmony_ciout_err_no_irq_srcu: 127862306a36Sopenharmony_ci cleanup_srcu_struct(&kvm->srcu); 127962306a36Sopenharmony_ciout_err_no_srcu: 128062306a36Sopenharmony_ci kvm_arch_free_vm(kvm); 128162306a36Sopenharmony_ci mmdrop(current->mm); 128262306a36Sopenharmony_ci module_put(kvm_chardev_ops.owner); 128362306a36Sopenharmony_ci return ERR_PTR(r); 128462306a36Sopenharmony_ci} 128562306a36Sopenharmony_ci 128662306a36Sopenharmony_cistatic void kvm_destroy_devices(struct kvm *kvm) 128762306a36Sopenharmony_ci{ 128862306a36Sopenharmony_ci struct kvm_device *dev, *tmp; 128962306a36Sopenharmony_ci 129062306a36Sopenharmony_ci /* 129162306a36Sopenharmony_ci * We do not need to take the kvm->lock here, because nobody else 129262306a36Sopenharmony_ci * has a reference to the struct kvm at this point and therefore 129362306a36Sopenharmony_ci * cannot access the devices list anyhow. 
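 * (Each device file descriptor created via KVM_CREATE_DEVICE holds its own
 * reference on the VM, so by the time users_count drops to zero those
 * descriptors have already been released.)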
129462306a36Sopenharmony_ci */ 129562306a36Sopenharmony_ci list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) { 129662306a36Sopenharmony_ci list_del(&dev->vm_node); 129762306a36Sopenharmony_ci dev->ops->destroy(dev); 129862306a36Sopenharmony_ci } 129962306a36Sopenharmony_ci} 130062306a36Sopenharmony_ci 130162306a36Sopenharmony_cistatic void kvm_destroy_vm(struct kvm *kvm) 130262306a36Sopenharmony_ci{ 130362306a36Sopenharmony_ci int i; 130462306a36Sopenharmony_ci struct mm_struct *mm = kvm->mm; 130562306a36Sopenharmony_ci 130662306a36Sopenharmony_ci kvm_destroy_pm_notifier(kvm); 130762306a36Sopenharmony_ci kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm); 130862306a36Sopenharmony_ci kvm_destroy_vm_debugfs(kvm); 130962306a36Sopenharmony_ci kvm_arch_sync_events(kvm); 131062306a36Sopenharmony_ci mutex_lock(&kvm_lock); 131162306a36Sopenharmony_ci list_del(&kvm->vm_list); 131262306a36Sopenharmony_ci mutex_unlock(&kvm_lock); 131362306a36Sopenharmony_ci kvm_arch_pre_destroy_vm(kvm); 131462306a36Sopenharmony_ci 131562306a36Sopenharmony_ci kvm_free_irq_routing(kvm); 131662306a36Sopenharmony_ci for (i = 0; i < KVM_NR_BUSES; i++) { 131762306a36Sopenharmony_ci struct kvm_io_bus *bus = kvm_get_bus(kvm, i); 131862306a36Sopenharmony_ci 131962306a36Sopenharmony_ci if (bus) 132062306a36Sopenharmony_ci kvm_io_bus_destroy(bus); 132162306a36Sopenharmony_ci kvm->buses[i] = NULL; 132262306a36Sopenharmony_ci } 132362306a36Sopenharmony_ci kvm_coalesced_mmio_free(kvm); 132462306a36Sopenharmony_ci#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 132562306a36Sopenharmony_ci mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); 132662306a36Sopenharmony_ci /* 132762306a36Sopenharmony_ci * At this point, pending calls to invalidate_range_start() 132862306a36Sopenharmony_ci * have completed but no more MMU notifiers will run, so 132962306a36Sopenharmony_ci * mn_active_invalidate_count may remain unbalanced. 133062306a36Sopenharmony_ci * No threads can be waiting in kvm_swap_active_memslots() as the 133162306a36Sopenharmony_ci * last reference on KVM has been dropped, but freeing 133262306a36Sopenharmony_ci * memslots would deadlock without this manual intervention. 
133362306a36Sopenharmony_ci */ 133462306a36Sopenharmony_ci WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait)); 133562306a36Sopenharmony_ci kvm->mn_active_invalidate_count = 0; 133662306a36Sopenharmony_ci#else 133762306a36Sopenharmony_ci kvm_flush_shadow_all(kvm); 133862306a36Sopenharmony_ci#endif 133962306a36Sopenharmony_ci kvm_arch_destroy_vm(kvm); 134062306a36Sopenharmony_ci kvm_destroy_devices(kvm); 134162306a36Sopenharmony_ci for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { 134262306a36Sopenharmony_ci kvm_free_memslots(kvm, &kvm->__memslots[i][0]); 134362306a36Sopenharmony_ci kvm_free_memslots(kvm, &kvm->__memslots[i][1]); 134462306a36Sopenharmony_ci } 134562306a36Sopenharmony_ci cleanup_srcu_struct(&kvm->irq_srcu); 134662306a36Sopenharmony_ci cleanup_srcu_struct(&kvm->srcu); 134762306a36Sopenharmony_ci kvm_arch_free_vm(kvm); 134862306a36Sopenharmony_ci preempt_notifier_dec(); 134962306a36Sopenharmony_ci hardware_disable_all(); 135062306a36Sopenharmony_ci mmdrop(mm); 135162306a36Sopenharmony_ci module_put(kvm_chardev_ops.owner); 135262306a36Sopenharmony_ci} 135362306a36Sopenharmony_ci 135462306a36Sopenharmony_civoid kvm_get_kvm(struct kvm *kvm) 135562306a36Sopenharmony_ci{ 135662306a36Sopenharmony_ci refcount_inc(&kvm->users_count); 135762306a36Sopenharmony_ci} 135862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_get_kvm); 135962306a36Sopenharmony_ci 136062306a36Sopenharmony_ci/* 136162306a36Sopenharmony_ci * Make sure the vm is not during destruction, which is a safe version of 136262306a36Sopenharmony_ci * kvm_get_kvm(). Return true if kvm referenced successfully, false otherwise. 136362306a36Sopenharmony_ci */ 136462306a36Sopenharmony_cibool kvm_get_kvm_safe(struct kvm *kvm) 136562306a36Sopenharmony_ci{ 136662306a36Sopenharmony_ci return refcount_inc_not_zero(&kvm->users_count); 136762306a36Sopenharmony_ci} 136862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_get_kvm_safe); 136962306a36Sopenharmony_ci 137062306a36Sopenharmony_civoid kvm_put_kvm(struct kvm *kvm) 137162306a36Sopenharmony_ci{ 137262306a36Sopenharmony_ci if (refcount_dec_and_test(&kvm->users_count)) 137362306a36Sopenharmony_ci kvm_destroy_vm(kvm); 137462306a36Sopenharmony_ci} 137562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_put_kvm); 137662306a36Sopenharmony_ci 137762306a36Sopenharmony_ci/* 137862306a36Sopenharmony_ci * Used to put a reference that was taken on behalf of an object associated 137962306a36Sopenharmony_ci * with a user-visible file descriptor, e.g. a vcpu or device, if installation 138062306a36Sopenharmony_ci * of the new file descriptor fails and the reference cannot be transferred to 138162306a36Sopenharmony_ci * its final owner. In such cases, the caller is still actively using @kvm and 138262306a36Sopenharmony_ci * will fail miserably if the refcount unexpectedly hits zero. 
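 *
 * Rough usage sketch (illustrative, mirroring how vCPU and device fds are
 * installed):
 *
 *	kvm_get_kvm(kvm);
 *	fd = anon_inode_getfd(...);
 *	if (fd < 0)
 *		kvm_put_kvm_no_destroy(kvm);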
138362306a36Sopenharmony_ci */ 138462306a36Sopenharmony_civoid kvm_put_kvm_no_destroy(struct kvm *kvm) 138562306a36Sopenharmony_ci{ 138662306a36Sopenharmony_ci WARN_ON(refcount_dec_and_test(&kvm->users_count)); 138762306a36Sopenharmony_ci} 138862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_put_kvm_no_destroy); 138962306a36Sopenharmony_ci 139062306a36Sopenharmony_cistatic int kvm_vm_release(struct inode *inode, struct file *filp) 139162306a36Sopenharmony_ci{ 139262306a36Sopenharmony_ci struct kvm *kvm = filp->private_data; 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_ci kvm_irqfd_release(kvm); 139562306a36Sopenharmony_ci 139662306a36Sopenharmony_ci kvm_put_kvm(kvm); 139762306a36Sopenharmony_ci return 0; 139862306a36Sopenharmony_ci} 139962306a36Sopenharmony_ci 140062306a36Sopenharmony_ci/* 140162306a36Sopenharmony_ci * Allocation size is twice as large as the actual dirty bitmap size. 140262306a36Sopenharmony_ci * See kvm_vm_ioctl_get_dirty_log() why this is needed. 140362306a36Sopenharmony_ci */ 140462306a36Sopenharmony_cistatic int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot) 140562306a36Sopenharmony_ci{ 140662306a36Sopenharmony_ci unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(memslot); 140762306a36Sopenharmony_ci 140862306a36Sopenharmony_ci memslot->dirty_bitmap = __vcalloc(2, dirty_bytes, GFP_KERNEL_ACCOUNT); 140962306a36Sopenharmony_ci if (!memslot->dirty_bitmap) 141062306a36Sopenharmony_ci return -ENOMEM; 141162306a36Sopenharmony_ci 141262306a36Sopenharmony_ci return 0; 141362306a36Sopenharmony_ci} 141462306a36Sopenharmony_ci 141562306a36Sopenharmony_cistatic struct kvm_memslots *kvm_get_inactive_memslots(struct kvm *kvm, int as_id) 141662306a36Sopenharmony_ci{ 141762306a36Sopenharmony_ci struct kvm_memslots *active = __kvm_memslots(kvm, as_id); 141862306a36Sopenharmony_ci int node_idx_inactive = active->node_idx ^ 1; 141962306a36Sopenharmony_ci 142062306a36Sopenharmony_ci return &kvm->__memslots[as_id][node_idx_inactive]; 142162306a36Sopenharmony_ci} 142262306a36Sopenharmony_ci 142362306a36Sopenharmony_ci/* 142462306a36Sopenharmony_ci * Helper to get the address space ID when one of memslot pointers may be NULL. 142562306a36Sopenharmony_ci * This also serves as a sanity that at least one of the pointers is non-NULL, 142662306a36Sopenharmony_ci * and that their address space IDs don't diverge. 
142762306a36Sopenharmony_ci */ 142862306a36Sopenharmony_cistatic int kvm_memslots_get_as_id(struct kvm_memory_slot *a, 142962306a36Sopenharmony_ci struct kvm_memory_slot *b) 143062306a36Sopenharmony_ci{ 143162306a36Sopenharmony_ci if (WARN_ON_ONCE(!a && !b)) 143262306a36Sopenharmony_ci return 0; 143362306a36Sopenharmony_ci 143462306a36Sopenharmony_ci if (!a) 143562306a36Sopenharmony_ci return b->as_id; 143662306a36Sopenharmony_ci if (!b) 143762306a36Sopenharmony_ci return a->as_id; 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_ci WARN_ON_ONCE(a->as_id != b->as_id); 144062306a36Sopenharmony_ci return a->as_id; 144162306a36Sopenharmony_ci} 144262306a36Sopenharmony_ci 144362306a36Sopenharmony_cistatic void kvm_insert_gfn_node(struct kvm_memslots *slots, 144462306a36Sopenharmony_ci struct kvm_memory_slot *slot) 144562306a36Sopenharmony_ci{ 144662306a36Sopenharmony_ci struct rb_root *gfn_tree = &slots->gfn_tree; 144762306a36Sopenharmony_ci struct rb_node **node, *parent; 144862306a36Sopenharmony_ci int idx = slots->node_idx; 144962306a36Sopenharmony_ci 145062306a36Sopenharmony_ci parent = NULL; 145162306a36Sopenharmony_ci for (node = &gfn_tree->rb_node; *node; ) { 145262306a36Sopenharmony_ci struct kvm_memory_slot *tmp; 145362306a36Sopenharmony_ci 145462306a36Sopenharmony_ci tmp = container_of(*node, struct kvm_memory_slot, gfn_node[idx]); 145562306a36Sopenharmony_ci parent = *node; 145662306a36Sopenharmony_ci if (slot->base_gfn < tmp->base_gfn) 145762306a36Sopenharmony_ci node = &(*node)->rb_left; 145862306a36Sopenharmony_ci else if (slot->base_gfn > tmp->base_gfn) 145962306a36Sopenharmony_ci node = &(*node)->rb_right; 146062306a36Sopenharmony_ci else 146162306a36Sopenharmony_ci BUG(); 146262306a36Sopenharmony_ci } 146362306a36Sopenharmony_ci 146462306a36Sopenharmony_ci rb_link_node(&slot->gfn_node[idx], parent, node); 146562306a36Sopenharmony_ci rb_insert_color(&slot->gfn_node[idx], gfn_tree); 146662306a36Sopenharmony_ci} 146762306a36Sopenharmony_ci 146862306a36Sopenharmony_cistatic void kvm_erase_gfn_node(struct kvm_memslots *slots, 146962306a36Sopenharmony_ci struct kvm_memory_slot *slot) 147062306a36Sopenharmony_ci{ 147162306a36Sopenharmony_ci rb_erase(&slot->gfn_node[slots->node_idx], &slots->gfn_tree); 147262306a36Sopenharmony_ci} 147362306a36Sopenharmony_ci 147462306a36Sopenharmony_cistatic void kvm_replace_gfn_node(struct kvm_memslots *slots, 147562306a36Sopenharmony_ci struct kvm_memory_slot *old, 147662306a36Sopenharmony_ci struct kvm_memory_slot *new) 147762306a36Sopenharmony_ci{ 147862306a36Sopenharmony_ci int idx = slots->node_idx; 147962306a36Sopenharmony_ci 148062306a36Sopenharmony_ci WARN_ON_ONCE(old->base_gfn != new->base_gfn); 148162306a36Sopenharmony_ci 148262306a36Sopenharmony_ci rb_replace_node(&old->gfn_node[idx], &new->gfn_node[idx], 148362306a36Sopenharmony_ci &slots->gfn_tree); 148462306a36Sopenharmony_ci} 148562306a36Sopenharmony_ci 148662306a36Sopenharmony_ci/* 148762306a36Sopenharmony_ci * Replace @old with @new in the inactive memslots. 148862306a36Sopenharmony_ci * 148962306a36Sopenharmony_ci * With NULL @old this simply adds @new. 149062306a36Sopenharmony_ci * With NULL @new this simply removes @old. 149162306a36Sopenharmony_ci * 149262306a36Sopenharmony_ci * If @new is non-NULL its hva_node[slots_idx] range has to be set 149362306a36Sopenharmony_ci * appropriately. 
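 *
 * Each memslot carries two sets of hash/tree nodes, indexed by the owning
 * kvm_memslots' node_idx, so the very same slot structure can be linked
 * into both the active and the inactive set without being reallocated.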
149462306a36Sopenharmony_ci */ 149562306a36Sopenharmony_cistatic void kvm_replace_memslot(struct kvm *kvm, 149662306a36Sopenharmony_ci struct kvm_memory_slot *old, 149762306a36Sopenharmony_ci struct kvm_memory_slot *new) 149862306a36Sopenharmony_ci{ 149962306a36Sopenharmony_ci int as_id = kvm_memslots_get_as_id(old, new); 150062306a36Sopenharmony_ci struct kvm_memslots *slots = kvm_get_inactive_memslots(kvm, as_id); 150162306a36Sopenharmony_ci int idx = slots->node_idx; 150262306a36Sopenharmony_ci 150362306a36Sopenharmony_ci if (old) { 150462306a36Sopenharmony_ci hash_del(&old->id_node[idx]); 150562306a36Sopenharmony_ci interval_tree_remove(&old->hva_node[idx], &slots->hva_tree); 150662306a36Sopenharmony_ci 150762306a36Sopenharmony_ci if ((long)old == atomic_long_read(&slots->last_used_slot)) 150862306a36Sopenharmony_ci atomic_long_set(&slots->last_used_slot, (long)new); 150962306a36Sopenharmony_ci 151062306a36Sopenharmony_ci if (!new) { 151162306a36Sopenharmony_ci kvm_erase_gfn_node(slots, old); 151262306a36Sopenharmony_ci return; 151362306a36Sopenharmony_ci } 151462306a36Sopenharmony_ci } 151562306a36Sopenharmony_ci 151662306a36Sopenharmony_ci /* 151762306a36Sopenharmony_ci * Initialize @new's hva range. Do this even when replacing an @old 151862306a36Sopenharmony_ci * slot, kvm_copy_memslot() deliberately does not touch node data. 151962306a36Sopenharmony_ci */ 152062306a36Sopenharmony_ci new->hva_node[idx].start = new->userspace_addr; 152162306a36Sopenharmony_ci new->hva_node[idx].last = new->userspace_addr + 152262306a36Sopenharmony_ci (new->npages << PAGE_SHIFT) - 1; 152362306a36Sopenharmony_ci 152462306a36Sopenharmony_ci /* 152562306a36Sopenharmony_ci * (Re)Add the new memslot. There is no O(1) interval_tree_replace(), 152662306a36Sopenharmony_ci * hva_node needs to be swapped with remove+insert even though hva can't 152762306a36Sopenharmony_ci * change when replacing an existing slot. 152862306a36Sopenharmony_ci */ 152962306a36Sopenharmony_ci hash_add(slots->id_hash, &new->id_node[idx], new->id); 153062306a36Sopenharmony_ci interval_tree_insert(&new->hva_node[idx], &slots->hva_tree); 153162306a36Sopenharmony_ci 153262306a36Sopenharmony_ci /* 153362306a36Sopenharmony_ci * If the memslot gfn is unchanged, rb_replace_node() can be used to 153462306a36Sopenharmony_ci * switch the node in the gfn tree instead of removing the old and 153562306a36Sopenharmony_ci * inserting the new as two separate operations. Replacement is a 153662306a36Sopenharmony_ci * single O(1) operation versus two O(log(n)) operations for 153762306a36Sopenharmony_ci * remove+insert. 
153862306a36Sopenharmony_ci */ 153962306a36Sopenharmony_ci if (old && old->base_gfn == new->base_gfn) { 154062306a36Sopenharmony_ci kvm_replace_gfn_node(slots, old, new); 154162306a36Sopenharmony_ci } else { 154262306a36Sopenharmony_ci if (old) 154362306a36Sopenharmony_ci kvm_erase_gfn_node(slots, old); 154462306a36Sopenharmony_ci kvm_insert_gfn_node(slots, new); 154562306a36Sopenharmony_ci } 154662306a36Sopenharmony_ci} 154762306a36Sopenharmony_ci 154862306a36Sopenharmony_cistatic int check_memory_region_flags(const struct kvm_userspace_memory_region *mem) 154962306a36Sopenharmony_ci{ 155062306a36Sopenharmony_ci u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_ci#ifdef __KVM_HAVE_READONLY_MEM 155362306a36Sopenharmony_ci valid_flags |= KVM_MEM_READONLY; 155462306a36Sopenharmony_ci#endif 155562306a36Sopenharmony_ci 155662306a36Sopenharmony_ci if (mem->flags & ~valid_flags) 155762306a36Sopenharmony_ci return -EINVAL; 155862306a36Sopenharmony_ci 155962306a36Sopenharmony_ci return 0; 156062306a36Sopenharmony_ci} 156162306a36Sopenharmony_ci 156262306a36Sopenharmony_cistatic void kvm_swap_active_memslots(struct kvm *kvm, int as_id) 156362306a36Sopenharmony_ci{ 156462306a36Sopenharmony_ci struct kvm_memslots *slots = kvm_get_inactive_memslots(kvm, as_id); 156562306a36Sopenharmony_ci 156662306a36Sopenharmony_ci /* Grab the generation from the activate memslots. */ 156762306a36Sopenharmony_ci u64 gen = __kvm_memslots(kvm, as_id)->generation; 156862306a36Sopenharmony_ci 156962306a36Sopenharmony_ci WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); 157062306a36Sopenharmony_ci slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; 157162306a36Sopenharmony_ci 157262306a36Sopenharmony_ci /* 157362306a36Sopenharmony_ci * Do not store the new memslots while there are invalidations in 157462306a36Sopenharmony_ci * progress, otherwise the locking in invalidate_range_start and 157562306a36Sopenharmony_ci * invalidate_range_end will be unbalanced. 157662306a36Sopenharmony_ci */ 157762306a36Sopenharmony_ci spin_lock(&kvm->mn_invalidate_lock); 157862306a36Sopenharmony_ci prepare_to_rcuwait(&kvm->mn_memslots_update_rcuwait); 157962306a36Sopenharmony_ci while (kvm->mn_active_invalidate_count) { 158062306a36Sopenharmony_ci set_current_state(TASK_UNINTERRUPTIBLE); 158162306a36Sopenharmony_ci spin_unlock(&kvm->mn_invalidate_lock); 158262306a36Sopenharmony_ci schedule(); 158362306a36Sopenharmony_ci spin_lock(&kvm->mn_invalidate_lock); 158462306a36Sopenharmony_ci } 158562306a36Sopenharmony_ci finish_rcuwait(&kvm->mn_memslots_update_rcuwait); 158662306a36Sopenharmony_ci rcu_assign_pointer(kvm->memslots[as_id], slots); 158762306a36Sopenharmony_ci spin_unlock(&kvm->mn_invalidate_lock); 158862306a36Sopenharmony_ci 158962306a36Sopenharmony_ci /* 159062306a36Sopenharmony_ci * Acquired in kvm_set_memslot. Must be released before synchronize 159162306a36Sopenharmony_ci * SRCU below in order to avoid deadlock with another thread 159262306a36Sopenharmony_ci * acquiring the slots_arch_lock in an srcu critical section. 
159362306a36Sopenharmony_ci */ 159462306a36Sopenharmony_ci mutex_unlock(&kvm->slots_arch_lock); 159562306a36Sopenharmony_ci 159662306a36Sopenharmony_ci synchronize_srcu_expedited(&kvm->srcu); 159762306a36Sopenharmony_ci 159862306a36Sopenharmony_ci /* 159962306a36Sopenharmony_ci * Increment the new memslot generation a second time, dropping the 160062306a36Sopenharmony_ci * update in-progress flag and incrementing the generation based on 160162306a36Sopenharmony_ci * the number of address spaces. This provides a unique and easily 160262306a36Sopenharmony_ci * identifiable generation number while the memslots are in flux. 160362306a36Sopenharmony_ci */ 160462306a36Sopenharmony_ci gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; 160562306a36Sopenharmony_ci 160662306a36Sopenharmony_ci /* 160762306a36Sopenharmony_ci * Generations must be unique even across address spaces. We do not need 160862306a36Sopenharmony_ci * a global counter for that, instead the generation space is evenly split 160962306a36Sopenharmony_ci * across address spaces. For example, with two address spaces, address 161062306a36Sopenharmony_ci * space 0 will use generations 0, 2, 4, ... while address space 1 will 161162306a36Sopenharmony_ci * use generations 1, 3, 5, ... 161262306a36Sopenharmony_ci */ 161362306a36Sopenharmony_ci gen += KVM_ADDRESS_SPACE_NUM; 161462306a36Sopenharmony_ci 161562306a36Sopenharmony_ci kvm_arch_memslots_updated(kvm, gen); 161662306a36Sopenharmony_ci 161762306a36Sopenharmony_ci slots->generation = gen; 161862306a36Sopenharmony_ci} 161962306a36Sopenharmony_ci 162062306a36Sopenharmony_cistatic int kvm_prepare_memory_region(struct kvm *kvm, 162162306a36Sopenharmony_ci const struct kvm_memory_slot *old, 162262306a36Sopenharmony_ci struct kvm_memory_slot *new, 162362306a36Sopenharmony_ci enum kvm_mr_change change) 162462306a36Sopenharmony_ci{ 162562306a36Sopenharmony_ci int r; 162662306a36Sopenharmony_ci 162762306a36Sopenharmony_ci /* 162862306a36Sopenharmony_ci * If dirty logging is disabled, nullify the bitmap; the old bitmap 162962306a36Sopenharmony_ci * will be freed on "commit". If logging is enabled in both old and 163062306a36Sopenharmony_ci * new, reuse the existing bitmap. If logging is enabled only in the 163162306a36Sopenharmony_ci * new and KVM isn't using a ring buffer, allocate and initialize a 163262306a36Sopenharmony_ci * new bitmap. 163362306a36Sopenharmony_ci */ 163462306a36Sopenharmony_ci if (change != KVM_MR_DELETE) { 163562306a36Sopenharmony_ci if (!(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) 163662306a36Sopenharmony_ci new->dirty_bitmap = NULL; 163762306a36Sopenharmony_ci else if (old && old->dirty_bitmap) 163862306a36Sopenharmony_ci new->dirty_bitmap = old->dirty_bitmap; 163962306a36Sopenharmony_ci else if (kvm_use_dirty_bitmap(kvm)) { 164062306a36Sopenharmony_ci r = kvm_alloc_dirty_bitmap(new); 164162306a36Sopenharmony_ci if (r) 164262306a36Sopenharmony_ci return r; 164362306a36Sopenharmony_ci 164462306a36Sopenharmony_ci if (kvm_dirty_log_manual_protect_and_init_set(kvm)) 164562306a36Sopenharmony_ci bitmap_set(new->dirty_bitmap, 0, new->npages); 164662306a36Sopenharmony_ci } 164762306a36Sopenharmony_ci } 164862306a36Sopenharmony_ci 164962306a36Sopenharmony_ci r = kvm_arch_prepare_memory_region(kvm, old, new, change); 165062306a36Sopenharmony_ci 165162306a36Sopenharmony_ci /* Free the bitmap on failure if it was allocated above. 
*/ 165262306a36Sopenharmony_ci if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap)) 165362306a36Sopenharmony_ci kvm_destroy_dirty_bitmap(new); 165462306a36Sopenharmony_ci 165562306a36Sopenharmony_ci return r; 165662306a36Sopenharmony_ci} 165762306a36Sopenharmony_ci 165862306a36Sopenharmony_cistatic void kvm_commit_memory_region(struct kvm *kvm, 165962306a36Sopenharmony_ci struct kvm_memory_slot *old, 166062306a36Sopenharmony_ci const struct kvm_memory_slot *new, 166162306a36Sopenharmony_ci enum kvm_mr_change change) 166262306a36Sopenharmony_ci{ 166362306a36Sopenharmony_ci int old_flags = old ? old->flags : 0; 166462306a36Sopenharmony_ci int new_flags = new ? new->flags : 0; 166562306a36Sopenharmony_ci /* 166662306a36Sopenharmony_ci * Update the total number of memslot pages before calling the arch 166762306a36Sopenharmony_ci * hook so that architectures can consume the result directly. 166862306a36Sopenharmony_ci */ 166962306a36Sopenharmony_ci if (change == KVM_MR_DELETE) 167062306a36Sopenharmony_ci kvm->nr_memslot_pages -= old->npages; 167162306a36Sopenharmony_ci else if (change == KVM_MR_CREATE) 167262306a36Sopenharmony_ci kvm->nr_memslot_pages += new->npages; 167362306a36Sopenharmony_ci 167462306a36Sopenharmony_ci if ((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES) { 167562306a36Sopenharmony_ci int change = (new_flags & KVM_MEM_LOG_DIRTY_PAGES) ? 1 : -1; 167662306a36Sopenharmony_ci atomic_set(&kvm->nr_memslots_dirty_logging, 167762306a36Sopenharmony_ci atomic_read(&kvm->nr_memslots_dirty_logging) + change); 167862306a36Sopenharmony_ci } 167962306a36Sopenharmony_ci 168062306a36Sopenharmony_ci kvm_arch_commit_memory_region(kvm, old, new, change); 168162306a36Sopenharmony_ci 168262306a36Sopenharmony_ci switch (change) { 168362306a36Sopenharmony_ci case KVM_MR_CREATE: 168462306a36Sopenharmony_ci /* Nothing more to do. */ 168562306a36Sopenharmony_ci break; 168662306a36Sopenharmony_ci case KVM_MR_DELETE: 168762306a36Sopenharmony_ci /* Free the old memslot and all its metadata. */ 168862306a36Sopenharmony_ci kvm_free_memslot(kvm, old); 168962306a36Sopenharmony_ci break; 169062306a36Sopenharmony_ci case KVM_MR_MOVE: 169162306a36Sopenharmony_ci case KVM_MR_FLAGS_ONLY: 169262306a36Sopenharmony_ci /* 169362306a36Sopenharmony_ci * Free the dirty bitmap as needed; the below check encompasses 169462306a36Sopenharmony_ci * both the flags and whether a ring buffer is being used) 169562306a36Sopenharmony_ci */ 169662306a36Sopenharmony_ci if (old->dirty_bitmap && !new->dirty_bitmap) 169762306a36Sopenharmony_ci kvm_destroy_dirty_bitmap(old); 169862306a36Sopenharmony_ci 169962306a36Sopenharmony_ci /* 170062306a36Sopenharmony_ci * The final quirk. Free the detached, old slot, but only its 170162306a36Sopenharmony_ci * memory, not any metadata. Metadata, including arch specific 170262306a36Sopenharmony_ci * data, may be reused by @new. 170362306a36Sopenharmony_ci */ 170462306a36Sopenharmony_ci kfree(old); 170562306a36Sopenharmony_ci break; 170662306a36Sopenharmony_ci default: 170762306a36Sopenharmony_ci BUG(); 170862306a36Sopenharmony_ci } 170962306a36Sopenharmony_ci} 171062306a36Sopenharmony_ci 171162306a36Sopenharmony_ci/* 171262306a36Sopenharmony_ci * Activate @new, which must be installed in the inactive slots by the caller, 171362306a36Sopenharmony_ci * by swapping the active slots and then propagating @new to @old once @old is 171462306a36Sopenharmony_ci * unreachable and can be safely modified. 
171562306a36Sopenharmony_ci * 171662306a36Sopenharmony_ci * With NULL @old this simply adds @new to @active (while swapping the sets). 171762306a36Sopenharmony_ci * With NULL @new this simply removes @old from @active and frees it 171862306a36Sopenharmony_ci * (while also swapping the sets). 171962306a36Sopenharmony_ci */ 172062306a36Sopenharmony_cistatic void kvm_activate_memslot(struct kvm *kvm, 172162306a36Sopenharmony_ci struct kvm_memory_slot *old, 172262306a36Sopenharmony_ci struct kvm_memory_slot *new) 172362306a36Sopenharmony_ci{ 172462306a36Sopenharmony_ci int as_id = kvm_memslots_get_as_id(old, new); 172562306a36Sopenharmony_ci 172662306a36Sopenharmony_ci kvm_swap_active_memslots(kvm, as_id); 172762306a36Sopenharmony_ci 172862306a36Sopenharmony_ci /* Propagate the new memslot to the now inactive memslots. */ 172962306a36Sopenharmony_ci kvm_replace_memslot(kvm, old, new); 173062306a36Sopenharmony_ci} 173162306a36Sopenharmony_ci 173262306a36Sopenharmony_cistatic void kvm_copy_memslot(struct kvm_memory_slot *dest, 173362306a36Sopenharmony_ci const struct kvm_memory_slot *src) 173462306a36Sopenharmony_ci{ 173562306a36Sopenharmony_ci dest->base_gfn = src->base_gfn; 173662306a36Sopenharmony_ci dest->npages = src->npages; 173762306a36Sopenharmony_ci dest->dirty_bitmap = src->dirty_bitmap; 173862306a36Sopenharmony_ci dest->arch = src->arch; 173962306a36Sopenharmony_ci dest->userspace_addr = src->userspace_addr; 174062306a36Sopenharmony_ci dest->flags = src->flags; 174162306a36Sopenharmony_ci dest->id = src->id; 174262306a36Sopenharmony_ci dest->as_id = src->as_id; 174362306a36Sopenharmony_ci} 174462306a36Sopenharmony_ci 174562306a36Sopenharmony_cistatic void kvm_invalidate_memslot(struct kvm *kvm, 174662306a36Sopenharmony_ci struct kvm_memory_slot *old, 174762306a36Sopenharmony_ci struct kvm_memory_slot *invalid_slot) 174862306a36Sopenharmony_ci{ 174962306a36Sopenharmony_ci /* 175062306a36Sopenharmony_ci * Mark the current slot INVALID. As with all memslot modifications, 175162306a36Sopenharmony_ci * this must be done on an unreachable slot to avoid modifying the 175262306a36Sopenharmony_ci * current slot in the active tree. 175362306a36Sopenharmony_ci */ 175462306a36Sopenharmony_ci kvm_copy_memslot(invalid_slot, old); 175562306a36Sopenharmony_ci invalid_slot->flags |= KVM_MEMSLOT_INVALID; 175662306a36Sopenharmony_ci kvm_replace_memslot(kvm, old, invalid_slot); 175762306a36Sopenharmony_ci 175862306a36Sopenharmony_ci /* 175962306a36Sopenharmony_ci * Activate the slot that is now marked INVALID, but don't propagate 176062306a36Sopenharmony_ci * the slot to the now inactive slots. The slot is either going to be 176162306a36Sopenharmony_ci * deleted or recreated as a new slot. 176262306a36Sopenharmony_ci */ 176362306a36Sopenharmony_ci kvm_swap_active_memslots(kvm, old->as_id); 176462306a36Sopenharmony_ci 176562306a36Sopenharmony_ci /* 176662306a36Sopenharmony_ci * From this point no new shadow pages pointing to a deleted, or moved, 176762306a36Sopenharmony_ci * memslot will be created. Validation of sp->gfn happens in: 176862306a36Sopenharmony_ci * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) 176962306a36Sopenharmony_ci * - kvm_is_visible_gfn (mmu_check_root) 177062306a36Sopenharmony_ci */ 177162306a36Sopenharmony_ci kvm_arch_flush_shadow_memslot(kvm, old); 177262306a36Sopenharmony_ci kvm_arch_guest_memory_reclaimed(kvm); 177362306a36Sopenharmony_ci 177462306a36Sopenharmony_ci /* Was released by kvm_swap_active_memslots(), reacquire. 
*/ 177562306a36Sopenharmony_ci mutex_lock(&kvm->slots_arch_lock); 177662306a36Sopenharmony_ci 177762306a36Sopenharmony_ci /* 177862306a36Sopenharmony_ci * Copy the arch-specific field of the newly-installed slot back to the 177962306a36Sopenharmony_ci * old slot as the arch data could have changed between releasing 178062306a36Sopenharmony_ci * slots_arch_lock in kvm_swap_active_memslots() and re-acquiring the lock 178162306a36Sopenharmony_ci * above. Writers are required to retrieve memslots *after* acquiring 178262306a36Sopenharmony_ci * slots_arch_lock, thus the active slot's data is guaranteed to be fresh. 178362306a36Sopenharmony_ci */ 178462306a36Sopenharmony_ci old->arch = invalid_slot->arch; 178562306a36Sopenharmony_ci} 178662306a36Sopenharmony_ci 178762306a36Sopenharmony_cistatic void kvm_create_memslot(struct kvm *kvm, 178862306a36Sopenharmony_ci struct kvm_memory_slot *new) 178962306a36Sopenharmony_ci{ 179062306a36Sopenharmony_ci /* Add the new memslot to the inactive set and activate. */ 179162306a36Sopenharmony_ci kvm_replace_memslot(kvm, NULL, new); 179262306a36Sopenharmony_ci kvm_activate_memslot(kvm, NULL, new); 179362306a36Sopenharmony_ci} 179462306a36Sopenharmony_ci 179562306a36Sopenharmony_cistatic void kvm_delete_memslot(struct kvm *kvm, 179662306a36Sopenharmony_ci struct kvm_memory_slot *old, 179762306a36Sopenharmony_ci struct kvm_memory_slot *invalid_slot) 179862306a36Sopenharmony_ci{ 179962306a36Sopenharmony_ci /* 180062306a36Sopenharmony_ci * Remove the old memslot (in the inactive memslots) by passing NULL as 180162306a36Sopenharmony_ci * the "new" slot, and for the invalid version in the active slots. 180262306a36Sopenharmony_ci */ 180362306a36Sopenharmony_ci kvm_replace_memslot(kvm, old, NULL); 180462306a36Sopenharmony_ci kvm_activate_memslot(kvm, invalid_slot, NULL); 180562306a36Sopenharmony_ci} 180662306a36Sopenharmony_ci 180762306a36Sopenharmony_cistatic void kvm_move_memslot(struct kvm *kvm, 180862306a36Sopenharmony_ci struct kvm_memory_slot *old, 180962306a36Sopenharmony_ci struct kvm_memory_slot *new, 181062306a36Sopenharmony_ci struct kvm_memory_slot *invalid_slot) 181162306a36Sopenharmony_ci{ 181262306a36Sopenharmony_ci /* 181362306a36Sopenharmony_ci * Replace the old memslot in the inactive slots, and then swap slots 181462306a36Sopenharmony_ci * and replace the current INVALID with the new as well. 181562306a36Sopenharmony_ci */ 181662306a36Sopenharmony_ci kvm_replace_memslot(kvm, old, new); 181762306a36Sopenharmony_ci kvm_activate_memslot(kvm, invalid_slot, new); 181862306a36Sopenharmony_ci} 181962306a36Sopenharmony_ci 182062306a36Sopenharmony_cistatic void kvm_update_flags_memslot(struct kvm *kvm, 182162306a36Sopenharmony_ci struct kvm_memory_slot *old, 182262306a36Sopenharmony_ci struct kvm_memory_slot *new) 182362306a36Sopenharmony_ci{ 182462306a36Sopenharmony_ci /* 182562306a36Sopenharmony_ci * Similar to the MOVE case, but the slot doesn't need to be zapped as 182662306a36Sopenharmony_ci * an intermediate step. Instead, the old memslot is simply replaced 182762306a36Sopenharmony_ci * with a new, updated copy in both memslot sets. 
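	 *
	 * Whether the new copy keeps, drops, or newly allocates a dirty_bitmap
	 * was decided earlier in kvm_prepare_memory_region(); any bitmap that
	 * is no longer needed is freed in kvm_commit_memory_region().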
182862306a36Sopenharmony_ci */ 182962306a36Sopenharmony_ci kvm_replace_memslot(kvm, old, new); 183062306a36Sopenharmony_ci kvm_activate_memslot(kvm, old, new); 183162306a36Sopenharmony_ci} 183262306a36Sopenharmony_ci 183362306a36Sopenharmony_cistatic int kvm_set_memslot(struct kvm *kvm, 183462306a36Sopenharmony_ci struct kvm_memory_slot *old, 183562306a36Sopenharmony_ci struct kvm_memory_slot *new, 183662306a36Sopenharmony_ci enum kvm_mr_change change) 183762306a36Sopenharmony_ci{ 183862306a36Sopenharmony_ci struct kvm_memory_slot *invalid_slot; 183962306a36Sopenharmony_ci int r; 184062306a36Sopenharmony_ci 184162306a36Sopenharmony_ci /* 184262306a36Sopenharmony_ci * Released in kvm_swap_active_memslots(). 184362306a36Sopenharmony_ci * 184462306a36Sopenharmony_ci * Must be held from before the current memslots are copied until after 184562306a36Sopenharmony_ci * the new memslots are installed with rcu_assign_pointer, then 184662306a36Sopenharmony_ci * released before the synchronize srcu in kvm_swap_active_memslots(). 184762306a36Sopenharmony_ci * 184862306a36Sopenharmony_ci * When modifying memslots outside of the slots_lock, must be held 184962306a36Sopenharmony_ci * before reading the pointer to the current memslots until after all 185062306a36Sopenharmony_ci * changes to those memslots are complete. 185162306a36Sopenharmony_ci * 185262306a36Sopenharmony_ci * These rules ensure that installing new memslots does not lose 185362306a36Sopenharmony_ci * changes made to the previous memslots. 185462306a36Sopenharmony_ci */ 185562306a36Sopenharmony_ci mutex_lock(&kvm->slots_arch_lock); 185662306a36Sopenharmony_ci 185762306a36Sopenharmony_ci /* 185862306a36Sopenharmony_ci * Invalidate the old slot if it's being deleted or moved. This is 185962306a36Sopenharmony_ci * done prior to actually deleting/moving the memslot to allow vCPUs to 186062306a36Sopenharmony_ci * continue running by ensuring there are no mappings or shadow pages 186162306a36Sopenharmony_ci * for the memslot when it is deleted/moved. Without pre-invalidation 186262306a36Sopenharmony_ci * (and without a lock), a window would exist between effecting the 186362306a36Sopenharmony_ci * delete/move and committing the changes in arch code where KVM or a 186462306a36Sopenharmony_ci * guest could access a non-existent memslot. 186562306a36Sopenharmony_ci * 186662306a36Sopenharmony_ci * Modifications are done on a temporary, unreachable slot. The old 186762306a36Sopenharmony_ci * slot needs to be preserved in case a later step fails and the 186862306a36Sopenharmony_ci * invalidation needs to be reverted. 186962306a36Sopenharmony_ci */ 187062306a36Sopenharmony_ci if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { 187162306a36Sopenharmony_ci invalid_slot = kzalloc(sizeof(*invalid_slot), GFP_KERNEL_ACCOUNT); 187262306a36Sopenharmony_ci if (!invalid_slot) { 187362306a36Sopenharmony_ci mutex_unlock(&kvm->slots_arch_lock); 187462306a36Sopenharmony_ci return -ENOMEM; 187562306a36Sopenharmony_ci } 187662306a36Sopenharmony_ci kvm_invalidate_memslot(kvm, old, invalid_slot); 187762306a36Sopenharmony_ci } 187862306a36Sopenharmony_ci 187962306a36Sopenharmony_ci r = kvm_prepare_memory_region(kvm, old, new, change); 188062306a36Sopenharmony_ci if (r) { 188162306a36Sopenharmony_ci /* 188262306a36Sopenharmony_ci * For DELETE/MOVE, revert the above INVALID change. No 188362306a36Sopenharmony_ci * modifications required since the original slot was preserved 188462306a36Sopenharmony_ci * in the inactive slots. 
Changing the active memslots also 188562306a36Sopenharmony_ci * release slots_arch_lock. 188662306a36Sopenharmony_ci */ 188762306a36Sopenharmony_ci if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { 188862306a36Sopenharmony_ci kvm_activate_memslot(kvm, invalid_slot, old); 188962306a36Sopenharmony_ci kfree(invalid_slot); 189062306a36Sopenharmony_ci } else { 189162306a36Sopenharmony_ci mutex_unlock(&kvm->slots_arch_lock); 189262306a36Sopenharmony_ci } 189362306a36Sopenharmony_ci return r; 189462306a36Sopenharmony_ci } 189562306a36Sopenharmony_ci 189662306a36Sopenharmony_ci /* 189762306a36Sopenharmony_ci * For DELETE and MOVE, the working slot is now active as the INVALID 189862306a36Sopenharmony_ci * version of the old slot. MOVE is particularly special as it reuses 189962306a36Sopenharmony_ci * the old slot and returns a copy of the old slot (in working_slot). 190062306a36Sopenharmony_ci * For CREATE, there is no old slot. For DELETE and FLAGS_ONLY, the 190162306a36Sopenharmony_ci * old slot is detached but otherwise preserved. 190262306a36Sopenharmony_ci */ 190362306a36Sopenharmony_ci if (change == KVM_MR_CREATE) 190462306a36Sopenharmony_ci kvm_create_memslot(kvm, new); 190562306a36Sopenharmony_ci else if (change == KVM_MR_DELETE) 190662306a36Sopenharmony_ci kvm_delete_memslot(kvm, old, invalid_slot); 190762306a36Sopenharmony_ci else if (change == KVM_MR_MOVE) 190862306a36Sopenharmony_ci kvm_move_memslot(kvm, old, new, invalid_slot); 190962306a36Sopenharmony_ci else if (change == KVM_MR_FLAGS_ONLY) 191062306a36Sopenharmony_ci kvm_update_flags_memslot(kvm, old, new); 191162306a36Sopenharmony_ci else 191262306a36Sopenharmony_ci BUG(); 191362306a36Sopenharmony_ci 191462306a36Sopenharmony_ci /* Free the temporary INVALID slot used for DELETE and MOVE. */ 191562306a36Sopenharmony_ci if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) 191662306a36Sopenharmony_ci kfree(invalid_slot); 191762306a36Sopenharmony_ci 191862306a36Sopenharmony_ci /* 191962306a36Sopenharmony_ci * No need to refresh new->arch, changes after dropping slots_arch_lock 192062306a36Sopenharmony_ci * will directly hit the final, active memslot. Architectures are 192162306a36Sopenharmony_ci * responsible for knowing that new->arch may be stale. 192262306a36Sopenharmony_ci */ 192362306a36Sopenharmony_ci kvm_commit_memory_region(kvm, old, new, change); 192462306a36Sopenharmony_ci 192562306a36Sopenharmony_ci return 0; 192662306a36Sopenharmony_ci} 192762306a36Sopenharmony_ci 192862306a36Sopenharmony_cistatic bool kvm_check_memslot_overlap(struct kvm_memslots *slots, int id, 192962306a36Sopenharmony_ci gfn_t start, gfn_t end) 193062306a36Sopenharmony_ci{ 193162306a36Sopenharmony_ci struct kvm_memslot_iter iter; 193262306a36Sopenharmony_ci 193362306a36Sopenharmony_ci kvm_for_each_memslot_in_gfn_range(&iter, slots, start, end) { 193462306a36Sopenharmony_ci if (iter.slot->id != id) 193562306a36Sopenharmony_ci return true; 193662306a36Sopenharmony_ci } 193762306a36Sopenharmony_ci 193862306a36Sopenharmony_ci return false; 193962306a36Sopenharmony_ci} 194062306a36Sopenharmony_ci 194162306a36Sopenharmony_ci/* 194262306a36Sopenharmony_ci * Allocate some memory and give it an address in the guest physical address 194362306a36Sopenharmony_ci * space. 194462306a36Sopenharmony_ci * 194562306a36Sopenharmony_ci * Discontiguous memory is allowed, mostly for framebuffers. 194662306a36Sopenharmony_ci * 194762306a36Sopenharmony_ci * Must be called holding kvm->slots_lock for write. 
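 *
 * The slot field of @mem packs the address space id and the slot id,
 * i.e. (as_id << 16) | id; the two halves are unpacked again below.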
194862306a36Sopenharmony_ci */ 194962306a36Sopenharmony_ciint __kvm_set_memory_region(struct kvm *kvm, 195062306a36Sopenharmony_ci const struct kvm_userspace_memory_region *mem) 195162306a36Sopenharmony_ci{ 195262306a36Sopenharmony_ci struct kvm_memory_slot *old, *new; 195362306a36Sopenharmony_ci struct kvm_memslots *slots; 195462306a36Sopenharmony_ci enum kvm_mr_change change; 195562306a36Sopenharmony_ci unsigned long npages; 195662306a36Sopenharmony_ci gfn_t base_gfn; 195762306a36Sopenharmony_ci int as_id, id; 195862306a36Sopenharmony_ci int r; 195962306a36Sopenharmony_ci 196062306a36Sopenharmony_ci r = check_memory_region_flags(mem); 196162306a36Sopenharmony_ci if (r) 196262306a36Sopenharmony_ci return r; 196362306a36Sopenharmony_ci 196462306a36Sopenharmony_ci as_id = mem->slot >> 16; 196562306a36Sopenharmony_ci id = (u16)mem->slot; 196662306a36Sopenharmony_ci 196762306a36Sopenharmony_ci /* General sanity checks */ 196862306a36Sopenharmony_ci if ((mem->memory_size & (PAGE_SIZE - 1)) || 196962306a36Sopenharmony_ci (mem->memory_size != (unsigned long)mem->memory_size)) 197062306a36Sopenharmony_ci return -EINVAL; 197162306a36Sopenharmony_ci if (mem->guest_phys_addr & (PAGE_SIZE - 1)) 197262306a36Sopenharmony_ci return -EINVAL; 197362306a36Sopenharmony_ci /* We can read the guest memory with __xxx_user() later on. */ 197462306a36Sopenharmony_ci if ((mem->userspace_addr & (PAGE_SIZE - 1)) || 197562306a36Sopenharmony_ci (mem->userspace_addr != untagged_addr(mem->userspace_addr)) || 197662306a36Sopenharmony_ci !access_ok((void __user *)(unsigned long)mem->userspace_addr, 197762306a36Sopenharmony_ci mem->memory_size)) 197862306a36Sopenharmony_ci return -EINVAL; 197962306a36Sopenharmony_ci if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM) 198062306a36Sopenharmony_ci return -EINVAL; 198162306a36Sopenharmony_ci if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) 198262306a36Sopenharmony_ci return -EINVAL; 198362306a36Sopenharmony_ci if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES) 198462306a36Sopenharmony_ci return -EINVAL; 198562306a36Sopenharmony_ci 198662306a36Sopenharmony_ci slots = __kvm_memslots(kvm, as_id); 198762306a36Sopenharmony_ci 198862306a36Sopenharmony_ci /* 198962306a36Sopenharmony_ci * Note, the old memslot (and the pointer itself!) may be invalidated 199062306a36Sopenharmony_ci * and/or destroyed by kvm_set_memslot(). 199162306a36Sopenharmony_ci */ 199262306a36Sopenharmony_ci old = id_to_memslot(slots, id); 199362306a36Sopenharmony_ci 199462306a36Sopenharmony_ci if (!mem->memory_size) { 199562306a36Sopenharmony_ci if (!old || !old->npages) 199662306a36Sopenharmony_ci return -EINVAL; 199762306a36Sopenharmony_ci 199862306a36Sopenharmony_ci if (WARN_ON_ONCE(kvm->nr_memslot_pages < old->npages)) 199962306a36Sopenharmony_ci return -EIO; 200062306a36Sopenharmony_ci 200162306a36Sopenharmony_ci return kvm_set_memslot(kvm, old, NULL, KVM_MR_DELETE); 200262306a36Sopenharmony_ci } 200362306a36Sopenharmony_ci 200462306a36Sopenharmony_ci base_gfn = (mem->guest_phys_addr >> PAGE_SHIFT); 200562306a36Sopenharmony_ci npages = (mem->memory_size >> PAGE_SHIFT); 200662306a36Sopenharmony_ci 200762306a36Sopenharmony_ci if (!old || !old->npages) { 200862306a36Sopenharmony_ci change = KVM_MR_CREATE; 200962306a36Sopenharmony_ci 201062306a36Sopenharmony_ci /* 201162306a36Sopenharmony_ci * To simplify KVM internals, the total number of pages across 201262306a36Sopenharmony_ci * all memslots must fit in an unsigned long. 
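	 * (The check below relies on unsigned wraparound: if adding npages
	 * overflows, the sum compares less than the current page count.)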
201362306a36Sopenharmony_ci */ 201462306a36Sopenharmony_ci if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages) 201562306a36Sopenharmony_ci return -EINVAL; 201662306a36Sopenharmony_ci } else { /* Modify an existing slot. */ 201762306a36Sopenharmony_ci if ((mem->userspace_addr != old->userspace_addr) || 201862306a36Sopenharmony_ci (npages != old->npages) || 201962306a36Sopenharmony_ci ((mem->flags ^ old->flags) & KVM_MEM_READONLY)) 202062306a36Sopenharmony_ci return -EINVAL; 202162306a36Sopenharmony_ci 202262306a36Sopenharmony_ci if (base_gfn != old->base_gfn) 202362306a36Sopenharmony_ci change = KVM_MR_MOVE; 202462306a36Sopenharmony_ci else if (mem->flags != old->flags) 202562306a36Sopenharmony_ci change = KVM_MR_FLAGS_ONLY; 202662306a36Sopenharmony_ci else /* Nothing to change. */ 202762306a36Sopenharmony_ci return 0; 202862306a36Sopenharmony_ci } 202962306a36Sopenharmony_ci 203062306a36Sopenharmony_ci if ((change == KVM_MR_CREATE || change == KVM_MR_MOVE) && 203162306a36Sopenharmony_ci kvm_check_memslot_overlap(slots, id, base_gfn, base_gfn + npages)) 203262306a36Sopenharmony_ci return -EEXIST; 203362306a36Sopenharmony_ci 203462306a36Sopenharmony_ci /* Allocate a slot that will persist in the memslot. */ 203562306a36Sopenharmony_ci new = kzalloc(sizeof(*new), GFP_KERNEL_ACCOUNT); 203662306a36Sopenharmony_ci if (!new) 203762306a36Sopenharmony_ci return -ENOMEM; 203862306a36Sopenharmony_ci 203962306a36Sopenharmony_ci new->as_id = as_id; 204062306a36Sopenharmony_ci new->id = id; 204162306a36Sopenharmony_ci new->base_gfn = base_gfn; 204262306a36Sopenharmony_ci new->npages = npages; 204362306a36Sopenharmony_ci new->flags = mem->flags; 204462306a36Sopenharmony_ci new->userspace_addr = mem->userspace_addr; 204562306a36Sopenharmony_ci 204662306a36Sopenharmony_ci r = kvm_set_memslot(kvm, old, new, change); 204762306a36Sopenharmony_ci if (r) 204862306a36Sopenharmony_ci kfree(new); 204962306a36Sopenharmony_ci return r; 205062306a36Sopenharmony_ci} 205162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(__kvm_set_memory_region); 205262306a36Sopenharmony_ci 205362306a36Sopenharmony_ciint kvm_set_memory_region(struct kvm *kvm, 205462306a36Sopenharmony_ci const struct kvm_userspace_memory_region *mem) 205562306a36Sopenharmony_ci{ 205662306a36Sopenharmony_ci int r; 205762306a36Sopenharmony_ci 205862306a36Sopenharmony_ci mutex_lock(&kvm->slots_lock); 205962306a36Sopenharmony_ci r = __kvm_set_memory_region(kvm, mem); 206062306a36Sopenharmony_ci mutex_unlock(&kvm->slots_lock); 206162306a36Sopenharmony_ci return r; 206262306a36Sopenharmony_ci} 206362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_set_memory_region); 206462306a36Sopenharmony_ci 206562306a36Sopenharmony_cistatic int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, 206662306a36Sopenharmony_ci struct kvm_userspace_memory_region *mem) 206762306a36Sopenharmony_ci{ 206862306a36Sopenharmony_ci if ((u16)mem->slot >= KVM_USER_MEM_SLOTS) 206962306a36Sopenharmony_ci return -EINVAL; 207062306a36Sopenharmony_ci 207162306a36Sopenharmony_ci return kvm_set_memory_region(kvm, mem); 207262306a36Sopenharmony_ci} 207362306a36Sopenharmony_ci 207462306a36Sopenharmony_ci#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT 207562306a36Sopenharmony_ci/** 207662306a36Sopenharmony_ci * kvm_get_dirty_log - get a snapshot of dirty pages 207762306a36Sopenharmony_ci * @kvm: pointer to kvm instance 207862306a36Sopenharmony_ci * @log: slot id and address to which we copy the log 207962306a36Sopenharmony_ci * @is_dirty: set to '1' if any dirty pages were found 
208062306a36Sopenharmony_ci * @memslot: set to the associated memslot, always valid on success 208162306a36Sopenharmony_ci */ 208262306a36Sopenharmony_ciint kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, 208362306a36Sopenharmony_ci int *is_dirty, struct kvm_memory_slot **memslot) 208462306a36Sopenharmony_ci{ 208562306a36Sopenharmony_ci struct kvm_memslots *slots; 208662306a36Sopenharmony_ci int i, as_id, id; 208762306a36Sopenharmony_ci unsigned long n; 208862306a36Sopenharmony_ci unsigned long any = 0; 208962306a36Sopenharmony_ci 209062306a36Sopenharmony_ci /* Dirty ring tracking may be exclusive to dirty log tracking */ 209162306a36Sopenharmony_ci if (!kvm_use_dirty_bitmap(kvm)) 209262306a36Sopenharmony_ci return -ENXIO; 209362306a36Sopenharmony_ci 209462306a36Sopenharmony_ci *memslot = NULL; 209562306a36Sopenharmony_ci *is_dirty = 0; 209662306a36Sopenharmony_ci 209762306a36Sopenharmony_ci as_id = log->slot >> 16; 209862306a36Sopenharmony_ci id = (u16)log->slot; 209962306a36Sopenharmony_ci if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) 210062306a36Sopenharmony_ci return -EINVAL; 210162306a36Sopenharmony_ci 210262306a36Sopenharmony_ci slots = __kvm_memslots(kvm, as_id); 210362306a36Sopenharmony_ci *memslot = id_to_memslot(slots, id); 210462306a36Sopenharmony_ci if (!(*memslot) || !(*memslot)->dirty_bitmap) 210562306a36Sopenharmony_ci return -ENOENT; 210662306a36Sopenharmony_ci 210762306a36Sopenharmony_ci kvm_arch_sync_dirty_log(kvm, *memslot); 210862306a36Sopenharmony_ci 210962306a36Sopenharmony_ci n = kvm_dirty_bitmap_bytes(*memslot); 211062306a36Sopenharmony_ci 211162306a36Sopenharmony_ci for (i = 0; !any && i < n/sizeof(long); ++i) 211262306a36Sopenharmony_ci any = (*memslot)->dirty_bitmap[i]; 211362306a36Sopenharmony_ci 211462306a36Sopenharmony_ci if (copy_to_user(log->dirty_bitmap, (*memslot)->dirty_bitmap, n)) 211562306a36Sopenharmony_ci return -EFAULT; 211662306a36Sopenharmony_ci 211762306a36Sopenharmony_ci if (any) 211862306a36Sopenharmony_ci *is_dirty = 1; 211962306a36Sopenharmony_ci return 0; 212062306a36Sopenharmony_ci} 212162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_get_dirty_log); 212262306a36Sopenharmony_ci 212362306a36Sopenharmony_ci#else /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ 212462306a36Sopenharmony_ci/** 212562306a36Sopenharmony_ci * kvm_get_dirty_log_protect - get a snapshot of dirty pages 212662306a36Sopenharmony_ci * and reenable dirty page tracking for the corresponding pages. 212762306a36Sopenharmony_ci * @kvm: pointer to kvm instance 212862306a36Sopenharmony_ci * @log: slot id and address to which we copy the log 212962306a36Sopenharmony_ci * 213062306a36Sopenharmony_ci * We need to keep it in mind that VCPU threads can write to the bitmap 213162306a36Sopenharmony_ci * concurrently. So, to avoid losing track of dirty pages we keep the 213262306a36Sopenharmony_ci * following order: 213362306a36Sopenharmony_ci * 213462306a36Sopenharmony_ci * 1. Take a snapshot of the bit and clear it if needed. 213562306a36Sopenharmony_ci * 2. Write protect the corresponding page. 213662306a36Sopenharmony_ci * 3. Copy the snapshot to the userspace. 213762306a36Sopenharmony_ci * 4. Upon return caller flushes TLB's if needed. 213862306a36Sopenharmony_ci * 213962306a36Sopenharmony_ci * Between 2 and 4, the guest may write to the page using the remaining TLB 214062306a36Sopenharmony_ci * entry. 
This is not a problem because the page is reported dirty using
214162306a36Sopenharmony_ci * the snapshot taken before and step 4 ensures that writes done after
214262306a36Sopenharmony_ci * exiting to userspace will be logged for the next call.
214362306a36Sopenharmony_ci *
214462306a36Sopenharmony_ci */
214562306a36Sopenharmony_cistatic int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
214662306a36Sopenharmony_ci{
214762306a36Sopenharmony_ci struct kvm_memslots *slots;
214862306a36Sopenharmony_ci struct kvm_memory_slot *memslot;
214962306a36Sopenharmony_ci int i, as_id, id;
215062306a36Sopenharmony_ci unsigned long n;
215162306a36Sopenharmony_ci unsigned long *dirty_bitmap;
215262306a36Sopenharmony_ci unsigned long *dirty_bitmap_buffer;
215362306a36Sopenharmony_ci bool flush;
215462306a36Sopenharmony_ci
215562306a36Sopenharmony_ci /* Dirty ring tracking may be exclusive to dirty log tracking */
215662306a36Sopenharmony_ci if (!kvm_use_dirty_bitmap(kvm))
215762306a36Sopenharmony_ci return -ENXIO;
215862306a36Sopenharmony_ci
215962306a36Sopenharmony_ci as_id = log->slot >> 16;
216062306a36Sopenharmony_ci id = (u16)log->slot;
216162306a36Sopenharmony_ci if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
216262306a36Sopenharmony_ci return -EINVAL;
216362306a36Sopenharmony_ci
216462306a36Sopenharmony_ci slots = __kvm_memslots(kvm, as_id);
216562306a36Sopenharmony_ci memslot = id_to_memslot(slots, id);
216662306a36Sopenharmony_ci if (!memslot || !memslot->dirty_bitmap)
216762306a36Sopenharmony_ci return -ENOENT;
216862306a36Sopenharmony_ci
216962306a36Sopenharmony_ci dirty_bitmap = memslot->dirty_bitmap;
217062306a36Sopenharmony_ci
217162306a36Sopenharmony_ci kvm_arch_sync_dirty_log(kvm, memslot);
217262306a36Sopenharmony_ci
217362306a36Sopenharmony_ci n = kvm_dirty_bitmap_bytes(memslot);
217462306a36Sopenharmony_ci flush = false;
217562306a36Sopenharmony_ci if (kvm->manual_dirty_log_protect) {
217662306a36Sopenharmony_ci /*
217762306a36Sopenharmony_ci * Unlike kvm_get_dirty_log, we always leave 'flush' set to false,
217862306a36Sopenharmony_ci * because no flush is needed until KVM_CLEAR_DIRTY_LOG. There
217962306a36Sopenharmony_ci * is some code duplication between this function and
218062306a36Sopenharmony_ci * kvm_get_dirty_log, but hopefully all architectures will
218162306a36Sopenharmony_ci * transition to kvm_get_dirty_log_protect so that kvm_get_dirty_log
218262306a36Sopenharmony_ci * can be eliminated.
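	 *
	 * Illustrative userspace flow under manual protect (a sketch only, not
	 * part of the original comment; buf, slot and npages are hypothetical
	 * userspace variables, see Documentation/virt/kvm/api.rst):
	 *
	 *	struct kvm_dirty_log get = { .slot = slot, .dirty_bitmap = buf };
	 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &get);
	 *	...consume the pages reported in buf...
	 *	struct kvm_clear_dirty_log clr = { .slot = slot, .first_page = 0,
	 *					   .num_pages = npages,
	 *					   .dirty_bitmap = buf };
	 *	ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clr);
	 *
	 * Pages are write-protected again (and TLBs flushed) only at the
	 * KVM_CLEAR_DIRTY_LOG step.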
218362306a36Sopenharmony_ci */
218462306a36Sopenharmony_ci dirty_bitmap_buffer = dirty_bitmap;
218562306a36Sopenharmony_ci } else {
218662306a36Sopenharmony_ci dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
218762306a36Sopenharmony_ci memset(dirty_bitmap_buffer, 0, n);
218862306a36Sopenharmony_ci
218962306a36Sopenharmony_ci KVM_MMU_LOCK(kvm);
219062306a36Sopenharmony_ci for (i = 0; i < n / sizeof(long); i++) {
219162306a36Sopenharmony_ci unsigned long mask;
219262306a36Sopenharmony_ci gfn_t offset;
219362306a36Sopenharmony_ci
219462306a36Sopenharmony_ci if (!dirty_bitmap[i])
219562306a36Sopenharmony_ci continue;
219662306a36Sopenharmony_ci
219762306a36Sopenharmony_ci flush = true;
219862306a36Sopenharmony_ci mask = xchg(&dirty_bitmap[i], 0);
219962306a36Sopenharmony_ci dirty_bitmap_buffer[i] = mask;
220062306a36Sopenharmony_ci
220162306a36Sopenharmony_ci offset = i * BITS_PER_LONG;
220262306a36Sopenharmony_ci kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
220362306a36Sopenharmony_ci offset, mask);
220462306a36Sopenharmony_ci }
220562306a36Sopenharmony_ci KVM_MMU_UNLOCK(kvm);
220662306a36Sopenharmony_ci }
220762306a36Sopenharmony_ci
220862306a36Sopenharmony_ci if (flush)
220962306a36Sopenharmony_ci kvm_flush_remote_tlbs_memslot(kvm, memslot);
221062306a36Sopenharmony_ci
221162306a36Sopenharmony_ci if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
221262306a36Sopenharmony_ci return -EFAULT;
221362306a36Sopenharmony_ci return 0;
221462306a36Sopenharmony_ci}
221562306a36Sopenharmony_ci
221662306a36Sopenharmony_ci
221762306a36Sopenharmony_ci/**
221862306a36Sopenharmony_ci * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
221962306a36Sopenharmony_ci * @kvm: kvm instance
222062306a36Sopenharmony_ci * @log: slot id and address to which we copy the log
222162306a36Sopenharmony_ci *
222262306a36Sopenharmony_ci * Steps 1-4 below provide a general overview of dirty page logging. See
222362306a36Sopenharmony_ci * kvm_get_dirty_log_protect() function description for additional details.
222462306a36Sopenharmony_ci *
222562306a36Sopenharmony_ci * We call kvm_get_dirty_log_protect() to handle steps 1-3; upon return we
222662306a36Sopenharmony_ci * always flush the TLB (step 4), even if a previous step failed and the dirty
222762306a36Sopenharmony_ci * bitmap may be corrupt. Regardless of the previous outcome, the KVM logging
222862306a36Sopenharmony_ci * API does not preclude subsequent dirty log reads by userspace. Flushing the
222962306a36Sopenharmony_ci * TLB ensures that writes will be marked dirty for the next log read.
223062306a36Sopenharmony_ci *
223162306a36Sopenharmony_ci * 1. Take a snapshot of the bit and clear it if needed.
223262306a36Sopenharmony_ci * 2. Write protect the corresponding page.
223362306a36Sopenharmony_ci * 3. Copy the snapshot to userspace.
223462306a36Sopenharmony_ci * 4. Flush TLBs if needed.
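 *
 * For illustration (not part of the original comment): this path is reached
 * via the KVM_GET_DIRTY_LOG ioctl on the VM file descriptor, and the
 * userspace bitmap must cover the whole slot, i.e. at least
 *
 *	ALIGN(memslot->npages, BITS_PER_LONG) / 8
 *
 * bytes (see kvm_dirty_bitmap_bytes()), one bit per guest page.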
223562306a36Sopenharmony_ci */ 223662306a36Sopenharmony_cistatic int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 223762306a36Sopenharmony_ci struct kvm_dirty_log *log) 223862306a36Sopenharmony_ci{ 223962306a36Sopenharmony_ci int r; 224062306a36Sopenharmony_ci 224162306a36Sopenharmony_ci mutex_lock(&kvm->slots_lock); 224262306a36Sopenharmony_ci 224362306a36Sopenharmony_ci r = kvm_get_dirty_log_protect(kvm, log); 224462306a36Sopenharmony_ci 224562306a36Sopenharmony_ci mutex_unlock(&kvm->slots_lock); 224662306a36Sopenharmony_ci return r; 224762306a36Sopenharmony_ci} 224862306a36Sopenharmony_ci 224962306a36Sopenharmony_ci/** 225062306a36Sopenharmony_ci * kvm_clear_dirty_log_protect - clear dirty bits in the bitmap 225162306a36Sopenharmony_ci * and reenable dirty page tracking for the corresponding pages. 225262306a36Sopenharmony_ci * @kvm: pointer to kvm instance 225362306a36Sopenharmony_ci * @log: slot id and address from which to fetch the bitmap of dirty pages 225462306a36Sopenharmony_ci */ 225562306a36Sopenharmony_cistatic int kvm_clear_dirty_log_protect(struct kvm *kvm, 225662306a36Sopenharmony_ci struct kvm_clear_dirty_log *log) 225762306a36Sopenharmony_ci{ 225862306a36Sopenharmony_ci struct kvm_memslots *slots; 225962306a36Sopenharmony_ci struct kvm_memory_slot *memslot; 226062306a36Sopenharmony_ci int as_id, id; 226162306a36Sopenharmony_ci gfn_t offset; 226262306a36Sopenharmony_ci unsigned long i, n; 226362306a36Sopenharmony_ci unsigned long *dirty_bitmap; 226462306a36Sopenharmony_ci unsigned long *dirty_bitmap_buffer; 226562306a36Sopenharmony_ci bool flush; 226662306a36Sopenharmony_ci 226762306a36Sopenharmony_ci /* Dirty ring tracking may be exclusive to dirty log tracking */ 226862306a36Sopenharmony_ci if (!kvm_use_dirty_bitmap(kvm)) 226962306a36Sopenharmony_ci return -ENXIO; 227062306a36Sopenharmony_ci 227162306a36Sopenharmony_ci as_id = log->slot >> 16; 227262306a36Sopenharmony_ci id = (u16)log->slot; 227362306a36Sopenharmony_ci if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) 227462306a36Sopenharmony_ci return -EINVAL; 227562306a36Sopenharmony_ci 227662306a36Sopenharmony_ci if (log->first_page & 63) 227762306a36Sopenharmony_ci return -EINVAL; 227862306a36Sopenharmony_ci 227962306a36Sopenharmony_ci slots = __kvm_memslots(kvm, as_id); 228062306a36Sopenharmony_ci memslot = id_to_memslot(slots, id); 228162306a36Sopenharmony_ci if (!memslot || !memslot->dirty_bitmap) 228262306a36Sopenharmony_ci return -ENOENT; 228362306a36Sopenharmony_ci 228462306a36Sopenharmony_ci dirty_bitmap = memslot->dirty_bitmap; 228562306a36Sopenharmony_ci 228662306a36Sopenharmony_ci n = ALIGN(log->num_pages, BITS_PER_LONG) / 8; 228762306a36Sopenharmony_ci 228862306a36Sopenharmony_ci if (log->first_page > memslot->npages || 228962306a36Sopenharmony_ci log->num_pages > memslot->npages - log->first_page || 229062306a36Sopenharmony_ci (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63))) 229162306a36Sopenharmony_ci return -EINVAL; 229262306a36Sopenharmony_ci 229362306a36Sopenharmony_ci kvm_arch_sync_dirty_log(kvm, memslot); 229462306a36Sopenharmony_ci 229562306a36Sopenharmony_ci flush = false; 229662306a36Sopenharmony_ci dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); 229762306a36Sopenharmony_ci if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n)) 229862306a36Sopenharmony_ci return -EFAULT; 229962306a36Sopenharmony_ci 230062306a36Sopenharmony_ci KVM_MMU_LOCK(kvm); 230162306a36Sopenharmony_ci for (offset = log->first_page, i = offset / BITS_PER_LONG, 
230262306a36Sopenharmony_ci n = DIV_ROUND_UP(log->num_pages, BITS_PER_LONG); n--; 230362306a36Sopenharmony_ci i++, offset += BITS_PER_LONG) { 230462306a36Sopenharmony_ci unsigned long mask = *dirty_bitmap_buffer++; 230562306a36Sopenharmony_ci atomic_long_t *p = (atomic_long_t *) &dirty_bitmap[i]; 230662306a36Sopenharmony_ci if (!mask) 230762306a36Sopenharmony_ci continue; 230862306a36Sopenharmony_ci 230962306a36Sopenharmony_ci mask &= atomic_long_fetch_andnot(mask, p); 231062306a36Sopenharmony_ci 231162306a36Sopenharmony_ci /* 231262306a36Sopenharmony_ci * mask contains the bits that really have been cleared. This 231362306a36Sopenharmony_ci * never includes any bits beyond the length of the memslot (if 231462306a36Sopenharmony_ci * the length is not aligned to 64 pages), therefore it is not 231562306a36Sopenharmony_ci * a problem if userspace sets them in log->dirty_bitmap. 231662306a36Sopenharmony_ci */ 231762306a36Sopenharmony_ci if (mask) { 231862306a36Sopenharmony_ci flush = true; 231962306a36Sopenharmony_ci kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, 232062306a36Sopenharmony_ci offset, mask); 232162306a36Sopenharmony_ci } 232262306a36Sopenharmony_ci } 232362306a36Sopenharmony_ci KVM_MMU_UNLOCK(kvm); 232462306a36Sopenharmony_ci 232562306a36Sopenharmony_ci if (flush) 232662306a36Sopenharmony_ci kvm_flush_remote_tlbs_memslot(kvm, memslot); 232762306a36Sopenharmony_ci 232862306a36Sopenharmony_ci return 0; 232962306a36Sopenharmony_ci} 233062306a36Sopenharmony_ci 233162306a36Sopenharmony_cistatic int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, 233262306a36Sopenharmony_ci struct kvm_clear_dirty_log *log) 233362306a36Sopenharmony_ci{ 233462306a36Sopenharmony_ci int r; 233562306a36Sopenharmony_ci 233662306a36Sopenharmony_ci mutex_lock(&kvm->slots_lock); 233762306a36Sopenharmony_ci 233862306a36Sopenharmony_ci r = kvm_clear_dirty_log_protect(kvm, log); 233962306a36Sopenharmony_ci 234062306a36Sopenharmony_ci mutex_unlock(&kvm->slots_lock); 234162306a36Sopenharmony_ci return r; 234262306a36Sopenharmony_ci} 234362306a36Sopenharmony_ci#endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ 234462306a36Sopenharmony_ci 234562306a36Sopenharmony_cistruct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) 234662306a36Sopenharmony_ci{ 234762306a36Sopenharmony_ci return __gfn_to_memslot(kvm_memslots(kvm), gfn); 234862306a36Sopenharmony_ci} 234962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gfn_to_memslot); 235062306a36Sopenharmony_ci 235162306a36Sopenharmony_cistruct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn) 235262306a36Sopenharmony_ci{ 235362306a36Sopenharmony_ci struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); 235462306a36Sopenharmony_ci u64 gen = slots->generation; 235562306a36Sopenharmony_ci struct kvm_memory_slot *slot; 235662306a36Sopenharmony_ci 235762306a36Sopenharmony_ci /* 235862306a36Sopenharmony_ci * This also protects against using a memslot from a different address space, 235962306a36Sopenharmony_ci * since different address spaces have different generation numbers. 
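	 *
	 * For example (illustrative, not part of the original comment): a
	 * KVM_SET_USER_MEMORY_REGION update bumps slots->generation, so a
	 * last_used_slot cached before the update is dropped below rather than
	 * dereferenced, even if the gfn would still appear to match the stale
	 * slot.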
236062306a36Sopenharmony_ci */ 236162306a36Sopenharmony_ci if (unlikely(gen != vcpu->last_used_slot_gen)) { 236262306a36Sopenharmony_ci vcpu->last_used_slot = NULL; 236362306a36Sopenharmony_ci vcpu->last_used_slot_gen = gen; 236462306a36Sopenharmony_ci } 236562306a36Sopenharmony_ci 236662306a36Sopenharmony_ci slot = try_get_memslot(vcpu->last_used_slot, gfn); 236762306a36Sopenharmony_ci if (slot) 236862306a36Sopenharmony_ci return slot; 236962306a36Sopenharmony_ci 237062306a36Sopenharmony_ci /* 237162306a36Sopenharmony_ci * Fall back to searching all memslots. We purposely use 237262306a36Sopenharmony_ci * search_memslots() instead of __gfn_to_memslot() to avoid 237362306a36Sopenharmony_ci * thrashing the VM-wide last_used_slot in kvm_memslots. 237462306a36Sopenharmony_ci */ 237562306a36Sopenharmony_ci slot = search_memslots(slots, gfn, false); 237662306a36Sopenharmony_ci if (slot) { 237762306a36Sopenharmony_ci vcpu->last_used_slot = slot; 237862306a36Sopenharmony_ci return slot; 237962306a36Sopenharmony_ci } 238062306a36Sopenharmony_ci 238162306a36Sopenharmony_ci return NULL; 238262306a36Sopenharmony_ci} 238362306a36Sopenharmony_ci 238462306a36Sopenharmony_cibool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) 238562306a36Sopenharmony_ci{ 238662306a36Sopenharmony_ci struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); 238762306a36Sopenharmony_ci 238862306a36Sopenharmony_ci return kvm_is_visible_memslot(memslot); 238962306a36Sopenharmony_ci} 239062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_is_visible_gfn); 239162306a36Sopenharmony_ci 239262306a36Sopenharmony_cibool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) 239362306a36Sopenharmony_ci{ 239462306a36Sopenharmony_ci struct kvm_memory_slot *memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); 239562306a36Sopenharmony_ci 239662306a36Sopenharmony_ci return kvm_is_visible_memslot(memslot); 239762306a36Sopenharmony_ci} 239862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_is_visible_gfn); 239962306a36Sopenharmony_ci 240062306a36Sopenharmony_ciunsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn) 240162306a36Sopenharmony_ci{ 240262306a36Sopenharmony_ci struct vm_area_struct *vma; 240362306a36Sopenharmony_ci unsigned long addr, size; 240462306a36Sopenharmony_ci 240562306a36Sopenharmony_ci size = PAGE_SIZE; 240662306a36Sopenharmony_ci 240762306a36Sopenharmony_ci addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, NULL); 240862306a36Sopenharmony_ci if (kvm_is_error_hva(addr)) 240962306a36Sopenharmony_ci return PAGE_SIZE; 241062306a36Sopenharmony_ci 241162306a36Sopenharmony_ci mmap_read_lock(current->mm); 241262306a36Sopenharmony_ci vma = find_vma(current->mm, addr); 241362306a36Sopenharmony_ci if (!vma) 241462306a36Sopenharmony_ci goto out; 241562306a36Sopenharmony_ci 241662306a36Sopenharmony_ci size = vma_kernel_pagesize(vma); 241762306a36Sopenharmony_ci 241862306a36Sopenharmony_ciout: 241962306a36Sopenharmony_ci mmap_read_unlock(current->mm); 242062306a36Sopenharmony_ci 242162306a36Sopenharmony_ci return size; 242262306a36Sopenharmony_ci} 242362306a36Sopenharmony_ci 242462306a36Sopenharmony_cistatic bool memslot_is_readonly(const struct kvm_memory_slot *slot) 242562306a36Sopenharmony_ci{ 242662306a36Sopenharmony_ci return slot->flags & KVM_MEM_READONLY; 242762306a36Sopenharmony_ci} 242862306a36Sopenharmony_ci 242962306a36Sopenharmony_cistatic unsigned long __gfn_to_hva_many(const struct kvm_memory_slot *slot, gfn_t gfn, 243062306a36Sopenharmony_ci gfn_t *nr_pages, bool write) 243162306a36Sopenharmony_ci{ 243262306a36Sopenharmony_ci 
if (!slot || slot->flags & KVM_MEMSLOT_INVALID) 243362306a36Sopenharmony_ci return KVM_HVA_ERR_BAD; 243462306a36Sopenharmony_ci 243562306a36Sopenharmony_ci if (memslot_is_readonly(slot) && write) 243662306a36Sopenharmony_ci return KVM_HVA_ERR_RO_BAD; 243762306a36Sopenharmony_ci 243862306a36Sopenharmony_ci if (nr_pages) 243962306a36Sopenharmony_ci *nr_pages = slot->npages - (gfn - slot->base_gfn); 244062306a36Sopenharmony_ci 244162306a36Sopenharmony_ci return __gfn_to_hva_memslot(slot, gfn); 244262306a36Sopenharmony_ci} 244362306a36Sopenharmony_ci 244462306a36Sopenharmony_cistatic unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, 244562306a36Sopenharmony_ci gfn_t *nr_pages) 244662306a36Sopenharmony_ci{ 244762306a36Sopenharmony_ci return __gfn_to_hva_many(slot, gfn, nr_pages, true); 244862306a36Sopenharmony_ci} 244962306a36Sopenharmony_ci 245062306a36Sopenharmony_ciunsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, 245162306a36Sopenharmony_ci gfn_t gfn) 245262306a36Sopenharmony_ci{ 245362306a36Sopenharmony_ci return gfn_to_hva_many(slot, gfn, NULL); 245462306a36Sopenharmony_ci} 245562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gfn_to_hva_memslot); 245662306a36Sopenharmony_ci 245762306a36Sopenharmony_ciunsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) 245862306a36Sopenharmony_ci{ 245962306a36Sopenharmony_ci return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); 246062306a36Sopenharmony_ci} 246162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gfn_to_hva); 246262306a36Sopenharmony_ci 246362306a36Sopenharmony_ciunsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn) 246462306a36Sopenharmony_ci{ 246562306a36Sopenharmony_ci return gfn_to_hva_many(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, NULL); 246662306a36Sopenharmony_ci} 246762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva); 246862306a36Sopenharmony_ci 246962306a36Sopenharmony_ci/* 247062306a36Sopenharmony_ci * Return the hva of a @gfn and the R/W attribute if possible. 
247162306a36Sopenharmony_ci *
247262306a36Sopenharmony_ci * @slot: the kvm_memory_slot which contains @gfn
247362306a36Sopenharmony_ci * @gfn: the gfn to be translated
247462306a36Sopenharmony_ci * @writable: used to return the read/write attribute of the @slot if the hva
247562306a36Sopenharmony_ci * is valid and @writable is not NULL
247662306a36Sopenharmony_ci */
247762306a36Sopenharmony_ciunsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
247862306a36Sopenharmony_ci gfn_t gfn, bool *writable)
247962306a36Sopenharmony_ci{
248062306a36Sopenharmony_ci unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
248162306a36Sopenharmony_ci
248262306a36Sopenharmony_ci if (!kvm_is_error_hva(hva) && writable)
248362306a36Sopenharmony_ci *writable = !memslot_is_readonly(slot);
248462306a36Sopenharmony_ci
248562306a36Sopenharmony_ci return hva;
248662306a36Sopenharmony_ci}
248762306a36Sopenharmony_ci
248862306a36Sopenharmony_ciunsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
248962306a36Sopenharmony_ci{
249062306a36Sopenharmony_ci struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
249162306a36Sopenharmony_ci
249262306a36Sopenharmony_ci return gfn_to_hva_memslot_prot(slot, gfn, writable);
249362306a36Sopenharmony_ci}
249462306a36Sopenharmony_ci
249562306a36Sopenharmony_ciunsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable)
249662306a36Sopenharmony_ci{
249762306a36Sopenharmony_ci struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
249862306a36Sopenharmony_ci
249962306a36Sopenharmony_ci return gfn_to_hva_memslot_prot(slot, gfn, writable);
250062306a36Sopenharmony_ci}
250162306a36Sopenharmony_ci
250262306a36Sopenharmony_cistatic inline int check_user_page_hwpoison(unsigned long addr)
250362306a36Sopenharmony_ci{
250462306a36Sopenharmony_ci int rc, flags = FOLL_HWPOISON | FOLL_WRITE;
250562306a36Sopenharmony_ci
250662306a36Sopenharmony_ci rc = get_user_pages(addr, 1, flags, NULL);
250762306a36Sopenharmony_ci return rc == -EHWPOISON;
250862306a36Sopenharmony_ci}
250962306a36Sopenharmony_ci
251062306a36Sopenharmony_ci/*
251162306a36Sopenharmony_ci * The fast path to get the writable pfn which will be stored in @pfn;
251262306a36Sopenharmony_ci * true indicates success, otherwise false is returned. It's also the
251362306a36Sopenharmony_ci * only part that can run in atomic context.
251462306a36Sopenharmony_ci */
251562306a36Sopenharmony_cistatic bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
251662306a36Sopenharmony_ci bool *writable, kvm_pfn_t *pfn)
251762306a36Sopenharmony_ci{
251862306a36Sopenharmony_ci struct page *page[1];
251962306a36Sopenharmony_ci
252062306a36Sopenharmony_ci /*
252162306a36Sopenharmony_ci * Fast pin a writable pfn only if it is a write fault request
252262306a36Sopenharmony_ci * or the caller allows mapping a writable pfn for a read fault
252362306a36Sopenharmony_ci * request.
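	 *
	 * For illustration (derived from the check below, not part of the
	 * original comment):
	 *
	 *	write_fault	writable	fast path attempted?
	 *	true		any		yes
	 *	false		non-NULL	yes (writable pin is opportunistic)
	 *	false		NULL		no  (read-only pin via the slow path)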
252462306a36Sopenharmony_ci */ 252562306a36Sopenharmony_ci if (!(write_fault || writable)) 252662306a36Sopenharmony_ci return false; 252762306a36Sopenharmony_ci 252862306a36Sopenharmony_ci if (get_user_page_fast_only(addr, FOLL_WRITE, page)) { 252962306a36Sopenharmony_ci *pfn = page_to_pfn(page[0]); 253062306a36Sopenharmony_ci 253162306a36Sopenharmony_ci if (writable) 253262306a36Sopenharmony_ci *writable = true; 253362306a36Sopenharmony_ci return true; 253462306a36Sopenharmony_ci } 253562306a36Sopenharmony_ci 253662306a36Sopenharmony_ci return false; 253762306a36Sopenharmony_ci} 253862306a36Sopenharmony_ci 253962306a36Sopenharmony_ci/* 254062306a36Sopenharmony_ci * The slow path to get the pfn of the specified host virtual address, 254162306a36Sopenharmony_ci * 1 indicates success, -errno is returned if error is detected. 254262306a36Sopenharmony_ci */ 254362306a36Sopenharmony_cistatic int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, 254462306a36Sopenharmony_ci bool interruptible, bool *writable, kvm_pfn_t *pfn) 254562306a36Sopenharmony_ci{ 254662306a36Sopenharmony_ci /* 254762306a36Sopenharmony_ci * When a VCPU accesses a page that is not mapped into the secondary 254862306a36Sopenharmony_ci * MMU, we lookup the page using GUP to map it, so the guest VCPU can 254962306a36Sopenharmony_ci * make progress. We always want to honor NUMA hinting faults in that 255062306a36Sopenharmony_ci * case, because GUP usage corresponds to memory accesses from the VCPU. 255162306a36Sopenharmony_ci * Otherwise, we'd not trigger NUMA hinting faults once a page is 255262306a36Sopenharmony_ci * mapped into the secondary MMU and gets accessed by a VCPU. 255362306a36Sopenharmony_ci * 255462306a36Sopenharmony_ci * Note that get_user_page_fast_only() and FOLL_WRITE for now 255562306a36Sopenharmony_ci * implicitly honor NUMA hinting faults and don't need this flag. 
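	 *
	 * For illustration (derived from the code below, not part of the
	 * original comment): an asynchronous write fault ends up calling
	 * get_user_pages_unlocked() with
	 *
	 *	FOLL_HWPOISON | FOLL_HONOR_NUMA_FAULT | FOLL_WRITE | FOLL_NOWAIT
	 *
	 * while a synchronous, interruptible read fault uses
	 *
	 *	FOLL_HWPOISON | FOLL_HONOR_NUMA_FAULT | FOLL_INTERRUPTIBLE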
255662306a36Sopenharmony_ci */ 255762306a36Sopenharmony_ci unsigned int flags = FOLL_HWPOISON | FOLL_HONOR_NUMA_FAULT; 255862306a36Sopenharmony_ci struct page *page; 255962306a36Sopenharmony_ci int npages; 256062306a36Sopenharmony_ci 256162306a36Sopenharmony_ci might_sleep(); 256262306a36Sopenharmony_ci 256362306a36Sopenharmony_ci if (writable) 256462306a36Sopenharmony_ci *writable = write_fault; 256562306a36Sopenharmony_ci 256662306a36Sopenharmony_ci if (write_fault) 256762306a36Sopenharmony_ci flags |= FOLL_WRITE; 256862306a36Sopenharmony_ci if (async) 256962306a36Sopenharmony_ci flags |= FOLL_NOWAIT; 257062306a36Sopenharmony_ci if (interruptible) 257162306a36Sopenharmony_ci flags |= FOLL_INTERRUPTIBLE; 257262306a36Sopenharmony_ci 257362306a36Sopenharmony_ci npages = get_user_pages_unlocked(addr, 1, &page, flags); 257462306a36Sopenharmony_ci if (npages != 1) 257562306a36Sopenharmony_ci return npages; 257662306a36Sopenharmony_ci 257762306a36Sopenharmony_ci /* map read fault as writable if possible */ 257862306a36Sopenharmony_ci if (unlikely(!write_fault) && writable) { 257962306a36Sopenharmony_ci struct page *wpage; 258062306a36Sopenharmony_ci 258162306a36Sopenharmony_ci if (get_user_page_fast_only(addr, FOLL_WRITE, &wpage)) { 258262306a36Sopenharmony_ci *writable = true; 258362306a36Sopenharmony_ci put_page(page); 258462306a36Sopenharmony_ci page = wpage; 258562306a36Sopenharmony_ci } 258662306a36Sopenharmony_ci } 258762306a36Sopenharmony_ci *pfn = page_to_pfn(page); 258862306a36Sopenharmony_ci return npages; 258962306a36Sopenharmony_ci} 259062306a36Sopenharmony_ci 259162306a36Sopenharmony_cistatic bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) 259262306a36Sopenharmony_ci{ 259362306a36Sopenharmony_ci if (unlikely(!(vma->vm_flags & VM_READ))) 259462306a36Sopenharmony_ci return false; 259562306a36Sopenharmony_ci 259662306a36Sopenharmony_ci if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE)))) 259762306a36Sopenharmony_ci return false; 259862306a36Sopenharmony_ci 259962306a36Sopenharmony_ci return true; 260062306a36Sopenharmony_ci} 260162306a36Sopenharmony_ci 260262306a36Sopenharmony_cistatic int kvm_try_get_pfn(kvm_pfn_t pfn) 260362306a36Sopenharmony_ci{ 260462306a36Sopenharmony_ci struct page *page = kvm_pfn_to_refcounted_page(pfn); 260562306a36Sopenharmony_ci 260662306a36Sopenharmony_ci if (!page) 260762306a36Sopenharmony_ci return 1; 260862306a36Sopenharmony_ci 260962306a36Sopenharmony_ci return get_page_unless_zero(page); 261062306a36Sopenharmony_ci} 261162306a36Sopenharmony_ci 261262306a36Sopenharmony_cistatic int hva_to_pfn_remapped(struct vm_area_struct *vma, 261362306a36Sopenharmony_ci unsigned long addr, bool write_fault, 261462306a36Sopenharmony_ci bool *writable, kvm_pfn_t *p_pfn) 261562306a36Sopenharmony_ci{ 261662306a36Sopenharmony_ci kvm_pfn_t pfn; 261762306a36Sopenharmony_ci pte_t *ptep; 261862306a36Sopenharmony_ci pte_t pte; 261962306a36Sopenharmony_ci spinlock_t *ptl; 262062306a36Sopenharmony_ci int r; 262162306a36Sopenharmony_ci 262262306a36Sopenharmony_ci r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); 262362306a36Sopenharmony_ci if (r) { 262462306a36Sopenharmony_ci /* 262562306a36Sopenharmony_ci * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does 262662306a36Sopenharmony_ci * not call the fault handler, so do it here. 262762306a36Sopenharmony_ci */ 262862306a36Sopenharmony_ci bool unlocked = false; 262962306a36Sopenharmony_ci r = fixup_user_fault(current->mm, addr, 263062306a36Sopenharmony_ci (write_fault ? 
FAULT_FLAG_WRITE : 0), 263162306a36Sopenharmony_ci &unlocked); 263262306a36Sopenharmony_ci if (unlocked) 263362306a36Sopenharmony_ci return -EAGAIN; 263462306a36Sopenharmony_ci if (r) 263562306a36Sopenharmony_ci return r; 263662306a36Sopenharmony_ci 263762306a36Sopenharmony_ci r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); 263862306a36Sopenharmony_ci if (r) 263962306a36Sopenharmony_ci return r; 264062306a36Sopenharmony_ci } 264162306a36Sopenharmony_ci 264262306a36Sopenharmony_ci pte = ptep_get(ptep); 264362306a36Sopenharmony_ci 264462306a36Sopenharmony_ci if (write_fault && !pte_write(pte)) { 264562306a36Sopenharmony_ci pfn = KVM_PFN_ERR_RO_FAULT; 264662306a36Sopenharmony_ci goto out; 264762306a36Sopenharmony_ci } 264862306a36Sopenharmony_ci 264962306a36Sopenharmony_ci if (writable) 265062306a36Sopenharmony_ci *writable = pte_write(pte); 265162306a36Sopenharmony_ci pfn = pte_pfn(pte); 265262306a36Sopenharmony_ci 265362306a36Sopenharmony_ci /* 265462306a36Sopenharmony_ci * Get a reference here because callers of *hva_to_pfn* and 265562306a36Sopenharmony_ci * *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the 265662306a36Sopenharmony_ci * returned pfn. This is only needed if the VMA has VM_MIXEDMAP 265762306a36Sopenharmony_ci * set, but the kvm_try_get_pfn/kvm_release_pfn_clean pair will 265862306a36Sopenharmony_ci * simply do nothing for reserved pfns. 265962306a36Sopenharmony_ci * 266062306a36Sopenharmony_ci * Whoever called remap_pfn_range is also going to call e.g. 266162306a36Sopenharmony_ci * unmap_mapping_range before the underlying pages are freed, 266262306a36Sopenharmony_ci * causing a call to our MMU notifier. 266362306a36Sopenharmony_ci * 266462306a36Sopenharmony_ci * Certain IO or PFNMAP mappings can be backed with valid 266562306a36Sopenharmony_ci * struct pages, but be allocated without refcounting e.g., 266662306a36Sopenharmony_ci * tail pages of non-compound higher order allocations, which 266762306a36Sopenharmony_ci * would then underflow the refcount when the caller does the 266862306a36Sopenharmony_ci * required put_page. Don't allow those pages here. 266962306a36Sopenharmony_ci */ 267062306a36Sopenharmony_ci if (!kvm_try_get_pfn(pfn)) 267162306a36Sopenharmony_ci r = -EFAULT; 267262306a36Sopenharmony_ci 267362306a36Sopenharmony_ciout: 267462306a36Sopenharmony_ci pte_unmap_unlock(ptep, ptl); 267562306a36Sopenharmony_ci *p_pfn = pfn; 267662306a36Sopenharmony_ci 267762306a36Sopenharmony_ci return r; 267862306a36Sopenharmony_ci} 267962306a36Sopenharmony_ci 268062306a36Sopenharmony_ci/* 268162306a36Sopenharmony_ci * Pin guest page in memory and return its pfn. 
268262306a36Sopenharmony_ci * @addr: host virtual address which maps memory to the guest
268362306a36Sopenharmony_ci * @atomic: whether this function is called from atomic context (must not sleep)
268462306a36Sopenharmony_ci * @interruptible: whether the process can be interrupted by non-fatal signals
268562306a36Sopenharmony_ci * @async: if non-NULL, do not wait for I/O when the host page is not present;
268662306a36Sopenharmony_ci * *@async is set when the fault can be completed asynchronously
268762306a36Sopenharmony_ci * @write_fault: whether we should get a writable host page
268862306a36Sopenharmony_ci * @writable: whether mapping a writable host page is allowed for !@write_fault
268962306a36Sopenharmony_ci *
269062306a36Sopenharmony_ci * The function will map a writable host page for these two cases:
269162306a36Sopenharmony_ci * 1): @write_fault = true
269262306a36Sopenharmony_ci * 2): @write_fault = false && @writable; @writable will tell the caller
269362306a36Sopenharmony_ci * whether the mapping is writable.
269462306a36Sopenharmony_ci */
269562306a36Sopenharmony_cikvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
269662306a36Sopenharmony_ci bool *async, bool write_fault, bool *writable)
269762306a36Sopenharmony_ci{
269862306a36Sopenharmony_ci struct vm_area_struct *vma;
269962306a36Sopenharmony_ci kvm_pfn_t pfn;
270062306a36Sopenharmony_ci int npages, r;
270162306a36Sopenharmony_ci
270262306a36Sopenharmony_ci /* we can do it either atomically or asynchronously, not both */
270362306a36Sopenharmony_ci BUG_ON(atomic && async);
270462306a36Sopenharmony_ci
270562306a36Sopenharmony_ci if (hva_to_pfn_fast(addr, write_fault, writable, &pfn))
270662306a36Sopenharmony_ci return pfn;
270762306a36Sopenharmony_ci
270862306a36Sopenharmony_ci if (atomic)
270962306a36Sopenharmony_ci return KVM_PFN_ERR_FAULT;
271062306a36Sopenharmony_ci
271162306a36Sopenharmony_ci npages = hva_to_pfn_slow(addr, async, write_fault, interruptible,
271262306a36Sopenharmony_ci writable, &pfn);
271362306a36Sopenharmony_ci if (npages == 1)
271462306a36Sopenharmony_ci return pfn;
271562306a36Sopenharmony_ci if (npages == -EINTR)
271662306a36Sopenharmony_ci return KVM_PFN_ERR_SIGPENDING;
271762306a36Sopenharmony_ci
271862306a36Sopenharmony_ci mmap_read_lock(current->mm);
271962306a36Sopenharmony_ci if (npages == -EHWPOISON ||
272062306a36Sopenharmony_ci (!async && check_user_page_hwpoison(addr))) {
272162306a36Sopenharmony_ci pfn = KVM_PFN_ERR_HWPOISON;
272262306a36Sopenharmony_ci goto exit;
272362306a36Sopenharmony_ci }
272462306a36Sopenharmony_ci
272562306a36Sopenharmony_ciretry:
272662306a36Sopenharmony_ci vma = vma_lookup(current->mm, addr);
272762306a36Sopenharmony_ci
272862306a36Sopenharmony_ci if (vma == NULL)
272962306a36Sopenharmony_ci pfn = KVM_PFN_ERR_FAULT;
273062306a36Sopenharmony_ci else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
273162306a36Sopenharmony_ci r = hva_to_pfn_remapped(vma, addr, write_fault, writable, &pfn);
273262306a36Sopenharmony_ci if (r == -EAGAIN)
273362306a36Sopenharmony_ci goto retry;
273462306a36Sopenharmony_ci if (r < 0)
273562306a36Sopenharmony_ci pfn = KVM_PFN_ERR_FAULT;
273662306a36Sopenharmony_ci } else {
273762306a36Sopenharmony_ci if (async && vma_is_valid(vma, write_fault))
273862306a36Sopenharmony_ci *async = true;
273962306a36Sopenharmony_ci pfn = KVM_PFN_ERR_FAULT;
274062306a36Sopenharmony_ci }
274162306a36Sopenharmony_ciexit:
274262306a36Sopenharmony_ci mmap_read_unlock(current->mm);
274362306a36Sopenharmony_ci return pfn;
274462306a36Sopenharmony_ci}
274562306a36Sopenharmony_ci
274662306a36Sopenharmony_cikvm_pfn_t
__gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, 274762306a36Sopenharmony_ci bool atomic, bool interruptible, bool *async, 274862306a36Sopenharmony_ci bool write_fault, bool *writable, hva_t *hva) 274962306a36Sopenharmony_ci{ 275062306a36Sopenharmony_ci unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); 275162306a36Sopenharmony_ci 275262306a36Sopenharmony_ci if (hva) 275362306a36Sopenharmony_ci *hva = addr; 275462306a36Sopenharmony_ci 275562306a36Sopenharmony_ci if (addr == KVM_HVA_ERR_RO_BAD) { 275662306a36Sopenharmony_ci if (writable) 275762306a36Sopenharmony_ci *writable = false; 275862306a36Sopenharmony_ci return KVM_PFN_ERR_RO_FAULT; 275962306a36Sopenharmony_ci } 276062306a36Sopenharmony_ci 276162306a36Sopenharmony_ci if (kvm_is_error_hva(addr)) { 276262306a36Sopenharmony_ci if (writable) 276362306a36Sopenharmony_ci *writable = false; 276462306a36Sopenharmony_ci return KVM_PFN_NOSLOT; 276562306a36Sopenharmony_ci } 276662306a36Sopenharmony_ci 276762306a36Sopenharmony_ci /* Do not map writable pfn in the readonly memslot. */ 276862306a36Sopenharmony_ci if (writable && memslot_is_readonly(slot)) { 276962306a36Sopenharmony_ci *writable = false; 277062306a36Sopenharmony_ci writable = NULL; 277162306a36Sopenharmony_ci } 277262306a36Sopenharmony_ci 277362306a36Sopenharmony_ci return hva_to_pfn(addr, atomic, interruptible, async, write_fault, 277462306a36Sopenharmony_ci writable); 277562306a36Sopenharmony_ci} 277662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot); 277762306a36Sopenharmony_ci 277862306a36Sopenharmony_cikvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, 277962306a36Sopenharmony_ci bool *writable) 278062306a36Sopenharmony_ci{ 278162306a36Sopenharmony_ci return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, false, 278262306a36Sopenharmony_ci NULL, write_fault, writable, NULL); 278362306a36Sopenharmony_ci} 278462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gfn_to_pfn_prot); 278562306a36Sopenharmony_ci 278662306a36Sopenharmony_cikvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn) 278762306a36Sopenharmony_ci{ 278862306a36Sopenharmony_ci return __gfn_to_pfn_memslot(slot, gfn, false, false, NULL, true, 278962306a36Sopenharmony_ci NULL, NULL); 279062306a36Sopenharmony_ci} 279162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gfn_to_pfn_memslot); 279262306a36Sopenharmony_ci 279362306a36Sopenharmony_cikvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn) 279462306a36Sopenharmony_ci{ 279562306a36Sopenharmony_ci return __gfn_to_pfn_memslot(slot, gfn, true, false, NULL, true, 279662306a36Sopenharmony_ci NULL, NULL); 279762306a36Sopenharmony_ci} 279862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); 279962306a36Sopenharmony_ci 280062306a36Sopenharmony_cikvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn) 280162306a36Sopenharmony_ci{ 280262306a36Sopenharmony_ci return gfn_to_pfn_memslot_atomic(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn); 280362306a36Sopenharmony_ci} 280462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn_atomic); 280562306a36Sopenharmony_ci 280662306a36Sopenharmony_cikvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) 280762306a36Sopenharmony_ci{ 280862306a36Sopenharmony_ci return gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn); 280962306a36Sopenharmony_ci} 281062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gfn_to_pfn); 281162306a36Sopenharmony_ci 281262306a36Sopenharmony_cikvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu 
*vcpu, gfn_t gfn) 281362306a36Sopenharmony_ci{ 281462306a36Sopenharmony_ci return gfn_to_pfn_memslot(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn); 281562306a36Sopenharmony_ci} 281662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn); 281762306a36Sopenharmony_ci 281862306a36Sopenharmony_ciint gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, 281962306a36Sopenharmony_ci struct page **pages, int nr_pages) 282062306a36Sopenharmony_ci{ 282162306a36Sopenharmony_ci unsigned long addr; 282262306a36Sopenharmony_ci gfn_t entry = 0; 282362306a36Sopenharmony_ci 282462306a36Sopenharmony_ci addr = gfn_to_hva_many(slot, gfn, &entry); 282562306a36Sopenharmony_ci if (kvm_is_error_hva(addr)) 282662306a36Sopenharmony_ci return -1; 282762306a36Sopenharmony_ci 282862306a36Sopenharmony_ci if (entry < nr_pages) 282962306a36Sopenharmony_ci return 0; 283062306a36Sopenharmony_ci 283162306a36Sopenharmony_ci return get_user_pages_fast_only(addr, nr_pages, FOLL_WRITE, pages); 283262306a36Sopenharmony_ci} 283362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gfn_to_page_many_atomic); 283462306a36Sopenharmony_ci 283562306a36Sopenharmony_ci/* 283662306a36Sopenharmony_ci * Do not use this helper unless you are absolutely certain the gfn _must_ be 283762306a36Sopenharmony_ci * backed by 'struct page'. A valid example is if the backing memslot is 283862306a36Sopenharmony_ci * controlled by KVM. Note, if the returned page is valid, it's refcount has 283962306a36Sopenharmony_ci * been elevated by gfn_to_pfn(). 284062306a36Sopenharmony_ci */ 284162306a36Sopenharmony_cistruct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) 284262306a36Sopenharmony_ci{ 284362306a36Sopenharmony_ci struct page *page; 284462306a36Sopenharmony_ci kvm_pfn_t pfn; 284562306a36Sopenharmony_ci 284662306a36Sopenharmony_ci pfn = gfn_to_pfn(kvm, gfn); 284762306a36Sopenharmony_ci 284862306a36Sopenharmony_ci if (is_error_noslot_pfn(pfn)) 284962306a36Sopenharmony_ci return KVM_ERR_PTR_BAD_PAGE; 285062306a36Sopenharmony_ci 285162306a36Sopenharmony_ci page = kvm_pfn_to_refcounted_page(pfn); 285262306a36Sopenharmony_ci if (!page) 285362306a36Sopenharmony_ci return KVM_ERR_PTR_BAD_PAGE; 285462306a36Sopenharmony_ci 285562306a36Sopenharmony_ci return page; 285662306a36Sopenharmony_ci} 285762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gfn_to_page); 285862306a36Sopenharmony_ci 285962306a36Sopenharmony_civoid kvm_release_pfn(kvm_pfn_t pfn, bool dirty) 286062306a36Sopenharmony_ci{ 286162306a36Sopenharmony_ci if (dirty) 286262306a36Sopenharmony_ci kvm_release_pfn_dirty(pfn); 286362306a36Sopenharmony_ci else 286462306a36Sopenharmony_ci kvm_release_pfn_clean(pfn); 286562306a36Sopenharmony_ci} 286662306a36Sopenharmony_ci 286762306a36Sopenharmony_ciint kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map) 286862306a36Sopenharmony_ci{ 286962306a36Sopenharmony_ci kvm_pfn_t pfn; 287062306a36Sopenharmony_ci void *hva = NULL; 287162306a36Sopenharmony_ci struct page *page = KVM_UNMAPPED_PAGE; 287262306a36Sopenharmony_ci 287362306a36Sopenharmony_ci if (!map) 287462306a36Sopenharmony_ci return -EINVAL; 287562306a36Sopenharmony_ci 287662306a36Sopenharmony_ci pfn = gfn_to_pfn(vcpu->kvm, gfn); 287762306a36Sopenharmony_ci if (is_error_noslot_pfn(pfn)) 287862306a36Sopenharmony_ci return -EINVAL; 287962306a36Sopenharmony_ci 288062306a36Sopenharmony_ci if (pfn_valid(pfn)) { 288162306a36Sopenharmony_ci page = pfn_to_page(pfn); 288262306a36Sopenharmony_ci hva = kmap(page); 288362306a36Sopenharmony_ci#ifdef CONFIG_HAS_IOMEM 288462306a36Sopenharmony_ci } else { 
288562306a36Sopenharmony_ci hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB); 288662306a36Sopenharmony_ci#endif 288762306a36Sopenharmony_ci } 288862306a36Sopenharmony_ci 288962306a36Sopenharmony_ci if (!hva) 289062306a36Sopenharmony_ci return -EFAULT; 289162306a36Sopenharmony_ci 289262306a36Sopenharmony_ci map->page = page; 289362306a36Sopenharmony_ci map->hva = hva; 289462306a36Sopenharmony_ci map->pfn = pfn; 289562306a36Sopenharmony_ci map->gfn = gfn; 289662306a36Sopenharmony_ci 289762306a36Sopenharmony_ci return 0; 289862306a36Sopenharmony_ci} 289962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_map); 290062306a36Sopenharmony_ci 290162306a36Sopenharmony_civoid kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty) 290262306a36Sopenharmony_ci{ 290362306a36Sopenharmony_ci if (!map) 290462306a36Sopenharmony_ci return; 290562306a36Sopenharmony_ci 290662306a36Sopenharmony_ci if (!map->hva) 290762306a36Sopenharmony_ci return; 290862306a36Sopenharmony_ci 290962306a36Sopenharmony_ci if (map->page != KVM_UNMAPPED_PAGE) 291062306a36Sopenharmony_ci kunmap(map->page); 291162306a36Sopenharmony_ci#ifdef CONFIG_HAS_IOMEM 291262306a36Sopenharmony_ci else 291362306a36Sopenharmony_ci memunmap(map->hva); 291462306a36Sopenharmony_ci#endif 291562306a36Sopenharmony_ci 291662306a36Sopenharmony_ci if (dirty) 291762306a36Sopenharmony_ci kvm_vcpu_mark_page_dirty(vcpu, map->gfn); 291862306a36Sopenharmony_ci 291962306a36Sopenharmony_ci kvm_release_pfn(map->pfn, dirty); 292062306a36Sopenharmony_ci 292162306a36Sopenharmony_ci map->hva = NULL; 292262306a36Sopenharmony_ci map->page = NULL; 292362306a36Sopenharmony_ci} 292462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_unmap); 292562306a36Sopenharmony_ci 292662306a36Sopenharmony_cistatic bool kvm_is_ad_tracked_page(struct page *page) 292762306a36Sopenharmony_ci{ 292862306a36Sopenharmony_ci /* 292962306a36Sopenharmony_ci * Per page-flags.h, pages tagged PG_reserved "should in general not be 293062306a36Sopenharmony_ci * touched (e.g. set dirty) except by its owner". 
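	 *
	 * For example (illustrative, not part of the original comment): the
	 * shared zero page and most pfns backing VM_IO/VM_PFNMAP mappings are
	 * PG_reserved, so the helpers below never mark them dirty or accessed.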
293162306a36Sopenharmony_ci */ 293262306a36Sopenharmony_ci return !PageReserved(page); 293362306a36Sopenharmony_ci} 293462306a36Sopenharmony_ci 293562306a36Sopenharmony_cistatic void kvm_set_page_dirty(struct page *page) 293662306a36Sopenharmony_ci{ 293762306a36Sopenharmony_ci if (kvm_is_ad_tracked_page(page)) 293862306a36Sopenharmony_ci SetPageDirty(page); 293962306a36Sopenharmony_ci} 294062306a36Sopenharmony_ci 294162306a36Sopenharmony_cistatic void kvm_set_page_accessed(struct page *page) 294262306a36Sopenharmony_ci{ 294362306a36Sopenharmony_ci if (kvm_is_ad_tracked_page(page)) 294462306a36Sopenharmony_ci mark_page_accessed(page); 294562306a36Sopenharmony_ci} 294662306a36Sopenharmony_ci 294762306a36Sopenharmony_civoid kvm_release_page_clean(struct page *page) 294862306a36Sopenharmony_ci{ 294962306a36Sopenharmony_ci WARN_ON(is_error_page(page)); 295062306a36Sopenharmony_ci 295162306a36Sopenharmony_ci kvm_set_page_accessed(page); 295262306a36Sopenharmony_ci put_page(page); 295362306a36Sopenharmony_ci} 295462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_release_page_clean); 295562306a36Sopenharmony_ci 295662306a36Sopenharmony_civoid kvm_release_pfn_clean(kvm_pfn_t pfn) 295762306a36Sopenharmony_ci{ 295862306a36Sopenharmony_ci struct page *page; 295962306a36Sopenharmony_ci 296062306a36Sopenharmony_ci if (is_error_noslot_pfn(pfn)) 296162306a36Sopenharmony_ci return; 296262306a36Sopenharmony_ci 296362306a36Sopenharmony_ci page = kvm_pfn_to_refcounted_page(pfn); 296462306a36Sopenharmony_ci if (!page) 296562306a36Sopenharmony_ci return; 296662306a36Sopenharmony_ci 296762306a36Sopenharmony_ci kvm_release_page_clean(page); 296862306a36Sopenharmony_ci} 296962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_release_pfn_clean); 297062306a36Sopenharmony_ci 297162306a36Sopenharmony_civoid kvm_release_page_dirty(struct page *page) 297262306a36Sopenharmony_ci{ 297362306a36Sopenharmony_ci WARN_ON(is_error_page(page)); 297462306a36Sopenharmony_ci 297562306a36Sopenharmony_ci kvm_set_page_dirty(page); 297662306a36Sopenharmony_ci kvm_release_page_clean(page); 297762306a36Sopenharmony_ci} 297862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_release_page_dirty); 297962306a36Sopenharmony_ci 298062306a36Sopenharmony_civoid kvm_release_pfn_dirty(kvm_pfn_t pfn) 298162306a36Sopenharmony_ci{ 298262306a36Sopenharmony_ci struct page *page; 298362306a36Sopenharmony_ci 298462306a36Sopenharmony_ci if (is_error_noslot_pfn(pfn)) 298562306a36Sopenharmony_ci return; 298662306a36Sopenharmony_ci 298762306a36Sopenharmony_ci page = kvm_pfn_to_refcounted_page(pfn); 298862306a36Sopenharmony_ci if (!page) 298962306a36Sopenharmony_ci return; 299062306a36Sopenharmony_ci 299162306a36Sopenharmony_ci kvm_release_page_dirty(page); 299262306a36Sopenharmony_ci} 299362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); 299462306a36Sopenharmony_ci 299562306a36Sopenharmony_ci/* 299662306a36Sopenharmony_ci * Note, checking for an error/noslot pfn is the caller's responsibility when 299762306a36Sopenharmony_ci * directly marking a page dirty/accessed. Unlike the "release" helpers, the 299862306a36Sopenharmony_ci * "set" helpers are not to be used when the pfn might point at garbage. 
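 *
 * Illustrative caller pattern (a sketch, not taken from this file):
 *
 *	kvm_pfn_t pfn = gfn_to_pfn(kvm, gfn);
 *
 *	if (!is_error_noslot_pfn(pfn))
 *		kvm_set_pfn_dirty(pfn);
 *	kvm_release_pfn_clean(pfn);
 *
 * The "set" call is guarded by the caller, while the "release" call may be
 * made unconditionally because it checks for error/noslot pfns itself.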
299962306a36Sopenharmony_ci */ 300062306a36Sopenharmony_civoid kvm_set_pfn_dirty(kvm_pfn_t pfn) 300162306a36Sopenharmony_ci{ 300262306a36Sopenharmony_ci if (WARN_ON(is_error_noslot_pfn(pfn))) 300362306a36Sopenharmony_ci return; 300462306a36Sopenharmony_ci 300562306a36Sopenharmony_ci if (pfn_valid(pfn)) 300662306a36Sopenharmony_ci kvm_set_page_dirty(pfn_to_page(pfn)); 300762306a36Sopenharmony_ci} 300862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); 300962306a36Sopenharmony_ci 301062306a36Sopenharmony_civoid kvm_set_pfn_accessed(kvm_pfn_t pfn) 301162306a36Sopenharmony_ci{ 301262306a36Sopenharmony_ci if (WARN_ON(is_error_noslot_pfn(pfn))) 301362306a36Sopenharmony_ci return; 301462306a36Sopenharmony_ci 301562306a36Sopenharmony_ci if (pfn_valid(pfn)) 301662306a36Sopenharmony_ci kvm_set_page_accessed(pfn_to_page(pfn)); 301762306a36Sopenharmony_ci} 301862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); 301962306a36Sopenharmony_ci 302062306a36Sopenharmony_cistatic int next_segment(unsigned long len, int offset) 302162306a36Sopenharmony_ci{ 302262306a36Sopenharmony_ci if (len > PAGE_SIZE - offset) 302362306a36Sopenharmony_ci return PAGE_SIZE - offset; 302462306a36Sopenharmony_ci else 302562306a36Sopenharmony_ci return len; 302662306a36Sopenharmony_ci} 302762306a36Sopenharmony_ci 302862306a36Sopenharmony_cistatic int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn, 302962306a36Sopenharmony_ci void *data, int offset, int len) 303062306a36Sopenharmony_ci{ 303162306a36Sopenharmony_ci int r; 303262306a36Sopenharmony_ci unsigned long addr; 303362306a36Sopenharmony_ci 303462306a36Sopenharmony_ci addr = gfn_to_hva_memslot_prot(slot, gfn, NULL); 303562306a36Sopenharmony_ci if (kvm_is_error_hva(addr)) 303662306a36Sopenharmony_ci return -EFAULT; 303762306a36Sopenharmony_ci r = __copy_from_user(data, (void __user *)addr + offset, len); 303862306a36Sopenharmony_ci if (r) 303962306a36Sopenharmony_ci return -EFAULT; 304062306a36Sopenharmony_ci return 0; 304162306a36Sopenharmony_ci} 304262306a36Sopenharmony_ci 304362306a36Sopenharmony_ciint kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, 304462306a36Sopenharmony_ci int len) 304562306a36Sopenharmony_ci{ 304662306a36Sopenharmony_ci struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); 304762306a36Sopenharmony_ci 304862306a36Sopenharmony_ci return __kvm_read_guest_page(slot, gfn, data, offset, len); 304962306a36Sopenharmony_ci} 305062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_read_guest_page); 305162306a36Sopenharmony_ci 305262306a36Sopenharmony_ciint kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, 305362306a36Sopenharmony_ci int offset, int len) 305462306a36Sopenharmony_ci{ 305562306a36Sopenharmony_ci struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); 305662306a36Sopenharmony_ci 305762306a36Sopenharmony_ci return __kvm_read_guest_page(slot, gfn, data, offset, len); 305862306a36Sopenharmony_ci} 305962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page); 306062306a36Sopenharmony_ci 306162306a36Sopenharmony_ciint kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) 306262306a36Sopenharmony_ci{ 306362306a36Sopenharmony_ci gfn_t gfn = gpa >> PAGE_SHIFT; 306462306a36Sopenharmony_ci int seg; 306562306a36Sopenharmony_ci int offset = offset_in_page(gpa); 306662306a36Sopenharmony_ci int ret; 306762306a36Sopenharmony_ci 306862306a36Sopenharmony_ci while ((seg = next_segment(len, offset)) != 0) { 306962306a36Sopenharmony_ci ret = 
kvm_read_guest_page(kvm, gfn, data, offset, seg); 307062306a36Sopenharmony_ci if (ret < 0) 307162306a36Sopenharmony_ci return ret; 307262306a36Sopenharmony_ci offset = 0; 307362306a36Sopenharmony_ci len -= seg; 307462306a36Sopenharmony_ci data += seg; 307562306a36Sopenharmony_ci ++gfn; 307662306a36Sopenharmony_ci } 307762306a36Sopenharmony_ci return 0; 307862306a36Sopenharmony_ci} 307962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_read_guest); 308062306a36Sopenharmony_ci 308162306a36Sopenharmony_ciint kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len) 308262306a36Sopenharmony_ci{ 308362306a36Sopenharmony_ci gfn_t gfn = gpa >> PAGE_SHIFT; 308462306a36Sopenharmony_ci int seg; 308562306a36Sopenharmony_ci int offset = offset_in_page(gpa); 308662306a36Sopenharmony_ci int ret; 308762306a36Sopenharmony_ci 308862306a36Sopenharmony_ci while ((seg = next_segment(len, offset)) != 0) { 308962306a36Sopenharmony_ci ret = kvm_vcpu_read_guest_page(vcpu, gfn, data, offset, seg); 309062306a36Sopenharmony_ci if (ret < 0) 309162306a36Sopenharmony_ci return ret; 309262306a36Sopenharmony_ci offset = 0; 309362306a36Sopenharmony_ci len -= seg; 309462306a36Sopenharmony_ci data += seg; 309562306a36Sopenharmony_ci ++gfn; 309662306a36Sopenharmony_ci } 309762306a36Sopenharmony_ci return 0; 309862306a36Sopenharmony_ci} 309962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_read_guest); 310062306a36Sopenharmony_ci 310162306a36Sopenharmony_cistatic int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn, 310262306a36Sopenharmony_ci void *data, int offset, unsigned long len) 310362306a36Sopenharmony_ci{ 310462306a36Sopenharmony_ci int r; 310562306a36Sopenharmony_ci unsigned long addr; 310662306a36Sopenharmony_ci 310762306a36Sopenharmony_ci addr = gfn_to_hva_memslot_prot(slot, gfn, NULL); 310862306a36Sopenharmony_ci if (kvm_is_error_hva(addr)) 310962306a36Sopenharmony_ci return -EFAULT; 311062306a36Sopenharmony_ci pagefault_disable(); 311162306a36Sopenharmony_ci r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); 311262306a36Sopenharmony_ci pagefault_enable(); 311362306a36Sopenharmony_ci if (r) 311462306a36Sopenharmony_ci return -EFAULT; 311562306a36Sopenharmony_ci return 0; 311662306a36Sopenharmony_ci} 311762306a36Sopenharmony_ci 311862306a36Sopenharmony_ciint kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, 311962306a36Sopenharmony_ci void *data, unsigned long len) 312062306a36Sopenharmony_ci{ 312162306a36Sopenharmony_ci gfn_t gfn = gpa >> PAGE_SHIFT; 312262306a36Sopenharmony_ci struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); 312362306a36Sopenharmony_ci int offset = offset_in_page(gpa); 312462306a36Sopenharmony_ci 312562306a36Sopenharmony_ci return __kvm_read_guest_atomic(slot, gfn, data, offset, len); 312662306a36Sopenharmony_ci} 312762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic); 312862306a36Sopenharmony_ci 312962306a36Sopenharmony_cistatic int __kvm_write_guest_page(struct kvm *kvm, 313062306a36Sopenharmony_ci struct kvm_memory_slot *memslot, gfn_t gfn, 313162306a36Sopenharmony_ci const void *data, int offset, int len) 313262306a36Sopenharmony_ci{ 313362306a36Sopenharmony_ci int r; 313462306a36Sopenharmony_ci unsigned long addr; 313562306a36Sopenharmony_ci 313662306a36Sopenharmony_ci addr = gfn_to_hva_memslot(memslot, gfn); 313762306a36Sopenharmony_ci if (kvm_is_error_hva(addr)) 313862306a36Sopenharmony_ci return -EFAULT; 313962306a36Sopenharmony_ci r = __copy_to_user((void __user *)addr + offset, data, 
len); 314062306a36Sopenharmony_ci if (r) 314162306a36Sopenharmony_ci return -EFAULT; 314262306a36Sopenharmony_ci mark_page_dirty_in_slot(kvm, memslot, gfn); 314362306a36Sopenharmony_ci return 0; 314462306a36Sopenharmony_ci} 314562306a36Sopenharmony_ci 314662306a36Sopenharmony_ciint kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, 314762306a36Sopenharmony_ci const void *data, int offset, int len) 314862306a36Sopenharmony_ci{ 314962306a36Sopenharmony_ci struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); 315062306a36Sopenharmony_ci 315162306a36Sopenharmony_ci return __kvm_write_guest_page(kvm, slot, gfn, data, offset, len); 315262306a36Sopenharmony_ci} 315362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_write_guest_page); 315462306a36Sopenharmony_ci 315562306a36Sopenharmony_ciint kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, 315662306a36Sopenharmony_ci const void *data, int offset, int len) 315762306a36Sopenharmony_ci{ 315862306a36Sopenharmony_ci struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); 315962306a36Sopenharmony_ci 316062306a36Sopenharmony_ci return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len); 316162306a36Sopenharmony_ci} 316262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page); 316362306a36Sopenharmony_ci 316462306a36Sopenharmony_ciint kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, 316562306a36Sopenharmony_ci unsigned long len) 316662306a36Sopenharmony_ci{ 316762306a36Sopenharmony_ci gfn_t gfn = gpa >> PAGE_SHIFT; 316862306a36Sopenharmony_ci int seg; 316962306a36Sopenharmony_ci int offset = offset_in_page(gpa); 317062306a36Sopenharmony_ci int ret; 317162306a36Sopenharmony_ci 317262306a36Sopenharmony_ci while ((seg = next_segment(len, offset)) != 0) { 317362306a36Sopenharmony_ci ret = kvm_write_guest_page(kvm, gfn, data, offset, seg); 317462306a36Sopenharmony_ci if (ret < 0) 317562306a36Sopenharmony_ci return ret; 317662306a36Sopenharmony_ci offset = 0; 317762306a36Sopenharmony_ci len -= seg; 317862306a36Sopenharmony_ci data += seg; 317962306a36Sopenharmony_ci ++gfn; 318062306a36Sopenharmony_ci } 318162306a36Sopenharmony_ci return 0; 318262306a36Sopenharmony_ci} 318362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_write_guest); 318462306a36Sopenharmony_ci 318562306a36Sopenharmony_ciint kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, 318662306a36Sopenharmony_ci unsigned long len) 318762306a36Sopenharmony_ci{ 318862306a36Sopenharmony_ci gfn_t gfn = gpa >> PAGE_SHIFT; 318962306a36Sopenharmony_ci int seg; 319062306a36Sopenharmony_ci int offset = offset_in_page(gpa); 319162306a36Sopenharmony_ci int ret; 319262306a36Sopenharmony_ci 319362306a36Sopenharmony_ci while ((seg = next_segment(len, offset)) != 0) { 319462306a36Sopenharmony_ci ret = kvm_vcpu_write_guest_page(vcpu, gfn, data, offset, seg); 319562306a36Sopenharmony_ci if (ret < 0) 319662306a36Sopenharmony_ci return ret; 319762306a36Sopenharmony_ci offset = 0; 319862306a36Sopenharmony_ci len -= seg; 319962306a36Sopenharmony_ci data += seg; 320062306a36Sopenharmony_ci ++gfn; 320162306a36Sopenharmony_ci } 320262306a36Sopenharmony_ci return 0; 320362306a36Sopenharmony_ci} 320462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_write_guest); 320562306a36Sopenharmony_ci 320662306a36Sopenharmony_cistatic int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, 320762306a36Sopenharmony_ci struct gfn_to_hva_cache *ghc, 320862306a36Sopenharmony_ci gpa_t gpa, unsigned long len) 320962306a36Sopenharmony_ci{ 321062306a36Sopenharmony_ci int 
offset = offset_in_page(gpa); 321162306a36Sopenharmony_ci gfn_t start_gfn = gpa >> PAGE_SHIFT; 321262306a36Sopenharmony_ci gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; 321362306a36Sopenharmony_ci gfn_t nr_pages_needed = end_gfn - start_gfn + 1; 321462306a36Sopenharmony_ci gfn_t nr_pages_avail; 321562306a36Sopenharmony_ci 321662306a36Sopenharmony_ci /* Update ghc->generation before performing any error checks. */ 321762306a36Sopenharmony_ci ghc->generation = slots->generation; 321862306a36Sopenharmony_ci 321962306a36Sopenharmony_ci if (start_gfn > end_gfn) { 322062306a36Sopenharmony_ci ghc->hva = KVM_HVA_ERR_BAD; 322162306a36Sopenharmony_ci return -EINVAL; 322262306a36Sopenharmony_ci } 322362306a36Sopenharmony_ci 322462306a36Sopenharmony_ci /* 322562306a36Sopenharmony_ci * If the requested region crosses two memslots, we still 322662306a36Sopenharmony_ci * verify that the entire region is valid here. 322762306a36Sopenharmony_ci */ 322862306a36Sopenharmony_ci for ( ; start_gfn <= end_gfn; start_gfn += nr_pages_avail) { 322962306a36Sopenharmony_ci ghc->memslot = __gfn_to_memslot(slots, start_gfn); 323062306a36Sopenharmony_ci ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, 323162306a36Sopenharmony_ci &nr_pages_avail); 323262306a36Sopenharmony_ci if (kvm_is_error_hva(ghc->hva)) 323362306a36Sopenharmony_ci return -EFAULT; 323462306a36Sopenharmony_ci } 323562306a36Sopenharmony_ci 323662306a36Sopenharmony_ci /* Use the slow path for cross page reads and writes. */ 323762306a36Sopenharmony_ci if (nr_pages_needed == 1) 323862306a36Sopenharmony_ci ghc->hva += offset; 323962306a36Sopenharmony_ci else 324062306a36Sopenharmony_ci ghc->memslot = NULL; 324162306a36Sopenharmony_ci 324262306a36Sopenharmony_ci ghc->gpa = gpa; 324362306a36Sopenharmony_ci ghc->len = len; 324462306a36Sopenharmony_ci return 0; 324562306a36Sopenharmony_ci} 324662306a36Sopenharmony_ci 324762306a36Sopenharmony_ciint kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, 324862306a36Sopenharmony_ci gpa_t gpa, unsigned long len) 324962306a36Sopenharmony_ci{ 325062306a36Sopenharmony_ci struct kvm_memslots *slots = kvm_memslots(kvm); 325162306a36Sopenharmony_ci return __kvm_gfn_to_hva_cache_init(slots, ghc, gpa, len); 325262306a36Sopenharmony_ci} 325362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); 325462306a36Sopenharmony_ci 325562306a36Sopenharmony_ciint kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, 325662306a36Sopenharmony_ci void *data, unsigned int offset, 325762306a36Sopenharmony_ci unsigned long len) 325862306a36Sopenharmony_ci{ 325962306a36Sopenharmony_ci struct kvm_memslots *slots = kvm_memslots(kvm); 326062306a36Sopenharmony_ci int r; 326162306a36Sopenharmony_ci gpa_t gpa = ghc->gpa + offset; 326262306a36Sopenharmony_ci 326362306a36Sopenharmony_ci if (WARN_ON_ONCE(len + offset > ghc->len)) 326462306a36Sopenharmony_ci return -EINVAL; 326562306a36Sopenharmony_ci 326662306a36Sopenharmony_ci if (slots->generation != ghc->generation) { 326762306a36Sopenharmony_ci if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) 326862306a36Sopenharmony_ci return -EFAULT; 326962306a36Sopenharmony_ci } 327062306a36Sopenharmony_ci 327162306a36Sopenharmony_ci if (kvm_is_error_hva(ghc->hva)) 327262306a36Sopenharmony_ci return -EFAULT; 327362306a36Sopenharmony_ci 327462306a36Sopenharmony_ci if (unlikely(!ghc->memslot)) 327562306a36Sopenharmony_ci return kvm_write_guest(kvm, gpa, data, len); 327662306a36Sopenharmony_ci 327762306a36Sopenharmony_ci r = 
__copy_to_user((void __user *)ghc->hva + offset, data, len);
327862306a36Sopenharmony_ci if (r)
327962306a36Sopenharmony_ci return -EFAULT;
328062306a36Sopenharmony_ci mark_page_dirty_in_slot(kvm, ghc->memslot, gpa >> PAGE_SHIFT);
328162306a36Sopenharmony_ci
328262306a36Sopenharmony_ci return 0;
328362306a36Sopenharmony_ci}
328462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached);
328562306a36Sopenharmony_ci
328662306a36Sopenharmony_ciint kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
328762306a36Sopenharmony_ci void *data, unsigned long len)
328862306a36Sopenharmony_ci{
328962306a36Sopenharmony_ci return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len);
329062306a36Sopenharmony_ci}
329162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_write_guest_cached);
329262306a36Sopenharmony_ci
329362306a36Sopenharmony_ciint kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
329462306a36Sopenharmony_ci void *data, unsigned int offset,
329562306a36Sopenharmony_ci unsigned long len)
329662306a36Sopenharmony_ci{
329762306a36Sopenharmony_ci struct kvm_memslots *slots = kvm_memslots(kvm);
329862306a36Sopenharmony_ci int r;
329962306a36Sopenharmony_ci gpa_t gpa = ghc->gpa + offset;
330062306a36Sopenharmony_ci
330162306a36Sopenharmony_ci if (WARN_ON_ONCE(len + offset > ghc->len))
330262306a36Sopenharmony_ci return -EINVAL;
330362306a36Sopenharmony_ci
330462306a36Sopenharmony_ci if (slots->generation != ghc->generation) {
330562306a36Sopenharmony_ci if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len))
330662306a36Sopenharmony_ci return -EFAULT;
330762306a36Sopenharmony_ci }
330862306a36Sopenharmony_ci
330962306a36Sopenharmony_ci if (kvm_is_error_hva(ghc->hva))
331062306a36Sopenharmony_ci return -EFAULT;
331162306a36Sopenharmony_ci
331262306a36Sopenharmony_ci if (unlikely(!ghc->memslot))
331362306a36Sopenharmony_ci return kvm_read_guest(kvm, gpa, data, len);
331462306a36Sopenharmony_ci
331562306a36Sopenharmony_ci r = __copy_from_user(data, (void __user *)ghc->hva + offset, len);
331662306a36Sopenharmony_ci if (r)
331762306a36Sopenharmony_ci return -EFAULT;
331862306a36Sopenharmony_ci
331962306a36Sopenharmony_ci return 0;
332062306a36Sopenharmony_ci}
332162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_read_guest_offset_cached);
332262306a36Sopenharmony_ci
332362306a36Sopenharmony_ciint kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
332462306a36Sopenharmony_ci void *data, unsigned long len)
332562306a36Sopenharmony_ci{
332662306a36Sopenharmony_ci return kvm_read_guest_offset_cached(kvm, ghc, data, 0, len);
332762306a36Sopenharmony_ci}
332862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_read_guest_cached);
332962306a36Sopenharmony_ci
333062306a36Sopenharmony_ciint kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
333162306a36Sopenharmony_ci{
333262306a36Sopenharmony_ci const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
333362306a36Sopenharmony_ci gfn_t gfn = gpa >> PAGE_SHIFT;
333462306a36Sopenharmony_ci int seg;
333562306a36Sopenharmony_ci int offset = offset_in_page(gpa);
333662306a36Sopenharmony_ci int ret;
333762306a36Sopenharmony_ci
333862306a36Sopenharmony_ci while ((seg = next_segment(len, offset)) != 0) {
333962306a36Sopenharmony_ci ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, seg);
334062306a36Sopenharmony_ci if (ret < 0)
334162306a36Sopenharmony_ci return ret;
334262306a36Sopenharmony_ci offset = 0;
334362306a36Sopenharmony_ci len -= seg;
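 /*
  * Note, the source is always the shared zero page, so unlike
  * kvm_write_guest() there is no data pointer to advance between
  * segments; only gfn, offset and the remaining len change.
  */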
334462306a36Sopenharmony_ci ++gfn;
334562306a36Sopenharmony_ci }
334662306a36Sopenharmony_ci return 0;
334762306a36Sopenharmony_ci}
334862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_clear_guest);
334962306a36Sopenharmony_ci
335062306a36Sopenharmony_civoid mark_page_dirty_in_slot(struct kvm *kvm,
335162306a36Sopenharmony_ci const struct kvm_memory_slot *memslot,
335262306a36Sopenharmony_ci gfn_t gfn)
335362306a36Sopenharmony_ci{
335462306a36Sopenharmony_ci struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
335562306a36Sopenharmony_ci
335662306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM_DIRTY_RING
335762306a36Sopenharmony_ci if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm))
335862306a36Sopenharmony_ci return;
335962306a36Sopenharmony_ci
336062306a36Sopenharmony_ci WARN_ON_ONCE(!vcpu && !kvm_arch_allow_write_without_running_vcpu(kvm));
336162306a36Sopenharmony_ci#endif
336262306a36Sopenharmony_ci
336362306a36Sopenharmony_ci if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
336462306a36Sopenharmony_ci unsigned long rel_gfn = gfn - memslot->base_gfn;
336562306a36Sopenharmony_ci u32 slot = (memslot->as_id << 16) | memslot->id;
336662306a36Sopenharmony_ci
336762306a36Sopenharmony_ci if (kvm->dirty_ring_size && vcpu)
336862306a36Sopenharmony_ci kvm_dirty_ring_push(vcpu, slot, rel_gfn);
336962306a36Sopenharmony_ci else if (memslot->dirty_bitmap)
337062306a36Sopenharmony_ci set_bit_le(rel_gfn, memslot->dirty_bitmap);
337162306a36Sopenharmony_ci }
337262306a36Sopenharmony_ci}
337362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(mark_page_dirty_in_slot);
337462306a36Sopenharmony_ci
337562306a36Sopenharmony_civoid mark_page_dirty(struct kvm *kvm, gfn_t gfn)
337662306a36Sopenharmony_ci{
337762306a36Sopenharmony_ci struct kvm_memory_slot *memslot;
337862306a36Sopenharmony_ci
337962306a36Sopenharmony_ci memslot = gfn_to_memslot(kvm, gfn);
338062306a36Sopenharmony_ci mark_page_dirty_in_slot(kvm, memslot, gfn);
338162306a36Sopenharmony_ci}
338262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(mark_page_dirty);
338362306a36Sopenharmony_ci
338462306a36Sopenharmony_civoid kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
338562306a36Sopenharmony_ci{
338662306a36Sopenharmony_ci struct kvm_memory_slot *memslot;
338762306a36Sopenharmony_ci
338862306a36Sopenharmony_ci memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
338962306a36Sopenharmony_ci mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn);
339062306a36Sopenharmony_ci}
339162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
339262306a36Sopenharmony_ci
339362306a36Sopenharmony_civoid kvm_sigset_activate(struct kvm_vcpu *vcpu)
339462306a36Sopenharmony_ci{
339562306a36Sopenharmony_ci if (!vcpu->sigset_active)
339662306a36Sopenharmony_ci return;
339762306a36Sopenharmony_ci
339862306a36Sopenharmony_ci /*
339962306a36Sopenharmony_ci * This does a lockless modification of ->real_blocked, which is fine
340062306a36Sopenharmony_ci * because only current can change ->real_blocked and all readers of
340162306a36Sopenharmony_ci * ->real_blocked don't care as long as ->real_blocked is always a subset
340262306a36Sopenharmony_ci * of ->blocked.
340362306a36Sopenharmony_ci */
340462306a36Sopenharmony_ci sigprocmask(SIG_SETMASK, &vcpu->sigset, &current->real_blocked);
340562306a36Sopenharmony_ci}
340662306a36Sopenharmony_ci
340762306a36Sopenharmony_civoid kvm_sigset_deactivate(struct kvm_vcpu *vcpu)
340862306a36Sopenharmony_ci{
340962306a36Sopenharmony_ci if (!vcpu->sigset_active)
341062306a36Sopenharmony_ci return;
341162306a36Sopenharmony_ci
341262306a36Sopenharmony_ci sigprocmask(SIG_SETMASK, &current->real_blocked, NULL);
341362306a36Sopenharmony_ci sigemptyset(&current->real_blocked);
341462306a36Sopenharmony_ci}
341562306a36Sopenharmony_ci
341662306a36Sopenharmony_cistatic void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
341762306a36Sopenharmony_ci{
341862306a36Sopenharmony_ci unsigned int old, val, grow, grow_start;
341962306a36Sopenharmony_ci
342062306a36Sopenharmony_ci old = val = vcpu->halt_poll_ns;
342162306a36Sopenharmony_ci grow_start = READ_ONCE(halt_poll_ns_grow_start);
342262306a36Sopenharmony_ci grow = READ_ONCE(halt_poll_ns_grow);
342362306a36Sopenharmony_ci if (!grow)
342462306a36Sopenharmony_ci goto out;
342562306a36Sopenharmony_ci
342662306a36Sopenharmony_ci val *= grow;
342762306a36Sopenharmony_ci if (val < grow_start)
342862306a36Sopenharmony_ci val = grow_start;
342962306a36Sopenharmony_ci
343062306a36Sopenharmony_ci vcpu->halt_poll_ns = val;
343162306a36Sopenharmony_ciout:
343262306a36Sopenharmony_ci trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
343362306a36Sopenharmony_ci}
343462306a36Sopenharmony_ci
343562306a36Sopenharmony_cistatic void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
343662306a36Sopenharmony_ci{
343762306a36Sopenharmony_ci unsigned int old, val, shrink, grow_start;
343862306a36Sopenharmony_ci
343962306a36Sopenharmony_ci old = val = vcpu->halt_poll_ns;
344062306a36Sopenharmony_ci shrink = READ_ONCE(halt_poll_ns_shrink);
344162306a36Sopenharmony_ci grow_start = READ_ONCE(halt_poll_ns_grow_start);
344262306a36Sopenharmony_ci if (shrink == 0)
344362306a36Sopenharmony_ci val = 0;
344462306a36Sopenharmony_ci else
344562306a36Sopenharmony_ci val /= shrink;
344662306a36Sopenharmony_ci
344762306a36Sopenharmony_ci if (val < grow_start)
344862306a36Sopenharmony_ci val = 0;
344962306a36Sopenharmony_ci
345062306a36Sopenharmony_ci vcpu->halt_poll_ns = val;
345162306a36Sopenharmony_ci trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
345262306a36Sopenharmony_ci}
345362306a36Sopenharmony_ci
345462306a36Sopenharmony_cistatic int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
345562306a36Sopenharmony_ci{
345662306a36Sopenharmony_ci int ret = -EINTR;
345762306a36Sopenharmony_ci int idx = srcu_read_lock(&vcpu->kvm->srcu);
345862306a36Sopenharmony_ci
345962306a36Sopenharmony_ci if (kvm_arch_vcpu_runnable(vcpu))
346062306a36Sopenharmony_ci goto out;
346162306a36Sopenharmony_ci if (kvm_cpu_has_pending_timer(vcpu))
346262306a36Sopenharmony_ci goto out;
346362306a36Sopenharmony_ci if (signal_pending(current))
346462306a36Sopenharmony_ci goto out;
346562306a36Sopenharmony_ci if (kvm_check_request(KVM_REQ_UNBLOCK, vcpu))
346662306a36Sopenharmony_ci goto out;
346762306a36Sopenharmony_ci
346862306a36Sopenharmony_ci ret = 0;
346962306a36Sopenharmony_ciout:
347062306a36Sopenharmony_ci srcu_read_unlock(&vcpu->kvm->srcu, idx);
347162306a36Sopenharmony_ci return ret;
347262306a36Sopenharmony_ci}
347362306a36Sopenharmony_ci
347462306a36Sopenharmony_ci/*
347562306a36Sopenharmony_ci * Block the vCPU until the vCPU is runnable, an event arrives, or a signal is
347662306a36Sopenharmony_ci * pending.
This is mostly used when halting a vCPU, but may also be used 347762306a36Sopenharmony_ci * directly for other vCPU non-runnable states, e.g. x86's Wait-For-SIPI. 347862306a36Sopenharmony_ci */ 347962306a36Sopenharmony_cibool kvm_vcpu_block(struct kvm_vcpu *vcpu) 348062306a36Sopenharmony_ci{ 348162306a36Sopenharmony_ci struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); 348262306a36Sopenharmony_ci bool waited = false; 348362306a36Sopenharmony_ci 348462306a36Sopenharmony_ci vcpu->stat.generic.blocking = 1; 348562306a36Sopenharmony_ci 348662306a36Sopenharmony_ci preempt_disable(); 348762306a36Sopenharmony_ci kvm_arch_vcpu_blocking(vcpu); 348862306a36Sopenharmony_ci prepare_to_rcuwait(wait); 348962306a36Sopenharmony_ci preempt_enable(); 349062306a36Sopenharmony_ci 349162306a36Sopenharmony_ci for (;;) { 349262306a36Sopenharmony_ci set_current_state(TASK_INTERRUPTIBLE); 349362306a36Sopenharmony_ci 349462306a36Sopenharmony_ci if (kvm_vcpu_check_block(vcpu) < 0) 349562306a36Sopenharmony_ci break; 349662306a36Sopenharmony_ci 349762306a36Sopenharmony_ci waited = true; 349862306a36Sopenharmony_ci schedule(); 349962306a36Sopenharmony_ci } 350062306a36Sopenharmony_ci 350162306a36Sopenharmony_ci preempt_disable(); 350262306a36Sopenharmony_ci finish_rcuwait(wait); 350362306a36Sopenharmony_ci kvm_arch_vcpu_unblocking(vcpu); 350462306a36Sopenharmony_ci preempt_enable(); 350562306a36Sopenharmony_ci 350662306a36Sopenharmony_ci vcpu->stat.generic.blocking = 0; 350762306a36Sopenharmony_ci 350862306a36Sopenharmony_ci return waited; 350962306a36Sopenharmony_ci} 351062306a36Sopenharmony_ci 351162306a36Sopenharmony_cistatic inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start, 351262306a36Sopenharmony_ci ktime_t end, bool success) 351362306a36Sopenharmony_ci{ 351462306a36Sopenharmony_ci struct kvm_vcpu_stat_generic *stats = &vcpu->stat.generic; 351562306a36Sopenharmony_ci u64 poll_ns = ktime_to_ns(ktime_sub(end, start)); 351662306a36Sopenharmony_ci 351762306a36Sopenharmony_ci ++vcpu->stat.generic.halt_attempted_poll; 351862306a36Sopenharmony_ci 351962306a36Sopenharmony_ci if (success) { 352062306a36Sopenharmony_ci ++vcpu->stat.generic.halt_successful_poll; 352162306a36Sopenharmony_ci 352262306a36Sopenharmony_ci if (!vcpu_valid_wakeup(vcpu)) 352362306a36Sopenharmony_ci ++vcpu->stat.generic.halt_poll_invalid; 352462306a36Sopenharmony_ci 352562306a36Sopenharmony_ci stats->halt_poll_success_ns += poll_ns; 352662306a36Sopenharmony_ci KVM_STATS_LOG_HIST_UPDATE(stats->halt_poll_success_hist, poll_ns); 352762306a36Sopenharmony_ci } else { 352862306a36Sopenharmony_ci stats->halt_poll_fail_ns += poll_ns; 352962306a36Sopenharmony_ci KVM_STATS_LOG_HIST_UPDATE(stats->halt_poll_fail_hist, poll_ns); 353062306a36Sopenharmony_ci } 353162306a36Sopenharmony_ci} 353262306a36Sopenharmony_ci 353362306a36Sopenharmony_cistatic unsigned int kvm_vcpu_max_halt_poll_ns(struct kvm_vcpu *vcpu) 353462306a36Sopenharmony_ci{ 353562306a36Sopenharmony_ci struct kvm *kvm = vcpu->kvm; 353662306a36Sopenharmony_ci 353762306a36Sopenharmony_ci if (kvm->override_halt_poll_ns) { 353862306a36Sopenharmony_ci /* 353962306a36Sopenharmony_ci * Ensure kvm->max_halt_poll_ns is not read before 354062306a36Sopenharmony_ci * kvm->override_halt_poll_ns. 354162306a36Sopenharmony_ci * 354262306a36Sopenharmony_ci * Pairs with the smp_wmb() when enabling KVM_CAP_HALT_POLL. 
354362306a36Sopenharmony_ci */
354462306a36Sopenharmony_ci smp_rmb();
354562306a36Sopenharmony_ci return READ_ONCE(kvm->max_halt_poll_ns);
354662306a36Sopenharmony_ci }
354762306a36Sopenharmony_ci
354862306a36Sopenharmony_ci return READ_ONCE(halt_poll_ns);
354962306a36Sopenharmony_ci}
355062306a36Sopenharmony_ci
355162306a36Sopenharmony_ci/*
355262306a36Sopenharmony_ci * Emulate a vCPU halt condition, e.g. HLT on x86, WFI on arm, etc... If halt
355362306a36Sopenharmony_ci * polling is enabled, busy wait for a short time before blocking to avoid the
355462306a36Sopenharmony_ci * expensive block+unblock sequence if a wake event arrives soon after the vCPU
355562306a36Sopenharmony_ci * is halted.
355662306a36Sopenharmony_ci */
355762306a36Sopenharmony_civoid kvm_vcpu_halt(struct kvm_vcpu *vcpu)
355862306a36Sopenharmony_ci{
355962306a36Sopenharmony_ci unsigned int max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu);
356062306a36Sopenharmony_ci bool halt_poll_allowed = !kvm_arch_no_poll(vcpu);
356162306a36Sopenharmony_ci ktime_t start, cur, poll_end;
356262306a36Sopenharmony_ci bool waited = false;
356362306a36Sopenharmony_ci bool do_halt_poll;
356462306a36Sopenharmony_ci u64 halt_ns;
356562306a36Sopenharmony_ci
356662306a36Sopenharmony_ci if (vcpu->halt_poll_ns > max_halt_poll_ns)
356762306a36Sopenharmony_ci vcpu->halt_poll_ns = max_halt_poll_ns;
356862306a36Sopenharmony_ci
356962306a36Sopenharmony_ci do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns;
357062306a36Sopenharmony_ci
357162306a36Sopenharmony_ci start = cur = poll_end = ktime_get();
357262306a36Sopenharmony_ci if (do_halt_poll) {
357362306a36Sopenharmony_ci ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns);
357462306a36Sopenharmony_ci
357562306a36Sopenharmony_ci do {
357662306a36Sopenharmony_ci if (kvm_vcpu_check_block(vcpu) < 0)
357762306a36Sopenharmony_ci goto out;
357862306a36Sopenharmony_ci cpu_relax();
357962306a36Sopenharmony_ci poll_end = cur = ktime_get();
358062306a36Sopenharmony_ci } while (kvm_vcpu_can_poll(cur, stop));
358162306a36Sopenharmony_ci }
358262306a36Sopenharmony_ci
358362306a36Sopenharmony_ci waited = kvm_vcpu_block(vcpu);
358462306a36Sopenharmony_ci
358562306a36Sopenharmony_ci cur = ktime_get();
358662306a36Sopenharmony_ci if (waited) {
358762306a36Sopenharmony_ci vcpu->stat.generic.halt_wait_ns +=
358862306a36Sopenharmony_ci ktime_to_ns(cur) - ktime_to_ns(poll_end);
358962306a36Sopenharmony_ci KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.generic.halt_wait_hist,
359062306a36Sopenharmony_ci ktime_to_ns(cur) - ktime_to_ns(poll_end));
359162306a36Sopenharmony_ci }
359262306a36Sopenharmony_ciout:
359362306a36Sopenharmony_ci /* The total time the vCPU was "halted", including polling time. */
359462306a36Sopenharmony_ci halt_ns = ktime_to_ns(cur) - ktime_to_ns(start);
359562306a36Sopenharmony_ci
359662306a36Sopenharmony_ci /*
359762306a36Sopenharmony_ci * Note, halt-polling is considered successful so long as the vCPU was
359862306a36Sopenharmony_ci * never actually scheduled out, i.e. even if the wake event arrived
359962306a36Sopenharmony_ci * after the end of the halt-polling loop itself, but before the full wait.
360062306a36Sopenharmony_ci */
360162306a36Sopenharmony_ci if (do_halt_poll)
360262306a36Sopenharmony_ci update_halt_poll_stats(vcpu, start, poll_end, !waited);
360362306a36Sopenharmony_ci
360462306a36Sopenharmony_ci if (halt_poll_allowed) {
360562306a36Sopenharmony_ci /* Recompute the max halt poll time in case it changed.
*/ 360662306a36Sopenharmony_ci max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu); 360762306a36Sopenharmony_ci 360862306a36Sopenharmony_ci if (!vcpu_valid_wakeup(vcpu)) { 360962306a36Sopenharmony_ci shrink_halt_poll_ns(vcpu); 361062306a36Sopenharmony_ci } else if (max_halt_poll_ns) { 361162306a36Sopenharmony_ci if (halt_ns <= vcpu->halt_poll_ns) 361262306a36Sopenharmony_ci ; 361362306a36Sopenharmony_ci /* we had a long block, shrink polling */ 361462306a36Sopenharmony_ci else if (vcpu->halt_poll_ns && 361562306a36Sopenharmony_ci halt_ns > max_halt_poll_ns) 361662306a36Sopenharmony_ci shrink_halt_poll_ns(vcpu); 361762306a36Sopenharmony_ci /* we had a short halt and our poll time is too small */ 361862306a36Sopenharmony_ci else if (vcpu->halt_poll_ns < max_halt_poll_ns && 361962306a36Sopenharmony_ci halt_ns < max_halt_poll_ns) 362062306a36Sopenharmony_ci grow_halt_poll_ns(vcpu); 362162306a36Sopenharmony_ci } else { 362262306a36Sopenharmony_ci vcpu->halt_poll_ns = 0; 362362306a36Sopenharmony_ci } 362462306a36Sopenharmony_ci } 362562306a36Sopenharmony_ci 362662306a36Sopenharmony_ci trace_kvm_vcpu_wakeup(halt_ns, waited, vcpu_valid_wakeup(vcpu)); 362762306a36Sopenharmony_ci} 362862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_halt); 362962306a36Sopenharmony_ci 363062306a36Sopenharmony_cibool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) 363162306a36Sopenharmony_ci{ 363262306a36Sopenharmony_ci if (__kvm_vcpu_wake_up(vcpu)) { 363362306a36Sopenharmony_ci WRITE_ONCE(vcpu->ready, true); 363462306a36Sopenharmony_ci ++vcpu->stat.generic.halt_wakeup; 363562306a36Sopenharmony_ci return true; 363662306a36Sopenharmony_ci } 363762306a36Sopenharmony_ci 363862306a36Sopenharmony_ci return false; 363962306a36Sopenharmony_ci} 364062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_wake_up); 364162306a36Sopenharmony_ci 364262306a36Sopenharmony_ci#ifndef CONFIG_S390 364362306a36Sopenharmony_ci/* 364462306a36Sopenharmony_ci * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode. 364562306a36Sopenharmony_ci */ 364662306a36Sopenharmony_civoid kvm_vcpu_kick(struct kvm_vcpu *vcpu) 364762306a36Sopenharmony_ci{ 364862306a36Sopenharmony_ci int me, cpu; 364962306a36Sopenharmony_ci 365062306a36Sopenharmony_ci if (kvm_vcpu_wake_up(vcpu)) 365162306a36Sopenharmony_ci return; 365262306a36Sopenharmony_ci 365362306a36Sopenharmony_ci me = get_cpu(); 365462306a36Sopenharmony_ci /* 365562306a36Sopenharmony_ci * The only state change done outside the vcpu mutex is IN_GUEST_MODE 365662306a36Sopenharmony_ci * to EXITING_GUEST_MODE. Therefore the moderately expensive "should 365762306a36Sopenharmony_ci * kick" check does not need atomic operations if kvm_vcpu_kick is used 365862306a36Sopenharmony_ci * within the vCPU thread itself. 365962306a36Sopenharmony_ci */ 366062306a36Sopenharmony_ci if (vcpu == __this_cpu_read(kvm_running_vcpu)) { 366162306a36Sopenharmony_ci if (vcpu->mode == IN_GUEST_MODE) 366262306a36Sopenharmony_ci WRITE_ONCE(vcpu->mode, EXITING_GUEST_MODE); 366362306a36Sopenharmony_ci goto out; 366462306a36Sopenharmony_ci } 366562306a36Sopenharmony_ci 366662306a36Sopenharmony_ci /* 366762306a36Sopenharmony_ci * Note, the vCPU could get migrated to a different pCPU at any point 366862306a36Sopenharmony_ci * after kvm_arch_vcpu_should_kick(), which could result in sending an 366962306a36Sopenharmony_ci * IPI to the previous pCPU. 
But that's ok because the purpose of the
367062306a36Sopenharmony_ci * IPI is to force the vCPU to leave IN_GUEST_MODE, and migrating the
367162306a36Sopenharmony_ci * vCPU also requires it to leave IN_GUEST_MODE.
367262306a36Sopenharmony_ci */
367362306a36Sopenharmony_ci if (kvm_arch_vcpu_should_kick(vcpu)) {
367462306a36Sopenharmony_ci cpu = READ_ONCE(vcpu->cpu);
367562306a36Sopenharmony_ci if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
367662306a36Sopenharmony_ci smp_send_reschedule(cpu);
367762306a36Sopenharmony_ci }
367862306a36Sopenharmony_ciout:
367962306a36Sopenharmony_ci put_cpu();
368062306a36Sopenharmony_ci}
368162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_kick);
368262306a36Sopenharmony_ci#endif /* !CONFIG_S390 */
368362306a36Sopenharmony_ci
368462306a36Sopenharmony_ciint kvm_vcpu_yield_to(struct kvm_vcpu *target)
368562306a36Sopenharmony_ci{
368662306a36Sopenharmony_ci struct pid *pid;
368762306a36Sopenharmony_ci struct task_struct *task = NULL;
368862306a36Sopenharmony_ci int ret = 0;
368962306a36Sopenharmony_ci
369062306a36Sopenharmony_ci rcu_read_lock();
369162306a36Sopenharmony_ci pid = rcu_dereference(target->pid);
369262306a36Sopenharmony_ci if (pid)
369362306a36Sopenharmony_ci task = get_pid_task(pid, PIDTYPE_PID);
369462306a36Sopenharmony_ci rcu_read_unlock();
369562306a36Sopenharmony_ci if (!task)
369662306a36Sopenharmony_ci return ret;
369762306a36Sopenharmony_ci ret = yield_to(task, 1);
369862306a36Sopenharmony_ci put_task_struct(task);
369962306a36Sopenharmony_ci
370062306a36Sopenharmony_ci return ret;
370162306a36Sopenharmony_ci}
370262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
370362306a36Sopenharmony_ci
370462306a36Sopenharmony_ci/*
370562306a36Sopenharmony_ci * Helper that checks whether a VCPU is eligible for directed yield.
370662306a36Sopenharmony_ci * The most eligible candidate to yield to is decided by the following heuristics:
370762306a36Sopenharmony_ci *
370862306a36Sopenharmony_ci * (a) A VCPU which has not done a PLE exit or had CPU relax intercepted recently
370962306a36Sopenharmony_ci * (a preempted lock holder), indicated by @in_spin_loop.
371062306a36Sopenharmony_ci * Set at the beginning and cleared at the end of the interception/PLE handler.
371162306a36Sopenharmony_ci *
371262306a36Sopenharmony_ci * (b) A VCPU which has done a PLE exit/CPU relax intercept but did not get a
371362306a36Sopenharmony_ci * chance last time (it has most likely become eligible now since we probably
371462306a36Sopenharmony_ci * yielded to the lock holder in the last iteration). This is done by toggling
371562306a36Sopenharmony_ci * @dy_eligible each time a VCPU is checked for eligibility.
371662306a36Sopenharmony_ci *
371762306a36Sopenharmony_ci * Yielding to a recently PLE-exited/CPU relax intercepted VCPU before yielding
371862306a36Sopenharmony_ci * to the preempted lock holder could result in wrong VCPU selection and CPU
371962306a36Sopenharmony_ci * burning. Giving priority to a potential lock holder increases lock
372062306a36Sopenharmony_ci * progress.
372162306a36Sopenharmony_ci *
372262306a36Sopenharmony_ci * Since the algorithm is based on heuristics, accessing another VCPU's data
372362306a36Sopenharmony_ci * without locking does no harm. It may result in trying to yield to the same
372462306a36Sopenharmony_ci * VCPU, failing, and continuing with the next VCPU, and so on.
372562306a36Sopenharmony_ci */ 372662306a36Sopenharmony_cistatic bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) 372762306a36Sopenharmony_ci{ 372862306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT 372962306a36Sopenharmony_ci bool eligible; 373062306a36Sopenharmony_ci 373162306a36Sopenharmony_ci eligible = !vcpu->spin_loop.in_spin_loop || 373262306a36Sopenharmony_ci vcpu->spin_loop.dy_eligible; 373362306a36Sopenharmony_ci 373462306a36Sopenharmony_ci if (vcpu->spin_loop.in_spin_loop) 373562306a36Sopenharmony_ci kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); 373662306a36Sopenharmony_ci 373762306a36Sopenharmony_ci return eligible; 373862306a36Sopenharmony_ci#else 373962306a36Sopenharmony_ci return true; 374062306a36Sopenharmony_ci#endif 374162306a36Sopenharmony_ci} 374262306a36Sopenharmony_ci 374362306a36Sopenharmony_ci/* 374462306a36Sopenharmony_ci * Unlike kvm_arch_vcpu_runnable, this function is called outside 374562306a36Sopenharmony_ci * a vcpu_load/vcpu_put pair. However, for most architectures 374662306a36Sopenharmony_ci * kvm_arch_vcpu_runnable does not require vcpu_load. 374762306a36Sopenharmony_ci */ 374862306a36Sopenharmony_cibool __weak kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) 374962306a36Sopenharmony_ci{ 375062306a36Sopenharmony_ci return kvm_arch_vcpu_runnable(vcpu); 375162306a36Sopenharmony_ci} 375262306a36Sopenharmony_ci 375362306a36Sopenharmony_cistatic bool vcpu_dy_runnable(struct kvm_vcpu *vcpu) 375462306a36Sopenharmony_ci{ 375562306a36Sopenharmony_ci if (kvm_arch_dy_runnable(vcpu)) 375662306a36Sopenharmony_ci return true; 375762306a36Sopenharmony_ci 375862306a36Sopenharmony_ci#ifdef CONFIG_KVM_ASYNC_PF 375962306a36Sopenharmony_ci if (!list_empty_careful(&vcpu->async_pf.done)) 376062306a36Sopenharmony_ci return true; 376162306a36Sopenharmony_ci#endif 376262306a36Sopenharmony_ci 376362306a36Sopenharmony_ci return false; 376462306a36Sopenharmony_ci} 376562306a36Sopenharmony_ci 376662306a36Sopenharmony_cibool __weak kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu) 376762306a36Sopenharmony_ci{ 376862306a36Sopenharmony_ci return false; 376962306a36Sopenharmony_ci} 377062306a36Sopenharmony_ci 377162306a36Sopenharmony_civoid kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) 377262306a36Sopenharmony_ci{ 377362306a36Sopenharmony_ci struct kvm *kvm = me->kvm; 377462306a36Sopenharmony_ci struct kvm_vcpu *vcpu; 377562306a36Sopenharmony_ci int last_boosted_vcpu = me->kvm->last_boosted_vcpu; 377662306a36Sopenharmony_ci unsigned long i; 377762306a36Sopenharmony_ci int yielded = 0; 377862306a36Sopenharmony_ci int try = 3; 377962306a36Sopenharmony_ci int pass; 378062306a36Sopenharmony_ci 378162306a36Sopenharmony_ci kvm_vcpu_set_in_spin_loop(me, true); 378262306a36Sopenharmony_ci /* 378362306a36Sopenharmony_ci * We boost the priority of a VCPU that is runnable but not 378462306a36Sopenharmony_ci * currently running, because it got preempted by something 378562306a36Sopenharmony_ci * else and called schedule in __vcpu_run. Hopefully that 378662306a36Sopenharmony_ci * VCPU is holding the lock that we need and will release it. 378762306a36Sopenharmony_ci * We approximate round-robin by starting at the last boosted VCPU. 
378862306a36Sopenharmony_ci */ 378962306a36Sopenharmony_ci for (pass = 0; pass < 2 && !yielded && try; pass++) { 379062306a36Sopenharmony_ci kvm_for_each_vcpu(i, vcpu, kvm) { 379162306a36Sopenharmony_ci if (!pass && i <= last_boosted_vcpu) { 379262306a36Sopenharmony_ci i = last_boosted_vcpu; 379362306a36Sopenharmony_ci continue; 379462306a36Sopenharmony_ci } else if (pass && i > last_boosted_vcpu) 379562306a36Sopenharmony_ci break; 379662306a36Sopenharmony_ci if (!READ_ONCE(vcpu->ready)) 379762306a36Sopenharmony_ci continue; 379862306a36Sopenharmony_ci if (vcpu == me) 379962306a36Sopenharmony_ci continue; 380062306a36Sopenharmony_ci if (kvm_vcpu_is_blocking(vcpu) && !vcpu_dy_runnable(vcpu)) 380162306a36Sopenharmony_ci continue; 380262306a36Sopenharmony_ci if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode && 380362306a36Sopenharmony_ci !kvm_arch_dy_has_pending_interrupt(vcpu) && 380462306a36Sopenharmony_ci !kvm_arch_vcpu_in_kernel(vcpu)) 380562306a36Sopenharmony_ci continue; 380662306a36Sopenharmony_ci if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) 380762306a36Sopenharmony_ci continue; 380862306a36Sopenharmony_ci 380962306a36Sopenharmony_ci yielded = kvm_vcpu_yield_to(vcpu); 381062306a36Sopenharmony_ci if (yielded > 0) { 381162306a36Sopenharmony_ci kvm->last_boosted_vcpu = i; 381262306a36Sopenharmony_ci break; 381362306a36Sopenharmony_ci } else if (yielded < 0) { 381462306a36Sopenharmony_ci try--; 381562306a36Sopenharmony_ci if (!try) 381662306a36Sopenharmony_ci break; 381762306a36Sopenharmony_ci } 381862306a36Sopenharmony_ci } 381962306a36Sopenharmony_ci } 382062306a36Sopenharmony_ci kvm_vcpu_set_in_spin_loop(me, false); 382162306a36Sopenharmony_ci 382262306a36Sopenharmony_ci /* Ensure vcpu is not eligible during next spinloop */ 382362306a36Sopenharmony_ci kvm_vcpu_set_dy_eligible(me, false); 382462306a36Sopenharmony_ci} 382562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); 382662306a36Sopenharmony_ci 382762306a36Sopenharmony_cistatic bool kvm_page_in_dirty_ring(struct kvm *kvm, unsigned long pgoff) 382862306a36Sopenharmony_ci{ 382962306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM_DIRTY_RING 383062306a36Sopenharmony_ci return (pgoff >= KVM_DIRTY_LOG_PAGE_OFFSET) && 383162306a36Sopenharmony_ci (pgoff < KVM_DIRTY_LOG_PAGE_OFFSET + 383262306a36Sopenharmony_ci kvm->dirty_ring_size / PAGE_SIZE); 383362306a36Sopenharmony_ci#else 383462306a36Sopenharmony_ci return false; 383562306a36Sopenharmony_ci#endif 383662306a36Sopenharmony_ci} 383762306a36Sopenharmony_ci 383862306a36Sopenharmony_cistatic vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf) 383962306a36Sopenharmony_ci{ 384062306a36Sopenharmony_ci struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data; 384162306a36Sopenharmony_ci struct page *page; 384262306a36Sopenharmony_ci 384362306a36Sopenharmony_ci if (vmf->pgoff == 0) 384462306a36Sopenharmony_ci page = virt_to_page(vcpu->run); 384562306a36Sopenharmony_ci#ifdef CONFIG_X86 384662306a36Sopenharmony_ci else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) 384762306a36Sopenharmony_ci page = virt_to_page(vcpu->arch.pio_data); 384862306a36Sopenharmony_ci#endif 384962306a36Sopenharmony_ci#ifdef CONFIG_KVM_MMIO 385062306a36Sopenharmony_ci else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) 385162306a36Sopenharmony_ci page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); 385262306a36Sopenharmony_ci#endif 385362306a36Sopenharmony_ci else if (kvm_page_in_dirty_ring(vcpu->kvm, vmf->pgoff)) 385462306a36Sopenharmony_ci page = kvm_dirty_ring_get_page( 385562306a36Sopenharmony_ci &vcpu->dirty_ring, 
385662306a36Sopenharmony_ci vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET); 385762306a36Sopenharmony_ci else 385862306a36Sopenharmony_ci return kvm_arch_vcpu_fault(vcpu, vmf); 385962306a36Sopenharmony_ci get_page(page); 386062306a36Sopenharmony_ci vmf->page = page; 386162306a36Sopenharmony_ci return 0; 386262306a36Sopenharmony_ci} 386362306a36Sopenharmony_ci 386462306a36Sopenharmony_cistatic const struct vm_operations_struct kvm_vcpu_vm_ops = { 386562306a36Sopenharmony_ci .fault = kvm_vcpu_fault, 386662306a36Sopenharmony_ci}; 386762306a36Sopenharmony_ci 386862306a36Sopenharmony_cistatic int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) 386962306a36Sopenharmony_ci{ 387062306a36Sopenharmony_ci struct kvm_vcpu *vcpu = file->private_data; 387162306a36Sopenharmony_ci unsigned long pages = vma_pages(vma); 387262306a36Sopenharmony_ci 387362306a36Sopenharmony_ci if ((kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff) || 387462306a36Sopenharmony_ci kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff + pages - 1)) && 387562306a36Sopenharmony_ci ((vma->vm_flags & VM_EXEC) || !(vma->vm_flags & VM_SHARED))) 387662306a36Sopenharmony_ci return -EINVAL; 387762306a36Sopenharmony_ci 387862306a36Sopenharmony_ci vma->vm_ops = &kvm_vcpu_vm_ops; 387962306a36Sopenharmony_ci return 0; 388062306a36Sopenharmony_ci} 388162306a36Sopenharmony_ci 388262306a36Sopenharmony_cistatic int kvm_vcpu_release(struct inode *inode, struct file *filp) 388362306a36Sopenharmony_ci{ 388462306a36Sopenharmony_ci struct kvm_vcpu *vcpu = filp->private_data; 388562306a36Sopenharmony_ci 388662306a36Sopenharmony_ci kvm_put_kvm(vcpu->kvm); 388762306a36Sopenharmony_ci return 0; 388862306a36Sopenharmony_ci} 388962306a36Sopenharmony_ci 389062306a36Sopenharmony_cistatic const struct file_operations kvm_vcpu_fops = { 389162306a36Sopenharmony_ci .release = kvm_vcpu_release, 389262306a36Sopenharmony_ci .unlocked_ioctl = kvm_vcpu_ioctl, 389362306a36Sopenharmony_ci .mmap = kvm_vcpu_mmap, 389462306a36Sopenharmony_ci .llseek = noop_llseek, 389562306a36Sopenharmony_ci KVM_COMPAT(kvm_vcpu_compat_ioctl), 389662306a36Sopenharmony_ci}; 389762306a36Sopenharmony_ci 389862306a36Sopenharmony_ci/* 389962306a36Sopenharmony_ci * Allocates an inode for the vcpu. 
390062306a36Sopenharmony_ci */ 390162306a36Sopenharmony_cistatic int create_vcpu_fd(struct kvm_vcpu *vcpu) 390262306a36Sopenharmony_ci{ 390362306a36Sopenharmony_ci char name[8 + 1 + ITOA_MAX_LEN + 1]; 390462306a36Sopenharmony_ci 390562306a36Sopenharmony_ci snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id); 390662306a36Sopenharmony_ci return anon_inode_getfd(name, &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); 390762306a36Sopenharmony_ci} 390862306a36Sopenharmony_ci 390962306a36Sopenharmony_ci#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS 391062306a36Sopenharmony_cistatic int vcpu_get_pid(void *data, u64 *val) 391162306a36Sopenharmony_ci{ 391262306a36Sopenharmony_ci struct kvm_vcpu *vcpu = data; 391362306a36Sopenharmony_ci 391462306a36Sopenharmony_ci rcu_read_lock(); 391562306a36Sopenharmony_ci *val = pid_nr(rcu_dereference(vcpu->pid)); 391662306a36Sopenharmony_ci rcu_read_unlock(); 391762306a36Sopenharmony_ci return 0; 391862306a36Sopenharmony_ci} 391962306a36Sopenharmony_ci 392062306a36Sopenharmony_ciDEFINE_SIMPLE_ATTRIBUTE(vcpu_get_pid_fops, vcpu_get_pid, NULL, "%llu\n"); 392162306a36Sopenharmony_ci 392262306a36Sopenharmony_cistatic void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) 392362306a36Sopenharmony_ci{ 392462306a36Sopenharmony_ci struct dentry *debugfs_dentry; 392562306a36Sopenharmony_ci char dir_name[ITOA_MAX_LEN * 2]; 392662306a36Sopenharmony_ci 392762306a36Sopenharmony_ci if (!debugfs_initialized()) 392862306a36Sopenharmony_ci return; 392962306a36Sopenharmony_ci 393062306a36Sopenharmony_ci snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id); 393162306a36Sopenharmony_ci debugfs_dentry = debugfs_create_dir(dir_name, 393262306a36Sopenharmony_ci vcpu->kvm->debugfs_dentry); 393362306a36Sopenharmony_ci debugfs_create_file("pid", 0444, debugfs_dentry, vcpu, 393462306a36Sopenharmony_ci &vcpu_get_pid_fops); 393562306a36Sopenharmony_ci 393662306a36Sopenharmony_ci kvm_arch_create_vcpu_debugfs(vcpu, debugfs_dentry); 393762306a36Sopenharmony_ci} 393862306a36Sopenharmony_ci#endif 393962306a36Sopenharmony_ci 394062306a36Sopenharmony_ci/* 394162306a36Sopenharmony_ci * Creates some virtual cpus. Good luck creating more than one. 
394262306a36Sopenharmony_ci */ 394362306a36Sopenharmony_cistatic int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) 394462306a36Sopenharmony_ci{ 394562306a36Sopenharmony_ci int r; 394662306a36Sopenharmony_ci struct kvm_vcpu *vcpu; 394762306a36Sopenharmony_ci struct page *page; 394862306a36Sopenharmony_ci 394962306a36Sopenharmony_ci if (id >= KVM_MAX_VCPU_IDS) 395062306a36Sopenharmony_ci return -EINVAL; 395162306a36Sopenharmony_ci 395262306a36Sopenharmony_ci mutex_lock(&kvm->lock); 395362306a36Sopenharmony_ci if (kvm->created_vcpus >= kvm->max_vcpus) { 395462306a36Sopenharmony_ci mutex_unlock(&kvm->lock); 395562306a36Sopenharmony_ci return -EINVAL; 395662306a36Sopenharmony_ci } 395762306a36Sopenharmony_ci 395862306a36Sopenharmony_ci r = kvm_arch_vcpu_precreate(kvm, id); 395962306a36Sopenharmony_ci if (r) { 396062306a36Sopenharmony_ci mutex_unlock(&kvm->lock); 396162306a36Sopenharmony_ci return r; 396262306a36Sopenharmony_ci } 396362306a36Sopenharmony_ci 396462306a36Sopenharmony_ci kvm->created_vcpus++; 396562306a36Sopenharmony_ci mutex_unlock(&kvm->lock); 396662306a36Sopenharmony_ci 396762306a36Sopenharmony_ci vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); 396862306a36Sopenharmony_ci if (!vcpu) { 396962306a36Sopenharmony_ci r = -ENOMEM; 397062306a36Sopenharmony_ci goto vcpu_decrement; 397162306a36Sopenharmony_ci } 397262306a36Sopenharmony_ci 397362306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE); 397462306a36Sopenharmony_ci page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); 397562306a36Sopenharmony_ci if (!page) { 397662306a36Sopenharmony_ci r = -ENOMEM; 397762306a36Sopenharmony_ci goto vcpu_free; 397862306a36Sopenharmony_ci } 397962306a36Sopenharmony_ci vcpu->run = page_address(page); 398062306a36Sopenharmony_ci 398162306a36Sopenharmony_ci kvm_vcpu_init(vcpu, kvm, id); 398262306a36Sopenharmony_ci 398362306a36Sopenharmony_ci r = kvm_arch_vcpu_create(vcpu); 398462306a36Sopenharmony_ci if (r) 398562306a36Sopenharmony_ci goto vcpu_free_run_page; 398662306a36Sopenharmony_ci 398762306a36Sopenharmony_ci if (kvm->dirty_ring_size) { 398862306a36Sopenharmony_ci r = kvm_dirty_ring_alloc(&vcpu->dirty_ring, 398962306a36Sopenharmony_ci id, kvm->dirty_ring_size); 399062306a36Sopenharmony_ci if (r) 399162306a36Sopenharmony_ci goto arch_vcpu_destroy; 399262306a36Sopenharmony_ci } 399362306a36Sopenharmony_ci 399462306a36Sopenharmony_ci mutex_lock(&kvm->lock); 399562306a36Sopenharmony_ci 399662306a36Sopenharmony_ci#ifdef CONFIG_LOCKDEP 399762306a36Sopenharmony_ci /* Ensure that lockdep knows vcpu->mutex is taken *inside* kvm->lock */ 399862306a36Sopenharmony_ci mutex_lock(&vcpu->mutex); 399962306a36Sopenharmony_ci mutex_unlock(&vcpu->mutex); 400062306a36Sopenharmony_ci#endif 400162306a36Sopenharmony_ci 400262306a36Sopenharmony_ci if (kvm_get_vcpu_by_id(kvm, id)) { 400362306a36Sopenharmony_ci r = -EEXIST; 400462306a36Sopenharmony_ci goto unlock_vcpu_destroy; 400562306a36Sopenharmony_ci } 400662306a36Sopenharmony_ci 400762306a36Sopenharmony_ci vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus); 400862306a36Sopenharmony_ci r = xa_reserve(&kvm->vcpu_array, vcpu->vcpu_idx, GFP_KERNEL_ACCOUNT); 400962306a36Sopenharmony_ci if (r) 401062306a36Sopenharmony_ci goto unlock_vcpu_destroy; 401162306a36Sopenharmony_ci 401262306a36Sopenharmony_ci /* Now it's all set up, let userspace reach it */ 401362306a36Sopenharmony_ci kvm_get_kvm(kvm); 401462306a36Sopenharmony_ci r = create_vcpu_fd(vcpu); 401562306a36Sopenharmony_ci if (r < 0) 401662306a36Sopenharmony_ci goto kvm_put_xa_release; 
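 /*
  * Publish the vCPU in vcpu_array only after its fd exists.  The slot was
  * reserved with xa_reserve() above, so the xa_store() below is not expected
  * to fail; KVM_BUG_ON() marks the VM as bugged if it somehow does.
  */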
401762306a36Sopenharmony_ci 401862306a36Sopenharmony_ci if (KVM_BUG_ON(xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) { 401962306a36Sopenharmony_ci r = -EINVAL; 402062306a36Sopenharmony_ci goto kvm_put_xa_release; 402162306a36Sopenharmony_ci } 402262306a36Sopenharmony_ci 402362306a36Sopenharmony_ci /* 402462306a36Sopenharmony_ci * Pairs with smp_rmb() in kvm_get_vcpu. Store the vcpu 402562306a36Sopenharmony_ci * pointer before kvm->online_vcpu's incremented value. 402662306a36Sopenharmony_ci */ 402762306a36Sopenharmony_ci smp_wmb(); 402862306a36Sopenharmony_ci atomic_inc(&kvm->online_vcpus); 402962306a36Sopenharmony_ci 403062306a36Sopenharmony_ci mutex_unlock(&kvm->lock); 403162306a36Sopenharmony_ci kvm_arch_vcpu_postcreate(vcpu); 403262306a36Sopenharmony_ci kvm_create_vcpu_debugfs(vcpu); 403362306a36Sopenharmony_ci return r; 403462306a36Sopenharmony_ci 403562306a36Sopenharmony_cikvm_put_xa_release: 403662306a36Sopenharmony_ci kvm_put_kvm_no_destroy(kvm); 403762306a36Sopenharmony_ci xa_release(&kvm->vcpu_array, vcpu->vcpu_idx); 403862306a36Sopenharmony_ciunlock_vcpu_destroy: 403962306a36Sopenharmony_ci mutex_unlock(&kvm->lock); 404062306a36Sopenharmony_ci kvm_dirty_ring_free(&vcpu->dirty_ring); 404162306a36Sopenharmony_ciarch_vcpu_destroy: 404262306a36Sopenharmony_ci kvm_arch_vcpu_destroy(vcpu); 404362306a36Sopenharmony_civcpu_free_run_page: 404462306a36Sopenharmony_ci free_page((unsigned long)vcpu->run); 404562306a36Sopenharmony_civcpu_free: 404662306a36Sopenharmony_ci kmem_cache_free(kvm_vcpu_cache, vcpu); 404762306a36Sopenharmony_civcpu_decrement: 404862306a36Sopenharmony_ci mutex_lock(&kvm->lock); 404962306a36Sopenharmony_ci kvm->created_vcpus--; 405062306a36Sopenharmony_ci mutex_unlock(&kvm->lock); 405162306a36Sopenharmony_ci return r; 405262306a36Sopenharmony_ci} 405362306a36Sopenharmony_ci 405462306a36Sopenharmony_cistatic int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) 405562306a36Sopenharmony_ci{ 405662306a36Sopenharmony_ci if (sigset) { 405762306a36Sopenharmony_ci sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); 405862306a36Sopenharmony_ci vcpu->sigset_active = 1; 405962306a36Sopenharmony_ci vcpu->sigset = *sigset; 406062306a36Sopenharmony_ci } else 406162306a36Sopenharmony_ci vcpu->sigset_active = 0; 406262306a36Sopenharmony_ci return 0; 406362306a36Sopenharmony_ci} 406462306a36Sopenharmony_ci 406562306a36Sopenharmony_cistatic ssize_t kvm_vcpu_stats_read(struct file *file, char __user *user_buffer, 406662306a36Sopenharmony_ci size_t size, loff_t *offset) 406762306a36Sopenharmony_ci{ 406862306a36Sopenharmony_ci struct kvm_vcpu *vcpu = file->private_data; 406962306a36Sopenharmony_ci 407062306a36Sopenharmony_ci return kvm_stats_read(vcpu->stats_id, &kvm_vcpu_stats_header, 407162306a36Sopenharmony_ci &kvm_vcpu_stats_desc[0], &vcpu->stat, 407262306a36Sopenharmony_ci sizeof(vcpu->stat), user_buffer, size, offset); 407362306a36Sopenharmony_ci} 407462306a36Sopenharmony_ci 407562306a36Sopenharmony_cistatic int kvm_vcpu_stats_release(struct inode *inode, struct file *file) 407662306a36Sopenharmony_ci{ 407762306a36Sopenharmony_ci struct kvm_vcpu *vcpu = file->private_data; 407862306a36Sopenharmony_ci 407962306a36Sopenharmony_ci kvm_put_kvm(vcpu->kvm); 408062306a36Sopenharmony_ci return 0; 408162306a36Sopenharmony_ci} 408262306a36Sopenharmony_ci 408362306a36Sopenharmony_cistatic const struct file_operations kvm_vcpu_stats_fops = { 408462306a36Sopenharmony_ci .read = kvm_vcpu_stats_read, 408562306a36Sopenharmony_ci .release = 
kvm_vcpu_stats_release, 408662306a36Sopenharmony_ci .llseek = noop_llseek, 408762306a36Sopenharmony_ci}; 408862306a36Sopenharmony_ci 408962306a36Sopenharmony_cistatic int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu) 409062306a36Sopenharmony_ci{ 409162306a36Sopenharmony_ci int fd; 409262306a36Sopenharmony_ci struct file *file; 409362306a36Sopenharmony_ci char name[15 + ITOA_MAX_LEN + 1]; 409462306a36Sopenharmony_ci 409562306a36Sopenharmony_ci snprintf(name, sizeof(name), "kvm-vcpu-stats:%d", vcpu->vcpu_id); 409662306a36Sopenharmony_ci 409762306a36Sopenharmony_ci fd = get_unused_fd_flags(O_CLOEXEC); 409862306a36Sopenharmony_ci if (fd < 0) 409962306a36Sopenharmony_ci return fd; 410062306a36Sopenharmony_ci 410162306a36Sopenharmony_ci file = anon_inode_getfile(name, &kvm_vcpu_stats_fops, vcpu, O_RDONLY); 410262306a36Sopenharmony_ci if (IS_ERR(file)) { 410362306a36Sopenharmony_ci put_unused_fd(fd); 410462306a36Sopenharmony_ci return PTR_ERR(file); 410562306a36Sopenharmony_ci } 410662306a36Sopenharmony_ci 410762306a36Sopenharmony_ci kvm_get_kvm(vcpu->kvm); 410862306a36Sopenharmony_ci 410962306a36Sopenharmony_ci file->f_mode |= FMODE_PREAD; 411062306a36Sopenharmony_ci fd_install(fd, file); 411162306a36Sopenharmony_ci 411262306a36Sopenharmony_ci return fd; 411362306a36Sopenharmony_ci} 411462306a36Sopenharmony_ci 411562306a36Sopenharmony_cistatic long kvm_vcpu_ioctl(struct file *filp, 411662306a36Sopenharmony_ci unsigned int ioctl, unsigned long arg) 411762306a36Sopenharmony_ci{ 411862306a36Sopenharmony_ci struct kvm_vcpu *vcpu = filp->private_data; 411962306a36Sopenharmony_ci void __user *argp = (void __user *)arg; 412062306a36Sopenharmony_ci int r; 412162306a36Sopenharmony_ci struct kvm_fpu *fpu = NULL; 412262306a36Sopenharmony_ci struct kvm_sregs *kvm_sregs = NULL; 412362306a36Sopenharmony_ci 412462306a36Sopenharmony_ci if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead) 412562306a36Sopenharmony_ci return -EIO; 412662306a36Sopenharmony_ci 412762306a36Sopenharmony_ci if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) 412862306a36Sopenharmony_ci return -EINVAL; 412962306a36Sopenharmony_ci 413062306a36Sopenharmony_ci /* 413162306a36Sopenharmony_ci * Some architectures have vcpu ioctls that are asynchronous to vcpu 413262306a36Sopenharmony_ci * execution; mutex_lock() would break them. 413362306a36Sopenharmony_ci */ 413462306a36Sopenharmony_ci r = kvm_arch_vcpu_async_ioctl(filp, ioctl, arg); 413562306a36Sopenharmony_ci if (r != -ENOIOCTLCMD) 413662306a36Sopenharmony_ci return r; 413762306a36Sopenharmony_ci 413862306a36Sopenharmony_ci if (mutex_lock_killable(&vcpu->mutex)) 413962306a36Sopenharmony_ci return -EINTR; 414062306a36Sopenharmony_ci switch (ioctl) { 414162306a36Sopenharmony_ci case KVM_RUN: { 414262306a36Sopenharmony_ci struct pid *oldpid; 414362306a36Sopenharmony_ci r = -EINVAL; 414462306a36Sopenharmony_ci if (arg) 414562306a36Sopenharmony_ci goto out; 414662306a36Sopenharmony_ci oldpid = rcu_access_pointer(vcpu->pid); 414762306a36Sopenharmony_ci if (unlikely(oldpid != task_pid(current))) { 414862306a36Sopenharmony_ci /* The thread running this VCPU changed. 
*/ 414962306a36Sopenharmony_ci struct pid *newpid; 415062306a36Sopenharmony_ci 415162306a36Sopenharmony_ci r = kvm_arch_vcpu_run_pid_change(vcpu); 415262306a36Sopenharmony_ci if (r) 415362306a36Sopenharmony_ci break; 415462306a36Sopenharmony_ci 415562306a36Sopenharmony_ci newpid = get_task_pid(current, PIDTYPE_PID); 415662306a36Sopenharmony_ci rcu_assign_pointer(vcpu->pid, newpid); 415762306a36Sopenharmony_ci if (oldpid) 415862306a36Sopenharmony_ci synchronize_rcu(); 415962306a36Sopenharmony_ci put_pid(oldpid); 416062306a36Sopenharmony_ci } 416162306a36Sopenharmony_ci r = kvm_arch_vcpu_ioctl_run(vcpu); 416262306a36Sopenharmony_ci trace_kvm_userspace_exit(vcpu->run->exit_reason, r); 416362306a36Sopenharmony_ci break; 416462306a36Sopenharmony_ci } 416562306a36Sopenharmony_ci case KVM_GET_REGS: { 416662306a36Sopenharmony_ci struct kvm_regs *kvm_regs; 416762306a36Sopenharmony_ci 416862306a36Sopenharmony_ci r = -ENOMEM; 416962306a36Sopenharmony_ci kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL_ACCOUNT); 417062306a36Sopenharmony_ci if (!kvm_regs) 417162306a36Sopenharmony_ci goto out; 417262306a36Sopenharmony_ci r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); 417362306a36Sopenharmony_ci if (r) 417462306a36Sopenharmony_ci goto out_free1; 417562306a36Sopenharmony_ci r = -EFAULT; 417662306a36Sopenharmony_ci if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs))) 417762306a36Sopenharmony_ci goto out_free1; 417862306a36Sopenharmony_ci r = 0; 417962306a36Sopenharmony_ciout_free1: 418062306a36Sopenharmony_ci kfree(kvm_regs); 418162306a36Sopenharmony_ci break; 418262306a36Sopenharmony_ci } 418362306a36Sopenharmony_ci case KVM_SET_REGS: { 418462306a36Sopenharmony_ci struct kvm_regs *kvm_regs; 418562306a36Sopenharmony_ci 418662306a36Sopenharmony_ci kvm_regs = memdup_user(argp, sizeof(*kvm_regs)); 418762306a36Sopenharmony_ci if (IS_ERR(kvm_regs)) { 418862306a36Sopenharmony_ci r = PTR_ERR(kvm_regs); 418962306a36Sopenharmony_ci goto out; 419062306a36Sopenharmony_ci } 419162306a36Sopenharmony_ci r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); 419262306a36Sopenharmony_ci kfree(kvm_regs); 419362306a36Sopenharmony_ci break; 419462306a36Sopenharmony_ci } 419562306a36Sopenharmony_ci case KVM_GET_SREGS: { 419662306a36Sopenharmony_ci kvm_sregs = kzalloc(sizeof(struct kvm_sregs), 419762306a36Sopenharmony_ci GFP_KERNEL_ACCOUNT); 419862306a36Sopenharmony_ci r = -ENOMEM; 419962306a36Sopenharmony_ci if (!kvm_sregs) 420062306a36Sopenharmony_ci goto out; 420162306a36Sopenharmony_ci r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); 420262306a36Sopenharmony_ci if (r) 420362306a36Sopenharmony_ci goto out; 420462306a36Sopenharmony_ci r = -EFAULT; 420562306a36Sopenharmony_ci if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs))) 420662306a36Sopenharmony_ci goto out; 420762306a36Sopenharmony_ci r = 0; 420862306a36Sopenharmony_ci break; 420962306a36Sopenharmony_ci } 421062306a36Sopenharmony_ci case KVM_SET_SREGS: { 421162306a36Sopenharmony_ci kvm_sregs = memdup_user(argp, sizeof(*kvm_sregs)); 421262306a36Sopenharmony_ci if (IS_ERR(kvm_sregs)) { 421362306a36Sopenharmony_ci r = PTR_ERR(kvm_sregs); 421462306a36Sopenharmony_ci kvm_sregs = NULL; 421562306a36Sopenharmony_ci goto out; 421662306a36Sopenharmony_ci } 421762306a36Sopenharmony_ci r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); 421862306a36Sopenharmony_ci break; 421962306a36Sopenharmony_ci } 422062306a36Sopenharmony_ci case KVM_GET_MP_STATE: { 422162306a36Sopenharmony_ci struct kvm_mp_state mp_state; 422262306a36Sopenharmony_ci 
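 /*
  * Small argument structs such as kvm_mp_state are copied via the stack;
  * only the large ones (kvm_sregs, kvm_fpu) are heap-allocated in their
  * respective cases, presumably to keep this ioctl's stack frame small.
  */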
422362306a36Sopenharmony_ci r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); 422462306a36Sopenharmony_ci if (r) 422562306a36Sopenharmony_ci goto out; 422662306a36Sopenharmony_ci r = -EFAULT; 422762306a36Sopenharmony_ci if (copy_to_user(argp, &mp_state, sizeof(mp_state))) 422862306a36Sopenharmony_ci goto out; 422962306a36Sopenharmony_ci r = 0; 423062306a36Sopenharmony_ci break; 423162306a36Sopenharmony_ci } 423262306a36Sopenharmony_ci case KVM_SET_MP_STATE: { 423362306a36Sopenharmony_ci struct kvm_mp_state mp_state; 423462306a36Sopenharmony_ci 423562306a36Sopenharmony_ci r = -EFAULT; 423662306a36Sopenharmony_ci if (copy_from_user(&mp_state, argp, sizeof(mp_state))) 423762306a36Sopenharmony_ci goto out; 423862306a36Sopenharmony_ci r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); 423962306a36Sopenharmony_ci break; 424062306a36Sopenharmony_ci } 424162306a36Sopenharmony_ci case KVM_TRANSLATE: { 424262306a36Sopenharmony_ci struct kvm_translation tr; 424362306a36Sopenharmony_ci 424462306a36Sopenharmony_ci r = -EFAULT; 424562306a36Sopenharmony_ci if (copy_from_user(&tr, argp, sizeof(tr))) 424662306a36Sopenharmony_ci goto out; 424762306a36Sopenharmony_ci r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); 424862306a36Sopenharmony_ci if (r) 424962306a36Sopenharmony_ci goto out; 425062306a36Sopenharmony_ci r = -EFAULT; 425162306a36Sopenharmony_ci if (copy_to_user(argp, &tr, sizeof(tr))) 425262306a36Sopenharmony_ci goto out; 425362306a36Sopenharmony_ci r = 0; 425462306a36Sopenharmony_ci break; 425562306a36Sopenharmony_ci } 425662306a36Sopenharmony_ci case KVM_SET_GUEST_DEBUG: { 425762306a36Sopenharmony_ci struct kvm_guest_debug dbg; 425862306a36Sopenharmony_ci 425962306a36Sopenharmony_ci r = -EFAULT; 426062306a36Sopenharmony_ci if (copy_from_user(&dbg, argp, sizeof(dbg))) 426162306a36Sopenharmony_ci goto out; 426262306a36Sopenharmony_ci r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); 426362306a36Sopenharmony_ci break; 426462306a36Sopenharmony_ci } 426562306a36Sopenharmony_ci case KVM_SET_SIGNAL_MASK: { 426662306a36Sopenharmony_ci struct kvm_signal_mask __user *sigmask_arg = argp; 426762306a36Sopenharmony_ci struct kvm_signal_mask kvm_sigmask; 426862306a36Sopenharmony_ci sigset_t sigset, *p; 426962306a36Sopenharmony_ci 427062306a36Sopenharmony_ci p = NULL; 427162306a36Sopenharmony_ci if (argp) { 427262306a36Sopenharmony_ci r = -EFAULT; 427362306a36Sopenharmony_ci if (copy_from_user(&kvm_sigmask, argp, 427462306a36Sopenharmony_ci sizeof(kvm_sigmask))) 427562306a36Sopenharmony_ci goto out; 427662306a36Sopenharmony_ci r = -EINVAL; 427762306a36Sopenharmony_ci if (kvm_sigmask.len != sizeof(sigset)) 427862306a36Sopenharmony_ci goto out; 427962306a36Sopenharmony_ci r = -EFAULT; 428062306a36Sopenharmony_ci if (copy_from_user(&sigset, sigmask_arg->sigset, 428162306a36Sopenharmony_ci sizeof(sigset))) 428262306a36Sopenharmony_ci goto out; 428362306a36Sopenharmony_ci p = &sigset; 428462306a36Sopenharmony_ci } 428562306a36Sopenharmony_ci r = kvm_vcpu_ioctl_set_sigmask(vcpu, p); 428662306a36Sopenharmony_ci break; 428762306a36Sopenharmony_ci } 428862306a36Sopenharmony_ci case KVM_GET_FPU: { 428962306a36Sopenharmony_ci fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL_ACCOUNT); 429062306a36Sopenharmony_ci r = -ENOMEM; 429162306a36Sopenharmony_ci if (!fpu) 429262306a36Sopenharmony_ci goto out; 429362306a36Sopenharmony_ci r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu); 429462306a36Sopenharmony_ci if (r) 429562306a36Sopenharmony_ci goto out; 429662306a36Sopenharmony_ci r = -EFAULT; 429762306a36Sopenharmony_ci if 
(copy_to_user(argp, fpu, sizeof(struct kvm_fpu))) 429862306a36Sopenharmony_ci goto out; 429962306a36Sopenharmony_ci r = 0; 430062306a36Sopenharmony_ci break; 430162306a36Sopenharmony_ci } 430262306a36Sopenharmony_ci case KVM_SET_FPU: { 430362306a36Sopenharmony_ci fpu = memdup_user(argp, sizeof(*fpu)); 430462306a36Sopenharmony_ci if (IS_ERR(fpu)) { 430562306a36Sopenharmony_ci r = PTR_ERR(fpu); 430662306a36Sopenharmony_ci fpu = NULL; 430762306a36Sopenharmony_ci goto out; 430862306a36Sopenharmony_ci } 430962306a36Sopenharmony_ci r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); 431062306a36Sopenharmony_ci break; 431162306a36Sopenharmony_ci } 431262306a36Sopenharmony_ci case KVM_GET_STATS_FD: { 431362306a36Sopenharmony_ci r = kvm_vcpu_ioctl_get_stats_fd(vcpu); 431462306a36Sopenharmony_ci break; 431562306a36Sopenharmony_ci } 431662306a36Sopenharmony_ci default: 431762306a36Sopenharmony_ci r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); 431862306a36Sopenharmony_ci } 431962306a36Sopenharmony_ciout: 432062306a36Sopenharmony_ci mutex_unlock(&vcpu->mutex); 432162306a36Sopenharmony_ci kfree(fpu); 432262306a36Sopenharmony_ci kfree(kvm_sregs); 432362306a36Sopenharmony_ci return r; 432462306a36Sopenharmony_ci} 432562306a36Sopenharmony_ci 432662306a36Sopenharmony_ci#ifdef CONFIG_KVM_COMPAT 432762306a36Sopenharmony_cistatic long kvm_vcpu_compat_ioctl(struct file *filp, 432862306a36Sopenharmony_ci unsigned int ioctl, unsigned long arg) 432962306a36Sopenharmony_ci{ 433062306a36Sopenharmony_ci struct kvm_vcpu *vcpu = filp->private_data; 433162306a36Sopenharmony_ci void __user *argp = compat_ptr(arg); 433262306a36Sopenharmony_ci int r; 433362306a36Sopenharmony_ci 433462306a36Sopenharmony_ci if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead) 433562306a36Sopenharmony_ci return -EIO; 433662306a36Sopenharmony_ci 433762306a36Sopenharmony_ci switch (ioctl) { 433862306a36Sopenharmony_ci case KVM_SET_SIGNAL_MASK: { 433962306a36Sopenharmony_ci struct kvm_signal_mask __user *sigmask_arg = argp; 434062306a36Sopenharmony_ci struct kvm_signal_mask kvm_sigmask; 434162306a36Sopenharmony_ci sigset_t sigset; 434262306a36Sopenharmony_ci 434362306a36Sopenharmony_ci if (argp) { 434462306a36Sopenharmony_ci r = -EFAULT; 434562306a36Sopenharmony_ci if (copy_from_user(&kvm_sigmask, argp, 434662306a36Sopenharmony_ci sizeof(kvm_sigmask))) 434762306a36Sopenharmony_ci goto out; 434862306a36Sopenharmony_ci r = -EINVAL; 434962306a36Sopenharmony_ci if (kvm_sigmask.len != sizeof(compat_sigset_t)) 435062306a36Sopenharmony_ci goto out; 435162306a36Sopenharmony_ci r = -EFAULT; 435262306a36Sopenharmony_ci if (get_compat_sigset(&sigset, 435362306a36Sopenharmony_ci (compat_sigset_t __user *)sigmask_arg->sigset)) 435462306a36Sopenharmony_ci goto out; 435562306a36Sopenharmony_ci r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); 435662306a36Sopenharmony_ci } else 435762306a36Sopenharmony_ci r = kvm_vcpu_ioctl_set_sigmask(vcpu, NULL); 435862306a36Sopenharmony_ci break; 435962306a36Sopenharmony_ci } 436062306a36Sopenharmony_ci default: 436162306a36Sopenharmony_ci r = kvm_vcpu_ioctl(filp, ioctl, arg); 436262306a36Sopenharmony_ci } 436362306a36Sopenharmony_ci 436462306a36Sopenharmony_ciout: 436562306a36Sopenharmony_ci return r; 436662306a36Sopenharmony_ci} 436762306a36Sopenharmony_ci#endif 436862306a36Sopenharmony_ci 436962306a36Sopenharmony_cistatic int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma) 437062306a36Sopenharmony_ci{ 437162306a36Sopenharmony_ci struct kvm_device *dev = filp->private_data; 437262306a36Sopenharmony_ci 
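 /* mmap() support is optional for a device; forward only if the backend provides it. */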
437362306a36Sopenharmony_ci if (dev->ops->mmap) 437462306a36Sopenharmony_ci return dev->ops->mmap(dev, vma); 437562306a36Sopenharmony_ci 437662306a36Sopenharmony_ci return -ENODEV; 437762306a36Sopenharmony_ci} 437862306a36Sopenharmony_ci 437962306a36Sopenharmony_cistatic int kvm_device_ioctl_attr(struct kvm_device *dev, 438062306a36Sopenharmony_ci int (*accessor)(struct kvm_device *dev, 438162306a36Sopenharmony_ci struct kvm_device_attr *attr), 438262306a36Sopenharmony_ci unsigned long arg) 438362306a36Sopenharmony_ci{ 438462306a36Sopenharmony_ci struct kvm_device_attr attr; 438562306a36Sopenharmony_ci 438662306a36Sopenharmony_ci if (!accessor) 438762306a36Sopenharmony_ci return -EPERM; 438862306a36Sopenharmony_ci 438962306a36Sopenharmony_ci if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 439062306a36Sopenharmony_ci return -EFAULT; 439162306a36Sopenharmony_ci 439262306a36Sopenharmony_ci return accessor(dev, &attr); 439362306a36Sopenharmony_ci} 439462306a36Sopenharmony_ci 439562306a36Sopenharmony_cistatic long kvm_device_ioctl(struct file *filp, unsigned int ioctl, 439662306a36Sopenharmony_ci unsigned long arg) 439762306a36Sopenharmony_ci{ 439862306a36Sopenharmony_ci struct kvm_device *dev = filp->private_data; 439962306a36Sopenharmony_ci 440062306a36Sopenharmony_ci if (dev->kvm->mm != current->mm || dev->kvm->vm_dead) 440162306a36Sopenharmony_ci return -EIO; 440262306a36Sopenharmony_ci 440362306a36Sopenharmony_ci switch (ioctl) { 440462306a36Sopenharmony_ci case KVM_SET_DEVICE_ATTR: 440562306a36Sopenharmony_ci return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); 440662306a36Sopenharmony_ci case KVM_GET_DEVICE_ATTR: 440762306a36Sopenharmony_ci return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg); 440862306a36Sopenharmony_ci case KVM_HAS_DEVICE_ATTR: 440962306a36Sopenharmony_ci return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg); 441062306a36Sopenharmony_ci default: 441162306a36Sopenharmony_ci if (dev->ops->ioctl) 441262306a36Sopenharmony_ci return dev->ops->ioctl(dev, ioctl, arg); 441362306a36Sopenharmony_ci 441462306a36Sopenharmony_ci return -ENOTTY; 441562306a36Sopenharmony_ci } 441662306a36Sopenharmony_ci} 441762306a36Sopenharmony_ci 441862306a36Sopenharmony_cistatic int kvm_device_release(struct inode *inode, struct file *filp) 441962306a36Sopenharmony_ci{ 442062306a36Sopenharmony_ci struct kvm_device *dev = filp->private_data; 442162306a36Sopenharmony_ci struct kvm *kvm = dev->kvm; 442262306a36Sopenharmony_ci 442362306a36Sopenharmony_ci if (dev->ops->release) { 442462306a36Sopenharmony_ci mutex_lock(&kvm->lock); 442562306a36Sopenharmony_ci list_del(&dev->vm_node); 442662306a36Sopenharmony_ci dev->ops->release(dev); 442762306a36Sopenharmony_ci mutex_unlock(&kvm->lock); 442862306a36Sopenharmony_ci } 442962306a36Sopenharmony_ci 443062306a36Sopenharmony_ci kvm_put_kvm(kvm); 443162306a36Sopenharmony_ci return 0; 443262306a36Sopenharmony_ci} 443362306a36Sopenharmony_ci 443462306a36Sopenharmony_cistatic const struct file_operations kvm_device_fops = { 443562306a36Sopenharmony_ci .unlocked_ioctl = kvm_device_ioctl, 443662306a36Sopenharmony_ci .release = kvm_device_release, 443762306a36Sopenharmony_ci KVM_COMPAT(kvm_device_ioctl), 443862306a36Sopenharmony_ci .mmap = kvm_device_mmap, 443962306a36Sopenharmony_ci}; 444062306a36Sopenharmony_ci 444162306a36Sopenharmony_cistruct kvm_device *kvm_device_from_filp(struct file *filp) 444262306a36Sopenharmony_ci{ 444362306a36Sopenharmony_ci if (filp->f_op != &kvm_device_fops) 444462306a36Sopenharmony_ci return NULL; 
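 /* Only file descriptors created via kvm_device_fops carry a kvm_device in private_data. */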
444562306a36Sopenharmony_ci 444662306a36Sopenharmony_ci return filp->private_data; 444762306a36Sopenharmony_ci} 444862306a36Sopenharmony_ci 444962306a36Sopenharmony_cistatic const struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { 445062306a36Sopenharmony_ci#ifdef CONFIG_KVM_MPIC 445162306a36Sopenharmony_ci [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops, 445262306a36Sopenharmony_ci [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops, 445362306a36Sopenharmony_ci#endif 445462306a36Sopenharmony_ci}; 445562306a36Sopenharmony_ci 445662306a36Sopenharmony_ciint kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type) 445762306a36Sopenharmony_ci{ 445862306a36Sopenharmony_ci if (type >= ARRAY_SIZE(kvm_device_ops_table)) 445962306a36Sopenharmony_ci return -ENOSPC; 446062306a36Sopenharmony_ci 446162306a36Sopenharmony_ci if (kvm_device_ops_table[type] != NULL) 446262306a36Sopenharmony_ci return -EEXIST; 446362306a36Sopenharmony_ci 446462306a36Sopenharmony_ci kvm_device_ops_table[type] = ops; 446562306a36Sopenharmony_ci return 0; 446662306a36Sopenharmony_ci} 446762306a36Sopenharmony_ci 446862306a36Sopenharmony_civoid kvm_unregister_device_ops(u32 type) 446962306a36Sopenharmony_ci{ 447062306a36Sopenharmony_ci if (kvm_device_ops_table[type] != NULL) 447162306a36Sopenharmony_ci kvm_device_ops_table[type] = NULL; 447262306a36Sopenharmony_ci} 447362306a36Sopenharmony_ci 447462306a36Sopenharmony_cistatic int kvm_ioctl_create_device(struct kvm *kvm, 447562306a36Sopenharmony_ci struct kvm_create_device *cd) 447662306a36Sopenharmony_ci{ 447762306a36Sopenharmony_ci const struct kvm_device_ops *ops; 447862306a36Sopenharmony_ci struct kvm_device *dev; 447962306a36Sopenharmony_ci bool test = cd->flags & KVM_CREATE_DEVICE_TEST; 448062306a36Sopenharmony_ci int type; 448162306a36Sopenharmony_ci int ret; 448262306a36Sopenharmony_ci 448362306a36Sopenharmony_ci if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) 448462306a36Sopenharmony_ci return -ENODEV; 448562306a36Sopenharmony_ci 448662306a36Sopenharmony_ci type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table)); 448762306a36Sopenharmony_ci ops = kvm_device_ops_table[type]; 448862306a36Sopenharmony_ci if (ops == NULL) 448962306a36Sopenharmony_ci return -ENODEV; 449062306a36Sopenharmony_ci 449162306a36Sopenharmony_ci if (test) 449262306a36Sopenharmony_ci return 0; 449362306a36Sopenharmony_ci 449462306a36Sopenharmony_ci dev = kzalloc(sizeof(*dev), GFP_KERNEL_ACCOUNT); 449562306a36Sopenharmony_ci if (!dev) 449662306a36Sopenharmony_ci return -ENOMEM; 449762306a36Sopenharmony_ci 449862306a36Sopenharmony_ci dev->ops = ops; 449962306a36Sopenharmony_ci dev->kvm = kvm; 450062306a36Sopenharmony_ci 450162306a36Sopenharmony_ci mutex_lock(&kvm->lock); 450262306a36Sopenharmony_ci ret = ops->create(dev, type); 450362306a36Sopenharmony_ci if (ret < 0) { 450462306a36Sopenharmony_ci mutex_unlock(&kvm->lock); 450562306a36Sopenharmony_ci kfree(dev); 450662306a36Sopenharmony_ci return ret; 450762306a36Sopenharmony_ci } 450862306a36Sopenharmony_ci list_add(&dev->vm_node, &kvm->devices); 450962306a36Sopenharmony_ci mutex_unlock(&kvm->lock); 451062306a36Sopenharmony_ci 451162306a36Sopenharmony_ci if (ops->init) 451262306a36Sopenharmony_ci ops->init(dev); 451362306a36Sopenharmony_ci 451462306a36Sopenharmony_ci kvm_get_kvm(kvm); 451562306a36Sopenharmony_ci ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); 451662306a36Sopenharmony_ci if (ret < 0) { 451762306a36Sopenharmony_ci kvm_put_kvm_no_destroy(kvm); 451862306a36Sopenharmony_ci 
mutex_lock(&kvm->lock); 451962306a36Sopenharmony_ci list_del(&dev->vm_node); 452062306a36Sopenharmony_ci if (ops->release) 452162306a36Sopenharmony_ci ops->release(dev); 452262306a36Sopenharmony_ci mutex_unlock(&kvm->lock); 452362306a36Sopenharmony_ci if (ops->destroy) 452462306a36Sopenharmony_ci ops->destroy(dev); 452562306a36Sopenharmony_ci return ret; 452662306a36Sopenharmony_ci } 452762306a36Sopenharmony_ci 452862306a36Sopenharmony_ci cd->fd = ret; 452962306a36Sopenharmony_ci return 0; 453062306a36Sopenharmony_ci} 453162306a36Sopenharmony_ci 453262306a36Sopenharmony_cistatic int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) 453362306a36Sopenharmony_ci{ 453462306a36Sopenharmony_ci switch (arg) { 453562306a36Sopenharmony_ci case KVM_CAP_USER_MEMORY: 453662306a36Sopenharmony_ci case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: 453762306a36Sopenharmony_ci case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: 453862306a36Sopenharmony_ci case KVM_CAP_INTERNAL_ERROR_DATA: 453962306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM_MSI 454062306a36Sopenharmony_ci case KVM_CAP_SIGNAL_MSI: 454162306a36Sopenharmony_ci#endif 454262306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM_IRQFD 454362306a36Sopenharmony_ci case KVM_CAP_IRQFD: 454462306a36Sopenharmony_ci#endif 454562306a36Sopenharmony_ci case KVM_CAP_IOEVENTFD_ANY_LENGTH: 454662306a36Sopenharmony_ci case KVM_CAP_CHECK_EXTENSION_VM: 454762306a36Sopenharmony_ci case KVM_CAP_ENABLE_CAP_VM: 454862306a36Sopenharmony_ci case KVM_CAP_HALT_POLL: 454962306a36Sopenharmony_ci return 1; 455062306a36Sopenharmony_ci#ifdef CONFIG_KVM_MMIO 455162306a36Sopenharmony_ci case KVM_CAP_COALESCED_MMIO: 455262306a36Sopenharmony_ci return KVM_COALESCED_MMIO_PAGE_OFFSET; 455362306a36Sopenharmony_ci case KVM_CAP_COALESCED_PIO: 455462306a36Sopenharmony_ci return 1; 455562306a36Sopenharmony_ci#endif 455662306a36Sopenharmony_ci#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT 455762306a36Sopenharmony_ci case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: 455862306a36Sopenharmony_ci return KVM_DIRTY_LOG_MANUAL_CAPS; 455962306a36Sopenharmony_ci#endif 456062306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING 456162306a36Sopenharmony_ci case KVM_CAP_IRQ_ROUTING: 456262306a36Sopenharmony_ci return KVM_MAX_IRQ_ROUTES; 456362306a36Sopenharmony_ci#endif 456462306a36Sopenharmony_ci#if KVM_ADDRESS_SPACE_NUM > 1 456562306a36Sopenharmony_ci case KVM_CAP_MULTI_ADDRESS_SPACE: 456662306a36Sopenharmony_ci return KVM_ADDRESS_SPACE_NUM; 456762306a36Sopenharmony_ci#endif 456862306a36Sopenharmony_ci case KVM_CAP_NR_MEMSLOTS: 456962306a36Sopenharmony_ci return KVM_USER_MEM_SLOTS; 457062306a36Sopenharmony_ci case KVM_CAP_DIRTY_LOG_RING: 457162306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM_DIRTY_RING_TSO 457262306a36Sopenharmony_ci return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn); 457362306a36Sopenharmony_ci#else 457462306a36Sopenharmony_ci return 0; 457562306a36Sopenharmony_ci#endif 457662306a36Sopenharmony_ci case KVM_CAP_DIRTY_LOG_RING_ACQ_REL: 457762306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL 457862306a36Sopenharmony_ci return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn); 457962306a36Sopenharmony_ci#else 458062306a36Sopenharmony_ci return 0; 458162306a36Sopenharmony_ci#endif 458262306a36Sopenharmony_ci#ifdef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP 458362306a36Sopenharmony_ci case KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP: 458462306a36Sopenharmony_ci#endif 458562306a36Sopenharmony_ci case KVM_CAP_BINARY_STATS_FD: 458662306a36Sopenharmony_ci case KVM_CAP_SYSTEM_EVENT_DATA: 
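		/* Every capability that falls through to here is reported as plainly supported. */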
458762306a36Sopenharmony_ci		return 1;
458862306a36Sopenharmony_ci	default:
458962306a36Sopenharmony_ci		break;
459062306a36Sopenharmony_ci	}
459162306a36Sopenharmony_ci	return kvm_vm_ioctl_check_extension(kvm, arg);
459262306a36Sopenharmony_ci}
459362306a36Sopenharmony_ci
459462306a36Sopenharmony_cistatic int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, u32 size)
459562306a36Sopenharmony_ci{
459662306a36Sopenharmony_ci	int r;
459762306a36Sopenharmony_ci
459862306a36Sopenharmony_ci	if (!KVM_DIRTY_LOG_PAGE_OFFSET)
459962306a36Sopenharmony_ci		return -EINVAL;
460062306a36Sopenharmony_ci
460162306a36Sopenharmony_ci	/* The size must be a power of 2. */
460262306a36Sopenharmony_ci	if (!size || (size & (size - 1)))
460362306a36Sopenharmony_ci		return -EINVAL;
460462306a36Sopenharmony_ci
460562306a36Sopenharmony_ci	/* Must be large enough to hold the reserved entries and be at least a page. */
460662306a36Sopenharmony_ci	if (size < kvm_dirty_ring_get_rsvd_entries() *
460762306a36Sopenharmony_ci	    sizeof(struct kvm_dirty_gfn) || size < PAGE_SIZE)
460862306a36Sopenharmony_ci		return -EINVAL;
460962306a36Sopenharmony_ci
461062306a36Sopenharmony_ci	if (size > KVM_DIRTY_RING_MAX_ENTRIES *
461162306a36Sopenharmony_ci	    sizeof(struct kvm_dirty_gfn))
461262306a36Sopenharmony_ci		return -E2BIG;
461362306a36Sopenharmony_ci
461462306a36Sopenharmony_ci	/* The ring size can only be set once. */
461562306a36Sopenharmony_ci	if (kvm->dirty_ring_size)
461662306a36Sopenharmony_ci		return -EINVAL;
461762306a36Sopenharmony_ci
461862306a36Sopenharmony_ci	mutex_lock(&kvm->lock);
461962306a36Sopenharmony_ci
462062306a36Sopenharmony_ci	if (kvm->created_vcpus) {
462162306a36Sopenharmony_ci		/* The size cannot be changed once vCPUs have been created. */
462262306a36Sopenharmony_ci		r = -EINVAL;
462362306a36Sopenharmony_ci	} else {
462462306a36Sopenharmony_ci		kvm->dirty_ring_size = size;
462562306a36Sopenharmony_ci		r = 0;
462662306a36Sopenharmony_ci	}
462762306a36Sopenharmony_ci
462862306a36Sopenharmony_ci	mutex_unlock(&kvm->lock);
462962306a36Sopenharmony_ci	return r;
463062306a36Sopenharmony_ci}
463162306a36Sopenharmony_ci
463262306a36Sopenharmony_cistatic int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm)
463362306a36Sopenharmony_ci{
463462306a36Sopenharmony_ci	unsigned long i;
463562306a36Sopenharmony_ci	struct kvm_vcpu *vcpu;
463662306a36Sopenharmony_ci	int cleared = 0;
463762306a36Sopenharmony_ci
463862306a36Sopenharmony_ci	if (!kvm->dirty_ring_size)
463962306a36Sopenharmony_ci		return -EINVAL;
464062306a36Sopenharmony_ci
464162306a36Sopenharmony_ci	mutex_lock(&kvm->slots_lock);
464262306a36Sopenharmony_ci
464362306a36Sopenharmony_ci	kvm_for_each_vcpu(i, vcpu, kvm)
464462306a36Sopenharmony_ci		cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring);
464562306a36Sopenharmony_ci
464662306a36Sopenharmony_ci	mutex_unlock(&kvm->slots_lock);
464762306a36Sopenharmony_ci
464862306a36Sopenharmony_ci	if (cleared)
464962306a36Sopenharmony_ci		kvm_flush_remote_tlbs(kvm);
465062306a36Sopenharmony_ci
465162306a36Sopenharmony_ci	return cleared;
465262306a36Sopenharmony_ci}
465362306a36Sopenharmony_ci
465462306a36Sopenharmony_ciint __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
465562306a36Sopenharmony_ci						  struct kvm_enable_cap *cap)
465662306a36Sopenharmony_ci{
465762306a36Sopenharmony_ci	return -EINVAL;
465862306a36Sopenharmony_ci}
465962306a36Sopenharmony_ci
466062306a36Sopenharmony_cibool kvm_are_all_memslots_empty(struct kvm *kvm)
466162306a36Sopenharmony_ci{
466262306a36Sopenharmony_ci	int i;
466362306a36Sopenharmony_ci
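	/*
	 * The answer is only stable while slots_lock is held, so insist on the
	 * lock and then check every address space the VM has.
	 */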
466462306a36Sopenharmony_ci lockdep_assert_held(&kvm->slots_lock); 466562306a36Sopenharmony_ci 466662306a36Sopenharmony_ci for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { 466762306a36Sopenharmony_ci if (!kvm_memslots_empty(__kvm_memslots(kvm, i))) 466862306a36Sopenharmony_ci return false; 466962306a36Sopenharmony_ci } 467062306a36Sopenharmony_ci 467162306a36Sopenharmony_ci return true; 467262306a36Sopenharmony_ci} 467362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_are_all_memslots_empty); 467462306a36Sopenharmony_ci 467562306a36Sopenharmony_cistatic int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, 467662306a36Sopenharmony_ci struct kvm_enable_cap *cap) 467762306a36Sopenharmony_ci{ 467862306a36Sopenharmony_ci switch (cap->cap) { 467962306a36Sopenharmony_ci#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT 468062306a36Sopenharmony_ci case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: { 468162306a36Sopenharmony_ci u64 allowed_options = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE; 468262306a36Sopenharmony_ci 468362306a36Sopenharmony_ci if (cap->args[0] & KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE) 468462306a36Sopenharmony_ci allowed_options = KVM_DIRTY_LOG_MANUAL_CAPS; 468562306a36Sopenharmony_ci 468662306a36Sopenharmony_ci if (cap->flags || (cap->args[0] & ~allowed_options)) 468762306a36Sopenharmony_ci return -EINVAL; 468862306a36Sopenharmony_ci kvm->manual_dirty_log_protect = cap->args[0]; 468962306a36Sopenharmony_ci return 0; 469062306a36Sopenharmony_ci } 469162306a36Sopenharmony_ci#endif 469262306a36Sopenharmony_ci case KVM_CAP_HALT_POLL: { 469362306a36Sopenharmony_ci if (cap->flags || cap->args[0] != (unsigned int)cap->args[0]) 469462306a36Sopenharmony_ci return -EINVAL; 469562306a36Sopenharmony_ci 469662306a36Sopenharmony_ci kvm->max_halt_poll_ns = cap->args[0]; 469762306a36Sopenharmony_ci 469862306a36Sopenharmony_ci /* 469962306a36Sopenharmony_ci * Ensure kvm->override_halt_poll_ns does not become visible 470062306a36Sopenharmony_ci * before kvm->max_halt_poll_ns. 470162306a36Sopenharmony_ci * 470262306a36Sopenharmony_ci * Pairs with the smp_rmb() in kvm_vcpu_max_halt_poll_ns(). 470362306a36Sopenharmony_ci */ 470462306a36Sopenharmony_ci smp_wmb(); 470562306a36Sopenharmony_ci kvm->override_halt_poll_ns = true; 470662306a36Sopenharmony_ci 470762306a36Sopenharmony_ci return 0; 470862306a36Sopenharmony_ci } 470962306a36Sopenharmony_ci case KVM_CAP_DIRTY_LOG_RING: 471062306a36Sopenharmony_ci case KVM_CAP_DIRTY_LOG_RING_ACQ_REL: 471162306a36Sopenharmony_ci if (!kvm_vm_ioctl_check_extension_generic(kvm, cap->cap)) 471262306a36Sopenharmony_ci return -EINVAL; 471362306a36Sopenharmony_ci 471462306a36Sopenharmony_ci return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); 471562306a36Sopenharmony_ci case KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP: { 471662306a36Sopenharmony_ci int r = -EINVAL; 471762306a36Sopenharmony_ci 471862306a36Sopenharmony_ci if (!IS_ENABLED(CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP) || 471962306a36Sopenharmony_ci !kvm->dirty_ring_size || cap->flags) 472062306a36Sopenharmony_ci return r; 472162306a36Sopenharmony_ci 472262306a36Sopenharmony_ci mutex_lock(&kvm->slots_lock); 472362306a36Sopenharmony_ci 472462306a36Sopenharmony_ci /* 472562306a36Sopenharmony_ci * For simplicity, allow enabling ring+bitmap if and only if 472662306a36Sopenharmony_ci * there are no memslots, e.g. to ensure all memslots allocate 472762306a36Sopenharmony_ci * a bitmap after the capability is enabled. 
472862306a36Sopenharmony_ci */ 472962306a36Sopenharmony_ci if (kvm_are_all_memslots_empty(kvm)) { 473062306a36Sopenharmony_ci kvm->dirty_ring_with_bitmap = true; 473162306a36Sopenharmony_ci r = 0; 473262306a36Sopenharmony_ci } 473362306a36Sopenharmony_ci 473462306a36Sopenharmony_ci mutex_unlock(&kvm->slots_lock); 473562306a36Sopenharmony_ci 473662306a36Sopenharmony_ci return r; 473762306a36Sopenharmony_ci } 473862306a36Sopenharmony_ci default: 473962306a36Sopenharmony_ci return kvm_vm_ioctl_enable_cap(kvm, cap); 474062306a36Sopenharmony_ci } 474162306a36Sopenharmony_ci} 474262306a36Sopenharmony_ci 474362306a36Sopenharmony_cistatic ssize_t kvm_vm_stats_read(struct file *file, char __user *user_buffer, 474462306a36Sopenharmony_ci size_t size, loff_t *offset) 474562306a36Sopenharmony_ci{ 474662306a36Sopenharmony_ci struct kvm *kvm = file->private_data; 474762306a36Sopenharmony_ci 474862306a36Sopenharmony_ci return kvm_stats_read(kvm->stats_id, &kvm_vm_stats_header, 474962306a36Sopenharmony_ci &kvm_vm_stats_desc[0], &kvm->stat, 475062306a36Sopenharmony_ci sizeof(kvm->stat), user_buffer, size, offset); 475162306a36Sopenharmony_ci} 475262306a36Sopenharmony_ci 475362306a36Sopenharmony_cistatic int kvm_vm_stats_release(struct inode *inode, struct file *file) 475462306a36Sopenharmony_ci{ 475562306a36Sopenharmony_ci struct kvm *kvm = file->private_data; 475662306a36Sopenharmony_ci 475762306a36Sopenharmony_ci kvm_put_kvm(kvm); 475862306a36Sopenharmony_ci return 0; 475962306a36Sopenharmony_ci} 476062306a36Sopenharmony_ci 476162306a36Sopenharmony_cistatic const struct file_operations kvm_vm_stats_fops = { 476262306a36Sopenharmony_ci .read = kvm_vm_stats_read, 476362306a36Sopenharmony_ci .release = kvm_vm_stats_release, 476462306a36Sopenharmony_ci .llseek = noop_llseek, 476562306a36Sopenharmony_ci}; 476662306a36Sopenharmony_ci 476762306a36Sopenharmony_cistatic int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm) 476862306a36Sopenharmony_ci{ 476962306a36Sopenharmony_ci int fd; 477062306a36Sopenharmony_ci struct file *file; 477162306a36Sopenharmony_ci 477262306a36Sopenharmony_ci fd = get_unused_fd_flags(O_CLOEXEC); 477362306a36Sopenharmony_ci if (fd < 0) 477462306a36Sopenharmony_ci return fd; 477562306a36Sopenharmony_ci 477662306a36Sopenharmony_ci file = anon_inode_getfile("kvm-vm-stats", 477762306a36Sopenharmony_ci &kvm_vm_stats_fops, kvm, O_RDONLY); 477862306a36Sopenharmony_ci if (IS_ERR(file)) { 477962306a36Sopenharmony_ci put_unused_fd(fd); 478062306a36Sopenharmony_ci return PTR_ERR(file); 478162306a36Sopenharmony_ci } 478262306a36Sopenharmony_ci 478362306a36Sopenharmony_ci kvm_get_kvm(kvm); 478462306a36Sopenharmony_ci 478562306a36Sopenharmony_ci file->f_mode |= FMODE_PREAD; 478662306a36Sopenharmony_ci fd_install(fd, file); 478762306a36Sopenharmony_ci 478862306a36Sopenharmony_ci return fd; 478962306a36Sopenharmony_ci} 479062306a36Sopenharmony_ci 479162306a36Sopenharmony_cistatic long kvm_vm_ioctl(struct file *filp, 479262306a36Sopenharmony_ci unsigned int ioctl, unsigned long arg) 479362306a36Sopenharmony_ci{ 479462306a36Sopenharmony_ci struct kvm *kvm = filp->private_data; 479562306a36Sopenharmony_ci void __user *argp = (void __user *)arg; 479662306a36Sopenharmony_ci int r; 479762306a36Sopenharmony_ci 479862306a36Sopenharmony_ci if (kvm->mm != current->mm || kvm->vm_dead) 479962306a36Sopenharmony_ci return -EIO; 480062306a36Sopenharmony_ci switch (ioctl) { 480162306a36Sopenharmony_ci case KVM_CREATE_VCPU: 480262306a36Sopenharmony_ci r = kvm_vm_ioctl_create_vcpu(kvm, arg); 480362306a36Sopenharmony_ci break; 
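	/*
	 * A minimal userspace sketch of enabling a VM capability (vm_fd is
	 * assumed to come from KVM_CREATE_VM, error handling omitted; an
	 * args[0] of 0 disables halt polling for this VM):
	 *
	 *	struct kvm_enable_cap cap = {
	 *		.cap = KVM_CAP_HALT_POLL,
	 *		.args = { 0 },
	 *	};
	 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	 */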
480462306a36Sopenharmony_ci case KVM_ENABLE_CAP: { 480562306a36Sopenharmony_ci struct kvm_enable_cap cap; 480662306a36Sopenharmony_ci 480762306a36Sopenharmony_ci r = -EFAULT; 480862306a36Sopenharmony_ci if (copy_from_user(&cap, argp, sizeof(cap))) 480962306a36Sopenharmony_ci goto out; 481062306a36Sopenharmony_ci r = kvm_vm_ioctl_enable_cap_generic(kvm, &cap); 481162306a36Sopenharmony_ci break; 481262306a36Sopenharmony_ci } 481362306a36Sopenharmony_ci case KVM_SET_USER_MEMORY_REGION: { 481462306a36Sopenharmony_ci struct kvm_userspace_memory_region kvm_userspace_mem; 481562306a36Sopenharmony_ci 481662306a36Sopenharmony_ci r = -EFAULT; 481762306a36Sopenharmony_ci if (copy_from_user(&kvm_userspace_mem, argp, 481862306a36Sopenharmony_ci sizeof(kvm_userspace_mem))) 481962306a36Sopenharmony_ci goto out; 482062306a36Sopenharmony_ci 482162306a36Sopenharmony_ci r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); 482262306a36Sopenharmony_ci break; 482362306a36Sopenharmony_ci } 482462306a36Sopenharmony_ci case KVM_GET_DIRTY_LOG: { 482562306a36Sopenharmony_ci struct kvm_dirty_log log; 482662306a36Sopenharmony_ci 482762306a36Sopenharmony_ci r = -EFAULT; 482862306a36Sopenharmony_ci if (copy_from_user(&log, argp, sizeof(log))) 482962306a36Sopenharmony_ci goto out; 483062306a36Sopenharmony_ci r = kvm_vm_ioctl_get_dirty_log(kvm, &log); 483162306a36Sopenharmony_ci break; 483262306a36Sopenharmony_ci } 483362306a36Sopenharmony_ci#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT 483462306a36Sopenharmony_ci case KVM_CLEAR_DIRTY_LOG: { 483562306a36Sopenharmony_ci struct kvm_clear_dirty_log log; 483662306a36Sopenharmony_ci 483762306a36Sopenharmony_ci r = -EFAULT; 483862306a36Sopenharmony_ci if (copy_from_user(&log, argp, sizeof(log))) 483962306a36Sopenharmony_ci goto out; 484062306a36Sopenharmony_ci r = kvm_vm_ioctl_clear_dirty_log(kvm, &log); 484162306a36Sopenharmony_ci break; 484262306a36Sopenharmony_ci } 484362306a36Sopenharmony_ci#endif 484462306a36Sopenharmony_ci#ifdef CONFIG_KVM_MMIO 484562306a36Sopenharmony_ci case KVM_REGISTER_COALESCED_MMIO: { 484662306a36Sopenharmony_ci struct kvm_coalesced_mmio_zone zone; 484762306a36Sopenharmony_ci 484862306a36Sopenharmony_ci r = -EFAULT; 484962306a36Sopenharmony_ci if (copy_from_user(&zone, argp, sizeof(zone))) 485062306a36Sopenharmony_ci goto out; 485162306a36Sopenharmony_ci r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); 485262306a36Sopenharmony_ci break; 485362306a36Sopenharmony_ci } 485462306a36Sopenharmony_ci case KVM_UNREGISTER_COALESCED_MMIO: { 485562306a36Sopenharmony_ci struct kvm_coalesced_mmio_zone zone; 485662306a36Sopenharmony_ci 485762306a36Sopenharmony_ci r = -EFAULT; 485862306a36Sopenharmony_ci if (copy_from_user(&zone, argp, sizeof(zone))) 485962306a36Sopenharmony_ci goto out; 486062306a36Sopenharmony_ci r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); 486162306a36Sopenharmony_ci break; 486262306a36Sopenharmony_ci } 486362306a36Sopenharmony_ci#endif 486462306a36Sopenharmony_ci case KVM_IRQFD: { 486562306a36Sopenharmony_ci struct kvm_irqfd data; 486662306a36Sopenharmony_ci 486762306a36Sopenharmony_ci r = -EFAULT; 486862306a36Sopenharmony_ci if (copy_from_user(&data, argp, sizeof(data))) 486962306a36Sopenharmony_ci goto out; 487062306a36Sopenharmony_ci r = kvm_irqfd(kvm, &data); 487162306a36Sopenharmony_ci break; 487262306a36Sopenharmony_ci } 487362306a36Sopenharmony_ci case KVM_IOEVENTFD: { 487462306a36Sopenharmony_ci struct kvm_ioeventfd data; 487562306a36Sopenharmony_ci 487662306a36Sopenharmony_ci r = -EFAULT; 
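		/*
		 * As in the other cases, r is primed with -EFAULT so a failed copy
		 * from userspace falls straight through to the "out" label.
		 */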
487762306a36Sopenharmony_ci if (copy_from_user(&data, argp, sizeof(data))) 487862306a36Sopenharmony_ci goto out; 487962306a36Sopenharmony_ci r = kvm_ioeventfd(kvm, &data); 488062306a36Sopenharmony_ci break; 488162306a36Sopenharmony_ci } 488262306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM_MSI 488362306a36Sopenharmony_ci case KVM_SIGNAL_MSI: { 488462306a36Sopenharmony_ci struct kvm_msi msi; 488562306a36Sopenharmony_ci 488662306a36Sopenharmony_ci r = -EFAULT; 488762306a36Sopenharmony_ci if (copy_from_user(&msi, argp, sizeof(msi))) 488862306a36Sopenharmony_ci goto out; 488962306a36Sopenharmony_ci r = kvm_send_userspace_msi(kvm, &msi); 489062306a36Sopenharmony_ci break; 489162306a36Sopenharmony_ci } 489262306a36Sopenharmony_ci#endif 489362306a36Sopenharmony_ci#ifdef __KVM_HAVE_IRQ_LINE 489462306a36Sopenharmony_ci case KVM_IRQ_LINE_STATUS: 489562306a36Sopenharmony_ci case KVM_IRQ_LINE: { 489662306a36Sopenharmony_ci struct kvm_irq_level irq_event; 489762306a36Sopenharmony_ci 489862306a36Sopenharmony_ci r = -EFAULT; 489962306a36Sopenharmony_ci if (copy_from_user(&irq_event, argp, sizeof(irq_event))) 490062306a36Sopenharmony_ci goto out; 490162306a36Sopenharmony_ci 490262306a36Sopenharmony_ci r = kvm_vm_ioctl_irq_line(kvm, &irq_event, 490362306a36Sopenharmony_ci ioctl == KVM_IRQ_LINE_STATUS); 490462306a36Sopenharmony_ci if (r) 490562306a36Sopenharmony_ci goto out; 490662306a36Sopenharmony_ci 490762306a36Sopenharmony_ci r = -EFAULT; 490862306a36Sopenharmony_ci if (ioctl == KVM_IRQ_LINE_STATUS) { 490962306a36Sopenharmony_ci if (copy_to_user(argp, &irq_event, sizeof(irq_event))) 491062306a36Sopenharmony_ci goto out; 491162306a36Sopenharmony_ci } 491262306a36Sopenharmony_ci 491362306a36Sopenharmony_ci r = 0; 491462306a36Sopenharmony_ci break; 491562306a36Sopenharmony_ci } 491662306a36Sopenharmony_ci#endif 491762306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING 491862306a36Sopenharmony_ci case KVM_SET_GSI_ROUTING: { 491962306a36Sopenharmony_ci struct kvm_irq_routing routing; 492062306a36Sopenharmony_ci struct kvm_irq_routing __user *urouting; 492162306a36Sopenharmony_ci struct kvm_irq_routing_entry *entries = NULL; 492262306a36Sopenharmony_ci 492362306a36Sopenharmony_ci r = -EFAULT; 492462306a36Sopenharmony_ci if (copy_from_user(&routing, argp, sizeof(routing))) 492562306a36Sopenharmony_ci goto out; 492662306a36Sopenharmony_ci r = -EINVAL; 492762306a36Sopenharmony_ci if (!kvm_arch_can_set_irq_routing(kvm)) 492862306a36Sopenharmony_ci goto out; 492962306a36Sopenharmony_ci if (routing.nr > KVM_MAX_IRQ_ROUTES) 493062306a36Sopenharmony_ci goto out; 493162306a36Sopenharmony_ci if (routing.flags) 493262306a36Sopenharmony_ci goto out; 493362306a36Sopenharmony_ci if (routing.nr) { 493462306a36Sopenharmony_ci urouting = argp; 493562306a36Sopenharmony_ci entries = vmemdup_user(urouting->entries, 493662306a36Sopenharmony_ci array_size(sizeof(*entries), 493762306a36Sopenharmony_ci routing.nr)); 493862306a36Sopenharmony_ci if (IS_ERR(entries)) { 493962306a36Sopenharmony_ci r = PTR_ERR(entries); 494062306a36Sopenharmony_ci goto out; 494162306a36Sopenharmony_ci } 494262306a36Sopenharmony_ci } 494362306a36Sopenharmony_ci r = kvm_set_irq_routing(kvm, entries, routing.nr, 494462306a36Sopenharmony_ci routing.flags); 494562306a36Sopenharmony_ci kvfree(entries); 494662306a36Sopenharmony_ci break; 494762306a36Sopenharmony_ci } 494862306a36Sopenharmony_ci#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ 494962306a36Sopenharmony_ci case KVM_CREATE_DEVICE: { 495062306a36Sopenharmony_ci struct kvm_create_device cd; 
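		/*
		 * Create an in-kernel device and hand its fd back via cd.fd; with
		 * KVM_CREATE_DEVICE_TEST set, only the device type is probed.  A
		 * minimal userspace sketch (vm_fd is assumed, the VFIO device type
		 * is assumed to be built in, error handling omitted):
		 *
		 *	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
		 *	ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);
		 *	new_device_fd = cd.fd;
		 */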
495162306a36Sopenharmony_ci 495262306a36Sopenharmony_ci r = -EFAULT; 495362306a36Sopenharmony_ci if (copy_from_user(&cd, argp, sizeof(cd))) 495462306a36Sopenharmony_ci goto out; 495562306a36Sopenharmony_ci 495662306a36Sopenharmony_ci r = kvm_ioctl_create_device(kvm, &cd); 495762306a36Sopenharmony_ci if (r) 495862306a36Sopenharmony_ci goto out; 495962306a36Sopenharmony_ci 496062306a36Sopenharmony_ci r = -EFAULT; 496162306a36Sopenharmony_ci if (copy_to_user(argp, &cd, sizeof(cd))) 496262306a36Sopenharmony_ci goto out; 496362306a36Sopenharmony_ci 496462306a36Sopenharmony_ci r = 0; 496562306a36Sopenharmony_ci break; 496662306a36Sopenharmony_ci } 496762306a36Sopenharmony_ci case KVM_CHECK_EXTENSION: 496862306a36Sopenharmony_ci r = kvm_vm_ioctl_check_extension_generic(kvm, arg); 496962306a36Sopenharmony_ci break; 497062306a36Sopenharmony_ci case KVM_RESET_DIRTY_RINGS: 497162306a36Sopenharmony_ci r = kvm_vm_ioctl_reset_dirty_pages(kvm); 497262306a36Sopenharmony_ci break; 497362306a36Sopenharmony_ci case KVM_GET_STATS_FD: 497462306a36Sopenharmony_ci r = kvm_vm_ioctl_get_stats_fd(kvm); 497562306a36Sopenharmony_ci break; 497662306a36Sopenharmony_ci default: 497762306a36Sopenharmony_ci r = kvm_arch_vm_ioctl(filp, ioctl, arg); 497862306a36Sopenharmony_ci } 497962306a36Sopenharmony_ciout: 498062306a36Sopenharmony_ci return r; 498162306a36Sopenharmony_ci} 498262306a36Sopenharmony_ci 498362306a36Sopenharmony_ci#ifdef CONFIG_KVM_COMPAT 498462306a36Sopenharmony_cistruct compat_kvm_dirty_log { 498562306a36Sopenharmony_ci __u32 slot; 498662306a36Sopenharmony_ci __u32 padding1; 498762306a36Sopenharmony_ci union { 498862306a36Sopenharmony_ci compat_uptr_t dirty_bitmap; /* one bit per page */ 498962306a36Sopenharmony_ci __u64 padding2; 499062306a36Sopenharmony_ci }; 499162306a36Sopenharmony_ci}; 499262306a36Sopenharmony_ci 499362306a36Sopenharmony_cistruct compat_kvm_clear_dirty_log { 499462306a36Sopenharmony_ci __u32 slot; 499562306a36Sopenharmony_ci __u32 num_pages; 499662306a36Sopenharmony_ci __u64 first_page; 499762306a36Sopenharmony_ci union { 499862306a36Sopenharmony_ci compat_uptr_t dirty_bitmap; /* one bit per page */ 499962306a36Sopenharmony_ci __u64 padding2; 500062306a36Sopenharmony_ci }; 500162306a36Sopenharmony_ci}; 500262306a36Sopenharmony_ci 500362306a36Sopenharmony_cilong __weak kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl, 500462306a36Sopenharmony_ci unsigned long arg) 500562306a36Sopenharmony_ci{ 500662306a36Sopenharmony_ci return -ENOTTY; 500762306a36Sopenharmony_ci} 500862306a36Sopenharmony_ci 500962306a36Sopenharmony_cistatic long kvm_vm_compat_ioctl(struct file *filp, 501062306a36Sopenharmony_ci unsigned int ioctl, unsigned long arg) 501162306a36Sopenharmony_ci{ 501262306a36Sopenharmony_ci struct kvm *kvm = filp->private_data; 501362306a36Sopenharmony_ci int r; 501462306a36Sopenharmony_ci 501562306a36Sopenharmony_ci if (kvm->mm != current->mm || kvm->vm_dead) 501662306a36Sopenharmony_ci return -EIO; 501762306a36Sopenharmony_ci 501862306a36Sopenharmony_ci r = kvm_arch_vm_compat_ioctl(filp, ioctl, arg); 501962306a36Sopenharmony_ci if (r != -ENOTTY) 502062306a36Sopenharmony_ci return r; 502162306a36Sopenharmony_ci 502262306a36Sopenharmony_ci switch (ioctl) { 502362306a36Sopenharmony_ci#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT 502462306a36Sopenharmony_ci case KVM_CLEAR_DIRTY_LOG: { 502562306a36Sopenharmony_ci struct compat_kvm_clear_dirty_log compat_log; 502662306a36Sopenharmony_ci struct kvm_clear_dirty_log log; 502762306a36Sopenharmony_ci 502862306a36Sopenharmony_ci 
if (copy_from_user(&compat_log, (void __user *)arg, 502962306a36Sopenharmony_ci sizeof(compat_log))) 503062306a36Sopenharmony_ci return -EFAULT; 503162306a36Sopenharmony_ci log.slot = compat_log.slot; 503262306a36Sopenharmony_ci log.num_pages = compat_log.num_pages; 503362306a36Sopenharmony_ci log.first_page = compat_log.first_page; 503462306a36Sopenharmony_ci log.padding2 = compat_log.padding2; 503562306a36Sopenharmony_ci log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); 503662306a36Sopenharmony_ci 503762306a36Sopenharmony_ci r = kvm_vm_ioctl_clear_dirty_log(kvm, &log); 503862306a36Sopenharmony_ci break; 503962306a36Sopenharmony_ci } 504062306a36Sopenharmony_ci#endif 504162306a36Sopenharmony_ci case KVM_GET_DIRTY_LOG: { 504262306a36Sopenharmony_ci struct compat_kvm_dirty_log compat_log; 504362306a36Sopenharmony_ci struct kvm_dirty_log log; 504462306a36Sopenharmony_ci 504562306a36Sopenharmony_ci if (copy_from_user(&compat_log, (void __user *)arg, 504662306a36Sopenharmony_ci sizeof(compat_log))) 504762306a36Sopenharmony_ci return -EFAULT; 504862306a36Sopenharmony_ci log.slot = compat_log.slot; 504962306a36Sopenharmony_ci log.padding1 = compat_log.padding1; 505062306a36Sopenharmony_ci log.padding2 = compat_log.padding2; 505162306a36Sopenharmony_ci log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); 505262306a36Sopenharmony_ci 505362306a36Sopenharmony_ci r = kvm_vm_ioctl_get_dirty_log(kvm, &log); 505462306a36Sopenharmony_ci break; 505562306a36Sopenharmony_ci } 505662306a36Sopenharmony_ci default: 505762306a36Sopenharmony_ci r = kvm_vm_ioctl(filp, ioctl, arg); 505862306a36Sopenharmony_ci } 505962306a36Sopenharmony_ci return r; 506062306a36Sopenharmony_ci} 506162306a36Sopenharmony_ci#endif 506262306a36Sopenharmony_ci 506362306a36Sopenharmony_cistatic const struct file_operations kvm_vm_fops = { 506462306a36Sopenharmony_ci .release = kvm_vm_release, 506562306a36Sopenharmony_ci .unlocked_ioctl = kvm_vm_ioctl, 506662306a36Sopenharmony_ci .llseek = noop_llseek, 506762306a36Sopenharmony_ci KVM_COMPAT(kvm_vm_compat_ioctl), 506862306a36Sopenharmony_ci}; 506962306a36Sopenharmony_ci 507062306a36Sopenharmony_cibool file_is_kvm(struct file *file) 507162306a36Sopenharmony_ci{ 507262306a36Sopenharmony_ci return file && file->f_op == &kvm_vm_fops; 507362306a36Sopenharmony_ci} 507462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(file_is_kvm); 507562306a36Sopenharmony_ci 507662306a36Sopenharmony_cistatic int kvm_dev_ioctl_create_vm(unsigned long type) 507762306a36Sopenharmony_ci{ 507862306a36Sopenharmony_ci char fdname[ITOA_MAX_LEN + 1]; 507962306a36Sopenharmony_ci int r, fd; 508062306a36Sopenharmony_ci struct kvm *kvm; 508162306a36Sopenharmony_ci struct file *file; 508262306a36Sopenharmony_ci 508362306a36Sopenharmony_ci fd = get_unused_fd_flags(O_CLOEXEC); 508462306a36Sopenharmony_ci if (fd < 0) 508562306a36Sopenharmony_ci return fd; 508662306a36Sopenharmony_ci 508762306a36Sopenharmony_ci snprintf(fdname, sizeof(fdname), "%d", fd); 508862306a36Sopenharmony_ci 508962306a36Sopenharmony_ci kvm = kvm_create_vm(type, fdname); 509062306a36Sopenharmony_ci if (IS_ERR(kvm)) { 509162306a36Sopenharmony_ci r = PTR_ERR(kvm); 509262306a36Sopenharmony_ci goto put_fd; 509362306a36Sopenharmony_ci } 509462306a36Sopenharmony_ci 509562306a36Sopenharmony_ci file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); 509662306a36Sopenharmony_ci if (IS_ERR(file)) { 509762306a36Sopenharmony_ci r = PTR_ERR(file); 509862306a36Sopenharmony_ci goto put_kvm; 509962306a36Sopenharmony_ci } 510062306a36Sopenharmony_ci 
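	/*
	 * Only fd_install() below publishes the new fd; until then it is merely
	 * reserved, which is what lets the error paths unwind with
	 * put_unused_fd().
	 */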
510162306a36Sopenharmony_ci /* 510262306a36Sopenharmony_ci * Don't call kvm_put_kvm anymore at this point; file->f_op is 510362306a36Sopenharmony_ci * already set, with ->release() being kvm_vm_release(). In error 510462306a36Sopenharmony_ci * cases it will be called by the final fput(file) and will take 510562306a36Sopenharmony_ci * care of doing kvm_put_kvm(kvm). 510662306a36Sopenharmony_ci */ 510762306a36Sopenharmony_ci kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm); 510862306a36Sopenharmony_ci 510962306a36Sopenharmony_ci fd_install(fd, file); 511062306a36Sopenharmony_ci return fd; 511162306a36Sopenharmony_ci 511262306a36Sopenharmony_ciput_kvm: 511362306a36Sopenharmony_ci kvm_put_kvm(kvm); 511462306a36Sopenharmony_ciput_fd: 511562306a36Sopenharmony_ci put_unused_fd(fd); 511662306a36Sopenharmony_ci return r; 511762306a36Sopenharmony_ci} 511862306a36Sopenharmony_ci 511962306a36Sopenharmony_cistatic long kvm_dev_ioctl(struct file *filp, 512062306a36Sopenharmony_ci unsigned int ioctl, unsigned long arg) 512162306a36Sopenharmony_ci{ 512262306a36Sopenharmony_ci int r = -EINVAL; 512362306a36Sopenharmony_ci 512462306a36Sopenharmony_ci switch (ioctl) { 512562306a36Sopenharmony_ci case KVM_GET_API_VERSION: 512662306a36Sopenharmony_ci if (arg) 512762306a36Sopenharmony_ci goto out; 512862306a36Sopenharmony_ci r = KVM_API_VERSION; 512962306a36Sopenharmony_ci break; 513062306a36Sopenharmony_ci case KVM_CREATE_VM: 513162306a36Sopenharmony_ci r = kvm_dev_ioctl_create_vm(arg); 513262306a36Sopenharmony_ci break; 513362306a36Sopenharmony_ci case KVM_CHECK_EXTENSION: 513462306a36Sopenharmony_ci r = kvm_vm_ioctl_check_extension_generic(NULL, arg); 513562306a36Sopenharmony_ci break; 513662306a36Sopenharmony_ci case KVM_GET_VCPU_MMAP_SIZE: 513762306a36Sopenharmony_ci if (arg) 513862306a36Sopenharmony_ci goto out; 513962306a36Sopenharmony_ci r = PAGE_SIZE; /* struct kvm_run */ 514062306a36Sopenharmony_ci#ifdef CONFIG_X86 514162306a36Sopenharmony_ci r += PAGE_SIZE; /* pio data page */ 514262306a36Sopenharmony_ci#endif 514362306a36Sopenharmony_ci#ifdef CONFIG_KVM_MMIO 514462306a36Sopenharmony_ci r += PAGE_SIZE; /* coalesced mmio ring page */ 514562306a36Sopenharmony_ci#endif 514662306a36Sopenharmony_ci break; 514762306a36Sopenharmony_ci case KVM_TRACE_ENABLE: 514862306a36Sopenharmony_ci case KVM_TRACE_PAUSE: 514962306a36Sopenharmony_ci case KVM_TRACE_DISABLE: 515062306a36Sopenharmony_ci r = -EOPNOTSUPP; 515162306a36Sopenharmony_ci break; 515262306a36Sopenharmony_ci default: 515362306a36Sopenharmony_ci return kvm_arch_dev_ioctl(filp, ioctl, arg); 515462306a36Sopenharmony_ci } 515562306a36Sopenharmony_ciout: 515662306a36Sopenharmony_ci return r; 515762306a36Sopenharmony_ci} 515862306a36Sopenharmony_ci 515962306a36Sopenharmony_cistatic struct file_operations kvm_chardev_ops = { 516062306a36Sopenharmony_ci .unlocked_ioctl = kvm_dev_ioctl, 516162306a36Sopenharmony_ci .llseek = noop_llseek, 516262306a36Sopenharmony_ci KVM_COMPAT(kvm_dev_ioctl), 516362306a36Sopenharmony_ci}; 516462306a36Sopenharmony_ci 516562306a36Sopenharmony_cistatic struct miscdevice kvm_dev = { 516662306a36Sopenharmony_ci KVM_MINOR, 516762306a36Sopenharmony_ci "kvm", 516862306a36Sopenharmony_ci &kvm_chardev_ops, 516962306a36Sopenharmony_ci}; 517062306a36Sopenharmony_ci 517162306a36Sopenharmony_ci#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING 517262306a36Sopenharmony_ci__visible bool kvm_rebooting; 517362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_rebooting); 517462306a36Sopenharmony_ci 517562306a36Sopenharmony_cistatic DEFINE_PER_CPU(bool, 
hardware_enabled); 517662306a36Sopenharmony_cistatic int kvm_usage_count; 517762306a36Sopenharmony_ci 517862306a36Sopenharmony_cistatic int __hardware_enable_nolock(void) 517962306a36Sopenharmony_ci{ 518062306a36Sopenharmony_ci if (__this_cpu_read(hardware_enabled)) 518162306a36Sopenharmony_ci return 0; 518262306a36Sopenharmony_ci 518362306a36Sopenharmony_ci if (kvm_arch_hardware_enable()) { 518462306a36Sopenharmony_ci pr_info("kvm: enabling virtualization on CPU%d failed\n", 518562306a36Sopenharmony_ci raw_smp_processor_id()); 518662306a36Sopenharmony_ci return -EIO; 518762306a36Sopenharmony_ci } 518862306a36Sopenharmony_ci 518962306a36Sopenharmony_ci __this_cpu_write(hardware_enabled, true); 519062306a36Sopenharmony_ci return 0; 519162306a36Sopenharmony_ci} 519262306a36Sopenharmony_ci 519362306a36Sopenharmony_cistatic void hardware_enable_nolock(void *failed) 519462306a36Sopenharmony_ci{ 519562306a36Sopenharmony_ci if (__hardware_enable_nolock()) 519662306a36Sopenharmony_ci atomic_inc(failed); 519762306a36Sopenharmony_ci} 519862306a36Sopenharmony_ci 519962306a36Sopenharmony_cistatic int kvm_online_cpu(unsigned int cpu) 520062306a36Sopenharmony_ci{ 520162306a36Sopenharmony_ci int ret = 0; 520262306a36Sopenharmony_ci 520362306a36Sopenharmony_ci /* 520462306a36Sopenharmony_ci * Abort the CPU online process if hardware virtualization cannot 520562306a36Sopenharmony_ci * be enabled. Otherwise running VMs would encounter unrecoverable 520662306a36Sopenharmony_ci * errors when scheduled to this CPU. 520762306a36Sopenharmony_ci */ 520862306a36Sopenharmony_ci mutex_lock(&kvm_lock); 520962306a36Sopenharmony_ci if (kvm_usage_count) 521062306a36Sopenharmony_ci ret = __hardware_enable_nolock(); 521162306a36Sopenharmony_ci mutex_unlock(&kvm_lock); 521262306a36Sopenharmony_ci return ret; 521362306a36Sopenharmony_ci} 521462306a36Sopenharmony_ci 521562306a36Sopenharmony_cistatic void hardware_disable_nolock(void *junk) 521662306a36Sopenharmony_ci{ 521762306a36Sopenharmony_ci /* 521862306a36Sopenharmony_ci * Note, hardware_disable_all_nolock() tells all online CPUs to disable 521962306a36Sopenharmony_ci * hardware, not just CPUs that successfully enabled hardware! 
522062306a36Sopenharmony_ci */ 522162306a36Sopenharmony_ci if (!__this_cpu_read(hardware_enabled)) 522262306a36Sopenharmony_ci return; 522362306a36Sopenharmony_ci 522462306a36Sopenharmony_ci kvm_arch_hardware_disable(); 522562306a36Sopenharmony_ci 522662306a36Sopenharmony_ci __this_cpu_write(hardware_enabled, false); 522762306a36Sopenharmony_ci} 522862306a36Sopenharmony_ci 522962306a36Sopenharmony_cistatic int kvm_offline_cpu(unsigned int cpu) 523062306a36Sopenharmony_ci{ 523162306a36Sopenharmony_ci mutex_lock(&kvm_lock); 523262306a36Sopenharmony_ci if (kvm_usage_count) 523362306a36Sopenharmony_ci hardware_disable_nolock(NULL); 523462306a36Sopenharmony_ci mutex_unlock(&kvm_lock); 523562306a36Sopenharmony_ci return 0; 523662306a36Sopenharmony_ci} 523762306a36Sopenharmony_ci 523862306a36Sopenharmony_cistatic void hardware_disable_all_nolock(void) 523962306a36Sopenharmony_ci{ 524062306a36Sopenharmony_ci BUG_ON(!kvm_usage_count); 524162306a36Sopenharmony_ci 524262306a36Sopenharmony_ci kvm_usage_count--; 524362306a36Sopenharmony_ci if (!kvm_usage_count) 524462306a36Sopenharmony_ci on_each_cpu(hardware_disable_nolock, NULL, 1); 524562306a36Sopenharmony_ci} 524662306a36Sopenharmony_ci 524762306a36Sopenharmony_cistatic void hardware_disable_all(void) 524862306a36Sopenharmony_ci{ 524962306a36Sopenharmony_ci cpus_read_lock(); 525062306a36Sopenharmony_ci mutex_lock(&kvm_lock); 525162306a36Sopenharmony_ci hardware_disable_all_nolock(); 525262306a36Sopenharmony_ci mutex_unlock(&kvm_lock); 525362306a36Sopenharmony_ci cpus_read_unlock(); 525462306a36Sopenharmony_ci} 525562306a36Sopenharmony_ci 525662306a36Sopenharmony_cistatic int hardware_enable_all(void) 525762306a36Sopenharmony_ci{ 525862306a36Sopenharmony_ci atomic_t failed = ATOMIC_INIT(0); 525962306a36Sopenharmony_ci int r; 526062306a36Sopenharmony_ci 526162306a36Sopenharmony_ci /* 526262306a36Sopenharmony_ci * Do not enable hardware virtualization if the system is going down. 526362306a36Sopenharmony_ci * If userspace initiated a forced reboot, e.g. reboot -f, then it's 526462306a36Sopenharmony_ci * possible for an in-flight KVM_CREATE_VM to trigger hardware enabling 526562306a36Sopenharmony_ci * after kvm_reboot() is called. Note, this relies on system_state 526662306a36Sopenharmony_ci * being set _before_ kvm_reboot(), which is why KVM uses a syscore ops 526762306a36Sopenharmony_ci * hook instead of registering a dedicated reboot notifier (the latter 526862306a36Sopenharmony_ci * runs before system_state is updated). 526962306a36Sopenharmony_ci */ 527062306a36Sopenharmony_ci if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF || 527162306a36Sopenharmony_ci system_state == SYSTEM_RESTART) 527262306a36Sopenharmony_ci return -EBUSY; 527362306a36Sopenharmony_ci 527462306a36Sopenharmony_ci /* 527562306a36Sopenharmony_ci * When onlining a CPU, cpu_online_mask is set before kvm_online_cpu() 527662306a36Sopenharmony_ci * is called, and so on_each_cpu() between them includes the CPU that 527762306a36Sopenharmony_ci * is being onlined. As a result, hardware_enable_nolock() may get 527862306a36Sopenharmony_ci * invoked before kvm_online_cpu(), which also enables hardware if the 527962306a36Sopenharmony_ci * usage count is non-zero. Disable CPU hotplug to avoid attempting to 528062306a36Sopenharmony_ci * enable hardware multiple times. 
528162306a36Sopenharmony_ci */ 528262306a36Sopenharmony_ci cpus_read_lock(); 528362306a36Sopenharmony_ci mutex_lock(&kvm_lock); 528462306a36Sopenharmony_ci 528562306a36Sopenharmony_ci r = 0; 528662306a36Sopenharmony_ci 528762306a36Sopenharmony_ci kvm_usage_count++; 528862306a36Sopenharmony_ci if (kvm_usage_count == 1) { 528962306a36Sopenharmony_ci on_each_cpu(hardware_enable_nolock, &failed, 1); 529062306a36Sopenharmony_ci 529162306a36Sopenharmony_ci if (atomic_read(&failed)) { 529262306a36Sopenharmony_ci hardware_disable_all_nolock(); 529362306a36Sopenharmony_ci r = -EBUSY; 529462306a36Sopenharmony_ci } 529562306a36Sopenharmony_ci } 529662306a36Sopenharmony_ci 529762306a36Sopenharmony_ci mutex_unlock(&kvm_lock); 529862306a36Sopenharmony_ci cpus_read_unlock(); 529962306a36Sopenharmony_ci 530062306a36Sopenharmony_ci return r; 530162306a36Sopenharmony_ci} 530262306a36Sopenharmony_ci 530362306a36Sopenharmony_cistatic void kvm_shutdown(void) 530462306a36Sopenharmony_ci{ 530562306a36Sopenharmony_ci /* 530662306a36Sopenharmony_ci * Disable hardware virtualization and set kvm_rebooting to indicate 530762306a36Sopenharmony_ci * that KVM has asynchronously disabled hardware virtualization, i.e. 530862306a36Sopenharmony_ci * that relevant errors and exceptions aren't entirely unexpected. 530962306a36Sopenharmony_ci * Some flavors of hardware virtualization need to be disabled before 531062306a36Sopenharmony_ci * transferring control to firmware (to perform shutdown/reboot), e.g. 531162306a36Sopenharmony_ci * on x86, virtualization can block INIT interrupts, which are used by 531262306a36Sopenharmony_ci * firmware to pull APs back under firmware control. Note, this path 531362306a36Sopenharmony_ci * is used for both shutdown and reboot scenarios, i.e. neither name is 531462306a36Sopenharmony_ci * 100% comprehensive. 531562306a36Sopenharmony_ci */ 531662306a36Sopenharmony_ci pr_info("kvm: exiting hardware virtualization\n"); 531762306a36Sopenharmony_ci kvm_rebooting = true; 531862306a36Sopenharmony_ci on_each_cpu(hardware_disable_nolock, NULL, 1); 531962306a36Sopenharmony_ci} 532062306a36Sopenharmony_ci 532162306a36Sopenharmony_cistatic int kvm_suspend(void) 532262306a36Sopenharmony_ci{ 532362306a36Sopenharmony_ci /* 532462306a36Sopenharmony_ci * Secondary CPUs and CPU hotplug are disabled across the suspend/resume 532562306a36Sopenharmony_ci * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count 532662306a36Sopenharmony_ci * is stable. Assert that kvm_lock is not held to ensure the system 532762306a36Sopenharmony_ci * isn't suspended while KVM is enabling hardware. Hardware enabling 532862306a36Sopenharmony_ci * can be preempted, but the task cannot be frozen until it has dropped 532962306a36Sopenharmony_ci * all locks (userspace tasks are frozen via a fake signal). 
533062306a36Sopenharmony_ci */ 533162306a36Sopenharmony_ci lockdep_assert_not_held(&kvm_lock); 533262306a36Sopenharmony_ci lockdep_assert_irqs_disabled(); 533362306a36Sopenharmony_ci 533462306a36Sopenharmony_ci if (kvm_usage_count) 533562306a36Sopenharmony_ci hardware_disable_nolock(NULL); 533662306a36Sopenharmony_ci return 0; 533762306a36Sopenharmony_ci} 533862306a36Sopenharmony_ci 533962306a36Sopenharmony_cistatic void kvm_resume(void) 534062306a36Sopenharmony_ci{ 534162306a36Sopenharmony_ci lockdep_assert_not_held(&kvm_lock); 534262306a36Sopenharmony_ci lockdep_assert_irqs_disabled(); 534362306a36Sopenharmony_ci 534462306a36Sopenharmony_ci if (kvm_usage_count) 534562306a36Sopenharmony_ci WARN_ON_ONCE(__hardware_enable_nolock()); 534662306a36Sopenharmony_ci} 534762306a36Sopenharmony_ci 534862306a36Sopenharmony_cistatic struct syscore_ops kvm_syscore_ops = { 534962306a36Sopenharmony_ci .suspend = kvm_suspend, 535062306a36Sopenharmony_ci .resume = kvm_resume, 535162306a36Sopenharmony_ci .shutdown = kvm_shutdown, 535262306a36Sopenharmony_ci}; 535362306a36Sopenharmony_ci#else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */ 535462306a36Sopenharmony_cistatic int hardware_enable_all(void) 535562306a36Sopenharmony_ci{ 535662306a36Sopenharmony_ci return 0; 535762306a36Sopenharmony_ci} 535862306a36Sopenharmony_ci 535962306a36Sopenharmony_cistatic void hardware_disable_all(void) 536062306a36Sopenharmony_ci{ 536162306a36Sopenharmony_ci 536262306a36Sopenharmony_ci} 536362306a36Sopenharmony_ci#endif /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */ 536462306a36Sopenharmony_ci 536562306a36Sopenharmony_cistatic void kvm_iodevice_destructor(struct kvm_io_device *dev) 536662306a36Sopenharmony_ci{ 536762306a36Sopenharmony_ci if (dev->ops->destructor) 536862306a36Sopenharmony_ci dev->ops->destructor(dev); 536962306a36Sopenharmony_ci} 537062306a36Sopenharmony_ci 537162306a36Sopenharmony_cistatic void kvm_io_bus_destroy(struct kvm_io_bus *bus) 537262306a36Sopenharmony_ci{ 537362306a36Sopenharmony_ci int i; 537462306a36Sopenharmony_ci 537562306a36Sopenharmony_ci for (i = 0; i < bus->dev_count; i++) { 537662306a36Sopenharmony_ci struct kvm_io_device *pos = bus->range[i].dev; 537762306a36Sopenharmony_ci 537862306a36Sopenharmony_ci kvm_iodevice_destructor(pos); 537962306a36Sopenharmony_ci } 538062306a36Sopenharmony_ci kfree(bus); 538162306a36Sopenharmony_ci} 538262306a36Sopenharmony_ci 538362306a36Sopenharmony_cistatic inline int kvm_io_bus_cmp(const struct kvm_io_range *r1, 538462306a36Sopenharmony_ci const struct kvm_io_range *r2) 538562306a36Sopenharmony_ci{ 538662306a36Sopenharmony_ci gpa_t addr1 = r1->addr; 538762306a36Sopenharmony_ci gpa_t addr2 = r2->addr; 538862306a36Sopenharmony_ci 538962306a36Sopenharmony_ci if (addr1 < addr2) 539062306a36Sopenharmony_ci return -1; 539162306a36Sopenharmony_ci 539262306a36Sopenharmony_ci /* If r2->len == 0, match the exact address. If r2->len != 0, 539362306a36Sopenharmony_ci * accept any overlapping write. Any order is acceptable for 539462306a36Sopenharmony_ci * overlapping ranges, because kvm_io_bus_get_first_dev ensures 539562306a36Sopenharmony_ci * we process all of them. 
539662306a36Sopenharmony_ci */ 539762306a36Sopenharmony_ci if (r2->len) { 539862306a36Sopenharmony_ci addr1 += r1->len; 539962306a36Sopenharmony_ci addr2 += r2->len; 540062306a36Sopenharmony_ci } 540162306a36Sopenharmony_ci 540262306a36Sopenharmony_ci if (addr1 > addr2) 540362306a36Sopenharmony_ci return 1; 540462306a36Sopenharmony_ci 540562306a36Sopenharmony_ci return 0; 540662306a36Sopenharmony_ci} 540762306a36Sopenharmony_ci 540862306a36Sopenharmony_cistatic int kvm_io_bus_sort_cmp(const void *p1, const void *p2) 540962306a36Sopenharmony_ci{ 541062306a36Sopenharmony_ci return kvm_io_bus_cmp(p1, p2); 541162306a36Sopenharmony_ci} 541262306a36Sopenharmony_ci 541362306a36Sopenharmony_cistatic int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, 541462306a36Sopenharmony_ci gpa_t addr, int len) 541562306a36Sopenharmony_ci{ 541662306a36Sopenharmony_ci struct kvm_io_range *range, key; 541762306a36Sopenharmony_ci int off; 541862306a36Sopenharmony_ci 541962306a36Sopenharmony_ci key = (struct kvm_io_range) { 542062306a36Sopenharmony_ci .addr = addr, 542162306a36Sopenharmony_ci .len = len, 542262306a36Sopenharmony_ci }; 542362306a36Sopenharmony_ci 542462306a36Sopenharmony_ci range = bsearch(&key, bus->range, bus->dev_count, 542562306a36Sopenharmony_ci sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp); 542662306a36Sopenharmony_ci if (range == NULL) 542762306a36Sopenharmony_ci return -ENOENT; 542862306a36Sopenharmony_ci 542962306a36Sopenharmony_ci off = range - bus->range; 543062306a36Sopenharmony_ci 543162306a36Sopenharmony_ci while (off > 0 && kvm_io_bus_cmp(&key, &bus->range[off-1]) == 0) 543262306a36Sopenharmony_ci off--; 543362306a36Sopenharmony_ci 543462306a36Sopenharmony_ci return off; 543562306a36Sopenharmony_ci} 543662306a36Sopenharmony_ci 543762306a36Sopenharmony_cistatic int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, 543862306a36Sopenharmony_ci struct kvm_io_range *range, const void *val) 543962306a36Sopenharmony_ci{ 544062306a36Sopenharmony_ci int idx; 544162306a36Sopenharmony_ci 544262306a36Sopenharmony_ci idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len); 544362306a36Sopenharmony_ci if (idx < 0) 544462306a36Sopenharmony_ci return -EOPNOTSUPP; 544562306a36Sopenharmony_ci 544662306a36Sopenharmony_ci while (idx < bus->dev_count && 544762306a36Sopenharmony_ci kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { 544862306a36Sopenharmony_ci if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr, 544962306a36Sopenharmony_ci range->len, val)) 545062306a36Sopenharmony_ci return idx; 545162306a36Sopenharmony_ci idx++; 545262306a36Sopenharmony_ci } 545362306a36Sopenharmony_ci 545462306a36Sopenharmony_ci return -EOPNOTSUPP; 545562306a36Sopenharmony_ci} 545662306a36Sopenharmony_ci 545762306a36Sopenharmony_ci/* kvm_io_bus_write - called under kvm->slots_lock */ 545862306a36Sopenharmony_ciint kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, 545962306a36Sopenharmony_ci int len, const void *val) 546062306a36Sopenharmony_ci{ 546162306a36Sopenharmony_ci struct kvm_io_bus *bus; 546262306a36Sopenharmony_ci struct kvm_io_range range; 546362306a36Sopenharmony_ci int r; 546462306a36Sopenharmony_ci 546562306a36Sopenharmony_ci range = (struct kvm_io_range) { 546662306a36Sopenharmony_ci .addr = addr, 546762306a36Sopenharmony_ci .len = len, 546862306a36Sopenharmony_ci }; 546962306a36Sopenharmony_ci 547062306a36Sopenharmony_ci bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); 547162306a36Sopenharmony_ci if (!bus) 
547262306a36Sopenharmony_ci return -ENOMEM; 547362306a36Sopenharmony_ci r = __kvm_io_bus_write(vcpu, bus, &range, val); 547462306a36Sopenharmony_ci return r < 0 ? r : 0; 547562306a36Sopenharmony_ci} 547662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_io_bus_write); 547762306a36Sopenharmony_ci 547862306a36Sopenharmony_ci/* kvm_io_bus_write_cookie - called under kvm->slots_lock */ 547962306a36Sopenharmony_ciint kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, 548062306a36Sopenharmony_ci gpa_t addr, int len, const void *val, long cookie) 548162306a36Sopenharmony_ci{ 548262306a36Sopenharmony_ci struct kvm_io_bus *bus; 548362306a36Sopenharmony_ci struct kvm_io_range range; 548462306a36Sopenharmony_ci 548562306a36Sopenharmony_ci range = (struct kvm_io_range) { 548662306a36Sopenharmony_ci .addr = addr, 548762306a36Sopenharmony_ci .len = len, 548862306a36Sopenharmony_ci }; 548962306a36Sopenharmony_ci 549062306a36Sopenharmony_ci bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); 549162306a36Sopenharmony_ci if (!bus) 549262306a36Sopenharmony_ci return -ENOMEM; 549362306a36Sopenharmony_ci 549462306a36Sopenharmony_ci /* First try the device referenced by cookie. */ 549562306a36Sopenharmony_ci if ((cookie >= 0) && (cookie < bus->dev_count) && 549662306a36Sopenharmony_ci (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) 549762306a36Sopenharmony_ci if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len, 549862306a36Sopenharmony_ci val)) 549962306a36Sopenharmony_ci return cookie; 550062306a36Sopenharmony_ci 550162306a36Sopenharmony_ci /* 550262306a36Sopenharmony_ci * cookie contained garbage; fall back to search and return the 550362306a36Sopenharmony_ci * correct cookie value. 550462306a36Sopenharmony_ci */ 550562306a36Sopenharmony_ci return __kvm_io_bus_write(vcpu, bus, &range, val); 550662306a36Sopenharmony_ci} 550762306a36Sopenharmony_ci 550862306a36Sopenharmony_cistatic int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, 550962306a36Sopenharmony_ci struct kvm_io_range *range, void *val) 551062306a36Sopenharmony_ci{ 551162306a36Sopenharmony_ci int idx; 551262306a36Sopenharmony_ci 551362306a36Sopenharmony_ci idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len); 551462306a36Sopenharmony_ci if (idx < 0) 551562306a36Sopenharmony_ci return -EOPNOTSUPP; 551662306a36Sopenharmony_ci 551762306a36Sopenharmony_ci while (idx < bus->dev_count && 551862306a36Sopenharmony_ci kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { 551962306a36Sopenharmony_ci if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr, 552062306a36Sopenharmony_ci range->len, val)) 552162306a36Sopenharmony_ci return idx; 552262306a36Sopenharmony_ci idx++; 552362306a36Sopenharmony_ci } 552462306a36Sopenharmony_ci 552562306a36Sopenharmony_ci return -EOPNOTSUPP; 552662306a36Sopenharmony_ci} 552762306a36Sopenharmony_ci 552862306a36Sopenharmony_ci/* kvm_io_bus_read - called under kvm->slots_lock */ 552962306a36Sopenharmony_ciint kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, 553062306a36Sopenharmony_ci int len, void *val) 553162306a36Sopenharmony_ci{ 553262306a36Sopenharmony_ci struct kvm_io_bus *bus; 553362306a36Sopenharmony_ci struct kvm_io_range range; 553462306a36Sopenharmony_ci int r; 553562306a36Sopenharmony_ci 553662306a36Sopenharmony_ci range = (struct kvm_io_range) { 553762306a36Sopenharmony_ci .addr = addr, 553862306a36Sopenharmony_ci .len = len, 553962306a36Sopenharmony_ci }; 554062306a36Sopenharmony_ci 554162306a36Sopenharmony_ci bus = 
srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); 554262306a36Sopenharmony_ci if (!bus) 554362306a36Sopenharmony_ci return -ENOMEM; 554462306a36Sopenharmony_ci r = __kvm_io_bus_read(vcpu, bus, &range, val); 554562306a36Sopenharmony_ci return r < 0 ? r : 0; 554662306a36Sopenharmony_ci} 554762306a36Sopenharmony_ci 554862306a36Sopenharmony_ci/* Caller must hold slots_lock. */ 554962306a36Sopenharmony_ciint kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 555062306a36Sopenharmony_ci int len, struct kvm_io_device *dev) 555162306a36Sopenharmony_ci{ 555262306a36Sopenharmony_ci int i; 555362306a36Sopenharmony_ci struct kvm_io_bus *new_bus, *bus; 555462306a36Sopenharmony_ci struct kvm_io_range range; 555562306a36Sopenharmony_ci 555662306a36Sopenharmony_ci bus = kvm_get_bus(kvm, bus_idx); 555762306a36Sopenharmony_ci if (!bus) 555862306a36Sopenharmony_ci return -ENOMEM; 555962306a36Sopenharmony_ci 556062306a36Sopenharmony_ci /* exclude ioeventfd which is limited by maximum fd */ 556162306a36Sopenharmony_ci if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) 556262306a36Sopenharmony_ci return -ENOSPC; 556362306a36Sopenharmony_ci 556462306a36Sopenharmony_ci new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1), 556562306a36Sopenharmony_ci GFP_KERNEL_ACCOUNT); 556662306a36Sopenharmony_ci if (!new_bus) 556762306a36Sopenharmony_ci return -ENOMEM; 556862306a36Sopenharmony_ci 556962306a36Sopenharmony_ci range = (struct kvm_io_range) { 557062306a36Sopenharmony_ci .addr = addr, 557162306a36Sopenharmony_ci .len = len, 557262306a36Sopenharmony_ci .dev = dev, 557362306a36Sopenharmony_ci }; 557462306a36Sopenharmony_ci 557562306a36Sopenharmony_ci for (i = 0; i < bus->dev_count; i++) 557662306a36Sopenharmony_ci if (kvm_io_bus_cmp(&bus->range[i], &range) > 0) 557762306a36Sopenharmony_ci break; 557862306a36Sopenharmony_ci 557962306a36Sopenharmony_ci memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); 558062306a36Sopenharmony_ci new_bus->dev_count++; 558162306a36Sopenharmony_ci new_bus->range[i] = range; 558262306a36Sopenharmony_ci memcpy(new_bus->range + i + 1, bus->range + i, 558362306a36Sopenharmony_ci (bus->dev_count - i) * sizeof(struct kvm_io_range)); 558462306a36Sopenharmony_ci rcu_assign_pointer(kvm->buses[bus_idx], new_bus); 558562306a36Sopenharmony_ci synchronize_srcu_expedited(&kvm->srcu); 558662306a36Sopenharmony_ci kfree(bus); 558762306a36Sopenharmony_ci 558862306a36Sopenharmony_ci return 0; 558962306a36Sopenharmony_ci} 559062306a36Sopenharmony_ci 559162306a36Sopenharmony_ciint kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, 559262306a36Sopenharmony_ci struct kvm_io_device *dev) 559362306a36Sopenharmony_ci{ 559462306a36Sopenharmony_ci int i; 559562306a36Sopenharmony_ci struct kvm_io_bus *new_bus, *bus; 559662306a36Sopenharmony_ci 559762306a36Sopenharmony_ci lockdep_assert_held(&kvm->slots_lock); 559862306a36Sopenharmony_ci 559962306a36Sopenharmony_ci bus = kvm_get_bus(kvm, bus_idx); 560062306a36Sopenharmony_ci if (!bus) 560162306a36Sopenharmony_ci return 0; 560262306a36Sopenharmony_ci 560362306a36Sopenharmony_ci for (i = 0; i < bus->dev_count; i++) { 560462306a36Sopenharmony_ci if (bus->range[i].dev == dev) { 560562306a36Sopenharmony_ci break; 560662306a36Sopenharmony_ci } 560762306a36Sopenharmony_ci } 560862306a36Sopenharmony_ci 560962306a36Sopenharmony_ci if (i == bus->dev_count) 561062306a36Sopenharmony_ci return 0; 561162306a36Sopenharmony_ci 561262306a36Sopenharmony_ci new_bus = kmalloc(struct_size(bus, 
range, bus->dev_count - 1), 561362306a36Sopenharmony_ci GFP_KERNEL_ACCOUNT); 561462306a36Sopenharmony_ci if (new_bus) { 561562306a36Sopenharmony_ci memcpy(new_bus, bus, struct_size(bus, range, i)); 561662306a36Sopenharmony_ci new_bus->dev_count--; 561762306a36Sopenharmony_ci memcpy(new_bus->range + i, bus->range + i + 1, 561862306a36Sopenharmony_ci flex_array_size(new_bus, range, new_bus->dev_count - i)); 561962306a36Sopenharmony_ci } 562062306a36Sopenharmony_ci 562162306a36Sopenharmony_ci rcu_assign_pointer(kvm->buses[bus_idx], new_bus); 562262306a36Sopenharmony_ci synchronize_srcu_expedited(&kvm->srcu); 562362306a36Sopenharmony_ci 562462306a36Sopenharmony_ci /* 562562306a36Sopenharmony_ci * If NULL bus is installed, destroy the old bus, including all the 562662306a36Sopenharmony_ci * attached devices. Otherwise, destroy the caller's device only. 562762306a36Sopenharmony_ci */ 562862306a36Sopenharmony_ci if (!new_bus) { 562962306a36Sopenharmony_ci pr_err("kvm: failed to shrink bus, removing it completely\n"); 563062306a36Sopenharmony_ci kvm_io_bus_destroy(bus); 563162306a36Sopenharmony_ci return -ENOMEM; 563262306a36Sopenharmony_ci } 563362306a36Sopenharmony_ci 563462306a36Sopenharmony_ci kvm_iodevice_destructor(dev); 563562306a36Sopenharmony_ci kfree(bus); 563662306a36Sopenharmony_ci return 0; 563762306a36Sopenharmony_ci} 563862306a36Sopenharmony_ci 563962306a36Sopenharmony_cistruct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, 564062306a36Sopenharmony_ci gpa_t addr) 564162306a36Sopenharmony_ci{ 564262306a36Sopenharmony_ci struct kvm_io_bus *bus; 564362306a36Sopenharmony_ci int dev_idx, srcu_idx; 564462306a36Sopenharmony_ci struct kvm_io_device *iodev = NULL; 564562306a36Sopenharmony_ci 564662306a36Sopenharmony_ci srcu_idx = srcu_read_lock(&kvm->srcu); 564762306a36Sopenharmony_ci 564862306a36Sopenharmony_ci bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); 564962306a36Sopenharmony_ci if (!bus) 565062306a36Sopenharmony_ci goto out_unlock; 565162306a36Sopenharmony_ci 565262306a36Sopenharmony_ci dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); 565362306a36Sopenharmony_ci if (dev_idx < 0) 565462306a36Sopenharmony_ci goto out_unlock; 565562306a36Sopenharmony_ci 565662306a36Sopenharmony_ci iodev = bus->range[dev_idx].dev; 565762306a36Sopenharmony_ci 565862306a36Sopenharmony_ciout_unlock: 565962306a36Sopenharmony_ci srcu_read_unlock(&kvm->srcu, srcu_idx); 566062306a36Sopenharmony_ci 566162306a36Sopenharmony_ci return iodev; 566262306a36Sopenharmony_ci} 566362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_io_bus_get_dev); 566462306a36Sopenharmony_ci 566562306a36Sopenharmony_cistatic int kvm_debugfs_open(struct inode *inode, struct file *file, 566662306a36Sopenharmony_ci int (*get)(void *, u64 *), int (*set)(void *, u64), 566762306a36Sopenharmony_ci const char *fmt) 566862306a36Sopenharmony_ci{ 566962306a36Sopenharmony_ci int ret; 567062306a36Sopenharmony_ci struct kvm_stat_data *stat_data = inode->i_private; 567162306a36Sopenharmony_ci 567262306a36Sopenharmony_ci /* 567362306a36Sopenharmony_ci * The debugfs files are a reference to the kvm struct which 567462306a36Sopenharmony_ci * is still valid when kvm_destroy_vm is called. kvm_get_kvm_safe 567562306a36Sopenharmony_ci * avoids the race between open and the removal of the debugfs directory. 
567662306a36Sopenharmony_ci */ 567762306a36Sopenharmony_ci if (!kvm_get_kvm_safe(stat_data->kvm)) 567862306a36Sopenharmony_ci return -ENOENT; 567962306a36Sopenharmony_ci 568062306a36Sopenharmony_ci ret = simple_attr_open(inode, file, get, 568162306a36Sopenharmony_ci kvm_stats_debugfs_mode(stat_data->desc) & 0222 568262306a36Sopenharmony_ci ? set : NULL, fmt); 568362306a36Sopenharmony_ci if (ret) 568462306a36Sopenharmony_ci kvm_put_kvm(stat_data->kvm); 568562306a36Sopenharmony_ci 568662306a36Sopenharmony_ci return ret; 568762306a36Sopenharmony_ci} 568862306a36Sopenharmony_ci 568962306a36Sopenharmony_cistatic int kvm_debugfs_release(struct inode *inode, struct file *file) 569062306a36Sopenharmony_ci{ 569162306a36Sopenharmony_ci struct kvm_stat_data *stat_data = inode->i_private; 569262306a36Sopenharmony_ci 569362306a36Sopenharmony_ci simple_attr_release(inode, file); 569462306a36Sopenharmony_ci kvm_put_kvm(stat_data->kvm); 569562306a36Sopenharmony_ci 569662306a36Sopenharmony_ci return 0; 569762306a36Sopenharmony_ci} 569862306a36Sopenharmony_ci 569962306a36Sopenharmony_cistatic int kvm_get_stat_per_vm(struct kvm *kvm, size_t offset, u64 *val) 570062306a36Sopenharmony_ci{ 570162306a36Sopenharmony_ci *val = *(u64 *)((void *)(&kvm->stat) + offset); 570262306a36Sopenharmony_ci 570362306a36Sopenharmony_ci return 0; 570462306a36Sopenharmony_ci} 570562306a36Sopenharmony_ci 570662306a36Sopenharmony_cistatic int kvm_clear_stat_per_vm(struct kvm *kvm, size_t offset) 570762306a36Sopenharmony_ci{ 570862306a36Sopenharmony_ci *(u64 *)((void *)(&kvm->stat) + offset) = 0; 570962306a36Sopenharmony_ci 571062306a36Sopenharmony_ci return 0; 571162306a36Sopenharmony_ci} 571262306a36Sopenharmony_ci 571362306a36Sopenharmony_cistatic int kvm_get_stat_per_vcpu(struct kvm *kvm, size_t offset, u64 *val) 571462306a36Sopenharmony_ci{ 571562306a36Sopenharmony_ci unsigned long i; 571662306a36Sopenharmony_ci struct kvm_vcpu *vcpu; 571762306a36Sopenharmony_ci 571862306a36Sopenharmony_ci *val = 0; 571962306a36Sopenharmony_ci 572062306a36Sopenharmony_ci kvm_for_each_vcpu(i, vcpu, kvm) 572162306a36Sopenharmony_ci *val += *(u64 *)((void *)(&vcpu->stat) + offset); 572262306a36Sopenharmony_ci 572362306a36Sopenharmony_ci return 0; 572462306a36Sopenharmony_ci} 572562306a36Sopenharmony_ci 572662306a36Sopenharmony_cistatic int kvm_clear_stat_per_vcpu(struct kvm *kvm, size_t offset) 572762306a36Sopenharmony_ci{ 572862306a36Sopenharmony_ci unsigned long i; 572962306a36Sopenharmony_ci struct kvm_vcpu *vcpu; 573062306a36Sopenharmony_ci 573162306a36Sopenharmony_ci kvm_for_each_vcpu(i, vcpu, kvm) 573262306a36Sopenharmony_ci *(u64 *)((void *)(&vcpu->stat) + offset) = 0; 573362306a36Sopenharmony_ci 573462306a36Sopenharmony_ci return 0; 573562306a36Sopenharmony_ci} 573662306a36Sopenharmony_ci 573762306a36Sopenharmony_cistatic int kvm_stat_data_get(void *data, u64 *val) 573862306a36Sopenharmony_ci{ 573962306a36Sopenharmony_ci int r = -EFAULT; 574062306a36Sopenharmony_ci struct kvm_stat_data *stat_data = data; 574162306a36Sopenharmony_ci 574262306a36Sopenharmony_ci switch (stat_data->kind) { 574362306a36Sopenharmony_ci case KVM_STAT_VM: 574462306a36Sopenharmony_ci r = kvm_get_stat_per_vm(stat_data->kvm, 574562306a36Sopenharmony_ci stat_data->desc->desc.offset, val); 574662306a36Sopenharmony_ci break; 574762306a36Sopenharmony_ci case KVM_STAT_VCPU: 574862306a36Sopenharmony_ci r = kvm_get_stat_per_vcpu(stat_data->kvm, 574962306a36Sopenharmony_ci stat_data->desc->desc.offset, val); 575062306a36Sopenharmony_ci break; 575162306a36Sopenharmony_ci } 
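	/*
	 * Descriptive note (not in the original source): no default case is
	 * needed here, r keeps its initial -EFAULT for an unrecognized
	 * stat_data->kind, and simple_attr_read() reports that error to the
	 * debugfs reader.
	 */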
575262306a36Sopenharmony_ci 575362306a36Sopenharmony_ci return r; 575462306a36Sopenharmony_ci} 575562306a36Sopenharmony_ci 575662306a36Sopenharmony_cistatic int kvm_stat_data_clear(void *data, u64 val) 575762306a36Sopenharmony_ci{ 575862306a36Sopenharmony_ci int r = -EFAULT; 575962306a36Sopenharmony_ci struct kvm_stat_data *stat_data = data; 576062306a36Sopenharmony_ci 576162306a36Sopenharmony_ci if (val) 576262306a36Sopenharmony_ci return -EINVAL; 576362306a36Sopenharmony_ci 576462306a36Sopenharmony_ci switch (stat_data->kind) { 576562306a36Sopenharmony_ci case KVM_STAT_VM: 576662306a36Sopenharmony_ci r = kvm_clear_stat_per_vm(stat_data->kvm, 576762306a36Sopenharmony_ci stat_data->desc->desc.offset); 576862306a36Sopenharmony_ci break; 576962306a36Sopenharmony_ci case KVM_STAT_VCPU: 577062306a36Sopenharmony_ci r = kvm_clear_stat_per_vcpu(stat_data->kvm, 577162306a36Sopenharmony_ci stat_data->desc->desc.offset); 577262306a36Sopenharmony_ci break; 577362306a36Sopenharmony_ci } 577462306a36Sopenharmony_ci 577562306a36Sopenharmony_ci return r; 577662306a36Sopenharmony_ci} 577762306a36Sopenharmony_ci 577862306a36Sopenharmony_cistatic int kvm_stat_data_open(struct inode *inode, struct file *file) 577962306a36Sopenharmony_ci{ 578062306a36Sopenharmony_ci __simple_attr_check_format("%llu\n", 0ull); 578162306a36Sopenharmony_ci return kvm_debugfs_open(inode, file, kvm_stat_data_get, 578262306a36Sopenharmony_ci kvm_stat_data_clear, "%llu\n"); 578362306a36Sopenharmony_ci} 578462306a36Sopenharmony_ci 578562306a36Sopenharmony_cistatic const struct file_operations stat_fops_per_vm = { 578662306a36Sopenharmony_ci .owner = THIS_MODULE, 578762306a36Sopenharmony_ci .open = kvm_stat_data_open, 578862306a36Sopenharmony_ci .release = kvm_debugfs_release, 578962306a36Sopenharmony_ci .read = simple_attr_read, 579062306a36Sopenharmony_ci .write = simple_attr_write, 579162306a36Sopenharmony_ci .llseek = no_llseek, 579262306a36Sopenharmony_ci}; 579362306a36Sopenharmony_ci 579462306a36Sopenharmony_cistatic int vm_stat_get(void *_offset, u64 *val) 579562306a36Sopenharmony_ci{ 579662306a36Sopenharmony_ci unsigned offset = (long)_offset; 579762306a36Sopenharmony_ci struct kvm *kvm; 579862306a36Sopenharmony_ci u64 tmp_val; 579962306a36Sopenharmony_ci 580062306a36Sopenharmony_ci *val = 0; 580162306a36Sopenharmony_ci mutex_lock(&kvm_lock); 580262306a36Sopenharmony_ci list_for_each_entry(kvm, &vm_list, vm_list) { 580362306a36Sopenharmony_ci kvm_get_stat_per_vm(kvm, offset, &tmp_val); 580462306a36Sopenharmony_ci *val += tmp_val; 580562306a36Sopenharmony_ci } 580662306a36Sopenharmony_ci mutex_unlock(&kvm_lock); 580762306a36Sopenharmony_ci return 0; 580862306a36Sopenharmony_ci} 580962306a36Sopenharmony_ci 581062306a36Sopenharmony_cistatic int vm_stat_clear(void *_offset, u64 val) 581162306a36Sopenharmony_ci{ 581262306a36Sopenharmony_ci unsigned offset = (long)_offset; 581362306a36Sopenharmony_ci struct kvm *kvm; 581462306a36Sopenharmony_ci 581562306a36Sopenharmony_ci if (val) 581662306a36Sopenharmony_ci return -EINVAL; 581762306a36Sopenharmony_ci 581862306a36Sopenharmony_ci mutex_lock(&kvm_lock); 581962306a36Sopenharmony_ci list_for_each_entry(kvm, &vm_list, vm_list) { 582062306a36Sopenharmony_ci kvm_clear_stat_per_vm(kvm, offset); 582162306a36Sopenharmony_ci } 582262306a36Sopenharmony_ci mutex_unlock(&kvm_lock); 582362306a36Sopenharmony_ci 582462306a36Sopenharmony_ci return 0; 582562306a36Sopenharmony_ci} 582662306a36Sopenharmony_ci 582762306a36Sopenharmony_ciDEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, vm_stat_clear, 
"%llu\n"); 582862306a36Sopenharmony_ciDEFINE_SIMPLE_ATTRIBUTE(vm_stat_readonly_fops, vm_stat_get, NULL, "%llu\n"); 582962306a36Sopenharmony_ci 583062306a36Sopenharmony_cistatic int vcpu_stat_get(void *_offset, u64 *val) 583162306a36Sopenharmony_ci{ 583262306a36Sopenharmony_ci unsigned offset = (long)_offset; 583362306a36Sopenharmony_ci struct kvm *kvm; 583462306a36Sopenharmony_ci u64 tmp_val; 583562306a36Sopenharmony_ci 583662306a36Sopenharmony_ci *val = 0; 583762306a36Sopenharmony_ci mutex_lock(&kvm_lock); 583862306a36Sopenharmony_ci list_for_each_entry(kvm, &vm_list, vm_list) { 583962306a36Sopenharmony_ci kvm_get_stat_per_vcpu(kvm, offset, &tmp_val); 584062306a36Sopenharmony_ci *val += tmp_val; 584162306a36Sopenharmony_ci } 584262306a36Sopenharmony_ci mutex_unlock(&kvm_lock); 584362306a36Sopenharmony_ci return 0; 584462306a36Sopenharmony_ci} 584562306a36Sopenharmony_ci 584662306a36Sopenharmony_cistatic int vcpu_stat_clear(void *_offset, u64 val) 584762306a36Sopenharmony_ci{ 584862306a36Sopenharmony_ci unsigned offset = (long)_offset; 584962306a36Sopenharmony_ci struct kvm *kvm; 585062306a36Sopenharmony_ci 585162306a36Sopenharmony_ci if (val) 585262306a36Sopenharmony_ci return -EINVAL; 585362306a36Sopenharmony_ci 585462306a36Sopenharmony_ci mutex_lock(&kvm_lock); 585562306a36Sopenharmony_ci list_for_each_entry(kvm, &vm_list, vm_list) { 585662306a36Sopenharmony_ci kvm_clear_stat_per_vcpu(kvm, offset); 585762306a36Sopenharmony_ci } 585862306a36Sopenharmony_ci mutex_unlock(&kvm_lock); 585962306a36Sopenharmony_ci 586062306a36Sopenharmony_ci return 0; 586162306a36Sopenharmony_ci} 586262306a36Sopenharmony_ci 586362306a36Sopenharmony_ciDEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, vcpu_stat_clear, 586462306a36Sopenharmony_ci "%llu\n"); 586562306a36Sopenharmony_ciDEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_readonly_fops, vcpu_stat_get, NULL, "%llu\n"); 586662306a36Sopenharmony_ci 586762306a36Sopenharmony_cistatic void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) 586862306a36Sopenharmony_ci{ 586962306a36Sopenharmony_ci struct kobj_uevent_env *env; 587062306a36Sopenharmony_ci unsigned long long created, active; 587162306a36Sopenharmony_ci 587262306a36Sopenharmony_ci if (!kvm_dev.this_device || !kvm) 587362306a36Sopenharmony_ci return; 587462306a36Sopenharmony_ci 587562306a36Sopenharmony_ci mutex_lock(&kvm_lock); 587662306a36Sopenharmony_ci if (type == KVM_EVENT_CREATE_VM) { 587762306a36Sopenharmony_ci kvm_createvm_count++; 587862306a36Sopenharmony_ci kvm_active_vms++; 587962306a36Sopenharmony_ci } else if (type == KVM_EVENT_DESTROY_VM) { 588062306a36Sopenharmony_ci kvm_active_vms--; 588162306a36Sopenharmony_ci } 588262306a36Sopenharmony_ci created = kvm_createvm_count; 588362306a36Sopenharmony_ci active = kvm_active_vms; 588462306a36Sopenharmony_ci mutex_unlock(&kvm_lock); 588562306a36Sopenharmony_ci 588662306a36Sopenharmony_ci env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT); 588762306a36Sopenharmony_ci if (!env) 588862306a36Sopenharmony_ci return; 588962306a36Sopenharmony_ci 589062306a36Sopenharmony_ci add_uevent_var(env, "CREATED=%llu", created); 589162306a36Sopenharmony_ci add_uevent_var(env, "COUNT=%llu", active); 589262306a36Sopenharmony_ci 589362306a36Sopenharmony_ci if (type == KVM_EVENT_CREATE_VM) { 589462306a36Sopenharmony_ci add_uevent_var(env, "EVENT=create"); 589562306a36Sopenharmony_ci kvm->userspace_pid = task_pid_nr(current); 589662306a36Sopenharmony_ci } else if (type == KVM_EVENT_DESTROY_VM) { 589762306a36Sopenharmony_ci add_uevent_var(env, "EVENT=destroy"); 
589862306a36Sopenharmony_ci } 589962306a36Sopenharmony_ci add_uevent_var(env, "PID=%d", kvm->userspace_pid); 590062306a36Sopenharmony_ci 590162306a36Sopenharmony_ci if (!IS_ERR(kvm->debugfs_dentry)) { 590262306a36Sopenharmony_ci char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); 590362306a36Sopenharmony_ci 590462306a36Sopenharmony_ci if (p) { 590562306a36Sopenharmony_ci tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); 590662306a36Sopenharmony_ci if (!IS_ERR(tmp)) 590762306a36Sopenharmony_ci add_uevent_var(env, "STATS_PATH=%s", tmp); 590862306a36Sopenharmony_ci kfree(p); 590962306a36Sopenharmony_ci } 591062306a36Sopenharmony_ci } 591162306a36Sopenharmony_ci /* no need for checks, since we are adding at most only 5 keys */ 591262306a36Sopenharmony_ci env->envp[env->envp_idx++] = NULL; 591362306a36Sopenharmony_ci kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp); 591462306a36Sopenharmony_ci kfree(env); 591562306a36Sopenharmony_ci} 591662306a36Sopenharmony_ci 591762306a36Sopenharmony_cistatic void kvm_init_debug(void) 591862306a36Sopenharmony_ci{ 591962306a36Sopenharmony_ci const struct file_operations *fops; 592062306a36Sopenharmony_ci const struct _kvm_stats_desc *pdesc; 592162306a36Sopenharmony_ci int i; 592262306a36Sopenharmony_ci 592362306a36Sopenharmony_ci kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); 592462306a36Sopenharmony_ci 592562306a36Sopenharmony_ci for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) { 592662306a36Sopenharmony_ci pdesc = &kvm_vm_stats_desc[i]; 592762306a36Sopenharmony_ci if (kvm_stats_debugfs_mode(pdesc) & 0222) 592862306a36Sopenharmony_ci fops = &vm_stat_fops; 592962306a36Sopenharmony_ci else 593062306a36Sopenharmony_ci fops = &vm_stat_readonly_fops; 593162306a36Sopenharmony_ci debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), 593262306a36Sopenharmony_ci kvm_debugfs_dir, 593362306a36Sopenharmony_ci (void *)(long)pdesc->desc.offset, fops); 593462306a36Sopenharmony_ci } 593562306a36Sopenharmony_ci 593662306a36Sopenharmony_ci for (i = 0; i < kvm_vcpu_stats_header.num_desc; ++i) { 593762306a36Sopenharmony_ci pdesc = &kvm_vcpu_stats_desc[i]; 593862306a36Sopenharmony_ci if (kvm_stats_debugfs_mode(pdesc) & 0222) 593962306a36Sopenharmony_ci fops = &vcpu_stat_fops; 594062306a36Sopenharmony_ci else 594162306a36Sopenharmony_ci fops = &vcpu_stat_readonly_fops; 594262306a36Sopenharmony_ci debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), 594362306a36Sopenharmony_ci kvm_debugfs_dir, 594462306a36Sopenharmony_ci (void *)(long)pdesc->desc.offset, fops); 594562306a36Sopenharmony_ci } 594662306a36Sopenharmony_ci} 594762306a36Sopenharmony_ci 594862306a36Sopenharmony_cistatic inline 594962306a36Sopenharmony_cistruct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) 595062306a36Sopenharmony_ci{ 595162306a36Sopenharmony_ci return container_of(pn, struct kvm_vcpu, preempt_notifier); 595262306a36Sopenharmony_ci} 595362306a36Sopenharmony_ci 595462306a36Sopenharmony_cistatic void kvm_sched_in(struct preempt_notifier *pn, int cpu) 595562306a36Sopenharmony_ci{ 595662306a36Sopenharmony_ci struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 595762306a36Sopenharmony_ci 595862306a36Sopenharmony_ci WRITE_ONCE(vcpu->preempted, false); 595962306a36Sopenharmony_ci WRITE_ONCE(vcpu->ready, false); 596062306a36Sopenharmony_ci 596162306a36Sopenharmony_ci __this_cpu_write(kvm_running_vcpu, vcpu); 596262306a36Sopenharmony_ci kvm_arch_sched_in(vcpu, cpu); 596362306a36Sopenharmony_ci kvm_arch_vcpu_load(vcpu, cpu); 
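	/*
	 * Descriptive note (not in the original source): kvm_running_vcpu is
	 * written before the arch callbacks above run, so code reached from
	 * kvm_arch_sched_in() and kvm_arch_vcpu_load() can already resolve
	 * this vCPU via kvm_get_running_vcpu().
	 */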
596462306a36Sopenharmony_ci} 596562306a36Sopenharmony_ci 596662306a36Sopenharmony_cistatic void kvm_sched_out(struct preempt_notifier *pn, 596762306a36Sopenharmony_ci struct task_struct *next) 596862306a36Sopenharmony_ci{ 596962306a36Sopenharmony_ci struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 597062306a36Sopenharmony_ci 597162306a36Sopenharmony_ci if (current->on_rq) { 597262306a36Sopenharmony_ci WRITE_ONCE(vcpu->preempted, true); 597362306a36Sopenharmony_ci WRITE_ONCE(vcpu->ready, true); 597462306a36Sopenharmony_ci } 597562306a36Sopenharmony_ci kvm_arch_vcpu_put(vcpu); 597662306a36Sopenharmony_ci __this_cpu_write(kvm_running_vcpu, NULL); 597762306a36Sopenharmony_ci} 597862306a36Sopenharmony_ci 597962306a36Sopenharmony_ci/** 598062306a36Sopenharmony_ci * kvm_get_running_vcpu - get the vcpu running on the current CPU. 598162306a36Sopenharmony_ci * 598262306a36Sopenharmony_ci * We can disable preemption locally around accessing the per-CPU variable, 598362306a36Sopenharmony_ci * and use the resolved vcpu pointer after enabling preemption again, 598462306a36Sopenharmony_ci * because even if the current thread is migrated to another CPU, reading 598562306a36Sopenharmony_ci * the per-CPU value later will give us the same value as we update the 598662306a36Sopenharmony_ci * per-CPU variable in the preempt notifier handlers. 598762306a36Sopenharmony_ci */ 598862306a36Sopenharmony_cistruct kvm_vcpu *kvm_get_running_vcpu(void) 598962306a36Sopenharmony_ci{ 599062306a36Sopenharmony_ci struct kvm_vcpu *vcpu; 599162306a36Sopenharmony_ci 599262306a36Sopenharmony_ci preempt_disable(); 599362306a36Sopenharmony_ci vcpu = __this_cpu_read(kvm_running_vcpu); 599462306a36Sopenharmony_ci preempt_enable(); 599562306a36Sopenharmony_ci 599662306a36Sopenharmony_ci return vcpu; 599762306a36Sopenharmony_ci} 599862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_get_running_vcpu); 599962306a36Sopenharmony_ci 600062306a36Sopenharmony_ci/** 600162306a36Sopenharmony_ci * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus. 600262306a36Sopenharmony_ci */ 600362306a36Sopenharmony_cistruct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) 600462306a36Sopenharmony_ci{ 600562306a36Sopenharmony_ci return &kvm_running_vcpu; 600662306a36Sopenharmony_ci} 600762306a36Sopenharmony_ci 600862306a36Sopenharmony_ci#ifdef CONFIG_GUEST_PERF_EVENTS 600962306a36Sopenharmony_cistatic unsigned int kvm_guest_state(void) 601062306a36Sopenharmony_ci{ 601162306a36Sopenharmony_ci struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); 601262306a36Sopenharmony_ci unsigned int state; 601362306a36Sopenharmony_ci 601462306a36Sopenharmony_ci if (!kvm_arch_pmi_in_guest(vcpu)) 601562306a36Sopenharmony_ci return 0; 601662306a36Sopenharmony_ci 601762306a36Sopenharmony_ci state = PERF_GUEST_ACTIVE; 601862306a36Sopenharmony_ci if (!kvm_arch_vcpu_in_kernel(vcpu)) 601962306a36Sopenharmony_ci state |= PERF_GUEST_USER; 602062306a36Sopenharmony_ci 602162306a36Sopenharmony_ci return state; 602262306a36Sopenharmony_ci} 602362306a36Sopenharmony_ci 602462306a36Sopenharmony_cistatic unsigned long kvm_guest_get_ip(void) 602562306a36Sopenharmony_ci{ 602662306a36Sopenharmony_ci struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); 602762306a36Sopenharmony_ci 602862306a36Sopenharmony_ci /* Retrieving the IP must be guarded by a call to kvm_guest_state(). 
*/ 602962306a36Sopenharmony_ci if (WARN_ON_ONCE(!kvm_arch_pmi_in_guest(vcpu))) 603062306a36Sopenharmony_ci return 0; 603162306a36Sopenharmony_ci 603262306a36Sopenharmony_ci return kvm_arch_vcpu_get_ip(vcpu); 603362306a36Sopenharmony_ci} 603462306a36Sopenharmony_ci 603562306a36Sopenharmony_cistatic struct perf_guest_info_callbacks kvm_guest_cbs = { 603662306a36Sopenharmony_ci .state = kvm_guest_state, 603762306a36Sopenharmony_ci .get_ip = kvm_guest_get_ip, 603862306a36Sopenharmony_ci .handle_intel_pt_intr = NULL, 603962306a36Sopenharmony_ci}; 604062306a36Sopenharmony_ci 604162306a36Sopenharmony_civoid kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void)) 604262306a36Sopenharmony_ci{ 604362306a36Sopenharmony_ci kvm_guest_cbs.handle_intel_pt_intr = pt_intr_handler; 604462306a36Sopenharmony_ci perf_register_guest_info_callbacks(&kvm_guest_cbs); 604562306a36Sopenharmony_ci} 604662306a36Sopenharmony_civoid kvm_unregister_perf_callbacks(void) 604762306a36Sopenharmony_ci{ 604862306a36Sopenharmony_ci perf_unregister_guest_info_callbacks(&kvm_guest_cbs); 604962306a36Sopenharmony_ci} 605062306a36Sopenharmony_ci#endif 605162306a36Sopenharmony_ci 605262306a36Sopenharmony_ciint kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) 605362306a36Sopenharmony_ci{ 605462306a36Sopenharmony_ci int r; 605562306a36Sopenharmony_ci int cpu; 605662306a36Sopenharmony_ci 605762306a36Sopenharmony_ci#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING 605862306a36Sopenharmony_ci r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online", 605962306a36Sopenharmony_ci kvm_online_cpu, kvm_offline_cpu); 606062306a36Sopenharmony_ci if (r) 606162306a36Sopenharmony_ci return r; 606262306a36Sopenharmony_ci 606362306a36Sopenharmony_ci register_syscore_ops(&kvm_syscore_ops); 606462306a36Sopenharmony_ci#endif 606562306a36Sopenharmony_ci 606662306a36Sopenharmony_ci /* A kmem cache lets us meet the alignment requirements of fx_save. 
*/ 606762306a36Sopenharmony_ci if (!vcpu_align) 606862306a36Sopenharmony_ci vcpu_align = __alignof__(struct kvm_vcpu); 606962306a36Sopenharmony_ci kvm_vcpu_cache = 607062306a36Sopenharmony_ci kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align, 607162306a36Sopenharmony_ci SLAB_ACCOUNT, 607262306a36Sopenharmony_ci offsetof(struct kvm_vcpu, arch), 607362306a36Sopenharmony_ci offsetofend(struct kvm_vcpu, stats_id) 607462306a36Sopenharmony_ci - offsetof(struct kvm_vcpu, arch), 607562306a36Sopenharmony_ci NULL); 607662306a36Sopenharmony_ci if (!kvm_vcpu_cache) { 607762306a36Sopenharmony_ci r = -ENOMEM; 607862306a36Sopenharmony_ci goto err_vcpu_cache; 607962306a36Sopenharmony_ci } 608062306a36Sopenharmony_ci 608162306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 608262306a36Sopenharmony_ci if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu), 608362306a36Sopenharmony_ci GFP_KERNEL, cpu_to_node(cpu))) { 608462306a36Sopenharmony_ci r = -ENOMEM; 608562306a36Sopenharmony_ci goto err_cpu_kick_mask; 608662306a36Sopenharmony_ci } 608762306a36Sopenharmony_ci } 608862306a36Sopenharmony_ci 608962306a36Sopenharmony_ci r = kvm_irqfd_init(); 609062306a36Sopenharmony_ci if (r) 609162306a36Sopenharmony_ci goto err_irqfd; 609262306a36Sopenharmony_ci 609362306a36Sopenharmony_ci r = kvm_async_pf_init(); 609462306a36Sopenharmony_ci if (r) 609562306a36Sopenharmony_ci goto err_async_pf; 609662306a36Sopenharmony_ci 609762306a36Sopenharmony_ci kvm_chardev_ops.owner = module; 609862306a36Sopenharmony_ci 609962306a36Sopenharmony_ci kvm_preempt_ops.sched_in = kvm_sched_in; 610062306a36Sopenharmony_ci kvm_preempt_ops.sched_out = kvm_sched_out; 610162306a36Sopenharmony_ci 610262306a36Sopenharmony_ci kvm_init_debug(); 610362306a36Sopenharmony_ci 610462306a36Sopenharmony_ci r = kvm_vfio_ops_init(); 610562306a36Sopenharmony_ci if (WARN_ON_ONCE(r)) 610662306a36Sopenharmony_ci goto err_vfio; 610762306a36Sopenharmony_ci 610862306a36Sopenharmony_ci /* 610962306a36Sopenharmony_ci * Registration _must_ be the very last thing done, as this exposes 611062306a36Sopenharmony_ci * /dev/kvm to userspace, i.e. all infrastructure must be setup! 
611162306a36Sopenharmony_ci */ 611262306a36Sopenharmony_ci r = misc_register(&kvm_dev); 611362306a36Sopenharmony_ci if (r) { 611462306a36Sopenharmony_ci pr_err("kvm: misc device register failed\n"); 611562306a36Sopenharmony_ci goto err_register; 611662306a36Sopenharmony_ci } 611762306a36Sopenharmony_ci 611862306a36Sopenharmony_ci return 0; 611962306a36Sopenharmony_ci 612062306a36Sopenharmony_cierr_register: 612162306a36Sopenharmony_ci kvm_vfio_ops_exit(); 612262306a36Sopenharmony_cierr_vfio: 612362306a36Sopenharmony_ci kvm_async_pf_deinit(); 612462306a36Sopenharmony_cierr_async_pf: 612562306a36Sopenharmony_ci kvm_irqfd_exit(); 612662306a36Sopenharmony_cierr_irqfd: 612762306a36Sopenharmony_cierr_cpu_kick_mask: 612862306a36Sopenharmony_ci for_each_possible_cpu(cpu) 612962306a36Sopenharmony_ci free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); 613062306a36Sopenharmony_ci kmem_cache_destroy(kvm_vcpu_cache); 613162306a36Sopenharmony_cierr_vcpu_cache: 613262306a36Sopenharmony_ci#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING 613362306a36Sopenharmony_ci unregister_syscore_ops(&kvm_syscore_ops); 613462306a36Sopenharmony_ci cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE); 613562306a36Sopenharmony_ci#endif 613662306a36Sopenharmony_ci return r; 613762306a36Sopenharmony_ci} 613862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_init); 613962306a36Sopenharmony_ci 614062306a36Sopenharmony_civoid kvm_exit(void) 614162306a36Sopenharmony_ci{ 614262306a36Sopenharmony_ci int cpu; 614362306a36Sopenharmony_ci 614462306a36Sopenharmony_ci /* 614562306a36Sopenharmony_ci * Note, unregistering /dev/kvm doesn't strictly need to come first, 614662306a36Sopenharmony_ci * fops_get(), a.k.a. try_module_get(), prevents acquiring references 614762306a36Sopenharmony_ci * to KVM while the module is being stopped. 
614862306a36Sopenharmony_ci */ 614962306a36Sopenharmony_ci misc_deregister(&kvm_dev); 615062306a36Sopenharmony_ci 615162306a36Sopenharmony_ci debugfs_remove_recursive(kvm_debugfs_dir); 615262306a36Sopenharmony_ci for_each_possible_cpu(cpu) 615362306a36Sopenharmony_ci free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); 615462306a36Sopenharmony_ci kmem_cache_destroy(kvm_vcpu_cache); 615562306a36Sopenharmony_ci kvm_vfio_ops_exit(); 615662306a36Sopenharmony_ci kvm_async_pf_deinit(); 615762306a36Sopenharmony_ci#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING 615862306a36Sopenharmony_ci unregister_syscore_ops(&kvm_syscore_ops); 615962306a36Sopenharmony_ci cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE); 616062306a36Sopenharmony_ci#endif 616162306a36Sopenharmony_ci kvm_irqfd_exit(); 616262306a36Sopenharmony_ci} 616362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kvm_exit); 616462306a36Sopenharmony_ci 616562306a36Sopenharmony_cistruct kvm_vm_worker_thread_context { 616662306a36Sopenharmony_ci struct kvm *kvm; 616762306a36Sopenharmony_ci struct task_struct *parent; 616862306a36Sopenharmony_ci struct completion init_done; 616962306a36Sopenharmony_ci kvm_vm_thread_fn_t thread_fn; 617062306a36Sopenharmony_ci uintptr_t data; 617162306a36Sopenharmony_ci int err; 617262306a36Sopenharmony_ci}; 617362306a36Sopenharmony_ci 617462306a36Sopenharmony_cistatic int kvm_vm_worker_thread(void *context) 617562306a36Sopenharmony_ci{ 617662306a36Sopenharmony_ci /* 617762306a36Sopenharmony_ci * The init_context is allocated on the stack of the parent thread, so 617862306a36Sopenharmony_ci * we have to locally copy anything that is needed beyond initialization 617962306a36Sopenharmony_ci */ 618062306a36Sopenharmony_ci struct kvm_vm_worker_thread_context *init_context = context; 618162306a36Sopenharmony_ci struct task_struct *parent; 618262306a36Sopenharmony_ci struct kvm *kvm = init_context->kvm; 618362306a36Sopenharmony_ci kvm_vm_thread_fn_t thread_fn = init_context->thread_fn; 618462306a36Sopenharmony_ci uintptr_t data = init_context->data; 618562306a36Sopenharmony_ci int err; 618662306a36Sopenharmony_ci 618762306a36Sopenharmony_ci err = kthread_park(current); 618862306a36Sopenharmony_ci /* kthread_park(current) is never supposed to return an error */ 618962306a36Sopenharmony_ci WARN_ON(err != 0); 619062306a36Sopenharmony_ci if (err) 619162306a36Sopenharmony_ci goto init_complete; 619262306a36Sopenharmony_ci 619362306a36Sopenharmony_ci err = cgroup_attach_task_all(init_context->parent, current); 619462306a36Sopenharmony_ci if (err) { 619562306a36Sopenharmony_ci kvm_err("%s: cgroup_attach_task_all failed with err %d\n", 619662306a36Sopenharmony_ci __func__, err); 619762306a36Sopenharmony_ci goto init_complete; 619862306a36Sopenharmony_ci } 619962306a36Sopenharmony_ci 620062306a36Sopenharmony_ci set_user_nice(current, task_nice(init_context->parent)); 620162306a36Sopenharmony_ci 620262306a36Sopenharmony_ciinit_complete: 620362306a36Sopenharmony_ci init_context->err = err; 620462306a36Sopenharmony_ci complete(&init_context->init_done); 620562306a36Sopenharmony_ci init_context = NULL; 620662306a36Sopenharmony_ci 620762306a36Sopenharmony_ci if (err) 620862306a36Sopenharmony_ci goto out; 620962306a36Sopenharmony_ci 621062306a36Sopenharmony_ci /* Wait to be woken up by the spawner before proceeding. 
*/ 621162306a36Sopenharmony_ci kthread_parkme(); 621262306a36Sopenharmony_ci 621362306a36Sopenharmony_ci if (!kthread_should_stop()) 621462306a36Sopenharmony_ci err = thread_fn(kvm, data); 621562306a36Sopenharmony_ci 621662306a36Sopenharmony_ciout: 621762306a36Sopenharmony_ci /* 621862306a36Sopenharmony_ci * Move kthread back to its original cgroup to prevent it lingering in 621962306a36Sopenharmony_ci * the cgroup of the VM process, after the latter finishes its 622062306a36Sopenharmony_ci * execution. 622162306a36Sopenharmony_ci * 622262306a36Sopenharmony_ci * kthread_stop() waits on the 'exited' completion condition which is 622362306a36Sopenharmony_ci * set in exit_mm(), via mm_release(), in do_exit(). However, the 622462306a36Sopenharmony_ci * kthread is removed from the cgroup in the cgroup_exit() which is 622562306a36Sopenharmony_ci * called after the exit_mm(). This causes the kthread_stop() to return 622662306a36Sopenharmony_ci * before the kthread actually quits the cgroup. 622762306a36Sopenharmony_ci */ 622862306a36Sopenharmony_ci rcu_read_lock(); 622962306a36Sopenharmony_ci parent = rcu_dereference(current->real_parent); 623062306a36Sopenharmony_ci get_task_struct(parent); 623162306a36Sopenharmony_ci rcu_read_unlock(); 623262306a36Sopenharmony_ci cgroup_attach_task_all(parent, current); 623362306a36Sopenharmony_ci put_task_struct(parent); 623462306a36Sopenharmony_ci 623562306a36Sopenharmony_ci return err; 623662306a36Sopenharmony_ci} 623762306a36Sopenharmony_ci 623862306a36Sopenharmony_ciint kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, 623962306a36Sopenharmony_ci uintptr_t data, const char *name, 624062306a36Sopenharmony_ci struct task_struct **thread_ptr) 624162306a36Sopenharmony_ci{ 624262306a36Sopenharmony_ci struct kvm_vm_worker_thread_context init_context = {}; 624362306a36Sopenharmony_ci struct task_struct *thread; 624462306a36Sopenharmony_ci 624562306a36Sopenharmony_ci *thread_ptr = NULL; 624662306a36Sopenharmony_ci init_context.kvm = kvm; 624762306a36Sopenharmony_ci init_context.parent = current; 624862306a36Sopenharmony_ci init_context.thread_fn = thread_fn; 624962306a36Sopenharmony_ci init_context.data = data; 625062306a36Sopenharmony_ci init_completion(&init_context.init_done); 625162306a36Sopenharmony_ci 625262306a36Sopenharmony_ci thread = kthread_run(kvm_vm_worker_thread, &init_context, 625362306a36Sopenharmony_ci "%s-%d", name, task_pid_nr(current)); 625462306a36Sopenharmony_ci if (IS_ERR(thread)) 625562306a36Sopenharmony_ci return PTR_ERR(thread); 625662306a36Sopenharmony_ci 625762306a36Sopenharmony_ci /* kthread_run is never supposed to return NULL */ 625862306a36Sopenharmony_ci WARN_ON(thread == NULL); 625962306a36Sopenharmony_ci 626062306a36Sopenharmony_ci wait_for_completion(&init_context.init_done); 626162306a36Sopenharmony_ci 626262306a36Sopenharmony_ci if (!init_context.err) 626362306a36Sopenharmony_ci *thread_ptr = thread; 626462306a36Sopenharmony_ci 626562306a36Sopenharmony_ci return init_context.err; 626662306a36Sopenharmony_ci} 6267
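/*
 * Illustrative sketch, not part of the original file: how a caller might use
 * kvm_vm_create_worker_thread().  The worker parks itself after init, so the
 * spawner unparks it once the returned task pointer has been stashed, and can
 * later stop it with kthread_stop().  The names example_worker_fn() and
 * example_start_worker() are hypothetical.
 */
static int example_worker_fn(struct kvm *kvm, uintptr_t data)
{
	/*
	 * Runs only after the spawner unparks the kthread; the return value
	 * is what kthread_stop() eventually reports.
	 */
	return 0;
}

static int __maybe_unused example_start_worker(struct kvm *kvm,
					       struct task_struct **thread_ptr)
{
	int err;

	err = kvm_vm_create_worker_thread(kvm, example_worker_fn, 0,
					  "kvm-example-worker", thread_ptr);
	if (err)
		return err;

	/* The worker stays parked until the spawner is ready for it to run. */
	kthread_unpark(*thread_ptr);
	return 0;
}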