18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * kvm asynchronous fault support 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright 2010 Red Hat, Inc. 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Author: 88c2ecf20Sopenharmony_ci * Gleb Natapov <gleb@redhat.com> 98c2ecf20Sopenharmony_ci */ 108c2ecf20Sopenharmony_ci 118c2ecf20Sopenharmony_ci#include <linux/kvm_host.h> 128c2ecf20Sopenharmony_ci#include <linux/slab.h> 138c2ecf20Sopenharmony_ci#include <linux/module.h> 148c2ecf20Sopenharmony_ci#include <linux/mmu_context.h> 158c2ecf20Sopenharmony_ci#include <linux/sched/mm.h> 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ci#include "async_pf.h" 188c2ecf20Sopenharmony_ci#include <trace/events/kvm.h> 198c2ecf20Sopenharmony_ci 208c2ecf20Sopenharmony_cistatic struct kmem_cache *async_pf_cache; 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_ciint kvm_async_pf_init(void) 238c2ecf20Sopenharmony_ci{ 248c2ecf20Sopenharmony_ci async_pf_cache = KMEM_CACHE(kvm_async_pf, 0); 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_ci if (!async_pf_cache) 278c2ecf20Sopenharmony_ci return -ENOMEM; 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_ci return 0; 308c2ecf20Sopenharmony_ci} 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_civoid kvm_async_pf_deinit(void) 338c2ecf20Sopenharmony_ci{ 348c2ecf20Sopenharmony_ci kmem_cache_destroy(async_pf_cache); 358c2ecf20Sopenharmony_ci async_pf_cache = NULL; 368c2ecf20Sopenharmony_ci} 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_civoid kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu) 398c2ecf20Sopenharmony_ci{ 408c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&vcpu->async_pf.done); 418c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&vcpu->async_pf.queue); 428c2ecf20Sopenharmony_ci spin_lock_init(&vcpu->async_pf.lock); 438c2ecf20Sopenharmony_ci} 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_cistatic void async_pf_execute(struct work_struct *work) 468c2ecf20Sopenharmony_ci{ 478c2ecf20Sopenharmony_ci struct kvm_async_pf *apf = 488c2ecf20Sopenharmony_ci container_of(work, struct kvm_async_pf, work); 498c2ecf20Sopenharmony_ci struct mm_struct *mm = apf->mm; 508c2ecf20Sopenharmony_ci struct kvm_vcpu *vcpu = apf->vcpu; 518c2ecf20Sopenharmony_ci unsigned long addr = apf->addr; 528c2ecf20Sopenharmony_ci gpa_t cr2_or_gpa = apf->cr2_or_gpa; 538c2ecf20Sopenharmony_ci int locked = 1; 548c2ecf20Sopenharmony_ci bool first; 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci might_sleep(); 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_ci /* 598c2ecf20Sopenharmony_ci * This work is run asynchronously to the task which owns 608c2ecf20Sopenharmony_ci * mm and might be done in another context, so we must 618c2ecf20Sopenharmony_ci * access remotely. 628c2ecf20Sopenharmony_ci */ 638c2ecf20Sopenharmony_ci mmap_read_lock(mm); 648c2ecf20Sopenharmony_ci get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, NULL, 658c2ecf20Sopenharmony_ci &locked); 668c2ecf20Sopenharmony_ci if (locked) 678c2ecf20Sopenharmony_ci mmap_read_unlock(mm); 688c2ecf20Sopenharmony_ci 698c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC)) 708c2ecf20Sopenharmony_ci kvm_arch_async_page_present(vcpu, apf); 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci spin_lock(&vcpu->async_pf.lock); 738c2ecf20Sopenharmony_ci first = list_empty(&vcpu->async_pf.done); 748c2ecf20Sopenharmony_ci list_add_tail(&apf->link, &vcpu->async_pf.done); 758c2ecf20Sopenharmony_ci apf->vcpu = NULL; 768c2ecf20Sopenharmony_ci spin_unlock(&vcpu->async_pf.lock); 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first) 798c2ecf20Sopenharmony_ci kvm_arch_async_page_present_queued(vcpu); 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_ci /* 828c2ecf20Sopenharmony_ci * apf may be freed by kvm_check_async_pf_completion() after 838c2ecf20Sopenharmony_ci * this point 848c2ecf20Sopenharmony_ci */ 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci trace_kvm_async_pf_completed(addr, cr2_or_gpa); 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci rcuwait_wake_up(&vcpu->wait); 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci mmput(mm); 918c2ecf20Sopenharmony_ci kvm_put_kvm(vcpu->kvm); 928c2ecf20Sopenharmony_ci} 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_civoid kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) 958c2ecf20Sopenharmony_ci{ 968c2ecf20Sopenharmony_ci spin_lock(&vcpu->async_pf.lock); 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ci /* cancel outstanding work queue item */ 998c2ecf20Sopenharmony_ci while (!list_empty(&vcpu->async_pf.queue)) { 1008c2ecf20Sopenharmony_ci struct kvm_async_pf *work = 1018c2ecf20Sopenharmony_ci list_first_entry(&vcpu->async_pf.queue, 1028c2ecf20Sopenharmony_ci typeof(*work), queue); 1038c2ecf20Sopenharmony_ci list_del(&work->queue); 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci /* 1068c2ecf20Sopenharmony_ci * We know it's present in vcpu->async_pf.done, do 1078c2ecf20Sopenharmony_ci * nothing here. 1088c2ecf20Sopenharmony_ci */ 1098c2ecf20Sopenharmony_ci if (!work->vcpu) 1108c2ecf20Sopenharmony_ci continue; 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci spin_unlock(&vcpu->async_pf.lock); 1138c2ecf20Sopenharmony_ci#ifdef CONFIG_KVM_ASYNC_PF_SYNC 1148c2ecf20Sopenharmony_ci flush_work(&work->work); 1158c2ecf20Sopenharmony_ci#else 1168c2ecf20Sopenharmony_ci if (cancel_work_sync(&work->work)) { 1178c2ecf20Sopenharmony_ci mmput(work->mm); 1188c2ecf20Sopenharmony_ci kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ 1198c2ecf20Sopenharmony_ci kmem_cache_free(async_pf_cache, work); 1208c2ecf20Sopenharmony_ci } 1218c2ecf20Sopenharmony_ci#endif 1228c2ecf20Sopenharmony_ci spin_lock(&vcpu->async_pf.lock); 1238c2ecf20Sopenharmony_ci } 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci while (!list_empty(&vcpu->async_pf.done)) { 1268c2ecf20Sopenharmony_ci struct kvm_async_pf *work = 1278c2ecf20Sopenharmony_ci list_first_entry(&vcpu->async_pf.done, 1288c2ecf20Sopenharmony_ci typeof(*work), link); 1298c2ecf20Sopenharmony_ci list_del(&work->link); 1308c2ecf20Sopenharmony_ci kmem_cache_free(async_pf_cache, work); 1318c2ecf20Sopenharmony_ci } 1328c2ecf20Sopenharmony_ci spin_unlock(&vcpu->async_pf.lock); 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_ci vcpu->async_pf.queued = 0; 1358c2ecf20Sopenharmony_ci} 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_civoid kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) 1388c2ecf20Sopenharmony_ci{ 1398c2ecf20Sopenharmony_ci struct kvm_async_pf *work; 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_ci while (!list_empty_careful(&vcpu->async_pf.done) && 1428c2ecf20Sopenharmony_ci kvm_arch_can_dequeue_async_page_present(vcpu)) { 1438c2ecf20Sopenharmony_ci spin_lock(&vcpu->async_pf.lock); 1448c2ecf20Sopenharmony_ci work = list_first_entry(&vcpu->async_pf.done, typeof(*work), 1458c2ecf20Sopenharmony_ci link); 1468c2ecf20Sopenharmony_ci list_del(&work->link); 1478c2ecf20Sopenharmony_ci spin_unlock(&vcpu->async_pf.lock); 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci kvm_arch_async_page_ready(vcpu, work); 1508c2ecf20Sopenharmony_ci if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC)) 1518c2ecf20Sopenharmony_ci kvm_arch_async_page_present(vcpu, work); 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci list_del(&work->queue); 1548c2ecf20Sopenharmony_ci vcpu->async_pf.queued--; 1558c2ecf20Sopenharmony_ci kmem_cache_free(async_pf_cache, work); 1568c2ecf20Sopenharmony_ci } 1578c2ecf20Sopenharmony_ci} 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci/* 1608c2ecf20Sopenharmony_ci * Try to schedule a job to handle page fault asynchronously. Returns 'true' on 1618c2ecf20Sopenharmony_ci * success, 'false' on failure (page fault has to be handled synchronously). 1628c2ecf20Sopenharmony_ci */ 1638c2ecf20Sopenharmony_cibool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, 1648c2ecf20Sopenharmony_ci unsigned long hva, struct kvm_arch_async_pf *arch) 1658c2ecf20Sopenharmony_ci{ 1668c2ecf20Sopenharmony_ci struct kvm_async_pf *work; 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU) 1698c2ecf20Sopenharmony_ci return false; 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci /* Arch specific code should not do async PF in this case */ 1728c2ecf20Sopenharmony_ci if (unlikely(kvm_is_error_hva(hva))) 1738c2ecf20Sopenharmony_ci return false; 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci /* 1768c2ecf20Sopenharmony_ci * do alloc nowait since if we are going to sleep anyway we 1778c2ecf20Sopenharmony_ci * may as well sleep faulting in page 1788c2ecf20Sopenharmony_ci */ 1798c2ecf20Sopenharmony_ci work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN); 1808c2ecf20Sopenharmony_ci if (!work) 1818c2ecf20Sopenharmony_ci return false; 1828c2ecf20Sopenharmony_ci 1838c2ecf20Sopenharmony_ci work->wakeup_all = false; 1848c2ecf20Sopenharmony_ci work->vcpu = vcpu; 1858c2ecf20Sopenharmony_ci work->cr2_or_gpa = cr2_or_gpa; 1868c2ecf20Sopenharmony_ci work->addr = hva; 1878c2ecf20Sopenharmony_ci work->arch = *arch; 1888c2ecf20Sopenharmony_ci work->mm = current->mm; 1898c2ecf20Sopenharmony_ci mmget(work->mm); 1908c2ecf20Sopenharmony_ci kvm_get_kvm(work->vcpu->kvm); 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci INIT_WORK(&work->work, async_pf_execute); 1938c2ecf20Sopenharmony_ci 1948c2ecf20Sopenharmony_ci list_add_tail(&work->queue, &vcpu->async_pf.queue); 1958c2ecf20Sopenharmony_ci vcpu->async_pf.queued++; 1968c2ecf20Sopenharmony_ci work->notpresent_injected = kvm_arch_async_page_not_present(vcpu, work); 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci schedule_work(&work->work); 1998c2ecf20Sopenharmony_ci 2008c2ecf20Sopenharmony_ci return true; 2018c2ecf20Sopenharmony_ci} 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ciint kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu) 2048c2ecf20Sopenharmony_ci{ 2058c2ecf20Sopenharmony_ci struct kvm_async_pf *work; 2068c2ecf20Sopenharmony_ci bool first; 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_ci if (!list_empty_careful(&vcpu->async_pf.done)) 2098c2ecf20Sopenharmony_ci return 0; 2108c2ecf20Sopenharmony_ci 2118c2ecf20Sopenharmony_ci work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC); 2128c2ecf20Sopenharmony_ci if (!work) 2138c2ecf20Sopenharmony_ci return -ENOMEM; 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci work->wakeup_all = true; 2168c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&work->queue); /* for list_del to work */ 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci spin_lock(&vcpu->async_pf.lock); 2198c2ecf20Sopenharmony_ci first = list_empty(&vcpu->async_pf.done); 2208c2ecf20Sopenharmony_ci list_add_tail(&work->link, &vcpu->async_pf.done); 2218c2ecf20Sopenharmony_ci spin_unlock(&vcpu->async_pf.lock); 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first) 2248c2ecf20Sopenharmony_ci kvm_arch_async_page_present_queued(vcpu); 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci vcpu->async_pf.queued++; 2278c2ecf20Sopenharmony_ci return 0; 2288c2ecf20Sopenharmony_ci} 229