162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * VFIO generic eventfd code for IRQFD support. 462306a36Sopenharmony_ci * Derived from drivers/vfio/pci/vfio_pci_intrs.c 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 762306a36Sopenharmony_ci * Author: Alex Williamson <alex.williamson@redhat.com> 862306a36Sopenharmony_ci */ 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <linux/vfio.h> 1162306a36Sopenharmony_ci#include <linux/eventfd.h> 1262306a36Sopenharmony_ci#include <linux/file.h> 1362306a36Sopenharmony_ci#include <linux/module.h> 1462306a36Sopenharmony_ci#include <linux/slab.h> 1562306a36Sopenharmony_ci#include "vfio.h" 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_cistatic struct workqueue_struct *vfio_irqfd_cleanup_wq; 1862306a36Sopenharmony_cistatic DEFINE_SPINLOCK(virqfd_lock); 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ciint __init vfio_virqfd_init(void) 2162306a36Sopenharmony_ci{ 2262306a36Sopenharmony_ci vfio_irqfd_cleanup_wq = 2362306a36Sopenharmony_ci create_singlethread_workqueue("vfio-irqfd-cleanup"); 2462306a36Sopenharmony_ci if (!vfio_irqfd_cleanup_wq) 2562306a36Sopenharmony_ci return -ENOMEM; 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci return 0; 2862306a36Sopenharmony_ci} 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_civoid vfio_virqfd_exit(void) 3162306a36Sopenharmony_ci{ 3262306a36Sopenharmony_ci destroy_workqueue(vfio_irqfd_cleanup_wq); 3362306a36Sopenharmony_ci} 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_cistatic void virqfd_deactivate(struct virqfd *virqfd) 3662306a36Sopenharmony_ci{ 3762306a36Sopenharmony_ci queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown); 3862306a36Sopenharmony_ci} 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_cistatic int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) 4162306a36Sopenharmony_ci{ 4262306a36Sopenharmony_ci struct virqfd *virqfd = container_of(wait, struct virqfd, wait); 4362306a36Sopenharmony_ci __poll_t flags = key_to_poll(key); 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci if (flags & EPOLLIN) { 4662306a36Sopenharmony_ci u64 cnt; 4762306a36Sopenharmony_ci eventfd_ctx_do_read(virqfd->eventfd, &cnt); 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_ci /* An event has been signaled, call function */ 5062306a36Sopenharmony_ci if ((!virqfd->handler || 5162306a36Sopenharmony_ci virqfd->handler(virqfd->opaque, virqfd->data)) && 5262306a36Sopenharmony_ci virqfd->thread) 5362306a36Sopenharmony_ci schedule_work(&virqfd->inject); 5462306a36Sopenharmony_ci } 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci if (flags & EPOLLHUP) { 5762306a36Sopenharmony_ci unsigned long flags; 5862306a36Sopenharmony_ci spin_lock_irqsave(&virqfd_lock, flags); 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci /* 6162306a36Sopenharmony_ci * The eventfd is closing, if the virqfd has not yet been 6262306a36Sopenharmony_ci * queued for release, as determined by testing whether the 6362306a36Sopenharmony_ci * virqfd pointer to it is still valid, queue it now. As 6462306a36Sopenharmony_ci * with kvm irqfds, we know we won't race against the virqfd 6562306a36Sopenharmony_ci * going away because we hold the lock to get here. 6662306a36Sopenharmony_ci */ 6762306a36Sopenharmony_ci if (*(virqfd->pvirqfd) == virqfd) { 6862306a36Sopenharmony_ci *(virqfd->pvirqfd) = NULL; 6962306a36Sopenharmony_ci virqfd_deactivate(virqfd); 7062306a36Sopenharmony_ci } 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci spin_unlock_irqrestore(&virqfd_lock, flags); 7362306a36Sopenharmony_ci } 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci return 0; 7662306a36Sopenharmony_ci} 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_cistatic void virqfd_ptable_queue_proc(struct file *file, 7962306a36Sopenharmony_ci wait_queue_head_t *wqh, poll_table *pt) 8062306a36Sopenharmony_ci{ 8162306a36Sopenharmony_ci struct virqfd *virqfd = container_of(pt, struct virqfd, pt); 8262306a36Sopenharmony_ci add_wait_queue(wqh, &virqfd->wait); 8362306a36Sopenharmony_ci} 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_cistatic void virqfd_shutdown(struct work_struct *work) 8662306a36Sopenharmony_ci{ 8762306a36Sopenharmony_ci struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); 8862306a36Sopenharmony_ci u64 cnt; 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); 9162306a36Sopenharmony_ci flush_work(&virqfd->inject); 9262306a36Sopenharmony_ci eventfd_ctx_put(virqfd->eventfd); 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci kfree(virqfd); 9562306a36Sopenharmony_ci} 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_cistatic void virqfd_inject(struct work_struct *work) 9862306a36Sopenharmony_ci{ 9962306a36Sopenharmony_ci struct virqfd *virqfd = container_of(work, struct virqfd, inject); 10062306a36Sopenharmony_ci if (virqfd->thread) 10162306a36Sopenharmony_ci virqfd->thread(virqfd->opaque, virqfd->data); 10262306a36Sopenharmony_ci} 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ciint vfio_virqfd_enable(void *opaque, 10562306a36Sopenharmony_ci int (*handler)(void *, void *), 10662306a36Sopenharmony_ci void (*thread)(void *, void *), 10762306a36Sopenharmony_ci void *data, struct virqfd **pvirqfd, int fd) 10862306a36Sopenharmony_ci{ 10962306a36Sopenharmony_ci struct fd irqfd; 11062306a36Sopenharmony_ci struct eventfd_ctx *ctx; 11162306a36Sopenharmony_ci struct virqfd *virqfd; 11262306a36Sopenharmony_ci int ret = 0; 11362306a36Sopenharmony_ci __poll_t events; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL_ACCOUNT); 11662306a36Sopenharmony_ci if (!virqfd) 11762306a36Sopenharmony_ci return -ENOMEM; 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci virqfd->pvirqfd = pvirqfd; 12062306a36Sopenharmony_ci virqfd->opaque = opaque; 12162306a36Sopenharmony_ci virqfd->handler = handler; 12262306a36Sopenharmony_ci virqfd->thread = thread; 12362306a36Sopenharmony_ci virqfd->data = data; 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci INIT_WORK(&virqfd->shutdown, virqfd_shutdown); 12662306a36Sopenharmony_ci INIT_WORK(&virqfd->inject, virqfd_inject); 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci irqfd = fdget(fd); 12962306a36Sopenharmony_ci if (!irqfd.file) { 13062306a36Sopenharmony_ci ret = -EBADF; 13162306a36Sopenharmony_ci goto err_fd; 13262306a36Sopenharmony_ci } 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci ctx = eventfd_ctx_fileget(irqfd.file); 13562306a36Sopenharmony_ci if (IS_ERR(ctx)) { 13662306a36Sopenharmony_ci ret = PTR_ERR(ctx); 13762306a36Sopenharmony_ci goto err_ctx; 13862306a36Sopenharmony_ci } 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci virqfd->eventfd = ctx; 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci /* 14362306a36Sopenharmony_ci * virqfds can be released by closing the eventfd or directly 14462306a36Sopenharmony_ci * through ioctl. These are both done through a workqueue, so 14562306a36Sopenharmony_ci * we update the pointer to the virqfd under lock to avoid 14662306a36Sopenharmony_ci * pushing multiple jobs to release the same virqfd. 14762306a36Sopenharmony_ci */ 14862306a36Sopenharmony_ci spin_lock_irq(&virqfd_lock); 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci if (*pvirqfd) { 15162306a36Sopenharmony_ci spin_unlock_irq(&virqfd_lock); 15262306a36Sopenharmony_ci ret = -EBUSY; 15362306a36Sopenharmony_ci goto err_busy; 15462306a36Sopenharmony_ci } 15562306a36Sopenharmony_ci *pvirqfd = virqfd; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci spin_unlock_irq(&virqfd_lock); 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci /* 16062306a36Sopenharmony_ci * Install our own custom wake-up handling so we are notified via 16162306a36Sopenharmony_ci * a callback whenever someone signals the underlying eventfd. 16262306a36Sopenharmony_ci */ 16362306a36Sopenharmony_ci init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup); 16462306a36Sopenharmony_ci init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc); 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci events = vfs_poll(irqfd.file, &virqfd->pt); 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci /* 16962306a36Sopenharmony_ci * Check if there was an event already pending on the eventfd 17062306a36Sopenharmony_ci * before we registered and trigger it as if we didn't miss it. 17162306a36Sopenharmony_ci */ 17262306a36Sopenharmony_ci if (events & EPOLLIN) { 17362306a36Sopenharmony_ci if ((!handler || handler(opaque, data)) && thread) 17462306a36Sopenharmony_ci schedule_work(&virqfd->inject); 17562306a36Sopenharmony_ci } 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci /* 17862306a36Sopenharmony_ci * Do not drop the file until the irqfd is fully initialized, 17962306a36Sopenharmony_ci * otherwise we might race against the EPOLLHUP. 18062306a36Sopenharmony_ci */ 18162306a36Sopenharmony_ci fdput(irqfd); 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci return 0; 18462306a36Sopenharmony_cierr_busy: 18562306a36Sopenharmony_ci eventfd_ctx_put(ctx); 18662306a36Sopenharmony_cierr_ctx: 18762306a36Sopenharmony_ci fdput(irqfd); 18862306a36Sopenharmony_cierr_fd: 18962306a36Sopenharmony_ci kfree(virqfd); 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci return ret; 19262306a36Sopenharmony_ci} 19362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_virqfd_enable); 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_civoid vfio_virqfd_disable(struct virqfd **pvirqfd) 19662306a36Sopenharmony_ci{ 19762306a36Sopenharmony_ci unsigned long flags; 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci spin_lock_irqsave(&virqfd_lock, flags); 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci if (*pvirqfd) { 20262306a36Sopenharmony_ci virqfd_deactivate(*pvirqfd); 20362306a36Sopenharmony_ci *pvirqfd = NULL; 20462306a36Sopenharmony_ci } 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci spin_unlock_irqrestore(&virqfd_lock, flags); 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci /* 20962306a36Sopenharmony_ci * Block until we know all outstanding shutdown jobs have completed. 21062306a36Sopenharmony_ci * Even if we don't queue the job, flush the wq to be sure it's 21162306a36Sopenharmony_ci * been released. 21262306a36Sopenharmony_ci */ 21362306a36Sopenharmony_ci flush_workqueue(vfio_irqfd_cleanup_wq); 21462306a36Sopenharmony_ci} 21562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_virqfd_disable); 216