// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO generic eventfd code for IRQFD support.
 * Derived from drivers/vfio/pci/vfio_pci_intrs.c
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/vfio.h>
1162306a36Sopenharmony_ci#include <linux/eventfd.h>
1262306a36Sopenharmony_ci#include <linux/file.h>
1362306a36Sopenharmony_ci#include <linux/module.h>
1462306a36Sopenharmony_ci#include <linux/slab.h>
1562306a36Sopenharmony_ci#include "vfio.h"
1662306a36Sopenharmony_ci
/* Taken around every read and write of a caller's virqfd pointer slot. */
static DEFINE_SPINLOCK(virqfd_lock);
/* Workqueue on which virqfd shutdown (release) work items are queued. */
static struct workqueue_struct *vfio_irqfd_cleanup_wq;
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ciint __init vfio_virqfd_init(void)
2162306a36Sopenharmony_ci{
2262306a36Sopenharmony_ci	vfio_irqfd_cleanup_wq =
2362306a36Sopenharmony_ci		create_singlethread_workqueue("vfio-irqfd-cleanup");
2462306a36Sopenharmony_ci	if (!vfio_irqfd_cleanup_wq)
2562306a36Sopenharmony_ci		return -ENOMEM;
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_ci	return 0;
2862306a36Sopenharmony_ci}
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_civoid vfio_virqfd_exit(void)
3162306a36Sopenharmony_ci{
3262306a36Sopenharmony_ci	destroy_workqueue(vfio_irqfd_cleanup_wq);
3362306a36Sopenharmony_ci}
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_cistatic void virqfd_deactivate(struct virqfd *virqfd)
3662306a36Sopenharmony_ci{
3762306a36Sopenharmony_ci	queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
3862306a36Sopenharmony_ci}
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_cistatic int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
4162306a36Sopenharmony_ci{
4262306a36Sopenharmony_ci	struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
4362306a36Sopenharmony_ci	__poll_t flags = key_to_poll(key);
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci	if (flags & EPOLLIN) {
4662306a36Sopenharmony_ci		u64 cnt;
4762306a36Sopenharmony_ci		eventfd_ctx_do_read(virqfd->eventfd, &cnt);
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci		/* An event has been signaled, call function */
5062306a36Sopenharmony_ci		if ((!virqfd->handler ||
5162306a36Sopenharmony_ci		     virqfd->handler(virqfd->opaque, virqfd->data)) &&
5262306a36Sopenharmony_ci		    virqfd->thread)
5362306a36Sopenharmony_ci			schedule_work(&virqfd->inject);
5462306a36Sopenharmony_ci	}
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci	if (flags & EPOLLHUP) {
5762306a36Sopenharmony_ci		unsigned long flags;
5862306a36Sopenharmony_ci		spin_lock_irqsave(&virqfd_lock, flags);
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci		/*
6162306a36Sopenharmony_ci		 * The eventfd is closing, if the virqfd has not yet been
6262306a36Sopenharmony_ci		 * queued for release, as determined by testing whether the
6362306a36Sopenharmony_ci		 * virqfd pointer to it is still valid, queue it now.  As
6462306a36Sopenharmony_ci		 * with kvm irqfds, we know we won't race against the virqfd
6562306a36Sopenharmony_ci		 * going away because we hold the lock to get here.
6662306a36Sopenharmony_ci		 */
6762306a36Sopenharmony_ci		if (*(virqfd->pvirqfd) == virqfd) {
6862306a36Sopenharmony_ci			*(virqfd->pvirqfd) = NULL;
6962306a36Sopenharmony_ci			virqfd_deactivate(virqfd);
7062306a36Sopenharmony_ci		}
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci		spin_unlock_irqrestore(&virqfd_lock, flags);
7362306a36Sopenharmony_ci	}
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	return 0;
7662306a36Sopenharmony_ci}
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_cistatic void virqfd_ptable_queue_proc(struct file *file,
7962306a36Sopenharmony_ci				     wait_queue_head_t *wqh, poll_table *pt)
8062306a36Sopenharmony_ci{
8162306a36Sopenharmony_ci	struct virqfd *virqfd = container_of(pt, struct virqfd, pt);
8262306a36Sopenharmony_ci	add_wait_queue(wqh, &virqfd->wait);
8362306a36Sopenharmony_ci}
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_cistatic void virqfd_shutdown(struct work_struct *work)
8662306a36Sopenharmony_ci{
8762306a36Sopenharmony_ci	struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
8862306a36Sopenharmony_ci	u64 cnt;
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
9162306a36Sopenharmony_ci	flush_work(&virqfd->inject);
9262306a36Sopenharmony_ci	eventfd_ctx_put(virqfd->eventfd);
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	kfree(virqfd);
9562306a36Sopenharmony_ci}
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_cistatic void virqfd_inject(struct work_struct *work)
9862306a36Sopenharmony_ci{
9962306a36Sopenharmony_ci	struct virqfd *virqfd = container_of(work, struct virqfd, inject);
10062306a36Sopenharmony_ci	if (virqfd->thread)
10162306a36Sopenharmony_ci		virqfd->thread(virqfd->opaque, virqfd->data);
10262306a36Sopenharmony_ci}
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ciint vfio_virqfd_enable(void *opaque,
10562306a36Sopenharmony_ci		       int (*handler)(void *, void *),
10662306a36Sopenharmony_ci		       void (*thread)(void *, void *),
10762306a36Sopenharmony_ci		       void *data, struct virqfd **pvirqfd, int fd)
10862306a36Sopenharmony_ci{
10962306a36Sopenharmony_ci	struct fd irqfd;
11062306a36Sopenharmony_ci	struct eventfd_ctx *ctx;
11162306a36Sopenharmony_ci	struct virqfd *virqfd;
11262306a36Sopenharmony_ci	int ret = 0;
11362306a36Sopenharmony_ci	__poll_t events;
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL_ACCOUNT);
11662306a36Sopenharmony_ci	if (!virqfd)
11762306a36Sopenharmony_ci		return -ENOMEM;
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci	virqfd->pvirqfd = pvirqfd;
12062306a36Sopenharmony_ci	virqfd->opaque = opaque;
12162306a36Sopenharmony_ci	virqfd->handler = handler;
12262306a36Sopenharmony_ci	virqfd->thread = thread;
12362306a36Sopenharmony_ci	virqfd->data = data;
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
12662306a36Sopenharmony_ci	INIT_WORK(&virqfd->inject, virqfd_inject);
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci	irqfd = fdget(fd);
12962306a36Sopenharmony_ci	if (!irqfd.file) {
13062306a36Sopenharmony_ci		ret = -EBADF;
13162306a36Sopenharmony_ci		goto err_fd;
13262306a36Sopenharmony_ci	}
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	ctx = eventfd_ctx_fileget(irqfd.file);
13562306a36Sopenharmony_ci	if (IS_ERR(ctx)) {
13662306a36Sopenharmony_ci		ret = PTR_ERR(ctx);
13762306a36Sopenharmony_ci		goto err_ctx;
13862306a36Sopenharmony_ci	}
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci	virqfd->eventfd = ctx;
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci	/*
14362306a36Sopenharmony_ci	 * virqfds can be released by closing the eventfd or directly
14462306a36Sopenharmony_ci	 * through ioctl.  These are both done through a workqueue, so
14562306a36Sopenharmony_ci	 * we update the pointer to the virqfd under lock to avoid
14662306a36Sopenharmony_ci	 * pushing multiple jobs to release the same virqfd.
14762306a36Sopenharmony_ci	 */
14862306a36Sopenharmony_ci	spin_lock_irq(&virqfd_lock);
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	if (*pvirqfd) {
15162306a36Sopenharmony_ci		spin_unlock_irq(&virqfd_lock);
15262306a36Sopenharmony_ci		ret = -EBUSY;
15362306a36Sopenharmony_ci		goto err_busy;
15462306a36Sopenharmony_ci	}
15562306a36Sopenharmony_ci	*pvirqfd = virqfd;
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci	spin_unlock_irq(&virqfd_lock);
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci	/*
16062306a36Sopenharmony_ci	 * Install our own custom wake-up handling so we are notified via
16162306a36Sopenharmony_ci	 * a callback whenever someone signals the underlying eventfd.
16262306a36Sopenharmony_ci	 */
16362306a36Sopenharmony_ci	init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
16462306a36Sopenharmony_ci	init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci	events = vfs_poll(irqfd.file, &virqfd->pt);
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	/*
16962306a36Sopenharmony_ci	 * Check if there was an event already pending on the eventfd
17062306a36Sopenharmony_ci	 * before we registered and trigger it as if we didn't miss it.
17162306a36Sopenharmony_ci	 */
17262306a36Sopenharmony_ci	if (events & EPOLLIN) {
17362306a36Sopenharmony_ci		if ((!handler || handler(opaque, data)) && thread)
17462306a36Sopenharmony_ci			schedule_work(&virqfd->inject);
17562306a36Sopenharmony_ci	}
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	/*
17862306a36Sopenharmony_ci	 * Do not drop the file until the irqfd is fully initialized,
17962306a36Sopenharmony_ci	 * otherwise we might race against the EPOLLHUP.
18062306a36Sopenharmony_ci	 */
18162306a36Sopenharmony_ci	fdput(irqfd);
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	return 0;
18462306a36Sopenharmony_cierr_busy:
18562306a36Sopenharmony_ci	eventfd_ctx_put(ctx);
18662306a36Sopenharmony_cierr_ctx:
18762306a36Sopenharmony_ci	fdput(irqfd);
18862306a36Sopenharmony_cierr_fd:
18962306a36Sopenharmony_ci	kfree(virqfd);
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci	return ret;
19262306a36Sopenharmony_ci}
19362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_virqfd_enable);
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_civoid vfio_virqfd_disable(struct virqfd **pvirqfd)
19662306a36Sopenharmony_ci{
19762306a36Sopenharmony_ci	unsigned long flags;
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	spin_lock_irqsave(&virqfd_lock, flags);
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	if (*pvirqfd) {
20262306a36Sopenharmony_ci		virqfd_deactivate(*pvirqfd);
20362306a36Sopenharmony_ci		*pvirqfd = NULL;
20462306a36Sopenharmony_ci	}
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci	spin_unlock_irqrestore(&virqfd_lock, flags);
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	/*
20962306a36Sopenharmony_ci	 * Block until we know all outstanding shutdown jobs have completed.
21062306a36Sopenharmony_ci	 * Even if we don't queue the job, flush the wq to be sure it's
21162306a36Sopenharmony_ci	 * been released.
21262306a36Sopenharmony_ci	 */
21362306a36Sopenharmony_ci	flush_workqueue(vfio_irqfd_cleanup_wq);
21462306a36Sopenharmony_ci}
21562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_virqfd_disable);
216