1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * VFIO PCI interrupt handling
4 *
5 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
6 *     Author: Alex Williamson <alex.williamson@redhat.com>
7 *
8 * Derived from original vfio:
9 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
10 * Author: Tom Lyon, pugs@cisco.com
11 */
12
13#include <linux/device.h>
14#include <linux/interrupt.h>
15#include <linux/eventfd.h>
16#include <linux/msi.h>
17#include <linux/pci.h>
18#include <linux/file.h>
19#include <linux/vfio.h>
20#include <linux/wait.h>
21#include <linux/slab.h>
22
23#include "vfio_pci_private.h"
24
25/*
26 * INTx
27 */
28static void vfio_send_intx_eventfd(void *opaque, void *unused)
29{
30	struct vfio_pci_device *vdev = opaque;
31
32	if (likely(is_intx(vdev) && !vdev->virq_disabled))
33		eventfd_signal(vdev->ctx[0].trigger, 1);
34}
35
36void vfio_pci_intx_mask(struct vfio_pci_device *vdev)
37{
38	struct pci_dev *pdev = vdev->pdev;
39	unsigned long flags;
40
41	spin_lock_irqsave(&vdev->irqlock, flags);
42
43	/*
44	 * Masking can come from interrupt, ioctl, or config space
45	 * via INTx disable.  The latter means this can get called
46	 * even when not using intx delivery.  In this case, just
47	 * try to have the physical bit follow the virtual bit.
48	 */
49	if (unlikely(!is_intx(vdev))) {
50		if (vdev->pci_2_3)
51			pci_intx(pdev, 0);
52	} else if (!vdev->ctx[0].masked) {
53		/*
54		 * Can't use check_and_mask here because we always want to
55		 * mask, not just when something is pending.
56		 */
57		if (vdev->pci_2_3)
58			pci_intx(pdev, 0);
59		else
60			disable_irq_nosync(pdev->irq);
61
62		vdev->ctx[0].masked = true;
63	}
64
65	spin_unlock_irqrestore(&vdev->irqlock, flags);
66}
67
68/*
69 * If this is triggered by an eventfd, we can't call eventfd_signal
70 * or else we'll deadlock on the eventfd wait queue.  Return >0 when
71 * a signal is necessary, which can then be handled via a work queue
72 * or directly depending on the caller.
73 */
74static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
75{
76	struct vfio_pci_device *vdev = opaque;
77	struct pci_dev *pdev = vdev->pdev;
78	unsigned long flags;
79	int ret = 0;
80
81	spin_lock_irqsave(&vdev->irqlock, flags);
82
83	/*
84	 * Unmasking comes from ioctl or config, so again, have the
85	 * physical bit follow the virtual even when not using INTx.
86	 */
87	if (unlikely(!is_intx(vdev))) {
88		if (vdev->pci_2_3)
89			pci_intx(pdev, 1);
90	} else if (vdev->ctx[0].masked && !vdev->virq_disabled) {
91		/*
92		 * A pending interrupt here would immediately trigger,
93		 * but we can avoid that overhead by just re-sending
94		 * the interrupt to the user.
95		 */
96		if (vdev->pci_2_3) {
97			if (!pci_check_and_unmask_intx(pdev))
98				ret = 1;
99		} else
100			enable_irq(pdev->irq);
101
102		vdev->ctx[0].masked = (ret > 0);
103	}
104
105	spin_unlock_irqrestore(&vdev->irqlock, flags);
106
107	return ret;
108}
109
110void vfio_pci_intx_unmask(struct vfio_pci_device *vdev)
111{
112	if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0)
113		vfio_send_intx_eventfd(vdev, NULL);
114}
115
116static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
117{
118	struct vfio_pci_device *vdev = dev_id;
119	unsigned long flags;
120	int ret = IRQ_NONE;
121
122	spin_lock_irqsave(&vdev->irqlock, flags);
123
124	if (!vdev->pci_2_3) {
125		disable_irq_nosync(vdev->pdev->irq);
126		vdev->ctx[0].masked = true;
127		ret = IRQ_HANDLED;
128	} else if (!vdev->ctx[0].masked &&  /* may be shared */
129		   pci_check_and_mask_intx(vdev->pdev)) {
130		vdev->ctx[0].masked = true;
131		ret = IRQ_HANDLED;
132	}
133
134	spin_unlock_irqrestore(&vdev->irqlock, flags);
135
136	if (ret == IRQ_HANDLED)
137		vfio_send_intx_eventfd(vdev, NULL);
138
139	return ret;
140}
141
142static int vfio_intx_enable(struct vfio_pci_device *vdev)
143{
144	if (!is_irq_none(vdev))
145		return -EINVAL;
146
147	if (!vdev->pdev->irq)
148		return -ENODEV;
149
150	vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
151	if (!vdev->ctx)
152		return -ENOMEM;
153
154	vdev->num_ctx = 1;
155
156	/*
157	 * If the virtual interrupt is masked, restore it.  Devices
158	 * supporting DisINTx can be masked at the hardware level
159	 * here, non-PCI-2.3 devices will have to wait until the
160	 * interrupt is enabled.
161	 */
162	vdev->ctx[0].masked = vdev->virq_disabled;
163	if (vdev->pci_2_3)
164		pci_intx(vdev->pdev, !vdev->ctx[0].masked);
165
166	vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
167
168	return 0;
169}
170
171static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
172{
173	struct pci_dev *pdev = vdev->pdev;
174	unsigned long irqflags = IRQF_SHARED;
175	struct eventfd_ctx *trigger;
176	unsigned long flags;
177	int ret;
178
179	if (vdev->ctx[0].trigger) {
180		free_irq(pdev->irq, vdev);
181		kfree(vdev->ctx[0].name);
182		eventfd_ctx_put(vdev->ctx[0].trigger);
183		vdev->ctx[0].trigger = NULL;
184	}
185
186	if (fd < 0) /* Disable only */
187		return 0;
188
189	vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)",
190				      pci_name(pdev));
191	if (!vdev->ctx[0].name)
192		return -ENOMEM;
193
194	trigger = eventfd_ctx_fdget(fd);
195	if (IS_ERR(trigger)) {
196		kfree(vdev->ctx[0].name);
197		return PTR_ERR(trigger);
198	}
199
200	vdev->ctx[0].trigger = trigger;
201
202	if (!vdev->pci_2_3)
203		irqflags = 0;
204
205	ret = request_irq(pdev->irq, vfio_intx_handler,
206			  irqflags, vdev->ctx[0].name, vdev);
207	if (ret) {
208		vdev->ctx[0].trigger = NULL;
209		kfree(vdev->ctx[0].name);
210		eventfd_ctx_put(trigger);
211		return ret;
212	}
213
214	/*
215	 * INTx disable will stick across the new irq setup,
216	 * disable_irq won't.
217	 */
218	spin_lock_irqsave(&vdev->irqlock, flags);
219	if (!vdev->pci_2_3 && vdev->ctx[0].masked)
220		disable_irq_nosync(pdev->irq);
221	spin_unlock_irqrestore(&vdev->irqlock, flags);
222
223	return 0;
224}
225
226static void vfio_intx_disable(struct vfio_pci_device *vdev)
227{
228	vfio_virqfd_disable(&vdev->ctx[0].unmask);
229	vfio_virqfd_disable(&vdev->ctx[0].mask);
230	vfio_intx_set_signal(vdev, -1);
231	vdev->irq_type = VFIO_PCI_NUM_IRQS;
232	vdev->num_ctx = 0;
233	kfree(vdev->ctx);
234}
235
236/*
237 * MSI/MSI-X
238 */
239static irqreturn_t vfio_msihandler(int irq, void *arg)
240{
241	struct eventfd_ctx *trigger = arg;
242
243	eventfd_signal(trigger, 1);
244	return IRQ_HANDLED;
245}
246
247static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
248{
249	struct pci_dev *pdev = vdev->pdev;
250	unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI;
251	int ret;
252	u16 cmd;
253
254	if (!is_irq_none(vdev))
255		return -EINVAL;
256
257	vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
258	if (!vdev->ctx)
259		return -ENOMEM;
260
261	/* return the number of supported vectors if we can't get all: */
262	cmd = vfio_pci_memory_lock_and_enable(vdev);
263	ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
264	if (ret < nvec) {
265		if (ret > 0)
266			pci_free_irq_vectors(pdev);
267		vfio_pci_memory_unlock_and_restore(vdev, cmd);
268		kfree(vdev->ctx);
269		return ret;
270	}
271	vfio_pci_memory_unlock_and_restore(vdev, cmd);
272
273	vdev->num_ctx = nvec;
274	vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
275				VFIO_PCI_MSI_IRQ_INDEX;
276
277	if (!msix) {
278		/*
279		 * Compute the virtual hardware field for max msi vectors -
280		 * it is the log base 2 of the number of vectors.
281		 */
282		vdev->msi_qmax = fls(nvec * 2 - 1) - 1;
283	}
284
285	return 0;
286}
287
288static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
289				      int vector, int fd, bool msix)
290{
291	struct pci_dev *pdev = vdev->pdev;
292	struct eventfd_ctx *trigger;
293	int irq, ret;
294	u16 cmd;
295
296	if (vector < 0 || vector >= vdev->num_ctx)
297		return -EINVAL;
298
299	irq = pci_irq_vector(pdev, vector);
300
301	if (vdev->ctx[vector].trigger) {
302		irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
303
304		cmd = vfio_pci_memory_lock_and_enable(vdev);
305		free_irq(irq, vdev->ctx[vector].trigger);
306		vfio_pci_memory_unlock_and_restore(vdev, cmd);
307
308		kfree(vdev->ctx[vector].name);
309		eventfd_ctx_put(vdev->ctx[vector].trigger);
310		vdev->ctx[vector].trigger = NULL;
311	}
312
313	if (fd < 0)
314		return 0;
315
316	vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
317					   msix ? "x" : "", vector,
318					   pci_name(pdev));
319	if (!vdev->ctx[vector].name)
320		return -ENOMEM;
321
322	trigger = eventfd_ctx_fdget(fd);
323	if (IS_ERR(trigger)) {
324		kfree(vdev->ctx[vector].name);
325		return PTR_ERR(trigger);
326	}
327
328	/*
329	 * The MSIx vector table resides in device memory which may be cleared
330	 * via backdoor resets. We don't allow direct access to the vector
331	 * table so even if a userspace driver attempts to save/restore around
332	 * such a reset it would be unsuccessful. To avoid this, restore the
333	 * cached value of the message prior to enabling.
334	 */
335	cmd = vfio_pci_memory_lock_and_enable(vdev);
336	if (msix) {
337		struct msi_msg msg;
338
339		get_cached_msi_msg(irq, &msg);
340		pci_write_msi_msg(irq, &msg);
341	}
342
343	ret = request_irq(irq, vfio_msihandler, 0,
344			  vdev->ctx[vector].name, trigger);
345	vfio_pci_memory_unlock_and_restore(vdev, cmd);
346	if (ret) {
347		kfree(vdev->ctx[vector].name);
348		eventfd_ctx_put(trigger);
349		return ret;
350	}
351
352	vdev->ctx[vector].producer.token = trigger;
353	vdev->ctx[vector].producer.irq = irq;
354	ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
355	if (unlikely(ret)) {
356		dev_info(&pdev->dev,
357		"irq bypass producer (token %p) registration fails: %d\n",
358		vdev->ctx[vector].producer.token, ret);
359
360		vdev->ctx[vector].producer.token = NULL;
361	}
362	vdev->ctx[vector].trigger = trigger;
363
364	return 0;
365}
366
367static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
368			      unsigned count, int32_t *fds, bool msix)
369{
370	int i, j, ret = 0;
371
372	if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
373		return -EINVAL;
374
375	for (i = 0, j = start; i < count && !ret; i++, j++) {
376		int fd = fds ? fds[i] : -1;
377		ret = vfio_msi_set_vector_signal(vdev, j, fd, msix);
378	}
379
380	if (ret) {
381		for (--j; j >= (int)start; j--)
382			vfio_msi_set_vector_signal(vdev, j, -1, msix);
383	}
384
385	return ret;
386}
387
388static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
389{
390	struct pci_dev *pdev = vdev->pdev;
391	int i;
392	u16 cmd;
393
394	for (i = 0; i < vdev->num_ctx; i++) {
395		vfio_virqfd_disable(&vdev->ctx[i].unmask);
396		vfio_virqfd_disable(&vdev->ctx[i].mask);
397	}
398
399	vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
400
401	cmd = vfio_pci_memory_lock_and_enable(vdev);
402	pci_free_irq_vectors(pdev);
403	vfio_pci_memory_unlock_and_restore(vdev, cmd);
404
405	/*
406	 * Both disable paths above use pci_intx_for_msi() to clear DisINTx
407	 * via their shutdown paths.  Restore for NoINTx devices.
408	 */
409	if (vdev->nointx)
410		pci_intx(pdev, 0);
411
412	vdev->irq_type = VFIO_PCI_NUM_IRQS;
413	vdev->num_ctx = 0;
414	kfree(vdev->ctx);
415}
416
417/*
418 * IOCTL support
419 */
420static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev,
421				    unsigned index, unsigned start,
422				    unsigned count, uint32_t flags, void *data)
423{
424	if (!is_intx(vdev) || start != 0 || count != 1)
425		return -EINVAL;
426
427	if (flags & VFIO_IRQ_SET_DATA_NONE) {
428		vfio_pci_intx_unmask(vdev);
429	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
430		uint8_t unmask = *(uint8_t *)data;
431		if (unmask)
432			vfio_pci_intx_unmask(vdev);
433	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
434		int32_t fd = *(int32_t *)data;
435		if (fd >= 0)
436			return vfio_virqfd_enable((void *) vdev,
437						  vfio_pci_intx_unmask_handler,
438						  vfio_send_intx_eventfd, NULL,
439						  &vdev->ctx[0].unmask, fd);
440
441		vfio_virqfd_disable(&vdev->ctx[0].unmask);
442	}
443
444	return 0;
445}
446
447static int vfio_pci_set_intx_mask(struct vfio_pci_device *vdev,
448				  unsigned index, unsigned start,
449				  unsigned count, uint32_t flags, void *data)
450{
451	if (!is_intx(vdev) || start != 0 || count != 1)
452		return -EINVAL;
453
454	if (flags & VFIO_IRQ_SET_DATA_NONE) {
455		vfio_pci_intx_mask(vdev);
456	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
457		uint8_t mask = *(uint8_t *)data;
458		if (mask)
459			vfio_pci_intx_mask(vdev);
460	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
461		return -ENOTTY; /* XXX implement me */
462	}
463
464	return 0;
465}
466
467static int vfio_pci_set_intx_trigger(struct vfio_pci_device *vdev,
468				     unsigned index, unsigned start,
469				     unsigned count, uint32_t flags, void *data)
470{
471	if (is_intx(vdev) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
472		vfio_intx_disable(vdev);
473		return 0;
474	}
475
476	if (!(is_intx(vdev) || is_irq_none(vdev)) || start != 0 || count != 1)
477		return -EINVAL;
478
479	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
480		int32_t fd = *(int32_t *)data;
481		int ret;
482
483		if (is_intx(vdev))
484			return vfio_intx_set_signal(vdev, fd);
485
486		ret = vfio_intx_enable(vdev);
487		if (ret)
488			return ret;
489
490		ret = vfio_intx_set_signal(vdev, fd);
491		if (ret)
492			vfio_intx_disable(vdev);
493
494		return ret;
495	}
496
497	if (!is_intx(vdev))
498		return -EINVAL;
499
500	if (flags & VFIO_IRQ_SET_DATA_NONE) {
501		vfio_send_intx_eventfd(vdev, NULL);
502	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
503		uint8_t trigger = *(uint8_t *)data;
504		if (trigger)
505			vfio_send_intx_eventfd(vdev, NULL);
506	}
507	return 0;
508}
509
510static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
511				    unsigned index, unsigned start,
512				    unsigned count, uint32_t flags, void *data)
513{
514	int i;
515	bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;
516
517	if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
518		vfio_msi_disable(vdev, msix);
519		return 0;
520	}
521
522	if (!(irq_is(vdev, index) || is_irq_none(vdev)))
523		return -EINVAL;
524
525	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
526		int32_t *fds = data;
527		int ret;
528
529		if (vdev->irq_type == index)
530			return vfio_msi_set_block(vdev, start, count,
531						  fds, msix);
532
533		ret = vfio_msi_enable(vdev, start + count, msix);
534		if (ret)
535			return ret;
536
537		ret = vfio_msi_set_block(vdev, start, count, fds, msix);
538		if (ret)
539			vfio_msi_disable(vdev, msix);
540
541		return ret;
542	}
543
544	if (!irq_is(vdev, index) || start + count > vdev->num_ctx)
545		return -EINVAL;
546
547	for (i = start; i < start + count; i++) {
548		if (!vdev->ctx[i].trigger)
549			continue;
550		if (flags & VFIO_IRQ_SET_DATA_NONE) {
551			eventfd_signal(vdev->ctx[i].trigger, 1);
552		} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
553			uint8_t *bools = data;
554			if (bools[i - start])
555				eventfd_signal(vdev->ctx[i].trigger, 1);
556		}
557	}
558	return 0;
559}
560
561static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
562					   unsigned int count, uint32_t flags,
563					   void *data)
564{
565	/* DATA_NONE/DATA_BOOL enables loopback testing */
566	if (flags & VFIO_IRQ_SET_DATA_NONE) {
567		if (*ctx) {
568			if (count) {
569				eventfd_signal(*ctx, 1);
570			} else {
571				eventfd_ctx_put(*ctx);
572				*ctx = NULL;
573			}
574			return 0;
575		}
576	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
577		uint8_t trigger;
578
579		if (!count)
580			return -EINVAL;
581
582		trigger = *(uint8_t *)data;
583		if (trigger && *ctx)
584			eventfd_signal(*ctx, 1);
585
586		return 0;
587	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
588		int32_t fd;
589
590		if (!count)
591			return -EINVAL;
592
593		fd = *(int32_t *)data;
594		if (fd == -1) {
595			if (*ctx)
596				eventfd_ctx_put(*ctx);
597			*ctx = NULL;
598		} else if (fd >= 0) {
599			struct eventfd_ctx *efdctx;
600
601			efdctx = eventfd_ctx_fdget(fd);
602			if (IS_ERR(efdctx))
603				return PTR_ERR(efdctx);
604
605			if (*ctx)
606				eventfd_ctx_put(*ctx);
607
608			*ctx = efdctx;
609		}
610		return 0;
611	}
612
613	return -EINVAL;
614}
615
616static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
617				    unsigned index, unsigned start,
618				    unsigned count, uint32_t flags, void *data)
619{
620	if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
621		return -EINVAL;
622
623	return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
624					       count, flags, data);
625}
626
627static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev,
628				    unsigned index, unsigned start,
629				    unsigned count, uint32_t flags, void *data)
630{
631	if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
632		return -EINVAL;
633
634	return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
635					       count, flags, data);
636}
637
638int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
639			    unsigned index, unsigned start, unsigned count,
640			    void *data)
641{
642	int (*func)(struct vfio_pci_device *vdev, unsigned index,
643		    unsigned start, unsigned count, uint32_t flags,
644		    void *data) = NULL;
645
646	switch (index) {
647	case VFIO_PCI_INTX_IRQ_INDEX:
648		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
649		case VFIO_IRQ_SET_ACTION_MASK:
650			func = vfio_pci_set_intx_mask;
651			break;
652		case VFIO_IRQ_SET_ACTION_UNMASK:
653			func = vfio_pci_set_intx_unmask;
654			break;
655		case VFIO_IRQ_SET_ACTION_TRIGGER:
656			func = vfio_pci_set_intx_trigger;
657			break;
658		}
659		break;
660	case VFIO_PCI_MSI_IRQ_INDEX:
661	case VFIO_PCI_MSIX_IRQ_INDEX:
662		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
663		case VFIO_IRQ_SET_ACTION_MASK:
664		case VFIO_IRQ_SET_ACTION_UNMASK:
665			/* XXX Need masking support exported */
666			break;
667		case VFIO_IRQ_SET_ACTION_TRIGGER:
668			func = vfio_pci_set_msi_trigger;
669			break;
670		}
671		break;
672	case VFIO_PCI_ERR_IRQ_INDEX:
673		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
674		case VFIO_IRQ_SET_ACTION_TRIGGER:
675			if (pci_is_pcie(vdev->pdev))
676				func = vfio_pci_set_err_trigger;
677			break;
678		}
679		break;
680	case VFIO_PCI_REQ_IRQ_INDEX:
681		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
682		case VFIO_IRQ_SET_ACTION_TRIGGER:
683			func = vfio_pci_set_req_trigger;
684			break;
685		}
686		break;
687	}
688
689	if (!func)
690		return -ENOTTY;
691
692	return func(vdev, index, start, count, flags, data);
693}
694