// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Virtio PCI driver - common functionality for all device versions
 *
 * This module allows virtio devices to be used over a virtual PCI device.
 * This can be used with QEMU based VMMs like KVM or Xen.
 *
 * Copyright IBM Corp. 2007
 * Copyright Red Hat, Inc. 2014
 *
 * Authors:
 *  Anthony Liguori  <aliguori@us.ibm.com>
 *  Rusty Russell <rusty@rustcorp.com.au>
 *  Michael S. Tsirkin <mst@redhat.com>
 */

#include "virtio_pci_common.h"

static bool force_legacy = false;

#if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY)
module_param(force_legacy, bool, 0444);
MODULE_PARM_DESC(force_legacy,
		 "Force legacy mode for transitional virtio 1 devices");
#endif

/* wait for pending irq handlers */
void vp_synchronize_vectors(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int i;

	if (vp_dev->intx_enabled)
		synchronize_irq(vp_dev->pci_dev->irq);

	for (i = 0; i < vp_dev->msix_vectors; ++i)
		synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i));
}

/* the notify function used when creating a virt queue */
bool vp_notify(struct virtqueue *vq)
{
	/* we write the queue's selector into the notification register to
	 * signal the other end */
	iowrite16(vq->index, (void __iomem *)vq->priv);
	return true;
}

/* Handle a configuration change: Tell driver if it wants to know. */
static irqreturn_t vp_config_changed(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;

	virtio_config_changed(&vp_dev->vdev);
	return IRQ_HANDLED;
}

/* Notify all virtqueues on an interrupt. */
static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	struct virtio_pci_vq_info *info;
	irqreturn_t ret = IRQ_NONE;
	unsigned long flags;

	spin_lock_irqsave(&vp_dev->lock, flags);
	list_for_each_entry(info, &vp_dev->virtqueues, node) {
		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
			ret = IRQ_HANDLED;
	}
	spin_unlock_irqrestore(&vp_dev->lock, flags);

	return ret;
}
/* A small wrapper to also acknowledge the interrupt when it's handled.
 * I really need an EIO hook for the vring so I can ack the interrupt once we
 * know that we'll be handling the IRQ, but before we invoke the callback,
 * since the callback may notify the host, which would result in the host
 * attempting to raise an interrupt that we would then mask once we
 * acknowledged the interrupt. */
static irqreturn_t vp_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	u8 isr;

	/* reading the ISR has the effect of also clearing it, so it's very
	 * important to save off the value. */
	isr = ioread8(vp_dev->isr);

	/* It's definitely not us if the ISR was not high */
	if (!isr)
		return IRQ_NONE;

	/* Configuration change?  Tell driver if it wants to know. */
	if (isr & VIRTIO_PCI_ISR_CONFIG)
		vp_config_changed(irq, opaque);

	return vp_vring_interrupt(irq, opaque);
}

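/*
 * Allocate nvectors MSI-X vectors for the device and wire up the
 * configuration-change vector.  When per_vq_vectors is false, one extra
 * vector shared by all virtqueues is also requested here; otherwise the
 * per-vq IRQs are requested later by the caller.
 */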
static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
				   bool per_vq_vectors, struct irq_affinity *desc)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	const char *name = dev_name(&vp_dev->vdev.dev);
	unsigned flags = PCI_IRQ_MSIX;
	unsigned i, v;
	int err = -ENOMEM;

	vp_dev->msix_vectors = nvectors;

	vp_dev->msix_names = kmalloc_array(nvectors,
					   sizeof(*vp_dev->msix_names),
					   GFP_KERNEL);
	if (!vp_dev->msix_names)
		goto error;
	vp_dev->msix_affinity_masks
		= kcalloc(nvectors, sizeof(*vp_dev->msix_affinity_masks),
			  GFP_KERNEL);
	if (!vp_dev->msix_affinity_masks)
		goto error;
	for (i = 0; i < nvectors; ++i)
		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
					GFP_KERNEL))
			goto error;

	if (desc) {
		flags |= PCI_IRQ_AFFINITY;
		desc->pre_vectors++; /* virtio config vector */
	}

	err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
					     nvectors, flags, desc);
	if (err < 0)
		goto error;
	vp_dev->msix_enabled = 1;

	/* Set the vector used for configuration */
	v = vp_dev->msix_used_vectors;
	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
		 "%s-config", name);
	err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
			  vp_config_changed, 0, vp_dev->msix_names[v],
			  vp_dev);
	if (err)
		goto error;
	++vp_dev->msix_used_vectors;

	v = vp_dev->config_vector(vp_dev, v);
	/* Verify we had enough resources to assign the vector */
	if (v == VIRTIO_MSI_NO_VECTOR) {
		err = -EBUSY;
		goto error;
	}

	if (!per_vq_vectors) {
		/* Shared vector for all VQs */
		v = vp_dev->msix_used_vectors;
		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
			 "%s-virtqueues", name);
		err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
				  vp_dev);
		if (err)
			goto error;
		++vp_dev->msix_used_vectors;
	}
	return 0;
error:
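	/* Partially initialized resources (vectors, names, affinity masks)
	 * are released by the caller through vp_del_vqs(). */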
	return err;
}

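/*
 * Create one virtqueue: allocate the per-queue bookkeeping, let the
 * version-specific backend set the queue up, and add queues with a
 * callback to vp_dev->virtqueues so the INTx/shared-vector handler can
 * find them.
 */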
static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
				     void (*callback)(struct virtqueue *vq),
				     const char *name,
				     bool ctx,
				     u16 msix_vec)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
	struct virtqueue *vq;
	unsigned long flags;

	/* fill out our structure that represents an active queue */
	if (!info)
		return ERR_PTR(-ENOMEM);

	vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
			      msix_vec);
	if (IS_ERR(vq))
		goto out_info;

	info->vq = vq;
	if (callback) {
		spin_lock_irqsave(&vp_dev->lock, flags);
		list_add(&info->node, &vp_dev->virtqueues);
		spin_unlock_irqrestore(&vp_dev->lock, flags);
	} else {
		INIT_LIST_HEAD(&info->node);
	}

	vp_dev->vqs[index] = info;
	return vq;

out_info:
	kfree(info);
	return vq;
}

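/* Remove a single virtqueue and free its bookkeeping info. */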
static void vp_del_vq(struct virtqueue *vq)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
	struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
	unsigned long flags;

	spin_lock_irqsave(&vp_dev->lock, flags);
	list_del(&info->node);
	spin_unlock_irqrestore(&vp_dev->lock, flags);

	vp_dev->del_vq(info);
	kfree(info);
}

/* the config->del_vqs() implementation */
void vp_del_vqs(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtqueue *vq, *n;
	int i;

	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
		if (vp_dev->per_vq_vectors) {
			int v = vp_dev->vqs[vq->index]->msix_vector;

			if (v != VIRTIO_MSI_NO_VECTOR) {
				int irq = pci_irq_vector(vp_dev->pci_dev, v);

				irq_set_affinity_hint(irq, NULL);
				free_irq(irq, vq);
			}
		}
		vp_del_vq(vq);
	}
	vp_dev->per_vq_vectors = false;

	if (vp_dev->intx_enabled) {
		free_irq(vp_dev->pci_dev->irq, vp_dev);
		vp_dev->intx_enabled = 0;
	}

	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
		free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev);

	if (vp_dev->msix_affinity_masks) {
		for (i = 0; i < vp_dev->msix_vectors; i++)
			free_cpumask_var(vp_dev->msix_affinity_masks[i]);
	}

	if (vp_dev->msix_enabled) {
		/* Disable the vector used for configuration */
		vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);

		pci_free_irq_vectors(vp_dev->pci_dev);
		vp_dev->msix_enabled = 0;
	}

	vp_dev->msix_vectors = 0;
	vp_dev->msix_used_vectors = 0;
	kfree(vp_dev->msix_names);
	vp_dev->msix_names = NULL;
	kfree(vp_dev->msix_affinity_masks);
	vp_dev->msix_affinity_masks = NULL;
	kfree(vp_dev->vqs);
	vp_dev->vqs = NULL;
}

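/*
 * Find virtqueues using MSI-X.  With per_vq_vectors, every virtqueue that
 * has a callback gets its own vector and IRQ; otherwise a single vector is
 * shared by all virtqueues, with a separate vector reserved for
 * configuration changes.
 */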
static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
		struct virtqueue *vqs[], vq_callback_t *callbacks[],
		const char * const names[], bool per_vq_vectors,
		const bool *ctx,
		struct irq_affinity *desc)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	u16 msix_vec;
	int i, err, nvectors, allocated_vectors, queue_idx = 0;

	vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
	if (!vp_dev->vqs)
		return -ENOMEM;

	if (per_vq_vectors) {
		/* Best option: one for change interrupt, one per vq. */
		nvectors = 1;
		for (i = 0; i < nvqs; ++i)
			if (names[i] && callbacks[i])
				++nvectors;
	} else {
		/* Second best: one for change, shared for all vqs. */
		nvectors = 2;
	}

	err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors,
				      per_vq_vectors ? desc : NULL);
	if (err)
		goto error_find;

	vp_dev->per_vq_vectors = per_vq_vectors;
	allocated_vectors = vp_dev->msix_used_vectors;
	for (i = 0; i < nvqs; ++i) {
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		}

		if (!callbacks[i])
			msix_vec = VIRTIO_MSI_NO_VECTOR;
		else if (vp_dev->per_vq_vectors)
			msix_vec = allocated_vectors++;
		else
			msix_vec = VP_MSIX_VQ_VECTOR;
		vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
				     ctx ? ctx[i] : false,
				     msix_vec);
		if (IS_ERR(vqs[i])) {
			err = PTR_ERR(vqs[i]);
			goto error_find;
		}

		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
			continue;

		/* allocate per-vq irq if available and necessary */
		snprintf(vp_dev->msix_names[msix_vec],
			 sizeof *vp_dev->msix_names,
			 "%s-%s",
			 dev_name(&vp_dev->vdev.dev), names[i]);
		err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
				  vring_interrupt, 0,
				  vp_dev->msix_names[msix_vec],
				  vqs[i]);
		if (err)
			goto error_find;
	}
	return 0;

error_find:
	vp_del_vqs(vdev);
	return err;
}

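/*
 * Find virtqueues using a single shared INTx interrupt; vp_interrupt()
 * reads the ISR register to distinguish configuration changes from vring
 * notifications.
 */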
static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
		struct virtqueue *vqs[], vq_callback_t *callbacks[],
		const char * const names[], const bool *ctx)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int i, err, queue_idx = 0;

	vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
	if (!vp_dev->vqs)
		return -ENOMEM;

	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
			dev_name(&vdev->dev), vp_dev);
	if (err)
		goto out_del_vqs;

	vp_dev->intx_enabled = 1;
	vp_dev->per_vq_vectors = false;
	for (i = 0; i < nvqs; ++i) {
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		}
		vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
				     ctx ? ctx[i] : false,
				     VIRTIO_MSI_NO_VECTOR);
		if (IS_ERR(vqs[i])) {
			err = PTR_ERR(vqs[i]);
			goto out_del_vqs;
		}
	}

	return 0;
out_del_vqs:
	vp_del_vqs(vdev);
	return err;
}

/* the config->find_vqs() implementation */
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		struct virtqueue *vqs[], vq_callback_t *callbacks[],
		const char * const names[], const bool *ctx,
		struct irq_affinity *desc)
{
	int err;

	/* Try MSI-X with one vector per queue. */
	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc);
	if (!err)
		return 0;
	/* Fallback: MSI-X with one vector for config, one shared for queues. */
	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc);
	if (!err)
		return 0;
	/* Finally fall back to regular interrupts. */
	return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
}

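/* the config->bus_name() implementation */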
const char *vp_bus_name(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	return pci_name(vp_dev->pci_dev);
}

/* Setup the affinity for a virtqueue:
 * - force the affinity for per vq vector
 * - OR over all affinities for shared MSI
 * - ignore the affinity request if we're using INTX
 */
int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask)
{
	struct virtio_device *vdev = vq->vdev;
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
	struct cpumask *mask;
	unsigned int irq;

	if (!vq->callback)
		return -EINVAL;

	if (vp_dev->msix_enabled) {
		mask = vp_dev->msix_affinity_masks[info->msix_vector];
		irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector);
		if (!cpu_mask)
			irq_set_affinity_hint(irq, NULL);
		else {
			cpumask_copy(mask, cpu_mask);
			irq_set_affinity_hint(irq, mask);
		}
	}
	return 0;
}

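/* Report the affinity of a queue's vector, or NULL if the queue has no
 * dedicated MSI-X vector. */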
const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	if (!vp_dev->per_vq_vectors ||
	    vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR)
		return NULL;

	return pci_irq_get_affinity(vp_dev->pci_dev,
				    vp_dev->vqs[index]->msix_vector);
}

#ifdef CONFIG_PM_SLEEP
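/* Freeze the virtio device, then disable the PCI device for system sleep. */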
static int virtio_pci_freeze(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	int ret;

	ret = virtio_device_freeze(&vp_dev->vdev);

	if (!ret)
		pci_disable_device(pci_dev);
	return ret;
}

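/* Re-enable the PCI device and bring the virtio device back up on resume. */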
static int virtio_pci_restore(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	int ret;

	ret = pci_enable_device(pci_dev);
	if (ret)
		return ret;

	pci_set_master(pci_dev);
	return virtio_device_restore(&vp_dev->vdev);
}

static const struct dev_pm_ops virtio_pci_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
};
#endif

/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
static const struct pci_device_id virtio_pci_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) },
	{ 0 }
};

MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);

static void virtio_pci_release_dev(struct device *_d)
{
	struct virtio_device *vdev = dev_to_virtio(_d);
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* As struct device is a kobject, it's not safe to
	 * free the memory (including the reference counter itself)
	 * until its release callback. */
	kfree(vp_dev);
}

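/*
 * Probe: allocate the virtio_pci_device, enable the PCI device, pick the
 * modern or legacy transport (honouring force_legacy for transitional
 * devices), and register the virtio device with the core.
 */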
static int virtio_pci_probe(struct pci_dev *pci_dev,
			    const struct pci_device_id *id)
{
	struct virtio_pci_device *vp_dev, *reg_dev = NULL;
	int rc;

	/* allocate our structure and fill it out */
	vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
	if (!vp_dev)
		return -ENOMEM;

	pci_set_drvdata(pci_dev, vp_dev);
	vp_dev->vdev.dev.parent = &pci_dev->dev;
	vp_dev->vdev.dev.release = virtio_pci_release_dev;
	vp_dev->pci_dev = pci_dev;
	INIT_LIST_HEAD(&vp_dev->virtqueues);
	spin_lock_init(&vp_dev->lock);

	/* enable the device */
	rc = pci_enable_device(pci_dev);
	if (rc)
		goto err_enable_device;

	if (force_legacy) {
		rc = virtio_pci_legacy_probe(vp_dev);
		/* Also try modern mode if we can't map BAR0 (no IO space). */
		if (rc == -ENODEV || rc == -ENOMEM)
			rc = virtio_pci_modern_probe(vp_dev);
		if (rc)
			goto err_probe;
	} else {
		rc = virtio_pci_modern_probe(vp_dev);
		if (rc == -ENODEV)
			rc = virtio_pci_legacy_probe(vp_dev);
		if (rc)
			goto err_probe;
	}

	pci_set_master(pci_dev);

	rc = register_virtio_device(&vp_dev->vdev);
	reg_dev = vp_dev;
	if (rc)
		goto err_register;

	return 0;

err_register:
	if (vp_dev->ioaddr)
		virtio_pci_legacy_remove(vp_dev);
	else
		virtio_pci_modern_remove(vp_dev);
err_probe:
	pci_disable_device(pci_dev);
err_enable_device:
	if (reg_dev)
		put_device(&vp_dev->vdev.dev);
	else
		kfree(vp_dev);
	return rc;
}

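/* Tear down the device: unregister from the virtio core, remove the
 * transport-specific mappings, and release the PCI device. */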
static void virtio_pci_remove(struct pci_dev *pci_dev)
{
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	struct device *dev = get_device(&vp_dev->vdev.dev);

	/*
	 * Device is marked broken on surprise removal so that virtio upper
	 * layers can abort any ongoing operation.
	 */
	if (!pci_device_is_present(pci_dev))
		virtio_break_device(&vp_dev->vdev);

	pci_disable_sriov(pci_dev);

	unregister_virtio_device(&vp_dev->vdev);

	if (vp_dev->ioaddr)
		virtio_pci_legacy_remove(vp_dev);
	else
		virtio_pci_modern_remove(vp_dev);

	pci_disable_device(pci_dev);
	put_device(dev);
}

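/*
 * sriov_configure callback: enable num_vfs virtual functions, or disable
 * SR-IOV when num_vfs is zero.  Requires a driver that has negotiated
 * VIRTIO_F_SR_IOV and reached DRIVER_OK, and no VFs currently assigned.
 */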
static int virtio_pci_sriov_configure(struct pci_dev *pci_dev, int num_vfs)
{
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	struct virtio_device *vdev = &vp_dev->vdev;
	int ret;

	if (!(vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_DRIVER_OK))
		return -EBUSY;

	if (!__virtio_test_bit(vdev, VIRTIO_F_SR_IOV))
		return -EINVAL;

	if (pci_vfs_assigned(pci_dev))
		return -EPERM;

	if (num_vfs == 0) {
		pci_disable_sriov(pci_dev);
		return 0;
	}

	ret = pci_enable_sriov(pci_dev, num_vfs);
	if (ret < 0)
		return ret;

	return num_vfs;
}

static struct pci_driver virtio_pci_driver = {
	.name		= "virtio-pci",
	.id_table	= virtio_pci_id_table,
	.probe		= virtio_pci_probe,
	.remove		= virtio_pci_remove,
#ifdef CONFIG_PM_SLEEP
	.driver.pm	= &virtio_pci_pm_ops,
#endif
	.sriov_configure = virtio_pci_sriov_configure,
};

module_pci_driver(virtio_pci_driver);

MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
MODULE_DESCRIPTION("virtio-pci");
MODULE_LICENSE("GPL");
MODULE_VERSION("1");