18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
48c2ecf20Sopenharmony_ci *     Author: Alex Williamson <alex.williamson@redhat.com>
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * Derived from original vfio:
78c2ecf20Sopenharmony_ci * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
88c2ecf20Sopenharmony_ci * Author: Tom Lyon, pugs@cisco.com
98c2ecf20Sopenharmony_ci */
108c2ecf20Sopenharmony_ci
118c2ecf20Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci#include <linux/device.h>
148c2ecf20Sopenharmony_ci#include <linux/eventfd.h>
158c2ecf20Sopenharmony_ci#include <linux/file.h>
168c2ecf20Sopenharmony_ci#include <linux/interrupt.h>
178c2ecf20Sopenharmony_ci#include <linux/iommu.h>
188c2ecf20Sopenharmony_ci#include <linux/module.h>
198c2ecf20Sopenharmony_ci#include <linux/mutex.h>
208c2ecf20Sopenharmony_ci#include <linux/notifier.h>
218c2ecf20Sopenharmony_ci#include <linux/pci.h>
228c2ecf20Sopenharmony_ci#include <linux/pm_runtime.h>
238c2ecf20Sopenharmony_ci#include <linux/slab.h>
248c2ecf20Sopenharmony_ci#include <linux/types.h>
258c2ecf20Sopenharmony_ci#include <linux/uaccess.h>
268c2ecf20Sopenharmony_ci#include <linux/vfio.h>
278c2ecf20Sopenharmony_ci#include <linux/vgaarb.h>
288c2ecf20Sopenharmony_ci#include <linux/nospec.h>
298c2ecf20Sopenharmony_ci#include <linux/sched/mm.h>
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci#include "vfio_pci_private.h"
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_ci#define DRIVER_VERSION  "0.2"
348c2ecf20Sopenharmony_ci#define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
358c2ecf20Sopenharmony_ci#define DRIVER_DESC     "VFIO PCI - User Level meta-driver"
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_cistatic char ids[1024] __initdata;
388c2ecf20Sopenharmony_cimodule_param_string(ids, ids, sizeof(ids), 0);
398c2ecf20Sopenharmony_ciMODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified");
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_cistatic bool nointxmask;
428c2ecf20Sopenharmony_cimodule_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR);
438c2ecf20Sopenharmony_ciMODULE_PARM_DESC(nointxmask,
448c2ecf20Sopenharmony_ci		  "Disable support for PCI 2.3 style INTx masking.  If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag.");
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ci#ifdef CONFIG_VFIO_PCI_VGA
478c2ecf20Sopenharmony_cistatic bool disable_vga;
488c2ecf20Sopenharmony_cimodule_param(disable_vga, bool, S_IRUGO);
498c2ecf20Sopenharmony_ciMODULE_PARM_DESC(disable_vga, "Disable VGA resource access through vfio-pci");
508c2ecf20Sopenharmony_ci#endif
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_cistatic bool disable_idle_d3;
538c2ecf20Sopenharmony_cimodule_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR);
548c2ecf20Sopenharmony_ciMODULE_PARM_DESC(disable_idle_d3,
558c2ecf20Sopenharmony_ci		 "Disable using the PCI D3 low power state for idle, unused devices");
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_cistatic bool enable_sriov;
588c2ecf20Sopenharmony_ci#ifdef CONFIG_PCI_IOV
598c2ecf20Sopenharmony_cimodule_param(enable_sriov, bool, 0644);
608c2ecf20Sopenharmony_ciMODULE_PARM_DESC(enable_sriov, "Enable support for SR-IOV configuration.  Enabling SR-IOV on a PF typically requires support of the userspace PF driver, enabling VFs without such support may result in non-functional VFs or PF.");
618c2ecf20Sopenharmony_ci#endif
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_cistatic bool disable_denylist;
648c2ecf20Sopenharmony_cimodule_param(disable_denylist, bool, 0444);
658c2ecf20Sopenharmony_ciMODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users.");
668c2ecf20Sopenharmony_ci
/*
 * Tell whether VGA access through vfio-pci is turned off.  Without
 * CONFIG_VFIO_PCI_VGA this is unconditionally true; otherwise it mirrors
 * the disable_vga module parameter.
 */
static inline bool vfio_vga_disabled(void)
{
#ifndef CONFIG_VFIO_PCI_VGA
	return true;
#else
	return disable_vga;
#endif
}
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_cistatic bool vfio_pci_dev_in_denylist(struct pci_dev *pdev)
778c2ecf20Sopenharmony_ci{
788c2ecf20Sopenharmony_ci	switch (pdev->vendor) {
798c2ecf20Sopenharmony_ci	case PCI_VENDOR_ID_INTEL:
808c2ecf20Sopenharmony_ci		switch (pdev->device) {
818c2ecf20Sopenharmony_ci		case PCI_DEVICE_ID_INTEL_QAT_C3XXX:
828c2ecf20Sopenharmony_ci		case PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF:
838c2ecf20Sopenharmony_ci		case PCI_DEVICE_ID_INTEL_QAT_C62X:
848c2ecf20Sopenharmony_ci		case PCI_DEVICE_ID_INTEL_QAT_C62X_VF:
858c2ecf20Sopenharmony_ci		case PCI_DEVICE_ID_INTEL_QAT_DH895XCC:
868c2ecf20Sopenharmony_ci		case PCI_DEVICE_ID_INTEL_QAT_DH895XCC_VF:
878c2ecf20Sopenharmony_ci			return true;
888c2ecf20Sopenharmony_ci		default:
898c2ecf20Sopenharmony_ci			return false;
908c2ecf20Sopenharmony_ci		}
918c2ecf20Sopenharmony_ci	}
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci	return false;
948c2ecf20Sopenharmony_ci}
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_cistatic bool vfio_pci_is_denylisted(struct pci_dev *pdev)
978c2ecf20Sopenharmony_ci{
988c2ecf20Sopenharmony_ci	if (!vfio_pci_dev_in_denylist(pdev))
998c2ecf20Sopenharmony_ci		return false;
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci	if (disable_denylist) {
1028c2ecf20Sopenharmony_ci		pci_warn(pdev,
1038c2ecf20Sopenharmony_ci			 "device denylist disabled - allowing device %04x:%04x.\n",
1048c2ecf20Sopenharmony_ci			 pdev->vendor, pdev->device);
1058c2ecf20Sopenharmony_ci		return false;
1068c2ecf20Sopenharmony_ci	}
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_ci	pci_warn(pdev, "%04x:%04x exists in vfio-pci device denylist, driver probing disallowed.\n",
1098c2ecf20Sopenharmony_ci		 pdev->vendor, pdev->device);
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci	return true;
1128c2ecf20Sopenharmony_ci}
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_ci/*
1158c2ecf20Sopenharmony_ci * Our VGA arbiter participation is limited since we don't know anything
1168c2ecf20Sopenharmony_ci * about the device itself.  However, if the device is the only VGA device
1178c2ecf20Sopenharmony_ci * downstream of a bridge and VFIO VGA support is disabled, then we can
1188c2ecf20Sopenharmony_ci * safely return legacy VGA IO and memory as not decoded since the user
1198c2ecf20Sopenharmony_ci * has no way to get to it and routing can be disabled externally at the
1208c2ecf20Sopenharmony_ci * bridge.
1218c2ecf20Sopenharmony_ci */
1228c2ecf20Sopenharmony_cistatic unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga)
1238c2ecf20Sopenharmony_ci{
1248c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev = opaque;
1258c2ecf20Sopenharmony_ci	struct pci_dev *tmp = NULL, *pdev = vdev->pdev;
1268c2ecf20Sopenharmony_ci	unsigned char max_busnr;
1278c2ecf20Sopenharmony_ci	unsigned int decodes;
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci	if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus))
1308c2ecf20Sopenharmony_ci		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
1318c2ecf20Sopenharmony_ci		       VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
1328c2ecf20Sopenharmony_ci
1338c2ecf20Sopenharmony_ci	max_busnr = pci_bus_max_busnr(pdev->bus);
1348c2ecf20Sopenharmony_ci	decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci	while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) {
1378c2ecf20Sopenharmony_ci		if (tmp == pdev ||
1388c2ecf20Sopenharmony_ci		    pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) ||
1398c2ecf20Sopenharmony_ci		    pci_is_root_bus(tmp->bus))
1408c2ecf20Sopenharmony_ci			continue;
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_ci		if (tmp->bus->number >= pdev->bus->number &&
1438c2ecf20Sopenharmony_ci		    tmp->bus->number <= max_busnr) {
1448c2ecf20Sopenharmony_ci			pci_dev_put(tmp);
1458c2ecf20Sopenharmony_ci			decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
1468c2ecf20Sopenharmony_ci			break;
1478c2ecf20Sopenharmony_ci		}
1488c2ecf20Sopenharmony_ci	}
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci	return decodes;
1518c2ecf20Sopenharmony_ci}
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_cistatic inline bool vfio_pci_is_vga(struct pci_dev *pdev)
1548c2ecf20Sopenharmony_ci{
1558c2ecf20Sopenharmony_ci	return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
1568c2ecf20Sopenharmony_ci}
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_cistatic void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
1598c2ecf20Sopenharmony_ci{
1608c2ecf20Sopenharmony_ci	struct resource *res;
1618c2ecf20Sopenharmony_ci	int i;
1628c2ecf20Sopenharmony_ci	struct vfio_pci_dummy_resource *dummy_res;
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_ci	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
1658c2ecf20Sopenharmony_ci		int bar = i + PCI_STD_RESOURCES;
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_ci		res = &vdev->pdev->resource[bar];
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_ci		if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))
1708c2ecf20Sopenharmony_ci			goto no_mmap;
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci		if (!(res->flags & IORESOURCE_MEM))
1738c2ecf20Sopenharmony_ci			goto no_mmap;
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci		/*
1768c2ecf20Sopenharmony_ci		 * The PCI core shouldn't set up a resource with a
1778c2ecf20Sopenharmony_ci		 * type but zero size. But there may be bugs that
1788c2ecf20Sopenharmony_ci		 * cause us to do that.
1798c2ecf20Sopenharmony_ci		 */
1808c2ecf20Sopenharmony_ci		if (!resource_size(res))
1818c2ecf20Sopenharmony_ci			goto no_mmap;
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci		if (resource_size(res) >= PAGE_SIZE) {
1848c2ecf20Sopenharmony_ci			vdev->bar_mmap_supported[bar] = true;
1858c2ecf20Sopenharmony_ci			continue;
1868c2ecf20Sopenharmony_ci		}
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci		if (!(res->start & ~PAGE_MASK)) {
1898c2ecf20Sopenharmony_ci			/*
1908c2ecf20Sopenharmony_ci			 * Add a dummy resource to reserve the remainder
1918c2ecf20Sopenharmony_ci			 * of the exclusive page in case that hot-add
1928c2ecf20Sopenharmony_ci			 * device's bar is assigned into it.
1938c2ecf20Sopenharmony_ci			 */
1948c2ecf20Sopenharmony_ci			dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);
1958c2ecf20Sopenharmony_ci			if (dummy_res == NULL)
1968c2ecf20Sopenharmony_ci				goto no_mmap;
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci			dummy_res->resource.name = "vfio sub-page reserved";
1998c2ecf20Sopenharmony_ci			dummy_res->resource.start = res->end + 1;
2008c2ecf20Sopenharmony_ci			dummy_res->resource.end = res->start + PAGE_SIZE - 1;
2018c2ecf20Sopenharmony_ci			dummy_res->resource.flags = res->flags;
2028c2ecf20Sopenharmony_ci			if (request_resource(res->parent,
2038c2ecf20Sopenharmony_ci						&dummy_res->resource)) {
2048c2ecf20Sopenharmony_ci				kfree(dummy_res);
2058c2ecf20Sopenharmony_ci				goto no_mmap;
2068c2ecf20Sopenharmony_ci			}
2078c2ecf20Sopenharmony_ci			dummy_res->index = bar;
2088c2ecf20Sopenharmony_ci			list_add(&dummy_res->res_next,
2098c2ecf20Sopenharmony_ci					&vdev->dummy_resources_list);
2108c2ecf20Sopenharmony_ci			vdev->bar_mmap_supported[bar] = true;
2118c2ecf20Sopenharmony_ci			continue;
2128c2ecf20Sopenharmony_ci		}
2138c2ecf20Sopenharmony_ci		/*
2148c2ecf20Sopenharmony_ci		 * Here we don't handle the case when the BAR is not page
2158c2ecf20Sopenharmony_ci		 * aligned because we can't expect the BAR will be
2168c2ecf20Sopenharmony_ci		 * assigned into the same location in a page in guest
2178c2ecf20Sopenharmony_ci		 * when we passthrough the BAR. And it's hard to access
2188c2ecf20Sopenharmony_ci		 * this BAR in userspace because we have no way to get
2198c2ecf20Sopenharmony_ci		 * the BAR's location in a page.
2208c2ecf20Sopenharmony_ci		 */
2218c2ecf20Sopenharmony_cino_mmap:
2228c2ecf20Sopenharmony_ci		vdev->bar_mmap_supported[bar] = false;
2238c2ecf20Sopenharmony_ci	}
2248c2ecf20Sopenharmony_ci}
2258c2ecf20Sopenharmony_ci
/* Forward declarations for helpers that are used before they are defined. */
static void vfio_pci_disable(struct vfio_pci_device *vdev);
static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data);
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci/*
2318c2ecf20Sopenharmony_ci * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
2328c2ecf20Sopenharmony_ci * _and_ the ability detect when the device is asserting INTx via PCI_STATUS.
2338c2ecf20Sopenharmony_ci * If a device implements the former but not the latter we would typically
2348c2ecf20Sopenharmony_ci * expect broken_intx_masking be set and require an exclusive interrupt.
2358c2ecf20Sopenharmony_ci * However since we do have control of the device's ability to assert INTx,
2368c2ecf20Sopenharmony_ci * we can instead pretend that the device does not implement INTx, virtualizing
2378c2ecf20Sopenharmony_ci * the pin register to report zero and maintaining DisINTx set on the host.
2388c2ecf20Sopenharmony_ci */
2398c2ecf20Sopenharmony_cistatic bool vfio_pci_nointx(struct pci_dev *pdev)
2408c2ecf20Sopenharmony_ci{
2418c2ecf20Sopenharmony_ci	switch (pdev->vendor) {
2428c2ecf20Sopenharmony_ci	case PCI_VENDOR_ID_INTEL:
2438c2ecf20Sopenharmony_ci		switch (pdev->device) {
2448c2ecf20Sopenharmony_ci		/* All i40e (XL710/X710/XXV710) 10/20/25/40GbE NICs */
2458c2ecf20Sopenharmony_ci		case 0x1572:
2468c2ecf20Sopenharmony_ci		case 0x1574:
2478c2ecf20Sopenharmony_ci		case 0x1580 ... 0x1581:
2488c2ecf20Sopenharmony_ci		case 0x1583 ... 0x158b:
2498c2ecf20Sopenharmony_ci		case 0x37d0 ... 0x37d2:
2508c2ecf20Sopenharmony_ci		/* X550 */
2518c2ecf20Sopenharmony_ci		case 0x1563:
2528c2ecf20Sopenharmony_ci			return true;
2538c2ecf20Sopenharmony_ci		default:
2548c2ecf20Sopenharmony_ci			return false;
2558c2ecf20Sopenharmony_ci		}
2568c2ecf20Sopenharmony_ci	}
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci	return false;
2598c2ecf20Sopenharmony_ci}
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_cistatic void vfio_pci_probe_power_state(struct vfio_pci_device *vdev)
2628c2ecf20Sopenharmony_ci{
2638c2ecf20Sopenharmony_ci	struct pci_dev *pdev = vdev->pdev;
2648c2ecf20Sopenharmony_ci	u16 pmcsr;
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_ci	if (!pdev->pm_cap)
2678c2ecf20Sopenharmony_ci		return;
2688c2ecf20Sopenharmony_ci
2698c2ecf20Sopenharmony_ci	pci_read_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, &pmcsr);
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci	vdev->needs_pm_restore = !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET);
2728c2ecf20Sopenharmony_ci}
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_ci/*
2758c2ecf20Sopenharmony_ci * pci_set_power_state() wrapper handling devices which perform a soft reset on
2768c2ecf20Sopenharmony_ci * D3->D0 transition.  Save state prior to D0/1/2->D3, stash it on the vdev,
2778c2ecf20Sopenharmony_ci * restore when returned to D0.  Saved separately from pci_saved_state for use
2788c2ecf20Sopenharmony_ci * by PM capability emulation and separately from pci_dev internal saved state
2798c2ecf20Sopenharmony_ci * to avoid it being overwritten and consumed around other resets.
2808c2ecf20Sopenharmony_ci */
2818c2ecf20Sopenharmony_ciint vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state)
2828c2ecf20Sopenharmony_ci{
2838c2ecf20Sopenharmony_ci	struct pci_dev *pdev = vdev->pdev;
2848c2ecf20Sopenharmony_ci	bool needs_restore = false, needs_save = false;
2858c2ecf20Sopenharmony_ci	int ret;
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_ci	if (vdev->needs_pm_restore) {
2888c2ecf20Sopenharmony_ci		if (pdev->current_state < PCI_D3hot && state >= PCI_D3hot) {
2898c2ecf20Sopenharmony_ci			pci_save_state(pdev);
2908c2ecf20Sopenharmony_ci			needs_save = true;
2918c2ecf20Sopenharmony_ci		}
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_ci		if (pdev->current_state >= PCI_D3hot && state <= PCI_D0)
2948c2ecf20Sopenharmony_ci			needs_restore = true;
2958c2ecf20Sopenharmony_ci	}
2968c2ecf20Sopenharmony_ci
2978c2ecf20Sopenharmony_ci	ret = pci_set_power_state(pdev, state);
2988c2ecf20Sopenharmony_ci
2998c2ecf20Sopenharmony_ci	if (!ret) {
3008c2ecf20Sopenharmony_ci		/* D3 might be unsupported via quirk, skip unless in D3 */
3018c2ecf20Sopenharmony_ci		if (needs_save && pdev->current_state >= PCI_D3hot) {
3028c2ecf20Sopenharmony_ci			vdev->pm_save = pci_store_saved_state(pdev);
3038c2ecf20Sopenharmony_ci		} else if (needs_restore) {
3048c2ecf20Sopenharmony_ci			pci_load_and_free_saved_state(pdev, &vdev->pm_save);
3058c2ecf20Sopenharmony_ci			pci_restore_state(pdev);
3068c2ecf20Sopenharmony_ci		}
3078c2ecf20Sopenharmony_ci	}
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci	return ret;
3108c2ecf20Sopenharmony_ci}
3118c2ecf20Sopenharmony_ci
3128c2ecf20Sopenharmony_cistatic int vfio_pci_enable(struct vfio_pci_device *vdev)
3138c2ecf20Sopenharmony_ci{
3148c2ecf20Sopenharmony_ci	struct pci_dev *pdev = vdev->pdev;
3158c2ecf20Sopenharmony_ci	int ret;
3168c2ecf20Sopenharmony_ci	u16 cmd;
3178c2ecf20Sopenharmony_ci	u8 msix_pos;
3188c2ecf20Sopenharmony_ci
3198c2ecf20Sopenharmony_ci	vfio_pci_set_power_state(vdev, PCI_D0);
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci	/* Don't allow our initial saved state to include busmaster */
3228c2ecf20Sopenharmony_ci	pci_clear_master(pdev);
3238c2ecf20Sopenharmony_ci
3248c2ecf20Sopenharmony_ci	ret = pci_enable_device(pdev);
3258c2ecf20Sopenharmony_ci	if (ret)
3268c2ecf20Sopenharmony_ci		return ret;
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_ci	/* If reset fails because of the device lock, fail this path entirely */
3298c2ecf20Sopenharmony_ci	ret = pci_try_reset_function(pdev);
3308c2ecf20Sopenharmony_ci	if (ret == -EAGAIN) {
3318c2ecf20Sopenharmony_ci		pci_disable_device(pdev);
3328c2ecf20Sopenharmony_ci		return ret;
3338c2ecf20Sopenharmony_ci	}
3348c2ecf20Sopenharmony_ci
3358c2ecf20Sopenharmony_ci	vdev->reset_works = !ret;
3368c2ecf20Sopenharmony_ci	pci_save_state(pdev);
3378c2ecf20Sopenharmony_ci	vdev->pci_saved_state = pci_store_saved_state(pdev);
3388c2ecf20Sopenharmony_ci	if (!vdev->pci_saved_state)
3398c2ecf20Sopenharmony_ci		pci_dbg(pdev, "%s: Couldn't store saved state\n", __func__);
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ci	if (likely(!nointxmask)) {
3428c2ecf20Sopenharmony_ci		if (vfio_pci_nointx(pdev)) {
3438c2ecf20Sopenharmony_ci			pci_info(pdev, "Masking broken INTx support\n");
3448c2ecf20Sopenharmony_ci			vdev->nointx = true;
3458c2ecf20Sopenharmony_ci			pci_intx(pdev, 0);
3468c2ecf20Sopenharmony_ci		} else
3478c2ecf20Sopenharmony_ci			vdev->pci_2_3 = pci_intx_mask_supported(pdev);
3488c2ecf20Sopenharmony_ci	}
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
3518c2ecf20Sopenharmony_ci	if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) {
3528c2ecf20Sopenharmony_ci		cmd &= ~PCI_COMMAND_INTX_DISABLE;
3538c2ecf20Sopenharmony_ci		pci_write_config_word(pdev, PCI_COMMAND, cmd);
3548c2ecf20Sopenharmony_ci	}
3558c2ecf20Sopenharmony_ci
3568c2ecf20Sopenharmony_ci	ret = vfio_config_init(vdev);
3578c2ecf20Sopenharmony_ci	if (ret) {
3588c2ecf20Sopenharmony_ci		kfree(vdev->pci_saved_state);
3598c2ecf20Sopenharmony_ci		vdev->pci_saved_state = NULL;
3608c2ecf20Sopenharmony_ci		pci_disable_device(pdev);
3618c2ecf20Sopenharmony_ci		return ret;
3628c2ecf20Sopenharmony_ci	}
3638c2ecf20Sopenharmony_ci
3648c2ecf20Sopenharmony_ci	msix_pos = pdev->msix_cap;
3658c2ecf20Sopenharmony_ci	if (msix_pos) {
3668c2ecf20Sopenharmony_ci		u16 flags;
3678c2ecf20Sopenharmony_ci		u32 table;
3688c2ecf20Sopenharmony_ci
3698c2ecf20Sopenharmony_ci		pci_read_config_word(pdev, msix_pos + PCI_MSIX_FLAGS, &flags);
3708c2ecf20Sopenharmony_ci		pci_read_config_dword(pdev, msix_pos + PCI_MSIX_TABLE, &table);
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_ci		vdev->msix_bar = table & PCI_MSIX_TABLE_BIR;
3738c2ecf20Sopenharmony_ci		vdev->msix_offset = table & PCI_MSIX_TABLE_OFFSET;
3748c2ecf20Sopenharmony_ci		vdev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * 16;
3758c2ecf20Sopenharmony_ci	} else
3768c2ecf20Sopenharmony_ci		vdev->msix_bar = 0xFF;
3778c2ecf20Sopenharmony_ci
3788c2ecf20Sopenharmony_ci	if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
3798c2ecf20Sopenharmony_ci		vdev->has_vga = true;
3808c2ecf20Sopenharmony_ci
3818c2ecf20Sopenharmony_ci
3828c2ecf20Sopenharmony_ci	if (vfio_pci_is_vga(pdev) &&
3838c2ecf20Sopenharmony_ci	    pdev->vendor == PCI_VENDOR_ID_INTEL &&
3848c2ecf20Sopenharmony_ci	    IS_ENABLED(CONFIG_VFIO_PCI_IGD)) {
3858c2ecf20Sopenharmony_ci		ret = vfio_pci_igd_init(vdev);
3868c2ecf20Sopenharmony_ci		if (ret && ret != -ENODEV) {
3878c2ecf20Sopenharmony_ci			pci_warn(pdev, "Failed to setup Intel IGD regions\n");
3888c2ecf20Sopenharmony_ci			goto disable_exit;
3898c2ecf20Sopenharmony_ci		}
3908c2ecf20Sopenharmony_ci	}
3918c2ecf20Sopenharmony_ci
3928c2ecf20Sopenharmony_ci	if (pdev->vendor == PCI_VENDOR_ID_NVIDIA &&
3938c2ecf20Sopenharmony_ci	    IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
3948c2ecf20Sopenharmony_ci		ret = vfio_pci_nvdia_v100_nvlink2_init(vdev);
3958c2ecf20Sopenharmony_ci		if (ret && ret != -ENODEV) {
3968c2ecf20Sopenharmony_ci			pci_warn(pdev, "Failed to setup NVIDIA NV2 RAM region\n");
3978c2ecf20Sopenharmony_ci			goto disable_exit;
3988c2ecf20Sopenharmony_ci		}
3998c2ecf20Sopenharmony_ci	}
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_ci	if (pdev->vendor == PCI_VENDOR_ID_IBM &&
4028c2ecf20Sopenharmony_ci	    IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
4038c2ecf20Sopenharmony_ci		ret = vfio_pci_ibm_npu2_init(vdev);
4048c2ecf20Sopenharmony_ci		if (ret && ret != -ENODEV) {
4058c2ecf20Sopenharmony_ci			pci_warn(pdev, "Failed to setup NVIDIA NV2 ATSD region\n");
4068c2ecf20Sopenharmony_ci			goto disable_exit;
4078c2ecf20Sopenharmony_ci		}
4088c2ecf20Sopenharmony_ci	}
4098c2ecf20Sopenharmony_ci
4108c2ecf20Sopenharmony_ci	vfio_pci_probe_mmaps(vdev);
4118c2ecf20Sopenharmony_ci
4128c2ecf20Sopenharmony_ci	return 0;
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_cidisable_exit:
4158c2ecf20Sopenharmony_ci	vfio_pci_disable(vdev);
4168c2ecf20Sopenharmony_ci	return ret;
4178c2ecf20Sopenharmony_ci}
4188c2ecf20Sopenharmony_ci
4198c2ecf20Sopenharmony_cistatic void vfio_pci_disable(struct vfio_pci_device *vdev)
4208c2ecf20Sopenharmony_ci{
4218c2ecf20Sopenharmony_ci	struct pci_dev *pdev = vdev->pdev;
4228c2ecf20Sopenharmony_ci	struct vfio_pci_dummy_resource *dummy_res, *tmp;
4238c2ecf20Sopenharmony_ci	struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
4248c2ecf20Sopenharmony_ci	int i, bar;
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_ci	/* Stop the device from further DMA */
4278c2ecf20Sopenharmony_ci	pci_clear_master(pdev);
4288c2ecf20Sopenharmony_ci
4298c2ecf20Sopenharmony_ci	vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
4308c2ecf20Sopenharmony_ci				VFIO_IRQ_SET_ACTION_TRIGGER,
4318c2ecf20Sopenharmony_ci				vdev->irq_type, 0, 0, NULL);
4328c2ecf20Sopenharmony_ci
4338c2ecf20Sopenharmony_ci	/* Device closed, don't need mutex here */
4348c2ecf20Sopenharmony_ci	list_for_each_entry_safe(ioeventfd, ioeventfd_tmp,
4358c2ecf20Sopenharmony_ci				 &vdev->ioeventfds_list, next) {
4368c2ecf20Sopenharmony_ci		vfio_virqfd_disable(&ioeventfd->virqfd);
4378c2ecf20Sopenharmony_ci		list_del(&ioeventfd->next);
4388c2ecf20Sopenharmony_ci		kfree(ioeventfd);
4398c2ecf20Sopenharmony_ci	}
4408c2ecf20Sopenharmony_ci	vdev->ioeventfds_nr = 0;
4418c2ecf20Sopenharmony_ci
4428c2ecf20Sopenharmony_ci	vdev->virq_disabled = false;
4438c2ecf20Sopenharmony_ci
4448c2ecf20Sopenharmony_ci	for (i = 0; i < vdev->num_regions; i++)
4458c2ecf20Sopenharmony_ci		vdev->region[i].ops->release(vdev, &vdev->region[i]);
4468c2ecf20Sopenharmony_ci
4478c2ecf20Sopenharmony_ci	vdev->num_regions = 0;
4488c2ecf20Sopenharmony_ci	kfree(vdev->region);
4498c2ecf20Sopenharmony_ci	vdev->region = NULL; /* don't krealloc a freed pointer */
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci	vfio_config_free(vdev);
4528c2ecf20Sopenharmony_ci
4538c2ecf20Sopenharmony_ci	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
4548c2ecf20Sopenharmony_ci		bar = i + PCI_STD_RESOURCES;
4558c2ecf20Sopenharmony_ci		if (!vdev->barmap[bar])
4568c2ecf20Sopenharmony_ci			continue;
4578c2ecf20Sopenharmony_ci		pci_iounmap(pdev, vdev->barmap[bar]);
4588c2ecf20Sopenharmony_ci		pci_release_selected_regions(pdev, 1 << bar);
4598c2ecf20Sopenharmony_ci		vdev->barmap[bar] = NULL;
4608c2ecf20Sopenharmony_ci	}
4618c2ecf20Sopenharmony_ci
4628c2ecf20Sopenharmony_ci	list_for_each_entry_safe(dummy_res, tmp,
4638c2ecf20Sopenharmony_ci				 &vdev->dummy_resources_list, res_next) {
4648c2ecf20Sopenharmony_ci		list_del(&dummy_res->res_next);
4658c2ecf20Sopenharmony_ci		release_resource(&dummy_res->resource);
4668c2ecf20Sopenharmony_ci		kfree(dummy_res);
4678c2ecf20Sopenharmony_ci	}
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci	vdev->needs_reset = true;
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_ci	/*
4728c2ecf20Sopenharmony_ci	 * If we have saved state, restore it.  If we can reset the device,
4738c2ecf20Sopenharmony_ci	 * even better.  Resetting with current state seems better than
4748c2ecf20Sopenharmony_ci	 * nothing, but saving and restoring current state without reset
4758c2ecf20Sopenharmony_ci	 * is just busy work.
4768c2ecf20Sopenharmony_ci	 */
4778c2ecf20Sopenharmony_ci	if (pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state)) {
4788c2ecf20Sopenharmony_ci		pci_info(pdev, "%s: Couldn't reload saved state\n", __func__);
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_ci		if (!vdev->reset_works)
4818c2ecf20Sopenharmony_ci			goto out;
4828c2ecf20Sopenharmony_ci
4838c2ecf20Sopenharmony_ci		pci_save_state(pdev);
4848c2ecf20Sopenharmony_ci	}
4858c2ecf20Sopenharmony_ci
4868c2ecf20Sopenharmony_ci	/*
4878c2ecf20Sopenharmony_ci	 * Disable INTx and MSI, presumably to avoid spurious interrupts
4888c2ecf20Sopenharmony_ci	 * during reset.  Stolen from pci_reset_function()
4898c2ecf20Sopenharmony_ci	 */
4908c2ecf20Sopenharmony_ci	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_ci	/*
4938c2ecf20Sopenharmony_ci	 * Try to get the locks ourselves to prevent a deadlock. The
4948c2ecf20Sopenharmony_ci	 * success of this is dependent on being able to lock the device,
4958c2ecf20Sopenharmony_ci	 * which is not always possible.
4968c2ecf20Sopenharmony_ci	 * We can not use the "try" reset interface here, which will
4978c2ecf20Sopenharmony_ci	 * overwrite the previously restored configuration information.
4988c2ecf20Sopenharmony_ci	 */
4998c2ecf20Sopenharmony_ci	if (vdev->reset_works && pci_cfg_access_trylock(pdev)) {
5008c2ecf20Sopenharmony_ci		if (device_trylock(&pdev->dev)) {
5018c2ecf20Sopenharmony_ci			if (!__pci_reset_function_locked(pdev))
5028c2ecf20Sopenharmony_ci				vdev->needs_reset = false;
5038c2ecf20Sopenharmony_ci			device_unlock(&pdev->dev);
5048c2ecf20Sopenharmony_ci		}
5058c2ecf20Sopenharmony_ci		pci_cfg_access_unlock(pdev);
5068c2ecf20Sopenharmony_ci	}
5078c2ecf20Sopenharmony_ci
5088c2ecf20Sopenharmony_ci	pci_restore_state(pdev);
5098c2ecf20Sopenharmony_ciout:
5108c2ecf20Sopenharmony_ci	pci_disable_device(pdev);
5118c2ecf20Sopenharmony_ci
5128c2ecf20Sopenharmony_ci	vfio_pci_try_bus_reset(vdev);
5138c2ecf20Sopenharmony_ci
5148c2ecf20Sopenharmony_ci	if (!disable_idle_d3)
5158c2ecf20Sopenharmony_ci		vfio_pci_set_power_state(vdev, PCI_D3hot);
5168c2ecf20Sopenharmony_ci}
5178c2ecf20Sopenharmony_ci
5188c2ecf20Sopenharmony_cistatic struct pci_driver vfio_pci_driver;
5198c2ecf20Sopenharmony_ci
5208c2ecf20Sopenharmony_cistatic struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev,
5218c2ecf20Sopenharmony_ci					   struct vfio_device **pf_dev)
5228c2ecf20Sopenharmony_ci{
5238c2ecf20Sopenharmony_ci	struct pci_dev *physfn = pci_physfn(vdev->pdev);
5248c2ecf20Sopenharmony_ci
5258c2ecf20Sopenharmony_ci	if (!vdev->pdev->is_virtfn)
5268c2ecf20Sopenharmony_ci		return NULL;
5278c2ecf20Sopenharmony_ci
5288c2ecf20Sopenharmony_ci	*pf_dev = vfio_device_get_from_dev(&physfn->dev);
5298c2ecf20Sopenharmony_ci	if (!*pf_dev)
5308c2ecf20Sopenharmony_ci		return NULL;
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_ci	if (pci_dev_driver(physfn) != &vfio_pci_driver) {
5338c2ecf20Sopenharmony_ci		vfio_device_put(*pf_dev);
5348c2ecf20Sopenharmony_ci		return NULL;
5358c2ecf20Sopenharmony_ci	}
5368c2ecf20Sopenharmony_ci
5378c2ecf20Sopenharmony_ci	return vfio_device_data(*pf_dev);
5388c2ecf20Sopenharmony_ci}
5398c2ecf20Sopenharmony_ci
5408c2ecf20Sopenharmony_cistatic void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val)
5418c2ecf20Sopenharmony_ci{
5428c2ecf20Sopenharmony_ci	struct vfio_device *pf_dev;
5438c2ecf20Sopenharmony_ci	struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);
5448c2ecf20Sopenharmony_ci
5458c2ecf20Sopenharmony_ci	if (!pf_vdev)
5468c2ecf20Sopenharmony_ci		return;
5478c2ecf20Sopenharmony_ci
5488c2ecf20Sopenharmony_ci	mutex_lock(&pf_vdev->vf_token->lock);
5498c2ecf20Sopenharmony_ci	pf_vdev->vf_token->users += val;
5508c2ecf20Sopenharmony_ci	WARN_ON(pf_vdev->vf_token->users < 0);
5518c2ecf20Sopenharmony_ci	mutex_unlock(&pf_vdev->vf_token->lock);
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_ci	vfio_device_put(pf_dev);
5548c2ecf20Sopenharmony_ci}
5558c2ecf20Sopenharmony_ci
5568c2ecf20Sopenharmony_cistatic void vfio_pci_release(void *device_data)
5578c2ecf20Sopenharmony_ci{
5588c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev = device_data;
5598c2ecf20Sopenharmony_ci
5608c2ecf20Sopenharmony_ci	mutex_lock(&vdev->reflck->lock);
5618c2ecf20Sopenharmony_ci
5628c2ecf20Sopenharmony_ci	if (!(--vdev->refcnt)) {
5638c2ecf20Sopenharmony_ci		vfio_pci_vf_token_user_add(vdev, -1);
5648c2ecf20Sopenharmony_ci		vfio_spapr_pci_eeh_release(vdev->pdev);
5658c2ecf20Sopenharmony_ci		vfio_pci_disable(vdev);
5668c2ecf20Sopenharmony_ci
5678c2ecf20Sopenharmony_ci		mutex_lock(&vdev->igate);
5688c2ecf20Sopenharmony_ci		if (vdev->err_trigger) {
5698c2ecf20Sopenharmony_ci			eventfd_ctx_put(vdev->err_trigger);
5708c2ecf20Sopenharmony_ci			vdev->err_trigger = NULL;
5718c2ecf20Sopenharmony_ci		}
5728c2ecf20Sopenharmony_ci		if (vdev->req_trigger) {
5738c2ecf20Sopenharmony_ci			eventfd_ctx_put(vdev->req_trigger);
5748c2ecf20Sopenharmony_ci			vdev->req_trigger = NULL;
5758c2ecf20Sopenharmony_ci		}
5768c2ecf20Sopenharmony_ci		mutex_unlock(&vdev->igate);
5778c2ecf20Sopenharmony_ci	}
5788c2ecf20Sopenharmony_ci
5798c2ecf20Sopenharmony_ci	mutex_unlock(&vdev->reflck->lock);
5808c2ecf20Sopenharmony_ci
5818c2ecf20Sopenharmony_ci	module_put(THIS_MODULE);
5828c2ecf20Sopenharmony_ci}
5838c2ecf20Sopenharmony_ci
5848c2ecf20Sopenharmony_cistatic int vfio_pci_open(void *device_data)
5858c2ecf20Sopenharmony_ci{
5868c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev = device_data;
5878c2ecf20Sopenharmony_ci	int ret = 0;
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ci	if (!try_module_get(THIS_MODULE))
5908c2ecf20Sopenharmony_ci		return -ENODEV;
5918c2ecf20Sopenharmony_ci
5928c2ecf20Sopenharmony_ci	mutex_lock(&vdev->reflck->lock);
5938c2ecf20Sopenharmony_ci
5948c2ecf20Sopenharmony_ci	if (!vdev->refcnt) {
5958c2ecf20Sopenharmony_ci		ret = vfio_pci_enable(vdev);
5968c2ecf20Sopenharmony_ci		if (ret)
5978c2ecf20Sopenharmony_ci			goto error;
5988c2ecf20Sopenharmony_ci
5998c2ecf20Sopenharmony_ci		vfio_spapr_pci_eeh_open(vdev->pdev);
6008c2ecf20Sopenharmony_ci		vfio_pci_vf_token_user_add(vdev, 1);
6018c2ecf20Sopenharmony_ci	}
6028c2ecf20Sopenharmony_ci	vdev->refcnt++;
6038c2ecf20Sopenharmony_cierror:
6048c2ecf20Sopenharmony_ci	mutex_unlock(&vdev->reflck->lock);
6058c2ecf20Sopenharmony_ci	if (ret)
6068c2ecf20Sopenharmony_ci		module_put(THIS_MODULE);
6078c2ecf20Sopenharmony_ci	return ret;
6088c2ecf20Sopenharmony_ci}
6098c2ecf20Sopenharmony_ci
6108c2ecf20Sopenharmony_cistatic int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
6118c2ecf20Sopenharmony_ci{
6128c2ecf20Sopenharmony_ci	if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
6138c2ecf20Sopenharmony_ci		u8 pin;
6148c2ecf20Sopenharmony_ci
6158c2ecf20Sopenharmony_ci		if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) ||
6168c2ecf20Sopenharmony_ci		    vdev->nointx || vdev->pdev->is_virtfn)
6178c2ecf20Sopenharmony_ci			return 0;
6188c2ecf20Sopenharmony_ci
6198c2ecf20Sopenharmony_ci		pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);
6208c2ecf20Sopenharmony_ci
6218c2ecf20Sopenharmony_ci		return pin ? 1 : 0;
6228c2ecf20Sopenharmony_ci	} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
6238c2ecf20Sopenharmony_ci		u8 pos;
6248c2ecf20Sopenharmony_ci		u16 flags;
6258c2ecf20Sopenharmony_ci
6268c2ecf20Sopenharmony_ci		pos = vdev->pdev->msi_cap;
6278c2ecf20Sopenharmony_ci		if (pos) {
6288c2ecf20Sopenharmony_ci			pci_read_config_word(vdev->pdev,
6298c2ecf20Sopenharmony_ci					     pos + PCI_MSI_FLAGS, &flags);
6308c2ecf20Sopenharmony_ci			return 1 << ((flags & PCI_MSI_FLAGS_QMASK) >> 1);
6318c2ecf20Sopenharmony_ci		}
6328c2ecf20Sopenharmony_ci	} else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {
6338c2ecf20Sopenharmony_ci		u8 pos;
6348c2ecf20Sopenharmony_ci		u16 flags;
6358c2ecf20Sopenharmony_ci
6368c2ecf20Sopenharmony_ci		pos = vdev->pdev->msix_cap;
6378c2ecf20Sopenharmony_ci		if (pos) {
6388c2ecf20Sopenharmony_ci			pci_read_config_word(vdev->pdev,
6398c2ecf20Sopenharmony_ci					     pos + PCI_MSIX_FLAGS, &flags);
6408c2ecf20Sopenharmony_ci
6418c2ecf20Sopenharmony_ci			return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
6428c2ecf20Sopenharmony_ci		}
6438c2ecf20Sopenharmony_ci	} else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX) {
6448c2ecf20Sopenharmony_ci		if (pci_is_pcie(vdev->pdev))
6458c2ecf20Sopenharmony_ci			return 1;
6468c2ecf20Sopenharmony_ci	} else if (irq_type == VFIO_PCI_REQ_IRQ_INDEX) {
6478c2ecf20Sopenharmony_ci		return 1;
6488c2ecf20Sopenharmony_ci	}
6498c2ecf20Sopenharmony_ci
6508c2ecf20Sopenharmony_ci	return 0;
6518c2ecf20Sopenharmony_ci}
6528c2ecf20Sopenharmony_ci
/*
 * Bus-walk callback that counts devices: @data points to an int which is
 * incremented once per call.  @pdev is not examined.  Always returns 0.
 */
static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
{
	int *counter = data;

	*counter += 1;

	return 0;
}
6588c2ecf20Sopenharmony_ci
/*
 * State shared with vfio_pci_fill_devs() while collecting the devices
 * affected by a hot reset.
 *
 * @max:     number of entries @devices has room for; filling stops with
 *           -EAGAIN once @cur reaches it
 * @cur:     index of the next entry to be written
 * @devices: output array receiving one entry per visited device
 */
struct vfio_pci_fill_info {
	int max;
	int cur;
	struct vfio_pci_dependent_device *devices;
};
6648c2ecf20Sopenharmony_ci
6658c2ecf20Sopenharmony_cistatic int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
6668c2ecf20Sopenharmony_ci{
6678c2ecf20Sopenharmony_ci	struct vfio_pci_fill_info *fill = data;
6688c2ecf20Sopenharmony_ci	struct iommu_group *iommu_group;
6698c2ecf20Sopenharmony_ci
6708c2ecf20Sopenharmony_ci	if (fill->cur == fill->max)
6718c2ecf20Sopenharmony_ci		return -EAGAIN; /* Something changed, try again */
6728c2ecf20Sopenharmony_ci
6738c2ecf20Sopenharmony_ci	iommu_group = iommu_group_get(&pdev->dev);
6748c2ecf20Sopenharmony_ci	if (!iommu_group)
6758c2ecf20Sopenharmony_ci		return -EPERM; /* Cannot reset non-isolated devices */
6768c2ecf20Sopenharmony_ci
6778c2ecf20Sopenharmony_ci	fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
6788c2ecf20Sopenharmony_ci	fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);
6798c2ecf20Sopenharmony_ci	fill->devices[fill->cur].bus = pdev->bus->number;
6808c2ecf20Sopenharmony_ci	fill->devices[fill->cur].devfn = pdev->devfn;
6818c2ecf20Sopenharmony_ci	fill->cur++;
6828c2ecf20Sopenharmony_ci	iommu_group_put(iommu_group);
6838c2ecf20Sopenharmony_ci	return 0;
6848c2ecf20Sopenharmony_ci}
6858c2ecf20Sopenharmony_ci
/*
 * One user-supplied VFIO group taking part in a hot reset.
 *
 * @group: group reference obtained through the vfio external user interface
 * @id:    IOMMU group id reported for @group; compared against the IOMMU
 *         group id of every affected device
 */
struct vfio_pci_group_entry {
	struct vfio_group *group;
	int id;
};
6908c2ecf20Sopenharmony_ci
/*
 * The set of groups handed to vfio_pci_validate_devs().
 *
 * @count:  number of valid entries in @groups
 * @groups: array of group entries to search
 */
struct vfio_pci_group_info {
	int count;
	struct vfio_pci_group_entry *groups;
};
6958c2ecf20Sopenharmony_ci
6968c2ecf20Sopenharmony_cistatic int vfio_pci_validate_devs(struct pci_dev *pdev, void *data)
6978c2ecf20Sopenharmony_ci{
6988c2ecf20Sopenharmony_ci	struct vfio_pci_group_info *info = data;
6998c2ecf20Sopenharmony_ci	struct iommu_group *group;
7008c2ecf20Sopenharmony_ci	int id, i;
7018c2ecf20Sopenharmony_ci
7028c2ecf20Sopenharmony_ci	group = iommu_group_get(&pdev->dev);
7038c2ecf20Sopenharmony_ci	if (!group)
7048c2ecf20Sopenharmony_ci		return -EPERM;
7058c2ecf20Sopenharmony_ci
7068c2ecf20Sopenharmony_ci	id = iommu_group_id(group);
7078c2ecf20Sopenharmony_ci
7088c2ecf20Sopenharmony_ci	for (i = 0; i < info->count; i++)
7098c2ecf20Sopenharmony_ci		if (info->groups[i].id == id)
7108c2ecf20Sopenharmony_ci			break;
7118c2ecf20Sopenharmony_ci
7128c2ecf20Sopenharmony_ci	iommu_group_put(group);
7138c2ecf20Sopenharmony_ci
7148c2ecf20Sopenharmony_ci	return (i == info->count) ? -EINVAL : 0;
7158c2ecf20Sopenharmony_ci}
7168c2ecf20Sopenharmony_ci
7178c2ecf20Sopenharmony_cistatic bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot)
7188c2ecf20Sopenharmony_ci{
7198c2ecf20Sopenharmony_ci	for (; pdev; pdev = pdev->bus->self)
7208c2ecf20Sopenharmony_ci		if (pdev->bus == slot->bus)
7218c2ecf20Sopenharmony_ci			return (pdev->slot == slot);
7228c2ecf20Sopenharmony_ci	return false;
7238c2ecf20Sopenharmony_ci}
7248c2ecf20Sopenharmony_ci
/*
 * Context handed to vfio_pci_walk_wrapper() during a bus walk.
 *
 * @fn:   callback invoked for each device that passes the slot filter
 * @data: opaque argument forwarded to @fn
 * @pdev: device the walk was started for; its slot is the filter reference
 * @slot: true to restrict the walk to devices below @pdev's slot,
 *        false to visit every device on the bus
 * @ret:  result of the most recent @fn call
 */
struct vfio_pci_walk_info {
	int (*fn)(struct pci_dev *, void *data);
	void *data;
	struct pci_dev *pdev;
	bool slot;
	int ret;
};
7328c2ecf20Sopenharmony_ci
7338c2ecf20Sopenharmony_cistatic int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data)
7348c2ecf20Sopenharmony_ci{
7358c2ecf20Sopenharmony_ci	struct vfio_pci_walk_info *walk = data;
7368c2ecf20Sopenharmony_ci
7378c2ecf20Sopenharmony_ci	if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot))
7388c2ecf20Sopenharmony_ci		walk->ret = walk->fn(pdev, walk->data);
7398c2ecf20Sopenharmony_ci
7408c2ecf20Sopenharmony_ci	return walk->ret;
7418c2ecf20Sopenharmony_ci}
7428c2ecf20Sopenharmony_ci
7438c2ecf20Sopenharmony_cistatic int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
7448c2ecf20Sopenharmony_ci					 int (*fn)(struct pci_dev *,
7458c2ecf20Sopenharmony_ci						   void *data), void *data,
7468c2ecf20Sopenharmony_ci					 bool slot)
7478c2ecf20Sopenharmony_ci{
7488c2ecf20Sopenharmony_ci	struct vfio_pci_walk_info walk = {
7498c2ecf20Sopenharmony_ci		.fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0,
7508c2ecf20Sopenharmony_ci	};
7518c2ecf20Sopenharmony_ci
7528c2ecf20Sopenharmony_ci	pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk);
7538c2ecf20Sopenharmony_ci
7548c2ecf20Sopenharmony_ci	return walk.ret;
7558c2ecf20Sopenharmony_ci}
7568c2ecf20Sopenharmony_ci
7578c2ecf20Sopenharmony_cistatic int msix_mmappable_cap(struct vfio_pci_device *vdev,
7588c2ecf20Sopenharmony_ci			      struct vfio_info_cap *caps)
7598c2ecf20Sopenharmony_ci{
7608c2ecf20Sopenharmony_ci	struct vfio_info_cap_header header = {
7618c2ecf20Sopenharmony_ci		.id = VFIO_REGION_INFO_CAP_MSIX_MAPPABLE,
7628c2ecf20Sopenharmony_ci		.version = 1
7638c2ecf20Sopenharmony_ci	};
7648c2ecf20Sopenharmony_ci
7658c2ecf20Sopenharmony_ci	return vfio_info_add_capability(caps, &header, sizeof(header));
7668c2ecf20Sopenharmony_ci}
7678c2ecf20Sopenharmony_ci
7688c2ecf20Sopenharmony_ciint vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
7698c2ecf20Sopenharmony_ci				 unsigned int type, unsigned int subtype,
7708c2ecf20Sopenharmony_ci				 const struct vfio_pci_regops *ops,
7718c2ecf20Sopenharmony_ci				 size_t size, u32 flags, void *data)
7728c2ecf20Sopenharmony_ci{
7738c2ecf20Sopenharmony_ci	struct vfio_pci_region *region;
7748c2ecf20Sopenharmony_ci
7758c2ecf20Sopenharmony_ci	region = krealloc(vdev->region,
7768c2ecf20Sopenharmony_ci			  (vdev->num_regions + 1) * sizeof(*region),
7778c2ecf20Sopenharmony_ci			  GFP_KERNEL);
7788c2ecf20Sopenharmony_ci	if (!region)
7798c2ecf20Sopenharmony_ci		return -ENOMEM;
7808c2ecf20Sopenharmony_ci
7818c2ecf20Sopenharmony_ci	vdev->region = region;
7828c2ecf20Sopenharmony_ci	vdev->region[vdev->num_regions].type = type;
7838c2ecf20Sopenharmony_ci	vdev->region[vdev->num_regions].subtype = subtype;
7848c2ecf20Sopenharmony_ci	vdev->region[vdev->num_regions].ops = ops;
7858c2ecf20Sopenharmony_ci	vdev->region[vdev->num_regions].size = size;
7868c2ecf20Sopenharmony_ci	vdev->region[vdev->num_regions].flags = flags;
7878c2ecf20Sopenharmony_ci	vdev->region[vdev->num_regions].data = data;
7888c2ecf20Sopenharmony_ci
7898c2ecf20Sopenharmony_ci	vdev->num_regions++;
7908c2ecf20Sopenharmony_ci
7918c2ecf20Sopenharmony_ci	return 0;
7928c2ecf20Sopenharmony_ci}
7938c2ecf20Sopenharmony_ci
/*
 * Array of vfio devices gathered for a hot reset.
 *
 * @devices:   array of device pointers, allocated with @max_index slots
 * @cur_index: index counter into @devices, starts at 0
 * @max_index: capacity of @devices
 */
struct vfio_devices {
	struct vfio_device **devices;
	int cur_index;
	int max_index;
};
7998c2ecf20Sopenharmony_ci
8008c2ecf20Sopenharmony_cistatic long vfio_pci_ioctl(void *device_data,
8018c2ecf20Sopenharmony_ci			   unsigned int cmd, unsigned long arg)
8028c2ecf20Sopenharmony_ci{
8038c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev = device_data;
8048c2ecf20Sopenharmony_ci	unsigned long minsz;
8058c2ecf20Sopenharmony_ci
8068c2ecf20Sopenharmony_ci	if (cmd == VFIO_DEVICE_GET_INFO) {
8078c2ecf20Sopenharmony_ci		struct vfio_device_info info;
8088c2ecf20Sopenharmony_ci		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
8098c2ecf20Sopenharmony_ci		unsigned long capsz;
8108c2ecf20Sopenharmony_ci
8118c2ecf20Sopenharmony_ci		minsz = offsetofend(struct vfio_device_info, num_irqs);
8128c2ecf20Sopenharmony_ci
8138c2ecf20Sopenharmony_ci		/* For backward compatibility, cannot require this */
8148c2ecf20Sopenharmony_ci		capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
8158c2ecf20Sopenharmony_ci
8168c2ecf20Sopenharmony_ci		if (copy_from_user(&info, (void __user *)arg, minsz))
8178c2ecf20Sopenharmony_ci			return -EFAULT;
8188c2ecf20Sopenharmony_ci
8198c2ecf20Sopenharmony_ci		if (info.argsz < minsz)
8208c2ecf20Sopenharmony_ci			return -EINVAL;
8218c2ecf20Sopenharmony_ci
8228c2ecf20Sopenharmony_ci		if (info.argsz >= capsz) {
8238c2ecf20Sopenharmony_ci			minsz = capsz;
8248c2ecf20Sopenharmony_ci			info.cap_offset = 0;
8258c2ecf20Sopenharmony_ci		}
8268c2ecf20Sopenharmony_ci
8278c2ecf20Sopenharmony_ci		info.flags = VFIO_DEVICE_FLAGS_PCI;
8288c2ecf20Sopenharmony_ci
8298c2ecf20Sopenharmony_ci		if (vdev->reset_works)
8308c2ecf20Sopenharmony_ci			info.flags |= VFIO_DEVICE_FLAGS_RESET;
8318c2ecf20Sopenharmony_ci
8328c2ecf20Sopenharmony_ci		info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
8338c2ecf20Sopenharmony_ci		info.num_irqs = VFIO_PCI_NUM_IRQS;
8348c2ecf20Sopenharmony_ci
8358c2ecf20Sopenharmony_ci		if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) {
8368c2ecf20Sopenharmony_ci			int ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
8378c2ecf20Sopenharmony_ci
8388c2ecf20Sopenharmony_ci			if (ret && ret != -ENODEV) {
8398c2ecf20Sopenharmony_ci				pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");
8408c2ecf20Sopenharmony_ci				return ret;
8418c2ecf20Sopenharmony_ci			}
8428c2ecf20Sopenharmony_ci		}
8438c2ecf20Sopenharmony_ci
8448c2ecf20Sopenharmony_ci		if (caps.size) {
8458c2ecf20Sopenharmony_ci			info.flags |= VFIO_DEVICE_FLAGS_CAPS;
8468c2ecf20Sopenharmony_ci			if (info.argsz < sizeof(info) + caps.size) {
8478c2ecf20Sopenharmony_ci				info.argsz = sizeof(info) + caps.size;
8488c2ecf20Sopenharmony_ci			} else {
8498c2ecf20Sopenharmony_ci				vfio_info_cap_shift(&caps, sizeof(info));
8508c2ecf20Sopenharmony_ci				if (copy_to_user((void __user *)arg +
8518c2ecf20Sopenharmony_ci						  sizeof(info), caps.buf,
8528c2ecf20Sopenharmony_ci						  caps.size)) {
8538c2ecf20Sopenharmony_ci					kfree(caps.buf);
8548c2ecf20Sopenharmony_ci					return -EFAULT;
8558c2ecf20Sopenharmony_ci				}
8568c2ecf20Sopenharmony_ci				info.cap_offset = sizeof(info);
8578c2ecf20Sopenharmony_ci			}
8588c2ecf20Sopenharmony_ci
8598c2ecf20Sopenharmony_ci			kfree(caps.buf);
8608c2ecf20Sopenharmony_ci		}
8618c2ecf20Sopenharmony_ci
8628c2ecf20Sopenharmony_ci		return copy_to_user((void __user *)arg, &info, minsz) ?
8638c2ecf20Sopenharmony_ci			-EFAULT : 0;
8648c2ecf20Sopenharmony_ci
8658c2ecf20Sopenharmony_ci	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
8668c2ecf20Sopenharmony_ci		struct pci_dev *pdev = vdev->pdev;
8678c2ecf20Sopenharmony_ci		struct vfio_region_info info;
8688c2ecf20Sopenharmony_ci		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
8698c2ecf20Sopenharmony_ci		int i, ret;
8708c2ecf20Sopenharmony_ci
8718c2ecf20Sopenharmony_ci		minsz = offsetofend(struct vfio_region_info, offset);
8728c2ecf20Sopenharmony_ci
8738c2ecf20Sopenharmony_ci		if (copy_from_user(&info, (void __user *)arg, minsz))
8748c2ecf20Sopenharmony_ci			return -EFAULT;
8758c2ecf20Sopenharmony_ci
8768c2ecf20Sopenharmony_ci		if (info.argsz < minsz)
8778c2ecf20Sopenharmony_ci			return -EINVAL;
8788c2ecf20Sopenharmony_ci
8798c2ecf20Sopenharmony_ci		switch (info.index) {
8808c2ecf20Sopenharmony_ci		case VFIO_PCI_CONFIG_REGION_INDEX:
8818c2ecf20Sopenharmony_ci			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
8828c2ecf20Sopenharmony_ci			info.size = pdev->cfg_size;
8838c2ecf20Sopenharmony_ci			info.flags = VFIO_REGION_INFO_FLAG_READ |
8848c2ecf20Sopenharmony_ci				     VFIO_REGION_INFO_FLAG_WRITE;
8858c2ecf20Sopenharmony_ci			break;
8868c2ecf20Sopenharmony_ci		case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
8878c2ecf20Sopenharmony_ci			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
8888c2ecf20Sopenharmony_ci			info.size = pci_resource_len(pdev, info.index);
8898c2ecf20Sopenharmony_ci			if (!info.size) {
8908c2ecf20Sopenharmony_ci				info.flags = 0;
8918c2ecf20Sopenharmony_ci				break;
8928c2ecf20Sopenharmony_ci			}
8938c2ecf20Sopenharmony_ci
8948c2ecf20Sopenharmony_ci			info.flags = VFIO_REGION_INFO_FLAG_READ |
8958c2ecf20Sopenharmony_ci				     VFIO_REGION_INFO_FLAG_WRITE;
8968c2ecf20Sopenharmony_ci			if (vdev->bar_mmap_supported[info.index]) {
8978c2ecf20Sopenharmony_ci				info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
8988c2ecf20Sopenharmony_ci				if (info.index == vdev->msix_bar) {
8998c2ecf20Sopenharmony_ci					ret = msix_mmappable_cap(vdev, &caps);
9008c2ecf20Sopenharmony_ci					if (ret)
9018c2ecf20Sopenharmony_ci						return ret;
9028c2ecf20Sopenharmony_ci				}
9038c2ecf20Sopenharmony_ci			}
9048c2ecf20Sopenharmony_ci
9058c2ecf20Sopenharmony_ci			break;
9068c2ecf20Sopenharmony_ci		case VFIO_PCI_ROM_REGION_INDEX:
9078c2ecf20Sopenharmony_ci		{
9088c2ecf20Sopenharmony_ci			void __iomem *io;
9098c2ecf20Sopenharmony_ci			size_t size;
9108c2ecf20Sopenharmony_ci			u16 cmd;
9118c2ecf20Sopenharmony_ci
9128c2ecf20Sopenharmony_ci			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
9138c2ecf20Sopenharmony_ci			info.flags = 0;
9148c2ecf20Sopenharmony_ci
9158c2ecf20Sopenharmony_ci			/* Report the BAR size, not the ROM size */
9168c2ecf20Sopenharmony_ci			info.size = pci_resource_len(pdev, info.index);
9178c2ecf20Sopenharmony_ci			if (!info.size) {
9188c2ecf20Sopenharmony_ci				/* Shadow ROMs appear as PCI option ROMs */
9198c2ecf20Sopenharmony_ci				if (pdev->resource[PCI_ROM_RESOURCE].flags &
9208c2ecf20Sopenharmony_ci							IORESOURCE_ROM_SHADOW)
9218c2ecf20Sopenharmony_ci					info.size = 0x20000;
9228c2ecf20Sopenharmony_ci				else
9238c2ecf20Sopenharmony_ci					break;
9248c2ecf20Sopenharmony_ci			}
9258c2ecf20Sopenharmony_ci
9268c2ecf20Sopenharmony_ci			/*
9278c2ecf20Sopenharmony_ci			 * Is it really there?  Enable memory decode for
9288c2ecf20Sopenharmony_ci			 * implicit access in pci_map_rom().
9298c2ecf20Sopenharmony_ci			 */
9308c2ecf20Sopenharmony_ci			cmd = vfio_pci_memory_lock_and_enable(vdev);
9318c2ecf20Sopenharmony_ci			io = pci_map_rom(pdev, &size);
9328c2ecf20Sopenharmony_ci			if (io) {
9338c2ecf20Sopenharmony_ci				info.flags = VFIO_REGION_INFO_FLAG_READ;
9348c2ecf20Sopenharmony_ci				pci_unmap_rom(pdev, io);
9358c2ecf20Sopenharmony_ci			} else {
9368c2ecf20Sopenharmony_ci				info.size = 0;
9378c2ecf20Sopenharmony_ci			}
9388c2ecf20Sopenharmony_ci			vfio_pci_memory_unlock_and_restore(vdev, cmd);
9398c2ecf20Sopenharmony_ci
9408c2ecf20Sopenharmony_ci			break;
9418c2ecf20Sopenharmony_ci		}
9428c2ecf20Sopenharmony_ci		case VFIO_PCI_VGA_REGION_INDEX:
9438c2ecf20Sopenharmony_ci			if (!vdev->has_vga)
9448c2ecf20Sopenharmony_ci				return -EINVAL;
9458c2ecf20Sopenharmony_ci
9468c2ecf20Sopenharmony_ci			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
9478c2ecf20Sopenharmony_ci			info.size = 0xc0000;
9488c2ecf20Sopenharmony_ci			info.flags = VFIO_REGION_INFO_FLAG_READ |
9498c2ecf20Sopenharmony_ci				     VFIO_REGION_INFO_FLAG_WRITE;
9508c2ecf20Sopenharmony_ci
9518c2ecf20Sopenharmony_ci			break;
9528c2ecf20Sopenharmony_ci		default:
9538c2ecf20Sopenharmony_ci		{
9548c2ecf20Sopenharmony_ci			struct vfio_region_info_cap_type cap_type = {
9558c2ecf20Sopenharmony_ci					.header.id = VFIO_REGION_INFO_CAP_TYPE,
9568c2ecf20Sopenharmony_ci					.header.version = 1 };
9578c2ecf20Sopenharmony_ci
9588c2ecf20Sopenharmony_ci			if (info.index >=
9598c2ecf20Sopenharmony_ci			    VFIO_PCI_NUM_REGIONS + vdev->num_regions)
9608c2ecf20Sopenharmony_ci				return -EINVAL;
9618c2ecf20Sopenharmony_ci			info.index = array_index_nospec(info.index,
9628c2ecf20Sopenharmony_ci							VFIO_PCI_NUM_REGIONS +
9638c2ecf20Sopenharmony_ci							vdev->num_regions);
9648c2ecf20Sopenharmony_ci
9658c2ecf20Sopenharmony_ci			i = info.index - VFIO_PCI_NUM_REGIONS;
9668c2ecf20Sopenharmony_ci
9678c2ecf20Sopenharmony_ci			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
9688c2ecf20Sopenharmony_ci			info.size = vdev->region[i].size;
9698c2ecf20Sopenharmony_ci			info.flags = vdev->region[i].flags;
9708c2ecf20Sopenharmony_ci
9718c2ecf20Sopenharmony_ci			cap_type.type = vdev->region[i].type;
9728c2ecf20Sopenharmony_ci			cap_type.subtype = vdev->region[i].subtype;
9738c2ecf20Sopenharmony_ci
9748c2ecf20Sopenharmony_ci			ret = vfio_info_add_capability(&caps, &cap_type.header,
9758c2ecf20Sopenharmony_ci						       sizeof(cap_type));
9768c2ecf20Sopenharmony_ci			if (ret)
9778c2ecf20Sopenharmony_ci				return ret;
9788c2ecf20Sopenharmony_ci
9798c2ecf20Sopenharmony_ci			if (vdev->region[i].ops->add_capability) {
9808c2ecf20Sopenharmony_ci				ret = vdev->region[i].ops->add_capability(vdev,
9818c2ecf20Sopenharmony_ci						&vdev->region[i], &caps);
9828c2ecf20Sopenharmony_ci				if (ret)
9838c2ecf20Sopenharmony_ci					return ret;
9848c2ecf20Sopenharmony_ci			}
9858c2ecf20Sopenharmony_ci		}
9868c2ecf20Sopenharmony_ci		}
9878c2ecf20Sopenharmony_ci
9888c2ecf20Sopenharmony_ci		if (caps.size) {
9898c2ecf20Sopenharmony_ci			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
9908c2ecf20Sopenharmony_ci			if (info.argsz < sizeof(info) + caps.size) {
9918c2ecf20Sopenharmony_ci				info.argsz = sizeof(info) + caps.size;
9928c2ecf20Sopenharmony_ci				info.cap_offset = 0;
9938c2ecf20Sopenharmony_ci			} else {
9948c2ecf20Sopenharmony_ci				vfio_info_cap_shift(&caps, sizeof(info));
9958c2ecf20Sopenharmony_ci				if (copy_to_user((void __user *)arg +
9968c2ecf20Sopenharmony_ci						  sizeof(info), caps.buf,
9978c2ecf20Sopenharmony_ci						  caps.size)) {
9988c2ecf20Sopenharmony_ci					kfree(caps.buf);
9998c2ecf20Sopenharmony_ci					return -EFAULT;
10008c2ecf20Sopenharmony_ci				}
10018c2ecf20Sopenharmony_ci				info.cap_offset = sizeof(info);
10028c2ecf20Sopenharmony_ci			}
10038c2ecf20Sopenharmony_ci
10048c2ecf20Sopenharmony_ci			kfree(caps.buf);
10058c2ecf20Sopenharmony_ci		}
10068c2ecf20Sopenharmony_ci
10078c2ecf20Sopenharmony_ci		return copy_to_user((void __user *)arg, &info, minsz) ?
10088c2ecf20Sopenharmony_ci			-EFAULT : 0;
10098c2ecf20Sopenharmony_ci
10108c2ecf20Sopenharmony_ci	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
10118c2ecf20Sopenharmony_ci		struct vfio_irq_info info;
10128c2ecf20Sopenharmony_ci
10138c2ecf20Sopenharmony_ci		minsz = offsetofend(struct vfio_irq_info, count);
10148c2ecf20Sopenharmony_ci
10158c2ecf20Sopenharmony_ci		if (copy_from_user(&info, (void __user *)arg, minsz))
10168c2ecf20Sopenharmony_ci			return -EFAULT;
10178c2ecf20Sopenharmony_ci
10188c2ecf20Sopenharmony_ci		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
10198c2ecf20Sopenharmony_ci			return -EINVAL;
10208c2ecf20Sopenharmony_ci
10218c2ecf20Sopenharmony_ci		switch (info.index) {
10228c2ecf20Sopenharmony_ci		case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
10238c2ecf20Sopenharmony_ci		case VFIO_PCI_REQ_IRQ_INDEX:
10248c2ecf20Sopenharmony_ci			break;
10258c2ecf20Sopenharmony_ci		case VFIO_PCI_ERR_IRQ_INDEX:
10268c2ecf20Sopenharmony_ci			if (pci_is_pcie(vdev->pdev))
10278c2ecf20Sopenharmony_ci				break;
10288c2ecf20Sopenharmony_ci			fallthrough;
10298c2ecf20Sopenharmony_ci		default:
10308c2ecf20Sopenharmony_ci			return -EINVAL;
10318c2ecf20Sopenharmony_ci		}
10328c2ecf20Sopenharmony_ci
10338c2ecf20Sopenharmony_ci		info.flags = VFIO_IRQ_INFO_EVENTFD;
10348c2ecf20Sopenharmony_ci
10358c2ecf20Sopenharmony_ci		info.count = vfio_pci_get_irq_count(vdev, info.index);
10368c2ecf20Sopenharmony_ci
10378c2ecf20Sopenharmony_ci		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
10388c2ecf20Sopenharmony_ci			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
10398c2ecf20Sopenharmony_ci				       VFIO_IRQ_INFO_AUTOMASKED);
10408c2ecf20Sopenharmony_ci		else
10418c2ecf20Sopenharmony_ci			info.flags |= VFIO_IRQ_INFO_NORESIZE;
10428c2ecf20Sopenharmony_ci
10438c2ecf20Sopenharmony_ci		return copy_to_user((void __user *)arg, &info, minsz) ?
10448c2ecf20Sopenharmony_ci			-EFAULT : 0;
10458c2ecf20Sopenharmony_ci
10468c2ecf20Sopenharmony_ci	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
10478c2ecf20Sopenharmony_ci		struct vfio_irq_set hdr;
10488c2ecf20Sopenharmony_ci		u8 *data = NULL;
10498c2ecf20Sopenharmony_ci		int max, ret = 0;
10508c2ecf20Sopenharmony_ci		size_t data_size = 0;
10518c2ecf20Sopenharmony_ci
10528c2ecf20Sopenharmony_ci		minsz = offsetofend(struct vfio_irq_set, count);
10538c2ecf20Sopenharmony_ci
10548c2ecf20Sopenharmony_ci		if (copy_from_user(&hdr, (void __user *)arg, minsz))
10558c2ecf20Sopenharmony_ci			return -EFAULT;
10568c2ecf20Sopenharmony_ci
10578c2ecf20Sopenharmony_ci		max = vfio_pci_get_irq_count(vdev, hdr.index);
10588c2ecf20Sopenharmony_ci
10598c2ecf20Sopenharmony_ci		ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
10608c2ecf20Sopenharmony_ci						 VFIO_PCI_NUM_IRQS, &data_size);
10618c2ecf20Sopenharmony_ci		if (ret)
10628c2ecf20Sopenharmony_ci			return ret;
10638c2ecf20Sopenharmony_ci
10648c2ecf20Sopenharmony_ci		if (data_size) {
10658c2ecf20Sopenharmony_ci			data = memdup_user((void __user *)(arg + minsz),
10668c2ecf20Sopenharmony_ci					    data_size);
10678c2ecf20Sopenharmony_ci			if (IS_ERR(data))
10688c2ecf20Sopenharmony_ci				return PTR_ERR(data);
10698c2ecf20Sopenharmony_ci		}
10708c2ecf20Sopenharmony_ci
10718c2ecf20Sopenharmony_ci		mutex_lock(&vdev->igate);
10728c2ecf20Sopenharmony_ci
10738c2ecf20Sopenharmony_ci		ret = vfio_pci_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
10748c2ecf20Sopenharmony_ci					      hdr.start, hdr.count, data);
10758c2ecf20Sopenharmony_ci
10768c2ecf20Sopenharmony_ci		mutex_unlock(&vdev->igate);
10778c2ecf20Sopenharmony_ci		kfree(data);
10788c2ecf20Sopenharmony_ci
10798c2ecf20Sopenharmony_ci		return ret;
10808c2ecf20Sopenharmony_ci
10818c2ecf20Sopenharmony_ci	} else if (cmd == VFIO_DEVICE_RESET) {
10828c2ecf20Sopenharmony_ci		int ret;
10838c2ecf20Sopenharmony_ci
10848c2ecf20Sopenharmony_ci		if (!vdev->reset_works)
10858c2ecf20Sopenharmony_ci			return -EINVAL;
10868c2ecf20Sopenharmony_ci
10878c2ecf20Sopenharmony_ci		vfio_pci_zap_and_down_write_memory_lock(vdev);
10888c2ecf20Sopenharmony_ci		ret = pci_try_reset_function(vdev->pdev);
10898c2ecf20Sopenharmony_ci		up_write(&vdev->memory_lock);
10908c2ecf20Sopenharmony_ci
10918c2ecf20Sopenharmony_ci		return ret;
10928c2ecf20Sopenharmony_ci
10938c2ecf20Sopenharmony_ci	} else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
10948c2ecf20Sopenharmony_ci		struct vfio_pci_hot_reset_info hdr;
10958c2ecf20Sopenharmony_ci		struct vfio_pci_fill_info fill = { 0 };
10968c2ecf20Sopenharmony_ci		struct vfio_pci_dependent_device *devices = NULL;
10978c2ecf20Sopenharmony_ci		bool slot = false;
10988c2ecf20Sopenharmony_ci		int ret = 0;
10998c2ecf20Sopenharmony_ci
11008c2ecf20Sopenharmony_ci		minsz = offsetofend(struct vfio_pci_hot_reset_info, count);
11018c2ecf20Sopenharmony_ci
11028c2ecf20Sopenharmony_ci		if (copy_from_user(&hdr, (void __user *)arg, minsz))
11038c2ecf20Sopenharmony_ci			return -EFAULT;
11048c2ecf20Sopenharmony_ci
11058c2ecf20Sopenharmony_ci		if (hdr.argsz < minsz)
11068c2ecf20Sopenharmony_ci			return -EINVAL;
11078c2ecf20Sopenharmony_ci
11088c2ecf20Sopenharmony_ci		hdr.flags = 0;
11098c2ecf20Sopenharmony_ci
11108c2ecf20Sopenharmony_ci		/* Can we do a slot or bus reset or neither? */
11118c2ecf20Sopenharmony_ci		if (!pci_probe_reset_slot(vdev->pdev->slot))
11128c2ecf20Sopenharmony_ci			slot = true;
11138c2ecf20Sopenharmony_ci		else if (pci_probe_reset_bus(vdev->pdev->bus))
11148c2ecf20Sopenharmony_ci			return -ENODEV;
11158c2ecf20Sopenharmony_ci
11168c2ecf20Sopenharmony_ci		/* How many devices are affected? */
11178c2ecf20Sopenharmony_ci		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
11188c2ecf20Sopenharmony_ci						    vfio_pci_count_devs,
11198c2ecf20Sopenharmony_ci						    &fill.max, slot);
11208c2ecf20Sopenharmony_ci		if (ret)
11218c2ecf20Sopenharmony_ci			return ret;
11228c2ecf20Sopenharmony_ci
11238c2ecf20Sopenharmony_ci		WARN_ON(!fill.max); /* Should always be at least one */
11248c2ecf20Sopenharmony_ci
11258c2ecf20Sopenharmony_ci		/*
11268c2ecf20Sopenharmony_ci		 * If there's enough space, fill it now, otherwise return
11278c2ecf20Sopenharmony_ci		 * -ENOSPC and the number of devices affected.
11288c2ecf20Sopenharmony_ci		 */
11298c2ecf20Sopenharmony_ci		if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
11308c2ecf20Sopenharmony_ci			ret = -ENOSPC;
11318c2ecf20Sopenharmony_ci			hdr.count = fill.max;
11328c2ecf20Sopenharmony_ci			goto reset_info_exit;
11338c2ecf20Sopenharmony_ci		}
11348c2ecf20Sopenharmony_ci
11358c2ecf20Sopenharmony_ci		devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
11368c2ecf20Sopenharmony_ci		if (!devices)
11378c2ecf20Sopenharmony_ci			return -ENOMEM;
11388c2ecf20Sopenharmony_ci
11398c2ecf20Sopenharmony_ci		fill.devices = devices;
11408c2ecf20Sopenharmony_ci
11418c2ecf20Sopenharmony_ci		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
11428c2ecf20Sopenharmony_ci						    vfio_pci_fill_devs,
11438c2ecf20Sopenharmony_ci						    &fill, slot);
11448c2ecf20Sopenharmony_ci
11458c2ecf20Sopenharmony_ci		/*
11468c2ecf20Sopenharmony_ci		 * If a device was removed between counting and filling,
11478c2ecf20Sopenharmony_ci		 * we may come up short of fill.max.  If a device was
11488c2ecf20Sopenharmony_ci		 * added, we'll have a return of -EAGAIN above.
11498c2ecf20Sopenharmony_ci		 */
11508c2ecf20Sopenharmony_ci		if (!ret)
11518c2ecf20Sopenharmony_ci			hdr.count = fill.cur;
11528c2ecf20Sopenharmony_ci
11538c2ecf20Sopenharmony_cireset_info_exit:
11548c2ecf20Sopenharmony_ci		if (copy_to_user((void __user *)arg, &hdr, minsz))
11558c2ecf20Sopenharmony_ci			ret = -EFAULT;
11568c2ecf20Sopenharmony_ci
11578c2ecf20Sopenharmony_ci		if (!ret) {
11588c2ecf20Sopenharmony_ci			if (copy_to_user((void __user *)(arg + minsz), devices,
11598c2ecf20Sopenharmony_ci					 hdr.count * sizeof(*devices)))
11608c2ecf20Sopenharmony_ci				ret = -EFAULT;
11618c2ecf20Sopenharmony_ci		}
11628c2ecf20Sopenharmony_ci
11638c2ecf20Sopenharmony_ci		kfree(devices);
11648c2ecf20Sopenharmony_ci		return ret;
11658c2ecf20Sopenharmony_ci
11668c2ecf20Sopenharmony_ci	} else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
11678c2ecf20Sopenharmony_ci		struct vfio_pci_hot_reset hdr;
11688c2ecf20Sopenharmony_ci		int32_t *group_fds;
11698c2ecf20Sopenharmony_ci		struct vfio_pci_group_entry *groups;
11708c2ecf20Sopenharmony_ci		struct vfio_pci_group_info info;
11718c2ecf20Sopenharmony_ci		struct vfio_devices devs = { .cur_index = 0 };
11728c2ecf20Sopenharmony_ci		bool slot = false;
11738c2ecf20Sopenharmony_ci		int i, group_idx, mem_idx = 0, count = 0, ret = 0;
11748c2ecf20Sopenharmony_ci
11758c2ecf20Sopenharmony_ci		minsz = offsetofend(struct vfio_pci_hot_reset, count);
11768c2ecf20Sopenharmony_ci
11778c2ecf20Sopenharmony_ci		if (copy_from_user(&hdr, (void __user *)arg, minsz))
11788c2ecf20Sopenharmony_ci			return -EFAULT;
11798c2ecf20Sopenharmony_ci
11808c2ecf20Sopenharmony_ci		if (hdr.argsz < minsz || hdr.flags)
11818c2ecf20Sopenharmony_ci			return -EINVAL;
11828c2ecf20Sopenharmony_ci
11838c2ecf20Sopenharmony_ci		/* Can we do a slot or bus reset or neither? */
11848c2ecf20Sopenharmony_ci		if (!pci_probe_reset_slot(vdev->pdev->slot))
11858c2ecf20Sopenharmony_ci			slot = true;
11868c2ecf20Sopenharmony_ci		else if (pci_probe_reset_bus(vdev->pdev->bus))
11878c2ecf20Sopenharmony_ci			return -ENODEV;
11888c2ecf20Sopenharmony_ci
11898c2ecf20Sopenharmony_ci		/*
11908c2ecf20Sopenharmony_ci		 * We can't let userspace give us an arbitrarily large
11918c2ecf20Sopenharmony_ci		 * buffer to copy, so verify how many we think there
11928c2ecf20Sopenharmony_ci		 * could be.  Note groups can have multiple devices so
11938c2ecf20Sopenharmony_ci		 * one group per device is the max.
11948c2ecf20Sopenharmony_ci		 */
11958c2ecf20Sopenharmony_ci		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
11968c2ecf20Sopenharmony_ci						    vfio_pci_count_devs,
11978c2ecf20Sopenharmony_ci						    &count, slot);
11988c2ecf20Sopenharmony_ci		if (ret)
11998c2ecf20Sopenharmony_ci			return ret;
12008c2ecf20Sopenharmony_ci
12018c2ecf20Sopenharmony_ci		/* Somewhere between 1 and count is OK */
12028c2ecf20Sopenharmony_ci		if (!hdr.count || hdr.count > count)
12038c2ecf20Sopenharmony_ci			return -EINVAL;
12048c2ecf20Sopenharmony_ci
12058c2ecf20Sopenharmony_ci		group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
12068c2ecf20Sopenharmony_ci		groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL);
12078c2ecf20Sopenharmony_ci		if (!group_fds || !groups) {
12088c2ecf20Sopenharmony_ci			kfree(group_fds);
12098c2ecf20Sopenharmony_ci			kfree(groups);
12108c2ecf20Sopenharmony_ci			return -ENOMEM;
12118c2ecf20Sopenharmony_ci		}
12128c2ecf20Sopenharmony_ci
12138c2ecf20Sopenharmony_ci		if (copy_from_user(group_fds, (void __user *)(arg + minsz),
12148c2ecf20Sopenharmony_ci				   hdr.count * sizeof(*group_fds))) {
12158c2ecf20Sopenharmony_ci			kfree(group_fds);
12168c2ecf20Sopenharmony_ci			kfree(groups);
12178c2ecf20Sopenharmony_ci			return -EFAULT;
12188c2ecf20Sopenharmony_ci		}
12198c2ecf20Sopenharmony_ci
12208c2ecf20Sopenharmony_ci		/*
12218c2ecf20Sopenharmony_ci		 * For each group_fd, get the group through the vfio external
12228c2ecf20Sopenharmony_ci		 * user interface and store the group and iommu ID.  This
12238c2ecf20Sopenharmony_ci		 * ensures the group is held across the reset.
12248c2ecf20Sopenharmony_ci		 */
12258c2ecf20Sopenharmony_ci		for (group_idx = 0; group_idx < hdr.count; group_idx++) {
12268c2ecf20Sopenharmony_ci			struct vfio_group *group;
12278c2ecf20Sopenharmony_ci			struct fd f = fdget(group_fds[group_idx]);
12288c2ecf20Sopenharmony_ci			if (!f.file) {
12298c2ecf20Sopenharmony_ci				ret = -EBADF;
12308c2ecf20Sopenharmony_ci				break;
12318c2ecf20Sopenharmony_ci			}
12328c2ecf20Sopenharmony_ci
12338c2ecf20Sopenharmony_ci			group = vfio_group_get_external_user(f.file);
12348c2ecf20Sopenharmony_ci			fdput(f);
12358c2ecf20Sopenharmony_ci			if (IS_ERR(group)) {
12368c2ecf20Sopenharmony_ci				ret = PTR_ERR(group);
12378c2ecf20Sopenharmony_ci				break;
12388c2ecf20Sopenharmony_ci			}
12398c2ecf20Sopenharmony_ci
12408c2ecf20Sopenharmony_ci			groups[group_idx].group = group;
12418c2ecf20Sopenharmony_ci			groups[group_idx].id =
12428c2ecf20Sopenharmony_ci					vfio_external_user_iommu_id(group);
12438c2ecf20Sopenharmony_ci		}
12448c2ecf20Sopenharmony_ci
12458c2ecf20Sopenharmony_ci		kfree(group_fds);
12468c2ecf20Sopenharmony_ci
12478c2ecf20Sopenharmony_ci		/* release reference to groups on error */
12488c2ecf20Sopenharmony_ci		if (ret)
12498c2ecf20Sopenharmony_ci			goto hot_reset_release;
12508c2ecf20Sopenharmony_ci
12518c2ecf20Sopenharmony_ci		info.count = hdr.count;
12528c2ecf20Sopenharmony_ci		info.groups = groups;
12538c2ecf20Sopenharmony_ci
12548c2ecf20Sopenharmony_ci		/*
12558c2ecf20Sopenharmony_ci		 * Test whether all the affected devices are contained
12568c2ecf20Sopenharmony_ci		 * by the set of groups provided by the user.
12578c2ecf20Sopenharmony_ci		 */
12588c2ecf20Sopenharmony_ci		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
12598c2ecf20Sopenharmony_ci						    vfio_pci_validate_devs,
12608c2ecf20Sopenharmony_ci						    &info, slot);
12618c2ecf20Sopenharmony_ci		if (ret)
12628c2ecf20Sopenharmony_ci			goto hot_reset_release;
12638c2ecf20Sopenharmony_ci
12648c2ecf20Sopenharmony_ci		devs.max_index = count;
12658c2ecf20Sopenharmony_ci		devs.devices = kcalloc(count, sizeof(struct vfio_device *),
12668c2ecf20Sopenharmony_ci				       GFP_KERNEL);
12678c2ecf20Sopenharmony_ci		if (!devs.devices) {
12688c2ecf20Sopenharmony_ci			ret = -ENOMEM;
12698c2ecf20Sopenharmony_ci			goto hot_reset_release;
12708c2ecf20Sopenharmony_ci		}
12718c2ecf20Sopenharmony_ci
12728c2ecf20Sopenharmony_ci		/*
12738c2ecf20Sopenharmony_ci		 * We need to get memory_lock for each device, but devices
12748c2ecf20Sopenharmony_ci		 * can share mmap_lock, therefore we need to zap and hold
12758c2ecf20Sopenharmony_ci		 * the vma_lock for each device, and only then get each
12768c2ecf20Sopenharmony_ci		 * memory_lock.
12778c2ecf20Sopenharmony_ci		 */
12788c2ecf20Sopenharmony_ci		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
12798c2ecf20Sopenharmony_ci					    vfio_pci_try_zap_and_vma_lock_cb,
12808c2ecf20Sopenharmony_ci					    &devs, slot);
12818c2ecf20Sopenharmony_ci		if (ret)
12828c2ecf20Sopenharmony_ci			goto hot_reset_release;
12838c2ecf20Sopenharmony_ci
12848c2ecf20Sopenharmony_ci		for (; mem_idx < devs.cur_index; mem_idx++) {
12858c2ecf20Sopenharmony_ci			struct vfio_pci_device *tmp;
12868c2ecf20Sopenharmony_ci
12878c2ecf20Sopenharmony_ci			tmp = vfio_device_data(devs.devices[mem_idx]);
12888c2ecf20Sopenharmony_ci
12898c2ecf20Sopenharmony_ci			ret = down_write_trylock(&tmp->memory_lock);
12908c2ecf20Sopenharmony_ci			if (!ret) {
12918c2ecf20Sopenharmony_ci				ret = -EBUSY;
12928c2ecf20Sopenharmony_ci				goto hot_reset_release;
12938c2ecf20Sopenharmony_ci			}
12948c2ecf20Sopenharmony_ci			mutex_unlock(&tmp->vma_lock);
12958c2ecf20Sopenharmony_ci		}
12968c2ecf20Sopenharmony_ci
12978c2ecf20Sopenharmony_ci		/* User has access, do the reset */
12988c2ecf20Sopenharmony_ci		ret = pci_reset_bus(vdev->pdev);
12998c2ecf20Sopenharmony_ci
13008c2ecf20Sopenharmony_cihot_reset_release:
13018c2ecf20Sopenharmony_ci		for (i = 0; i < devs.cur_index; i++) {
13028c2ecf20Sopenharmony_ci			struct vfio_device *device;
13038c2ecf20Sopenharmony_ci			struct vfio_pci_device *tmp;
13048c2ecf20Sopenharmony_ci
13058c2ecf20Sopenharmony_ci			device = devs.devices[i];
13068c2ecf20Sopenharmony_ci			tmp = vfio_device_data(device);
13078c2ecf20Sopenharmony_ci
13088c2ecf20Sopenharmony_ci			if (i < mem_idx)
13098c2ecf20Sopenharmony_ci				up_write(&tmp->memory_lock);
13108c2ecf20Sopenharmony_ci			else
13118c2ecf20Sopenharmony_ci				mutex_unlock(&tmp->vma_lock);
13128c2ecf20Sopenharmony_ci			vfio_device_put(device);
13138c2ecf20Sopenharmony_ci		}
13148c2ecf20Sopenharmony_ci		kfree(devs.devices);
13158c2ecf20Sopenharmony_ci
13168c2ecf20Sopenharmony_ci		for (group_idx--; group_idx >= 0; group_idx--)
13178c2ecf20Sopenharmony_ci			vfio_group_put_external_user(groups[group_idx].group);
13188c2ecf20Sopenharmony_ci
13198c2ecf20Sopenharmony_ci		kfree(groups);
13208c2ecf20Sopenharmony_ci		return ret;
13218c2ecf20Sopenharmony_ci	} else if (cmd == VFIO_DEVICE_IOEVENTFD) {
13228c2ecf20Sopenharmony_ci		struct vfio_device_ioeventfd ioeventfd;
13238c2ecf20Sopenharmony_ci		int count;
13248c2ecf20Sopenharmony_ci
13258c2ecf20Sopenharmony_ci		minsz = offsetofend(struct vfio_device_ioeventfd, fd);
13268c2ecf20Sopenharmony_ci
13278c2ecf20Sopenharmony_ci		if (copy_from_user(&ioeventfd, (void __user *)arg, minsz))
13288c2ecf20Sopenharmony_ci			return -EFAULT;
13298c2ecf20Sopenharmony_ci
13308c2ecf20Sopenharmony_ci		if (ioeventfd.argsz < minsz)
13318c2ecf20Sopenharmony_ci			return -EINVAL;
13328c2ecf20Sopenharmony_ci
13338c2ecf20Sopenharmony_ci		if (ioeventfd.flags & ~VFIO_DEVICE_IOEVENTFD_SIZE_MASK)
13348c2ecf20Sopenharmony_ci			return -EINVAL;
13358c2ecf20Sopenharmony_ci
13368c2ecf20Sopenharmony_ci		count = ioeventfd.flags & VFIO_DEVICE_IOEVENTFD_SIZE_MASK;
13378c2ecf20Sopenharmony_ci
13388c2ecf20Sopenharmony_ci		if (hweight8(count) != 1 || ioeventfd.fd < -1)
13398c2ecf20Sopenharmony_ci			return -EINVAL;
13408c2ecf20Sopenharmony_ci
13418c2ecf20Sopenharmony_ci		return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
13428c2ecf20Sopenharmony_ci					  ioeventfd.data, count, ioeventfd.fd);
13438c2ecf20Sopenharmony_ci	} else if (cmd == VFIO_DEVICE_FEATURE) {
13448c2ecf20Sopenharmony_ci		struct vfio_device_feature feature;
13458c2ecf20Sopenharmony_ci		uuid_t uuid;
13468c2ecf20Sopenharmony_ci
13478c2ecf20Sopenharmony_ci		minsz = offsetofend(struct vfio_device_feature, flags);
13488c2ecf20Sopenharmony_ci
13498c2ecf20Sopenharmony_ci		if (copy_from_user(&feature, (void __user *)arg, minsz))
13508c2ecf20Sopenharmony_ci			return -EFAULT;
13518c2ecf20Sopenharmony_ci
13528c2ecf20Sopenharmony_ci		if (feature.argsz < minsz)
13538c2ecf20Sopenharmony_ci			return -EINVAL;
13548c2ecf20Sopenharmony_ci
13558c2ecf20Sopenharmony_ci		/* Check unknown flags */
13568c2ecf20Sopenharmony_ci		if (feature.flags & ~(VFIO_DEVICE_FEATURE_MASK |
13578c2ecf20Sopenharmony_ci				      VFIO_DEVICE_FEATURE_SET |
13588c2ecf20Sopenharmony_ci				      VFIO_DEVICE_FEATURE_GET |
13598c2ecf20Sopenharmony_ci				      VFIO_DEVICE_FEATURE_PROBE))
13608c2ecf20Sopenharmony_ci			return -EINVAL;
13618c2ecf20Sopenharmony_ci
13628c2ecf20Sopenharmony_ci		/* GET & SET are mutually exclusive except with PROBE */
13638c2ecf20Sopenharmony_ci		if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
13648c2ecf20Sopenharmony_ci		    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
13658c2ecf20Sopenharmony_ci		    (feature.flags & VFIO_DEVICE_FEATURE_GET))
13668c2ecf20Sopenharmony_ci			return -EINVAL;
13678c2ecf20Sopenharmony_ci
13688c2ecf20Sopenharmony_ci		switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
13698c2ecf20Sopenharmony_ci		case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
13708c2ecf20Sopenharmony_ci			if (!vdev->vf_token)
13718c2ecf20Sopenharmony_ci				return -ENOTTY;
13728c2ecf20Sopenharmony_ci
13738c2ecf20Sopenharmony_ci			/*
13748c2ecf20Sopenharmony_ci			 * We do not support GET of the VF Token UUID as this
13758c2ecf20Sopenharmony_ci			 * could expose the token of the previous device user.
13768c2ecf20Sopenharmony_ci			 */
13778c2ecf20Sopenharmony_ci			if (feature.flags & VFIO_DEVICE_FEATURE_GET)
13788c2ecf20Sopenharmony_ci				return -EINVAL;
13798c2ecf20Sopenharmony_ci
13808c2ecf20Sopenharmony_ci			if (feature.flags & VFIO_DEVICE_FEATURE_PROBE)
13818c2ecf20Sopenharmony_ci				return 0;
13828c2ecf20Sopenharmony_ci
13838c2ecf20Sopenharmony_ci			/* Don't SET unless told to do so */
13848c2ecf20Sopenharmony_ci			if (!(feature.flags & VFIO_DEVICE_FEATURE_SET))
13858c2ecf20Sopenharmony_ci				return -EINVAL;
13868c2ecf20Sopenharmony_ci
13878c2ecf20Sopenharmony_ci			if (feature.argsz < minsz + sizeof(uuid))
13888c2ecf20Sopenharmony_ci				return -EINVAL;
13898c2ecf20Sopenharmony_ci
13908c2ecf20Sopenharmony_ci			if (copy_from_user(&uuid, (void __user *)(arg + minsz),
13918c2ecf20Sopenharmony_ci					   sizeof(uuid)))
13928c2ecf20Sopenharmony_ci				return -EFAULT;
13938c2ecf20Sopenharmony_ci
13948c2ecf20Sopenharmony_ci			mutex_lock(&vdev->vf_token->lock);
13958c2ecf20Sopenharmony_ci			uuid_copy(&vdev->vf_token->uuid, &uuid);
13968c2ecf20Sopenharmony_ci			mutex_unlock(&vdev->vf_token->lock);
13978c2ecf20Sopenharmony_ci
13988c2ecf20Sopenharmony_ci			return 0;
13998c2ecf20Sopenharmony_ci		default:
14008c2ecf20Sopenharmony_ci			return -ENOTTY;
14018c2ecf20Sopenharmony_ci		}
14028c2ecf20Sopenharmony_ci	}
14038c2ecf20Sopenharmony_ci
14048c2ecf20Sopenharmony_ci	return -ENOTTY;
14058c2ecf20Sopenharmony_ci}
14068c2ecf20Sopenharmony_ci
14078c2ecf20Sopenharmony_cistatic ssize_t vfio_pci_rw(void *device_data, char __user *buf,
14088c2ecf20Sopenharmony_ci			   size_t count, loff_t *ppos, bool iswrite)
14098c2ecf20Sopenharmony_ci{
14108c2ecf20Sopenharmony_ci	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
14118c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev = device_data;
14128c2ecf20Sopenharmony_ci
14138c2ecf20Sopenharmony_ci	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
14148c2ecf20Sopenharmony_ci		return -EINVAL;
14158c2ecf20Sopenharmony_ci
14168c2ecf20Sopenharmony_ci	switch (index) {
14178c2ecf20Sopenharmony_ci	case VFIO_PCI_CONFIG_REGION_INDEX:
14188c2ecf20Sopenharmony_ci		return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite);
14198c2ecf20Sopenharmony_ci
14208c2ecf20Sopenharmony_ci	case VFIO_PCI_ROM_REGION_INDEX:
14218c2ecf20Sopenharmony_ci		if (iswrite)
14228c2ecf20Sopenharmony_ci			return -EINVAL;
14238c2ecf20Sopenharmony_ci		return vfio_pci_bar_rw(vdev, buf, count, ppos, false);
14248c2ecf20Sopenharmony_ci
14258c2ecf20Sopenharmony_ci	case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
14268c2ecf20Sopenharmony_ci		return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite);
14278c2ecf20Sopenharmony_ci
14288c2ecf20Sopenharmony_ci	case VFIO_PCI_VGA_REGION_INDEX:
14298c2ecf20Sopenharmony_ci		return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
14308c2ecf20Sopenharmony_ci	default:
14318c2ecf20Sopenharmony_ci		index -= VFIO_PCI_NUM_REGIONS;
14328c2ecf20Sopenharmony_ci		return vdev->region[index].ops->rw(vdev, buf,
14338c2ecf20Sopenharmony_ci						   count, ppos, iswrite);
14348c2ecf20Sopenharmony_ci	}
14358c2ecf20Sopenharmony_ci
14368c2ecf20Sopenharmony_ci	return -EINVAL;
14378c2ecf20Sopenharmony_ci}
14388c2ecf20Sopenharmony_ci
14398c2ecf20Sopenharmony_cistatic ssize_t vfio_pci_read(void *device_data, char __user *buf,
14408c2ecf20Sopenharmony_ci			     size_t count, loff_t *ppos)
14418c2ecf20Sopenharmony_ci{
14428c2ecf20Sopenharmony_ci	if (!count)
14438c2ecf20Sopenharmony_ci		return 0;
14448c2ecf20Sopenharmony_ci
14458c2ecf20Sopenharmony_ci	return vfio_pci_rw(device_data, buf, count, ppos, false);
14468c2ecf20Sopenharmony_ci}
14478c2ecf20Sopenharmony_ci
14488c2ecf20Sopenharmony_cistatic ssize_t vfio_pci_write(void *device_data, const char __user *buf,
14498c2ecf20Sopenharmony_ci			      size_t count, loff_t *ppos)
14508c2ecf20Sopenharmony_ci{
14518c2ecf20Sopenharmony_ci	if (!count)
14528c2ecf20Sopenharmony_ci		return 0;
14538c2ecf20Sopenharmony_ci
14548c2ecf20Sopenharmony_ci	return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true);
14558c2ecf20Sopenharmony_ci}
14568c2ecf20Sopenharmony_ci
14578c2ecf20Sopenharmony_ci/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */
14588c2ecf20Sopenharmony_cistatic int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
14598c2ecf20Sopenharmony_ci{
14608c2ecf20Sopenharmony_ci	struct vfio_pci_mmap_vma *mmap_vma, *tmp;
14618c2ecf20Sopenharmony_ci
14628c2ecf20Sopenharmony_ci	/*
14638c2ecf20Sopenharmony_ci	 * Lock ordering:
14648c2ecf20Sopenharmony_ci	 * vma_lock is nested under mmap_lock for vm_ops callback paths.
14658c2ecf20Sopenharmony_ci	 * The memory_lock semaphore is used by both code paths calling
14668c2ecf20Sopenharmony_ci	 * into this function to zap vmas and the vm_ops.fault callback
14678c2ecf20Sopenharmony_ci	 * to protect the memory enable state of the device.
14688c2ecf20Sopenharmony_ci	 *
14698c2ecf20Sopenharmony_ci	 * When zapping vmas we need to maintain the mmap_lock => vma_lock
14708c2ecf20Sopenharmony_ci	 * ordering, which requires using vma_lock to walk vma_list to
14718c2ecf20Sopenharmony_ci	 * acquire an mm, then dropping vma_lock to get the mmap_lock and
14728c2ecf20Sopenharmony_ci	 * reacquiring vma_lock.  This logic is derived from similar
14738c2ecf20Sopenharmony_ci	 * requirements in uverbs_user_mmap_disassociate().
14748c2ecf20Sopenharmony_ci	 *
14758c2ecf20Sopenharmony_ci	 * mmap_lock must always be the top-level lock when it is taken.
14768c2ecf20Sopenharmony_ci	 * Therefore we can only hold the memory_lock write lock when
14778c2ecf20Sopenharmony_ci	 * vma_list is empty, as we'd need to take mmap_lock to clear
14788c2ecf20Sopenharmony_ci	 * entries.  vma_list can only be guaranteed empty when holding
14798c2ecf20Sopenharmony_ci	 * vma_lock, thus memory_lock is nested under vma_lock.
14808c2ecf20Sopenharmony_ci	 *
14818c2ecf20Sopenharmony_ci	 * This enables the vm_ops.fault callback to acquire vma_lock,
14828c2ecf20Sopenharmony_ci	 * followed by memory_lock read lock, while already holding
14838c2ecf20Sopenharmony_ci	 * mmap_lock without risk of deadlock.
14848c2ecf20Sopenharmony_ci	 */
14858c2ecf20Sopenharmony_ci	while (1) {
14868c2ecf20Sopenharmony_ci		struct mm_struct *mm = NULL;
14878c2ecf20Sopenharmony_ci
14888c2ecf20Sopenharmony_ci		if (try) {
14898c2ecf20Sopenharmony_ci			if (!mutex_trylock(&vdev->vma_lock))
14908c2ecf20Sopenharmony_ci				return 0;
14918c2ecf20Sopenharmony_ci		} else {
14928c2ecf20Sopenharmony_ci			mutex_lock(&vdev->vma_lock);
14938c2ecf20Sopenharmony_ci		}
14948c2ecf20Sopenharmony_ci		while (!list_empty(&vdev->vma_list)) {
14958c2ecf20Sopenharmony_ci			mmap_vma = list_first_entry(&vdev->vma_list,
14968c2ecf20Sopenharmony_ci						    struct vfio_pci_mmap_vma,
14978c2ecf20Sopenharmony_ci						    vma_next);
14988c2ecf20Sopenharmony_ci			mm = mmap_vma->vma->vm_mm;
14998c2ecf20Sopenharmony_ci			if (mmget_not_zero(mm))
15008c2ecf20Sopenharmony_ci				break;
15018c2ecf20Sopenharmony_ci
15028c2ecf20Sopenharmony_ci			list_del(&mmap_vma->vma_next);
15038c2ecf20Sopenharmony_ci			kfree(mmap_vma);
15048c2ecf20Sopenharmony_ci			mm = NULL;
15058c2ecf20Sopenharmony_ci		}
15068c2ecf20Sopenharmony_ci		if (!mm)
15078c2ecf20Sopenharmony_ci			return 1;
15088c2ecf20Sopenharmony_ci		mutex_unlock(&vdev->vma_lock);
15098c2ecf20Sopenharmony_ci
15108c2ecf20Sopenharmony_ci		if (try) {
15118c2ecf20Sopenharmony_ci			if (!mmap_read_trylock(mm)) {
15128c2ecf20Sopenharmony_ci				mmput(mm);
15138c2ecf20Sopenharmony_ci				return 0;
15148c2ecf20Sopenharmony_ci			}
15158c2ecf20Sopenharmony_ci		} else {
15168c2ecf20Sopenharmony_ci			mmap_read_lock(mm);
15178c2ecf20Sopenharmony_ci		}
15188c2ecf20Sopenharmony_ci		if (try) {
15198c2ecf20Sopenharmony_ci			if (!mutex_trylock(&vdev->vma_lock)) {
15208c2ecf20Sopenharmony_ci				mmap_read_unlock(mm);
15218c2ecf20Sopenharmony_ci				mmput(mm);
15228c2ecf20Sopenharmony_ci				return 0;
15238c2ecf20Sopenharmony_ci			}
15248c2ecf20Sopenharmony_ci		} else {
15258c2ecf20Sopenharmony_ci			mutex_lock(&vdev->vma_lock);
15268c2ecf20Sopenharmony_ci		}
15278c2ecf20Sopenharmony_ci		list_for_each_entry_safe(mmap_vma, tmp,
15288c2ecf20Sopenharmony_ci					 &vdev->vma_list, vma_next) {
15298c2ecf20Sopenharmony_ci			struct vm_area_struct *vma = mmap_vma->vma;
15308c2ecf20Sopenharmony_ci
15318c2ecf20Sopenharmony_ci			if (vma->vm_mm != mm)
15328c2ecf20Sopenharmony_ci				continue;
15338c2ecf20Sopenharmony_ci
15348c2ecf20Sopenharmony_ci			list_del(&mmap_vma->vma_next);
15358c2ecf20Sopenharmony_ci			kfree(mmap_vma);
15368c2ecf20Sopenharmony_ci
15378c2ecf20Sopenharmony_ci			zap_vma_ptes(vma, vma->vm_start,
15388c2ecf20Sopenharmony_ci				     vma->vm_end - vma->vm_start);
15398c2ecf20Sopenharmony_ci		}
15408c2ecf20Sopenharmony_ci		mutex_unlock(&vdev->vma_lock);
15418c2ecf20Sopenharmony_ci		mmap_read_unlock(mm);
15428c2ecf20Sopenharmony_ci		mmput(mm);
15438c2ecf20Sopenharmony_ci	}
15448c2ecf20Sopenharmony_ci}
15458c2ecf20Sopenharmony_ci
15468c2ecf20Sopenharmony_civoid vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device *vdev)
15478c2ecf20Sopenharmony_ci{
15488c2ecf20Sopenharmony_ci	vfio_pci_zap_and_vma_lock(vdev, false);
15498c2ecf20Sopenharmony_ci	down_write(&vdev->memory_lock);
15508c2ecf20Sopenharmony_ci	mutex_unlock(&vdev->vma_lock);
15518c2ecf20Sopenharmony_ci}
15528c2ecf20Sopenharmony_ci
15538c2ecf20Sopenharmony_ciu16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev)
15548c2ecf20Sopenharmony_ci{
15558c2ecf20Sopenharmony_ci	u16 cmd;
15568c2ecf20Sopenharmony_ci
15578c2ecf20Sopenharmony_ci	down_write(&vdev->memory_lock);
15588c2ecf20Sopenharmony_ci	pci_read_config_word(vdev->pdev, PCI_COMMAND, &cmd);
15598c2ecf20Sopenharmony_ci	if (!(cmd & PCI_COMMAND_MEMORY))
15608c2ecf20Sopenharmony_ci		pci_write_config_word(vdev->pdev, PCI_COMMAND,
15618c2ecf20Sopenharmony_ci				      cmd | PCI_COMMAND_MEMORY);
15628c2ecf20Sopenharmony_ci
15638c2ecf20Sopenharmony_ci	return cmd;
15648c2ecf20Sopenharmony_ci}
15658c2ecf20Sopenharmony_ci
15668c2ecf20Sopenharmony_civoid vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev, u16 cmd)
15678c2ecf20Sopenharmony_ci{
15688c2ecf20Sopenharmony_ci	pci_write_config_word(vdev->pdev, PCI_COMMAND, cmd);
15698c2ecf20Sopenharmony_ci	up_write(&vdev->memory_lock);
15708c2ecf20Sopenharmony_ci}
15718c2ecf20Sopenharmony_ci
15728c2ecf20Sopenharmony_ci/* Caller holds vma_lock */
15738c2ecf20Sopenharmony_cistatic int __vfio_pci_add_vma(struct vfio_pci_device *vdev,
15748c2ecf20Sopenharmony_ci			      struct vm_area_struct *vma)
15758c2ecf20Sopenharmony_ci{
15768c2ecf20Sopenharmony_ci	struct vfio_pci_mmap_vma *mmap_vma;
15778c2ecf20Sopenharmony_ci
15788c2ecf20Sopenharmony_ci	mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);
15798c2ecf20Sopenharmony_ci	if (!mmap_vma)
15808c2ecf20Sopenharmony_ci		return -ENOMEM;
15818c2ecf20Sopenharmony_ci
15828c2ecf20Sopenharmony_ci	mmap_vma->vma = vma;
15838c2ecf20Sopenharmony_ci	list_add(&mmap_vma->vma_next, &vdev->vma_list);
15848c2ecf20Sopenharmony_ci
15858c2ecf20Sopenharmony_ci	return 0;
15868c2ecf20Sopenharmony_ci}
15878c2ecf20Sopenharmony_ci
15888c2ecf20Sopenharmony_ci/*
15898c2ecf20Sopenharmony_ci * Zap mmaps on open so that we can fault them in on access and therefore
15908c2ecf20Sopenharmony_ci * our vma_list only tracks mappings accessed since last zap.
15918c2ecf20Sopenharmony_ci */
15928c2ecf20Sopenharmony_cistatic void vfio_pci_mmap_open(struct vm_area_struct *vma)
15938c2ecf20Sopenharmony_ci{
15948c2ecf20Sopenharmony_ci	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
15958c2ecf20Sopenharmony_ci}
15968c2ecf20Sopenharmony_ci
15978c2ecf20Sopenharmony_cistatic void vfio_pci_mmap_close(struct vm_area_struct *vma)
15988c2ecf20Sopenharmony_ci{
15998c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev = vma->vm_private_data;
16008c2ecf20Sopenharmony_ci	struct vfio_pci_mmap_vma *mmap_vma;
16018c2ecf20Sopenharmony_ci
16028c2ecf20Sopenharmony_ci	mutex_lock(&vdev->vma_lock);
16038c2ecf20Sopenharmony_ci	list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
16048c2ecf20Sopenharmony_ci		if (mmap_vma->vma == vma) {
16058c2ecf20Sopenharmony_ci			list_del(&mmap_vma->vma_next);
16068c2ecf20Sopenharmony_ci			kfree(mmap_vma);
16078c2ecf20Sopenharmony_ci			break;
16088c2ecf20Sopenharmony_ci		}
16098c2ecf20Sopenharmony_ci	}
16108c2ecf20Sopenharmony_ci	mutex_unlock(&vdev->vma_lock);
16118c2ecf20Sopenharmony_ci}
16128c2ecf20Sopenharmony_ci
16138c2ecf20Sopenharmony_cistatic vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
16148c2ecf20Sopenharmony_ci{
16158c2ecf20Sopenharmony_ci	struct vm_area_struct *vma = vmf->vma;
16168c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev = vma->vm_private_data;
16178c2ecf20Sopenharmony_ci	struct vfio_pci_mmap_vma *mmap_vma;
16188c2ecf20Sopenharmony_ci	vm_fault_t ret = VM_FAULT_NOPAGE;
16198c2ecf20Sopenharmony_ci
16208c2ecf20Sopenharmony_ci	mutex_lock(&vdev->vma_lock);
16218c2ecf20Sopenharmony_ci	down_read(&vdev->memory_lock);
16228c2ecf20Sopenharmony_ci
16238c2ecf20Sopenharmony_ci	if (!__vfio_pci_memory_enabled(vdev)) {
16248c2ecf20Sopenharmony_ci		ret = VM_FAULT_SIGBUS;
16258c2ecf20Sopenharmony_ci		goto up_out;
16268c2ecf20Sopenharmony_ci	}
16278c2ecf20Sopenharmony_ci
16288c2ecf20Sopenharmony_ci	/*
16298c2ecf20Sopenharmony_ci	 * We populate the whole vma on fault, so we need to test whether
16308c2ecf20Sopenharmony_ci	 * the vma has already been mapped, such as for concurrent faults
16318c2ecf20Sopenharmony_ci	 * to the same vma.  io_remap_pfn_range() will trigger a BUG_ON if
16328c2ecf20Sopenharmony_ci	 * we ask it to fill the same range again.
16338c2ecf20Sopenharmony_ci	 */
16348c2ecf20Sopenharmony_ci	list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
16358c2ecf20Sopenharmony_ci		if (mmap_vma->vma == vma)
16368c2ecf20Sopenharmony_ci			goto up_out;
16378c2ecf20Sopenharmony_ci	}
16388c2ecf20Sopenharmony_ci
16398c2ecf20Sopenharmony_ci	if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
16408c2ecf20Sopenharmony_ci			       vma->vm_end - vma->vm_start,
16418c2ecf20Sopenharmony_ci			       vma->vm_page_prot)) {
16428c2ecf20Sopenharmony_ci		ret = VM_FAULT_SIGBUS;
16438c2ecf20Sopenharmony_ci		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
16448c2ecf20Sopenharmony_ci		goto up_out;
16458c2ecf20Sopenharmony_ci	}
16468c2ecf20Sopenharmony_ci
16478c2ecf20Sopenharmony_ci	if (__vfio_pci_add_vma(vdev, vma)) {
16488c2ecf20Sopenharmony_ci		ret = VM_FAULT_OOM;
16498c2ecf20Sopenharmony_ci		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
16508c2ecf20Sopenharmony_ci	}
16518c2ecf20Sopenharmony_ci
16528c2ecf20Sopenharmony_ciup_out:
16538c2ecf20Sopenharmony_ci	up_read(&vdev->memory_lock);
16548c2ecf20Sopenharmony_ci	mutex_unlock(&vdev->vma_lock);
16558c2ecf20Sopenharmony_ci	return ret;
16568c2ecf20Sopenharmony_ci}
16578c2ecf20Sopenharmony_ci
16588c2ecf20Sopenharmony_cistatic const struct vm_operations_struct vfio_pci_mmap_ops = {
16598c2ecf20Sopenharmony_ci	.open = vfio_pci_mmap_open,
16608c2ecf20Sopenharmony_ci	.close = vfio_pci_mmap_close,
16618c2ecf20Sopenharmony_ci	.fault = vfio_pci_mmap_fault,
16628c2ecf20Sopenharmony_ci};
16638c2ecf20Sopenharmony_ci
16648c2ecf20Sopenharmony_cistatic int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
16658c2ecf20Sopenharmony_ci{
16668c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev = device_data;
16678c2ecf20Sopenharmony_ci	struct pci_dev *pdev = vdev->pdev;
16688c2ecf20Sopenharmony_ci	unsigned int index;
16698c2ecf20Sopenharmony_ci	u64 phys_len, req_len, pgoff, req_start;
16708c2ecf20Sopenharmony_ci	int ret;
16718c2ecf20Sopenharmony_ci
16728c2ecf20Sopenharmony_ci	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
16738c2ecf20Sopenharmony_ci
16748c2ecf20Sopenharmony_ci	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
16758c2ecf20Sopenharmony_ci		return -EINVAL;
16768c2ecf20Sopenharmony_ci	if (vma->vm_end < vma->vm_start)
16778c2ecf20Sopenharmony_ci		return -EINVAL;
16788c2ecf20Sopenharmony_ci	if ((vma->vm_flags & VM_SHARED) == 0)
16798c2ecf20Sopenharmony_ci		return -EINVAL;
16808c2ecf20Sopenharmony_ci	if (index >= VFIO_PCI_NUM_REGIONS) {
16818c2ecf20Sopenharmony_ci		int regnum = index - VFIO_PCI_NUM_REGIONS;
16828c2ecf20Sopenharmony_ci		struct vfio_pci_region *region = vdev->region + regnum;
16838c2ecf20Sopenharmony_ci
16848c2ecf20Sopenharmony_ci		if (region->ops && region->ops->mmap &&
16858c2ecf20Sopenharmony_ci		    (region->flags & VFIO_REGION_INFO_FLAG_MMAP))
16868c2ecf20Sopenharmony_ci			return region->ops->mmap(vdev, region, vma);
16878c2ecf20Sopenharmony_ci		return -EINVAL;
16888c2ecf20Sopenharmony_ci	}
16898c2ecf20Sopenharmony_ci	if (index >= VFIO_PCI_ROM_REGION_INDEX)
16908c2ecf20Sopenharmony_ci		return -EINVAL;
16918c2ecf20Sopenharmony_ci	if (!vdev->bar_mmap_supported[index])
16928c2ecf20Sopenharmony_ci		return -EINVAL;
16938c2ecf20Sopenharmony_ci
16948c2ecf20Sopenharmony_ci	phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
16958c2ecf20Sopenharmony_ci	req_len = vma->vm_end - vma->vm_start;
16968c2ecf20Sopenharmony_ci	pgoff = vma->vm_pgoff &
16978c2ecf20Sopenharmony_ci		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
16988c2ecf20Sopenharmony_ci	req_start = pgoff << PAGE_SHIFT;
16998c2ecf20Sopenharmony_ci
17008c2ecf20Sopenharmony_ci	if (req_start + req_len > phys_len)
17018c2ecf20Sopenharmony_ci		return -EINVAL;
17028c2ecf20Sopenharmony_ci
17038c2ecf20Sopenharmony_ci	/*
17048c2ecf20Sopenharmony_ci	 * Even though we don't make use of the barmap for the mmap,
17058c2ecf20Sopenharmony_ci	 * we need to request the region and the barmap tracks that.
17068c2ecf20Sopenharmony_ci	 */
17078c2ecf20Sopenharmony_ci	if (!vdev->barmap[index]) {
17088c2ecf20Sopenharmony_ci		ret = pci_request_selected_regions(pdev,
17098c2ecf20Sopenharmony_ci						   1 << index, "vfio-pci");
17108c2ecf20Sopenharmony_ci		if (ret)
17118c2ecf20Sopenharmony_ci			return ret;
17128c2ecf20Sopenharmony_ci
17138c2ecf20Sopenharmony_ci		vdev->barmap[index] = pci_iomap(pdev, index, 0);
17148c2ecf20Sopenharmony_ci		if (!vdev->barmap[index]) {
17158c2ecf20Sopenharmony_ci			pci_release_selected_regions(pdev, 1 << index);
17168c2ecf20Sopenharmony_ci			return -ENOMEM;
17178c2ecf20Sopenharmony_ci		}
17188c2ecf20Sopenharmony_ci	}
17198c2ecf20Sopenharmony_ci
17208c2ecf20Sopenharmony_ci	vma->vm_private_data = vdev;
17218c2ecf20Sopenharmony_ci	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
17228c2ecf20Sopenharmony_ci	vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
17238c2ecf20Sopenharmony_ci
17248c2ecf20Sopenharmony_ci	/*
17258c2ecf20Sopenharmony_ci	 * See remap_pfn_range(), called from vfio_pci_fault() but we can't
17268c2ecf20Sopenharmony_ci	 * change vm_flags within the fault handler.  Set them now.
17278c2ecf20Sopenharmony_ci	 */
17288c2ecf20Sopenharmony_ci	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
17298c2ecf20Sopenharmony_ci	vma->vm_ops = &vfio_pci_mmap_ops;
17308c2ecf20Sopenharmony_ci
17318c2ecf20Sopenharmony_ci	return 0;
17328c2ecf20Sopenharmony_ci}
17338c2ecf20Sopenharmony_ci
17348c2ecf20Sopenharmony_cistatic void vfio_pci_request(void *device_data, unsigned int count)
17358c2ecf20Sopenharmony_ci{
17368c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev = device_data;
17378c2ecf20Sopenharmony_ci	struct pci_dev *pdev = vdev->pdev;
17388c2ecf20Sopenharmony_ci
17398c2ecf20Sopenharmony_ci	mutex_lock(&vdev->igate);
17408c2ecf20Sopenharmony_ci
17418c2ecf20Sopenharmony_ci	if (vdev->req_trigger) {
17428c2ecf20Sopenharmony_ci		if (!(count % 10))
17438c2ecf20Sopenharmony_ci			pci_notice_ratelimited(pdev,
17448c2ecf20Sopenharmony_ci				"Relaying device request to user (#%u)\n",
17458c2ecf20Sopenharmony_ci				count);
17468c2ecf20Sopenharmony_ci		eventfd_signal(vdev->req_trigger, 1);
17478c2ecf20Sopenharmony_ci	} else if (count == 0) {
17488c2ecf20Sopenharmony_ci		pci_warn(pdev,
17498c2ecf20Sopenharmony_ci			"No device request channel registered, blocked until released by user\n");
17508c2ecf20Sopenharmony_ci	}
17518c2ecf20Sopenharmony_ci
17528c2ecf20Sopenharmony_ci	mutex_unlock(&vdev->igate);
17538c2ecf20Sopenharmony_ci}
17548c2ecf20Sopenharmony_ci
17558c2ecf20Sopenharmony_cistatic int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
17568c2ecf20Sopenharmony_ci				      bool vf_token, uuid_t *uuid)
17578c2ecf20Sopenharmony_ci{
17588c2ecf20Sopenharmony_ci	/*
17598c2ecf20Sopenharmony_ci	 * There's always some degree of trust or collaboration between SR-IOV
17608c2ecf20Sopenharmony_ci	 * PF and VFs, even if just that the PF hosts the SR-IOV capability and
17618c2ecf20Sopenharmony_ci	 * can disrupt VFs with a reset, but often the PF has more explicit
17628c2ecf20Sopenharmony_ci	 * access to deny service to the VF or access data passed through the
17638c2ecf20Sopenharmony_ci	 * VF.  We therefore require an opt-in via a shared VF token (UUID) to
17648c2ecf20Sopenharmony_ci	 * represent this trust.  This both prevents that a VF driver might
17658c2ecf20Sopenharmony_ci	 * assume the PF driver is a trusted, in-kernel driver, and also that
17668c2ecf20Sopenharmony_ci	 * a PF driver might be replaced with a rogue driver, unknown to in-use
17678c2ecf20Sopenharmony_ci	 * VF drivers.
17688c2ecf20Sopenharmony_ci	 *
17698c2ecf20Sopenharmony_ci	 * Therefore when presented with a VF, if the PF is a vfio device and
17708c2ecf20Sopenharmony_ci	 * it is bound to the vfio-pci driver, the user needs to provide a VF
17718c2ecf20Sopenharmony_ci	 * token to access the device, in the form of appending a vf_token to
17728c2ecf20Sopenharmony_ci	 * the device name, for example:
17738c2ecf20Sopenharmony_ci	 *
17748c2ecf20Sopenharmony_ci	 * "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3"
17758c2ecf20Sopenharmony_ci	 *
17768c2ecf20Sopenharmony_ci	 * When presented with a PF which has VFs in use, the user must also
17778c2ecf20Sopenharmony_ci	 * provide the current VF token to prove collaboration with existing
17788c2ecf20Sopenharmony_ci	 * VF users.  If VFs are not in use, the VF token provided for the PF
17798c2ecf20Sopenharmony_ci	 * device will act to set the VF token.
17808c2ecf20Sopenharmony_ci	 *
17818c2ecf20Sopenharmony_ci	 * If the VF token is provided but unused, an error is generated.
17828c2ecf20Sopenharmony_ci	 */
17838c2ecf20Sopenharmony_ci	if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token)
17848c2ecf20Sopenharmony_ci		return 0; /* No VF token provided or required */
17858c2ecf20Sopenharmony_ci
17868c2ecf20Sopenharmony_ci	if (vdev->pdev->is_virtfn) {
17878c2ecf20Sopenharmony_ci		struct vfio_device *pf_dev;
17888c2ecf20Sopenharmony_ci		struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);
17898c2ecf20Sopenharmony_ci		bool match;
17908c2ecf20Sopenharmony_ci
17918c2ecf20Sopenharmony_ci		if (!pf_vdev) {
17928c2ecf20Sopenharmony_ci			if (!vf_token)
17938c2ecf20Sopenharmony_ci				return 0; /* PF is not vfio-pci, no VF token */
17948c2ecf20Sopenharmony_ci
17958c2ecf20Sopenharmony_ci			pci_info_ratelimited(vdev->pdev,
17968c2ecf20Sopenharmony_ci				"VF token incorrectly provided, PF not bound to vfio-pci\n");
17978c2ecf20Sopenharmony_ci			return -EINVAL;
17988c2ecf20Sopenharmony_ci		}
17998c2ecf20Sopenharmony_ci
18008c2ecf20Sopenharmony_ci		if (!vf_token) {
18018c2ecf20Sopenharmony_ci			vfio_device_put(pf_dev);
18028c2ecf20Sopenharmony_ci			pci_info_ratelimited(vdev->pdev,
18038c2ecf20Sopenharmony_ci				"VF token required to access device\n");
18048c2ecf20Sopenharmony_ci			return -EACCES;
18058c2ecf20Sopenharmony_ci		}
18068c2ecf20Sopenharmony_ci
18078c2ecf20Sopenharmony_ci		mutex_lock(&pf_vdev->vf_token->lock);
18088c2ecf20Sopenharmony_ci		match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
18098c2ecf20Sopenharmony_ci		mutex_unlock(&pf_vdev->vf_token->lock);
18108c2ecf20Sopenharmony_ci
18118c2ecf20Sopenharmony_ci		vfio_device_put(pf_dev);
18128c2ecf20Sopenharmony_ci
18138c2ecf20Sopenharmony_ci		if (!match) {
18148c2ecf20Sopenharmony_ci			pci_info_ratelimited(vdev->pdev,
18158c2ecf20Sopenharmony_ci				"Incorrect VF token provided for device\n");
18168c2ecf20Sopenharmony_ci			return -EACCES;
18178c2ecf20Sopenharmony_ci		}
18188c2ecf20Sopenharmony_ci	} else if (vdev->vf_token) {
18198c2ecf20Sopenharmony_ci		mutex_lock(&vdev->vf_token->lock);
18208c2ecf20Sopenharmony_ci		if (vdev->vf_token->users) {
18218c2ecf20Sopenharmony_ci			if (!vf_token) {
18228c2ecf20Sopenharmony_ci				mutex_unlock(&vdev->vf_token->lock);
18238c2ecf20Sopenharmony_ci				pci_info_ratelimited(vdev->pdev,
18248c2ecf20Sopenharmony_ci					"VF token required to access device\n");
18258c2ecf20Sopenharmony_ci				return -EACCES;
18268c2ecf20Sopenharmony_ci			}
18278c2ecf20Sopenharmony_ci
18288c2ecf20Sopenharmony_ci			if (!uuid_equal(uuid, &vdev->vf_token->uuid)) {
18298c2ecf20Sopenharmony_ci				mutex_unlock(&vdev->vf_token->lock);
18308c2ecf20Sopenharmony_ci				pci_info_ratelimited(vdev->pdev,
18318c2ecf20Sopenharmony_ci					"Incorrect VF token provided for device\n");
18328c2ecf20Sopenharmony_ci				return -EACCES;
18338c2ecf20Sopenharmony_ci			}
18348c2ecf20Sopenharmony_ci		} else if (vf_token) {
18358c2ecf20Sopenharmony_ci			uuid_copy(&vdev->vf_token->uuid, uuid);
18368c2ecf20Sopenharmony_ci		}
18378c2ecf20Sopenharmony_ci
18388c2ecf20Sopenharmony_ci		mutex_unlock(&vdev->vf_token->lock);
18398c2ecf20Sopenharmony_ci	} else if (vf_token) {
18408c2ecf20Sopenharmony_ci		pci_info_ratelimited(vdev->pdev,
18418c2ecf20Sopenharmony_ci			"VF token incorrectly provided, not a PF or VF\n");
18428c2ecf20Sopenharmony_ci		return -EINVAL;
18438c2ecf20Sopenharmony_ci	}
18448c2ecf20Sopenharmony_ci
18458c2ecf20Sopenharmony_ci	return 0;
18468c2ecf20Sopenharmony_ci}
18478c2ecf20Sopenharmony_ci
18488c2ecf20Sopenharmony_ci#define VF_TOKEN_ARG "vf_token="
18498c2ecf20Sopenharmony_ci
18508c2ecf20Sopenharmony_cistatic int vfio_pci_match(void *device_data, char *buf)
18518c2ecf20Sopenharmony_ci{
18528c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev = device_data;
18538c2ecf20Sopenharmony_ci	bool vf_token = false;
18548c2ecf20Sopenharmony_ci	uuid_t uuid;
18558c2ecf20Sopenharmony_ci	int ret;
18568c2ecf20Sopenharmony_ci
18578c2ecf20Sopenharmony_ci	if (strncmp(pci_name(vdev->pdev), buf, strlen(pci_name(vdev->pdev))))
18588c2ecf20Sopenharmony_ci		return 0; /* No match */
18598c2ecf20Sopenharmony_ci
18608c2ecf20Sopenharmony_ci	if (strlen(buf) > strlen(pci_name(vdev->pdev))) {
18618c2ecf20Sopenharmony_ci		buf += strlen(pci_name(vdev->pdev));
18628c2ecf20Sopenharmony_ci
18638c2ecf20Sopenharmony_ci		if (*buf != ' ')
18648c2ecf20Sopenharmony_ci			return 0; /* No match: non-whitespace after name */
18658c2ecf20Sopenharmony_ci
18668c2ecf20Sopenharmony_ci		while (*buf) {
18678c2ecf20Sopenharmony_ci			if (*buf == ' ') {
18688c2ecf20Sopenharmony_ci				buf++;
18698c2ecf20Sopenharmony_ci				continue;
18708c2ecf20Sopenharmony_ci			}
18718c2ecf20Sopenharmony_ci
18728c2ecf20Sopenharmony_ci			if (!vf_token && !strncmp(buf, VF_TOKEN_ARG,
18738c2ecf20Sopenharmony_ci						  strlen(VF_TOKEN_ARG))) {
18748c2ecf20Sopenharmony_ci				buf += strlen(VF_TOKEN_ARG);
18758c2ecf20Sopenharmony_ci
18768c2ecf20Sopenharmony_ci				if (strlen(buf) < UUID_STRING_LEN)
18778c2ecf20Sopenharmony_ci					return -EINVAL;
18788c2ecf20Sopenharmony_ci
18798c2ecf20Sopenharmony_ci				ret = uuid_parse(buf, &uuid);
18808c2ecf20Sopenharmony_ci				if (ret)
18818c2ecf20Sopenharmony_ci					return ret;
18828c2ecf20Sopenharmony_ci
18838c2ecf20Sopenharmony_ci				vf_token = true;
18848c2ecf20Sopenharmony_ci				buf += UUID_STRING_LEN;
18858c2ecf20Sopenharmony_ci			} else {
18868c2ecf20Sopenharmony_ci				/* Unknown/duplicate option */
18878c2ecf20Sopenharmony_ci				return -EINVAL;
18888c2ecf20Sopenharmony_ci			}
18898c2ecf20Sopenharmony_ci		}
18908c2ecf20Sopenharmony_ci	}
18918c2ecf20Sopenharmony_ci
18928c2ecf20Sopenharmony_ci	ret = vfio_pci_validate_vf_token(vdev, vf_token, &uuid);
18938c2ecf20Sopenharmony_ci	if (ret)
18948c2ecf20Sopenharmony_ci		return ret;
18958c2ecf20Sopenharmony_ci
18968c2ecf20Sopenharmony_ci	return 1; /* Match */
18978c2ecf20Sopenharmony_ci}
18988c2ecf20Sopenharmony_ci
18998c2ecf20Sopenharmony_cistatic const struct vfio_device_ops vfio_pci_ops = {
19008c2ecf20Sopenharmony_ci	.name		= "vfio-pci",
19018c2ecf20Sopenharmony_ci	.open		= vfio_pci_open,
19028c2ecf20Sopenharmony_ci	.release	= vfio_pci_release,
19038c2ecf20Sopenharmony_ci	.ioctl		= vfio_pci_ioctl,
19048c2ecf20Sopenharmony_ci	.read		= vfio_pci_read,
19058c2ecf20Sopenharmony_ci	.write		= vfio_pci_write,
19068c2ecf20Sopenharmony_ci	.mmap		= vfio_pci_mmap,
19078c2ecf20Sopenharmony_ci	.request	= vfio_pci_request,
19088c2ecf20Sopenharmony_ci	.match		= vfio_pci_match,
19098c2ecf20Sopenharmony_ci};
19108c2ecf20Sopenharmony_ci
19118c2ecf20Sopenharmony_cistatic int vfio_pci_reflck_attach(struct vfio_pci_device *vdev);
19128c2ecf20Sopenharmony_cistatic void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck);
19138c2ecf20Sopenharmony_ci
19148c2ecf20Sopenharmony_cistatic int vfio_pci_bus_notifier(struct notifier_block *nb,
19158c2ecf20Sopenharmony_ci				 unsigned long action, void *data)
19168c2ecf20Sopenharmony_ci{
19178c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev = container_of(nb,
19188c2ecf20Sopenharmony_ci						    struct vfio_pci_device, nb);
19198c2ecf20Sopenharmony_ci	struct device *dev = data;
19208c2ecf20Sopenharmony_ci	struct pci_dev *pdev = to_pci_dev(dev);
19218c2ecf20Sopenharmony_ci	struct pci_dev *physfn = pci_physfn(pdev);
19228c2ecf20Sopenharmony_ci
19238c2ecf20Sopenharmony_ci	if (action == BUS_NOTIFY_ADD_DEVICE &&
19248c2ecf20Sopenharmony_ci	    pdev->is_virtfn && physfn == vdev->pdev) {
19258c2ecf20Sopenharmony_ci		pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n",
19268c2ecf20Sopenharmony_ci			 pci_name(pdev));
19278c2ecf20Sopenharmony_ci		pdev->driver_override = kasprintf(GFP_KERNEL, "%s",
19288c2ecf20Sopenharmony_ci						  vfio_pci_ops.name);
19298c2ecf20Sopenharmony_ci	} else if (action == BUS_NOTIFY_BOUND_DRIVER &&
19308c2ecf20Sopenharmony_ci		   pdev->is_virtfn && physfn == vdev->pdev) {
19318c2ecf20Sopenharmony_ci		struct pci_driver *drv = pci_dev_driver(pdev);
19328c2ecf20Sopenharmony_ci
19338c2ecf20Sopenharmony_ci		if (drv && drv != &vfio_pci_driver)
19348c2ecf20Sopenharmony_ci			pci_warn(vdev->pdev,
19358c2ecf20Sopenharmony_ci				 "VF %s bound to driver %s while PF bound to vfio-pci\n",
19368c2ecf20Sopenharmony_ci				 pci_name(pdev), drv->name);
19378c2ecf20Sopenharmony_ci	}
19388c2ecf20Sopenharmony_ci
19398c2ecf20Sopenharmony_ci	return 0;
19408c2ecf20Sopenharmony_ci}
19418c2ecf20Sopenharmony_ci
19428c2ecf20Sopenharmony_cistatic int vfio_pci_vf_init(struct vfio_pci_device *vdev)
19438c2ecf20Sopenharmony_ci{
19448c2ecf20Sopenharmony_ci	struct pci_dev *pdev = vdev->pdev;
19458c2ecf20Sopenharmony_ci	int ret;
19468c2ecf20Sopenharmony_ci
19478c2ecf20Sopenharmony_ci	if (!pdev->is_physfn)
19488c2ecf20Sopenharmony_ci		return 0;
19498c2ecf20Sopenharmony_ci
19508c2ecf20Sopenharmony_ci	vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
19518c2ecf20Sopenharmony_ci	if (!vdev->vf_token)
19528c2ecf20Sopenharmony_ci		return -ENOMEM;
19538c2ecf20Sopenharmony_ci
19548c2ecf20Sopenharmony_ci	mutex_init(&vdev->vf_token->lock);
19558c2ecf20Sopenharmony_ci	uuid_gen(&vdev->vf_token->uuid);
19568c2ecf20Sopenharmony_ci
19578c2ecf20Sopenharmony_ci	vdev->nb.notifier_call = vfio_pci_bus_notifier;
19588c2ecf20Sopenharmony_ci	ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
19598c2ecf20Sopenharmony_ci	if (ret) {
19608c2ecf20Sopenharmony_ci		kfree(vdev->vf_token);
19618c2ecf20Sopenharmony_ci		return ret;
19628c2ecf20Sopenharmony_ci	}
19638c2ecf20Sopenharmony_ci	return 0;
19648c2ecf20Sopenharmony_ci}
19658c2ecf20Sopenharmony_ci
19668c2ecf20Sopenharmony_cistatic void vfio_pci_vf_uninit(struct vfio_pci_device *vdev)
19678c2ecf20Sopenharmony_ci{
19688c2ecf20Sopenharmony_ci	if (!vdev->vf_token)
19698c2ecf20Sopenharmony_ci		return;
19708c2ecf20Sopenharmony_ci
19718c2ecf20Sopenharmony_ci	bus_unregister_notifier(&pci_bus_type, &vdev->nb);
19728c2ecf20Sopenharmony_ci	WARN_ON(vdev->vf_token->users);
19738c2ecf20Sopenharmony_ci	mutex_destroy(&vdev->vf_token->lock);
19748c2ecf20Sopenharmony_ci	kfree(vdev->vf_token);
19758c2ecf20Sopenharmony_ci}
19768c2ecf20Sopenharmony_ci
19778c2ecf20Sopenharmony_cistatic int vfio_pci_vga_init(struct vfio_pci_device *vdev)
19788c2ecf20Sopenharmony_ci{
19798c2ecf20Sopenharmony_ci	struct pci_dev *pdev = vdev->pdev;
19808c2ecf20Sopenharmony_ci	int ret;
19818c2ecf20Sopenharmony_ci
19828c2ecf20Sopenharmony_ci	if (!vfio_pci_is_vga(pdev))
19838c2ecf20Sopenharmony_ci		return 0;
19848c2ecf20Sopenharmony_ci
19858c2ecf20Sopenharmony_ci	ret = vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);
19868c2ecf20Sopenharmony_ci	if (ret)
19878c2ecf20Sopenharmony_ci		return ret;
19888c2ecf20Sopenharmony_ci	vga_set_legacy_decoding(pdev, vfio_pci_set_vga_decode(vdev, false));
19898c2ecf20Sopenharmony_ci	return 0;
19908c2ecf20Sopenharmony_ci}
19918c2ecf20Sopenharmony_ci
19928c2ecf20Sopenharmony_cistatic void vfio_pci_vga_uninit(struct vfio_pci_device *vdev)
19938c2ecf20Sopenharmony_ci{
19948c2ecf20Sopenharmony_ci	struct pci_dev *pdev = vdev->pdev;
19958c2ecf20Sopenharmony_ci
19968c2ecf20Sopenharmony_ci	if (!vfio_pci_is_vga(pdev))
19978c2ecf20Sopenharmony_ci		return;
19988c2ecf20Sopenharmony_ci	vga_client_register(pdev, NULL, NULL, NULL);
19998c2ecf20Sopenharmony_ci	vga_set_legacy_decoding(pdev, VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
20008c2ecf20Sopenharmony_ci					      VGA_RSRC_LEGACY_IO |
20018c2ecf20Sopenharmony_ci					      VGA_RSRC_LEGACY_MEM);
20028c2ecf20Sopenharmony_ci}
20038c2ecf20Sopenharmony_ci
20048c2ecf20Sopenharmony_cistatic int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
20058c2ecf20Sopenharmony_ci{
20068c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev;
20078c2ecf20Sopenharmony_ci	struct iommu_group *group;
20088c2ecf20Sopenharmony_ci	int ret;
20098c2ecf20Sopenharmony_ci
20108c2ecf20Sopenharmony_ci	if (vfio_pci_is_denylisted(pdev))
20118c2ecf20Sopenharmony_ci		return -EINVAL;
20128c2ecf20Sopenharmony_ci
20138c2ecf20Sopenharmony_ci	if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
20148c2ecf20Sopenharmony_ci		return -EINVAL;
20158c2ecf20Sopenharmony_ci
20168c2ecf20Sopenharmony_ci	/*
20178c2ecf20Sopenharmony_ci	 * Prevent binding to PFs with VFs enabled, the VFs might be in use
20188c2ecf20Sopenharmony_ci	 * by the host or other users.  We cannot capture the VFs if they
20198c2ecf20Sopenharmony_ci	 * already exist, nor can we track VF users.  Disabling SR-IOV here
20208c2ecf20Sopenharmony_ci	 * would initiate removing the VFs, which would unbind the driver,
20218c2ecf20Sopenharmony_ci	 * which is prone to blocking if that VF is also in use by vfio-pci.
20228c2ecf20Sopenharmony_ci	 * Just reject these PFs and let the user sort it out.
20238c2ecf20Sopenharmony_ci	 */
20248c2ecf20Sopenharmony_ci	if (pci_num_vf(pdev)) {
20258c2ecf20Sopenharmony_ci		pci_warn(pdev, "Cannot bind to PF with SR-IOV enabled\n");
20268c2ecf20Sopenharmony_ci		return -EBUSY;
20278c2ecf20Sopenharmony_ci	}
20288c2ecf20Sopenharmony_ci
20298c2ecf20Sopenharmony_ci	group = vfio_iommu_group_get(&pdev->dev);
20308c2ecf20Sopenharmony_ci	if (!group)
20318c2ecf20Sopenharmony_ci		return -EINVAL;
20328c2ecf20Sopenharmony_ci
20338c2ecf20Sopenharmony_ci	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
20348c2ecf20Sopenharmony_ci	if (!vdev) {
20358c2ecf20Sopenharmony_ci		ret = -ENOMEM;
20368c2ecf20Sopenharmony_ci		goto out_group_put;
20378c2ecf20Sopenharmony_ci	}
20388c2ecf20Sopenharmony_ci
20398c2ecf20Sopenharmony_ci	vdev->pdev = pdev;
20408c2ecf20Sopenharmony_ci	vdev->irq_type = VFIO_PCI_NUM_IRQS;
20418c2ecf20Sopenharmony_ci	mutex_init(&vdev->igate);
20428c2ecf20Sopenharmony_ci	spin_lock_init(&vdev->irqlock);
20438c2ecf20Sopenharmony_ci	mutex_init(&vdev->ioeventfds_lock);
20448c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&vdev->dummy_resources_list);
20458c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&vdev->ioeventfds_list);
20468c2ecf20Sopenharmony_ci	mutex_init(&vdev->vma_lock);
20478c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&vdev->vma_list);
20488c2ecf20Sopenharmony_ci	init_rwsem(&vdev->memory_lock);
20498c2ecf20Sopenharmony_ci
20508c2ecf20Sopenharmony_ci	ret = vfio_pci_reflck_attach(vdev);
20518c2ecf20Sopenharmony_ci	if (ret)
20528c2ecf20Sopenharmony_ci		goto out_free;
20538c2ecf20Sopenharmony_ci	ret = vfio_pci_vf_init(vdev);
20548c2ecf20Sopenharmony_ci	if (ret)
20558c2ecf20Sopenharmony_ci		goto out_reflck;
20568c2ecf20Sopenharmony_ci	ret = vfio_pci_vga_init(vdev);
20578c2ecf20Sopenharmony_ci	if (ret)
20588c2ecf20Sopenharmony_ci		goto out_vf;
20598c2ecf20Sopenharmony_ci
20608c2ecf20Sopenharmony_ci	vfio_pci_probe_power_state(vdev);
20618c2ecf20Sopenharmony_ci
20628c2ecf20Sopenharmony_ci	if (!disable_idle_d3) {
20638c2ecf20Sopenharmony_ci		/*
20648c2ecf20Sopenharmony_ci		 * pci-core sets the device power state to an unknown value at
20658c2ecf20Sopenharmony_ci		 * bootup and after being removed from a driver.  The only
20668c2ecf20Sopenharmony_ci		 * transition it allows from this unknown state is to D0, which
20678c2ecf20Sopenharmony_ci		 * typically happens when a driver calls pci_enable_device().
20688c2ecf20Sopenharmony_ci		 * We're not ready to enable the device yet, but we do want to
20698c2ecf20Sopenharmony_ci		 * be able to get to D3.  Therefore first do a D0 transition
20708c2ecf20Sopenharmony_ci		 * before going to D3.
20718c2ecf20Sopenharmony_ci		 */
20728c2ecf20Sopenharmony_ci		vfio_pci_set_power_state(vdev, PCI_D0);
20738c2ecf20Sopenharmony_ci		vfio_pci_set_power_state(vdev, PCI_D3hot);
20748c2ecf20Sopenharmony_ci	}
20758c2ecf20Sopenharmony_ci
20768c2ecf20Sopenharmony_ci	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
20778c2ecf20Sopenharmony_ci	if (ret)
20788c2ecf20Sopenharmony_ci		goto out_power;
20798c2ecf20Sopenharmony_ci	return 0;
20808c2ecf20Sopenharmony_ci
20818c2ecf20Sopenharmony_ciout_power:
20828c2ecf20Sopenharmony_ci	if (!disable_idle_d3)
20838c2ecf20Sopenharmony_ci		vfio_pci_set_power_state(vdev, PCI_D0);
20848c2ecf20Sopenharmony_ciout_vf:
20858c2ecf20Sopenharmony_ci	vfio_pci_vf_uninit(vdev);
20868c2ecf20Sopenharmony_ciout_reflck:
20878c2ecf20Sopenharmony_ci	vfio_pci_reflck_put(vdev->reflck);
20888c2ecf20Sopenharmony_ciout_free:
20898c2ecf20Sopenharmony_ci	kfree(vdev->pm_save);
20908c2ecf20Sopenharmony_ci	kfree(vdev);
20918c2ecf20Sopenharmony_ciout_group_put:
20928c2ecf20Sopenharmony_ci	vfio_iommu_group_put(group, &pdev->dev);
20938c2ecf20Sopenharmony_ci	return ret;
20948c2ecf20Sopenharmony_ci}
20958c2ecf20Sopenharmony_ci
20968c2ecf20Sopenharmony_cistatic void vfio_pci_remove(struct pci_dev *pdev)
20978c2ecf20Sopenharmony_ci{
20988c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev;
20998c2ecf20Sopenharmony_ci
21008c2ecf20Sopenharmony_ci	pci_disable_sriov(pdev);
21018c2ecf20Sopenharmony_ci
21028c2ecf20Sopenharmony_ci	vdev = vfio_del_group_dev(&pdev->dev);
21038c2ecf20Sopenharmony_ci	if (!vdev)
21048c2ecf20Sopenharmony_ci		return;
21058c2ecf20Sopenharmony_ci
21068c2ecf20Sopenharmony_ci	vfio_pci_vf_uninit(vdev);
21078c2ecf20Sopenharmony_ci	vfio_pci_reflck_put(vdev->reflck);
21088c2ecf20Sopenharmony_ci	vfio_pci_vga_uninit(vdev);
21098c2ecf20Sopenharmony_ci
21108c2ecf20Sopenharmony_ci	vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
21118c2ecf20Sopenharmony_ci
21128c2ecf20Sopenharmony_ci	if (!disable_idle_d3)
21138c2ecf20Sopenharmony_ci		vfio_pci_set_power_state(vdev, PCI_D0);
21148c2ecf20Sopenharmony_ci
21158c2ecf20Sopenharmony_ci	mutex_destroy(&vdev->ioeventfds_lock);
21168c2ecf20Sopenharmony_ci	kfree(vdev->region);
21178c2ecf20Sopenharmony_ci	kfree(vdev->pm_save);
21188c2ecf20Sopenharmony_ci	kfree(vdev);
21198c2ecf20Sopenharmony_ci}
21208c2ecf20Sopenharmony_ci
21218c2ecf20Sopenharmony_cistatic pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
21228c2ecf20Sopenharmony_ci						  pci_channel_state_t state)
21238c2ecf20Sopenharmony_ci{
21248c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev;
21258c2ecf20Sopenharmony_ci	struct vfio_device *device;
21268c2ecf20Sopenharmony_ci
21278c2ecf20Sopenharmony_ci	device = vfio_device_get_from_dev(&pdev->dev);
21288c2ecf20Sopenharmony_ci	if (device == NULL)
21298c2ecf20Sopenharmony_ci		return PCI_ERS_RESULT_DISCONNECT;
21308c2ecf20Sopenharmony_ci
21318c2ecf20Sopenharmony_ci	vdev = vfio_device_data(device);
21328c2ecf20Sopenharmony_ci	if (vdev == NULL) {
21338c2ecf20Sopenharmony_ci		vfio_device_put(device);
21348c2ecf20Sopenharmony_ci		return PCI_ERS_RESULT_DISCONNECT;
21358c2ecf20Sopenharmony_ci	}
21368c2ecf20Sopenharmony_ci
21378c2ecf20Sopenharmony_ci	mutex_lock(&vdev->igate);
21388c2ecf20Sopenharmony_ci
21398c2ecf20Sopenharmony_ci	if (vdev->err_trigger)
21408c2ecf20Sopenharmony_ci		eventfd_signal(vdev->err_trigger, 1);
21418c2ecf20Sopenharmony_ci
21428c2ecf20Sopenharmony_ci	mutex_unlock(&vdev->igate);
21438c2ecf20Sopenharmony_ci
21448c2ecf20Sopenharmony_ci	vfio_device_put(device);
21458c2ecf20Sopenharmony_ci
21468c2ecf20Sopenharmony_ci	return PCI_ERS_RESULT_CAN_RECOVER;
21478c2ecf20Sopenharmony_ci}
21488c2ecf20Sopenharmony_ci
21498c2ecf20Sopenharmony_cistatic int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
21508c2ecf20Sopenharmony_ci{
21518c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev;
21528c2ecf20Sopenharmony_ci	struct vfio_device *device;
21538c2ecf20Sopenharmony_ci	int ret = 0;
21548c2ecf20Sopenharmony_ci
21558c2ecf20Sopenharmony_ci	might_sleep();
21568c2ecf20Sopenharmony_ci
21578c2ecf20Sopenharmony_ci	if (!enable_sriov)
21588c2ecf20Sopenharmony_ci		return -ENOENT;
21598c2ecf20Sopenharmony_ci
21608c2ecf20Sopenharmony_ci	device = vfio_device_get_from_dev(&pdev->dev);
21618c2ecf20Sopenharmony_ci	if (!device)
21628c2ecf20Sopenharmony_ci		return -ENODEV;
21638c2ecf20Sopenharmony_ci
21648c2ecf20Sopenharmony_ci	vdev = vfio_device_data(device);
21658c2ecf20Sopenharmony_ci	if (!vdev) {
21668c2ecf20Sopenharmony_ci		vfio_device_put(device);
21678c2ecf20Sopenharmony_ci		return -ENODEV;
21688c2ecf20Sopenharmony_ci	}
21698c2ecf20Sopenharmony_ci
21708c2ecf20Sopenharmony_ci	if (nr_virtfn == 0)
21718c2ecf20Sopenharmony_ci		pci_disable_sriov(pdev);
21728c2ecf20Sopenharmony_ci	else
21738c2ecf20Sopenharmony_ci		ret = pci_enable_sriov(pdev, nr_virtfn);
21748c2ecf20Sopenharmony_ci
21758c2ecf20Sopenharmony_ci	vfio_device_put(device);
21768c2ecf20Sopenharmony_ci
21778c2ecf20Sopenharmony_ci	return ret < 0 ? ret : nr_virtfn;
21788c2ecf20Sopenharmony_ci}
21798c2ecf20Sopenharmony_ci
21808c2ecf20Sopenharmony_cistatic const struct pci_error_handlers vfio_err_handlers = {
21818c2ecf20Sopenharmony_ci	.error_detected = vfio_pci_aer_err_detected,
21828c2ecf20Sopenharmony_ci};
21838c2ecf20Sopenharmony_ci
21848c2ecf20Sopenharmony_cistatic struct pci_driver vfio_pci_driver = {
21858c2ecf20Sopenharmony_ci	.name			= "vfio-pci",
21868c2ecf20Sopenharmony_ci	.id_table		= NULL, /* only dynamic ids */
21878c2ecf20Sopenharmony_ci	.probe			= vfio_pci_probe,
21888c2ecf20Sopenharmony_ci	.remove			= vfio_pci_remove,
21898c2ecf20Sopenharmony_ci	.sriov_configure	= vfio_pci_sriov_configure,
21908c2ecf20Sopenharmony_ci	.err_handler		= &vfio_err_handlers,
21918c2ecf20Sopenharmony_ci};
21928c2ecf20Sopenharmony_ci
21938c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(reflck_lock);
21948c2ecf20Sopenharmony_ci
21958c2ecf20Sopenharmony_cistatic struct vfio_pci_reflck *vfio_pci_reflck_alloc(void)
21968c2ecf20Sopenharmony_ci{
21978c2ecf20Sopenharmony_ci	struct vfio_pci_reflck *reflck;
21988c2ecf20Sopenharmony_ci
21998c2ecf20Sopenharmony_ci	reflck = kzalloc(sizeof(*reflck), GFP_KERNEL);
22008c2ecf20Sopenharmony_ci	if (!reflck)
22018c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
22028c2ecf20Sopenharmony_ci
22038c2ecf20Sopenharmony_ci	kref_init(&reflck->kref);
22048c2ecf20Sopenharmony_ci	mutex_init(&reflck->lock);
22058c2ecf20Sopenharmony_ci
22068c2ecf20Sopenharmony_ci	return reflck;
22078c2ecf20Sopenharmony_ci}
22088c2ecf20Sopenharmony_ci
22098c2ecf20Sopenharmony_cistatic void vfio_pci_reflck_get(struct vfio_pci_reflck *reflck)
22108c2ecf20Sopenharmony_ci{
22118c2ecf20Sopenharmony_ci	kref_get(&reflck->kref);
22128c2ecf20Sopenharmony_ci}
22138c2ecf20Sopenharmony_ci
22148c2ecf20Sopenharmony_cistatic int vfio_pci_reflck_find(struct pci_dev *pdev, void *data)
22158c2ecf20Sopenharmony_ci{
22168c2ecf20Sopenharmony_ci	struct vfio_pci_reflck **preflck = data;
22178c2ecf20Sopenharmony_ci	struct vfio_device *device;
22188c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev;
22198c2ecf20Sopenharmony_ci
22208c2ecf20Sopenharmony_ci	device = vfio_device_get_from_dev(&pdev->dev);
22218c2ecf20Sopenharmony_ci	if (!device)
22228c2ecf20Sopenharmony_ci		return 0;
22238c2ecf20Sopenharmony_ci
22248c2ecf20Sopenharmony_ci	if (pci_dev_driver(pdev) != &vfio_pci_driver) {
22258c2ecf20Sopenharmony_ci		vfio_device_put(device);
22268c2ecf20Sopenharmony_ci		return 0;
22278c2ecf20Sopenharmony_ci	}
22288c2ecf20Sopenharmony_ci
22298c2ecf20Sopenharmony_ci	vdev = vfio_device_data(device);
22308c2ecf20Sopenharmony_ci
22318c2ecf20Sopenharmony_ci	if (vdev->reflck) {
22328c2ecf20Sopenharmony_ci		vfio_pci_reflck_get(vdev->reflck);
22338c2ecf20Sopenharmony_ci		*preflck = vdev->reflck;
22348c2ecf20Sopenharmony_ci		vfio_device_put(device);
22358c2ecf20Sopenharmony_ci		return 1;
22368c2ecf20Sopenharmony_ci	}
22378c2ecf20Sopenharmony_ci
22388c2ecf20Sopenharmony_ci	vfio_device_put(device);
22398c2ecf20Sopenharmony_ci	return 0;
22408c2ecf20Sopenharmony_ci}
22418c2ecf20Sopenharmony_ci
22428c2ecf20Sopenharmony_cistatic int vfio_pci_reflck_attach(struct vfio_pci_device *vdev)
22438c2ecf20Sopenharmony_ci{
22448c2ecf20Sopenharmony_ci	bool slot = !pci_probe_reset_slot(vdev->pdev->slot);
22458c2ecf20Sopenharmony_ci
22468c2ecf20Sopenharmony_ci	mutex_lock(&reflck_lock);
22478c2ecf20Sopenharmony_ci
22488c2ecf20Sopenharmony_ci	if (pci_is_root_bus(vdev->pdev->bus) ||
22498c2ecf20Sopenharmony_ci	    vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_reflck_find,
22508c2ecf20Sopenharmony_ci					  &vdev->reflck, slot) <= 0)
22518c2ecf20Sopenharmony_ci		vdev->reflck = vfio_pci_reflck_alloc();
22528c2ecf20Sopenharmony_ci
22538c2ecf20Sopenharmony_ci	mutex_unlock(&reflck_lock);
22548c2ecf20Sopenharmony_ci
22558c2ecf20Sopenharmony_ci	return PTR_ERR_OR_ZERO(vdev->reflck);
22568c2ecf20Sopenharmony_ci}
22578c2ecf20Sopenharmony_ci
22588c2ecf20Sopenharmony_cistatic void vfio_pci_reflck_release(struct kref *kref)
22598c2ecf20Sopenharmony_ci{
22608c2ecf20Sopenharmony_ci	struct vfio_pci_reflck *reflck = container_of(kref,
22618c2ecf20Sopenharmony_ci						      struct vfio_pci_reflck,
22628c2ecf20Sopenharmony_ci						      kref);
22638c2ecf20Sopenharmony_ci
22648c2ecf20Sopenharmony_ci	kfree(reflck);
22658c2ecf20Sopenharmony_ci	mutex_unlock(&reflck_lock);
22668c2ecf20Sopenharmony_ci}
22678c2ecf20Sopenharmony_ci
22688c2ecf20Sopenharmony_cistatic void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck)
22698c2ecf20Sopenharmony_ci{
22708c2ecf20Sopenharmony_ci	kref_put_mutex(&reflck->kref, vfio_pci_reflck_release, &reflck_lock);
22718c2ecf20Sopenharmony_ci}
22728c2ecf20Sopenharmony_ci
22738c2ecf20Sopenharmony_cistatic int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
22748c2ecf20Sopenharmony_ci{
22758c2ecf20Sopenharmony_ci	struct vfio_devices *devs = data;
22768c2ecf20Sopenharmony_ci	struct vfio_device *device;
22778c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev;
22788c2ecf20Sopenharmony_ci
22798c2ecf20Sopenharmony_ci	if (devs->cur_index == devs->max_index)
22808c2ecf20Sopenharmony_ci		return -ENOSPC;
22818c2ecf20Sopenharmony_ci
22828c2ecf20Sopenharmony_ci	device = vfio_device_get_from_dev(&pdev->dev);
22838c2ecf20Sopenharmony_ci	if (!device)
22848c2ecf20Sopenharmony_ci		return -EINVAL;
22858c2ecf20Sopenharmony_ci
22868c2ecf20Sopenharmony_ci	if (pci_dev_driver(pdev) != &vfio_pci_driver) {
22878c2ecf20Sopenharmony_ci		vfio_device_put(device);
22888c2ecf20Sopenharmony_ci		return -EBUSY;
22898c2ecf20Sopenharmony_ci	}
22908c2ecf20Sopenharmony_ci
22918c2ecf20Sopenharmony_ci	vdev = vfio_device_data(device);
22928c2ecf20Sopenharmony_ci
22938c2ecf20Sopenharmony_ci	/* Fault if the device is not unused */
22948c2ecf20Sopenharmony_ci	if (vdev->refcnt) {
22958c2ecf20Sopenharmony_ci		vfio_device_put(device);
22968c2ecf20Sopenharmony_ci		return -EBUSY;
22978c2ecf20Sopenharmony_ci	}
22988c2ecf20Sopenharmony_ci
22998c2ecf20Sopenharmony_ci	devs->devices[devs->cur_index++] = device;
23008c2ecf20Sopenharmony_ci	return 0;
23018c2ecf20Sopenharmony_ci}
23028c2ecf20Sopenharmony_ci
23038c2ecf20Sopenharmony_cistatic int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
23048c2ecf20Sopenharmony_ci{
23058c2ecf20Sopenharmony_ci	struct vfio_devices *devs = data;
23068c2ecf20Sopenharmony_ci	struct vfio_device *device;
23078c2ecf20Sopenharmony_ci	struct vfio_pci_device *vdev;
23088c2ecf20Sopenharmony_ci
23098c2ecf20Sopenharmony_ci	if (devs->cur_index == devs->max_index)
23108c2ecf20Sopenharmony_ci		return -ENOSPC;
23118c2ecf20Sopenharmony_ci
23128c2ecf20Sopenharmony_ci	device = vfio_device_get_from_dev(&pdev->dev);
23138c2ecf20Sopenharmony_ci	if (!device)
23148c2ecf20Sopenharmony_ci		return -EINVAL;
23158c2ecf20Sopenharmony_ci
23168c2ecf20Sopenharmony_ci	if (pci_dev_driver(pdev) != &vfio_pci_driver) {
23178c2ecf20Sopenharmony_ci		vfio_device_put(device);
23188c2ecf20Sopenharmony_ci		return -EBUSY;
23198c2ecf20Sopenharmony_ci	}
23208c2ecf20Sopenharmony_ci
23218c2ecf20Sopenharmony_ci	vdev = vfio_device_data(device);
23228c2ecf20Sopenharmony_ci
23238c2ecf20Sopenharmony_ci	/*
23248c2ecf20Sopenharmony_ci	 * Locking multiple devices is prone to deadlock, runaway and
23258c2ecf20Sopenharmony_ci	 * unwind if we hit contention.
23268c2ecf20Sopenharmony_ci	 */
23278c2ecf20Sopenharmony_ci	if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
23288c2ecf20Sopenharmony_ci		vfio_device_put(device);
23298c2ecf20Sopenharmony_ci		return -EBUSY;
23308c2ecf20Sopenharmony_ci	}
23318c2ecf20Sopenharmony_ci
23328c2ecf20Sopenharmony_ci	devs->devices[devs->cur_index++] = device;
23338c2ecf20Sopenharmony_ci	return 0;
23348c2ecf20Sopenharmony_ci}
23358c2ecf20Sopenharmony_ci
23368c2ecf20Sopenharmony_ci/*
23378c2ecf20Sopenharmony_ci * If a bus or slot reset is available for the provided device and:
23388c2ecf20Sopenharmony_ci *  - All of the devices affected by that bus or slot reset are unused
23398c2ecf20Sopenharmony_ci *    (!refcnt)
23408c2ecf20Sopenharmony_ci *  - At least one of the affected devices is marked dirty via
23418c2ecf20Sopenharmony_ci *    needs_reset (such as by lack of FLR support)
23428c2ecf20Sopenharmony_ci * Then attempt to perform that bus or slot reset.  Callers are required
23438c2ecf20Sopenharmony_ci * to hold vdev->reflck->lock, protecting the bus/slot reset group from
23448c2ecf20Sopenharmony_ci * concurrent opens.  A vfio_device reference is acquired for each device
23458c2ecf20Sopenharmony_ci * to prevent unbinds during the reset operation.
23468c2ecf20Sopenharmony_ci *
23478c2ecf20Sopenharmony_ci * NB: vfio-core considers a group to be viable even if some devices are
23488c2ecf20Sopenharmony_ci * bound to drivers like pci-stub or pcieport.  Here we require all devices
23498c2ecf20Sopenharmony_ci * to be bound to vfio_pci since that's the only way we can be sure they
23508c2ecf20Sopenharmony_ci * stay put.
23518c2ecf20Sopenharmony_ci */
23528c2ecf20Sopenharmony_cistatic void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
23538c2ecf20Sopenharmony_ci{
23548c2ecf20Sopenharmony_ci	struct vfio_devices devs = { .cur_index = 0 };
23558c2ecf20Sopenharmony_ci	int i = 0, ret = -EINVAL;
23568c2ecf20Sopenharmony_ci	bool slot = false;
23578c2ecf20Sopenharmony_ci	struct vfio_pci_device *tmp;
23588c2ecf20Sopenharmony_ci
23598c2ecf20Sopenharmony_ci	if (!pci_probe_reset_slot(vdev->pdev->slot))
23608c2ecf20Sopenharmony_ci		slot = true;
23618c2ecf20Sopenharmony_ci	else if (pci_probe_reset_bus(vdev->pdev->bus))
23628c2ecf20Sopenharmony_ci		return;
23638c2ecf20Sopenharmony_ci
23648c2ecf20Sopenharmony_ci	if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
23658c2ecf20Sopenharmony_ci					  &i, slot) || !i)
23668c2ecf20Sopenharmony_ci		return;
23678c2ecf20Sopenharmony_ci
23688c2ecf20Sopenharmony_ci	devs.max_index = i;
23698c2ecf20Sopenharmony_ci	devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL);
23708c2ecf20Sopenharmony_ci	if (!devs.devices)
23718c2ecf20Sopenharmony_ci		return;
23728c2ecf20Sopenharmony_ci
23738c2ecf20Sopenharmony_ci	if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
23748c2ecf20Sopenharmony_ci					  vfio_pci_get_unused_devs,
23758c2ecf20Sopenharmony_ci					  &devs, slot))
23768c2ecf20Sopenharmony_ci		goto put_devs;
23778c2ecf20Sopenharmony_ci
23788c2ecf20Sopenharmony_ci	/* Does at least one need a reset? */
23798c2ecf20Sopenharmony_ci	for (i = 0; i < devs.cur_index; i++) {
23808c2ecf20Sopenharmony_ci		tmp = vfio_device_data(devs.devices[i]);
23818c2ecf20Sopenharmony_ci		if (tmp->needs_reset) {
23828c2ecf20Sopenharmony_ci			ret = pci_reset_bus(vdev->pdev);
23838c2ecf20Sopenharmony_ci			break;
23848c2ecf20Sopenharmony_ci		}
23858c2ecf20Sopenharmony_ci	}
23868c2ecf20Sopenharmony_ci
23878c2ecf20Sopenharmony_ciput_devs:
23888c2ecf20Sopenharmony_ci	for (i = 0; i < devs.cur_index; i++) {
23898c2ecf20Sopenharmony_ci		tmp = vfio_device_data(devs.devices[i]);
23908c2ecf20Sopenharmony_ci
23918c2ecf20Sopenharmony_ci		/*
23928c2ecf20Sopenharmony_ci		 * If reset was successful, affected devices no longer need
23938c2ecf20Sopenharmony_ci		 * a reset and we should return all the collateral devices
23948c2ecf20Sopenharmony_ci		 * to low power.  If not successful, we either didn't reset
23958c2ecf20Sopenharmony_ci		 * the bus or timed out waiting for it, so let's not touch
23968c2ecf20Sopenharmony_ci		 * the power state.
23978c2ecf20Sopenharmony_ci		 */
23988c2ecf20Sopenharmony_ci		if (!ret) {
23998c2ecf20Sopenharmony_ci			tmp->needs_reset = false;
24008c2ecf20Sopenharmony_ci
24018c2ecf20Sopenharmony_ci			if (tmp != vdev && !disable_idle_d3)
24028c2ecf20Sopenharmony_ci				vfio_pci_set_power_state(tmp, PCI_D3hot);
24038c2ecf20Sopenharmony_ci		}
24048c2ecf20Sopenharmony_ci
24058c2ecf20Sopenharmony_ci		vfio_device_put(devs.devices[i]);
24068c2ecf20Sopenharmony_ci	}
24078c2ecf20Sopenharmony_ci
24088c2ecf20Sopenharmony_ci	kfree(devs.devices);
24098c2ecf20Sopenharmony_ci}
24108c2ecf20Sopenharmony_ci
24118c2ecf20Sopenharmony_cistatic void __exit vfio_pci_cleanup(void)
24128c2ecf20Sopenharmony_ci{
24138c2ecf20Sopenharmony_ci	pci_unregister_driver(&vfio_pci_driver);
24148c2ecf20Sopenharmony_ci	vfio_pci_uninit_perm_bits();
24158c2ecf20Sopenharmony_ci}
24168c2ecf20Sopenharmony_ci
24178c2ecf20Sopenharmony_cistatic void __init vfio_pci_fill_ids(void)
24188c2ecf20Sopenharmony_ci{
24198c2ecf20Sopenharmony_ci	char *p, *id;
24208c2ecf20Sopenharmony_ci	int rc;
24218c2ecf20Sopenharmony_ci
24228c2ecf20Sopenharmony_ci	/* no ids passed actually */
24238c2ecf20Sopenharmony_ci	if (ids[0] == '\0')
24248c2ecf20Sopenharmony_ci		return;
24258c2ecf20Sopenharmony_ci
24268c2ecf20Sopenharmony_ci	/* add ids specified in the module parameter */
24278c2ecf20Sopenharmony_ci	p = ids;
24288c2ecf20Sopenharmony_ci	while ((id = strsep(&p, ","))) {
24298c2ecf20Sopenharmony_ci		unsigned int vendor, device, subvendor = PCI_ANY_ID,
24308c2ecf20Sopenharmony_ci			subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
24318c2ecf20Sopenharmony_ci		int fields;
24328c2ecf20Sopenharmony_ci
24338c2ecf20Sopenharmony_ci		if (!strlen(id))
24348c2ecf20Sopenharmony_ci			continue;
24358c2ecf20Sopenharmony_ci
24368c2ecf20Sopenharmony_ci		fields = sscanf(id, "%x:%x:%x:%x:%x:%x",
24378c2ecf20Sopenharmony_ci				&vendor, &device, &subvendor, &subdevice,
24388c2ecf20Sopenharmony_ci				&class, &class_mask);
24398c2ecf20Sopenharmony_ci
24408c2ecf20Sopenharmony_ci		if (fields < 2) {
24418c2ecf20Sopenharmony_ci			pr_warn("invalid id string \"%s\"\n", id);
24428c2ecf20Sopenharmony_ci			continue;
24438c2ecf20Sopenharmony_ci		}
24448c2ecf20Sopenharmony_ci
24458c2ecf20Sopenharmony_ci		rc = pci_add_dynid(&vfio_pci_driver, vendor, device,
24468c2ecf20Sopenharmony_ci				   subvendor, subdevice, class, class_mask, 0);
24478c2ecf20Sopenharmony_ci		if (rc)
24488c2ecf20Sopenharmony_ci			pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n",
24498c2ecf20Sopenharmony_ci				vendor, device, subvendor, subdevice,
24508c2ecf20Sopenharmony_ci				class, class_mask, rc);
24518c2ecf20Sopenharmony_ci		else
24528c2ecf20Sopenharmony_ci			pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n",
24538c2ecf20Sopenharmony_ci				vendor, device, subvendor, subdevice,
24548c2ecf20Sopenharmony_ci				class, class_mask);
24558c2ecf20Sopenharmony_ci	}
24568c2ecf20Sopenharmony_ci}
24578c2ecf20Sopenharmony_ci
24588c2ecf20Sopenharmony_cistatic int __init vfio_pci_init(void)
24598c2ecf20Sopenharmony_ci{
24608c2ecf20Sopenharmony_ci	int ret;
24618c2ecf20Sopenharmony_ci
24628c2ecf20Sopenharmony_ci	/* Allocate shared config space permision data used by all devices */
24638c2ecf20Sopenharmony_ci	ret = vfio_pci_init_perm_bits();
24648c2ecf20Sopenharmony_ci	if (ret)
24658c2ecf20Sopenharmony_ci		return ret;
24668c2ecf20Sopenharmony_ci
24678c2ecf20Sopenharmony_ci	/* Register and scan for devices */
24688c2ecf20Sopenharmony_ci	ret = pci_register_driver(&vfio_pci_driver);
24698c2ecf20Sopenharmony_ci	if (ret)
24708c2ecf20Sopenharmony_ci		goto out_driver;
24718c2ecf20Sopenharmony_ci
24728c2ecf20Sopenharmony_ci	vfio_pci_fill_ids();
24738c2ecf20Sopenharmony_ci
24748c2ecf20Sopenharmony_ci	if (disable_denylist)
24758c2ecf20Sopenharmony_ci		pr_warn("device denylist disabled.\n");
24768c2ecf20Sopenharmony_ci
24778c2ecf20Sopenharmony_ci	return 0;
24788c2ecf20Sopenharmony_ci
24798c2ecf20Sopenharmony_ciout_driver:
24808c2ecf20Sopenharmony_ci	vfio_pci_uninit_perm_bits();
24818c2ecf20Sopenharmony_ci	return ret;
24828c2ecf20Sopenharmony_ci}
24838c2ecf20Sopenharmony_ci
24848c2ecf20Sopenharmony_cimodule_init(vfio_pci_init);
24858c2ecf20Sopenharmony_cimodule_exit(vfio_pci_cleanup);
24868c2ecf20Sopenharmony_ci
24878c2ecf20Sopenharmony_ciMODULE_VERSION(DRIVER_VERSION);
24888c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL v2");
24898c2ecf20Sopenharmony_ciMODULE_AUTHOR(DRIVER_AUTHOR);
24908c2ecf20Sopenharmony_ciMODULE_DESCRIPTION(DRIVER_DESC);
2491