162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * VFIO core
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
662306a36Sopenharmony_ci *     Author: Alex Williamson <alex.williamson@redhat.com>
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci * Derived from original vfio:
962306a36Sopenharmony_ci * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
1062306a36Sopenharmony_ci * Author: Tom Lyon, pugs@cisco.com
1162306a36Sopenharmony_ci */
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#include <linux/cdev.h>
1462306a36Sopenharmony_ci#include <linux/compat.h>
1562306a36Sopenharmony_ci#include <linux/device.h>
1662306a36Sopenharmony_ci#include <linux/fs.h>
1762306a36Sopenharmony_ci#include <linux/idr.h>
1862306a36Sopenharmony_ci#include <linux/iommu.h>
1962306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM
2062306a36Sopenharmony_ci#include <linux/kvm_host.h>
2162306a36Sopenharmony_ci#endif
2262306a36Sopenharmony_ci#include <linux/list.h>
2362306a36Sopenharmony_ci#include <linux/miscdevice.h>
2462306a36Sopenharmony_ci#include <linux/module.h>
2562306a36Sopenharmony_ci#include <linux/mutex.h>
2662306a36Sopenharmony_ci#include <linux/pci.h>
2762306a36Sopenharmony_ci#include <linux/rwsem.h>
2862306a36Sopenharmony_ci#include <linux/sched.h>
2962306a36Sopenharmony_ci#include <linux/slab.h>
3062306a36Sopenharmony_ci#include <linux/stat.h>
3162306a36Sopenharmony_ci#include <linux/string.h>
3262306a36Sopenharmony_ci#include <linux/uaccess.h>
3362306a36Sopenharmony_ci#include <linux/vfio.h>
3462306a36Sopenharmony_ci#include <linux/wait.h>
3562306a36Sopenharmony_ci#include <linux/sched/signal.h>
3662306a36Sopenharmony_ci#include <linux/pm_runtime.h>
3762306a36Sopenharmony_ci#include <linux/interval_tree.h>
3862306a36Sopenharmony_ci#include <linux/iova_bitmap.h>
3962306a36Sopenharmony_ci#include <linux/iommufd.h>
4062306a36Sopenharmony_ci#include "vfio.h"
4162306a36Sopenharmony_ci
/*
 * Module identification strings.  Their users are not within the visible
 * part of this file -- presumably the MODULE_*() declarations; TODO confirm.
 */
#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"
4562306a36Sopenharmony_ci
/*
 * File-scope VFIO core state.
 *
 * @device_class: class assigned to the struct device embedded in every
 *                vfio_device (see vfio_init_device()).
 * @device_ida:   allocator for vfio_device->index values; indexes are handed
 *                out in vfio_init_device() and returned with ida_free().
 */
static struct vfio {
	struct class			*device_class;
	struct ida			device_ida;
} vfio;
5062306a36Sopenharmony_ci
#ifdef CONFIG_VFIO_NOIOMMU
/*
 * Backing variable of the "enable_unsafe_noiommu_mode" module parameter.
 * Only built when CONFIG_VFIO_NOIOMMU is set; the variable has external
 * linkage.  The parameter is world-readable and writable by its owner
 * (S_IRUGO | S_IWUSR).
 */
bool vfio_noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   vfio_noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif
5762306a36Sopenharmony_ci
/*
 * Lookup table of device sets, indexed by the set_id pointer cast to
 * unsigned long.  Entries are inserted by vfio_assign_device_set() and
 * erased by vfio_release_device_set(); both also use the xarray's lock to
 * protect vfio_device_set->device_count.
 */
static DEFINE_XARRAY(vfio_device_set_xa);
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ciint vfio_assign_device_set(struct vfio_device *device, void *set_id)
6162306a36Sopenharmony_ci{
6262306a36Sopenharmony_ci	unsigned long idx = (unsigned long)set_id;
6362306a36Sopenharmony_ci	struct vfio_device_set *new_dev_set;
6462306a36Sopenharmony_ci	struct vfio_device_set *dev_set;
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci	if (WARN_ON(!set_id))
6762306a36Sopenharmony_ci		return -EINVAL;
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci	/*
7062306a36Sopenharmony_ci	 * Atomically acquire a singleton object in the xarray for this set_id
7162306a36Sopenharmony_ci	 */
7262306a36Sopenharmony_ci	xa_lock(&vfio_device_set_xa);
7362306a36Sopenharmony_ci	dev_set = xa_load(&vfio_device_set_xa, idx);
7462306a36Sopenharmony_ci	if (dev_set)
7562306a36Sopenharmony_ci		goto found_get_ref;
7662306a36Sopenharmony_ci	xa_unlock(&vfio_device_set_xa);
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
7962306a36Sopenharmony_ci	if (!new_dev_set)
8062306a36Sopenharmony_ci		return -ENOMEM;
8162306a36Sopenharmony_ci	mutex_init(&new_dev_set->lock);
8262306a36Sopenharmony_ci	INIT_LIST_HEAD(&new_dev_set->device_list);
8362306a36Sopenharmony_ci	new_dev_set->set_id = set_id;
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	xa_lock(&vfio_device_set_xa);
8662306a36Sopenharmony_ci	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
8762306a36Sopenharmony_ci			       GFP_KERNEL);
8862306a36Sopenharmony_ci	if (!dev_set) {
8962306a36Sopenharmony_ci		dev_set = new_dev_set;
9062306a36Sopenharmony_ci		goto found_get_ref;
9162306a36Sopenharmony_ci	}
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci	kfree(new_dev_set);
9462306a36Sopenharmony_ci	if (xa_is_err(dev_set)) {
9562306a36Sopenharmony_ci		xa_unlock(&vfio_device_set_xa);
9662306a36Sopenharmony_ci		return xa_err(dev_set);
9762306a36Sopenharmony_ci	}
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_cifound_get_ref:
10062306a36Sopenharmony_ci	dev_set->device_count++;
10162306a36Sopenharmony_ci	xa_unlock(&vfio_device_set_xa);
10262306a36Sopenharmony_ci	mutex_lock(&dev_set->lock);
10362306a36Sopenharmony_ci	device->dev_set = dev_set;
10462306a36Sopenharmony_ci	list_add_tail(&device->dev_set_list, &dev_set->device_list);
10562306a36Sopenharmony_ci	mutex_unlock(&dev_set->lock);
10662306a36Sopenharmony_ci	return 0;
10762306a36Sopenharmony_ci}
10862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_assign_device_set);
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_cistatic void vfio_release_device_set(struct vfio_device *device)
11162306a36Sopenharmony_ci{
11262306a36Sopenharmony_ci	struct vfio_device_set *dev_set = device->dev_set;
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci	if (!dev_set)
11562306a36Sopenharmony_ci		return;
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci	mutex_lock(&dev_set->lock);
11862306a36Sopenharmony_ci	list_del(&device->dev_set_list);
11962306a36Sopenharmony_ci	mutex_unlock(&dev_set->lock);
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_ci	xa_lock(&vfio_device_set_xa);
12262306a36Sopenharmony_ci	if (!--dev_set->device_count) {
12362306a36Sopenharmony_ci		__xa_erase(&vfio_device_set_xa,
12462306a36Sopenharmony_ci			   (unsigned long)dev_set->set_id);
12562306a36Sopenharmony_ci		mutex_destroy(&dev_set->lock);
12662306a36Sopenharmony_ci		kfree(dev_set);
12762306a36Sopenharmony_ci	}
12862306a36Sopenharmony_ci	xa_unlock(&vfio_device_set_xa);
12962306a36Sopenharmony_ci}
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ciunsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set)
13262306a36Sopenharmony_ci{
13362306a36Sopenharmony_ci	struct vfio_device *cur;
13462306a36Sopenharmony_ci	unsigned int open_count = 0;
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci	lockdep_assert_held(&dev_set->lock);
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
13962306a36Sopenharmony_ci		open_count += cur->open_count;
14062306a36Sopenharmony_ci	return open_count;
14162306a36Sopenharmony_ci}
14262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_device_set_open_count);
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_cistruct vfio_device *
14562306a36Sopenharmony_civfio_find_device_in_devset(struct vfio_device_set *dev_set,
14662306a36Sopenharmony_ci			   struct device *dev)
14762306a36Sopenharmony_ci{
14862306a36Sopenharmony_ci	struct vfio_device *cur;
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	lockdep_assert_held(&dev_set->lock);
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci	list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
15362306a36Sopenharmony_ci		if (cur->dev == dev)
15462306a36Sopenharmony_ci			return cur;
15562306a36Sopenharmony_ci	return NULL;
15662306a36Sopenharmony_ci}
15762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_find_device_in_devset);
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci/*
16062306a36Sopenharmony_ci * Device objects - create, release, get, put, search
16162306a36Sopenharmony_ci */
16262306a36Sopenharmony_ci/* Device reference always implies a group reference */
16362306a36Sopenharmony_civoid vfio_device_put_registration(struct vfio_device *device)
16462306a36Sopenharmony_ci{
16562306a36Sopenharmony_ci	if (refcount_dec_and_test(&device->refcount))
16662306a36Sopenharmony_ci		complete(&device->comp);
16762306a36Sopenharmony_ci}
16862306a36Sopenharmony_ci
/*
 * Try to take a reference on device->refcount.
 *
 * Returns the result of refcount_inc_not_zero(): the reference is only
 * taken when the count has not already dropped to zero.
 */
bool vfio_device_try_get_registration(struct vfio_device *device)
{
	return refcount_inc_not_zero(&device->refcount);
}
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci/*
17562306a36Sopenharmony_ci * VFIO driver API
17662306a36Sopenharmony_ci */
17762306a36Sopenharmony_ci/* Release helper called by vfio_put_device() */
17862306a36Sopenharmony_cistatic void vfio_device_release(struct device *dev)
17962306a36Sopenharmony_ci{
18062306a36Sopenharmony_ci	struct vfio_device *device =
18162306a36Sopenharmony_ci			container_of(dev, struct vfio_device, device);
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	vfio_release_device_set(device);
18462306a36Sopenharmony_ci	ida_free(&vfio.device_ida, device->index);
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	if (device->ops->release)
18762306a36Sopenharmony_ci		device->ops->release(device);
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci	kvfree(device);
19062306a36Sopenharmony_ci}
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_cistatic int vfio_init_device(struct vfio_device *device, struct device *dev,
19362306a36Sopenharmony_ci			    const struct vfio_device_ops *ops);
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci/*
19662306a36Sopenharmony_ci * Allocate and initialize vfio_device so it can be registered to vfio
19762306a36Sopenharmony_ci * core.
19862306a36Sopenharmony_ci *
19962306a36Sopenharmony_ci * Drivers should use the wrapper vfio_alloc_device() for allocation.
20062306a36Sopenharmony_ci * @size is the size of the structure to be allocated, including any
20162306a36Sopenharmony_ci * private data used by the driver.
20262306a36Sopenharmony_ci *
20362306a36Sopenharmony_ci * Driver may provide an @init callback to cover device private data.
20462306a36Sopenharmony_ci *
20562306a36Sopenharmony_ci * Use vfio_put_device() to release the structure after success return.
20662306a36Sopenharmony_ci */
20762306a36Sopenharmony_cistruct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
20862306a36Sopenharmony_ci				       const struct vfio_device_ops *ops)
20962306a36Sopenharmony_ci{
21062306a36Sopenharmony_ci	struct vfio_device *device;
21162306a36Sopenharmony_ci	int ret;
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci	if (WARN_ON(size < sizeof(struct vfio_device)))
21462306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	device = kvzalloc(size, GFP_KERNEL);
21762306a36Sopenharmony_ci	if (!device)
21862306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci	ret = vfio_init_device(device, dev, ops);
22162306a36Sopenharmony_ci	if (ret)
22262306a36Sopenharmony_ci		goto out_free;
22362306a36Sopenharmony_ci	return device;
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ciout_free:
22662306a36Sopenharmony_ci	kvfree(device);
22762306a36Sopenharmony_ci	return ERR_PTR(ret);
22862306a36Sopenharmony_ci}
22962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(_vfio_alloc_device);
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci/*
23262306a36Sopenharmony_ci * Initialize a vfio_device so it can be registered to vfio core.
23362306a36Sopenharmony_ci */
23462306a36Sopenharmony_cistatic int vfio_init_device(struct vfio_device *device, struct device *dev,
23562306a36Sopenharmony_ci			    const struct vfio_device_ops *ops)
23662306a36Sopenharmony_ci{
23762306a36Sopenharmony_ci	int ret;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
24062306a36Sopenharmony_ci	if (ret < 0) {
24162306a36Sopenharmony_ci		dev_dbg(dev, "Error to alloc index\n");
24262306a36Sopenharmony_ci		return ret;
24362306a36Sopenharmony_ci	}
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	device->index = ret;
24662306a36Sopenharmony_ci	init_completion(&device->comp);
24762306a36Sopenharmony_ci	device->dev = dev;
24862306a36Sopenharmony_ci	device->ops = ops;
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci	if (ops->init) {
25162306a36Sopenharmony_ci		ret = ops->init(device);
25262306a36Sopenharmony_ci		if (ret)
25362306a36Sopenharmony_ci			goto out_uninit;
25462306a36Sopenharmony_ci	}
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci	device_initialize(&device->device);
25762306a36Sopenharmony_ci	device->device.release = vfio_device_release;
25862306a36Sopenharmony_ci	device->device.class = vfio.device_class;
25962306a36Sopenharmony_ci	device->device.parent = device->dev;
26062306a36Sopenharmony_ci	return 0;
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_ciout_uninit:
26362306a36Sopenharmony_ci	vfio_release_device_set(device);
26462306a36Sopenharmony_ci	ida_free(&vfio.device_ida, device->index);
26562306a36Sopenharmony_ci	return ret;
26662306a36Sopenharmony_ci}
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_cistatic int __vfio_register_dev(struct vfio_device *device,
26962306a36Sopenharmony_ci			       enum vfio_group_type type)
27062306a36Sopenharmony_ci{
27162306a36Sopenharmony_ci	int ret;
27262306a36Sopenharmony_ci
27362306a36Sopenharmony_ci	if (WARN_ON(IS_ENABLED(CONFIG_IOMMUFD) &&
27462306a36Sopenharmony_ci		    (!device->ops->bind_iommufd ||
27562306a36Sopenharmony_ci		     !device->ops->unbind_iommufd ||
27662306a36Sopenharmony_ci		     !device->ops->attach_ioas ||
27762306a36Sopenharmony_ci		     !device->ops->detach_ioas)))
27862306a36Sopenharmony_ci		return -EINVAL;
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_ci	/*
28162306a36Sopenharmony_ci	 * If the driver doesn't specify a set then the device is added to a
28262306a36Sopenharmony_ci	 * singleton set just for itself.
28362306a36Sopenharmony_ci	 */
28462306a36Sopenharmony_ci	if (!device->dev_set)
28562306a36Sopenharmony_ci		vfio_assign_device_set(device, device);
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ci	ret = dev_set_name(&device->device, "vfio%d", device->index);
28862306a36Sopenharmony_ci	if (ret)
28962306a36Sopenharmony_ci		return ret;
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	ret = vfio_device_set_group(device, type);
29262306a36Sopenharmony_ci	if (ret)
29362306a36Sopenharmony_ci		return ret;
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_ci	/*
29662306a36Sopenharmony_ci	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
29762306a36Sopenharmony_ci	 * restore cache coherency. It has to be checked here because it is only
29862306a36Sopenharmony_ci	 * valid for cases where we are using iommu groups.
29962306a36Sopenharmony_ci	 */
30062306a36Sopenharmony_ci	if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) &&
30162306a36Sopenharmony_ci	    !device_iommu_capable(device->dev, IOMMU_CAP_CACHE_COHERENCY)) {
30262306a36Sopenharmony_ci		ret = -EINVAL;
30362306a36Sopenharmony_ci		goto err_out;
30462306a36Sopenharmony_ci	}
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_ci	ret = vfio_device_add(device);
30762306a36Sopenharmony_ci	if (ret)
30862306a36Sopenharmony_ci		goto err_out;
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	/* Refcounting can't start until the driver calls register */
31162306a36Sopenharmony_ci	refcount_set(&device->refcount, 1);
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci	vfio_device_group_register(device);
31462306a36Sopenharmony_ci
31562306a36Sopenharmony_ci	return 0;
31662306a36Sopenharmony_cierr_out:
31762306a36Sopenharmony_ci	vfio_device_remove_group(device);
31862306a36Sopenharmony_ci	return ret;
31962306a36Sopenharmony_ci}
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ciint vfio_register_group_dev(struct vfio_device *device)
32262306a36Sopenharmony_ci{
32362306a36Sopenharmony_ci	return __vfio_register_dev(device, VFIO_IOMMU);
32462306a36Sopenharmony_ci}
32562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_register_group_dev);
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci/*
32862306a36Sopenharmony_ci * Register a virtual device without IOMMU backing.  The user of this
32962306a36Sopenharmony_ci * device must not be able to directly trigger unmediated DMA.
33062306a36Sopenharmony_ci */
33162306a36Sopenharmony_ciint vfio_register_emulated_iommu_dev(struct vfio_device *device)
33262306a36Sopenharmony_ci{
33362306a36Sopenharmony_ci	return __vfio_register_dev(device, VFIO_EMULATED_IOMMU);
33462306a36Sopenharmony_ci}
33562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_ci/*
33862306a36Sopenharmony_ci * Decrement the device reference count and wait for the device to be
33962306a36Sopenharmony_ci * removed.  Open file descriptors for the device... */
34062306a36Sopenharmony_civoid vfio_unregister_group_dev(struct vfio_device *device)
34162306a36Sopenharmony_ci{
34262306a36Sopenharmony_ci	unsigned int i = 0;
34362306a36Sopenharmony_ci	bool interrupted = false;
34462306a36Sopenharmony_ci	long rc;
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	/*
34762306a36Sopenharmony_ci	 * Prevent new device opened by userspace via the
34862306a36Sopenharmony_ci	 * VFIO_GROUP_GET_DEVICE_FD in the group path.
34962306a36Sopenharmony_ci	 */
35062306a36Sopenharmony_ci	vfio_device_group_unregister(device);
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci	/*
35362306a36Sopenharmony_ci	 * Balances vfio_device_add() in register path, also prevents
35462306a36Sopenharmony_ci	 * new device opened by userspace in the cdev path.
35562306a36Sopenharmony_ci	 */
35662306a36Sopenharmony_ci	vfio_device_del(device);
35762306a36Sopenharmony_ci
35862306a36Sopenharmony_ci	vfio_device_put_registration(device);
35962306a36Sopenharmony_ci	rc = try_wait_for_completion(&device->comp);
36062306a36Sopenharmony_ci	while (rc <= 0) {
36162306a36Sopenharmony_ci		if (device->ops->request)
36262306a36Sopenharmony_ci			device->ops->request(device, i++);
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci		if (interrupted) {
36562306a36Sopenharmony_ci			rc = wait_for_completion_timeout(&device->comp,
36662306a36Sopenharmony_ci							 HZ * 10);
36762306a36Sopenharmony_ci		} else {
36862306a36Sopenharmony_ci			rc = wait_for_completion_interruptible_timeout(
36962306a36Sopenharmony_ci				&device->comp, HZ * 10);
37062306a36Sopenharmony_ci			if (rc < 0) {
37162306a36Sopenharmony_ci				interrupted = true;
37262306a36Sopenharmony_ci				dev_warn(device->dev,
37362306a36Sopenharmony_ci					 "Device is currently in use, task"
37462306a36Sopenharmony_ci					 " \"%s\" (%d) "
37562306a36Sopenharmony_ci					 "blocked until device is released",
37662306a36Sopenharmony_ci					 current->comm, task_pid_nr(current));
37762306a36Sopenharmony_ci			}
37862306a36Sopenharmony_ci		}
37962306a36Sopenharmony_ci	}
38062306a36Sopenharmony_ci
38162306a36Sopenharmony_ci	/* Balances vfio_device_set_group in register path */
38262306a36Sopenharmony_ci	vfio_device_remove_group(device);
38362306a36Sopenharmony_ci}
38462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM
38762306a36Sopenharmony_civoid vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm)
38862306a36Sopenharmony_ci{
38962306a36Sopenharmony_ci	void (*pfn)(struct kvm *kvm);
39062306a36Sopenharmony_ci	bool (*fn)(struct kvm *kvm);
39162306a36Sopenharmony_ci	bool ret;
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ci	lockdep_assert_held(&device->dev_set->lock);
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_ci	if (!kvm)
39662306a36Sopenharmony_ci		return;
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_ci	pfn = symbol_get(kvm_put_kvm);
39962306a36Sopenharmony_ci	if (WARN_ON(!pfn))
40062306a36Sopenharmony_ci		return;
40162306a36Sopenharmony_ci
40262306a36Sopenharmony_ci	fn = symbol_get(kvm_get_kvm_safe);
40362306a36Sopenharmony_ci	if (WARN_ON(!fn)) {
40462306a36Sopenharmony_ci		symbol_put(kvm_put_kvm);
40562306a36Sopenharmony_ci		return;
40662306a36Sopenharmony_ci	}
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_ci	ret = fn(kvm);
40962306a36Sopenharmony_ci	symbol_put(kvm_get_kvm_safe);
41062306a36Sopenharmony_ci	if (!ret) {
41162306a36Sopenharmony_ci		symbol_put(kvm_put_kvm);
41262306a36Sopenharmony_ci		return;
41362306a36Sopenharmony_ci	}
41462306a36Sopenharmony_ci
41562306a36Sopenharmony_ci	device->put_kvm = pfn;
41662306a36Sopenharmony_ci	device->kvm = kvm;
41762306a36Sopenharmony_ci}
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_civoid vfio_device_put_kvm(struct vfio_device *device)
42062306a36Sopenharmony_ci{
42162306a36Sopenharmony_ci	lockdep_assert_held(&device->dev_set->lock);
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_ci	if (!device->kvm)
42462306a36Sopenharmony_ci		return;
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci	if (WARN_ON(!device->put_kvm))
42762306a36Sopenharmony_ci		goto clear;
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	device->put_kvm(device->kvm);
43062306a36Sopenharmony_ci	device->put_kvm = NULL;
43162306a36Sopenharmony_ci	symbol_put(kvm_put_kvm);
43262306a36Sopenharmony_ci
43362306a36Sopenharmony_ciclear:
43462306a36Sopenharmony_ci	device->kvm = NULL;
43562306a36Sopenharmony_ci}
43662306a36Sopenharmony_ci#endif
43762306a36Sopenharmony_ci
/*
 * true if the vfio_device has open_device() called but not close_device().
 *
 * Returns true when device->open_count is non-zero.  A zero count triggers
 * WARN_ON_ONCE() and yields false.
 */
static bool vfio_assert_device_open(struct vfio_device *device)
{
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}
44362306a36Sopenharmony_ci
44462306a36Sopenharmony_cistruct vfio_device_file *
44562306a36Sopenharmony_civfio_allocate_device_file(struct vfio_device *device)
44662306a36Sopenharmony_ci{
44762306a36Sopenharmony_ci	struct vfio_device_file *df;
44862306a36Sopenharmony_ci
44962306a36Sopenharmony_ci	df = kzalloc(sizeof(*df), GFP_KERNEL_ACCOUNT);
45062306a36Sopenharmony_ci	if (!df)
45162306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_ci	df->device = device;
45462306a36Sopenharmony_ci	spin_lock_init(&df->kvm_ref_lock);
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci	return df;
45762306a36Sopenharmony_ci}
45862306a36Sopenharmony_ci
45962306a36Sopenharmony_cistatic int vfio_df_device_first_open(struct vfio_device_file *df)
46062306a36Sopenharmony_ci{
46162306a36Sopenharmony_ci	struct vfio_device *device = df->device;
46262306a36Sopenharmony_ci	struct iommufd_ctx *iommufd = df->iommufd;
46362306a36Sopenharmony_ci	int ret;
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci	lockdep_assert_held(&device->dev_set->lock);
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci	if (!try_module_get(device->dev->driver->owner))
46862306a36Sopenharmony_ci		return -ENODEV;
46962306a36Sopenharmony_ci
47062306a36Sopenharmony_ci	if (iommufd)
47162306a36Sopenharmony_ci		ret = vfio_df_iommufd_bind(df);
47262306a36Sopenharmony_ci	else
47362306a36Sopenharmony_ci		ret = vfio_device_group_use_iommu(device);
47462306a36Sopenharmony_ci	if (ret)
47562306a36Sopenharmony_ci		goto err_module_put;
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci	if (device->ops->open_device) {
47862306a36Sopenharmony_ci		ret = device->ops->open_device(device);
47962306a36Sopenharmony_ci		if (ret)
48062306a36Sopenharmony_ci			goto err_unuse_iommu;
48162306a36Sopenharmony_ci	}
48262306a36Sopenharmony_ci	return 0;
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_cierr_unuse_iommu:
48562306a36Sopenharmony_ci	if (iommufd)
48662306a36Sopenharmony_ci		vfio_df_iommufd_unbind(df);
48762306a36Sopenharmony_ci	else
48862306a36Sopenharmony_ci		vfio_device_group_unuse_iommu(device);
48962306a36Sopenharmony_cierr_module_put:
49062306a36Sopenharmony_ci	module_put(device->dev->driver->owner);
49162306a36Sopenharmony_ci	return ret;
49262306a36Sopenharmony_ci}
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_cistatic void vfio_df_device_last_close(struct vfio_device_file *df)
49562306a36Sopenharmony_ci{
49662306a36Sopenharmony_ci	struct vfio_device *device = df->device;
49762306a36Sopenharmony_ci	struct iommufd_ctx *iommufd = df->iommufd;
49862306a36Sopenharmony_ci
49962306a36Sopenharmony_ci	lockdep_assert_held(&device->dev_set->lock);
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci	if (device->ops->close_device)
50262306a36Sopenharmony_ci		device->ops->close_device(device);
50362306a36Sopenharmony_ci	if (iommufd)
50462306a36Sopenharmony_ci		vfio_df_iommufd_unbind(df);
50562306a36Sopenharmony_ci	else
50662306a36Sopenharmony_ci		vfio_device_group_unuse_iommu(device);
50762306a36Sopenharmony_ci	module_put(device->dev->driver->owner);
50862306a36Sopenharmony_ci}
50962306a36Sopenharmony_ci
51062306a36Sopenharmony_ciint vfio_df_open(struct vfio_device_file *df)
51162306a36Sopenharmony_ci{
51262306a36Sopenharmony_ci	struct vfio_device *device = df->device;
51362306a36Sopenharmony_ci	int ret = 0;
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	lockdep_assert_held(&device->dev_set->lock);
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci	/*
51862306a36Sopenharmony_ci	 * Only the group path allows the device to be opened multiple
51962306a36Sopenharmony_ci	 * times.  The device cdev path doesn't have a secure way for it.
52062306a36Sopenharmony_ci	 */
52162306a36Sopenharmony_ci	if (device->open_count != 0 && !df->group)
52262306a36Sopenharmony_ci		return -EINVAL;
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci	device->open_count++;
52562306a36Sopenharmony_ci	if (device->open_count == 1) {
52662306a36Sopenharmony_ci		ret = vfio_df_device_first_open(df);
52762306a36Sopenharmony_ci		if (ret)
52862306a36Sopenharmony_ci			device->open_count--;
52962306a36Sopenharmony_ci	}
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci	return ret;
53262306a36Sopenharmony_ci}
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_civoid vfio_df_close(struct vfio_device_file *df)
53562306a36Sopenharmony_ci{
53662306a36Sopenharmony_ci	struct vfio_device *device = df->device;
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	lockdep_assert_held(&device->dev_set->lock);
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	vfio_assert_device_open(device);
54162306a36Sopenharmony_ci	if (device->open_count == 1)
54262306a36Sopenharmony_ci		vfio_df_device_last_close(df);
54362306a36Sopenharmony_ci	device->open_count--;
54462306a36Sopenharmony_ci}
54562306a36Sopenharmony_ci
54662306a36Sopenharmony_ci/*
54762306a36Sopenharmony_ci * Wrapper around pm_runtime_resume_and_get().
54862306a36Sopenharmony_ci * Return error code on failure or 0 on success.
54962306a36Sopenharmony_ci */
55062306a36Sopenharmony_cistatic inline int vfio_device_pm_runtime_get(struct vfio_device *device)
55162306a36Sopenharmony_ci{
55262306a36Sopenharmony_ci	struct device *dev = device->dev;
55362306a36Sopenharmony_ci
55462306a36Sopenharmony_ci	if (dev->driver && dev->driver->pm) {
55562306a36Sopenharmony_ci		int ret;
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci		ret = pm_runtime_resume_and_get(dev);
55862306a36Sopenharmony_ci		if (ret) {
55962306a36Sopenharmony_ci			dev_info_ratelimited(dev,
56062306a36Sopenharmony_ci				"vfio: runtime resume failed %d\n", ret);
56162306a36Sopenharmony_ci			return -EIO;
56262306a36Sopenharmony_ci		}
56362306a36Sopenharmony_ci	}
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci	return 0;
56662306a36Sopenharmony_ci}
56762306a36Sopenharmony_ci
56862306a36Sopenharmony_ci/*
56962306a36Sopenharmony_ci * Wrapper around pm_runtime_put().
57062306a36Sopenharmony_ci */
57162306a36Sopenharmony_cistatic inline void vfio_device_pm_runtime_put(struct vfio_device *device)
57262306a36Sopenharmony_ci{
57362306a36Sopenharmony_ci	struct device *dev = device->dev;
57462306a36Sopenharmony_ci
57562306a36Sopenharmony_ci	if (dev->driver && dev->driver->pm)
57662306a36Sopenharmony_ci		pm_runtime_put(dev);
57762306a36Sopenharmony_ci}
57862306a36Sopenharmony_ci
57962306a36Sopenharmony_ci/*
58062306a36Sopenharmony_ci * VFIO Device fd
58162306a36Sopenharmony_ci */
58262306a36Sopenharmony_cistatic int vfio_device_fops_release(struct inode *inode, struct file *filep)
58362306a36Sopenharmony_ci{
58462306a36Sopenharmony_ci	struct vfio_device_file *df = filep->private_data;
58562306a36Sopenharmony_ci	struct vfio_device *device = df->device;
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci	if (df->group)
58862306a36Sopenharmony_ci		vfio_df_group_close(df);
58962306a36Sopenharmony_ci	else
59062306a36Sopenharmony_ci		vfio_df_unbind_iommufd(df);
59162306a36Sopenharmony_ci
59262306a36Sopenharmony_ci	vfio_device_put_registration(device);
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_ci	kfree(df);
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci	return 0;
59762306a36Sopenharmony_ci}
59862306a36Sopenharmony_ci
59962306a36Sopenharmony_ci/*
60062306a36Sopenharmony_ci * vfio_mig_get_next_state - Compute the next step in the FSM
60162306a36Sopenharmony_ci * @cur_fsm - The current state the device is in
60262306a36Sopenharmony_ci * @new_fsm - The target state to reach
60362306a36Sopenharmony_ci * @next_fsm - Pointer to the next step to get to new_fsm
60462306a36Sopenharmony_ci *
60562306a36Sopenharmony_ci * Return 0 upon success, otherwise -errno
60662306a36Sopenharmony_ci * Upon success the next step in the state progression between cur_fsm and
60762306a36Sopenharmony_ci * new_fsm will be set in next_fsm.
60862306a36Sopenharmony_ci *
60962306a36Sopenharmony_ci * This breaks down requests for combination transitions into smaller steps and
61062306a36Sopenharmony_ci * returns the next step to get to new_fsm. The function may need to be called
61162306a36Sopenharmony_ci * multiple times before reaching new_fsm.
61262306a36Sopenharmony_ci *
61362306a36Sopenharmony_ci */
61462306a36Sopenharmony_ciint vfio_mig_get_next_state(struct vfio_device *device,
61562306a36Sopenharmony_ci			    enum vfio_device_mig_state cur_fsm,
61662306a36Sopenharmony_ci			    enum vfio_device_mig_state new_fsm,
61762306a36Sopenharmony_ci			    enum vfio_device_mig_state *next_fsm)
61862306a36Sopenharmony_ci{
61962306a36Sopenharmony_ci	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_PRE_COPY_P2P + 1 };
62062306a36Sopenharmony_ci	/*
62162306a36Sopenharmony_ci	 * The coding in this table requires the driver to implement the
62262306a36Sopenharmony_ci	 * following FSM arcs:
62362306a36Sopenharmony_ci	 *         RESUMING -> STOP
62462306a36Sopenharmony_ci	 *         STOP -> RESUMING
62562306a36Sopenharmony_ci	 *         STOP -> STOP_COPY
62662306a36Sopenharmony_ci	 *         STOP_COPY -> STOP
62762306a36Sopenharmony_ci	 *
62862306a36Sopenharmony_ci	 * If P2P is supported then the driver must also implement these FSM
62962306a36Sopenharmony_ci	 * arcs:
63062306a36Sopenharmony_ci	 *         RUNNING -> RUNNING_P2P
63162306a36Sopenharmony_ci	 *         RUNNING_P2P -> RUNNING
63262306a36Sopenharmony_ci	 *         RUNNING_P2P -> STOP
63362306a36Sopenharmony_ci	 *         STOP -> RUNNING_P2P
63462306a36Sopenharmony_ci	 *
63562306a36Sopenharmony_ci	 * If precopy is supported then the driver must support these additional
63662306a36Sopenharmony_ci	 * FSM arcs:
63762306a36Sopenharmony_ci	 *         RUNNING -> PRE_COPY
63862306a36Sopenharmony_ci	 *         PRE_COPY -> RUNNING
63962306a36Sopenharmony_ci	 *         PRE_COPY -> STOP_COPY
64062306a36Sopenharmony_ci	 * However, if precopy and P2P are supported together then the driver
64162306a36Sopenharmony_ci	 * must support these additional arcs beyond the P2P arcs above:
64262306a36Sopenharmony_ci	 *         PRE_COPY -> RUNNING
64362306a36Sopenharmony_ci	 *         PRE_COPY -> PRE_COPY_P2P
64462306a36Sopenharmony_ci	 *         PRE_COPY_P2P -> PRE_COPY
64562306a36Sopenharmony_ci	 *         PRE_COPY_P2P -> RUNNING_P2P
64662306a36Sopenharmony_ci	 *         PRE_COPY_P2P -> STOP_COPY
64762306a36Sopenharmony_ci	 *         RUNNING -> PRE_COPY
64862306a36Sopenharmony_ci	 *         RUNNING_P2P -> PRE_COPY_P2P
64962306a36Sopenharmony_ci	 *
65062306a36Sopenharmony_ci	 * Without P2P and precopy the driver must implement:
65162306a36Sopenharmony_ci	 *         RUNNING -> STOP
65262306a36Sopenharmony_ci	 *         STOP -> RUNNING
65362306a36Sopenharmony_ci	 *
65462306a36Sopenharmony_ci	 * The coding will step through multiple states for some combination
65562306a36Sopenharmony_ci	 * transitions; if all optional features are supported, this means the
65662306a36Sopenharmony_ci	 * following ones:
65762306a36Sopenharmony_ci	 *         PRE_COPY -> PRE_COPY_P2P -> STOP_COPY
65862306a36Sopenharmony_ci	 *         PRE_COPY -> RUNNING -> RUNNING_P2P
65962306a36Sopenharmony_ci	 *         PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP
66062306a36Sopenharmony_ci	 *         PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP -> RESUMING
66162306a36Sopenharmony_ci	 *         PRE_COPY_P2P -> RUNNING_P2P -> RUNNING
66262306a36Sopenharmony_ci	 *         PRE_COPY_P2P -> RUNNING_P2P -> STOP
66362306a36Sopenharmony_ci	 *         PRE_COPY_P2P -> RUNNING_P2P -> STOP -> RESUMING
66462306a36Sopenharmony_ci	 *         RESUMING -> STOP -> RUNNING_P2P
66562306a36Sopenharmony_ci	 *         RESUMING -> STOP -> RUNNING_P2P -> PRE_COPY_P2P
66662306a36Sopenharmony_ci	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
66762306a36Sopenharmony_ci	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY
66862306a36Sopenharmony_ci	 *         RESUMING -> STOP -> STOP_COPY
66962306a36Sopenharmony_ci	 *         RUNNING -> RUNNING_P2P -> PRE_COPY_P2P
67062306a36Sopenharmony_ci	 *         RUNNING -> RUNNING_P2P -> STOP
67162306a36Sopenharmony_ci	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
67262306a36Sopenharmony_ci	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
67362306a36Sopenharmony_ci	 *         RUNNING_P2P -> RUNNING -> PRE_COPY
67462306a36Sopenharmony_ci	 *         RUNNING_P2P -> STOP -> RESUMING
67562306a36Sopenharmony_ci	 *         RUNNING_P2P -> STOP -> STOP_COPY
67662306a36Sopenharmony_ci	 *         STOP -> RUNNING_P2P -> PRE_COPY_P2P
67762306a36Sopenharmony_ci	 *         STOP -> RUNNING_P2P -> RUNNING
67862306a36Sopenharmony_ci	 *         STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY
67962306a36Sopenharmony_ci	 *         STOP_COPY -> STOP -> RESUMING
68062306a36Sopenharmony_ci	 *         STOP_COPY -> STOP -> RUNNING_P2P
68162306a36Sopenharmony_ci	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
68262306a36Sopenharmony_ci	 *
68362306a36Sopenharmony_ci	 *  The following transitions are blocked:
68462306a36Sopenharmony_ci	 *         STOP_COPY -> PRE_COPY
68562306a36Sopenharmony_ci	 *         STOP_COPY -> PRE_COPY_P2P
68662306a36Sopenharmony_ci	 */
68762306a36Sopenharmony_ci	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
68862306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_STOP] = {
68962306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
69062306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
69162306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
69262306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
69362306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
69462306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
69562306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
69662306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
69762306a36Sopenharmony_ci		},
69862306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_RUNNING] = {
69962306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
70062306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
70162306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
70262306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
70362306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
70462306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
70562306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
70662306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
70762306a36Sopenharmony_ci		},
70862306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_PRE_COPY] = {
70962306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING,
71062306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
71162306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
71262306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
71362306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
71462306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING,
71562306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING,
71662306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
71762306a36Sopenharmony_ci		},
71862306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_PRE_COPY_P2P] = {
71962306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
72062306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
72162306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
72262306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
72362306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
72462306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
72562306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
72662306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
72762306a36Sopenharmony_ci		},
72862306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_STOP_COPY] = {
72962306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
73062306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
73162306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR,
73262306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR,
73362306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
73462306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
73562306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
73662306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
73762306a36Sopenharmony_ci		},
73862306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_RESUMING] = {
73962306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
74062306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
74162306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_STOP,
74262306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_STOP,
74362306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
74462306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
74562306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
74662306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
74762306a36Sopenharmony_ci		},
74862306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
74962306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
75062306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
75162306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING,
75262306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
75362306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
75462306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
75562306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
75662306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
75762306a36Sopenharmony_ci		},
75862306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_ERROR] = {
75962306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
76062306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
76162306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR,
76262306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR,
76362306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
76462306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
76562306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
76662306a36Sopenharmony_ci			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
76762306a36Sopenharmony_ci		},
76862306a36Sopenharmony_ci	};
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_ci	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
77162306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
77262306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
77362306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_PRE_COPY] =
77462306a36Sopenharmony_ci			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY,
77562306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_MIGRATION_STOP_COPY |
77662306a36Sopenharmony_ci						   VFIO_MIGRATION_P2P |
77762306a36Sopenharmony_ci						   VFIO_MIGRATION_PRE_COPY,
77862306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
77962306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
78062306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_RUNNING_P2P] =
78162306a36Sopenharmony_ci			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
78262306a36Sopenharmony_ci		[VFIO_DEVICE_STATE_ERROR] = ~0U,
78362306a36Sopenharmony_ci	};
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
78662306a36Sopenharmony_ci		    (state_flags_table[cur_fsm] & device->migration_flags) !=
78762306a36Sopenharmony_ci			state_flags_table[cur_fsm]))
78862306a36Sopenharmony_ci		return -EINVAL;
78962306a36Sopenharmony_ci
79062306a36Sopenharmony_ci	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
79162306a36Sopenharmony_ci	   (state_flags_table[new_fsm] & device->migration_flags) !=
79262306a36Sopenharmony_ci			state_flags_table[new_fsm])
79362306a36Sopenharmony_ci		return -EINVAL;
79462306a36Sopenharmony_ci
79562306a36Sopenharmony_ci	/*
79662306a36Sopenharmony_ci	 * Arcs touching optional and unsupported states are skipped over. The
79762306a36Sopenharmony_ci	 * driver will instead see an arc from the original state to the next
79862306a36Sopenharmony_ci	 * logical state, as per the above comment.
79962306a36Sopenharmony_ci	 */
80062306a36Sopenharmony_ci	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
80162306a36Sopenharmony_ci	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
80262306a36Sopenharmony_ci			state_flags_table[*next_fsm])
80362306a36Sopenharmony_ci		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];
80462306a36Sopenharmony_ci
80562306a36Sopenharmony_ci	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
80662306a36Sopenharmony_ci}
80762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
80862306a36Sopenharmony_ci
80962306a36Sopenharmony_ci/*
81062306a36Sopenharmony_ci * Convert the drivers's struct file into a FD number and return it to userspace
81162306a36Sopenharmony_ci */
81262306a36Sopenharmony_cistatic int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
81362306a36Sopenharmony_ci				   struct vfio_device_feature_mig_state *mig)
81462306a36Sopenharmony_ci{
81562306a36Sopenharmony_ci	int ret;
81662306a36Sopenharmony_ci	int fd;
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci	fd = get_unused_fd_flags(O_CLOEXEC);
81962306a36Sopenharmony_ci	if (fd < 0) {
82062306a36Sopenharmony_ci		ret = fd;
82162306a36Sopenharmony_ci		goto out_fput;
82262306a36Sopenharmony_ci	}
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci	mig->data_fd = fd;
82562306a36Sopenharmony_ci	if (copy_to_user(arg, mig, sizeof(*mig))) {
82662306a36Sopenharmony_ci		ret = -EFAULT;
82762306a36Sopenharmony_ci		goto out_put_unused;
82862306a36Sopenharmony_ci	}
82962306a36Sopenharmony_ci	fd_install(fd, filp);
83062306a36Sopenharmony_ci	return 0;
83162306a36Sopenharmony_ci
83262306a36Sopenharmony_ciout_put_unused:
83362306a36Sopenharmony_ci	put_unused_fd(fd);
83462306a36Sopenharmony_ciout_fput:
83562306a36Sopenharmony_ci	fput(filp);
83662306a36Sopenharmony_ci	return ret;
83762306a36Sopenharmony_ci}
83862306a36Sopenharmony_ci
83962306a36Sopenharmony_cistatic int
84062306a36Sopenharmony_civfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
84162306a36Sopenharmony_ci					   u32 flags, void __user *arg,
84262306a36Sopenharmony_ci					   size_t argsz)
84362306a36Sopenharmony_ci{
84462306a36Sopenharmony_ci	size_t minsz =
84562306a36Sopenharmony_ci		offsetofend(struct vfio_device_feature_mig_state, data_fd);
84662306a36Sopenharmony_ci	struct vfio_device_feature_mig_state mig;
84762306a36Sopenharmony_ci	struct file *filp = NULL;
84862306a36Sopenharmony_ci	int ret;
84962306a36Sopenharmony_ci
85062306a36Sopenharmony_ci	if (!device->mig_ops)
85162306a36Sopenharmony_ci		return -ENOTTY;
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_ci	ret = vfio_check_feature(flags, argsz,
85462306a36Sopenharmony_ci				 VFIO_DEVICE_FEATURE_SET |
85562306a36Sopenharmony_ci				 VFIO_DEVICE_FEATURE_GET,
85662306a36Sopenharmony_ci				 sizeof(mig));
85762306a36Sopenharmony_ci	if (ret != 1)
85862306a36Sopenharmony_ci		return ret;
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ci	if (copy_from_user(&mig, arg, minsz))
86162306a36Sopenharmony_ci		return -EFAULT;
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci	if (flags & VFIO_DEVICE_FEATURE_GET) {
86462306a36Sopenharmony_ci		enum vfio_device_mig_state curr_state;
86562306a36Sopenharmony_ci
86662306a36Sopenharmony_ci		ret = device->mig_ops->migration_get_state(device,
86762306a36Sopenharmony_ci							   &curr_state);
86862306a36Sopenharmony_ci		if (ret)
86962306a36Sopenharmony_ci			return ret;
87062306a36Sopenharmony_ci		mig.device_state = curr_state;
87162306a36Sopenharmony_ci		goto out_copy;
87262306a36Sopenharmony_ci	}
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_ci	/* Handle the VFIO_DEVICE_FEATURE_SET */
87562306a36Sopenharmony_ci	filp = device->mig_ops->migration_set_state(device, mig.device_state);
87662306a36Sopenharmony_ci	if (IS_ERR(filp) || !filp)
87762306a36Sopenharmony_ci		goto out_copy;
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ci	return vfio_ioct_mig_return_fd(filp, arg, &mig);
88062306a36Sopenharmony_ciout_copy:
88162306a36Sopenharmony_ci	mig.data_fd = -1;
88262306a36Sopenharmony_ci	if (copy_to_user(arg, &mig, sizeof(mig)))
88362306a36Sopenharmony_ci		return -EFAULT;
88462306a36Sopenharmony_ci	if (IS_ERR(filp))
88562306a36Sopenharmony_ci		return PTR_ERR(filp);
88662306a36Sopenharmony_ci	return 0;
88762306a36Sopenharmony_ci}
88862306a36Sopenharmony_ci
88962306a36Sopenharmony_cistatic int
89062306a36Sopenharmony_civfio_ioctl_device_feature_migration_data_size(struct vfio_device *device,
89162306a36Sopenharmony_ci					      u32 flags, void __user *arg,
89262306a36Sopenharmony_ci					      size_t argsz)
89362306a36Sopenharmony_ci{
89462306a36Sopenharmony_ci	struct vfio_device_feature_mig_data_size data_size = {};
89562306a36Sopenharmony_ci	unsigned long stop_copy_length;
89662306a36Sopenharmony_ci	int ret;
89762306a36Sopenharmony_ci
89862306a36Sopenharmony_ci	if (!device->mig_ops)
89962306a36Sopenharmony_ci		return -ENOTTY;
90062306a36Sopenharmony_ci
90162306a36Sopenharmony_ci	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
90262306a36Sopenharmony_ci				 sizeof(data_size));
90362306a36Sopenharmony_ci	if (ret != 1)
90462306a36Sopenharmony_ci		return ret;
90562306a36Sopenharmony_ci
90662306a36Sopenharmony_ci	ret = device->mig_ops->migration_get_data_size(device, &stop_copy_length);
90762306a36Sopenharmony_ci	if (ret)
90862306a36Sopenharmony_ci		return ret;
90962306a36Sopenharmony_ci
91062306a36Sopenharmony_ci	data_size.stop_copy_length = stop_copy_length;
91162306a36Sopenharmony_ci	if (copy_to_user(arg, &data_size, sizeof(data_size)))
91262306a36Sopenharmony_ci		return -EFAULT;
91362306a36Sopenharmony_ci
91462306a36Sopenharmony_ci	return 0;
91562306a36Sopenharmony_ci}
91662306a36Sopenharmony_ci
91762306a36Sopenharmony_cistatic int vfio_ioctl_device_feature_migration(struct vfio_device *device,
91862306a36Sopenharmony_ci					       u32 flags, void __user *arg,
91962306a36Sopenharmony_ci					       size_t argsz)
92062306a36Sopenharmony_ci{
92162306a36Sopenharmony_ci	struct vfio_device_feature_migration mig = {
92262306a36Sopenharmony_ci		.flags = device->migration_flags,
92362306a36Sopenharmony_ci	};
92462306a36Sopenharmony_ci	int ret;
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci	if (!device->mig_ops)
92762306a36Sopenharmony_ci		return -ENOTTY;
92862306a36Sopenharmony_ci
92962306a36Sopenharmony_ci	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
93062306a36Sopenharmony_ci				 sizeof(mig));
93162306a36Sopenharmony_ci	if (ret != 1)
93262306a36Sopenharmony_ci		return ret;
93362306a36Sopenharmony_ci	if (copy_to_user(arg, &mig, sizeof(mig)))
93462306a36Sopenharmony_ci		return -EFAULT;
93562306a36Sopenharmony_ci	return 0;
93662306a36Sopenharmony_ci}
93762306a36Sopenharmony_ci
93862306a36Sopenharmony_civoid vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes,
93962306a36Sopenharmony_ci			      u32 req_nodes)
94062306a36Sopenharmony_ci{
94162306a36Sopenharmony_ci	struct interval_tree_node *prev, *curr, *comb_start, *comb_end;
94262306a36Sopenharmony_ci	unsigned long min_gap, curr_gap;
94362306a36Sopenharmony_ci
94462306a36Sopenharmony_ci	/* Special shortcut when a single range is required */
94562306a36Sopenharmony_ci	if (req_nodes == 1) {
94662306a36Sopenharmony_ci		unsigned long last;
94762306a36Sopenharmony_ci
94862306a36Sopenharmony_ci		comb_start = interval_tree_iter_first(root, 0, ULONG_MAX);
94962306a36Sopenharmony_ci		curr = comb_start;
95062306a36Sopenharmony_ci		while (curr) {
95162306a36Sopenharmony_ci			last = curr->last;
95262306a36Sopenharmony_ci			prev = curr;
95362306a36Sopenharmony_ci			curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
95462306a36Sopenharmony_ci			if (prev != comb_start)
95562306a36Sopenharmony_ci				interval_tree_remove(prev, root);
95662306a36Sopenharmony_ci		}
95762306a36Sopenharmony_ci		comb_start->last = last;
95862306a36Sopenharmony_ci		return;
95962306a36Sopenharmony_ci	}
96062306a36Sopenharmony_ci
96162306a36Sopenharmony_ci	/* Combine ranges which have the smallest gap */
96262306a36Sopenharmony_ci	while (cur_nodes > req_nodes) {
96362306a36Sopenharmony_ci		prev = NULL;
96462306a36Sopenharmony_ci		min_gap = ULONG_MAX;
96562306a36Sopenharmony_ci		curr = interval_tree_iter_first(root, 0, ULONG_MAX);
96662306a36Sopenharmony_ci		while (curr) {
96762306a36Sopenharmony_ci			if (prev) {
96862306a36Sopenharmony_ci				curr_gap = curr->start - prev->last;
96962306a36Sopenharmony_ci				if (curr_gap < min_gap) {
97062306a36Sopenharmony_ci					min_gap = curr_gap;
97162306a36Sopenharmony_ci					comb_start = prev;
97262306a36Sopenharmony_ci					comb_end = curr;
97362306a36Sopenharmony_ci				}
97462306a36Sopenharmony_ci			}
97562306a36Sopenharmony_ci			prev = curr;
97662306a36Sopenharmony_ci			curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
97762306a36Sopenharmony_ci		}
97862306a36Sopenharmony_ci		comb_start->last = comb_end->last;
97962306a36Sopenharmony_ci		interval_tree_remove(comb_end, root);
98062306a36Sopenharmony_ci		cur_nodes--;
98162306a36Sopenharmony_ci	}
98262306a36Sopenharmony_ci}
98362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_combine_iova_ranges);
98462306a36Sopenharmony_ci
98562306a36Sopenharmony_ci/* Ranges should fit into a single kernel page */
98662306a36Sopenharmony_ci#define LOG_MAX_RANGES \
98762306a36Sopenharmony_ci	(PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))
98862306a36Sopenharmony_ci
98962306a36Sopenharmony_cistatic int
99062306a36Sopenharmony_civfio_ioctl_device_feature_logging_start(struct vfio_device *device,
99162306a36Sopenharmony_ci					u32 flags, void __user *arg,
99262306a36Sopenharmony_ci					size_t argsz)
99362306a36Sopenharmony_ci{
99462306a36Sopenharmony_ci	size_t minsz =
99562306a36Sopenharmony_ci		offsetofend(struct vfio_device_feature_dma_logging_control,
99662306a36Sopenharmony_ci			    ranges);
99762306a36Sopenharmony_ci	struct vfio_device_feature_dma_logging_range __user *ranges;
99862306a36Sopenharmony_ci	struct vfio_device_feature_dma_logging_control control;
99962306a36Sopenharmony_ci	struct vfio_device_feature_dma_logging_range range;
100062306a36Sopenharmony_ci	struct rb_root_cached root = RB_ROOT_CACHED;
100162306a36Sopenharmony_ci	struct interval_tree_node *nodes;
100262306a36Sopenharmony_ci	u64 iova_end;
100362306a36Sopenharmony_ci	u32 nnodes;
100462306a36Sopenharmony_ci	int i, ret;
100562306a36Sopenharmony_ci
100662306a36Sopenharmony_ci	if (!device->log_ops)
100762306a36Sopenharmony_ci		return -ENOTTY;
100862306a36Sopenharmony_ci
100962306a36Sopenharmony_ci	ret = vfio_check_feature(flags, argsz,
101062306a36Sopenharmony_ci				 VFIO_DEVICE_FEATURE_SET,
101162306a36Sopenharmony_ci				 sizeof(control));
101262306a36Sopenharmony_ci	if (ret != 1)
101362306a36Sopenharmony_ci		return ret;
101462306a36Sopenharmony_ci
101562306a36Sopenharmony_ci	if (copy_from_user(&control, arg, minsz))
101662306a36Sopenharmony_ci		return -EFAULT;
101762306a36Sopenharmony_ci
101862306a36Sopenharmony_ci	nnodes = control.num_ranges;
101962306a36Sopenharmony_ci	if (!nnodes)
102062306a36Sopenharmony_ci		return -EINVAL;
102162306a36Sopenharmony_ci
102262306a36Sopenharmony_ci	if (nnodes > LOG_MAX_RANGES)
102362306a36Sopenharmony_ci		return -E2BIG;
102462306a36Sopenharmony_ci
102562306a36Sopenharmony_ci	ranges = u64_to_user_ptr(control.ranges);
102662306a36Sopenharmony_ci	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
102762306a36Sopenharmony_ci			      GFP_KERNEL);
102862306a36Sopenharmony_ci	if (!nodes)
102962306a36Sopenharmony_ci		return -ENOMEM;
103062306a36Sopenharmony_ci
103162306a36Sopenharmony_ci	for (i = 0; i < nnodes; i++) {
103262306a36Sopenharmony_ci		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
103362306a36Sopenharmony_ci			ret = -EFAULT;
103462306a36Sopenharmony_ci			goto end;
103562306a36Sopenharmony_ci		}
103662306a36Sopenharmony_ci		if (!IS_ALIGNED(range.iova, control.page_size) ||
103762306a36Sopenharmony_ci		    !IS_ALIGNED(range.length, control.page_size)) {
103862306a36Sopenharmony_ci			ret = -EINVAL;
103962306a36Sopenharmony_ci			goto end;
104062306a36Sopenharmony_ci		}
104162306a36Sopenharmony_ci
104262306a36Sopenharmony_ci		if (check_add_overflow(range.iova, range.length, &iova_end) ||
104362306a36Sopenharmony_ci		    iova_end > ULONG_MAX) {
104462306a36Sopenharmony_ci			ret = -EOVERFLOW;
104562306a36Sopenharmony_ci			goto end;
104662306a36Sopenharmony_ci		}
104762306a36Sopenharmony_ci
104862306a36Sopenharmony_ci		nodes[i].start = range.iova;
104962306a36Sopenharmony_ci		nodes[i].last = range.iova + range.length - 1;
105062306a36Sopenharmony_ci		if (interval_tree_iter_first(&root, nodes[i].start,
105162306a36Sopenharmony_ci					     nodes[i].last)) {
105262306a36Sopenharmony_ci			/* Range overlapping */
105362306a36Sopenharmony_ci			ret = -EINVAL;
105462306a36Sopenharmony_ci			goto end;
105562306a36Sopenharmony_ci		}
105662306a36Sopenharmony_ci		interval_tree_insert(nodes + i, &root);
105762306a36Sopenharmony_ci	}
105862306a36Sopenharmony_ci
105962306a36Sopenharmony_ci	ret = device->log_ops->log_start(device, &root, nnodes,
106062306a36Sopenharmony_ci					 &control.page_size);
106162306a36Sopenharmony_ci	if (ret)
106262306a36Sopenharmony_ci		goto end;
106362306a36Sopenharmony_ci
106462306a36Sopenharmony_ci	if (copy_to_user(arg, &control, sizeof(control))) {
106562306a36Sopenharmony_ci		ret = -EFAULT;
106662306a36Sopenharmony_ci		device->log_ops->log_stop(device);
106762306a36Sopenharmony_ci	}
106862306a36Sopenharmony_ci
106962306a36Sopenharmony_ciend:
107062306a36Sopenharmony_ci	kfree(nodes);
107162306a36Sopenharmony_ci	return ret;
107262306a36Sopenharmony_ci}
107362306a36Sopenharmony_ci
107462306a36Sopenharmony_cistatic int
107562306a36Sopenharmony_civfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
107662306a36Sopenharmony_ci				       u32 flags, void __user *arg,
107762306a36Sopenharmony_ci				       size_t argsz)
107862306a36Sopenharmony_ci{
107962306a36Sopenharmony_ci	int ret;
108062306a36Sopenharmony_ci
108162306a36Sopenharmony_ci	if (!device->log_ops)
108262306a36Sopenharmony_ci		return -ENOTTY;
108362306a36Sopenharmony_ci
108462306a36Sopenharmony_ci	ret = vfio_check_feature(flags, argsz,
108562306a36Sopenharmony_ci				 VFIO_DEVICE_FEATURE_SET, 0);
108662306a36Sopenharmony_ci	if (ret != 1)
108762306a36Sopenharmony_ci		return ret;
108862306a36Sopenharmony_ci
108962306a36Sopenharmony_ci	return device->log_ops->log_stop(device);
109062306a36Sopenharmony_ci}
109162306a36Sopenharmony_ci
109262306a36Sopenharmony_cistatic int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
109362306a36Sopenharmony_ci					  unsigned long iova, size_t length,
109462306a36Sopenharmony_ci					  void *opaque)
109562306a36Sopenharmony_ci{
109662306a36Sopenharmony_ci	struct vfio_device *device = opaque;
109762306a36Sopenharmony_ci
109862306a36Sopenharmony_ci	return device->log_ops->log_read_and_clear(device, iova, length, iter);
109962306a36Sopenharmony_ci}
110062306a36Sopenharmony_ci
110162306a36Sopenharmony_cistatic int
110262306a36Sopenharmony_civfio_ioctl_device_feature_logging_report(struct vfio_device *device,
110362306a36Sopenharmony_ci					 u32 flags, void __user *arg,
110462306a36Sopenharmony_ci					 size_t argsz)
110562306a36Sopenharmony_ci{
110662306a36Sopenharmony_ci	size_t minsz =
110762306a36Sopenharmony_ci		offsetofend(struct vfio_device_feature_dma_logging_report,
110862306a36Sopenharmony_ci			    bitmap);
110962306a36Sopenharmony_ci	struct vfio_device_feature_dma_logging_report report;
111062306a36Sopenharmony_ci	struct iova_bitmap *iter;
111162306a36Sopenharmony_ci	u64 iova_end;
111262306a36Sopenharmony_ci	int ret;
111362306a36Sopenharmony_ci
111462306a36Sopenharmony_ci	if (!device->log_ops)
111562306a36Sopenharmony_ci		return -ENOTTY;
111662306a36Sopenharmony_ci
111762306a36Sopenharmony_ci	ret = vfio_check_feature(flags, argsz,
111862306a36Sopenharmony_ci				 VFIO_DEVICE_FEATURE_GET,
111962306a36Sopenharmony_ci				 sizeof(report));
112062306a36Sopenharmony_ci	if (ret != 1)
112162306a36Sopenharmony_ci		return ret;
112262306a36Sopenharmony_ci
112362306a36Sopenharmony_ci	if (copy_from_user(&report, arg, minsz))
112462306a36Sopenharmony_ci		return -EFAULT;
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_ci	if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
112762306a36Sopenharmony_ci		return -EINVAL;
112862306a36Sopenharmony_ci
112962306a36Sopenharmony_ci	if (check_add_overflow(report.iova, report.length, &iova_end) ||
113062306a36Sopenharmony_ci	    iova_end > ULONG_MAX)
113162306a36Sopenharmony_ci		return -EOVERFLOW;
113262306a36Sopenharmony_ci
113362306a36Sopenharmony_ci	iter = iova_bitmap_alloc(report.iova, report.length,
113462306a36Sopenharmony_ci				 report.page_size,
113562306a36Sopenharmony_ci				 u64_to_user_ptr(report.bitmap));
113662306a36Sopenharmony_ci	if (IS_ERR(iter))
113762306a36Sopenharmony_ci		return PTR_ERR(iter);
113862306a36Sopenharmony_ci
113962306a36Sopenharmony_ci	ret = iova_bitmap_for_each(iter, device,
114062306a36Sopenharmony_ci				   vfio_device_log_read_and_clear);
114162306a36Sopenharmony_ci
114262306a36Sopenharmony_ci	iova_bitmap_free(iter);
114362306a36Sopenharmony_ci	return ret;
114462306a36Sopenharmony_ci}
114562306a36Sopenharmony_ci
114662306a36Sopenharmony_cistatic int vfio_ioctl_device_feature(struct vfio_device *device,
114762306a36Sopenharmony_ci				     struct vfio_device_feature __user *arg)
114862306a36Sopenharmony_ci{
114962306a36Sopenharmony_ci	size_t minsz = offsetofend(struct vfio_device_feature, flags);
115062306a36Sopenharmony_ci	struct vfio_device_feature feature;
115162306a36Sopenharmony_ci
115262306a36Sopenharmony_ci	if (copy_from_user(&feature, arg, minsz))
115362306a36Sopenharmony_ci		return -EFAULT;
115462306a36Sopenharmony_ci
115562306a36Sopenharmony_ci	if (feature.argsz < minsz)
115662306a36Sopenharmony_ci		return -EINVAL;
115762306a36Sopenharmony_ci
115862306a36Sopenharmony_ci	/* Check unknown flags */
115962306a36Sopenharmony_ci	if (feature.flags &
116062306a36Sopenharmony_ci	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
116162306a36Sopenharmony_ci	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
116262306a36Sopenharmony_ci		return -EINVAL;
116362306a36Sopenharmony_ci
116462306a36Sopenharmony_ci	/* GET & SET are mutually exclusive except with PROBE */
116562306a36Sopenharmony_ci	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
116662306a36Sopenharmony_ci	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
116762306a36Sopenharmony_ci	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
116862306a36Sopenharmony_ci		return -EINVAL;
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
117162306a36Sopenharmony_ci	case VFIO_DEVICE_FEATURE_MIGRATION:
117262306a36Sopenharmony_ci		return vfio_ioctl_device_feature_migration(
117362306a36Sopenharmony_ci			device, feature.flags, arg->data,
117462306a36Sopenharmony_ci			feature.argsz - minsz);
117562306a36Sopenharmony_ci	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
117662306a36Sopenharmony_ci		return vfio_ioctl_device_feature_mig_device_state(
117762306a36Sopenharmony_ci			device, feature.flags, arg->data,
117862306a36Sopenharmony_ci			feature.argsz - minsz);
117962306a36Sopenharmony_ci	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
118062306a36Sopenharmony_ci		return vfio_ioctl_device_feature_logging_start(
118162306a36Sopenharmony_ci			device, feature.flags, arg->data,
118262306a36Sopenharmony_ci			feature.argsz - minsz);
118362306a36Sopenharmony_ci	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
118462306a36Sopenharmony_ci		return vfio_ioctl_device_feature_logging_stop(
118562306a36Sopenharmony_ci			device, feature.flags, arg->data,
118662306a36Sopenharmony_ci			feature.argsz - minsz);
118762306a36Sopenharmony_ci	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
118862306a36Sopenharmony_ci		return vfio_ioctl_device_feature_logging_report(
118962306a36Sopenharmony_ci			device, feature.flags, arg->data,
119062306a36Sopenharmony_ci			feature.argsz - minsz);
119162306a36Sopenharmony_ci	case VFIO_DEVICE_FEATURE_MIG_DATA_SIZE:
119262306a36Sopenharmony_ci		return vfio_ioctl_device_feature_migration_data_size(
119362306a36Sopenharmony_ci			device, feature.flags, arg->data,
119462306a36Sopenharmony_ci			feature.argsz - minsz);
119562306a36Sopenharmony_ci	default:
119662306a36Sopenharmony_ci		if (unlikely(!device->ops->device_feature))
119762306a36Sopenharmony_ci			return -EINVAL;
119862306a36Sopenharmony_ci		return device->ops->device_feature(device, feature.flags,
119962306a36Sopenharmony_ci						   arg->data,
120062306a36Sopenharmony_ci						   feature.argsz - minsz);
120162306a36Sopenharmony_ci	}
120262306a36Sopenharmony_ci}
120362306a36Sopenharmony_ci
120462306a36Sopenharmony_cistatic long vfio_device_fops_unl_ioctl(struct file *filep,
120562306a36Sopenharmony_ci				       unsigned int cmd, unsigned long arg)
120662306a36Sopenharmony_ci{
120762306a36Sopenharmony_ci	struct vfio_device_file *df = filep->private_data;
120862306a36Sopenharmony_ci	struct vfio_device *device = df->device;
120962306a36Sopenharmony_ci	void __user *uptr = (void __user *)arg;
121062306a36Sopenharmony_ci	int ret;
121162306a36Sopenharmony_ci
121262306a36Sopenharmony_ci	if (cmd == VFIO_DEVICE_BIND_IOMMUFD)
121362306a36Sopenharmony_ci		return vfio_df_ioctl_bind_iommufd(df, uptr);
121462306a36Sopenharmony_ci
121562306a36Sopenharmony_ci	/* Paired with smp_store_release() following vfio_df_open() */
121662306a36Sopenharmony_ci	if (!smp_load_acquire(&df->access_granted))
121762306a36Sopenharmony_ci		return -EINVAL;
121862306a36Sopenharmony_ci
121962306a36Sopenharmony_ci	ret = vfio_device_pm_runtime_get(device);
122062306a36Sopenharmony_ci	if (ret)
122162306a36Sopenharmony_ci		return ret;
122262306a36Sopenharmony_ci
122362306a36Sopenharmony_ci	/* cdev only ioctls */
122462306a36Sopenharmony_ci	if (IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) && !df->group) {
122562306a36Sopenharmony_ci		switch (cmd) {
122662306a36Sopenharmony_ci		case VFIO_DEVICE_ATTACH_IOMMUFD_PT:
122762306a36Sopenharmony_ci			ret = vfio_df_ioctl_attach_pt(df, uptr);
122862306a36Sopenharmony_ci			goto out;
122962306a36Sopenharmony_ci
123062306a36Sopenharmony_ci		case VFIO_DEVICE_DETACH_IOMMUFD_PT:
123162306a36Sopenharmony_ci			ret = vfio_df_ioctl_detach_pt(df, uptr);
123262306a36Sopenharmony_ci			goto out;
123362306a36Sopenharmony_ci		}
123462306a36Sopenharmony_ci	}
123562306a36Sopenharmony_ci
123662306a36Sopenharmony_ci	switch (cmd) {
123762306a36Sopenharmony_ci	case VFIO_DEVICE_FEATURE:
123862306a36Sopenharmony_ci		ret = vfio_ioctl_device_feature(device, uptr);
123962306a36Sopenharmony_ci		break;
124062306a36Sopenharmony_ci
124162306a36Sopenharmony_ci	default:
124262306a36Sopenharmony_ci		if (unlikely(!device->ops->ioctl))
124362306a36Sopenharmony_ci			ret = -EINVAL;
124462306a36Sopenharmony_ci		else
124562306a36Sopenharmony_ci			ret = device->ops->ioctl(device, cmd, arg);
124662306a36Sopenharmony_ci		break;
124762306a36Sopenharmony_ci	}
124862306a36Sopenharmony_ciout:
124962306a36Sopenharmony_ci	vfio_device_pm_runtime_put(device);
125062306a36Sopenharmony_ci	return ret;
125162306a36Sopenharmony_ci}
125262306a36Sopenharmony_ci
125362306a36Sopenharmony_cistatic ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
125462306a36Sopenharmony_ci				     size_t count, loff_t *ppos)
125562306a36Sopenharmony_ci{
125662306a36Sopenharmony_ci	struct vfio_device_file *df = filep->private_data;
125762306a36Sopenharmony_ci	struct vfio_device *device = df->device;
125862306a36Sopenharmony_ci
125962306a36Sopenharmony_ci	/* Paired with smp_store_release() following vfio_df_open() */
126062306a36Sopenharmony_ci	if (!smp_load_acquire(&df->access_granted))
126162306a36Sopenharmony_ci		return -EINVAL;
126262306a36Sopenharmony_ci
126362306a36Sopenharmony_ci	if (unlikely(!device->ops->read))
126462306a36Sopenharmony_ci		return -EINVAL;
126562306a36Sopenharmony_ci
126662306a36Sopenharmony_ci	return device->ops->read(device, buf, count, ppos);
126762306a36Sopenharmony_ci}
126862306a36Sopenharmony_ci
126962306a36Sopenharmony_cistatic ssize_t vfio_device_fops_write(struct file *filep,
127062306a36Sopenharmony_ci				      const char __user *buf,
127162306a36Sopenharmony_ci				      size_t count, loff_t *ppos)
127262306a36Sopenharmony_ci{
127362306a36Sopenharmony_ci	struct vfio_device_file *df = filep->private_data;
127462306a36Sopenharmony_ci	struct vfio_device *device = df->device;
127562306a36Sopenharmony_ci
127662306a36Sopenharmony_ci	/* Paired with smp_store_release() following vfio_df_open() */
127762306a36Sopenharmony_ci	if (!smp_load_acquire(&df->access_granted))
127862306a36Sopenharmony_ci		return -EINVAL;
127962306a36Sopenharmony_ci
128062306a36Sopenharmony_ci	if (unlikely(!device->ops->write))
128162306a36Sopenharmony_ci		return -EINVAL;
128262306a36Sopenharmony_ci
128362306a36Sopenharmony_ci	return device->ops->write(device, buf, count, ppos);
128462306a36Sopenharmony_ci}
128562306a36Sopenharmony_ci
128662306a36Sopenharmony_cistatic int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
128762306a36Sopenharmony_ci{
128862306a36Sopenharmony_ci	struct vfio_device_file *df = filep->private_data;
128962306a36Sopenharmony_ci	struct vfio_device *device = df->device;
129062306a36Sopenharmony_ci
129162306a36Sopenharmony_ci	/* Paired with smp_store_release() following vfio_df_open() */
129262306a36Sopenharmony_ci	if (!smp_load_acquire(&df->access_granted))
129362306a36Sopenharmony_ci		return -EINVAL;
129462306a36Sopenharmony_ci
129562306a36Sopenharmony_ci	if (unlikely(!device->ops->mmap))
129662306a36Sopenharmony_ci		return -EINVAL;
129762306a36Sopenharmony_ci
129862306a36Sopenharmony_ci	return device->ops->mmap(device, vma);
129962306a36Sopenharmony_ci}
130062306a36Sopenharmony_ci
130162306a36Sopenharmony_ciconst struct file_operations vfio_device_fops = {
130262306a36Sopenharmony_ci	.owner		= THIS_MODULE,
130362306a36Sopenharmony_ci	.open		= vfio_device_fops_cdev_open,
130462306a36Sopenharmony_ci	.release	= vfio_device_fops_release,
130562306a36Sopenharmony_ci	.read		= vfio_device_fops_read,
130662306a36Sopenharmony_ci	.write		= vfio_device_fops_write,
130762306a36Sopenharmony_ci	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
130862306a36Sopenharmony_ci	.compat_ioctl	= compat_ptr_ioctl,
130962306a36Sopenharmony_ci	.mmap		= vfio_device_fops_mmap,
131062306a36Sopenharmony_ci};
131162306a36Sopenharmony_ci
131262306a36Sopenharmony_cistatic struct vfio_device *vfio_device_from_file(struct file *file)
131362306a36Sopenharmony_ci{
131462306a36Sopenharmony_ci	struct vfio_device_file *df = file->private_data;
131562306a36Sopenharmony_ci
131662306a36Sopenharmony_ci	if (file->f_op != &vfio_device_fops)
131762306a36Sopenharmony_ci		return NULL;
131862306a36Sopenharmony_ci	return df->device;
131962306a36Sopenharmony_ci}
132062306a36Sopenharmony_ci
132162306a36Sopenharmony_ci/**
132262306a36Sopenharmony_ci * vfio_file_is_valid - True if the file is valid vfio file
132362306a36Sopenharmony_ci * @file: VFIO group file or VFIO device file
132462306a36Sopenharmony_ci */
132562306a36Sopenharmony_cibool vfio_file_is_valid(struct file *file)
132662306a36Sopenharmony_ci{
132762306a36Sopenharmony_ci	return vfio_group_from_file(file) ||
132862306a36Sopenharmony_ci	       vfio_device_from_file(file);
132962306a36Sopenharmony_ci}
133062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_file_is_valid);
133162306a36Sopenharmony_ci
133262306a36Sopenharmony_ci/**
133362306a36Sopenharmony_ci * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
133462306a36Sopenharmony_ci *        is always CPU cache coherent
133562306a36Sopenharmony_ci * @file: VFIO group file or VFIO device file
133662306a36Sopenharmony_ci *
133762306a36Sopenharmony_ci * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
133862306a36Sopenharmony_ci * bit in DMA transactions. A return of false indicates that the user has
133962306a36Sopenharmony_ci * rights to access additional instructions such as wbinvd on x86.
134062306a36Sopenharmony_ci */
134162306a36Sopenharmony_cibool vfio_file_enforced_coherent(struct file *file)
134262306a36Sopenharmony_ci{
134362306a36Sopenharmony_ci	struct vfio_device *device;
134462306a36Sopenharmony_ci	struct vfio_group *group;
134562306a36Sopenharmony_ci
134662306a36Sopenharmony_ci	group = vfio_group_from_file(file);
134762306a36Sopenharmony_ci	if (group)
134862306a36Sopenharmony_ci		return vfio_group_enforced_coherent(group);
134962306a36Sopenharmony_ci
135062306a36Sopenharmony_ci	device = vfio_device_from_file(file);
135162306a36Sopenharmony_ci	if (device)
135262306a36Sopenharmony_ci		return device_iommu_capable(device->dev,
135362306a36Sopenharmony_ci					    IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
135462306a36Sopenharmony_ci
135562306a36Sopenharmony_ci	return true;
135662306a36Sopenharmony_ci}
135762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
135862306a36Sopenharmony_ci
135962306a36Sopenharmony_cistatic void vfio_device_file_set_kvm(struct file *file, struct kvm *kvm)
136062306a36Sopenharmony_ci{
136162306a36Sopenharmony_ci	struct vfio_device_file *df = file->private_data;
136262306a36Sopenharmony_ci
136362306a36Sopenharmony_ci	/*
136462306a36Sopenharmony_ci	 * The kvm is first recorded in the vfio_device_file, and will
136562306a36Sopenharmony_ci	 * be propagated to vfio_device::kvm when the file is bound to
136662306a36Sopenharmony_ci	 * iommufd successfully in the vfio device cdev path.
136762306a36Sopenharmony_ci	 */
136862306a36Sopenharmony_ci	spin_lock(&df->kvm_ref_lock);
136962306a36Sopenharmony_ci	df->kvm = kvm;
137062306a36Sopenharmony_ci	spin_unlock(&df->kvm_ref_lock);
137162306a36Sopenharmony_ci}
137262306a36Sopenharmony_ci
/**
 * vfio_file_set_kvm - Link a kvm with VFIO drivers
 * @file: VFIO group file or VFIO device file
 * @kvm: KVM to link
 *
 * When a VFIO device is first opened the KVM will be available in
 * device->kvm if one was associated with the file.
 *
 * Files that are neither VFIO group files nor VFIO device files are ignored.
 */
void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
	struct vfio_group *group = vfio_group_from_file(file);

	if (group)
		vfio_group_set_kvm(group, kvm);

	/* Checked independently of the group lookup above */
	if (!vfio_device_from_file(file))
		return;

	vfio_device_file_set_kvm(file, kvm);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
139362306a36Sopenharmony_ci
139462306a36Sopenharmony_ci/*
139562306a36Sopenharmony_ci * Sub-module support
139662306a36Sopenharmony_ci */
139762306a36Sopenharmony_ci/*
139862306a36Sopenharmony_ci * Helper for managing a buffer of info chain capabilities, allocate or
139962306a36Sopenharmony_ci * reallocate a buffer with additional @size, filling in @id and @version
140062306a36Sopenharmony_ci * of the capability.  A pointer to the new capability is returned.
140162306a36Sopenharmony_ci *
140262306a36Sopenharmony_ci * NB. The chain is based at the head of the buffer, so new entries are
140362306a36Sopenharmony_ci * added to the tail, vfio_info_cap_shift() should be called to fixup the
140462306a36Sopenharmony_ci * next offsets prior to copying to the user buffer.
140562306a36Sopenharmony_ci */
140662306a36Sopenharmony_cistruct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
140762306a36Sopenharmony_ci					       size_t size, u16 id, u16 version)
140862306a36Sopenharmony_ci{
140962306a36Sopenharmony_ci	void *buf;
141062306a36Sopenharmony_ci	struct vfio_info_cap_header *header, *tmp;
141162306a36Sopenharmony_ci
141262306a36Sopenharmony_ci	/* Ensure that the next capability struct will be aligned */
141362306a36Sopenharmony_ci	size = ALIGN(size, sizeof(u64));
141462306a36Sopenharmony_ci
141562306a36Sopenharmony_ci	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
141662306a36Sopenharmony_ci	if (!buf) {
141762306a36Sopenharmony_ci		kfree(caps->buf);
141862306a36Sopenharmony_ci		caps->buf = NULL;
141962306a36Sopenharmony_ci		caps->size = 0;
142062306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
142162306a36Sopenharmony_ci	}
142262306a36Sopenharmony_ci
142362306a36Sopenharmony_ci	caps->buf = buf;
142462306a36Sopenharmony_ci	header = buf + caps->size;
142562306a36Sopenharmony_ci
142662306a36Sopenharmony_ci	/* Eventually copied to user buffer, zero */
142762306a36Sopenharmony_ci	memset(header, 0, size);
142862306a36Sopenharmony_ci
142962306a36Sopenharmony_ci	header->id = id;
143062306a36Sopenharmony_ci	header->version = version;
143162306a36Sopenharmony_ci
143262306a36Sopenharmony_ci	/* Add to the end of the capability chain */
143362306a36Sopenharmony_ci	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
143462306a36Sopenharmony_ci		; /* nothing */
143562306a36Sopenharmony_ci
143662306a36Sopenharmony_ci	tmp->next = caps->size;
143762306a36Sopenharmony_ci	caps->size += size;
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_ci	return header;
144062306a36Sopenharmony_ci}
144162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_info_cap_add);
144262306a36Sopenharmony_ci
144362306a36Sopenharmony_civoid vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
144462306a36Sopenharmony_ci{
144562306a36Sopenharmony_ci	struct vfio_info_cap_header *tmp;
144662306a36Sopenharmony_ci	void *buf = (void *)caps->buf;
144762306a36Sopenharmony_ci
144862306a36Sopenharmony_ci	/* Capability structs should start with proper alignment */
144962306a36Sopenharmony_ci	WARN_ON(!IS_ALIGNED(offset, sizeof(u64)));
145062306a36Sopenharmony_ci
145162306a36Sopenharmony_ci	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
145262306a36Sopenharmony_ci		tmp->next += offset;
145362306a36Sopenharmony_ci}
145462306a36Sopenharmony_ciEXPORT_SYMBOL(vfio_info_cap_shift);
145562306a36Sopenharmony_ci
145662306a36Sopenharmony_ciint vfio_info_add_capability(struct vfio_info_cap *caps,
145762306a36Sopenharmony_ci			     struct vfio_info_cap_header *cap, size_t size)
145862306a36Sopenharmony_ci{
145962306a36Sopenharmony_ci	struct vfio_info_cap_header *header;
146062306a36Sopenharmony_ci
146162306a36Sopenharmony_ci	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
146262306a36Sopenharmony_ci	if (IS_ERR(header))
146362306a36Sopenharmony_ci		return PTR_ERR(header);
146462306a36Sopenharmony_ci
146562306a36Sopenharmony_ci	memcpy(header + 1, cap + 1, size - sizeof(*header));
146662306a36Sopenharmony_ci
146762306a36Sopenharmony_ci	return 0;
146862306a36Sopenharmony_ci}
146962306a36Sopenharmony_ciEXPORT_SYMBOL(vfio_info_add_capability);
147062306a36Sopenharmony_ci
147162306a36Sopenharmony_ciint vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
147262306a36Sopenharmony_ci				       int max_irq_type, size_t *data_size)
147362306a36Sopenharmony_ci{
147462306a36Sopenharmony_ci	unsigned long minsz;
147562306a36Sopenharmony_ci	size_t size;
147662306a36Sopenharmony_ci
147762306a36Sopenharmony_ci	minsz = offsetofend(struct vfio_irq_set, count);
147862306a36Sopenharmony_ci
147962306a36Sopenharmony_ci	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
148062306a36Sopenharmony_ci	    (hdr->count >= (U32_MAX - hdr->start)) ||
148162306a36Sopenharmony_ci	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
148262306a36Sopenharmony_ci				VFIO_IRQ_SET_ACTION_TYPE_MASK)))
148362306a36Sopenharmony_ci		return -EINVAL;
148462306a36Sopenharmony_ci
148562306a36Sopenharmony_ci	if (data_size)
148662306a36Sopenharmony_ci		*data_size = 0;
148762306a36Sopenharmony_ci
148862306a36Sopenharmony_ci	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
148962306a36Sopenharmony_ci		return -EINVAL;
149062306a36Sopenharmony_ci
149162306a36Sopenharmony_ci	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
149262306a36Sopenharmony_ci	case VFIO_IRQ_SET_DATA_NONE:
149362306a36Sopenharmony_ci		size = 0;
149462306a36Sopenharmony_ci		break;
149562306a36Sopenharmony_ci	case VFIO_IRQ_SET_DATA_BOOL:
149662306a36Sopenharmony_ci		size = sizeof(uint8_t);
149762306a36Sopenharmony_ci		break;
149862306a36Sopenharmony_ci	case VFIO_IRQ_SET_DATA_EVENTFD:
149962306a36Sopenharmony_ci		size = sizeof(int32_t);
150062306a36Sopenharmony_ci		break;
150162306a36Sopenharmony_ci	default:
150262306a36Sopenharmony_ci		return -EINVAL;
150362306a36Sopenharmony_ci	}
150462306a36Sopenharmony_ci
150562306a36Sopenharmony_ci	if (size) {
150662306a36Sopenharmony_ci		if (hdr->argsz - minsz < hdr->count * size)
150762306a36Sopenharmony_ci			return -EINVAL;
150862306a36Sopenharmony_ci
150962306a36Sopenharmony_ci		if (!data_size)
151062306a36Sopenharmony_ci			return -EINVAL;
151162306a36Sopenharmony_ci
151262306a36Sopenharmony_ci		*data_size = hdr->count * size;
151362306a36Sopenharmony_ci	}
151462306a36Sopenharmony_ci
151562306a36Sopenharmony_ci	return 0;
151662306a36Sopenharmony_ci}
151762306a36Sopenharmony_ciEXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
151862306a36Sopenharmony_ci
151962306a36Sopenharmony_ci/*
152062306a36Sopenharmony_ci * Pin contiguous user pages and return their associated host pages for local
152162306a36Sopenharmony_ci * domain only.
152262306a36Sopenharmony_ci * @device [in]  : device
152362306a36Sopenharmony_ci * @iova [in]    : starting IOVA of user pages to be pinned.
152462306a36Sopenharmony_ci * @npage [in]   : count of pages to be pinned.  This count should not
152562306a36Sopenharmony_ci *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
152662306a36Sopenharmony_ci * @prot [in]    : protection flags
152762306a36Sopenharmony_ci * @pages[out]   : array of host pages
152862306a36Sopenharmony_ci * Return error or number of pages pinned.
152962306a36Sopenharmony_ci *
153062306a36Sopenharmony_ci * A driver may only call this function if the vfio_device was created
153162306a36Sopenharmony_ci * by vfio_register_emulated_iommu_dev() due to vfio_device_container_pin_pages().
153262306a36Sopenharmony_ci */
153362306a36Sopenharmony_ciint vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
153462306a36Sopenharmony_ci		   int npage, int prot, struct page **pages)
153562306a36Sopenharmony_ci{
153662306a36Sopenharmony_ci	/* group->container cannot change while a vfio device is open */
153762306a36Sopenharmony_ci	if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device)))
153862306a36Sopenharmony_ci		return -EINVAL;
153962306a36Sopenharmony_ci	if (!device->ops->dma_unmap)
154062306a36Sopenharmony_ci		return -EINVAL;
154162306a36Sopenharmony_ci	if (vfio_device_has_container(device))
154262306a36Sopenharmony_ci		return vfio_device_container_pin_pages(device, iova,
154362306a36Sopenharmony_ci						       npage, prot, pages);
154462306a36Sopenharmony_ci	if (device->iommufd_access) {
154562306a36Sopenharmony_ci		int ret;
154662306a36Sopenharmony_ci
154762306a36Sopenharmony_ci		if (iova > ULONG_MAX)
154862306a36Sopenharmony_ci			return -EINVAL;
154962306a36Sopenharmony_ci		/*
155062306a36Sopenharmony_ci		 * VFIO ignores the sub page offset, npages is from the start of
155162306a36Sopenharmony_ci		 * a PAGE_SIZE chunk of IOVA. The caller is expected to recover
155262306a36Sopenharmony_ci		 * the sub page offset by doing:
155362306a36Sopenharmony_ci		 *     pages[0] + (iova % PAGE_SIZE)
155462306a36Sopenharmony_ci		 */
155562306a36Sopenharmony_ci		ret = iommufd_access_pin_pages(
155662306a36Sopenharmony_ci			device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE),
155762306a36Sopenharmony_ci			npage * PAGE_SIZE, pages,
155862306a36Sopenharmony_ci			(prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0);
155962306a36Sopenharmony_ci		if (ret)
156062306a36Sopenharmony_ci			return ret;
156162306a36Sopenharmony_ci		return npage;
156262306a36Sopenharmony_ci	}
156362306a36Sopenharmony_ci	return -EINVAL;
156462306a36Sopenharmony_ci}
156562306a36Sopenharmony_ciEXPORT_SYMBOL(vfio_pin_pages);
156662306a36Sopenharmony_ci
156762306a36Sopenharmony_ci/*
156862306a36Sopenharmony_ci * Unpin contiguous host pages for local domain only.
156962306a36Sopenharmony_ci * @device [in]  : device
157062306a36Sopenharmony_ci * @iova [in]    : starting address of user pages to be unpinned.
157162306a36Sopenharmony_ci * @npage [in]   : count of pages to be unpinned.  This count should not
157262306a36Sopenharmony_ci *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
157362306a36Sopenharmony_ci */
157462306a36Sopenharmony_civoid vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
157562306a36Sopenharmony_ci{
157662306a36Sopenharmony_ci	if (WARN_ON(!vfio_assert_device_open(device)))
157762306a36Sopenharmony_ci		return;
157862306a36Sopenharmony_ci	if (WARN_ON(!device->ops->dma_unmap))
157962306a36Sopenharmony_ci		return;
158062306a36Sopenharmony_ci
158162306a36Sopenharmony_ci	if (vfio_device_has_container(device)) {
158262306a36Sopenharmony_ci		vfio_device_container_unpin_pages(device, iova, npage);
158362306a36Sopenharmony_ci		return;
158462306a36Sopenharmony_ci	}
158562306a36Sopenharmony_ci	if (device->iommufd_access) {
158662306a36Sopenharmony_ci		if (WARN_ON(iova > ULONG_MAX))
158762306a36Sopenharmony_ci			return;
158862306a36Sopenharmony_ci		iommufd_access_unpin_pages(device->iommufd_access,
158962306a36Sopenharmony_ci					   ALIGN_DOWN(iova, PAGE_SIZE),
159062306a36Sopenharmony_ci					   npage * PAGE_SIZE);
159162306a36Sopenharmony_ci		return;
159262306a36Sopenharmony_ci	}
159362306a36Sopenharmony_ci}
159462306a36Sopenharmony_ciEXPORT_SYMBOL(vfio_unpin_pages);
159562306a36Sopenharmony_ci
159662306a36Sopenharmony_ci/*
159762306a36Sopenharmony_ci * This interface allows the CPUs to perform some sort of virtual DMA on
159862306a36Sopenharmony_ci * behalf of the device.
159962306a36Sopenharmony_ci *
160062306a36Sopenharmony_ci * CPUs read/write from/into a range of IOVAs pointing to user space memory
160162306a36Sopenharmony_ci * into/from a kernel buffer.
160262306a36Sopenharmony_ci *
160362306a36Sopenharmony_ci * As the read/write of user space memory is conducted via the CPUs and is
160462306a36Sopenharmony_ci * not a real device DMA, it is not necessary to pin the user space memory.
160562306a36Sopenharmony_ci *
160662306a36Sopenharmony_ci * @device [in]		: VFIO device
160762306a36Sopenharmony_ci * @iova [in]		: base IOVA of a user space buffer
160862306a36Sopenharmony_ci * @data [in]		: pointer to kernel buffer
160962306a36Sopenharmony_ci * @len [in]		: kernel buffer length
161062306a36Sopenharmony_ci * @write		: indicate read or write
161162306a36Sopenharmony_ci * Return error code on failure or 0 on success.
161262306a36Sopenharmony_ci */
161362306a36Sopenharmony_ciint vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
161462306a36Sopenharmony_ci		size_t len, bool write)
161562306a36Sopenharmony_ci{
161662306a36Sopenharmony_ci	if (!data || len <= 0 || !vfio_assert_device_open(device))
161762306a36Sopenharmony_ci		return -EINVAL;
161862306a36Sopenharmony_ci
161962306a36Sopenharmony_ci	if (vfio_device_has_container(device))
162062306a36Sopenharmony_ci		return vfio_device_container_dma_rw(device, iova,
162162306a36Sopenharmony_ci						    data, len, write);
162262306a36Sopenharmony_ci
162362306a36Sopenharmony_ci	if (device->iommufd_access) {
162462306a36Sopenharmony_ci		unsigned int flags = 0;
162562306a36Sopenharmony_ci
162662306a36Sopenharmony_ci		if (iova > ULONG_MAX)
162762306a36Sopenharmony_ci			return -EINVAL;
162862306a36Sopenharmony_ci
162962306a36Sopenharmony_ci		/* VFIO historically tries to auto-detect a kthread */
163062306a36Sopenharmony_ci		if (!current->mm)
163162306a36Sopenharmony_ci			flags |= IOMMUFD_ACCESS_RW_KTHREAD;
163262306a36Sopenharmony_ci		if (write)
163362306a36Sopenharmony_ci			flags |= IOMMUFD_ACCESS_RW_WRITE;
163462306a36Sopenharmony_ci		return iommufd_access_rw(device->iommufd_access, iova, data,
163562306a36Sopenharmony_ci					 len, flags);
163662306a36Sopenharmony_ci	}
163762306a36Sopenharmony_ci	return -EINVAL;
163862306a36Sopenharmony_ci}
163962306a36Sopenharmony_ciEXPORT_SYMBOL(vfio_dma_rw);
164062306a36Sopenharmony_ci
164162306a36Sopenharmony_ci/*
164262306a36Sopenharmony_ci * Module/class support
164362306a36Sopenharmony_ci */
164462306a36Sopenharmony_cistatic int __init vfio_init(void)
164562306a36Sopenharmony_ci{
164662306a36Sopenharmony_ci	int ret;
164762306a36Sopenharmony_ci
164862306a36Sopenharmony_ci	ida_init(&vfio.device_ida);
164962306a36Sopenharmony_ci
165062306a36Sopenharmony_ci	ret = vfio_group_init();
165162306a36Sopenharmony_ci	if (ret)
165262306a36Sopenharmony_ci		return ret;
165362306a36Sopenharmony_ci
165462306a36Sopenharmony_ci	ret = vfio_virqfd_init();
165562306a36Sopenharmony_ci	if (ret)
165662306a36Sopenharmony_ci		goto err_virqfd;
165762306a36Sopenharmony_ci
165862306a36Sopenharmony_ci	/* /sys/class/vfio-dev/vfioX */
165962306a36Sopenharmony_ci	vfio.device_class = class_create("vfio-dev");
166062306a36Sopenharmony_ci	if (IS_ERR(vfio.device_class)) {
166162306a36Sopenharmony_ci		ret = PTR_ERR(vfio.device_class);
166262306a36Sopenharmony_ci		goto err_dev_class;
166362306a36Sopenharmony_ci	}
166462306a36Sopenharmony_ci
166562306a36Sopenharmony_ci	ret = vfio_cdev_init(vfio.device_class);
166662306a36Sopenharmony_ci	if (ret)
166762306a36Sopenharmony_ci		goto err_alloc_dev_chrdev;
166862306a36Sopenharmony_ci
166962306a36Sopenharmony_ci	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
167062306a36Sopenharmony_ci	return 0;
167162306a36Sopenharmony_ci
167262306a36Sopenharmony_cierr_alloc_dev_chrdev:
167362306a36Sopenharmony_ci	class_destroy(vfio.device_class);
167462306a36Sopenharmony_ci	vfio.device_class = NULL;
167562306a36Sopenharmony_cierr_dev_class:
167662306a36Sopenharmony_ci	vfio_virqfd_exit();
167762306a36Sopenharmony_cierr_virqfd:
167862306a36Sopenharmony_ci	vfio_group_cleanup();
167962306a36Sopenharmony_ci	return ret;
168062306a36Sopenharmony_ci}
168162306a36Sopenharmony_ci
/*
 * Module exit: release what vfio_init() set up (device IDA, cdev support,
 * the "vfio-dev" class, virqfd and the group code) and finally destroy the
 * vfio_device_set_xa xarray.
 */
static void __exit vfio_cleanup(void)
{
	ida_destroy(&vfio.device_ida);
	vfio_cdev_cleanup();
	class_destroy(vfio.device_class);
	/* Do not leave a stale pointer to the destroyed class behind */
	vfio.device_class = NULL;
	vfio_virqfd_exit();
	vfio_group_cleanup();
	xa_destroy(&vfio_device_set_xa);
}
169262306a36Sopenharmony_ci
169362306a36Sopenharmony_cimodule_init(vfio_init);
169462306a36Sopenharmony_cimodule_exit(vfio_cleanup);
169562306a36Sopenharmony_ci
169662306a36Sopenharmony_ciMODULE_VERSION(DRIVER_VERSION);
169762306a36Sopenharmony_ciMODULE_LICENSE("GPL v2");
169862306a36Sopenharmony_ciMODULE_AUTHOR(DRIVER_AUTHOR);
169962306a36Sopenharmony_ciMODULE_DESCRIPTION(DRIVER_DESC);
170062306a36Sopenharmony_ciMODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
1701