162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * VFIO core 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 662306a36Sopenharmony_ci * Author: Alex Williamson <alex.williamson@redhat.com> 762306a36Sopenharmony_ci * 862306a36Sopenharmony_ci * Derived from original vfio: 962306a36Sopenharmony_ci * Copyright 2010 Cisco Systems, Inc. All rights reserved. 1062306a36Sopenharmony_ci * Author: Tom Lyon, pugs@cisco.com 1162306a36Sopenharmony_ci */ 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#include <linux/cdev.h> 1462306a36Sopenharmony_ci#include <linux/compat.h> 1562306a36Sopenharmony_ci#include <linux/device.h> 1662306a36Sopenharmony_ci#include <linux/fs.h> 1762306a36Sopenharmony_ci#include <linux/idr.h> 1862306a36Sopenharmony_ci#include <linux/iommu.h> 1962306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM 2062306a36Sopenharmony_ci#include <linux/kvm_host.h> 2162306a36Sopenharmony_ci#endif 2262306a36Sopenharmony_ci#include <linux/list.h> 2362306a36Sopenharmony_ci#include <linux/miscdevice.h> 2462306a36Sopenharmony_ci#include <linux/module.h> 2562306a36Sopenharmony_ci#include <linux/mutex.h> 2662306a36Sopenharmony_ci#include <linux/pci.h> 2762306a36Sopenharmony_ci#include <linux/rwsem.h> 2862306a36Sopenharmony_ci#include <linux/sched.h> 2962306a36Sopenharmony_ci#include <linux/slab.h> 3062306a36Sopenharmony_ci#include <linux/stat.h> 3162306a36Sopenharmony_ci#include <linux/string.h> 3262306a36Sopenharmony_ci#include <linux/uaccess.h> 3362306a36Sopenharmony_ci#include <linux/vfio.h> 3462306a36Sopenharmony_ci#include <linux/wait.h> 3562306a36Sopenharmony_ci#include <linux/sched/signal.h> 3662306a36Sopenharmony_ci#include <linux/pm_runtime.h> 3762306a36Sopenharmony_ci#include <linux/interval_tree.h> 3862306a36Sopenharmony_ci#include <linux/iova_bitmap.h> 3962306a36Sopenharmony_ci#include <linux/iommufd.h> 4062306a36Sopenharmony_ci#include "vfio.h" 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci#define DRIVER_VERSION "0.3" 4362306a36Sopenharmony_ci#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" 4462306a36Sopenharmony_ci#define DRIVER_DESC "VFIO - User Level meta-driver" 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_cistatic struct vfio { 4762306a36Sopenharmony_ci struct class *device_class; 4862306a36Sopenharmony_ci struct ida device_ida; 4962306a36Sopenharmony_ci} vfio; 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci#ifdef CONFIG_VFIO_NOIOMMU 5262306a36Sopenharmony_cibool vfio_noiommu __read_mostly; 5362306a36Sopenharmony_cimodule_param_named(enable_unsafe_noiommu_mode, 5462306a36Sopenharmony_ci vfio_noiommu, bool, S_IRUGO | S_IWUSR); 5562306a36Sopenharmony_ciMODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)"); 5662306a36Sopenharmony_ci#endif 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_cistatic DEFINE_XARRAY(vfio_device_set_xa); 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ciint vfio_assign_device_set(struct vfio_device *device, void *set_id) 6162306a36Sopenharmony_ci{ 6262306a36Sopenharmony_ci unsigned long idx = (unsigned long)set_id; 6362306a36Sopenharmony_ci struct vfio_device_set *new_dev_set; 6462306a36Sopenharmony_ci struct vfio_device_set *dev_set; 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci if (WARN_ON(!set_id)) 6762306a36Sopenharmony_ci return -EINVAL; 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci /* 7062306a36Sopenharmony_ci * Atomically acquire a singleton object in the xarray for this set_id 7162306a36Sopenharmony_ci */ 7262306a36Sopenharmony_ci xa_lock(&vfio_device_set_xa); 7362306a36Sopenharmony_ci dev_set = xa_load(&vfio_device_set_xa, idx); 7462306a36Sopenharmony_ci if (dev_set) 7562306a36Sopenharmony_ci goto found_get_ref; 7662306a36Sopenharmony_ci xa_unlock(&vfio_device_set_xa); 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL); 7962306a36Sopenharmony_ci if (!new_dev_set) 8062306a36Sopenharmony_ci return -ENOMEM; 8162306a36Sopenharmony_ci mutex_init(&new_dev_set->lock); 8262306a36Sopenharmony_ci INIT_LIST_HEAD(&new_dev_set->device_list); 8362306a36Sopenharmony_ci new_dev_set->set_id = set_id; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci xa_lock(&vfio_device_set_xa); 8662306a36Sopenharmony_ci dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set, 8762306a36Sopenharmony_ci GFP_KERNEL); 8862306a36Sopenharmony_ci if (!dev_set) { 8962306a36Sopenharmony_ci dev_set = new_dev_set; 9062306a36Sopenharmony_ci goto found_get_ref; 9162306a36Sopenharmony_ci } 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci kfree(new_dev_set); 9462306a36Sopenharmony_ci if (xa_is_err(dev_set)) { 9562306a36Sopenharmony_ci xa_unlock(&vfio_device_set_xa); 9662306a36Sopenharmony_ci return xa_err(dev_set); 9762306a36Sopenharmony_ci } 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_cifound_get_ref: 10062306a36Sopenharmony_ci dev_set->device_count++; 10162306a36Sopenharmony_ci xa_unlock(&vfio_device_set_xa); 10262306a36Sopenharmony_ci mutex_lock(&dev_set->lock); 10362306a36Sopenharmony_ci device->dev_set = dev_set; 10462306a36Sopenharmony_ci list_add_tail(&device->dev_set_list, &dev_set->device_list); 10562306a36Sopenharmony_ci mutex_unlock(&dev_set->lock); 10662306a36Sopenharmony_ci return 0; 10762306a36Sopenharmony_ci} 10862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_assign_device_set); 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_cistatic void vfio_release_device_set(struct vfio_device *device) 11162306a36Sopenharmony_ci{ 11262306a36Sopenharmony_ci struct vfio_device_set *dev_set = device->dev_set; 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci if (!dev_set) 11562306a36Sopenharmony_ci return; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci mutex_lock(&dev_set->lock); 11862306a36Sopenharmony_ci list_del(&device->dev_set_list); 11962306a36Sopenharmony_ci mutex_unlock(&dev_set->lock); 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_ci xa_lock(&vfio_device_set_xa); 12262306a36Sopenharmony_ci if (!--dev_set->device_count) { 12362306a36Sopenharmony_ci __xa_erase(&vfio_device_set_xa, 12462306a36Sopenharmony_ci (unsigned long)dev_set->set_id); 12562306a36Sopenharmony_ci mutex_destroy(&dev_set->lock); 12662306a36Sopenharmony_ci kfree(dev_set); 12762306a36Sopenharmony_ci } 12862306a36Sopenharmony_ci xa_unlock(&vfio_device_set_xa); 12962306a36Sopenharmony_ci} 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ciunsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set) 13262306a36Sopenharmony_ci{ 13362306a36Sopenharmony_ci struct vfio_device *cur; 13462306a36Sopenharmony_ci unsigned int open_count = 0; 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci lockdep_assert_held(&dev_set->lock); 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci list_for_each_entry(cur, &dev_set->device_list, dev_set_list) 13962306a36Sopenharmony_ci open_count += cur->open_count; 14062306a36Sopenharmony_ci return open_count; 14162306a36Sopenharmony_ci} 14262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_device_set_open_count); 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_cistruct vfio_device * 14562306a36Sopenharmony_civfio_find_device_in_devset(struct vfio_device_set *dev_set, 14662306a36Sopenharmony_ci struct device *dev) 14762306a36Sopenharmony_ci{ 14862306a36Sopenharmony_ci struct vfio_device *cur; 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci lockdep_assert_held(&dev_set->lock); 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci list_for_each_entry(cur, &dev_set->device_list, dev_set_list) 15362306a36Sopenharmony_ci if (cur->dev == dev) 15462306a36Sopenharmony_ci return cur; 15562306a36Sopenharmony_ci return NULL; 15662306a36Sopenharmony_ci} 15762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_find_device_in_devset); 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci/* 16062306a36Sopenharmony_ci * Device objects - create, release, get, put, search 16162306a36Sopenharmony_ci */ 16262306a36Sopenharmony_ci/* Device reference always implies a group reference */ 16362306a36Sopenharmony_civoid vfio_device_put_registration(struct vfio_device *device) 16462306a36Sopenharmony_ci{ 16562306a36Sopenharmony_ci if (refcount_dec_and_test(&device->refcount)) 16662306a36Sopenharmony_ci complete(&device->comp); 16762306a36Sopenharmony_ci} 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_cibool vfio_device_try_get_registration(struct vfio_device *device) 17062306a36Sopenharmony_ci{ 17162306a36Sopenharmony_ci return refcount_inc_not_zero(&device->refcount); 17262306a36Sopenharmony_ci} 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci/* 17562306a36Sopenharmony_ci * VFIO driver API 17662306a36Sopenharmony_ci */ 17762306a36Sopenharmony_ci/* Release helper called by vfio_put_device() */ 17862306a36Sopenharmony_cistatic void vfio_device_release(struct device *dev) 17962306a36Sopenharmony_ci{ 18062306a36Sopenharmony_ci struct vfio_device *device = 18162306a36Sopenharmony_ci container_of(dev, struct vfio_device, device); 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci vfio_release_device_set(device); 18462306a36Sopenharmony_ci ida_free(&vfio.device_ida, device->index); 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci if (device->ops->release) 18762306a36Sopenharmony_ci device->ops->release(device); 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci kvfree(device); 19062306a36Sopenharmony_ci} 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_cistatic int vfio_init_device(struct vfio_device *device, struct device *dev, 19362306a36Sopenharmony_ci const struct vfio_device_ops *ops); 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci/* 19662306a36Sopenharmony_ci * Allocate and initialize vfio_device so it can be registered to vfio 19762306a36Sopenharmony_ci * core. 19862306a36Sopenharmony_ci * 19962306a36Sopenharmony_ci * Drivers should use the wrapper vfio_alloc_device() for allocation. 20062306a36Sopenharmony_ci * @size is the size of the structure to be allocated, including any 20162306a36Sopenharmony_ci * private data used by the driver. 20262306a36Sopenharmony_ci * 20362306a36Sopenharmony_ci * Driver may provide an @init callback to cover device private data. 20462306a36Sopenharmony_ci * 20562306a36Sopenharmony_ci * Use vfio_put_device() to release the structure after success return. 20662306a36Sopenharmony_ci */ 20762306a36Sopenharmony_cistruct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, 20862306a36Sopenharmony_ci const struct vfio_device_ops *ops) 20962306a36Sopenharmony_ci{ 21062306a36Sopenharmony_ci struct vfio_device *device; 21162306a36Sopenharmony_ci int ret; 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci if (WARN_ON(size < sizeof(struct vfio_device))) 21462306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci device = kvzalloc(size, GFP_KERNEL); 21762306a36Sopenharmony_ci if (!device) 21862306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci ret = vfio_init_device(device, dev, ops); 22162306a36Sopenharmony_ci if (ret) 22262306a36Sopenharmony_ci goto out_free; 22362306a36Sopenharmony_ci return device; 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ciout_free: 22662306a36Sopenharmony_ci kvfree(device); 22762306a36Sopenharmony_ci return ERR_PTR(ret); 22862306a36Sopenharmony_ci} 22962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(_vfio_alloc_device); 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci/* 23262306a36Sopenharmony_ci * Initialize a vfio_device so it can be registered to vfio core. 23362306a36Sopenharmony_ci */ 23462306a36Sopenharmony_cistatic int vfio_init_device(struct vfio_device *device, struct device *dev, 23562306a36Sopenharmony_ci const struct vfio_device_ops *ops) 23662306a36Sopenharmony_ci{ 23762306a36Sopenharmony_ci int ret; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL); 24062306a36Sopenharmony_ci if (ret < 0) { 24162306a36Sopenharmony_ci dev_dbg(dev, "Error to alloc index\n"); 24262306a36Sopenharmony_ci return ret; 24362306a36Sopenharmony_ci } 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci device->index = ret; 24662306a36Sopenharmony_ci init_completion(&device->comp); 24762306a36Sopenharmony_ci device->dev = dev; 24862306a36Sopenharmony_ci device->ops = ops; 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci if (ops->init) { 25162306a36Sopenharmony_ci ret = ops->init(device); 25262306a36Sopenharmony_ci if (ret) 25362306a36Sopenharmony_ci goto out_uninit; 25462306a36Sopenharmony_ci } 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci device_initialize(&device->device); 25762306a36Sopenharmony_ci device->device.release = vfio_device_release; 25862306a36Sopenharmony_ci device->device.class = vfio.device_class; 25962306a36Sopenharmony_ci device->device.parent = device->dev; 26062306a36Sopenharmony_ci return 0; 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ciout_uninit: 26362306a36Sopenharmony_ci vfio_release_device_set(device); 26462306a36Sopenharmony_ci ida_free(&vfio.device_ida, device->index); 26562306a36Sopenharmony_ci return ret; 26662306a36Sopenharmony_ci} 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_cistatic int __vfio_register_dev(struct vfio_device *device, 26962306a36Sopenharmony_ci enum vfio_group_type type) 27062306a36Sopenharmony_ci{ 27162306a36Sopenharmony_ci int ret; 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci if (WARN_ON(IS_ENABLED(CONFIG_IOMMUFD) && 27462306a36Sopenharmony_ci (!device->ops->bind_iommufd || 27562306a36Sopenharmony_ci !device->ops->unbind_iommufd || 27662306a36Sopenharmony_ci !device->ops->attach_ioas || 27762306a36Sopenharmony_ci !device->ops->detach_ioas))) 27862306a36Sopenharmony_ci return -EINVAL; 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci /* 28162306a36Sopenharmony_ci * If the driver doesn't specify a set then the device is added to a 28262306a36Sopenharmony_ci * singleton set just for itself. 28362306a36Sopenharmony_ci */ 28462306a36Sopenharmony_ci if (!device->dev_set) 28562306a36Sopenharmony_ci vfio_assign_device_set(device, device); 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci ret = dev_set_name(&device->device, "vfio%d", device->index); 28862306a36Sopenharmony_ci if (ret) 28962306a36Sopenharmony_ci return ret; 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci ret = vfio_device_set_group(device, type); 29262306a36Sopenharmony_ci if (ret) 29362306a36Sopenharmony_ci return ret; 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci /* 29662306a36Sopenharmony_ci * VFIO always sets IOMMU_CACHE because we offer no way for userspace to 29762306a36Sopenharmony_ci * restore cache coherency. It has to be checked here because it is only 29862306a36Sopenharmony_ci * valid for cases where we are using iommu groups. 29962306a36Sopenharmony_ci */ 30062306a36Sopenharmony_ci if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) && 30162306a36Sopenharmony_ci !device_iommu_capable(device->dev, IOMMU_CAP_CACHE_COHERENCY)) { 30262306a36Sopenharmony_ci ret = -EINVAL; 30362306a36Sopenharmony_ci goto err_out; 30462306a36Sopenharmony_ci } 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci ret = vfio_device_add(device); 30762306a36Sopenharmony_ci if (ret) 30862306a36Sopenharmony_ci goto err_out; 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci /* Refcounting can't start until the driver calls register */ 31162306a36Sopenharmony_ci refcount_set(&device->refcount, 1); 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci vfio_device_group_register(device); 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_ci return 0; 31662306a36Sopenharmony_cierr_out: 31762306a36Sopenharmony_ci vfio_device_remove_group(device); 31862306a36Sopenharmony_ci return ret; 31962306a36Sopenharmony_ci} 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ciint vfio_register_group_dev(struct vfio_device *device) 32262306a36Sopenharmony_ci{ 32362306a36Sopenharmony_ci return __vfio_register_dev(device, VFIO_IOMMU); 32462306a36Sopenharmony_ci} 32562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_register_group_dev); 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci/* 32862306a36Sopenharmony_ci * Register a virtual device without IOMMU backing. The user of this 32962306a36Sopenharmony_ci * device must not be able to directly trigger unmediated DMA. 33062306a36Sopenharmony_ci */ 33162306a36Sopenharmony_ciint vfio_register_emulated_iommu_dev(struct vfio_device *device) 33262306a36Sopenharmony_ci{ 33362306a36Sopenharmony_ci return __vfio_register_dev(device, VFIO_EMULATED_IOMMU); 33462306a36Sopenharmony_ci} 33562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci/* 33862306a36Sopenharmony_ci * Decrement the device reference count and wait for the device to be 33962306a36Sopenharmony_ci * removed. Open file descriptors for the device... */ 34062306a36Sopenharmony_civoid vfio_unregister_group_dev(struct vfio_device *device) 34162306a36Sopenharmony_ci{ 34262306a36Sopenharmony_ci unsigned int i = 0; 34362306a36Sopenharmony_ci bool interrupted = false; 34462306a36Sopenharmony_ci long rc; 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci /* 34762306a36Sopenharmony_ci * Prevent new device opened by userspace via the 34862306a36Sopenharmony_ci * VFIO_GROUP_GET_DEVICE_FD in the group path. 34962306a36Sopenharmony_ci */ 35062306a36Sopenharmony_ci vfio_device_group_unregister(device); 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci /* 35362306a36Sopenharmony_ci * Balances vfio_device_add() in register path, also prevents 35462306a36Sopenharmony_ci * new device opened by userspace in the cdev path. 35562306a36Sopenharmony_ci */ 35662306a36Sopenharmony_ci vfio_device_del(device); 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ci vfio_device_put_registration(device); 35962306a36Sopenharmony_ci rc = try_wait_for_completion(&device->comp); 36062306a36Sopenharmony_ci while (rc <= 0) { 36162306a36Sopenharmony_ci if (device->ops->request) 36262306a36Sopenharmony_ci device->ops->request(device, i++); 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci if (interrupted) { 36562306a36Sopenharmony_ci rc = wait_for_completion_timeout(&device->comp, 36662306a36Sopenharmony_ci HZ * 10); 36762306a36Sopenharmony_ci } else { 36862306a36Sopenharmony_ci rc = wait_for_completion_interruptible_timeout( 36962306a36Sopenharmony_ci &device->comp, HZ * 10); 37062306a36Sopenharmony_ci if (rc < 0) { 37162306a36Sopenharmony_ci interrupted = true; 37262306a36Sopenharmony_ci dev_warn(device->dev, 37362306a36Sopenharmony_ci "Device is currently in use, task" 37462306a36Sopenharmony_ci " \"%s\" (%d) " 37562306a36Sopenharmony_ci "blocked until device is released", 37662306a36Sopenharmony_ci current->comm, task_pid_nr(current)); 37762306a36Sopenharmony_ci } 37862306a36Sopenharmony_ci } 37962306a36Sopenharmony_ci } 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_ci /* Balances vfio_device_set_group in register path */ 38262306a36Sopenharmony_ci vfio_device_remove_group(device); 38362306a36Sopenharmony_ci} 38462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_unregister_group_dev); 38562306a36Sopenharmony_ci 38662306a36Sopenharmony_ci#ifdef CONFIG_HAVE_KVM 38762306a36Sopenharmony_civoid vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm) 38862306a36Sopenharmony_ci{ 38962306a36Sopenharmony_ci void (*pfn)(struct kvm *kvm); 39062306a36Sopenharmony_ci bool (*fn)(struct kvm *kvm); 39162306a36Sopenharmony_ci bool ret; 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci lockdep_assert_held(&device->dev_set->lock); 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci if (!kvm) 39662306a36Sopenharmony_ci return; 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_ci pfn = symbol_get(kvm_put_kvm); 39962306a36Sopenharmony_ci if (WARN_ON(!pfn)) 40062306a36Sopenharmony_ci return; 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_ci fn = symbol_get(kvm_get_kvm_safe); 40362306a36Sopenharmony_ci if (WARN_ON(!fn)) { 40462306a36Sopenharmony_ci symbol_put(kvm_put_kvm); 40562306a36Sopenharmony_ci return; 40662306a36Sopenharmony_ci } 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ci ret = fn(kvm); 40962306a36Sopenharmony_ci symbol_put(kvm_get_kvm_safe); 41062306a36Sopenharmony_ci if (!ret) { 41162306a36Sopenharmony_ci symbol_put(kvm_put_kvm); 41262306a36Sopenharmony_ci return; 41362306a36Sopenharmony_ci } 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_ci device->put_kvm = pfn; 41662306a36Sopenharmony_ci device->kvm = kvm; 41762306a36Sopenharmony_ci} 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_civoid vfio_device_put_kvm(struct vfio_device *device) 42062306a36Sopenharmony_ci{ 42162306a36Sopenharmony_ci lockdep_assert_held(&device->dev_set->lock); 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci if (!device->kvm) 42462306a36Sopenharmony_ci return; 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_ci if (WARN_ON(!device->put_kvm)) 42762306a36Sopenharmony_ci goto clear; 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci device->put_kvm(device->kvm); 43062306a36Sopenharmony_ci device->put_kvm = NULL; 43162306a36Sopenharmony_ci symbol_put(kvm_put_kvm); 43262306a36Sopenharmony_ci 43362306a36Sopenharmony_ciclear: 43462306a36Sopenharmony_ci device->kvm = NULL; 43562306a36Sopenharmony_ci} 43662306a36Sopenharmony_ci#endif 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci/* true if the vfio_device has open_device() called but not close_device() */ 43962306a36Sopenharmony_cistatic bool vfio_assert_device_open(struct vfio_device *device) 44062306a36Sopenharmony_ci{ 44162306a36Sopenharmony_ci return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); 44262306a36Sopenharmony_ci} 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_cistruct vfio_device_file * 44562306a36Sopenharmony_civfio_allocate_device_file(struct vfio_device *device) 44662306a36Sopenharmony_ci{ 44762306a36Sopenharmony_ci struct vfio_device_file *df; 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci df = kzalloc(sizeof(*df), GFP_KERNEL_ACCOUNT); 45062306a36Sopenharmony_ci if (!df) 45162306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ci df->device = device; 45462306a36Sopenharmony_ci spin_lock_init(&df->kvm_ref_lock); 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci return df; 45762306a36Sopenharmony_ci} 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_cistatic int vfio_df_device_first_open(struct vfio_device_file *df) 46062306a36Sopenharmony_ci{ 46162306a36Sopenharmony_ci struct vfio_device *device = df->device; 46262306a36Sopenharmony_ci struct iommufd_ctx *iommufd = df->iommufd; 46362306a36Sopenharmony_ci int ret; 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci lockdep_assert_held(&device->dev_set->lock); 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci if (!try_module_get(device->dev->driver->owner)) 46862306a36Sopenharmony_ci return -ENODEV; 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci if (iommufd) 47162306a36Sopenharmony_ci ret = vfio_df_iommufd_bind(df); 47262306a36Sopenharmony_ci else 47362306a36Sopenharmony_ci ret = vfio_device_group_use_iommu(device); 47462306a36Sopenharmony_ci if (ret) 47562306a36Sopenharmony_ci goto err_module_put; 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci if (device->ops->open_device) { 47862306a36Sopenharmony_ci ret = device->ops->open_device(device); 47962306a36Sopenharmony_ci if (ret) 48062306a36Sopenharmony_ci goto err_unuse_iommu; 48162306a36Sopenharmony_ci } 48262306a36Sopenharmony_ci return 0; 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_cierr_unuse_iommu: 48562306a36Sopenharmony_ci if (iommufd) 48662306a36Sopenharmony_ci vfio_df_iommufd_unbind(df); 48762306a36Sopenharmony_ci else 48862306a36Sopenharmony_ci vfio_device_group_unuse_iommu(device); 48962306a36Sopenharmony_cierr_module_put: 49062306a36Sopenharmony_ci module_put(device->dev->driver->owner); 49162306a36Sopenharmony_ci return ret; 49262306a36Sopenharmony_ci} 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_cistatic void vfio_df_device_last_close(struct vfio_device_file *df) 49562306a36Sopenharmony_ci{ 49662306a36Sopenharmony_ci struct vfio_device *device = df->device; 49762306a36Sopenharmony_ci struct iommufd_ctx *iommufd = df->iommufd; 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci lockdep_assert_held(&device->dev_set->lock); 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci if (device->ops->close_device) 50262306a36Sopenharmony_ci device->ops->close_device(device); 50362306a36Sopenharmony_ci if (iommufd) 50462306a36Sopenharmony_ci vfio_df_iommufd_unbind(df); 50562306a36Sopenharmony_ci else 50662306a36Sopenharmony_ci vfio_device_group_unuse_iommu(device); 50762306a36Sopenharmony_ci module_put(device->dev->driver->owner); 50862306a36Sopenharmony_ci} 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_ciint vfio_df_open(struct vfio_device_file *df) 51162306a36Sopenharmony_ci{ 51262306a36Sopenharmony_ci struct vfio_device *device = df->device; 51362306a36Sopenharmony_ci int ret = 0; 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci lockdep_assert_held(&device->dev_set->lock); 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci /* 51862306a36Sopenharmony_ci * Only the group path allows the device to be opened multiple 51962306a36Sopenharmony_ci * times. The device cdev path doesn't have a secure way for it. 52062306a36Sopenharmony_ci */ 52162306a36Sopenharmony_ci if (device->open_count != 0 && !df->group) 52262306a36Sopenharmony_ci return -EINVAL; 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci device->open_count++; 52562306a36Sopenharmony_ci if (device->open_count == 1) { 52662306a36Sopenharmony_ci ret = vfio_df_device_first_open(df); 52762306a36Sopenharmony_ci if (ret) 52862306a36Sopenharmony_ci device->open_count--; 52962306a36Sopenharmony_ci } 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci return ret; 53262306a36Sopenharmony_ci} 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_civoid vfio_df_close(struct vfio_device_file *df) 53562306a36Sopenharmony_ci{ 53662306a36Sopenharmony_ci struct vfio_device *device = df->device; 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci lockdep_assert_held(&device->dev_set->lock); 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_ci vfio_assert_device_open(device); 54162306a36Sopenharmony_ci if (device->open_count == 1) 54262306a36Sopenharmony_ci vfio_df_device_last_close(df); 54362306a36Sopenharmony_ci device->open_count--; 54462306a36Sopenharmony_ci} 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_ci/* 54762306a36Sopenharmony_ci * Wrapper around pm_runtime_resume_and_get(). 54862306a36Sopenharmony_ci * Return error code on failure or 0 on success. 54962306a36Sopenharmony_ci */ 55062306a36Sopenharmony_cistatic inline int vfio_device_pm_runtime_get(struct vfio_device *device) 55162306a36Sopenharmony_ci{ 55262306a36Sopenharmony_ci struct device *dev = device->dev; 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci if (dev->driver && dev->driver->pm) { 55562306a36Sopenharmony_ci int ret; 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci ret = pm_runtime_resume_and_get(dev); 55862306a36Sopenharmony_ci if (ret) { 55962306a36Sopenharmony_ci dev_info_ratelimited(dev, 56062306a36Sopenharmony_ci "vfio: runtime resume failed %d\n", ret); 56162306a36Sopenharmony_ci return -EIO; 56262306a36Sopenharmony_ci } 56362306a36Sopenharmony_ci } 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci return 0; 56662306a36Sopenharmony_ci} 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci/* 56962306a36Sopenharmony_ci * Wrapper around pm_runtime_put(). 57062306a36Sopenharmony_ci */ 57162306a36Sopenharmony_cistatic inline void vfio_device_pm_runtime_put(struct vfio_device *device) 57262306a36Sopenharmony_ci{ 57362306a36Sopenharmony_ci struct device *dev = device->dev; 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci if (dev->driver && dev->driver->pm) 57662306a36Sopenharmony_ci pm_runtime_put(dev); 57762306a36Sopenharmony_ci} 57862306a36Sopenharmony_ci 57962306a36Sopenharmony_ci/* 58062306a36Sopenharmony_ci * VFIO Device fd 58162306a36Sopenharmony_ci */ 58262306a36Sopenharmony_cistatic int vfio_device_fops_release(struct inode *inode, struct file *filep) 58362306a36Sopenharmony_ci{ 58462306a36Sopenharmony_ci struct vfio_device_file *df = filep->private_data; 58562306a36Sopenharmony_ci struct vfio_device *device = df->device; 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_ci if (df->group) 58862306a36Sopenharmony_ci vfio_df_group_close(df); 58962306a36Sopenharmony_ci else 59062306a36Sopenharmony_ci vfio_df_unbind_iommufd(df); 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_ci vfio_device_put_registration(device); 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_ci kfree(df); 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci return 0; 59762306a36Sopenharmony_ci} 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_ci/* 60062306a36Sopenharmony_ci * vfio_mig_get_next_state - Compute the next step in the FSM 60162306a36Sopenharmony_ci * @cur_fsm - The current state the device is in 60262306a36Sopenharmony_ci * @new_fsm - The target state to reach 60362306a36Sopenharmony_ci * @next_fsm - Pointer to the next step to get to new_fsm 60462306a36Sopenharmony_ci * 60562306a36Sopenharmony_ci * Return 0 upon success, otherwise -errno 60662306a36Sopenharmony_ci * Upon success the next step in the state progression between cur_fsm and 60762306a36Sopenharmony_ci * new_fsm will be set in next_fsm. 60862306a36Sopenharmony_ci * 60962306a36Sopenharmony_ci * This breaks down requests for combination transitions into smaller steps and 61062306a36Sopenharmony_ci * returns the next step to get to new_fsm. The function may need to be called 61162306a36Sopenharmony_ci * multiple times before reaching new_fsm. 61262306a36Sopenharmony_ci * 61362306a36Sopenharmony_ci */ 61462306a36Sopenharmony_ciint vfio_mig_get_next_state(struct vfio_device *device, 61562306a36Sopenharmony_ci enum vfio_device_mig_state cur_fsm, 61662306a36Sopenharmony_ci enum vfio_device_mig_state new_fsm, 61762306a36Sopenharmony_ci enum vfio_device_mig_state *next_fsm) 61862306a36Sopenharmony_ci{ 61962306a36Sopenharmony_ci enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_PRE_COPY_P2P + 1 }; 62062306a36Sopenharmony_ci /* 62162306a36Sopenharmony_ci * The coding in this table requires the driver to implement the 62262306a36Sopenharmony_ci * following FSM arcs: 62362306a36Sopenharmony_ci * RESUMING -> STOP 62462306a36Sopenharmony_ci * STOP -> RESUMING 62562306a36Sopenharmony_ci * STOP -> STOP_COPY 62662306a36Sopenharmony_ci * STOP_COPY -> STOP 62762306a36Sopenharmony_ci * 62862306a36Sopenharmony_ci * If P2P is supported then the driver must also implement these FSM 62962306a36Sopenharmony_ci * arcs: 63062306a36Sopenharmony_ci * RUNNING -> RUNNING_P2P 63162306a36Sopenharmony_ci * RUNNING_P2P -> RUNNING 63262306a36Sopenharmony_ci * RUNNING_P2P -> STOP 63362306a36Sopenharmony_ci * STOP -> RUNNING_P2P 63462306a36Sopenharmony_ci * 63562306a36Sopenharmony_ci * If precopy is supported then the driver must support these additional 63662306a36Sopenharmony_ci * FSM arcs: 63762306a36Sopenharmony_ci * RUNNING -> PRE_COPY 63862306a36Sopenharmony_ci * PRE_COPY -> RUNNING 63962306a36Sopenharmony_ci * PRE_COPY -> STOP_COPY 64062306a36Sopenharmony_ci * However, if precopy and P2P are supported together then the driver 64162306a36Sopenharmony_ci * must support these additional arcs beyond the P2P arcs above: 64262306a36Sopenharmony_ci * PRE_COPY -> RUNNING 64362306a36Sopenharmony_ci * PRE_COPY -> PRE_COPY_P2P 64462306a36Sopenharmony_ci * PRE_COPY_P2P -> PRE_COPY 64562306a36Sopenharmony_ci * PRE_COPY_P2P -> RUNNING_P2P 64662306a36Sopenharmony_ci * PRE_COPY_P2P -> STOP_COPY 64762306a36Sopenharmony_ci * RUNNING -> PRE_COPY 64862306a36Sopenharmony_ci * RUNNING_P2P -> PRE_COPY_P2P 64962306a36Sopenharmony_ci * 65062306a36Sopenharmony_ci * Without P2P and precopy the driver must implement: 65162306a36Sopenharmony_ci * RUNNING -> STOP 65262306a36Sopenharmony_ci * STOP -> RUNNING 65362306a36Sopenharmony_ci * 65462306a36Sopenharmony_ci * The coding will step through multiple states for some combination 65562306a36Sopenharmony_ci * transitions; if all optional features are supported, this means the 65662306a36Sopenharmony_ci * following ones: 65762306a36Sopenharmony_ci * PRE_COPY -> PRE_COPY_P2P -> STOP_COPY 65862306a36Sopenharmony_ci * PRE_COPY -> RUNNING -> RUNNING_P2P 65962306a36Sopenharmony_ci * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP 66062306a36Sopenharmony_ci * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP -> RESUMING 66162306a36Sopenharmony_ci * PRE_COPY_P2P -> RUNNING_P2P -> RUNNING 66262306a36Sopenharmony_ci * PRE_COPY_P2P -> RUNNING_P2P -> STOP 66362306a36Sopenharmony_ci * PRE_COPY_P2P -> RUNNING_P2P -> STOP -> RESUMING 66462306a36Sopenharmony_ci * RESUMING -> STOP -> RUNNING_P2P 66562306a36Sopenharmony_ci * RESUMING -> STOP -> RUNNING_P2P -> PRE_COPY_P2P 66662306a36Sopenharmony_ci * RESUMING -> STOP -> RUNNING_P2P -> RUNNING 66762306a36Sopenharmony_ci * RESUMING -> STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY 66862306a36Sopenharmony_ci * RESUMING -> STOP -> STOP_COPY 66962306a36Sopenharmony_ci * RUNNING -> RUNNING_P2P -> PRE_COPY_P2P 67062306a36Sopenharmony_ci * RUNNING -> RUNNING_P2P -> STOP 67162306a36Sopenharmony_ci * RUNNING -> RUNNING_P2P -> STOP -> RESUMING 67262306a36Sopenharmony_ci * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY 67362306a36Sopenharmony_ci * RUNNING_P2P -> RUNNING -> PRE_COPY 67462306a36Sopenharmony_ci * RUNNING_P2P -> STOP -> RESUMING 67562306a36Sopenharmony_ci * RUNNING_P2P -> STOP -> STOP_COPY 67662306a36Sopenharmony_ci * STOP -> RUNNING_P2P -> PRE_COPY_P2P 67762306a36Sopenharmony_ci * STOP -> RUNNING_P2P -> RUNNING 67862306a36Sopenharmony_ci * STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY 67962306a36Sopenharmony_ci * STOP_COPY -> STOP -> RESUMING 68062306a36Sopenharmony_ci * STOP_COPY -> STOP -> RUNNING_P2P 68162306a36Sopenharmony_ci * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING 68262306a36Sopenharmony_ci * 68362306a36Sopenharmony_ci * The following transitions are blocked: 68462306a36Sopenharmony_ci * STOP_COPY -> PRE_COPY 68562306a36Sopenharmony_ci * STOP_COPY -> PRE_COPY_P2P 68662306a36Sopenharmony_ci */ 68762306a36Sopenharmony_ci static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = { 68862306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP] = { 68962306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 69062306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, 69162306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, 69262306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 69362306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 69462306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 69562306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 69662306a36Sopenharmony_ci [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 69762306a36Sopenharmony_ci }, 69862306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING] = { 69962306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, 70062306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 70162306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, 70262306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 70362306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, 70462306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, 70562306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 70662306a36Sopenharmony_ci [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 70762306a36Sopenharmony_ci }, 70862306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY] = { 70962306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING, 71062306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 71162306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, 71262306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, 71362306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_PRE_COPY_P2P, 71462306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING, 71562306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING, 71662306a36Sopenharmony_ci [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 71762306a36Sopenharmony_ci }, 71862306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY_P2P] = { 71962306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, 72062306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, 72162306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, 72262306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, 72362306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 72462306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, 72562306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 72662306a36Sopenharmony_ci [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 72762306a36Sopenharmony_ci }, 72862306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP_COPY] = { 72962306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 73062306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 73162306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR, 73262306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR, 73362306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 73462306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 73562306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 73662306a36Sopenharmony_ci [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 73762306a36Sopenharmony_ci }, 73862306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RESUMING] = { 73962306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 74062306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 74162306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_STOP, 74262306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_STOP, 74362306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 74462306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 74562306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 74662306a36Sopenharmony_ci [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 74762306a36Sopenharmony_ci }, 74862306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING_P2P] = { 74962306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 75062306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 75162306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING, 75262306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, 75362306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 75462306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 75562306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 75662306a36Sopenharmony_ci [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 75762306a36Sopenharmony_ci }, 75862306a36Sopenharmony_ci [VFIO_DEVICE_STATE_ERROR] = { 75962306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR, 76062306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR, 76162306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR, 76262306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR, 76362306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR, 76462306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR, 76562306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR, 76662306a36Sopenharmony_ci [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 76762306a36Sopenharmony_ci }, 76862306a36Sopenharmony_ci }; 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = { 77162306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY, 77262306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY, 77362306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY] = 77462306a36Sopenharmony_ci VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY, 77562306a36Sopenharmony_ci [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_MIGRATION_STOP_COPY | 77662306a36Sopenharmony_ci VFIO_MIGRATION_P2P | 77762306a36Sopenharmony_ci VFIO_MIGRATION_PRE_COPY, 77862306a36Sopenharmony_ci [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY, 77962306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY, 78062306a36Sopenharmony_ci [VFIO_DEVICE_STATE_RUNNING_P2P] = 78162306a36Sopenharmony_ci VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P, 78262306a36Sopenharmony_ci [VFIO_DEVICE_STATE_ERROR] = ~0U, 78362306a36Sopenharmony_ci }; 78462306a36Sopenharmony_ci 78562306a36Sopenharmony_ci if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || 78662306a36Sopenharmony_ci (state_flags_table[cur_fsm] & device->migration_flags) != 78762306a36Sopenharmony_ci state_flags_table[cur_fsm])) 78862306a36Sopenharmony_ci return -EINVAL; 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || 79162306a36Sopenharmony_ci (state_flags_table[new_fsm] & device->migration_flags) != 79262306a36Sopenharmony_ci state_flags_table[new_fsm]) 79362306a36Sopenharmony_ci return -EINVAL; 79462306a36Sopenharmony_ci 79562306a36Sopenharmony_ci /* 79662306a36Sopenharmony_ci * Arcs touching optional and unsupported states are skipped over. The 79762306a36Sopenharmony_ci * driver will instead see an arc from the original state to the next 79862306a36Sopenharmony_ci * logical state, as per the above comment. 79962306a36Sopenharmony_ci */ 80062306a36Sopenharmony_ci *next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm]; 80162306a36Sopenharmony_ci while ((state_flags_table[*next_fsm] & device->migration_flags) != 80262306a36Sopenharmony_ci state_flags_table[*next_fsm]) 80362306a36Sopenharmony_ci *next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm]; 80462306a36Sopenharmony_ci 80562306a36Sopenharmony_ci return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL; 80662306a36Sopenharmony_ci} 80762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_mig_get_next_state); 80862306a36Sopenharmony_ci 80962306a36Sopenharmony_ci/* 81062306a36Sopenharmony_ci * Convert the drivers's struct file into a FD number and return it to userspace 81162306a36Sopenharmony_ci */ 81262306a36Sopenharmony_cistatic int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg, 81362306a36Sopenharmony_ci struct vfio_device_feature_mig_state *mig) 81462306a36Sopenharmony_ci{ 81562306a36Sopenharmony_ci int ret; 81662306a36Sopenharmony_ci int fd; 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci fd = get_unused_fd_flags(O_CLOEXEC); 81962306a36Sopenharmony_ci if (fd < 0) { 82062306a36Sopenharmony_ci ret = fd; 82162306a36Sopenharmony_ci goto out_fput; 82262306a36Sopenharmony_ci } 82362306a36Sopenharmony_ci 82462306a36Sopenharmony_ci mig->data_fd = fd; 82562306a36Sopenharmony_ci if (copy_to_user(arg, mig, sizeof(*mig))) { 82662306a36Sopenharmony_ci ret = -EFAULT; 82762306a36Sopenharmony_ci goto out_put_unused; 82862306a36Sopenharmony_ci } 82962306a36Sopenharmony_ci fd_install(fd, filp); 83062306a36Sopenharmony_ci return 0; 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ciout_put_unused: 83362306a36Sopenharmony_ci put_unused_fd(fd); 83462306a36Sopenharmony_ciout_fput: 83562306a36Sopenharmony_ci fput(filp); 83662306a36Sopenharmony_ci return ret; 83762306a36Sopenharmony_ci} 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_cistatic int 84062306a36Sopenharmony_civfio_ioctl_device_feature_mig_device_state(struct vfio_device *device, 84162306a36Sopenharmony_ci u32 flags, void __user *arg, 84262306a36Sopenharmony_ci size_t argsz) 84362306a36Sopenharmony_ci{ 84462306a36Sopenharmony_ci size_t minsz = 84562306a36Sopenharmony_ci offsetofend(struct vfio_device_feature_mig_state, data_fd); 84662306a36Sopenharmony_ci struct vfio_device_feature_mig_state mig; 84762306a36Sopenharmony_ci struct file *filp = NULL; 84862306a36Sopenharmony_ci int ret; 84962306a36Sopenharmony_ci 85062306a36Sopenharmony_ci if (!device->mig_ops) 85162306a36Sopenharmony_ci return -ENOTTY; 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci ret = vfio_check_feature(flags, argsz, 85462306a36Sopenharmony_ci VFIO_DEVICE_FEATURE_SET | 85562306a36Sopenharmony_ci VFIO_DEVICE_FEATURE_GET, 85662306a36Sopenharmony_ci sizeof(mig)); 85762306a36Sopenharmony_ci if (ret != 1) 85862306a36Sopenharmony_ci return ret; 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_ci if (copy_from_user(&mig, arg, minsz)) 86162306a36Sopenharmony_ci return -EFAULT; 86262306a36Sopenharmony_ci 86362306a36Sopenharmony_ci if (flags & VFIO_DEVICE_FEATURE_GET) { 86462306a36Sopenharmony_ci enum vfio_device_mig_state curr_state; 86562306a36Sopenharmony_ci 86662306a36Sopenharmony_ci ret = device->mig_ops->migration_get_state(device, 86762306a36Sopenharmony_ci &curr_state); 86862306a36Sopenharmony_ci if (ret) 86962306a36Sopenharmony_ci return ret; 87062306a36Sopenharmony_ci mig.device_state = curr_state; 87162306a36Sopenharmony_ci goto out_copy; 87262306a36Sopenharmony_ci } 87362306a36Sopenharmony_ci 87462306a36Sopenharmony_ci /* Handle the VFIO_DEVICE_FEATURE_SET */ 87562306a36Sopenharmony_ci filp = device->mig_ops->migration_set_state(device, mig.device_state); 87662306a36Sopenharmony_ci if (IS_ERR(filp) || !filp) 87762306a36Sopenharmony_ci goto out_copy; 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci return vfio_ioct_mig_return_fd(filp, arg, &mig); 88062306a36Sopenharmony_ciout_copy: 88162306a36Sopenharmony_ci mig.data_fd = -1; 88262306a36Sopenharmony_ci if (copy_to_user(arg, &mig, sizeof(mig))) 88362306a36Sopenharmony_ci return -EFAULT; 88462306a36Sopenharmony_ci if (IS_ERR(filp)) 88562306a36Sopenharmony_ci return PTR_ERR(filp); 88662306a36Sopenharmony_ci return 0; 88762306a36Sopenharmony_ci} 88862306a36Sopenharmony_ci 88962306a36Sopenharmony_cistatic int 89062306a36Sopenharmony_civfio_ioctl_device_feature_migration_data_size(struct vfio_device *device, 89162306a36Sopenharmony_ci u32 flags, void __user *arg, 89262306a36Sopenharmony_ci size_t argsz) 89362306a36Sopenharmony_ci{ 89462306a36Sopenharmony_ci struct vfio_device_feature_mig_data_size data_size = {}; 89562306a36Sopenharmony_ci unsigned long stop_copy_length; 89662306a36Sopenharmony_ci int ret; 89762306a36Sopenharmony_ci 89862306a36Sopenharmony_ci if (!device->mig_ops) 89962306a36Sopenharmony_ci return -ENOTTY; 90062306a36Sopenharmony_ci 90162306a36Sopenharmony_ci ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, 90262306a36Sopenharmony_ci sizeof(data_size)); 90362306a36Sopenharmony_ci if (ret != 1) 90462306a36Sopenharmony_ci return ret; 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_ci ret = device->mig_ops->migration_get_data_size(device, &stop_copy_length); 90762306a36Sopenharmony_ci if (ret) 90862306a36Sopenharmony_ci return ret; 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_ci data_size.stop_copy_length = stop_copy_length; 91162306a36Sopenharmony_ci if (copy_to_user(arg, &data_size, sizeof(data_size))) 91262306a36Sopenharmony_ci return -EFAULT; 91362306a36Sopenharmony_ci 91462306a36Sopenharmony_ci return 0; 91562306a36Sopenharmony_ci} 91662306a36Sopenharmony_ci 91762306a36Sopenharmony_cistatic int vfio_ioctl_device_feature_migration(struct vfio_device *device, 91862306a36Sopenharmony_ci u32 flags, void __user *arg, 91962306a36Sopenharmony_ci size_t argsz) 92062306a36Sopenharmony_ci{ 92162306a36Sopenharmony_ci struct vfio_device_feature_migration mig = { 92262306a36Sopenharmony_ci .flags = device->migration_flags, 92362306a36Sopenharmony_ci }; 92462306a36Sopenharmony_ci int ret; 92562306a36Sopenharmony_ci 92662306a36Sopenharmony_ci if (!device->mig_ops) 92762306a36Sopenharmony_ci return -ENOTTY; 92862306a36Sopenharmony_ci 92962306a36Sopenharmony_ci ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, 93062306a36Sopenharmony_ci sizeof(mig)); 93162306a36Sopenharmony_ci if (ret != 1) 93262306a36Sopenharmony_ci return ret; 93362306a36Sopenharmony_ci if (copy_to_user(arg, &mig, sizeof(mig))) 93462306a36Sopenharmony_ci return -EFAULT; 93562306a36Sopenharmony_ci return 0; 93662306a36Sopenharmony_ci} 93762306a36Sopenharmony_ci 93862306a36Sopenharmony_civoid vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes, 93962306a36Sopenharmony_ci u32 req_nodes) 94062306a36Sopenharmony_ci{ 94162306a36Sopenharmony_ci struct interval_tree_node *prev, *curr, *comb_start, *comb_end; 94262306a36Sopenharmony_ci unsigned long min_gap, curr_gap; 94362306a36Sopenharmony_ci 94462306a36Sopenharmony_ci /* Special shortcut when a single range is required */ 94562306a36Sopenharmony_ci if (req_nodes == 1) { 94662306a36Sopenharmony_ci unsigned long last; 94762306a36Sopenharmony_ci 94862306a36Sopenharmony_ci comb_start = interval_tree_iter_first(root, 0, ULONG_MAX); 94962306a36Sopenharmony_ci curr = comb_start; 95062306a36Sopenharmony_ci while (curr) { 95162306a36Sopenharmony_ci last = curr->last; 95262306a36Sopenharmony_ci prev = curr; 95362306a36Sopenharmony_ci curr = interval_tree_iter_next(curr, 0, ULONG_MAX); 95462306a36Sopenharmony_ci if (prev != comb_start) 95562306a36Sopenharmony_ci interval_tree_remove(prev, root); 95662306a36Sopenharmony_ci } 95762306a36Sopenharmony_ci comb_start->last = last; 95862306a36Sopenharmony_ci return; 95962306a36Sopenharmony_ci } 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci /* Combine ranges which have the smallest gap */ 96262306a36Sopenharmony_ci while (cur_nodes > req_nodes) { 96362306a36Sopenharmony_ci prev = NULL; 96462306a36Sopenharmony_ci min_gap = ULONG_MAX; 96562306a36Sopenharmony_ci curr = interval_tree_iter_first(root, 0, ULONG_MAX); 96662306a36Sopenharmony_ci while (curr) { 96762306a36Sopenharmony_ci if (prev) { 96862306a36Sopenharmony_ci curr_gap = curr->start - prev->last; 96962306a36Sopenharmony_ci if (curr_gap < min_gap) { 97062306a36Sopenharmony_ci min_gap = curr_gap; 97162306a36Sopenharmony_ci comb_start = prev; 97262306a36Sopenharmony_ci comb_end = curr; 97362306a36Sopenharmony_ci } 97462306a36Sopenharmony_ci } 97562306a36Sopenharmony_ci prev = curr; 97662306a36Sopenharmony_ci curr = interval_tree_iter_next(curr, 0, ULONG_MAX); 97762306a36Sopenharmony_ci } 97862306a36Sopenharmony_ci comb_start->last = comb_end->last; 97962306a36Sopenharmony_ci interval_tree_remove(comb_end, root); 98062306a36Sopenharmony_ci cur_nodes--; 98162306a36Sopenharmony_ci } 98262306a36Sopenharmony_ci} 98362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_combine_iova_ranges); 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_ci/* Ranges should fit into a single kernel page */ 98662306a36Sopenharmony_ci#define LOG_MAX_RANGES \ 98762306a36Sopenharmony_ci (PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range)) 98862306a36Sopenharmony_ci 98962306a36Sopenharmony_cistatic int 99062306a36Sopenharmony_civfio_ioctl_device_feature_logging_start(struct vfio_device *device, 99162306a36Sopenharmony_ci u32 flags, void __user *arg, 99262306a36Sopenharmony_ci size_t argsz) 99362306a36Sopenharmony_ci{ 99462306a36Sopenharmony_ci size_t minsz = 99562306a36Sopenharmony_ci offsetofend(struct vfio_device_feature_dma_logging_control, 99662306a36Sopenharmony_ci ranges); 99762306a36Sopenharmony_ci struct vfio_device_feature_dma_logging_range __user *ranges; 99862306a36Sopenharmony_ci struct vfio_device_feature_dma_logging_control control; 99962306a36Sopenharmony_ci struct vfio_device_feature_dma_logging_range range; 100062306a36Sopenharmony_ci struct rb_root_cached root = RB_ROOT_CACHED; 100162306a36Sopenharmony_ci struct interval_tree_node *nodes; 100262306a36Sopenharmony_ci u64 iova_end; 100362306a36Sopenharmony_ci u32 nnodes; 100462306a36Sopenharmony_ci int i, ret; 100562306a36Sopenharmony_ci 100662306a36Sopenharmony_ci if (!device->log_ops) 100762306a36Sopenharmony_ci return -ENOTTY; 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci ret = vfio_check_feature(flags, argsz, 101062306a36Sopenharmony_ci VFIO_DEVICE_FEATURE_SET, 101162306a36Sopenharmony_ci sizeof(control)); 101262306a36Sopenharmony_ci if (ret != 1) 101362306a36Sopenharmony_ci return ret; 101462306a36Sopenharmony_ci 101562306a36Sopenharmony_ci if (copy_from_user(&control, arg, minsz)) 101662306a36Sopenharmony_ci return -EFAULT; 101762306a36Sopenharmony_ci 101862306a36Sopenharmony_ci nnodes = control.num_ranges; 101962306a36Sopenharmony_ci if (!nnodes) 102062306a36Sopenharmony_ci return -EINVAL; 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_ci if (nnodes > LOG_MAX_RANGES) 102362306a36Sopenharmony_ci return -E2BIG; 102462306a36Sopenharmony_ci 102562306a36Sopenharmony_ci ranges = u64_to_user_ptr(control.ranges); 102662306a36Sopenharmony_ci nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node), 102762306a36Sopenharmony_ci GFP_KERNEL); 102862306a36Sopenharmony_ci if (!nodes) 102962306a36Sopenharmony_ci return -ENOMEM; 103062306a36Sopenharmony_ci 103162306a36Sopenharmony_ci for (i = 0; i < nnodes; i++) { 103262306a36Sopenharmony_ci if (copy_from_user(&range, &ranges[i], sizeof(range))) { 103362306a36Sopenharmony_ci ret = -EFAULT; 103462306a36Sopenharmony_ci goto end; 103562306a36Sopenharmony_ci } 103662306a36Sopenharmony_ci if (!IS_ALIGNED(range.iova, control.page_size) || 103762306a36Sopenharmony_ci !IS_ALIGNED(range.length, control.page_size)) { 103862306a36Sopenharmony_ci ret = -EINVAL; 103962306a36Sopenharmony_ci goto end; 104062306a36Sopenharmony_ci } 104162306a36Sopenharmony_ci 104262306a36Sopenharmony_ci if (check_add_overflow(range.iova, range.length, &iova_end) || 104362306a36Sopenharmony_ci iova_end > ULONG_MAX) { 104462306a36Sopenharmony_ci ret = -EOVERFLOW; 104562306a36Sopenharmony_ci goto end; 104662306a36Sopenharmony_ci } 104762306a36Sopenharmony_ci 104862306a36Sopenharmony_ci nodes[i].start = range.iova; 104962306a36Sopenharmony_ci nodes[i].last = range.iova + range.length - 1; 105062306a36Sopenharmony_ci if (interval_tree_iter_first(&root, nodes[i].start, 105162306a36Sopenharmony_ci nodes[i].last)) { 105262306a36Sopenharmony_ci /* Range overlapping */ 105362306a36Sopenharmony_ci ret = -EINVAL; 105462306a36Sopenharmony_ci goto end; 105562306a36Sopenharmony_ci } 105662306a36Sopenharmony_ci interval_tree_insert(nodes + i, &root); 105762306a36Sopenharmony_ci } 105862306a36Sopenharmony_ci 105962306a36Sopenharmony_ci ret = device->log_ops->log_start(device, &root, nnodes, 106062306a36Sopenharmony_ci &control.page_size); 106162306a36Sopenharmony_ci if (ret) 106262306a36Sopenharmony_ci goto end; 106362306a36Sopenharmony_ci 106462306a36Sopenharmony_ci if (copy_to_user(arg, &control, sizeof(control))) { 106562306a36Sopenharmony_ci ret = -EFAULT; 106662306a36Sopenharmony_ci device->log_ops->log_stop(device); 106762306a36Sopenharmony_ci } 106862306a36Sopenharmony_ci 106962306a36Sopenharmony_ciend: 107062306a36Sopenharmony_ci kfree(nodes); 107162306a36Sopenharmony_ci return ret; 107262306a36Sopenharmony_ci} 107362306a36Sopenharmony_ci 107462306a36Sopenharmony_cistatic int 107562306a36Sopenharmony_civfio_ioctl_device_feature_logging_stop(struct vfio_device *device, 107662306a36Sopenharmony_ci u32 flags, void __user *arg, 107762306a36Sopenharmony_ci size_t argsz) 107862306a36Sopenharmony_ci{ 107962306a36Sopenharmony_ci int ret; 108062306a36Sopenharmony_ci 108162306a36Sopenharmony_ci if (!device->log_ops) 108262306a36Sopenharmony_ci return -ENOTTY; 108362306a36Sopenharmony_ci 108462306a36Sopenharmony_ci ret = vfio_check_feature(flags, argsz, 108562306a36Sopenharmony_ci VFIO_DEVICE_FEATURE_SET, 0); 108662306a36Sopenharmony_ci if (ret != 1) 108762306a36Sopenharmony_ci return ret; 108862306a36Sopenharmony_ci 108962306a36Sopenharmony_ci return device->log_ops->log_stop(device); 109062306a36Sopenharmony_ci} 109162306a36Sopenharmony_ci 109262306a36Sopenharmony_cistatic int vfio_device_log_read_and_clear(struct iova_bitmap *iter, 109362306a36Sopenharmony_ci unsigned long iova, size_t length, 109462306a36Sopenharmony_ci void *opaque) 109562306a36Sopenharmony_ci{ 109662306a36Sopenharmony_ci struct vfio_device *device = opaque; 109762306a36Sopenharmony_ci 109862306a36Sopenharmony_ci return device->log_ops->log_read_and_clear(device, iova, length, iter); 109962306a36Sopenharmony_ci} 110062306a36Sopenharmony_ci 110162306a36Sopenharmony_cistatic int 110262306a36Sopenharmony_civfio_ioctl_device_feature_logging_report(struct vfio_device *device, 110362306a36Sopenharmony_ci u32 flags, void __user *arg, 110462306a36Sopenharmony_ci size_t argsz) 110562306a36Sopenharmony_ci{ 110662306a36Sopenharmony_ci size_t minsz = 110762306a36Sopenharmony_ci offsetofend(struct vfio_device_feature_dma_logging_report, 110862306a36Sopenharmony_ci bitmap); 110962306a36Sopenharmony_ci struct vfio_device_feature_dma_logging_report report; 111062306a36Sopenharmony_ci struct iova_bitmap *iter; 111162306a36Sopenharmony_ci u64 iova_end; 111262306a36Sopenharmony_ci int ret; 111362306a36Sopenharmony_ci 111462306a36Sopenharmony_ci if (!device->log_ops) 111562306a36Sopenharmony_ci return -ENOTTY; 111662306a36Sopenharmony_ci 111762306a36Sopenharmony_ci ret = vfio_check_feature(flags, argsz, 111862306a36Sopenharmony_ci VFIO_DEVICE_FEATURE_GET, 111962306a36Sopenharmony_ci sizeof(report)); 112062306a36Sopenharmony_ci if (ret != 1) 112162306a36Sopenharmony_ci return ret; 112262306a36Sopenharmony_ci 112362306a36Sopenharmony_ci if (copy_from_user(&report, arg, minsz)) 112462306a36Sopenharmony_ci return -EFAULT; 112562306a36Sopenharmony_ci 112662306a36Sopenharmony_ci if (report.page_size < SZ_4K || !is_power_of_2(report.page_size)) 112762306a36Sopenharmony_ci return -EINVAL; 112862306a36Sopenharmony_ci 112962306a36Sopenharmony_ci if (check_add_overflow(report.iova, report.length, &iova_end) || 113062306a36Sopenharmony_ci iova_end > ULONG_MAX) 113162306a36Sopenharmony_ci return -EOVERFLOW; 113262306a36Sopenharmony_ci 113362306a36Sopenharmony_ci iter = iova_bitmap_alloc(report.iova, report.length, 113462306a36Sopenharmony_ci report.page_size, 113562306a36Sopenharmony_ci u64_to_user_ptr(report.bitmap)); 113662306a36Sopenharmony_ci if (IS_ERR(iter)) 113762306a36Sopenharmony_ci return PTR_ERR(iter); 113862306a36Sopenharmony_ci 113962306a36Sopenharmony_ci ret = iova_bitmap_for_each(iter, device, 114062306a36Sopenharmony_ci vfio_device_log_read_and_clear); 114162306a36Sopenharmony_ci 114262306a36Sopenharmony_ci iova_bitmap_free(iter); 114362306a36Sopenharmony_ci return ret; 114462306a36Sopenharmony_ci} 114562306a36Sopenharmony_ci 114662306a36Sopenharmony_cistatic int vfio_ioctl_device_feature(struct vfio_device *device, 114762306a36Sopenharmony_ci struct vfio_device_feature __user *arg) 114862306a36Sopenharmony_ci{ 114962306a36Sopenharmony_ci size_t minsz = offsetofend(struct vfio_device_feature, flags); 115062306a36Sopenharmony_ci struct vfio_device_feature feature; 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci if (copy_from_user(&feature, arg, minsz)) 115362306a36Sopenharmony_ci return -EFAULT; 115462306a36Sopenharmony_ci 115562306a36Sopenharmony_ci if (feature.argsz < minsz) 115662306a36Sopenharmony_ci return -EINVAL; 115762306a36Sopenharmony_ci 115862306a36Sopenharmony_ci /* Check unknown flags */ 115962306a36Sopenharmony_ci if (feature.flags & 116062306a36Sopenharmony_ci ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET | 116162306a36Sopenharmony_ci VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE)) 116262306a36Sopenharmony_ci return -EINVAL; 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_ci /* GET & SET are mutually exclusive except with PROBE */ 116562306a36Sopenharmony_ci if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) && 116662306a36Sopenharmony_ci (feature.flags & VFIO_DEVICE_FEATURE_SET) && 116762306a36Sopenharmony_ci (feature.flags & VFIO_DEVICE_FEATURE_GET)) 116862306a36Sopenharmony_ci return -EINVAL; 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) { 117162306a36Sopenharmony_ci case VFIO_DEVICE_FEATURE_MIGRATION: 117262306a36Sopenharmony_ci return vfio_ioctl_device_feature_migration( 117362306a36Sopenharmony_ci device, feature.flags, arg->data, 117462306a36Sopenharmony_ci feature.argsz - minsz); 117562306a36Sopenharmony_ci case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: 117662306a36Sopenharmony_ci return vfio_ioctl_device_feature_mig_device_state( 117762306a36Sopenharmony_ci device, feature.flags, arg->data, 117862306a36Sopenharmony_ci feature.argsz - minsz); 117962306a36Sopenharmony_ci case VFIO_DEVICE_FEATURE_DMA_LOGGING_START: 118062306a36Sopenharmony_ci return vfio_ioctl_device_feature_logging_start( 118162306a36Sopenharmony_ci device, feature.flags, arg->data, 118262306a36Sopenharmony_ci feature.argsz - minsz); 118362306a36Sopenharmony_ci case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP: 118462306a36Sopenharmony_ci return vfio_ioctl_device_feature_logging_stop( 118562306a36Sopenharmony_ci device, feature.flags, arg->data, 118662306a36Sopenharmony_ci feature.argsz - minsz); 118762306a36Sopenharmony_ci case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT: 118862306a36Sopenharmony_ci return vfio_ioctl_device_feature_logging_report( 118962306a36Sopenharmony_ci device, feature.flags, arg->data, 119062306a36Sopenharmony_ci feature.argsz - minsz); 119162306a36Sopenharmony_ci case VFIO_DEVICE_FEATURE_MIG_DATA_SIZE: 119262306a36Sopenharmony_ci return vfio_ioctl_device_feature_migration_data_size( 119362306a36Sopenharmony_ci device, feature.flags, arg->data, 119462306a36Sopenharmony_ci feature.argsz - minsz); 119562306a36Sopenharmony_ci default: 119662306a36Sopenharmony_ci if (unlikely(!device->ops->device_feature)) 119762306a36Sopenharmony_ci return -EINVAL; 119862306a36Sopenharmony_ci return device->ops->device_feature(device, feature.flags, 119962306a36Sopenharmony_ci arg->data, 120062306a36Sopenharmony_ci feature.argsz - minsz); 120162306a36Sopenharmony_ci } 120262306a36Sopenharmony_ci} 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_cistatic long vfio_device_fops_unl_ioctl(struct file *filep, 120562306a36Sopenharmony_ci unsigned int cmd, unsigned long arg) 120662306a36Sopenharmony_ci{ 120762306a36Sopenharmony_ci struct vfio_device_file *df = filep->private_data; 120862306a36Sopenharmony_ci struct vfio_device *device = df->device; 120962306a36Sopenharmony_ci void __user *uptr = (void __user *)arg; 121062306a36Sopenharmony_ci int ret; 121162306a36Sopenharmony_ci 121262306a36Sopenharmony_ci if (cmd == VFIO_DEVICE_BIND_IOMMUFD) 121362306a36Sopenharmony_ci return vfio_df_ioctl_bind_iommufd(df, uptr); 121462306a36Sopenharmony_ci 121562306a36Sopenharmony_ci /* Paired with smp_store_release() following vfio_df_open() */ 121662306a36Sopenharmony_ci if (!smp_load_acquire(&df->access_granted)) 121762306a36Sopenharmony_ci return -EINVAL; 121862306a36Sopenharmony_ci 121962306a36Sopenharmony_ci ret = vfio_device_pm_runtime_get(device); 122062306a36Sopenharmony_ci if (ret) 122162306a36Sopenharmony_ci return ret; 122262306a36Sopenharmony_ci 122362306a36Sopenharmony_ci /* cdev only ioctls */ 122462306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) && !df->group) { 122562306a36Sopenharmony_ci switch (cmd) { 122662306a36Sopenharmony_ci case VFIO_DEVICE_ATTACH_IOMMUFD_PT: 122762306a36Sopenharmony_ci ret = vfio_df_ioctl_attach_pt(df, uptr); 122862306a36Sopenharmony_ci goto out; 122962306a36Sopenharmony_ci 123062306a36Sopenharmony_ci case VFIO_DEVICE_DETACH_IOMMUFD_PT: 123162306a36Sopenharmony_ci ret = vfio_df_ioctl_detach_pt(df, uptr); 123262306a36Sopenharmony_ci goto out; 123362306a36Sopenharmony_ci } 123462306a36Sopenharmony_ci } 123562306a36Sopenharmony_ci 123662306a36Sopenharmony_ci switch (cmd) { 123762306a36Sopenharmony_ci case VFIO_DEVICE_FEATURE: 123862306a36Sopenharmony_ci ret = vfio_ioctl_device_feature(device, uptr); 123962306a36Sopenharmony_ci break; 124062306a36Sopenharmony_ci 124162306a36Sopenharmony_ci default: 124262306a36Sopenharmony_ci if (unlikely(!device->ops->ioctl)) 124362306a36Sopenharmony_ci ret = -EINVAL; 124462306a36Sopenharmony_ci else 124562306a36Sopenharmony_ci ret = device->ops->ioctl(device, cmd, arg); 124662306a36Sopenharmony_ci break; 124762306a36Sopenharmony_ci } 124862306a36Sopenharmony_ciout: 124962306a36Sopenharmony_ci vfio_device_pm_runtime_put(device); 125062306a36Sopenharmony_ci return ret; 125162306a36Sopenharmony_ci} 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_cistatic ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, 125462306a36Sopenharmony_ci size_t count, loff_t *ppos) 125562306a36Sopenharmony_ci{ 125662306a36Sopenharmony_ci struct vfio_device_file *df = filep->private_data; 125762306a36Sopenharmony_ci struct vfio_device *device = df->device; 125862306a36Sopenharmony_ci 125962306a36Sopenharmony_ci /* Paired with smp_store_release() following vfio_df_open() */ 126062306a36Sopenharmony_ci if (!smp_load_acquire(&df->access_granted)) 126162306a36Sopenharmony_ci return -EINVAL; 126262306a36Sopenharmony_ci 126362306a36Sopenharmony_ci if (unlikely(!device->ops->read)) 126462306a36Sopenharmony_ci return -EINVAL; 126562306a36Sopenharmony_ci 126662306a36Sopenharmony_ci return device->ops->read(device, buf, count, ppos); 126762306a36Sopenharmony_ci} 126862306a36Sopenharmony_ci 126962306a36Sopenharmony_cistatic ssize_t vfio_device_fops_write(struct file *filep, 127062306a36Sopenharmony_ci const char __user *buf, 127162306a36Sopenharmony_ci size_t count, loff_t *ppos) 127262306a36Sopenharmony_ci{ 127362306a36Sopenharmony_ci struct vfio_device_file *df = filep->private_data; 127462306a36Sopenharmony_ci struct vfio_device *device = df->device; 127562306a36Sopenharmony_ci 127662306a36Sopenharmony_ci /* Paired with smp_store_release() following vfio_df_open() */ 127762306a36Sopenharmony_ci if (!smp_load_acquire(&df->access_granted)) 127862306a36Sopenharmony_ci return -EINVAL; 127962306a36Sopenharmony_ci 128062306a36Sopenharmony_ci if (unlikely(!device->ops->write)) 128162306a36Sopenharmony_ci return -EINVAL; 128262306a36Sopenharmony_ci 128362306a36Sopenharmony_ci return device->ops->write(device, buf, count, ppos); 128462306a36Sopenharmony_ci} 128562306a36Sopenharmony_ci 128662306a36Sopenharmony_cistatic int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) 128762306a36Sopenharmony_ci{ 128862306a36Sopenharmony_ci struct vfio_device_file *df = filep->private_data; 128962306a36Sopenharmony_ci struct vfio_device *device = df->device; 129062306a36Sopenharmony_ci 129162306a36Sopenharmony_ci /* Paired with smp_store_release() following vfio_df_open() */ 129262306a36Sopenharmony_ci if (!smp_load_acquire(&df->access_granted)) 129362306a36Sopenharmony_ci return -EINVAL; 129462306a36Sopenharmony_ci 129562306a36Sopenharmony_ci if (unlikely(!device->ops->mmap)) 129662306a36Sopenharmony_ci return -EINVAL; 129762306a36Sopenharmony_ci 129862306a36Sopenharmony_ci return device->ops->mmap(device, vma); 129962306a36Sopenharmony_ci} 130062306a36Sopenharmony_ci 130162306a36Sopenharmony_ciconst struct file_operations vfio_device_fops = { 130262306a36Sopenharmony_ci .owner = THIS_MODULE, 130362306a36Sopenharmony_ci .open = vfio_device_fops_cdev_open, 130462306a36Sopenharmony_ci .release = vfio_device_fops_release, 130562306a36Sopenharmony_ci .read = vfio_device_fops_read, 130662306a36Sopenharmony_ci .write = vfio_device_fops_write, 130762306a36Sopenharmony_ci .unlocked_ioctl = vfio_device_fops_unl_ioctl, 130862306a36Sopenharmony_ci .compat_ioctl = compat_ptr_ioctl, 130962306a36Sopenharmony_ci .mmap = vfio_device_fops_mmap, 131062306a36Sopenharmony_ci}; 131162306a36Sopenharmony_ci 131262306a36Sopenharmony_cistatic struct vfio_device *vfio_device_from_file(struct file *file) 131362306a36Sopenharmony_ci{ 131462306a36Sopenharmony_ci struct vfio_device_file *df = file->private_data; 131562306a36Sopenharmony_ci 131662306a36Sopenharmony_ci if (file->f_op != &vfio_device_fops) 131762306a36Sopenharmony_ci return NULL; 131862306a36Sopenharmony_ci return df->device; 131962306a36Sopenharmony_ci} 132062306a36Sopenharmony_ci 132162306a36Sopenharmony_ci/** 132262306a36Sopenharmony_ci * vfio_file_is_valid - True if the file is valid vfio file 132362306a36Sopenharmony_ci * @file: VFIO group file or VFIO device file 132462306a36Sopenharmony_ci */ 132562306a36Sopenharmony_cibool vfio_file_is_valid(struct file *file) 132662306a36Sopenharmony_ci{ 132762306a36Sopenharmony_ci return vfio_group_from_file(file) || 132862306a36Sopenharmony_ci vfio_device_from_file(file); 132962306a36Sopenharmony_ci} 133062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_file_is_valid); 133162306a36Sopenharmony_ci 133262306a36Sopenharmony_ci/** 133362306a36Sopenharmony_ci * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file 133462306a36Sopenharmony_ci * is always CPU cache coherent 133562306a36Sopenharmony_ci * @file: VFIO group file or VFIO device file 133662306a36Sopenharmony_ci * 133762306a36Sopenharmony_ci * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop 133862306a36Sopenharmony_ci * bit in DMA transactions. A return of false indicates that the user has 133962306a36Sopenharmony_ci * rights to access additional instructions such as wbinvd on x86. 134062306a36Sopenharmony_ci */ 134162306a36Sopenharmony_cibool vfio_file_enforced_coherent(struct file *file) 134262306a36Sopenharmony_ci{ 134362306a36Sopenharmony_ci struct vfio_device *device; 134462306a36Sopenharmony_ci struct vfio_group *group; 134562306a36Sopenharmony_ci 134662306a36Sopenharmony_ci group = vfio_group_from_file(file); 134762306a36Sopenharmony_ci if (group) 134862306a36Sopenharmony_ci return vfio_group_enforced_coherent(group); 134962306a36Sopenharmony_ci 135062306a36Sopenharmony_ci device = vfio_device_from_file(file); 135162306a36Sopenharmony_ci if (device) 135262306a36Sopenharmony_ci return device_iommu_capable(device->dev, 135362306a36Sopenharmony_ci IOMMU_CAP_ENFORCE_CACHE_COHERENCY); 135462306a36Sopenharmony_ci 135562306a36Sopenharmony_ci return true; 135662306a36Sopenharmony_ci} 135762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); 135862306a36Sopenharmony_ci 135962306a36Sopenharmony_cistatic void vfio_device_file_set_kvm(struct file *file, struct kvm *kvm) 136062306a36Sopenharmony_ci{ 136162306a36Sopenharmony_ci struct vfio_device_file *df = file->private_data; 136262306a36Sopenharmony_ci 136362306a36Sopenharmony_ci /* 136462306a36Sopenharmony_ci * The kvm is first recorded in the vfio_device_file, and will 136562306a36Sopenharmony_ci * be propagated to vfio_device::kvm when the file is bound to 136662306a36Sopenharmony_ci * iommufd successfully in the vfio device cdev path. 136762306a36Sopenharmony_ci */ 136862306a36Sopenharmony_ci spin_lock(&df->kvm_ref_lock); 136962306a36Sopenharmony_ci df->kvm = kvm; 137062306a36Sopenharmony_ci spin_unlock(&df->kvm_ref_lock); 137162306a36Sopenharmony_ci} 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_ci/** 137462306a36Sopenharmony_ci * vfio_file_set_kvm - Link a kvm with VFIO drivers 137562306a36Sopenharmony_ci * @file: VFIO group file or VFIO device file 137662306a36Sopenharmony_ci * @kvm: KVM to link 137762306a36Sopenharmony_ci * 137862306a36Sopenharmony_ci * When a VFIO device is first opened the KVM will be available in 137962306a36Sopenharmony_ci * device->kvm if one was associated with the file. 138062306a36Sopenharmony_ci */ 138162306a36Sopenharmony_civoid vfio_file_set_kvm(struct file *file, struct kvm *kvm) 138262306a36Sopenharmony_ci{ 138362306a36Sopenharmony_ci struct vfio_group *group; 138462306a36Sopenharmony_ci 138562306a36Sopenharmony_ci group = vfio_group_from_file(file); 138662306a36Sopenharmony_ci if (group) 138762306a36Sopenharmony_ci vfio_group_set_kvm(group, kvm); 138862306a36Sopenharmony_ci 138962306a36Sopenharmony_ci if (vfio_device_from_file(file)) 139062306a36Sopenharmony_ci vfio_device_file_set_kvm(file, kvm); 139162306a36Sopenharmony_ci} 139262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_file_set_kvm); 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_ci/* 139562306a36Sopenharmony_ci * Sub-module support 139662306a36Sopenharmony_ci */ 139762306a36Sopenharmony_ci/* 139862306a36Sopenharmony_ci * Helper for managing a buffer of info chain capabilities, allocate or 139962306a36Sopenharmony_ci * reallocate a buffer with additional @size, filling in @id and @version 140062306a36Sopenharmony_ci * of the capability. A pointer to the new capability is returned. 140162306a36Sopenharmony_ci * 140262306a36Sopenharmony_ci * NB. The chain is based at the head of the buffer, so new entries are 140362306a36Sopenharmony_ci * added to the tail, vfio_info_cap_shift() should be called to fixup the 140462306a36Sopenharmony_ci * next offsets prior to copying to the user buffer. 140562306a36Sopenharmony_ci */ 140662306a36Sopenharmony_cistruct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, 140762306a36Sopenharmony_ci size_t size, u16 id, u16 version) 140862306a36Sopenharmony_ci{ 140962306a36Sopenharmony_ci void *buf; 141062306a36Sopenharmony_ci struct vfio_info_cap_header *header, *tmp; 141162306a36Sopenharmony_ci 141262306a36Sopenharmony_ci /* Ensure that the next capability struct will be aligned */ 141362306a36Sopenharmony_ci size = ALIGN(size, sizeof(u64)); 141462306a36Sopenharmony_ci 141562306a36Sopenharmony_ci buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); 141662306a36Sopenharmony_ci if (!buf) { 141762306a36Sopenharmony_ci kfree(caps->buf); 141862306a36Sopenharmony_ci caps->buf = NULL; 141962306a36Sopenharmony_ci caps->size = 0; 142062306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 142162306a36Sopenharmony_ci } 142262306a36Sopenharmony_ci 142362306a36Sopenharmony_ci caps->buf = buf; 142462306a36Sopenharmony_ci header = buf + caps->size; 142562306a36Sopenharmony_ci 142662306a36Sopenharmony_ci /* Eventually copied to user buffer, zero */ 142762306a36Sopenharmony_ci memset(header, 0, size); 142862306a36Sopenharmony_ci 142962306a36Sopenharmony_ci header->id = id; 143062306a36Sopenharmony_ci header->version = version; 143162306a36Sopenharmony_ci 143262306a36Sopenharmony_ci /* Add to the end of the capability chain */ 143362306a36Sopenharmony_ci for (tmp = buf; tmp->next; tmp = buf + tmp->next) 143462306a36Sopenharmony_ci ; /* nothing */ 143562306a36Sopenharmony_ci 143662306a36Sopenharmony_ci tmp->next = caps->size; 143762306a36Sopenharmony_ci caps->size += size; 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_ci return header; 144062306a36Sopenharmony_ci} 144162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfio_info_cap_add); 144262306a36Sopenharmony_ci 144362306a36Sopenharmony_civoid vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) 144462306a36Sopenharmony_ci{ 144562306a36Sopenharmony_ci struct vfio_info_cap_header *tmp; 144662306a36Sopenharmony_ci void *buf = (void *)caps->buf; 144762306a36Sopenharmony_ci 144862306a36Sopenharmony_ci /* Capability structs should start with proper alignment */ 144962306a36Sopenharmony_ci WARN_ON(!IS_ALIGNED(offset, sizeof(u64))); 145062306a36Sopenharmony_ci 145162306a36Sopenharmony_ci for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset) 145262306a36Sopenharmony_ci tmp->next += offset; 145362306a36Sopenharmony_ci} 145462306a36Sopenharmony_ciEXPORT_SYMBOL(vfio_info_cap_shift); 145562306a36Sopenharmony_ci 145662306a36Sopenharmony_ciint vfio_info_add_capability(struct vfio_info_cap *caps, 145762306a36Sopenharmony_ci struct vfio_info_cap_header *cap, size_t size) 145862306a36Sopenharmony_ci{ 145962306a36Sopenharmony_ci struct vfio_info_cap_header *header; 146062306a36Sopenharmony_ci 146162306a36Sopenharmony_ci header = vfio_info_cap_add(caps, size, cap->id, cap->version); 146262306a36Sopenharmony_ci if (IS_ERR(header)) 146362306a36Sopenharmony_ci return PTR_ERR(header); 146462306a36Sopenharmony_ci 146562306a36Sopenharmony_ci memcpy(header + 1, cap + 1, size - sizeof(*header)); 146662306a36Sopenharmony_ci 146762306a36Sopenharmony_ci return 0; 146862306a36Sopenharmony_ci} 146962306a36Sopenharmony_ciEXPORT_SYMBOL(vfio_info_add_capability); 147062306a36Sopenharmony_ci 147162306a36Sopenharmony_ciint vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, 147262306a36Sopenharmony_ci int max_irq_type, size_t *data_size) 147362306a36Sopenharmony_ci{ 147462306a36Sopenharmony_ci unsigned long minsz; 147562306a36Sopenharmony_ci size_t size; 147662306a36Sopenharmony_ci 147762306a36Sopenharmony_ci minsz = offsetofend(struct vfio_irq_set, count); 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_ci if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) || 148062306a36Sopenharmony_ci (hdr->count >= (U32_MAX - hdr->start)) || 148162306a36Sopenharmony_ci (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | 148262306a36Sopenharmony_ci VFIO_IRQ_SET_ACTION_TYPE_MASK))) 148362306a36Sopenharmony_ci return -EINVAL; 148462306a36Sopenharmony_ci 148562306a36Sopenharmony_ci if (data_size) 148662306a36Sopenharmony_ci *data_size = 0; 148762306a36Sopenharmony_ci 148862306a36Sopenharmony_ci if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs) 148962306a36Sopenharmony_ci return -EINVAL; 149062306a36Sopenharmony_ci 149162306a36Sopenharmony_ci switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { 149262306a36Sopenharmony_ci case VFIO_IRQ_SET_DATA_NONE: 149362306a36Sopenharmony_ci size = 0; 149462306a36Sopenharmony_ci break; 149562306a36Sopenharmony_ci case VFIO_IRQ_SET_DATA_BOOL: 149662306a36Sopenharmony_ci size = sizeof(uint8_t); 149762306a36Sopenharmony_ci break; 149862306a36Sopenharmony_ci case VFIO_IRQ_SET_DATA_EVENTFD: 149962306a36Sopenharmony_ci size = sizeof(int32_t); 150062306a36Sopenharmony_ci break; 150162306a36Sopenharmony_ci default: 150262306a36Sopenharmony_ci return -EINVAL; 150362306a36Sopenharmony_ci } 150462306a36Sopenharmony_ci 150562306a36Sopenharmony_ci if (size) { 150662306a36Sopenharmony_ci if (hdr->argsz - minsz < hdr->count * size) 150762306a36Sopenharmony_ci return -EINVAL; 150862306a36Sopenharmony_ci 150962306a36Sopenharmony_ci if (!data_size) 151062306a36Sopenharmony_ci return -EINVAL; 151162306a36Sopenharmony_ci 151262306a36Sopenharmony_ci *data_size = hdr->count * size; 151362306a36Sopenharmony_ci } 151462306a36Sopenharmony_ci 151562306a36Sopenharmony_ci return 0; 151662306a36Sopenharmony_ci} 151762306a36Sopenharmony_ciEXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); 151862306a36Sopenharmony_ci 151962306a36Sopenharmony_ci/* 152062306a36Sopenharmony_ci * Pin contiguous user pages and return their associated host pages for local 152162306a36Sopenharmony_ci * domain only. 152262306a36Sopenharmony_ci * @device [in] : device 152362306a36Sopenharmony_ci * @iova [in] : starting IOVA of user pages to be pinned. 152462306a36Sopenharmony_ci * @npage [in] : count of pages to be pinned. This count should not 152562306a36Sopenharmony_ci * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 152662306a36Sopenharmony_ci * @prot [in] : protection flags 152762306a36Sopenharmony_ci * @pages[out] : array of host pages 152862306a36Sopenharmony_ci * Return error or number of pages pinned. 152962306a36Sopenharmony_ci * 153062306a36Sopenharmony_ci * A driver may only call this function if the vfio_device was created 153162306a36Sopenharmony_ci * by vfio_register_emulated_iommu_dev() due to vfio_device_container_pin_pages(). 153262306a36Sopenharmony_ci */ 153362306a36Sopenharmony_ciint vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, 153462306a36Sopenharmony_ci int npage, int prot, struct page **pages) 153562306a36Sopenharmony_ci{ 153662306a36Sopenharmony_ci /* group->container cannot change while a vfio device is open */ 153762306a36Sopenharmony_ci if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device))) 153862306a36Sopenharmony_ci return -EINVAL; 153962306a36Sopenharmony_ci if (!device->ops->dma_unmap) 154062306a36Sopenharmony_ci return -EINVAL; 154162306a36Sopenharmony_ci if (vfio_device_has_container(device)) 154262306a36Sopenharmony_ci return vfio_device_container_pin_pages(device, iova, 154362306a36Sopenharmony_ci npage, prot, pages); 154462306a36Sopenharmony_ci if (device->iommufd_access) { 154562306a36Sopenharmony_ci int ret; 154662306a36Sopenharmony_ci 154762306a36Sopenharmony_ci if (iova > ULONG_MAX) 154862306a36Sopenharmony_ci return -EINVAL; 154962306a36Sopenharmony_ci /* 155062306a36Sopenharmony_ci * VFIO ignores the sub page offset, npages is from the start of 155162306a36Sopenharmony_ci * a PAGE_SIZE chunk of IOVA. The caller is expected to recover 155262306a36Sopenharmony_ci * the sub page offset by doing: 155362306a36Sopenharmony_ci * pages[0] + (iova % PAGE_SIZE) 155462306a36Sopenharmony_ci */ 155562306a36Sopenharmony_ci ret = iommufd_access_pin_pages( 155662306a36Sopenharmony_ci device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE), 155762306a36Sopenharmony_ci npage * PAGE_SIZE, pages, 155862306a36Sopenharmony_ci (prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0); 155962306a36Sopenharmony_ci if (ret) 156062306a36Sopenharmony_ci return ret; 156162306a36Sopenharmony_ci return npage; 156262306a36Sopenharmony_ci } 156362306a36Sopenharmony_ci return -EINVAL; 156462306a36Sopenharmony_ci} 156562306a36Sopenharmony_ciEXPORT_SYMBOL(vfio_pin_pages); 156662306a36Sopenharmony_ci 156762306a36Sopenharmony_ci/* 156862306a36Sopenharmony_ci * Unpin contiguous host pages for local domain only. 156962306a36Sopenharmony_ci * @device [in] : device 157062306a36Sopenharmony_ci * @iova [in] : starting address of user pages to be unpinned. 157162306a36Sopenharmony_ci * @npage [in] : count of pages to be unpinned. This count should not 157262306a36Sopenharmony_ci * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 157362306a36Sopenharmony_ci */ 157462306a36Sopenharmony_civoid vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) 157562306a36Sopenharmony_ci{ 157662306a36Sopenharmony_ci if (WARN_ON(!vfio_assert_device_open(device))) 157762306a36Sopenharmony_ci return; 157862306a36Sopenharmony_ci if (WARN_ON(!device->ops->dma_unmap)) 157962306a36Sopenharmony_ci return; 158062306a36Sopenharmony_ci 158162306a36Sopenharmony_ci if (vfio_device_has_container(device)) { 158262306a36Sopenharmony_ci vfio_device_container_unpin_pages(device, iova, npage); 158362306a36Sopenharmony_ci return; 158462306a36Sopenharmony_ci } 158562306a36Sopenharmony_ci if (device->iommufd_access) { 158662306a36Sopenharmony_ci if (WARN_ON(iova > ULONG_MAX)) 158762306a36Sopenharmony_ci return; 158862306a36Sopenharmony_ci iommufd_access_unpin_pages(device->iommufd_access, 158962306a36Sopenharmony_ci ALIGN_DOWN(iova, PAGE_SIZE), 159062306a36Sopenharmony_ci npage * PAGE_SIZE); 159162306a36Sopenharmony_ci return; 159262306a36Sopenharmony_ci } 159362306a36Sopenharmony_ci} 159462306a36Sopenharmony_ciEXPORT_SYMBOL(vfio_unpin_pages); 159562306a36Sopenharmony_ci 159662306a36Sopenharmony_ci/* 159762306a36Sopenharmony_ci * This interface allows the CPUs to perform some sort of virtual DMA on 159862306a36Sopenharmony_ci * behalf of the device. 159962306a36Sopenharmony_ci * 160062306a36Sopenharmony_ci * CPUs read/write from/into a range of IOVAs pointing to user space memory 160162306a36Sopenharmony_ci * into/from a kernel buffer. 160262306a36Sopenharmony_ci * 160362306a36Sopenharmony_ci * As the read/write of user space memory is conducted via the CPUs and is 160462306a36Sopenharmony_ci * not a real device DMA, it is not necessary to pin the user space memory. 160562306a36Sopenharmony_ci * 160662306a36Sopenharmony_ci * @device [in] : VFIO device 160762306a36Sopenharmony_ci * @iova [in] : base IOVA of a user space buffer 160862306a36Sopenharmony_ci * @data [in] : pointer to kernel buffer 160962306a36Sopenharmony_ci * @len [in] : kernel buffer length 161062306a36Sopenharmony_ci * @write : indicate read or write 161162306a36Sopenharmony_ci * Return error code on failure or 0 on success. 161262306a36Sopenharmony_ci */ 161362306a36Sopenharmony_ciint vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, 161462306a36Sopenharmony_ci size_t len, bool write) 161562306a36Sopenharmony_ci{ 161662306a36Sopenharmony_ci if (!data || len <= 0 || !vfio_assert_device_open(device)) 161762306a36Sopenharmony_ci return -EINVAL; 161862306a36Sopenharmony_ci 161962306a36Sopenharmony_ci if (vfio_device_has_container(device)) 162062306a36Sopenharmony_ci return vfio_device_container_dma_rw(device, iova, 162162306a36Sopenharmony_ci data, len, write); 162262306a36Sopenharmony_ci 162362306a36Sopenharmony_ci if (device->iommufd_access) { 162462306a36Sopenharmony_ci unsigned int flags = 0; 162562306a36Sopenharmony_ci 162662306a36Sopenharmony_ci if (iova > ULONG_MAX) 162762306a36Sopenharmony_ci return -EINVAL; 162862306a36Sopenharmony_ci 162962306a36Sopenharmony_ci /* VFIO historically tries to auto-detect a kthread */ 163062306a36Sopenharmony_ci if (!current->mm) 163162306a36Sopenharmony_ci flags |= IOMMUFD_ACCESS_RW_KTHREAD; 163262306a36Sopenharmony_ci if (write) 163362306a36Sopenharmony_ci flags |= IOMMUFD_ACCESS_RW_WRITE; 163462306a36Sopenharmony_ci return iommufd_access_rw(device->iommufd_access, iova, data, 163562306a36Sopenharmony_ci len, flags); 163662306a36Sopenharmony_ci } 163762306a36Sopenharmony_ci return -EINVAL; 163862306a36Sopenharmony_ci} 163962306a36Sopenharmony_ciEXPORT_SYMBOL(vfio_dma_rw); 164062306a36Sopenharmony_ci 164162306a36Sopenharmony_ci/* 164262306a36Sopenharmony_ci * Module/class support 164362306a36Sopenharmony_ci */ 164462306a36Sopenharmony_cistatic int __init vfio_init(void) 164562306a36Sopenharmony_ci{ 164662306a36Sopenharmony_ci int ret; 164762306a36Sopenharmony_ci 164862306a36Sopenharmony_ci ida_init(&vfio.device_ida); 164962306a36Sopenharmony_ci 165062306a36Sopenharmony_ci ret = vfio_group_init(); 165162306a36Sopenharmony_ci if (ret) 165262306a36Sopenharmony_ci return ret; 165362306a36Sopenharmony_ci 165462306a36Sopenharmony_ci ret = vfio_virqfd_init(); 165562306a36Sopenharmony_ci if (ret) 165662306a36Sopenharmony_ci goto err_virqfd; 165762306a36Sopenharmony_ci 165862306a36Sopenharmony_ci /* /sys/class/vfio-dev/vfioX */ 165962306a36Sopenharmony_ci vfio.device_class = class_create("vfio-dev"); 166062306a36Sopenharmony_ci if (IS_ERR(vfio.device_class)) { 166162306a36Sopenharmony_ci ret = PTR_ERR(vfio.device_class); 166262306a36Sopenharmony_ci goto err_dev_class; 166362306a36Sopenharmony_ci } 166462306a36Sopenharmony_ci 166562306a36Sopenharmony_ci ret = vfio_cdev_init(vfio.device_class); 166662306a36Sopenharmony_ci if (ret) 166762306a36Sopenharmony_ci goto err_alloc_dev_chrdev; 166862306a36Sopenharmony_ci 166962306a36Sopenharmony_ci pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); 167062306a36Sopenharmony_ci return 0; 167162306a36Sopenharmony_ci 167262306a36Sopenharmony_cierr_alloc_dev_chrdev: 167362306a36Sopenharmony_ci class_destroy(vfio.device_class); 167462306a36Sopenharmony_ci vfio.device_class = NULL; 167562306a36Sopenharmony_cierr_dev_class: 167662306a36Sopenharmony_ci vfio_virqfd_exit(); 167762306a36Sopenharmony_cierr_virqfd: 167862306a36Sopenharmony_ci vfio_group_cleanup(); 167962306a36Sopenharmony_ci return ret; 168062306a36Sopenharmony_ci} 168162306a36Sopenharmony_ci 168262306a36Sopenharmony_cistatic void __exit vfio_cleanup(void) 168362306a36Sopenharmony_ci{ 168462306a36Sopenharmony_ci ida_destroy(&vfio.device_ida); 168562306a36Sopenharmony_ci vfio_cdev_cleanup(); 168662306a36Sopenharmony_ci class_destroy(vfio.device_class); 168762306a36Sopenharmony_ci vfio.device_class = NULL; 168862306a36Sopenharmony_ci vfio_virqfd_exit(); 168962306a36Sopenharmony_ci vfio_group_cleanup(); 169062306a36Sopenharmony_ci xa_destroy(&vfio_device_set_xa); 169162306a36Sopenharmony_ci} 169262306a36Sopenharmony_ci 169362306a36Sopenharmony_cimodule_init(vfio_init); 169462306a36Sopenharmony_cimodule_exit(vfio_cleanup); 169562306a36Sopenharmony_ci 169662306a36Sopenharmony_ciMODULE_VERSION(DRIVER_VERSION); 169762306a36Sopenharmony_ciMODULE_LICENSE("GPL v2"); 169862306a36Sopenharmony_ciMODULE_AUTHOR(DRIVER_AUTHOR); 169962306a36Sopenharmony_ciMODULE_DESCRIPTION(DRIVER_DESC); 170062306a36Sopenharmony_ciMODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce"); 1701