// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 */
462306a36Sopenharmony_ci#include <linux/iommufd.h>
562306a36Sopenharmony_ci#include <linux/slab.h>
662306a36Sopenharmony_ci#include <linux/iommu.h>
762306a36Sopenharmony_ci#include <uapi/linux/iommufd.h>
862306a36Sopenharmony_ci#include "../iommu-priv.h"
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include "io_pagetable.h"
1162306a36Sopenharmony_ci#include "iommufd_private.h"
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_cistatic bool allow_unsafe_interrupts;
1462306a36Sopenharmony_cimodule_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
1562306a36Sopenharmony_ciMODULE_PARM_DESC(
1662306a36Sopenharmony_ci	allow_unsafe_interrupts,
1762306a36Sopenharmony_ci	"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
1862306a36Sopenharmony_ci	"the MSI interrupt window. Enabling this is a security weakness.");
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_cistatic void iommufd_group_release(struct kref *kref)
2162306a36Sopenharmony_ci{
2262306a36Sopenharmony_ci	struct iommufd_group *igroup =
2362306a36Sopenharmony_ci		container_of(kref, struct iommufd_group, ref);
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci	WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list));
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_ci	xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
2862306a36Sopenharmony_ci		   NULL, GFP_KERNEL);
2962306a36Sopenharmony_ci	iommu_group_put(igroup->group);
3062306a36Sopenharmony_ci	mutex_destroy(&igroup->lock);
3162306a36Sopenharmony_ci	kfree(igroup);
3262306a36Sopenharmony_ci}
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_cistatic void iommufd_put_group(struct iommufd_group *group)
3562306a36Sopenharmony_ci{
3662306a36Sopenharmony_ci	kref_put(&group->ref, iommufd_group_release);
3762306a36Sopenharmony_ci}
3862306a36Sopenharmony_ci
3962306a36Sopenharmony_cistatic bool iommufd_group_try_get(struct iommufd_group *igroup,
4062306a36Sopenharmony_ci				  struct iommu_group *group)
4162306a36Sopenharmony_ci{
4262306a36Sopenharmony_ci	if (!igroup)
4362306a36Sopenharmony_ci		return false;
4462306a36Sopenharmony_ci	/*
4562306a36Sopenharmony_ci	 * group ID's cannot be re-used until the group is put back which does
4662306a36Sopenharmony_ci	 * not happen if we could get an igroup pointer under the xa_lock.
4762306a36Sopenharmony_ci	 */
4862306a36Sopenharmony_ci	if (WARN_ON(igroup->group != group))
4962306a36Sopenharmony_ci		return false;
5062306a36Sopenharmony_ci	return kref_get_unless_zero(&igroup->ref);
5162306a36Sopenharmony_ci}
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ci/*
5462306a36Sopenharmony_ci * iommufd needs to store some more data for each iommu_group, we keep a
5562306a36Sopenharmony_ci * parallel xarray indexed by iommu_group id to hold this instead of putting it
5662306a36Sopenharmony_ci * in the core structure. To keep things simple the iommufd_group memory is
5762306a36Sopenharmony_ci * unique within the iommufd_ctx. This makes it easy to check there are no
5862306a36Sopenharmony_ci * memory leaks.
5962306a36Sopenharmony_ci */
6062306a36Sopenharmony_cistatic struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
6162306a36Sopenharmony_ci					       struct device *dev)
6262306a36Sopenharmony_ci{
6362306a36Sopenharmony_ci	struct iommufd_group *new_igroup;
6462306a36Sopenharmony_ci	struct iommufd_group *cur_igroup;
6562306a36Sopenharmony_ci	struct iommufd_group *igroup;
6662306a36Sopenharmony_ci	struct iommu_group *group;
6762306a36Sopenharmony_ci	unsigned int id;
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci	group = iommu_group_get(dev);
7062306a36Sopenharmony_ci	if (!group)
7162306a36Sopenharmony_ci		return ERR_PTR(-ENODEV);
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci	id = iommu_group_id(group);
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	xa_lock(&ictx->groups);
7662306a36Sopenharmony_ci	igroup = xa_load(&ictx->groups, id);
7762306a36Sopenharmony_ci	if (iommufd_group_try_get(igroup, group)) {
7862306a36Sopenharmony_ci		xa_unlock(&ictx->groups);
7962306a36Sopenharmony_ci		iommu_group_put(group);
8062306a36Sopenharmony_ci		return igroup;
8162306a36Sopenharmony_ci	}
8262306a36Sopenharmony_ci	xa_unlock(&ictx->groups);
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL);
8562306a36Sopenharmony_ci	if (!new_igroup) {
8662306a36Sopenharmony_ci		iommu_group_put(group);
8762306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
8862306a36Sopenharmony_ci	}
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	kref_init(&new_igroup->ref);
9162306a36Sopenharmony_ci	mutex_init(&new_igroup->lock);
9262306a36Sopenharmony_ci	INIT_LIST_HEAD(&new_igroup->device_list);
9362306a36Sopenharmony_ci	new_igroup->sw_msi_start = PHYS_ADDR_MAX;
9462306a36Sopenharmony_ci	/* group reference moves into new_igroup */
9562306a36Sopenharmony_ci	new_igroup->group = group;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci	/*
9862306a36Sopenharmony_ci	 * The ictx is not additionally refcounted here becase all objects using
9962306a36Sopenharmony_ci	 * an igroup must put it before their destroy completes.
10062306a36Sopenharmony_ci	 */
10162306a36Sopenharmony_ci	new_igroup->ictx = ictx;
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci	/*
10462306a36Sopenharmony_ci	 * We dropped the lock so igroup is invalid. NULL is a safe and likely
10562306a36Sopenharmony_ci	 * value to assume for the xa_cmpxchg algorithm.
10662306a36Sopenharmony_ci	 */
10762306a36Sopenharmony_ci	cur_igroup = NULL;
10862306a36Sopenharmony_ci	xa_lock(&ictx->groups);
10962306a36Sopenharmony_ci	while (true) {
11062306a36Sopenharmony_ci		igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup,
11162306a36Sopenharmony_ci				      GFP_KERNEL);
11262306a36Sopenharmony_ci		if (xa_is_err(igroup)) {
11362306a36Sopenharmony_ci			xa_unlock(&ictx->groups);
11462306a36Sopenharmony_ci			iommufd_put_group(new_igroup);
11562306a36Sopenharmony_ci			return ERR_PTR(xa_err(igroup));
11662306a36Sopenharmony_ci		}
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci		/* new_group was successfully installed */
11962306a36Sopenharmony_ci		if (cur_igroup == igroup) {
12062306a36Sopenharmony_ci			xa_unlock(&ictx->groups);
12162306a36Sopenharmony_ci			return new_igroup;
12262306a36Sopenharmony_ci		}
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci		/* Check again if the current group is any good */
12562306a36Sopenharmony_ci		if (iommufd_group_try_get(igroup, group)) {
12662306a36Sopenharmony_ci			xa_unlock(&ictx->groups);
12762306a36Sopenharmony_ci			iommufd_put_group(new_igroup);
12862306a36Sopenharmony_ci			return igroup;
12962306a36Sopenharmony_ci		}
13062306a36Sopenharmony_ci		cur_igroup = igroup;
13162306a36Sopenharmony_ci	}
13262306a36Sopenharmony_ci}
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_civoid iommufd_device_destroy(struct iommufd_object *obj)
13562306a36Sopenharmony_ci{
13662306a36Sopenharmony_ci	struct iommufd_device *idev =
13762306a36Sopenharmony_ci		container_of(obj, struct iommufd_device, obj);
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	iommu_device_release_dma_owner(idev->dev);
14062306a36Sopenharmony_ci	iommufd_put_group(idev->igroup);
14162306a36Sopenharmony_ci	if (!iommufd_selftest_is_mock_dev(idev->dev))
14262306a36Sopenharmony_ci		iommufd_ctx_put(idev->ictx);
14362306a36Sopenharmony_ci}
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci/**
14662306a36Sopenharmony_ci * iommufd_device_bind - Bind a physical device to an iommu fd
14762306a36Sopenharmony_ci * @ictx: iommufd file descriptor
14862306a36Sopenharmony_ci * @dev: Pointer to a physical device struct
14962306a36Sopenharmony_ci * @id: Output ID number to return to userspace for this device
15062306a36Sopenharmony_ci *
15162306a36Sopenharmony_ci * A successful bind establishes an ownership over the device and returns
15262306a36Sopenharmony_ci * struct iommufd_device pointer, otherwise returns error pointer.
15362306a36Sopenharmony_ci *
15462306a36Sopenharmony_ci * A driver using this API must set driver_managed_dma and must not touch
15562306a36Sopenharmony_ci * the device until this routine succeeds and establishes ownership.
15662306a36Sopenharmony_ci *
15762306a36Sopenharmony_ci * Binding a PCI device places the entire RID under iommufd control.
15862306a36Sopenharmony_ci *
15962306a36Sopenharmony_ci * The caller must undo this with iommufd_device_unbind()
16062306a36Sopenharmony_ci */
16162306a36Sopenharmony_cistruct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
16262306a36Sopenharmony_ci					   struct device *dev, u32 *id)
16362306a36Sopenharmony_ci{
16462306a36Sopenharmony_ci	struct iommufd_device *idev;
16562306a36Sopenharmony_ci	struct iommufd_group *igroup;
16662306a36Sopenharmony_ci	int rc;
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	/*
16962306a36Sopenharmony_ci	 * iommufd always sets IOMMU_CACHE because we offer no way for userspace
17062306a36Sopenharmony_ci	 * to restore cache coherency.
17162306a36Sopenharmony_ci	 */
17262306a36Sopenharmony_ci	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
17362306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	igroup = iommufd_get_group(ictx, dev);
17662306a36Sopenharmony_ci	if (IS_ERR(igroup))
17762306a36Sopenharmony_ci		return ERR_CAST(igroup);
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci	/*
18062306a36Sopenharmony_ci	 * For historical compat with VFIO the insecure interrupt path is
18162306a36Sopenharmony_ci	 * allowed if the module parameter is set. Secure/Isolated means that a
18262306a36Sopenharmony_ci	 * MemWr operation from the device (eg a simple DMA) cannot trigger an
18362306a36Sopenharmony_ci	 * interrupt outside this iommufd context.
18462306a36Sopenharmony_ci	 */
18562306a36Sopenharmony_ci	if (!iommufd_selftest_is_mock_dev(dev) &&
18662306a36Sopenharmony_ci	    !iommu_group_has_isolated_msi(igroup->group)) {
18762306a36Sopenharmony_ci		if (!allow_unsafe_interrupts) {
18862306a36Sopenharmony_ci			rc = -EPERM;
18962306a36Sopenharmony_ci			goto out_group_put;
19062306a36Sopenharmony_ci		}
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci		dev_warn(
19362306a36Sopenharmony_ci			dev,
19462306a36Sopenharmony_ci			"MSI interrupts are not secure, they cannot be isolated by the platform. "
19562306a36Sopenharmony_ci			"Check that platform features like interrupt remapping are enabled. "
19662306a36Sopenharmony_ci			"Use the \"allow_unsafe_interrupts\" module parameter to override\n");
19762306a36Sopenharmony_ci	}
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	rc = iommu_device_claim_dma_owner(dev, ictx);
20062306a36Sopenharmony_ci	if (rc)
20162306a36Sopenharmony_ci		goto out_group_put;
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE);
20462306a36Sopenharmony_ci	if (IS_ERR(idev)) {
20562306a36Sopenharmony_ci		rc = PTR_ERR(idev);
20662306a36Sopenharmony_ci		goto out_release_owner;
20762306a36Sopenharmony_ci	}
20862306a36Sopenharmony_ci	idev->ictx = ictx;
20962306a36Sopenharmony_ci	if (!iommufd_selftest_is_mock_dev(dev))
21062306a36Sopenharmony_ci		iommufd_ctx_get(ictx);
21162306a36Sopenharmony_ci	idev->dev = dev;
21262306a36Sopenharmony_ci	idev->enforce_cache_coherency =
21362306a36Sopenharmony_ci		device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
21462306a36Sopenharmony_ci	/* The calling driver is a user until iommufd_device_unbind() */
21562306a36Sopenharmony_ci	refcount_inc(&idev->obj.users);
21662306a36Sopenharmony_ci	/* igroup refcount moves into iommufd_device */
21762306a36Sopenharmony_ci	idev->igroup = igroup;
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	/*
22062306a36Sopenharmony_ci	 * If the caller fails after this success it must call
22162306a36Sopenharmony_ci	 * iommufd_unbind_device() which is safe since we hold this refcount.
22262306a36Sopenharmony_ci	 * This also means the device is a leaf in the graph and no other object
22362306a36Sopenharmony_ci	 * can take a reference on it.
22462306a36Sopenharmony_ci	 */
22562306a36Sopenharmony_ci	iommufd_object_finalize(ictx, &idev->obj);
22662306a36Sopenharmony_ci	*id = idev->obj.id;
22762306a36Sopenharmony_ci	return idev;
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ciout_release_owner:
23062306a36Sopenharmony_ci	iommu_device_release_dma_owner(dev);
23162306a36Sopenharmony_ciout_group_put:
23262306a36Sopenharmony_ci	iommufd_put_group(igroup);
23362306a36Sopenharmony_ci	return ERR_PTR(rc);
23462306a36Sopenharmony_ci}
23562306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD);
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci/**
23862306a36Sopenharmony_ci * iommufd_ctx_has_group - True if any device within the group is bound
23962306a36Sopenharmony_ci *                         to the ictx
24062306a36Sopenharmony_ci * @ictx: iommufd file descriptor
24162306a36Sopenharmony_ci * @group: Pointer to a physical iommu_group struct
24262306a36Sopenharmony_ci *
24362306a36Sopenharmony_ci * True if any device within the group has been bound to this ictx, ex. via
24462306a36Sopenharmony_ci * iommufd_device_bind(), therefore implying ictx ownership of the group.
24562306a36Sopenharmony_ci */
24662306a36Sopenharmony_cibool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group)
24762306a36Sopenharmony_ci{
24862306a36Sopenharmony_ci	struct iommufd_object *obj;
24962306a36Sopenharmony_ci	unsigned long index;
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci	if (!ictx || !group)
25262306a36Sopenharmony_ci		return false;
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	xa_lock(&ictx->objects);
25562306a36Sopenharmony_ci	xa_for_each(&ictx->objects, index, obj) {
25662306a36Sopenharmony_ci		if (obj->type == IOMMUFD_OBJ_DEVICE &&
25762306a36Sopenharmony_ci		    container_of(obj, struct iommufd_device, obj)
25862306a36Sopenharmony_ci				    ->igroup->group == group) {
25962306a36Sopenharmony_ci			xa_unlock(&ictx->objects);
26062306a36Sopenharmony_ci			return true;
26162306a36Sopenharmony_ci		}
26262306a36Sopenharmony_ci	}
26362306a36Sopenharmony_ci	xa_unlock(&ictx->objects);
26462306a36Sopenharmony_ci	return false;
26562306a36Sopenharmony_ci}
26662306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci/**
26962306a36Sopenharmony_ci * iommufd_device_unbind - Undo iommufd_device_bind()
27062306a36Sopenharmony_ci * @idev: Device returned by iommufd_device_bind()
27162306a36Sopenharmony_ci *
27262306a36Sopenharmony_ci * Release the device from iommufd control. The DMA ownership will return back
27362306a36Sopenharmony_ci * to unowned with DMA controlled by the DMA API. This invalidates the
27462306a36Sopenharmony_ci * iommufd_device pointer, other APIs that consume it must not be called
27562306a36Sopenharmony_ci * concurrently.
27662306a36Sopenharmony_ci */
27762306a36Sopenharmony_civoid iommufd_device_unbind(struct iommufd_device *idev)
27862306a36Sopenharmony_ci{
27962306a36Sopenharmony_ci	iommufd_object_destroy_user(idev->ictx, &idev->obj);
28062306a36Sopenharmony_ci}
28162306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_cistruct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev)
28462306a36Sopenharmony_ci{
28562306a36Sopenharmony_ci	return idev->ictx;
28662306a36Sopenharmony_ci}
28762306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD);
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ciu32 iommufd_device_to_id(struct iommufd_device *idev)
29062306a36Sopenharmony_ci{
29162306a36Sopenharmony_ci	return idev->obj.id;
29262306a36Sopenharmony_ci}
29362306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD);
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_cistatic int iommufd_group_setup_msi(struct iommufd_group *igroup,
29662306a36Sopenharmony_ci				   struct iommufd_hw_pagetable *hwpt)
29762306a36Sopenharmony_ci{
29862306a36Sopenharmony_ci	phys_addr_t sw_msi_start = igroup->sw_msi_start;
29962306a36Sopenharmony_ci	int rc;
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci	/*
30262306a36Sopenharmony_ci	 * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to
30362306a36Sopenharmony_ci	 * call iommu_get_msi_cookie() on its behalf. This is necessary to setup
30462306a36Sopenharmony_ci	 * the MSI window so iommu_dma_prepare_msi() can install pages into our
30562306a36Sopenharmony_ci	 * domain after request_irq(). If it is not done interrupts will not
30662306a36Sopenharmony_ci	 * work on this domain.
30762306a36Sopenharmony_ci	 *
30862306a36Sopenharmony_ci	 * FIXME: This is conceptually broken for iommufd since we want to allow
30962306a36Sopenharmony_ci	 * userspace to change the domains, eg switch from an identity IOAS to a
31062306a36Sopenharmony_ci	 * DMA IOAS. There is currently no way to create a MSI window that
31162306a36Sopenharmony_ci	 * matches what the IRQ layer actually expects in a newly created
31262306a36Sopenharmony_ci	 * domain.
31362306a36Sopenharmony_ci	 */
31462306a36Sopenharmony_ci	if (sw_msi_start != PHYS_ADDR_MAX && !hwpt->msi_cookie) {
31562306a36Sopenharmony_ci		rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start);
31662306a36Sopenharmony_ci		if (rc)
31762306a36Sopenharmony_ci			return rc;
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci		/*
32062306a36Sopenharmony_ci		 * iommu_get_msi_cookie() can only be called once per domain,
32162306a36Sopenharmony_ci		 * it returns -EBUSY on later calls.
32262306a36Sopenharmony_ci		 */
32362306a36Sopenharmony_ci		hwpt->msi_cookie = true;
32462306a36Sopenharmony_ci	}
32562306a36Sopenharmony_ci	return 0;
32662306a36Sopenharmony_ci}
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ciint iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
32962306a36Sopenharmony_ci				struct iommufd_device *idev)
33062306a36Sopenharmony_ci{
33162306a36Sopenharmony_ci	int rc;
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci	mutex_lock(&idev->igroup->lock);
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci	if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) {
33662306a36Sopenharmony_ci		rc = -EINVAL;
33762306a36Sopenharmony_ci		goto err_unlock;
33862306a36Sopenharmony_ci	}
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci	/* Try to upgrade the domain we have */
34162306a36Sopenharmony_ci	if (idev->enforce_cache_coherency) {
34262306a36Sopenharmony_ci		rc = iommufd_hw_pagetable_enforce_cc(hwpt);
34362306a36Sopenharmony_ci		if (rc)
34462306a36Sopenharmony_ci			goto err_unlock;
34562306a36Sopenharmony_ci	}
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_ci	rc = iopt_table_enforce_dev_resv_regions(&hwpt->ioas->iopt, idev->dev,
34862306a36Sopenharmony_ci						 &idev->igroup->sw_msi_start);
34962306a36Sopenharmony_ci	if (rc)
35062306a36Sopenharmony_ci		goto err_unlock;
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci	/*
35362306a36Sopenharmony_ci	 * Only attach to the group once for the first device that is in the
35462306a36Sopenharmony_ci	 * group. All the other devices will follow this attachment. The user
35562306a36Sopenharmony_ci	 * should attach every device individually to the hwpt as the per-device
35662306a36Sopenharmony_ci	 * reserved regions are only updated during individual device
35762306a36Sopenharmony_ci	 * attachment.
35862306a36Sopenharmony_ci	 */
35962306a36Sopenharmony_ci	if (list_empty(&idev->igroup->device_list)) {
36062306a36Sopenharmony_ci		rc = iommufd_group_setup_msi(idev->igroup, hwpt);
36162306a36Sopenharmony_ci		if (rc)
36262306a36Sopenharmony_ci			goto err_unresv;
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci		rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
36562306a36Sopenharmony_ci		if (rc)
36662306a36Sopenharmony_ci			goto err_unresv;
36762306a36Sopenharmony_ci		idev->igroup->hwpt = hwpt;
36862306a36Sopenharmony_ci	}
36962306a36Sopenharmony_ci	refcount_inc(&hwpt->obj.users);
37062306a36Sopenharmony_ci	list_add_tail(&idev->group_item, &idev->igroup->device_list);
37162306a36Sopenharmony_ci	mutex_unlock(&idev->igroup->lock);
37262306a36Sopenharmony_ci	return 0;
37362306a36Sopenharmony_cierr_unresv:
37462306a36Sopenharmony_ci	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
37562306a36Sopenharmony_cierr_unlock:
37662306a36Sopenharmony_ci	mutex_unlock(&idev->igroup->lock);
37762306a36Sopenharmony_ci	return rc;
37862306a36Sopenharmony_ci}
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_cistruct iommufd_hw_pagetable *
38162306a36Sopenharmony_ciiommufd_hw_pagetable_detach(struct iommufd_device *idev)
38262306a36Sopenharmony_ci{
38362306a36Sopenharmony_ci	struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_ci	mutex_lock(&idev->igroup->lock);
38662306a36Sopenharmony_ci	list_del(&idev->group_item);
38762306a36Sopenharmony_ci	if (list_empty(&idev->igroup->device_list)) {
38862306a36Sopenharmony_ci		iommu_detach_group(hwpt->domain, idev->igroup->group);
38962306a36Sopenharmony_ci		idev->igroup->hwpt = NULL;
39062306a36Sopenharmony_ci	}
39162306a36Sopenharmony_ci	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
39262306a36Sopenharmony_ci	mutex_unlock(&idev->igroup->lock);
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci	/* Caller must destroy hwpt */
39562306a36Sopenharmony_ci	return hwpt;
39662306a36Sopenharmony_ci}
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_cistatic struct iommufd_hw_pagetable *
39962306a36Sopenharmony_ciiommufd_device_do_attach(struct iommufd_device *idev,
40062306a36Sopenharmony_ci			 struct iommufd_hw_pagetable *hwpt)
40162306a36Sopenharmony_ci{
40262306a36Sopenharmony_ci	int rc;
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci	rc = iommufd_hw_pagetable_attach(hwpt, idev);
40562306a36Sopenharmony_ci	if (rc)
40662306a36Sopenharmony_ci		return ERR_PTR(rc);
40762306a36Sopenharmony_ci	return NULL;
40862306a36Sopenharmony_ci}
40962306a36Sopenharmony_ci
41062306a36Sopenharmony_cistatic struct iommufd_hw_pagetable *
41162306a36Sopenharmony_ciiommufd_device_do_replace(struct iommufd_device *idev,
41262306a36Sopenharmony_ci			  struct iommufd_hw_pagetable *hwpt)
41362306a36Sopenharmony_ci{
41462306a36Sopenharmony_ci	struct iommufd_group *igroup = idev->igroup;
41562306a36Sopenharmony_ci	struct iommufd_hw_pagetable *old_hwpt;
41662306a36Sopenharmony_ci	unsigned int num_devices = 0;
41762306a36Sopenharmony_ci	struct iommufd_device *cur;
41862306a36Sopenharmony_ci	int rc;
41962306a36Sopenharmony_ci
42062306a36Sopenharmony_ci	mutex_lock(&idev->igroup->lock);
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_ci	if (igroup->hwpt == NULL) {
42362306a36Sopenharmony_ci		rc = -EINVAL;
42462306a36Sopenharmony_ci		goto err_unlock;
42562306a36Sopenharmony_ci	}
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_ci	if (hwpt == igroup->hwpt) {
42862306a36Sopenharmony_ci		mutex_unlock(&idev->igroup->lock);
42962306a36Sopenharmony_ci		return NULL;
43062306a36Sopenharmony_ci	}
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	/* Try to upgrade the domain we have */
43362306a36Sopenharmony_ci	list_for_each_entry(cur, &igroup->device_list, group_item) {
43462306a36Sopenharmony_ci		num_devices++;
43562306a36Sopenharmony_ci		if (cur->enforce_cache_coherency) {
43662306a36Sopenharmony_ci			rc = iommufd_hw_pagetable_enforce_cc(hwpt);
43762306a36Sopenharmony_ci			if (rc)
43862306a36Sopenharmony_ci				goto err_unlock;
43962306a36Sopenharmony_ci		}
44062306a36Sopenharmony_ci	}
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_ci	old_hwpt = igroup->hwpt;
44362306a36Sopenharmony_ci	if (hwpt->ioas != old_hwpt->ioas) {
44462306a36Sopenharmony_ci		list_for_each_entry(cur, &igroup->device_list, group_item) {
44562306a36Sopenharmony_ci			rc = iopt_table_enforce_dev_resv_regions(
44662306a36Sopenharmony_ci				&hwpt->ioas->iopt, cur->dev, NULL);
44762306a36Sopenharmony_ci			if (rc)
44862306a36Sopenharmony_ci				goto err_unresv;
44962306a36Sopenharmony_ci		}
45062306a36Sopenharmony_ci	}
45162306a36Sopenharmony_ci
45262306a36Sopenharmony_ci	rc = iommufd_group_setup_msi(idev->igroup, hwpt);
45362306a36Sopenharmony_ci	if (rc)
45462306a36Sopenharmony_ci		goto err_unresv;
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci	rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
45762306a36Sopenharmony_ci	if (rc)
45862306a36Sopenharmony_ci		goto err_unresv;
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci	if (hwpt->ioas != old_hwpt->ioas) {
46162306a36Sopenharmony_ci		list_for_each_entry(cur, &igroup->device_list, group_item)
46262306a36Sopenharmony_ci			iopt_remove_reserved_iova(&old_hwpt->ioas->iopt,
46362306a36Sopenharmony_ci						  cur->dev);
46462306a36Sopenharmony_ci	}
46562306a36Sopenharmony_ci
46662306a36Sopenharmony_ci	igroup->hwpt = hwpt;
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	/*
46962306a36Sopenharmony_ci	 * Move the refcounts held by the device_list to the new hwpt. Retain a
47062306a36Sopenharmony_ci	 * refcount for this thread as the caller will free it.
47162306a36Sopenharmony_ci	 */
47262306a36Sopenharmony_ci	refcount_add(num_devices, &hwpt->obj.users);
47362306a36Sopenharmony_ci	if (num_devices > 1)
47462306a36Sopenharmony_ci		WARN_ON(refcount_sub_and_test(num_devices - 1,
47562306a36Sopenharmony_ci					      &old_hwpt->obj.users));
47662306a36Sopenharmony_ci	mutex_unlock(&idev->igroup->lock);
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci	/* Caller must destroy old_hwpt */
47962306a36Sopenharmony_ci	return old_hwpt;
48062306a36Sopenharmony_cierr_unresv:
48162306a36Sopenharmony_ci	list_for_each_entry(cur, &igroup->device_list, group_item)
48262306a36Sopenharmony_ci		iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev);
48362306a36Sopenharmony_cierr_unlock:
48462306a36Sopenharmony_ci	mutex_unlock(&idev->igroup->lock);
48562306a36Sopenharmony_ci	return ERR_PTR(rc);
48662306a36Sopenharmony_ci}
48762306a36Sopenharmony_ci
/*
 * Signature shared by iommufd_device_do_attach() and
 * iommufd_device_do_replace(): returns an ERR_PTR on failure, otherwise NULL
 * or an old hwpt that the caller has to put.
 */
typedef struct iommufd_hw_pagetable *(*attach_fn)(
	struct iommufd_device *idev,
	struct iommufd_hw_pagetable *hwpt);
49062306a36Sopenharmony_ci
49162306a36Sopenharmony_ci/*
49262306a36Sopenharmony_ci * When automatically managing the domains we search for a compatible domain in
49362306a36Sopenharmony_ci * the iopt and if one is found use it, otherwise create a new domain.
49462306a36Sopenharmony_ci * Automatic domain selection will never pick a manually created domain.
49562306a36Sopenharmony_ci */
49662306a36Sopenharmony_cistatic struct iommufd_hw_pagetable *
49762306a36Sopenharmony_ciiommufd_device_auto_get_domain(struct iommufd_device *idev,
49862306a36Sopenharmony_ci			       struct iommufd_ioas *ioas, u32 *pt_id,
49962306a36Sopenharmony_ci			       attach_fn do_attach)
50062306a36Sopenharmony_ci{
50162306a36Sopenharmony_ci	/*
50262306a36Sopenharmony_ci	 * iommufd_hw_pagetable_attach() is called by
50362306a36Sopenharmony_ci	 * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as
50462306a36Sopenharmony_ci	 * iommufd_device_do_attach(). So if we are in this mode then we prefer
50562306a36Sopenharmony_ci	 * to use the immediate_attach path as it supports drivers that can't
50662306a36Sopenharmony_ci	 * directly allocate a domain.
50762306a36Sopenharmony_ci	 */
50862306a36Sopenharmony_ci	bool immediate_attach = do_attach == iommufd_device_do_attach;
50962306a36Sopenharmony_ci	struct iommufd_hw_pagetable *destroy_hwpt;
51062306a36Sopenharmony_ci	struct iommufd_hw_pagetable *hwpt;
51162306a36Sopenharmony_ci
51262306a36Sopenharmony_ci	/*
51362306a36Sopenharmony_ci	 * There is no differentiation when domains are allocated, so any domain
51462306a36Sopenharmony_ci	 * that is willing to attach to the device is interchangeable with any
51562306a36Sopenharmony_ci	 * other.
51662306a36Sopenharmony_ci	 */
51762306a36Sopenharmony_ci	mutex_lock(&ioas->mutex);
51862306a36Sopenharmony_ci	list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) {
51962306a36Sopenharmony_ci		if (!hwpt->auto_domain)
52062306a36Sopenharmony_ci			continue;
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci		if (!iommufd_lock_obj(&hwpt->obj))
52362306a36Sopenharmony_ci			continue;
52462306a36Sopenharmony_ci		destroy_hwpt = (*do_attach)(idev, hwpt);
52562306a36Sopenharmony_ci		if (IS_ERR(destroy_hwpt)) {
52662306a36Sopenharmony_ci			iommufd_put_object(&hwpt->obj);
52762306a36Sopenharmony_ci			/*
52862306a36Sopenharmony_ci			 * -EINVAL means the domain is incompatible with the
52962306a36Sopenharmony_ci			 * device. Other error codes should propagate to
53062306a36Sopenharmony_ci			 * userspace as failure. Success means the domain is
53162306a36Sopenharmony_ci			 * attached.
53262306a36Sopenharmony_ci			 */
53362306a36Sopenharmony_ci			if (PTR_ERR(destroy_hwpt) == -EINVAL)
53462306a36Sopenharmony_ci				continue;
53562306a36Sopenharmony_ci			goto out_unlock;
53662306a36Sopenharmony_ci		}
53762306a36Sopenharmony_ci		*pt_id = hwpt->obj.id;
53862306a36Sopenharmony_ci		iommufd_put_object(&hwpt->obj);
53962306a36Sopenharmony_ci		goto out_unlock;
54062306a36Sopenharmony_ci	}
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ci	hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev,
54362306a36Sopenharmony_ci					  immediate_attach);
54462306a36Sopenharmony_ci	if (IS_ERR(hwpt)) {
54562306a36Sopenharmony_ci		destroy_hwpt = ERR_CAST(hwpt);
54662306a36Sopenharmony_ci		goto out_unlock;
54762306a36Sopenharmony_ci	}
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_ci	if (!immediate_attach) {
55062306a36Sopenharmony_ci		destroy_hwpt = (*do_attach)(idev, hwpt);
55162306a36Sopenharmony_ci		if (IS_ERR(destroy_hwpt))
55262306a36Sopenharmony_ci			goto out_abort;
55362306a36Sopenharmony_ci	} else {
55462306a36Sopenharmony_ci		destroy_hwpt = NULL;
55562306a36Sopenharmony_ci	}
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	hwpt->auto_domain = true;
55862306a36Sopenharmony_ci	*pt_id = hwpt->obj.id;
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci	iommufd_object_finalize(idev->ictx, &hwpt->obj);
56162306a36Sopenharmony_ci	mutex_unlock(&ioas->mutex);
56262306a36Sopenharmony_ci	return destroy_hwpt;
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ciout_abort:
56562306a36Sopenharmony_ci	iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj);
56662306a36Sopenharmony_ciout_unlock:
56762306a36Sopenharmony_ci	mutex_unlock(&ioas->mutex);
56862306a36Sopenharmony_ci	return destroy_hwpt;
56962306a36Sopenharmony_ci}
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_cistatic int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
57262306a36Sopenharmony_ci				    attach_fn do_attach)
57362306a36Sopenharmony_ci{
57462306a36Sopenharmony_ci	struct iommufd_hw_pagetable *destroy_hwpt;
57562306a36Sopenharmony_ci	struct iommufd_object *pt_obj;
57662306a36Sopenharmony_ci
57762306a36Sopenharmony_ci	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
57862306a36Sopenharmony_ci	if (IS_ERR(pt_obj))
57962306a36Sopenharmony_ci		return PTR_ERR(pt_obj);
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci	switch (pt_obj->type) {
58262306a36Sopenharmony_ci	case IOMMUFD_OBJ_HW_PAGETABLE: {
58362306a36Sopenharmony_ci		struct iommufd_hw_pagetable *hwpt =
58462306a36Sopenharmony_ci			container_of(pt_obj, struct iommufd_hw_pagetable, obj);
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci		destroy_hwpt = (*do_attach)(idev, hwpt);
58762306a36Sopenharmony_ci		if (IS_ERR(destroy_hwpt))
58862306a36Sopenharmony_ci			goto out_put_pt_obj;
58962306a36Sopenharmony_ci		break;
59062306a36Sopenharmony_ci	}
59162306a36Sopenharmony_ci	case IOMMUFD_OBJ_IOAS: {
59262306a36Sopenharmony_ci		struct iommufd_ioas *ioas =
59362306a36Sopenharmony_ci			container_of(pt_obj, struct iommufd_ioas, obj);
59462306a36Sopenharmony_ci
59562306a36Sopenharmony_ci		destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id,
59662306a36Sopenharmony_ci							      do_attach);
59762306a36Sopenharmony_ci		if (IS_ERR(destroy_hwpt))
59862306a36Sopenharmony_ci			goto out_put_pt_obj;
59962306a36Sopenharmony_ci		break;
60062306a36Sopenharmony_ci	}
60162306a36Sopenharmony_ci	default:
60262306a36Sopenharmony_ci		destroy_hwpt = ERR_PTR(-EINVAL);
60362306a36Sopenharmony_ci		goto out_put_pt_obj;
60462306a36Sopenharmony_ci	}
60562306a36Sopenharmony_ci	iommufd_put_object(pt_obj);
60662306a36Sopenharmony_ci
60762306a36Sopenharmony_ci	/* This destruction has to be after we unlock everything */
60862306a36Sopenharmony_ci	if (destroy_hwpt)
60962306a36Sopenharmony_ci		iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt);
61062306a36Sopenharmony_ci	return 0;
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_ciout_put_pt_obj:
61362306a36Sopenharmony_ci	iommufd_put_object(pt_obj);
61462306a36Sopenharmony_ci	return PTR_ERR(destroy_hwpt);
61562306a36Sopenharmony_ci}
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_ci/**
61862306a36Sopenharmony_ci * iommufd_device_attach - Connect a device to an iommu_domain
61962306a36Sopenharmony_ci * @idev: device to attach
62062306a36Sopenharmony_ci * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
62162306a36Sopenharmony_ci *         Output the IOMMUFD_OBJ_HW_PAGETABLE ID
62262306a36Sopenharmony_ci *
62362306a36Sopenharmony_ci * This connects the device to an iommu_domain, either automatically or manually
62462306a36Sopenharmony_ci * selected. Once this completes the device could do DMA.
62562306a36Sopenharmony_ci *
62662306a36Sopenharmony_ci * The caller should return the resulting pt_id back to userspace.
62762306a36Sopenharmony_ci * This function is undone by calling iommufd_device_detach().
62862306a36Sopenharmony_ci */
62962306a36Sopenharmony_ciint iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
63062306a36Sopenharmony_ci{
63162306a36Sopenharmony_ci	int rc;
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_ci	rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach);
63462306a36Sopenharmony_ci	if (rc)
63562306a36Sopenharmony_ci		return rc;
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci	/*
63862306a36Sopenharmony_ci	 * Pairs with iommufd_device_detach() - catches caller bugs attempting
63962306a36Sopenharmony_ci	 * to destroy a device with an attachment.
64062306a36Sopenharmony_ci	 */
64162306a36Sopenharmony_ci	refcount_inc(&idev->obj.users);
64262306a36Sopenharmony_ci	return 0;
64362306a36Sopenharmony_ci}
64462306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
64562306a36Sopenharmony_ci
64662306a36Sopenharmony_ci/**
64762306a36Sopenharmony_ci * iommufd_device_replace - Change the device's iommu_domain
64862306a36Sopenharmony_ci * @idev: device to change
64962306a36Sopenharmony_ci * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
65062306a36Sopenharmony_ci *         Output the IOMMUFD_OBJ_HW_PAGETABLE ID
65162306a36Sopenharmony_ci *
65262306a36Sopenharmony_ci * This is the same as::
65362306a36Sopenharmony_ci *
65462306a36Sopenharmony_ci *   iommufd_device_detach();
65562306a36Sopenharmony_ci *   iommufd_device_attach();
65662306a36Sopenharmony_ci *
65762306a36Sopenharmony_ci * If it fails then no change is made to the attachment. The iommu driver may
65862306a36Sopenharmony_ci * implement this so there is no disruption in translation. This can only be
65962306a36Sopenharmony_ci * called if iommufd_device_attach() has already succeeded.
66062306a36Sopenharmony_ci */
66162306a36Sopenharmony_ciint iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id)
66262306a36Sopenharmony_ci{
66362306a36Sopenharmony_ci	return iommufd_device_change_pt(idev, pt_id,
66462306a36Sopenharmony_ci					&iommufd_device_do_replace);
66562306a36Sopenharmony_ci}
66662306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD);
66762306a36Sopenharmony_ci
66862306a36Sopenharmony_ci/**
66962306a36Sopenharmony_ci * iommufd_device_detach - Disconnect a device to an iommu_domain
67062306a36Sopenharmony_ci * @idev: device to detach
67162306a36Sopenharmony_ci *
67262306a36Sopenharmony_ci * Undo iommufd_device_attach(). This disconnects the idev from the previously
67362306a36Sopenharmony_ci * attached pt_id. The device returns back to a blocked DMA translation.
67462306a36Sopenharmony_ci */
67562306a36Sopenharmony_civoid iommufd_device_detach(struct iommufd_device *idev)
67662306a36Sopenharmony_ci{
67762306a36Sopenharmony_ci	struct iommufd_hw_pagetable *hwpt;
67862306a36Sopenharmony_ci
67962306a36Sopenharmony_ci	hwpt = iommufd_hw_pagetable_detach(idev);
68062306a36Sopenharmony_ci	iommufd_hw_pagetable_put(idev->ictx, hwpt);
68162306a36Sopenharmony_ci	refcount_dec(&idev->obj.users);
68262306a36Sopenharmony_ci}
68362306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD);
68462306a36Sopenharmony_ci
68562306a36Sopenharmony_ci/*
68662306a36Sopenharmony_ci * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at
68762306a36Sopenharmony_ci * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should
68862306a36Sopenharmony_ci * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas.
68962306a36Sopenharmony_ci */
69062306a36Sopenharmony_cistatic int iommufd_access_change_ioas(struct iommufd_access *access,
69162306a36Sopenharmony_ci				      struct iommufd_ioas *new_ioas)
69262306a36Sopenharmony_ci{
69362306a36Sopenharmony_ci	u32 iopt_access_list_id = access->iopt_access_list_id;
69462306a36Sopenharmony_ci	struct iommufd_ioas *cur_ioas = access->ioas;
69562306a36Sopenharmony_ci	int rc;
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci	lockdep_assert_held(&access->ioas_lock);
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci	/* We are racing with a concurrent detach, bail */
70062306a36Sopenharmony_ci	if (cur_ioas != access->ioas_unpin)
70162306a36Sopenharmony_ci		return -EBUSY;
70262306a36Sopenharmony_ci
70362306a36Sopenharmony_ci	if (cur_ioas == new_ioas)
70462306a36Sopenharmony_ci		return 0;
70562306a36Sopenharmony_ci
70662306a36Sopenharmony_ci	/*
70762306a36Sopenharmony_ci	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
70862306a36Sopenharmony_ci	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
70962306a36Sopenharmony_ci	 */
71062306a36Sopenharmony_ci	access->ioas = NULL;
71162306a36Sopenharmony_ci
71262306a36Sopenharmony_ci	if (new_ioas) {
71362306a36Sopenharmony_ci		rc = iopt_add_access(&new_ioas->iopt, access);
71462306a36Sopenharmony_ci		if (rc) {
71562306a36Sopenharmony_ci			access->ioas = cur_ioas;
71662306a36Sopenharmony_ci			return rc;
71762306a36Sopenharmony_ci		}
71862306a36Sopenharmony_ci		refcount_inc(&new_ioas->obj.users);
71962306a36Sopenharmony_ci	}
72062306a36Sopenharmony_ci
72162306a36Sopenharmony_ci	if (cur_ioas) {
72262306a36Sopenharmony_ci		if (access->ops->unmap) {
72362306a36Sopenharmony_ci			mutex_unlock(&access->ioas_lock);
72462306a36Sopenharmony_ci			access->ops->unmap(access->data, 0, ULONG_MAX);
72562306a36Sopenharmony_ci			mutex_lock(&access->ioas_lock);
72662306a36Sopenharmony_ci		}
72762306a36Sopenharmony_ci		iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id);
72862306a36Sopenharmony_ci		refcount_dec(&cur_ioas->obj.users);
72962306a36Sopenharmony_ci	}
73062306a36Sopenharmony_ci
73162306a36Sopenharmony_ci	access->ioas = new_ioas;
73262306a36Sopenharmony_ci	access->ioas_unpin = new_ioas;
73362306a36Sopenharmony_ci
73462306a36Sopenharmony_ci	return 0;
73562306a36Sopenharmony_ci}
73662306a36Sopenharmony_ci
73762306a36Sopenharmony_cistatic int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id)
73862306a36Sopenharmony_ci{
73962306a36Sopenharmony_ci	struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id);
74062306a36Sopenharmony_ci	int rc;
74162306a36Sopenharmony_ci
74262306a36Sopenharmony_ci	if (IS_ERR(ioas))
74362306a36Sopenharmony_ci		return PTR_ERR(ioas);
74462306a36Sopenharmony_ci	rc = iommufd_access_change_ioas(access, ioas);
74562306a36Sopenharmony_ci	iommufd_put_object(&ioas->obj);
74662306a36Sopenharmony_ci	return rc;
74762306a36Sopenharmony_ci}
74862306a36Sopenharmony_ci
74962306a36Sopenharmony_civoid iommufd_access_destroy_object(struct iommufd_object *obj)
75062306a36Sopenharmony_ci{
75162306a36Sopenharmony_ci	struct iommufd_access *access =
75262306a36Sopenharmony_ci		container_of(obj, struct iommufd_access, obj);
75362306a36Sopenharmony_ci
75462306a36Sopenharmony_ci	mutex_lock(&access->ioas_lock);
75562306a36Sopenharmony_ci	if (access->ioas)
75662306a36Sopenharmony_ci		WARN_ON(iommufd_access_change_ioas(access, NULL));
75762306a36Sopenharmony_ci	mutex_unlock(&access->ioas_lock);
75862306a36Sopenharmony_ci	iommufd_ctx_put(access->ictx);
75962306a36Sopenharmony_ci}
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_ci/**
76262306a36Sopenharmony_ci * iommufd_access_create - Create an iommufd_access
76362306a36Sopenharmony_ci * @ictx: iommufd file descriptor
76462306a36Sopenharmony_ci * @ops: Driver's ops to associate with the access
76562306a36Sopenharmony_ci * @data: Opaque data to pass into ops functions
76662306a36Sopenharmony_ci * @id: Output ID number to return to userspace for this access
76762306a36Sopenharmony_ci *
76862306a36Sopenharmony_ci * An iommufd_access allows a driver to read/write to the IOAS without using
76962306a36Sopenharmony_ci * DMA. The underlying CPU memory can be accessed using the
77062306a36Sopenharmony_ci * iommufd_access_pin_pages() or iommufd_access_rw() functions.
77162306a36Sopenharmony_ci *
77262306a36Sopenharmony_ci * The provided ops are required to use iommufd_access_pin_pages().
77362306a36Sopenharmony_ci */
77462306a36Sopenharmony_cistruct iommufd_access *
77562306a36Sopenharmony_ciiommufd_access_create(struct iommufd_ctx *ictx,
77662306a36Sopenharmony_ci		      const struct iommufd_access_ops *ops, void *data, u32 *id)
77762306a36Sopenharmony_ci{
77862306a36Sopenharmony_ci	struct iommufd_access *access;
77962306a36Sopenharmony_ci
78062306a36Sopenharmony_ci	/*
78162306a36Sopenharmony_ci	 * There is no uAPI for the access object, but to keep things symmetric
78262306a36Sopenharmony_ci	 * use the object infrastructure anyhow.
78362306a36Sopenharmony_ci	 */
78462306a36Sopenharmony_ci	access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
78562306a36Sopenharmony_ci	if (IS_ERR(access))
78662306a36Sopenharmony_ci		return access;
78762306a36Sopenharmony_ci
78862306a36Sopenharmony_ci	access->data = data;
78962306a36Sopenharmony_ci	access->ops = ops;
79062306a36Sopenharmony_ci
79162306a36Sopenharmony_ci	if (ops->needs_pin_pages)
79262306a36Sopenharmony_ci		access->iova_alignment = PAGE_SIZE;
79362306a36Sopenharmony_ci	else
79462306a36Sopenharmony_ci		access->iova_alignment = 1;
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci	/* The calling driver is a user until iommufd_access_destroy() */
79762306a36Sopenharmony_ci	refcount_inc(&access->obj.users);
79862306a36Sopenharmony_ci	access->ictx = ictx;
79962306a36Sopenharmony_ci	iommufd_ctx_get(ictx);
80062306a36Sopenharmony_ci	iommufd_object_finalize(ictx, &access->obj);
80162306a36Sopenharmony_ci	*id = access->obj.id;
80262306a36Sopenharmony_ci	mutex_init(&access->ioas_lock);
80362306a36Sopenharmony_ci	return access;
80462306a36Sopenharmony_ci}
80562306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
80662306a36Sopenharmony_ci
80762306a36Sopenharmony_ci/**
80862306a36Sopenharmony_ci * iommufd_access_destroy - Destroy an iommufd_access
80962306a36Sopenharmony_ci * @access: The access to destroy
81062306a36Sopenharmony_ci *
81162306a36Sopenharmony_ci * The caller must stop using the access before destroying it.
81262306a36Sopenharmony_ci */
81362306a36Sopenharmony_civoid iommufd_access_destroy(struct iommufd_access *access)
81462306a36Sopenharmony_ci{
81562306a36Sopenharmony_ci	iommufd_object_destroy_user(access->ictx, &access->obj);
81662306a36Sopenharmony_ci}
81762306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_civoid iommufd_access_detach(struct iommufd_access *access)
82062306a36Sopenharmony_ci{
82162306a36Sopenharmony_ci	mutex_lock(&access->ioas_lock);
82262306a36Sopenharmony_ci	if (WARN_ON(!access->ioas)) {
82362306a36Sopenharmony_ci		mutex_unlock(&access->ioas_lock);
82462306a36Sopenharmony_ci		return;
82562306a36Sopenharmony_ci	}
82662306a36Sopenharmony_ci	WARN_ON(iommufd_access_change_ioas(access, NULL));
82762306a36Sopenharmony_ci	mutex_unlock(&access->ioas_lock);
82862306a36Sopenharmony_ci}
82962306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD);
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_ciint iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
83262306a36Sopenharmony_ci{
83362306a36Sopenharmony_ci	int rc;
83462306a36Sopenharmony_ci
83562306a36Sopenharmony_ci	mutex_lock(&access->ioas_lock);
83662306a36Sopenharmony_ci	if (WARN_ON(access->ioas)) {
83762306a36Sopenharmony_ci		mutex_unlock(&access->ioas_lock);
83862306a36Sopenharmony_ci		return -EINVAL;
83962306a36Sopenharmony_ci	}
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ci	rc = iommufd_access_change_ioas_id(access, ioas_id);
84262306a36Sopenharmony_ci	mutex_unlock(&access->ioas_lock);
84362306a36Sopenharmony_ci	return rc;
84462306a36Sopenharmony_ci}
84562306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD);
84662306a36Sopenharmony_ci
84762306a36Sopenharmony_ciint iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
84862306a36Sopenharmony_ci{
84962306a36Sopenharmony_ci	int rc;
85062306a36Sopenharmony_ci
85162306a36Sopenharmony_ci	mutex_lock(&access->ioas_lock);
85262306a36Sopenharmony_ci	if (!access->ioas) {
85362306a36Sopenharmony_ci		mutex_unlock(&access->ioas_lock);
85462306a36Sopenharmony_ci		return -ENOENT;
85562306a36Sopenharmony_ci	}
85662306a36Sopenharmony_ci	rc = iommufd_access_change_ioas_id(access, ioas_id);
85762306a36Sopenharmony_ci	mutex_unlock(&access->ioas_lock);
85862306a36Sopenharmony_ci	return rc;
85962306a36Sopenharmony_ci}
86062306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD);
86162306a36Sopenharmony_ci
86262306a36Sopenharmony_ci/**
86362306a36Sopenharmony_ci * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
86462306a36Sopenharmony_ci * @iopt: iopt to work on
86562306a36Sopenharmony_ci * @iova: Starting iova in the iopt
86662306a36Sopenharmony_ci * @length: Number of bytes
86762306a36Sopenharmony_ci *
86862306a36Sopenharmony_ci * After this function returns there should be no users attached to the pages
86962306a36Sopenharmony_ci * linked to this iopt that intersect with iova,length. Anyone that has attached
87062306a36Sopenharmony_ci * a user through iopt_access_pages() needs to detach it through
87162306a36Sopenharmony_ci * iommufd_access_unpin_pages() before this function returns.
87262306a36Sopenharmony_ci *
87362306a36Sopenharmony_ci * iommufd_access_destroy() will wait for any outstanding unmap callback to
87462306a36Sopenharmony_ci * complete. Once iommufd_access_destroy() no unmap ops are running or will
87562306a36Sopenharmony_ci * run in the future. Due to this a driver must not create locking that prevents
87662306a36Sopenharmony_ci * unmap to complete while iommufd_access_destroy() is running.
87762306a36Sopenharmony_ci */
87862306a36Sopenharmony_civoid iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
87962306a36Sopenharmony_ci				 unsigned long length)
88062306a36Sopenharmony_ci{
88162306a36Sopenharmony_ci	struct iommufd_ioas *ioas =
88262306a36Sopenharmony_ci		container_of(iopt, struct iommufd_ioas, iopt);
88362306a36Sopenharmony_ci	struct iommufd_access *access;
88462306a36Sopenharmony_ci	unsigned long index;
88562306a36Sopenharmony_ci
88662306a36Sopenharmony_ci	xa_lock(&ioas->iopt.access_list);
88762306a36Sopenharmony_ci	xa_for_each(&ioas->iopt.access_list, index, access) {
88862306a36Sopenharmony_ci		if (!iommufd_lock_obj(&access->obj))
88962306a36Sopenharmony_ci			continue;
89062306a36Sopenharmony_ci		xa_unlock(&ioas->iopt.access_list);
89162306a36Sopenharmony_ci
89262306a36Sopenharmony_ci		access->ops->unmap(access->data, iova, length);
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci		iommufd_put_object(&access->obj);
89562306a36Sopenharmony_ci		xa_lock(&ioas->iopt.access_list);
89662306a36Sopenharmony_ci	}
89762306a36Sopenharmony_ci	xa_unlock(&ioas->iopt.access_list);
89862306a36Sopenharmony_ci}
89962306a36Sopenharmony_ci
90062306a36Sopenharmony_ci/**
90162306a36Sopenharmony_ci * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages
90262306a36Sopenharmony_ci * @access: IOAS access to act on
90362306a36Sopenharmony_ci * @iova: Starting IOVA
90462306a36Sopenharmony_ci * @length: Number of bytes to access
90562306a36Sopenharmony_ci *
90662306a36Sopenharmony_ci * Return the struct page's. The caller must stop accessing them before calling
90762306a36Sopenharmony_ci * this. The iova/length must exactly match the one provided to access_pages.
90862306a36Sopenharmony_ci */
90962306a36Sopenharmony_civoid iommufd_access_unpin_pages(struct iommufd_access *access,
91062306a36Sopenharmony_ci				unsigned long iova, unsigned long length)
91162306a36Sopenharmony_ci{
91262306a36Sopenharmony_ci	struct iopt_area_contig_iter iter;
91362306a36Sopenharmony_ci	struct io_pagetable *iopt;
91462306a36Sopenharmony_ci	unsigned long last_iova;
91562306a36Sopenharmony_ci	struct iopt_area *area;
91662306a36Sopenharmony_ci
91762306a36Sopenharmony_ci	if (WARN_ON(!length) ||
91862306a36Sopenharmony_ci	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
91962306a36Sopenharmony_ci		return;
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_ci	mutex_lock(&access->ioas_lock);
92262306a36Sopenharmony_ci	/*
92362306a36Sopenharmony_ci	 * The driver must be doing something wrong if it calls this before an
92462306a36Sopenharmony_ci	 * iommufd_access_attach() or after an iommufd_access_detach().
92562306a36Sopenharmony_ci	 */
92662306a36Sopenharmony_ci	if (WARN_ON(!access->ioas_unpin)) {
92762306a36Sopenharmony_ci		mutex_unlock(&access->ioas_lock);
92862306a36Sopenharmony_ci		return;
92962306a36Sopenharmony_ci	}
93062306a36Sopenharmony_ci	iopt = &access->ioas_unpin->iopt;
93162306a36Sopenharmony_ci
93262306a36Sopenharmony_ci	down_read(&iopt->iova_rwsem);
93362306a36Sopenharmony_ci	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
93462306a36Sopenharmony_ci		iopt_area_remove_access(
93562306a36Sopenharmony_ci			area, iopt_area_iova_to_index(area, iter.cur_iova),
93662306a36Sopenharmony_ci			iopt_area_iova_to_index(
93762306a36Sopenharmony_ci				area,
93862306a36Sopenharmony_ci				min(last_iova, iopt_area_last_iova(area))));
93962306a36Sopenharmony_ci	WARN_ON(!iopt_area_contig_done(&iter));
94062306a36Sopenharmony_ci	up_read(&iopt->iova_rwsem);
94162306a36Sopenharmony_ci	mutex_unlock(&access->ioas_lock);
94262306a36Sopenharmony_ci}
94362306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD);
94462306a36Sopenharmony_ci
94562306a36Sopenharmony_cistatic bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
94662306a36Sopenharmony_ci{
94762306a36Sopenharmony_ci	if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
94862306a36Sopenharmony_ci		return false;
94962306a36Sopenharmony_ci
95062306a36Sopenharmony_ci	if (!iopt_area_contig_done(iter) &&
95162306a36Sopenharmony_ci	    (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
95262306a36Sopenharmony_ci	     PAGE_SIZE) != (PAGE_SIZE - 1))
95362306a36Sopenharmony_ci		return false;
95462306a36Sopenharmony_ci	return true;
95562306a36Sopenharmony_ci}
95662306a36Sopenharmony_ci
95762306a36Sopenharmony_cistatic bool check_area_prot(struct iopt_area *area, unsigned int flags)
95862306a36Sopenharmony_ci{
95962306a36Sopenharmony_ci	if (flags & IOMMUFD_ACCESS_RW_WRITE)
96062306a36Sopenharmony_ci		return area->iommu_prot & IOMMU_WRITE;
96162306a36Sopenharmony_ci	return area->iommu_prot & IOMMU_READ;
96262306a36Sopenharmony_ci}
96362306a36Sopenharmony_ci
96462306a36Sopenharmony_ci/**
96562306a36Sopenharmony_ci * iommufd_access_pin_pages() - Return a list of pages under the iova
96662306a36Sopenharmony_ci * @access: IOAS access to act on
96762306a36Sopenharmony_ci * @iova: Starting IOVA
96862306a36Sopenharmony_ci * @length: Number of bytes to access
96962306a36Sopenharmony_ci * @out_pages: Output page list
97062306a36Sopenharmony_ci * @flags: IOPMMUFD_ACCESS_RW_* flags
97162306a36Sopenharmony_ci *
97262306a36Sopenharmony_ci * Reads @length bytes starting at iova and returns the struct page * pointers.
97362306a36Sopenharmony_ci * These can be kmap'd by the caller for CPU access.
97462306a36Sopenharmony_ci *
97562306a36Sopenharmony_ci * The caller must perform iommufd_access_unpin_pages() when done to balance
97662306a36Sopenharmony_ci * this.
97762306a36Sopenharmony_ci *
97862306a36Sopenharmony_ci * This API always requires a page aligned iova. This happens naturally if the
97962306a36Sopenharmony_ci * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However
98062306a36Sopenharmony_ci * smaller alignments have corner cases where this API can fail on otherwise
98162306a36Sopenharmony_ci * aligned iova.
98262306a36Sopenharmony_ci */
98362306a36Sopenharmony_ciint iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
98462306a36Sopenharmony_ci			     unsigned long length, struct page **out_pages,
98562306a36Sopenharmony_ci			     unsigned int flags)
98662306a36Sopenharmony_ci{
98762306a36Sopenharmony_ci	struct iopt_area_contig_iter iter;
98862306a36Sopenharmony_ci	struct io_pagetable *iopt;
98962306a36Sopenharmony_ci	unsigned long last_iova;
99062306a36Sopenharmony_ci	struct iopt_area *area;
99162306a36Sopenharmony_ci	int rc;
99262306a36Sopenharmony_ci
99362306a36Sopenharmony_ci	/* Driver's ops don't support pin_pages */
99462306a36Sopenharmony_ci	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
99562306a36Sopenharmony_ci	    WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
99662306a36Sopenharmony_ci		return -EINVAL;
99762306a36Sopenharmony_ci
99862306a36Sopenharmony_ci	if (!length)
99962306a36Sopenharmony_ci		return -EINVAL;
100062306a36Sopenharmony_ci	if (check_add_overflow(iova, length - 1, &last_iova))
100162306a36Sopenharmony_ci		return -EOVERFLOW;
100262306a36Sopenharmony_ci
100362306a36Sopenharmony_ci	mutex_lock(&access->ioas_lock);
100462306a36Sopenharmony_ci	if (!access->ioas) {
100562306a36Sopenharmony_ci		mutex_unlock(&access->ioas_lock);
100662306a36Sopenharmony_ci		return -ENOENT;
100762306a36Sopenharmony_ci	}
100862306a36Sopenharmony_ci	iopt = &access->ioas->iopt;
100962306a36Sopenharmony_ci
101062306a36Sopenharmony_ci	down_read(&iopt->iova_rwsem);
101162306a36Sopenharmony_ci	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
101262306a36Sopenharmony_ci		unsigned long last = min(last_iova, iopt_area_last_iova(area));
101362306a36Sopenharmony_ci		unsigned long last_index = iopt_area_iova_to_index(area, last);
101462306a36Sopenharmony_ci		unsigned long index =
101562306a36Sopenharmony_ci			iopt_area_iova_to_index(area, iter.cur_iova);
101662306a36Sopenharmony_ci
101762306a36Sopenharmony_ci		if (area->prevent_access ||
101862306a36Sopenharmony_ci		    !iopt_area_contig_is_aligned(&iter)) {
101962306a36Sopenharmony_ci			rc = -EINVAL;
102062306a36Sopenharmony_ci			goto err_remove;
102162306a36Sopenharmony_ci		}
102262306a36Sopenharmony_ci
102362306a36Sopenharmony_ci		if (!check_area_prot(area, flags)) {
102462306a36Sopenharmony_ci			rc = -EPERM;
102562306a36Sopenharmony_ci			goto err_remove;
102662306a36Sopenharmony_ci		}
102762306a36Sopenharmony_ci
102862306a36Sopenharmony_ci		rc = iopt_area_add_access(area, index, last_index, out_pages,
102962306a36Sopenharmony_ci					  flags);
103062306a36Sopenharmony_ci		if (rc)
103162306a36Sopenharmony_ci			goto err_remove;
103262306a36Sopenharmony_ci		out_pages += last_index - index + 1;
103362306a36Sopenharmony_ci	}
103462306a36Sopenharmony_ci	if (!iopt_area_contig_done(&iter)) {
103562306a36Sopenharmony_ci		rc = -ENOENT;
103662306a36Sopenharmony_ci		goto err_remove;
103762306a36Sopenharmony_ci	}
103862306a36Sopenharmony_ci
103962306a36Sopenharmony_ci	up_read(&iopt->iova_rwsem);
104062306a36Sopenharmony_ci	mutex_unlock(&access->ioas_lock);
104162306a36Sopenharmony_ci	return 0;
104262306a36Sopenharmony_ci
104362306a36Sopenharmony_cierr_remove:
104462306a36Sopenharmony_ci	if (iova < iter.cur_iova) {
104562306a36Sopenharmony_ci		last_iova = iter.cur_iova - 1;
104662306a36Sopenharmony_ci		iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
104762306a36Sopenharmony_ci			iopt_area_remove_access(
104862306a36Sopenharmony_ci				area,
104962306a36Sopenharmony_ci				iopt_area_iova_to_index(area, iter.cur_iova),
105062306a36Sopenharmony_ci				iopt_area_iova_to_index(
105162306a36Sopenharmony_ci					area, min(last_iova,
105262306a36Sopenharmony_ci						  iopt_area_last_iova(area))));
105362306a36Sopenharmony_ci	}
105462306a36Sopenharmony_ci	up_read(&iopt->iova_rwsem);
105562306a36Sopenharmony_ci	mutex_unlock(&access->ioas_lock);
105662306a36Sopenharmony_ci	return rc;
105762306a36Sopenharmony_ci}
105862306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
105962306a36Sopenharmony_ci
106062306a36Sopenharmony_ci/**
106162306a36Sopenharmony_ci * iommufd_access_rw - Read or write data under the iova
106262306a36Sopenharmony_ci * @access: IOAS access to act on
106362306a36Sopenharmony_ci * @iova: Starting IOVA
106462306a36Sopenharmony_ci * @data: Kernel buffer to copy to/from
106562306a36Sopenharmony_ci * @length: Number of bytes to access
106662306a36Sopenharmony_ci * @flags: IOMMUFD_ACCESS_RW_* flags
106762306a36Sopenharmony_ci *
106862306a36Sopenharmony_ci * Copy kernel to/from data into the range given by IOVA/length. If flags
106962306a36Sopenharmony_ci * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized
107062306a36Sopenharmony_ci * by changing it into copy_to/from_user().
107162306a36Sopenharmony_ci */
107262306a36Sopenharmony_ciint iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
107362306a36Sopenharmony_ci		      void *data, size_t length, unsigned int flags)
107462306a36Sopenharmony_ci{
107562306a36Sopenharmony_ci	struct iopt_area_contig_iter iter;
107662306a36Sopenharmony_ci	struct io_pagetable *iopt;
107762306a36Sopenharmony_ci	struct iopt_area *area;
107862306a36Sopenharmony_ci	unsigned long last_iova;
107962306a36Sopenharmony_ci	int rc;
108062306a36Sopenharmony_ci
108162306a36Sopenharmony_ci	if (!length)
108262306a36Sopenharmony_ci		return -EINVAL;
108362306a36Sopenharmony_ci	if (check_add_overflow(iova, length - 1, &last_iova))
108462306a36Sopenharmony_ci		return -EOVERFLOW;
108562306a36Sopenharmony_ci
108662306a36Sopenharmony_ci	mutex_lock(&access->ioas_lock);
108762306a36Sopenharmony_ci	if (!access->ioas) {
108862306a36Sopenharmony_ci		mutex_unlock(&access->ioas_lock);
108962306a36Sopenharmony_ci		return -ENOENT;
109062306a36Sopenharmony_ci	}
109162306a36Sopenharmony_ci	iopt = &access->ioas->iopt;
109262306a36Sopenharmony_ci
109362306a36Sopenharmony_ci	down_read(&iopt->iova_rwsem);
109462306a36Sopenharmony_ci	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
109562306a36Sopenharmony_ci		unsigned long last = min(last_iova, iopt_area_last_iova(area));
109662306a36Sopenharmony_ci		unsigned long bytes = (last - iter.cur_iova) + 1;
109762306a36Sopenharmony_ci
109862306a36Sopenharmony_ci		if (area->prevent_access) {
109962306a36Sopenharmony_ci			rc = -EINVAL;
110062306a36Sopenharmony_ci			goto err_out;
110162306a36Sopenharmony_ci		}
110262306a36Sopenharmony_ci
110362306a36Sopenharmony_ci		if (!check_area_prot(area, flags)) {
110462306a36Sopenharmony_ci			rc = -EPERM;
110562306a36Sopenharmony_ci			goto err_out;
110662306a36Sopenharmony_ci		}
110762306a36Sopenharmony_ci
110862306a36Sopenharmony_ci		rc = iopt_pages_rw_access(
110962306a36Sopenharmony_ci			area->pages, iopt_area_start_byte(area, iter.cur_iova),
111062306a36Sopenharmony_ci			data, bytes, flags);
111162306a36Sopenharmony_ci		if (rc)
111262306a36Sopenharmony_ci			goto err_out;
111362306a36Sopenharmony_ci		data += bytes;
111462306a36Sopenharmony_ci	}
111562306a36Sopenharmony_ci	if (!iopt_area_contig_done(&iter))
111662306a36Sopenharmony_ci		rc = -ENOENT;
111762306a36Sopenharmony_cierr_out:
111862306a36Sopenharmony_ci	up_read(&iopt->iova_rwsem);
111962306a36Sopenharmony_ci	mutex_unlock(&access->ioas_lock);
112062306a36Sopenharmony_ci	return rc;
112162306a36Sopenharmony_ci}
112262306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD);
112362306a36Sopenharmony_ci
112462306a36Sopenharmony_ciint iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
112562306a36Sopenharmony_ci{
112662306a36Sopenharmony_ci	struct iommu_hw_info *cmd = ucmd->cmd;
112762306a36Sopenharmony_ci	void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr);
112862306a36Sopenharmony_ci	const struct iommu_ops *ops;
112962306a36Sopenharmony_ci	struct iommufd_device *idev;
113062306a36Sopenharmony_ci	unsigned int data_len;
113162306a36Sopenharmony_ci	unsigned int copy_len;
113262306a36Sopenharmony_ci	void *data;
113362306a36Sopenharmony_ci	int rc;
113462306a36Sopenharmony_ci
113562306a36Sopenharmony_ci	if (cmd->flags || cmd->__reserved)
113662306a36Sopenharmony_ci		return -EOPNOTSUPP;
113762306a36Sopenharmony_ci
113862306a36Sopenharmony_ci	idev = iommufd_get_device(ucmd, cmd->dev_id);
113962306a36Sopenharmony_ci	if (IS_ERR(idev))
114062306a36Sopenharmony_ci		return PTR_ERR(idev);
114162306a36Sopenharmony_ci
114262306a36Sopenharmony_ci	ops = dev_iommu_ops(idev->dev);
114362306a36Sopenharmony_ci	if (ops->hw_info) {
114462306a36Sopenharmony_ci		data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type);
114562306a36Sopenharmony_ci		if (IS_ERR(data)) {
114662306a36Sopenharmony_ci			rc = PTR_ERR(data);
114762306a36Sopenharmony_ci			goto out_put;
114862306a36Sopenharmony_ci		}
114962306a36Sopenharmony_ci
115062306a36Sopenharmony_ci		/*
115162306a36Sopenharmony_ci		 * drivers that have hw_info callback should have a unique
115262306a36Sopenharmony_ci		 * iommu_hw_info_type.
115362306a36Sopenharmony_ci		 */
115462306a36Sopenharmony_ci		if (WARN_ON_ONCE(cmd->out_data_type ==
115562306a36Sopenharmony_ci				 IOMMU_HW_INFO_TYPE_NONE)) {
115662306a36Sopenharmony_ci			rc = -ENODEV;
115762306a36Sopenharmony_ci			goto out_free;
115862306a36Sopenharmony_ci		}
115962306a36Sopenharmony_ci	} else {
116062306a36Sopenharmony_ci		cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE;
116162306a36Sopenharmony_ci		data_len = 0;
116262306a36Sopenharmony_ci		data = NULL;
116362306a36Sopenharmony_ci	}
116462306a36Sopenharmony_ci
116562306a36Sopenharmony_ci	copy_len = min(cmd->data_len, data_len);
116662306a36Sopenharmony_ci	if (copy_to_user(user_ptr, data, copy_len)) {
116762306a36Sopenharmony_ci		rc = -EFAULT;
116862306a36Sopenharmony_ci		goto out_free;
116962306a36Sopenharmony_ci	}
117062306a36Sopenharmony_ci
117162306a36Sopenharmony_ci	/*
117262306a36Sopenharmony_ci	 * Zero the trailing bytes if the user buffer is bigger than the
117362306a36Sopenharmony_ci	 * data size kernel actually has.
117462306a36Sopenharmony_ci	 */
117562306a36Sopenharmony_ci	if (copy_len < cmd->data_len) {
117662306a36Sopenharmony_ci		if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) {
117762306a36Sopenharmony_ci			rc = -EFAULT;
117862306a36Sopenharmony_ci			goto out_free;
117962306a36Sopenharmony_ci		}
118062306a36Sopenharmony_ci	}
118162306a36Sopenharmony_ci
118262306a36Sopenharmony_ci	/*
118362306a36Sopenharmony_ci	 * We return the length the kernel supports so userspace may know what
118462306a36Sopenharmony_ci	 * the kernel capability is. It could be larger than the input buffer.
118562306a36Sopenharmony_ci	 */
118662306a36Sopenharmony_ci	cmd->data_len = data_len;
118762306a36Sopenharmony_ci
118862306a36Sopenharmony_ci	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
118962306a36Sopenharmony_ciout_free:
119062306a36Sopenharmony_ci	kfree(data);
119162306a36Sopenharmony_ciout_put:
119262306a36Sopenharmony_ci	iommufd_put_object(&idev->obj);
119362306a36Sopenharmony_ci	return rc;
119462306a36Sopenharmony_ci}
1195