// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 */
462306a36Sopenharmony_ci#include <linux/file.h>
562306a36Sopenharmony_ci#include <linux/interval_tree.h>
662306a36Sopenharmony_ci#include <linux/iommu.h>
762306a36Sopenharmony_ci#include <linux/iommufd.h>
862306a36Sopenharmony_ci#include <linux/slab.h>
962306a36Sopenharmony_ci#include <linux/vfio.h>
1062306a36Sopenharmony_ci#include <uapi/linux/vfio.h>
1162306a36Sopenharmony_ci#include <uapi/linux/iommufd.h>
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#include "iommufd_private.h"
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_cistatic struct iommufd_ioas *get_compat_ioas(struct iommufd_ctx *ictx)
1662306a36Sopenharmony_ci{
1762306a36Sopenharmony_ci	struct iommufd_ioas *ioas = ERR_PTR(-ENODEV);
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci	xa_lock(&ictx->objects);
2062306a36Sopenharmony_ci	if (!ictx->vfio_ioas || !iommufd_lock_obj(&ictx->vfio_ioas->obj))
2162306a36Sopenharmony_ci		goto out_unlock;
2262306a36Sopenharmony_ci	ioas = ictx->vfio_ioas;
2362306a36Sopenharmony_ciout_unlock:
2462306a36Sopenharmony_ci	xa_unlock(&ictx->objects);
2562306a36Sopenharmony_ci	return ioas;
2662306a36Sopenharmony_ci}
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_ci/**
2962306a36Sopenharmony_ci * iommufd_vfio_compat_ioas_get_id - Ensure a compat IOAS exists
3062306a36Sopenharmony_ci * @ictx: Context to operate on
3162306a36Sopenharmony_ci * @out_ioas_id: The IOAS ID of the compatibility IOAS
3262306a36Sopenharmony_ci *
3362306a36Sopenharmony_ci * Return the ID of the current compatibility IOAS. The ID can be passed into
3462306a36Sopenharmony_ci * other functions that take an ioas_id.
3562306a36Sopenharmony_ci */
3662306a36Sopenharmony_ciint iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
3762306a36Sopenharmony_ci{
3862306a36Sopenharmony_ci	struct iommufd_ioas *ioas;
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci	ioas = get_compat_ioas(ictx);
4162306a36Sopenharmony_ci	if (IS_ERR(ioas))
4262306a36Sopenharmony_ci		return PTR_ERR(ioas);
4362306a36Sopenharmony_ci	*out_ioas_id = ioas->obj.id;
4462306a36Sopenharmony_ci	iommufd_put_object(&ioas->obj);
4562306a36Sopenharmony_ci	return 0;
4662306a36Sopenharmony_ci}
4762306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO);
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci/**
5062306a36Sopenharmony_ci * iommufd_vfio_compat_set_no_iommu - Called when a no-iommu device is attached
5162306a36Sopenharmony_ci * @ictx: Context to operate on
5262306a36Sopenharmony_ci *
5362306a36Sopenharmony_ci * This allows selecting the VFIO_NOIOMMU_IOMMU and blocks normal types.
5462306a36Sopenharmony_ci */
5562306a36Sopenharmony_ciint iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx)
5662306a36Sopenharmony_ci{
5762306a36Sopenharmony_ci	int ret;
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci	xa_lock(&ictx->objects);
6062306a36Sopenharmony_ci	if (!ictx->vfio_ioas) {
6162306a36Sopenharmony_ci		ictx->no_iommu_mode = 1;
6262306a36Sopenharmony_ci		ret = 0;
6362306a36Sopenharmony_ci	} else {
6462306a36Sopenharmony_ci		ret = -EINVAL;
6562306a36Sopenharmony_ci	}
6662306a36Sopenharmony_ci	xa_unlock(&ictx->objects);
6762306a36Sopenharmony_ci	return ret;
6862306a36Sopenharmony_ci}
6962306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_set_no_iommu, IOMMUFD_VFIO);
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci/**
7262306a36Sopenharmony_ci * iommufd_vfio_compat_ioas_create - Ensure the compat IOAS is created
7362306a36Sopenharmony_ci * @ictx: Context to operate on
7462306a36Sopenharmony_ci *
7562306a36Sopenharmony_ci * The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate
7662306a36Sopenharmony_ci * on since they do not have an IOAS ID input in their ABI. Only attaching a
7762306a36Sopenharmony_ci * group should cause a default creation of the internal ioas, this does nothing
7862306a36Sopenharmony_ci * if an existing ioas has already been assigned somehow.
7962306a36Sopenharmony_ci */
8062306a36Sopenharmony_ciint iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx)
8162306a36Sopenharmony_ci{
8262306a36Sopenharmony_ci	struct iommufd_ioas *ioas = NULL;
8362306a36Sopenharmony_ci	int ret;
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	ioas = iommufd_ioas_alloc(ictx);
8662306a36Sopenharmony_ci	if (IS_ERR(ioas))
8762306a36Sopenharmony_ci		return PTR_ERR(ioas);
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	xa_lock(&ictx->objects);
9062306a36Sopenharmony_ci	/*
9162306a36Sopenharmony_ci	 * VFIO won't allow attaching a container to both iommu and no iommu
9262306a36Sopenharmony_ci	 * operation
9362306a36Sopenharmony_ci	 */
9462306a36Sopenharmony_ci	if (ictx->no_iommu_mode) {
9562306a36Sopenharmony_ci		ret = -EINVAL;
9662306a36Sopenharmony_ci		goto out_abort;
9762306a36Sopenharmony_ci	}
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) {
10062306a36Sopenharmony_ci		ret = 0;
10162306a36Sopenharmony_ci		iommufd_put_object(&ictx->vfio_ioas->obj);
10262306a36Sopenharmony_ci		goto out_abort;
10362306a36Sopenharmony_ci	}
10462306a36Sopenharmony_ci	ictx->vfio_ioas = ioas;
10562306a36Sopenharmony_ci	xa_unlock(&ictx->objects);
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci	/*
10862306a36Sopenharmony_ci	 * An automatically created compat IOAS is treated as a userspace
10962306a36Sopenharmony_ci	 * created object. Userspace can learn the ID via IOMMU_VFIO_IOAS_GET,
11062306a36Sopenharmony_ci	 * and if not manually destroyed it will be destroyed automatically
11162306a36Sopenharmony_ci	 * at iommufd release.
11262306a36Sopenharmony_ci	 */
11362306a36Sopenharmony_ci	iommufd_object_finalize(ictx, &ioas->obj);
11462306a36Sopenharmony_ci	return 0;
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ciout_abort:
11762306a36Sopenharmony_ci	xa_unlock(&ictx->objects);
11862306a36Sopenharmony_ci	iommufd_object_abort(ictx, &ioas->obj);
11962306a36Sopenharmony_ci	return ret;
12062306a36Sopenharmony_ci}
12162306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create, IOMMUFD_VFIO);
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ciint iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
12462306a36Sopenharmony_ci{
12562306a36Sopenharmony_ci	struct iommu_vfio_ioas *cmd = ucmd->cmd;
12662306a36Sopenharmony_ci	struct iommufd_ioas *ioas;
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci	if (cmd->__reserved)
12962306a36Sopenharmony_ci		return -EOPNOTSUPP;
13062306a36Sopenharmony_ci	switch (cmd->op) {
13162306a36Sopenharmony_ci	case IOMMU_VFIO_IOAS_GET:
13262306a36Sopenharmony_ci		ioas = get_compat_ioas(ucmd->ictx);
13362306a36Sopenharmony_ci		if (IS_ERR(ioas))
13462306a36Sopenharmony_ci			return PTR_ERR(ioas);
13562306a36Sopenharmony_ci		cmd->ioas_id = ioas->obj.id;
13662306a36Sopenharmony_ci		iommufd_put_object(&ioas->obj);
13762306a36Sopenharmony_ci		return iommufd_ucmd_respond(ucmd, sizeof(*cmd));
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	case IOMMU_VFIO_IOAS_SET:
14062306a36Sopenharmony_ci		ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id);
14162306a36Sopenharmony_ci		if (IS_ERR(ioas))
14262306a36Sopenharmony_ci			return PTR_ERR(ioas);
14362306a36Sopenharmony_ci		xa_lock(&ucmd->ictx->objects);
14462306a36Sopenharmony_ci		ucmd->ictx->vfio_ioas = ioas;
14562306a36Sopenharmony_ci		xa_unlock(&ucmd->ictx->objects);
14662306a36Sopenharmony_ci		iommufd_put_object(&ioas->obj);
14762306a36Sopenharmony_ci		return 0;
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci	case IOMMU_VFIO_IOAS_CLEAR:
15062306a36Sopenharmony_ci		xa_lock(&ucmd->ictx->objects);
15162306a36Sopenharmony_ci		ucmd->ictx->vfio_ioas = NULL;
15262306a36Sopenharmony_ci		xa_unlock(&ucmd->ictx->objects);
15362306a36Sopenharmony_ci		return 0;
15462306a36Sopenharmony_ci	default:
15562306a36Sopenharmony_ci		return -EOPNOTSUPP;
15662306a36Sopenharmony_ci	}
15762306a36Sopenharmony_ci}
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_cistatic int iommufd_vfio_map_dma(struct iommufd_ctx *ictx, unsigned int cmd,
16062306a36Sopenharmony_ci				void __user *arg)
16162306a36Sopenharmony_ci{
16262306a36Sopenharmony_ci	u32 supported_flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
16362306a36Sopenharmony_ci	size_t minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
16462306a36Sopenharmony_ci	struct vfio_iommu_type1_dma_map map;
16562306a36Sopenharmony_ci	int iommu_prot = IOMMU_CACHE;
16662306a36Sopenharmony_ci	struct iommufd_ioas *ioas;
16762306a36Sopenharmony_ci	unsigned long iova;
16862306a36Sopenharmony_ci	int rc;
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	if (copy_from_user(&map, arg, minsz))
17162306a36Sopenharmony_ci		return -EFAULT;
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	if (map.argsz < minsz || map.flags & ~supported_flags)
17462306a36Sopenharmony_ci		return -EINVAL;
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	if (map.flags & VFIO_DMA_MAP_FLAG_READ)
17762306a36Sopenharmony_ci		iommu_prot |= IOMMU_READ;
17862306a36Sopenharmony_ci	if (map.flags & VFIO_DMA_MAP_FLAG_WRITE)
17962306a36Sopenharmony_ci		iommu_prot |= IOMMU_WRITE;
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	ioas = get_compat_ioas(ictx);
18262306a36Sopenharmony_ci	if (IS_ERR(ioas))
18362306a36Sopenharmony_ci		return PTR_ERR(ioas);
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	/*
18662306a36Sopenharmony_ci	 * Maps created through the legacy interface always use VFIO compatible
18762306a36Sopenharmony_ci	 * rlimit accounting. If the user wishes to use the faster user based
18862306a36Sopenharmony_ci	 * rlimit accounting then they must use the new interface.
18962306a36Sopenharmony_ci	 */
19062306a36Sopenharmony_ci	iova = map.iova;
19162306a36Sopenharmony_ci	rc = iopt_map_user_pages(ictx, &ioas->iopt, &iova, u64_to_user_ptr(map.vaddr),
19262306a36Sopenharmony_ci				 map.size, iommu_prot, 0);
19362306a36Sopenharmony_ci	iommufd_put_object(&ioas->obj);
19462306a36Sopenharmony_ci	return rc;
19562306a36Sopenharmony_ci}
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_cistatic int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd,
19862306a36Sopenharmony_ci				  void __user *arg)
19962306a36Sopenharmony_ci{
20062306a36Sopenharmony_ci	size_t minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
20162306a36Sopenharmony_ci	/*
20262306a36Sopenharmony_ci	 * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP is obsoleted by the new
20362306a36Sopenharmony_ci	 * dirty tracking direction:
20462306a36Sopenharmony_ci	 *  https://lore.kernel.org/kvm/20220731125503.142683-1-yishaih@nvidia.com/
20562306a36Sopenharmony_ci	 *  https://lore.kernel.org/kvm/20220428210933.3583-1-joao.m.martins@oracle.com/
20662306a36Sopenharmony_ci	 */
20762306a36Sopenharmony_ci	u32 supported_flags = VFIO_DMA_UNMAP_FLAG_ALL;
20862306a36Sopenharmony_ci	struct vfio_iommu_type1_dma_unmap unmap;
20962306a36Sopenharmony_ci	unsigned long unmapped = 0;
21062306a36Sopenharmony_ci	struct iommufd_ioas *ioas;
21162306a36Sopenharmony_ci	int rc;
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci	if (copy_from_user(&unmap, arg, minsz))
21462306a36Sopenharmony_ci		return -EFAULT;
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	if (unmap.argsz < minsz || unmap.flags & ~supported_flags)
21762306a36Sopenharmony_ci		return -EINVAL;
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	ioas = get_compat_ioas(ictx);
22062306a36Sopenharmony_ci	if (IS_ERR(ioas))
22162306a36Sopenharmony_ci		return PTR_ERR(ioas);
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_ci	if (unmap.flags & VFIO_DMA_UNMAP_FLAG_ALL) {
22462306a36Sopenharmony_ci		if (unmap.iova != 0 || unmap.size != 0) {
22562306a36Sopenharmony_ci			rc = -EINVAL;
22662306a36Sopenharmony_ci			goto err_put;
22762306a36Sopenharmony_ci		}
22862306a36Sopenharmony_ci		rc = iopt_unmap_all(&ioas->iopt, &unmapped);
22962306a36Sopenharmony_ci	} else {
23062306a36Sopenharmony_ci		if (READ_ONCE(ioas->iopt.disable_large_pages)) {
23162306a36Sopenharmony_ci			/*
23262306a36Sopenharmony_ci			 * Create cuts at the start and last of the requested
23362306a36Sopenharmony_ci			 * range. If the start IOVA is 0 then it doesn't need to
23462306a36Sopenharmony_ci			 * be cut.
23562306a36Sopenharmony_ci			 */
23662306a36Sopenharmony_ci			unsigned long iovas[] = { unmap.iova + unmap.size - 1,
23762306a36Sopenharmony_ci						  unmap.iova - 1 };
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci			rc = iopt_cut_iova(&ioas->iopt, iovas,
24062306a36Sopenharmony_ci					   unmap.iova ? 2 : 1);
24162306a36Sopenharmony_ci			if (rc)
24262306a36Sopenharmony_ci				goto err_put;
24362306a36Sopenharmony_ci		}
24462306a36Sopenharmony_ci		rc = iopt_unmap_iova(&ioas->iopt, unmap.iova, unmap.size,
24562306a36Sopenharmony_ci				     &unmapped);
24662306a36Sopenharmony_ci	}
24762306a36Sopenharmony_ci	unmap.size = unmapped;
24862306a36Sopenharmony_ci	if (copy_to_user(arg, &unmap, minsz))
24962306a36Sopenharmony_ci		rc = -EFAULT;
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_cierr_put:
25262306a36Sopenharmony_ci	iommufd_put_object(&ioas->obj);
25362306a36Sopenharmony_ci	return rc;
25462306a36Sopenharmony_ci}
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_cistatic int iommufd_vfio_cc_iommu(struct iommufd_ctx *ictx)
25762306a36Sopenharmony_ci{
25862306a36Sopenharmony_ci	struct iommufd_hw_pagetable *hwpt;
25962306a36Sopenharmony_ci	struct iommufd_ioas *ioas;
26062306a36Sopenharmony_ci	int rc = 1;
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_ci	ioas = get_compat_ioas(ictx);
26362306a36Sopenharmony_ci	if (IS_ERR(ioas))
26462306a36Sopenharmony_ci		return PTR_ERR(ioas);
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_ci	mutex_lock(&ioas->mutex);
26762306a36Sopenharmony_ci	list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) {
26862306a36Sopenharmony_ci		if (!hwpt->enforce_cache_coherency) {
26962306a36Sopenharmony_ci			rc = 0;
27062306a36Sopenharmony_ci			break;
27162306a36Sopenharmony_ci		}
27262306a36Sopenharmony_ci	}
27362306a36Sopenharmony_ci	mutex_unlock(&ioas->mutex);
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_ci	iommufd_put_object(&ioas->obj);
27662306a36Sopenharmony_ci	return rc;
27762306a36Sopenharmony_ci}
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_cistatic int iommufd_vfio_check_extension(struct iommufd_ctx *ictx,
28062306a36Sopenharmony_ci					unsigned long type)
28162306a36Sopenharmony_ci{
28262306a36Sopenharmony_ci	switch (type) {
28362306a36Sopenharmony_ci	case VFIO_TYPE1_IOMMU:
28462306a36Sopenharmony_ci	case VFIO_TYPE1v2_IOMMU:
28562306a36Sopenharmony_ci	case VFIO_UNMAP_ALL:
28662306a36Sopenharmony_ci		return 1;
28762306a36Sopenharmony_ci
28862306a36Sopenharmony_ci	case VFIO_NOIOMMU_IOMMU:
28962306a36Sopenharmony_ci		return IS_ENABLED(CONFIG_VFIO_NOIOMMU);
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	case VFIO_DMA_CC_IOMMU:
29262306a36Sopenharmony_ci		return iommufd_vfio_cc_iommu(ictx);
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci	/*
29562306a36Sopenharmony_ci	 * This is obsolete, and to be removed from VFIO. It was an incomplete
29662306a36Sopenharmony_ci	 * idea that got merged.
29762306a36Sopenharmony_ci	 * https://lore.kernel.org/kvm/0-v1-0093c9b0e345+19-vfio_no_nesting_jgg@nvidia.com/
29862306a36Sopenharmony_ci	 */
29962306a36Sopenharmony_ci	case VFIO_TYPE1_NESTING_IOMMU:
30062306a36Sopenharmony_ci		return 0;
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci	/*
30362306a36Sopenharmony_ci	 * VFIO_DMA_MAP_FLAG_VADDR
30462306a36Sopenharmony_ci	 * https://lore.kernel.org/kvm/1611939252-7240-1-git-send-email-steven.sistare@oracle.com/
30562306a36Sopenharmony_ci	 * https://lore.kernel.org/all/Yz777bJZjTyLrHEQ@nvidia.com/
30662306a36Sopenharmony_ci	 *
30762306a36Sopenharmony_ci	 * It is hard to see how this could be implemented safely.
30862306a36Sopenharmony_ci	 */
30962306a36Sopenharmony_ci	case VFIO_UPDATE_VADDR:
31062306a36Sopenharmony_ci	default:
31162306a36Sopenharmony_ci		return 0;
31262306a36Sopenharmony_ci	}
31362306a36Sopenharmony_ci}
31462306a36Sopenharmony_ci
31562306a36Sopenharmony_cistatic int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type)
31662306a36Sopenharmony_ci{
31762306a36Sopenharmony_ci	bool no_iommu_mode = READ_ONCE(ictx->no_iommu_mode);
31862306a36Sopenharmony_ci	struct iommufd_ioas *ioas = NULL;
31962306a36Sopenharmony_ci	int rc = 0;
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci	/*
32262306a36Sopenharmony_ci	 * Emulation for NOIOMMU is imperfect in that VFIO blocks almost all
32362306a36Sopenharmony_ci	 * other ioctls. We let them keep working but they mostly fail since no
32462306a36Sopenharmony_ci	 * IOAS should exist.
32562306a36Sopenharmony_ci	 */
32662306a36Sopenharmony_ci	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == VFIO_NOIOMMU_IOMMU &&
32762306a36Sopenharmony_ci	    no_iommu_mode) {
32862306a36Sopenharmony_ci		if (!capable(CAP_SYS_RAWIO))
32962306a36Sopenharmony_ci			return -EPERM;
33062306a36Sopenharmony_ci		return 0;
33162306a36Sopenharmony_ci	}
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci	if ((type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) ||
33462306a36Sopenharmony_ci	    no_iommu_mode)
33562306a36Sopenharmony_ci		return -EINVAL;
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_ci	/* VFIO fails the set_iommu if there is no group */
33862306a36Sopenharmony_ci	ioas = get_compat_ioas(ictx);
33962306a36Sopenharmony_ci	if (IS_ERR(ioas))
34062306a36Sopenharmony_ci		return PTR_ERR(ioas);
34162306a36Sopenharmony_ci
34262306a36Sopenharmony_ci	/*
34362306a36Sopenharmony_ci	 * The difference between TYPE1 and TYPE1v2 is the ability to unmap in
34462306a36Sopenharmony_ci	 * the middle of mapped ranges. This is complicated by huge page support
34562306a36Sopenharmony_ci	 * which creates single large IOPTEs that cannot be split by the iommu
34662306a36Sopenharmony_ci	 * driver. TYPE1 is very old at this point and likely nothing uses it,
34762306a36Sopenharmony_ci	 * however it is simple enough to emulate by simply disabling the
34862306a36Sopenharmony_ci	 * problematic large IOPTEs. Then we can safely unmap within any range.
34962306a36Sopenharmony_ci	 */
35062306a36Sopenharmony_ci	if (type == VFIO_TYPE1_IOMMU)
35162306a36Sopenharmony_ci		rc = iopt_disable_large_pages(&ioas->iopt);
35262306a36Sopenharmony_ci	iommufd_put_object(&ioas->obj);
35362306a36Sopenharmony_ci	return rc;
35462306a36Sopenharmony_ci}
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_cistatic unsigned long iommufd_get_pagesizes(struct iommufd_ioas *ioas)
35762306a36Sopenharmony_ci{
35862306a36Sopenharmony_ci	struct io_pagetable *iopt = &ioas->iopt;
35962306a36Sopenharmony_ci	unsigned long pgsize_bitmap = ULONG_MAX;
36062306a36Sopenharmony_ci	struct iommu_domain *domain;
36162306a36Sopenharmony_ci	unsigned long index;
36262306a36Sopenharmony_ci
36362306a36Sopenharmony_ci	down_read(&iopt->domains_rwsem);
36462306a36Sopenharmony_ci	xa_for_each(&iopt->domains, index, domain)
36562306a36Sopenharmony_ci		pgsize_bitmap &= domain->pgsize_bitmap;
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci	/* See vfio_update_pgsize_bitmap() */
36862306a36Sopenharmony_ci	if (pgsize_bitmap & ~PAGE_MASK) {
36962306a36Sopenharmony_ci		pgsize_bitmap &= PAGE_MASK;
37062306a36Sopenharmony_ci		pgsize_bitmap |= PAGE_SIZE;
37162306a36Sopenharmony_ci	}
37262306a36Sopenharmony_ci	pgsize_bitmap = max(pgsize_bitmap, ioas->iopt.iova_alignment);
37362306a36Sopenharmony_ci	up_read(&iopt->domains_rwsem);
37462306a36Sopenharmony_ci	return pgsize_bitmap;
37562306a36Sopenharmony_ci}
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_cistatic int iommufd_fill_cap_iova(struct iommufd_ioas *ioas,
37862306a36Sopenharmony_ci				 struct vfio_info_cap_header __user *cur,
37962306a36Sopenharmony_ci				 size_t avail)
38062306a36Sopenharmony_ci{
38162306a36Sopenharmony_ci	struct vfio_iommu_type1_info_cap_iova_range __user *ucap_iovas =
38262306a36Sopenharmony_ci		container_of(cur,
38362306a36Sopenharmony_ci			     struct vfio_iommu_type1_info_cap_iova_range __user,
38462306a36Sopenharmony_ci			     header);
38562306a36Sopenharmony_ci	struct vfio_iommu_type1_info_cap_iova_range cap_iovas = {
38662306a36Sopenharmony_ci		.header = {
38762306a36Sopenharmony_ci			.id = VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE,
38862306a36Sopenharmony_ci			.version = 1,
38962306a36Sopenharmony_ci		},
39062306a36Sopenharmony_ci	};
39162306a36Sopenharmony_ci	struct interval_tree_span_iter span;
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ci	interval_tree_for_each_span(&span, &ioas->iopt.reserved_itree, 0,
39462306a36Sopenharmony_ci				    ULONG_MAX) {
39562306a36Sopenharmony_ci		struct vfio_iova_range range;
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci		if (!span.is_hole)
39862306a36Sopenharmony_ci			continue;
39962306a36Sopenharmony_ci		range.start = span.start_hole;
40062306a36Sopenharmony_ci		range.end = span.last_hole;
40162306a36Sopenharmony_ci		if (avail >= struct_size(&cap_iovas, iova_ranges,
40262306a36Sopenharmony_ci					 cap_iovas.nr_iovas + 1) &&
40362306a36Sopenharmony_ci		    copy_to_user(&ucap_iovas->iova_ranges[cap_iovas.nr_iovas],
40462306a36Sopenharmony_ci				 &range, sizeof(range)))
40562306a36Sopenharmony_ci			return -EFAULT;
40662306a36Sopenharmony_ci		cap_iovas.nr_iovas++;
40762306a36Sopenharmony_ci	}
40862306a36Sopenharmony_ci	if (avail >= struct_size(&cap_iovas, iova_ranges, cap_iovas.nr_iovas) &&
40962306a36Sopenharmony_ci	    copy_to_user(ucap_iovas, &cap_iovas, sizeof(cap_iovas)))
41062306a36Sopenharmony_ci		return -EFAULT;
41162306a36Sopenharmony_ci	return struct_size(&cap_iovas, iova_ranges, cap_iovas.nr_iovas);
41262306a36Sopenharmony_ci}
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_cistatic int iommufd_fill_cap_dma_avail(struct iommufd_ioas *ioas,
41562306a36Sopenharmony_ci				      struct vfio_info_cap_header __user *cur,
41662306a36Sopenharmony_ci				      size_t avail)
41762306a36Sopenharmony_ci{
41862306a36Sopenharmony_ci	struct vfio_iommu_type1_info_dma_avail cap_dma = {
41962306a36Sopenharmony_ci		.header = {
42062306a36Sopenharmony_ci			.id = VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL,
42162306a36Sopenharmony_ci			.version = 1,
42262306a36Sopenharmony_ci		},
42362306a36Sopenharmony_ci		/*
42462306a36Sopenharmony_ci		 * iommufd's limit is based on the cgroup's memory limit.
42562306a36Sopenharmony_ci		 * Normally vfio would return U16_MAX here, and provide a module
42662306a36Sopenharmony_ci		 * parameter to adjust it. Since S390 qemu userspace actually
42762306a36Sopenharmony_ci		 * pays attention and needs a value bigger than U16_MAX return
42862306a36Sopenharmony_ci		 * U32_MAX.
42962306a36Sopenharmony_ci		 */
43062306a36Sopenharmony_ci		.avail = U32_MAX,
43162306a36Sopenharmony_ci	};
43262306a36Sopenharmony_ci
43362306a36Sopenharmony_ci	if (avail >= sizeof(cap_dma) &&
43462306a36Sopenharmony_ci	    copy_to_user(cur, &cap_dma, sizeof(cap_dma)))
43562306a36Sopenharmony_ci		return -EFAULT;
43662306a36Sopenharmony_ci	return sizeof(cap_dma);
43762306a36Sopenharmony_ci}
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_cistatic int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
44062306a36Sopenharmony_ci				       void __user *arg)
44162306a36Sopenharmony_ci{
44262306a36Sopenharmony_ci	typedef int (*fill_cap_fn)(struct iommufd_ioas *ioas,
44362306a36Sopenharmony_ci				   struct vfio_info_cap_header __user *cur,
44462306a36Sopenharmony_ci				   size_t avail);
44562306a36Sopenharmony_ci	static const fill_cap_fn fill_fns[] = {
44662306a36Sopenharmony_ci		iommufd_fill_cap_dma_avail,
44762306a36Sopenharmony_ci		iommufd_fill_cap_iova,
44862306a36Sopenharmony_ci	};
44962306a36Sopenharmony_ci	size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
45062306a36Sopenharmony_ci	struct vfio_info_cap_header __user *last_cap = NULL;
45162306a36Sopenharmony_ci	struct vfio_iommu_type1_info info = {};
45262306a36Sopenharmony_ci	struct iommufd_ioas *ioas;
45362306a36Sopenharmony_ci	size_t total_cap_size;
45462306a36Sopenharmony_ci	int rc;
45562306a36Sopenharmony_ci	int i;
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_ci	if (copy_from_user(&info, arg, minsz))
45862306a36Sopenharmony_ci		return -EFAULT;
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci	if (info.argsz < minsz)
46162306a36Sopenharmony_ci		return -EINVAL;
46262306a36Sopenharmony_ci	minsz = min_t(size_t, info.argsz, sizeof(info));
46362306a36Sopenharmony_ci
46462306a36Sopenharmony_ci	ioas = get_compat_ioas(ictx);
46562306a36Sopenharmony_ci	if (IS_ERR(ioas))
46662306a36Sopenharmony_ci		return PTR_ERR(ioas);
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	info.flags = VFIO_IOMMU_INFO_PGSIZES;
46962306a36Sopenharmony_ci	info.iova_pgsizes = iommufd_get_pagesizes(ioas);
47062306a36Sopenharmony_ci	info.cap_offset = 0;
47162306a36Sopenharmony_ci
47262306a36Sopenharmony_ci	down_read(&ioas->iopt.iova_rwsem);
47362306a36Sopenharmony_ci	total_cap_size = sizeof(info);
47462306a36Sopenharmony_ci	for (i = 0; i != ARRAY_SIZE(fill_fns); i++) {
47562306a36Sopenharmony_ci		int cap_size;
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci		if (info.argsz > total_cap_size)
47862306a36Sopenharmony_ci			cap_size = fill_fns[i](ioas, arg + total_cap_size,
47962306a36Sopenharmony_ci					       info.argsz - total_cap_size);
48062306a36Sopenharmony_ci		else
48162306a36Sopenharmony_ci			cap_size = fill_fns[i](ioas, NULL, 0);
48262306a36Sopenharmony_ci		if (cap_size < 0) {
48362306a36Sopenharmony_ci			rc = cap_size;
48462306a36Sopenharmony_ci			goto out_put;
48562306a36Sopenharmony_ci		}
48662306a36Sopenharmony_ci		cap_size = ALIGN(cap_size, sizeof(u64));
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci		if (last_cap && info.argsz >= total_cap_size &&
48962306a36Sopenharmony_ci		    put_user(total_cap_size, &last_cap->next)) {
49062306a36Sopenharmony_ci			rc = -EFAULT;
49162306a36Sopenharmony_ci			goto out_put;
49262306a36Sopenharmony_ci		}
49362306a36Sopenharmony_ci		last_cap = arg + total_cap_size;
49462306a36Sopenharmony_ci		total_cap_size += cap_size;
49562306a36Sopenharmony_ci	}
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_ci	/*
49862306a36Sopenharmony_ci	 * If the user did not provide enough space then only some caps are
49962306a36Sopenharmony_ci	 * returned and the argsz will be updated to the correct amount to get
50062306a36Sopenharmony_ci	 * all caps.
50162306a36Sopenharmony_ci	 */
50262306a36Sopenharmony_ci	if (info.argsz >= total_cap_size)
50362306a36Sopenharmony_ci		info.cap_offset = sizeof(info);
50462306a36Sopenharmony_ci	info.argsz = total_cap_size;
50562306a36Sopenharmony_ci	info.flags |= VFIO_IOMMU_INFO_CAPS;
50662306a36Sopenharmony_ci	if (copy_to_user(arg, &info, minsz)) {
50762306a36Sopenharmony_ci		rc = -EFAULT;
50862306a36Sopenharmony_ci		goto out_put;
50962306a36Sopenharmony_ci	}
51062306a36Sopenharmony_ci	rc = 0;
51162306a36Sopenharmony_ci
51262306a36Sopenharmony_ciout_put:
51362306a36Sopenharmony_ci	up_read(&ioas->iopt.iova_rwsem);
51462306a36Sopenharmony_ci	iommufd_put_object(&ioas->obj);
51562306a36Sopenharmony_ci	return rc;
51662306a36Sopenharmony_ci}
51762306a36Sopenharmony_ci
51862306a36Sopenharmony_ciint iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
51962306a36Sopenharmony_ci		       unsigned long arg)
52062306a36Sopenharmony_ci{
52162306a36Sopenharmony_ci	void __user *uarg = (void __user *)arg;
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_ci	switch (cmd) {
52462306a36Sopenharmony_ci	case VFIO_GET_API_VERSION:
52562306a36Sopenharmony_ci		return VFIO_API_VERSION;
52662306a36Sopenharmony_ci	case VFIO_SET_IOMMU:
52762306a36Sopenharmony_ci		return iommufd_vfio_set_iommu(ictx, arg);
52862306a36Sopenharmony_ci	case VFIO_CHECK_EXTENSION:
52962306a36Sopenharmony_ci		return iommufd_vfio_check_extension(ictx, arg);
53062306a36Sopenharmony_ci	case VFIO_IOMMU_GET_INFO:
53162306a36Sopenharmony_ci		return iommufd_vfio_iommu_get_info(ictx, uarg);
53262306a36Sopenharmony_ci	case VFIO_IOMMU_MAP_DMA:
53362306a36Sopenharmony_ci		return iommufd_vfio_map_dma(ictx, cmd, uarg);
53462306a36Sopenharmony_ci	case VFIO_IOMMU_UNMAP_DMA:
53562306a36Sopenharmony_ci		return iommufd_vfio_unmap_dma(ictx, cmd, uarg);
53662306a36Sopenharmony_ci	case VFIO_IOMMU_DIRTY_PAGES:
53762306a36Sopenharmony_ci	default:
53862306a36Sopenharmony_ci		return -ENOIOCTLCMD;
53962306a36Sopenharmony_ci	}
54062306a36Sopenharmony_ci	return -ENOIOCTLCMD;
54162306a36Sopenharmony_ci}
542