162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES 362306a36Sopenharmony_ci */ 462306a36Sopenharmony_ci#include <linux/file.h> 562306a36Sopenharmony_ci#include <linux/interval_tree.h> 662306a36Sopenharmony_ci#include <linux/iommu.h> 762306a36Sopenharmony_ci#include <linux/iommufd.h> 862306a36Sopenharmony_ci#include <linux/slab.h> 962306a36Sopenharmony_ci#include <linux/vfio.h> 1062306a36Sopenharmony_ci#include <uapi/linux/vfio.h> 1162306a36Sopenharmony_ci#include <uapi/linux/iommufd.h> 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#include "iommufd_private.h" 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_cistatic struct iommufd_ioas *get_compat_ioas(struct iommufd_ctx *ictx) 1662306a36Sopenharmony_ci{ 1762306a36Sopenharmony_ci struct iommufd_ioas *ioas = ERR_PTR(-ENODEV); 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci xa_lock(&ictx->objects); 2062306a36Sopenharmony_ci if (!ictx->vfio_ioas || !iommufd_lock_obj(&ictx->vfio_ioas->obj)) 2162306a36Sopenharmony_ci goto out_unlock; 2262306a36Sopenharmony_ci ioas = ictx->vfio_ioas; 2362306a36Sopenharmony_ciout_unlock: 2462306a36Sopenharmony_ci xa_unlock(&ictx->objects); 2562306a36Sopenharmony_ci return ioas; 2662306a36Sopenharmony_ci} 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci/** 2962306a36Sopenharmony_ci * iommufd_vfio_compat_ioas_get_id - Ensure a compat IOAS exists 3062306a36Sopenharmony_ci * @ictx: Context to operate on 3162306a36Sopenharmony_ci * @out_ioas_id: The IOAS ID of the compatibility IOAS 3262306a36Sopenharmony_ci * 3362306a36Sopenharmony_ci * Return the ID of the current compatibility IOAS. The ID can be passed into 3462306a36Sopenharmony_ci * other functions that take an ioas_id. 3562306a36Sopenharmony_ci */ 3662306a36Sopenharmony_ciint iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) 3762306a36Sopenharmony_ci{ 3862306a36Sopenharmony_ci struct iommufd_ioas *ioas; 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_ci ioas = get_compat_ioas(ictx); 4162306a36Sopenharmony_ci if (IS_ERR(ioas)) 4262306a36Sopenharmony_ci return PTR_ERR(ioas); 4362306a36Sopenharmony_ci *out_ioas_id = ioas->obj.id; 4462306a36Sopenharmony_ci iommufd_put_object(&ioas->obj); 4562306a36Sopenharmony_ci return 0; 4662306a36Sopenharmony_ci} 4762306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO); 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_ci/** 5062306a36Sopenharmony_ci * iommufd_vfio_compat_set_no_iommu - Called when a no-iommu device is attached 5162306a36Sopenharmony_ci * @ictx: Context to operate on 5262306a36Sopenharmony_ci * 5362306a36Sopenharmony_ci * This allows selecting the VFIO_NOIOMMU_IOMMU and blocks normal types. 5462306a36Sopenharmony_ci */ 5562306a36Sopenharmony_ciint iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) 5662306a36Sopenharmony_ci{ 5762306a36Sopenharmony_ci int ret; 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci xa_lock(&ictx->objects); 6062306a36Sopenharmony_ci if (!ictx->vfio_ioas) { 6162306a36Sopenharmony_ci ictx->no_iommu_mode = 1; 6262306a36Sopenharmony_ci ret = 0; 6362306a36Sopenharmony_ci } else { 6462306a36Sopenharmony_ci ret = -EINVAL; 6562306a36Sopenharmony_ci } 6662306a36Sopenharmony_ci xa_unlock(&ictx->objects); 6762306a36Sopenharmony_ci return ret; 6862306a36Sopenharmony_ci} 6962306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_set_no_iommu, IOMMUFD_VFIO); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci/** 7262306a36Sopenharmony_ci * iommufd_vfio_compat_ioas_create - Ensure the compat IOAS is created 7362306a36Sopenharmony_ci * @ictx: Context to operate on 7462306a36Sopenharmony_ci * 7562306a36Sopenharmony_ci * The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate 7662306a36Sopenharmony_ci * on since they do not have an IOAS ID input in their ABI. Only attaching a 7762306a36Sopenharmony_ci * group should cause a default creation of the internal ioas, this does nothing 7862306a36Sopenharmony_ci * if an existing ioas has already been assigned somehow. 7962306a36Sopenharmony_ci */ 8062306a36Sopenharmony_ciint iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx) 8162306a36Sopenharmony_ci{ 8262306a36Sopenharmony_ci struct iommufd_ioas *ioas = NULL; 8362306a36Sopenharmony_ci int ret; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci ioas = iommufd_ioas_alloc(ictx); 8662306a36Sopenharmony_ci if (IS_ERR(ioas)) 8762306a36Sopenharmony_ci return PTR_ERR(ioas); 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci xa_lock(&ictx->objects); 9062306a36Sopenharmony_ci /* 9162306a36Sopenharmony_ci * VFIO won't allow attaching a container to both iommu and no iommu 9262306a36Sopenharmony_ci * operation 9362306a36Sopenharmony_ci */ 9462306a36Sopenharmony_ci if (ictx->no_iommu_mode) { 9562306a36Sopenharmony_ci ret = -EINVAL; 9662306a36Sopenharmony_ci goto out_abort; 9762306a36Sopenharmony_ci } 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) { 10062306a36Sopenharmony_ci ret = 0; 10162306a36Sopenharmony_ci iommufd_put_object(&ictx->vfio_ioas->obj); 10262306a36Sopenharmony_ci goto out_abort; 10362306a36Sopenharmony_ci } 10462306a36Sopenharmony_ci ictx->vfio_ioas = ioas; 10562306a36Sopenharmony_ci xa_unlock(&ictx->objects); 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci /* 10862306a36Sopenharmony_ci * An automatically created compat IOAS is treated as a userspace 10962306a36Sopenharmony_ci * created object. Userspace can learn the ID via IOMMU_VFIO_IOAS_GET, 11062306a36Sopenharmony_ci * and if not manually destroyed it will be destroyed automatically 11162306a36Sopenharmony_ci * at iommufd release. 11262306a36Sopenharmony_ci */ 11362306a36Sopenharmony_ci iommufd_object_finalize(ictx, &ioas->obj); 11462306a36Sopenharmony_ci return 0; 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ciout_abort: 11762306a36Sopenharmony_ci xa_unlock(&ictx->objects); 11862306a36Sopenharmony_ci iommufd_object_abort(ictx, &ioas->obj); 11962306a36Sopenharmony_ci return ret; 12062306a36Sopenharmony_ci} 12162306a36Sopenharmony_ciEXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create, IOMMUFD_VFIO); 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ciint iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) 12462306a36Sopenharmony_ci{ 12562306a36Sopenharmony_ci struct iommu_vfio_ioas *cmd = ucmd->cmd; 12662306a36Sopenharmony_ci struct iommufd_ioas *ioas; 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci if (cmd->__reserved) 12962306a36Sopenharmony_ci return -EOPNOTSUPP; 13062306a36Sopenharmony_ci switch (cmd->op) { 13162306a36Sopenharmony_ci case IOMMU_VFIO_IOAS_GET: 13262306a36Sopenharmony_ci ioas = get_compat_ioas(ucmd->ictx); 13362306a36Sopenharmony_ci if (IS_ERR(ioas)) 13462306a36Sopenharmony_ci return PTR_ERR(ioas); 13562306a36Sopenharmony_ci cmd->ioas_id = ioas->obj.id; 13662306a36Sopenharmony_ci iommufd_put_object(&ioas->obj); 13762306a36Sopenharmony_ci return iommufd_ucmd_respond(ucmd, sizeof(*cmd)); 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci case IOMMU_VFIO_IOAS_SET: 14062306a36Sopenharmony_ci ioas = iommufd_get_ioas(ucmd->ictx, cmd->ioas_id); 14162306a36Sopenharmony_ci if (IS_ERR(ioas)) 14262306a36Sopenharmony_ci return PTR_ERR(ioas); 14362306a36Sopenharmony_ci xa_lock(&ucmd->ictx->objects); 14462306a36Sopenharmony_ci ucmd->ictx->vfio_ioas = ioas; 14562306a36Sopenharmony_ci xa_unlock(&ucmd->ictx->objects); 14662306a36Sopenharmony_ci iommufd_put_object(&ioas->obj); 14762306a36Sopenharmony_ci return 0; 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci case IOMMU_VFIO_IOAS_CLEAR: 15062306a36Sopenharmony_ci xa_lock(&ucmd->ictx->objects); 15162306a36Sopenharmony_ci ucmd->ictx->vfio_ioas = NULL; 15262306a36Sopenharmony_ci xa_unlock(&ucmd->ictx->objects); 15362306a36Sopenharmony_ci return 0; 15462306a36Sopenharmony_ci default: 15562306a36Sopenharmony_ci return -EOPNOTSUPP; 15662306a36Sopenharmony_ci } 15762306a36Sopenharmony_ci} 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_cistatic int iommufd_vfio_map_dma(struct iommufd_ctx *ictx, unsigned int cmd, 16062306a36Sopenharmony_ci void __user *arg) 16162306a36Sopenharmony_ci{ 16262306a36Sopenharmony_ci u32 supported_flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; 16362306a36Sopenharmony_ci size_t minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); 16462306a36Sopenharmony_ci struct vfio_iommu_type1_dma_map map; 16562306a36Sopenharmony_ci int iommu_prot = IOMMU_CACHE; 16662306a36Sopenharmony_ci struct iommufd_ioas *ioas; 16762306a36Sopenharmony_ci unsigned long iova; 16862306a36Sopenharmony_ci int rc; 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci if (copy_from_user(&map, arg, minsz)) 17162306a36Sopenharmony_ci return -EFAULT; 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci if (map.argsz < minsz || map.flags & ~supported_flags) 17462306a36Sopenharmony_ci return -EINVAL; 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci if (map.flags & VFIO_DMA_MAP_FLAG_READ) 17762306a36Sopenharmony_ci iommu_prot |= IOMMU_READ; 17862306a36Sopenharmony_ci if (map.flags & VFIO_DMA_MAP_FLAG_WRITE) 17962306a36Sopenharmony_ci iommu_prot |= IOMMU_WRITE; 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci ioas = get_compat_ioas(ictx); 18262306a36Sopenharmony_ci if (IS_ERR(ioas)) 18362306a36Sopenharmony_ci return PTR_ERR(ioas); 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci /* 18662306a36Sopenharmony_ci * Maps created through the legacy interface always use VFIO compatible 18762306a36Sopenharmony_ci * rlimit accounting. If the user wishes to use the faster user based 18862306a36Sopenharmony_ci * rlimit accounting then they must use the new interface. 18962306a36Sopenharmony_ci */ 19062306a36Sopenharmony_ci iova = map.iova; 19162306a36Sopenharmony_ci rc = iopt_map_user_pages(ictx, &ioas->iopt, &iova, u64_to_user_ptr(map.vaddr), 19262306a36Sopenharmony_ci map.size, iommu_prot, 0); 19362306a36Sopenharmony_ci iommufd_put_object(&ioas->obj); 19462306a36Sopenharmony_ci return rc; 19562306a36Sopenharmony_ci} 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_cistatic int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd, 19862306a36Sopenharmony_ci void __user *arg) 19962306a36Sopenharmony_ci{ 20062306a36Sopenharmony_ci size_t minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); 20162306a36Sopenharmony_ci /* 20262306a36Sopenharmony_ci * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP is obsoleted by the new 20362306a36Sopenharmony_ci * dirty tracking direction: 20462306a36Sopenharmony_ci * https://lore.kernel.org/kvm/20220731125503.142683-1-yishaih@nvidia.com/ 20562306a36Sopenharmony_ci * https://lore.kernel.org/kvm/20220428210933.3583-1-joao.m.martins@oracle.com/ 20662306a36Sopenharmony_ci */ 20762306a36Sopenharmony_ci u32 supported_flags = VFIO_DMA_UNMAP_FLAG_ALL; 20862306a36Sopenharmony_ci struct vfio_iommu_type1_dma_unmap unmap; 20962306a36Sopenharmony_ci unsigned long unmapped = 0; 21062306a36Sopenharmony_ci struct iommufd_ioas *ioas; 21162306a36Sopenharmony_ci int rc; 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci if (copy_from_user(&unmap, arg, minsz)) 21462306a36Sopenharmony_ci return -EFAULT; 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci if (unmap.argsz < minsz || unmap.flags & ~supported_flags) 21762306a36Sopenharmony_ci return -EINVAL; 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci ioas = get_compat_ioas(ictx); 22062306a36Sopenharmony_ci if (IS_ERR(ioas)) 22162306a36Sopenharmony_ci return PTR_ERR(ioas); 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_ci if (unmap.flags & VFIO_DMA_UNMAP_FLAG_ALL) { 22462306a36Sopenharmony_ci if (unmap.iova != 0 || unmap.size != 0) { 22562306a36Sopenharmony_ci rc = -EINVAL; 22662306a36Sopenharmony_ci goto err_put; 22762306a36Sopenharmony_ci } 22862306a36Sopenharmony_ci rc = iopt_unmap_all(&ioas->iopt, &unmapped); 22962306a36Sopenharmony_ci } else { 23062306a36Sopenharmony_ci if (READ_ONCE(ioas->iopt.disable_large_pages)) { 23162306a36Sopenharmony_ci /* 23262306a36Sopenharmony_ci * Create cuts at the start and last of the requested 23362306a36Sopenharmony_ci * range. If the start IOVA is 0 then it doesn't need to 23462306a36Sopenharmony_ci * be cut. 23562306a36Sopenharmony_ci */ 23662306a36Sopenharmony_ci unsigned long iovas[] = { unmap.iova + unmap.size - 1, 23762306a36Sopenharmony_ci unmap.iova - 1 }; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci rc = iopt_cut_iova(&ioas->iopt, iovas, 24062306a36Sopenharmony_ci unmap.iova ? 2 : 1); 24162306a36Sopenharmony_ci if (rc) 24262306a36Sopenharmony_ci goto err_put; 24362306a36Sopenharmony_ci } 24462306a36Sopenharmony_ci rc = iopt_unmap_iova(&ioas->iopt, unmap.iova, unmap.size, 24562306a36Sopenharmony_ci &unmapped); 24662306a36Sopenharmony_ci } 24762306a36Sopenharmony_ci unmap.size = unmapped; 24862306a36Sopenharmony_ci if (copy_to_user(arg, &unmap, minsz)) 24962306a36Sopenharmony_ci rc = -EFAULT; 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_cierr_put: 25262306a36Sopenharmony_ci iommufd_put_object(&ioas->obj); 25362306a36Sopenharmony_ci return rc; 25462306a36Sopenharmony_ci} 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_cistatic int iommufd_vfio_cc_iommu(struct iommufd_ctx *ictx) 25762306a36Sopenharmony_ci{ 25862306a36Sopenharmony_ci struct iommufd_hw_pagetable *hwpt; 25962306a36Sopenharmony_ci struct iommufd_ioas *ioas; 26062306a36Sopenharmony_ci int rc = 1; 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci ioas = get_compat_ioas(ictx); 26362306a36Sopenharmony_ci if (IS_ERR(ioas)) 26462306a36Sopenharmony_ci return PTR_ERR(ioas); 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci mutex_lock(&ioas->mutex); 26762306a36Sopenharmony_ci list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) { 26862306a36Sopenharmony_ci if (!hwpt->enforce_cache_coherency) { 26962306a36Sopenharmony_ci rc = 0; 27062306a36Sopenharmony_ci break; 27162306a36Sopenharmony_ci } 27262306a36Sopenharmony_ci } 27362306a36Sopenharmony_ci mutex_unlock(&ioas->mutex); 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci iommufd_put_object(&ioas->obj); 27662306a36Sopenharmony_ci return rc; 27762306a36Sopenharmony_ci} 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_cistatic int iommufd_vfio_check_extension(struct iommufd_ctx *ictx, 28062306a36Sopenharmony_ci unsigned long type) 28162306a36Sopenharmony_ci{ 28262306a36Sopenharmony_ci switch (type) { 28362306a36Sopenharmony_ci case VFIO_TYPE1_IOMMU: 28462306a36Sopenharmony_ci case VFIO_TYPE1v2_IOMMU: 28562306a36Sopenharmony_ci case VFIO_UNMAP_ALL: 28662306a36Sopenharmony_ci return 1; 28762306a36Sopenharmony_ci 28862306a36Sopenharmony_ci case VFIO_NOIOMMU_IOMMU: 28962306a36Sopenharmony_ci return IS_ENABLED(CONFIG_VFIO_NOIOMMU); 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci case VFIO_DMA_CC_IOMMU: 29262306a36Sopenharmony_ci return iommufd_vfio_cc_iommu(ictx); 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci /* 29562306a36Sopenharmony_ci * This is obsolete, and to be removed from VFIO. It was an incomplete 29662306a36Sopenharmony_ci * idea that got merged. 29762306a36Sopenharmony_ci * https://lore.kernel.org/kvm/0-v1-0093c9b0e345+19-vfio_no_nesting_jgg@nvidia.com/ 29862306a36Sopenharmony_ci */ 29962306a36Sopenharmony_ci case VFIO_TYPE1_NESTING_IOMMU: 30062306a36Sopenharmony_ci return 0; 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci /* 30362306a36Sopenharmony_ci * VFIO_DMA_MAP_FLAG_VADDR 30462306a36Sopenharmony_ci * https://lore.kernel.org/kvm/1611939252-7240-1-git-send-email-steven.sistare@oracle.com/ 30562306a36Sopenharmony_ci * https://lore.kernel.org/all/Yz777bJZjTyLrHEQ@nvidia.com/ 30662306a36Sopenharmony_ci * 30762306a36Sopenharmony_ci * It is hard to see how this could be implemented safely. 30862306a36Sopenharmony_ci */ 30962306a36Sopenharmony_ci case VFIO_UPDATE_VADDR: 31062306a36Sopenharmony_ci default: 31162306a36Sopenharmony_ci return 0; 31262306a36Sopenharmony_ci } 31362306a36Sopenharmony_ci} 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_cistatic int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type) 31662306a36Sopenharmony_ci{ 31762306a36Sopenharmony_ci bool no_iommu_mode = READ_ONCE(ictx->no_iommu_mode); 31862306a36Sopenharmony_ci struct iommufd_ioas *ioas = NULL; 31962306a36Sopenharmony_ci int rc = 0; 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci /* 32262306a36Sopenharmony_ci * Emulation for NOIOMMU is imperfect in that VFIO blocks almost all 32362306a36Sopenharmony_ci * other ioctls. We let them keep working but they mostly fail since no 32462306a36Sopenharmony_ci * IOAS should exist. 32562306a36Sopenharmony_ci */ 32662306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == VFIO_NOIOMMU_IOMMU && 32762306a36Sopenharmony_ci no_iommu_mode) { 32862306a36Sopenharmony_ci if (!capable(CAP_SYS_RAWIO)) 32962306a36Sopenharmony_ci return -EPERM; 33062306a36Sopenharmony_ci return 0; 33162306a36Sopenharmony_ci } 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci if ((type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) || 33462306a36Sopenharmony_ci no_iommu_mode) 33562306a36Sopenharmony_ci return -EINVAL; 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci /* VFIO fails the set_iommu if there is no group */ 33862306a36Sopenharmony_ci ioas = get_compat_ioas(ictx); 33962306a36Sopenharmony_ci if (IS_ERR(ioas)) 34062306a36Sopenharmony_ci return PTR_ERR(ioas); 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci /* 34362306a36Sopenharmony_ci * The difference between TYPE1 and TYPE1v2 is the ability to unmap in 34462306a36Sopenharmony_ci * the middle of mapped ranges. This is complicated by huge page support 34562306a36Sopenharmony_ci * which creates single large IOPTEs that cannot be split by the iommu 34662306a36Sopenharmony_ci * driver. TYPE1 is very old at this point and likely nothing uses it, 34762306a36Sopenharmony_ci * however it is simple enough to emulate by simply disabling the 34862306a36Sopenharmony_ci * problematic large IOPTEs. Then we can safely unmap within any range. 34962306a36Sopenharmony_ci */ 35062306a36Sopenharmony_ci if (type == VFIO_TYPE1_IOMMU) 35162306a36Sopenharmony_ci rc = iopt_disable_large_pages(&ioas->iopt); 35262306a36Sopenharmony_ci iommufd_put_object(&ioas->obj); 35362306a36Sopenharmony_ci return rc; 35462306a36Sopenharmony_ci} 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_cistatic unsigned long iommufd_get_pagesizes(struct iommufd_ioas *ioas) 35762306a36Sopenharmony_ci{ 35862306a36Sopenharmony_ci struct io_pagetable *iopt = &ioas->iopt; 35962306a36Sopenharmony_ci unsigned long pgsize_bitmap = ULONG_MAX; 36062306a36Sopenharmony_ci struct iommu_domain *domain; 36162306a36Sopenharmony_ci unsigned long index; 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci down_read(&iopt->domains_rwsem); 36462306a36Sopenharmony_ci xa_for_each(&iopt->domains, index, domain) 36562306a36Sopenharmony_ci pgsize_bitmap &= domain->pgsize_bitmap; 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci /* See vfio_update_pgsize_bitmap() */ 36862306a36Sopenharmony_ci if (pgsize_bitmap & ~PAGE_MASK) { 36962306a36Sopenharmony_ci pgsize_bitmap &= PAGE_MASK; 37062306a36Sopenharmony_ci pgsize_bitmap |= PAGE_SIZE; 37162306a36Sopenharmony_ci } 37262306a36Sopenharmony_ci pgsize_bitmap = max(pgsize_bitmap, ioas->iopt.iova_alignment); 37362306a36Sopenharmony_ci up_read(&iopt->domains_rwsem); 37462306a36Sopenharmony_ci return pgsize_bitmap; 37562306a36Sopenharmony_ci} 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_cistatic int iommufd_fill_cap_iova(struct iommufd_ioas *ioas, 37862306a36Sopenharmony_ci struct vfio_info_cap_header __user *cur, 37962306a36Sopenharmony_ci size_t avail) 38062306a36Sopenharmony_ci{ 38162306a36Sopenharmony_ci struct vfio_iommu_type1_info_cap_iova_range __user *ucap_iovas = 38262306a36Sopenharmony_ci container_of(cur, 38362306a36Sopenharmony_ci struct vfio_iommu_type1_info_cap_iova_range __user, 38462306a36Sopenharmony_ci header); 38562306a36Sopenharmony_ci struct vfio_iommu_type1_info_cap_iova_range cap_iovas = { 38662306a36Sopenharmony_ci .header = { 38762306a36Sopenharmony_ci .id = VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 38862306a36Sopenharmony_ci .version = 1, 38962306a36Sopenharmony_ci }, 39062306a36Sopenharmony_ci }; 39162306a36Sopenharmony_ci struct interval_tree_span_iter span; 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci interval_tree_for_each_span(&span, &ioas->iopt.reserved_itree, 0, 39462306a36Sopenharmony_ci ULONG_MAX) { 39562306a36Sopenharmony_ci struct vfio_iova_range range; 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_ci if (!span.is_hole) 39862306a36Sopenharmony_ci continue; 39962306a36Sopenharmony_ci range.start = span.start_hole; 40062306a36Sopenharmony_ci range.end = span.last_hole; 40162306a36Sopenharmony_ci if (avail >= struct_size(&cap_iovas, iova_ranges, 40262306a36Sopenharmony_ci cap_iovas.nr_iovas + 1) && 40362306a36Sopenharmony_ci copy_to_user(&ucap_iovas->iova_ranges[cap_iovas.nr_iovas], 40462306a36Sopenharmony_ci &range, sizeof(range))) 40562306a36Sopenharmony_ci return -EFAULT; 40662306a36Sopenharmony_ci cap_iovas.nr_iovas++; 40762306a36Sopenharmony_ci } 40862306a36Sopenharmony_ci if (avail >= struct_size(&cap_iovas, iova_ranges, cap_iovas.nr_iovas) && 40962306a36Sopenharmony_ci copy_to_user(ucap_iovas, &cap_iovas, sizeof(cap_iovas))) 41062306a36Sopenharmony_ci return -EFAULT; 41162306a36Sopenharmony_ci return struct_size(&cap_iovas, iova_ranges, cap_iovas.nr_iovas); 41262306a36Sopenharmony_ci} 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_cistatic int iommufd_fill_cap_dma_avail(struct iommufd_ioas *ioas, 41562306a36Sopenharmony_ci struct vfio_info_cap_header __user *cur, 41662306a36Sopenharmony_ci size_t avail) 41762306a36Sopenharmony_ci{ 41862306a36Sopenharmony_ci struct vfio_iommu_type1_info_dma_avail cap_dma = { 41962306a36Sopenharmony_ci .header = { 42062306a36Sopenharmony_ci .id = VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL, 42162306a36Sopenharmony_ci .version = 1, 42262306a36Sopenharmony_ci }, 42362306a36Sopenharmony_ci /* 42462306a36Sopenharmony_ci * iommufd's limit is based on the cgroup's memory limit. 42562306a36Sopenharmony_ci * Normally vfio would return U16_MAX here, and provide a module 42662306a36Sopenharmony_ci * parameter to adjust it. Since S390 qemu userspace actually 42762306a36Sopenharmony_ci * pays attention and needs a value bigger than U16_MAX return 42862306a36Sopenharmony_ci * U32_MAX. 42962306a36Sopenharmony_ci */ 43062306a36Sopenharmony_ci .avail = U32_MAX, 43162306a36Sopenharmony_ci }; 43262306a36Sopenharmony_ci 43362306a36Sopenharmony_ci if (avail >= sizeof(cap_dma) && 43462306a36Sopenharmony_ci copy_to_user(cur, &cap_dma, sizeof(cap_dma))) 43562306a36Sopenharmony_ci return -EFAULT; 43662306a36Sopenharmony_ci return sizeof(cap_dma); 43762306a36Sopenharmony_ci} 43862306a36Sopenharmony_ci 43962306a36Sopenharmony_cistatic int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx, 44062306a36Sopenharmony_ci void __user *arg) 44162306a36Sopenharmony_ci{ 44262306a36Sopenharmony_ci typedef int (*fill_cap_fn)(struct iommufd_ioas *ioas, 44362306a36Sopenharmony_ci struct vfio_info_cap_header __user *cur, 44462306a36Sopenharmony_ci size_t avail); 44562306a36Sopenharmony_ci static const fill_cap_fn fill_fns[] = { 44662306a36Sopenharmony_ci iommufd_fill_cap_dma_avail, 44762306a36Sopenharmony_ci iommufd_fill_cap_iova, 44862306a36Sopenharmony_ci }; 44962306a36Sopenharmony_ci size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); 45062306a36Sopenharmony_ci struct vfio_info_cap_header __user *last_cap = NULL; 45162306a36Sopenharmony_ci struct vfio_iommu_type1_info info = {}; 45262306a36Sopenharmony_ci struct iommufd_ioas *ioas; 45362306a36Sopenharmony_ci size_t total_cap_size; 45462306a36Sopenharmony_ci int rc; 45562306a36Sopenharmony_ci int i; 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_ci if (copy_from_user(&info, arg, minsz)) 45862306a36Sopenharmony_ci return -EFAULT; 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci if (info.argsz < minsz) 46162306a36Sopenharmony_ci return -EINVAL; 46262306a36Sopenharmony_ci minsz = min_t(size_t, info.argsz, sizeof(info)); 46362306a36Sopenharmony_ci 46462306a36Sopenharmony_ci ioas = get_compat_ioas(ictx); 46562306a36Sopenharmony_ci if (IS_ERR(ioas)) 46662306a36Sopenharmony_ci return PTR_ERR(ioas); 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci info.flags = VFIO_IOMMU_INFO_PGSIZES; 46962306a36Sopenharmony_ci info.iova_pgsizes = iommufd_get_pagesizes(ioas); 47062306a36Sopenharmony_ci info.cap_offset = 0; 47162306a36Sopenharmony_ci 47262306a36Sopenharmony_ci down_read(&ioas->iopt.iova_rwsem); 47362306a36Sopenharmony_ci total_cap_size = sizeof(info); 47462306a36Sopenharmony_ci for (i = 0; i != ARRAY_SIZE(fill_fns); i++) { 47562306a36Sopenharmony_ci int cap_size; 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci if (info.argsz > total_cap_size) 47862306a36Sopenharmony_ci cap_size = fill_fns[i](ioas, arg + total_cap_size, 47962306a36Sopenharmony_ci info.argsz - total_cap_size); 48062306a36Sopenharmony_ci else 48162306a36Sopenharmony_ci cap_size = fill_fns[i](ioas, NULL, 0); 48262306a36Sopenharmony_ci if (cap_size < 0) { 48362306a36Sopenharmony_ci rc = cap_size; 48462306a36Sopenharmony_ci goto out_put; 48562306a36Sopenharmony_ci } 48662306a36Sopenharmony_ci cap_size = ALIGN(cap_size, sizeof(u64)); 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci if (last_cap && info.argsz >= total_cap_size && 48962306a36Sopenharmony_ci put_user(total_cap_size, &last_cap->next)) { 49062306a36Sopenharmony_ci rc = -EFAULT; 49162306a36Sopenharmony_ci goto out_put; 49262306a36Sopenharmony_ci } 49362306a36Sopenharmony_ci last_cap = arg + total_cap_size; 49462306a36Sopenharmony_ci total_cap_size += cap_size; 49562306a36Sopenharmony_ci } 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_ci /* 49862306a36Sopenharmony_ci * If the user did not provide enough space then only some caps are 49962306a36Sopenharmony_ci * returned and the argsz will be updated to the correct amount to get 50062306a36Sopenharmony_ci * all caps. 50162306a36Sopenharmony_ci */ 50262306a36Sopenharmony_ci if (info.argsz >= total_cap_size) 50362306a36Sopenharmony_ci info.cap_offset = sizeof(info); 50462306a36Sopenharmony_ci info.argsz = total_cap_size; 50562306a36Sopenharmony_ci info.flags |= VFIO_IOMMU_INFO_CAPS; 50662306a36Sopenharmony_ci if (copy_to_user(arg, &info, minsz)) { 50762306a36Sopenharmony_ci rc = -EFAULT; 50862306a36Sopenharmony_ci goto out_put; 50962306a36Sopenharmony_ci } 51062306a36Sopenharmony_ci rc = 0; 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ciout_put: 51362306a36Sopenharmony_ci up_read(&ioas->iopt.iova_rwsem); 51462306a36Sopenharmony_ci iommufd_put_object(&ioas->obj); 51562306a36Sopenharmony_ci return rc; 51662306a36Sopenharmony_ci} 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ciint iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd, 51962306a36Sopenharmony_ci unsigned long arg) 52062306a36Sopenharmony_ci{ 52162306a36Sopenharmony_ci void __user *uarg = (void __user *)arg; 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci switch (cmd) { 52462306a36Sopenharmony_ci case VFIO_GET_API_VERSION: 52562306a36Sopenharmony_ci return VFIO_API_VERSION; 52662306a36Sopenharmony_ci case VFIO_SET_IOMMU: 52762306a36Sopenharmony_ci return iommufd_vfio_set_iommu(ictx, arg); 52862306a36Sopenharmony_ci case VFIO_CHECK_EXTENSION: 52962306a36Sopenharmony_ci return iommufd_vfio_check_extension(ictx, arg); 53062306a36Sopenharmony_ci case VFIO_IOMMU_GET_INFO: 53162306a36Sopenharmony_ci return iommufd_vfio_iommu_get_info(ictx, uarg); 53262306a36Sopenharmony_ci case VFIO_IOMMU_MAP_DMA: 53362306a36Sopenharmony_ci return iommufd_vfio_map_dma(ictx, cmd, uarg); 53462306a36Sopenharmony_ci case VFIO_IOMMU_UNMAP_DMA: 53562306a36Sopenharmony_ci return iommufd_vfio_unmap_dma(ictx, cmd, uarg); 53662306a36Sopenharmony_ci case VFIO_IOMMU_DIRTY_PAGES: 53762306a36Sopenharmony_ci default: 53862306a36Sopenharmony_ci return -ENOIOCTLCMD; 53962306a36Sopenharmony_ci } 54062306a36Sopenharmony_ci return -ENOIOCTLCMD; 54162306a36Sopenharmony_ci} 542