162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * The io_pagetable is the top of datastructure that maps IOVA's to PFNs. The 562306a36Sopenharmony_ci * PFNs can be placed into an iommu_domain, or returned to the caller as a page 662306a36Sopenharmony_ci * list for access by an in-kernel user. 762306a36Sopenharmony_ci * 862306a36Sopenharmony_ci * The datastructure uses the iopt_pages to optimize the storage of the PFNs 962306a36Sopenharmony_ci * between the domains and xarray. 1062306a36Sopenharmony_ci */ 1162306a36Sopenharmony_ci#include <linux/iommufd.h> 1262306a36Sopenharmony_ci#include <linux/lockdep.h> 1362306a36Sopenharmony_ci#include <linux/iommu.h> 1462306a36Sopenharmony_ci#include <linux/sched/mm.h> 1562306a36Sopenharmony_ci#include <linux/err.h> 1662306a36Sopenharmony_ci#include <linux/slab.h> 1762306a36Sopenharmony_ci#include <linux/errno.h> 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci#include "io_pagetable.h" 2062306a36Sopenharmony_ci#include "double_span.h" 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_cistruct iopt_pages_list { 2362306a36Sopenharmony_ci struct iopt_pages *pages; 2462306a36Sopenharmony_ci struct iopt_area *area; 2562306a36Sopenharmony_ci struct list_head next; 2662306a36Sopenharmony_ci unsigned long start_byte; 2762306a36Sopenharmony_ci unsigned long length; 2862306a36Sopenharmony_ci}; 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_cistruct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter, 3162306a36Sopenharmony_ci struct io_pagetable *iopt, 3262306a36Sopenharmony_ci unsigned long iova, 3362306a36Sopenharmony_ci unsigned long last_iova) 3462306a36Sopenharmony_ci{ 3562306a36Sopenharmony_ci lockdep_assert_held(&iopt->iova_rwsem); 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci iter->cur_iova = iova; 3862306a36Sopenharmony_ci iter->last_iova = last_iova; 3962306a36Sopenharmony_ci iter->area = iopt_area_iter_first(iopt, iova, iova); 4062306a36Sopenharmony_ci if (!iter->area) 4162306a36Sopenharmony_ci return NULL; 4262306a36Sopenharmony_ci if (!iter->area->pages) { 4362306a36Sopenharmony_ci iter->area = NULL; 4462306a36Sopenharmony_ci return NULL; 4562306a36Sopenharmony_ci } 4662306a36Sopenharmony_ci return iter->area; 4762306a36Sopenharmony_ci} 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_cistruct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter) 5062306a36Sopenharmony_ci{ 5162306a36Sopenharmony_ci unsigned long last_iova; 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_ci if (!iter->area) 5462306a36Sopenharmony_ci return NULL; 5562306a36Sopenharmony_ci last_iova = iopt_area_last_iova(iter->area); 5662306a36Sopenharmony_ci if (iter->last_iova <= last_iova) 5762306a36Sopenharmony_ci return NULL; 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci iter->cur_iova = last_iova + 1; 6062306a36Sopenharmony_ci iter->area = iopt_area_iter_next(iter->area, iter->cur_iova, 6162306a36Sopenharmony_ci iter->last_iova); 6262306a36Sopenharmony_ci if (!iter->area) 6362306a36Sopenharmony_ci return NULL; 6462306a36Sopenharmony_ci if (iter->cur_iova != iopt_area_iova(iter->area) || 6562306a36Sopenharmony_ci !iter->area->pages) { 6662306a36Sopenharmony_ci iter->area = NULL; 6762306a36Sopenharmony_ci return NULL; 6862306a36Sopenharmony_ci } 6962306a36Sopenharmony_ci return iter->area; 7062306a36Sopenharmony_ci} 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_cistatic bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, 7362306a36Sopenharmony_ci unsigned long length, 7462306a36Sopenharmony_ci unsigned long iova_alignment, 7562306a36Sopenharmony_ci unsigned long page_offset) 7662306a36Sopenharmony_ci{ 7762306a36Sopenharmony_ci if (span->is_used || span->last_hole - span->start_hole < length - 1) 7862306a36Sopenharmony_ci return false; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci span->start_hole = ALIGN(span->start_hole, iova_alignment) | 8162306a36Sopenharmony_ci page_offset; 8262306a36Sopenharmony_ci if (span->start_hole > span->last_hole || 8362306a36Sopenharmony_ci span->last_hole - span->start_hole < length - 1) 8462306a36Sopenharmony_ci return false; 8562306a36Sopenharmony_ci return true; 8662306a36Sopenharmony_ci} 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_cistatic bool __alloc_iova_check_used(struct interval_tree_span_iter *span, 8962306a36Sopenharmony_ci unsigned long length, 9062306a36Sopenharmony_ci unsigned long iova_alignment, 9162306a36Sopenharmony_ci unsigned long page_offset) 9262306a36Sopenharmony_ci{ 9362306a36Sopenharmony_ci if (span->is_hole || span->last_used - span->start_used < length - 1) 9462306a36Sopenharmony_ci return false; 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci span->start_used = ALIGN(span->start_used, iova_alignment) | 9762306a36Sopenharmony_ci page_offset; 9862306a36Sopenharmony_ci if (span->start_used > span->last_used || 9962306a36Sopenharmony_ci span->last_used - span->start_used < length - 1) 10062306a36Sopenharmony_ci return false; 10162306a36Sopenharmony_ci return true; 10262306a36Sopenharmony_ci} 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci/* 10562306a36Sopenharmony_ci * Automatically find a block of IOVA that is not being used and not reserved. 10662306a36Sopenharmony_ci * Does not return a 0 IOVA even if it is valid. 10762306a36Sopenharmony_ci */ 10862306a36Sopenharmony_cistatic int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova, 10962306a36Sopenharmony_ci unsigned long uptr, unsigned long length) 11062306a36Sopenharmony_ci{ 11162306a36Sopenharmony_ci unsigned long page_offset = uptr % PAGE_SIZE; 11262306a36Sopenharmony_ci struct interval_tree_double_span_iter used_span; 11362306a36Sopenharmony_ci struct interval_tree_span_iter allowed_span; 11462306a36Sopenharmony_ci unsigned long iova_alignment; 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci lockdep_assert_held(&iopt->iova_rwsem); 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci /* Protect roundup_pow-of_two() from overflow */ 11962306a36Sopenharmony_ci if (length == 0 || length >= ULONG_MAX / 2) 12062306a36Sopenharmony_ci return -EOVERFLOW; 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci /* 12362306a36Sopenharmony_ci * Keep alignment present in the uptr when building the IOVA, this 12462306a36Sopenharmony_ci * increases the chance we can map a THP. 12562306a36Sopenharmony_ci */ 12662306a36Sopenharmony_ci if (!uptr) 12762306a36Sopenharmony_ci iova_alignment = roundup_pow_of_two(length); 12862306a36Sopenharmony_ci else 12962306a36Sopenharmony_ci iova_alignment = min_t(unsigned long, 13062306a36Sopenharmony_ci roundup_pow_of_two(length), 13162306a36Sopenharmony_ci 1UL << __ffs64(uptr)); 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci if (iova_alignment < iopt->iova_alignment) 13462306a36Sopenharmony_ci return -EINVAL; 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree, 13762306a36Sopenharmony_ci PAGE_SIZE, ULONG_MAX - PAGE_SIZE) { 13862306a36Sopenharmony_ci if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) { 13962306a36Sopenharmony_ci allowed_span.start_used = PAGE_SIZE; 14062306a36Sopenharmony_ci allowed_span.last_used = ULONG_MAX - PAGE_SIZE; 14162306a36Sopenharmony_ci allowed_span.is_hole = false; 14262306a36Sopenharmony_ci } 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci if (!__alloc_iova_check_used(&allowed_span, length, 14562306a36Sopenharmony_ci iova_alignment, page_offset)) 14662306a36Sopenharmony_ci continue; 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci interval_tree_for_each_double_span( 14962306a36Sopenharmony_ci &used_span, &iopt->reserved_itree, &iopt->area_itree, 15062306a36Sopenharmony_ci allowed_span.start_used, allowed_span.last_used) { 15162306a36Sopenharmony_ci if (!__alloc_iova_check_hole(&used_span, length, 15262306a36Sopenharmony_ci iova_alignment, 15362306a36Sopenharmony_ci page_offset)) 15462306a36Sopenharmony_ci continue; 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci *iova = used_span.start_hole; 15762306a36Sopenharmony_ci return 0; 15862306a36Sopenharmony_ci } 15962306a36Sopenharmony_ci } 16062306a36Sopenharmony_ci return -ENOSPC; 16162306a36Sopenharmony_ci} 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_cistatic int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova, 16462306a36Sopenharmony_ci unsigned long length) 16562306a36Sopenharmony_ci{ 16662306a36Sopenharmony_ci unsigned long last; 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci lockdep_assert_held(&iopt->iova_rwsem); 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci if ((iova & (iopt->iova_alignment - 1))) 17162306a36Sopenharmony_ci return -EINVAL; 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci if (check_add_overflow(iova, length - 1, &last)) 17462306a36Sopenharmony_ci return -EOVERFLOW; 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci /* No reserved IOVA intersects the range */ 17762306a36Sopenharmony_ci if (iopt_reserved_iter_first(iopt, iova, last)) 17862306a36Sopenharmony_ci return -EINVAL; 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci /* Check that there is not already a mapping in the range */ 18162306a36Sopenharmony_ci if (iopt_area_iter_first(iopt, iova, last)) 18262306a36Sopenharmony_ci return -EEXIST; 18362306a36Sopenharmony_ci return 0; 18462306a36Sopenharmony_ci} 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci/* 18762306a36Sopenharmony_ci * The area takes a slice of the pages from start_bytes to start_byte + length 18862306a36Sopenharmony_ci */ 18962306a36Sopenharmony_cistatic int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area, 19062306a36Sopenharmony_ci struct iopt_pages *pages, unsigned long iova, 19162306a36Sopenharmony_ci unsigned long start_byte, unsigned long length, 19262306a36Sopenharmony_ci int iommu_prot) 19362306a36Sopenharmony_ci{ 19462306a36Sopenharmony_ci lockdep_assert_held_write(&iopt->iova_rwsem); 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci if ((iommu_prot & IOMMU_WRITE) && !pages->writable) 19762306a36Sopenharmony_ci return -EPERM; 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci area->iommu_prot = iommu_prot; 20062306a36Sopenharmony_ci area->page_offset = start_byte % PAGE_SIZE; 20162306a36Sopenharmony_ci if (area->page_offset & (iopt->iova_alignment - 1)) 20262306a36Sopenharmony_ci return -EINVAL; 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci area->node.start = iova; 20562306a36Sopenharmony_ci if (check_add_overflow(iova, length - 1, &area->node.last)) 20662306a36Sopenharmony_ci return -EOVERFLOW; 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci area->pages_node.start = start_byte / PAGE_SIZE; 20962306a36Sopenharmony_ci if (check_add_overflow(start_byte, length - 1, &area->pages_node.last)) 21062306a36Sopenharmony_ci return -EOVERFLOW; 21162306a36Sopenharmony_ci area->pages_node.last = area->pages_node.last / PAGE_SIZE; 21262306a36Sopenharmony_ci if (WARN_ON(area->pages_node.last >= pages->npages)) 21362306a36Sopenharmony_ci return -EOVERFLOW; 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci /* 21662306a36Sopenharmony_ci * The area is inserted with a NULL pages indicating it is not fully 21762306a36Sopenharmony_ci * initialized yet. 21862306a36Sopenharmony_ci */ 21962306a36Sopenharmony_ci area->iopt = iopt; 22062306a36Sopenharmony_ci interval_tree_insert(&area->node, &iopt->area_itree); 22162306a36Sopenharmony_ci return 0; 22262306a36Sopenharmony_ci} 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_cistatic struct iopt_area *iopt_area_alloc(void) 22562306a36Sopenharmony_ci{ 22662306a36Sopenharmony_ci struct iopt_area *area; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci area = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT); 22962306a36Sopenharmony_ci if (!area) 23062306a36Sopenharmony_ci return NULL; 23162306a36Sopenharmony_ci RB_CLEAR_NODE(&area->node.rb); 23262306a36Sopenharmony_ci RB_CLEAR_NODE(&area->pages_node.rb); 23362306a36Sopenharmony_ci return area; 23462306a36Sopenharmony_ci} 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_cistatic int iopt_alloc_area_pages(struct io_pagetable *iopt, 23762306a36Sopenharmony_ci struct list_head *pages_list, 23862306a36Sopenharmony_ci unsigned long length, unsigned long *dst_iova, 23962306a36Sopenharmony_ci int iommu_prot, unsigned int flags) 24062306a36Sopenharmony_ci{ 24162306a36Sopenharmony_ci struct iopt_pages_list *elm; 24262306a36Sopenharmony_ci unsigned long iova; 24362306a36Sopenharmony_ci int rc = 0; 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci list_for_each_entry(elm, pages_list, next) { 24662306a36Sopenharmony_ci elm->area = iopt_area_alloc(); 24762306a36Sopenharmony_ci if (!elm->area) 24862306a36Sopenharmony_ci return -ENOMEM; 24962306a36Sopenharmony_ci } 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci down_write(&iopt->iova_rwsem); 25262306a36Sopenharmony_ci if ((length & (iopt->iova_alignment - 1)) || !length) { 25362306a36Sopenharmony_ci rc = -EINVAL; 25462306a36Sopenharmony_ci goto out_unlock; 25562306a36Sopenharmony_ci } 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci if (flags & IOPT_ALLOC_IOVA) { 25862306a36Sopenharmony_ci /* Use the first entry to guess the ideal IOVA alignment */ 25962306a36Sopenharmony_ci elm = list_first_entry(pages_list, struct iopt_pages_list, 26062306a36Sopenharmony_ci next); 26162306a36Sopenharmony_ci rc = iopt_alloc_iova( 26262306a36Sopenharmony_ci iopt, dst_iova, 26362306a36Sopenharmony_ci (uintptr_t)elm->pages->uptr + elm->start_byte, length); 26462306a36Sopenharmony_ci if (rc) 26562306a36Sopenharmony_ci goto out_unlock; 26662306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && 26762306a36Sopenharmony_ci WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) { 26862306a36Sopenharmony_ci rc = -EINVAL; 26962306a36Sopenharmony_ci goto out_unlock; 27062306a36Sopenharmony_ci } 27162306a36Sopenharmony_ci } else { 27262306a36Sopenharmony_ci rc = iopt_check_iova(iopt, *dst_iova, length); 27362306a36Sopenharmony_ci if (rc) 27462306a36Sopenharmony_ci goto out_unlock; 27562306a36Sopenharmony_ci } 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci /* 27862306a36Sopenharmony_ci * Areas are created with a NULL pages so that the IOVA space is 27962306a36Sopenharmony_ci * reserved and we can unlock the iova_rwsem. 28062306a36Sopenharmony_ci */ 28162306a36Sopenharmony_ci iova = *dst_iova; 28262306a36Sopenharmony_ci list_for_each_entry(elm, pages_list, next) { 28362306a36Sopenharmony_ci rc = iopt_insert_area(iopt, elm->area, elm->pages, iova, 28462306a36Sopenharmony_ci elm->start_byte, elm->length, iommu_prot); 28562306a36Sopenharmony_ci if (rc) 28662306a36Sopenharmony_ci goto out_unlock; 28762306a36Sopenharmony_ci iova += elm->length; 28862306a36Sopenharmony_ci } 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ciout_unlock: 29162306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 29262306a36Sopenharmony_ci return rc; 29362306a36Sopenharmony_ci} 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_cistatic void iopt_abort_area(struct iopt_area *area) 29662306a36Sopenharmony_ci{ 29762306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) 29862306a36Sopenharmony_ci WARN_ON(area->pages); 29962306a36Sopenharmony_ci if (area->iopt) { 30062306a36Sopenharmony_ci down_write(&area->iopt->iova_rwsem); 30162306a36Sopenharmony_ci interval_tree_remove(&area->node, &area->iopt->area_itree); 30262306a36Sopenharmony_ci up_write(&area->iopt->iova_rwsem); 30362306a36Sopenharmony_ci } 30462306a36Sopenharmony_ci kfree(area); 30562306a36Sopenharmony_ci} 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_civoid iopt_free_pages_list(struct list_head *pages_list) 30862306a36Sopenharmony_ci{ 30962306a36Sopenharmony_ci struct iopt_pages_list *elm; 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci while ((elm = list_first_entry_or_null(pages_list, 31262306a36Sopenharmony_ci struct iopt_pages_list, next))) { 31362306a36Sopenharmony_ci if (elm->area) 31462306a36Sopenharmony_ci iopt_abort_area(elm->area); 31562306a36Sopenharmony_ci if (elm->pages) 31662306a36Sopenharmony_ci iopt_put_pages(elm->pages); 31762306a36Sopenharmony_ci list_del(&elm->next); 31862306a36Sopenharmony_ci kfree(elm); 31962306a36Sopenharmony_ci } 32062306a36Sopenharmony_ci} 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_cistatic int iopt_fill_domains_pages(struct list_head *pages_list) 32362306a36Sopenharmony_ci{ 32462306a36Sopenharmony_ci struct iopt_pages_list *undo_elm; 32562306a36Sopenharmony_ci struct iopt_pages_list *elm; 32662306a36Sopenharmony_ci int rc; 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci list_for_each_entry(elm, pages_list, next) { 32962306a36Sopenharmony_ci rc = iopt_area_fill_domains(elm->area, elm->pages); 33062306a36Sopenharmony_ci if (rc) 33162306a36Sopenharmony_ci goto err_undo; 33262306a36Sopenharmony_ci } 33362306a36Sopenharmony_ci return 0; 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_cierr_undo: 33662306a36Sopenharmony_ci list_for_each_entry(undo_elm, pages_list, next) { 33762306a36Sopenharmony_ci if (undo_elm == elm) 33862306a36Sopenharmony_ci break; 33962306a36Sopenharmony_ci iopt_area_unfill_domains(undo_elm->area, undo_elm->pages); 34062306a36Sopenharmony_ci } 34162306a36Sopenharmony_ci return rc; 34262306a36Sopenharmony_ci} 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ciint iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list, 34562306a36Sopenharmony_ci unsigned long length, unsigned long *dst_iova, 34662306a36Sopenharmony_ci int iommu_prot, unsigned int flags) 34762306a36Sopenharmony_ci{ 34862306a36Sopenharmony_ci struct iopt_pages_list *elm; 34962306a36Sopenharmony_ci int rc; 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova, 35262306a36Sopenharmony_ci iommu_prot, flags); 35362306a36Sopenharmony_ci if (rc) 35462306a36Sopenharmony_ci return rc; 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci down_read(&iopt->domains_rwsem); 35762306a36Sopenharmony_ci rc = iopt_fill_domains_pages(pages_list); 35862306a36Sopenharmony_ci if (rc) 35962306a36Sopenharmony_ci goto out_unlock_domains; 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_ci down_write(&iopt->iova_rwsem); 36262306a36Sopenharmony_ci list_for_each_entry(elm, pages_list, next) { 36362306a36Sopenharmony_ci /* 36462306a36Sopenharmony_ci * area->pages must be set inside the domains_rwsem to ensure 36562306a36Sopenharmony_ci * any newly added domains will get filled. Moves the reference 36662306a36Sopenharmony_ci * in from the list. 36762306a36Sopenharmony_ci */ 36862306a36Sopenharmony_ci elm->area->pages = elm->pages; 36962306a36Sopenharmony_ci elm->pages = NULL; 37062306a36Sopenharmony_ci elm->area = NULL; 37162306a36Sopenharmony_ci } 37262306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 37362306a36Sopenharmony_ciout_unlock_domains: 37462306a36Sopenharmony_ci up_read(&iopt->domains_rwsem); 37562306a36Sopenharmony_ci return rc; 37662306a36Sopenharmony_ci} 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci/** 37962306a36Sopenharmony_ci * iopt_map_user_pages() - Map a user VA to an iova in the io page table 38062306a36Sopenharmony_ci * @ictx: iommufd_ctx the iopt is part of 38162306a36Sopenharmony_ci * @iopt: io_pagetable to act on 38262306a36Sopenharmony_ci * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains 38362306a36Sopenharmony_ci * the chosen iova on output. Otherwise is the iova to map to on input 38462306a36Sopenharmony_ci * @uptr: User VA to map 38562306a36Sopenharmony_ci * @length: Number of bytes to map 38662306a36Sopenharmony_ci * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping 38762306a36Sopenharmony_ci * @flags: IOPT_ALLOC_IOVA or zero 38862306a36Sopenharmony_ci * 38962306a36Sopenharmony_ci * iova, uptr, and length must be aligned to iova_alignment. For domain backed 39062306a36Sopenharmony_ci * page tables this will pin the pages and load them into the domain at iova. 39162306a36Sopenharmony_ci * For non-domain page tables this will only setup a lazy reference and the 39262306a36Sopenharmony_ci * caller must use iopt_access_pages() to touch them. 39362306a36Sopenharmony_ci * 39462306a36Sopenharmony_ci * iopt_unmap_iova() must be called to undo this before the io_pagetable can be 39562306a36Sopenharmony_ci * destroyed. 39662306a36Sopenharmony_ci */ 39762306a36Sopenharmony_ciint iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, 39862306a36Sopenharmony_ci unsigned long *iova, void __user *uptr, 39962306a36Sopenharmony_ci unsigned long length, int iommu_prot, 40062306a36Sopenharmony_ci unsigned int flags) 40162306a36Sopenharmony_ci{ 40262306a36Sopenharmony_ci struct iopt_pages_list elm = {}; 40362306a36Sopenharmony_ci LIST_HEAD(pages_list); 40462306a36Sopenharmony_ci int rc; 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE); 40762306a36Sopenharmony_ci if (IS_ERR(elm.pages)) 40862306a36Sopenharmony_ci return PTR_ERR(elm.pages); 40962306a36Sopenharmony_ci if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM && 41062306a36Sopenharmony_ci elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER) 41162306a36Sopenharmony_ci elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM; 41262306a36Sopenharmony_ci elm.start_byte = uptr - elm.pages->uptr; 41362306a36Sopenharmony_ci elm.length = length; 41462306a36Sopenharmony_ci list_add(&elm.next, &pages_list); 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags); 41762306a36Sopenharmony_ci if (rc) { 41862306a36Sopenharmony_ci if (elm.area) 41962306a36Sopenharmony_ci iopt_abort_area(elm.area); 42062306a36Sopenharmony_ci if (elm.pages) 42162306a36Sopenharmony_ci iopt_put_pages(elm.pages); 42262306a36Sopenharmony_ci return rc; 42362306a36Sopenharmony_ci } 42462306a36Sopenharmony_ci return 0; 42562306a36Sopenharmony_ci} 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ciint iopt_get_pages(struct io_pagetable *iopt, unsigned long iova, 42862306a36Sopenharmony_ci unsigned long length, struct list_head *pages_list) 42962306a36Sopenharmony_ci{ 43062306a36Sopenharmony_ci struct iopt_area_contig_iter iter; 43162306a36Sopenharmony_ci unsigned long last_iova; 43262306a36Sopenharmony_ci struct iopt_area *area; 43362306a36Sopenharmony_ci int rc; 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci if (!length) 43662306a36Sopenharmony_ci return -EINVAL; 43762306a36Sopenharmony_ci if (check_add_overflow(iova, length - 1, &last_iova)) 43862306a36Sopenharmony_ci return -EOVERFLOW; 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci down_read(&iopt->iova_rwsem); 44162306a36Sopenharmony_ci iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { 44262306a36Sopenharmony_ci struct iopt_pages_list *elm; 44362306a36Sopenharmony_ci unsigned long last = min(last_iova, iopt_area_last_iova(area)); 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT); 44662306a36Sopenharmony_ci if (!elm) { 44762306a36Sopenharmony_ci rc = -ENOMEM; 44862306a36Sopenharmony_ci goto err_free; 44962306a36Sopenharmony_ci } 45062306a36Sopenharmony_ci elm->start_byte = iopt_area_start_byte(area, iter.cur_iova); 45162306a36Sopenharmony_ci elm->pages = area->pages; 45262306a36Sopenharmony_ci elm->length = (last - iter.cur_iova) + 1; 45362306a36Sopenharmony_ci kref_get(&elm->pages->kref); 45462306a36Sopenharmony_ci list_add_tail(&elm->next, pages_list); 45562306a36Sopenharmony_ci } 45662306a36Sopenharmony_ci if (!iopt_area_contig_done(&iter)) { 45762306a36Sopenharmony_ci rc = -ENOENT; 45862306a36Sopenharmony_ci goto err_free; 45962306a36Sopenharmony_ci } 46062306a36Sopenharmony_ci up_read(&iopt->iova_rwsem); 46162306a36Sopenharmony_ci return 0; 46262306a36Sopenharmony_cierr_free: 46362306a36Sopenharmony_ci up_read(&iopt->iova_rwsem); 46462306a36Sopenharmony_ci iopt_free_pages_list(pages_list); 46562306a36Sopenharmony_ci return rc; 46662306a36Sopenharmony_ci} 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_cistatic int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start, 46962306a36Sopenharmony_ci unsigned long last, unsigned long *unmapped) 47062306a36Sopenharmony_ci{ 47162306a36Sopenharmony_ci struct iopt_area *area; 47262306a36Sopenharmony_ci unsigned long unmapped_bytes = 0; 47362306a36Sopenharmony_ci unsigned int tries = 0; 47462306a36Sopenharmony_ci int rc = -ENOENT; 47562306a36Sopenharmony_ci 47662306a36Sopenharmony_ci /* 47762306a36Sopenharmony_ci * The domains_rwsem must be held in read mode any time any area->pages 47862306a36Sopenharmony_ci * is NULL. This prevents domain attach/detatch from running 47962306a36Sopenharmony_ci * concurrently with cleaning up the area. 48062306a36Sopenharmony_ci */ 48162306a36Sopenharmony_ciagain: 48262306a36Sopenharmony_ci down_read(&iopt->domains_rwsem); 48362306a36Sopenharmony_ci down_write(&iopt->iova_rwsem); 48462306a36Sopenharmony_ci while ((area = iopt_area_iter_first(iopt, start, last))) { 48562306a36Sopenharmony_ci unsigned long area_last = iopt_area_last_iova(area); 48662306a36Sopenharmony_ci unsigned long area_first = iopt_area_iova(area); 48762306a36Sopenharmony_ci struct iopt_pages *pages; 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_ci /* Userspace should not race map/unmap's of the same area */ 49062306a36Sopenharmony_ci if (!area->pages) { 49162306a36Sopenharmony_ci rc = -EBUSY; 49262306a36Sopenharmony_ci goto out_unlock_iova; 49362306a36Sopenharmony_ci } 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci if (area_first < start || area_last > last) { 49662306a36Sopenharmony_ci rc = -ENOENT; 49762306a36Sopenharmony_ci goto out_unlock_iova; 49862306a36Sopenharmony_ci } 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci if (area_first != start) 50162306a36Sopenharmony_ci tries = 0; 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_ci /* 50462306a36Sopenharmony_ci * num_accesses writers must hold the iova_rwsem too, so we can 50562306a36Sopenharmony_ci * safely read it under the write side of the iovam_rwsem 50662306a36Sopenharmony_ci * without the pages->mutex. 50762306a36Sopenharmony_ci */ 50862306a36Sopenharmony_ci if (area->num_accesses) { 50962306a36Sopenharmony_ci size_t length = iopt_area_length(area); 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_ci start = area_first; 51262306a36Sopenharmony_ci area->prevent_access = true; 51362306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 51462306a36Sopenharmony_ci up_read(&iopt->domains_rwsem); 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci iommufd_access_notify_unmap(iopt, area_first, length); 51762306a36Sopenharmony_ci /* Something is not responding to unmap requests. */ 51862306a36Sopenharmony_ci tries++; 51962306a36Sopenharmony_ci if (WARN_ON(tries > 100)) 52062306a36Sopenharmony_ci return -EDEADLOCK; 52162306a36Sopenharmony_ci goto again; 52262306a36Sopenharmony_ci } 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci pages = area->pages; 52562306a36Sopenharmony_ci area->pages = NULL; 52662306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ci iopt_area_unfill_domains(area, pages); 52962306a36Sopenharmony_ci iopt_abort_area(area); 53062306a36Sopenharmony_ci iopt_put_pages(pages); 53162306a36Sopenharmony_ci 53262306a36Sopenharmony_ci unmapped_bytes += area_last - area_first + 1; 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci down_write(&iopt->iova_rwsem); 53562306a36Sopenharmony_ci } 53662306a36Sopenharmony_ci if (unmapped_bytes) 53762306a36Sopenharmony_ci rc = 0; 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ciout_unlock_iova: 54062306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 54162306a36Sopenharmony_ci up_read(&iopt->domains_rwsem); 54262306a36Sopenharmony_ci if (unmapped) 54362306a36Sopenharmony_ci *unmapped = unmapped_bytes; 54462306a36Sopenharmony_ci return rc; 54562306a36Sopenharmony_ci} 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci/** 54862306a36Sopenharmony_ci * iopt_unmap_iova() - Remove a range of iova 54962306a36Sopenharmony_ci * @iopt: io_pagetable to act on 55062306a36Sopenharmony_ci * @iova: Starting iova to unmap 55162306a36Sopenharmony_ci * @length: Number of bytes to unmap 55262306a36Sopenharmony_ci * @unmapped: Return number of bytes unmapped 55362306a36Sopenharmony_ci * 55462306a36Sopenharmony_ci * The requested range must be a superset of existing ranges. 55562306a36Sopenharmony_ci * Splitting/truncating IOVA mappings is not allowed. 55662306a36Sopenharmony_ci */ 55762306a36Sopenharmony_ciint iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, 55862306a36Sopenharmony_ci unsigned long length, unsigned long *unmapped) 55962306a36Sopenharmony_ci{ 56062306a36Sopenharmony_ci unsigned long iova_last; 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_ci if (!length) 56362306a36Sopenharmony_ci return -EINVAL; 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci if (check_add_overflow(iova, length - 1, &iova_last)) 56662306a36Sopenharmony_ci return -EOVERFLOW; 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped); 56962306a36Sopenharmony_ci} 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ciint iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped) 57262306a36Sopenharmony_ci{ 57362306a36Sopenharmony_ci int rc; 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); 57662306a36Sopenharmony_ci /* If the IOVAs are empty then unmap all succeeds */ 57762306a36Sopenharmony_ci if (rc == -ENOENT) 57862306a36Sopenharmony_ci return 0; 57962306a36Sopenharmony_ci return rc; 58062306a36Sopenharmony_ci} 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci/* The caller must always free all the nodes in the allowed_iova rb_root. */ 58362306a36Sopenharmony_ciint iopt_set_allow_iova(struct io_pagetable *iopt, 58462306a36Sopenharmony_ci struct rb_root_cached *allowed_iova) 58562306a36Sopenharmony_ci{ 58662306a36Sopenharmony_ci struct iopt_allowed *allowed; 58762306a36Sopenharmony_ci 58862306a36Sopenharmony_ci down_write(&iopt->iova_rwsem); 58962306a36Sopenharmony_ci swap(*allowed_iova, iopt->allowed_itree); 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed; 59262306a36Sopenharmony_ci allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) { 59362306a36Sopenharmony_ci if (iopt_reserved_iter_first(iopt, allowed->node.start, 59462306a36Sopenharmony_ci allowed->node.last)) { 59562306a36Sopenharmony_ci swap(*allowed_iova, iopt->allowed_itree); 59662306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 59762306a36Sopenharmony_ci return -EADDRINUSE; 59862306a36Sopenharmony_ci } 59962306a36Sopenharmony_ci } 60062306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 60162306a36Sopenharmony_ci return 0; 60262306a36Sopenharmony_ci} 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ciint iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start, 60562306a36Sopenharmony_ci unsigned long last, void *owner) 60662306a36Sopenharmony_ci{ 60762306a36Sopenharmony_ci struct iopt_reserved *reserved; 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci lockdep_assert_held_write(&iopt->iova_rwsem); 61062306a36Sopenharmony_ci 61162306a36Sopenharmony_ci if (iopt_area_iter_first(iopt, start, last) || 61262306a36Sopenharmony_ci iopt_allowed_iter_first(iopt, start, last)) 61362306a36Sopenharmony_ci return -EADDRINUSE; 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_ci reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT); 61662306a36Sopenharmony_ci if (!reserved) 61762306a36Sopenharmony_ci return -ENOMEM; 61862306a36Sopenharmony_ci reserved->node.start = start; 61962306a36Sopenharmony_ci reserved->node.last = last; 62062306a36Sopenharmony_ci reserved->owner = owner; 62162306a36Sopenharmony_ci interval_tree_insert(&reserved->node, &iopt->reserved_itree); 62262306a36Sopenharmony_ci return 0; 62362306a36Sopenharmony_ci} 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_cistatic void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner) 62662306a36Sopenharmony_ci{ 62762306a36Sopenharmony_ci struct iopt_reserved *reserved, *next; 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_ci lockdep_assert_held_write(&iopt->iova_rwsem); 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_ci for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved; 63262306a36Sopenharmony_ci reserved = next) { 63362306a36Sopenharmony_ci next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX); 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci if (reserved->owner == owner) { 63662306a36Sopenharmony_ci interval_tree_remove(&reserved->node, 63762306a36Sopenharmony_ci &iopt->reserved_itree); 63862306a36Sopenharmony_ci kfree(reserved); 63962306a36Sopenharmony_ci } 64062306a36Sopenharmony_ci } 64162306a36Sopenharmony_ci} 64262306a36Sopenharmony_ci 64362306a36Sopenharmony_civoid iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner) 64462306a36Sopenharmony_ci{ 64562306a36Sopenharmony_ci down_write(&iopt->iova_rwsem); 64662306a36Sopenharmony_ci __iopt_remove_reserved_iova(iopt, owner); 64762306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 64862306a36Sopenharmony_ci} 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_civoid iopt_init_table(struct io_pagetable *iopt) 65162306a36Sopenharmony_ci{ 65262306a36Sopenharmony_ci init_rwsem(&iopt->iova_rwsem); 65362306a36Sopenharmony_ci init_rwsem(&iopt->domains_rwsem); 65462306a36Sopenharmony_ci iopt->area_itree = RB_ROOT_CACHED; 65562306a36Sopenharmony_ci iopt->allowed_itree = RB_ROOT_CACHED; 65662306a36Sopenharmony_ci iopt->reserved_itree = RB_ROOT_CACHED; 65762306a36Sopenharmony_ci xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT); 65862306a36Sopenharmony_ci xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC); 65962306a36Sopenharmony_ci 66062306a36Sopenharmony_ci /* 66162306a36Sopenharmony_ci * iopt's start as SW tables that can use the entire size_t IOVA space 66262306a36Sopenharmony_ci * due to the use of size_t in the APIs. They have no alignment 66362306a36Sopenharmony_ci * restriction. 66462306a36Sopenharmony_ci */ 66562306a36Sopenharmony_ci iopt->iova_alignment = 1; 66662306a36Sopenharmony_ci} 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_civoid iopt_destroy_table(struct io_pagetable *iopt) 66962306a36Sopenharmony_ci{ 67062306a36Sopenharmony_ci struct interval_tree_node *node; 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) 67362306a36Sopenharmony_ci iopt_remove_reserved_iova(iopt, NULL); 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_ci while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0, 67662306a36Sopenharmony_ci ULONG_MAX))) { 67762306a36Sopenharmony_ci interval_tree_remove(node, &iopt->allowed_itree); 67862306a36Sopenharmony_ci kfree(container_of(node, struct iopt_allowed, node)); 67962306a36Sopenharmony_ci } 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root)); 68262306a36Sopenharmony_ci WARN_ON(!xa_empty(&iopt->domains)); 68362306a36Sopenharmony_ci WARN_ON(!xa_empty(&iopt->access_list)); 68462306a36Sopenharmony_ci WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root)); 68562306a36Sopenharmony_ci} 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ci/** 68862306a36Sopenharmony_ci * iopt_unfill_domain() - Unfill a domain with PFNs 68962306a36Sopenharmony_ci * @iopt: io_pagetable to act on 69062306a36Sopenharmony_ci * @domain: domain to unfill 69162306a36Sopenharmony_ci * 69262306a36Sopenharmony_ci * This is used when removing a domain from the iopt. Every area in the iopt 69362306a36Sopenharmony_ci * will be unmapped from the domain. The domain must already be removed from the 69462306a36Sopenharmony_ci * domains xarray. 69562306a36Sopenharmony_ci */ 69662306a36Sopenharmony_cistatic void iopt_unfill_domain(struct io_pagetable *iopt, 69762306a36Sopenharmony_ci struct iommu_domain *domain) 69862306a36Sopenharmony_ci{ 69962306a36Sopenharmony_ci struct iopt_area *area; 70062306a36Sopenharmony_ci 70162306a36Sopenharmony_ci lockdep_assert_held(&iopt->iova_rwsem); 70262306a36Sopenharmony_ci lockdep_assert_held_write(&iopt->domains_rwsem); 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_ci /* 70562306a36Sopenharmony_ci * Some other domain is holding all the pfns still, rapidly unmap this 70662306a36Sopenharmony_ci * domain. 70762306a36Sopenharmony_ci */ 70862306a36Sopenharmony_ci if (iopt->next_domain_id != 0) { 70962306a36Sopenharmony_ci /* Pick an arbitrary remaining domain to act as storage */ 71062306a36Sopenharmony_ci struct iommu_domain *storage_domain = 71162306a36Sopenharmony_ci xa_load(&iopt->domains, 0); 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_ci for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; 71462306a36Sopenharmony_ci area = iopt_area_iter_next(area, 0, ULONG_MAX)) { 71562306a36Sopenharmony_ci struct iopt_pages *pages = area->pages; 71662306a36Sopenharmony_ci 71762306a36Sopenharmony_ci if (!pages) 71862306a36Sopenharmony_ci continue; 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ci mutex_lock(&pages->mutex); 72162306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) 72262306a36Sopenharmony_ci WARN_ON(!area->storage_domain); 72362306a36Sopenharmony_ci if (area->storage_domain == domain) 72462306a36Sopenharmony_ci area->storage_domain = storage_domain; 72562306a36Sopenharmony_ci mutex_unlock(&pages->mutex); 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci iopt_area_unmap_domain(area, domain); 72862306a36Sopenharmony_ci } 72962306a36Sopenharmony_ci return; 73062306a36Sopenharmony_ci } 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; 73362306a36Sopenharmony_ci area = iopt_area_iter_next(area, 0, ULONG_MAX)) { 73462306a36Sopenharmony_ci struct iopt_pages *pages = area->pages; 73562306a36Sopenharmony_ci 73662306a36Sopenharmony_ci if (!pages) 73762306a36Sopenharmony_ci continue; 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci mutex_lock(&pages->mutex); 74062306a36Sopenharmony_ci interval_tree_remove(&area->pages_node, &pages->domains_itree); 74162306a36Sopenharmony_ci WARN_ON(area->storage_domain != domain); 74262306a36Sopenharmony_ci area->storage_domain = NULL; 74362306a36Sopenharmony_ci iopt_area_unfill_domain(area, pages, domain); 74462306a36Sopenharmony_ci mutex_unlock(&pages->mutex); 74562306a36Sopenharmony_ci } 74662306a36Sopenharmony_ci} 74762306a36Sopenharmony_ci 74862306a36Sopenharmony_ci/** 74962306a36Sopenharmony_ci * iopt_fill_domain() - Fill a domain with PFNs 75062306a36Sopenharmony_ci * @iopt: io_pagetable to act on 75162306a36Sopenharmony_ci * @domain: domain to fill 75262306a36Sopenharmony_ci * 75362306a36Sopenharmony_ci * Fill the domain with PFNs from every area in the iopt. On failure the domain 75462306a36Sopenharmony_ci * is left unchanged. 75562306a36Sopenharmony_ci */ 75662306a36Sopenharmony_cistatic int iopt_fill_domain(struct io_pagetable *iopt, 75762306a36Sopenharmony_ci struct iommu_domain *domain) 75862306a36Sopenharmony_ci{ 75962306a36Sopenharmony_ci struct iopt_area *end_area; 76062306a36Sopenharmony_ci struct iopt_area *area; 76162306a36Sopenharmony_ci int rc; 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ci lockdep_assert_held(&iopt->iova_rwsem); 76462306a36Sopenharmony_ci lockdep_assert_held_write(&iopt->domains_rwsem); 76562306a36Sopenharmony_ci 76662306a36Sopenharmony_ci for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; 76762306a36Sopenharmony_ci area = iopt_area_iter_next(area, 0, ULONG_MAX)) { 76862306a36Sopenharmony_ci struct iopt_pages *pages = area->pages; 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci if (!pages) 77162306a36Sopenharmony_ci continue; 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci mutex_lock(&pages->mutex); 77462306a36Sopenharmony_ci rc = iopt_area_fill_domain(area, domain); 77562306a36Sopenharmony_ci if (rc) { 77662306a36Sopenharmony_ci mutex_unlock(&pages->mutex); 77762306a36Sopenharmony_ci goto out_unfill; 77862306a36Sopenharmony_ci } 77962306a36Sopenharmony_ci if (!area->storage_domain) { 78062306a36Sopenharmony_ci WARN_ON(iopt->next_domain_id != 0); 78162306a36Sopenharmony_ci area->storage_domain = domain; 78262306a36Sopenharmony_ci interval_tree_insert(&area->pages_node, 78362306a36Sopenharmony_ci &pages->domains_itree); 78462306a36Sopenharmony_ci } 78562306a36Sopenharmony_ci mutex_unlock(&pages->mutex); 78662306a36Sopenharmony_ci } 78762306a36Sopenharmony_ci return 0; 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ciout_unfill: 79062306a36Sopenharmony_ci end_area = area; 79162306a36Sopenharmony_ci for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; 79262306a36Sopenharmony_ci area = iopt_area_iter_next(area, 0, ULONG_MAX)) { 79362306a36Sopenharmony_ci struct iopt_pages *pages = area->pages; 79462306a36Sopenharmony_ci 79562306a36Sopenharmony_ci if (area == end_area) 79662306a36Sopenharmony_ci break; 79762306a36Sopenharmony_ci if (!pages) 79862306a36Sopenharmony_ci continue; 79962306a36Sopenharmony_ci mutex_lock(&pages->mutex); 80062306a36Sopenharmony_ci if (iopt->next_domain_id == 0) { 80162306a36Sopenharmony_ci interval_tree_remove(&area->pages_node, 80262306a36Sopenharmony_ci &pages->domains_itree); 80362306a36Sopenharmony_ci area->storage_domain = NULL; 80462306a36Sopenharmony_ci } 80562306a36Sopenharmony_ci iopt_area_unfill_domain(area, pages, domain); 80662306a36Sopenharmony_ci mutex_unlock(&pages->mutex); 80762306a36Sopenharmony_ci } 80862306a36Sopenharmony_ci return rc; 80962306a36Sopenharmony_ci} 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci/* All existing area's conform to an increased page size */ 81262306a36Sopenharmony_cistatic int iopt_check_iova_alignment(struct io_pagetable *iopt, 81362306a36Sopenharmony_ci unsigned long new_iova_alignment) 81462306a36Sopenharmony_ci{ 81562306a36Sopenharmony_ci unsigned long align_mask = new_iova_alignment - 1; 81662306a36Sopenharmony_ci struct iopt_area *area; 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci lockdep_assert_held(&iopt->iova_rwsem); 81962306a36Sopenharmony_ci lockdep_assert_held(&iopt->domains_rwsem); 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_ci for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; 82262306a36Sopenharmony_ci area = iopt_area_iter_next(area, 0, ULONG_MAX)) 82362306a36Sopenharmony_ci if ((iopt_area_iova(area) & align_mask) || 82462306a36Sopenharmony_ci (iopt_area_length(area) & align_mask) || 82562306a36Sopenharmony_ci (area->page_offset & align_mask)) 82662306a36Sopenharmony_ci return -EADDRINUSE; 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) { 82962306a36Sopenharmony_ci struct iommufd_access *access; 83062306a36Sopenharmony_ci unsigned long index; 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ci xa_for_each(&iopt->access_list, index, access) 83362306a36Sopenharmony_ci if (WARN_ON(access->iova_alignment > 83462306a36Sopenharmony_ci new_iova_alignment)) 83562306a36Sopenharmony_ci return -EADDRINUSE; 83662306a36Sopenharmony_ci } 83762306a36Sopenharmony_ci return 0; 83862306a36Sopenharmony_ci} 83962306a36Sopenharmony_ci 84062306a36Sopenharmony_ciint iopt_table_add_domain(struct io_pagetable *iopt, 84162306a36Sopenharmony_ci struct iommu_domain *domain) 84262306a36Sopenharmony_ci{ 84362306a36Sopenharmony_ci const struct iommu_domain_geometry *geometry = &domain->geometry; 84462306a36Sopenharmony_ci struct iommu_domain *iter_domain; 84562306a36Sopenharmony_ci unsigned int new_iova_alignment; 84662306a36Sopenharmony_ci unsigned long index; 84762306a36Sopenharmony_ci int rc; 84862306a36Sopenharmony_ci 84962306a36Sopenharmony_ci down_write(&iopt->domains_rwsem); 85062306a36Sopenharmony_ci down_write(&iopt->iova_rwsem); 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci xa_for_each(&iopt->domains, index, iter_domain) { 85362306a36Sopenharmony_ci if (WARN_ON(iter_domain == domain)) { 85462306a36Sopenharmony_ci rc = -EEXIST; 85562306a36Sopenharmony_ci goto out_unlock; 85662306a36Sopenharmony_ci } 85762306a36Sopenharmony_ci } 85862306a36Sopenharmony_ci 85962306a36Sopenharmony_ci /* 86062306a36Sopenharmony_ci * The io page size drives the iova_alignment. Internally the iopt_pages 86162306a36Sopenharmony_ci * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE 86262306a36Sopenharmony_ci * objects into the iommu_domain. 86362306a36Sopenharmony_ci * 86462306a36Sopenharmony_ci * A iommu_domain must always be able to accept PAGE_SIZE to be 86562306a36Sopenharmony_ci * compatible as we can't guarantee higher contiguity. 86662306a36Sopenharmony_ci */ 86762306a36Sopenharmony_ci new_iova_alignment = max_t(unsigned long, 86862306a36Sopenharmony_ci 1UL << __ffs(domain->pgsize_bitmap), 86962306a36Sopenharmony_ci iopt->iova_alignment); 87062306a36Sopenharmony_ci if (new_iova_alignment > PAGE_SIZE) { 87162306a36Sopenharmony_ci rc = -EINVAL; 87262306a36Sopenharmony_ci goto out_unlock; 87362306a36Sopenharmony_ci } 87462306a36Sopenharmony_ci if (new_iova_alignment != iopt->iova_alignment) { 87562306a36Sopenharmony_ci rc = iopt_check_iova_alignment(iopt, new_iova_alignment); 87662306a36Sopenharmony_ci if (rc) 87762306a36Sopenharmony_ci goto out_unlock; 87862306a36Sopenharmony_ci } 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci /* No area exists that is outside the allowed domain aperture */ 88162306a36Sopenharmony_ci if (geometry->aperture_start != 0) { 88262306a36Sopenharmony_ci rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1, 88362306a36Sopenharmony_ci domain); 88462306a36Sopenharmony_ci if (rc) 88562306a36Sopenharmony_ci goto out_reserved; 88662306a36Sopenharmony_ci } 88762306a36Sopenharmony_ci if (geometry->aperture_end != ULONG_MAX) { 88862306a36Sopenharmony_ci rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1, 88962306a36Sopenharmony_ci ULONG_MAX, domain); 89062306a36Sopenharmony_ci if (rc) 89162306a36Sopenharmony_ci goto out_reserved; 89262306a36Sopenharmony_ci } 89362306a36Sopenharmony_ci 89462306a36Sopenharmony_ci rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL); 89562306a36Sopenharmony_ci if (rc) 89662306a36Sopenharmony_ci goto out_reserved; 89762306a36Sopenharmony_ci 89862306a36Sopenharmony_ci rc = iopt_fill_domain(iopt, domain); 89962306a36Sopenharmony_ci if (rc) 90062306a36Sopenharmony_ci goto out_release; 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci iopt->iova_alignment = new_iova_alignment; 90362306a36Sopenharmony_ci xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL); 90462306a36Sopenharmony_ci iopt->next_domain_id++; 90562306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 90662306a36Sopenharmony_ci up_write(&iopt->domains_rwsem); 90762306a36Sopenharmony_ci return 0; 90862306a36Sopenharmony_ciout_release: 90962306a36Sopenharmony_ci xa_release(&iopt->domains, iopt->next_domain_id); 91062306a36Sopenharmony_ciout_reserved: 91162306a36Sopenharmony_ci __iopt_remove_reserved_iova(iopt, domain); 91262306a36Sopenharmony_ciout_unlock: 91362306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 91462306a36Sopenharmony_ci up_write(&iopt->domains_rwsem); 91562306a36Sopenharmony_ci return rc; 91662306a36Sopenharmony_ci} 91762306a36Sopenharmony_ci 91862306a36Sopenharmony_cistatic int iopt_calculate_iova_alignment(struct io_pagetable *iopt) 91962306a36Sopenharmony_ci{ 92062306a36Sopenharmony_ci unsigned long new_iova_alignment; 92162306a36Sopenharmony_ci struct iommufd_access *access; 92262306a36Sopenharmony_ci struct iommu_domain *domain; 92362306a36Sopenharmony_ci unsigned long index; 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_ci lockdep_assert_held_write(&iopt->iova_rwsem); 92662306a36Sopenharmony_ci lockdep_assert_held(&iopt->domains_rwsem); 92762306a36Sopenharmony_ci 92862306a36Sopenharmony_ci /* See batch_iommu_map_small() */ 92962306a36Sopenharmony_ci if (iopt->disable_large_pages) 93062306a36Sopenharmony_ci new_iova_alignment = PAGE_SIZE; 93162306a36Sopenharmony_ci else 93262306a36Sopenharmony_ci new_iova_alignment = 1; 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_ci xa_for_each(&iopt->domains, index, domain) 93562306a36Sopenharmony_ci new_iova_alignment = max_t(unsigned long, 93662306a36Sopenharmony_ci 1UL << __ffs(domain->pgsize_bitmap), 93762306a36Sopenharmony_ci new_iova_alignment); 93862306a36Sopenharmony_ci xa_for_each(&iopt->access_list, index, access) 93962306a36Sopenharmony_ci new_iova_alignment = max_t(unsigned long, 94062306a36Sopenharmony_ci access->iova_alignment, 94162306a36Sopenharmony_ci new_iova_alignment); 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_ci if (new_iova_alignment > iopt->iova_alignment) { 94462306a36Sopenharmony_ci int rc; 94562306a36Sopenharmony_ci 94662306a36Sopenharmony_ci rc = iopt_check_iova_alignment(iopt, new_iova_alignment); 94762306a36Sopenharmony_ci if (rc) 94862306a36Sopenharmony_ci return rc; 94962306a36Sopenharmony_ci } 95062306a36Sopenharmony_ci iopt->iova_alignment = new_iova_alignment; 95162306a36Sopenharmony_ci return 0; 95262306a36Sopenharmony_ci} 95362306a36Sopenharmony_ci 95462306a36Sopenharmony_civoid iopt_table_remove_domain(struct io_pagetable *iopt, 95562306a36Sopenharmony_ci struct iommu_domain *domain) 95662306a36Sopenharmony_ci{ 95762306a36Sopenharmony_ci struct iommu_domain *iter_domain = NULL; 95862306a36Sopenharmony_ci unsigned long index; 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci down_write(&iopt->domains_rwsem); 96162306a36Sopenharmony_ci down_write(&iopt->iova_rwsem); 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_ci xa_for_each(&iopt->domains, index, iter_domain) 96462306a36Sopenharmony_ci if (iter_domain == domain) 96562306a36Sopenharmony_ci break; 96662306a36Sopenharmony_ci if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id) 96762306a36Sopenharmony_ci goto out_unlock; 96862306a36Sopenharmony_ci 96962306a36Sopenharmony_ci /* 97062306a36Sopenharmony_ci * Compress the xarray to keep it linear by swapping the entry to erase 97162306a36Sopenharmony_ci * with the tail entry and shrinking the tail. 97262306a36Sopenharmony_ci */ 97362306a36Sopenharmony_ci iopt->next_domain_id--; 97462306a36Sopenharmony_ci iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id); 97562306a36Sopenharmony_ci if (index != iopt->next_domain_id) 97662306a36Sopenharmony_ci xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL); 97762306a36Sopenharmony_ci 97862306a36Sopenharmony_ci iopt_unfill_domain(iopt, domain); 97962306a36Sopenharmony_ci __iopt_remove_reserved_iova(iopt, domain); 98062306a36Sopenharmony_ci 98162306a36Sopenharmony_ci WARN_ON(iopt_calculate_iova_alignment(iopt)); 98262306a36Sopenharmony_ciout_unlock: 98362306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 98462306a36Sopenharmony_ci up_write(&iopt->domains_rwsem); 98562306a36Sopenharmony_ci} 98662306a36Sopenharmony_ci 98762306a36Sopenharmony_ci/** 98862306a36Sopenharmony_ci * iopt_area_split - Split an area into two parts at iova 98962306a36Sopenharmony_ci * @area: The area to split 99062306a36Sopenharmony_ci * @iova: Becomes the last of a new area 99162306a36Sopenharmony_ci * 99262306a36Sopenharmony_ci * This splits an area into two. It is part of the VFIO compatibility to allow 99362306a36Sopenharmony_ci * poking a hole in the mapping. The two areas continue to point at the same 99462306a36Sopenharmony_ci * iopt_pages, just with different starting bytes. 99562306a36Sopenharmony_ci */ 99662306a36Sopenharmony_cistatic int iopt_area_split(struct iopt_area *area, unsigned long iova) 99762306a36Sopenharmony_ci{ 99862306a36Sopenharmony_ci unsigned long alignment = area->iopt->iova_alignment; 99962306a36Sopenharmony_ci unsigned long last_iova = iopt_area_last_iova(area); 100062306a36Sopenharmony_ci unsigned long start_iova = iopt_area_iova(area); 100162306a36Sopenharmony_ci unsigned long new_start = iova + 1; 100262306a36Sopenharmony_ci struct io_pagetable *iopt = area->iopt; 100362306a36Sopenharmony_ci struct iopt_pages *pages = area->pages; 100462306a36Sopenharmony_ci struct iopt_area *lhs; 100562306a36Sopenharmony_ci struct iopt_area *rhs; 100662306a36Sopenharmony_ci int rc; 100762306a36Sopenharmony_ci 100862306a36Sopenharmony_ci lockdep_assert_held_write(&iopt->iova_rwsem); 100962306a36Sopenharmony_ci 101062306a36Sopenharmony_ci if (iova == start_iova || iova == last_iova) 101162306a36Sopenharmony_ci return 0; 101262306a36Sopenharmony_ci 101362306a36Sopenharmony_ci if (!pages || area->prevent_access) 101462306a36Sopenharmony_ci return -EBUSY; 101562306a36Sopenharmony_ci 101662306a36Sopenharmony_ci if (new_start & (alignment - 1) || 101762306a36Sopenharmony_ci iopt_area_start_byte(area, new_start) & (alignment - 1)) 101862306a36Sopenharmony_ci return -EINVAL; 101962306a36Sopenharmony_ci 102062306a36Sopenharmony_ci lhs = iopt_area_alloc(); 102162306a36Sopenharmony_ci if (!lhs) 102262306a36Sopenharmony_ci return -ENOMEM; 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_ci rhs = iopt_area_alloc(); 102562306a36Sopenharmony_ci if (!rhs) { 102662306a36Sopenharmony_ci rc = -ENOMEM; 102762306a36Sopenharmony_ci goto err_free_lhs; 102862306a36Sopenharmony_ci } 102962306a36Sopenharmony_ci 103062306a36Sopenharmony_ci mutex_lock(&pages->mutex); 103162306a36Sopenharmony_ci /* 103262306a36Sopenharmony_ci * Splitting is not permitted if an access exists, we don't track enough 103362306a36Sopenharmony_ci * information to split existing accesses. 103462306a36Sopenharmony_ci */ 103562306a36Sopenharmony_ci if (area->num_accesses) { 103662306a36Sopenharmony_ci rc = -EINVAL; 103762306a36Sopenharmony_ci goto err_unlock; 103862306a36Sopenharmony_ci } 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_ci /* 104162306a36Sopenharmony_ci * Splitting is not permitted if a domain could have been mapped with 104262306a36Sopenharmony_ci * huge pages. 104362306a36Sopenharmony_ci */ 104462306a36Sopenharmony_ci if (area->storage_domain && !iopt->disable_large_pages) { 104562306a36Sopenharmony_ci rc = -EINVAL; 104662306a36Sopenharmony_ci goto err_unlock; 104762306a36Sopenharmony_ci } 104862306a36Sopenharmony_ci 104962306a36Sopenharmony_ci interval_tree_remove(&area->node, &iopt->area_itree); 105062306a36Sopenharmony_ci rc = iopt_insert_area(iopt, lhs, area->pages, start_iova, 105162306a36Sopenharmony_ci iopt_area_start_byte(area, start_iova), 105262306a36Sopenharmony_ci (new_start - 1) - start_iova + 1, 105362306a36Sopenharmony_ci area->iommu_prot); 105462306a36Sopenharmony_ci if (WARN_ON(rc)) 105562306a36Sopenharmony_ci goto err_insert; 105662306a36Sopenharmony_ci 105762306a36Sopenharmony_ci rc = iopt_insert_area(iopt, rhs, area->pages, new_start, 105862306a36Sopenharmony_ci iopt_area_start_byte(area, new_start), 105962306a36Sopenharmony_ci last_iova - new_start + 1, area->iommu_prot); 106062306a36Sopenharmony_ci if (WARN_ON(rc)) 106162306a36Sopenharmony_ci goto err_remove_lhs; 106262306a36Sopenharmony_ci 106362306a36Sopenharmony_ci /* 106462306a36Sopenharmony_ci * If the original area has filled a domain, domains_itree has to be 106562306a36Sopenharmony_ci * updated. 106662306a36Sopenharmony_ci */ 106762306a36Sopenharmony_ci if (area->storage_domain) { 106862306a36Sopenharmony_ci interval_tree_remove(&area->pages_node, &pages->domains_itree); 106962306a36Sopenharmony_ci interval_tree_insert(&lhs->pages_node, &pages->domains_itree); 107062306a36Sopenharmony_ci interval_tree_insert(&rhs->pages_node, &pages->domains_itree); 107162306a36Sopenharmony_ci } 107262306a36Sopenharmony_ci 107362306a36Sopenharmony_ci lhs->storage_domain = area->storage_domain; 107462306a36Sopenharmony_ci lhs->pages = area->pages; 107562306a36Sopenharmony_ci rhs->storage_domain = area->storage_domain; 107662306a36Sopenharmony_ci rhs->pages = area->pages; 107762306a36Sopenharmony_ci kref_get(&rhs->pages->kref); 107862306a36Sopenharmony_ci kfree(area); 107962306a36Sopenharmony_ci mutex_unlock(&pages->mutex); 108062306a36Sopenharmony_ci 108162306a36Sopenharmony_ci /* 108262306a36Sopenharmony_ci * No change to domains or accesses because the pages hasn't been 108362306a36Sopenharmony_ci * changed 108462306a36Sopenharmony_ci */ 108562306a36Sopenharmony_ci return 0; 108662306a36Sopenharmony_ci 108762306a36Sopenharmony_cierr_remove_lhs: 108862306a36Sopenharmony_ci interval_tree_remove(&lhs->node, &iopt->area_itree); 108962306a36Sopenharmony_cierr_insert: 109062306a36Sopenharmony_ci interval_tree_insert(&area->node, &iopt->area_itree); 109162306a36Sopenharmony_cierr_unlock: 109262306a36Sopenharmony_ci mutex_unlock(&pages->mutex); 109362306a36Sopenharmony_ci kfree(rhs); 109462306a36Sopenharmony_cierr_free_lhs: 109562306a36Sopenharmony_ci kfree(lhs); 109662306a36Sopenharmony_ci return rc; 109762306a36Sopenharmony_ci} 109862306a36Sopenharmony_ci 109962306a36Sopenharmony_ciint iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas, 110062306a36Sopenharmony_ci size_t num_iovas) 110162306a36Sopenharmony_ci{ 110262306a36Sopenharmony_ci int rc = 0; 110362306a36Sopenharmony_ci int i; 110462306a36Sopenharmony_ci 110562306a36Sopenharmony_ci down_write(&iopt->iova_rwsem); 110662306a36Sopenharmony_ci for (i = 0; i < num_iovas; i++) { 110762306a36Sopenharmony_ci struct iopt_area *area; 110862306a36Sopenharmony_ci 110962306a36Sopenharmony_ci area = iopt_area_iter_first(iopt, iovas[i], iovas[i]); 111062306a36Sopenharmony_ci if (!area) 111162306a36Sopenharmony_ci continue; 111262306a36Sopenharmony_ci rc = iopt_area_split(area, iovas[i]); 111362306a36Sopenharmony_ci if (rc) 111462306a36Sopenharmony_ci break; 111562306a36Sopenharmony_ci } 111662306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 111762306a36Sopenharmony_ci return rc; 111862306a36Sopenharmony_ci} 111962306a36Sopenharmony_ci 112062306a36Sopenharmony_civoid iopt_enable_large_pages(struct io_pagetable *iopt) 112162306a36Sopenharmony_ci{ 112262306a36Sopenharmony_ci int rc; 112362306a36Sopenharmony_ci 112462306a36Sopenharmony_ci down_write(&iopt->domains_rwsem); 112562306a36Sopenharmony_ci down_write(&iopt->iova_rwsem); 112662306a36Sopenharmony_ci WRITE_ONCE(iopt->disable_large_pages, false); 112762306a36Sopenharmony_ci rc = iopt_calculate_iova_alignment(iopt); 112862306a36Sopenharmony_ci WARN_ON(rc); 112962306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 113062306a36Sopenharmony_ci up_write(&iopt->domains_rwsem); 113162306a36Sopenharmony_ci} 113262306a36Sopenharmony_ci 113362306a36Sopenharmony_ciint iopt_disable_large_pages(struct io_pagetable *iopt) 113462306a36Sopenharmony_ci{ 113562306a36Sopenharmony_ci int rc = 0; 113662306a36Sopenharmony_ci 113762306a36Sopenharmony_ci down_write(&iopt->domains_rwsem); 113862306a36Sopenharmony_ci down_write(&iopt->iova_rwsem); 113962306a36Sopenharmony_ci if (iopt->disable_large_pages) 114062306a36Sopenharmony_ci goto out_unlock; 114162306a36Sopenharmony_ci 114262306a36Sopenharmony_ci /* Won't do it if domains already have pages mapped in them */ 114362306a36Sopenharmony_ci if (!xa_empty(&iopt->domains) && 114462306a36Sopenharmony_ci !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) { 114562306a36Sopenharmony_ci rc = -EINVAL; 114662306a36Sopenharmony_ci goto out_unlock; 114762306a36Sopenharmony_ci } 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci WRITE_ONCE(iopt->disable_large_pages, true); 115062306a36Sopenharmony_ci rc = iopt_calculate_iova_alignment(iopt); 115162306a36Sopenharmony_ci if (rc) 115262306a36Sopenharmony_ci WRITE_ONCE(iopt->disable_large_pages, false); 115362306a36Sopenharmony_ciout_unlock: 115462306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 115562306a36Sopenharmony_ci up_write(&iopt->domains_rwsem); 115662306a36Sopenharmony_ci return rc; 115762306a36Sopenharmony_ci} 115862306a36Sopenharmony_ci 115962306a36Sopenharmony_ciint iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access) 116062306a36Sopenharmony_ci{ 116162306a36Sopenharmony_ci u32 new_id; 116262306a36Sopenharmony_ci int rc; 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_ci down_write(&iopt->domains_rwsem); 116562306a36Sopenharmony_ci down_write(&iopt->iova_rwsem); 116662306a36Sopenharmony_ci rc = xa_alloc(&iopt->access_list, &new_id, access, xa_limit_16b, 116762306a36Sopenharmony_ci GFP_KERNEL_ACCOUNT); 116862306a36Sopenharmony_ci 116962306a36Sopenharmony_ci if (rc) 117062306a36Sopenharmony_ci goto out_unlock; 117162306a36Sopenharmony_ci 117262306a36Sopenharmony_ci rc = iopt_calculate_iova_alignment(iopt); 117362306a36Sopenharmony_ci if (rc) { 117462306a36Sopenharmony_ci xa_erase(&iopt->access_list, new_id); 117562306a36Sopenharmony_ci goto out_unlock; 117662306a36Sopenharmony_ci } 117762306a36Sopenharmony_ci access->iopt_access_list_id = new_id; 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ciout_unlock: 118062306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 118162306a36Sopenharmony_ci up_write(&iopt->domains_rwsem); 118262306a36Sopenharmony_ci return rc; 118362306a36Sopenharmony_ci} 118462306a36Sopenharmony_ci 118562306a36Sopenharmony_civoid iopt_remove_access(struct io_pagetable *iopt, 118662306a36Sopenharmony_ci struct iommufd_access *access, 118762306a36Sopenharmony_ci u32 iopt_access_list_id) 118862306a36Sopenharmony_ci{ 118962306a36Sopenharmony_ci down_write(&iopt->domains_rwsem); 119062306a36Sopenharmony_ci down_write(&iopt->iova_rwsem); 119162306a36Sopenharmony_ci WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access); 119262306a36Sopenharmony_ci WARN_ON(iopt_calculate_iova_alignment(iopt)); 119362306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 119462306a36Sopenharmony_ci up_write(&iopt->domains_rwsem); 119562306a36Sopenharmony_ci} 119662306a36Sopenharmony_ci 119762306a36Sopenharmony_ci/* Narrow the valid_iova_itree to include reserved ranges from a device. */ 119862306a36Sopenharmony_ciint iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt, 119962306a36Sopenharmony_ci struct device *dev, 120062306a36Sopenharmony_ci phys_addr_t *sw_msi_start) 120162306a36Sopenharmony_ci{ 120262306a36Sopenharmony_ci struct iommu_resv_region *resv; 120362306a36Sopenharmony_ci LIST_HEAD(resv_regions); 120462306a36Sopenharmony_ci unsigned int num_hw_msi = 0; 120562306a36Sopenharmony_ci unsigned int num_sw_msi = 0; 120662306a36Sopenharmony_ci int rc; 120762306a36Sopenharmony_ci 120862306a36Sopenharmony_ci if (iommufd_should_fail()) 120962306a36Sopenharmony_ci return -EINVAL; 121062306a36Sopenharmony_ci 121162306a36Sopenharmony_ci down_write(&iopt->iova_rwsem); 121262306a36Sopenharmony_ci /* FIXME: drivers allocate memory but there is no failure propogated */ 121362306a36Sopenharmony_ci iommu_get_resv_regions(dev, &resv_regions); 121462306a36Sopenharmony_ci 121562306a36Sopenharmony_ci list_for_each_entry(resv, &resv_regions, list) { 121662306a36Sopenharmony_ci if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE) 121762306a36Sopenharmony_ci continue; 121862306a36Sopenharmony_ci 121962306a36Sopenharmony_ci if (sw_msi_start && resv->type == IOMMU_RESV_MSI) 122062306a36Sopenharmony_ci num_hw_msi++; 122162306a36Sopenharmony_ci if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) { 122262306a36Sopenharmony_ci *sw_msi_start = resv->start; 122362306a36Sopenharmony_ci num_sw_msi++; 122462306a36Sopenharmony_ci } 122562306a36Sopenharmony_ci 122662306a36Sopenharmony_ci rc = iopt_reserve_iova(iopt, resv->start, 122762306a36Sopenharmony_ci resv->length - 1 + resv->start, dev); 122862306a36Sopenharmony_ci if (rc) 122962306a36Sopenharmony_ci goto out_reserved; 123062306a36Sopenharmony_ci } 123162306a36Sopenharmony_ci 123262306a36Sopenharmony_ci /* Drivers must offer sane combinations of regions */ 123362306a36Sopenharmony_ci if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) { 123462306a36Sopenharmony_ci rc = -EINVAL; 123562306a36Sopenharmony_ci goto out_reserved; 123662306a36Sopenharmony_ci } 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci rc = 0; 123962306a36Sopenharmony_ci goto out_free_resv; 124062306a36Sopenharmony_ci 124162306a36Sopenharmony_ciout_reserved: 124262306a36Sopenharmony_ci __iopt_remove_reserved_iova(iopt, dev); 124362306a36Sopenharmony_ciout_free_resv: 124462306a36Sopenharmony_ci iommu_put_resv_regions(dev, &resv_regions); 124562306a36Sopenharmony_ci up_write(&iopt->iova_rwsem); 124662306a36Sopenharmony_ci return rc; 124762306a36Sopenharmony_ci} 1248