162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
362306a36Sopenharmony_ci *
 * The io_pagetable is the top-level data structure that maps IOVAs to PFNs.
 * The PFNs can be placed into an iommu_domain, or returned to the caller as a
 * page list for access by an in-kernel user.
 *
 * The data structure uses the iopt_pages to optimize the storage of the PFNs
 * between the domains and xarray.
1062306a36Sopenharmony_ci */
1162306a36Sopenharmony_ci#include <linux/iommufd.h>
1262306a36Sopenharmony_ci#include <linux/lockdep.h>
1362306a36Sopenharmony_ci#include <linux/iommu.h>
1462306a36Sopenharmony_ci#include <linux/sched/mm.h>
1562306a36Sopenharmony_ci#include <linux/err.h>
1662306a36Sopenharmony_ci#include <linux/slab.h>
1762306a36Sopenharmony_ci#include <linux/errno.h>
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci#include "io_pagetable.h"
2062306a36Sopenharmony_ci#include "double_span.h"
2162306a36Sopenharmony_ci
/*
 * One element of a list describing a slice of an iopt_pages that is being
 * mapped into, or was looked up from, an io_pagetable.
 */
struct iopt_pages_list {
	/* Pages backing this slice; iopt_free_pages_list() puts it if non-NULL */
	struct iopt_pages *pages;
	/* Area allocated for this slice; aborted on free if still non-NULL */
	struct iopt_area *area;
	/* Linkage into the caller's pages_list */
	struct list_head next;
	/* Byte offset into pages at which the slice begins */
	unsigned long start_byte;
	/* Size of the slice in bytes */
	unsigned long length;
};
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_cistruct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
3162306a36Sopenharmony_ci					struct io_pagetable *iopt,
3262306a36Sopenharmony_ci					unsigned long iova,
3362306a36Sopenharmony_ci					unsigned long last_iova)
3462306a36Sopenharmony_ci{
3562306a36Sopenharmony_ci	lockdep_assert_held(&iopt->iova_rwsem);
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ci	iter->cur_iova = iova;
3862306a36Sopenharmony_ci	iter->last_iova = last_iova;
3962306a36Sopenharmony_ci	iter->area = iopt_area_iter_first(iopt, iova, iova);
4062306a36Sopenharmony_ci	if (!iter->area)
4162306a36Sopenharmony_ci		return NULL;
4262306a36Sopenharmony_ci	if (!iter->area->pages) {
4362306a36Sopenharmony_ci		iter->area = NULL;
4462306a36Sopenharmony_ci		return NULL;
4562306a36Sopenharmony_ci	}
4662306a36Sopenharmony_ci	return iter->area;
4762306a36Sopenharmony_ci}
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_cistruct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter)
5062306a36Sopenharmony_ci{
5162306a36Sopenharmony_ci	unsigned long last_iova;
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ci	if (!iter->area)
5462306a36Sopenharmony_ci		return NULL;
5562306a36Sopenharmony_ci	last_iova = iopt_area_last_iova(iter->area);
5662306a36Sopenharmony_ci	if (iter->last_iova <= last_iova)
5762306a36Sopenharmony_ci		return NULL;
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci	iter->cur_iova = last_iova + 1;
6062306a36Sopenharmony_ci	iter->area = iopt_area_iter_next(iter->area, iter->cur_iova,
6162306a36Sopenharmony_ci					 iter->last_iova);
6262306a36Sopenharmony_ci	if (!iter->area)
6362306a36Sopenharmony_ci		return NULL;
6462306a36Sopenharmony_ci	if (iter->cur_iova != iopt_area_iova(iter->area) ||
6562306a36Sopenharmony_ci	    !iter->area->pages) {
6662306a36Sopenharmony_ci		iter->area = NULL;
6762306a36Sopenharmony_ci		return NULL;
6862306a36Sopenharmony_ci	}
6962306a36Sopenharmony_ci	return iter->area;
7062306a36Sopenharmony_ci}
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_cistatic bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
7362306a36Sopenharmony_ci				    unsigned long length,
7462306a36Sopenharmony_ci				    unsigned long iova_alignment,
7562306a36Sopenharmony_ci				    unsigned long page_offset)
7662306a36Sopenharmony_ci{
7762306a36Sopenharmony_ci	if (span->is_used || span->last_hole - span->start_hole < length - 1)
7862306a36Sopenharmony_ci		return false;
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	span->start_hole = ALIGN(span->start_hole, iova_alignment) |
8162306a36Sopenharmony_ci			   page_offset;
8262306a36Sopenharmony_ci	if (span->start_hole > span->last_hole ||
8362306a36Sopenharmony_ci	    span->last_hole - span->start_hole < length - 1)
8462306a36Sopenharmony_ci		return false;
8562306a36Sopenharmony_ci	return true;
8662306a36Sopenharmony_ci}
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_cistatic bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
8962306a36Sopenharmony_ci				    unsigned long length,
9062306a36Sopenharmony_ci				    unsigned long iova_alignment,
9162306a36Sopenharmony_ci				    unsigned long page_offset)
9262306a36Sopenharmony_ci{
9362306a36Sopenharmony_ci	if (span->is_hole || span->last_used - span->start_used < length - 1)
9462306a36Sopenharmony_ci		return false;
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_ci	span->start_used = ALIGN(span->start_used, iova_alignment) |
9762306a36Sopenharmony_ci			   page_offset;
9862306a36Sopenharmony_ci	if (span->start_used > span->last_used ||
9962306a36Sopenharmony_ci	    span->last_used - span->start_used < length - 1)
10062306a36Sopenharmony_ci		return false;
10162306a36Sopenharmony_ci	return true;
10262306a36Sopenharmony_ci}
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci/*
10562306a36Sopenharmony_ci * Automatically find a block of IOVA that is not being used and not reserved.
10662306a36Sopenharmony_ci * Does not return a 0 IOVA even if it is valid.
10762306a36Sopenharmony_ci */
10862306a36Sopenharmony_cistatic int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova,
10962306a36Sopenharmony_ci			   unsigned long uptr, unsigned long length)
11062306a36Sopenharmony_ci{
11162306a36Sopenharmony_ci	unsigned long page_offset = uptr % PAGE_SIZE;
11262306a36Sopenharmony_ci	struct interval_tree_double_span_iter used_span;
11362306a36Sopenharmony_ci	struct interval_tree_span_iter allowed_span;
11462306a36Sopenharmony_ci	unsigned long iova_alignment;
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci	lockdep_assert_held(&iopt->iova_rwsem);
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci	/* Protect roundup_pow-of_two() from overflow */
11962306a36Sopenharmony_ci	if (length == 0 || length >= ULONG_MAX / 2)
12062306a36Sopenharmony_ci		return -EOVERFLOW;
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	/*
12362306a36Sopenharmony_ci	 * Keep alignment present in the uptr when building the IOVA, this
12462306a36Sopenharmony_ci	 * increases the chance we can map a THP.
12562306a36Sopenharmony_ci	 */
12662306a36Sopenharmony_ci	if (!uptr)
12762306a36Sopenharmony_ci		iova_alignment = roundup_pow_of_two(length);
12862306a36Sopenharmony_ci	else
12962306a36Sopenharmony_ci		iova_alignment = min_t(unsigned long,
13062306a36Sopenharmony_ci				       roundup_pow_of_two(length),
13162306a36Sopenharmony_ci				       1UL << __ffs64(uptr));
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	if (iova_alignment < iopt->iova_alignment)
13462306a36Sopenharmony_ci		return -EINVAL;
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci	interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree,
13762306a36Sopenharmony_ci				    PAGE_SIZE, ULONG_MAX - PAGE_SIZE) {
13862306a36Sopenharmony_ci		if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) {
13962306a36Sopenharmony_ci			allowed_span.start_used = PAGE_SIZE;
14062306a36Sopenharmony_ci			allowed_span.last_used = ULONG_MAX - PAGE_SIZE;
14162306a36Sopenharmony_ci			allowed_span.is_hole = false;
14262306a36Sopenharmony_ci		}
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci		if (!__alloc_iova_check_used(&allowed_span, length,
14562306a36Sopenharmony_ci					     iova_alignment, page_offset))
14662306a36Sopenharmony_ci			continue;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci		interval_tree_for_each_double_span(
14962306a36Sopenharmony_ci			&used_span, &iopt->reserved_itree, &iopt->area_itree,
15062306a36Sopenharmony_ci			allowed_span.start_used, allowed_span.last_used) {
15162306a36Sopenharmony_ci			if (!__alloc_iova_check_hole(&used_span, length,
15262306a36Sopenharmony_ci						     iova_alignment,
15362306a36Sopenharmony_ci						     page_offset))
15462306a36Sopenharmony_ci				continue;
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci			*iova = used_span.start_hole;
15762306a36Sopenharmony_ci			return 0;
15862306a36Sopenharmony_ci		}
15962306a36Sopenharmony_ci	}
16062306a36Sopenharmony_ci	return -ENOSPC;
16162306a36Sopenharmony_ci}
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_cistatic int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova,
16462306a36Sopenharmony_ci			   unsigned long length)
16562306a36Sopenharmony_ci{
16662306a36Sopenharmony_ci	unsigned long last;
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	lockdep_assert_held(&iopt->iova_rwsem);
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	if ((iova & (iopt->iova_alignment - 1)))
17162306a36Sopenharmony_ci		return -EINVAL;
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	if (check_add_overflow(iova, length - 1, &last))
17462306a36Sopenharmony_ci		return -EOVERFLOW;
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	/* No reserved IOVA intersects the range */
17762306a36Sopenharmony_ci	if (iopt_reserved_iter_first(iopt, iova, last))
17862306a36Sopenharmony_ci		return -EINVAL;
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci	/* Check that there is not already a mapping in the range */
18162306a36Sopenharmony_ci	if (iopt_area_iter_first(iopt, iova, last))
18262306a36Sopenharmony_ci		return -EEXIST;
18362306a36Sopenharmony_ci	return 0;
18462306a36Sopenharmony_ci}
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci/*
18762306a36Sopenharmony_ci * The area takes a slice of the pages from start_bytes to start_byte + length
18862306a36Sopenharmony_ci */
18962306a36Sopenharmony_cistatic int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area,
19062306a36Sopenharmony_ci			    struct iopt_pages *pages, unsigned long iova,
19162306a36Sopenharmony_ci			    unsigned long start_byte, unsigned long length,
19262306a36Sopenharmony_ci			    int iommu_prot)
19362306a36Sopenharmony_ci{
19462306a36Sopenharmony_ci	lockdep_assert_held_write(&iopt->iova_rwsem);
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	if ((iommu_prot & IOMMU_WRITE) && !pages->writable)
19762306a36Sopenharmony_ci		return -EPERM;
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	area->iommu_prot = iommu_prot;
20062306a36Sopenharmony_ci	area->page_offset = start_byte % PAGE_SIZE;
20162306a36Sopenharmony_ci	if (area->page_offset & (iopt->iova_alignment - 1))
20262306a36Sopenharmony_ci		return -EINVAL;
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	area->node.start = iova;
20562306a36Sopenharmony_ci	if (check_add_overflow(iova, length - 1, &area->node.last))
20662306a36Sopenharmony_ci		return -EOVERFLOW;
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	area->pages_node.start = start_byte / PAGE_SIZE;
20962306a36Sopenharmony_ci	if (check_add_overflow(start_byte, length - 1, &area->pages_node.last))
21062306a36Sopenharmony_ci		return -EOVERFLOW;
21162306a36Sopenharmony_ci	area->pages_node.last = area->pages_node.last / PAGE_SIZE;
21262306a36Sopenharmony_ci	if (WARN_ON(area->pages_node.last >= pages->npages))
21362306a36Sopenharmony_ci		return -EOVERFLOW;
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci	/*
21662306a36Sopenharmony_ci	 * The area is inserted with a NULL pages indicating it is not fully
21762306a36Sopenharmony_ci	 * initialized yet.
21862306a36Sopenharmony_ci	 */
21962306a36Sopenharmony_ci	area->iopt = iopt;
22062306a36Sopenharmony_ci	interval_tree_insert(&area->node, &iopt->area_itree);
22162306a36Sopenharmony_ci	return 0;
22262306a36Sopenharmony_ci}
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_cistatic struct iopt_area *iopt_area_alloc(void)
22562306a36Sopenharmony_ci{
22662306a36Sopenharmony_ci	struct iopt_area *area;
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci	area = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT);
22962306a36Sopenharmony_ci	if (!area)
23062306a36Sopenharmony_ci		return NULL;
23162306a36Sopenharmony_ci	RB_CLEAR_NODE(&area->node.rb);
23262306a36Sopenharmony_ci	RB_CLEAR_NODE(&area->pages_node.rb);
23362306a36Sopenharmony_ci	return area;
23462306a36Sopenharmony_ci}
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_cistatic int iopt_alloc_area_pages(struct io_pagetable *iopt,
23762306a36Sopenharmony_ci				 struct list_head *pages_list,
23862306a36Sopenharmony_ci				 unsigned long length, unsigned long *dst_iova,
23962306a36Sopenharmony_ci				 int iommu_prot, unsigned int flags)
24062306a36Sopenharmony_ci{
24162306a36Sopenharmony_ci	struct iopt_pages_list *elm;
24262306a36Sopenharmony_ci	unsigned long iova;
24362306a36Sopenharmony_ci	int rc = 0;
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	list_for_each_entry(elm, pages_list, next) {
24662306a36Sopenharmony_ci		elm->area = iopt_area_alloc();
24762306a36Sopenharmony_ci		if (!elm->area)
24862306a36Sopenharmony_ci			return -ENOMEM;
24962306a36Sopenharmony_ci	}
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci	down_write(&iopt->iova_rwsem);
25262306a36Sopenharmony_ci	if ((length & (iopt->iova_alignment - 1)) || !length) {
25362306a36Sopenharmony_ci		rc = -EINVAL;
25462306a36Sopenharmony_ci		goto out_unlock;
25562306a36Sopenharmony_ci	}
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_ci	if (flags & IOPT_ALLOC_IOVA) {
25862306a36Sopenharmony_ci		/* Use the first entry to guess the ideal IOVA alignment */
25962306a36Sopenharmony_ci		elm = list_first_entry(pages_list, struct iopt_pages_list,
26062306a36Sopenharmony_ci				       next);
26162306a36Sopenharmony_ci		rc = iopt_alloc_iova(
26262306a36Sopenharmony_ci			iopt, dst_iova,
26362306a36Sopenharmony_ci			(uintptr_t)elm->pages->uptr + elm->start_byte, length);
26462306a36Sopenharmony_ci		if (rc)
26562306a36Sopenharmony_ci			goto out_unlock;
26662306a36Sopenharmony_ci		if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
26762306a36Sopenharmony_ci		    WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) {
26862306a36Sopenharmony_ci			rc = -EINVAL;
26962306a36Sopenharmony_ci			goto out_unlock;
27062306a36Sopenharmony_ci		}
27162306a36Sopenharmony_ci	} else {
27262306a36Sopenharmony_ci		rc = iopt_check_iova(iopt, *dst_iova, length);
27362306a36Sopenharmony_ci		if (rc)
27462306a36Sopenharmony_ci			goto out_unlock;
27562306a36Sopenharmony_ci	}
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	/*
27862306a36Sopenharmony_ci	 * Areas are created with a NULL pages so that the IOVA space is
27962306a36Sopenharmony_ci	 * reserved and we can unlock the iova_rwsem.
28062306a36Sopenharmony_ci	 */
28162306a36Sopenharmony_ci	iova = *dst_iova;
28262306a36Sopenharmony_ci	list_for_each_entry(elm, pages_list, next) {
28362306a36Sopenharmony_ci		rc = iopt_insert_area(iopt, elm->area, elm->pages, iova,
28462306a36Sopenharmony_ci				      elm->start_byte, elm->length, iommu_prot);
28562306a36Sopenharmony_ci		if (rc)
28662306a36Sopenharmony_ci			goto out_unlock;
28762306a36Sopenharmony_ci		iova += elm->length;
28862306a36Sopenharmony_ci	}
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ciout_unlock:
29162306a36Sopenharmony_ci	up_write(&iopt->iova_rwsem);
29262306a36Sopenharmony_ci	return rc;
29362306a36Sopenharmony_ci}
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_cistatic void iopt_abort_area(struct iopt_area *area)
29662306a36Sopenharmony_ci{
29762306a36Sopenharmony_ci	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
29862306a36Sopenharmony_ci		WARN_ON(area->pages);
29962306a36Sopenharmony_ci	if (area->iopt) {
30062306a36Sopenharmony_ci		down_write(&area->iopt->iova_rwsem);
30162306a36Sopenharmony_ci		interval_tree_remove(&area->node, &area->iopt->area_itree);
30262306a36Sopenharmony_ci		up_write(&area->iopt->iova_rwsem);
30362306a36Sopenharmony_ci	}
30462306a36Sopenharmony_ci	kfree(area);
30562306a36Sopenharmony_ci}
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_civoid iopt_free_pages_list(struct list_head *pages_list)
30862306a36Sopenharmony_ci{
30962306a36Sopenharmony_ci	struct iopt_pages_list *elm;
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci	while ((elm = list_first_entry_or_null(pages_list,
31262306a36Sopenharmony_ci					       struct iopt_pages_list, next))) {
31362306a36Sopenharmony_ci		if (elm->area)
31462306a36Sopenharmony_ci			iopt_abort_area(elm->area);
31562306a36Sopenharmony_ci		if (elm->pages)
31662306a36Sopenharmony_ci			iopt_put_pages(elm->pages);
31762306a36Sopenharmony_ci		list_del(&elm->next);
31862306a36Sopenharmony_ci		kfree(elm);
31962306a36Sopenharmony_ci	}
32062306a36Sopenharmony_ci}
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_cistatic int iopt_fill_domains_pages(struct list_head *pages_list)
32362306a36Sopenharmony_ci{
32462306a36Sopenharmony_ci	struct iopt_pages_list *undo_elm;
32562306a36Sopenharmony_ci	struct iopt_pages_list *elm;
32662306a36Sopenharmony_ci	int rc;
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci	list_for_each_entry(elm, pages_list, next) {
32962306a36Sopenharmony_ci		rc = iopt_area_fill_domains(elm->area, elm->pages);
33062306a36Sopenharmony_ci		if (rc)
33162306a36Sopenharmony_ci			goto err_undo;
33262306a36Sopenharmony_ci	}
33362306a36Sopenharmony_ci	return 0;
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_cierr_undo:
33662306a36Sopenharmony_ci	list_for_each_entry(undo_elm, pages_list, next) {
33762306a36Sopenharmony_ci		if (undo_elm == elm)
33862306a36Sopenharmony_ci			break;
33962306a36Sopenharmony_ci		iopt_area_unfill_domains(undo_elm->area, undo_elm->pages);
34062306a36Sopenharmony_ci	}
34162306a36Sopenharmony_ci	return rc;
34262306a36Sopenharmony_ci}
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ciint iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
34562306a36Sopenharmony_ci		   unsigned long length, unsigned long *dst_iova,
34662306a36Sopenharmony_ci		   int iommu_prot, unsigned int flags)
34762306a36Sopenharmony_ci{
34862306a36Sopenharmony_ci	struct iopt_pages_list *elm;
34962306a36Sopenharmony_ci	int rc;
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_ci	rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova,
35262306a36Sopenharmony_ci				   iommu_prot, flags);
35362306a36Sopenharmony_ci	if (rc)
35462306a36Sopenharmony_ci		return rc;
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_ci	down_read(&iopt->domains_rwsem);
35762306a36Sopenharmony_ci	rc = iopt_fill_domains_pages(pages_list);
35862306a36Sopenharmony_ci	if (rc)
35962306a36Sopenharmony_ci		goto out_unlock_domains;
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci	down_write(&iopt->iova_rwsem);
36262306a36Sopenharmony_ci	list_for_each_entry(elm, pages_list, next) {
36362306a36Sopenharmony_ci		/*
36462306a36Sopenharmony_ci		 * area->pages must be set inside the domains_rwsem to ensure
36562306a36Sopenharmony_ci		 * any newly added domains will get filled. Moves the reference
36662306a36Sopenharmony_ci		 * in from the list.
36762306a36Sopenharmony_ci		 */
36862306a36Sopenharmony_ci		elm->area->pages = elm->pages;
36962306a36Sopenharmony_ci		elm->pages = NULL;
37062306a36Sopenharmony_ci		elm->area = NULL;
37162306a36Sopenharmony_ci	}
37262306a36Sopenharmony_ci	up_write(&iopt->iova_rwsem);
37362306a36Sopenharmony_ciout_unlock_domains:
37462306a36Sopenharmony_ci	up_read(&iopt->domains_rwsem);
37562306a36Sopenharmony_ci	return rc;
37662306a36Sopenharmony_ci}
37762306a36Sopenharmony_ci
37862306a36Sopenharmony_ci/**
37962306a36Sopenharmony_ci * iopt_map_user_pages() - Map a user VA to an iova in the io page table
38062306a36Sopenharmony_ci * @ictx: iommufd_ctx the iopt is part of
38162306a36Sopenharmony_ci * @iopt: io_pagetable to act on
38262306a36Sopenharmony_ci * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains
38362306a36Sopenharmony_ci *        the chosen iova on output. Otherwise is the iova to map to on input
38462306a36Sopenharmony_ci * @uptr: User VA to map
38562306a36Sopenharmony_ci * @length: Number of bytes to map
38662306a36Sopenharmony_ci * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
38762306a36Sopenharmony_ci * @flags: IOPT_ALLOC_IOVA or zero
38862306a36Sopenharmony_ci *
38962306a36Sopenharmony_ci * iova, uptr, and length must be aligned to iova_alignment. For domain backed
39062306a36Sopenharmony_ci * page tables this will pin the pages and load them into the domain at iova.
39162306a36Sopenharmony_ci * For non-domain page tables this will only setup a lazy reference and the
39262306a36Sopenharmony_ci * caller must use iopt_access_pages() to touch them.
39362306a36Sopenharmony_ci *
39462306a36Sopenharmony_ci * iopt_unmap_iova() must be called to undo this before the io_pagetable can be
39562306a36Sopenharmony_ci * destroyed.
39662306a36Sopenharmony_ci */
39762306a36Sopenharmony_ciint iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
39862306a36Sopenharmony_ci			unsigned long *iova, void __user *uptr,
39962306a36Sopenharmony_ci			unsigned long length, int iommu_prot,
40062306a36Sopenharmony_ci			unsigned int flags)
40162306a36Sopenharmony_ci{
40262306a36Sopenharmony_ci	struct iopt_pages_list elm = {};
40362306a36Sopenharmony_ci	LIST_HEAD(pages_list);
40462306a36Sopenharmony_ci	int rc;
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_ci	elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE);
40762306a36Sopenharmony_ci	if (IS_ERR(elm.pages))
40862306a36Sopenharmony_ci		return PTR_ERR(elm.pages);
40962306a36Sopenharmony_ci	if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM &&
41062306a36Sopenharmony_ci	    elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER)
41162306a36Sopenharmony_ci		elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM;
41262306a36Sopenharmony_ci	elm.start_byte = uptr - elm.pages->uptr;
41362306a36Sopenharmony_ci	elm.length = length;
41462306a36Sopenharmony_ci	list_add(&elm.next, &pages_list);
41562306a36Sopenharmony_ci
41662306a36Sopenharmony_ci	rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags);
41762306a36Sopenharmony_ci	if (rc) {
41862306a36Sopenharmony_ci		if (elm.area)
41962306a36Sopenharmony_ci			iopt_abort_area(elm.area);
42062306a36Sopenharmony_ci		if (elm.pages)
42162306a36Sopenharmony_ci			iopt_put_pages(elm.pages);
42262306a36Sopenharmony_ci		return rc;
42362306a36Sopenharmony_ci	}
42462306a36Sopenharmony_ci	return 0;
42562306a36Sopenharmony_ci}
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_ciint iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
42862306a36Sopenharmony_ci		   unsigned long length, struct list_head *pages_list)
42962306a36Sopenharmony_ci{
43062306a36Sopenharmony_ci	struct iopt_area_contig_iter iter;
43162306a36Sopenharmony_ci	unsigned long last_iova;
43262306a36Sopenharmony_ci	struct iopt_area *area;
43362306a36Sopenharmony_ci	int rc;
43462306a36Sopenharmony_ci
43562306a36Sopenharmony_ci	if (!length)
43662306a36Sopenharmony_ci		return -EINVAL;
43762306a36Sopenharmony_ci	if (check_add_overflow(iova, length - 1, &last_iova))
43862306a36Sopenharmony_ci		return -EOVERFLOW;
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci	down_read(&iopt->iova_rwsem);
44162306a36Sopenharmony_ci	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
44262306a36Sopenharmony_ci		struct iopt_pages_list *elm;
44362306a36Sopenharmony_ci		unsigned long last = min(last_iova, iopt_area_last_iova(area));
44462306a36Sopenharmony_ci
44562306a36Sopenharmony_ci		elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT);
44662306a36Sopenharmony_ci		if (!elm) {
44762306a36Sopenharmony_ci			rc = -ENOMEM;
44862306a36Sopenharmony_ci			goto err_free;
44962306a36Sopenharmony_ci		}
45062306a36Sopenharmony_ci		elm->start_byte = iopt_area_start_byte(area, iter.cur_iova);
45162306a36Sopenharmony_ci		elm->pages = area->pages;
45262306a36Sopenharmony_ci		elm->length = (last - iter.cur_iova) + 1;
45362306a36Sopenharmony_ci		kref_get(&elm->pages->kref);
45462306a36Sopenharmony_ci		list_add_tail(&elm->next, pages_list);
45562306a36Sopenharmony_ci	}
45662306a36Sopenharmony_ci	if (!iopt_area_contig_done(&iter)) {
45762306a36Sopenharmony_ci		rc = -ENOENT;
45862306a36Sopenharmony_ci		goto err_free;
45962306a36Sopenharmony_ci	}
46062306a36Sopenharmony_ci	up_read(&iopt->iova_rwsem);
46162306a36Sopenharmony_ci	return 0;
46262306a36Sopenharmony_cierr_free:
46362306a36Sopenharmony_ci	up_read(&iopt->iova_rwsem);
46462306a36Sopenharmony_ci	iopt_free_pages_list(pages_list);
46562306a36Sopenharmony_ci	return rc;
46662306a36Sopenharmony_ci}
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_cistatic int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
46962306a36Sopenharmony_ci				 unsigned long last, unsigned long *unmapped)
47062306a36Sopenharmony_ci{
47162306a36Sopenharmony_ci	struct iopt_area *area;
47262306a36Sopenharmony_ci	unsigned long unmapped_bytes = 0;
47362306a36Sopenharmony_ci	unsigned int tries = 0;
47462306a36Sopenharmony_ci	int rc = -ENOENT;
47562306a36Sopenharmony_ci
47662306a36Sopenharmony_ci	/*
47762306a36Sopenharmony_ci	 * The domains_rwsem must be held in read mode any time any area->pages
47862306a36Sopenharmony_ci	 * is NULL. This prevents domain attach/detatch from running
47962306a36Sopenharmony_ci	 * concurrently with cleaning up the area.
48062306a36Sopenharmony_ci	 */
48162306a36Sopenharmony_ciagain:
48262306a36Sopenharmony_ci	down_read(&iopt->domains_rwsem);
48362306a36Sopenharmony_ci	down_write(&iopt->iova_rwsem);
48462306a36Sopenharmony_ci	while ((area = iopt_area_iter_first(iopt, start, last))) {
48562306a36Sopenharmony_ci		unsigned long area_last = iopt_area_last_iova(area);
48662306a36Sopenharmony_ci		unsigned long area_first = iopt_area_iova(area);
48762306a36Sopenharmony_ci		struct iopt_pages *pages;
48862306a36Sopenharmony_ci
48962306a36Sopenharmony_ci		/* Userspace should not race map/unmap's of the same area */
49062306a36Sopenharmony_ci		if (!area->pages) {
49162306a36Sopenharmony_ci			rc = -EBUSY;
49262306a36Sopenharmony_ci			goto out_unlock_iova;
49362306a36Sopenharmony_ci		}
49462306a36Sopenharmony_ci
49562306a36Sopenharmony_ci		if (area_first < start || area_last > last) {
49662306a36Sopenharmony_ci			rc = -ENOENT;
49762306a36Sopenharmony_ci			goto out_unlock_iova;
49862306a36Sopenharmony_ci		}
49962306a36Sopenharmony_ci
50062306a36Sopenharmony_ci		if (area_first != start)
50162306a36Sopenharmony_ci			tries = 0;
50262306a36Sopenharmony_ci
50362306a36Sopenharmony_ci		/*
50462306a36Sopenharmony_ci		 * num_accesses writers must hold the iova_rwsem too, so we can
50562306a36Sopenharmony_ci		 * safely read it under the write side of the iovam_rwsem
50662306a36Sopenharmony_ci		 * without the pages->mutex.
50762306a36Sopenharmony_ci		 */
50862306a36Sopenharmony_ci		if (area->num_accesses) {
50962306a36Sopenharmony_ci			size_t length = iopt_area_length(area);
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci			start = area_first;
51262306a36Sopenharmony_ci			area->prevent_access = true;
51362306a36Sopenharmony_ci			up_write(&iopt->iova_rwsem);
51462306a36Sopenharmony_ci			up_read(&iopt->domains_rwsem);
51562306a36Sopenharmony_ci
51662306a36Sopenharmony_ci			iommufd_access_notify_unmap(iopt, area_first, length);
51762306a36Sopenharmony_ci			/* Something is not responding to unmap requests. */
51862306a36Sopenharmony_ci			tries++;
51962306a36Sopenharmony_ci			if (WARN_ON(tries > 100))
52062306a36Sopenharmony_ci				return -EDEADLOCK;
52162306a36Sopenharmony_ci			goto again;
52262306a36Sopenharmony_ci		}
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci		pages = area->pages;
52562306a36Sopenharmony_ci		area->pages = NULL;
52662306a36Sopenharmony_ci		up_write(&iopt->iova_rwsem);
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_ci		iopt_area_unfill_domains(area, pages);
52962306a36Sopenharmony_ci		iopt_abort_area(area);
53062306a36Sopenharmony_ci		iopt_put_pages(pages);
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_ci		unmapped_bytes += area_last - area_first + 1;
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_ci		down_write(&iopt->iova_rwsem);
53562306a36Sopenharmony_ci	}
53662306a36Sopenharmony_ci	if (unmapped_bytes)
53762306a36Sopenharmony_ci		rc = 0;
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_ciout_unlock_iova:
54062306a36Sopenharmony_ci	up_write(&iopt->iova_rwsem);
54162306a36Sopenharmony_ci	up_read(&iopt->domains_rwsem);
54262306a36Sopenharmony_ci	if (unmapped)
54362306a36Sopenharmony_ci		*unmapped = unmapped_bytes;
54462306a36Sopenharmony_ci	return rc;
54562306a36Sopenharmony_ci}
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ci/**
54862306a36Sopenharmony_ci * iopt_unmap_iova() - Remove a range of iova
54962306a36Sopenharmony_ci * @iopt: io_pagetable to act on
55062306a36Sopenharmony_ci * @iova: Starting iova to unmap
55162306a36Sopenharmony_ci * @length: Number of bytes to unmap
55262306a36Sopenharmony_ci * @unmapped: Return number of bytes unmapped
55362306a36Sopenharmony_ci *
55462306a36Sopenharmony_ci * The requested range must be a superset of existing ranges.
55562306a36Sopenharmony_ci * Splitting/truncating IOVA mappings is not allowed.
55662306a36Sopenharmony_ci */
55762306a36Sopenharmony_ciint iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
55862306a36Sopenharmony_ci		    unsigned long length, unsigned long *unmapped)
55962306a36Sopenharmony_ci{
56062306a36Sopenharmony_ci	unsigned long iova_last;
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_ci	if (!length)
56362306a36Sopenharmony_ci		return -EINVAL;
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci	if (check_add_overflow(iova, length - 1, &iova_last))
56662306a36Sopenharmony_ci		return -EOVERFLOW;
56762306a36Sopenharmony_ci
56862306a36Sopenharmony_ci	return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
56962306a36Sopenharmony_ci}
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_ciint iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
57262306a36Sopenharmony_ci{
57362306a36Sopenharmony_ci	int rc;
57462306a36Sopenharmony_ci
57562306a36Sopenharmony_ci	rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped);
57662306a36Sopenharmony_ci	/* If the IOVAs are empty then unmap all succeeds */
57762306a36Sopenharmony_ci	if (rc == -ENOENT)
57862306a36Sopenharmony_ci		return 0;
57962306a36Sopenharmony_ci	return rc;
58062306a36Sopenharmony_ci}
58162306a36Sopenharmony_ci
58262306a36Sopenharmony_ci/* The caller must always free all the nodes in the allowed_iova rb_root. */
58362306a36Sopenharmony_ciint iopt_set_allow_iova(struct io_pagetable *iopt,
58462306a36Sopenharmony_ci			struct rb_root_cached *allowed_iova)
58562306a36Sopenharmony_ci{
58662306a36Sopenharmony_ci	struct iopt_allowed *allowed;
58762306a36Sopenharmony_ci
58862306a36Sopenharmony_ci	down_write(&iopt->iova_rwsem);
58962306a36Sopenharmony_ci	swap(*allowed_iova, iopt->allowed_itree);
59062306a36Sopenharmony_ci
59162306a36Sopenharmony_ci	for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed;
59262306a36Sopenharmony_ci	     allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) {
59362306a36Sopenharmony_ci		if (iopt_reserved_iter_first(iopt, allowed->node.start,
59462306a36Sopenharmony_ci					     allowed->node.last)) {
59562306a36Sopenharmony_ci			swap(*allowed_iova, iopt->allowed_itree);
59662306a36Sopenharmony_ci			up_write(&iopt->iova_rwsem);
59762306a36Sopenharmony_ci			return -EADDRINUSE;
59862306a36Sopenharmony_ci		}
59962306a36Sopenharmony_ci	}
60062306a36Sopenharmony_ci	up_write(&iopt->iova_rwsem);
60162306a36Sopenharmony_ci	return 0;
60262306a36Sopenharmony_ci}
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_ciint iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
60562306a36Sopenharmony_ci		      unsigned long last, void *owner)
60662306a36Sopenharmony_ci{
60762306a36Sopenharmony_ci	struct iopt_reserved *reserved;
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_ci	lockdep_assert_held_write(&iopt->iova_rwsem);
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_ci	if (iopt_area_iter_first(iopt, start, last) ||
61262306a36Sopenharmony_ci	    iopt_allowed_iter_first(iopt, start, last))
61362306a36Sopenharmony_ci		return -EADDRINUSE;
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci	reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT);
61662306a36Sopenharmony_ci	if (!reserved)
61762306a36Sopenharmony_ci		return -ENOMEM;
61862306a36Sopenharmony_ci	reserved->node.start = start;
61962306a36Sopenharmony_ci	reserved->node.last = last;
62062306a36Sopenharmony_ci	reserved->owner = owner;
62162306a36Sopenharmony_ci	interval_tree_insert(&reserved->node, &iopt->reserved_itree);
62262306a36Sopenharmony_ci	return 0;
62362306a36Sopenharmony_ci}
62462306a36Sopenharmony_ci
62562306a36Sopenharmony_cistatic void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
62662306a36Sopenharmony_ci{
62762306a36Sopenharmony_ci	struct iopt_reserved *reserved, *next;
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_ci	lockdep_assert_held_write(&iopt->iova_rwsem);
63062306a36Sopenharmony_ci
63162306a36Sopenharmony_ci	for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved;
63262306a36Sopenharmony_ci	     reserved = next) {
63362306a36Sopenharmony_ci		next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX);
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci		if (reserved->owner == owner) {
63662306a36Sopenharmony_ci			interval_tree_remove(&reserved->node,
63762306a36Sopenharmony_ci					     &iopt->reserved_itree);
63862306a36Sopenharmony_ci			kfree(reserved);
63962306a36Sopenharmony_ci		}
64062306a36Sopenharmony_ci	}
64162306a36Sopenharmony_ci}
64262306a36Sopenharmony_ci
64362306a36Sopenharmony_civoid iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
64462306a36Sopenharmony_ci{
64562306a36Sopenharmony_ci	down_write(&iopt->iova_rwsem);
64662306a36Sopenharmony_ci	__iopt_remove_reserved_iova(iopt, owner);
64762306a36Sopenharmony_ci	up_write(&iopt->iova_rwsem);
64862306a36Sopenharmony_ci}
64962306a36Sopenharmony_ci
65062306a36Sopenharmony_civoid iopt_init_table(struct io_pagetable *iopt)
65162306a36Sopenharmony_ci{
65262306a36Sopenharmony_ci	init_rwsem(&iopt->iova_rwsem);
65362306a36Sopenharmony_ci	init_rwsem(&iopt->domains_rwsem);
65462306a36Sopenharmony_ci	iopt->area_itree = RB_ROOT_CACHED;
65562306a36Sopenharmony_ci	iopt->allowed_itree = RB_ROOT_CACHED;
65662306a36Sopenharmony_ci	iopt->reserved_itree = RB_ROOT_CACHED;
65762306a36Sopenharmony_ci	xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT);
65862306a36Sopenharmony_ci	xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC);
65962306a36Sopenharmony_ci
66062306a36Sopenharmony_ci	/*
66162306a36Sopenharmony_ci	 * iopt's start as SW tables that can use the entire size_t IOVA space
66262306a36Sopenharmony_ci	 * due to the use of size_t in the APIs. They have no alignment
66362306a36Sopenharmony_ci	 * restriction.
66462306a36Sopenharmony_ci	 */
66562306a36Sopenharmony_ci	iopt->iova_alignment = 1;
66662306a36Sopenharmony_ci}
66762306a36Sopenharmony_ci
66862306a36Sopenharmony_civoid iopt_destroy_table(struct io_pagetable *iopt)
66962306a36Sopenharmony_ci{
67062306a36Sopenharmony_ci	struct interval_tree_node *node;
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_ci	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
67362306a36Sopenharmony_ci		iopt_remove_reserved_iova(iopt, NULL);
67462306a36Sopenharmony_ci
67562306a36Sopenharmony_ci	while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0,
67662306a36Sopenharmony_ci						ULONG_MAX))) {
67762306a36Sopenharmony_ci		interval_tree_remove(node, &iopt->allowed_itree);
67862306a36Sopenharmony_ci		kfree(container_of(node, struct iopt_allowed, node));
67962306a36Sopenharmony_ci	}
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci	WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root));
68262306a36Sopenharmony_ci	WARN_ON(!xa_empty(&iopt->domains));
68362306a36Sopenharmony_ci	WARN_ON(!xa_empty(&iopt->access_list));
68462306a36Sopenharmony_ci	WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root));
68562306a36Sopenharmony_ci}
68662306a36Sopenharmony_ci
68762306a36Sopenharmony_ci/**
68862306a36Sopenharmony_ci * iopt_unfill_domain() - Unfill a domain with PFNs
68962306a36Sopenharmony_ci * @iopt: io_pagetable to act on
69062306a36Sopenharmony_ci * @domain: domain to unfill
69162306a36Sopenharmony_ci *
69262306a36Sopenharmony_ci * This is used when removing a domain from the iopt. Every area in the iopt
69362306a36Sopenharmony_ci * will be unmapped from the domain. The domain must already be removed from the
69462306a36Sopenharmony_ci * domains xarray.
69562306a36Sopenharmony_ci */
69662306a36Sopenharmony_cistatic void iopt_unfill_domain(struct io_pagetable *iopt,
69762306a36Sopenharmony_ci			       struct iommu_domain *domain)
69862306a36Sopenharmony_ci{
69962306a36Sopenharmony_ci	struct iopt_area *area;
70062306a36Sopenharmony_ci
70162306a36Sopenharmony_ci	lockdep_assert_held(&iopt->iova_rwsem);
70262306a36Sopenharmony_ci	lockdep_assert_held_write(&iopt->domains_rwsem);
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ci	/*
70562306a36Sopenharmony_ci	 * Some other domain is holding all the pfns still, rapidly unmap this
70662306a36Sopenharmony_ci	 * domain.
70762306a36Sopenharmony_ci	 */
70862306a36Sopenharmony_ci	if (iopt->next_domain_id != 0) {
70962306a36Sopenharmony_ci		/* Pick an arbitrary remaining domain to act as storage */
71062306a36Sopenharmony_ci		struct iommu_domain *storage_domain =
71162306a36Sopenharmony_ci			xa_load(&iopt->domains, 0);
71262306a36Sopenharmony_ci
71362306a36Sopenharmony_ci		for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
71462306a36Sopenharmony_ci		     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
71562306a36Sopenharmony_ci			struct iopt_pages *pages = area->pages;
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci			if (!pages)
71862306a36Sopenharmony_ci				continue;
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_ci			mutex_lock(&pages->mutex);
72162306a36Sopenharmony_ci			if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
72262306a36Sopenharmony_ci				WARN_ON(!area->storage_domain);
72362306a36Sopenharmony_ci			if (area->storage_domain == domain)
72462306a36Sopenharmony_ci				area->storage_domain = storage_domain;
72562306a36Sopenharmony_ci			mutex_unlock(&pages->mutex);
72662306a36Sopenharmony_ci
72762306a36Sopenharmony_ci			iopt_area_unmap_domain(area, domain);
72862306a36Sopenharmony_ci		}
72962306a36Sopenharmony_ci		return;
73062306a36Sopenharmony_ci	}
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
73362306a36Sopenharmony_ci	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
73462306a36Sopenharmony_ci		struct iopt_pages *pages = area->pages;
73562306a36Sopenharmony_ci
73662306a36Sopenharmony_ci		if (!pages)
73762306a36Sopenharmony_ci			continue;
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci		mutex_lock(&pages->mutex);
74062306a36Sopenharmony_ci		interval_tree_remove(&area->pages_node, &pages->domains_itree);
74162306a36Sopenharmony_ci		WARN_ON(area->storage_domain != domain);
74262306a36Sopenharmony_ci		area->storage_domain = NULL;
74362306a36Sopenharmony_ci		iopt_area_unfill_domain(area, pages, domain);
74462306a36Sopenharmony_ci		mutex_unlock(&pages->mutex);
74562306a36Sopenharmony_ci	}
74662306a36Sopenharmony_ci}
74762306a36Sopenharmony_ci
74862306a36Sopenharmony_ci/**
74962306a36Sopenharmony_ci * iopt_fill_domain() - Fill a domain with PFNs
75062306a36Sopenharmony_ci * @iopt: io_pagetable to act on
75162306a36Sopenharmony_ci * @domain: domain to fill
75262306a36Sopenharmony_ci *
75362306a36Sopenharmony_ci * Fill the domain with PFNs from every area in the iopt. On failure the domain
75462306a36Sopenharmony_ci * is left unchanged.
75562306a36Sopenharmony_ci */
75662306a36Sopenharmony_cistatic int iopt_fill_domain(struct io_pagetable *iopt,
75762306a36Sopenharmony_ci			    struct iommu_domain *domain)
75862306a36Sopenharmony_ci{
75962306a36Sopenharmony_ci	struct iopt_area *end_area;
76062306a36Sopenharmony_ci	struct iopt_area *area;
76162306a36Sopenharmony_ci	int rc;
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci	lockdep_assert_held(&iopt->iova_rwsem);
76462306a36Sopenharmony_ci	lockdep_assert_held_write(&iopt->domains_rwsem);
76562306a36Sopenharmony_ci
76662306a36Sopenharmony_ci	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
76762306a36Sopenharmony_ci	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
76862306a36Sopenharmony_ci		struct iopt_pages *pages = area->pages;
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_ci		if (!pages)
77162306a36Sopenharmony_ci			continue;
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci		mutex_lock(&pages->mutex);
77462306a36Sopenharmony_ci		rc = iopt_area_fill_domain(area, domain);
77562306a36Sopenharmony_ci		if (rc) {
77662306a36Sopenharmony_ci			mutex_unlock(&pages->mutex);
77762306a36Sopenharmony_ci			goto out_unfill;
77862306a36Sopenharmony_ci		}
77962306a36Sopenharmony_ci		if (!area->storage_domain) {
78062306a36Sopenharmony_ci			WARN_ON(iopt->next_domain_id != 0);
78162306a36Sopenharmony_ci			area->storage_domain = domain;
78262306a36Sopenharmony_ci			interval_tree_insert(&area->pages_node,
78362306a36Sopenharmony_ci					     &pages->domains_itree);
78462306a36Sopenharmony_ci		}
78562306a36Sopenharmony_ci		mutex_unlock(&pages->mutex);
78662306a36Sopenharmony_ci	}
78762306a36Sopenharmony_ci	return 0;
78862306a36Sopenharmony_ci
78962306a36Sopenharmony_ciout_unfill:
79062306a36Sopenharmony_ci	end_area = area;
79162306a36Sopenharmony_ci	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
79262306a36Sopenharmony_ci	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
79362306a36Sopenharmony_ci		struct iopt_pages *pages = area->pages;
79462306a36Sopenharmony_ci
79562306a36Sopenharmony_ci		if (area == end_area)
79662306a36Sopenharmony_ci			break;
79762306a36Sopenharmony_ci		if (!pages)
79862306a36Sopenharmony_ci			continue;
79962306a36Sopenharmony_ci		mutex_lock(&pages->mutex);
80062306a36Sopenharmony_ci		if (iopt->next_domain_id == 0) {
80162306a36Sopenharmony_ci			interval_tree_remove(&area->pages_node,
80262306a36Sopenharmony_ci					     &pages->domains_itree);
80362306a36Sopenharmony_ci			area->storage_domain = NULL;
80462306a36Sopenharmony_ci		}
80562306a36Sopenharmony_ci		iopt_area_unfill_domain(area, pages, domain);
80662306a36Sopenharmony_ci		mutex_unlock(&pages->mutex);
80762306a36Sopenharmony_ci	}
80862306a36Sopenharmony_ci	return rc;
80962306a36Sopenharmony_ci}
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci/* All existing area's conform to an increased page size */
81262306a36Sopenharmony_cistatic int iopt_check_iova_alignment(struct io_pagetable *iopt,
81362306a36Sopenharmony_ci				     unsigned long new_iova_alignment)
81462306a36Sopenharmony_ci{
81562306a36Sopenharmony_ci	unsigned long align_mask = new_iova_alignment - 1;
81662306a36Sopenharmony_ci	struct iopt_area *area;
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci	lockdep_assert_held(&iopt->iova_rwsem);
81962306a36Sopenharmony_ci	lockdep_assert_held(&iopt->domains_rwsem);
82062306a36Sopenharmony_ci
82162306a36Sopenharmony_ci	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
82262306a36Sopenharmony_ci	     area = iopt_area_iter_next(area, 0, ULONG_MAX))
82362306a36Sopenharmony_ci		if ((iopt_area_iova(area) & align_mask) ||
82462306a36Sopenharmony_ci		    (iopt_area_length(area) & align_mask) ||
82562306a36Sopenharmony_ci		    (area->page_offset & align_mask))
82662306a36Sopenharmony_ci			return -EADDRINUSE;
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci	if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) {
82962306a36Sopenharmony_ci		struct iommufd_access *access;
83062306a36Sopenharmony_ci		unsigned long index;
83162306a36Sopenharmony_ci
83262306a36Sopenharmony_ci		xa_for_each(&iopt->access_list, index, access)
83362306a36Sopenharmony_ci			if (WARN_ON(access->iova_alignment >
83462306a36Sopenharmony_ci				    new_iova_alignment))
83562306a36Sopenharmony_ci				return -EADDRINUSE;
83662306a36Sopenharmony_ci	}
83762306a36Sopenharmony_ci	return 0;
83862306a36Sopenharmony_ci}
83962306a36Sopenharmony_ci
84062306a36Sopenharmony_ciint iopt_table_add_domain(struct io_pagetable *iopt,
84162306a36Sopenharmony_ci			  struct iommu_domain *domain)
84262306a36Sopenharmony_ci{
84362306a36Sopenharmony_ci	const struct iommu_domain_geometry *geometry = &domain->geometry;
84462306a36Sopenharmony_ci	struct iommu_domain *iter_domain;
84562306a36Sopenharmony_ci	unsigned int new_iova_alignment;
84662306a36Sopenharmony_ci	unsigned long index;
84762306a36Sopenharmony_ci	int rc;
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci	down_write(&iopt->domains_rwsem);
85062306a36Sopenharmony_ci	down_write(&iopt->iova_rwsem);
85162306a36Sopenharmony_ci
85262306a36Sopenharmony_ci	xa_for_each(&iopt->domains, index, iter_domain) {
85362306a36Sopenharmony_ci		if (WARN_ON(iter_domain == domain)) {
85462306a36Sopenharmony_ci			rc = -EEXIST;
85562306a36Sopenharmony_ci			goto out_unlock;
85662306a36Sopenharmony_ci		}
85762306a36Sopenharmony_ci	}
85862306a36Sopenharmony_ci
85962306a36Sopenharmony_ci	/*
86062306a36Sopenharmony_ci	 * The io page size drives the iova_alignment. Internally the iopt_pages
86162306a36Sopenharmony_ci	 * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE
86262306a36Sopenharmony_ci	 * objects into the iommu_domain.
86362306a36Sopenharmony_ci	 *
86462306a36Sopenharmony_ci	 * A iommu_domain must always be able to accept PAGE_SIZE to be
86562306a36Sopenharmony_ci	 * compatible as we can't guarantee higher contiguity.
86662306a36Sopenharmony_ci	 */
86762306a36Sopenharmony_ci	new_iova_alignment = max_t(unsigned long,
86862306a36Sopenharmony_ci				   1UL << __ffs(domain->pgsize_bitmap),
86962306a36Sopenharmony_ci				   iopt->iova_alignment);
87062306a36Sopenharmony_ci	if (new_iova_alignment > PAGE_SIZE) {
87162306a36Sopenharmony_ci		rc = -EINVAL;
87262306a36Sopenharmony_ci		goto out_unlock;
87362306a36Sopenharmony_ci	}
87462306a36Sopenharmony_ci	if (new_iova_alignment != iopt->iova_alignment) {
87562306a36Sopenharmony_ci		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
87662306a36Sopenharmony_ci		if (rc)
87762306a36Sopenharmony_ci			goto out_unlock;
87862306a36Sopenharmony_ci	}
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_ci	/* No area exists that is outside the allowed domain aperture */
88162306a36Sopenharmony_ci	if (geometry->aperture_start != 0) {
88262306a36Sopenharmony_ci		rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1,
88362306a36Sopenharmony_ci				       domain);
88462306a36Sopenharmony_ci		if (rc)
88562306a36Sopenharmony_ci			goto out_reserved;
88662306a36Sopenharmony_ci	}
88762306a36Sopenharmony_ci	if (geometry->aperture_end != ULONG_MAX) {
88862306a36Sopenharmony_ci		rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1,
88962306a36Sopenharmony_ci				       ULONG_MAX, domain);
89062306a36Sopenharmony_ci		if (rc)
89162306a36Sopenharmony_ci			goto out_reserved;
89262306a36Sopenharmony_ci	}
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci	rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL);
89562306a36Sopenharmony_ci	if (rc)
89662306a36Sopenharmony_ci		goto out_reserved;
89762306a36Sopenharmony_ci
89862306a36Sopenharmony_ci	rc = iopt_fill_domain(iopt, domain);
89962306a36Sopenharmony_ci	if (rc)
90062306a36Sopenharmony_ci		goto out_release;
90162306a36Sopenharmony_ci
90262306a36Sopenharmony_ci	iopt->iova_alignment = new_iova_alignment;
90362306a36Sopenharmony_ci	xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL);
90462306a36Sopenharmony_ci	iopt->next_domain_id++;
90562306a36Sopenharmony_ci	up_write(&iopt->iova_rwsem);
90662306a36Sopenharmony_ci	up_write(&iopt->domains_rwsem);
90762306a36Sopenharmony_ci	return 0;
90862306a36Sopenharmony_ciout_release:
90962306a36Sopenharmony_ci	xa_release(&iopt->domains, iopt->next_domain_id);
91062306a36Sopenharmony_ciout_reserved:
91162306a36Sopenharmony_ci	__iopt_remove_reserved_iova(iopt, domain);
91262306a36Sopenharmony_ciout_unlock:
91362306a36Sopenharmony_ci	up_write(&iopt->iova_rwsem);
91462306a36Sopenharmony_ci	up_write(&iopt->domains_rwsem);
91562306a36Sopenharmony_ci	return rc;
91662306a36Sopenharmony_ci}
91762306a36Sopenharmony_ci
91862306a36Sopenharmony_cistatic int iopt_calculate_iova_alignment(struct io_pagetable *iopt)
91962306a36Sopenharmony_ci{
92062306a36Sopenharmony_ci	unsigned long new_iova_alignment;
92162306a36Sopenharmony_ci	struct iommufd_access *access;
92262306a36Sopenharmony_ci	struct iommu_domain *domain;
92362306a36Sopenharmony_ci	unsigned long index;
92462306a36Sopenharmony_ci
92562306a36Sopenharmony_ci	lockdep_assert_held_write(&iopt->iova_rwsem);
92662306a36Sopenharmony_ci	lockdep_assert_held(&iopt->domains_rwsem);
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci	/* See batch_iommu_map_small() */
92962306a36Sopenharmony_ci	if (iopt->disable_large_pages)
93062306a36Sopenharmony_ci		new_iova_alignment = PAGE_SIZE;
93162306a36Sopenharmony_ci	else
93262306a36Sopenharmony_ci		new_iova_alignment = 1;
93362306a36Sopenharmony_ci
93462306a36Sopenharmony_ci	xa_for_each(&iopt->domains, index, domain)
93562306a36Sopenharmony_ci		new_iova_alignment = max_t(unsigned long,
93662306a36Sopenharmony_ci					   1UL << __ffs(domain->pgsize_bitmap),
93762306a36Sopenharmony_ci					   new_iova_alignment);
93862306a36Sopenharmony_ci	xa_for_each(&iopt->access_list, index, access)
93962306a36Sopenharmony_ci		new_iova_alignment = max_t(unsigned long,
94062306a36Sopenharmony_ci					   access->iova_alignment,
94162306a36Sopenharmony_ci					   new_iova_alignment);
94262306a36Sopenharmony_ci
94362306a36Sopenharmony_ci	if (new_iova_alignment > iopt->iova_alignment) {
94462306a36Sopenharmony_ci		int rc;
94562306a36Sopenharmony_ci
94662306a36Sopenharmony_ci		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
94762306a36Sopenharmony_ci		if (rc)
94862306a36Sopenharmony_ci			return rc;
94962306a36Sopenharmony_ci	}
95062306a36Sopenharmony_ci	iopt->iova_alignment = new_iova_alignment;
95162306a36Sopenharmony_ci	return 0;
95262306a36Sopenharmony_ci}
95362306a36Sopenharmony_ci
95462306a36Sopenharmony_civoid iopt_table_remove_domain(struct io_pagetable *iopt,
95562306a36Sopenharmony_ci			      struct iommu_domain *domain)
95662306a36Sopenharmony_ci{
95762306a36Sopenharmony_ci	struct iommu_domain *iter_domain = NULL;
95862306a36Sopenharmony_ci	unsigned long index;
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci	down_write(&iopt->domains_rwsem);
96162306a36Sopenharmony_ci	down_write(&iopt->iova_rwsem);
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_ci	xa_for_each(&iopt->domains, index, iter_domain)
96462306a36Sopenharmony_ci		if (iter_domain == domain)
96562306a36Sopenharmony_ci			break;
96662306a36Sopenharmony_ci	if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id)
96762306a36Sopenharmony_ci		goto out_unlock;
96862306a36Sopenharmony_ci
96962306a36Sopenharmony_ci	/*
97062306a36Sopenharmony_ci	 * Compress the xarray to keep it linear by swapping the entry to erase
97162306a36Sopenharmony_ci	 * with the tail entry and shrinking the tail.
97262306a36Sopenharmony_ci	 */
97362306a36Sopenharmony_ci	iopt->next_domain_id--;
97462306a36Sopenharmony_ci	iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id);
97562306a36Sopenharmony_ci	if (index != iopt->next_domain_id)
97662306a36Sopenharmony_ci		xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL);
97762306a36Sopenharmony_ci
97862306a36Sopenharmony_ci	iopt_unfill_domain(iopt, domain);
97962306a36Sopenharmony_ci	__iopt_remove_reserved_iova(iopt, domain);
98062306a36Sopenharmony_ci
98162306a36Sopenharmony_ci	WARN_ON(iopt_calculate_iova_alignment(iopt));
98262306a36Sopenharmony_ciout_unlock:
98362306a36Sopenharmony_ci	up_write(&iopt->iova_rwsem);
98462306a36Sopenharmony_ci	up_write(&iopt->domains_rwsem);
98562306a36Sopenharmony_ci}
98662306a36Sopenharmony_ci
98762306a36Sopenharmony_ci/**
98862306a36Sopenharmony_ci * iopt_area_split - Split an area into two parts at iova
98962306a36Sopenharmony_ci * @area: The area to split
99062306a36Sopenharmony_ci * @iova: Becomes the last of a new area
99162306a36Sopenharmony_ci *
99262306a36Sopenharmony_ci * This splits an area into two. It is part of the VFIO compatibility to allow
99362306a36Sopenharmony_ci * poking a hole in the mapping. The two areas continue to point at the same
99462306a36Sopenharmony_ci * iopt_pages, just with different starting bytes.
99562306a36Sopenharmony_ci */
99662306a36Sopenharmony_cistatic int iopt_area_split(struct iopt_area *area, unsigned long iova)
99762306a36Sopenharmony_ci{
99862306a36Sopenharmony_ci	unsigned long alignment = area->iopt->iova_alignment;
99962306a36Sopenharmony_ci	unsigned long last_iova = iopt_area_last_iova(area);
100062306a36Sopenharmony_ci	unsigned long start_iova = iopt_area_iova(area);
100162306a36Sopenharmony_ci	unsigned long new_start = iova + 1;
100262306a36Sopenharmony_ci	struct io_pagetable *iopt = area->iopt;
100362306a36Sopenharmony_ci	struct iopt_pages *pages = area->pages;
100462306a36Sopenharmony_ci	struct iopt_area *lhs;
100562306a36Sopenharmony_ci	struct iopt_area *rhs;
100662306a36Sopenharmony_ci	int rc;
100762306a36Sopenharmony_ci
100862306a36Sopenharmony_ci	lockdep_assert_held_write(&iopt->iova_rwsem);
100962306a36Sopenharmony_ci
101062306a36Sopenharmony_ci	if (iova == start_iova || iova == last_iova)
101162306a36Sopenharmony_ci		return 0;
101262306a36Sopenharmony_ci
101362306a36Sopenharmony_ci	if (!pages || area->prevent_access)
101462306a36Sopenharmony_ci		return -EBUSY;
101562306a36Sopenharmony_ci
101662306a36Sopenharmony_ci	if (new_start & (alignment - 1) ||
101762306a36Sopenharmony_ci	    iopt_area_start_byte(area, new_start) & (alignment - 1))
101862306a36Sopenharmony_ci		return -EINVAL;
101962306a36Sopenharmony_ci
102062306a36Sopenharmony_ci	lhs = iopt_area_alloc();
102162306a36Sopenharmony_ci	if (!lhs)
102262306a36Sopenharmony_ci		return -ENOMEM;
102362306a36Sopenharmony_ci
102462306a36Sopenharmony_ci	rhs = iopt_area_alloc();
102562306a36Sopenharmony_ci	if (!rhs) {
102662306a36Sopenharmony_ci		rc = -ENOMEM;
102762306a36Sopenharmony_ci		goto err_free_lhs;
102862306a36Sopenharmony_ci	}
102962306a36Sopenharmony_ci
103062306a36Sopenharmony_ci	mutex_lock(&pages->mutex);
103162306a36Sopenharmony_ci	/*
103262306a36Sopenharmony_ci	 * Splitting is not permitted if an access exists, we don't track enough
103362306a36Sopenharmony_ci	 * information to split existing accesses.
103462306a36Sopenharmony_ci	 */
103562306a36Sopenharmony_ci	if (area->num_accesses) {
103662306a36Sopenharmony_ci		rc = -EINVAL;
103762306a36Sopenharmony_ci		goto err_unlock;
103862306a36Sopenharmony_ci	}
103962306a36Sopenharmony_ci
104062306a36Sopenharmony_ci	/*
104162306a36Sopenharmony_ci	 * Splitting is not permitted if a domain could have been mapped with
104262306a36Sopenharmony_ci	 * huge pages.
104362306a36Sopenharmony_ci	 */
104462306a36Sopenharmony_ci	if (area->storage_domain && !iopt->disable_large_pages) {
104562306a36Sopenharmony_ci		rc = -EINVAL;
104662306a36Sopenharmony_ci		goto err_unlock;
104762306a36Sopenharmony_ci	}
104862306a36Sopenharmony_ci
104962306a36Sopenharmony_ci	interval_tree_remove(&area->node, &iopt->area_itree);
105062306a36Sopenharmony_ci	rc = iopt_insert_area(iopt, lhs, area->pages, start_iova,
105162306a36Sopenharmony_ci			      iopt_area_start_byte(area, start_iova),
105262306a36Sopenharmony_ci			      (new_start - 1) - start_iova + 1,
105362306a36Sopenharmony_ci			      area->iommu_prot);
105462306a36Sopenharmony_ci	if (WARN_ON(rc))
105562306a36Sopenharmony_ci		goto err_insert;
105662306a36Sopenharmony_ci
105762306a36Sopenharmony_ci	rc = iopt_insert_area(iopt, rhs, area->pages, new_start,
105862306a36Sopenharmony_ci			      iopt_area_start_byte(area, new_start),
105962306a36Sopenharmony_ci			      last_iova - new_start + 1, area->iommu_prot);
106062306a36Sopenharmony_ci	if (WARN_ON(rc))
106162306a36Sopenharmony_ci		goto err_remove_lhs;
106262306a36Sopenharmony_ci
106362306a36Sopenharmony_ci	/*
106462306a36Sopenharmony_ci	 * If the original area has filled a domain, domains_itree has to be
106562306a36Sopenharmony_ci	 * updated.
106662306a36Sopenharmony_ci	 */
106762306a36Sopenharmony_ci	if (area->storage_domain) {
106862306a36Sopenharmony_ci		interval_tree_remove(&area->pages_node, &pages->domains_itree);
106962306a36Sopenharmony_ci		interval_tree_insert(&lhs->pages_node, &pages->domains_itree);
107062306a36Sopenharmony_ci		interval_tree_insert(&rhs->pages_node, &pages->domains_itree);
107162306a36Sopenharmony_ci	}
107262306a36Sopenharmony_ci
107362306a36Sopenharmony_ci	lhs->storage_domain = area->storage_domain;
107462306a36Sopenharmony_ci	lhs->pages = area->pages;
107562306a36Sopenharmony_ci	rhs->storage_domain = area->storage_domain;
107662306a36Sopenharmony_ci	rhs->pages = area->pages;
107762306a36Sopenharmony_ci	kref_get(&rhs->pages->kref);
107862306a36Sopenharmony_ci	kfree(area);
107962306a36Sopenharmony_ci	mutex_unlock(&pages->mutex);
108062306a36Sopenharmony_ci
108162306a36Sopenharmony_ci	/*
108262306a36Sopenharmony_ci	 * No change to domains or accesses because the pages hasn't been
108362306a36Sopenharmony_ci	 * changed
108462306a36Sopenharmony_ci	 */
108562306a36Sopenharmony_ci	return 0;
108662306a36Sopenharmony_ci
108762306a36Sopenharmony_cierr_remove_lhs:
108862306a36Sopenharmony_ci	interval_tree_remove(&lhs->node, &iopt->area_itree);
108962306a36Sopenharmony_cierr_insert:
109062306a36Sopenharmony_ci	interval_tree_insert(&area->node, &iopt->area_itree);
109162306a36Sopenharmony_cierr_unlock:
109262306a36Sopenharmony_ci	mutex_unlock(&pages->mutex);
109362306a36Sopenharmony_ci	kfree(rhs);
109462306a36Sopenharmony_cierr_free_lhs:
109562306a36Sopenharmony_ci	kfree(lhs);
109662306a36Sopenharmony_ci	return rc;
109762306a36Sopenharmony_ci}
109862306a36Sopenharmony_ci
109962306a36Sopenharmony_ciint iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
110062306a36Sopenharmony_ci		  size_t num_iovas)
110162306a36Sopenharmony_ci{
110262306a36Sopenharmony_ci	int rc = 0;
110362306a36Sopenharmony_ci	int i;
110462306a36Sopenharmony_ci
110562306a36Sopenharmony_ci	down_write(&iopt->iova_rwsem);
110662306a36Sopenharmony_ci	for (i = 0; i < num_iovas; i++) {
110762306a36Sopenharmony_ci		struct iopt_area *area;
110862306a36Sopenharmony_ci
110962306a36Sopenharmony_ci		area = iopt_area_iter_first(iopt, iovas[i], iovas[i]);
111062306a36Sopenharmony_ci		if (!area)
111162306a36Sopenharmony_ci			continue;
111262306a36Sopenharmony_ci		rc = iopt_area_split(area, iovas[i]);
111362306a36Sopenharmony_ci		if (rc)
111462306a36Sopenharmony_ci			break;
111562306a36Sopenharmony_ci	}
111662306a36Sopenharmony_ci	up_write(&iopt->iova_rwsem);
111762306a36Sopenharmony_ci	return rc;
111862306a36Sopenharmony_ci}
111962306a36Sopenharmony_ci
112062306a36Sopenharmony_civoid iopt_enable_large_pages(struct io_pagetable *iopt)
112162306a36Sopenharmony_ci{
112262306a36Sopenharmony_ci	int rc;
112362306a36Sopenharmony_ci
112462306a36Sopenharmony_ci	down_write(&iopt->domains_rwsem);
112562306a36Sopenharmony_ci	down_write(&iopt->iova_rwsem);
112662306a36Sopenharmony_ci	WRITE_ONCE(iopt->disable_large_pages, false);
112762306a36Sopenharmony_ci	rc = iopt_calculate_iova_alignment(iopt);
112862306a36Sopenharmony_ci	WARN_ON(rc);
112962306a36Sopenharmony_ci	up_write(&iopt->iova_rwsem);
113062306a36Sopenharmony_ci	up_write(&iopt->domains_rwsem);
113162306a36Sopenharmony_ci}
113262306a36Sopenharmony_ci
113362306a36Sopenharmony_ciint iopt_disable_large_pages(struct io_pagetable *iopt)
113462306a36Sopenharmony_ci{
113562306a36Sopenharmony_ci	int rc = 0;
113662306a36Sopenharmony_ci
113762306a36Sopenharmony_ci	down_write(&iopt->domains_rwsem);
113862306a36Sopenharmony_ci	down_write(&iopt->iova_rwsem);
113962306a36Sopenharmony_ci	if (iopt->disable_large_pages)
114062306a36Sopenharmony_ci		goto out_unlock;
114162306a36Sopenharmony_ci
114262306a36Sopenharmony_ci	/* Won't do it if domains already have pages mapped in them */
114362306a36Sopenharmony_ci	if (!xa_empty(&iopt->domains) &&
114462306a36Sopenharmony_ci	    !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) {
114562306a36Sopenharmony_ci		rc = -EINVAL;
114662306a36Sopenharmony_ci		goto out_unlock;
114762306a36Sopenharmony_ci	}
114862306a36Sopenharmony_ci
114962306a36Sopenharmony_ci	WRITE_ONCE(iopt->disable_large_pages, true);
115062306a36Sopenharmony_ci	rc = iopt_calculate_iova_alignment(iopt);
115162306a36Sopenharmony_ci	if (rc)
115262306a36Sopenharmony_ci		WRITE_ONCE(iopt->disable_large_pages, false);
115362306a36Sopenharmony_ciout_unlock:
115462306a36Sopenharmony_ci	up_write(&iopt->iova_rwsem);
115562306a36Sopenharmony_ci	up_write(&iopt->domains_rwsem);
115662306a36Sopenharmony_ci	return rc;
115762306a36Sopenharmony_ci}
115862306a36Sopenharmony_ci
115962306a36Sopenharmony_ciint iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access)
116062306a36Sopenharmony_ci{
116162306a36Sopenharmony_ci	u32 new_id;
116262306a36Sopenharmony_ci	int rc;
116362306a36Sopenharmony_ci
116462306a36Sopenharmony_ci	down_write(&iopt->domains_rwsem);
116562306a36Sopenharmony_ci	down_write(&iopt->iova_rwsem);
116662306a36Sopenharmony_ci	rc = xa_alloc(&iopt->access_list, &new_id, access, xa_limit_16b,
116762306a36Sopenharmony_ci		      GFP_KERNEL_ACCOUNT);
116862306a36Sopenharmony_ci
116962306a36Sopenharmony_ci	if (rc)
117062306a36Sopenharmony_ci		goto out_unlock;
117162306a36Sopenharmony_ci
117262306a36Sopenharmony_ci	rc = iopt_calculate_iova_alignment(iopt);
117362306a36Sopenharmony_ci	if (rc) {
117462306a36Sopenharmony_ci		xa_erase(&iopt->access_list, new_id);
117562306a36Sopenharmony_ci		goto out_unlock;
117662306a36Sopenharmony_ci	}
117762306a36Sopenharmony_ci	access->iopt_access_list_id = new_id;
117862306a36Sopenharmony_ci
117962306a36Sopenharmony_ciout_unlock:
118062306a36Sopenharmony_ci	up_write(&iopt->iova_rwsem);
118162306a36Sopenharmony_ci	up_write(&iopt->domains_rwsem);
118262306a36Sopenharmony_ci	return rc;
118362306a36Sopenharmony_ci}
118462306a36Sopenharmony_ci
118562306a36Sopenharmony_civoid iopt_remove_access(struct io_pagetable *iopt,
118662306a36Sopenharmony_ci			struct iommufd_access *access,
118762306a36Sopenharmony_ci			u32 iopt_access_list_id)
118862306a36Sopenharmony_ci{
118962306a36Sopenharmony_ci	down_write(&iopt->domains_rwsem);
119062306a36Sopenharmony_ci	down_write(&iopt->iova_rwsem);
119162306a36Sopenharmony_ci	WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access);
119262306a36Sopenharmony_ci	WARN_ON(iopt_calculate_iova_alignment(iopt));
119362306a36Sopenharmony_ci	up_write(&iopt->iova_rwsem);
119462306a36Sopenharmony_ci	up_write(&iopt->domains_rwsem);
119562306a36Sopenharmony_ci}
119662306a36Sopenharmony_ci
119762306a36Sopenharmony_ci/* Narrow the valid_iova_itree to include reserved ranges from a device. */
119862306a36Sopenharmony_ciint iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
119962306a36Sopenharmony_ci					struct device *dev,
120062306a36Sopenharmony_ci					phys_addr_t *sw_msi_start)
120162306a36Sopenharmony_ci{
120262306a36Sopenharmony_ci	struct iommu_resv_region *resv;
120362306a36Sopenharmony_ci	LIST_HEAD(resv_regions);
120462306a36Sopenharmony_ci	unsigned int num_hw_msi = 0;
120562306a36Sopenharmony_ci	unsigned int num_sw_msi = 0;
120662306a36Sopenharmony_ci	int rc;
120762306a36Sopenharmony_ci
120862306a36Sopenharmony_ci	if (iommufd_should_fail())
120962306a36Sopenharmony_ci		return -EINVAL;
121062306a36Sopenharmony_ci
121162306a36Sopenharmony_ci	down_write(&iopt->iova_rwsem);
121262306a36Sopenharmony_ci	/* FIXME: drivers allocate memory but there is no failure propogated */
121362306a36Sopenharmony_ci	iommu_get_resv_regions(dev, &resv_regions);
121462306a36Sopenharmony_ci
121562306a36Sopenharmony_ci	list_for_each_entry(resv, &resv_regions, list) {
121662306a36Sopenharmony_ci		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
121762306a36Sopenharmony_ci			continue;
121862306a36Sopenharmony_ci
121962306a36Sopenharmony_ci		if (sw_msi_start && resv->type == IOMMU_RESV_MSI)
122062306a36Sopenharmony_ci			num_hw_msi++;
122162306a36Sopenharmony_ci		if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) {
122262306a36Sopenharmony_ci			*sw_msi_start = resv->start;
122362306a36Sopenharmony_ci			num_sw_msi++;
122462306a36Sopenharmony_ci		}
122562306a36Sopenharmony_ci
122662306a36Sopenharmony_ci		rc = iopt_reserve_iova(iopt, resv->start,
122762306a36Sopenharmony_ci				       resv->length - 1 + resv->start, dev);
122862306a36Sopenharmony_ci		if (rc)
122962306a36Sopenharmony_ci			goto out_reserved;
123062306a36Sopenharmony_ci	}
123162306a36Sopenharmony_ci
123262306a36Sopenharmony_ci	/* Drivers must offer sane combinations of regions */
123362306a36Sopenharmony_ci	if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) {
123462306a36Sopenharmony_ci		rc = -EINVAL;
123562306a36Sopenharmony_ci		goto out_reserved;
123662306a36Sopenharmony_ci	}
123762306a36Sopenharmony_ci
123862306a36Sopenharmony_ci	rc = 0;
123962306a36Sopenharmony_ci	goto out_free_resv;
124062306a36Sopenharmony_ci
124162306a36Sopenharmony_ciout_reserved:
124262306a36Sopenharmony_ci	__iopt_remove_reserved_iova(iopt, dev);
124362306a36Sopenharmony_ciout_free_resv:
124462306a36Sopenharmony_ci	iommu_put_resv_regions(dev, &resv_regions);
124562306a36Sopenharmony_ci	up_write(&iopt->iova_rwsem);
124662306a36Sopenharmony_ci	return rc;
124762306a36Sopenharmony_ci}
1248