162306a36Sopenharmony_ci/*
262306a36Sopenharmony_ci * Copyright (c) 2005 Topspin Communications.  All rights reserved.
362306a36Sopenharmony_ci * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
462306a36Sopenharmony_ci * Copyright (c) 2013 Cisco Systems.  All rights reserved.
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * This software is available to you under a choice of one of two
762306a36Sopenharmony_ci * licenses.  You may choose to be licensed under the terms of the GNU
862306a36Sopenharmony_ci * General Public License (GPL) Version 2, available from the file
962306a36Sopenharmony_ci * COPYING in the main directory of this source tree, or the
1062306a36Sopenharmony_ci * BSD license below:
1162306a36Sopenharmony_ci *
1262306a36Sopenharmony_ci *     Redistribution and use in source and binary forms, with or
1362306a36Sopenharmony_ci *     without modification, are permitted provided that the following
1462306a36Sopenharmony_ci *     conditions are met:
1562306a36Sopenharmony_ci *
1662306a36Sopenharmony_ci *      - Redistributions of source code must retain the above
1762306a36Sopenharmony_ci *        copyright notice, this list of conditions and the following
1862306a36Sopenharmony_ci *        disclaimer.
1962306a36Sopenharmony_ci *
2062306a36Sopenharmony_ci *      - Redistributions in binary form must reproduce the above
2162306a36Sopenharmony_ci *        copyright notice, this list of conditions and the following
2262306a36Sopenharmony_ci *        disclaimer in the documentation and/or other materials
2362306a36Sopenharmony_ci *        provided with the distribution.
2462306a36Sopenharmony_ci *
2562306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
2662306a36Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
2762306a36Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
2862306a36Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
2962306a36Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
3062306a36Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
3162306a36Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3262306a36Sopenharmony_ci * SOFTWARE.
3362306a36Sopenharmony_ci */
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci#include <linux/mm.h>
3662306a36Sopenharmony_ci#include <linux/dma-mapping.h>
3762306a36Sopenharmony_ci#include <linux/sched/signal.h>
3862306a36Sopenharmony_ci#include <linux/sched/mm.h>
3962306a36Sopenharmony_ci#include <linux/hugetlb.h>
4062306a36Sopenharmony_ci#include <linux/iommu.h>
4162306a36Sopenharmony_ci#include <linux/workqueue.h>
4262306a36Sopenharmony_ci#include <linux/list.h>
4362306a36Sopenharmony_ci#include <rdma/ib_verbs.h>
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci#include "usnic_log.h"
4662306a36Sopenharmony_ci#include "usnic_uiom.h"
4762306a36Sopenharmony_ci#include "usnic_uiom_interval_tree.h"
4862306a36Sopenharmony_ci
/*
 * Number of page_list[] scatterlist entries that fit in one page together
 * with the usnic_uiom_chunk header that precedes them.
 */
#define USNIC_UIOM_PAGE_CHUNK						\
	((PAGE_SIZE - offsetof(struct usnic_uiom_chunk, page_list)) /	\
	 sizeof(((struct usnic_uiom_chunk *) 0)->page_list[0]))
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_cistatic int usnic_uiom_dma_fault(struct iommu_domain *domain,
5562306a36Sopenharmony_ci				struct device *dev,
5662306a36Sopenharmony_ci				unsigned long iova, int flags,
5762306a36Sopenharmony_ci				void *token)
5862306a36Sopenharmony_ci{
5962306a36Sopenharmony_ci	usnic_err("Device %s iommu fault domain 0x%pK va 0x%lx flags 0x%x\n",
6062306a36Sopenharmony_ci		dev_name(dev),
6162306a36Sopenharmony_ci		domain, iova, flags);
6262306a36Sopenharmony_ci	return -ENOSYS;
6362306a36Sopenharmony_ci}
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_cistatic void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
6662306a36Sopenharmony_ci{
6762306a36Sopenharmony_ci	struct usnic_uiom_chunk *chunk, *tmp;
6862306a36Sopenharmony_ci	struct page *page;
6962306a36Sopenharmony_ci	struct scatterlist *sg;
7062306a36Sopenharmony_ci	int i;
7162306a36Sopenharmony_ci	dma_addr_t pa;
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci	list_for_each_entry_safe(chunk, tmp, chunk_list, list) {
7462306a36Sopenharmony_ci		for_each_sg(chunk->page_list, sg, chunk->nents, i) {
7562306a36Sopenharmony_ci			page = sg_page(sg);
7662306a36Sopenharmony_ci			pa = sg_phys(sg);
7762306a36Sopenharmony_ci			unpin_user_pages_dirty_lock(&page, 1, dirty);
7862306a36Sopenharmony_ci			usnic_dbg("pa: %pa\n", &pa);
7962306a36Sopenharmony_ci		}
8062306a36Sopenharmony_ci		kfree(chunk);
8162306a36Sopenharmony_ci	}
8262306a36Sopenharmony_ci}
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_cistatic int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
8562306a36Sopenharmony_ci				int dmasync, struct usnic_uiom_reg *uiomr)
8662306a36Sopenharmony_ci{
8762306a36Sopenharmony_ci	struct list_head *chunk_list = &uiomr->chunk_list;
8862306a36Sopenharmony_ci	unsigned int gup_flags = FOLL_LONGTERM;
8962306a36Sopenharmony_ci	struct page **page_list;
9062306a36Sopenharmony_ci	struct scatterlist *sg;
9162306a36Sopenharmony_ci	struct usnic_uiom_chunk *chunk;
9262306a36Sopenharmony_ci	unsigned long locked;
9362306a36Sopenharmony_ci	unsigned long lock_limit;
9462306a36Sopenharmony_ci	unsigned long cur_base;
9562306a36Sopenharmony_ci	unsigned long npages;
9662306a36Sopenharmony_ci	int ret;
9762306a36Sopenharmony_ci	int off;
9862306a36Sopenharmony_ci	int i;
9962306a36Sopenharmony_ci	dma_addr_t pa;
10062306a36Sopenharmony_ci	struct mm_struct *mm;
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci	/*
10362306a36Sopenharmony_ci	 * If the combination of the addr and size requested for this memory
10462306a36Sopenharmony_ci	 * region causes an integer overflow, return error.
10562306a36Sopenharmony_ci	 */
10662306a36Sopenharmony_ci	if (((addr + size) < addr) || PAGE_ALIGN(addr + size) < (addr + size))
10762306a36Sopenharmony_ci		return -EINVAL;
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_ci	if (!size)
11062306a36Sopenharmony_ci		return -EINVAL;
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	if (!can_do_mlock())
11362306a36Sopenharmony_ci		return -EPERM;
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	INIT_LIST_HEAD(chunk_list);
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci	page_list = (struct page **) __get_free_page(GFP_KERNEL);
11862306a36Sopenharmony_ci	if (!page_list)
11962306a36Sopenharmony_ci		return -ENOMEM;
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_ci	npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	uiomr->owning_mm = mm = current->mm;
12462306a36Sopenharmony_ci	mmap_read_lock(mm);
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci	locked = atomic64_add_return(npages, &current->mm->pinned_vm);
12762306a36Sopenharmony_ci	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
13062306a36Sopenharmony_ci		ret = -ENOMEM;
13162306a36Sopenharmony_ci		goto out;
13262306a36Sopenharmony_ci	}
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	if (writable)
13562306a36Sopenharmony_ci		gup_flags |= FOLL_WRITE;
13662306a36Sopenharmony_ci	cur_base = addr & PAGE_MASK;
13762306a36Sopenharmony_ci	ret = 0;
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	while (npages) {
14062306a36Sopenharmony_ci		ret = pin_user_pages(cur_base,
14162306a36Sopenharmony_ci				     min_t(unsigned long, npages,
14262306a36Sopenharmony_ci				     PAGE_SIZE / sizeof(struct page *)),
14362306a36Sopenharmony_ci				     gup_flags, page_list);
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci		if (ret < 0)
14662306a36Sopenharmony_ci			goto out;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci		npages -= ret;
14962306a36Sopenharmony_ci		off = 0;
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci		while (ret) {
15262306a36Sopenharmony_ci			chunk = kmalloc(struct_size(chunk, page_list,
15362306a36Sopenharmony_ci					min_t(int, ret, USNIC_UIOM_PAGE_CHUNK)),
15462306a36Sopenharmony_ci					GFP_KERNEL);
15562306a36Sopenharmony_ci			if (!chunk) {
15662306a36Sopenharmony_ci				ret = -ENOMEM;
15762306a36Sopenharmony_ci				goto out;
15862306a36Sopenharmony_ci			}
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci			chunk->nents = min_t(int, ret, USNIC_UIOM_PAGE_CHUNK);
16162306a36Sopenharmony_ci			sg_init_table(chunk->page_list, chunk->nents);
16262306a36Sopenharmony_ci			for_each_sg(chunk->page_list, sg, chunk->nents, i) {
16362306a36Sopenharmony_ci				sg_set_page(sg, page_list[i + off],
16462306a36Sopenharmony_ci						PAGE_SIZE, 0);
16562306a36Sopenharmony_ci				pa = sg_phys(sg);
16662306a36Sopenharmony_ci				usnic_dbg("va: 0x%lx pa: %pa\n",
16762306a36Sopenharmony_ci						cur_base + i*PAGE_SIZE, &pa);
16862306a36Sopenharmony_ci			}
16962306a36Sopenharmony_ci			cur_base += chunk->nents * PAGE_SIZE;
17062306a36Sopenharmony_ci			ret -= chunk->nents;
17162306a36Sopenharmony_ci			off += chunk->nents;
17262306a36Sopenharmony_ci			list_add_tail(&chunk->list, chunk_list);
17362306a36Sopenharmony_ci		}
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci		ret = 0;
17662306a36Sopenharmony_ci	}
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ciout:
17962306a36Sopenharmony_ci	if (ret < 0) {
18062306a36Sopenharmony_ci		usnic_uiom_put_pages(chunk_list, 0);
18162306a36Sopenharmony_ci		atomic64_sub(npages, &current->mm->pinned_vm);
18262306a36Sopenharmony_ci	} else
18362306a36Sopenharmony_ci		mmgrab(uiomr->owning_mm);
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	mmap_read_unlock(mm);
18662306a36Sopenharmony_ci	free_page((unsigned long) page_list);
18762306a36Sopenharmony_ci	return ret;
18862306a36Sopenharmony_ci}
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_cistatic void usnic_uiom_unmap_sorted_intervals(struct list_head *intervals,
19162306a36Sopenharmony_ci						struct usnic_uiom_pd *pd)
19262306a36Sopenharmony_ci{
19362306a36Sopenharmony_ci	struct usnic_uiom_interval_node *interval, *tmp;
19462306a36Sopenharmony_ci	long unsigned va, size;
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	list_for_each_entry_safe(interval, tmp, intervals, link) {
19762306a36Sopenharmony_ci		va = interval->start << PAGE_SHIFT;
19862306a36Sopenharmony_ci		size = ((interval->last - interval->start) + 1) << PAGE_SHIFT;
19962306a36Sopenharmony_ci		while (size > 0) {
20062306a36Sopenharmony_ci			/* Workaround for RH 970401 */
20162306a36Sopenharmony_ci			usnic_dbg("va 0x%lx size 0x%lx", va, PAGE_SIZE);
20262306a36Sopenharmony_ci			iommu_unmap(pd->domain, va, PAGE_SIZE);
20362306a36Sopenharmony_ci			va += PAGE_SIZE;
20462306a36Sopenharmony_ci			size -= PAGE_SIZE;
20562306a36Sopenharmony_ci		}
20662306a36Sopenharmony_ci	}
20762306a36Sopenharmony_ci}
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_cistatic void __usnic_uiom_reg_release(struct usnic_uiom_pd *pd,
21062306a36Sopenharmony_ci					struct usnic_uiom_reg *uiomr,
21162306a36Sopenharmony_ci					int dirty)
21262306a36Sopenharmony_ci{
21362306a36Sopenharmony_ci	int npages;
21462306a36Sopenharmony_ci	unsigned long vpn_start, vpn_last;
21562306a36Sopenharmony_ci	struct usnic_uiom_interval_node *interval, *tmp;
21662306a36Sopenharmony_ci	int writable = 0;
21762306a36Sopenharmony_ci	LIST_HEAD(rm_intervals);
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	npages = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
22062306a36Sopenharmony_ci	vpn_start = (uiomr->va & PAGE_MASK) >> PAGE_SHIFT;
22162306a36Sopenharmony_ci	vpn_last = vpn_start + npages - 1;
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_ci	spin_lock(&pd->lock);
22462306a36Sopenharmony_ci	usnic_uiom_remove_interval(&pd->root, vpn_start,
22562306a36Sopenharmony_ci					vpn_last, &rm_intervals);
22662306a36Sopenharmony_ci	usnic_uiom_unmap_sorted_intervals(&rm_intervals, pd);
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci	list_for_each_entry_safe(interval, tmp, &rm_intervals, link) {
22962306a36Sopenharmony_ci		if (interval->flags & IOMMU_WRITE)
23062306a36Sopenharmony_ci			writable = 1;
23162306a36Sopenharmony_ci		list_del(&interval->link);
23262306a36Sopenharmony_ci		kfree(interval);
23362306a36Sopenharmony_ci	}
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	usnic_uiom_put_pages(&uiomr->chunk_list, dirty & writable);
23662306a36Sopenharmony_ci	spin_unlock(&pd->lock);
23762306a36Sopenharmony_ci}
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_cistatic int usnic_uiom_map_sorted_intervals(struct list_head *intervals,
24062306a36Sopenharmony_ci						struct usnic_uiom_reg *uiomr)
24162306a36Sopenharmony_ci{
24262306a36Sopenharmony_ci	int i, err;
24362306a36Sopenharmony_ci	size_t size;
24462306a36Sopenharmony_ci	struct usnic_uiom_chunk *chunk;
24562306a36Sopenharmony_ci	struct usnic_uiom_interval_node *interval_node;
24662306a36Sopenharmony_ci	dma_addr_t pa;
24762306a36Sopenharmony_ci	dma_addr_t pa_start = 0;
24862306a36Sopenharmony_ci	dma_addr_t pa_end = 0;
24962306a36Sopenharmony_ci	long int va_start = -EINVAL;
25062306a36Sopenharmony_ci	struct usnic_uiom_pd *pd = uiomr->pd;
25162306a36Sopenharmony_ci	long int va = uiomr->va & PAGE_MASK;
25262306a36Sopenharmony_ci	int flags = IOMMU_READ | IOMMU_CACHE;
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	flags |= (uiomr->writable) ? IOMMU_WRITE : 0;
25562306a36Sopenharmony_ci	chunk = list_first_entry(&uiomr->chunk_list, struct usnic_uiom_chunk,
25662306a36Sopenharmony_ci									list);
25762306a36Sopenharmony_ci	list_for_each_entry(interval_node, intervals, link) {
25862306a36Sopenharmony_ciiter_chunk:
25962306a36Sopenharmony_ci		for (i = 0; i < chunk->nents; i++, va += PAGE_SIZE) {
26062306a36Sopenharmony_ci			pa = sg_phys(&chunk->page_list[i]);
26162306a36Sopenharmony_ci			if ((va >> PAGE_SHIFT) < interval_node->start)
26262306a36Sopenharmony_ci				continue;
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci			if ((va >> PAGE_SHIFT) == interval_node->start) {
26562306a36Sopenharmony_ci				/* First page of the interval */
26662306a36Sopenharmony_ci				va_start = va;
26762306a36Sopenharmony_ci				pa_start = pa;
26862306a36Sopenharmony_ci				pa_end = pa;
26962306a36Sopenharmony_ci			}
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci			WARN_ON(va_start == -EINVAL);
27262306a36Sopenharmony_ci
27362306a36Sopenharmony_ci			if ((pa_end + PAGE_SIZE != pa) &&
27462306a36Sopenharmony_ci					(pa != pa_start)) {
27562306a36Sopenharmony_ci				/* PAs are not contiguous */
27662306a36Sopenharmony_ci				size = pa_end - pa_start + PAGE_SIZE;
27762306a36Sopenharmony_ci				usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x",
27862306a36Sopenharmony_ci					va_start, &pa_start, size, flags);
27962306a36Sopenharmony_ci				err = iommu_map(pd->domain, va_start, pa_start,
28062306a36Sopenharmony_ci						size, flags, GFP_ATOMIC);
28162306a36Sopenharmony_ci				if (err) {
28262306a36Sopenharmony_ci					usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
28362306a36Sopenharmony_ci						va_start, &pa_start, size, err);
28462306a36Sopenharmony_ci					goto err_out;
28562306a36Sopenharmony_ci				}
28662306a36Sopenharmony_ci				va_start = va;
28762306a36Sopenharmony_ci				pa_start = pa;
28862306a36Sopenharmony_ci				pa_end = pa;
28962306a36Sopenharmony_ci			}
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci			if ((va >> PAGE_SHIFT) == interval_node->last) {
29262306a36Sopenharmony_ci				/* Last page of the interval */
29362306a36Sopenharmony_ci				size = pa - pa_start + PAGE_SIZE;
29462306a36Sopenharmony_ci				usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n",
29562306a36Sopenharmony_ci					va_start, &pa_start, size, flags);
29662306a36Sopenharmony_ci				err = iommu_map(pd->domain, va_start, pa_start,
29762306a36Sopenharmony_ci						size, flags, GFP_ATOMIC);
29862306a36Sopenharmony_ci				if (err) {
29962306a36Sopenharmony_ci					usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
30062306a36Sopenharmony_ci						va_start, &pa_start, size, err);
30162306a36Sopenharmony_ci					goto err_out;
30262306a36Sopenharmony_ci				}
30362306a36Sopenharmony_ci				break;
30462306a36Sopenharmony_ci			}
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_ci			if (pa != pa_start)
30762306a36Sopenharmony_ci				pa_end += PAGE_SIZE;
30862306a36Sopenharmony_ci		}
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci		if (i == chunk->nents) {
31162306a36Sopenharmony_ci			/*
31262306a36Sopenharmony_ci			 * Hit last entry of the chunk,
31362306a36Sopenharmony_ci			 * hence advance to next chunk
31462306a36Sopenharmony_ci			 */
31562306a36Sopenharmony_ci			chunk = list_first_entry(&chunk->list,
31662306a36Sopenharmony_ci							struct usnic_uiom_chunk,
31762306a36Sopenharmony_ci							list);
31862306a36Sopenharmony_ci			goto iter_chunk;
31962306a36Sopenharmony_ci		}
32062306a36Sopenharmony_ci	}
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci	return 0;
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_cierr_out:
32562306a36Sopenharmony_ci	usnic_uiom_unmap_sorted_intervals(intervals, pd);
32662306a36Sopenharmony_ci	return err;
32762306a36Sopenharmony_ci}
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_cistruct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
33062306a36Sopenharmony_ci						unsigned long addr, size_t size,
33162306a36Sopenharmony_ci						int writable, int dmasync)
33262306a36Sopenharmony_ci{
33362306a36Sopenharmony_ci	struct usnic_uiom_reg *uiomr;
33462306a36Sopenharmony_ci	unsigned long va_base, vpn_start, vpn_last;
33562306a36Sopenharmony_ci	unsigned long npages;
33662306a36Sopenharmony_ci	int offset, err;
33762306a36Sopenharmony_ci	LIST_HEAD(sorted_diff_intervals);
33862306a36Sopenharmony_ci
33962306a36Sopenharmony_ci	/*
34062306a36Sopenharmony_ci	 * Intel IOMMU map throws an error if a translation entry is
34162306a36Sopenharmony_ci	 * changed from read to write.  This module may not unmap
34262306a36Sopenharmony_ci	 * and then remap the entry after fixing the permission
34362306a36Sopenharmony_ci	 * b/c this open up a small windows where hw DMA may page fault
34462306a36Sopenharmony_ci	 * Hence, make all entries to be writable.
34562306a36Sopenharmony_ci	 */
34662306a36Sopenharmony_ci	writable = 1;
34762306a36Sopenharmony_ci
34862306a36Sopenharmony_ci	va_base = addr & PAGE_MASK;
34962306a36Sopenharmony_ci	offset = addr & ~PAGE_MASK;
35062306a36Sopenharmony_ci	npages = PAGE_ALIGN(size + offset) >> PAGE_SHIFT;
35162306a36Sopenharmony_ci	vpn_start = (addr & PAGE_MASK) >> PAGE_SHIFT;
35262306a36Sopenharmony_ci	vpn_last = vpn_start + npages - 1;
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci	uiomr = kmalloc(sizeof(*uiomr), GFP_KERNEL);
35562306a36Sopenharmony_ci	if (!uiomr)
35662306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
35762306a36Sopenharmony_ci
35862306a36Sopenharmony_ci	uiomr->va = va_base;
35962306a36Sopenharmony_ci	uiomr->offset = offset;
36062306a36Sopenharmony_ci	uiomr->length = size;
36162306a36Sopenharmony_ci	uiomr->writable = writable;
36262306a36Sopenharmony_ci	uiomr->pd = pd;
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	err = usnic_uiom_get_pages(addr, size, writable, dmasync,
36562306a36Sopenharmony_ci				   uiomr);
36662306a36Sopenharmony_ci	if (err) {
36762306a36Sopenharmony_ci		usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n",
36862306a36Sopenharmony_ci				vpn_start, vpn_last, err);
36962306a36Sopenharmony_ci		goto out_free_uiomr;
37062306a36Sopenharmony_ci	}
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci	spin_lock(&pd->lock);
37362306a36Sopenharmony_ci	err = usnic_uiom_get_intervals_diff(vpn_start, vpn_last,
37462306a36Sopenharmony_ci						(writable) ? IOMMU_WRITE : 0,
37562306a36Sopenharmony_ci						IOMMU_WRITE,
37662306a36Sopenharmony_ci						&pd->root,
37762306a36Sopenharmony_ci						&sorted_diff_intervals);
37862306a36Sopenharmony_ci	if (err) {
37962306a36Sopenharmony_ci		usnic_err("Failed disjoint interval vpn [0x%lx,0x%lx] err %d\n",
38062306a36Sopenharmony_ci						vpn_start, vpn_last, err);
38162306a36Sopenharmony_ci		goto out_put_pages;
38262306a36Sopenharmony_ci	}
38362306a36Sopenharmony_ci
38462306a36Sopenharmony_ci	err = usnic_uiom_map_sorted_intervals(&sorted_diff_intervals, uiomr);
38562306a36Sopenharmony_ci	if (err) {
38662306a36Sopenharmony_ci		usnic_err("Failed map interval vpn [0x%lx,0x%lx] err %d\n",
38762306a36Sopenharmony_ci						vpn_start, vpn_last, err);
38862306a36Sopenharmony_ci		goto out_put_intervals;
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci	}
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_ci	err = usnic_uiom_insert_interval(&pd->root, vpn_start, vpn_last,
39362306a36Sopenharmony_ci					(writable) ? IOMMU_WRITE : 0);
39462306a36Sopenharmony_ci	if (err) {
39562306a36Sopenharmony_ci		usnic_err("Failed insert interval vpn [0x%lx,0x%lx] err %d\n",
39662306a36Sopenharmony_ci						vpn_start, vpn_last, err);
39762306a36Sopenharmony_ci		goto out_unmap_intervals;
39862306a36Sopenharmony_ci	}
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci	usnic_uiom_put_interval_set(&sorted_diff_intervals);
40162306a36Sopenharmony_ci	spin_unlock(&pd->lock);
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci	return uiomr;
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_ciout_unmap_intervals:
40662306a36Sopenharmony_ci	usnic_uiom_unmap_sorted_intervals(&sorted_diff_intervals, pd);
40762306a36Sopenharmony_ciout_put_intervals:
40862306a36Sopenharmony_ci	usnic_uiom_put_interval_set(&sorted_diff_intervals);
40962306a36Sopenharmony_ciout_put_pages:
41062306a36Sopenharmony_ci	usnic_uiom_put_pages(&uiomr->chunk_list, 0);
41162306a36Sopenharmony_ci	spin_unlock(&pd->lock);
41262306a36Sopenharmony_ci	mmdrop(uiomr->owning_mm);
41362306a36Sopenharmony_ciout_free_uiomr:
41462306a36Sopenharmony_ci	kfree(uiomr);
41562306a36Sopenharmony_ci	return ERR_PTR(err);
41662306a36Sopenharmony_ci}
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_cistatic void __usnic_uiom_release_tail(struct usnic_uiom_reg *uiomr)
41962306a36Sopenharmony_ci{
42062306a36Sopenharmony_ci	mmdrop(uiomr->owning_mm);
42162306a36Sopenharmony_ci	kfree(uiomr);
42262306a36Sopenharmony_ci}
42362306a36Sopenharmony_ci
42462306a36Sopenharmony_cistatic inline size_t usnic_uiom_num_pages(struct usnic_uiom_reg *uiomr)
42562306a36Sopenharmony_ci{
42662306a36Sopenharmony_ci	return PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
42762306a36Sopenharmony_ci}
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_civoid usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr)
43062306a36Sopenharmony_ci{
43162306a36Sopenharmony_ci	__usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
43262306a36Sopenharmony_ci
43362306a36Sopenharmony_ci	atomic64_sub(usnic_uiom_num_pages(uiomr), &uiomr->owning_mm->pinned_vm);
43462306a36Sopenharmony_ci	__usnic_uiom_release_tail(uiomr);
43562306a36Sopenharmony_ci}
43662306a36Sopenharmony_ci
43762306a36Sopenharmony_cistruct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev)
43862306a36Sopenharmony_ci{
43962306a36Sopenharmony_ci	struct usnic_uiom_pd *pd;
44062306a36Sopenharmony_ci	void *domain;
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_ci	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
44362306a36Sopenharmony_ci	if (!pd)
44462306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_ci	pd->domain = domain = iommu_domain_alloc(dev->bus);
44762306a36Sopenharmony_ci	if (!domain) {
44862306a36Sopenharmony_ci		usnic_err("Failed to allocate IOMMU domain");
44962306a36Sopenharmony_ci		kfree(pd);
45062306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
45162306a36Sopenharmony_ci	}
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_ci	iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL);
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_ci	spin_lock_init(&pd->lock);
45662306a36Sopenharmony_ci	INIT_LIST_HEAD(&pd->devs);
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci	return pd;
45962306a36Sopenharmony_ci}
46062306a36Sopenharmony_ci
46162306a36Sopenharmony_civoid usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd)
46262306a36Sopenharmony_ci{
46362306a36Sopenharmony_ci	iommu_domain_free(pd->domain);
46462306a36Sopenharmony_ci	kfree(pd);
46562306a36Sopenharmony_ci}
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ciint usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev)
46862306a36Sopenharmony_ci{
46962306a36Sopenharmony_ci	struct usnic_uiom_dev *uiom_dev;
47062306a36Sopenharmony_ci	int err;
47162306a36Sopenharmony_ci
47262306a36Sopenharmony_ci	uiom_dev = kzalloc(sizeof(*uiom_dev), GFP_ATOMIC);
47362306a36Sopenharmony_ci	if (!uiom_dev)
47462306a36Sopenharmony_ci		return -ENOMEM;
47562306a36Sopenharmony_ci	uiom_dev->dev = dev;
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci	err = iommu_attach_device(pd->domain, dev);
47862306a36Sopenharmony_ci	if (err)
47962306a36Sopenharmony_ci		goto out_free_dev;
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
48262306a36Sopenharmony_ci		usnic_err("IOMMU of %s does not support cache coherency\n",
48362306a36Sopenharmony_ci				dev_name(dev));
48462306a36Sopenharmony_ci		err = -EINVAL;
48562306a36Sopenharmony_ci		goto out_detach_device;
48662306a36Sopenharmony_ci	}
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci	spin_lock(&pd->lock);
48962306a36Sopenharmony_ci	list_add_tail(&uiom_dev->link, &pd->devs);
49062306a36Sopenharmony_ci	pd->dev_cnt++;
49162306a36Sopenharmony_ci	spin_unlock(&pd->lock);
49262306a36Sopenharmony_ci
49362306a36Sopenharmony_ci	return 0;
49462306a36Sopenharmony_ci
49562306a36Sopenharmony_ciout_detach_device:
49662306a36Sopenharmony_ci	iommu_detach_device(pd->domain, dev);
49762306a36Sopenharmony_ciout_free_dev:
49862306a36Sopenharmony_ci	kfree(uiom_dev);
49962306a36Sopenharmony_ci	return err;
50062306a36Sopenharmony_ci}
50162306a36Sopenharmony_ci
50262306a36Sopenharmony_civoid usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, struct device *dev)
50362306a36Sopenharmony_ci{
50462306a36Sopenharmony_ci	struct usnic_uiom_dev *uiom_dev;
50562306a36Sopenharmony_ci	int found = 0;
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	spin_lock(&pd->lock);
50862306a36Sopenharmony_ci	list_for_each_entry(uiom_dev, &pd->devs, link) {
50962306a36Sopenharmony_ci		if (uiom_dev->dev == dev) {
51062306a36Sopenharmony_ci			found = 1;
51162306a36Sopenharmony_ci			break;
51262306a36Sopenharmony_ci		}
51362306a36Sopenharmony_ci	}
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	if (!found) {
51662306a36Sopenharmony_ci		usnic_err("Unable to free dev %s - not found\n",
51762306a36Sopenharmony_ci				dev_name(dev));
51862306a36Sopenharmony_ci		spin_unlock(&pd->lock);
51962306a36Sopenharmony_ci		return;
52062306a36Sopenharmony_ci	}
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci	list_del(&uiom_dev->link);
52362306a36Sopenharmony_ci	pd->dev_cnt--;
52462306a36Sopenharmony_ci	spin_unlock(&pd->lock);
52562306a36Sopenharmony_ci
52662306a36Sopenharmony_ci	return iommu_detach_device(pd->domain, dev);
52762306a36Sopenharmony_ci}
52862306a36Sopenharmony_ci
52962306a36Sopenharmony_cistruct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd)
53062306a36Sopenharmony_ci{
53162306a36Sopenharmony_ci	struct usnic_uiom_dev *uiom_dev;
53262306a36Sopenharmony_ci	struct device **devs;
53362306a36Sopenharmony_ci	int i = 0;
53462306a36Sopenharmony_ci
53562306a36Sopenharmony_ci	spin_lock(&pd->lock);
53662306a36Sopenharmony_ci	devs = kcalloc(pd->dev_cnt + 1, sizeof(*devs), GFP_ATOMIC);
53762306a36Sopenharmony_ci	if (!devs) {
53862306a36Sopenharmony_ci		devs = ERR_PTR(-ENOMEM);
53962306a36Sopenharmony_ci		goto out;
54062306a36Sopenharmony_ci	}
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ci	list_for_each_entry(uiom_dev, &pd->devs, link) {
54362306a36Sopenharmony_ci		devs[i++] = uiom_dev->dev;
54462306a36Sopenharmony_ci	}
54562306a36Sopenharmony_ciout:
54662306a36Sopenharmony_ci	spin_unlock(&pd->lock);
54762306a36Sopenharmony_ci	return devs;
54862306a36Sopenharmony_ci}
54962306a36Sopenharmony_ci
/*
 * Free a device array returned by usnic_uiom_get_dev_list(). Only the array
 * is freed; the devices it points to are left alone.
 */
void usnic_uiom_free_dev_list(struct device **devs)
{
	kfree(devs);
}
554