162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * VFIO: IOMMU DMA mapping support for TCE on POWER
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2013 IBM Corp.  All rights reserved.
662306a36Sopenharmony_ci *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
762306a36Sopenharmony_ci * Copyright Gavin Shan, IBM Corporation 2014.
862306a36Sopenharmony_ci *
962306a36Sopenharmony_ci * Derived from original vfio_iommu_type1.c:
1062306a36Sopenharmony_ci * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
1162306a36Sopenharmony_ci *     Author: Alex Williamson <alex.williamson@redhat.com>
1262306a36Sopenharmony_ci */
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci#include <linux/module.h>
1562306a36Sopenharmony_ci#include <linux/pci.h>
1662306a36Sopenharmony_ci#include <linux/slab.h>
1762306a36Sopenharmony_ci#include <linux/uaccess.h>
1862306a36Sopenharmony_ci#include <linux/err.h>
1962306a36Sopenharmony_ci#include <linux/vfio.h>
2062306a36Sopenharmony_ci#include <linux/vmalloc.h>
2162306a36Sopenharmony_ci#include <linux/sched/mm.h>
2262306a36Sopenharmony_ci#include <linux/sched/signal.h>
2362306a36Sopenharmony_ci#include <linux/mm.h>
2462306a36Sopenharmony_ci#include "vfio.h"
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci#include <asm/iommu.h>
2762306a36Sopenharmony_ci#include <asm/tce.h>
2862306a36Sopenharmony_ci#include <asm/mmu_context.h>
2962306a36Sopenharmony_ci
/*
 * Module identification strings.
 * NOTE(review): their consumers (presumably MODULE_VERSION/MODULE_AUTHOR/
 * MODULE_DESCRIPTION) are outside this part of the file - confirm.
 */
#define DRIVER_VERSION  "0.1"
#define DRIVER_AUTHOR   "aik@ozlabs.ru"
#define DRIVER_DESC     "VFIO IOMMU SPAPR TCE"
3362306a36Sopenharmony_ci
/*
 * Forward declaration: tce_iommu_release() calls this before the
 * definition has been seen by the compiler.
 */
static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);
3662306a36Sopenharmony_ci
/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU.
 */
4362306a36Sopenharmony_ci
/* One IOMMU group linked into a container's group_list. */
struct tce_iommu_group {
	struct list_head next;		/* link in tce_container::group_list */
	struct iommu_group *grp;	/* the IOMMU group this entry refers to */
};
4862306a36Sopenharmony_ci
/*
 * A container needs to remember which preregistered region it has
 * referenced to do proper cleanup at the userspace process exit.
 */
struct tce_iommu_prereg {
	struct list_head next;			/* link in tce_container::prereg_list */
	struct mm_iommu_table_group_mem_t *mem;	/* region obtained via mm_iommu_get()/mm_iommu_new() */
};
5762306a36Sopenharmony_ci
/*
 * The container descriptor supports only a single group per container.
 * Required by the API as the container is not supplied with the IOMMU group
 * at the moment of initialization.
 *
 * NOTE(review): groups are kept on a list (group_list) and
 * tce_iommu_release() detaches them in a loop, so the "single group"
 * statement above may be out of date - confirm.
 */
struct tce_container {
	struct mutex lock;		/* initialised in tce_iommu_open(), destroyed in tce_iommu_release() */
	bool enabled;			/* set by tce_iommu_enable() and tce_iommu_register_pages(); cleared by tce_iommu_disable() */
	bool v2;			/* true when opened as VFIO_SPAPR_TCE_v2_IOMMU */
	bool def_window_pending;	/* NOTE(review): not used in this part of the file */
	unsigned long locked_pages;	/* pages charged by tce_iommu_enable(), released by tce_iommu_disable() */
	struct mm_struct *mm;		/* mm recorded by tce_iommu_mm_set(); NULL until then */
	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; /* NULL slot == free */
	struct list_head group_list;	/* list of struct tce_iommu_group */
	struct list_head prereg_list;	/* list of struct tce_iommu_prereg */
};
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_cistatic long tce_iommu_mm_set(struct tce_container *container)
7662306a36Sopenharmony_ci{
7762306a36Sopenharmony_ci	if (container->mm) {
7862306a36Sopenharmony_ci		if (container->mm == current->mm)
7962306a36Sopenharmony_ci			return 0;
8062306a36Sopenharmony_ci		return -EPERM;
8162306a36Sopenharmony_ci	}
8262306a36Sopenharmony_ci	BUG_ON(!current->mm);
8362306a36Sopenharmony_ci	container->mm = current->mm;
8462306a36Sopenharmony_ci	mmgrab(container->mm);
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_ci	return 0;
8762306a36Sopenharmony_ci}
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_cistatic long tce_iommu_prereg_free(struct tce_container *container,
9062306a36Sopenharmony_ci		struct tce_iommu_prereg *tcemem)
9162306a36Sopenharmony_ci{
9262306a36Sopenharmony_ci	long ret;
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	ret = mm_iommu_put(container->mm, tcemem->mem);
9562306a36Sopenharmony_ci	if (ret)
9662306a36Sopenharmony_ci		return ret;
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	list_del(&tcemem->next);
9962306a36Sopenharmony_ci	kfree(tcemem);
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	return 0;
10262306a36Sopenharmony_ci}
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_cistatic long tce_iommu_unregister_pages(struct tce_container *container,
10562306a36Sopenharmony_ci		__u64 vaddr, __u64 size)
10662306a36Sopenharmony_ci{
10762306a36Sopenharmony_ci	struct mm_iommu_table_group_mem_t *mem;
10862306a36Sopenharmony_ci	struct tce_iommu_prereg *tcemem;
10962306a36Sopenharmony_ci	bool found = false;
11062306a36Sopenharmony_ci	long ret;
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
11362306a36Sopenharmony_ci		return -EINVAL;
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
11662306a36Sopenharmony_ci	if (!mem)
11762306a36Sopenharmony_ci		return -ENOENT;
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci	list_for_each_entry(tcemem, &container->prereg_list, next) {
12062306a36Sopenharmony_ci		if (tcemem->mem == mem) {
12162306a36Sopenharmony_ci			found = true;
12262306a36Sopenharmony_ci			break;
12362306a36Sopenharmony_ci		}
12462306a36Sopenharmony_ci	}
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci	if (!found)
12762306a36Sopenharmony_ci		ret = -ENOENT;
12862306a36Sopenharmony_ci	else
12962306a36Sopenharmony_ci		ret = tce_iommu_prereg_free(container, tcemem);
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci	mm_iommu_put(container->mm, mem);
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	return ret;
13462306a36Sopenharmony_ci}
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_cistatic long tce_iommu_register_pages(struct tce_container *container,
13762306a36Sopenharmony_ci		__u64 vaddr, __u64 size)
13862306a36Sopenharmony_ci{
13962306a36Sopenharmony_ci	long ret = 0;
14062306a36Sopenharmony_ci	struct mm_iommu_table_group_mem_t *mem = NULL;
14162306a36Sopenharmony_ci	struct tce_iommu_prereg *tcemem;
14262306a36Sopenharmony_ci	unsigned long entries = size >> PAGE_SHIFT;
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
14562306a36Sopenharmony_ci			((vaddr + size) < vaddr))
14662306a36Sopenharmony_ci		return -EINVAL;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	mem = mm_iommu_get(container->mm, vaddr, entries);
14962306a36Sopenharmony_ci	if (mem) {
15062306a36Sopenharmony_ci		list_for_each_entry(tcemem, &container->prereg_list, next) {
15162306a36Sopenharmony_ci			if (tcemem->mem == mem) {
15262306a36Sopenharmony_ci				ret = -EBUSY;
15362306a36Sopenharmony_ci				goto put_exit;
15462306a36Sopenharmony_ci			}
15562306a36Sopenharmony_ci		}
15662306a36Sopenharmony_ci	} else {
15762306a36Sopenharmony_ci		ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
15862306a36Sopenharmony_ci		if (ret)
15962306a36Sopenharmony_ci			return ret;
16062306a36Sopenharmony_ci	}
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
16362306a36Sopenharmony_ci	if (!tcemem) {
16462306a36Sopenharmony_ci		ret = -ENOMEM;
16562306a36Sopenharmony_ci		goto put_exit;
16662306a36Sopenharmony_ci	}
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	tcemem->mem = mem;
16962306a36Sopenharmony_ci	list_add(&tcemem->next, &container->prereg_list);
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci	container->enabled = true;
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	return 0;
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ciput_exit:
17662306a36Sopenharmony_ci	mm_iommu_put(container->mm, mem);
17762306a36Sopenharmony_ci	return ret;
17862306a36Sopenharmony_ci}
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_cistatic bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
18162306a36Sopenharmony_ci		unsigned int it_page_shift)
18262306a36Sopenharmony_ci{
18362306a36Sopenharmony_ci	struct page *page;
18462306a36Sopenharmony_ci	unsigned long size = 0;
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	if (mm_iommu_is_devmem(mm, hpa, it_page_shift, &size))
18762306a36Sopenharmony_ci		return size == (1UL << it_page_shift);
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci	page = pfn_to_page(hpa >> PAGE_SHIFT);
19062306a36Sopenharmony_ci	/*
19162306a36Sopenharmony_ci	 * Check that the TCE table granularity is not bigger than the size of
19262306a36Sopenharmony_ci	 * a page we just found. Otherwise the hardware can get access to
19362306a36Sopenharmony_ci	 * a bigger memory chunk that it should.
19462306a36Sopenharmony_ci	 */
19562306a36Sopenharmony_ci	return page_shift(compound_head(page)) >= it_page_shift;
19662306a36Sopenharmony_ci}
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_cistatic inline bool tce_groups_attached(struct tce_container *container)
19962306a36Sopenharmony_ci{
20062306a36Sopenharmony_ci	return !list_empty(&container->group_list);
20162306a36Sopenharmony_ci}
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_cistatic long tce_iommu_find_table(struct tce_container *container,
20462306a36Sopenharmony_ci		phys_addr_t ioba, struct iommu_table **ptbl)
20562306a36Sopenharmony_ci{
20662306a36Sopenharmony_ci	long i;
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
20962306a36Sopenharmony_ci		struct iommu_table *tbl = container->tables[i];
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci		if (tbl) {
21262306a36Sopenharmony_ci			unsigned long entry = ioba >> tbl->it_page_shift;
21362306a36Sopenharmony_ci			unsigned long start = tbl->it_offset;
21462306a36Sopenharmony_ci			unsigned long end = start + tbl->it_size;
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci			if ((start <= entry) && (entry < end)) {
21762306a36Sopenharmony_ci				*ptbl = tbl;
21862306a36Sopenharmony_ci				return i;
21962306a36Sopenharmony_ci			}
22062306a36Sopenharmony_ci		}
22162306a36Sopenharmony_ci	}
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_ci	return -1;
22462306a36Sopenharmony_ci}
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_cistatic int tce_iommu_find_free_table(struct tce_container *container)
22762306a36Sopenharmony_ci{
22862306a36Sopenharmony_ci	int i;
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
23162306a36Sopenharmony_ci		if (!container->tables[i])
23262306a36Sopenharmony_ci			return i;
23362306a36Sopenharmony_ci	}
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	return -ENOSPC;
23662306a36Sopenharmony_ci}
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_cistatic int tce_iommu_enable(struct tce_container *container)
23962306a36Sopenharmony_ci{
24062306a36Sopenharmony_ci	int ret = 0;
24162306a36Sopenharmony_ci	unsigned long locked;
24262306a36Sopenharmony_ci	struct iommu_table_group *table_group;
24362306a36Sopenharmony_ci	struct tce_iommu_group *tcegrp;
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	if (container->enabled)
24662306a36Sopenharmony_ci		return -EBUSY;
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci	/*
24962306a36Sopenharmony_ci	 * When userspace pages are mapped into the IOMMU, they are effectively
25062306a36Sopenharmony_ci	 * locked memory, so, theoretically, we need to update the accounting
25162306a36Sopenharmony_ci	 * of locked pages on each map and unmap.  For powerpc, the map unmap
25262306a36Sopenharmony_ci	 * paths can be very hot, though, and the accounting would kill
25362306a36Sopenharmony_ci	 * performance, especially since it would be difficult to impossible
25462306a36Sopenharmony_ci	 * to handle the accounting in real mode only.
25562306a36Sopenharmony_ci	 *
25662306a36Sopenharmony_ci	 * To address that, rather than precisely accounting every page, we
25762306a36Sopenharmony_ci	 * instead account for a worst case on locked memory when the iommu is
25862306a36Sopenharmony_ci	 * enabled and disabled.  The worst case upper bound on locked memory
25962306a36Sopenharmony_ci	 * is the size of the whole iommu window, which is usually relatively
26062306a36Sopenharmony_ci	 * small (compared to total memory sizes) on POWER hardware.
26162306a36Sopenharmony_ci	 *
26262306a36Sopenharmony_ci	 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
26362306a36Sopenharmony_ci	 * that would effectively kill the guest at random points, much better
26462306a36Sopenharmony_ci	 * enforcing the limit based on the max that the guest can map.
26562306a36Sopenharmony_ci	 *
26662306a36Sopenharmony_ci	 * Unfortunately at the moment it counts whole tables, no matter how
26762306a36Sopenharmony_ci	 * much memory the guest has. I.e. for 4GB guest and 4 IOMMU groups
26862306a36Sopenharmony_ci	 * each with 2GB DMA window, 8GB will be counted here. The reason for
26962306a36Sopenharmony_ci	 * this is that we cannot tell here the amount of RAM used by the guest
27062306a36Sopenharmony_ci	 * as this information is only available from KVM and VFIO is
27162306a36Sopenharmony_ci	 * KVM agnostic.
27262306a36Sopenharmony_ci	 *
27362306a36Sopenharmony_ci	 * So we do not allow enabling a container without a group attached
27462306a36Sopenharmony_ci	 * as there is no way to know how much we should increment
27562306a36Sopenharmony_ci	 * the locked_vm counter.
27662306a36Sopenharmony_ci	 */
27762306a36Sopenharmony_ci	if (!tce_groups_attached(container))
27862306a36Sopenharmony_ci		return -ENODEV;
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_ci	tcegrp = list_first_entry(&container->group_list,
28162306a36Sopenharmony_ci			struct tce_iommu_group, next);
28262306a36Sopenharmony_ci	table_group = iommu_group_get_iommudata(tcegrp->grp);
28362306a36Sopenharmony_ci	if (!table_group)
28462306a36Sopenharmony_ci		return -ENODEV;
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_ci	if (!table_group->tce32_size)
28762306a36Sopenharmony_ci		return -EPERM;
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci	ret = tce_iommu_mm_set(container);
29062306a36Sopenharmony_ci	if (ret)
29162306a36Sopenharmony_ci		return ret;
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci	locked = table_group->tce32_size >> PAGE_SHIFT;
29462306a36Sopenharmony_ci	ret = account_locked_vm(container->mm, locked, true);
29562306a36Sopenharmony_ci	if (ret)
29662306a36Sopenharmony_ci		return ret;
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	container->locked_pages = locked;
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci	container->enabled = true;
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci	return ret;
30362306a36Sopenharmony_ci}
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_cistatic void tce_iommu_disable(struct tce_container *container)
30662306a36Sopenharmony_ci{
30762306a36Sopenharmony_ci	if (!container->enabled)
30862306a36Sopenharmony_ci		return;
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	container->enabled = false;
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ci	BUG_ON(!container->mm);
31362306a36Sopenharmony_ci	account_locked_vm(container->mm, container->locked_pages, false);
31462306a36Sopenharmony_ci}
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_cistatic void *tce_iommu_open(unsigned long arg)
31762306a36Sopenharmony_ci{
31862306a36Sopenharmony_ci	struct tce_container *container;
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ci	if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
32162306a36Sopenharmony_ci		pr_err("tce_vfio: Wrong IOMMU type\n");
32262306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
32362306a36Sopenharmony_ci	}
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci	container = kzalloc(sizeof(*container), GFP_KERNEL);
32662306a36Sopenharmony_ci	if (!container)
32762306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_ci	mutex_init(&container->lock);
33062306a36Sopenharmony_ci	INIT_LIST_HEAD_RCU(&container->group_list);
33162306a36Sopenharmony_ci	INIT_LIST_HEAD_RCU(&container->prereg_list);
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci	return container;
33662306a36Sopenharmony_ci}
33762306a36Sopenharmony_ci
/*
 * Forward declarations: tce_iommu_release() below uses both helpers
 * before their definitions.
 */
static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages);
static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl);
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_cistatic void tce_iommu_release(void *iommu_data)
34562306a36Sopenharmony_ci{
34662306a36Sopenharmony_ci	struct tce_container *container = iommu_data;
34762306a36Sopenharmony_ci	struct tce_iommu_group *tcegrp;
34862306a36Sopenharmony_ci	struct tce_iommu_prereg *tcemem, *tmtmp;
34962306a36Sopenharmony_ci	long i;
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_ci	while (tce_groups_attached(container)) {
35262306a36Sopenharmony_ci		tcegrp = list_first_entry(&container->group_list,
35362306a36Sopenharmony_ci				struct tce_iommu_group, next);
35462306a36Sopenharmony_ci		tce_iommu_detach_group(iommu_data, tcegrp->grp);
35562306a36Sopenharmony_ci	}
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_ci	/*
35862306a36Sopenharmony_ci	 * If VFIO created a table, it was not disposed
35962306a36Sopenharmony_ci	 * by tce_iommu_detach_group() so do it now.
36062306a36Sopenharmony_ci	 */
36162306a36Sopenharmony_ci	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
36262306a36Sopenharmony_ci		struct iommu_table *tbl = container->tables[i];
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci		if (!tbl)
36562306a36Sopenharmony_ci			continue;
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
36862306a36Sopenharmony_ci		tce_iommu_free_table(container, tbl);
36962306a36Sopenharmony_ci	}
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci	list_for_each_entry_safe(tcemem, tmtmp, &container->prereg_list, next)
37262306a36Sopenharmony_ci		WARN_ON(tce_iommu_prereg_free(container, tcemem));
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci	tce_iommu_disable(container);
37562306a36Sopenharmony_ci	if (container->mm)
37662306a36Sopenharmony_ci		mmdrop(container->mm);
37762306a36Sopenharmony_ci	mutex_destroy(&container->lock);
37862306a36Sopenharmony_ci
37962306a36Sopenharmony_ci	kfree(container);
38062306a36Sopenharmony_ci}
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_cistatic void tce_iommu_unuse_page(unsigned long hpa)
38362306a36Sopenharmony_ci{
38462306a36Sopenharmony_ci	struct page *page;
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci	page = pfn_to_page(hpa >> PAGE_SHIFT);
38762306a36Sopenharmony_ci	unpin_user_page(page);
38862306a36Sopenharmony_ci}
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_cistatic int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
39162306a36Sopenharmony_ci		unsigned long tce, unsigned long shift,
39262306a36Sopenharmony_ci		unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
39362306a36Sopenharmony_ci{
39462306a36Sopenharmony_ci	long ret = 0;
39562306a36Sopenharmony_ci	struct mm_iommu_table_group_mem_t *mem;
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci	mem = mm_iommu_lookup(container->mm, tce, 1ULL << shift);
39862306a36Sopenharmony_ci	if (!mem)
39962306a36Sopenharmony_ci		return -EINVAL;
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci	ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa);
40262306a36Sopenharmony_ci	if (ret)
40362306a36Sopenharmony_ci		return -EINVAL;
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_ci	*pmem = mem;
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci	return 0;
40862306a36Sopenharmony_ci}
40962306a36Sopenharmony_ci
41062306a36Sopenharmony_cistatic void tce_iommu_unuse_page_v2(struct tce_container *container,
41162306a36Sopenharmony_ci		struct iommu_table *tbl, unsigned long entry)
41262306a36Sopenharmony_ci{
41362306a36Sopenharmony_ci	struct mm_iommu_table_group_mem_t *mem = NULL;
41462306a36Sopenharmony_ci	int ret;
41562306a36Sopenharmony_ci	unsigned long hpa = 0;
41662306a36Sopenharmony_ci	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_ci	if (!pua)
41962306a36Sopenharmony_ci		return;
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ci	ret = tce_iommu_prereg_ua_to_hpa(container, be64_to_cpu(*pua),
42262306a36Sopenharmony_ci			tbl->it_page_shift, &hpa, &mem);
42362306a36Sopenharmony_ci	if (ret)
42462306a36Sopenharmony_ci		pr_debug("%s: tce %llx at #%lx was not cached, ret=%d\n",
42562306a36Sopenharmony_ci				__func__, be64_to_cpu(*pua), entry, ret);
42662306a36Sopenharmony_ci	if (mem)
42762306a36Sopenharmony_ci		mm_iommu_mapped_dec(mem);
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	*pua = cpu_to_be64(0);
43062306a36Sopenharmony_ci}
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_cistatic int tce_iommu_clear(struct tce_container *container,
43362306a36Sopenharmony_ci		struct iommu_table *tbl,
43462306a36Sopenharmony_ci		unsigned long entry, unsigned long pages)
43562306a36Sopenharmony_ci{
43662306a36Sopenharmony_ci	unsigned long oldhpa;
43762306a36Sopenharmony_ci	long ret;
43862306a36Sopenharmony_ci	enum dma_data_direction direction;
43962306a36Sopenharmony_ci	unsigned long lastentry = entry + pages, firstentry = entry;
44062306a36Sopenharmony_ci
44162306a36Sopenharmony_ci	for ( ; entry < lastentry; ++entry) {
44262306a36Sopenharmony_ci		if (tbl->it_indirect_levels && tbl->it_userspace) {
44362306a36Sopenharmony_ci			/*
44462306a36Sopenharmony_ci			 * For multilevel tables, we can take a shortcut here
44562306a36Sopenharmony_ci			 * and skip some TCEs as we know that the userspace
44662306a36Sopenharmony_ci			 * addresses cache is a mirror of the real TCE table
44762306a36Sopenharmony_ci			 * and if it is missing some indirect levels, then
44862306a36Sopenharmony_ci			 * the hardware table does not have them allocated
44962306a36Sopenharmony_ci			 * either and therefore does not require updating.
45062306a36Sopenharmony_ci			 */
45162306a36Sopenharmony_ci			__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl,
45262306a36Sopenharmony_ci					entry);
45362306a36Sopenharmony_ci			if (!pua) {
45462306a36Sopenharmony_ci				/* align to level_size which is power of two */
45562306a36Sopenharmony_ci				entry |= tbl->it_level_size - 1;
45662306a36Sopenharmony_ci				continue;
45762306a36Sopenharmony_ci			}
45862306a36Sopenharmony_ci		}
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci		cond_resched();
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci		direction = DMA_NONE;
46362306a36Sopenharmony_ci		oldhpa = 0;
46462306a36Sopenharmony_ci		ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry, &oldhpa,
46562306a36Sopenharmony_ci				&direction);
46662306a36Sopenharmony_ci		if (ret)
46762306a36Sopenharmony_ci			continue;
46862306a36Sopenharmony_ci
46962306a36Sopenharmony_ci		if (direction == DMA_NONE)
47062306a36Sopenharmony_ci			continue;
47162306a36Sopenharmony_ci
47262306a36Sopenharmony_ci		if (container->v2) {
47362306a36Sopenharmony_ci			tce_iommu_unuse_page_v2(container, tbl, entry);
47462306a36Sopenharmony_ci			continue;
47562306a36Sopenharmony_ci		}
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci		tce_iommu_unuse_page(oldhpa);
47862306a36Sopenharmony_ci	}
47962306a36Sopenharmony_ci
48062306a36Sopenharmony_ci	iommu_tce_kill(tbl, firstentry, pages);
48162306a36Sopenharmony_ci
48262306a36Sopenharmony_ci	return 0;
48362306a36Sopenharmony_ci}
48462306a36Sopenharmony_ci
48562306a36Sopenharmony_cistatic int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
48662306a36Sopenharmony_ci{
48762306a36Sopenharmony_ci	struct page *page = NULL;
48862306a36Sopenharmony_ci	enum dma_data_direction direction = iommu_tce_direction(tce);
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_ci	if (pin_user_pages_fast(tce & PAGE_MASK, 1,
49162306a36Sopenharmony_ci			direction != DMA_TO_DEVICE ? FOLL_WRITE : 0,
49262306a36Sopenharmony_ci			&page) != 1)
49362306a36Sopenharmony_ci		return -EFAULT;
49462306a36Sopenharmony_ci
49562306a36Sopenharmony_ci	*hpa = __pa((unsigned long) page_address(page));
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_ci	return 0;
49862306a36Sopenharmony_ci}
49962306a36Sopenharmony_ci
50062306a36Sopenharmony_cistatic long tce_iommu_build(struct tce_container *container,
50162306a36Sopenharmony_ci		struct iommu_table *tbl,
50262306a36Sopenharmony_ci		unsigned long entry, unsigned long tce, unsigned long pages,
50362306a36Sopenharmony_ci		enum dma_data_direction direction)
50462306a36Sopenharmony_ci{
50562306a36Sopenharmony_ci	long i, ret = 0;
50662306a36Sopenharmony_ci	unsigned long hpa;
50762306a36Sopenharmony_ci	enum dma_data_direction dirtmp;
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci	for (i = 0; i < pages; ++i) {
51062306a36Sopenharmony_ci		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
51162306a36Sopenharmony_ci
51262306a36Sopenharmony_ci		ret = tce_iommu_use_page(tce, &hpa);
51362306a36Sopenharmony_ci		if (ret)
51462306a36Sopenharmony_ci			break;
51562306a36Sopenharmony_ci
51662306a36Sopenharmony_ci		if (!tce_page_is_contained(container->mm, hpa,
51762306a36Sopenharmony_ci				tbl->it_page_shift)) {
51862306a36Sopenharmony_ci			ret = -EPERM;
51962306a36Sopenharmony_ci			break;
52062306a36Sopenharmony_ci		}
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci		hpa |= offset;
52362306a36Sopenharmony_ci		dirtmp = direction;
52462306a36Sopenharmony_ci		ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
52562306a36Sopenharmony_ci				&hpa, &dirtmp);
52662306a36Sopenharmony_ci		if (ret) {
52762306a36Sopenharmony_ci			tce_iommu_unuse_page(hpa);
52862306a36Sopenharmony_ci			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
52962306a36Sopenharmony_ci					__func__, entry << tbl->it_page_shift,
53062306a36Sopenharmony_ci					tce, ret);
53162306a36Sopenharmony_ci			break;
53262306a36Sopenharmony_ci		}
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_ci		if (dirtmp != DMA_NONE)
53562306a36Sopenharmony_ci			tce_iommu_unuse_page(hpa);
53662306a36Sopenharmony_ci
53762306a36Sopenharmony_ci		tce += IOMMU_PAGE_SIZE(tbl);
53862306a36Sopenharmony_ci	}
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	if (ret)
54162306a36Sopenharmony_ci		tce_iommu_clear(container, tbl, entry, i);
54262306a36Sopenharmony_ci	else
54362306a36Sopenharmony_ci		iommu_tce_kill(tbl, entry, pages);
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci	return ret;
54662306a36Sopenharmony_ci}
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_cistatic long tce_iommu_build_v2(struct tce_container *container,
54962306a36Sopenharmony_ci		struct iommu_table *tbl,
55062306a36Sopenharmony_ci		unsigned long entry, unsigned long tce, unsigned long pages,
55162306a36Sopenharmony_ci		enum dma_data_direction direction)
55262306a36Sopenharmony_ci{
55362306a36Sopenharmony_ci	long i, ret = 0;
55462306a36Sopenharmony_ci	unsigned long hpa;
55562306a36Sopenharmony_ci	enum dma_data_direction dirtmp;
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	for (i = 0; i < pages; ++i) {
55862306a36Sopenharmony_ci		struct mm_iommu_table_group_mem_t *mem = NULL;
55962306a36Sopenharmony_ci		__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i);
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_ci		ret = tce_iommu_prereg_ua_to_hpa(container,
56262306a36Sopenharmony_ci				tce, tbl->it_page_shift, &hpa, &mem);
56362306a36Sopenharmony_ci		if (ret)
56462306a36Sopenharmony_ci			break;
56562306a36Sopenharmony_ci
56662306a36Sopenharmony_ci		if (!tce_page_is_contained(container->mm, hpa,
56762306a36Sopenharmony_ci				tbl->it_page_shift)) {
56862306a36Sopenharmony_ci			ret = -EPERM;
56962306a36Sopenharmony_ci			break;
57062306a36Sopenharmony_ci		}
57162306a36Sopenharmony_ci
57262306a36Sopenharmony_ci		/* Preserve offset within IOMMU page */
57362306a36Sopenharmony_ci		hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
57462306a36Sopenharmony_ci		dirtmp = direction;
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci		/* The registered region is being unregistered */
57762306a36Sopenharmony_ci		if (mm_iommu_mapped_inc(mem))
57862306a36Sopenharmony_ci			break;
57962306a36Sopenharmony_ci
58062306a36Sopenharmony_ci		ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
58162306a36Sopenharmony_ci				&hpa, &dirtmp);
58262306a36Sopenharmony_ci		if (ret) {
58362306a36Sopenharmony_ci			/* dirtmp cannot be DMA_NONE here */
58462306a36Sopenharmony_ci			tce_iommu_unuse_page_v2(container, tbl, entry + i);
58562306a36Sopenharmony_ci			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
58662306a36Sopenharmony_ci					__func__, entry << tbl->it_page_shift,
58762306a36Sopenharmony_ci					tce, ret);
58862306a36Sopenharmony_ci			break;
58962306a36Sopenharmony_ci		}
59062306a36Sopenharmony_ci
59162306a36Sopenharmony_ci		if (dirtmp != DMA_NONE)
59262306a36Sopenharmony_ci			tce_iommu_unuse_page_v2(container, tbl, entry + i);
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_ci		*pua = cpu_to_be64(tce);
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci		tce += IOMMU_PAGE_SIZE(tbl);
59762306a36Sopenharmony_ci	}
59862306a36Sopenharmony_ci
59962306a36Sopenharmony_ci	if (ret)
60062306a36Sopenharmony_ci		tce_iommu_clear(container, tbl, entry, i);
60162306a36Sopenharmony_ci	else
60262306a36Sopenharmony_ci		iommu_tce_kill(tbl, entry, pages);
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_ci	return ret;
60562306a36Sopenharmony_ci}
60662306a36Sopenharmony_ci
60762306a36Sopenharmony_cistatic long tce_iommu_create_table(struct tce_container *container,
60862306a36Sopenharmony_ci			struct iommu_table_group *table_group,
60962306a36Sopenharmony_ci			int num,
61062306a36Sopenharmony_ci			__u32 page_shift,
61162306a36Sopenharmony_ci			__u64 window_size,
61262306a36Sopenharmony_ci			__u32 levels,
61362306a36Sopenharmony_ci			struct iommu_table **ptbl)
61462306a36Sopenharmony_ci{
61562306a36Sopenharmony_ci	long ret, table_size;
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_ci	table_size = table_group->ops->get_table_size(page_shift, window_size,
61862306a36Sopenharmony_ci			levels);
61962306a36Sopenharmony_ci	if (!table_size)
62062306a36Sopenharmony_ci		return -EINVAL;
62162306a36Sopenharmony_ci
62262306a36Sopenharmony_ci	ret = account_locked_vm(container->mm, table_size >> PAGE_SHIFT, true);
62362306a36Sopenharmony_ci	if (ret)
62462306a36Sopenharmony_ci		return ret;
62562306a36Sopenharmony_ci
62662306a36Sopenharmony_ci	ret = table_group->ops->create_table(table_group, num,
62762306a36Sopenharmony_ci			page_shift, window_size, levels, ptbl);
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_ci	WARN_ON(!ret && !(*ptbl)->it_ops->free);
63062306a36Sopenharmony_ci	WARN_ON(!ret && ((*ptbl)->it_allocated_size > table_size));
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_ci	return ret;
63362306a36Sopenharmony_ci}
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_cistatic void tce_iommu_free_table(struct tce_container *container,
63662306a36Sopenharmony_ci		struct iommu_table *tbl)
63762306a36Sopenharmony_ci{
63862306a36Sopenharmony_ci	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
63962306a36Sopenharmony_ci
64062306a36Sopenharmony_ci	iommu_tce_table_put(tbl);
64162306a36Sopenharmony_ci	account_locked_vm(container->mm, pages, false);
64262306a36Sopenharmony_ci}
64362306a36Sopenharmony_ci
64462306a36Sopenharmony_cistatic long tce_iommu_create_window(struct tce_container *container,
64562306a36Sopenharmony_ci		__u32 page_shift, __u64 window_size, __u32 levels,
64662306a36Sopenharmony_ci		__u64 *start_addr)
64762306a36Sopenharmony_ci{
64862306a36Sopenharmony_ci	struct tce_iommu_group *tcegrp;
64962306a36Sopenharmony_ci	struct iommu_table_group *table_group;
65062306a36Sopenharmony_ci	struct iommu_table *tbl = NULL;
65162306a36Sopenharmony_ci	long ret, num;
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci	num = tce_iommu_find_free_table(container);
65462306a36Sopenharmony_ci	if (num < 0)
65562306a36Sopenharmony_ci		return num;
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_ci	/* Get the first group for ops::create_table */
65862306a36Sopenharmony_ci	tcegrp = list_first_entry(&container->group_list,
65962306a36Sopenharmony_ci			struct tce_iommu_group, next);
66062306a36Sopenharmony_ci	table_group = iommu_group_get_iommudata(tcegrp->grp);
66162306a36Sopenharmony_ci	if (!table_group)
66262306a36Sopenharmony_ci		return -EFAULT;
66362306a36Sopenharmony_ci
66462306a36Sopenharmony_ci	if (!(table_group->pgsizes & (1ULL << page_shift)))
66562306a36Sopenharmony_ci		return -EINVAL;
66662306a36Sopenharmony_ci
66762306a36Sopenharmony_ci	if (!table_group->ops->set_window || !table_group->ops->unset_window ||
66862306a36Sopenharmony_ci			!table_group->ops->get_table_size ||
66962306a36Sopenharmony_ci			!table_group->ops->create_table)
67062306a36Sopenharmony_ci		return -EPERM;
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_ci	/* Create TCE table */
67362306a36Sopenharmony_ci	ret = tce_iommu_create_table(container, table_group, num,
67462306a36Sopenharmony_ci			page_shift, window_size, levels, &tbl);
67562306a36Sopenharmony_ci	if (ret)
67662306a36Sopenharmony_ci		return ret;
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci	BUG_ON(!tbl->it_ops->free);
67962306a36Sopenharmony_ci
68062306a36Sopenharmony_ci	/*
68162306a36Sopenharmony_ci	 * Program the table to every group.
68262306a36Sopenharmony_ci	 * Groups have been tested for compatibility at the attach time.
68362306a36Sopenharmony_ci	 */
68462306a36Sopenharmony_ci	list_for_each_entry(tcegrp, &container->group_list, next) {
68562306a36Sopenharmony_ci		table_group = iommu_group_get_iommudata(tcegrp->grp);
68662306a36Sopenharmony_ci
68762306a36Sopenharmony_ci		ret = table_group->ops->set_window(table_group, num, tbl);
68862306a36Sopenharmony_ci		if (ret)
68962306a36Sopenharmony_ci			goto unset_exit;
69062306a36Sopenharmony_ci	}
69162306a36Sopenharmony_ci
69262306a36Sopenharmony_ci	container->tables[num] = tbl;
69362306a36Sopenharmony_ci
69462306a36Sopenharmony_ci	/* Return start address assigned by platform in create_table() */
69562306a36Sopenharmony_ci	*start_addr = tbl->it_offset << tbl->it_page_shift;
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci	return 0;
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ciunset_exit:
70062306a36Sopenharmony_ci	list_for_each_entry(tcegrp, &container->group_list, next) {
70162306a36Sopenharmony_ci		table_group = iommu_group_get_iommudata(tcegrp->grp);
70262306a36Sopenharmony_ci		table_group->ops->unset_window(table_group, num);
70362306a36Sopenharmony_ci	}
70462306a36Sopenharmony_ci	tce_iommu_free_table(container, tbl);
70562306a36Sopenharmony_ci
70662306a36Sopenharmony_ci	return ret;
70762306a36Sopenharmony_ci}
70862306a36Sopenharmony_ci
70962306a36Sopenharmony_cistatic long tce_iommu_remove_window(struct tce_container *container,
71062306a36Sopenharmony_ci		__u64 start_addr)
71162306a36Sopenharmony_ci{
71262306a36Sopenharmony_ci	struct iommu_table_group *table_group = NULL;
71362306a36Sopenharmony_ci	struct iommu_table *tbl;
71462306a36Sopenharmony_ci	struct tce_iommu_group *tcegrp;
71562306a36Sopenharmony_ci	int num;
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	num = tce_iommu_find_table(container, start_addr, &tbl);
71862306a36Sopenharmony_ci	if (num < 0)
71962306a36Sopenharmony_ci		return -EINVAL;
72062306a36Sopenharmony_ci
72162306a36Sopenharmony_ci	BUG_ON(!tbl->it_size);
72262306a36Sopenharmony_ci
72362306a36Sopenharmony_ci	/* Detach groups from IOMMUs */
72462306a36Sopenharmony_ci	list_for_each_entry(tcegrp, &container->group_list, next) {
72562306a36Sopenharmony_ci		table_group = iommu_group_get_iommudata(tcegrp->grp);
72662306a36Sopenharmony_ci
72762306a36Sopenharmony_ci		/*
72862306a36Sopenharmony_ci		 * SPAPR TCE IOMMU exposes the default DMA window to
72962306a36Sopenharmony_ci		 * the guest via dma32_window_start/size of
73062306a36Sopenharmony_ci		 * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow
73162306a36Sopenharmony_ci		 * the userspace to remove this window, some do not so
73262306a36Sopenharmony_ci		 * here we check for the platform capability.
73362306a36Sopenharmony_ci		 */
73462306a36Sopenharmony_ci		if (!table_group->ops || !table_group->ops->unset_window)
73562306a36Sopenharmony_ci			return -EPERM;
73662306a36Sopenharmony_ci
73762306a36Sopenharmony_ci		table_group->ops->unset_window(table_group, num);
73862306a36Sopenharmony_ci	}
73962306a36Sopenharmony_ci
74062306a36Sopenharmony_ci	/* Free table */
74162306a36Sopenharmony_ci	tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
74262306a36Sopenharmony_ci	tce_iommu_free_table(container, tbl);
74362306a36Sopenharmony_ci	container->tables[num] = NULL;
74462306a36Sopenharmony_ci
74562306a36Sopenharmony_ci	return 0;
74662306a36Sopenharmony_ci}
74762306a36Sopenharmony_ci
74862306a36Sopenharmony_cistatic long tce_iommu_create_default_window(struct tce_container *container)
74962306a36Sopenharmony_ci{
75062306a36Sopenharmony_ci	long ret;
75162306a36Sopenharmony_ci	__u64 start_addr = 0;
75262306a36Sopenharmony_ci	struct tce_iommu_group *tcegrp;
75362306a36Sopenharmony_ci	struct iommu_table_group *table_group;
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci	if (!container->def_window_pending)
75662306a36Sopenharmony_ci		return 0;
75762306a36Sopenharmony_ci
75862306a36Sopenharmony_ci	if (!tce_groups_attached(container))
75962306a36Sopenharmony_ci		return -ENODEV;
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_ci	tcegrp = list_first_entry(&container->group_list,
76262306a36Sopenharmony_ci			struct tce_iommu_group, next);
76362306a36Sopenharmony_ci	table_group = iommu_group_get_iommudata(tcegrp->grp);
76462306a36Sopenharmony_ci	if (!table_group)
76562306a36Sopenharmony_ci		return -ENODEV;
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
76862306a36Sopenharmony_ci			table_group->tce32_size, 1, &start_addr);
76962306a36Sopenharmony_ci	WARN_ON_ONCE(!ret && start_addr);
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ci	if (!ret)
77262306a36Sopenharmony_ci		container->def_window_pending = false;
77362306a36Sopenharmony_ci
77462306a36Sopenharmony_ci	return ret;
77562306a36Sopenharmony_ci}
77662306a36Sopenharmony_ci
77762306a36Sopenharmony_cistatic long vfio_spapr_ioctl_eeh_pe_op(struct iommu_group *group,
77862306a36Sopenharmony_ci				       unsigned long arg)
77962306a36Sopenharmony_ci{
78062306a36Sopenharmony_ci	struct eeh_pe *pe;
78162306a36Sopenharmony_ci	struct vfio_eeh_pe_op op;
78262306a36Sopenharmony_ci	unsigned long minsz;
78362306a36Sopenharmony_ci
78462306a36Sopenharmony_ci	pe = eeh_iommu_group_to_pe(group);
78562306a36Sopenharmony_ci	if (!pe)
78662306a36Sopenharmony_ci		return -ENODEV;
78762306a36Sopenharmony_ci
78862306a36Sopenharmony_ci	minsz = offsetofend(struct vfio_eeh_pe_op, op);
78962306a36Sopenharmony_ci	if (copy_from_user(&op, (void __user *)arg, minsz))
79062306a36Sopenharmony_ci		return -EFAULT;
79162306a36Sopenharmony_ci	if (op.argsz < minsz || op.flags)
79262306a36Sopenharmony_ci		return -EINVAL;
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	switch (op.op) {
79562306a36Sopenharmony_ci	case VFIO_EEH_PE_DISABLE:
79662306a36Sopenharmony_ci		return eeh_pe_set_option(pe, EEH_OPT_DISABLE);
79762306a36Sopenharmony_ci	case VFIO_EEH_PE_ENABLE:
79862306a36Sopenharmony_ci		return eeh_pe_set_option(pe, EEH_OPT_ENABLE);
79962306a36Sopenharmony_ci	case VFIO_EEH_PE_UNFREEZE_IO:
80062306a36Sopenharmony_ci		return eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO);
80162306a36Sopenharmony_ci	case VFIO_EEH_PE_UNFREEZE_DMA:
80262306a36Sopenharmony_ci		return eeh_pe_set_option(pe, EEH_OPT_THAW_DMA);
80362306a36Sopenharmony_ci	case VFIO_EEH_PE_GET_STATE:
80462306a36Sopenharmony_ci		return eeh_pe_get_state(pe);
80562306a36Sopenharmony_ci		break;
80662306a36Sopenharmony_ci	case VFIO_EEH_PE_RESET_DEACTIVATE:
80762306a36Sopenharmony_ci		return eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, true);
80862306a36Sopenharmony_ci	case VFIO_EEH_PE_RESET_HOT:
80962306a36Sopenharmony_ci		return eeh_pe_reset(pe, EEH_RESET_HOT, true);
81062306a36Sopenharmony_ci	case VFIO_EEH_PE_RESET_FUNDAMENTAL:
81162306a36Sopenharmony_ci		return eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL, true);
81262306a36Sopenharmony_ci	case VFIO_EEH_PE_CONFIGURE:
81362306a36Sopenharmony_ci		return eeh_pe_configure(pe);
81462306a36Sopenharmony_ci	case VFIO_EEH_PE_INJECT_ERR:
81562306a36Sopenharmony_ci		minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
81662306a36Sopenharmony_ci		if (op.argsz < minsz)
81762306a36Sopenharmony_ci			return -EINVAL;
81862306a36Sopenharmony_ci		if (copy_from_user(&op, (void __user *)arg, minsz))
81962306a36Sopenharmony_ci			return -EFAULT;
82062306a36Sopenharmony_ci
82162306a36Sopenharmony_ci		return eeh_pe_inject_err(pe, op.err.type, op.err.func,
82262306a36Sopenharmony_ci					 op.err.addr, op.err.mask);
82362306a36Sopenharmony_ci	default:
82462306a36Sopenharmony_ci		return -EINVAL;
82562306a36Sopenharmony_ci	}
82662306a36Sopenharmony_ci}
82762306a36Sopenharmony_ci
/*
 * ioctl dispatcher for a TCE container.
 *
 * @iommu_data is the struct tce_container, @cmd the ioctl number and @arg
 * either a plain value (VFIO_CHECK_EXTENSION) or a user pointer to the
 * command's argument structure.
 *
 * VFIO_CHECK_EXTENSION is answered first, before the container and its mm
 * are looked at. For every other command the caller must be running on the
 * mm recorded in the container, if one is recorded; otherwise -EPERM is
 * returned.
 *
 * Commands that do not match the container version (the v2-only commands
 * on a non-v2 container, ENABLE/DISABLE on a v2 container) and unknown
 * commands fall out of the switch and return -ENOTTY.
 */
static long tce_iommu_ioctl(void *iommu_data,
				 unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz, ddwsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
		case VFIO_SPAPR_TCE_v2_IOMMU:
			return 1;
		case VFIO_EEH:
			return eeh_enabled();
		default:
			return 0;
		}
	}

	/*
	 * Sanity check to prevent one userspace from manipulating
	 * another userspace mm.
	 */
	BUG_ON(!container);
	if (container->mm && container->mm != current->mm)
		return -EPERM;

	switch (cmd) {
	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		/* Report the 32-bit DMA window and, on v2, the DDW limits */
		struct vfio_iommu_spapr_tce_info info;
		struct tce_iommu_group *tcegrp;
		struct iommu_table_group *table_group;

		if (!tce_groups_attached(container))
			return -ENXIO;

		/* The reported values are taken from the first attached group */
		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		if (!table_group)
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = table_group->tce32_start;
		info.dma32_window_size = table_group->tce32_size;
		info.flags = 0;
		memset(&info.ddw, 0, sizeof(info.ddw));

		if (table_group->max_dynamic_windows_supported &&
				container->v2) {
			info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
			info.ddw.pgsizes = table_group->pgsizes;
			info.ddw.max_dynamic_windows_supported =
				table_group->max_dynamic_windows_supported;
			info.ddw.levels = table_group->max_levels;
		}

		ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);

		/* Copy the ddw part out only if the caller's buffer has room */
		if (info.argsz >= ddwsz)
			minsz = ddwsz;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		/* Map a user buffer into the window that contains param.iova */
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = NULL;
		long num;
		enum dma_data_direction direction;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		/*
		 * NOTE(review): container->lock is not taken around this call
		 * here, while VFIO_IOMMU_SPAPR_TCE_CREATE below does take it
		 * - confirm whether that is intentional.
		 */
		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		/* size and vaddr must be aligned to the table's IOMMU page */
		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
			return -EINVAL;

		/* iova is checked by the IOMMU API */
		/* READ+WRITE, READ only, WRITE only; neither flag is an error */
		if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_BIDIRECTIONAL;
			else
				direction = DMA_TO_DEVICE;
		} else {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_FROM_DEVICE;
			else
				return -EINVAL;
		}

		ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
		if (ret)
			return ret;

		/* Entry index and count are expressed in IOMMU pages */
		if (container->v2)
			ret = tce_iommu_build_v2(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);
		else
			ret = tce_iommu_build(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);

		/* Flushed whether or not the build succeeded */
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		/* Clear the TCEs covering [param.iova, param.iova + param.size) */
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = NULL;
		long num;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if (param.size & ~IOMMU_PAGE_MASK(tbl))
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> tbl->it_page_shift);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> tbl->it_page_shift,
				param.size >> tbl->it_page_shift);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
		/* v2 only: register a range of user memory with the container */
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_register_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
		/* v2 only: undo a previous registration */
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		/* Refused while the container has no mm recorded */
		if (!container->mm)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_unregister_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		/* Not available on v2 containers: falls through to -ENOTTY */
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;


	case VFIO_IOMMU_DISABLE:
		/* Not available on v2 containers: falls through to -ENOTTY */
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;

	case VFIO_EEH_PE_OP: {
		struct tce_iommu_group *tcegrp;

		/*
		 * Run the operation on each attached group in turn and stop
		 * at the first non-zero result. With no group attached the
		 * result is 0.
		 */
		ret = 0;
		list_for_each_entry(tcegrp, &container->group_list, next) {
			ret = vfio_spapr_ioctl_eeh_pe_op(tcegrp->grp, arg);
			if (ret)
				return ret;
		}
		return ret;
	}

	case VFIO_IOMMU_SPAPR_TCE_CREATE: {
		/* v2 only: create an additional DMA window */
		struct vfio_iommu_spapr_tce_create create;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
				start_addr);

		if (copy_from_user(&create, (void __user *)arg, minsz))
			return -EFAULT;

		if (create.argsz < minsz)
			return -EINVAL;

		if (create.flags)
			return -EINVAL;

		mutex_lock(&container->lock);

		/* A pending default window is created before the new one */
		ret = tce_iommu_create_default_window(container);
		if (!ret)
			ret = tce_iommu_create_window(container,
					create.page_shift,
					create.window_size, create.levels,
					&create.start_addr);

		mutex_unlock(&container->lock);

		/* start_addr is copied back to the caller only on success */
		if (!ret && copy_to_user((void __user *)arg, &create, minsz))
			ret = -EFAULT;

		return ret;
	}
	case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
		/* v2 only: remove the DMA window starting at remove.start_addr */
		struct vfio_iommu_spapr_tce_remove remove;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
				start_addr);

		if (copy_from_user(&remove, (void __user *)arg, minsz))
			return -EFAULT;

		if (remove.argsz < minsz)
			return -EINVAL;

		if (remove.flags)
			return -EINVAL;

		/*
		 * Address 0 while the default window is still pending: there
		 * is nothing to tear down, so just cancel the pending
		 * creation.
		 * NOTE(review): the flag is changed here without
		 * container->lock held - confirm this is safe.
		 */
		if (container->def_window_pending && !remove.start_addr) {
			container->def_window_pending = false;
			return 0;
		}

		mutex_lock(&container->lock);

		ret = tce_iommu_remove_window(container, remove.start_addr);

		mutex_unlock(&container->lock);

		return ret;
	}
	}

	return -ENOTTY;
}
118962306a36Sopenharmony_ci
119062306a36Sopenharmony_cistatic void tce_iommu_release_ownership(struct tce_container *container,
119162306a36Sopenharmony_ci		struct iommu_table_group *table_group)
119262306a36Sopenharmony_ci{
119362306a36Sopenharmony_ci	long i;
119462306a36Sopenharmony_ci
119562306a36Sopenharmony_ci	if (!table_group->ops->unset_window) {
119662306a36Sopenharmony_ci		WARN_ON_ONCE(1);
119762306a36Sopenharmony_ci		return;
119862306a36Sopenharmony_ci	}
119962306a36Sopenharmony_ci
120062306a36Sopenharmony_ci	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
120162306a36Sopenharmony_ci		if (container->tables[i])
120262306a36Sopenharmony_ci			table_group->ops->unset_window(table_group, i);
120362306a36Sopenharmony_ci}
120462306a36Sopenharmony_ci
120562306a36Sopenharmony_cistatic long tce_iommu_take_ownership(struct tce_container *container,
120662306a36Sopenharmony_ci		struct iommu_table_group *table_group)
120762306a36Sopenharmony_ci{
120862306a36Sopenharmony_ci	long i, ret = 0;
120962306a36Sopenharmony_ci
121062306a36Sopenharmony_ci	/* Set all windows to the new group */
121162306a36Sopenharmony_ci	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
121262306a36Sopenharmony_ci		struct iommu_table *tbl = container->tables[i];
121362306a36Sopenharmony_ci
121462306a36Sopenharmony_ci		if (!tbl)
121562306a36Sopenharmony_ci			continue;
121662306a36Sopenharmony_ci
121762306a36Sopenharmony_ci		ret = table_group->ops->set_window(table_group, i, tbl);
121862306a36Sopenharmony_ci		if (ret)
121962306a36Sopenharmony_ci			goto release_exit;
122062306a36Sopenharmony_ci	}
122162306a36Sopenharmony_ci
122262306a36Sopenharmony_ci	return 0;
122362306a36Sopenharmony_ci
122462306a36Sopenharmony_cirelease_exit:
122562306a36Sopenharmony_ci	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
122662306a36Sopenharmony_ci		table_group->ops->unset_window(table_group, i);
122762306a36Sopenharmony_ci
122862306a36Sopenharmony_ci	return ret;
122962306a36Sopenharmony_ci}
123062306a36Sopenharmony_ci
123162306a36Sopenharmony_cistatic int tce_iommu_attach_group(void *iommu_data,
123262306a36Sopenharmony_ci		struct iommu_group *iommu_group, enum vfio_group_type type)
123362306a36Sopenharmony_ci{
123462306a36Sopenharmony_ci	int ret = 0;
123562306a36Sopenharmony_ci	struct tce_container *container = iommu_data;
123662306a36Sopenharmony_ci	struct iommu_table_group *table_group;
123762306a36Sopenharmony_ci	struct tce_iommu_group *tcegrp = NULL;
123862306a36Sopenharmony_ci
123962306a36Sopenharmony_ci	if (type == VFIO_EMULATED_IOMMU)
124062306a36Sopenharmony_ci		return -EINVAL;
124162306a36Sopenharmony_ci
124262306a36Sopenharmony_ci	mutex_lock(&container->lock);
124362306a36Sopenharmony_ci
124462306a36Sopenharmony_ci	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
124562306a36Sopenharmony_ci			iommu_group_id(iommu_group), iommu_group); */
124662306a36Sopenharmony_ci	table_group = iommu_group_get_iommudata(iommu_group);
124762306a36Sopenharmony_ci	if (!table_group) {
124862306a36Sopenharmony_ci		ret = -ENODEV;
124962306a36Sopenharmony_ci		goto unlock_exit;
125062306a36Sopenharmony_ci	}
125162306a36Sopenharmony_ci
125262306a36Sopenharmony_ci	/* v2 requires full support of dynamic DMA windows */
125362306a36Sopenharmony_ci	if (container->v2 && table_group->max_dynamic_windows_supported == 0) {
125462306a36Sopenharmony_ci		ret = -EINVAL;
125562306a36Sopenharmony_ci		goto unlock_exit;
125662306a36Sopenharmony_ci	}
125762306a36Sopenharmony_ci
125862306a36Sopenharmony_ci	/* v1 reuses TCE tables and does not share them among PEs */
125962306a36Sopenharmony_ci	if (!container->v2 && tce_groups_attached(container)) {
126062306a36Sopenharmony_ci		ret = -EBUSY;
126162306a36Sopenharmony_ci		goto unlock_exit;
126262306a36Sopenharmony_ci	}
126362306a36Sopenharmony_ci
126462306a36Sopenharmony_ci	/*
126562306a36Sopenharmony_ci	 * Check if new group has the same iommu_table_group_ops
126662306a36Sopenharmony_ci	 * (i.e. compatible)
126762306a36Sopenharmony_ci	 */
126862306a36Sopenharmony_ci	list_for_each_entry(tcegrp, &container->group_list, next) {
126962306a36Sopenharmony_ci		struct iommu_table_group *table_group_tmp;
127062306a36Sopenharmony_ci
127162306a36Sopenharmony_ci		if (tcegrp->grp == iommu_group) {
127262306a36Sopenharmony_ci			pr_warn("tce_vfio: Group %d is already attached\n",
127362306a36Sopenharmony_ci					iommu_group_id(iommu_group));
127462306a36Sopenharmony_ci			ret = -EBUSY;
127562306a36Sopenharmony_ci			goto unlock_exit;
127662306a36Sopenharmony_ci		}
127762306a36Sopenharmony_ci		table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
127862306a36Sopenharmony_ci		if (table_group_tmp->ops->create_table !=
127962306a36Sopenharmony_ci				table_group->ops->create_table) {
128062306a36Sopenharmony_ci			pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
128162306a36Sopenharmony_ci					iommu_group_id(iommu_group),
128262306a36Sopenharmony_ci					iommu_group_id(tcegrp->grp));
128362306a36Sopenharmony_ci			ret = -EPERM;
128462306a36Sopenharmony_ci			goto unlock_exit;
128562306a36Sopenharmony_ci		}
128662306a36Sopenharmony_ci	}
128762306a36Sopenharmony_ci
128862306a36Sopenharmony_ci	tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
128962306a36Sopenharmony_ci	if (!tcegrp) {
129062306a36Sopenharmony_ci		ret = -ENOMEM;
129162306a36Sopenharmony_ci		goto unlock_exit;
129262306a36Sopenharmony_ci	}
129362306a36Sopenharmony_ci
129462306a36Sopenharmony_ci	ret = tce_iommu_take_ownership(container, table_group);
129562306a36Sopenharmony_ci	if (!tce_groups_attached(container) && !container->tables[0])
129662306a36Sopenharmony_ci		container->def_window_pending = true;
129762306a36Sopenharmony_ci
129862306a36Sopenharmony_ci	if (!ret) {
129962306a36Sopenharmony_ci		tcegrp->grp = iommu_group;
130062306a36Sopenharmony_ci		list_add(&tcegrp->next, &container->group_list);
130162306a36Sopenharmony_ci	}
130262306a36Sopenharmony_ci
130362306a36Sopenharmony_ci	if (ret && tcegrp)
130462306a36Sopenharmony_ci		kfree(tcegrp);
130562306a36Sopenharmony_ci
130662306a36Sopenharmony_ciunlock_exit:
130762306a36Sopenharmony_ci	mutex_unlock(&container->lock);
130862306a36Sopenharmony_ci
130962306a36Sopenharmony_ci	return ret;
131062306a36Sopenharmony_ci}
131162306a36Sopenharmony_ci
131262306a36Sopenharmony_cistatic void tce_iommu_detach_group(void *iommu_data,
131362306a36Sopenharmony_ci		struct iommu_group *iommu_group)
131462306a36Sopenharmony_ci{
131562306a36Sopenharmony_ci	struct tce_container *container = iommu_data;
131662306a36Sopenharmony_ci	struct iommu_table_group *table_group;
131762306a36Sopenharmony_ci	bool found = false;
131862306a36Sopenharmony_ci	struct tce_iommu_group *tcegrp;
131962306a36Sopenharmony_ci
132062306a36Sopenharmony_ci	mutex_lock(&container->lock);
132162306a36Sopenharmony_ci
132262306a36Sopenharmony_ci	list_for_each_entry(tcegrp, &container->group_list, next) {
132362306a36Sopenharmony_ci		if (tcegrp->grp == iommu_group) {
132462306a36Sopenharmony_ci			found = true;
132562306a36Sopenharmony_ci			break;
132662306a36Sopenharmony_ci		}
132762306a36Sopenharmony_ci	}
132862306a36Sopenharmony_ci
132962306a36Sopenharmony_ci	if (!found) {
133062306a36Sopenharmony_ci		pr_warn("tce_vfio: detaching unattached group #%u\n",
133162306a36Sopenharmony_ci				iommu_group_id(iommu_group));
133262306a36Sopenharmony_ci		goto unlock_exit;
133362306a36Sopenharmony_ci	}
133462306a36Sopenharmony_ci
133562306a36Sopenharmony_ci	list_del(&tcegrp->next);
133662306a36Sopenharmony_ci	kfree(tcegrp);
133762306a36Sopenharmony_ci
133862306a36Sopenharmony_ci	table_group = iommu_group_get_iommudata(iommu_group);
133962306a36Sopenharmony_ci	BUG_ON(!table_group);
134062306a36Sopenharmony_ci
134162306a36Sopenharmony_ci	tce_iommu_release_ownership(container, table_group);
134262306a36Sopenharmony_ci
134362306a36Sopenharmony_ciunlock_exit:
134462306a36Sopenharmony_ci	mutex_unlock(&container->lock);
134562306a36Sopenharmony_ci}
134662306a36Sopenharmony_ci
134762306a36Sopenharmony_cistatic const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
134862306a36Sopenharmony_ci	.name		= "iommu-vfio-powerpc",
134962306a36Sopenharmony_ci	.owner		= THIS_MODULE,
135062306a36Sopenharmony_ci	.open		= tce_iommu_open,
135162306a36Sopenharmony_ci	.release	= tce_iommu_release,
135262306a36Sopenharmony_ci	.ioctl		= tce_iommu_ioctl,
135362306a36Sopenharmony_ci	.attach_group	= tce_iommu_attach_group,
135462306a36Sopenharmony_ci	.detach_group	= tce_iommu_detach_group,
135562306a36Sopenharmony_ci};
135662306a36Sopenharmony_ci
135762306a36Sopenharmony_cistatic int __init tce_iommu_init(void)
135862306a36Sopenharmony_ci{
135962306a36Sopenharmony_ci	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
136062306a36Sopenharmony_ci}
136162306a36Sopenharmony_ci
136262306a36Sopenharmony_cistatic void __exit tce_iommu_cleanup(void)
136362306a36Sopenharmony_ci{
136462306a36Sopenharmony_ci	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
136562306a36Sopenharmony_ci}
136662306a36Sopenharmony_ci
136762306a36Sopenharmony_cimodule_init(tce_iommu_init);
136862306a36Sopenharmony_cimodule_exit(tce_iommu_cleanup);
136962306a36Sopenharmony_ci
137062306a36Sopenharmony_ciMODULE_VERSION(DRIVER_VERSION);
137162306a36Sopenharmony_ciMODULE_LICENSE("GPL v2");
137262306a36Sopenharmony_ciMODULE_AUTHOR(DRIVER_AUTHOR);
137362306a36Sopenharmony_ciMODULE_DESCRIPTION(DRIVER_DESC);
137462306a36Sopenharmony_ci
1375