// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  IOMMU helpers in MMU context.
 *
 *  Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
 */

#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/sizes.h>
#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>
#include <linux/mm_inline.h>

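/*
 * Serialises updates to the per-mm list of preregistered regions;
 * readers traverse the list with the RCU/lockless list walkers.
 */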
static DEFINE_MUTEX(mem_list_mutex);

#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY	0x1
#define MM_IOMMU_TABLE_GROUP_PAGE_MASK	~(SZ_4K - 1)

struct mm_iommu_table_group_mem_t {
	struct list_head next;
	struct rcu_head rcu;
	unsigned long used;
	atomic64_t mapped;
	unsigned int pageshift;
	u64 ua;			/* userspace address */
	u64 entries;		/* number of entries in hpas/hpages[] */
	/*
	 * In mm_iommu_do_alloc() we temporarily use this to store
	 * the struct page addresses.
	 *
	 * We need to convert ua to hpa in real mode, so make it
	 * simpler by storing the physical address.
	 */
	union {
		struct page **hpages;	/* vmalloc'ed */
		phys_addr_t *hpas;
	};
#define MM_IOMMU_TABLE_INVALID_HPA	((uint64_t)-1)
	u64 dev_hpa;		/* Device memory base address */
};

bool mm_iommu_preregistered(struct mm_struct *mm)
{
	return !list_empty(&mm->context.iommu_group_mem_list);
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);

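/*
 * Preregister a region: account it against the locked_vm limit, pin the
 * userspace pages with FOLL_LONGTERM, work out the largest IOMMU page
 * size the backing allows, and add the region to the per-mm list.
 * Device memory (@dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) has no struct
 * pages, so only its base address and page shift are recorded.
 */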
static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
			      unsigned long entries, unsigned long dev_hpa,
			      struct mm_iommu_table_group_mem_t **pmem)
{
	struct mm_iommu_table_group_mem_t *mem, *mem2;
	long i, ret, locked_entries = 0, pinned = 0;
	unsigned int pageshift;
	unsigned long entry, chunk;

	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		ret = account_locked_vm(mm, entries, true);
		if (ret)
			return ret;

		locked_entries = entries;
	}

	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
		mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
		mem->dev_hpa = dev_hpa;
		goto good_exit;
	}
	mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;

	/*
	 * As a starting point for the maximum page size calculation, use
	 * the natural alignment of @ua and @entries; this allows IOMMU
	 * pages smaller than huge pages but still bigger than PAGE_SIZE.
	 */
	mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
	mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
	if (!mem->hpas) {
		kfree(mem);
		ret = -ENOMEM;
		goto unlock_exit;
	}

	mmap_read_lock(mm);
	/* Pin the memory in bounded chunks rather than in one huge call. */
	chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) /
			sizeof(struct vm_area_struct *);
	chunk = min(chunk, entries);
	for (entry = 0; entry < entries; entry += chunk) {
		unsigned long n = min(entries - entry, chunk);

		ret = pin_user_pages(ua + (entry << PAGE_SHIFT), n,
				FOLL_WRITE | FOLL_LONGTERM,
				mem->hpages + entry, NULL);
		if (ret == n) {
			pinned += n;
			continue;
		}
		if (ret > 0)
			pinned += ret;
		break;
	}
	mmap_read_unlock(mm);
	if (pinned != entries) {
		if (!ret)
			ret = -EFAULT;
		goto free_exit;
	}

good_exit:
	atomic64_set(&mem->mapped, 1);
	mem->used = 1;
	mem->ua = ua;
	mem->entries = entries;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
		/* Overlap? */
		if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
				(ua < (mem2->ua +
				       (mem2->entries << PAGE_SHIFT)))) {
			ret = -EINVAL;
			mutex_unlock(&mem_list_mutex);
			goto free_exit;
		}
	}

	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		/*
		 * Allow IOMMU pages larger than 64k to be used, but only
		 * if the memory is backed by hugetlb. Device memory is
		 * skipped as it is not backed by page structs.
		 */
		pageshift = PAGE_SHIFT;
		for (i = 0; i < entries; ++i) {
			struct page *page = mem->hpages[i];

			if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page))
				pageshift = page_shift(compound_head(page));
			mem->pageshift = min(mem->pageshift, pageshift);
			/*
			 * We don't need the struct page reference any more,
			 * switch to the physical address.
			 */
			mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
		}
	}

	list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);

	mutex_unlock(&mem_list_mutex);

	*pmem = mem;

	return 0;

free_exit:
	/* Drop the page references taken above */
	unpin_user_pages(mem->hpages, pinned);

	vfree(mem->hpas);
	kfree(mem);

unlock_exit:
	account_locked_vm(mm, locked_entries, false);

	return ret;
}

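/*
 * mm_iommu_new() preregisters a range of userspace memory for IOMMU use.
 * Typical lifecycle as seen by a caller such as the VFIO SPAPR TCE driver
 * (illustrative sketch only; error handling and the mm_iommu_get() path
 * for already registered regions are omitted):
 *
 *	struct mm_iommu_table_group_mem_t *mem;
 *	unsigned long hpa;
 *
 *	mm_iommu_new(mm, ua, entries, &mem);	// pin and register
 *	...
 *	mm_iommu_mapped_inc(mem);		// while a DMA mapping uses it
 *	mm_iommu_ua_to_hpa(mem, ua, pageshift, &hpa);
 *	...
 *	mm_iommu_mapped_dec(mem);
 *	...
 *	mm_iommu_put(mm, mem);			// unpin and unregister
 */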
long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
			pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_new);

long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_newdev);

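/*
 * Undo the pinning done at registration time: mark pages which the IOMMU
 * wrote to as dirty and drop the long-term page pins. Device memory
 * regions have no hpas[] array and are left alone.
 */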
static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
	long i;
	struct page *page = NULL;

	if (!mem->hpas)
		return;

	for (i = 0; i < mem->entries; ++i) {
		if (!mem->hpas[i])
			continue;

		page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
		if (!page)
			continue;

		if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
			SetPageDirty(page);

		unpin_user_page(page);

		mem->hpas[i] = 0;
	}
}

static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
{
	mm_iommu_unpin(mem);
	vfree(mem->hpas);
	kfree(mem);
}

static void mm_iommu_free(struct rcu_head *head)
{
	struct mm_iommu_table_group_mem_t *mem = container_of(head,
			struct mm_iommu_table_group_mem_t, rcu);

	mm_iommu_do_free(mem);
}

static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
{
	list_del_rcu(&mem->next);
	call_rcu(&mem->rcu, mm_iommu_free);
}

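/*
 * Drop a reference to a preregistered region. On the last reference the
 * region is removed from the list, its pages are unpinned and the
 * locked_vm accounting is undone; this fails with -EBUSY while the
 * region is still mapped by an IOMMU.
 */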
long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
	long ret = 0;
	unsigned long unlock_entries = 0;

	mutex_lock(&mem_list_mutex);

	if (mem->used == 0) {
		ret = -ENOENT;
		goto unlock_exit;
	}

	--mem->used;
	/* There are still users, exit */
	if (mem->used)
		goto unlock_exit;

	/* Are there still mappings? */
	if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) {
		++mem->used;
		ret = -EBUSY;
		goto unlock_exit;
	}

	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
		unlock_entries = mem->entries;

	/* @mapped became 0 so now mappings are disabled, release the region */
	mm_iommu_release(mem);

unlock_exit:
	mutex_unlock(&mem_list_mutex);

	account_locked_vm(mm, unlock_entries, false);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_put);

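/*
 * Find a preregistered region which entirely covers [ua, ua + size).
 * Returns NULL if no such region has been registered.
 */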
struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);

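/*
 * Real mode variant of mm_iommu_lookup(); uses the lockless list walker
 * as the usual RCU list debugging checks cannot run in real mode.
 */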
struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
			next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}

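/*
 * Look up an already registered region exactly matching @ua and @entries
 * and take a reference to it.
 */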
struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
		unsigned long ua, unsigned long entries)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua == ua) && (mem->entries == entries)) {
			ret = mem;
			++mem->used;
			break;
		}
	}

	mutex_unlock(&mem_list_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_get);

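/*
 * Translate a userspace address within a preregistered region into a
 * host physical address. Fails if @ua is outside the region or the
 * requested @pageshift is larger than what the region can back.
 */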
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	u64 *va;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	va = &mem->hpas[entry];
	*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);

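/*
 * Real mode variant of mm_iommu_ua_to_hpa(). The hpas[] array is
 * vmalloc'ed and cannot be dereferenced directly in real mode, so the
 * entry is reached via vmalloc_to_phys() instead.
 */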
long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	unsigned long *pa;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
	if (!pa)
		return -EFAULT;

	*hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}

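/*
 * Real mode helper: mark the page backing @ua as dirty in the hpas[]
 * array so that mm_iommu_unpin() calls SetPageDirty() on it later.
 */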
void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
{
	struct mm_iommu_table_group_mem_t *mem;
	long entry;
	void *va;
	unsigned long *pa;

	mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
	if (!mem)
		return;

	if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
		return;

	entry = (ua - mem->ua) >> PAGE_SHIFT;
	va = &mem->hpas[entry];

	pa = (void *) vmalloc_to_phys(va);
	if (!pa)
		return;

	*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}

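/*
 * Check whether @hpa falls into a preregistered device memory region.
 * If it does, return through @size how much of the region (capped at
 * 1UL << pageshift) is available starting at @hpa.
 */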
bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
		unsigned int pageshift, unsigned long *size)
{
	struct mm_iommu_table_group_mem_t *mem;
	unsigned long end;

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
			continue;

		end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
		if ((mem->dev_hpa <= hpa) && (hpa < end)) {
			/*
			 * Since the IOMMU page size might be bigger than
			 * PAGE_SIZE, the amount of preregistered memory
			 * starting from @hpa might be smaller than 1<<pageshift
			 * and the caller needs to distinguish this situation.
			 */
			*size = min(1UL << pageshift, end - hpa);
			return true;
		}
	}

	return false;
}
EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);

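/*
 * mm_iommu_mapped_inc()/mm_iommu_mapped_dec() track active IOMMU mappings
 * of the region. The increment fails once the final mm_iommu_put() has
 * dropped @mapped to zero; the decrement never takes @mapped below the
 * baseline of 1 held while the region is registered.
 */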
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
	if (atomic64_inc_not_zero(&mem->mapped))
		return 0;

	/* Last mm_iommu_put() has been called, no more mappings allowed */
	return -ENXIO;
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);

void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
{
	atomic64_add_unless(&mem->mapped, -1, 1);
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);

void mm_iommu_init(struct mm_struct *mm)
{
	INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
}