// SPDX-License-Identifier: GPL-2.0-only
/*
 * MMU-based software IOTLB.
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/vdpa.h>

#include "iova_domain.h"

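/*
 * Add a [start, last] mapping to the domain's IOTLB, taking a reference
 * on the backing file so it stays alive for the lifetime of the entry.
 * Called with iotlb_lock held, hence the GFP_ATOMIC allocation.
 */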
static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
				 u64 start, u64 last,
				 u64 addr, unsigned int perm,
				 struct file *file, u64 offset)
{
	struct vdpa_map_file *map_file;
	int ret;

	map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC);
	if (!map_file)
		return -ENOMEM;

	map_file->file = get_file(file);
	map_file->offset = offset;

	ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
					addr, perm, map_file);
	if (ret) {
		fput(map_file->file);
		kfree(map_file);
		return ret;
	}
	return 0;
}

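/*
 * Remove all IOTLB entries overlapping [start, last], dropping the file
 * reference taken by vduse_iotlb_add_range(). Caller holds iotlb_lock.
 */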
static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
				  u64 start, u64 last)
{
	struct vdpa_map_file *map_file;
	struct vhost_iotlb_map *map;

	while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) {
		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_iotlb_map_free(domain->iotlb, map);
	}
}

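/*
 * Replace the domain's current mappings with the ones in @iotlb,
 * rolling back to an empty IOTLB if any entry fails to copy.
 */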
int vduse_domain_set_map(struct vduse_iova_domain *domain,
			 struct vhost_iotlb *iotlb)
{
	struct vdpa_map_file *map_file;
	struct vhost_iotlb_map *map;
	u64 start = 0ULL, last = ULLONG_MAX;
	int ret;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, start, last);

	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		map_file = (struct vdpa_map_file *)map->opaque;
		ret = vduse_iotlb_add_range(domain, map->start, map->last,
					    map->addr, map->perm,
					    map_file->file,
					    map_file->offset);
		if (ret)
			goto err;
	}
	spin_unlock(&domain->iotlb_lock);

	return 0;
err:
	vduse_iotlb_del_range(domain, start, last);
	spin_unlock(&domain->iotlb_lock);
	return ret;
}

void vduse_domain_clear_map(struct vduse_iova_domain *domain,
			    struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	u64 start = 0ULL, last = ULLONG_MAX;

	spin_lock(&domain->iotlb_lock);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		vduse_iotlb_del_range(domain, map->start, map->last);
	}
	spin_unlock(&domain->iotlb_lock);
}

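/*
 * Record the original physical address for each PAGE_SIZE chunk of
 * [iova, iova + size - 1] and allocate backing bounce pages on demand.
 * Runs under bounce_lock, so allocations must be GFP_ATOMIC.
 */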
static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
					 u64 iova, u64 size, u64 paddr)
{
	struct vduse_bounce_map *map;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		if (!map->bounce_page) {
			map->bounce_page = alloc_page(GFP_ATOMIC);
			if (!map->bounce_page)
				return -ENOMEM;
		}
		map->orig_phys = paddr;
		paddr += PAGE_SIZE;
		iova += PAGE_SIZE;
	}
	return 0;
}

static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
					   u64 iova, u64 size)
{
	struct vduse_bounce_map *map;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		map->orig_phys = INVALID_PHYS_ADDR;
		iova += PAGE_SIZE;
	}
}

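/*
 * Copy between the original buffer (which may span page boundaries)
 * and a kernel virtual address, in at most page-sized chunks.
 * DMA_TO_DEVICE copies from the original pages into the bounce buffer;
 * otherwise data is copied back to the original pages.
 */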
static void do_bounce(phys_addr_t orig, void *addr, size_t size,
		      enum dma_data_direction dir)
{
	unsigned long pfn = PFN_DOWN(orig);
	unsigned int offset = offset_in_page(orig);
	struct page *page;
	unsigned int sz = 0;

	while (size) {
		sz = min_t(size_t, PAGE_SIZE - offset, size);

		page = pfn_to_page(pfn);
		if (dir == DMA_TO_DEVICE)
			memcpy_from_page(addr, page, offset, sz);
		else
			memcpy_to_page(page, offset, addr, sz);

		size -= sz;
		pfn++;
		addr += sz;
		offset = 0;
	}
}

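/*
 * Bounce data for [iova, iova + size - 1] between the original buffer
 * and the bounce pages, one page at a time. The caller must hold
 * bounce_lock and have mapped the range beforehand.
 */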
static void vduse_domain_bounce(struct vduse_iova_domain *domain,
				dma_addr_t iova, size_t size,
				enum dma_data_direction dir)
{
	struct vduse_bounce_map *map;
	unsigned int offset;
	void *addr;
	size_t sz;

	if (iova >= domain->bounce_size)
		return;

	while (size) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		offset = offset_in_page(iova);
		sz = min_t(size_t, PAGE_SIZE - offset, size);

		if (WARN_ON(!map->bounce_page ||
			    map->orig_phys == INVALID_PHYS_ADDR))
			return;

		addr = kmap_local_page(map->bounce_page);
		do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
		kunmap_local(addr);
		size -= sz;
		iova += sz;
	}
}

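/*
 * Look up the page backing a coherent allocation at @iova and take a
 * reference on it, for use by the userspace mmap() fault handler.
 */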
static struct page *
vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
{
	u64 start = iova & PAGE_MASK;
	u64 last = start + PAGE_SIZE - 1;
	struct vhost_iotlb_map *map;
	struct page *page = NULL;

	spin_lock(&domain->iotlb_lock);
	map = vhost_iotlb_itree_first(domain->iotlb, start, last);
	if (!map)
		goto out;

	page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
	get_page(page);
out:
	spin_unlock(&domain->iotlb_lock);

	return page;
}

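/*
 * Look up the kernel bounce page at @iova for the mmap() fault handler.
 * Returns NULL once userspace has registered its own bounce pages.
 */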
static struct page *
vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
{
	struct vduse_bounce_map *map;
	struct page *page = NULL;

	read_lock(&domain->bounce_lock);
	map = &domain->bounce_maps[iova >> PAGE_SHIFT];
	if (domain->user_bounce_pages || !map->bounce_page)
		goto out;

	page = map->bounce_page;
	get_page(page);
out:
	read_unlock(&domain->bounce_lock);

	return page;
}

static void
vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
{
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;

	bounce_pfns = domain->bounce_size >> PAGE_SHIFT;

	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR))
			continue;

		if (!map->bounce_page)
			continue;

		__free_page(map->bounce_page);
		map->bounce_page = NULL;
	}
}

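/*
 * Switch the domain from kernel-allocated bounce pages to pages supplied
 * by userspace, copying any in-use contents over first. @count must cover
 * the whole bounce region; partial registration is not supported.
 */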
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
				       struct page **pages, int count)
{
	struct vduse_bounce_map *map;
	int i, ret;

	/* Partial mapping of the bounce region is not supported */
	if (count != (domain->bounce_size >> PAGE_SHIFT))
		return -EINVAL;

	write_lock(&domain->bounce_lock);
	ret = -EEXIST;
	if (domain->user_bounce_pages)
		goto out;

	for (i = 0; i < count; i++) {
		map = &domain->bounce_maps[i];
		if (map->bounce_page) {
			/* Copy kernel page to user page if it's in use */
			if (map->orig_phys != INVALID_PHYS_ADDR)
				memcpy_to_page(pages[i], 0,
					       page_address(map->bounce_page),
					       PAGE_SIZE);
			__free_page(map->bounce_page);
		}
		map->bounce_page = pages[i];
		get_page(pages[i]);
	}
	domain->user_bounce_pages = true;
	ret = 0;
out:
	write_unlock(&domain->bounce_lock);

	return ret;
}

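/*
 * Undo vduse_domain_add_user_bounce_pages(): copy any in-use user pages
 * back into freshly allocated kernel pages and drop the user page
 * references. Runs under bounce_lock, so the copy target is allocated
 * with GFP_ATOMIC; __GFP_NOFAIL because the data cannot be dropped.
 */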
void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
{
	struct vduse_bounce_map *map;
	unsigned long i, count;

	write_lock(&domain->bounce_lock);
	if (!domain->user_bounce_pages)
		goto out;

	count = domain->bounce_size >> PAGE_SHIFT;
	for (i = 0; i < count; i++) {
		struct page *page = NULL;

		map = &domain->bounce_maps[i];
		if (WARN_ON(!map->bounce_page))
			continue;

		/* Copy user page to kernel page if it's in use */
		if (map->orig_phys != INVALID_PHYS_ADDR) {
			page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL);
			memcpy_from_page(page_address(page),
					 map->bounce_page, 0, PAGE_SIZE);
		}
		put_page(map->bounce_page);
		map->bounce_page = page;
	}
	domain->user_bounce_pages = false;
out:
	write_unlock(&domain->bounce_lock);
}

void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
{
	if (!domain->bounce_map)
		return;

	spin_lock(&domain->iotlb_lock);
	if (!domain->bounce_map)
		goto unlock;

	vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
	domain->bounce_map = 0;
unlock:
	spin_unlock(&domain->iotlb_lock);
}

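/*
 * Lazily map the whole bounce region into the IOTLB the first time a
 * streaming DMA mapping is requested, using a double-checked test on
 * bounce_map to keep the fast path lock-free.
 */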
static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
{
	int ret = 0;

	if (domain->bounce_map)
		return 0;

	spin_lock(&domain->iotlb_lock);
	if (domain->bounce_map)
		goto unlock;

	ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
				    0, VHOST_MAP_RW, domain->file, 0);
	if (ret)
		goto unlock;

	domain->bounce_map = 1;
unlock:
	spin_unlock(&domain->iotlb_lock);
	return ret;
}

static dma_addr_t
vduse_domain_alloc_iova(struct iova_domain *iovad,
			unsigned long size, unsigned long limit)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;
	unsigned long iova_pfn;

	iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);

	return (dma_addr_t)iova_pfn << shift;
}

static void vduse_domain_free_iova(struct iova_domain *iovad,
				   dma_addr_t iova, size_t size)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;

	free_iova_fast(iovad, iova >> shift, iova_len);
}

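/*
 * DMA map a page through the bounce buffer: allocate an IOVA from the
 * streaming range, bind it to the original physical address and, for
 * writes to the device, copy the data into the bounce pages.
 */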
dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
				 struct page *page, unsigned long offset,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;
	unsigned long limit = domain->bounce_size - 1;
	phys_addr_t pa = page_to_phys(page) + offset;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);

	if (!iova)
		return DMA_MAPPING_ERROR;

	if (vduse_domain_init_bounce_map(domain))
		goto err;

	read_lock(&domain->bounce_lock);
	if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
		goto err_unlock;

	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
		vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);

	read_unlock(&domain->bounce_lock);

	return iova;
err_unlock:
	read_unlock(&domain->bounce_lock);
err:
	vduse_domain_free_iova(iovad, iova, size);
	return DMA_MAPPING_ERROR;
}

void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
			     dma_addr_t dma_addr, size_t size,
			     enum dma_data_direction dir, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;

	read_lock(&domain->bounce_lock);
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);

	vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
	read_unlock(&domain->bounce_lock);
	vduse_domain_free_iova(iovad, dma_addr, size);
}

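/*
 * Allocate a coherent DMA buffer: carve an IOVA out of the consistent
 * range above the bounce region and expose the backing pages via the
 * IOTLB so userspace can mmap() them.
 */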
void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
				  size_t size, dma_addr_t *dma_addr,
				  gfp_t flag, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	unsigned long limit = domain->iova_limit;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
	void *orig = alloc_pages_exact(size, flag);

	if (!iova || !orig)
		goto err;

	spin_lock(&domain->iotlb_lock);
	if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
				  virt_to_phys(orig), VHOST_MAP_RW,
				  domain->file, (u64)iova)) {
		spin_unlock(&domain->iotlb_lock);
		goto err;
	}
	spin_unlock(&domain->iotlb_lock);

	*dma_addr = iova;

	return orig;
err:
	*dma_addr = DMA_MAPPING_ERROR;
	if (orig)
		free_pages_exact(orig, size);
	if (iova)
		vduse_domain_free_iova(iovad, iova, size);

	return NULL;
}

void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
				void *vaddr, dma_addr_t dma_addr,
				unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	struct vhost_iotlb_map *map;
	struct vdpa_map_file *map_file;
	phys_addr_t pa;

	spin_lock(&domain->iotlb_lock);
	map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
				      (u64)dma_addr + size - 1);
	if (WARN_ON(!map)) {
		spin_unlock(&domain->iotlb_lock);
		return;
	}
	map_file = (struct vdpa_map_file *)map->opaque;
	fput(map_file->file);
	kfree(map_file);
	pa = map->addr;
	vhost_iotlb_map_free(domain->iotlb, map);
	spin_unlock(&domain->iotlb_lock);

	vduse_domain_free_iova(iovad, dma_addr, size);
	free_pages_exact(phys_to_virt(pa), size);
}

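/*
 * Fault handler for userspace mappings of the domain: IOVAs below
 * bounce_size are backed by bounce pages, everything above by
 * coherent allocations.
 */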
static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
{
	struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
	unsigned long iova = vmf->pgoff << PAGE_SHIFT;
	struct page *page;

	if (!domain)
		return VM_FAULT_SIGBUS;

	if (iova < domain->bounce_size)
		page = vduse_domain_get_bounce_page(domain, iova);
	else
		page = vduse_domain_get_coherent_page(domain, iova);

	if (!page)
		return VM_FAULT_SIGBUS;

	vmf->page = page;

	return 0;
}

static const struct vm_operations_struct vduse_domain_mmap_ops = {
	.fault = vduse_domain_mmap_fault,
};

static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vduse_iova_domain *domain = file->private_data;

	vm_flags_set(vma, VM_DONTDUMP | VM_DONTEXPAND);
	vma->vm_private_data = domain;
	vma->vm_ops = &vduse_domain_mmap_ops;

	return 0;
}

static int vduse_domain_release(struct inode *inode, struct file *file)
{
	struct vduse_iova_domain *domain = file->private_data;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
	vduse_domain_remove_user_bounce_pages(domain);
	vduse_domain_free_kernel_bounce_pages(domain);
	spin_unlock(&domain->iotlb_lock);
	put_iova_domain(&domain->stream_iovad);
	put_iova_domain(&domain->consistent_iovad);
	vhost_iotlb_free(domain->iotlb);
	vfree(domain->bounce_maps);
	kfree(domain);

	return 0;
}

static const struct file_operations vduse_domain_fops = {
	.owner = THIS_MODULE,
	.mmap = vduse_domain_mmap,
	.release = vduse_domain_release,
};

void vduse_domain_destroy(struct vduse_iova_domain *domain)
{
	fput(domain->file);
}

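/*
 * Create an IOVA domain: [0, bounce_size) is served by bounce pages for
 * streaming DMA, [bounce_size, iova_limit) by coherent allocations. The
 * domain's lifetime is tied to an anon inode so userspace mmap()s keep
 * it alive after vduse_domain_destroy().
 */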
struct vduse_iova_domain *
vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
{
	struct vduse_iova_domain *domain;
	struct file *file;
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;
	int ret;

	bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
	if (iova_limit <= bounce_size)
		return NULL;

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return NULL;

	domain->iotlb = vhost_iotlb_alloc(0, 0);
	if (!domain->iotlb)
		goto err_iotlb;

	domain->iova_limit = iova_limit;
	domain->bounce_size = PAGE_ALIGN(bounce_size);
	domain->bounce_maps = vzalloc(bounce_pfns *
				sizeof(struct vduse_bounce_map));
	if (!domain->bounce_maps)
		goto err_map;

	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		map->orig_phys = INVALID_PHYS_ADDR;
	}
	file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
				domain, O_RDWR);
	if (IS_ERR(file))
		goto err_file;

	domain->file = file;
	rwlock_init(&domain->bounce_lock);
	spin_lock_init(&domain->iotlb_lock);
	init_iova_domain(&domain->stream_iovad,
			PAGE_SIZE, IOVA_START_PFN);
	ret = iova_domain_init_rcaches(&domain->stream_iovad);
	if (ret)
		goto err_iovad_stream;
	init_iova_domain(&domain->consistent_iovad,
			PAGE_SIZE, bounce_pfns);
	ret = iova_domain_init_rcaches(&domain->consistent_iovad);
	if (ret)
		goto err_iovad_consistent;

	return domain;
err_iovad_consistent:
	put_iova_domain(&domain->stream_iovad);
err_iovad_stream:
	fput(file);
err_file:
	vfree(domain->bounce_maps);
err_map:
	vhost_iotlb_free(domain->iotlb);
err_iotlb:
	kfree(domain);
	return NULL;
}

int vduse_domain_init(void)
{
	return iova_cache_get();
}

void vduse_domain_exit(void)
{
	iova_cache_put();
}