// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/genalloc.h>

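/*
 * cb_map_mem() - map a CB to the device MMU.
 *
 * Allocate device virtual addresses from the context's CB VA pool, one MMU
 * page at a time, and map them to the CB's bus (DMA) address. On failure,
 * any partial mapping is torn down and the VA blocks are returned to the
 * pool.
 */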
static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_vm_va_block *va_block, *tmp;
	dma_addr_t bus_addr;
	u64 virt_addr;
	u32 page_size = prop->pmmu.page_size;
	s32 offset;
	int rc;

	if (!hdev->supports_cb_mapping) {
		dev_err_ratelimited(hdev->dev,
				"Cannot map CB because no VA range is allocated for CB mapping\n");
		return -EINVAL;
	}

	if (!hdev->mmu_enable) {
		dev_err_ratelimited(hdev->dev,
				"Cannot map CB because MMU is disabled\n");
		return -EINVAL;
	}

	INIT_LIST_HEAD(&cb->va_block_list);

	for (bus_addr = cb->bus_address;
			bus_addr < cb->bus_address + cb->size;
			bus_addr += page_size) {

		virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size);
		if (!virt_addr) {
			dev_err(hdev->dev,
				"Failed to allocate device virtual address for CB\n");
			rc = -ENOMEM;
			goto err_va_pool_free;
		}

		va_block = kzalloc(sizeof(*va_block), GFP_KERNEL);
		if (!va_block) {
			rc = -ENOMEM;
			gen_pool_free(ctx->cb_va_pool, virt_addr, page_size);
			goto err_va_pool_free;
		}

		va_block->start = virt_addr;
		va_block->end = virt_addr + page_size;
		va_block->size = page_size;
		list_add_tail(&va_block->node, &cb->va_block_list);
	}

	mutex_lock(&ctx->mmu_lock);

	bus_addr = cb->bus_address;
	offset = 0;
	list_for_each_entry(va_block, &cb->va_block_list, node) {
		rc = hl_mmu_map(ctx, va_block->start, bus_addr, va_block->size,
				list_is_last(&va_block->node,
						&cb->va_block_list));
		if (rc) {
			dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
				va_block->start);
			goto err_va_umap;
		}

		bus_addr += va_block->size;
		offset += va_block->size;
	}

	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);

	mutex_unlock(&ctx->mmu_lock);

	cb->is_mmu_mapped = true;

	return 0;

err_va_umap:
	list_for_each_entry(va_block, &cb->va_block_list, node) {
		if (offset <= 0)
			break;
		hl_mmu_unmap(ctx, va_block->start, va_block->size,
				offset <= va_block->size);
		offset -= va_block->size;
	}

	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);

	mutex_unlock(&ctx->mmu_lock);

err_va_pool_free:
	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
		list_del(&va_block->node);
		kfree(va_block);
	}

	return rc;
}

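/*
 * cb_unmap_mem() - undo cb_map_mem(): unmap the CB from the device MMU and
 * return its virtual address blocks to the context's CB VA pool.
 */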
static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm_va_block *va_block, *tmp;

	mutex_lock(&ctx->mmu_lock);

	list_for_each_entry(va_block, &cb->va_block_list, node)
		if (hl_mmu_unmap(ctx, va_block->start, va_block->size,
				list_is_last(&va_block->node,
						&cb->va_block_list)))
			dev_warn_ratelimited(hdev->dev,
					"Failed to unmap CB's va 0x%llx\n",
					va_block->start);

	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);

	mutex_unlock(&ctx->mmu_lock);

	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
		list_del(&va_block->node);
		kfree(va_block);
	}
}

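/* cb_fini() - free the CB's backing memory and the CB descriptor itself. */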
static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_internal)
		gen_pool_free(hdev->internal_cb_pool,
				(uintptr_t)cb->kernel_address, cb->size);
	else
		hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
				cb->kernel_address, cb->bus_address);

	kfree(cb);
}

static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_pool) {
		spin_lock(&hdev->cb_pool_lock);
		list_add(&cb->pool_list, &hdev->cb_pool);
		spin_unlock(&hdev->cb_pool_lock);
	} else {
		cb_fini(hdev, cb);
	}
}

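/*
 * cb_release() - kref release callback, invoked when the last reference to
 * the CB is dropped. Unmaps the CB from the device MMU if needed, releases
 * the context reference and returns the CB to the pool or frees it.
 */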
static void cb_release(struct kref *ref)
{
	struct hl_device *hdev;
	struct hl_cb *cb;

	cb = container_of(ref, struct hl_cb, refcount);
	hdev = cb->hdev;

	hl_debugfs_remove_cb(cb);

	if (cb->is_mmu_mapped)
		cb_unmap_mem(cb->ctx, cb);

	hl_ctx_put(cb->ctx);

	cb_do_release(hdev, cb);
}

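/*
 * hl_cb_alloc() - allocate a CB descriptor and its backing memory.
 *
 * Internal CBs are carved out of the device's internal CB pool, while all
 * other CBs are allocated as DMA coherent memory. Kernel-context
 * allocations use GFP_ATOMIC because this path can be called from the
 * command submission flow (see the comment inside the function).
 */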
static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
					int ctx_id, bool internal_cb)
{
	struct hl_cb *cb;
	u32 cb_offset;
	void *p;

	/*
	 * We use GFP_ATOMIC here because this function can be called from
	 * the latency-sensitive code path for command submission. Due to H/W
	 * limitations in some of the ASICs, the kernel must copy the user CB
	 * that is designated for an external queue and actually enqueue
	 * the kernel's copy. Hence, we must never sleep in this code section
	 * and must use GFP_ATOMIC for all memory allocations.
	 */
	if (ctx_id == HL_KERNEL_ASID_ID)
		cb = kzalloc(sizeof(*cb), GFP_ATOMIC);
	else
		cb = kzalloc(sizeof(*cb), GFP_KERNEL);

	if (!cb)
		return NULL;

	if (internal_cb) {
		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
		if (!p) {
			kfree(cb);
			return NULL;
		}

		cb_offset = p - hdev->internal_cb_pool_virt_addr;
		cb->is_internal = true;
		cb->bus_address = hdev->internal_cb_va_base + cb_offset;
	} else if (ctx_id == HL_KERNEL_ASID_ID) {
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
						&cb->bus_address, GFP_ATOMIC);
	} else {
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
						&cb->bus_address,
						GFP_USER | __GFP_ZERO);
	}

	if (!p) {
		dev_err(hdev->dev,
			"failed to allocate %d bytes of DMA memory for CB\n",
			cb_size);
		kfree(cb);
		return NULL;
	}

	cb->kernel_address = p;
	cb->size = cb_size;

	return cb;
}

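/*
 * hl_cb_create() - create a CB and return an opaque handle for it.
 *
 * Kernel-context CBs that fit the pre-allocated pool are reused from it
 * instead of allocating new memory. The returned handle is the IDR value
 * OR'd with HL_MMAP_TYPE_CB and shifted left by PAGE_SHIFT, so user space
 * can pass it directly as an mmap offset.
 */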
int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
			struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
			bool map_cb, u64 *handle)
{
	struct hl_cb *cb;
	bool alloc_new_cb = true;
	int rc, ctx_id = ctx->asid;

	/*
	 * Can't use the generic function to check this because of a special
	 * case where we create a CB as part of the reset process
	 */
	if ((hdev->disabled) || ((atomic_read(&hdev->in_reset)) &&
					(ctx_id != HL_KERNEL_ASID_ID))) {
		dev_warn_ratelimited(hdev->dev,
			"Device is disabled or in reset. Can't create new CBs\n");
		rc = -EBUSY;
		goto out_err;
	}

	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size %d must be less than %d\n",
			cb_size, SZ_2M);
		rc = -EINVAL;
		goto out_err;
	}

	if (!internal_cb) {
		/* Minimum allocation must be PAGE_SIZE */
		if (cb_size < PAGE_SIZE)
			cb_size = PAGE_SIZE;

		if (ctx_id == HL_KERNEL_ASID_ID &&
				cb_size <= hdev->asic_prop.cb_pool_cb_size) {

			spin_lock(&hdev->cb_pool_lock);
			if (!list_empty(&hdev->cb_pool)) {
				cb = list_first_entry(&hdev->cb_pool,
						typeof(*cb), pool_list);
				list_del(&cb->pool_list);
				spin_unlock(&hdev->cb_pool_lock);
				alloc_new_cb = false;
			} else {
				spin_unlock(&hdev->cb_pool_lock);
				dev_dbg(hdev->dev, "CB pool is empty\n");
			}
		}
	}

	if (alloc_new_cb) {
		cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb);
		if (!cb) {
			rc = -ENOMEM;
			goto out_err;
		}
	}

	cb->hdev = hdev;
	cb->ctx = ctx;
	hl_ctx_get(hdev, cb->ctx);

	if (map_cb) {
		if (ctx_id == HL_KERNEL_ASID_ID) {
			dev_err(hdev->dev,
				"CB mapping is not supported for kernel context\n");
			rc = -EINVAL;
			goto release_cb;
		}

		rc = cb_map_mem(ctx, cb);
		if (rc)
			goto release_cb;
	}

	spin_lock(&mgr->cb_lock);
	rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
	spin_unlock(&mgr->cb_lock);

	if (rc < 0) {
		dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n");
		goto unmap_mem;
	}

	cb->id = (u64) rc;

	kref_init(&cb->refcount);
	spin_lock_init(&cb->lock);

	/*
	 * The idr handle is 32-bit, so we can safely OR it with a mask that
	 * is above 32 bits
	 */
	*handle = cb->id | HL_MMAP_TYPE_CB;
	*handle <<= PAGE_SHIFT;

	hl_debugfs_add_cb(cb);

	return 0;

unmap_mem:
	if (cb->is_mmu_mapped)
		cb_unmap_mem(cb->ctx, cb);
release_cb:
	hl_ctx_put(cb->ctx);
	cb_do_release(hdev, cb);
out_err:
	*handle = 0;

	return rc;
}

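/*
 * hl_cb_destroy() - remove the CB from the IDR and drop the reference taken
 * at creation time. The CB itself is freed only when its last reference
 * (e.g. an active mmap) goes away.
 */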
int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle)
{
	struct hl_cb *cb;
	u32 handle;
	int rc = 0;

	/*
	 * The handle was given to the user for doing mmap, so shift it back
	 * to the value the idr module originally gave us
	 */
	cb_handle >>= PAGE_SHIFT;
	handle = (u32) cb_handle;

	spin_lock(&mgr->cb_lock);

	cb = idr_find(&mgr->cb_handles, handle);
	if (cb) {
		idr_remove(&mgr->cb_handles, handle);
		spin_unlock(&mgr->cb_lock);
		kref_put(&cb->refcount, cb_release);
	} else {
		spin_unlock(&mgr->cb_lock);
		dev_err(hdev->dev,
			"CB destroy failed, no match to handle 0x%x\n", handle);
		rc = -EINVAL;
	}

	return rc;
}

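/*
 * hl_cb_ioctl() - dispatcher for the CB ioctl: creates or destroys a CB on
 * behalf of the calling user process.
 */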
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cb_args *args = data;
	struct hl_device *hdev = hpriv->hdev;
	u64 handle = 0;
	int rc;

	if (hl_device_disabled_or_in_reset(hdev)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't execute CB IOCTL\n",
			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
		return -EBUSY;
	}

	switch (args->in.op) {
	case HL_CB_OP_CREATE:
		if (args->in.cb_size > HL_MAX_CB_SIZE) {
			dev_err(hdev->dev,
				"User requested CB size %d must be less than %d\n",
				args->in.cb_size, HL_MAX_CB_SIZE);
			rc = -EINVAL;
		} else {
			rc = hl_cb_create(hdev, &hpriv->cb_mgr, hpriv->ctx,
					args->in.cb_size, false,
					!!(args->in.flags & HL_CB_FLAGS_MAP),
					&handle);
		}

		memset(args, 0, sizeof(*args));
		args->out.cb_handle = handle;
		break;

	case HL_CB_OP_DESTROY:
		rc = hl_cb_destroy(hdev, &hpriv->cb_mgr,
					args->in.cb_handle);
		break;

	default:
		rc = -ENOTTY;
		break;
	}

	return rc;
}

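/*
 * cb_vm_close() - VMA close callback. A partial unmap only shrinks the
 * recorded mmap size; once the whole mapping is gone, the CB is marked as
 * not mmapped and the mmap reference is dropped.
 */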
static void cb_vm_close(struct vm_area_struct *vma)
{
	struct hl_cb *cb = (struct hl_cb *) vma->vm_private_data;
	long new_mmap_size;

	new_mmap_size = cb->mmap_size - (vma->vm_end - vma->vm_start);

	if (new_mmap_size > 0) {
		cb->mmap_size = new_mmap_size;
		return;
	}

	spin_lock(&cb->lock);
	cb->mmap = false;
	spin_unlock(&cb->lock);

	hl_cb_put(cb);
	vma->vm_private_data = NULL;
}

static const struct vm_operations_struct cb_vm_ops = {
	.close = cb_vm_close
};

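/*
 * hl_cb_mmap() - map a CB into user space.
 *
 * The CB handle is passed in vma->vm_pgoff. The requested size must match
 * the CB size rounded up to PAGE_SIZE, and a CB can be mmapped only once at
 * a time. On success, the reference taken by hl_cb_get() is handed over to
 * the VMA and released in cb_vm_close().
 */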
int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cb *cb;
	u32 handle, user_cb_size;
	int rc;

	/* We use the page offset to hold the idr handle and thus we need to
	 * clear it before doing the mmap itself
	 */
	handle = vma->vm_pgoff;
	vma->vm_pgoff = 0;

	/* A reference on the CB is taken here */
	cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle);
	if (!cb) {
		dev_err(hdev->dev,
			"CB mmap failed, no match to handle 0x%x\n", handle);
		return -EINVAL;
	}

	/* Validation check */
	user_cb_size = vma->vm_end - vma->vm_start;
	if (user_cb_size != ALIGN(cb->size, PAGE_SIZE)) {
		dev_err(hdev->dev,
			"CB mmap failed, mmap size 0x%lx != 0x%x cb size\n",
			vma->vm_end - vma->vm_start, cb->size);
		rc = -EINVAL;
		goto put_cb;
	}

	if (!access_ok((void __user *) (uintptr_t) vma->vm_start,
							user_cb_size)) {
		dev_err(hdev->dev,
			"user pointer is invalid - 0x%lx\n",
			vma->vm_start);

		rc = -EINVAL;
		goto put_cb;
	}

	spin_lock(&cb->lock);

	if (cb->mmap) {
		dev_err(hdev->dev,
			"CB mmap failed, CB is already mmapped to user\n");
		rc = -EINVAL;
		goto release_lock;
	}

	cb->mmap = true;

	spin_unlock(&cb->lock);

	vma->vm_ops = &cb_vm_ops;

	/*
	 * Note: We're transferring the cb reference to
	 * vma->vm_private_data here.
	 */

	vma->vm_private_data = cb;

	rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
					cb->bus_address, cb->size);
	if (rc) {
		spin_lock(&cb->lock);
		cb->mmap = false;
		goto release_lock;
	}

	cb->mmap_size = cb->size;

	return 0;

release_lock:
	spin_unlock(&cb->lock);
put_cb:
	hl_cb_put(cb);
	return rc;
}

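/*
 * hl_cb_get() - look up a CB by handle and take a reference on it.
 * The caller must release the reference with hl_cb_put().
 */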
struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
			u32 handle)
{
	struct hl_cb *cb;

	spin_lock(&mgr->cb_lock);
	cb = idr_find(&mgr->cb_handles, handle);

	if (!cb) {
		spin_unlock(&mgr->cb_lock);
		dev_warn(hdev->dev,
			"CB get failed, no match to handle 0x%x\n", handle);
		return NULL;
	}

	kref_get(&cb->refcount);

	spin_unlock(&mgr->cb_lock);

	return cb;
}

void hl_cb_put(struct hl_cb *cb)
{
	kref_put(&cb->refcount, cb_release);
}

void hl_cb_mgr_init(struct hl_cb_mgr *mgr)
{
	spin_lock_init(&mgr->cb_lock);
	idr_init(&mgr->cb_handles);
}

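/*
 * hl_cb_mgr_fini() - drop the creation-time reference of every CB left in
 * the IDR. A CB that survives the kref_put() was not released by its owner,
 * so an error is printed for it.
 */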
void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
{
	struct hl_cb *cb;
	struct idr *idp;
	u32 id;

	idp = &mgr->cb_handles;

	idr_for_each_entry(idp, cb, id) {
		if (kref_put(&cb->refcount, cb_release) != 1)
			dev_err(hdev->dev,
				"CB %d for CTX ID %d is still alive\n",
				id, cb->ctx->asid);
	}

	idr_destroy(&mgr->cb_handles);
}

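/*
 * hl_cb_kernel_create() - convenience wrapper that creates a CB on behalf
 * of the kernel driver and returns it with a reference taken via
 * hl_cb_get(), or NULL on failure.
 */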
struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
					bool internal_cb)
{
	u64 cb_handle;
	struct hl_cb *cb;
	int rc;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, cb_size,
				internal_cb, false, &cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate CB for the kernel driver %d\n", rc);
		return NULL;
	}

	cb_handle >>= PAGE_SHIFT;
	cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle);
	/* hl_cb_get should never fail here so use kernel WARN */
	WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle);
	if (!cb)
		goto destroy_cb;

	return cb;

destroy_cb:
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb_handle << PAGE_SHIFT);

	return NULL;
}

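/*
 * hl_cb_pool_init() - pre-allocate a pool of kernel-context CBs so the
 * command submission path can reuse them instead of allocating DMA memory
 * on every submission.
 */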
int hl_cb_pool_init(struct hl_device *hdev)
{
	struct hl_cb *cb;
	int i;

	INIT_LIST_HEAD(&hdev->cb_pool);
	spin_lock_init(&hdev->cb_pool_lock);

	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
				HL_KERNEL_ASID_ID, false);
		if (cb) {
			cb->is_pool = true;
			list_add(&cb->pool_list, &hdev->cb_pool);
		} else {
			hl_cb_pool_fini(hdev);
			return -ENOMEM;
		}
	}

	return 0;
}

int hl_cb_pool_fini(struct hl_device *hdev)
{
	struct hl_cb *cb, *tmp;

	list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) {
		list_del(&cb->pool_list);
		cb_fini(hdev, cb);
	}

	return 0;
}

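/*
 * hl_cb_va_pool_init() - create the per-context gen_pool of device virtual
 * addresses used when mapping CBs to the device MMU. A no-op on ASICs that
 * do not support CB mapping.
 */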
int hl_cb_va_pool_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!hdev->supports_cb_mapping)
		return 0;

	ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1);
	if (!ctx->cb_va_pool) {
		dev_err(hdev->dev,
			"Failed to create VA gen pool for CB mapping\n");
		return -ENOMEM;
	}

	rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr,
			prop->cb_va_end_addr - prop->cb_va_start_addr, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to VA gen pool for CB mapping\n");
		goto err_pool_destroy;
	}

	return 0;

err_pool_destroy:
	gen_pool_destroy(ctx->cb_va_pool);

	return rc;
}

void hl_cb_va_pool_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->supports_cb_mapping)
		return;

	gen_pool_destroy(ctx->cb_va_pool);
}