1/*
2 * Copyright 2008 Jerome Glisse.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 *    Jerome Glisse <glisse@freedesktop.org>
26 */
27
28#include <linux/file.h>
29#include <linux/pagemap.h>
30#include <linux/sync_file.h>
31#include <linux/dma-buf.h>
32
33#include <drm/amdgpu_drm.h>
34#include <drm/drm_syncobj.h>
35#include "amdgpu.h"
36#include "amdgpu_trace.h"
37#include "amdgpu_gmc.h"
38#include "amdgpu_gem.h"
39#include "amdgpu_ras.h"
40
/**
 * amdgpu_cs_user_fence_chunk - grab the user fence BO described by a chunk
 * @p: parser holding the submission state
 * @data: FENCE chunk payload with the GEM handle and write offset
 * @offset: returns the byte offset of the fence value inside the BO
 *
 * Looks up the user fence BO and records it in p->uf_entry. The extra BO
 * reference taken here is dropped in amdgpu_cs_parser_fini() via
 * p->uf_entry.tv.bo, including on the error paths below.
 *
 * Returns 0 on success or -EINVAL on lookup failure or an invalid BO.
 */
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_cs_chunk_fence *data,
				      uint32_t *offset)
{
	struct drm_gem_object *gobj;
	struct amdgpu_bo *bo;
	unsigned long size;

	gobj = drm_gem_object_lookup(p->filp, data->handle);
	if (gobj == NULL)
		return -EINVAL;

	/* Keep our own BO reference; the GEM object reference is dropped
	 * right after, the BO reference lives in p->uf_entry.
	 */
	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	p->uf_entry.priority = 0;
	p->uf_entry.tv.bo = &bo->tbo;
	/* One for TTM and one for the CS job */
	p->uf_entry.tv.num_shared = 2;

	drm_gem_object_put(gobj);

	/* The BO must be exactly one page and the 8 byte fence value must
	 * fit completely inside it.
	 */
	size = amdgpu_bo_size(bo);
	if (size != PAGE_SIZE || data->offset > (size - 8))
		return -EINVAL;

	/* User fences inside userptr BOs are not allowed. */
	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
		return -EINVAL;

	*offset = data->offset;
	return 0;
}
71
72static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
73				      struct drm_amdgpu_bo_list_in *data)
74{
75	int r;
76	struct drm_amdgpu_bo_list_entry *info = NULL;
77
78	r = amdgpu_bo_create_list_entry_array(data, &info);
79	if (r)
80		return r;
81
82	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
83				  &p->bo_list);
84	if (r)
85		goto error_free;
86
87	kvfree(info);
88	return 0;
89
90error_free:
91	if (info)
92		kvfree(info);
93
94	return r;
95}
96
/**
 * amdgpu_cs_parser_init - initialize the CS parser from the ioctl arguments
 * @p: parser to initialize
 * @cs: command submission ioctl data from userspace
 *
 * Copies the chunk pointer array and each chunk payload from userspace,
 * validates the chunk IDs and sizes, records user fence and BO list
 * information and allocates the job. Takes p->ctx->lock on success; the
 * lock and all allocations are released by amdgpu_cs_parser_fini().
 *
 * Returns 0 on success or a negative error code.
 */
static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	unsigned size, num_ibs = 0;
	uint32_t uf_offset = 0;
	int i;
	int ret;

	if (cs->in.num_chunks == 0)
		return -EINVAL;

	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx) {
		ret = -EINVAL;
		goto free_chunk;
	}

	/* Held until amdgpu_cs_parser_fini(), which unlocks it when
	 * parser->ctx is set.
	 */
	mutex_lock(&p->ctx->lock);

	/* skip guilty context job */
	if (atomic_read(&p->ctx->guilty) == 1) {
		ret = -ECANCELED;
		goto free_chunk;
	}

	/* get chunks */
	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t)*cs->in.num_chunks)) {
		ret = -EFAULT;
		goto free_chunk;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
			    GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto free_chunk;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
		if (copy_from_user(&user_chunk, chunk_ptr,
				       sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			/* chunk i has no kdata yet, only free [0, i-1] */
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = u64_to_user_ptr(user_chunk.chunk_data);

		/* Keep a kernel copy of the chunk payload for later parsing. */
		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			++num_ibs;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			/* payload must be at least as large as the fence chunk */
			size = sizeof(struct drm_amdgpu_cs_chunk_fence);
			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
				ret = -EINVAL;
				goto free_partial_kdata;
			}

			ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
							 &uf_offset);
			if (ret)
				goto free_partial_kdata;

			break;

		case AMDGPU_CHUNK_ID_BO_HANDLES:
			size = sizeof(struct drm_amdgpu_bo_list_in);
			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
				ret = -EINVAL;
				goto free_partial_kdata;
			}

			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
			if (ret)
				goto free_partial_kdata;

			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			/* handled later in amdgpu_cs_dependencies() */
			break;

		default:
			ret = -EINVAL;
			goto free_partial_kdata;
		}
	}

	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
	if (ret)
		goto free_all_kdata;

	/* A GPU reset invalidated this context's resources in the meantime. */
	if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
		ret = -ECANCELED;
		goto free_all_kdata;
	}

	if (p->uf_entry.tv.bo)
		p->job->uf_addr = uf_offset;
	kfree(chunk_array);

	/* Use this opportunity to fill in task info for the vm */
	amdgpu_vm_set_task_info(vm);

	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		kvfree(p->chunks[i].kdata);
	kfree(p->chunks);
	p->chunks = NULL;
	p->nchunks = 0;
free_chunk:
	kfree(chunk_array);

	return ret;
}
252
253/* Convert microseconds to bytes. */
254static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
255{
256	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
257		return 0;
258
259	/* Since accum_us is incremented by a million per second, just
260	 * multiply it by the number of MB/s to get the number of bytes.
261	 */
262	return us << adev->mm_stats.log2_max_MBps;
263}
264
265static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
266{
267	if (!adev->mm_stats.log2_max_MBps)
268		return 0;
269
270	return bytes >> adev->mm_stats.log2_max_MBps;
271}
272
273/* Returns how many bytes TTM can move right now. If no bytes can be moved,
274 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
275 * which means it can go over the threshold once. If that happens, the driver
276 * will be in debt and no other buffer migrations can be done until that debt
277 * is repaid.
278 *
279 * This approach allows moving a buffer of any size (it's important to allow
280 * that).
281 *
282 * The currency is simply time in microseconds and it increases as the clock
283 * ticks. The accumulated microseconds (us) are converted to bytes and
284 * returned.
285 */
286static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
287					      u64 *max_bytes,
288					      u64 *max_vis_bytes)
289{
290	s64 time_us, increment_us;
291	u64 free_vram, total_vram, used_vram;
292	struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
293	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
294	 * throttling.
295	 *
296	 * It means that in order to get full max MBps, at least 5 IBs per
297	 * second must be submitted and not more than 200ms apart from each
298	 * other.
299	 */
300	const s64 us_upper_bound = 200000;
301
302	if (!adev->mm_stats.log2_max_MBps) {
303		*max_bytes = 0;
304		*max_vis_bytes = 0;
305		return;
306	}
307
308	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
309	used_vram = amdgpu_vram_mgr_usage(vram_man);
310	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
311
312	spin_lock(&adev->mm_stats.lock);
313
314	/* Increase the amount of accumulated us. */
315	time_us = ktime_to_us(ktime_get());
316	increment_us = time_us - adev->mm_stats.last_update_us;
317	adev->mm_stats.last_update_us = time_us;
318	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
319                                      us_upper_bound);
320
321	/* This prevents the short period of low performance when the VRAM
322	 * usage is low and the driver is in debt or doesn't have enough
323	 * accumulated us to fill VRAM quickly.
324	 *
325	 * The situation can occur in these cases:
326	 * - a lot of VRAM is freed by userspace
327	 * - the presence of a big buffer causes a lot of evictions
328	 *   (solution: split buffers into smaller ones)
329	 *
330	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
331	 * accum_us to a positive number.
332	 */
333	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
334		s64 min_us;
335
336		/* Be more aggresive on dGPUs. Try to fill a portion of free
337		 * VRAM now.
338		 */
339		if (!(adev->flags & AMD_IS_APU))
340			min_us = bytes_to_us(adev, free_vram / 4);
341		else
342			min_us = 0; /* Reset accum_us on APUs. */
343
344		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
345	}
346
347	/* This is set to 0 if the driver is in debt to disallow (optional)
348	 * buffer moves.
349	 */
350	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
351
352	/* Do the same for visible VRAM if half of it is free */
353	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
354		u64 total_vis_vram = adev->gmc.visible_vram_size;
355		u64 used_vis_vram =
356		  amdgpu_vram_mgr_vis_usage(vram_man);
357
358		if (used_vis_vram < total_vis_vram) {
359			u64 free_vis_vram = total_vis_vram - used_vis_vram;
360			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
361							  increment_us, us_upper_bound);
362
363			if (free_vis_vram >= total_vis_vram / 2)
364				adev->mm_stats.accum_us_vis =
365					max(bytes_to_us(adev, free_vis_vram / 2),
366					    adev->mm_stats.accum_us_vis);
367		}
368
369		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
370	} else {
371		*max_vis_bytes = 0;
372	}
373
374	spin_unlock(&adev->mm_stats.lock);
375}
376
377/* Report how many bytes have really been moved for the last command
378 * submission. This can result in a debt that can stop buffer migrations
379 * temporarily.
380 */
381void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
382				  u64 num_vis_bytes)
383{
384	spin_lock(&adev->mm_stats.lock);
385	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
386	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
387	spin_unlock(&adev->mm_stats.lock);
388}
389
/**
 * amdgpu_cs_bo_validate - validate a single BO against the move budgets
 * @p: parser holding the per-submission move budgets
 * @bo: buffer object to validate
 *
 * Validates @bo into its preferred or merely allowed domains depending on
 * how much of the move budget remains, and accounts all bytes TTM actually
 * moved against the budgets.
 *
 * Returns 0 on success or a negative error code from ttm_bo_validate().
 */
static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
				 struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.resv = bo->tbo.base.resv,
		.flags = 0
	};
	uint32_t domain;
	int r;

	/* Pinned BOs cannot be moved anyway. */
	if (bo->pin_count)
		return 0;

	/* Don't move this buffer if we have depleted our allowance
	 * to move it. Don't move anything if the threshold is zero.
	 */
	if (p->bytes_moved < p->bytes_moved_threshold &&
	    (!bo->tbo.base.dma_buf ||
	    list_empty(&bo->tbo.base.dma_buf->attachments))) {
		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
			 * visible VRAM if we've depleted our allowance to do
			 * that.
			 */
			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
				domain = bo->preferred_domains;
			else
				domain = bo->allowed_domains;
		} else {
			domain = bo->preferred_domains;
		}
	} else {
		domain = bo->allowed_domains;
	}

retry:
	amdgpu_bo_placement_from_domain(bo, domain);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

	/* Account what TTM actually moved, visible VRAM tracked separately. */
	p->bytes_moved += ctx.bytes_moved;
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
	    amdgpu_bo_in_cpu_visible_vram(bo))
		p->bytes_moved_vis += ctx.bytes_moved;

	/* Retry with the larger allowed set if the preferred domains are full. */
	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
		domain = bo->allowed_domains;
		goto retry;
	}

	return r;
}
445
446static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
447{
448	struct amdgpu_cs_parser *p = param;
449	int r;
450
451	r = amdgpu_cs_bo_validate(p, bo);
452	if (r)
453		return r;
454
455	if (bo->shadow)
456		r = amdgpu_cs_bo_validate(p, bo->shadow);
457
458	return r;
459}
460
/**
 * amdgpu_cs_list_validate - validate all BOs on a reservation list
 * @p: parser holding the move budgets
 * @validated: list of amdgpu_bo_list_entry to validate
 *
 * Rejects userptr BOs that belong to a foreign mm, rebinds invalidated
 * userptr pages and validates every BO (including shadows) through
 * amdgpu_cs_validate().
 *
 * Returns 0 on success or a negative error code.
 */
static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
			    struct list_head *validated)
{
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_bo_list_entry *lobj;
	int r;

	list_for_each_entry(lobj, validated, tv.head) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
		struct mm_struct *usermm;

		/* userptr BOs may only be used from the mm that created them */
		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
		if (usermm && usermm != current->mm)
			return -EPERM;

		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
		    lobj->user_invalidated && lobj->user_pages) {
			/* Move to the CPU domain first so the old pages are
			 * unbound before the new ones are set.
			 */
			amdgpu_bo_placement_from_domain(bo,
							AMDGPU_GEM_DOMAIN_CPU);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			if (r)
				return r;

			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
						     lobj->user_pages);
		}

		r = amdgpu_cs_validate(p, bo);
		if (r)
			return r;

		/* the temporary page array is no longer needed */
		kvfree(lobj->user_pages);
		lobj->user_pages = NULL;
	}
	return 0;
}
497
/**
 * amdgpu_cs_parser_bos - gather, reserve and validate all BOs of the CS
 * @p: parser holding the submission state
 * @cs: command submission ioctl data from userspace
 *
 * Builds p->validated from the BO list (from handle, chunk or empty),
 * snapshots userptr pages, reserves all buffers, validates them against
 * the move budgets and records GDS/GWS/OA offsets plus the user fence
 * GART address in the job.
 *
 * Returns 0 on success or a negative error code; on failure the
 * reservations are already backed off.
 */
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct list_head duplicates;
	struct amdgpu_bo *gds;
	struct amdgpu_bo *gws;
	struct amdgpu_bo *oa;
	int r;

	INIT_LIST_HEAD(&p->validated);

	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
	if (cs->in.bo_list_handle) {
		/* handle and chunk are mutually exclusive */
		if (p->bo_list)
			return -EINVAL;

		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
				       &p->bo_list);
		if (r)
			return r;
	} else if (!p->bo_list) {
		/* Create an empty bo_list when no handle is provided */
		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
					  &p->bo_list);
		if (r)
			return r;
	}

	/* One for TTM and one for the CS job */
	amdgpu_bo_list_for_each_entry(e, p->bo_list)
		e->tv.num_shared = 2;

	amdgpu_bo_list_get_list(p->bo_list, &p->validated);

	INIT_LIST_HEAD(&duplicates);
	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);

	/* user fence BO is reserved as well, unless it is a page table */
	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
		list_add(&p->uf_entry.tv.head, &p->validated);

	/* Get userptr backing pages. If pages are updated after registered
	 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
	 * amdgpu_ttm_backend_bind() to flush and invalidate new pages
	 */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
		bool userpage_invalidated = false;
		int i;

		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
					sizeof(struct page *),
					GFP_KERNEL | __GFP_ZERO);
		if (!e->user_pages) {
			DRM_ERROR("calloc failure\n");
			return -ENOMEM;
		}

		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
		if (r) {
			kvfree(e->user_pages);
			e->user_pages = NULL;
			return r;
		}

		/* compare against the pages currently bound to detect updates */
		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
				userpage_invalidated = true;
				break;
			}
		}
		e->user_invalidated = userpage_invalidated;
	}

	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
				   &duplicates);
	if (unlikely(r != 0)) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
		goto out;
	}

	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
	p->bytes_moved = 0;
	p->bytes_moved_vis = 0;

	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
				      amdgpu_cs_validate, p);
	if (r) {
		DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
		goto error_validate;
	}

	r = amdgpu_cs_list_validate(p, &duplicates);
	if (r)
		goto error_validate;

	r = amdgpu_cs_list_validate(p, &p->validated);
	if (r)
		goto error_validate;

	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);

	gds = p->bo_list->gds_obj;
	gws = p->bo_list->gws_obj;
	oa = p->bo_list->oa_obj;

	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

		/* Make sure we use the exclusive slot for shared BOs */
		if (bo->prime_shared_count)
			e->tv.num_shared = 0;
		e->bo_va = amdgpu_vm_bo_find(vm, bo);
	}

	/* record the special GDS/GWS/OA apertures in the job */
	if (gds) {
		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
	}
	if (gws) {
		p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
		p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
	}
	if (oa) {
		p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
		p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
	}

	/* map the user fence BO into GART and finalize its address */
	if (!r && p->uf_entry.tv.bo) {
		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);

		r = amdgpu_ttm_alloc_gart(&uf->tbo);
		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
	}

error_validate:
	if (r)
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
out:
	return r;
}
644
645static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
646{
647	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
648	struct amdgpu_bo_list_entry *e;
649	int r;
650
651	list_for_each_entry(e, &p->validated, tv.head) {
652		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
653		struct dma_resv *resv = bo->tbo.base.resv;
654		enum amdgpu_sync_mode sync_mode;
655
656		sync_mode = amdgpu_bo_explicit_sync(bo) ?
657			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
658		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
659				     &fpriv->vm);
660		if (r)
661			return r;
662	}
663	return 0;
664}
665
666/**
667 * cs_parser_fini() - clean parser states
668 * @parser:	parser structure holding parsing context.
669 * @error:	error number
670 *
671 * If error is set than unvalidate buffer, otherwise just free memory
672 * used by parsing context.
673 **/
674static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
675				  bool backoff)
676{
677	unsigned i;
678
679	if (error && backoff)
680		ttm_eu_backoff_reservation(&parser->ticket,
681					   &parser->validated);
682
683	for (i = 0; i < parser->num_post_deps; i++) {
684		drm_syncobj_put(parser->post_deps[i].syncobj);
685		kfree(parser->post_deps[i].chain);
686	}
687	kfree(parser->post_deps);
688
689	dma_fence_put(parser->fence);
690
691	if (parser->ctx) {
692		mutex_unlock(&parser->ctx->lock);
693		amdgpu_ctx_put(parser->ctx);
694	}
695	if (parser->bo_list)
696		amdgpu_bo_list_put(parser->bo_list);
697
698	for (i = 0; i < parser->nchunks; i++)
699		kvfree(parser->chunks[i].kdata);
700	kfree(parser->chunks);
701	if (parser->job)
702		amdgpu_job_free(parser->job);
703	if (parser->uf_entry.tv.bo) {
704		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
705
706		amdgpu_bo_unref(&uf);
707	}
708}
709
/**
 * amdgpu_cs_vm_handling - handle VM emulation and page table updates
 * @p: parser holding the submission state
 *
 * For rings that parse or patch command streams in the kernel (UVD/VCE VM
 * emulation), maps each IB's VA range and hands it to the ring's parser.
 * Then updates the VM: clears freed mappings, updates all BO VAs and page
 * directories, and adds the resulting fences as job dependencies.
 *
 * Returns 0 on success or a negative error code.
 */
static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int r;

	/* Only for UVD/VCE VM emulation */
	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
		unsigned i, j;

		/* i walks the chunks, j the already allocated IBs */
		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
			struct amdgpu_bo_va_mapping *m;
			struct amdgpu_bo *aobj = NULL;
			struct amdgpu_cs_chunk *chunk;
			uint64_t offset, va_start;
			struct amdgpu_ib *ib;
			uint8_t *kptr;

			chunk = &p->chunks[i];
			ib = &p->job->ibs[j];
			chunk_ib = chunk->kdata;

			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
				continue;

			/* the IB must be completely covered by one mapping */
			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
			if (r) {
				DRM_ERROR("IB va_start is invalid\n");
				return r;
			}

			if ((va_start + chunk_ib->ib_bytes) >
			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
				return -EINVAL;
			}

			/* the IB should be reserved at this point */
			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
			if (r) {
				return r;
			}

			/* translate the VA into an offset inside the kmap */
			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
			kptr += va_start - offset;

			if (ring->funcs->parse_cs) {
				/* copy into the kernel IB and parse the copy */
				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
				amdgpu_bo_kunmap(aobj);

				r = amdgpu_ring_parse_cs(ring, p, j);
				if (r)
					return r;
			} else {
				/* patch the user IB directly in place */
				ib->ptr = (uint32_t *)kptr;
				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
				amdgpu_bo_kunmap(aobj);
				if (r)
					return r;
			}

			j++;
		}
	}

	if (!p->job->vm)
		return amdgpu_cs_sync_rings(p);


	r = amdgpu_vm_clear_freed(adev, vm, NULL);
	if (r)
		return r;

	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
	if (r)
		return r;

	r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
	if (r)
		return r;

	/* mid-command-buffer preemption and SR-IOV need the CSA mapped */
	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
		bo_va = fpriv->csa_va;
		BUG_ON(!bo_va);
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}

	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		/* ignore duplicates */
		bo = ttm_to_amdgpu_bo(e->tv.bo);
		if (!bo)
			continue;

		bo_va = e->bo_va;
		if (bo_va == NULL)
			continue;

		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}

	r = amdgpu_vm_handle_moved(adev, vm);
	if (r)
		return r;

	r = amdgpu_vm_update_pdes(adev, vm, false);
	if (r)
		return r;

	r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
	if (r)
		return r;

	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);

	if (amdgpu_vm_debug) {
		/* Invalidate all BOs to test for userspace bugs */
		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

			/* ignore duplicates */
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(adev, bo, false);
		}
	}

	return amdgpu_cs_sync_rings(p);
}
858
/**
 * amdgpu_cs_ib_fill - fill the job's IBs from the parsed IB chunks
 * @adev: amdgpu device
 * @parser: parser holding the submission state
 *
 * Resolves each IB chunk to a scheduler entity, enforces preemption rules
 * for GFX submissions and allocates the kernel-side IB structures.
 * All IB chunks of one CS must target the same entity.
 *
 * Returns 0 on success or a negative error code.
 */
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			     struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	int r, ce_preempt = 0, de_preempt = 0;
	struct amdgpu_ring *ring;
	int i, j;

	/* i walks the chunks, j the job's IB slots */
	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
		struct drm_sched_entity *entity;

		chunk = &parser->chunks[i];
		ib = &parser->job->ibs[j];
		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;

		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
		    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
					ce_preempt++;
				else
					de_preempt++;
			}

			/* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
			if (ce_preempt > 1 || de_preempt > 1)
				return -EINVAL;
		}

		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
					  chunk_ib->ip_instance, chunk_ib->ring,
					  &entity);
		if (r)
			return r;

		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
			parser->job->preamble_status |=
				AMDGPU_PREAMBLE_IB_PRESENT;

		/* all IBs of one CS must target the same entity */
		if (parser->entity && parser->entity != entity)
			return -EINVAL;

		/* Return if there is no run queue associated with this entity.
		 * Possibly because of disabled HW IP.
		 */
		if (entity->rq == NULL)
			return -EINVAL;

		parser->entity = entity;

		/* kernel-parsed rings need a kernel copy of the IB data */
		ring = to_amdgpu_ring(entity->rq->sched);
		r =  amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
				   chunk_ib->ib_bytes : 0,
				   AMDGPU_IB_POOL_DELAYED, ib);
		if (r) {
			DRM_ERROR("Failed to get ib !\n");
			return r;
		}

		ib->gpu_addr = chunk_ib->va_start;
		ib->length_dw = chunk_ib->ib_bytes / 4;
		ib->flags = chunk_ib->flags;

		j++;
	}

	/* MM engine doesn't support user fences */
	ring = to_amdgpu_ring(parser->entity->rq->sched);
	if (parser->job->uf_addr && ring->funcs->no_user_fence)
		return -EINVAL;

	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
}
938
/**
 * amdgpu_cs_process_fence_dep - add fence dependencies from a chunk
 * @p: parser holding the submission state
 * @chunk: DEPENDENCIES or SCHEDULED_DEPENDENCIES chunk
 *
 * Looks up each referenced context fence and adds it to the job's sync
 * object. For SCHEDULED_DEPENDENCIES, waits only for the dependency to be
 * scheduled, not for it to complete.
 *
 * Returns 0 on success or a negative error code.
 */
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
				       struct amdgpu_cs_chunk *chunk)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned num_deps;
	int i, r;
	struct drm_amdgpu_cs_chunk_dep *deps;

	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_dep);

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_ctx *ctx;
		struct drm_sched_entity *entity;
		struct dma_fence *fence;

		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
		if (ctx == NULL)
			return -EINVAL;

		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
					  deps[i].ip_instance,
					  deps[i].ring, &entity);
		if (r) {
			amdgpu_ctx_put(ctx);
			return r;
		}

		/* the ctx reference is only needed for the fence lookup */
		fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
		amdgpu_ctx_put(ctx);

		if (IS_ERR(fence))
			return PTR_ERR(fence);
		else if (!fence)
			continue;

		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
			/* swap the finished fence for the scheduled one */
			struct drm_sched_fence *s_fence;
			struct dma_fence *old = fence;

			s_fence = to_drm_sched_fence(fence);
			fence = dma_fence_get(&s_fence->scheduled);
			dma_fence_put(old);
		}

		r = amdgpu_sync_fence(&p->job->sync, fence);
		dma_fence_put(fence);
		if (r)
			return r;
	}
	return 0;
}
992
993static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
994						 uint32_t handle, u64 point,
995						 u64 flags)
996{
997	struct dma_fence *fence;
998	int r;
999
1000	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
1001	if (r) {
1002		DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
1003			  handle, point, r);
1004		return r;
1005	}
1006
1007	r = amdgpu_sync_fence(&p->job->sync, fence);
1008	dma_fence_put(fence);
1009
1010	return r;
1011}
1012
1013static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
1014					    struct amdgpu_cs_chunk *chunk)
1015{
1016	struct drm_amdgpu_cs_chunk_sem *deps;
1017	unsigned num_deps;
1018	int i, r;
1019
1020	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1021	num_deps = chunk->length_dw * 4 /
1022		sizeof(struct drm_amdgpu_cs_chunk_sem);
1023	for (i = 0; i < num_deps; ++i) {
1024		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
1025							  0, 0);
1026		if (r)
1027			return r;
1028	}
1029
1030	return 0;
1031}
1032
1033
1034static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
1035						     struct amdgpu_cs_chunk *chunk)
1036{
1037	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
1038	unsigned num_deps;
1039	int i, r;
1040
1041	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
1042	num_deps = chunk->length_dw * 4 /
1043		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
1044	for (i = 0; i < num_deps; ++i) {
1045		r = amdgpu_syncobj_lookup_and_add_to_sync(p,
1046							  syncobj_deps[i].handle,
1047							  syncobj_deps[i].point,
1048							  syncobj_deps[i].flags);
1049		if (r)
1050			return r;
1051	}
1052
1053	return 0;
1054}
1055
1056static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
1057					     struct amdgpu_cs_chunk *chunk)
1058{
1059	struct drm_amdgpu_cs_chunk_sem *deps;
1060	unsigned num_deps;
1061	int i;
1062
1063	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1064	num_deps = chunk->length_dw * 4 /
1065		sizeof(struct drm_amdgpu_cs_chunk_sem);
1066
1067	if (p->post_deps)
1068		return -EINVAL;
1069
1070	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
1071				     GFP_KERNEL);
1072	p->num_post_deps = 0;
1073
1074	if (!p->post_deps)
1075		return -ENOMEM;
1076
1077
1078	for (i = 0; i < num_deps; ++i) {
1079		p->post_deps[i].syncobj =
1080			drm_syncobj_find(p->filp, deps[i].handle);
1081		if (!p->post_deps[i].syncobj)
1082			return -EINVAL;
1083		p->post_deps[i].chain = NULL;
1084		p->post_deps[i].point = 0;
1085		p->num_post_deps++;
1086	}
1087
1088	return 0;
1089}
1090
1091
/**
 * amdgpu_cs_process_syncobj_timeline_out_dep - collect timeline signal deps
 * @p: parser holding the submission state
 * @chunk: TIMELINE_SIGNAL chunk
 *
 * Collects the timeline syncobjs (and pre-allocates chain nodes for
 * non-zero points) that should be signaled after submission. Entries with
 * index < num_post_deps are cleaned up by amdgpu_cs_parser_fini() on
 * failure.
 *
 * Returns 0 on success or a negative error code.
 */
static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
						      struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
	unsigned num_deps;
	int i;

	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);

	/* only a single chunk may define post dependencies */
	if (p->post_deps)
		return -EINVAL;

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];

		dep->chain = NULL;
		/* a non-zero point needs a chain node for drm_syncobj_add_point() */
		if (syncobj_deps[i].point) {
			dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
			if (!dep->chain)
				return -ENOMEM;
		}

		dep->syncobj = drm_syncobj_find(p->filp,
						syncobj_deps[i].handle);
		if (!dep->syncobj) {
			kfree(dep->chain);
			return -EINVAL;
		}
		dep->point = syncobj_deps[i].point;
		p->num_post_deps++;
	}

	return 0;
}
1135
1136static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
1137				  struct amdgpu_cs_parser *p)
1138{
1139	int i, r;
1140
1141	for (i = 0; i < p->nchunks; ++i) {
1142		struct amdgpu_cs_chunk *chunk;
1143
1144		chunk = &p->chunks[i];
1145
1146		switch (chunk->chunk_id) {
1147		case AMDGPU_CHUNK_ID_DEPENDENCIES:
1148		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
1149			r = amdgpu_cs_process_fence_dep(p, chunk);
1150			if (r)
1151				return r;
1152			break;
1153		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
1154			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
1155			if (r)
1156				return r;
1157			break;
1158		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
1159			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
1160			if (r)
1161				return r;
1162			break;
1163		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
1164			r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
1165			if (r)
1166				return r;
1167			break;
1168		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
1169			r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
1170			if (r)
1171				return r;
1172			break;
1173		}
1174	}
1175
1176	return 0;
1177}
1178
1179static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
1180{
1181	int i;
1182
1183	for (i = 0; i < p->num_post_deps; ++i) {
1184		if (p->post_deps[i].chain && p->post_deps[i].point) {
1185			drm_syncobj_add_point(p->post_deps[i].syncobj,
1186					      p->post_deps[i].chain,
1187					      p->fence, p->post_deps[i].point);
1188			p->post_deps[i].chain = NULL;
1189		} else {
1190			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
1191						  p->fence);
1192		}
1193	}
1194}
1195
/* Commit the fully parsed CS: initialize the scheduler job, re-check the
 * userptr BOs under the notifier lock, publish the finished fence and push
 * the job to the GPU scheduler.
 *
 * Returns 0 on success, -EAGAIN when userptr pages were invalidated since
 * amdgpu_cs_parser_bos() (libdrm restarts the ioctl), or a negative error
 * code from job init.
 *
 * NOTE(review): the "error_unlock" label does not unlock anything — it is
 * only reached before the notifier lock is taken.
 */
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct drm_sched_entity *entity = p->entity;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_job *job;
	uint64_t seq;
	int r;

	/* Take ownership of the job; from here on the error paths of this
	 * function must free it themselves.
	 */
	job = p->job;
	p->job = NULL;

	r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
	if (r)
		goto error_unlock;

	/* No memory allocation is allowed while holding the notifier lock.
	 * The lock is held until amdgpu_cs_submit is finished and fence is
	 * added to BOs.
	 */
	mutex_lock(&p->adev->notifier_lock);

	/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
	 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
	 */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

		/* r is 0 here (job init succeeded); OR in a 1 for every
		 * userptr BO whose pages are no longer valid.
		 */
		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
	}
	if (r) {
		r = -EAGAIN;
		goto error_abort;
	}

	p->fence = dma_fence_get(&job->base.s_fence->finished);

	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
	amdgpu_cs_post_dependencies(p);

	/* Flag the first submission that carries a preamble IB so only that
	 * one executes it as "first".
	 */
	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
	    !p->ctx->preamble_presented) {
		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
		p->ctx->preamble_presented = true;
	}

	/* The context sequence number doubles as the CS handle returned to
	 * userspace.
	 */
	cs->out.handle = seq;
	job->uf_sequence = seq;

	amdgpu_job_free_resources(job);

	trace_amdgpu_cs_ioctl(job);
	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
	drm_sched_entity_push_job(&job->base, entity);

	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);

	/* Attach the fence to all reserved BOs, then the notifier lock can
	 * be dropped (see comment above mutex_lock).
	 */
	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
	mutex_unlock(&p->adev->notifier_lock);

	return 0;

error_abort:
	/* Job was initialized but never pushed: tear down its scheduler
	 * state before freeing it.
	 */
	drm_sched_job_cleanup(&job->base);
	mutex_unlock(&p->adev->notifier_lock);

error_unlock:
	amdgpu_job_free(job);
	return r;
}
1267
1268static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
1269{
1270	int i;
1271
1272	if (!trace_amdgpu_cs_enabled())
1273		return;
1274
1275	for (i = 0; i < parser->job->num_ibs; i++)
1276		trace_amdgpu_cs(parser, i);
1277}
1278
1279int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1280{
1281	struct amdgpu_device *adev = drm_to_adev(dev);
1282	union drm_amdgpu_cs *cs = data;
1283	struct amdgpu_cs_parser parser = {};
1284	bool reserved_buffers = false;
1285	int r;
1286
1287	if (amdgpu_ras_intr_triggered())
1288		return -EHWPOISON;
1289
1290	if (!adev->accel_working)
1291		return -EBUSY;
1292
1293	parser.adev = adev;
1294	parser.filp = filp;
1295
1296	r = amdgpu_cs_parser_init(&parser, data);
1297	if (r) {
1298		if (printk_ratelimit())
1299			DRM_ERROR("Failed to initialize parser %d!\n", r);
1300		goto out;
1301	}
1302
1303	r = amdgpu_cs_ib_fill(adev, &parser);
1304	if (r)
1305		goto out;
1306
1307	r = amdgpu_cs_dependencies(adev, &parser);
1308	if (r) {
1309		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
1310		goto out;
1311	}
1312
1313	r = amdgpu_cs_parser_bos(&parser, data);
1314	if (r) {
1315		if (r == -ENOMEM)
1316			DRM_ERROR("Not enough memory for command submission!\n");
1317		else if (r != -ERESTARTSYS && r != -EAGAIN)
1318			DRM_ERROR("Failed to process the buffer list %d!\n", r);
1319		goto out;
1320	}
1321
1322	reserved_buffers = true;
1323
1324	trace_amdgpu_cs_ibs(&parser);
1325
1326	r = amdgpu_cs_vm_handling(&parser);
1327	if (r)
1328		goto out;
1329
1330	r = amdgpu_cs_submit(&parser, cs);
1331
1332out:
1333	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
1334
1335	return r;
1336}
1337
1338/**
1339 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
1340 *
1341 * @dev: drm device
1342 * @data: data from userspace
1343 * @filp: file private
1344 *
1345 * Wait for the command submission identified by handle to finish.
1346 */
1347int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
1348			 struct drm_file *filp)
1349{
1350	union drm_amdgpu_wait_cs *wait = data;
1351	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
1352	struct drm_sched_entity *entity;
1353	struct amdgpu_ctx *ctx;
1354	struct dma_fence *fence;
1355	long r;
1356
1357	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
1358	if (ctx == NULL)
1359		return -EINVAL;
1360
1361	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
1362				  wait->in.ring, &entity);
1363	if (r) {
1364		amdgpu_ctx_put(ctx);
1365		return r;
1366	}
1367
1368	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
1369	if (IS_ERR(fence))
1370		r = PTR_ERR(fence);
1371	else if (fence) {
1372		r = dma_fence_wait_timeout(fence, true, timeout);
1373		if (r > 0 && fence->error)
1374			r = fence->error;
1375		dma_fence_put(fence);
1376	} else
1377		r = 1;
1378
1379	amdgpu_ctx_put(ctx);
1380	if (r < 0)
1381		return r;
1382
1383	memset(wait, 0, sizeof(*wait));
1384	wait->out.status = (r == 0);
1385
1386	return 0;
1387}
1388
1389/**
1390 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
1391 *
1392 * @adev: amdgpu device
1393 * @filp: file private
1394 * @user: drm_amdgpu_fence copied from user space
1395 */
1396static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
1397					     struct drm_file *filp,
1398					     struct drm_amdgpu_fence *user)
1399{
1400	struct drm_sched_entity *entity;
1401	struct amdgpu_ctx *ctx;
1402	struct dma_fence *fence;
1403	int r;
1404
1405	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
1406	if (ctx == NULL)
1407		return ERR_PTR(-EINVAL);
1408
1409	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
1410				  user->ring, &entity);
1411	if (r) {
1412		amdgpu_ctx_put(ctx);
1413		return ERR_PTR(r);
1414	}
1415
1416	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
1417	amdgpu_ctx_put(ctx);
1418
1419	return fence;
1420}
1421
1422int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
1423				    struct drm_file *filp)
1424{
1425	struct amdgpu_device *adev = drm_to_adev(dev);
1426	union drm_amdgpu_fence_to_handle *info = data;
1427	struct dma_fence *fence;
1428	struct drm_syncobj *syncobj;
1429	struct sync_file *sync_file;
1430	int fd, r;
1431
1432	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
1433	if (IS_ERR(fence))
1434		return PTR_ERR(fence);
1435
1436	if (!fence)
1437		fence = dma_fence_get_stub();
1438
1439	switch (info->in.what) {
1440	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
1441		r = drm_syncobj_create(&syncobj, 0, fence);
1442		dma_fence_put(fence);
1443		if (r)
1444			return r;
1445		r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
1446		drm_syncobj_put(syncobj);
1447		return r;
1448
1449	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
1450		r = drm_syncobj_create(&syncobj, 0, fence);
1451		dma_fence_put(fence);
1452		if (r)
1453			return r;
1454		r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle);
1455		drm_syncobj_put(syncobj);
1456		return r;
1457
1458	case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
1459		fd = get_unused_fd_flags(O_CLOEXEC);
1460		if (fd < 0) {
1461			dma_fence_put(fence);
1462			return fd;
1463		}
1464
1465		sync_file = sync_file_create(fence);
1466		dma_fence_put(fence);
1467		if (!sync_file) {
1468			put_unused_fd(fd);
1469			return -ENOMEM;
1470		}
1471
1472		fd_install(fd, sync_file->file);
1473		info->out.handle = fd;
1474		return 0;
1475
1476	default:
1477		dma_fence_put(fence);
1478		return -EINVAL;
1479	}
1480}
1481
1482/**
1483 * amdgpu_cs_wait_all_fence - wait on all fences to signal
1484 *
1485 * @adev: amdgpu device
1486 * @filp: file private
1487 * @wait: wait parameters
1488 * @fences: array of drm_amdgpu_fence
1489 */
1490static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
1491				     struct drm_file *filp,
1492				     union drm_amdgpu_wait_fences *wait,
1493				     struct drm_amdgpu_fence *fences)
1494{
1495	uint32_t fence_count = wait->in.fence_count;
1496	unsigned int i;
1497	long r = 1;
1498
1499	for (i = 0; i < fence_count; i++) {
1500		struct dma_fence *fence;
1501		unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1502
1503		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1504		if (IS_ERR(fence))
1505			return PTR_ERR(fence);
1506		else if (!fence)
1507			continue;
1508
1509		r = dma_fence_wait_timeout(fence, true, timeout);
1510		if (r > 0 && fence->error)
1511			r = fence->error;
1512
1513		dma_fence_put(fence);
1514		if (r < 0)
1515			return r;
1516
1517		if (r == 0)
1518			break;
1519	}
1520
1521	memset(wait, 0, sizeof(*wait));
1522	wait->out.status = (r > 0);
1523
1524	return 0;
1525}
1526
1527/**
1528 * amdgpu_cs_wait_any_fence - wait on any fence to signal
1529 *
1530 * @adev: amdgpu device
1531 * @filp: file private
1532 * @wait: wait parameters
1533 * @fences: array of drm_amdgpu_fence
1534 */
static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
				    struct drm_file *filp,
				    union drm_amdgpu_wait_fences *wait,
				    struct drm_amdgpu_fence *fences)
{
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
	uint32_t fence_count = wait->in.fence_count;
	uint32_t first = ~0;	/* sentinel: no fence signaled (yet) */
	struct dma_fence **array;
	unsigned int i;
	long r;

	/* Prepare the fence array; kcalloc zeroes it, so unfilled slots
	 * stay NULL (relied on below and in the cleanup loop).
	 */
	array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);

	if (array == NULL)
		return -ENOMEM;

	for (i = 0; i < fence_count; i++) {
		struct dma_fence *fence;

		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
		if (IS_ERR(fence)) {
			r = PTR_ERR(fence);
			goto err_free_fence_array;
		} else if (fence) {
			array[i] = fence;
		} else { /* NULL, the fence has been already signaled */
			/* array[i] stays NULL, so the error lookup at "out"
			 * reports success (r = 0) for this slot.
			 */
			r = 1;
			first = i;
			goto out;
		}
	}

	r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
				       &first);
	if (r < 0)
		goto err_free_fence_array;

out:
	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r > 0);
	wait->out.first_signaled = first;

	/* Propagate the signaled fence's error; the ~0 timeout sentinel or
	 * a NULL (pre-signaled) slot yields plain success.
	 */
	if (first < fence_count && array[first])
		r = array[first]->error;
	else
		r = 0;

err_free_fence_array:
	/* dma_fence_put(NULL) is a no-op, so a partially filled array is
	 * fine here.
	 */
	for (i = 0; i < fence_count; i++)
		dma_fence_put(array[i]);
	kfree(array);

	return r;
}
1591
1592/**
1593 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
1594 *
1595 * @dev: drm device
1596 * @data: data from userspace
1597 * @filp: file private
1598 */
1599int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
1600				struct drm_file *filp)
1601{
1602	struct amdgpu_device *adev = drm_to_adev(dev);
1603	union drm_amdgpu_wait_fences *wait = data;
1604	uint32_t fence_count = wait->in.fence_count;
1605	struct drm_amdgpu_fence *fences_user;
1606	struct drm_amdgpu_fence *fences;
1607	int r;
1608
1609	/* Get the fences from userspace */
1610	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
1611			GFP_KERNEL);
1612	if (fences == NULL)
1613		return -ENOMEM;
1614
1615	fences_user = u64_to_user_ptr(wait->in.fences);
1616	if (copy_from_user(fences, fences_user,
1617		sizeof(struct drm_amdgpu_fence) * fence_count)) {
1618		r = -EFAULT;
1619		goto err_free_fences;
1620	}
1621
1622	if (wait->in.wait_all)
1623		r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
1624	else
1625		r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
1626
1627err_free_fences:
1628	kfree(fences);
1629
1630	return r;
1631}
1632
1633/**
1634 * amdgpu_cs_find_bo_va - find bo_va for VM address
1635 *
1636 * @parser: command submission parser context
1637 * @addr: VM address
1638 * @bo: resulting BO of the mapping found
1639 *
1640 * Search the buffer objects in the command submission context for a certain
1641 * virtual memory address. Returns allocation structure when found, NULL
1642 * otherwise.
1643 */
1644int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
1645			   uint64_t addr, struct amdgpu_bo **bo,
1646			   struct amdgpu_bo_va_mapping **map)
1647{
1648	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
1649	struct ttm_operation_ctx ctx = { false, false };
1650	struct amdgpu_vm *vm = &fpriv->vm;
1651	struct amdgpu_bo_va_mapping *mapping;
1652	int r;
1653
1654	addr /= AMDGPU_GPU_PAGE_SIZE;
1655
1656	mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
1657	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
1658		return -EINVAL;
1659
1660	*bo = mapping->bo_va->base.bo;
1661	*map = mapping;
1662
1663	/* Double check that the BO is reserved by this CS */
1664	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
1665		return -EINVAL;
1666
1667	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
1668		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1669		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
1670		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
1671		if (r)
1672			return r;
1673	}
1674
1675	return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
1676}
1677