/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>
#include <linux/module.h>

#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vce.h"
#include "cikd.h"

/* 1 second timeout */
#define VCE_IDLE_TIMEOUT	msecs_to_jiffies(1000)

/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE	"amdgpu/bonaire_vce.bin"
#define FIRMWARE_KABINI	"amdgpu/kabini_vce.bin"
#define FIRMWARE_KAVERI	"amdgpu/kaveri_vce.bin"
#define FIRMWARE_HAWAII	"amdgpu/hawaii_vce.bin"
#define FIRMWARE_MULLINS	"amdgpu/mullins_vce.bin"
#endif
#define FIRMWARE_TONGA		"amdgpu/tonga_vce.bin"
#define FIRMWARE_CARRIZO	"amdgpu/carrizo_vce.bin"
#define FIRMWARE_FIJI		"amdgpu/fiji_vce.bin"
#define FIRMWARE_STONEY		"amdgpu/stoney_vce.bin"
#define FIRMWARE_POLARIS10	"amdgpu/polaris10_vce.bin"
#define FIRMWARE_POLARIS11	"amdgpu/polaris11_vce.bin"
#define FIRMWARE_POLARIS12	"amdgpu/polaris12_vce.bin"
#define FIRMWARE_VEGAM		"amdgpu/vegam_vce.bin"

#define FIRMWARE_VEGA10		"amdgpu/vega10_vce.bin"
#define FIRMWARE_VEGA12		"amdgpu/vega12_vce.bin"
#define FIRMWARE_VEGA20		"amdgpu/vega20_vce.bin"

#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
MODULE_FIRMWARE(FIRMWARE_KAVERI);
MODULE_FIRMWARE(FIRMWARE_HAWAII);
MODULE_FIRMWARE(FIRMWARE_MULLINS);
#endif
MODULE_FIRMWARE(FIRMWARE_TONGA);
MODULE_FIRMWARE(FIRMWARE_CARRIZO);
MODULE_FIRMWARE(FIRMWARE_FIJI);
MODULE_FIRMWARE(FIRMWARE_STONEY);
MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
MODULE_FIRMWARE(FIRMWARE_VEGAM);

MODULE_FIRMWARE(FIRMWARE_VEGA10);
MODULE_FIRMWARE(FIRMWARE_VEGA12);
MODULE_FIRMWARE(FIRMWARE_VEGA20);

static void amdgpu_vce_idle_work_handler(struct work_struct *work);
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
				     struct amdgpu_bo *bo,
				     struct dma_fence **fence);
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
				      bool direct, struct dma_fence **fence);

/**
 * amdgpu_vce_sw_init - allocate memory, load vce firmware
 *
 * @adev: amdgpu_device pointer
 * @size: size for the new BO
 *
 * First step to get VCE online, allocate memory and load the firmware
 */
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
{
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned ucode_version, version_major, version_minor, binary_id;
	int i, r;

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
		fw_name = FIRMWARE_BONAIRE;
		break;
	case CHIP_KAVERI:
		fw_name = FIRMWARE_KAVERI;
		break;
	case CHIP_KABINI:
		fw_name = FIRMWARE_KABINI;
		break;
	case CHIP_HAWAII:
		fw_name = FIRMWARE_HAWAII;
		break;
	case CHIP_MULLINS:
		fw_name = FIRMWARE_MULLINS;
		break;
#endif
	case CHIP_TONGA:
		fw_name = FIRMWARE_TONGA;
		break;
	case CHIP_CARRIZO:
		fw_name = FIRMWARE_CARRIZO;
		break;
	case CHIP_FIJI:
		fw_name = FIRMWARE_FIJI;
		break;
	case CHIP_STONEY:
		fw_name = FIRMWARE_STONEY;
		break;
	case CHIP_POLARIS10:
		fw_name = FIRMWARE_POLARIS10;
		break;
	case CHIP_POLARIS11:
		fw_name = FIRMWARE_POLARIS11;
		break;
	case CHIP_POLARIS12:
		fw_name = FIRMWARE_POLARIS12;
		break;
	case CHIP_VEGAM:
		fw_name = FIRMWARE_VEGAM;
		break;
	case CHIP_VEGA10:
		fw_name = FIRMWARE_VEGA10;
		break;
	case CHIP_VEGA12:
		fw_name = FIRMWARE_VEGA12;
		break;
	case CHIP_VEGA20:
		fw_name = FIRMWARE_VEGA20;
		break;

	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vce.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vce.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vce.fw);
		adev->vce.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vce.fw->data;

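	/*
	 * The VCE firmware version is packed into ucode_version:
	 * major in bits 31:20, minor in bits 19:8, binary ID in bits 7:0.
	 */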
	ucode_version = le32_to_cpu(hdr->ucode_version);
	version_major = (ucode_version >> 20) & 0xfff;
	version_minor = (ucode_version >> 8) & 0xfff;
	binary_id = ucode_version & 0xff;
	DRM_INFO("Found VCE firmware Version: %u.%u Binary ID: %u\n",
		 version_major, version_minor, binary_id);
	adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
				(binary_id << 8));

	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo,
				    &adev->vce.gpu_addr, &adev->vce.cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
		return r;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		atomic_set(&adev->vce.handles[i], 0);
		adev->vce.filp[i] = NULL;
	}

	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
	mutex_init(&adev->vce.idle_mutex);

	return 0;
}

/**
 * amdgpu_vce_sw_fini - free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Last step on VCE teardown, free firmware memory
 */
int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
{
	unsigned i;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	cancel_delayed_work_sync(&adev->vce.idle_work);
	drm_sched_entity_destroy(&adev->vce.entity);

	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
		(void **)&adev->vce.cpu_addr);

	for (i = 0; i < adev->vce.num_rings; i++)
		amdgpu_ring_fini(&adev->vce.ring[i]);

	release_firmware(adev->vce.fw);
	mutex_destroy(&adev->vce.idle_mutex);

	return 0;
}

/**
 * amdgpu_vce_entity_init - init entity
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the scheduler entity used for kernel VCE submissions.
 */
int amdgpu_vce_entity_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	struct drm_gpu_scheduler *sched;
	int r;

	ring = &adev->vce.ring[0];
	sched = &ring->sched;
	r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
				  &sched, 1, NULL);
	if (r != 0) {
		DRM_ERROR("Failed setting up VCE run queue.\n");
		return r;
	}

	return 0;
}

/**
 * amdgpu_vce_suspend - unpin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_suspend(struct amdgpu_device *adev)
{
	int i;

	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (atomic_read(&adev->vce.handles[i]))
			break;

	if (i == AMDGPU_MAX_VCE_HANDLES)
		return 0;

	/* TODO: suspending running encoding sessions isn't supported */
	return -EINVAL;
}

/**
 * amdgpu_vce_resume - pin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 */
int amdgpu_vce_resume(struct amdgpu_device *adev)
{
	void *cpu_addr;
	const struct common_firmware_header *hdr;
	unsigned offset;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
		return r;
	}

	r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
	if (r) {
		amdgpu_bo_unreserve(adev->vce.vcpu_bo);
		dev_err(adev->dev, "(%d) VCE map failed\n", r);
		return r;
	}

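	/*
	 * VRAM contents may have been lost while the device was powered
	 * down, so re-upload the firmware image into the VCPU BO.
	 */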
	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
	memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
		    adev->vce.fw->size - offset);

	amdgpu_bo_kunmap(adev->vce.vcpu_bo);

	amdgpu_bo_unreserve(adev->vce.vcpu_bo);

	return 0;
}

/**
 * amdgpu_vce_idle_work_handler - power off VCE
 *
 * @work: pointer to work structure
 *
 * Power off VCE when it's not used anymore
 */
static void amdgpu_vce_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vce.idle_work.work);
	unsigned i, count = 0;

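	/* VCE is idle only when no emitted fences are outstanding on any ring */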
	for (i = 0; i < adev->vce.num_rings; i++)
		count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);

	if (count == 0) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_vce(adev, false);
		} else {
			amdgpu_asic_set_vce_clocks(adev, 0, 0);
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_PG_STATE_GATE);
			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_CG_STATE_GATE);
		}
	} else {
		schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
	}
}

/**
 * amdgpu_vce_ring_begin_use - power up VCE
 *
 * @ring: amdgpu ring
 *
 * Make sure VCE is powered up when we want to use it
 */
void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	bool set_clocks;

	if (amdgpu_sriov_vf(adev))
		return;

	mutex_lock(&adev->vce.idle_mutex);
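	/*
	 * If the idle work was not pending, VCE may already have been powered
	 * down, so clocks and power gating have to be re-enabled here.
	 */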
	set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
	if (set_clocks) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_vce(adev, true);
		} else {
			amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_CG_STATE_UNGATE);
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
							       AMD_PG_STATE_UNGATE);
		}
	}
	mutex_unlock(&adev->vce.idle_mutex);
}

/**
 * amdgpu_vce_ring_end_use - power VCE down
 *
 * @ring: amdgpu ring
 *
 * Schedule work to power VCE down again
 */
void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
{
	if (!amdgpu_sriov_vf(ring->adev))
		schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
}

/**
 * amdgpu_vce_free_handles - free still open VCE handles
 *
 * @adev: amdgpu_device pointer
 * @filp: drm file pointer
 *
 * Close all VCE handles still open by this file pointer
 */
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
	struct amdgpu_ring *ring = &adev->vce.ring[0];
	int i, r;

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		uint32_t handle = atomic_read(&adev->vce.handles[i]);

		if (!handle || adev->vce.filp[i] != filp)
			continue;

		r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
		if (r)
			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);

		adev->vce.filp[i] = NULL;
		atomic_set(&adev->vce.handles[i], 0);
	}
}

/**
 * amdgpu_vce_get_create_msg - generate a VCE create msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @bo: buffer object the feedback buffer is placed in
 * @fence: optional fence to return
 *
 * Open up a stream for HW test
 */
static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
				     struct amdgpu_bo *bo,
				     struct dma_fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	addr = amdgpu_bo_gpu_offset(bo);

	/* stitch together a VCE create msg */
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

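	/* firmware major version 52 and newer needs a larger create message */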
	if ((ring->adev->vce.fw_version >> 24) >= 52)
		ib->ptr[ib->length_dw++] = 0x00000040; /* len */
	else
		ib->ptr[ib->length_dw++] = 0x00000030; /* len */
	ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000042;
	ib->ptr[ib->length_dw++] = 0x0000000a;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = 0x00000080;
	ib->ptr[ib->length_dw++] = 0x00000060;
	ib->ptr[ib->length_dw++] = 0x00000100;
	ib->ptr[ib->length_dw++] = 0x00000100;
	ib->ptr[ib->length_dw++] = 0x0000000c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	if ((ring->adev->vce.fw_version >> 24) >= 52) {
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
		ib->ptr[ib->length_dw++] = 0x00000000;
	}

	ib->ptr[ib->length_dw++] = 0x00000014; /* len */
	ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x00000001;

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);
	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @direct: submit the msg directly or through the scheduler entity
 * @fence: optional fence to return
 *
 * Close up a stream for HW test or if userspace failed to do so
 */
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
				      bool direct, struct dma_fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     direct ? AMDGPU_IB_POOL_DIRECT :
				     AMDGPU_IB_POOL_DELAYED, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	/* stitch together a VCE destroy msg */
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

	ib->ptr[ib->length_dw++] = 0x00000020; /* len */
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if none */
	ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008; /* len */
	ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	if (direct)
		r = amdgpu_job_submit_direct(job, ring, &f);
	else
		r = amdgpu_job_submit(job, &ring->adev->vce.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);
	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
 *
 * @p: parser context
 * @ib_idx: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb index
 *
 * Make sure that no BO crosses a 4GB boundary.
 */
static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
				  int lo, int hi, unsigned size, int32_t index)
{
	int64_t offset = ((uint64_t)size) * ((int64_t)index);
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_bo_va_mapping *mapping;
	unsigned i, fpfn, lpfn;
	struct amdgpu_bo *bo;
	uint64_t addr;
	int r;

	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
	if (index >= 0) {
		addr += offset;
		fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
		lpfn = 0x100000000ULL >> PAGE_SHIFT;
	} else {
		fpfn = 0;
		lpfn = (0x100000000ULL - PAGE_ALIGN(offset)) >> PAGE_SHIFT;
	}

	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
			  addr, lo, hi, size, index);
		return r;
	}

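	/*
	 * Tighten the placement limits so the whole accessed range stays
	 * below the 4GB boundary.
	 */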
	for (i = 0; i < bo->placement.num_placement; ++i) {
		bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn);
		bo->placements[i].lpfn = bo->placements[i].lpfn ?
			min(bo->placements[i].lpfn, lpfn) : lpfn;
	}
	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}

/**
 * amdgpu_vce_cs_reloc - command submission relocation
 *
 * @p: parser context
 * @ib_idx: indirect buffer to use
 * @lo: address of lower dword
 * @hi: address of higher dword
 * @size: minimum size
 * @index: bs/fb index
 *
 * Patch relocation inside command stream with real buffer address
 */
static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
			       int lo, int hi, unsigned size, uint32_t index)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	uint64_t addr;
	int r;

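	/* an index of 0xffffffff means no index was supplied, fall back to slot 0 */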
	if (index == 0xffffffff)
		index = 0;

	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
	addr += ((uint64_t)size) * ((uint64_t)index);

	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
			  addr, lo, hi, size, index);
		return r;
	}

	if ((addr + (uint64_t)size) >
	    (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
		DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n",
			  addr, lo, hi);
		return -EINVAL;
	}

	addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
	addr += amdgpu_bo_gpu_offset(bo);
	addr -= ((uint64_t)size) * ((uint64_t)index);

	amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr));
	amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr));

	return 0;
}

/**
 * amdgpu_vce_validate_handle - validate stream handle
 *
 * @p: parser context
 * @handle: handle to validate
 * @allocated: allocated a new handle?
 *
 * Validates the handle and return the found session index or -EINVAL
 * if we don't have another free session index.
 */
static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
				      uint32_t handle, uint32_t *allocated)
{
	unsigned i;

	/* validate the handle */
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		if (atomic_read(&p->adev->vce.handles[i]) == handle) {
			if (p->adev->vce.filp[i] != p->filp) {
				DRM_ERROR("VCE handle collision detected!\n");
				return -EINVAL;
			}
			return i;
		}
	}

	/* handle not found, try to alloc a new one */
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
			p->adev->vce.filp[i] = p->filp;
			p->adev->vce.img_size[i] = 0;
			*allocated |= 1 << i;
			return i;
		}
	}

	DRM_ERROR("No more free VCE handles!\n");
	return -EINVAL;
}

/**
 * amdgpu_vce_ring_parse_cs - parse and validate the command stream
 *
 * @p: parser context
 * @ib_idx: indirect buffer to parse
 */
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
{
	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
	unsigned fb_idx = 0, bs_idx = 0;
	int session_idx = -1;
	uint32_t destroyed = 0;
	uint32_t created = 0;
	uint32_t allocated = 0;
	uint32_t tmp, handle = 0;
	uint32_t *size = &tmp;
	unsigned idx;
	int i, r = 0;

	p->job->vm = NULL;
	ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);

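	/* first pass: validate the placement of all referenced buffers */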
	for (idx = 0; idx < ib->length_dw;) {
		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);

		if ((len < 8) || (len & 3)) {
			DRM_ERROR("invalid VCE command length (%d)!\n", len);
			r = -EINVAL;
			goto out;
		}

		switch (cmd) {
		case 0x00000002: /* task info */
			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
			break;

		case 0x03000001: /* encode */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 10,
						   idx + 9, 0, 0);
			if (r)
				goto out;

			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 12,
						   idx + 11, 0, 0);
			if (r)
				goto out;
			break;

		case 0x05000001: /* context buffer */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
						   idx + 2, 0, 0);
			if (r)
				goto out;
			break;

		case 0x05000004: /* video bitstream buffer */
			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
						   tmp, bs_idx);
			if (r)
				goto out;
			break;

		case 0x05000005: /* feedback buffer */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
						   4096, fb_idx);
			if (r)
				goto out;
			break;

		case 0x0500000d: /* MV buffer */
			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
							idx + 2, 0, 0);
			if (r)
				goto out;

			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
							idx + 7, 0, 0);
			if (r)
				goto out;
			break;
		}

		idx += len / 4;
	}

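	/* second pass: check the commands and patch the real buffer addresses */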
	for (idx = 0; idx < ib->length_dw;) {
		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);

		switch (cmd) {
		case 0x00000001: /* session */
			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
			session_idx = amdgpu_vce_validate_handle(p, handle,
								 &allocated);
			if (session_idx < 0) {
				r = session_idx;
				goto out;
			}
			size = &p->adev->vce.img_size[session_idx];
			break;

		case 0x00000002: /* task info */
			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
			break;

		case 0x01000001: /* create */
			created |= 1 << session_idx;
			if (destroyed & (1 << session_idx)) {
				destroyed &= ~(1 << session_idx);
				allocated |= 1 << session_idx;

			} else if (!(allocated & (1 << session_idx))) {
				DRM_ERROR("Handle already in use!\n");
				r = -EINVAL;
				goto out;
			}

			*size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
				amdgpu_get_ib_value(p, ib_idx, idx + 10) *
				8 * 3 / 2;
			break;

		case 0x04000001: /* config extension */
		case 0x04000002: /* pic control */
		case 0x04000005: /* rate control */
		case 0x04000007: /* motion estimation */
		case 0x04000008: /* rdo */
		case 0x04000009: /* vui */
		case 0x05000002: /* auxiliary buffer */
		case 0x05000009: /* clock table */
			break;

		case 0x0500000c: /* hw config */
			switch (p->adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
			case CHIP_KAVERI:
			case CHIP_MULLINS:
#endif
			case CHIP_CARRIZO:
				break;
			default:
				r = -EINVAL;
				goto out;
			}
			break;

		case 0x03000001: /* encode */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
						*size, 0);
			if (r)
				goto out;

			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
						*size / 3, 0);
			if (r)
				goto out;
			break;

		case 0x02000001: /* destroy */
			destroyed |= 1 << session_idx;
			break;

		case 0x05000001: /* context buffer */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						*size * 2, 0);
			if (r)
				goto out;
			break;

		case 0x05000004: /* video bitstream buffer */
			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						tmp, bs_idx);
			if (r)
				goto out;
			break;

		case 0x05000005: /* feedback buffer */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						4096, fb_idx);
			if (r)
				goto out;
			break;

		case 0x0500000d: /* MV buffer */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
							idx + 2, *size, 0);
			if (r)
				goto out;

			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
							idx + 7, *size / 12, 0);
			if (r)
				goto out;
			break;

		default:
			DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
			r = -EINVAL;
			goto out;
		}

		if (session_idx == -1) {
			DRM_ERROR("no session command at start of IB\n");
			r = -EINVAL;
			goto out;
		}

		idx += len / 4;
	}

	if (allocated & ~created) {
		DRM_ERROR("New session without create command!\n");
		r = -ENOENT;
	}

out:
	if (!r) {
		/* No error, free all destroyed handle slots */
		tmp = destroyed;
	} else {
		/* Error during parsing, free all allocated handle slots */
		tmp = allocated;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (tmp & (1 << i))
			atomic_set(&p->adev->vce.handles[i], 0);

	return r;
}

/**
 * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode
 *
 * @p: parser context
 * @ib_idx: indirect buffer to parse
 */
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
{
	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
	int session_idx = -1;
	uint32_t destroyed = 0;
	uint32_t created = 0;
	uint32_t allocated = 0;
	uint32_t tmp, handle = 0;
	int i, r = 0, idx = 0;

	while (idx < ib->length_dw) {
		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);

		if ((len < 8) || (len & 3)) {
			DRM_ERROR("invalid VCE command length (%d)!\n", len);
			r = -EINVAL;
			goto out;
		}

		switch (cmd) {
		case 0x00000001: /* session */
			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
			session_idx = amdgpu_vce_validate_handle(p, handle,
								 &allocated);
			if (session_idx < 0) {
				r = session_idx;
				goto out;
			}
			break;

		case 0x01000001: /* create */
			created |= 1 << session_idx;
			if (destroyed & (1 << session_idx)) {
				destroyed &= ~(1 << session_idx);
				allocated |= 1 << session_idx;

			} else if (!(allocated & (1 << session_idx))) {
				DRM_ERROR("Handle already in use!\n");
				r = -EINVAL;
				goto out;
			}

			break;

		case 0x02000001: /* destroy */
			destroyed |= 1 << session_idx;
			break;

		default:
			break;
		}

		if (session_idx == -1) {
			DRM_ERROR("no session command at start of IB\n");
			r = -EINVAL;
			goto out;
		}

		idx += len / 4;
	}

	if (allocated & ~created) {
		DRM_ERROR("New session without create command!\n");
		r = -ENOENT;
	}

out:
	if (!r) {
		/* No error, free all destroyed handle slots */
		tmp = destroyed;
		amdgpu_ib_free(p->adev, ib, NULL);
	} else {
		/* Error during parsing, free all allocated handle slots */
		tmp = allocated;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (tmp & (1 << i))
			atomic_set(&p->adev->vce.handles[i], 0);

	return r;
}

/**
 * amdgpu_vce_ring_emit_ib - execute indirect buffer
 *
 * @ring: engine to use
 * @job: job to retrieve vmid from
 * @ib: the IB to execute
 * @flags: unused
 *
 */
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
				struct amdgpu_job *job,
				struct amdgpu_ib *ib,
				uint32_t flags)
{
	amdgpu_ring_write(ring, VCE_CMD_IB);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

/**
 * amdgpu_vce_ring_emit_fence - add a fence command to the ring
 *
 * @ring: engine to use
 * @addr: address
 * @seq: sequence number
 * @flags: fence related flags
 *
 */
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

/**
 * amdgpu_vce_ring_test_ring - test if VCE ring is working
 *
 * @ring: the engine to test on
 *
 */
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned i;
	int r, timeout = adev->usec_timeout;

	/* skip ring test for sriov */
	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

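	/* submit a single END command and wait for the read pointer to advance */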
	amdgpu_ring_write(ring, VCE_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= timeout)
		r = -ETIMEDOUT;

	return r;
}

/**
 * amdgpu_vce_ring_test_ib - test if VCE IBs are working
 *
 * @ring: the engine to test on
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 */
int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo = NULL;
	long r;

	/* skip vce ring1/2 ib test for now, since it's not reliable */
	if (ring != &ring->adev->vce.ring[0])
		return 0;

	r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
	if (r)
		goto error;

	r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	dma_fence_put(fence);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}