1d722e3fbSopenharmony_ci/*
2d722e3fbSopenharmony_ci * Copyright 2015 Advanced Micro Devices, Inc.
3d722e3fbSopenharmony_ci *
4d722e3fbSopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5d722e3fbSopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6d722e3fbSopenharmony_ci * to deal in the Software without restriction, including without limitation
7d722e3fbSopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8d722e3fbSopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9d722e3fbSopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10d722e3fbSopenharmony_ci *
11d722e3fbSopenharmony_ci * The above copyright notice and this permission notice shall be included in
12d722e3fbSopenharmony_ci * all copies or substantial portions of the Software.
13d722e3fbSopenharmony_ci *
14d722e3fbSopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15d722e3fbSopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16d722e3fbSopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17d722e3fbSopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18d722e3fbSopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19d722e3fbSopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20d722e3fbSopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
21d722e3fbSopenharmony_ci *
22d722e3fbSopenharmony_ci*/
23d722e3fbSopenharmony_ci
24d722e3fbSopenharmony_ci#include <stdio.h>
25d722e3fbSopenharmony_ci#include <inttypes.h>
26d722e3fbSopenharmony_ci
27d722e3fbSopenharmony_ci#include "CUnit/Basic.h"
28d722e3fbSopenharmony_ci
29d722e3fbSopenharmony_ci#include "util_math.h"
30d722e3fbSopenharmony_ci
31d722e3fbSopenharmony_ci#include "amdgpu_test.h"
32d722e3fbSopenharmony_ci#include "amdgpu_drm.h"
33d722e3fbSopenharmony_ci#include "amdgpu_internal.h"
34d722e3fbSopenharmony_ci
35d722e3fbSopenharmony_ci#include "vce_ib.h"
36d722e3fbSopenharmony_ci#include "frame.h"
37d722e3fbSopenharmony_ci
/* Size passed to amdgpu_bo_alloc_and_map() for the suite's indirect buffer. */
#define IB_SIZE			4096
/* Number of slots in the resources[] array used to build the BO list. */
#define MAX_RESOURCES		16
/*
 * Firmware version threshold for the MV test: the values 53, 0 and 3 packed
 * into bits 31:24, 23:16 and 15:8 respectively.
 */
#define FW_53_0_03		((53 << 24) | (0 << 16) | (3 << 8))
41d722e3fbSopenharmony_ci
42d722e3fbSopenharmony_cistruct amdgpu_vce_bo {
43d722e3fbSopenharmony_ci	amdgpu_bo_handle handle;
44d722e3fbSopenharmony_ci	amdgpu_va_handle va_handle;
45d722e3fbSopenharmony_ci	uint64_t addr;
46d722e3fbSopenharmony_ci	uint64_t size;
47d722e3fbSopenharmony_ci	uint8_t *ptr;
48d722e3fbSopenharmony_ci};
49d722e3fbSopenharmony_ci
50d722e3fbSopenharmony_cistruct amdgpu_vce_encode {
51d722e3fbSopenharmony_ci	unsigned width;
52d722e3fbSopenharmony_ci	unsigned height;
53d722e3fbSopenharmony_ci	struct amdgpu_vce_bo vbuf;
54d722e3fbSopenharmony_ci	struct amdgpu_vce_bo bs[2];
55d722e3fbSopenharmony_ci	struct amdgpu_vce_bo fb[2];
56d722e3fbSopenharmony_ci	struct amdgpu_vce_bo cpb;
57d722e3fbSopenharmony_ci	unsigned ib_len;
58d722e3fbSopenharmony_ci	bool two_instance;
59d722e3fbSopenharmony_ci	struct amdgpu_vce_bo mvrefbuf;
60d722e3fbSopenharmony_ci	struct amdgpu_vce_bo mvb;
61d722e3fbSopenharmony_ci	unsigned mvbuf_size;
62d722e3fbSopenharmony_ci};
63d722e3fbSopenharmony_ci
64d722e3fbSopenharmony_cistatic amdgpu_device_handle device_handle;
65d722e3fbSopenharmony_cistatic uint32_t major_version;
66d722e3fbSopenharmony_cistatic uint32_t minor_version;
67d722e3fbSopenharmony_cistatic uint32_t family_id;
68d722e3fbSopenharmony_cistatic uint32_t vce_harvest_config;
69d722e3fbSopenharmony_cistatic uint32_t chip_rev;
70d722e3fbSopenharmony_cistatic uint32_t chip_id;
71d722e3fbSopenharmony_cistatic uint32_t ids_flags;
72d722e3fbSopenharmony_cistatic bool is_mv_supported = true;
73d722e3fbSopenharmony_ci
74d722e3fbSopenharmony_cistatic amdgpu_context_handle context_handle;
75d722e3fbSopenharmony_cistatic amdgpu_bo_handle ib_handle;
76d722e3fbSopenharmony_cistatic amdgpu_va_handle ib_va_handle;
77d722e3fbSopenharmony_cistatic uint64_t ib_mc_address;
78d722e3fbSopenharmony_cistatic uint32_t *ib_cpu;
79d722e3fbSopenharmony_ci
80d722e3fbSopenharmony_cistatic struct amdgpu_vce_encode enc;
81d722e3fbSopenharmony_cistatic amdgpu_bo_handle resources[MAX_RESOURCES];
82d722e3fbSopenharmony_cistatic unsigned num_resources;
83d722e3fbSopenharmony_ci
84d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_create(void);
85d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_encode(void);
86d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_encode_mv(void);
87d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_destroy(void);
88d722e3fbSopenharmony_ci
89d722e3fbSopenharmony_ciCU_TestInfo vce_tests[] = {
90d722e3fbSopenharmony_ci	{ "VCE create",  amdgpu_cs_vce_create },
91d722e3fbSopenharmony_ci	{ "VCE encode",  amdgpu_cs_vce_encode },
92d722e3fbSopenharmony_ci	{ "VCE MV dump",  amdgpu_cs_vce_encode_mv },
93d722e3fbSopenharmony_ci	{ "VCE destroy",  amdgpu_cs_vce_destroy },
94d722e3fbSopenharmony_ci	CU_TEST_INFO_NULL,
95d722e3fbSopenharmony_ci};
96d722e3fbSopenharmony_ci
97d722e3fbSopenharmony_ciCU_BOOL suite_vce_tests_enable(void)
98d722e3fbSopenharmony_ci{
99d722e3fbSopenharmony_ci	uint32_t version, feature, asic_id;
100d722e3fbSopenharmony_ci	CU_BOOL ret_mv = CU_FALSE;
101d722e3fbSopenharmony_ci
102d722e3fbSopenharmony_ci	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
103d722e3fbSopenharmony_ci					     &minor_version, &device_handle))
104d722e3fbSopenharmony_ci		return CU_FALSE;
105d722e3fbSopenharmony_ci
106d722e3fbSopenharmony_ci	family_id = device_handle->info.family_id;
107d722e3fbSopenharmony_ci	chip_rev = device_handle->info.chip_rev;
108d722e3fbSopenharmony_ci	chip_id = device_handle->info.chip_external_rev;
109d722e3fbSopenharmony_ci	ids_flags = device_handle->info.ids_flags;
110d722e3fbSopenharmony_ci	asic_id = device_handle->info.asic_id;
111d722e3fbSopenharmony_ci
112d722e3fbSopenharmony_ci	amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
113d722e3fbSopenharmony_ci					  0, &version, &feature);
114d722e3fbSopenharmony_ci
115d722e3fbSopenharmony_ci	if (amdgpu_device_deinitialize(device_handle))
116d722e3fbSopenharmony_ci		return CU_FALSE;
117d722e3fbSopenharmony_ci
118d722e3fbSopenharmony_ci	if (family_id >= AMDGPU_FAMILY_RV || family_id == AMDGPU_FAMILY_SI ||
119d722e3fbSopenharmony_ci		asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
120d722e3fbSopenharmony_ci		printf("\n\nThe ASIC NOT support VCE, suite disabled\n");
121d722e3fbSopenharmony_ci		return CU_FALSE;
122d722e3fbSopenharmony_ci	}
123d722e3fbSopenharmony_ci
124d722e3fbSopenharmony_ci	if (!(chip_id == (chip_rev + 0x3C) || /* FIJI */
125d722e3fbSopenharmony_ci			chip_id == (chip_rev + 0x50) || /* Polaris 10*/
126d722e3fbSopenharmony_ci			chip_id == (chip_rev + 0x5A) || /* Polaris 11*/
127d722e3fbSopenharmony_ci			chip_id == (chip_rev + 0x64) || /* Polaris 12*/
128d722e3fbSopenharmony_ci			(family_id >= AMDGPU_FAMILY_AI && !ids_flags))) /* dGPU > Polaris */
129d722e3fbSopenharmony_ci		printf("\n\nThe ASIC NOT support VCE MV, suite disabled\n");
130d722e3fbSopenharmony_ci	else if (FW_53_0_03 > version)
131d722e3fbSopenharmony_ci		printf("\n\nThe ASIC FW version NOT support VCE MV, suite disabled\n");
132d722e3fbSopenharmony_ci	else
133d722e3fbSopenharmony_ci		ret_mv = CU_TRUE;
134d722e3fbSopenharmony_ci
135d722e3fbSopenharmony_ci	if (ret_mv == CU_FALSE) {
136d722e3fbSopenharmony_ci		amdgpu_set_test_active("VCE Tests", "VCE MV dump", ret_mv);
137d722e3fbSopenharmony_ci		is_mv_supported = false;
138d722e3fbSopenharmony_ci	}
139d722e3fbSopenharmony_ci
140d722e3fbSopenharmony_ci	return CU_TRUE;
141d722e3fbSopenharmony_ci}
142d722e3fbSopenharmony_ci
143d722e3fbSopenharmony_ciint suite_vce_tests_init(void)
144d722e3fbSopenharmony_ci{
145d722e3fbSopenharmony_ci	int r;
146d722e3fbSopenharmony_ci
147d722e3fbSopenharmony_ci	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
148d722e3fbSopenharmony_ci				     &minor_version, &device_handle);
149d722e3fbSopenharmony_ci	if (r) {
150d722e3fbSopenharmony_ci		if ((r == -EACCES) && (errno == EACCES))
151d722e3fbSopenharmony_ci			printf("\n\nError:%s. "
152d722e3fbSopenharmony_ci				"Hint:Try to run this test program as root.",
153d722e3fbSopenharmony_ci				strerror(errno));
154d722e3fbSopenharmony_ci
155d722e3fbSopenharmony_ci		return CUE_SINIT_FAILED;
156d722e3fbSopenharmony_ci	}
157d722e3fbSopenharmony_ci
158d722e3fbSopenharmony_ci	family_id = device_handle->info.family_id;
159d722e3fbSopenharmony_ci	vce_harvest_config = device_handle->info.vce_harvest_config;
160d722e3fbSopenharmony_ci
161d722e3fbSopenharmony_ci	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
162d722e3fbSopenharmony_ci	if (r)
163d722e3fbSopenharmony_ci		return CUE_SINIT_FAILED;
164d722e3fbSopenharmony_ci
165d722e3fbSopenharmony_ci	r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096,
166d722e3fbSopenharmony_ci				    AMDGPU_GEM_DOMAIN_GTT, 0,
167d722e3fbSopenharmony_ci				    &ib_handle, (void**)&ib_cpu,
168d722e3fbSopenharmony_ci				    &ib_mc_address, &ib_va_handle);
169d722e3fbSopenharmony_ci	if (r)
170d722e3fbSopenharmony_ci		return CUE_SINIT_FAILED;
171d722e3fbSopenharmony_ci
172d722e3fbSopenharmony_ci	memset(&enc, 0, sizeof(struct amdgpu_vce_encode));
173d722e3fbSopenharmony_ci
174d722e3fbSopenharmony_ci	return CUE_SUCCESS;
175d722e3fbSopenharmony_ci}
176d722e3fbSopenharmony_ci
177d722e3fbSopenharmony_ciint suite_vce_tests_clean(void)
178d722e3fbSopenharmony_ci{
179d722e3fbSopenharmony_ci	int r;
180d722e3fbSopenharmony_ci
181d722e3fbSopenharmony_ci	r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
182d722e3fbSopenharmony_ci				     ib_mc_address, IB_SIZE);
183d722e3fbSopenharmony_ci	if (r)
184d722e3fbSopenharmony_ci		return CUE_SCLEAN_FAILED;
185d722e3fbSopenharmony_ci
186d722e3fbSopenharmony_ci	r = amdgpu_cs_ctx_free(context_handle);
187d722e3fbSopenharmony_ci	if (r)
188d722e3fbSopenharmony_ci		return CUE_SCLEAN_FAILED;
189d722e3fbSopenharmony_ci
190d722e3fbSopenharmony_ci	r = amdgpu_device_deinitialize(device_handle);
191d722e3fbSopenharmony_ci	if (r)
192d722e3fbSopenharmony_ci		return CUE_SCLEAN_FAILED;
193d722e3fbSopenharmony_ci
194d722e3fbSopenharmony_ci	return CUE_SUCCESS;
195d722e3fbSopenharmony_ci}
196d722e3fbSopenharmony_ci
197d722e3fbSopenharmony_cistatic int submit(unsigned ndw, unsigned ip)
198d722e3fbSopenharmony_ci{
199d722e3fbSopenharmony_ci	struct amdgpu_cs_request ibs_request = {0};
200d722e3fbSopenharmony_ci	struct amdgpu_cs_ib_info ib_info = {0};
201d722e3fbSopenharmony_ci	struct amdgpu_cs_fence fence_status = {0};
202d722e3fbSopenharmony_ci	uint32_t expired;
203d722e3fbSopenharmony_ci	int r;
204d722e3fbSopenharmony_ci
205d722e3fbSopenharmony_ci	ib_info.ib_mc_address = ib_mc_address;
206d722e3fbSopenharmony_ci	ib_info.size = ndw;
207d722e3fbSopenharmony_ci
208d722e3fbSopenharmony_ci	ibs_request.ip_type = ip;
209d722e3fbSopenharmony_ci
210d722e3fbSopenharmony_ci	r = amdgpu_bo_list_create(device_handle, num_resources, resources,
211d722e3fbSopenharmony_ci				  NULL, &ibs_request.resources);
212d722e3fbSopenharmony_ci	if (r)
213d722e3fbSopenharmony_ci		return r;
214d722e3fbSopenharmony_ci
215d722e3fbSopenharmony_ci	ibs_request.number_of_ibs = 1;
216d722e3fbSopenharmony_ci	ibs_request.ibs = &ib_info;
217d722e3fbSopenharmony_ci	ibs_request.fence_info.handle = NULL;
218d722e3fbSopenharmony_ci
219d722e3fbSopenharmony_ci	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
220d722e3fbSopenharmony_ci	if (r)
221d722e3fbSopenharmony_ci		return r;
222d722e3fbSopenharmony_ci
223d722e3fbSopenharmony_ci	r = amdgpu_bo_list_destroy(ibs_request.resources);
224d722e3fbSopenharmony_ci	if (r)
225d722e3fbSopenharmony_ci		return r;
226d722e3fbSopenharmony_ci
227d722e3fbSopenharmony_ci	fence_status.context = context_handle;
228d722e3fbSopenharmony_ci	fence_status.ip_type = ip;
229d722e3fbSopenharmony_ci	fence_status.fence = ibs_request.seq_no;
230d722e3fbSopenharmony_ci
231d722e3fbSopenharmony_ci	r = amdgpu_cs_query_fence_status(&fence_status,
232d722e3fbSopenharmony_ci					 AMDGPU_TIMEOUT_INFINITE,
233d722e3fbSopenharmony_ci					 0, &expired);
234d722e3fbSopenharmony_ci	if (r)
235d722e3fbSopenharmony_ci		return r;
236d722e3fbSopenharmony_ci
237d722e3fbSopenharmony_ci	return 0;
238d722e3fbSopenharmony_ci}
239d722e3fbSopenharmony_ci
240d722e3fbSopenharmony_cistatic void alloc_resource(struct amdgpu_vce_bo *vce_bo, unsigned size, unsigned domain)
241d722e3fbSopenharmony_ci{
242d722e3fbSopenharmony_ci	struct amdgpu_bo_alloc_request req = {0};
243d722e3fbSopenharmony_ci	amdgpu_bo_handle buf_handle;
244d722e3fbSopenharmony_ci	amdgpu_va_handle va_handle;
245d722e3fbSopenharmony_ci	uint64_t va = 0;
246d722e3fbSopenharmony_ci	int r;
247d722e3fbSopenharmony_ci
248d722e3fbSopenharmony_ci	req.alloc_size = ALIGN(size, 4096);
249d722e3fbSopenharmony_ci	req.preferred_heap = domain;
250d722e3fbSopenharmony_ci	r = amdgpu_bo_alloc(device_handle, &req, &buf_handle);
251d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
252d722e3fbSopenharmony_ci	r = amdgpu_va_range_alloc(device_handle,
253d722e3fbSopenharmony_ci				  amdgpu_gpu_va_range_general,
254d722e3fbSopenharmony_ci				  req.alloc_size, 1, 0, &va,
255d722e3fbSopenharmony_ci				  &va_handle, 0);
256d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
257d722e3fbSopenharmony_ci	r = amdgpu_bo_va_op(buf_handle, 0, req.alloc_size, va, 0,
258d722e3fbSopenharmony_ci			    AMDGPU_VA_OP_MAP);
259d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
260d722e3fbSopenharmony_ci	vce_bo->addr = va;
261d722e3fbSopenharmony_ci	vce_bo->handle = buf_handle;
262d722e3fbSopenharmony_ci	vce_bo->size = req.alloc_size;
263d722e3fbSopenharmony_ci	vce_bo->va_handle = va_handle;
264d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_map(vce_bo->handle, (void **)&vce_bo->ptr);
265d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
266d722e3fbSopenharmony_ci	memset(vce_bo->ptr, 0, size);
267d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_unmap(vce_bo->handle);
268d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
269d722e3fbSopenharmony_ci}
270d722e3fbSopenharmony_ci
271d722e3fbSopenharmony_cistatic void free_resource(struct amdgpu_vce_bo *vce_bo)
272d722e3fbSopenharmony_ci{
273d722e3fbSopenharmony_ci	int r;
274d722e3fbSopenharmony_ci
275d722e3fbSopenharmony_ci	r = amdgpu_bo_va_op(vce_bo->handle, 0, vce_bo->size,
276d722e3fbSopenharmony_ci			    vce_bo->addr, 0, AMDGPU_VA_OP_UNMAP);
277d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
278d722e3fbSopenharmony_ci
279d722e3fbSopenharmony_ci	r = amdgpu_va_range_free(vce_bo->va_handle);
280d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
281d722e3fbSopenharmony_ci
282d722e3fbSopenharmony_ci	r = amdgpu_bo_free(vce_bo->handle);
283d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
284d722e3fbSopenharmony_ci	memset(vce_bo, 0, sizeof(*vce_bo));
285d722e3fbSopenharmony_ci}
286d722e3fbSopenharmony_ci
287d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_create(void)
288d722e3fbSopenharmony_ci{
289d722e3fbSopenharmony_ci	unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
290d722e3fbSopenharmony_ci	int len, r;
291d722e3fbSopenharmony_ci
292d722e3fbSopenharmony_ci	enc.width = vce_create[6];
293d722e3fbSopenharmony_ci	enc.height = vce_create[7];
294d722e3fbSopenharmony_ci
295d722e3fbSopenharmony_ci	num_resources  = 0;
296d722e3fbSopenharmony_ci	alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT);
297d722e3fbSopenharmony_ci	resources[num_resources++] = enc.fb[0].handle;
298d722e3fbSopenharmony_ci	resources[num_resources++] = ib_handle;
299d722e3fbSopenharmony_ci
300d722e3fbSopenharmony_ci	len = 0;
301d722e3fbSopenharmony_ci	memcpy(ib_cpu, vce_session, sizeof(vce_session));
302d722e3fbSopenharmony_ci	len += sizeof(vce_session) / 4;
303d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo));
304d722e3fbSopenharmony_ci	len += sizeof(vce_taskinfo) / 4;
305d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_create, sizeof(vce_create));
306d722e3fbSopenharmony_ci	ib_cpu[len + 8] = ALIGN(enc.width, align);
307d722e3fbSopenharmony_ci	ib_cpu[len + 9] = ALIGN(enc.width, align);
308d722e3fbSopenharmony_ci	if (is_mv_supported == true) {/* disableTwoInstance */
309d722e3fbSopenharmony_ci		if (family_id >= AMDGPU_FAMILY_AI)
310d722e3fbSopenharmony_ci			ib_cpu[len + 11] = 0x01000001;
311d722e3fbSopenharmony_ci		else
312d722e3fbSopenharmony_ci			ib_cpu[len + 11] = 0x01000201;
313d722e3fbSopenharmony_ci	}
314d722e3fbSopenharmony_ci	len += sizeof(vce_create) / 4;
315d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
316d722e3fbSopenharmony_ci	ib_cpu[len + 2] = enc.fb[0].addr >> 32;
317d722e3fbSopenharmony_ci	ib_cpu[len + 3] = enc.fb[0].addr;
318d722e3fbSopenharmony_ci	len += sizeof(vce_feedback) / 4;
319d722e3fbSopenharmony_ci
320d722e3fbSopenharmony_ci	r = submit(len, AMDGPU_HW_IP_VCE);
321d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
322d722e3fbSopenharmony_ci
323d722e3fbSopenharmony_ci	free_resource(&enc.fb[0]);
324d722e3fbSopenharmony_ci}
325d722e3fbSopenharmony_ci
326d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_config(void)
327d722e3fbSopenharmony_ci{
328d722e3fbSopenharmony_ci	int len = 0, r;
329d722e3fbSopenharmony_ci
330d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_session, sizeof(vce_session));
331d722e3fbSopenharmony_ci	len += sizeof(vce_session) / 4;
332d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo));
333d722e3fbSopenharmony_ci	ib_cpu[len + 3] = 2;
334d722e3fbSopenharmony_ci	ib_cpu[len + 6] = 0xffffffff;
335d722e3fbSopenharmony_ci	len += sizeof(vce_taskinfo) / 4;
336d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_rate_ctrl, sizeof(vce_rate_ctrl));
337d722e3fbSopenharmony_ci	len += sizeof(vce_rate_ctrl) / 4;
338d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_config_ext, sizeof(vce_config_ext));
339d722e3fbSopenharmony_ci	len += sizeof(vce_config_ext) / 4;
340d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_motion_est, sizeof(vce_motion_est));
341d722e3fbSopenharmony_ci	len += sizeof(vce_motion_est) / 4;
342d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_rdo, sizeof(vce_rdo));
343d722e3fbSopenharmony_ci	len += sizeof(vce_rdo) / 4;
344d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_pic_ctrl, sizeof(vce_pic_ctrl));
345d722e3fbSopenharmony_ci	if (is_mv_supported == true)
346d722e3fbSopenharmony_ci		ib_cpu[len + 27] = 0x00000001; /* encSliceMode */
347d722e3fbSopenharmony_ci	len += sizeof(vce_pic_ctrl) / 4;
348d722e3fbSopenharmony_ci
349d722e3fbSopenharmony_ci	r = submit(len, AMDGPU_HW_IP_VCE);
350d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
351d722e3fbSopenharmony_ci}
352d722e3fbSopenharmony_ci
353d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
354d722e3fbSopenharmony_ci{
355d722e3fbSopenharmony_ci
356d722e3fbSopenharmony_ci	uint64_t luma_offset, chroma_offset;
357d722e3fbSopenharmony_ci	unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
358d722e3fbSopenharmony_ci	unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16);
359d722e3fbSopenharmony_ci	int len = 0, i, r;
360d722e3fbSopenharmony_ci
361d722e3fbSopenharmony_ci	luma_offset = enc->vbuf.addr;
362d722e3fbSopenharmony_ci	chroma_offset = luma_offset + luma_size;
363d722e3fbSopenharmony_ci
364d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_session, sizeof(vce_session));
365d722e3fbSopenharmony_ci	len += sizeof(vce_session) / 4;
366d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo));
367d722e3fbSopenharmony_ci	len += sizeof(vce_taskinfo) / 4;
368d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_bs_buffer, sizeof(vce_bs_buffer));
369d722e3fbSopenharmony_ci	ib_cpu[len + 2] = enc->bs[0].addr >> 32;
370d722e3fbSopenharmony_ci	ib_cpu[len + 3] = enc->bs[0].addr;
371d722e3fbSopenharmony_ci	len += sizeof(vce_bs_buffer) / 4;
372d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_context_buffer, sizeof(vce_context_buffer));
373d722e3fbSopenharmony_ci	ib_cpu[len + 2] = enc->cpb.addr >> 32;
374d722e3fbSopenharmony_ci	ib_cpu[len + 3] = enc->cpb.addr;
375d722e3fbSopenharmony_ci	len += sizeof(vce_context_buffer) / 4;
376d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer));
377d722e3fbSopenharmony_ci	for (i = 0; i <  8; ++i)
378d722e3fbSopenharmony_ci		ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2);
379d722e3fbSopenharmony_ci	for (i = 0; i <  8; ++i)
380d722e3fbSopenharmony_ci		ib_cpu[len + 10 + i] = luma_size * 1.5;
381d722e3fbSopenharmony_ci	len += sizeof(vce_aux_buffer) / 4;
382d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
383d722e3fbSopenharmony_ci	ib_cpu[len + 2] = enc->fb[0].addr >> 32;
384d722e3fbSopenharmony_ci	ib_cpu[len + 3] = enc->fb[0].addr;
385d722e3fbSopenharmony_ci	len += sizeof(vce_feedback) / 4;
386d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_encode, sizeof(vce_encode));
387d722e3fbSopenharmony_ci	ib_cpu[len + 9] = luma_offset >> 32;
388d722e3fbSopenharmony_ci	ib_cpu[len + 10] = luma_offset;
389d722e3fbSopenharmony_ci	ib_cpu[len + 11] = chroma_offset >> 32;
390d722e3fbSopenharmony_ci	ib_cpu[len + 12] = chroma_offset;
391d722e3fbSopenharmony_ci	ib_cpu[len + 14] = ALIGN(enc->width, align);
392d722e3fbSopenharmony_ci	ib_cpu[len + 15] = ALIGN(enc->width, align);
393d722e3fbSopenharmony_ci	ib_cpu[len + 73] = luma_size * 1.5;
394d722e3fbSopenharmony_ci	ib_cpu[len + 74] = luma_size * 2.5;
395d722e3fbSopenharmony_ci	len += sizeof(vce_encode) / 4;
396d722e3fbSopenharmony_ci	enc->ib_len = len;
397d722e3fbSopenharmony_ci	if (!enc->two_instance) {
398d722e3fbSopenharmony_ci		r = submit(len, AMDGPU_HW_IP_VCE);
399d722e3fbSopenharmony_ci		CU_ASSERT_EQUAL(r, 0);
400d722e3fbSopenharmony_ci	}
401d722e3fbSopenharmony_ci}
402d722e3fbSopenharmony_ci
403d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
404d722e3fbSopenharmony_ci{
405d722e3fbSopenharmony_ci	uint64_t luma_offset, chroma_offset;
406d722e3fbSopenharmony_ci	int len, i, r;
407d722e3fbSopenharmony_ci	unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
408d722e3fbSopenharmony_ci	unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16);
409d722e3fbSopenharmony_ci
410d722e3fbSopenharmony_ci	len = (enc->two_instance) ? enc->ib_len : 0;
411d722e3fbSopenharmony_ci	luma_offset = enc->vbuf.addr;
412d722e3fbSopenharmony_ci	chroma_offset = luma_offset + luma_size;
413d722e3fbSopenharmony_ci
414d722e3fbSopenharmony_ci	if (!enc->two_instance) {
415d722e3fbSopenharmony_ci		memcpy((ib_cpu + len), vce_session, sizeof(vce_session));
416d722e3fbSopenharmony_ci		len += sizeof(vce_session) / 4;
417d722e3fbSopenharmony_ci	}
418d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo));
419d722e3fbSopenharmony_ci	len += sizeof(vce_taskinfo) / 4;
420d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_bs_buffer, sizeof(vce_bs_buffer));
421d722e3fbSopenharmony_ci	ib_cpu[len + 2] = enc->bs[1].addr >> 32;
422d722e3fbSopenharmony_ci	ib_cpu[len + 3] = enc->bs[1].addr;
423d722e3fbSopenharmony_ci	len += sizeof(vce_bs_buffer) / 4;
424d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_context_buffer, sizeof(vce_context_buffer));
425d722e3fbSopenharmony_ci	ib_cpu[len + 2] = enc->cpb.addr >> 32;
426d722e3fbSopenharmony_ci	ib_cpu[len + 3] = enc->cpb.addr;
427d722e3fbSopenharmony_ci	len += sizeof(vce_context_buffer) / 4;
428d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer));
429d722e3fbSopenharmony_ci	for (i = 0; i <  8; ++i)
430d722e3fbSopenharmony_ci		ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2);
431d722e3fbSopenharmony_ci	for (i = 0; i <  8; ++i)
432d722e3fbSopenharmony_ci		ib_cpu[len + 10 + i] = luma_size * 1.5;
433d722e3fbSopenharmony_ci	len += sizeof(vce_aux_buffer) / 4;
434d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
435d722e3fbSopenharmony_ci	ib_cpu[len + 2] = enc->fb[1].addr >> 32;
436d722e3fbSopenharmony_ci	ib_cpu[len + 3] = enc->fb[1].addr;
437d722e3fbSopenharmony_ci	len += sizeof(vce_feedback) / 4;
438d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_encode, sizeof(vce_encode));
439d722e3fbSopenharmony_ci	ib_cpu[len + 2] = 0;
440d722e3fbSopenharmony_ci	ib_cpu[len + 9] = luma_offset >> 32;
441d722e3fbSopenharmony_ci	ib_cpu[len + 10] = luma_offset;
442d722e3fbSopenharmony_ci	ib_cpu[len + 11] = chroma_offset >> 32;
443d722e3fbSopenharmony_ci	ib_cpu[len + 12] = chroma_offset;
444d722e3fbSopenharmony_ci	ib_cpu[len + 14] = ALIGN(enc->width, align);
445d722e3fbSopenharmony_ci	ib_cpu[len + 15] = ALIGN(enc->width, align);
446d722e3fbSopenharmony_ci	ib_cpu[len + 18] = 0;
447d722e3fbSopenharmony_ci	ib_cpu[len + 19] = 0;
448d722e3fbSopenharmony_ci	ib_cpu[len + 56] = 3;
449d722e3fbSopenharmony_ci	ib_cpu[len + 57] = 0;
450d722e3fbSopenharmony_ci	ib_cpu[len + 58] = 0;
451d722e3fbSopenharmony_ci	ib_cpu[len + 59] = luma_size * 1.5;
452d722e3fbSopenharmony_ci	ib_cpu[len + 60] = luma_size * 2.5;
453d722e3fbSopenharmony_ci	ib_cpu[len + 73] = 0;
454d722e3fbSopenharmony_ci	ib_cpu[len + 74] = luma_size;
455d722e3fbSopenharmony_ci	ib_cpu[len + 81] = 1;
456d722e3fbSopenharmony_ci	ib_cpu[len + 82] = 1;
457d722e3fbSopenharmony_ci	len += sizeof(vce_encode) / 4;
458d722e3fbSopenharmony_ci
459d722e3fbSopenharmony_ci	r = submit(len, AMDGPU_HW_IP_VCE);
460d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
461d722e3fbSopenharmony_ci}
462d722e3fbSopenharmony_ci
463d722e3fbSopenharmony_cistatic void check_result(struct amdgpu_vce_encode *enc)
464d722e3fbSopenharmony_ci{
465d722e3fbSopenharmony_ci	uint64_t sum;
466d722e3fbSopenharmony_ci	uint32_t s[2] = {180325, 15946};
467d722e3fbSopenharmony_ci	uint32_t *ptr, size;
468d722e3fbSopenharmony_ci	int i, j, r;
469d722e3fbSopenharmony_ci
470d722e3fbSopenharmony_ci	for (i = 0; i < 2; ++i) {
471d722e3fbSopenharmony_ci		r = amdgpu_bo_cpu_map(enc->fb[i].handle, (void **)&enc->fb[i].ptr);
472d722e3fbSopenharmony_ci		CU_ASSERT_EQUAL(r, 0);
473d722e3fbSopenharmony_ci		ptr = (uint32_t *)enc->fb[i].ptr;
474d722e3fbSopenharmony_ci		size = ptr[4] - ptr[9];
475d722e3fbSopenharmony_ci		r = amdgpu_bo_cpu_unmap(enc->fb[i].handle);
476d722e3fbSopenharmony_ci		CU_ASSERT_EQUAL(r, 0);
477d722e3fbSopenharmony_ci		r = amdgpu_bo_cpu_map(enc->bs[i].handle, (void **)&enc->bs[i].ptr);
478d722e3fbSopenharmony_ci		CU_ASSERT_EQUAL(r, 0);
479d722e3fbSopenharmony_ci		for (j = 0, sum = 0; j < size; ++j)
480d722e3fbSopenharmony_ci			sum += enc->bs[i].ptr[j];
481d722e3fbSopenharmony_ci		CU_ASSERT_EQUAL(sum, s[i]);
482d722e3fbSopenharmony_ci		r = amdgpu_bo_cpu_unmap(enc->bs[i].handle);
483d722e3fbSopenharmony_ci		CU_ASSERT_EQUAL(r, 0);
484d722e3fbSopenharmony_ci	}
485d722e3fbSopenharmony_ci}
486d722e3fbSopenharmony_ci
487d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_encode(void)
488d722e3fbSopenharmony_ci{
489d722e3fbSopenharmony_ci	uint32_t vbuf_size, bs_size = 0x154000, cpb_size;
490d722e3fbSopenharmony_ci	unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
491d722e3fbSopenharmony_ci	int i, r;
492d722e3fbSopenharmony_ci
493d722e3fbSopenharmony_ci	vbuf_size = ALIGN(enc.width, align) * ALIGN(enc.height, 16) * 1.5;
494d722e3fbSopenharmony_ci	cpb_size = vbuf_size * 10;
495d722e3fbSopenharmony_ci	num_resources = 0;
496d722e3fbSopenharmony_ci	alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT);
497d722e3fbSopenharmony_ci	resources[num_resources++] = enc.fb[0].handle;
498d722e3fbSopenharmony_ci	alloc_resource(&enc.fb[1], 4096, AMDGPU_GEM_DOMAIN_GTT);
499d722e3fbSopenharmony_ci	resources[num_resources++] = enc.fb[1].handle;
500d722e3fbSopenharmony_ci	alloc_resource(&enc.bs[0], bs_size, AMDGPU_GEM_DOMAIN_GTT);
501d722e3fbSopenharmony_ci	resources[num_resources++] = enc.bs[0].handle;
502d722e3fbSopenharmony_ci	alloc_resource(&enc.bs[1], bs_size, AMDGPU_GEM_DOMAIN_GTT);
503d722e3fbSopenharmony_ci	resources[num_resources++] = enc.bs[1].handle;
504d722e3fbSopenharmony_ci	alloc_resource(&enc.vbuf, vbuf_size, AMDGPU_GEM_DOMAIN_VRAM);
505d722e3fbSopenharmony_ci	resources[num_resources++] = enc.vbuf.handle;
506d722e3fbSopenharmony_ci	alloc_resource(&enc.cpb, cpb_size, AMDGPU_GEM_DOMAIN_VRAM);
507d722e3fbSopenharmony_ci	resources[num_resources++] = enc.cpb.handle;
508d722e3fbSopenharmony_ci	resources[num_resources++] = ib_handle;
509d722e3fbSopenharmony_ci
510d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_map(enc.vbuf.handle, (void **)&enc.vbuf.ptr);
511d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
512d722e3fbSopenharmony_ci
513d722e3fbSopenharmony_ci	memset(enc.vbuf.ptr, 0, vbuf_size);
514d722e3fbSopenharmony_ci	for (i = 0; i < enc.height; ++i) {
515d722e3fbSopenharmony_ci		memcpy(enc.vbuf.ptr, (frame + i * enc.width), enc.width);
516d722e3fbSopenharmony_ci		enc.vbuf.ptr += ALIGN(enc.width, align);
517d722e3fbSopenharmony_ci	}
518d722e3fbSopenharmony_ci	for (i = 0; i < enc.height / 2; ++i) {
519d722e3fbSopenharmony_ci		memcpy(enc.vbuf.ptr, ((frame + enc.height * enc.width) + i * enc.width), enc.width);
520d722e3fbSopenharmony_ci		enc.vbuf.ptr += ALIGN(enc.width, align);
521d722e3fbSopenharmony_ci	}
522d722e3fbSopenharmony_ci
523d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_unmap(enc.vbuf.handle);
524d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
525d722e3fbSopenharmony_ci
526d722e3fbSopenharmony_ci	amdgpu_cs_vce_config();
527d722e3fbSopenharmony_ci
528d722e3fbSopenharmony_ci	if (family_id >= AMDGPU_FAMILY_VI) {
529d722e3fbSopenharmony_ci		vce_taskinfo[3] = 3;
530d722e3fbSopenharmony_ci		amdgpu_cs_vce_encode_idr(&enc);
531d722e3fbSopenharmony_ci		amdgpu_cs_vce_encode_p(&enc);
532d722e3fbSopenharmony_ci		check_result(&enc);
533d722e3fbSopenharmony_ci
534d722e3fbSopenharmony_ci		/* two pipes */
535d722e3fbSopenharmony_ci		vce_encode[16] = 0;
536d722e3fbSopenharmony_ci		amdgpu_cs_vce_encode_idr(&enc);
537d722e3fbSopenharmony_ci		amdgpu_cs_vce_encode_p(&enc);
538d722e3fbSopenharmony_ci		check_result(&enc);
539d722e3fbSopenharmony_ci
540d722e3fbSopenharmony_ci		/* two instances */
541d722e3fbSopenharmony_ci		if (vce_harvest_config == 0) {
542d722e3fbSopenharmony_ci			enc.two_instance = true;
543d722e3fbSopenharmony_ci			vce_taskinfo[2] = 0x83;
544d722e3fbSopenharmony_ci			vce_taskinfo[4] = 1;
545d722e3fbSopenharmony_ci			amdgpu_cs_vce_encode_idr(&enc);
546d722e3fbSopenharmony_ci			vce_taskinfo[2] = 0xffffffff;
547d722e3fbSopenharmony_ci			vce_taskinfo[4] = 2;
548d722e3fbSopenharmony_ci			amdgpu_cs_vce_encode_p(&enc);
549d722e3fbSopenharmony_ci			check_result(&enc);
550d722e3fbSopenharmony_ci		}
551d722e3fbSopenharmony_ci	} else {
552d722e3fbSopenharmony_ci		vce_taskinfo[3] = 3;
553d722e3fbSopenharmony_ci		vce_encode[16] = 0;
554d722e3fbSopenharmony_ci		amdgpu_cs_vce_encode_idr(&enc);
555d722e3fbSopenharmony_ci		amdgpu_cs_vce_encode_p(&enc);
556d722e3fbSopenharmony_ci		check_result(&enc);
557d722e3fbSopenharmony_ci	}
558d722e3fbSopenharmony_ci
559d722e3fbSopenharmony_ci	free_resource(&enc.fb[0]);
560d722e3fbSopenharmony_ci	free_resource(&enc.fb[1]);
561d722e3fbSopenharmony_ci	free_resource(&enc.bs[0]);
562d722e3fbSopenharmony_ci	free_resource(&enc.bs[1]);
563d722e3fbSopenharmony_ci	free_resource(&enc.vbuf);
564d722e3fbSopenharmony_ci	free_resource(&enc.cpb);
565d722e3fbSopenharmony_ci}
566d722e3fbSopenharmony_ci
567d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_mv(struct amdgpu_vce_encode *enc)
568d722e3fbSopenharmony_ci{
569d722e3fbSopenharmony_ci	uint64_t luma_offset, chroma_offset;
570d722e3fbSopenharmony_ci	uint64_t mv_ref_luma_offset;
571d722e3fbSopenharmony_ci	unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
572d722e3fbSopenharmony_ci	unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16);
573d722e3fbSopenharmony_ci	int len = 0, i, r;
574d722e3fbSopenharmony_ci
575d722e3fbSopenharmony_ci	luma_offset = enc->vbuf.addr;
576d722e3fbSopenharmony_ci	chroma_offset = luma_offset + luma_size;
577d722e3fbSopenharmony_ci	mv_ref_luma_offset = enc->mvrefbuf.addr;
578d722e3fbSopenharmony_ci
579d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_session, sizeof(vce_session));
580d722e3fbSopenharmony_ci	len += sizeof(vce_session) / 4;
581d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo));
582d722e3fbSopenharmony_ci	len += sizeof(vce_taskinfo) / 4;
583d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_bs_buffer, sizeof(vce_bs_buffer));
584d722e3fbSopenharmony_ci	ib_cpu[len + 2] = enc->bs[0].addr >> 32;
585d722e3fbSopenharmony_ci	ib_cpu[len + 3] = enc->bs[0].addr;
586d722e3fbSopenharmony_ci	len += sizeof(vce_bs_buffer) / 4;
587d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_context_buffer, sizeof(vce_context_buffer));
588d722e3fbSopenharmony_ci	ib_cpu[len + 2] = enc->cpb.addr >> 32;
589d722e3fbSopenharmony_ci	ib_cpu[len + 3] = enc->cpb.addr;
590d722e3fbSopenharmony_ci	len += sizeof(vce_context_buffer) / 4;
591d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer));
592d722e3fbSopenharmony_ci	for (i = 0; i <  8; ++i)
593d722e3fbSopenharmony_ci		ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2);
594d722e3fbSopenharmony_ci	for (i = 0; i <  8; ++i)
595d722e3fbSopenharmony_ci		ib_cpu[len + 10 + i] = luma_size * 1.5;
596d722e3fbSopenharmony_ci	len += sizeof(vce_aux_buffer) / 4;
597d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
598d722e3fbSopenharmony_ci	ib_cpu[len + 2] = enc->fb[0].addr >> 32;
599d722e3fbSopenharmony_ci	ib_cpu[len + 3] = enc->fb[0].addr;
600d722e3fbSopenharmony_ci	len += sizeof(vce_feedback) / 4;
601d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_mv_buffer, sizeof(vce_mv_buffer));
602d722e3fbSopenharmony_ci	ib_cpu[len + 2] = mv_ref_luma_offset >> 32;
603d722e3fbSopenharmony_ci	ib_cpu[len + 3] = mv_ref_luma_offset;
604d722e3fbSopenharmony_ci	ib_cpu[len + 4] = ALIGN(enc->width, align);
605d722e3fbSopenharmony_ci	ib_cpu[len + 5] = ALIGN(enc->width, align);
606d722e3fbSopenharmony_ci	ib_cpu[len + 6] = luma_size;
607d722e3fbSopenharmony_ci	ib_cpu[len + 7] = enc->mvb.addr >> 32;
608d722e3fbSopenharmony_ci	ib_cpu[len + 8] = enc->mvb.addr;
609d722e3fbSopenharmony_ci	len += sizeof(vce_mv_buffer) / 4;
610d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_encode, sizeof(vce_encode));
611d722e3fbSopenharmony_ci	ib_cpu[len + 2] = 0;
612d722e3fbSopenharmony_ci	ib_cpu[len + 3] = 0;
613d722e3fbSopenharmony_ci	ib_cpu[len + 4] = 0x154000;
614d722e3fbSopenharmony_ci	ib_cpu[len + 9] = luma_offset >> 32;
615d722e3fbSopenharmony_ci	ib_cpu[len + 10] = luma_offset;
616d722e3fbSopenharmony_ci	ib_cpu[len + 11] = chroma_offset >> 32;
617d722e3fbSopenharmony_ci	ib_cpu[len + 12] = chroma_offset;
618d722e3fbSopenharmony_ci	ib_cpu[len + 13] = ALIGN(enc->height, 16);;
619d722e3fbSopenharmony_ci	ib_cpu[len + 14] = ALIGN(enc->width, align);
620d722e3fbSopenharmony_ci	ib_cpu[len + 15] = ALIGN(enc->width, align);
621d722e3fbSopenharmony_ci	/* encDisableMBOffloading-encDisableTwoPipeMode-encInputPicArrayMode-encInputPicAddrMode */
622d722e3fbSopenharmony_ci	ib_cpu[len + 16] = 0x01010000;
623d722e3fbSopenharmony_ci	ib_cpu[len + 18] = 0; /* encPicType */
624d722e3fbSopenharmony_ci	ib_cpu[len + 19] = 0; /* encIdrFlag */
625d722e3fbSopenharmony_ci	ib_cpu[len + 20] = 0; /* encIdrPicId */
626d722e3fbSopenharmony_ci	ib_cpu[len + 21] = 0; /* encMGSKeyPic */
627d722e3fbSopenharmony_ci	ib_cpu[len + 22] = 0; /* encReferenceFlag */
628d722e3fbSopenharmony_ci	ib_cpu[len + 23] = 0; /* encTemporalLayerIndex */
629d722e3fbSopenharmony_ci	ib_cpu[len + 55] = 0; /* pictureStructure */
630d722e3fbSopenharmony_ci	ib_cpu[len + 56] = 0; /* encPicType -ref[0] */
631d722e3fbSopenharmony_ci	ib_cpu[len + 61] = 0; /* pictureStructure */
632d722e3fbSopenharmony_ci	ib_cpu[len + 62] = 0; /* encPicType -ref[1] */
633d722e3fbSopenharmony_ci	ib_cpu[len + 67] = 0; /* pictureStructure */
634d722e3fbSopenharmony_ci	ib_cpu[len + 68] = 0; /* encPicType -ref1 */
635d722e3fbSopenharmony_ci	ib_cpu[len + 81] = 1; /* frameNumber */
636d722e3fbSopenharmony_ci	ib_cpu[len + 82] = 2; /* pictureOrderCount */
637d722e3fbSopenharmony_ci	ib_cpu[len + 83] = 0xffffffff; /* numIPicRemainInRCGOP */
638d722e3fbSopenharmony_ci	ib_cpu[len + 84] = 0xffffffff; /* numPPicRemainInRCGOP */
639d722e3fbSopenharmony_ci	ib_cpu[len + 85] = 0xffffffff; /* numBPicRemainInRCGOP */
640d722e3fbSopenharmony_ci	ib_cpu[len + 86] = 0xffffffff; /* numIRPicRemainInRCGOP */
641d722e3fbSopenharmony_ci	ib_cpu[len + 87] = 0; /* remainedIntraRefreshPictures */
642d722e3fbSopenharmony_ci	len += sizeof(vce_encode) / 4;
643d722e3fbSopenharmony_ci
644d722e3fbSopenharmony_ci	enc->ib_len = len;
645d722e3fbSopenharmony_ci	r = submit(len, AMDGPU_HW_IP_VCE);
646d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
647d722e3fbSopenharmony_ci}
648d722e3fbSopenharmony_ci
649d722e3fbSopenharmony_cistatic void check_mv_result(struct amdgpu_vce_encode *enc)
650d722e3fbSopenharmony_ci{
651d722e3fbSopenharmony_ci	uint64_t sum;
652d722e3fbSopenharmony_ci	uint32_t s = 140790;
653d722e3fbSopenharmony_ci	int j, r;
654d722e3fbSopenharmony_ci
655d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_map(enc->fb[0].handle, (void **)&enc->fb[0].ptr);
656d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
657d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_unmap(enc->fb[0].handle);
658d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
659d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_map(enc->mvb.handle, (void **)&enc->mvb.ptr);
660d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
661d722e3fbSopenharmony_ci	for (j = 0, sum = 0; j < enc->mvbuf_size; ++j)
662d722e3fbSopenharmony_ci		sum += enc->mvb.ptr[j];
663d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(sum, s);
664d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_unmap(enc->mvb.handle);
665d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
666d722e3fbSopenharmony_ci}
667d722e3fbSopenharmony_ci
668d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_encode_mv(void)
669d722e3fbSopenharmony_ci{
670d722e3fbSopenharmony_ci	uint32_t vbuf_size, bs_size = 0x154000, cpb_size;
671d722e3fbSopenharmony_ci	unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
672d722e3fbSopenharmony_ci	int i, r;
673d722e3fbSopenharmony_ci
674d722e3fbSopenharmony_ci	vbuf_size = ALIGN(enc.width, align) * ALIGN(enc.height, 16) * 1.5;
675d722e3fbSopenharmony_ci	enc.mvbuf_size = ALIGN(enc.width, 16) * ALIGN(enc.height, 16) / 8;
676d722e3fbSopenharmony_ci	cpb_size = vbuf_size * 10;
677d722e3fbSopenharmony_ci	num_resources = 0;
678d722e3fbSopenharmony_ci	alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT);
679d722e3fbSopenharmony_ci	resources[num_resources++] = enc.fb[0].handle;
680d722e3fbSopenharmony_ci	alloc_resource(&enc.bs[0], bs_size, AMDGPU_GEM_DOMAIN_GTT);
681d722e3fbSopenharmony_ci	resources[num_resources++] = enc.bs[0].handle;
682d722e3fbSopenharmony_ci	alloc_resource(&enc.mvb, enc.mvbuf_size, AMDGPU_GEM_DOMAIN_GTT);
683d722e3fbSopenharmony_ci	resources[num_resources++] = enc.mvb.handle;
684d722e3fbSopenharmony_ci	alloc_resource(&enc.vbuf, vbuf_size, AMDGPU_GEM_DOMAIN_VRAM);
685d722e3fbSopenharmony_ci	resources[num_resources++] = enc.vbuf.handle;
686d722e3fbSopenharmony_ci	alloc_resource(&enc.mvrefbuf, vbuf_size, AMDGPU_GEM_DOMAIN_VRAM);
687d722e3fbSopenharmony_ci	resources[num_resources++] = enc.mvrefbuf.handle;
688d722e3fbSopenharmony_ci	alloc_resource(&enc.cpb, cpb_size, AMDGPU_GEM_DOMAIN_VRAM);
689d722e3fbSopenharmony_ci	resources[num_resources++] = enc.cpb.handle;
690d722e3fbSopenharmony_ci	resources[num_resources++] = ib_handle;
691d722e3fbSopenharmony_ci
692d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_map(enc.vbuf.handle, (void **)&enc.vbuf.ptr);
693d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
694d722e3fbSopenharmony_ci
695d722e3fbSopenharmony_ci	memset(enc.vbuf.ptr, 0, vbuf_size);
696d722e3fbSopenharmony_ci	for (i = 0; i < enc.height; ++i) {
697d722e3fbSopenharmony_ci		memcpy(enc.vbuf.ptr, (frame + i * enc.width), enc.width);
698d722e3fbSopenharmony_ci		enc.vbuf.ptr += ALIGN(enc.width, align);
699d722e3fbSopenharmony_ci	}
700d722e3fbSopenharmony_ci	for (i = 0; i < enc.height / 2; ++i) {
701d722e3fbSopenharmony_ci		memcpy(enc.vbuf.ptr, ((frame + enc.height * enc.width) + i * enc.width), enc.width);
702d722e3fbSopenharmony_ci		enc.vbuf.ptr += ALIGN(enc.width, align);
703d722e3fbSopenharmony_ci	}
704d722e3fbSopenharmony_ci
705d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_unmap(enc.vbuf.handle);
706d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
707d722e3fbSopenharmony_ci
708d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_map(enc.mvrefbuf.handle, (void **)&enc.mvrefbuf.ptr);
709d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
710d722e3fbSopenharmony_ci
711d722e3fbSopenharmony_ci	memset(enc.mvrefbuf.ptr, 0, vbuf_size);
712d722e3fbSopenharmony_ci	for (i = 0; i < enc.height; ++i) {
713d722e3fbSopenharmony_ci		memcpy(enc.mvrefbuf.ptr, (frame + (enc.height - i -1) * enc.width), enc.width);
714d722e3fbSopenharmony_ci		enc.mvrefbuf.ptr += ALIGN(enc.width, align);
715d722e3fbSopenharmony_ci	}
716d722e3fbSopenharmony_ci	for (i = 0; i < enc.height / 2; ++i) {
717d722e3fbSopenharmony_ci		memcpy(enc.mvrefbuf.ptr,
718d722e3fbSopenharmony_ci		((frame + enc.height * enc.width) + (enc.height / 2 - i -1) * enc.width), enc.width);
719d722e3fbSopenharmony_ci		enc.mvrefbuf.ptr += ALIGN(enc.width, align);
720d722e3fbSopenharmony_ci	}
721d722e3fbSopenharmony_ci
722d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_unmap(enc.mvrefbuf.handle);
723d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
724d722e3fbSopenharmony_ci
725d722e3fbSopenharmony_ci	amdgpu_cs_vce_config();
726d722e3fbSopenharmony_ci
727d722e3fbSopenharmony_ci	vce_taskinfo[3] = 3;
728d722e3fbSopenharmony_ci	amdgpu_cs_vce_mv(&enc);
729d722e3fbSopenharmony_ci	check_mv_result(&enc);
730d722e3fbSopenharmony_ci
731d722e3fbSopenharmony_ci	free_resource(&enc.fb[0]);
732d722e3fbSopenharmony_ci	free_resource(&enc.bs[0]);
733d722e3fbSopenharmony_ci	free_resource(&enc.vbuf);
734d722e3fbSopenharmony_ci	free_resource(&enc.cpb);
735d722e3fbSopenharmony_ci	free_resource(&enc.mvrefbuf);
736d722e3fbSopenharmony_ci	free_resource(&enc.mvb);
737d722e3fbSopenharmony_ci}
738d722e3fbSopenharmony_ci
739d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_destroy(void)
740d722e3fbSopenharmony_ci{
741d722e3fbSopenharmony_ci	int len, r;
742d722e3fbSopenharmony_ci
743d722e3fbSopenharmony_ci	num_resources  = 0;
744d722e3fbSopenharmony_ci	alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT);
745d722e3fbSopenharmony_ci	resources[num_resources++] = enc.fb[0].handle;
746d722e3fbSopenharmony_ci	resources[num_resources++] = ib_handle;
747d722e3fbSopenharmony_ci
748d722e3fbSopenharmony_ci	len = 0;
749d722e3fbSopenharmony_ci	memcpy(ib_cpu, vce_session, sizeof(vce_session));
750d722e3fbSopenharmony_ci	len += sizeof(vce_session) / 4;
751d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo));
752d722e3fbSopenharmony_ci	ib_cpu[len + 3] = 1;
753d722e3fbSopenharmony_ci	len += sizeof(vce_taskinfo) / 4;
754d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
755d722e3fbSopenharmony_ci	ib_cpu[len + 2] = enc.fb[0].addr >> 32;
756d722e3fbSopenharmony_ci	ib_cpu[len + 3] = enc.fb[0].addr;
757d722e3fbSopenharmony_ci	len += sizeof(vce_feedback) / 4;
758d722e3fbSopenharmony_ci	memcpy((ib_cpu + len), vce_destroy, sizeof(vce_destroy));
759d722e3fbSopenharmony_ci	len += sizeof(vce_destroy) / 4;
760d722e3fbSopenharmony_ci
761d722e3fbSopenharmony_ci	r = submit(len, AMDGPU_HW_IP_VCE);
762d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
763d722e3fbSopenharmony_ci
764d722e3fbSopenharmony_ci	free_resource(&enc.fb[0]);
765d722e3fbSopenharmony_ci}
766