1d722e3fbSopenharmony_ci/*
2d722e3fbSopenharmony_ci * Copyright 2022 Advanced Micro Devices, Inc.
3d722e3fbSopenharmony_ci *
4d722e3fbSopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5d722e3fbSopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6d722e3fbSopenharmony_ci * to deal in the Software without restriction, including without limitation
7d722e3fbSopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8d722e3fbSopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9d722e3fbSopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10d722e3fbSopenharmony_ci *
11d722e3fbSopenharmony_ci * The above copyright notice and this permission notice shall be included in
12d722e3fbSopenharmony_ci * all copies or substantial portions of the Software.
13d722e3fbSopenharmony_ci *
14d722e3fbSopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15d722e3fbSopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16d722e3fbSopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17d722e3fbSopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18d722e3fbSopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19d722e3fbSopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20d722e3fbSopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
21d722e3fbSopenharmony_ci *
22d722e3fbSopenharmony_ci*/
23d722e3fbSopenharmony_ci
24d722e3fbSopenharmony_ci#include <unistd.h>
25d722e3fbSopenharmony_ci#include <stdio.h>
26d722e3fbSopenharmony_ci#include <stdlib.h>
27d722e3fbSopenharmony_ci#include <inttypes.h>
28d722e3fbSopenharmony_ci
29d722e3fbSopenharmony_ci#include "CUnit/Basic.h"
30d722e3fbSopenharmony_ci
31d722e3fbSopenharmony_ci#include "amdgpu_test.h"
32d722e3fbSopenharmony_ci#include "amdgpu_drm.h"
33d722e3fbSopenharmony_ci#include "amdgpu_internal.h"
34d722e3fbSopenharmony_ci
35d722e3fbSopenharmony_ci#define IB_SIZE 4096
36d722e3fbSopenharmony_ci#define MAX_RESOURCES 8
37d722e3fbSopenharmony_ci
38d722e3fbSopenharmony_ci#define DMA_SIZE 4097
39d722e3fbSopenharmony_ci#define DMA_DATA_BYTE 0xea
40d722e3fbSopenharmony_ci
41d722e3fbSopenharmony_cistatic bool do_p2p;
42d722e3fbSopenharmony_ci
43d722e3fbSopenharmony_cistatic amdgpu_device_handle executing_device_handle;
44d722e3fbSopenharmony_cistatic uint32_t executing_device_major_version;
45d722e3fbSopenharmony_cistatic uint32_t executing_device_minor_version;
46d722e3fbSopenharmony_ci
47d722e3fbSopenharmony_cistatic amdgpu_device_handle peer_exporting_device_handle;
48d722e3fbSopenharmony_cistatic uint32_t peer_exporting_device_major_version;
49d722e3fbSopenharmony_cistatic uint32_t peer_exporting_device_minor_version;
50d722e3fbSopenharmony_ci
51d722e3fbSopenharmony_cistatic amdgpu_context_handle context_handle;
52d722e3fbSopenharmony_cistatic amdgpu_bo_handle ib_handle;
53d722e3fbSopenharmony_cistatic uint32_t *ib_cpu;
54d722e3fbSopenharmony_cistatic uint64_t ib_mc_address;
55d722e3fbSopenharmony_cistatic amdgpu_va_handle ib_va_handle;
56d722e3fbSopenharmony_cistatic uint32_t num_dword;
57d722e3fbSopenharmony_ci
58d722e3fbSopenharmony_cistatic amdgpu_bo_handle resources[MAX_RESOURCES];
59d722e3fbSopenharmony_cistatic unsigned num_resources;
60d722e3fbSopenharmony_ci
61d722e3fbSopenharmony_cistatic uint8_t* reference_data;
62d722e3fbSopenharmony_ci
63d722e3fbSopenharmony_cistatic void amdgpu_cp_dma_host_to_vram(void);
64d722e3fbSopenharmony_cistatic void amdgpu_cp_dma_vram_to_host(void);
65d722e3fbSopenharmony_cistatic void amdgpu_cp_dma_p2p_vram_to_vram(void);
66d722e3fbSopenharmony_cistatic void amdgpu_cp_dma_p2p_host_to_vram(void);
67d722e3fbSopenharmony_cistatic void amdgpu_cp_dma_p2p_vram_to_host(void);
68d722e3fbSopenharmony_ci
69d722e3fbSopenharmony_ci/**
70d722e3fbSopenharmony_ci * Tests in cp dma test suite
71d722e3fbSopenharmony_ci */
72d722e3fbSopenharmony_ciCU_TestInfo cp_dma_tests[] = {
73d722e3fbSopenharmony_ci	{ "CP DMA write Host to VRAM",  amdgpu_cp_dma_host_to_vram },
74d722e3fbSopenharmony_ci	{ "CP DMA write VRAM to Host",  amdgpu_cp_dma_vram_to_host },
75d722e3fbSopenharmony_ci
76d722e3fbSopenharmony_ci	{ "Peer to Peer CP DMA write VRAM to VRAM",  amdgpu_cp_dma_p2p_vram_to_vram },
77d722e3fbSopenharmony_ci	{ "Peer to Peer CP DMA write Host to VRAM",  amdgpu_cp_dma_p2p_host_to_vram },
78d722e3fbSopenharmony_ci	{ "Peer to Peer CP DMA write VRAM to Host",  amdgpu_cp_dma_p2p_vram_to_host },
79d722e3fbSopenharmony_ci	CU_TEST_INFO_NULL,
80d722e3fbSopenharmony_ci};
81d722e3fbSopenharmony_ci
82d722e3fbSopenharmony_cistruct amdgpu_cp_dma_bo{
83d722e3fbSopenharmony_ci	amdgpu_bo_handle buf_handle;
84d722e3fbSopenharmony_ci	amdgpu_va_handle va_handle;
85d722e3fbSopenharmony_ci	uint64_t gpu_va;
86d722e3fbSopenharmony_ci	uint64_t size;
87d722e3fbSopenharmony_ci};
88d722e3fbSopenharmony_ci
89d722e3fbSopenharmony_cistatic int allocate_bo_and_va(amdgpu_device_handle dev,
90d722e3fbSopenharmony_ci		uint64_t size, uint64_t alignment,
91d722e3fbSopenharmony_ci		uint32_t heap, uint64_t alloc_flags,
92d722e3fbSopenharmony_ci		struct amdgpu_cp_dma_bo *bo) {
93d722e3fbSopenharmony_ci	struct amdgpu_bo_alloc_request request = {};
94d722e3fbSopenharmony_ci	amdgpu_bo_handle buf_handle;
95d722e3fbSopenharmony_ci	amdgpu_va_handle va_handle;
96d722e3fbSopenharmony_ci	uint64_t vmc_addr;
97d722e3fbSopenharmony_ci	int r;
98d722e3fbSopenharmony_ci
99d722e3fbSopenharmony_ci	request.alloc_size = size;
100d722e3fbSopenharmony_ci	request.phys_alignment = alignment;
101d722e3fbSopenharmony_ci	request.preferred_heap = heap;
102d722e3fbSopenharmony_ci	request.flags = alloc_flags;
103d722e3fbSopenharmony_ci
104d722e3fbSopenharmony_ci	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
105d722e3fbSopenharmony_ci	if (r)
106d722e3fbSopenharmony_ci		goto error_bo_alloc;
107d722e3fbSopenharmony_ci
108d722e3fbSopenharmony_ci	r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
109d722e3fbSopenharmony_ci			size, alignment, 0,
110d722e3fbSopenharmony_ci			&vmc_addr, &va_handle, 0);
111d722e3fbSopenharmony_ci	if (r)
112d722e3fbSopenharmony_ci		goto error_va_alloc;
113d722e3fbSopenharmony_ci
114d722e3fbSopenharmony_ci	r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr,
115d722e3fbSopenharmony_ci						AMDGPU_VM_PAGE_READABLE |
116d722e3fbSopenharmony_ci							AMDGPU_VM_PAGE_WRITEABLE |
117d722e3fbSopenharmony_ci							AMDGPU_VM_PAGE_EXECUTABLE,
118d722e3fbSopenharmony_ci						AMDGPU_VA_OP_MAP);
119d722e3fbSopenharmony_ci	if (r)
120d722e3fbSopenharmony_ci		goto error_va_map;
121d722e3fbSopenharmony_ci
122d722e3fbSopenharmony_ci	bo->buf_handle = buf_handle;
123d722e3fbSopenharmony_ci	bo->va_handle = va_handle;
124d722e3fbSopenharmony_ci	bo->gpu_va = vmc_addr;
125d722e3fbSopenharmony_ci	bo->size = size;
126d722e3fbSopenharmony_ci
127d722e3fbSopenharmony_ci	return 0;
128d722e3fbSopenharmony_ci
129d722e3fbSopenharmony_cierror_va_map:
130d722e3fbSopenharmony_ci	amdgpu_bo_va_op(buf_handle, 0,
131d722e3fbSopenharmony_ci			size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
132d722e3fbSopenharmony_ci
133d722e3fbSopenharmony_cierror_va_alloc:
134d722e3fbSopenharmony_ci	amdgpu_va_range_free(va_handle);
135d722e3fbSopenharmony_ci
136d722e3fbSopenharmony_cierror_bo_alloc:
137d722e3fbSopenharmony_ci	amdgpu_bo_free(buf_handle);
138d722e3fbSopenharmony_ci
139d722e3fbSopenharmony_ci	return r;
140d722e3fbSopenharmony_ci}
141d722e3fbSopenharmony_ci
142d722e3fbSopenharmony_cistatic int import_dma_buf_to_bo(amdgpu_device_handle dev,
143d722e3fbSopenharmony_ci		int dmabuf_fd, struct amdgpu_cp_dma_bo *bo) {
144d722e3fbSopenharmony_ci	amdgpu_va_handle va_handle;
145d722e3fbSopenharmony_ci	uint64_t vmc_addr;
146d722e3fbSopenharmony_ci	int r;
147d722e3fbSopenharmony_ci	struct amdgpu_bo_import_result bo_import_result = {};
148d722e3fbSopenharmony_ci
149d722e3fbSopenharmony_ci	r = amdgpu_bo_import(dev, amdgpu_bo_handle_type_dma_buf_fd,
150d722e3fbSopenharmony_ci			dmabuf_fd, &bo_import_result);
151d722e3fbSopenharmony_ci	if (r)
152d722e3fbSopenharmony_ci		goto error_bo_import;
153d722e3fbSopenharmony_ci
154d722e3fbSopenharmony_ci	r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
155d722e3fbSopenharmony_ci				bo_import_result.alloc_size, 0, 0,
156d722e3fbSopenharmony_ci				&vmc_addr, &va_handle, 0);
157d722e3fbSopenharmony_ci	if (r)
158d722e3fbSopenharmony_ci		goto error_va_alloc;
159d722e3fbSopenharmony_ci
160d722e3fbSopenharmony_ci	r = amdgpu_bo_va_op(bo_import_result.buf_handle, 0,
161d722e3fbSopenharmony_ci			bo_import_result.alloc_size, vmc_addr,
162d722e3fbSopenharmony_ci			AMDGPU_VM_PAGE_READABLE |
163d722e3fbSopenharmony_ci				AMDGPU_VM_PAGE_WRITEABLE |
164d722e3fbSopenharmony_ci				AMDGPU_VM_PAGE_EXECUTABLE,
165d722e3fbSopenharmony_ci			AMDGPU_VA_OP_MAP);
166d722e3fbSopenharmony_ci	if (r)
167d722e3fbSopenharmony_ci		goto error_va_map;
168d722e3fbSopenharmony_ci
169d722e3fbSopenharmony_ci	bo->buf_handle = bo_import_result.buf_handle;
170d722e3fbSopenharmony_ci	bo->va_handle = va_handle;
171d722e3fbSopenharmony_ci	bo->gpu_va = vmc_addr;
172d722e3fbSopenharmony_ci	bo->size = bo_import_result.alloc_size;
173d722e3fbSopenharmony_ci
174d722e3fbSopenharmony_ci	return 0;
175d722e3fbSopenharmony_ci
176d722e3fbSopenharmony_cierror_va_map:
177d722e3fbSopenharmony_ci	amdgpu_bo_va_op(bo_import_result.buf_handle, 0,
178d722e3fbSopenharmony_ci			bo_import_result.alloc_size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
179d722e3fbSopenharmony_ci
180d722e3fbSopenharmony_cierror_va_alloc:
181d722e3fbSopenharmony_ci	amdgpu_va_range_free(va_handle);
182d722e3fbSopenharmony_ci
183d722e3fbSopenharmony_cierror_bo_import:
184d722e3fbSopenharmony_ci	amdgpu_bo_free(bo_import_result.buf_handle);
185d722e3fbSopenharmony_ci
186d722e3fbSopenharmony_ci	return r;
187d722e3fbSopenharmony_ci}
188d722e3fbSopenharmony_ci
189d722e3fbSopenharmony_cistatic int free_bo(struct amdgpu_cp_dma_bo bo) {
190d722e3fbSopenharmony_ci	int r;
191d722e3fbSopenharmony_ci	r = amdgpu_bo_va_op(bo.buf_handle, 0,
192d722e3fbSopenharmony_ci			bo.size, bo.gpu_va, 0, AMDGPU_VA_OP_UNMAP);
193d722e3fbSopenharmony_ci	if(r)
194d722e3fbSopenharmony_ci		return r;
195d722e3fbSopenharmony_ci
196d722e3fbSopenharmony_ci	r = amdgpu_va_range_free(bo.va_handle);
197d722e3fbSopenharmony_ci	if(r)
198d722e3fbSopenharmony_ci		return r;
199d722e3fbSopenharmony_ci
200d722e3fbSopenharmony_ci	r = amdgpu_bo_free(bo.buf_handle);
201d722e3fbSopenharmony_ci	if(r)
202d722e3fbSopenharmony_ci		return r;
203d722e3fbSopenharmony_ci
204d722e3fbSopenharmony_ci	return 0;
205d722e3fbSopenharmony_ci}
206d722e3fbSopenharmony_ci
207d722e3fbSopenharmony_cistatic int submit_and_sync() {
208d722e3fbSopenharmony_ci	struct amdgpu_cs_request ibs_request = {0};
209d722e3fbSopenharmony_ci	struct amdgpu_cs_ib_info ib_info = {0};
210d722e3fbSopenharmony_ci	struct amdgpu_cs_fence fence_status = {0};
211d722e3fbSopenharmony_ci	uint32_t expired;
212d722e3fbSopenharmony_ci	uint32_t family_id, chip_id, chip_rev;
213d722e3fbSopenharmony_ci	unsigned gc_ip_type;
214d722e3fbSopenharmony_ci	int r;
215d722e3fbSopenharmony_ci
216d722e3fbSopenharmony_ci	r = amdgpu_bo_list_create(executing_device_handle,
217d722e3fbSopenharmony_ci			num_resources, resources,
218d722e3fbSopenharmony_ci			NULL, &ibs_request.resources);
219d722e3fbSopenharmony_ci	if (r)
220d722e3fbSopenharmony_ci		return r;
221d722e3fbSopenharmony_ci
222d722e3fbSopenharmony_ci	family_id = executing_device_handle->info.family_id;
223d722e3fbSopenharmony_ci	chip_id = executing_device_handle->info.chip_external_rev;
224d722e3fbSopenharmony_ci	chip_rev = executing_device_handle->info.chip_rev;
225d722e3fbSopenharmony_ci
226d722e3fbSopenharmony_ci	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
227d722e3fbSopenharmony_ci		AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;
228d722e3fbSopenharmony_ci
229d722e3fbSopenharmony_ci	ib_info.ib_mc_address = ib_mc_address;
230d722e3fbSopenharmony_ci	ib_info.size = num_dword;
231d722e3fbSopenharmony_ci
232d722e3fbSopenharmony_ci	ibs_request.ip_type = gc_ip_type;
233d722e3fbSopenharmony_ci	ibs_request.number_of_ibs = 1;
234d722e3fbSopenharmony_ci	ibs_request.ibs = &ib_info;
235d722e3fbSopenharmony_ci	ibs_request.fence_info.handle = NULL;
236d722e3fbSopenharmony_ci
237d722e3fbSopenharmony_ci	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
238d722e3fbSopenharmony_ci	if (r)
239d722e3fbSopenharmony_ci		return r;
240d722e3fbSopenharmony_ci
241d722e3fbSopenharmony_ci	r = amdgpu_bo_list_destroy(ibs_request.resources);
242d722e3fbSopenharmony_ci	if (r)
243d722e3fbSopenharmony_ci		return r;
244d722e3fbSopenharmony_ci
245d722e3fbSopenharmony_ci	fence_status.context = context_handle;
246d722e3fbSopenharmony_ci	fence_status.ip_type = gc_ip_type;
247d722e3fbSopenharmony_ci	fence_status.fence = ibs_request.seq_no;
248d722e3fbSopenharmony_ci
249d722e3fbSopenharmony_ci	r = amdgpu_cs_query_fence_status(&fence_status,
250d722e3fbSopenharmony_ci			AMDGPU_TIMEOUT_INFINITE,
251d722e3fbSopenharmony_ci			0, &expired);
252d722e3fbSopenharmony_ci	if (r)
253d722e3fbSopenharmony_ci		return r;
254d722e3fbSopenharmony_ci
255d722e3fbSopenharmony_ci	return 0;
256d722e3fbSopenharmony_ci}
257d722e3fbSopenharmony_ci
258d722e3fbSopenharmony_cistatic void cp_dma_cmd(struct amdgpu_cp_dma_bo src_bo,
259d722e3fbSopenharmony_ci		struct amdgpu_cp_dma_bo dst_bo) {
260d722e3fbSopenharmony_ci	_Static_assert(DMA_SIZE < (1 << 26), "DMA size exceeds CP DMA maximium!");
261d722e3fbSopenharmony_ci
262d722e3fbSopenharmony_ci	ib_cpu[0] = 0xc0055000;
263d722e3fbSopenharmony_ci	ib_cpu[1] = 0x80000000;
264d722e3fbSopenharmony_ci	ib_cpu[2] = src_bo.gpu_va & 0x00000000ffffffff;
265d722e3fbSopenharmony_ci	ib_cpu[3] = (src_bo.gpu_va & 0xffffffff00000000) >> 32;
266d722e3fbSopenharmony_ci	ib_cpu[4] = dst_bo.gpu_va & 0x00000000ffffffff;
267d722e3fbSopenharmony_ci	ib_cpu[5] = (dst_bo.gpu_va & 0xffffffff00000000) >> 32;
268d722e3fbSopenharmony_ci	// size is read from the lower 26bits.
269d722e3fbSopenharmony_ci	ib_cpu[6] = ((1 << 26) - 1) & DMA_SIZE;
270d722e3fbSopenharmony_ci	ib_cpu[7] = 0xffff1000;
271d722e3fbSopenharmony_ci
272d722e3fbSopenharmony_ci	num_dword = 8;
273d722e3fbSopenharmony_ci
274d722e3fbSopenharmony_ci	resources[0] = src_bo.buf_handle;
275d722e3fbSopenharmony_ci	resources[1] = dst_bo.buf_handle;
276d722e3fbSopenharmony_ci	resources[2] = ib_handle;
277d722e3fbSopenharmony_ci	num_resources = 3;
278d722e3fbSopenharmony_ci}
279d722e3fbSopenharmony_ci
280d722e3fbSopenharmony_cistatic void amdgpu_cp_dma(uint32_t src_heap, uint32_t dst_heap) {
281d722e3fbSopenharmony_ci	int r;
282d722e3fbSopenharmony_ci	struct amdgpu_cp_dma_bo src_bo = {0};
283d722e3fbSopenharmony_ci	struct amdgpu_cp_dma_bo dst_bo = {0};
284d722e3fbSopenharmony_ci	void *src_bo_cpu;
285d722e3fbSopenharmony_ci	void *dst_bo_cpu;
286d722e3fbSopenharmony_ci
287d722e3fbSopenharmony_ci	/* allocate the src bo, set its data to DMA_DATA_BYTE */
288d722e3fbSopenharmony_ci	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
289d722e3fbSopenharmony_ci			src_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &src_bo);
290d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
291d722e3fbSopenharmony_ci
292d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_map(src_bo.buf_handle, (void **)&src_bo_cpu);
293d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
294d722e3fbSopenharmony_ci	memset(src_bo_cpu, DMA_DATA_BYTE, DMA_SIZE);
295d722e3fbSopenharmony_ci
296d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_unmap(src_bo.buf_handle);
297d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
298d722e3fbSopenharmony_ci
299d722e3fbSopenharmony_ci	/* allocate the dst bo and clear its content to all 0 */
300d722e3fbSopenharmony_ci	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
301d722e3fbSopenharmony_ci			dst_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &dst_bo);
302d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
303d722e3fbSopenharmony_ci
304d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_map(dst_bo.buf_handle, (void **)&dst_bo_cpu);
305d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
306d722e3fbSopenharmony_ci
307d722e3fbSopenharmony_ci	_Static_assert(DMA_DATA_BYTE != 0, "Initialization data should be different from DMA data!");
308d722e3fbSopenharmony_ci	memset(dst_bo_cpu, 0, DMA_SIZE);
309d722e3fbSopenharmony_ci
310d722e3fbSopenharmony_ci	/* record CP DMA command and dispatch the command */
311d722e3fbSopenharmony_ci	cp_dma_cmd(src_bo, dst_bo);
312d722e3fbSopenharmony_ci
313d722e3fbSopenharmony_ci	r = submit_and_sync();
314d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
315d722e3fbSopenharmony_ci
316d722e3fbSopenharmony_ci	/* verify the dst bo is filled with DMA_DATA_BYTE */
317d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(memcmp(dst_bo_cpu, reference_data, DMA_SIZE) == 0, true);
318d722e3fbSopenharmony_ci
319d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_unmap(dst_bo.buf_handle);
320d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
321d722e3fbSopenharmony_ci
322d722e3fbSopenharmony_ci	r = free_bo(src_bo);
323d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
324d722e3fbSopenharmony_ci
325d722e3fbSopenharmony_ci	r = free_bo(dst_bo);
326d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
327d722e3fbSopenharmony_ci}
328d722e3fbSopenharmony_ci
329d722e3fbSopenharmony_cistatic void amdgpu_cp_dma_p2p(uint32_t src_heap, uint32_t dst_heap) {
330d722e3fbSopenharmony_ci	int r;
331d722e3fbSopenharmony_ci	struct amdgpu_cp_dma_bo exported_bo = {0};
332d722e3fbSopenharmony_ci	int dma_buf_fd;
333d722e3fbSopenharmony_ci	int dma_buf_fd_dup;
334d722e3fbSopenharmony_ci	struct amdgpu_cp_dma_bo src_bo = {0};
335d722e3fbSopenharmony_ci	struct amdgpu_cp_dma_bo imported_dst_bo = {0};
336d722e3fbSopenharmony_ci	void *exported_bo_cpu;
337d722e3fbSopenharmony_ci	void *src_bo_cpu;
338d722e3fbSopenharmony_ci
339d722e3fbSopenharmony_ci	/* allocate a bo on the peer device and export it to dma-buf */
340d722e3fbSopenharmony_ci	r = allocate_bo_and_va(peer_exporting_device_handle, DMA_SIZE, 4096,
341d722e3fbSopenharmony_ci			src_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &exported_bo);
342d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
343d722e3fbSopenharmony_ci
344d722e3fbSopenharmony_ci	/* map the exported bo and clear its content to 0 */
345d722e3fbSopenharmony_ci	_Static_assert(DMA_DATA_BYTE != 0, "Initialization data should be different from DMA data!");
346d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_map(exported_bo.buf_handle, (void **)&exported_bo_cpu);
347d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
348d722e3fbSopenharmony_ci	memset(exported_bo_cpu, 0, DMA_SIZE);
349d722e3fbSopenharmony_ci
350d722e3fbSopenharmony_ci	r = amdgpu_bo_export(exported_bo.buf_handle,
351d722e3fbSopenharmony_ci			amdgpu_bo_handle_type_dma_buf_fd, (uint32_t*)&dma_buf_fd);
352d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
353d722e3fbSopenharmony_ci
354d722e3fbSopenharmony_ci    // According to amdgpu_drm:
355d722e3fbSopenharmony_ci	// "Buffer must be "imported" only using new "fd"
356d722e3fbSopenharmony_ci	// (different from one used by "exporter")"
357d722e3fbSopenharmony_ci	dma_buf_fd_dup = dup(dma_buf_fd);
358d722e3fbSopenharmony_ci	r = close(dma_buf_fd);
359d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
360d722e3fbSopenharmony_ci
361d722e3fbSopenharmony_ci	/* import the dma-buf to the executing device, imported bo is the DMA destination */
362d722e3fbSopenharmony_ci	r = import_dma_buf_to_bo(
363d722e3fbSopenharmony_ci			executing_device_handle, dma_buf_fd_dup, &imported_dst_bo);
364d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
365d722e3fbSopenharmony_ci
366d722e3fbSopenharmony_ci	r = close(dma_buf_fd_dup);
367d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
368d722e3fbSopenharmony_ci
369d722e3fbSopenharmony_ci	/* allocate the src bo and set its content to DMA_DATA_BYTE */
370d722e3fbSopenharmony_ci	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
371d722e3fbSopenharmony_ci			dst_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &src_bo);
372d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
373d722e3fbSopenharmony_ci
374d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_map(src_bo.buf_handle, (void **)&src_bo_cpu);
375d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
376d722e3fbSopenharmony_ci
377d722e3fbSopenharmony_ci	memset(src_bo_cpu, DMA_DATA_BYTE, DMA_SIZE);
378d722e3fbSopenharmony_ci
379d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_unmap(src_bo.buf_handle);
380d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
381d722e3fbSopenharmony_ci
382d722e3fbSopenharmony_ci	/* record CP DMA command and dispatch the command */
383d722e3fbSopenharmony_ci	cp_dma_cmd(src_bo, imported_dst_bo);
384d722e3fbSopenharmony_ci
385d722e3fbSopenharmony_ci	r = submit_and_sync();
386d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
387d722e3fbSopenharmony_ci
388d722e3fbSopenharmony_ci	/* verify the bo from the peer device is filled with DMA_DATA_BYTE */
389d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(memcmp(exported_bo_cpu, reference_data, DMA_SIZE) == 0, true);
390d722e3fbSopenharmony_ci
391d722e3fbSopenharmony_ci	r = amdgpu_bo_cpu_unmap(exported_bo.buf_handle);
392d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
393d722e3fbSopenharmony_ci
394d722e3fbSopenharmony_ci	r = free_bo(exported_bo);
395d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
396d722e3fbSopenharmony_ci
397d722e3fbSopenharmony_ci	r = free_bo(imported_dst_bo);
398d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
399d722e3fbSopenharmony_ci
400d722e3fbSopenharmony_ci	r = free_bo(src_bo);
401d722e3fbSopenharmony_ci	CU_ASSERT_EQUAL(r, 0);
402d722e3fbSopenharmony_ci}
403d722e3fbSopenharmony_ci
404d722e3fbSopenharmony_cistatic void amdgpu_cp_dma_host_to_vram(void) {
405d722e3fbSopenharmony_ci	amdgpu_cp_dma(AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_VRAM);
406d722e3fbSopenharmony_ci}
407d722e3fbSopenharmony_ci
408d722e3fbSopenharmony_cistatic void amdgpu_cp_dma_vram_to_host(void) {
409d722e3fbSopenharmony_ci	amdgpu_cp_dma(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_GTT);
410d722e3fbSopenharmony_ci}
411d722e3fbSopenharmony_ci
412d722e3fbSopenharmony_cistatic void amdgpu_cp_dma_p2p_vram_to_vram(void) {
413d722e3fbSopenharmony_ci	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM);
414d722e3fbSopenharmony_ci}
415d722e3fbSopenharmony_ci
416d722e3fbSopenharmony_cistatic void amdgpu_cp_dma_p2p_host_to_vram(void) {
417d722e3fbSopenharmony_ci	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_VRAM);
418d722e3fbSopenharmony_ci}
419d722e3fbSopenharmony_ci
420d722e3fbSopenharmony_cistatic void amdgpu_cp_dma_p2p_vram_to_host(void) {
421d722e3fbSopenharmony_ci	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_GTT);
422d722e3fbSopenharmony_ci}
423d722e3fbSopenharmony_ci
424d722e3fbSopenharmony_ciint suite_cp_dma_tests_init() {
425d722e3fbSopenharmony_ci	int r;
426d722e3fbSopenharmony_ci
427d722e3fbSopenharmony_ci	r = amdgpu_device_initialize(drm_amdgpu[0],
428d722e3fbSopenharmony_ci			&executing_device_major_version,
429d722e3fbSopenharmony_ci			&executing_device_minor_version,
430d722e3fbSopenharmony_ci			&executing_device_handle);
431d722e3fbSopenharmony_ci	if (r)
432d722e3fbSopenharmony_ci		return CUE_SINIT_FAILED;
433d722e3fbSopenharmony_ci
434d722e3fbSopenharmony_ci	r = amdgpu_cs_ctx_create(executing_device_handle, &context_handle);
435d722e3fbSopenharmony_ci	if (r)
436d722e3fbSopenharmony_ci		return CUE_SINIT_FAILED;
437d722e3fbSopenharmony_ci
438d722e3fbSopenharmony_ci	r = amdgpu_bo_alloc_and_map(executing_device_handle, IB_SIZE, 4096,
439d722e3fbSopenharmony_ci					AMDGPU_GEM_DOMAIN_GTT, 0,
440d722e3fbSopenharmony_ci					&ib_handle, (void**)&ib_cpu,
441d722e3fbSopenharmony_ci					&ib_mc_address, &ib_va_handle);
442d722e3fbSopenharmony_ci	if (r)
443d722e3fbSopenharmony_ci		return CUE_SINIT_FAILED;
444d722e3fbSopenharmony_ci
445d722e3fbSopenharmony_ci	if (do_p2p) {
446d722e3fbSopenharmony_ci		r = amdgpu_device_initialize(drm_amdgpu[1],
447d722e3fbSopenharmony_ci				&peer_exporting_device_major_version,
448d722e3fbSopenharmony_ci				&peer_exporting_device_minor_version,
449d722e3fbSopenharmony_ci				&peer_exporting_device_handle);
450d722e3fbSopenharmony_ci
451d722e3fbSopenharmony_ci		if (r)
452d722e3fbSopenharmony_ci			return CUE_SINIT_FAILED;
453d722e3fbSopenharmony_ci	}
454d722e3fbSopenharmony_ci
455d722e3fbSopenharmony_ci	reference_data = (uint8_t*)malloc(DMA_SIZE);
456d722e3fbSopenharmony_ci	if (!reference_data)
457d722e3fbSopenharmony_ci		return CUE_SINIT_FAILED;
458d722e3fbSopenharmony_ci	memset(reference_data, DMA_DATA_BYTE, DMA_SIZE);
459d722e3fbSopenharmony_ci
460d722e3fbSopenharmony_ci	return CUE_SUCCESS;
461d722e3fbSopenharmony_ci}
462d722e3fbSopenharmony_ci
463d722e3fbSopenharmony_ciint suite_cp_dma_tests_clean() {
464d722e3fbSopenharmony_ci	int r;
465d722e3fbSopenharmony_ci
466d722e3fbSopenharmony_ci	free(reference_data);
467d722e3fbSopenharmony_ci
468d722e3fbSopenharmony_ci	r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
469d722e3fbSopenharmony_ci				 ib_mc_address, IB_SIZE);
470d722e3fbSopenharmony_ci	if (r)
471d722e3fbSopenharmony_ci		return CUE_SCLEAN_FAILED;
472d722e3fbSopenharmony_ci
473d722e3fbSopenharmony_ci	r = amdgpu_cs_ctx_free(context_handle);
474d722e3fbSopenharmony_ci	if (r)
475d722e3fbSopenharmony_ci		return CUE_SCLEAN_FAILED;
476d722e3fbSopenharmony_ci
477d722e3fbSopenharmony_ci	r = amdgpu_device_deinitialize(executing_device_handle);
478d722e3fbSopenharmony_ci	if (r)
479d722e3fbSopenharmony_ci		return CUE_SCLEAN_FAILED;
480d722e3fbSopenharmony_ci
481d722e3fbSopenharmony_ci	if (do_p2p) {
482d722e3fbSopenharmony_ci		r = amdgpu_device_deinitialize(peer_exporting_device_handle);
483d722e3fbSopenharmony_ci		if (r)
484d722e3fbSopenharmony_ci			return CUE_SCLEAN_FAILED;
485d722e3fbSopenharmony_ci	}
486d722e3fbSopenharmony_ci
487d722e3fbSopenharmony_ci	return CUE_SUCCESS;
488d722e3fbSopenharmony_ci}
489d722e3fbSopenharmony_ci
490d722e3fbSopenharmony_ciCU_BOOL suite_cp_dma_tests_enable(void) {
491d722e3fbSopenharmony_ci	int r = 0;
492d722e3fbSopenharmony_ci
493d722e3fbSopenharmony_ci	if (amdgpu_device_initialize(drm_amdgpu[0],
494d722e3fbSopenharmony_ci			&executing_device_major_version,
495d722e3fbSopenharmony_ci			&executing_device_minor_version,
496d722e3fbSopenharmony_ci			&executing_device_handle))
497d722e3fbSopenharmony_ci		return CU_FALSE;
498d722e3fbSopenharmony_ci
499d722e3fbSopenharmony_ci	if (!(executing_device_handle->info.family_id >= AMDGPU_FAMILY_AI &&
500d722e3fbSopenharmony_ci			executing_device_handle->info.family_id <= AMDGPU_FAMILY_NV)) {
501d722e3fbSopenharmony_ci		printf("Testing device has ASIC that is not supported by CP-DMA test suite!\n");
502d722e3fbSopenharmony_ci		return CU_FALSE;
503d722e3fbSopenharmony_ci	}
504d722e3fbSopenharmony_ci
505d722e3fbSopenharmony_ci	if (amdgpu_device_deinitialize(executing_device_handle))
506d722e3fbSopenharmony_ci		return CU_FALSE;
507d722e3fbSopenharmony_ci
508d722e3fbSopenharmony_ci	if (drm_amdgpu[1] >= 0) {
509d722e3fbSopenharmony_ci		r = amdgpu_device_initialize(drm_amdgpu[1],
510d722e3fbSopenharmony_ci				&peer_exporting_device_major_version,
511d722e3fbSopenharmony_ci				&peer_exporting_device_minor_version,
512d722e3fbSopenharmony_ci				&peer_exporting_device_handle);
513d722e3fbSopenharmony_ci
514d722e3fbSopenharmony_ci		if (r == 0 && (peer_exporting_device_handle->info.family_id >= AMDGPU_FAMILY_AI &&
515d722e3fbSopenharmony_ci						peer_exporting_device_handle->info.family_id <= AMDGPU_FAMILY_NV)) {
516d722e3fbSopenharmony_ci			do_p2p = true;
517d722e3fbSopenharmony_ci		}
518d722e3fbSopenharmony_ci
519d722e3fbSopenharmony_ci		if (r == 0 && amdgpu_device_deinitialize(peer_exporting_device_handle) != 0) {
520d722e3fbSopenharmony_ci			printf("Deinitialize peer_exporting_device_handle failed!\n");
521d722e3fbSopenharmony_ci			return CU_FALSE;
522d722e3fbSopenharmony_ci		}
523d722e3fbSopenharmony_ci	}
524d722e3fbSopenharmony_ci
525d722e3fbSopenharmony_ci	if (!do_p2p) {
526d722e3fbSopenharmony_ci		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write VRAM to VRAM", CU_FALSE);
527d722e3fbSopenharmony_ci		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write Host to VRAM", CU_FALSE);
528d722e3fbSopenharmony_ci		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write VRAM to Host", CU_FALSE);
529d722e3fbSopenharmony_ci		printf("Peer device is not opened or has ASIC not supported by the suite, skip all Peer to Peer tests.\n");
530d722e3fbSopenharmony_ci	}
531d722e3fbSopenharmony_ci
532d722e3fbSopenharmony_ci	return CU_TRUE;
533d722e3fbSopenharmony_ci}
534