/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <inttypes.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"

#define IB_SIZE 4096
#define MAX_RESOURCES 8

/* NOTE: one byte larger than a 4 KiB page, so the transfer is not page aligned */
#define DMA_SIZE 4097
#define DMA_DATA_BYTE 0xea

static bool do_p2p;

static amdgpu_device_handle executing_device_handle;
static uint32_t executing_device_major_version;
static uint32_t executing_device_minor_version;

static amdgpu_device_handle peer_exporting_device_handle;
static uint32_t peer_exporting_device_major_version;
static uint32_t peer_exporting_device_minor_version;

static amdgpu_context_handle context_handle;
static amdgpu_bo_handle ib_handle;
static uint32_t *ib_cpu;
static uint64_t ib_mc_address;
static amdgpu_va_handle ib_va_handle;
static uint32_t num_dword;

static amdgpu_bo_handle resources[MAX_RESOURCES];
static unsigned num_resources;

static uint8_t *reference_data;

static void amdgpu_cp_dma_host_to_vram(void);
static void amdgpu_cp_dma_vram_to_host(void);
static void amdgpu_cp_dma_p2p_vram_to_vram(void);
static void amdgpu_cp_dma_p2p_host_to_vram(void);
static void amdgpu_cp_dma_p2p_vram_to_host(void);

/**
 * Tests in cp dma test suite
 */
CU_TestInfo cp_dma_tests[] = {
	{ "CP DMA write Host to VRAM", amdgpu_cp_dma_host_to_vram },
	{ "CP DMA write VRAM to Host", amdgpu_cp_dma_vram_to_host },

	{ "Peer to Peer CP DMA write VRAM to VRAM", amdgpu_cp_dma_p2p_vram_to_vram },
	{ "Peer to Peer CP DMA write Host to VRAM", amdgpu_cp_dma_p2p_host_to_vram },
	{ "Peer to Peer CP DMA write VRAM to Host", amdgpu_cp_dma_p2p_vram_to_host },
	CU_TEST_INFO_NULL,
};

struct amdgpu_cp_dma_bo {
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;
	uint64_t gpu_va;
	uint64_t size;
};
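
/*
 * Each test below follows the same basic flow: allocate a source BO and fill
 * it with DMA_DATA_BYTE from the CPU, allocate (or dma-buf import) a
 * destination BO and clear it to zero, record a single CP DMA packet that
 * copies DMA_SIZE bytes, submit the IB, and finally compare the destination
 * contents against reference_data on the CPU.
 */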

static int allocate_bo_and_va(amdgpu_device_handle dev,
		uint64_t size, uint64_t alignment,
		uint32_t heap, uint64_t alloc_flags,
		struct amdgpu_cp_dma_bo *bo) {
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		goto error_bo_alloc;

	r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
			size, alignment, 0,
			&vmc_addr, &va_handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr,
			AMDGPU_VM_PAGE_READABLE |
			AMDGPU_VM_PAGE_WRITEABLE |
			AMDGPU_VM_PAGE_EXECUTABLE,
			AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	bo->buf_handle = buf_handle;
	bo->va_handle = va_handle;
	bo->gpu_va = vmc_addr;
	bo->size = size;

	return 0;

error_va_map:
	amdgpu_bo_va_op(buf_handle, 0,
			size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_alloc:
	amdgpu_va_range_free(va_handle);

error_bo_alloc:
	amdgpu_bo_free(buf_handle);

	return r;
}

static int import_dma_buf_to_bo(amdgpu_device_handle dev,
		int dmabuf_fd, struct amdgpu_cp_dma_bo *bo) {
	amdgpu_va_handle va_handle;
	uint64_t vmc_addr;
	int r;
	struct amdgpu_bo_import_result bo_import_result = {};

	r = amdgpu_bo_import(dev, amdgpu_bo_handle_type_dma_buf_fd,
			dmabuf_fd, &bo_import_result);
	if (r)
		goto error_bo_import;

	r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
			bo_import_result.alloc_size, 0, 0,
			&vmc_addr, &va_handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op(bo_import_result.buf_handle, 0,
			bo_import_result.alloc_size, vmc_addr,
			AMDGPU_VM_PAGE_READABLE |
			AMDGPU_VM_PAGE_WRITEABLE |
			AMDGPU_VM_PAGE_EXECUTABLE,
			AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	bo->buf_handle = bo_import_result.buf_handle;
	bo->va_handle = va_handle;
	bo->gpu_va = vmc_addr;
	bo->size = bo_import_result.alloc_size;

	return 0;

error_va_map:
	amdgpu_bo_va_op(bo_import_result.buf_handle, 0,
			bo_import_result.alloc_size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_alloc:
	amdgpu_va_range_free(va_handle);

error_bo_import:
	amdgpu_bo_free(bo_import_result.buf_handle);

	return r;
}

static int free_bo(struct amdgpu_cp_dma_bo bo) {
	int r;
	r = amdgpu_bo_va_op(bo.buf_handle, 0,
			bo.size, bo.gpu_va, 0, AMDGPU_VA_OP_UNMAP);
	if (r)
		return r;

	r = amdgpu_va_range_free(bo.va_handle);
	if (r)
		return r;

	r = amdgpu_bo_free(bo.buf_handle);
	if (r)
		return r;

	return 0;
}
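
/*
 * Build a BO list from the recorded resources, submit the IB on the GFX ring
 * (or on the Compute ring for ASICs whose graphics pipe is removed) and wait
 * for the resulting fence.
 */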

static int submit_and_sync() {
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	uint32_t family_id, chip_id, chip_rev;
	unsigned gc_ip_type;
	int r;

	r = amdgpu_bo_list_create(executing_device_handle,
			num_resources, resources,
			NULL, &ibs_request.resources);
	if (r)
		return r;

	family_id = executing_device_handle->info.family_id;
	chip_id = executing_device_handle->info.chip_external_rev;
	chip_rev = executing_device_handle->info.chip_rev;

	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;

	ib_info.ib_mc_address = ib_mc_address;
	ib_info.size = num_dword;

	ibs_request.ip_type = gc_ip_type;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	if (r)
		return r;

	r = amdgpu_bo_list_destroy(ibs_request.resources);
	if (r)
		return r;

	fence_status.context = context_handle;
	fence_status.ip_type = gc_ip_type;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
			AMDGPU_TIMEOUT_INFINITE,
			0, &expired);
	if (r)
		return r;

	return 0;
}

static void cp_dma_cmd(struct amdgpu_cp_dma_bo src_bo,
		struct amdgpu_cp_dma_bo dst_bo) {
	_Static_assert(DMA_SIZE < (1 << 26), "DMA size exceeds CP DMA maximum!");

	/* PKT3 DMA_DATA header: type 3, opcode 0x50, six payload dwords follow */
	ib_cpu[0] = 0xc0055000;
	/* bit 31 (CP_SYNC) is set; source/destination selects are left at 0 (plain addresses) */
	ib_cpu[1] = 0x80000000;
	ib_cpu[2] = src_bo.gpu_va & 0x00000000ffffffff;
	ib_cpu[3] = (src_bo.gpu_va & 0xffffffff00000000) >> 32;
	ib_cpu[4] = dst_bo.gpu_va & 0x00000000ffffffff;
	ib_cpu[5] = (dst_bo.gpu_va & 0xffffffff00000000) >> 32;
	/* the byte count is read from the lower 26 bits */
	ib_cpu[6] = ((1 << 26) - 1) & DMA_SIZE;
	/* type-3 NOP used to pad the IB */
	ib_cpu[7] = 0xffff1000;

	num_dword = 8;

	resources[0] = src_bo.buf_handle;
	resources[1] = dst_bo.buf_handle;
	resources[2] = ib_handle;
	num_resources = 3;
}

static void amdgpu_cp_dma(uint32_t src_heap, uint32_t dst_heap) {
	int r;
	struct amdgpu_cp_dma_bo src_bo = {0};
	struct amdgpu_cp_dma_bo dst_bo = {0};
	void *src_bo_cpu;
	void *dst_bo_cpu;

	/* allocate the src bo, set its data to DMA_DATA_BYTE */
	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
			src_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &src_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_cpu_map(src_bo.buf_handle, (void **)&src_bo_cpu);
	CU_ASSERT_EQUAL(r, 0);
	memset(src_bo_cpu, DMA_DATA_BYTE, DMA_SIZE);

	r = amdgpu_bo_cpu_unmap(src_bo.buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* allocate the dst bo and clear its content to all 0 */
	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
			dst_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &dst_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_cpu_map(dst_bo.buf_handle, (void **)&dst_bo_cpu);
	CU_ASSERT_EQUAL(r, 0);

	_Static_assert(DMA_DATA_BYTE != 0, "Initialization data should be different from DMA data!");
	memset(dst_bo_cpu, 0, DMA_SIZE);

	/* record the CP DMA command and dispatch it */
	cp_dma_cmd(src_bo, dst_bo);

	r = submit_and_sync();
	CU_ASSERT_EQUAL(r, 0);

	/* verify the dst bo is filled with DMA_DATA_BYTE */
	CU_ASSERT_EQUAL(memcmp(dst_bo_cpu, reference_data, DMA_SIZE) == 0, true);

	r = amdgpu_bo_cpu_unmap(dst_bo.buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = free_bo(src_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = free_bo(dst_bo);
	CU_ASSERT_EQUAL(r, 0);
}
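
/*
 * Peer-to-peer variant: the destination BO lives on the peer device and is
 * exported as a dma-buf, then imported into the executing device's VM, so
 * the executing device's CP writes directly into the peer's memory. The
 * result is verified through the peer device's own CPU mapping.
 */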

static void amdgpu_cp_dma_p2p(uint32_t src_heap, uint32_t dst_heap) {
	int r;
	struct amdgpu_cp_dma_bo exported_bo = {0};
	int dma_buf_fd;
	int dma_buf_fd_dup;
	struct amdgpu_cp_dma_bo src_bo = {0};
	struct amdgpu_cp_dma_bo imported_dst_bo = {0};
	void *exported_bo_cpu;
	void *src_bo_cpu;

	/* allocate a bo on the peer device and export it to a dma-buf */
	r = allocate_bo_and_va(peer_exporting_device_handle, DMA_SIZE, 4096,
			src_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &exported_bo);
	CU_ASSERT_EQUAL(r, 0);

	/* map the exported bo and clear its content to 0 */
	_Static_assert(DMA_DATA_BYTE != 0, "Initialization data should be different from DMA data!");
	r = amdgpu_bo_cpu_map(exported_bo.buf_handle, (void **)&exported_bo_cpu);
	CU_ASSERT_EQUAL(r, 0);
	memset(exported_bo_cpu, 0, DMA_SIZE);

	r = amdgpu_bo_export(exported_bo.buf_handle,
			amdgpu_bo_handle_type_dma_buf_fd, (uint32_t*)&dma_buf_fd);
	CU_ASSERT_EQUAL(r, 0);

	// According to amdgpu_drm:
	// "Buffer must be "imported" only using new "fd"
	// (different from one used by "exporter")"
	dma_buf_fd_dup = dup(dma_buf_fd);
	r = close(dma_buf_fd);
	CU_ASSERT_EQUAL(r, 0);

	/* import the dma-buf to the executing device; the imported bo is the DMA destination */
	r = import_dma_buf_to_bo(
			executing_device_handle, dma_buf_fd_dup, &imported_dst_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = close(dma_buf_fd_dup);
	CU_ASSERT_EQUAL(r, 0);

	/* allocate the src bo and set its content to DMA_DATA_BYTE */
	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
			dst_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &src_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_cpu_map(src_bo.buf_handle, (void **)&src_bo_cpu);
	CU_ASSERT_EQUAL(r, 0);

	memset(src_bo_cpu, DMA_DATA_BYTE, DMA_SIZE);

	r = amdgpu_bo_cpu_unmap(src_bo.buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* record the CP DMA command and dispatch it */
	cp_dma_cmd(src_bo, imported_dst_bo);

	r = submit_and_sync();
	CU_ASSERT_EQUAL(r, 0);

	/* verify the bo from the peer device is filled with DMA_DATA_BYTE */
	CU_ASSERT_EQUAL(memcmp(exported_bo_cpu, reference_data, DMA_SIZE) == 0, true);

	r = amdgpu_bo_cpu_unmap(exported_bo.buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = free_bo(exported_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = free_bo(imported_dst_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = free_bo(src_bo);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_cp_dma_host_to_vram(void) {
	amdgpu_cp_dma(AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_VRAM);
}

static void amdgpu_cp_dma_vram_to_host(void) {
	amdgpu_cp_dma(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_GTT);
}

static void amdgpu_cp_dma_p2p_vram_to_vram(void) {
	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM);
}

static void amdgpu_cp_dma_p2p_host_to_vram(void) {
	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_VRAM);
}

static void amdgpu_cp_dma_p2p_vram_to_host(void) {
	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_GTT);
}
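
/*
 * Suite fixture: opens drm_amdgpu[0] as the executing device, creates the
 * context and the IB used to record CP DMA packets, additionally opens
 * drm_amdgpu[1] as the peer exporting device when peer-to-peer testing is
 * enabled, and fills reference_data with the expected DMA_DATA_BYTE pattern.
 */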

int suite_cp_dma_tests_init() {
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0],
			&executing_device_major_version,
			&executing_device_minor_version,
			&executing_device_handle);
	if (r)
		return CUE_SINIT_FAILED;

	r = amdgpu_cs_ctx_create(executing_device_handle, &context_handle);
	if (r)
		return CUE_SINIT_FAILED;

	r = amdgpu_bo_alloc_and_map(executing_device_handle, IB_SIZE, 4096,
			AMDGPU_GEM_DOMAIN_GTT, 0,
			&ib_handle, (void**)&ib_cpu,
			&ib_mc_address, &ib_va_handle);
	if (r)
		return CUE_SINIT_FAILED;

	if (do_p2p) {
		r = amdgpu_device_initialize(drm_amdgpu[1],
				&peer_exporting_device_major_version,
				&peer_exporting_device_minor_version,
				&peer_exporting_device_handle);

		if (r)
			return CUE_SINIT_FAILED;
	}

	reference_data = (uint8_t*)malloc(DMA_SIZE);
	if (!reference_data)
		return CUE_SINIT_FAILED;
	memset(reference_data, DMA_DATA_BYTE, DMA_SIZE);

	return CUE_SUCCESS;
}

int suite_cp_dma_tests_clean() {
	int r;

	free(reference_data);

	r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
			ib_mc_address, IB_SIZE);
	if (r)
		return CUE_SCLEAN_FAILED;

	r = amdgpu_cs_ctx_free(context_handle);
	if (r)
		return CUE_SCLEAN_FAILED;

	r = amdgpu_device_deinitialize(executing_device_handle);
	if (r)
		return CUE_SCLEAN_FAILED;

	if (do_p2p) {
		r = amdgpu_device_deinitialize(peer_exporting_device_handle);
		if (r)
			return CUE_SCLEAN_FAILED;
	}

	return CUE_SUCCESS;
}

CU_BOOL suite_cp_dma_tests_enable(void) {
	int r = 0;

	if (amdgpu_device_initialize(drm_amdgpu[0],
			&executing_device_major_version,
			&executing_device_minor_version,
			&executing_device_handle))
		return CU_FALSE;

	if (!(executing_device_handle->info.family_id >= AMDGPU_FAMILY_AI &&
			executing_device_handle->info.family_id <= AMDGPU_FAMILY_NV)) {
		printf("Test device has an ASIC that is not supported by the CP DMA test suite!\n");
		return CU_FALSE;
	}

	if (amdgpu_device_deinitialize(executing_device_handle))
		return CU_FALSE;

	if (drm_amdgpu[1] >= 0) {
		r = amdgpu_device_initialize(drm_amdgpu[1],
				&peer_exporting_device_major_version,
				&peer_exporting_device_minor_version,
				&peer_exporting_device_handle);

		if (r == 0 && (peer_exporting_device_handle->info.family_id >= AMDGPU_FAMILY_AI &&
				peer_exporting_device_handle->info.family_id <= AMDGPU_FAMILY_NV)) {
			do_p2p = true;
		}

		if (r == 0 && amdgpu_device_deinitialize(peer_exporting_device_handle) != 0) {
			printf("Deinitializing peer_exporting_device_handle failed!\n");
			return CU_FALSE;
		}
	}

	if (!do_p2p) {
		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write VRAM to VRAM", CU_FALSE);
		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write Host to VRAM", CU_FALSE);
		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write VRAM to Host", CU_FALSE);
		printf("Peer device is not opened or has an ASIC not supported by the suite, skipping all Peer to Peer tests.\n");
	}

	return CU_TRUE;
}
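
/*
 * For reference, a minimal sketch of how the hooks above could be wired into
 * a CUnit suite table. The actual registration lives outside this file
 * (presumably in amdgpu_test.c), so the table below is only illustrative:
 *
 *	CU_SuiteInfo suites[] = {
 *		{ .pName = "CP DMA Tests",
 *		  .pInitFunc = suite_cp_dma_tests_init,
 *		  .pCleanupFunc = suite_cp_dma_tests_clean,
 *		  .pTests = cp_dma_tests },
 *		CU_SUITE_INFO_NULL,
 *	};
 *
 *	CU_register_suites(suites);
 */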