1/* 2 * Copyright 2021 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22*/ 23 24#include <stdlib.h> 25#include <unistd.h> 26#include <sys/types.h> 27#include <sys/stat.h> 28#include <fcntl.h> 29#if HAVE_ALLOCA_H 30# include <alloca.h> 31#endif 32 33#include "CUnit/Basic.h" 34 35#include "amdgpu_test.h" 36#include "amdgpu_drm.h" 37#include "amdgpu_internal.h" 38#include "xf86drm.h" 39#include <pthread.h> 40 41#define GFX_COMPUTE_NOP 0xffff1000 42 43static amdgpu_device_handle device_handle; 44static uint32_t major_version; 45static uint32_t minor_version; 46static char *sysfs_remove = NULL; 47static bool do_cs; 48 49CU_BOOL suite_hotunplug_tests_enable(void) 50{ 51 CU_BOOL enable = CU_TRUE; 52 drmDevicePtr device; 53 54 if (drmGetDevice2(drm_amdgpu[0], DRM_DEVICE_GET_PCI_REVISION, &device)) { 55 printf("\n\nGPU Failed to get DRM device PCI info!\n"); 56 return CU_FALSE; 57 } 58 59 if (device->bustype != DRM_BUS_PCI) { 60 printf("\n\nGPU device is not on PCI bus!\n"); 61 amdgpu_device_deinitialize(device_handle); 62 return CU_FALSE; 63 } 64 65 if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, 66 &minor_version, &device_handle)) 67 return CU_FALSE; 68 69 /* Latest tested amdgpu version to work with all the tests */ 70 if (minor_version < 46) 71 enable = false; 72 73 if (amdgpu_device_deinitialize(device_handle)) 74 return CU_FALSE; 75 76 return enable; 77} 78 79int suite_hotunplug_tests_init(void) 80{ 81 /* We need to open/close device at each test manually */ 82 amdgpu_close_devices(); 83 84 return CUE_SUCCESS; 85} 86 87int suite_hotunplug_tests_clean(void) 88{ 89 90 91 return CUE_SUCCESS; 92} 93 94static int amdgpu_hotunplug_trigger(const char *pathname) 95{ 96 int fd, len; 97 98 fd = open(pathname, O_WRONLY); 99 if (fd < 0) 100 return -errno; 101 102 len = write(fd, "1", 1); 103 close(fd); 104 105 return len; 106} 107 108static int amdgpu_hotunplug_setup_test() 109{ 110 int r; 111 char *tmp_str; 112 113 if (amdgpu_open_device_on_test_index(open_render_node) < 0) { 114 printf("\n\n Failed to reopen device file!\n"); 115 return CUE_SINIT_FAILED; 116 117 118 119 } 120 121 r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 122 &minor_version, &device_handle); 123 124 if (r) { 125 if ((r == -EACCES) && (errno == EACCES)) 126 printf("\n\nError:%s. " 127 "Hint:Try to run this test program as root.", 128 strerror(errno)); 129 return CUE_SINIT_FAILED; 130 } 131 132 tmp_str = amdgpu_get_device_from_fd(drm_amdgpu[0]); 133 if (!tmp_str){ 134 printf("\n\n Device path not found!\n"); 135 return CUE_SINIT_FAILED; 136 } 137 138 sysfs_remove = realloc(tmp_str, strlen(tmp_str) * 2); 139 strcat(sysfs_remove, "/remove"); 140 141 return 0; 142} 143 144static int amdgpu_hotunplug_teardown_test() 145{ 146 if (amdgpu_device_deinitialize(device_handle)) 147 return CUE_SCLEAN_FAILED; 148 149 amdgpu_close_devices(); 150 151 if (sysfs_remove) 152 free(sysfs_remove); 153 154 return 0; 155} 156 157static inline int amdgpu_hotunplug_remove() 158{ 159 return amdgpu_hotunplug_trigger(sysfs_remove); 160} 161 162static inline int amdgpu_hotunplug_rescan() 163{ 164 return amdgpu_hotunplug_trigger("/sys/bus/pci/rescan"); 165} 166 167static int amdgpu_cs_sync(amdgpu_context_handle context, 168 unsigned int ip_type, 169 int ring, 170 unsigned int seqno) 171{ 172 struct amdgpu_cs_fence fence = { 173 .context = context, 174 .ip_type = ip_type, 175 .ring = ring, 176 .fence = seqno, 177 }; 178 uint32_t expired; 179 180 return amdgpu_cs_query_fence_status(&fence, 181 AMDGPU_TIMEOUT_INFINITE, 182 0, &expired); 183} 184 185static void *amdgpu_nop_cs() 186{ 187 amdgpu_bo_handle ib_result_handle; 188 void *ib_result_cpu; 189 uint64_t ib_result_mc_address; 190 uint32_t *ptr; 191 int i, r; 192 amdgpu_bo_list_handle bo_list; 193 amdgpu_va_handle va_handle; 194 amdgpu_context_handle context; 195 struct amdgpu_cs_request ibs_request; 196 struct amdgpu_cs_ib_info ib_info; 197 198 r = amdgpu_cs_ctx_create(device_handle, &context); 199 CU_ASSERT_EQUAL(r, 0); 200 201 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 202 AMDGPU_GEM_DOMAIN_GTT, 0, 203 &ib_result_handle, &ib_result_cpu, 204 &ib_result_mc_address, &va_handle); 205 CU_ASSERT_EQUAL(r, 0); 206 207 ptr = ib_result_cpu; 208 for (i = 0; i < 16; ++i) 209 ptr[i] = GFX_COMPUTE_NOP; 210 211 r = amdgpu_bo_list_create(device_handle, 1, &ib_result_handle, NULL, &bo_list); 212 CU_ASSERT_EQUAL(r, 0); 213 214 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 215 ib_info.ib_mc_address = ib_result_mc_address; 216 ib_info.size = 16; 217 218 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 219 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 220 ibs_request.ring = 0; 221 ibs_request.number_of_ibs = 1; 222 ibs_request.ibs = &ib_info; 223 ibs_request.resources = bo_list; 224 225 while (do_cs) 226 amdgpu_cs_submit(context, 0, &ibs_request, 1); 227 228 amdgpu_cs_sync(context, AMDGPU_HW_IP_GFX, 0, ibs_request.seq_no); 229 amdgpu_bo_list_destroy(bo_list); 230 amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 231 ib_result_mc_address, 4096); 232 233 amdgpu_cs_ctx_free(context); 234 235 return (void *)0; 236} 237 238static pthread_t* amdgpu_create_cs_thread() 239{ 240 int r; 241 pthread_t *thread = malloc(sizeof(*thread)); 242 if (!thread) 243 return NULL; 244 245 do_cs = true; 246 247 r = pthread_create(thread, NULL, amdgpu_nop_cs, NULL); 248 CU_ASSERT_EQUAL(r, 0); 249 250 /* Give thread enough time to start*/ 251 usleep(100000); 252 return thread; 253} 254 255static void amdgpu_destroy_cs_thread(pthread_t *thread) 256{ 257 void *status; 258 259 do_cs = false; 260 261 pthread_join(*thread, &status); 262 CU_ASSERT_EQUAL(status, 0); 263 264 free(thread); 265} 266 267 268static void amdgpu_hotunplug_test(bool with_cs) 269{ 270 int r; 271 pthread_t *thread = NULL; 272 273 r = amdgpu_hotunplug_setup_test(); 274 CU_ASSERT_EQUAL(r , 0); 275 276 if (with_cs) { 277 thread = amdgpu_create_cs_thread(); 278 CU_ASSERT_NOT_EQUAL(thread, NULL); 279 } 280 281 r = amdgpu_hotunplug_remove(); 282 CU_ASSERT_EQUAL(r > 0, 1); 283 284 if (with_cs) 285 amdgpu_destroy_cs_thread(thread); 286 287 r = amdgpu_hotunplug_teardown_test(); 288 CU_ASSERT_EQUAL(r , 0); 289 290 r = amdgpu_hotunplug_rescan(); 291 CU_ASSERT_EQUAL(r > 0, 1); 292} 293 294static void amdgpu_hotunplug_simple(void) 295{ 296 amdgpu_hotunplug_test(false); 297} 298 299static void amdgpu_hotunplug_with_cs(void) 300{ 301 amdgpu_hotunplug_test(true); 302} 303 304static void amdgpu_hotunplug_with_exported_bo(void) 305{ 306 int r; 307 uint32_t dma_buf_fd; 308 unsigned int *ptr; 309 amdgpu_bo_handle bo_handle; 310 311 struct amdgpu_bo_alloc_request request = { 312 .alloc_size = 4096, 313 .phys_alignment = 4096, 314 .preferred_heap = AMDGPU_GEM_DOMAIN_GTT, 315 .flags = 0, 316 }; 317 318 r = amdgpu_hotunplug_setup_test(); 319 CU_ASSERT_EQUAL(r , 0); 320 321 amdgpu_bo_alloc(device_handle, &request, &bo_handle); 322 CU_ASSERT_EQUAL(r, 0); 323 324 r = amdgpu_bo_export(bo_handle, amdgpu_bo_handle_type_dma_buf_fd, &dma_buf_fd); 325 CU_ASSERT_EQUAL(r, 0); 326 327 ptr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, dma_buf_fd, 0); 328 CU_ASSERT_NOT_EQUAL(ptr, MAP_FAILED); 329 330 r = amdgpu_hotunplug_remove(); 331 CU_ASSERT_EQUAL(r > 0, 1); 332 333 amdgpu_bo_free(bo_handle); 334 335 r = amdgpu_hotunplug_teardown_test(); 336 CU_ASSERT_EQUAL(r , 0); 337 338 *ptr = 0xdeafbeef; 339 340 munmap(ptr, 4096); 341 close (dma_buf_fd); 342 343 r = amdgpu_hotunplug_rescan(); 344 CU_ASSERT_EQUAL(r > 0, 1); 345} 346 347static void amdgpu_hotunplug_with_exported_fence(void) 348{ 349 amdgpu_bo_handle ib_result_handle; 350 void *ib_result_cpu; 351 uint64_t ib_result_mc_address; 352 uint32_t *ptr, sync_obj_handle, sync_obj_handle2; 353 int i, r; 354 amdgpu_bo_list_handle bo_list; 355 amdgpu_va_handle va_handle; 356 uint32_t major2, minor2; 357 amdgpu_device_handle device2; 358 amdgpu_context_handle context; 359 struct amdgpu_cs_request ibs_request; 360 struct amdgpu_cs_ib_info ib_info; 361 struct amdgpu_cs_fence fence_status = {0}; 362 int shared_fd; 363 364 r = amdgpu_hotunplug_setup_test(); 365 CU_ASSERT_EQUAL(r , 0); 366 367 r = amdgpu_device_initialize(drm_amdgpu[1], &major2, &minor2, &device2); 368 CU_ASSERT_EQUAL(r, 0); 369 370 r = amdgpu_cs_ctx_create(device_handle, &context); 371 CU_ASSERT_EQUAL(r, 0); 372 373 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 374 AMDGPU_GEM_DOMAIN_GTT, 0, 375 &ib_result_handle, &ib_result_cpu, 376 &ib_result_mc_address, &va_handle); 377 CU_ASSERT_EQUAL(r, 0); 378 379 ptr = ib_result_cpu; 380 for (i = 0; i < 16; ++i) 381 ptr[i] = GFX_COMPUTE_NOP; 382 383 r = amdgpu_bo_list_create(device_handle, 1, &ib_result_handle, NULL, &bo_list); 384 CU_ASSERT_EQUAL(r, 0); 385 386 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 387 ib_info.ib_mc_address = ib_result_mc_address; 388 ib_info.size = 16; 389 390 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 391 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 392 ibs_request.ring = 0; 393 ibs_request.number_of_ibs = 1; 394 ibs_request.ibs = &ib_info; 395 ibs_request.resources = bo_list; 396 397 CU_ASSERT_EQUAL(amdgpu_cs_submit(context, 0, &ibs_request, 1), 0); 398 399 fence_status.context = context; 400 fence_status.ip_type = AMDGPU_HW_IP_GFX; 401 fence_status.ip_instance = 0; 402 fence_status.fence = ibs_request.seq_no; 403 404 CU_ASSERT_EQUAL(amdgpu_cs_fence_to_handle(device_handle, &fence_status, 405 AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ, 406 &sync_obj_handle), 407 0); 408 409 CU_ASSERT_EQUAL(amdgpu_cs_export_syncobj(device_handle, sync_obj_handle, &shared_fd), 0); 410 411 CU_ASSERT_EQUAL(amdgpu_cs_import_syncobj(device2, shared_fd, &sync_obj_handle2), 0); 412 413 CU_ASSERT_EQUAL(amdgpu_cs_destroy_syncobj(device_handle, sync_obj_handle), 0); 414 415 CU_ASSERT_EQUAL(amdgpu_bo_list_destroy(bo_list), 0); 416 CU_ASSERT_EQUAL(amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 417 ib_result_mc_address, 4096), 0); 418 CU_ASSERT_EQUAL(amdgpu_cs_ctx_free(context), 0); 419 420 r = amdgpu_hotunplug_remove(); 421 CU_ASSERT_EQUAL(r > 0, 1); 422 423 CU_ASSERT_EQUAL(amdgpu_cs_syncobj_wait(device2, &sync_obj_handle2, 1, 100000000, 0, NULL), 0); 424 425 CU_ASSERT_EQUAL(amdgpu_cs_destroy_syncobj(device2, sync_obj_handle2), 0); 426 427 amdgpu_device_deinitialize(device2); 428 429 r = amdgpu_hotunplug_teardown_test(); 430 CU_ASSERT_EQUAL(r , 0); 431 432 r = amdgpu_hotunplug_rescan(); 433 CU_ASSERT_EQUAL(r > 0, 1); 434} 435 436 437CU_TestInfo hotunplug_tests[] = { 438 { "Unplug card and rescan the bus to plug it back", amdgpu_hotunplug_simple }, 439 { "Same as first test but with command submission", amdgpu_hotunplug_with_cs }, 440 { "Unplug with exported bo", amdgpu_hotunplug_with_exported_bo }, 441 { "Unplug with exported fence", amdgpu_hotunplug_with_exported_fence }, 442 CU_TEST_INFO_NULL, 443}; 444