1/* 2 * Copyright 2021 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22*/ 23 24#include <stdio.h> 25#include <sys/types.h> 26#include <sys/stat.h> 27#include <fcntl.h> 28#include <stdarg.h> 29#include <string.h> 30#include <errno.h> 31#include <unistd.h> 32#include <stdlib.h> 33 34#include "drm.h" 35#include "xf86drmMode.h" 36#include "xf86drm.h" 37#include "amdgpu.h" 38#include "amdgpu_drm.h" 39#include "amdgpu_internal.h" 40 41#define MAX_CARDS_SUPPORTED 4 42#define NUM_BUFFER_OBJECTS 1024 43 44#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \ 45 (((sub_op) & 0xFF) << 8) | \ 46 (((op) & 0xFF) << 0)) 47 48#define SDMA_OPCODE_COPY 1 49# define SDMA_COPY_SUB_OPCODE_LINEAR 0 50 51 52#define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \ 53 (((b) & 0x1) << 26) | \ 54 (((t) & 0x1) << 23) | \ 55 (((s) & 0x1) << 22) | \ 56 (((cnt) & 0xFFFFF) << 0)) 57#define SDMA_OPCODE_COPY_SI 3 58 59 60/** Help string for command line parameters */ 61static const char usage[] = 62 "Usage: %s [-?h] [-b v|g|vg size] " 63 "[-c from to size count]\n" 64 "where:\n" 65 " b - Allocate a BO in VRAM, GTT or VRAM|GTT of size bytes.\n" 66 " This flag can be used multiple times. The first bo will\n" 67 " have id `1`, then second id `2`, ...\n" 68 " c - Copy size bytes from BO (bo_id1) to BO (bo_id2), count times\n" 69 " h - Display this help\n" 70 "\n" 71 "Sizes can be postfixes with k, m or g for kilo, mega and gigabyte scaling\n"; 72 73/** Specified options strings for getopt */ 74static const char options[] = "?hb:c:"; 75 76/* Open AMD devices. 77 * Returns the fd of the first device it could open. 78 */ 79static int amdgpu_open_device(void) 80{ 81 drmDevicePtr devices[MAX_CARDS_SUPPORTED]; 82 unsigned int i; 83 int drm_count; 84 85 drm_count = drmGetDevices2(0, devices, MAX_CARDS_SUPPORTED); 86 if (drm_count < 0) { 87 fprintf(stderr, "drmGetDevices2() returned an error %d\n", 88 drm_count); 89 return drm_count; 90 } 91 92 for (i = 0; i < drm_count; i++) { 93 drmVersionPtr version; 94 int fd; 95 96 /* If this is not PCI device, skip*/ 97 if (devices[i]->bustype != DRM_BUS_PCI) 98 continue; 99 100 /* If this is not AMD GPU vender ID, skip*/ 101 if (devices[i]->deviceinfo.pci->vendor_id != 0x1002) 102 continue; 103 104 if (!(devices[i]->available_nodes & 1 << DRM_NODE_RENDER)) 105 continue; 106 107 fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC); 108 109 /* This node is not available. */ 110 if (fd < 0) continue; 111 112 version = drmGetVersion(fd); 113 if (!version) { 114 fprintf(stderr, 115 "Warning: Cannot get version for %s." 116 "Error is %s\n", 117 devices[i]->nodes[DRM_NODE_RENDER], 118 strerror(errno)); 119 close(fd); 120 continue; 121 } 122 123 if (strcmp(version->name, "amdgpu")) { 124 /* This is not AMDGPU driver, skip.*/ 125 drmFreeVersion(version); 126 close(fd); 127 continue; 128 } 129 130 drmFreeVersion(version); 131 drmFreeDevices(devices, drm_count); 132 return fd; 133 } 134 135 return -1; 136} 137 138amdgpu_device_handle device_handle; 139amdgpu_context_handle context_handle; 140 141amdgpu_bo_handle resources[NUM_BUFFER_OBJECTS]; 142uint64_t virtual[NUM_BUFFER_OBJECTS]; 143unsigned int num_buffers; 144uint32_t *pm4; 145 146int alloc_bo(uint32_t domain, uint64_t size) 147{ 148 struct amdgpu_bo_alloc_request request = {}; 149 amdgpu_bo_handle bo; 150 amdgpu_va_handle va; 151 uint64_t addr; 152 int r; 153 154 if (num_buffers >= NUM_BUFFER_OBJECTS) 155 return -ENOSPC; 156 157 request.alloc_size = size; 158 request.phys_alignment = 0; 159 request.preferred_heap = domain; 160 request.flags = 0; 161 r = amdgpu_bo_alloc(device_handle, &request, &bo); 162 if (r) 163 return r; 164 165 r = amdgpu_va_range_alloc(device_handle, amdgpu_gpu_va_range_general, 166 size, 0, 0, &addr, &va, 0); 167 if (r) 168 return r; 169 170 r = amdgpu_bo_va_op_raw(device_handle, bo, 0, size, addr, 171 AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | 172 AMDGPU_VM_PAGE_EXECUTABLE, AMDGPU_VA_OP_MAP); 173 if (r) 174 return r; 175 176 resources[num_buffers] = bo; 177 virtual[num_buffers] = addr; 178 fprintf(stdout, "Allocated BO number %u at 0x%lx, domain 0x%x, size %lu\n", 179 num_buffers++, addr, domain, size); 180 return 0; 181} 182 183int submit_ib(uint32_t from, uint32_t to, uint64_t size, uint32_t count) 184{ 185 struct amdgpu_cs_request ibs_request; 186 struct amdgpu_cs_fence fence_status; 187 struct amdgpu_cs_ib_info ib_info; 188 uint64_t copied = size, delta; 189 struct timespec start, stop; 190 191 uint64_t src = virtual[from]; 192 uint64_t dst = virtual[to]; 193 uint32_t expired; 194 int i, r; 195 196 i = 0; 197 while (size) { 198 uint64_t bytes = size < 0x40000 ? size : 0x40000; 199 200 if (device_handle->info.family_id == AMDGPU_FAMILY_SI) { 201 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, 202 bytes); 203 pm4[i++] = 0xffffffff & dst; 204 pm4[i++] = 0xffffffff & src; 205 pm4[i++] = (0xffffffff00000000 & dst) >> 32; 206 pm4[i++] = (0xffffffff00000000 & src) >> 32; 207 } else { 208 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, 209 SDMA_COPY_SUB_OPCODE_LINEAR, 210 0); 211 if ( device_handle->info.family_id >= AMDGPU_FAMILY_AI) 212 pm4[i++] = bytes - 1; 213 else 214 pm4[i++] = bytes; 215 pm4[i++] = 0; 216 pm4[i++] = 0xffffffff & src; 217 pm4[i++] = (0xffffffff00000000 & src) >> 32; 218 pm4[i++] = 0xffffffff & dst; 219 pm4[i++] = (0xffffffff00000000 & dst) >> 32; 220 } 221 222 size -= bytes; 223 src += bytes; 224 dst += bytes; 225 } 226 227 memset(&ib_info, 0, sizeof(ib_info)); 228 ib_info.ib_mc_address = virtual[0]; 229 ib_info.size = i; 230 231 memset(&ibs_request, 0, sizeof(ibs_request)); 232 ibs_request.ip_type = AMDGPU_HW_IP_DMA; 233 ibs_request.ring = 0; 234 ibs_request.number_of_ibs = 1; 235 ibs_request.ibs = &ib_info; 236 ibs_request.fence_info.handle = NULL; 237 238 r = clock_gettime(CLOCK_MONOTONIC, &start); 239 if (r) 240 return errno; 241 242 r = amdgpu_bo_list_create(device_handle, num_buffers, resources, NULL, 243 &ibs_request.resources); 244 if (r) 245 return r; 246 247 for (i = 0; i < count; ++i) { 248 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 249 if (r) 250 return r; 251 } 252 253 r = amdgpu_bo_list_destroy(ibs_request.resources); 254 if (r) 255 return r; 256 257 memset(&fence_status, 0, sizeof(fence_status)); 258 fence_status.ip_type = ibs_request.ip_type; 259 fence_status.ip_instance = 0; 260 fence_status.ring = ibs_request.ring; 261 fence_status.context = context_handle; 262 fence_status.fence = ibs_request.seq_no; 263 r = amdgpu_cs_query_fence_status(&fence_status, 264 AMDGPU_TIMEOUT_INFINITE, 265 0, &expired); 266 if (r) 267 return r; 268 269 r = clock_gettime(CLOCK_MONOTONIC, &stop); 270 if (r) 271 return errno; 272 273 delta = stop.tv_nsec + stop.tv_sec * 1000000000UL; 274 delta -= start.tv_nsec + start.tv_sec * 1000000000UL; 275 276 fprintf(stdout, "Submitted %u IBs to copy from %u(%lx) to %u(%lx) %lu bytes took %lu usec\n", 277 count, from, virtual[from], to, virtual[to], copied, delta / 1000); 278 return 0; 279} 280 281void next_arg(int argc, char **argv, const char *msg) 282{ 283 optarg = argv[optind++]; 284 if (optind > argc || optarg[0] == '-') { 285 fprintf(stderr, "%s\n", msg); 286 exit(EXIT_FAILURE); 287 } 288} 289 290uint64_t parse_size(void) 291{ 292 uint64_t size; 293 char ext[2]; 294 295 ext[0] = 0; 296 if (sscanf(optarg, "%li%1[kmgKMG]", &size, ext) < 1) { 297 fprintf(stderr, "Can't parse size arg: %s\n", optarg); 298 exit(EXIT_FAILURE); 299 } 300 switch (ext[0]) { 301 case 'k': 302 case 'K': 303 size *= 1024; 304 break; 305 case 'm': 306 case 'M': 307 size *= 1024 * 1024; 308 break; 309 case 'g': 310 case 'G': 311 size *= 1024 * 1024 * 1024; 312 break; 313 default: 314 break; 315 } 316 return size; 317} 318 319int main(int argc, char **argv) 320{ 321 uint32_t major_version, minor_version; 322 uint32_t domain, from, to, count; 323 uint64_t size; 324 int fd, r, c; 325 326 fd = amdgpu_open_device(); 327 if (fd < 0) { 328 perror("Cannot open AMDGPU device"); 329 exit(EXIT_FAILURE); 330 } 331 332 r = amdgpu_device_initialize(fd, &major_version, &minor_version, &device_handle); 333 if (r) { 334 fprintf(stderr, "amdgpu_device_initialize returned %d\n", r); 335 exit(EXIT_FAILURE); 336 } 337 338 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 339 if (r) { 340 fprintf(stderr, "amdgpu_cs_ctx_create returned %d\n", r); 341 exit(EXIT_FAILURE); 342 } 343 344 if (argc == 1) { 345 fprintf(stderr, usage, argv[0]); 346 exit(EXIT_FAILURE); 347 } 348 349 r = alloc_bo(AMDGPU_GEM_DOMAIN_GTT, 2ULL * 1024 * 1024); 350 if (r) { 351 fprintf(stderr, "Buffer allocation failed with %d\n", r); 352 exit(EXIT_FAILURE); 353 } 354 355 r = amdgpu_bo_cpu_map(resources[0], (void **)&pm4); 356 if (r) { 357 fprintf(stderr, "Buffer mapping failed with %d\n", r); 358 exit(EXIT_FAILURE); 359 } 360 361 opterr = 0; 362 while ((c = getopt(argc, argv, options)) != -1) { 363 switch (c) { 364 case 'b': 365 if (!strcmp(optarg, "v")) 366 domain = AMDGPU_GEM_DOMAIN_VRAM; 367 else if (!strcmp(optarg, "g")) 368 domain = AMDGPU_GEM_DOMAIN_GTT; 369 else if (!strcmp(optarg, "vg")) 370 domain = AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT; 371 else { 372 fprintf(stderr, "Invalid domain: %s\n", optarg); 373 exit(EXIT_FAILURE); 374 } 375 next_arg(argc, argv, "Missing buffer size"); 376 size = parse_size(); 377 if (size < getpagesize()) { 378 fprintf(stderr, "Buffer size to small %lu\n", size); 379 exit(EXIT_FAILURE); 380 } 381 r = alloc_bo(domain, size); 382 if (r) { 383 fprintf(stderr, "Buffer allocation failed with %d\n", r); 384 exit(EXIT_FAILURE); 385 } 386 break; 387 case 'c': 388 if (sscanf(optarg, "%u", &from) != 1) { 389 fprintf(stderr, "Can't parse from buffer: %s\n", optarg); 390 exit(EXIT_FAILURE); 391 } 392 next_arg(argc, argv, "Missing to buffer"); 393 if (sscanf(optarg, "%u", &to) != 1) { 394 fprintf(stderr, "Can't parse to buffer: %s\n", optarg); 395 exit(EXIT_FAILURE); 396 } 397 next_arg(argc, argv, "Missing size"); 398 size = parse_size(); 399 next_arg(argc, argv, "Missing count"); 400 count = parse_size(); 401 r = submit_ib(from, to, size, count); 402 if (r) { 403 fprintf(stderr, "IB submission failed with %d\n", r); 404 exit(EXIT_FAILURE); 405 } 406 break; 407 case '?': 408 case 'h': 409 fprintf(stderr, usage, argv[0]); 410 exit(EXIT_SUCCESS); 411 default: 412 fprintf(stderr, usage, argv[0]); 413 exit(EXIT_FAILURE); 414 } 415 } 416 417 return EXIT_SUCCESS; 418} 419