1/* 2 * Copyright 2019 Collabora, Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors (Collabora): 24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 25 */ 26#include <errno.h> 27#include <stdio.h> 28#include <fcntl.h> 29#include <xf86drm.h> 30#include <pthread.h> 31#include "drm-uapi/panfrost_drm.h" 32 33#include "pan_bo.h" 34#include "pan_device.h" 35#include "pan_util.h" 36#include "wrap.h" 37 38#include "os/os_mman.h" 39 40#include "util/u_inlines.h" 41#include "util/u_math.h" 42 43/* This file implements a userspace BO cache. Allocating and freeing 44 * GPU-visible buffers is very expensive, and even the extra kernel roundtrips 45 * adds more work than we would like at this point. So caching BOs in userspace 46 * solves both of these problems and does not require kernel updates. 47 * 48 * Cached BOs are sorted into a bucket based on rounding their size down to the 49 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo 50 * objects. Putting a BO into the cache is accomplished by adding it to the 51 * corresponding bucket. Getting a BO from the cache consists of finding the 52 * appropriate bucket and sorting. A cache eviction is a kernel-level free of a 53 * BO and removing it from the bucket. We special case evicting all BOs from 54 * the cache, since that's what helpful in practice and avoids extra logic 55 * around the linked list. 56 */ 57 58static struct panfrost_bo * 59panfrost_bo_alloc(struct panfrost_device *dev, size_t size, 60 uint32_t flags, const char *label) 61{ 62 struct drm_panfrost_create_bo create_bo = { .size = size }; 63 struct panfrost_bo *bo; 64 int ret; 65 66 if (dev->kernel_version->version_major > 1 || 67 dev->kernel_version->version_minor >= 1) { 68 if (flags & PAN_BO_GROWABLE) 69 create_bo.flags |= PANFROST_BO_HEAP; 70 if (!(flags & PAN_BO_EXECUTE)) 71 create_bo.flags |= PANFROST_BO_NOEXEC; 72 } 73 74 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo); 75 if (ret) { 76 fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n"); 77 return NULL; 78 } 79 80 bo = pan_lookup_bo(dev, create_bo.handle); 81 assert(!memcmp(bo, &((struct panfrost_bo){}), sizeof(*bo))); 82 83 bo->size = create_bo.size; 84 bo->ptr.gpu = create_bo.offset; 85 bo->gem_handle = create_bo.handle; 86 bo->flags = flags; 87 bo->dev = dev; 88 bo->label = label; 89 return bo; 90} 91 92static void 93panfrost_bo_free(struct panfrost_bo *bo) 94{ 95 struct drm_gem_close gem_close = { .handle = bo->gem_handle }; 96 int ret; 97 98 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close); 99 if (ret) { 100 fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n"); 101 assert(0); 102 } 103 104 /* BO will be freed with the sparse array, but zero to indicate free */ 105 memset(bo, 0, sizeof(*bo)); 106} 107 108/* Returns true if the BO is ready, false otherwise. 109 * access_type is encoding the type of access one wants to ensure is done. 110 * Waiting is always done for writers, but if wait_readers is set then readers 111 * are also waited for. 112 */ 113bool 114panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers) 115{ 116 struct drm_panfrost_wait_bo req = { 117 .handle = bo->gem_handle, 118 .timeout_ns = timeout_ns, 119 }; 120 int ret; 121 122 /* If the BO has been exported or imported we can't rely on the cached 123 * state, we need to call the WAIT_BO ioctl. 124 */ 125 if (!(bo->flags & PAN_BO_SHARED)) { 126 /* If ->gpu_access is 0, the BO is idle, no need to wait. */ 127 if (!bo->gpu_access) 128 return true; 129 130 /* If the caller only wants to wait for writers and no 131 * writes are pending, we don't have to wait. 132 */ 133 if (!wait_readers && !(bo->gpu_access & PAN_BO_ACCESS_WRITE)) 134 return true; 135 } 136 137 /* The ioctl returns >= 0 value when the BO we are waiting for is ready 138 * -1 otherwise. 139 */ 140 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req); 141 if (ret != -1) { 142 /* Set gpu_access to 0 so that the next call to bo_wait() 143 * doesn't have to call the WAIT_BO ioctl. 144 */ 145 bo->gpu_access = 0; 146 return true; 147 } 148 149 /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed 150 * is invalid, which shouldn't happen here. 151 */ 152 assert(errno == ETIMEDOUT || errno == EBUSY); 153 return false; 154} 155 156/* Helper to calculate the bucket index of a BO */ 157 158static unsigned 159pan_bucket_index(unsigned size) 160{ 161 /* Round down to POT to compute a bucket index */ 162 163 unsigned bucket_index = util_logbase2(size); 164 165 /* Clamp the bucket index; all huge allocations will be 166 * sorted into the largest bucket */ 167 168 bucket_index = CLAMP(bucket_index, MIN_BO_CACHE_BUCKET, 169 MAX_BO_CACHE_BUCKET); 170 171 /* Reindex from 0 */ 172 return (bucket_index - MIN_BO_CACHE_BUCKET); 173} 174 175static struct list_head * 176pan_bucket(struct panfrost_device *dev, unsigned size) 177{ 178 return &dev->bo_cache.buckets[pan_bucket_index(size)]; 179} 180 181/* Tries to fetch a BO of sufficient size with the appropriate flags from the 182 * BO cache. If it succeeds, it returns that BO and removes the BO from the 183 * cache. If it fails, it returns NULL signaling the caller to allocate a new 184 * BO. */ 185 186static struct panfrost_bo * 187panfrost_bo_cache_fetch(struct panfrost_device *dev, 188 size_t size, uint32_t flags, const char *label, 189 bool dontwait) 190{ 191 pthread_mutex_lock(&dev->bo_cache.lock); 192 struct list_head *bucket = pan_bucket(dev, size); 193 struct panfrost_bo *bo = NULL; 194 195 /* Iterate the bucket looking for something suitable */ 196 list_for_each_entry_safe(struct panfrost_bo, entry, bucket, 197 bucket_link) { 198 if (entry->size < size || entry->flags != flags) 199 continue; 200 201 /* If the oldest BO in the cache is busy, likely so is 202 * everything newer, so bail. */ 203 if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX, 204 PAN_BO_ACCESS_RW)) 205 break; 206 207 struct drm_panfrost_madvise madv = { 208 .handle = entry->gem_handle, 209 .madv = PANFROST_MADV_WILLNEED, 210 }; 211 int ret; 212 213 /* This one works, splice it out of the cache */ 214 list_del(&entry->bucket_link); 215 list_del(&entry->lru_link); 216 217 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv); 218 if (!ret && !madv.retained) { 219 panfrost_bo_free(entry); 220 continue; 221 } 222 /* Let's go! */ 223 bo = entry; 224 bo->label = label; 225 break; 226 } 227 pthread_mutex_unlock(&dev->bo_cache.lock); 228 229 return bo; 230} 231 232static void 233panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev) 234{ 235 struct timespec time; 236 237 clock_gettime(CLOCK_MONOTONIC, &time); 238 list_for_each_entry_safe(struct panfrost_bo, entry, 239 &dev->bo_cache.lru, lru_link) { 240 /* We want all entries that have been used more than 1 sec 241 * ago to be dropped, others can be kept. 242 * Note the <= 2 check and not <= 1. It's here to account for 243 * the fact that we're only testing ->tv_sec, not ->tv_nsec. 244 * That means we might keep entries that are between 1 and 2 245 * seconds old, but we don't really care, as long as unused BOs 246 * are dropped at some point. 247 */ 248 if (time.tv_sec - entry->last_used <= 2) 249 break; 250 251 list_del(&entry->bucket_link); 252 list_del(&entry->lru_link); 253 panfrost_bo_free(entry); 254 } 255} 256 257/* Tries to add a BO to the cache. Returns if it was 258 * successful */ 259 260static bool 261panfrost_bo_cache_put(struct panfrost_bo *bo) 262{ 263 struct panfrost_device *dev = bo->dev; 264 265 if (bo->flags & PAN_BO_SHARED || dev->debug & PAN_DBG_NO_CACHE) 266 return false; 267 268 /* Must be first */ 269 pthread_mutex_lock(&dev->bo_cache.lock); 270 271 struct list_head *bucket = pan_bucket(dev, MAX2(bo->size, 4096)); 272 struct drm_panfrost_madvise madv; 273 struct timespec time; 274 275 madv.handle = bo->gem_handle; 276 madv.madv = PANFROST_MADV_DONTNEED; 277 madv.retained = 0; 278 279 drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv); 280 281 /* Add us to the bucket */ 282 list_addtail(&bo->bucket_link, bucket); 283 284 /* Add us to the LRU list and update the last_used field. */ 285 list_addtail(&bo->lru_link, &dev->bo_cache.lru); 286 clock_gettime(CLOCK_MONOTONIC, &time); 287 bo->last_used = time.tv_sec; 288 289 /* Let's do some cleanup in the BO cache while we hold the 290 * lock. 291 */ 292 panfrost_bo_cache_evict_stale_bos(dev); 293 294 /* Update the label to help debug BO cache memory usage issues */ 295 bo->label = "Unused (BO cache)"; 296 297 /* Must be last */ 298 pthread_mutex_unlock(&dev->bo_cache.lock); 299 return true; 300} 301 302/* Evicts all BOs from the cache. Called during context 303 * destroy or during low-memory situations (to free up 304 * memory that may be unused by us just sitting in our 305 * cache, but still reserved from the perspective of the 306 * OS) */ 307 308void 309panfrost_bo_cache_evict_all( 310 struct panfrost_device *dev) 311{ 312 pthread_mutex_lock(&dev->bo_cache.lock); 313 for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) { 314 struct list_head *bucket = &dev->bo_cache.buckets[i]; 315 316 list_for_each_entry_safe(struct panfrost_bo, entry, bucket, 317 bucket_link) { 318 list_del(&entry->bucket_link); 319 list_del(&entry->lru_link); 320 panfrost_bo_free(entry); 321 } 322 } 323 pthread_mutex_unlock(&dev->bo_cache.lock); 324} 325 326void 327panfrost_bo_mmap(struct panfrost_bo *bo) 328{ 329 struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle }; 330 int ret; 331 332 if (bo->ptr.cpu) 333 return; 334 335 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo); 336 if (ret) { 337 fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n"); 338 assert(0); 339 } 340 341 bo->ptr.cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, 342 bo->dev->fd, mmap_bo.offset); 343 if (bo->ptr.cpu == MAP_FAILED) { 344 bo->ptr.cpu = NULL; 345 fprintf(stderr, 346 "mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n", 347 bo->ptr.cpu, (long long)bo->size, bo->dev->fd, 348 (long long)mmap_bo.offset); 349 } 350} 351 352static void 353panfrost_bo_munmap(struct panfrost_bo *bo) 354{ 355 if (!bo->ptr.cpu) 356 return; 357 358 if (os_munmap((void *) (uintptr_t)bo->ptr.cpu, bo->size)) { 359 perror("munmap"); 360 abort(); 361 } 362 363 bo->ptr.cpu = NULL; 364} 365 366struct panfrost_bo * 367panfrost_bo_create(struct panfrost_device *dev, size_t size, 368 uint32_t flags, const char *label) 369{ 370 struct panfrost_bo *bo; 371 372 /* Kernel will fail (confusingly) with EPERM otherwise */ 373 assert(size > 0); 374 375 /* To maximize BO cache usage, don't allocate tiny BOs */ 376 size = ALIGN_POT(size, 4096); 377 378 /* GROWABLE BOs cannot be mmapped */ 379 if (flags & PAN_BO_GROWABLE) 380 assert(flags & PAN_BO_INVISIBLE); 381 382 /* Before creating a BO, we first want to check the cache but without 383 * waiting for BO readiness (BOs in the cache can still be referenced 384 * by jobs that are not finished yet). 385 * If the cached allocation fails we fall back on fresh BO allocation, 386 * and if that fails too, we try one more time to allocate from the 387 * cache, but this time we accept to wait. 388 */ 389 bo = panfrost_bo_cache_fetch(dev, size, flags, label, true); 390 if (!bo) 391 bo = panfrost_bo_alloc(dev, size, flags, label); 392 if (!bo) 393 bo = panfrost_bo_cache_fetch(dev, size, flags, label, false); 394 395 assert(bo); 396 397 if (!bo) { 398 fprintf(stderr, "BO creation failed\n"); 399 return NULL; 400 } 401 402 /* Only mmap now if we know we need to. For CPU-invisible buffers, we 403 * never map since we don't care about their contents; they're purely 404 * for GPU-internal use. But we do trace them anyway. */ 405 406 if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP))) 407 panfrost_bo_mmap(bo); 408 409 p_atomic_set(&bo->refcnt, 1); 410 411 if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) { 412 if (flags & PAN_BO_INVISIBLE) 413 pandecode_inject_mmap(bo->ptr.gpu, NULL, bo->size, NULL); 414 else if (!(flags & PAN_BO_DELAY_MMAP)) 415 pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL); 416 } 417 418 return bo; 419} 420 421void 422panfrost_bo_reference(struct panfrost_bo *bo) 423{ 424 if (bo) { 425 ASSERTED int count = p_atomic_inc_return(&bo->refcnt); 426 assert(count != 1); 427 } 428} 429 430void 431panfrost_bo_unreference(struct panfrost_bo *bo) 432{ 433 if (!bo) 434 return; 435 436 /* Don't return to cache if there are still references */ 437 if (p_atomic_dec_return(&bo->refcnt)) 438 return; 439 440 struct panfrost_device *dev = bo->dev; 441 442 pthread_mutex_lock(&dev->bo_map_lock); 443 444 /* Someone might have imported this BO while we were waiting for the 445 * lock, let's make sure it's still not referenced before freeing it. 446 */ 447 if (p_atomic_read(&bo->refcnt) == 0) { 448 /* When the reference count goes to zero, we need to cleanup */ 449 panfrost_bo_munmap(bo); 450 451 if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) 452 pandecode_inject_free(bo->ptr.gpu, bo->size); 453 454 /* Rather than freeing the BO now, we'll cache the BO for later 455 * allocations if we're allowed to. 456 */ 457 if (!panfrost_bo_cache_put(bo)) 458 panfrost_bo_free(bo); 459 460 } 461 pthread_mutex_unlock(&dev->bo_map_lock); 462} 463 464struct panfrost_bo * 465panfrost_bo_import(struct panfrost_device *dev, int fd) 466{ 467 struct panfrost_bo *bo; 468 struct drm_panfrost_get_bo_offset get_bo_offset = {0,}; 469 ASSERTED int ret; 470 unsigned gem_handle; 471 472 ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle); 473 assert(!ret); 474 475 pthread_mutex_lock(&dev->bo_map_lock); 476 bo = pan_lookup_bo(dev, gem_handle); 477 478 if (!bo->dev) { 479 get_bo_offset.handle = gem_handle; 480 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset); 481 assert(!ret); 482 483 bo->dev = dev; 484 bo->ptr.gpu = (mali_ptr) get_bo_offset.offset; 485 bo->size = lseek(fd, 0, SEEK_END); 486 /* Sometimes this can fail and return -1. size of -1 is not 487 * a nice thing for mmap to try mmap. Be more robust also 488 * for zero sized maps and fail nicely too 489 */ 490 if ((bo->size == 0) || (bo->size == (size_t)-1)) { 491 pthread_mutex_unlock(&dev->bo_map_lock); 492 return NULL; 493 } 494 bo->flags = PAN_BO_SHARED; 495 bo->gem_handle = gem_handle; 496 p_atomic_set(&bo->refcnt, 1); 497 } else { 498 /* bo->refcnt == 0 can happen if the BO 499 * was being released but panfrost_bo_import() acquired the 500 * lock before panfrost_bo_unreference(). In that case, refcnt 501 * is 0 and we can't use panfrost_bo_reference() directly, we 502 * have to re-initialize the refcnt(). 503 * Note that panfrost_bo_unreference() checks 504 * refcnt value just after acquiring the lock to 505 * make sure the object is not freed if panfrost_bo_import() 506 * acquired it in the meantime. 507 */ 508 if (p_atomic_read(&bo->refcnt) == 0) 509 p_atomic_set(&bo->refcnt, 1); 510 else 511 panfrost_bo_reference(bo); 512 } 513 pthread_mutex_unlock(&dev->bo_map_lock); 514 515 return bo; 516} 517 518int 519panfrost_bo_export(struct panfrost_bo *bo) 520{ 521 struct drm_prime_handle args = { 522 .handle = bo->gem_handle, 523 .flags = DRM_CLOEXEC, 524 }; 525 526 int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); 527 if (ret == -1) 528 return -1; 529 530 bo->flags |= PAN_BO_SHARED; 531 return args.fd; 532} 533 534