1/* 2 * Copyright © 2017 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 * DEALINGS IN THE SOFTWARE. 21 */ 22 23/** 24 * @file crocus_bufmgr.c 25 * 26 * The crocus buffer manager. 27 * 28 * XXX: write better comments 29 * - BOs 30 * - Explain BO cache 31 * - main interface to GEM in the kernel 32 */ 33 34#ifdef HAVE_CONFIG_H 35#include "config.h" 36#endif 37 38#include <xf86drm.h> 39#include <util/u_atomic.h> 40#include <fcntl.h> 41#include <stdio.h> 42#include <stdlib.h> 43#include <string.h> 44#include <unistd.h> 45#include <assert.h> 46#include <sys/ioctl.h> 47#include <sys/mman.h> 48#include <sys/stat.h> 49#include <sys/types.h> 50#include <stdbool.h> 51#include <time.h> 52 53#include "errno.h" 54#include "common/intel_clflush.h" 55#include "dev/intel_debug.h" 56#include "common/intel_gem.h" 57#include "dev/intel_device_info.h" 58#include "util/debug.h" 59#include "util/macros.h" 60#include "util/hash_table.h" 61#include "util/list.h" 62#include "util/os_file.h" 63#include "util/u_dynarray.h" 64#include "util/vma.h" 65#include "crocus_bufmgr.h" 66#include "crocus_context.h" 67#include "string.h" 68 69#include "drm-uapi/i915_drm.h" 70 71#ifdef HAVE_VALGRIND 72#include <valgrind.h> 73#include <memcheck.h> 74#define VG(x) x 75#else 76#define VG(x) 77#endif 78 79/** 80 * For debugging purposes, this returns a time in seconds. 81 */ 82static double 83get_time(void) 84{ 85 struct timespec tp; 86 87 clock_gettime(CLOCK_MONOTONIC, &tp); 88 89 return tp.tv_sec + tp.tv_nsec / 1000000000.0; 90} 91 92/* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier 93 * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is 94 * leaked. All because it does not call VG(cli_free) from its 95 * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like 96 * and allocation, we mark it available for use upon mmapping and remove 97 * it upon unmapping. 98 */ 99#define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size)) 100#define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size)) 101 102#define PAGE_SIZE 4096 103 104#define WARN_ONCE(cond, fmt...) do { \ 105 if (unlikely(cond)) { \ 106 static bool _warned = false; \ 107 if (!_warned) { \ 108 fprintf(stderr, "WARNING: "); \ 109 fprintf(stderr, fmt); \ 110 _warned = true; \ 111 } \ 112 } \ 113} while (0) 114 115#define FILE_DEBUG_FLAG DEBUG_BUFMGR 116 117struct bo_cache_bucket { 118 /** List of cached BOs. 
    */
   struct list_head head;

   /** Size of this bucket, in bytes. */
   uint64_t size;
};

struct bo_export {
   /** File descriptor associated with a handle export. */
   int drm_fd;

   /** GEM handle in drm_fd */
   uint32_t gem_handle;

   struct list_head link;
};

struct crocus_bufmgr {
   /**
    * Link in the global list of buffer managers (global_bufmgr_list).
    */
   struct list_head link;

   uint32_t refcount;

   int fd;

   simple_mtx_t lock;

   /** Array of lists of cached gem objects, one list per bucket size */
   struct bo_cache_bucket cache_bucket[14 * 4];
   int num_buckets;
   time_t time;

   struct hash_table *name_table;
   struct hash_table *handle_table;

   /**
    * List of BOs which we've effectively freed, but are hanging on to
    * until they're idle before closing and returning the VMA.
    */
   struct list_head zombie_list;

   bool has_llc:1;
   bool has_mmap_offset:1;
   bool has_tiling_uapi:1;
   bool bo_reuse:1;
};

static simple_mtx_t global_bufmgr_list_mutex = _SIMPLE_MTX_INITIALIZER_NP;
static struct list_head global_bufmgr_list = {
   .next = &global_bufmgr_list,
   .prev = &global_bufmgr_list,
};

static int bo_set_tiling_internal(struct crocus_bo *bo, uint32_t tiling_mode,
                                  uint32_t stride);

static void bo_free(struct crocus_bo *bo);

static uint32_t
key_hash_uint(const void *key)
{
   return _mesa_hash_data(key, 4);
}

static bool
key_uint_equal(const void *a, const void *b)
{
   return *((unsigned *) a) == *((unsigned *) b);
}

static struct crocus_bo *
find_and_ref_external_bo(struct hash_table *ht, unsigned int key)
{
   struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
   struct crocus_bo *bo = entry ? entry->data : NULL;

   if (bo) {
      assert(bo->external);
      assert(!bo->reusable);

      /* Being non-reusable, the BO cannot be in the cache lists, but it
       * may be in the zombie list if it had reached zero references, but
       * we hadn't yet closed it...and then reimported the same BO.  If it
       * is, then remove it since it's now been resurrected.
       */
      if (bo->head.prev || bo->head.next)
         list_del(&bo->head);

      crocus_bo_reference(bo);
   }

   return bo;
}

/**
 * Finds the bucket that best fits the requested size.
 *
 * Runs in O(1): the bucket index is computed directly from the requested
 * size rather than by iterating over all the buckets.
 */
static struct bo_cache_bucket *
bucket_for_size(struct crocus_bufmgr *bufmgr, uint64_t size)
{
   /* Calculating the pages and rounding up to the page size. */
   const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;

   /* Row  Bucket sizes    clz((x-1) | 3)   Row    Column
    *  in pages                          stride   size
    *   0:   1  2  3  4 -> 30 30 30 30      4       1
    *   1:   5  6  7  8 -> 29 29 29 29      4       1
    *   2:  10 12 14 16 -> 28 28 28 28      8       2
    *   3:  20 24 28 32 -> 27 27 27 27     16       4
    */
   const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
   const unsigned row_max_pages = 4 << row;

   /* The '& ~2' is the special case for row 1.  In row 1, max pages /
    * 2 is 2, but the previous row maximum is zero (because there is
    * no previous row).  All row maximum sizes are power of 2, so that
    * is the only case where that bit will be set.
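    *
    * Worked example of the computation below: a 70 KiB request rounds up
    * to 18 pages, giving row 3 (column size 4 pages) and column 1, so the
    * index is 3 * 4 + (1 - 1) = 12, i.e. the 20-page (80 KiB) bucket.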
239 */ 240 const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2; 241 int col_size_log2 = row - 1; 242 col_size_log2 += (col_size_log2 < 0); 243 244 const unsigned col = (pages - prev_row_max_pages + 245 ((1 << col_size_log2) - 1)) >> col_size_log2; 246 247 /* Calculating the index based on the row and column. */ 248 const unsigned index = (row * 4) + (col - 1); 249 250 return (index < bufmgr->num_buckets) ? 251 &bufmgr->cache_bucket[index] : NULL; 252} 253 254 255int 256crocus_bo_busy(struct crocus_bo *bo) 257{ 258 struct crocus_bufmgr *bufmgr = bo->bufmgr; 259 struct drm_i915_gem_busy busy = { .handle = bo->gem_handle }; 260 261 int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 262 if (ret == 0) { 263 bo->idle = !busy.busy; 264 return busy.busy; 265 } 266 return false; 267} 268 269int 270crocus_bo_madvise(struct crocus_bo *bo, int state) 271{ 272 struct drm_i915_gem_madvise madv = { 273 .handle = bo->gem_handle, 274 .madv = state, 275 .retained = 1, 276 }; 277 278 intel_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 279 280 return madv.retained; 281} 282 283static struct crocus_bo * 284bo_calloc(void) 285{ 286 struct crocus_bo *bo = calloc(1, sizeof(*bo)); 287 if (!bo) 288 return NULL; 289 290 list_inithead(&bo->exports); 291 bo->hash = _mesa_hash_pointer(bo); 292 return bo; 293} 294 295static struct crocus_bo * 296alloc_bo_from_cache(struct crocus_bufmgr *bufmgr, 297 struct bo_cache_bucket *bucket, 298 uint32_t alignment, 299 unsigned flags) 300{ 301 if (!bucket) 302 return NULL; 303 304 struct crocus_bo *bo = NULL; 305 306 list_for_each_entry_safe(struct crocus_bo, cur, &bucket->head, head) { 307 /* If the last BO in the cache is busy, there are no idle BOs. Bail, 308 * either falling back to a non-matching memzone, or if that fails, 309 * allocating a fresh buffer. 310 */ 311 if (crocus_bo_busy(cur)) 312 return NULL; 313 314 list_del(&cur->head); 315 316 /* Tell the kernel we need this BO. If it still exists, we're done! */ 317 if (crocus_bo_madvise(cur, I915_MADV_WILLNEED)) { 318 bo = cur; 319 break; 320 } 321 322 /* This BO was purged, throw it out and keep looking. */ 323 bo_free(cur); 324 } 325 326 if (!bo) 327 return NULL; 328 329 /* Zero the contents if necessary. If this fails, fall back to 330 * allocating a fresh BO, which will always be zeroed by the kernel. 331 */ 332 if (flags & BO_ALLOC_ZEROED) { 333 void *map = crocus_bo_map(NULL, bo, MAP_WRITE | MAP_RAW); 334 if (map) { 335 memset(map, 0, bo->size); 336 } else { 337 bo_free(bo); 338 return NULL; 339 } 340 } 341 342 return bo; 343} 344 345static struct crocus_bo * 346alloc_fresh_bo(struct crocus_bufmgr *bufmgr, uint64_t bo_size) 347{ 348 struct crocus_bo *bo = bo_calloc(); 349 if (!bo) 350 return NULL; 351 352 struct drm_i915_gem_create create = { .size = bo_size }; 353 354 /* All new BOs we get from the kernel are zeroed, so we don't need to 355 * worry about that here. 356 */ 357 if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create) != 0) { 358 free(bo); 359 return NULL; 360 } 361 362 bo->gem_handle = create.handle; 363 bo->bufmgr = bufmgr; 364 bo->size = bo_size; 365 bo->idle = true; 366 bo->tiling_mode = I915_TILING_NONE; 367 bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 368 bo->stride = 0; 369 370 /* Calling set_domain() will allocate pages for the BO outside of the 371 * struct mutex lock in the kernel, which is more efficient than waiting 372 * to create them during the first execbuf that uses the BO. 
373 */ 374 struct drm_i915_gem_set_domain sd = { 375 .handle = bo->gem_handle, 376 .read_domains = I915_GEM_DOMAIN_CPU, 377 .write_domain = 0, 378 }; 379 380 if (intel_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0) { 381 bo_free(bo); 382 return NULL; 383 } 384 385 return bo; 386} 387 388static struct crocus_bo * 389bo_alloc_internal(struct crocus_bufmgr *bufmgr, 390 const char *name, 391 uint64_t size, 392 uint32_t alignment, 393 unsigned flags, 394 uint32_t tiling_mode, 395 uint32_t stride) 396{ 397 struct crocus_bo *bo; 398 unsigned int page_size = getpagesize(); 399 struct bo_cache_bucket *bucket = bucket_for_size(bufmgr, size); 400 401 /* Round the size up to the bucket size, or if we don't have caching 402 * at this size, a multiple of the page size. 403 */ 404 uint64_t bo_size = 405 bucket ? bucket->size : MAX2(ALIGN(size, page_size), page_size); 406 407 simple_mtx_lock(&bufmgr->lock); 408 409 /* Get a buffer out of the cache if available. First, we try to find 410 * one with a matching memory zone so we can avoid reallocating VMA. 411 */ 412 bo = alloc_bo_from_cache(bufmgr, bucket, alignment, flags); 413 414 simple_mtx_unlock(&bufmgr->lock); 415 416 if (!bo) { 417 bo = alloc_fresh_bo(bufmgr, bo_size); 418 if (!bo) 419 return NULL; 420 } 421 422 if (bo_set_tiling_internal(bo, tiling_mode, stride)) 423 goto err_free; 424 425 bo->name = name; 426 p_atomic_set(&bo->refcount, 1); 427 bo->reusable = bucket && bufmgr->bo_reuse; 428 bo->cache_coherent = bufmgr->has_llc; 429 bo->index = -1; 430 bo->kflags = 0; 431 432 if (flags & BO_ALLOC_SCANOUT) 433 bo->scanout = 1; 434 435 if ((flags & BO_ALLOC_COHERENT) && !bo->cache_coherent) { 436 struct drm_i915_gem_caching arg = { 437 .handle = bo->gem_handle, 438 .caching = 1, 439 }; 440 if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0) { 441 bo->cache_coherent = true; 442 bo->reusable = false; 443 } 444 } 445 446 DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, 447 bo->name, (unsigned long long) size); 448 449 return bo; 450 451err_free: 452 bo_free(bo); 453 return NULL; 454} 455 456struct crocus_bo * 457crocus_bo_alloc(struct crocus_bufmgr *bufmgr, 458 const char *name, 459 uint64_t size) 460{ 461 return bo_alloc_internal(bufmgr, name, size, 1, 462 0, I915_TILING_NONE, 0); 463} 464 465struct crocus_bo * 466crocus_bo_alloc_tiled(struct crocus_bufmgr *bufmgr, const char *name, 467 uint64_t size, uint32_t alignment, 468 uint32_t tiling_mode, uint32_t pitch, unsigned flags) 469{ 470 return bo_alloc_internal(bufmgr, name, size, alignment, 471 flags, tiling_mode, pitch); 472} 473 474struct crocus_bo * 475crocus_bo_create_userptr(struct crocus_bufmgr *bufmgr, const char *name, 476 void *ptr, size_t size) 477{ 478 struct crocus_bo *bo; 479 480 bo = bo_calloc(); 481 if (!bo) 482 return NULL; 483 484 struct drm_i915_gem_userptr arg = { 485 .user_ptr = (uintptr_t)ptr, 486 .user_size = size, 487 }; 488 if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg)) 489 goto err_free; 490 bo->gem_handle = arg.handle; 491 492 /* Check the buffer for validity before we try and use it in a batch */ 493 struct drm_i915_gem_set_domain sd = { 494 .handle = bo->gem_handle, 495 .read_domains = I915_GEM_DOMAIN_CPU, 496 }; 497 if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd)) 498 goto err_close; 499 500 bo->name = name; 501 bo->size = size; 502 bo->map_cpu = ptr; 503 504 bo->bufmgr = bufmgr; 505 bo->kflags = 0; 506 507 p_atomic_set(&bo->refcount, 1); 508 bo->userptr = true; 509 bo->cache_coherent = true; 510 
bo->index = -1; 511 bo->idle = true; 512 513 return bo; 514 515err_close: 516 intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &bo->gem_handle); 517err_free: 518 free(bo); 519 return NULL; 520} 521 522/** 523 * Returns a crocus_bo wrapping the given buffer object handle. 524 * 525 * This can be used when one application needs to pass a buffer object 526 * to another. 527 */ 528struct crocus_bo * 529crocus_bo_gem_create_from_name(struct crocus_bufmgr *bufmgr, 530 const char *name, unsigned int handle) 531{ 532 struct crocus_bo *bo; 533 534 /* At the moment most applications only have a few named bo. 535 * For instance, in a DRI client only the render buffers passed 536 * between X and the client are named. And since X returns the 537 * alternating names for the front/back buffer a linear search 538 * provides a sufficiently fast match. 539 */ 540 simple_mtx_lock(&bufmgr->lock); 541 bo = find_and_ref_external_bo(bufmgr->name_table, handle); 542 if (bo) 543 goto out; 544 545 struct drm_gem_open open_arg = { .name = handle }; 546 int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg); 547 if (ret != 0) { 548 DBG("Couldn't reference %s handle 0x%08x: %s\n", 549 name, handle, strerror(errno)); 550 bo = NULL; 551 goto out; 552 } 553 /* Now see if someone has used a prime handle to get this 554 * object from the kernel before by looking through the list 555 * again for a matching gem_handle 556 */ 557 bo = find_and_ref_external_bo(bufmgr->handle_table, open_arg.handle); 558 if (bo) 559 goto out; 560 561 bo = bo_calloc(); 562 if (!bo) 563 goto out; 564 565 p_atomic_set(&bo->refcount, 1); 566 567 bo->size = open_arg.size; 568 bo->gtt_offset = 0; 569 bo->bufmgr = bufmgr; 570 bo->gem_handle = open_arg.handle; 571 bo->name = name; 572 bo->global_name = handle; 573 bo->reusable = false; 574 bo->external = true; 575 bo->kflags = 0; 576 577 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo); 578 _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo); 579 580 struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle }; 581 ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); 582 if (ret != 0) 583 goto err_unref; 584 585 bo->tiling_mode = get_tiling.tiling_mode; 586 bo->swizzle_mode = get_tiling.swizzle_mode; 587 /* XXX stride is unknown */ 588 DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name); 589 590out: 591 simple_mtx_unlock(&bufmgr->lock); 592 return bo; 593 594err_unref: 595 bo_free(bo); 596 simple_mtx_unlock(&bufmgr->lock); 597 return NULL; 598} 599 600static void 601bo_close(struct crocus_bo *bo) 602{ 603 struct crocus_bufmgr *bufmgr = bo->bufmgr; 604 605 if (bo->external) { 606 struct hash_entry *entry; 607 608 if (bo->global_name) { 609 entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name); 610 _mesa_hash_table_remove(bufmgr->name_table, entry); 611 } 612 613 entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle); 614 _mesa_hash_table_remove(bufmgr->handle_table, entry); 615 616 list_for_each_entry_safe(struct bo_export, export, &bo->exports, link) { 617 struct drm_gem_close close = { .handle = export->gem_handle }; 618 intel_ioctl(export->drm_fd, DRM_IOCTL_GEM_CLOSE, &close); 619 620 list_del(&export->link); 621 free(export); 622 } 623 } else { 624 assert(list_is_empty(&bo->exports)); 625 } 626 627 /* Close this object */ 628 struct drm_gem_close close = { .handle = bo->gem_handle }; 629 int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close); 630 if (ret != 0) { 631 
DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 632 bo->gem_handle, bo->name, strerror(errno)); 633 } 634 635 free(bo); 636} 637 638static void 639bo_free(struct crocus_bo *bo) 640{ 641 struct crocus_bufmgr *bufmgr = bo->bufmgr; 642 643 if (bo->map_cpu && !bo->userptr) { 644 VG_NOACCESS(bo->map_cpu, bo->size); 645 munmap(bo->map_cpu, bo->size); 646 } 647 if (bo->map_wc) { 648 VG_NOACCESS(bo->map_wc, bo->size); 649 munmap(bo->map_wc, bo->size); 650 } 651 if (bo->map_gtt) { 652 VG_NOACCESS(bo->map_gtt, bo->size); 653 munmap(bo->map_gtt, bo->size); 654 } 655 656 if (bo->idle) { 657 bo_close(bo); 658 } else { 659 /* Defer closing the GEM BO and returning the VMA for reuse until the 660 * BO is idle. Just move it to the dead list for now. 661 */ 662 list_addtail(&bo->head, &bufmgr->zombie_list); 663 } 664} 665 666/** Frees all cached buffers significantly older than @time. */ 667static void 668cleanup_bo_cache(struct crocus_bufmgr *bufmgr, time_t time) 669{ 670 int i; 671 672 if (bufmgr->time == time) 673 return; 674 675 for (i = 0; i < bufmgr->num_buckets; i++) { 676 struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i]; 677 678 list_for_each_entry_safe(struct crocus_bo, bo, &bucket->head, head) { 679 if (time - bo->free_time <= 1) 680 break; 681 682 list_del(&bo->head); 683 684 bo_free(bo); 685 } 686 } 687 688 list_for_each_entry_safe(struct crocus_bo, bo, &bufmgr->zombie_list, head) { 689 /* Stop once we reach a busy BO - all others past this point were 690 * freed more recently so are likely also busy. 691 */ 692 if (!bo->idle && crocus_bo_busy(bo)) 693 break; 694 695 list_del(&bo->head); 696 bo_close(bo); 697 } 698 699 bufmgr->time = time; 700} 701 702static void 703bo_unreference_final(struct crocus_bo *bo, time_t time) 704{ 705 struct crocus_bufmgr *bufmgr = bo->bufmgr; 706 struct bo_cache_bucket *bucket; 707 708 DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name); 709 710 bucket = NULL; 711 if (bo->reusable) 712 bucket = bucket_for_size(bufmgr, bo->size); 713 /* Put the buffer into our internal cache for reuse if we can. */ 714 if (bucket && crocus_bo_madvise(bo, I915_MADV_DONTNEED)) { 715 bo->free_time = time; 716 bo->name = NULL; 717 718 list_addtail(&bo->head, &bucket->head); 719 } else { 720 bo_free(bo); 721 } 722} 723 724void 725__crocus_bo_unreference(struct crocus_bo *bo) 726{ 727 struct crocus_bufmgr *bufmgr = bo->bufmgr; 728 struct timespec time; 729 730 clock_gettime(CLOCK_MONOTONIC, &time); 731 732 simple_mtx_lock(&bufmgr->lock); 733 734 if (p_atomic_dec_zero(&bo->refcount)) { 735 bo_unreference_final(bo, time.tv_sec); 736 cleanup_bo_cache(bufmgr, time.tv_sec); 737 } 738 739 simple_mtx_unlock(&bufmgr->lock); 740} 741 742static void 743bo_wait_with_stall_warning(struct util_debug_callback *dbg, 744 struct crocus_bo *bo, 745 const char *action) 746{ 747 bool busy = dbg && !bo->idle; 748 double elapsed = unlikely(busy) ? 
-get_time() : 0.0; 749 750 crocus_bo_wait_rendering(bo); 751 752 if (unlikely(busy)) { 753 elapsed += get_time(); 754 if (elapsed > 1e-5) /* 0.01ms */ { 755 perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n", 756 action, bo->name, elapsed * 1000); 757 } 758 } 759} 760 761static void 762print_flags(unsigned flags) 763{ 764 if (flags & MAP_READ) 765 DBG("READ "); 766 if (flags & MAP_WRITE) 767 DBG("WRITE "); 768 if (flags & MAP_ASYNC) 769 DBG("ASYNC "); 770 if (flags & MAP_PERSISTENT) 771 DBG("PERSISTENT "); 772 if (flags & MAP_COHERENT) 773 DBG("COHERENT "); 774 if (flags & MAP_RAW) 775 DBG("RAW "); 776 DBG("\n"); 777} 778 779static void * 780crocus_bo_gem_mmap_legacy(struct util_debug_callback *dbg, 781 struct crocus_bo *bo, bool wc) 782{ 783 struct crocus_bufmgr *bufmgr = bo->bufmgr; 784 785 struct drm_i915_gem_mmap mmap_arg = { 786 .handle = bo->gem_handle, 787 .size = bo->size, 788 .flags = wc ? I915_MMAP_WC : 0, 789 }; 790 791 int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg); 792 if (ret != 0) { 793 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 794 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); 795 return NULL; 796 } 797 void *map = (void *) (uintptr_t) mmap_arg.addr_ptr; 798 799 return map; 800} 801 802static void * 803crocus_bo_gem_mmap_offset(struct util_debug_callback *dbg, struct crocus_bo *bo, 804 bool wc) 805{ 806 struct crocus_bufmgr *bufmgr = bo->bufmgr; 807 808 struct drm_i915_gem_mmap_offset mmap_arg = { 809 .handle = bo->gem_handle, 810 .flags = wc ? I915_MMAP_OFFSET_WC : I915_MMAP_OFFSET_WB, 811 }; 812 813 /* Get the fake offset back */ 814 int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmap_arg); 815 if (ret != 0) { 816 DBG("%s:%d: Error preparing buffer %d (%s): %s .\n", 817 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); 818 return NULL; 819 } 820 821 /* And map it */ 822 void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, 823 bufmgr->fd, mmap_arg.offset); 824 if (map == MAP_FAILED) { 825 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 826 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); 827 return NULL; 828 } 829 830 return map; 831} 832 833static void * 834crocus_bo_gem_mmap(struct util_debug_callback *dbg, struct crocus_bo *bo, bool wc) 835{ 836 struct crocus_bufmgr *bufmgr = bo->bufmgr; 837 838 if (bufmgr->has_mmap_offset) 839 return crocus_bo_gem_mmap_offset(dbg, bo, wc); 840 else 841 return crocus_bo_gem_mmap_legacy(dbg, bo, wc); 842} 843 844static void * 845crocus_bo_map_cpu(struct util_debug_callback *dbg, 846 struct crocus_bo *bo, unsigned flags) 847{ 848 /* We disallow CPU maps for writing to non-coherent buffers, as the 849 * CPU map can become invalidated when a batch is flushed out, which 850 * can happen at unpredictable times. You should use WC maps instead. 
851 */ 852 assert(bo->cache_coherent || !(flags & MAP_WRITE)); 853 854 if (!bo->map_cpu) { 855 DBG("crocus_bo_map_cpu: %d (%s)\n", bo->gem_handle, bo->name); 856 857 void *map = crocus_bo_gem_mmap(dbg, bo, false); 858 if (!map) { 859 return NULL; 860 } 861 862 VG_DEFINED(map, bo->size); 863 864 if (p_atomic_cmpxchg(&bo->map_cpu, NULL, map)) { 865 VG_NOACCESS(map, bo->size); 866 munmap(map, bo->size); 867 } 868 } 869 assert(bo->map_cpu); 870 871 DBG("crocus_bo_map_cpu: %d (%s) -> %p, ", bo->gem_handle, bo->name, 872 bo->map_cpu); 873 print_flags(flags); 874 875 if (!(flags & MAP_ASYNC)) { 876 bo_wait_with_stall_warning(dbg, bo, "CPU mapping"); 877 } 878 879 if (!bo->cache_coherent && !bo->bufmgr->has_llc) { 880 /* If we're reusing an existing CPU mapping, the CPU caches may 881 * contain stale data from the last time we read from that mapping. 882 * (With the BO cache, it might even be data from a previous buffer!) 883 * Even if it's a brand new mapping, the kernel may have zeroed the 884 * buffer via CPU writes. 885 * 886 * We need to invalidate those cachelines so that we see the latest 887 * contents, and so long as we only read from the CPU mmap we do not 888 * need to write those cachelines back afterwards. 889 * 890 * On LLC, the emprical evidence suggests that writes from the GPU 891 * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU 892 * cachelines. (Other reads, such as the display engine, bypass the 893 * LLC entirely requiring us to keep dirty pixels for the scanout 894 * out of any cache.) 895 */ 896 intel_invalidate_range(bo->map_cpu, bo->size); 897 } 898 899 return bo->map_cpu; 900} 901 902static void * 903crocus_bo_map_wc(struct util_debug_callback *dbg, 904 struct crocus_bo *bo, unsigned flags) 905{ 906 if (!bo->map_wc) { 907 DBG("crocus_bo_map_wc: %d (%s)\n", bo->gem_handle, bo->name); 908 909 void *map = crocus_bo_gem_mmap(dbg, bo, true); 910 if (!map) { 911 return NULL; 912 } 913 914 VG_DEFINED(map, bo->size); 915 916 if (p_atomic_cmpxchg(&bo->map_wc, NULL, map)) { 917 VG_NOACCESS(map, bo->size); 918 munmap(map, bo->size); 919 } 920 } 921 assert(bo->map_wc); 922 923 DBG("crocus_bo_map_wc: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_wc); 924 print_flags(flags); 925 926 if (!(flags & MAP_ASYNC)) { 927 bo_wait_with_stall_warning(dbg, bo, "WC mapping"); 928 } 929 930 return bo->map_wc; 931} 932 933/** 934 * Perform an uncached mapping via the GTT. 935 * 936 * Write access through the GTT is not quite fully coherent. On low power 937 * systems especially, like modern Atoms, we can observe reads from RAM before 938 * the write via GTT has landed. A write memory barrier that flushes the Write 939 * Combining Buffer (i.e. sfence/mfence) is not sufficient to order the later 940 * read after the write as the GTT write suffers a small delay through the GTT 941 * indirection. The kernel uses an uncached mmio read to ensure the GTT write 942 * is ordered with reads (either by the GPU, WB or WC) and unconditionally 943 * flushes prior to execbuf submission. However, if we are not informing the 944 * kernel about our GTT writes, it will not flush before earlier access, such 945 * as when using the cmdparser. Similarly, we need to be careful if we should 946 * ever issue a CPU read immediately following a GTT write. 947 * 948 * Telling the kernel about write access also has one more important 949 * side-effect. Upon receiving notification about the write, it cancels any 950 * scanout buffering for FBC/PSR and friends. 
Later FBC/PSR is then flushed by 951 * either SW_FINISH or DIRTYFB. The presumption is that we never write to the 952 * actual scanout via a mmaping, only to a backbuffer and so all the FBC/PSR 953 * tracking is handled on the buffer exchange instead. 954 */ 955static void * 956crocus_bo_map_gtt(struct util_debug_callback *dbg, 957 struct crocus_bo *bo, unsigned flags) 958{ 959 struct crocus_bufmgr *bufmgr = bo->bufmgr; 960 961 /* If we don't support get/set_tiling, there's no support for GTT mapping 962 * either (it won't do any de-tiling for us). 963 */ 964 assert(bufmgr->has_tiling_uapi); 965 966 /* Get a mapping of the buffer if we haven't before. */ 967 if (bo->map_gtt == NULL) { 968 DBG("bo_map_gtt: mmap %d (%s)\n", bo->gem_handle, bo->name); 969 970 struct drm_i915_gem_mmap_gtt mmap_arg = { .handle = bo->gem_handle }; 971 972 /* Get the fake offset back... */ 973 int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg); 974 if (ret != 0) { 975 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", 976 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); 977 return NULL; 978 } 979 980 /* and mmap it. */ 981 void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, 982 MAP_SHARED, bufmgr->fd, mmap_arg.offset); 983 if (map == MAP_FAILED) { 984 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 985 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); 986 return NULL; 987 } 988 989 /* We don't need to use VALGRIND_MALLOCLIKE_BLOCK because Valgrind will 990 * already intercept this mmap call. However, for consistency between 991 * all the mmap paths, we mark the pointer as defined now and mark it 992 * as inaccessible afterwards. 993 */ 994 VG_DEFINED(map, bo->size); 995 996 if (p_atomic_cmpxchg(&bo->map_gtt, NULL, map)) { 997 VG_NOACCESS(map, bo->size); 998 munmap(map, bo->size); 999 } 1000 } 1001 assert(bo->map_gtt); 1002 1003 DBG("bo_map_gtt: %d (%s) -> %p, ", bo->gem_handle, bo->name, bo->map_gtt); 1004 print_flags(flags); 1005 1006 if (!(flags & MAP_ASYNC)) { 1007 bo_wait_with_stall_warning(dbg, bo, "GTT mapping"); 1008 } 1009 1010 return bo->map_gtt; 1011} 1012 1013static bool 1014can_map_cpu(struct crocus_bo *bo, unsigned flags) 1015{ 1016 if (bo->scanout) 1017 return false; 1018 1019 if (bo->cache_coherent) 1020 return true; 1021 1022 /* Even if the buffer itself is not cache-coherent (such as a scanout), on 1023 * an LLC platform reads always are coherent (as they are performed via the 1024 * central system agent). It is just the writes that we need to take special 1025 * care to ensure that land in main memory and not stick in the CPU cache. 1026 */ 1027 if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc) 1028 return true; 1029 1030 /* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid 1031 * across batch flushes where the kernel will change cache domains of the 1032 * bo, invalidating continued access to the CPU mmap on non-LLC device. 1033 * 1034 * Similarly, ASYNC typically means that the buffer will be accessed via 1035 * both the CPU and the GPU simultaneously. Batches may be executed that 1036 * use the BO even while it is mapped. While OpenGL technically disallows 1037 * most drawing while non-persistent mappings are active, we may still use 1038 * the GPU for blits or other operations, causing batches to happen at 1039 * inconvenient times. 1040 * 1041 * If RAW is set, we expect the caller to be able to handle a WC buffer 1042 * more efficiently than the involuntary clflushes. 
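    *
    * For example: a MAP_WRITE | MAP_ASYNC request for a non-coherent BO is
    * rejected here (ASYNC is set), so crocus_bo_map falls through to the WC
    * path, while a read-only MAP_READ of the same BO on an LLC platform can
    * stay on the cheaper cached CPU mapping.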
    */
   if (flags & (MAP_PERSISTENT | MAP_COHERENT | MAP_ASYNC | MAP_RAW))
      return false;

   return !(flags & MAP_WRITE);
}

void *
crocus_bo_map(struct util_debug_callback *dbg,
              struct crocus_bo *bo, unsigned flags)
{
   if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
      return crocus_bo_map_gtt(dbg, bo, flags);

   void *map;

   if (can_map_cpu(bo, flags))
      map = crocus_bo_map_cpu(dbg, bo, flags);
   else
      map = crocus_bo_map_wc(dbg, bo, flags);

   /* Allow the attempt to fail by falling back to the GTT where necessary.
    *
    * Not every buffer can be mmaped directly using the CPU (or WC), for
    * example buffers that wrap stolen memory or are imported from other
    * devices. For those, we have little choice but to use a GTT mmapping.
    * However, if we use a slow GTT mmapping for reads where we expected fast
    * access, that order of magnitude difference in throughput will be clearly
    * expressed by angry users.
    *
    * We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
    */
   if (!map && !(flags & MAP_RAW)) {
      perf_debug(dbg, "Fallback GTT mapping for %s with access flags %x\n",
                 bo->name, flags);
      map = crocus_bo_map_gtt(dbg, bo, flags);
   }

   return map;
}

/** Waits for all GPU rendering with the object to have completed. */
void
crocus_bo_wait_rendering(struct crocus_bo *bo)
{
   /* We require a kernel recent enough for WAIT_IOCTL support.
    * See intel_init_bufmgr()
    */
   crocus_bo_wait(bo, -1);
}

/**
 * Waits on a BO for the given amount of time.
 *
 * @bo: buffer object to wait for
 * @timeout_ns: amount of time to wait in nanoseconds.
 *   If the value is less than 0, an infinite wait will occur.
 *
 * Returns 0 if the wait was successful, i.e. the last batch referencing the
 * object has completed within the allotted time.  Otherwise some negative
 * return value describes the error.  Of particular interest is -ETIME when
 * the wait has failed to yield the desired result.
 *
 * Similar to crocus_bo_wait_rendering except that a timeout parameter allows
 * the operation to give up after a certain amount of time.  Another subtle
 * difference is that the internal locking semantics differ (this variant does
 * not hold the lock for the duration of the wait), which makes the wait
 * subject to a larger userspace race window.
 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call.  The wait does
 * not guarantee that the buffer will not be re-issued via another thread or
 * a flinked handle.  Userspace must make sure this race does not occur if
 * such precision is important.
 *
 * Note that some kernels have broken the promise of an infinite wait for
 * negative values; upgrade to the latest stable kernel if this is the case.
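 *
 * For illustration only, a hypothetical caller that is willing to stall for
 * at most one millisecond could check for a timeout like so:
 *
 *    if (crocus_bo_wait(bo, 1000 * 1000) == -ETIME)
 *       ... the BO is still busy; defer the access rather than stalling ...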
1120 */ 1121int 1122crocus_bo_wait(struct crocus_bo *bo, int64_t timeout_ns) 1123{ 1124 struct crocus_bufmgr *bufmgr = bo->bufmgr; 1125 1126 /* If we know it's idle, don't bother with the kernel round trip */ 1127 if (bo->idle && !bo->external) 1128 return 0; 1129 1130 struct drm_i915_gem_wait wait = { 1131 .bo_handle = bo->gem_handle, 1132 .timeout_ns = timeout_ns, 1133 }; 1134 int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); 1135 if (ret != 0) 1136 return -errno; 1137 1138 bo->idle = true; 1139 1140 return ret; 1141} 1142 1143static void 1144crocus_bufmgr_destroy(struct crocus_bufmgr *bufmgr) 1145{ 1146 simple_mtx_destroy(&bufmgr->lock); 1147 1148 /* Free any cached buffer objects we were going to reuse */ 1149 for (int i = 0; i < bufmgr->num_buckets; i++) { 1150 struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i]; 1151 1152 list_for_each_entry_safe(struct crocus_bo, bo, &bucket->head, head) { 1153 list_del(&bo->head); 1154 1155 bo_free(bo); 1156 } 1157 } 1158 1159 /* Close any buffer objects on the dead list. */ 1160 list_for_each_entry_safe(struct crocus_bo, bo, &bufmgr->zombie_list, head) { 1161 list_del(&bo->head); 1162 bo_close(bo); 1163 } 1164 1165 _mesa_hash_table_destroy(bufmgr->name_table, NULL); 1166 _mesa_hash_table_destroy(bufmgr->handle_table, NULL); 1167 1168 close(bufmgr->fd); 1169 1170 free(bufmgr); 1171} 1172 1173static int 1174bo_set_tiling_internal(struct crocus_bo *bo, uint32_t tiling_mode, 1175 uint32_t stride) 1176{ 1177 struct crocus_bufmgr *bufmgr = bo->bufmgr; 1178 struct drm_i915_gem_set_tiling set_tiling; 1179 int ret; 1180 1181 if (bo->global_name == 0 && 1182 tiling_mode == bo->tiling_mode && stride == bo->stride) 1183 return 0; 1184 1185 memset(&set_tiling, 0, sizeof(set_tiling)); 1186 do { 1187 /* set_tiling is slightly broken and overwrites the 1188 * input on the error path, so we have to open code 1189 * drm_ioctl. 1190 */ 1191 set_tiling.handle = bo->gem_handle; 1192 set_tiling.tiling_mode = tiling_mode; 1193 set_tiling.stride = stride; 1194 1195 ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); 1196 } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 1197 if (ret == -1) 1198 return -errno; 1199 1200 bo->tiling_mode = set_tiling.tiling_mode; 1201 bo->swizzle_mode = set_tiling.swizzle_mode; 1202 bo->stride = set_tiling.stride; 1203 return 0; 1204} 1205 1206int 1207crocus_bo_get_tiling(struct crocus_bo *bo, uint32_t *tiling_mode, 1208 uint32_t *swizzle_mode) 1209{ 1210 *tiling_mode = bo->tiling_mode; 1211 *swizzle_mode = bo->swizzle_mode; 1212 return 0; 1213} 1214 1215struct crocus_bo * 1216crocus_bo_import_dmabuf(struct crocus_bufmgr *bufmgr, int prime_fd, 1217 uint64_t modifier) 1218{ 1219 uint32_t handle; 1220 struct crocus_bo *bo; 1221 1222 simple_mtx_lock(&bufmgr->lock); 1223 int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle); 1224 if (ret) { 1225 DBG("import_dmabuf: failed to obtain handle from fd: %s\n", 1226 strerror(errno)); 1227 simple_mtx_unlock(&bufmgr->lock); 1228 return NULL; 1229 } 1230 1231 /* 1232 * See if the kernel has already returned this buffer to us. Just as 1233 * for named buffers, we must not create two bo's pointing at the same 1234 * kernel object 1235 */ 1236 bo = find_and_ref_external_bo(bufmgr->handle_table, handle); 1237 if (bo) 1238 goto out; 1239 1240 bo = bo_calloc(); 1241 if (!bo) 1242 goto out; 1243 1244 p_atomic_set(&bo->refcount, 1); 1245 1246 /* Determine size of bo. The fd-to-handle ioctl really should 1247 * return the size, but it doesn't. 
If we have kernel 3.12 or 1248 * later, we can lseek on the prime fd to get the size. Older 1249 * kernels will just fail, in which case we fall back to the 1250 * provided (estimated or guess size). */ 1251 ret = lseek(prime_fd, 0, SEEK_END); 1252 if (ret != -1) 1253 bo->size = ret; 1254 1255 bo->bufmgr = bufmgr; 1256 bo->name = "prime"; 1257 bo->reusable = false; 1258 bo->external = true; 1259 bo->kflags = 0; 1260 bo->gem_handle = handle; 1261 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo); 1262 1263 const struct isl_drm_modifier_info *mod_info = 1264 isl_drm_modifier_get_info(modifier); 1265 if (mod_info) { 1266 bo->tiling_mode = isl_tiling_to_i915_tiling(mod_info->tiling); 1267 } else if (bufmgr->has_tiling_uapi) { 1268 struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle }; 1269 if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) 1270 goto err; 1271 1272 bo->tiling_mode = get_tiling.tiling_mode; 1273 } else { 1274 bo->tiling_mode = I915_TILING_NONE; 1275 } 1276 1277out: 1278 simple_mtx_unlock(&bufmgr->lock); 1279 return bo; 1280 1281err: 1282 bo_free(bo); 1283 simple_mtx_unlock(&bufmgr->lock); 1284 return NULL; 1285} 1286 1287struct crocus_bo * 1288crocus_bo_import_dmabuf_no_mods(struct crocus_bufmgr *bufmgr, 1289 int prime_fd) 1290{ 1291 uint32_t handle; 1292 struct crocus_bo *bo; 1293 1294 simple_mtx_lock(&bufmgr->lock); 1295 int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle); 1296 if (ret) { 1297 DBG("import_dmabuf: failed to obtain handle from fd: %s\n", 1298 strerror(errno)); 1299 simple_mtx_unlock(&bufmgr->lock); 1300 return NULL; 1301 } 1302 1303 /* 1304 * See if the kernel has already returned this buffer to us. Just as 1305 * for named buffers, we must not create two bo's pointing at the same 1306 * kernel object 1307 */ 1308 bo = find_and_ref_external_bo(bufmgr->handle_table, handle); 1309 if (bo) 1310 goto out; 1311 1312 bo = bo_calloc(); 1313 if (!bo) 1314 goto out; 1315 1316 p_atomic_set(&bo->refcount, 1); 1317 1318 /* Determine size of bo. The fd-to-handle ioctl really should 1319 * return the size, but it doesn't. If we have kernel 3.12 or 1320 * later, we can lseek on the prime fd to get the size. Older 1321 * kernels will just fail, in which case we fall back to the 1322 * provided (estimated or guess size). 
*/ 1323 ret = lseek(prime_fd, 0, SEEK_END); 1324 if (ret != -1) 1325 bo->size = ret; 1326 1327 bo->bufmgr = bufmgr; 1328 bo->name = "prime"; 1329 bo->reusable = false; 1330 bo->external = true; 1331 bo->kflags = 0; 1332 bo->gem_handle = handle; 1333 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo); 1334 1335out: 1336 simple_mtx_unlock(&bufmgr->lock); 1337 return bo; 1338} 1339 1340static void 1341crocus_bo_make_external_locked(struct crocus_bo *bo) 1342{ 1343 if (!bo->external) { 1344 _mesa_hash_table_insert(bo->bufmgr->handle_table, &bo->gem_handle, bo); 1345 bo->external = true; 1346 bo->reusable = false; 1347 } 1348} 1349 1350static void 1351crocus_bo_make_external(struct crocus_bo *bo) 1352{ 1353 struct crocus_bufmgr *bufmgr = bo->bufmgr; 1354 1355 if (bo->external) { 1356 assert(!bo->reusable); 1357 return; 1358 } 1359 1360 simple_mtx_lock(&bufmgr->lock); 1361 crocus_bo_make_external_locked(bo); 1362 simple_mtx_unlock(&bufmgr->lock); 1363} 1364 1365int 1366crocus_bo_export_dmabuf(struct crocus_bo *bo, int *prime_fd) 1367{ 1368 struct crocus_bufmgr *bufmgr = bo->bufmgr; 1369 1370 crocus_bo_make_external(bo); 1371 1372 if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle, 1373 DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0) 1374 return -errno; 1375 1376 return 0; 1377} 1378 1379uint32_t 1380crocus_bo_export_gem_handle(struct crocus_bo *bo) 1381{ 1382 crocus_bo_make_external(bo); 1383 1384 return bo->gem_handle; 1385} 1386 1387int 1388crocus_bo_flink(struct crocus_bo *bo, uint32_t *name) 1389{ 1390 struct crocus_bufmgr *bufmgr = bo->bufmgr; 1391 1392 if (!bo->global_name) { 1393 struct drm_gem_flink flink = { .handle = bo->gem_handle }; 1394 1395 if (intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink)) 1396 return -errno; 1397 1398 simple_mtx_lock(&bufmgr->lock); 1399 if (!bo->global_name) { 1400 crocus_bo_make_external_locked(bo); 1401 bo->global_name = flink.name; 1402 _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo); 1403 } 1404 simple_mtx_unlock(&bufmgr->lock); 1405 } 1406 1407 *name = bo->global_name; 1408 return 0; 1409} 1410 1411int 1412crocus_bo_export_gem_handle_for_device(struct crocus_bo *bo, int drm_fd, 1413 uint32_t *out_handle) 1414{ 1415 /* Only add the new GEM handle to the list of export if it belongs to a 1416 * different GEM device. Otherwise we might close the same buffer multiple 1417 * times. 1418 */ 1419 struct crocus_bufmgr *bufmgr = bo->bufmgr; 1420 int ret = os_same_file_description(drm_fd, bufmgr->fd); 1421 WARN_ONCE(ret < 0, 1422 "Kernel has no file descriptor comparison support: %s\n", 1423 strerror(errno)); 1424 if (ret == 0) { 1425 *out_handle = crocus_bo_export_gem_handle(bo); 1426 return 0; 1427 } 1428 1429 struct bo_export *export = calloc(1, sizeof(*export)); 1430 if (!export) 1431 return -ENOMEM; 1432 1433 export->drm_fd = drm_fd; 1434 1435 int dmabuf_fd = -1; 1436 int err = crocus_bo_export_dmabuf(bo, &dmabuf_fd); 1437 if (err) { 1438 free(export); 1439 return err; 1440 } 1441 1442 simple_mtx_lock(&bufmgr->lock); 1443 err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle); 1444 close(dmabuf_fd); 1445 if (err) { 1446 simple_mtx_unlock(&bufmgr->lock); 1447 free(export); 1448 return err; 1449 } 1450 1451 bool found = false; 1452 list_for_each_entry(struct bo_export, iter, &bo->exports, link) { 1453 if (iter->drm_fd != drm_fd) 1454 continue; 1455 /* Here we assume that for a given DRM fd, we'll always get back the 1456 * same GEM handle for a given buffer. 
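       * (The kernel's PRIME import path keeps a per-file mapping from
       * dma-buf to GEM handle, so importing the same buffer again on the
       * same fd is expected to return the handle we already recorded.)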
1457 */ 1458 assert(iter->gem_handle == export->gem_handle); 1459 free(export); 1460 export = iter; 1461 found = true; 1462 break; 1463 } 1464 if (!found) 1465 list_addtail(&export->link, &bo->exports); 1466 1467 simple_mtx_unlock(&bufmgr->lock); 1468 1469 *out_handle = export->gem_handle; 1470 1471 return 0; 1472} 1473 1474static void 1475add_bucket(struct crocus_bufmgr *bufmgr, int size) 1476{ 1477 unsigned int i = bufmgr->num_buckets; 1478 1479 assert(i < ARRAY_SIZE(bufmgr->cache_bucket)); 1480 1481 list_inithead(&bufmgr->cache_bucket[i].head); 1482 bufmgr->cache_bucket[i].size = size; 1483 bufmgr->num_buckets++; 1484 1485 assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]); 1486 assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]); 1487 assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]); 1488} 1489 1490static void 1491init_cache_buckets(struct crocus_bufmgr *bufmgr) 1492{ 1493 uint64_t size, cache_max_size = 64 * 1024 * 1024; 1494 1495 /* OK, so power of two buckets was too wasteful of memory. 1496 * Give 3 other sizes between each power of two, to hopefully 1497 * cover things accurately enough. (The alternative is 1498 * probably to just go for exact matching of sizes, and assume 1499 * that for things like composited window resize the tiled 1500 * width/height alignment and rounding of sizes to pages will 1501 * get us useful cache hit rates anyway) 1502 */ 1503 add_bucket(bufmgr, PAGE_SIZE); 1504 add_bucket(bufmgr, PAGE_SIZE * 2); 1505 add_bucket(bufmgr, PAGE_SIZE * 3); 1506 1507 /* Initialize the linked lists for BO reuse cache. */ 1508 for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) { 1509 add_bucket(bufmgr, size); 1510 1511 add_bucket(bufmgr, size + size * 1 / 4); 1512 add_bucket(bufmgr, size + size * 2 / 4); 1513 add_bucket(bufmgr, size + size * 3 / 4); 1514 } 1515} 1516 1517uint32_t 1518crocus_create_hw_context(struct crocus_bufmgr *bufmgr) 1519{ 1520 struct drm_i915_gem_context_create create = { }; 1521 int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); 1522 if (ret != 0) { 1523 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno)); 1524 return 0; 1525 } 1526 1527 /* Upon declaring a GPU hang, the kernel will zap the guilty context 1528 * back to the default logical HW state and attempt to continue on to 1529 * our next submitted batchbuffer. However, our render batches assume 1530 * the previous GPU state is preserved, and only emit commands needed 1531 * to incrementally change that state. In particular, we inherit the 1532 * STATE_BASE_ADDRESS and PIPELINE_SELECT settings, which are critical. 1533 * With default base addresses, our next batches will almost certainly 1534 * cause more GPU hangs, leading to repeated hangs until we're banned 1535 * or the machine is dead. 1536 * 1537 * Here we tell the kernel not to attempt to recover our context but 1538 * immediately (on the next batchbuffer submission) report that the 1539 * context is lost, and we will do the recovery ourselves. Ideally, 1540 * we'll have two lost batches instead of a continual stream of hangs. 
    */
   struct drm_i915_gem_context_param p = {
      .ctx_id = create.ctx_id,
      .param = I915_CONTEXT_PARAM_RECOVERABLE,
      .value = false,
   };
   drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);

   return create.ctx_id;
}

static int
crocus_hw_context_get_priority(struct crocus_bufmgr *bufmgr, uint32_t ctx_id)
{
   struct drm_i915_gem_context_param p = {
      .ctx_id = ctx_id,
      .param = I915_CONTEXT_PARAM_PRIORITY,
   };
   drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p);
   return p.value; /* on error, return 0 i.e. default priority */
}

int
crocus_hw_context_set_priority(struct crocus_bufmgr *bufmgr,
                               uint32_t ctx_id,
                               int priority)
{
   struct drm_i915_gem_context_param p = {
      .ctx_id = ctx_id,
      .param = I915_CONTEXT_PARAM_PRIORITY,
      .value = priority,
   };
   int err;

   err = 0;
   if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
      err = -errno;

   return err;
}

uint32_t
crocus_clone_hw_context(struct crocus_bufmgr *bufmgr, uint32_t ctx_id)
{
   uint32_t new_ctx = crocus_create_hw_context(bufmgr);

   if (new_ctx) {
      int priority = crocus_hw_context_get_priority(bufmgr, ctx_id);
      crocus_hw_context_set_priority(bufmgr, new_ctx, priority);
   }

   return new_ctx;
}

void
crocus_destroy_hw_context(struct crocus_bufmgr *bufmgr, uint32_t ctx_id)
{
   struct drm_i915_gem_context_destroy d = { .ctx_id = ctx_id };

   if (ctx_id != 0 &&
       intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) {
      fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
              strerror(errno));
   }
}

int
crocus_reg_read(struct crocus_bufmgr *bufmgr, uint32_t offset, uint64_t *result)
{
   struct drm_i915_reg_read reg_read = { .offset = offset };
   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);

   *result = reg_read.val;
   return ret;
}

static int
gem_param(int fd, int name)
{
   int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */

   struct drm_i915_getparam gp = { .param = name, .value = &v };
   if (intel_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
      return -1;

   return v;
}

/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
static struct crocus_bufmgr *
crocus_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
{
   struct crocus_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr));
   if (bufmgr == NULL)
      return NULL;

   /* Handles to buffer objects belong to the device fd and are not
    * reference counted by the kernel.  If the same fd is used by
    * multiple parties (threads sharing the same screen bufmgr, or
    * even worse the same device fd passed to multiple libraries)
    * ownership of those handles is shared by those independent parties.
    *
    * Don't do this!  Ensure that each library/bufmgr has its own device
    * fd so that its namespace does not clash with another.
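    *
    * That is why we duplicate the caller's fd below (os_dupfd_cloexec)
    * rather than borrowing it.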
    */
   bufmgr->fd = os_dupfd_cloexec(fd);

   p_atomic_set(&bufmgr->refcount, 1);

   simple_mtx_init(&bufmgr->lock, mtx_plain);

   list_inithead(&bufmgr->zombie_list);

   bufmgr->has_llc = devinfo->has_llc;
   bufmgr->has_tiling_uapi = devinfo->has_tiling_uapi;
   bufmgr->bo_reuse = bo_reuse;
   bufmgr->has_mmap_offset = gem_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4;

   init_cache_buckets(bufmgr);

   bufmgr->name_table =
      _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
   bufmgr->handle_table =
      _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);

   return bufmgr;
}

static struct crocus_bufmgr *
crocus_bufmgr_ref(struct crocus_bufmgr *bufmgr)
{
   p_atomic_inc(&bufmgr->refcount);
   return bufmgr;
}

void
crocus_bufmgr_unref(struct crocus_bufmgr *bufmgr)
{
   simple_mtx_lock(&global_bufmgr_list_mutex);
   if (p_atomic_dec_zero(&bufmgr->refcount)) {
      list_del(&bufmgr->link);
      crocus_bufmgr_destroy(bufmgr);
   }
   simple_mtx_unlock(&global_bufmgr_list_mutex);
}

/**
 * Gets an already existing GEM buffer manager or creates a new one.
 *
 * \param fd File descriptor of the opened DRM device.
 */
struct crocus_bufmgr *
crocus_bufmgr_get_for_fd(struct intel_device_info *devinfo, int fd, bool bo_reuse)
{
   struct stat st;

   if (fstat(fd, &st))
      return NULL;

   struct crocus_bufmgr *bufmgr = NULL;

   simple_mtx_lock(&global_bufmgr_list_mutex);
   list_for_each_entry(struct crocus_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
      struct stat iter_st;
      if (fstat(iter_bufmgr->fd, &iter_st))
         continue;

      if (st.st_rdev == iter_st.st_rdev) {
         assert(iter_bufmgr->bo_reuse == bo_reuse);
         bufmgr = crocus_bufmgr_ref(iter_bufmgr);
         goto unlock;
      }
   }

   bufmgr = crocus_bufmgr_create(devinfo, fd, bo_reuse);
   if (bufmgr)
      list_addtail(&bufmgr->link, &global_bufmgr_list);

 unlock:
   simple_mtx_unlock(&global_bufmgr_list_mutex);

   return bufmgr;
}

int
crocus_bufmgr_get_fd(struct crocus_bufmgr *bufmgr)
{
   return bufmgr->fd;
}
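
/*
 * Typical usage, sketched for illustration only.  Assumes the caller already
 * has an open DRM fd and its intel_device_info; error handling is omitted,
 * and in the real driver the crocus screen owns the bufmgr and its BOs:
 *
 *    struct crocus_bufmgr *bufmgr =
 *       crocus_bufmgr_get_for_fd(devinfo, fd, true);
 *    struct crocus_bo *bo = crocus_bo_alloc(bufmgr, "scratch", 4096);
 *    void *map = crocus_bo_map(NULL, bo, MAP_WRITE);
 *    memset(map, 0, 4096);
 *    crocus_bo_unreference(bo);
 *    crocus_bufmgr_unref(bufmgr);
 */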