1 /*
2  * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  */
26 
27 #include "radeon_drm_cs.h"
28 
29 #include "util/u_hash_table.h"
30 #include "util/u_memory.h"
31 #include "os/os_thread.h"
32 #include "os/os_mman.h"
33 #include "util/os_time.h"
34 
35 #include "frontend/drm_driver.h"
36 
37 #include <sys/ioctl.h>
38 #include <xf86drm.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <stdio.h>
42 #include <inttypes.h>
43 
44 static struct pb_buffer *
45 radeon_winsys_bo_create(struct radeon_winsys *rws,
46                         uint64_t size,
47                         unsigned alignment,
48                         enum radeon_bo_domain domain,
49                         enum radeon_bo_flag flags);
50 
51 static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
52 {
53    return (struct radeon_bo *)bo;
54 }
55 
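/* A free range ("hole") in a virtual memory heap. Holes are kept in the
 * per-heap list (heap->holes) and are handed out and merged back by
 * radeon_bomgr_find_va() and radeon_bomgr_free_va() below.
 */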
56 struct radeon_bo_va_hole {
57    struct list_head list;
58    uint64_t         offset;
59    uint64_t         size;
60 };
61 
62 static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
63 {
64    struct drm_radeon_gem_busy args = {0};
65 
66    args.handle = bo->handle;
67    return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
68                               &args, sizeof(args)) != 0;
69 }
70 
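/* Check whether a buffer is busy. Real BOs ask the kernel directly via
 * GEM_BUSY; slab entries instead walk their list of fence BOs, dropping
 * the fences that have already gone idle along the way.
 */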
71 static bool radeon_bo_is_busy(struct radeon_bo *bo)
72 {
73    unsigned num_idle;
74    bool busy = false;
75 
76    if (bo->handle)
77       return radeon_real_bo_is_busy(bo);
78 
79    mtx_lock(&bo->rws->bo_fence_lock);
80    for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
81       if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
82          busy = true;
83          break;
84       }
85       radeon_ws_bo_reference(&bo->u.slab.fences[num_idle], NULL);
86    }
87    memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
88          (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
89    bo->u.slab.num_fences -= num_idle;
90    mtx_unlock(&bo->rws->bo_fence_lock);
91 
92    return busy;
93 }
94 
95 static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
96 {
97    struct drm_radeon_gem_wait_idle args = {0};
98 
99    args.handle = bo->handle;
100    while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
101                           &args, sizeof(args)) == -EBUSY);
102 }
103 
104 static void radeon_bo_wait_idle(struct radeon_bo *bo)
105 {
106    if (bo->handle) {
107       radeon_real_bo_wait_idle(bo);
108    } else {
109       mtx_lock(&bo->rws->bo_fence_lock);
110       while (bo->u.slab.num_fences) {
111          struct radeon_bo *fence = NULL;
112          radeon_ws_bo_reference(&fence, bo->u.slab.fences[0]);
113          mtx_unlock(&bo->rws->bo_fence_lock);
114 
115          /* Wait without holding the fence lock. */
116          radeon_real_bo_wait_idle(fence);
117 
118          mtx_lock(&bo->rws->bo_fence_lock);
119          if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
120             radeon_ws_bo_reference(&bo->u.slab.fences[0], NULL);
121             memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
122                   (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
123             bo->u.slab.num_fences--;
124          }
125          radeon_ws_bo_reference(&fence, NULL);
126       }
127       mtx_unlock(&bo->rws->bo_fence_lock);
128    }
129 }
130 
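/* Wait for a buffer with the given timeout:
 *  - timeout == 0: only report whether the buffer is idle,
 *  - PIPE_TIMEOUT_INFINITE: block until it is idle,
 *  - any other value: emulated by polling radeon_bo_is_busy() with short
 *    sleeps until the absolute deadline passes.
 */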
131 static bool radeon_bo_wait(struct radeon_winsys *rws,
132                            struct pb_buffer *_buf, uint64_t timeout,
133                            unsigned usage)
134 {
135    struct radeon_bo *bo = radeon_bo(_buf);
136    int64_t abs_timeout;
137 
138    /* No timeout. Just query. */
139    if (timeout == 0)
140       return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);
141 
142    abs_timeout = os_time_get_absolute_timeout(timeout);
143 
144    /* Wait if any ioctl is being submitted with this buffer. */
145    if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
146       return false;
147 
148    /* Infinite timeout. */
149    if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
150       radeon_bo_wait_idle(bo);
151       return true;
152    }
153 
154    /* Other timeouts need to be emulated with a loop. */
155    while (radeon_bo_is_busy(bo)) {
156       if (os_time_get_nano() >= abs_timeout)
157          return false;
158       os_time_sleep(10);
159    }
160 
161    return true;
162 }
163 
164 static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
165 {
166    /* Zero domains the driver doesn't understand. */
167    domain &= RADEON_DOMAIN_VRAM_GTT;
168 
169    /* If no domain is set, we must set something... */
170    if (!domain)
171       domain = RADEON_DOMAIN_VRAM_GTT;
172 
173    return domain;
174 }
175 
176 static enum radeon_bo_domain radeon_bo_get_initial_domain(
177       struct pb_buffer *buf)
178 {
179    struct radeon_bo *bo = (struct radeon_bo*)buf;
180    struct drm_radeon_gem_op args;
181 
182    memset(&args, 0, sizeof(args));
183    args.handle = bo->handle;
184    args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;
185 
186    if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
187                            &args, sizeof(args))) {
188       fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
189               bo, bo->handle);
190       /* Default domain as returned by get_valid_domain. */
191       return RADEON_DOMAIN_VRAM_GTT;
192    }
193 
194    /* GEM domains and winsys domains are defined the same. */
195    return get_valid_domain(args.value);
196 }
197 
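/* First-fit allocator for GPU virtual address ranges.
 *
 * The size is rounded up to the GART page size, then the hole list is
 * scanned for a range that can hold "size" bytes at the requested
 * alignment; any alignment padding ("waste") in front of the allocation is
 * kept as a smaller hole. If no hole fits, the range is taken from the
 * unused area starting at heap->start, which is then advanced past the
 * allocation; 0 is returned when the heap is exhausted.
 *
 * Worked example (illustrative numbers): with a hole of size 0x4000 at
 * offset 0x11000, a request for size 0x1000 aligned to 0x2000 returns
 * 0x12000; the 0x1000 bytes of waste stay behind as a new hole and the
 * original hole shrinks to 0x2000 bytes at offset 0x13000.
 */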
198 static uint64_t radeon_bomgr_find_va(const struct radeon_info *info,
199                                      struct radeon_vm_heap *heap,
200                                      uint64_t size, uint64_t alignment)
201 {
202    struct radeon_bo_va_hole *hole, *n;
203    uint64_t offset = 0, waste = 0;
204 
205    /* All VM address space holes will implicitly start aligned to the
206     * size alignment, so we don't need to sanitize the alignment here
207     */
208    size = align(size, info->gart_page_size);
209 
210    mtx_lock(&heap->mutex);
211    /* first look for a hole */
212    LIST_FOR_EACH_ENTRY_SAFE(hole, n, &heap->holes, list) {
213       offset = hole->offset;
214       waste = offset % alignment;
215       waste = waste ? alignment - waste : 0;
216       offset += waste;
217       if (offset >= (hole->offset + hole->size)) {
218          continue;
219       }
220       if (!waste && hole->size == size) {
221          offset = hole->offset;
222          list_del(&hole->list);
223          FREE(hole);
224          mtx_unlock(&heap->mutex);
225          return offset;
226       }
227       if ((hole->size - waste) > size) {
228          if (waste) {
229             n = CALLOC_STRUCT(radeon_bo_va_hole);
230             n->size = waste;
231             n->offset = hole->offset;
232             list_add(&n->list, &hole->list);
233          }
234          hole->size -= (size + waste);
235          hole->offset += size + waste;
236          mtx_unlock(&heap->mutex);
237          return offset;
238       }
239       if ((hole->size - waste) == size) {
240          hole->size = waste;
241          mtx_unlock(&heap->mutex);
242          return offset;
243       }
244    }
245 
246    offset = heap->start;
247    waste = offset % alignment;
248    waste = waste ? alignment - waste : 0;
249 
250    if (offset + waste + size > heap->end) {
251       mtx_unlock(&heap->mutex);
252       return 0;
253    }
254 
255    if (waste) {
256       n = CALLOC_STRUCT(radeon_bo_va_hole);
257       n->size = waste;
258       n->offset = offset;
259       list_add(&n->list, &heap->holes);
260    }
261    offset += waste;
262    heap->start += size + waste;
263    mtx_unlock(&heap->mutex);
264    return offset;
265 }
266 
267 static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws,
268                                        uint64_t size, uint64_t alignment)
269 {
270    uint64_t va = 0;
271 
272    /* Try to allocate from the 64-bit address space first.
273     * If it doesn't exist (start = 0) or if it doesn't have enough space,
274     * fall back to the 32-bit address space.
275     */
276    if (ws->vm64.start)
277       va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment);
278    if (!va)
279       va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);
280    return va;
281 }
282 
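/* Return a virtual address range to its heap. If the range ends at the
 * current allocation frontier (heap->start), the frontier simply moves back
 * down, possibly absorbing the uppermost hole; otherwise a hole describing
 * the range is inserted into the list and merged with adjacent holes where
 * possible.
 */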
283 static void radeon_bomgr_free_va(const struct radeon_info *info,
284                                  struct radeon_vm_heap *heap,
285                                  uint64_t va, uint64_t size)
286 {
287    struct radeon_bo_va_hole *hole = NULL;
288 
289    size = align(size, info->gart_page_size);
290 
291    mtx_lock(&heap->mutex);
292    if ((va + size) == heap->start) {
293       heap->start = va;
294       /* Delete uppermost hole if it reaches the new top */
295       if (!list_is_empty(&heap->holes)) {
296          hole = container_of(heap->holes.next, struct radeon_bo_va_hole, list);
297          if ((hole->offset + hole->size) == va) {
298             heap->start = hole->offset;
299             list_del(&hole->list);
300             FREE(hole);
301          }
302       }
303    } else {
304       struct radeon_bo_va_hole *next;
305 
306       hole = container_of(&heap->holes, struct radeon_bo_va_hole, list);
307       LIST_FOR_EACH_ENTRY(next, &heap->holes, list) {
308          if (next->offset < va)
309             break;
310          hole = next;
311       }
312 
313       if (&hole->list != &heap->holes) {
314          /* Grow upper hole if it's adjacent */
315          if (hole->offset == (va + size)) {
316             hole->offset = va;
317             hole->size += size;
318             /* Merge lower hole if it's adjacent */
319             if (next != hole && &next->list != &heap->holes &&
320                 (next->offset + next->size) == va) {
321                next->size += hole->size;
322                list_del(&hole->list);
323                FREE(hole);
324             }
325             goto out;
326          }
327       }
328 
329       /* Grow lower hole if it's adjacent */
330       if (next != hole && &next->list != &heap->holes &&
331           (next->offset + next->size) == va) {
332          next->size += size;
333          goto out;
334       }
335 
336       /* FIXME: on allocation failure we just lose this virtual address range;
337        * consider printing a warning.
338        */
339       next = CALLOC_STRUCT(radeon_bo_va_hole);
340       if (next) {
341          next->size = size;
342          next->offset = va;
343          list_add(&next->list, &hole->list);
344       }
345    }
346 out:
347    mtx_unlock(&heap->mutex);
348 }
349 
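/* Final teardown of a real BO: drop it from the handle/name tables, unmap
 * any CPU mapping, release its virtual address range, close the GEM handle
 * and update the allocation statistics. Slab entries are freed through
 * radeon_bo_slab_destroy() instead.
 */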
350 void radeon_bo_destroy(void *winsys, struct pb_buffer *_buf)
351 {
352    struct radeon_bo *bo = radeon_bo(_buf);
353    struct radeon_drm_winsys *rws = bo->rws;
354    struct drm_gem_close args;
355 
356    assert(bo->handle && "must not be called for slab entries");
357 
358    memset(&args, 0, sizeof(args));
359 
360    mtx_lock(&rws->bo_handles_mutex);
361    _mesa_hash_table_remove_key(rws->bo_handles, (void*)(uintptr_t)bo->handle);
362    if (bo->flink_name) {
363       _mesa_hash_table_remove_key(rws->bo_names,
364                                   (void*)(uintptr_t)bo->flink_name);
365    }
366    mtx_unlock(&rws->bo_handles_mutex);
367 
368    if (bo->u.real.ptr)
369       os_munmap(bo->u.real.ptr, bo->base.size);
370 
371    if (rws->info.r600_has_virtual_memory) {
372       if (rws->va_unmap_working) {
373          struct drm_radeon_gem_va va;
374 
375          va.handle = bo->handle;
376          va.vm_id = 0;
377          va.operation = RADEON_VA_UNMAP;
378          va.flags = RADEON_VM_PAGE_READABLE |
379                     RADEON_VM_PAGE_WRITEABLE |
380                     RADEON_VM_PAGE_SNOOPED;
381          va.offset = bo->va;
382 
383          if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
384                                  sizeof(va)) != 0 &&
385              va.operation == RADEON_VA_RESULT_ERROR) {
386             fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
387             fprintf(stderr, "radeon:    size      : %"PRIu64" bytes\n", bo->base.size);
388             fprintf(stderr, "radeon:    va        : 0x%"PRIx64"\n", bo->va);
389          }
390       }
391 
392       radeon_bomgr_free_va(&rws->info,
393                            bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64,
394                            bo->va, bo->base.size);
395    }
396 
397    /* Close object. */
398    args.handle = bo->handle;
399    drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
400 
401    mtx_destroy(&bo->u.real.map_mutex);
402 
403    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
404       rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
405    else if (bo->initial_domain & RADEON_DOMAIN_GTT)
406       rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);
407 
408    if (bo->u.real.map_count >= 1) {
409       if (bo->initial_domain & RADEON_DOMAIN_VRAM)
410          bo->rws->mapped_vram -= bo->base.size;
411       else
412          bo->rws->mapped_gtt -= bo->base.size;
413       bo->rws->num_mapped_buffers--;
414    }
415 
416    FREE(bo);
417 }
418 
419 static void radeon_bo_destroy_or_cache(void *winsys, struct pb_buffer *_buf)
420 {
421    struct radeon_bo *bo = radeon_bo(_buf);
422 
423    assert(bo->handle && "must not be called for slab entries");
424 
425    if (bo->u.real.use_reusable_pool)
426       pb_cache_add_buffer(&bo->u.real.cache_entry);
427    else
428       radeon_bo_destroy(NULL, _buf);
429 }
430 
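/* CPU-map a buffer. User-pointer BOs already have a CPU address; slab
 * entries are redirected to their backing real BO plus an offset. The real
 * BO keeps a single mmap that is shared by all users, guarded by map_mutex
 * and reference-counted through map_count.
 */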
431 void *radeon_bo_do_map(struct radeon_bo *bo)
432 {
433    struct drm_radeon_gem_mmap args = {0};
434    void *ptr;
435    unsigned offset;
436 
437    /* If the buffer is created from user memory, return the user pointer. */
438    if (bo->user_ptr)
439       return bo->user_ptr;
440 
441    if (bo->handle) {
442       offset = 0;
443    } else {
444       offset = bo->va - bo->u.slab.real->va;
445       bo = bo->u.slab.real;
446    }
447 
448    /* Map the buffer. */
449    mtx_lock(&bo->u.real.map_mutex);
450    /* Return the pointer if it's already mapped. */
451    if (bo->u.real.ptr) {
452       bo->u.real.map_count++;
453       mtx_unlock(&bo->u.real.map_mutex);
454       return (uint8_t*)bo->u.real.ptr + offset;
455    }
456    args.handle = bo->handle;
457    args.offset = 0;
458    args.size = (uint64_t)bo->base.size;
459    if (drmCommandWriteRead(bo->rws->fd,
460                            DRM_RADEON_GEM_MMAP,
461                            &args,
462                            sizeof(args))) {
463       mtx_unlock(&bo->u.real.map_mutex);
464       fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
465               bo, bo->handle);
466       return NULL;
467    }
468 
469    ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
470                  bo->rws->fd, args.addr_ptr);
471    if (ptr == MAP_FAILED) {
472       /* Clear the cache and try again. */
473       pb_cache_release_all_buffers(&bo->rws->bo_cache);
474 
475       ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
476                     bo->rws->fd, args.addr_ptr);
477       if (ptr == MAP_FAILED) {
478          mtx_unlock(&bo->u.real.map_mutex);
479          fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
480          return NULL;
481       }
482    }
483    bo->u.real.ptr = ptr;
484    bo->u.real.map_count = 1;
485 
486    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
487       bo->rws->mapped_vram += bo->base.size;
488    else
489       bo->rws->mapped_gtt += bo->base.size;
490    bo->rws->num_mapped_buffers++;
491 
492    mtx_unlock(&bo->u.real.map_mutex);
493    return (uint8_t*)bo->u.real.ptr + offset;
494 }
495 
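/* buffer_map entry point. Unless PIPE_MAP_UNSYNCHRONIZED is set, the mapping
 * is synchronized: a command stream that still references the buffer is
 * flushed first (when mapping for read, only a CS that writes the buffer
 * matters), and then the call waits for the buffer to go idle. With
 * PIPE_MAP_DONTBLOCK, NULL is returned instead of blocking.
 */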
496 static void *radeon_bo_map(struct radeon_winsys *rws,
497                            struct pb_buffer *buf,
498                            struct radeon_cmdbuf *rcs,
499                            enum pipe_map_flags usage)
500 {
501    struct radeon_bo *bo = (struct radeon_bo*)buf;
502    struct radeon_drm_cs *cs = rcs ? radeon_drm_cs(rcs) : NULL;
503 
504    /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
505    if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
506       /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
507       if (usage & PIPE_MAP_DONTBLOCK) {
508          if (!(usage & PIPE_MAP_WRITE)) {
509             /* Mapping for read.
510              *
511              * Since we are mapping for read, we don't need to wait
512              * if the GPU is using the buffer for read too
513              * (neither one is changing it).
514              *
515              * Only check whether the buffer is being used for write. */
516             if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
517                cs->flush_cs(cs->flush_data,
518                             RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
519                return NULL;
520             }
521 
522             if (!radeon_bo_wait(rws, (struct pb_buffer*)bo, 0,
523                                 RADEON_USAGE_WRITE)) {
524                return NULL;
525             }
526          } else {
527             if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
528                cs->flush_cs(cs->flush_data,
529                             RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
530                return NULL;
531             }
532 
533             if (!radeon_bo_wait(rws, (struct pb_buffer*)bo, 0,
534                                 RADEON_USAGE_READWRITE)) {
535                return NULL;
536             }
537          }
538       } else {
539          uint64_t time = os_time_get_nano();
540 
541          if (!(usage & PIPE_MAP_WRITE)) {
542             /* Mapping for read.
543              *
544              * Since we are mapping for read, we don't need to wait
545              * if the GPU is using the buffer for read too
546              * (neither one is changing it).
547              *
548              * Only check whether the buffer is being used for write. */
549             if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
550                cs->flush_cs(cs->flush_data,
551                             RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
552             }
553             radeon_bo_wait(rws, (struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
554                            RADEON_USAGE_WRITE);
555          } else {
556             /* Mapping for write. */
557             if (cs) {
558                if (radeon_bo_is_referenced_by_cs(cs, bo)) {
559                   cs->flush_cs(cs->flush_data,
560                                RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
561                } else {
562                   /* Try to avoid busy-waiting in radeon_bo_wait. */
563                   if (p_atomic_read(&bo->num_active_ioctls))
564                      radeon_drm_cs_sync_flush(rcs);
565                }
566             }
567 
568             radeon_bo_wait(rws, (struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
569                            RADEON_USAGE_READWRITE);
570          }
571 
572          bo->rws->buffer_wait_time += os_time_get_nano() - time;
573       }
574    }
575 
576    return radeon_bo_do_map(bo);
577 }
578 
579 static void radeon_bo_unmap(struct radeon_winsys *rws, struct pb_buffer *_buf)
580 {
581    struct radeon_bo *bo = (struct radeon_bo*)_buf;
582 
583    if (bo->user_ptr)
584       return;
585 
586    if (!bo->handle)
587       bo = bo->u.slab.real;
588 
589    mtx_lock(&bo->u.real.map_mutex);
590    if (!bo->u.real.ptr) {
591       mtx_unlock(&bo->u.real.map_mutex);
592       return; /* it's not been mapped */
593    }
594 
595    assert(bo->u.real.map_count);
596    if (--bo->u.real.map_count) {
597       mtx_unlock(&bo->u.real.map_mutex);
598       return; /* it's been mapped multiple times */
599    }
600 
601    os_munmap(bo->u.real.ptr, bo->base.size);
602    bo->u.real.ptr = NULL;
603 
604    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
605       bo->rws->mapped_vram -= bo->base.size;
606    else
607       bo->rws->mapped_gtt -= bo->base.size;
608    bo->rws->num_mapped_buffers--;
609 
610    mtx_unlock(&bo->u.real.map_mutex);
611 }
612 
613 static const struct pb_vtbl radeon_bo_vtbl = {
614    radeon_bo_destroy_or_cache
615    /* other functions are never called */
616 };
617 
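/* Allocate a fresh real BO with DRM_RADEON_GEM_CREATE and, when the kernel
 * supports virtual memory, pick a virtual address for it and map it with
 * DRM_RADEON_GEM_VA. With the check_vm debug option, an extra gap
 * (va_gap_size) is left after each buffer, presumably so that out-of-bounds
 * accesses fault instead of landing in a neighbouring buffer.
 */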
618 static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
619                                           unsigned size, unsigned alignment,
620                                           unsigned initial_domains,
621                                           unsigned flags,
622                                           int heap)
623 {
624    struct radeon_bo *bo;
625    struct drm_radeon_gem_create args;
626    int r;
627 
628    memset(&args, 0, sizeof(args));
629 
630    assert(initial_domains);
631    assert((initial_domains &
632            ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
633 
634    args.size = size;
635    args.alignment = alignment;
636    args.initial_domain = initial_domains;
637    args.flags = 0;
638 
639    /* If VRAM is just stolen system memory, allow both VRAM and
640     * GTT, whichever has free space. If a buffer is evicted from
641     * VRAM to GTT, it will stay there.
642     */
643    if (!rws->info.has_dedicated_vram)
644       args.initial_domain |= RADEON_DOMAIN_GTT;
645 
646    if (flags & RADEON_FLAG_GTT_WC)
647       args.flags |= RADEON_GEM_GTT_WC;
648    if (flags & RADEON_FLAG_NO_CPU_ACCESS)
649       args.flags |= RADEON_GEM_NO_CPU_ACCESS;
650 
651    if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
652                            &args, sizeof(args))) {
653       fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
654       fprintf(stderr, "radeon:    size      : %u bytes\n", size);
655       fprintf(stderr, "radeon:    alignment : %u bytes\n", alignment);
656       fprintf(stderr, "radeon:    domains   : %u\n", args.initial_domain);
657       fprintf(stderr, "radeon:    flags     : %u\n", args.flags);
658       return NULL;
659    }
660 
661    assert(args.handle != 0);
662 
663    bo = CALLOC_STRUCT(radeon_bo);
664    if (!bo)
665       return NULL;
666 
667    pipe_reference_init(&bo->base.reference, 1);
668    bo->base.alignment_log2 = util_logbase2(alignment);
669    bo->base.usage = 0;
670    bo->base.size = size;
671    bo->base.vtbl = &radeon_bo_vtbl;
672    bo->rws = rws;
673    bo->handle = args.handle;
674    bo->va = 0;
675    bo->initial_domain = initial_domains;
676    bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
677    (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
678 
679    if (heap >= 0) {
680       pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
681                           heap);
682    }
683 
684    if (rws->info.r600_has_virtual_memory) {
685       struct drm_radeon_gem_va va;
686       unsigned va_gap_size;
687 
688       va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
689 
690       if (flags & RADEON_FLAG_32BIT) {
691          bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32,
692                                        size + va_gap_size, alignment);
693          assert(bo->va + size < rws->vm32.end);
694       } else {
695          bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment);
696       }
697 
698       va.handle = bo->handle;
699       va.vm_id = 0;
700       va.operation = RADEON_VA_MAP;
701       va.flags = RADEON_VM_PAGE_READABLE |
702                  RADEON_VM_PAGE_WRITEABLE |
703                  RADEON_VM_PAGE_SNOOPED;
704       va.offset = bo->va;
705       r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
706       if (r && va.operation == RADEON_VA_RESULT_ERROR) {
707          fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
708          fprintf(stderr, "radeon:    size      : %d bytes\n", size);
709          fprintf(stderr, "radeon:    alignment : %d bytes\n", alignment);
710          fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
711          fprintf(stderr, "radeon:    va        : 0x%016llx\n", (unsigned long long)bo->va);
712          radeon_bo_destroy(NULL, &bo->base);
713          return NULL;
714       }
715       mtx_lock(&rws->bo_handles_mutex);
716       if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
717          struct pb_buffer *b = &bo->base;
718          struct radeon_bo *old_bo =
719                _mesa_hash_table_u64_search(rws->bo_vas, va.offset);
720 
721          mtx_unlock(&rws->bo_handles_mutex);
722          pb_reference(&b, &old_bo->base);
723          return radeon_bo(b);
724       }
725 
726       _mesa_hash_table_u64_insert(rws->bo_vas, bo->va, bo);
727       mtx_unlock(&rws->bo_handles_mutex);
728    }
729 
730    if (initial_domains & RADEON_DOMAIN_VRAM)
731       rws->allocated_vram += align(size, rws->info.gart_page_size);
732    else if (initial_domains & RADEON_DOMAIN_GTT)
733       rws->allocated_gtt += align(size, rws->info.gart_page_size);
734 
735    return bo;
736 }
737 
738 bool radeon_bo_can_reclaim(void *winsys, struct pb_buffer *_buf)
739 {
740    struct radeon_bo *bo = radeon_bo(_buf);
741 
742    if (radeon_bo_is_referenced_by_any_cs(bo))
743       return false;
744 
745    return radeon_bo_wait(winsys, _buf, 0, RADEON_USAGE_READWRITE);
746 }
747 
748 bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
749 {
750    struct radeon_bo *bo = container_of(entry, struct radeon_bo, u.slab.entry);
751 
752    return radeon_bo_can_reclaim(NULL, &bo->base);
753 }
754 
755 static void radeon_bo_slab_destroy(void *winsys, struct pb_buffer *_buf)
756 {
757    struct radeon_bo *bo = radeon_bo(_buf);
758 
759    assert(!bo->handle);
760 
761    pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
762 }
763 
764 static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
765    radeon_bo_slab_destroy
766    /* other functions are never called */
767 };
768 
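/* Slab backend for pb_slabs: allocate one 64 KiB real BO and carve it into
 * equally sized entries. Each entry is a radeon_bo without its own GEM
 * handle (bo->handle == 0); it points at the parent via u.slab.real and gets
 * bo->va = parent va + i * entry_size.
 */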
769 struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
770                                      unsigned entry_size,
771                                      unsigned group_index)
772 {
773    struct radeon_drm_winsys *ws = priv;
774    struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
775    enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
776    enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
777    unsigned base_hash;
778 
779    if (!slab)
780       return NULL;
781 
782    slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
783                                                     64 * 1024, 64 * 1024,
784                                                     domains, flags));
785    if (!slab->buffer)
786       goto fail;
787 
788    assert(slab->buffer->handle);
789 
790    slab->base.num_entries = slab->buffer->base.size / entry_size;
791    slab->base.num_free = slab->base.num_entries;
792    slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
793    if (!slab->entries)
794       goto fail_buffer;
795 
796    list_inithead(&slab->base.free);
797 
798    base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);
799 
800    for (unsigned i = 0; i < slab->base.num_entries; ++i) {
801       struct radeon_bo *bo = &slab->entries[i];
802 
803       bo->base.alignment_log2 = util_logbase2(entry_size);
804       bo->base.usage = slab->buffer->base.usage;
805       bo->base.size = entry_size;
806       bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
807       bo->rws = ws;
808       bo->va = slab->buffer->va + i * entry_size;
809       bo->initial_domain = domains;
810       bo->hash = base_hash + i;
811       bo->u.slab.entry.slab = &slab->base;
812       bo->u.slab.entry.group_index = group_index;
813       bo->u.slab.entry.entry_size = entry_size;
814       bo->u.slab.real = slab->buffer;
815 
816       list_addtail(&bo->u.slab.entry.head, &slab->base.free);
817    }
818 
819    return &slab->base;
820 
821 fail_buffer:
822    radeon_ws_bo_reference(&slab->buffer, NULL);
823 fail:
824    FREE(slab);
825    return NULL;
826 }
827 
828 void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
829 {
830    struct radeon_slab *slab = (struct radeon_slab *)pslab;
831 
832    for (unsigned i = 0; i < slab->base.num_entries; ++i) {
833       struct radeon_bo *bo = &slab->entries[i];
834       for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
835          radeon_ws_bo_reference(&bo->u.slab.fences[j], NULL);
836       FREE(bo->u.slab.fences);
837    }
838 
839    FREE(slab->entries);
840    radeon_ws_bo_reference(&slab->buffer, NULL);
841    FREE(slab);
842 }
843 
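/* Translate the EG tile-split field of the tiling flags to bytes and back:
 * the encoded value n corresponds to a tile split of 64 << n bytes
 * (64..4096).
 */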
844 static unsigned eg_tile_split(unsigned tile_split)
845 {
846    switch (tile_split) {
847    case 0:     tile_split = 64;    break;
848    case 1:     tile_split = 128;   break;
849    case 2:     tile_split = 256;   break;
850    case 3:     tile_split = 512;   break;
851    default:
852    case 4:     tile_split = 1024;  break;
853    case 5:     tile_split = 2048;  break;
854    case 6:     tile_split = 4096;  break;
855    }
856    return tile_split;
857 }
858 
859 static unsigned eg_tile_split_rev(unsigned eg_tile_split)
860 {
861    switch (eg_tile_split) {
862    case 64:    return 0;
863    case 128:   return 1;
864    case 256:   return 2;
865    case 512:   return 3;
866    default:
867    case 1024:  return 4;
868    case 2048:  return 5;
869    case 4096:  return 6;
870    }
871 }
872 
873 static void radeon_bo_get_metadata(struct radeon_winsys *rws,
874                                    struct pb_buffer *_buf,
875                                    struct radeon_bo_metadata *md,
876                                    struct radeon_surf *surf)
877 {
878    struct radeon_bo *bo = radeon_bo(_buf);
879    struct drm_radeon_gem_set_tiling args;
880 
881    assert(bo->handle && "must not be called for slab entries");
882 
883    memset(&args, 0, sizeof(args));
884 
885    args.handle = bo->handle;
886 
887    drmCommandWriteRead(bo->rws->fd,
888                        DRM_RADEON_GEM_GET_TILING,
889                        &args,
890                        sizeof(args));
891 
892    if (surf) {
893       if (args.tiling_flags & RADEON_TILING_MACRO)
894          md->mode = RADEON_SURF_MODE_2D;
895       else if (args.tiling_flags & RADEON_TILING_MICRO)
896          md->mode = RADEON_SURF_MODE_1D;
897       else
898          md->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
899 
900       surf->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
901       surf->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
902       surf->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
903       surf->u.legacy.tile_split = eg_tile_split(surf->u.legacy.tile_split);
904       surf->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
905 
906       if (bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT))
907          surf->flags |= RADEON_SURF_SCANOUT;
908       else
909          surf->flags &= ~RADEON_SURF_SCANOUT;
910       return;
911    }
912 
913    md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
914    md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
915    if (args.tiling_flags & RADEON_TILING_MICRO)
916       md->u.legacy.microtile = RADEON_LAYOUT_TILED;
917    else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
918       md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED;
919 
920    if (args.tiling_flags & RADEON_TILING_MACRO)
921       md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
922 
923    md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
924    md->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
925    md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
926    md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
927    md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split);
928    md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
929 }
930 
931 static void radeon_bo_set_metadata(struct radeon_winsys *rws,
932                                    struct pb_buffer *_buf,
933                                    struct radeon_bo_metadata *md,
934                                    struct radeon_surf *surf)
935 {
936    struct radeon_bo *bo = radeon_bo(_buf);
937    struct drm_radeon_gem_set_tiling args;
938 
939    assert(bo->handle && "must not be called for slab entries");
940 
941    memset(&args, 0, sizeof(args));
942 
943    os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);
944 
945    if (surf) {
946       if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
947          args.tiling_flags |= RADEON_TILING_MICRO;
948       if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
949          args.tiling_flags |= RADEON_TILING_MACRO;
950 
951       args.tiling_flags |= (surf->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
952                            RADEON_TILING_EG_BANKW_SHIFT;
953       args.tiling_flags |= (surf->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
954                            RADEON_TILING_EG_BANKH_SHIFT;
955       if (surf->u.legacy.tile_split) {
956          args.tiling_flags |= (eg_tile_split_rev(surf->u.legacy.tile_split) &
957                                RADEON_TILING_EG_TILE_SPLIT_MASK) <<
958                               RADEON_TILING_EG_TILE_SPLIT_SHIFT;
959       }
960       args.tiling_flags |= (surf->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
961                            RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;
962 
963       if (bo->rws->gen >= DRV_SI && !(surf->flags & RADEON_SURF_SCANOUT))
964          args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;
965 
966       args.pitch = surf->u.legacy.level[0].nblk_x * surf->bpe;
967    } else {
968       if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
969          args.tiling_flags |= RADEON_TILING_MICRO;
970       else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED)
971          args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;
972 
973       if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
974          args.tiling_flags |= RADEON_TILING_MACRO;
975 
976       args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
977                            RADEON_TILING_EG_BANKW_SHIFT;
978       args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
979                            RADEON_TILING_EG_BANKH_SHIFT;
980       if (md->u.legacy.tile_split) {
981          args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) &
982                                RADEON_TILING_EG_TILE_SPLIT_MASK) <<
983                               RADEON_TILING_EG_TILE_SPLIT_SHIFT;
984       }
985       args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
986                            RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;
987 
988       if (bo->rws->gen >= DRV_SI && !md->u.legacy.scanout)
989          args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;
990 
991       args.pitch = md->u.legacy.stride;
992    }
993 
994    args.handle = bo->handle;
995 
996    drmCommandWriteRead(bo->rws->fd,
997                        DRM_RADEON_GEM_SET_TILING,
998                        &args,
999                        sizeof(args));
1000 }
1001 
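/* Main buffer_create entry point. Small allocations (up to
 * 1 << RADEON_SLAB_MAX_SIZE_LOG2 bytes) are sub-allocated from slabs,
 * reusable (non-shared) buffers are recycled through the pb_cache, and
 * everything else goes through radeon_create_bo().
 *
 * Illustrative call sequence through the winsys function table (a sketch
 * only; see radeon_drm_bo_init_functions() at the end of this file):
 *
 *    struct pb_buffer *buf =
 *       ws->base.buffer_create(&ws->base, 4096, 4096, RADEON_DOMAIN_GTT,
 *                              RADEON_FLAG_NO_INTERPROCESS_SHARING);
 *    void *ptr = ws->base.buffer_map(&ws->base, buf, NULL, PIPE_MAP_WRITE);
 *    ...write through ptr...
 *    ws->base.buffer_unmap(&ws->base, buf);
 *    pb_reference(&buf, NULL);
 */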
1002 static struct pb_buffer *
1003 radeon_winsys_bo_create(struct radeon_winsys *rws,
1004                         uint64_t size,
1005                         unsigned alignment,
1006                         enum radeon_bo_domain domain,
1007                         enum radeon_bo_flag flags)
1008 {
1009    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1010    struct radeon_bo *bo;
1011 
1012    radeon_canonicalize_bo_flags(&domain, &flags);
1013 
1014    assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */
1015 
1016    /* Only 32-bit sizes are supported. */
1017    if (size > UINT_MAX)
1018       return NULL;
1019 
1020    int heap = radeon_get_heap_index(domain, flags);
1021 
1022    /* Sub-allocate small buffers from slabs. */
1023    if (heap >= 0 &&
1024        size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
1025        ws->info.r600_has_virtual_memory &&
1026        alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
1027       struct pb_slab_entry *entry;
1028 
1029       entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
1030       if (!entry) {
1031          /* Clear the cache and try again. */
1032          pb_cache_release_all_buffers(&ws->bo_cache);
1033 
1034          entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
1035       }
1036       if (!entry)
1037          return NULL;
1038 
1039       bo = container_of(entry, struct radeon_bo, u.slab.entry);
1040 
1041       pipe_reference_init(&bo->base.reference, 1);
1042 
1043       return &bo->base;
1044    }
1045 
1046    /* Align size to page size. This is the minimum alignment for normal
1047     * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
1048     * like constant/uniform buffers, can benefit from better and more reuse.
1049     */
1050    size = align(size, ws->info.gart_page_size);
1051    alignment = align(alignment, ws->info.gart_page_size);
1052 
1053    bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
1054                             !(flags & RADEON_FLAG_DISCARDABLE);
1055 
1056    /* Shared resources don't use cached heaps. */
1057    if (use_reusable_pool) {
1058       /* RADEON_FLAG_NO_SUBALLOC is irrelevant for the cache. */
1059       heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_NO_SUBALLOC);
1060       assert(heap >= 0 && heap < RADEON_NUM_HEAPS);
1061 
1062       bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
1063                                              0, heap));
1064       if (bo)
1065          return &bo->base;
1066    }
1067 
1068    bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
1069    if (!bo) {
1070       /* Clear the cache and try again. */
1071       if (ws->info.r600_has_virtual_memory)
1072          pb_slabs_reclaim(&ws->bo_slabs);
1073       pb_cache_release_all_buffers(&ws->bo_cache);
1074       bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
1075       if (!bo)
1076          return NULL;
1077    }
1078 
1079    bo->u.real.use_reusable_pool = use_reusable_pool;
1080 
1081    mtx_lock(&ws->bo_handles_mutex);
1082    _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1083    mtx_unlock(&ws->bo_handles_mutex);
1084 
1085    return &bo->base;
1086 }
1087 
1088 static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
1089                                                    void *pointer, uint64_t size,
1090                                                    enum radeon_bo_flag flags)
1091 {
1092    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1093    struct drm_radeon_gem_userptr args;
1094    struct radeon_bo *bo;
1095    int r;
1096 
1097    bo = CALLOC_STRUCT(radeon_bo);
1098    if (!bo)
1099       return NULL;
1100 
1101    memset(&args, 0, sizeof(args));
1102    args.addr = (uintptr_t)pointer;
1103    args.size = align(size, ws->info.gart_page_size);
1104 
1105    if (flags & RADEON_FLAG_READ_ONLY)
1106       args.flags = RADEON_GEM_USERPTR_READONLY |
1107                    RADEON_GEM_USERPTR_VALIDATE;
1108    else
1109       args.flags = RADEON_GEM_USERPTR_ANONONLY |
1110                    RADEON_GEM_USERPTR_REGISTER |
1111                    RADEON_GEM_USERPTR_VALIDATE;
1112 
1113    if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
1114                            &args, sizeof(args))) {
1115       FREE(bo);
1116       return NULL;
1117    }
1118 
1119    assert(args.handle != 0);
1120 
1121    mtx_lock(&ws->bo_handles_mutex);
1122 
1123    /* Initialize it. */
1124    pipe_reference_init(&bo->base.reference, 1);
1125    bo->handle = args.handle;
1126    bo->base.alignment_log2 = 0;
1127    bo->base.size = size;
1128    bo->base.vtbl = &radeon_bo_vtbl;
1129    bo->rws = ws;
1130    bo->user_ptr = pointer;
1131    bo->va = 0;
1132    bo->initial_domain = RADEON_DOMAIN_GTT;
1133    bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
1134    (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
1135 
1136    _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1137 
1138    mtx_unlock(&ws->bo_handles_mutex);
1139 
1140    if (ws->info.r600_has_virtual_memory) {
1141       struct drm_radeon_gem_va va;
1142 
1143       bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);
1144 
1145       va.handle = bo->handle;
1146       va.operation = RADEON_VA_MAP;
1147       va.vm_id = 0;
1148       va.offset = bo->va;
1149       va.flags = RADEON_VM_PAGE_READABLE |
1150                  RADEON_VM_PAGE_WRITEABLE |
1151                  RADEON_VM_PAGE_SNOOPED;
1152       va.offset = bo->va;
1153       r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1154       if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1155          fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1156          radeon_bo_destroy(NULL, &bo->base);
1157          return NULL;
1158       }
1159       mtx_lock(&ws->bo_handles_mutex);
1160       if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1161          struct pb_buffer *b = &bo->base;
1162          struct radeon_bo *old_bo =
1163                _mesa_hash_table_u64_search(ws->bo_vas, va.offset);
1164 
1165          mtx_unlock(&ws->bo_handles_mutex);
1166          pb_reference(&b, &old_bo->base);
1167          return b;
1168       }
1169 
1170       _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
1171       mtx_unlock(&ws->bo_handles_mutex);
1172    }
1173 
1174    ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
1175 
1176    return (struct pb_buffer*)bo;
1177 }
1178 
1179 static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
1180                                                       struct winsys_handle *whandle,
1181                                                       unsigned vm_alignment,
1182                                                       bool is_dri_prime_linear_buffer)
1183 {
1184    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1185    struct radeon_bo *bo;
1186    int r;
1187    unsigned handle;
1188    uint64_t size = 0;
1189 
1190    /* We must maintain a list of pairs <handle, bo>, so that we always return
1191     * the same BO for one particular handle. If we didn't do that and created
1192     * more than one BO for the same handle and then relocated them in a CS,
1193     * we would hit a deadlock in the kernel.
1194     *
1195     * The list of pairs is guarded by a mutex, of course. */
1196    mtx_lock(&ws->bo_handles_mutex);
1197 
1198    if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
1199       /* First check if there already is an existing bo for the handle. */
1200       bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
1201    } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
1202       /* We must first get the GEM handle, as fds are unreliable keys */
1203       r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
1204       if (r)
1205          goto fail;
1206       bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
1207    } else {
1208       /* Unknown handle type */
1209       goto fail;
1210    }
1211 
1212    if (bo) {
1213       /* Increase the refcount. */
1214       struct pb_buffer *b = NULL;
1215       pb_reference(&b, &bo->base);
1216       goto done;
1217    }
1218 
1219    /* There isn't, create a new one. */
1220    bo = CALLOC_STRUCT(radeon_bo);
1221    if (!bo) {
1222       goto fail;
1223    }
1224 
1225    if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
1226       struct drm_gem_open open_arg = {};
1227       memset(&open_arg, 0, sizeof(open_arg));
1228       /* Open the BO. */
1229       open_arg.name = whandle->handle;
1230       if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
1231          FREE(bo);
1232          goto fail;
1233       }
1234       handle = open_arg.handle;
1235       size = open_arg.size;
1236       bo->flink_name = whandle->handle;
1237    } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
1238       size = lseek(whandle->handle, 0, SEEK_END);
1239       /*
1240        * Could check errno to determine whether the kernel is new enough, but
1241        * it doesn't really matter why this failed, just that it failed.
1242        */
1243       if (size == (off_t)-1) {
1244          FREE(bo);
1245          goto fail;
1246       }
1247       lseek(whandle->handle, 0, SEEK_SET);
1248    }
1249 
1250    assert(handle != 0);
1251 
1252    bo->handle = handle;
1253 
1254    /* Initialize it. */
1255    pipe_reference_init(&bo->base.reference, 1);
1256    bo->base.alignment_log2 = 0;
1257    bo->base.size = (unsigned) size;
1258    bo->base.vtbl = &radeon_bo_vtbl;
1259    bo->rws = ws;
1260    bo->va = 0;
1261    bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
1262    (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
1263 
1264    if (bo->flink_name)
1265       _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1266 
1267    _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1268 
1269 done:
1270    mtx_unlock(&ws->bo_handles_mutex);
1271 
1272    if (ws->info.r600_has_virtual_memory && !bo->va) {
1273       struct drm_radeon_gem_va va;
1274 
1275       bo->va = radeon_bomgr_find_va64(ws, bo->base.size, vm_alignment);
1276 
1277       va.handle = bo->handle;
1278       va.operation = RADEON_VA_MAP;
1279       va.vm_id = 0;
1280       va.offset = bo->va;
1281       va.flags = RADEON_VM_PAGE_READABLE |
1282                  RADEON_VM_PAGE_WRITEABLE |
1283                  RADEON_VM_PAGE_SNOOPED;
1284       va.offset = bo->va;
1285       r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1286       if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1287          fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1288          radeon_bo_destroy(NULL, &bo->base);
1289          return NULL;
1290       }
1291       mtx_lock(&ws->bo_handles_mutex);
1292       if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1293          struct pb_buffer *b = &bo->base;
1294          struct radeon_bo *old_bo =
1295                _mesa_hash_table_u64_search(ws->bo_vas, va.offset);
1296 
1297          mtx_unlock(&ws->bo_handles_mutex);
1298          pb_reference(&b, &old_bo->base);
1299          return b;
1300       }
1301 
1302       _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
1303       mtx_unlock(&ws->bo_handles_mutex);
1304    }
1305 
1306    bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);
1307 
1308    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
1309       ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
1310    else if (bo->initial_domain & RADEON_DOMAIN_GTT)
1311       ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
1312 
1313    return (struct pb_buffer*)bo;
1314 
1315 fail:
1316    mtx_unlock(&ws->bo_handles_mutex);
1317    return NULL;
1318 }
1319 
1320 static bool radeon_winsys_bo_get_handle(struct radeon_winsys *rws,
1321                                         struct pb_buffer *buffer,
1322                                         struct winsys_handle *whandle)
1323 {
1324    struct drm_gem_flink flink;
1325    struct radeon_bo *bo = radeon_bo(buffer);
1326    struct radeon_drm_winsys *ws = bo->rws;
1327 
1328    /* Don't allow exports of slab entries. */
1329    if (!bo->handle)
1330       return false;
1331 
1332    memset(&flink, 0, sizeof(flink));
1333 
1334    bo->u.real.use_reusable_pool = false;
1335 
1336    if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
1337       if (!bo->flink_name) {
1338          flink.handle = bo->handle;
1339 
1340          if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
1341             return false;
1342          }
1343 
1344          bo->flink_name = flink.name;
1345 
1346          mtx_lock(&ws->bo_handles_mutex);
1347          _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1348          mtx_unlock(&ws->bo_handles_mutex);
1349       }
1350       whandle->handle = bo->flink_name;
1351    } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
1352       whandle->handle = bo->handle;
1353    } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
1354       if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
1355          return false;
1356    }
1357 
1358    return true;
1359 }
1360 
1361 static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer *buf)
1362 {
1363    return ((struct radeon_bo*)buf)->user_ptr != NULL;
1364 }
1365 
1366 static bool radeon_winsys_bo_is_suballocated(struct pb_buffer *buf)
1367 {
1368    return !((struct radeon_bo*)buf)->handle;
1369 }
1370 
1371 static uint64_t radeon_winsys_bo_va(struct pb_buffer *buf)
1372 {
1373    return ((struct radeon_bo*)buf)->va;
1374 }
1375 
1376 static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer *buf)
1377 {
1378    struct radeon_bo *bo = radeon_bo(buf);
1379 
1380    if (bo->handle)
1381       return 0;
1382 
1383    return bo->va - bo->u.slab.real->va;
1384 }
1385 
1386 void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
1387 {
1388    ws->base.buffer_set_metadata = radeon_bo_set_metadata;
1389    ws->base.buffer_get_metadata = radeon_bo_get_metadata;
1390    ws->base.buffer_map = radeon_bo_map;
1391    ws->base.buffer_unmap = radeon_bo_unmap;
1392    ws->base.buffer_wait = radeon_bo_wait;
1393    ws->base.buffer_create = radeon_winsys_bo_create;
1394    ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
1395    ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
1396    ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
1397    ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated;
1398    ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
1399    ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
1400    ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
1401    ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
1402 }
1403