/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#include "radeon_drm_cs.h"

#include "util/u_hash_table.h"
#include "util/u_memory.h"
#include "os/os_thread.h"
#include "os/os_mman.h"
#include "util/os_time.h"

#include "frontend/drm_driver.h"

#include <sys/ioctl.h>
#include <xf86drm.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <inttypes.h>

static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags);

static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
{
   return (struct radeon_bo *)bo;
}

struct radeon_bo_va_hole {
   struct list_head list;
   uint64_t offset;
   uint64_t size;
};

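/* Ask the kernel whether a real (non-slab) BO is still in use by the GPU. */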
static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
{
   struct drm_radeon_gem_busy args = {0};

   args.handle = bo->handle;
   return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
                              &args, sizeof(args)) != 0;
}

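/* Busy check that also handles slab entries: fences that are already idle are
 * dropped from the fence list, and the BO is reported busy as soon as one
 * busy fence is found.
 */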
static bool radeon_bo_is_busy(struct radeon_bo *bo)
{
   unsigned num_idle;
   bool busy = false;

   if (bo->handle)
      return radeon_real_bo_is_busy(bo);

   mtx_lock(&bo->rws->bo_fence_lock);
   for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
      if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
         busy = true;
         break;
      }
      radeon_ws_bo_reference(&bo->u.slab.fences[num_idle], NULL);
   }
   memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
           (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
   bo->u.slab.num_fences -= num_idle;
   mtx_unlock(&bo->rws->bo_fence_lock);

   return busy;
}

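/* Block until the kernel reports the real BO as idle. */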
static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
{
   struct drm_radeon_gem_wait_idle args = {0};

   args.handle = bo->handle;
   while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
                          &args, sizeof(args)) == -EBUSY);
}

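/* Wait for idle. Slab entries wait on each fence in their list in turn,
 * dropping the fence lock while waiting and re-validating the list afterwards.
 */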
static void radeon_bo_wait_idle(struct radeon_bo *bo)
{
   if (bo->handle) {
      radeon_real_bo_wait_idle(bo);
   } else {
      mtx_lock(&bo->rws->bo_fence_lock);
      while (bo->u.slab.num_fences) {
         struct radeon_bo *fence = NULL;
         radeon_ws_bo_reference(&fence, bo->u.slab.fences[0]);
         mtx_unlock(&bo->rws->bo_fence_lock);

         /* Wait without holding the fence lock. */
         radeon_real_bo_wait_idle(fence);

         mtx_lock(&bo->rws->bo_fence_lock);
         if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
            radeon_ws_bo_reference(&bo->u.slab.fences[0], NULL);
            memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
                    (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
            bo->u.slab.num_fences--;
         }
         radeon_ws_bo_reference(&fence, NULL);
      }
      mtx_unlock(&bo->rws->bo_fence_lock);
   }
}

static bool radeon_bo_wait(struct radeon_winsys *rws,
                           struct pb_buffer *_buf, uint64_t timeout,
                           unsigned usage)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   int64_t abs_timeout;

   /* No timeout. Just query. */
   if (timeout == 0)
      return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);

   abs_timeout = os_time_get_absolute_timeout(timeout);

   /* Wait if any ioctl is being submitted with this buffer. */
   if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
      return false;

   /* Infinite timeout. */
   if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
      radeon_bo_wait_idle(bo);
      return true;
   }

   /* Other timeouts need to be emulated with a loop. */
   while (radeon_bo_is_busy(bo)) {
      if (os_time_get_nano() >= abs_timeout)
         return false;
      os_time_sleep(10);
   }

   return true;
}

static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
{
   /* Zero domains the driver doesn't understand. */
   domain &= RADEON_DOMAIN_VRAM_GTT;

   /* If no domain is set, we must set something... */
   if (!domain)
      domain = RADEON_DOMAIN_VRAM_GTT;

   return domain;
}

static enum radeon_bo_domain radeon_bo_get_initial_domain(
      struct pb_buffer *buf)
{
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   struct drm_radeon_gem_op args;

   memset(&args, 0, sizeof(args));
   args.handle = bo->handle;
   args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;

   if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
                           &args, sizeof(args))) {
      fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
              bo, bo->handle);
      /* Default domain as returned by get_valid_domain. */
      return RADEON_DOMAIN_VRAM_GTT;
   }

   /* GEM domains and winsys domains are defined the same. */
   return get_valid_domain(args.value);
}

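/* First-fit virtual address allocator: reuse a hole from the heap's free list
 * if one is large enough (splitting off any alignment waste as a new hole),
 * otherwise carve the range out of the unused space above heap->start.
 * Returns 0 when the heap is exhausted.
 */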
static uint64_t radeon_bomgr_find_va(const struct radeon_info *info,
                                     struct radeon_vm_heap *heap,
                                     uint64_t size, uint64_t alignment)
{
   struct radeon_bo_va_hole *hole, *n;
   uint64_t offset = 0, waste = 0;

   /* All VM address space holes will implicitly start aligned to the
    * size alignment, so we don't need to sanitize the alignment here
    */
   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   /* first look for a hole */
   LIST_FOR_EACH_ENTRY_SAFE(hole, n, &heap->holes, list) {
      offset = hole->offset;
      waste = offset % alignment;
      waste = waste ? alignment - waste : 0;
      offset += waste;
      if (offset >= (hole->offset + hole->size)) {
         continue;
      }
      if (!waste && hole->size == size) {
         offset = hole->offset;
         list_del(&hole->list);
         FREE(hole);
         mtx_unlock(&heap->mutex);
         return offset;
      }
      if ((hole->size - waste) > size) {
         if (waste) {
            n = CALLOC_STRUCT(radeon_bo_va_hole);
            n->size = waste;
            n->offset = hole->offset;
            list_add(&n->list, &hole->list);
         }
         hole->size -= (size + waste);
         hole->offset += size + waste;
         mtx_unlock(&heap->mutex);
         return offset;
      }
      if ((hole->size - waste) == size) {
         hole->size = waste;
         mtx_unlock(&heap->mutex);
         return offset;
      }
   }

   offset = heap->start;
   waste = offset % alignment;
   waste = waste ? alignment - waste : 0;

   if (offset + waste + size > heap->end) {
      mtx_unlock(&heap->mutex);
      return 0;
   }

   if (waste) {
      n = CALLOC_STRUCT(radeon_bo_va_hole);
      n->size = waste;
      n->offset = offset;
      list_add(&n->list, &heap->holes);
   }
   offset += waste;
   heap->start += size + waste;
   mtx_unlock(&heap->mutex);
   return offset;
}

static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws,
                                       uint64_t size, uint64_t alignment)
{
   uint64_t va = 0;

   /* Try to allocate from the 64-bit address space first.
    * If it doesn't exist (start = 0) or if it doesn't have enough space,
    * fall back to the 32-bit address space.
    */
   if (ws->vm64.start)
      va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment);
   if (!va)
      va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);
   return va;
}

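/* Return a virtual address range to the heap: either lower heap->start if the
 * range sits right below it, or add it to the hole list, merging it with
 * adjacent holes where possible.
 */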
static void radeon_bomgr_free_va(const struct radeon_info *info,
                                 struct radeon_vm_heap *heap,
                                 uint64_t va, uint64_t size)
{
   struct radeon_bo_va_hole *hole = NULL;

   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   if ((va + size) == heap->start) {
      heap->start = va;
      /* Delete uppermost hole if it reaches the new top */
      if (!list_is_empty(&heap->holes)) {
         hole = container_of(heap->holes.next, struct radeon_bo_va_hole, list);
         if ((hole->offset + hole->size) == va) {
            heap->start = hole->offset;
            list_del(&hole->list);
            FREE(hole);
         }
      }
   } else {
      struct radeon_bo_va_hole *next;

      hole = container_of(&heap->holes, struct radeon_bo_va_hole, list);
      LIST_FOR_EACH_ENTRY(next, &heap->holes, list) {
         if (next->offset < va)
            break;
         hole = next;
      }

      if (&hole->list != &heap->holes) {
         /* Grow upper hole if it's adjacent */
         if (hole->offset == (va + size)) {
            hole->offset = va;
            hole->size += size;
            /* Merge lower hole if it's adjacent */
            if (next != hole && &next->list != &heap->holes &&
                (next->offset + next->size) == va) {
               next->size += hole->size;
               list_del(&hole->list);
               FREE(hole);
            }
            goto out;
         }
      }

      /* Grow lower hole if it's adjacent */
      if (next != hole && &next->list != &heap->holes &&
          (next->offset + next->size) == va) {
         next->size += size;
         goto out;
      }

      /* FIXME on allocation failure we just lose virtual address space
       * maybe print a warning
       */
      next = CALLOC_STRUCT(radeon_bo_va_hole);
      if (next) {
         next->size = size;
         next->offset = va;
         list_add(&next->list, &hole->list);
      }
   }
out:
   mtx_unlock(&heap->mutex);
}

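/* Destroy a real BO: remove it from the handle/name tables, unmap any CPU
 * mapping, release its virtual address range, close the GEM handle, and
 * update the memory accounting.
 */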
void radeon_bo_destroy(void *winsys, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct radeon_drm_winsys *rws = bo->rws;
   struct drm_gem_close args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   mtx_lock(&rws->bo_handles_mutex);
   _mesa_hash_table_remove_key(rws->bo_handles, (void*)(uintptr_t)bo->handle);
   if (bo->flink_name) {
      _mesa_hash_table_remove_key(rws->bo_names,
                                  (void*)(uintptr_t)bo->flink_name);
   }
   mtx_unlock(&rws->bo_handles_mutex);

   if (bo->u.real.ptr)
      os_munmap(bo->u.real.ptr, bo->base.size);

   if (rws->info.r600_has_virtual_memory) {
      if (rws->va_unmap_working) {
         struct drm_radeon_gem_va va;

         va.handle = bo->handle;
         va.vm_id = 0;
         va.operation = RADEON_VA_UNMAP;
         va.flags = RADEON_VM_PAGE_READABLE |
                    RADEON_VM_PAGE_WRITEABLE |
                    RADEON_VM_PAGE_SNOOPED;
         va.offset = bo->va;

         if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
                                 sizeof(va)) != 0 &&
             va.operation == RADEON_VA_RESULT_ERROR) {
            fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
            fprintf(stderr, "radeon: size : %"PRIu64" bytes\n", bo->base.size);
            fprintf(stderr, "radeon: va : 0x%"PRIx64"\n", bo->va);
         }
      }

      radeon_bomgr_free_va(&rws->info,
                           bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64,
                           bo->va, bo->base.size);
   }

   /* Close object. */
   args.handle = bo->handle;
   drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);

   mtx_destroy(&bo->u.real.map_mutex);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);

   if (bo->u.real.map_count >= 1) {
      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
         bo->rws->mapped_vram -= bo->base.size;
      else
         bo->rws->mapped_gtt -= bo->base.size;
      bo->rws->num_mapped_buffers--;
   }

   FREE(bo);
}

static void radeon_bo_destroy_or_cache(void *winsys, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);

   assert(bo->handle && "must not be called for slab entries");

   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(&bo->u.real.cache_entry);
   else
      radeon_bo_destroy(NULL, _buf);
}

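/* CPU-map a BO. Slab entries map through their parent buffer and add their
 * offset within it. The mapping of the real BO is cached and reference-counted
 * under map_mutex; on mmap failure the BO cache is flushed and the mapping is
 * retried once.
 */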
void *radeon_bo_do_map(struct radeon_bo *bo)
{
   struct drm_radeon_gem_mmap args = {0};
   void *ptr;
   unsigned offset;

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

   if (bo->handle) {
      offset = 0;
   } else {
      offset = bo->va - bo->u.slab.real->va;
      bo = bo->u.slab.real;
   }

   /* Map the buffer. */
   mtx_lock(&bo->u.real.map_mutex);
   /* Return the pointer if it's already mapped. */
   if (bo->u.real.ptr) {
      bo->u.real.map_count++;
      mtx_unlock(&bo->u.real.map_mutex);
      return (uint8_t*)bo->u.real.ptr + offset;
   }
   args.handle = bo->handle;
   args.offset = 0;
   args.size = (uint64_t)bo->base.size;
   if (drmCommandWriteRead(bo->rws->fd,
                           DRM_RADEON_GEM_MMAP,
                           &args,
                           sizeof(args))) {
      mtx_unlock(&bo->u.real.map_mutex);
      fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
              bo, bo->handle);
      return NULL;
   }

   ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                 bo->rws->fd, args.addr_ptr);
   if (ptr == MAP_FAILED) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&bo->rws->bo_cache);

      ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                    bo->rws->fd, args.addr_ptr);
      if (ptr == MAP_FAILED) {
         mtx_unlock(&bo->u.real.map_mutex);
         fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
         return NULL;
      }
   }
   bo->u.real.ptr = ptr;
   bo->u.real.map_count = 1;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->mapped_vram += bo->base.size;
   else
      bo->rws->mapped_gtt += bo->base.size;
   bo->rws->num_mapped_buffers++;

   mtx_unlock(&bo->u.real.map_mutex);
   return (uint8_t*)bo->u.real.ptr + offset;
}

static void *radeon_bo_map(struct radeon_winsys *rws,
                           struct pb_buffer *buf,
                           struct radeon_cmdbuf *rcs,
                           enum pipe_map_flags usage)
{
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   struct radeon_drm_cs *cs = rcs ? radeon_drm_cs(rcs) : NULL;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_MAP_DONTBLOCK) {
         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!radeon_bo_wait(rws, (struct pb_buffer*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!radeon_bo_wait(rws, (struct pb_buffer*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
            }
            radeon_bo_wait(rws, (struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (radeon_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data,
                               RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
               } else {
                  /* Try to avoid busy-waiting in radeon_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     radeon_drm_cs_sync_flush(rcs);
               }
            }

            radeon_bo_wait(rws, (struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->rws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   return radeon_bo_do_map(bo);
}

static void radeon_bo_unmap(struct radeon_winsys *rws, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = (struct radeon_bo*)_buf;

   if (bo->user_ptr)
      return;

   if (!bo->handle)
      bo = bo->u.slab.real;

   mtx_lock(&bo->u.real.map_mutex);
   if (!bo->u.real.ptr) {
      mtx_unlock(&bo->u.real.map_mutex);
      return; /* it's not been mapped */
   }

   assert(bo->u.real.map_count);
   if (--bo->u.real.map_count) {
      mtx_unlock(&bo->u.real.map_mutex);
      return; /* it's been mapped multiple times */
   }

   os_munmap(bo->u.real.ptr, bo->base.size);
   bo->u.real.ptr = NULL;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->mapped_vram -= bo->base.size;
   else
      bo->rws->mapped_gtt -= bo->base.size;
   bo->rws->num_mapped_buffers--;

   mtx_unlock(&bo->u.real.map_mutex);
}

static const struct pb_vtbl radeon_bo_vtbl = {
   radeon_bo_destroy_or_cache
   /* other functions are never called */
};

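/* Allocate a new real GEM BO. When virtual memory is available, a VA range is
 * allocated and mapped as well; if the kernel reports that the VA is already
 * bound to another BO, that existing BO is returned instead.
 */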
static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
                                          unsigned size, unsigned alignment,
                                          unsigned initial_domains,
                                          unsigned flags,
                                          int heap)
{
   struct radeon_bo *bo;
   struct drm_radeon_gem_create args;
   int r;

   memset(&args, 0, sizeof(args));

   assert(initial_domains);
   assert((initial_domains &
           ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);

   args.size = size;
   args.alignment = alignment;
   args.initial_domain = initial_domains;
   args.flags = 0;

   /* If VRAM is just stolen system memory, allow both VRAM and
    * GTT, whichever has free space. If a buffer is evicted from
    * VRAM to GTT, it will stay there.
    */
   if (!rws->info.has_dedicated_vram)
      args.initial_domain |= RADEON_DOMAIN_GTT;

   if (flags & RADEON_FLAG_GTT_WC)
      args.flags |= RADEON_GEM_GTT_WC;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      args.flags |= RADEON_GEM_NO_CPU_ACCESS;

   if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
                           &args, sizeof(args))) {
      fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
      fprintf(stderr, "radeon: size : %u bytes\n", size);
      fprintf(stderr, "radeon: alignment : %u bytes\n", alignment);
      fprintf(stderr, "radeon: domains : %u\n", args.initial_domain);
      fprintf(stderr, "radeon: flags : %u\n", args.flags);
      return NULL;
   }

   assert(args.handle != 0);

   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo)
      return NULL;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(alignment);
   bo->base.usage = 0;
   bo->base.size = size;
   bo->base.vtbl = &radeon_bo_vtbl;
   bo->rws = rws;
   bo->handle = args.handle;
   bo->va = 0;
   bo->initial_domain = initial_domains;
   bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   if (heap >= 0) {
      pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                          heap);
   }

   if (rws->info.r600_has_virtual_memory) {
      struct drm_radeon_gem_va va;
      unsigned va_gap_size;

      va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;

      if (flags & RADEON_FLAG_32BIT) {
         bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32,
                                       size + va_gap_size, alignment);
         assert(bo->va + size < rws->vm32.end);
      } else {
         bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment);
      }

      va.handle = bo->handle;
      va.vm_id = 0;
      va.operation = RADEON_VA_MAP;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
         fprintf(stderr, "radeon: size : %d bytes\n", size);
         fprintf(stderr, "radeon: alignment : %d bytes\n", alignment);
         fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
         fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va);
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&rws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer *b = &bo->base;
         struct radeon_bo *old_bo =
               _mesa_hash_table_u64_search(rws->bo_vas, va.offset);

         mtx_unlock(&rws->bo_handles_mutex);
         pb_reference(&b, &old_bo->base);
         return radeon_bo(b);
      }

      _mesa_hash_table_u64_insert(rws->bo_vas, bo->va, bo);
      mtx_unlock(&rws->bo_handles_mutex);
   }

   if (initial_domains & RADEON_DOMAIN_VRAM)
      rws->allocated_vram += align(size, rws->info.gart_page_size);
   else if (initial_domains & RADEON_DOMAIN_GTT)
      rws->allocated_gtt += align(size, rws->info.gart_page_size);

   return bo;
}

bool radeon_bo_can_reclaim(void *winsys, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);

   if (radeon_bo_is_referenced_by_any_cs(bo))
      return false;

   return radeon_bo_wait(winsys, _buf, 0, RADEON_USAGE_READWRITE);
}

bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct radeon_bo *bo = container_of(entry, struct radeon_bo, u.slab.entry);

   return radeon_bo_can_reclaim(NULL, &bo->base);
}

static void radeon_bo_slab_destroy(void *winsys, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);

   assert(!bo->handle);

   pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
}

static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
   radeon_bo_slab_destroy
   /* other functions are never called */
};

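/* pb_slabs callback: allocate a 64 KB real buffer and carve it into equally
 * sized slab entries that share the parent buffer's storage and VA range.
 */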
struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
                                     unsigned entry_size,
                                     unsigned group_index)
{
   struct radeon_drm_winsys *ws = priv;
   struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
   enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
   enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
   unsigned base_hash;

   if (!slab)
      return NULL;

   slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
                                                    64 * 1024, 64 * 1024,
                                                    domains, flags));
   if (!slab->buffer)
      goto fail;

   assert(slab->buffer->handle);

   slab->base.num_entries = slab->buffer->base.size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_buffer;

   list_inithead(&slab->base.free);

   base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct radeon_bo *bo = &slab->entries[i];

      bo->base.alignment_log2 = util_logbase2(entry_size);
      bo->base.usage = slab->buffer->base.usage;
      bo->base.size = entry_size;
      bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
      bo->rws = ws;
      bo->va = slab->buffer->va + i * entry_size;
      bo->initial_domain = domains;
      bo->hash = base_hash + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.entry.group_index = group_index;
      bo->u.slab.entry.entry_size = entry_size;
      bo->u.slab.real = slab->buffer;

      list_addtail(&bo->u.slab.entry.head, &slab->base.free);
   }

   return &slab->base;

fail_buffer:
   radeon_ws_bo_reference(&slab->buffer, NULL);
fail:
   FREE(slab);
   return NULL;
}

void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
{
   struct radeon_slab *slab = (struct radeon_slab *)pslab;

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct radeon_bo *bo = &slab->entries[i];
      for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
         radeon_ws_bo_reference(&bo->u.slab.fences[j], NULL);
      FREE(bo->u.slab.fences);
   }

   FREE(slab->entries);
   radeon_ws_bo_reference(&slab->buffer, NULL);
   FREE(slab);
}

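/* Convert the 3-bit Evergreen tile-split field from the kernel tiling flags
 * to a size in bytes (64..4096); eg_tile_split_rev does the inverse.
 */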
static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:  tile_split = 64;   break;
   case 1:  tile_split = 128;  break;
   case 2:  tile_split = 256;  break;
   case 3:  tile_split = 512;  break;
   default:
   case 4:  tile_split = 1024; break;
   case 5:  tile_split = 2048; break;
   case 6:  tile_split = 4096; break;
   }
   return tile_split;
}

static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:    return 0;
   case 128:   return 1;
   case 256:   return 2;
   case 512:   return 3;
   default:
   case 1024:  return 4;
   case 2048:  return 5;
   case 4096:  return 6;
   }
}

static void radeon_bo_get_metadata(struct radeon_winsys *rws,
                                   struct pb_buffer *_buf,
                                   struct radeon_bo_metadata *md,
                                   struct radeon_surf *surf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct drm_radeon_gem_set_tiling args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   args.handle = bo->handle;

   drmCommandWriteRead(bo->rws->fd,
                       DRM_RADEON_GEM_GET_TILING,
                       &args,
                       sizeof(args));

   if (surf) {
      if (args.tiling_flags & RADEON_TILING_MACRO)
         md->mode = RADEON_SURF_MODE_2D;
      else if (args.tiling_flags & RADEON_TILING_MICRO)
         md->mode = RADEON_SURF_MODE_1D;
      else
         md->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;

      surf->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
      surf->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
      surf->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
      surf->u.legacy.tile_split = eg_tile_split(surf->u.legacy.tile_split);
      surf->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;

      if (bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT))
         surf->flags |= RADEON_SURF_SCANOUT;
      else
         surf->flags &= ~RADEON_SURF_SCANOUT;
      return;
   }

   md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
   md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
   if (args.tiling_flags & RADEON_TILING_MICRO)
      md->u.legacy.microtile = RADEON_LAYOUT_TILED;
   else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
      md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED;

   if (args.tiling_flags & RADEON_TILING_MACRO)
      md->u.legacy.macrotile = RADEON_LAYOUT_TILED;

   md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
   md->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
   md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
   md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
   md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split);
   md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
}

static void radeon_bo_set_metadata(struct radeon_winsys *rws,
                                   struct pb_buffer *_buf,
                                   struct radeon_bo_metadata *md,
                                   struct radeon_surf *surf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct drm_radeon_gem_set_tiling args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);

   if (surf) {
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
         args.tiling_flags |= RADEON_TILING_MICRO;
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
         args.tiling_flags |= RADEON_TILING_MACRO;

      args.tiling_flags |= (surf->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
                           RADEON_TILING_EG_BANKW_SHIFT;
      args.tiling_flags |= (surf->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
                           RADEON_TILING_EG_BANKH_SHIFT;
      if (surf->u.legacy.tile_split) {
         args.tiling_flags |= (eg_tile_split_rev(surf->u.legacy.tile_split) &
                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
                              RADEON_TILING_EG_TILE_SPLIT_SHIFT;
      }
      args.tiling_flags |= (surf->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
                           RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

      if (bo->rws->gen >= DRV_SI && !(surf->flags & RADEON_SURF_SCANOUT))
         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

      args.pitch = surf->u.legacy.level[0].nblk_x * surf->bpe;
   } else {
      if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MICRO;
      else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED)
         args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;

      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MACRO;

      args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
                           RADEON_TILING_EG_BANKW_SHIFT;
      args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
                           RADEON_TILING_EG_BANKH_SHIFT;
      if (md->u.legacy.tile_split) {
         args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) &
                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
                              RADEON_TILING_EG_TILE_SPLIT_SHIFT;
      }
      args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
                           RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

      if (bo->rws->gen >= DRV_SI && !md->u.legacy.scanout)
         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

      args.pitch = md->u.legacy.stride;
   }

   args.handle = bo->handle;

   drmCommandWriteRead(bo->rws->fd,
                       DRM_RADEON_GEM_SET_TILING,
                       &args,
                       sizeof(args));
}

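/* Main buffer allocation entry point: small buffers are sub-allocated from
 * slabs; everything else is a real BO, reused from the pb_cache when possible.
 */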
static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_bo *bo;

   radeon_canonicalize_bo_flags(&domain, &flags);

   assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */

   /* Only 32-bit sizes are supported. */
   if (size > UINT_MAX)
      return NULL;

   int heap = radeon_get_heap_index(domain, flags);

   /* Sub-allocate small buffers from slabs. */
   if (heap >= 0 &&
       size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
       ws->info.r600_has_virtual_memory &&
       alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
      struct pb_slab_entry *entry;

      entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      if (!entry) {
         /* Clear the cache and try again. */
         pb_cache_release_all_buffers(&ws->bo_cache);

         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      }
      if (!entry)
         return NULL;

      bo = container_of(entry, struct radeon_bo, u.slab.entry);

      pipe_reference_init(&bo->base.reference, 1);

      return &bo->base;
   }

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   size = align(size, ws->info.gart_page_size);
   alignment = align(alignment, ws->info.gart_page_size);

   bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
                            !(flags & RADEON_FLAG_DISCARDABLE);

   /* Shared resources don't use cached heaps. */
   if (use_reusable_pool) {
      /* RADEON_FLAG_NO_SUBALLOC is irrelevant for the cache. */
      heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_NO_SUBALLOC);
      assert(heap >= 0 && heap < RADEON_NUM_HEAPS);

      bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
                                             0, heap));
      if (bo)
         return &bo->base;
   }

   bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
   if (!bo) {
      /* Clear the cache and try again. */
      if (ws->info.r600_has_virtual_memory)
         pb_slabs_reclaim(&ws->bo_slabs);
      pb_cache_release_all_buffers(&ws->bo_cache);
      bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
      if (!bo)
         return NULL;
   }

   bo->u.real.use_reusable_pool = use_reusable_pool;

   mtx_lock(&ws->bo_handles_mutex);
   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
   mtx_unlock(&ws->bo_handles_mutex);

   return &bo->base;
}

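/* Wrap a pointer to user memory in a GEM userptr BO (GTT domain) and assign
 * it a virtual address.
 */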
static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
                                                   void *pointer, uint64_t size,
                                                   enum radeon_bo_flag flags)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct drm_radeon_gem_userptr args;
   struct radeon_bo *bo;
   int r;

   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo)
      return NULL;

   memset(&args, 0, sizeof(args));
   args.addr = (uintptr_t)pointer;
   args.size = align(size, ws->info.gart_page_size);

   if (flags & RADEON_FLAG_READ_ONLY)
      args.flags = RADEON_GEM_USERPTR_READONLY |
                   RADEON_GEM_USERPTR_VALIDATE;
   else
      args.flags = RADEON_GEM_USERPTR_ANONONLY |
                   RADEON_GEM_USERPTR_REGISTER |
                   RADEON_GEM_USERPTR_VALIDATE;

   if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
                           &args, sizeof(args))) {
      FREE(bo);
      return NULL;
   }

   assert(args.handle != 0);

   mtx_lock(&ws->bo_handles_mutex);

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->handle = args.handle;
   bo->base.alignment_log2 = 0;
   bo->base.size = size;
   bo->base.vtbl = &radeon_bo_vtbl;
   bo->rws = ws;
   bo->user_ptr = pointer;
   bo->va = 0;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

   mtx_unlock(&ws->bo_handles_mutex);

   if (ws->info.r600_has_virtual_memory) {
      struct drm_radeon_gem_va va;

      bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);

      va.handle = bo->handle;
      va.operation = RADEON_VA_MAP;
      va.vm_id = 0;
      va.offset = bo->va;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to assign virtual address space\n");
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&ws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer *b = &bo->base;
         struct radeon_bo *old_bo =
               _mesa_hash_table_u64_search(ws->bo_vas, va.offset);

         mtx_unlock(&ws->bo_handles_mutex);
         pb_reference(&b, &old_bo->base);
         return b;
      }

      _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
      mtx_unlock(&ws->bo_handles_mutex);
   }

   ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

   return (struct pb_buffer*)bo;
}

static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
                                                      struct winsys_handle *whandle,
                                                      unsigned vm_alignment,
                                                      bool is_dri_prime_linear_buffer)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_bo *bo;
   int r;
   unsigned handle;
   uint64_t size = 0;

   /* We must maintain a list of pairs <handle, bo>, so that we always return
    * the same BO for one particular handle. If we didn't do that and created
    * more than one BO for the same handle and then relocated them in a CS,
    * we would hit a deadlock in the kernel.
    *
    * The list of pairs is guarded by a mutex, of course. */
   mtx_lock(&ws->bo_handles_mutex);

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      /* First check if there already is an existing bo for the handle. */
      bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      /* We must first get the GEM handle, as fds are unreliable keys */
      r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
      if (r)
         goto fail;
      bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
   } else {
      /* Unknown handle type */
      goto fail;
   }

   if (bo) {
      /* Increase the refcount. */
      struct pb_buffer *b = NULL;
      pb_reference(&b, &bo->base);
      goto done;
   }

   /* There isn't, create a new one. */
   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo) {
      goto fail;
   }

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      struct drm_gem_open open_arg = {};
      memset(&open_arg, 0, sizeof(open_arg));
      /* Open the BO. */
      open_arg.name = whandle->handle;
      if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
         FREE(bo);
         goto fail;
      }
      handle = open_arg.handle;
      size = open_arg.size;
      bo->flink_name = whandle->handle;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      size = lseek(whandle->handle, 0, SEEK_END);
      /*
       * Could check errno to determine whether the kernel is new enough, but
       * it doesn't really matter why this failed, just that it failed.
       */
      if (size == (off_t)-1) {
         FREE(bo);
         goto fail;
      }
      lseek(whandle->handle, 0, SEEK_SET);
   }

   assert(handle != 0);

   bo->handle = handle;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = 0;
   bo->base.size = (unsigned) size;
   bo->base.vtbl = &radeon_bo_vtbl;
   bo->rws = ws;
   bo->va = 0;
   bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   if (bo->flink_name)
      _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);

   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

done:
   mtx_unlock(&ws->bo_handles_mutex);

   if (ws->info.r600_has_virtual_memory && !bo->va) {
      struct drm_radeon_gem_va va;

      bo->va = radeon_bomgr_find_va64(ws, bo->base.size, vm_alignment);

      va.handle = bo->handle;
      va.operation = RADEON_VA_MAP;
      va.vm_id = 0;
      va.offset = bo->va;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to assign virtual address space\n");
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&ws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer *b = &bo->base;
         struct radeon_bo *old_bo =
               _mesa_hash_table_u64_search(ws->bo_vas, va.offset);

         mtx_unlock(&ws->bo_handles_mutex);
         pb_reference(&b, &old_bo->base);
         return b;
      }

      _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
      mtx_unlock(&ws->bo_handles_mutex);
   }

   bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

   return (struct pb_buffer*)bo;

fail:
   mtx_unlock(&ws->bo_handles_mutex);
   return NULL;
}

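/* Export a real BO as a flink name, KMS handle, or dma-buf fd. Slab entries
 * cannot be exported, and exported buffers are removed from the reusable pool.
 */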
static bool radeon_winsys_bo_get_handle(struct radeon_winsys *rws,
                                        struct pb_buffer *buffer,
                                        struct winsys_handle *whandle)
{
   struct drm_gem_flink flink;
   struct radeon_bo *bo = radeon_bo(buffer);
   struct radeon_drm_winsys *ws = bo->rws;

   /* Don't allow exports of slab entries. */
   if (!bo->handle)
      return false;

   memset(&flink, 0, sizeof(flink));

   bo->u.real.use_reusable_pool = false;

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      if (!bo->flink_name) {
         flink.handle = bo->handle;

         if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
            return false;
         }

         bo->flink_name = flink.name;

         mtx_lock(&ws->bo_handles_mutex);
         _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
         mtx_unlock(&ws->bo_handles_mutex);
      }
      whandle->handle = bo->flink_name;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
      whandle->handle = bo->handle;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
         return false;
   }

   return true;
}

static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer *buf)
{
   return ((struct radeon_bo*)buf)->user_ptr != NULL;
}

static bool radeon_winsys_bo_is_suballocated(struct pb_buffer *buf)
{
   return !((struct radeon_bo*)buf)->handle;
}

static uint64_t radeon_winsys_bo_va(struct pb_buffer *buf)
{
   return ((struct radeon_bo*)buf)->va;
}

static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer *buf)
{
   struct radeon_bo *bo = radeon_bo(buf);

   if (bo->handle)
      return 0;

   return bo->va - bo->u.slab.real->va;
}

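/* Plug the buffer functions defined above into the winsys vtable. */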
void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
{
   ws->base.buffer_set_metadata = radeon_bo_set_metadata;
   ws->base.buffer_get_metadata = radeon_bo_get_metadata;
   ws->base.buffer_map = radeon_bo_map;
   ws->base.buffer_unmap = radeon_bo_unmap;
   ws->base.buffer_wait = radeon_bo_wait;
   ws->base.buffer_create = radeon_winsys_bo_create;
   ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
   ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
   ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
   ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated;
   ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
   ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
   ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
   ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
}