1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based on amdgpu winsys.
6  * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
7  * Copyright © 2015 Advanced Micro Devices, Inc.
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining a
10  * copy of this software and associated documentation files (the "Software"),
11  * to deal in the Software without restriction, including without limitation
12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  * and/or sell copies of the Software, and to permit persons to whom the
14  * Software is furnished to do so, subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice (including the next
17  * paragraph) shall be included in all copies or substantial portions of the
18  * Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26  * IN THE SOFTWARE.
27  */
28 
29 #include <stdio.h>
30 
31 #include "radv_amdgpu_bo.h"
32 #include "radv_debug.h"
33 
34 #include <amdgpu.h>
35 #include <inttypes.h>
36 #include <pthread.h>
37 #include <unistd.h>
38 #include "drm-uapi/amdgpu_drm.h"
39 
40 #include "util/os_time.h"
41 #include "util/u_atomic.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 
45 static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo);
46 
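/* Thin wrapper around amdgpu_bo_va_op_raw(): when a BO is given, the VM PTE
 * flags are derived from the RADEON_FLAG_* bits; otherwise the caller-provided
 * internal flags (e.g. AMDGPU_VM_PAGE_PRT) are used as-is. The size is
 * page-aligned before the ioctl.
 */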
47 static int
48 radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, amdgpu_bo_handle bo, uint64_t offset,
49                      uint64_t size, uint64_t addr, uint32_t bo_flags, uint64_t internal_flags,
50                      uint32_t ops)
51 {
52    uint64_t flags = internal_flags;
53    if (bo) {
54       flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;
55 
56       if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.gfx_level >= GFX9)
57          flags |= AMDGPU_VM_MTYPE_UC;
58 
59       if (!(bo_flags & RADEON_FLAG_READ_ONLY))
60          flags |= AMDGPU_VM_PAGE_WRITEABLE;
61    }
62 
63    size = align64(size, getpagesize());
64 
65    return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr, flags, ops);
66 }
67 
68 static int
69 bo_comparator(const void *ap, const void *bp)
70 {
71    struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
72    struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
73    return (a > b) ? 1 : (a < b) ? -1 : 0;
74 }
75 
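/* Rebuild the flat, deduplicated array of backing BOs referenced by a virtual
 * BO's ranges. Sorting by pointer makes duplicates adjacent so they can be
 * dropped in a single pass; the command submission code uses this array to
 * pull in every backing BO.
 */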
76 static VkResult
77 radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
78 {
79    if (bo->bo_capacity < bo->range_count) {
80       uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
81       struct radv_amdgpu_winsys_bo **bos =
82          realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
83       if (!bos)
84          return VK_ERROR_OUT_OF_HOST_MEMORY;
85       bo->bos = bos;
86       bo->bo_capacity = new_count;
87    }
88 
89    uint32_t temp_bo_count = 0;
90    for (uint32_t i = 0; i < bo->range_count; ++i)
91       if (bo->ranges[i].bo)
92          bo->bos[temp_bo_count++] = bo->ranges[i].bo;
93 
94    qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);
95 
96    if (!temp_bo_count) {
97       bo->bo_count = 0;
98    } else {
99       uint32_t final_bo_count = 1;
100       for (uint32_t i = 1; i < temp_bo_count; ++i)
101          if (bo->bos[i] != bo->bos[i - 1])
102             bo->bos[final_bo_count++] = bo->bos[i];
103 
104       bo->bo_count = final_bo_count;
105    }
106 
107    return VK_SUCCESS;
108 }
109 
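/* Bind (or unbind, when _bo is NULL) a slice of a backing BO into a virtual
 * (sparse) parent BO. The parent tracks its bindings as a sorted list of
 * non-overlapping ranges; this function replaces the VA mapping through the
 * kernel and then patches that range list, merging with adjacent compatible
 * ranges and splitting any range that straddles the new binding.
 *
 * Illustrative example (offsets are hypothetical): if the parent has a single
 * unbound range [0, 0x10000) and a BO is bound at [0x2000, 0x3000), the list
 * becomes [0, 0x2000) unbound, [0x2000, 0x3000) bound, [0x3000, 0x10000)
 * unbound.
 */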
110 static VkResult
111 radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_winsys_bo *_parent,
112                                    uint64_t offset, uint64_t size, struct radeon_winsys_bo *_bo,
113                                    uint64_t bo_offset)
114 {
115    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
116    struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
117    struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
118    int range_count_delta, new_idx;
119    int first = 0, last;
120    struct radv_amdgpu_map_range new_first, new_last;
121    VkResult result;
122    int r;
123 
124    assert(parent->is_virtual);
125    assert(!bo || !bo->is_virtual);
126 
127    /* When the BO is NULL, AMDGPU will reset the PTE VA range to the initial state. Otherwise, it
128     * will first unmap all existing VA that overlap the requested range and then map.
129     */
130    if (bo) {
131       r = radv_amdgpu_bo_va_op(ws, bo->bo, bo_offset, size, parent->base.va + offset, 0, 0,
132                                AMDGPU_VA_OP_REPLACE);
133    } else {
134       r = radv_amdgpu_bo_va_op(ws, NULL, 0, size, parent->base.va + offset, 0, AMDGPU_VM_PAGE_PRT,
135                                AMDGPU_VA_OP_REPLACE);
136    }
137 
138    if (r) {
139       fprintf(stderr, "radv/amdgpu: Failed to replace a PRT VA region (%d).\n", r);
140       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
141    }
142 
143    /* Do not add the BO to the virtual BO list if it's already in the global list to avoid dangling
144     * BO references because it might have been destroyed without being previously unbound. Resetting
145     * it to NULL clears the old BO ranges if present.
146     *
147     * This is going to be clarified in the Vulkan spec:
148     * https://gitlab.khronos.org/vulkan/vulkan/-/issues/3125
149     *
150     * The issue still exists for non-global BOs but it will be addressed later, once we are 100% sure
151     * it's RADV's fault (mostly because the solution looks more complicated).
152     */
153    if (bo && bo->base.use_global_list) {
154       bo = NULL;
155       bo_offset = 0;
156    }
157 
158    /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that
159     * contains the newly bound range). */
160    if (parent->range_capacity - parent->range_count < 2) {
161       uint32_t range_capacity = parent->range_capacity + 2;
162       struct radv_amdgpu_map_range *ranges =
163          realloc(parent->ranges, range_capacity * sizeof(struct radv_amdgpu_map_range));
164       if (!ranges)
165          return VK_ERROR_OUT_OF_HOST_MEMORY;
166       parent->ranges = ranges;
167       parent->range_capacity = range_capacity;
168    }
169 
170    /*
171     * [first, last] is exactly the range of ranges that either overlap the
172     * new parent, or are adjacent to it. This corresponds to the bind ranges
173     * that may change.
174     */
175    while (first + 1 < parent->range_count &&
176           parent->ranges[first].offset + parent->ranges[first].size < offset)
177       ++first;
178 
179    last = first;
180    while (last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
181       ++last;
182 
183    /* Whether the first or last range is going to be removed entirely or just
184     * resized/left alone. Note that in the case of first == last, we will split
185     * this into a part before and after the new range. The remove flag then
186     * indicates that the corresponding split part should not be created. */
187    bool remove_first = parent->ranges[first].offset == offset;
188    bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
189 
190    assert(parent->ranges[first].offset <= offset);
191    assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
192 
193    /* Try to merge the new range with the first range. */
194    if (parent->ranges[first].bo == bo &&
195        (!bo ||
196         offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
197       size += offset - parent->ranges[first].offset;
198       offset = parent->ranges[first].offset;
199       bo_offset = parent->ranges[first].bo_offset;
200       remove_first = true;
201    }
202 
203    /* Try to merge the new range with the last range. */
204    if (parent->ranges[last].bo == bo &&
205        (!bo ||
206         offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
207       size = parent->ranges[last].offset + parent->ranges[last].size - offset;
208       remove_last = true;
209    }
210 
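   /* The (last - first + 1) affected ranges are replaced by the new range plus,
    * optionally, the surviving head piece of 'first' and tail piece of 'last'. */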
211    range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
212    new_idx = first + !remove_first;
213 
214    /* If the first/last ranges are not left alone, we unmap them and optionally map
215     * them again after modifications. Note that this implicitly can do the splitting
216     * if first == last. */
217    new_first = parent->ranges[first];
218    new_last = parent->ranges[last];
219 
220    if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
221       if (!remove_first) {
222          new_first.size = offset - new_first.offset;
223       }
224    }
225 
226    if (parent->ranges[last].offset < offset + size || remove_last) {
227       if (!remove_last) {
228          new_last.size -= offset + size - new_last.offset;
229          new_last.bo_offset += (offset + size - new_last.offset);
230          new_last.offset = offset + size;
231       }
232    }
233 
234    /* Moves the range list after last to account for the changed number of ranges. */
235    memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
236            sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));
237 
238    if (!remove_first)
239       parent->ranges[first] = new_first;
240 
241    if (!remove_last)
242       parent->ranges[new_idx + 1] = new_last;
243 
244    /* Actually set up the new range. */
245    parent->ranges[new_idx].offset = offset;
246    parent->ranges[new_idx].size = size;
247    parent->ranges[new_idx].bo = bo;
248    parent->ranges[new_idx].bo_offset = bo_offset;
249 
250    parent->range_count += range_count_delta;
251 
252    result = radv_amdgpu_winsys_rebuild_bo_list(parent);
253    if (result != VK_SUCCESS)
254       return result;
255 
256    return VK_SUCCESS;
257 }
258 
259 struct radv_amdgpu_winsys_bo_log {
260    struct list_head list;
261    uint64_t va;
262    uint64_t size;
263    uint64_t timestamp; /* CPU timestamp */
264    uint8_t is_virtual : 1;
265    uint8_t destroyed : 1;
266 };
267 
268 static void
269 radv_amdgpu_log_bo(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo, bool destroyed)
270 {
271    struct radv_amdgpu_winsys_bo_log *bo_log = NULL;
272 
273    if (!ws->debug_log_bos)
274       return;
275 
276    bo_log = malloc(sizeof(*bo_log));
277    if (!bo_log)
278       return;
279 
280    bo_log->va = bo->base.va;
281    bo_log->size = bo->size;
282    bo_log->timestamp = os_time_get_nano();
283    bo_log->is_virtual = bo->is_virtual;
284    bo_log->destroyed = destroyed;
285 
286    u_rwlock_wrlock(&ws->log_bo_list_lock);
287    list_addtail(&bo_log->list, &ws->log_bo_list);
288    u_rwlock_wrunlock(&ws->log_bo_list_lock);
289 }
290 
291 static VkResult
292 radv_amdgpu_global_bo_list_add(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
293 {
294    u_rwlock_wrlock(&ws->global_bo_list.lock);
295    if (ws->global_bo_list.count == ws->global_bo_list.capacity) {
296       unsigned capacity = MAX2(4, ws->global_bo_list.capacity * 2);
297       void *data =
298          realloc(ws->global_bo_list.bos, capacity * sizeof(struct radv_amdgpu_winsys_bo *));
299       if (!data) {
300          u_rwlock_wrunlock(&ws->global_bo_list.lock);
301          return VK_ERROR_OUT_OF_HOST_MEMORY;
302       }
303 
304       ws->global_bo_list.bos = (struct radv_amdgpu_winsys_bo **)data;
305       ws->global_bo_list.capacity = capacity;
306    }
307 
308    ws->global_bo_list.bos[ws->global_bo_list.count++] = bo;
309    bo->base.use_global_list = true;
310    u_rwlock_wrunlock(&ws->global_bo_list.lock);
311    return VK_SUCCESS;
312 }
313 
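/* Remove a BO from the global list by swapping the last entry into its slot;
 * the list order is not preserved (and does not need to be).
 */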
314 static void
315 radv_amdgpu_global_bo_list_del(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
316 {
317    u_rwlock_wrlock(&ws->global_bo_list.lock);
318    for (unsigned i = ws->global_bo_list.count; i-- > 0;) {
319       if (ws->global_bo_list.bos[i] == bo) {
320          ws->global_bo_list.bos[i] = ws->global_bo_list.bos[ws->global_bo_list.count - 1];
321          --ws->global_bo_list.count;
322          bo->base.use_global_list = false;
323          break;
324       }
325    }
326    u_rwlock_wrunlock(&ws->global_bo_list.lock);
327 }
328 
329 static void
330 radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo)
331 {
332    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
333    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
334 
335    radv_amdgpu_log_bo(ws, bo, true);
336 
337    if (bo->is_virtual) {
338       int r;
339 
340       /* Clear mappings of this PRT VA region. */
341       r = radv_amdgpu_bo_va_op(ws, bo->bo, 0, bo->size, bo->base.va, 0, 0, AMDGPU_VA_OP_CLEAR);
342       if (r) {
343          fprintf(stderr, "radv/amdgpu: Failed to clear a PRT VA region (%d).\n", r);
344       }
345 
346       free(bo->bos);
347       free(bo->ranges);
348    } else {
349       if (ws->debug_all_bos)
350          radv_amdgpu_global_bo_list_del(ws, bo);
351       radv_amdgpu_bo_va_op(ws, bo->bo, 0, bo->size, bo->base.va, 0, 0, AMDGPU_VA_OP_UNMAP);
352       amdgpu_bo_free(bo->bo);
353    }
354 
355    if (bo->base.initial_domain & RADEON_DOMAIN_VRAM) {
356       if (bo->base.vram_no_cpu_access) {
357          p_atomic_add(&ws->allocated_vram, -align64(bo->size, ws->info.gart_page_size));
358       } else {
359          p_atomic_add(&ws->allocated_vram_vis, -align64(bo->size, ws->info.gart_page_size));
360       }
361    }
362 
363    if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
364       p_atomic_add(&ws->allocated_gtt, -align64(bo->size, ws->info.gart_page_size));
365 
366    amdgpu_va_range_free(bo->va_handle);
367    FREE(bo);
368 }
369 
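/* Allocate a new winsys BO: reserve a VA range first, then either reserve a PRT
 * region for virtual (sparse) BOs or allocate GEM memory and map it at that VA.
 */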
370 static VkResult
371 radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
372                              enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags,
373                              unsigned priority, uint64_t replay_address,
374                              struct radeon_winsys_bo **out_bo)
375 {
376    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
377    struct radv_amdgpu_winsys_bo *bo;
378    struct amdgpu_bo_alloc_request request = {0};
379    struct radv_amdgpu_map_range *ranges = NULL;
380    amdgpu_bo_handle buf_handle;
381    uint64_t va = 0;
382    amdgpu_va_handle va_handle;
383    int r;
384    VkResult result = VK_SUCCESS;
385 
386    /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
387     */
388    *out_bo = NULL;
389 
390    bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
391    if (!bo) {
392       return VK_ERROR_OUT_OF_HOST_MEMORY;
393    }
394 
395    unsigned virt_alignment = alignment;
396    if (size >= ws->info.pte_fragment_size)
397       virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);
398 
399    assert(!replay_address || (flags & RADEON_FLAG_REPLAYABLE));
400 
401    const uint64_t va_flags = AMDGPU_VA_RANGE_HIGH |
402                              (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
403                              (flags & RADEON_FLAG_REPLAYABLE ? AMDGPU_VA_RANGE_REPLAYABLE : 0);
404    r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, replay_address,
405                              &va, &va_handle, va_flags);
406    if (r) {
407       result =
408          replay_address ? VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
409       goto error_va_alloc;
410    }
411 
412    bo->base.va = va;
413    bo->va_handle = va_handle;
414    bo->size = size;
415    bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
416 
417    if (flags & RADEON_FLAG_VIRTUAL) {
418       ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
419       if (!ranges) {
420          result = VK_ERROR_OUT_OF_HOST_MEMORY;
421          goto error_ranges_alloc;
422       }
423 
424       bo->ranges = ranges;
425       bo->range_count = 1;
426       bo->range_capacity = 1;
427 
428       bo->ranges[0].offset = 0;
429       bo->ranges[0].size = size;
430       bo->ranges[0].bo = NULL;
431       bo->ranges[0].bo_offset = 0;
432 
433       /* Reserve a PRT VA region. */
434       r = radv_amdgpu_bo_va_op(ws, NULL, 0, size, bo->base.va, 0, AMDGPU_VM_PAGE_PRT,
435                                AMDGPU_VA_OP_MAP);
436       if (r) {
437          fprintf(stderr, "radv/amdgpu: Failed to reserve a PRT VA region (%d).\n", r);
438          result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
439          goto error_ranges_alloc;
440       }
441 
442       radv_amdgpu_log_bo(ws, bo, false);
443 
444       *out_bo = (struct radeon_winsys_bo *)bo;
445       return VK_SUCCESS;
446    }
447 
448    request.alloc_size = size;
449    request.phys_alignment = alignment;
450 
451    if (initial_domain & RADEON_DOMAIN_VRAM) {
452       request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
453 
454       /* Since VRAM and GTT have almost the same performance on
455        * APUs, we could just set GTT. However, in order to decrease
456        * GTT(RAM) usage, which is shared with the OS, allow VRAM
457        * placements too. The idea is not that VRAM is faster here,
458        * but that using it keeps it from sitting idle and wasted.
459        *
460        * Furthermore, even on discrete GPUs this is beneficial. If
461        * both GTT and VRAM are set then AMDGPU still prefers VRAM
462        * for the initial placement, but it makes the buffers
463        * spillable. Otherwise AMDGPU tries to place the buffers in
464        * VRAM really hard to the extent that we are getting a lot
465        * of unnecessary movement. This helps significantly when
466        * e.g. Horizon Zero Dawn allocates more memory than we have
467        * VRAM.
468        */
469       request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
470    }
471 
472    if (initial_domain & RADEON_DOMAIN_GTT)
473       request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
474    if (initial_domain & RADEON_DOMAIN_GDS)
475       request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
476    if (initial_domain & RADEON_DOMAIN_OA)
477       request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;
478 
479    if (flags & RADEON_FLAG_CPU_ACCESS)
480       request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
481    if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
482       bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
483       request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
484    }
485    if (flags & RADEON_FLAG_GTT_WC)
486       request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
487    if (!(flags & RADEON_FLAG_IMPLICIT_SYNC))
488       request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
489    if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
490        ((ws->perftest & RADV_PERFTEST_LOCAL_BOS) || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
491       bo->base.is_local = true;
492       request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
493    }
494 
495    if (initial_domain & RADEON_DOMAIN_VRAM) {
496       if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
497          request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
498    }
499 
500    r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
501    if (r) {
502       fprintf(stderr, "radv/amdgpu: Failed to allocate a buffer:\n");
503       fprintf(stderr, "radv/amdgpu:    size      : %" PRIu64 " bytes\n", size);
504       fprintf(stderr, "radv/amdgpu:    alignment : %u bytes\n", alignment);
505       fprintf(stderr, "radv/amdgpu:    domains   : %u\n", initial_domain);
506       result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
507       goto error_bo_alloc;
508    }
509 
510    r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0, AMDGPU_VA_OP_MAP);
511    if (r) {
512       result = VK_ERROR_UNKNOWN;
513       goto error_va_map;
514    }
515 
516    bo->bo = buf_handle;
517    bo->base.initial_domain = initial_domain;
518    bo->base.use_global_list = bo->base.is_local;
519    bo->priority = priority;
520 
521    r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
522    assert(!r);
523 
524    if (initial_domain & RADEON_DOMAIN_VRAM) {
525       /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
526        * aren't mappable and they are counted as part of the VRAM
527        * counter.
528        *
529        * Otherwise, buffers with the CPU_ACCESS flag or with neither
530        * flag (imported buffers) are counted as part of the VRAM
531        * visible counter because they can be mapped.
532        */
533       if (bo->base.vram_no_cpu_access) {
534          p_atomic_add(&ws->allocated_vram, align64(bo->size, ws->info.gart_page_size));
535       } else {
536          p_atomic_add(&ws->allocated_vram_vis, align64(bo->size, ws->info.gart_page_size));
537       }
538    }
539 
540    if (initial_domain & RADEON_DOMAIN_GTT)
541       p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));
542 
543    if (ws->debug_all_bos)
544       radv_amdgpu_global_bo_list_add(ws, bo);
545    radv_amdgpu_log_bo(ws, bo, false);
546 
547    *out_bo = (struct radeon_winsys_bo *)bo;
548    return VK_SUCCESS;
549 error_va_map:
550    amdgpu_bo_free(buf_handle);
551 
552 error_bo_alloc:
553    free(ranges);
554 
555 error_ranges_alloc:
556    amdgpu_va_range_free(va_handle);
557 
558 error_va_alloc:
559    FREE(bo);
560    return result;
561 }
562 
563 static void *
564 radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
565 {
566    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
567    int ret;
568    void *data;
569    ret = amdgpu_bo_cpu_map(bo->bo, &data);
570    if (ret)
571       return NULL;
572    return data;
573 }
574 
575 static void
576 radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
577 {
578    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
579    amdgpu_bo_cpu_unmap(bo->bo);
580 }
581 
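/* For example (assuming a 2 MiB PTE fragment size), a 3 MiB buffer on GFX9 gets
 * a 2 MiB VM alignment: max(pte_fragment_size, highest bit set in the size).
 */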
582 static uint64_t
583 radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, uint64_t size,
584                                      unsigned alignment)
585 {
586    uint64_t vm_alignment = alignment;
587 
588    /* Increase the VM alignment for faster address translation. */
589    if (size >= ws->info.pte_fragment_size)
590       vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);
591 
592    /* Gfx9: Increase the VM alignment to the most significant bit set
593     * in the size for faster address translation.
594     */
595    if (ws->info.gfx_level >= GFX9) {
596       unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
597       uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;
598 
599       vm_alignment = MAX2(vm_alignment, msb_alignment);
600    }
601    return vm_alignment;
602 }
603 
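/* Import host memory (userptr) as a GTT BO via amdgpu_create_bo_from_user_mem()
 * and map it into the GPU address space.
 */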
604 static VkResult
605 radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_t size,
606                                unsigned priority, struct radeon_winsys_bo **out_bo)
607 {
608    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
609    amdgpu_bo_handle buf_handle;
610    struct radv_amdgpu_winsys_bo *bo;
611    uint64_t va;
612    amdgpu_va_handle va_handle;
613    uint64_t vm_alignment;
614    VkResult result = VK_SUCCESS;
615 
616    /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
617     */
618    *out_bo = NULL;
619 
620    bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
621    if (!bo)
622       return VK_ERROR_OUT_OF_HOST_MEMORY;
623 
624    if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle)) {
625       result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
626       goto error;
627    }
628 
629    /* Using the optimal VM alignment also fixes GPU hangs for buffers that
630     * are imported.
631     */
632    vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size, ws->info.gart_page_size);
633 
634    if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, vm_alignment, 0, &va,
635                              &va_handle, AMDGPU_VA_RANGE_HIGH)) {
636       result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
637       goto error_va_alloc;
638    }
639 
640    if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP)) {
641       result = VK_ERROR_UNKNOWN;
642       goto error_va_map;
643    }
644 
645    /* Initialize it */
646    bo->base.va = va;
647    bo->va_handle = va_handle;
648    bo->size = size;
649    bo->bo = buf_handle;
650    bo->base.initial_domain = RADEON_DOMAIN_GTT;
651    bo->base.use_global_list = false;
652    bo->priority = priority;
653 
654    ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
655    assert(!r);
656 
657    p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));
658 
659    if (ws->debug_all_bos)
660       radv_amdgpu_global_bo_list_add(ws, bo);
661    radv_amdgpu_log_bo(ws, bo, false);
662 
663    *out_bo = (struct radeon_winsys_bo *)bo;
664    return VK_SUCCESS;
665 
666 error_va_map:
667    amdgpu_va_range_free(va_handle);
668 
669 error_va_alloc:
670    amdgpu_bo_free(buf_handle);
671 
672 error:
673    FREE(bo);
674    return result;
675 }
676 
677 static VkResult
678 radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priority,
679                               struct radeon_winsys_bo **out_bo, uint64_t *alloc_size)
680 {
681    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
682    struct radv_amdgpu_winsys_bo *bo;
683    uint64_t va;
684    amdgpu_va_handle va_handle;
685    enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
686    struct amdgpu_bo_import_result result;
687    struct amdgpu_bo_info info;
688    enum radeon_bo_domain initial = 0;
689    int r;
690    VkResult vk_result = VK_SUCCESS;
691 
692    /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
693     */
694    *out_bo = NULL;
695 
696    bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
697    if (!bo)
698       return VK_ERROR_OUT_OF_HOST_MEMORY;
699 
700    r = amdgpu_bo_import(ws->dev, type, fd, &result);
701    if (r) {
702       vk_result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
703       goto error;
704    }
705 
706    r = amdgpu_bo_query_info(result.buf_handle, &info);
707    if (r) {
708       vk_result = VK_ERROR_UNKNOWN;
709       goto error_query;
710    }
711 
712    if (alloc_size) {
713       *alloc_size = info.alloc_size;
714    }
715 
716    r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, result.alloc_size, 1 << 20, 0,
717                              &va, &va_handle, AMDGPU_VA_RANGE_HIGH);
718    if (r) {
719       vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
720       goto error_query;
721    }
722 
723    r =
724       radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size, va, 0, 0, AMDGPU_VA_OP_MAP);
725    if (r) {
726       vk_result = VK_ERROR_UNKNOWN;
727       goto error_va_map;
728    }
729 
730    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
731       initial |= RADEON_DOMAIN_VRAM;
732    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
733       initial |= RADEON_DOMAIN_GTT;
734 
735    bo->bo = result.buf_handle;
736    bo->base.va = va;
737    bo->va_handle = va_handle;
738    bo->base.initial_domain = initial;
739    bo->base.use_global_list = false;
740    bo->size = result.alloc_size;
741    bo->priority = priority;
742 
743    r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
744    assert(!r);
745 
746    if (bo->base.initial_domain & RADEON_DOMAIN_VRAM)
747       p_atomic_add(&ws->allocated_vram, align64(bo->size, ws->info.gart_page_size));
748    if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
749       p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));
750 
751    if (ws->debug_all_bos)
752       radv_amdgpu_global_bo_list_add(ws, bo);
753    radv_amdgpu_log_bo(ws, bo, false);
754 
755    *out_bo = (struct radeon_winsys_bo *)bo;
756    return VK_SUCCESS;
757 error_va_map:
758    amdgpu_va_range_free(va_handle);
759 
760 error_query:
761    amdgpu_bo_free(result.buf_handle);
762 
763 error:
764    FREE(bo);
765    return vk_result;
766 }
767 
768 static bool
769 radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, int *fd)
770 {
771    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
772    enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
773    int r;
774    unsigned handle;
775    r = amdgpu_bo_export(bo->bo, type, &handle);
776    if (r)
777       return false;
778 
779    *fd = (int)handle;
780    return true;
781 }
782 
783 static bool
784 radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd, enum radeon_bo_domain *domains,
785                                  enum radeon_bo_flag *flags)
786 {
787    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
788    struct amdgpu_bo_import_result result = {0};
789    struct amdgpu_bo_info info = {0};
790    int r;
791 
792    *domains = 0;
793    *flags = 0;
794 
795    r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
796    if (r)
797       return false;
798 
799    r = amdgpu_bo_query_info(result.buf_handle, &info);
800    amdgpu_bo_free(result.buf_handle);
801    if (r)
802       return false;
803 
804    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
805       *domains |= RADEON_DOMAIN_VRAM;
806    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
807       *domains |= RADEON_DOMAIN_GTT;
808    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
809       *domains |= RADEON_DOMAIN_GDS;
810    if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
811       *domains |= RADEON_DOMAIN_OA;
812 
813    if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
814       *flags |= RADEON_FLAG_CPU_ACCESS;
815    if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
816       *flags |= RADEON_FLAG_NO_CPU_ACCESS;
817    if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
818       *flags |= RADEON_FLAG_IMPLICIT_SYNC;
819    if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
820       *flags |= RADEON_FLAG_GTT_WC;
821    if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
822       *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
823    if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
824       *flags |= RADEON_FLAG_ZERO_VRAM;
825    return true;
826 }
827 
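/* Decode the TILE_SPLIT tiling-flag field into a tile split size in bytes (and,
 * below, encode it back). These helpers only apply to the pre-GFX9 legacy
 * metadata path.
 */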
828 static unsigned
829 eg_tile_split(unsigned tile_split)
830 {
831    switch (tile_split) {
832    case 0:
833       tile_split = 64;
834       break;
835    case 1:
836       tile_split = 128;
837       break;
838    case 2:
839       tile_split = 256;
840       break;
841    case 3:
842       tile_split = 512;
843       break;
844    default:
845    case 4:
846       tile_split = 1024;
847       break;
848    case 5:
849       tile_split = 2048;
850       break;
851    case 6:
852       tile_split = 4096;
853       break;
854    }
855    return tile_split;
856 }
857 
858 static unsigned
859 radv_eg_tile_split_rev(unsigned eg_tile_split)
860 {
861    switch (eg_tile_split) {
862    case 64:
863       return 0;
864    case 128:
865       return 1;
866    case 256:
867       return 2;
868    case 512:
869       return 3;
870    default:
871    case 1024:
872       return 4;
873    case 2048:
874       return 5;
875    case 4096:
876       return 6;
877    }
878 }
879 
880 #define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
881 #define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK  0x3
882 
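/* Pack the driver-side tiling description into the kernel metadata blob so that
 * other processes importing this BO (e.g. via dma-buf) can reconstruct it.
 */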
883 static void
884 radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
885                                    struct radeon_bo_metadata *md)
886 {
887    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
888    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
889    struct amdgpu_bo_metadata metadata = {0};
890    uint64_t tiling_flags = 0;
891 
892    if (ws->info.gfx_level >= GFX9) {
893       tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
894       tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256b);
895       tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
896       tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64b_blocks);
897       tiling_flags |=
898          AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128b_blocks);
899       tiling_flags |=
900          AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
901       tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
902    } else {
903       if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
904          tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
905       else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
906          tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
907       else
908          tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
909 
910       tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
911       tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
912       tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
913       if (md->u.legacy.tile_split)
914          tiling_flags |=
915             AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
916       tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
917       tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);
918 
919       if (md->u.legacy.scanout)
920          tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
921       else
922          tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
923    }
924 
925    metadata.tiling_info = tiling_flags;
926    metadata.size_metadata = md->size_metadata;
927    memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));
928 
929    amdgpu_bo_set_metadata(bo->bo, &metadata);
930 }
931 
932 static void
933 radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
934                                    struct radeon_bo_metadata *md)
935 {
936    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
937    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
938    struct amdgpu_bo_info info = {0};
939 
940    int r = amdgpu_bo_query_info(bo->bo, &info);
941    if (r)
942       return;
943 
944    uint64_t tiling_flags = info.metadata.tiling_info;
945 
946    if (ws->info.gfx_level >= GFX9) {
947       md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
948       md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
949    } else {
950       md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
951       md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
952 
953       if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
954          md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
955       else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
956          md->u.legacy.microtile = RADEON_LAYOUT_TILED;
957 
958       md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
959       md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
960       md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
961       md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
962       md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
963       md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
964       md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
965    }
966 
967    md->size_metadata = info.metadata.size_metadata;
968    memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
969 }
970 
971 static VkResult
972 radv_amdgpu_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
973                                     bool resident)
974 {
975    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
976    struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
977    VkResult result = VK_SUCCESS;
978 
979    /* Do not add the BO to the global list if it's a local BO because the
980     * kernel maintains a list for us.
981     */
982    if (bo->base.is_local)
983       return VK_SUCCESS;
984 
985    /* Do not add the BO twice to the global list if the allbos debug
986     * option is enabled.
987     */
988    if (ws->debug_all_bos)
989       return VK_SUCCESS;
990 
991    if (resident) {
992       result = radv_amdgpu_global_bo_list_add(ws, bo);
993    } else {
994       radv_amdgpu_global_bo_list_del(ws, bo);
995    }
996 
997    return result;
998 }
999 
1000 static int
1001 radv_amdgpu_bo_va_compare(const void *a, const void *b)
1002 {
1003    const struct radv_amdgpu_winsys_bo *bo_a = *(const struct radv_amdgpu_winsys_bo *const *)a;
1004    const struct radv_amdgpu_winsys_bo *bo_b = *(const struct radv_amdgpu_winsys_bo *const *)b;
1005    return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > bo_b->base.va ? 1 : 0;
1006 }
1007 
1008 static void
1009 radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file)
1010 {
1011    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
1012    struct radv_amdgpu_winsys_bo_log *bo_log;
1013 
1014    if (!ws->debug_log_bos)
1015       return;
1016 
1017    u_rwlock_rdlock(&ws->log_bo_list_lock);
1018    LIST_FOR_EACH_ENTRY (bo_log, &ws->log_bo_list, list) {
1019       fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n",
1020               (long long)bo_log->timestamp, (long long)bo_log->va,
1021               (long long)(bo_log->va + bo_log->size), bo_log->destroyed, bo_log->is_virtual);
1022    }
1023    u_rwlock_rdunlock(&ws->log_bo_list_lock);
1024 }
1025 
1026 static void
1027 radv_amdgpu_dump_bo_ranges(struct radeon_winsys *_ws, FILE *file)
1028 {
1029    struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
1030    if (ws->debug_all_bos) {
1031       struct radv_amdgpu_winsys_bo **bos = NULL;
1032       int i = 0;
1033 
1034       u_rwlock_rdlock(&ws->global_bo_list.lock);
1035       bos = malloc(sizeof(*bos) * ws->global_bo_list.count);
1036       if (!bos) {
1037          u_rwlock_rdunlock(&ws->global_bo_list.lock);
1038          fprintf(file, "  Failed to allocate memory to sort VA ranges for dumping\n");
1039          return;
1040       }
1041 
1042       for (i = 0; i < ws->global_bo_list.count; i++) {
1043          bos[i] = ws->global_bo_list.bos[i];
1044       }
1045       qsort(bos, ws->global_bo_list.count, sizeof(bos[0]), radv_amdgpu_bo_va_compare);
1046 
1047       for (i = 0; i < ws->global_bo_list.count; ++i) {
1048          fprintf(file, "  VA=%.16llx-%.16llx, handle=%d%s\n", (long long)bos[i]->base.va,
1049                  (long long)(bos[i]->base.va + bos[i]->size), bos[i]->bo_handle,
1050                  bos[i]->is_virtual ? " sparse" : "");
1051       }
1052       free(bos);
1053       u_rwlock_rdunlock(&ws->global_bo_list.lock);
1054    } else
1055       fprintf(file, "  To get BO VA ranges, please specify RADV_DEBUG=allbos\n");
1056 }
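
/* Install the buffer entry points into the base winsys vtable. Illustrative
 * (hypothetical) caller-side usage once the vtable is set up:
 *
 *    struct radeon_winsys_bo *bo = NULL;
 *    VkResult res = ws->base.buffer_create(&ws->base, size, 4096,
 *                                          RADEON_DOMAIN_VRAM, RADEON_FLAG_NO_CPU_ACCESS,
 *                                          priority, 0, &bo);
 *    if (res == VK_SUCCESS)
 *       ws->base.buffer_destroy(&ws->base, bo);
 */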
1057 void
1058 radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
1059 {
1060    ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
1061    ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
1062    ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
1063    ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
1064    ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
1065    ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
1066    ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
1067    ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
1068    ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
1069    ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
1070    ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
1071    ws->base.buffer_make_resident = radv_amdgpu_winsys_bo_make_resident;
1072    ws->base.dump_bo_ranges = radv_amdgpu_dump_bo_ranges;
1073    ws->base.dump_bo_log = radv_amdgpu_dump_bo_log;
1074 }
1075