1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2011 Marek Olšák <maraeo@gmail.com> 3bf215546Sopenharmony_ci * Copyright © 2015 Advanced Micro Devices, Inc. 4bf215546Sopenharmony_ci * All Rights Reserved. 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining 7bf215546Sopenharmony_ci * a copy of this software and associated documentation files (the 8bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including 9bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish, 10bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to 11bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to 12bf215546Sopenharmony_ci * the following conditions: 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15bf215546Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 16bf215546Sopenharmony_ci * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17bf215546Sopenharmony_ci * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS 18bf215546Sopenharmony_ci * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 24bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions 25bf215546Sopenharmony_ci * of the Software. 26bf215546Sopenharmony_ci */ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include "amdgpu_cs.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "util/hash_table.h" 31bf215546Sopenharmony_ci#include "util/os_time.h" 32bf215546Sopenharmony_ci#include "util/u_hash_table.h" 33bf215546Sopenharmony_ci#include "frontend/drm_driver.h" 34bf215546Sopenharmony_ci#include "drm-uapi/amdgpu_drm.h" 35bf215546Sopenharmony_ci#include <xf86drm.h> 36bf215546Sopenharmony_ci#include <stdio.h> 37bf215546Sopenharmony_ci#include <inttypes.h> 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci#ifndef AMDGPU_VA_RANGE_HIGH 40bf215546Sopenharmony_ci#define AMDGPU_VA_RANGE_HIGH 0x2 41bf215546Sopenharmony_ci#endif 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci/* Set to 1 for verbose output showing committed sparse buffer ranges. */ 44bf215546Sopenharmony_ci#define DEBUG_SPARSE_COMMITS 0 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_cistruct amdgpu_sparse_backing_chunk { 47bf215546Sopenharmony_ci uint32_t begin, end; 48bf215546Sopenharmony_ci}; 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_cistatic bool amdgpu_bo_wait(struct radeon_winsys *rws, 51bf215546Sopenharmony_ci struct pb_buffer *_buf, uint64_t timeout, 52bf215546Sopenharmony_ci unsigned usage) 53bf215546Sopenharmony_ci{ 54bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 55bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); 56bf215546Sopenharmony_ci int64_t abs_timeout = 0; 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci if (timeout == 0) { 59bf215546Sopenharmony_ci if (p_atomic_read(&bo->num_active_ioctls)) 60bf215546Sopenharmony_ci return false; 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci } else { 63bf215546Sopenharmony_ci abs_timeout = os_time_get_absolute_timeout(timeout); 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci /* Wait if any ioctl is being submitted with this buffer. */ 66bf215546Sopenharmony_ci if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout)) 67bf215546Sopenharmony_ci return false; 68bf215546Sopenharmony_ci } 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci if (bo->bo && bo->u.real.is_shared) { 71bf215546Sopenharmony_ci /* We can't use user fences for shared buffers, because user fences 72bf215546Sopenharmony_ci * are local to this process only. If we want to wait for all buffer 73bf215546Sopenharmony_ci * uses in all processes, we have to use amdgpu_bo_wait_for_idle. 74bf215546Sopenharmony_ci */ 75bf215546Sopenharmony_ci bool buffer_busy = true; 76bf215546Sopenharmony_ci int r; 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy); 79bf215546Sopenharmony_ci if (r) 80bf215546Sopenharmony_ci fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__, 81bf215546Sopenharmony_ci r); 82bf215546Sopenharmony_ci return !buffer_busy; 83bf215546Sopenharmony_ci } 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci if (timeout == 0) { 86bf215546Sopenharmony_ci unsigned idle_fences; 87bf215546Sopenharmony_ci bool buffer_idle; 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci simple_mtx_lock(&ws->bo_fence_lock); 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci for (idle_fences = 0; idle_fences < bo->num_fences; ++idle_fences) { 92bf215546Sopenharmony_ci if (!amdgpu_fence_wait(bo->fences[idle_fences], 0, false)) 93bf215546Sopenharmony_ci break; 94bf215546Sopenharmony_ci } 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci /* Release the idle fences to avoid checking them again later. */ 97bf215546Sopenharmony_ci for (unsigned i = 0; i < idle_fences; ++i) 98bf215546Sopenharmony_ci amdgpu_fence_reference(&bo->fences[i], NULL); 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci memmove(&bo->fences[0], &bo->fences[idle_fences], 101bf215546Sopenharmony_ci (bo->num_fences - idle_fences) * sizeof(*bo->fences)); 102bf215546Sopenharmony_ci bo->num_fences -= idle_fences; 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci buffer_idle = !bo->num_fences; 105bf215546Sopenharmony_ci simple_mtx_unlock(&ws->bo_fence_lock); 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_ci return buffer_idle; 108bf215546Sopenharmony_ci } else { 109bf215546Sopenharmony_ci bool buffer_idle = true; 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci simple_mtx_lock(&ws->bo_fence_lock); 112bf215546Sopenharmony_ci while (bo->num_fences && buffer_idle) { 113bf215546Sopenharmony_ci struct pipe_fence_handle *fence = NULL; 114bf215546Sopenharmony_ci bool fence_idle = false; 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci amdgpu_fence_reference(&fence, bo->fences[0]); 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci /* Wait for the fence. */ 119bf215546Sopenharmony_ci simple_mtx_unlock(&ws->bo_fence_lock); 120bf215546Sopenharmony_ci if (amdgpu_fence_wait(fence, abs_timeout, true)) 121bf215546Sopenharmony_ci fence_idle = true; 122bf215546Sopenharmony_ci else 123bf215546Sopenharmony_ci buffer_idle = false; 124bf215546Sopenharmony_ci simple_mtx_lock(&ws->bo_fence_lock); 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci /* Release an idle fence to avoid checking it again later, keeping in 127bf215546Sopenharmony_ci * mind that the fence array may have been modified by other threads. 128bf215546Sopenharmony_ci */ 129bf215546Sopenharmony_ci if (fence_idle && bo->num_fences && bo->fences[0] == fence) { 130bf215546Sopenharmony_ci amdgpu_fence_reference(&bo->fences[0], NULL); 131bf215546Sopenharmony_ci memmove(&bo->fences[0], &bo->fences[1], 132bf215546Sopenharmony_ci (bo->num_fences - 1) * sizeof(*bo->fences)); 133bf215546Sopenharmony_ci bo->num_fences--; 134bf215546Sopenharmony_ci } 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci amdgpu_fence_reference(&fence, NULL); 137bf215546Sopenharmony_ci } 138bf215546Sopenharmony_ci simple_mtx_unlock(&ws->bo_fence_lock); 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci return buffer_idle; 141bf215546Sopenharmony_ci } 142bf215546Sopenharmony_ci} 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_cistatic enum radeon_bo_domain amdgpu_bo_get_initial_domain( 145bf215546Sopenharmony_ci struct pb_buffer *buf) 146bf215546Sopenharmony_ci{ 147bf215546Sopenharmony_ci return ((struct amdgpu_winsys_bo*)buf)->base.placement; 148bf215546Sopenharmony_ci} 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_cistatic enum radeon_bo_flag amdgpu_bo_get_flags( 151bf215546Sopenharmony_ci struct pb_buffer *buf) 152bf215546Sopenharmony_ci{ 153bf215546Sopenharmony_ci return ((struct amdgpu_winsys_bo*)buf)->base.usage; 154bf215546Sopenharmony_ci} 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_cistatic void amdgpu_bo_remove_fences(struct amdgpu_winsys_bo *bo) 157bf215546Sopenharmony_ci{ 158bf215546Sopenharmony_ci for (unsigned i = 0; i < bo->num_fences; ++i) 159bf215546Sopenharmony_ci amdgpu_fence_reference(&bo->fences[i], NULL); 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci FREE(bo->fences); 162bf215546Sopenharmony_ci bo->num_fences = 0; 163bf215546Sopenharmony_ci bo->max_fences = 0; 164bf215546Sopenharmony_ci} 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_civoid amdgpu_bo_destroy(struct amdgpu_winsys *ws, struct pb_buffer *_buf) 167bf215546Sopenharmony_ci{ 168bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); 169bf215546Sopenharmony_ci struct amdgpu_screen_winsys *sws_iter; 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci assert(bo->bo && "must not be called for slab entries"); 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci if (!bo->u.real.is_user_ptr && bo->u.real.cpu_ptr) { 174bf215546Sopenharmony_ci bo->u.real.cpu_ptr = NULL; 175bf215546Sopenharmony_ci amdgpu_bo_unmap(&ws->dummy_ws.base, &bo->base); 176bf215546Sopenharmony_ci } 177bf215546Sopenharmony_ci assert(bo->u.real.is_user_ptr || bo->u.real.map_count == 0); 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci#if DEBUG 180bf215546Sopenharmony_ci if (ws->debug_all_bos) { 181bf215546Sopenharmony_ci simple_mtx_lock(&ws->global_bo_list_lock); 182bf215546Sopenharmony_ci list_del(&bo->u.real.global_list_item); 183bf215546Sopenharmony_ci ws->num_buffers--; 184bf215546Sopenharmony_ci simple_mtx_unlock(&ws->global_bo_list_lock); 185bf215546Sopenharmony_ci } 186bf215546Sopenharmony_ci#endif 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci /* Close all KMS handles retrieved for other DRM file descriptions */ 189bf215546Sopenharmony_ci simple_mtx_lock(&ws->sws_list_lock); 190bf215546Sopenharmony_ci for (sws_iter = ws->sws_list; sws_iter; sws_iter = sws_iter->next) { 191bf215546Sopenharmony_ci struct hash_entry *entry; 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci if (!sws_iter->kms_handles) 194bf215546Sopenharmony_ci continue; 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci entry = _mesa_hash_table_search(sws_iter->kms_handles, bo); 197bf215546Sopenharmony_ci if (entry) { 198bf215546Sopenharmony_ci struct drm_gem_close args = { .handle = (uintptr_t)entry->data }; 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci drmIoctl(sws_iter->fd, DRM_IOCTL_GEM_CLOSE, &args); 201bf215546Sopenharmony_ci _mesa_hash_table_remove(sws_iter->kms_handles, entry); 202bf215546Sopenharmony_ci } 203bf215546Sopenharmony_ci } 204bf215546Sopenharmony_ci simple_mtx_unlock(&ws->sws_list_lock); 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci simple_mtx_lock(&ws->bo_export_table_lock); 207bf215546Sopenharmony_ci _mesa_hash_table_remove_key(ws->bo_export_table, bo->bo); 208bf215546Sopenharmony_ci simple_mtx_unlock(&ws->bo_export_table_lock); 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci if (bo->base.placement & RADEON_DOMAIN_VRAM_GTT) { 211bf215546Sopenharmony_ci amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP); 212bf215546Sopenharmony_ci amdgpu_va_range_free(bo->u.real.va_handle); 213bf215546Sopenharmony_ci } 214bf215546Sopenharmony_ci amdgpu_bo_free(bo->bo); 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci amdgpu_bo_remove_fences(bo); 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci if (bo->base.placement & RADEON_DOMAIN_VRAM) 219bf215546Sopenharmony_ci ws->allocated_vram -= align64(bo->base.size, ws->info.gart_page_size); 220bf215546Sopenharmony_ci else if (bo->base.placement & RADEON_DOMAIN_GTT) 221bf215546Sopenharmony_ci ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size); 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci simple_mtx_destroy(&bo->lock); 224bf215546Sopenharmony_ci FREE(bo); 225bf215546Sopenharmony_ci} 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_cistatic void amdgpu_bo_destroy_or_cache(struct radeon_winsys *rws, struct pb_buffer *_buf) 228bf215546Sopenharmony_ci{ 229bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 230bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); 231bf215546Sopenharmony_ci 232bf215546Sopenharmony_ci assert(bo->bo); /* slab buffers have a separate vtbl */ 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci if (bo->u.real.use_reusable_pool) 235bf215546Sopenharmony_ci pb_cache_add_buffer(bo->cache_entry); 236bf215546Sopenharmony_ci else 237bf215546Sopenharmony_ci amdgpu_bo_destroy(ws, _buf); 238bf215546Sopenharmony_ci} 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_cistatic void amdgpu_clean_up_buffer_managers(struct amdgpu_winsys *ws) 241bf215546Sopenharmony_ci{ 242bf215546Sopenharmony_ci for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) 243bf215546Sopenharmony_ci pb_slabs_reclaim(&ws->bo_slabs[i]); 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci pb_cache_release_all_buffers(&ws->bo_cache); 246bf215546Sopenharmony_ci} 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_cistatic bool amdgpu_bo_do_map(struct radeon_winsys *rws, struct amdgpu_winsys_bo *bo, void **cpu) 249bf215546Sopenharmony_ci{ 250bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci assert(!(bo->base.usage & RADEON_FLAG_SPARSE) && bo->bo && !bo->u.real.is_user_ptr); 253bf215546Sopenharmony_ci int r = amdgpu_bo_cpu_map(bo->bo, cpu); 254bf215546Sopenharmony_ci if (r) { 255bf215546Sopenharmony_ci /* Clean up buffer managers and try again. */ 256bf215546Sopenharmony_ci amdgpu_clean_up_buffer_managers(ws); 257bf215546Sopenharmony_ci r = amdgpu_bo_cpu_map(bo->bo, cpu); 258bf215546Sopenharmony_ci if (r) 259bf215546Sopenharmony_ci return false; 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci if (p_atomic_inc_return(&bo->u.real.map_count) == 1) { 263bf215546Sopenharmony_ci if (bo->base.placement & RADEON_DOMAIN_VRAM) 264bf215546Sopenharmony_ci ws->mapped_vram += bo->base.size; 265bf215546Sopenharmony_ci else if (bo->base.placement & RADEON_DOMAIN_GTT) 266bf215546Sopenharmony_ci ws->mapped_gtt += bo->base.size; 267bf215546Sopenharmony_ci ws->num_mapped_buffers++; 268bf215546Sopenharmony_ci } 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci return true; 271bf215546Sopenharmony_ci} 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_civoid *amdgpu_bo_map(struct radeon_winsys *rws, 274bf215546Sopenharmony_ci struct pb_buffer *buf, 275bf215546Sopenharmony_ci struct radeon_cmdbuf *rcs, 276bf215546Sopenharmony_ci enum pipe_map_flags usage) 277bf215546Sopenharmony_ci{ 278bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 279bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf; 280bf215546Sopenharmony_ci struct amdgpu_winsys_bo *real; 281bf215546Sopenharmony_ci struct amdgpu_cs *cs = rcs ? amdgpu_cs(rcs) : NULL; 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci assert(!(bo->base.usage & RADEON_FLAG_SPARSE)); 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */ 286bf215546Sopenharmony_ci if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) { 287bf215546Sopenharmony_ci /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */ 288bf215546Sopenharmony_ci if (usage & PIPE_MAP_DONTBLOCK) { 289bf215546Sopenharmony_ci if (!(usage & PIPE_MAP_WRITE)) { 290bf215546Sopenharmony_ci /* Mapping for read. 291bf215546Sopenharmony_ci * 292bf215546Sopenharmony_ci * Since we are mapping for read, we don't need to wait 293bf215546Sopenharmony_ci * if the GPU is using the buffer for read too 294bf215546Sopenharmony_ci * (neither one is changing it). 295bf215546Sopenharmony_ci * 296bf215546Sopenharmony_ci * Only check whether the buffer is being used for write. */ 297bf215546Sopenharmony_ci if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo, 298bf215546Sopenharmony_ci RADEON_USAGE_WRITE)) { 299bf215546Sopenharmony_ci cs->flush_cs(cs->flush_data, 300bf215546Sopenharmony_ci RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); 301bf215546Sopenharmony_ci return NULL; 302bf215546Sopenharmony_ci } 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_ci if (!amdgpu_bo_wait(rws, (struct pb_buffer*)bo, 0, 305bf215546Sopenharmony_ci RADEON_USAGE_WRITE)) { 306bf215546Sopenharmony_ci return NULL; 307bf215546Sopenharmony_ci } 308bf215546Sopenharmony_ci } else { 309bf215546Sopenharmony_ci if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) { 310bf215546Sopenharmony_ci cs->flush_cs(cs->flush_data, 311bf215546Sopenharmony_ci RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); 312bf215546Sopenharmony_ci return NULL; 313bf215546Sopenharmony_ci } 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci if (!amdgpu_bo_wait(rws, (struct pb_buffer*)bo, 0, 316bf215546Sopenharmony_ci RADEON_USAGE_READWRITE)) { 317bf215546Sopenharmony_ci return NULL; 318bf215546Sopenharmony_ci } 319bf215546Sopenharmony_ci } 320bf215546Sopenharmony_ci } else { 321bf215546Sopenharmony_ci uint64_t time = os_time_get_nano(); 322bf215546Sopenharmony_ci 323bf215546Sopenharmony_ci if (!(usage & PIPE_MAP_WRITE)) { 324bf215546Sopenharmony_ci /* Mapping for read. 325bf215546Sopenharmony_ci * 326bf215546Sopenharmony_ci * Since we are mapping for read, we don't need to wait 327bf215546Sopenharmony_ci * if the GPU is using the buffer for read too 328bf215546Sopenharmony_ci * (neither one is changing it). 329bf215546Sopenharmony_ci * 330bf215546Sopenharmony_ci * Only check whether the buffer is being used for write. */ 331bf215546Sopenharmony_ci if (cs) { 332bf215546Sopenharmony_ci if (amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo, 333bf215546Sopenharmony_ci RADEON_USAGE_WRITE)) { 334bf215546Sopenharmony_ci cs->flush_cs(cs->flush_data, 335bf215546Sopenharmony_ci RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL); 336bf215546Sopenharmony_ci } else { 337bf215546Sopenharmony_ci /* Try to avoid busy-waiting in amdgpu_bo_wait. */ 338bf215546Sopenharmony_ci if (p_atomic_read(&bo->num_active_ioctls)) 339bf215546Sopenharmony_ci amdgpu_cs_sync_flush(rcs); 340bf215546Sopenharmony_ci } 341bf215546Sopenharmony_ci } 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci amdgpu_bo_wait(rws, (struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE, 344bf215546Sopenharmony_ci RADEON_USAGE_WRITE); 345bf215546Sopenharmony_ci } else { 346bf215546Sopenharmony_ci /* Mapping for write. */ 347bf215546Sopenharmony_ci if (cs) { 348bf215546Sopenharmony_ci if (amdgpu_bo_is_referenced_by_cs(cs, bo)) { 349bf215546Sopenharmony_ci cs->flush_cs(cs->flush_data, 350bf215546Sopenharmony_ci RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL); 351bf215546Sopenharmony_ci } else { 352bf215546Sopenharmony_ci /* Try to avoid busy-waiting in amdgpu_bo_wait. */ 353bf215546Sopenharmony_ci if (p_atomic_read(&bo->num_active_ioctls)) 354bf215546Sopenharmony_ci amdgpu_cs_sync_flush(rcs); 355bf215546Sopenharmony_ci } 356bf215546Sopenharmony_ci } 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_ci amdgpu_bo_wait(rws, (struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE, 359bf215546Sopenharmony_ci RADEON_USAGE_READWRITE); 360bf215546Sopenharmony_ci } 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci ws->buffer_wait_time += os_time_get_nano() - time; 363bf215546Sopenharmony_ci } 364bf215546Sopenharmony_ci } 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci /* Buffer synchronization has been checked, now actually map the buffer. */ 367bf215546Sopenharmony_ci void *cpu = NULL; 368bf215546Sopenharmony_ci uint64_t offset = 0; 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci if (bo->bo) { 371bf215546Sopenharmony_ci real = bo; 372bf215546Sopenharmony_ci } else { 373bf215546Sopenharmony_ci real = bo->u.slab.real; 374bf215546Sopenharmony_ci offset = bo->va - real->va; 375bf215546Sopenharmony_ci } 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci if (usage & RADEON_MAP_TEMPORARY) { 378bf215546Sopenharmony_ci if (real->u.real.is_user_ptr) { 379bf215546Sopenharmony_ci cpu = real->u.real.cpu_ptr; 380bf215546Sopenharmony_ci } else { 381bf215546Sopenharmony_ci if (!amdgpu_bo_do_map(rws, real, &cpu)) 382bf215546Sopenharmony_ci return NULL; 383bf215546Sopenharmony_ci } 384bf215546Sopenharmony_ci } else { 385bf215546Sopenharmony_ci cpu = p_atomic_read(&real->u.real.cpu_ptr); 386bf215546Sopenharmony_ci if (!cpu) { 387bf215546Sopenharmony_ci simple_mtx_lock(&real->lock); 388bf215546Sopenharmony_ci /* Must re-check due to the possibility of a race. Re-check need not 389bf215546Sopenharmony_ci * be atomic thanks to the lock. */ 390bf215546Sopenharmony_ci cpu = real->u.real.cpu_ptr; 391bf215546Sopenharmony_ci if (!cpu) { 392bf215546Sopenharmony_ci if (!amdgpu_bo_do_map(rws, real, &cpu)) { 393bf215546Sopenharmony_ci simple_mtx_unlock(&real->lock); 394bf215546Sopenharmony_ci return NULL; 395bf215546Sopenharmony_ci } 396bf215546Sopenharmony_ci p_atomic_set(&real->u.real.cpu_ptr, cpu); 397bf215546Sopenharmony_ci } 398bf215546Sopenharmony_ci simple_mtx_unlock(&real->lock); 399bf215546Sopenharmony_ci } 400bf215546Sopenharmony_ci } 401bf215546Sopenharmony_ci 402bf215546Sopenharmony_ci return (uint8_t*)cpu + offset; 403bf215546Sopenharmony_ci} 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_civoid amdgpu_bo_unmap(struct radeon_winsys *rws, struct pb_buffer *buf) 406bf215546Sopenharmony_ci{ 407bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 408bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf; 409bf215546Sopenharmony_ci struct amdgpu_winsys_bo *real; 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci assert(!(bo->base.usage & RADEON_FLAG_SPARSE)); 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci real = bo->bo ? bo : bo->u.slab.real; 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci if (real->u.real.is_user_ptr) 416bf215546Sopenharmony_ci return; 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci assert(real->u.real.map_count != 0 && "too many unmaps"); 419bf215546Sopenharmony_ci if (p_atomic_dec_zero(&real->u.real.map_count)) { 420bf215546Sopenharmony_ci assert(!real->u.real.cpu_ptr && 421bf215546Sopenharmony_ci "too many unmaps or forgot RADEON_MAP_TEMPORARY flag"); 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci if (real->base.placement & RADEON_DOMAIN_VRAM) 424bf215546Sopenharmony_ci ws->mapped_vram -= real->base.size; 425bf215546Sopenharmony_ci else if (real->base.placement & RADEON_DOMAIN_GTT) 426bf215546Sopenharmony_ci ws->mapped_gtt -= real->base.size; 427bf215546Sopenharmony_ci ws->num_mapped_buffers--; 428bf215546Sopenharmony_ci } 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_ci amdgpu_bo_cpu_unmap(real->bo); 431bf215546Sopenharmony_ci} 432bf215546Sopenharmony_ci 433bf215546Sopenharmony_cistatic const struct pb_vtbl amdgpu_winsys_bo_vtbl = { 434bf215546Sopenharmony_ci /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */ 435bf215546Sopenharmony_ci (void*)amdgpu_bo_destroy_or_cache 436bf215546Sopenharmony_ci /* other functions are never called */ 437bf215546Sopenharmony_ci}; 438bf215546Sopenharmony_ci 439bf215546Sopenharmony_cistatic void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys *ws, struct amdgpu_winsys_bo *bo) 440bf215546Sopenharmony_ci{ 441bf215546Sopenharmony_ci#if DEBUG 442bf215546Sopenharmony_ci assert(bo->bo); 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci if (ws->debug_all_bos) { 445bf215546Sopenharmony_ci simple_mtx_lock(&ws->global_bo_list_lock); 446bf215546Sopenharmony_ci list_addtail(&bo->u.real.global_list_item, &ws->global_bo_list); 447bf215546Sopenharmony_ci ws->num_buffers++; 448bf215546Sopenharmony_ci simple_mtx_unlock(&ws->global_bo_list_lock); 449bf215546Sopenharmony_ci } 450bf215546Sopenharmony_ci#endif 451bf215546Sopenharmony_ci} 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_cistatic unsigned amdgpu_get_optimal_alignment(struct amdgpu_winsys *ws, 454bf215546Sopenharmony_ci uint64_t size, unsigned alignment) 455bf215546Sopenharmony_ci{ 456bf215546Sopenharmony_ci /* Increase the alignment for faster address translation and better memory 457bf215546Sopenharmony_ci * access pattern. 458bf215546Sopenharmony_ci */ 459bf215546Sopenharmony_ci if (size >= ws->info.pte_fragment_size) { 460bf215546Sopenharmony_ci alignment = MAX2(alignment, ws->info.pte_fragment_size); 461bf215546Sopenharmony_ci } else if (size) { 462bf215546Sopenharmony_ci unsigned msb = util_last_bit(size); 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci alignment = MAX2(alignment, 1u << (msb - 1)); 465bf215546Sopenharmony_ci } 466bf215546Sopenharmony_ci return alignment; 467bf215546Sopenharmony_ci} 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_cistatic struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, 470bf215546Sopenharmony_ci uint64_t size, 471bf215546Sopenharmony_ci unsigned alignment, 472bf215546Sopenharmony_ci enum radeon_bo_domain initial_domain, 473bf215546Sopenharmony_ci unsigned flags, 474bf215546Sopenharmony_ci int heap) 475bf215546Sopenharmony_ci{ 476bf215546Sopenharmony_ci struct amdgpu_bo_alloc_request request = {0}; 477bf215546Sopenharmony_ci amdgpu_bo_handle buf_handle; 478bf215546Sopenharmony_ci uint64_t va = 0; 479bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo; 480bf215546Sopenharmony_ci amdgpu_va_handle va_handle = NULL; 481bf215546Sopenharmony_ci int r; 482bf215546Sopenharmony_ci bool init_pb_cache; 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_ci /* VRAM or GTT must be specified, but not both at the same time. */ 485bf215546Sopenharmony_ci assert(util_bitcount(initial_domain & (RADEON_DOMAIN_VRAM_GTT | 486bf215546Sopenharmony_ci RADEON_DOMAIN_GDS | 487bf215546Sopenharmony_ci RADEON_DOMAIN_OA)) == 1); 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci alignment = amdgpu_get_optimal_alignment(ws, size, alignment); 490bf215546Sopenharmony_ci 491bf215546Sopenharmony_ci init_pb_cache = heap >= 0 && (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING); 492bf215546Sopenharmony_ci 493bf215546Sopenharmony_ci bo = CALLOC(1, sizeof(struct amdgpu_winsys_bo) + 494bf215546Sopenharmony_ci init_pb_cache * sizeof(struct pb_cache_entry)); 495bf215546Sopenharmony_ci if (!bo) { 496bf215546Sopenharmony_ci return NULL; 497bf215546Sopenharmony_ci } 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci if (init_pb_cache) { 500bf215546Sopenharmony_ci bo->u.real.use_reusable_pool = true; 501bf215546Sopenharmony_ci pb_cache_init_entry(&ws->bo_cache, bo->cache_entry, &bo->base, 502bf215546Sopenharmony_ci heap); 503bf215546Sopenharmony_ci } 504bf215546Sopenharmony_ci request.alloc_size = size; 505bf215546Sopenharmony_ci request.phys_alignment = alignment; 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_ci if (initial_domain & RADEON_DOMAIN_VRAM) { 508bf215546Sopenharmony_ci request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM; 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_ci /* Since VRAM and GTT have almost the same performance on APUs, we could 511bf215546Sopenharmony_ci * just set GTT. However, in order to decrease GTT(RAM) usage, which is 512bf215546Sopenharmony_ci * shared with the OS, allow VRAM placements too. The idea is not to use 513bf215546Sopenharmony_ci * VRAM usefully, but to use it so that it's not unused and wasted. 514bf215546Sopenharmony_ci */ 515bf215546Sopenharmony_ci if (!ws->info.has_dedicated_vram) 516bf215546Sopenharmony_ci request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; 517bf215546Sopenharmony_ci } 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci if (initial_domain & RADEON_DOMAIN_GTT) 520bf215546Sopenharmony_ci request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; 521bf215546Sopenharmony_ci if (initial_domain & RADEON_DOMAIN_GDS) 522bf215546Sopenharmony_ci request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS; 523bf215546Sopenharmony_ci if (initial_domain & RADEON_DOMAIN_OA) 524bf215546Sopenharmony_ci request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA; 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci if (flags & RADEON_FLAG_NO_CPU_ACCESS) 527bf215546Sopenharmony_ci request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; 528bf215546Sopenharmony_ci if (flags & RADEON_FLAG_GTT_WC) 529bf215546Sopenharmony_ci request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC; 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci if (flags & RADEON_FLAG_DISCARDABLE && 532bf215546Sopenharmony_ci ws->info.drm_minor >= 47) 533bf215546Sopenharmony_ci request.flags |= AMDGPU_GEM_CREATE_DISCARDABLE; 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci if (ws->zero_all_vram_allocs && 536bf215546Sopenharmony_ci (request.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)) 537bf215546Sopenharmony_ci request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED; 538bf215546Sopenharmony_ci 539bf215546Sopenharmony_ci if ((flags & RADEON_FLAG_ENCRYPTED) && 540bf215546Sopenharmony_ci ws->info.has_tmz_support) { 541bf215546Sopenharmony_ci request.flags |= AMDGPU_GEM_CREATE_ENCRYPTED; 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci if (!(flags & RADEON_FLAG_DRIVER_INTERNAL)) { 544bf215546Sopenharmony_ci struct amdgpu_screen_winsys *sws_iter; 545bf215546Sopenharmony_ci simple_mtx_lock(&ws->sws_list_lock); 546bf215546Sopenharmony_ci for (sws_iter = ws->sws_list; sws_iter; sws_iter = sws_iter->next) { 547bf215546Sopenharmony_ci *((bool*) &sws_iter->base.uses_secure_bos) = true; 548bf215546Sopenharmony_ci } 549bf215546Sopenharmony_ci simple_mtx_unlock(&ws->sws_list_lock); 550bf215546Sopenharmony_ci } 551bf215546Sopenharmony_ci } 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle); 554bf215546Sopenharmony_ci if (r) { 555bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n"); 556bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size); 557bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment); 558bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: domains : %u\n", initial_domain); 559bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: flags : %" PRIx64 "\n", request.flags); 560bf215546Sopenharmony_ci goto error_bo_alloc; 561bf215546Sopenharmony_ci } 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci if (initial_domain & RADEON_DOMAIN_VRAM_GTT) { 564bf215546Sopenharmony_ci unsigned va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0; 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_ci r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, 567bf215546Sopenharmony_ci size + va_gap_size, alignment, 568bf215546Sopenharmony_ci 0, &va, &va_handle, 569bf215546Sopenharmony_ci (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | 570bf215546Sopenharmony_ci AMDGPU_VA_RANGE_HIGH); 571bf215546Sopenharmony_ci if (r) 572bf215546Sopenharmony_ci goto error_va_alloc; 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_ci unsigned vm_flags = AMDGPU_VM_PAGE_READABLE | 575bf215546Sopenharmony_ci AMDGPU_VM_PAGE_EXECUTABLE; 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_ci if (!(flags & RADEON_FLAG_READ_ONLY)) 578bf215546Sopenharmony_ci vm_flags |= AMDGPU_VM_PAGE_WRITEABLE; 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci if (flags & RADEON_FLAG_GL2_BYPASS) 581bf215546Sopenharmony_ci vm_flags |= AMDGPU_VM_MTYPE_UC; 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci if (flags & RADEON_FLAG_MALL_NOALLOC && 584bf215546Sopenharmony_ci ws->info.drm_minor >= 47) 585bf215546Sopenharmony_ci vm_flags |= AMDGPU_VM_PAGE_NOALLOC; 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_ci r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags, 588bf215546Sopenharmony_ci AMDGPU_VA_OP_MAP); 589bf215546Sopenharmony_ci if (r) 590bf215546Sopenharmony_ci goto error_va_map; 591bf215546Sopenharmony_ci } 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci simple_mtx_init(&bo->lock, mtx_plain); 594bf215546Sopenharmony_ci pipe_reference_init(&bo->base.reference, 1); 595bf215546Sopenharmony_ci bo->base.alignment_log2 = util_logbase2(alignment); 596bf215546Sopenharmony_ci bo->base.size = size; 597bf215546Sopenharmony_ci bo->base.vtbl = &amdgpu_winsys_bo_vtbl; 598bf215546Sopenharmony_ci bo->bo = buf_handle; 599bf215546Sopenharmony_ci bo->va = va; 600bf215546Sopenharmony_ci bo->u.real.va_handle = va_handle; 601bf215546Sopenharmony_ci bo->base.placement = initial_domain; 602bf215546Sopenharmony_ci bo->base.usage = flags; 603bf215546Sopenharmony_ci bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1); 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_ci if (initial_domain & RADEON_DOMAIN_VRAM) 606bf215546Sopenharmony_ci ws->allocated_vram += align64(size, ws->info.gart_page_size); 607bf215546Sopenharmony_ci else if (initial_domain & RADEON_DOMAIN_GTT) 608bf215546Sopenharmony_ci ws->allocated_gtt += align64(size, ws->info.gart_page_size); 609bf215546Sopenharmony_ci 610bf215546Sopenharmony_ci amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle); 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci amdgpu_add_buffer_to_global_list(ws, bo); 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci return bo; 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_cierror_va_map: 617bf215546Sopenharmony_ci amdgpu_va_range_free(va_handle); 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_cierror_va_alloc: 620bf215546Sopenharmony_ci amdgpu_bo_free(buf_handle); 621bf215546Sopenharmony_ci 622bf215546Sopenharmony_cierror_bo_alloc: 623bf215546Sopenharmony_ci FREE(bo); 624bf215546Sopenharmony_ci return NULL; 625bf215546Sopenharmony_ci} 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_cibool amdgpu_bo_can_reclaim(struct amdgpu_winsys *ws, struct pb_buffer *_buf) 628bf215546Sopenharmony_ci{ 629bf215546Sopenharmony_ci return amdgpu_bo_wait(&ws->dummy_ws.base, _buf, 0, RADEON_USAGE_READWRITE); 630bf215546Sopenharmony_ci} 631bf215546Sopenharmony_ci 632bf215546Sopenharmony_cibool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry) 633bf215546Sopenharmony_ci{ 634bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = container_of(entry, struct amdgpu_winsys_bo, u.slab.entry); 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci return amdgpu_bo_can_reclaim(priv, &bo->base); 637bf215546Sopenharmony_ci} 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_cistatic struct pb_slabs *get_slabs(struct amdgpu_winsys *ws, uint64_t size) 640bf215546Sopenharmony_ci{ 641bf215546Sopenharmony_ci /* Find the correct slab allocator for the given size. */ 642bf215546Sopenharmony_ci for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { 643bf215546Sopenharmony_ci struct pb_slabs *slabs = &ws->bo_slabs[i]; 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci if (size <= 1 << (slabs->min_order + slabs->num_orders - 1)) 646bf215546Sopenharmony_ci return slabs; 647bf215546Sopenharmony_ci } 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_ci assert(0); 650bf215546Sopenharmony_ci return NULL; 651bf215546Sopenharmony_ci} 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_cistatic unsigned get_slab_wasted_size(struct amdgpu_winsys *ws, struct amdgpu_winsys_bo *bo) 654bf215546Sopenharmony_ci{ 655bf215546Sopenharmony_ci assert(bo->base.size <= bo->u.slab.entry.entry_size); 656bf215546Sopenharmony_ci assert(bo->base.size < (1 << bo->base.alignment_log2) || 657bf215546Sopenharmony_ci bo->base.size < 1 << ws->bo_slabs[0].min_order || 658bf215546Sopenharmony_ci bo->base.size > bo->u.slab.entry.entry_size / 2); 659bf215546Sopenharmony_ci return bo->u.slab.entry.entry_size - bo->base.size; 660bf215546Sopenharmony_ci} 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_cistatic void amdgpu_bo_slab_destroy(struct radeon_winsys *rws, struct pb_buffer *_buf) 663bf215546Sopenharmony_ci{ 664bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 665bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); 666bf215546Sopenharmony_ci struct pb_slabs *slabs; 667bf215546Sopenharmony_ci 668bf215546Sopenharmony_ci assert(!bo->bo); 669bf215546Sopenharmony_ci 670bf215546Sopenharmony_ci slabs = get_slabs(ws, bo->base.size); 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci if (bo->base.placement & RADEON_DOMAIN_VRAM) 673bf215546Sopenharmony_ci ws->slab_wasted_vram -= get_slab_wasted_size(ws, bo); 674bf215546Sopenharmony_ci else 675bf215546Sopenharmony_ci ws->slab_wasted_gtt -= get_slab_wasted_size(ws, bo); 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci pb_slab_free(slabs, &bo->u.slab.entry); 678bf215546Sopenharmony_ci} 679bf215546Sopenharmony_ci 680bf215546Sopenharmony_cistatic const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = { 681bf215546Sopenharmony_ci /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */ 682bf215546Sopenharmony_ci (void*)amdgpu_bo_slab_destroy 683bf215546Sopenharmony_ci /* other functions are never called */ 684bf215546Sopenharmony_ci}; 685bf215546Sopenharmony_ci 686bf215546Sopenharmony_ci/* Return the power of two size of a slab entry matching the input size. */ 687bf215546Sopenharmony_cistatic unsigned get_slab_pot_entry_size(struct amdgpu_winsys *ws, unsigned size) 688bf215546Sopenharmony_ci{ 689bf215546Sopenharmony_ci unsigned entry_size = util_next_power_of_two(size); 690bf215546Sopenharmony_ci unsigned min_entry_size = 1 << ws->bo_slabs[0].min_order; 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_ci return MAX2(entry_size, min_entry_size); 693bf215546Sopenharmony_ci} 694bf215546Sopenharmony_ci 695bf215546Sopenharmony_ci/* Return the slab entry alignment. */ 696bf215546Sopenharmony_cistatic unsigned get_slab_entry_alignment(struct amdgpu_winsys *ws, unsigned size) 697bf215546Sopenharmony_ci{ 698bf215546Sopenharmony_ci unsigned entry_size = get_slab_pot_entry_size(ws, size); 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_ci if (size <= entry_size * 3 / 4) 701bf215546Sopenharmony_ci return entry_size / 4; 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci return entry_size; 704bf215546Sopenharmony_ci} 705bf215546Sopenharmony_ci 706bf215546Sopenharmony_cistruct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, 707bf215546Sopenharmony_ci unsigned group_index) 708bf215546Sopenharmony_ci{ 709bf215546Sopenharmony_ci struct amdgpu_winsys *ws = priv; 710bf215546Sopenharmony_ci struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab); 711bf215546Sopenharmony_ci enum radeon_bo_domain domains = radeon_domain_from_heap(heap); 712bf215546Sopenharmony_ci enum radeon_bo_flag flags = radeon_flags_from_heap(heap); 713bf215546Sopenharmony_ci uint32_t base_id; 714bf215546Sopenharmony_ci unsigned slab_size = 0; 715bf215546Sopenharmony_ci 716bf215546Sopenharmony_ci if (!slab) 717bf215546Sopenharmony_ci return NULL; 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_ci /* Determine the slab buffer size. */ 720bf215546Sopenharmony_ci for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { 721bf215546Sopenharmony_ci unsigned max_entry_size = 1 << (ws->bo_slabs[i].min_order + ws->bo_slabs[i].num_orders - 1); 722bf215546Sopenharmony_ci 723bf215546Sopenharmony_ci if (entry_size <= max_entry_size) { 724bf215546Sopenharmony_ci /* The slab size is twice the size of the largest possible entry. */ 725bf215546Sopenharmony_ci slab_size = max_entry_size * 2; 726bf215546Sopenharmony_ci 727bf215546Sopenharmony_ci if (!util_is_power_of_two_nonzero(entry_size)) { 728bf215546Sopenharmony_ci assert(util_is_power_of_two_nonzero(entry_size * 4 / 3)); 729bf215546Sopenharmony_ci 730bf215546Sopenharmony_ci /* If the entry size is 3/4 of a power of two, we would waste space and not gain 731bf215546Sopenharmony_ci * anything if we allocated only twice the power of two for the backing buffer: 732bf215546Sopenharmony_ci * 2 * 3/4 = 1.5 usable with buffer size 2 733bf215546Sopenharmony_ci * 734bf215546Sopenharmony_ci * Allocating 5 times the entry size leads us to the next power of two and results 735bf215546Sopenharmony_ci * in a much better memory utilization: 736bf215546Sopenharmony_ci * 5 * 3/4 = 3.75 usable with buffer size 4 737bf215546Sopenharmony_ci */ 738bf215546Sopenharmony_ci if (entry_size * 5 > slab_size) 739bf215546Sopenharmony_ci slab_size = util_next_power_of_two(entry_size * 5); 740bf215546Sopenharmony_ci } 741bf215546Sopenharmony_ci 742bf215546Sopenharmony_ci /* The largest slab should have the same size as the PTE fragment 743bf215546Sopenharmony_ci * size to get faster address translation. 744bf215546Sopenharmony_ci */ 745bf215546Sopenharmony_ci if (i == NUM_SLAB_ALLOCATORS - 1 && 746bf215546Sopenharmony_ci slab_size < ws->info.pte_fragment_size) 747bf215546Sopenharmony_ci slab_size = ws->info.pte_fragment_size; 748bf215546Sopenharmony_ci break; 749bf215546Sopenharmony_ci } 750bf215546Sopenharmony_ci } 751bf215546Sopenharmony_ci assert(slab_size != 0); 752bf215546Sopenharmony_ci 753bf215546Sopenharmony_ci slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(ws, 754bf215546Sopenharmony_ci slab_size, slab_size, 755bf215546Sopenharmony_ci domains, flags)); 756bf215546Sopenharmony_ci if (!slab->buffer) 757bf215546Sopenharmony_ci goto fail; 758bf215546Sopenharmony_ci 759bf215546Sopenharmony_ci slab_size = slab->buffer->base.size; 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_ci slab->base.num_entries = slab_size / entry_size; 762bf215546Sopenharmony_ci slab->base.num_free = slab->base.num_entries; 763bf215546Sopenharmony_ci slab->entry_size = entry_size; 764bf215546Sopenharmony_ci slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries)); 765bf215546Sopenharmony_ci if (!slab->entries) 766bf215546Sopenharmony_ci goto fail_buffer; 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ci list_inithead(&slab->base.free); 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_ci base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, slab->base.num_entries); 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_ci for (unsigned i = 0; i < slab->base.num_entries; ++i) { 773bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = &slab->entries[i]; 774bf215546Sopenharmony_ci 775bf215546Sopenharmony_ci simple_mtx_init(&bo->lock, mtx_plain); 776bf215546Sopenharmony_ci bo->base.alignment_log2 = util_logbase2(get_slab_entry_alignment(ws, entry_size)); 777bf215546Sopenharmony_ci bo->base.size = entry_size; 778bf215546Sopenharmony_ci bo->base.vtbl = &amdgpu_winsys_bo_slab_vtbl; 779bf215546Sopenharmony_ci bo->va = slab->buffer->va + i * entry_size; 780bf215546Sopenharmony_ci bo->base.placement = domains; 781bf215546Sopenharmony_ci bo->unique_id = base_id + i; 782bf215546Sopenharmony_ci bo->u.slab.entry.slab = &slab->base; 783bf215546Sopenharmony_ci bo->u.slab.entry.group_index = group_index; 784bf215546Sopenharmony_ci bo->u.slab.entry.entry_size = entry_size; 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_ci if (slab->buffer->bo) { 787bf215546Sopenharmony_ci /* The slab is not suballocated. */ 788bf215546Sopenharmony_ci bo->u.slab.real = slab->buffer; 789bf215546Sopenharmony_ci } else { 790bf215546Sopenharmony_ci /* The slab is allocated out of a bigger slab. */ 791bf215546Sopenharmony_ci bo->u.slab.real = slab->buffer->u.slab.real; 792bf215546Sopenharmony_ci assert(bo->u.slab.real->bo); 793bf215546Sopenharmony_ci } 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_ci list_addtail(&bo->u.slab.entry.head, &slab->base.free); 796bf215546Sopenharmony_ci } 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci /* Wasted alignment due to slabs with 3/4 allocations being aligned to a power of two. */ 799bf215546Sopenharmony_ci assert(slab->base.num_entries * entry_size <= slab_size); 800bf215546Sopenharmony_ci if (domains & RADEON_DOMAIN_VRAM) 801bf215546Sopenharmony_ci ws->slab_wasted_vram += slab_size - slab->base.num_entries * entry_size; 802bf215546Sopenharmony_ci else 803bf215546Sopenharmony_ci ws->slab_wasted_gtt += slab_size - slab->base.num_entries * entry_size; 804bf215546Sopenharmony_ci 805bf215546Sopenharmony_ci return &slab->base; 806bf215546Sopenharmony_ci 807bf215546Sopenharmony_cifail_buffer: 808bf215546Sopenharmony_ci amdgpu_winsys_bo_reference(ws, &slab->buffer, NULL); 809bf215546Sopenharmony_cifail: 810bf215546Sopenharmony_ci FREE(slab); 811bf215546Sopenharmony_ci return NULL; 812bf215546Sopenharmony_ci} 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_civoid amdgpu_bo_slab_free(struct amdgpu_winsys *ws, struct pb_slab *pslab) 815bf215546Sopenharmony_ci{ 816bf215546Sopenharmony_ci struct amdgpu_slab *slab = amdgpu_slab(pslab); 817bf215546Sopenharmony_ci unsigned slab_size = slab->buffer->base.size; 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_ci assert(slab->base.num_entries * slab->entry_size <= slab_size); 820bf215546Sopenharmony_ci if (slab->buffer->base.placement & RADEON_DOMAIN_VRAM) 821bf215546Sopenharmony_ci ws->slab_wasted_vram -= slab_size - slab->base.num_entries * slab->entry_size; 822bf215546Sopenharmony_ci else 823bf215546Sopenharmony_ci ws->slab_wasted_gtt -= slab_size - slab->base.num_entries * slab->entry_size; 824bf215546Sopenharmony_ci 825bf215546Sopenharmony_ci for (unsigned i = 0; i < slab->base.num_entries; ++i) { 826bf215546Sopenharmony_ci amdgpu_bo_remove_fences(&slab->entries[i]); 827bf215546Sopenharmony_ci simple_mtx_destroy(&slab->entries[i].lock); 828bf215546Sopenharmony_ci } 829bf215546Sopenharmony_ci 830bf215546Sopenharmony_ci FREE(slab->entries); 831bf215546Sopenharmony_ci amdgpu_winsys_bo_reference(ws, &slab->buffer, NULL); 832bf215546Sopenharmony_ci FREE(slab); 833bf215546Sopenharmony_ci} 834bf215546Sopenharmony_ci 835bf215546Sopenharmony_ci#if DEBUG_SPARSE_COMMITS 836bf215546Sopenharmony_cistatic void 837bf215546Sopenharmony_cisparse_dump(struct amdgpu_winsys_bo *bo, const char *func) 838bf215546Sopenharmony_ci{ 839bf215546Sopenharmony_ci fprintf(stderr, "%s: %p (size=%"PRIu64", num_va_pages=%u) @ %s\n" 840bf215546Sopenharmony_ci "Commitments:\n", 841bf215546Sopenharmony_ci __func__, bo, bo->base.size, bo->u.sparse.num_va_pages, func); 842bf215546Sopenharmony_ci 843bf215546Sopenharmony_ci struct amdgpu_sparse_backing *span_backing = NULL; 844bf215546Sopenharmony_ci uint32_t span_first_backing_page = 0; 845bf215546Sopenharmony_ci uint32_t span_first_va_page = 0; 846bf215546Sopenharmony_ci uint32_t va_page = 0; 847bf215546Sopenharmony_ci 848bf215546Sopenharmony_ci for (;;) { 849bf215546Sopenharmony_ci struct amdgpu_sparse_backing *backing = 0; 850bf215546Sopenharmony_ci uint32_t backing_page = 0; 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci if (va_page < bo->u.sparse.num_va_pages) { 853bf215546Sopenharmony_ci backing = bo->u.sparse.commitments[va_page].backing; 854bf215546Sopenharmony_ci backing_page = bo->u.sparse.commitments[va_page].page; 855bf215546Sopenharmony_ci } 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_ci if (span_backing && 858bf215546Sopenharmony_ci (backing != span_backing || 859bf215546Sopenharmony_ci backing_page != span_first_backing_page + (va_page - span_first_va_page))) { 860bf215546Sopenharmony_ci fprintf(stderr, " %u..%u: backing=%p:%u..%u\n", 861bf215546Sopenharmony_ci span_first_va_page, va_page - 1, span_backing, 862bf215546Sopenharmony_ci span_first_backing_page, 863bf215546Sopenharmony_ci span_first_backing_page + (va_page - span_first_va_page) - 1); 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_ci span_backing = NULL; 866bf215546Sopenharmony_ci } 867bf215546Sopenharmony_ci 868bf215546Sopenharmony_ci if (va_page >= bo->u.sparse.num_va_pages) 869bf215546Sopenharmony_ci break; 870bf215546Sopenharmony_ci 871bf215546Sopenharmony_ci if (backing && !span_backing) { 872bf215546Sopenharmony_ci span_backing = backing; 873bf215546Sopenharmony_ci span_first_backing_page = backing_page; 874bf215546Sopenharmony_ci span_first_va_page = va_page; 875bf215546Sopenharmony_ci } 876bf215546Sopenharmony_ci 877bf215546Sopenharmony_ci va_page++; 878bf215546Sopenharmony_ci } 879bf215546Sopenharmony_ci 880bf215546Sopenharmony_ci fprintf(stderr, "Backing:\n"); 881bf215546Sopenharmony_ci 882bf215546Sopenharmony_ci list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) { 883bf215546Sopenharmony_ci fprintf(stderr, " %p (size=%"PRIu64")\n", backing, backing->bo->base.size); 884bf215546Sopenharmony_ci for (unsigned i = 0; i < backing->num_chunks; ++i) 885bf215546Sopenharmony_ci fprintf(stderr, " %u..%u\n", backing->chunks[i].begin, backing->chunks[i].end); 886bf215546Sopenharmony_ci } 887bf215546Sopenharmony_ci} 888bf215546Sopenharmony_ci#endif 889bf215546Sopenharmony_ci 890bf215546Sopenharmony_ci/* 891bf215546Sopenharmony_ci * Attempt to allocate the given number of backing pages. Fewer pages may be 892bf215546Sopenharmony_ci * allocated (depending on the fragmentation of existing backing buffers), 893bf215546Sopenharmony_ci * which will be reflected by a change to *pnum_pages. 894bf215546Sopenharmony_ci */ 895bf215546Sopenharmony_cistatic struct amdgpu_sparse_backing * 896bf215546Sopenharmony_cisparse_backing_alloc(struct amdgpu_winsys *ws, struct amdgpu_winsys_bo *bo, 897bf215546Sopenharmony_ci uint32_t *pstart_page, uint32_t *pnum_pages) 898bf215546Sopenharmony_ci{ 899bf215546Sopenharmony_ci struct amdgpu_sparse_backing *best_backing; 900bf215546Sopenharmony_ci unsigned best_idx; 901bf215546Sopenharmony_ci uint32_t best_num_pages; 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_ci best_backing = NULL; 904bf215546Sopenharmony_ci best_idx = 0; 905bf215546Sopenharmony_ci best_num_pages = 0; 906bf215546Sopenharmony_ci 907bf215546Sopenharmony_ci /* This is a very simple and inefficient best-fit algorithm. */ 908bf215546Sopenharmony_ci list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) { 909bf215546Sopenharmony_ci for (unsigned idx = 0; idx < backing->num_chunks; ++idx) { 910bf215546Sopenharmony_ci uint32_t cur_num_pages = backing->chunks[idx].end - backing->chunks[idx].begin; 911bf215546Sopenharmony_ci if ((best_num_pages < *pnum_pages && cur_num_pages > best_num_pages) || 912bf215546Sopenharmony_ci (best_num_pages > *pnum_pages && cur_num_pages < best_num_pages)) { 913bf215546Sopenharmony_ci best_backing = backing; 914bf215546Sopenharmony_ci best_idx = idx; 915bf215546Sopenharmony_ci best_num_pages = cur_num_pages; 916bf215546Sopenharmony_ci } 917bf215546Sopenharmony_ci } 918bf215546Sopenharmony_ci } 919bf215546Sopenharmony_ci 920bf215546Sopenharmony_ci /* Allocate a new backing buffer if necessary. */ 921bf215546Sopenharmony_ci if (!best_backing) { 922bf215546Sopenharmony_ci struct pb_buffer *buf; 923bf215546Sopenharmony_ci uint64_t size; 924bf215546Sopenharmony_ci uint32_t pages; 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_ci best_backing = CALLOC_STRUCT(amdgpu_sparse_backing); 927bf215546Sopenharmony_ci if (!best_backing) 928bf215546Sopenharmony_ci return NULL; 929bf215546Sopenharmony_ci 930bf215546Sopenharmony_ci best_backing->max_chunks = 4; 931bf215546Sopenharmony_ci best_backing->chunks = CALLOC(best_backing->max_chunks, 932bf215546Sopenharmony_ci sizeof(*best_backing->chunks)); 933bf215546Sopenharmony_ci if (!best_backing->chunks) { 934bf215546Sopenharmony_ci FREE(best_backing); 935bf215546Sopenharmony_ci return NULL; 936bf215546Sopenharmony_ci } 937bf215546Sopenharmony_ci 938bf215546Sopenharmony_ci assert(bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.size, RADEON_SPARSE_PAGE_SIZE)); 939bf215546Sopenharmony_ci 940bf215546Sopenharmony_ci size = MIN3(bo->base.size / 16, 941bf215546Sopenharmony_ci 8 * 1024 * 1024, 942bf215546Sopenharmony_ci bo->base.size - (uint64_t)bo->u.sparse.num_backing_pages * RADEON_SPARSE_PAGE_SIZE); 943bf215546Sopenharmony_ci size = MAX2(size, RADEON_SPARSE_PAGE_SIZE); 944bf215546Sopenharmony_ci 945bf215546Sopenharmony_ci buf = amdgpu_bo_create(ws, size, RADEON_SPARSE_PAGE_SIZE, 946bf215546Sopenharmony_ci bo->base.placement, 947bf215546Sopenharmony_ci (bo->base.usage & ~RADEON_FLAG_SPARSE & 948bf215546Sopenharmony_ci /* Set the interprocess sharing flag to disable pb_cache because 949bf215546Sopenharmony_ci * amdgpu_bo_wait doesn't wait for active CS jobs. 950bf215546Sopenharmony_ci */ 951bf215546Sopenharmony_ci ~RADEON_FLAG_NO_INTERPROCESS_SHARING) | RADEON_FLAG_NO_SUBALLOC); 952bf215546Sopenharmony_ci if (!buf) { 953bf215546Sopenharmony_ci FREE(best_backing->chunks); 954bf215546Sopenharmony_ci FREE(best_backing); 955bf215546Sopenharmony_ci return NULL; 956bf215546Sopenharmony_ci } 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_ci /* We might have gotten a bigger buffer than requested via caching. */ 959bf215546Sopenharmony_ci pages = buf->size / RADEON_SPARSE_PAGE_SIZE; 960bf215546Sopenharmony_ci 961bf215546Sopenharmony_ci best_backing->bo = amdgpu_winsys_bo(buf); 962bf215546Sopenharmony_ci best_backing->num_chunks = 1; 963bf215546Sopenharmony_ci best_backing->chunks[0].begin = 0; 964bf215546Sopenharmony_ci best_backing->chunks[0].end = pages; 965bf215546Sopenharmony_ci 966bf215546Sopenharmony_ci list_add(&best_backing->list, &bo->u.sparse.backing); 967bf215546Sopenharmony_ci bo->u.sparse.num_backing_pages += pages; 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_ci best_idx = 0; 970bf215546Sopenharmony_ci best_num_pages = pages; 971bf215546Sopenharmony_ci } 972bf215546Sopenharmony_ci 973bf215546Sopenharmony_ci *pnum_pages = MIN2(*pnum_pages, best_num_pages); 974bf215546Sopenharmony_ci *pstart_page = best_backing->chunks[best_idx].begin; 975bf215546Sopenharmony_ci best_backing->chunks[best_idx].begin += *pnum_pages; 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_ci if (best_backing->chunks[best_idx].begin >= best_backing->chunks[best_idx].end) { 978bf215546Sopenharmony_ci memmove(&best_backing->chunks[best_idx], &best_backing->chunks[best_idx + 1], 979bf215546Sopenharmony_ci sizeof(*best_backing->chunks) * (best_backing->num_chunks - best_idx - 1)); 980bf215546Sopenharmony_ci best_backing->num_chunks--; 981bf215546Sopenharmony_ci } 982bf215546Sopenharmony_ci 983bf215546Sopenharmony_ci return best_backing; 984bf215546Sopenharmony_ci} 985bf215546Sopenharmony_ci 986bf215546Sopenharmony_cistatic void 987bf215546Sopenharmony_cisparse_free_backing_buffer(struct amdgpu_winsys *ws, struct amdgpu_winsys_bo *bo, 988bf215546Sopenharmony_ci struct amdgpu_sparse_backing *backing) 989bf215546Sopenharmony_ci{ 990bf215546Sopenharmony_ci bo->u.sparse.num_backing_pages -= backing->bo->base.size / RADEON_SPARSE_PAGE_SIZE; 991bf215546Sopenharmony_ci 992bf215546Sopenharmony_ci simple_mtx_lock(&ws->bo_fence_lock); 993bf215546Sopenharmony_ci amdgpu_add_fences(backing->bo, bo->num_fences, bo->fences); 994bf215546Sopenharmony_ci simple_mtx_unlock(&ws->bo_fence_lock); 995bf215546Sopenharmony_ci 996bf215546Sopenharmony_ci list_del(&backing->list); 997bf215546Sopenharmony_ci amdgpu_winsys_bo_reference(ws, &backing->bo, NULL); 998bf215546Sopenharmony_ci FREE(backing->chunks); 999bf215546Sopenharmony_ci FREE(backing); 1000bf215546Sopenharmony_ci} 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_ci/* 1003bf215546Sopenharmony_ci * Return a range of pages from the given backing buffer back into the 1004bf215546Sopenharmony_ci * free structure. 1005bf215546Sopenharmony_ci */ 1006bf215546Sopenharmony_cistatic bool 1007bf215546Sopenharmony_cisparse_backing_free(struct amdgpu_winsys *ws, struct amdgpu_winsys_bo *bo, 1008bf215546Sopenharmony_ci struct amdgpu_sparse_backing *backing, 1009bf215546Sopenharmony_ci uint32_t start_page, uint32_t num_pages) 1010bf215546Sopenharmony_ci{ 1011bf215546Sopenharmony_ci uint32_t end_page = start_page + num_pages; 1012bf215546Sopenharmony_ci unsigned low = 0; 1013bf215546Sopenharmony_ci unsigned high = backing->num_chunks; 1014bf215546Sopenharmony_ci 1015bf215546Sopenharmony_ci /* Find the first chunk with begin >= start_page. */ 1016bf215546Sopenharmony_ci while (low < high) { 1017bf215546Sopenharmony_ci unsigned mid = low + (high - low) / 2; 1018bf215546Sopenharmony_ci 1019bf215546Sopenharmony_ci if (backing->chunks[mid].begin >= start_page) 1020bf215546Sopenharmony_ci high = mid; 1021bf215546Sopenharmony_ci else 1022bf215546Sopenharmony_ci low = mid + 1; 1023bf215546Sopenharmony_ci } 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci assert(low >= backing->num_chunks || end_page <= backing->chunks[low].begin); 1026bf215546Sopenharmony_ci assert(low == 0 || backing->chunks[low - 1].end <= start_page); 1027bf215546Sopenharmony_ci 1028bf215546Sopenharmony_ci if (low > 0 && backing->chunks[low - 1].end == start_page) { 1029bf215546Sopenharmony_ci backing->chunks[low - 1].end = end_page; 1030bf215546Sopenharmony_ci 1031bf215546Sopenharmony_ci if (low < backing->num_chunks && end_page == backing->chunks[low].begin) { 1032bf215546Sopenharmony_ci backing->chunks[low - 1].end = backing->chunks[low].end; 1033bf215546Sopenharmony_ci memmove(&backing->chunks[low], &backing->chunks[low + 1], 1034bf215546Sopenharmony_ci sizeof(*backing->chunks) * (backing->num_chunks - low - 1)); 1035bf215546Sopenharmony_ci backing->num_chunks--; 1036bf215546Sopenharmony_ci } 1037bf215546Sopenharmony_ci } else if (low < backing->num_chunks && end_page == backing->chunks[low].begin) { 1038bf215546Sopenharmony_ci backing->chunks[low].begin = start_page; 1039bf215546Sopenharmony_ci } else { 1040bf215546Sopenharmony_ci if (backing->num_chunks >= backing->max_chunks) { 1041bf215546Sopenharmony_ci unsigned new_max_chunks = 2 * backing->max_chunks; 1042bf215546Sopenharmony_ci struct amdgpu_sparse_backing_chunk *new_chunks = 1043bf215546Sopenharmony_ci REALLOC(backing->chunks, 1044bf215546Sopenharmony_ci sizeof(*backing->chunks) * backing->max_chunks, 1045bf215546Sopenharmony_ci sizeof(*backing->chunks) * new_max_chunks); 1046bf215546Sopenharmony_ci if (!new_chunks) 1047bf215546Sopenharmony_ci return false; 1048bf215546Sopenharmony_ci 1049bf215546Sopenharmony_ci backing->max_chunks = new_max_chunks; 1050bf215546Sopenharmony_ci backing->chunks = new_chunks; 1051bf215546Sopenharmony_ci } 1052bf215546Sopenharmony_ci 1053bf215546Sopenharmony_ci memmove(&backing->chunks[low + 1], &backing->chunks[low], 1054bf215546Sopenharmony_ci sizeof(*backing->chunks) * (backing->num_chunks - low)); 1055bf215546Sopenharmony_ci backing->chunks[low].begin = start_page; 1056bf215546Sopenharmony_ci backing->chunks[low].end = end_page; 1057bf215546Sopenharmony_ci backing->num_chunks++; 1058bf215546Sopenharmony_ci } 1059bf215546Sopenharmony_ci 1060bf215546Sopenharmony_ci if (backing->num_chunks == 1 && backing->chunks[0].begin == 0 && 1061bf215546Sopenharmony_ci backing->chunks[0].end == backing->bo->base.size / RADEON_SPARSE_PAGE_SIZE) 1062bf215546Sopenharmony_ci sparse_free_backing_buffer(ws, bo, backing); 1063bf215546Sopenharmony_ci 1064bf215546Sopenharmony_ci return true; 1065bf215546Sopenharmony_ci} 1066bf215546Sopenharmony_ci 1067bf215546Sopenharmony_cistatic void amdgpu_bo_sparse_destroy(struct radeon_winsys *rws, struct pb_buffer *_buf) 1068bf215546Sopenharmony_ci{ 1069bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 1070bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); 1071bf215546Sopenharmony_ci int r; 1072bf215546Sopenharmony_ci 1073bf215546Sopenharmony_ci assert(!bo->bo && bo->base.usage & RADEON_FLAG_SPARSE); 1074bf215546Sopenharmony_ci 1075bf215546Sopenharmony_ci r = amdgpu_bo_va_op_raw(ws->dev, NULL, 0, 1076bf215546Sopenharmony_ci (uint64_t)bo->u.sparse.num_va_pages * RADEON_SPARSE_PAGE_SIZE, 1077bf215546Sopenharmony_ci bo->va, 0, AMDGPU_VA_OP_CLEAR); 1078bf215546Sopenharmony_ci if (r) { 1079bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: clearing PRT VA region on destroy failed (%d)\n", r); 1080bf215546Sopenharmony_ci } 1081bf215546Sopenharmony_ci 1082bf215546Sopenharmony_ci while (!list_is_empty(&bo->u.sparse.backing)) { 1083bf215546Sopenharmony_ci sparse_free_backing_buffer(ws, bo, 1084bf215546Sopenharmony_ci container_of(bo->u.sparse.backing.next, 1085bf215546Sopenharmony_ci struct amdgpu_sparse_backing, list)); 1086bf215546Sopenharmony_ci } 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci amdgpu_va_range_free(bo->u.sparse.va_handle); 1089bf215546Sopenharmony_ci FREE(bo->u.sparse.commitments); 1090bf215546Sopenharmony_ci simple_mtx_destroy(&bo->lock); 1091bf215546Sopenharmony_ci FREE(bo); 1092bf215546Sopenharmony_ci} 1093bf215546Sopenharmony_ci 1094bf215546Sopenharmony_cistatic const struct pb_vtbl amdgpu_winsys_bo_sparse_vtbl = { 1095bf215546Sopenharmony_ci /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */ 1096bf215546Sopenharmony_ci (void*)amdgpu_bo_sparse_destroy 1097bf215546Sopenharmony_ci /* other functions are never called */ 1098bf215546Sopenharmony_ci}; 1099bf215546Sopenharmony_ci 1100bf215546Sopenharmony_cistatic struct pb_buffer * 1101bf215546Sopenharmony_ciamdgpu_bo_sparse_create(struct amdgpu_winsys *ws, uint64_t size, 1102bf215546Sopenharmony_ci enum radeon_bo_domain domain, 1103bf215546Sopenharmony_ci enum radeon_bo_flag flags) 1104bf215546Sopenharmony_ci{ 1105bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo; 1106bf215546Sopenharmony_ci uint64_t map_size; 1107bf215546Sopenharmony_ci uint64_t va_gap_size; 1108bf215546Sopenharmony_ci int r; 1109bf215546Sopenharmony_ci 1110bf215546Sopenharmony_ci /* We use 32-bit page numbers; refuse to attempt allocating sparse buffers 1111bf215546Sopenharmony_ci * that exceed this limit. This is not really a restriction: we don't have 1112bf215546Sopenharmony_ci * that much virtual address space anyway. 1113bf215546Sopenharmony_ci */ 1114bf215546Sopenharmony_ci if (size > (uint64_t)INT32_MAX * RADEON_SPARSE_PAGE_SIZE) 1115bf215546Sopenharmony_ci return NULL; 1116bf215546Sopenharmony_ci 1117bf215546Sopenharmony_ci bo = CALLOC_STRUCT(amdgpu_winsys_bo); 1118bf215546Sopenharmony_ci if (!bo) 1119bf215546Sopenharmony_ci return NULL; 1120bf215546Sopenharmony_ci 1121bf215546Sopenharmony_ci simple_mtx_init(&bo->lock, mtx_plain); 1122bf215546Sopenharmony_ci pipe_reference_init(&bo->base.reference, 1); 1123bf215546Sopenharmony_ci bo->base.alignment_log2 = util_logbase2(RADEON_SPARSE_PAGE_SIZE); 1124bf215546Sopenharmony_ci bo->base.size = size; 1125bf215546Sopenharmony_ci bo->base.vtbl = &amdgpu_winsys_bo_sparse_vtbl; 1126bf215546Sopenharmony_ci bo->base.placement = domain; 1127bf215546Sopenharmony_ci bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1); 1128bf215546Sopenharmony_ci bo->base.usage = flags; 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_ci bo->u.sparse.num_va_pages = DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE); 1131bf215546Sopenharmony_ci bo->u.sparse.commitments = CALLOC(bo->u.sparse.num_va_pages, 1132bf215546Sopenharmony_ci sizeof(*bo->u.sparse.commitments)); 1133bf215546Sopenharmony_ci if (!bo->u.sparse.commitments) 1134bf215546Sopenharmony_ci goto error_alloc_commitments; 1135bf215546Sopenharmony_ci 1136bf215546Sopenharmony_ci list_inithead(&bo->u.sparse.backing); 1137bf215546Sopenharmony_ci 1138bf215546Sopenharmony_ci /* For simplicity, we always map a multiple of the page size. */ 1139bf215546Sopenharmony_ci map_size = align64(size, RADEON_SPARSE_PAGE_SIZE); 1140bf215546Sopenharmony_ci va_gap_size = ws->check_vm ? 4 * RADEON_SPARSE_PAGE_SIZE : 0; 1141bf215546Sopenharmony_ci r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, 1142bf215546Sopenharmony_ci map_size + va_gap_size, RADEON_SPARSE_PAGE_SIZE, 1143bf215546Sopenharmony_ci 0, &bo->va, &bo->u.sparse.va_handle, 1144bf215546Sopenharmony_ci AMDGPU_VA_RANGE_HIGH); 1145bf215546Sopenharmony_ci if (r) 1146bf215546Sopenharmony_ci goto error_va_alloc; 1147bf215546Sopenharmony_ci 1148bf215546Sopenharmony_ci r = amdgpu_bo_va_op_raw(ws->dev, NULL, 0, map_size, bo->va, 1149bf215546Sopenharmony_ci AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP); 1150bf215546Sopenharmony_ci if (r) 1151bf215546Sopenharmony_ci goto error_va_map; 1152bf215546Sopenharmony_ci 1153bf215546Sopenharmony_ci return &bo->base; 1154bf215546Sopenharmony_ci 1155bf215546Sopenharmony_cierror_va_map: 1156bf215546Sopenharmony_ci amdgpu_va_range_free(bo->u.sparse.va_handle); 1157bf215546Sopenharmony_cierror_va_alloc: 1158bf215546Sopenharmony_ci FREE(bo->u.sparse.commitments); 1159bf215546Sopenharmony_cierror_alloc_commitments: 1160bf215546Sopenharmony_ci simple_mtx_destroy(&bo->lock); 1161bf215546Sopenharmony_ci FREE(bo); 1162bf215546Sopenharmony_ci return NULL; 1163bf215546Sopenharmony_ci} 1164bf215546Sopenharmony_ci 1165bf215546Sopenharmony_cistatic bool 1166bf215546Sopenharmony_ciamdgpu_bo_sparse_commit(struct radeon_winsys *rws, struct pb_buffer *buf, 1167bf215546Sopenharmony_ci uint64_t offset, uint64_t size, bool commit) 1168bf215546Sopenharmony_ci{ 1169bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 1170bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buf); 1171bf215546Sopenharmony_ci struct amdgpu_sparse_commitment *comm; 1172bf215546Sopenharmony_ci uint32_t va_page, end_va_page; 1173bf215546Sopenharmony_ci bool ok = true; 1174bf215546Sopenharmony_ci int r; 1175bf215546Sopenharmony_ci 1176bf215546Sopenharmony_ci assert(bo->base.usage & RADEON_FLAG_SPARSE); 1177bf215546Sopenharmony_ci assert(offset % RADEON_SPARSE_PAGE_SIZE == 0); 1178bf215546Sopenharmony_ci assert(offset <= bo->base.size); 1179bf215546Sopenharmony_ci assert(size <= bo->base.size - offset); 1180bf215546Sopenharmony_ci assert(size % RADEON_SPARSE_PAGE_SIZE == 0 || offset + size == bo->base.size); 1181bf215546Sopenharmony_ci 1182bf215546Sopenharmony_ci comm = bo->u.sparse.commitments; 1183bf215546Sopenharmony_ci va_page = offset / RADEON_SPARSE_PAGE_SIZE; 1184bf215546Sopenharmony_ci end_va_page = va_page + DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE); 1185bf215546Sopenharmony_ci 1186bf215546Sopenharmony_ci simple_mtx_lock(&bo->lock); 1187bf215546Sopenharmony_ci 1188bf215546Sopenharmony_ci#if DEBUG_SPARSE_COMMITS 1189bf215546Sopenharmony_ci sparse_dump(bo, __func__); 1190bf215546Sopenharmony_ci#endif 1191bf215546Sopenharmony_ci 1192bf215546Sopenharmony_ci if (commit) { 1193bf215546Sopenharmony_ci while (va_page < end_va_page) { 1194bf215546Sopenharmony_ci uint32_t span_va_page; 1195bf215546Sopenharmony_ci 1196bf215546Sopenharmony_ci /* Skip pages that are already committed. */ 1197bf215546Sopenharmony_ci if (comm[va_page].backing) { 1198bf215546Sopenharmony_ci va_page++; 1199bf215546Sopenharmony_ci continue; 1200bf215546Sopenharmony_ci } 1201bf215546Sopenharmony_ci 1202bf215546Sopenharmony_ci /* Determine length of uncommitted span. */ 1203bf215546Sopenharmony_ci span_va_page = va_page; 1204bf215546Sopenharmony_ci while (va_page < end_va_page && !comm[va_page].backing) 1205bf215546Sopenharmony_ci va_page++; 1206bf215546Sopenharmony_ci 1207bf215546Sopenharmony_ci /* Fill the uncommitted span with chunks of backing memory. */ 1208bf215546Sopenharmony_ci while (span_va_page < va_page) { 1209bf215546Sopenharmony_ci struct amdgpu_sparse_backing *backing; 1210bf215546Sopenharmony_ci uint32_t backing_start, backing_size; 1211bf215546Sopenharmony_ci 1212bf215546Sopenharmony_ci backing_size = va_page - span_va_page; 1213bf215546Sopenharmony_ci backing = sparse_backing_alloc(ws, bo, &backing_start, &backing_size); 1214bf215546Sopenharmony_ci if (!backing) { 1215bf215546Sopenharmony_ci ok = false; 1216bf215546Sopenharmony_ci goto out; 1217bf215546Sopenharmony_ci } 1218bf215546Sopenharmony_ci 1219bf215546Sopenharmony_ci r = amdgpu_bo_va_op_raw(ws->dev, backing->bo->bo, 1220bf215546Sopenharmony_ci (uint64_t)backing_start * RADEON_SPARSE_PAGE_SIZE, 1221bf215546Sopenharmony_ci (uint64_t)backing_size * RADEON_SPARSE_PAGE_SIZE, 1222bf215546Sopenharmony_ci bo->va + (uint64_t)span_va_page * RADEON_SPARSE_PAGE_SIZE, 1223bf215546Sopenharmony_ci AMDGPU_VM_PAGE_READABLE | 1224bf215546Sopenharmony_ci AMDGPU_VM_PAGE_WRITEABLE | 1225bf215546Sopenharmony_ci AMDGPU_VM_PAGE_EXECUTABLE, 1226bf215546Sopenharmony_ci AMDGPU_VA_OP_REPLACE); 1227bf215546Sopenharmony_ci if (r) { 1228bf215546Sopenharmony_ci ok = sparse_backing_free(ws, bo, backing, backing_start, backing_size); 1229bf215546Sopenharmony_ci assert(ok && "sufficient memory should already be allocated"); 1230bf215546Sopenharmony_ci 1231bf215546Sopenharmony_ci ok = false; 1232bf215546Sopenharmony_ci goto out; 1233bf215546Sopenharmony_ci } 1234bf215546Sopenharmony_ci 1235bf215546Sopenharmony_ci while (backing_size) { 1236bf215546Sopenharmony_ci comm[span_va_page].backing = backing; 1237bf215546Sopenharmony_ci comm[span_va_page].page = backing_start; 1238bf215546Sopenharmony_ci span_va_page++; 1239bf215546Sopenharmony_ci backing_start++; 1240bf215546Sopenharmony_ci backing_size--; 1241bf215546Sopenharmony_ci } 1242bf215546Sopenharmony_ci } 1243bf215546Sopenharmony_ci } 1244bf215546Sopenharmony_ci } else { 1245bf215546Sopenharmony_ci r = amdgpu_bo_va_op_raw(ws->dev, NULL, 0, 1246bf215546Sopenharmony_ci (uint64_t)(end_va_page - va_page) * RADEON_SPARSE_PAGE_SIZE, 1247bf215546Sopenharmony_ci bo->va + (uint64_t)va_page * RADEON_SPARSE_PAGE_SIZE, 1248bf215546Sopenharmony_ci AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_REPLACE); 1249bf215546Sopenharmony_ci if (r) { 1250bf215546Sopenharmony_ci ok = false; 1251bf215546Sopenharmony_ci goto out; 1252bf215546Sopenharmony_ci } 1253bf215546Sopenharmony_ci 1254bf215546Sopenharmony_ci while (va_page < end_va_page) { 1255bf215546Sopenharmony_ci struct amdgpu_sparse_backing *backing; 1256bf215546Sopenharmony_ci uint32_t backing_start; 1257bf215546Sopenharmony_ci uint32_t span_pages; 1258bf215546Sopenharmony_ci 1259bf215546Sopenharmony_ci /* Skip pages that are already uncommitted. */ 1260bf215546Sopenharmony_ci if (!comm[va_page].backing) { 1261bf215546Sopenharmony_ci va_page++; 1262bf215546Sopenharmony_ci continue; 1263bf215546Sopenharmony_ci } 1264bf215546Sopenharmony_ci 1265bf215546Sopenharmony_ci /* Group contiguous spans of pages. */ 1266bf215546Sopenharmony_ci backing = comm[va_page].backing; 1267bf215546Sopenharmony_ci backing_start = comm[va_page].page; 1268bf215546Sopenharmony_ci comm[va_page].backing = NULL; 1269bf215546Sopenharmony_ci 1270bf215546Sopenharmony_ci span_pages = 1; 1271bf215546Sopenharmony_ci va_page++; 1272bf215546Sopenharmony_ci 1273bf215546Sopenharmony_ci while (va_page < end_va_page && 1274bf215546Sopenharmony_ci comm[va_page].backing == backing && 1275bf215546Sopenharmony_ci comm[va_page].page == backing_start + span_pages) { 1276bf215546Sopenharmony_ci comm[va_page].backing = NULL; 1277bf215546Sopenharmony_ci va_page++; 1278bf215546Sopenharmony_ci span_pages++; 1279bf215546Sopenharmony_ci } 1280bf215546Sopenharmony_ci 1281bf215546Sopenharmony_ci if (!sparse_backing_free(ws, bo, backing, backing_start, span_pages)) { 1282bf215546Sopenharmony_ci /* Couldn't allocate tracking data structures, so we have to leak */ 1283bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: leaking PRT backing memory\n"); 1284bf215546Sopenharmony_ci ok = false; 1285bf215546Sopenharmony_ci } 1286bf215546Sopenharmony_ci } 1287bf215546Sopenharmony_ci } 1288bf215546Sopenharmony_ciout: 1289bf215546Sopenharmony_ci 1290bf215546Sopenharmony_ci simple_mtx_unlock(&bo->lock); 1291bf215546Sopenharmony_ci 1292bf215546Sopenharmony_ci return ok; 1293bf215546Sopenharmony_ci} 1294bf215546Sopenharmony_ci 1295bf215546Sopenharmony_cistatic void amdgpu_buffer_get_metadata(struct radeon_winsys *rws, 1296bf215546Sopenharmony_ci struct pb_buffer *_buf, 1297bf215546Sopenharmony_ci struct radeon_bo_metadata *md, 1298bf215546Sopenharmony_ci struct radeon_surf *surf) 1299bf215546Sopenharmony_ci{ 1300bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 1301bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); 1302bf215546Sopenharmony_ci struct amdgpu_bo_info info = {0}; 1303bf215546Sopenharmony_ci int r; 1304bf215546Sopenharmony_ci 1305bf215546Sopenharmony_ci assert(bo->bo && "must not be called for slab entries"); 1306bf215546Sopenharmony_ci 1307bf215546Sopenharmony_ci r = amdgpu_bo_query_info(bo->bo, &info); 1308bf215546Sopenharmony_ci if (r) 1309bf215546Sopenharmony_ci return; 1310bf215546Sopenharmony_ci 1311bf215546Sopenharmony_ci ac_surface_set_bo_metadata(&ws->info, surf, info.metadata.tiling_info, 1312bf215546Sopenharmony_ci &md->mode); 1313bf215546Sopenharmony_ci 1314bf215546Sopenharmony_ci md->size_metadata = info.metadata.size_metadata; 1315bf215546Sopenharmony_ci memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata)); 1316bf215546Sopenharmony_ci} 1317bf215546Sopenharmony_ci 1318bf215546Sopenharmony_cistatic void amdgpu_buffer_set_metadata(struct radeon_winsys *rws, 1319bf215546Sopenharmony_ci struct pb_buffer *_buf, 1320bf215546Sopenharmony_ci struct radeon_bo_metadata *md, 1321bf215546Sopenharmony_ci struct radeon_surf *surf) 1322bf215546Sopenharmony_ci{ 1323bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 1324bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); 1325bf215546Sopenharmony_ci struct amdgpu_bo_metadata metadata = {0}; 1326bf215546Sopenharmony_ci 1327bf215546Sopenharmony_ci assert(bo->bo && "must not be called for slab entries"); 1328bf215546Sopenharmony_ci 1329bf215546Sopenharmony_ci ac_surface_get_bo_metadata(&ws->info, surf, &metadata.tiling_info); 1330bf215546Sopenharmony_ci 1331bf215546Sopenharmony_ci metadata.size_metadata = md->size_metadata; 1332bf215546Sopenharmony_ci memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata)); 1333bf215546Sopenharmony_ci 1334bf215546Sopenharmony_ci amdgpu_bo_set_metadata(bo->bo, &metadata); 1335bf215546Sopenharmony_ci} 1336bf215546Sopenharmony_ci 1337bf215546Sopenharmony_cistruct pb_buffer * 1338bf215546Sopenharmony_ciamdgpu_bo_create(struct amdgpu_winsys *ws, 1339bf215546Sopenharmony_ci uint64_t size, 1340bf215546Sopenharmony_ci unsigned alignment, 1341bf215546Sopenharmony_ci enum radeon_bo_domain domain, 1342bf215546Sopenharmony_ci enum radeon_bo_flag flags) 1343bf215546Sopenharmony_ci{ 1344bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo; 1345bf215546Sopenharmony_ci 1346bf215546Sopenharmony_ci radeon_canonicalize_bo_flags(&domain, &flags); 1347bf215546Sopenharmony_ci 1348bf215546Sopenharmony_ci /* Handle sparse buffers first. */ 1349bf215546Sopenharmony_ci if (flags & RADEON_FLAG_SPARSE) { 1350bf215546Sopenharmony_ci assert(RADEON_SPARSE_PAGE_SIZE % alignment == 0); 1351bf215546Sopenharmony_ci 1352bf215546Sopenharmony_ci return amdgpu_bo_sparse_create(ws, size, domain, flags); 1353bf215546Sopenharmony_ci } 1354bf215546Sopenharmony_ci 1355bf215546Sopenharmony_ci struct pb_slabs *last_slab = &ws->bo_slabs[NUM_SLAB_ALLOCATORS - 1]; 1356bf215546Sopenharmony_ci unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1); 1357bf215546Sopenharmony_ci int heap = radeon_get_heap_index(domain, flags); 1358bf215546Sopenharmony_ci 1359bf215546Sopenharmony_ci /* Sub-allocate small buffers from slabs. */ 1360bf215546Sopenharmony_ci if (heap >= 0 && size <= max_slab_entry_size) { 1361bf215546Sopenharmony_ci struct pb_slab_entry *entry; 1362bf215546Sopenharmony_ci unsigned alloc_size = size; 1363bf215546Sopenharmony_ci 1364bf215546Sopenharmony_ci /* Always use slabs for sizes less than 4 KB because the kernel aligns 1365bf215546Sopenharmony_ci * everything to 4 KB. 1366bf215546Sopenharmony_ci */ 1367bf215546Sopenharmony_ci if (size < alignment && alignment <= 4 * 1024) 1368bf215546Sopenharmony_ci alloc_size = alignment; 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_ci if (alignment > get_slab_entry_alignment(ws, alloc_size)) { 1371bf215546Sopenharmony_ci /* 3/4 allocations can return too small alignment. Try again with a power of two 1372bf215546Sopenharmony_ci * allocation size. 1373bf215546Sopenharmony_ci */ 1374bf215546Sopenharmony_ci unsigned pot_size = get_slab_pot_entry_size(ws, alloc_size); 1375bf215546Sopenharmony_ci 1376bf215546Sopenharmony_ci if (alignment <= pot_size) { 1377bf215546Sopenharmony_ci /* This size works but wastes some memory to fulfil the alignment. */ 1378bf215546Sopenharmony_ci alloc_size = pot_size; 1379bf215546Sopenharmony_ci } else { 1380bf215546Sopenharmony_ci goto no_slab; /* can't fulfil alignment requirements */ 1381bf215546Sopenharmony_ci } 1382bf215546Sopenharmony_ci } 1383bf215546Sopenharmony_ci 1384bf215546Sopenharmony_ci struct pb_slabs *slabs = get_slabs(ws, alloc_size); 1385bf215546Sopenharmony_ci entry = pb_slab_alloc(slabs, alloc_size, heap); 1386bf215546Sopenharmony_ci if (!entry) { 1387bf215546Sopenharmony_ci /* Clean up buffer managers and try again. */ 1388bf215546Sopenharmony_ci amdgpu_clean_up_buffer_managers(ws); 1389bf215546Sopenharmony_ci 1390bf215546Sopenharmony_ci entry = pb_slab_alloc(slabs, alloc_size, heap); 1391bf215546Sopenharmony_ci } 1392bf215546Sopenharmony_ci if (!entry) 1393bf215546Sopenharmony_ci return NULL; 1394bf215546Sopenharmony_ci 1395bf215546Sopenharmony_ci bo = container_of(entry, struct amdgpu_winsys_bo, u.slab.entry); 1396bf215546Sopenharmony_ci pipe_reference_init(&bo->base.reference, 1); 1397bf215546Sopenharmony_ci bo->base.size = size; 1398bf215546Sopenharmony_ci assert(alignment <= 1 << bo->base.alignment_log2); 1399bf215546Sopenharmony_ci 1400bf215546Sopenharmony_ci if (domain & RADEON_DOMAIN_VRAM) 1401bf215546Sopenharmony_ci ws->slab_wasted_vram += get_slab_wasted_size(ws, bo); 1402bf215546Sopenharmony_ci else 1403bf215546Sopenharmony_ci ws->slab_wasted_gtt += get_slab_wasted_size(ws, bo); 1404bf215546Sopenharmony_ci 1405bf215546Sopenharmony_ci return &bo->base; 1406bf215546Sopenharmony_ci } 1407bf215546Sopenharmony_cino_slab: 1408bf215546Sopenharmony_ci 1409bf215546Sopenharmony_ci /* Align size to page size. This is the minimum alignment for normal 1410bf215546Sopenharmony_ci * BOs. Aligning this here helps the cached bufmgr. Especially small BOs, 1411bf215546Sopenharmony_ci * like constant/uniform buffers, can benefit from better and more reuse. 1412bf215546Sopenharmony_ci */ 1413bf215546Sopenharmony_ci if (domain & RADEON_DOMAIN_VRAM_GTT) { 1414bf215546Sopenharmony_ci size = align64(size, ws->info.gart_page_size); 1415bf215546Sopenharmony_ci alignment = align(alignment, ws->info.gart_page_size); 1416bf215546Sopenharmony_ci } 1417bf215546Sopenharmony_ci 1418bf215546Sopenharmony_ci bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && 1419bf215546Sopenharmony_ci !(flags & RADEON_FLAG_DISCARDABLE); 1420bf215546Sopenharmony_ci 1421bf215546Sopenharmony_ci if (use_reusable_pool) { 1422bf215546Sopenharmony_ci /* RADEON_FLAG_NO_SUBALLOC is irrelevant for the cache. */ 1423bf215546Sopenharmony_ci heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_NO_SUBALLOC); 1424bf215546Sopenharmony_ci assert(heap >= 0 && heap < RADEON_NUM_HEAPS); 1425bf215546Sopenharmony_ci 1426bf215546Sopenharmony_ci /* Get a buffer from the cache. */ 1427bf215546Sopenharmony_ci bo = (struct amdgpu_winsys_bo*) 1428bf215546Sopenharmony_ci pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, 0, heap); 1429bf215546Sopenharmony_ci if (bo) 1430bf215546Sopenharmony_ci return &bo->base; 1431bf215546Sopenharmony_ci } 1432bf215546Sopenharmony_ci 1433bf215546Sopenharmony_ci /* Create a new one. */ 1434bf215546Sopenharmony_ci bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap); 1435bf215546Sopenharmony_ci if (!bo) { 1436bf215546Sopenharmony_ci /* Clean up buffer managers and try again. */ 1437bf215546Sopenharmony_ci amdgpu_clean_up_buffer_managers(ws); 1438bf215546Sopenharmony_ci 1439bf215546Sopenharmony_ci bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap); 1440bf215546Sopenharmony_ci if (!bo) 1441bf215546Sopenharmony_ci return NULL; 1442bf215546Sopenharmony_ci } 1443bf215546Sopenharmony_ci 1444bf215546Sopenharmony_ci return &bo->base; 1445bf215546Sopenharmony_ci} 1446bf215546Sopenharmony_ci 1447bf215546Sopenharmony_cistatic struct pb_buffer * 1448bf215546Sopenharmony_ciamdgpu_buffer_create(struct radeon_winsys *ws, 1449bf215546Sopenharmony_ci uint64_t size, 1450bf215546Sopenharmony_ci unsigned alignment, 1451bf215546Sopenharmony_ci enum radeon_bo_domain domain, 1452bf215546Sopenharmony_ci enum radeon_bo_flag flags) 1453bf215546Sopenharmony_ci{ 1454bf215546Sopenharmony_ci struct pb_buffer * res = amdgpu_bo_create(amdgpu_winsys(ws), size, alignment, domain, 1455bf215546Sopenharmony_ci flags); 1456bf215546Sopenharmony_ci return res; 1457bf215546Sopenharmony_ci} 1458bf215546Sopenharmony_ci 1459bf215546Sopenharmony_cistatic struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws, 1460bf215546Sopenharmony_ci struct winsys_handle *whandle, 1461bf215546Sopenharmony_ci unsigned vm_alignment, 1462bf215546Sopenharmony_ci bool is_prime_linear_buffer) 1463bf215546Sopenharmony_ci{ 1464bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 1465bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = NULL; 1466bf215546Sopenharmony_ci enum amdgpu_bo_handle_type type; 1467bf215546Sopenharmony_ci struct amdgpu_bo_import_result result = {0}; 1468bf215546Sopenharmony_ci uint64_t va; 1469bf215546Sopenharmony_ci amdgpu_va_handle va_handle = NULL; 1470bf215546Sopenharmony_ci struct amdgpu_bo_info info = {0}; 1471bf215546Sopenharmony_ci enum radeon_bo_domain initial = 0; 1472bf215546Sopenharmony_ci enum radeon_bo_flag flags = 0; 1473bf215546Sopenharmony_ci int r; 1474bf215546Sopenharmony_ci 1475bf215546Sopenharmony_ci switch (whandle->type) { 1476bf215546Sopenharmony_ci case WINSYS_HANDLE_TYPE_SHARED: 1477bf215546Sopenharmony_ci type = amdgpu_bo_handle_type_gem_flink_name; 1478bf215546Sopenharmony_ci break; 1479bf215546Sopenharmony_ci case WINSYS_HANDLE_TYPE_FD: 1480bf215546Sopenharmony_ci type = amdgpu_bo_handle_type_dma_buf_fd; 1481bf215546Sopenharmony_ci break; 1482bf215546Sopenharmony_ci default: 1483bf215546Sopenharmony_ci return NULL; 1484bf215546Sopenharmony_ci } 1485bf215546Sopenharmony_ci 1486bf215546Sopenharmony_ci r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result); 1487bf215546Sopenharmony_ci if (r) 1488bf215546Sopenharmony_ci return NULL; 1489bf215546Sopenharmony_ci 1490bf215546Sopenharmony_ci simple_mtx_lock(&ws->bo_export_table_lock); 1491bf215546Sopenharmony_ci bo = util_hash_table_get(ws->bo_export_table, result.buf_handle); 1492bf215546Sopenharmony_ci 1493bf215546Sopenharmony_ci /* If the amdgpu_winsys_bo instance already exists, bump the reference 1494bf215546Sopenharmony_ci * counter and return it. 1495bf215546Sopenharmony_ci */ 1496bf215546Sopenharmony_ci if (bo) { 1497bf215546Sopenharmony_ci p_atomic_inc(&bo->base.reference.count); 1498bf215546Sopenharmony_ci simple_mtx_unlock(&ws->bo_export_table_lock); 1499bf215546Sopenharmony_ci 1500bf215546Sopenharmony_ci /* Release the buffer handle, because we don't need it anymore. 1501bf215546Sopenharmony_ci * This function is returning an existing buffer, which has its own 1502bf215546Sopenharmony_ci * handle. 1503bf215546Sopenharmony_ci */ 1504bf215546Sopenharmony_ci amdgpu_bo_free(result.buf_handle); 1505bf215546Sopenharmony_ci return &bo->base; 1506bf215546Sopenharmony_ci } 1507bf215546Sopenharmony_ci 1508bf215546Sopenharmony_ci /* Get initial domains. */ 1509bf215546Sopenharmony_ci r = amdgpu_bo_query_info(result.buf_handle, &info); 1510bf215546Sopenharmony_ci if (r) 1511bf215546Sopenharmony_ci goto error; 1512bf215546Sopenharmony_ci 1513bf215546Sopenharmony_ci r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, 1514bf215546Sopenharmony_ci result.alloc_size, 1515bf215546Sopenharmony_ci amdgpu_get_optimal_alignment(ws, result.alloc_size, 1516bf215546Sopenharmony_ci vm_alignment), 1517bf215546Sopenharmony_ci 0, &va, &va_handle, AMDGPU_VA_RANGE_HIGH); 1518bf215546Sopenharmony_ci if (r) 1519bf215546Sopenharmony_ci goto error; 1520bf215546Sopenharmony_ci 1521bf215546Sopenharmony_ci bo = CALLOC_STRUCT(amdgpu_winsys_bo); 1522bf215546Sopenharmony_ci if (!bo) 1523bf215546Sopenharmony_ci goto error; 1524bf215546Sopenharmony_ci 1525bf215546Sopenharmony_ci r = amdgpu_bo_va_op_raw(ws->dev, result.buf_handle, 0, result.alloc_size, va, 1526bf215546Sopenharmony_ci AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | 1527bf215546Sopenharmony_ci AMDGPU_VM_PAGE_EXECUTABLE | 1528bf215546Sopenharmony_ci (is_prime_linear_buffer ? AMDGPU_VM_MTYPE_UC : 0), 1529bf215546Sopenharmony_ci AMDGPU_VA_OP_MAP); 1530bf215546Sopenharmony_ci if (r) 1531bf215546Sopenharmony_ci goto error; 1532bf215546Sopenharmony_ci 1533bf215546Sopenharmony_ci if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM) 1534bf215546Sopenharmony_ci initial |= RADEON_DOMAIN_VRAM; 1535bf215546Sopenharmony_ci if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT) 1536bf215546Sopenharmony_ci initial |= RADEON_DOMAIN_GTT; 1537bf215546Sopenharmony_ci if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) 1538bf215546Sopenharmony_ci flags |= RADEON_FLAG_NO_CPU_ACCESS; 1539bf215546Sopenharmony_ci if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) 1540bf215546Sopenharmony_ci flags |= RADEON_FLAG_GTT_WC; 1541bf215546Sopenharmony_ci if (info.alloc_flags & AMDGPU_GEM_CREATE_ENCRYPTED) { 1542bf215546Sopenharmony_ci /* Imports are always possible even if the importer isn't using TMZ. 1543bf215546Sopenharmony_ci * For instance libweston needs to import the buffer to be able to determine 1544bf215546Sopenharmony_ci * if it can be used for scanout. 1545bf215546Sopenharmony_ci */ 1546bf215546Sopenharmony_ci flags |= RADEON_FLAG_ENCRYPTED; 1547bf215546Sopenharmony_ci *((bool*)&rws->uses_secure_bos) = true; 1548bf215546Sopenharmony_ci } 1549bf215546Sopenharmony_ci 1550bf215546Sopenharmony_ci /* Initialize the structure. */ 1551bf215546Sopenharmony_ci simple_mtx_init(&bo->lock, mtx_plain); 1552bf215546Sopenharmony_ci pipe_reference_init(&bo->base.reference, 1); 1553bf215546Sopenharmony_ci bo->base.alignment_log2 = util_logbase2(info.phys_alignment); 1554bf215546Sopenharmony_ci bo->bo = result.buf_handle; 1555bf215546Sopenharmony_ci bo->base.size = result.alloc_size; 1556bf215546Sopenharmony_ci bo->base.vtbl = &amdgpu_winsys_bo_vtbl; 1557bf215546Sopenharmony_ci bo->va = va; 1558bf215546Sopenharmony_ci bo->u.real.va_handle = va_handle; 1559bf215546Sopenharmony_ci bo->base.placement = initial; 1560bf215546Sopenharmony_ci bo->base.usage = flags; 1561bf215546Sopenharmony_ci bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1); 1562bf215546Sopenharmony_ci bo->u.real.is_shared = true; 1563bf215546Sopenharmony_ci 1564bf215546Sopenharmony_ci if (bo->base.placement & RADEON_DOMAIN_VRAM) 1565bf215546Sopenharmony_ci ws->allocated_vram += align64(bo->base.size, ws->info.gart_page_size); 1566bf215546Sopenharmony_ci else if (bo->base.placement & RADEON_DOMAIN_GTT) 1567bf215546Sopenharmony_ci ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size); 1568bf215546Sopenharmony_ci 1569bf215546Sopenharmony_ci amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle); 1570bf215546Sopenharmony_ci 1571bf215546Sopenharmony_ci amdgpu_add_buffer_to_global_list(ws, bo); 1572bf215546Sopenharmony_ci 1573bf215546Sopenharmony_ci _mesa_hash_table_insert(ws->bo_export_table, bo->bo, bo); 1574bf215546Sopenharmony_ci simple_mtx_unlock(&ws->bo_export_table_lock); 1575bf215546Sopenharmony_ci 1576bf215546Sopenharmony_ci return &bo->base; 1577bf215546Sopenharmony_ci 1578bf215546Sopenharmony_cierror: 1579bf215546Sopenharmony_ci simple_mtx_unlock(&ws->bo_export_table_lock); 1580bf215546Sopenharmony_ci if (bo) 1581bf215546Sopenharmony_ci FREE(bo); 1582bf215546Sopenharmony_ci if (va_handle) 1583bf215546Sopenharmony_ci amdgpu_va_range_free(va_handle); 1584bf215546Sopenharmony_ci amdgpu_bo_free(result.buf_handle); 1585bf215546Sopenharmony_ci return NULL; 1586bf215546Sopenharmony_ci} 1587bf215546Sopenharmony_ci 1588bf215546Sopenharmony_cistatic bool amdgpu_bo_get_handle(struct radeon_winsys *rws, 1589bf215546Sopenharmony_ci struct pb_buffer *buffer, 1590bf215546Sopenharmony_ci struct winsys_handle *whandle) 1591bf215546Sopenharmony_ci{ 1592bf215546Sopenharmony_ci struct amdgpu_screen_winsys *sws = amdgpu_screen_winsys(rws); 1593bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 1594bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer); 1595bf215546Sopenharmony_ci enum amdgpu_bo_handle_type type; 1596bf215546Sopenharmony_ci struct hash_entry *entry; 1597bf215546Sopenharmony_ci int r; 1598bf215546Sopenharmony_ci 1599bf215546Sopenharmony_ci /* Don't allow exports of slab entries and sparse buffers. */ 1600bf215546Sopenharmony_ci if (!bo->bo) 1601bf215546Sopenharmony_ci return false; 1602bf215546Sopenharmony_ci 1603bf215546Sopenharmony_ci bo->u.real.use_reusable_pool = false; 1604bf215546Sopenharmony_ci 1605bf215546Sopenharmony_ci switch (whandle->type) { 1606bf215546Sopenharmony_ci case WINSYS_HANDLE_TYPE_SHARED: 1607bf215546Sopenharmony_ci type = amdgpu_bo_handle_type_gem_flink_name; 1608bf215546Sopenharmony_ci break; 1609bf215546Sopenharmony_ci case WINSYS_HANDLE_TYPE_KMS: 1610bf215546Sopenharmony_ci if (sws->fd == ws->fd) { 1611bf215546Sopenharmony_ci whandle->handle = bo->u.real.kms_handle; 1612bf215546Sopenharmony_ci 1613bf215546Sopenharmony_ci if (bo->u.real.is_shared) 1614bf215546Sopenharmony_ci return true; 1615bf215546Sopenharmony_ci 1616bf215546Sopenharmony_ci goto hash_table_set; 1617bf215546Sopenharmony_ci } 1618bf215546Sopenharmony_ci 1619bf215546Sopenharmony_ci simple_mtx_lock(&ws->sws_list_lock); 1620bf215546Sopenharmony_ci entry = _mesa_hash_table_search(sws->kms_handles, bo); 1621bf215546Sopenharmony_ci simple_mtx_unlock(&ws->sws_list_lock); 1622bf215546Sopenharmony_ci if (entry) { 1623bf215546Sopenharmony_ci whandle->handle = (uintptr_t)entry->data; 1624bf215546Sopenharmony_ci return true; 1625bf215546Sopenharmony_ci } 1626bf215546Sopenharmony_ci FALLTHROUGH; 1627bf215546Sopenharmony_ci case WINSYS_HANDLE_TYPE_FD: 1628bf215546Sopenharmony_ci type = amdgpu_bo_handle_type_dma_buf_fd; 1629bf215546Sopenharmony_ci break; 1630bf215546Sopenharmony_ci default: 1631bf215546Sopenharmony_ci return false; 1632bf215546Sopenharmony_ci } 1633bf215546Sopenharmony_ci 1634bf215546Sopenharmony_ci r = amdgpu_bo_export(bo->bo, type, &whandle->handle); 1635bf215546Sopenharmony_ci if (r) 1636bf215546Sopenharmony_ci return false; 1637bf215546Sopenharmony_ci 1638bf215546Sopenharmony_ci if (whandle->type == WINSYS_HANDLE_TYPE_KMS) { 1639bf215546Sopenharmony_ci int dma_fd = whandle->handle; 1640bf215546Sopenharmony_ci 1641bf215546Sopenharmony_ci r = drmPrimeFDToHandle(sws->fd, dma_fd, &whandle->handle); 1642bf215546Sopenharmony_ci close(dma_fd); 1643bf215546Sopenharmony_ci 1644bf215546Sopenharmony_ci if (r) 1645bf215546Sopenharmony_ci return false; 1646bf215546Sopenharmony_ci 1647bf215546Sopenharmony_ci simple_mtx_lock(&ws->sws_list_lock); 1648bf215546Sopenharmony_ci _mesa_hash_table_insert_pre_hashed(sws->kms_handles, 1649bf215546Sopenharmony_ci bo->u.real.kms_handle, bo, 1650bf215546Sopenharmony_ci (void*)(uintptr_t)whandle->handle); 1651bf215546Sopenharmony_ci simple_mtx_unlock(&ws->sws_list_lock); 1652bf215546Sopenharmony_ci } 1653bf215546Sopenharmony_ci 1654bf215546Sopenharmony_ci hash_table_set: 1655bf215546Sopenharmony_ci simple_mtx_lock(&ws->bo_export_table_lock); 1656bf215546Sopenharmony_ci _mesa_hash_table_insert(ws->bo_export_table, bo->bo, bo); 1657bf215546Sopenharmony_ci simple_mtx_unlock(&ws->bo_export_table_lock); 1658bf215546Sopenharmony_ci 1659bf215546Sopenharmony_ci bo->u.real.is_shared = true; 1660bf215546Sopenharmony_ci return true; 1661bf215546Sopenharmony_ci} 1662bf215546Sopenharmony_ci 1663bf215546Sopenharmony_cistatic struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws, 1664bf215546Sopenharmony_ci void *pointer, uint64_t size, 1665bf215546Sopenharmony_ci enum radeon_bo_flag flags) 1666bf215546Sopenharmony_ci{ 1667bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 1668bf215546Sopenharmony_ci amdgpu_bo_handle buf_handle; 1669bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo; 1670bf215546Sopenharmony_ci uint64_t va; 1671bf215546Sopenharmony_ci amdgpu_va_handle va_handle; 1672bf215546Sopenharmony_ci /* Avoid failure when the size is not page aligned */ 1673bf215546Sopenharmony_ci uint64_t aligned_size = align64(size, ws->info.gart_page_size); 1674bf215546Sopenharmony_ci 1675bf215546Sopenharmony_ci bo = CALLOC_STRUCT(amdgpu_winsys_bo); 1676bf215546Sopenharmony_ci if (!bo) 1677bf215546Sopenharmony_ci return NULL; 1678bf215546Sopenharmony_ci 1679bf215546Sopenharmony_ci if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, 1680bf215546Sopenharmony_ci aligned_size, &buf_handle)) 1681bf215546Sopenharmony_ci goto error; 1682bf215546Sopenharmony_ci 1683bf215546Sopenharmony_ci if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, 1684bf215546Sopenharmony_ci aligned_size, 1685bf215546Sopenharmony_ci amdgpu_get_optimal_alignment(ws, aligned_size, 1686bf215546Sopenharmony_ci ws->info.gart_page_size), 1687bf215546Sopenharmony_ci 0, &va, &va_handle, AMDGPU_VA_RANGE_HIGH)) 1688bf215546Sopenharmony_ci goto error_va_alloc; 1689bf215546Sopenharmony_ci 1690bf215546Sopenharmony_ci if (amdgpu_bo_va_op(buf_handle, 0, aligned_size, va, 0, AMDGPU_VA_OP_MAP)) 1691bf215546Sopenharmony_ci goto error_va_map; 1692bf215546Sopenharmony_ci 1693bf215546Sopenharmony_ci /* Initialize it. */ 1694bf215546Sopenharmony_ci bo->u.real.is_user_ptr = true; 1695bf215546Sopenharmony_ci pipe_reference_init(&bo->base.reference, 1); 1696bf215546Sopenharmony_ci simple_mtx_init(&bo->lock, mtx_plain); 1697bf215546Sopenharmony_ci bo->bo = buf_handle; 1698bf215546Sopenharmony_ci bo->base.alignment_log2 = 0; 1699bf215546Sopenharmony_ci bo->base.size = size; 1700bf215546Sopenharmony_ci bo->base.vtbl = &amdgpu_winsys_bo_vtbl; 1701bf215546Sopenharmony_ci bo->u.real.cpu_ptr = pointer; 1702bf215546Sopenharmony_ci bo->va = va; 1703bf215546Sopenharmony_ci bo->u.real.va_handle = va_handle; 1704bf215546Sopenharmony_ci bo->base.placement = RADEON_DOMAIN_GTT; 1705bf215546Sopenharmony_ci bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1); 1706bf215546Sopenharmony_ci 1707bf215546Sopenharmony_ci ws->allocated_gtt += aligned_size; 1708bf215546Sopenharmony_ci 1709bf215546Sopenharmony_ci amdgpu_add_buffer_to_global_list(ws, bo); 1710bf215546Sopenharmony_ci 1711bf215546Sopenharmony_ci amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle); 1712bf215546Sopenharmony_ci 1713bf215546Sopenharmony_ci return (struct pb_buffer*)bo; 1714bf215546Sopenharmony_ci 1715bf215546Sopenharmony_cierror_va_map: 1716bf215546Sopenharmony_ci amdgpu_va_range_free(va_handle); 1717bf215546Sopenharmony_ci 1718bf215546Sopenharmony_cierror_va_alloc: 1719bf215546Sopenharmony_ci amdgpu_bo_free(buf_handle); 1720bf215546Sopenharmony_ci 1721bf215546Sopenharmony_cierror: 1722bf215546Sopenharmony_ci FREE(bo); 1723bf215546Sopenharmony_ci return NULL; 1724bf215546Sopenharmony_ci} 1725bf215546Sopenharmony_ci 1726bf215546Sopenharmony_cistatic bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf) 1727bf215546Sopenharmony_ci{ 1728bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf; 1729bf215546Sopenharmony_ci 1730bf215546Sopenharmony_ci return bo->bo ? bo->u.real.is_user_ptr : false; 1731bf215546Sopenharmony_ci} 1732bf215546Sopenharmony_ci 1733bf215546Sopenharmony_cistatic bool amdgpu_bo_is_suballocated(struct pb_buffer *buf) 1734bf215546Sopenharmony_ci{ 1735bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf; 1736bf215546Sopenharmony_ci 1737bf215546Sopenharmony_ci return !bo->bo && !(bo->base.usage & RADEON_FLAG_SPARSE); 1738bf215546Sopenharmony_ci} 1739bf215546Sopenharmony_ci 1740bf215546Sopenharmony_cistatic uint64_t amdgpu_bo_get_va(struct pb_buffer *buf) 1741bf215546Sopenharmony_ci{ 1742bf215546Sopenharmony_ci return ((struct amdgpu_winsys_bo*)buf)->va; 1743bf215546Sopenharmony_ci} 1744bf215546Sopenharmony_ci 1745bf215546Sopenharmony_civoid amdgpu_bo_init_functions(struct amdgpu_screen_winsys *ws) 1746bf215546Sopenharmony_ci{ 1747bf215546Sopenharmony_ci ws->base.buffer_set_metadata = amdgpu_buffer_set_metadata; 1748bf215546Sopenharmony_ci ws->base.buffer_get_metadata = amdgpu_buffer_get_metadata; 1749bf215546Sopenharmony_ci ws->base.buffer_map = amdgpu_bo_map; 1750bf215546Sopenharmony_ci ws->base.buffer_unmap = amdgpu_bo_unmap; 1751bf215546Sopenharmony_ci ws->base.buffer_wait = amdgpu_bo_wait; 1752bf215546Sopenharmony_ci ws->base.buffer_create = amdgpu_buffer_create; 1753bf215546Sopenharmony_ci ws->base.buffer_from_handle = amdgpu_bo_from_handle; 1754bf215546Sopenharmony_ci ws->base.buffer_from_ptr = amdgpu_bo_from_ptr; 1755bf215546Sopenharmony_ci ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr; 1756bf215546Sopenharmony_ci ws->base.buffer_is_suballocated = amdgpu_bo_is_suballocated; 1757bf215546Sopenharmony_ci ws->base.buffer_get_handle = amdgpu_bo_get_handle; 1758bf215546Sopenharmony_ci ws->base.buffer_commit = amdgpu_bo_sparse_commit; 1759bf215546Sopenharmony_ci ws->base.buffer_get_virtual_address = amdgpu_bo_get_va; 1760bf215546Sopenharmony_ci ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain; 1761bf215546Sopenharmony_ci ws->base.buffer_get_flags = amdgpu_bo_get_flags; 1762bf215546Sopenharmony_ci} 1763