1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2008 Jérôme Glisse 3bf215546Sopenharmony_ci * Copyright © 2010 Marek Olšák <maraeo@gmail.com> 4bf215546Sopenharmony_ci * Copyright © 2015 Advanced Micro Devices, Inc. 5bf215546Sopenharmony_ci * All Rights Reserved. 6bf215546Sopenharmony_ci * 7bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining 8bf215546Sopenharmony_ci * a copy of this software and associated documentation files (the 9bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including 10bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish, 11bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to 12bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to 13bf215546Sopenharmony_ci * the following conditions: 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16bf215546Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17bf215546Sopenharmony_ci * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18bf215546Sopenharmony_ci * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS 19bf215546Sopenharmony_ci * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 23bf215546Sopenharmony_ci * 24bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 25bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions 26bf215546Sopenharmony_ci * of the Software. 27bf215546Sopenharmony_ci */ 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "amdgpu_cs.h" 30bf215546Sopenharmony_ci#include "util/os_time.h" 31bf215546Sopenharmony_ci#include <inttypes.h> 32bf215546Sopenharmony_ci#include <stdio.h> 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci#include "amd/common/sid.h" 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci/* FENCES */ 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_cistatic struct pipe_fence_handle * 39bf215546Sopenharmony_ciamdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type) 40bf215546Sopenharmony_ci{ 41bf215546Sopenharmony_ci struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence); 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci fence->reference.count = 1; 44bf215546Sopenharmony_ci fence->ws = ctx->ws; 45bf215546Sopenharmony_ci fence->ctx = ctx; 46bf215546Sopenharmony_ci fence->fence.context = ctx->ctx; 47bf215546Sopenharmony_ci fence->fence.ip_type = ip_type; 48bf215546Sopenharmony_ci util_queue_fence_init(&fence->submitted); 49bf215546Sopenharmony_ci util_queue_fence_reset(&fence->submitted); 50bf215546Sopenharmony_ci p_atomic_inc(&ctx->refcount); 51bf215546Sopenharmony_ci return (struct pipe_fence_handle *)fence; 52bf215546Sopenharmony_ci} 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_cistatic struct pipe_fence_handle * 55bf215546Sopenharmony_ciamdgpu_fence_import_syncobj(struct radeon_winsys *rws, int fd) 56bf215546Sopenharmony_ci{ 57bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 58bf215546Sopenharmony_ci struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence); 59bf215546Sopenharmony_ci int r; 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci if (!fence) 62bf215546Sopenharmony_ci return NULL; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci pipe_reference_init(&fence->reference, 1); 65bf215546Sopenharmony_ci fence->ws = ws; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci r = amdgpu_cs_import_syncobj(ws->dev, fd, &fence->syncobj); 68bf215546Sopenharmony_ci if (r) { 69bf215546Sopenharmony_ci FREE(fence); 70bf215546Sopenharmony_ci return NULL; 71bf215546Sopenharmony_ci } 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci util_queue_fence_init(&fence->submitted); 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci assert(amdgpu_fence_is_syncobj(fence)); 76bf215546Sopenharmony_ci return (struct pipe_fence_handle*)fence; 77bf215546Sopenharmony_ci} 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_cistatic struct pipe_fence_handle * 80bf215546Sopenharmony_ciamdgpu_fence_import_sync_file(struct radeon_winsys *rws, int fd) 81bf215546Sopenharmony_ci{ 82bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 83bf215546Sopenharmony_ci struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence); 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci if (!fence) 86bf215546Sopenharmony_ci return NULL; 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci pipe_reference_init(&fence->reference, 1); 89bf215546Sopenharmony_ci fence->ws = ws; 90bf215546Sopenharmony_ci /* fence->ctx == NULL means that the fence is syncobj-based. */ 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci /* Convert sync_file into syncobj. */ 93bf215546Sopenharmony_ci int r = amdgpu_cs_create_syncobj(ws->dev, &fence->syncobj); 94bf215546Sopenharmony_ci if (r) { 95bf215546Sopenharmony_ci FREE(fence); 96bf215546Sopenharmony_ci return NULL; 97bf215546Sopenharmony_ci } 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci r = amdgpu_cs_syncobj_import_sync_file(ws->dev, fence->syncobj, fd); 100bf215546Sopenharmony_ci if (r) { 101bf215546Sopenharmony_ci amdgpu_cs_destroy_syncobj(ws->dev, fence->syncobj); 102bf215546Sopenharmony_ci FREE(fence); 103bf215546Sopenharmony_ci return NULL; 104bf215546Sopenharmony_ci } 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci util_queue_fence_init(&fence->submitted); 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci return (struct pipe_fence_handle*)fence; 109bf215546Sopenharmony_ci} 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_cistatic int amdgpu_fence_export_sync_file(struct radeon_winsys *rws, 112bf215546Sopenharmony_ci struct pipe_fence_handle *pfence) 113bf215546Sopenharmony_ci{ 114bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 115bf215546Sopenharmony_ci struct amdgpu_fence *fence = (struct amdgpu_fence*)pfence; 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci if (amdgpu_fence_is_syncobj(fence)) { 118bf215546Sopenharmony_ci int fd, r; 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci /* Convert syncobj into sync_file. */ 121bf215546Sopenharmony_ci r = amdgpu_cs_syncobj_export_sync_file(ws->dev, fence->syncobj, &fd); 122bf215546Sopenharmony_ci return r ? -1 : fd; 123bf215546Sopenharmony_ci } 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci util_queue_fence_wait(&fence->submitted); 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci /* Convert the amdgpu fence into a fence FD. */ 128bf215546Sopenharmony_ci int fd; 129bf215546Sopenharmony_ci if (amdgpu_cs_fence_to_handle(ws->dev, &fence->fence, 130bf215546Sopenharmony_ci AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD, 131bf215546Sopenharmony_ci (uint32_t*)&fd)) 132bf215546Sopenharmony_ci return -1; 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci return fd; 135bf215546Sopenharmony_ci} 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_cistatic int amdgpu_export_signalled_sync_file(struct radeon_winsys *rws) 138bf215546Sopenharmony_ci{ 139bf215546Sopenharmony_ci struct amdgpu_winsys *ws = amdgpu_winsys(rws); 140bf215546Sopenharmony_ci uint32_t syncobj; 141bf215546Sopenharmony_ci int fd = -1; 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci int r = amdgpu_cs_create_syncobj2(ws->dev, DRM_SYNCOBJ_CREATE_SIGNALED, 144bf215546Sopenharmony_ci &syncobj); 145bf215546Sopenharmony_ci if (r) { 146bf215546Sopenharmony_ci return -1; 147bf215546Sopenharmony_ci } 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci r = amdgpu_cs_syncobj_export_sync_file(ws->dev, syncobj, &fd); 150bf215546Sopenharmony_ci if (r) { 151bf215546Sopenharmony_ci fd = -1; 152bf215546Sopenharmony_ci } 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci amdgpu_cs_destroy_syncobj(ws->dev, syncobj); 155bf215546Sopenharmony_ci return fd; 156bf215546Sopenharmony_ci} 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_cistatic void amdgpu_fence_submitted(struct pipe_fence_handle *fence, 159bf215546Sopenharmony_ci uint64_t seq_no, 160bf215546Sopenharmony_ci uint64_t *user_fence_cpu_address) 161bf215546Sopenharmony_ci{ 162bf215546Sopenharmony_ci struct amdgpu_fence *afence = (struct amdgpu_fence*)fence; 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci afence->fence.fence = seq_no; 165bf215546Sopenharmony_ci afence->user_fence_cpu_address = user_fence_cpu_address; 166bf215546Sopenharmony_ci util_queue_fence_signal(&afence->submitted); 167bf215546Sopenharmony_ci} 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_cistatic void amdgpu_fence_signalled(struct pipe_fence_handle *fence) 170bf215546Sopenharmony_ci{ 171bf215546Sopenharmony_ci struct amdgpu_fence *afence = (struct amdgpu_fence*)fence; 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci afence->signalled = true; 174bf215546Sopenharmony_ci util_queue_fence_signal(&afence->submitted); 175bf215546Sopenharmony_ci} 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_cibool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout, 178bf215546Sopenharmony_ci bool absolute) 179bf215546Sopenharmony_ci{ 180bf215546Sopenharmony_ci struct amdgpu_fence *afence = (struct amdgpu_fence*)fence; 181bf215546Sopenharmony_ci uint32_t expired; 182bf215546Sopenharmony_ci int64_t abs_timeout; 183bf215546Sopenharmony_ci uint64_t *user_fence_cpu; 184bf215546Sopenharmony_ci int r; 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci if (afence->signalled) 187bf215546Sopenharmony_ci return true; 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci if (absolute) 190bf215546Sopenharmony_ci abs_timeout = timeout; 191bf215546Sopenharmony_ci else 192bf215546Sopenharmony_ci abs_timeout = os_time_get_absolute_timeout(timeout); 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci /* Handle syncobjs. */ 195bf215546Sopenharmony_ci if (amdgpu_fence_is_syncobj(afence)) { 196bf215546Sopenharmony_ci if (abs_timeout == OS_TIMEOUT_INFINITE) 197bf215546Sopenharmony_ci abs_timeout = INT64_MAX; 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci if (amdgpu_cs_syncobj_wait(afence->ws->dev, &afence->syncobj, 1, 200bf215546Sopenharmony_ci abs_timeout, 0, NULL)) 201bf215546Sopenharmony_ci return false; 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci afence->signalled = true; 204bf215546Sopenharmony_ci return true; 205bf215546Sopenharmony_ci } 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci /* The fence might not have a number assigned if its IB is being 208bf215546Sopenharmony_ci * submitted in the other thread right now. Wait until the submission 209bf215546Sopenharmony_ci * is done. */ 210bf215546Sopenharmony_ci if (!util_queue_fence_wait_timeout(&afence->submitted, abs_timeout)) 211bf215546Sopenharmony_ci return false; 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci user_fence_cpu = afence->user_fence_cpu_address; 214bf215546Sopenharmony_ci if (user_fence_cpu) { 215bf215546Sopenharmony_ci if (*user_fence_cpu >= afence->fence.fence) { 216bf215546Sopenharmony_ci afence->signalled = true; 217bf215546Sopenharmony_ci return true; 218bf215546Sopenharmony_ci } 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci /* No timeout, just query: no need for the ioctl. */ 221bf215546Sopenharmony_ci if (!absolute && !timeout) 222bf215546Sopenharmony_ci return false; 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci /* Now use the libdrm query. */ 226bf215546Sopenharmony_ci r = amdgpu_cs_query_fence_status(&afence->fence, 227bf215546Sopenharmony_ci abs_timeout, 228bf215546Sopenharmony_ci AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE, 229bf215546Sopenharmony_ci &expired); 230bf215546Sopenharmony_ci if (r) { 231bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_cs_query_fence_status failed.\n"); 232bf215546Sopenharmony_ci return false; 233bf215546Sopenharmony_ci } 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci if (expired) { 236bf215546Sopenharmony_ci /* This variable can only transition from false to true, so it doesn't 237bf215546Sopenharmony_ci * matter if threads race for it. */ 238bf215546Sopenharmony_ci afence->signalled = true; 239bf215546Sopenharmony_ci return true; 240bf215546Sopenharmony_ci } 241bf215546Sopenharmony_ci return false; 242bf215546Sopenharmony_ci} 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_cistatic bool amdgpu_fence_wait_rel_timeout(struct radeon_winsys *rws, 245bf215546Sopenharmony_ci struct pipe_fence_handle *fence, 246bf215546Sopenharmony_ci uint64_t timeout) 247bf215546Sopenharmony_ci{ 248bf215546Sopenharmony_ci return amdgpu_fence_wait(fence, timeout, false); 249bf215546Sopenharmony_ci} 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_cistatic struct pipe_fence_handle * 252bf215546Sopenharmony_ciamdgpu_cs_get_next_fence(struct radeon_cmdbuf *rcs) 253bf215546Sopenharmony_ci{ 254bf215546Sopenharmony_ci struct amdgpu_cs *cs = amdgpu_cs(rcs); 255bf215546Sopenharmony_ci struct pipe_fence_handle *fence = NULL; 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci if (cs->noop) 258bf215546Sopenharmony_ci return NULL; 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci if (cs->next_fence) { 261bf215546Sopenharmony_ci amdgpu_fence_reference(&fence, cs->next_fence); 262bf215546Sopenharmony_ci return fence; 263bf215546Sopenharmony_ci } 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci fence = amdgpu_fence_create(cs->ctx, 266bf215546Sopenharmony_ci cs->csc->ib[IB_MAIN].ip_type); 267bf215546Sopenharmony_ci if (!fence) 268bf215546Sopenharmony_ci return NULL; 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci amdgpu_fence_reference(&cs->next_fence, fence); 271bf215546Sopenharmony_ci return fence; 272bf215546Sopenharmony_ci} 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci/* CONTEXTS */ 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_cistatic uint32_t 277bf215546Sopenharmony_ciradeon_to_amdgpu_priority(enum radeon_ctx_priority radeon_priority) 278bf215546Sopenharmony_ci{ 279bf215546Sopenharmony_ci switch (radeon_priority) { 280bf215546Sopenharmony_ci case RADEON_CTX_PRIORITY_REALTIME: 281bf215546Sopenharmony_ci return AMDGPU_CTX_PRIORITY_VERY_HIGH; 282bf215546Sopenharmony_ci case RADEON_CTX_PRIORITY_HIGH: 283bf215546Sopenharmony_ci return AMDGPU_CTX_PRIORITY_HIGH; 284bf215546Sopenharmony_ci case RADEON_CTX_PRIORITY_MEDIUM: 285bf215546Sopenharmony_ci return AMDGPU_CTX_PRIORITY_NORMAL; 286bf215546Sopenharmony_ci case RADEON_CTX_PRIORITY_LOW: 287bf215546Sopenharmony_ci return AMDGPU_CTX_PRIORITY_LOW; 288bf215546Sopenharmony_ci default: 289bf215546Sopenharmony_ci unreachable("Invalid context priority"); 290bf215546Sopenharmony_ci } 291bf215546Sopenharmony_ci} 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_cistatic struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws, 294bf215546Sopenharmony_ci enum radeon_ctx_priority priority) 295bf215546Sopenharmony_ci{ 296bf215546Sopenharmony_ci struct amdgpu_ctx *ctx = CALLOC_STRUCT(amdgpu_ctx); 297bf215546Sopenharmony_ci int r; 298bf215546Sopenharmony_ci struct amdgpu_bo_alloc_request alloc_buffer = {}; 299bf215546Sopenharmony_ci uint32_t amdgpu_priority = radeon_to_amdgpu_priority(priority); 300bf215546Sopenharmony_ci amdgpu_bo_handle buf_handle; 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci if (!ctx) 303bf215546Sopenharmony_ci return NULL; 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci ctx->ws = amdgpu_winsys(ws); 306bf215546Sopenharmony_ci ctx->refcount = 1; 307bf215546Sopenharmony_ci ctx->initial_num_total_rejected_cs = ctx->ws->num_total_rejected_cs; 308bf215546Sopenharmony_ci 309bf215546Sopenharmony_ci r = amdgpu_cs_ctx_create2(ctx->ws->dev, amdgpu_priority, &ctx->ctx); 310bf215546Sopenharmony_ci if (r) { 311bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create2 failed. (%i)\n", r); 312bf215546Sopenharmony_ci goto error_create; 313bf215546Sopenharmony_ci } 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci alloc_buffer.alloc_size = ctx->ws->info.gart_page_size; 316bf215546Sopenharmony_ci alloc_buffer.phys_alignment = ctx->ws->info.gart_page_size; 317bf215546Sopenharmony_ci alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT; 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci r = amdgpu_bo_alloc(ctx->ws->dev, &alloc_buffer, &buf_handle); 320bf215546Sopenharmony_ci if (r) { 321bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_bo_alloc failed. (%i)\n", r); 322bf215546Sopenharmony_ci goto error_user_fence_alloc; 323bf215546Sopenharmony_ci } 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_ci r = amdgpu_bo_cpu_map(buf_handle, (void**)&ctx->user_fence_cpu_address_base); 326bf215546Sopenharmony_ci if (r) { 327bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_bo_cpu_map failed. (%i)\n", r); 328bf215546Sopenharmony_ci goto error_user_fence_map; 329bf215546Sopenharmony_ci } 330bf215546Sopenharmony_ci 331bf215546Sopenharmony_ci memset(ctx->user_fence_cpu_address_base, 0, alloc_buffer.alloc_size); 332bf215546Sopenharmony_ci ctx->user_fence_bo = buf_handle; 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci return (struct radeon_winsys_ctx*)ctx; 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_cierror_user_fence_map: 337bf215546Sopenharmony_ci amdgpu_bo_free(buf_handle); 338bf215546Sopenharmony_cierror_user_fence_alloc: 339bf215546Sopenharmony_ci amdgpu_cs_ctx_free(ctx->ctx); 340bf215546Sopenharmony_cierror_create: 341bf215546Sopenharmony_ci FREE(ctx); 342bf215546Sopenharmony_ci return NULL; 343bf215546Sopenharmony_ci} 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_cistatic void amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx) 346bf215546Sopenharmony_ci{ 347bf215546Sopenharmony_ci amdgpu_ctx_unref((struct amdgpu_ctx*)rwctx); 348bf215546Sopenharmony_ci} 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_cistatic enum pipe_reset_status 351bf215546Sopenharmony_ciamdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx, bool full_reset_only, 352bf215546Sopenharmony_ci bool *needs_reset) 353bf215546Sopenharmony_ci{ 354bf215546Sopenharmony_ci struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx; 355bf215546Sopenharmony_ci int r; 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci if (needs_reset) 358bf215546Sopenharmony_ci *needs_reset = false; 359bf215546Sopenharmony_ci 360bf215546Sopenharmony_ci /* Return a failure due to a GPU hang. */ 361bf215546Sopenharmony_ci if (ctx->ws->info.drm_minor >= 24) { 362bf215546Sopenharmony_ci uint64_t flags; 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci if (full_reset_only && 365bf215546Sopenharmony_ci ctx->initial_num_total_rejected_cs == ctx->ws->num_total_rejected_cs) { 366bf215546Sopenharmony_ci /* If the caller is only interested in full reset (= wants to ignore soft 367bf215546Sopenharmony_ci * recoveries), we can use the rejected cs count as a quick first check. 368bf215546Sopenharmony_ci */ 369bf215546Sopenharmony_ci return PIPE_NO_RESET; 370bf215546Sopenharmony_ci } 371bf215546Sopenharmony_ci 372bf215546Sopenharmony_ci r = amdgpu_cs_query_reset_state2(ctx->ctx, &flags); 373bf215546Sopenharmony_ci if (r) { 374bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_cs_query_reset_state failed. (%i)\n", r); 375bf215546Sopenharmony_ci return PIPE_NO_RESET; 376bf215546Sopenharmony_ci } 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_ci if (flags & AMDGPU_CTX_QUERY2_FLAGS_RESET) { 379bf215546Sopenharmony_ci if (needs_reset) 380bf215546Sopenharmony_ci *needs_reset = flags & AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST; 381bf215546Sopenharmony_ci if (flags & AMDGPU_CTX_QUERY2_FLAGS_GUILTY) 382bf215546Sopenharmony_ci return PIPE_GUILTY_CONTEXT_RESET; 383bf215546Sopenharmony_ci else 384bf215546Sopenharmony_ci return PIPE_INNOCENT_CONTEXT_RESET; 385bf215546Sopenharmony_ci } 386bf215546Sopenharmony_ci } else { 387bf215546Sopenharmony_ci uint32_t result, hangs; 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci r = amdgpu_cs_query_reset_state(ctx->ctx, &result, &hangs); 390bf215546Sopenharmony_ci if (r) { 391bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_cs_query_reset_state failed. (%i)\n", r); 392bf215546Sopenharmony_ci return PIPE_NO_RESET; 393bf215546Sopenharmony_ci } 394bf215546Sopenharmony_ci 395bf215546Sopenharmony_ci if (needs_reset) 396bf215546Sopenharmony_ci *needs_reset = true; 397bf215546Sopenharmony_ci switch (result) { 398bf215546Sopenharmony_ci case AMDGPU_CTX_GUILTY_RESET: 399bf215546Sopenharmony_ci return PIPE_GUILTY_CONTEXT_RESET; 400bf215546Sopenharmony_ci case AMDGPU_CTX_INNOCENT_RESET: 401bf215546Sopenharmony_ci return PIPE_INNOCENT_CONTEXT_RESET; 402bf215546Sopenharmony_ci case AMDGPU_CTX_UNKNOWN_RESET: 403bf215546Sopenharmony_ci return PIPE_UNKNOWN_CONTEXT_RESET; 404bf215546Sopenharmony_ci } 405bf215546Sopenharmony_ci } 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci /* Return a failure due to a rejected command submission. */ 408bf215546Sopenharmony_ci if (ctx->ws->num_total_rejected_cs > ctx->initial_num_total_rejected_cs) { 409bf215546Sopenharmony_ci if (needs_reset) 410bf215546Sopenharmony_ci *needs_reset = true; 411bf215546Sopenharmony_ci return ctx->num_rejected_cs ? PIPE_GUILTY_CONTEXT_RESET : 412bf215546Sopenharmony_ci PIPE_INNOCENT_CONTEXT_RESET; 413bf215546Sopenharmony_ci } 414bf215546Sopenharmony_ci if (needs_reset) 415bf215546Sopenharmony_ci *needs_reset = false; 416bf215546Sopenharmony_ci return PIPE_NO_RESET; 417bf215546Sopenharmony_ci} 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci/* COMMAND SUBMISSION */ 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_cistatic bool amdgpu_cs_has_user_fence(struct amdgpu_cs_context *cs) 422bf215546Sopenharmony_ci{ 423bf215546Sopenharmony_ci return cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD && 424bf215546Sopenharmony_ci cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCE && 425bf215546Sopenharmony_ci cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD_ENC && 426bf215546Sopenharmony_ci cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC && 427bf215546Sopenharmony_ci cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC && 428bf215546Sopenharmony_ci cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_JPEG; 429bf215546Sopenharmony_ci} 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_cistatic inline unsigned amdgpu_cs_epilog_dws(struct amdgpu_cs *cs) 432bf215546Sopenharmony_ci{ 433bf215546Sopenharmony_ci if (cs->has_chaining) 434bf215546Sopenharmony_ci return 4; /* for chaining */ 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci return 0; 437bf215546Sopenharmony_ci} 438bf215546Sopenharmony_ci 439bf215546Sopenharmony_cistatic int amdgpu_lookup_buffer(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo, 440bf215546Sopenharmony_ci struct amdgpu_cs_buffer *buffers, unsigned num_buffers) 441bf215546Sopenharmony_ci{ 442bf215546Sopenharmony_ci unsigned hash = bo->unique_id & (BUFFER_HASHLIST_SIZE-1); 443bf215546Sopenharmony_ci int i = cs->buffer_indices_hashlist[hash]; 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_ci /* not found or found */ 446bf215546Sopenharmony_ci if (i < 0 || (i < num_buffers && buffers[i].bo == bo)) 447bf215546Sopenharmony_ci return i; 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci /* Hash collision, look for the BO in the list of buffers linearly. */ 450bf215546Sopenharmony_ci for (int i = num_buffers - 1; i >= 0; i--) { 451bf215546Sopenharmony_ci if (buffers[i].bo == bo) { 452bf215546Sopenharmony_ci /* Put this buffer in the hash list. 453bf215546Sopenharmony_ci * This will prevent additional hash collisions if there are 454bf215546Sopenharmony_ci * several consecutive lookup_buffer calls for the same buffer. 455bf215546Sopenharmony_ci * 456bf215546Sopenharmony_ci * Example: Assuming buffers A,B,C collide in the hash list, 457bf215546Sopenharmony_ci * the following sequence of buffers: 458bf215546Sopenharmony_ci * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC 459bf215546Sopenharmony_ci * will collide here: ^ and here: ^, 460bf215546Sopenharmony_ci * meaning that we should get very few collisions in the end. */ 461bf215546Sopenharmony_ci cs->buffer_indices_hashlist[hash] = i & 0x7fff; 462bf215546Sopenharmony_ci return i; 463bf215546Sopenharmony_ci } 464bf215546Sopenharmony_ci } 465bf215546Sopenharmony_ci return -1; 466bf215546Sopenharmony_ci} 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_ciint amdgpu_lookup_buffer_any_type(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo) 469bf215546Sopenharmony_ci{ 470bf215546Sopenharmony_ci struct amdgpu_cs_buffer *buffers; 471bf215546Sopenharmony_ci int num_buffers; 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci if (bo->bo) { 474bf215546Sopenharmony_ci buffers = cs->real_buffers; 475bf215546Sopenharmony_ci num_buffers = cs->num_real_buffers; 476bf215546Sopenharmony_ci } else if (!(bo->base.usage & RADEON_FLAG_SPARSE)) { 477bf215546Sopenharmony_ci buffers = cs->slab_buffers; 478bf215546Sopenharmony_ci num_buffers = cs->num_slab_buffers; 479bf215546Sopenharmony_ci } else { 480bf215546Sopenharmony_ci buffers = cs->sparse_buffers; 481bf215546Sopenharmony_ci num_buffers = cs->num_sparse_buffers; 482bf215546Sopenharmony_ci } 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_ci return amdgpu_lookup_buffer(cs, bo, buffers, num_buffers); 485bf215546Sopenharmony_ci} 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_cistatic int 488bf215546Sopenharmony_ciamdgpu_do_add_real_buffer(struct amdgpu_cs_context *cs, 489bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo) 490bf215546Sopenharmony_ci{ 491bf215546Sopenharmony_ci struct amdgpu_cs_buffer *buffer; 492bf215546Sopenharmony_ci int idx; 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci /* New buffer, check if the backing array is large enough. */ 495bf215546Sopenharmony_ci if (cs->num_real_buffers >= cs->max_real_buffers) { 496bf215546Sopenharmony_ci unsigned new_max = 497bf215546Sopenharmony_ci MAX2(cs->max_real_buffers + 16, (unsigned)(cs->max_real_buffers * 1.3)); 498bf215546Sopenharmony_ci struct amdgpu_cs_buffer *new_buffers; 499bf215546Sopenharmony_ci 500bf215546Sopenharmony_ci new_buffers = MALLOC(new_max * sizeof(*new_buffers)); 501bf215546Sopenharmony_ci 502bf215546Sopenharmony_ci if (!new_buffers) { 503bf215546Sopenharmony_ci fprintf(stderr, "amdgpu_do_add_buffer: allocation failed\n"); 504bf215546Sopenharmony_ci FREE(new_buffers); 505bf215546Sopenharmony_ci return -1; 506bf215546Sopenharmony_ci } 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci memcpy(new_buffers, cs->real_buffers, cs->num_real_buffers * sizeof(*new_buffers)); 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_ci FREE(cs->real_buffers); 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_ci cs->max_real_buffers = new_max; 513bf215546Sopenharmony_ci cs->real_buffers = new_buffers; 514bf215546Sopenharmony_ci } 515bf215546Sopenharmony_ci 516bf215546Sopenharmony_ci idx = cs->num_real_buffers; 517bf215546Sopenharmony_ci buffer = &cs->real_buffers[idx]; 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci memset(buffer, 0, sizeof(*buffer)); 520bf215546Sopenharmony_ci amdgpu_winsys_bo_reference(cs->ws, &buffer->bo, bo); 521bf215546Sopenharmony_ci cs->num_real_buffers++; 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci return idx; 524bf215546Sopenharmony_ci} 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_cistatic int 527bf215546Sopenharmony_ciamdgpu_lookup_or_add_real_buffer(struct radeon_cmdbuf *rcs, struct amdgpu_cs_context *cs, 528bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo) 529bf215546Sopenharmony_ci{ 530bf215546Sopenharmony_ci unsigned hash; 531bf215546Sopenharmony_ci int idx = amdgpu_lookup_buffer(cs, bo, cs->real_buffers, cs->num_real_buffers); 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_ci if (idx >= 0) 534bf215546Sopenharmony_ci return idx; 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci idx = amdgpu_do_add_real_buffer(cs, bo); 537bf215546Sopenharmony_ci 538bf215546Sopenharmony_ci hash = bo->unique_id & (BUFFER_HASHLIST_SIZE-1); 539bf215546Sopenharmony_ci cs->buffer_indices_hashlist[hash] = idx & 0x7fff; 540bf215546Sopenharmony_ci 541bf215546Sopenharmony_ci if (bo->base.placement & RADEON_DOMAIN_VRAM) 542bf215546Sopenharmony_ci rcs->used_vram_kb += bo->base.size / 1024; 543bf215546Sopenharmony_ci else if (bo->base.placement & RADEON_DOMAIN_GTT) 544bf215546Sopenharmony_ci rcs->used_gart_kb += bo->base.size / 1024; 545bf215546Sopenharmony_ci 546bf215546Sopenharmony_ci return idx; 547bf215546Sopenharmony_ci} 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_cistatic int amdgpu_lookup_or_add_slab_buffer(struct radeon_cmdbuf *rcs, 550bf215546Sopenharmony_ci struct amdgpu_cs_context *cs, 551bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo) 552bf215546Sopenharmony_ci{ 553bf215546Sopenharmony_ci struct amdgpu_cs_buffer *buffer; 554bf215546Sopenharmony_ci unsigned hash; 555bf215546Sopenharmony_ci int idx = amdgpu_lookup_buffer(cs, bo, cs->slab_buffers, cs->num_slab_buffers); 556bf215546Sopenharmony_ci int real_idx; 557bf215546Sopenharmony_ci 558bf215546Sopenharmony_ci if (idx >= 0) 559bf215546Sopenharmony_ci return idx; 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci real_idx = amdgpu_lookup_or_add_real_buffer(rcs, cs, bo->u.slab.real); 562bf215546Sopenharmony_ci if (real_idx < 0) 563bf215546Sopenharmony_ci return -1; 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_ci /* New buffer, check if the backing array is large enough. */ 566bf215546Sopenharmony_ci if (cs->num_slab_buffers >= cs->max_slab_buffers) { 567bf215546Sopenharmony_ci unsigned new_max = 568bf215546Sopenharmony_ci MAX2(cs->max_slab_buffers + 16, (unsigned)(cs->max_slab_buffers * 1.3)); 569bf215546Sopenharmony_ci struct amdgpu_cs_buffer *new_buffers; 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci new_buffers = REALLOC(cs->slab_buffers, 572bf215546Sopenharmony_ci cs->max_slab_buffers * sizeof(*new_buffers), 573bf215546Sopenharmony_ci new_max * sizeof(*new_buffers)); 574bf215546Sopenharmony_ci if (!new_buffers) { 575bf215546Sopenharmony_ci fprintf(stderr, "amdgpu_lookup_or_add_slab_buffer: allocation failed\n"); 576bf215546Sopenharmony_ci return -1; 577bf215546Sopenharmony_ci } 578bf215546Sopenharmony_ci 579bf215546Sopenharmony_ci cs->max_slab_buffers = new_max; 580bf215546Sopenharmony_ci cs->slab_buffers = new_buffers; 581bf215546Sopenharmony_ci } 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci idx = cs->num_slab_buffers; 584bf215546Sopenharmony_ci buffer = &cs->slab_buffers[idx]; 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci memset(buffer, 0, sizeof(*buffer)); 587bf215546Sopenharmony_ci amdgpu_winsys_bo_reference(cs->ws, &buffer->bo, bo); 588bf215546Sopenharmony_ci buffer->slab_real_idx = real_idx; 589bf215546Sopenharmony_ci cs->num_slab_buffers++; 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci hash = bo->unique_id & (BUFFER_HASHLIST_SIZE-1); 592bf215546Sopenharmony_ci cs->buffer_indices_hashlist[hash] = idx & 0x7fff; 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci return idx; 595bf215546Sopenharmony_ci} 596bf215546Sopenharmony_ci 597bf215546Sopenharmony_cistatic int amdgpu_lookup_or_add_sparse_buffer(struct radeon_cmdbuf *rcs, 598bf215546Sopenharmony_ci struct amdgpu_cs_context *cs, 599bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo) 600bf215546Sopenharmony_ci{ 601bf215546Sopenharmony_ci struct amdgpu_cs_buffer *buffer; 602bf215546Sopenharmony_ci unsigned hash; 603bf215546Sopenharmony_ci int idx = amdgpu_lookup_buffer(cs, bo, cs->sparse_buffers, cs->num_sparse_buffers); 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_ci if (idx >= 0) 606bf215546Sopenharmony_ci return idx; 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci /* New buffer, check if the backing array is large enough. */ 609bf215546Sopenharmony_ci if (cs->num_sparse_buffers >= cs->max_sparse_buffers) { 610bf215546Sopenharmony_ci unsigned new_max = 611bf215546Sopenharmony_ci MAX2(cs->max_sparse_buffers + 16, (unsigned)(cs->max_sparse_buffers * 1.3)); 612bf215546Sopenharmony_ci struct amdgpu_cs_buffer *new_buffers; 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci new_buffers = REALLOC(cs->sparse_buffers, 615bf215546Sopenharmony_ci cs->max_sparse_buffers * sizeof(*new_buffers), 616bf215546Sopenharmony_ci new_max * sizeof(*new_buffers)); 617bf215546Sopenharmony_ci if (!new_buffers) { 618bf215546Sopenharmony_ci fprintf(stderr, "amdgpu_lookup_or_add_sparse_buffer: allocation failed\n"); 619bf215546Sopenharmony_ci return -1; 620bf215546Sopenharmony_ci } 621bf215546Sopenharmony_ci 622bf215546Sopenharmony_ci cs->max_sparse_buffers = new_max; 623bf215546Sopenharmony_ci cs->sparse_buffers = new_buffers; 624bf215546Sopenharmony_ci } 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_ci idx = cs->num_sparse_buffers; 627bf215546Sopenharmony_ci buffer = &cs->sparse_buffers[idx]; 628bf215546Sopenharmony_ci 629bf215546Sopenharmony_ci memset(buffer, 0, sizeof(*buffer)); 630bf215546Sopenharmony_ci amdgpu_winsys_bo_reference(cs->ws, &buffer->bo, bo); 631bf215546Sopenharmony_ci cs->num_sparse_buffers++; 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci hash = bo->unique_id & (BUFFER_HASHLIST_SIZE-1); 634bf215546Sopenharmony_ci cs->buffer_indices_hashlist[hash] = idx & 0x7fff; 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci /* We delay adding the backing buffers until we really have to. However, 637bf215546Sopenharmony_ci * we cannot delay accounting for memory use. 638bf215546Sopenharmony_ci */ 639bf215546Sopenharmony_ci simple_mtx_lock(&bo->lock); 640bf215546Sopenharmony_ci 641bf215546Sopenharmony_ci list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) { 642bf215546Sopenharmony_ci if (bo->base.placement & RADEON_DOMAIN_VRAM) 643bf215546Sopenharmony_ci rcs->used_vram_kb += backing->bo->base.size / 1024; 644bf215546Sopenharmony_ci else if (bo->base.placement & RADEON_DOMAIN_GTT) 645bf215546Sopenharmony_ci rcs->used_gart_kb += backing->bo->base.size / 1024; 646bf215546Sopenharmony_ci } 647bf215546Sopenharmony_ci 648bf215546Sopenharmony_ci simple_mtx_unlock(&bo->lock); 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci return idx; 651bf215546Sopenharmony_ci} 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_cistatic unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs, 654bf215546Sopenharmony_ci struct pb_buffer *buf, 655bf215546Sopenharmony_ci unsigned usage, 656bf215546Sopenharmony_ci enum radeon_bo_domain domains) 657bf215546Sopenharmony_ci{ 658bf215546Sopenharmony_ci /* Don't use the "domains" parameter. Amdgpu doesn't support changing 659bf215546Sopenharmony_ci * the buffer placement during command submission. 660bf215546Sopenharmony_ci */ 661bf215546Sopenharmony_ci struct amdgpu_cs_context *cs = (struct amdgpu_cs_context*)rcs->csc; 662bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf; 663bf215546Sopenharmony_ci struct amdgpu_cs_buffer *buffer; 664bf215546Sopenharmony_ci int index; 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_ci /* Fast exit for no-op calls. 667bf215546Sopenharmony_ci * This is very effective with suballocators and linear uploaders that 668bf215546Sopenharmony_ci * are outside of the winsys. 669bf215546Sopenharmony_ci */ 670bf215546Sopenharmony_ci if (bo == cs->last_added_bo && 671bf215546Sopenharmony_ci (usage & cs->last_added_bo_usage) == usage) 672bf215546Sopenharmony_ci return cs->last_added_bo_index; 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_ci if (!(bo->base.usage & RADEON_FLAG_SPARSE)) { 675bf215546Sopenharmony_ci if (!bo->bo) { 676bf215546Sopenharmony_ci index = amdgpu_lookup_or_add_slab_buffer(rcs, cs, bo); 677bf215546Sopenharmony_ci if (index < 0) 678bf215546Sopenharmony_ci return 0; 679bf215546Sopenharmony_ci 680bf215546Sopenharmony_ci buffer = &cs->slab_buffers[index]; 681bf215546Sopenharmony_ci buffer->usage |= usage; 682bf215546Sopenharmony_ci cs->last_added_bo_usage = buffer->usage; 683bf215546Sopenharmony_ci 684bf215546Sopenharmony_ci index = buffer->slab_real_idx; 685bf215546Sopenharmony_ci buffer = &cs->real_buffers[index]; 686bf215546Sopenharmony_ci buffer->usage |= usage & ~RADEON_USAGE_SYNCHRONIZED; 687bf215546Sopenharmony_ci } else { 688bf215546Sopenharmony_ci index = amdgpu_lookup_or_add_real_buffer(rcs, cs, bo); 689bf215546Sopenharmony_ci if (index < 0) 690bf215546Sopenharmony_ci return 0; 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_ci buffer = &cs->real_buffers[index]; 693bf215546Sopenharmony_ci buffer->usage |= usage; 694bf215546Sopenharmony_ci cs->last_added_bo_usage = buffer->usage; 695bf215546Sopenharmony_ci } 696bf215546Sopenharmony_ci } else { 697bf215546Sopenharmony_ci index = amdgpu_lookup_or_add_sparse_buffer(rcs, cs, bo); 698bf215546Sopenharmony_ci if (index < 0) 699bf215546Sopenharmony_ci return 0; 700bf215546Sopenharmony_ci 701bf215546Sopenharmony_ci buffer = &cs->sparse_buffers[index]; 702bf215546Sopenharmony_ci buffer->usage |= usage; 703bf215546Sopenharmony_ci cs->last_added_bo_usage = buffer->usage; 704bf215546Sopenharmony_ci } 705bf215546Sopenharmony_ci 706bf215546Sopenharmony_ci cs->last_added_bo = bo; 707bf215546Sopenharmony_ci cs->last_added_bo_index = index; 708bf215546Sopenharmony_ci return index; 709bf215546Sopenharmony_ci} 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_cistatic bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, 712bf215546Sopenharmony_ci struct amdgpu_ib *ib, 713bf215546Sopenharmony_ci struct amdgpu_cs *cs) 714bf215546Sopenharmony_ci{ 715bf215546Sopenharmony_ci struct pb_buffer *pb; 716bf215546Sopenharmony_ci uint8_t *mapped; 717bf215546Sopenharmony_ci unsigned buffer_size; 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_ci /* Always create a buffer that is at least as large as the maximum seen IB 720bf215546Sopenharmony_ci * size, aligned to a power of two (and multiplied by 4 to reduce internal 721bf215546Sopenharmony_ci * fragmentation if chaining is not available). Limit to 512k dwords, which 722bf215546Sopenharmony_ci * is the largest power of two that fits into the size field of the 723bf215546Sopenharmony_ci * INDIRECT_BUFFER packet. 724bf215546Sopenharmony_ci */ 725bf215546Sopenharmony_ci if (cs->has_chaining) 726bf215546Sopenharmony_ci buffer_size = 4 * util_next_power_of_two(ib->max_ib_size); 727bf215546Sopenharmony_ci else 728bf215546Sopenharmony_ci buffer_size = 4 * util_next_power_of_two(4 * ib->max_ib_size); 729bf215546Sopenharmony_ci 730bf215546Sopenharmony_ci const unsigned min_size = MAX2(ib->max_check_space_size, 8 * 1024 * 4); 731bf215546Sopenharmony_ci const unsigned max_size = 512 * 1024 * 4; 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci buffer_size = MIN2(buffer_size, max_size); 734bf215546Sopenharmony_ci buffer_size = MAX2(buffer_size, min_size); /* min_size is more important */ 735bf215546Sopenharmony_ci 736bf215546Sopenharmony_ci enum radeon_bo_domain domain; 737bf215546Sopenharmony_ci unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING; 738bf215546Sopenharmony_ci 739bf215546Sopenharmony_ci if (cs->ip_type == AMD_IP_GFX || 740bf215546Sopenharmony_ci cs->ip_type == AMD_IP_COMPUTE || 741bf215546Sopenharmony_ci cs->ip_type == AMD_IP_SDMA) { 742bf215546Sopenharmony_ci domain = ws->info.smart_access_memory ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT; 743bf215546Sopenharmony_ci flags |= RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC; 744bf215546Sopenharmony_ci } else { 745bf215546Sopenharmony_ci /* UVD/VCE */ 746bf215546Sopenharmony_ci /* TODO: validate that UVD/VCE don't read from IBs and enable WC or even VRAM. */ 747bf215546Sopenharmony_ci domain = RADEON_DOMAIN_GTT; 748bf215546Sopenharmony_ci } 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci pb = amdgpu_bo_create(ws, buffer_size, 751bf215546Sopenharmony_ci ws->info.gart_page_size, 752bf215546Sopenharmony_ci domain, flags); 753bf215546Sopenharmony_ci if (!pb) 754bf215546Sopenharmony_ci return false; 755bf215546Sopenharmony_ci 756bf215546Sopenharmony_ci mapped = amdgpu_bo_map(&ws->dummy_ws.base, pb, NULL, PIPE_MAP_WRITE); 757bf215546Sopenharmony_ci if (!mapped) { 758bf215546Sopenharmony_ci radeon_bo_reference(&ws->dummy_ws.base, &pb, NULL); 759bf215546Sopenharmony_ci return false; 760bf215546Sopenharmony_ci } 761bf215546Sopenharmony_ci 762bf215546Sopenharmony_ci radeon_bo_reference(&ws->dummy_ws.base, &ib->big_ib_buffer, pb); 763bf215546Sopenharmony_ci radeon_bo_reference(&ws->dummy_ws.base, &pb, NULL); 764bf215546Sopenharmony_ci 765bf215546Sopenharmony_ci ib->ib_mapped = mapped; 766bf215546Sopenharmony_ci ib->used_ib_space = 0; 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ci return true; 769bf215546Sopenharmony_ci} 770bf215546Sopenharmony_ci 771bf215546Sopenharmony_cistatic bool amdgpu_get_new_ib(struct amdgpu_winsys *ws, 772bf215546Sopenharmony_ci struct radeon_cmdbuf *rcs, 773bf215546Sopenharmony_ci struct amdgpu_ib *ib, 774bf215546Sopenharmony_ci struct amdgpu_cs *cs) 775bf215546Sopenharmony_ci{ 776bf215546Sopenharmony_ci /* Small IBs are better than big IBs, because the GPU goes idle quicker 777bf215546Sopenharmony_ci * and there is less waiting for buffers and fences. Proof: 778bf215546Sopenharmony_ci * http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1 779bf215546Sopenharmony_ci */ 780bf215546Sopenharmony_ci struct drm_amdgpu_cs_chunk_ib *info = &cs->csc->ib[ib->ib_type]; 781bf215546Sopenharmony_ci /* This is the minimum size of a contiguous IB. */ 782bf215546Sopenharmony_ci unsigned ib_size = 4 * 1024 * 4; 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_ci /* Always allocate at least the size of the biggest cs_check_space call, 785bf215546Sopenharmony_ci * because precisely the last call might have requested this size. 786bf215546Sopenharmony_ci */ 787bf215546Sopenharmony_ci ib_size = MAX2(ib_size, ib->max_check_space_size); 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci if (!cs->has_chaining) { 790bf215546Sopenharmony_ci ib_size = MAX2(ib_size, 791bf215546Sopenharmony_ci 4 * MIN2(util_next_power_of_two(ib->max_ib_size), 792bf215546Sopenharmony_ci IB_MAX_SUBMIT_DWORDS)); 793bf215546Sopenharmony_ci } 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_ci ib->max_ib_size = ib->max_ib_size - ib->max_ib_size / 32; 796bf215546Sopenharmony_ci 797bf215546Sopenharmony_ci rcs->prev_dw = 0; 798bf215546Sopenharmony_ci rcs->num_prev = 0; 799bf215546Sopenharmony_ci rcs->current.cdw = 0; 800bf215546Sopenharmony_ci rcs->current.buf = NULL; 801bf215546Sopenharmony_ci 802bf215546Sopenharmony_ci /* Allocate a new buffer for IBs if the current buffer is all used. */ 803bf215546Sopenharmony_ci if (!ib->big_ib_buffer || 804bf215546Sopenharmony_ci ib->used_ib_space + ib_size > ib->big_ib_buffer->size) { 805bf215546Sopenharmony_ci if (!amdgpu_ib_new_buffer(ws, ib, cs)) 806bf215546Sopenharmony_ci return false; 807bf215546Sopenharmony_ci } 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_ci info->va_start = amdgpu_winsys_bo(ib->big_ib_buffer)->va + ib->used_ib_space; 810bf215546Sopenharmony_ci info->ib_bytes = 0; 811bf215546Sopenharmony_ci /* ib_bytes is in dwords and the conversion to bytes will be done before 812bf215546Sopenharmony_ci * the CS ioctl. */ 813bf215546Sopenharmony_ci ib->ptr_ib_size = &info->ib_bytes; 814bf215546Sopenharmony_ci ib->ptr_ib_size_inside_ib = false; 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci amdgpu_cs_add_buffer(cs->main.rcs, ib->big_ib_buffer, 817bf215546Sopenharmony_ci RADEON_USAGE_READ | RADEON_PRIO_IB, 0); 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_ci rcs->current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space); 820bf215546Sopenharmony_ci 821bf215546Sopenharmony_ci if (ib->ib_type == IB_MAIN) 822bf215546Sopenharmony_ci cs->csc->ib_main_addr = rcs->current.buf; 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci ib_size = ib->big_ib_buffer->size - ib->used_ib_space; 825bf215546Sopenharmony_ci rcs->current.max_dw = ib_size / 4 - amdgpu_cs_epilog_dws(cs); 826bf215546Sopenharmony_ci rcs->gpu_address = info->va_start; 827bf215546Sopenharmony_ci return true; 828bf215546Sopenharmony_ci} 829bf215546Sopenharmony_ci 830bf215546Sopenharmony_cistatic void amdgpu_set_ib_size(struct radeon_cmdbuf *rcs, struct amdgpu_ib *ib) 831bf215546Sopenharmony_ci{ 832bf215546Sopenharmony_ci if (ib->ptr_ib_size_inside_ib) { 833bf215546Sopenharmony_ci *ib->ptr_ib_size = rcs->current.cdw | 834bf215546Sopenharmony_ci S_3F2_CHAIN(1) | S_3F2_VALID(1); 835bf215546Sopenharmony_ci } else { 836bf215546Sopenharmony_ci *ib->ptr_ib_size = rcs->current.cdw; 837bf215546Sopenharmony_ci } 838bf215546Sopenharmony_ci} 839bf215546Sopenharmony_ci 840bf215546Sopenharmony_cistatic void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct radeon_cmdbuf *rcs, 841bf215546Sopenharmony_ci struct amdgpu_ib *ib) 842bf215546Sopenharmony_ci{ 843bf215546Sopenharmony_ci amdgpu_set_ib_size(rcs, ib); 844bf215546Sopenharmony_ci ib->used_ib_space += rcs->current.cdw * 4; 845bf215546Sopenharmony_ci ib->used_ib_space = align(ib->used_ib_space, ws->info.ib_alignment); 846bf215546Sopenharmony_ci ib->max_ib_size = MAX2(ib->max_ib_size, rcs->prev_dw + rcs->current.cdw); 847bf215546Sopenharmony_ci} 848bf215546Sopenharmony_ci 849bf215546Sopenharmony_cistatic bool amdgpu_init_cs_context(struct amdgpu_winsys *ws, 850bf215546Sopenharmony_ci struct amdgpu_cs_context *cs, 851bf215546Sopenharmony_ci enum amd_ip_type ip_type) 852bf215546Sopenharmony_ci{ 853bf215546Sopenharmony_ci switch (ip_type) { 854bf215546Sopenharmony_ci case AMD_IP_SDMA: 855bf215546Sopenharmony_ci cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_DMA; 856bf215546Sopenharmony_ci break; 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci case AMD_IP_UVD: 859bf215546Sopenharmony_ci cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD; 860bf215546Sopenharmony_ci break; 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci case AMD_IP_UVD_ENC: 863bf215546Sopenharmony_ci cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD_ENC; 864bf215546Sopenharmony_ci break; 865bf215546Sopenharmony_ci 866bf215546Sopenharmony_ci case AMD_IP_VCE: 867bf215546Sopenharmony_ci cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCE; 868bf215546Sopenharmony_ci break; 869bf215546Sopenharmony_ci 870bf215546Sopenharmony_ci case AMD_IP_VCN_DEC: 871bf215546Sopenharmony_ci cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_DEC; 872bf215546Sopenharmony_ci break; 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_ci case AMD_IP_VCN_ENC: 875bf215546Sopenharmony_ci cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_ENC; 876bf215546Sopenharmony_ci break; 877bf215546Sopenharmony_ci 878bf215546Sopenharmony_ci case AMD_IP_VCN_JPEG: 879bf215546Sopenharmony_ci cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_JPEG; 880bf215546Sopenharmony_ci break; 881bf215546Sopenharmony_ci 882bf215546Sopenharmony_ci case AMD_IP_COMPUTE: 883bf215546Sopenharmony_ci case AMD_IP_GFX: 884bf215546Sopenharmony_ci cs->ib[IB_MAIN].ip_type = ip_type == AMD_IP_GFX ? AMDGPU_HW_IP_GFX : 885bf215546Sopenharmony_ci AMDGPU_HW_IP_COMPUTE; 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci /* The kernel shouldn't invalidate L2 and vL1. The proper place for cache 888bf215546Sopenharmony_ci * invalidation is the beginning of IBs (the previous commit does that), 889bf215546Sopenharmony_ci * because completion of an IB doesn't care about the state of GPU caches, 890bf215546Sopenharmony_ci * but the beginning of an IB does. Draw calls from multiple IBs can be 891bf215546Sopenharmony_ci * executed in parallel, so draw calls from the current IB can finish after 892bf215546Sopenharmony_ci * the next IB starts drawing, and so the cache flush at the end of IB 893bf215546Sopenharmony_ci * is always late. 894bf215546Sopenharmony_ci */ 895bf215546Sopenharmony_ci if (ws->info.drm_minor >= 26) { 896bf215546Sopenharmony_ci cs->ib[IB_PREAMBLE].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE; 897bf215546Sopenharmony_ci cs->ib[IB_MAIN].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE; 898bf215546Sopenharmony_ci } 899bf215546Sopenharmony_ci break; 900bf215546Sopenharmony_ci 901bf215546Sopenharmony_ci default: 902bf215546Sopenharmony_ci assert(0); 903bf215546Sopenharmony_ci } 904bf215546Sopenharmony_ci 905bf215546Sopenharmony_ci cs->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAG_PREAMBLE; 906bf215546Sopenharmony_ci cs->ib[IB_PREAMBLE].ip_type = cs->ib[IB_MAIN].ip_type; 907bf215546Sopenharmony_ci 908bf215546Sopenharmony_ci cs->last_added_bo = NULL; 909bf215546Sopenharmony_ci return true; 910bf215546Sopenharmony_ci} 911bf215546Sopenharmony_ci 912bf215546Sopenharmony_cistatic void cleanup_fence_list(struct amdgpu_fence_list *fences) 913bf215546Sopenharmony_ci{ 914bf215546Sopenharmony_ci for (unsigned i = 0; i < fences->num; i++) 915bf215546Sopenharmony_ci amdgpu_fence_reference(&fences->list[i], NULL); 916bf215546Sopenharmony_ci fences->num = 0; 917bf215546Sopenharmony_ci} 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_cistatic void amdgpu_cs_context_cleanup(struct amdgpu_winsys *ws, struct amdgpu_cs_context *cs) 920bf215546Sopenharmony_ci{ 921bf215546Sopenharmony_ci unsigned i; 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci for (i = 0; i < cs->num_real_buffers; i++) { 924bf215546Sopenharmony_ci amdgpu_winsys_bo_reference(ws, &cs->real_buffers[i].bo, NULL); 925bf215546Sopenharmony_ci } 926bf215546Sopenharmony_ci for (i = 0; i < cs->num_slab_buffers; i++) { 927bf215546Sopenharmony_ci amdgpu_winsys_bo_reference(ws, &cs->slab_buffers[i].bo, NULL); 928bf215546Sopenharmony_ci } 929bf215546Sopenharmony_ci for (i = 0; i < cs->num_sparse_buffers; i++) { 930bf215546Sopenharmony_ci amdgpu_winsys_bo_reference(ws, &cs->sparse_buffers[i].bo, NULL); 931bf215546Sopenharmony_ci } 932bf215546Sopenharmony_ci cleanup_fence_list(&cs->fence_dependencies); 933bf215546Sopenharmony_ci cleanup_fence_list(&cs->syncobj_dependencies); 934bf215546Sopenharmony_ci cleanup_fence_list(&cs->syncobj_to_signal); 935bf215546Sopenharmony_ci 936bf215546Sopenharmony_ci cs->num_real_buffers = 0; 937bf215546Sopenharmony_ci cs->num_slab_buffers = 0; 938bf215546Sopenharmony_ci cs->num_sparse_buffers = 0; 939bf215546Sopenharmony_ci amdgpu_fence_reference(&cs->fence, NULL); 940bf215546Sopenharmony_ci cs->last_added_bo = NULL; 941bf215546Sopenharmony_ci} 942bf215546Sopenharmony_ci 943bf215546Sopenharmony_cistatic void amdgpu_destroy_cs_context(struct amdgpu_winsys *ws, struct amdgpu_cs_context *cs) 944bf215546Sopenharmony_ci{ 945bf215546Sopenharmony_ci amdgpu_cs_context_cleanup(ws, cs); 946bf215546Sopenharmony_ci FREE(cs->real_buffers); 947bf215546Sopenharmony_ci FREE(cs->slab_buffers); 948bf215546Sopenharmony_ci FREE(cs->sparse_buffers); 949bf215546Sopenharmony_ci FREE(cs->fence_dependencies.list); 950bf215546Sopenharmony_ci FREE(cs->syncobj_dependencies.list); 951bf215546Sopenharmony_ci FREE(cs->syncobj_to_signal.list); 952bf215546Sopenharmony_ci} 953bf215546Sopenharmony_ci 954bf215546Sopenharmony_ci 955bf215546Sopenharmony_cistatic bool 956bf215546Sopenharmony_ciamdgpu_cs_create(struct radeon_cmdbuf *rcs, 957bf215546Sopenharmony_ci struct radeon_winsys_ctx *rwctx, 958bf215546Sopenharmony_ci enum amd_ip_type ip_type, 959bf215546Sopenharmony_ci void (*flush)(void *ctx, unsigned flags, 960bf215546Sopenharmony_ci struct pipe_fence_handle **fence), 961bf215546Sopenharmony_ci void *flush_ctx, 962bf215546Sopenharmony_ci bool stop_exec_on_failure) 963bf215546Sopenharmony_ci{ 964bf215546Sopenharmony_ci struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx; 965bf215546Sopenharmony_ci struct amdgpu_cs *cs; 966bf215546Sopenharmony_ci 967bf215546Sopenharmony_ci cs = CALLOC_STRUCT(amdgpu_cs); 968bf215546Sopenharmony_ci if (!cs) { 969bf215546Sopenharmony_ci return false; 970bf215546Sopenharmony_ci } 971bf215546Sopenharmony_ci 972bf215546Sopenharmony_ci util_queue_fence_init(&cs->flush_completed); 973bf215546Sopenharmony_ci 974bf215546Sopenharmony_ci cs->ws = ctx->ws; 975bf215546Sopenharmony_ci cs->ctx = ctx; 976bf215546Sopenharmony_ci cs->flush_cs = flush; 977bf215546Sopenharmony_ci cs->flush_data = flush_ctx; 978bf215546Sopenharmony_ci cs->ip_type = ip_type; 979bf215546Sopenharmony_ci cs->stop_exec_on_failure = stop_exec_on_failure; 980bf215546Sopenharmony_ci cs->noop = ctx->ws->noop_cs; 981bf215546Sopenharmony_ci cs->has_chaining = ctx->ws->info.gfx_level >= GFX7 && 982bf215546Sopenharmony_ci (ip_type == AMD_IP_GFX || ip_type == AMD_IP_COMPUTE); 983bf215546Sopenharmony_ci 984bf215546Sopenharmony_ci struct amdgpu_cs_fence_info fence_info; 985bf215546Sopenharmony_ci fence_info.handle = cs->ctx->user_fence_bo; 986bf215546Sopenharmony_ci fence_info.offset = cs->ip_type * 4; 987bf215546Sopenharmony_ci amdgpu_cs_chunk_fence_info_to_data(&fence_info, (void*)&cs->fence_chunk); 988bf215546Sopenharmony_ci 989bf215546Sopenharmony_ci cs->main.ib_type = IB_MAIN; 990bf215546Sopenharmony_ci 991bf215546Sopenharmony_ci if (!amdgpu_init_cs_context(ctx->ws, &cs->csc1, ip_type)) { 992bf215546Sopenharmony_ci FREE(cs); 993bf215546Sopenharmony_ci return false; 994bf215546Sopenharmony_ci } 995bf215546Sopenharmony_ci 996bf215546Sopenharmony_ci if (!amdgpu_init_cs_context(ctx->ws, &cs->csc2, ip_type)) { 997bf215546Sopenharmony_ci amdgpu_destroy_cs_context(ctx->ws, &cs->csc1); 998bf215546Sopenharmony_ci FREE(cs); 999bf215546Sopenharmony_ci return false; 1000bf215546Sopenharmony_ci } 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_ci memset(cs->buffer_indices_hashlist, -1, sizeof(cs->buffer_indices_hashlist)); 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_ci /* Set the first submission context as current. */ 1005bf215546Sopenharmony_ci rcs->csc = cs->csc = &cs->csc1; 1006bf215546Sopenharmony_ci cs->cst = &cs->csc2; 1007bf215546Sopenharmony_ci 1008bf215546Sopenharmony_ci /* Assign to both amdgpu_cs_context; only csc will use it. */ 1009bf215546Sopenharmony_ci cs->csc1.buffer_indices_hashlist = cs->buffer_indices_hashlist; 1010bf215546Sopenharmony_ci cs->csc2.buffer_indices_hashlist = cs->buffer_indices_hashlist; 1011bf215546Sopenharmony_ci 1012bf215546Sopenharmony_ci cs->csc1.ws = ctx->ws; 1013bf215546Sopenharmony_ci cs->csc2.ws = ctx->ws; 1014bf215546Sopenharmony_ci 1015bf215546Sopenharmony_ci cs->main.rcs = rcs; 1016bf215546Sopenharmony_ci rcs->priv = cs; 1017bf215546Sopenharmony_ci 1018bf215546Sopenharmony_ci if (!amdgpu_get_new_ib(ctx->ws, rcs, &cs->main, cs)) { 1019bf215546Sopenharmony_ci amdgpu_destroy_cs_context(ctx->ws, &cs->csc2); 1020bf215546Sopenharmony_ci amdgpu_destroy_cs_context(ctx->ws, &cs->csc1); 1021bf215546Sopenharmony_ci FREE(cs); 1022bf215546Sopenharmony_ci rcs->priv = NULL; 1023bf215546Sopenharmony_ci return false; 1024bf215546Sopenharmony_ci } 1025bf215546Sopenharmony_ci 1026bf215546Sopenharmony_ci p_atomic_inc(&ctx->ws->num_cs); 1027bf215546Sopenharmony_ci return true; 1028bf215546Sopenharmony_ci} 1029bf215546Sopenharmony_ci 1030bf215546Sopenharmony_cistatic void amdgpu_cs_set_preamble(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib, 1031bf215546Sopenharmony_ci unsigned preamble_num_dw, bool preamble_changed) 1032bf215546Sopenharmony_ci{ 1033bf215546Sopenharmony_ci /* TODO: implement this properly */ 1034bf215546Sopenharmony_ci radeon_emit_array(cs, preamble_ib, preamble_num_dw); 1035bf215546Sopenharmony_ci} 1036bf215546Sopenharmony_ci 1037bf215546Sopenharmony_cistatic bool 1038bf215546Sopenharmony_ciamdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib, 1039bf215546Sopenharmony_ci unsigned preamble_num_dw) 1040bf215546Sopenharmony_ci{ 1041bf215546Sopenharmony_ci struct amdgpu_cs *cs = amdgpu_cs(rcs); 1042bf215546Sopenharmony_ci struct amdgpu_winsys *ws = cs->ws; 1043bf215546Sopenharmony_ci struct amdgpu_cs_context *csc[2] = {&cs->csc1, &cs->csc2}; 1044bf215546Sopenharmony_ci unsigned size = align(preamble_num_dw * 4, ws->info.ib_alignment); 1045bf215546Sopenharmony_ci struct pb_buffer *preamble_bo; 1046bf215546Sopenharmony_ci uint32_t *map; 1047bf215546Sopenharmony_ci 1048bf215546Sopenharmony_ci /* Create the preamble IB buffer. */ 1049bf215546Sopenharmony_ci preamble_bo = amdgpu_bo_create(ws, size, ws->info.ib_alignment, 1050bf215546Sopenharmony_ci RADEON_DOMAIN_VRAM, 1051bf215546Sopenharmony_ci RADEON_FLAG_NO_INTERPROCESS_SHARING | 1052bf215546Sopenharmony_ci RADEON_FLAG_GTT_WC | 1053bf215546Sopenharmony_ci RADEON_FLAG_READ_ONLY); 1054bf215546Sopenharmony_ci if (!preamble_bo) 1055bf215546Sopenharmony_ci return false; 1056bf215546Sopenharmony_ci 1057bf215546Sopenharmony_ci map = (uint32_t*)amdgpu_bo_map(&ws->dummy_ws.base, preamble_bo, NULL, 1058bf215546Sopenharmony_ci PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); 1059bf215546Sopenharmony_ci if (!map) { 1060bf215546Sopenharmony_ci radeon_bo_reference(&ws->dummy_ws.base, &preamble_bo, NULL); 1061bf215546Sopenharmony_ci return false; 1062bf215546Sopenharmony_ci } 1063bf215546Sopenharmony_ci 1064bf215546Sopenharmony_ci /* Upload the preamble IB. */ 1065bf215546Sopenharmony_ci memcpy(map, preamble_ib, preamble_num_dw * 4); 1066bf215546Sopenharmony_ci 1067bf215546Sopenharmony_ci /* Pad the IB. */ 1068bf215546Sopenharmony_ci uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ip_type]; 1069bf215546Sopenharmony_ci while (preamble_num_dw & ib_pad_dw_mask) 1070bf215546Sopenharmony_ci map[preamble_num_dw++] = PKT3_NOP_PAD; 1071bf215546Sopenharmony_ci amdgpu_bo_unmap(&ws->dummy_ws.base, preamble_bo); 1072bf215546Sopenharmony_ci 1073bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 1074bf215546Sopenharmony_ci csc[i]->ib[IB_PREAMBLE].va_start = amdgpu_winsys_bo(preamble_bo)->va; 1075bf215546Sopenharmony_ci csc[i]->ib[IB_PREAMBLE].ib_bytes = preamble_num_dw * 4; 1076bf215546Sopenharmony_ci 1077bf215546Sopenharmony_ci csc[i]->ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT; 1078bf215546Sopenharmony_ci } 1079bf215546Sopenharmony_ci 1080bf215546Sopenharmony_ci assert(!cs->preamble_ib_bo); 1081bf215546Sopenharmony_ci cs->preamble_ib_bo = preamble_bo; 1082bf215546Sopenharmony_ci 1083bf215546Sopenharmony_ci amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, 1084bf215546Sopenharmony_ci RADEON_USAGE_READ | RADEON_PRIO_IB, 0); 1085bf215546Sopenharmony_ci return true; 1086bf215546Sopenharmony_ci} 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_cistatic bool amdgpu_cs_validate(struct radeon_cmdbuf *rcs) 1089bf215546Sopenharmony_ci{ 1090bf215546Sopenharmony_ci return true; 1091bf215546Sopenharmony_ci} 1092bf215546Sopenharmony_ci 1093bf215546Sopenharmony_cistatic bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw) 1094bf215546Sopenharmony_ci{ 1095bf215546Sopenharmony_ci struct amdgpu_cs *cs = amdgpu_cs(rcs); 1096bf215546Sopenharmony_ci struct amdgpu_ib *ib = &cs->main; 1097bf215546Sopenharmony_ci 1098bf215546Sopenharmony_ci assert(rcs->current.cdw <= rcs->current.max_dw); 1099bf215546Sopenharmony_ci 1100bf215546Sopenharmony_ci /* 125% of the size for IB epilog. */ 1101bf215546Sopenharmony_ci unsigned requested_size = rcs->prev_dw + rcs->current.cdw + dw; 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci if (requested_size > IB_MAX_SUBMIT_DWORDS) 1104bf215546Sopenharmony_ci return false; 1105bf215546Sopenharmony_ci 1106bf215546Sopenharmony_ci if (rcs->current.max_dw - rcs->current.cdw >= dw) 1107bf215546Sopenharmony_ci return true; 1108bf215546Sopenharmony_ci 1109bf215546Sopenharmony_ci unsigned cs_epilog_dw = amdgpu_cs_epilog_dws(cs); 1110bf215546Sopenharmony_ci unsigned need_byte_size = (dw + cs_epilog_dw) * 4; 1111bf215546Sopenharmony_ci unsigned safe_byte_size = need_byte_size + need_byte_size / 4; 1112bf215546Sopenharmony_ci ib->max_check_space_size = MAX2(ib->max_check_space_size, 1113bf215546Sopenharmony_ci safe_byte_size); 1114bf215546Sopenharmony_ci ib->max_ib_size = MAX2(ib->max_ib_size, requested_size); 1115bf215546Sopenharmony_ci 1116bf215546Sopenharmony_ci if (!cs->has_chaining) 1117bf215546Sopenharmony_ci return false; 1118bf215546Sopenharmony_ci 1119bf215546Sopenharmony_ci /* Allocate a new chunk */ 1120bf215546Sopenharmony_ci if (rcs->num_prev >= rcs->max_prev) { 1121bf215546Sopenharmony_ci unsigned new_max_prev = MAX2(1, 2 * rcs->max_prev); 1122bf215546Sopenharmony_ci struct radeon_cmdbuf_chunk *new_prev; 1123bf215546Sopenharmony_ci 1124bf215546Sopenharmony_ci new_prev = REALLOC(rcs->prev, 1125bf215546Sopenharmony_ci sizeof(*new_prev) * rcs->max_prev, 1126bf215546Sopenharmony_ci sizeof(*new_prev) * new_max_prev); 1127bf215546Sopenharmony_ci if (!new_prev) 1128bf215546Sopenharmony_ci return false; 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_ci rcs->prev = new_prev; 1131bf215546Sopenharmony_ci rcs->max_prev = new_max_prev; 1132bf215546Sopenharmony_ci } 1133bf215546Sopenharmony_ci 1134bf215546Sopenharmony_ci if (!amdgpu_ib_new_buffer(cs->ws, ib, cs)) 1135bf215546Sopenharmony_ci return false; 1136bf215546Sopenharmony_ci 1137bf215546Sopenharmony_ci assert(ib->used_ib_space == 0); 1138bf215546Sopenharmony_ci uint64_t va = amdgpu_winsys_bo(ib->big_ib_buffer)->va; 1139bf215546Sopenharmony_ci 1140bf215546Sopenharmony_ci /* This space was originally reserved. */ 1141bf215546Sopenharmony_ci rcs->current.max_dw += cs_epilog_dw; 1142bf215546Sopenharmony_ci 1143bf215546Sopenharmony_ci /* Pad with NOPs but leave 4 dwords for INDIRECT_BUFFER. */ 1144bf215546Sopenharmony_ci uint32_t ib_pad_dw_mask = cs->ws->info.ib_pad_dw_mask[cs->ip_type]; 1145bf215546Sopenharmony_ci while ((rcs->current.cdw & ib_pad_dw_mask) != ib_pad_dw_mask - 3) 1146bf215546Sopenharmony_ci radeon_emit(rcs, PKT3_NOP_PAD); 1147bf215546Sopenharmony_ci 1148bf215546Sopenharmony_ci radeon_emit(rcs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)); 1149bf215546Sopenharmony_ci radeon_emit(rcs, va); 1150bf215546Sopenharmony_ci radeon_emit(rcs, va >> 32); 1151bf215546Sopenharmony_ci uint32_t *new_ptr_ib_size = &rcs->current.buf[rcs->current.cdw++]; 1152bf215546Sopenharmony_ci assert((rcs->current.cdw & ib_pad_dw_mask) == 0); 1153bf215546Sopenharmony_ci 1154bf215546Sopenharmony_ci assert((rcs->current.cdw & 7) == 0); 1155bf215546Sopenharmony_ci assert(rcs->current.cdw <= rcs->current.max_dw); 1156bf215546Sopenharmony_ci 1157bf215546Sopenharmony_ci amdgpu_set_ib_size(rcs, ib); 1158bf215546Sopenharmony_ci ib->ptr_ib_size = new_ptr_ib_size; 1159bf215546Sopenharmony_ci ib->ptr_ib_size_inside_ib = true; 1160bf215546Sopenharmony_ci 1161bf215546Sopenharmony_ci /* Hook up the new chunk */ 1162bf215546Sopenharmony_ci rcs->prev[rcs->num_prev].buf = rcs->current.buf; 1163bf215546Sopenharmony_ci rcs->prev[rcs->num_prev].cdw = rcs->current.cdw; 1164bf215546Sopenharmony_ci rcs->prev[rcs->num_prev].max_dw = rcs->current.cdw; /* no modifications */ 1165bf215546Sopenharmony_ci rcs->num_prev++; 1166bf215546Sopenharmony_ci 1167bf215546Sopenharmony_ci rcs->prev_dw += rcs->current.cdw; 1168bf215546Sopenharmony_ci rcs->current.cdw = 0; 1169bf215546Sopenharmony_ci 1170bf215546Sopenharmony_ci rcs->current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space); 1171bf215546Sopenharmony_ci rcs->current.max_dw = ib->big_ib_buffer->size / 4 - cs_epilog_dw; 1172bf215546Sopenharmony_ci rcs->gpu_address = va; 1173bf215546Sopenharmony_ci 1174bf215546Sopenharmony_ci amdgpu_cs_add_buffer(cs->main.rcs, ib->big_ib_buffer, 1175bf215546Sopenharmony_ci RADEON_USAGE_READ | RADEON_PRIO_IB, 0); 1176bf215546Sopenharmony_ci 1177bf215546Sopenharmony_ci return true; 1178bf215546Sopenharmony_ci} 1179bf215546Sopenharmony_ci 1180bf215546Sopenharmony_cistatic unsigned amdgpu_cs_get_buffer_list(struct radeon_cmdbuf *rcs, 1181bf215546Sopenharmony_ci struct radeon_bo_list_item *list) 1182bf215546Sopenharmony_ci{ 1183bf215546Sopenharmony_ci struct amdgpu_cs_context *cs = amdgpu_cs(rcs)->csc; 1184bf215546Sopenharmony_ci int i; 1185bf215546Sopenharmony_ci 1186bf215546Sopenharmony_ci if (list) { 1187bf215546Sopenharmony_ci for (i = 0; i < cs->num_real_buffers; i++) { 1188bf215546Sopenharmony_ci list[i].bo_size = cs->real_buffers[i].bo->base.size; 1189bf215546Sopenharmony_ci list[i].vm_address = cs->real_buffers[i].bo->va; 1190bf215546Sopenharmony_ci list[i].priority_usage = cs->real_buffers[i].usage; 1191bf215546Sopenharmony_ci } 1192bf215546Sopenharmony_ci } 1193bf215546Sopenharmony_ci return cs->num_real_buffers; 1194bf215546Sopenharmony_ci} 1195bf215546Sopenharmony_ci 1196bf215546Sopenharmony_cistatic void add_fence_to_list(struct amdgpu_fence_list *fences, 1197bf215546Sopenharmony_ci struct amdgpu_fence *fence) 1198bf215546Sopenharmony_ci{ 1199bf215546Sopenharmony_ci unsigned idx = fences->num++; 1200bf215546Sopenharmony_ci 1201bf215546Sopenharmony_ci if (idx >= fences->max) { 1202bf215546Sopenharmony_ci unsigned size; 1203bf215546Sopenharmony_ci const unsigned increment = 8; 1204bf215546Sopenharmony_ci 1205bf215546Sopenharmony_ci fences->max = idx + increment; 1206bf215546Sopenharmony_ci size = fences->max * sizeof(fences->list[0]); 1207bf215546Sopenharmony_ci fences->list = realloc(fences->list, size); 1208bf215546Sopenharmony_ci /* Clear the newly-allocated elements. */ 1209bf215546Sopenharmony_ci memset(fences->list + idx, 0, 1210bf215546Sopenharmony_ci increment * sizeof(fences->list[0])); 1211bf215546Sopenharmony_ci } 1212bf215546Sopenharmony_ci amdgpu_fence_reference(&fences->list[idx], (struct pipe_fence_handle*)fence); 1213bf215546Sopenharmony_ci} 1214bf215546Sopenharmony_ci 1215bf215546Sopenharmony_cistatic bool is_noop_fence_dependency(struct amdgpu_cs *acs, 1216bf215546Sopenharmony_ci struct amdgpu_fence *fence) 1217bf215546Sopenharmony_ci{ 1218bf215546Sopenharmony_ci struct amdgpu_cs_context *cs = acs->csc; 1219bf215546Sopenharmony_ci 1220bf215546Sopenharmony_ci /* Detect no-op dependencies only when there is only 1 ring, 1221bf215546Sopenharmony_ci * because IBs on one ring are always executed one at a time. 1222bf215546Sopenharmony_ci * 1223bf215546Sopenharmony_ci * We always want no dependency between back-to-back gfx IBs, because 1224bf215546Sopenharmony_ci * we need the parallelism between IBs for good performance. 1225bf215546Sopenharmony_ci */ 1226bf215546Sopenharmony_ci if ((acs->ip_type == AMD_IP_GFX || 1227bf215546Sopenharmony_ci acs->ws->info.ip[acs->ip_type].num_queues == 1) && 1228bf215546Sopenharmony_ci !amdgpu_fence_is_syncobj(fence) && 1229bf215546Sopenharmony_ci fence->ctx == acs->ctx && 1230bf215546Sopenharmony_ci fence->fence.ip_type == cs->ib[IB_MAIN].ip_type) 1231bf215546Sopenharmony_ci return true; 1232bf215546Sopenharmony_ci 1233bf215546Sopenharmony_ci return amdgpu_fence_wait((void *)fence, 0, false); 1234bf215546Sopenharmony_ci} 1235bf215546Sopenharmony_ci 1236bf215546Sopenharmony_cistatic void amdgpu_cs_add_fence_dependency(struct radeon_cmdbuf *rws, 1237bf215546Sopenharmony_ci struct pipe_fence_handle *pfence, 1238bf215546Sopenharmony_ci unsigned dependency_flags) 1239bf215546Sopenharmony_ci{ 1240bf215546Sopenharmony_ci struct amdgpu_cs *acs = amdgpu_cs(rws); 1241bf215546Sopenharmony_ci struct amdgpu_cs_context *cs = acs->csc; 1242bf215546Sopenharmony_ci struct amdgpu_fence *fence = (struct amdgpu_fence*)pfence; 1243bf215546Sopenharmony_ci 1244bf215546Sopenharmony_ci util_queue_fence_wait(&fence->submitted); 1245bf215546Sopenharmony_ci 1246bf215546Sopenharmony_ci if (is_noop_fence_dependency(acs, fence)) 1247bf215546Sopenharmony_ci return; 1248bf215546Sopenharmony_ci 1249bf215546Sopenharmony_ci if (amdgpu_fence_is_syncobj(fence)) 1250bf215546Sopenharmony_ci add_fence_to_list(&cs->syncobj_dependencies, fence); 1251bf215546Sopenharmony_ci else 1252bf215546Sopenharmony_ci add_fence_to_list(&cs->fence_dependencies, fence); 1253bf215546Sopenharmony_ci} 1254bf215546Sopenharmony_ci 1255bf215546Sopenharmony_cistatic void amdgpu_add_bo_fence_dependencies(struct amdgpu_cs *acs, 1256bf215546Sopenharmony_ci struct amdgpu_cs_context *cs, 1257bf215546Sopenharmony_ci struct amdgpu_cs_buffer *buffer) 1258bf215546Sopenharmony_ci{ 1259bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = buffer->bo; 1260bf215546Sopenharmony_ci unsigned new_num_fences = 0; 1261bf215546Sopenharmony_ci const unsigned num_fences = bo->num_fences; 1262bf215546Sopenharmony_ci 1263bf215546Sopenharmony_ci for (unsigned j = 0; j < num_fences; ++j) { 1264bf215546Sopenharmony_ci struct amdgpu_fence *bo_fence = (void *)bo->fences[j]; 1265bf215546Sopenharmony_ci 1266bf215546Sopenharmony_ci if (is_noop_fence_dependency(acs, bo_fence)) 1267bf215546Sopenharmony_ci continue; 1268bf215546Sopenharmony_ci 1269bf215546Sopenharmony_ci amdgpu_fence_reference(&bo->fences[new_num_fences], bo->fences[j]); 1270bf215546Sopenharmony_ci new_num_fences++; 1271bf215546Sopenharmony_ci 1272bf215546Sopenharmony_ci if (!(buffer->usage & RADEON_USAGE_SYNCHRONIZED)) 1273bf215546Sopenharmony_ci continue; 1274bf215546Sopenharmony_ci 1275bf215546Sopenharmony_ci add_fence_to_list(&cs->fence_dependencies, bo_fence); 1276bf215546Sopenharmony_ci } 1277bf215546Sopenharmony_ci 1278bf215546Sopenharmony_ci for (unsigned j = new_num_fences; j < num_fences; ++j) 1279bf215546Sopenharmony_ci amdgpu_fence_reference(&bo->fences[j], NULL); 1280bf215546Sopenharmony_ci 1281bf215546Sopenharmony_ci bo->num_fences = new_num_fences; 1282bf215546Sopenharmony_ci} 1283bf215546Sopenharmony_ci 1284bf215546Sopenharmony_ci/* Add the given list of fences to the buffer's fence list. 1285bf215546Sopenharmony_ci * 1286bf215546Sopenharmony_ci * Must be called with the winsys bo_fence_lock held. 1287bf215546Sopenharmony_ci */ 1288bf215546Sopenharmony_civoid amdgpu_add_fences(struct amdgpu_winsys_bo *bo, 1289bf215546Sopenharmony_ci unsigned num_fences, 1290bf215546Sopenharmony_ci struct pipe_fence_handle **fences) 1291bf215546Sopenharmony_ci{ 1292bf215546Sopenharmony_ci if (bo->num_fences + num_fences > bo->max_fences) { 1293bf215546Sopenharmony_ci unsigned new_max_fences = MAX2(bo->num_fences + num_fences, bo->max_fences * 2); 1294bf215546Sopenharmony_ci struct pipe_fence_handle **new_fences = 1295bf215546Sopenharmony_ci REALLOC(bo->fences, 1296bf215546Sopenharmony_ci bo->num_fences * sizeof(*new_fences), 1297bf215546Sopenharmony_ci new_max_fences * sizeof(*new_fences)); 1298bf215546Sopenharmony_ci if (likely(new_fences && new_max_fences < UINT16_MAX)) { 1299bf215546Sopenharmony_ci bo->fences = new_fences; 1300bf215546Sopenharmony_ci bo->max_fences = new_max_fences; 1301bf215546Sopenharmony_ci } else { 1302bf215546Sopenharmony_ci unsigned drop; 1303bf215546Sopenharmony_ci 1304bf215546Sopenharmony_ci fprintf(stderr, new_fences ? "amdgpu_add_fences: too many fences, dropping some\n" 1305bf215546Sopenharmony_ci : "amdgpu_add_fences: allocation failure, dropping fence(s)\n"); 1306bf215546Sopenharmony_ci free(new_fences); 1307bf215546Sopenharmony_ci 1308bf215546Sopenharmony_ci if (!bo->num_fences) 1309bf215546Sopenharmony_ci return; 1310bf215546Sopenharmony_ci 1311bf215546Sopenharmony_ci bo->num_fences--; /* prefer to keep the most recent fence if possible */ 1312bf215546Sopenharmony_ci amdgpu_fence_reference(&bo->fences[bo->num_fences], NULL); 1313bf215546Sopenharmony_ci 1314bf215546Sopenharmony_ci drop = bo->num_fences + num_fences - bo->max_fences; 1315bf215546Sopenharmony_ci num_fences -= drop; 1316bf215546Sopenharmony_ci fences += drop; 1317bf215546Sopenharmony_ci } 1318bf215546Sopenharmony_ci } 1319bf215546Sopenharmony_ci 1320bf215546Sopenharmony_ci unsigned bo_num_fences = bo->num_fences; 1321bf215546Sopenharmony_ci 1322bf215546Sopenharmony_ci for (unsigned i = 0; i < num_fences; ++i) { 1323bf215546Sopenharmony_ci bo->fences[bo_num_fences] = NULL; 1324bf215546Sopenharmony_ci amdgpu_fence_reference(&bo->fences[bo_num_fences], fences[i]); 1325bf215546Sopenharmony_ci bo_num_fences++; 1326bf215546Sopenharmony_ci } 1327bf215546Sopenharmony_ci bo->num_fences = bo_num_fences; 1328bf215546Sopenharmony_ci} 1329bf215546Sopenharmony_ci 1330bf215546Sopenharmony_cistatic void amdgpu_inc_bo_num_active_ioctls(unsigned num_buffers, 1331bf215546Sopenharmony_ci struct amdgpu_cs_buffer *buffers) 1332bf215546Sopenharmony_ci{ 1333bf215546Sopenharmony_ci for (unsigned i = 0; i < num_buffers; i++) 1334bf215546Sopenharmony_ci p_atomic_inc(&buffers[i].bo->num_active_ioctls); 1335bf215546Sopenharmony_ci} 1336bf215546Sopenharmony_ci 1337bf215546Sopenharmony_cistatic void amdgpu_add_fence_dependencies_bo_list(struct amdgpu_cs *acs, 1338bf215546Sopenharmony_ci struct amdgpu_cs_context *cs, 1339bf215546Sopenharmony_ci struct pipe_fence_handle *fence, 1340bf215546Sopenharmony_ci unsigned num_buffers, 1341bf215546Sopenharmony_ci struct amdgpu_cs_buffer *buffers) 1342bf215546Sopenharmony_ci{ 1343bf215546Sopenharmony_ci for (unsigned i = 0; i < num_buffers; i++) { 1344bf215546Sopenharmony_ci struct amdgpu_cs_buffer *buffer = &buffers[i]; 1345bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = buffer->bo; 1346bf215546Sopenharmony_ci 1347bf215546Sopenharmony_ci amdgpu_add_bo_fence_dependencies(acs, cs, buffer); 1348bf215546Sopenharmony_ci amdgpu_add_fences(bo, 1, &fence); 1349bf215546Sopenharmony_ci } 1350bf215546Sopenharmony_ci} 1351bf215546Sopenharmony_ci 1352bf215546Sopenharmony_ci/* Since the kernel driver doesn't synchronize execution between different 1353bf215546Sopenharmony_ci * rings automatically, we have to add fence dependencies manually. 1354bf215546Sopenharmony_ci */ 1355bf215546Sopenharmony_cistatic void amdgpu_add_fence_dependencies_bo_lists(struct amdgpu_cs *acs, 1356bf215546Sopenharmony_ci struct amdgpu_cs_context *cs) 1357bf215546Sopenharmony_ci{ 1358bf215546Sopenharmony_ci amdgpu_add_fence_dependencies_bo_list(acs, cs, cs->fence, cs->num_real_buffers, cs->real_buffers); 1359bf215546Sopenharmony_ci amdgpu_add_fence_dependencies_bo_list(acs, cs, cs->fence, cs->num_slab_buffers, cs->slab_buffers); 1360bf215546Sopenharmony_ci amdgpu_add_fence_dependencies_bo_list(acs, cs, cs->fence, cs->num_sparse_buffers, cs->sparse_buffers); 1361bf215546Sopenharmony_ci} 1362bf215546Sopenharmony_ci 1363bf215546Sopenharmony_cistatic void amdgpu_cs_add_syncobj_signal(struct radeon_cmdbuf *rws, 1364bf215546Sopenharmony_ci struct pipe_fence_handle *fence) 1365bf215546Sopenharmony_ci{ 1366bf215546Sopenharmony_ci struct amdgpu_cs *acs = amdgpu_cs(rws); 1367bf215546Sopenharmony_ci struct amdgpu_cs_context *cs = acs->csc; 1368bf215546Sopenharmony_ci 1369bf215546Sopenharmony_ci assert(amdgpu_fence_is_syncobj((struct amdgpu_fence *)fence)); 1370bf215546Sopenharmony_ci 1371bf215546Sopenharmony_ci add_fence_to_list(&cs->syncobj_to_signal, (struct amdgpu_fence*)fence); 1372bf215546Sopenharmony_ci} 1373bf215546Sopenharmony_ci 1374bf215546Sopenharmony_ci/* Add backing of sparse buffers to the buffer list. 1375bf215546Sopenharmony_ci * 1376bf215546Sopenharmony_ci * This is done late, during submission, to keep the buffer list short before 1377bf215546Sopenharmony_ci * submit, and to avoid managing fences for the backing buffers. 1378bf215546Sopenharmony_ci */ 1379bf215546Sopenharmony_cistatic bool amdgpu_add_sparse_backing_buffers(struct amdgpu_cs_context *cs) 1380bf215546Sopenharmony_ci{ 1381bf215546Sopenharmony_ci for (unsigned i = 0; i < cs->num_sparse_buffers; ++i) { 1382bf215546Sopenharmony_ci struct amdgpu_cs_buffer *buffer = &cs->sparse_buffers[i]; 1383bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = buffer->bo; 1384bf215546Sopenharmony_ci 1385bf215546Sopenharmony_ci simple_mtx_lock(&bo->lock); 1386bf215546Sopenharmony_ci 1387bf215546Sopenharmony_ci list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) { 1388bf215546Sopenharmony_ci /* We can directly add the buffer here, because we know that each 1389bf215546Sopenharmony_ci * backing buffer occurs only once. 1390bf215546Sopenharmony_ci */ 1391bf215546Sopenharmony_ci int idx = amdgpu_do_add_real_buffer(cs, backing->bo); 1392bf215546Sopenharmony_ci if (idx < 0) { 1393bf215546Sopenharmony_ci fprintf(stderr, "%s: failed to add buffer\n", __FUNCTION__); 1394bf215546Sopenharmony_ci simple_mtx_unlock(&bo->lock); 1395bf215546Sopenharmony_ci return false; 1396bf215546Sopenharmony_ci } 1397bf215546Sopenharmony_ci 1398bf215546Sopenharmony_ci cs->real_buffers[idx].usage = buffer->usage; 1399bf215546Sopenharmony_ci } 1400bf215546Sopenharmony_ci 1401bf215546Sopenharmony_ci simple_mtx_unlock(&bo->lock); 1402bf215546Sopenharmony_ci } 1403bf215546Sopenharmony_ci 1404bf215546Sopenharmony_ci return true; 1405bf215546Sopenharmony_ci} 1406bf215546Sopenharmony_ci 1407bf215546Sopenharmony_cistatic void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index) 1408bf215546Sopenharmony_ci{ 1409bf215546Sopenharmony_ci struct amdgpu_cs *acs = (struct amdgpu_cs*)job; 1410bf215546Sopenharmony_ci struct amdgpu_winsys *ws = acs->ws; 1411bf215546Sopenharmony_ci struct amdgpu_cs_context *cs = acs->cst; 1412bf215546Sopenharmony_ci int i, r; 1413bf215546Sopenharmony_ci uint32_t bo_list = 0; 1414bf215546Sopenharmony_ci uint64_t seq_no = 0; 1415bf215546Sopenharmony_ci bool has_user_fence = amdgpu_cs_has_user_fence(cs); 1416bf215546Sopenharmony_ci bool use_bo_list_create = ws->info.drm_minor < 27; 1417bf215546Sopenharmony_ci struct drm_amdgpu_bo_list_in bo_list_in; 1418bf215546Sopenharmony_ci unsigned initial_num_real_buffers = cs->num_real_buffers; 1419bf215546Sopenharmony_ci 1420bf215546Sopenharmony_ci simple_mtx_lock(&ws->bo_fence_lock); 1421bf215546Sopenharmony_ci amdgpu_add_fence_dependencies_bo_lists(acs, cs); 1422bf215546Sopenharmony_ci simple_mtx_unlock(&ws->bo_fence_lock); 1423bf215546Sopenharmony_ci 1424bf215546Sopenharmony_ci#if DEBUG 1425bf215546Sopenharmony_ci /* Prepare the buffer list. */ 1426bf215546Sopenharmony_ci if (ws->debug_all_bos) { 1427bf215546Sopenharmony_ci /* The buffer list contains all buffers. This is a slow path that 1428bf215546Sopenharmony_ci * ensures that no buffer is missing in the BO list. 1429bf215546Sopenharmony_ci */ 1430bf215546Sopenharmony_ci unsigned num_handles = 0; 1431bf215546Sopenharmony_ci struct drm_amdgpu_bo_list_entry *list = 1432bf215546Sopenharmony_ci alloca(ws->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry)); 1433bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo; 1434bf215546Sopenharmony_ci 1435bf215546Sopenharmony_ci simple_mtx_lock(&ws->global_bo_list_lock); 1436bf215546Sopenharmony_ci LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, u.real.global_list_item) { 1437bf215546Sopenharmony_ci list[num_handles].bo_handle = bo->u.real.kms_handle; 1438bf215546Sopenharmony_ci list[num_handles].bo_priority = 0; 1439bf215546Sopenharmony_ci ++num_handles; 1440bf215546Sopenharmony_ci } 1441bf215546Sopenharmony_ci 1442bf215546Sopenharmony_ci r = amdgpu_bo_list_create_raw(ws->dev, ws->num_buffers, list, &bo_list); 1443bf215546Sopenharmony_ci simple_mtx_unlock(&ws->global_bo_list_lock); 1444bf215546Sopenharmony_ci if (r) { 1445bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r); 1446bf215546Sopenharmony_ci goto cleanup; 1447bf215546Sopenharmony_ci } 1448bf215546Sopenharmony_ci } else 1449bf215546Sopenharmony_ci#endif 1450bf215546Sopenharmony_ci { 1451bf215546Sopenharmony_ci if (!amdgpu_add_sparse_backing_buffers(cs)) { 1452bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: amdgpu_add_sparse_backing_buffers failed\n"); 1453bf215546Sopenharmony_ci r = -ENOMEM; 1454bf215546Sopenharmony_ci goto cleanup; 1455bf215546Sopenharmony_ci } 1456bf215546Sopenharmony_ci 1457bf215546Sopenharmony_ci struct drm_amdgpu_bo_list_entry *list = 1458bf215546Sopenharmony_ci alloca((cs->num_real_buffers + 2) * sizeof(struct drm_amdgpu_bo_list_entry)); 1459bf215546Sopenharmony_ci 1460bf215546Sopenharmony_ci unsigned num_handles = 0; 1461bf215546Sopenharmony_ci for (i = 0; i < cs->num_real_buffers; ++i) { 1462bf215546Sopenharmony_ci struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i]; 1463bf215546Sopenharmony_ci 1464bf215546Sopenharmony_ci list[num_handles].bo_handle = buffer->bo->u.real.kms_handle; 1465bf215546Sopenharmony_ci list[num_handles].bo_priority = 1466bf215546Sopenharmony_ci (util_last_bit(buffer->usage & RADEON_ALL_PRIORITIES) - 1) / 2; 1467bf215546Sopenharmony_ci ++num_handles; 1468bf215546Sopenharmony_ci } 1469bf215546Sopenharmony_ci 1470bf215546Sopenharmony_ci if (use_bo_list_create) { 1471bf215546Sopenharmony_ci /* Legacy path creating the buffer list handle and passing it to the CS ioctl. */ 1472bf215546Sopenharmony_ci r = amdgpu_bo_list_create_raw(ws->dev, num_handles, list, &bo_list); 1473bf215546Sopenharmony_ci if (r) { 1474bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r); 1475bf215546Sopenharmony_ci goto cleanup; 1476bf215546Sopenharmony_ci } 1477bf215546Sopenharmony_ci } else { 1478bf215546Sopenharmony_ci /* Standard path passing the buffer list via the CS ioctl. */ 1479bf215546Sopenharmony_ci bo_list_in.operation = ~0; 1480bf215546Sopenharmony_ci bo_list_in.list_handle = ~0; 1481bf215546Sopenharmony_ci bo_list_in.bo_number = num_handles; 1482bf215546Sopenharmony_ci bo_list_in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry); 1483bf215546Sopenharmony_ci bo_list_in.bo_info_ptr = (uint64_t)(uintptr_t)list; 1484bf215546Sopenharmony_ci } 1485bf215546Sopenharmony_ci } 1486bf215546Sopenharmony_ci 1487bf215546Sopenharmony_ci if (acs->ip_type == AMD_IP_GFX) 1488bf215546Sopenharmony_ci ws->gfx_bo_list_counter += cs->num_real_buffers; 1489bf215546Sopenharmony_ci 1490bf215546Sopenharmony_ci bool noop = false; 1491bf215546Sopenharmony_ci 1492bf215546Sopenharmony_ci if (acs->stop_exec_on_failure && acs->ctx->num_rejected_cs) { 1493bf215546Sopenharmony_ci r = -ECANCELED; 1494bf215546Sopenharmony_ci } else { 1495bf215546Sopenharmony_ci struct drm_amdgpu_cs_chunk chunks[7]; 1496bf215546Sopenharmony_ci unsigned num_chunks = 0; 1497bf215546Sopenharmony_ci 1498bf215546Sopenharmony_ci /* BO list */ 1499bf215546Sopenharmony_ci if (!use_bo_list_create) { 1500bf215546Sopenharmony_ci chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES; 1501bf215546Sopenharmony_ci chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_bo_list_in) / 4; 1502bf215546Sopenharmony_ci chunks[num_chunks].chunk_data = (uintptr_t)&bo_list_in; 1503bf215546Sopenharmony_ci num_chunks++; 1504bf215546Sopenharmony_ci } 1505bf215546Sopenharmony_ci 1506bf215546Sopenharmony_ci /* Fence dependencies. */ 1507bf215546Sopenharmony_ci unsigned num_dependencies = cs->fence_dependencies.num; 1508bf215546Sopenharmony_ci if (num_dependencies) { 1509bf215546Sopenharmony_ci struct drm_amdgpu_cs_chunk_dep *dep_chunk = 1510bf215546Sopenharmony_ci alloca(num_dependencies * sizeof(*dep_chunk)); 1511bf215546Sopenharmony_ci 1512bf215546Sopenharmony_ci for (unsigned i = 0; i < num_dependencies; i++) { 1513bf215546Sopenharmony_ci struct amdgpu_fence *fence = 1514bf215546Sopenharmony_ci (struct amdgpu_fence*)cs->fence_dependencies.list[i]; 1515bf215546Sopenharmony_ci 1516bf215546Sopenharmony_ci assert(util_queue_fence_is_signalled(&fence->submitted)); 1517bf215546Sopenharmony_ci amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]); 1518bf215546Sopenharmony_ci } 1519bf215546Sopenharmony_ci 1520bf215546Sopenharmony_ci chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES; 1521bf215546Sopenharmony_ci chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num_dependencies; 1522bf215546Sopenharmony_ci chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk; 1523bf215546Sopenharmony_ci num_chunks++; 1524bf215546Sopenharmony_ci } 1525bf215546Sopenharmony_ci 1526bf215546Sopenharmony_ci /* Syncobj dependencies. */ 1527bf215546Sopenharmony_ci unsigned num_syncobj_dependencies = cs->syncobj_dependencies.num; 1528bf215546Sopenharmony_ci if (num_syncobj_dependencies) { 1529bf215546Sopenharmony_ci struct drm_amdgpu_cs_chunk_sem *sem_chunk = 1530bf215546Sopenharmony_ci alloca(num_syncobj_dependencies * sizeof(sem_chunk[0])); 1531bf215546Sopenharmony_ci 1532bf215546Sopenharmony_ci for (unsigned i = 0; i < num_syncobj_dependencies; i++) { 1533bf215546Sopenharmony_ci struct amdgpu_fence *fence = 1534bf215546Sopenharmony_ci (struct amdgpu_fence*)cs->syncobj_dependencies.list[i]; 1535bf215546Sopenharmony_ci 1536bf215546Sopenharmony_ci if (!amdgpu_fence_is_syncobj(fence)) 1537bf215546Sopenharmony_ci continue; 1538bf215546Sopenharmony_ci 1539bf215546Sopenharmony_ci assert(util_queue_fence_is_signalled(&fence->submitted)); 1540bf215546Sopenharmony_ci sem_chunk[i].handle = fence->syncobj; 1541bf215546Sopenharmony_ci } 1542bf215546Sopenharmony_ci 1543bf215546Sopenharmony_ci chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN; 1544bf215546Sopenharmony_ci chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4 * num_syncobj_dependencies; 1545bf215546Sopenharmony_ci chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk; 1546bf215546Sopenharmony_ci num_chunks++; 1547bf215546Sopenharmony_ci } 1548bf215546Sopenharmony_ci 1549bf215546Sopenharmony_ci /* Syncobj signals. */ 1550bf215546Sopenharmony_ci unsigned num_syncobj_to_signal = cs->syncobj_to_signal.num; 1551bf215546Sopenharmony_ci if (num_syncobj_to_signal) { 1552bf215546Sopenharmony_ci struct drm_amdgpu_cs_chunk_sem *sem_chunk = 1553bf215546Sopenharmony_ci alloca(num_syncobj_to_signal * sizeof(sem_chunk[0])); 1554bf215546Sopenharmony_ci 1555bf215546Sopenharmony_ci for (unsigned i = 0; i < num_syncobj_to_signal; i++) { 1556bf215546Sopenharmony_ci struct amdgpu_fence *fence = 1557bf215546Sopenharmony_ci (struct amdgpu_fence*)cs->syncobj_to_signal.list[i]; 1558bf215546Sopenharmony_ci 1559bf215546Sopenharmony_ci assert(amdgpu_fence_is_syncobj(fence)); 1560bf215546Sopenharmony_ci sem_chunk[i].handle = fence->syncobj; 1561bf215546Sopenharmony_ci } 1562bf215546Sopenharmony_ci 1563bf215546Sopenharmony_ci chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT; 1564bf215546Sopenharmony_ci chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4 1565bf215546Sopenharmony_ci * num_syncobj_to_signal; 1566bf215546Sopenharmony_ci chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk; 1567bf215546Sopenharmony_ci num_chunks++; 1568bf215546Sopenharmony_ci } 1569bf215546Sopenharmony_ci 1570bf215546Sopenharmony_ci /* Fence */ 1571bf215546Sopenharmony_ci if (has_user_fence) { 1572bf215546Sopenharmony_ci chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE; 1573bf215546Sopenharmony_ci chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4; 1574bf215546Sopenharmony_ci chunks[num_chunks].chunk_data = (uintptr_t)&acs->fence_chunk; 1575bf215546Sopenharmony_ci num_chunks++; 1576bf215546Sopenharmony_ci } 1577bf215546Sopenharmony_ci 1578bf215546Sopenharmony_ci /* IB */ 1579bf215546Sopenharmony_ci if (cs->ib[IB_PREAMBLE].ib_bytes) { 1580bf215546Sopenharmony_ci chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB; 1581bf215546Sopenharmony_ci chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4; 1582bf215546Sopenharmony_ci chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PREAMBLE]; 1583bf215546Sopenharmony_ci num_chunks++; 1584bf215546Sopenharmony_ci } 1585bf215546Sopenharmony_ci 1586bf215546Sopenharmony_ci /* IB */ 1587bf215546Sopenharmony_ci cs->ib[IB_MAIN].ib_bytes *= 4; /* Convert from dwords to bytes. */ 1588bf215546Sopenharmony_ci chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB; 1589bf215546Sopenharmony_ci chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4; 1590bf215546Sopenharmony_ci chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN]; 1591bf215546Sopenharmony_ci num_chunks++; 1592bf215546Sopenharmony_ci 1593bf215546Sopenharmony_ci if (cs->secure) { 1594bf215546Sopenharmony_ci cs->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAGS_SECURE; 1595bf215546Sopenharmony_ci cs->ib[IB_MAIN].flags |= AMDGPU_IB_FLAGS_SECURE; 1596bf215546Sopenharmony_ci } else { 1597bf215546Sopenharmony_ci cs->ib[IB_PREAMBLE].flags &= ~AMDGPU_IB_FLAGS_SECURE; 1598bf215546Sopenharmony_ci cs->ib[IB_MAIN].flags &= ~AMDGPU_IB_FLAGS_SECURE; 1599bf215546Sopenharmony_ci } 1600bf215546Sopenharmony_ci 1601bf215546Sopenharmony_ci /* Apply RADEON_NOOP. */ 1602bf215546Sopenharmony_ci if (acs->noop) { 1603bf215546Sopenharmony_ci if (acs->ip_type == AMD_IP_GFX) { 1604bf215546Sopenharmony_ci /* Reduce the IB size and fill it with NOP to make it like an empty IB. */ 1605bf215546Sopenharmony_ci unsigned noop_size = MIN2(cs->ib[IB_MAIN].ib_bytes, ws->info.ib_alignment); 1606bf215546Sopenharmony_ci 1607bf215546Sopenharmony_ci cs->ib_main_addr[0] = PKT3(PKT3_NOP, noop_size / 4 - 2, 0); 1608bf215546Sopenharmony_ci cs->ib[IB_MAIN].ib_bytes = noop_size; 1609bf215546Sopenharmony_ci } else { 1610bf215546Sopenharmony_ci noop = true; 1611bf215546Sopenharmony_ci } 1612bf215546Sopenharmony_ci } 1613bf215546Sopenharmony_ci 1614bf215546Sopenharmony_ci assert(num_chunks <= ARRAY_SIZE(chunks)); 1615bf215546Sopenharmony_ci 1616bf215546Sopenharmony_ci r = 0; 1617bf215546Sopenharmony_ci 1618bf215546Sopenharmony_ci if (!noop) { 1619bf215546Sopenharmony_ci /* The kernel returns -ENOMEM with many parallel processes using GDS such as test suites 1620bf215546Sopenharmony_ci * quite often, but it eventually succeeds after enough attempts. This happens frequently 1621bf215546Sopenharmony_ci * with dEQP using NGG streamout. 1622bf215546Sopenharmony_ci */ 1623bf215546Sopenharmony_ci do { 1624bf215546Sopenharmony_ci /* Wait 1 ms and try again. */ 1625bf215546Sopenharmony_ci if (r == -ENOMEM) 1626bf215546Sopenharmony_ci os_time_sleep(1000); 1627bf215546Sopenharmony_ci 1628bf215546Sopenharmony_ci r = amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list, 1629bf215546Sopenharmony_ci num_chunks, chunks, &seq_no); 1630bf215546Sopenharmony_ci } while (r == -ENOMEM); 1631bf215546Sopenharmony_ci } 1632bf215546Sopenharmony_ci } 1633bf215546Sopenharmony_ci 1634bf215546Sopenharmony_ci if (r) { 1635bf215546Sopenharmony_ci if (r == -ECANCELED) 1636bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: The CS has been cancelled because the context is lost.\n"); 1637bf215546Sopenharmony_ci else 1638bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: The CS has been rejected, " 1639bf215546Sopenharmony_ci "see dmesg for more information (%i).\n", r); 1640bf215546Sopenharmony_ci 1641bf215546Sopenharmony_ci acs->ctx->num_rejected_cs++; 1642bf215546Sopenharmony_ci ws->num_total_rejected_cs++; 1643bf215546Sopenharmony_ci } else if (!noop) { 1644bf215546Sopenharmony_ci /* Success. */ 1645bf215546Sopenharmony_ci uint64_t *user_fence = NULL; 1646bf215546Sopenharmony_ci 1647bf215546Sopenharmony_ci /* Need to reserve 4 QWORD for user fence: 1648bf215546Sopenharmony_ci * QWORD[0]: completed fence 1649bf215546Sopenharmony_ci * QWORD[1]: preempted fence 1650bf215546Sopenharmony_ci * QWORD[2]: reset fence 1651bf215546Sopenharmony_ci * QWORD[3]: preempted then reset 1652bf215546Sopenharmony_ci **/ 1653bf215546Sopenharmony_ci if (has_user_fence) 1654bf215546Sopenharmony_ci user_fence = acs->ctx->user_fence_cpu_address_base + acs->ip_type * 4; 1655bf215546Sopenharmony_ci amdgpu_fence_submitted(cs->fence, seq_no, user_fence); 1656bf215546Sopenharmony_ci } 1657bf215546Sopenharmony_ci 1658bf215546Sopenharmony_ci /* Cleanup. */ 1659bf215546Sopenharmony_ci if (bo_list) 1660bf215546Sopenharmony_ci amdgpu_bo_list_destroy_raw(ws->dev, bo_list); 1661bf215546Sopenharmony_ci 1662bf215546Sopenharmony_cicleanup: 1663bf215546Sopenharmony_ci /* If there was an error, signal the fence, because it won't be signalled 1664bf215546Sopenharmony_ci * by the hardware. */ 1665bf215546Sopenharmony_ci if (r || noop) 1666bf215546Sopenharmony_ci amdgpu_fence_signalled(cs->fence); 1667bf215546Sopenharmony_ci 1668bf215546Sopenharmony_ci cs->error_code = r; 1669bf215546Sopenharmony_ci 1670bf215546Sopenharmony_ci /* Only decrement num_active_ioctls for those buffers where we incremented it. */ 1671bf215546Sopenharmony_ci for (i = 0; i < initial_num_real_buffers; i++) 1672bf215546Sopenharmony_ci p_atomic_dec(&cs->real_buffers[i].bo->num_active_ioctls); 1673bf215546Sopenharmony_ci for (i = 0; i < cs->num_slab_buffers; i++) 1674bf215546Sopenharmony_ci p_atomic_dec(&cs->slab_buffers[i].bo->num_active_ioctls); 1675bf215546Sopenharmony_ci for (i = 0; i < cs->num_sparse_buffers; i++) 1676bf215546Sopenharmony_ci p_atomic_dec(&cs->sparse_buffers[i].bo->num_active_ioctls); 1677bf215546Sopenharmony_ci 1678bf215546Sopenharmony_ci amdgpu_cs_context_cleanup(ws, cs); 1679bf215546Sopenharmony_ci} 1680bf215546Sopenharmony_ci 1681bf215546Sopenharmony_ci/* Make sure the previous submission is completed. */ 1682bf215546Sopenharmony_civoid amdgpu_cs_sync_flush(struct radeon_cmdbuf *rcs) 1683bf215546Sopenharmony_ci{ 1684bf215546Sopenharmony_ci struct amdgpu_cs *cs = amdgpu_cs(rcs); 1685bf215546Sopenharmony_ci 1686bf215546Sopenharmony_ci /* Wait for any pending ioctl of this CS to complete. */ 1687bf215546Sopenharmony_ci util_queue_fence_wait(&cs->flush_completed); 1688bf215546Sopenharmony_ci} 1689bf215546Sopenharmony_ci 1690bf215546Sopenharmony_cistatic int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, 1691bf215546Sopenharmony_ci unsigned flags, 1692bf215546Sopenharmony_ci struct pipe_fence_handle **fence) 1693bf215546Sopenharmony_ci{ 1694bf215546Sopenharmony_ci struct amdgpu_cs *cs = amdgpu_cs(rcs); 1695bf215546Sopenharmony_ci struct amdgpu_winsys *ws = cs->ws; 1696bf215546Sopenharmony_ci int error_code = 0; 1697bf215546Sopenharmony_ci uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ip_type]; 1698bf215546Sopenharmony_ci 1699bf215546Sopenharmony_ci rcs->current.max_dw += amdgpu_cs_epilog_dws(cs); 1700bf215546Sopenharmony_ci 1701bf215546Sopenharmony_ci /* Pad the IB according to the mask. */ 1702bf215546Sopenharmony_ci switch (cs->ip_type) { 1703bf215546Sopenharmony_ci case AMD_IP_SDMA: 1704bf215546Sopenharmony_ci if (ws->info.gfx_level <= GFX6) { 1705bf215546Sopenharmony_ci while (rcs->current.cdw & ib_pad_dw_mask) 1706bf215546Sopenharmony_ci radeon_emit(rcs, 0xf0000000); /* NOP packet */ 1707bf215546Sopenharmony_ci } else { 1708bf215546Sopenharmony_ci while (rcs->current.cdw & ib_pad_dw_mask) 1709bf215546Sopenharmony_ci radeon_emit(rcs, SDMA_NOP_PAD); 1710bf215546Sopenharmony_ci } 1711bf215546Sopenharmony_ci break; 1712bf215546Sopenharmony_ci case AMD_IP_GFX: 1713bf215546Sopenharmony_ci case AMD_IP_COMPUTE: 1714bf215546Sopenharmony_ci if (ws->info.gfx_ib_pad_with_type2) { 1715bf215546Sopenharmony_ci while (rcs->current.cdw & ib_pad_dw_mask) 1716bf215546Sopenharmony_ci radeon_emit(rcs, PKT2_NOP_PAD); 1717bf215546Sopenharmony_ci } else { 1718bf215546Sopenharmony_ci while (rcs->current.cdw & ib_pad_dw_mask) 1719bf215546Sopenharmony_ci radeon_emit(rcs, PKT3_NOP_PAD); 1720bf215546Sopenharmony_ci } 1721bf215546Sopenharmony_ci if (cs->ip_type == AMD_IP_GFX) 1722bf215546Sopenharmony_ci ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4; 1723bf215546Sopenharmony_ci break; 1724bf215546Sopenharmony_ci case AMD_IP_UVD: 1725bf215546Sopenharmony_ci case AMD_IP_UVD_ENC: 1726bf215546Sopenharmony_ci while (rcs->current.cdw & ib_pad_dw_mask) 1727bf215546Sopenharmony_ci radeon_emit(rcs, 0x80000000); /* type2 nop packet */ 1728bf215546Sopenharmony_ci break; 1729bf215546Sopenharmony_ci case AMD_IP_VCN_JPEG: 1730bf215546Sopenharmony_ci if (rcs->current.cdw % 2) 1731bf215546Sopenharmony_ci assert(0); 1732bf215546Sopenharmony_ci while (rcs->current.cdw & ib_pad_dw_mask) { 1733bf215546Sopenharmony_ci radeon_emit(rcs, 0x60000000); /* nop packet */ 1734bf215546Sopenharmony_ci radeon_emit(rcs, 0x00000000); 1735bf215546Sopenharmony_ci } 1736bf215546Sopenharmony_ci break; 1737bf215546Sopenharmony_ci case AMD_IP_VCN_DEC: 1738bf215546Sopenharmony_ci while (rcs->current.cdw & ib_pad_dw_mask) 1739bf215546Sopenharmony_ci radeon_emit(rcs, 0x81ff); /* nop packet */ 1740bf215546Sopenharmony_ci break; 1741bf215546Sopenharmony_ci default: 1742bf215546Sopenharmony_ci break; 1743bf215546Sopenharmony_ci } 1744bf215546Sopenharmony_ci 1745bf215546Sopenharmony_ci if (rcs->current.cdw > rcs->current.max_dw) { 1746bf215546Sopenharmony_ci fprintf(stderr, "amdgpu: command stream overflowed\n"); 1747bf215546Sopenharmony_ci } 1748bf215546Sopenharmony_ci 1749bf215546Sopenharmony_ci /* If the CS is not empty or overflowed.... */ 1750bf215546Sopenharmony_ci if (likely(radeon_emitted(rcs, 0) && 1751bf215546Sopenharmony_ci rcs->current.cdw <= rcs->current.max_dw && 1752bf215546Sopenharmony_ci !(flags & RADEON_FLUSH_NOOP))) { 1753bf215546Sopenharmony_ci struct amdgpu_cs_context *cur = cs->csc; 1754bf215546Sopenharmony_ci 1755bf215546Sopenharmony_ci /* Set IB sizes. */ 1756bf215546Sopenharmony_ci amdgpu_ib_finalize(ws, rcs, &cs->main); 1757bf215546Sopenharmony_ci 1758bf215546Sopenharmony_ci /* Create a fence. */ 1759bf215546Sopenharmony_ci amdgpu_fence_reference(&cur->fence, NULL); 1760bf215546Sopenharmony_ci if (cs->next_fence) { 1761bf215546Sopenharmony_ci /* just move the reference */ 1762bf215546Sopenharmony_ci cur->fence = cs->next_fence; 1763bf215546Sopenharmony_ci cs->next_fence = NULL; 1764bf215546Sopenharmony_ci } else { 1765bf215546Sopenharmony_ci cur->fence = amdgpu_fence_create(cs->ctx, 1766bf215546Sopenharmony_ci cur->ib[IB_MAIN].ip_type); 1767bf215546Sopenharmony_ci } 1768bf215546Sopenharmony_ci if (fence) 1769bf215546Sopenharmony_ci amdgpu_fence_reference(fence, cur->fence); 1770bf215546Sopenharmony_ci 1771bf215546Sopenharmony_ci amdgpu_inc_bo_num_active_ioctls(cur->num_real_buffers, cur->real_buffers); 1772bf215546Sopenharmony_ci amdgpu_inc_bo_num_active_ioctls(cur->num_slab_buffers, cur->slab_buffers); 1773bf215546Sopenharmony_ci amdgpu_inc_bo_num_active_ioctls(cur->num_sparse_buffers, cur->sparse_buffers); 1774bf215546Sopenharmony_ci 1775bf215546Sopenharmony_ci amdgpu_cs_sync_flush(rcs); 1776bf215546Sopenharmony_ci 1777bf215546Sopenharmony_ci /* Swap command streams. "cst" is going to be submitted. */ 1778bf215546Sopenharmony_ci rcs->csc = cs->csc = cs->cst; 1779bf215546Sopenharmony_ci cs->cst = cur; 1780bf215546Sopenharmony_ci 1781bf215546Sopenharmony_ci /* Submit. */ 1782bf215546Sopenharmony_ci util_queue_add_job(&ws->cs_queue, cs, &cs->flush_completed, 1783bf215546Sopenharmony_ci amdgpu_cs_submit_ib, NULL, 0); 1784bf215546Sopenharmony_ci 1785bf215546Sopenharmony_ci if (flags & RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION) 1786bf215546Sopenharmony_ci cs->csc->secure = !cs->cst->secure; 1787bf215546Sopenharmony_ci else 1788bf215546Sopenharmony_ci cs->csc->secure = cs->cst->secure; 1789bf215546Sopenharmony_ci 1790bf215546Sopenharmony_ci if (!(flags & PIPE_FLUSH_ASYNC)) { 1791bf215546Sopenharmony_ci amdgpu_cs_sync_flush(rcs); 1792bf215546Sopenharmony_ci error_code = cur->error_code; 1793bf215546Sopenharmony_ci } 1794bf215546Sopenharmony_ci } else { 1795bf215546Sopenharmony_ci if (flags & RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION) 1796bf215546Sopenharmony_ci cs->csc->secure = !cs->csc->secure; 1797bf215546Sopenharmony_ci amdgpu_cs_context_cleanup(ws, cs->csc); 1798bf215546Sopenharmony_ci } 1799bf215546Sopenharmony_ci 1800bf215546Sopenharmony_ci memset(cs->csc->buffer_indices_hashlist, -1, sizeof(cs->buffer_indices_hashlist)); 1801bf215546Sopenharmony_ci 1802bf215546Sopenharmony_ci amdgpu_get_new_ib(ws, rcs, &cs->main, cs); 1803bf215546Sopenharmony_ci 1804bf215546Sopenharmony_ci if (cs->preamble_ib_bo) { 1805bf215546Sopenharmony_ci amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo, 1806bf215546Sopenharmony_ci RADEON_USAGE_READ | RADEON_PRIO_IB, 0); 1807bf215546Sopenharmony_ci } 1808bf215546Sopenharmony_ci 1809bf215546Sopenharmony_ci rcs->used_gart_kb = 0; 1810bf215546Sopenharmony_ci rcs->used_vram_kb = 0; 1811bf215546Sopenharmony_ci 1812bf215546Sopenharmony_ci if (cs->ip_type == AMD_IP_GFX) 1813bf215546Sopenharmony_ci ws->num_gfx_IBs++; 1814bf215546Sopenharmony_ci else if (cs->ip_type == AMD_IP_SDMA) 1815bf215546Sopenharmony_ci ws->num_sdma_IBs++; 1816bf215546Sopenharmony_ci 1817bf215546Sopenharmony_ci return error_code; 1818bf215546Sopenharmony_ci} 1819bf215546Sopenharmony_ci 1820bf215546Sopenharmony_cistatic void amdgpu_cs_destroy(struct radeon_cmdbuf *rcs) 1821bf215546Sopenharmony_ci{ 1822bf215546Sopenharmony_ci struct amdgpu_cs *cs = amdgpu_cs(rcs); 1823bf215546Sopenharmony_ci 1824bf215546Sopenharmony_ci if (!cs) 1825bf215546Sopenharmony_ci return; 1826bf215546Sopenharmony_ci 1827bf215546Sopenharmony_ci amdgpu_cs_sync_flush(rcs); 1828bf215546Sopenharmony_ci util_queue_fence_destroy(&cs->flush_completed); 1829bf215546Sopenharmony_ci p_atomic_dec(&cs->ws->num_cs); 1830bf215546Sopenharmony_ci radeon_bo_reference(&cs->ws->dummy_ws.base, &cs->preamble_ib_bo, NULL); 1831bf215546Sopenharmony_ci radeon_bo_reference(&cs->ws->dummy_ws.base, &cs->main.big_ib_buffer, NULL); 1832bf215546Sopenharmony_ci FREE(rcs->prev); 1833bf215546Sopenharmony_ci amdgpu_destroy_cs_context(cs->ws, &cs->csc1); 1834bf215546Sopenharmony_ci amdgpu_destroy_cs_context(cs->ws, &cs->csc2); 1835bf215546Sopenharmony_ci amdgpu_fence_reference(&cs->next_fence, NULL); 1836bf215546Sopenharmony_ci FREE(cs); 1837bf215546Sopenharmony_ci} 1838bf215546Sopenharmony_ci 1839bf215546Sopenharmony_cistatic bool amdgpu_bo_is_referenced(struct radeon_cmdbuf *rcs, 1840bf215546Sopenharmony_ci struct pb_buffer *_buf, 1841bf215546Sopenharmony_ci unsigned usage) 1842bf215546Sopenharmony_ci{ 1843bf215546Sopenharmony_ci struct amdgpu_cs *cs = amdgpu_cs(rcs); 1844bf215546Sopenharmony_ci struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)_buf; 1845bf215546Sopenharmony_ci 1846bf215546Sopenharmony_ci return amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo, usage); 1847bf215546Sopenharmony_ci} 1848bf215546Sopenharmony_ci 1849bf215546Sopenharmony_civoid amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws) 1850bf215546Sopenharmony_ci{ 1851bf215546Sopenharmony_ci ws->base.ctx_create = amdgpu_ctx_create; 1852bf215546Sopenharmony_ci ws->base.ctx_destroy = amdgpu_ctx_destroy; 1853bf215546Sopenharmony_ci ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status; 1854bf215546Sopenharmony_ci ws->base.cs_create = amdgpu_cs_create; 1855bf215546Sopenharmony_ci ws->base.cs_set_preamble = amdgpu_cs_set_preamble; 1856bf215546Sopenharmony_ci ws->base.cs_setup_preemption = amdgpu_cs_setup_preemption; 1857bf215546Sopenharmony_ci ws->base.cs_destroy = amdgpu_cs_destroy; 1858bf215546Sopenharmony_ci ws->base.cs_add_buffer = amdgpu_cs_add_buffer; 1859bf215546Sopenharmony_ci ws->base.cs_validate = amdgpu_cs_validate; 1860bf215546Sopenharmony_ci ws->base.cs_check_space = amdgpu_cs_check_space; 1861bf215546Sopenharmony_ci ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list; 1862bf215546Sopenharmony_ci ws->base.cs_flush = amdgpu_cs_flush; 1863bf215546Sopenharmony_ci ws->base.cs_get_next_fence = amdgpu_cs_get_next_fence; 1864bf215546Sopenharmony_ci ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced; 1865bf215546Sopenharmony_ci ws->base.cs_sync_flush = amdgpu_cs_sync_flush; 1866bf215546Sopenharmony_ci ws->base.cs_add_fence_dependency = amdgpu_cs_add_fence_dependency; 1867bf215546Sopenharmony_ci ws->base.cs_add_syncobj_signal = amdgpu_cs_add_syncobj_signal; 1868bf215546Sopenharmony_ci ws->base.fence_wait = amdgpu_fence_wait_rel_timeout; 1869bf215546Sopenharmony_ci ws->base.fence_reference = amdgpu_fence_reference; 1870bf215546Sopenharmony_ci ws->base.fence_import_syncobj = amdgpu_fence_import_syncobj; 1871bf215546Sopenharmony_ci ws->base.fence_import_sync_file = amdgpu_fence_import_sync_file; 1872bf215546Sopenharmony_ci ws->base.fence_export_sync_file = amdgpu_fence_export_sync_file; 1873bf215546Sopenharmony_ci ws->base.export_signalled_sync_file = amdgpu_export_signalled_sync_file; 1874bf215546Sopenharmony_ci} 1875