/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */

#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sync_file.h>
#include <linux/dma-buf.h>

#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include <drm/ttm/ttm_tt.h>

#include "amdgpu_cs.h"
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
#include "amdgpu_ras.h"

static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
				 struct amdgpu_device *adev,
				 struct drm_file *filp,
				 union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	if (cs->in.num_chunks == 0)
		return -EINVAL;

	memset(p, 0, sizeof(*p));
	p->adev = adev;
	p->filp = filp;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx)
		return -EINVAL;

	if (atomic_read(&p->ctx->guilty)) {
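		/* Descriptive note: the context was flagged guilty (e.g. after
		 * it caused a GPU reset), so further submissions from it are
		 * refused with -ECANCELED.
		 */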
		amdgpu_ctx_put(p->ctx);
		return -ECANCELED;
	}

	amdgpu_sync_create(&p->sync);
	drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
		      DRM_EXEC_IGNORE_DUPLICATES);
	return 0;
}

static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
			     struct drm_amdgpu_cs_chunk_ib *chunk_ib)
{
	struct drm_sched_entity *entity;
	unsigned int i;
	int r;

	r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
				  chunk_ib->ip_instance,
				  chunk_ib->ring, &entity);
	if (r)
		return r;

	/*
	 * Abort if there is no run queue associated with this entity.
	 * Possibly because of disabled HW IP.
	 */
	if (entity->rq == NULL)
		return -EINVAL;

	/* Check if we can add this IB to some existing job */
	for (i = 0; i < p->gang_size; ++i)
		if (p->entities[i] == entity)
			return i;

	/* If not, increase the gang size if possible */
	if (i == AMDGPU_CS_GANG_SIZE)
		return -EINVAL;

	p->entities[i] = entity;
	p->gang_size = i + 1;
	return i;
}

static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
			   struct drm_amdgpu_cs_chunk_ib *chunk_ib,
			   unsigned int *num_ibs)
{
	int r;

	r = amdgpu_cs_job_idx(p, chunk_ib);
	if (r < 0)
		return r;

	if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
		return -EINVAL;

	++(num_ibs[r]);
	p->gang_leader_idx = r;
	return 0;
}

static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
				   struct drm_amdgpu_cs_chunk_fence *data,
				   uint32_t *offset)
{
	struct drm_gem_object *gobj;
	unsigned long size;

	gobj = drm_gem_object_lookup(p->filp, data->handle);
	if (gobj == NULL)
		return -EINVAL;

	p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	drm_gem_object_put(gobj);

	size = amdgpu_bo_size(p->uf_bo);
	if (size != PAGE_SIZE || data->offset > (size - 8))
		return -EINVAL;

	if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm))
		return -EINVAL;

	*offset = data->offset;
	return 0;
}

static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
				   struct drm_amdgpu_bo_list_in *data)
{
	struct drm_amdgpu_bo_list_entry *info;
	int r;

	r = amdgpu_bo_create_list_entry_array(data, &info);
	if (r)
		return r;

	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
				  &p->bo_list);
	if (r)
		goto error_free;

	kvfree(info);
	return 0;

error_free:
	kvfree(info);

	return r;
}

/* Copy the data from userspace and go over it the first time */
static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
			   union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
	struct amdgpu_vm *vm = &fpriv->vm;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	uint32_t uf_offset = 0;
	size_t size;
	int ret;
	int i;

	chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t),
				     GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	/* get chunks */
	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t)*cs->in.num_chunks)) {
		ret = -EFAULT;
		goto free_chunk;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
				   GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto free_chunk;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = u64_to_user_ptr(user_chunk.chunk_data);

		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t),
						    GFP_KERNEL);
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		/* Assume the worst on the following checks */
		ret = -EINVAL;
		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
			if (ret)
				goto free_partial_kdata;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,
						      &uf_offset);
			if (ret)
				goto free_partial_kdata;
			break;

		case AMDGPU_CHUNK_ID_BO_HANDLES:
			if (size < sizeof(struct drm_amdgpu_bo_list_in))
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
			if (ret)
				goto free_partial_kdata;
			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
		case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
			break;

		default:
			goto free_partial_kdata;
		}
	}

	if (!p->gang_size) {
		ret = -EINVAL;
		goto free_all_kdata;
	}

	for (i = 0; i < p->gang_size; ++i) {
		ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
				       num_ibs[i], &p->jobs[i]);
		if (ret)
			goto free_all_kdata;
	}
	p->gang_leader = p->jobs[p->gang_leader_idx];

	if (p->ctx->generation != p->gang_leader->generation) {
		ret = -ECANCELED;
		goto free_all_kdata;
	}

	if (p->uf_bo)
		p->gang_leader->uf_addr = uf_offset;
	kvfree(chunk_array);

	/* Use this opportunity to fill in task info for the vm */
	amdgpu_vm_set_task_info(vm);

	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		kvfree(p->chunks[i].kdata);
	kvfree(p->chunks);
	p->chunks = NULL;
	p->nchunks = 0;
free_chunk:
	kvfree(chunk_array);

	return ret;
}

static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
			   struct amdgpu_cs_chunk *chunk,
			   unsigned int *ce_preempt,
			   unsigned int *de_preempt)
{
	struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_ring *ring;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	int r;

	r = amdgpu_cs_job_idx(p, chunk_ib);
	if (r < 0)
		return r;

	job = p->jobs[r];
	ring = amdgpu_job_ring(job);
	ib = &job->ibs[job->num_ibs++];

	/* MM engine doesn't support user fences */
	if (p->uf_bo && ring->funcs->no_user_fence)
		return -EINVAL;

	if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
	    chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
		if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
			(*ce_preempt)++;
		else
			(*de_preempt)++;

		/* Each GFX command submit allows only 1 IB max
		 * preemptible for CE & DE */
		if (*ce_preempt > 1 || *de_preempt > 1)
			return -EINVAL;
	}

	if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;

	r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
			  chunk_ib->ib_bytes : 0,
			  AMDGPU_IB_POOL_DELAYED, ib);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}

	ib->gpu_addr = chunk_ib->va_start;
	ib->length_dw = chunk_ib->ib_bytes / 4;
	ib->flags = chunk_ib->flags;
	return 0;
}

static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
				     struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned int num_deps;
	int i, r;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_dep);

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_ctx *ctx;
		struct drm_sched_entity *entity;
		struct dma_fence *fence;

		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
		if (ctx == NULL)
			return -EINVAL;

		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
					  deps[i].ip_instance,
					  deps[i].ring, &entity);
		if (r) {
			amdgpu_ctx_put(ctx);
			return r;
		}

		fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
		amdgpu_ctx_put(ctx);

		if (IS_ERR(fence))
			return PTR_ERR(fence);
		else if (!fence)
			continue;

		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
			struct drm_sched_fence *s_fence;
			struct dma_fence *old = fence;

			s_fence = to_drm_sched_fence(fence);
			fence = dma_fence_get(&s_fence->scheduled);
			dma_fence_put(old);
		}

		r = amdgpu_sync_fence(&p->sync, fence);
		dma_fence_put(fence);
		if (r)
			return r;
	}
	return 0;
}

static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
					 uint32_t handle, u64 point,
					 u64 flags)
{
	struct dma_fence *fence;
	int r;

	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
	if (r) {
		DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
			  handle, point, r);
		return r;
	}

	r = amdgpu_sync_fence(&p->sync, fence);
	dma_fence_put(fence);
	return r;
}

static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
				   struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
	unsigned int num_deps;
	int i, r;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);
		if (r)
			return r;
	}

	return 0;
}

static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
					      struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
	unsigned int num_deps;
	int i, r;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,
						  syncobj_deps[i].point,
						  syncobj_deps[i].flags);
		if (r)
			return r;
	}

	return 0;
}

static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
				    struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
	unsigned int num_deps;
	int i;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);

	if (p->post_deps)
		return -EINVAL;

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		p->post_deps[i].syncobj =
			drm_syncobj_find(p->filp, deps[i].handle);
		if (!p->post_deps[i].syncobj)
			return -EINVAL;
		p->post_deps[i].chain = NULL;
		p->post_deps[i].point = 0;
		p->num_post_deps++;
	}

	return 0;
}

static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
						struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
	unsigned int num_deps;
	int i;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);

	if (p->post_deps)
		return -EINVAL;

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];

		dep->chain = NULL;
		if (syncobj_deps[i].point) {
			dep->chain = dma_fence_chain_alloc();
			if (!dep->chain)
				return -ENOMEM;
		}

		dep->syncobj = drm_syncobj_find(p->filp,
						syncobj_deps[i].handle);
		if (!dep->syncobj) {
			dma_fence_chain_free(dep->chain);
			return -EINVAL;
		}
		dep->point = syncobj_deps[i].point;
		p->num_post_deps++;
	}

	return 0;
}

static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
			       struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
	int i;

	if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
		return -EINVAL;

	for (i = 0; i < p->gang_size; ++i) {
		p->jobs[i]->shadow_va = shadow->shadow_va;
		p->jobs[i]->csa_va = shadow->csa_va;
		p->jobs[i]->gds_va = shadow->gds_va;
		p->jobs[i]->init_shadow =
			shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
	}

	return 0;
}

static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
{
	unsigned int ce_preempt = 0, de_preempt = 0;
	int i, r;

	for (i = 0; i < p->nchunks; ++i) {
		struct amdgpu_cs_chunk *chunk;

		chunk = &p->chunks[i];

		switch (chunk->chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
			r = amdgpu_cs_p2_dependencies(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
			r = amdgpu_cs_p2_syncobj_in(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
			r = amdgpu_cs_p2_syncobj_out(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
			r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
			r = amdgpu_cs_p2_shadow(p, chunk);
			if (r)
				return r;
			break;
		}
	}

	return 0;
}

/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
{
	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
		return 0;

	/* Since accum_us is incremented by a million per second, just
	 * multiply it by the number of MB/s to get the number of bytes.
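	 *
	 * Illustrative example (the value is assumed, not taken from real
	 * hardware): with log2_max_MBps == 6, i.e. roughly 64 MB/s, one
	 * second of accumulated time (1000000 us) converts to
	 * 1000000 << 6 = 64000000 bytes, about 64 MB of move budget.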
	 */
	return us << adev->mm_stats.log2_max_MBps;
}

static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
{
	if (!adev->mm_stats.log2_max_MBps)
		return 0;

	return bytes >> adev->mm_stats.log2_max_MBps;
}

/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 * which means it can go over the threshold once. If that happens, the driver
 * will be in debt and no other buffer migrations can be done until that debt
 * is repaid.
 *
 * This approach allows moving a buffer of any size (it's important to allow
 * that).
 *
 * The currency is simply time in microseconds and it increases as the clock
 * ticks. The accumulated microseconds (us) are converted to bytes and
 * returned.
 */
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
					      u64 *max_bytes,
					      u64 *max_vis_bytes)
{
	s64 time_us, increment_us;
	u64 free_vram, total_vram, used_vram;
	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
	 * throttling.
	 *
	 * It means that in order to get full max MBps, at least 5 IBs per
	 * second must be submitted and not more than 200ms apart from each
	 * other.
	 */
	const s64 us_upper_bound = 200000;

	if (!adev->mm_stats.log2_max_MBps) {
		*max_bytes = 0;
		*max_vis_bytes = 0;
		return;
	}

	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
	used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;

	spin_lock(&adev->mm_stats.lock);

	/* Increase the amount of accumulated us. */
	time_us = ktime_to_us(ktime_get());
	increment_us = time_us - adev->mm_stats.last_update_us;
	adev->mm_stats.last_update_us = time_us;
	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
				      us_upper_bound);

	/* This prevents the short period of low performance when the VRAM
	 * usage is low and the driver is in debt or doesn't have enough
	 * accumulated us to fill VRAM quickly.
	 *
	 * The situation can occur in these cases:
	 * - a lot of VRAM is freed by userspace
	 * - the presence of a big buffer causes a lot of evictions
	 *   (solution: split buffers into smaller ones)
	 *
	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
	 * accum_us to a positive number.
	 */
	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
		s64 min_us;

		/* Be more aggressive on dGPUs. Try to fill a portion of free
		 * VRAM now.
		 */
		if (!(adev->flags & AMD_IS_APU))
			min_us = bytes_to_us(adev, free_vram / 4);
		else
			min_us = 0; /* Reset accum_us on APUs. */

		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
	}

	/* This is set to 0 if the driver is in debt to disallow (optional)
	 * buffer moves.
	 */
	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);

	/* Do the same for visible VRAM if half of it is free */
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
		u64 total_vis_vram = adev->gmc.visible_vram_size;
		u64 used_vis_vram =
			amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);

		if (used_vis_vram < total_vis_vram) {
			u64 free_vis_vram = total_vis_vram - used_vis_vram;

			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
							  increment_us, us_upper_bound);

			if (free_vis_vram >= total_vis_vram / 2)
				adev->mm_stats.accum_us_vis =
					max(bytes_to_us(adev, free_vis_vram / 2),
					    adev->mm_stats.accum_us_vis);
		}

		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
	} else {
		*max_vis_bytes = 0;
	}

	spin_unlock(&adev->mm_stats.lock);
}

/* Report how many bytes have really been moved for the last command
 * submission. This can result in a debt that can stop buffer migrations
 * temporarily.
 */
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
				  u64 num_vis_bytes)
{
	spin_lock(&adev->mm_stats.lock);
	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
	spin_unlock(&adev->mm_stats.lock);
}

static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct amdgpu_cs_parser *p = param;
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.resv = bo->tbo.base.resv
	};
	uint32_t domain;
	int r;

	if (bo->tbo.pin_count)
		return 0;

	/* Don't move this buffer if we have depleted our allowance
	 * to move it. Don't move anything if the threshold is zero.
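	 * Buffers exported as dma-buf with active attachments are likewise
	 * validated with their allowed domains only, so they are not forced
	 * into a preferred placement here.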
	 */
	if (p->bytes_moved < p->bytes_moved_threshold &&
	    (!bo->tbo.base.dma_buf ||
	     list_empty(&bo->tbo.base.dma_buf->attachments))) {
		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
			 * visible VRAM if we've depleted our allowance to do
			 * that.
			 */
			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
				domain = bo->preferred_domains;
			else
				domain = bo->allowed_domains;
		} else {
			domain = bo->preferred_domains;
		}
	} else {
		domain = bo->allowed_domains;
	}

retry:
	amdgpu_bo_placement_from_domain(bo, domain);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

	p->bytes_moved += ctx.bytes_moved;
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
	    amdgpu_bo_in_cpu_visible_vram(bo))
		p->bytes_moved_vis += ctx.bytes_moved;

	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
		domain = bo->allowed_domains;
		goto retry;
	}

	return r;
}

static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct drm_gem_object *obj;
	unsigned long index;
	unsigned int i;
	int r;

	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
	if (cs->in.bo_list_handle) {
		if (p->bo_list)
			return -EINVAL;

		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
				       &p->bo_list);
		if (r)
			return r;
	} else if (!p->bo_list) {
		/* Create an empty bo_list when no handle is provided */
		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
					  &p->bo_list);
		if (r)
			return r;
	}

	mutex_lock(&p->bo_list->bo_list_mutex);

	/* Get userptr backing pages. If pages are updated after being
	 * registered in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate()
	 * will do amdgpu_ttm_backend_bind() to flush and invalidate new pages.
	 */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		bool userpage_invalidated = false;
		struct amdgpu_bo *bo = e->bo;
		int i;

		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
					       sizeof(struct page *),
					       GFP_KERNEL | __GFP_ZERO);
		if (!e->user_pages) {
			DRM_ERROR("kvmalloc_array failure\n");
			r = -ENOMEM;
			goto out_free_user_pages;
		}

		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range);
		if (r) {
			kvfree(e->user_pages);
			e->user_pages = NULL;
			goto out_free_user_pages;
		}

		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
				userpage_invalidated = true;
				break;
			}
		}
		e->user_invalidated = userpage_invalidated;
	}

	drm_exec_until_all_locked(&p->exec) {
		r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size);
		drm_exec_retry_on_contention(&p->exec);
		if (unlikely(r))
			goto out_free_user_pages;

		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			/* One fence for TTM and one for each CS job */
			r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base,
						 1 + p->gang_size);
			drm_exec_retry_on_contention(&p->exec);
			if (unlikely(r))
				goto out_free_user_pages;

			e->bo_va = amdgpu_vm_bo_find(vm, e->bo);
		}

		if (p->uf_bo) {
			r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,
						 1 + p->gang_size);
			drm_exec_retry_on_contention(&p->exec);
			if (unlikely(r))
				goto out_free_user_pages;
		}
	}

	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct mm_struct *usermm;

		usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
		if (usermm && usermm != current->mm) {
			r = -EPERM;
			goto out_free_user_pages;
		}

		if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
		    e->user_invalidated && e->user_pages) {
			amdgpu_bo_placement_from_domain(e->bo,
							AMDGPU_GEM_DOMAIN_CPU);
			r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
					    &ctx);
			if (r)
				goto out_free_user_pages;

			amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
						     e->user_pages);
		}

		kvfree(e->user_pages);
		e->user_pages = NULL;
	}

	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
	p->bytes_moved = 0;
	p->bytes_moved_vis = 0;

	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
				      amdgpu_cs_bo_validate, p);
	if (r) {
		DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
		goto out_free_user_pages;
	}

	drm_exec_for_each_locked_object(&p->exec, index, obj) {
		r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj));
		if (unlikely(r))
			goto out_free_user_pages;
	}

	if (p->uf_bo) {
		r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo);
		if (unlikely(r))
			goto out_free_user_pages;

		p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);
	}

	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);

	for (i = 0; i < p->gang_size; ++i)
		amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
					 p->bo_list->gws_obj,
					 p->bo_list->oa_obj);
	return 0;

out_free_user_pages:
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = e->bo;

		if (!e->user_pages)
			continue;
		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
		kvfree(e->user_pages);
		e->user_pages = NULL;
		e->range = NULL;
	}
	mutex_unlock(&p->bo_list->bo_list_mutex);
	return r;
}

static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
{
	int i, j;

	if (!trace_amdgpu_cs_enabled())
		return;

	for (i = 0; i < p->gang_size; ++i) {
		struct amdgpu_job *job = p->jobs[i];

		for (j = 0; j < job->num_ibs; ++j)
			trace_amdgpu_cs(p, job, &job->ibs[j]);
	}
}

static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
			       struct amdgpu_job *job)
{
	struct amdgpu_ring *ring = amdgpu_job_ring(job);
	unsigned int i;
	int r;

	/* Only for UVD/VCE VM emulation */
	if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)
		return 0;

	for (i = 0; i < job->num_ibs; ++i) {
		struct amdgpu_ib *ib = &job->ibs[i];
		struct amdgpu_bo_va_mapping *m;
		struct amdgpu_bo *aobj;
		uint64_t va_start;
		uint8_t *kptr;

		va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
		r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
		if (r) {
			DRM_ERROR("IB va_start is invalid\n");
			return r;
		}

		if ((va_start + ib->length_dw * 4) >
		    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
			DRM_ERROR("IB va_start+ib_bytes is invalid\n");
			return -EINVAL;
		}

		/* the IB should be reserved at this point */
		r = amdgpu_bo_kmap(aobj, (void **)&kptr);
		if (r)
			return r;

		kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);

		if (ring->funcs->parse_cs) {
			memcpy(ib->ptr, kptr, ib->length_dw * 4);
			amdgpu_bo_kunmap(aobj);

			r = amdgpu_ring_parse_cs(ring, p, job, ib);
			if (r)
				return r;
		} else {
			ib->ptr = (uint32_t *)kptr;
			r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
			amdgpu_bo_kunmap(aobj);
			if (r)
				return r;
		}
	}

	return 0;
}

static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)
{
	unsigned int i;
	int r;

	for (i = 0; i < p->gang_size; ++i) {
		r = amdgpu_cs_patch_ibs(p, p->jobs[i]);
		if (r)
			return r;
	}
	return 0;
}

static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_job *job = p->gang_leader;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_bo_va *bo_va;
	unsigned int i;
	int r;

	r = amdgpu_vm_clear_freed(adev, vm, NULL);
	if (r)
		return r;

	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);
	if (r)
		return r;

	if (fpriv->csa_va) {
		bo_va = fpriv->csa_va;
		BUG_ON(!bo_va);
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}

	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		bo_va = e->bo_va;
		if (bo_va == NULL)
			continue;

		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}

	r = amdgpu_vm_handle_moved(adev, vm);
	if (r)
		return r;

	r = amdgpu_vm_update_pdes(adev, vm, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(&p->sync, vm->last_update);
	if (r)
		return r;

	for (i = 0; i < p->gang_size; ++i) {
		job = p->jobs[i];

		if (!job->vm)
			continue;

		job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
	}

static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_job *job = p->gang_leader;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_bo_va *bo_va;
	unsigned int i;
	int r;

	r = amdgpu_vm_clear_freed(adev, vm, NULL);
	if (r)
		return r;

	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);
	if (r)
		return r;

	if (fpriv->csa_va) {
		bo_va = fpriv->csa_va;
		BUG_ON(!bo_va);
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}

	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		bo_va = e->bo_va;
		if (bo_va == NULL)
			continue;

		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}

	r = amdgpu_vm_handle_moved(adev, vm);
	if (r)
		return r;

	r = amdgpu_vm_update_pdes(adev, vm, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(&p->sync, vm->last_update);
	if (r)
		return r;

	for (i = 0; i < p->gang_size; ++i) {
		job = p->jobs[i];

		if (!job->vm)
			continue;

		job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
	}

	if (amdgpu_vm_debug) {
		/* Invalidate all BOs to test for userspace bugs */
		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			struct amdgpu_bo *bo = e->bo;

			/* ignore duplicates */
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(adev, bo, false);
		}
	}

	return 0;
}

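/**
 * amdgpu_cs_sync_rings - gather the dependencies for this submission
 * @p: parser structure holding the submission state
 *
 * Wait for the previous submission on the same context entity, collect the
 * reservation object fences of all locked BOs according to their sync mode
 * and push the collected dependencies to the jobs. Fences coming from the
 * gang leader's own scheduler are kept as explicit dependencies of the gang
 * leader so that a pipeline sync can be inserted when required.
 */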
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct drm_gpu_scheduler *sched;
	struct drm_gem_object *obj;
	struct dma_fence *fence;
	unsigned long index;
	unsigned int i;
	int r;

	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
		return r;
	}

	drm_exec_for_each_locked_object(&p->exec, index, obj) {
		struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);

		struct dma_resv *resv = bo->tbo.base.resv;
		enum amdgpu_sync_mode sync_mode;

		sync_mode = amdgpu_bo_explicit_sync(bo) ?
			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
		r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,
				     &fpriv->vm);
		if (r)
			return r;
	}

	for (i = 0; i < p->gang_size; ++i) {
		r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);
		if (r)
			return r;
	}

	sched = p->gang_leader->base.entity->rq->sched;
	while ((fence = amdgpu_sync_get_fence(&p->sync))) {
		struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);

		/*
		 * When we have a dependency it might be necessary to insert a
		 * pipeline sync to make sure that all caches etc. are flushed and
		 * the next job actually sees the results from the previous one
		 * before we start executing on the same scheduler ring.
		 */
		if (!s_fence || s_fence->sched != sched) {
			dma_fence_put(fence);
			continue;
		}

		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
		dma_fence_put(fence);
		if (r)
			return r;
	}
	return 0;
}

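/* Signal all post dependency syncobjs with the submission fence, either by
 * adding a timeline point or by replacing the current syncobj fence.
 */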
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
{
	int i;

	for (i = 0; i < p->num_post_deps; ++i) {
		if (p->post_deps[i].chain && p->post_deps[i].point) {
			drm_syncobj_add_point(p->post_deps[i].syncobj,
					      p->post_deps[i].chain,
					      p->fence, p->post_deps[i].point);
			p->post_deps[i].chain = NULL;
		} else {
			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
						  p->fence);
		}
	}
}

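/**
 * amdgpu_cs_submit - push the jobs to their scheduler entities
 * @p: parser structure holding the submission state
 * @cs: ioctl data used to return the fence sequence number to userspace
 *
 * Arm the jobs, make the gang leader depend on the scheduled fences of the
 * other gang members, recheck the userptr BOs under the notifier lock and add
 * the finished fences to all reservation objects before the jobs are finally
 * pushed to their scheduler entities.
 */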
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_job *leader = p->gang_leader;
	struct amdgpu_bo_list_entry *e;
	struct drm_gem_object *gobj;
	unsigned long index;
	unsigned int i;
	uint64_t seq;
	int r;

	for (i = 0; i < p->gang_size; ++i)
		drm_sched_job_arm(&p->jobs[i]->base);

	for (i = 0; i < p->gang_size; ++i) {
		struct dma_fence *fence;

		if (p->jobs[i] == leader)
			continue;

		fence = &p->jobs[i]->base.s_fence->scheduled;
		dma_fence_get(fence);
		r = drm_sched_job_add_dependency(&leader->base, fence);
		if (r) {
			dma_fence_put(fence);
			return r;
		}
	}

	if (p->gang_size > 1) {
		for (i = 0; i < p->gang_size; ++i)
			amdgpu_job_set_gang_leader(p->jobs[i], leader);
	}

	/* No memory allocation is allowed while holding the notifier lock.
	 * The lock is held until amdgpu_cs_submit is finished and the fence is
	 * added to the BOs.
	 */
	mutex_lock(&p->adev->notifier_lock);

	/* If userptrs are invalidated after amdgpu_cs_parser_bos(), return
	 * -EAGAIN; drmIoctl() in libdrm will then restart the amdgpu_cs_ioctl.
	 */
	r = 0;
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm,
							e->range);
		e->range = NULL;
	}
	if (r) {
		r = -EAGAIN;
		mutex_unlock(&p->adev->notifier_lock);
		return r;
	}

	p->fence = dma_fence_get(&leader->base.s_fence->finished);
	drm_exec_for_each_locked_object(&p->exec, index, gobj) {

		ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo);

		/* Everybody except for the gang leader uses READ */
		for (i = 0; i < p->gang_size; ++i) {
			if (p->jobs[i] == leader)
				continue;

			dma_resv_add_fence(gobj->resv,
					   &p->jobs[i]->base.s_fence->finished,
					   DMA_RESV_USAGE_READ);
		}

		/* The gang leader is remembered as the writer */
		dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE);
	}

	seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
				   p->fence);
	amdgpu_cs_post_dependencies(p);

	if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
	    !p->ctx->preamble_presented) {
		leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
		p->ctx->preamble_presented = true;
	}

	cs->out.handle = seq;
	leader->uf_sequence = seq;

	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket);
	for (i = 0; i < p->gang_size; ++i) {
		amdgpu_job_free_resources(p->jobs[i]);
		trace_amdgpu_cs_ioctl(p->jobs[i]);
		drm_sched_entity_push_job(&p->jobs[i]->base);
		p->jobs[i] = NULL;
	}

	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);

	mutex_unlock(&p->adev->notifier_lock);
	mutex_unlock(&p->bo_list->bo_list_mutex);
	return 0;
}

/* Cleanup the parser structure */
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
	unsigned int i;

	amdgpu_sync_free(&parser->sync);
	drm_exec_fini(&parser->exec);

	for (i = 0; i < parser->num_post_deps; i++) {
		drm_syncobj_put(parser->post_deps[i].syncobj);
		kfree(parser->post_deps[i].chain);
	}
	kfree(parser->post_deps);

	dma_fence_put(parser->fence);

	if (parser->ctx)
		amdgpu_ctx_put(parser->ctx);
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	for (i = 0; i < parser->nchunks; i++)
		kvfree(parser->chunks[i].kdata);
	kvfree(parser->chunks);
	for (i = 0; i < parser->gang_size; ++i) {
		if (parser->jobs[i])
			amdgpu_job_free(parser->jobs[i]);
	}
	amdgpu_bo_unref(&parser->uf_bo);
}

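/**
 * amdgpu_cs_ioctl - main entry point for the command submission ioctl
 *
 * @dev: drm device
 * @data: data from userspace (union drm_amdgpu_cs)
 * @filp: file private
 *
 * Parse the chunks, reserve the buffers, handle the VM updates and
 * dependencies and finally submit the jobs to the scheduler.
 */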
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_cs_parser parser;
	int r;

	if (amdgpu_ras_intr_triggered())
		return -EHWPOISON;

	if (!adev->accel_working)
		return -EBUSY;

	r = amdgpu_cs_parser_init(&parser, adev, filp, data);
	if (r) {
		if (printk_ratelimit())
			DRM_ERROR("Failed to initialize parser %d!\n", r);
		return r;
	}

	r = amdgpu_cs_pass1(&parser, data);
	if (r)
		goto error_fini;

	r = amdgpu_cs_pass2(&parser);
	if (r)
		goto error_fini;

	r = amdgpu_cs_parser_bos(&parser, data);
	if (r) {
		if (r == -ENOMEM)
			DRM_ERROR("Not enough memory for command submission!\n");
		else if (r != -ERESTARTSYS && r != -EAGAIN)
			DRM_DEBUG("Failed to process the buffer list %d!\n", r);
		goto error_fini;
	}

	r = amdgpu_cs_patch_jobs(&parser);
	if (r)
		goto error_backoff;

	r = amdgpu_cs_vm_handling(&parser);
	if (r)
		goto error_backoff;

	r = amdgpu_cs_sync_rings(&parser);
	if (r)
		goto error_backoff;

	trace_amdgpu_cs_ibs(&parser);

	r = amdgpu_cs_submit(&parser, data);
	if (r)
		goto error_backoff;

	amdgpu_cs_parser_fini(&parser);
	return 0;

error_backoff:
	mutex_unlock(&parser.bo_list->bo_list_mutex);

error_fini:
	amdgpu_cs_parser_fini(&parser);
	return r;
}

/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Wait for the command submission identified by handle to finish.
 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *filp)
{
	union drm_amdgpu_wait_cs *wait = data;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
	struct drm_sched_entity *entity;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	long r;

	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
	if (ctx == NULL)
		return -EINVAL;

	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
				  wait->in.ring, &entity);
	if (r) {
		amdgpu_ctx_put(ctx);
		return r;
	}

	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
		r = dma_fence_wait_timeout(fence, true, timeout);
		if (r > 0 && fence->error)
			r = fence->error;
		dma_fence_put(fence);
	} else
		r = 1;

	amdgpu_ctx_put(ctx);
	if (r < 0)
		return r;

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r == 0);

	return 0;
}

/**
 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
 *
 * @adev: amdgpu device
 * @filp: file private
 * @user: drm_amdgpu_fence copied from user space
 */
static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
					     struct drm_file *filp,
					     struct drm_amdgpu_fence *user)
{
	struct drm_sched_entity *entity;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	int r;

	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
	if (ctx == NULL)
		return ERR_PTR(-EINVAL);

	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
				  user->ring, &entity);
	if (r) {
		amdgpu_ctx_put(ctx);
		return ERR_PTR(r);
	}

	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
	amdgpu_ctx_put(ctx);

	return fence;
}

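/**
 * amdgpu_cs_fence_to_handle_ioctl - convert a fence into a handle
 *
 * @dev: drm device
 * @data: data from userspace (union drm_amdgpu_fence_to_handle)
 * @filp: file private
 *
 * Look up the fence described by userspace and return it as a syncobj handle,
 * a syncobj file descriptor or a sync_file file descriptor, depending on what
 * was requested.
 */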
int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_fence_to_handle *info = data;
	struct dma_fence *fence;
	struct drm_syncobj *syncobj;
	struct sync_file *sync_file;
	int fd, r;

	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	if (!fence)
		fence = dma_fence_get_stub();

	switch (info->in.what) {
	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
		r = drm_syncobj_create(&syncobj, 0, fence);
		dma_fence_put(fence);
		if (r)
			return r;
		r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
		drm_syncobj_put(syncobj);
		return r;

	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
		r = drm_syncobj_create(&syncobj, 0, fence);
		dma_fence_put(fence);
		if (r)
			return r;
		r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
		drm_syncobj_put(syncobj);
		return r;

	case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
		fd = get_unused_fd_flags(O_CLOEXEC);
		if (fd < 0) {
			dma_fence_put(fence);
			return fd;
		}

		sync_file = sync_file_create(fence);
		dma_fence_put(fence);
		if (!sync_file) {
			put_unused_fd(fd);
			return -ENOMEM;
		}

		fd_install(fd, sync_file->file);
		info->out.handle = fd;
		return 0;

	default:
		dma_fence_put(fence);
		return -EINVAL;
	}
}

/**
 * amdgpu_cs_wait_all_fences - wait on all fences to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 */
static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
				     struct drm_file *filp,
				     union drm_amdgpu_wait_fences *wait,
				     struct drm_amdgpu_fence *fences)
{
	uint32_t fence_count = wait->in.fence_count;
	unsigned int i;
	long r = 1;

	for (i = 0; i < fence_count; i++) {
		struct dma_fence *fence;
		unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);

		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
		if (IS_ERR(fence))
			return PTR_ERR(fence);
		else if (!fence)
			continue;

		r = dma_fence_wait_timeout(fence, true, timeout);
		if (r > 0 && fence->error)
			r = fence->error;

		dma_fence_put(fence);
		if (r < 0)
			return r;

		if (r == 0)
			break;
	}

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r > 0);

	return 0;
}

/**
 * amdgpu_cs_wait_any_fence - wait on any fence to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 */
static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
				    struct drm_file *filp,
				    union drm_amdgpu_wait_fences *wait,
				    struct drm_amdgpu_fence *fences)
{
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
	uint32_t fence_count = wait->in.fence_count;
	uint32_t first = ~0;
	struct dma_fence **array;
	unsigned int i;
	long r;

	/* Prepare the fence array */
	array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);

	if (array == NULL)
		return -ENOMEM;

	for (i = 0; i < fence_count; i++) {
		struct dma_fence *fence;

		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
		if (IS_ERR(fence)) {
			r = PTR_ERR(fence);
			goto err_free_fence_array;
		} else if (fence) {
			array[i] = fence;
		} else { /* NULL, the fence has already been signaled */
			r = 1;
			first = i;
			goto out;
		}
	}

	r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
				       &first);
	if (r < 0)
		goto err_free_fence_array;

out:
	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r > 0);
	wait->out.first_signaled = first;

	if (first < fence_count && array[first])
		r = array[first]->error;
	else
		r = 0;

err_free_fence_array:
	for (i = 0; i < fence_count; i++)
		dma_fence_put(array[i]);
	kfree(array);

	return r;
}

/**
 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 */
int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_wait_fences *wait = data;
	uint32_t fence_count = wait->in.fence_count;
	struct drm_amdgpu_fence *fences_user;
	struct drm_amdgpu_fence *fences;
	int r;

	/* Get the fences from userspace */
	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
			       GFP_KERNEL);
	if (fences == NULL)
		return -ENOMEM;

	fences_user = u64_to_user_ptr(wait->in.fences);
	if (copy_from_user(fences, fences_user,
			   sizeof(struct drm_amdgpu_fence) * fence_count)) {
		r = -EFAULT;
		goto err_free_fences;
	}

	if (wait->in.wait_all)
		r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
	else
		r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);

err_free_fences:
	kfree(fences);

	return r;
}

/**
 * amdgpu_cs_find_mapping - find bo_va for VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 * @map: Placeholder to return found BO mapping
 *
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns 0 and fills in @bo and @map when the
 * mapping is found, an error code otherwise.
 */
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
			   uint64_t addr, struct amdgpu_bo **bo,
			   struct amdgpu_bo_va_mapping **map)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_va_mapping *mapping;
	int r;

	addr /= AMDGPU_GPU_PAGE_SIZE;

	mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
		return -EINVAL;

	*bo = mapping->bo_va->base.bo;
	*map = mapping;

	/* Double check that the BO is reserved by this CS */
	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
		return -EINVAL;

	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
		if (r)
			return r;
	}

	return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
}