1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Rob Clark <robclark@freedesktop.org> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include <assert.h> 28bf215546Sopenharmony_ci#include <inttypes.h> 29bf215546Sopenharmony_ci#include <pthread.h> 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "util/hash_table.h" 32bf215546Sopenharmony_ci#include "util/os_file.h" 33bf215546Sopenharmony_ci#include "util/slab.h" 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_ci#include "freedreno_ringbuffer_sp.h" 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead 38bf215546Sopenharmony_ci * by avoiding the additional tracking necessary to build cmds/relocs tables 39bf215546Sopenharmony_ci * (but still builds a bos table) 40bf215546Sopenharmony_ci */ 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci#define INIT_SIZE 0x1000 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci#define SUBALLOC_SIZE (32 * 1024) 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on, 47bf215546Sopenharmony_ci * instead use a condition-variable. Note that pipe->flush() is not expected 48bf215546Sopenharmony_ci * to be a common/hot path. 49bf215546Sopenharmony_ci */ 50bf215546Sopenharmony_cistatic pthread_cond_t flush_cnd = PTHREAD_COND_INITIALIZER; 51bf215546Sopenharmony_cistatic pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER; 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_cistatic void finalize_current_cmd(struct fd_ringbuffer *ring); 54bf215546Sopenharmony_cistatic struct fd_ringbuffer * 55bf215546Sopenharmony_cifd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size, 56bf215546Sopenharmony_ci enum fd_ringbuffer_flags flags); 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci/* add (if needed) bo to submit and return index: */ 59bf215546Sopenharmony_ciuint32_t 60bf215546Sopenharmony_cifd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo) 61bf215546Sopenharmony_ci{ 62bf215546Sopenharmony_ci uint32_t idx; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci /* NOTE: it is legal to use the same bo on different threads for 65bf215546Sopenharmony_ci * different submits. But it is not legal to use the same submit 66bf215546Sopenharmony_ci * from different threads. 67bf215546Sopenharmony_ci */ 68bf215546Sopenharmony_ci idx = READ_ONCE(bo->idx); 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) { 71bf215546Sopenharmony_ci uint32_t hash = _mesa_hash_pointer(bo); 72bf215546Sopenharmony_ci struct hash_entry *entry; 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); 75bf215546Sopenharmony_ci if (entry) { 76bf215546Sopenharmony_ci /* found */ 77bf215546Sopenharmony_ci idx = (uint32_t)(uintptr_t)entry->data; 78bf215546Sopenharmony_ci } else { 79bf215546Sopenharmony_ci idx = APPEND(submit, bos, fd_bo_ref(bo)); 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, 82bf215546Sopenharmony_ci (void *)(uintptr_t)idx); 83bf215546Sopenharmony_ci } 84bf215546Sopenharmony_ci bo->idx = idx; 85bf215546Sopenharmony_ci } 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci return idx; 88bf215546Sopenharmony_ci} 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_cistatic void 91bf215546Sopenharmony_cifd_submit_suballoc_ring_bo(struct fd_submit *submit, 92bf215546Sopenharmony_ci struct fd_ringbuffer_sp *fd_ring, uint32_t size) 93bf215546Sopenharmony_ci{ 94bf215546Sopenharmony_ci struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit); 95bf215546Sopenharmony_ci unsigned suballoc_offset = 0; 96bf215546Sopenharmony_ci struct fd_bo *suballoc_bo = NULL; 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci if (fd_submit->suballoc_ring) { 99bf215546Sopenharmony_ci struct fd_ringbuffer_sp *suballoc_ring = 100bf215546Sopenharmony_ci to_fd_ringbuffer_sp(fd_submit->suballoc_ring); 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci suballoc_bo = suballoc_ring->ring_bo; 103bf215546Sopenharmony_ci suballoc_offset = 104bf215546Sopenharmony_ci fd_ringbuffer_size(fd_submit->suballoc_ring) + suballoc_ring->offset; 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci suballoc_offset = align(suballoc_offset, 0x10); 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci if ((size + suballoc_offset) > suballoc_bo->size) { 109bf215546Sopenharmony_ci suballoc_bo = NULL; 110bf215546Sopenharmony_ci } 111bf215546Sopenharmony_ci } 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci if (!suballoc_bo) { 114bf215546Sopenharmony_ci // TODO possibly larger size for streaming bo? 115bf215546Sopenharmony_ci fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE); 116bf215546Sopenharmony_ci fd_ring->offset = 0; 117bf215546Sopenharmony_ci } else { 118bf215546Sopenharmony_ci fd_ring->ring_bo = fd_bo_ref(suballoc_bo); 119bf215546Sopenharmony_ci fd_ring->offset = suballoc_offset; 120bf215546Sopenharmony_ci } 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci struct fd_ringbuffer *old_suballoc_ring = fd_submit->suballoc_ring; 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci fd_submit->suballoc_ring = fd_ringbuffer_ref(&fd_ring->base); 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci if (old_suballoc_ring) 127bf215546Sopenharmony_ci fd_ringbuffer_del(old_suballoc_ring); 128bf215546Sopenharmony_ci} 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_cistatic struct fd_ringbuffer * 131bf215546Sopenharmony_cifd_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size, 132bf215546Sopenharmony_ci enum fd_ringbuffer_flags flags) 133bf215546Sopenharmony_ci{ 134bf215546Sopenharmony_ci struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit); 135bf215546Sopenharmony_ci struct fd_ringbuffer_sp *fd_ring; 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci fd_ring = slab_alloc(&fd_submit->ring_pool); 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci fd_ring->u.submit = submit; 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci /* NOTE: needs to be before _suballoc_ring_bo() since it could 142bf215546Sopenharmony_ci * increment the refcnt of the current ring 143bf215546Sopenharmony_ci */ 144bf215546Sopenharmony_ci fd_ring->base.refcnt = 1; 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci if (flags & FD_RINGBUFFER_STREAMING) { 147bf215546Sopenharmony_ci fd_submit_suballoc_ring_bo(submit, fd_ring, size); 148bf215546Sopenharmony_ci } else { 149bf215546Sopenharmony_ci if (flags & FD_RINGBUFFER_GROWABLE) 150bf215546Sopenharmony_ci size = INIT_SIZE; 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci fd_ring->offset = 0; 153bf215546Sopenharmony_ci fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size); 154bf215546Sopenharmony_ci } 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci if (!fd_ringbuffer_sp_init(fd_ring, size, flags)) 157bf215546Sopenharmony_ci return NULL; 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci return &fd_ring->base; 160bf215546Sopenharmony_ci} 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci/** 163bf215546Sopenharmony_ci * Prepare submit for flush, always done synchronously. 164bf215546Sopenharmony_ci * 165bf215546Sopenharmony_ci * 1) Finalize primary ringbuffer, at this point no more cmdstream may 166bf215546Sopenharmony_ci * be written into it, since from the PoV of the upper level driver 167bf215546Sopenharmony_ci * the submit is flushed, even if deferred 168bf215546Sopenharmony_ci * 2) Add cmdstream bos to bos table 169bf215546Sopenharmony_ci * 3) Update bo fences 170bf215546Sopenharmony_ci */ 171bf215546Sopenharmony_cistatic bool 172bf215546Sopenharmony_cifd_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd, 173bf215546Sopenharmony_ci struct fd_submit_fence *out_fence) 174bf215546Sopenharmony_ci{ 175bf215546Sopenharmony_ci struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit); 176bf215546Sopenharmony_ci bool has_shared = false; 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci finalize_current_cmd(submit->primary); 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci struct fd_ringbuffer_sp *primary = 181bf215546Sopenharmony_ci to_fd_ringbuffer_sp(submit->primary); 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci for (unsigned i = 0; i < primary->u.nr_cmds; i++) 184bf215546Sopenharmony_ci fd_submit_append_bo(fd_submit, primary->u.cmds[i].ring_bo); 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci simple_mtx_lock(&table_lock); 187bf215546Sopenharmony_ci for (unsigned i = 0; i < fd_submit->nr_bos; i++) { 188bf215546Sopenharmony_ci fd_bo_add_fence(fd_submit->bos[i], submit->pipe, submit->fence); 189bf215546Sopenharmony_ci has_shared |= fd_submit->bos[i]->shared; 190bf215546Sopenharmony_ci } 191bf215546Sopenharmony_ci simple_mtx_unlock(&table_lock); 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci fd_submit->out_fence = out_fence; 194bf215546Sopenharmony_ci fd_submit->in_fence_fd = (in_fence_fd == -1) ? 195bf215546Sopenharmony_ci -1 : os_dupfd_cloexec(in_fence_fd); 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci return has_shared; 198bf215546Sopenharmony_ci} 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_cistatic void 201bf215546Sopenharmony_cifd_submit_sp_flush_execute(void *job, void *gdata, int thread_index) 202bf215546Sopenharmony_ci{ 203bf215546Sopenharmony_ci struct fd_submit *submit = job; 204bf215546Sopenharmony_ci struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit); 205bf215546Sopenharmony_ci struct fd_pipe *pipe = submit->pipe; 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci fd_submit->flush_submit_list(&fd_submit->submit_list); 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci pthread_mutex_lock(&flush_mtx); 210bf215546Sopenharmony_ci assert(fd_fence_before(pipe->last_submit_fence, fd_submit->base.fence)); 211bf215546Sopenharmony_ci pipe->last_submit_fence = fd_submit->base.fence; 212bf215546Sopenharmony_ci pthread_cond_broadcast(&flush_cnd); 213bf215546Sopenharmony_ci pthread_mutex_unlock(&flush_mtx); 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci DEBUG_MSG("finish: %u", submit->fence); 216bf215546Sopenharmony_ci} 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_cistatic void 219bf215546Sopenharmony_cifd_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index) 220bf215546Sopenharmony_ci{ 221bf215546Sopenharmony_ci struct fd_submit *submit = job; 222bf215546Sopenharmony_ci fd_submit_del(submit); 223bf215546Sopenharmony_ci} 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_cistatic int 226bf215546Sopenharmony_cienqueue_submit_list(struct list_head *submit_list) 227bf215546Sopenharmony_ci{ 228bf215546Sopenharmony_ci struct fd_submit *submit = last_submit(submit_list); 229bf215546Sopenharmony_ci struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit); 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci list_replace(submit_list, &fd_submit->submit_list); 232bf215546Sopenharmony_ci list_inithead(submit_list); 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci struct util_queue_fence *fence; 235bf215546Sopenharmony_ci if (fd_submit->out_fence) { 236bf215546Sopenharmony_ci fence = &fd_submit->out_fence->ready; 237bf215546Sopenharmony_ci } else { 238bf215546Sopenharmony_ci util_queue_fence_init(&fd_submit->fence); 239bf215546Sopenharmony_ci fence = &fd_submit->fence; 240bf215546Sopenharmony_ci } 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci DEBUG_MSG("enqueue: %u", submit->fence); 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci util_queue_add_job(&submit->pipe->dev->submit_queue, 245bf215546Sopenharmony_ci submit, fence, 246bf215546Sopenharmony_ci fd_submit_sp_flush_execute, 247bf215546Sopenharmony_ci fd_submit_sp_flush_cleanup, 248bf215546Sopenharmony_ci 0); 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci return 0; 251bf215546Sopenharmony_ci} 252bf215546Sopenharmony_ci 253bf215546Sopenharmony_cistatic bool 254bf215546Sopenharmony_cishould_defer(struct fd_submit *submit) 255bf215546Sopenharmony_ci{ 256bf215546Sopenharmony_ci struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit); 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci /* if too many bo's, it may not be worth the CPU cost of submit merging: */ 259bf215546Sopenharmony_ci if (fd_submit->nr_bos > 30) 260bf215546Sopenharmony_ci return false; 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci /* On the kernel side, with 32K ringbuffer, we have an upper limit of 2k 263bf215546Sopenharmony_ci * cmds before we exceed the size of the ringbuffer, which results in 264bf215546Sopenharmony_ci * deadlock writing into the RB (ie. kernel doesn't finish writing into 265bf215546Sopenharmony_ci * the RB so it doesn't kick the GPU to start consuming from the RB) 266bf215546Sopenharmony_ci */ 267bf215546Sopenharmony_ci if (submit->pipe->dev->deferred_cmds > 128) 268bf215546Sopenharmony_ci return false; 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci return true; 271bf215546Sopenharmony_ci} 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_cistatic int 274bf215546Sopenharmony_cifd_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, 275bf215546Sopenharmony_ci struct fd_submit_fence *out_fence) 276bf215546Sopenharmony_ci{ 277bf215546Sopenharmony_ci struct fd_device *dev = submit->pipe->dev; 278bf215546Sopenharmony_ci struct fd_pipe *pipe = submit->pipe; 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ci /* Acquire lock before flush_prep() because it is possible to race between 281bf215546Sopenharmony_ci * this and pipe->flush(): 282bf215546Sopenharmony_ci */ 283bf215546Sopenharmony_ci simple_mtx_lock(&dev->submit_lock); 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci /* If there are deferred submits from another fd_pipe, flush them now, 286bf215546Sopenharmony_ci * since we can't merge submits from different submitqueue's (ie. they 287bf215546Sopenharmony_ci * could have different priority, etc) 288bf215546Sopenharmony_ci */ 289bf215546Sopenharmony_ci if (!list_is_empty(&dev->deferred_submits) && 290bf215546Sopenharmony_ci (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) { 291bf215546Sopenharmony_ci struct list_head submit_list; 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci list_replace(&dev->deferred_submits, &submit_list); 294bf215546Sopenharmony_ci list_inithead(&dev->deferred_submits); 295bf215546Sopenharmony_ci dev->deferred_cmds = 0; 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci enqueue_submit_list(&submit_list); 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits); 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci bool has_shared = fd_submit_sp_flush_prep(submit, in_fence_fd, out_fence); 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_ci assert(fd_fence_before(pipe->last_enqueue_fence, submit->fence)); 305bf215546Sopenharmony_ci pipe->last_enqueue_fence = submit->fence; 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci /* If we don't need an out-fence, we can defer the submit. 308bf215546Sopenharmony_ci * 309bf215546Sopenharmony_ci * TODO we could defer submits with in-fence as well.. if we took our own 310bf215546Sopenharmony_ci * reference to the fd, and merged all the in-fence-fd's when we flush the 311bf215546Sopenharmony_ci * deferred submits 312bf215546Sopenharmony_ci */ 313bf215546Sopenharmony_ci if ((in_fence_fd == -1) && !out_fence && !has_shared && should_defer(submit)) { 314bf215546Sopenharmony_ci DEBUG_MSG("defer: %u", submit->fence); 315bf215546Sopenharmony_ci dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary); 316bf215546Sopenharmony_ci assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev)); 317bf215546Sopenharmony_ci simple_mtx_unlock(&dev->submit_lock); 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci return 0; 320bf215546Sopenharmony_ci } 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci struct list_head submit_list; 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci list_replace(&dev->deferred_submits, &submit_list); 325bf215546Sopenharmony_ci list_inithead(&dev->deferred_submits); 326bf215546Sopenharmony_ci dev->deferred_cmds = 0; 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci simple_mtx_unlock(&dev->submit_lock); 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci return enqueue_submit_list(&submit_list); 331bf215546Sopenharmony_ci} 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_civoid 334bf215546Sopenharmony_cifd_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence) 335bf215546Sopenharmony_ci{ 336bf215546Sopenharmony_ci struct fd_device *dev = pipe->dev; 337bf215546Sopenharmony_ci struct list_head submit_list; 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci DEBUG_MSG("flush: %u", fence); 340bf215546Sopenharmony_ci 341bf215546Sopenharmony_ci list_inithead(&submit_list); 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci simple_mtx_lock(&dev->submit_lock); 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci assert(!fd_fence_after(fence, pipe->last_enqueue_fence)); 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci foreach_submit_safe (deferred_submit, &dev->deferred_submits) { 348bf215546Sopenharmony_ci /* We should never have submits from multiple pipes in the deferred 349bf215546Sopenharmony_ci * list. If we did, we couldn't compare their fence to our fence, 350bf215546Sopenharmony_ci * since each fd_pipe is an independent timeline. 351bf215546Sopenharmony_ci */ 352bf215546Sopenharmony_ci if (deferred_submit->pipe != pipe) 353bf215546Sopenharmony_ci break; 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci if (fd_fence_after(deferred_submit->fence, fence)) 356bf215546Sopenharmony_ci break; 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_ci list_del(&deferred_submit->node); 359bf215546Sopenharmony_ci list_addtail(&deferred_submit->node, &submit_list); 360bf215546Sopenharmony_ci dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary); 361bf215546Sopenharmony_ci } 362bf215546Sopenharmony_ci 363bf215546Sopenharmony_ci assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev)); 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_ci simple_mtx_unlock(&dev->submit_lock); 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_ci if (list_is_empty(&submit_list)) 368bf215546Sopenharmony_ci goto flush_sync; 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci enqueue_submit_list(&submit_list); 371bf215546Sopenharmony_ci 372bf215546Sopenharmony_ciflush_sync: 373bf215546Sopenharmony_ci /* Once we are sure that we've enqueued at least up to the requested 374bf215546Sopenharmony_ci * submit, we need to be sure that submitq has caught up and flushed 375bf215546Sopenharmony_ci * them to the kernel 376bf215546Sopenharmony_ci */ 377bf215546Sopenharmony_ci pthread_mutex_lock(&flush_mtx); 378bf215546Sopenharmony_ci while (fd_fence_before(pipe->last_submit_fence, fence)) { 379bf215546Sopenharmony_ci pthread_cond_wait(&flush_cnd, &flush_mtx); 380bf215546Sopenharmony_ci } 381bf215546Sopenharmony_ci pthread_mutex_unlock(&flush_mtx); 382bf215546Sopenharmony_ci} 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_cistatic void 385bf215546Sopenharmony_cifd_submit_sp_destroy(struct fd_submit *submit) 386bf215546Sopenharmony_ci{ 387bf215546Sopenharmony_ci struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit); 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci if (fd_submit->suballoc_ring) 390bf215546Sopenharmony_ci fd_ringbuffer_del(fd_submit->suballoc_ring); 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci _mesa_hash_table_destroy(fd_submit->bo_table, NULL); 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci // TODO it would be nice to have a way to assert() if all 395bf215546Sopenharmony_ci // rb's haven't been free'd back to the slab, because that is 396bf215546Sopenharmony_ci // an indication that we are leaking bo's 397bf215546Sopenharmony_ci slab_destroy_child(&fd_submit->ring_pool); 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci for (unsigned i = 0; i < fd_submit->nr_bos; i++) 400bf215546Sopenharmony_ci fd_bo_del(fd_submit->bos[i]); 401bf215546Sopenharmony_ci 402bf215546Sopenharmony_ci free(fd_submit->bos); 403bf215546Sopenharmony_ci free(fd_submit); 404bf215546Sopenharmony_ci} 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_cistatic const struct fd_submit_funcs submit_funcs = { 407bf215546Sopenharmony_ci .new_ringbuffer = fd_submit_sp_new_ringbuffer, 408bf215546Sopenharmony_ci .flush = fd_submit_sp_flush, 409bf215546Sopenharmony_ci .destroy = fd_submit_sp_destroy, 410bf215546Sopenharmony_ci}; 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_cistruct fd_submit * 413bf215546Sopenharmony_cifd_submit_sp_new(struct fd_pipe *pipe, flush_submit_list_fn flush_submit_list) 414bf215546Sopenharmony_ci{ 415bf215546Sopenharmony_ci struct fd_submit_sp *fd_submit = calloc(1, sizeof(*fd_submit)); 416bf215546Sopenharmony_ci struct fd_submit *submit; 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci fd_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, 419bf215546Sopenharmony_ci _mesa_key_pointer_equal); 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci slab_create_child(&fd_submit->ring_pool, &pipe->ring_pool); 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci fd_submit->flush_submit_list = flush_submit_list; 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci submit = &fd_submit->base; 426bf215546Sopenharmony_ci submit->funcs = &submit_funcs; 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci return submit; 429bf215546Sopenharmony_ci} 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_civoid 432bf215546Sopenharmony_cifd_pipe_sp_ringpool_init(struct fd_pipe *pipe) 433bf215546Sopenharmony_ci{ 434bf215546Sopenharmony_ci // TODO tune size: 435bf215546Sopenharmony_ci slab_create_parent(&pipe->ring_pool, sizeof(struct fd_ringbuffer_sp), 16); 436bf215546Sopenharmony_ci} 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_civoid 439bf215546Sopenharmony_cifd_pipe_sp_ringpool_fini(struct fd_pipe *pipe) 440bf215546Sopenharmony_ci{ 441bf215546Sopenharmony_ci if (pipe->ring_pool.num_elements) 442bf215546Sopenharmony_ci slab_destroy_parent(&pipe->ring_pool); 443bf215546Sopenharmony_ci} 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_cistatic void 446bf215546Sopenharmony_cifinalize_current_cmd(struct fd_ringbuffer *ring) 447bf215546Sopenharmony_ci{ 448bf215546Sopenharmony_ci assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring); 451bf215546Sopenharmony_ci APPEND(&fd_ring->u, cmds, 452bf215546Sopenharmony_ci (struct fd_cmd_sp){ 453bf215546Sopenharmony_ci .ring_bo = fd_bo_ref(fd_ring->ring_bo), 454bf215546Sopenharmony_ci .size = offset_bytes(ring->cur, ring->start), 455bf215546Sopenharmony_ci }); 456bf215546Sopenharmony_ci} 457bf215546Sopenharmony_ci 458bf215546Sopenharmony_cistatic void 459bf215546Sopenharmony_cifd_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size) 460bf215546Sopenharmony_ci{ 461bf215546Sopenharmony_ci struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring); 462bf215546Sopenharmony_ci struct fd_pipe *pipe = fd_ring->u.submit->pipe; 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci assert(ring->flags & FD_RINGBUFFER_GROWABLE); 465bf215546Sopenharmony_ci 466bf215546Sopenharmony_ci finalize_current_cmd(ring); 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_ci fd_bo_del(fd_ring->ring_bo); 469bf215546Sopenharmony_ci fd_ring->ring_bo = fd_bo_new_ring(pipe->dev, size); 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci ring->start = fd_bo_map(fd_ring->ring_bo); 472bf215546Sopenharmony_ci ring->end = &(ring->start[size / 4]); 473bf215546Sopenharmony_ci ring->cur = ring->start; 474bf215546Sopenharmony_ci ring->size = size; 475bf215546Sopenharmony_ci} 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_cistatic inline bool 478bf215546Sopenharmony_cifd_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo) 479bf215546Sopenharmony_ci{ 480bf215546Sopenharmony_ci struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring); 481bf215546Sopenharmony_ci 482bf215546Sopenharmony_ci for (int i = 0; i < fd_ring->u.nr_reloc_bos; i++) { 483bf215546Sopenharmony_ci if (fd_ring->u.reloc_bos[i] == bo) 484bf215546Sopenharmony_ci return true; 485bf215546Sopenharmony_ci } 486bf215546Sopenharmony_ci return false; 487bf215546Sopenharmony_ci} 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci#define PTRSZ 64 490bf215546Sopenharmony_ci#include "freedreno_ringbuffer_sp_reloc.h" 491bf215546Sopenharmony_ci#undef PTRSZ 492bf215546Sopenharmony_ci#define PTRSZ 32 493bf215546Sopenharmony_ci#include "freedreno_ringbuffer_sp_reloc.h" 494bf215546Sopenharmony_ci#undef PTRSZ 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_cistatic uint32_t 497bf215546Sopenharmony_cifd_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring) 498bf215546Sopenharmony_ci{ 499bf215546Sopenharmony_ci if (ring->flags & FD_RINGBUFFER_GROWABLE) 500bf215546Sopenharmony_ci return to_fd_ringbuffer_sp(ring)->u.nr_cmds + 1; 501bf215546Sopenharmony_ci return 1; 502bf215546Sopenharmony_ci} 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_cistatic bool 505bf215546Sopenharmony_cifd_ringbuffer_sp_check_size(struct fd_ringbuffer *ring) 506bf215546Sopenharmony_ci{ 507bf215546Sopenharmony_ci assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); 508bf215546Sopenharmony_ci struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring); 509bf215546Sopenharmony_ci struct fd_submit *submit = fd_ring->u.submit; 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_ci if (to_fd_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) { 512bf215546Sopenharmony_ci return false; 513bf215546Sopenharmony_ci } 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci return true; 516bf215546Sopenharmony_ci} 517bf215546Sopenharmony_ci 518bf215546Sopenharmony_cistatic void 519bf215546Sopenharmony_cifd_ringbuffer_sp_destroy(struct fd_ringbuffer *ring) 520bf215546Sopenharmony_ci{ 521bf215546Sopenharmony_ci struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring); 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci fd_bo_del(fd_ring->ring_bo); 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ci if (ring->flags & _FD_RINGBUFFER_OBJECT) { 526bf215546Sopenharmony_ci for (unsigned i = 0; i < fd_ring->u.nr_reloc_bos; i++) { 527bf215546Sopenharmony_ci fd_bo_del(fd_ring->u.reloc_bos[i]); 528bf215546Sopenharmony_ci } 529bf215546Sopenharmony_ci free(fd_ring->u.reloc_bos); 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci free(fd_ring); 532bf215546Sopenharmony_ci } else { 533bf215546Sopenharmony_ci struct fd_submit *submit = fd_ring->u.submit; 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci for (unsigned i = 0; i < fd_ring->u.nr_cmds; i++) { 536bf215546Sopenharmony_ci fd_bo_del(fd_ring->u.cmds[i].ring_bo); 537bf215546Sopenharmony_ci } 538bf215546Sopenharmony_ci free(fd_ring->u.cmds); 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_ci slab_free(&to_fd_submit_sp(submit)->ring_pool, fd_ring); 541bf215546Sopenharmony_ci } 542bf215546Sopenharmony_ci} 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_cistatic const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = { 545bf215546Sopenharmony_ci .grow = fd_ringbuffer_sp_grow, 546bf215546Sopenharmony_ci .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_32, 547bf215546Sopenharmony_ci .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32, 548bf215546Sopenharmony_ci .cmd_count = fd_ringbuffer_sp_cmd_count, 549bf215546Sopenharmony_ci .check_size = fd_ringbuffer_sp_check_size, 550bf215546Sopenharmony_ci .destroy = fd_ringbuffer_sp_destroy, 551bf215546Sopenharmony_ci}; 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_cistatic const struct fd_ringbuffer_funcs ring_funcs_obj_32 = { 554bf215546Sopenharmony_ci .grow = fd_ringbuffer_sp_grow, 555bf215546Sopenharmony_ci .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_32, 556bf215546Sopenharmony_ci .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32, 557bf215546Sopenharmony_ci .cmd_count = fd_ringbuffer_sp_cmd_count, 558bf215546Sopenharmony_ci .destroy = fd_ringbuffer_sp_destroy, 559bf215546Sopenharmony_ci}; 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_cistatic const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = { 562bf215546Sopenharmony_ci .grow = fd_ringbuffer_sp_grow, 563bf215546Sopenharmony_ci .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_64, 564bf215546Sopenharmony_ci .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64, 565bf215546Sopenharmony_ci .cmd_count = fd_ringbuffer_sp_cmd_count, 566bf215546Sopenharmony_ci .check_size = fd_ringbuffer_sp_check_size, 567bf215546Sopenharmony_ci .destroy = fd_ringbuffer_sp_destroy, 568bf215546Sopenharmony_ci}; 569bf215546Sopenharmony_ci 570bf215546Sopenharmony_cistatic const struct fd_ringbuffer_funcs ring_funcs_obj_64 = { 571bf215546Sopenharmony_ci .grow = fd_ringbuffer_sp_grow, 572bf215546Sopenharmony_ci .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_64, 573bf215546Sopenharmony_ci .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64, 574bf215546Sopenharmony_ci .cmd_count = fd_ringbuffer_sp_cmd_count, 575bf215546Sopenharmony_ci .destroy = fd_ringbuffer_sp_destroy, 576bf215546Sopenharmony_ci}; 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_cistatic inline struct fd_ringbuffer * 579bf215546Sopenharmony_cifd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size, 580bf215546Sopenharmony_ci enum fd_ringbuffer_flags flags) 581bf215546Sopenharmony_ci{ 582bf215546Sopenharmony_ci struct fd_ringbuffer *ring = &fd_ring->base; 583bf215546Sopenharmony_ci 584bf215546Sopenharmony_ci assert(fd_ring->ring_bo); 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci uint8_t *base = fd_bo_map(fd_ring->ring_bo); 587bf215546Sopenharmony_ci ring->start = (void *)(base + fd_ring->offset); 588bf215546Sopenharmony_ci ring->end = &(ring->start[size / 4]); 589bf215546Sopenharmony_ci ring->cur = ring->start; 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci ring->size = size; 592bf215546Sopenharmony_ci ring->flags = flags; 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci if (flags & _FD_RINGBUFFER_OBJECT) { 595bf215546Sopenharmony_ci if (fd_dev_64b(&fd_ring->u.pipe->dev_id)) { 596bf215546Sopenharmony_ci ring->funcs = &ring_funcs_obj_64; 597bf215546Sopenharmony_ci } else { 598bf215546Sopenharmony_ci ring->funcs = &ring_funcs_obj_32; 599bf215546Sopenharmony_ci } 600bf215546Sopenharmony_ci } else { 601bf215546Sopenharmony_ci if (fd_dev_64b(&fd_ring->u.submit->pipe->dev_id)) { 602bf215546Sopenharmony_ci ring->funcs = &ring_funcs_nonobj_64; 603bf215546Sopenharmony_ci } else { 604bf215546Sopenharmony_ci ring->funcs = &ring_funcs_nonobj_32; 605bf215546Sopenharmony_ci } 606bf215546Sopenharmony_ci } 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci // TODO initializing these could probably be conditional on flags 609bf215546Sopenharmony_ci // since unneed for FD_RINGBUFFER_STAGING case.. 610bf215546Sopenharmony_ci fd_ring->u.cmds = NULL; 611bf215546Sopenharmony_ci fd_ring->u.nr_cmds = fd_ring->u.max_cmds = 0; 612bf215546Sopenharmony_ci 613bf215546Sopenharmony_ci fd_ring->u.reloc_bos = NULL; 614bf215546Sopenharmony_ci fd_ring->u.nr_reloc_bos = fd_ring->u.max_reloc_bos = 0; 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci return ring; 617bf215546Sopenharmony_ci} 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_cistruct fd_ringbuffer * 620bf215546Sopenharmony_cifd_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size) 621bf215546Sopenharmony_ci{ 622bf215546Sopenharmony_ci struct fd_device *dev = pipe->dev; 623bf215546Sopenharmony_ci struct fd_ringbuffer_sp *fd_ring = malloc(sizeof(*fd_ring)); 624bf215546Sopenharmony_ci 625bf215546Sopenharmony_ci /* Lock access to the fd_pipe->suballoc_* since ringbuffer object allocation 626bf215546Sopenharmony_ci * can happen both on the frontend (most CSOs) and the driver thread (a6xx 627bf215546Sopenharmony_ci * cached tex state, for example) 628bf215546Sopenharmony_ci */ 629bf215546Sopenharmony_ci simple_mtx_lock(&dev->suballoc_lock); 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_ci /* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */ 632bf215546Sopenharmony_ci fd_ring->offset = align(dev->suballoc_offset, 64); 633bf215546Sopenharmony_ci if (!dev->suballoc_bo || 634bf215546Sopenharmony_ci fd_ring->offset + size > fd_bo_size(dev->suballoc_bo)) { 635bf215546Sopenharmony_ci if (dev->suballoc_bo) 636bf215546Sopenharmony_ci fd_bo_del(dev->suballoc_bo); 637bf215546Sopenharmony_ci dev->suballoc_bo = 638bf215546Sopenharmony_ci fd_bo_new_ring(dev, MAX2(SUBALLOC_SIZE, align(size, 4096))); 639bf215546Sopenharmony_ci fd_ring->offset = 0; 640bf215546Sopenharmony_ci } 641bf215546Sopenharmony_ci 642bf215546Sopenharmony_ci fd_ring->u.pipe = pipe; 643bf215546Sopenharmony_ci fd_ring->ring_bo = fd_bo_ref(dev->suballoc_bo); 644bf215546Sopenharmony_ci fd_ring->base.refcnt = 1; 645bf215546Sopenharmony_ci 646bf215546Sopenharmony_ci dev->suballoc_offset = fd_ring->offset + size; 647bf215546Sopenharmony_ci 648bf215546Sopenharmony_ci simple_mtx_unlock(&dev->suballoc_lock); 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci return fd_ringbuffer_sp_init(fd_ring, size, _FD_RINGBUFFER_OBJECT); 651bf215546Sopenharmony_ci} 652