/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <assert.h>
#include <inttypes.h>
#include <pthread.h>

#include "util/hash_table.h"
#include "util/os_file.h"
#include "util/slab.h"

#include "freedreno_ringbuffer_sp.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */

#define INIT_SIZE 0x1000

#define SUBALLOC_SIZE (32 * 1024)

/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
 * instead use a condition-variable.  Note that pipe->flush() is not expected
 * to be a common/hot path.
 */
static pthread_cond_t flush_cnd = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags);

/* add (if needed) bo to submit and return index: */
uint32_t
fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
{
   uint32_t idx;

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
   idx = READ_ONCE(bo->idx);

   if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else {
         idx = APPEND(submit, bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
                                            (void *)(uintptr_t)idx);
      }
      bo->idx = idx;
   }

   return idx;
}
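
/* Illustrative sketch (hypothetical caller, for clarity only): backends
 * emitting a pointer into the cmdstream resolve the bo to a table index
 * with something like:
 *
 *    uint32_t idx = fd_submit_append_bo(fd_submit, bo);
 *
 * Since bo->idx caches the index most recently assigned to this bo (possibly
 * by a different submit), the common case takes the unlocked READ_ONCE()
 * fast path; the (idx >= nr_bos) || (bos[idx] != bo) check above rejects a
 * stale cached index and falls back to the hash table.
 */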

static void
fd_submit_suballoc_ring_bo(struct fd_submit *submit,
                           struct fd_ringbuffer_sp *fd_ring, uint32_t size)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (fd_submit->suballoc_ring) {
      struct fd_ringbuffer_sp *suballoc_ring =
         to_fd_ringbuffer_sp(fd_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset =
         fd_ringbuffer_size(fd_submit->suballoc_ring) + suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, 0x10);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE);
      fd_ring->offset = 0;
   } else {
      fd_ring->ring_bo = fd_bo_ref(suballoc_bo);
      fd_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = fd_submit->suballoc_ring;

   fd_submit->suballoc_ring = fd_ringbuffer_ref(&fd_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}
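
/* Illustrative layout (for clarity only): streaming ringbuffers of a submit
 * are packed back-to-back into a shared SUBALLOC_SIZE bo, each starting at
 * a 16-byte aligned offset:
 *
 *    suballoc_bo: [ ring A |pad| ring B |pad| ring C | ... ]
 *
 * A fresh bo is allocated only when the next ring would overflow the current
 * one, so small streaming rings avoid the cost of one bo allocation each.
 */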

static struct fd_ringbuffer *
fd_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                            enum fd_ringbuffer_flags flags)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_ringbuffer_sp *fd_ring;

   fd_ring = slab_alloc(&fd_submit->ring_pool);

   fd_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   fd_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      fd_submit_suballoc_ring_bo(submit, fd_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = INIT_SIZE;

      fd_ring->offset = 0;
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!fd_ringbuffer_sp_init(fd_ring, size, flags))
      return NULL;

   return &fd_ring->base;
}

/**
 * Prepare submit for flush, always done synchronously.
 *
 * 1) Finalize primary ringbuffer, at this point no more cmdstream may
 *    be written into it, since from the PoV of the upper level driver
 *    the submit is flushed, even if deferred
 * 2) Add cmdstream bos to bos table
 * 3) Update bo fences
 */
static bool
fd_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
                        struct fd_submit_fence *out_fence)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   bool has_shared = false;

   finalize_current_cmd(submit->primary);

   struct fd_ringbuffer_sp *primary =
      to_fd_ringbuffer_sp(submit->primary);

   for (unsigned i = 0; i < primary->u.nr_cmds; i++)
      fd_submit_append_bo(fd_submit, primary->u.cmds[i].ring_bo);

   simple_mtx_lock(&table_lock);
   for (unsigned i = 0; i < fd_submit->nr_bos; i++) {
      fd_bo_add_fence(fd_submit->bos[i], submit->pipe, submit->fence);
      has_shared |= fd_submit->bos[i]->shared;
   }
   simple_mtx_unlock(&table_lock);

   fd_submit->out_fence = out_fence;
   fd_submit->in_fence_fd = (in_fence_fd == -1) ?
         -1 : os_dupfd_cloexec(in_fence_fd);

   return has_shared;
}
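
/* The flush path is split so that only the bookkeeping in _flush_prep() runs
 * synchronously on the caller's thread; the actual kernel submit
 * (flush_submit_list) is handed to the device's submit_queue and runs
 * asynchronously in fd_submit_sp_flush_execute() below.
 */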

static void
fd_submit_sp_flush_execute(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_pipe *pipe = submit->pipe;

   fd_submit->flush_submit_list(&fd_submit->submit_list);

   pthread_mutex_lock(&flush_mtx);
   assert(fd_fence_before(pipe->last_submit_fence, fd_submit->base.fence));
   pipe->last_submit_fence = fd_submit->base.fence;
   pthread_cond_broadcast(&flush_cnd);
   pthread_mutex_unlock(&flush_mtx);

   DEBUG_MSG("finish: %u", submit->fence);
}

static void
fd_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   fd_submit_del(submit);
}

static int
enqueue_submit_list(struct list_head *submit_list)
{
   struct fd_submit *submit = last_submit(submit_list);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   list_replace(submit_list, &fd_submit->submit_list);
   list_inithead(submit_list);

   struct util_queue_fence *fence;
   if (fd_submit->out_fence) {
      fence = &fd_submit->out_fence->ready;
   } else {
      util_queue_fence_init(&fd_submit->fence);
      fence = &fd_submit->fence;
   }

   DEBUG_MSG("enqueue: %u", submit->fence);

   util_queue_add_job(&submit->pipe->dev->submit_queue,
                      submit, fence,
                      fd_submit_sp_flush_execute,
                      fd_submit_sp_flush_cleanup,
                      0);

   return 0;
}

static bool
should_defer(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   /* if too many bo's, it may not be worth the CPU cost of submit merging: */
   if (fd_submit->nr_bos > 30)
      return false;

   /* On the kernel side, with 32K ringbuffer, we have an upper limit of 2k
    * cmds before we exceed the size of the ringbuffer, which results in
    * deadlock writing into the RB (ie. kernel doesn't finish writing into
    * the RB so it doesn't kick the GPU to start consuming from the RB)
    */
   if (submit->pipe->dev->deferred_cmds > 128)
      return false;

   return true;
}
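
/* Illustrative flow (simplified, for clarity only): deferral lets several
 * flushes be merged into a single kernel submit:
 *
 *    flush(A)                    -> deferred on dev->deferred_submits
 *    flush(B)                    -> deferred
 *    flush(C, out_fence needed)  -> A+B+C pulled off the deferred list and
 *                                   handed to enqueue_submit_list()
 *
 * should_defer() above bounds how much work may accumulate before a flush
 * is forced anyway.
 */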

static int
fd_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
                   struct fd_submit_fence *out_fence)
{
   struct fd_device *dev = submit->pipe->dev;
   struct fd_pipe *pipe = submit->pipe;

   /* Acquire lock before flush_prep() because it is possible to race between
    * this and pipe->flush():
    */
   simple_mtx_lock(&dev->submit_lock);

   /* If there are deferred submits from another fd_pipe, flush them now,
    * since we can't merge submits from different submitqueues (ie. they
    * could have different priority, etc)
    */
   if (!list_is_empty(&dev->deferred_submits) &&
       (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) {
      struct list_head submit_list;

      list_replace(&dev->deferred_submits, &submit_list);
      list_inithead(&dev->deferred_submits);
      dev->deferred_cmds = 0;

      enqueue_submit_list(&submit_list);
   }

   list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits);

   bool has_shared = fd_submit_sp_flush_prep(submit, in_fence_fd, out_fence);

   assert(fd_fence_before(pipe->last_enqueue_fence, submit->fence));
   pipe->last_enqueue_fence = submit->fence;

   /* If we don't need an out-fence, we can defer the submit.
    *
    * TODO we could defer submits with in-fence as well.. if we took our own
    * reference to the fd, and merged all the in-fence-fd's when we flush the
    * deferred submits
    */
   if ((in_fence_fd == -1) && !out_fence && !has_shared && should_defer(submit)) {
      DEBUG_MSG("defer: %u", submit->fence);
      dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary);
      assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
      simple_mtx_unlock(&dev->submit_lock);

      return 0;
   }

   struct list_head submit_list;

   list_replace(&dev->deferred_submits, &submit_list);
   list_inithead(&dev->deferred_submits);
   dev->deferred_cmds = 0;

   simple_mtx_unlock(&dev->submit_lock);

   return enqueue_submit_list(&submit_list);
}
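
/* fd_pipe_sp_flush() below is the other half of deferral: it force-flushes
 * any deferred submits up to (and including) the requested fence, then
 * blocks on flush_cnd until the submit-queue thread has actually handed
 * them to the kernel.
 */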

void
fd_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
{
   struct fd_device *dev = pipe->dev;
   struct list_head submit_list;

   DEBUG_MSG("flush: %u", fence);

   list_inithead(&submit_list);

   simple_mtx_lock(&dev->submit_lock);

   assert(!fd_fence_after(fence, pipe->last_enqueue_fence));

   foreach_submit_safe (deferred_submit, &dev->deferred_submits) {
      /* We should never have submits from multiple pipes in the deferred
       * list.  If we did, we couldn't compare their fence to our fence,
       * since each fd_pipe is an independent timeline.
       */
      if (deferred_submit->pipe != pipe)
         break;

      if (fd_fence_after(deferred_submit->fence, fence))
         break;

      list_del(&deferred_submit->node);
      list_addtail(&deferred_submit->node, &submit_list);
      dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary);
   }

   assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));

   simple_mtx_unlock(&dev->submit_lock);

   if (list_is_empty(&submit_list))
      goto flush_sync;

   enqueue_submit_list(&submit_list);

flush_sync:
   /* Once we are sure that we've enqueued at least up to the requested
    * submit, we need to be sure that submitq has caught up and flushed
    * them to the kernel
    */
   pthread_mutex_lock(&flush_mtx);
   while (fd_fence_before(pipe->last_submit_fence, fence)) {
      pthread_cond_wait(&flush_cnd, &flush_mtx);
   }
   pthread_mutex_unlock(&flush_mtx);
}

static void
fd_submit_sp_destroy(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   if (fd_submit->suballoc_ring)
      fd_ringbuffer_del(fd_submit->suballoc_ring);

   _mesa_hash_table_destroy(fd_submit->bo_table, NULL);

   // TODO it would be nice to have a way to assert() if all
   // rb's haven't been free'd back to the slab, because that is
   // an indication that we are leaking bo's
   slab_destroy_child(&fd_submit->ring_pool);

   for (unsigned i = 0; i < fd_submit->nr_bos; i++)
      fd_bo_del(fd_submit->bos[i]);

   free(fd_submit->bos);
   free(fd_submit);
}

static const struct fd_submit_funcs submit_funcs = {
   .new_ringbuffer = fd_submit_sp_new_ringbuffer,
   .flush = fd_submit_sp_flush,
   .destroy = fd_submit_sp_destroy,
};

struct fd_submit *
fd_submit_sp_new(struct fd_pipe *pipe, flush_submit_list_fn flush_submit_list)
{
   struct fd_submit_sp *fd_submit = calloc(1, sizeof(*fd_submit));
   struct fd_submit *submit;

   fd_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                 _mesa_key_pointer_equal);

   slab_create_child(&fd_submit->ring_pool, &pipe->ring_pool);

   fd_submit->flush_submit_list = flush_submit_list;

   submit = &fd_submit->base;
   submit->funcs = &submit_funcs;

   return submit;
}

void
fd_pipe_sp_ringpool_init(struct fd_pipe *pipe)
{
   // TODO tune size:
   slab_create_parent(&pipe->ring_pool, sizeof(struct fd_ringbuffer_sp), 16);
}

void
fd_pipe_sp_ringpool_fini(struct fd_pipe *pipe)
{
   if (pipe->ring_pool.num_elements)
      slab_destroy_parent(&pipe->ring_pool);
}

static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   APPEND(&fd_ring->u, cmds,
          (struct fd_cmd_sp){
             .ring_bo = fd_bo_ref(fd_ring->ring_bo),
             .size = offset_bytes(ring->cur, ring->start),
          });
}

static void
fd_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_pipe *pipe = fd_ring->u.submit->pipe;

   assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(fd_ring->ring_bo);
   fd_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(fd_ring->ring_bo);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;
   ring->size = size;
}

static inline bool
fd_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   for (int i = 0; i < fd_ring->u.nr_reloc_bos; i++) {
      if (fd_ring->u.reloc_bos[i] == bo)
         return true;
   }
   return false;
}

#define PTRSZ 64
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ
#define PTRSZ 32
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ
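
/* The two includes above stamp out 32-bit and 64-bit variants of the
 * emit_reloc helpers (fd_ringbuffer_sp_emit_reloc_nonobj_NN, _obj_NN and
 * _ring_NN), which the vtables below select between based on fd_dev_64b().
 */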

static uint32_t
fd_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_fd_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

static bool
fd_ringbuffer_sp_check_size(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit *submit = fd_ring->u.submit;

   if (to_fd_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   return true;
}

static void
fd_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   fd_bo_del(fd_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < fd_ring->u.nr_reloc_bos; i++) {
         fd_bo_del(fd_ring->u.reloc_bos[i]);
      }
      free(fd_ring->u.reloc_bos);

      free(fd_ring);
   } else {
      struct fd_submit *submit = fd_ring->u.submit;

      for (unsigned i = 0; i < fd_ring->u.nr_cmds; i++) {
         fd_bo_del(fd_ring->u.cmds[i].ring_bo);
      }
      free(fd_ring->u.cmds);

      slab_free(&to_fd_submit_sp(submit)->ring_pool, fd_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static inline struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &fd_ring->base;

   assert(fd_ring->ring_bo);

   uint8_t *base = fd_bo_map(fd_ring->ring_bo);
   ring->start = (void *)(base + fd_ring->offset);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   if (flags & _FD_RINGBUFFER_OBJECT) {
      if (fd_dev_64b(&fd_ring->u.pipe->dev_id)) {
         ring->funcs = &ring_funcs_obj_64;
      } else {
         ring->funcs = &ring_funcs_obj_32;
      }
   } else {
      if (fd_dev_64b(&fd_ring->u.submit->pipe->dev_id)) {
         ring->funcs = &ring_funcs_nonobj_64;
      } else {
         ring->funcs = &ring_funcs_nonobj_32;
      }
   }

   // TODO initializing these could probably be conditional on flags
   // since unneeded for FD_RINGBUFFER_STAGING case..
   fd_ring->u.cmds = NULL;
   fd_ring->u.nr_cmds = fd_ring->u.max_cmds = 0;

   fd_ring->u.reloc_bos = NULL;
   fd_ring->u.nr_reloc_bos = fd_ring->u.max_reloc_bos = 0;

   return ring;
}

struct fd_ringbuffer *
fd_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct fd_device *dev = pipe->dev;
   struct fd_ringbuffer_sp *fd_ring = malloc(sizeof(*fd_ring));

   /* Lock access to the fd_device->suballoc_* since ringbuffer object
    * allocation can happen both on the frontend (most CSOs) and the driver
    * thread (a6xx cached tex state, for example)
    */
   simple_mtx_lock(&dev->suballoc_lock);

   /* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */
   fd_ring->offset = align(dev->suballoc_offset, 64);
   if (!dev->suballoc_bo ||
       fd_ring->offset + size > fd_bo_size(dev->suballoc_bo)) {
      if (dev->suballoc_bo)
         fd_bo_del(dev->suballoc_bo);
      dev->suballoc_bo =
         fd_bo_new_ring(dev, MAX2(SUBALLOC_SIZE, align(size, 4096)));
      fd_ring->offset = 0;
   }

   fd_ring->u.pipe = pipe;
   fd_ring->ring_bo = fd_bo_ref(dev->suballoc_bo);
   fd_ring->base.refcnt = 1;

   dev->suballoc_offset = fd_ring->offset + size;

   simple_mtx_unlock(&dev->suballoc_lock);

   return fd_ringbuffer_sp_init(fd_ring, size, _FD_RINGBUFFER_OBJECT);
}
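
/* Note: ringbuffer objects created here are long-lived state objects (e.g.
 * CSOs) that can be referenced from many submits, which is why they are
 * suballocated from a device-level bo under dev->suballoc_lock rather than
 * from the per-submit streaming suballocation used by
 * fd_submit_suballoc_ring_bo().
 */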