/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <assert.h>
#include <inttypes.h>
#include <pthread.h>

#include "util/hash_table.h"
#include "util/os_file.h"
#include "util/slab.h"

#include "freedreno_ringbuffer_sp.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */

#define INIT_SIZE 0x1000

#define SUBALLOC_SIZE (32 * 1024)

/* In the pipe->flush() path we don't have a util_queue_fence we can wait on,
 * so we use a condition-variable instead.  Note that pipe->flush() is not
 * expected to be a common/hot path.
 */
static pthread_cond_t  flush_cnd = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags);

/* add (if needed) bo to submit and return index: */
uint32_t
fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
{
   uint32_t idx;

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
   idx = READ_ONCE(bo->idx);

   if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else {
         idx = APPEND(submit, bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
                                            (void *)(uintptr_t)idx);
      }
      bo->idx = idx;
   }

   return idx;
}

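/* Suballocate ring storage for a FD_RINGBUFFER_STREAMING ringbuffer.  New
 * rings are packed into the submit's current suballoc bo (aligned to 16
 * bytes); once the requested size no longer fits, a fresh SUBALLOC_SIZE bo
 * is allocated and becomes the new suballoc target.
 */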
static void
fd_submit_suballoc_ring_bo(struct fd_submit *submit,
                           struct fd_ringbuffer_sp *fd_ring, uint32_t size)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (fd_submit->suballoc_ring) {
      struct fd_ringbuffer_sp *suballoc_ring =
         to_fd_ringbuffer_sp(fd_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset =
         fd_ringbuffer_size(fd_submit->suballoc_ring) + suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, 0x10);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE);
      fd_ring->offset = 0;
   } else {
      fd_ring->ring_bo = fd_bo_ref(suballoc_bo);
      fd_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = fd_submit->suballoc_ring;

   fd_submit->suballoc_ring = fd_ringbuffer_ref(&fd_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}

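/* Allocate a new ringbuffer for the submit, backed by a slab entry from the
 * submit's ring_pool.  Streaming rings are suballocated from a shared bo;
 * other rings get their own bo (growable rings start at INIT_SIZE and are
 * grown on demand).
 */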
static struct fd_ringbuffer *
fd_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                            enum fd_ringbuffer_flags flags)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_ringbuffer_sp *fd_ring;

   fd_ring = slab_alloc(&fd_submit->ring_pool);

   fd_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   fd_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      fd_submit_suballoc_ring_bo(submit, fd_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = INIT_SIZE;

      fd_ring->offset = 0;
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!fd_ringbuffer_sp_init(fd_ring, size, flags))
      return NULL;

   return &fd_ring->base;
}

/**
 * Prepare submit for flush, always done synchronously.
 *
 * 1) Finalize primary ringbuffer, at this point no more cmdstream may
 *    be written into it, since from the PoV of the upper level driver
 *    the submit is flushed, even if deferred
 * 2) Add cmdstream bos to bos table
 * 3) Update bo fences
 */
static bool
fd_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
                        struct fd_submit_fence *out_fence)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   bool has_shared = false;

   finalize_current_cmd(submit->primary);

   struct fd_ringbuffer_sp *primary =
      to_fd_ringbuffer_sp(submit->primary);

   for (unsigned i = 0; i < primary->u.nr_cmds; i++)
      fd_submit_append_bo(fd_submit, primary->u.cmds[i].ring_bo);

   simple_mtx_lock(&table_lock);
   for (unsigned i = 0; i < fd_submit->nr_bos; i++) {
      fd_bo_add_fence(fd_submit->bos[i], submit->pipe, submit->fence);
      has_shared |= fd_submit->bos[i]->shared;
   }
   simple_mtx_unlock(&table_lock);

   fd_submit->out_fence   = out_fence;
   fd_submit->in_fence_fd = (in_fence_fd == -1) ?
         -1 : os_dupfd_cloexec(in_fence_fd);

   return has_shared;
}

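/* Runs on the device's submit_queue thread: hand the merged submit list to
 * the backend's flush_submit_list(), then advance last_submit_fence and wake
 * any fd_pipe_sp_flush() waiters blocked on flush_cnd.
 */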
static void
fd_submit_sp_flush_execute(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_pipe *pipe = submit->pipe;

   fd_submit->flush_submit_list(&fd_submit->submit_list);

   pthread_mutex_lock(&flush_mtx);
   assert(fd_fence_before(pipe->last_submit_fence, fd_submit->base.fence));
   pipe->last_submit_fence = fd_submit->base.fence;
   pthread_cond_broadcast(&flush_cnd);
   pthread_mutex_unlock(&flush_mtx);

   DEBUG_MSG("finish: %u", submit->fence);
}

static void
fd_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   fd_submit_del(submit);
}

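/* Move the deferred submit list onto the last submit in the list and queue
 * that submit (and with it the whole list) on the device's submit_queue,
 * using out_fence->ready as the queue fence when the caller requested an
 * out-fence.
 */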
static int
enqueue_submit_list(struct list_head *submit_list)
{
   struct fd_submit *submit = last_submit(submit_list);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   list_replace(submit_list, &fd_submit->submit_list);
   list_inithead(submit_list);

   struct util_queue_fence *fence;
   if (fd_submit->out_fence) {
      fence = &fd_submit->out_fence->ready;
   } else {
      util_queue_fence_init(&fd_submit->fence);
      fence = &fd_submit->fence;
   }

   DEBUG_MSG("enqueue: %u", submit->fence);

   util_queue_add_job(&submit->pipe->dev->submit_queue,
                      submit, fence,
                      fd_submit_sp_flush_execute,
                      fd_submit_sp_flush_cleanup,
                      0);

   return 0;
}

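/* Heuristic for whether a flushed submit can stay on the deferred list to be
 * merged with later submits, rather than being sent to the kernel right away.
 */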
static bool
should_defer(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   /* if too many bo's, it may not be worth the CPU cost of submit merging: */
   if (fd_submit->nr_bos > 30)
      return false;

   /* On the kernel side, with 32K ringbuffer, we have an upper limit of 2k
    * cmds before we exceed the size of the ringbuffer, which results in
    * deadlock writing into the RB (ie. kernel doesn't finish writing into
    * the RB so it doesn't kick the GPU to start consuming from the RB)
    */
   if (submit->pipe->dev->deferred_cmds > 128)
      return false;

   return true;
}

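/* Flush path for the softpin submit: append this submit to the device's
 * deferred list (flushing any deferred submits that belong to a different
 * fd_pipe first), then either leave it deferred or enqueue the whole list,
 * depending on fences, shared bos, and should_defer().
 */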
static int
fd_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
                   struct fd_submit_fence *out_fence)
{
   struct fd_device *dev = submit->pipe->dev;
   struct fd_pipe *pipe = submit->pipe;

   /* Acquire lock before flush_prep() because it is possible to race between
    * this and pipe->flush():
    */
   simple_mtx_lock(&dev->submit_lock);

   /* If there are deferred submits from another fd_pipe, flush them now,
    * since we can't merge submits from different submitqueues (ie. they
    * could have different priorities, etc)
    */
   if (!list_is_empty(&dev->deferred_submits) &&
       (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) {
      struct list_head submit_list;

      list_replace(&dev->deferred_submits, &submit_list);
      list_inithead(&dev->deferred_submits);
      dev->deferred_cmds = 0;

      enqueue_submit_list(&submit_list);
   }

   list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits);

   bool has_shared = fd_submit_sp_flush_prep(submit, in_fence_fd, out_fence);

   assert(fd_fence_before(pipe->last_enqueue_fence, submit->fence));
   pipe->last_enqueue_fence = submit->fence;

   /* If we don't need an out-fence, we can defer the submit.
    *
    * TODO we could defer submits with in-fence as well.. if we took our own
    * reference to the fd, and merged all the in-fence-fd's when we flush the
    * deferred submits
    */
   if ((in_fence_fd == -1) && !out_fence && !has_shared && should_defer(submit)) {
      DEBUG_MSG("defer: %u", submit->fence);
      dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary);
      assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
      simple_mtx_unlock(&dev->submit_lock);

      return 0;
   }

   struct list_head submit_list;

   list_replace(&dev->deferred_submits, &submit_list);
   list_inithead(&dev->deferred_submits);
   dev->deferred_cmds = 0;

   simple_mtx_unlock(&dev->submit_lock);

   return enqueue_submit_list(&submit_list);
}

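/* Flush any deferred submits on this pipe up to (and including) the requested
 * fence, then block until the submit queue has actually handed them off to
 * the kernel (tracked via pipe->last_submit_fence).
 */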
void
fd_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
{
   struct fd_device *dev = pipe->dev;
   struct list_head submit_list;

   DEBUG_MSG("flush: %u", fence);

   list_inithead(&submit_list);

   simple_mtx_lock(&dev->submit_lock);

   assert(!fd_fence_after(fence, pipe->last_enqueue_fence));

   foreach_submit_safe (deferred_submit, &dev->deferred_submits) {
      /* We should never have submits from multiple pipes in the deferred
       * list.  If we did, we couldn't compare their fence to our fence,
       * since each fd_pipe is an independent timeline.
       */
      if (deferred_submit->pipe != pipe)
         break;

      if (fd_fence_after(deferred_submit->fence, fence))
         break;

      list_del(&deferred_submit->node);
      list_addtail(&deferred_submit->node, &submit_list);
      dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary);
   }

   assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));

   simple_mtx_unlock(&dev->submit_lock);

   if (list_is_empty(&submit_list))
      goto flush_sync;

   enqueue_submit_list(&submit_list);

flush_sync:
   /* Once we are sure that we've enqueued at least up to the requested
    * submit, we need to be sure that the submit queue has caught up and
    * flushed them to the kernel
    */
   pthread_mutex_lock(&flush_mtx);
   while (fd_fence_before(pipe->last_submit_fence, fence)) {
      pthread_cond_wait(&flush_cnd, &flush_mtx);
   }
   pthread_mutex_unlock(&flush_mtx);
}

static void
fd_submit_sp_destroy(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   if (fd_submit->suballoc_ring)
      fd_ringbuffer_del(fd_submit->suballoc_ring);

   _mesa_hash_table_destroy(fd_submit->bo_table, NULL);

   // TODO it would be nice to have a way to assert() that all rb's have
   // been freed back to the slab, because any that haven't are an
   // indication that we are leaking bo's
   slab_destroy_child(&fd_submit->ring_pool);

   for (unsigned i = 0; i < fd_submit->nr_bos; i++)
      fd_bo_del(fd_submit->bos[i]);

   free(fd_submit->bos);
   free(fd_submit);
}

static const struct fd_submit_funcs submit_funcs = {
   .new_ringbuffer = fd_submit_sp_new_ringbuffer,
   .flush = fd_submit_sp_flush,
   .destroy = fd_submit_sp_destroy,
};

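/* Create a softpin submit for the given pipe.  The backend supplies
 * flush_submit_list(), which is what eventually passes the (possibly merged)
 * submit list to the kernel from the submit_queue thread.
 *
 * A rough usage sketch (assuming the generic fd_submit_new() /
 * fd_submit_new_ringbuffer() / fd_submit_flush() entry points, which dispatch
 * through the funcs tables installed here and whose exact signatures may
 * differ):
 *
 *    struct fd_submit *submit = fd_submit_new(pipe);
 *    struct fd_ringbuffer *ring =
 *       fd_submit_new_ringbuffer(submit, 0x1000, FD_RINGBUFFER_PRIMARY);
 *    ... emit cmdstream into ring ...
 *    fd_submit_flush(submit, -1, NULL);
 */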
struct fd_submit *
fd_submit_sp_new(struct fd_pipe *pipe, flush_submit_list_fn flush_submit_list)
{
   struct fd_submit_sp *fd_submit = calloc(1, sizeof(*fd_submit));
   struct fd_submit *submit;

   fd_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                 _mesa_key_pointer_equal);

   slab_create_child(&fd_submit->ring_pool, &pipe->ring_pool);

   fd_submit->flush_submit_list = flush_submit_list;

   submit = &fd_submit->base;
   submit->funcs = &submit_funcs;

   return submit;
}

void
fd_pipe_sp_ringpool_init(struct fd_pipe *pipe)
{
   // TODO tune size:
   slab_create_parent(&pipe->ring_pool, sizeof(struct fd_ringbuffer_sp), 16);
}

void
fd_pipe_sp_ringpool_fini(struct fd_pipe *pipe)
{
   if (pipe->ring_pool.num_elements)
      slab_destroy_parent(&pipe->ring_pool);
}

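/* Record the cmdstream written so far into the ring's current bo as an entry
 * in the cmds table (used when flushing, and when a growable ring switches to
 * a new bo).
 */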
static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   APPEND(&fd_ring->u, cmds,
          (struct fd_cmd_sp){
             .ring_bo = fd_bo_ref(fd_ring->ring_bo),
             .size = offset_bytes(ring->cur, ring->start),
          });
}

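/* Grow callback for FD_RINGBUFFER_GROWABLE rings: finalize the cmdstream in
 * the current bo (which stays referenced from the cmds table) and continue
 * emitting into a newly allocated bo of the requested size.
 */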
static void
fd_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_pipe *pipe = fd_ring->u.submit->pipe;

   assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(fd_ring->ring_bo);
   fd_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(fd_ring->ring_bo);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;
   ring->size = size;
}

static inline bool
fd_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   for (int i = 0; i < fd_ring->u.nr_reloc_bos; i++) {
      if (fd_ring->u.reloc_bos[i] == bo)
         return true;
   }
   return false;
}

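/* Instantiate the 64b and 32b variants of the reloc-emit helpers by including
 * the template twice with different PTRSZ (the resulting *_32/*_64 functions
 * are plugged into the ring_funcs tables below).
 */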
#define PTRSZ 64
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ
#define PTRSZ 32
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ

static uint32_t
fd_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_fd_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

static bool
fd_ringbuffer_sp_check_size(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit *submit = fd_ring->u.submit;

   if (to_fd_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   return true;
}

static void
fd_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   fd_bo_del(fd_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < fd_ring->u.nr_reloc_bos; i++) {
         fd_bo_del(fd_ring->u.reloc_bos[i]);
      }
      free(fd_ring->u.reloc_bos);

      free(fd_ring);
   } else {
      struct fd_submit *submit = fd_ring->u.submit;

      for (unsigned i = 0; i < fd_ring->u.nr_cmds; i++) {
         fd_bo_del(fd_ring->u.cmds[i].ring_bo);
      }
      free(fd_ring->u.cmds);

      slab_free(&to_fd_submit_sp(submit)->ring_pool, fd_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

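/* Common init for all softpin rings: map the (possibly suballocated) bo, set
 * up the start/cur/end pointers, and pick the funcs table based on whether
 * this is a stateobj ring and whether the device uses 64b addresses.
 */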
static inline struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &fd_ring->base;

   assert(fd_ring->ring_bo);

   uint8_t *base = fd_bo_map(fd_ring->ring_bo);
   ring->start = (void *)(base + fd_ring->offset);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   if (flags & _FD_RINGBUFFER_OBJECT) {
      if (fd_dev_64b(&fd_ring->u.pipe->dev_id)) {
         ring->funcs = &ring_funcs_obj_64;
      } else {
         ring->funcs = &ring_funcs_obj_32;
      }
   } else {
      if (fd_dev_64b(&fd_ring->u.submit->pipe->dev_id)) {
         ring->funcs = &ring_funcs_nonobj_64;
      } else {
         ring->funcs = &ring_funcs_nonobj_32;
      }
   }

   // TODO initializing these could probably be conditional on flags
   // since they are unneeded for the FD_RINGBUFFER_STAGING case..
   fd_ring->u.cmds = NULL;
   fd_ring->u.nr_cmds = fd_ring->u.max_cmds = 0;

   fd_ring->u.reloc_bos = NULL;
   fd_ring->u.nr_reloc_bos = fd_ring->u.max_reloc_bos = 0;

   return ring;
}

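/* Create a stateobj (_FD_RINGBUFFER_OBJECT) ringbuffer, suballocated from the
 * device-wide suballoc bo so that many small, long-lived stateobjs can share
 * a single bo.
 */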
struct fd_ringbuffer *
fd_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct fd_device *dev = pipe->dev;
   struct fd_ringbuffer_sp *fd_ring = malloc(sizeof(*fd_ring));

   /* Lock access to the dev->suballoc_* fields since ringbuffer object
    * allocation can happen both on the frontend (most CSOs) and the driver
    * thread (a6xx cached tex state, for example)
    */
   simple_mtx_lock(&dev->suballoc_lock);

   /* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */
   fd_ring->offset = align(dev->suballoc_offset, 64);
   if (!dev->suballoc_bo ||
       fd_ring->offset + size > fd_bo_size(dev->suballoc_bo)) {
      if (dev->suballoc_bo)
         fd_bo_del(dev->suballoc_bo);
      dev->suballoc_bo =
         fd_bo_new_ring(dev, MAX2(SUBALLOC_SIZE, align(size, 4096)));
      fd_ring->offset = 0;
   }

   fd_ring->u.pipe = pipe;
   fd_ring->ring_bo = fd_bo_ref(dev->suballoc_bo);
   fd_ring->base.refcnt = 1;

   dev->suballoc_offset = fd_ring->offset + size;

   simple_mtx_unlock(&dev->suballoc_lock);

   return fd_ringbuffer_sp_init(fd_ring, size, _FD_RINGBUFFER_OBJECT);
}