/*
 * Copyright © 2022 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <inttypes.h>
#include <pthread.h>

#include "util/libsync.h"
#include "util/os_file.h"

#include "drm/freedreno_ringbuffer_sp.h"
#include "virtio_priv.h"

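/* Retire-queue job: block in the guest until the submit's out-fence
 * signals, then close the fd.  Paired with retire_cleanup() below, which
 * drops the submit reference taken when the job was queued.
 */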
static void
retire_execute(void *job, void *gdata, int thread_index)
{
   struct fd_submit_sp *fd_submit = job;

   sync_wait(fd_submit->out_fence_fd, -1);
   close(fd_submit->out_fence_fd);
}

static void
retire_cleanup(void *job, void *gdata, int thread_index)
{
   struct fd_submit_sp *fd_submit = job;
   fd_submit_del(&fd_submit->base);
}

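/* Flush a list of deferred submits as a single kernel submission: merge
 * the cmds and bo tables of each deferred submit into the last one,
 * encode the result as a msm_ccmd_gem_submit_req, and send it to the
 * host over virtio.
 */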
static int
flush_submit_list(struct list_head *submit_list)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(last_submit(submit_list));
   struct virtio_pipe *virtio_pipe = to_virtio_pipe(fd_submit->base.pipe);
   struct fd_device *dev = virtio_pipe->base.dev;

   unsigned nr_cmds = 0;

   /* Determine the number of extra cmds from deferred submits that
    * we will be merging in:
    */
   foreach_submit (submit, submit_list) {
      assert(submit->pipe == &virtio_pipe->base);
      nr_cmds += to_fd_ringbuffer_sp(submit->primary)->u.nr_cmds;
   }

   /* TODO we can get rid of the extra copy into the req by just
    * assuming the max amount that nr_bos can grow by is nr_cmds,
    * and just over-allocating a bit.
    */

   struct drm_msm_gem_submit_cmd cmds[nr_cmds];

   unsigned cmd_idx = 0;

   /* Build up the table of cmds, and for all but the last submit in the
    * list, merge their bo tables into the last submit.
    */
   foreach_submit_safe (submit, submit_list) {
      struct fd_ringbuffer_sp *deferred_primary =
         to_fd_ringbuffer_sp(submit->primary);

      for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) {
         cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF;
         cmds[cmd_idx].submit_idx =
               fd_submit_append_bo(fd_submit, deferred_primary->u.cmds[i].ring_bo);
         cmds[cmd_idx].submit_offset = deferred_primary->offset;
         cmds[cmd_idx].size = deferred_primary->u.cmds[i].size;
         cmds[cmd_idx].pad = 0;
         cmds[cmd_idx].nr_relocs = 0;

         cmd_idx++;
      }

      /* We are merging all the submits in the list into the last submit,
       * so the remainder of the loop body doesn't apply to the last submit
       */
      if (submit == last_submit(submit_list)) {
         DEBUG_MSG("merged %u submits", cmd_idx);
         break;
      }

      struct fd_submit_sp *fd_deferred_submit = to_fd_submit_sp(submit);
      for (unsigned i = 0; i < fd_deferred_submit->nr_bos; i++) {
         /* Note: if bo is used in both the current submit and the deferred
          * submit being merged, we expect to hit the fast-path as we add it
          * to the current submit:
          */
         fd_submit_append_bo(fd_submit, fd_deferred_submit->bos[i]);
      }

      /* Now that the cmds/bos have been transferred over to the current
       * submit, we can remove the deferred submit from the list and drop
       * its reference:
       */
      list_del(&submit->node);
      fd_submit_del(submit);
   }

   /* Needs to be after get_cmd() as that could create bos/cmds table:
    *
    * NOTE allocate on-stack in the common case, but with an upper-
    * bound to limit on-stack allocation to 4k:
    */
   const unsigned bo_limit = 4096 / sizeof(struct drm_msm_gem_submit_bo);
   bool bos_on_stack = fd_submit->nr_bos < bo_limit;
   struct drm_msm_gem_submit_bo
      _submit_bos[bos_on_stack ? fd_submit->nr_bos : 0];
   struct drm_msm_gem_submit_bo *submit_bos;
   uint32_t _guest_handles[bos_on_stack ? fd_submit->nr_bos : 0];
   uint32_t *guest_handles;
   if (bos_on_stack) {
      submit_bos = _submit_bos;
      guest_handles = _guest_handles;
   } else {
      submit_bos = malloc(fd_submit->nr_bos * sizeof(submit_bos[0]));
      guest_handles = malloc(fd_submit->nr_bos * sizeof(guest_handles[0]));
   }

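   /* Build the bo table: guest_handles[] holds the guest-side GEM handles
    * handed to virtio_execbuf_fenced(), while the table encoded into the
    * request identifies each buffer to the host by its res_id:
    */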
   for (unsigned i = 0; i < fd_submit->nr_bos; i++) {
      struct virtio_bo *virtio_bo = to_virtio_bo(fd_submit->bos[i]);

      guest_handles[i] = virtio_bo->base.handle;

      submit_bos[i].flags = fd_submit->bos[i]->reloc_flags;
      submit_bos[i].handle = virtio_bo->res_id;
      submit_bos[i].presumed = 0;
   }

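   /* Allocate the next guest-assigned kernel fence seqno, starting over
    * at 1 on overflow so the seqno never reads as zero or negative:
    */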
   if (virtio_pipe->next_submit_fence <= 0)
      virtio_pipe->next_submit_fence = 1;

   uint32_t kfence = virtio_pipe->next_submit_fence++;

   /* TODO avoid extra memcpy, and populate bo's and cmds directly
    * into the req msg
    */
   unsigned bos_len = fd_submit->nr_bos * sizeof(struct drm_msm_gem_submit_bo);
   unsigned cmd_len = nr_cmds * sizeof(struct drm_msm_gem_submit_cmd);
   unsigned req_len = sizeof(struct msm_ccmd_gem_submit_req) + bos_len + cmd_len;
   struct msm_ccmd_gem_submit_req *req = malloc(req_len);

   req->hdr      = MSM_CCMD(GEM_SUBMIT, req_len);
   req->flags    = virtio_pipe->pipe;
   req->queue_id = virtio_pipe->queue_id;
   req->nr_bos   = fd_submit->nr_bos;
   req->nr_cmds  = nr_cmds;
   req->fence    = kfence;

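   /* Request payload layout: the bo table first, followed by the cmds
    * table:
    */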
   memcpy(req->payload, submit_bos, bos_len);
   memcpy(req->payload + bos_len, cmds, cmd_len);

   struct fd_submit_fence *out_fence = fd_submit->out_fence;
   int *out_fence_fd = NULL;

   if (out_fence) {
      out_fence->fence.kfence = kfence;
      out_fence->fence.ufence = fd_submit->base.fence;
      /* Even if the gallium driver hasn't requested a fence-fd, request
       * one anyway.  This way, if we have to block waiting for the fence,
       * we can do it in the guest, rather than in the single-threaded host.
       */
      out_fence->use_fence_fd = true;
      out_fence_fd = &out_fence->fence_fd;
   } else {
      /* we are using retire_queue, so we need an out-fence for each
       * submit.. we can just re-use fd_submit->out_fence_fd for temporary
       * storage.
       */
      out_fence_fd = &fd_submit->out_fence_fd;
   }

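   /* An explicit in-fence implies the driver is synchronizing explicitly,
    * so assume explicit sync from here on and skip implicit sync for this
    * and all future submits on the pipe:
    */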
   if (fd_submit->in_fence_fd != -1) {
      virtio_pipe->no_implicit_sync = true;
   }

   if (virtio_pipe->no_implicit_sync) {
      req->flags |= MSM_SUBMIT_NO_IMPLICIT;
   }

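   /* Hand the encoded request off to the host, fenced on this pipe's
    * ring, with the guest-side bo handles attached so the kernel driver
    * can track the bos referenced by this submission:
    */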
   virtio_execbuf_fenced(dev, &req->hdr, guest_handles, req->nr_bos,
                         fd_submit->in_fence_fd, out_fence_fd,
                         virtio_pipe->ring_idx);

   free(req);

   if (!bos_on_stack) {
      free(submit_bos);
      free(guest_handles);
   }

   if (fd_submit->in_fence_fd != -1)
      close(fd_submit->in_fence_fd);

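   /* If the fence fd is owned by the caller's out_fence, keep our own
    * duplicate, since retire_execute() closes fd_submit->out_fence_fd:
    */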
   if (out_fence_fd != &fd_submit->out_fence_fd)
      fd_submit->out_fence_fd = os_dupfd_cloexec(*out_fence_fd);

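   /* Hold a reference for the retire job; dropped in retire_cleanup(): */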
   fd_submit_ref(&fd_submit->base);

   util_queue_fence_init(&fd_submit->retire_fence);

   util_queue_add_job(&virtio_pipe->retire_queue,
                      fd_submit, &fd_submit->retire_fence,
                      retire_execute,
                      retire_cleanup,
                      0);

   return 0;
}

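/* Create a new submit for the pipe, with flush_submit_list() above as
 * the flush callback for deferred submits:
 */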
struct fd_submit *
virtio_submit_new(struct fd_pipe *pipe)
{
   /* We don't do any translation from internal FD_RELOC flags to MSM flags. */
   STATIC_ASSERT(FD_RELOC_READ == MSM_SUBMIT_BO_READ);
   STATIC_ASSERT(FD_RELOC_WRITE == MSM_SUBMIT_BO_WRITE);
   STATIC_ASSERT(FD_RELOC_DUMP == MSM_SUBMIT_BO_DUMP);

   return fd_submit_sp_new(pipe, flush_submit_list);
}