1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2019 Raspberry Pi Ltd
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "v3dv_private.h"
25bf215546Sopenharmony_ci#include "drm-uapi/v3d_drm.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "broadcom/clif/clif_dump.h"
28bf215546Sopenharmony_ci#include "util/libsync.h"
29bf215546Sopenharmony_ci#include "util/os_time.h"
30bf215546Sopenharmony_ci#include "vk_drm_syncobj.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_ci#include <errno.h>
33bf215546Sopenharmony_ci#include <time.h>
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_cistatic void
36bf215546Sopenharmony_civ3dv_clif_dump(struct v3dv_device *device,
37bf215546Sopenharmony_ci               struct v3dv_job *job,
38bf215546Sopenharmony_ci               struct drm_v3d_submit_cl *submit)
39bf215546Sopenharmony_ci{
40bf215546Sopenharmony_ci   if (!(unlikely(V3D_DEBUG & (V3D_DEBUG_CL |
41bf215546Sopenharmony_ci                               V3D_DEBUG_CL_NO_BIN |
42bf215546Sopenharmony_ci                               V3D_DEBUG_CLIF))))
43bf215546Sopenharmony_ci      return;
44bf215546Sopenharmony_ci
45bf215546Sopenharmony_ci   struct clif_dump *clif = clif_dump_init(&device->devinfo,
46bf215546Sopenharmony_ci                                           stderr,
47bf215546Sopenharmony_ci                                           V3D_DEBUG & (V3D_DEBUG_CL |
48bf215546Sopenharmony_ci                                                        V3D_DEBUG_CL_NO_BIN),
49bf215546Sopenharmony_ci                                           V3D_DEBUG & V3D_DEBUG_CL_NO_BIN);
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci   set_foreach(job->bos, entry) {
52bf215546Sopenharmony_ci      struct v3dv_bo *bo = (void *)entry->key;
53bf215546Sopenharmony_ci      char *name = ralloc_asprintf(NULL, "%s_0x%x",
54bf215546Sopenharmony_ci                                   bo->name, bo->offset);
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_ci      bool ok = v3dv_bo_map(device, bo, bo->size);
57bf215546Sopenharmony_ci      if (!ok) {
58bf215546Sopenharmony_ci         fprintf(stderr, "failed to map BO for clif_dump.\n");
59bf215546Sopenharmony_ci         ralloc_free(name);
60bf215546Sopenharmony_ci         goto free_clif;
61bf215546Sopenharmony_ci      }
62bf215546Sopenharmony_ci      clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci      ralloc_free(name);
65bf215546Sopenharmony_ci   }
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci   clif_dump(clif, submit);
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_ci free_clif:
70bf215546Sopenharmony_ci   clif_dump_destroy(clif);
71bf215546Sopenharmony_ci}
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_cistatic VkResult
74bf215546Sopenharmony_ciqueue_wait_idle(struct v3dv_queue *queue,
75bf215546Sopenharmony_ci                struct v3dv_submit_sync_info *sync_info)
76bf215546Sopenharmony_ci{
77bf215546Sopenharmony_ci   if (queue->device->pdevice->caps.multisync) {
78bf215546Sopenharmony_ci      int ret = drmSyncobjWait(queue->device->pdevice->render_fd,
79bf215546Sopenharmony_ci                               queue->last_job_syncs.syncs, 3,
80bf215546Sopenharmony_ci                               INT64_MAX, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
81bf215546Sopenharmony_ci                               NULL);
82bf215546Sopenharmony_ci      if (ret) {
83bf215546Sopenharmony_ci         return vk_errorf(queue, VK_ERROR_DEVICE_LOST,
84bf215546Sopenharmony_ci                          "syncobj wait failed: %m");
85bf215546Sopenharmony_ci      }
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ci      bool first = true;
88bf215546Sopenharmony_ci      for (int i = 0; i < 3; i++) {
89bf215546Sopenharmony_ci         if (!queue->last_job_syncs.first[i])
90bf215546Sopenharmony_ci            first = false;
91bf215546Sopenharmony_ci      }
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci      /* If we're not the first job, that means we're waiting on some
94bf215546Sopenharmony_ci       * per-queue-type syncobj which transitively waited on the semaphores
95bf215546Sopenharmony_ci       * so we can skip the semaphore wait.
96bf215546Sopenharmony_ci       */
97bf215546Sopenharmony_ci      if (first) {
98bf215546Sopenharmony_ci         VkResult result = vk_sync_wait_many(&queue->device->vk,
99bf215546Sopenharmony_ci                                             sync_info->wait_count,
100bf215546Sopenharmony_ci                                             sync_info->waits,
101bf215546Sopenharmony_ci                                             VK_SYNC_WAIT_COMPLETE,
102bf215546Sopenharmony_ci                                             UINT64_MAX);
103bf215546Sopenharmony_ci         if (result != VK_SUCCESS)
104bf215546Sopenharmony_ci            return result;
105bf215546Sopenharmony_ci      }
106bf215546Sopenharmony_ci   } else {
107bf215546Sopenharmony_ci      /* Without multisync, all the semaphores are baked into the one syncobj
108bf215546Sopenharmony_ci       * at the start of each submit so we only need to wait on the one.
109bf215546Sopenharmony_ci       */
110bf215546Sopenharmony_ci      int ret = drmSyncobjWait(queue->device->pdevice->render_fd,
111bf215546Sopenharmony_ci                               &queue->last_job_syncs.syncs[V3DV_QUEUE_ANY], 1,
112bf215546Sopenharmony_ci                               INT64_MAX, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
113bf215546Sopenharmony_ci                               NULL);
114bf215546Sopenharmony_ci      if (ret) {
115bf215546Sopenharmony_ci         return vk_errorf(queue, VK_ERROR_DEVICE_LOST,
116bf215546Sopenharmony_ci                          "syncobj wait failed: %m");
117bf215546Sopenharmony_ci      }
118bf215546Sopenharmony_ci   }
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci   for (int i = 0; i < 3; i++)
121bf215546Sopenharmony_ci      queue->last_job_syncs.first[i] = false;
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_ci   return VK_SUCCESS;
124bf215546Sopenharmony_ci}
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_cistatic VkResult
127bf215546Sopenharmony_cihandle_reset_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
128bf215546Sopenharmony_ci                           struct v3dv_submit_sync_info *sync_info)
129bf215546Sopenharmony_ci{
130bf215546Sopenharmony_ci   struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset;
131bf215546Sopenharmony_ci   assert(info->pool);
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci   /* We are about to reset query counters so we need to make sure that
134bf215546Sopenharmony_ci    * The GPU is not using them. The exception is timestamp queries, since
135bf215546Sopenharmony_ci    * we handle those in the CPU.
136bf215546Sopenharmony_ci    */
137bf215546Sopenharmony_ci   if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION)
138bf215546Sopenharmony_ci      v3dv_bo_wait(job->device, info->pool->bo, PIPE_TIMEOUT_INFINITE);
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci   if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
141bf215546Sopenharmony_ci      struct vk_sync_wait waits[info->count];
142bf215546Sopenharmony_ci      unsigned wait_count = 0;
143bf215546Sopenharmony_ci      for (int i = 0; i < info->count; i++) {
144bf215546Sopenharmony_ci         struct v3dv_query *query = &info->pool->queries[i];
145bf215546Sopenharmony_ci         /* Only wait for a query if we've used it otherwise we will be
146bf215546Sopenharmony_ci          * waiting forever for the fence to become signaled.
147bf215546Sopenharmony_ci          */
148bf215546Sopenharmony_ci         if (query->maybe_available) {
149bf215546Sopenharmony_ci            waits[wait_count] = (struct vk_sync_wait){
150bf215546Sopenharmony_ci               .sync = info->pool->queries[i].perf.last_job_sync
151bf215546Sopenharmony_ci            };
152bf215546Sopenharmony_ci            wait_count++;
153bf215546Sopenharmony_ci         };
154bf215546Sopenharmony_ci      }
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci      VkResult result = vk_sync_wait_many(&job->device->vk, wait_count, waits,
157bf215546Sopenharmony_ci                                          VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_ci      if (result != VK_SUCCESS)
160bf215546Sopenharmony_ci         return result;
161bf215546Sopenharmony_ci   }
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci   v3dv_reset_query_pools(job->device, info->pool, info->first, info->count);
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci   return VK_SUCCESS;
166bf215546Sopenharmony_ci}
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_cistatic VkResult
169bf215546Sopenharmony_ciexport_perfmon_last_job_sync(struct v3dv_queue *queue, struct v3dv_job *job, int *fd)
170bf215546Sopenharmony_ci{
171bf215546Sopenharmony_ci   int err;
172bf215546Sopenharmony_ci   if (job->device->pdevice->caps.multisync) {
173bf215546Sopenharmony_ci      static const enum v3dv_queue_type queues_to_sync[] = {
174bf215546Sopenharmony_ci         V3DV_QUEUE_CL,
175bf215546Sopenharmony_ci         V3DV_QUEUE_CSD,
176bf215546Sopenharmony_ci      };
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci      for (uint32_t i = 0; i < ARRAY_SIZE(queues_to_sync); i++) {
179bf215546Sopenharmony_ci         enum v3dv_queue_type queue_type = queues_to_sync[i];
180bf215546Sopenharmony_ci         int tmp_fd = -1;
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci         err = drmSyncobjExportSyncFile(job->device->pdevice->render_fd,
183bf215546Sopenharmony_ci                                        queue->last_job_syncs.syncs[queue_type],
184bf215546Sopenharmony_ci                                        &tmp_fd);
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci         if (err) {
187bf215546Sopenharmony_ci            close(*fd);
188bf215546Sopenharmony_ci            return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN,
189bf215546Sopenharmony_ci                             "sync file export failed: %m");
190bf215546Sopenharmony_ci         }
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci         err = sync_accumulate("v3dv", fd, tmp_fd);
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_ci         if (err) {
195bf215546Sopenharmony_ci            close(tmp_fd);
196bf215546Sopenharmony_ci            close(*fd);
197bf215546Sopenharmony_ci            return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN,
198bf215546Sopenharmony_ci                             "failed to accumulate sync files: %m");
199bf215546Sopenharmony_ci         }
200bf215546Sopenharmony_ci      }
201bf215546Sopenharmony_ci   } else {
202bf215546Sopenharmony_ci      err = drmSyncobjExportSyncFile(job->device->pdevice->render_fd,
203bf215546Sopenharmony_ci                                     queue->last_job_syncs.syncs[V3DV_QUEUE_ANY],
204bf215546Sopenharmony_ci                                     fd);
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci      if (err) {
207bf215546Sopenharmony_ci         return vk_errorf(&job->device->queue, VK_ERROR_UNKNOWN,
208bf215546Sopenharmony_ci                          "sync file export failed: %m");
209bf215546Sopenharmony_ci      }
210bf215546Sopenharmony_ci   }
211bf215546Sopenharmony_ci   return VK_SUCCESS;
212bf215546Sopenharmony_ci}
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_cistatic VkResult
215bf215546Sopenharmony_cihandle_end_query_cpu_job(struct v3dv_job *job, uint32_t counter_pass_idx)
216bf215546Sopenharmony_ci{
217bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci   mtx_lock(&job->device->query_mutex);
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci   struct v3dv_end_query_cpu_job_info *info = &job->cpu.query_end;
222bf215546Sopenharmony_ci   struct v3dv_queue *queue = &job->device->queue;
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci   int err = 0;
225bf215546Sopenharmony_ci   int fd = -1;
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci   if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
228bf215546Sopenharmony_ci      result = export_perfmon_last_job_sync(queue, job, &fd);
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci      if (result != VK_SUCCESS)
231bf215546Sopenharmony_ci         goto fail;
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci      assert(fd >= 0);
234bf215546Sopenharmony_ci   }
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci   for (uint32_t i = 0; i < info->count; i++) {
237bf215546Sopenharmony_ci      assert(info->query + i < info->pool->query_count);
238bf215546Sopenharmony_ci      struct v3dv_query *query = &info->pool->queries[info->query + i];
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci      if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
241bf215546Sopenharmony_ci         uint32_t syncobj = vk_sync_as_drm_syncobj(query->perf.last_job_sync)->syncobj;
242bf215546Sopenharmony_ci         err = drmSyncobjImportSyncFile(job->device->pdevice->render_fd,
243bf215546Sopenharmony_ci                                        syncobj, fd);
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ci         if (err) {
246bf215546Sopenharmony_ci            result = vk_errorf(queue, VK_ERROR_UNKNOWN,
247bf215546Sopenharmony_ci                               "sync file import failed: %m");
248bf215546Sopenharmony_ci            goto fail;
249bf215546Sopenharmony_ci         }
250bf215546Sopenharmony_ci      }
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_ci      query->maybe_available = true;
253bf215546Sopenharmony_ci   }
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_cifail:
256bf215546Sopenharmony_ci   if (info->pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR)
257bf215546Sopenharmony_ci      close(fd);
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci   cnd_broadcast(&job->device->query_ended);
260bf215546Sopenharmony_ci   mtx_unlock(&job->device->query_mutex);
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_ci   return result;
263bf215546Sopenharmony_ci}
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_cistatic VkResult
266bf215546Sopenharmony_cihandle_copy_query_results_cpu_job(struct v3dv_job *job)
267bf215546Sopenharmony_ci{
268bf215546Sopenharmony_ci   struct v3dv_copy_query_results_cpu_job_info *info =
269bf215546Sopenharmony_ci      &job->cpu.query_copy_results;
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci   assert(info->dst && info->dst->mem && info->dst->mem->bo);
272bf215546Sopenharmony_ci   struct v3dv_bo *bo = info->dst->mem->bo;
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci   /* Map the entire dst buffer for the CPU copy if needed */
275bf215546Sopenharmony_ci   assert(!bo->map || bo->map_size == bo->size);
276bf215546Sopenharmony_ci   if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
277bf215546Sopenharmony_ci      return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
278bf215546Sopenharmony_ci
279bf215546Sopenharmony_ci   uint8_t *offset = ((uint8_t *) bo->map) +
280bf215546Sopenharmony_ci                     info->offset + info->dst->mem_offset;
281bf215546Sopenharmony_ci   v3dv_get_query_pool_results(job->device,
282bf215546Sopenharmony_ci                               info->pool,
283bf215546Sopenharmony_ci                               info->first,
284bf215546Sopenharmony_ci                               info->count,
285bf215546Sopenharmony_ci                               offset,
286bf215546Sopenharmony_ci                               info->stride,
287bf215546Sopenharmony_ci                               info->flags);
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci   return VK_SUCCESS;
290bf215546Sopenharmony_ci}
291bf215546Sopenharmony_ci
292bf215546Sopenharmony_cistatic VkResult
293bf215546Sopenharmony_cihandle_set_event_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
294bf215546Sopenharmony_ci                         struct v3dv_submit_sync_info *sync_info)
295bf215546Sopenharmony_ci{
296bf215546Sopenharmony_ci   /* From the Vulkan 1.0 spec:
297bf215546Sopenharmony_ci    *
298bf215546Sopenharmony_ci    *    "When vkCmdSetEvent is submitted to a queue, it defines an execution
299bf215546Sopenharmony_ci    *     dependency on commands that were submitted before it, and defines an
300bf215546Sopenharmony_ci    *     event signal operation which sets the event to the signaled state.
301bf215546Sopenharmony_ci    *     The first synchronization scope includes every command previously
302bf215546Sopenharmony_ci    *     submitted to the same queue, including those in the same command
303bf215546Sopenharmony_ci    *     buffer and batch".
304bf215546Sopenharmony_ci    *
305bf215546Sopenharmony_ci    * So we should wait for all prior work to be completed before signaling
306bf215546Sopenharmony_ci    * the event, this includes all active CPU wait threads spawned for any
307bf215546Sopenharmony_ci    * command buffer submitted *before* this.
308bf215546Sopenharmony_ci    */
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci   VkResult result = queue_wait_idle(queue, sync_info);
311bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
312bf215546Sopenharmony_ci      return result;
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_ci   struct v3dv_event_set_cpu_job_info *info = &job->cpu.event_set;
315bf215546Sopenharmony_ci   p_atomic_set(&info->event->state, info->state);
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_ci   return VK_SUCCESS;
318bf215546Sopenharmony_ci}
319bf215546Sopenharmony_ci
320bf215546Sopenharmony_cistatic bool
321bf215546Sopenharmony_cicheck_wait_events_complete(struct v3dv_job *job)
322bf215546Sopenharmony_ci{
323bf215546Sopenharmony_ci   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
324bf215546Sopenharmony_ci
325bf215546Sopenharmony_ci   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
326bf215546Sopenharmony_ci   for (uint32_t i = 0; i < info->event_count; i++) {
327bf215546Sopenharmony_ci      if (!p_atomic_read(&info->events[i]->state))
328bf215546Sopenharmony_ci         return false;
329bf215546Sopenharmony_ci   }
330bf215546Sopenharmony_ci   return true;
331bf215546Sopenharmony_ci}
332bf215546Sopenharmony_ci
333bf215546Sopenharmony_cistatic VkResult
334bf215546Sopenharmony_cihandle_wait_events_cpu_job(struct v3dv_job *job)
335bf215546Sopenharmony_ci{
336bf215546Sopenharmony_ci   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
337bf215546Sopenharmony_ci
338bf215546Sopenharmony_ci   /* Wait for events to be signaled */
339bf215546Sopenharmony_ci   const useconds_t wait_interval_ms = 1;
340bf215546Sopenharmony_ci   while (!check_wait_events_complete(job))
341bf215546Sopenharmony_ci      usleep(wait_interval_ms * 1000);
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_ci   return VK_SUCCESS;
344bf215546Sopenharmony_ci}
345bf215546Sopenharmony_ci
346bf215546Sopenharmony_cistatic VkResult
347bf215546Sopenharmony_cihandle_copy_buffer_to_image_cpu_job(struct v3dv_queue *queue,
348bf215546Sopenharmony_ci                                    struct v3dv_job *job,
349bf215546Sopenharmony_ci                                    struct v3dv_submit_sync_info *sync_info)
350bf215546Sopenharmony_ci{
351bf215546Sopenharmony_ci   assert(job->type == V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE);
352bf215546Sopenharmony_ci   struct v3dv_copy_buffer_to_image_cpu_job_info *info =
353bf215546Sopenharmony_ci      &job->cpu.copy_buffer_to_image;
354bf215546Sopenharmony_ci
355bf215546Sopenharmony_ci   /* Wait for all GPU work to finish first, since we may be accessing
356bf215546Sopenharmony_ci    * the BOs involved in the operation.
357bf215546Sopenharmony_ci    */
358bf215546Sopenharmony_ci   VkResult result = queue_wait_idle(queue, sync_info);
359bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
360bf215546Sopenharmony_ci      return result;
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci   /* Map BOs */
363bf215546Sopenharmony_ci   struct v3dv_bo *dst_bo = info->image->mem->bo;
364bf215546Sopenharmony_ci   assert(!dst_bo->map || dst_bo->map_size == dst_bo->size);
365bf215546Sopenharmony_ci   if (!dst_bo->map && !v3dv_bo_map(job->device, dst_bo, dst_bo->size))
366bf215546Sopenharmony_ci      return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
367bf215546Sopenharmony_ci   void *dst_ptr = dst_bo->map;
368bf215546Sopenharmony_ci
369bf215546Sopenharmony_ci   struct v3dv_bo *src_bo = info->buffer->mem->bo;
370bf215546Sopenharmony_ci   assert(!src_bo->map || src_bo->map_size == src_bo->size);
371bf215546Sopenharmony_ci   if (!src_bo->map && !v3dv_bo_map(job->device, src_bo, src_bo->size))
372bf215546Sopenharmony_ci      return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
373bf215546Sopenharmony_ci   void *src_ptr = src_bo->map;
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ci   const struct v3d_resource_slice *slice =
376bf215546Sopenharmony_ci      &info->image->slices[info->mip_level];
377bf215546Sopenharmony_ci
378bf215546Sopenharmony_ci   const struct pipe_box box = {
379bf215546Sopenharmony_ci      info->image_offset.x, info->image_offset.y, info->base_layer,
380bf215546Sopenharmony_ci      info->image_extent.width, info->image_extent.height, info->layer_count,
381bf215546Sopenharmony_ci   };
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci   /* Copy each layer */
384bf215546Sopenharmony_ci   for (uint32_t i = 0; i < info->layer_count; i++) {
385bf215546Sopenharmony_ci      const uint32_t dst_offset =
386bf215546Sopenharmony_ci         v3dv_layer_offset(info->image, info->mip_level, info->base_layer + i);
387bf215546Sopenharmony_ci      const uint32_t src_offset =
388bf215546Sopenharmony_ci         info->buffer->mem_offset + info->buffer_offset +
389bf215546Sopenharmony_ci         info->buffer_layer_stride * i;
390bf215546Sopenharmony_ci      v3d_store_tiled_image(
391bf215546Sopenharmony_ci         dst_ptr + dst_offset, slice->stride,
392bf215546Sopenharmony_ci         src_ptr + src_offset, info->buffer_stride,
393bf215546Sopenharmony_ci         slice->tiling, info->image->cpp, slice->padded_height, &box);
394bf215546Sopenharmony_ci   }
395bf215546Sopenharmony_ci
396bf215546Sopenharmony_ci   return VK_SUCCESS;
397bf215546Sopenharmony_ci}
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_cistatic VkResult
400bf215546Sopenharmony_cihandle_timestamp_query_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
401bf215546Sopenharmony_ci                               struct v3dv_submit_sync_info *sync_info)
402bf215546Sopenharmony_ci{
403bf215546Sopenharmony_ci   assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY);
404bf215546Sopenharmony_ci   struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp;
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_ci   /* Wait for completion of all work queued before the timestamp query */
407bf215546Sopenharmony_ci   VkResult result = queue_wait_idle(queue, sync_info);
408bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
409bf215546Sopenharmony_ci      return result;
410bf215546Sopenharmony_ci
411bf215546Sopenharmony_ci   mtx_lock(&job->device->query_mutex);
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci   /* Compute timestamp */
414bf215546Sopenharmony_ci   struct timespec t;
415bf215546Sopenharmony_ci   clock_gettime(CLOCK_MONOTONIC, &t);
416bf215546Sopenharmony_ci
417bf215546Sopenharmony_ci   for (uint32_t i = 0; i < info->count; i++) {
418bf215546Sopenharmony_ci      assert(info->query + i < info->pool->query_count);
419bf215546Sopenharmony_ci      struct v3dv_query *query = &info->pool->queries[info->query + i];
420bf215546Sopenharmony_ci      query->maybe_available = true;
421bf215546Sopenharmony_ci      if (i == 0)
422bf215546Sopenharmony_ci         query->value = t.tv_sec * 1000000000ull + t.tv_nsec;
423bf215546Sopenharmony_ci   }
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci   cnd_broadcast(&job->device->query_ended);
426bf215546Sopenharmony_ci   mtx_unlock(&job->device->query_mutex);
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci   return VK_SUCCESS;
429bf215546Sopenharmony_ci}
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_cistatic VkResult
432bf215546Sopenharmony_cihandle_csd_indirect_cpu_job(struct v3dv_queue *queue,
433bf215546Sopenharmony_ci                            struct v3dv_job *job,
434bf215546Sopenharmony_ci                            struct v3dv_submit_sync_info *sync_info)
435bf215546Sopenharmony_ci{
436bf215546Sopenharmony_ci   assert(job->type == V3DV_JOB_TYPE_CPU_CSD_INDIRECT);
437bf215546Sopenharmony_ci   struct v3dv_csd_indirect_cpu_job_info *info = &job->cpu.csd_indirect;
438bf215546Sopenharmony_ci   assert(info->csd_job);
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ci   /* Make sure the GPU is no longer using the indirect buffer*/
441bf215546Sopenharmony_ci   assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
442bf215546Sopenharmony_ci   v3dv_bo_wait(queue->device, info->buffer->mem->bo, PIPE_TIMEOUT_INFINITE);
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci   /* Map the indirect buffer and read the dispatch parameters */
445bf215546Sopenharmony_ci   assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
446bf215546Sopenharmony_ci   struct v3dv_bo *bo = info->buffer->mem->bo;
447bf215546Sopenharmony_ci   if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
448bf215546Sopenharmony_ci      return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
449bf215546Sopenharmony_ci   assert(bo->map);
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci   const uint32_t offset = info->buffer->mem_offset + info->offset;
452bf215546Sopenharmony_ci   const uint32_t *group_counts = (uint32_t *) (bo->map + offset);
453bf215546Sopenharmony_ci   if (group_counts[0] == 0 || group_counts[1] == 0|| group_counts[2] == 0)
454bf215546Sopenharmony_ci      return VK_SUCCESS;
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_ci   if (memcmp(group_counts, info->csd_job->csd.wg_count,
457bf215546Sopenharmony_ci              sizeof(info->csd_job->csd.wg_count)) != 0) {
458bf215546Sopenharmony_ci      v3dv_cmd_buffer_rewrite_indirect_csd_job(info, group_counts);
459bf215546Sopenharmony_ci   }
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci   return VK_SUCCESS;
462bf215546Sopenharmony_ci}
463bf215546Sopenharmony_ci
464bf215546Sopenharmony_cistatic VkResult
465bf215546Sopenharmony_ciprocess_waits(struct v3dv_queue *queue,
466bf215546Sopenharmony_ci              uint32_t count, struct vk_sync_wait *waits)
467bf215546Sopenharmony_ci{
468bf215546Sopenharmony_ci   struct v3dv_device *device = queue->device;
469bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
470bf215546Sopenharmony_ci   int err = 0;
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_ci   if (count == 0)
473bf215546Sopenharmony_ci      return VK_SUCCESS;
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci   /* If multisync is supported, we wait on semaphores in the first job
476bf215546Sopenharmony_ci    * submitted to each of the individual queues.  We don't need to
477bf215546Sopenharmony_ci    * pre-populate the syncobjs.
478bf215546Sopenharmony_ci    */
479bf215546Sopenharmony_ci   if (queue->device->pdevice->caps.multisync)
480bf215546Sopenharmony_ci      return VK_SUCCESS;
481bf215546Sopenharmony_ci
482bf215546Sopenharmony_ci   int fd = -1;
483bf215546Sopenharmony_ci   err = drmSyncobjExportSyncFile(device->pdevice->render_fd,
484bf215546Sopenharmony_ci                                  queue->last_job_syncs.syncs[V3DV_QUEUE_ANY],
485bf215546Sopenharmony_ci                                  &fd);
486bf215546Sopenharmony_ci   if (err) {
487bf215546Sopenharmony_ci      result = vk_errorf(queue, VK_ERROR_UNKNOWN,
488bf215546Sopenharmony_ci                         "sync file export failed: %m");
489bf215546Sopenharmony_ci      goto fail;
490bf215546Sopenharmony_ci   }
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci   for (uint32_t i = 0; i < count; i++) {
493bf215546Sopenharmony_ci      uint32_t syncobj = vk_sync_as_drm_syncobj(waits[i].sync)->syncobj;
494bf215546Sopenharmony_ci      int wait_fd = -1;
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_ci      err = drmSyncobjExportSyncFile(device->pdevice->render_fd,
497bf215546Sopenharmony_ci                                     syncobj, &wait_fd);
498bf215546Sopenharmony_ci      if (err) {
499bf215546Sopenharmony_ci         result = vk_errorf(queue, VK_ERROR_UNKNOWN,
500bf215546Sopenharmony_ci                            "sync file export failed: %m");
501bf215546Sopenharmony_ci         goto fail;
502bf215546Sopenharmony_ci      }
503bf215546Sopenharmony_ci
504bf215546Sopenharmony_ci      err = sync_accumulate("v3dv", &fd, wait_fd);
505bf215546Sopenharmony_ci      close(wait_fd);
506bf215546Sopenharmony_ci      if (err) {
507bf215546Sopenharmony_ci         result = vk_errorf(queue, VK_ERROR_UNKNOWN,
508bf215546Sopenharmony_ci                            "sync file merge failed: %m");
509bf215546Sopenharmony_ci         goto fail;
510bf215546Sopenharmony_ci      }
511bf215546Sopenharmony_ci   }
512bf215546Sopenharmony_ci
513bf215546Sopenharmony_ci   err = drmSyncobjImportSyncFile(device->pdevice->render_fd,
514bf215546Sopenharmony_ci                                  queue->last_job_syncs.syncs[V3DV_QUEUE_ANY],
515bf215546Sopenharmony_ci                                  fd);
516bf215546Sopenharmony_ci   if (err) {
517bf215546Sopenharmony_ci      result = vk_errorf(queue, VK_ERROR_UNKNOWN,
518bf215546Sopenharmony_ci                         "sync file import failed: %m");
519bf215546Sopenharmony_ci   }
520bf215546Sopenharmony_ci
521bf215546Sopenharmony_cifail:
522bf215546Sopenharmony_ci   close(fd);
523bf215546Sopenharmony_ci   return result;
524bf215546Sopenharmony_ci}
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_cistatic VkResult
527bf215546Sopenharmony_ciprocess_signals(struct v3dv_queue *queue,
528bf215546Sopenharmony_ci                uint32_t count, struct vk_sync_signal *signals)
529bf215546Sopenharmony_ci{
530bf215546Sopenharmony_ci   struct v3dv_device *device = queue->device;
531bf215546Sopenharmony_ci
532bf215546Sopenharmony_ci   if (count == 0)
533bf215546Sopenharmony_ci      return VK_SUCCESS;
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_ci   /* If multisync is supported, we are signalling semaphores in the last job
536bf215546Sopenharmony_ci    * of the last command buffer and, therefore, we do not need to process any
537bf215546Sopenharmony_ci    * semaphores here.
538bf215546Sopenharmony_ci    */
539bf215546Sopenharmony_ci   if (device->pdevice->caps.multisync)
540bf215546Sopenharmony_ci      return VK_SUCCESS;
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci   int fd;
543bf215546Sopenharmony_ci   drmSyncobjExportSyncFile(device->pdevice->render_fd,
544bf215546Sopenharmony_ci                            queue->last_job_syncs.syncs[V3DV_QUEUE_ANY],
545bf215546Sopenharmony_ci                            &fd);
546bf215546Sopenharmony_ci   if (fd == -1) {
547bf215546Sopenharmony_ci      return vk_errorf(queue, VK_ERROR_UNKNOWN,
548bf215546Sopenharmony_ci                       "sync file export failed: %m");
549bf215546Sopenharmony_ci   }
550bf215546Sopenharmony_ci
551bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
552bf215546Sopenharmony_ci   for (uint32_t i = 0; i < count; i++) {
553bf215546Sopenharmony_ci      uint32_t syncobj = vk_sync_as_drm_syncobj(signals[i].sync)->syncobj;
554bf215546Sopenharmony_ci      int err = drmSyncobjImportSyncFile(device->pdevice->render_fd,
555bf215546Sopenharmony_ci                                         syncobj, fd);
556bf215546Sopenharmony_ci      if (err) {
557bf215546Sopenharmony_ci         result = vk_errorf(queue, VK_ERROR_UNKNOWN,
558bf215546Sopenharmony_ci                            "sync file import failed: %m");
559bf215546Sopenharmony_ci         break;
560bf215546Sopenharmony_ci      }
561bf215546Sopenharmony_ci   }
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci   assert(fd >= 0);
564bf215546Sopenharmony_ci   close(fd);
565bf215546Sopenharmony_ci
566bf215546Sopenharmony_ci   return result;
567bf215546Sopenharmony_ci}
568bf215546Sopenharmony_ci
569bf215546Sopenharmony_cistatic void
570bf215546Sopenharmony_cimultisync_free(struct v3dv_device *device,
571bf215546Sopenharmony_ci               struct drm_v3d_multi_sync *ms)
572bf215546Sopenharmony_ci{
573bf215546Sopenharmony_ci   vk_free(&device->vk.alloc, (void *)(uintptr_t)ms->out_syncs);
574bf215546Sopenharmony_ci   vk_free(&device->vk.alloc, (void *)(uintptr_t)ms->in_syncs);
575bf215546Sopenharmony_ci}
576bf215546Sopenharmony_ci
577bf215546Sopenharmony_cistatic struct drm_v3d_sem *
578bf215546Sopenharmony_ciset_in_syncs(struct v3dv_queue *queue,
579bf215546Sopenharmony_ci             struct v3dv_job *job,
580bf215546Sopenharmony_ci             enum v3dv_queue_type queue_sync,
581bf215546Sopenharmony_ci             uint32_t *count,
582bf215546Sopenharmony_ci             struct v3dv_submit_sync_info *sync_info)
583bf215546Sopenharmony_ci{
584bf215546Sopenharmony_ci   struct v3dv_device *device = queue->device;
585bf215546Sopenharmony_ci   uint32_t n_syncs = 0;
586bf215546Sopenharmony_ci
587bf215546Sopenharmony_ci   /* If this is the first job submitted to a given GPU queue in this cmd buf
588bf215546Sopenharmony_ci    * batch, it has to wait on wait semaphores (if any) before running.
589bf215546Sopenharmony_ci    */
590bf215546Sopenharmony_ci   if (queue->last_job_syncs.first[queue_sync])
591bf215546Sopenharmony_ci      n_syncs = sync_info->wait_count;
592bf215546Sopenharmony_ci
593bf215546Sopenharmony_ci   /* If the serialize flag is set the job needs to be serialized in the
594bf215546Sopenharmony_ci    * corresponding queues. Notice that we may implement transfer operations
595bf215546Sopenharmony_ci    * as both CL or TFU jobs.
596bf215546Sopenharmony_ci    *
597bf215546Sopenharmony_ci    * FIXME: maybe we could track more precisely if the source of a transfer
598bf215546Sopenharmony_ci    * barrier is a CL and/or a TFU job.
599bf215546Sopenharmony_ci    */
600bf215546Sopenharmony_ci   bool sync_csd  = job->serialize & V3DV_BARRIER_COMPUTE_BIT;
601bf215546Sopenharmony_ci   bool sync_tfu  = job->serialize & V3DV_BARRIER_TRANSFER_BIT;
602bf215546Sopenharmony_ci   bool sync_cl   = job->serialize & (V3DV_BARRIER_GRAPHICS_BIT |
603bf215546Sopenharmony_ci                                      V3DV_BARRIER_TRANSFER_BIT);
604bf215546Sopenharmony_ci   *count = n_syncs;
605bf215546Sopenharmony_ci   if (sync_cl)
606bf215546Sopenharmony_ci      (*count)++;
607bf215546Sopenharmony_ci   if (sync_tfu)
608bf215546Sopenharmony_ci      (*count)++;
609bf215546Sopenharmony_ci   if (sync_csd)
610bf215546Sopenharmony_ci      (*count)++;
611bf215546Sopenharmony_ci
612bf215546Sopenharmony_ci   if (!*count)
613bf215546Sopenharmony_ci      return NULL;
614bf215546Sopenharmony_ci
615bf215546Sopenharmony_ci   struct drm_v3d_sem *syncs =
616bf215546Sopenharmony_ci      vk_zalloc(&device->vk.alloc, *count * sizeof(struct drm_v3d_sem),
617bf215546Sopenharmony_ci                8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_ci   if (!syncs)
620bf215546Sopenharmony_ci      return NULL;
621bf215546Sopenharmony_ci
622bf215546Sopenharmony_ci   for (int i = 0; i < n_syncs; i++) {
623bf215546Sopenharmony_ci      syncs[i].handle =
624bf215546Sopenharmony_ci         vk_sync_as_drm_syncobj(sync_info->waits[i].sync)->syncobj;
625bf215546Sopenharmony_ci   }
626bf215546Sopenharmony_ci
627bf215546Sopenharmony_ci   if (sync_cl)
628bf215546Sopenharmony_ci      syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CL];
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_ci   if (sync_csd)
631bf215546Sopenharmony_ci      syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_CSD];
632bf215546Sopenharmony_ci
633bf215546Sopenharmony_ci   if (sync_tfu)
634bf215546Sopenharmony_ci      syncs[n_syncs++].handle = queue->last_job_syncs.syncs[V3DV_QUEUE_TFU];
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci   assert(n_syncs == *count);
637bf215546Sopenharmony_ci   return syncs;
638bf215546Sopenharmony_ci}
639bf215546Sopenharmony_ci
640bf215546Sopenharmony_cistatic struct drm_v3d_sem *
641bf215546Sopenharmony_ciset_out_syncs(struct v3dv_queue *queue,
642bf215546Sopenharmony_ci              struct v3dv_job *job,
643bf215546Sopenharmony_ci              enum v3dv_queue_type queue_sync,
644bf215546Sopenharmony_ci              uint32_t *count,
645bf215546Sopenharmony_ci              struct v3dv_submit_sync_info *sync_info,
646bf215546Sopenharmony_ci              bool signal_syncs)
647bf215546Sopenharmony_ci{
648bf215546Sopenharmony_ci   struct v3dv_device *device = queue->device;
649bf215546Sopenharmony_ci
650bf215546Sopenharmony_ci   uint32_t n_vk_syncs = signal_syncs ? sync_info->signal_count : 0;
651bf215546Sopenharmony_ci
652bf215546Sopenharmony_ci   /* We always signal the syncobj from `device->last_job_syncs` related to
653bf215546Sopenharmony_ci    * this v3dv_queue_type to track the last job submitted to this queue.
654bf215546Sopenharmony_ci    */
655bf215546Sopenharmony_ci   (*count) = n_vk_syncs + 1;
656bf215546Sopenharmony_ci
657bf215546Sopenharmony_ci   struct drm_v3d_sem *syncs =
658bf215546Sopenharmony_ci      vk_zalloc(&device->vk.alloc, *count * sizeof(struct drm_v3d_sem),
659bf215546Sopenharmony_ci                8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
660bf215546Sopenharmony_ci
661bf215546Sopenharmony_ci   if (!syncs)
662bf215546Sopenharmony_ci      return NULL;
663bf215546Sopenharmony_ci
664bf215546Sopenharmony_ci   if (n_vk_syncs) {
665bf215546Sopenharmony_ci      for (unsigned i = 0; i < n_vk_syncs; i++) {
666bf215546Sopenharmony_ci         syncs[i].handle =
667bf215546Sopenharmony_ci            vk_sync_as_drm_syncobj(sync_info->signals[i].sync)->syncobj;
668bf215546Sopenharmony_ci      }
669bf215546Sopenharmony_ci   }
670bf215546Sopenharmony_ci
671bf215546Sopenharmony_ci   syncs[n_vk_syncs].handle = queue->last_job_syncs.syncs[queue_sync];
672bf215546Sopenharmony_ci
673bf215546Sopenharmony_ci   return syncs;
674bf215546Sopenharmony_ci}
675bf215546Sopenharmony_ci
676bf215546Sopenharmony_cistatic void
677bf215546Sopenharmony_ciset_ext(struct drm_v3d_extension *ext,
678bf215546Sopenharmony_ci	struct drm_v3d_extension *next,
679bf215546Sopenharmony_ci	uint32_t id,
680bf215546Sopenharmony_ci	uintptr_t flags)
681bf215546Sopenharmony_ci{
682bf215546Sopenharmony_ci   ext->next = (uintptr_t)(void *)next;
683bf215546Sopenharmony_ci   ext->id = id;
684bf215546Sopenharmony_ci   ext->flags = flags;
685bf215546Sopenharmony_ci}
686bf215546Sopenharmony_ci
687bf215546Sopenharmony_ci/* This function sets the extension for multiple in/out syncobjs. When it is
688bf215546Sopenharmony_ci * successful, it sets the extension id to DRM_V3D_EXT_ID_MULTI_SYNC.
689bf215546Sopenharmony_ci * Otherwise, the extension id is 0, which means an out-of-memory error.
690bf215546Sopenharmony_ci */
691bf215546Sopenharmony_cistatic void
692bf215546Sopenharmony_ciset_multisync(struct drm_v3d_multi_sync *ms,
693bf215546Sopenharmony_ci              struct v3dv_submit_sync_info *sync_info,
694bf215546Sopenharmony_ci              struct drm_v3d_extension *next,
695bf215546Sopenharmony_ci              struct v3dv_device *device,
696bf215546Sopenharmony_ci              struct v3dv_job *job,
697bf215546Sopenharmony_ci              enum v3dv_queue_type queue_sync,
698bf215546Sopenharmony_ci              enum v3d_queue wait_stage,
699bf215546Sopenharmony_ci              bool signal_syncs)
700bf215546Sopenharmony_ci{
701bf215546Sopenharmony_ci   struct v3dv_queue *queue = &device->queue;
702bf215546Sopenharmony_ci   uint32_t out_sync_count = 0, in_sync_count = 0;
703bf215546Sopenharmony_ci   struct drm_v3d_sem *out_syncs = NULL, *in_syncs = NULL;
704bf215546Sopenharmony_ci
705bf215546Sopenharmony_ci   in_syncs = set_in_syncs(queue, job, queue_sync,
706bf215546Sopenharmony_ci                           &in_sync_count, sync_info);
707bf215546Sopenharmony_ci   if (!in_syncs && in_sync_count)
708bf215546Sopenharmony_ci      goto fail;
709bf215546Sopenharmony_ci
710bf215546Sopenharmony_ci   out_syncs = set_out_syncs(queue, job, queue_sync,
711bf215546Sopenharmony_ci                             &out_sync_count, sync_info, signal_syncs);
712bf215546Sopenharmony_ci
713bf215546Sopenharmony_ci   assert(out_sync_count > 0);
714bf215546Sopenharmony_ci
715bf215546Sopenharmony_ci   if (!out_syncs)
716bf215546Sopenharmony_ci      goto fail;
717bf215546Sopenharmony_ci
718bf215546Sopenharmony_ci   set_ext(&ms->base, next, DRM_V3D_EXT_ID_MULTI_SYNC, 0);
719bf215546Sopenharmony_ci   ms->wait_stage = wait_stage;
720bf215546Sopenharmony_ci   ms->out_sync_count = out_sync_count;
721bf215546Sopenharmony_ci   ms->out_syncs = (uintptr_t)(void *)out_syncs;
722bf215546Sopenharmony_ci   ms->in_sync_count = in_sync_count;
723bf215546Sopenharmony_ci   ms->in_syncs = (uintptr_t)(void *)in_syncs;
724bf215546Sopenharmony_ci
725bf215546Sopenharmony_ci   return;
726bf215546Sopenharmony_ci
727bf215546Sopenharmony_cifail:
728bf215546Sopenharmony_ci   if (in_syncs)
729bf215546Sopenharmony_ci      vk_free(&device->vk.alloc, in_syncs);
730bf215546Sopenharmony_ci   assert(!out_syncs);
731bf215546Sopenharmony_ci
732bf215546Sopenharmony_ci   return;
733bf215546Sopenharmony_ci}
734bf215546Sopenharmony_ci
735bf215546Sopenharmony_cistatic VkResult
736bf215546Sopenharmony_cihandle_cl_job(struct v3dv_queue *queue,
737bf215546Sopenharmony_ci              struct v3dv_job *job,
738bf215546Sopenharmony_ci              uint32_t counter_pass_idx,
739bf215546Sopenharmony_ci              struct v3dv_submit_sync_info *sync_info,
740bf215546Sopenharmony_ci              bool signal_syncs)
741bf215546Sopenharmony_ci{
742bf215546Sopenharmony_ci   struct v3dv_device *device = queue->device;
743bf215546Sopenharmony_ci
744bf215546Sopenharmony_ci   struct drm_v3d_submit_cl submit = { 0 };
745bf215546Sopenharmony_ci
746bf215546Sopenharmony_ci   /* Sanity check: we should only flag a bcl sync on a job that needs to be
747bf215546Sopenharmony_ci    * serialized.
748bf215546Sopenharmony_ci    */
749bf215546Sopenharmony_ci   assert(job->serialize || !job->needs_bcl_sync);
750bf215546Sopenharmony_ci
751bf215546Sopenharmony_ci   /* We expect to have just one RCL per job which should fit in just one BO.
752bf215546Sopenharmony_ci    * Our BCL, could chain multiple BOS together though.
753bf215546Sopenharmony_ci    */
754bf215546Sopenharmony_ci   assert(list_length(&job->rcl.bo_list) == 1);
755bf215546Sopenharmony_ci   assert(list_length(&job->bcl.bo_list) >= 1);
756bf215546Sopenharmony_ci   struct v3dv_bo *bcl_fist_bo =
757bf215546Sopenharmony_ci      list_first_entry(&job->bcl.bo_list, struct v3dv_bo, list_link);
758bf215546Sopenharmony_ci   submit.bcl_start = bcl_fist_bo->offset;
759bf215546Sopenharmony_ci   submit.bcl_end = job->bcl.bo->offset + v3dv_cl_offset(&job->bcl);
760bf215546Sopenharmony_ci   submit.rcl_start = job->rcl.bo->offset;
761bf215546Sopenharmony_ci   submit.rcl_end = job->rcl.bo->offset + v3dv_cl_offset(&job->rcl);
762bf215546Sopenharmony_ci
763bf215546Sopenharmony_ci   submit.qma = job->tile_alloc->offset;
764bf215546Sopenharmony_ci   submit.qms = job->tile_alloc->size;
765bf215546Sopenharmony_ci   submit.qts = job->tile_state->offset;
766bf215546Sopenharmony_ci
767bf215546Sopenharmony_ci   submit.flags = 0;
768bf215546Sopenharmony_ci   if (job->tmu_dirty_rcl)
769bf215546Sopenharmony_ci      submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;
770bf215546Sopenharmony_ci
771bf215546Sopenharmony_ci   /* If the job uses VK_KHR_buffer_device_addess we need to ensure all
772bf215546Sopenharmony_ci    * buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR
773bf215546Sopenharmony_ci    * are included.
774bf215546Sopenharmony_ci    */
775bf215546Sopenharmony_ci   if (job->uses_buffer_device_address) {
776bf215546Sopenharmony_ci      util_dynarray_foreach(&queue->device->device_address_bo_list,
777bf215546Sopenharmony_ci                            struct v3dv_bo *, bo) {
778bf215546Sopenharmony_ci         v3dv_job_add_bo(job, *bo);
779bf215546Sopenharmony_ci      }
780bf215546Sopenharmony_ci   }
781bf215546Sopenharmony_ci
782bf215546Sopenharmony_ci   submit.bo_handle_count = job->bo_count;
783bf215546Sopenharmony_ci   uint32_t *bo_handles =
784bf215546Sopenharmony_ci      (uint32_t *) malloc(sizeof(uint32_t) * submit.bo_handle_count);
785bf215546Sopenharmony_ci   uint32_t bo_idx = 0;
786bf215546Sopenharmony_ci   set_foreach(job->bos, entry) {
787bf215546Sopenharmony_ci      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
788bf215546Sopenharmony_ci      bo_handles[bo_idx++] = bo->handle;
789bf215546Sopenharmony_ci   }
790bf215546Sopenharmony_ci   assert(bo_idx == submit.bo_handle_count);
791bf215546Sopenharmony_ci   submit.bo_handles = (uintptr_t)(void *)bo_handles;
792bf215546Sopenharmony_ci
793bf215546Sopenharmony_ci   submit.perfmon_id = job->perf ?
794bf215546Sopenharmony_ci      job->perf->kperfmon_ids[counter_pass_idx] : 0;
795bf215546Sopenharmony_ci   const bool needs_perf_sync = queue->last_perfmon_id != submit.perfmon_id;
796bf215546Sopenharmony_ci   queue->last_perfmon_id = submit.perfmon_id;
797bf215546Sopenharmony_ci
798bf215546Sopenharmony_ci   /* We need a binning sync if we are the first CL job waiting on a semaphore
799bf215546Sopenharmony_ci    * with a wait stage that involves the geometry pipeline, or if the job
800bf215546Sopenharmony_ci    * comes after a pipeline barrier that involves geometry stages
801bf215546Sopenharmony_ci    * (needs_bcl_sync) or when performance queries are in use.
802bf215546Sopenharmony_ci    *
803bf215546Sopenharmony_ci    * We need a render sync if the job doesn't need a binning sync but has
804bf215546Sopenharmony_ci    * still been flagged for serialization. It should be noted that RCL jobs
805bf215546Sopenharmony_ci    * don't start until the previous RCL job has finished so we don't really
806bf215546Sopenharmony_ci    * need to add a fence for those, however, we might need to wait on a CSD or
807bf215546Sopenharmony_ci    * TFU job, which are not automatically serialized with CL jobs.
808bf215546Sopenharmony_ci    */
809bf215546Sopenharmony_ci   bool needs_bcl_sync = job->needs_bcl_sync || needs_perf_sync;
810bf215546Sopenharmony_ci   if (queue->last_job_syncs.first[V3DV_QUEUE_CL]) {
811bf215546Sopenharmony_ci      for (int i = 0; !needs_bcl_sync && i < sync_info->wait_count; i++) {
812bf215546Sopenharmony_ci         needs_bcl_sync = sync_info->waits[i].stage_mask &
813bf215546Sopenharmony_ci             (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT |
814bf215546Sopenharmony_ci              VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT |
815bf215546Sopenharmony_ci              VK_PIPELINE_STAGE_ALL_COMMANDS_BIT |
816bf215546Sopenharmony_ci              VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
817bf215546Sopenharmony_ci              VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
818bf215546Sopenharmony_ci              VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
819bf215546Sopenharmony_ci              VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
820bf215546Sopenharmony_ci              VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
821bf215546Sopenharmony_ci              VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT);
822bf215546Sopenharmony_ci      }
823bf215546Sopenharmony_ci   }
824bf215546Sopenharmony_ci
825bf215546Sopenharmony_ci   bool needs_rcl_sync = job->serialize && !needs_bcl_sync;
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_ci   /* Replace single semaphore settings whenever our kernel-driver supports
828bf215546Sopenharmony_ci    * multiple semaphores extension.
829bf215546Sopenharmony_ci    */
830bf215546Sopenharmony_ci   struct drm_v3d_multi_sync ms = { 0 };
831bf215546Sopenharmony_ci   if (device->pdevice->caps.multisync) {
832bf215546Sopenharmony_ci      enum v3d_queue wait_stage = needs_rcl_sync ? V3D_RENDER : V3D_BIN;
833bf215546Sopenharmony_ci      set_multisync(&ms, sync_info, NULL, device, job,
834bf215546Sopenharmony_ci                    V3DV_QUEUE_CL, wait_stage, signal_syncs);
835bf215546Sopenharmony_ci      if (!ms.base.id)
836bf215546Sopenharmony_ci         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
837bf215546Sopenharmony_ci
838bf215546Sopenharmony_ci      submit.flags |= DRM_V3D_SUBMIT_EXTENSION;
839bf215546Sopenharmony_ci      submit.extensions = (uintptr_t)(void *)&ms;
840bf215546Sopenharmony_ci      /* Disable legacy sync interface when multisync extension is used */
841bf215546Sopenharmony_ci      submit.in_sync_rcl = 0;
842bf215546Sopenharmony_ci      submit.in_sync_bcl = 0;
843bf215546Sopenharmony_ci      submit.out_sync = 0;
844bf215546Sopenharmony_ci   } else {
845bf215546Sopenharmony_ci      uint32_t last_job_sync = queue->last_job_syncs.syncs[V3DV_QUEUE_ANY];
846bf215546Sopenharmony_ci      submit.in_sync_bcl = needs_bcl_sync ? last_job_sync : 0;
847bf215546Sopenharmony_ci      submit.in_sync_rcl = needs_rcl_sync ? last_job_sync : 0;
848bf215546Sopenharmony_ci      submit.out_sync = last_job_sync;
849bf215546Sopenharmony_ci   }
850bf215546Sopenharmony_ci
851bf215546Sopenharmony_ci   v3dv_clif_dump(device, job, &submit);
852bf215546Sopenharmony_ci   int ret = v3dv_ioctl(device->pdevice->render_fd,
853bf215546Sopenharmony_ci                        DRM_IOCTL_V3D_SUBMIT_CL, &submit);
854bf215546Sopenharmony_ci
855bf215546Sopenharmony_ci   static bool warned = false;
856bf215546Sopenharmony_ci   if (ret && !warned) {
857bf215546Sopenharmony_ci      fprintf(stderr, "Draw call returned %s. Expect corruption.\n",
858bf215546Sopenharmony_ci              strerror(errno));
859bf215546Sopenharmony_ci      warned = true;
860bf215546Sopenharmony_ci   }
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci   free(bo_handles);
863bf215546Sopenharmony_ci   multisync_free(device, &ms);
864bf215546Sopenharmony_ci
865bf215546Sopenharmony_ci   queue->last_job_syncs.first[V3DV_QUEUE_CL] = false;
866bf215546Sopenharmony_ci
867bf215546Sopenharmony_ci   if (ret)
868bf215546Sopenharmony_ci      return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_CL failed: %m");
869bf215546Sopenharmony_ci
870bf215546Sopenharmony_ci   return VK_SUCCESS;
871bf215546Sopenharmony_ci}
872bf215546Sopenharmony_ci
873bf215546Sopenharmony_cistatic VkResult
874bf215546Sopenharmony_cihandle_tfu_job(struct v3dv_queue *queue,
875bf215546Sopenharmony_ci               struct v3dv_job *job,
876bf215546Sopenharmony_ci               struct v3dv_submit_sync_info *sync_info,
877bf215546Sopenharmony_ci               bool signal_syncs)
878bf215546Sopenharmony_ci{
879bf215546Sopenharmony_ci   struct v3dv_device *device = queue->device;
880bf215546Sopenharmony_ci
881bf215546Sopenharmony_ci   const bool needs_sync = sync_info->wait_count || job->serialize;
882bf215546Sopenharmony_ci
883bf215546Sopenharmony_ci   /* Replace single semaphore settings whenever our kernel-driver supports
884bf215546Sopenharmony_ci    * multiple semaphore extension.
885bf215546Sopenharmony_ci    */
886bf215546Sopenharmony_ci   struct drm_v3d_multi_sync ms = { 0 };
887bf215546Sopenharmony_ci   if (device->pdevice->caps.multisync) {
888bf215546Sopenharmony_ci      set_multisync(&ms, sync_info, NULL, device, job,
889bf215546Sopenharmony_ci                    V3DV_QUEUE_TFU, V3D_TFU, signal_syncs);
890bf215546Sopenharmony_ci      if (!ms.base.id)
891bf215546Sopenharmony_ci         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
892bf215546Sopenharmony_ci
893bf215546Sopenharmony_ci      job->tfu.flags |= DRM_V3D_SUBMIT_EXTENSION;
894bf215546Sopenharmony_ci      job->tfu.extensions = (uintptr_t)(void *)&ms;
895bf215546Sopenharmony_ci      /* Disable legacy sync interface when multisync extension is used */
896bf215546Sopenharmony_ci      job->tfu.in_sync = 0;
897bf215546Sopenharmony_ci      job->tfu.out_sync = 0;
898bf215546Sopenharmony_ci   } else {
899bf215546Sopenharmony_ci      uint32_t last_job_sync = queue->last_job_syncs.syncs[V3DV_QUEUE_ANY];
900bf215546Sopenharmony_ci      job->tfu.in_sync = needs_sync ? last_job_sync : 0;
901bf215546Sopenharmony_ci      job->tfu.out_sync = last_job_sync;
902bf215546Sopenharmony_ci   }
903bf215546Sopenharmony_ci   int ret = v3dv_ioctl(device->pdevice->render_fd,
904bf215546Sopenharmony_ci                        DRM_IOCTL_V3D_SUBMIT_TFU, &job->tfu);
905bf215546Sopenharmony_ci
906bf215546Sopenharmony_ci   multisync_free(device, &ms);
907bf215546Sopenharmony_ci   queue->last_job_syncs.first[V3DV_QUEUE_TFU] = false;
908bf215546Sopenharmony_ci
909bf215546Sopenharmony_ci   if (ret != 0)
910bf215546Sopenharmony_ci      return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_TFU failed: %m");
911bf215546Sopenharmony_ci
912bf215546Sopenharmony_ci   return VK_SUCCESS;
913bf215546Sopenharmony_ci}
914bf215546Sopenharmony_ci
915bf215546Sopenharmony_cistatic VkResult
916bf215546Sopenharmony_cihandle_csd_job(struct v3dv_queue *queue,
917bf215546Sopenharmony_ci               struct v3dv_job *job,
918bf215546Sopenharmony_ci               uint32_t counter_pass_idx,
919bf215546Sopenharmony_ci               struct v3dv_submit_sync_info *sync_info,
920bf215546Sopenharmony_ci               bool signal_syncs)
921bf215546Sopenharmony_ci{
922bf215546Sopenharmony_ci   struct v3dv_device *device = queue->device;
923bf215546Sopenharmony_ci
924bf215546Sopenharmony_ci   struct drm_v3d_submit_csd *submit = &job->csd.submit;
925bf215546Sopenharmony_ci
926bf215546Sopenharmony_ci   /* If the job uses VK_KHR_buffer_device_addess we need to ensure all
927bf215546Sopenharmony_ci    * buffers flagged with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR
928bf215546Sopenharmony_ci    * are included.
929bf215546Sopenharmony_ci    */
930bf215546Sopenharmony_ci   if (job->uses_buffer_device_address) {
931bf215546Sopenharmony_ci      util_dynarray_foreach(&queue->device->device_address_bo_list,
932bf215546Sopenharmony_ci                            struct v3dv_bo *, bo) {
933bf215546Sopenharmony_ci         v3dv_job_add_bo(job, *bo);
934bf215546Sopenharmony_ci      }
935bf215546Sopenharmony_ci   }
936bf215546Sopenharmony_ci
937bf215546Sopenharmony_ci   submit->bo_handle_count = job->bo_count;
938bf215546Sopenharmony_ci   uint32_t *bo_handles =
939bf215546Sopenharmony_ci      (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit->bo_handle_count * 2));
940bf215546Sopenharmony_ci   uint32_t bo_idx = 0;
941bf215546Sopenharmony_ci   set_foreach(job->bos, entry) {
942bf215546Sopenharmony_ci      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
943bf215546Sopenharmony_ci      bo_handles[bo_idx++] = bo->handle;
944bf215546Sopenharmony_ci   }
945bf215546Sopenharmony_ci   assert(bo_idx == submit->bo_handle_count);
946bf215546Sopenharmony_ci   submit->bo_handles = (uintptr_t)(void *)bo_handles;
947bf215546Sopenharmony_ci
948bf215546Sopenharmony_ci   const bool needs_sync = sync_info->wait_count || job->serialize;
949bf215546Sopenharmony_ci
950bf215546Sopenharmony_ci   /* Replace single semaphore settings whenever our kernel-driver supports
951bf215546Sopenharmony_ci    * multiple semaphore extension.
952bf215546Sopenharmony_ci    */
953bf215546Sopenharmony_ci   struct drm_v3d_multi_sync ms = { 0 };
954bf215546Sopenharmony_ci   if (device->pdevice->caps.multisync) {
955bf215546Sopenharmony_ci      set_multisync(&ms, sync_info, NULL, device, job,
956bf215546Sopenharmony_ci                    V3DV_QUEUE_CSD, V3D_CSD, signal_syncs);
957bf215546Sopenharmony_ci      if (!ms.base.id)
958bf215546Sopenharmony_ci         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
959bf215546Sopenharmony_ci
960bf215546Sopenharmony_ci      submit->flags |= DRM_V3D_SUBMIT_EXTENSION;
961bf215546Sopenharmony_ci      submit->extensions = (uintptr_t)(void *)&ms;
962bf215546Sopenharmony_ci      /* Disable legacy sync interface when multisync extension is used */
963bf215546Sopenharmony_ci      submit->in_sync = 0;
964bf215546Sopenharmony_ci      submit->out_sync = 0;
965bf215546Sopenharmony_ci   } else {
966bf215546Sopenharmony_ci      uint32_t last_job_sync = queue->last_job_syncs.syncs[V3DV_QUEUE_ANY];
967bf215546Sopenharmony_ci      submit->in_sync = needs_sync ? last_job_sync : 0;
968bf215546Sopenharmony_ci      submit->out_sync = last_job_sync;
969bf215546Sopenharmony_ci   }
970bf215546Sopenharmony_ci   submit->perfmon_id = job->perf ?
971bf215546Sopenharmony_ci      job->perf->kperfmon_ids[counter_pass_idx] : 0;
972bf215546Sopenharmony_ci   queue->last_perfmon_id = submit->perfmon_id;
973bf215546Sopenharmony_ci   int ret = v3dv_ioctl(device->pdevice->render_fd,
974bf215546Sopenharmony_ci                        DRM_IOCTL_V3D_SUBMIT_CSD, submit);
975bf215546Sopenharmony_ci
976bf215546Sopenharmony_ci   static bool warned = false;
977bf215546Sopenharmony_ci   if (ret && !warned) {
978bf215546Sopenharmony_ci      fprintf(stderr, "Compute dispatch returned %s. Expect corruption.\n",
979bf215546Sopenharmony_ci              strerror(errno));
980bf215546Sopenharmony_ci      warned = true;
981bf215546Sopenharmony_ci   }
982bf215546Sopenharmony_ci
983bf215546Sopenharmony_ci   free(bo_handles);
984bf215546Sopenharmony_ci
985bf215546Sopenharmony_ci   multisync_free(device, &ms);
986bf215546Sopenharmony_ci   queue->last_job_syncs.first[V3DV_QUEUE_CSD] = false;
987bf215546Sopenharmony_ci
988bf215546Sopenharmony_ci   if (ret)
989bf215546Sopenharmony_ci      return vk_queue_set_lost(&queue->vk, "V3D_SUBMIT_CSD failed: %m");
990bf215546Sopenharmony_ci
991bf215546Sopenharmony_ci   return VK_SUCCESS;
992bf215546Sopenharmony_ci}
993bf215546Sopenharmony_ci
994bf215546Sopenharmony_cistatic VkResult
995bf215546Sopenharmony_ciqueue_handle_job(struct v3dv_queue *queue,
996bf215546Sopenharmony_ci                 struct v3dv_job *job,
997bf215546Sopenharmony_ci                 uint32_t counter_pass_idx,
998bf215546Sopenharmony_ci                 struct v3dv_submit_sync_info *sync_info,
999bf215546Sopenharmony_ci                 bool signal_syncs)
1000bf215546Sopenharmony_ci{
1001bf215546Sopenharmony_ci   switch (job->type) {
1002bf215546Sopenharmony_ci   case V3DV_JOB_TYPE_GPU_CL:
1003bf215546Sopenharmony_ci      return handle_cl_job(queue, job, counter_pass_idx, sync_info, signal_syncs);
1004bf215546Sopenharmony_ci   case V3DV_JOB_TYPE_GPU_TFU:
1005bf215546Sopenharmony_ci      return handle_tfu_job(queue, job, sync_info, signal_syncs);
1006bf215546Sopenharmony_ci   case V3DV_JOB_TYPE_GPU_CSD:
1007bf215546Sopenharmony_ci      return handle_csd_job(queue, job, counter_pass_idx, sync_info, signal_syncs);
1008bf215546Sopenharmony_ci   case V3DV_JOB_TYPE_CPU_RESET_QUERIES:
1009bf215546Sopenharmony_ci      return handle_reset_query_cpu_job(queue, job, sync_info);
1010bf215546Sopenharmony_ci   case V3DV_JOB_TYPE_CPU_END_QUERY:
1011bf215546Sopenharmony_ci      return handle_end_query_cpu_job(job, counter_pass_idx);
1012bf215546Sopenharmony_ci   case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
1013bf215546Sopenharmony_ci      return handle_copy_query_results_cpu_job(job);
1014bf215546Sopenharmony_ci   case V3DV_JOB_TYPE_CPU_SET_EVENT:
1015bf215546Sopenharmony_ci      return handle_set_event_cpu_job(queue, job, sync_info);
1016bf215546Sopenharmony_ci   case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
1017bf215546Sopenharmony_ci      return handle_wait_events_cpu_job(job);
1018bf215546Sopenharmony_ci   case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE:
1019bf215546Sopenharmony_ci      return handle_copy_buffer_to_image_cpu_job(queue, job, sync_info);
1020bf215546Sopenharmony_ci   case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
1021bf215546Sopenharmony_ci      return handle_csd_indirect_cpu_job(queue, job, sync_info);
1022bf215546Sopenharmony_ci   case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY:
1023bf215546Sopenharmony_ci      return handle_timestamp_query_cpu_job(queue, job, sync_info);
1024bf215546Sopenharmony_ci   default:
1025bf215546Sopenharmony_ci      unreachable("Unhandled job type");
1026bf215546Sopenharmony_ci   }
1027bf215546Sopenharmony_ci}
1028bf215546Sopenharmony_ci
1029bf215546Sopenharmony_cistatic VkResult
1030bf215546Sopenharmony_ciqueue_create_noop_job(struct v3dv_queue *queue)
1031bf215546Sopenharmony_ci{
1032bf215546Sopenharmony_ci   struct v3dv_device *device = queue->device;
1033bf215546Sopenharmony_ci   queue->noop_job = vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_job), 8,
1034bf215546Sopenharmony_ci                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1035bf215546Sopenharmony_ci   if (!queue->noop_job)
1036bf215546Sopenharmony_ci      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1037bf215546Sopenharmony_ci   v3dv_job_init(queue->noop_job, V3DV_JOB_TYPE_GPU_CL, device, NULL, -1);
1038bf215546Sopenharmony_ci
1039bf215546Sopenharmony_ci   v3dv_X(device, job_emit_noop)(queue->noop_job);
1040bf215546Sopenharmony_ci
1041bf215546Sopenharmony_ci   /* We use no-op jobs to signal semaphores/fences. These jobs needs to be
1042bf215546Sopenharmony_ci    * serialized across all hw queues to comply with Vulkan's signal operation
1043bf215546Sopenharmony_ci    * order requirements, which basically require that signal operations occur
1044bf215546Sopenharmony_ci    * in submission order.
1045bf215546Sopenharmony_ci    */
1046bf215546Sopenharmony_ci   queue->noop_job->serialize = V3DV_BARRIER_ALL;
1047bf215546Sopenharmony_ci
1048bf215546Sopenharmony_ci   return VK_SUCCESS;
1049bf215546Sopenharmony_ci}
1050bf215546Sopenharmony_ci
1051bf215546Sopenharmony_cistatic VkResult
1052bf215546Sopenharmony_ciqueue_submit_noop_job(struct v3dv_queue *queue,
1053bf215546Sopenharmony_ci                      uint32_t counter_pass_idx,
1054bf215546Sopenharmony_ci                      struct v3dv_submit_sync_info *sync_info,
1055bf215546Sopenharmony_ci                      bool signal_syncs)
1056bf215546Sopenharmony_ci{
1057bf215546Sopenharmony_ci   if (!queue->noop_job) {
1058bf215546Sopenharmony_ci      VkResult result = queue_create_noop_job(queue);
1059bf215546Sopenharmony_ci      if (result != VK_SUCCESS)
1060bf215546Sopenharmony_ci         return result;
1061bf215546Sopenharmony_ci   }
1062bf215546Sopenharmony_ci
1063bf215546Sopenharmony_ci   assert(queue->noop_job);
1064bf215546Sopenharmony_ci   return queue_handle_job(queue, queue->noop_job, counter_pass_idx,
1065bf215546Sopenharmony_ci                           sync_info, signal_syncs);
1066bf215546Sopenharmony_ci}
1067bf215546Sopenharmony_ci
1068bf215546Sopenharmony_ciVkResult
1069bf215546Sopenharmony_civ3dv_queue_driver_submit(struct vk_queue *vk_queue,
1070bf215546Sopenharmony_ci                         struct vk_queue_submit *submit)
1071bf215546Sopenharmony_ci{
1072bf215546Sopenharmony_ci   struct v3dv_queue *queue = container_of(vk_queue, struct v3dv_queue, vk);
1073bf215546Sopenharmony_ci   VkResult result;
1074bf215546Sopenharmony_ci
1075bf215546Sopenharmony_ci   struct v3dv_submit_sync_info sync_info = {
1076bf215546Sopenharmony_ci      .wait_count = submit->wait_count,
1077bf215546Sopenharmony_ci      .waits = submit->waits,
1078bf215546Sopenharmony_ci      .signal_count = submit->signal_count,
1079bf215546Sopenharmony_ci      .signals = submit->signals,
1080bf215546Sopenharmony_ci   };
1081bf215546Sopenharmony_ci
1082bf215546Sopenharmony_ci   for (int i = 0; i < V3DV_QUEUE_COUNT; i++)
1083bf215546Sopenharmony_ci      queue->last_job_syncs.first[i] = true;
1084bf215546Sopenharmony_ci
1085bf215546Sopenharmony_ci   result = process_waits(queue, sync_info.wait_count, sync_info.waits);
1086bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
1087bf215546Sopenharmony_ci      return result;
1088bf215546Sopenharmony_ci
1089bf215546Sopenharmony_ci   for (uint32_t i = 0; i < submit->command_buffer_count; i++) {
1090bf215546Sopenharmony_ci      struct v3dv_cmd_buffer *cmd_buffer =
1091bf215546Sopenharmony_ci         container_of(submit->command_buffers[i], struct v3dv_cmd_buffer, vk);
1092bf215546Sopenharmony_ci      list_for_each_entry_safe(struct v3dv_job, job,
1093bf215546Sopenharmony_ci                               &cmd_buffer->jobs, list_link) {
1094bf215546Sopenharmony_ci
1095bf215546Sopenharmony_ci         result = queue_handle_job(queue, job, submit->perf_pass_index,
1096bf215546Sopenharmony_ci                                   &sync_info, false);
1097bf215546Sopenharmony_ci         if (result != VK_SUCCESS)
1098bf215546Sopenharmony_ci            return result;
1099bf215546Sopenharmony_ci      }
1100bf215546Sopenharmony_ci
1101bf215546Sopenharmony_ci      /* If the command buffer ends with a barrier we need to consume it now.
1102bf215546Sopenharmony_ci       *
1103bf215546Sopenharmony_ci       * FIXME: this will drain all hw queues. Instead, we could use the pending
1104bf215546Sopenharmony_ci       * barrier state to limit the queues we serialize against.
1105bf215546Sopenharmony_ci       */
1106bf215546Sopenharmony_ci      if (cmd_buffer->state.barrier.dst_mask) {
1107bf215546Sopenharmony_ci         result = queue_submit_noop_job(queue, submit->perf_pass_index,
1108bf215546Sopenharmony_ci                                        &sync_info, false);
1109bf215546Sopenharmony_ci         if (result != VK_SUCCESS)
1110bf215546Sopenharmony_ci            return result;
1111bf215546Sopenharmony_ci      }
1112bf215546Sopenharmony_ci   }
1113bf215546Sopenharmony_ci
1114bf215546Sopenharmony_ci   /* Finish by submitting a no-op job that synchronizes across all queues.
1115bf215546Sopenharmony_ci    * This will ensure that the signal semaphores don't get triggered until
1116bf215546Sopenharmony_ci    * all work on any queue completes. See Vulkan's signal operation order
1117bf215546Sopenharmony_ci    * requirements.
1118bf215546Sopenharmony_ci    */
1119bf215546Sopenharmony_ci   if (submit->signal_count > 0) {
1120bf215546Sopenharmony_ci      result = queue_submit_noop_job(queue, submit->perf_pass_index,
1121bf215546Sopenharmony_ci                                     &sync_info, true);
1122bf215546Sopenharmony_ci      if (result != VK_SUCCESS)
1123bf215546Sopenharmony_ci         return result;
1124bf215546Sopenharmony_ci   }
1125bf215546Sopenharmony_ci
1126bf215546Sopenharmony_ci   process_signals(queue, sync_info.signal_count, sync_info.signals);
1127bf215546Sopenharmony_ci
1128bf215546Sopenharmony_ci   return VK_SUCCESS;
1129bf215546Sopenharmony_ci}
1130bf215546Sopenharmony_ci
1131bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
1132bf215546Sopenharmony_civ3dv_QueueBindSparse(VkQueue _queue,
1133bf215546Sopenharmony_ci                     uint32_t bindInfoCount,
1134bf215546Sopenharmony_ci                     const VkBindSparseInfo *pBindInfo,
1135bf215546Sopenharmony_ci                     VkFence fence)
1136bf215546Sopenharmony_ci{
1137bf215546Sopenharmony_ci   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
1138bf215546Sopenharmony_ci   return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
1139bf215546Sopenharmony_ci}
1140