/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_device.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "genxml/gen_macros.h"

#include "decode.h"

#include "panvk_private.h"
#include "panvk_cs.h"

#include "vk_drm_syncobj.h"

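/* Submit one batch to the kernel and optionally decode/dump it for debugging.
 * The vertex/tiler job chain and the fragment job are two separate
 * DRM_IOCTL_PANFROST_SUBMIT calls; when both are present, the fragment
 * submission waits on the queue syncobj signalled by the first one so the
 * two execute in order. A batch that was already issued is reset before
 * being re-submitted.
 */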
static void
panvk_queue_submit_batch(struct panvk_queue *queue,
                         struct panvk_batch *batch,
                         uint32_t *bos, unsigned nr_bos,
                         uint32_t *in_fences,
                         unsigned nr_in_fences)
{
   const struct panvk_device *dev = queue->device;
   unsigned debug = dev->physical_device->instance->debug_flags;
   const struct panfrost_device *pdev = &dev->physical_device->pdev;
   int ret;

   /* Reset the batch if it's already been issued */
   if (batch->issued) {
      util_dynarray_foreach(&batch->jobs, void *, job)
         memset((*job), 0, 4 * 4);

      /* Reset the tiler before re-issuing the batch */
      if (batch->tiler.descs.cpu) {
         memcpy(batch->tiler.descs.cpu, batch->tiler.templ,
                pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));
      }
   }

   if (batch->scoreboard.first_job) {
      struct drm_panfrost_submit submit = {
         .bo_handles = (uintptr_t)bos,
         .bo_handle_count = nr_bos,
         .in_syncs = (uintptr_t)in_fences,
         .in_sync_count = nr_in_fences,
         .out_sync = queue->sync,
         .jc = batch->scoreboard.first_job,
      };

      ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
      assert(!ret);

      if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) {
         ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL);
         assert(!ret);
      }

      if (debug & PANVK_DEBUG_TRACE)
         GENX(pandecode_jc)(batch->scoreboard.first_job, pdev->gpu_id);

      if (debug & PANVK_DEBUG_DUMP)
         pandecode_dump_mappings();
   }

   if (batch->fragment_job) {
      struct drm_panfrost_submit submit = {
         .bo_handles = (uintptr_t)bos,
         .bo_handle_count = nr_bos,
         .out_sync = queue->sync,
         .jc = batch->fragment_job,
         .requirements = PANFROST_JD_REQ_FS,
      };

      if (batch->scoreboard.first_job) {
         submit.in_syncs = (uintptr_t)(&queue->sync);
         submit.in_sync_count = 1;
      } else {
         submit.in_syncs = (uintptr_t)in_fences;
         submit.in_sync_count = nr_in_fences;
      }

      ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
      assert(!ret);
      if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) {
         ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL);
         assert(!ret);
      }

      if (debug & PANVK_DEBUG_TRACE)
         GENX(pandecode_jc)(batch->fragment_job, pdev->gpu_id);

      if (debug & PANVK_DEBUG_DUMP)
         pandecode_dump_mappings();
   }

   if (debug & PANVK_DEBUG_TRACE)
      pandecode_next_frame();

   batch->issued = true;
}

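/* Copy the current payload of the queue syncobj into another syncobj by
 * exporting it as a sync file and importing that file into the target.
 */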
static void
panvk_queue_transfer_sync(struct panvk_queue *queue, uint32_t syncobj)
{
   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
   int ret;

   struct drm_syncobj_handle handle = {
      .handle = queue->sync,
      .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
      .fd = -1,
   };

   ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
   assert(!ret);
   assert(handle.fd >= 0);

   handle.handle = syncobj;
   ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle);
   assert(!ret);

   close(handle.fd);
}

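/* Append the syncobjs of all VkEvent wait operations recorded in the batch
 * to the list of fences the kernel submission must wait on. Set/reset
 * operations are handled after submission in panvk_signal_event_syncobjs().
 */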
static void
panvk_add_wait_event_syncobjs(struct panvk_batch *batch, uint32_t *in_fences, unsigned *nr_in_fences)
{
   util_dynarray_foreach(&batch->event_ops, struct panvk_event_op, op) {
      switch (op->type) {
      case PANVK_EVENT_OP_SET:
         /* Nothing to do yet */
         break;
      case PANVK_EVENT_OP_RESET:
         /* Nothing to do yet */
         break;
      case PANVK_EVENT_OP_WAIT:
         in_fences[(*nr_in_fences)++] = op->event->syncobj;
         break;
      default:
         unreachable("bad panvk_event_op type\n");
      }
   }
}

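/* Process set/reset event operations once the batch has been submitted: a
 * set transfers the queue syncobj payload to the event syncobj, a reset
 * clears the event syncobj.
 */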
static void
panvk_signal_event_syncobjs(struct panvk_queue *queue, struct panvk_batch *batch)
{
   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;

   util_dynarray_foreach(&batch->event_ops, struct panvk_event_op, op) {
      switch (op->type) {
      case PANVK_EVENT_OP_SET: {
         panvk_queue_transfer_sync(queue, op->event->syncobj);
         break;
      }
      case PANVK_EVENT_OP_RESET: {
         struct panvk_event *event = op->event;

         struct drm_syncobj_array objs = {
            .handles = (uint64_t) (uintptr_t) &event->syncobj,
            .count_handles = 1
         };

         int ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_RESET, &objs);
         assert(!ret);
         break;
      }
      case PANVK_EVENT_OP_WAIT:
         /* Nothing left to do */
         break;
      default:
         unreachable("bad panvk_event_op type\n");
      }
   }
}

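/* Queue submission entry point called by the common vk_queue code. Each
 * batch of each command buffer is submitted separately: its BO list is
 * gathered from the command-buffer pools and the framebuffer/blit/tiler
 * resources, deduplicated, then submitted together with the wait semaphores
 * and event wait syncobjs. Signal semaphores are filled from the queue
 * syncobj once all batches have been issued.
 */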
VkResult
panvk_per_arch(queue_submit)(struct vk_queue *vk_queue,
                             struct vk_queue_submit *submit)
{
   struct panvk_queue *queue =
      container_of(vk_queue, struct panvk_queue, vk);
   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;

   unsigned nr_semaphores = submit->wait_count + 1;
   uint32_t semaphores[nr_semaphores];

   semaphores[0] = queue->sync;
   for (unsigned i = 0; i < submit->wait_count; i++) {
      assert(vk_sync_type_is_drm_syncobj(submit->waits[i].sync->type));
      struct vk_drm_syncobj *syncobj =
         vk_sync_as_drm_syncobj(submit->waits[i].sync);

      semaphores[i + 1] = syncobj->syncobj;
   }

   for (uint32_t j = 0; j < submit->command_buffer_count; ++j) {
      struct panvk_cmd_buffer *cmdbuf =
         container_of(submit->command_buffers[j], struct panvk_cmd_buffer, vk);

      list_for_each_entry(struct panvk_batch, batch, &cmdbuf->batches, node) {
         /* FIXME: should be done at the batch level */
         unsigned nr_bos =
            panvk_pool_num_bos(&cmdbuf->desc_pool) +
            panvk_pool_num_bos(&cmdbuf->varying_pool) +
            panvk_pool_num_bos(&cmdbuf->tls_pool) +
            (batch->fb.info ? batch->fb.info->attachment_count : 0) +
            (batch->blit.src ? 1 : 0) +
            (batch->blit.dst ? 1 : 0) +
            (batch->scoreboard.first_tiler ? 1 : 0) + 1;
         unsigned bo_idx = 0;
         uint32_t bos[nr_bos];

         panvk_pool_get_bo_handles(&cmdbuf->desc_pool, &bos[bo_idx]);
         bo_idx += panvk_pool_num_bos(&cmdbuf->desc_pool);

         panvk_pool_get_bo_handles(&cmdbuf->varying_pool, &bos[bo_idx]);
         bo_idx += panvk_pool_num_bos(&cmdbuf->varying_pool);

         panvk_pool_get_bo_handles(&cmdbuf->tls_pool, &bos[bo_idx]);
         bo_idx += panvk_pool_num_bos(&cmdbuf->tls_pool);

         if (batch->fb.info) {
            for (unsigned i = 0; i < batch->fb.info->attachment_count; i++) {
               bos[bo_idx++] = batch->fb.info->attachments[i].iview->pview.image->data.bo->gem_handle;
            }
         }

         if (batch->blit.src)
            bos[bo_idx++] = batch->blit.src->gem_handle;

         if (batch->blit.dst)
            bos[bo_idx++] = batch->blit.dst->gem_handle;

         if (batch->scoreboard.first_tiler)
            bos[bo_idx++] = pdev->tiler_heap->gem_handle;

         bos[bo_idx++] = pdev->sample_positions->gem_handle;
         assert(bo_idx == nr_bos);

         /* Merge identical BO entries. */
         for (unsigned x = 0; x < nr_bos; x++) {
            for (unsigned y = x + 1; y < nr_bos; ) {
               if (bos[x] == bos[y])
                  bos[y] = bos[--nr_bos];
               else
                  y++;
            }
         }

         unsigned nr_in_fences = 0;
         unsigned max_wait_event_syncobjs =
            util_dynarray_num_elements(&batch->event_ops,
                                       struct panvk_event_op);
         uint32_t in_fences[nr_semaphores + max_wait_event_syncobjs];
         memcpy(in_fences, semaphores, nr_semaphores * sizeof(*in_fences));
         nr_in_fences += nr_semaphores;

         panvk_add_wait_event_syncobjs(batch, in_fences, &nr_in_fences);

         panvk_queue_submit_batch(queue, batch, bos, nr_bos, in_fences, nr_in_fences);

         panvk_signal_event_syncobjs(queue, batch);
      }
   }

   /* Transfer the out fence to signal semaphores */
   for (unsigned i = 0; i < submit->signal_count; i++) {
      assert(vk_sync_type_is_drm_syncobj(submit->signals[i].sync->type));
      struct vk_drm_syncobj *syncobj =
         vk_sync_as_drm_syncobj(submit->signals[i].sync);

      panvk_queue_transfer_sync(queue, syncobj->syncobj);
   }

   return VK_SUCCESS;
}

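/* vkCreateSampler implementation: allocate the sampler object and emit the
 * hardware sampler descriptor from the create info.
 */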
VkResult
panvk_per_arch(CreateSampler)(VkDevice _device,
                              const VkSamplerCreateInfo *pCreateInfo,
                              const VkAllocationCallbacks *pAllocator,
                              VkSampler *pSampler)
{
   VK_FROM_HANDLE(panvk_device, device, _device);
   struct panvk_sampler *sampler;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

   sampler = vk_object_alloc(&device->vk, pAllocator, sizeof(*sampler),
                             VK_OBJECT_TYPE_SAMPLER);
   if (!sampler)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   STATIC_ASSERT(sizeof(sampler->desc) >= pan_size(SAMPLER));
   panvk_per_arch(emit_sampler)(pCreateInfo, &sampler->desc);
   *pSampler = panvk_sampler_to_handle(sampler);

   return VK_SUCCESS;
}