/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file crocus_fence.c
 *
 * Fences for driver and IPC serialisation, scheduling and synchronisation.
 */

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "util/u_inlines.h"
#include "intel/common/intel_gem.h"

#include "crocus_batch.h"
#include "crocus_bufmgr.h"
#include "crocus_context.h"
#include "crocus_fence.h"
#include "crocus_screen.h"

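/**
 * Thin wrapper around DRM_IOCTL_SYNCOBJ_CREATE: create a new kernel sync
 * object and return its handle (the designated initializer zeroes the
 * struct, so this returns 0 if the ioctl fails).
 */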
static uint32_t
gem_syncobj_create(int fd, uint32_t flags)
{
   struct drm_syncobj_create args = {
      .flags = flags,
   };

   intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &args);

   return args.handle;
}

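/**
 * Thin wrapper around DRM_IOCTL_SYNCOBJ_DESTROY: free a kernel sync object.
 */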
static void
gem_syncobj_destroy(int fd, uint32_t handle)
{
   struct drm_syncobj_destroy args = {
      .handle = handle,
   };

   intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_DESTROY, &args);
}

/**
 * Make a new sync-point.
 */
struct crocus_syncobj *
crocus_create_syncobj(struct crocus_screen *screen)
{
   struct crocus_syncobj *syncobj = malloc(sizeof(*syncobj));

   if (!syncobj)
      return NULL;

   syncobj->handle = gem_syncobj_create(screen->fd, 0);
   assert(syncobj->handle);

   pipe_reference_init(&syncobj->ref, 1);

   return syncobj;
}

void
crocus_syncobj_destroy(struct crocus_screen *screen,
                       struct crocus_syncobj *syncobj)
{
   gem_syncobj_destroy(screen->fd, syncobj->handle);
   free(syncobj);
}

/**
 * Add a sync-point to the batch, with the given flags.
 *
 * \p flags   One of I915_EXEC_FENCE_WAIT or I915_EXEC_FENCE_SIGNAL.
 */
void
crocus_batch_add_syncobj(struct crocus_batch *batch,
                         struct crocus_syncobj *syncobj, unsigned flags)
{
   struct drm_i915_gem_exec_fence *fence =
      util_dynarray_grow(&batch->exec_fences, struct drm_i915_gem_exec_fence, 1);

   *fence = (struct drm_i915_gem_exec_fence){
      .handle = syncobj->handle,
      .flags = flags,
   };

   struct crocus_syncobj **store =
      util_dynarray_grow(&batch->syncobjs, struct crocus_syncobj *, 1);

   *store = NULL;
   crocus_syncobj_reference(batch->screen, store, syncobj);
}

/**
 * Walk through a batch's dependencies (any I915_EXEC_FENCE_WAIT syncobjs)
 * and unreference any which have already passed.
 *
 * In particular, a seldom-used compute batch can accumulate references to
 * stale render batches that are no longer of interest, so this lets us
 * free those up.
 */
static void
clear_stale_syncobjs(struct crocus_batch *batch)
{
   struct crocus_screen *screen = batch->screen;

   int n = util_dynarray_num_elements(&batch->syncobjs, struct crocus_syncobj *);

   assert(n == util_dynarray_num_elements(&batch->exec_fences,
                                          struct drm_i915_gem_exec_fence));

   /* Skip the first syncobj, as it's the signalling one. */
   for (int i = n - 1; i > 0; i--) {
      struct crocus_syncobj **syncobj =
         util_dynarray_element(&batch->syncobjs, struct crocus_syncobj *, i);
      struct drm_i915_gem_exec_fence *fence =
         util_dynarray_element(&batch->exec_fences,
                               struct drm_i915_gem_exec_fence, i);
      assert(fence->flags & I915_EXEC_FENCE_WAIT);

      if (crocus_wait_syncobj(&screen->base, *syncobj, 0))
         continue;

      /* This sync object has already passed, there's no need to continue
       * marking it as a dependency; we can stop holding on to the reference.
       */
      crocus_syncobj_reference(screen, syncobj, NULL);

      /* Remove it from the lists; move the last element here. */
      struct crocus_syncobj **nth_syncobj =
         util_dynarray_pop_ptr(&batch->syncobjs, struct crocus_syncobj *);
      struct drm_i915_gem_exec_fence *nth_fence =
         util_dynarray_pop_ptr(&batch->exec_fences,
                               struct drm_i915_gem_exec_fence);

      if (syncobj != nth_syncobj) {
         *syncobj = *nth_syncobj;
         memcpy(fence, nth_fence, sizeof(*fence));
      }
   }
}

/* ------------------------------------------------------------------- */

struct pipe_fence_handle {
   struct pipe_reference ref;

   struct pipe_context *unflushed_ctx;

   struct crocus_fine_fence *fine[CROCUS_BATCH_COUNT];
};

static void
crocus_fence_destroy(struct pipe_screen *p_screen,
                     struct pipe_fence_handle *fence)
{
   struct crocus_screen *screen = (struct crocus_screen *)p_screen;

   for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++)
      crocus_fine_fence_reference(screen, &fence->fine[i], NULL);

   free(fence);
}

static void
crocus_fence_reference(struct pipe_screen *p_screen,
                       struct pipe_fence_handle **dst,
                       struct pipe_fence_handle *src)
{
   if (pipe_reference(&(*dst)->ref, &src->ref))
      crocus_fence_destroy(p_screen, *dst);

   *dst = src;
}

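/**
 * Wait for \p syncobj to signal.
 *
 * \p timeout_nsec is an absolute CLOCK_MONOTONIC timestamp, as that is
 * what DRM_IOCTL_SYNCOBJ_WAIT expects; passing 0 makes this a
 * non-blocking check.
 *
 * Note the return convention: false means the syncobj has signalled (or
 * was NULL); true means the wait timed out or failed, i.e. the fence is
 * still pending.
 */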
bool
crocus_wait_syncobj(struct pipe_screen *p_screen,
                    struct crocus_syncobj *syncobj, int64_t timeout_nsec)
{
   if (!syncobj)
      return false;

   struct crocus_screen *screen = (struct crocus_screen *)p_screen;
   struct drm_syncobj_wait args = {
      .handles = (uintptr_t)&syncobj->handle,
      .count_handles = 1,
      .timeout_nsec = timeout_nsec,
   };
   return intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
}

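/**
 * The pipe_context::flush implementation.
 *
 * Flushes all batches (unless PIPE_FLUSH_DEFERRED is set) and, if the
 * caller asked for one, builds a fence covering each batch's most recent
 * work: a new bottom-of-pipe fine fence for a deferred flush with queued
 * commands, or the batch's last fence otherwise.
 */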
static void
crocus_fence_flush(struct pipe_context *ctx,
                   struct pipe_fence_handle **out_fence, unsigned flags)
{
   struct crocus_screen *screen = (void *)ctx->screen;
   struct crocus_context *ice = (struct crocus_context *)ctx;

   const bool deferred = flags & PIPE_FLUSH_DEFERRED;

   if (!deferred) {
      for (unsigned i = 0; i < ice->batch_count; i++)
         crocus_batch_flush(&ice->batches[i]);
   }

   if (!out_fence)
      return;

   struct pipe_fence_handle *fence = calloc(1, sizeof(*fence));
   if (!fence)
      return;

   pipe_reference_init(&fence->ref, 1);

   if (deferred)
      fence->unflushed_ctx = ctx;

   for (unsigned b = 0; b < ice->batch_count; b++) {
      struct crocus_batch *batch = &ice->batches[b];

      if (deferred && crocus_batch_bytes_used(batch) > 0) {
         struct crocus_fine_fence *fine =
            crocus_fine_fence_new(batch, CROCUS_FENCE_BOTTOM_OF_PIPE);
         crocus_fine_fence_reference(screen, &fence->fine[b], fine);
         crocus_fine_fence_reference(screen, &fine, NULL);
      } else {
         /* This batch has no commands queued up (perhaps we just flushed,
          * or all the commands are on the other batch).  Wait for the last
          * syncobj on this engine - unless it's already finished by now.
          */
         if (crocus_fine_fence_signaled(batch->last_fence))
            continue;

         crocus_fine_fence_reference(screen, &fence->fine[b],
                                     batch->last_fence);
      }
   }

   crocus_fence_reference(ctx->screen, out_fence, NULL);
   *out_fence = fence;
}

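/**
 * The pipe_context::fence_server_sync implementation.
 *
 * Makes all future work in this context's batches wait for \p fence by
 * attaching its syncobjs as I915_EXEC_FENCE_WAIT dependencies.
 */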
static void
crocus_fence_await(struct pipe_context *ctx, struct pipe_fence_handle *fence)
{
   struct crocus_context *ice = (struct crocus_context *)ctx;

   /* Unflushed fences from the same context are no-ops. */
   if (ctx && ctx == fence->unflushed_ctx)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
      struct crocus_fine_fence *fine = fence->fine[i];

      if (crocus_fine_fence_signaled(fine))
         continue;

      for (unsigned b = 0; b < ice->batch_count; b++) {
         struct crocus_batch *batch = &ice->batches[b];

         /* We're going to make any future work in this batch wait for our
          * fence to have gone by.  But any currently queued work doesn't
          * need to wait.  Flush the batch now, so it can happen sooner.
          */
         crocus_batch_flush(batch);

         /* Before adding a new reference, clean out any stale ones. */
         clear_stale_syncobjs(batch);

         crocus_batch_add_syncobj(batch, fine->syncobj, I915_EXEC_FENCE_WAIT);
      }
   }
}

#define NSEC_PER_SEC (1000 * USEC_PER_SEC)
#define USEC_PER_SEC (1000 * MSEC_PER_SEC)
#define MSEC_PER_SEC (1000)

static uint64_t
gettime_ns(void)
{
   struct timespec current;
   clock_gettime(CLOCK_MONOTONIC, &current);
   return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
}

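/**
 * Convert a relative timeout into the absolute CLOCK_MONOTONIC timestamp
 * that DRM_IOCTL_SYNCOBJ_WAIT expects, clamping so the addition can't
 * overflow INT64_MAX.  A zero (poll) timeout passes through unchanged.
 */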
static uint64_t
rel2abs(uint64_t timeout)
{
   if (timeout == 0)
      return 0;

   uint64_t current_time = gettime_ns();
   uint64_t max_timeout = (uint64_t)INT64_MAX - current_time;

   timeout = MIN2(max_timeout, timeout);

   return current_time + timeout;
}

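/**
 * The pipe_screen::fence_finish implementation: wait up to \p timeout
 * nanoseconds for every still-pending part of \p fence to signal,
 * returning true if they all did.
 */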
static bool
crocus_fence_finish(struct pipe_screen *p_screen, struct pipe_context *ctx,
                    struct pipe_fence_handle *fence, uint64_t timeout)
{
   ctx = threaded_context_unwrap_sync(ctx);
   struct crocus_context *ice = (struct crocus_context *)ctx;
   struct crocus_screen *screen = (struct crocus_screen *)p_screen;

   /* If we created the fence with PIPE_FLUSH_DEFERRED, we may not have
    * flushed yet.  Check if our syncobj is the current batch's signalling
    * syncobj - if so, we haven't flushed and need to now.
    *
    * The Gallium docs mention that a flush will occur if \p ctx matches
    * the context the fence was created with.  It may be NULL, so we check
    * that it matches first.
    */
   if (ctx && ctx == fence->unflushed_ctx) {
      for (unsigned i = 0; i < ice->batch_count; i++) {
         struct crocus_fine_fence *fine = fence->fine[i];

         if (crocus_fine_fence_signaled(fine))
            continue;

         if (fine->syncobj == crocus_batch_get_signal_syncobj(&ice->batches[i]))
            crocus_batch_flush(&ice->batches[i]);
      }

      /* The fence is no longer deferred. */
      fence->unflushed_ctx = NULL;
   }

   unsigned int handle_count = 0;
   uint32_t handles[ARRAY_SIZE(fence->fine)];
   for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
      struct crocus_fine_fence *fine = fence->fine[i];

      if (crocus_fine_fence_signaled(fine))
         continue;

      handles[handle_count++] = fine->syncobj->handle;
   }

   if (handle_count == 0)
      return true;

   struct drm_syncobj_wait args = {
      .handles = (uintptr_t)handles,
      .count_handles = handle_count,
      .timeout_nsec = rel2abs(timeout),
      .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
   };
   if (fence->unflushed_ctx) {
      /* This fence had a deferred flush from another context.  We can't
       * safely flush it here, because the context might be bound to a
       * different thread, and poking at its internals wouldn't be safe.
       *
       * Instead, use the WAIT_FOR_SUBMIT flag to block and hope that
       * another thread submits the work.
       */
      args.flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   }
   return intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args) == 0;
}


#ifndef SYNC_IOC_MAGIC
/* Duplicated from linux/sync_file.h to avoid a build-time dependency
 * on new (v4.7) kernel headers.  Once distros are mostly shipping
 * something newer than v4.7, drop this and #include <linux/sync_file.h>
 * instead.
 */
struct sync_merge_data {
   char name[32];
   __s32 fd2;
   __s32 fence;
   __u32 flags;
   __u32 pad;
};

#define SYNC_IOC_MAGIC '>'
#define SYNC_IOC_MERGE _IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data)
#endif

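/**
 * Merge two sync-file fds into one via SYNC_IOC_MERGE, closing both
 * inputs; if either input is -1, the other is returned as-is.
 */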
static int
sync_merge_fd(int sync_fd, int new_fd)
{
   if (sync_fd == -1)
      return new_fd;

   if (new_fd == -1)
      return sync_fd;

   struct sync_merge_data args = {
      .name = "crocus fence",
      .fd2 = new_fd,
      .fence = -1,
   };

   intel_ioctl(sync_fd, SYNC_IOC_MERGE, &args);
   close(new_fd);
   close(sync_fd);

   return args.fence;
}

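/**
 * The pipe_screen::fence_get_fd implementation: export \p fence as a
 * sync-file fd by merging the fds of each still-pending syncobj.
 * Deferred (unflushed) fences can't be exported.
 */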
static int
crocus_fence_get_fd(struct pipe_screen *p_screen,
                    struct pipe_fence_handle *fence)
{
   struct crocus_screen *screen = (struct crocus_screen *)p_screen;
   int fd = -1;

   /* Deferred fences aren't supported. */
   if (fence->unflushed_ctx)
      return -1;

   for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
      struct crocus_fine_fence *fine = fence->fine[i];

      if (crocus_fine_fence_signaled(fine))
         continue;

      struct drm_syncobj_handle args = {
         .handle = fine->syncobj->handle,
         .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
         .fd = -1,
      };

      intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
      fd = sync_merge_fd(fd, args.fd);
   }

   if (fd == -1) {
      /* Our fence has no syncobjs recorded.  This means that all of the
       * batches had already completed, their syncobjs had been signalled,
       * and so we didn't bother to record them.  But we're being asked to
       * export such a fence.  So export a dummy already-signalled syncobj.
       */
      struct drm_syncobj_handle args = {
         .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
         .fd = -1,
      };

      args.handle = gem_syncobj_create(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED);
      intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
      gem_syncobj_destroy(screen->fd, args.handle);
      return args.fd;
   }

   return fd;
}

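/**
 * The pipe_context::create_fence_fd implementation: import a sync-file or
 * syncobj fd as a pipe_fence_handle, wrapped in a single fine fence.
 */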
static void
crocus_fence_create_fd(struct pipe_context *ctx, struct pipe_fence_handle **out,
                       int fd, enum pipe_fd_type type)
{
   assert(type == PIPE_FD_TYPE_NATIVE_SYNC || type == PIPE_FD_TYPE_SYNCOBJ);

   struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
   struct drm_syncobj_handle args = {
      .fd = fd,
   };

   if (type == PIPE_FD_TYPE_NATIVE_SYNC) {
      args.flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE;
      args.handle = gem_syncobj_create(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED);
   }

   if (intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args) == -1) {
      fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE failed: %s\n",
              strerror(errno));
      if (type == PIPE_FD_TYPE_NATIVE_SYNC)
         gem_syncobj_destroy(screen->fd, args.handle);
      *out = NULL;
      return;
   }

   struct crocus_syncobj *syncobj = malloc(sizeof(*syncobj));
   if (!syncobj) {
      /* Don't leak the kernel syncobj we just imported or created. */
      gem_syncobj_destroy(screen->fd, args.handle);
      *out = NULL;
      return;
   }
   syncobj->handle = args.handle;
   pipe_reference_init(&syncobj->ref, 1);

   struct crocus_fine_fence *fine = calloc(1, sizeof(*fine));
   if (!fine) {
      crocus_syncobj_destroy(screen, syncobj);
      *out = NULL;
      return;
   }

   static const uint32_t zero = 0;

   /* Fences work in terms of crocus_fine_fence, but we don't actually have a
    * seqno for an imported fence.  So, create a fake one which always
    * returns as 'not signaled' so we fall back to using the sync object.
    */
   fine->seqno = UINT32_MAX;
   fine->map = &zero;
   fine->syncobj = syncobj;
   fine->flags = CROCUS_FENCE_END;
   pipe_reference_init(&fine->reference, 1);

   struct pipe_fence_handle *fence = calloc(1, sizeof(*fence));
   if (!fence) {
      free(fine);
      crocus_syncobj_destroy(screen, syncobj);
      *out = NULL;
      return;
   }
   pipe_reference_init(&fence->ref, 1);
   fence->fine[0] = fine;

   *out = fence;
}

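/**
 * The pipe_context::fence_server_signal implementation: make each batch
 * signal the fence's still-pending syncobjs when it executes, then flush
 * any batch that picked up a signal operation.
 */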
static void
crocus_fence_signal(struct pipe_context *ctx, struct pipe_fence_handle *fence)
{
   struct crocus_context *ice = (struct crocus_context *)ctx;

   /* Unflushed fences from the same context are no-ops. */
   if (ctx == fence->unflushed_ctx)
      return;

   for (unsigned b = 0; b < ice->batch_count; b++) {
      for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
         struct crocus_fine_fence *fine = fence->fine[i];

         /* Skip fences that have already signaled. */
         if (crocus_fine_fence_signaled(fine))
            continue;

         ice->batches[b].contains_fence_signal = true;
         crocus_batch_add_syncobj(&ice->batches[b], fine->syncobj,
                                  I915_EXEC_FENCE_SIGNAL);
      }
      if (ice->batches[b].contains_fence_signal)
         crocus_batch_flush(&ice->batches[b]);
   }
}

void
crocus_init_screen_fence_functions(struct pipe_screen *screen)
{
   screen->fence_reference = crocus_fence_reference;
   screen->fence_finish = crocus_fence_finish;
   screen->fence_get_fd = crocus_fence_get_fd;
}

void
crocus_init_context_fence_functions(struct pipe_context *ctx)
{
   ctx->flush = crocus_fence_flush;
   ctx->create_fence_fd = crocus_fence_create_fd;
   ctx->fence_server_sync = crocus_fence_await;
   ctx->fence_server_signal = crocus_fence_signal;
}