/*
 * Copyright 2013-2017 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include "si_build_pm4.h"
#include "util/os_time.h"
#include "util/u_memory.h"
#include "util/u_queue.h"
#include "util/u_upload_mgr.h"

#include <libsync.h>

struct si_fine_fence {
   struct si_resource *buf;
   unsigned offset;
};

struct si_fence {
   struct pipe_reference reference;
   struct pipe_fence_handle *gfx;
   struct tc_unflushed_batch_token *tc_token;
   struct util_queue_fence ready;

   /* If the context wasn't flushed at fence creation, this is non-NULL. */
   struct {
      struct si_context *ctx;
      unsigned ib_index;
   } gfx_unflushed;

   struct si_fine_fence fine;
};
/**
 * Write an EOP event.
 *
 * \param event        EVENT_TYPE_*
 * \param event_flags  Optional cache flush flags (TC)
 * \param dst_sel      MEM or TC_L2
 * \param int_sel      NONE or SEND_DATA_AFTER_WR_CONFIRM
 * \param data_sel     DISCARD, VALUE_32BIT, TIMESTAMP, or GDS
 * \param buf          Buffer backing \p va; added to the buffer list if non-NULL
 * \param va           GPU address
 * \param new_fence    Fence value to write for this event
 * \param query_type   PIPE_QUERY_* of the caller (used for the GFX9 EOP bug workaround)
 */
void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigned event,
                       unsigned event_flags, unsigned dst_sel, unsigned int_sel, unsigned data_sel,
                       struct si_resource *buf, uint64_t va, uint32_t new_fence,
                       unsigned query_type)
{
   unsigned op = EVENT_TYPE(event) |
                 EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) |
                 event_flags;
   unsigned sel = EOP_DST_SEL(dst_sel) | EOP_INT_SEL(int_sel) | EOP_DATA_SEL(data_sel);
   bool compute_ib = !ctx->has_graphics;

   radeon_begin(cs);

   if (ctx->gfx_level >= GFX9 || (compute_ib && ctx->gfx_level >= GFX7)) {
      /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
       * counters) must immediately precede every timestamp event to
       * prevent a GPU hang on GFX9.
       *
       * Occlusion queries don't need to do it here, because they
       * always do ZPASS_DONE before the timestamp.
       */
      if (ctx->gfx_level == GFX9 && !compute_ib && query_type != PIPE_QUERY_OCCLUSION_COUNTER &&
          query_type != PIPE_QUERY_OCCLUSION_PREDICATE &&
          query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
         struct si_screen *sscreen = ctx->screen;
         struct si_resource *scratch;

         if (!ctx->ws->cs_is_secure(&ctx->gfx_cs)) {
            scratch = ctx->eop_bug_scratch;
         } else {
            assert(ctx->screen->info.has_tmz_support);
            if (!ctx->eop_bug_scratch_tmz)
               ctx->eop_bug_scratch_tmz =
                  si_aligned_buffer_create(&sscreen->b,
                                           PIPE_RESOURCE_FLAG_ENCRYPTED |
                                           PIPE_RESOURCE_FLAG_UNMAPPABLE |
                                           SI_RESOURCE_FLAG_DRIVER_INTERNAL,
                                           PIPE_USAGE_DEFAULT,
                                           16 * sscreen->info.max_render_backends, 256);

            scratch = ctx->eop_bug_scratch_tmz;
         }

         assert(16 * ctx->screen->info.max_render_backends <= scratch->b.b.width0);
         radeon_emit(PKT3(PKT3_EVENT_WRITE, 2, 0));
         radeon_emit(EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
         radeon_emit(scratch->gpu_address);
         radeon_emit(scratch->gpu_address >> 32);

         radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, scratch,
                                   RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
      }

      radeon_emit(PKT3(PKT3_RELEASE_MEM, ctx->gfx_level >= GFX9 ? 6 : 5, 0));
      radeon_emit(op);
      radeon_emit(sel);
      radeon_emit(va);        /* address lo */
      radeon_emit(va >> 32);  /* address hi */
      radeon_emit(new_fence); /* immediate data lo */
      radeon_emit(0);         /* immediate data hi */
      if (ctx->gfx_level >= GFX9)
         radeon_emit(0); /* unused */
   } else {
      if (ctx->gfx_level == GFX7 || ctx->gfx_level == GFX8) {
         struct si_resource *scratch = ctx->eop_bug_scratch;
         uint64_t va = scratch->gpu_address;

         /* Two EOP events are required to make all engines go idle
          * (and optional cache flushes executed) before the timestamp
          * is written.
          */
         radeon_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
         radeon_emit(op);
         radeon_emit(va);
         radeon_emit(((va >> 32) & 0xffff) | sel);
         radeon_emit(0); /* immediate data */
         radeon_emit(0); /* unused */

         radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, scratch,
                                   RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
      }

      radeon_emit(PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
      radeon_emit(op);
      radeon_emit(va);
      radeon_emit(((va >> 32) & 0xffff) | sel);
      radeon_emit(new_fence); /* immediate data */
      radeon_emit(0);         /* unused */
   }

   radeon_end();

   if (buf) {
      radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, buf, RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
   }
}

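/* Return the number of dwords to reserve for an EOP fence write emitted by
 * si_cp_release_mem. GFX7/GFX8 need twice as many because the EOP bug
 * workaround emits a second EOP event.
 */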
unsigned si_cp_write_fence_dwords(struct si_screen *screen)
{
   unsigned dwords = 6;

   if (screen->info.gfx_level == GFX7 || screen->info.gfx_level == GFX8)
      dwords *= 2;

   return dwords;
}

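/* Emit a WAIT_REG_MEM packet that makes the CP poll the 32-bit value at "va"
 * until (value & mask) compares against "ref" according to the compare
 * function given in "flags" (WAIT_REG_MEM_* bits).
 */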
void si_cp_wait_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, uint64_t va, uint32_t ref,
                    uint32_t mask, unsigned flags)
{
   radeon_begin(cs);
   radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
   radeon_emit(WAIT_REG_MEM_MEM_SPACE(1) | flags);
   radeon_emit(va);
   radeon_emit(va >> 32);
   radeon_emit(ref);  /* reference value */
   radeon_emit(mask); /* mask */
   radeon_emit(4);    /* poll interval */
   radeon_end();
}

static void si_add_fence_dependency(struct si_context *sctx, struct pipe_fence_handle *fence)
{
   struct radeon_winsys *ws = sctx->ws;

   ws->cs_add_fence_dependency(&sctx->gfx_cs, fence, 0);
}

static void si_add_syncobj_signal(struct si_context *sctx, struct pipe_fence_handle *fence)
{
   sctx->ws->cs_add_syncobj_signal(&sctx->gfx_cs, fence);
}

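/* pipe_screen::fence_reference: drop the reference held in *dst and replace it
 * with src; the old fence is destroyed once its reference count reaches zero.
 */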
static void si_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **dst,
                               struct pipe_fence_handle *src)
{
   struct radeon_winsys *ws = ((struct si_screen *)screen)->ws;
   struct si_fence **sdst = (struct si_fence **)dst;
   struct si_fence *ssrc = (struct si_fence *)src;

   if (pipe_reference(&(*sdst)->reference, &ssrc->reference)) {
      ws->fence_reference(&(*sdst)->gfx, NULL);
      tc_unflushed_batch_token_reference(&(*sdst)->tc_token, NULL);
      si_resource_reference(&(*sdst)->fine.buf, NULL);
      FREE(*sdst);
   }
   *sdst = ssrc;
}

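/* Allocate an si_fence with a single reference and an initialized (signaled)
 * "ready" fence; callers that create unflushed fences reset it themselves.
 */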
static struct si_fence *si_create_multi_fence(void)
{
   struct si_fence *fence = CALLOC_STRUCT(si_fence);
   if (!fence)
      return NULL;

   pipe_reference_init(&fence->reference, 1);
   util_queue_fence_init(&fence->ready);

   return fence;
}

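/* Create an unflushed fence for the threaded context. The "ready" fence is
 * reset here and signaled later by si_flush_all_queues once the deferred
 * flush (TC_FLUSH_ASYNC) reaches the driver thread.
 */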
struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx,
                                          struct tc_unflushed_batch_token *tc_token)
{
   struct si_fence *fence = si_create_multi_fence();
   if (!fence)
      return NULL;

   util_queue_fence_reset(&fence->ready);
   tc_unflushed_batch_token_reference(&fence->tc_token, tc_token);

   return (struct pipe_fence_handle *)fence;
}

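/* Check whether the GPU has written the fine fence value, using an
 * unsynchronized CPU mapping (any non-zero value means signaled).
 */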
static bool si_fine_fence_signaled(struct radeon_winsys *rws, const struct si_fine_fence *fine)
{
   char *map =
      rws->buffer_map(rws, fine->buf->buf, NULL, PIPE_MAP_READ | PIPE_MAP_UNSYNCHRONIZED);
   if (!map)
      return false;

   uint32_t *fence = (uint32_t *)(map + fine->offset);
   return *fence != 0;
}

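/* Allocate a 4-byte fence slot in cached GTT memory, clear it, and emit either
 * a top-of-pipe CP write or a bottom-of-pipe EOP release that sets it to a
 * non-zero value (0x80000000).
 */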
static void si_fine_fence_set(struct si_context *ctx, struct si_fine_fence *fine, unsigned flags)
{
   uint32_t *fence_ptr;

   assert(util_bitcount(flags & (PIPE_FLUSH_TOP_OF_PIPE | PIPE_FLUSH_BOTTOM_OF_PIPE)) == 1);

   /* Use cached system memory for the fence. */
   u_upload_alloc(ctx->cached_gtt_allocator, 0, 4, 4, &fine->offset,
                  (struct pipe_resource **)&fine->buf, (void **)&fence_ptr);
   if (!fine->buf)
      return;

   *fence_ptr = 0;

   if (flags & PIPE_FLUSH_TOP_OF_PIPE) {
      uint32_t value = 0x80000000;

      si_cp_write_data(ctx, fine->buf, fine->offset, 4, V_370_MEM, V_370_PFP, &value);
   } else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) {
      uint64_t fence_va = fine->buf->gpu_address + fine->offset;

      radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, fine->buf, RADEON_USAGE_WRITE | RADEON_PRIO_QUERY);
      si_cp_release_mem(ctx, &ctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
                        EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, NULL, fence_va, 0x80000000,
                        PIPE_QUERY_GPU_FINISHED);
   } else {
      assert(false);
   }
}

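/* pipe_screen::fence_finish: wait for a fence with a timeout in nanoseconds
 * (0 polls, PIPE_TIMEOUT_INFINITE waits forever). This first waits for the
 * deferred flush of a threaded-context fence, then flushes the gfx IB itself
 * if the fence was created unflushed in this context, and finally waits on
 * the winsys fence and/or the fine-grained fence.
 */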
static bool si_fence_finish(struct pipe_screen *screen, struct pipe_context *ctx,
                            struct pipe_fence_handle *fence, uint64_t timeout)
{
   struct radeon_winsys *rws = ((struct si_screen *)screen)->ws;
   struct si_fence *sfence = (struct si_fence *)fence;
   struct si_context *sctx;
   int64_t abs_timeout = os_time_get_absolute_timeout(timeout);

   ctx = threaded_context_unwrap_sync(ctx);
   sctx = (struct si_context *)(ctx ? ctx : NULL);

   if (!util_queue_fence_is_signalled(&sfence->ready)) {
      if (sfence->tc_token) {
         /* Ensure that si_flush_from_st will be called for
          * this fence, but only if we're in the API thread
          * where the context is current.
          *
          * Note that the batch containing the flush may already
          * be in flight in the driver thread, so the fence
          * may not be ready yet when this call returns.
          */
         threaded_context_flush(ctx, sfence->tc_token, timeout == 0);
      }

      if (!timeout)
         return false;

      if (timeout == PIPE_TIMEOUT_INFINITE) {
         util_queue_fence_wait(&sfence->ready);
      } else {
         if (!util_queue_fence_wait_timeout(&sfence->ready, abs_timeout))
            return false;
      }

      if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
         int64_t time = os_time_get_nano();
         timeout = abs_timeout > time ? abs_timeout - time : 0;
      }
   }

   if (!sfence->gfx)
      return true;

   if (sfence->fine.buf && si_fine_fence_signaled(rws, &sfence->fine)) {
      rws->fence_reference(&sfence->gfx, NULL);
      si_resource_reference(&sfence->fine.buf, NULL);
      return true;
   }

   /* Flush the gfx IB if it hasn't been flushed yet. */
   if (sctx && sfence->gfx_unflushed.ctx == sctx &&
       sfence->gfx_unflushed.ib_index == sctx->num_gfx_cs_flushes) {
      /* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile)
       * spec says:
       *
       *    "If the sync object being blocked upon will not be
       *     signaled in finite time (for example, by an associated
       *     fence command issued previously, but not yet flushed to
       *     the graphics pipeline), then ClientWaitSync may hang
       *     forever. To help prevent this behavior, if
       *     ClientWaitSync is called and all of the following are
       *     true:
       *
       *     * the SYNC_FLUSH_COMMANDS_BIT bit is set in flags,
       *     * sync is unsignaled when ClientWaitSync is called,
       *     * and the calls to ClientWaitSync and FenceSync were
       *       issued from the same context,
       *
       *     then the GL will behave as if the equivalent of Flush
       *     were inserted immediately after the creation of sync."
       *
       * This means we need to flush for such fences even when we're
       * not going to wait.
       */
      si_flush_gfx_cs(sctx, (timeout ? 0 : PIPE_FLUSH_ASYNC) | RADEON_FLUSH_START_NEXT_GFX_IB_NOW,
                      NULL);
      sfence->gfx_unflushed.ctx = NULL;

      if (!timeout)
         return false;

      /* Recompute the timeout after all that. */
      if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
         int64_t time = os_time_get_nano();
         timeout = abs_timeout > time ? abs_timeout - time : 0;
      }
   }

   if (rws->fence_wait(rws, sfence->gfx, timeout))
      return true;

   /* Re-check in case the GPU is slow or hangs, but the commands before
    * the fine-grained fence have completed. */
   if (sfence->fine.buf && si_fine_fence_signaled(rws, &sfence->fine))
      return true;

   return false;
}

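/* pipe_context::create_fence_fd: import a sync_file or syncobj fd into a new
 * fence. On failure (unsupported import or winsys error), *pfence stays NULL.
 */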
static void si_create_fence_fd(struct pipe_context *ctx, struct pipe_fence_handle **pfence, int fd,
                               enum pipe_fd_type type)
{
   struct si_screen *sscreen = (struct si_screen *)ctx->screen;
   struct radeon_winsys *ws = sscreen->ws;
   struct si_fence *sfence;

   *pfence = NULL;

   sfence = si_create_multi_fence();
   if (!sfence)
      return;

   switch (type) {
   case PIPE_FD_TYPE_NATIVE_SYNC:
      if (!sscreen->info.has_fence_to_handle)
         goto finish;

      sfence->gfx = ws->fence_import_sync_file(ws, fd);
      break;

   case PIPE_FD_TYPE_SYNCOBJ:
      if (!sscreen->info.has_syncobj)
         goto finish;

      sfence->gfx = ws->fence_import_syncobj(ws, fd);
      break;

   default:
      unreachable("bad fence fd type when importing");
   }

finish:
   if (!sfence->gfx) {
      FREE(sfence);
      return;
   }

   *pfence = (struct pipe_fence_handle *)sfence;
}

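/* pipe_screen::fence_get_fd: export the gfx fence as a sync_file fd. Returns a
 * signaled sync_file if there is no gfx fence, or -1 on failure; deferred
 * (unflushed) fences are not supported.
 */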
static int si_fence_get_fd(struct pipe_screen *screen, struct pipe_fence_handle *fence)
{
   struct si_screen *sscreen = (struct si_screen *)screen;
   struct radeon_winsys *ws = sscreen->ws;
   struct si_fence *sfence = (struct si_fence *)fence;
   int gfx_fd = -1;

   if (!sscreen->info.has_fence_to_handle)
      return -1;

   util_queue_fence_wait(&sfence->ready);

   /* Deferred fences aren't supported. */
   assert(!sfence->gfx_unflushed.ctx);
   if (sfence->gfx_unflushed.ctx)
      return -1;

   if (sfence->gfx) {
      gfx_fd = ws->fence_export_sync_file(ws, sfence->gfx);
      if (gfx_fd == -1) {
         return -1;
      }
   }

   /* If we don't have FDs at this point, it means we don't have fences
    * either. */
   if (gfx_fd == -1)
      return ws->export_signalled_sync_file(ws);

   return gfx_fd;
}

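/* Common flush path used by pipe_context::flush and fence_server_signal.
 * Emits a fine-grained fence if requested, flushes the gfx IB (or creates a
 * deferred fence when PIPE_FLUSH_DEFERRED allows it), and fills *fence when
 * the caller asked for one. force_flush forces a submission even if nothing
 * has been emitted since the last flush.
 */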
static void si_flush_all_queues(struct pipe_context *ctx,
                                struct pipe_fence_handle **fence,
                                unsigned flags, bool force_flush)
{
   struct pipe_screen *screen = ctx->screen;
   struct si_context *sctx = (struct si_context *)ctx;
   struct radeon_winsys *ws = sctx->ws;
   struct pipe_fence_handle *gfx_fence = NULL;
   bool deferred_fence = false;
   struct si_fine_fence fine = {};
   unsigned rflags = PIPE_FLUSH_ASYNC;

   if (!(flags & PIPE_FLUSH_DEFERRED)) {
      si_flush_implicit_resources(sctx);
   }

   if (flags & PIPE_FLUSH_END_OF_FRAME)
      rflags |= PIPE_FLUSH_END_OF_FRAME;

   if (flags & (PIPE_FLUSH_TOP_OF_PIPE | PIPE_FLUSH_BOTTOM_OF_PIPE)) {
      assert(flags & PIPE_FLUSH_DEFERRED);
      assert(fence);

      si_fine_fence_set(sctx, &fine, flags);
   }

   if (force_flush) {
      sctx->initial_gfx_cs_size = 0;
   }

   if (!radeon_emitted(&sctx->gfx_cs, sctx->initial_gfx_cs_size)) {
      if (fence)
         ws->fence_reference(&gfx_fence, sctx->last_gfx_fence);
      if (!(flags & PIPE_FLUSH_DEFERRED))
         ws->cs_sync_flush(&sctx->gfx_cs);

      tc_driver_internal_flush_notify(sctx->tc);
   } else {
      /* Instead of flushing, create a deferred fence. Constraints:
       * - the gallium frontend must allow a deferred flush.
       * - the gallium frontend must request a fence.
       * - fence_get_fd is not allowed.
       * Thread safety in fence_finish must be ensured by the gallium frontend.
       */
      if (flags & PIPE_FLUSH_DEFERRED && !(flags & PIPE_FLUSH_FENCE_FD) && fence) {
         gfx_fence = sctx->ws->cs_get_next_fence(&sctx->gfx_cs);
         deferred_fence = true;
      } else {
         si_flush_gfx_cs(sctx, rflags, fence ? &gfx_fence : NULL);
      }
   }

   /* Both engines can signal out of order, so we need to keep both fences. */
   if (fence) {
      struct si_fence *new_fence;

      if (flags & TC_FLUSH_ASYNC) {
         new_fence = (struct si_fence *)*fence;
         assert(new_fence);
      } else {
         new_fence = si_create_multi_fence();
         if (!new_fence) {
            ws->fence_reference(&gfx_fence, NULL);
            goto finish;
         }

         screen->fence_reference(screen, fence, NULL);
         *fence = (struct pipe_fence_handle *)new_fence;
      }

      /* If both fences are NULL, fence_finish will always return true. */
      new_fence->gfx = gfx_fence;

      if (deferred_fence) {
         new_fence->gfx_unflushed.ctx = sctx;
         new_fence->gfx_unflushed.ib_index = sctx->num_gfx_cs_flushes;
      }

      new_fence->fine = fine;
      fine.buf = NULL;

      if (flags & TC_FLUSH_ASYNC) {
         util_queue_fence_signal(&new_fence->ready);
         tc_unflushed_batch_token_reference(&new_fence->tc_token, NULL);
      }
   }
   assert(!fine.buf);
finish:
   if (!(flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC))) {
      ws->cs_sync_flush(&sctx->gfx_cs);
   }
}

static void si_flush_from_st(struct pipe_context *ctx, struct pipe_fence_handle **fence,
                             unsigned flags)
{
   si_flush_all_queues(ctx, fence, flags, false);
}

static void si_fence_server_signal(struct pipe_context *ctx, struct pipe_fence_handle *fence)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_fence *sfence = (struct si_fence *)fence;

   assert(sfence->gfx);

   if (sfence->gfx)
      si_add_syncobj_signal(sctx, sfence->gfx);

   /**
    * The spec requires a flush here. We insert a flush
    * because syncobj based signals are not directly placed into
    * the command stream. Instead the signal happens when the
    * submission associated with the syncobj finishes execution.
    *
    * Therefore, we must make sure that we flush the pipe to avoid
    * new work being emitted and getting executed before the signal
    * operation.
    *
    * Forces a flush even if the GFX CS is empty.
    *
    * The flush must not be asynchronous because the kernel must receive
    * the scheduled "signal" operation before any wait.
    */
   si_flush_all_queues(ctx, NULL, 0, true);
}

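/* pipe_context::fence_server_sync: make later GPU work wait for the given
 * fence by adding a winsys dependency to the current gfx CS instead of
 * flushing (see the comment inside for why flushing is avoided).
 */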
static void si_fence_server_sync(struct pipe_context *ctx, struct pipe_fence_handle *fence)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_fence *sfence = (struct si_fence *)fence;

   util_queue_fence_wait(&sfence->ready);

   /* Unflushed fences from the same context are no-ops. */
   if (sfence->gfx_unflushed.ctx && sfence->gfx_unflushed.ctx == sctx)
      return;

   /* Unflushed commands will not start execution before this fence
    * dependency is signalled. That's fine. Flushing is very expensive
    * if we get fence_server_sync after every draw call (which happens
    * with Android/SurfaceFlinger).
    *
    * In a nutshell, when CPU overhead is greater than GPU overhead,
    * or when the time it takes to execute an IB on the GPU is less than
    * the time it takes to create and submit that IB, flushing decreases
    * performance. Therefore, DO NOT FLUSH.
    */
   if (sfence->gfx)
      si_add_fence_dependency(sctx, sfence->gfx);
}

void si_init_fence_functions(struct si_context *ctx)
{
   ctx->b.flush = si_flush_from_st;
   ctx->b.create_fence_fd = si_create_fence_fd;
   ctx->b.fence_server_sync = si_fence_server_sync;
   ctx->b.fence_server_signal = si_fence_server_signal;
}

void si_init_screen_fence_functions(struct si_screen *screen)
{
   screen->b.fence_finish = si_fence_finish;
   screen->b.fence_reference = si_fence_reference;
   screen->b.fence_get_fd = si_fence_get_fd;
}