/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
22bf215546Sopenharmony_ci
/**
 * @file iris_pipe_control.c
 *
 * PIPE_CONTROL is the main flushing and synchronization primitive on Intel
 * GPUs.  It can invalidate caches, stall until rendering reaches various
 * stages of completion, write to memory, and other things.  In a way, it's
 * a swiss army knife command - it has all kinds of capabilities, but some
 * significant limitations as well.
 *
 * Unfortunately, it's notoriously complicated and difficult to use.  Many
 * sub-commands can't be used together.  Some are meant to be used at the
 * top of the pipeline (invalidating caches before drawing), while some are
 * meant to be used at the end (stalling or flushing after drawing).
 *
 * Also, there's a list of restrictions a mile long, which vary by generation.
 * Do this before doing that, or suffer the consequences (usually a GPU hang).
 *
 * This file contains helpers for emitting them safely.  You can simply call
 * iris_emit_pipe_control_flush() with the desired operations (as logical
 * PIPE_CONTROL_* bits), and it will take care of splitting it into multiple
 * PIPE_CONTROL commands as necessary.  The per-generation workarounds are
 * applied in iris_emit_raw_pipe_control() in iris_state.c.
 */
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ci#include "iris_context.h"
48bf215546Sopenharmony_ci#include "util/hash_table.h"
49bf215546Sopenharmony_ci#include "util/set.h"
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci/**
52bf215546Sopenharmony_ci * Emit a PIPE_CONTROL with various flushing flags.
53bf215546Sopenharmony_ci *
54bf215546Sopenharmony_ci * The caller is responsible for deciding what flags are appropriate for the
55bf215546Sopenharmony_ci * given generation.
56bf215546Sopenharmony_ci */
57bf215546Sopenharmony_civoid
58bf215546Sopenharmony_ciiris_emit_pipe_control_flush(struct iris_batch *batch,
59bf215546Sopenharmony_ci                             const char *reason,
60bf215546Sopenharmony_ci                             uint32_t flags)
61bf215546Sopenharmony_ci{
62bf215546Sopenharmony_ci   if ((flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
63bf215546Sopenharmony_ci       (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
64bf215546Sopenharmony_ci      /* A pipe control command with flush and invalidate bits set
65bf215546Sopenharmony_ci       * simultaneously is an inherently racy operation on Gfx6+ if the
66bf215546Sopenharmony_ci       * contents of the flushed caches were intended to become visible from
67bf215546Sopenharmony_ci       * any of the invalidated caches.  Split it in two PIPE_CONTROLs, the
68bf215546Sopenharmony_ci       * first one should stall the pipeline to make sure that the flushed R/W
69bf215546Sopenharmony_ci       * caches are coherent with memory once the specified R/O caches are
70bf215546Sopenharmony_ci       * invalidated.  On pre-Gfx6 hardware the (implicit) R/O cache
71bf215546Sopenharmony_ci       * invalidation seems to happen at the bottom of the pipeline together
72bf215546Sopenharmony_ci       * with any write cache flush, so this shouldn't be a concern.  In order
73bf215546Sopenharmony_ci       * to ensure a full stall, we do an end-of-pipe sync.
74bf215546Sopenharmony_ci       */
75bf215546Sopenharmony_ci      iris_emit_end_of_pipe_sync(batch, reason,
76bf215546Sopenharmony_ci                                 flags & PIPE_CONTROL_CACHE_FLUSH_BITS);
77bf215546Sopenharmony_ci      flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
78bf215546Sopenharmony_ci   }
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci   batch->screen->vtbl.emit_raw_pipe_control(batch, reason, flags, NULL, 0, 0);
81bf215546Sopenharmony_ci}
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci/**
84bf215546Sopenharmony_ci * Emit a PIPE_CONTROL that writes to a buffer object.
85bf215546Sopenharmony_ci *
86bf215546Sopenharmony_ci * \p flags should contain one of the following items:
87bf215546Sopenharmony_ci *  - PIPE_CONTROL_WRITE_IMMEDIATE
88bf215546Sopenharmony_ci *  - PIPE_CONTROL_WRITE_TIMESTAMP
89bf215546Sopenharmony_ci *  - PIPE_CONTROL_WRITE_DEPTH_COUNT
90bf215546Sopenharmony_ci */
91bf215546Sopenharmony_civoid
92bf215546Sopenharmony_ciiris_emit_pipe_control_write(struct iris_batch *batch,
93bf215546Sopenharmony_ci                             const char *reason, uint32_t flags,
94bf215546Sopenharmony_ci                             struct iris_bo *bo, uint32_t offset,
95bf215546Sopenharmony_ci                             uint64_t imm)
96bf215546Sopenharmony_ci{
97bf215546Sopenharmony_ci   batch->screen->vtbl.emit_raw_pipe_control(batch, reason, flags, bo, offset, imm);
98bf215546Sopenharmony_ci}
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci/*
101bf215546Sopenharmony_ci * From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization":
102bf215546Sopenharmony_ci *
103bf215546Sopenharmony_ci *  Write synchronization is a special case of end-of-pipe
104bf215546Sopenharmony_ci *  synchronization that requires that the render cache and/or depth
105bf215546Sopenharmony_ci *  related caches are flushed to memory, where the data will become
106bf215546Sopenharmony_ci *  globally visible. This type of synchronization is required prior to
107bf215546Sopenharmony_ci *  SW (CPU) actually reading the result data from memory, or initiating
108bf215546Sopenharmony_ci *  an operation that will use as a read surface (such as a texture
109bf215546Sopenharmony_ci *  surface) a previous render target and/or depth/stencil buffer
110bf215546Sopenharmony_ci *
111bf215546Sopenharmony_ci * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
112bf215546Sopenharmony_ci *
113bf215546Sopenharmony_ci *  Exercising the write cache flush bits (Render Target Cache Flush
114bf215546Sopenharmony_ci *  Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only
115bf215546Sopenharmony_ci *  ensures the write caches are flushed and doesn't guarantee the data
116bf215546Sopenharmony_ci *  is globally visible.
117bf215546Sopenharmony_ci *
118bf215546Sopenharmony_ci *  SW can track the completion of the end-of-pipe-synchronization by
119bf215546Sopenharmony_ci *  using "Notify Enable" and "PostSync Operation - Write Immediate
120bf215546Sopenharmony_ci *  Data" in the PIPE_CONTROL command.
121bf215546Sopenharmony_ci */
122bf215546Sopenharmony_civoid
123bf215546Sopenharmony_ciiris_emit_end_of_pipe_sync(struct iris_batch *batch,
124bf215546Sopenharmony_ci                           const char *reason, uint32_t flags)
125bf215546Sopenharmony_ci{
126bf215546Sopenharmony_ci   /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory":
127bf215546Sopenharmony_ci    *
128bf215546Sopenharmony_ci    *    "The most common action to perform upon reaching a synchronization
129bf215546Sopenharmony_ci    *    point is to write a value out to memory. An immediate value
130bf215546Sopenharmony_ci    *    (included with the synchronization command) may be written."
131bf215546Sopenharmony_ci    *
132bf215546Sopenharmony_ci    * From Broadwell PRM, volume 7, "End-of-Pipe Synchronization":
133bf215546Sopenharmony_ci    *
134bf215546Sopenharmony_ci    *    "In case the data flushed out by the render engine is to be read
135bf215546Sopenharmony_ci    *    back in to the render engine in coherent manner, then the render
136bf215546Sopenharmony_ci    *    engine has to wait for the fence completion before accessing the
137bf215546Sopenharmony_ci    *    flushed data. This can be achieved by following means on various
138bf215546Sopenharmony_ci    *    products: PIPE_CONTROL command with CS Stall and the required
139bf215546Sopenharmony_ci    *    write caches flushed with Post-Sync-Operation as Write Immediate
140bf215546Sopenharmony_ci    *    Data.
141bf215546Sopenharmony_ci    *
142bf215546Sopenharmony_ci    *    Example:
143bf215546Sopenharmony_ci    *       - Workload-1 (3D/GPGPU/MEDIA)
144bf215546Sopenharmony_ci    *       - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate
145bf215546Sopenharmony_ci    *         Data, Required Write Cache Flush bits set)
146bf215546Sopenharmony_ci    *       - Workload-2 (Can use the data produce or output by Workload-1)
147bf215546Sopenharmony_ci    */
148bf215546Sopenharmony_ci   iris_emit_pipe_control_write(batch, reason,
149bf215546Sopenharmony_ci                                flags | PIPE_CONTROL_CS_STALL |
150bf215546Sopenharmony_ci                                PIPE_CONTROL_WRITE_IMMEDIATE,
151bf215546Sopenharmony_ci                                batch->screen->workaround_address.bo,
152bf215546Sopenharmony_ci                                batch->screen->workaround_address.offset, 0);
153bf215546Sopenharmony_ci}
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci/**
156bf215546Sopenharmony_ci * Emits appropriate flushes and invalidations for any previous memory
157bf215546Sopenharmony_ci * operations on \p bo to be strictly ordered relative to any subsequent
158bf215546Sopenharmony_ci * memory operations performed from the caching domain \p access.
159bf215546Sopenharmony_ci *
160bf215546Sopenharmony_ci * This is useful because the GPU has separate incoherent caches for the
161bf215546Sopenharmony_ci * render target, sampler, etc., which need to be explicitly invalidated or
162bf215546Sopenharmony_ci * flushed in order to obtain the expected memory ordering in cases where the
163bf215546Sopenharmony_ci * same surface is accessed through multiple caches (e.g. due to
164bf215546Sopenharmony_ci * render-to-texture).
165bf215546Sopenharmony_ci *
166bf215546Sopenharmony_ci * This provides the expected memory ordering guarantees whether or not the
167bf215546Sopenharmony_ci * previous access was performed from the same batch or a different one, but
168bf215546Sopenharmony_ci * only the former case needs to be handled explicitly here, since the kernel
169bf215546Sopenharmony_ci * already inserts implicit flushes and synchronization in order to guarantee
170bf215546Sopenharmony_ci * that any data dependencies between batches are satisfied.
171bf215546Sopenharmony_ci *
172bf215546Sopenharmony_ci * Even though no flushing nor invalidation is required in order to account
173bf215546Sopenharmony_ci * for concurrent updates from other batches, we provide the guarantee that a
174bf215546Sopenharmony_ci * required synchronization operation due to a previous batch-local update
175bf215546Sopenharmony_ci * will never be omitted due to the influence of another thread accessing the
176bf215546Sopenharmony_ci * same buffer concurrently from the same caching domain: Such a concurrent
177bf215546Sopenharmony_ci * update will only ever change the seqno of the last update to a value
178bf215546Sopenharmony_ci * greater than the local value (see iris_bo_bump_seqno()), which means that
179bf215546Sopenharmony_ci * we will always emit at least as much flushing and invalidation as we would
180bf215546Sopenharmony_ci * have for the local seqno (see the coherent_seqnos comparisons below).
181bf215546Sopenharmony_ci */
182bf215546Sopenharmony_civoid
183bf215546Sopenharmony_ciiris_emit_buffer_barrier_for(struct iris_batch *batch,
184bf215546Sopenharmony_ci                             struct iris_bo *bo,
185bf215546Sopenharmony_ci                             enum iris_domain access)
186bf215546Sopenharmony_ci{
187bf215546Sopenharmony_ci   const struct intel_device_info *devinfo = &batch->screen->devinfo;
188bf215546Sopenharmony_ci   const struct brw_compiler *compiler = batch->screen->compiler;
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   const bool access_via_l3 = iris_domain_is_l3_coherent(devinfo, access);
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci   const uint32_t all_flush_bits = (PIPE_CONTROL_CACHE_FLUSH_BITS |
193bf215546Sopenharmony_ci                                    PIPE_CONTROL_STALL_AT_SCOREBOARD |
194bf215546Sopenharmony_ci                                    PIPE_CONTROL_FLUSH_ENABLE);
195bf215546Sopenharmony_ci   const uint32_t flush_bits[NUM_IRIS_DOMAINS] = {
196bf215546Sopenharmony_ci      [IRIS_DOMAIN_RENDER_WRITE] = PIPE_CONTROL_RENDER_TARGET_FLUSH,
197bf215546Sopenharmony_ci      [IRIS_DOMAIN_DEPTH_WRITE] = PIPE_CONTROL_DEPTH_CACHE_FLUSH,
198bf215546Sopenharmony_ci      [IRIS_DOMAIN_DATA_WRITE] = PIPE_CONTROL_FLUSH_HDC,
199bf215546Sopenharmony_ci      /* OTHER_WRITE includes "VF Cache Invalidate" to make sure that any
200bf215546Sopenharmony_ci       * stream output writes are finished.  CS stall is added implicitly.
201bf215546Sopenharmony_ci       */
202bf215546Sopenharmony_ci      [IRIS_DOMAIN_OTHER_WRITE] = PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_VF_CACHE_INVALIDATE,
203bf215546Sopenharmony_ci      [IRIS_DOMAIN_VF_READ] = PIPE_CONTROL_STALL_AT_SCOREBOARD,
204bf215546Sopenharmony_ci      [IRIS_DOMAIN_SAMPLER_READ] = PIPE_CONTROL_STALL_AT_SCOREBOARD,
205bf215546Sopenharmony_ci      [IRIS_DOMAIN_PULL_CONSTANT_READ] = PIPE_CONTROL_STALL_AT_SCOREBOARD,
206bf215546Sopenharmony_ci      [IRIS_DOMAIN_OTHER_READ] = PIPE_CONTROL_STALL_AT_SCOREBOARD,
207bf215546Sopenharmony_ci   };
208bf215546Sopenharmony_ci   const uint32_t invalidate_bits[NUM_IRIS_DOMAINS] = {
209bf215546Sopenharmony_ci      [IRIS_DOMAIN_RENDER_WRITE] = PIPE_CONTROL_RENDER_TARGET_FLUSH,
210bf215546Sopenharmony_ci      [IRIS_DOMAIN_DEPTH_WRITE] = PIPE_CONTROL_DEPTH_CACHE_FLUSH,
211bf215546Sopenharmony_ci      [IRIS_DOMAIN_DATA_WRITE] = PIPE_CONTROL_FLUSH_HDC,
212bf215546Sopenharmony_ci      [IRIS_DOMAIN_OTHER_WRITE] = PIPE_CONTROL_FLUSH_ENABLE,
213bf215546Sopenharmony_ci      [IRIS_DOMAIN_VF_READ] = PIPE_CONTROL_VF_CACHE_INVALIDATE,
214bf215546Sopenharmony_ci      [IRIS_DOMAIN_SAMPLER_READ] = PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE,
215bf215546Sopenharmony_ci      [IRIS_DOMAIN_PULL_CONSTANT_READ] = PIPE_CONTROL_CONST_CACHE_INVALIDATE |
216bf215546Sopenharmony_ci         (compiler->indirect_ubos_use_sampler ?
217bf215546Sopenharmony_ci          PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE :
218bf215546Sopenharmony_ci          PIPE_CONTROL_DATA_CACHE_FLUSH),
219bf215546Sopenharmony_ci   };
220bf215546Sopenharmony_ci   const uint32_t l3_flush_bits[NUM_IRIS_DOMAINS] = {
221bf215546Sopenharmony_ci      [IRIS_DOMAIN_RENDER_WRITE] = PIPE_CONTROL_TILE_CACHE_FLUSH,
222bf215546Sopenharmony_ci      [IRIS_DOMAIN_DEPTH_WRITE] = PIPE_CONTROL_TILE_CACHE_FLUSH,
223bf215546Sopenharmony_ci      [IRIS_DOMAIN_DATA_WRITE] = PIPE_CONTROL_DATA_CACHE_FLUSH,
224bf215546Sopenharmony_ci   };
225bf215546Sopenharmony_ci   uint32_t bits = 0;
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci   /* Iterate over all read/write domains first in order to handle RaW
228bf215546Sopenharmony_ci    * and WaW dependencies, which might involve flushing the domain of
229bf215546Sopenharmony_ci    * the previous access and invalidating the specified domain.
230bf215546Sopenharmony_ci    */
231bf215546Sopenharmony_ci   for (unsigned i = 0; i < IRIS_DOMAIN_OTHER_WRITE; i++) {
232bf215546Sopenharmony_ci      assert(!iris_domain_is_read_only(i));
233bf215546Sopenharmony_ci      assert(iris_domain_is_l3_coherent(devinfo, i));
234bf215546Sopenharmony_ci
235bf215546Sopenharmony_ci      if (i != access) {
236bf215546Sopenharmony_ci         const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]);
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci         /* Invalidate unless the most recent read/write access from
239bf215546Sopenharmony_ci          * this domain is already guaranteed to be visible to the
240bf215546Sopenharmony_ci          * specified domain.  Flush if the most recent access from
241bf215546Sopenharmony_ci          * this domain occurred after its most recent flush.
242bf215546Sopenharmony_ci          */
243bf215546Sopenharmony_ci         if (seqno > batch->coherent_seqnos[access][i]) {
244bf215546Sopenharmony_ci            bits |= invalidate_bits[access];
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci            if (access_via_l3) {
247bf215546Sopenharmony_ci               /* Both domains share L3.  If the most recent read/write access
248bf215546Sopenharmony_ci                * in domain `i' isn't visible to L3, then flush it to L3.
249bf215546Sopenharmony_ci                */
250bf215546Sopenharmony_ci               if (seqno > batch->l3_coherent_seqnos[i])
251bf215546Sopenharmony_ci                  bits |= flush_bits[i];
252bf215546Sopenharmony_ci            } else {
253bf215546Sopenharmony_ci               /* Domain `i` is L3 coherent but the specified domain is not.
254bf215546Sopenharmony_ci                * Flush both this cache and L3 out to memory.
255bf215546Sopenharmony_ci                */
256bf215546Sopenharmony_ci               if (seqno > batch->coherent_seqnos[i][i])
257bf215546Sopenharmony_ci                  bits |= flush_bits[i] | l3_flush_bits[i];
258bf215546Sopenharmony_ci            }
259bf215546Sopenharmony_ci         }
260bf215546Sopenharmony_ci      }
261bf215546Sopenharmony_ci   }
262bf215546Sopenharmony_ci
263bf215546Sopenharmony_ci   /* All read-only domains can be considered mutually coherent since
264bf215546Sopenharmony_ci    * the order of read-only memory operations is immaterial.  If the
265bf215546Sopenharmony_ci    * specified domain is read/write we need to iterate over them too,
266bf215546Sopenharmony_ci    * in order to handle any WaR dependencies.
267bf215546Sopenharmony_ci    */
268bf215546Sopenharmony_ci   if (!iris_domain_is_read_only(access)) {
269bf215546Sopenharmony_ci      for (unsigned i = IRIS_DOMAIN_VF_READ; i < NUM_IRIS_DOMAINS; i++) {
270bf215546Sopenharmony_ci         assert(iris_domain_is_read_only(i));
271bf215546Sopenharmony_ci         const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]);
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ci         const uint64_t last_visible_seqno =
274bf215546Sopenharmony_ci            iris_domain_is_l3_coherent(devinfo, i) ?
275bf215546Sopenharmony_ci            batch->l3_coherent_seqnos[i] : batch->coherent_seqnos[i][i];
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci         /* Flush if the most recent access from this domain occurred
278bf215546Sopenharmony_ci          * after its most recent flush.
279bf215546Sopenharmony_ci          */
280bf215546Sopenharmony_ci         if (seqno > last_visible_seqno)
281bf215546Sopenharmony_ci            bits |= flush_bits[i];
282bf215546Sopenharmony_ci      }
283bf215546Sopenharmony_ci   }
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci   /* The IRIS_DOMAIN_OTHER_WRITE kitchen-sink domain cannot be
286bf215546Sopenharmony_ci    * considered coherent with itself since it's really a collection
287bf215546Sopenharmony_ci    * of multiple incoherent read/write domains, so we special-case it
288bf215546Sopenharmony_ci    * here.
289bf215546Sopenharmony_ci    */
290bf215546Sopenharmony_ci   const unsigned i = IRIS_DOMAIN_OTHER_WRITE;
291bf215546Sopenharmony_ci   const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]);
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci   assert(!iris_domain_is_l3_coherent(devinfo, i));
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci   /* Invalidate unless the most recent read/write access from this
296bf215546Sopenharmony_ci    * domain is already guaranteed to be visible to the specified
297bf215546Sopenharmony_ci    * domain.  Flush if the most recent access from this domain
298bf215546Sopenharmony_ci    * occurred after its most recent flush.
299bf215546Sopenharmony_ci    */
300bf215546Sopenharmony_ci   if (seqno > batch->coherent_seqnos[access][i]) {
301bf215546Sopenharmony_ci      bits |= invalidate_bits[access];
302bf215546Sopenharmony_ci
303bf215546Sopenharmony_ci      /* There is a non-L3-coherent write that isn't visible to the
304bf215546Sopenharmony_ci       * specified domain.  If the access is via L3, then it might see
305bf215546Sopenharmony_ci       * stale L3 data that was loaded before that write.  In this case,
306bf215546Sopenharmony_ci       * we try to invalidate all read-only sections of the L3 cache.
307bf215546Sopenharmony_ci       */
308bf215546Sopenharmony_ci      if (access_via_l3 && seqno > batch->l3_coherent_seqnos[i])
309bf215546Sopenharmony_ci         bits |= PIPE_CONTROL_L3_RO_INVALIDATE_BITS;
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci      if (seqno > batch->coherent_seqnos[i][i])
312bf215546Sopenharmony_ci         bits |= flush_bits[i];
313bf215546Sopenharmony_ci   }
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci   if (bits) {
316bf215546Sopenharmony_ci      /* Stall-at-scoreboard is not expected to work in combination with other
317bf215546Sopenharmony_ci       * flush bits.
318bf215546Sopenharmony_ci       */
319bf215546Sopenharmony_ci      if (bits & PIPE_CONTROL_CACHE_FLUSH_BITS)
320bf215546Sopenharmony_ci         bits &= ~PIPE_CONTROL_STALL_AT_SCOREBOARD;
321bf215546Sopenharmony_ci
322bf215546Sopenharmony_ci      /* Emit any required flushes and invalidations. */
323bf215546Sopenharmony_ci      if (bits & all_flush_bits)
324bf215546Sopenharmony_ci         iris_emit_end_of_pipe_sync(batch, "cache tracker: flush",
325bf215546Sopenharmony_ci                                    bits & all_flush_bits);
326bf215546Sopenharmony_ci
327bf215546Sopenharmony_ci      if (bits & ~all_flush_bits)
328bf215546Sopenharmony_ci         iris_emit_pipe_control_flush(batch, "cache tracker: invalidate",
329bf215546Sopenharmony_ci                                      bits & ~all_flush_bits);
330bf215546Sopenharmony_ci   }
331bf215546Sopenharmony_ci}
332bf215546Sopenharmony_ci
333bf215546Sopenharmony_ci/**
334bf215546Sopenharmony_ci * Flush and invalidate all caches (for debugging purposes).
335bf215546Sopenharmony_ci */
336bf215546Sopenharmony_civoid
337bf215546Sopenharmony_ciiris_flush_all_caches(struct iris_batch *batch)
338bf215546Sopenharmony_ci{
339bf215546Sopenharmony_ci   iris_emit_pipe_control_flush(batch, "debug: flush all caches",
340bf215546Sopenharmony_ci                                PIPE_CONTROL_CS_STALL |
341bf215546Sopenharmony_ci                                PIPE_CONTROL_DATA_CACHE_FLUSH |
342bf215546Sopenharmony_ci                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
343bf215546Sopenharmony_ci                                PIPE_CONTROL_RENDER_TARGET_FLUSH |
344bf215546Sopenharmony_ci                                PIPE_CONTROL_TILE_CACHE_FLUSH |
345bf215546Sopenharmony_ci                                PIPE_CONTROL_VF_CACHE_INVALIDATE |
346bf215546Sopenharmony_ci                                PIPE_CONTROL_INSTRUCTION_INVALIDATE |
347bf215546Sopenharmony_ci                                PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
348bf215546Sopenharmony_ci                                PIPE_CONTROL_CONST_CACHE_INVALIDATE |
349bf215546Sopenharmony_ci                                PIPE_CONTROL_STATE_CACHE_INVALIDATE);
350bf215546Sopenharmony_ci}
351bf215546Sopenharmony_ci
352bf215546Sopenharmony_cistatic void
353bf215546Sopenharmony_ciiris_texture_barrier(struct pipe_context *ctx, unsigned flags)
354bf215546Sopenharmony_ci{
355bf215546Sopenharmony_ci   struct iris_context *ice = (void *) ctx;
356bf215546Sopenharmony_ci   struct iris_batch *render_batch = &ice->batches[IRIS_BATCH_RENDER];
357bf215546Sopenharmony_ci   struct iris_batch *compute_batch = &ice->batches[IRIS_BATCH_COMPUTE];
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci   if (render_batch->contains_draw) {
360bf215546Sopenharmony_ci      iris_batch_maybe_flush(render_batch, 48);
361bf215546Sopenharmony_ci      iris_emit_pipe_control_flush(render_batch,
362bf215546Sopenharmony_ci                                   "API: texture barrier (1/2)",
363bf215546Sopenharmony_ci                                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
364bf215546Sopenharmony_ci                                   PIPE_CONTROL_RENDER_TARGET_FLUSH |
365bf215546Sopenharmony_ci                                   PIPE_CONTROL_CS_STALL);
366bf215546Sopenharmony_ci      iris_emit_pipe_control_flush(render_batch,
367bf215546Sopenharmony_ci                                   "API: texture barrier (2/2)",
368bf215546Sopenharmony_ci                                   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
369bf215546Sopenharmony_ci   }
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ci   if (compute_batch->contains_draw) {
372bf215546Sopenharmony_ci      iris_batch_maybe_flush(compute_batch, 48);
373bf215546Sopenharmony_ci      iris_emit_pipe_control_flush(compute_batch,
374bf215546Sopenharmony_ci                                   "API: texture barrier (1/2)",
375bf215546Sopenharmony_ci                                   PIPE_CONTROL_CS_STALL);
376bf215546Sopenharmony_ci      iris_emit_pipe_control_flush(compute_batch,
377bf215546Sopenharmony_ci                                   "API: texture barrier (2/2)",
378bf215546Sopenharmony_ci                                   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
379bf215546Sopenharmony_ci   }
380bf215546Sopenharmony_ci}
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_cistatic void
383bf215546Sopenharmony_ciiris_memory_barrier(struct pipe_context *ctx, unsigned flags)
384bf215546Sopenharmony_ci{
385bf215546Sopenharmony_ci   struct iris_context *ice = (void *) ctx;
386bf215546Sopenharmony_ci   unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL;
387bf215546Sopenharmony_ci
388bf215546Sopenharmony_ci   if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
389bf215546Sopenharmony_ci                PIPE_BARRIER_INDEX_BUFFER |
390bf215546Sopenharmony_ci                PIPE_BARRIER_INDIRECT_BUFFER)) {
391bf215546Sopenharmony_ci      bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
392bf215546Sopenharmony_ci   }
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_ci   if (flags & PIPE_BARRIER_CONSTANT_BUFFER) {
395bf215546Sopenharmony_ci      bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
396bf215546Sopenharmony_ci              PIPE_CONTROL_CONST_CACHE_INVALIDATE;
397bf215546Sopenharmony_ci   }
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_ci   if (flags & (PIPE_BARRIER_TEXTURE | PIPE_BARRIER_FRAMEBUFFER)) {
400bf215546Sopenharmony_ci      bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
401bf215546Sopenharmony_ci              PIPE_CONTROL_RENDER_TARGET_FLUSH |
402bf215546Sopenharmony_ci              PIPE_CONTROL_TILE_CACHE_FLUSH;
403bf215546Sopenharmony_ci   }
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_ci   iris_foreach_batch(ice, batch) {
406bf215546Sopenharmony_ci      if (batch->contains_draw) {
407bf215546Sopenharmony_ci         iris_batch_maybe_flush(batch, 24);
408bf215546Sopenharmony_ci         iris_emit_pipe_control_flush(batch, "API: memory barrier", bits);
409bf215546Sopenharmony_ci      }
410bf215546Sopenharmony_ci   }
411bf215546Sopenharmony_ci}
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_civoid
414bf215546Sopenharmony_ciiris_init_flush_functions(struct pipe_context *ctx)
415bf215546Sopenharmony_ci{
416bf215546Sopenharmony_ci   ctx->memory_barrier = iris_memory_barrier;
417bf215546Sopenharmony_ci   ctx->texture_barrier = iris_texture_barrier;
418bf215546Sopenharmony_ci}
419