1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2017 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included 12bf215546Sopenharmony_ci * in all copies or substantial portions of the Software. 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE. 21bf215546Sopenharmony_ci */ 22bf215546Sopenharmony_ci 23bf215546Sopenharmony_ci/** 24bf215546Sopenharmony_ci * @file iris_pipe_control.c 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci * PIPE_CONTROL is the main flushing and synchronization primitive on Intel 27bf215546Sopenharmony_ci * GPUs. It can invalidate caches, stall until rendering reaches various 28bf215546Sopenharmony_ci * stages of completion, write to memory, and other things. In a way, it's 29bf215546Sopenharmony_ci * a swiss army knife command - it has all kinds of capabilities, but some 30bf215546Sopenharmony_ci * significant limitations as well. 31bf215546Sopenharmony_ci * 32bf215546Sopenharmony_ci * Unfortunately, it's notoriously complicated and difficult to use. Many 33bf215546Sopenharmony_ci * sub-commands can't be used together. Some are meant to be used at the 34bf215546Sopenharmony_ci * top of the pipeline (invalidating caches before drawing), while some are 35bf215546Sopenharmony_ci * meant to be used at the end (stalling or flushing after drawing). 36bf215546Sopenharmony_ci * 37bf215546Sopenharmony_ci * Also, there's a list of restrictions a mile long, which vary by generation. 38bf215546Sopenharmony_ci * Do this before doing that, or suffer the consequences (usually a GPU hang). 39bf215546Sopenharmony_ci * 40bf215546Sopenharmony_ci * This file contains helpers for emitting them safely. You can simply call 41bf215546Sopenharmony_ci * iris_emit_pipe_control_flush() with the desired operations (as logical 42bf215546Sopenharmony_ci * PIPE_CONTROL_* bits), and it will take care of splitting it into multiple 43bf215546Sopenharmony_ci * PIPE_CONTROL commands as necessary. The per-generation workarounds are 44bf215546Sopenharmony_ci * applied in iris_emit_raw_pipe_control() in iris_state.c. 45bf215546Sopenharmony_ci */ 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ci#include "iris_context.h" 48bf215546Sopenharmony_ci#include "util/hash_table.h" 49bf215546Sopenharmony_ci#include "util/set.h" 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_ci/** 52bf215546Sopenharmony_ci * Emit a PIPE_CONTROL with various flushing flags. 53bf215546Sopenharmony_ci * 54bf215546Sopenharmony_ci * The caller is responsible for deciding what flags are appropriate for the 55bf215546Sopenharmony_ci * given generation. 56bf215546Sopenharmony_ci */ 57bf215546Sopenharmony_civoid 58bf215546Sopenharmony_ciiris_emit_pipe_control_flush(struct iris_batch *batch, 59bf215546Sopenharmony_ci const char *reason, 60bf215546Sopenharmony_ci uint32_t flags) 61bf215546Sopenharmony_ci{ 62bf215546Sopenharmony_ci if ((flags & PIPE_CONTROL_CACHE_FLUSH_BITS) && 63bf215546Sopenharmony_ci (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) { 64bf215546Sopenharmony_ci /* A pipe control command with flush and invalidate bits set 65bf215546Sopenharmony_ci * simultaneously is an inherently racy operation on Gfx6+ if the 66bf215546Sopenharmony_ci * contents of the flushed caches were intended to become visible from 67bf215546Sopenharmony_ci * any of the invalidated caches. Split it in two PIPE_CONTROLs, the 68bf215546Sopenharmony_ci * first one should stall the pipeline to make sure that the flushed R/W 69bf215546Sopenharmony_ci * caches are coherent with memory once the specified R/O caches are 70bf215546Sopenharmony_ci * invalidated. On pre-Gfx6 hardware the (implicit) R/O cache 71bf215546Sopenharmony_ci * invalidation seems to happen at the bottom of the pipeline together 72bf215546Sopenharmony_ci * with any write cache flush, so this shouldn't be a concern. In order 73bf215546Sopenharmony_ci * to ensure a full stall, we do an end-of-pipe sync. 74bf215546Sopenharmony_ci */ 75bf215546Sopenharmony_ci iris_emit_end_of_pipe_sync(batch, reason, 76bf215546Sopenharmony_ci flags & PIPE_CONTROL_CACHE_FLUSH_BITS); 77bf215546Sopenharmony_ci flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL); 78bf215546Sopenharmony_ci } 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci batch->screen->vtbl.emit_raw_pipe_control(batch, reason, flags, NULL, 0, 0); 81bf215546Sopenharmony_ci} 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci/** 84bf215546Sopenharmony_ci * Emit a PIPE_CONTROL that writes to a buffer object. 85bf215546Sopenharmony_ci * 86bf215546Sopenharmony_ci * \p flags should contain one of the following items: 87bf215546Sopenharmony_ci * - PIPE_CONTROL_WRITE_IMMEDIATE 88bf215546Sopenharmony_ci * - PIPE_CONTROL_WRITE_TIMESTAMP 89bf215546Sopenharmony_ci * - PIPE_CONTROL_WRITE_DEPTH_COUNT 90bf215546Sopenharmony_ci */ 91bf215546Sopenharmony_civoid 92bf215546Sopenharmony_ciiris_emit_pipe_control_write(struct iris_batch *batch, 93bf215546Sopenharmony_ci const char *reason, uint32_t flags, 94bf215546Sopenharmony_ci struct iris_bo *bo, uint32_t offset, 95bf215546Sopenharmony_ci uint64_t imm) 96bf215546Sopenharmony_ci{ 97bf215546Sopenharmony_ci batch->screen->vtbl.emit_raw_pipe_control(batch, reason, flags, bo, offset, imm); 98bf215546Sopenharmony_ci} 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci/* 101bf215546Sopenharmony_ci * From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization": 102bf215546Sopenharmony_ci * 103bf215546Sopenharmony_ci * Write synchronization is a special case of end-of-pipe 104bf215546Sopenharmony_ci * synchronization that requires that the render cache and/or depth 105bf215546Sopenharmony_ci * related caches are flushed to memory, where the data will become 106bf215546Sopenharmony_ci * globally visible. This type of synchronization is required prior to 107bf215546Sopenharmony_ci * SW (CPU) actually reading the result data from memory, or initiating 108bf215546Sopenharmony_ci * an operation that will use as a read surface (such as a texture 109bf215546Sopenharmony_ci * surface) a previous render target and/or depth/stencil buffer 110bf215546Sopenharmony_ci * 111bf215546Sopenharmony_ci * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization": 112bf215546Sopenharmony_ci * 113bf215546Sopenharmony_ci * Exercising the write cache flush bits (Render Target Cache Flush 114bf215546Sopenharmony_ci * Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only 115bf215546Sopenharmony_ci * ensures the write caches are flushed and doesn't guarantee the data 116bf215546Sopenharmony_ci * is globally visible. 117bf215546Sopenharmony_ci * 118bf215546Sopenharmony_ci * SW can track the completion of the end-of-pipe-synchronization by 119bf215546Sopenharmony_ci * using "Notify Enable" and "PostSync Operation - Write Immediate 120bf215546Sopenharmony_ci * Data" in the PIPE_CONTROL command. 121bf215546Sopenharmony_ci */ 122bf215546Sopenharmony_civoid 123bf215546Sopenharmony_ciiris_emit_end_of_pipe_sync(struct iris_batch *batch, 124bf215546Sopenharmony_ci const char *reason, uint32_t flags) 125bf215546Sopenharmony_ci{ 126bf215546Sopenharmony_ci /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory": 127bf215546Sopenharmony_ci * 128bf215546Sopenharmony_ci * "The most common action to perform upon reaching a synchronization 129bf215546Sopenharmony_ci * point is to write a value out to memory. An immediate value 130bf215546Sopenharmony_ci * (included with the synchronization command) may be written." 131bf215546Sopenharmony_ci * 132bf215546Sopenharmony_ci * From Broadwell PRM, volume 7, "End-of-Pipe Synchronization": 133bf215546Sopenharmony_ci * 134bf215546Sopenharmony_ci * "In case the data flushed out by the render engine is to be read 135bf215546Sopenharmony_ci * back in to the render engine in coherent manner, then the render 136bf215546Sopenharmony_ci * engine has to wait for the fence completion before accessing the 137bf215546Sopenharmony_ci * flushed data. This can be achieved by following means on various 138bf215546Sopenharmony_ci * products: PIPE_CONTROL command with CS Stall and the required 139bf215546Sopenharmony_ci * write caches flushed with Post-Sync-Operation as Write Immediate 140bf215546Sopenharmony_ci * Data. 141bf215546Sopenharmony_ci * 142bf215546Sopenharmony_ci * Example: 143bf215546Sopenharmony_ci * - Workload-1 (3D/GPGPU/MEDIA) 144bf215546Sopenharmony_ci * - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate 145bf215546Sopenharmony_ci * Data, Required Write Cache Flush bits set) 146bf215546Sopenharmony_ci * - Workload-2 (Can use the data produce or output by Workload-1) 147bf215546Sopenharmony_ci */ 148bf215546Sopenharmony_ci iris_emit_pipe_control_write(batch, reason, 149bf215546Sopenharmony_ci flags | PIPE_CONTROL_CS_STALL | 150bf215546Sopenharmony_ci PIPE_CONTROL_WRITE_IMMEDIATE, 151bf215546Sopenharmony_ci batch->screen->workaround_address.bo, 152bf215546Sopenharmony_ci batch->screen->workaround_address.offset, 0); 153bf215546Sopenharmony_ci} 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci/** 156bf215546Sopenharmony_ci * Emits appropriate flushes and invalidations for any previous memory 157bf215546Sopenharmony_ci * operations on \p bo to be strictly ordered relative to any subsequent 158bf215546Sopenharmony_ci * memory operations performed from the caching domain \p access. 159bf215546Sopenharmony_ci * 160bf215546Sopenharmony_ci * This is useful because the GPU has separate incoherent caches for the 161bf215546Sopenharmony_ci * render target, sampler, etc., which need to be explicitly invalidated or 162bf215546Sopenharmony_ci * flushed in order to obtain the expected memory ordering in cases where the 163bf215546Sopenharmony_ci * same surface is accessed through multiple caches (e.g. due to 164bf215546Sopenharmony_ci * render-to-texture). 165bf215546Sopenharmony_ci * 166bf215546Sopenharmony_ci * This provides the expected memory ordering guarantees whether or not the 167bf215546Sopenharmony_ci * previous access was performed from the same batch or a different one, but 168bf215546Sopenharmony_ci * only the former case needs to be handled explicitly here, since the kernel 169bf215546Sopenharmony_ci * already inserts implicit flushes and synchronization in order to guarantee 170bf215546Sopenharmony_ci * that any data dependencies between batches are satisfied. 171bf215546Sopenharmony_ci * 172bf215546Sopenharmony_ci * Even though no flushing nor invalidation is required in order to account 173bf215546Sopenharmony_ci * for concurrent updates from other batches, we provide the guarantee that a 174bf215546Sopenharmony_ci * required synchronization operation due to a previous batch-local update 175bf215546Sopenharmony_ci * will never be omitted due to the influence of another thread accessing the 176bf215546Sopenharmony_ci * same buffer concurrently from the same caching domain: Such a concurrent 177bf215546Sopenharmony_ci * update will only ever change the seqno of the last update to a value 178bf215546Sopenharmony_ci * greater than the local value (see iris_bo_bump_seqno()), which means that 179bf215546Sopenharmony_ci * we will always emit at least as much flushing and invalidation as we would 180bf215546Sopenharmony_ci * have for the local seqno (see the coherent_seqnos comparisons below). 181bf215546Sopenharmony_ci */ 182bf215546Sopenharmony_civoid 183bf215546Sopenharmony_ciiris_emit_buffer_barrier_for(struct iris_batch *batch, 184bf215546Sopenharmony_ci struct iris_bo *bo, 185bf215546Sopenharmony_ci enum iris_domain access) 186bf215546Sopenharmony_ci{ 187bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &batch->screen->devinfo; 188bf215546Sopenharmony_ci const struct brw_compiler *compiler = batch->screen->compiler; 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci const bool access_via_l3 = iris_domain_is_l3_coherent(devinfo, access); 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci const uint32_t all_flush_bits = (PIPE_CONTROL_CACHE_FLUSH_BITS | 193bf215546Sopenharmony_ci PIPE_CONTROL_STALL_AT_SCOREBOARD | 194bf215546Sopenharmony_ci PIPE_CONTROL_FLUSH_ENABLE); 195bf215546Sopenharmony_ci const uint32_t flush_bits[NUM_IRIS_DOMAINS] = { 196bf215546Sopenharmony_ci [IRIS_DOMAIN_RENDER_WRITE] = PIPE_CONTROL_RENDER_TARGET_FLUSH, 197bf215546Sopenharmony_ci [IRIS_DOMAIN_DEPTH_WRITE] = PIPE_CONTROL_DEPTH_CACHE_FLUSH, 198bf215546Sopenharmony_ci [IRIS_DOMAIN_DATA_WRITE] = PIPE_CONTROL_FLUSH_HDC, 199bf215546Sopenharmony_ci /* OTHER_WRITE includes "VF Cache Invalidate" to make sure that any 200bf215546Sopenharmony_ci * stream output writes are finished. CS stall is added implicitly. 201bf215546Sopenharmony_ci */ 202bf215546Sopenharmony_ci [IRIS_DOMAIN_OTHER_WRITE] = PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_VF_CACHE_INVALIDATE, 203bf215546Sopenharmony_ci [IRIS_DOMAIN_VF_READ] = PIPE_CONTROL_STALL_AT_SCOREBOARD, 204bf215546Sopenharmony_ci [IRIS_DOMAIN_SAMPLER_READ] = PIPE_CONTROL_STALL_AT_SCOREBOARD, 205bf215546Sopenharmony_ci [IRIS_DOMAIN_PULL_CONSTANT_READ] = PIPE_CONTROL_STALL_AT_SCOREBOARD, 206bf215546Sopenharmony_ci [IRIS_DOMAIN_OTHER_READ] = PIPE_CONTROL_STALL_AT_SCOREBOARD, 207bf215546Sopenharmony_ci }; 208bf215546Sopenharmony_ci const uint32_t invalidate_bits[NUM_IRIS_DOMAINS] = { 209bf215546Sopenharmony_ci [IRIS_DOMAIN_RENDER_WRITE] = PIPE_CONTROL_RENDER_TARGET_FLUSH, 210bf215546Sopenharmony_ci [IRIS_DOMAIN_DEPTH_WRITE] = PIPE_CONTROL_DEPTH_CACHE_FLUSH, 211bf215546Sopenharmony_ci [IRIS_DOMAIN_DATA_WRITE] = PIPE_CONTROL_FLUSH_HDC, 212bf215546Sopenharmony_ci [IRIS_DOMAIN_OTHER_WRITE] = PIPE_CONTROL_FLUSH_ENABLE, 213bf215546Sopenharmony_ci [IRIS_DOMAIN_VF_READ] = PIPE_CONTROL_VF_CACHE_INVALIDATE, 214bf215546Sopenharmony_ci [IRIS_DOMAIN_SAMPLER_READ] = PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE, 215bf215546Sopenharmony_ci [IRIS_DOMAIN_PULL_CONSTANT_READ] = PIPE_CONTROL_CONST_CACHE_INVALIDATE | 216bf215546Sopenharmony_ci (compiler->indirect_ubos_use_sampler ? 217bf215546Sopenharmony_ci PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE : 218bf215546Sopenharmony_ci PIPE_CONTROL_DATA_CACHE_FLUSH), 219bf215546Sopenharmony_ci }; 220bf215546Sopenharmony_ci const uint32_t l3_flush_bits[NUM_IRIS_DOMAINS] = { 221bf215546Sopenharmony_ci [IRIS_DOMAIN_RENDER_WRITE] = PIPE_CONTROL_TILE_CACHE_FLUSH, 222bf215546Sopenharmony_ci [IRIS_DOMAIN_DEPTH_WRITE] = PIPE_CONTROL_TILE_CACHE_FLUSH, 223bf215546Sopenharmony_ci [IRIS_DOMAIN_DATA_WRITE] = PIPE_CONTROL_DATA_CACHE_FLUSH, 224bf215546Sopenharmony_ci }; 225bf215546Sopenharmony_ci uint32_t bits = 0; 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci /* Iterate over all read/write domains first in order to handle RaW 228bf215546Sopenharmony_ci * and WaW dependencies, which might involve flushing the domain of 229bf215546Sopenharmony_ci * the previous access and invalidating the specified domain. 230bf215546Sopenharmony_ci */ 231bf215546Sopenharmony_ci for (unsigned i = 0; i < IRIS_DOMAIN_OTHER_WRITE; i++) { 232bf215546Sopenharmony_ci assert(!iris_domain_is_read_only(i)); 233bf215546Sopenharmony_ci assert(iris_domain_is_l3_coherent(devinfo, i)); 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci if (i != access) { 236bf215546Sopenharmony_ci const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]); 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci /* Invalidate unless the most recent read/write access from 239bf215546Sopenharmony_ci * this domain is already guaranteed to be visible to the 240bf215546Sopenharmony_ci * specified domain. Flush if the most recent access from 241bf215546Sopenharmony_ci * this domain occurred after its most recent flush. 242bf215546Sopenharmony_ci */ 243bf215546Sopenharmony_ci if (seqno > batch->coherent_seqnos[access][i]) { 244bf215546Sopenharmony_ci bits |= invalidate_bits[access]; 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci if (access_via_l3) { 247bf215546Sopenharmony_ci /* Both domains share L3. If the most recent read/write access 248bf215546Sopenharmony_ci * in domain `i' isn't visible to L3, then flush it to L3. 249bf215546Sopenharmony_ci */ 250bf215546Sopenharmony_ci if (seqno > batch->l3_coherent_seqnos[i]) 251bf215546Sopenharmony_ci bits |= flush_bits[i]; 252bf215546Sopenharmony_ci } else { 253bf215546Sopenharmony_ci /* Domain `i` is L3 coherent but the specified domain is not. 254bf215546Sopenharmony_ci * Flush both this cache and L3 out to memory. 255bf215546Sopenharmony_ci */ 256bf215546Sopenharmony_ci if (seqno > batch->coherent_seqnos[i][i]) 257bf215546Sopenharmony_ci bits |= flush_bits[i] | l3_flush_bits[i]; 258bf215546Sopenharmony_ci } 259bf215546Sopenharmony_ci } 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci } 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_ci /* All read-only domains can be considered mutually coherent since 264bf215546Sopenharmony_ci * the order of read-only memory operations is immaterial. If the 265bf215546Sopenharmony_ci * specified domain is read/write we need to iterate over them too, 266bf215546Sopenharmony_ci * in order to handle any WaR dependencies. 267bf215546Sopenharmony_ci */ 268bf215546Sopenharmony_ci if (!iris_domain_is_read_only(access)) { 269bf215546Sopenharmony_ci for (unsigned i = IRIS_DOMAIN_VF_READ; i < NUM_IRIS_DOMAINS; i++) { 270bf215546Sopenharmony_ci assert(iris_domain_is_read_only(i)); 271bf215546Sopenharmony_ci const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]); 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ci const uint64_t last_visible_seqno = 274bf215546Sopenharmony_ci iris_domain_is_l3_coherent(devinfo, i) ? 275bf215546Sopenharmony_ci batch->l3_coherent_seqnos[i] : batch->coherent_seqnos[i][i]; 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci /* Flush if the most recent access from this domain occurred 278bf215546Sopenharmony_ci * after its most recent flush. 279bf215546Sopenharmony_ci */ 280bf215546Sopenharmony_ci if (seqno > last_visible_seqno) 281bf215546Sopenharmony_ci bits |= flush_bits[i]; 282bf215546Sopenharmony_ci } 283bf215546Sopenharmony_ci } 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci /* The IRIS_DOMAIN_OTHER_WRITE kitchen-sink domain cannot be 286bf215546Sopenharmony_ci * considered coherent with itself since it's really a collection 287bf215546Sopenharmony_ci * of multiple incoherent read/write domains, so we special-case it 288bf215546Sopenharmony_ci * here. 289bf215546Sopenharmony_ci */ 290bf215546Sopenharmony_ci const unsigned i = IRIS_DOMAIN_OTHER_WRITE; 291bf215546Sopenharmony_ci const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]); 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci assert(!iris_domain_is_l3_coherent(devinfo, i)); 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci /* Invalidate unless the most recent read/write access from this 296bf215546Sopenharmony_ci * domain is already guaranteed to be visible to the specified 297bf215546Sopenharmony_ci * domain. Flush if the most recent access from this domain 298bf215546Sopenharmony_ci * occurred after its most recent flush. 299bf215546Sopenharmony_ci */ 300bf215546Sopenharmony_ci if (seqno > batch->coherent_seqnos[access][i]) { 301bf215546Sopenharmony_ci bits |= invalidate_bits[access]; 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci /* There is a non-L3-coherent write that isn't visible to the 304bf215546Sopenharmony_ci * specified domain. If the access is via L3, then it might see 305bf215546Sopenharmony_ci * stale L3 data that was loaded before that write. In this case, 306bf215546Sopenharmony_ci * we try to invalidate all read-only sections of the L3 cache. 307bf215546Sopenharmony_ci */ 308bf215546Sopenharmony_ci if (access_via_l3 && seqno > batch->l3_coherent_seqnos[i]) 309bf215546Sopenharmony_ci bits |= PIPE_CONTROL_L3_RO_INVALIDATE_BITS; 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci if (seqno > batch->coherent_seqnos[i][i]) 312bf215546Sopenharmony_ci bits |= flush_bits[i]; 313bf215546Sopenharmony_ci } 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci if (bits) { 316bf215546Sopenharmony_ci /* Stall-at-scoreboard is not expected to work in combination with other 317bf215546Sopenharmony_ci * flush bits. 318bf215546Sopenharmony_ci */ 319bf215546Sopenharmony_ci if (bits & PIPE_CONTROL_CACHE_FLUSH_BITS) 320bf215546Sopenharmony_ci bits &= ~PIPE_CONTROL_STALL_AT_SCOREBOARD; 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci /* Emit any required flushes and invalidations. */ 323bf215546Sopenharmony_ci if (bits & all_flush_bits) 324bf215546Sopenharmony_ci iris_emit_end_of_pipe_sync(batch, "cache tracker: flush", 325bf215546Sopenharmony_ci bits & all_flush_bits); 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci if (bits & ~all_flush_bits) 328bf215546Sopenharmony_ci iris_emit_pipe_control_flush(batch, "cache tracker: invalidate", 329bf215546Sopenharmony_ci bits & ~all_flush_bits); 330bf215546Sopenharmony_ci } 331bf215546Sopenharmony_ci} 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_ci/** 334bf215546Sopenharmony_ci * Flush and invalidate all caches (for debugging purposes). 335bf215546Sopenharmony_ci */ 336bf215546Sopenharmony_civoid 337bf215546Sopenharmony_ciiris_flush_all_caches(struct iris_batch *batch) 338bf215546Sopenharmony_ci{ 339bf215546Sopenharmony_ci iris_emit_pipe_control_flush(batch, "debug: flush all caches", 340bf215546Sopenharmony_ci PIPE_CONTROL_CS_STALL | 341bf215546Sopenharmony_ci PIPE_CONTROL_DATA_CACHE_FLUSH | 342bf215546Sopenharmony_ci PIPE_CONTROL_DEPTH_CACHE_FLUSH | 343bf215546Sopenharmony_ci PIPE_CONTROL_RENDER_TARGET_FLUSH | 344bf215546Sopenharmony_ci PIPE_CONTROL_TILE_CACHE_FLUSH | 345bf215546Sopenharmony_ci PIPE_CONTROL_VF_CACHE_INVALIDATE | 346bf215546Sopenharmony_ci PIPE_CONTROL_INSTRUCTION_INVALIDATE | 347bf215546Sopenharmony_ci PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 348bf215546Sopenharmony_ci PIPE_CONTROL_CONST_CACHE_INVALIDATE | 349bf215546Sopenharmony_ci PIPE_CONTROL_STATE_CACHE_INVALIDATE); 350bf215546Sopenharmony_ci} 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_cistatic void 353bf215546Sopenharmony_ciiris_texture_barrier(struct pipe_context *ctx, unsigned flags) 354bf215546Sopenharmony_ci{ 355bf215546Sopenharmony_ci struct iris_context *ice = (void *) ctx; 356bf215546Sopenharmony_ci struct iris_batch *render_batch = &ice->batches[IRIS_BATCH_RENDER]; 357bf215546Sopenharmony_ci struct iris_batch *compute_batch = &ice->batches[IRIS_BATCH_COMPUTE]; 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci if (render_batch->contains_draw) { 360bf215546Sopenharmony_ci iris_batch_maybe_flush(render_batch, 48); 361bf215546Sopenharmony_ci iris_emit_pipe_control_flush(render_batch, 362bf215546Sopenharmony_ci "API: texture barrier (1/2)", 363bf215546Sopenharmony_ci PIPE_CONTROL_DEPTH_CACHE_FLUSH | 364bf215546Sopenharmony_ci PIPE_CONTROL_RENDER_TARGET_FLUSH | 365bf215546Sopenharmony_ci PIPE_CONTROL_CS_STALL); 366bf215546Sopenharmony_ci iris_emit_pipe_control_flush(render_batch, 367bf215546Sopenharmony_ci "API: texture barrier (2/2)", 368bf215546Sopenharmony_ci PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); 369bf215546Sopenharmony_ci } 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci if (compute_batch->contains_draw) { 372bf215546Sopenharmony_ci iris_batch_maybe_flush(compute_batch, 48); 373bf215546Sopenharmony_ci iris_emit_pipe_control_flush(compute_batch, 374bf215546Sopenharmony_ci "API: texture barrier (1/2)", 375bf215546Sopenharmony_ci PIPE_CONTROL_CS_STALL); 376bf215546Sopenharmony_ci iris_emit_pipe_control_flush(compute_batch, 377bf215546Sopenharmony_ci "API: texture barrier (2/2)", 378bf215546Sopenharmony_ci PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); 379bf215546Sopenharmony_ci } 380bf215546Sopenharmony_ci} 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_cistatic void 383bf215546Sopenharmony_ciiris_memory_barrier(struct pipe_context *ctx, unsigned flags) 384bf215546Sopenharmony_ci{ 385bf215546Sopenharmony_ci struct iris_context *ice = (void *) ctx; 386bf215546Sopenharmony_ci unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL; 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci if (flags & (PIPE_BARRIER_VERTEX_BUFFER | 389bf215546Sopenharmony_ci PIPE_BARRIER_INDEX_BUFFER | 390bf215546Sopenharmony_ci PIPE_BARRIER_INDIRECT_BUFFER)) { 391bf215546Sopenharmony_ci bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 392bf215546Sopenharmony_ci } 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci if (flags & PIPE_BARRIER_CONSTANT_BUFFER) { 395bf215546Sopenharmony_ci bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 396bf215546Sopenharmony_ci PIPE_CONTROL_CONST_CACHE_INVALIDATE; 397bf215546Sopenharmony_ci } 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci if (flags & (PIPE_BARRIER_TEXTURE | PIPE_BARRIER_FRAMEBUFFER)) { 400bf215546Sopenharmony_ci bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 401bf215546Sopenharmony_ci PIPE_CONTROL_RENDER_TARGET_FLUSH | 402bf215546Sopenharmony_ci PIPE_CONTROL_TILE_CACHE_FLUSH; 403bf215546Sopenharmony_ci } 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_ci iris_foreach_batch(ice, batch) { 406bf215546Sopenharmony_ci if (batch->contains_draw) { 407bf215546Sopenharmony_ci iris_batch_maybe_flush(batch, 24); 408bf215546Sopenharmony_ci iris_emit_pipe_control_flush(batch, "API: memory barrier", bits); 409bf215546Sopenharmony_ci } 410bf215546Sopenharmony_ci } 411bf215546Sopenharmony_ci} 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_civoid 414bf215546Sopenharmony_ciiris_init_flush_functions(struct pipe_context *ctx) 415bf215546Sopenharmony_ci{ 416bf215546Sopenharmony_ci ctx->memory_barrier = iris_memory_barrier; 417bf215546Sopenharmony_ci ctx->texture_barrier = iris_texture_barrier; 418bf215546Sopenharmony_ci} 419