1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3bf215546Sopenharmony_ci * Copyright 2018 Advanced Micro Devices, Inc. 4bf215546Sopenharmony_ci * All Rights Reserved. 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 8bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 9bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 10bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 11bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 12bf215546Sopenharmony_ci * 13bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 14bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 15bf215546Sopenharmony_ci * Software. 16bf215546Sopenharmony_ci * 17bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 20bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 21bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 24bf215546Sopenharmony_ci */ 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include "si_build_pm4.h" 27bf215546Sopenharmony_ci#include "si_pipe.h" 28bf215546Sopenharmony_ci#include "sid.h" 29bf215546Sopenharmony_ci#include "util/os_time.h" 30bf215546Sopenharmony_ci#include "util/u_log.h" 31bf215546Sopenharmony_ci#include "util/u_upload_mgr.h" 32bf215546Sopenharmony_ci#include "ac_debug.h" 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_civoid si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_handle **fence) 35bf215546Sopenharmony_ci{ 36bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &ctx->gfx_cs; 37bf215546Sopenharmony_ci struct radeon_winsys *ws = ctx->ws; 38bf215546Sopenharmony_ci struct si_screen *sscreen = ctx->screen; 39bf215546Sopenharmony_ci const unsigned wait_ps_cs = SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH; 40bf215546Sopenharmony_ci unsigned wait_flags = 0; 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci if (ctx->gfx_flush_in_progress) 43bf215546Sopenharmony_ci return; 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci /* The amdgpu kernel driver synchronizes execution for shared DMABUFs between 46bf215546Sopenharmony_ci * processes on DRM >= 3.39.0, so we don't have to wait at the end of IBs to 47bf215546Sopenharmony_ci * make sure everything is idle. 48bf215546Sopenharmony_ci * 49bf215546Sopenharmony_ci * The amdgpu winsys synchronizes execution for buffers shared by different 50bf215546Sopenharmony_ci * contexts within the same process. 51bf215546Sopenharmony_ci * 52bf215546Sopenharmony_ci * Interop with AMDVLK, RADV, or OpenCL within the same process requires 53bf215546Sopenharmony_ci * explicit fences or glFinish. 54bf215546Sopenharmony_ci */ 55bf215546Sopenharmony_ci if (sscreen->info.is_amdgpu && sscreen->info.drm_minor >= 39) 56bf215546Sopenharmony_ci flags |= RADEON_FLUSH_START_NEXT_GFX_IB_NOW; 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci if (ctx->gfx_level == GFX6) { 59bf215546Sopenharmony_ci /* The kernel flushes L2 before shaders are finished. */ 60bf215546Sopenharmony_ci wait_flags |= wait_ps_cs; 61bf215546Sopenharmony_ci } else if (!(flags & RADEON_FLUSH_START_NEXT_GFX_IB_NOW) || 62bf215546Sopenharmony_ci ((flags & RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION) && 63bf215546Sopenharmony_ci !ws->cs_is_secure(cs))) { 64bf215546Sopenharmony_ci /* TODO: this workaround fixes subtitles rendering with mpv -vo=vaapi and 65bf215546Sopenharmony_ci * tmz but shouldn't be necessary. 66bf215546Sopenharmony_ci */ 67bf215546Sopenharmony_ci wait_flags |= wait_ps_cs; 68bf215546Sopenharmony_ci } 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci /* Drop this flush if it's a no-op. */ 71bf215546Sopenharmony_ci if (!radeon_emitted(cs, ctx->initial_gfx_cs_size) && 72bf215546Sopenharmony_ci (!wait_flags || !ctx->gfx_last_ib_is_busy) && 73bf215546Sopenharmony_ci !(flags & RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION)) { 74bf215546Sopenharmony_ci tc_driver_internal_flush_notify(ctx->tc); 75bf215546Sopenharmony_ci return; 76bf215546Sopenharmony_ci } 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci /* Non-aux contexts must set up no-op API dispatch on GPU resets. This is 79bf215546Sopenharmony_ci * similar to si_get_reset_status but here we can ignore soft-recoveries, 80bf215546Sopenharmony_ci * while si_get_reset_status can't. */ 81bf215546Sopenharmony_ci if (!(ctx->context_flags & SI_CONTEXT_FLAG_AUX) && 82bf215546Sopenharmony_ci ctx->device_reset_callback.reset) { 83bf215546Sopenharmony_ci enum pipe_reset_status status = ctx->ws->ctx_query_reset_status(ctx->ctx, true, NULL); 84bf215546Sopenharmony_ci if (status != PIPE_NO_RESET) 85bf215546Sopenharmony_ci ctx->device_reset_callback.reset(ctx->device_reset_callback.data, status); 86bf215546Sopenharmony_ci } 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci if (sscreen->debug_flags & DBG(CHECK_VM)) 89bf215546Sopenharmony_ci flags &= ~PIPE_FLUSH_ASYNC; 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci ctx->gfx_flush_in_progress = true; 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci if (ctx->has_graphics) { 94bf215546Sopenharmony_ci if (!list_is_empty(&ctx->active_queries)) 95bf215546Sopenharmony_ci si_suspend_queries(ctx); 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci ctx->streamout.suspended = false; 98bf215546Sopenharmony_ci if (ctx->streamout.begin_emitted) { 99bf215546Sopenharmony_ci si_emit_streamout_end(ctx); 100bf215546Sopenharmony_ci ctx->streamout.suspended = true; 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci /* Since NGG streamout uses GDS, we need to make GDS 103bf215546Sopenharmony_ci * idle when we leave the IB, otherwise another process 104bf215546Sopenharmony_ci * might overwrite it while our shaders are busy. 105bf215546Sopenharmony_ci */ 106bf215546Sopenharmony_ci if (sscreen->use_ngg_streamout) 107bf215546Sopenharmony_ci wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH; 108bf215546Sopenharmony_ci } 109bf215546Sopenharmony_ci } 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci /* Make sure CP DMA is idle at the end of IBs after L2 prefetches 112bf215546Sopenharmony_ci * because the kernel doesn't wait for it. */ 113bf215546Sopenharmony_ci if (ctx->gfx_level >= GFX7) 114bf215546Sopenharmony_ci si_cp_dma_wait_for_idle(ctx, &ctx->gfx_cs); 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci /* If we use s_sendmsg to set tess factors to all 0 or all 1 instead of writing to the tess 117bf215546Sopenharmony_ci * factor buffer, we need this at the end of command buffers: 118bf215546Sopenharmony_ci */ 119bf215546Sopenharmony_ci if (ctx->gfx_level == GFX11 && ctx->tess_rings) { 120bf215546Sopenharmony_ci radeon_begin(cs); 121bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 122bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0)); 123bf215546Sopenharmony_ci radeon_end(); 124bf215546Sopenharmony_ci } 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci /* Wait for draw calls to finish if needed. */ 127bf215546Sopenharmony_ci if (wait_flags) { 128bf215546Sopenharmony_ci ctx->flags |= wait_flags; 129bf215546Sopenharmony_ci ctx->emit_cache_flush(ctx, &ctx->gfx_cs); 130bf215546Sopenharmony_ci } 131bf215546Sopenharmony_ci ctx->gfx_last_ib_is_busy = (wait_flags & wait_ps_cs) != wait_ps_cs; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci if (ctx->current_saved_cs) { 134bf215546Sopenharmony_ci si_trace_emit(ctx); 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci /* Save the IB for debug contexts. */ 137bf215546Sopenharmony_ci si_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true); 138bf215546Sopenharmony_ci ctx->current_saved_cs->flushed = true; 139bf215546Sopenharmony_ci ctx->current_saved_cs->time_flush = os_time_get_nano(); 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci si_log_hw_flush(ctx); 142bf215546Sopenharmony_ci } 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci if (sscreen->debug_flags & DBG(IB)) 145bf215546Sopenharmony_ci si_print_current_ib(ctx, stderr); 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci if (ctx->is_noop) 148bf215546Sopenharmony_ci flags |= RADEON_FLUSH_NOOP; 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci /* Flush the CS. */ 151bf215546Sopenharmony_ci ws->cs_flush(cs, flags, &ctx->last_gfx_fence); 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci tc_driver_internal_flush_notify(ctx->tc); 154bf215546Sopenharmony_ci if (fence) 155bf215546Sopenharmony_ci ws->fence_reference(fence, ctx->last_gfx_fence); 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci ctx->num_gfx_cs_flushes++; 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci /* Check VM faults if needed. */ 160bf215546Sopenharmony_ci if (sscreen->debug_flags & DBG(CHECK_VM)) { 161bf215546Sopenharmony_ci /* Use conservative timeout 800ms, after which we won't wait any 162bf215546Sopenharmony_ci * longer and assume the GPU is hung. 163bf215546Sopenharmony_ci */ 164bf215546Sopenharmony_ci ctx->ws->fence_wait(ctx->ws, ctx->last_gfx_fence, 800 * 1000 * 1000); 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci si_check_vm_faults(ctx, &ctx->current_saved_cs->gfx, AMD_IP_GFX); 167bf215546Sopenharmony_ci } 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci if (unlikely(ctx->thread_trace && 170bf215546Sopenharmony_ci (flags & PIPE_FLUSH_END_OF_FRAME))) { 171bf215546Sopenharmony_ci si_handle_thread_trace(ctx, &ctx->gfx_cs); 172bf215546Sopenharmony_ci } 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci if (ctx->current_saved_cs) 175bf215546Sopenharmony_ci si_saved_cs_reference(&ctx->current_saved_cs, NULL); 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci si_begin_new_gfx_cs(ctx, false); 178bf215546Sopenharmony_ci ctx->gfx_flush_in_progress = false; 179bf215546Sopenharmony_ci} 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_cistatic void si_begin_gfx_cs_debug(struct si_context *ctx) 182bf215546Sopenharmony_ci{ 183bf215546Sopenharmony_ci static const uint32_t zeros[1]; 184bf215546Sopenharmony_ci assert(!ctx->current_saved_cs); 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci ctx->current_saved_cs = calloc(1, sizeof(*ctx->current_saved_cs)); 187bf215546Sopenharmony_ci if (!ctx->current_saved_cs) 188bf215546Sopenharmony_ci return; 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci pipe_reference_init(&ctx->current_saved_cs->reference, 1); 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci ctx->current_saved_cs->trace_buf = 193bf215546Sopenharmony_ci si_resource(pipe_buffer_create(ctx->b.screen, 0, PIPE_USAGE_STAGING, 4)); 194bf215546Sopenharmony_ci if (!ctx->current_saved_cs->trace_buf) { 195bf215546Sopenharmony_ci free(ctx->current_saved_cs); 196bf215546Sopenharmony_ci ctx->current_saved_cs = NULL; 197bf215546Sopenharmony_ci return; 198bf215546Sopenharmony_ci } 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci pipe_buffer_write_nooverlap(&ctx->b, &ctx->current_saved_cs->trace_buf->b.b, 0, sizeof(zeros), 201bf215546Sopenharmony_ci zeros); 202bf215546Sopenharmony_ci ctx->current_saved_cs->trace_id = 0; 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci si_trace_emit(ctx); 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->current_saved_cs->trace_buf, 207bf215546Sopenharmony_ci RADEON_USAGE_READWRITE | RADEON_PRIO_FENCE_TRACE); 208bf215546Sopenharmony_ci} 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_cistatic void si_add_gds_to_buffer_list(struct si_context *sctx) 211bf215546Sopenharmony_ci{ 212bf215546Sopenharmony_ci if (sctx->screen->gds) { 213bf215546Sopenharmony_ci sctx->ws->cs_add_buffer(&sctx->gfx_cs, sctx->screen->gds, RADEON_USAGE_READWRITE, 0); 214bf215546Sopenharmony_ci if (sctx->screen->gds_oa) { 215bf215546Sopenharmony_ci sctx->ws->cs_add_buffer(&sctx->gfx_cs, sctx->screen->gds_oa, RADEON_USAGE_READWRITE, 0); 216bf215546Sopenharmony_ci } 217bf215546Sopenharmony_ci } 218bf215546Sopenharmony_ci} 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_civoid si_allocate_gds(struct si_context *sctx) 221bf215546Sopenharmony_ci{ 222bf215546Sopenharmony_ci struct radeon_winsys *ws = sctx->ws; 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci if (sctx->screen->gds && sctx->screen->gds_oa) 225bf215546Sopenharmony_ci return; 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci assert(sctx->screen->use_ngg_streamout); 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci /* We need 256B (64 dw) of GDS, otherwise streamout hangs. */ 230bf215546Sopenharmony_ci simple_mtx_lock(&sctx->screen->gds_mutex); 231bf215546Sopenharmony_ci if (!sctx->screen->gds) 232bf215546Sopenharmony_ci sctx->screen->gds = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, RADEON_FLAG_DRIVER_INTERNAL); 233bf215546Sopenharmony_ci if (!sctx->screen->gds_oa) 234bf215546Sopenharmony_ci sctx->screen->gds_oa = ws->buffer_create(ws, 1, 1, RADEON_DOMAIN_OA, RADEON_FLAG_DRIVER_INTERNAL); 235bf215546Sopenharmony_ci simple_mtx_unlock(&sctx->screen->gds_mutex); 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci assert(sctx->screen->gds && sctx->screen->gds_oa); 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci si_add_gds_to_buffer_list(sctx); 240bf215546Sopenharmony_ci} 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_civoid si_set_tracked_regs_to_clear_state(struct si_context *ctx) 243bf215546Sopenharmony_ci{ 244bf215546Sopenharmony_ci STATIC_ASSERT(SI_NUM_TRACKED_REGS <= sizeof(ctx->tracked_regs.reg_saved) * 8); 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] = 0x00000000; 247bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] = 0x00000000; 248bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] = 0x00000000; 249bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] = 0x00000000; 250bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] = 0xffffffff; 251bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] = 0x00000000; 252bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] = 0x00000000; 253bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] = 0x00000000; 254bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] = 0x00000000; 255bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_LINE_CNTL] = 0x00001000; 256bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_AA_CONFIG] = 0x00000000; 257bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_DB_EQAA] = 0x00000000; 258bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0x00000000; 259bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_PRIM_FILTER_CNTL] = 0; 260bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0x00000000; 261bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0x00000000; 262bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000; 263bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003; 264bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL] = 0x00000000; 265bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ] = 0x3f800000; 266bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000; 267bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000; 268bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ] = 0x3f800000; 269bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET] = 0; 270bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_VTX_CNTL] = 0x00000005; 271bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE] = 0xffff; 272bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_LINE_STIPPLE] = 0; 273bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE] = 0x00000000; 274bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1] = 0x00000000; 275bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_2] = 0x00000000; 276bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_3] = 0x00000000; 277bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_ITEMSIZE] = 0x00000000; 278bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_VERT_OUT] = 0x00000000; 279bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE] = 0x00000000; 280bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_1] = 0x00000000; 281bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_2] = 0x00000000; 282bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_3] = 0x00000000; 283bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_INSTANCE_CNT] = 0x00000000; 284bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_ONCHIP_CNTL] = 0x00000000; 285bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP] = 0x00000000; 286bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MODE] = 0x00000000; 287bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_PRIMITIVEID_EN] = 0x00000000; 288bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_REUSE_OFF] = 0x00000000; 289bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_SPI_VS_OUT_CONFIG] = 0x00000000; 290bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP] = 0x00000000; 291bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_GE_NGG_SUBGRP_CNTL] = 0x00000000; 292bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_IDX_FORMAT] = 0x00000000; 293bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_POS_FORMAT] = 0x00000000; 294bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VTE_CNTL] = 0x00000000; 295bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_NGG_CNTL] = 0x00000000; 296bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ENA] = 0x00000000; 297bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ADDR] = 0x00000000; 298bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_SPI_BARYC_CNTL] = 0x00000000; 299bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_IN_CONTROL] = 0x00000002; 300bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_Z_FORMAT] = 0x00000000; 301bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT] = 0x00000000; 302bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK] = 0xffffffff; 303bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_TF_PARAM] = 0x00000000; 304bf215546Sopenharmony_ci ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x0000001e; /* From GFX8 */ 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci /* Set all cleared context registers to saved. */ 307bf215546Sopenharmony_ci ctx->tracked_regs.reg_saved = BITFIELD64_MASK(SI_TRACKED_GE_PC_ALLOC); 308bf215546Sopenharmony_ci 309bf215546Sopenharmony_ci if (ctx->gfx_level >= GFX11) 310bf215546Sopenharmony_ci ctx->last_gs_out_prim = -1; /* uconfig register, unknown value */ 311bf215546Sopenharmony_ci else 312bf215546Sopenharmony_ci ctx->last_gs_out_prim = 0; /* context register cleared by CLEAR_STATE */ 313bf215546Sopenharmony_ci} 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_civoid si_install_draw_wrapper(struct si_context *sctx, pipe_draw_vbo_func wrapper, 316bf215546Sopenharmony_ci pipe_draw_vertex_state_func vstate_wrapper) 317bf215546Sopenharmony_ci{ 318bf215546Sopenharmony_ci if (wrapper) { 319bf215546Sopenharmony_ci if (wrapper != sctx->b.draw_vbo) { 320bf215546Sopenharmony_ci assert(!sctx->real_draw_vbo); 321bf215546Sopenharmony_ci assert(!sctx->real_draw_vertex_state); 322bf215546Sopenharmony_ci sctx->real_draw_vbo = sctx->b.draw_vbo; 323bf215546Sopenharmony_ci sctx->real_draw_vertex_state = sctx->b.draw_vertex_state; 324bf215546Sopenharmony_ci sctx->b.draw_vbo = wrapper; 325bf215546Sopenharmony_ci sctx->b.draw_vertex_state = vstate_wrapper; 326bf215546Sopenharmony_ci } 327bf215546Sopenharmony_ci } else if (sctx->real_draw_vbo) { 328bf215546Sopenharmony_ci sctx->real_draw_vbo = NULL; 329bf215546Sopenharmony_ci sctx->real_draw_vertex_state = NULL; 330bf215546Sopenharmony_ci si_select_draw_vbo(sctx); 331bf215546Sopenharmony_ci } 332bf215546Sopenharmony_ci} 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_cistatic void si_tmz_preamble(struct si_context *sctx) 335bf215546Sopenharmony_ci{ 336bf215546Sopenharmony_ci bool secure = si_gfx_resources_check_encrypted(sctx); 337bf215546Sopenharmony_ci if (secure != sctx->ws->cs_is_secure(&sctx->gfx_cs)) { 338bf215546Sopenharmony_ci si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW | 339bf215546Sopenharmony_ci RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION, NULL); 340bf215546Sopenharmony_ci } 341bf215546Sopenharmony_ci} 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_cistatic void si_draw_vbo_tmz_preamble(struct pipe_context *ctx, 344bf215546Sopenharmony_ci const struct pipe_draw_info *info, 345bf215546Sopenharmony_ci unsigned drawid_offset, 346bf215546Sopenharmony_ci const struct pipe_draw_indirect_info *indirect, 347bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draws, 348bf215546Sopenharmony_ci unsigned num_draws) { 349bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci si_tmz_preamble(sctx); 352bf215546Sopenharmony_ci sctx->real_draw_vbo(ctx, info, drawid_offset, indirect, draws, num_draws); 353bf215546Sopenharmony_ci} 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_cistatic void si_draw_vstate_tmz_preamble(struct pipe_context *ctx, 356bf215546Sopenharmony_ci struct pipe_vertex_state *state, 357bf215546Sopenharmony_ci uint32_t partial_velem_mask, 358bf215546Sopenharmony_ci struct pipe_draw_vertex_state_info info, 359bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draws, 360bf215546Sopenharmony_ci unsigned num_draws) { 361bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 362bf215546Sopenharmony_ci 363bf215546Sopenharmony_ci si_tmz_preamble(sctx); 364bf215546Sopenharmony_ci sctx->real_draw_vertex_state(ctx, state, partial_velem_mask, info, draws, num_draws); 365bf215546Sopenharmony_ci} 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_civoid si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) 368bf215546Sopenharmony_ci{ 369bf215546Sopenharmony_ci bool is_secure = false; 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci if (unlikely(radeon_uses_secure_bos(ctx->ws))) { 372bf215546Sopenharmony_ci is_secure = ctx->ws->cs_is_secure(&ctx->gfx_cs); 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci si_install_draw_wrapper(ctx, si_draw_vbo_tmz_preamble, 375bf215546Sopenharmony_ci si_draw_vstate_tmz_preamble); 376bf215546Sopenharmony_ci } 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_ci if (ctx->is_debug) 379bf215546Sopenharmony_ci si_begin_gfx_cs_debug(ctx); 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_ci si_add_gds_to_buffer_list(ctx); 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci /* Always invalidate caches at the beginning of IBs, because external 384bf215546Sopenharmony_ci * users (e.g. BO evictions and SDMA/UVD/VCE IBs) can modify our 385bf215546Sopenharmony_ci * buffers. 386bf215546Sopenharmony_ci * 387bf215546Sopenharmony_ci * Note that the cache flush done by the kernel at the end of GFX IBs 388bf215546Sopenharmony_ci * isn't useful here, because that flush can finish after the following 389bf215546Sopenharmony_ci * IB starts drawing. 390bf215546Sopenharmony_ci * 391bf215546Sopenharmony_ci * TODO: Do we also need to invalidate CB & DB caches? 392bf215546Sopenharmony_ci */ 393bf215546Sopenharmony_ci ctx->flags |= SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE | 394bf215546Sopenharmony_ci SI_CONTEXT_INV_L2 | SI_CONTEXT_START_PIPELINE_STATS; 395bf215546Sopenharmony_ci ctx->pipeline_stats_enabled = -1; 396bf215546Sopenharmony_ci 397bf215546Sopenharmony_ci /* We don't know if the last draw used NGG because it can be a different process. 398bf215546Sopenharmony_ci * When switching NGG->legacy, we need to flush VGT for certain hw generations. 399bf215546Sopenharmony_ci */ 400bf215546Sopenharmony_ci if (ctx->screen->info.has_vgt_flush_ngg_legacy_bug && !ctx->ngg) 401bf215546Sopenharmony_ci ctx->flags |= SI_CONTEXT_VGT_FLUSH; 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci if (ctx->screen->attribute_ring) { 404bf215546Sopenharmony_ci radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->screen->attribute_ring, 405bf215546Sopenharmony_ci RADEON_USAGE_READWRITE | RADEON_PRIO_SHADER_RINGS); 406bf215546Sopenharmony_ci } 407bf215546Sopenharmony_ci if (ctx->border_color_buffer) { 408bf215546Sopenharmony_ci radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->border_color_buffer, 409bf215546Sopenharmony_ci RADEON_USAGE_READ | RADEON_PRIO_BORDER_COLORS); 410bf215546Sopenharmony_ci } 411bf215546Sopenharmony_ci if (ctx->shadowed_regs) { 412bf215546Sopenharmony_ci radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowed_regs, 413bf215546Sopenharmony_ci RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS); 414bf215546Sopenharmony_ci } 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci si_add_all_descriptors_to_bo_list(ctx); 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci if (first_cs || !ctx->shadowed_regs) { 419bf215546Sopenharmony_ci si_shader_pointers_mark_dirty(ctx); 420bf215546Sopenharmony_ci ctx->cs_shader_state.initialized = false; 421bf215546Sopenharmony_ci } 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci if (!ctx->has_graphics) { 424bf215546Sopenharmony_ci ctx->initial_gfx_cs_size = ctx->gfx_cs.current.cdw; 425bf215546Sopenharmony_ci return; 426bf215546Sopenharmony_ci } 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci if (ctx->tess_rings) { 429bf215546Sopenharmony_ci radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, 430bf215546Sopenharmony_ci unlikely(is_secure) ? si_resource(ctx->tess_rings_tmz) : si_resource(ctx->tess_rings), 431bf215546Sopenharmony_ci RADEON_USAGE_READWRITE | RADEON_PRIO_SHADER_RINGS); 432bf215546Sopenharmony_ci } 433bf215546Sopenharmony_ci 434bf215546Sopenharmony_ci /* set all valid group as dirty so they get reemited on 435bf215546Sopenharmony_ci * next draw command 436bf215546Sopenharmony_ci */ 437bf215546Sopenharmony_ci si_pm4_reset_emitted(ctx, first_cs); 438bf215546Sopenharmony_ci 439bf215546Sopenharmony_ci /* The CS initialization should be emitted before everything else. */ 440bf215546Sopenharmony_ci if (ctx->cs_preamble_state) { 441bf215546Sopenharmony_ci struct si_pm4_state *preamble = is_secure ? ctx->cs_preamble_state_tmz : 442bf215546Sopenharmony_ci ctx->cs_preamble_state; 443bf215546Sopenharmony_ci ctx->ws->cs_set_preamble(&ctx->gfx_cs, preamble->pm4, preamble->ndw, 444bf215546Sopenharmony_ci preamble != ctx->last_preamble); 445bf215546Sopenharmony_ci ctx->last_preamble = preamble; 446bf215546Sopenharmony_ci } 447bf215546Sopenharmony_ci 448bf215546Sopenharmony_ci if (ctx->queued.named.ls) 449bf215546Sopenharmony_ci ctx->prefetch_L2_mask |= SI_PREFETCH_LS; 450bf215546Sopenharmony_ci if (ctx->queued.named.hs) 451bf215546Sopenharmony_ci ctx->prefetch_L2_mask |= SI_PREFETCH_HS; 452bf215546Sopenharmony_ci if (ctx->queued.named.es) 453bf215546Sopenharmony_ci ctx->prefetch_L2_mask |= SI_PREFETCH_ES; 454bf215546Sopenharmony_ci if (ctx->queued.named.gs) 455bf215546Sopenharmony_ci ctx->prefetch_L2_mask |= SI_PREFETCH_GS; 456bf215546Sopenharmony_ci if (ctx->queued.named.vs) 457bf215546Sopenharmony_ci ctx->prefetch_L2_mask |= SI_PREFETCH_VS; 458bf215546Sopenharmony_ci if (ctx->queued.named.ps) 459bf215546Sopenharmony_ci ctx->prefetch_L2_mask |= SI_PREFETCH_PS; 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci /* CLEAR_STATE disables all colorbuffers, so only enable bound ones. */ 462bf215546Sopenharmony_ci bool has_clear_state = ctx->screen->info.has_clear_state; 463bf215546Sopenharmony_ci if (has_clear_state || ctx->shadowed_regs) { 464bf215546Sopenharmony_ci ctx->framebuffer.dirty_cbufs = 465bf215546Sopenharmony_ci u_bit_consecutive(0, ctx->framebuffer.state.nr_cbufs); 466bf215546Sopenharmony_ci /* CLEAR_STATE disables the zbuffer, so only enable it if it's bound. */ 467bf215546Sopenharmony_ci ctx->framebuffer.dirty_zsbuf = ctx->framebuffer.state.zsbuf != NULL; 468bf215546Sopenharmony_ci } else { 469bf215546Sopenharmony_ci ctx->framebuffer.dirty_cbufs = u_bit_consecutive(0, 8); 470bf215546Sopenharmony_ci ctx->framebuffer.dirty_zsbuf = true; 471bf215546Sopenharmony_ci } 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci /* Even with shadowed registers, we have to add buffers to the buffer list. 474bf215546Sopenharmony_ci * These atoms are the only ones that add buffers. 475bf215546Sopenharmony_ci */ 476bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.framebuffer); 477bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond); 478bf215546Sopenharmony_ci if (ctx->screen->use_ngg_culling) 479bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.ngg_cull_state); 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci if (first_cs || !ctx->shadowed_regs) { 482bf215546Sopenharmony_ci /* These don't add any buffers, so skip them with shadowing. */ 483bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_regs); 484bf215546Sopenharmony_ci /* CLEAR_STATE sets zeros. */ 485bf215546Sopenharmony_ci if (!has_clear_state || ctx->clip_state_any_nonzeros) 486bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_state); 487bf215546Sopenharmony_ci ctx->sample_locs_num_samples = 0; 488bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.msaa_sample_locs); 489bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.msaa_config); 490bf215546Sopenharmony_ci /* CLEAR_STATE sets 0xffff. */ 491bf215546Sopenharmony_ci if (!has_clear_state || ctx->sample_mask != 0xffff) 492bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.sample_mask); 493bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.cb_render_state); 494bf215546Sopenharmony_ci /* CLEAR_STATE sets zeros. */ 495bf215546Sopenharmony_ci if (!has_clear_state || ctx->blend_color_any_nonzeros) 496bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.blend_color); 497bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state); 498bf215546Sopenharmony_ci if (ctx->gfx_level >= GFX9) 499bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state); 500bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref); 501bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map); 502bf215546Sopenharmony_ci if (!ctx->screen->use_ngg_streamout) 503bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable); 504bf215546Sopenharmony_ci /* CLEAR_STATE disables all window rectangles. */ 505bf215546Sopenharmony_ci if (!has_clear_state || ctx->num_window_rectangles > 0) 506bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles); 507bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband); 508bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors); 509bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports); 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_ci /* Invalidate various draw states so that they are emitted before 512bf215546Sopenharmony_ci * the first draw call. */ 513bf215546Sopenharmony_ci si_invalidate_draw_constants(ctx); 514bf215546Sopenharmony_ci ctx->last_index_size = -1; 515bf215546Sopenharmony_ci ctx->last_primitive_restart_en = -1; 516bf215546Sopenharmony_ci ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN; 517bf215546Sopenharmony_ci ctx->last_prim = -1; 518bf215546Sopenharmony_ci ctx->last_multi_vgt_param = -1; 519bf215546Sopenharmony_ci ctx->last_vs_state = ~0; 520bf215546Sopenharmony_ci ctx->last_gs_state = ~0; 521bf215546Sopenharmony_ci ctx->last_ls = NULL; 522bf215546Sopenharmony_ci ctx->last_tcs = NULL; 523bf215546Sopenharmony_ci ctx->last_tes_sh_base = -1; 524bf215546Sopenharmony_ci ctx->last_num_tcs_input_cp = -1; 525bf215546Sopenharmony_ci ctx->last_ls_hs_config = -1; /* impossible value */ 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_ci if (has_clear_state) { 528bf215546Sopenharmony_ci si_set_tracked_regs_to_clear_state(ctx); 529bf215546Sopenharmony_ci } else { 530bf215546Sopenharmony_ci /* Set all register values to unknown. */ 531bf215546Sopenharmony_ci ctx->tracked_regs.reg_saved = 0; 532bf215546Sopenharmony_ci ctx->last_gs_out_prim = -1; /* unknown */ 533bf215546Sopenharmony_ci } 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci /* 0xffffffff is an impossible value to register SPI_PS_INPUT_CNTL_n */ 536bf215546Sopenharmony_ci memset(ctx->tracked_regs.spi_ps_input_cntl, 0xff, sizeof(uint32_t) * 32); 537bf215546Sopenharmony_ci } 538bf215546Sopenharmony_ci 539bf215546Sopenharmony_ci if (ctx->scratch_buffer) { 540bf215546Sopenharmony_ci si_context_add_resource_size(ctx, &ctx->scratch_buffer->b.b); 541bf215546Sopenharmony_ci si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state); 542bf215546Sopenharmony_ci } 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci if (ctx->streamout.suspended) { 545bf215546Sopenharmony_ci ctx->streamout.append_bitmask = ctx->streamout.enabled_mask; 546bf215546Sopenharmony_ci si_streamout_buffers_dirty(ctx); 547bf215546Sopenharmony_ci } 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_ci if (!list_is_empty(&ctx->active_queries)) 550bf215546Sopenharmony_ci si_resume_queries(ctx); 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_ci assert(!ctx->gfx_cs.prev_dw); 553bf215546Sopenharmony_ci ctx->initial_gfx_cs_size = ctx->gfx_cs.current.cdw; 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci /* All buffer references are removed on a flush, so si_check_needs_implicit_sync 556bf215546Sopenharmony_ci * cannot determine if si_make_CB_shader_coherent() needs to be called. 557bf215546Sopenharmony_ci * ctx->force_cb_shader_coherent will be cleared by the first call to 558bf215546Sopenharmony_ci * si_make_CB_shader_coherent. 559bf215546Sopenharmony_ci */ 560bf215546Sopenharmony_ci ctx->force_cb_shader_coherent = true; 561bf215546Sopenharmony_ci} 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_civoid si_trace_emit(struct si_context *sctx) 564bf215546Sopenharmony_ci{ 565bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 566bf215546Sopenharmony_ci uint32_t trace_id = ++sctx->current_saved_cs->trace_id; 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_ci si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf, 0, 4, V_370_MEM, V_370_ME, &trace_id); 569bf215546Sopenharmony_ci 570bf215546Sopenharmony_ci radeon_begin(cs); 571bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_NOP, 0, 0)); 572bf215546Sopenharmony_ci radeon_emit(AC_ENCODE_TRACE_POINT(trace_id)); 573bf215546Sopenharmony_ci radeon_end(); 574bf215546Sopenharmony_ci 575bf215546Sopenharmony_ci if (sctx->log) 576bf215546Sopenharmony_ci u_log_flush(sctx->log); 577bf215546Sopenharmony_ci} 578bf215546Sopenharmony_ci 579bf215546Sopenharmony_civoid si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned cp_coher_cntl) 580bf215546Sopenharmony_ci{ 581bf215546Sopenharmony_ci bool compute_ib = !sctx->has_graphics; 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci assert(sctx->gfx_level <= GFX9); 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_ci /* This seems problematic with GFX7 (see #4764) */ 586bf215546Sopenharmony_ci if (sctx->gfx_level != GFX7) 587bf215546Sopenharmony_ci cp_coher_cntl |= 1u << 31; /* don't sync PFP, i.e. execute the sync in ME */ 588bf215546Sopenharmony_ci 589bf215546Sopenharmony_ci radeon_begin(cs); 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci if (sctx->gfx_level == GFX9 || compute_ib) { 592bf215546Sopenharmony_ci /* Flush caches and wait for the caches to assert idle. */ 593bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 5, 0)); 594bf215546Sopenharmony_ci radeon_emit(cp_coher_cntl); /* CP_COHER_CNTL */ 595bf215546Sopenharmony_ci radeon_emit(0xffffffff); /* CP_COHER_SIZE */ 596bf215546Sopenharmony_ci radeon_emit(0xffffff); /* CP_COHER_SIZE_HI */ 597bf215546Sopenharmony_ci radeon_emit(0); /* CP_COHER_BASE */ 598bf215546Sopenharmony_ci radeon_emit(0); /* CP_COHER_BASE_HI */ 599bf215546Sopenharmony_ci radeon_emit(0x0000000A); /* POLL_INTERVAL */ 600bf215546Sopenharmony_ci } else { 601bf215546Sopenharmony_ci /* ACQUIRE_MEM is only required on a compute ring. */ 602bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_SURFACE_SYNC, 3, 0)); 603bf215546Sopenharmony_ci radeon_emit(cp_coher_cntl); /* CP_COHER_CNTL */ 604bf215546Sopenharmony_ci radeon_emit(0xffffffff); /* CP_COHER_SIZE */ 605bf215546Sopenharmony_ci radeon_emit(0); /* CP_COHER_BASE */ 606bf215546Sopenharmony_ci radeon_emit(0x0000000A); /* POLL_INTERVAL */ 607bf215546Sopenharmony_ci } 608bf215546Sopenharmony_ci radeon_end(); 609bf215546Sopenharmony_ci 610bf215546Sopenharmony_ci /* ACQUIRE_MEM has an implicit context roll if the current context 611bf215546Sopenharmony_ci * is busy. */ 612bf215546Sopenharmony_ci if (!compute_ib) 613bf215546Sopenharmony_ci sctx->context_roll = true; 614bf215546Sopenharmony_ci} 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_cistatic struct si_resource *si_get_wait_mem_scratch_bo(struct si_context *ctx, 617bf215546Sopenharmony_ci struct radeon_cmdbuf *cs, bool is_secure) 618bf215546Sopenharmony_ci{ 619bf215546Sopenharmony_ci struct si_screen *sscreen = ctx->screen; 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ci assert(ctx->gfx_level < GFX11); 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci if (likely(!is_secure)) { 624bf215546Sopenharmony_ci return ctx->wait_mem_scratch; 625bf215546Sopenharmony_ci } else { 626bf215546Sopenharmony_ci assert(sscreen->info.has_tmz_support); 627bf215546Sopenharmony_ci if (!ctx->wait_mem_scratch_tmz) { 628bf215546Sopenharmony_ci ctx->wait_mem_scratch_tmz = 629bf215546Sopenharmony_ci si_aligned_buffer_create(&sscreen->b, 630bf215546Sopenharmony_ci PIPE_RESOURCE_FLAG_UNMAPPABLE | 631bf215546Sopenharmony_ci SI_RESOURCE_FLAG_DRIVER_INTERNAL | 632bf215546Sopenharmony_ci PIPE_RESOURCE_FLAG_ENCRYPTED, 633bf215546Sopenharmony_ci PIPE_USAGE_DEFAULT, 4, 634bf215546Sopenharmony_ci sscreen->info.tcc_cache_line_size); 635bf215546Sopenharmony_ci si_cp_write_data(ctx, ctx->wait_mem_scratch_tmz, 0, 4, V_370_MEM, V_370_ME, 636bf215546Sopenharmony_ci &ctx->wait_mem_number); 637bf215546Sopenharmony_ci } 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_ci return ctx->wait_mem_scratch_tmz; 640bf215546Sopenharmony_ci } 641bf215546Sopenharmony_ci} 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_civoid gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs) 644bf215546Sopenharmony_ci{ 645bf215546Sopenharmony_ci uint32_t gcr_cntl = 0; 646bf215546Sopenharmony_ci unsigned cb_db_event = 0; 647bf215546Sopenharmony_ci unsigned flags = ctx->flags; 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_ci if (!ctx->has_graphics) { 650bf215546Sopenharmony_ci /* Only process compute flags. */ 651bf215546Sopenharmony_ci flags &= SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE | 652bf215546Sopenharmony_ci SI_CONTEXT_INV_L2 | SI_CONTEXT_WB_L2 | SI_CONTEXT_INV_L2_METADATA | 653bf215546Sopenharmony_ci SI_CONTEXT_CS_PARTIAL_FLUSH; 654bf215546Sopenharmony_ci } 655bf215546Sopenharmony_ci 656bf215546Sopenharmony_ci /* We don't need these. */ 657bf215546Sopenharmony_ci assert(!(flags & (SI_CONTEXT_VGT_STREAMOUT_SYNC | SI_CONTEXT_FLUSH_AND_INV_DB_META))); 658bf215546Sopenharmony_ci 659bf215546Sopenharmony_ci radeon_begin(cs); 660bf215546Sopenharmony_ci 661bf215546Sopenharmony_ci if (flags & SI_CONTEXT_VGT_FLUSH) { 662bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 663bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); 664bf215546Sopenharmony_ci } 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_ci if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) 667bf215546Sopenharmony_ci ctx->num_cb_cache_flushes++; 668bf215546Sopenharmony_ci if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) 669bf215546Sopenharmony_ci ctx->num_db_cache_flushes++; 670bf215546Sopenharmony_ci 671bf215546Sopenharmony_ci if (flags & SI_CONTEXT_INV_ICACHE) 672bf215546Sopenharmony_ci gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL); 673bf215546Sopenharmony_ci if (flags & SI_CONTEXT_INV_SCACHE) { 674bf215546Sopenharmony_ci /* TODO: When writing to the SMEM L1 cache, we need to set SEQ 675bf215546Sopenharmony_ci * to FORWARD when both L1 and L2 are written out (WB or INV). 676bf215546Sopenharmony_ci */ 677bf215546Sopenharmony_ci gcr_cntl |= S_586_GL1_INV(1) | S_586_GLK_INV(1); 678bf215546Sopenharmony_ci } 679bf215546Sopenharmony_ci if (flags & SI_CONTEXT_INV_VCACHE) 680bf215546Sopenharmony_ci gcr_cntl |= S_586_GL1_INV(1) | S_586_GLV_INV(1); 681bf215546Sopenharmony_ci 682bf215546Sopenharmony_ci /* The L2 cache ops are: 683bf215546Sopenharmony_ci * - INV: - invalidate lines that reflect memory (were loaded from memory) 684bf215546Sopenharmony_ci * - don't touch lines that were overwritten (were stored by gfx clients) 685bf215546Sopenharmony_ci * - WB: - don't touch lines that reflect memory 686bf215546Sopenharmony_ci * - write back lines that were overwritten 687bf215546Sopenharmony_ci * - WB | INV: - invalidate lines that reflect memory 688bf215546Sopenharmony_ci * - write back lines that were overwritten 689bf215546Sopenharmony_ci * 690bf215546Sopenharmony_ci * GLM doesn't support WB alone. If WB is set, INV must be set too. 691bf215546Sopenharmony_ci */ 692bf215546Sopenharmony_ci if (flags & SI_CONTEXT_INV_L2) { 693bf215546Sopenharmony_ci /* Writeback and invalidate everything in L2. */ 694bf215546Sopenharmony_ci gcr_cntl |= S_586_GL2_INV(1) | S_586_GL2_WB(1) | S_586_GLM_INV(1) | S_586_GLM_WB(1); 695bf215546Sopenharmony_ci ctx->num_L2_invalidates++; 696bf215546Sopenharmony_ci } else if (flags & SI_CONTEXT_WB_L2) { 697bf215546Sopenharmony_ci gcr_cntl |= S_586_GL2_WB(1) | S_586_GLM_WB(1) | S_586_GLM_INV(1); 698bf215546Sopenharmony_ci } else if (flags & SI_CONTEXT_INV_L2_METADATA) { 699bf215546Sopenharmony_ci gcr_cntl |= S_586_GLM_INV(1) | S_586_GLM_WB(1); 700bf215546Sopenharmony_ci } 701bf215546Sopenharmony_ci 702bf215546Sopenharmony_ci if (flags & (SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB)) { 703bf215546Sopenharmony_ci if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) { 704bf215546Sopenharmony_ci /* Flush CMASK/FMASK/DCC. Will wait for idle later. */ 705bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 706bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0)); 707bf215546Sopenharmony_ci } 708bf215546Sopenharmony_ci 709bf215546Sopenharmony_ci /* Gfx11 can't flush DB_META and should use a TS event instead. */ 710bf215546Sopenharmony_ci if (ctx->gfx_level != GFX11 && flags & SI_CONTEXT_FLUSH_AND_INV_DB) { 711bf215546Sopenharmony_ci /* Flush HTILE. Will wait for idle later. */ 712bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 713bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0)); 714bf215546Sopenharmony_ci } 715bf215546Sopenharmony_ci 716bf215546Sopenharmony_ci /* First flush CB/DB, then L1/L2. */ 717bf215546Sopenharmony_ci gcr_cntl |= S_586_SEQ(V_586_SEQ_FORWARD); 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_ci if ((flags & (SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB)) == 720bf215546Sopenharmony_ci (SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB)) { 721bf215546Sopenharmony_ci cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT; 722bf215546Sopenharmony_ci } else if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) { 723bf215546Sopenharmony_ci cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS; 724bf215546Sopenharmony_ci } else if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) { 725bf215546Sopenharmony_ci if (ctx->gfx_level == GFX11) 726bf215546Sopenharmony_ci cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT; 727bf215546Sopenharmony_ci else 728bf215546Sopenharmony_ci cb_db_event = V_028A90_FLUSH_AND_INV_DB_DATA_TS; 729bf215546Sopenharmony_ci } else { 730bf215546Sopenharmony_ci assert(0); 731bf215546Sopenharmony_ci } 732bf215546Sopenharmony_ci } else { 733bf215546Sopenharmony_ci /* Wait for graphics shaders to go idle if requested. */ 734bf215546Sopenharmony_ci if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH) { 735bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 736bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 737bf215546Sopenharmony_ci /* Only count explicit shader flushes, not implicit ones. */ 738bf215546Sopenharmony_ci ctx->num_vs_flushes++; 739bf215546Sopenharmony_ci ctx->num_ps_flushes++; 740bf215546Sopenharmony_ci } else if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH) { 741bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 742bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 743bf215546Sopenharmony_ci ctx->num_vs_flushes++; 744bf215546Sopenharmony_ci } 745bf215546Sopenharmony_ci } 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH && ctx->compute_is_busy) { 748bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 749bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4))); 750bf215546Sopenharmony_ci ctx->num_cs_flushes++; 751bf215546Sopenharmony_ci ctx->compute_is_busy = false; 752bf215546Sopenharmony_ci } 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ci if (cb_db_event) { 755bf215546Sopenharmony_ci if (ctx->gfx_level >= GFX11) { 756bf215546Sopenharmony_ci /* Get GCR_CNTL fields, because the encoding is different in RELEASE_MEM. */ 757bf215546Sopenharmony_ci unsigned glm_wb = G_586_GLM_WB(gcr_cntl); 758bf215546Sopenharmony_ci unsigned glm_inv = G_586_GLM_INV(gcr_cntl); 759bf215546Sopenharmony_ci unsigned glk_wb = G_586_GLK_WB(gcr_cntl); 760bf215546Sopenharmony_ci unsigned glk_inv = G_586_GLK_INV(gcr_cntl); 761bf215546Sopenharmony_ci unsigned glv_inv = G_586_GLV_INV(gcr_cntl); 762bf215546Sopenharmony_ci unsigned gl1_inv = G_586_GL1_INV(gcr_cntl); 763bf215546Sopenharmony_ci assert(G_586_GL2_US(gcr_cntl) == 0); 764bf215546Sopenharmony_ci assert(G_586_GL2_RANGE(gcr_cntl) == 0); 765bf215546Sopenharmony_ci assert(G_586_GL2_DISCARD(gcr_cntl) == 0); 766bf215546Sopenharmony_ci unsigned gl2_inv = G_586_GL2_INV(gcr_cntl); 767bf215546Sopenharmony_ci unsigned gl2_wb = G_586_GL2_WB(gcr_cntl); 768bf215546Sopenharmony_ci unsigned gcr_seq = G_586_SEQ(gcr_cntl); 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_ci gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GLK_WB & C_586_GLK_INV & 771bf215546Sopenharmony_ci C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */ 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_ci /* Send an event that flushes caches. */ 774bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_RELEASE_MEM, 6, 0)); 775bf215546Sopenharmony_ci radeon_emit(S_490_EVENT_TYPE(cb_db_event) | 776bf215546Sopenharmony_ci S_490_EVENT_INDEX(5) | 777bf215546Sopenharmony_ci S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) | 778bf215546Sopenharmony_ci S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) | 779bf215546Sopenharmony_ci S_490_SEQ(gcr_seq) | S_490_GLK_WB(glk_wb) | S_490_GLK_INV(glk_inv) | 780bf215546Sopenharmony_ci S_490_PWS_ENABLE(1)); 781bf215546Sopenharmony_ci radeon_emit(0); /* DST_SEL, INT_SEL, DATA_SEL */ 782bf215546Sopenharmony_ci radeon_emit(0); /* ADDRESS_LO */ 783bf215546Sopenharmony_ci radeon_emit(0); /* ADDRESS_HI */ 784bf215546Sopenharmony_ci radeon_emit(0); /* DATA_LO */ 785bf215546Sopenharmony_ci radeon_emit(0); /* DATA_HI */ 786bf215546Sopenharmony_ci radeon_emit(0); /* INT_CTXID */ 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_ci if (unlikely(ctx->thread_trace_enabled)) { 789bf215546Sopenharmony_ci radeon_end(); 790bf215546Sopenharmony_ci si_sqtt_describe_barrier_start(ctx, &ctx->gfx_cs); 791bf215546Sopenharmony_ci radeon_begin_again(cs); 792bf215546Sopenharmony_ci } 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci /* Wait for the event and invalidate remaining caches if needed. */ 795bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0)); 796bf215546Sopenharmony_ci radeon_emit(S_580_PWS_STAGE_SEL(flags & SI_CONTEXT_PFP_SYNC_ME ? V_580_CP_PFP : 797bf215546Sopenharmony_ci V_580_CP_ME) | 798bf215546Sopenharmony_ci S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) | 799bf215546Sopenharmony_ci S_580_PWS_ENA2(1) | 800bf215546Sopenharmony_ci S_580_PWS_COUNT(0)); 801bf215546Sopenharmony_ci radeon_emit(0xffffffff); /* GCR_SIZE */ 802bf215546Sopenharmony_ci radeon_emit(0x01ffffff); /* GCR_SIZE_HI */ 803bf215546Sopenharmony_ci radeon_emit(0); /* GCR_BASE_LO */ 804bf215546Sopenharmony_ci radeon_emit(0); /* GCR_BASE_HI */ 805bf215546Sopenharmony_ci radeon_emit(S_585_PWS_ENA(1)); 806bf215546Sopenharmony_ci radeon_emit(gcr_cntl); /* GCR_CNTL */ 807bf215546Sopenharmony_ci 808bf215546Sopenharmony_ci if (unlikely(ctx->thread_trace_enabled)) { 809bf215546Sopenharmony_ci radeon_end(); 810bf215546Sopenharmony_ci si_sqtt_describe_barrier_end(ctx, &ctx->gfx_cs, flags); 811bf215546Sopenharmony_ci radeon_begin_again(cs); 812bf215546Sopenharmony_ci } 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci gcr_cntl = 0; /* all done */ 815bf215546Sopenharmony_ci flags &= ~SI_CONTEXT_PFP_SYNC_ME; 816bf215546Sopenharmony_ci } else { 817bf215546Sopenharmony_ci /* GFX10 */ 818bf215546Sopenharmony_ci radeon_end(); 819bf215546Sopenharmony_ci 820bf215546Sopenharmony_ci struct si_resource *wait_mem_scratch = 821bf215546Sopenharmony_ci si_get_wait_mem_scratch_bo(ctx, cs, ctx->ws->cs_is_secure(cs)); 822bf215546Sopenharmony_ci 823bf215546Sopenharmony_ci /* CB/DB flush and invalidate via RELEASE_MEM. 824bf215546Sopenharmony_ci * Combine this with other cache flushes when possible. 825bf215546Sopenharmony_ci */ 826bf215546Sopenharmony_ci uint64_t va = wait_mem_scratch->gpu_address; 827bf215546Sopenharmony_ci ctx->wait_mem_number++; 828bf215546Sopenharmony_ci 829bf215546Sopenharmony_ci /* Get GCR_CNTL fields, because the encoding is different in RELEASE_MEM. */ 830bf215546Sopenharmony_ci unsigned glm_wb = G_586_GLM_WB(gcr_cntl); 831bf215546Sopenharmony_ci unsigned glm_inv = G_586_GLM_INV(gcr_cntl); 832bf215546Sopenharmony_ci unsigned glv_inv = G_586_GLV_INV(gcr_cntl); 833bf215546Sopenharmony_ci unsigned gl1_inv = G_586_GL1_INV(gcr_cntl); 834bf215546Sopenharmony_ci assert(G_586_GL2_US(gcr_cntl) == 0); 835bf215546Sopenharmony_ci assert(G_586_GL2_RANGE(gcr_cntl) == 0); 836bf215546Sopenharmony_ci assert(G_586_GL2_DISCARD(gcr_cntl) == 0); 837bf215546Sopenharmony_ci unsigned gl2_inv = G_586_GL2_INV(gcr_cntl); 838bf215546Sopenharmony_ci unsigned gl2_wb = G_586_GL2_WB(gcr_cntl); 839bf215546Sopenharmony_ci unsigned gcr_seq = G_586_SEQ(gcr_cntl); 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_ci gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV & 842bf215546Sopenharmony_ci C_586_GL2_WB; /* keep SEQ */ 843bf215546Sopenharmony_ci 844bf215546Sopenharmony_ci si_cp_release_mem(ctx, cs, cb_db_event, 845bf215546Sopenharmony_ci S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) | 846bf215546Sopenharmony_ci S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) | 847bf215546Sopenharmony_ci S_490_SEQ(gcr_seq), 848bf215546Sopenharmony_ci EOP_DST_SEL_MEM, EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, 849bf215546Sopenharmony_ci EOP_DATA_SEL_VALUE_32BIT, wait_mem_scratch, va, ctx->wait_mem_number, 850bf215546Sopenharmony_ci SI_NOT_QUERY); 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci if (unlikely(ctx->thread_trace_enabled)) { 853bf215546Sopenharmony_ci si_sqtt_describe_barrier_start(ctx, &ctx->gfx_cs); 854bf215546Sopenharmony_ci } 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci si_cp_wait_mem(ctx, cs, va, ctx->wait_mem_number, 0xffffffff, WAIT_REG_MEM_EQUAL); 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci if (unlikely(ctx->thread_trace_enabled)) { 859bf215546Sopenharmony_ci si_sqtt_describe_barrier_end(ctx, &ctx->gfx_cs, flags); 860bf215546Sopenharmony_ci } 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci radeon_begin_again(cs); 863bf215546Sopenharmony_ci } 864bf215546Sopenharmony_ci } 865bf215546Sopenharmony_ci 866bf215546Sopenharmony_ci /* Ignore fields that only modify the behavior of other fields. */ 867bf215546Sopenharmony_ci if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) { 868bf215546Sopenharmony_ci unsigned dont_sync_pfp = (!(flags & SI_CONTEXT_PFP_SYNC_ME)) << 31; 869bf215546Sopenharmony_ci 870bf215546Sopenharmony_ci /* Flush caches and wait for the caches to assert idle. 871bf215546Sopenharmony_ci * The cache flush is executed in the ME, but the PFP waits 872bf215546Sopenharmony_ci * for completion. 873bf215546Sopenharmony_ci */ 874bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0)); 875bf215546Sopenharmony_ci radeon_emit(dont_sync_pfp); /* CP_COHER_CNTL */ 876bf215546Sopenharmony_ci radeon_emit(0xffffffff); /* CP_COHER_SIZE */ 877bf215546Sopenharmony_ci radeon_emit(0xffffff); /* CP_COHER_SIZE_HI */ 878bf215546Sopenharmony_ci radeon_emit(0); /* CP_COHER_BASE */ 879bf215546Sopenharmony_ci radeon_emit(0); /* CP_COHER_BASE_HI */ 880bf215546Sopenharmony_ci radeon_emit(0x0000000A); /* POLL_INTERVAL */ 881bf215546Sopenharmony_ci radeon_emit(gcr_cntl); /* GCR_CNTL */ 882bf215546Sopenharmony_ci } else if (flags & SI_CONTEXT_PFP_SYNC_ME) { 883bf215546Sopenharmony_ci /* Synchronize PFP with ME. (this stalls PFP) */ 884bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0)); 885bf215546Sopenharmony_ci radeon_emit(0); 886bf215546Sopenharmony_ci } 887bf215546Sopenharmony_ci 888bf215546Sopenharmony_ci if (flags & SI_CONTEXT_START_PIPELINE_STATS && ctx->pipeline_stats_enabled != 1) { 889bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 890bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0)); 891bf215546Sopenharmony_ci ctx->pipeline_stats_enabled = 1; 892bf215546Sopenharmony_ci } else if (flags & SI_CONTEXT_STOP_PIPELINE_STATS && ctx->pipeline_stats_enabled != 0) { 893bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 894bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0)); 895bf215546Sopenharmony_ci ctx->pipeline_stats_enabled = 0; 896bf215546Sopenharmony_ci } 897bf215546Sopenharmony_ci radeon_end(); 898bf215546Sopenharmony_ci 899bf215546Sopenharmony_ci ctx->flags = 0; 900bf215546Sopenharmony_ci} 901bf215546Sopenharmony_ci 902bf215546Sopenharmony_civoid si_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs) 903bf215546Sopenharmony_ci{ 904bf215546Sopenharmony_ci uint32_t flags = sctx->flags; 905bf215546Sopenharmony_ci 906bf215546Sopenharmony_ci if (!sctx->has_graphics) { 907bf215546Sopenharmony_ci /* Only process compute flags. */ 908bf215546Sopenharmony_ci flags &= SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE | 909bf215546Sopenharmony_ci SI_CONTEXT_INV_L2 | SI_CONTEXT_WB_L2 | SI_CONTEXT_INV_L2_METADATA | 910bf215546Sopenharmony_ci SI_CONTEXT_CS_PARTIAL_FLUSH; 911bf215546Sopenharmony_ci } 912bf215546Sopenharmony_ci 913bf215546Sopenharmony_ci uint32_t cp_coher_cntl = 0; 914bf215546Sopenharmony_ci const uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB); 915bf215546Sopenharmony_ci 916bf215546Sopenharmony_ci assert(sctx->gfx_level <= GFX9); 917bf215546Sopenharmony_ci 918bf215546Sopenharmony_ci if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) 919bf215546Sopenharmony_ci sctx->num_cb_cache_flushes++; 920bf215546Sopenharmony_ci if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) 921bf215546Sopenharmony_ci sctx->num_db_cache_flushes++; 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci /* GFX6 has a bug that it always flushes ICACHE and KCACHE if either 924bf215546Sopenharmony_ci * bit is set. An alternative way is to write SQC_CACHES, but that 925bf215546Sopenharmony_ci * doesn't seem to work reliably. Since the bug doesn't affect 926bf215546Sopenharmony_ci * correctness (it only does more work than necessary) and 927bf215546Sopenharmony_ci * the performance impact is likely negligible, there is no plan 928bf215546Sopenharmony_ci * to add a workaround for it. 929bf215546Sopenharmony_ci */ 930bf215546Sopenharmony_ci 931bf215546Sopenharmony_ci if (flags & SI_CONTEXT_INV_ICACHE) 932bf215546Sopenharmony_ci cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1); 933bf215546Sopenharmony_ci if (flags & SI_CONTEXT_INV_SCACHE) 934bf215546Sopenharmony_ci cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); 935bf215546Sopenharmony_ci 936bf215546Sopenharmony_ci if (sctx->gfx_level <= GFX8) { 937bf215546Sopenharmony_ci if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) { 938bf215546Sopenharmony_ci cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) | 939bf215546Sopenharmony_ci S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_CB2_DEST_BASE_ENA(1) | 940bf215546Sopenharmony_ci S_0085F0_CB3_DEST_BASE_ENA(1) | S_0085F0_CB4_DEST_BASE_ENA(1) | 941bf215546Sopenharmony_ci S_0085F0_CB5_DEST_BASE_ENA(1) | S_0085F0_CB6_DEST_BASE_ENA(1) | 942bf215546Sopenharmony_ci S_0085F0_CB7_DEST_BASE_ENA(1); 943bf215546Sopenharmony_ci 944bf215546Sopenharmony_ci /* Necessary for DCC */ 945bf215546Sopenharmony_ci if (sctx->gfx_level == GFX8) 946bf215546Sopenharmony_ci si_cp_release_mem(sctx, cs, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0, EOP_DST_SEL_MEM, 947bf215546Sopenharmony_ci EOP_INT_SEL_NONE, EOP_DATA_SEL_DISCARD, NULL, 0, 0, SI_NOT_QUERY); 948bf215546Sopenharmony_ci } 949bf215546Sopenharmony_ci if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) 950bf215546Sopenharmony_ci cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1); 951bf215546Sopenharmony_ci } 952bf215546Sopenharmony_ci 953bf215546Sopenharmony_ci radeon_begin(cs); 954bf215546Sopenharmony_ci 955bf215546Sopenharmony_ci if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) { 956bf215546Sopenharmony_ci /* Flush CMASK/FMASK/DCC. SURFACE_SYNC will wait for idle. */ 957bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 958bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0)); 959bf215546Sopenharmony_ci } 960bf215546Sopenharmony_ci if (flags & (SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_FLUSH_AND_INV_DB_META)) { 961bf215546Sopenharmony_ci /* Flush HTILE. SURFACE_SYNC will wait for idle. */ 962bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 963bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0)); 964bf215546Sopenharmony_ci } 965bf215546Sopenharmony_ci 966bf215546Sopenharmony_ci /* Wait for shader engines to go idle. 967bf215546Sopenharmony_ci * VS and PS waits are unnecessary if SURFACE_SYNC is going to wait 968bf215546Sopenharmony_ci * for everything including CB/DB cache flushes. 969bf215546Sopenharmony_ci */ 970bf215546Sopenharmony_ci if (!flush_cb_db) { 971bf215546Sopenharmony_ci if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH) { 972bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 973bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 974bf215546Sopenharmony_ci /* Only count explicit shader flushes, not implicit ones 975bf215546Sopenharmony_ci * done by SURFACE_SYNC. 976bf215546Sopenharmony_ci */ 977bf215546Sopenharmony_ci sctx->num_vs_flushes++; 978bf215546Sopenharmony_ci sctx->num_ps_flushes++; 979bf215546Sopenharmony_ci } else if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH) { 980bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 981bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 982bf215546Sopenharmony_ci sctx->num_vs_flushes++; 983bf215546Sopenharmony_ci } 984bf215546Sopenharmony_ci } 985bf215546Sopenharmony_ci 986bf215546Sopenharmony_ci if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH && sctx->compute_is_busy) { 987bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 988bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 989bf215546Sopenharmony_ci sctx->num_cs_flushes++; 990bf215546Sopenharmony_ci sctx->compute_is_busy = false; 991bf215546Sopenharmony_ci } 992bf215546Sopenharmony_ci 993bf215546Sopenharmony_ci /* VGT state synchronization. */ 994bf215546Sopenharmony_ci if (flags & SI_CONTEXT_VGT_FLUSH) { 995bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 996bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); 997bf215546Sopenharmony_ci } 998bf215546Sopenharmony_ci if (flags & SI_CONTEXT_VGT_STREAMOUT_SYNC) { 999bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 1000bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0)); 1001bf215546Sopenharmony_ci } 1002bf215546Sopenharmony_ci 1003bf215546Sopenharmony_ci radeon_end(); 1004bf215546Sopenharmony_ci 1005bf215546Sopenharmony_ci /* GFX9: Wait for idle if we're flushing CB or DB. ACQUIRE_MEM doesn't 1006bf215546Sopenharmony_ci * wait for idle on GFX9. We have to use a TS event. 1007bf215546Sopenharmony_ci */ 1008bf215546Sopenharmony_ci if (sctx->gfx_level == GFX9 && flush_cb_db) { 1009bf215546Sopenharmony_ci uint64_t va; 1010bf215546Sopenharmony_ci unsigned tc_flags, cb_db_event; 1011bf215546Sopenharmony_ci 1012bf215546Sopenharmony_ci /* Set the CB/DB flush event. */ 1013bf215546Sopenharmony_ci switch (flush_cb_db) { 1014bf215546Sopenharmony_ci case SI_CONTEXT_FLUSH_AND_INV_CB: 1015bf215546Sopenharmony_ci cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS; 1016bf215546Sopenharmony_ci break; 1017bf215546Sopenharmony_ci case SI_CONTEXT_FLUSH_AND_INV_DB: 1018bf215546Sopenharmony_ci cb_db_event = V_028A90_FLUSH_AND_INV_DB_DATA_TS; 1019bf215546Sopenharmony_ci break; 1020bf215546Sopenharmony_ci default: 1021bf215546Sopenharmony_ci /* both CB & DB */ 1022bf215546Sopenharmony_ci cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT; 1023bf215546Sopenharmony_ci } 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci /* These are the only allowed combinations. If you need to 1026bf215546Sopenharmony_ci * do multiple operations at once, do them separately. 1027bf215546Sopenharmony_ci * All operations that invalidate L2 also seem to invalidate 1028bf215546Sopenharmony_ci * metadata. Volatile (VOL) and WC flushes are not listed here. 1029bf215546Sopenharmony_ci * 1030bf215546Sopenharmony_ci * TC | TC_WB = writeback & invalidate L2 1031bf215546Sopenharmony_ci * TC | TC_WB | TC_NC = writeback & invalidate L2 for MTYPE == NC 1032bf215546Sopenharmony_ci * TC_WB | TC_NC = writeback L2 for MTYPE == NC 1033bf215546Sopenharmony_ci * TC | TC_NC = invalidate L2 for MTYPE == NC 1034bf215546Sopenharmony_ci * TC | TC_MD = writeback & invalidate L2 metadata (DCC, etc.) 1035bf215546Sopenharmony_ci * TCL1 = invalidate L1 1036bf215546Sopenharmony_ci */ 1037bf215546Sopenharmony_ci tc_flags = 0; 1038bf215546Sopenharmony_ci 1039bf215546Sopenharmony_ci if (flags & SI_CONTEXT_INV_L2_METADATA) { 1040bf215546Sopenharmony_ci tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_MD_ACTION_ENA; 1041bf215546Sopenharmony_ci } 1042bf215546Sopenharmony_ci 1043bf215546Sopenharmony_ci /* Ideally flush TC together with CB/DB. */ 1044bf215546Sopenharmony_ci if (flags & SI_CONTEXT_INV_L2) { 1045bf215546Sopenharmony_ci /* Writeback and invalidate everything in L2 & L1. */ 1046bf215546Sopenharmony_ci tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_WB_ACTION_ENA; 1047bf215546Sopenharmony_ci 1048bf215546Sopenharmony_ci /* Clear the flags. */ 1049bf215546Sopenharmony_ci flags &= ~(SI_CONTEXT_INV_L2 | SI_CONTEXT_WB_L2); 1050bf215546Sopenharmony_ci sctx->num_L2_invalidates++; 1051bf215546Sopenharmony_ci } 1052bf215546Sopenharmony_ci 1053bf215546Sopenharmony_ci /* Do the flush (enqueue the event and wait for it). */ 1054bf215546Sopenharmony_ci struct si_resource* wait_mem_scratch = 1055bf215546Sopenharmony_ci si_get_wait_mem_scratch_bo(sctx, cs, sctx->ws->cs_is_secure(cs)); 1056bf215546Sopenharmony_ci 1057bf215546Sopenharmony_ci va = wait_mem_scratch->gpu_address; 1058bf215546Sopenharmony_ci sctx->wait_mem_number++; 1059bf215546Sopenharmony_ci 1060bf215546Sopenharmony_ci si_cp_release_mem(sctx, cs, cb_db_event, tc_flags, EOP_DST_SEL_MEM, 1061bf215546Sopenharmony_ci EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, 1062bf215546Sopenharmony_ci wait_mem_scratch, va, sctx->wait_mem_number, SI_NOT_QUERY); 1063bf215546Sopenharmony_ci 1064bf215546Sopenharmony_ci if (unlikely(sctx->thread_trace_enabled)) { 1065bf215546Sopenharmony_ci si_sqtt_describe_barrier_start(sctx, &sctx->gfx_cs); 1066bf215546Sopenharmony_ci } 1067bf215546Sopenharmony_ci 1068bf215546Sopenharmony_ci si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number, 0xffffffff, WAIT_REG_MEM_EQUAL); 1069bf215546Sopenharmony_ci 1070bf215546Sopenharmony_ci if (unlikely(sctx->thread_trace_enabled)) { 1071bf215546Sopenharmony_ci si_sqtt_describe_barrier_end(sctx, &sctx->gfx_cs, sctx->flags); 1072bf215546Sopenharmony_ci } 1073bf215546Sopenharmony_ci } 1074bf215546Sopenharmony_ci 1075bf215546Sopenharmony_ci /* GFX6-GFX8 only: 1076bf215546Sopenharmony_ci * When one of the CP_COHER_CNTL.DEST_BASE flags is set, SURFACE_SYNC 1077bf215546Sopenharmony_ci * waits for idle, so it should be last. SURFACE_SYNC is done in PFP. 1078bf215546Sopenharmony_ci * 1079bf215546Sopenharmony_ci * cp_coher_cntl should contain all necessary flags except TC and PFP flags 1080bf215546Sopenharmony_ci * at this point. 1081bf215546Sopenharmony_ci * 1082bf215546Sopenharmony_ci * GFX6-GFX7 don't support L2 write-back. 1083bf215546Sopenharmony_ci */ 1084bf215546Sopenharmony_ci if (flags & SI_CONTEXT_INV_L2 || (sctx->gfx_level <= GFX7 && (flags & SI_CONTEXT_WB_L2))) { 1085bf215546Sopenharmony_ci /* Invalidate L1 & L2. (L1 is always invalidated on GFX6) 1086bf215546Sopenharmony_ci * WB must be set on GFX8+ when TC_ACTION is set. 1087bf215546Sopenharmony_ci */ 1088bf215546Sopenharmony_ci si_emit_surface_sync(sctx, cs, 1089bf215546Sopenharmony_ci cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) | 1090bf215546Sopenharmony_ci S_0301F0_TC_WB_ACTION_ENA(sctx->gfx_level >= GFX8)); 1091bf215546Sopenharmony_ci cp_coher_cntl = 0; 1092bf215546Sopenharmony_ci sctx->num_L2_invalidates++; 1093bf215546Sopenharmony_ci } else { 1094bf215546Sopenharmony_ci /* L1 invalidation and L2 writeback must be done separately, 1095bf215546Sopenharmony_ci * because both operations can't be done together. 1096bf215546Sopenharmony_ci */ 1097bf215546Sopenharmony_ci if (flags & SI_CONTEXT_WB_L2) { 1098bf215546Sopenharmony_ci /* WB = write-back 1099bf215546Sopenharmony_ci * NC = apply to non-coherent MTYPEs 1100bf215546Sopenharmony_ci * (i.e. MTYPE <= 1, which is what we use everywhere) 1101bf215546Sopenharmony_ci * 1102bf215546Sopenharmony_ci * WB doesn't work without NC. 1103bf215546Sopenharmony_ci */ 1104bf215546Sopenharmony_ci si_emit_surface_sync( 1105bf215546Sopenharmony_ci sctx, cs, 1106bf215546Sopenharmony_ci cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1)); 1107bf215546Sopenharmony_ci cp_coher_cntl = 0; 1108bf215546Sopenharmony_ci sctx->num_L2_writebacks++; 1109bf215546Sopenharmony_ci } 1110bf215546Sopenharmony_ci if (flags & SI_CONTEXT_INV_VCACHE) { 1111bf215546Sopenharmony_ci /* Invalidate per-CU VMEM L1. */ 1112bf215546Sopenharmony_ci si_emit_surface_sync(sctx, cs, cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1)); 1113bf215546Sopenharmony_ci cp_coher_cntl = 0; 1114bf215546Sopenharmony_ci } 1115bf215546Sopenharmony_ci } 1116bf215546Sopenharmony_ci 1117bf215546Sopenharmony_ci /* If TC flushes haven't cleared this... */ 1118bf215546Sopenharmony_ci if (cp_coher_cntl) 1119bf215546Sopenharmony_ci si_emit_surface_sync(sctx, cs, cp_coher_cntl); 1120bf215546Sopenharmony_ci 1121bf215546Sopenharmony_ci if (flags & SI_CONTEXT_PFP_SYNC_ME) { 1122bf215546Sopenharmony_ci radeon_begin(cs); 1123bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0)); 1124bf215546Sopenharmony_ci radeon_emit(0); 1125bf215546Sopenharmony_ci radeon_end(); 1126bf215546Sopenharmony_ci } 1127bf215546Sopenharmony_ci 1128bf215546Sopenharmony_ci if (flags & SI_CONTEXT_START_PIPELINE_STATS && sctx->pipeline_stats_enabled != 1) { 1129bf215546Sopenharmony_ci radeon_begin(cs); 1130bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 1131bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0)); 1132bf215546Sopenharmony_ci radeon_end(); 1133bf215546Sopenharmony_ci sctx->pipeline_stats_enabled = 1; 1134bf215546Sopenharmony_ci } else if (flags & SI_CONTEXT_STOP_PIPELINE_STATS && sctx->pipeline_stats_enabled != 0) { 1135bf215546Sopenharmony_ci radeon_begin(cs); 1136bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 1137bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0)); 1138bf215546Sopenharmony_ci radeon_end(); 1139bf215546Sopenharmony_ci sctx->pipeline_stats_enabled = 0; 1140bf215546Sopenharmony_ci } 1141bf215546Sopenharmony_ci 1142bf215546Sopenharmony_ci sctx->flags = 0; 1143bf215546Sopenharmony_ci} 1144