1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2020 Advanced Micro Devices, Inc. 3bf215546Sopenharmony_ci * All Rights Reserved. 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 9bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 10bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 23bf215546Sopenharmony_ci * 24bf215546Sopenharmony_ci */ 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "si_pipe.h" 28bf215546Sopenharmony_ci#include "si_build_pm4.h" 29bf215546Sopenharmony_ci#include "si_compute.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "ac_rgp.h" 32bf215546Sopenharmony_ci#include "ac_sqtt.h" 33bf215546Sopenharmony_ci#include "util/u_memory.h" 34bf215546Sopenharmony_ci#include "tgsi/tgsi_from_mesa.h" 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_cistatic void 37bf215546Sopenharmony_cisi_emit_spi_config_cntl(struct si_context* sctx, 38bf215546Sopenharmony_ci struct radeon_cmdbuf *cs, bool enable); 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_cistatic bool 41bf215546Sopenharmony_cisi_thread_trace_init_bo(struct si_context *sctx) 42bf215546Sopenharmony_ci{ 43bf215546Sopenharmony_ci unsigned max_se = sctx->screen->info.max_se; 44bf215546Sopenharmony_ci struct radeon_winsys *ws = sctx->ws; 45bf215546Sopenharmony_ci uint64_t size; 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ci /* The buffer size and address need to be aligned in HW regs. Align the 48bf215546Sopenharmony_ci * size as early as possible so that we do all the allocation & addressing 49bf215546Sopenharmony_ci * correctly. */ 50bf215546Sopenharmony_ci sctx->thread_trace->buffer_size = align64(sctx->thread_trace->buffer_size, 51bf215546Sopenharmony_ci 1u << SQTT_BUFFER_ALIGN_SHIFT); 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci /* Compute total size of the thread trace BO for all SEs. */ 54bf215546Sopenharmony_ci size = align64(sizeof(struct ac_thread_trace_info) * max_se, 55bf215546Sopenharmony_ci 1 << SQTT_BUFFER_ALIGN_SHIFT); 56bf215546Sopenharmony_ci size += sctx->thread_trace->buffer_size * (uint64_t)max_se; 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci sctx->thread_trace->bo = 59bf215546Sopenharmony_ci ws->buffer_create(ws, size, 4096, 60bf215546Sopenharmony_ci RADEON_DOMAIN_VRAM, 61bf215546Sopenharmony_ci RADEON_FLAG_NO_INTERPROCESS_SHARING | 62bf215546Sopenharmony_ci RADEON_FLAG_GTT_WC | 63bf215546Sopenharmony_ci RADEON_FLAG_NO_SUBALLOC); 64bf215546Sopenharmony_ci if (!sctx->thread_trace->bo) 65bf215546Sopenharmony_ci return false; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci return true; 68bf215546Sopenharmony_ci} 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_cistatic bool 71bf215546Sopenharmony_cisi_se_is_disabled(struct si_context* sctx, unsigned se) 72bf215546Sopenharmony_ci{ 73bf215546Sopenharmony_ci /* No active CU on the SE means it is disabled. */ 74bf215546Sopenharmony_ci return sctx->screen->info.cu_mask[se][0] == 0; 75bf215546Sopenharmony_ci} 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_cistatic void 79bf215546Sopenharmony_cisi_emit_thread_trace_start(struct si_context* sctx, 80bf215546Sopenharmony_ci struct radeon_cmdbuf *cs, 81bf215546Sopenharmony_ci uint32_t queue_family_index) 82bf215546Sopenharmony_ci{ 83bf215546Sopenharmony_ci struct si_screen *sscreen = sctx->screen; 84bf215546Sopenharmony_ci uint32_t shifted_size = sctx->thread_trace->buffer_size >> SQTT_BUFFER_ALIGN_SHIFT; 85bf215546Sopenharmony_ci unsigned max_se = sscreen->info.max_se; 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci radeon_begin(cs); 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci for (unsigned se = 0; se < max_se; se++) { 90bf215546Sopenharmony_ci uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo); 91bf215546Sopenharmony_ci uint64_t data_va = ac_thread_trace_get_data_va(&sctx->screen->info, sctx->thread_trace, va, se); 92bf215546Sopenharmony_ci uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT; 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci if (si_se_is_disabled(sctx, se)) 95bf215546Sopenharmony_ci continue; 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci /* Target SEx and SH0. */ 98bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, 99bf215546Sopenharmony_ci S_030800_SE_INDEX(se) | 100bf215546Sopenharmony_ci S_030800_SH_INDEX(0) | 101bf215546Sopenharmony_ci S_030800_INSTANCE_BROADCAST_WRITES(1)); 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci /* Select the first active CUs */ 104bf215546Sopenharmony_ci int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]); 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10) { 107bf215546Sopenharmony_ci /* Order seems important for the following 2 registers. */ 108bf215546Sopenharmony_ci radeon_set_privileged_config_reg(R_008D04_SQ_THREAD_TRACE_BUF0_SIZE, 109bf215546Sopenharmony_ci S_008D04_SIZE(shifted_size) | 110bf215546Sopenharmony_ci S_008D04_BASE_HI(shifted_va >> 32)); 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci radeon_set_privileged_config_reg(R_008D00_SQ_THREAD_TRACE_BUF0_BASE, shifted_va); 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci int wgp = first_active_cu / 2; 115bf215546Sopenharmony_ci radeon_set_privileged_config_reg(R_008D14_SQ_THREAD_TRACE_MASK, 116bf215546Sopenharmony_ci S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */ 117bf215546Sopenharmony_ci S_008D14_SA_SEL(0) | 118bf215546Sopenharmony_ci S_008D14_WGP_SEL(wgp) | 119bf215546Sopenharmony_ci S_008D14_SIMD_SEL(0)); 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci radeon_set_privileged_config_reg(R_008D18_SQ_THREAD_TRACE_TOKEN_MASK, 122bf215546Sopenharmony_ci S_008D18_REG_INCLUDE(V_008D18_REG_INCLUDE_SQDEC | 123bf215546Sopenharmony_ci V_008D18_REG_INCLUDE_SHDEC | 124bf215546Sopenharmony_ci V_008D18_REG_INCLUDE_GFXUDEC | 125bf215546Sopenharmony_ci V_008D18_REG_INCLUDE_CONTEXT | 126bf215546Sopenharmony_ci V_008D18_REG_INCLUDE_COMP | 127bf215546Sopenharmony_ci V_008D18_REG_INCLUDE_CONFIG) | 128bf215546Sopenharmony_ci S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF)); 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci /* Should be emitted last (it enables thread traces). */ 131bf215546Sopenharmony_ci radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL, 132bf215546Sopenharmony_ci S_008D1C_MODE(1) | 133bf215546Sopenharmony_ci S_008D1C_HIWATER(5) | 134bf215546Sopenharmony_ci S_008D1C_UTIL_TIMER(1) | 135bf215546Sopenharmony_ci S_008D1C_RT_FREQ(2) | /* 4096 clk */ 136bf215546Sopenharmony_ci S_008D1C_DRAW_EVENT_EN(1) | 137bf215546Sopenharmony_ci S_008D1C_REG_STALL_EN(1) | 138bf215546Sopenharmony_ci S_008D1C_SPI_STALL_EN(1) | 139bf215546Sopenharmony_ci S_008D1C_SQ_STALL_EN(1) | 140bf215546Sopenharmony_ci S_008D1C_REG_DROP_ON_STALL(0) | 141bf215546Sopenharmony_ci S_008D1C_LOWATER_OFFSET( 142bf215546Sopenharmony_ci sctx->gfx_level >= GFX10_3 ? 4 : 0) | 143bf215546Sopenharmony_ci S_008D1C_AUTO_FLUSH_MODE(sctx->screen->info.has_sqtt_auto_flush_mode_bug)); 144bf215546Sopenharmony_ci } else { 145bf215546Sopenharmony_ci /* Order seems important for the following 4 registers. */ 146bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030CDC_SQ_THREAD_TRACE_BASE2, 147bf215546Sopenharmony_ci S_030CDC_ADDR_HI(shifted_va >> 32)); 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030CC0_SQ_THREAD_TRACE_BASE, shifted_va); 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030CC4_SQ_THREAD_TRACE_SIZE, 152bf215546Sopenharmony_ci S_030CC4_SIZE(shifted_size)); 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030CD4_SQ_THREAD_TRACE_CTRL, 155bf215546Sopenharmony_ci S_030CD4_RESET_BUFFER(1)); 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci uint32_t thread_trace_mask = S_030CC8_CU_SEL(first_active_cu) | 158bf215546Sopenharmony_ci S_030CC8_SH_SEL(0) | 159bf215546Sopenharmony_ci S_030CC8_SIMD_EN(0xf) | 160bf215546Sopenharmony_ci S_030CC8_VM_ID_MASK(0) | 161bf215546Sopenharmony_ci S_030CC8_REG_STALL_EN(1) | 162bf215546Sopenharmony_ci S_030CC8_SPI_STALL_EN(1) | 163bf215546Sopenharmony_ci S_030CC8_SQ_STALL_EN(1); 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030CC8_SQ_THREAD_TRACE_MASK, 166bf215546Sopenharmony_ci thread_trace_mask); 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci /* Trace all tokens and registers. */ 169bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK, 170bf215546Sopenharmony_ci S_030CCC_TOKEN_MASK(0xbfff) | 171bf215546Sopenharmony_ci S_030CCC_REG_MASK(0xff) | 172bf215546Sopenharmony_ci S_030CCC_REG_DROP_ON_STALL(0)); 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci /* Enable SQTT perf counters for all CUs. */ 175bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030CD0_SQ_THREAD_TRACE_PERF_MASK, 176bf215546Sopenharmony_ci S_030CD0_SH0_MASK(0xffff) | 177bf215546Sopenharmony_ci S_030CD0_SH1_MASK(0xffff)); 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030CE0_SQ_THREAD_TRACE_TOKEN_MASK2, 0xffffffff); 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030CEC_SQ_THREAD_TRACE_HIWATER, 182bf215546Sopenharmony_ci S_030CEC_HIWATER(4)); 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci if (sctx->gfx_level == GFX9) { 185bf215546Sopenharmony_ci /* Reset thread trace status errors. */ 186bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030CE8_SQ_THREAD_TRACE_STATUS, 187bf215546Sopenharmony_ci S_030CE8_UTC_ERROR(0)); 188bf215546Sopenharmony_ci } 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci /* Enable the thread trace mode. */ 191bf215546Sopenharmony_ci uint32_t thread_trace_mode = 192bf215546Sopenharmony_ci S_030CD8_MASK_PS(1) | 193bf215546Sopenharmony_ci S_030CD8_MASK_VS(1) | 194bf215546Sopenharmony_ci S_030CD8_MASK_GS(1) | 195bf215546Sopenharmony_ci S_030CD8_MASK_ES(1) | 196bf215546Sopenharmony_ci S_030CD8_MASK_HS(1) | 197bf215546Sopenharmony_ci S_030CD8_MASK_LS(1) | 198bf215546Sopenharmony_ci S_030CD8_MASK_CS(1) | 199bf215546Sopenharmony_ci S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */ 200bf215546Sopenharmony_ci S_030CD8_MODE(1); 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci if (sctx->gfx_level == GFX9) { 203bf215546Sopenharmony_ci /* Count SQTT traffic in TCC perf counters. */ 204bf215546Sopenharmony_ci thread_trace_mode |= S_030CD8_TC_PERF_EN(1); 205bf215546Sopenharmony_ci } 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030CD8_SQ_THREAD_TRACE_MODE, 208bf215546Sopenharmony_ci thread_trace_mode); 209bf215546Sopenharmony_ci } 210bf215546Sopenharmony_ci } 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci /* Restore global broadcasting. */ 213bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, 214bf215546Sopenharmony_ci S_030800_SE_BROADCAST_WRITES(1) | 215bf215546Sopenharmony_ci S_030800_SH_BROADCAST_WRITES(1) | 216bf215546Sopenharmony_ci S_030800_INSTANCE_BROADCAST_WRITES(1)); 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci /* Start the thread trace with a different event based on the queue. */ 219bf215546Sopenharmony_ci if (queue_family_index == AMD_IP_COMPUTE) { 220bf215546Sopenharmony_ci radeon_set_sh_reg(R_00B878_COMPUTE_THREAD_TRACE_ENABLE, 221bf215546Sopenharmony_ci S_00B878_THREAD_TRACE_ENABLE(1)); 222bf215546Sopenharmony_ci } else { 223bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 224bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_START) | EVENT_INDEX(0)); 225bf215546Sopenharmony_ci } 226bf215546Sopenharmony_ci radeon_end(); 227bf215546Sopenharmony_ci} 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_cistatic const uint32_t gfx9_thread_trace_info_regs[] = 230bf215546Sopenharmony_ci{ 231bf215546Sopenharmony_ci R_030CE4_SQ_THREAD_TRACE_WPTR, 232bf215546Sopenharmony_ci R_030CE8_SQ_THREAD_TRACE_STATUS, 233bf215546Sopenharmony_ci R_030CF0_SQ_THREAD_TRACE_CNTR, 234bf215546Sopenharmony_ci}; 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_cistatic const uint32_t gfx10_thread_trace_info_regs[] = 237bf215546Sopenharmony_ci{ 238bf215546Sopenharmony_ci R_008D10_SQ_THREAD_TRACE_WPTR, 239bf215546Sopenharmony_ci R_008D20_SQ_THREAD_TRACE_STATUS, 240bf215546Sopenharmony_ci R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR, 241bf215546Sopenharmony_ci}; 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_cistatic void 244bf215546Sopenharmony_cisi_copy_thread_trace_info_regs(struct si_context* sctx, 245bf215546Sopenharmony_ci struct radeon_cmdbuf *cs, 246bf215546Sopenharmony_ci unsigned se_index) 247bf215546Sopenharmony_ci{ 248bf215546Sopenharmony_ci const uint32_t *thread_trace_info_regs = NULL; 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci switch (sctx->gfx_level) { 251bf215546Sopenharmony_ci case GFX10_3: 252bf215546Sopenharmony_ci case GFX10: 253bf215546Sopenharmony_ci thread_trace_info_regs = gfx10_thread_trace_info_regs; 254bf215546Sopenharmony_ci break; 255bf215546Sopenharmony_ci case GFX9: 256bf215546Sopenharmony_ci thread_trace_info_regs = gfx9_thread_trace_info_regs; 257bf215546Sopenharmony_ci break; 258bf215546Sopenharmony_ci default: 259bf215546Sopenharmony_ci unreachable("Unsupported gfx_level"); 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci /* Get the VA where the info struct is stored for this SE. */ 263bf215546Sopenharmony_ci uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo); 264bf215546Sopenharmony_ci uint64_t info_va = ac_thread_trace_get_info_va(va, se_index); 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci radeon_begin(cs); 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_ci /* Copy back the info struct one DWORD at a time. */ 269bf215546Sopenharmony_ci for (unsigned i = 0; i < 3; i++) { 270bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); 271bf215546Sopenharmony_ci radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) | 272bf215546Sopenharmony_ci COPY_DATA_DST_SEL(COPY_DATA_TC_L2) | 273bf215546Sopenharmony_ci COPY_DATA_WR_CONFIRM); 274bf215546Sopenharmony_ci radeon_emit(thread_trace_info_regs[i] >> 2); 275bf215546Sopenharmony_ci radeon_emit(0); /* unused */ 276bf215546Sopenharmony_ci radeon_emit((info_va + i * 4)); 277bf215546Sopenharmony_ci radeon_emit((info_va + i * 4) >> 32); 278bf215546Sopenharmony_ci } 279bf215546Sopenharmony_ci radeon_end(); 280bf215546Sopenharmony_ci} 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_cistatic void 285bf215546Sopenharmony_cisi_emit_thread_trace_stop(struct si_context *sctx, 286bf215546Sopenharmony_ci struct radeon_cmdbuf *cs, 287bf215546Sopenharmony_ci uint32_t queue_family_index) 288bf215546Sopenharmony_ci{ 289bf215546Sopenharmony_ci unsigned max_se = sctx->screen->info.max_se; 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci radeon_begin(cs); 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci /* Stop the thread trace with a different event based on the queue. */ 294bf215546Sopenharmony_ci if (queue_family_index == AMD_IP_COMPUTE) { 295bf215546Sopenharmony_ci radeon_set_sh_reg(R_00B878_COMPUTE_THREAD_TRACE_ENABLE, 296bf215546Sopenharmony_ci S_00B878_THREAD_TRACE_ENABLE(0)); 297bf215546Sopenharmony_ci } else { 298bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 299bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_STOP) | EVENT_INDEX(0)); 300bf215546Sopenharmony_ci } 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 303bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0)); 304bf215546Sopenharmony_ci radeon_end(); 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci if (sctx->screen->info.has_sqtt_rb_harvest_bug) { 307bf215546Sopenharmony_ci /* Some chips with disabled RBs should wait for idle because FINISH_DONE doesn't work. */ 308bf215546Sopenharmony_ci sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB | 309bf215546Sopenharmony_ci SI_CONTEXT_FLUSH_AND_INV_DB | 310bf215546Sopenharmony_ci SI_CONTEXT_CS_PARTIAL_FLUSH; 311bf215546Sopenharmony_ci sctx->emit_cache_flush(sctx, cs); 312bf215546Sopenharmony_ci } 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci for (unsigned se = 0; se < max_se; se++) { 315bf215546Sopenharmony_ci if (si_se_is_disabled(sctx, se)) 316bf215546Sopenharmony_ci continue; 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci radeon_begin(cs); 319bf215546Sopenharmony_ci 320bf215546Sopenharmony_ci /* Target SEi and SH0. */ 321bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, 322bf215546Sopenharmony_ci S_030800_SE_INDEX(se) | 323bf215546Sopenharmony_ci S_030800_SH_INDEX(0) | 324bf215546Sopenharmony_ci S_030800_INSTANCE_BROADCAST_WRITES(1)); 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10) { 327bf215546Sopenharmony_ci if (!sctx->screen->info.has_sqtt_rb_harvest_bug) { 328bf215546Sopenharmony_ci /* Make sure to wait for the trace buffer. */ 329bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0)); 330bf215546Sopenharmony_ci radeon_emit(WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */ 331bf215546Sopenharmony_ci radeon_emit(R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */ 332bf215546Sopenharmony_ci radeon_emit(0); 333bf215546Sopenharmony_ci radeon_emit(0); /* reference value */ 334bf215546Sopenharmony_ci radeon_emit(~C_008D20_FINISH_DONE); /* mask */ 335bf215546Sopenharmony_ci radeon_emit(4); /* poll interval */ 336bf215546Sopenharmony_ci } 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci /* Disable the thread trace mode. */ 339bf215546Sopenharmony_ci radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL, 340bf215546Sopenharmony_ci S_008D1C_MODE(0)); 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci /* Wait for thread trace completion. */ 343bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0)); 344bf215546Sopenharmony_ci radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ 345bf215546Sopenharmony_ci radeon_emit(R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */ 346bf215546Sopenharmony_ci radeon_emit(0); 347bf215546Sopenharmony_ci radeon_emit(0); /* reference value */ 348bf215546Sopenharmony_ci radeon_emit(~C_008D20_BUSY); /* mask */ 349bf215546Sopenharmony_ci radeon_emit(4); /* poll interval */ 350bf215546Sopenharmony_ci } else { 351bf215546Sopenharmony_ci /* Disable the thread trace mode. */ 352bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030CD8_SQ_THREAD_TRACE_MODE, 353bf215546Sopenharmony_ci S_030CD8_MODE(0)); 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci /* Wait for thread trace completion. */ 356bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0)); 357bf215546Sopenharmony_ci radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ 358bf215546Sopenharmony_ci radeon_emit(R_030CE8_SQ_THREAD_TRACE_STATUS >> 2); /* register */ 359bf215546Sopenharmony_ci radeon_emit(0); 360bf215546Sopenharmony_ci radeon_emit(0); /* reference value */ 361bf215546Sopenharmony_ci radeon_emit(~C_030CE8_BUSY); /* mask */ 362bf215546Sopenharmony_ci radeon_emit(4); /* poll interval */ 363bf215546Sopenharmony_ci } 364bf215546Sopenharmony_ci radeon_end(); 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci si_copy_thread_trace_info_regs(sctx, cs, se); 367bf215546Sopenharmony_ci } 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_ci /* Restore global broadcasting. */ 370bf215546Sopenharmony_ci radeon_begin_again(cs); 371bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, 372bf215546Sopenharmony_ci S_030800_SE_BROADCAST_WRITES(1) | 373bf215546Sopenharmony_ci S_030800_SH_BROADCAST_WRITES(1) | 374bf215546Sopenharmony_ci S_030800_INSTANCE_BROADCAST_WRITES(1)); 375bf215546Sopenharmony_ci radeon_end(); 376bf215546Sopenharmony_ci} 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_cistatic void 379bf215546Sopenharmony_cisi_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf *cs) 380bf215546Sopenharmony_ci{ 381bf215546Sopenharmony_ci struct radeon_winsys *ws = sctx->ws; 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci radeon_begin(cs); 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_ci switch (family) { 386bf215546Sopenharmony_ci case AMD_IP_GFX: 387bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 388bf215546Sopenharmony_ci radeon_emit(CC0_UPDATE_LOAD_ENABLES(1)); 389bf215546Sopenharmony_ci radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1)); 390bf215546Sopenharmony_ci break; 391bf215546Sopenharmony_ci case AMD_IP_COMPUTE: 392bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_NOP, 0, 0)); 393bf215546Sopenharmony_ci radeon_emit(0); 394bf215546Sopenharmony_ci break; 395bf215546Sopenharmony_ci } 396bf215546Sopenharmony_ci radeon_end(); 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci ws->cs_add_buffer(cs, 399bf215546Sopenharmony_ci sctx->thread_trace->bo, 400bf215546Sopenharmony_ci RADEON_USAGE_READWRITE, 401bf215546Sopenharmony_ci RADEON_DOMAIN_VRAM); 402bf215546Sopenharmony_ci ws->cs_add_buffer(cs, 403bf215546Sopenharmony_ci sctx->spm_trace.bo, 404bf215546Sopenharmony_ci RADEON_USAGE_READWRITE, 405bf215546Sopenharmony_ci RADEON_DOMAIN_VRAM); 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci si_cp_dma_wait_for_idle(sctx, cs); 408bf215546Sopenharmony_ci 409bf215546Sopenharmony_ci /* Make sure to wait-for-idle before starting SQTT. */ 410bf215546Sopenharmony_ci sctx->flags |= 411bf215546Sopenharmony_ci SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH | 412bf215546Sopenharmony_ci SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE | 413bf215546Sopenharmony_ci SI_CONTEXT_INV_L2 | SI_CONTEXT_PFP_SYNC_ME; 414bf215546Sopenharmony_ci sctx->emit_cache_flush(sctx, cs); 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci si_inhibit_clockgating(sctx, cs, true); 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci /* Enable SQG events that collects thread trace data. */ 419bf215546Sopenharmony_ci si_emit_spi_config_cntl(sctx, cs, true); 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci si_pc_emit_spm_reset(cs); 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci si_pc_emit_shaders(cs, 0x7f); 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci si_emit_spm_setup(sctx, cs); 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_ci si_emit_thread_trace_start(sctx, cs, family); 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci si_pc_emit_spm_start(cs); 430bf215546Sopenharmony_ci} 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_cistatic void 433bf215546Sopenharmony_cisi_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *cs) 434bf215546Sopenharmony_ci{ 435bf215546Sopenharmony_ci struct radeon_winsys *ws = sctx->ws; 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci radeon_begin(cs); 438bf215546Sopenharmony_ci 439bf215546Sopenharmony_ci switch (family) { 440bf215546Sopenharmony_ci case AMD_IP_GFX: 441bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 442bf215546Sopenharmony_ci radeon_emit(CC0_UPDATE_LOAD_ENABLES(1)); 443bf215546Sopenharmony_ci radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1)); 444bf215546Sopenharmony_ci break; 445bf215546Sopenharmony_ci case AMD_IP_COMPUTE: 446bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_NOP, 0, 0)); 447bf215546Sopenharmony_ci radeon_emit(0); 448bf215546Sopenharmony_ci break; 449bf215546Sopenharmony_ci } 450bf215546Sopenharmony_ci radeon_end(); 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_ci ws->cs_add_buffer(cs, 453bf215546Sopenharmony_ci sctx->thread_trace->bo, 454bf215546Sopenharmony_ci RADEON_USAGE_READWRITE, 455bf215546Sopenharmony_ci RADEON_DOMAIN_VRAM); 456bf215546Sopenharmony_ci 457bf215546Sopenharmony_ci ws->cs_add_buffer(cs, 458bf215546Sopenharmony_ci sctx->spm_trace.bo, 459bf215546Sopenharmony_ci RADEON_USAGE_READWRITE, 460bf215546Sopenharmony_ci RADEON_DOMAIN_VRAM); 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci si_cp_dma_wait_for_idle(sctx, cs); 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters, 465bf215546Sopenharmony_ci sctx->screen->info.never_send_perfcounter_stop); 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci /* Make sure to wait-for-idle before stopping SQTT. */ 468bf215546Sopenharmony_ci sctx->flags |= 469bf215546Sopenharmony_ci SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH | 470bf215546Sopenharmony_ci SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE | 471bf215546Sopenharmony_ci SI_CONTEXT_INV_L2 | SI_CONTEXT_PFP_SYNC_ME; 472bf215546Sopenharmony_ci sctx->emit_cache_flush(sctx, cs); 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ci si_emit_thread_trace_stop(sctx, cs, family); 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_ci si_pc_emit_spm_reset(cs); 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci /* Restore previous state by disabling SQG events. */ 479bf215546Sopenharmony_ci si_emit_spi_config_cntl(sctx, cs, false); 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci si_inhibit_clockgating(sctx, cs, false); 482bf215546Sopenharmony_ci} 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_cistatic void 486bf215546Sopenharmony_cisi_thread_trace_init_cs(struct si_context *sctx) 487bf215546Sopenharmony_ci{ 488bf215546Sopenharmony_ci struct radeon_winsys *ws = sctx->ws; 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci /* Thread trace start CS (only handles AMD_IP_GFX). */ 491bf215546Sopenharmony_ci sctx->thread_trace->start_cs[AMD_IP_GFX] = CALLOC_STRUCT(radeon_cmdbuf); 492bf215546Sopenharmony_ci if (!ws->cs_create(sctx->thread_trace->start_cs[AMD_IP_GFX], 493bf215546Sopenharmony_ci sctx->ctx, AMD_IP_GFX, NULL, NULL, 0)) { 494bf215546Sopenharmony_ci free(sctx->thread_trace->start_cs[AMD_IP_GFX]); 495bf215546Sopenharmony_ci sctx->thread_trace->start_cs[AMD_IP_GFX] = NULL; 496bf215546Sopenharmony_ci return; 497bf215546Sopenharmony_ci } 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci si_thread_trace_start(sctx, AMD_IP_GFX, sctx->thread_trace->start_cs[AMD_IP_GFX]); 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci /* Thread trace stop CS. */ 502bf215546Sopenharmony_ci sctx->thread_trace->stop_cs[AMD_IP_GFX] = CALLOC_STRUCT(radeon_cmdbuf); 503bf215546Sopenharmony_ci if (!ws->cs_create(sctx->thread_trace->stop_cs[AMD_IP_GFX], 504bf215546Sopenharmony_ci sctx->ctx, AMD_IP_GFX, NULL, NULL, 0)) { 505bf215546Sopenharmony_ci free(sctx->thread_trace->start_cs[AMD_IP_GFX]); 506bf215546Sopenharmony_ci sctx->thread_trace->start_cs[AMD_IP_GFX] = NULL; 507bf215546Sopenharmony_ci free(sctx->thread_trace->stop_cs[AMD_IP_GFX]); 508bf215546Sopenharmony_ci sctx->thread_trace->stop_cs[AMD_IP_GFX] = NULL; 509bf215546Sopenharmony_ci return; 510bf215546Sopenharmony_ci } 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_ci si_thread_trace_stop(sctx, AMD_IP_GFX, sctx->thread_trace->stop_cs[AMD_IP_GFX]); 513bf215546Sopenharmony_ci} 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_cistatic void 516bf215546Sopenharmony_cisi_begin_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs) 517bf215546Sopenharmony_ci{ 518bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = sctx->thread_trace->start_cs[AMD_IP_GFX]; 519bf215546Sopenharmony_ci sctx->ws->cs_flush(cs, 0, NULL); 520bf215546Sopenharmony_ci} 521bf215546Sopenharmony_ci 522bf215546Sopenharmony_cistatic void 523bf215546Sopenharmony_cisi_end_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs) 524bf215546Sopenharmony_ci{ 525bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = sctx->thread_trace->stop_cs[AMD_IP_GFX]; 526bf215546Sopenharmony_ci sctx->ws->cs_flush(cs, 0, &sctx->last_sqtt_fence); 527bf215546Sopenharmony_ci} 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_cistatic bool 530bf215546Sopenharmony_cisi_get_thread_trace(struct si_context *sctx, 531bf215546Sopenharmony_ci struct ac_thread_trace *thread_trace) 532bf215546Sopenharmony_ci{ 533bf215546Sopenharmony_ci unsigned max_se = sctx->screen->info.max_se; 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci memset(thread_trace, 0, sizeof(*thread_trace)); 536bf215546Sopenharmony_ci thread_trace->num_traces = max_se; 537bf215546Sopenharmony_ci 538bf215546Sopenharmony_ci sctx->thread_trace->ptr = sctx->ws->buffer_map(sctx->ws, sctx->thread_trace->bo, 539bf215546Sopenharmony_ci NULL, 540bf215546Sopenharmony_ci PIPE_MAP_READ); 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci if (!sctx->thread_trace->ptr) 543bf215546Sopenharmony_ci return false; 544bf215546Sopenharmony_ci 545bf215546Sopenharmony_ci void *thread_trace_ptr = sctx->thread_trace->ptr; 546bf215546Sopenharmony_ci 547bf215546Sopenharmony_ci for (unsigned se = 0; se < max_se; se++) { 548bf215546Sopenharmony_ci uint64_t info_offset = ac_thread_trace_get_info_offset(se); 549bf215546Sopenharmony_ci uint64_t data_offset = ac_thread_trace_get_data_offset(&sctx->screen->info, sctx->thread_trace, se); 550bf215546Sopenharmony_ci void *info_ptr = thread_trace_ptr + info_offset; 551bf215546Sopenharmony_ci void *data_ptr = thread_trace_ptr + data_offset; 552bf215546Sopenharmony_ci struct ac_thread_trace_info *info = 553bf215546Sopenharmony_ci (struct ac_thread_trace_info *)info_ptr; 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci struct ac_thread_trace_se thread_trace_se = {0}; 556bf215546Sopenharmony_ci 557bf215546Sopenharmony_ci if (!ac_is_thread_trace_complete(&sctx->screen->info, sctx->thread_trace, info)) { 558bf215546Sopenharmony_ci uint32_t expected_size = 559bf215546Sopenharmony_ci ac_get_expected_buffer_size(&sctx->screen->info, info); 560bf215546Sopenharmony_ci uint32_t available_size = (info->cur_offset * 32) / 1024; 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci fprintf(stderr, "Failed to get the thread trace " 563bf215546Sopenharmony_ci "because the buffer is too small. The " 564bf215546Sopenharmony_ci "hardware needs %d KB but the " 565bf215546Sopenharmony_ci "buffer size is %d KB.\n", 566bf215546Sopenharmony_ci expected_size, available_size); 567bf215546Sopenharmony_ci fprintf(stderr, "Please update the buffer size with " 568bf215546Sopenharmony_ci "AMD_THREAD_TRACE_BUFFER_SIZE=<size_in_kbytes>\n"); 569bf215546Sopenharmony_ci return false; 570bf215546Sopenharmony_ci } 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_ci thread_trace_se.data_ptr = data_ptr; 573bf215546Sopenharmony_ci thread_trace_se.info = *info; 574bf215546Sopenharmony_ci thread_trace_se.shader_engine = se; 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_ci int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]); 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci /* For GFX10+ compute_unit really means WGP */ 579bf215546Sopenharmony_ci thread_trace_se.compute_unit = 580bf215546Sopenharmony_ci sctx->screen->info.gfx_level >= GFX10 ? (first_active_cu / 2) : first_active_cu; 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_ci thread_trace->traces[se] = thread_trace_se; 583bf215546Sopenharmony_ci } 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_ci thread_trace->data = sctx->thread_trace; 586bf215546Sopenharmony_ci return true; 587bf215546Sopenharmony_ci} 588bf215546Sopenharmony_ci 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_cibool 591bf215546Sopenharmony_cisi_init_thread_trace(struct si_context *sctx) 592bf215546Sopenharmony_ci{ 593bf215546Sopenharmony_ci static bool warn_once = true; 594bf215546Sopenharmony_ci if (warn_once) { 595bf215546Sopenharmony_ci fprintf(stderr, "*************************************************\n"); 596bf215546Sopenharmony_ci fprintf(stderr, "* WARNING: Thread trace support is experimental *\n"); 597bf215546Sopenharmony_ci fprintf(stderr, "*************************************************\n"); 598bf215546Sopenharmony_ci warn_once = false; 599bf215546Sopenharmony_ci } 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci sctx->thread_trace = CALLOC_STRUCT(ac_thread_trace_data); 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci if (sctx->gfx_level < GFX8) { 604bf215546Sopenharmony_ci fprintf(stderr, "GPU hardware not supported: refer to " 605bf215546Sopenharmony_ci "the RGP documentation for the list of " 606bf215546Sopenharmony_ci "supported GPUs!\n"); 607bf215546Sopenharmony_ci return false; 608bf215546Sopenharmony_ci } 609bf215546Sopenharmony_ci 610bf215546Sopenharmony_ci if (sctx->gfx_level > GFX10_3) { 611bf215546Sopenharmony_ci fprintf(stderr, "radeonsi: Thread trace is not supported " 612bf215546Sopenharmony_ci "for that GPU!\n"); 613bf215546Sopenharmony_ci return false; 614bf215546Sopenharmony_ci } 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci /* Default buffer size set to 32MB per SE. */ 617bf215546Sopenharmony_ci sctx->thread_trace->buffer_size = debug_get_num_option("AMD_THREAD_TRACE_BUFFER_SIZE", 32 * 1024) * 1024; 618bf215546Sopenharmony_ci sctx->thread_trace->start_frame = 10; 619bf215546Sopenharmony_ci 620bf215546Sopenharmony_ci const char *trigger = getenv("AMD_THREAD_TRACE_TRIGGER"); 621bf215546Sopenharmony_ci if (trigger) { 622bf215546Sopenharmony_ci sctx->thread_trace->start_frame = atoi(trigger); 623bf215546Sopenharmony_ci if (sctx->thread_trace->start_frame <= 0) { 624bf215546Sopenharmony_ci /* This isn't a frame number, must be a file */ 625bf215546Sopenharmony_ci sctx->thread_trace->trigger_file = strdup(trigger); 626bf215546Sopenharmony_ci sctx->thread_trace->start_frame = -1; 627bf215546Sopenharmony_ci } 628bf215546Sopenharmony_ci } 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci if (!si_thread_trace_init_bo(sctx)) 631bf215546Sopenharmony_ci return false; 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci list_inithead(&sctx->thread_trace->rgp_pso_correlation.record); 634bf215546Sopenharmony_ci simple_mtx_init(&sctx->thread_trace->rgp_pso_correlation.lock, mtx_plain); 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci list_inithead(&sctx->thread_trace->rgp_loader_events.record); 637bf215546Sopenharmony_ci simple_mtx_init(&sctx->thread_trace->rgp_loader_events.lock, mtx_plain); 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_ci list_inithead(&sctx->thread_trace->rgp_code_object.record); 640bf215546Sopenharmony_ci simple_mtx_init(&sctx->thread_trace->rgp_code_object.lock, mtx_plain); 641bf215546Sopenharmony_ci 642bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10) { 643bf215546Sopenharmony_ci /* Limit SPM counters to GFX10+ for now */ 644bf215546Sopenharmony_ci ASSERTED bool r = si_spm_init(sctx); 645bf215546Sopenharmony_ci assert(r); 646bf215546Sopenharmony_ci } 647bf215546Sopenharmony_ci 648bf215546Sopenharmony_ci si_thread_trace_init_cs(sctx); 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci sctx->sqtt_next_event = EventInvalid; 651bf215546Sopenharmony_ci 652bf215546Sopenharmony_ci return true; 653bf215546Sopenharmony_ci} 654bf215546Sopenharmony_ci 655bf215546Sopenharmony_civoid 656bf215546Sopenharmony_cisi_destroy_thread_trace(struct si_context *sctx) 657bf215546Sopenharmony_ci{ 658bf215546Sopenharmony_ci struct si_screen *sscreen = sctx->screen; 659bf215546Sopenharmony_ci struct pb_buffer *bo = sctx->thread_trace->bo; 660bf215546Sopenharmony_ci radeon_bo_reference(sctx->screen->ws, &bo, NULL); 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_ci if (sctx->thread_trace->trigger_file) 663bf215546Sopenharmony_ci free(sctx->thread_trace->trigger_file); 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci sscreen->ws->cs_destroy(sctx->thread_trace->start_cs[AMD_IP_GFX]); 666bf215546Sopenharmony_ci sscreen->ws->cs_destroy(sctx->thread_trace->stop_cs[AMD_IP_GFX]); 667bf215546Sopenharmony_ci 668bf215546Sopenharmony_ci struct rgp_pso_correlation *pso_correlation = &sctx->thread_trace->rgp_pso_correlation; 669bf215546Sopenharmony_ci struct rgp_loader_events *loader_events = &sctx->thread_trace->rgp_loader_events; 670bf215546Sopenharmony_ci struct rgp_code_object *code_object = &sctx->thread_trace->rgp_code_object; 671bf215546Sopenharmony_ci list_for_each_entry_safe(struct rgp_pso_correlation_record, record, 672bf215546Sopenharmony_ci &pso_correlation->record, list) { 673bf215546Sopenharmony_ci list_del(&record->list); 674bf215546Sopenharmony_ci free(record); 675bf215546Sopenharmony_ci } 676bf215546Sopenharmony_ci simple_mtx_destroy(&sctx->thread_trace->rgp_pso_correlation.lock); 677bf215546Sopenharmony_ci 678bf215546Sopenharmony_ci list_for_each_entry_safe(struct rgp_loader_events_record, record, 679bf215546Sopenharmony_ci &loader_events->record, list) { 680bf215546Sopenharmony_ci list_del(&record->list); 681bf215546Sopenharmony_ci free(record); 682bf215546Sopenharmony_ci } 683bf215546Sopenharmony_ci simple_mtx_destroy(&sctx->thread_trace->rgp_loader_events.lock); 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_ci list_for_each_entry_safe(struct rgp_code_object_record, record, 686bf215546Sopenharmony_ci &code_object->record, list) { 687bf215546Sopenharmony_ci uint32_t mask = record->shader_stages_mask; 688bf215546Sopenharmony_ci int i; 689bf215546Sopenharmony_ci 690bf215546Sopenharmony_ci /* Free the disassembly. */ 691bf215546Sopenharmony_ci while (mask) { 692bf215546Sopenharmony_ci i = u_bit_scan(&mask); 693bf215546Sopenharmony_ci free(record->shader_data[i].code); 694bf215546Sopenharmony_ci } 695bf215546Sopenharmony_ci list_del(&record->list); 696bf215546Sopenharmony_ci free(record); 697bf215546Sopenharmony_ci } 698bf215546Sopenharmony_ci simple_mtx_destroy(&sctx->thread_trace->rgp_code_object.lock); 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_ci free(sctx->thread_trace); 701bf215546Sopenharmony_ci sctx->thread_trace = NULL; 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10) 704bf215546Sopenharmony_ci si_spm_finish(sctx); 705bf215546Sopenharmony_ci} 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_cistatic uint64_t num_frames = 0; 708bf215546Sopenharmony_ci 709bf215546Sopenharmony_civoid 710bf215546Sopenharmony_cisi_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs) 711bf215546Sopenharmony_ci{ 712bf215546Sopenharmony_ci /* Should we enable SQTT yet? */ 713bf215546Sopenharmony_ci if (!sctx->thread_trace_enabled) { 714bf215546Sopenharmony_ci bool frame_trigger = num_frames == sctx->thread_trace->start_frame; 715bf215546Sopenharmony_ci bool file_trigger = false; 716bf215546Sopenharmony_ci if (sctx->thread_trace->trigger_file && 717bf215546Sopenharmony_ci access(sctx->thread_trace->trigger_file, W_OK) == 0) { 718bf215546Sopenharmony_ci if (unlink(sctx->thread_trace->trigger_file) == 0) { 719bf215546Sopenharmony_ci file_trigger = true; 720bf215546Sopenharmony_ci } else { 721bf215546Sopenharmony_ci /* Do not enable tracing if we cannot remove the file, 722bf215546Sopenharmony_ci * because by then we'll trace every frame. 723bf215546Sopenharmony_ci */ 724bf215546Sopenharmony_ci fprintf(stderr, "radeonsi: could not remove thread trace trigger file, ignoring\n"); 725bf215546Sopenharmony_ci } 726bf215546Sopenharmony_ci } 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_ci if (frame_trigger || file_trigger) { 729bf215546Sopenharmony_ci /* Wait for last submission */ 730bf215546Sopenharmony_ci sctx->ws->fence_wait(sctx->ws, sctx->last_gfx_fence, PIPE_TIMEOUT_INFINITE); 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci /* Start SQTT */ 733bf215546Sopenharmony_ci si_begin_thread_trace(sctx, rcs); 734bf215546Sopenharmony_ci 735bf215546Sopenharmony_ci sctx->thread_trace_enabled = true; 736bf215546Sopenharmony_ci sctx->thread_trace->start_frame = -1; 737bf215546Sopenharmony_ci 738bf215546Sopenharmony_ci /* Force shader update to make sure si_sqtt_describe_pipeline_bind is called 739bf215546Sopenharmony_ci * for the current "pipeline". 740bf215546Sopenharmony_ci */ 741bf215546Sopenharmony_ci sctx->do_update_shaders = true; 742bf215546Sopenharmony_ci } 743bf215546Sopenharmony_ci } else { 744bf215546Sopenharmony_ci struct ac_thread_trace thread_trace = {0}; 745bf215546Sopenharmony_ci 746bf215546Sopenharmony_ci /* Stop SQTT */ 747bf215546Sopenharmony_ci si_end_thread_trace(sctx, rcs); 748bf215546Sopenharmony_ci sctx->thread_trace_enabled = false; 749bf215546Sopenharmony_ci sctx->thread_trace->start_frame = -1; 750bf215546Sopenharmony_ci assert (sctx->last_sqtt_fence); 751bf215546Sopenharmony_ci 752bf215546Sopenharmony_ci /* Wait for SQTT to finish and read back the bo */ 753bf215546Sopenharmony_ci if (sctx->ws->fence_wait(sctx->ws, sctx->last_sqtt_fence, PIPE_TIMEOUT_INFINITE) && 754bf215546Sopenharmony_ci si_get_thread_trace(sctx, &thread_trace)) { 755bf215546Sopenharmony_ci /* Map the SPM counter buffer */ 756bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10) 757bf215546Sopenharmony_ci sctx->spm_trace.ptr = sctx->ws->buffer_map(sctx->ws, sctx->spm_trace.bo, 758bf215546Sopenharmony_ci NULL, PIPE_MAP_READ | RADEON_MAP_TEMPORARY); 759bf215546Sopenharmony_ci 760bf215546Sopenharmony_ci ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, &sctx->spm_trace); 761bf215546Sopenharmony_ci 762bf215546Sopenharmony_ci if (sctx->spm_trace.ptr) 763bf215546Sopenharmony_ci sctx->ws->buffer_unmap(sctx->ws, sctx->spm_trace.bo); 764bf215546Sopenharmony_ci } else { 765bf215546Sopenharmony_ci fprintf(stderr, "Failed to read the trace\n"); 766bf215546Sopenharmony_ci } 767bf215546Sopenharmony_ci } 768bf215546Sopenharmony_ci 769bf215546Sopenharmony_ci num_frames++; 770bf215546Sopenharmony_ci} 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_cistatic void 774bf215546Sopenharmony_cisi_emit_thread_trace_userdata(struct si_context* sctx, 775bf215546Sopenharmony_ci struct radeon_cmdbuf *cs, 776bf215546Sopenharmony_ci const void *data, uint32_t num_dwords) 777bf215546Sopenharmony_ci{ 778bf215546Sopenharmony_ci const uint32_t *dwords = (uint32_t *)data; 779bf215546Sopenharmony_ci 780bf215546Sopenharmony_ci radeon_begin(cs); 781bf215546Sopenharmony_ci 782bf215546Sopenharmony_ci while (num_dwords > 0) { 783bf215546Sopenharmony_ci uint32_t count = MIN2(num_dwords, 2); 784bf215546Sopenharmony_ci 785bf215546Sopenharmony_ci /* Without the perfctr bit the CP might not always pass the 786bf215546Sopenharmony_ci * write on correctly. */ 787bf215546Sopenharmony_ci radeon_set_uconfig_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count, sctx->gfx_level >= GFX10); 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci radeon_emit_array(dwords, count); 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci dwords += count; 792bf215546Sopenharmony_ci num_dwords -= count; 793bf215546Sopenharmony_ci } 794bf215546Sopenharmony_ci radeon_end(); 795bf215546Sopenharmony_ci} 796bf215546Sopenharmony_ci 797bf215546Sopenharmony_cistatic void 798bf215546Sopenharmony_cisi_emit_spi_config_cntl(struct si_context* sctx, 799bf215546Sopenharmony_ci struct radeon_cmdbuf *cs, bool enable) 800bf215546Sopenharmony_ci{ 801bf215546Sopenharmony_ci radeon_begin(cs); 802bf215546Sopenharmony_ci 803bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX9) { 804bf215546Sopenharmony_ci uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) | 805bf215546Sopenharmony_ci S_031100_EXP_PRIORITY_ORDER(3) | 806bf215546Sopenharmony_ci S_031100_ENABLE_SQG_TOP_EVENTS(enable) | 807bf215546Sopenharmony_ci S_031100_ENABLE_SQG_BOP_EVENTS(enable); 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10) 810bf215546Sopenharmony_ci spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3); 811bf215546Sopenharmony_ci 812bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_031100_SPI_CONFIG_CNTL, spi_config_cntl); 813bf215546Sopenharmony_ci } else { 814bf215546Sopenharmony_ci /* SPI_CONFIG_CNTL is a protected register on GFX6-GFX8. */ 815bf215546Sopenharmony_ci radeon_set_privileged_config_reg(R_009100_SPI_CONFIG_CNTL, 816bf215546Sopenharmony_ci S_009100_ENABLE_SQG_TOP_EVENTS(enable) | 817bf215546Sopenharmony_ci S_009100_ENABLE_SQG_BOP_EVENTS(enable)); 818bf215546Sopenharmony_ci } 819bf215546Sopenharmony_ci radeon_end(); 820bf215546Sopenharmony_ci} 821bf215546Sopenharmony_ci 822bf215546Sopenharmony_cistatic uint32_t num_events = 0; 823bf215546Sopenharmony_civoid 824bf215546Sopenharmony_cisi_sqtt_write_event_marker(struct si_context* sctx, struct radeon_cmdbuf *rcs, 825bf215546Sopenharmony_ci enum rgp_sqtt_marker_event_type api_type, 826bf215546Sopenharmony_ci uint32_t vertex_offset_user_data, 827bf215546Sopenharmony_ci uint32_t instance_offset_user_data, 828bf215546Sopenharmony_ci uint32_t draw_index_user_data) 829bf215546Sopenharmony_ci{ 830bf215546Sopenharmony_ci struct rgp_sqtt_marker_event marker = {0}; 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_ci marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT; 833bf215546Sopenharmony_ci marker.api_type = api_type == EventInvalid ? EventCmdDraw : api_type; 834bf215546Sopenharmony_ci marker.cmd_id = num_events++; 835bf215546Sopenharmony_ci marker.cb_id = 0; 836bf215546Sopenharmony_ci 837bf215546Sopenharmony_ci if (vertex_offset_user_data == UINT_MAX || 838bf215546Sopenharmony_ci instance_offset_user_data == UINT_MAX) { 839bf215546Sopenharmony_ci vertex_offset_user_data = 0; 840bf215546Sopenharmony_ci instance_offset_user_data = 0; 841bf215546Sopenharmony_ci } 842bf215546Sopenharmony_ci 843bf215546Sopenharmony_ci if (draw_index_user_data == UINT_MAX) 844bf215546Sopenharmony_ci draw_index_user_data = vertex_offset_user_data; 845bf215546Sopenharmony_ci 846bf215546Sopenharmony_ci marker.vertex_offset_reg_idx = vertex_offset_user_data; 847bf215546Sopenharmony_ci marker.instance_offset_reg_idx = instance_offset_user_data; 848bf215546Sopenharmony_ci marker.draw_index_reg_idx = draw_index_user_data; 849bf215546Sopenharmony_ci 850bf215546Sopenharmony_ci si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4); 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci sctx->sqtt_next_event = EventInvalid; 853bf215546Sopenharmony_ci} 854bf215546Sopenharmony_ci 855bf215546Sopenharmony_civoid 856bf215546Sopenharmony_cisi_write_event_with_dims_marker(struct si_context* sctx, struct radeon_cmdbuf *rcs, 857bf215546Sopenharmony_ci enum rgp_sqtt_marker_event_type api_type, 858bf215546Sopenharmony_ci uint32_t x, uint32_t y, uint32_t z) 859bf215546Sopenharmony_ci{ 860bf215546Sopenharmony_ci struct rgp_sqtt_marker_event_with_dims marker = {0}; 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT; 863bf215546Sopenharmony_ci marker.event.api_type = api_type; 864bf215546Sopenharmony_ci marker.event.cmd_id = num_events++; 865bf215546Sopenharmony_ci marker.event.cb_id = 0; 866bf215546Sopenharmony_ci marker.event.has_thread_dims = 1; 867bf215546Sopenharmony_ci 868bf215546Sopenharmony_ci marker.thread_x = x; 869bf215546Sopenharmony_ci marker.thread_y = y; 870bf215546Sopenharmony_ci marker.thread_z = z; 871bf215546Sopenharmony_ci 872bf215546Sopenharmony_ci si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4); 873bf215546Sopenharmony_ci sctx->sqtt_next_event = EventInvalid; 874bf215546Sopenharmony_ci} 875bf215546Sopenharmony_ci 876bf215546Sopenharmony_civoid 877bf215546Sopenharmony_cisi_sqtt_describe_barrier_start(struct si_context* sctx, struct radeon_cmdbuf *rcs) 878bf215546Sopenharmony_ci{ 879bf215546Sopenharmony_ci struct rgp_sqtt_marker_barrier_start marker = {0}; 880bf215546Sopenharmony_ci 881bf215546Sopenharmony_ci marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START; 882bf215546Sopenharmony_ci marker.cb_id = 0; 883bf215546Sopenharmony_ci marker.dword02 = 0xC0000000 + 10; /* RGP_BARRIER_INTERNAL_BASE */ 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_ci si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4); 886bf215546Sopenharmony_ci} 887bf215546Sopenharmony_ci 888bf215546Sopenharmony_civoid 889bf215546Sopenharmony_cisi_sqtt_describe_barrier_end(struct si_context* sctx, struct radeon_cmdbuf *rcs, 890bf215546Sopenharmony_ci unsigned flags) 891bf215546Sopenharmony_ci{ 892bf215546Sopenharmony_ci struct rgp_sqtt_marker_barrier_end marker = {0}; 893bf215546Sopenharmony_ci 894bf215546Sopenharmony_ci marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END; 895bf215546Sopenharmony_ci marker.cb_id = 0; 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH) 898bf215546Sopenharmony_ci marker.vs_partial_flush = true; 899bf215546Sopenharmony_ci if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH) 900bf215546Sopenharmony_ci marker.ps_partial_flush = true; 901bf215546Sopenharmony_ci if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH) 902bf215546Sopenharmony_ci marker.cs_partial_flush = true; 903bf215546Sopenharmony_ci 904bf215546Sopenharmony_ci if (flags & SI_CONTEXT_PFP_SYNC_ME) 905bf215546Sopenharmony_ci marker.pfp_sync_me = true; 906bf215546Sopenharmony_ci 907bf215546Sopenharmony_ci if (flags & SI_CONTEXT_INV_VCACHE) 908bf215546Sopenharmony_ci marker.inval_tcp = true; 909bf215546Sopenharmony_ci if (flags & SI_CONTEXT_INV_ICACHE) 910bf215546Sopenharmony_ci marker.inval_sqI = true; 911bf215546Sopenharmony_ci if (flags & SI_CONTEXT_INV_SCACHE) 912bf215546Sopenharmony_ci marker.inval_sqK = true; 913bf215546Sopenharmony_ci if (flags & SI_CONTEXT_INV_L2) 914bf215546Sopenharmony_ci marker.inval_tcc = true; 915bf215546Sopenharmony_ci 916bf215546Sopenharmony_ci if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) { 917bf215546Sopenharmony_ci marker.inval_cb = true; 918bf215546Sopenharmony_ci marker.flush_cb = true; 919bf215546Sopenharmony_ci } 920bf215546Sopenharmony_ci if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) { 921bf215546Sopenharmony_ci marker.inval_db = true; 922bf215546Sopenharmony_ci marker.flush_db = true; 923bf215546Sopenharmony_ci } 924bf215546Sopenharmony_ci 925bf215546Sopenharmony_ci si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4); 926bf215546Sopenharmony_ci} 927bf215546Sopenharmony_ci 928bf215546Sopenharmony_civoid 929bf215546Sopenharmony_cisi_write_user_event(struct si_context* sctx, struct radeon_cmdbuf *rcs, 930bf215546Sopenharmony_ci enum rgp_sqtt_marker_user_event_type type, 931bf215546Sopenharmony_ci const char *str, int len) 932bf215546Sopenharmony_ci{ 933bf215546Sopenharmony_ci if (type == UserEventPop) { 934bf215546Sopenharmony_ci assert (str == NULL); 935bf215546Sopenharmony_ci struct rgp_sqtt_marker_user_event marker = { 0 }; 936bf215546Sopenharmony_ci marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT; 937bf215546Sopenharmony_ci marker.data_type = type; 938bf215546Sopenharmony_ci 939bf215546Sopenharmony_ci si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4); 940bf215546Sopenharmony_ci } else { 941bf215546Sopenharmony_ci assert (str != NULL); 942bf215546Sopenharmony_ci struct rgp_sqtt_marker_user_event_with_length marker = { 0 }; 943bf215546Sopenharmony_ci marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT; 944bf215546Sopenharmony_ci marker.user_event.data_type = type; 945bf215546Sopenharmony_ci len = MIN2(1024, len); 946bf215546Sopenharmony_ci marker.length = align(len, 4); 947bf215546Sopenharmony_ci 948bf215546Sopenharmony_ci uint8_t *buffer = alloca(sizeof(marker) + marker.length); 949bf215546Sopenharmony_ci memcpy(buffer, &marker, sizeof(marker)); 950bf215546Sopenharmony_ci memcpy(buffer + sizeof(marker), str, len); 951bf215546Sopenharmony_ci buffer[sizeof(marker) + len - 1] = '\0'; 952bf215546Sopenharmony_ci 953bf215546Sopenharmony_ci si_emit_thread_trace_userdata(sctx, rcs, buffer, sizeof(marker) / 4 + marker.length / 4); 954bf215546Sopenharmony_ci } 955bf215546Sopenharmony_ci} 956bf215546Sopenharmony_ci 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_cibool 959bf215546Sopenharmony_cisi_sqtt_pipeline_is_registered(struct ac_thread_trace_data *thread_trace_data, 960bf215546Sopenharmony_ci uint64_t pipeline_hash) 961bf215546Sopenharmony_ci{ 962bf215546Sopenharmony_ci simple_mtx_lock(&thread_trace_data->rgp_pso_correlation.lock); 963bf215546Sopenharmony_ci list_for_each_entry_safe(struct rgp_pso_correlation_record, record, 964bf215546Sopenharmony_ci &thread_trace_data->rgp_pso_correlation.record, list) { 965bf215546Sopenharmony_ci if (record->pipeline_hash[0] == pipeline_hash) { 966bf215546Sopenharmony_ci simple_mtx_unlock(&thread_trace_data->rgp_pso_correlation.lock); 967bf215546Sopenharmony_ci return true; 968bf215546Sopenharmony_ci } 969bf215546Sopenharmony_ci 970bf215546Sopenharmony_ci } 971bf215546Sopenharmony_ci simple_mtx_unlock(&thread_trace_data->rgp_pso_correlation.lock); 972bf215546Sopenharmony_ci 973bf215546Sopenharmony_ci return false; 974bf215546Sopenharmony_ci} 975bf215546Sopenharmony_ci 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_ci 978bf215546Sopenharmony_cistatic enum rgp_hardware_stages 979bf215546Sopenharmony_cisi_sqtt_pipe_to_rgp_shader_stage(union si_shader_key* key, enum pipe_shader_type stage) 980bf215546Sopenharmony_ci{ 981bf215546Sopenharmony_ci switch (stage) { 982bf215546Sopenharmony_ci case PIPE_SHADER_VERTEX: 983bf215546Sopenharmony_ci if (key->ge.as_ls) 984bf215546Sopenharmony_ci return RGP_HW_STAGE_LS; 985bf215546Sopenharmony_ci else if (key->ge.as_es) 986bf215546Sopenharmony_ci return RGP_HW_STAGE_ES; 987bf215546Sopenharmony_ci else if (key->ge.as_ngg) 988bf215546Sopenharmony_ci return RGP_HW_STAGE_GS; 989bf215546Sopenharmony_ci else 990bf215546Sopenharmony_ci return RGP_HW_STAGE_VS; 991bf215546Sopenharmony_ci case PIPE_SHADER_TESS_CTRL: 992bf215546Sopenharmony_ci return RGP_HW_STAGE_HS; 993bf215546Sopenharmony_ci case PIPE_SHADER_TESS_EVAL: 994bf215546Sopenharmony_ci if (key->ge.as_es) 995bf215546Sopenharmony_ci return RGP_HW_STAGE_ES; 996bf215546Sopenharmony_ci else if (key->ge.as_ngg) 997bf215546Sopenharmony_ci return RGP_HW_STAGE_GS; 998bf215546Sopenharmony_ci else 999bf215546Sopenharmony_ci return RGP_HW_STAGE_VS; 1000bf215546Sopenharmony_ci case PIPE_SHADER_GEOMETRY: 1001bf215546Sopenharmony_ci return RGP_HW_STAGE_GS; 1002bf215546Sopenharmony_ci case PIPE_SHADER_FRAGMENT: 1003bf215546Sopenharmony_ci return RGP_HW_STAGE_PS; 1004bf215546Sopenharmony_ci case PIPE_SHADER_COMPUTE: 1005bf215546Sopenharmony_ci return RGP_HW_STAGE_CS; 1006bf215546Sopenharmony_ci default: 1007bf215546Sopenharmony_ci unreachable("invalid mesa shader stage"); 1008bf215546Sopenharmony_ci } 1009bf215546Sopenharmony_ci} 1010bf215546Sopenharmony_ci 1011bf215546Sopenharmony_cistatic bool 1012bf215546Sopenharmony_cisi_sqtt_add_code_object(struct si_context* sctx, 1013bf215546Sopenharmony_ci uint64_t pipeline_hash, 1014bf215546Sopenharmony_ci bool is_compute) 1015bf215546Sopenharmony_ci{ 1016bf215546Sopenharmony_ci struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace; 1017bf215546Sopenharmony_ci struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object; 1018bf215546Sopenharmony_ci struct rgp_code_object_record *record; 1019bf215546Sopenharmony_ci 1020bf215546Sopenharmony_ci record = malloc(sizeof(struct rgp_code_object_record)); 1021bf215546Sopenharmony_ci if (!record) 1022bf215546Sopenharmony_ci return false; 1023bf215546Sopenharmony_ci 1024bf215546Sopenharmony_ci record->shader_stages_mask = 0; 1025bf215546Sopenharmony_ci record->num_shaders_combined = 0; 1026bf215546Sopenharmony_ci record->pipeline_hash[0] = pipeline_hash; 1027bf215546Sopenharmony_ci record->pipeline_hash[1] = pipeline_hash; 1028bf215546Sopenharmony_ci 1029bf215546Sopenharmony_ci for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) { 1030bf215546Sopenharmony_ci struct si_shader *shader; 1031bf215546Sopenharmony_ci enum rgp_hardware_stages hw_stage; 1032bf215546Sopenharmony_ci 1033bf215546Sopenharmony_ci if (is_compute) { 1034bf215546Sopenharmony_ci if (i != PIPE_SHADER_COMPUTE) 1035bf215546Sopenharmony_ci continue; 1036bf215546Sopenharmony_ci shader = &sctx->cs_shader_state.program->shader; 1037bf215546Sopenharmony_ci hw_stage = RGP_HW_STAGE_CS; 1038bf215546Sopenharmony_ci } else if (i != PIPE_SHADER_COMPUTE) { 1039bf215546Sopenharmony_ci if (!sctx->shaders[i].cso || !sctx->shaders[i].current) 1040bf215546Sopenharmony_ci continue; 1041bf215546Sopenharmony_ci shader = sctx->shaders[i].current; 1042bf215546Sopenharmony_ci hw_stage = si_sqtt_pipe_to_rgp_shader_stage(&shader->key, i); 1043bf215546Sopenharmony_ci } else { 1044bf215546Sopenharmony_ci continue; 1045bf215546Sopenharmony_ci } 1046bf215546Sopenharmony_ci 1047bf215546Sopenharmony_ci uint8_t *code = malloc(shader->binary.uploaded_code_size); 1048bf215546Sopenharmony_ci if (!code) { 1049bf215546Sopenharmony_ci free(record); 1050bf215546Sopenharmony_ci return false; 1051bf215546Sopenharmony_ci } 1052bf215546Sopenharmony_ci memcpy(code, shader->binary.uploaded_code, shader->binary.uploaded_code_size); 1053bf215546Sopenharmony_ci 1054bf215546Sopenharmony_ci uint64_t va = shader->bo->gpu_address; 1055bf215546Sopenharmony_ci unsigned gl_shader_stage = tgsi_processor_to_shader_stage(i); 1056bf215546Sopenharmony_ci record->shader_data[gl_shader_stage].hash[0] = _mesa_hash_data(code, shader->binary.uploaded_code_size); 1057bf215546Sopenharmony_ci record->shader_data[gl_shader_stage].hash[1] = record->shader_data[gl_shader_stage].hash[0]; 1058bf215546Sopenharmony_ci record->shader_data[gl_shader_stage].code_size = shader->binary.uploaded_code_size; 1059bf215546Sopenharmony_ci record->shader_data[gl_shader_stage].code = code; 1060bf215546Sopenharmony_ci record->shader_data[gl_shader_stage].vgpr_count = shader->config.num_vgprs; 1061bf215546Sopenharmony_ci record->shader_data[gl_shader_stage].sgpr_count = shader->config.num_sgprs; 1062bf215546Sopenharmony_ci record->shader_data[gl_shader_stage].base_address = va & 0xffffffffffff; 1063bf215546Sopenharmony_ci record->shader_data[gl_shader_stage].elf_symbol_offset = 0; 1064bf215546Sopenharmony_ci record->shader_data[gl_shader_stage].hw_stage = hw_stage; 1065bf215546Sopenharmony_ci record->shader_data[gl_shader_stage].is_combined = false; 1066bf215546Sopenharmony_ci record->shader_data[gl_shader_stage].scratch_memory_size = shader->config.scratch_bytes_per_wave; 1067bf215546Sopenharmony_ci record->shader_data[gl_shader_stage].wavefront_size = shader->wave_size; 1068bf215546Sopenharmony_ci 1069bf215546Sopenharmony_ci record->shader_stages_mask |= 1 << gl_shader_stage; 1070bf215546Sopenharmony_ci record->num_shaders_combined++; 1071bf215546Sopenharmony_ci } 1072bf215546Sopenharmony_ci 1073bf215546Sopenharmony_ci simple_mtx_lock(&code_object->lock); 1074bf215546Sopenharmony_ci list_addtail(&record->list, &code_object->record); 1075bf215546Sopenharmony_ci code_object->record_count++; 1076bf215546Sopenharmony_ci simple_mtx_unlock(&code_object->lock); 1077bf215546Sopenharmony_ci 1078bf215546Sopenharmony_ci return true; 1079bf215546Sopenharmony_ci} 1080bf215546Sopenharmony_ci 1081bf215546Sopenharmony_cibool 1082bf215546Sopenharmony_cisi_sqtt_register_pipeline(struct si_context* sctx, uint64_t pipeline_hash, uint64_t base_address, bool is_compute) 1083bf215546Sopenharmony_ci{ 1084bf215546Sopenharmony_ci struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace; 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_ci assert (!si_sqtt_pipeline_is_registered(thread_trace_data, pipeline_hash)); 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci bool result = ac_sqtt_add_pso_correlation(thread_trace_data, pipeline_hash); 1089bf215546Sopenharmony_ci if (!result) 1090bf215546Sopenharmony_ci return false; 1091bf215546Sopenharmony_ci 1092bf215546Sopenharmony_ci result = ac_sqtt_add_code_object_loader_event(thread_trace_data, pipeline_hash, base_address); 1093bf215546Sopenharmony_ci if (!result) 1094bf215546Sopenharmony_ci return false; 1095bf215546Sopenharmony_ci 1096bf215546Sopenharmony_ci return si_sqtt_add_code_object(sctx, pipeline_hash, is_compute); 1097bf215546Sopenharmony_ci} 1098bf215546Sopenharmony_ci 1099bf215546Sopenharmony_civoid 1100bf215546Sopenharmony_cisi_sqtt_describe_pipeline_bind(struct si_context* sctx, 1101bf215546Sopenharmony_ci uint64_t pipeline_hash, 1102bf215546Sopenharmony_ci int bind_point) 1103bf215546Sopenharmony_ci{ 1104bf215546Sopenharmony_ci struct rgp_sqtt_marker_pipeline_bind marker = {0}; 1105bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 1106bf215546Sopenharmony_ci 1107bf215546Sopenharmony_ci if (likely(!sctx->thread_trace_enabled)) { 1108bf215546Sopenharmony_ci return; 1109bf215546Sopenharmony_ci } 1110bf215546Sopenharmony_ci 1111bf215546Sopenharmony_ci marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE; 1112bf215546Sopenharmony_ci marker.cb_id = 0; 1113bf215546Sopenharmony_ci marker.bind_point = bind_point; 1114bf215546Sopenharmony_ci marker.api_pso_hash[0] = pipeline_hash; 1115bf215546Sopenharmony_ci marker.api_pso_hash[1] = pipeline_hash >> 32; 1116bf215546Sopenharmony_ci 1117bf215546Sopenharmony_ci si_emit_thread_trace_userdata(sctx, cs, &marker, sizeof(marker) / 4); 1118bf215546Sopenharmony_ci} 1119