/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "si_pipe.h"
28bf215546Sopenharmony_ci#include "si_build_pm4.h"
29bf215546Sopenharmony_ci#include "si_compute.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#include "ac_rgp.h"
32bf215546Sopenharmony_ci#include "ac_sqtt.h"
33bf215546Sopenharmony_ci#include "util/u_memory.h"
34bf215546Sopenharmony_ci#include "tgsi/tgsi_from_mesa.h"
35bf215546Sopenharmony_ci
/* Forward declaration; the definition is not in this part of the file.
 * The trace start path calls it with enable=true to turn on the SQG events
 * that collect thread trace data, and the stop path calls it with
 * enable=false to restore the previous state. */
static void si_emit_spi_config_cntl(struct si_context *sctx,
                                    struct radeon_cmdbuf *cs,
                                    bool enable);
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_cistatic bool
41bf215546Sopenharmony_cisi_thread_trace_init_bo(struct si_context *sctx)
42bf215546Sopenharmony_ci{
43bf215546Sopenharmony_ci   unsigned max_se = sctx->screen->info.max_se;
44bf215546Sopenharmony_ci   struct radeon_winsys *ws = sctx->ws;
45bf215546Sopenharmony_ci   uint64_t size;
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ci   /* The buffer size and address need to be aligned in HW regs. Align the
48bf215546Sopenharmony_ci    * size as early as possible so that we do all the allocation & addressing
49bf215546Sopenharmony_ci    * correctly. */
50bf215546Sopenharmony_ci   sctx->thread_trace->buffer_size = align64(sctx->thread_trace->buffer_size,
51bf215546Sopenharmony_ci                                             1u << SQTT_BUFFER_ALIGN_SHIFT);
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_ci   /* Compute total size of the thread trace BO for all SEs. */
54bf215546Sopenharmony_ci   size = align64(sizeof(struct ac_thread_trace_info) * max_se,
55bf215546Sopenharmony_ci                  1 << SQTT_BUFFER_ALIGN_SHIFT);
56bf215546Sopenharmony_ci   size += sctx->thread_trace->buffer_size * (uint64_t)max_se;
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_ci   sctx->thread_trace->bo =
59bf215546Sopenharmony_ci      ws->buffer_create(ws, size, 4096,
60bf215546Sopenharmony_ci                        RADEON_DOMAIN_VRAM,
61bf215546Sopenharmony_ci                        RADEON_FLAG_NO_INTERPROCESS_SHARING |
62bf215546Sopenharmony_ci                        RADEON_FLAG_GTT_WC |
63bf215546Sopenharmony_ci                        RADEON_FLAG_NO_SUBALLOC);
64bf215546Sopenharmony_ci   if (!sctx->thread_trace->bo)
65bf215546Sopenharmony_ci      return false;
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci   return true;
68bf215546Sopenharmony_ci}
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_cistatic bool
71bf215546Sopenharmony_cisi_se_is_disabled(struct si_context* sctx, unsigned se)
72bf215546Sopenharmony_ci{
73bf215546Sopenharmony_ci   /* No active CU on the SE means it is disabled. */
74bf215546Sopenharmony_ci   return sctx->screen->info.cu_mask[se][0] == 0;
75bf215546Sopenharmony_ci}
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_cistatic void
79bf215546Sopenharmony_cisi_emit_thread_trace_start(struct si_context* sctx,
80bf215546Sopenharmony_ci                           struct radeon_cmdbuf *cs,
81bf215546Sopenharmony_ci                           uint32_t queue_family_index)
82bf215546Sopenharmony_ci{
83bf215546Sopenharmony_ci   struct si_screen *sscreen = sctx->screen;
84bf215546Sopenharmony_ci   uint32_t shifted_size = sctx->thread_trace->buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
85bf215546Sopenharmony_ci   unsigned max_se = sscreen->info.max_se;
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ci   radeon_begin(cs);
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci   for (unsigned se = 0; se < max_se; se++) {
90bf215546Sopenharmony_ci      uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
91bf215546Sopenharmony_ci      uint64_t data_va = ac_thread_trace_get_data_va(&sctx->screen->info, sctx->thread_trace, va, se);
92bf215546Sopenharmony_ci      uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_ci      if (si_se_is_disabled(sctx, se))
95bf215546Sopenharmony_ci         continue;
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci      /* Target SEx and SH0. */
98bf215546Sopenharmony_ci      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
99bf215546Sopenharmony_ci                             S_030800_SE_INDEX(se) |
100bf215546Sopenharmony_ci                             S_030800_SH_INDEX(0) |
101bf215546Sopenharmony_ci                             S_030800_INSTANCE_BROADCAST_WRITES(1));
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci      /* Select the first active CUs */
104bf215546Sopenharmony_ci      int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci      if (sctx->gfx_level >= GFX10) {
107bf215546Sopenharmony_ci         /* Order seems important for the following 2 registers. */
108bf215546Sopenharmony_ci         radeon_set_privileged_config_reg(R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
109bf215546Sopenharmony_ci                                          S_008D04_SIZE(shifted_size) |
110bf215546Sopenharmony_ci                                          S_008D04_BASE_HI(shifted_va >> 32));
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci         radeon_set_privileged_config_reg(R_008D00_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci         int wgp = first_active_cu / 2;
115bf215546Sopenharmony_ci         radeon_set_privileged_config_reg(R_008D14_SQ_THREAD_TRACE_MASK,
116bf215546Sopenharmony_ci                                          S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */
117bf215546Sopenharmony_ci                                          S_008D14_SA_SEL(0) |
118bf215546Sopenharmony_ci                                          S_008D14_WGP_SEL(wgp) |
119bf215546Sopenharmony_ci                                          S_008D14_SIMD_SEL(0));
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci         radeon_set_privileged_config_reg(R_008D18_SQ_THREAD_TRACE_TOKEN_MASK,
122bf215546Sopenharmony_ci                      S_008D18_REG_INCLUDE(V_008D18_REG_INCLUDE_SQDEC |
123bf215546Sopenharmony_ci                                           V_008D18_REG_INCLUDE_SHDEC |
124bf215546Sopenharmony_ci                                           V_008D18_REG_INCLUDE_GFXUDEC |
125bf215546Sopenharmony_ci                                           V_008D18_REG_INCLUDE_CONTEXT |
126bf215546Sopenharmony_ci                                           V_008D18_REG_INCLUDE_COMP |
127bf215546Sopenharmony_ci                                           V_008D18_REG_INCLUDE_CONFIG) |
128bf215546Sopenharmony_ci                      S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_ci         /* Should be emitted last (it enables thread traces). */
131bf215546Sopenharmony_ci         radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL,
132bf215546Sopenharmony_ci                                          S_008D1C_MODE(1) |
133bf215546Sopenharmony_ci                                          S_008D1C_HIWATER(5) |
134bf215546Sopenharmony_ci                                          S_008D1C_UTIL_TIMER(1) |
135bf215546Sopenharmony_ci                                          S_008D1C_RT_FREQ(2) | /* 4096 clk */
136bf215546Sopenharmony_ci                                          S_008D1C_DRAW_EVENT_EN(1) |
137bf215546Sopenharmony_ci                                          S_008D1C_REG_STALL_EN(1) |
138bf215546Sopenharmony_ci                                          S_008D1C_SPI_STALL_EN(1) |
139bf215546Sopenharmony_ci                                          S_008D1C_SQ_STALL_EN(1) |
140bf215546Sopenharmony_ci                                          S_008D1C_REG_DROP_ON_STALL(0) |
141bf215546Sopenharmony_ci                                          S_008D1C_LOWATER_OFFSET(
142bf215546Sopenharmony_ci                                             sctx->gfx_level >= GFX10_3 ? 4 : 0) |
143bf215546Sopenharmony_ci                                          S_008D1C_AUTO_FLUSH_MODE(sctx->screen->info.has_sqtt_auto_flush_mode_bug));
144bf215546Sopenharmony_ci      } else {
145bf215546Sopenharmony_ci         /* Order seems important for the following 4 registers. */
146bf215546Sopenharmony_ci         radeon_set_uconfig_reg(R_030CDC_SQ_THREAD_TRACE_BASE2,
147bf215546Sopenharmony_ci                                S_030CDC_ADDR_HI(shifted_va >> 32));
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ci         radeon_set_uconfig_reg(R_030CC0_SQ_THREAD_TRACE_BASE, shifted_va);
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci         radeon_set_uconfig_reg(R_030CC4_SQ_THREAD_TRACE_SIZE,
152bf215546Sopenharmony_ci                                S_030CC4_SIZE(shifted_size));
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci         radeon_set_uconfig_reg(R_030CD4_SQ_THREAD_TRACE_CTRL,
155bf215546Sopenharmony_ci                                S_030CD4_RESET_BUFFER(1));
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci         uint32_t thread_trace_mask = S_030CC8_CU_SEL(first_active_cu) |
158bf215546Sopenharmony_ci                                      S_030CC8_SH_SEL(0) |
159bf215546Sopenharmony_ci                                      S_030CC8_SIMD_EN(0xf) |
160bf215546Sopenharmony_ci                                      S_030CC8_VM_ID_MASK(0) |
161bf215546Sopenharmony_ci                                      S_030CC8_REG_STALL_EN(1) |
162bf215546Sopenharmony_ci                                      S_030CC8_SPI_STALL_EN(1) |
163bf215546Sopenharmony_ci                                      S_030CC8_SQ_STALL_EN(1);
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci         radeon_set_uconfig_reg(R_030CC8_SQ_THREAD_TRACE_MASK,
166bf215546Sopenharmony_ci                                thread_trace_mask);
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci         /* Trace all tokens and registers. */
169bf215546Sopenharmony_ci         radeon_set_uconfig_reg(R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK,
170bf215546Sopenharmony_ci                                S_030CCC_TOKEN_MASK(0xbfff) |
171bf215546Sopenharmony_ci                                S_030CCC_REG_MASK(0xff) |
172bf215546Sopenharmony_ci                                S_030CCC_REG_DROP_ON_STALL(0));
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci         /* Enable SQTT perf counters for all CUs. */
175bf215546Sopenharmony_ci         radeon_set_uconfig_reg(R_030CD0_SQ_THREAD_TRACE_PERF_MASK,
176bf215546Sopenharmony_ci                                S_030CD0_SH0_MASK(0xffff) |
177bf215546Sopenharmony_ci                                S_030CD0_SH1_MASK(0xffff));
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci         radeon_set_uconfig_reg(R_030CE0_SQ_THREAD_TRACE_TOKEN_MASK2, 0xffffffff);
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci         radeon_set_uconfig_reg(R_030CEC_SQ_THREAD_TRACE_HIWATER,
182bf215546Sopenharmony_ci                                S_030CEC_HIWATER(4));
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci         if (sctx->gfx_level == GFX9) {
185bf215546Sopenharmony_ci            /* Reset thread trace status errors. */
186bf215546Sopenharmony_ci            radeon_set_uconfig_reg(R_030CE8_SQ_THREAD_TRACE_STATUS,
187bf215546Sopenharmony_ci                                   S_030CE8_UTC_ERROR(0));
188bf215546Sopenharmony_ci         }
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci         /* Enable the thread trace mode. */
191bf215546Sopenharmony_ci         uint32_t thread_trace_mode =
192bf215546Sopenharmony_ci            S_030CD8_MASK_PS(1) |
193bf215546Sopenharmony_ci            S_030CD8_MASK_VS(1) |
194bf215546Sopenharmony_ci            S_030CD8_MASK_GS(1) |
195bf215546Sopenharmony_ci            S_030CD8_MASK_ES(1) |
196bf215546Sopenharmony_ci            S_030CD8_MASK_HS(1) |
197bf215546Sopenharmony_ci            S_030CD8_MASK_LS(1) |
198bf215546Sopenharmony_ci            S_030CD8_MASK_CS(1) |
199bf215546Sopenharmony_ci            S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
200bf215546Sopenharmony_ci            S_030CD8_MODE(1);
201bf215546Sopenharmony_ci
202bf215546Sopenharmony_ci         if (sctx->gfx_level == GFX9) {
203bf215546Sopenharmony_ci            /* Count SQTT traffic in TCC perf counters. */
204bf215546Sopenharmony_ci            thread_trace_mode |= S_030CD8_TC_PERF_EN(1);
205bf215546Sopenharmony_ci         }
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_ci         radeon_set_uconfig_reg(R_030CD8_SQ_THREAD_TRACE_MODE,
208bf215546Sopenharmony_ci                                thread_trace_mode);
209bf215546Sopenharmony_ci      }
210bf215546Sopenharmony_ci   }
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_ci   /* Restore global broadcasting. */
213bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
214bf215546Sopenharmony_ci                          S_030800_SE_BROADCAST_WRITES(1) |
215bf215546Sopenharmony_ci                             S_030800_SH_BROADCAST_WRITES(1) |
216bf215546Sopenharmony_ci                             S_030800_INSTANCE_BROADCAST_WRITES(1));
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_ci   /* Start the thread trace with a different event based on the queue. */
219bf215546Sopenharmony_ci   if (queue_family_index == AMD_IP_COMPUTE) {
220bf215546Sopenharmony_ci      radeon_set_sh_reg(R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
221bf215546Sopenharmony_ci                        S_00B878_THREAD_TRACE_ENABLE(1));
222bf215546Sopenharmony_ci   } else {
223bf215546Sopenharmony_ci      radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
224bf215546Sopenharmony_ci      radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_START) | EVENT_INDEX(0));
225bf215546Sopenharmony_ci   }
226bf215546Sopenharmony_ci   radeon_end();
227bf215546Sopenharmony_ci}
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_cistatic const uint32_t gfx9_thread_trace_info_regs[] =
230bf215546Sopenharmony_ci{
231bf215546Sopenharmony_ci   R_030CE4_SQ_THREAD_TRACE_WPTR,
232bf215546Sopenharmony_ci   R_030CE8_SQ_THREAD_TRACE_STATUS,
233bf215546Sopenharmony_ci   R_030CF0_SQ_THREAD_TRACE_CNTR,
234bf215546Sopenharmony_ci};
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_cistatic const uint32_t gfx10_thread_trace_info_regs[] =
237bf215546Sopenharmony_ci{
238bf215546Sopenharmony_ci   R_008D10_SQ_THREAD_TRACE_WPTR,
239bf215546Sopenharmony_ci   R_008D20_SQ_THREAD_TRACE_STATUS,
240bf215546Sopenharmony_ci   R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR,
241bf215546Sopenharmony_ci};
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_cistatic void
244bf215546Sopenharmony_cisi_copy_thread_trace_info_regs(struct si_context* sctx,
245bf215546Sopenharmony_ci             struct radeon_cmdbuf *cs,
246bf215546Sopenharmony_ci             unsigned se_index)
247bf215546Sopenharmony_ci{
248bf215546Sopenharmony_ci   const uint32_t *thread_trace_info_regs = NULL;
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci   switch (sctx->gfx_level) {
251bf215546Sopenharmony_ci   case GFX10_3:
252bf215546Sopenharmony_ci   case GFX10:
253bf215546Sopenharmony_ci      thread_trace_info_regs = gfx10_thread_trace_info_regs;
254bf215546Sopenharmony_ci      break;
255bf215546Sopenharmony_ci   case GFX9:
256bf215546Sopenharmony_ci      thread_trace_info_regs = gfx9_thread_trace_info_regs;
257bf215546Sopenharmony_ci      break;
258bf215546Sopenharmony_ci   default:
259bf215546Sopenharmony_ci      unreachable("Unsupported gfx_level");
260bf215546Sopenharmony_ci   }
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_ci   /* Get the VA where the info struct is stored for this SE. */
263bf215546Sopenharmony_ci   uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
264bf215546Sopenharmony_ci   uint64_t info_va = ac_thread_trace_get_info_va(va, se_index);
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_ci   radeon_begin(cs);
267bf215546Sopenharmony_ci
268bf215546Sopenharmony_ci   /* Copy back the info struct one DWORD at a time. */
269bf215546Sopenharmony_ci   for (unsigned i = 0; i < 3; i++) {
270bf215546Sopenharmony_ci      radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
271bf215546Sopenharmony_ci      radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
272bf215546Sopenharmony_ci                  COPY_DATA_DST_SEL(COPY_DATA_TC_L2) |
273bf215546Sopenharmony_ci                  COPY_DATA_WR_CONFIRM);
274bf215546Sopenharmony_ci      radeon_emit(thread_trace_info_regs[i] >> 2);
275bf215546Sopenharmony_ci      radeon_emit(0); /* unused */
276bf215546Sopenharmony_ci      radeon_emit((info_va + i * 4));
277bf215546Sopenharmony_ci      radeon_emit((info_va + i * 4) >> 32);
278bf215546Sopenharmony_ci   }
279bf215546Sopenharmony_ci   radeon_end();
280bf215546Sopenharmony_ci}
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_ci
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_cistatic void
285bf215546Sopenharmony_cisi_emit_thread_trace_stop(struct si_context *sctx,
286bf215546Sopenharmony_ci                          struct radeon_cmdbuf *cs,
287bf215546Sopenharmony_ci                          uint32_t queue_family_index)
288bf215546Sopenharmony_ci{
289bf215546Sopenharmony_ci   unsigned max_se = sctx->screen->info.max_se;
290bf215546Sopenharmony_ci
291bf215546Sopenharmony_ci   radeon_begin(cs);
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci   /* Stop the thread trace with a different event based on the queue. */
294bf215546Sopenharmony_ci   if (queue_family_index == AMD_IP_COMPUTE) {
295bf215546Sopenharmony_ci      radeon_set_sh_reg(R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
296bf215546Sopenharmony_ci                        S_00B878_THREAD_TRACE_ENABLE(0));
297bf215546Sopenharmony_ci   } else {
298bf215546Sopenharmony_ci      radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
299bf215546Sopenharmony_ci      radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_STOP) | EVENT_INDEX(0));
300bf215546Sopenharmony_ci   }
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
303bf215546Sopenharmony_ci   radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
304bf215546Sopenharmony_ci   radeon_end();
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci   if (sctx->screen->info.has_sqtt_rb_harvest_bug) {
307bf215546Sopenharmony_ci      /* Some chips with disabled RBs should wait for idle because FINISH_DONE doesn't work. */
308bf215546Sopenharmony_ci      sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
309bf215546Sopenharmony_ci                     SI_CONTEXT_FLUSH_AND_INV_DB |
310bf215546Sopenharmony_ci                     SI_CONTEXT_CS_PARTIAL_FLUSH;
311bf215546Sopenharmony_ci      sctx->emit_cache_flush(sctx, cs);
312bf215546Sopenharmony_ci   }
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_ci   for (unsigned se = 0; se < max_se; se++) {
315bf215546Sopenharmony_ci      if (si_se_is_disabled(sctx, se))
316bf215546Sopenharmony_ci         continue;
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci      radeon_begin(cs);
319bf215546Sopenharmony_ci
320bf215546Sopenharmony_ci      /* Target SEi and SH0. */
321bf215546Sopenharmony_ci      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
322bf215546Sopenharmony_ci                             S_030800_SE_INDEX(se) |
323bf215546Sopenharmony_ci                             S_030800_SH_INDEX(0) |
324bf215546Sopenharmony_ci                             S_030800_INSTANCE_BROADCAST_WRITES(1));
325bf215546Sopenharmony_ci
326bf215546Sopenharmony_ci      if (sctx->gfx_level >= GFX10) {
327bf215546Sopenharmony_ci         if (!sctx->screen->info.has_sqtt_rb_harvest_bug) {
328bf215546Sopenharmony_ci            /* Make sure to wait for the trace buffer. */
329bf215546Sopenharmony_ci            radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
330bf215546Sopenharmony_ci            radeon_emit(WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */
331bf215546Sopenharmony_ci            radeon_emit(R_008D20_SQ_THREAD_TRACE_STATUS >> 2);  /* register */
332bf215546Sopenharmony_ci            radeon_emit(0);
333bf215546Sopenharmony_ci            radeon_emit(0); /* reference value */
334bf215546Sopenharmony_ci            radeon_emit(~C_008D20_FINISH_DONE); /* mask */
335bf215546Sopenharmony_ci            radeon_emit(4); /* poll interval */
336bf215546Sopenharmony_ci         }
337bf215546Sopenharmony_ci
338bf215546Sopenharmony_ci         /* Disable the thread trace mode. */
339bf215546Sopenharmony_ci         radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL,
340bf215546Sopenharmony_ci                                          S_008D1C_MODE(0));
341bf215546Sopenharmony_ci
342bf215546Sopenharmony_ci         /* Wait for thread trace completion. */
343bf215546Sopenharmony_ci         radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
344bf215546Sopenharmony_ci         radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
345bf215546Sopenharmony_ci         radeon_emit(R_008D20_SQ_THREAD_TRACE_STATUS >> 2);  /* register */
346bf215546Sopenharmony_ci         radeon_emit(0);
347bf215546Sopenharmony_ci         radeon_emit(0); /* reference value */
348bf215546Sopenharmony_ci         radeon_emit(~C_008D20_BUSY); /* mask */
349bf215546Sopenharmony_ci         radeon_emit(4); /* poll interval */
350bf215546Sopenharmony_ci      } else {
351bf215546Sopenharmony_ci         /* Disable the thread trace mode. */
352bf215546Sopenharmony_ci         radeon_set_uconfig_reg(R_030CD8_SQ_THREAD_TRACE_MODE,
353bf215546Sopenharmony_ci                                S_030CD8_MODE(0));
354bf215546Sopenharmony_ci
355bf215546Sopenharmony_ci         /* Wait for thread trace completion. */
356bf215546Sopenharmony_ci         radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
357bf215546Sopenharmony_ci         radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
358bf215546Sopenharmony_ci         radeon_emit(R_030CE8_SQ_THREAD_TRACE_STATUS >> 2);  /* register */
359bf215546Sopenharmony_ci         radeon_emit(0);
360bf215546Sopenharmony_ci         radeon_emit(0); /* reference value */
361bf215546Sopenharmony_ci         radeon_emit(~C_030CE8_BUSY); /* mask */
362bf215546Sopenharmony_ci         radeon_emit(4); /* poll interval */
363bf215546Sopenharmony_ci      }
364bf215546Sopenharmony_ci      radeon_end();
365bf215546Sopenharmony_ci
366bf215546Sopenharmony_ci      si_copy_thread_trace_info_regs(sctx, cs, se);
367bf215546Sopenharmony_ci   }
368bf215546Sopenharmony_ci
369bf215546Sopenharmony_ci   /* Restore global broadcasting. */
370bf215546Sopenharmony_ci   radeon_begin_again(cs);
371bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
372bf215546Sopenharmony_ci                          S_030800_SE_BROADCAST_WRITES(1) |
373bf215546Sopenharmony_ci                             S_030800_SH_BROADCAST_WRITES(1) |
374bf215546Sopenharmony_ci                             S_030800_INSTANCE_BROADCAST_WRITES(1));
375bf215546Sopenharmony_ci   radeon_end();
376bf215546Sopenharmony_ci}
377bf215546Sopenharmony_ci
378bf215546Sopenharmony_cistatic void
379bf215546Sopenharmony_cisi_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf *cs)
380bf215546Sopenharmony_ci{
381bf215546Sopenharmony_ci   struct radeon_winsys *ws = sctx->ws;
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci   radeon_begin(cs);
384bf215546Sopenharmony_ci
385bf215546Sopenharmony_ci   switch (family) {
386bf215546Sopenharmony_ci      case AMD_IP_GFX:
387bf215546Sopenharmony_ci         radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
388bf215546Sopenharmony_ci         radeon_emit(CC0_UPDATE_LOAD_ENABLES(1));
389bf215546Sopenharmony_ci         radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1));
390bf215546Sopenharmony_ci         break;
391bf215546Sopenharmony_ci      case AMD_IP_COMPUTE:
392bf215546Sopenharmony_ci         radeon_emit(PKT3(PKT3_NOP, 0, 0));
393bf215546Sopenharmony_ci         radeon_emit(0);
394bf215546Sopenharmony_ci         break;
395bf215546Sopenharmony_ci   }
396bf215546Sopenharmony_ci   radeon_end();
397bf215546Sopenharmony_ci
398bf215546Sopenharmony_ci   ws->cs_add_buffer(cs,
399bf215546Sopenharmony_ci                     sctx->thread_trace->bo,
400bf215546Sopenharmony_ci                     RADEON_USAGE_READWRITE,
401bf215546Sopenharmony_ci                     RADEON_DOMAIN_VRAM);
402bf215546Sopenharmony_ci   ws->cs_add_buffer(cs,
403bf215546Sopenharmony_ci                     sctx->spm_trace.bo,
404bf215546Sopenharmony_ci                     RADEON_USAGE_READWRITE,
405bf215546Sopenharmony_ci                     RADEON_DOMAIN_VRAM);
406bf215546Sopenharmony_ci
407bf215546Sopenharmony_ci   si_cp_dma_wait_for_idle(sctx, cs);
408bf215546Sopenharmony_ci
409bf215546Sopenharmony_ci   /* Make sure to wait-for-idle before starting SQTT. */
410bf215546Sopenharmony_ci   sctx->flags |=
411bf215546Sopenharmony_ci      SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
412bf215546Sopenharmony_ci      SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
413bf215546Sopenharmony_ci      SI_CONTEXT_INV_L2 | SI_CONTEXT_PFP_SYNC_ME;
414bf215546Sopenharmony_ci   sctx->emit_cache_flush(sctx, cs);
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci   si_inhibit_clockgating(sctx, cs, true);
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_ci   /* Enable SQG events that collects thread trace data. */
419bf215546Sopenharmony_ci   si_emit_spi_config_cntl(sctx, cs, true);
420bf215546Sopenharmony_ci
421bf215546Sopenharmony_ci   si_pc_emit_spm_reset(cs);
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_ci   si_pc_emit_shaders(cs, 0x7f);
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci   si_emit_spm_setup(sctx, cs);
426bf215546Sopenharmony_ci
427bf215546Sopenharmony_ci   si_emit_thread_trace_start(sctx, cs, family);
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_ci   si_pc_emit_spm_start(cs);
430bf215546Sopenharmony_ci}
431bf215546Sopenharmony_ci
432bf215546Sopenharmony_cistatic void
433bf215546Sopenharmony_cisi_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *cs)
434bf215546Sopenharmony_ci{
435bf215546Sopenharmony_ci   struct radeon_winsys *ws = sctx->ws;
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci   radeon_begin(cs);
438bf215546Sopenharmony_ci
439bf215546Sopenharmony_ci   switch (family) {
440bf215546Sopenharmony_ci      case AMD_IP_GFX:
441bf215546Sopenharmony_ci         radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
442bf215546Sopenharmony_ci         radeon_emit(CC0_UPDATE_LOAD_ENABLES(1));
443bf215546Sopenharmony_ci         radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1));
444bf215546Sopenharmony_ci         break;
445bf215546Sopenharmony_ci      case AMD_IP_COMPUTE:
446bf215546Sopenharmony_ci         radeon_emit(PKT3(PKT3_NOP, 0, 0));
447bf215546Sopenharmony_ci         radeon_emit(0);
448bf215546Sopenharmony_ci         break;
449bf215546Sopenharmony_ci   }
450bf215546Sopenharmony_ci   radeon_end();
451bf215546Sopenharmony_ci
452bf215546Sopenharmony_ci   ws->cs_add_buffer(cs,
453bf215546Sopenharmony_ci                     sctx->thread_trace->bo,
454bf215546Sopenharmony_ci                     RADEON_USAGE_READWRITE,
455bf215546Sopenharmony_ci                     RADEON_DOMAIN_VRAM);
456bf215546Sopenharmony_ci
457bf215546Sopenharmony_ci   ws->cs_add_buffer(cs,
458bf215546Sopenharmony_ci                     sctx->spm_trace.bo,
459bf215546Sopenharmony_ci                     RADEON_USAGE_READWRITE,
460bf215546Sopenharmony_ci                     RADEON_DOMAIN_VRAM);
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci   si_cp_dma_wait_for_idle(sctx, cs);
463bf215546Sopenharmony_ci
464bf215546Sopenharmony_ci   si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters,
465bf215546Sopenharmony_ci                       sctx->screen->info.never_send_perfcounter_stop);
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci   /* Make sure to wait-for-idle before stopping SQTT. */
468bf215546Sopenharmony_ci   sctx->flags |=
469bf215546Sopenharmony_ci      SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
470bf215546Sopenharmony_ci      SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
471bf215546Sopenharmony_ci      SI_CONTEXT_INV_L2 | SI_CONTEXT_PFP_SYNC_ME;
472bf215546Sopenharmony_ci   sctx->emit_cache_flush(sctx, cs);
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_ci   si_emit_thread_trace_stop(sctx, cs, family);
475bf215546Sopenharmony_ci
476bf215546Sopenharmony_ci   si_pc_emit_spm_reset(cs);
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ci   /* Restore previous state by disabling SQG events. */
479bf215546Sopenharmony_ci   si_emit_spi_config_cntl(sctx, cs, false);
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_ci   si_inhibit_clockgating(sctx, cs, false);
482bf215546Sopenharmony_ci}
483bf215546Sopenharmony_ci
484bf215546Sopenharmony_ci
485bf215546Sopenharmony_cistatic void
486bf215546Sopenharmony_cisi_thread_trace_init_cs(struct si_context *sctx)
487bf215546Sopenharmony_ci{
488bf215546Sopenharmony_ci   struct radeon_winsys *ws = sctx->ws;
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci   /* Thread trace start CS (only handles AMD_IP_GFX). */
491bf215546Sopenharmony_ci   sctx->thread_trace->start_cs[AMD_IP_GFX] = CALLOC_STRUCT(radeon_cmdbuf);
492bf215546Sopenharmony_ci   if (!ws->cs_create(sctx->thread_trace->start_cs[AMD_IP_GFX],
493bf215546Sopenharmony_ci                      sctx->ctx, AMD_IP_GFX, NULL, NULL, 0)) {
494bf215546Sopenharmony_ci      free(sctx->thread_trace->start_cs[AMD_IP_GFX]);
495bf215546Sopenharmony_ci      sctx->thread_trace->start_cs[AMD_IP_GFX] = NULL;
496bf215546Sopenharmony_ci      return;
497bf215546Sopenharmony_ci   }
498bf215546Sopenharmony_ci
499bf215546Sopenharmony_ci   si_thread_trace_start(sctx, AMD_IP_GFX, sctx->thread_trace->start_cs[AMD_IP_GFX]);
500bf215546Sopenharmony_ci
501bf215546Sopenharmony_ci   /* Thread trace stop CS. */
502bf215546Sopenharmony_ci   sctx->thread_trace->stop_cs[AMD_IP_GFX] = CALLOC_STRUCT(radeon_cmdbuf);
503bf215546Sopenharmony_ci   if (!ws->cs_create(sctx->thread_trace->stop_cs[AMD_IP_GFX],
504bf215546Sopenharmony_ci                      sctx->ctx, AMD_IP_GFX, NULL, NULL, 0)) {
505bf215546Sopenharmony_ci      free(sctx->thread_trace->start_cs[AMD_IP_GFX]);
506bf215546Sopenharmony_ci      sctx->thread_trace->start_cs[AMD_IP_GFX] = NULL;
507bf215546Sopenharmony_ci      free(sctx->thread_trace->stop_cs[AMD_IP_GFX]);
508bf215546Sopenharmony_ci      sctx->thread_trace->stop_cs[AMD_IP_GFX] = NULL;
509bf215546Sopenharmony_ci      return;
510bf215546Sopenharmony_ci   }
511bf215546Sopenharmony_ci
512bf215546Sopenharmony_ci   si_thread_trace_stop(sctx, AMD_IP_GFX, sctx->thread_trace->stop_cs[AMD_IP_GFX]);
513bf215546Sopenharmony_ci}
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_cistatic void
516bf215546Sopenharmony_cisi_begin_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
517bf215546Sopenharmony_ci{
518bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = sctx->thread_trace->start_cs[AMD_IP_GFX];
519bf215546Sopenharmony_ci   sctx->ws->cs_flush(cs, 0, NULL);
520bf215546Sopenharmony_ci}
521bf215546Sopenharmony_ci
522bf215546Sopenharmony_cistatic void
523bf215546Sopenharmony_cisi_end_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
524bf215546Sopenharmony_ci{
525bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = sctx->thread_trace->stop_cs[AMD_IP_GFX];
526bf215546Sopenharmony_ci   sctx->ws->cs_flush(cs, 0, &sctx->last_sqtt_fence);
527bf215546Sopenharmony_ci}
528bf215546Sopenharmony_ci
529bf215546Sopenharmony_cistatic bool
530bf215546Sopenharmony_cisi_get_thread_trace(struct si_context *sctx,
531bf215546Sopenharmony_ci                    struct ac_thread_trace *thread_trace)
532bf215546Sopenharmony_ci{
533bf215546Sopenharmony_ci   unsigned max_se = sctx->screen->info.max_se;
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_ci   memset(thread_trace, 0, sizeof(*thread_trace));
536bf215546Sopenharmony_ci   thread_trace->num_traces = max_se;
537bf215546Sopenharmony_ci
538bf215546Sopenharmony_ci   sctx->thread_trace->ptr = sctx->ws->buffer_map(sctx->ws, sctx->thread_trace->bo,
539bf215546Sopenharmony_ci                                                          NULL,
540bf215546Sopenharmony_ci                                                          PIPE_MAP_READ);
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci   if (!sctx->thread_trace->ptr)
543bf215546Sopenharmony_ci      return false;
544bf215546Sopenharmony_ci
545bf215546Sopenharmony_ci   void *thread_trace_ptr = sctx->thread_trace->ptr;
546bf215546Sopenharmony_ci
547bf215546Sopenharmony_ci   for (unsigned se = 0; se < max_se; se++) {
548bf215546Sopenharmony_ci      uint64_t info_offset = ac_thread_trace_get_info_offset(se);
549bf215546Sopenharmony_ci      uint64_t data_offset = ac_thread_trace_get_data_offset(&sctx->screen->info, sctx->thread_trace, se);
550bf215546Sopenharmony_ci      void *info_ptr = thread_trace_ptr + info_offset;
551bf215546Sopenharmony_ci      void *data_ptr = thread_trace_ptr + data_offset;
552bf215546Sopenharmony_ci      struct ac_thread_trace_info *info =
553bf215546Sopenharmony_ci         (struct ac_thread_trace_info *)info_ptr;
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_ci      struct ac_thread_trace_se thread_trace_se = {0};
556bf215546Sopenharmony_ci
557bf215546Sopenharmony_ci      if (!ac_is_thread_trace_complete(&sctx->screen->info, sctx->thread_trace, info)) {
558bf215546Sopenharmony_ci         uint32_t expected_size =
559bf215546Sopenharmony_ci            ac_get_expected_buffer_size(&sctx->screen->info, info);
560bf215546Sopenharmony_ci         uint32_t available_size = (info->cur_offset * 32) / 1024;
561bf215546Sopenharmony_ci
562bf215546Sopenharmony_ci         fprintf(stderr, "Failed to get the thread trace "
563bf215546Sopenharmony_ci                 "because the buffer is too small. The "
564bf215546Sopenharmony_ci                 "hardware needs %d KB but the "
565bf215546Sopenharmony_ci                 "buffer size is %d KB.\n",
566bf215546Sopenharmony_ci                 expected_size, available_size);
567bf215546Sopenharmony_ci         fprintf(stderr, "Please update the buffer size with "
568bf215546Sopenharmony_ci                 "AMD_THREAD_TRACE_BUFFER_SIZE=<size_in_kbytes>\n");
569bf215546Sopenharmony_ci         return false;
570bf215546Sopenharmony_ci      }
571bf215546Sopenharmony_ci
572bf215546Sopenharmony_ci      thread_trace_se.data_ptr = data_ptr;
573bf215546Sopenharmony_ci      thread_trace_se.info = *info;
574bf215546Sopenharmony_ci      thread_trace_se.shader_engine = se;
575bf215546Sopenharmony_ci
576bf215546Sopenharmony_ci      int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_ci      /* For GFX10+ compute_unit really means WGP */
579bf215546Sopenharmony_ci      thread_trace_se.compute_unit =
580bf215546Sopenharmony_ci         sctx->screen->info.gfx_level >= GFX10 ? (first_active_cu / 2) : first_active_cu;
581bf215546Sopenharmony_ci
582bf215546Sopenharmony_ci      thread_trace->traces[se] = thread_trace_se;
583bf215546Sopenharmony_ci   }
584bf215546Sopenharmony_ci
585bf215546Sopenharmony_ci   thread_trace->data = sctx->thread_trace;
586bf215546Sopenharmony_ci   return true;
587bf215546Sopenharmony_ci}
588bf215546Sopenharmony_ci
589bf215546Sopenharmony_ci
590bf215546Sopenharmony_cibool
591bf215546Sopenharmony_cisi_init_thread_trace(struct si_context *sctx)
592bf215546Sopenharmony_ci{
593bf215546Sopenharmony_ci   static bool warn_once = true;
594bf215546Sopenharmony_ci   if (warn_once) {
595bf215546Sopenharmony_ci      fprintf(stderr, "*************************************************\n");
596bf215546Sopenharmony_ci      fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
597bf215546Sopenharmony_ci      fprintf(stderr, "*************************************************\n");
598bf215546Sopenharmony_ci      warn_once = false;
599bf215546Sopenharmony_ci   }
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_ci   sctx->thread_trace = CALLOC_STRUCT(ac_thread_trace_data);
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_ci   if (sctx->gfx_level < GFX8) {
604bf215546Sopenharmony_ci      fprintf(stderr, "GPU hardware not supported: refer to "
605bf215546Sopenharmony_ci              "the RGP documentation for the list of "
606bf215546Sopenharmony_ci              "supported GPUs!\n");
607bf215546Sopenharmony_ci      return false;
608bf215546Sopenharmony_ci   }
609bf215546Sopenharmony_ci
610bf215546Sopenharmony_ci   if (sctx->gfx_level > GFX10_3) {
611bf215546Sopenharmony_ci      fprintf(stderr, "radeonsi: Thread trace is not supported "
612bf215546Sopenharmony_ci              "for that GPU!\n");
613bf215546Sopenharmony_ci      return false;
614bf215546Sopenharmony_ci   }
615bf215546Sopenharmony_ci
616bf215546Sopenharmony_ci   /* Default buffer size set to 32MB per SE. */
617bf215546Sopenharmony_ci   sctx->thread_trace->buffer_size = debug_get_num_option("AMD_THREAD_TRACE_BUFFER_SIZE", 32 * 1024) * 1024;
618bf215546Sopenharmony_ci   sctx->thread_trace->start_frame = 10;
619bf215546Sopenharmony_ci
620bf215546Sopenharmony_ci   const char *trigger = getenv("AMD_THREAD_TRACE_TRIGGER");
621bf215546Sopenharmony_ci   if (trigger) {
622bf215546Sopenharmony_ci      sctx->thread_trace->start_frame = atoi(trigger);
623bf215546Sopenharmony_ci      if (sctx->thread_trace->start_frame <= 0) {
624bf215546Sopenharmony_ci         /* This isn't a frame number, must be a file */
625bf215546Sopenharmony_ci         sctx->thread_trace->trigger_file = strdup(trigger);
626bf215546Sopenharmony_ci         sctx->thread_trace->start_frame = -1;
627bf215546Sopenharmony_ci      }
628bf215546Sopenharmony_ci   }
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_ci   if (!si_thread_trace_init_bo(sctx))
631bf215546Sopenharmony_ci      return false;
632bf215546Sopenharmony_ci
633bf215546Sopenharmony_ci   list_inithead(&sctx->thread_trace->rgp_pso_correlation.record);
634bf215546Sopenharmony_ci   simple_mtx_init(&sctx->thread_trace->rgp_pso_correlation.lock, mtx_plain);
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci   list_inithead(&sctx->thread_trace->rgp_loader_events.record);
637bf215546Sopenharmony_ci   simple_mtx_init(&sctx->thread_trace->rgp_loader_events.lock, mtx_plain);
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_ci   list_inithead(&sctx->thread_trace->rgp_code_object.record);
640bf215546Sopenharmony_ci   simple_mtx_init(&sctx->thread_trace->rgp_code_object.lock, mtx_plain);
641bf215546Sopenharmony_ci
642bf215546Sopenharmony_ci   if (sctx->gfx_level >= GFX10) {
643bf215546Sopenharmony_ci      /* Limit SPM counters to GFX10+ for now */
644bf215546Sopenharmony_ci      ASSERTED bool r = si_spm_init(sctx);
645bf215546Sopenharmony_ci      assert(r);
646bf215546Sopenharmony_ci   }
647bf215546Sopenharmony_ci
648bf215546Sopenharmony_ci   si_thread_trace_init_cs(sctx);
649bf215546Sopenharmony_ci
650bf215546Sopenharmony_ci   sctx->sqtt_next_event = EventInvalid;
651bf215546Sopenharmony_ci
652bf215546Sopenharmony_ci   return true;
653bf215546Sopenharmony_ci}
654bf215546Sopenharmony_ci
655bf215546Sopenharmony_civoid
656bf215546Sopenharmony_cisi_destroy_thread_trace(struct si_context *sctx)
657bf215546Sopenharmony_ci{
658bf215546Sopenharmony_ci   struct si_screen *sscreen = sctx->screen;
659bf215546Sopenharmony_ci   struct pb_buffer *bo = sctx->thread_trace->bo;
660bf215546Sopenharmony_ci   radeon_bo_reference(sctx->screen->ws, &bo, NULL);
661bf215546Sopenharmony_ci
662bf215546Sopenharmony_ci   if (sctx->thread_trace->trigger_file)
663bf215546Sopenharmony_ci      free(sctx->thread_trace->trigger_file);
664bf215546Sopenharmony_ci
665bf215546Sopenharmony_ci   sscreen->ws->cs_destroy(sctx->thread_trace->start_cs[AMD_IP_GFX]);
666bf215546Sopenharmony_ci   sscreen->ws->cs_destroy(sctx->thread_trace->stop_cs[AMD_IP_GFX]);
667bf215546Sopenharmony_ci
668bf215546Sopenharmony_ci   struct rgp_pso_correlation *pso_correlation = &sctx->thread_trace->rgp_pso_correlation;
669bf215546Sopenharmony_ci   struct rgp_loader_events *loader_events = &sctx->thread_trace->rgp_loader_events;
670bf215546Sopenharmony_ci   struct rgp_code_object *code_object = &sctx->thread_trace->rgp_code_object;
671bf215546Sopenharmony_ci   list_for_each_entry_safe(struct rgp_pso_correlation_record, record,
672bf215546Sopenharmony_ci                            &pso_correlation->record, list) {
673bf215546Sopenharmony_ci      list_del(&record->list);
674bf215546Sopenharmony_ci      free(record);
675bf215546Sopenharmony_ci   }
676bf215546Sopenharmony_ci   simple_mtx_destroy(&sctx->thread_trace->rgp_pso_correlation.lock);
677bf215546Sopenharmony_ci
678bf215546Sopenharmony_ci   list_for_each_entry_safe(struct rgp_loader_events_record, record,
679bf215546Sopenharmony_ci                            &loader_events->record, list) {
680bf215546Sopenharmony_ci      list_del(&record->list);
681bf215546Sopenharmony_ci      free(record);
682bf215546Sopenharmony_ci   }
683bf215546Sopenharmony_ci   simple_mtx_destroy(&sctx->thread_trace->rgp_loader_events.lock);
684bf215546Sopenharmony_ci
685bf215546Sopenharmony_ci   list_for_each_entry_safe(struct rgp_code_object_record, record,
686bf215546Sopenharmony_ci             &code_object->record, list) {
687bf215546Sopenharmony_ci      uint32_t mask = record->shader_stages_mask;
688bf215546Sopenharmony_ci      int i;
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci      /* Free the disassembly. */
691bf215546Sopenharmony_ci      while (mask) {
692bf215546Sopenharmony_ci         i = u_bit_scan(&mask);
693bf215546Sopenharmony_ci         free(record->shader_data[i].code);
694bf215546Sopenharmony_ci      }
695bf215546Sopenharmony_ci      list_del(&record->list);
696bf215546Sopenharmony_ci      free(record);
697bf215546Sopenharmony_ci   }
698bf215546Sopenharmony_ci   simple_mtx_destroy(&sctx->thread_trace->rgp_code_object.lock);
699bf215546Sopenharmony_ci
700bf215546Sopenharmony_ci   free(sctx->thread_trace);
701bf215546Sopenharmony_ci   sctx->thread_trace = NULL;
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci   if (sctx->gfx_level >= GFX10)
704bf215546Sopenharmony_ci      si_spm_finish(sctx);
705bf215546Sopenharmony_ci}
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_cistatic uint64_t num_frames = 0;
708bf215546Sopenharmony_ci
709bf215546Sopenharmony_civoid
710bf215546Sopenharmony_cisi_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
711bf215546Sopenharmony_ci{
712bf215546Sopenharmony_ci   /* Should we enable SQTT yet? */
713bf215546Sopenharmony_ci   if (!sctx->thread_trace_enabled) {
714bf215546Sopenharmony_ci      bool frame_trigger = num_frames == sctx->thread_trace->start_frame;
715bf215546Sopenharmony_ci      bool file_trigger = false;
716bf215546Sopenharmony_ci      if (sctx->thread_trace->trigger_file &&
717bf215546Sopenharmony_ci          access(sctx->thread_trace->trigger_file, W_OK) == 0) {
718bf215546Sopenharmony_ci         if (unlink(sctx->thread_trace->trigger_file) == 0) {
719bf215546Sopenharmony_ci            file_trigger = true;
720bf215546Sopenharmony_ci         } else {
721bf215546Sopenharmony_ci            /* Do not enable tracing if we cannot remove the file,
722bf215546Sopenharmony_ci             * because by then we'll trace every frame.
723bf215546Sopenharmony_ci             */
724bf215546Sopenharmony_ci            fprintf(stderr, "radeonsi: could not remove thread trace trigger file, ignoring\n");
725bf215546Sopenharmony_ci         }
726bf215546Sopenharmony_ci      }
727bf215546Sopenharmony_ci
728bf215546Sopenharmony_ci      if (frame_trigger || file_trigger) {
729bf215546Sopenharmony_ci         /* Wait for last submission */
730bf215546Sopenharmony_ci         sctx->ws->fence_wait(sctx->ws, sctx->last_gfx_fence, PIPE_TIMEOUT_INFINITE);
731bf215546Sopenharmony_ci
732bf215546Sopenharmony_ci         /* Start SQTT */
733bf215546Sopenharmony_ci         si_begin_thread_trace(sctx, rcs);
734bf215546Sopenharmony_ci
735bf215546Sopenharmony_ci         sctx->thread_trace_enabled = true;
736bf215546Sopenharmony_ci         sctx->thread_trace->start_frame = -1;
737bf215546Sopenharmony_ci
738bf215546Sopenharmony_ci         /* Force shader update to make sure si_sqtt_describe_pipeline_bind is called
739bf215546Sopenharmony_ci          * for the current "pipeline".
740bf215546Sopenharmony_ci          */
741bf215546Sopenharmony_ci         sctx->do_update_shaders = true;
742bf215546Sopenharmony_ci      }
743bf215546Sopenharmony_ci   } else {
744bf215546Sopenharmony_ci      struct ac_thread_trace thread_trace = {0};
745bf215546Sopenharmony_ci
746bf215546Sopenharmony_ci      /* Stop SQTT */
747bf215546Sopenharmony_ci      si_end_thread_trace(sctx, rcs);
748bf215546Sopenharmony_ci      sctx->thread_trace_enabled = false;
749bf215546Sopenharmony_ci      sctx->thread_trace->start_frame = -1;
750bf215546Sopenharmony_ci      assert (sctx->last_sqtt_fence);
751bf215546Sopenharmony_ci
752bf215546Sopenharmony_ci      /* Wait for SQTT to finish and read back the bo */
753bf215546Sopenharmony_ci      if (sctx->ws->fence_wait(sctx->ws, sctx->last_sqtt_fence, PIPE_TIMEOUT_INFINITE) &&
754bf215546Sopenharmony_ci          si_get_thread_trace(sctx, &thread_trace)) {
755bf215546Sopenharmony_ci         /* Map the SPM counter buffer */
756bf215546Sopenharmony_ci         if (sctx->gfx_level >= GFX10)
757bf215546Sopenharmony_ci            sctx->spm_trace.ptr = sctx->ws->buffer_map(sctx->ws, sctx->spm_trace.bo,
758bf215546Sopenharmony_ci                                                       NULL, PIPE_MAP_READ | RADEON_MAP_TEMPORARY);
759bf215546Sopenharmony_ci
760bf215546Sopenharmony_ci         ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, &sctx->spm_trace);
761bf215546Sopenharmony_ci
762bf215546Sopenharmony_ci         if (sctx->spm_trace.ptr)
763bf215546Sopenharmony_ci            sctx->ws->buffer_unmap(sctx->ws, sctx->spm_trace.bo);
764bf215546Sopenharmony_ci      } else {
765bf215546Sopenharmony_ci         fprintf(stderr, "Failed to read the trace\n");
766bf215546Sopenharmony_ci      }
767bf215546Sopenharmony_ci   }
768bf215546Sopenharmony_ci
769bf215546Sopenharmony_ci   num_frames++;
770bf215546Sopenharmony_ci}
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_ci
773bf215546Sopenharmony_cistatic void
774bf215546Sopenharmony_cisi_emit_thread_trace_userdata(struct si_context* sctx,
775bf215546Sopenharmony_ci                              struct radeon_cmdbuf *cs,
776bf215546Sopenharmony_ci                              const void *data, uint32_t num_dwords)
777bf215546Sopenharmony_ci{
778bf215546Sopenharmony_ci   const uint32_t *dwords = (uint32_t *)data;
779bf215546Sopenharmony_ci
780bf215546Sopenharmony_ci   radeon_begin(cs);
781bf215546Sopenharmony_ci
782bf215546Sopenharmony_ci   while (num_dwords > 0) {
783bf215546Sopenharmony_ci      uint32_t count = MIN2(num_dwords, 2);
784bf215546Sopenharmony_ci
785bf215546Sopenharmony_ci      /* Without the perfctr bit the CP might not always pass the
786bf215546Sopenharmony_ci       * write on correctly. */
787bf215546Sopenharmony_ci      radeon_set_uconfig_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count, sctx->gfx_level >= GFX10);
788bf215546Sopenharmony_ci
789bf215546Sopenharmony_ci      radeon_emit_array(dwords, count);
790bf215546Sopenharmony_ci
791bf215546Sopenharmony_ci      dwords += count;
792bf215546Sopenharmony_ci      num_dwords -= count;
793bf215546Sopenharmony_ci   }
794bf215546Sopenharmony_ci   radeon_end();
795bf215546Sopenharmony_ci}
796bf215546Sopenharmony_ci
797bf215546Sopenharmony_cistatic void
798bf215546Sopenharmony_cisi_emit_spi_config_cntl(struct si_context* sctx,
799bf215546Sopenharmony_ci           struct radeon_cmdbuf *cs, bool enable)
800bf215546Sopenharmony_ci{
801bf215546Sopenharmony_ci   radeon_begin(cs);
802bf215546Sopenharmony_ci
803bf215546Sopenharmony_ci   if (sctx->gfx_level >= GFX9) {
804bf215546Sopenharmony_ci      uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) |
805bf215546Sopenharmony_ci                                 S_031100_EXP_PRIORITY_ORDER(3) |
806bf215546Sopenharmony_ci                                 S_031100_ENABLE_SQG_TOP_EVENTS(enable) |
807bf215546Sopenharmony_ci                                 S_031100_ENABLE_SQG_BOP_EVENTS(enable);
808bf215546Sopenharmony_ci
809bf215546Sopenharmony_ci      if (sctx->gfx_level >= GFX10)
810bf215546Sopenharmony_ci         spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3);
811bf215546Sopenharmony_ci
812bf215546Sopenharmony_ci      radeon_set_uconfig_reg(R_031100_SPI_CONFIG_CNTL, spi_config_cntl);
813bf215546Sopenharmony_ci   } else {
814bf215546Sopenharmony_ci      /* SPI_CONFIG_CNTL is a protected register on GFX6-GFX8. */
815bf215546Sopenharmony_ci      radeon_set_privileged_config_reg(R_009100_SPI_CONFIG_CNTL,
816bf215546Sopenharmony_ci                                       S_009100_ENABLE_SQG_TOP_EVENTS(enable) |
817bf215546Sopenharmony_ci                                       S_009100_ENABLE_SQG_BOP_EVENTS(enable));
818bf215546Sopenharmony_ci   }
819bf215546Sopenharmony_ci   radeon_end();
820bf215546Sopenharmony_ci}
821bf215546Sopenharmony_ci
822bf215546Sopenharmony_cistatic uint32_t num_events = 0;
823bf215546Sopenharmony_civoid
824bf215546Sopenharmony_cisi_sqtt_write_event_marker(struct si_context* sctx, struct radeon_cmdbuf *rcs,
825bf215546Sopenharmony_ci                           enum rgp_sqtt_marker_event_type api_type,
826bf215546Sopenharmony_ci                           uint32_t vertex_offset_user_data,
827bf215546Sopenharmony_ci                           uint32_t instance_offset_user_data,
828bf215546Sopenharmony_ci                           uint32_t draw_index_user_data)
829bf215546Sopenharmony_ci{
830bf215546Sopenharmony_ci   struct rgp_sqtt_marker_event marker = {0};
831bf215546Sopenharmony_ci
832bf215546Sopenharmony_ci   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
833bf215546Sopenharmony_ci   marker.api_type = api_type == EventInvalid ? EventCmdDraw : api_type;
834bf215546Sopenharmony_ci   marker.cmd_id = num_events++;
835bf215546Sopenharmony_ci   marker.cb_id = 0;
836bf215546Sopenharmony_ci
837bf215546Sopenharmony_ci   if (vertex_offset_user_data == UINT_MAX ||
838bf215546Sopenharmony_ci       instance_offset_user_data == UINT_MAX) {
839bf215546Sopenharmony_ci      vertex_offset_user_data = 0;
840bf215546Sopenharmony_ci      instance_offset_user_data = 0;
841bf215546Sopenharmony_ci   }
842bf215546Sopenharmony_ci
843bf215546Sopenharmony_ci   if (draw_index_user_data == UINT_MAX)
844bf215546Sopenharmony_ci      draw_index_user_data = vertex_offset_user_data;
845bf215546Sopenharmony_ci
846bf215546Sopenharmony_ci   marker.vertex_offset_reg_idx = vertex_offset_user_data;
847bf215546Sopenharmony_ci   marker.instance_offset_reg_idx = instance_offset_user_data;
848bf215546Sopenharmony_ci   marker.draw_index_reg_idx = draw_index_user_data;
849bf215546Sopenharmony_ci
850bf215546Sopenharmony_ci   si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
851bf215546Sopenharmony_ci
852bf215546Sopenharmony_ci   sctx->sqtt_next_event = EventInvalid;
853bf215546Sopenharmony_ci}
854bf215546Sopenharmony_ci
855bf215546Sopenharmony_civoid
856bf215546Sopenharmony_cisi_write_event_with_dims_marker(struct si_context* sctx, struct radeon_cmdbuf *rcs,
857bf215546Sopenharmony_ci                                enum rgp_sqtt_marker_event_type api_type,
858bf215546Sopenharmony_ci                                uint32_t x, uint32_t y, uint32_t z)
859bf215546Sopenharmony_ci{
860bf215546Sopenharmony_ci   struct rgp_sqtt_marker_event_with_dims marker = {0};
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci   marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
863bf215546Sopenharmony_ci   marker.event.api_type = api_type;
864bf215546Sopenharmony_ci   marker.event.cmd_id = num_events++;
865bf215546Sopenharmony_ci   marker.event.cb_id = 0;
866bf215546Sopenharmony_ci   marker.event.has_thread_dims = 1;
867bf215546Sopenharmony_ci
868bf215546Sopenharmony_ci   marker.thread_x = x;
869bf215546Sopenharmony_ci   marker.thread_y = y;
870bf215546Sopenharmony_ci   marker.thread_z = z;
871bf215546Sopenharmony_ci
872bf215546Sopenharmony_ci   si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
873bf215546Sopenharmony_ci   sctx->sqtt_next_event = EventInvalid;
874bf215546Sopenharmony_ci}
875bf215546Sopenharmony_ci
876bf215546Sopenharmony_civoid
877bf215546Sopenharmony_cisi_sqtt_describe_barrier_start(struct si_context* sctx, struct radeon_cmdbuf *rcs)
878bf215546Sopenharmony_ci{
879bf215546Sopenharmony_ci   struct rgp_sqtt_marker_barrier_start marker = {0};
880bf215546Sopenharmony_ci
881bf215546Sopenharmony_ci   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
882bf215546Sopenharmony_ci   marker.cb_id = 0;
883bf215546Sopenharmony_ci   marker.dword02 = 0xC0000000 + 10; /* RGP_BARRIER_INTERNAL_BASE */
884bf215546Sopenharmony_ci
885bf215546Sopenharmony_ci   si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
886bf215546Sopenharmony_ci}
887bf215546Sopenharmony_ci
888bf215546Sopenharmony_civoid
889bf215546Sopenharmony_cisi_sqtt_describe_barrier_end(struct si_context* sctx, struct radeon_cmdbuf *rcs,
890bf215546Sopenharmony_ci                            unsigned flags)
891bf215546Sopenharmony_ci{
892bf215546Sopenharmony_ci   struct rgp_sqtt_marker_barrier_end marker = {0};
893bf215546Sopenharmony_ci
894bf215546Sopenharmony_ci   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
895bf215546Sopenharmony_ci   marker.cb_id = 0;
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_ci   if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH)
898bf215546Sopenharmony_ci      marker.vs_partial_flush = true;
899bf215546Sopenharmony_ci   if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH)
900bf215546Sopenharmony_ci      marker.ps_partial_flush = true;
901bf215546Sopenharmony_ci   if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH)
902bf215546Sopenharmony_ci      marker.cs_partial_flush = true;
903bf215546Sopenharmony_ci
904bf215546Sopenharmony_ci   if (flags & SI_CONTEXT_PFP_SYNC_ME)
905bf215546Sopenharmony_ci      marker.pfp_sync_me = true;
906bf215546Sopenharmony_ci
907bf215546Sopenharmony_ci   if (flags & SI_CONTEXT_INV_VCACHE)
908bf215546Sopenharmony_ci      marker.inval_tcp = true;
909bf215546Sopenharmony_ci   if (flags & SI_CONTEXT_INV_ICACHE)
910bf215546Sopenharmony_ci      marker.inval_sqI = true;
911bf215546Sopenharmony_ci   if (flags & SI_CONTEXT_INV_SCACHE)
912bf215546Sopenharmony_ci      marker.inval_sqK = true;
913bf215546Sopenharmony_ci   if (flags & SI_CONTEXT_INV_L2)
914bf215546Sopenharmony_ci      marker.inval_tcc = true;
915bf215546Sopenharmony_ci
916bf215546Sopenharmony_ci   if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
917bf215546Sopenharmony_ci      marker.inval_cb = true;
918bf215546Sopenharmony_ci      marker.flush_cb = true;
919bf215546Sopenharmony_ci   }
920bf215546Sopenharmony_ci   if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
921bf215546Sopenharmony_ci      marker.inval_db = true;
922bf215546Sopenharmony_ci      marker.flush_db = true;
923bf215546Sopenharmony_ci   }
924bf215546Sopenharmony_ci
925bf215546Sopenharmony_ci   si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
926bf215546Sopenharmony_ci}
927bf215546Sopenharmony_ci
928bf215546Sopenharmony_civoid
929bf215546Sopenharmony_cisi_write_user_event(struct si_context* sctx, struct radeon_cmdbuf *rcs,
930bf215546Sopenharmony_ci                    enum rgp_sqtt_marker_user_event_type type,
931bf215546Sopenharmony_ci                    const char *str, int len)
932bf215546Sopenharmony_ci{
933bf215546Sopenharmony_ci   if (type == UserEventPop) {
934bf215546Sopenharmony_ci      assert (str == NULL);
935bf215546Sopenharmony_ci      struct rgp_sqtt_marker_user_event marker = { 0 };
936bf215546Sopenharmony_ci      marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
937bf215546Sopenharmony_ci      marker.data_type = type;
938bf215546Sopenharmony_ci
939bf215546Sopenharmony_ci      si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
940bf215546Sopenharmony_ci   } else {
941bf215546Sopenharmony_ci      assert (str != NULL);
942bf215546Sopenharmony_ci      struct rgp_sqtt_marker_user_event_with_length marker = { 0 };
943bf215546Sopenharmony_ci      marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
944bf215546Sopenharmony_ci      marker.user_event.data_type = type;
945bf215546Sopenharmony_ci      len = MIN2(1024, len);
946bf215546Sopenharmony_ci      marker.length = align(len, 4);
947bf215546Sopenharmony_ci
948bf215546Sopenharmony_ci      uint8_t *buffer = alloca(sizeof(marker) + marker.length);
949bf215546Sopenharmony_ci      memcpy(buffer, &marker, sizeof(marker));
950bf215546Sopenharmony_ci      memcpy(buffer + sizeof(marker), str, len);
951bf215546Sopenharmony_ci      buffer[sizeof(marker) + len - 1] = '\0';
952bf215546Sopenharmony_ci
953bf215546Sopenharmony_ci      si_emit_thread_trace_userdata(sctx, rcs, buffer, sizeof(marker) / 4 + marker.length / 4);
954bf215546Sopenharmony_ci   }
955bf215546Sopenharmony_ci}
956bf215546Sopenharmony_ci
957bf215546Sopenharmony_ci
958bf215546Sopenharmony_cibool
959bf215546Sopenharmony_cisi_sqtt_pipeline_is_registered(struct ac_thread_trace_data *thread_trace_data,
960bf215546Sopenharmony_ci                               uint64_t pipeline_hash)
961bf215546Sopenharmony_ci{
962bf215546Sopenharmony_ci   simple_mtx_lock(&thread_trace_data->rgp_pso_correlation.lock);
963bf215546Sopenharmony_ci   list_for_each_entry_safe(struct rgp_pso_correlation_record, record,
964bf215546Sopenharmony_ci             &thread_trace_data->rgp_pso_correlation.record, list) {
965bf215546Sopenharmony_ci      if (record->pipeline_hash[0] == pipeline_hash) {
966bf215546Sopenharmony_ci         simple_mtx_unlock(&thread_trace_data->rgp_pso_correlation.lock);
967bf215546Sopenharmony_ci         return true;
968bf215546Sopenharmony_ci      }
969bf215546Sopenharmony_ci
970bf215546Sopenharmony_ci   }
971bf215546Sopenharmony_ci   simple_mtx_unlock(&thread_trace_data->rgp_pso_correlation.lock);
972bf215546Sopenharmony_ci
973bf215546Sopenharmony_ci   return false;
974bf215546Sopenharmony_ci}
975bf215546Sopenharmony_ci
976bf215546Sopenharmony_ci
977bf215546Sopenharmony_ci
978bf215546Sopenharmony_cistatic enum rgp_hardware_stages
979bf215546Sopenharmony_cisi_sqtt_pipe_to_rgp_shader_stage(union si_shader_key* key, enum pipe_shader_type stage)
980bf215546Sopenharmony_ci{
981bf215546Sopenharmony_ci   switch (stage) {
982bf215546Sopenharmony_ci   case PIPE_SHADER_VERTEX:
983bf215546Sopenharmony_ci      if (key->ge.as_ls)
984bf215546Sopenharmony_ci         return RGP_HW_STAGE_LS;
985bf215546Sopenharmony_ci      else if (key->ge.as_es)
986bf215546Sopenharmony_ci         return RGP_HW_STAGE_ES;
987bf215546Sopenharmony_ci      else if (key->ge.as_ngg)
988bf215546Sopenharmony_ci         return RGP_HW_STAGE_GS;
989bf215546Sopenharmony_ci      else
990bf215546Sopenharmony_ci         return RGP_HW_STAGE_VS;
991bf215546Sopenharmony_ci   case PIPE_SHADER_TESS_CTRL:
992bf215546Sopenharmony_ci      return RGP_HW_STAGE_HS;
993bf215546Sopenharmony_ci   case PIPE_SHADER_TESS_EVAL:
994bf215546Sopenharmony_ci      if (key->ge.as_es)
995bf215546Sopenharmony_ci         return RGP_HW_STAGE_ES;
996bf215546Sopenharmony_ci      else if (key->ge.as_ngg)
997bf215546Sopenharmony_ci         return RGP_HW_STAGE_GS;
998bf215546Sopenharmony_ci      else
999bf215546Sopenharmony_ci         return RGP_HW_STAGE_VS;
1000bf215546Sopenharmony_ci   case PIPE_SHADER_GEOMETRY:
1001bf215546Sopenharmony_ci      return RGP_HW_STAGE_GS;
1002bf215546Sopenharmony_ci   case PIPE_SHADER_FRAGMENT:
1003bf215546Sopenharmony_ci      return RGP_HW_STAGE_PS;
1004bf215546Sopenharmony_ci   case PIPE_SHADER_COMPUTE:
1005bf215546Sopenharmony_ci      return RGP_HW_STAGE_CS;
1006bf215546Sopenharmony_ci   default:
1007bf215546Sopenharmony_ci      unreachable("invalid mesa shader stage");
1008bf215546Sopenharmony_ci   }
1009bf215546Sopenharmony_ci}
1010bf215546Sopenharmony_ci
1011bf215546Sopenharmony_cistatic bool
1012bf215546Sopenharmony_cisi_sqtt_add_code_object(struct si_context* sctx,
1013bf215546Sopenharmony_ci                        uint64_t pipeline_hash,
1014bf215546Sopenharmony_ci                        bool is_compute)
1015bf215546Sopenharmony_ci{
1016bf215546Sopenharmony_ci   struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
1017bf215546Sopenharmony_ci   struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
1018bf215546Sopenharmony_ci   struct rgp_code_object_record *record;
1019bf215546Sopenharmony_ci
1020bf215546Sopenharmony_ci   record = malloc(sizeof(struct rgp_code_object_record));
1021bf215546Sopenharmony_ci   if (!record)
1022bf215546Sopenharmony_ci      return false;
1023bf215546Sopenharmony_ci
1024bf215546Sopenharmony_ci   record->shader_stages_mask = 0;
1025bf215546Sopenharmony_ci   record->num_shaders_combined = 0;
1026bf215546Sopenharmony_ci   record->pipeline_hash[0] = pipeline_hash;
1027bf215546Sopenharmony_ci   record->pipeline_hash[1] = pipeline_hash;
1028bf215546Sopenharmony_ci
1029bf215546Sopenharmony_ci   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
1030bf215546Sopenharmony_ci      struct si_shader *shader;
1031bf215546Sopenharmony_ci      enum rgp_hardware_stages hw_stage;
1032bf215546Sopenharmony_ci
1033bf215546Sopenharmony_ci      if (is_compute) {
1034bf215546Sopenharmony_ci         if (i != PIPE_SHADER_COMPUTE)
1035bf215546Sopenharmony_ci            continue;
1036bf215546Sopenharmony_ci         shader = &sctx->cs_shader_state.program->shader;
1037bf215546Sopenharmony_ci         hw_stage = RGP_HW_STAGE_CS;
1038bf215546Sopenharmony_ci      } else if (i != PIPE_SHADER_COMPUTE) {
1039bf215546Sopenharmony_ci         if (!sctx->shaders[i].cso || !sctx->shaders[i].current)
1040bf215546Sopenharmony_ci            continue;
1041bf215546Sopenharmony_ci         shader = sctx->shaders[i].current;
1042bf215546Sopenharmony_ci         hw_stage = si_sqtt_pipe_to_rgp_shader_stage(&shader->key, i);
1043bf215546Sopenharmony_ci      } else {
1044bf215546Sopenharmony_ci         continue;
1045bf215546Sopenharmony_ci      }
1046bf215546Sopenharmony_ci
1047bf215546Sopenharmony_ci      uint8_t *code = malloc(shader->binary.uploaded_code_size);
1048bf215546Sopenharmony_ci      if (!code) {
1049bf215546Sopenharmony_ci         free(record);
1050bf215546Sopenharmony_ci         return false;
1051bf215546Sopenharmony_ci      }
1052bf215546Sopenharmony_ci      memcpy(code, shader->binary.uploaded_code, shader->binary.uploaded_code_size);
1053bf215546Sopenharmony_ci
1054bf215546Sopenharmony_ci      uint64_t va = shader->bo->gpu_address;
1055bf215546Sopenharmony_ci      unsigned gl_shader_stage = tgsi_processor_to_shader_stage(i);
1056bf215546Sopenharmony_ci      record->shader_data[gl_shader_stage].hash[0] = _mesa_hash_data(code, shader->binary.uploaded_code_size);
1057bf215546Sopenharmony_ci      record->shader_data[gl_shader_stage].hash[1] = record->shader_data[gl_shader_stage].hash[0];
1058bf215546Sopenharmony_ci      record->shader_data[gl_shader_stage].code_size = shader->binary.uploaded_code_size;
1059bf215546Sopenharmony_ci      record->shader_data[gl_shader_stage].code = code;
1060bf215546Sopenharmony_ci      record->shader_data[gl_shader_stage].vgpr_count = shader->config.num_vgprs;
1061bf215546Sopenharmony_ci      record->shader_data[gl_shader_stage].sgpr_count = shader->config.num_sgprs;
1062bf215546Sopenharmony_ci      record->shader_data[gl_shader_stage].base_address = va & 0xffffffffffff;
1063bf215546Sopenharmony_ci      record->shader_data[gl_shader_stage].elf_symbol_offset = 0;
1064bf215546Sopenharmony_ci      record->shader_data[gl_shader_stage].hw_stage = hw_stage;
1065bf215546Sopenharmony_ci      record->shader_data[gl_shader_stage].is_combined = false;
1066bf215546Sopenharmony_ci      record->shader_data[gl_shader_stage].scratch_memory_size = shader->config.scratch_bytes_per_wave;
1067bf215546Sopenharmony_ci      record->shader_data[gl_shader_stage].wavefront_size = shader->wave_size;
1068bf215546Sopenharmony_ci
1069bf215546Sopenharmony_ci      record->shader_stages_mask |= 1 << gl_shader_stage;
1070bf215546Sopenharmony_ci      record->num_shaders_combined++;
1071bf215546Sopenharmony_ci   }
1072bf215546Sopenharmony_ci
1073bf215546Sopenharmony_ci   simple_mtx_lock(&code_object->lock);
1074bf215546Sopenharmony_ci   list_addtail(&record->list, &code_object->record);
1075bf215546Sopenharmony_ci   code_object->record_count++;
1076bf215546Sopenharmony_ci   simple_mtx_unlock(&code_object->lock);
1077bf215546Sopenharmony_ci
1078bf215546Sopenharmony_ci   return true;
1079bf215546Sopenharmony_ci}
1080bf215546Sopenharmony_ci
1081bf215546Sopenharmony_cibool
1082bf215546Sopenharmony_cisi_sqtt_register_pipeline(struct si_context* sctx, uint64_t pipeline_hash, uint64_t base_address, bool is_compute)
1083bf215546Sopenharmony_ci{
1084bf215546Sopenharmony_ci   struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
1085bf215546Sopenharmony_ci
1086bf215546Sopenharmony_ci   assert (!si_sqtt_pipeline_is_registered(thread_trace_data, pipeline_hash));
1087bf215546Sopenharmony_ci
1088bf215546Sopenharmony_ci   bool result = ac_sqtt_add_pso_correlation(thread_trace_data, pipeline_hash);
1089bf215546Sopenharmony_ci   if (!result)
1090bf215546Sopenharmony_ci      return false;
1091bf215546Sopenharmony_ci
1092bf215546Sopenharmony_ci   result = ac_sqtt_add_code_object_loader_event(thread_trace_data, pipeline_hash, base_address);
1093bf215546Sopenharmony_ci   if (!result)
1094bf215546Sopenharmony_ci      return false;
1095bf215546Sopenharmony_ci
1096bf215546Sopenharmony_ci   return si_sqtt_add_code_object(sctx, pipeline_hash, is_compute);
1097bf215546Sopenharmony_ci}
1098bf215546Sopenharmony_ci
1099bf215546Sopenharmony_civoid
1100bf215546Sopenharmony_cisi_sqtt_describe_pipeline_bind(struct si_context* sctx,
1101bf215546Sopenharmony_ci                               uint64_t pipeline_hash,
1102bf215546Sopenharmony_ci                               int bind_point)
1103bf215546Sopenharmony_ci{
1104bf215546Sopenharmony_ci   struct rgp_sqtt_marker_pipeline_bind marker = {0};
1105bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
1106bf215546Sopenharmony_ci
1107bf215546Sopenharmony_ci   if (likely(!sctx->thread_trace_enabled)) {
1108bf215546Sopenharmony_ci      return;
1109bf215546Sopenharmony_ci   }
1110bf215546Sopenharmony_ci
1111bf215546Sopenharmony_ci   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
1112bf215546Sopenharmony_ci   marker.cb_id = 0;
1113bf215546Sopenharmony_ci   marker.bind_point = bind_point;
1114bf215546Sopenharmony_ci   marker.api_pso_hash[0] = pipeline_hash;
1115bf215546Sopenharmony_ci   marker.api_pso_hash[1] = pipeline_hash >> 32;
1116bf215546Sopenharmony_ci
1117bf215546Sopenharmony_ci   si_emit_thread_trace_userdata(sctx, cs, &marker, sizeof(marker) / 4);
1118bf215546Sopenharmony_ci}
1119