/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "si_build_pm4.h"
26bf215546Sopenharmony_ci#include "si_query.h"
27bf215546Sopenharmony_ci#include "util/u_memory.h"
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#include "ac_perfcounter.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_cistruct si_query_group {
32bf215546Sopenharmony_ci   struct si_query_group *next;
33bf215546Sopenharmony_ci   struct ac_pc_block *block;
34bf215546Sopenharmony_ci   unsigned sub_gid;     /* only used during init */
35bf215546Sopenharmony_ci   unsigned result_base; /* only used during init */
36bf215546Sopenharmony_ci   int se;
37bf215546Sopenharmony_ci   int instance;
38bf215546Sopenharmony_ci   unsigned num_counters;
39bf215546Sopenharmony_ci   unsigned selectors[AC_QUERY_MAX_COUNTERS];
40bf215546Sopenharmony_ci};
41bf215546Sopenharmony_ci
/* Describes where the samples of one user-visible counter live inside a
 * result slot. All three fields are expressed in uint64 elements: sample j
 * is read from index base + j * stride.
 */
struct si_query_counter {
   unsigned int base;   /* index of the first sample */
   unsigned int qwords; /* number of samples to accumulate */
   unsigned int stride; /* in uint64s */
};
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_cistruct si_query_pc {
49bf215546Sopenharmony_ci   struct si_query b;
50bf215546Sopenharmony_ci   struct si_query_buffer buffer;
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_ci   /* Size of the results in memory, in bytes. */
53bf215546Sopenharmony_ci   unsigned result_size;
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci   unsigned shaders;
56bf215546Sopenharmony_ci   unsigned num_counters;
57bf215546Sopenharmony_ci   struct si_query_counter *counters;
58bf215546Sopenharmony_ci   struct si_query_group *groups;
59bf215546Sopenharmony_ci};
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_cistatic void si_pc_emit_instance(struct si_context *sctx, int se, int instance)
62bf215546Sopenharmony_ci{
63bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
64bf215546Sopenharmony_ci   unsigned value = S_030800_SH_BROADCAST_WRITES(1);
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_ci   if (se >= 0) {
67bf215546Sopenharmony_ci      value |= S_030800_SE_INDEX(se);
68bf215546Sopenharmony_ci   } else {
69bf215546Sopenharmony_ci      value |= S_030800_SE_BROADCAST_WRITES(1);
70bf215546Sopenharmony_ci   }
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci   if (sctx->gfx_level >= GFX10) {
73bf215546Sopenharmony_ci      /* TODO: Expose counters from each shader array separately if needed. */
74bf215546Sopenharmony_ci      value |= S_030800_SA_BROADCAST_WRITES(1);
75bf215546Sopenharmony_ci   }
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   if (instance >= 0) {
78bf215546Sopenharmony_ci      value |= S_030800_INSTANCE_INDEX(instance);
79bf215546Sopenharmony_ci   } else {
80bf215546Sopenharmony_ci      value |= S_030800_INSTANCE_BROADCAST_WRITES(1);
81bf215546Sopenharmony_ci   }
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci   radeon_begin(cs);
84bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, value);
85bf215546Sopenharmony_ci   radeon_end();
86bf215546Sopenharmony_ci}
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_civoid si_pc_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders)
89bf215546Sopenharmony_ci{
90bf215546Sopenharmony_ci   radeon_begin(cs);
91bf215546Sopenharmony_ci   radeon_set_uconfig_reg_seq(R_036780_SQ_PERFCOUNTER_CTRL, 2, false);
92bf215546Sopenharmony_ci   radeon_emit(shaders & 0x7f);
93bf215546Sopenharmony_ci   radeon_emit(0xffffffff);
94bf215546Sopenharmony_ci   radeon_end();
95bf215546Sopenharmony_ci}
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_cistatic void si_pc_emit_select(struct si_context *sctx, struct ac_pc_block *block, unsigned count,
98bf215546Sopenharmony_ci                              unsigned *selectors)
99bf215546Sopenharmony_ci{
100bf215546Sopenharmony_ci   struct ac_pc_block_base *regs = block->b->b;
101bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
102bf215546Sopenharmony_ci   unsigned idx;
103bf215546Sopenharmony_ci
104bf215546Sopenharmony_ci   assert(count <= regs->num_counters);
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci   /* Fake counters. */
107bf215546Sopenharmony_ci   if (!regs->select0)
108bf215546Sopenharmony_ci      return;
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_ci   radeon_begin(cs);
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci   for (idx = 0; idx < count; ++idx) {
113bf215546Sopenharmony_ci      radeon_set_uconfig_reg_seq(regs->select0[idx], 1, false);
114bf215546Sopenharmony_ci      radeon_emit(selectors[idx] | regs->select_or);
115bf215546Sopenharmony_ci   }
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci   for (idx = 0; idx < regs->num_spm_counters; idx++) {
118bf215546Sopenharmony_ci      radeon_set_uconfig_reg_seq(regs->select1[idx], 1, false);
119bf215546Sopenharmony_ci      radeon_emit(0);
120bf215546Sopenharmony_ci   }
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ci   radeon_end();
123bf215546Sopenharmony_ci}
124bf215546Sopenharmony_ci
125bf215546Sopenharmony_cistatic void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer, uint64_t va)
126bf215546Sopenharmony_ci{
127bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_ci   si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address,
130bf215546Sopenharmony_ci                   COPY_DATA_IMM, NULL, 1);
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_ci   radeon_begin(cs);
133bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
134bf215546Sopenharmony_ci                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
135bf215546Sopenharmony_ci   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
136bf215546Sopenharmony_ci   radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
137bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
138bf215546Sopenharmony_ci                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING));
139bf215546Sopenharmony_ci   radeon_end();
140bf215546Sopenharmony_ci}
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci/* Note: The buffer was already added in si_pc_emit_start, so we don't have to
143bf215546Sopenharmony_ci * do it again in here. */
144bf215546Sopenharmony_cistatic void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer, uint64_t va)
145bf215546Sopenharmony_ci{
146bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci   si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
149bf215546Sopenharmony_ci                     EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY);
150bf215546Sopenharmony_ci   si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL);
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci   radeon_begin(cs);
153bf215546Sopenharmony_ci   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
154bf215546Sopenharmony_ci   radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci   if (!sctx->screen->info.never_send_perfcounter_stop) {
157bf215546Sopenharmony_ci      radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
158bf215546Sopenharmony_ci      radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
159bf215546Sopenharmony_ci   }
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci   radeon_set_uconfig_reg(
162bf215546Sopenharmony_ci      R_036020_CP_PERFMON_CNTL,
163bf215546Sopenharmony_ci      S_036020_PERFMON_STATE(sctx->screen->info.never_stop_sq_perf_counters ?
164bf215546Sopenharmony_ci                                V_036020_CP_PERFMON_STATE_START_COUNTING :
165bf215546Sopenharmony_ci                                V_036020_CP_PERFMON_STATE_STOP_COUNTING) |
166bf215546Sopenharmony_ci      S_036020_PERFMON_SAMPLE_ENABLE(1));
167bf215546Sopenharmony_ci   radeon_end();
168bf215546Sopenharmony_ci}
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_civoid si_pc_emit_spm_start(struct radeon_cmdbuf *cs)
171bf215546Sopenharmony_ci{
172bf215546Sopenharmony_ci   radeon_begin(cs);
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci   /* Start SPM counters. */
175bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
176bf215546Sopenharmony_ci                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
177bf215546Sopenharmony_ci                             S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING));
178bf215546Sopenharmony_ci   /* Start windowed performance counters. */
179bf215546Sopenharmony_ci   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
180bf215546Sopenharmony_ci   radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
181bf215546Sopenharmony_ci   radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(1));
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_ci   radeon_end();
184bf215546Sopenharmony_ci}
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_civoid si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_counters,
187bf215546Sopenharmony_ci                         bool never_send_perfcounter_stop)
188bf215546Sopenharmony_ci{
189bf215546Sopenharmony_ci   radeon_begin(cs);
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_ci   /* Stop windowed performance counters. */
192bf215546Sopenharmony_ci   if (!never_send_perfcounter_stop) {
193bf215546Sopenharmony_ci      radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
194bf215546Sopenharmony_ci      radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
195bf215546Sopenharmony_ci   }
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci   radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(0));
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_ci   /* Stop SPM counters. */
200bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
201bf215546Sopenharmony_ci                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
202bf215546Sopenharmony_ci                          S_036020_SPM_PERFMON_STATE(never_stop_sq_perf_counters ?
203bf215546Sopenharmony_ci                             V_036020_STRM_PERFMON_STATE_START_COUNTING :
204bf215546Sopenharmony_ci                             V_036020_STRM_PERFMON_STATE_STOP_COUNTING));
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci   radeon_end();
207bf215546Sopenharmony_ci}
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_civoid si_pc_emit_spm_reset(struct radeon_cmdbuf *cs)
210bf215546Sopenharmony_ci{
211bf215546Sopenharmony_ci   radeon_begin(cs);
212bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
213bf215546Sopenharmony_ci                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
214bf215546Sopenharmony_ci                          S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET));
215bf215546Sopenharmony_ci   radeon_end();
216bf215546Sopenharmony_ci}
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_cistatic void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block, unsigned count,
220bf215546Sopenharmony_ci                            uint64_t va)
221bf215546Sopenharmony_ci{
222bf215546Sopenharmony_ci   struct ac_pc_block_base *regs = block->b->b;
223bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
224bf215546Sopenharmony_ci   unsigned idx;
225bf215546Sopenharmony_ci   unsigned reg = regs->counter0_lo;
226bf215546Sopenharmony_ci   unsigned reg_delta = 8;
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci   radeon_begin(cs);
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci   if (regs->select0) {
231bf215546Sopenharmony_ci      for (idx = 0; idx < count; ++idx) {
232bf215546Sopenharmony_ci         if (regs->counters)
233bf215546Sopenharmony_ci            reg = regs->counters[idx];
234bf215546Sopenharmony_ci
235bf215546Sopenharmony_ci         radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
236bf215546Sopenharmony_ci         radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
237bf215546Sopenharmony_ci                            COPY_DATA_COUNT_SEL); /* 64 bits */
238bf215546Sopenharmony_ci         radeon_emit(reg >> 2);
239bf215546Sopenharmony_ci         radeon_emit(0); /* unused */
240bf215546Sopenharmony_ci         radeon_emit(va);
241bf215546Sopenharmony_ci         radeon_emit(va >> 32);
242bf215546Sopenharmony_ci         va += sizeof(uint64_t);
243bf215546Sopenharmony_ci         reg += reg_delta;
244bf215546Sopenharmony_ci      }
245bf215546Sopenharmony_ci   } else {
246bf215546Sopenharmony_ci      /* Fake counters. */
247bf215546Sopenharmony_ci      for (idx = 0; idx < count; ++idx) {
248bf215546Sopenharmony_ci         radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
249bf215546Sopenharmony_ci         radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
250bf215546Sopenharmony_ci                     COPY_DATA_COUNT_SEL);
251bf215546Sopenharmony_ci         radeon_emit(0); /* immediate */
252bf215546Sopenharmony_ci         radeon_emit(0);
253bf215546Sopenharmony_ci         radeon_emit(va);
254bf215546Sopenharmony_ci         radeon_emit(va >> 32);
255bf215546Sopenharmony_ci         va += sizeof(uint64_t);
256bf215546Sopenharmony_ci      }
257bf215546Sopenharmony_ci   }
258bf215546Sopenharmony_ci   radeon_end();
259bf215546Sopenharmony_ci}
260bf215546Sopenharmony_ci
261bf215546Sopenharmony_cistatic void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery)
262bf215546Sopenharmony_ci{
263bf215546Sopenharmony_ci   struct si_query_pc *query = (struct si_query_pc *)squery;
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci   while (query->groups) {
266bf215546Sopenharmony_ci      struct si_query_group *group = query->groups;
267bf215546Sopenharmony_ci      query->groups = group->next;
268bf215546Sopenharmony_ci      FREE(group);
269bf215546Sopenharmony_ci   }
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci   FREE(query->counters);
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ci   si_query_buffer_destroy(sctx->screen, &query->buffer);
274bf215546Sopenharmony_ci   FREE(query);
275bf215546Sopenharmony_ci}
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_civoid si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit)
278bf215546Sopenharmony_ci{
279bf215546Sopenharmony_ci   if (sctx->gfx_level >= GFX11)
280bf215546Sopenharmony_ci      return;
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_ci   radeon_begin(&sctx->gfx_cs);
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_ci   if (sctx->gfx_level >= GFX10) {
285bf215546Sopenharmony_ci      radeon_set_uconfig_reg(R_037390_RLC_PERFMON_CLK_CNTL,
286bf215546Sopenharmony_ci                             S_037390_PERFMON_CLOCK_STATE(inhibit));
287bf215546Sopenharmony_ci   } else if (sctx->gfx_level >= GFX8) {
288bf215546Sopenharmony_ci      radeon_set_uconfig_reg(R_0372FC_RLC_PERFMON_CLK_CNTL,
289bf215546Sopenharmony_ci                             S_0372FC_PERFMON_CLOCK_STATE(inhibit));
290bf215546Sopenharmony_ci   }
291bf215546Sopenharmony_ci   radeon_end();
292bf215546Sopenharmony_ci}
293bf215546Sopenharmony_ci
294bf215546Sopenharmony_cistatic void si_pc_query_resume(struct si_context *sctx, struct si_query *squery)
295bf215546Sopenharmony_ci/*
296bf215546Sopenharmony_ci                                   struct si_query_hw *hwquery,
297bf215546Sopenharmony_ci                                   struct si_resource *buffer, uint64_t va)*/
298bf215546Sopenharmony_ci{
299bf215546Sopenharmony_ci   struct si_query_pc *query = (struct si_query_pc *)squery;
300bf215546Sopenharmony_ci   int current_se = -1;
301bf215546Sopenharmony_ci   int current_instance = -1;
302bf215546Sopenharmony_ci
303bf215546Sopenharmony_ci   if (!si_query_buffer_alloc(sctx, &query->buffer, NULL, query->result_size))
304bf215546Sopenharmony_ci      return;
305bf215546Sopenharmony_ci   si_need_gfx_cs_space(sctx, 0);
306bf215546Sopenharmony_ci
307bf215546Sopenharmony_ci   if (query->shaders)
308bf215546Sopenharmony_ci      si_pc_emit_shaders(&sctx->gfx_cs, query->shaders);
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci   si_inhibit_clockgating(sctx, &sctx->gfx_cs, true);
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci   for (struct si_query_group *group = query->groups; group; group = group->next) {
313bf215546Sopenharmony_ci      struct ac_pc_block *block = group->block;
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci      if (group->se != current_se || group->instance != current_instance) {
316bf215546Sopenharmony_ci         current_se = group->se;
317bf215546Sopenharmony_ci         current_instance = group->instance;
318bf215546Sopenharmony_ci         si_pc_emit_instance(sctx, group->se, group->instance);
319bf215546Sopenharmony_ci      }
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci      si_pc_emit_select(sctx, block, group->num_counters, group->selectors);
322bf215546Sopenharmony_ci   }
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci   if (current_se != -1 || current_instance != -1)
325bf215546Sopenharmony_ci      si_pc_emit_instance(sctx, -1, -1);
326bf215546Sopenharmony_ci
327bf215546Sopenharmony_ci   uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end;
328bf215546Sopenharmony_ci   si_pc_emit_start(sctx, query->buffer.buf, va);
329bf215546Sopenharmony_ci}
330bf215546Sopenharmony_ci
331bf215546Sopenharmony_cistatic void si_pc_query_suspend(struct si_context *sctx, struct si_query *squery)
332bf215546Sopenharmony_ci{
333bf215546Sopenharmony_ci   struct si_query_pc *query = (struct si_query_pc *)squery;
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci   if (!query->buffer.buf)
336bf215546Sopenharmony_ci      return;
337bf215546Sopenharmony_ci
338bf215546Sopenharmony_ci   uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end;
339bf215546Sopenharmony_ci   query->buffer.results_end += query->result_size;
340bf215546Sopenharmony_ci
341bf215546Sopenharmony_ci   si_pc_emit_stop(sctx, query->buffer.buf, va);
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_ci   for (struct si_query_group *group = query->groups; group; group = group->next) {
344bf215546Sopenharmony_ci      struct ac_pc_block *block = group->block;
345bf215546Sopenharmony_ci      unsigned se = group->se >= 0 ? group->se : 0;
346bf215546Sopenharmony_ci      unsigned se_end = se + 1;
347bf215546Sopenharmony_ci
348bf215546Sopenharmony_ci      if ((block->b->b->flags & AC_PC_BLOCK_SE) && (group->se < 0))
349bf215546Sopenharmony_ci         se_end = sctx->screen->info.max_se;
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_ci      do {
352bf215546Sopenharmony_ci         unsigned instance = group->instance >= 0 ? group->instance : 0;
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci         do {
355bf215546Sopenharmony_ci            si_pc_emit_instance(sctx, se, instance);
356bf215546Sopenharmony_ci            si_pc_emit_read(sctx, block, group->num_counters, va);
357bf215546Sopenharmony_ci            va += sizeof(uint64_t) * group->num_counters;
358bf215546Sopenharmony_ci         } while (group->instance < 0 && ++instance < block->num_instances);
359bf215546Sopenharmony_ci      } while (++se < se_end);
360bf215546Sopenharmony_ci   }
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci   si_pc_emit_instance(sctx, -1, -1);
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ci   si_inhibit_clockgating(sctx, &sctx->gfx_cs, false);
365bf215546Sopenharmony_ci}
366bf215546Sopenharmony_ci
367bf215546Sopenharmony_cistatic bool si_pc_query_begin(struct si_context *ctx, struct si_query *squery)
368bf215546Sopenharmony_ci{
369bf215546Sopenharmony_ci   struct si_query_pc *query = (struct si_query_pc *)squery;
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ci   si_query_buffer_reset(ctx, &query->buffer);
372bf215546Sopenharmony_ci
373bf215546Sopenharmony_ci   list_addtail(&query->b.active_list, &ctx->active_queries);
374bf215546Sopenharmony_ci   ctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend;
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_ci   si_pc_query_resume(ctx, squery);
377bf215546Sopenharmony_ci
378bf215546Sopenharmony_ci   return true;
379bf215546Sopenharmony_ci}
380bf215546Sopenharmony_ci
381bf215546Sopenharmony_cistatic bool si_pc_query_end(struct si_context *ctx, struct si_query *squery)
382bf215546Sopenharmony_ci{
383bf215546Sopenharmony_ci   struct si_query_pc *query = (struct si_query_pc *)squery;
384bf215546Sopenharmony_ci
385bf215546Sopenharmony_ci   si_pc_query_suspend(ctx, squery);
386bf215546Sopenharmony_ci
387bf215546Sopenharmony_ci   list_del(&squery->active_list);
388bf215546Sopenharmony_ci   ctx->num_cs_dw_queries_suspend -= squery->num_cs_dw_suspend;
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci   return query->buffer.buf != NULL;
391bf215546Sopenharmony_ci}
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_cistatic void si_pc_query_add_result(struct si_query_pc *query, void *buffer,
394bf215546Sopenharmony_ci                                   union pipe_query_result *result)
395bf215546Sopenharmony_ci{
396bf215546Sopenharmony_ci   uint64_t *results = buffer;
397bf215546Sopenharmony_ci   unsigned i, j;
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_ci   for (i = 0; i < query->num_counters; ++i) {
400bf215546Sopenharmony_ci      struct si_query_counter *counter = &query->counters[i];
401bf215546Sopenharmony_ci
402bf215546Sopenharmony_ci      for (j = 0; j < counter->qwords; ++j) {
403bf215546Sopenharmony_ci         uint32_t value = results[counter->base + j * counter->stride];
404bf215546Sopenharmony_ci         result->batch[i].u64 += value;
405bf215546Sopenharmony_ci      }
406bf215546Sopenharmony_ci   }
407bf215546Sopenharmony_ci}
408bf215546Sopenharmony_ci
409bf215546Sopenharmony_cistatic bool si_pc_query_get_result(struct si_context *sctx, struct si_query *squery, bool wait,
410bf215546Sopenharmony_ci                                   union pipe_query_result *result)
411bf215546Sopenharmony_ci{
412bf215546Sopenharmony_ci   struct si_query_pc *query = (struct si_query_pc *)squery;
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_ci   memset(result, 0, sizeof(result->batch[0]) * query->num_counters);
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci   for (struct si_query_buffer *qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
417bf215546Sopenharmony_ci      unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK);
418bf215546Sopenharmony_ci      unsigned results_base = 0;
419bf215546Sopenharmony_ci      void *map;
420bf215546Sopenharmony_ci
421bf215546Sopenharmony_ci      if (squery->b.flushed)
422bf215546Sopenharmony_ci         map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage);
423bf215546Sopenharmony_ci      else
424bf215546Sopenharmony_ci         map = si_buffer_map(sctx, qbuf->buf, usage);
425bf215546Sopenharmony_ci
426bf215546Sopenharmony_ci      if (!map)
427bf215546Sopenharmony_ci         return false;
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_ci      while (results_base != qbuf->results_end) {
430bf215546Sopenharmony_ci         si_pc_query_add_result(query, map + results_base, result);
431bf215546Sopenharmony_ci         results_base += query->result_size;
432bf215546Sopenharmony_ci      }
433bf215546Sopenharmony_ci   }
434bf215546Sopenharmony_ci
435bf215546Sopenharmony_ci   return true;
436bf215546Sopenharmony_ci}
437bf215546Sopenharmony_ci
438bf215546Sopenharmony_cistatic const struct si_query_ops batch_query_ops = {
439bf215546Sopenharmony_ci   .destroy = si_pc_query_destroy,
440bf215546Sopenharmony_ci   .begin = si_pc_query_begin,
441bf215546Sopenharmony_ci   .end = si_pc_query_end,
442bf215546Sopenharmony_ci   .get_result = si_pc_query_get_result,
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci   .suspend = si_pc_query_suspend,
445bf215546Sopenharmony_ci   .resume = si_pc_query_resume,
446bf215546Sopenharmony_ci};
447bf215546Sopenharmony_ci
448bf215546Sopenharmony_cistatic struct si_query_group *get_group_state(struct si_screen *screen, struct si_query_pc *query,
449bf215546Sopenharmony_ci                                              struct ac_pc_block *block, unsigned sub_gid)
450bf215546Sopenharmony_ci{
451bf215546Sopenharmony_ci   struct si_perfcounters *pc = screen->perfcounters;
452bf215546Sopenharmony_ci   struct si_query_group *group = query->groups;
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_ci   while (group) {
455bf215546Sopenharmony_ci      if (group->block == block && group->sub_gid == sub_gid)
456bf215546Sopenharmony_ci         return group;
457bf215546Sopenharmony_ci      group = group->next;
458bf215546Sopenharmony_ci   }
459bf215546Sopenharmony_ci
460bf215546Sopenharmony_ci   group = CALLOC_STRUCT(si_query_group);
461bf215546Sopenharmony_ci   if (!group)
462bf215546Sopenharmony_ci      return NULL;
463bf215546Sopenharmony_ci
464bf215546Sopenharmony_ci   group->block = block;
465bf215546Sopenharmony_ci   group->sub_gid = sub_gid;
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci   if (block->b->b->flags & AC_PC_BLOCK_SHADER) {
468bf215546Sopenharmony_ci      unsigned sub_gids = block->num_instances;
469bf215546Sopenharmony_ci      unsigned shader_id;
470bf215546Sopenharmony_ci      unsigned shaders;
471bf215546Sopenharmony_ci      unsigned query_shaders;
472bf215546Sopenharmony_ci
473bf215546Sopenharmony_ci      if (ac_pc_block_has_per_se_groups(&pc->base, block))
474bf215546Sopenharmony_ci         sub_gids = sub_gids * screen->info.max_se;
475bf215546Sopenharmony_ci      shader_id = sub_gid / sub_gids;
476bf215546Sopenharmony_ci      sub_gid = sub_gid % sub_gids;
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ci      shaders = ac_pc_shader_type_bits[shader_id];
479bf215546Sopenharmony_ci
480bf215546Sopenharmony_ci      query_shaders = query->shaders & ~AC_PC_SHADERS_WINDOWING;
481bf215546Sopenharmony_ci      if (query_shaders && query_shaders != shaders) {
482bf215546Sopenharmony_ci         fprintf(stderr, "si_perfcounter: incompatible shader groups\n");
483bf215546Sopenharmony_ci         FREE(group);
484bf215546Sopenharmony_ci         return NULL;
485bf215546Sopenharmony_ci      }
486bf215546Sopenharmony_ci      query->shaders = shaders;
487bf215546Sopenharmony_ci   }
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_ci   if (block->b->b->flags & AC_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
490bf215546Sopenharmony_ci      // A non-zero value in query->shaders ensures that the shader
491bf215546Sopenharmony_ci      // masking is reset unless the user explicitly requests one.
492bf215546Sopenharmony_ci      query->shaders = AC_PC_SHADERS_WINDOWING;
493bf215546Sopenharmony_ci   }
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_ci   if (ac_pc_block_has_per_se_groups(&pc->base, block)) {
496bf215546Sopenharmony_ci      group->se = sub_gid / block->num_instances;
497bf215546Sopenharmony_ci      sub_gid = sub_gid % block->num_instances;
498bf215546Sopenharmony_ci   } else {
499bf215546Sopenharmony_ci      group->se = -1;
500bf215546Sopenharmony_ci   }
501bf215546Sopenharmony_ci
502bf215546Sopenharmony_ci   if (ac_pc_block_has_per_instance_groups(&pc->base, block)) {
503bf215546Sopenharmony_ci      group->instance = sub_gid;
504bf215546Sopenharmony_ci   } else {
505bf215546Sopenharmony_ci      group->instance = -1;
506bf215546Sopenharmony_ci   }
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_ci   group->next = query->groups;
509bf215546Sopenharmony_ci   query->groups = group;
510bf215546Sopenharmony_ci
511bf215546Sopenharmony_ci   return group;
512bf215546Sopenharmony_ci}
513bf215546Sopenharmony_ci
514bf215546Sopenharmony_cistruct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_queries,
515bf215546Sopenharmony_ci                                         unsigned *query_types)
516bf215546Sopenharmony_ci{
517bf215546Sopenharmony_ci   struct si_screen *screen = (struct si_screen *)ctx->screen;
518bf215546Sopenharmony_ci   struct si_perfcounters *pc = screen->perfcounters;
519bf215546Sopenharmony_ci   struct ac_pc_block *block;
520bf215546Sopenharmony_ci   struct si_query_group *group;
521bf215546Sopenharmony_ci   struct si_query_pc *query;
522bf215546Sopenharmony_ci   unsigned base_gid, sub_gid, sub_index;
523bf215546Sopenharmony_ci   unsigned i, j;
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_ci   if (!pc)
526bf215546Sopenharmony_ci      return NULL;
527bf215546Sopenharmony_ci
528bf215546Sopenharmony_ci   query = CALLOC_STRUCT(si_query_pc);
529bf215546Sopenharmony_ci   if (!query)
530bf215546Sopenharmony_ci      return NULL;
531bf215546Sopenharmony_ci
532bf215546Sopenharmony_ci   query->b.ops = &batch_query_ops;
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_ci   query->num_counters = num_queries;
535bf215546Sopenharmony_ci
536bf215546Sopenharmony_ci   /* Collect selectors per group */
537bf215546Sopenharmony_ci   for (i = 0; i < num_queries; ++i) {
538bf215546Sopenharmony_ci      unsigned sub_gid;
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_ci      if (query_types[i] < SI_QUERY_FIRST_PERFCOUNTER)
541bf215546Sopenharmony_ci         goto error;
542bf215546Sopenharmony_ci
543bf215546Sopenharmony_ci      block =
544bf215546Sopenharmony_ci         ac_lookup_counter(&pc->base, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, &base_gid, &sub_index);
545bf215546Sopenharmony_ci      if (!block)
546bf215546Sopenharmony_ci         goto error;
547bf215546Sopenharmony_ci
548bf215546Sopenharmony_ci      sub_gid = sub_index / block->b->selectors;
549bf215546Sopenharmony_ci      sub_index = sub_index % block->b->selectors;
550bf215546Sopenharmony_ci
551bf215546Sopenharmony_ci      group = get_group_state(screen, query, block, sub_gid);
552bf215546Sopenharmony_ci      if (!group)
553bf215546Sopenharmony_ci         goto error;
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_ci      if (group->num_counters >= block->b->b->num_counters) {
556bf215546Sopenharmony_ci         fprintf(stderr, "perfcounter group %s: too many selected\n", block->b->b->name);
557bf215546Sopenharmony_ci         goto error;
558bf215546Sopenharmony_ci      }
559bf215546Sopenharmony_ci      group->selectors[group->num_counters] = sub_index;
560bf215546Sopenharmony_ci      ++group->num_counters;
561bf215546Sopenharmony_ci   }
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci   /* Compute result bases and CS size per group */
564bf215546Sopenharmony_ci   query->b.num_cs_dw_suspend = pc->num_stop_cs_dwords;
565bf215546Sopenharmony_ci   query->b.num_cs_dw_suspend += pc->num_instance_cs_dwords;
566bf215546Sopenharmony_ci
567bf215546Sopenharmony_ci   i = 0;
568bf215546Sopenharmony_ci   for (group = query->groups; group; group = group->next) {
569bf215546Sopenharmony_ci      struct ac_pc_block *block = group->block;
570bf215546Sopenharmony_ci      unsigned read_dw;
571bf215546Sopenharmony_ci      unsigned instances = 1;
572bf215546Sopenharmony_ci
573bf215546Sopenharmony_ci      if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0)
574bf215546Sopenharmony_ci         instances = screen->info.max_se;
575bf215546Sopenharmony_ci      if (group->instance < 0)
576bf215546Sopenharmony_ci         instances *= block->num_instances;
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_ci      group->result_base = i;
579bf215546Sopenharmony_ci      query->result_size += sizeof(uint64_t) * instances * group->num_counters;
580bf215546Sopenharmony_ci      i += instances * group->num_counters;
581bf215546Sopenharmony_ci
582bf215546Sopenharmony_ci      read_dw = 6 * group->num_counters;
583bf215546Sopenharmony_ci      query->b.num_cs_dw_suspend += instances * read_dw;
584bf215546Sopenharmony_ci      query->b.num_cs_dw_suspend += instances * pc->num_instance_cs_dwords;
585bf215546Sopenharmony_ci   }
586bf215546Sopenharmony_ci
587bf215546Sopenharmony_ci   if (query->shaders) {
588bf215546Sopenharmony_ci      if (query->shaders == AC_PC_SHADERS_WINDOWING)
589bf215546Sopenharmony_ci         query->shaders = 0xffffffff;
590bf215546Sopenharmony_ci   }
591bf215546Sopenharmony_ci
592bf215546Sopenharmony_ci   /* Map user-supplied query array to result indices */
593bf215546Sopenharmony_ci   query->counters = CALLOC(num_queries, sizeof(*query->counters));
594bf215546Sopenharmony_ci   for (i = 0; i < num_queries; ++i) {
595bf215546Sopenharmony_ci      struct si_query_counter *counter = &query->counters[i];
596bf215546Sopenharmony_ci      struct ac_pc_block *block;
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci      block =
599bf215546Sopenharmony_ci         ac_lookup_counter(&pc->base, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, &base_gid, &sub_index);
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_ci      sub_gid = sub_index / block->b->selectors;
602bf215546Sopenharmony_ci      sub_index = sub_index % block->b->selectors;
603bf215546Sopenharmony_ci
604bf215546Sopenharmony_ci      group = get_group_state(screen, query, block, sub_gid);
605bf215546Sopenharmony_ci      assert(group != NULL);
606bf215546Sopenharmony_ci
607bf215546Sopenharmony_ci      for (j = 0; j < group->num_counters; ++j) {
608bf215546Sopenharmony_ci         if (group->selectors[j] == sub_index)
609bf215546Sopenharmony_ci            break;
610bf215546Sopenharmony_ci      }
611bf215546Sopenharmony_ci
612bf215546Sopenharmony_ci      counter->base = group->result_base + j;
613bf215546Sopenharmony_ci      counter->stride = group->num_counters;
614bf215546Sopenharmony_ci
615bf215546Sopenharmony_ci      counter->qwords = 1;
616bf215546Sopenharmony_ci      if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0)
617bf215546Sopenharmony_ci         counter->qwords = screen->info.max_se;
618bf215546Sopenharmony_ci      if (group->instance < 0)
619bf215546Sopenharmony_ci         counter->qwords *= block->num_instances;
620bf215546Sopenharmony_ci   }
621bf215546Sopenharmony_ci
622bf215546Sopenharmony_ci   return (struct pipe_query *)query;
623bf215546Sopenharmony_ci
624bf215546Sopenharmony_cierror:
625bf215546Sopenharmony_ci   si_pc_query_destroy((struct si_context *)ctx, &query->b);
626bf215546Sopenharmony_ci   return NULL;
627bf215546Sopenharmony_ci}
628bf215546Sopenharmony_ci
629bf215546Sopenharmony_ciint si_get_perfcounter_info(struct si_screen *screen, unsigned index,
630bf215546Sopenharmony_ci                            struct pipe_driver_query_info *info)
631bf215546Sopenharmony_ci{
632bf215546Sopenharmony_ci   struct si_perfcounters *pc = screen->perfcounters;
633bf215546Sopenharmony_ci   struct ac_pc_block *block;
634bf215546Sopenharmony_ci   unsigned base_gid, sub;
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci   if (!pc)
637bf215546Sopenharmony_ci      return 0;
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_ci   if (!info) {
640bf215546Sopenharmony_ci      unsigned bid, num_queries = 0;
641bf215546Sopenharmony_ci
642bf215546Sopenharmony_ci      for (bid = 0; bid < pc->base.num_blocks; ++bid) {
643bf215546Sopenharmony_ci         num_queries += pc->base.blocks[bid].b->selectors * pc->base.blocks[bid].num_groups;
644bf215546Sopenharmony_ci      }
645bf215546Sopenharmony_ci
646bf215546Sopenharmony_ci      return num_queries;
647bf215546Sopenharmony_ci   }
648bf215546Sopenharmony_ci
649bf215546Sopenharmony_ci   block = ac_lookup_counter(&pc->base, index, &base_gid, &sub);
650bf215546Sopenharmony_ci   if (!block)
651bf215546Sopenharmony_ci      return 0;
652bf215546Sopenharmony_ci
653bf215546Sopenharmony_ci   if (!block->selector_names) {
654bf215546Sopenharmony_ci      if (!ac_init_block_names(&screen->info, &pc->base, block))
655bf215546Sopenharmony_ci         return 0;
656bf215546Sopenharmony_ci   }
657bf215546Sopenharmony_ci   info->name = block->selector_names + sub * block->selector_name_stride;
658bf215546Sopenharmony_ci   info->query_type = SI_QUERY_FIRST_PERFCOUNTER + index;
659bf215546Sopenharmony_ci   info->max_value.u64 = 0;
660bf215546Sopenharmony_ci   info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
661bf215546Sopenharmony_ci   info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
662bf215546Sopenharmony_ci   info->group_id = base_gid + sub / block->b->selectors;
663bf215546Sopenharmony_ci   info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
664bf215546Sopenharmony_ci   if (sub > 0 && sub + 1 < block->b->selectors * block->num_groups)
665bf215546Sopenharmony_ci      info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
666bf215546Sopenharmony_ci   return 1;
667bf215546Sopenharmony_ci}
668bf215546Sopenharmony_ci
669bf215546Sopenharmony_ciint si_get_perfcounter_group_info(struct si_screen *screen, unsigned index,
670bf215546Sopenharmony_ci                                  struct pipe_driver_query_group_info *info)
671bf215546Sopenharmony_ci{
672bf215546Sopenharmony_ci   struct si_perfcounters *pc = screen->perfcounters;
673bf215546Sopenharmony_ci   struct ac_pc_block *block;
674bf215546Sopenharmony_ci
675bf215546Sopenharmony_ci   if (!pc)
676bf215546Sopenharmony_ci      return 0;
677bf215546Sopenharmony_ci
678bf215546Sopenharmony_ci   if (!info)
679bf215546Sopenharmony_ci      return pc->base.num_groups;
680bf215546Sopenharmony_ci
681bf215546Sopenharmony_ci   block = ac_lookup_group(&pc->base, &index);
682bf215546Sopenharmony_ci   if (!block)
683bf215546Sopenharmony_ci      return 0;
684bf215546Sopenharmony_ci
685bf215546Sopenharmony_ci   if (!block->group_names) {
686bf215546Sopenharmony_ci      if (!ac_init_block_names(&screen->info, &pc->base, block))
687bf215546Sopenharmony_ci         return 0;
688bf215546Sopenharmony_ci   }
689bf215546Sopenharmony_ci   info->name = block->group_names + index * block->group_name_stride;
690bf215546Sopenharmony_ci   info->num_queries = block->b->selectors;
691bf215546Sopenharmony_ci   info->max_active_queries = block->b->b->num_counters;
692bf215546Sopenharmony_ci   return 1;
693bf215546Sopenharmony_ci}
694bf215546Sopenharmony_ci
695bf215546Sopenharmony_civoid si_destroy_perfcounters(struct si_screen *screen)
696bf215546Sopenharmony_ci{
697bf215546Sopenharmony_ci   struct si_perfcounters *pc = screen->perfcounters;
698bf215546Sopenharmony_ci
699bf215546Sopenharmony_ci   if (!pc)
700bf215546Sopenharmony_ci      return;
701bf215546Sopenharmony_ci
702bf215546Sopenharmony_ci   ac_destroy_perfcounters(&pc->base);
703bf215546Sopenharmony_ci   FREE(pc);
704bf215546Sopenharmony_ci   screen->perfcounters = NULL;
705bf215546Sopenharmony_ci}
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_civoid si_init_perfcounters(struct si_screen *screen)
708bf215546Sopenharmony_ci{
709bf215546Sopenharmony_ci   bool separate_se, separate_instance;
710bf215546Sopenharmony_ci
711bf215546Sopenharmony_ci   separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
712bf215546Sopenharmony_ci   separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);
713bf215546Sopenharmony_ci
714bf215546Sopenharmony_ci   screen->perfcounters = CALLOC_STRUCT(si_perfcounters);
715bf215546Sopenharmony_ci   if (!screen->perfcounters)
716bf215546Sopenharmony_ci      return;
717bf215546Sopenharmony_ci
718bf215546Sopenharmony_ci   screen->perfcounters->num_stop_cs_dwords = 14 + si_cp_write_fence_dwords(screen);
719bf215546Sopenharmony_ci   screen->perfcounters->num_instance_cs_dwords = 3;
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci   if (!ac_init_perfcounters(&screen->info, separate_se, separate_instance,
722bf215546Sopenharmony_ci                             &screen->perfcounters->base)) {
723bf215546Sopenharmony_ci      si_destroy_perfcounters(screen);
724bf215546Sopenharmony_ci   }
725bf215546Sopenharmony_ci}
726bf215546Sopenharmony_ci
727bf215546Sopenharmony_cistatic bool
728bf215546Sopenharmony_cisi_spm_init_bo(struct si_context *sctx)
729bf215546Sopenharmony_ci{
730bf215546Sopenharmony_ci   struct radeon_winsys *ws = sctx->ws;
731bf215546Sopenharmony_ci   uint64_t size = 32 * 1024 * 1024; /* Default to 32MB. */
732bf215546Sopenharmony_ci
733bf215546Sopenharmony_ci   sctx->spm_trace.buffer_size = size;
734bf215546Sopenharmony_ci   sctx->spm_trace.sample_interval = 4096; /* Default to 4096 clk. */
735bf215546Sopenharmony_ci
736bf215546Sopenharmony_ci   sctx->spm_trace.bo = ws->buffer_create(
737bf215546Sopenharmony_ci      ws, size, 4096,
738bf215546Sopenharmony_ci      RADEON_DOMAIN_VRAM,
739bf215546Sopenharmony_ci      RADEON_FLAG_NO_INTERPROCESS_SHARING |
740bf215546Sopenharmony_ci         RADEON_FLAG_GTT_WC |
741bf215546Sopenharmony_ci         RADEON_FLAG_NO_SUBALLOC);
742bf215546Sopenharmony_ci
743bf215546Sopenharmony_ci   return sctx->spm_trace.bo != NULL;
744bf215546Sopenharmony_ci}
745bf215546Sopenharmony_ci
746bf215546Sopenharmony_ci
747bf215546Sopenharmony_cistatic void
748bf215546Sopenharmony_cisi_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs)
749bf215546Sopenharmony_ci{
750bf215546Sopenharmony_ci   struct ac_spm_trace_data *spm_trace = &sctx->spm_trace;
751bf215546Sopenharmony_ci
752bf215546Sopenharmony_ci   radeon_begin(cs);
753bf215546Sopenharmony_ci
754bf215546Sopenharmony_ci   for (uint32_t b = 0; b < spm_trace->num_used_sq_block_sel; b++) {
755bf215546Sopenharmony_ci      struct ac_spm_block_select *sq_block_sel = &spm_trace->sq_block_sel[b];
756bf215546Sopenharmony_ci      const struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0];
757bf215546Sopenharmony_ci      uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
758bf215546Sopenharmony_ci
759bf215546Sopenharmony_ci      radeon_set_uconfig_reg_seq(reg_base + b * 4, 1, false);
760bf215546Sopenharmony_ci      radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
761bf215546Sopenharmony_ci   }
762bf215546Sopenharmony_ci
763bf215546Sopenharmony_ci   for (uint32_t b = 0; b < spm_trace->num_block_sel; b++) {
764bf215546Sopenharmony_ci      struct ac_spm_block_select *block_sel = &spm_trace->block_sel[b];
765bf215546Sopenharmony_ci      struct ac_pc_block_base *regs = block_sel->b->b->b;
766bf215546Sopenharmony_ci
767bf215546Sopenharmony_ci      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_sel->grbm_gfx_index);
768bf215546Sopenharmony_ci
769bf215546Sopenharmony_ci      for (unsigned c = 0; c < block_sel->num_counters; c++) {
770bf215546Sopenharmony_ci         const struct ac_spm_counter_select *cntr_sel = &block_sel->counters[c];
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_ci         if (!cntr_sel->active)
773bf215546Sopenharmony_ci            continue;
774bf215546Sopenharmony_ci
775bf215546Sopenharmony_ci         radeon_set_uconfig_reg_seq(regs->select0[c], 1, false);
776bf215546Sopenharmony_ci         radeon_emit(cntr_sel->sel0);
777bf215546Sopenharmony_ci
778bf215546Sopenharmony_ci         radeon_set_uconfig_reg_seq(regs->select1[c], 1, false);
779bf215546Sopenharmony_ci         radeon_emit(cntr_sel->sel1);
780bf215546Sopenharmony_ci      }
781bf215546Sopenharmony_ci   }
782bf215546Sopenharmony_ci
783bf215546Sopenharmony_ci   /* Restore global broadcasting. */
784bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
785bf215546Sopenharmony_ci                          S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
786bf215546Sopenharmony_ci                          S_030800_INSTANCE_BROADCAST_WRITES(1));
787bf215546Sopenharmony_ci
788bf215546Sopenharmony_ci   radeon_end();
789bf215546Sopenharmony_ci}
790bf215546Sopenharmony_ci
791bf215546Sopenharmony_ci#define SPM_RING_BASE_ALIGN 32
792bf215546Sopenharmony_ci
793bf215546Sopenharmony_civoid
794bf215546Sopenharmony_cisi_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs)
795bf215546Sopenharmony_ci{
796bf215546Sopenharmony_ci   struct ac_spm_trace_data *spm_trace = &sctx->spm_trace;
797bf215546Sopenharmony_ci   uint64_t va = sctx->screen->ws->buffer_get_virtual_address(spm_trace->bo);
798bf215546Sopenharmony_ci   uint64_t ring_size = spm_trace->buffer_size;
799bf215546Sopenharmony_ci
800bf215546Sopenharmony_ci   /* It's required that the ring VA and the size are correctly aligned. */
801bf215546Sopenharmony_ci   assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
802bf215546Sopenharmony_ci   assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
803bf215546Sopenharmony_ci   assert(spm_trace->sample_interval >= 32);
804bf215546Sopenharmony_ci
805bf215546Sopenharmony_ci   radeon_begin(cs);
806bf215546Sopenharmony_ci
807bf215546Sopenharmony_ci   /* Configure the SPM ring buffer. */
808bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL,
809bf215546Sopenharmony_ci                          S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
810bf215546Sopenharmony_ci                          S_037200_PERFMON_SAMPLE_INTERVAL(spm_trace->sample_interval)); /* in sclk */
811bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
812bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI,
813bf215546Sopenharmony_ci                          S_037208_RING_BASE_HI(va >> 32));
814bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size);
815bf215546Sopenharmony_ci
816bf215546Sopenharmony_ci   /* Configure the muxsel. */
817bf215546Sopenharmony_ci   uint32_t total_muxsel_lines = 0;
818bf215546Sopenharmony_ci   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
819bf215546Sopenharmony_ci      total_muxsel_lines += spm_trace->num_muxsel_lines[s];
820bf215546Sopenharmony_ci   }
821bf215546Sopenharmony_ci
822bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0);
823bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
824bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
825bf215546Sopenharmony_ci                          S_03727C_SE0_NUM_LINE(spm_trace->num_muxsel_lines[0]) |
826bf215546Sopenharmony_ci                          S_03727C_SE1_NUM_LINE(spm_trace->num_muxsel_lines[1]) |
827bf215546Sopenharmony_ci                          S_03727C_SE2_NUM_LINE(spm_trace->num_muxsel_lines[2]) |
828bf215546Sopenharmony_ci                          S_03727C_SE3_NUM_LINE(spm_trace->num_muxsel_lines[3]));
829bf215546Sopenharmony_ci   radeon_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
830bf215546Sopenharmony_ci                          S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
831bf215546Sopenharmony_ci                          S_037280_GLOBAL_NUM_LINE(spm_trace->num_muxsel_lines[4]));
832bf215546Sopenharmony_ci
833bf215546Sopenharmony_ci   /* Upload each muxsel ram to the RLC. */
834bf215546Sopenharmony_ci   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
835bf215546Sopenharmony_ci      unsigned rlc_muxsel_addr, rlc_muxsel_data;
836bf215546Sopenharmony_ci      unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) |
837bf215546Sopenharmony_ci                                S_030800_INSTANCE_BROADCAST_WRITES(1);
838bf215546Sopenharmony_ci
839bf215546Sopenharmony_ci      if (!spm_trace->num_muxsel_lines[s])
840bf215546Sopenharmony_ci         continue;
841bf215546Sopenharmony_ci
842bf215546Sopenharmony_ci      if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
843bf215546Sopenharmony_ci         grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
844bf215546Sopenharmony_ci
845bf215546Sopenharmony_ci         rlc_muxsel_addr = R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
846bf215546Sopenharmony_ci         rlc_muxsel_data = R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
847bf215546Sopenharmony_ci      } else {
848bf215546Sopenharmony_ci         grbm_gfx_index |= S_030800_SE_INDEX(s);
849bf215546Sopenharmony_ci
850bf215546Sopenharmony_ci         rlc_muxsel_addr = R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
851bf215546Sopenharmony_ci         rlc_muxsel_data = R_037220_RLC_SPM_SE_MUXSEL_DATA;
852bf215546Sopenharmony_ci      }
853bf215546Sopenharmony_ci
854bf215546Sopenharmony_ci      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index);
855bf215546Sopenharmony_ci
856bf215546Sopenharmony_ci      for (unsigned l = 0; l < spm_trace->num_muxsel_lines[s]; l++) {
857bf215546Sopenharmony_ci         uint32_t *data = (uint32_t *)spm_trace->muxsel_lines[s][l].muxsel;
858bf215546Sopenharmony_ci
859bf215546Sopenharmony_ci         /* Select MUXSEL_ADDR to point to the next muxsel. */
860bf215546Sopenharmony_ci         radeon_set_uconfig_reg(rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci         /* Write the muxsel line configuration with MUXSEL_DATA. */
863bf215546Sopenharmony_ci         radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
864bf215546Sopenharmony_ci         radeon_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) |
865bf215546Sopenharmony_ci                     S_370_WR_CONFIRM(1) |
866bf215546Sopenharmony_ci                     S_370_ENGINE_SEL(V_370_ME) |
867bf215546Sopenharmony_ci                     S_370_WR_ONE_ADDR(1));
868bf215546Sopenharmony_ci         radeon_emit(rlc_muxsel_data >> 2);
869bf215546Sopenharmony_ci         radeon_emit(0);
870bf215546Sopenharmony_ci         radeon_emit_array(data, AC_SPM_MUXSEL_LINE_SIZE);
871bf215546Sopenharmony_ci      }
872bf215546Sopenharmony_ci   }
873bf215546Sopenharmony_ci   radeon_end();
874bf215546Sopenharmony_ci
875bf215546Sopenharmony_ci   /* Select SPM counters. */
876bf215546Sopenharmony_ci   si_emit_spm_counters(sctx, cs);
877bf215546Sopenharmony_ci}
878bf215546Sopenharmony_ci
879bf215546Sopenharmony_cibool
880bf215546Sopenharmony_cisi_spm_init(struct si_context *sctx)
881bf215546Sopenharmony_ci{
882bf215546Sopenharmony_ci   const struct radeon_info *info = &sctx->screen->info;
883bf215546Sopenharmony_ci
884bf215546Sopenharmony_ci   sctx->screen->perfcounters = CALLOC_STRUCT(si_perfcounters);
885bf215546Sopenharmony_ci   sctx->screen->perfcounters->num_stop_cs_dwords = 14 + si_cp_write_fence_dwords(sctx->screen);
886bf215546Sopenharmony_ci   sctx->screen->perfcounters->num_instance_cs_dwords = 3;
887bf215546Sopenharmony_ci
888bf215546Sopenharmony_ci   struct ac_perfcounters *pc = &sctx->screen->perfcounters->base;
889bf215546Sopenharmony_ci   struct ac_spm_counter_create_info spm_counters[] = {
890bf215546Sopenharmony_ci
891bf215546Sopenharmony_ci      /* XXX: doesn't work */
892bf215546Sopenharmony_ci      {TCP, 0, 0x9},    /* Number of L2 requests. */
893bf215546Sopenharmony_ci      {TCP, 0, 0x12},   /* Number of L2 misses. */
894bf215546Sopenharmony_ci
895bf215546Sopenharmony_ci      /* Scalar cache hit */
896bf215546Sopenharmony_ci      {SQ, 0, 0x14f},   /* Number of SCACHE hits. */
897bf215546Sopenharmony_ci      {SQ, 0, 0x150},   /* Number of SCACHE misses. */
898bf215546Sopenharmony_ci      {SQ, 0, 0x151},   /* Number of SCACHE misses duplicate. */
899bf215546Sopenharmony_ci
900bf215546Sopenharmony_ci      /* Instruction cache hit */
901bf215546Sopenharmony_ci      {SQ, 0, 0x12c},   /* Number of ICACHE hits. */
902bf215546Sopenharmony_ci      {SQ, 0, 0x12d},   /* Number of ICACHE misses. */
903bf215546Sopenharmony_ci      {SQ, 0, 0x12e},   /* Number of ICACHE misses duplicate. */
904bf215546Sopenharmony_ci
905bf215546Sopenharmony_ci      /* XXX: doesn't work */
906bf215546Sopenharmony_ci      {GL1C, 0, 0xe},   /* Number of GL1C requests. */
907bf215546Sopenharmony_ci      {GL1C, 0, 0x12},  /* Number of GL1C misses. */
908bf215546Sopenharmony_ci
909bf215546Sopenharmony_ci      /* L2 cache hit */
910bf215546Sopenharmony_ci      {GL2C, 0, 0x3},   /* Number of GL2C requests. */
911bf215546Sopenharmony_ci      {GL2C, 0, info->gfx_level >= GFX10_3 ? 0x2b : 0x23},  /* Number of GL2C misses. */
912bf215546Sopenharmony_ci   };
913bf215546Sopenharmony_ci
914bf215546Sopenharmony_ci   if (!ac_init_perfcounters(info, false, false, pc))
915bf215546Sopenharmony_ci      return false;
916bf215546Sopenharmony_ci
917bf215546Sopenharmony_ci   if (!ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &sctx->spm_trace))
918bf215546Sopenharmony_ci      return false;
919bf215546Sopenharmony_ci
920bf215546Sopenharmony_ci   if (!si_spm_init_bo(sctx))
921bf215546Sopenharmony_ci      return false;
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_ci   return true;
924bf215546Sopenharmony_ci}
925bf215546Sopenharmony_ci
926bf215546Sopenharmony_civoid
927bf215546Sopenharmony_cisi_spm_finish(struct si_context *sctx)
928bf215546Sopenharmony_ci{
929bf215546Sopenharmony_ci   struct pb_buffer *bo = sctx->spm_trace.bo;
930bf215546Sopenharmony_ci   radeon_bo_reference(sctx->screen->ws, &bo, NULL);
931bf215546Sopenharmony_ci
932bf215546Sopenharmony_ci   ac_destroy_spm(&sctx->spm_trace);
933bf215546Sopenharmony_ci}
934