1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2021 Valve Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include <inttypes.h>
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci#include "ac_perfcounter.h"
27bf215546Sopenharmony_ci#include "amdgfxregs.h"
28bf215546Sopenharmony_ci#include "radv_cs.h"
29bf215546Sopenharmony_ci#include "radv_private.h"
30bf215546Sopenharmony_ci#include "sid.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_civoid
33bf215546Sopenharmony_ciradv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders)
34bf215546Sopenharmony_ci{
35bf215546Sopenharmony_ci   radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2);
36bf215546Sopenharmony_ci   radeon_emit(cs, shaders & 0x7f);
37bf215546Sopenharmony_ci   radeon_emit(cs, 0xffffffff);
38bf215546Sopenharmony_ci}
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_cistatic void
41bf215546Sopenharmony_ciradv_emit_windowed_counters(struct radv_device *device, struct radeon_cmdbuf *cs, int family,
42bf215546Sopenharmony_ci                            bool enable)
43bf215546Sopenharmony_ci{
44bf215546Sopenharmony_ci   if (family == RADV_QUEUE_GENERAL) {
45bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
46bf215546Sopenharmony_ci      radeon_emit(cs, EVENT_TYPE(enable ? V_028A90_PERFCOUNTER_START : V_028A90_PERFCOUNTER_STOP) |
47bf215546Sopenharmony_ci                         EVENT_INDEX(0));
48bf215546Sopenharmony_ci   }
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_ci   radeon_set_sh_reg(cs, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(enable));
51bf215546Sopenharmony_ci}
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_civoid
54bf215546Sopenharmony_ciradv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs)
55bf215546Sopenharmony_ci{
56bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
57bf215546Sopenharmony_ci                              S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
58bf215546Sopenharmony_ci                              S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET));
59bf215546Sopenharmony_ci}
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_civoid
62bf215546Sopenharmony_ciradv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family)
63bf215546Sopenharmony_ci{
64bf215546Sopenharmony_ci   /* Start SPM counters. */
65bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
66bf215546Sopenharmony_ci                              S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
67bf215546Sopenharmony_ci                              S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING));
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_ci   radv_emit_windowed_counters(device, cs, family, true);
70bf215546Sopenharmony_ci}
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_civoid
73bf215546Sopenharmony_ciradv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family)
74bf215546Sopenharmony_ci{
75bf215546Sopenharmony_ci   radv_emit_windowed_counters(device, cs, family, false);
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   /* Stop SPM counters. */
78bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
79bf215546Sopenharmony_ci                              S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
80bf215546Sopenharmony_ci                              S_036020_SPM_PERFMON_STATE(device->physical_device->rad_info.never_stop_sq_perf_counters ?
81bf215546Sopenharmony_ci                                                            V_036020_STRM_PERFMON_STATE_START_COUNTING :
82bf215546Sopenharmony_ci                                                            V_036020_STRM_PERFMON_STATE_STOP_COUNTING));
83bf215546Sopenharmony_ci}
84bf215546Sopenharmony_ci
/* Operation used to combine the regs[] of a counter into its reported value. */
enum radv_perfcounter_op {
   RADV_PC_OP_SUM,
   RADV_PC_OP_MAX,
   RADV_PC_OP_RATIO_DIVSCALE,
   RADV_PC_OP_REVERSE_RATIO, /* (reg1 - reg0) / reg1 */
   RADV_PC_OP_SUM_WEIGHTED_4,
};

/* Encoding of a hardware counter selection in a regs[] entry:
 * bits 0-15 hold the selector, bits 16-30 hold the GPU block.
 */
#define S_REG_SEL(x)   ((x)&0xFFFF)
#define G_REG_SEL(x)   ((x)&0xFFFF)
#define S_REG_BLOCK(x) ((x) << 16)
#define G_REG_BLOCK(x) (((x) >> 16) & 0x7FFF)

/* Encoding of a resolved regs[] entry: bits 0-15 hold an offset, bits 16-30
 * hold an instance count.
 *
 * Bit 31 marks an entry as a constant rather than a register. The CONSTANT
 * accessors cast to uint32_t before shifting: S_REG_CONSTANT(1) with a plain
 * int argument would otherwise shift into the sign bit, which is undefined
 * behaviour in C.
 */
#define S_REG_OFFSET(x)    ((x)&0xFFFF)
#define G_REG_OFFSET(x)    ((x)&0xFFFF)
#define S_REG_INSTANCES(x) ((x) << 16)
#define G_REG_INSTANCES(x) (((x) >> 16) & 0x7FFF)
#define S_REG_CONSTANT(x)  ((uint32_t)(x) << 31)
#define G_REG_CONSTANT(x)  ((uint32_t)(x) >> 31)

/* Implementation of one counter: the combining operation plus up to eight
 * encoded operands (hardware selections or constants, see the macros above).
 */
struct radv_perfcounter_impl {
   enum radv_perfcounter_op op;
   uint32_t regs[8];
};
109bf215546Sopenharmony_ci
/* Stable identifiers for the meaning of a counter.
 *
 * Only append to this list; never insert into the middle or remove an entry
 * (renaming is fine). The values are spelled out to make that rule harder to
 * break by accident.
 *
 * Counters that mean the same thing share an identifier even when different
 * GPUs implement them differently, but an identifier should be unique within
 * a single GPU.
 */
enum radv_perfcounter_uuid {
   RADV_PC_UUID_GPU_CYCLES = 0,
   RADV_PC_UUID_SHADER_WAVES = 1,
   RADV_PC_UUID_SHADER_INSTRUCTIONS = 2,
   RADV_PC_UUID_SHADER_INSTRUCTIONS_VALU = 3,
   RADV_PC_UUID_SHADER_INSTRUCTIONS_SALU = 4,
   RADV_PC_UUID_SHADER_INSTRUCTIONS_VMEM_LOAD = 5,
   RADV_PC_UUID_SHADER_INSTRUCTIONS_SMEM_LOAD = 6,
   RADV_PC_UUID_SHADER_INSTRUCTIONS_VMEM_STORE = 7,
   RADV_PC_UUID_SHADER_INSTRUCTIONS_LDS = 8,
   RADV_PC_UUID_SHADER_INSTRUCTIONS_GDS = 9,
   RADV_PC_UUID_SHADER_VALU_BUSY = 10,
   RADV_PC_UUID_SHADER_SALU_BUSY = 11,
   RADV_PC_UUID_VRAM_READ_SIZE = 12,
   RADV_PC_UUID_VRAM_WRITE_SIZE = 13,
   RADV_PC_UUID_L0_CACHE_HIT_RATIO = 14,
   RADV_PC_UUID_L1_CACHE_HIT_RATIO = 15,
   RADV_PC_UUID_L2_CACHE_HIT_RATIO = 16,
};
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_cistruct radv_perfcounter_desc {
137bf215546Sopenharmony_ci   struct radv_perfcounter_impl impl;
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci   VkPerformanceCounterUnitKHR unit;
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci   char name[VK_MAX_DESCRIPTION_SIZE];
142bf215546Sopenharmony_ci   char category[VK_MAX_DESCRIPTION_SIZE];
143bf215546Sopenharmony_ci   char description[VK_MAX_DESCRIPTION_SIZE];
144bf215546Sopenharmony_ci   enum radv_perfcounter_uuid uuid;
145bf215546Sopenharmony_ci};
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_ci#define PC_DESC(arg_op, arg_unit, arg_name, arg_category, arg_description, arg_uuid, ...)          \
148bf215546Sopenharmony_ci   (struct radv_perfcounter_desc)                                                                  \
149bf215546Sopenharmony_ci   {                                                                                               \
150bf215546Sopenharmony_ci      .impl = {.op = arg_op, .regs = {__VA_ARGS__}},                                               \
151bf215546Sopenharmony_ci      .unit = VK_PERFORMANCE_COUNTER_UNIT_##arg_unit##_KHR, .name = arg_name,                      \
152bf215546Sopenharmony_ci      .category = arg_category, .description = arg_description, .uuid = RADV_PC_UUID_##arg_uuid    \
153bf215546Sopenharmony_ci   }
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci#define ADD_PC(op, unit, name, category, description, uuid, ...)                                   \
156bf215546Sopenharmony_ci   do {                                                                                            \
157bf215546Sopenharmony_ci      if (descs) {                                                                                 \
158bf215546Sopenharmony_ci         descs[*count] = PC_DESC((op), unit, name, category, description, uuid, __VA_ARGS__);      \
159bf215546Sopenharmony_ci      }                                                                                            \
160bf215546Sopenharmony_ci      ++*count;                                                                                    \
161bf215546Sopenharmony_ci   } while (0)
162bf215546Sopenharmony_ci#define CTR(block, ctr) (S_REG_BLOCK(block) | S_REG_SEL(ctr))
163bf215546Sopenharmony_ci#define CONSTANT(v)     (S_REG_CONSTANT(1) | (uint32_t)(v))
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_cienum { GRBM_PERF_SEL_GUI_ACTIVE = CTR(GRBM, 2) };
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_cienum { CPF_PERF_SEL_CPF_STAT_BUSY_GFX10 = CTR(CPF, 0x18) };
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_cienum {
170bf215546Sopenharmony_ci   GL1C_PERF_SEL_REQ = CTR(GL1C, 0xe),
171bf215546Sopenharmony_ci   GL1C_PERF_SEL_REQ_MISS = CTR(GL1C, 0x12),
172bf215546Sopenharmony_ci};
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_cienum {
175bf215546Sopenharmony_ci   GL2C_PERF_SEL_REQ = CTR(GL2C, 0x3),
176bf215546Sopenharmony_ci
177bf215546Sopenharmony_ci   GL2C_PERF_SEL_MISS_GFX101 = CTR(GL2C, 0x23),
178bf215546Sopenharmony_ci   GL2C_PERF_SEL_MC_WRREQ_GFX101 = CTR(GL2C, 0x4b),
179bf215546Sopenharmony_ci   GL2C_PERF_SEL_EA_WRREQ_64B_GFX101 = CTR(GL2C, 0x4c),
180bf215546Sopenharmony_ci   GL2C_PERF_SEL_EA_RDREQ_32B_GFX101 = CTR(GL2C, 0x59),
181bf215546Sopenharmony_ci   GL2C_PERF_SEL_EA_RDREQ_64B_GFX101 = CTR(GL2C, 0x5a),
182bf215546Sopenharmony_ci   GL2C_PERF_SEL_EA_RDREQ_96B_GFX101 = CTR(GL2C, 0x5b),
183bf215546Sopenharmony_ci   GL2C_PERF_SEL_EA_RDREQ_128B_GFX101 = CTR(GL2C, 0x5c),
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_ci   GL2C_PERF_SEL_MISS_GFX103 = CTR(GL2C, 0x2b),
186bf215546Sopenharmony_ci   GL2C_PERF_SEL_MC_WRREQ_GFX103 = CTR(GL2C, 0x53),
187bf215546Sopenharmony_ci   GL2C_PERF_SEL_EA_WRREQ_64B_GFX103 = CTR(GL2C, 0x55),
188bf215546Sopenharmony_ci   GL2C_PERF_SEL_EA_RDREQ_32B_GFX103 = CTR(GL2C, 0x63),
189bf215546Sopenharmony_ci   GL2C_PERF_SEL_EA_RDREQ_64B_GFX103 = CTR(GL2C, 0x64),
190bf215546Sopenharmony_ci   GL2C_PERF_SEL_EA_RDREQ_96B_GFX103 = CTR(GL2C, 0x65),
191bf215546Sopenharmony_ci   GL2C_PERF_SEL_EA_RDREQ_128B_GFX103 = CTR(GL2C, 0x66),
192bf215546Sopenharmony_ci};
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_cienum {
195bf215546Sopenharmony_ci   SQ_PERF_SEL_WAVES = CTR(SQ, 0x4),
196bf215546Sopenharmony_ci   SQ_PERF_SEL_INSTS_ALL_GFX10 = CTR(SQ, 0x31),
197bf215546Sopenharmony_ci   SQ_PERF_SEL_INSTS_GDS_GFX10 = CTR(SQ, 0x37),
198bf215546Sopenharmony_ci   SQ_PERF_SEL_INSTS_LDS_GFX10 = CTR(SQ, 0x3b),
199bf215546Sopenharmony_ci   SQ_PERF_SEL_INSTS_SALU_GFX10 = CTR(SQ, 0x3c),
200bf215546Sopenharmony_ci   SQ_PERF_SEL_INSTS_SMEM_GFX10 = CTR(SQ, 0x3d),
201bf215546Sopenharmony_ci   SQ_PERF_SEL_INSTS_VALU_GFX10 = CTR(SQ, 0x40),
202bf215546Sopenharmony_ci   SQ_PERF_SEL_INSTS_TEX_LOAD_GFX10 = CTR(SQ, 0x45),
203bf215546Sopenharmony_ci   SQ_PERF_SEL_INSTS_TEX_STORE_GFX10 = CTR(SQ, 0x46),
204bf215546Sopenharmony_ci   SQ_PERF_SEL_INST_CYCLES_VALU_GFX10 = CTR(SQ, 0x75),
205bf215546Sopenharmony_ci};
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_cienum {
208bf215546Sopenharmony_ci   TCP_PERF_SEL_REQ_GFX10 = CTR(TCP, 0x9),
209bf215546Sopenharmony_ci   TCP_PERF_SEL_REQ_MISS_GFX10 = CTR(TCP, 0x12),
210bf215546Sopenharmony_ci};
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_ci#define CTR_NUM_SIMD                                                                               \
213bf215546Sopenharmony_ci   CONSTANT(pdev->rad_info.num_simd_per_compute_unit * pdev->rad_info.num_cu)
214bf215546Sopenharmony_ci#define CTR_NUM_CUS CONSTANT(pdev->rad_info.num_cu)
215bf215546Sopenharmony_ci
216bf215546Sopenharmony_cistatic void
217bf215546Sopenharmony_ciradv_query_perfcounter_descs(struct radv_physical_device *pdev, uint32_t *count,
218bf215546Sopenharmony_ci                             struct radv_perfcounter_desc *descs)
219bf215546Sopenharmony_ci{
220bf215546Sopenharmony_ci   *count = 0;
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_ci   ADD_PC(RADV_PC_OP_MAX, CYCLES, "GPU active cycles", "GRBM",
223bf215546Sopenharmony_ci          "cycles the GPU is active processing a command buffer.", GPU_CYCLES,
224bf215546Sopenharmony_ci          GRBM_PERF_SEL_GUI_ACTIVE);
225bf215546Sopenharmony_ci
226bf215546Sopenharmony_ci   ADD_PC(RADV_PC_OP_SUM, GENERIC, "Waves", "Shaders", "Number of waves executed", SHADER_WAVES,
227bf215546Sopenharmony_ci          SQ_PERF_SEL_WAVES);
228bf215546Sopenharmony_ci   ADD_PC(RADV_PC_OP_SUM, GENERIC, "Instructions", "Shaders", "Number of Instructions executed",
229bf215546Sopenharmony_ci          SHADER_INSTRUCTIONS, SQ_PERF_SEL_INSTS_ALL_GFX10);
230bf215546Sopenharmony_ci   ADD_PC(RADV_PC_OP_SUM, GENERIC, "VALU Instructions", "Shaders",
231bf215546Sopenharmony_ci          "Number of VALU Instructions executed", SHADER_INSTRUCTIONS_VALU,
232bf215546Sopenharmony_ci          SQ_PERF_SEL_INSTS_VALU_GFX10);
233bf215546Sopenharmony_ci   ADD_PC(RADV_PC_OP_SUM, GENERIC, "SALU Instructions", "Shaders",
234bf215546Sopenharmony_ci          "Number of SALU Instructions executed", SHADER_INSTRUCTIONS_SALU,
235bf215546Sopenharmony_ci          SQ_PERF_SEL_INSTS_SALU_GFX10);
236bf215546Sopenharmony_ci   ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Load Instructions", "Shaders",
237bf215546Sopenharmony_ci          "Number of VMEM load instructions executed", SHADER_INSTRUCTIONS_VMEM_LOAD,
238bf215546Sopenharmony_ci          SQ_PERF_SEL_INSTS_TEX_LOAD_GFX10);
239bf215546Sopenharmony_ci   ADD_PC(RADV_PC_OP_SUM, GENERIC, "SMEM Load Instructions", "Shaders",
240bf215546Sopenharmony_ci          "Number of SMEM load instructions executed", SHADER_INSTRUCTIONS_SMEM_LOAD,
241bf215546Sopenharmony_ci          SQ_PERF_SEL_INSTS_SMEM_GFX10);
242bf215546Sopenharmony_ci   ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Store Instructions", "Shaders",
243bf215546Sopenharmony_ci          "Number of VMEM store instructions executed", SHADER_INSTRUCTIONS_VMEM_STORE,
244bf215546Sopenharmony_ci          SQ_PERF_SEL_INSTS_TEX_STORE_GFX10);
245bf215546Sopenharmony_ci   ADD_PC(RADV_PC_OP_SUM, GENERIC, "LDS Instructions", "Shaders",
246bf215546Sopenharmony_ci          "Number of LDS Instructions executed", SHADER_INSTRUCTIONS_LDS,
247bf215546Sopenharmony_ci          SQ_PERF_SEL_INSTS_LDS_GFX10);
248bf215546Sopenharmony_ci   ADD_PC(RADV_PC_OP_SUM, GENERIC, "GDS Instructions", "Shaders",
249bf215546Sopenharmony_ci          "Number of GDS Instructions executed", SHADER_INSTRUCTIONS_GDS,
250bf215546Sopenharmony_ci          SQ_PERF_SEL_INSTS_GDS_GFX10);
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_ci   ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "VALU Busy", "Shader Utilization",
253bf215546Sopenharmony_ci          "Percentage of time the VALU units are busy", SHADER_VALU_BUSY,
254bf215546Sopenharmony_ci          SQ_PERF_SEL_INST_CYCLES_VALU_GFX10, CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_SIMD);
255bf215546Sopenharmony_ci   ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "SALU Busy", "Shader Utilization",
256bf215546Sopenharmony_ci          "Percentage of time the SALU units are busy", SHADER_SALU_BUSY,
257bf215546Sopenharmony_ci          SQ_PERF_SEL_INSTS_SALU_GFX10, CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_CUS);
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci   if (pdev->rad_info.gfx_level >= GFX10_3) {
260bf215546Sopenharmony_ci      ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory",
261bf215546Sopenharmony_ci             "Number of bytes read from VRAM", VRAM_READ_SIZE, GL2C_PERF_SEL_EA_RDREQ_32B_GFX103,
262bf215546Sopenharmony_ci             CONSTANT(32), GL2C_PERF_SEL_EA_RDREQ_64B_GFX103, CONSTANT(64),
263bf215546Sopenharmony_ci             GL2C_PERF_SEL_EA_RDREQ_96B_GFX103, CONSTANT(96), GL2C_PERF_SEL_EA_RDREQ_128B_GFX103,
264bf215546Sopenharmony_ci             CONSTANT(128));
265bf215546Sopenharmony_ci      ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM write size", "Memory",
266bf215546Sopenharmony_ci             "Number of bytes written to VRAM", VRAM_WRITE_SIZE, GL2C_PERF_SEL_MC_WRREQ_GFX103,
267bf215546Sopenharmony_ci             CONSTANT(32), GL2C_PERF_SEL_EA_WRREQ_64B_GFX103, CONSTANT(64), CONSTANT(0),
268bf215546Sopenharmony_ci             CONSTANT(0), CONSTANT(0), CONSTANT(0));
269bf215546Sopenharmony_ci   } else {
270bf215546Sopenharmony_ci      ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory",
271bf215546Sopenharmony_ci             "Number of bytes read from VRAM", VRAM_READ_SIZE, GL2C_PERF_SEL_EA_RDREQ_32B_GFX101,
272bf215546Sopenharmony_ci             CONSTANT(32), GL2C_PERF_SEL_EA_RDREQ_64B_GFX101, CONSTANT(64),
273bf215546Sopenharmony_ci             GL2C_PERF_SEL_EA_RDREQ_96B_GFX101, CONSTANT(96), GL2C_PERF_SEL_EA_RDREQ_128B_GFX101,
274bf215546Sopenharmony_ci             CONSTANT(128));
275bf215546Sopenharmony_ci      ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM write size", "Memory",
276bf215546Sopenharmony_ci             "Number of bytes written to VRAM", VRAM_WRITE_SIZE, GL2C_PERF_SEL_MC_WRREQ_GFX101,
277bf215546Sopenharmony_ci             CONSTANT(32), GL2C_PERF_SEL_EA_WRREQ_64B_GFX101, CONSTANT(32), CONSTANT(0),
278bf215546Sopenharmony_ci             CONSTANT(0), CONSTANT(0), CONSTANT(0));
279bf215546Sopenharmony_ci   }
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci   ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L0 cache hit ratio", "Memory", "Hit ratio of L0 cache",
282bf215546Sopenharmony_ci          L0_CACHE_HIT_RATIO, TCP_PERF_SEL_REQ_MISS_GFX10, TCP_PERF_SEL_REQ_GFX10);
283bf215546Sopenharmony_ci   ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L1 cache hit ratio", "Memory", "Hit ratio of L1 cache",
284bf215546Sopenharmony_ci          L1_CACHE_HIT_RATIO, GL1C_PERF_SEL_REQ_MISS, GL1C_PERF_SEL_REQ);
285bf215546Sopenharmony_ci   if (pdev->rad_info.gfx_level >= GFX10_3) {
286bf215546Sopenharmony_ci      ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L2 cache hit ratio", "Memory",
287bf215546Sopenharmony_ci             "Hit ratio of L2 cache", L2_CACHE_HIT_RATIO, GL2C_PERF_SEL_MISS_GFX103,
288bf215546Sopenharmony_ci             GL2C_PERF_SEL_REQ);
289bf215546Sopenharmony_ci   } else {
290bf215546Sopenharmony_ci      ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L2 cache hit ratio", "Memory",
291bf215546Sopenharmony_ci             "Hit ratio of L2 cache", L2_CACHE_HIT_RATIO, GL2C_PERF_SEL_MISS_GFX101,
292bf215546Sopenharmony_ci             GL2C_PERF_SEL_REQ);
293bf215546Sopenharmony_ci   }
294bf215546Sopenharmony_ci}
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_cistatic bool
297bf215546Sopenharmony_ciradv_init_perfcounter_descs(struct radv_physical_device *pdev)
298bf215546Sopenharmony_ci{
299bf215546Sopenharmony_ci   if (pdev->perfcounters)
300bf215546Sopenharmony_ci      return true;
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci   uint32_t count;
303bf215546Sopenharmony_ci   radv_query_perfcounter_descs(pdev, &count, NULL);
304bf215546Sopenharmony_ci
305bf215546Sopenharmony_ci   struct radv_perfcounter_desc *descs = malloc(sizeof(*descs) * count);
306bf215546Sopenharmony_ci   if (!descs)
307bf215546Sopenharmony_ci      return false;
308bf215546Sopenharmony_ci
309bf215546Sopenharmony_ci   radv_query_perfcounter_descs(pdev, &count, descs);
310bf215546Sopenharmony_ci   pdev->num_perfcounters = count;
311bf215546Sopenharmony_ci   pdev->perfcounters = descs;
312bf215546Sopenharmony_ci
313bf215546Sopenharmony_ci   return true;
314bf215546Sopenharmony_ci}
315bf215546Sopenharmony_ci
/* qsort() comparator ordering uint32_t values ascending.
 * Returns -1, 0 or 1; never subtracts the operands, so it cannot overflow.
 */
static int
cmp_uint32_t(const void *a, const void *b)
{
   const uint32_t *lhs = a;
   const uint32_t *rhs = b;

   return (*lhs > *rhs) - (*lhs < *rhs);
}
324bf215546Sopenharmony_ci
325bf215546Sopenharmony_cistatic VkResult
326bf215546Sopenharmony_ciradv_get_counter_registers(const struct radv_physical_device *pdevice, uint32_t num_indices,
327bf215546Sopenharmony_ci                           const uint32_t *indices, unsigned *out_num_regs, uint32_t **out_regs)
328bf215546Sopenharmony_ci{
329bf215546Sopenharmony_ci   ASSERTED uint32_t num_counters = pdevice->num_perfcounters;
330bf215546Sopenharmony_ci   const struct radv_perfcounter_desc *descs = pdevice->perfcounters;
331bf215546Sopenharmony_ci
332bf215546Sopenharmony_ci   unsigned full_reg_cnt = num_indices * ARRAY_SIZE(descs->impl.regs);
333bf215546Sopenharmony_ci   uint32_t *regs = malloc(full_reg_cnt * sizeof(uint32_t));
334bf215546Sopenharmony_ci   if (!regs)
335bf215546Sopenharmony_ci      return VK_ERROR_OUT_OF_HOST_MEMORY;
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_ci   unsigned reg_cnt = 0;
338bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_indices; ++i) {
339bf215546Sopenharmony_ci      uint32_t index = indices[i];
340bf215546Sopenharmony_ci      assert(index < num_counters);
341bf215546Sopenharmony_ci      for (unsigned j = 0; j < ARRAY_SIZE(descs[index].impl.regs) && descs[index].impl.regs[j];
342bf215546Sopenharmony_ci           ++j) {
343bf215546Sopenharmony_ci         if (!G_REG_CONSTANT(descs[index].impl.regs[j]))
344bf215546Sopenharmony_ci            regs[reg_cnt++] = descs[index].impl.regs[j];
345bf215546Sopenharmony_ci      }
346bf215546Sopenharmony_ci   }
347bf215546Sopenharmony_ci
348bf215546Sopenharmony_ci   qsort(regs, reg_cnt, sizeof(uint32_t), cmp_uint32_t);
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_ci   unsigned deduped_reg_cnt = 0;
351bf215546Sopenharmony_ci   for (unsigned i = 1; i < reg_cnt; ++i) {
352bf215546Sopenharmony_ci      if (regs[i] != regs[deduped_reg_cnt])
353bf215546Sopenharmony_ci         regs[++deduped_reg_cnt] = regs[i];
354bf215546Sopenharmony_ci   }
355bf215546Sopenharmony_ci   ++deduped_reg_cnt;
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci   *out_num_regs = deduped_reg_cnt;
358bf215546Sopenharmony_ci   *out_regs = regs;
359bf215546Sopenharmony_ci   return VK_SUCCESS;
360bf215546Sopenharmony_ci}
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_cistatic unsigned
363bf215546Sopenharmony_ciradv_pc_get_num_instances(const struct radv_physical_device *pdevice, struct ac_pc_block *ac_block)
364bf215546Sopenharmony_ci{
365bf215546Sopenharmony_ci   return ac_block->num_instances *
366bf215546Sopenharmony_ci          ((ac_block->b->b->flags & AC_PC_BLOCK_SE) ? pdevice->rad_info.max_se : 1);
367bf215546Sopenharmony_ci}
368bf215546Sopenharmony_ci
369bf215546Sopenharmony_cistatic unsigned
370bf215546Sopenharmony_ciradv_get_num_counter_passes(const struct radv_physical_device *pdevice, unsigned num_regs,
371bf215546Sopenharmony_ci                            const uint32_t *regs)
372bf215546Sopenharmony_ci{
373bf215546Sopenharmony_ci   enum ac_pc_gpu_block prev_block = NUM_GPU_BLOCK;
374bf215546Sopenharmony_ci   unsigned block_reg_count = 0;
375bf215546Sopenharmony_ci   struct ac_pc_block *ac_block = NULL;
376bf215546Sopenharmony_ci   unsigned passes_needed = 1;
377bf215546Sopenharmony_ci
378bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_regs; ++i) {
379bf215546Sopenharmony_ci      enum ac_pc_gpu_block block = G_REG_BLOCK(regs[i]);
380bf215546Sopenharmony_ci
381bf215546Sopenharmony_ci      if (block != prev_block) {
382bf215546Sopenharmony_ci         block_reg_count = 0;
383bf215546Sopenharmony_ci         prev_block = block;
384bf215546Sopenharmony_ci         ac_block = ac_pc_get_block(&pdevice->ac_perfcounters, block);
385bf215546Sopenharmony_ci      }
386bf215546Sopenharmony_ci
387bf215546Sopenharmony_ci      ++block_reg_count;
388bf215546Sopenharmony_ci
389bf215546Sopenharmony_ci      passes_needed =
390bf215546Sopenharmony_ci         MAX2(passes_needed, DIV_ROUND_UP(block_reg_count, ac_block->b->b->num_counters));
391bf215546Sopenharmony_ci   }
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_ci   return passes_needed;
394bf215546Sopenharmony_ci}
395bf215546Sopenharmony_ci
396bf215546Sopenharmony_civoid
397bf215546Sopenharmony_ciradv_pc_deinit_query_pool(struct radv_pc_query_pool *pool)
398bf215546Sopenharmony_ci{
399bf215546Sopenharmony_ci   free(pool->counters);
400bf215546Sopenharmony_ci   free(pool->pc_regs);
401bf215546Sopenharmony_ci}
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_ciVkResult
404bf215546Sopenharmony_ciradv_pc_init_query_pool(struct radv_physical_device *pdevice,
405bf215546Sopenharmony_ci                        const VkQueryPoolCreateInfo *pCreateInfo, struct radv_pc_query_pool *pool)
406bf215546Sopenharmony_ci{
407bf215546Sopenharmony_ci   const VkQueryPoolPerformanceCreateInfoKHR *perf_info =
408bf215546Sopenharmony_ci      vk_find_struct_const(pCreateInfo->pNext, QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR);
409bf215546Sopenharmony_ci   VkResult result;
410bf215546Sopenharmony_ci
411bf215546Sopenharmony_ci   if (!radv_init_perfcounter_descs(pdevice))
412bf215546Sopenharmony_ci      return VK_ERROR_OUT_OF_HOST_MEMORY;
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_ci   result =
415bf215546Sopenharmony_ci      radv_get_counter_registers(pdevice, perf_info->counterIndexCount, perf_info->pCounterIndices,
416bf215546Sopenharmony_ci                                 &pool->num_pc_regs, &pool->pc_regs);
417bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
418bf215546Sopenharmony_ci      return result;
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci   pool->num_passes = radv_get_num_counter_passes(pdevice, pool->num_pc_regs, pool->pc_regs);
421bf215546Sopenharmony_ci
422bf215546Sopenharmony_ci   uint32_t *pc_reg_offsets = malloc(pool->num_pc_regs * sizeof(uint32_t));
423bf215546Sopenharmony_ci   if (!pc_reg_offsets)
424bf215546Sopenharmony_ci      return VK_ERROR_OUT_OF_HOST_MEMORY;
425bf215546Sopenharmony_ci
426bf215546Sopenharmony_ci   unsigned offset = 0;
427bf215546Sopenharmony_ci   for (unsigned i = 0; i < pool->num_pc_regs; ++i) {
428bf215546Sopenharmony_ci      enum ac_pc_gpu_block block = pool->pc_regs[i] >> 16;
429bf215546Sopenharmony_ci      struct ac_pc_block *ac_block = ac_pc_get_block(&pdevice->ac_perfcounters, block);
430bf215546Sopenharmony_ci      unsigned num_instances = radv_pc_get_num_instances(pdevice, ac_block);
431bf215546Sopenharmony_ci
432bf215546Sopenharmony_ci      pc_reg_offsets[i] = S_REG_OFFSET(offset) | S_REG_INSTANCES(num_instances);
433bf215546Sopenharmony_ci      offset += sizeof(uint64_t) * 2 * num_instances;
434bf215546Sopenharmony_ci   }
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_ci   /* allow an uint32_t per pass to signal completion. */
437bf215546Sopenharmony_ci   pool->b.stride = offset + 8 * pool->num_passes;
438bf215546Sopenharmony_ci
439bf215546Sopenharmony_ci   pool->num_counters = perf_info->counterIndexCount;
440bf215546Sopenharmony_ci   pool->counters = malloc(pool->num_counters * sizeof(struct radv_perfcounter_impl));
441bf215546Sopenharmony_ci   if (!pool->counters) {
442bf215546Sopenharmony_ci      free(pc_reg_offsets);
443bf215546Sopenharmony_ci      return VK_ERROR_OUT_OF_HOST_MEMORY;
444bf215546Sopenharmony_ci   }
445bf215546Sopenharmony_ci
446bf215546Sopenharmony_ci   for (unsigned i = 0; i < pool->num_counters; ++i) {
447bf215546Sopenharmony_ci      pool->counters[i] = pdevice->perfcounters[perf_info->pCounterIndices[i]].impl;
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_ci      for (unsigned j = 0; j < ARRAY_SIZE(pool->counters[i].regs); ++j) {
450bf215546Sopenharmony_ci         uint32_t reg = pool->counters[i].regs[j];
451bf215546Sopenharmony_ci         if (!reg || G_REG_CONSTANT(reg))
452bf215546Sopenharmony_ci            continue;
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_ci         unsigned k;
455bf215546Sopenharmony_ci         for (k = 0; k < pool->num_pc_regs; ++k)
456bf215546Sopenharmony_ci            if (pool->pc_regs[k] == reg)
457bf215546Sopenharmony_ci               break;
458bf215546Sopenharmony_ci         pool->counters[i].regs[j] = pc_reg_offsets[k];
459bf215546Sopenharmony_ci      }
460bf215546Sopenharmony_ci   }
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci   free(pc_reg_offsets);
463bf215546Sopenharmony_ci   return VK_SUCCESS;
464bf215546Sopenharmony_ci}
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_cistatic void
467bf215546Sopenharmony_ciradv_emit_instance(struct radv_cmd_buffer *cmd_buffer, int se, int instance)
468bf215546Sopenharmony_ci{
469bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
470bf215546Sopenharmony_ci   unsigned value = S_030800_SH_BROADCAST_WRITES(1);
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_ci   if (se >= 0) {
473bf215546Sopenharmony_ci      value |= S_030800_SE_INDEX(se);
474bf215546Sopenharmony_ci   } else {
475bf215546Sopenharmony_ci      value |= S_030800_SE_BROADCAST_WRITES(1);
476bf215546Sopenharmony_ci   }
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ci   if (instance >= 0) {
479bf215546Sopenharmony_ci      value |= S_030800_INSTANCE_INDEX(instance);
480bf215546Sopenharmony_ci   } else {
481bf215546Sopenharmony_ci      value |= S_030800_INSTANCE_BROADCAST_WRITES(1);
482bf215546Sopenharmony_ci   }
483bf215546Sopenharmony_ci
484bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, value);
485bf215546Sopenharmony_ci}
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_cistatic void
488bf215546Sopenharmony_ciradv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count,
489bf215546Sopenharmony_ci                 unsigned *selectors)
490bf215546Sopenharmony_ci{
491bf215546Sopenharmony_ci   struct ac_pc_block_base *regs = block->b->b;
492bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
493bf215546Sopenharmony_ci   unsigned idx;
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_ci   assert(count <= regs->num_counters);
496bf215546Sopenharmony_ci
497bf215546Sopenharmony_ci   /* Fake counters. */
498bf215546Sopenharmony_ci   if (!regs->select0)
499bf215546Sopenharmony_ci      return;
500bf215546Sopenharmony_ci
501bf215546Sopenharmony_ci   for (idx = 0; idx < count; ++idx) {
502bf215546Sopenharmony_ci      radeon_set_perfctr_reg(cmd_buffer, regs->select0[idx],
503bf215546Sopenharmony_ci                             G_REG_SEL(selectors[idx]) | regs->select_or);
504bf215546Sopenharmony_ci   }
505bf215546Sopenharmony_ci
506bf215546Sopenharmony_ci   for (idx = 0; idx < regs->num_spm_counters; idx++) {
507bf215546Sopenharmony_ci      radeon_set_uconfig_reg_seq(cs, regs->select1[idx], 1);
508bf215546Sopenharmony_ci      radeon_emit(cs, 0);
509bf215546Sopenharmony_ci   }
510bf215546Sopenharmony_ci}
511bf215546Sopenharmony_ci
512bf215546Sopenharmony_cistatic void
513bf215546Sopenharmony_ciradv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block,
514bf215546Sopenharmony_ci                                 unsigned count, uint64_t va)
515bf215546Sopenharmony_ci{
516bf215546Sopenharmony_ci   struct ac_pc_block_base *regs = block->b->b;
517bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
518bf215546Sopenharmony_ci   unsigned reg = regs->counter0_lo;
519bf215546Sopenharmony_ci   unsigned reg_delta = 8;
520bf215546Sopenharmony_ci
521bf215546Sopenharmony_ci   assert(regs->select0);
522bf215546Sopenharmony_ci   for (unsigned idx = 0; idx < count; ++idx) {
523bf215546Sopenharmony_ci      if (regs->counters)
524bf215546Sopenharmony_ci         reg = regs->counters[idx];
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
527bf215546Sopenharmony_ci      radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) |
528bf215546Sopenharmony_ci                         COPY_DATA_WR_CONFIRM | COPY_DATA_COUNT_SEL); /* 64 bits */
529bf215546Sopenharmony_ci      radeon_emit(cs, reg >> 2);
530bf215546Sopenharmony_ci      radeon_emit(cs, 0); /* unused */
531bf215546Sopenharmony_ci      radeon_emit(cs, va);
532bf215546Sopenharmony_ci      radeon_emit(cs, va >> 32);
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_ci      va += sizeof(uint64_t) * 2 *
535bf215546Sopenharmony_ci            radv_pc_get_num_instances(cmd_buffer->device->physical_device, block);
536bf215546Sopenharmony_ci      reg += reg_delta;
537bf215546Sopenharmony_ci   }
538bf215546Sopenharmony_ci}
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_cistatic void
541bf215546Sopenharmony_ciradv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count,
542bf215546Sopenharmony_ci                     uint64_t va)
543bf215546Sopenharmony_ci{
544bf215546Sopenharmony_ci   unsigned se_end = 1;
545bf215546Sopenharmony_ci   if (block->b->b->flags & AC_PC_BLOCK_SE)
546bf215546Sopenharmony_ci      se_end = cmd_buffer->device->physical_device->rad_info.max_se;
547bf215546Sopenharmony_ci
548bf215546Sopenharmony_ci   for (unsigned se = 0; se < se_end; ++se) {
549bf215546Sopenharmony_ci      for (unsigned instance = 0; instance < block->num_instances; ++instance) {
550bf215546Sopenharmony_ci         radv_emit_instance(cmd_buffer, se, instance);
551bf215546Sopenharmony_ci         radv_pc_emit_block_instance_read(cmd_buffer, block, count, va);
552bf215546Sopenharmony_ci         va += sizeof(uint64_t) * 2;
553bf215546Sopenharmony_ci      }
554bf215546Sopenharmony_ci   }
555bf215546Sopenharmony_ci}
556bf215546Sopenharmony_ci
557bf215546Sopenharmony_cistatic void
558bf215546Sopenharmony_ciradv_pc_wait_idle(struct radv_cmd_buffer *cmd_buffer)
559bf215546Sopenharmony_ci{
560bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
561bf215546Sopenharmony_ci
562bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
563bf215546Sopenharmony_ci   radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));
564bf215546Sopenharmony_ci
565bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
566bf215546Sopenharmony_ci   radeon_emit(cs, 0);          /* CP_COHER_CNTL */
567bf215546Sopenharmony_ci   radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
568bf215546Sopenharmony_ci   radeon_emit(cs, 0xffffff);   /* CP_COHER_SIZE_HI */
569bf215546Sopenharmony_ci   radeon_emit(cs, 0);          /* CP_COHER_BASE */
570bf215546Sopenharmony_ci   radeon_emit(cs, 0);          /* CP_COHER_BASE_HI */
571bf215546Sopenharmony_ci   radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
572bf215546Sopenharmony_ci   radeon_emit(cs, 0);          /* GCR_CNTL */
573bf215546Sopenharmony_ci
574bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
575bf215546Sopenharmony_ci   radeon_emit(cs, 0);
576bf215546Sopenharmony_ci}
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_cistatic void
579bf215546Sopenharmony_ciradv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool,
580bf215546Sopenharmony_ci                        uint64_t va, bool end)
581bf215546Sopenharmony_ci{
582bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
583bf215546Sopenharmony_ci   struct radv_physical_device *pdevice = cmd_buffer->device->physical_device;
584bf215546Sopenharmony_ci
585bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
586bf215546Sopenharmony_ci   radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
587bf215546Sopenharmony_ci
588bf215546Sopenharmony_ci   radv_pc_wait_idle(cmd_buffer);
589bf215546Sopenharmony_ci
590bf215546Sopenharmony_ci   radv_emit_instance(cmd_buffer, -1, -1);
591bf215546Sopenharmony_ci   radv_emit_windowed_counters(cmd_buffer->device, cs, cmd_buffer->qf, false);
592bf215546Sopenharmony_ci
593bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
594bf215546Sopenharmony_ci                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) |
595bf215546Sopenharmony_ci                             S_036020_PERFMON_SAMPLE_ENABLE(1));
596bf215546Sopenharmony_ci
597bf215546Sopenharmony_ci   for (unsigned pass = 0; pass < pool->num_passes; ++pass) {
598bf215546Sopenharmony_ci      uint64_t pred_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) +
599bf215546Sopenharmony_ci                         PERF_CTR_BO_PASS_OFFSET + 8 * pass;
600bf215546Sopenharmony_ci      uint64_t reg_va = va + (end ? 8 : 0);
601bf215546Sopenharmony_ci
602bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
603bf215546Sopenharmony_ci      radeon_emit(cs, pred_va);
604bf215546Sopenharmony_ci      radeon_emit(cs, pred_va >> 32);
605bf215546Sopenharmony_ci      radeon_emit(cs, 0); /* Cache policy */
606bf215546Sopenharmony_ci
607bf215546Sopenharmony_ci      uint32_t *skip_dwords = cs->buf + cs->cdw;
608bf215546Sopenharmony_ci      radeon_emit(cs, 0);
609bf215546Sopenharmony_ci
610bf215546Sopenharmony_ci      for (unsigned i = 0; i < pool->num_pc_regs;) {
611bf215546Sopenharmony_ci         enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]);
612bf215546Sopenharmony_ci         struct ac_pc_block *ac_block = ac_pc_get_block(&pdevice->ac_perfcounters, block);
613bf215546Sopenharmony_ci         unsigned offset = ac_block->num_instances * pass;
614bf215546Sopenharmony_ci         unsigned num_instances = radv_pc_get_num_instances(pdevice, ac_block);
615bf215546Sopenharmony_ci
616bf215546Sopenharmony_ci         unsigned cnt = 1;
617bf215546Sopenharmony_ci         while (cnt < pool->num_pc_regs - i && block == G_REG_BLOCK(pool->pc_regs[i + cnt]))
618bf215546Sopenharmony_ci            ++cnt;
619bf215546Sopenharmony_ci
620bf215546Sopenharmony_ci         if (offset < cnt) {
621bf215546Sopenharmony_ci            unsigned pass_reg_cnt = MIN2(cnt - offset, ac_block->b->b->num_counters);
622bf215546Sopenharmony_ci            radv_pc_sample_block(cmd_buffer, ac_block, pass_reg_cnt,
623bf215546Sopenharmony_ci                                 reg_va + offset * num_instances * sizeof(uint64_t));
624bf215546Sopenharmony_ci         }
625bf215546Sopenharmony_ci
626bf215546Sopenharmony_ci         i += cnt;
627bf215546Sopenharmony_ci         reg_va += num_instances * sizeof(uint64_t) * 2 * cnt;
628bf215546Sopenharmony_ci      }
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_ci      if (end) {
631bf215546Sopenharmony_ci         uint64_t signal_va = va + pool->b.stride - 8 - 8 * pass;
632bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
633bf215546Sopenharmony_ci         radeon_emit(cs,
634bf215546Sopenharmony_ci                     S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
635bf215546Sopenharmony_ci         radeon_emit(cs, signal_va);
636bf215546Sopenharmony_ci         radeon_emit(cs, signal_va >> 32);
637bf215546Sopenharmony_ci         radeon_emit(cs, 1); /* value */
638bf215546Sopenharmony_ci      }
639bf215546Sopenharmony_ci
640bf215546Sopenharmony_ci      *skip_dwords = cs->buf + cs->cdw - skip_dwords - 1;
641bf215546Sopenharmony_ci   }
642bf215546Sopenharmony_ci
643bf215546Sopenharmony_ci   radv_emit_instance(cmd_buffer, -1, -1);
644bf215546Sopenharmony_ci}
645bf215546Sopenharmony_ci
646bf215546Sopenharmony_civoid
647bf215546Sopenharmony_ciradv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool,
648bf215546Sopenharmony_ci                    uint64_t va)
649bf215546Sopenharmony_ci{
650bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
651bf215546Sopenharmony_ci   struct radv_physical_device *pdevice = cmd_buffer->device->physical_device;
652bf215546Sopenharmony_ci   ASSERTED unsigned cdw_max;
653bf215546Sopenharmony_ci
654bf215546Sopenharmony_ci   cmd_buffer->state.uses_perf_counters = true;
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_ci   cdw_max = radeon_check_space(cmd_buffer->device->ws, cs,
657bf215546Sopenharmony_ci                                256 +                      /* Random one time stuff */
658bf215546Sopenharmony_ci                                   10 * pool->num_passes + /* COND_EXECs */
659bf215546Sopenharmony_ci                                   pool->b.stride / 8 * (5 + 8));
660bf215546Sopenharmony_ci
661bf215546Sopenharmony_ci   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pool->b.bo);
662bf215546Sopenharmony_ci   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->perf_counter_bo);
663bf215546Sopenharmony_ci
664bf215546Sopenharmony_ci   uint64_t perf_ctr_va =
665bf215546Sopenharmony_ci      radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
666bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
667bf215546Sopenharmony_ci   radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
668bf215546Sopenharmony_ci   radeon_emit(cs, perf_ctr_va);
669bf215546Sopenharmony_ci   radeon_emit(cs, perf_ctr_va >> 32);
670bf215546Sopenharmony_ci   radeon_emit(cs, 0); /* value */
671bf215546Sopenharmony_ci
672bf215546Sopenharmony_ci   radv_pc_wait_idle(cmd_buffer);
673bf215546Sopenharmony_ci
674bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
675bf215546Sopenharmony_ci                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
676bf215546Sopenharmony_ci
677bf215546Sopenharmony_ci   radv_emit_inhibit_clockgating(cmd_buffer->device, cs, true);
678bf215546Sopenharmony_ci   radv_emit_spi_config_cntl(cmd_buffer->device, cs, true);
679bf215546Sopenharmony_ci   radv_perfcounter_emit_shaders(cs, 0x7f);
680bf215546Sopenharmony_ci
681bf215546Sopenharmony_ci   for (unsigned pass = 0; pass < pool->num_passes; ++pass) {
682bf215546Sopenharmony_ci      uint64_t pred_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) +
683bf215546Sopenharmony_ci                         PERF_CTR_BO_PASS_OFFSET + 8 * pass;
684bf215546Sopenharmony_ci
685bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
686bf215546Sopenharmony_ci      radeon_emit(cs, pred_va);
687bf215546Sopenharmony_ci      radeon_emit(cs, pred_va >> 32);
688bf215546Sopenharmony_ci      radeon_emit(cs, 0); /* Cache policy */
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci      uint32_t *skip_dwords = cs->buf + cs->cdw;
691bf215546Sopenharmony_ci      radeon_emit(cs, 0);
692bf215546Sopenharmony_ci
693bf215546Sopenharmony_ci      for (unsigned i = 0; i < pool->num_pc_regs;) {
694bf215546Sopenharmony_ci         enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]);
695bf215546Sopenharmony_ci         struct ac_pc_block *ac_block = ac_pc_get_block(&pdevice->ac_perfcounters, block);
696bf215546Sopenharmony_ci         unsigned offset = ac_block->num_instances * pass;
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_ci         unsigned cnt = 1;
699bf215546Sopenharmony_ci         while (cnt < pool->num_pc_regs - i && block == G_REG_BLOCK(pool->pc_regs[i + cnt]))
700bf215546Sopenharmony_ci            ++cnt;
701bf215546Sopenharmony_ci
702bf215546Sopenharmony_ci         if (offset < cnt) {
703bf215546Sopenharmony_ci            unsigned pass_reg_cnt = MIN2(cnt - offset, ac_block->b->b->num_counters);
704bf215546Sopenharmony_ci            radv_emit_select(cmd_buffer, ac_block, pass_reg_cnt, pool->pc_regs + i + offset);
705bf215546Sopenharmony_ci         }
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_ci         i += cnt;
708bf215546Sopenharmony_ci      }
709bf215546Sopenharmony_ci
710bf215546Sopenharmony_ci      *skip_dwords = cs->buf + cs->cdw - skip_dwords - 1;
711bf215546Sopenharmony_ci   }
712bf215546Sopenharmony_ci
713bf215546Sopenharmony_ci   radv_emit_instance(cmd_buffer, -1, -1);
714bf215546Sopenharmony_ci
715bf215546Sopenharmony_ci   /* The following sequence actually starts the perfcounters. */
716bf215546Sopenharmony_ci
717bf215546Sopenharmony_ci   radv_pc_stop_and_sample(cmd_buffer, pool, va, false);
718bf215546Sopenharmony_ci
719bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
720bf215546Sopenharmony_ci                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING));
721bf215546Sopenharmony_ci
722bf215546Sopenharmony_ci   radv_emit_windowed_counters(cmd_buffer->device, cs, cmd_buffer->qf, true);
723bf215546Sopenharmony_ci
724bf215546Sopenharmony_ci   assert(cmd_buffer->cs->cdw <= cdw_max);
725bf215546Sopenharmony_ci}
726bf215546Sopenharmony_ci
727bf215546Sopenharmony_civoid
728bf215546Sopenharmony_ciradv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va)
729bf215546Sopenharmony_ci{
730bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
731bf215546Sopenharmony_ci   ASSERTED unsigned cdw_max;
732bf215546Sopenharmony_ci
733bf215546Sopenharmony_ci   cdw_max =
734bf215546Sopenharmony_ci      radeon_check_space(cmd_buffer->device->ws, cs,
735bf215546Sopenharmony_ci                         256 + /* Reserved for things that don't scale with passes/counters */
736bf215546Sopenharmony_ci                            5 * pool->num_passes + /* COND_EXECs */
737bf215546Sopenharmony_ci                            pool->b.stride / 8 * 8);
738bf215546Sopenharmony_ci
739bf215546Sopenharmony_ci   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pool->b.bo);
740bf215546Sopenharmony_ci   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->perf_counter_bo);
741bf215546Sopenharmony_ci
742bf215546Sopenharmony_ci   uint64_t perf_ctr_va =
743bf215546Sopenharmony_ci      radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
744bf215546Sopenharmony_ci   si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
745bf215546Sopenharmony_ci                              radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0,
746bf215546Sopenharmony_ci                              EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1,
747bf215546Sopenharmony_ci                              cmd_buffer->gfx9_fence_va);
748bf215546Sopenharmony_ci   radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 0xffffffff);
749bf215546Sopenharmony_ci
750bf215546Sopenharmony_ci   radv_pc_wait_idle(cmd_buffer);
751bf215546Sopenharmony_ci   radv_pc_stop_and_sample(cmd_buffer, pool, va, true);
752bf215546Sopenharmony_ci
753bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
754bf215546Sopenharmony_ci                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
755bf215546Sopenharmony_ci   radv_emit_spi_config_cntl(cmd_buffer->device, cs, false);
756bf215546Sopenharmony_ci   radv_emit_inhibit_clockgating(cmd_buffer->device, cs, false);
757bf215546Sopenharmony_ci
758bf215546Sopenharmony_ci   assert(cmd_buffer->cs->cdw <= cdw_max);
759bf215546Sopenharmony_ci}
760bf215546Sopenharmony_ci
/**
 * Sum the (end - begin) deltas of one register over all of its instances.
 *
 * \p data holds begin/end qword pairs; the first pair of this register is at
 * qword index G_REG_OFFSET(reg) / 8. Registers flagged as constant return the
 * low 31 bits of \p reg and never read \p data.
 */
static uint64_t
radv_pc_sum_reg(uint32_t reg, const uint64_t *data)
{
   if (G_REG_CONSTANT(reg))
      return reg & 0x7fffffffu;

   const unsigned num_pairs = G_REG_INSTANCES(reg);
   const unsigned first = G_REG_OFFSET(reg) / 8;
   uint64_t total = 0;

   for (unsigned n = 0; n < num_pairs; n++) {
      const unsigned begin_idx = first + 2 * n;

      total += data[begin_idx + 1] - data[begin_idx];
   }

   return total;
}
777bf215546Sopenharmony_ci
/**
 * Return the largest end value of one register across all of its instances.
 *
 * Only the second qword of each begin/end pair in \p data is considered; the
 * begin value is not subtracted. Registers flagged as constant return the low
 * 31 bits of \p reg and never read \p data.
 */
static uint64_t
radv_pc_max_reg(uint32_t reg, const uint64_t *data)
{
   if (G_REG_CONSTANT(reg))
      return reg & 0x7fffffffu;

   const unsigned num_pairs = G_REG_INSTANCES(reg);
   const unsigned first = G_REG_OFFSET(reg) / 8;
   uint64_t best = 0;

   for (unsigned n = 0; n < num_pairs; n++) {
      const uint64_t end_value = data[first + 2 * n + 1];

      if (end_value > best)
         best = end_value;
   }

   return best;
}
794bf215546Sopenharmony_ci
795bf215546Sopenharmony_cistatic union VkPerformanceCounterResultKHR
796bf215546Sopenharmony_ciradv_pc_get_result(const struct radv_perfcounter_impl *impl, const uint64_t *data)
797bf215546Sopenharmony_ci{
798bf215546Sopenharmony_ci   union VkPerformanceCounterResultKHR result;
799bf215546Sopenharmony_ci
800bf215546Sopenharmony_ci   switch (impl->op) {
801bf215546Sopenharmony_ci   case RADV_PC_OP_MAX:
802bf215546Sopenharmony_ci      result.float64 = radv_pc_max_reg(impl->regs[0], data);
803bf215546Sopenharmony_ci      break;
804bf215546Sopenharmony_ci   case RADV_PC_OP_SUM:
805bf215546Sopenharmony_ci      result.float64 = radv_pc_sum_reg(impl->regs[0], data);
806bf215546Sopenharmony_ci      break;
807bf215546Sopenharmony_ci   case RADV_PC_OP_RATIO_DIVSCALE:
808bf215546Sopenharmony_ci      result.float64 = radv_pc_sum_reg(impl->regs[0], data) /
809bf215546Sopenharmony_ci                       (double)radv_pc_sum_reg(impl->regs[1], data) /
810bf215546Sopenharmony_ci                       radv_pc_sum_reg(impl->regs[2], data) * 100.0;
811bf215546Sopenharmony_ci      break;
812bf215546Sopenharmony_ci   case RADV_PC_OP_REVERSE_RATIO: {
813bf215546Sopenharmony_ci      double tmp = radv_pc_sum_reg(impl->regs[1], data);
814bf215546Sopenharmony_ci      result.float64 = (tmp - radv_pc_sum_reg(impl->regs[0], data)) / tmp * 100.0;
815bf215546Sopenharmony_ci      break;
816bf215546Sopenharmony_ci   }
817bf215546Sopenharmony_ci   case RADV_PC_OP_SUM_WEIGHTED_4:
818bf215546Sopenharmony_ci      result.float64 = 0.0;
819bf215546Sopenharmony_ci      for (unsigned i = 0; i < 4; ++i)
820bf215546Sopenharmony_ci         result.float64 +=
821bf215546Sopenharmony_ci            radv_pc_sum_reg(impl->regs[2 * i], data) * radv_pc_sum_reg(impl->regs[2 * i + 1], data);
822bf215546Sopenharmony_ci      break;
823bf215546Sopenharmony_ci   default:
824bf215546Sopenharmony_ci      unreachable("unhandled performance counter operation");
825bf215546Sopenharmony_ci   }
826bf215546Sopenharmony_ci   return result;
827bf215546Sopenharmony_ci}
828bf215546Sopenharmony_ci
829bf215546Sopenharmony_civoid
830bf215546Sopenharmony_ciradv_pc_get_results(const struct radv_pc_query_pool *pc_pool, const uint64_t *data, void *out)
831bf215546Sopenharmony_ci{
832bf215546Sopenharmony_ci   union VkPerformanceCounterResultKHR *pc_result = out;
833bf215546Sopenharmony_ci
834bf215546Sopenharmony_ci   for (unsigned i = 0; i < pc_pool->num_counters; ++i) {
835bf215546Sopenharmony_ci      pc_result[i] = radv_pc_get_result(pc_pool->counters + i, data);
836bf215546Sopenharmony_ci   }
837bf215546Sopenharmony_ci}
838bf215546Sopenharmony_ci
839bf215546Sopenharmony_ciVkResult
840bf215546Sopenharmony_ciradv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
841bf215546Sopenharmony_ci   VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, uint32_t *pCounterCount,
842bf215546Sopenharmony_ci   VkPerformanceCounterKHR *pCounters, VkPerformanceCounterDescriptionKHR *pCounterDescriptions)
843bf215546Sopenharmony_ci{
844bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
845bf215546Sopenharmony_ci
846bf215546Sopenharmony_ci   if (vk_queue_to_radv(pdevice, queueFamilyIndex) != RADV_QUEUE_GENERAL) {
847bf215546Sopenharmony_ci      *pCounterCount = 0;
848bf215546Sopenharmony_ci      return VK_SUCCESS;
849bf215546Sopenharmony_ci   }
850bf215546Sopenharmony_ci
851bf215546Sopenharmony_ci   if (!radv_init_perfcounter_descs(pdevice))
852bf215546Sopenharmony_ci      return VK_ERROR_OUT_OF_HOST_MEMORY;
853bf215546Sopenharmony_ci
854bf215546Sopenharmony_ci   uint32_t counter_cnt = pdevice->num_perfcounters;
855bf215546Sopenharmony_ci   const struct radv_perfcounter_desc *descs = pdevice->perfcounters;
856bf215546Sopenharmony_ci
857bf215546Sopenharmony_ci   if (!pCounters && !pCounterDescriptions) {
858bf215546Sopenharmony_ci      *pCounterCount = counter_cnt;
859bf215546Sopenharmony_ci      return VK_SUCCESS;
860bf215546Sopenharmony_ci   }
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci   VkResult result = counter_cnt > *pCounterCount ? VK_INCOMPLETE : VK_SUCCESS;
863bf215546Sopenharmony_ci   counter_cnt = MIN2(counter_cnt, *pCounterCount);
864bf215546Sopenharmony_ci   *pCounterCount = counter_cnt;
865bf215546Sopenharmony_ci
866bf215546Sopenharmony_ci   for (uint32_t i = 0; i < counter_cnt; ++i) {
867bf215546Sopenharmony_ci      if (pCounters) {
868bf215546Sopenharmony_ci         pCounters[i].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_KHR;
869bf215546Sopenharmony_ci         pCounters[i].unit = descs[i].unit;
870bf215546Sopenharmony_ci         pCounters[i].scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR;
871bf215546Sopenharmony_ci         pCounters[i].storage = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR;
872bf215546Sopenharmony_ci
873bf215546Sopenharmony_ci         memset(&pCounters[i].uuid, 0, sizeof(pCounters[i].uuid));
874bf215546Sopenharmony_ci         strcpy((char*)&pCounters[i].uuid, "RADV");
875bf215546Sopenharmony_ci
876bf215546Sopenharmony_ci         const uint32_t uuid = descs[i].uuid;
877bf215546Sopenharmony_ci         memcpy(&pCounters[i].uuid[12], &uuid, sizeof(uuid));
878bf215546Sopenharmony_ci      }
879bf215546Sopenharmony_ci
880bf215546Sopenharmony_ci      if (pCounterDescriptions) {
881bf215546Sopenharmony_ci         pCounterDescriptions[i].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_DESCRIPTION_KHR;
882bf215546Sopenharmony_ci         pCounterDescriptions[i].flags =
883bf215546Sopenharmony_ci            VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_BIT_KHR;
884bf215546Sopenharmony_ci         strcpy(pCounterDescriptions[i].name, descs[i].name);
885bf215546Sopenharmony_ci         strcpy(pCounterDescriptions[i].category, descs[i].category);
886bf215546Sopenharmony_ci         strcpy(pCounterDescriptions[i].description, descs[i].description);
887bf215546Sopenharmony_ci      }
888bf215546Sopenharmony_ci   }
889bf215546Sopenharmony_ci   return result;
890bf215546Sopenharmony_ci}
891bf215546Sopenharmony_ci
892bf215546Sopenharmony_civoid
893bf215546Sopenharmony_ciradv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
894bf215546Sopenharmony_ci   VkPhysicalDevice physicalDevice,
895bf215546Sopenharmony_ci   const VkQueryPoolPerformanceCreateInfoKHR *pPerformanceQueryCreateInfo, uint32_t *pNumPasses)
896bf215546Sopenharmony_ci{
897bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
898bf215546Sopenharmony_ci
899bf215546Sopenharmony_ci   if (pPerformanceQueryCreateInfo->counterIndexCount == 0) {
900bf215546Sopenharmony_ci      *pNumPasses = 0;
901bf215546Sopenharmony_ci      return;
902bf215546Sopenharmony_ci   }
903bf215546Sopenharmony_ci
904bf215546Sopenharmony_ci   if (!radv_init_perfcounter_descs(pdevice)) {
905bf215546Sopenharmony_ci      /* Can't return an error, so log */
906bf215546Sopenharmony_ci      fprintf(stderr, "radv: Failed to init perf counters\n");
907bf215546Sopenharmony_ci      *pNumPasses = 1;
908bf215546Sopenharmony_ci      return;
909bf215546Sopenharmony_ci   }
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_ci   assert(vk_queue_to_radv(pdevice, pPerformanceQueryCreateInfo->queueFamilyIndex) ==
912bf215546Sopenharmony_ci          RADV_QUEUE_GENERAL);
913bf215546Sopenharmony_ci
914bf215546Sopenharmony_ci   unsigned num_regs = 0;
915bf215546Sopenharmony_ci   uint32_t *regs = NULL;
916bf215546Sopenharmony_ci   VkResult result =
917bf215546Sopenharmony_ci      radv_get_counter_registers(pdevice, pPerformanceQueryCreateInfo->counterIndexCount,
918bf215546Sopenharmony_ci                                 pPerformanceQueryCreateInfo->pCounterIndices, &num_regs, &regs);
919bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
920bf215546Sopenharmony_ci      /* Can't return an error, so log */
921bf215546Sopenharmony_ci      fprintf(stderr, "radv: Failed to allocate memory for perf counters\n");
922bf215546Sopenharmony_ci   }
923bf215546Sopenharmony_ci
924bf215546Sopenharmony_ci   *pNumPasses = radv_get_num_counter_passes(pdevice, num_regs, regs);
925bf215546Sopenharmony_ci   free(regs);
926bf215546Sopenharmony_ci}
927