1/*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef AC_PERFCOUNTER_H
26#define AC_PERFCOUNTER_H
27
28#include <stdbool.h>
29
30#include "sid.h"
31
32#include "ac_gpu_info.h"
33
/* Max counters per HW block */
#define AC_QUERY_MAX_COUNTERS 16

/* Single-bit flag occupying bit 31 of an unsigned value.
 * NOTE(review): the consumer of this flag is not visible in this header; the
 * name suggests it marks shader windowing in a shader mask - confirm at the
 * use site.
 */
#define AC_PC_SHADERS_WINDOWING (1u << 31)
38
/* Per-block property bits; individual values are OR'ed together into a mask. */
enum ac_pc_block_flags {
   /* The block belongs to the shader engine (SE). */
   AC_PC_BLOCK_SE = 1 << 0,

   /* Report one group per instance rather than accumulating every instance
    * (within an SE). */
   AC_PC_BLOCK_INSTANCE_GROUPS = 1 << 1,

   /* Report one group per SE rather than accumulating instances over all SEs. */
   AC_PC_BLOCK_SE_GROUPS = 1 << 2,

   /* The block is a shader block. */
   AC_PC_BLOCK_SHADER = 1 << 3,

   /* The block is not a shader block, but its perfcounters are windowed by
    * shaders. */
   AC_PC_BLOCK_SHADER_WINDOWED = 1 << 4,
};
57
/* Identifiers of the GPU hardware blocks that can expose performance counters.
 *
 * Every value is spelled out explicitly so that an identifier keeps its number
 * when entries are added. GE1 is an alias of GE, and NUM_GPU_BLOCK is one past
 * the last identifier.
 */
enum ac_pc_gpu_block {
   CPF    = 0x00,
   IA     = 0x01,
   VGT    = 0x02,
   PA_SU  = 0x03,
   PA_SC  = 0x04,
   SPI    = 0x05,
   SQ     = 0x06,
   SX     = 0x07,
   TA     = 0x08,
   TD     = 0x09,
   TCP    = 0x0A,
   TCC    = 0x0B,
   TCA    = 0x0C,
   DB     = 0x0D,
   CB     = 0x0E,
   GDS    = 0x0F,
   SRBM   = 0x10,
   GRBM   = 0x11,
   GRBMSE = 0x12,
   RLC    = 0x13,
   DMA    = 0x14,
   MC     = 0x15,
   CPG    = 0x16,
   CPC    = 0x17,
   WD     = 0x18,
   TCS    = 0x19,
   ATC    = 0x1A,
   ATCL2  = 0x1B,
   MCVML2 = 0x1C,
   EA     = 0x1D,
   RPB    = 0x1E,
   RMI    = 0x1F,
   UMCCH  = 0x20,
   GE     = 0x21,
   GE1    = GE, /* alias: same value as GE */
   GL1A   = 0x22,
   GL1C   = 0x23,
   GL1CG  = 0x24,
   GL2A   = 0x25,
   GL2C   = 0x26,
   CHA    = 0x27,
   CHC    = 0x28,
   CHCG   = 0x29,
   GUS    = 0x2A,
   GCR    = 0x2B,
   PA_PH  = 0x2C,
   UTCL1  = 0x2D,
   GEDIST = 0x2E,
   GESE   = 0x2F,
   DF     = 0x30,
   NUM_GPU_BLOCK, /* count sentinel: DF + 1 */
};
111
/* Base description of one performance-counter hardware block.
 *
 * NOTE(review): the tables that fill in this struct are not visible in this
 * header. Field notes marked "presumably" are inferred from the field names
 * only and should be confirmed against the implementation.
 */
struct ac_pc_block_base {
   enum ac_pc_gpu_block gpu_block; /* hardware block this entry describes */
   const char *name;               /* block name string */
   unsigned num_counters;          /* presumably the number of counters in the block - confirm */
   unsigned flags;                 /* mask of enum ac_pc_block_flags bits (tested by the inline helpers in this header) */

   unsigned select_or;   /* presumably bits OR'ed into a select value - confirm */
   unsigned *select0;    /* presumably an array of select register offsets - confirm */
   unsigned counter0_lo; /* presumably the register offset of counter 0's low word - confirm */
   unsigned *counters;   /* presumably an array of counter register offsets - confirm */

   /* SPM */
   unsigned num_spm_counters;
   unsigned num_spm_wires;
   unsigned *select1;         /* presumably a second array of select register offsets - confirm */
   unsigned spm_block_select;
};
129
/* Pairs a base block description with two extra counts.
 *
 * NOTE(review): the "gfxdescr" name suggests one such descriptor exists per
 * graphics generation; that is not established by this header - confirm.
 */
struct ac_pc_block_gfxdescr {
   struct ac_pc_block_base *b; /* base description of the block */
   unsigned selectors;         /* presumably the number of selectable events - confirm */
   unsigned instances;         /* presumably the number of block instances - confirm */
};
135
/* Per-device state of one performance-counter block.
 *
 * The inline helpers in this header reach the block flags through b->b->flags.
 */
struct ac_pc_block {
   const struct ac_pc_block_gfxdescr *b; /* descriptor this block was built from */
   unsigned num_instances;               /* instance count; compared against 1 by ac_pc_block_has_per_instance_groups() */

   unsigned num_groups;
   /* NOTE(review): presumably a flat buffer of names laid out
    * group_name_stride bytes apart - confirm against ac_init_block_names(). */
   char *group_names;
   unsigned group_name_stride;

   /* NOTE(review): presumably laid out like group_names, using
    * selector_name_stride - confirm. */
   char *selector_names;
   unsigned selector_name_stride;
};
147
/* Top-level performance-counter state: a set of blocks plus two grouping
 * options.
 */
struct ac_perfcounters {
   unsigned num_groups;
   unsigned num_blocks;        /* presumably the number of entries in blocks - confirm */
   struct ac_pc_block *blocks;

   /* Read by ac_pc_block_has_per_se_groups(): when set, blocks flagged
    * AC_PC_BLOCK_SE get per-SE groups. */
   bool separate_se;
   /* Read by ac_pc_block_has_per_instance_groups(): when set, blocks with more
    * than one instance get per-instance groups. */
   bool separate_instance;
};
156
/* Name suffix for each shader type.
 *
 * The entry order is deliberate: it stays compatible with the performance
 * counter group IDs that GPUPerfStudio hardcodes.
 */
static const char *const ac_pc_shader_type_suffixes[] = {
   "",    /* 0 */
   "_ES", /* 1 */
   "_GS", /* 2 */
   "_VS", /* 3 */
   "_PS", /* 4 */
   "_LS", /* 5 */
   "_HS", /* 6 */
   "_CS", /* 7 */
};
162
/* Enable bits for each shader type, listed in the same order as
 * ac_pc_shader_type_suffixes (ES, GS, VS, PS, LS, HS, CS after the first
 * entry).
 *
 * NOTE(review): the S_036780_* macros come from sid.h and are not visible
 * here; the first entry (0x7f, paired with the empty suffix) presumably
 * selects all shader types at once - confirm against the register definition.
 */
static const unsigned ac_pc_shader_type_bits[] = {
   0x7f,
   S_036780_ES_EN(1),
   S_036780_GS_EN(1),
   S_036780_VS_EN(1),
   S_036780_PS_EN(1),
   S_036780_LS_EN(1),
   S_036780_HS_EN(1),
   S_036780_CS_EN(1),
};
173
174static inline bool
175ac_pc_block_has_per_se_groups(const struct ac_perfcounters *pc,
176                              const struct ac_pc_block *block)
177{
178   return block->b->b->flags & AC_PC_BLOCK_SE_GROUPS ||
179          (block->b->b->flags & AC_PC_BLOCK_SE && pc->separate_se);
180}
181
182static inline bool
183ac_pc_block_has_per_instance_groups(const struct ac_perfcounters *pc,
184                                    const struct ac_pc_block *block)
185{
186   return block->b->b->flags & AC_PC_BLOCK_INSTANCE_GROUPS ||
187          (block->num_instances > 1 && pc->separate_instance);
188}
189
/* NOTE(review): the implementations of the functions declared below are not
 * visible in this header. The descriptions are inferred from names and
 * signatures only - confirm against the implementation before relying on
 * them.
 */

/* Presumably finds the block owning the counter at global index `index`;
 * base_gid and sub_index are non-const pointers and look like outputs. */
struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc,
                                      unsigned index, unsigned *base_gid,
                                      unsigned *sub_index);
/* Presumably finds the block owning the group at *index; index is a non-const
 * pointer, so it may be updated by the call. */
struct ac_pc_block *ac_lookup_group(const struct ac_perfcounters *pc,
                                    unsigned *index);

/* Presumably returns the block matching gpu_block. */
struct ac_pc_block *ac_pc_get_block(const struct ac_perfcounters *pc,
                                    enum ac_pc_gpu_block gpu_block);

/* Presumably fills in the name fields of `block`; the meaning of the bool
 * result is not shown here. */
bool ac_init_block_names(const struct radeon_info *info,
                         const struct ac_perfcounters *pc,
                         struct ac_pc_block *block);

/* Presumably initializes `pc` for the device described by `info`; the meaning
 * of the bool result is not shown here. */
bool ac_init_perfcounters(const struct radeon_info *info,
                          bool separate_se,
                          bool separate_instance,
                          struct ac_perfcounters *pc);
/* Presumably releases what ac_init_perfcounters() set up in `pc`. */
void ac_destroy_perfcounters(struct ac_perfcounters *pc);
208
209#endif
210