1/* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25#ifndef AC_PERFCOUNTER_H 26#define AC_PERFCOUNTER_H 27 28#include <stdbool.h> 29 30#include "sid.h" 31 32#include "ac_gpu_info.h" 33 34/* Max counters per HW block */ 35#define AC_QUERY_MAX_COUNTERS 16 36 37#define AC_PC_SHADERS_WINDOWING (1u << 31) 38 39enum ac_pc_block_flags 40{ 41 /* This block is part of the shader engine */ 42 AC_PC_BLOCK_SE = (1 << 0), 43 44 /* Expose per-instance groups instead of summing all instances (within 45 * an SE). */ 46 AC_PC_BLOCK_INSTANCE_GROUPS = (1 << 1), 47 48 /* Expose per-SE groups instead of summing instances across SEs. */ 49 AC_PC_BLOCK_SE_GROUPS = (1 << 2), 50 51 /* Shader block */ 52 AC_PC_BLOCK_SHADER = (1 << 3), 53 54 /* Non-shader block with perfcounters windowed by shaders. */ 55 AC_PC_BLOCK_SHADER_WINDOWED = (1 << 4), 56}; 57 58enum ac_pc_gpu_block { 59 CPF = 0x0, 60 IA = 0x1, 61 VGT = 0x2, 62 PA_SU = 0x3, 63 PA_SC = 0x4, 64 SPI = 0x5, 65 SQ = 0x6, 66 SX = 0x7, 67 TA = 0x8, 68 TD = 0x9, 69 TCP = 0xA, 70 TCC = 0xB, 71 TCA = 0xC, 72 DB = 0xD, 73 CB = 0xE, 74 GDS = 0xF, 75 SRBM = 0x10, 76 GRBM = 0x11, 77 GRBMSE = 0x12, 78 RLC = 0x13, 79 DMA = 0x14, 80 MC = 0x15, 81 CPG = 0x16, 82 CPC = 0x17, 83 WD = 0x18, 84 TCS = 0x19, 85 ATC = 0x1A, 86 ATCL2 = 0x1B, 87 MCVML2 = 0x1C, 88 EA = 0x1D, 89 RPB = 0x1E, 90 RMI = 0x1F, 91 UMCCH = 0x20, 92 GE = 0x21, 93 GE1 = GE, 94 GL1A = 0x22, 95 GL1C = 0x23, 96 GL1CG = 0x24, 97 GL2A = 0x25, 98 GL2C = 0x26, 99 CHA = 0x27, 100 CHC = 0x28, 101 CHCG = 0x29, 102 GUS = 0x2A, 103 GCR = 0x2B, 104 PA_PH = 0x2C, 105 UTCL1 = 0x2D, 106 GEDIST = 0x2E, 107 GESE = 0x2F, 108 DF = 0x30, 109 NUM_GPU_BLOCK, 110}; 111 112struct ac_pc_block_base { 113 enum ac_pc_gpu_block gpu_block; 114 const char *name; 115 unsigned num_counters; 116 unsigned flags; 117 118 unsigned select_or; 119 unsigned *select0; 120 unsigned counter0_lo; 121 unsigned *counters; 122 123 /* SPM */ 124 unsigned num_spm_counters; 125 unsigned num_spm_wires; 126 unsigned *select1; 127 unsigned spm_block_select; 128}; 129 130struct ac_pc_block_gfxdescr { 131 struct ac_pc_block_base *b; 132 unsigned selectors; 133 unsigned instances; 134}; 135 136struct ac_pc_block { 137 const struct ac_pc_block_gfxdescr *b; 138 unsigned num_instances; 139 140 unsigned num_groups; 141 char *group_names; 142 unsigned group_name_stride; 143 144 char *selector_names; 145 unsigned selector_name_stride; 146}; 147 148struct ac_perfcounters { 149 unsigned num_groups; 150 unsigned num_blocks; 151 struct ac_pc_block *blocks; 152 153 bool separate_se; 154 bool separate_instance; 155}; 156 157/* The order is chosen to be compatible with GPUPerfStudio's hardcoding of 158 * performance counter group IDs. 159 */ 160static const char *const ac_pc_shader_type_suffixes[] = {"", "_ES", "_GS", "_VS", 161 "_PS", "_LS", "_HS", "_CS"}; 162 163static const unsigned ac_pc_shader_type_bits[] = { 164 0x7f, 165 S_036780_ES_EN(1), 166 S_036780_GS_EN(1), 167 S_036780_VS_EN(1), 168 S_036780_PS_EN(1), 169 S_036780_LS_EN(1), 170 S_036780_HS_EN(1), 171 S_036780_CS_EN(1), 172}; 173 174static inline bool 175ac_pc_block_has_per_se_groups(const struct ac_perfcounters *pc, 176 const struct ac_pc_block *block) 177{ 178 return block->b->b->flags & AC_PC_BLOCK_SE_GROUPS || 179 (block->b->b->flags & AC_PC_BLOCK_SE && pc->separate_se); 180} 181 182static inline bool 183ac_pc_block_has_per_instance_groups(const struct ac_perfcounters *pc, 184 const struct ac_pc_block *block) 185{ 186 return block->b->b->flags & AC_PC_BLOCK_INSTANCE_GROUPS || 187 (block->num_instances > 1 && pc->separate_instance); 188} 189 190struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc, 191 unsigned index, unsigned *base_gid, 192 unsigned *sub_index); 193struct ac_pc_block *ac_lookup_group(const struct ac_perfcounters *pc, 194 unsigned *index); 195 196struct ac_pc_block *ac_pc_get_block(const struct ac_perfcounters *pc, 197 enum ac_pc_gpu_block gpu_block); 198 199bool ac_init_block_names(const struct radeon_info *info, 200 const struct ac_perfcounters *pc, 201 struct ac_pc_block *block); 202 203bool ac_init_perfcounters(const struct radeon_info *info, 204 bool separate_se, 205 bool separate_instance, 206 struct ac_perfcounters *pc); 207void ac_destroy_perfcounters(struct ac_perfcounters *pc); 208 209#endif 210