1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include <inttypes.h> 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include "ac_perfcounter.h" 27bf215546Sopenharmony_ci#include "amdgfxregs.h" 28bf215546Sopenharmony_ci#include "radv_cs.h" 29bf215546Sopenharmony_ci#include "radv_private.h" 30bf215546Sopenharmony_ci#include "sid.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_civoid 33bf215546Sopenharmony_ciradv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders) 34bf215546Sopenharmony_ci{ 35bf215546Sopenharmony_ci radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2); 36bf215546Sopenharmony_ci radeon_emit(cs, shaders & 0x7f); 37bf215546Sopenharmony_ci radeon_emit(cs, 0xffffffff); 38bf215546Sopenharmony_ci} 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_cistatic void 41bf215546Sopenharmony_ciradv_emit_windowed_counters(struct radv_device *device, struct radeon_cmdbuf *cs, int family, 42bf215546Sopenharmony_ci bool enable) 43bf215546Sopenharmony_ci{ 44bf215546Sopenharmony_ci if (family == RADV_QUEUE_GENERAL) { 45bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 46bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(enable ? V_028A90_PERFCOUNTER_START : V_028A90_PERFCOUNTER_STOP) | 47bf215546Sopenharmony_ci EVENT_INDEX(0)); 48bf215546Sopenharmony_ci } 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(enable)); 51bf215546Sopenharmony_ci} 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_civoid 54bf215546Sopenharmony_ciradv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs) 55bf215546Sopenharmony_ci{ 56bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, 57bf215546Sopenharmony_ci S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | 58bf215546Sopenharmony_ci S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET)); 59bf215546Sopenharmony_ci} 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_civoid 62bf215546Sopenharmony_ciradv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family) 63bf215546Sopenharmony_ci{ 64bf215546Sopenharmony_ci /* Start SPM counters. */ 65bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, 66bf215546Sopenharmony_ci S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | 67bf215546Sopenharmony_ci S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING)); 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci radv_emit_windowed_counters(device, cs, family, true); 70bf215546Sopenharmony_ci} 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_civoid 73bf215546Sopenharmony_ciradv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family) 74bf215546Sopenharmony_ci{ 75bf215546Sopenharmony_ci radv_emit_windowed_counters(device, cs, family, false); 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci /* Stop SPM counters. */ 78bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, 79bf215546Sopenharmony_ci S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | 80bf215546Sopenharmony_ci S_036020_SPM_PERFMON_STATE(device->physical_device->rad_info.never_stop_sq_perf_counters ? 81bf215546Sopenharmony_ci V_036020_STRM_PERFMON_STATE_START_COUNTING : 82bf215546Sopenharmony_ci V_036020_STRM_PERFMON_STATE_STOP_COUNTING)); 83bf215546Sopenharmony_ci} 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_cienum radv_perfcounter_op { 86bf215546Sopenharmony_ci RADV_PC_OP_SUM, 87bf215546Sopenharmony_ci RADV_PC_OP_MAX, 88bf215546Sopenharmony_ci RADV_PC_OP_RATIO_DIVSCALE, 89bf215546Sopenharmony_ci RADV_PC_OP_REVERSE_RATIO, /* (reg1 - reg0) / reg1 */ 90bf215546Sopenharmony_ci RADV_PC_OP_SUM_WEIGHTED_4, 91bf215546Sopenharmony_ci}; 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci#define S_REG_SEL(x) ((x)&0xFFFF) 94bf215546Sopenharmony_ci#define G_REG_SEL(x) ((x)&0xFFFF) 95bf215546Sopenharmony_ci#define S_REG_BLOCK(x) ((x) << 16) 96bf215546Sopenharmony_ci#define G_REG_BLOCK(x) (((x) >> 16) & 0x7FFF) 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci#define S_REG_OFFSET(x) ((x)&0xFFFF) 99bf215546Sopenharmony_ci#define G_REG_OFFSET(x) ((x)&0xFFFF) 100bf215546Sopenharmony_ci#define S_REG_INSTANCES(x) ((x) << 16) 101bf215546Sopenharmony_ci#define G_REG_INSTANCES(x) (((x) >> 16) & 0x7FFF) 102bf215546Sopenharmony_ci#define S_REG_CONSTANT(x) ((x) << 31) 103bf215546Sopenharmony_ci#define G_REG_CONSTANT(x) ((x) >> 31) 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_cistruct radv_perfcounter_impl { 106bf215546Sopenharmony_ci enum radv_perfcounter_op op; 107bf215546Sopenharmony_ci uint32_t regs[8]; 108bf215546Sopenharmony_ci}; 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci/* Only append to this list, never insert into the middle or remove (but can rename). 111bf215546Sopenharmony_ci * 112bf215546Sopenharmony_ci * The invariant we're trying to get here is counters that have the same meaning, so 113bf215546Sopenharmony_ci * these can be shared between counters that have different implementations on different 114bf215546Sopenharmony_ci * GPUs, but should be unique within a GPU. 115bf215546Sopenharmony_ci */ 116bf215546Sopenharmony_cienum radv_perfcounter_uuid { 117bf215546Sopenharmony_ci RADV_PC_UUID_GPU_CYCLES, 118bf215546Sopenharmony_ci RADV_PC_UUID_SHADER_WAVES, 119bf215546Sopenharmony_ci RADV_PC_UUID_SHADER_INSTRUCTIONS, 120bf215546Sopenharmony_ci RADV_PC_UUID_SHADER_INSTRUCTIONS_VALU, 121bf215546Sopenharmony_ci RADV_PC_UUID_SHADER_INSTRUCTIONS_SALU, 122bf215546Sopenharmony_ci RADV_PC_UUID_SHADER_INSTRUCTIONS_VMEM_LOAD, 123bf215546Sopenharmony_ci RADV_PC_UUID_SHADER_INSTRUCTIONS_SMEM_LOAD, 124bf215546Sopenharmony_ci RADV_PC_UUID_SHADER_INSTRUCTIONS_VMEM_STORE, 125bf215546Sopenharmony_ci RADV_PC_UUID_SHADER_INSTRUCTIONS_LDS, 126bf215546Sopenharmony_ci RADV_PC_UUID_SHADER_INSTRUCTIONS_GDS, 127bf215546Sopenharmony_ci RADV_PC_UUID_SHADER_VALU_BUSY, 128bf215546Sopenharmony_ci RADV_PC_UUID_SHADER_SALU_BUSY, 129bf215546Sopenharmony_ci RADV_PC_UUID_VRAM_READ_SIZE, 130bf215546Sopenharmony_ci RADV_PC_UUID_VRAM_WRITE_SIZE, 131bf215546Sopenharmony_ci RADV_PC_UUID_L0_CACHE_HIT_RATIO, 132bf215546Sopenharmony_ci RADV_PC_UUID_L1_CACHE_HIT_RATIO, 133bf215546Sopenharmony_ci RADV_PC_UUID_L2_CACHE_HIT_RATIO, 134bf215546Sopenharmony_ci}; 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_cistruct radv_perfcounter_desc { 137bf215546Sopenharmony_ci struct radv_perfcounter_impl impl; 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci VkPerformanceCounterUnitKHR unit; 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci char name[VK_MAX_DESCRIPTION_SIZE]; 142bf215546Sopenharmony_ci char category[VK_MAX_DESCRIPTION_SIZE]; 143bf215546Sopenharmony_ci char description[VK_MAX_DESCRIPTION_SIZE]; 144bf215546Sopenharmony_ci enum radv_perfcounter_uuid uuid; 145bf215546Sopenharmony_ci}; 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci#define PC_DESC(arg_op, arg_unit, arg_name, arg_category, arg_description, arg_uuid, ...) \ 148bf215546Sopenharmony_ci (struct radv_perfcounter_desc) \ 149bf215546Sopenharmony_ci { \ 150bf215546Sopenharmony_ci .impl = {.op = arg_op, .regs = {__VA_ARGS__}}, \ 151bf215546Sopenharmony_ci .unit = VK_PERFORMANCE_COUNTER_UNIT_##arg_unit##_KHR, .name = arg_name, \ 152bf215546Sopenharmony_ci .category = arg_category, .description = arg_description, .uuid = RADV_PC_UUID_##arg_uuid \ 153bf215546Sopenharmony_ci } 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci#define ADD_PC(op, unit, name, category, description, uuid, ...) \ 156bf215546Sopenharmony_ci do { \ 157bf215546Sopenharmony_ci if (descs) { \ 158bf215546Sopenharmony_ci descs[*count] = PC_DESC((op), unit, name, category, description, uuid, __VA_ARGS__); \ 159bf215546Sopenharmony_ci } \ 160bf215546Sopenharmony_ci ++*count; \ 161bf215546Sopenharmony_ci } while (0) 162bf215546Sopenharmony_ci#define CTR(block, ctr) (S_REG_BLOCK(block) | S_REG_SEL(ctr)) 163bf215546Sopenharmony_ci#define CONSTANT(v) (S_REG_CONSTANT(1) | (uint32_t)(v)) 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_cienum { GRBM_PERF_SEL_GUI_ACTIVE = CTR(GRBM, 2) }; 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_cienum { CPF_PERF_SEL_CPF_STAT_BUSY_GFX10 = CTR(CPF, 0x18) }; 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_cienum { 170bf215546Sopenharmony_ci GL1C_PERF_SEL_REQ = CTR(GL1C, 0xe), 171bf215546Sopenharmony_ci GL1C_PERF_SEL_REQ_MISS = CTR(GL1C, 0x12), 172bf215546Sopenharmony_ci}; 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_cienum { 175bf215546Sopenharmony_ci GL2C_PERF_SEL_REQ = CTR(GL2C, 0x3), 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci GL2C_PERF_SEL_MISS_GFX101 = CTR(GL2C, 0x23), 178bf215546Sopenharmony_ci GL2C_PERF_SEL_MC_WRREQ_GFX101 = CTR(GL2C, 0x4b), 179bf215546Sopenharmony_ci GL2C_PERF_SEL_EA_WRREQ_64B_GFX101 = CTR(GL2C, 0x4c), 180bf215546Sopenharmony_ci GL2C_PERF_SEL_EA_RDREQ_32B_GFX101 = CTR(GL2C, 0x59), 181bf215546Sopenharmony_ci GL2C_PERF_SEL_EA_RDREQ_64B_GFX101 = CTR(GL2C, 0x5a), 182bf215546Sopenharmony_ci GL2C_PERF_SEL_EA_RDREQ_96B_GFX101 = CTR(GL2C, 0x5b), 183bf215546Sopenharmony_ci GL2C_PERF_SEL_EA_RDREQ_128B_GFX101 = CTR(GL2C, 0x5c), 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci GL2C_PERF_SEL_MISS_GFX103 = CTR(GL2C, 0x2b), 186bf215546Sopenharmony_ci GL2C_PERF_SEL_MC_WRREQ_GFX103 = CTR(GL2C, 0x53), 187bf215546Sopenharmony_ci GL2C_PERF_SEL_EA_WRREQ_64B_GFX103 = CTR(GL2C, 0x55), 188bf215546Sopenharmony_ci GL2C_PERF_SEL_EA_RDREQ_32B_GFX103 = CTR(GL2C, 0x63), 189bf215546Sopenharmony_ci GL2C_PERF_SEL_EA_RDREQ_64B_GFX103 = CTR(GL2C, 0x64), 190bf215546Sopenharmony_ci GL2C_PERF_SEL_EA_RDREQ_96B_GFX103 = CTR(GL2C, 0x65), 191bf215546Sopenharmony_ci GL2C_PERF_SEL_EA_RDREQ_128B_GFX103 = CTR(GL2C, 0x66), 192bf215546Sopenharmony_ci}; 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_cienum { 195bf215546Sopenharmony_ci SQ_PERF_SEL_WAVES = CTR(SQ, 0x4), 196bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_ALL_GFX10 = CTR(SQ, 0x31), 197bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_GDS_GFX10 = CTR(SQ, 0x37), 198bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_LDS_GFX10 = CTR(SQ, 0x3b), 199bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_SALU_GFX10 = CTR(SQ, 0x3c), 200bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_SMEM_GFX10 = CTR(SQ, 0x3d), 201bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_VALU_GFX10 = CTR(SQ, 0x40), 202bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_TEX_LOAD_GFX10 = CTR(SQ, 0x45), 203bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_TEX_STORE_GFX10 = CTR(SQ, 0x46), 204bf215546Sopenharmony_ci SQ_PERF_SEL_INST_CYCLES_VALU_GFX10 = CTR(SQ, 0x75), 205bf215546Sopenharmony_ci}; 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_cienum { 208bf215546Sopenharmony_ci TCP_PERF_SEL_REQ_GFX10 = CTR(TCP, 0x9), 209bf215546Sopenharmony_ci TCP_PERF_SEL_REQ_MISS_GFX10 = CTR(TCP, 0x12), 210bf215546Sopenharmony_ci}; 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci#define CTR_NUM_SIMD \ 213bf215546Sopenharmony_ci CONSTANT(pdev->rad_info.num_simd_per_compute_unit * pdev->rad_info.num_cu) 214bf215546Sopenharmony_ci#define CTR_NUM_CUS CONSTANT(pdev->rad_info.num_cu) 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_cistatic void 217bf215546Sopenharmony_ciradv_query_perfcounter_descs(struct radv_physical_device *pdev, uint32_t *count, 218bf215546Sopenharmony_ci struct radv_perfcounter_desc *descs) 219bf215546Sopenharmony_ci{ 220bf215546Sopenharmony_ci *count = 0; 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_MAX, CYCLES, "GPU active cycles", "GRBM", 223bf215546Sopenharmony_ci "cycles the GPU is active processing a command buffer.", GPU_CYCLES, 224bf215546Sopenharmony_ci GRBM_PERF_SEL_GUI_ACTIVE); 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_SUM, GENERIC, "Waves", "Shaders", "Number of waves executed", SHADER_WAVES, 227bf215546Sopenharmony_ci SQ_PERF_SEL_WAVES); 228bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_SUM, GENERIC, "Instructions", "Shaders", "Number of Instructions executed", 229bf215546Sopenharmony_ci SHADER_INSTRUCTIONS, SQ_PERF_SEL_INSTS_ALL_GFX10); 230bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_SUM, GENERIC, "VALU Instructions", "Shaders", 231bf215546Sopenharmony_ci "Number of VALU Instructions executed", SHADER_INSTRUCTIONS_VALU, 232bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_VALU_GFX10); 233bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_SUM, GENERIC, "SALU Instructions", "Shaders", 234bf215546Sopenharmony_ci "Number of SALU Instructions executed", SHADER_INSTRUCTIONS_SALU, 235bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_SALU_GFX10); 236bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Load Instructions", "Shaders", 237bf215546Sopenharmony_ci "Number of VMEM load instructions executed", SHADER_INSTRUCTIONS_VMEM_LOAD, 238bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_TEX_LOAD_GFX10); 239bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_SUM, GENERIC, "SMEM Load Instructions", "Shaders", 240bf215546Sopenharmony_ci "Number of SMEM load instructions executed", SHADER_INSTRUCTIONS_SMEM_LOAD, 241bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_SMEM_GFX10); 242bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_SUM, GENERIC, "VMEM Store Instructions", "Shaders", 243bf215546Sopenharmony_ci "Number of VMEM store instructions executed", SHADER_INSTRUCTIONS_VMEM_STORE, 244bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_TEX_STORE_GFX10); 245bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_SUM, GENERIC, "LDS Instructions", "Shaders", 246bf215546Sopenharmony_ci "Number of LDS Instructions executed", SHADER_INSTRUCTIONS_LDS, 247bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_LDS_GFX10); 248bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_SUM, GENERIC, "GDS Instructions", "Shaders", 249bf215546Sopenharmony_ci "Number of GDS Instructions executed", SHADER_INSTRUCTIONS_GDS, 250bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_GDS_GFX10); 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "VALU Busy", "Shader Utilization", 253bf215546Sopenharmony_ci "Percentage of time the VALU units are busy", SHADER_VALU_BUSY, 254bf215546Sopenharmony_ci SQ_PERF_SEL_INST_CYCLES_VALU_GFX10, CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_SIMD); 255bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_RATIO_DIVSCALE, PERCENTAGE, "SALU Busy", "Shader Utilization", 256bf215546Sopenharmony_ci "Percentage of time the SALU units are busy", SHADER_SALU_BUSY, 257bf215546Sopenharmony_ci SQ_PERF_SEL_INSTS_SALU_GFX10, CPF_PERF_SEL_CPF_STAT_BUSY_GFX10, CTR_NUM_CUS); 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci if (pdev->rad_info.gfx_level >= GFX10_3) { 260bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory", 261bf215546Sopenharmony_ci "Number of bytes read from VRAM", VRAM_READ_SIZE, GL2C_PERF_SEL_EA_RDREQ_32B_GFX103, 262bf215546Sopenharmony_ci CONSTANT(32), GL2C_PERF_SEL_EA_RDREQ_64B_GFX103, CONSTANT(64), 263bf215546Sopenharmony_ci GL2C_PERF_SEL_EA_RDREQ_96B_GFX103, CONSTANT(96), GL2C_PERF_SEL_EA_RDREQ_128B_GFX103, 264bf215546Sopenharmony_ci CONSTANT(128)); 265bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM write size", "Memory", 266bf215546Sopenharmony_ci "Number of bytes written to VRAM", VRAM_WRITE_SIZE, GL2C_PERF_SEL_MC_WRREQ_GFX103, 267bf215546Sopenharmony_ci CONSTANT(32), GL2C_PERF_SEL_EA_WRREQ_64B_GFX103, CONSTANT(64), CONSTANT(0), 268bf215546Sopenharmony_ci CONSTANT(0), CONSTANT(0), CONSTANT(0)); 269bf215546Sopenharmony_ci } else { 270bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM read size", "Memory", 271bf215546Sopenharmony_ci "Number of bytes read from VRAM", VRAM_READ_SIZE, GL2C_PERF_SEL_EA_RDREQ_32B_GFX101, 272bf215546Sopenharmony_ci CONSTANT(32), GL2C_PERF_SEL_EA_RDREQ_64B_GFX101, CONSTANT(64), 273bf215546Sopenharmony_ci GL2C_PERF_SEL_EA_RDREQ_96B_GFX101, CONSTANT(96), GL2C_PERF_SEL_EA_RDREQ_128B_GFX101, 274bf215546Sopenharmony_ci CONSTANT(128)); 275bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_SUM_WEIGHTED_4, BYTES, "VRAM write size", "Memory", 276bf215546Sopenharmony_ci "Number of bytes written to VRAM", VRAM_WRITE_SIZE, GL2C_PERF_SEL_MC_WRREQ_GFX101, 277bf215546Sopenharmony_ci CONSTANT(32), GL2C_PERF_SEL_EA_WRREQ_64B_GFX101, CONSTANT(32), CONSTANT(0), 278bf215546Sopenharmony_ci CONSTANT(0), CONSTANT(0), CONSTANT(0)); 279bf215546Sopenharmony_ci } 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L0 cache hit ratio", "Memory", "Hit ratio of L0 cache", 282bf215546Sopenharmony_ci L0_CACHE_HIT_RATIO, TCP_PERF_SEL_REQ_MISS_GFX10, TCP_PERF_SEL_REQ_GFX10); 283bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L1 cache hit ratio", "Memory", "Hit ratio of L1 cache", 284bf215546Sopenharmony_ci L1_CACHE_HIT_RATIO, GL1C_PERF_SEL_REQ_MISS, GL1C_PERF_SEL_REQ); 285bf215546Sopenharmony_ci if (pdev->rad_info.gfx_level >= GFX10_3) { 286bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L2 cache hit ratio", "Memory", 287bf215546Sopenharmony_ci "Hit ratio of L2 cache", L2_CACHE_HIT_RATIO, GL2C_PERF_SEL_MISS_GFX103, 288bf215546Sopenharmony_ci GL2C_PERF_SEL_REQ); 289bf215546Sopenharmony_ci } else { 290bf215546Sopenharmony_ci ADD_PC(RADV_PC_OP_REVERSE_RATIO, BYTES, "L2 cache hit ratio", "Memory", 291bf215546Sopenharmony_ci "Hit ratio of L2 cache", L2_CACHE_HIT_RATIO, GL2C_PERF_SEL_MISS_GFX101, 292bf215546Sopenharmony_ci GL2C_PERF_SEL_REQ); 293bf215546Sopenharmony_ci } 294bf215546Sopenharmony_ci} 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_cistatic bool 297bf215546Sopenharmony_ciradv_init_perfcounter_descs(struct radv_physical_device *pdev) 298bf215546Sopenharmony_ci{ 299bf215546Sopenharmony_ci if (pdev->perfcounters) 300bf215546Sopenharmony_ci return true; 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci uint32_t count; 303bf215546Sopenharmony_ci radv_query_perfcounter_descs(pdev, &count, NULL); 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci struct radv_perfcounter_desc *descs = malloc(sizeof(*descs) * count); 306bf215546Sopenharmony_ci if (!descs) 307bf215546Sopenharmony_ci return false; 308bf215546Sopenharmony_ci 309bf215546Sopenharmony_ci radv_query_perfcounter_descs(pdev, &count, descs); 310bf215546Sopenharmony_ci pdev->num_perfcounters = count; 311bf215546Sopenharmony_ci pdev->perfcounters = descs; 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ci return true; 314bf215546Sopenharmony_ci} 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_cistatic int 317bf215546Sopenharmony_cicmp_uint32_t(const void *a, const void *b) 318bf215546Sopenharmony_ci{ 319bf215546Sopenharmony_ci uint32_t l = *(const uint32_t *)a; 320bf215546Sopenharmony_ci uint32_t r = *(const uint32_t *)b; 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci return (l < r) ? -1 : (l > r) ? 1 : 0; 323bf215546Sopenharmony_ci} 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_cistatic VkResult 326bf215546Sopenharmony_ciradv_get_counter_registers(const struct radv_physical_device *pdevice, uint32_t num_indices, 327bf215546Sopenharmony_ci const uint32_t *indices, unsigned *out_num_regs, uint32_t **out_regs) 328bf215546Sopenharmony_ci{ 329bf215546Sopenharmony_ci ASSERTED uint32_t num_counters = pdevice->num_perfcounters; 330bf215546Sopenharmony_ci const struct radv_perfcounter_desc *descs = pdevice->perfcounters; 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci unsigned full_reg_cnt = num_indices * ARRAY_SIZE(descs->impl.regs); 333bf215546Sopenharmony_ci uint32_t *regs = malloc(full_reg_cnt * sizeof(uint32_t)); 334bf215546Sopenharmony_ci if (!regs) 335bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci unsigned reg_cnt = 0; 338bf215546Sopenharmony_ci for (unsigned i = 0; i < num_indices; ++i) { 339bf215546Sopenharmony_ci uint32_t index = indices[i]; 340bf215546Sopenharmony_ci assert(index < num_counters); 341bf215546Sopenharmony_ci for (unsigned j = 0; j < ARRAY_SIZE(descs[index].impl.regs) && descs[index].impl.regs[j]; 342bf215546Sopenharmony_ci ++j) { 343bf215546Sopenharmony_ci if (!G_REG_CONSTANT(descs[index].impl.regs[j])) 344bf215546Sopenharmony_ci regs[reg_cnt++] = descs[index].impl.regs[j]; 345bf215546Sopenharmony_ci } 346bf215546Sopenharmony_ci } 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci qsort(regs, reg_cnt, sizeof(uint32_t), cmp_uint32_t); 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ci unsigned deduped_reg_cnt = 0; 351bf215546Sopenharmony_ci for (unsigned i = 1; i < reg_cnt; ++i) { 352bf215546Sopenharmony_ci if (regs[i] != regs[deduped_reg_cnt]) 353bf215546Sopenharmony_ci regs[++deduped_reg_cnt] = regs[i]; 354bf215546Sopenharmony_ci } 355bf215546Sopenharmony_ci ++deduped_reg_cnt; 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci *out_num_regs = deduped_reg_cnt; 358bf215546Sopenharmony_ci *out_regs = regs; 359bf215546Sopenharmony_ci return VK_SUCCESS; 360bf215546Sopenharmony_ci} 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_cistatic unsigned 363bf215546Sopenharmony_ciradv_pc_get_num_instances(const struct radv_physical_device *pdevice, struct ac_pc_block *ac_block) 364bf215546Sopenharmony_ci{ 365bf215546Sopenharmony_ci return ac_block->num_instances * 366bf215546Sopenharmony_ci ((ac_block->b->b->flags & AC_PC_BLOCK_SE) ? pdevice->rad_info.max_se : 1); 367bf215546Sopenharmony_ci} 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_cistatic unsigned 370bf215546Sopenharmony_ciradv_get_num_counter_passes(const struct radv_physical_device *pdevice, unsigned num_regs, 371bf215546Sopenharmony_ci const uint32_t *regs) 372bf215546Sopenharmony_ci{ 373bf215546Sopenharmony_ci enum ac_pc_gpu_block prev_block = NUM_GPU_BLOCK; 374bf215546Sopenharmony_ci unsigned block_reg_count = 0; 375bf215546Sopenharmony_ci struct ac_pc_block *ac_block = NULL; 376bf215546Sopenharmony_ci unsigned passes_needed = 1; 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_ci for (unsigned i = 0; i < num_regs; ++i) { 379bf215546Sopenharmony_ci enum ac_pc_gpu_block block = G_REG_BLOCK(regs[i]); 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_ci if (block != prev_block) { 382bf215546Sopenharmony_ci block_reg_count = 0; 383bf215546Sopenharmony_ci prev_block = block; 384bf215546Sopenharmony_ci ac_block = ac_pc_get_block(&pdevice->ac_perfcounters, block); 385bf215546Sopenharmony_ci } 386bf215546Sopenharmony_ci 387bf215546Sopenharmony_ci ++block_reg_count; 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci passes_needed = 390bf215546Sopenharmony_ci MAX2(passes_needed, DIV_ROUND_UP(block_reg_count, ac_block->b->b->num_counters)); 391bf215546Sopenharmony_ci } 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci return passes_needed; 394bf215546Sopenharmony_ci} 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_civoid 397bf215546Sopenharmony_ciradv_pc_deinit_query_pool(struct radv_pc_query_pool *pool) 398bf215546Sopenharmony_ci{ 399bf215546Sopenharmony_ci free(pool->counters); 400bf215546Sopenharmony_ci free(pool->pc_regs); 401bf215546Sopenharmony_ci} 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ciVkResult 404bf215546Sopenharmony_ciradv_pc_init_query_pool(struct radv_physical_device *pdevice, 405bf215546Sopenharmony_ci const VkQueryPoolCreateInfo *pCreateInfo, struct radv_pc_query_pool *pool) 406bf215546Sopenharmony_ci{ 407bf215546Sopenharmony_ci const VkQueryPoolPerformanceCreateInfoKHR *perf_info = 408bf215546Sopenharmony_ci vk_find_struct_const(pCreateInfo->pNext, QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR); 409bf215546Sopenharmony_ci VkResult result; 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci if (!radv_init_perfcounter_descs(pdevice)) 412bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci result = 415bf215546Sopenharmony_ci radv_get_counter_registers(pdevice, perf_info->counterIndexCount, perf_info->pCounterIndices, 416bf215546Sopenharmony_ci &pool->num_pc_regs, &pool->pc_regs); 417bf215546Sopenharmony_ci if (result != VK_SUCCESS) 418bf215546Sopenharmony_ci return result; 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci pool->num_passes = radv_get_num_counter_passes(pdevice, pool->num_pc_regs, pool->pc_regs); 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_ci uint32_t *pc_reg_offsets = malloc(pool->num_pc_regs * sizeof(uint32_t)); 423bf215546Sopenharmony_ci if (!pc_reg_offsets) 424bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 425bf215546Sopenharmony_ci 426bf215546Sopenharmony_ci unsigned offset = 0; 427bf215546Sopenharmony_ci for (unsigned i = 0; i < pool->num_pc_regs; ++i) { 428bf215546Sopenharmony_ci enum ac_pc_gpu_block block = pool->pc_regs[i] >> 16; 429bf215546Sopenharmony_ci struct ac_pc_block *ac_block = ac_pc_get_block(&pdevice->ac_perfcounters, block); 430bf215546Sopenharmony_ci unsigned num_instances = radv_pc_get_num_instances(pdevice, ac_block); 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_ci pc_reg_offsets[i] = S_REG_OFFSET(offset) | S_REG_INSTANCES(num_instances); 433bf215546Sopenharmony_ci offset += sizeof(uint64_t) * 2 * num_instances; 434bf215546Sopenharmony_ci } 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci /* allow an uint32_t per pass to signal completion. */ 437bf215546Sopenharmony_ci pool->b.stride = offset + 8 * pool->num_passes; 438bf215546Sopenharmony_ci 439bf215546Sopenharmony_ci pool->num_counters = perf_info->counterIndexCount; 440bf215546Sopenharmony_ci pool->counters = malloc(pool->num_counters * sizeof(struct radv_perfcounter_impl)); 441bf215546Sopenharmony_ci if (!pool->counters) { 442bf215546Sopenharmony_ci free(pc_reg_offsets); 443bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 444bf215546Sopenharmony_ci } 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci for (unsigned i = 0; i < pool->num_counters; ++i) { 447bf215546Sopenharmony_ci pool->counters[i] = pdevice->perfcounters[perf_info->pCounterIndices[i]].impl; 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci for (unsigned j = 0; j < ARRAY_SIZE(pool->counters[i].regs); ++j) { 450bf215546Sopenharmony_ci uint32_t reg = pool->counters[i].regs[j]; 451bf215546Sopenharmony_ci if (!reg || G_REG_CONSTANT(reg)) 452bf215546Sopenharmony_ci continue; 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci unsigned k; 455bf215546Sopenharmony_ci for (k = 0; k < pool->num_pc_regs; ++k) 456bf215546Sopenharmony_ci if (pool->pc_regs[k] == reg) 457bf215546Sopenharmony_ci break; 458bf215546Sopenharmony_ci pool->counters[i].regs[j] = pc_reg_offsets[k]; 459bf215546Sopenharmony_ci } 460bf215546Sopenharmony_ci } 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci free(pc_reg_offsets); 463bf215546Sopenharmony_ci return VK_SUCCESS; 464bf215546Sopenharmony_ci} 465bf215546Sopenharmony_ci 466bf215546Sopenharmony_cistatic void 467bf215546Sopenharmony_ciradv_emit_instance(struct radv_cmd_buffer *cmd_buffer, int se, int instance) 468bf215546Sopenharmony_ci{ 469bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 470bf215546Sopenharmony_ci unsigned value = S_030800_SH_BROADCAST_WRITES(1); 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_ci if (se >= 0) { 473bf215546Sopenharmony_ci value |= S_030800_SE_INDEX(se); 474bf215546Sopenharmony_ci } else { 475bf215546Sopenharmony_ci value |= S_030800_SE_BROADCAST_WRITES(1); 476bf215546Sopenharmony_ci } 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci if (instance >= 0) { 479bf215546Sopenharmony_ci value |= S_030800_INSTANCE_INDEX(instance); 480bf215546Sopenharmony_ci } else { 481bf215546Sopenharmony_ci value |= S_030800_INSTANCE_BROADCAST_WRITES(1); 482bf215546Sopenharmony_ci } 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, value); 485bf215546Sopenharmony_ci} 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_cistatic void 488bf215546Sopenharmony_ciradv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, 489bf215546Sopenharmony_ci unsigned *selectors) 490bf215546Sopenharmony_ci{ 491bf215546Sopenharmony_ci struct ac_pc_block_base *regs = block->b->b; 492bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 493bf215546Sopenharmony_ci unsigned idx; 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci assert(count <= regs->num_counters); 496bf215546Sopenharmony_ci 497bf215546Sopenharmony_ci /* Fake counters. */ 498bf215546Sopenharmony_ci if (!regs->select0) 499bf215546Sopenharmony_ci return; 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci for (idx = 0; idx < count; ++idx) { 502bf215546Sopenharmony_ci radeon_set_perfctr_reg(cmd_buffer, regs->select0[idx], 503bf215546Sopenharmony_ci G_REG_SEL(selectors[idx]) | regs->select_or); 504bf215546Sopenharmony_ci } 505bf215546Sopenharmony_ci 506bf215546Sopenharmony_ci for (idx = 0; idx < regs->num_spm_counters; idx++) { 507bf215546Sopenharmony_ci radeon_set_uconfig_reg_seq(cs, regs->select1[idx], 1); 508bf215546Sopenharmony_ci radeon_emit(cs, 0); 509bf215546Sopenharmony_ci } 510bf215546Sopenharmony_ci} 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_cistatic void 513bf215546Sopenharmony_ciradv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, 514bf215546Sopenharmony_ci unsigned count, uint64_t va) 515bf215546Sopenharmony_ci{ 516bf215546Sopenharmony_ci struct ac_pc_block_base *regs = block->b->b; 517bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 518bf215546Sopenharmony_ci unsigned reg = regs->counter0_lo; 519bf215546Sopenharmony_ci unsigned reg_delta = 8; 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_ci assert(regs->select0); 522bf215546Sopenharmony_ci for (unsigned idx = 0; idx < count; ++idx) { 523bf215546Sopenharmony_ci if (regs->counters) 524bf215546Sopenharmony_ci reg = regs->counters[idx]; 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); 527bf215546Sopenharmony_ci radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) | 528bf215546Sopenharmony_ci COPY_DATA_WR_CONFIRM | COPY_DATA_COUNT_SEL); /* 64 bits */ 529bf215546Sopenharmony_ci radeon_emit(cs, reg >> 2); 530bf215546Sopenharmony_ci radeon_emit(cs, 0); /* unused */ 531bf215546Sopenharmony_ci radeon_emit(cs, va); 532bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci va += sizeof(uint64_t) * 2 * 535bf215546Sopenharmony_ci radv_pc_get_num_instances(cmd_buffer->device->physical_device, block); 536bf215546Sopenharmony_ci reg += reg_delta; 537bf215546Sopenharmony_ci } 538bf215546Sopenharmony_ci} 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_cistatic void 541bf215546Sopenharmony_ciradv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, 542bf215546Sopenharmony_ci uint64_t va) 543bf215546Sopenharmony_ci{ 544bf215546Sopenharmony_ci unsigned se_end = 1; 545bf215546Sopenharmony_ci if (block->b->b->flags & AC_PC_BLOCK_SE) 546bf215546Sopenharmony_ci se_end = cmd_buffer->device->physical_device->rad_info.max_se; 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci for (unsigned se = 0; se < se_end; ++se) { 549bf215546Sopenharmony_ci for (unsigned instance = 0; instance < block->num_instances; ++instance) { 550bf215546Sopenharmony_ci radv_emit_instance(cmd_buffer, se, instance); 551bf215546Sopenharmony_ci radv_pc_emit_block_instance_read(cmd_buffer, block, count, va); 552bf215546Sopenharmony_ci va += sizeof(uint64_t) * 2; 553bf215546Sopenharmony_ci } 554bf215546Sopenharmony_ci } 555bf215546Sopenharmony_ci} 556bf215546Sopenharmony_ci 557bf215546Sopenharmony_cistatic void 558bf215546Sopenharmony_ciradv_pc_wait_idle(struct radv_cmd_buffer *cmd_buffer) 559bf215546Sopenharmony_ci{ 560bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 563bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4))); 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 6, 0)); 566bf215546Sopenharmony_ci radeon_emit(cs, 0); /* CP_COHER_CNTL */ 567bf215546Sopenharmony_ci radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */ 568bf215546Sopenharmony_ci radeon_emit(cs, 0xffffff); /* CP_COHER_SIZE_HI */ 569bf215546Sopenharmony_ci radeon_emit(cs, 0); /* CP_COHER_BASE */ 570bf215546Sopenharmony_ci radeon_emit(cs, 0); /* CP_COHER_BASE_HI */ 571bf215546Sopenharmony_ci radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */ 572bf215546Sopenharmony_ci radeon_emit(cs, 0); /* GCR_CNTL */ 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); 575bf215546Sopenharmony_ci radeon_emit(cs, 0); 576bf215546Sopenharmony_ci} 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_cistatic void 579bf215546Sopenharmony_ciradv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, 580bf215546Sopenharmony_ci uint64_t va, bool end) 581bf215546Sopenharmony_ci{ 582bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 583bf215546Sopenharmony_ci struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 586bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0)); 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_ci radv_pc_wait_idle(cmd_buffer); 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_ci radv_emit_instance(cmd_buffer, -1, -1); 591bf215546Sopenharmony_ci radv_emit_windowed_counters(cmd_buffer->device, cs, cmd_buffer->qf, false); 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, 594bf215546Sopenharmony_ci S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | 595bf215546Sopenharmony_ci S_036020_PERFMON_SAMPLE_ENABLE(1)); 596bf215546Sopenharmony_ci 597bf215546Sopenharmony_ci for (unsigned pass = 0; pass < pool->num_passes; ++pass) { 598bf215546Sopenharmony_ci uint64_t pred_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + 599bf215546Sopenharmony_ci PERF_CTR_BO_PASS_OFFSET + 8 * pass; 600bf215546Sopenharmony_ci uint64_t reg_va = va + (end ? 8 : 0); 601bf215546Sopenharmony_ci 602bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0)); 603bf215546Sopenharmony_ci radeon_emit(cs, pred_va); 604bf215546Sopenharmony_ci radeon_emit(cs, pred_va >> 32); 605bf215546Sopenharmony_ci radeon_emit(cs, 0); /* Cache policy */ 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci uint32_t *skip_dwords = cs->buf + cs->cdw; 608bf215546Sopenharmony_ci radeon_emit(cs, 0); 609bf215546Sopenharmony_ci 610bf215546Sopenharmony_ci for (unsigned i = 0; i < pool->num_pc_regs;) { 611bf215546Sopenharmony_ci enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]); 612bf215546Sopenharmony_ci struct ac_pc_block *ac_block = ac_pc_get_block(&pdevice->ac_perfcounters, block); 613bf215546Sopenharmony_ci unsigned offset = ac_block->num_instances * pass; 614bf215546Sopenharmony_ci unsigned num_instances = radv_pc_get_num_instances(pdevice, ac_block); 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci unsigned cnt = 1; 617bf215546Sopenharmony_ci while (cnt < pool->num_pc_regs - i && block == G_REG_BLOCK(pool->pc_regs[i + cnt])) 618bf215546Sopenharmony_ci ++cnt; 619bf215546Sopenharmony_ci 620bf215546Sopenharmony_ci if (offset < cnt) { 621bf215546Sopenharmony_ci unsigned pass_reg_cnt = MIN2(cnt - offset, ac_block->b->b->num_counters); 622bf215546Sopenharmony_ci radv_pc_sample_block(cmd_buffer, ac_block, pass_reg_cnt, 623bf215546Sopenharmony_ci reg_va + offset * num_instances * sizeof(uint64_t)); 624bf215546Sopenharmony_ci } 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_ci i += cnt; 627bf215546Sopenharmony_ci reg_va += num_instances * sizeof(uint64_t) * 2 * cnt; 628bf215546Sopenharmony_ci } 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci if (end) { 631bf215546Sopenharmony_ci uint64_t signal_va = va + pool->b.stride - 8 - 8 * pass; 632bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); 633bf215546Sopenharmony_ci radeon_emit(cs, 634bf215546Sopenharmony_ci S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); 635bf215546Sopenharmony_ci radeon_emit(cs, signal_va); 636bf215546Sopenharmony_ci radeon_emit(cs, signal_va >> 32); 637bf215546Sopenharmony_ci radeon_emit(cs, 1); /* value */ 638bf215546Sopenharmony_ci } 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci *skip_dwords = cs->buf + cs->cdw - skip_dwords - 1; 641bf215546Sopenharmony_ci } 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_ci radv_emit_instance(cmd_buffer, -1, -1); 644bf215546Sopenharmony_ci} 645bf215546Sopenharmony_ci 646bf215546Sopenharmony_civoid 647bf215546Sopenharmony_ciradv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, 648bf215546Sopenharmony_ci uint64_t va) 649bf215546Sopenharmony_ci{ 650bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 651bf215546Sopenharmony_ci struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; 652bf215546Sopenharmony_ci ASSERTED unsigned cdw_max; 653bf215546Sopenharmony_ci 654bf215546Sopenharmony_ci cmd_buffer->state.uses_perf_counters = true; 655bf215546Sopenharmony_ci 656bf215546Sopenharmony_ci cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 657bf215546Sopenharmony_ci 256 + /* Random one time stuff */ 658bf215546Sopenharmony_ci 10 * pool->num_passes + /* COND_EXECs */ 659bf215546Sopenharmony_ci pool->b.stride / 8 * (5 + 8)); 660bf215546Sopenharmony_ci 661bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pool->b.bo); 662bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->perf_counter_bo); 663bf215546Sopenharmony_ci 664bf215546Sopenharmony_ci uint64_t perf_ctr_va = 665bf215546Sopenharmony_ci radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET; 666bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); 667bf215546Sopenharmony_ci radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); 668bf215546Sopenharmony_ci radeon_emit(cs, perf_ctr_va); 669bf215546Sopenharmony_ci radeon_emit(cs, perf_ctr_va >> 32); 670bf215546Sopenharmony_ci radeon_emit(cs, 0); /* value */ 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci radv_pc_wait_idle(cmd_buffer); 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, 675bf215546Sopenharmony_ci S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET)); 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci radv_emit_inhibit_clockgating(cmd_buffer->device, cs, true); 678bf215546Sopenharmony_ci radv_emit_spi_config_cntl(cmd_buffer->device, cs, true); 679bf215546Sopenharmony_ci radv_perfcounter_emit_shaders(cs, 0x7f); 680bf215546Sopenharmony_ci 681bf215546Sopenharmony_ci for (unsigned pass = 0; pass < pool->num_passes; ++pass) { 682bf215546Sopenharmony_ci uint64_t pred_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + 683bf215546Sopenharmony_ci PERF_CTR_BO_PASS_OFFSET + 8 * pass; 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0)); 686bf215546Sopenharmony_ci radeon_emit(cs, pred_va); 687bf215546Sopenharmony_ci radeon_emit(cs, pred_va >> 32); 688bf215546Sopenharmony_ci radeon_emit(cs, 0); /* Cache policy */ 689bf215546Sopenharmony_ci 690bf215546Sopenharmony_ci uint32_t *skip_dwords = cs->buf + cs->cdw; 691bf215546Sopenharmony_ci radeon_emit(cs, 0); 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_ci for (unsigned i = 0; i < pool->num_pc_regs;) { 694bf215546Sopenharmony_ci enum ac_pc_gpu_block block = G_REG_BLOCK(pool->pc_regs[i]); 695bf215546Sopenharmony_ci struct ac_pc_block *ac_block = ac_pc_get_block(&pdevice->ac_perfcounters, block); 696bf215546Sopenharmony_ci unsigned offset = ac_block->num_instances * pass; 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci unsigned cnt = 1; 699bf215546Sopenharmony_ci while (cnt < pool->num_pc_regs - i && block == G_REG_BLOCK(pool->pc_regs[i + cnt])) 700bf215546Sopenharmony_ci ++cnt; 701bf215546Sopenharmony_ci 702bf215546Sopenharmony_ci if (offset < cnt) { 703bf215546Sopenharmony_ci unsigned pass_reg_cnt = MIN2(cnt - offset, ac_block->b->b->num_counters); 704bf215546Sopenharmony_ci radv_emit_select(cmd_buffer, ac_block, pass_reg_cnt, pool->pc_regs + i + offset); 705bf215546Sopenharmony_ci } 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_ci i += cnt; 708bf215546Sopenharmony_ci } 709bf215546Sopenharmony_ci 710bf215546Sopenharmony_ci *skip_dwords = cs->buf + cs->cdw - skip_dwords - 1; 711bf215546Sopenharmony_ci } 712bf215546Sopenharmony_ci 713bf215546Sopenharmony_ci radv_emit_instance(cmd_buffer, -1, -1); 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_ci /* The following sequence actually starts the perfcounters. */ 716bf215546Sopenharmony_ci 717bf215546Sopenharmony_ci radv_pc_stop_and_sample(cmd_buffer, pool, va, false); 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, 720bf215546Sopenharmony_ci S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING)); 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_ci radv_emit_windowed_counters(cmd_buffer->device, cs, cmd_buffer->qf, true); 723bf215546Sopenharmony_ci 724bf215546Sopenharmony_ci assert(cmd_buffer->cs->cdw <= cdw_max); 725bf215546Sopenharmony_ci} 726bf215546Sopenharmony_ci 727bf215546Sopenharmony_civoid 728bf215546Sopenharmony_ciradv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va) 729bf215546Sopenharmony_ci{ 730bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 731bf215546Sopenharmony_ci ASSERTED unsigned cdw_max; 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci cdw_max = 734bf215546Sopenharmony_ci radeon_check_space(cmd_buffer->device->ws, cs, 735bf215546Sopenharmony_ci 256 + /* Reserved for things that don't scale with passes/counters */ 736bf215546Sopenharmony_ci 5 * pool->num_passes + /* COND_EXECs */ 737bf215546Sopenharmony_ci pool->b.stride / 8 * 8); 738bf215546Sopenharmony_ci 739bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pool->b.bo); 740bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->perf_counter_bo); 741bf215546Sopenharmony_ci 742bf215546Sopenharmony_ci uint64_t perf_ctr_va = 743bf215546Sopenharmony_ci radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET; 744bf215546Sopenharmony_ci si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, 745bf215546Sopenharmony_ci radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, 746bf215546Sopenharmony_ci EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1, 747bf215546Sopenharmony_ci cmd_buffer->gfx9_fence_va); 748bf215546Sopenharmony_ci radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 0xffffffff); 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci radv_pc_wait_idle(cmd_buffer); 751bf215546Sopenharmony_ci radv_pc_stop_and_sample(cmd_buffer, pool, va, true); 752bf215546Sopenharmony_ci 753bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, 754bf215546Sopenharmony_ci S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET)); 755bf215546Sopenharmony_ci radv_emit_spi_config_cntl(cmd_buffer->device, cs, false); 756bf215546Sopenharmony_ci radv_emit_inhibit_clockgating(cmd_buffer->device, cs, false); 757bf215546Sopenharmony_ci 758bf215546Sopenharmony_ci assert(cmd_buffer->cs->cdw <= cdw_max); 759bf215546Sopenharmony_ci} 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_cistatic uint64_t 762bf215546Sopenharmony_ciradv_pc_sum_reg(uint32_t reg, const uint64_t *data) 763bf215546Sopenharmony_ci{ 764bf215546Sopenharmony_ci unsigned instances = G_REG_INSTANCES(reg); 765bf215546Sopenharmony_ci unsigned offset = G_REG_OFFSET(reg) / 8; 766bf215546Sopenharmony_ci uint64_t result = 0; 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ci if (G_REG_CONSTANT(reg)) 769bf215546Sopenharmony_ci return reg & 0x7fffffffu; 770bf215546Sopenharmony_ci 771bf215546Sopenharmony_ci for (unsigned i = 0; i < instances; ++i) { 772bf215546Sopenharmony_ci result += data[offset + 2 * i + 1] - data[offset + 2 * i]; 773bf215546Sopenharmony_ci } 774bf215546Sopenharmony_ci 775bf215546Sopenharmony_ci return result; 776bf215546Sopenharmony_ci} 777bf215546Sopenharmony_ci 778bf215546Sopenharmony_cistatic uint64_t 779bf215546Sopenharmony_ciradv_pc_max_reg(uint32_t reg, const uint64_t *data) 780bf215546Sopenharmony_ci{ 781bf215546Sopenharmony_ci unsigned instances = G_REG_INSTANCES(reg); 782bf215546Sopenharmony_ci unsigned offset = G_REG_OFFSET(reg) / 8; 783bf215546Sopenharmony_ci uint64_t result = 0; 784bf215546Sopenharmony_ci 785bf215546Sopenharmony_ci if (G_REG_CONSTANT(reg)) 786bf215546Sopenharmony_ci return reg & 0x7fffffffu; 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_ci for (unsigned i = 0; i < instances; ++i) { 789bf215546Sopenharmony_ci result = MAX2(result, data[offset + 2 * i + 1]); 790bf215546Sopenharmony_ci } 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_ci return result; 793bf215546Sopenharmony_ci} 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_cistatic union VkPerformanceCounterResultKHR 796bf215546Sopenharmony_ciradv_pc_get_result(const struct radv_perfcounter_impl *impl, const uint64_t *data) 797bf215546Sopenharmony_ci{ 798bf215546Sopenharmony_ci union VkPerformanceCounterResultKHR result; 799bf215546Sopenharmony_ci 800bf215546Sopenharmony_ci switch (impl->op) { 801bf215546Sopenharmony_ci case RADV_PC_OP_MAX: 802bf215546Sopenharmony_ci result.float64 = radv_pc_max_reg(impl->regs[0], data); 803bf215546Sopenharmony_ci break; 804bf215546Sopenharmony_ci case RADV_PC_OP_SUM: 805bf215546Sopenharmony_ci result.float64 = radv_pc_sum_reg(impl->regs[0], data); 806bf215546Sopenharmony_ci break; 807bf215546Sopenharmony_ci case RADV_PC_OP_RATIO_DIVSCALE: 808bf215546Sopenharmony_ci result.float64 = radv_pc_sum_reg(impl->regs[0], data) / 809bf215546Sopenharmony_ci (double)radv_pc_sum_reg(impl->regs[1], data) / 810bf215546Sopenharmony_ci radv_pc_sum_reg(impl->regs[2], data) * 100.0; 811bf215546Sopenharmony_ci break; 812bf215546Sopenharmony_ci case RADV_PC_OP_REVERSE_RATIO: { 813bf215546Sopenharmony_ci double tmp = radv_pc_sum_reg(impl->regs[1], data); 814bf215546Sopenharmony_ci result.float64 = (tmp - radv_pc_sum_reg(impl->regs[0], data)) / tmp * 100.0; 815bf215546Sopenharmony_ci break; 816bf215546Sopenharmony_ci } 817bf215546Sopenharmony_ci case RADV_PC_OP_SUM_WEIGHTED_4: 818bf215546Sopenharmony_ci result.float64 = 0.0; 819bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; ++i) 820bf215546Sopenharmony_ci result.float64 += 821bf215546Sopenharmony_ci radv_pc_sum_reg(impl->regs[2 * i], data) * radv_pc_sum_reg(impl->regs[2 * i + 1], data); 822bf215546Sopenharmony_ci break; 823bf215546Sopenharmony_ci default: 824bf215546Sopenharmony_ci unreachable("unhandled performance counter operation"); 825bf215546Sopenharmony_ci } 826bf215546Sopenharmony_ci return result; 827bf215546Sopenharmony_ci} 828bf215546Sopenharmony_ci 829bf215546Sopenharmony_civoid 830bf215546Sopenharmony_ciradv_pc_get_results(const struct radv_pc_query_pool *pc_pool, const uint64_t *data, void *out) 831bf215546Sopenharmony_ci{ 832bf215546Sopenharmony_ci union VkPerformanceCounterResultKHR *pc_result = out; 833bf215546Sopenharmony_ci 834bf215546Sopenharmony_ci for (unsigned i = 0; i < pc_pool->num_counters; ++i) { 835bf215546Sopenharmony_ci pc_result[i] = radv_pc_get_result(pc_pool->counters + i, data); 836bf215546Sopenharmony_ci } 837bf215546Sopenharmony_ci} 838bf215546Sopenharmony_ci 839bf215546Sopenharmony_ciVkResult 840bf215546Sopenharmony_ciradv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( 841bf215546Sopenharmony_ci VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, uint32_t *pCounterCount, 842bf215546Sopenharmony_ci VkPerformanceCounterKHR *pCounters, VkPerformanceCounterDescriptionKHR *pCounterDescriptions) 843bf215546Sopenharmony_ci{ 844bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 845bf215546Sopenharmony_ci 846bf215546Sopenharmony_ci if (vk_queue_to_radv(pdevice, queueFamilyIndex) != RADV_QUEUE_GENERAL) { 847bf215546Sopenharmony_ci *pCounterCount = 0; 848bf215546Sopenharmony_ci return VK_SUCCESS; 849bf215546Sopenharmony_ci } 850bf215546Sopenharmony_ci 851bf215546Sopenharmony_ci if (!radv_init_perfcounter_descs(pdevice)) 852bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_ci uint32_t counter_cnt = pdevice->num_perfcounters; 855bf215546Sopenharmony_ci const struct radv_perfcounter_desc *descs = pdevice->perfcounters; 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_ci if (!pCounters && !pCounterDescriptions) { 858bf215546Sopenharmony_ci *pCounterCount = counter_cnt; 859bf215546Sopenharmony_ci return VK_SUCCESS; 860bf215546Sopenharmony_ci } 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci VkResult result = counter_cnt > *pCounterCount ? VK_INCOMPLETE : VK_SUCCESS; 863bf215546Sopenharmony_ci counter_cnt = MIN2(counter_cnt, *pCounterCount); 864bf215546Sopenharmony_ci *pCounterCount = counter_cnt; 865bf215546Sopenharmony_ci 866bf215546Sopenharmony_ci for (uint32_t i = 0; i < counter_cnt; ++i) { 867bf215546Sopenharmony_ci if (pCounters) { 868bf215546Sopenharmony_ci pCounters[i].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_KHR; 869bf215546Sopenharmony_ci pCounters[i].unit = descs[i].unit; 870bf215546Sopenharmony_ci pCounters[i].scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR; 871bf215546Sopenharmony_ci pCounters[i].storage = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR; 872bf215546Sopenharmony_ci 873bf215546Sopenharmony_ci memset(&pCounters[i].uuid, 0, sizeof(pCounters[i].uuid)); 874bf215546Sopenharmony_ci strcpy((char*)&pCounters[i].uuid, "RADV"); 875bf215546Sopenharmony_ci 876bf215546Sopenharmony_ci const uint32_t uuid = descs[i].uuid; 877bf215546Sopenharmony_ci memcpy(&pCounters[i].uuid[12], &uuid, sizeof(uuid)); 878bf215546Sopenharmony_ci } 879bf215546Sopenharmony_ci 880bf215546Sopenharmony_ci if (pCounterDescriptions) { 881bf215546Sopenharmony_ci pCounterDescriptions[i].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_DESCRIPTION_KHR; 882bf215546Sopenharmony_ci pCounterDescriptions[i].flags = 883bf215546Sopenharmony_ci VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_BIT_KHR; 884bf215546Sopenharmony_ci strcpy(pCounterDescriptions[i].name, descs[i].name); 885bf215546Sopenharmony_ci strcpy(pCounterDescriptions[i].category, descs[i].category); 886bf215546Sopenharmony_ci strcpy(pCounterDescriptions[i].description, descs[i].description); 887bf215546Sopenharmony_ci } 888bf215546Sopenharmony_ci } 889bf215546Sopenharmony_ci return result; 890bf215546Sopenharmony_ci} 891bf215546Sopenharmony_ci 892bf215546Sopenharmony_civoid 893bf215546Sopenharmony_ciradv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR( 894bf215546Sopenharmony_ci VkPhysicalDevice physicalDevice, 895bf215546Sopenharmony_ci const VkQueryPoolPerformanceCreateInfoKHR *pPerformanceQueryCreateInfo, uint32_t *pNumPasses) 896bf215546Sopenharmony_ci{ 897bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 898bf215546Sopenharmony_ci 899bf215546Sopenharmony_ci if (pPerformanceQueryCreateInfo->counterIndexCount == 0) { 900bf215546Sopenharmony_ci *pNumPasses = 0; 901bf215546Sopenharmony_ci return; 902bf215546Sopenharmony_ci } 903bf215546Sopenharmony_ci 904bf215546Sopenharmony_ci if (!radv_init_perfcounter_descs(pdevice)) { 905bf215546Sopenharmony_ci /* Can't return an error, so log */ 906bf215546Sopenharmony_ci fprintf(stderr, "radv: Failed to init perf counters\n"); 907bf215546Sopenharmony_ci *pNumPasses = 1; 908bf215546Sopenharmony_ci return; 909bf215546Sopenharmony_ci } 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_ci assert(vk_queue_to_radv(pdevice, pPerformanceQueryCreateInfo->queueFamilyIndex) == 912bf215546Sopenharmony_ci RADV_QUEUE_GENERAL); 913bf215546Sopenharmony_ci 914bf215546Sopenharmony_ci unsigned num_regs = 0; 915bf215546Sopenharmony_ci uint32_t *regs = NULL; 916bf215546Sopenharmony_ci VkResult result = 917bf215546Sopenharmony_ci radv_get_counter_registers(pdevice, pPerformanceQueryCreateInfo->counterIndexCount, 918bf215546Sopenharmony_ci pPerformanceQueryCreateInfo->pCounterIndices, &num_regs, ®s); 919bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 920bf215546Sopenharmony_ci /* Can't return an error, so log */ 921bf215546Sopenharmony_ci fprintf(stderr, "radv: Failed to allocate memory for perf counters\n"); 922bf215546Sopenharmony_ci } 923bf215546Sopenharmony_ci 924bf215546Sopenharmony_ci *pNumPasses = radv_get_num_counter_passes(pdevice, num_regs, regs); 925bf215546Sopenharmony_ci free(regs); 926bf215546Sopenharmony_ci} 927