1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include <inttypes.h> 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include "radv_cs.h" 27bf215546Sopenharmony_ci#include "radv_private.h" 28bf215546Sopenharmony_ci#include "sid.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#define SPM_RING_BASE_ALIGN 32 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_cistatic bool 33bf215546Sopenharmony_ciradv_spm_init_bo(struct radv_device *device) 34bf215546Sopenharmony_ci{ 35bf215546Sopenharmony_ci struct radeon_winsys *ws = device->ws; 36bf215546Sopenharmony_ci uint64_t size = 32 * 1024 * 1024; /* Default to 1MB. */ 37bf215546Sopenharmony_ci uint16_t sample_interval = 4096; /* Default to 4096 clk. */ 38bf215546Sopenharmony_ci VkResult result; 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci device->spm_trace.buffer_size = size; 41bf215546Sopenharmony_ci device->spm_trace.sample_interval = sample_interval; 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci struct radeon_winsys_bo *bo = NULL; 44bf215546Sopenharmony_ci result = ws->buffer_create( 45bf215546Sopenharmony_ci ws, size, 4096, RADEON_DOMAIN_VRAM, 46bf215546Sopenharmony_ci RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, 47bf215546Sopenharmony_ci RADV_BO_PRIORITY_SCRATCH, 0, &bo); 48bf215546Sopenharmony_ci device->spm_trace.bo = bo; 49bf215546Sopenharmony_ci if (result != VK_SUCCESS) 50bf215546Sopenharmony_ci return false; 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci result = ws->buffer_make_resident(ws, device->spm_trace.bo, true); 53bf215546Sopenharmony_ci if (result != VK_SUCCESS) 54bf215546Sopenharmony_ci return false; 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci device->spm_trace.ptr = ws->buffer_map(device->spm_trace.bo); 57bf215546Sopenharmony_ci if (!device->spm_trace.ptr) 58bf215546Sopenharmony_ci return false; 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci return true; 61bf215546Sopenharmony_ci} 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_cistatic void 64bf215546Sopenharmony_ciradv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs) 65bf215546Sopenharmony_ci{ 66bf215546Sopenharmony_ci struct ac_spm_trace_data *spm_trace = &device->spm_trace; 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci for (uint32_t b = 0; b < spm_trace->num_used_sq_block_sel; b++) { 69bf215546Sopenharmony_ci struct ac_spm_block_select *sq_block_sel = &spm_trace->sq_block_sel[b]; 70bf215546Sopenharmony_ci const struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0]; 71bf215546Sopenharmony_ci uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci radeon_set_uconfig_reg_seq(cs, reg_base + b * 4, 1); 74bf215546Sopenharmony_ci radeon_emit(cs, cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */ 75bf215546Sopenharmony_ci } 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci for (uint32_t b = 0; b < spm_trace->num_block_sel; b++) { 78bf215546Sopenharmony_ci struct ac_spm_block_select *block_sel = &spm_trace->block_sel[b]; 79bf215546Sopenharmony_ci struct ac_pc_block_base *regs = block_sel->b->b->b; 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, block_sel->grbm_gfx_index); 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci for (unsigned c = 0; c < block_sel->num_counters; c++) { 84bf215546Sopenharmony_ci const struct ac_spm_counter_select *cntr_sel = &block_sel->counters[c]; 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci if (!cntr_sel->active) 87bf215546Sopenharmony_ci continue; 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci radeon_set_uconfig_reg_seq(cs, regs->select0[c], 1); 90bf215546Sopenharmony_ci radeon_emit(cs, cntr_sel->sel0); 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci radeon_set_uconfig_reg_seq(cs, regs->select1[c], 1); 93bf215546Sopenharmony_ci radeon_emit(cs, cntr_sel->sel1); 94bf215546Sopenharmony_ci } 95bf215546Sopenharmony_ci } 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci /* Restore global broadcasting. */ 98bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, 99bf215546Sopenharmony_ci S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | 100bf215546Sopenharmony_ci S_030800_INSTANCE_BROADCAST_WRITES(1)); 101bf215546Sopenharmony_ci} 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_civoid 104bf215546Sopenharmony_ciradv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs) 105bf215546Sopenharmony_ci{ 106bf215546Sopenharmony_ci struct ac_spm_trace_data *spm_trace = &device->spm_trace; 107bf215546Sopenharmony_ci uint64_t va = radv_buffer_get_va(spm_trace->bo); 108bf215546Sopenharmony_ci uint64_t ring_size = spm_trace->buffer_size; 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci /* It's required that the ring VA and the size are correctly aligned. */ 111bf215546Sopenharmony_ci assert(!(va & (SPM_RING_BASE_ALIGN - 1))); 112bf215546Sopenharmony_ci assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1))); 113bf215546Sopenharmony_ci assert(spm_trace->sample_interval >= 32); 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci /* Configure the SPM ring buffer. */ 116bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_037200_RLC_SPM_PERFMON_CNTL, 117bf215546Sopenharmony_ci S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */ 118bf215546Sopenharmony_ci S_037200_PERFMON_SAMPLE_INTERVAL(spm_trace->sample_interval)); /* in sclk */ 119bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va); 120bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_037208_RLC_SPM_PERFMON_RING_BASE_HI, 121bf215546Sopenharmony_ci S_037208_RING_BASE_HI(va >> 32)); 122bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size); 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci /* Configure the muxsel. */ 125bf215546Sopenharmony_ci uint32_t total_muxsel_lines = 0; 126bf215546Sopenharmony_ci for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) { 127bf215546Sopenharmony_ci total_muxsel_lines += spm_trace->num_muxsel_lines[s]; 128bf215546Sopenharmony_ci } 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0); 131bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0); 132bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE, 133bf215546Sopenharmony_ci S_03727C_SE0_NUM_LINE(spm_trace->num_muxsel_lines[0]) | 134bf215546Sopenharmony_ci S_03727C_SE1_NUM_LINE(spm_trace->num_muxsel_lines[1]) | 135bf215546Sopenharmony_ci S_03727C_SE2_NUM_LINE(spm_trace->num_muxsel_lines[2]) | 136bf215546Sopenharmony_ci S_03727C_SE3_NUM_LINE(spm_trace->num_muxsel_lines[3])); 137bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE, 138bf215546Sopenharmony_ci S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) | 139bf215546Sopenharmony_ci S_037280_GLOBAL_NUM_LINE(spm_trace->num_muxsel_lines[4])); 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci /* Upload each muxsel ram to the RLC. */ 142bf215546Sopenharmony_ci for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) { 143bf215546Sopenharmony_ci unsigned rlc_muxsel_addr, rlc_muxsel_data; 144bf215546Sopenharmony_ci unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) | 145bf215546Sopenharmony_ci S_030800_INSTANCE_BROADCAST_WRITES(1); 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci if (!spm_trace->num_muxsel_lines[s]) 148bf215546Sopenharmony_ci continue; 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) { 151bf215546Sopenharmony_ci grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1); 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci rlc_muxsel_addr = R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR; 154bf215546Sopenharmony_ci rlc_muxsel_data = R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA; 155bf215546Sopenharmony_ci } else { 156bf215546Sopenharmony_ci grbm_gfx_index |= S_030800_SE_INDEX(s); 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci rlc_muxsel_addr = R_03721C_RLC_SPM_SE_MUXSEL_ADDR; 159bf215546Sopenharmony_ci rlc_muxsel_data = R_037220_RLC_SPM_SE_MUXSEL_DATA; 160bf215546Sopenharmony_ci } 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, grbm_gfx_index); 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci for (unsigned l = 0; l < spm_trace->num_muxsel_lines[s]; l++) { 165bf215546Sopenharmony_ci uint32_t *data = (uint32_t *)spm_trace->muxsel_lines[s][l].muxsel; 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci /* Select MUXSEL_ADDR to point to the next muxsel. */ 168bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE); 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci /* Write the muxsel line configuration with MUXSEL_DATA. */ 171bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0)); 172bf215546Sopenharmony_ci radeon_emit(cs, S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | 173bf215546Sopenharmony_ci S_370_WR_CONFIRM(1) | 174bf215546Sopenharmony_ci S_370_ENGINE_SEL(V_370_ME) | 175bf215546Sopenharmony_ci S_370_WR_ONE_ADDR(1)); 176bf215546Sopenharmony_ci radeon_emit(cs, rlc_muxsel_data >> 2); 177bf215546Sopenharmony_ci radeon_emit(cs, 0); 178bf215546Sopenharmony_ci radeon_emit_array(cs, data, AC_SPM_MUXSEL_LINE_SIZE); 179bf215546Sopenharmony_ci } 180bf215546Sopenharmony_ci } 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci /* Select SPM counters. */ 183bf215546Sopenharmony_ci radv_emit_spm_counters(device, cs); 184bf215546Sopenharmony_ci} 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_cibool 187bf215546Sopenharmony_ciradv_spm_init(struct radv_device *device) 188bf215546Sopenharmony_ci{ 189bf215546Sopenharmony_ci const struct radeon_info *info = &device->physical_device->rad_info; 190bf215546Sopenharmony_ci struct ac_perfcounters *pc = &device->physical_device->ac_perfcounters; 191bf215546Sopenharmony_ci struct ac_spm_counter_create_info spm_counters[] = { 192bf215546Sopenharmony_ci {TCP, 0, 0x9}, /* Number of L2 requests. */ 193bf215546Sopenharmony_ci {TCP, 0, 0x12}, /* Number of L2 misses. */ 194bf215546Sopenharmony_ci {SQ, 0, 0x14f}, /* Number of SCACHE hits. */ 195bf215546Sopenharmony_ci {SQ, 0, 0x150}, /* Number of SCACHE misses. */ 196bf215546Sopenharmony_ci {SQ, 0, 0x151}, /* Number of SCACHE misses duplicate. */ 197bf215546Sopenharmony_ci {SQ, 0, 0x12c}, /* Number of ICACHE hits. */ 198bf215546Sopenharmony_ci {SQ, 0, 0x12d}, /* Number of ICACHE misses. */ 199bf215546Sopenharmony_ci {SQ, 0, 0x12e}, /* Number of ICACHE misses duplicate. */ 200bf215546Sopenharmony_ci {GL1C, 0, 0xe}, /* Number of GL1C requests. */ 201bf215546Sopenharmony_ci {GL1C, 0, 0x12}, /* Number of GL1C misses. */ 202bf215546Sopenharmony_ci {GL2C, 0, 0x3}, /* Number of GL2C requests. */ 203bf215546Sopenharmony_ci {GL2C, 0, info->gfx_level >= GFX10_3 ? 0x2b : 0x23}, /* Number of GL2C misses. */ 204bf215546Sopenharmony_ci }; 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci /* We failed to initialize the performance counters. */ 207bf215546Sopenharmony_ci if (!pc->blocks) 208bf215546Sopenharmony_ci return false; 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci if (!ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &device->spm_trace)) 211bf215546Sopenharmony_ci return false; 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci if (!radv_spm_init_bo(device)) 214bf215546Sopenharmony_ci return false; 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci return true; 217bf215546Sopenharmony_ci} 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_civoid 220bf215546Sopenharmony_ciradv_spm_finish(struct radv_device *device) 221bf215546Sopenharmony_ci{ 222bf215546Sopenharmony_ci struct radeon_winsys *ws = device->ws; 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci if (device->spm_trace.bo) { 225bf215546Sopenharmony_ci ws->buffer_make_resident(ws, device->spm_trace.bo, false); 226bf215546Sopenharmony_ci ws->buffer_destroy(ws, device->spm_trace.bo); 227bf215546Sopenharmony_ci } 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci ac_destroy_spm(&device->spm_trace); 230bf215546Sopenharmony_ci} 231