1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2015 Advanced Micro Devices, Inc. 3bf215546Sopenharmony_ci * All Rights Reserved. 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22bf215546Sopenharmony_ci * SOFTWARE. 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "si_build_pm4.h" 26bf215546Sopenharmony_ci#include "si_query.h" 27bf215546Sopenharmony_ci#include "util/u_memory.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "ac_perfcounter.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_cistruct si_query_group { 32bf215546Sopenharmony_ci struct si_query_group *next; 33bf215546Sopenharmony_ci struct ac_pc_block *block; 34bf215546Sopenharmony_ci unsigned sub_gid; /* only used during init */ 35bf215546Sopenharmony_ci unsigned result_base; /* only used during init */ 36bf215546Sopenharmony_ci int se; 37bf215546Sopenharmony_ci int instance; 38bf215546Sopenharmony_ci unsigned num_counters; 39bf215546Sopenharmony_ci unsigned selectors[AC_QUERY_MAX_COUNTERS]; 40bf215546Sopenharmony_ci}; 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_cistruct si_query_counter { 43bf215546Sopenharmony_ci unsigned base; 44bf215546Sopenharmony_ci unsigned qwords; 45bf215546Sopenharmony_ci unsigned stride; /* in uint64s */ 46bf215546Sopenharmony_ci}; 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_cistruct si_query_pc { 49bf215546Sopenharmony_ci struct si_query b; 50bf215546Sopenharmony_ci struct si_query_buffer buffer; 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci /* Size of the results in memory, in bytes. */ 53bf215546Sopenharmony_ci unsigned result_size; 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci unsigned shaders; 56bf215546Sopenharmony_ci unsigned num_counters; 57bf215546Sopenharmony_ci struct si_query_counter *counters; 58bf215546Sopenharmony_ci struct si_query_group *groups; 59bf215546Sopenharmony_ci}; 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_cistatic void si_pc_emit_instance(struct si_context *sctx, int se, int instance) 62bf215546Sopenharmony_ci{ 63bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 64bf215546Sopenharmony_ci unsigned value = S_030800_SH_BROADCAST_WRITES(1); 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci if (se >= 0) { 67bf215546Sopenharmony_ci value |= S_030800_SE_INDEX(se); 68bf215546Sopenharmony_ci } else { 69bf215546Sopenharmony_ci value |= S_030800_SE_BROADCAST_WRITES(1); 70bf215546Sopenharmony_ci } 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10) { 73bf215546Sopenharmony_ci /* TODO: Expose counters from each shader array separately if needed. */ 74bf215546Sopenharmony_ci value |= S_030800_SA_BROADCAST_WRITES(1); 75bf215546Sopenharmony_ci } 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci if (instance >= 0) { 78bf215546Sopenharmony_ci value |= S_030800_INSTANCE_INDEX(instance); 79bf215546Sopenharmony_ci } else { 80bf215546Sopenharmony_ci value |= S_030800_INSTANCE_BROADCAST_WRITES(1); 81bf215546Sopenharmony_ci } 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci radeon_begin(cs); 84bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, value); 85bf215546Sopenharmony_ci radeon_end(); 86bf215546Sopenharmony_ci} 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_civoid si_pc_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders) 89bf215546Sopenharmony_ci{ 90bf215546Sopenharmony_ci radeon_begin(cs); 91bf215546Sopenharmony_ci radeon_set_uconfig_reg_seq(R_036780_SQ_PERFCOUNTER_CTRL, 2, false); 92bf215546Sopenharmony_ci radeon_emit(shaders & 0x7f); 93bf215546Sopenharmony_ci radeon_emit(0xffffffff); 94bf215546Sopenharmony_ci radeon_end(); 95bf215546Sopenharmony_ci} 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_cistatic void si_pc_emit_select(struct si_context *sctx, struct ac_pc_block *block, unsigned count, 98bf215546Sopenharmony_ci unsigned *selectors) 99bf215546Sopenharmony_ci{ 100bf215546Sopenharmony_ci struct ac_pc_block_base *regs = block->b->b; 101bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 102bf215546Sopenharmony_ci unsigned idx; 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci assert(count <= regs->num_counters); 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci /* Fake counters. */ 107bf215546Sopenharmony_ci if (!regs->select0) 108bf215546Sopenharmony_ci return; 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci radeon_begin(cs); 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci for (idx = 0; idx < count; ++idx) { 113bf215546Sopenharmony_ci radeon_set_uconfig_reg_seq(regs->select0[idx], 1, false); 114bf215546Sopenharmony_ci radeon_emit(selectors[idx] | regs->select_or); 115bf215546Sopenharmony_ci } 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci for (idx = 0; idx < regs->num_spm_counters; idx++) { 118bf215546Sopenharmony_ci radeon_set_uconfig_reg_seq(regs->select1[idx], 1, false); 119bf215546Sopenharmony_ci radeon_emit(0); 120bf215546Sopenharmony_ci } 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci radeon_end(); 123bf215546Sopenharmony_ci} 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_cistatic void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer, uint64_t va) 126bf215546Sopenharmony_ci{ 127bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address, 130bf215546Sopenharmony_ci COPY_DATA_IMM, NULL, 1); 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci radeon_begin(cs); 133bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, 134bf215546Sopenharmony_ci S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET)); 135bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 136bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0)); 137bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, 138bf215546Sopenharmony_ci S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING)); 139bf215546Sopenharmony_ci radeon_end(); 140bf215546Sopenharmony_ci} 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci/* Note: The buffer was already added in si_pc_emit_start, so we don't have to 143bf215546Sopenharmony_ci * do it again in here. */ 144bf215546Sopenharmony_cistatic void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer, uint64_t va) 145bf215546Sopenharmony_ci{ 146bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, 149bf215546Sopenharmony_ci EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY); 150bf215546Sopenharmony_ci si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL); 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci radeon_begin(cs); 153bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 154bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0)); 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci if (!sctx->screen->info.never_send_perfcounter_stop) { 157bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 158bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0)); 159bf215546Sopenharmony_ci } 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci radeon_set_uconfig_reg( 162bf215546Sopenharmony_ci R_036020_CP_PERFMON_CNTL, 163bf215546Sopenharmony_ci S_036020_PERFMON_STATE(sctx->screen->info.never_stop_sq_perf_counters ? 164bf215546Sopenharmony_ci V_036020_CP_PERFMON_STATE_START_COUNTING : 165bf215546Sopenharmony_ci V_036020_CP_PERFMON_STATE_STOP_COUNTING) | 166bf215546Sopenharmony_ci S_036020_PERFMON_SAMPLE_ENABLE(1)); 167bf215546Sopenharmony_ci radeon_end(); 168bf215546Sopenharmony_ci} 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_civoid si_pc_emit_spm_start(struct radeon_cmdbuf *cs) 171bf215546Sopenharmony_ci{ 172bf215546Sopenharmony_ci radeon_begin(cs); 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci /* Start SPM counters. */ 175bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, 176bf215546Sopenharmony_ci S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | 177bf215546Sopenharmony_ci S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING)); 178bf215546Sopenharmony_ci /* Start windowed performance counters. */ 179bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 180bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0)); 181bf215546Sopenharmony_ci radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(1)); 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci radeon_end(); 184bf215546Sopenharmony_ci} 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_civoid si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_counters, 187bf215546Sopenharmony_ci bool never_send_perfcounter_stop) 188bf215546Sopenharmony_ci{ 189bf215546Sopenharmony_ci radeon_begin(cs); 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci /* Stop windowed performance counters. */ 192bf215546Sopenharmony_ci if (!never_send_perfcounter_stop) { 193bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 194bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0)); 195bf215546Sopenharmony_ci } 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(0)); 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci /* Stop SPM counters. */ 200bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, 201bf215546Sopenharmony_ci S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | 202bf215546Sopenharmony_ci S_036020_SPM_PERFMON_STATE(never_stop_sq_perf_counters ? 203bf215546Sopenharmony_ci V_036020_STRM_PERFMON_STATE_START_COUNTING : 204bf215546Sopenharmony_ci V_036020_STRM_PERFMON_STATE_STOP_COUNTING)); 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci radeon_end(); 207bf215546Sopenharmony_ci} 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_civoid si_pc_emit_spm_reset(struct radeon_cmdbuf *cs) 210bf215546Sopenharmony_ci{ 211bf215546Sopenharmony_ci radeon_begin(cs); 212bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, 213bf215546Sopenharmony_ci S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | 214bf215546Sopenharmony_ci S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_DISABLE_AND_RESET)); 215bf215546Sopenharmony_ci radeon_end(); 216bf215546Sopenharmony_ci} 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_cistatic void si_pc_emit_read(struct si_context *sctx, struct ac_pc_block *block, unsigned count, 220bf215546Sopenharmony_ci uint64_t va) 221bf215546Sopenharmony_ci{ 222bf215546Sopenharmony_ci struct ac_pc_block_base *regs = block->b->b; 223bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 224bf215546Sopenharmony_ci unsigned idx; 225bf215546Sopenharmony_ci unsigned reg = regs->counter0_lo; 226bf215546Sopenharmony_ci unsigned reg_delta = 8; 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci radeon_begin(cs); 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci if (regs->select0) { 231bf215546Sopenharmony_ci for (idx = 0; idx < count; ++idx) { 232bf215546Sopenharmony_ci if (regs->counters) 233bf215546Sopenharmony_ci reg = regs->counters[idx]; 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); 236bf215546Sopenharmony_ci radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | 237bf215546Sopenharmony_ci COPY_DATA_COUNT_SEL); /* 64 bits */ 238bf215546Sopenharmony_ci radeon_emit(reg >> 2); 239bf215546Sopenharmony_ci radeon_emit(0); /* unused */ 240bf215546Sopenharmony_ci radeon_emit(va); 241bf215546Sopenharmony_ci radeon_emit(va >> 32); 242bf215546Sopenharmony_ci va += sizeof(uint64_t); 243bf215546Sopenharmony_ci reg += reg_delta; 244bf215546Sopenharmony_ci } 245bf215546Sopenharmony_ci } else { 246bf215546Sopenharmony_ci /* Fake counters. */ 247bf215546Sopenharmony_ci for (idx = 0; idx < count; ++idx) { 248bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); 249bf215546Sopenharmony_ci radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | 250bf215546Sopenharmony_ci COPY_DATA_COUNT_SEL); 251bf215546Sopenharmony_ci radeon_emit(0); /* immediate */ 252bf215546Sopenharmony_ci radeon_emit(0); 253bf215546Sopenharmony_ci radeon_emit(va); 254bf215546Sopenharmony_ci radeon_emit(va >> 32); 255bf215546Sopenharmony_ci va += sizeof(uint64_t); 256bf215546Sopenharmony_ci } 257bf215546Sopenharmony_ci } 258bf215546Sopenharmony_ci radeon_end(); 259bf215546Sopenharmony_ci} 260bf215546Sopenharmony_ci 261bf215546Sopenharmony_cistatic void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery) 262bf215546Sopenharmony_ci{ 263bf215546Sopenharmony_ci struct si_query_pc *query = (struct si_query_pc *)squery; 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci while (query->groups) { 266bf215546Sopenharmony_ci struct si_query_group *group = query->groups; 267bf215546Sopenharmony_ci query->groups = group->next; 268bf215546Sopenharmony_ci FREE(group); 269bf215546Sopenharmony_ci } 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci FREE(query->counters); 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ci si_query_buffer_destroy(sctx->screen, &query->buffer); 274bf215546Sopenharmony_ci FREE(query); 275bf215546Sopenharmony_ci} 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_civoid si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit) 278bf215546Sopenharmony_ci{ 279bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) 280bf215546Sopenharmony_ci return; 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ci radeon_begin(&sctx->gfx_cs); 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10) { 285bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_037390_RLC_PERFMON_CLK_CNTL, 286bf215546Sopenharmony_ci S_037390_PERFMON_CLOCK_STATE(inhibit)); 287bf215546Sopenharmony_ci } else if (sctx->gfx_level >= GFX8) { 288bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_0372FC_RLC_PERFMON_CLK_CNTL, 289bf215546Sopenharmony_ci S_0372FC_PERFMON_CLOCK_STATE(inhibit)); 290bf215546Sopenharmony_ci } 291bf215546Sopenharmony_ci radeon_end(); 292bf215546Sopenharmony_ci} 293bf215546Sopenharmony_ci 294bf215546Sopenharmony_cistatic void si_pc_query_resume(struct si_context *sctx, struct si_query *squery) 295bf215546Sopenharmony_ci/* 296bf215546Sopenharmony_ci struct si_query_hw *hwquery, 297bf215546Sopenharmony_ci struct si_resource *buffer, uint64_t va)*/ 298bf215546Sopenharmony_ci{ 299bf215546Sopenharmony_ci struct si_query_pc *query = (struct si_query_pc *)squery; 300bf215546Sopenharmony_ci int current_se = -1; 301bf215546Sopenharmony_ci int current_instance = -1; 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci if (!si_query_buffer_alloc(sctx, &query->buffer, NULL, query->result_size)) 304bf215546Sopenharmony_ci return; 305bf215546Sopenharmony_ci si_need_gfx_cs_space(sctx, 0); 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci if (query->shaders) 308bf215546Sopenharmony_ci si_pc_emit_shaders(&sctx->gfx_cs, query->shaders); 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci si_inhibit_clockgating(sctx, &sctx->gfx_cs, true); 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci for (struct si_query_group *group = query->groups; group; group = group->next) { 313bf215546Sopenharmony_ci struct ac_pc_block *block = group->block; 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci if (group->se != current_se || group->instance != current_instance) { 316bf215546Sopenharmony_ci current_se = group->se; 317bf215546Sopenharmony_ci current_instance = group->instance; 318bf215546Sopenharmony_ci si_pc_emit_instance(sctx, group->se, group->instance); 319bf215546Sopenharmony_ci } 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci si_pc_emit_select(sctx, block, group->num_counters, group->selectors); 322bf215546Sopenharmony_ci } 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci if (current_se != -1 || current_instance != -1) 325bf215546Sopenharmony_ci si_pc_emit_instance(sctx, -1, -1); 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end; 328bf215546Sopenharmony_ci si_pc_emit_start(sctx, query->buffer.buf, va); 329bf215546Sopenharmony_ci} 330bf215546Sopenharmony_ci 331bf215546Sopenharmony_cistatic void si_pc_query_suspend(struct si_context *sctx, struct si_query *squery) 332bf215546Sopenharmony_ci{ 333bf215546Sopenharmony_ci struct si_query_pc *query = (struct si_query_pc *)squery; 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci if (!query->buffer.buf) 336bf215546Sopenharmony_ci return; 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end; 339bf215546Sopenharmony_ci query->buffer.results_end += query->result_size; 340bf215546Sopenharmony_ci 341bf215546Sopenharmony_ci si_pc_emit_stop(sctx, query->buffer.buf, va); 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci for (struct si_query_group *group = query->groups; group; group = group->next) { 344bf215546Sopenharmony_ci struct ac_pc_block *block = group->block; 345bf215546Sopenharmony_ci unsigned se = group->se >= 0 ? group->se : 0; 346bf215546Sopenharmony_ci unsigned se_end = se + 1; 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci if ((block->b->b->flags & AC_PC_BLOCK_SE) && (group->se < 0)) 349bf215546Sopenharmony_ci se_end = sctx->screen->info.max_se; 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci do { 352bf215546Sopenharmony_ci unsigned instance = group->instance >= 0 ? group->instance : 0; 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci do { 355bf215546Sopenharmony_ci si_pc_emit_instance(sctx, se, instance); 356bf215546Sopenharmony_ci si_pc_emit_read(sctx, block, group->num_counters, va); 357bf215546Sopenharmony_ci va += sizeof(uint64_t) * group->num_counters; 358bf215546Sopenharmony_ci } while (group->instance < 0 && ++instance < block->num_instances); 359bf215546Sopenharmony_ci } while (++se < se_end); 360bf215546Sopenharmony_ci } 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci si_pc_emit_instance(sctx, -1, -1); 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci si_inhibit_clockgating(sctx, &sctx->gfx_cs, false); 365bf215546Sopenharmony_ci} 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_cistatic bool si_pc_query_begin(struct si_context *ctx, struct si_query *squery) 368bf215546Sopenharmony_ci{ 369bf215546Sopenharmony_ci struct si_query_pc *query = (struct si_query_pc *)squery; 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci si_query_buffer_reset(ctx, &query->buffer); 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci list_addtail(&query->b.active_list, &ctx->active_queries); 374bf215546Sopenharmony_ci ctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend; 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci si_pc_query_resume(ctx, squery); 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_ci return true; 379bf215546Sopenharmony_ci} 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_cistatic bool si_pc_query_end(struct si_context *ctx, struct si_query *squery) 382bf215546Sopenharmony_ci{ 383bf215546Sopenharmony_ci struct si_query_pc *query = (struct si_query_pc *)squery; 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_ci si_pc_query_suspend(ctx, squery); 386bf215546Sopenharmony_ci 387bf215546Sopenharmony_ci list_del(&squery->active_list); 388bf215546Sopenharmony_ci ctx->num_cs_dw_queries_suspend -= squery->num_cs_dw_suspend; 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci return query->buffer.buf != NULL; 391bf215546Sopenharmony_ci} 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_cistatic void si_pc_query_add_result(struct si_query_pc *query, void *buffer, 394bf215546Sopenharmony_ci union pipe_query_result *result) 395bf215546Sopenharmony_ci{ 396bf215546Sopenharmony_ci uint64_t *results = buffer; 397bf215546Sopenharmony_ci unsigned i, j; 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci for (i = 0; i < query->num_counters; ++i) { 400bf215546Sopenharmony_ci struct si_query_counter *counter = &query->counters[i]; 401bf215546Sopenharmony_ci 402bf215546Sopenharmony_ci for (j = 0; j < counter->qwords; ++j) { 403bf215546Sopenharmony_ci uint32_t value = results[counter->base + j * counter->stride]; 404bf215546Sopenharmony_ci result->batch[i].u64 += value; 405bf215546Sopenharmony_ci } 406bf215546Sopenharmony_ci } 407bf215546Sopenharmony_ci} 408bf215546Sopenharmony_ci 409bf215546Sopenharmony_cistatic bool si_pc_query_get_result(struct si_context *sctx, struct si_query *squery, bool wait, 410bf215546Sopenharmony_ci union pipe_query_result *result) 411bf215546Sopenharmony_ci{ 412bf215546Sopenharmony_ci struct si_query_pc *query = (struct si_query_pc *)squery; 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci memset(result, 0, sizeof(result->batch[0]) * query->num_counters); 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci for (struct si_query_buffer *qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { 417bf215546Sopenharmony_ci unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK); 418bf215546Sopenharmony_ci unsigned results_base = 0; 419bf215546Sopenharmony_ci void *map; 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci if (squery->b.flushed) 422bf215546Sopenharmony_ci map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage); 423bf215546Sopenharmony_ci else 424bf215546Sopenharmony_ci map = si_buffer_map(sctx, qbuf->buf, usage); 425bf215546Sopenharmony_ci 426bf215546Sopenharmony_ci if (!map) 427bf215546Sopenharmony_ci return false; 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci while (results_base != qbuf->results_end) { 430bf215546Sopenharmony_ci si_pc_query_add_result(query, map + results_base, result); 431bf215546Sopenharmony_ci results_base += query->result_size; 432bf215546Sopenharmony_ci } 433bf215546Sopenharmony_ci } 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci return true; 436bf215546Sopenharmony_ci} 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_cistatic const struct si_query_ops batch_query_ops = { 439bf215546Sopenharmony_ci .destroy = si_pc_query_destroy, 440bf215546Sopenharmony_ci .begin = si_pc_query_begin, 441bf215546Sopenharmony_ci .end = si_pc_query_end, 442bf215546Sopenharmony_ci .get_result = si_pc_query_get_result, 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci .suspend = si_pc_query_suspend, 445bf215546Sopenharmony_ci .resume = si_pc_query_resume, 446bf215546Sopenharmony_ci}; 447bf215546Sopenharmony_ci 448bf215546Sopenharmony_cistatic struct si_query_group *get_group_state(struct si_screen *screen, struct si_query_pc *query, 449bf215546Sopenharmony_ci struct ac_pc_block *block, unsigned sub_gid) 450bf215546Sopenharmony_ci{ 451bf215546Sopenharmony_ci struct si_perfcounters *pc = screen->perfcounters; 452bf215546Sopenharmony_ci struct si_query_group *group = query->groups; 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci while (group) { 455bf215546Sopenharmony_ci if (group->block == block && group->sub_gid == sub_gid) 456bf215546Sopenharmony_ci return group; 457bf215546Sopenharmony_ci group = group->next; 458bf215546Sopenharmony_ci } 459bf215546Sopenharmony_ci 460bf215546Sopenharmony_ci group = CALLOC_STRUCT(si_query_group); 461bf215546Sopenharmony_ci if (!group) 462bf215546Sopenharmony_ci return NULL; 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci group->block = block; 465bf215546Sopenharmony_ci group->sub_gid = sub_gid; 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci if (block->b->b->flags & AC_PC_BLOCK_SHADER) { 468bf215546Sopenharmony_ci unsigned sub_gids = block->num_instances; 469bf215546Sopenharmony_ci unsigned shader_id; 470bf215546Sopenharmony_ci unsigned shaders; 471bf215546Sopenharmony_ci unsigned query_shaders; 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci if (ac_pc_block_has_per_se_groups(&pc->base, block)) 474bf215546Sopenharmony_ci sub_gids = sub_gids * screen->info.max_se; 475bf215546Sopenharmony_ci shader_id = sub_gid / sub_gids; 476bf215546Sopenharmony_ci sub_gid = sub_gid % sub_gids; 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci shaders = ac_pc_shader_type_bits[shader_id]; 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci query_shaders = query->shaders & ~AC_PC_SHADERS_WINDOWING; 481bf215546Sopenharmony_ci if (query_shaders && query_shaders != shaders) { 482bf215546Sopenharmony_ci fprintf(stderr, "si_perfcounter: incompatible shader groups\n"); 483bf215546Sopenharmony_ci FREE(group); 484bf215546Sopenharmony_ci return NULL; 485bf215546Sopenharmony_ci } 486bf215546Sopenharmony_ci query->shaders = shaders; 487bf215546Sopenharmony_ci } 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci if (block->b->b->flags & AC_PC_BLOCK_SHADER_WINDOWED && !query->shaders) { 490bf215546Sopenharmony_ci // A non-zero value in query->shaders ensures that the shader 491bf215546Sopenharmony_ci // masking is reset unless the user explicitly requests one. 492bf215546Sopenharmony_ci query->shaders = AC_PC_SHADERS_WINDOWING; 493bf215546Sopenharmony_ci } 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci if (ac_pc_block_has_per_se_groups(&pc->base, block)) { 496bf215546Sopenharmony_ci group->se = sub_gid / block->num_instances; 497bf215546Sopenharmony_ci sub_gid = sub_gid % block->num_instances; 498bf215546Sopenharmony_ci } else { 499bf215546Sopenharmony_ci group->se = -1; 500bf215546Sopenharmony_ci } 501bf215546Sopenharmony_ci 502bf215546Sopenharmony_ci if (ac_pc_block_has_per_instance_groups(&pc->base, block)) { 503bf215546Sopenharmony_ci group->instance = sub_gid; 504bf215546Sopenharmony_ci } else { 505bf215546Sopenharmony_ci group->instance = -1; 506bf215546Sopenharmony_ci } 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci group->next = query->groups; 509bf215546Sopenharmony_ci query->groups = group; 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_ci return group; 512bf215546Sopenharmony_ci} 513bf215546Sopenharmony_ci 514bf215546Sopenharmony_cistruct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_queries, 515bf215546Sopenharmony_ci unsigned *query_types) 516bf215546Sopenharmony_ci{ 517bf215546Sopenharmony_ci struct si_screen *screen = (struct si_screen *)ctx->screen; 518bf215546Sopenharmony_ci struct si_perfcounters *pc = screen->perfcounters; 519bf215546Sopenharmony_ci struct ac_pc_block *block; 520bf215546Sopenharmony_ci struct si_query_group *group; 521bf215546Sopenharmony_ci struct si_query_pc *query; 522bf215546Sopenharmony_ci unsigned base_gid, sub_gid, sub_index; 523bf215546Sopenharmony_ci unsigned i, j; 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ci if (!pc) 526bf215546Sopenharmony_ci return NULL; 527bf215546Sopenharmony_ci 528bf215546Sopenharmony_ci query = CALLOC_STRUCT(si_query_pc); 529bf215546Sopenharmony_ci if (!query) 530bf215546Sopenharmony_ci return NULL; 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_ci query->b.ops = &batch_query_ops; 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci query->num_counters = num_queries; 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci /* Collect selectors per group */ 537bf215546Sopenharmony_ci for (i = 0; i < num_queries; ++i) { 538bf215546Sopenharmony_ci unsigned sub_gid; 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_ci if (query_types[i] < SI_QUERY_FIRST_PERFCOUNTER) 541bf215546Sopenharmony_ci goto error; 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci block = 544bf215546Sopenharmony_ci ac_lookup_counter(&pc->base, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, &base_gid, &sub_index); 545bf215546Sopenharmony_ci if (!block) 546bf215546Sopenharmony_ci goto error; 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci sub_gid = sub_index / block->b->selectors; 549bf215546Sopenharmony_ci sub_index = sub_index % block->b->selectors; 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci group = get_group_state(screen, query, block, sub_gid); 552bf215546Sopenharmony_ci if (!group) 553bf215546Sopenharmony_ci goto error; 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci if (group->num_counters >= block->b->b->num_counters) { 556bf215546Sopenharmony_ci fprintf(stderr, "perfcounter group %s: too many selected\n", block->b->b->name); 557bf215546Sopenharmony_ci goto error; 558bf215546Sopenharmony_ci } 559bf215546Sopenharmony_ci group->selectors[group->num_counters] = sub_index; 560bf215546Sopenharmony_ci ++group->num_counters; 561bf215546Sopenharmony_ci } 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci /* Compute result bases and CS size per group */ 564bf215546Sopenharmony_ci query->b.num_cs_dw_suspend = pc->num_stop_cs_dwords; 565bf215546Sopenharmony_ci query->b.num_cs_dw_suspend += pc->num_instance_cs_dwords; 566bf215546Sopenharmony_ci 567bf215546Sopenharmony_ci i = 0; 568bf215546Sopenharmony_ci for (group = query->groups; group; group = group->next) { 569bf215546Sopenharmony_ci struct ac_pc_block *block = group->block; 570bf215546Sopenharmony_ci unsigned read_dw; 571bf215546Sopenharmony_ci unsigned instances = 1; 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_ci if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0) 574bf215546Sopenharmony_ci instances = screen->info.max_se; 575bf215546Sopenharmony_ci if (group->instance < 0) 576bf215546Sopenharmony_ci instances *= block->num_instances; 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci group->result_base = i; 579bf215546Sopenharmony_ci query->result_size += sizeof(uint64_t) * instances * group->num_counters; 580bf215546Sopenharmony_ci i += instances * group->num_counters; 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_ci read_dw = 6 * group->num_counters; 583bf215546Sopenharmony_ci query->b.num_cs_dw_suspend += instances * read_dw; 584bf215546Sopenharmony_ci query->b.num_cs_dw_suspend += instances * pc->num_instance_cs_dwords; 585bf215546Sopenharmony_ci } 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_ci if (query->shaders) { 588bf215546Sopenharmony_ci if (query->shaders == AC_PC_SHADERS_WINDOWING) 589bf215546Sopenharmony_ci query->shaders = 0xffffffff; 590bf215546Sopenharmony_ci } 591bf215546Sopenharmony_ci 592bf215546Sopenharmony_ci /* Map user-supplied query array to result indices */ 593bf215546Sopenharmony_ci query->counters = CALLOC(num_queries, sizeof(*query->counters)); 594bf215546Sopenharmony_ci for (i = 0; i < num_queries; ++i) { 595bf215546Sopenharmony_ci struct si_query_counter *counter = &query->counters[i]; 596bf215546Sopenharmony_ci struct ac_pc_block *block; 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci block = 599bf215546Sopenharmony_ci ac_lookup_counter(&pc->base, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, &base_gid, &sub_index); 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci sub_gid = sub_index / block->b->selectors; 602bf215546Sopenharmony_ci sub_index = sub_index % block->b->selectors; 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_ci group = get_group_state(screen, query, block, sub_gid); 605bf215546Sopenharmony_ci assert(group != NULL); 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci for (j = 0; j < group->num_counters; ++j) { 608bf215546Sopenharmony_ci if (group->selectors[j] == sub_index) 609bf215546Sopenharmony_ci break; 610bf215546Sopenharmony_ci } 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci counter->base = group->result_base + j; 613bf215546Sopenharmony_ci counter->stride = group->num_counters; 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_ci counter->qwords = 1; 616bf215546Sopenharmony_ci if ((block->b->b->flags & AC_PC_BLOCK_SE) && group->se < 0) 617bf215546Sopenharmony_ci counter->qwords = screen->info.max_se; 618bf215546Sopenharmony_ci if (group->instance < 0) 619bf215546Sopenharmony_ci counter->qwords *= block->num_instances; 620bf215546Sopenharmony_ci } 621bf215546Sopenharmony_ci 622bf215546Sopenharmony_ci return (struct pipe_query *)query; 623bf215546Sopenharmony_ci 624bf215546Sopenharmony_cierror: 625bf215546Sopenharmony_ci si_pc_query_destroy((struct si_context *)ctx, &query->b); 626bf215546Sopenharmony_ci return NULL; 627bf215546Sopenharmony_ci} 628bf215546Sopenharmony_ci 629bf215546Sopenharmony_ciint si_get_perfcounter_info(struct si_screen *screen, unsigned index, 630bf215546Sopenharmony_ci struct pipe_driver_query_info *info) 631bf215546Sopenharmony_ci{ 632bf215546Sopenharmony_ci struct si_perfcounters *pc = screen->perfcounters; 633bf215546Sopenharmony_ci struct ac_pc_block *block; 634bf215546Sopenharmony_ci unsigned base_gid, sub; 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci if (!pc) 637bf215546Sopenharmony_ci return 0; 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_ci if (!info) { 640bf215546Sopenharmony_ci unsigned bid, num_queries = 0; 641bf215546Sopenharmony_ci 642bf215546Sopenharmony_ci for (bid = 0; bid < pc->base.num_blocks; ++bid) { 643bf215546Sopenharmony_ci num_queries += pc->base.blocks[bid].b->selectors * pc->base.blocks[bid].num_groups; 644bf215546Sopenharmony_ci } 645bf215546Sopenharmony_ci 646bf215546Sopenharmony_ci return num_queries; 647bf215546Sopenharmony_ci } 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_ci block = ac_lookup_counter(&pc->base, index, &base_gid, &sub); 650bf215546Sopenharmony_ci if (!block) 651bf215546Sopenharmony_ci return 0; 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_ci if (!block->selector_names) { 654bf215546Sopenharmony_ci if (!ac_init_block_names(&screen->info, &pc->base, block)) 655bf215546Sopenharmony_ci return 0; 656bf215546Sopenharmony_ci } 657bf215546Sopenharmony_ci info->name = block->selector_names + sub * block->selector_name_stride; 658bf215546Sopenharmony_ci info->query_type = SI_QUERY_FIRST_PERFCOUNTER + index; 659bf215546Sopenharmony_ci info->max_value.u64 = 0; 660bf215546Sopenharmony_ci info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; 661bf215546Sopenharmony_ci info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE; 662bf215546Sopenharmony_ci info->group_id = base_gid + sub / block->b->selectors; 663bf215546Sopenharmony_ci info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; 664bf215546Sopenharmony_ci if (sub > 0 && sub + 1 < block->b->selectors * block->num_groups) 665bf215546Sopenharmony_ci info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST; 666bf215546Sopenharmony_ci return 1; 667bf215546Sopenharmony_ci} 668bf215546Sopenharmony_ci 669bf215546Sopenharmony_ciint si_get_perfcounter_group_info(struct si_screen *screen, unsigned index, 670bf215546Sopenharmony_ci struct pipe_driver_query_group_info *info) 671bf215546Sopenharmony_ci{ 672bf215546Sopenharmony_ci struct si_perfcounters *pc = screen->perfcounters; 673bf215546Sopenharmony_ci struct ac_pc_block *block; 674bf215546Sopenharmony_ci 675bf215546Sopenharmony_ci if (!pc) 676bf215546Sopenharmony_ci return 0; 677bf215546Sopenharmony_ci 678bf215546Sopenharmony_ci if (!info) 679bf215546Sopenharmony_ci return pc->base.num_groups; 680bf215546Sopenharmony_ci 681bf215546Sopenharmony_ci block = ac_lookup_group(&pc->base, &index); 682bf215546Sopenharmony_ci if (!block) 683bf215546Sopenharmony_ci return 0; 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_ci if (!block->group_names) { 686bf215546Sopenharmony_ci if (!ac_init_block_names(&screen->info, &pc->base, block)) 687bf215546Sopenharmony_ci return 0; 688bf215546Sopenharmony_ci } 689bf215546Sopenharmony_ci info->name = block->group_names + index * block->group_name_stride; 690bf215546Sopenharmony_ci info->num_queries = block->b->selectors; 691bf215546Sopenharmony_ci info->max_active_queries = block->b->b->num_counters; 692bf215546Sopenharmony_ci return 1; 693bf215546Sopenharmony_ci} 694bf215546Sopenharmony_ci 695bf215546Sopenharmony_civoid si_destroy_perfcounters(struct si_screen *screen) 696bf215546Sopenharmony_ci{ 697bf215546Sopenharmony_ci struct si_perfcounters *pc = screen->perfcounters; 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci if (!pc) 700bf215546Sopenharmony_ci return; 701bf215546Sopenharmony_ci 702bf215546Sopenharmony_ci ac_destroy_perfcounters(&pc->base); 703bf215546Sopenharmony_ci FREE(pc); 704bf215546Sopenharmony_ci screen->perfcounters = NULL; 705bf215546Sopenharmony_ci} 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_civoid si_init_perfcounters(struct si_screen *screen) 708bf215546Sopenharmony_ci{ 709bf215546Sopenharmony_ci bool separate_se, separate_instance; 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_ci separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false); 712bf215546Sopenharmony_ci separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false); 713bf215546Sopenharmony_ci 714bf215546Sopenharmony_ci screen->perfcounters = CALLOC_STRUCT(si_perfcounters); 715bf215546Sopenharmony_ci if (!screen->perfcounters) 716bf215546Sopenharmony_ci return; 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci screen->perfcounters->num_stop_cs_dwords = 14 + si_cp_write_fence_dwords(screen); 719bf215546Sopenharmony_ci screen->perfcounters->num_instance_cs_dwords = 3; 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_ci if (!ac_init_perfcounters(&screen->info, separate_se, separate_instance, 722bf215546Sopenharmony_ci &screen->perfcounters->base)) { 723bf215546Sopenharmony_ci si_destroy_perfcounters(screen); 724bf215546Sopenharmony_ci } 725bf215546Sopenharmony_ci} 726bf215546Sopenharmony_ci 727bf215546Sopenharmony_cistatic bool 728bf215546Sopenharmony_cisi_spm_init_bo(struct si_context *sctx) 729bf215546Sopenharmony_ci{ 730bf215546Sopenharmony_ci struct radeon_winsys *ws = sctx->ws; 731bf215546Sopenharmony_ci uint64_t size = 32 * 1024 * 1024; /* Default to 32MB. */ 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci sctx->spm_trace.buffer_size = size; 734bf215546Sopenharmony_ci sctx->spm_trace.sample_interval = 4096; /* Default to 4096 clk. */ 735bf215546Sopenharmony_ci 736bf215546Sopenharmony_ci sctx->spm_trace.bo = ws->buffer_create( 737bf215546Sopenharmony_ci ws, size, 4096, 738bf215546Sopenharmony_ci RADEON_DOMAIN_VRAM, 739bf215546Sopenharmony_ci RADEON_FLAG_NO_INTERPROCESS_SHARING | 740bf215546Sopenharmony_ci RADEON_FLAG_GTT_WC | 741bf215546Sopenharmony_ci RADEON_FLAG_NO_SUBALLOC); 742bf215546Sopenharmony_ci 743bf215546Sopenharmony_ci return sctx->spm_trace.bo != NULL; 744bf215546Sopenharmony_ci} 745bf215546Sopenharmony_ci 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_cistatic void 748bf215546Sopenharmony_cisi_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs) 749bf215546Sopenharmony_ci{ 750bf215546Sopenharmony_ci struct ac_spm_trace_data *spm_trace = &sctx->spm_trace; 751bf215546Sopenharmony_ci 752bf215546Sopenharmony_ci radeon_begin(cs); 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ci for (uint32_t b = 0; b < spm_trace->num_used_sq_block_sel; b++) { 755bf215546Sopenharmony_ci struct ac_spm_block_select *sq_block_sel = &spm_trace->sq_block_sel[b]; 756bf215546Sopenharmony_ci const struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0]; 757bf215546Sopenharmony_ci uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT; 758bf215546Sopenharmony_ci 759bf215546Sopenharmony_ci radeon_set_uconfig_reg_seq(reg_base + b * 4, 1, false); 760bf215546Sopenharmony_ci radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */ 761bf215546Sopenharmony_ci } 762bf215546Sopenharmony_ci 763bf215546Sopenharmony_ci for (uint32_t b = 0; b < spm_trace->num_block_sel; b++) { 764bf215546Sopenharmony_ci struct ac_spm_block_select *block_sel = &spm_trace->block_sel[b]; 765bf215546Sopenharmony_ci struct ac_pc_block_base *regs = block_sel->b->b->b; 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_sel->grbm_gfx_index); 768bf215546Sopenharmony_ci 769bf215546Sopenharmony_ci for (unsigned c = 0; c < block_sel->num_counters; c++) { 770bf215546Sopenharmony_ci const struct ac_spm_counter_select *cntr_sel = &block_sel->counters[c]; 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_ci if (!cntr_sel->active) 773bf215546Sopenharmony_ci continue; 774bf215546Sopenharmony_ci 775bf215546Sopenharmony_ci radeon_set_uconfig_reg_seq(regs->select0[c], 1, false); 776bf215546Sopenharmony_ci radeon_emit(cntr_sel->sel0); 777bf215546Sopenharmony_ci 778bf215546Sopenharmony_ci radeon_set_uconfig_reg_seq(regs->select1[c], 1, false); 779bf215546Sopenharmony_ci radeon_emit(cntr_sel->sel1); 780bf215546Sopenharmony_ci } 781bf215546Sopenharmony_ci } 782bf215546Sopenharmony_ci 783bf215546Sopenharmony_ci /* Restore global broadcasting. */ 784bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, 785bf215546Sopenharmony_ci S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | 786bf215546Sopenharmony_ci S_030800_INSTANCE_BROADCAST_WRITES(1)); 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_ci radeon_end(); 789bf215546Sopenharmony_ci} 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci#define SPM_RING_BASE_ALIGN 32 792bf215546Sopenharmony_ci 793bf215546Sopenharmony_civoid 794bf215546Sopenharmony_cisi_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs) 795bf215546Sopenharmony_ci{ 796bf215546Sopenharmony_ci struct ac_spm_trace_data *spm_trace = &sctx->spm_trace; 797bf215546Sopenharmony_ci uint64_t va = sctx->screen->ws->buffer_get_virtual_address(spm_trace->bo); 798bf215546Sopenharmony_ci uint64_t ring_size = spm_trace->buffer_size; 799bf215546Sopenharmony_ci 800bf215546Sopenharmony_ci /* It's required that the ring VA and the size are correctly aligned. */ 801bf215546Sopenharmony_ci assert(!(va & (SPM_RING_BASE_ALIGN - 1))); 802bf215546Sopenharmony_ci assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1))); 803bf215546Sopenharmony_ci assert(spm_trace->sample_interval >= 32); 804bf215546Sopenharmony_ci 805bf215546Sopenharmony_ci radeon_begin(cs); 806bf215546Sopenharmony_ci 807bf215546Sopenharmony_ci /* Configure the SPM ring buffer. */ 808bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL, 809bf215546Sopenharmony_ci S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */ 810bf215546Sopenharmony_ci S_037200_PERFMON_SAMPLE_INTERVAL(spm_trace->sample_interval)); /* in sclk */ 811bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va); 812bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI, 813bf215546Sopenharmony_ci S_037208_RING_BASE_HI(va >> 32)); 814bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size); 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci /* Configure the muxsel. */ 817bf215546Sopenharmony_ci uint32_t total_muxsel_lines = 0; 818bf215546Sopenharmony_ci for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) { 819bf215546Sopenharmony_ci total_muxsel_lines += spm_trace->num_muxsel_lines[s]; 820bf215546Sopenharmony_ci } 821bf215546Sopenharmony_ci 822bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0); 823bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0); 824bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE, 825bf215546Sopenharmony_ci S_03727C_SE0_NUM_LINE(spm_trace->num_muxsel_lines[0]) | 826bf215546Sopenharmony_ci S_03727C_SE1_NUM_LINE(spm_trace->num_muxsel_lines[1]) | 827bf215546Sopenharmony_ci S_03727C_SE2_NUM_LINE(spm_trace->num_muxsel_lines[2]) | 828bf215546Sopenharmony_ci S_03727C_SE3_NUM_LINE(spm_trace->num_muxsel_lines[3])); 829bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE, 830bf215546Sopenharmony_ci S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) | 831bf215546Sopenharmony_ci S_037280_GLOBAL_NUM_LINE(spm_trace->num_muxsel_lines[4])); 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_ci /* Upload each muxsel ram to the RLC. */ 834bf215546Sopenharmony_ci for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) { 835bf215546Sopenharmony_ci unsigned rlc_muxsel_addr, rlc_muxsel_data; 836bf215546Sopenharmony_ci unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) | 837bf215546Sopenharmony_ci S_030800_INSTANCE_BROADCAST_WRITES(1); 838bf215546Sopenharmony_ci 839bf215546Sopenharmony_ci if (!spm_trace->num_muxsel_lines[s]) 840bf215546Sopenharmony_ci continue; 841bf215546Sopenharmony_ci 842bf215546Sopenharmony_ci if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) { 843bf215546Sopenharmony_ci grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1); 844bf215546Sopenharmony_ci 845bf215546Sopenharmony_ci rlc_muxsel_addr = R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR; 846bf215546Sopenharmony_ci rlc_muxsel_data = R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA; 847bf215546Sopenharmony_ci } else { 848bf215546Sopenharmony_ci grbm_gfx_index |= S_030800_SE_INDEX(s); 849bf215546Sopenharmony_ci 850bf215546Sopenharmony_ci rlc_muxsel_addr = R_03721C_RLC_SPM_SE_MUXSEL_ADDR; 851bf215546Sopenharmony_ci rlc_muxsel_data = R_037220_RLC_SPM_SE_MUXSEL_DATA; 852bf215546Sopenharmony_ci } 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_ci radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index); 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci for (unsigned l = 0; l < spm_trace->num_muxsel_lines[s]; l++) { 857bf215546Sopenharmony_ci uint32_t *data = (uint32_t *)spm_trace->muxsel_lines[s][l].muxsel; 858bf215546Sopenharmony_ci 859bf215546Sopenharmony_ci /* Select MUXSEL_ADDR to point to the next muxsel. */ 860bf215546Sopenharmony_ci radeon_set_uconfig_reg(rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE); 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci /* Write the muxsel line configuration with MUXSEL_DATA. */ 863bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0)); 864bf215546Sopenharmony_ci radeon_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | 865bf215546Sopenharmony_ci S_370_WR_CONFIRM(1) | 866bf215546Sopenharmony_ci S_370_ENGINE_SEL(V_370_ME) | 867bf215546Sopenharmony_ci S_370_WR_ONE_ADDR(1)); 868bf215546Sopenharmony_ci radeon_emit(rlc_muxsel_data >> 2); 869bf215546Sopenharmony_ci radeon_emit(0); 870bf215546Sopenharmony_ci radeon_emit_array(data, AC_SPM_MUXSEL_LINE_SIZE); 871bf215546Sopenharmony_ci } 872bf215546Sopenharmony_ci } 873bf215546Sopenharmony_ci radeon_end(); 874bf215546Sopenharmony_ci 875bf215546Sopenharmony_ci /* Select SPM counters. */ 876bf215546Sopenharmony_ci si_emit_spm_counters(sctx, cs); 877bf215546Sopenharmony_ci} 878bf215546Sopenharmony_ci 879bf215546Sopenharmony_cibool 880bf215546Sopenharmony_cisi_spm_init(struct si_context *sctx) 881bf215546Sopenharmony_ci{ 882bf215546Sopenharmony_ci const struct radeon_info *info = &sctx->screen->info; 883bf215546Sopenharmony_ci 884bf215546Sopenharmony_ci sctx->screen->perfcounters = CALLOC_STRUCT(si_perfcounters); 885bf215546Sopenharmony_ci sctx->screen->perfcounters->num_stop_cs_dwords = 14 + si_cp_write_fence_dwords(sctx->screen); 886bf215546Sopenharmony_ci sctx->screen->perfcounters->num_instance_cs_dwords = 3; 887bf215546Sopenharmony_ci 888bf215546Sopenharmony_ci struct ac_perfcounters *pc = &sctx->screen->perfcounters->base; 889bf215546Sopenharmony_ci struct ac_spm_counter_create_info spm_counters[] = { 890bf215546Sopenharmony_ci 891bf215546Sopenharmony_ci /* XXX: doesn't work */ 892bf215546Sopenharmony_ci {TCP, 0, 0x9}, /* Number of L2 requests. */ 893bf215546Sopenharmony_ci {TCP, 0, 0x12}, /* Number of L2 misses. */ 894bf215546Sopenharmony_ci 895bf215546Sopenharmony_ci /* Scalar cache hit */ 896bf215546Sopenharmony_ci {SQ, 0, 0x14f}, /* Number of SCACHE hits. */ 897bf215546Sopenharmony_ci {SQ, 0, 0x150}, /* Number of SCACHE misses. */ 898bf215546Sopenharmony_ci {SQ, 0, 0x151}, /* Number of SCACHE misses duplicate. */ 899bf215546Sopenharmony_ci 900bf215546Sopenharmony_ci /* Instruction cache hit */ 901bf215546Sopenharmony_ci {SQ, 0, 0x12c}, /* Number of ICACHE hits. */ 902bf215546Sopenharmony_ci {SQ, 0, 0x12d}, /* Number of ICACHE misses. */ 903bf215546Sopenharmony_ci {SQ, 0, 0x12e}, /* Number of ICACHE misses duplicate. */ 904bf215546Sopenharmony_ci 905bf215546Sopenharmony_ci /* XXX: doesn't work */ 906bf215546Sopenharmony_ci {GL1C, 0, 0xe}, /* Number of GL1C requests. */ 907bf215546Sopenharmony_ci {GL1C, 0, 0x12}, /* Number of GL1C misses. */ 908bf215546Sopenharmony_ci 909bf215546Sopenharmony_ci /* L2 cache hit */ 910bf215546Sopenharmony_ci {GL2C, 0, 0x3}, /* Number of GL2C requests. */ 911bf215546Sopenharmony_ci {GL2C, 0, info->gfx_level >= GFX10_3 ? 0x2b : 0x23}, /* Number of GL2C misses. */ 912bf215546Sopenharmony_ci }; 913bf215546Sopenharmony_ci 914bf215546Sopenharmony_ci if (!ac_init_perfcounters(info, false, false, pc)) 915bf215546Sopenharmony_ci return false; 916bf215546Sopenharmony_ci 917bf215546Sopenharmony_ci if (!ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &sctx->spm_trace)) 918bf215546Sopenharmony_ci return false; 919bf215546Sopenharmony_ci 920bf215546Sopenharmony_ci if (!si_spm_init_bo(sctx)) 921bf215546Sopenharmony_ci return false; 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci return true; 924bf215546Sopenharmony_ci} 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_civoid 927bf215546Sopenharmony_cisi_spm_finish(struct si_context *sctx) 928bf215546Sopenharmony_ci{ 929bf215546Sopenharmony_ci struct pb_buffer *bo = sctx->spm_trace.bo; 930bf215546Sopenharmony_ci radeon_bo_reference(sctx->screen->ws, &bo, NULL); 931bf215546Sopenharmony_ci 932bf215546Sopenharmony_ci ac_destroy_spm(&sctx->spm_trace); 933bf215546Sopenharmony_ci} 934