/*
 * Copyright © 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include <inttypes.h>
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci#include "radv_cs.h"
27bf215546Sopenharmony_ci#include "radv_private.h"
28bf215546Sopenharmony_ci#include "sid.h"
29bf215546Sopenharmony_ci
/* Alignment that both the SPM ring buffer VA and the ring size must satisfy;
 * radv_emit_spm_setup() asserts on it before programming the ring registers.
 */
#define SPM_RING_BASE_ALIGN 32
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_cistatic bool
33bf215546Sopenharmony_ciradv_spm_init_bo(struct radv_device *device)
34bf215546Sopenharmony_ci{
35bf215546Sopenharmony_ci   struct radeon_winsys *ws = device->ws;
36bf215546Sopenharmony_ci   uint64_t size = 32 * 1024 * 1024; /* Default to 1MB. */
37bf215546Sopenharmony_ci   uint16_t sample_interval = 4096; /* Default to 4096 clk. */
38bf215546Sopenharmony_ci   VkResult result;
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_ci   device->spm_trace.buffer_size = size;
41bf215546Sopenharmony_ci   device->spm_trace.sample_interval = sample_interval;
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci   struct radeon_winsys_bo *bo = NULL;
44bf215546Sopenharmony_ci   result = ws->buffer_create(
45bf215546Sopenharmony_ci      ws, size, 4096, RADEON_DOMAIN_VRAM,
46bf215546Sopenharmony_ci      RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
47bf215546Sopenharmony_ci      RADV_BO_PRIORITY_SCRATCH, 0, &bo);
48bf215546Sopenharmony_ci   device->spm_trace.bo = bo;
49bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
50bf215546Sopenharmony_ci      return false;
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_ci   result = ws->buffer_make_resident(ws, device->spm_trace.bo, true);
53bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
54bf215546Sopenharmony_ci      return false;
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_ci   device->spm_trace.ptr = ws->buffer_map(device->spm_trace.bo);
57bf215546Sopenharmony_ci   if (!device->spm_trace.ptr)
58bf215546Sopenharmony_ci      return false;
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ci   return true;
61bf215546Sopenharmony_ci}
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_cistatic void
64bf215546Sopenharmony_ciradv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs)
65bf215546Sopenharmony_ci{
66bf215546Sopenharmony_ci   struct ac_spm_trace_data *spm_trace = &device->spm_trace;
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_ci   for (uint32_t b = 0; b < spm_trace->num_used_sq_block_sel; b++) {
69bf215546Sopenharmony_ci      struct ac_spm_block_select *sq_block_sel = &spm_trace->sq_block_sel[b];
70bf215546Sopenharmony_ci      const struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0];
71bf215546Sopenharmony_ci      uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci      radeon_set_uconfig_reg_seq(cs, reg_base + b * 4, 1);
74bf215546Sopenharmony_ci      radeon_emit(cs, cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
75bf215546Sopenharmony_ci   }
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   for (uint32_t b = 0; b < spm_trace->num_block_sel; b++) {
78bf215546Sopenharmony_ci      struct ac_spm_block_select *block_sel = &spm_trace->block_sel[b];
79bf215546Sopenharmony_ci      struct ac_pc_block_base *regs = block_sel->b->b->b;
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_ci      radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, block_sel->grbm_gfx_index);
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci      for (unsigned c = 0; c < block_sel->num_counters; c++) {
84bf215546Sopenharmony_ci         const struct ac_spm_counter_select *cntr_sel = &block_sel->counters[c];
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci         if (!cntr_sel->active)
87bf215546Sopenharmony_ci            continue;
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci         radeon_set_uconfig_reg_seq(cs, regs->select0[c], 1);
90bf215546Sopenharmony_ci         radeon_emit(cs, cntr_sel->sel0);
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_ci         radeon_set_uconfig_reg_seq(cs, regs->select1[c], 1);
93bf215546Sopenharmony_ci         radeon_emit(cs, cntr_sel->sel1);
94bf215546Sopenharmony_ci      }
95bf215546Sopenharmony_ci   }
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci   /* Restore global broadcasting. */
98bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
99bf215546Sopenharmony_ci                              S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
100bf215546Sopenharmony_ci                              S_030800_INSTANCE_BROADCAST_WRITES(1));
101bf215546Sopenharmony_ci}
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_civoid
104bf215546Sopenharmony_ciradv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs)
105bf215546Sopenharmony_ci{
106bf215546Sopenharmony_ci   struct ac_spm_trace_data *spm_trace = &device->spm_trace;
107bf215546Sopenharmony_ci   uint64_t va = radv_buffer_get_va(spm_trace->bo);
108bf215546Sopenharmony_ci   uint64_t ring_size = spm_trace->buffer_size;
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_ci   /* It's required that the ring VA and the size are correctly aligned. */
111bf215546Sopenharmony_ci   assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
112bf215546Sopenharmony_ci   assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
113bf215546Sopenharmony_ci   assert(spm_trace->sample_interval >= 32);
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci   /* Configure the SPM ring buffer. */
116bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_037200_RLC_SPM_PERFMON_CNTL,
117bf215546Sopenharmony_ci                              S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
118bf215546Sopenharmony_ci                              S_037200_PERFMON_SAMPLE_INTERVAL(spm_trace->sample_interval)); /* in sclk */
119bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
120bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_037208_RLC_SPM_PERFMON_RING_BASE_HI,
121bf215546Sopenharmony_ci                              S_037208_RING_BASE_HI(va >> 32));
122bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size);
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_ci   /* Configure the muxsel. */
125bf215546Sopenharmony_ci   uint32_t total_muxsel_lines = 0;
126bf215546Sopenharmony_ci   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
127bf215546Sopenharmony_ci      total_muxsel_lines += spm_trace->num_muxsel_lines[s];
128bf215546Sopenharmony_ci   }
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0);
131bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
132bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
133bf215546Sopenharmony_ci                              S_03727C_SE0_NUM_LINE(spm_trace->num_muxsel_lines[0]) |
134bf215546Sopenharmony_ci                              S_03727C_SE1_NUM_LINE(spm_trace->num_muxsel_lines[1]) |
135bf215546Sopenharmony_ci                              S_03727C_SE2_NUM_LINE(spm_trace->num_muxsel_lines[2]) |
136bf215546Sopenharmony_ci                              S_03727C_SE3_NUM_LINE(spm_trace->num_muxsel_lines[3]));
137bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
138bf215546Sopenharmony_ci                              S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
139bf215546Sopenharmony_ci                              S_037280_GLOBAL_NUM_LINE(spm_trace->num_muxsel_lines[4]));
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci   /* Upload each muxsel ram to the RLC. */
142bf215546Sopenharmony_ci   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
143bf215546Sopenharmony_ci      unsigned rlc_muxsel_addr, rlc_muxsel_data;
144bf215546Sopenharmony_ci      unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) |
145bf215546Sopenharmony_ci                                S_030800_INSTANCE_BROADCAST_WRITES(1);
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_ci      if (!spm_trace->num_muxsel_lines[s])
148bf215546Sopenharmony_ci         continue;
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci      if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
151bf215546Sopenharmony_ci         grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_ci         rlc_muxsel_addr = R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
154bf215546Sopenharmony_ci         rlc_muxsel_data = R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
155bf215546Sopenharmony_ci      } else {
156bf215546Sopenharmony_ci         grbm_gfx_index |= S_030800_SE_INDEX(s);
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci         rlc_muxsel_addr = R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
159bf215546Sopenharmony_ci         rlc_muxsel_data = R_037220_RLC_SPM_SE_MUXSEL_DATA;
160bf215546Sopenharmony_ci      }
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci      radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, grbm_gfx_index);
163bf215546Sopenharmony_ci
164bf215546Sopenharmony_ci      for (unsigned l = 0; l < spm_trace->num_muxsel_lines[s]; l++) {
165bf215546Sopenharmony_ci         uint32_t *data = (uint32_t *)spm_trace->muxsel_lines[s][l].muxsel;
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci         /* Select MUXSEL_ADDR to point to the next muxsel. */
168bf215546Sopenharmony_ci         radeon_set_uconfig_reg(cs, rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_ci         /* Write the muxsel line configuration with MUXSEL_DATA. */
171bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
172bf215546Sopenharmony_ci         radeon_emit(cs, S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) |
173bf215546Sopenharmony_ci                         S_370_WR_CONFIRM(1) |
174bf215546Sopenharmony_ci                         S_370_ENGINE_SEL(V_370_ME) |
175bf215546Sopenharmony_ci                         S_370_WR_ONE_ADDR(1));
176bf215546Sopenharmony_ci         radeon_emit(cs, rlc_muxsel_data >> 2);
177bf215546Sopenharmony_ci         radeon_emit(cs, 0);
178bf215546Sopenharmony_ci         radeon_emit_array(cs, data, AC_SPM_MUXSEL_LINE_SIZE);
179bf215546Sopenharmony_ci      }
180bf215546Sopenharmony_ci   }
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci   /* Select SPM counters. */
183bf215546Sopenharmony_ci   radv_emit_spm_counters(device, cs);
184bf215546Sopenharmony_ci}
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_cibool
187bf215546Sopenharmony_ciradv_spm_init(struct radv_device *device)
188bf215546Sopenharmony_ci{
189bf215546Sopenharmony_ci   const struct radeon_info *info = &device->physical_device->rad_info;
190bf215546Sopenharmony_ci   struct ac_perfcounters *pc = &device->physical_device->ac_perfcounters;
191bf215546Sopenharmony_ci   struct ac_spm_counter_create_info spm_counters[] = {
192bf215546Sopenharmony_ci      {TCP, 0, 0x9},    /* Number of L2 requests. */
193bf215546Sopenharmony_ci      {TCP, 0, 0x12},   /* Number of L2 misses. */
194bf215546Sopenharmony_ci      {SQ, 0, 0x14f},   /* Number of SCACHE hits. */
195bf215546Sopenharmony_ci      {SQ, 0, 0x150},   /* Number of SCACHE misses. */
196bf215546Sopenharmony_ci      {SQ, 0, 0x151},   /* Number of SCACHE misses duplicate. */
197bf215546Sopenharmony_ci      {SQ, 0, 0x12c},   /* Number of ICACHE hits. */
198bf215546Sopenharmony_ci      {SQ, 0, 0x12d},   /* Number of ICACHE misses. */
199bf215546Sopenharmony_ci      {SQ, 0, 0x12e},   /* Number of ICACHE misses duplicate. */
200bf215546Sopenharmony_ci      {GL1C, 0, 0xe},   /* Number of GL1C requests. */
201bf215546Sopenharmony_ci      {GL1C, 0, 0x12},  /* Number of GL1C misses. */
202bf215546Sopenharmony_ci      {GL2C, 0, 0x3},   /* Number of GL2C requests. */
203bf215546Sopenharmony_ci      {GL2C, 0, info->gfx_level >= GFX10_3 ? 0x2b : 0x23},  /* Number of GL2C misses. */
204bf215546Sopenharmony_ci   };
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci   /* We failed to initialize the performance counters. */
207bf215546Sopenharmony_ci   if (!pc->blocks)
208bf215546Sopenharmony_ci      return false;
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci   if (!ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &device->spm_trace))
211bf215546Sopenharmony_ci      return false;
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci   if (!radv_spm_init_bo(device))
214bf215546Sopenharmony_ci      return false;
215bf215546Sopenharmony_ci
216bf215546Sopenharmony_ci   return true;
217bf215546Sopenharmony_ci}
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_civoid
220bf215546Sopenharmony_ciradv_spm_finish(struct radv_device *device)
221bf215546Sopenharmony_ci{
222bf215546Sopenharmony_ci   struct radeon_winsys *ws = device->ws;
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci   if (device->spm_trace.bo) {
225bf215546Sopenharmony_ci      ws->buffer_make_resident(ws, device->spm_trace.bo, false);
226bf215546Sopenharmony_ci      ws->buffer_destroy(ws, device->spm_trace.bo);
227bf215546Sopenharmony_ci   }
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_ci   ac_destroy_spm(&device->spm_trace);
230bf215546Sopenharmony_ci}
231