xref: /third_party/mesa3d/src/amd/common/ac_spm.c (revision bf215546)
1/*
2 * Copyright 2021 Valve Corporation
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25#include "ac_spm.h"
26
27#include "util/bitscan.h"
28#include "util/u_memory.h"
29#include "ac_perfcounter.h"
30
31static struct ac_spm_block_select *
32ac_spm_get_block_select(struct ac_spm_trace_data *spm_trace,
33                        const struct ac_pc_block *block)
34{
35   struct ac_spm_block_select *block_sel, *new_block_sel;
36   uint32_t num_block_sel;
37
38   for (uint32_t i = 0; i < spm_trace->num_block_sel; i++) {
39      if (spm_trace->block_sel[i].b->b->b->gpu_block == block->b->b->gpu_block)
40         return &spm_trace->block_sel[i];
41   }
42
43   /* Allocate a new select block if it doesn't already exist. */
44   num_block_sel = spm_trace->num_block_sel + 1;
45   block_sel = realloc(spm_trace->block_sel, num_block_sel * sizeof(*block_sel));
46   if (!block_sel)
47      return NULL;
48
49   spm_trace->num_block_sel = num_block_sel;
50   spm_trace->block_sel = block_sel;
51
52   /* Initialize the new select block. */
53   new_block_sel = &spm_trace->block_sel[spm_trace->num_block_sel - 1];
54   memset(new_block_sel, 0, sizeof(*new_block_sel));
55
56   new_block_sel->b = block;
57   new_block_sel->num_counters = block->b->b->num_spm_counters;
58
59   /* Broadcast global block writes to SEs and SAs */
60   if (!(block->b->b->flags & (AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER)))
61      new_block_sel->grbm_gfx_index = S_030800_SE_BROADCAST_WRITES(1) |
62                                      S_030800_SH_BROADCAST_WRITES(1);
63   /* Broadcast per SE block writes to SAs */
64   else if (block->b->b->flags & AC_PC_BLOCK_SE)
65      new_block_sel->grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1);
66
67   return new_block_sel;
68}
69
70static void
71ac_spm_init_muxsel(const struct ac_pc_block *block,
72                   struct ac_spm_counter_info *counter,
73                   uint32_t spm_wire)
74{
75   struct ac_spm_muxsel *muxsel = &counter->muxsel;
76
77   muxsel->counter = 2 * spm_wire + (counter->is_even ? 0 : 1);
78   muxsel->block = block->b->b->spm_block_select;
79   muxsel->shader_array = 0;
80   muxsel->instance = 0;
81}
82
static bool
ac_spm_map_counter(struct ac_spm_trace_data *spm_trace,
                   struct ac_spm_block_select *block_sel,
                   struct ac_spm_counter_info *counter,
                   uint32_t *spm_wire)
{
   /* Map one counter onto a free hardware slot of the given select block.
    * On success, sets counter->is_even and *spm_wire (the SPM wire the
    * counter's data arrives on) and returns true; returns false when no
    * free slot is found.
    */
   if (block_sel->b->b->b->gpu_block == SQ) {
      for (unsigned i = 0; i < ARRAY_SIZE(spm_trace->sq_block_sel); i++) {
         struct ac_spm_block_select *sq_block_sel = &spm_trace->sq_block_sel[i];
         struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0];
         /* Skip SQ select blocks that were already claimed by a counter. */
         if (i < spm_trace->num_used_sq_block_sel)
            continue;

         /* SQ doesn't support 16-bit counters. */
         cntr_sel->sel0 |= S_036700_PERF_SEL(counter->event_id) |
                           S_036700_SPM_MODE(3) | /* 32-bit clamp */
                           S_036700_PERF_MODE(0);
         /* A 32-bit counter occupies both 16-bit halves of the wire. */
         cntr_sel->active |= 0x3;

         /* 32-bits counter are always even. */
         counter->is_even = true;

         /* One wire per SQ module. */
         *spm_wire = i;

         spm_trace->num_used_sq_block_sel++;
         return true;
      }
   } else {
      /* Generic blocks: each counter select packs four 16-bit slots
       * (PERF_SEL0..3); 'active' is the bitmask of occupied slots. */
      for (unsigned i = 0; i < block_sel->num_counters; i++) {
         struct ac_spm_counter_select *cntr_sel = &block_sel->counters[i];
         /* First free 16-bit slot (0-3); >= 4 when this select is full. */
         int index = ffs(~cntr_sel->active) - 1;

         /* NOTE(review): every arm of this switch returns, so the loop only
          * ever examines counters[0]; once its four slots are used, the
          * default case fails the mapping even if more counter selects
          * remain (i < num_counters). Confirm whether 'default' should
          * advance to the next select register instead. */
         switch (index) {
         case 0: /* use S_037004_PERF_SEL */
            cntr_sel->sel0 |= S_037004_PERF_SEL(counter->event_id) |
                              S_037004_CNTR_MODE(1) | /* 16-bit clamp */
                              S_037004_PERF_MODE(0); /* accum */
            break;
         case 1: /* use S_037004_PERF_SEL1 */
            cntr_sel->sel0 |= S_037004_PERF_SEL1(counter->event_id) |
                              S_037004_PERF_MODE1(0);
            break;
         case 2: /* use S_037004_PERF_SEL2 */
            cntr_sel->sel1 |= S_037008_PERF_SEL2(counter->event_id) |
                              S_037008_PERF_MODE2(0);
            break;
         case 3: /* use S_037004_PERF_SEL3 */
            cntr_sel->sel1 |= S_037008_PERF_SEL3(counter->event_id) |
                              S_037008_PERF_MODE3(0);
            break;
         default:
            return false;
         }

         /* Mark this 16-bit counter as used. */
         cntr_sel->active |= 1 << index;

         /* Determine if the counter is even or odd. */
         counter->is_even = !(index % 2);

         /* Determine the SPM wire (one wire holds two 16-bit counters). */
         *spm_wire = !!(index >= 2);

         return true;
      }
   }

   return false;
}
154
155static bool
156ac_spm_add_counter(const struct ac_perfcounters *pc,
157                   struct ac_spm_trace_data *spm_trace,
158                   const struct ac_spm_counter_create_info *info)
159{
160   struct ac_spm_counter_info *counter;
161   struct ac_spm_block_select *block_sel;
162   struct ac_pc_block *block;
163   uint32_t spm_wire;
164
165   /* Check if the GPU block is valid. */
166   block = ac_pc_get_block(pc, info->gpu_block);
167   if (!block) {
168      fprintf(stderr, "ac/spm: Invalid GPU block.\n");
169      return false;
170   }
171
172   /* Check if the number of instances is valid. */
173   if (info->instance > block->num_instances) {
174      fprintf(stderr, "ac/spm: Invalid instance ID.\n");
175      return false;
176   }
177
178   /* Check if the event ID is valid. */
179   if (info->event_id > block->b->selectors) {
180      fprintf(stderr, "ac/spm: Invalid event ID.\n");
181      return false;
182   }
183
184   counter = &spm_trace->counters[spm_trace->num_counters];
185   spm_trace->num_counters++;
186
187   counter->gpu_block = info->gpu_block;
188   counter->instance = info->instance;
189   counter->event_id = info->event_id;
190
191   /* Get the select block used to configure the counter. */
192   block_sel = ac_spm_get_block_select(spm_trace, block);
193   if (!block_sel)
194      return false;
195
196   /* Map the counter to the select block. */
197   if (!ac_spm_map_counter(spm_trace, block_sel, counter, &spm_wire)) {
198      fprintf(stderr, "ac/spm: No free slots available!\n");
199      return false;
200   }
201
202   /* Determine the counter segment type. */
203   if (block->b->b->flags & AC_PC_BLOCK_SE) {
204      counter->segment_type = AC_SPM_SEGMENT_TYPE_SE0; // XXX
205   } else {
206      counter->segment_type = AC_SPM_SEGMENT_TYPE_GLOBAL;
207   }
208
209   /* Configure the muxsel for SPM. */
210   ac_spm_init_muxsel(block, counter, spm_wire);
211
212   return true;
213}
214
bool ac_init_spm(const struct radeon_info *info,
                 const struct ac_perfcounters *pc,
                 unsigned num_counters,
                 const struct ac_spm_counter_create_info *counters,
                 struct ac_spm_trace_data *spm_trace)
{
   /* Build the SPM trace configuration: register every requested counter,
    * then lay out the per-segment muxsel rams that tell the RLC which
    * counter value goes into which 16-bit slot of each muxsel line.
    * 'info' (radeon_info) is currently unused.
    * NOTE(review): failure paths return false without freeing what was
    * already allocated — presumably the caller is expected to run
    * ac_destroy_spm on failure; confirm at the call sites.
    */
   spm_trace->counters = CALLOC(num_counters, sizeof(*spm_trace->counters));
   if (!spm_trace->counters)
      return false;

   for (unsigned i = 0; i < num_counters; i++) {
      if (!ac_spm_add_counter(pc, spm_trace, &counters[i])) {
         fprintf(stderr, "ac/spm: Failed to add SPM counter (%d).\n", i);
         return false;
      }
   }

   /* Determine the segment size and create a muxsel ram for every segment. */
   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
      unsigned num_even_counters = 0, num_odd_counters = 0;

      if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
         /* The global segment always start with a 64-bit timestamp. */
         num_even_counters += AC_SPM_GLOBAL_TIMESTAMP_COUNTERS;
      }

      /* Count the number of even/odd counters for this segment. */
      for (unsigned c = 0; c < spm_trace->num_counters; c++) {
         struct ac_spm_counter_info *counter = &spm_trace->counters[c];

         if (counter->segment_type != s)
            continue;

         if (counter->is_even) {
            num_even_counters++;
         } else {
            num_odd_counters++;
         }
      }

      /* Compute the number of lines. Even counters occupy lines 0, 2, 4, …
       * and odd counters lines 1, 3, 5, … so the total line count is driven
       * by whichever side needs more lines. */
      unsigned even_lines =
         DIV_ROUND_UP(num_even_counters, AC_SPM_NUM_COUNTER_PER_MUXSEL);
      unsigned odd_lines =
         DIV_ROUND_UP(num_odd_counters, AC_SPM_NUM_COUNTER_PER_MUXSEL);
      unsigned num_lines = (even_lines > odd_lines) ? (2 * even_lines - 1) : (2 * odd_lines);

      spm_trace->muxsel_lines[s] = CALLOC(num_lines, sizeof(*spm_trace->muxsel_lines[s]));
      if (!spm_trace->muxsel_lines[s])
         return false;
      spm_trace->num_muxsel_lines[s] = num_lines;
   }

   /* RLC uses the following order: Global, SE0, SE1, SE2, SE3. */
   const enum ac_spm_segment_type ordered_segment[AC_SPM_SEGMENT_TYPE_COUNT] =
   {
      AC_SPM_SEGMENT_TYPE_GLOBAL,
      AC_SPM_SEGMENT_TYPE_SE0,
      AC_SPM_SEGMENT_TYPE_SE1,
      AC_SPM_SEGMENT_TYPE_SE2,
      AC_SPM_SEGMENT_TYPE_SE3,
   };

   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
      if (!spm_trace->muxsel_lines[s])
         continue;

      /* Offset (in 16-bit counter slots) of this segment within one sample,
       * obtained by summing the sizes of the segments the RLC emits before
       * it. */
      uint32_t segment_offset = 0;
      for (unsigned i = 0; s != ordered_segment[i]; i++) {
         segment_offset += spm_trace->num_muxsel_lines[ordered_segment[i]] *
                           AC_SPM_NUM_COUNTER_PER_MUXSEL;
      }

      /* Even counters fill even lines, odd counters fill odd lines. */
      uint32_t even_counter_idx = 0, even_line_idx = 0;
      uint32_t odd_counter_idx = 0, odd_line_idx = 1;

      /* Add the global timestamps first. */
      if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
         /* Hardware-defined muxsel encoding that selects the RLC
          * timestamp — assumed fixed magic values; confirm against the
          * register documentation. */
         struct ac_spm_muxsel global_timestamp_muxsel = {
            .counter = 0x30,
            .block = 0x3,
            .shader_array = 0,
            .instance = 0x1e,
         };

         /* NOTE(review): the literal 4 should match
          * AC_SPM_GLOBAL_TIMESTAMP_COUNTERS used when sizing the segment
          * above — keep them in sync. */
         for (unsigned i = 0; i < 4; i++) {
            spm_trace->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx++] = global_timestamp_muxsel;
         }
      }

      for (unsigned i = 0; i < spm_trace->num_counters; i++) {
         struct ac_spm_counter_info *counter = &spm_trace->counters[i];

         if (counter->segment_type != s)
            continue;

         if (counter->is_even) {
            /* Record where this counter's 16-bit value lands within a
             * sample, then advance; a full line skips to the next even
             * line (+2). */
            counter->offset = segment_offset + even_line_idx *
                              AC_SPM_NUM_COUNTER_PER_MUXSEL + even_counter_idx;

            spm_trace->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx] = spm_trace->counters[i].muxsel;
            if (++even_counter_idx == AC_SPM_NUM_COUNTER_PER_MUXSEL) {
               even_counter_idx = 0;
               even_line_idx += 2;
            }
         } else {
            counter->offset = segment_offset + odd_line_idx *
                              AC_SPM_NUM_COUNTER_PER_MUXSEL + odd_counter_idx;

            spm_trace->muxsel_lines[s][odd_line_idx].muxsel[odd_counter_idx] = spm_trace->counters[i].muxsel;
            if (++odd_counter_idx == AC_SPM_NUM_COUNTER_PER_MUXSEL) {
               odd_counter_idx = 0;
               odd_line_idx += 2;
            }
         }
      }
   }

   return true;
}
335
336void ac_destroy_spm(struct ac_spm_trace_data *spm_trace)
337{
338   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
339      FREE(spm_trace->muxsel_lines[s]);
340   }
341   FREE(spm_trace->block_sel);
342   FREE(spm_trace->counters);
343}
344
345uint32_t ac_spm_get_sample_size(const struct ac_spm_trace_data *spm_trace)
346{
347   uint32_t sample_size = 0; /* in bytes */
348
349   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
350      sample_size += spm_trace->num_muxsel_lines[s] * AC_SPM_MUXSEL_LINE_SIZE * 4;
351   }
352
353   return sample_size;
354}
355
356uint32_t ac_spm_get_num_samples(const struct ac_spm_trace_data *spm_trace)
357{
358   uint32_t sample_size = ac_spm_get_sample_size(spm_trace);
359   uint32_t *ptr = (uint32_t *)spm_trace->ptr;
360   uint32_t data_size, num_lines_written;
361   uint32_t num_samples = 0;
362
363   /* Get the data size (in bytes) written by the hw to the ring buffer. */
364   data_size = ptr[0];
365
366   /* Compute the number of 256 bits (16 * 16-bits counters) lines written. */
367   num_lines_written = data_size / (2 * AC_SPM_NUM_COUNTER_PER_MUXSEL);
368
369   /* Check for overflow. */
370   if (num_lines_written % (sample_size / 32)) {
371      abort();
372   } else {
373      num_samples = num_lines_written / (sample_size / 32);
374   }
375
376   return num_samples;
377}
378