1/*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "intel_perf.h"
25#include "intel_perf_mdapi.h"
26#include "intel_perf_private.h"
27#include "intel_perf_regs.h"
28
29#include "dev/intel_device_info.h"
30
31#include <drm-uapi/i915_drm.h>
32
33
34int
35intel_perf_query_result_write_mdapi(void *data, uint32_t data_size,
36                                    const struct intel_device_info *devinfo,
37                                    const struct intel_perf_query_info *query,
38                                    const struct intel_perf_query_result *result)
39{
40   switch (devinfo->ver) {
41   case 7: {
42      struct gfx7_mdapi_metrics *mdapi_data = (struct gfx7_mdapi_metrics *) data;
43
44      if (data_size < sizeof(*mdapi_data))
45         return 0;
46
47      assert(devinfo->platform == INTEL_PLATFORM_HSW);
48
49      for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++)
50         mdapi_data->ACounters[i] = result->accumulator[1 + i];
51
52      for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) {
53         mdapi_data->NOACounters[i] =
54            result->accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];
55      }
56
57      mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
58      mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
59
60      mdapi_data->ReportsCount = result->reports_accumulated;
61      mdapi_data->TotalTime =
62         intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
63      mdapi_data->CoreFrequency = result->gt_frequency[1];
64      mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
65      mdapi_data->SplitOccured = result->query_disjoint;
66      return sizeof(*mdapi_data);
67   }
68   case 8: {
69      struct gfx8_mdapi_metrics *mdapi_data = (struct gfx8_mdapi_metrics *) data;
70
71      if (data_size < sizeof(*mdapi_data))
72         return 0;
73
74      for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
75         mdapi_data->OaCntr[i] = result->accumulator[2 + i];
76      for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
77         mdapi_data->NoaCntr[i] =
78            result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
79      }
80
81      mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
82      mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
83
84      mdapi_data->ReportId = result->hw_id;
85      mdapi_data->ReportsCount = result->reports_accumulated;
86      mdapi_data->TotalTime =
87         intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
88      mdapi_data->BeginTimestamp =
89         intel_device_info_timebase_scale(devinfo, result->begin_timestamp);
90      mdapi_data->GPUTicks = result->accumulator[1];
91      mdapi_data->CoreFrequency = result->gt_frequency[1];
92      mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
93      mdapi_data->SliceFrequency =
94         (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
95      mdapi_data->UnsliceFrequency =
96         (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
97      mdapi_data->SplitOccured = result->query_disjoint;
98      return sizeof(*mdapi_data);
99   }
100   case 9:
101   case 11:
102   case 12:{
103      struct gfx9_mdapi_metrics *mdapi_data = (struct gfx9_mdapi_metrics *) data;
104
105      if (data_size < sizeof(*mdapi_data))
106         return 0;
107
108      for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
109         mdapi_data->OaCntr[i] = result->accumulator[2 + i];
110      for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
111         mdapi_data->NoaCntr[i] =
112            result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
113      }
114
115      mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
116      mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
117
118      mdapi_data->ReportId = result->hw_id;
119      mdapi_data->ReportsCount = result->reports_accumulated;
120      mdapi_data->TotalTime =
121         intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
122      mdapi_data->BeginTimestamp =
123         intel_device_info_timebase_scale(devinfo, result->begin_timestamp);
124      mdapi_data->GPUTicks = result->accumulator[1];
125      mdapi_data->CoreFrequency = result->gt_frequency[1];
126      mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
127      mdapi_data->SliceFrequency =
128         (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
129      mdapi_data->UnsliceFrequency =
130         (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
131      mdapi_data->SplitOccured = result->query_disjoint;
132      return sizeof(*mdapi_data);
133   }
134   default:
135      unreachable("unexpected gen");
136   }
137}
138
139void
140intel_perf_register_mdapi_statistic_query(struct intel_perf_config *perf_cfg,
141                                          const struct intel_device_info *devinfo)
142{
143   if (!(devinfo->ver >= 7 && devinfo->ver <= 12))
144      return;
145
146   struct intel_perf_query_info *query =
147      intel_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
148
149   query->kind = INTEL_PERF_QUERY_TYPE_PIPELINE;
150   query->name = "Intel_Raw_Pipeline_Statistics_Query";
151
152   /* The order has to match mdapi_pipeline_metrics. */
153   intel_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
154                                     "N vertices submitted");
155   intel_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
156                                     "N primitives submitted");
157   intel_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
158                                     "N vertex shader invocations");
159   intel_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
160                                     "N geometry shader invocations");
161   intel_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
162                                     "N geometry shader primitives emitted");
163   intel_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
164                                     "N primitives entering clipping");
165   intel_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
166                                     "N primitives leaving clipping");
167   if (devinfo->verx10 == 75 || devinfo->ver == 8) {
168      intel_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
169                                  "N fragment shader invocations",
170                                  "N fragment shader invocations");
171   } else {
172      intel_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
173                                        "N fragment shader invocations");
174   }
175   intel_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
176                                     "N TCS shader invocations");
177   intel_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
178                                     "N TES shader invocations");
179   if (devinfo->ver >= 7) {
180      intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
181                                        "N compute shader invocations");
182   }
183
184   if (devinfo->ver >= 10) {
185      /* Reuse existing CS invocation register until we can expose this new
186       * one.
187       */
188      intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
189                                        "Reserved1");
190   }
191
192   query->data_size = sizeof(uint64_t) * query->n_counters;
193}
194
195static void
196fill_mdapi_perf_query_counter(struct intel_perf_query_info *query,
197                              const char *name,
198                              uint32_t data_offset,
199                              uint32_t data_size,
200                              enum intel_perf_counter_data_type data_type)
201{
202   struct intel_perf_query_counter *counter = &query->counters[query->n_counters];
203
204   assert(query->n_counters <= query->max_counters);
205
206   counter->name = name;
207   counter->desc = "Raw counter value";
208   counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
209   counter->data_type = data_type;
210   counter->offset = data_offset;
211
212   query->n_counters++;
213
214   assert(counter->offset + intel_perf_query_counter_get_size(counter) <= query->data_size);
215}
216
/* Register a raw counter for the scalar member field_name of the local
 * structure variable struct_name. The counter is named after the member and
 * its offset is the member's byte offset inside the structure. type_name is
 * the suffix of an INTEL_PERF_COUNTER_DATA_TYPE_* enumerator.
 *
 * Only addresses of struct_name are taken, so the variable does not need to
 * be initialized.
 */
#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
   fill_mdapi_perf_query_counter((query), #field_name,                  \
                                 (uint32_t) ((uint8_t *) &(struct_name).field_name - \
                                             (uint8_t *) &(struct_name)), \
                                 sizeof((struct_name).field_name),      \
                                 INTEL_PERF_COUNTER_DATA_TYPE_##type_name)

/* Same as MDAPI_QUERY_ADD_COUNTER, but for element idx of the array member
 * field_name. The counter name is the member name followed by the index and
 * is allocated with ralloc_asprintf() on ctx.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
   fill_mdapi_perf_query_counter((query),                               \
                                 ralloc_asprintf((ctx), "%s%i", #field_name, (idx)), \
                                 (uint32_t) ((uint8_t *) &(struct_name).field_name[(idx)] - \
                                             (uint8_t *) &(struct_name)), \
                                 sizeof((struct_name).field_name[0]),   \
                                 INTEL_PERF_COUNTER_DATA_TYPE_##type_name)
230
231void
232intel_perf_register_mdapi_oa_query(struct intel_perf_config *perf,
233                                   const struct intel_device_info *devinfo)
234{
235   struct intel_perf_query_info *query = NULL;
236
237   /* MDAPI requires different structures for pretty much every generation
238    * (right now we have definitions for gen 7 to 12).
239    */
240   if (!(devinfo->ver >= 7 && devinfo->ver <= 12))
241      return;
242
243   switch (devinfo->ver) {
244   case 7: {
245      query = intel_perf_append_query_info(perf, 1 + 45 + 16 + 7);
246      query->oa_format = I915_OA_FORMAT_A45_B8_C8;
247
248      struct gfx7_mdapi_metrics metric_data;
249      query->data_size = sizeof(metric_data);
250
251      MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
252      for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
253         MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
254                                       metric_data, ACounters, i, UINT64);
255      }
256      for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
257         MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
258                                       metric_data, NOACounters, i, UINT64);
259      }
260      MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
261      MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
262      MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
263      MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
264      MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
265      MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
266      MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
267      break;
268   }
269   case 8: {
270      query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16);
271      query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
272
273      struct gfx8_mdapi_metrics metric_data;
274      query->data_size = sizeof(metric_data);
275
276      MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
277      MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
278      for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
279         MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
280                                       metric_data, OaCntr, i, UINT64);
281      }
282      for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
283         MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
284                                       metric_data, NoaCntr, i, UINT64);
285      }
286      MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
287      MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
288      MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
289      MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
290      MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
291      MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
292      MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
293      MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
294      MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
295      MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
296      MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
297      MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
298      MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
299      MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
300      MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
301      MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
302      break;
303   }
304   case 9:
305   case 11:
306   case 12: {
307      query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
308      query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
309
310      struct gfx9_mdapi_metrics metric_data;
311      query->data_size = sizeof(metric_data);
312
313      MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
314      MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
315      for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
316         MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
317                                       metric_data, OaCntr, i, UINT64);
318      }
319      for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
320         MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
321                                       metric_data, NoaCntr, i, UINT64);
322      }
323      MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
324      MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
325      MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
326      MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
327      MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
328      MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
329      MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
330      MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
331      MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
332      MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
333      MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
334      MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
335      MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
336      MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
337      MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
338      MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
339      for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
340         MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
341                                       metric_data, UserCntr, i, UINT64);
342      }
343      MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
344      MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
345      break;
346   }
347   default:
348      unreachable("Unsupported gen");
349      break;
350   }
351
352   query->kind = INTEL_PERF_QUERY_TYPE_RAW;
353   query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
354   query->guid = INTEL_PERF_QUERY_GUID_MDAPI;
355
356   {
357      /* Accumulation buffer offsets copied from an actual query... */
358      const struct intel_perf_query_info *copy_query =
359         &perf->queries[0];
360
361      query->gpu_time_offset = copy_query->gpu_time_offset;
362      query->gpu_clock_offset = copy_query->gpu_clock_offset;
363      query->a_offset = copy_query->a_offset;
364      query->b_offset = copy_query->b_offset;
365      query->c_offset = copy_query->c_offset;
366      query->perfcnt_offset = copy_query->perfcnt_offset;
367   }
368}
369