1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2018 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include <dirent.h>
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci#include <sys/types.h>
27bf215546Sopenharmony_ci#include <sys/stat.h>
28bf215546Sopenharmony_ci#include <fcntl.h>
29bf215546Sopenharmony_ci#include <unistd.h>
30bf215546Sopenharmony_ci#include <errno.h>
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_ci#ifndef HAVE_DIRENT_D_TYPE
33bf215546Sopenharmony_ci#include <limits.h> // PATH_MAX
34bf215546Sopenharmony_ci#endif
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_ci#include <drm-uapi/i915_drm.h>
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_ci#include "common/intel_gem.h"
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_ci#include "dev/intel_debug.h"
41bf215546Sopenharmony_ci#include "dev/intel_device_info.h"
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci#include "perf/intel_perf.h"
44bf215546Sopenharmony_ci#include "perf/intel_perf_regs.h"
45bf215546Sopenharmony_ci#include "perf/intel_perf_mdapi.h"
46bf215546Sopenharmony_ci#include "perf/intel_perf_metrics.h"
47bf215546Sopenharmony_ci#include "perf/intel_perf_private.h"
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_ci#include "util/bitscan.h"
50bf215546Sopenharmony_ci#include "util/macros.h"
51bf215546Sopenharmony_ci#include "util/mesa-sha1.h"
52bf215546Sopenharmony_ci#include "util/u_math.h"
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci#define FILE_DEBUG_FLAG DEBUG_PERFMON
55bf215546Sopenharmony_ci
/**
 * Tell whether a directory entry is a directory or a symbolic link.
 *
 * When HAVE_DIRENT_D_TYPE is defined the answer comes straight from
 * entry->d_type. Otherwise the entry is lstat()ed as "parent_dir/d_name".
 *
 * \param entry       entry returned by readdir()
 * \param parent_dir  directory the entry was read from (only used by the
 *                    lstat() fallback)
 * \return true for a directory or symlink; false otherwise, including when
 *         the path cannot be built or inspected.
 */
static bool
is_dir_or_link(const struct dirent *entry, const char *parent_dir)
{
#ifdef HAVE_DIRENT_D_TYPE
   return entry->d_type == DT_DIR || entry->d_type == DT_LNK;
#else
   struct stat st;
   char path[PATH_MAX + 1];
   int len = snprintf(path, sizeof(path), "%s/%s", parent_dir, entry->d_name);

   /* A truncated path would name some other file; treat it as a miss. */
   if (len < 0 || (size_t)len >= sizeof(path))
      return false;

   /* On failure `st` is left untouched, so it must not be inspected. */
   if (lstat(path, &st) != 0)
      return false;

   return S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode);
#endif
}
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_cistatic bool
71bf215546Sopenharmony_ciget_sysfs_dev_dir(struct intel_perf_config *perf, int fd)
72bf215546Sopenharmony_ci{
73bf215546Sopenharmony_ci   struct stat sb;
74bf215546Sopenharmony_ci   int min, maj;
75bf215546Sopenharmony_ci   DIR *drmdir;
76bf215546Sopenharmony_ci   struct dirent *drm_entry;
77bf215546Sopenharmony_ci   int len;
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_ci   perf->sysfs_dev_dir[0] = '\0';
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_ci   if (INTEL_DEBUG(DEBUG_NO_OACONFIG))
82bf215546Sopenharmony_ci      return true;
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_ci   if (fstat(fd, &sb)) {
85bf215546Sopenharmony_ci      DBG("Failed to stat DRM fd\n");
86bf215546Sopenharmony_ci      return false;
87bf215546Sopenharmony_ci   }
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci   maj = major(sb.st_rdev);
90bf215546Sopenharmony_ci   min = minor(sb.st_rdev);
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_ci   if (!S_ISCHR(sb.st_mode)) {
93bf215546Sopenharmony_ci      DBG("DRM fd is not a character device as expected\n");
94bf215546Sopenharmony_ci      return false;
95bf215546Sopenharmony_ci   }
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci   len = snprintf(perf->sysfs_dev_dir,
98bf215546Sopenharmony_ci                  sizeof(perf->sysfs_dev_dir),
99bf215546Sopenharmony_ci                  "/sys/dev/char/%d:%d/device/drm", maj, min);
100bf215546Sopenharmony_ci   if (len < 0 || len >= sizeof(perf->sysfs_dev_dir)) {
101bf215546Sopenharmony_ci      DBG("Failed to concatenate sysfs path to drm device\n");
102bf215546Sopenharmony_ci      return false;
103bf215546Sopenharmony_ci   }
104bf215546Sopenharmony_ci
105bf215546Sopenharmony_ci   drmdir = opendir(perf->sysfs_dev_dir);
106bf215546Sopenharmony_ci   if (!drmdir) {
107bf215546Sopenharmony_ci      DBG("Failed to open %s: %m\n", perf->sysfs_dev_dir);
108bf215546Sopenharmony_ci      return false;
109bf215546Sopenharmony_ci   }
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_ci   while ((drm_entry = readdir(drmdir))) {
112bf215546Sopenharmony_ci      if (is_dir_or_link(drm_entry, perf->sysfs_dev_dir) &&
113bf215546Sopenharmony_ci          strncmp(drm_entry->d_name, "card", 4) == 0)
114bf215546Sopenharmony_ci      {
115bf215546Sopenharmony_ci         len = snprintf(perf->sysfs_dev_dir,
116bf215546Sopenharmony_ci                        sizeof(perf->sysfs_dev_dir),
117bf215546Sopenharmony_ci                        "/sys/dev/char/%d:%d/device/drm/%s",
118bf215546Sopenharmony_ci                        maj, min, drm_entry->d_name);
119bf215546Sopenharmony_ci         closedir(drmdir);
120bf215546Sopenharmony_ci         if (len < 0 || len >= sizeof(perf->sysfs_dev_dir))
121bf215546Sopenharmony_ci            return false;
122bf215546Sopenharmony_ci         else
123bf215546Sopenharmony_ci            return true;
124bf215546Sopenharmony_ci      }
125bf215546Sopenharmony_ci   }
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_ci   closedir(drmdir);
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_ci   DBG("Failed to find cardX directory under /sys/dev/char/%d:%d/device/drm\n",
130bf215546Sopenharmony_ci       maj, min);
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_ci   return false;
133bf215546Sopenharmony_ci}
134bf215546Sopenharmony_ci
/**
 * Read an unsigned 64-bit integer from the start of a text file.
 *
 * At most 31 bytes are read and parsed with strtoull() using base 0, so
 * decimal, octal ("0...") and hexadecimal ("0x...") notations are accepted.
 * Trailing characters after the number (e.g. a newline) are ignored.
 *
 * \param file  path of the file to read
 * \param val   receives the parsed value; left untouched on failure
 * \return true when a number was read; false when the file cannot be
 *         opened or read, contains no number, or the value is out of range.
 */
static bool
read_file_uint64(const char *file, uint64_t *val)
{
   char buf[32];
   ssize_t n;

   int fd = open(file, O_RDONLY);
   if (fd < 0)
      return false;

   do {
      n = read(fd, buf, sizeof(buf) - 1);
   } while (n < 0 && errno == EINTR);

   if (n < 0) {
      /* Keep read()'s errno for the caller; close() may overwrite it. */
      int read_errno = errno;
      close(fd);
      errno = read_errno;
      return false;
   }
   close(fd);

   buf[n] = '\0';

   char *end = NULL;
   errno = 0;
   unsigned long long parsed = strtoull(buf, &end, 0);

   /* No digits consumed (empty or non-numeric file) or overflow. */
   if (end == buf || errno == ERANGE)
      return false;

   *val = parsed;
   return true;
}
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_cistatic bool
157bf215546Sopenharmony_ciread_sysfs_drm_device_file_uint64(struct intel_perf_config *perf,
158bf215546Sopenharmony_ci                                  const char *file,
159bf215546Sopenharmony_ci                                  uint64_t *value)
160bf215546Sopenharmony_ci{
161bf215546Sopenharmony_ci   char buf[512];
162bf215546Sopenharmony_ci   int len;
163bf215546Sopenharmony_ci
164bf215546Sopenharmony_ci   len = snprintf(buf, sizeof(buf), "%s/%s", perf->sysfs_dev_dir, file);
165bf215546Sopenharmony_ci   if (len < 0 || len >= sizeof(buf)) {
166bf215546Sopenharmony_ci      DBG("Failed to concatenate sys filename to read u64 from\n");
167bf215546Sopenharmony_ci      return false;
168bf215546Sopenharmony_ci   }
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_ci   return read_file_uint64(buf, value);
171bf215546Sopenharmony_ci}
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_cistatic void
174bf215546Sopenharmony_ciregister_oa_config(struct intel_perf_config *perf,
175bf215546Sopenharmony_ci                   const struct intel_device_info *devinfo,
176bf215546Sopenharmony_ci                   const struct intel_perf_query_info *query,
177bf215546Sopenharmony_ci                   uint64_t config_id)
178bf215546Sopenharmony_ci{
179bf215546Sopenharmony_ci   struct intel_perf_query_info *registered_query =
180bf215546Sopenharmony_ci      intel_perf_append_query_info(perf, 0);
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci   *registered_query = *query;
183bf215546Sopenharmony_ci   registered_query->oa_metrics_set_id = config_id;
184bf215546Sopenharmony_ci   DBG("metric set registered: id = %" PRIu64", guid = %s\n",
185bf215546Sopenharmony_ci       registered_query->oa_metrics_set_id, query->guid);
186bf215546Sopenharmony_ci}
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_cistatic void
189bf215546Sopenharmony_cienumerate_sysfs_metrics(struct intel_perf_config *perf,
190bf215546Sopenharmony_ci                        const struct intel_device_info *devinfo)
191bf215546Sopenharmony_ci{
192bf215546Sopenharmony_ci   DIR *metricsdir = NULL;
193bf215546Sopenharmony_ci   struct dirent *metric_entry;
194bf215546Sopenharmony_ci   char buf[256];
195bf215546Sopenharmony_ci   int len;
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci   len = snprintf(buf, sizeof(buf), "%s/metrics", perf->sysfs_dev_dir);
198bf215546Sopenharmony_ci   if (len < 0 || len >= sizeof(buf)) {
199bf215546Sopenharmony_ci      DBG("Failed to concatenate path to sysfs metrics/ directory\n");
200bf215546Sopenharmony_ci      return;
201bf215546Sopenharmony_ci   }
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci   metricsdir = opendir(buf);
204bf215546Sopenharmony_ci   if (!metricsdir) {
205bf215546Sopenharmony_ci      DBG("Failed to open %s: %m\n", buf);
206bf215546Sopenharmony_ci      return;
207bf215546Sopenharmony_ci   }
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci   while ((metric_entry = readdir(metricsdir))) {
210bf215546Sopenharmony_ci      struct hash_entry *entry;
211bf215546Sopenharmony_ci      if (!is_dir_or_link(metric_entry, buf) ||
212bf215546Sopenharmony_ci          metric_entry->d_name[0] == '.')
213bf215546Sopenharmony_ci         continue;
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci      DBG("metric set: %s\n", metric_entry->d_name);
216bf215546Sopenharmony_ci      entry = _mesa_hash_table_search(perf->oa_metrics_table,
217bf215546Sopenharmony_ci                                      metric_entry->d_name);
218bf215546Sopenharmony_ci      if (entry) {
219bf215546Sopenharmony_ci         uint64_t id;
220bf215546Sopenharmony_ci         if (!intel_perf_load_metric_id(perf, metric_entry->d_name, &id)) {
221bf215546Sopenharmony_ci            DBG("Failed to read metric set id from %s: %m", buf);
222bf215546Sopenharmony_ci            continue;
223bf215546Sopenharmony_ci         }
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci         register_oa_config(perf, devinfo,
226bf215546Sopenharmony_ci                            (const struct intel_perf_query_info *)entry->data, id);
227bf215546Sopenharmony_ci      } else
228bf215546Sopenharmony_ci         DBG("metric set not known by mesa (skipping)\n");
229bf215546Sopenharmony_ci   }
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_ci   closedir(metricsdir);
232bf215546Sopenharmony_ci}
233bf215546Sopenharmony_ci
234bf215546Sopenharmony_cistatic void
235bf215546Sopenharmony_ciadd_all_metrics(struct intel_perf_config *perf,
236bf215546Sopenharmony_ci                const struct intel_device_info *devinfo)
237bf215546Sopenharmony_ci{
238bf215546Sopenharmony_ci   hash_table_foreach(perf->oa_metrics_table, entry) {
239bf215546Sopenharmony_ci      const struct intel_perf_query_info *query = entry->data;
240bf215546Sopenharmony_ci      register_oa_config(perf, devinfo, query, 0);
241bf215546Sopenharmony_ci   }
242bf215546Sopenharmony_ci}
243bf215546Sopenharmony_ci
244bf215546Sopenharmony_cistatic bool
245bf215546Sopenharmony_cikernel_has_dynamic_config_support(struct intel_perf_config *perf, int fd)
246bf215546Sopenharmony_ci{
247bf215546Sopenharmony_ci   uint64_t invalid_config_id = UINT64_MAX;
248bf215546Sopenharmony_ci
249bf215546Sopenharmony_ci   return intel_ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG,
250bf215546Sopenharmony_ci                    &invalid_config_id) < 0 && errno == ENOENT;
251bf215546Sopenharmony_ci}
252bf215546Sopenharmony_ci
253bf215546Sopenharmony_cistatic bool
254bf215546Sopenharmony_cii915_query_perf_config_supported(struct intel_perf_config *perf, int fd)
255bf215546Sopenharmony_ci{
256bf215546Sopenharmony_ci   int32_t length = 0;
257bf215546Sopenharmony_ci   return !intel_i915_query_flags(fd, DRM_I915_QUERY_PERF_CONFIG,
258bf215546Sopenharmony_ci                                  DRM_I915_QUERY_PERF_CONFIG_LIST,
259bf215546Sopenharmony_ci                                  NULL, &length);
260bf215546Sopenharmony_ci}
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_cistatic bool
263bf215546Sopenharmony_cii915_query_perf_config_data(struct intel_perf_config *perf,
264bf215546Sopenharmony_ci                            int fd, const char *guid,
265bf215546Sopenharmony_ci                            struct drm_i915_perf_oa_config *config)
266bf215546Sopenharmony_ci{
267bf215546Sopenharmony_ci   char data[sizeof(struct drm_i915_query_perf_config) +
268bf215546Sopenharmony_ci             sizeof(struct drm_i915_perf_oa_config)] = {};
269bf215546Sopenharmony_ci   struct drm_i915_query_perf_config *query = (void *)data;
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci   memcpy(query->uuid, guid, sizeof(query->uuid));
272bf215546Sopenharmony_ci   memcpy(query->data, config, sizeof(*config));
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci   int32_t item_length = sizeof(data);
275bf215546Sopenharmony_ci   if (intel_i915_query_flags(fd, DRM_I915_QUERY_PERF_CONFIG,
276bf215546Sopenharmony_ci                              DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID,
277bf215546Sopenharmony_ci                              query, &item_length))
278bf215546Sopenharmony_ci      return false;
279bf215546Sopenharmony_ci
280bf215546Sopenharmony_ci   memcpy(config, query->data, sizeof(*config));
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_ci   return true;
283bf215546Sopenharmony_ci}
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_cibool
286bf215546Sopenharmony_ciintel_perf_load_metric_id(struct intel_perf_config *perf_cfg,
287bf215546Sopenharmony_ci                          const char *guid,
288bf215546Sopenharmony_ci                          uint64_t *metric_id)
289bf215546Sopenharmony_ci{
290bf215546Sopenharmony_ci   char config_path[280];
291bf215546Sopenharmony_ci
292bf215546Sopenharmony_ci   snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id",
293bf215546Sopenharmony_ci            perf_cfg->sysfs_dev_dir, guid);
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci   /* Don't recreate already loaded configs. */
296bf215546Sopenharmony_ci   return read_file_uint64(config_path, metric_id);
297bf215546Sopenharmony_ci}
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_cistatic uint64_t
300bf215546Sopenharmony_cii915_add_config(struct intel_perf_config *perf, int fd,
301bf215546Sopenharmony_ci                const struct intel_perf_registers *config,
302bf215546Sopenharmony_ci                const char *guid)
303bf215546Sopenharmony_ci{
304bf215546Sopenharmony_ci   struct drm_i915_perf_oa_config i915_config = { 0, };
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci   memcpy(i915_config.uuid, guid, sizeof(i915_config.uuid));
307bf215546Sopenharmony_ci
308bf215546Sopenharmony_ci   i915_config.n_mux_regs = config->n_mux_regs;
309bf215546Sopenharmony_ci   i915_config.mux_regs_ptr = to_const_user_pointer(config->mux_regs);
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci   i915_config.n_boolean_regs = config->n_b_counter_regs;
312bf215546Sopenharmony_ci   i915_config.boolean_regs_ptr = to_const_user_pointer(config->b_counter_regs);
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_ci   i915_config.n_flex_regs = config->n_flex_regs;
315bf215546Sopenharmony_ci   i915_config.flex_regs_ptr = to_const_user_pointer(config->flex_regs);
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_ci   int ret = intel_ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &i915_config);
318bf215546Sopenharmony_ci   return ret > 0 ? ret : 0;
319bf215546Sopenharmony_ci}
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_cistatic void
322bf215546Sopenharmony_ciinit_oa_configs(struct intel_perf_config *perf, int fd,
323bf215546Sopenharmony_ci                const struct intel_device_info *devinfo)
324bf215546Sopenharmony_ci{
325bf215546Sopenharmony_ci   hash_table_foreach(perf->oa_metrics_table, entry) {
326bf215546Sopenharmony_ci      const struct intel_perf_query_info *query = entry->data;
327bf215546Sopenharmony_ci      uint64_t config_id;
328bf215546Sopenharmony_ci
329bf215546Sopenharmony_ci      if (intel_perf_load_metric_id(perf, query->guid, &config_id)) {
330bf215546Sopenharmony_ci         DBG("metric set: %s (already loaded)\n", query->guid);
331bf215546Sopenharmony_ci         register_oa_config(perf, devinfo, query, config_id);
332bf215546Sopenharmony_ci         continue;
333bf215546Sopenharmony_ci      }
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci      int ret = i915_add_config(perf, fd, &query->config, query->guid);
336bf215546Sopenharmony_ci      if (ret < 0) {
337bf215546Sopenharmony_ci         DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n",
338bf215546Sopenharmony_ci             query->name, query->guid, strerror(errno));
339bf215546Sopenharmony_ci         continue;
340bf215546Sopenharmony_ci      }
341bf215546Sopenharmony_ci
342bf215546Sopenharmony_ci      register_oa_config(perf, devinfo, query, ret);
343bf215546Sopenharmony_ci      DBG("metric set: %s (added)\n", query->guid);
344bf215546Sopenharmony_ci   }
345bf215546Sopenharmony_ci}
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_cistatic void
348bf215546Sopenharmony_cicompute_topology_builtins(struct intel_perf_config *perf)
349bf215546Sopenharmony_ci{
350bf215546Sopenharmony_ci   const struct intel_device_info *devinfo = &perf->devinfo;
351bf215546Sopenharmony_ci
352bf215546Sopenharmony_ci   perf->sys_vars.slice_mask = devinfo->slice_masks;
353bf215546Sopenharmony_ci   perf->sys_vars.n_eu_slices = devinfo->num_slices;
354bf215546Sopenharmony_ci
355bf215546Sopenharmony_ci   perf->sys_vars.n_eu_slice0123 = 0;
356bf215546Sopenharmony_ci   for (int s = 0; s < MIN2(4, devinfo->max_slices); s++) {
357bf215546Sopenharmony_ci      if (!intel_device_info_slice_available(devinfo, s))
358bf215546Sopenharmony_ci         continue;
359bf215546Sopenharmony_ci
360bf215546Sopenharmony_ci      for (int ss = 0; ss < devinfo->max_subslices_per_slice; ss++) {
361bf215546Sopenharmony_ci         if (!intel_device_info_subslice_available(devinfo, s, ss))
362bf215546Sopenharmony_ci            continue;
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ci         for (int eu = 0; eu < devinfo->max_eus_per_subslice; eu++) {
365bf215546Sopenharmony_ci            if (intel_device_info_eu_available(devinfo, s, ss, eu))
366bf215546Sopenharmony_ci               perf->sys_vars.n_eu_slice0123++;
367bf215546Sopenharmony_ci         }
368bf215546Sopenharmony_ci      }
369bf215546Sopenharmony_ci   }
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ci   for (int i = 0; i < sizeof(devinfo->subslice_masks[i]); i++) {
372bf215546Sopenharmony_ci      perf->sys_vars.n_eu_sub_slices +=
373bf215546Sopenharmony_ci         util_bitcount(devinfo->subslice_masks[i]);
374bf215546Sopenharmony_ci   }
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_ci   for (int i = 0; i < sizeof(devinfo->eu_masks); i++)
377bf215546Sopenharmony_ci      perf->sys_vars.n_eus += util_bitcount(devinfo->eu_masks[i]);
378bf215546Sopenharmony_ci
379bf215546Sopenharmony_ci   /* The subslice mask builtin contains bits for all slices. Prior to Gfx11
380bf215546Sopenharmony_ci    * it had groups of 3bits for each slice, on Gfx11 and above it's 8bits for
381bf215546Sopenharmony_ci    * each slice.
382bf215546Sopenharmony_ci    *
383bf215546Sopenharmony_ci    * Ideally equations would be updated to have a slice/subslice query
384bf215546Sopenharmony_ci    * function/operator.
385bf215546Sopenharmony_ci    */
386bf215546Sopenharmony_ci   perf->sys_vars.subslice_mask = 0;
387bf215546Sopenharmony_ci
388bf215546Sopenharmony_ci   int bits_per_subslice = devinfo->ver >= 11 ? 8 : 3;
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci   for (int s = 0; s < util_last_bit(devinfo->slice_masks); s++) {
391bf215546Sopenharmony_ci      for (int ss = 0; ss < (devinfo->subslice_slice_stride * 8); ss++) {
392bf215546Sopenharmony_ci         if (intel_device_info_subslice_available(devinfo, s, ss))
393bf215546Sopenharmony_ci            perf->sys_vars.subslice_mask |= 1ULL << (s * bits_per_subslice + ss);
394bf215546Sopenharmony_ci      }
395bf215546Sopenharmony_ci   }
396bf215546Sopenharmony_ci}
397bf215546Sopenharmony_ci
398bf215546Sopenharmony_cistatic bool
399bf215546Sopenharmony_ciinit_oa_sys_vars(struct intel_perf_config *perf,
400bf215546Sopenharmony_ci                 bool use_register_snapshots)
401bf215546Sopenharmony_ci{
402bf215546Sopenharmony_ci   uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
403bf215546Sopenharmony_ci
404bf215546Sopenharmony_ci   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
405bf215546Sopenharmony_ci      if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz))
406bf215546Sopenharmony_ci         return false;
407bf215546Sopenharmony_ci
408bf215546Sopenharmony_ci      if (!read_sysfs_drm_device_file_uint64(perf,  "gt_max_freq_mhz", &max_freq_mhz))
409bf215546Sopenharmony_ci         return false;
410bf215546Sopenharmony_ci   } else {
411bf215546Sopenharmony_ci      min_freq_mhz = 300;
412bf215546Sopenharmony_ci      max_freq_mhz = 1000;
413bf215546Sopenharmony_ci   }
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_ci   memset(&perf->sys_vars, 0, sizeof(perf->sys_vars));
416bf215546Sopenharmony_ci   perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000;
417bf215546Sopenharmony_ci   perf->sys_vars.gt_max_freq = max_freq_mhz * 1000000;
418bf215546Sopenharmony_ci   perf->sys_vars.query_mode = use_register_snapshots;
419bf215546Sopenharmony_ci   compute_topology_builtins(perf);
420bf215546Sopenharmony_ci
421bf215546Sopenharmony_ci   return true;
422bf215546Sopenharmony_ci}
423bf215546Sopenharmony_ci
424bf215546Sopenharmony_citypedef void (*perf_register_oa_queries_t)(struct intel_perf_config *);
425bf215546Sopenharmony_ci
426bf215546Sopenharmony_cistatic perf_register_oa_queries_t
427bf215546Sopenharmony_ciget_register_queries_function(const struct intel_device_info *devinfo)
428bf215546Sopenharmony_ci{
429bf215546Sopenharmony_ci   switch (devinfo->platform) {
430bf215546Sopenharmony_ci   case INTEL_PLATFORM_HSW:
431bf215546Sopenharmony_ci      return intel_oa_register_queries_hsw;
432bf215546Sopenharmony_ci   case INTEL_PLATFORM_CHV:
433bf215546Sopenharmony_ci      return intel_oa_register_queries_chv;
434bf215546Sopenharmony_ci   case INTEL_PLATFORM_BDW:
435bf215546Sopenharmony_ci      return intel_oa_register_queries_bdw;
436bf215546Sopenharmony_ci   case INTEL_PLATFORM_BXT:
437bf215546Sopenharmony_ci      return intel_oa_register_queries_bxt;
438bf215546Sopenharmony_ci   case INTEL_PLATFORM_SKL:
439bf215546Sopenharmony_ci      if (devinfo->gt == 2)
440bf215546Sopenharmony_ci         return intel_oa_register_queries_sklgt2;
441bf215546Sopenharmony_ci      if (devinfo->gt == 3)
442bf215546Sopenharmony_ci         return intel_oa_register_queries_sklgt3;
443bf215546Sopenharmony_ci      if (devinfo->gt == 4)
444bf215546Sopenharmony_ci         return intel_oa_register_queries_sklgt4;
445bf215546Sopenharmony_ci      return NULL;
446bf215546Sopenharmony_ci   case INTEL_PLATFORM_KBL:
447bf215546Sopenharmony_ci      if (devinfo->gt == 2)
448bf215546Sopenharmony_ci         return intel_oa_register_queries_kblgt2;
449bf215546Sopenharmony_ci      if (devinfo->gt == 3)
450bf215546Sopenharmony_ci         return intel_oa_register_queries_kblgt3;
451bf215546Sopenharmony_ci      return NULL;
452bf215546Sopenharmony_ci   case INTEL_PLATFORM_GLK:
453bf215546Sopenharmony_ci      return intel_oa_register_queries_glk;
454bf215546Sopenharmony_ci   case INTEL_PLATFORM_CFL:
455bf215546Sopenharmony_ci      if (devinfo->gt == 2)
456bf215546Sopenharmony_ci         return intel_oa_register_queries_cflgt2;
457bf215546Sopenharmony_ci      if (devinfo->gt == 3)
458bf215546Sopenharmony_ci         return intel_oa_register_queries_cflgt3;
459bf215546Sopenharmony_ci      return NULL;
460bf215546Sopenharmony_ci   case INTEL_PLATFORM_ICL:
461bf215546Sopenharmony_ci      return intel_oa_register_queries_icl;
462bf215546Sopenharmony_ci   case INTEL_PLATFORM_EHL:
463bf215546Sopenharmony_ci      return intel_oa_register_queries_ehl;
464bf215546Sopenharmony_ci   case INTEL_PLATFORM_TGL:
465bf215546Sopenharmony_ci      if (devinfo->gt == 1)
466bf215546Sopenharmony_ci         return intel_oa_register_queries_tglgt1;
467bf215546Sopenharmony_ci      if (devinfo->gt == 2)
468bf215546Sopenharmony_ci         return intel_oa_register_queries_tglgt2;
469bf215546Sopenharmony_ci      return NULL;
470bf215546Sopenharmony_ci   case INTEL_PLATFORM_RKL:
471bf215546Sopenharmony_ci      return intel_oa_register_queries_rkl;
472bf215546Sopenharmony_ci   case INTEL_PLATFORM_DG1:
473bf215546Sopenharmony_ci      return intel_oa_register_queries_dg1;
474bf215546Sopenharmony_ci   case INTEL_PLATFORM_ADL:
475bf215546Sopenharmony_ci      return intel_oa_register_queries_adl;
476bf215546Sopenharmony_ci   default:
477bf215546Sopenharmony_ci      return NULL;
478bf215546Sopenharmony_ci   }
479bf215546Sopenharmony_ci}
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_cistatic int
482bf215546Sopenharmony_ciintel_perf_compare_counter_names(const void *v1, const void *v2)
483bf215546Sopenharmony_ci{
484bf215546Sopenharmony_ci   const struct intel_perf_query_counter *c1 = v1;
485bf215546Sopenharmony_ci   const struct intel_perf_query_counter *c2 = v2;
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci   return strcmp(c1->name, c2->name);
488bf215546Sopenharmony_ci}
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_cistatic void
491bf215546Sopenharmony_cisort_query(struct intel_perf_query_info *q)
492bf215546Sopenharmony_ci{
493bf215546Sopenharmony_ci   qsort(q->counters, q->n_counters, sizeof(q->counters[0]),
494bf215546Sopenharmony_ci         intel_perf_compare_counter_names);
495bf215546Sopenharmony_ci}
496bf215546Sopenharmony_ci
497bf215546Sopenharmony_cistatic void
498bf215546Sopenharmony_ciload_pipeline_statistic_metrics(struct intel_perf_config *perf_cfg,
499bf215546Sopenharmony_ci                                const struct intel_device_info *devinfo)
500bf215546Sopenharmony_ci{
501bf215546Sopenharmony_ci   struct intel_perf_query_info *query =
502bf215546Sopenharmony_ci      intel_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
503bf215546Sopenharmony_ci
504bf215546Sopenharmony_ci   query->kind = INTEL_PERF_QUERY_TYPE_PIPELINE;
505bf215546Sopenharmony_ci   query->name = "Pipeline Statistics Registers";
506bf215546Sopenharmony_ci
507bf215546Sopenharmony_ci   intel_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
508bf215546Sopenharmony_ci                                       "N vertices submitted");
509bf215546Sopenharmony_ci   intel_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
510bf215546Sopenharmony_ci                                       "N primitives submitted");
511bf215546Sopenharmony_ci   intel_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
512bf215546Sopenharmony_ci                                       "N vertex shader invocations");
513bf215546Sopenharmony_ci
514bf215546Sopenharmony_ci   if (devinfo->ver == 6) {
515bf215546Sopenharmony_ci      intel_perf_query_add_stat_reg(query, GFX6_SO_PRIM_STORAGE_NEEDED, 1, 1,
516bf215546Sopenharmony_ci                                    "SO_PRIM_STORAGE_NEEDED",
517bf215546Sopenharmony_ci                                    "N geometry shader stream-out primitives (total)");
518bf215546Sopenharmony_ci      intel_perf_query_add_stat_reg(query, GFX6_SO_NUM_PRIMS_WRITTEN, 1, 1,
519bf215546Sopenharmony_ci                                    "SO_NUM_PRIMS_WRITTEN",
520bf215546Sopenharmony_ci                                    "N geometry shader stream-out primitives (written)");
521bf215546Sopenharmony_ci   } else {
522bf215546Sopenharmony_ci      intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
523bf215546Sopenharmony_ci                                    "SO_PRIM_STORAGE_NEEDED (Stream 0)",
524bf215546Sopenharmony_ci                                    "N stream-out (stream 0) primitives (total)");
525bf215546Sopenharmony_ci      intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
526bf215546Sopenharmony_ci                                    "SO_PRIM_STORAGE_NEEDED (Stream 1)",
527bf215546Sopenharmony_ci                                    "N stream-out (stream 1) primitives (total)");
528bf215546Sopenharmony_ci      intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
529bf215546Sopenharmony_ci                                    "SO_PRIM_STORAGE_NEEDED (Stream 2)",
530bf215546Sopenharmony_ci                                    "N stream-out (stream 2) primitives (total)");
531bf215546Sopenharmony_ci      intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
532bf215546Sopenharmony_ci                                    "SO_PRIM_STORAGE_NEEDED (Stream 3)",
533bf215546Sopenharmony_ci                                    "N stream-out (stream 3) primitives (total)");
534bf215546Sopenharmony_ci      intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
535bf215546Sopenharmony_ci                                    "SO_NUM_PRIMS_WRITTEN (Stream 0)",
536bf215546Sopenharmony_ci                                    "N stream-out (stream 0) primitives (written)");
537bf215546Sopenharmony_ci      intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
538bf215546Sopenharmony_ci                                    "SO_NUM_PRIMS_WRITTEN (Stream 1)",
539bf215546Sopenharmony_ci                                    "N stream-out (stream 1) primitives (written)");
540bf215546Sopenharmony_ci      intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
541bf215546Sopenharmony_ci                                    "SO_NUM_PRIMS_WRITTEN (Stream 2)",
542bf215546Sopenharmony_ci                                    "N stream-out (stream 2) primitives (written)");
543bf215546Sopenharmony_ci      intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
544bf215546Sopenharmony_ci                                    "SO_NUM_PRIMS_WRITTEN (Stream 3)",
545bf215546Sopenharmony_ci                                    "N stream-out (stream 3) primitives (written)");
546bf215546Sopenharmony_ci   }
547bf215546Sopenharmony_ci
548bf215546Sopenharmony_ci   intel_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
549bf215546Sopenharmony_ci                                       "N TCS shader invocations");
550bf215546Sopenharmony_ci   intel_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
551bf215546Sopenharmony_ci                                       "N TES shader invocations");
552bf215546Sopenharmony_ci
553bf215546Sopenharmony_ci   intel_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
554bf215546Sopenharmony_ci                                       "N geometry shader invocations");
555bf215546Sopenharmony_ci   intel_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
556bf215546Sopenharmony_ci                                       "N geometry shader primitives emitted");
557bf215546Sopenharmony_ci
558bf215546Sopenharmony_ci   intel_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
559bf215546Sopenharmony_ci                                       "N primitives entering clipping");
560bf215546Sopenharmony_ci   intel_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
561bf215546Sopenharmony_ci                                       "N primitives leaving clipping");
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci   if (devinfo->verx10 == 75 || devinfo->ver == 8) {
564bf215546Sopenharmony_ci      intel_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
565bf215546Sopenharmony_ci                                    "N fragment shader invocations",
566bf215546Sopenharmony_ci                                    "N fragment shader invocations");
567bf215546Sopenharmony_ci   } else {
568bf215546Sopenharmony_ci      intel_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
569bf215546Sopenharmony_ci                                          "N fragment shader invocations");
570bf215546Sopenharmony_ci   }
571bf215546Sopenharmony_ci
572bf215546Sopenharmony_ci   intel_perf_query_add_basic_stat_reg(query, PS_DEPTH_COUNT,
573bf215546Sopenharmony_ci                                       "N z-pass fragments");
574bf215546Sopenharmony_ci
575bf215546Sopenharmony_ci   if (devinfo->ver >= 7) {
576bf215546Sopenharmony_ci      intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
577bf215546Sopenharmony_ci                                          "N compute shader invocations");
578bf215546Sopenharmony_ci   }
579bf215546Sopenharmony_ci
580bf215546Sopenharmony_ci   query->data_size = sizeof(uint64_t) * query->n_counters;
581bf215546Sopenharmony_ci
582bf215546Sopenharmony_ci   sort_query(query);
583bf215546Sopenharmony_ci}
584bf215546Sopenharmony_ci
585bf215546Sopenharmony_cistatic int
586bf215546Sopenharmony_cii915_perf_version(int drm_fd)
587bf215546Sopenharmony_ci{
588bf215546Sopenharmony_ci   int tmp;
589bf215546Sopenharmony_ci   drm_i915_getparam_t gp = {
590bf215546Sopenharmony_ci      .param = I915_PARAM_PERF_REVISION,
591bf215546Sopenharmony_ci      .value = &tmp,
592bf215546Sopenharmony_ci   };
593bf215546Sopenharmony_ci
594bf215546Sopenharmony_ci   int ret = intel_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
595bf215546Sopenharmony_ci
596bf215546Sopenharmony_ci   /* Return 0 if this getparam is not supported, the first version supported
597bf215546Sopenharmony_ci    * is 1.
598bf215546Sopenharmony_ci    */
599bf215546Sopenharmony_ci   return ret < 0 ? 0 : tmp;
600bf215546Sopenharmony_ci}
601bf215546Sopenharmony_ci
602bf215546Sopenharmony_cistatic void
603bf215546Sopenharmony_cii915_get_sseu(int drm_fd, struct drm_i915_gem_context_param_sseu *sseu)
604bf215546Sopenharmony_ci{
605bf215546Sopenharmony_ci   struct drm_i915_gem_context_param arg = {
606bf215546Sopenharmony_ci      .param = I915_CONTEXT_PARAM_SSEU,
607bf215546Sopenharmony_ci      .size = sizeof(*sseu),
608bf215546Sopenharmony_ci      .value = to_user_pointer(sseu)
609bf215546Sopenharmony_ci   };
610bf215546Sopenharmony_ci
611bf215546Sopenharmony_ci   intel_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg);
612bf215546Sopenharmony_ci}
613bf215546Sopenharmony_ci
/* strcmp() variant that tolerates NULL arguments.
 *
 * Two NULL strings compare equal (0); a NULL string orders before any
 * non-NULL string (-1 when only s1 is NULL, 1 when only s2 is NULL).
 * Otherwise the result is that of strcmp(s1, s2).
 */
static inline int
compare_str_or_null(const char *s1, const char *s2)
{
   if (s1 != NULL && s2 != NULL)
      return strcmp(s1, s2);

   /* At least one side is NULL: yields 0, -1 or 1 as documented above. */
   return (s1 != NULL) - (s2 != NULL);
}
626bf215546Sopenharmony_ci
627bf215546Sopenharmony_cistatic int
628bf215546Sopenharmony_cicompare_counter_categories_and_names(const void *_c1, const void *_c2)
629bf215546Sopenharmony_ci{
630bf215546Sopenharmony_ci   const struct intel_perf_query_counter_info *c1 = (const struct intel_perf_query_counter_info *)_c1;
631bf215546Sopenharmony_ci   const struct intel_perf_query_counter_info *c2 = (const struct intel_perf_query_counter_info *)_c2;
632bf215546Sopenharmony_ci
633bf215546Sopenharmony_ci   /* pipeline counters don't have an assigned category */
634bf215546Sopenharmony_ci   int r = compare_str_or_null(c1->counter->category, c2->counter->category);
635bf215546Sopenharmony_ci   if (r)
636bf215546Sopenharmony_ci      return r;
637bf215546Sopenharmony_ci
638bf215546Sopenharmony_ci   return strcmp(c1->counter->name, c2->counter->name);
639bf215546Sopenharmony_ci}
640bf215546Sopenharmony_ci
641bf215546Sopenharmony_cistatic void
642bf215546Sopenharmony_cibuild_unique_counter_list(struct intel_perf_config *perf)
643bf215546Sopenharmony_ci{
644bf215546Sopenharmony_ci   assert(perf->n_queries < 64);
645bf215546Sopenharmony_ci
646bf215546Sopenharmony_ci   size_t max_counters = 0;
647bf215546Sopenharmony_ci
648bf215546Sopenharmony_ci   for (int q = 0; q < perf->n_queries; q++)
649bf215546Sopenharmony_ci      max_counters += perf->queries[q].n_counters;
650bf215546Sopenharmony_ci
651bf215546Sopenharmony_ci   /*
652bf215546Sopenharmony_ci    * Allocate big enough array to hold maximum possible number of counters.
653bf215546Sopenharmony_ci    * We can't alloc it small and realloc when needed because the hash table
654bf215546Sopenharmony_ci    * below contains pointers to this array.
655bf215546Sopenharmony_ci    */
656bf215546Sopenharmony_ci   struct intel_perf_query_counter_info *counter_infos =
657bf215546Sopenharmony_ci         ralloc_array_size(perf, sizeof(counter_infos[0]), max_counters);
658bf215546Sopenharmony_ci
659bf215546Sopenharmony_ci   perf->n_counters = 0;
660bf215546Sopenharmony_ci
661bf215546Sopenharmony_ci   struct hash_table *counters_table =
662bf215546Sopenharmony_ci      _mesa_hash_table_create(perf,
663bf215546Sopenharmony_ci                              _mesa_hash_string,
664bf215546Sopenharmony_ci                              _mesa_key_string_equal);
665bf215546Sopenharmony_ci   struct hash_entry *entry;
666bf215546Sopenharmony_ci   for (int q = 0; q < perf->n_queries ; q++) {
667bf215546Sopenharmony_ci      struct intel_perf_query_info *query = &perf->queries[q];
668bf215546Sopenharmony_ci
669bf215546Sopenharmony_ci      for (int c = 0; c < query->n_counters; c++) {
670bf215546Sopenharmony_ci         struct intel_perf_query_counter *counter;
671bf215546Sopenharmony_ci         struct intel_perf_query_counter_info *counter_info;
672bf215546Sopenharmony_ci
673bf215546Sopenharmony_ci         counter = &query->counters[c];
674bf215546Sopenharmony_ci         entry = _mesa_hash_table_search(counters_table, counter->symbol_name);
675bf215546Sopenharmony_ci
676bf215546Sopenharmony_ci         if (entry) {
677bf215546Sopenharmony_ci            counter_info = entry->data;
678bf215546Sopenharmony_ci            counter_info->query_mask |= BITFIELD64_BIT(q);
679bf215546Sopenharmony_ci            continue;
680bf215546Sopenharmony_ci         }
681bf215546Sopenharmony_ci         assert(perf->n_counters < max_counters);
682bf215546Sopenharmony_ci
683bf215546Sopenharmony_ci         counter_info = &counter_infos[perf->n_counters++];
684bf215546Sopenharmony_ci         counter_info->counter = counter;
685bf215546Sopenharmony_ci         counter_info->query_mask = BITFIELD64_BIT(q);
686bf215546Sopenharmony_ci
687bf215546Sopenharmony_ci         counter_info->location.group_idx = q;
688bf215546Sopenharmony_ci         counter_info->location.counter_idx = c;
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci         _mesa_hash_table_insert(counters_table, counter->symbol_name, counter_info);
691bf215546Sopenharmony_ci      }
692bf215546Sopenharmony_ci   }
693bf215546Sopenharmony_ci
694bf215546Sopenharmony_ci   _mesa_hash_table_destroy(counters_table, NULL);
695bf215546Sopenharmony_ci
696bf215546Sopenharmony_ci   /* Now we can realloc counter_infos array because hash table doesn't exist. */
697bf215546Sopenharmony_ci   perf->counter_infos = reralloc_array_size(perf, counter_infos,
698bf215546Sopenharmony_ci         sizeof(counter_infos[0]), perf->n_counters);
699bf215546Sopenharmony_ci
700bf215546Sopenharmony_ci   qsort(perf->counter_infos, perf->n_counters, sizeof(perf->counter_infos[0]),
701bf215546Sopenharmony_ci         compare_counter_categories_and_names);
702bf215546Sopenharmony_ci}
703bf215546Sopenharmony_ci
704bf215546Sopenharmony_cistatic bool
705bf215546Sopenharmony_cioa_metrics_available(struct intel_perf_config *perf, int fd,
706bf215546Sopenharmony_ci                     const struct intel_device_info *devinfo,
707bf215546Sopenharmony_ci                     bool use_register_snapshots)
708bf215546Sopenharmony_ci{
709bf215546Sopenharmony_ci   perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
710bf215546Sopenharmony_ci   bool i915_perf_oa_available = false;
711bf215546Sopenharmony_ci   struct stat sb;
712bf215546Sopenharmony_ci
713bf215546Sopenharmony_ci   perf->devinfo = *devinfo;
714bf215546Sopenharmony_ci   perf->i915_query_supported = i915_query_perf_config_supported(perf, fd);
715bf215546Sopenharmony_ci   perf->i915_perf_version = i915_perf_version(fd);
716bf215546Sopenharmony_ci
717bf215546Sopenharmony_ci   /* TODO: We should query this from i915 */
718bf215546Sopenharmony_ci   if (intel_device_info_is_dg2(devinfo))
719bf215546Sopenharmony_ci      perf->oa_timestamp_shift = 1;
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci   perf->oa_timestamp_mask =
722bf215546Sopenharmony_ci      0xffffffffffffffffull >> (32 + perf->oa_timestamp_shift);
723bf215546Sopenharmony_ci
724bf215546Sopenharmony_ci   /* Record the default SSEU configuration. */
725bf215546Sopenharmony_ci   i915_get_sseu(fd, &perf->sseu);
726bf215546Sopenharmony_ci
727bf215546Sopenharmony_ci   /* The existence of this sysctl parameter implies the kernel supports
728bf215546Sopenharmony_ci    * the i915 perf interface.
729bf215546Sopenharmony_ci    */
730bf215546Sopenharmony_ci   if (stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb) == 0) {
731bf215546Sopenharmony_ci
732bf215546Sopenharmony_ci      /* If _paranoid == 1 then on Gfx8+ we won't be able to access OA
733bf215546Sopenharmony_ci       * metrics unless running as root.
734bf215546Sopenharmony_ci       */
735bf215546Sopenharmony_ci      if (devinfo->platform == INTEL_PLATFORM_HSW)
736bf215546Sopenharmony_ci         i915_perf_oa_available = true;
737bf215546Sopenharmony_ci      else {
738bf215546Sopenharmony_ci         uint64_t paranoid = 1;
739bf215546Sopenharmony_ci
740bf215546Sopenharmony_ci         read_file_uint64("/proc/sys/dev/i915/perf_stream_paranoid", &paranoid);
741bf215546Sopenharmony_ci
742bf215546Sopenharmony_ci         if (paranoid == 0 || geteuid() == 0)
743bf215546Sopenharmony_ci            i915_perf_oa_available = true;
744bf215546Sopenharmony_ci      }
745bf215546Sopenharmony_ci
746bf215546Sopenharmony_ci      perf->platform_supported = oa_register != NULL;
747bf215546Sopenharmony_ci   }
748bf215546Sopenharmony_ci
749bf215546Sopenharmony_ci   return i915_perf_oa_available &&
750bf215546Sopenharmony_ci          oa_register &&
751bf215546Sopenharmony_ci          get_sysfs_dev_dir(perf, fd) &&
752bf215546Sopenharmony_ci          init_oa_sys_vars(perf, use_register_snapshots);
753bf215546Sopenharmony_ci}
754bf215546Sopenharmony_ci
755bf215546Sopenharmony_cistatic void
756bf215546Sopenharmony_ciload_oa_metrics(struct intel_perf_config *perf, int fd,
757bf215546Sopenharmony_ci                const struct intel_device_info *devinfo)
758bf215546Sopenharmony_ci{
759bf215546Sopenharmony_ci   int existing_queries = perf->n_queries;
760bf215546Sopenharmony_ci
761bf215546Sopenharmony_ci   perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
762bf215546Sopenharmony_ci
763bf215546Sopenharmony_ci   perf->oa_metrics_table =
764bf215546Sopenharmony_ci      _mesa_hash_table_create(perf, _mesa_hash_string,
765bf215546Sopenharmony_ci                              _mesa_key_string_equal);
766bf215546Sopenharmony_ci
767bf215546Sopenharmony_ci   /* Index all the metric sets mesa knows about before looking to see what
768bf215546Sopenharmony_ci    * the kernel is advertising.
769bf215546Sopenharmony_ci    */
770bf215546Sopenharmony_ci   oa_register(perf);
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_ci   if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
773bf215546Sopenharmony_ci      if (kernel_has_dynamic_config_support(perf, fd))
774bf215546Sopenharmony_ci         init_oa_configs(perf, fd, devinfo);
775bf215546Sopenharmony_ci      else
776bf215546Sopenharmony_ci         enumerate_sysfs_metrics(perf, devinfo);
777bf215546Sopenharmony_ci   } else {
778bf215546Sopenharmony_ci      add_all_metrics(perf, devinfo);
779bf215546Sopenharmony_ci   }
780bf215546Sopenharmony_ci
781bf215546Sopenharmony_ci   /* sort counters in each individual group created by this function by name */
782bf215546Sopenharmony_ci   for (int i = existing_queries; i < perf->n_queries; ++i)
783bf215546Sopenharmony_ci      sort_query(&perf->queries[i]);
784bf215546Sopenharmony_ci
785bf215546Sopenharmony_ci   /* Select a fallback OA metric. Look for the TestOa metric or use the last
786bf215546Sopenharmony_ci    * one if no present (on HSW).
787bf215546Sopenharmony_ci    */
788bf215546Sopenharmony_ci   for (int i = existing_queries; i < perf->n_queries; i++) {
789bf215546Sopenharmony_ci      if (perf->queries[i].symbol_name &&
790bf215546Sopenharmony_ci          strcmp(perf->queries[i].symbol_name, "TestOa") == 0) {
791bf215546Sopenharmony_ci         perf->fallback_raw_oa_metric = perf->queries[i].oa_metrics_set_id;
792bf215546Sopenharmony_ci         break;
793bf215546Sopenharmony_ci      }
794bf215546Sopenharmony_ci   }
795bf215546Sopenharmony_ci   if (perf->fallback_raw_oa_metric == 0 && perf->n_queries > 0)
796bf215546Sopenharmony_ci      perf->fallback_raw_oa_metric = perf->queries[perf->n_queries - 1].oa_metrics_set_id;
797bf215546Sopenharmony_ci}
798bf215546Sopenharmony_ci
799bf215546Sopenharmony_cistruct intel_perf_registers *
800bf215546Sopenharmony_ciintel_perf_load_configuration(struct intel_perf_config *perf_cfg, int fd, const char *guid)
801bf215546Sopenharmony_ci{
802bf215546Sopenharmony_ci   if (!perf_cfg->i915_query_supported)
803bf215546Sopenharmony_ci      return NULL;
804bf215546Sopenharmony_ci
805bf215546Sopenharmony_ci   struct drm_i915_perf_oa_config i915_config = { 0, };
806bf215546Sopenharmony_ci   if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config))
807bf215546Sopenharmony_ci      return NULL;
808bf215546Sopenharmony_ci
809bf215546Sopenharmony_ci   struct intel_perf_registers *config = rzalloc(NULL, struct intel_perf_registers);
810bf215546Sopenharmony_ci   config->n_flex_regs = i915_config.n_flex_regs;
811bf215546Sopenharmony_ci   config->flex_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_flex_regs);
812bf215546Sopenharmony_ci   config->n_mux_regs = i915_config.n_mux_regs;
813bf215546Sopenharmony_ci   config->mux_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_mux_regs);
814bf215546Sopenharmony_ci   config->n_b_counter_regs = i915_config.n_boolean_regs;
815bf215546Sopenharmony_ci   config->b_counter_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_b_counter_regs);
816bf215546Sopenharmony_ci
817bf215546Sopenharmony_ci   /*
818bf215546Sopenharmony_ci    * struct intel_perf_query_register_prog maps exactly to the tuple of
819bf215546Sopenharmony_ci    * (register offset, register value) returned by the i915.
820bf215546Sopenharmony_ci    */
821bf215546Sopenharmony_ci   i915_config.flex_regs_ptr = to_const_user_pointer(config->flex_regs);
822bf215546Sopenharmony_ci   i915_config.mux_regs_ptr = to_const_user_pointer(config->mux_regs);
823bf215546Sopenharmony_ci   i915_config.boolean_regs_ptr = to_const_user_pointer(config->b_counter_regs);
824bf215546Sopenharmony_ci   if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config)) {
825bf215546Sopenharmony_ci      ralloc_free(config);
826bf215546Sopenharmony_ci      return NULL;
827bf215546Sopenharmony_ci   }
828bf215546Sopenharmony_ci
829bf215546Sopenharmony_ci   return config;
830bf215546Sopenharmony_ci}
831bf215546Sopenharmony_ci
832bf215546Sopenharmony_ciuint64_t
833bf215546Sopenharmony_ciintel_perf_store_configuration(struct intel_perf_config *perf_cfg, int fd,
834bf215546Sopenharmony_ci                               const struct intel_perf_registers *config,
835bf215546Sopenharmony_ci                               const char *guid)
836bf215546Sopenharmony_ci{
837bf215546Sopenharmony_ci   if (guid)
838bf215546Sopenharmony_ci      return i915_add_config(perf_cfg, fd, config, guid);
839bf215546Sopenharmony_ci
840bf215546Sopenharmony_ci   struct mesa_sha1 sha1_ctx;
841bf215546Sopenharmony_ci   _mesa_sha1_init(&sha1_ctx);
842bf215546Sopenharmony_ci
843bf215546Sopenharmony_ci   if (config->flex_regs) {
844bf215546Sopenharmony_ci      _mesa_sha1_update(&sha1_ctx, config->flex_regs,
845bf215546Sopenharmony_ci                        sizeof(config->flex_regs[0]) *
846bf215546Sopenharmony_ci                        config->n_flex_regs);
847bf215546Sopenharmony_ci   }
848bf215546Sopenharmony_ci   if (config->mux_regs) {
849bf215546Sopenharmony_ci      _mesa_sha1_update(&sha1_ctx, config->mux_regs,
850bf215546Sopenharmony_ci                        sizeof(config->mux_regs[0]) *
851bf215546Sopenharmony_ci                        config->n_mux_regs);
852bf215546Sopenharmony_ci   }
853bf215546Sopenharmony_ci   if (config->b_counter_regs) {
854bf215546Sopenharmony_ci      _mesa_sha1_update(&sha1_ctx, config->b_counter_regs,
855bf215546Sopenharmony_ci                        sizeof(config->b_counter_regs[0]) *
856bf215546Sopenharmony_ci                        config->n_b_counter_regs);
857bf215546Sopenharmony_ci   }
858bf215546Sopenharmony_ci
859bf215546Sopenharmony_ci   uint8_t hash[20];
860bf215546Sopenharmony_ci   _mesa_sha1_final(&sha1_ctx, hash);
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci   char formatted_hash[41];
863bf215546Sopenharmony_ci   _mesa_sha1_format(formatted_hash, hash);
864bf215546Sopenharmony_ci
865bf215546Sopenharmony_ci   char generated_guid[37];
866bf215546Sopenharmony_ci   snprintf(generated_guid, sizeof(generated_guid),
867bf215546Sopenharmony_ci            "%.8s-%.4s-%.4s-%.4s-%.12s",
868bf215546Sopenharmony_ci            &formatted_hash[0], &formatted_hash[8],
869bf215546Sopenharmony_ci            &formatted_hash[8 + 4], &formatted_hash[8 + 4 + 4],
870bf215546Sopenharmony_ci            &formatted_hash[8 + 4 + 4 + 4]);
871bf215546Sopenharmony_ci
872bf215546Sopenharmony_ci   /* Check if already present. */
873bf215546Sopenharmony_ci   uint64_t id;
874bf215546Sopenharmony_ci   if (intel_perf_load_metric_id(perf_cfg, generated_guid, &id))
875bf215546Sopenharmony_ci      return id;
876bf215546Sopenharmony_ci
877bf215546Sopenharmony_ci   return i915_add_config(perf_cfg, fd, config, generated_guid);
878bf215546Sopenharmony_ci}
879bf215546Sopenharmony_ci
880bf215546Sopenharmony_cistatic uint64_t
881bf215546Sopenharmony_ciget_passes_mask(struct intel_perf_config *perf,
882bf215546Sopenharmony_ci                const uint32_t *counter_indices,
883bf215546Sopenharmony_ci                uint32_t counter_indices_count)
884bf215546Sopenharmony_ci{
885bf215546Sopenharmony_ci   uint64_t queries_mask = 0;
886bf215546Sopenharmony_ci
887bf215546Sopenharmony_ci   assert(perf->n_queries < 64);
888bf215546Sopenharmony_ci
889bf215546Sopenharmony_ci   /* Compute the number of passes by going through all counters N times (with
890bf215546Sopenharmony_ci    * N the number of queries) to make sure we select the most constraining
891bf215546Sopenharmony_ci    * counters first and look at the more flexible ones (that could be
892bf215546Sopenharmony_ci    * obtained from multiple queries) later. That way we minimize the number
893bf215546Sopenharmony_ci    * of passes required.
894bf215546Sopenharmony_ci    */
895bf215546Sopenharmony_ci   for (uint32_t q = 0; q < perf->n_queries; q++) {
896bf215546Sopenharmony_ci      for (uint32_t i = 0; i < counter_indices_count; i++) {
897bf215546Sopenharmony_ci         assert(counter_indices[i] < perf->n_counters);
898bf215546Sopenharmony_ci
899bf215546Sopenharmony_ci         uint32_t idx = counter_indices[i];
900bf215546Sopenharmony_ci         if (util_bitcount64(perf->counter_infos[idx].query_mask) != (q + 1))
901bf215546Sopenharmony_ci            continue;
902bf215546Sopenharmony_ci
903bf215546Sopenharmony_ci         if (queries_mask & perf->counter_infos[idx].query_mask)
904bf215546Sopenharmony_ci            continue;
905bf215546Sopenharmony_ci
906bf215546Sopenharmony_ci         queries_mask |= BITFIELD64_BIT(ffsll(perf->counter_infos[idx].query_mask) - 1);
907bf215546Sopenharmony_ci      }
908bf215546Sopenharmony_ci   }
909bf215546Sopenharmony_ci
910bf215546Sopenharmony_ci   return queries_mask;
911bf215546Sopenharmony_ci}
912bf215546Sopenharmony_ci
913bf215546Sopenharmony_ciuint32_t
914bf215546Sopenharmony_ciintel_perf_get_n_passes(struct intel_perf_config *perf,
915bf215546Sopenharmony_ci                        const uint32_t *counter_indices,
916bf215546Sopenharmony_ci                        uint32_t counter_indices_count,
917bf215546Sopenharmony_ci                        struct intel_perf_query_info **pass_queries)
918bf215546Sopenharmony_ci{
919bf215546Sopenharmony_ci   uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count);
920bf215546Sopenharmony_ci
921bf215546Sopenharmony_ci   if (pass_queries) {
922bf215546Sopenharmony_ci      uint32_t pass = 0;
923bf215546Sopenharmony_ci      for (uint32_t q = 0; q < perf->n_queries; q++) {
924bf215546Sopenharmony_ci         if ((1ULL << q) & queries_mask)
925bf215546Sopenharmony_ci            pass_queries[pass++] = &perf->queries[q];
926bf215546Sopenharmony_ci      }
927bf215546Sopenharmony_ci   }
928bf215546Sopenharmony_ci
929bf215546Sopenharmony_ci   return util_bitcount64(queries_mask);
930bf215546Sopenharmony_ci}
931bf215546Sopenharmony_ci
932bf215546Sopenharmony_civoid
933bf215546Sopenharmony_ciintel_perf_get_counters_passes(struct intel_perf_config *perf,
934bf215546Sopenharmony_ci                               const uint32_t *counter_indices,
935bf215546Sopenharmony_ci                               uint32_t counter_indices_count,
936bf215546Sopenharmony_ci                               struct intel_perf_counter_pass *counter_pass)
937bf215546Sopenharmony_ci{
938bf215546Sopenharmony_ci   uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count);
939bf215546Sopenharmony_ci   ASSERTED uint32_t n_passes = util_bitcount64(queries_mask);
940bf215546Sopenharmony_ci
941bf215546Sopenharmony_ci   for (uint32_t i = 0; i < counter_indices_count; i++) {
942bf215546Sopenharmony_ci      assert(counter_indices[i] < perf->n_counters);
943bf215546Sopenharmony_ci
944bf215546Sopenharmony_ci      uint32_t idx = counter_indices[i];
945bf215546Sopenharmony_ci      counter_pass[i].counter = perf->counter_infos[idx].counter;
946bf215546Sopenharmony_ci
947bf215546Sopenharmony_ci      uint32_t query_idx = ffsll(perf->counter_infos[idx].query_mask & queries_mask) - 1;
948bf215546Sopenharmony_ci      counter_pass[i].query = &perf->queries[query_idx];
949bf215546Sopenharmony_ci
950bf215546Sopenharmony_ci      uint32_t clear_bits = 63 - query_idx;
951bf215546Sopenharmony_ci      counter_pass[i].pass = util_bitcount64((queries_mask << clear_bits) >> clear_bits) - 1;
952bf215546Sopenharmony_ci      assert(counter_pass[i].pass < n_passes);
953bf215546Sopenharmony_ci   }
954bf215546Sopenharmony_ci}
955bf215546Sopenharmony_ci
/* Accumulate a 32-bit OA counter.
 *
 * Adds to *accumulator the difference between *report1 and *report0,
 * computed modulo 2^32 so that a single counter wrap is handled.
 */
static inline void
accumulate_uint32(const uint32_t *report0,
                  const uint32_t *report1,
                  uint64_t *accumulator)
{
   const uint32_t before = *report0;
   const uint32_t after = *report1;
   const uint32_t delta = after - before;

   *accumulator += delta;
}
964bf215546Sopenharmony_ci
/* Accumulate a 40-bit OA counter.
 *
 * The low 32 bits of counter a_index are read from dword a_index + 4 of each
 * report, the high 8 bits from byte a_index of the byte array starting at
 * dword 40. The difference report1 - report0, taken modulo 2^40 so that a
 * single counter wrap is handled, is added to *accumulator.
 */
static inline void
accumulate_uint40(int a_index,
                  const uint32_t *report0,
                  const uint32_t *report1,
                  uint64_t *accumulator)
{
   const uint64_t mask40 = (1ULL << 40) - 1;
   const uint8_t *top0 = (const uint8_t *)(report0 + 40);
   const uint8_t *top1 = (const uint8_t *)(report1 + 40);

   const uint64_t before = ((uint64_t)top0[a_index] << 32) | report0[a_index + 4];
   const uint64_t after = ((uint64_t)top1[a_index] << 32) | report1[a_index + 4];

   /* Both values fit in 40 bits, so reducing the 64-bit difference modulo
    * 2^40 gives after - before when no wrap occurred and
    * 2^40 + after - before when the counter wrapped.
    */
   *accumulator += (after - before) & mask40;
}
987bf215546Sopenharmony_ci
/* Extract the slice and unslice clock frequencies (in Hz) from an OA report.
 *
 * The RPT_ID field (first dword of the report) contains a snapshot of bits
 * coming from the RP_FREQ_NORMAL register, laid out as follows:
 *
 * RPT_ID[31:25]: RP_FREQ_NORMAL[20:14] (low squashed_slice_clock_frequency)
 * RPT_ID[10:9]:  RP_FREQ_NORMAL[22:21] (high squashed_slice_clock_frequency)
 * RPT_ID[8:0]:   RP_FREQ_NORMAL[31:23] (squashed_unslice_clock_frequency)
 *
 * RP_FREQ_NORMAL[31:23]: Software Unslice Ratio Request
 *                        Multiple of 33.33MHz 2xclk (16 MHz 1xclk)
 *
 * RP_FREQ_NORMAL[22:14]: Software Slice Ratio Request
 *                        Multiple of 33.33MHz 2xclk (16 MHz 1xclk)
 */
static void
gfx8_read_report_clock_ratios(const uint32_t *report,
                              uint64_t *slice_freq_hz,
                              uint64_t *unslice_freq_hz)
{
   const uint32_t rpt_id = report[0];

   const uint32_t unslice_ratio = rpt_id & 0x1ff;

   /* Reassemble the 9-bit slice ratio from its two separate pieces. */
   const uint32_t slice_ratio =
      ((rpt_id >> 25) & 0x7f) | (((rpt_id >> 9) & 0x3) << 7);

   *slice_freq_hz = slice_ratio * 16666667ULL;
   *unslice_freq_hz = unslice_ratio * 16666667ULL;
}
1016bf215546Sopenharmony_ci
1017bf215546Sopenharmony_civoid
1018bf215546Sopenharmony_ciintel_perf_query_result_read_frequencies(struct intel_perf_query_result *result,
1019bf215546Sopenharmony_ci                                         const struct intel_device_info *devinfo,
1020bf215546Sopenharmony_ci                                         const uint32_t *start,
1021bf215546Sopenharmony_ci                                         const uint32_t *end)
1022bf215546Sopenharmony_ci{
1023bf215546Sopenharmony_ci   /* Slice/Unslice frequency is only available in the OA reports when the
1024bf215546Sopenharmony_ci    * "Disable OA reports due to clock ratio change" field in
1025bf215546Sopenharmony_ci    * OA_DEBUG_REGISTER is set to 1. This is how the kernel programs this
1026bf215546Sopenharmony_ci    * global register (see drivers/gpu/drm/i915/i915_perf.c)
1027bf215546Sopenharmony_ci    *
1028bf215546Sopenharmony_ci    * Documentation says this should be available on Gfx9+ but experimentation
1029bf215546Sopenharmony_ci    * shows that Gfx8 reports similar values, so we enable it there too.
1030bf215546Sopenharmony_ci    */
1031bf215546Sopenharmony_ci   if (devinfo->ver < 8)
1032bf215546Sopenharmony_ci      return;
1033bf215546Sopenharmony_ci
1034bf215546Sopenharmony_ci   gfx8_read_report_clock_ratios(start,
1035bf215546Sopenharmony_ci                                 &result->slice_frequency[0],
1036bf215546Sopenharmony_ci                                 &result->unslice_frequency[0]);
1037bf215546Sopenharmony_ci   gfx8_read_report_clock_ratios(end,
1038bf215546Sopenharmony_ci                                 &result->slice_frequency[1],
1039bf215546Sopenharmony_ci                                 &result->unslice_frequency[1]);
1040bf215546Sopenharmony_ci}
1041bf215546Sopenharmony_ci
1042bf215546Sopenharmony_cistatic inline bool
1043bf215546Sopenharmony_cican_use_mi_rpc_bc_counters(const struct intel_device_info *devinfo)
1044bf215546Sopenharmony_ci{
1045bf215546Sopenharmony_ci   return devinfo->ver <= 11;
1046bf215546Sopenharmony_ci}
1047bf215546Sopenharmony_ci
1048bf215546Sopenharmony_ciuint64_t
1049bf215546Sopenharmony_ciintel_perf_report_timestamp(const struct intel_perf_query_info *query,
1050bf215546Sopenharmony_ci                            const uint32_t *report)
1051bf215546Sopenharmony_ci{
1052bf215546Sopenharmony_ci   return report[1] >> query->perf->oa_timestamp_shift;
1053bf215546Sopenharmony_ci}
1054bf215546Sopenharmony_ci
1055bf215546Sopenharmony_civoid
1056bf215546Sopenharmony_ciintel_perf_query_result_accumulate(struct intel_perf_query_result *result,
1057bf215546Sopenharmony_ci                                   const struct intel_perf_query_info *query,
1058bf215546Sopenharmony_ci                                   const uint32_t *start,
1059bf215546Sopenharmony_ci                                   const uint32_t *end)
1060bf215546Sopenharmony_ci{
1061bf215546Sopenharmony_ci   int i;
1062bf215546Sopenharmony_ci
1063bf215546Sopenharmony_ci   if (result->hw_id == INTEL_PERF_INVALID_CTX_ID &&
1064bf215546Sopenharmony_ci       start[2] != INTEL_PERF_INVALID_CTX_ID)
1065bf215546Sopenharmony_ci      result->hw_id = start[2];
1066bf215546Sopenharmony_ci   if (result->reports_accumulated == 0)
1067bf215546Sopenharmony_ci      result->begin_timestamp = intel_perf_report_timestamp(query, start);
1068bf215546Sopenharmony_ci   result->end_timestamp = intel_perf_report_timestamp(query, end);
1069bf215546Sopenharmony_ci   result->reports_accumulated++;
1070bf215546Sopenharmony_ci
1071bf215546Sopenharmony_ci   switch (query->oa_format) {
1072bf215546Sopenharmony_ci   case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
1073bf215546Sopenharmony_ci      result->accumulator[query->gpu_time_offset] =
1074bf215546Sopenharmony_ci         intel_perf_report_timestamp(query, end) -
1075bf215546Sopenharmony_ci         intel_perf_report_timestamp(query, start);
1076bf215546Sopenharmony_ci
1077bf215546Sopenharmony_ci      accumulate_uint32(start + 3, end + 3,
1078bf215546Sopenharmony_ci                        result->accumulator + query->gpu_clock_offset); /* clock */
1079bf215546Sopenharmony_ci
1080bf215546Sopenharmony_ci      /* 32x 40bit A counters... */
1081bf215546Sopenharmony_ci      for (i = 0; i < 32; i++) {
1082bf215546Sopenharmony_ci         accumulate_uint40(i, start, end,
1083bf215546Sopenharmony_ci                           result->accumulator + query->a_offset + i);
1084bf215546Sopenharmony_ci      }
1085bf215546Sopenharmony_ci
1086bf215546Sopenharmony_ci      /* 4x 32bit A counters... */
1087bf215546Sopenharmony_ci      for (i = 0; i < 4; i++) {
1088bf215546Sopenharmony_ci         accumulate_uint32(start + 36 + i, end + 36 + i,
1089bf215546Sopenharmony_ci                           result->accumulator + query->a_offset + 32 + i);
1090bf215546Sopenharmony_ci      }
1091bf215546Sopenharmony_ci
1092bf215546Sopenharmony_ci      if (can_use_mi_rpc_bc_counters(&query->perf->devinfo)) {
1093bf215546Sopenharmony_ci         /* 8x 32bit B counters */
1094bf215546Sopenharmony_ci         for (i = 0; i < 8; i++) {
1095bf215546Sopenharmony_ci            accumulate_uint32(start + 48 + i, end + 48 + i,
1096bf215546Sopenharmony_ci                              result->accumulator + query->b_offset + i);
1097bf215546Sopenharmony_ci         }
1098bf215546Sopenharmony_ci
1099bf215546Sopenharmony_ci         /* 8x 32bit C counters... */
1100bf215546Sopenharmony_ci         for (i = 0; i < 8; i++) {
1101bf215546Sopenharmony_ci            accumulate_uint32(start + 56 + i, end + 56 + i,
1102bf215546Sopenharmony_ci                              result->accumulator + query->c_offset + i);
1103bf215546Sopenharmony_ci         }
1104bf215546Sopenharmony_ci      }
1105bf215546Sopenharmony_ci      break;
1106bf215546Sopenharmony_ci
1107bf215546Sopenharmony_ci   case I915_OA_FORMAT_A45_B8_C8:
1108bf215546Sopenharmony_ci      result->accumulator[query->gpu_time_offset] =
1109bf215546Sopenharmony_ci         intel_perf_report_timestamp(query, end) -
1110bf215546Sopenharmony_ci         intel_perf_report_timestamp(query, start);
1111bf215546Sopenharmony_ci
1112bf215546Sopenharmony_ci      for (i = 0; i < 61; i++) {
1113bf215546Sopenharmony_ci         accumulate_uint32(start + 3 + i, end + 3 + i,
1114bf215546Sopenharmony_ci                           result->accumulator + query->a_offset + i);
1115bf215546Sopenharmony_ci      }
1116bf215546Sopenharmony_ci      break;
1117bf215546Sopenharmony_ci
1118bf215546Sopenharmony_ci   default:
1119bf215546Sopenharmony_ci      unreachable("Can't accumulate OA counters in unknown format");
1120bf215546Sopenharmony_ci   }
1121bf215546Sopenharmony_ci
1122bf215546Sopenharmony_ci}
1123bf215546Sopenharmony_ci
1124bf215546Sopenharmony_ci#define GET_FIELD(word, field) (((word)  & field ## _MASK) >> field ## _SHIFT)
1125bf215546Sopenharmony_ci
1126bf215546Sopenharmony_civoid
1127bf215546Sopenharmony_ciintel_perf_query_result_read_gt_frequency(struct intel_perf_query_result *result,
1128bf215546Sopenharmony_ci                                          const struct intel_device_info *devinfo,
1129bf215546Sopenharmony_ci                                          const uint32_t start,
1130bf215546Sopenharmony_ci                                          const uint32_t end)
1131bf215546Sopenharmony_ci{
1132bf215546Sopenharmony_ci   switch (devinfo->ver) {
1133bf215546Sopenharmony_ci   case 7:
1134bf215546Sopenharmony_ci   case 8:
1135bf215546Sopenharmony_ci      result->gt_frequency[0] = GET_FIELD(start, GFX7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
1136bf215546Sopenharmony_ci      result->gt_frequency[1] = GET_FIELD(end, GFX7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
1137bf215546Sopenharmony_ci      break;
1138bf215546Sopenharmony_ci   case 9:
1139bf215546Sopenharmony_ci   case 11:
1140bf215546Sopenharmony_ci   case 12:
1141bf215546Sopenharmony_ci      result->gt_frequency[0] = GET_FIELD(start, GFX9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
1142bf215546Sopenharmony_ci      result->gt_frequency[1] = GET_FIELD(end, GFX9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
1143bf215546Sopenharmony_ci      break;
1144bf215546Sopenharmony_ci   default:
1145bf215546Sopenharmony_ci      unreachable("unexpected gen");
1146bf215546Sopenharmony_ci   }
1147bf215546Sopenharmony_ci
1148bf215546Sopenharmony_ci   /* Put the numbers into Hz. */
1149bf215546Sopenharmony_ci   result->gt_frequency[0] *= 1000000ULL;
1150bf215546Sopenharmony_ci   result->gt_frequency[1] *= 1000000ULL;
1151bf215546Sopenharmony_ci}
1152bf215546Sopenharmony_ci
1153bf215546Sopenharmony_civoid
1154bf215546Sopenharmony_ciintel_perf_query_result_read_perfcnts(struct intel_perf_query_result *result,
1155bf215546Sopenharmony_ci                                      const struct intel_perf_query_info *query,
1156bf215546Sopenharmony_ci                                      const uint64_t *start,
1157bf215546Sopenharmony_ci                                      const uint64_t *end)
1158bf215546Sopenharmony_ci{
1159bf215546Sopenharmony_ci   for (uint32_t i = 0; i < 2; i++) {
1160bf215546Sopenharmony_ci      uint64_t v0 = start[i] & PERF_CNT_VALUE_MASK;
1161bf215546Sopenharmony_ci      uint64_t v1 = end[i] & PERF_CNT_VALUE_MASK;
1162bf215546Sopenharmony_ci
1163bf215546Sopenharmony_ci      result->accumulator[query->perfcnt_offset + i] = v0 > v1 ?
1164bf215546Sopenharmony_ci         (PERF_CNT_VALUE_MASK + 1 + v1 - v0) :
1165bf215546Sopenharmony_ci         (v1 - v0);
1166bf215546Sopenharmony_ci   }
1167bf215546Sopenharmony_ci}
1168bf215546Sopenharmony_ci
1169bf215546Sopenharmony_cistatic uint32_t
1170bf215546Sopenharmony_ciquery_accumulator_offset(const struct intel_perf_query_info *query,
1171bf215546Sopenharmony_ci                         enum intel_perf_query_field_type type,
1172bf215546Sopenharmony_ci                         uint8_t index)
1173bf215546Sopenharmony_ci{
1174bf215546Sopenharmony_ci   switch (type) {
1175bf215546Sopenharmony_ci   case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
1176bf215546Sopenharmony_ci      return query->perfcnt_offset + index;
1177bf215546Sopenharmony_ci   case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
1178bf215546Sopenharmony_ci      return query->a_offset + index;
1179bf215546Sopenharmony_ci   case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
1180bf215546Sopenharmony_ci      return query->b_offset + index;
1181bf215546Sopenharmony_ci   case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
1182bf215546Sopenharmony_ci      return query->c_offset + index;
1183bf215546Sopenharmony_ci   default:
1184bf215546Sopenharmony_ci      unreachable("Invalid register type");
1185bf215546Sopenharmony_ci      return 0;
1186bf215546Sopenharmony_ci   }
1187bf215546Sopenharmony_ci}
1188bf215546Sopenharmony_ci
1189bf215546Sopenharmony_civoid
1190bf215546Sopenharmony_ciintel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result,
1191bf215546Sopenharmony_ci                                          const struct intel_perf_query_info *query,
1192bf215546Sopenharmony_ci                                          const void *start,
1193bf215546Sopenharmony_ci                                          const void *end,
1194bf215546Sopenharmony_ci                                          bool no_oa_accumulate)
1195bf215546Sopenharmony_ci{
1196bf215546Sopenharmony_ci   const struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
1197bf215546Sopenharmony_ci   const struct intel_device_info *devinfo = &query->perf->devinfo;
1198bf215546Sopenharmony_ci
1199bf215546Sopenharmony_ci   for (uint32_t r = 0; r < layout->n_fields; r++) {
1200bf215546Sopenharmony_ci      const struct intel_perf_query_field *field = &layout->fields[r];
1201bf215546Sopenharmony_ci
1202bf215546Sopenharmony_ci      if (field->type == INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC) {
1203bf215546Sopenharmony_ci         intel_perf_query_result_read_frequencies(result, devinfo,
1204bf215546Sopenharmony_ci                                                start + field->location,
1205bf215546Sopenharmony_ci                                                end + field->location);
1206bf215546Sopenharmony_ci         /* no_oa_accumulate=true is used when doing GL perf queries, we
1207bf215546Sopenharmony_ci          * manually parse the OA reports from the OA buffer and subtract
1208bf215546Sopenharmony_ci          * unrelated deltas, so don't accumulate the begin/end reports here.
1209bf215546Sopenharmony_ci          */
1210bf215546Sopenharmony_ci         if (!no_oa_accumulate) {
1211bf215546Sopenharmony_ci            intel_perf_query_result_accumulate(result, query,
1212bf215546Sopenharmony_ci                                               start + field->location,
1213bf215546Sopenharmony_ci                                               end + field->location);
1214bf215546Sopenharmony_ci         }
1215bf215546Sopenharmony_ci      } else {
1216bf215546Sopenharmony_ci         uint64_t v0, v1;
1217bf215546Sopenharmony_ci
1218bf215546Sopenharmony_ci         if (field->size == 4) {
1219bf215546Sopenharmony_ci            v0 = *(const uint32_t *)(start + field->location);
1220bf215546Sopenharmony_ci            v1 = *(const uint32_t *)(end + field->location);
1221bf215546Sopenharmony_ci         } else {
1222bf215546Sopenharmony_ci            assert(field->size == 8);
1223bf215546Sopenharmony_ci            v0 = *(const uint64_t *)(start + field->location);
1224bf215546Sopenharmony_ci            v1 = *(const uint64_t *)(end + field->location);
1225bf215546Sopenharmony_ci         }
1226bf215546Sopenharmony_ci
1227bf215546Sopenharmony_ci         if (field->mask) {
1228bf215546Sopenharmony_ci            v0 = field->mask & v0;
1229bf215546Sopenharmony_ci            v1 = field->mask & v1;
1230bf215546Sopenharmony_ci         }
1231bf215546Sopenharmony_ci
1232bf215546Sopenharmony_ci         /* RPSTAT is a bit of a special case because its begin/end values
1233bf215546Sopenharmony_ci          * represent frequencies. We store it in a separate location.
1234bf215546Sopenharmony_ci          */
1235bf215546Sopenharmony_ci         if (field->type == INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT)
1236bf215546Sopenharmony_ci            intel_perf_query_result_read_gt_frequency(result, devinfo, v0, v1);
1237bf215546Sopenharmony_ci         else
1238bf215546Sopenharmony_ci            result->accumulator[query_accumulator_offset(query, field->type, field->index)] = v1 - v0;
1239bf215546Sopenharmony_ci      }
1240bf215546Sopenharmony_ci   }
1241bf215546Sopenharmony_ci}
1242bf215546Sopenharmony_ci
1243bf215546Sopenharmony_civoid
1244bf215546Sopenharmony_ciintel_perf_query_result_clear(struct intel_perf_query_result *result)
1245bf215546Sopenharmony_ci{
1246bf215546Sopenharmony_ci   memset(result, 0, sizeof(*result));
1247bf215546Sopenharmony_ci   result->hw_id = INTEL_PERF_INVALID_CTX_ID;
1248bf215546Sopenharmony_ci}
1249bf215546Sopenharmony_ci
1250bf215546Sopenharmony_civoid
1251bf215546Sopenharmony_ciintel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
1252bf215546Sopenharmony_ci                                     const void *data)
1253bf215546Sopenharmony_ci{
1254bf215546Sopenharmony_ci   const struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
1255bf215546Sopenharmony_ci
1256bf215546Sopenharmony_ci   for (uint32_t r = 0; r < layout->n_fields; r++) {
1257bf215546Sopenharmony_ci      const struct intel_perf_query_field *field = &layout->fields[r];
1258bf215546Sopenharmony_ci      const uint32_t *value32 = data + field->location;
1259bf215546Sopenharmony_ci
1260bf215546Sopenharmony_ci      switch (field->type) {
1261bf215546Sopenharmony_ci      case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
1262bf215546Sopenharmony_ci         fprintf(stderr, "MI_RPC:\n");
1263bf215546Sopenharmony_ci         fprintf(stderr, "  TS: 0x%08x\n", *(value32 + 1));
1264bf215546Sopenharmony_ci         fprintf(stderr, "  CLK: 0x%08x\n", *(value32 + 3));
1265bf215546Sopenharmony_ci         break;
1266bf215546Sopenharmony_ci      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
1267bf215546Sopenharmony_ci         fprintf(stderr, "A%u: 0x%08x\n", field->index, *value32);
1268bf215546Sopenharmony_ci         break;
1269bf215546Sopenharmony_ci      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
1270bf215546Sopenharmony_ci         fprintf(stderr, "B%u: 0x%08x\n", field->index, *value32);
1271bf215546Sopenharmony_ci         break;
1272bf215546Sopenharmony_ci      case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
1273bf215546Sopenharmony_ci         fprintf(stderr, "C%u: 0x%08x\n", field->index, *value32);
1274bf215546Sopenharmony_ci         break;
1275bf215546Sopenharmony_ci      default:
1276bf215546Sopenharmony_ci         break;
1277bf215546Sopenharmony_ci      }
1278bf215546Sopenharmony_ci   }
1279bf215546Sopenharmony_ci}
1280bf215546Sopenharmony_ci
1281bf215546Sopenharmony_cistatic int
1282bf215546Sopenharmony_ciintel_perf_compare_query_names(const void *v1, const void *v2)
1283bf215546Sopenharmony_ci{
1284bf215546Sopenharmony_ci   const struct intel_perf_query_info *q1 = v1;
1285bf215546Sopenharmony_ci   const struct intel_perf_query_info *q2 = v2;
1286bf215546Sopenharmony_ci
1287bf215546Sopenharmony_ci   return strcmp(q1->name, q2->name);
1288bf215546Sopenharmony_ci}
1289bf215546Sopenharmony_ci
1290bf215546Sopenharmony_cistatic inline struct intel_perf_query_field *
1291bf215546Sopenharmony_ciadd_query_register(struct intel_perf_query_field_layout *layout,
1292bf215546Sopenharmony_ci                   enum intel_perf_query_field_type type,
1293bf215546Sopenharmony_ci                   uint16_t offset,
1294bf215546Sopenharmony_ci                   uint16_t size,
1295bf215546Sopenharmony_ci                   uint8_t index)
1296bf215546Sopenharmony_ci{
1297bf215546Sopenharmony_ci   /* Align MI_RPC to 64bytes (HW requirement) & 64bit registers to 8bytes
1298bf215546Sopenharmony_ci    * (shows up nicely in the debugger).
1299bf215546Sopenharmony_ci    */
1300bf215546Sopenharmony_ci   if (type == INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC)
1301bf215546Sopenharmony_ci      layout->size = align(layout->size, 64);
1302bf215546Sopenharmony_ci   else if (size % 8 == 0)
1303bf215546Sopenharmony_ci      layout->size = align(layout->size, 8);
1304bf215546Sopenharmony_ci
1305bf215546Sopenharmony_ci   layout->fields[layout->n_fields++] = (struct intel_perf_query_field) {
1306bf215546Sopenharmony_ci      .mmio_offset = offset,
1307bf215546Sopenharmony_ci      .location = layout->size,
1308bf215546Sopenharmony_ci      .type = type,
1309bf215546Sopenharmony_ci      .index = index,
1310bf215546Sopenharmony_ci      .size = size,
1311bf215546Sopenharmony_ci   };
1312bf215546Sopenharmony_ci   layout->size += size;
1313bf215546Sopenharmony_ci
1314bf215546Sopenharmony_ci   return &layout->fields[layout->n_fields - 1];
1315bf215546Sopenharmony_ci}
1316bf215546Sopenharmony_ci
1317bf215546Sopenharmony_cistatic void
1318bf215546Sopenharmony_ciintel_perf_init_query_fields(struct intel_perf_config *perf_cfg,
1319bf215546Sopenharmony_ci                             const struct intel_device_info *devinfo,
1320bf215546Sopenharmony_ci                             bool use_register_snapshots)
1321bf215546Sopenharmony_ci{
1322bf215546Sopenharmony_ci   struct intel_perf_query_field_layout *layout = &perf_cfg->query_layout;
1323bf215546Sopenharmony_ci
1324bf215546Sopenharmony_ci   layout->n_fields = 0;
1325bf215546Sopenharmony_ci
1326bf215546Sopenharmony_ci   /* MI_RPC requires a 64byte alignment. */
1327bf215546Sopenharmony_ci   layout->alignment = 64;
1328bf215546Sopenharmony_ci
1329bf215546Sopenharmony_ci   layout->fields = rzalloc_array(perf_cfg, struct intel_perf_query_field, 5 + 16);
1330bf215546Sopenharmony_ci
1331bf215546Sopenharmony_ci   add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC,
1332bf215546Sopenharmony_ci                      0, 256, 0);
1333bf215546Sopenharmony_ci
1334bf215546Sopenharmony_ci   if (use_register_snapshots) {
1335bf215546Sopenharmony_ci      if (devinfo->ver <= 11) {
1336bf215546Sopenharmony_ci         struct intel_perf_query_field *field =
1337bf215546Sopenharmony_ci            add_query_register(layout,
1338bf215546Sopenharmony_ci                               INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
1339bf215546Sopenharmony_ci                               PERF_CNT_1_DW0, 8, 0);
1340bf215546Sopenharmony_ci         field->mask = PERF_CNT_VALUE_MASK;
1341bf215546Sopenharmony_ci
1342bf215546Sopenharmony_ci         field = add_query_register(layout,
1343bf215546Sopenharmony_ci                                    INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
1344bf215546Sopenharmony_ci                                    PERF_CNT_2_DW0, 8, 1);
1345bf215546Sopenharmony_ci         field->mask = PERF_CNT_VALUE_MASK;
1346bf215546Sopenharmony_ci      }
1347bf215546Sopenharmony_ci
1348bf215546Sopenharmony_ci      if (devinfo->ver == 8 && devinfo->platform != INTEL_PLATFORM_CHV) {
1349bf215546Sopenharmony_ci         add_query_register(layout,
1350bf215546Sopenharmony_ci                         INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
1351bf215546Sopenharmony_ci                            GFX7_RPSTAT1, 4, 0);
1352bf215546Sopenharmony_ci      }
1353bf215546Sopenharmony_ci
1354bf215546Sopenharmony_ci      if (devinfo->ver >= 9) {
1355bf215546Sopenharmony_ci         add_query_register(layout,
1356bf215546Sopenharmony_ci                            INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
1357bf215546Sopenharmony_ci                            GFX9_RPSTAT0, 4, 0);
1358bf215546Sopenharmony_ci      }
1359bf215546Sopenharmony_ci
1360bf215546Sopenharmony_ci      if (!can_use_mi_rpc_bc_counters(devinfo)) {
1361bf215546Sopenharmony_ci         if (devinfo->ver >= 8 && devinfo->ver <= 11) {
1362bf215546Sopenharmony_ci            for (uint32_t i = 0; i < GFX8_N_OA_PERF_B32; i++) {
1363bf215546Sopenharmony_ci               add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B,
1364bf215546Sopenharmony_ci                                  GFX8_OA_PERF_B32(i), 4, i);
1365bf215546Sopenharmony_ci            }
1366bf215546Sopenharmony_ci            for (uint32_t i = 0; i < GFX8_N_OA_PERF_C32; i++) {
1367bf215546Sopenharmony_ci               add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C,
1368bf215546Sopenharmony_ci                                  GFX8_OA_PERF_C32(i), 4, i);
1369bf215546Sopenharmony_ci            }
1370bf215546Sopenharmony_ci         } else if (devinfo->verx10 == 120) {
1371bf215546Sopenharmony_ci            for (uint32_t i = 0; i < GFX12_N_OAG_PERF_B32; i++) {
1372bf215546Sopenharmony_ci               add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B,
1373bf215546Sopenharmony_ci                                  GFX12_OAG_PERF_B32(i), 4, i);
1374bf215546Sopenharmony_ci            }
1375bf215546Sopenharmony_ci            for (uint32_t i = 0; i < GFX12_N_OAG_PERF_C32; i++) {
1376bf215546Sopenharmony_ci               add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C,
1377bf215546Sopenharmony_ci                                  GFX12_OAG_PERF_C32(i), 4, i);
1378bf215546Sopenharmony_ci            }
1379bf215546Sopenharmony_ci         } else if (devinfo->verx10 == 125) {
1380bf215546Sopenharmony_ci            add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A,
1381bf215546Sopenharmony_ci                               GFX125_OAG_PERF_A36, 4, 36);
1382bf215546Sopenharmony_ci            add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A,
1383bf215546Sopenharmony_ci                               GFX125_OAG_PERF_A37, 4, 37);
1384bf215546Sopenharmony_ci            for (uint32_t i = 0; i < GFX12_N_OAG_PERF_B32; i++) {
1385bf215546Sopenharmony_ci               add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B,
1386bf215546Sopenharmony_ci                                  GFX12_OAG_PERF_B32(i), 4, i);
1387bf215546Sopenharmony_ci            }
1388bf215546Sopenharmony_ci            for (uint32_t i = 0; i < GFX12_N_OAG_PERF_C32; i++) {
1389bf215546Sopenharmony_ci               add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C,
1390bf215546Sopenharmony_ci                                  GFX12_OAG_PERF_C32(i), 4, i);
1391bf215546Sopenharmony_ci            }
1392bf215546Sopenharmony_ci         }
1393bf215546Sopenharmony_ci      }
1394bf215546Sopenharmony_ci   }
1395bf215546Sopenharmony_ci
1396bf215546Sopenharmony_ci   /* Align the whole package to 64bytes so that 2 snapshots can be put
1397bf215546Sopenharmony_ci    * together without extract alignment for the user.
1398bf215546Sopenharmony_ci    */
1399bf215546Sopenharmony_ci   layout->size = align(layout->size, 64);
1400bf215546Sopenharmony_ci}
1401bf215546Sopenharmony_ci
1402bf215546Sopenharmony_civoid
1403bf215546Sopenharmony_ciintel_perf_init_metrics(struct intel_perf_config *perf_cfg,
1404bf215546Sopenharmony_ci                        const struct intel_device_info *devinfo,
1405bf215546Sopenharmony_ci                        int drm_fd,
1406bf215546Sopenharmony_ci                        bool include_pipeline_statistics,
1407bf215546Sopenharmony_ci                        bool use_register_snapshots)
1408bf215546Sopenharmony_ci{
1409bf215546Sopenharmony_ci   intel_perf_init_query_fields(perf_cfg, devinfo, use_register_snapshots);
1410bf215546Sopenharmony_ci
1411bf215546Sopenharmony_ci   if (include_pipeline_statistics) {
1412bf215546Sopenharmony_ci      load_pipeline_statistic_metrics(perf_cfg, devinfo);
1413bf215546Sopenharmony_ci      intel_perf_register_mdapi_statistic_query(perf_cfg, devinfo);
1414bf215546Sopenharmony_ci   }
1415bf215546Sopenharmony_ci
1416bf215546Sopenharmony_ci   bool oa_metrics = oa_metrics_available(perf_cfg, drm_fd, devinfo,
1417bf215546Sopenharmony_ci                                          use_register_snapshots);
1418bf215546Sopenharmony_ci   if (oa_metrics)
1419bf215546Sopenharmony_ci      load_oa_metrics(perf_cfg, drm_fd, devinfo);
1420bf215546Sopenharmony_ci
1421bf215546Sopenharmony_ci   /* sort query groups by name */
1422bf215546Sopenharmony_ci   qsort(perf_cfg->queries, perf_cfg->n_queries,
1423bf215546Sopenharmony_ci         sizeof(perf_cfg->queries[0]), intel_perf_compare_query_names);
1424bf215546Sopenharmony_ci
1425bf215546Sopenharmony_ci   build_unique_counter_list(perf_cfg);
1426bf215546Sopenharmony_ci
1427bf215546Sopenharmony_ci   if (oa_metrics)
1428bf215546Sopenharmony_ci      intel_perf_register_mdapi_oa_query(perf_cfg, devinfo);
1429bf215546Sopenharmony_ci}
1430