1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include <dirent.h> 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include <sys/types.h> 27bf215546Sopenharmony_ci#include <sys/stat.h> 28bf215546Sopenharmony_ci#include <fcntl.h> 29bf215546Sopenharmony_ci#include <unistd.h> 30bf215546Sopenharmony_ci#include <errno.h> 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci#ifndef HAVE_DIRENT_D_TYPE 33bf215546Sopenharmony_ci#include <limits.h> // PATH_MAX 34bf215546Sopenharmony_ci#endif 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci#include <drm-uapi/i915_drm.h> 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci#include "common/intel_gem.h" 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci#include "dev/intel_debug.h" 41bf215546Sopenharmony_ci#include "dev/intel_device_info.h" 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci#include "perf/intel_perf.h" 44bf215546Sopenharmony_ci#include "perf/intel_perf_regs.h" 45bf215546Sopenharmony_ci#include "perf/intel_perf_mdapi.h" 46bf215546Sopenharmony_ci#include "perf/intel_perf_metrics.h" 47bf215546Sopenharmony_ci#include "perf/intel_perf_private.h" 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ci#include "util/bitscan.h" 50bf215546Sopenharmony_ci#include "util/macros.h" 51bf215546Sopenharmony_ci#include "util/mesa-sha1.h" 52bf215546Sopenharmony_ci#include "util/u_math.h" 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_ci#define FILE_DEBUG_FLAG DEBUG_PERFMON 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_cistatic bool 57bf215546Sopenharmony_ciis_dir_or_link(const struct dirent *entry, const char *parent_dir) 58bf215546Sopenharmony_ci{ 59bf215546Sopenharmony_ci#ifdef HAVE_DIRENT_D_TYPE 60bf215546Sopenharmony_ci return entry->d_type == DT_DIR || entry->d_type == DT_LNK; 61bf215546Sopenharmony_ci#else 62bf215546Sopenharmony_ci struct stat st; 63bf215546Sopenharmony_ci char path[PATH_MAX + 1]; 64bf215546Sopenharmony_ci snprintf(path, sizeof(path), "%s/%s", parent_dir, entry->d_name); 65bf215546Sopenharmony_ci lstat(path, &st); 66bf215546Sopenharmony_ci return S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode); 67bf215546Sopenharmony_ci#endif 68bf215546Sopenharmony_ci} 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_cistatic bool 71bf215546Sopenharmony_ciget_sysfs_dev_dir(struct intel_perf_config *perf, int fd) 72bf215546Sopenharmony_ci{ 73bf215546Sopenharmony_ci struct stat sb; 74bf215546Sopenharmony_ci int min, maj; 75bf215546Sopenharmony_ci DIR *drmdir; 76bf215546Sopenharmony_ci struct dirent *drm_entry; 77bf215546Sopenharmony_ci int len; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci perf->sysfs_dev_dir[0] = '\0'; 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci if (INTEL_DEBUG(DEBUG_NO_OACONFIG)) 82bf215546Sopenharmony_ci return true; 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci if (fstat(fd, &sb)) { 85bf215546Sopenharmony_ci DBG("Failed to stat DRM fd\n"); 86bf215546Sopenharmony_ci return false; 87bf215546Sopenharmony_ci } 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci maj = major(sb.st_rdev); 90bf215546Sopenharmony_ci min = minor(sb.st_rdev); 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci if (!S_ISCHR(sb.st_mode)) { 93bf215546Sopenharmony_ci DBG("DRM fd is not a character device as expected\n"); 94bf215546Sopenharmony_ci return false; 95bf215546Sopenharmony_ci } 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci len = snprintf(perf->sysfs_dev_dir, 98bf215546Sopenharmony_ci sizeof(perf->sysfs_dev_dir), 99bf215546Sopenharmony_ci "/sys/dev/char/%d:%d/device/drm", maj, min); 100bf215546Sopenharmony_ci if (len < 0 || len >= sizeof(perf->sysfs_dev_dir)) { 101bf215546Sopenharmony_ci DBG("Failed to concatenate sysfs path to drm device\n"); 102bf215546Sopenharmony_ci return false; 103bf215546Sopenharmony_ci } 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci drmdir = opendir(perf->sysfs_dev_dir); 106bf215546Sopenharmony_ci if (!drmdir) { 107bf215546Sopenharmony_ci DBG("Failed to open %s: %m\n", perf->sysfs_dev_dir); 108bf215546Sopenharmony_ci return false; 109bf215546Sopenharmony_ci } 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci while ((drm_entry = readdir(drmdir))) { 112bf215546Sopenharmony_ci if (is_dir_or_link(drm_entry, perf->sysfs_dev_dir) && 113bf215546Sopenharmony_ci strncmp(drm_entry->d_name, "card", 4) == 0) 114bf215546Sopenharmony_ci { 115bf215546Sopenharmony_ci len = snprintf(perf->sysfs_dev_dir, 116bf215546Sopenharmony_ci sizeof(perf->sysfs_dev_dir), 117bf215546Sopenharmony_ci "/sys/dev/char/%d:%d/device/drm/%s", 118bf215546Sopenharmony_ci maj, min, drm_entry->d_name); 119bf215546Sopenharmony_ci closedir(drmdir); 120bf215546Sopenharmony_ci if (len < 0 || len >= sizeof(perf->sysfs_dev_dir)) 121bf215546Sopenharmony_ci return false; 122bf215546Sopenharmony_ci else 123bf215546Sopenharmony_ci return true; 124bf215546Sopenharmony_ci } 125bf215546Sopenharmony_ci } 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci closedir(drmdir); 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci DBG("Failed to find cardX directory under /sys/dev/char/%d:%d/device/drm\n", 130bf215546Sopenharmony_ci maj, min); 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci return false; 133bf215546Sopenharmony_ci} 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_cistatic bool 136bf215546Sopenharmony_ciread_file_uint64(const char *file, uint64_t *val) 137bf215546Sopenharmony_ci{ 138bf215546Sopenharmony_ci char buf[32]; 139bf215546Sopenharmony_ci int fd, n; 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci fd = open(file, 0); 142bf215546Sopenharmony_ci if (fd < 0) 143bf215546Sopenharmony_ci return false; 144bf215546Sopenharmony_ci while ((n = read(fd, buf, sizeof (buf) - 1)) < 0 && 145bf215546Sopenharmony_ci errno == EINTR); 146bf215546Sopenharmony_ci close(fd); 147bf215546Sopenharmony_ci if (n < 0) 148bf215546Sopenharmony_ci return false; 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci buf[n] = '\0'; 151bf215546Sopenharmony_ci *val = strtoull(buf, NULL, 0); 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci return true; 154bf215546Sopenharmony_ci} 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_cistatic bool 157bf215546Sopenharmony_ciread_sysfs_drm_device_file_uint64(struct intel_perf_config *perf, 158bf215546Sopenharmony_ci const char *file, 159bf215546Sopenharmony_ci uint64_t *value) 160bf215546Sopenharmony_ci{ 161bf215546Sopenharmony_ci char buf[512]; 162bf215546Sopenharmony_ci int len; 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci len = snprintf(buf, sizeof(buf), "%s/%s", perf->sysfs_dev_dir, file); 165bf215546Sopenharmony_ci if (len < 0 || len >= sizeof(buf)) { 166bf215546Sopenharmony_ci DBG("Failed to concatenate sys filename to read u64 from\n"); 167bf215546Sopenharmony_ci return false; 168bf215546Sopenharmony_ci } 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci return read_file_uint64(buf, value); 171bf215546Sopenharmony_ci} 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_cistatic void 174bf215546Sopenharmony_ciregister_oa_config(struct intel_perf_config *perf, 175bf215546Sopenharmony_ci const struct intel_device_info *devinfo, 176bf215546Sopenharmony_ci const struct intel_perf_query_info *query, 177bf215546Sopenharmony_ci uint64_t config_id) 178bf215546Sopenharmony_ci{ 179bf215546Sopenharmony_ci struct intel_perf_query_info *registered_query = 180bf215546Sopenharmony_ci intel_perf_append_query_info(perf, 0); 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci *registered_query = *query; 183bf215546Sopenharmony_ci registered_query->oa_metrics_set_id = config_id; 184bf215546Sopenharmony_ci DBG("metric set registered: id = %" PRIu64", guid = %s\n", 185bf215546Sopenharmony_ci registered_query->oa_metrics_set_id, query->guid); 186bf215546Sopenharmony_ci} 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_cistatic void 189bf215546Sopenharmony_cienumerate_sysfs_metrics(struct intel_perf_config *perf, 190bf215546Sopenharmony_ci const struct intel_device_info *devinfo) 191bf215546Sopenharmony_ci{ 192bf215546Sopenharmony_ci DIR *metricsdir = NULL; 193bf215546Sopenharmony_ci struct dirent *metric_entry; 194bf215546Sopenharmony_ci char buf[256]; 195bf215546Sopenharmony_ci int len; 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci len = snprintf(buf, sizeof(buf), "%s/metrics", perf->sysfs_dev_dir); 198bf215546Sopenharmony_ci if (len < 0 || len >= sizeof(buf)) { 199bf215546Sopenharmony_ci DBG("Failed to concatenate path to sysfs metrics/ directory\n"); 200bf215546Sopenharmony_ci return; 201bf215546Sopenharmony_ci } 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci metricsdir = opendir(buf); 204bf215546Sopenharmony_ci if (!metricsdir) { 205bf215546Sopenharmony_ci DBG("Failed to open %s: %m\n", buf); 206bf215546Sopenharmony_ci return; 207bf215546Sopenharmony_ci } 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci while ((metric_entry = readdir(metricsdir))) { 210bf215546Sopenharmony_ci struct hash_entry *entry; 211bf215546Sopenharmony_ci if (!is_dir_or_link(metric_entry, buf) || 212bf215546Sopenharmony_ci metric_entry->d_name[0] == '.') 213bf215546Sopenharmony_ci continue; 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci DBG("metric set: %s\n", metric_entry->d_name); 216bf215546Sopenharmony_ci entry = _mesa_hash_table_search(perf->oa_metrics_table, 217bf215546Sopenharmony_ci metric_entry->d_name); 218bf215546Sopenharmony_ci if (entry) { 219bf215546Sopenharmony_ci uint64_t id; 220bf215546Sopenharmony_ci if (!intel_perf_load_metric_id(perf, metric_entry->d_name, &id)) { 221bf215546Sopenharmony_ci DBG("Failed to read metric set id from %s: %m", buf); 222bf215546Sopenharmony_ci continue; 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci register_oa_config(perf, devinfo, 226bf215546Sopenharmony_ci (const struct intel_perf_query_info *)entry->data, id); 227bf215546Sopenharmony_ci } else 228bf215546Sopenharmony_ci DBG("metric set not known by mesa (skipping)\n"); 229bf215546Sopenharmony_ci } 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci closedir(metricsdir); 232bf215546Sopenharmony_ci} 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_cistatic void 235bf215546Sopenharmony_ciadd_all_metrics(struct intel_perf_config *perf, 236bf215546Sopenharmony_ci const struct intel_device_info *devinfo) 237bf215546Sopenharmony_ci{ 238bf215546Sopenharmony_ci hash_table_foreach(perf->oa_metrics_table, entry) { 239bf215546Sopenharmony_ci const struct intel_perf_query_info *query = entry->data; 240bf215546Sopenharmony_ci register_oa_config(perf, devinfo, query, 0); 241bf215546Sopenharmony_ci } 242bf215546Sopenharmony_ci} 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_cistatic bool 245bf215546Sopenharmony_cikernel_has_dynamic_config_support(struct intel_perf_config *perf, int fd) 246bf215546Sopenharmony_ci{ 247bf215546Sopenharmony_ci uint64_t invalid_config_id = UINT64_MAX; 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci return intel_ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, 250bf215546Sopenharmony_ci &invalid_config_id) < 0 && errno == ENOENT; 251bf215546Sopenharmony_ci} 252bf215546Sopenharmony_ci 253bf215546Sopenharmony_cistatic bool 254bf215546Sopenharmony_cii915_query_perf_config_supported(struct intel_perf_config *perf, int fd) 255bf215546Sopenharmony_ci{ 256bf215546Sopenharmony_ci int32_t length = 0; 257bf215546Sopenharmony_ci return !intel_i915_query_flags(fd, DRM_I915_QUERY_PERF_CONFIG, 258bf215546Sopenharmony_ci DRM_I915_QUERY_PERF_CONFIG_LIST, 259bf215546Sopenharmony_ci NULL, &length); 260bf215546Sopenharmony_ci} 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_cistatic bool 263bf215546Sopenharmony_cii915_query_perf_config_data(struct intel_perf_config *perf, 264bf215546Sopenharmony_ci int fd, const char *guid, 265bf215546Sopenharmony_ci struct drm_i915_perf_oa_config *config) 266bf215546Sopenharmony_ci{ 267bf215546Sopenharmony_ci char data[sizeof(struct drm_i915_query_perf_config) + 268bf215546Sopenharmony_ci sizeof(struct drm_i915_perf_oa_config)] = {}; 269bf215546Sopenharmony_ci struct drm_i915_query_perf_config *query = (void *)data; 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci memcpy(query->uuid, guid, sizeof(query->uuid)); 272bf215546Sopenharmony_ci memcpy(query->data, config, sizeof(*config)); 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci int32_t item_length = sizeof(data); 275bf215546Sopenharmony_ci if (intel_i915_query_flags(fd, DRM_I915_QUERY_PERF_CONFIG, 276bf215546Sopenharmony_ci DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, 277bf215546Sopenharmony_ci query, &item_length)) 278bf215546Sopenharmony_ci return false; 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ci memcpy(config, query->data, sizeof(*config)); 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ci return true; 283bf215546Sopenharmony_ci} 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_cibool 286bf215546Sopenharmony_ciintel_perf_load_metric_id(struct intel_perf_config *perf_cfg, 287bf215546Sopenharmony_ci const char *guid, 288bf215546Sopenharmony_ci uint64_t *metric_id) 289bf215546Sopenharmony_ci{ 290bf215546Sopenharmony_ci char config_path[280]; 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id", 293bf215546Sopenharmony_ci perf_cfg->sysfs_dev_dir, guid); 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci /* Don't recreate already loaded configs. */ 296bf215546Sopenharmony_ci return read_file_uint64(config_path, metric_id); 297bf215546Sopenharmony_ci} 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_cistatic uint64_t 300bf215546Sopenharmony_cii915_add_config(struct intel_perf_config *perf, int fd, 301bf215546Sopenharmony_ci const struct intel_perf_registers *config, 302bf215546Sopenharmony_ci const char *guid) 303bf215546Sopenharmony_ci{ 304bf215546Sopenharmony_ci struct drm_i915_perf_oa_config i915_config = { 0, }; 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci memcpy(i915_config.uuid, guid, sizeof(i915_config.uuid)); 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_ci i915_config.n_mux_regs = config->n_mux_regs; 309bf215546Sopenharmony_ci i915_config.mux_regs_ptr = to_const_user_pointer(config->mux_regs); 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci i915_config.n_boolean_regs = config->n_b_counter_regs; 312bf215546Sopenharmony_ci i915_config.boolean_regs_ptr = to_const_user_pointer(config->b_counter_regs); 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci i915_config.n_flex_regs = config->n_flex_regs; 315bf215546Sopenharmony_ci i915_config.flex_regs_ptr = to_const_user_pointer(config->flex_regs); 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci int ret = intel_ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &i915_config); 318bf215546Sopenharmony_ci return ret > 0 ? ret : 0; 319bf215546Sopenharmony_ci} 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_cistatic void 322bf215546Sopenharmony_ciinit_oa_configs(struct intel_perf_config *perf, int fd, 323bf215546Sopenharmony_ci const struct intel_device_info *devinfo) 324bf215546Sopenharmony_ci{ 325bf215546Sopenharmony_ci hash_table_foreach(perf->oa_metrics_table, entry) { 326bf215546Sopenharmony_ci const struct intel_perf_query_info *query = entry->data; 327bf215546Sopenharmony_ci uint64_t config_id; 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci if (intel_perf_load_metric_id(perf, query->guid, &config_id)) { 330bf215546Sopenharmony_ci DBG("metric set: %s (already loaded)\n", query->guid); 331bf215546Sopenharmony_ci register_oa_config(perf, devinfo, query, config_id); 332bf215546Sopenharmony_ci continue; 333bf215546Sopenharmony_ci } 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci int ret = i915_add_config(perf, fd, &query->config, query->guid); 336bf215546Sopenharmony_ci if (ret < 0) { 337bf215546Sopenharmony_ci DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n", 338bf215546Sopenharmony_ci query->name, query->guid, strerror(errno)); 339bf215546Sopenharmony_ci continue; 340bf215546Sopenharmony_ci } 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci register_oa_config(perf, devinfo, query, ret); 343bf215546Sopenharmony_ci DBG("metric set: %s (added)\n", query->guid); 344bf215546Sopenharmony_ci } 345bf215546Sopenharmony_ci} 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_cistatic void 348bf215546Sopenharmony_cicompute_topology_builtins(struct intel_perf_config *perf) 349bf215546Sopenharmony_ci{ 350bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &perf->devinfo; 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_ci perf->sys_vars.slice_mask = devinfo->slice_masks; 353bf215546Sopenharmony_ci perf->sys_vars.n_eu_slices = devinfo->num_slices; 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci perf->sys_vars.n_eu_slice0123 = 0; 356bf215546Sopenharmony_ci for (int s = 0; s < MIN2(4, devinfo->max_slices); s++) { 357bf215546Sopenharmony_ci if (!intel_device_info_slice_available(devinfo, s)) 358bf215546Sopenharmony_ci continue; 359bf215546Sopenharmony_ci 360bf215546Sopenharmony_ci for (int ss = 0; ss < devinfo->max_subslices_per_slice; ss++) { 361bf215546Sopenharmony_ci if (!intel_device_info_subslice_available(devinfo, s, ss)) 362bf215546Sopenharmony_ci continue; 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci for (int eu = 0; eu < devinfo->max_eus_per_subslice; eu++) { 365bf215546Sopenharmony_ci if (intel_device_info_eu_available(devinfo, s, ss, eu)) 366bf215546Sopenharmony_ci perf->sys_vars.n_eu_slice0123++; 367bf215546Sopenharmony_ci } 368bf215546Sopenharmony_ci } 369bf215546Sopenharmony_ci } 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci for (int i = 0; i < sizeof(devinfo->subslice_masks[i]); i++) { 372bf215546Sopenharmony_ci perf->sys_vars.n_eu_sub_slices += 373bf215546Sopenharmony_ci util_bitcount(devinfo->subslice_masks[i]); 374bf215546Sopenharmony_ci } 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci for (int i = 0; i < sizeof(devinfo->eu_masks); i++) 377bf215546Sopenharmony_ci perf->sys_vars.n_eus += util_bitcount(devinfo->eu_masks[i]); 378bf215546Sopenharmony_ci 379bf215546Sopenharmony_ci /* The subslice mask builtin contains bits for all slices. Prior to Gfx11 380bf215546Sopenharmony_ci * it had groups of 3bits for each slice, on Gfx11 and above it's 8bits for 381bf215546Sopenharmony_ci * each slice. 382bf215546Sopenharmony_ci * 383bf215546Sopenharmony_ci * Ideally equations would be updated to have a slice/subslice query 384bf215546Sopenharmony_ci * function/operator. 385bf215546Sopenharmony_ci */ 386bf215546Sopenharmony_ci perf->sys_vars.subslice_mask = 0; 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci int bits_per_subslice = devinfo->ver >= 11 ? 8 : 3; 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci for (int s = 0; s < util_last_bit(devinfo->slice_masks); s++) { 391bf215546Sopenharmony_ci for (int ss = 0; ss < (devinfo->subslice_slice_stride * 8); ss++) { 392bf215546Sopenharmony_ci if (intel_device_info_subslice_available(devinfo, s, ss)) 393bf215546Sopenharmony_ci perf->sys_vars.subslice_mask |= 1ULL << (s * bits_per_subslice + ss); 394bf215546Sopenharmony_ci } 395bf215546Sopenharmony_ci } 396bf215546Sopenharmony_ci} 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_cistatic bool 399bf215546Sopenharmony_ciinit_oa_sys_vars(struct intel_perf_config *perf, 400bf215546Sopenharmony_ci bool use_register_snapshots) 401bf215546Sopenharmony_ci{ 402bf215546Sopenharmony_ci uint64_t min_freq_mhz = 0, max_freq_mhz = 0; 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_ci if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) { 405bf215546Sopenharmony_ci if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz)) 406bf215546Sopenharmony_ci return false; 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci if (!read_sysfs_drm_device_file_uint64(perf, "gt_max_freq_mhz", &max_freq_mhz)) 409bf215546Sopenharmony_ci return false; 410bf215546Sopenharmony_ci } else { 411bf215546Sopenharmony_ci min_freq_mhz = 300; 412bf215546Sopenharmony_ci max_freq_mhz = 1000; 413bf215546Sopenharmony_ci } 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci memset(&perf->sys_vars, 0, sizeof(perf->sys_vars)); 416bf215546Sopenharmony_ci perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000; 417bf215546Sopenharmony_ci perf->sys_vars.gt_max_freq = max_freq_mhz * 1000000; 418bf215546Sopenharmony_ci perf->sys_vars.query_mode = use_register_snapshots; 419bf215546Sopenharmony_ci compute_topology_builtins(perf); 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci return true; 422bf215546Sopenharmony_ci} 423bf215546Sopenharmony_ci 424bf215546Sopenharmony_citypedef void (*perf_register_oa_queries_t)(struct intel_perf_config *); 425bf215546Sopenharmony_ci 426bf215546Sopenharmony_cistatic perf_register_oa_queries_t 427bf215546Sopenharmony_ciget_register_queries_function(const struct intel_device_info *devinfo) 428bf215546Sopenharmony_ci{ 429bf215546Sopenharmony_ci switch (devinfo->platform) { 430bf215546Sopenharmony_ci case INTEL_PLATFORM_HSW: 431bf215546Sopenharmony_ci return intel_oa_register_queries_hsw; 432bf215546Sopenharmony_ci case INTEL_PLATFORM_CHV: 433bf215546Sopenharmony_ci return intel_oa_register_queries_chv; 434bf215546Sopenharmony_ci case INTEL_PLATFORM_BDW: 435bf215546Sopenharmony_ci return intel_oa_register_queries_bdw; 436bf215546Sopenharmony_ci case INTEL_PLATFORM_BXT: 437bf215546Sopenharmony_ci return intel_oa_register_queries_bxt; 438bf215546Sopenharmony_ci case INTEL_PLATFORM_SKL: 439bf215546Sopenharmony_ci if (devinfo->gt == 2) 440bf215546Sopenharmony_ci return intel_oa_register_queries_sklgt2; 441bf215546Sopenharmony_ci if (devinfo->gt == 3) 442bf215546Sopenharmony_ci return intel_oa_register_queries_sklgt3; 443bf215546Sopenharmony_ci if (devinfo->gt == 4) 444bf215546Sopenharmony_ci return intel_oa_register_queries_sklgt4; 445bf215546Sopenharmony_ci return NULL; 446bf215546Sopenharmony_ci case INTEL_PLATFORM_KBL: 447bf215546Sopenharmony_ci if (devinfo->gt == 2) 448bf215546Sopenharmony_ci return intel_oa_register_queries_kblgt2; 449bf215546Sopenharmony_ci if (devinfo->gt == 3) 450bf215546Sopenharmony_ci return intel_oa_register_queries_kblgt3; 451bf215546Sopenharmony_ci return NULL; 452bf215546Sopenharmony_ci case INTEL_PLATFORM_GLK: 453bf215546Sopenharmony_ci return intel_oa_register_queries_glk; 454bf215546Sopenharmony_ci case INTEL_PLATFORM_CFL: 455bf215546Sopenharmony_ci if (devinfo->gt == 2) 456bf215546Sopenharmony_ci return intel_oa_register_queries_cflgt2; 457bf215546Sopenharmony_ci if (devinfo->gt == 3) 458bf215546Sopenharmony_ci return intel_oa_register_queries_cflgt3; 459bf215546Sopenharmony_ci return NULL; 460bf215546Sopenharmony_ci case INTEL_PLATFORM_ICL: 461bf215546Sopenharmony_ci return intel_oa_register_queries_icl; 462bf215546Sopenharmony_ci case INTEL_PLATFORM_EHL: 463bf215546Sopenharmony_ci return intel_oa_register_queries_ehl; 464bf215546Sopenharmony_ci case INTEL_PLATFORM_TGL: 465bf215546Sopenharmony_ci if (devinfo->gt == 1) 466bf215546Sopenharmony_ci return intel_oa_register_queries_tglgt1; 467bf215546Sopenharmony_ci if (devinfo->gt == 2) 468bf215546Sopenharmony_ci return intel_oa_register_queries_tglgt2; 469bf215546Sopenharmony_ci return NULL; 470bf215546Sopenharmony_ci case INTEL_PLATFORM_RKL: 471bf215546Sopenharmony_ci return intel_oa_register_queries_rkl; 472bf215546Sopenharmony_ci case INTEL_PLATFORM_DG1: 473bf215546Sopenharmony_ci return intel_oa_register_queries_dg1; 474bf215546Sopenharmony_ci case INTEL_PLATFORM_ADL: 475bf215546Sopenharmony_ci return intel_oa_register_queries_adl; 476bf215546Sopenharmony_ci default: 477bf215546Sopenharmony_ci return NULL; 478bf215546Sopenharmony_ci } 479bf215546Sopenharmony_ci} 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_cistatic int 482bf215546Sopenharmony_ciintel_perf_compare_counter_names(const void *v1, const void *v2) 483bf215546Sopenharmony_ci{ 484bf215546Sopenharmony_ci const struct intel_perf_query_counter *c1 = v1; 485bf215546Sopenharmony_ci const struct intel_perf_query_counter *c2 = v2; 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci return strcmp(c1->name, c2->name); 488bf215546Sopenharmony_ci} 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_cistatic void 491bf215546Sopenharmony_cisort_query(struct intel_perf_query_info *q) 492bf215546Sopenharmony_ci{ 493bf215546Sopenharmony_ci qsort(q->counters, q->n_counters, sizeof(q->counters[0]), 494bf215546Sopenharmony_ci intel_perf_compare_counter_names); 495bf215546Sopenharmony_ci} 496bf215546Sopenharmony_ci 497bf215546Sopenharmony_cistatic void 498bf215546Sopenharmony_ciload_pipeline_statistic_metrics(struct intel_perf_config *perf_cfg, 499bf215546Sopenharmony_ci const struct intel_device_info *devinfo) 500bf215546Sopenharmony_ci{ 501bf215546Sopenharmony_ci struct intel_perf_query_info *query = 502bf215546Sopenharmony_ci intel_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS); 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_ci query->kind = INTEL_PERF_QUERY_TYPE_PIPELINE; 505bf215546Sopenharmony_ci query->name = "Pipeline Statistics Registers"; 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_ci intel_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT, 508bf215546Sopenharmony_ci "N vertices submitted"); 509bf215546Sopenharmony_ci intel_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, 510bf215546Sopenharmony_ci "N primitives submitted"); 511bf215546Sopenharmony_ci intel_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT, 512bf215546Sopenharmony_ci "N vertex shader invocations"); 513bf215546Sopenharmony_ci 514bf215546Sopenharmony_ci if (devinfo->ver == 6) { 515bf215546Sopenharmony_ci intel_perf_query_add_stat_reg(query, GFX6_SO_PRIM_STORAGE_NEEDED, 1, 1, 516bf215546Sopenharmony_ci "SO_PRIM_STORAGE_NEEDED", 517bf215546Sopenharmony_ci "N geometry shader stream-out primitives (total)"); 518bf215546Sopenharmony_ci intel_perf_query_add_stat_reg(query, GFX6_SO_NUM_PRIMS_WRITTEN, 1, 1, 519bf215546Sopenharmony_ci "SO_NUM_PRIMS_WRITTEN", 520bf215546Sopenharmony_ci "N geometry shader stream-out primitives (written)"); 521bf215546Sopenharmony_ci } else { 522bf215546Sopenharmony_ci intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(0), 1, 1, 523bf215546Sopenharmony_ci "SO_PRIM_STORAGE_NEEDED (Stream 0)", 524bf215546Sopenharmony_ci "N stream-out (stream 0) primitives (total)"); 525bf215546Sopenharmony_ci intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(1), 1, 1, 526bf215546Sopenharmony_ci "SO_PRIM_STORAGE_NEEDED (Stream 1)", 527bf215546Sopenharmony_ci "N stream-out (stream 1) primitives (total)"); 528bf215546Sopenharmony_ci intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(2), 1, 1, 529bf215546Sopenharmony_ci "SO_PRIM_STORAGE_NEEDED (Stream 2)", 530bf215546Sopenharmony_ci "N stream-out (stream 2) primitives (total)"); 531bf215546Sopenharmony_ci intel_perf_query_add_stat_reg(query, GFX7_SO_PRIM_STORAGE_NEEDED(3), 1, 1, 532bf215546Sopenharmony_ci "SO_PRIM_STORAGE_NEEDED (Stream 3)", 533bf215546Sopenharmony_ci "N stream-out (stream 3) primitives (total)"); 534bf215546Sopenharmony_ci intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(0), 1, 1, 535bf215546Sopenharmony_ci "SO_NUM_PRIMS_WRITTEN (Stream 0)", 536bf215546Sopenharmony_ci "N stream-out (stream 0) primitives (written)"); 537bf215546Sopenharmony_ci intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(1), 1, 1, 538bf215546Sopenharmony_ci "SO_NUM_PRIMS_WRITTEN (Stream 1)", 539bf215546Sopenharmony_ci "N stream-out (stream 1) primitives (written)"); 540bf215546Sopenharmony_ci intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(2), 1, 1, 541bf215546Sopenharmony_ci "SO_NUM_PRIMS_WRITTEN (Stream 2)", 542bf215546Sopenharmony_ci "N stream-out (stream 2) primitives (written)"); 543bf215546Sopenharmony_ci intel_perf_query_add_stat_reg(query, GFX7_SO_NUM_PRIMS_WRITTEN(3), 1, 1, 544bf215546Sopenharmony_ci "SO_NUM_PRIMS_WRITTEN (Stream 3)", 545bf215546Sopenharmony_ci "N stream-out (stream 3) primitives (written)"); 546bf215546Sopenharmony_ci } 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci intel_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT, 549bf215546Sopenharmony_ci "N TCS shader invocations"); 550bf215546Sopenharmony_ci intel_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT, 551bf215546Sopenharmony_ci "N TES shader invocations"); 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci intel_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT, 554bf215546Sopenharmony_ci "N geometry shader invocations"); 555bf215546Sopenharmony_ci intel_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, 556bf215546Sopenharmony_ci "N geometry shader primitives emitted"); 557bf215546Sopenharmony_ci 558bf215546Sopenharmony_ci intel_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT, 559bf215546Sopenharmony_ci "N primitives entering clipping"); 560bf215546Sopenharmony_ci intel_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, 561bf215546Sopenharmony_ci "N primitives leaving clipping"); 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci if (devinfo->verx10 == 75 || devinfo->ver == 8) { 564bf215546Sopenharmony_ci intel_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, 565bf215546Sopenharmony_ci "N fragment shader invocations", 566bf215546Sopenharmony_ci "N fragment shader invocations"); 567bf215546Sopenharmony_ci } else { 568bf215546Sopenharmony_ci intel_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT, 569bf215546Sopenharmony_ci "N fragment shader invocations"); 570bf215546Sopenharmony_ci } 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_ci intel_perf_query_add_basic_stat_reg(query, PS_DEPTH_COUNT, 573bf215546Sopenharmony_ci "N z-pass fragments"); 574bf215546Sopenharmony_ci 575bf215546Sopenharmony_ci if (devinfo->ver >= 7) { 576bf215546Sopenharmony_ci intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT, 577bf215546Sopenharmony_ci "N compute shader invocations"); 578bf215546Sopenharmony_ci } 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci query->data_size = sizeof(uint64_t) * query->n_counters; 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_ci sort_query(query); 583bf215546Sopenharmony_ci} 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_cistatic int 586bf215546Sopenharmony_cii915_perf_version(int drm_fd) 587bf215546Sopenharmony_ci{ 588bf215546Sopenharmony_ci int tmp; 589bf215546Sopenharmony_ci drm_i915_getparam_t gp = { 590bf215546Sopenharmony_ci .param = I915_PARAM_PERF_REVISION, 591bf215546Sopenharmony_ci .value = &tmp, 592bf215546Sopenharmony_ci }; 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci int ret = intel_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp); 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci /* Return 0 if this getparam is not supported, the first version supported 597bf215546Sopenharmony_ci * is 1. 598bf215546Sopenharmony_ci */ 599bf215546Sopenharmony_ci return ret < 0 ? 0 : tmp; 600bf215546Sopenharmony_ci} 601bf215546Sopenharmony_ci 602bf215546Sopenharmony_cistatic void 603bf215546Sopenharmony_cii915_get_sseu(int drm_fd, struct drm_i915_gem_context_param_sseu *sseu) 604bf215546Sopenharmony_ci{ 605bf215546Sopenharmony_ci struct drm_i915_gem_context_param arg = { 606bf215546Sopenharmony_ci .param = I915_CONTEXT_PARAM_SSEU, 607bf215546Sopenharmony_ci .size = sizeof(*sseu), 608bf215546Sopenharmony_ci .value = to_user_pointer(sseu) 609bf215546Sopenharmony_ci }; 610bf215546Sopenharmony_ci 611bf215546Sopenharmony_ci intel_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg); 612bf215546Sopenharmony_ci} 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_cistatic inline int 615bf215546Sopenharmony_cicompare_str_or_null(const char *s1, const char *s2) 616bf215546Sopenharmony_ci{ 617bf215546Sopenharmony_ci if (s1 == NULL && s2 == NULL) 618bf215546Sopenharmony_ci return 0; 619bf215546Sopenharmony_ci if (s1 == NULL) 620bf215546Sopenharmony_ci return -1; 621bf215546Sopenharmony_ci if (s2 == NULL) 622bf215546Sopenharmony_ci return 1; 623bf215546Sopenharmony_ci 624bf215546Sopenharmony_ci return strcmp(s1, s2); 625bf215546Sopenharmony_ci} 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_cistatic int 628bf215546Sopenharmony_cicompare_counter_categories_and_names(const void *_c1, const void *_c2) 629bf215546Sopenharmony_ci{ 630bf215546Sopenharmony_ci const struct intel_perf_query_counter_info *c1 = (const struct intel_perf_query_counter_info *)_c1; 631bf215546Sopenharmony_ci const struct intel_perf_query_counter_info *c2 = (const struct intel_perf_query_counter_info *)_c2; 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci /* pipeline counters don't have an assigned category */ 634bf215546Sopenharmony_ci int r = compare_str_or_null(c1->counter->category, c2->counter->category); 635bf215546Sopenharmony_ci if (r) 636bf215546Sopenharmony_ci return r; 637bf215546Sopenharmony_ci 638bf215546Sopenharmony_ci return strcmp(c1->counter->name, c2->counter->name); 639bf215546Sopenharmony_ci} 640bf215546Sopenharmony_ci 641bf215546Sopenharmony_cistatic void 642bf215546Sopenharmony_cibuild_unique_counter_list(struct intel_perf_config *perf) 643bf215546Sopenharmony_ci{ 644bf215546Sopenharmony_ci assert(perf->n_queries < 64); 645bf215546Sopenharmony_ci 646bf215546Sopenharmony_ci size_t max_counters = 0; 647bf215546Sopenharmony_ci 648bf215546Sopenharmony_ci for (int q = 0; q < perf->n_queries; q++) 649bf215546Sopenharmony_ci max_counters += perf->queries[q].n_counters; 650bf215546Sopenharmony_ci 651bf215546Sopenharmony_ci /* 652bf215546Sopenharmony_ci * Allocate big enough array to hold maximum possible number of counters. 653bf215546Sopenharmony_ci * We can't alloc it small and realloc when needed because the hash table 654bf215546Sopenharmony_ci * below contains pointers to this array. 655bf215546Sopenharmony_ci */ 656bf215546Sopenharmony_ci struct intel_perf_query_counter_info *counter_infos = 657bf215546Sopenharmony_ci ralloc_array_size(perf, sizeof(counter_infos[0]), max_counters); 658bf215546Sopenharmony_ci 659bf215546Sopenharmony_ci perf->n_counters = 0; 660bf215546Sopenharmony_ci 661bf215546Sopenharmony_ci struct hash_table *counters_table = 662bf215546Sopenharmony_ci _mesa_hash_table_create(perf, 663bf215546Sopenharmony_ci _mesa_hash_string, 664bf215546Sopenharmony_ci _mesa_key_string_equal); 665bf215546Sopenharmony_ci struct hash_entry *entry; 666bf215546Sopenharmony_ci for (int q = 0; q < perf->n_queries ; q++) { 667bf215546Sopenharmony_ci struct intel_perf_query_info *query = &perf->queries[q]; 668bf215546Sopenharmony_ci 669bf215546Sopenharmony_ci for (int c = 0; c < query->n_counters; c++) { 670bf215546Sopenharmony_ci struct intel_perf_query_counter *counter; 671bf215546Sopenharmony_ci struct intel_perf_query_counter_info *counter_info; 672bf215546Sopenharmony_ci 673bf215546Sopenharmony_ci counter = &query->counters[c]; 674bf215546Sopenharmony_ci entry = _mesa_hash_table_search(counters_table, counter->symbol_name); 675bf215546Sopenharmony_ci 676bf215546Sopenharmony_ci if (entry) { 677bf215546Sopenharmony_ci counter_info = entry->data; 678bf215546Sopenharmony_ci counter_info->query_mask |= BITFIELD64_BIT(q); 679bf215546Sopenharmony_ci continue; 680bf215546Sopenharmony_ci } 681bf215546Sopenharmony_ci assert(perf->n_counters < max_counters); 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_ci counter_info = &counter_infos[perf->n_counters++]; 684bf215546Sopenharmony_ci counter_info->counter = counter; 685bf215546Sopenharmony_ci counter_info->query_mask = BITFIELD64_BIT(q); 686bf215546Sopenharmony_ci 687bf215546Sopenharmony_ci counter_info->location.group_idx = q; 688bf215546Sopenharmony_ci counter_info->location.counter_idx = c; 689bf215546Sopenharmony_ci 690bf215546Sopenharmony_ci _mesa_hash_table_insert(counters_table, counter->symbol_name, counter_info); 691bf215546Sopenharmony_ci } 692bf215546Sopenharmony_ci } 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_ci _mesa_hash_table_destroy(counters_table, NULL); 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_ci /* Now we can realloc counter_infos array because hash table doesn't exist. */ 697bf215546Sopenharmony_ci perf->counter_infos = reralloc_array_size(perf, counter_infos, 698bf215546Sopenharmony_ci sizeof(counter_infos[0]), perf->n_counters); 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_ci qsort(perf->counter_infos, perf->n_counters, sizeof(perf->counter_infos[0]), 701bf215546Sopenharmony_ci compare_counter_categories_and_names); 702bf215546Sopenharmony_ci} 703bf215546Sopenharmony_ci 704bf215546Sopenharmony_cistatic bool 705bf215546Sopenharmony_cioa_metrics_available(struct intel_perf_config *perf, int fd, 706bf215546Sopenharmony_ci const struct intel_device_info *devinfo, 707bf215546Sopenharmony_ci bool use_register_snapshots) 708bf215546Sopenharmony_ci{ 709bf215546Sopenharmony_ci perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo); 710bf215546Sopenharmony_ci bool i915_perf_oa_available = false; 711bf215546Sopenharmony_ci struct stat sb; 712bf215546Sopenharmony_ci 713bf215546Sopenharmony_ci perf->devinfo = *devinfo; 714bf215546Sopenharmony_ci perf->i915_query_supported = i915_query_perf_config_supported(perf, fd); 715bf215546Sopenharmony_ci perf->i915_perf_version = i915_perf_version(fd); 716bf215546Sopenharmony_ci 717bf215546Sopenharmony_ci /* TODO: We should query this from i915 */ 718bf215546Sopenharmony_ci if (intel_device_info_is_dg2(devinfo)) 719bf215546Sopenharmony_ci perf->oa_timestamp_shift = 1; 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_ci perf->oa_timestamp_mask = 722bf215546Sopenharmony_ci 0xffffffffffffffffull >> (32 + perf->oa_timestamp_shift); 723bf215546Sopenharmony_ci 724bf215546Sopenharmony_ci /* Record the default SSEU configuration. */ 725bf215546Sopenharmony_ci i915_get_sseu(fd, &perf->sseu); 726bf215546Sopenharmony_ci 727bf215546Sopenharmony_ci /* The existence of this sysctl parameter implies the kernel supports 728bf215546Sopenharmony_ci * the i915 perf interface. 729bf215546Sopenharmony_ci */ 730bf215546Sopenharmony_ci if (stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb) == 0) { 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci /* If _paranoid == 1 then on Gfx8+ we won't be able to access OA 733bf215546Sopenharmony_ci * metrics unless running as root. 734bf215546Sopenharmony_ci */ 735bf215546Sopenharmony_ci if (devinfo->platform == INTEL_PLATFORM_HSW) 736bf215546Sopenharmony_ci i915_perf_oa_available = true; 737bf215546Sopenharmony_ci else { 738bf215546Sopenharmony_ci uint64_t paranoid = 1; 739bf215546Sopenharmony_ci 740bf215546Sopenharmony_ci read_file_uint64("/proc/sys/dev/i915/perf_stream_paranoid", ¶noid); 741bf215546Sopenharmony_ci 742bf215546Sopenharmony_ci if (paranoid == 0 || geteuid() == 0) 743bf215546Sopenharmony_ci i915_perf_oa_available = true; 744bf215546Sopenharmony_ci } 745bf215546Sopenharmony_ci 746bf215546Sopenharmony_ci perf->platform_supported = oa_register != NULL; 747bf215546Sopenharmony_ci } 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_ci return i915_perf_oa_available && 750bf215546Sopenharmony_ci oa_register && 751bf215546Sopenharmony_ci get_sysfs_dev_dir(perf, fd) && 752bf215546Sopenharmony_ci init_oa_sys_vars(perf, use_register_snapshots); 753bf215546Sopenharmony_ci} 754bf215546Sopenharmony_ci 755bf215546Sopenharmony_cistatic void 756bf215546Sopenharmony_ciload_oa_metrics(struct intel_perf_config *perf, int fd, 757bf215546Sopenharmony_ci const struct intel_device_info *devinfo) 758bf215546Sopenharmony_ci{ 759bf215546Sopenharmony_ci int existing_queries = perf->n_queries; 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_ci perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo); 762bf215546Sopenharmony_ci 763bf215546Sopenharmony_ci perf->oa_metrics_table = 764bf215546Sopenharmony_ci _mesa_hash_table_create(perf, _mesa_hash_string, 765bf215546Sopenharmony_ci _mesa_key_string_equal); 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_ci /* Index all the metric sets mesa knows about before looking to see what 768bf215546Sopenharmony_ci * the kernel is advertising. 769bf215546Sopenharmony_ci */ 770bf215546Sopenharmony_ci oa_register(perf); 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_ci if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) { 773bf215546Sopenharmony_ci if (kernel_has_dynamic_config_support(perf, fd)) 774bf215546Sopenharmony_ci init_oa_configs(perf, fd, devinfo); 775bf215546Sopenharmony_ci else 776bf215546Sopenharmony_ci enumerate_sysfs_metrics(perf, devinfo); 777bf215546Sopenharmony_ci } else { 778bf215546Sopenharmony_ci add_all_metrics(perf, devinfo); 779bf215546Sopenharmony_ci } 780bf215546Sopenharmony_ci 781bf215546Sopenharmony_ci /* sort counters in each individual group created by this function by name */ 782bf215546Sopenharmony_ci for (int i = existing_queries; i < perf->n_queries; ++i) 783bf215546Sopenharmony_ci sort_query(&perf->queries[i]); 784bf215546Sopenharmony_ci 785bf215546Sopenharmony_ci /* Select a fallback OA metric. Look for the TestOa metric or use the last 786bf215546Sopenharmony_ci * one if no present (on HSW). 787bf215546Sopenharmony_ci */ 788bf215546Sopenharmony_ci for (int i = existing_queries; i < perf->n_queries; i++) { 789bf215546Sopenharmony_ci if (perf->queries[i].symbol_name && 790bf215546Sopenharmony_ci strcmp(perf->queries[i].symbol_name, "TestOa") == 0) { 791bf215546Sopenharmony_ci perf->fallback_raw_oa_metric = perf->queries[i].oa_metrics_set_id; 792bf215546Sopenharmony_ci break; 793bf215546Sopenharmony_ci } 794bf215546Sopenharmony_ci } 795bf215546Sopenharmony_ci if (perf->fallback_raw_oa_metric == 0 && perf->n_queries > 0) 796bf215546Sopenharmony_ci perf->fallback_raw_oa_metric = perf->queries[perf->n_queries - 1].oa_metrics_set_id; 797bf215546Sopenharmony_ci} 798bf215546Sopenharmony_ci 799bf215546Sopenharmony_cistruct intel_perf_registers * 800bf215546Sopenharmony_ciintel_perf_load_configuration(struct intel_perf_config *perf_cfg, int fd, const char *guid) 801bf215546Sopenharmony_ci{ 802bf215546Sopenharmony_ci if (!perf_cfg->i915_query_supported) 803bf215546Sopenharmony_ci return NULL; 804bf215546Sopenharmony_ci 805bf215546Sopenharmony_ci struct drm_i915_perf_oa_config i915_config = { 0, }; 806bf215546Sopenharmony_ci if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config)) 807bf215546Sopenharmony_ci return NULL; 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_ci struct intel_perf_registers *config = rzalloc(NULL, struct intel_perf_registers); 810bf215546Sopenharmony_ci config->n_flex_regs = i915_config.n_flex_regs; 811bf215546Sopenharmony_ci config->flex_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_flex_regs); 812bf215546Sopenharmony_ci config->n_mux_regs = i915_config.n_mux_regs; 813bf215546Sopenharmony_ci config->mux_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_mux_regs); 814bf215546Sopenharmony_ci config->n_b_counter_regs = i915_config.n_boolean_regs; 815bf215546Sopenharmony_ci config->b_counter_regs = rzalloc_array(config, struct intel_perf_query_register_prog, config->n_b_counter_regs); 816bf215546Sopenharmony_ci 817bf215546Sopenharmony_ci /* 818bf215546Sopenharmony_ci * struct intel_perf_query_register_prog maps exactly to the tuple of 819bf215546Sopenharmony_ci * (register offset, register value) returned by the i915. 820bf215546Sopenharmony_ci */ 821bf215546Sopenharmony_ci i915_config.flex_regs_ptr = to_const_user_pointer(config->flex_regs); 822bf215546Sopenharmony_ci i915_config.mux_regs_ptr = to_const_user_pointer(config->mux_regs); 823bf215546Sopenharmony_ci i915_config.boolean_regs_ptr = to_const_user_pointer(config->b_counter_regs); 824bf215546Sopenharmony_ci if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config)) { 825bf215546Sopenharmony_ci ralloc_free(config); 826bf215546Sopenharmony_ci return NULL; 827bf215546Sopenharmony_ci } 828bf215546Sopenharmony_ci 829bf215546Sopenharmony_ci return config; 830bf215546Sopenharmony_ci} 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_ciuint64_t 833bf215546Sopenharmony_ciintel_perf_store_configuration(struct intel_perf_config *perf_cfg, int fd, 834bf215546Sopenharmony_ci const struct intel_perf_registers *config, 835bf215546Sopenharmony_ci const char *guid) 836bf215546Sopenharmony_ci{ 837bf215546Sopenharmony_ci if (guid) 838bf215546Sopenharmony_ci return i915_add_config(perf_cfg, fd, config, guid); 839bf215546Sopenharmony_ci 840bf215546Sopenharmony_ci struct mesa_sha1 sha1_ctx; 841bf215546Sopenharmony_ci _mesa_sha1_init(&sha1_ctx); 842bf215546Sopenharmony_ci 843bf215546Sopenharmony_ci if (config->flex_regs) { 844bf215546Sopenharmony_ci _mesa_sha1_update(&sha1_ctx, config->flex_regs, 845bf215546Sopenharmony_ci sizeof(config->flex_regs[0]) * 846bf215546Sopenharmony_ci config->n_flex_regs); 847bf215546Sopenharmony_ci } 848bf215546Sopenharmony_ci if (config->mux_regs) { 849bf215546Sopenharmony_ci _mesa_sha1_update(&sha1_ctx, config->mux_regs, 850bf215546Sopenharmony_ci sizeof(config->mux_regs[0]) * 851bf215546Sopenharmony_ci config->n_mux_regs); 852bf215546Sopenharmony_ci } 853bf215546Sopenharmony_ci if (config->b_counter_regs) { 854bf215546Sopenharmony_ci _mesa_sha1_update(&sha1_ctx, config->b_counter_regs, 855bf215546Sopenharmony_ci sizeof(config->b_counter_regs[0]) * 856bf215546Sopenharmony_ci config->n_b_counter_regs); 857bf215546Sopenharmony_ci } 858bf215546Sopenharmony_ci 859bf215546Sopenharmony_ci uint8_t hash[20]; 860bf215546Sopenharmony_ci _mesa_sha1_final(&sha1_ctx, hash); 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci char formatted_hash[41]; 863bf215546Sopenharmony_ci _mesa_sha1_format(formatted_hash, hash); 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_ci char generated_guid[37]; 866bf215546Sopenharmony_ci snprintf(generated_guid, sizeof(generated_guid), 867bf215546Sopenharmony_ci "%.8s-%.4s-%.4s-%.4s-%.12s", 868bf215546Sopenharmony_ci &formatted_hash[0], &formatted_hash[8], 869bf215546Sopenharmony_ci &formatted_hash[8 + 4], &formatted_hash[8 + 4 + 4], 870bf215546Sopenharmony_ci &formatted_hash[8 + 4 + 4 + 4]); 871bf215546Sopenharmony_ci 872bf215546Sopenharmony_ci /* Check if already present. */ 873bf215546Sopenharmony_ci uint64_t id; 874bf215546Sopenharmony_ci if (intel_perf_load_metric_id(perf_cfg, generated_guid, &id)) 875bf215546Sopenharmony_ci return id; 876bf215546Sopenharmony_ci 877bf215546Sopenharmony_ci return i915_add_config(perf_cfg, fd, config, generated_guid); 878bf215546Sopenharmony_ci} 879bf215546Sopenharmony_ci 880bf215546Sopenharmony_cistatic uint64_t 881bf215546Sopenharmony_ciget_passes_mask(struct intel_perf_config *perf, 882bf215546Sopenharmony_ci const uint32_t *counter_indices, 883bf215546Sopenharmony_ci uint32_t counter_indices_count) 884bf215546Sopenharmony_ci{ 885bf215546Sopenharmony_ci uint64_t queries_mask = 0; 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci assert(perf->n_queries < 64); 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_ci /* Compute the number of passes by going through all counters N times (with 890bf215546Sopenharmony_ci * N the number of queries) to make sure we select the most constraining 891bf215546Sopenharmony_ci * counters first and look at the more flexible ones (that could be 892bf215546Sopenharmony_ci * obtained from multiple queries) later. That way we minimize the number 893bf215546Sopenharmony_ci * of passes required. 894bf215546Sopenharmony_ci */ 895bf215546Sopenharmony_ci for (uint32_t q = 0; q < perf->n_queries; q++) { 896bf215546Sopenharmony_ci for (uint32_t i = 0; i < counter_indices_count; i++) { 897bf215546Sopenharmony_ci assert(counter_indices[i] < perf->n_counters); 898bf215546Sopenharmony_ci 899bf215546Sopenharmony_ci uint32_t idx = counter_indices[i]; 900bf215546Sopenharmony_ci if (util_bitcount64(perf->counter_infos[idx].query_mask) != (q + 1)) 901bf215546Sopenharmony_ci continue; 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_ci if (queries_mask & perf->counter_infos[idx].query_mask) 904bf215546Sopenharmony_ci continue; 905bf215546Sopenharmony_ci 906bf215546Sopenharmony_ci queries_mask |= BITFIELD64_BIT(ffsll(perf->counter_infos[idx].query_mask) - 1); 907bf215546Sopenharmony_ci } 908bf215546Sopenharmony_ci } 909bf215546Sopenharmony_ci 910bf215546Sopenharmony_ci return queries_mask; 911bf215546Sopenharmony_ci} 912bf215546Sopenharmony_ci 913bf215546Sopenharmony_ciuint32_t 914bf215546Sopenharmony_ciintel_perf_get_n_passes(struct intel_perf_config *perf, 915bf215546Sopenharmony_ci const uint32_t *counter_indices, 916bf215546Sopenharmony_ci uint32_t counter_indices_count, 917bf215546Sopenharmony_ci struct intel_perf_query_info **pass_queries) 918bf215546Sopenharmony_ci{ 919bf215546Sopenharmony_ci uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count); 920bf215546Sopenharmony_ci 921bf215546Sopenharmony_ci if (pass_queries) { 922bf215546Sopenharmony_ci uint32_t pass = 0; 923bf215546Sopenharmony_ci for (uint32_t q = 0; q < perf->n_queries; q++) { 924bf215546Sopenharmony_ci if ((1ULL << q) & queries_mask) 925bf215546Sopenharmony_ci pass_queries[pass++] = &perf->queries[q]; 926bf215546Sopenharmony_ci } 927bf215546Sopenharmony_ci } 928bf215546Sopenharmony_ci 929bf215546Sopenharmony_ci return util_bitcount64(queries_mask); 930bf215546Sopenharmony_ci} 931bf215546Sopenharmony_ci 932bf215546Sopenharmony_civoid 933bf215546Sopenharmony_ciintel_perf_get_counters_passes(struct intel_perf_config *perf, 934bf215546Sopenharmony_ci const uint32_t *counter_indices, 935bf215546Sopenharmony_ci uint32_t counter_indices_count, 936bf215546Sopenharmony_ci struct intel_perf_counter_pass *counter_pass) 937bf215546Sopenharmony_ci{ 938bf215546Sopenharmony_ci uint64_t queries_mask = get_passes_mask(perf, counter_indices, counter_indices_count); 939bf215546Sopenharmony_ci ASSERTED uint32_t n_passes = util_bitcount64(queries_mask); 940bf215546Sopenharmony_ci 941bf215546Sopenharmony_ci for (uint32_t i = 0; i < counter_indices_count; i++) { 942bf215546Sopenharmony_ci assert(counter_indices[i] < perf->n_counters); 943bf215546Sopenharmony_ci 944bf215546Sopenharmony_ci uint32_t idx = counter_indices[i]; 945bf215546Sopenharmony_ci counter_pass[i].counter = perf->counter_infos[idx].counter; 946bf215546Sopenharmony_ci 947bf215546Sopenharmony_ci uint32_t query_idx = ffsll(perf->counter_infos[idx].query_mask & queries_mask) - 1; 948bf215546Sopenharmony_ci counter_pass[i].query = &perf->queries[query_idx]; 949bf215546Sopenharmony_ci 950bf215546Sopenharmony_ci uint32_t clear_bits = 63 - query_idx; 951bf215546Sopenharmony_ci counter_pass[i].pass = util_bitcount64((queries_mask << clear_bits) >> clear_bits) - 1; 952bf215546Sopenharmony_ci assert(counter_pass[i].pass < n_passes); 953bf215546Sopenharmony_ci } 954bf215546Sopenharmony_ci} 955bf215546Sopenharmony_ci 956bf215546Sopenharmony_ci/* Accumulate 32bits OA counters */ 957bf215546Sopenharmony_cistatic inline void 958bf215546Sopenharmony_ciaccumulate_uint32(const uint32_t *report0, 959bf215546Sopenharmony_ci const uint32_t *report1, 960bf215546Sopenharmony_ci uint64_t *accumulator) 961bf215546Sopenharmony_ci{ 962bf215546Sopenharmony_ci *accumulator += (uint32_t)(*report1 - *report0); 963bf215546Sopenharmony_ci} 964bf215546Sopenharmony_ci 965bf215546Sopenharmony_ci/* Accumulate 40bits OA counters */ 966bf215546Sopenharmony_cistatic inline void 967bf215546Sopenharmony_ciaccumulate_uint40(int a_index, 968bf215546Sopenharmony_ci const uint32_t *report0, 969bf215546Sopenharmony_ci const uint32_t *report1, 970bf215546Sopenharmony_ci uint64_t *accumulator) 971bf215546Sopenharmony_ci{ 972bf215546Sopenharmony_ci const uint8_t *high_bytes0 = (uint8_t *)(report0 + 40); 973bf215546Sopenharmony_ci const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40); 974bf215546Sopenharmony_ci uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32; 975bf215546Sopenharmony_ci uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32; 976bf215546Sopenharmony_ci uint64_t value0 = report0[a_index + 4] | high0; 977bf215546Sopenharmony_ci uint64_t value1 = report1[a_index + 4] | high1; 978bf215546Sopenharmony_ci uint64_t delta; 979bf215546Sopenharmony_ci 980bf215546Sopenharmony_ci if (value0 > value1) 981bf215546Sopenharmony_ci delta = (1ULL << 40) + value1 - value0; 982bf215546Sopenharmony_ci else 983bf215546Sopenharmony_ci delta = value1 - value0; 984bf215546Sopenharmony_ci 985bf215546Sopenharmony_ci *accumulator += delta; 986bf215546Sopenharmony_ci} 987bf215546Sopenharmony_ci 988bf215546Sopenharmony_cistatic void 989bf215546Sopenharmony_cigfx8_read_report_clock_ratios(const uint32_t *report, 990bf215546Sopenharmony_ci uint64_t *slice_freq_hz, 991bf215546Sopenharmony_ci uint64_t *unslice_freq_hz) 992bf215546Sopenharmony_ci{ 993bf215546Sopenharmony_ci /* The lower 16bits of the RPT_ID field of the OA reports contains a 994bf215546Sopenharmony_ci * snapshot of the bits coming from the RP_FREQ_NORMAL register and is 995bf215546Sopenharmony_ci * divided this way : 996bf215546Sopenharmony_ci * 997bf215546Sopenharmony_ci * RPT_ID[31:25]: RP_FREQ_NORMAL[20:14] (low squashed_slice_clock_frequency) 998bf215546Sopenharmony_ci * RPT_ID[10:9]: RP_FREQ_NORMAL[22:21] (high squashed_slice_clock_frequency) 999bf215546Sopenharmony_ci * RPT_ID[8:0]: RP_FREQ_NORMAL[31:23] (squashed_unslice_clock_frequency) 1000bf215546Sopenharmony_ci * 1001bf215546Sopenharmony_ci * RP_FREQ_NORMAL[31:23]: Software Unslice Ratio Request 1002bf215546Sopenharmony_ci * Multiple of 33.33MHz 2xclk (16 MHz 1xclk) 1003bf215546Sopenharmony_ci * 1004bf215546Sopenharmony_ci * RP_FREQ_NORMAL[22:14]: Software Slice Ratio Request 1005bf215546Sopenharmony_ci * Multiple of 33.33MHz 2xclk (16 MHz 1xclk) 1006bf215546Sopenharmony_ci */ 1007bf215546Sopenharmony_ci 1008bf215546Sopenharmony_ci uint32_t unslice_freq = report[0] & 0x1ff; 1009bf215546Sopenharmony_ci uint32_t slice_freq_low = (report[0] >> 25) & 0x7f; 1010bf215546Sopenharmony_ci uint32_t slice_freq_high = (report[0] >> 9) & 0x3; 1011bf215546Sopenharmony_ci uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7); 1012bf215546Sopenharmony_ci 1013bf215546Sopenharmony_ci *slice_freq_hz = slice_freq * 16666667ULL; 1014bf215546Sopenharmony_ci *unslice_freq_hz = unslice_freq * 16666667ULL; 1015bf215546Sopenharmony_ci} 1016bf215546Sopenharmony_ci 1017bf215546Sopenharmony_civoid 1018bf215546Sopenharmony_ciintel_perf_query_result_read_frequencies(struct intel_perf_query_result *result, 1019bf215546Sopenharmony_ci const struct intel_device_info *devinfo, 1020bf215546Sopenharmony_ci const uint32_t *start, 1021bf215546Sopenharmony_ci const uint32_t *end) 1022bf215546Sopenharmony_ci{ 1023bf215546Sopenharmony_ci /* Slice/Unslice frequency is only available in the OA reports when the 1024bf215546Sopenharmony_ci * "Disable OA reports due to clock ratio change" field in 1025bf215546Sopenharmony_ci * OA_DEBUG_REGISTER is set to 1. This is how the kernel programs this 1026bf215546Sopenharmony_ci * global register (see drivers/gpu/drm/i915/i915_perf.c) 1027bf215546Sopenharmony_ci * 1028bf215546Sopenharmony_ci * Documentation says this should be available on Gfx9+ but experimentation 1029bf215546Sopenharmony_ci * shows that Gfx8 reports similar values, so we enable it there too. 1030bf215546Sopenharmony_ci */ 1031bf215546Sopenharmony_ci if (devinfo->ver < 8) 1032bf215546Sopenharmony_ci return; 1033bf215546Sopenharmony_ci 1034bf215546Sopenharmony_ci gfx8_read_report_clock_ratios(start, 1035bf215546Sopenharmony_ci &result->slice_frequency[0], 1036bf215546Sopenharmony_ci &result->unslice_frequency[0]); 1037bf215546Sopenharmony_ci gfx8_read_report_clock_ratios(end, 1038bf215546Sopenharmony_ci &result->slice_frequency[1], 1039bf215546Sopenharmony_ci &result->unslice_frequency[1]); 1040bf215546Sopenharmony_ci} 1041bf215546Sopenharmony_ci 1042bf215546Sopenharmony_cistatic inline bool 1043bf215546Sopenharmony_cican_use_mi_rpc_bc_counters(const struct intel_device_info *devinfo) 1044bf215546Sopenharmony_ci{ 1045bf215546Sopenharmony_ci return devinfo->ver <= 11; 1046bf215546Sopenharmony_ci} 1047bf215546Sopenharmony_ci 1048bf215546Sopenharmony_ciuint64_t 1049bf215546Sopenharmony_ciintel_perf_report_timestamp(const struct intel_perf_query_info *query, 1050bf215546Sopenharmony_ci const uint32_t *report) 1051bf215546Sopenharmony_ci{ 1052bf215546Sopenharmony_ci return report[1] >> query->perf->oa_timestamp_shift; 1053bf215546Sopenharmony_ci} 1054bf215546Sopenharmony_ci 1055bf215546Sopenharmony_civoid 1056bf215546Sopenharmony_ciintel_perf_query_result_accumulate(struct intel_perf_query_result *result, 1057bf215546Sopenharmony_ci const struct intel_perf_query_info *query, 1058bf215546Sopenharmony_ci const uint32_t *start, 1059bf215546Sopenharmony_ci const uint32_t *end) 1060bf215546Sopenharmony_ci{ 1061bf215546Sopenharmony_ci int i; 1062bf215546Sopenharmony_ci 1063bf215546Sopenharmony_ci if (result->hw_id == INTEL_PERF_INVALID_CTX_ID && 1064bf215546Sopenharmony_ci start[2] != INTEL_PERF_INVALID_CTX_ID) 1065bf215546Sopenharmony_ci result->hw_id = start[2]; 1066bf215546Sopenharmony_ci if (result->reports_accumulated == 0) 1067bf215546Sopenharmony_ci result->begin_timestamp = intel_perf_report_timestamp(query, start); 1068bf215546Sopenharmony_ci result->end_timestamp = intel_perf_report_timestamp(query, end); 1069bf215546Sopenharmony_ci result->reports_accumulated++; 1070bf215546Sopenharmony_ci 1071bf215546Sopenharmony_ci switch (query->oa_format) { 1072bf215546Sopenharmony_ci case I915_OA_FORMAT_A32u40_A4u32_B8_C8: 1073bf215546Sopenharmony_ci result->accumulator[query->gpu_time_offset] = 1074bf215546Sopenharmony_ci intel_perf_report_timestamp(query, end) - 1075bf215546Sopenharmony_ci intel_perf_report_timestamp(query, start); 1076bf215546Sopenharmony_ci 1077bf215546Sopenharmony_ci accumulate_uint32(start + 3, end + 3, 1078bf215546Sopenharmony_ci result->accumulator + query->gpu_clock_offset); /* clock */ 1079bf215546Sopenharmony_ci 1080bf215546Sopenharmony_ci /* 32x 40bit A counters... */ 1081bf215546Sopenharmony_ci for (i = 0; i < 32; i++) { 1082bf215546Sopenharmony_ci accumulate_uint40(i, start, end, 1083bf215546Sopenharmony_ci result->accumulator + query->a_offset + i); 1084bf215546Sopenharmony_ci } 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_ci /* 4x 32bit A counters... */ 1087bf215546Sopenharmony_ci for (i = 0; i < 4; i++) { 1088bf215546Sopenharmony_ci accumulate_uint32(start + 36 + i, end + 36 + i, 1089bf215546Sopenharmony_ci result->accumulator + query->a_offset + 32 + i); 1090bf215546Sopenharmony_ci } 1091bf215546Sopenharmony_ci 1092bf215546Sopenharmony_ci if (can_use_mi_rpc_bc_counters(&query->perf->devinfo)) { 1093bf215546Sopenharmony_ci /* 8x 32bit B counters */ 1094bf215546Sopenharmony_ci for (i = 0; i < 8; i++) { 1095bf215546Sopenharmony_ci accumulate_uint32(start + 48 + i, end + 48 + i, 1096bf215546Sopenharmony_ci result->accumulator + query->b_offset + i); 1097bf215546Sopenharmony_ci } 1098bf215546Sopenharmony_ci 1099bf215546Sopenharmony_ci /* 8x 32bit C counters... */ 1100bf215546Sopenharmony_ci for (i = 0; i < 8; i++) { 1101bf215546Sopenharmony_ci accumulate_uint32(start + 56 + i, end + 56 + i, 1102bf215546Sopenharmony_ci result->accumulator + query->c_offset + i); 1103bf215546Sopenharmony_ci } 1104bf215546Sopenharmony_ci } 1105bf215546Sopenharmony_ci break; 1106bf215546Sopenharmony_ci 1107bf215546Sopenharmony_ci case I915_OA_FORMAT_A45_B8_C8: 1108bf215546Sopenharmony_ci result->accumulator[query->gpu_time_offset] = 1109bf215546Sopenharmony_ci intel_perf_report_timestamp(query, end) - 1110bf215546Sopenharmony_ci intel_perf_report_timestamp(query, start); 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_ci for (i = 0; i < 61; i++) { 1113bf215546Sopenharmony_ci accumulate_uint32(start + 3 + i, end + 3 + i, 1114bf215546Sopenharmony_ci result->accumulator + query->a_offset + i); 1115bf215546Sopenharmony_ci } 1116bf215546Sopenharmony_ci break; 1117bf215546Sopenharmony_ci 1118bf215546Sopenharmony_ci default: 1119bf215546Sopenharmony_ci unreachable("Can't accumulate OA counters in unknown format"); 1120bf215546Sopenharmony_ci } 1121bf215546Sopenharmony_ci 1122bf215546Sopenharmony_ci} 1123bf215546Sopenharmony_ci 1124bf215546Sopenharmony_ci#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) 1125bf215546Sopenharmony_ci 1126bf215546Sopenharmony_civoid 1127bf215546Sopenharmony_ciintel_perf_query_result_read_gt_frequency(struct intel_perf_query_result *result, 1128bf215546Sopenharmony_ci const struct intel_device_info *devinfo, 1129bf215546Sopenharmony_ci const uint32_t start, 1130bf215546Sopenharmony_ci const uint32_t end) 1131bf215546Sopenharmony_ci{ 1132bf215546Sopenharmony_ci switch (devinfo->ver) { 1133bf215546Sopenharmony_ci case 7: 1134bf215546Sopenharmony_ci case 8: 1135bf215546Sopenharmony_ci result->gt_frequency[0] = GET_FIELD(start, GFX7_RPSTAT1_CURR_GT_FREQ) * 50ULL; 1136bf215546Sopenharmony_ci result->gt_frequency[1] = GET_FIELD(end, GFX7_RPSTAT1_CURR_GT_FREQ) * 50ULL; 1137bf215546Sopenharmony_ci break; 1138bf215546Sopenharmony_ci case 9: 1139bf215546Sopenharmony_ci case 11: 1140bf215546Sopenharmony_ci case 12: 1141bf215546Sopenharmony_ci result->gt_frequency[0] = GET_FIELD(start, GFX9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL; 1142bf215546Sopenharmony_ci result->gt_frequency[1] = GET_FIELD(end, GFX9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL; 1143bf215546Sopenharmony_ci break; 1144bf215546Sopenharmony_ci default: 1145bf215546Sopenharmony_ci unreachable("unexpected gen"); 1146bf215546Sopenharmony_ci } 1147bf215546Sopenharmony_ci 1148bf215546Sopenharmony_ci /* Put the numbers into Hz. */ 1149bf215546Sopenharmony_ci result->gt_frequency[0] *= 1000000ULL; 1150bf215546Sopenharmony_ci result->gt_frequency[1] *= 1000000ULL; 1151bf215546Sopenharmony_ci} 1152bf215546Sopenharmony_ci 1153bf215546Sopenharmony_civoid 1154bf215546Sopenharmony_ciintel_perf_query_result_read_perfcnts(struct intel_perf_query_result *result, 1155bf215546Sopenharmony_ci const struct intel_perf_query_info *query, 1156bf215546Sopenharmony_ci const uint64_t *start, 1157bf215546Sopenharmony_ci const uint64_t *end) 1158bf215546Sopenharmony_ci{ 1159bf215546Sopenharmony_ci for (uint32_t i = 0; i < 2; i++) { 1160bf215546Sopenharmony_ci uint64_t v0 = start[i] & PERF_CNT_VALUE_MASK; 1161bf215546Sopenharmony_ci uint64_t v1 = end[i] & PERF_CNT_VALUE_MASK; 1162bf215546Sopenharmony_ci 1163bf215546Sopenharmony_ci result->accumulator[query->perfcnt_offset + i] = v0 > v1 ? 1164bf215546Sopenharmony_ci (PERF_CNT_VALUE_MASK + 1 + v1 - v0) : 1165bf215546Sopenharmony_ci (v1 - v0); 1166bf215546Sopenharmony_ci } 1167bf215546Sopenharmony_ci} 1168bf215546Sopenharmony_ci 1169bf215546Sopenharmony_cistatic uint32_t 1170bf215546Sopenharmony_ciquery_accumulator_offset(const struct intel_perf_query_info *query, 1171bf215546Sopenharmony_ci enum intel_perf_query_field_type type, 1172bf215546Sopenharmony_ci uint8_t index) 1173bf215546Sopenharmony_ci{ 1174bf215546Sopenharmony_ci switch (type) { 1175bf215546Sopenharmony_ci case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT: 1176bf215546Sopenharmony_ci return query->perfcnt_offset + index; 1177bf215546Sopenharmony_ci case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A: 1178bf215546Sopenharmony_ci return query->a_offset + index; 1179bf215546Sopenharmony_ci case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B: 1180bf215546Sopenharmony_ci return query->b_offset + index; 1181bf215546Sopenharmony_ci case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C: 1182bf215546Sopenharmony_ci return query->c_offset + index; 1183bf215546Sopenharmony_ci default: 1184bf215546Sopenharmony_ci unreachable("Invalid register type"); 1185bf215546Sopenharmony_ci return 0; 1186bf215546Sopenharmony_ci } 1187bf215546Sopenharmony_ci} 1188bf215546Sopenharmony_ci 1189bf215546Sopenharmony_civoid 1190bf215546Sopenharmony_ciintel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result, 1191bf215546Sopenharmony_ci const struct intel_perf_query_info *query, 1192bf215546Sopenharmony_ci const void *start, 1193bf215546Sopenharmony_ci const void *end, 1194bf215546Sopenharmony_ci bool no_oa_accumulate) 1195bf215546Sopenharmony_ci{ 1196bf215546Sopenharmony_ci const struct intel_perf_query_field_layout *layout = &query->perf->query_layout; 1197bf215546Sopenharmony_ci const struct intel_device_info *devinfo = &query->perf->devinfo; 1198bf215546Sopenharmony_ci 1199bf215546Sopenharmony_ci for (uint32_t r = 0; r < layout->n_fields; r++) { 1200bf215546Sopenharmony_ci const struct intel_perf_query_field *field = &layout->fields[r]; 1201bf215546Sopenharmony_ci 1202bf215546Sopenharmony_ci if (field->type == INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC) { 1203bf215546Sopenharmony_ci intel_perf_query_result_read_frequencies(result, devinfo, 1204bf215546Sopenharmony_ci start + field->location, 1205bf215546Sopenharmony_ci end + field->location); 1206bf215546Sopenharmony_ci /* no_oa_accumulate=true is used when doing GL perf queries, we 1207bf215546Sopenharmony_ci * manually parse the OA reports from the OA buffer and subtract 1208bf215546Sopenharmony_ci * unrelated deltas, so don't accumulate the begin/end reports here. 1209bf215546Sopenharmony_ci */ 1210bf215546Sopenharmony_ci if (!no_oa_accumulate) { 1211bf215546Sopenharmony_ci intel_perf_query_result_accumulate(result, query, 1212bf215546Sopenharmony_ci start + field->location, 1213bf215546Sopenharmony_ci end + field->location); 1214bf215546Sopenharmony_ci } 1215bf215546Sopenharmony_ci } else { 1216bf215546Sopenharmony_ci uint64_t v0, v1; 1217bf215546Sopenharmony_ci 1218bf215546Sopenharmony_ci if (field->size == 4) { 1219bf215546Sopenharmony_ci v0 = *(const uint32_t *)(start + field->location); 1220bf215546Sopenharmony_ci v1 = *(const uint32_t *)(end + field->location); 1221bf215546Sopenharmony_ci } else { 1222bf215546Sopenharmony_ci assert(field->size == 8); 1223bf215546Sopenharmony_ci v0 = *(const uint64_t *)(start + field->location); 1224bf215546Sopenharmony_ci v1 = *(const uint64_t *)(end + field->location); 1225bf215546Sopenharmony_ci } 1226bf215546Sopenharmony_ci 1227bf215546Sopenharmony_ci if (field->mask) { 1228bf215546Sopenharmony_ci v0 = field->mask & v0; 1229bf215546Sopenharmony_ci v1 = field->mask & v1; 1230bf215546Sopenharmony_ci } 1231bf215546Sopenharmony_ci 1232bf215546Sopenharmony_ci /* RPSTAT is a bit of a special case because its begin/end values 1233bf215546Sopenharmony_ci * represent frequencies. We store it in a separate location. 1234bf215546Sopenharmony_ci */ 1235bf215546Sopenharmony_ci if (field->type == INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT) 1236bf215546Sopenharmony_ci intel_perf_query_result_read_gt_frequency(result, devinfo, v0, v1); 1237bf215546Sopenharmony_ci else 1238bf215546Sopenharmony_ci result->accumulator[query_accumulator_offset(query, field->type, field->index)] = v1 - v0; 1239bf215546Sopenharmony_ci } 1240bf215546Sopenharmony_ci } 1241bf215546Sopenharmony_ci} 1242bf215546Sopenharmony_ci 1243bf215546Sopenharmony_civoid 1244bf215546Sopenharmony_ciintel_perf_query_result_clear(struct intel_perf_query_result *result) 1245bf215546Sopenharmony_ci{ 1246bf215546Sopenharmony_ci memset(result, 0, sizeof(*result)); 1247bf215546Sopenharmony_ci result->hw_id = INTEL_PERF_INVALID_CTX_ID; 1248bf215546Sopenharmony_ci} 1249bf215546Sopenharmony_ci 1250bf215546Sopenharmony_civoid 1251bf215546Sopenharmony_ciintel_perf_query_result_print_fields(const struct intel_perf_query_info *query, 1252bf215546Sopenharmony_ci const void *data) 1253bf215546Sopenharmony_ci{ 1254bf215546Sopenharmony_ci const struct intel_perf_query_field_layout *layout = &query->perf->query_layout; 1255bf215546Sopenharmony_ci 1256bf215546Sopenharmony_ci for (uint32_t r = 0; r < layout->n_fields; r++) { 1257bf215546Sopenharmony_ci const struct intel_perf_query_field *field = &layout->fields[r]; 1258bf215546Sopenharmony_ci const uint32_t *value32 = data + field->location; 1259bf215546Sopenharmony_ci 1260bf215546Sopenharmony_ci switch (field->type) { 1261bf215546Sopenharmony_ci case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC: 1262bf215546Sopenharmony_ci fprintf(stderr, "MI_RPC:\n"); 1263bf215546Sopenharmony_ci fprintf(stderr, " TS: 0x%08x\n", *(value32 + 1)); 1264bf215546Sopenharmony_ci fprintf(stderr, " CLK: 0x%08x\n", *(value32 + 3)); 1265bf215546Sopenharmony_ci break; 1266bf215546Sopenharmony_ci case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A: 1267bf215546Sopenharmony_ci fprintf(stderr, "A%u: 0x%08x\n", field->index, *value32); 1268bf215546Sopenharmony_ci break; 1269bf215546Sopenharmony_ci case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B: 1270bf215546Sopenharmony_ci fprintf(stderr, "B%u: 0x%08x\n", field->index, *value32); 1271bf215546Sopenharmony_ci break; 1272bf215546Sopenharmony_ci case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C: 1273bf215546Sopenharmony_ci fprintf(stderr, "C%u: 0x%08x\n", field->index, *value32); 1274bf215546Sopenharmony_ci break; 1275bf215546Sopenharmony_ci default: 1276bf215546Sopenharmony_ci break; 1277bf215546Sopenharmony_ci } 1278bf215546Sopenharmony_ci } 1279bf215546Sopenharmony_ci} 1280bf215546Sopenharmony_ci 1281bf215546Sopenharmony_cistatic int 1282bf215546Sopenharmony_ciintel_perf_compare_query_names(const void *v1, const void *v2) 1283bf215546Sopenharmony_ci{ 1284bf215546Sopenharmony_ci const struct intel_perf_query_info *q1 = v1; 1285bf215546Sopenharmony_ci const struct intel_perf_query_info *q2 = v2; 1286bf215546Sopenharmony_ci 1287bf215546Sopenharmony_ci return strcmp(q1->name, q2->name); 1288bf215546Sopenharmony_ci} 1289bf215546Sopenharmony_ci 1290bf215546Sopenharmony_cistatic inline struct intel_perf_query_field * 1291bf215546Sopenharmony_ciadd_query_register(struct intel_perf_query_field_layout *layout, 1292bf215546Sopenharmony_ci enum intel_perf_query_field_type type, 1293bf215546Sopenharmony_ci uint16_t offset, 1294bf215546Sopenharmony_ci uint16_t size, 1295bf215546Sopenharmony_ci uint8_t index) 1296bf215546Sopenharmony_ci{ 1297bf215546Sopenharmony_ci /* Align MI_RPC to 64bytes (HW requirement) & 64bit registers to 8bytes 1298bf215546Sopenharmony_ci * (shows up nicely in the debugger). 1299bf215546Sopenharmony_ci */ 1300bf215546Sopenharmony_ci if (type == INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC) 1301bf215546Sopenharmony_ci layout->size = align(layout->size, 64); 1302bf215546Sopenharmony_ci else if (size % 8 == 0) 1303bf215546Sopenharmony_ci layout->size = align(layout->size, 8); 1304bf215546Sopenharmony_ci 1305bf215546Sopenharmony_ci layout->fields[layout->n_fields++] = (struct intel_perf_query_field) { 1306bf215546Sopenharmony_ci .mmio_offset = offset, 1307bf215546Sopenharmony_ci .location = layout->size, 1308bf215546Sopenharmony_ci .type = type, 1309bf215546Sopenharmony_ci .index = index, 1310bf215546Sopenharmony_ci .size = size, 1311bf215546Sopenharmony_ci }; 1312bf215546Sopenharmony_ci layout->size += size; 1313bf215546Sopenharmony_ci 1314bf215546Sopenharmony_ci return &layout->fields[layout->n_fields - 1]; 1315bf215546Sopenharmony_ci} 1316bf215546Sopenharmony_ci 1317bf215546Sopenharmony_cistatic void 1318bf215546Sopenharmony_ciintel_perf_init_query_fields(struct intel_perf_config *perf_cfg, 1319bf215546Sopenharmony_ci const struct intel_device_info *devinfo, 1320bf215546Sopenharmony_ci bool use_register_snapshots) 1321bf215546Sopenharmony_ci{ 1322bf215546Sopenharmony_ci struct intel_perf_query_field_layout *layout = &perf_cfg->query_layout; 1323bf215546Sopenharmony_ci 1324bf215546Sopenharmony_ci layout->n_fields = 0; 1325bf215546Sopenharmony_ci 1326bf215546Sopenharmony_ci /* MI_RPC requires a 64byte alignment. */ 1327bf215546Sopenharmony_ci layout->alignment = 64; 1328bf215546Sopenharmony_ci 1329bf215546Sopenharmony_ci layout->fields = rzalloc_array(perf_cfg, struct intel_perf_query_field, 5 + 16); 1330bf215546Sopenharmony_ci 1331bf215546Sopenharmony_ci add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC, 1332bf215546Sopenharmony_ci 0, 256, 0); 1333bf215546Sopenharmony_ci 1334bf215546Sopenharmony_ci if (use_register_snapshots) { 1335bf215546Sopenharmony_ci if (devinfo->ver <= 11) { 1336bf215546Sopenharmony_ci struct intel_perf_query_field *field = 1337bf215546Sopenharmony_ci add_query_register(layout, 1338bf215546Sopenharmony_ci INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT, 1339bf215546Sopenharmony_ci PERF_CNT_1_DW0, 8, 0); 1340bf215546Sopenharmony_ci field->mask = PERF_CNT_VALUE_MASK; 1341bf215546Sopenharmony_ci 1342bf215546Sopenharmony_ci field = add_query_register(layout, 1343bf215546Sopenharmony_ci INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT, 1344bf215546Sopenharmony_ci PERF_CNT_2_DW0, 8, 1); 1345bf215546Sopenharmony_ci field->mask = PERF_CNT_VALUE_MASK; 1346bf215546Sopenharmony_ci } 1347bf215546Sopenharmony_ci 1348bf215546Sopenharmony_ci if (devinfo->ver == 8 && devinfo->platform != INTEL_PLATFORM_CHV) { 1349bf215546Sopenharmony_ci add_query_register(layout, 1350bf215546Sopenharmony_ci INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT, 1351bf215546Sopenharmony_ci GFX7_RPSTAT1, 4, 0); 1352bf215546Sopenharmony_ci } 1353bf215546Sopenharmony_ci 1354bf215546Sopenharmony_ci if (devinfo->ver >= 9) { 1355bf215546Sopenharmony_ci add_query_register(layout, 1356bf215546Sopenharmony_ci INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT, 1357bf215546Sopenharmony_ci GFX9_RPSTAT0, 4, 0); 1358bf215546Sopenharmony_ci } 1359bf215546Sopenharmony_ci 1360bf215546Sopenharmony_ci if (!can_use_mi_rpc_bc_counters(devinfo)) { 1361bf215546Sopenharmony_ci if (devinfo->ver >= 8 && devinfo->ver <= 11) { 1362bf215546Sopenharmony_ci for (uint32_t i = 0; i < GFX8_N_OA_PERF_B32; i++) { 1363bf215546Sopenharmony_ci add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B, 1364bf215546Sopenharmony_ci GFX8_OA_PERF_B32(i), 4, i); 1365bf215546Sopenharmony_ci } 1366bf215546Sopenharmony_ci for (uint32_t i = 0; i < GFX8_N_OA_PERF_C32; i++) { 1367bf215546Sopenharmony_ci add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C, 1368bf215546Sopenharmony_ci GFX8_OA_PERF_C32(i), 4, i); 1369bf215546Sopenharmony_ci } 1370bf215546Sopenharmony_ci } else if (devinfo->verx10 == 120) { 1371bf215546Sopenharmony_ci for (uint32_t i = 0; i < GFX12_N_OAG_PERF_B32; i++) { 1372bf215546Sopenharmony_ci add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B, 1373bf215546Sopenharmony_ci GFX12_OAG_PERF_B32(i), 4, i); 1374bf215546Sopenharmony_ci } 1375bf215546Sopenharmony_ci for (uint32_t i = 0; i < GFX12_N_OAG_PERF_C32; i++) { 1376bf215546Sopenharmony_ci add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C, 1377bf215546Sopenharmony_ci GFX12_OAG_PERF_C32(i), 4, i); 1378bf215546Sopenharmony_ci } 1379bf215546Sopenharmony_ci } else if (devinfo->verx10 == 125) { 1380bf215546Sopenharmony_ci add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A, 1381bf215546Sopenharmony_ci GFX125_OAG_PERF_A36, 4, 36); 1382bf215546Sopenharmony_ci add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A, 1383bf215546Sopenharmony_ci GFX125_OAG_PERF_A37, 4, 37); 1384bf215546Sopenharmony_ci for (uint32_t i = 0; i < GFX12_N_OAG_PERF_B32; i++) { 1385bf215546Sopenharmony_ci add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B, 1386bf215546Sopenharmony_ci GFX12_OAG_PERF_B32(i), 4, i); 1387bf215546Sopenharmony_ci } 1388bf215546Sopenharmony_ci for (uint32_t i = 0; i < GFX12_N_OAG_PERF_C32; i++) { 1389bf215546Sopenharmony_ci add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C, 1390bf215546Sopenharmony_ci GFX12_OAG_PERF_C32(i), 4, i); 1391bf215546Sopenharmony_ci } 1392bf215546Sopenharmony_ci } 1393bf215546Sopenharmony_ci } 1394bf215546Sopenharmony_ci } 1395bf215546Sopenharmony_ci 1396bf215546Sopenharmony_ci /* Align the whole package to 64bytes so that 2 snapshots can be put 1397bf215546Sopenharmony_ci * together without extract alignment for the user. 1398bf215546Sopenharmony_ci */ 1399bf215546Sopenharmony_ci layout->size = align(layout->size, 64); 1400bf215546Sopenharmony_ci} 1401bf215546Sopenharmony_ci 1402bf215546Sopenharmony_civoid 1403bf215546Sopenharmony_ciintel_perf_init_metrics(struct intel_perf_config *perf_cfg, 1404bf215546Sopenharmony_ci const struct intel_device_info *devinfo, 1405bf215546Sopenharmony_ci int drm_fd, 1406bf215546Sopenharmony_ci bool include_pipeline_statistics, 1407bf215546Sopenharmony_ci bool use_register_snapshots) 1408bf215546Sopenharmony_ci{ 1409bf215546Sopenharmony_ci intel_perf_init_query_fields(perf_cfg, devinfo, use_register_snapshots); 1410bf215546Sopenharmony_ci 1411bf215546Sopenharmony_ci if (include_pipeline_statistics) { 1412bf215546Sopenharmony_ci load_pipeline_statistic_metrics(perf_cfg, devinfo); 1413bf215546Sopenharmony_ci intel_perf_register_mdapi_statistic_query(perf_cfg, devinfo); 1414bf215546Sopenharmony_ci } 1415bf215546Sopenharmony_ci 1416bf215546Sopenharmony_ci bool oa_metrics = oa_metrics_available(perf_cfg, drm_fd, devinfo, 1417bf215546Sopenharmony_ci use_register_snapshots); 1418bf215546Sopenharmony_ci if (oa_metrics) 1419bf215546Sopenharmony_ci load_oa_metrics(perf_cfg, drm_fd, devinfo); 1420bf215546Sopenharmony_ci 1421bf215546Sopenharmony_ci /* sort query groups by name */ 1422bf215546Sopenharmony_ci qsort(perf_cfg->queries, perf_cfg->n_queries, 1423bf215546Sopenharmony_ci sizeof(perf_cfg->queries[0]), intel_perf_compare_query_names); 1424bf215546Sopenharmony_ci 1425bf215546Sopenharmony_ci build_unique_counter_list(perf_cfg); 1426bf215546Sopenharmony_ci 1427bf215546Sopenharmony_ci if (oa_metrics) 1428bf215546Sopenharmony_ci intel_perf_register_mdapi_oa_query(perf_cfg, devinfo); 1429bf215546Sopenharmony_ci} 1430