1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2018 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#ifndef INTEL_PERF_H
25bf215546Sopenharmony_ci#define INTEL_PERF_H
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include <stdio.h>
28bf215546Sopenharmony_ci#include <stdbool.h>
29bf215546Sopenharmony_ci#include <stdint.h>
30bf215546Sopenharmony_ci#include <string.h>
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_ci#if defined(MAJOR_IN_SYSMACROS)
33bf215546Sopenharmony_ci#include <sys/sysmacros.h>
34bf215546Sopenharmony_ci#elif defined(MAJOR_IN_MKDEV)
35bf215546Sopenharmony_ci#include <sys/mkdev.h>
36bf215546Sopenharmony_ci#endif
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_ci#include "compiler/glsl/list.h"
39bf215546Sopenharmony_ci#include "dev/intel_device_info.h"
40bf215546Sopenharmony_ci#include "util/bitscan.h"
41bf215546Sopenharmony_ci#include "util/hash_table.h"
42bf215546Sopenharmony_ci#include "util/ralloc.h"
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_ci#include "drm-uapi/i915_drm.h"
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_ci#ifdef __cplusplus
47bf215546Sopenharmony_ciextern "C" {
48bf215546Sopenharmony_ci#endif
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_cistruct intel_perf_config;
51bf215546Sopenharmony_cistruct intel_perf_query_info;
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_ci#define INTEL_PERF_INVALID_CTX_ID (0xffffffff)
54bf215546Sopenharmony_ci
/* Kind of measurement a counter reports; stored in
 * intel_perf_query_counter::type.
 */
enum PACKED intel_perf_counter_type {
   INTEL_PERF_COUNTER_TYPE_EVENT,
   INTEL_PERF_COUNTER_TYPE_DURATION_NORM,
   INTEL_PERF_COUNTER_TYPE_DURATION_RAW,
   INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
   INTEL_PERF_COUNTER_TYPE_RAW,
   INTEL_PERF_COUNTER_TYPE_TIMESTAMP,
};
63bf215546Sopenharmony_ci
/* Data type of the value a counter produces; stored in
 * intel_perf_query_counter::data_type.
 * intel_perf_query_counter_get_size() maps each entry to its size in bytes
 * (BOOL32 and UINT32 are both 4 bytes).
 */
enum PACKED intel_perf_counter_data_type {
   INTEL_PERF_COUNTER_DATA_TYPE_BOOL32,
   INTEL_PERF_COUNTER_DATA_TYPE_UINT32,
   INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
   INTEL_PERF_COUNTER_DATA_TYPE_FLOAT,
   INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE,
};
71bf215546Sopenharmony_ci
/* Units in which a counter value is expressed; stored in
 * intel_perf_query_counter::units.
 *
 * INTEL_PERF_COUNTER_UNITS_MAX is not a unit: it must stay last so that its
 * value equals the number of units declared before it.
 */
enum PACKED intel_perf_counter_units {
   /* size */
   INTEL_PERF_COUNTER_UNITS_BYTES,

   /* frequency */
   INTEL_PERF_COUNTER_UNITS_HZ,

   /* time */
   INTEL_PERF_COUNTER_UNITS_NS,
   INTEL_PERF_COUNTER_UNITS_US,

   /**/
   INTEL_PERF_COUNTER_UNITS_PIXELS,
   INTEL_PERF_COUNTER_UNITS_TEXELS,
   INTEL_PERF_COUNTER_UNITS_THREADS,
   INTEL_PERF_COUNTER_UNITS_PERCENT,

   /* events */
   INTEL_PERF_COUNTER_UNITS_MESSAGES,
   INTEL_PERF_COUNTER_UNITS_NUMBER,
   INTEL_PERF_COUNTER_UNITS_CYCLES,
   INTEL_PERF_COUNTER_UNITS_EVENTS,
   INTEL_PERF_COUNTER_UNITS_UTILIZATION,

   /**/
   INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES,
   INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES,
   INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES,
   INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE,

   /* Number of entries above, keep last. */
   INTEL_PERF_COUNTER_UNITS_MAX
};
104bf215546Sopenharmony_ci
/* Description of a pipeline statistics counter, used as the pipeline_stat
 * member of intel_perf_query_counter.
 */
struct intel_pipeline_stat {
   /* Register holding the statistic. */
   uint32_t reg;
   /* NOTE(review): presumably the raw register value is scaled by
    * numerator / denominator - confirm against the code reading these.
    */
   uint32_t numerator;
   uint32_t denominator;
};
110bf215546Sopenharmony_ci
/*
 * The largest OA formats we can use include:
 * For Haswell:
 *   1 timestamp, 45 A counters, 8 B counters and 8 C counters
 *   (1 + 45 + 8 + 8 = 62 values, the larger of the two formats).
 * For Gfx8+
 *   1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
 *   (1 + 1 + 36 + 8 + 8 = 54 values).
 *
 * Plus 2 PERF_CNT registers and 1 RPSTAT register.
 *
 * This is the size of intel_perf_query_result::accumulator[].
 */
#define MAX_OA_REPORT_COUNTERS (62 + 2 + 1)
121bf215546Sopenharmony_ci
/*
 * We currently allocate only one page for pipeline statistics queries. Here
 * we derive the maximum number of counters for that amount: the end offset
 * sits at half of the buffer and each counter takes 8 bytes.
 *
 * NOTE(review): presumably the first half of the buffer holds the values
 * captured at the beginning of the query and the second half the values
 * captured at the end - confirm against the users of
 * STATS_BO_END_OFFSET_BYTES.
 */
#define STATS_BO_SIZE               4096
#define STATS_BO_END_OFFSET_BYTES   (STATS_BO_SIZE / 2)
#define MAX_STAT_COUNTERS           (STATS_BO_END_OFFSET_BYTES / 8)
129bf215546Sopenharmony_ci
/* Size in bytes of one OA sample: a record header followed by a 256 byte OA
 * counter report.
 */
#define I915_PERF_OA_SAMPLE_SIZE (8 +   /* drm_i915_perf_record_header */ \
                                  256)  /* OA counter report */
132bf215546Sopenharmony_ci
/**
 * Accumulated results of a query, filled by the
 * intel_perf_query_result_*() functions declared below and handed to the
 * counter read callbacks (intel_counter_read_uint64_t /
 * intel_counter_read_float_t).
 */
struct intel_perf_query_result {
   /**
    * Storage for the final accumulated OA counters. The *_offset fields of
    * intel_perf_query_info index into this array.
    */
   uint64_t accumulator[MAX_OA_REPORT_COUNTERS];

   /**
    * Hw ID used by the context on which the query was running.
    */
   uint32_t hw_id;

   /**
    * Number of reports accumulated to produce the results.
    */
   uint32_t reports_accumulated;

   /**
    * Frequency in the slices of the GT at the begin and end of the
    * query.
    */
   uint64_t slice_frequency[2];

   /**
    * Frequency in the unslice of the GT at the begin and end of the
    * query.
    */
   uint64_t unslice_frequency[2];

   /**
    * Frequency of the whole GT at the begin and end of the query.
    */
   uint64_t gt_frequency[2];

   /**
    * Timestamp at the beginning of the query.
    */
   uint64_t begin_timestamp;

   /**
    * Timestamp at the end of the query.
    */
   uint64_t end_timestamp;

   /**
    * Whether the query was interrupted by another workload (aka preemption).
    */
   bool query_disjoint;
};
181bf215546Sopenharmony_ci
/* Callback computing a uint64_t value for a counter of the given query from
 * accumulated results. intel_perf_query_counter uses this type both for
 * reading a counter (oa_counter_read_uint64) and for its maximum
 * (oa_counter_max_uint64).
 */
typedef uint64_t (*intel_counter_read_uint64_t)(struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results);
185bf215546Sopenharmony_ci
/* Callback computing a float value for a counter of the given query from
 * accumulated results. intel_perf_query_counter uses this type both for
 * reading a counter (oa_counter_read_float) and for its maximum
 * (oa_counter_max_float).
 */
typedef float (*intel_counter_read_float_t)(struct intel_perf_config *perf,
                                            const struct intel_perf_query_info *query,
                                            const struct intel_perf_query_result *results);
189bf215546Sopenharmony_ci
/* Description of a single counter exposed by a query. */
struct intel_perf_query_counter {
   const char *name;
   const char *desc;
   const char *symbol_name;
   const char *category;
   enum intel_perf_counter_type type;
   /* Determines the value size, see intel_perf_query_counter_get_size(). */
   enum intel_perf_counter_data_type data_type;
   enum intel_perf_counter_units units;
   /* NOTE(review): presumably the byte offset of this counter's value in the
    * query's result data - confirm against the code writing results.
    */
   size_t offset;

   /* Callback giving the maximum value of the counter. Both members have the
    * same arguments and differ only by return type.
    * NOTE(review): presumably selected by data_type - confirm.
    */
   union {
      intel_counter_read_uint64_t oa_counter_max_uint64;
      intel_counter_read_float_t  oa_counter_max_float;
   };

   /* How the counter value is obtained: through a read callback or from a
    * pipeline statistics register description.
    * NOTE(review): presumably pipeline_stat is only valid for queries of
    * kind INTEL_PERF_QUERY_TYPE_PIPELINE - confirm.
    */
   union {
      intel_counter_read_uint64_t oa_counter_read_uint64;
      intel_counter_read_float_t  oa_counter_read_float;
      struct intel_pipeline_stat pipeline_stat;
   };
};
211bf215546Sopenharmony_ci
/* One register programming entry: a register and the value associated with
 * it. Arrays of these make up struct intel_perf_registers.
 */
struct intel_perf_query_register_prog {
   uint32_t reg;
   uint32_t val;
};
216bf215546Sopenharmony_ci
/* Register programming for a given query.
 *
 * Three arrays of register/value pairs (flex, mux and B counter registers),
 * each followed by its number of entries.
 */
struct intel_perf_registers {
   const struct intel_perf_query_register_prog *flex_regs;
   uint32_t n_flex_regs;

   const struct intel_perf_query_register_prog *mux_regs;
   uint32_t n_mux_regs;

   const struct intel_perf_query_register_prog *b_counter_regs;
   uint32_t n_b_counter_regs;
};
228bf215546Sopenharmony_ci
/* Description of a query: a named set of counters together with, for OA
 * queries, the metric set and register programming it relies on.
 */
struct intel_perf_query_info {
   /* Configuration this query belongs to. */
   struct intel_perf_config *perf;

   enum intel_perf_query_type {
      INTEL_PERF_QUERY_TYPE_OA,
      INTEL_PERF_QUERY_TYPE_RAW,
      INTEL_PERF_QUERY_TYPE_PIPELINE,
   } kind;
   const char *name;
   const char *symbol_name;
   const char *guid;
   struct intel_perf_query_counter *counters;
   /* NOTE(review): presumably n_counters is the number of entries in use in
    * counters[] and max_counters its allocated capacity - confirm.
    */
   int n_counters;
   int max_counters;
   /* NOTE(review): presumably the size in bytes of the data produced by this
    * query - confirm.
    */
   size_t data_size;

   /* OA specific */
   uint64_t oa_metrics_set_id;
   int oa_format;

   /* For indexing into the accumulator[] ... */
   int gpu_time_offset;
   int gpu_clock_offset;
   int a_offset;
   int b_offset;
   int c_offset;
   int perfcnt_offset;
   int rpstat_offset;

   /* Register programming for this query. */
   struct intel_perf_registers config;
};
260bf215546Sopenharmony_ci
/* When not using the MI_RPC command, this structure describes the list of
 * register offsets as well as their storage location so that they can be
 * stored through a series of MI_SRM commands and accumulated with
 * intel_perf_query_result_accumulate_fields().
 */
struct intel_perf_query_field_layout {
   /* Alignment for the layout */
   uint32_t alignment;

   /* Size of the whole layout */
   uint32_t size;

   /* Number of entries in fields[] */
   uint32_t n_fields;

   struct intel_perf_query_field {
      /* MMIO location of this register */
      uint16_t mmio_offset;

      /* Location of this register in the storage */
      uint16_t location;

      /* Type of register, for accumulation (see intel_perf_query_info:*_offset
       * fields)
       */
      enum intel_perf_query_field_type {
         INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC,
         INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
         INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
         INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A,
         INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B,
         INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C,
      } type;

      /* Index of register in the given type (for instance A31 or B2,
       * etc...)
       */
      uint8_t index;

      /* 4, 8 or 256 */
      uint16_t size;

      /* If not 0, mask to apply to the register value. */
      uint64_t mask;
   } *fields;
};
306bf215546Sopenharmony_ci
/* Per counter bookkeeping stored in intel_perf_config::counter_infos. */
struct intel_perf_query_counter_info {
   struct intel_perf_query_counter *counter;

   /* Bitmask of queries, see intel_perf_query_counter_info_first_query().
    * NOTE(review): presumably bit N is set when the counter is available in
    * query N - confirm against the code building this mask.
    */
   uint64_t query_mask;

   /**
    * Each counter can be a part of many groups, each time at different index.
    * This struct stores one of those locations.
    */
   struct {
      int group_idx; /* query/group number */
      int counter_idx; /* index inside of query/group */
   } location;
};
321bf215546Sopenharmony_ci
/* Top level object of the perf library: holds the detected i915-perf
 * capabilities, the list of queries and counters, and the table of callbacks
 * used to operate on buffers and batches. Allocated with intel_perf_new().
 */
struct intel_perf_config {
   /* Whether i915 has DRM_I915_QUERY_PERF_CONFIG support. */
   bool i915_query_supported;

   /* Version of the i915-perf subsystem, refer to i915_drm.h. */
   int i915_perf_version;

   /* Number of bits to shift the OA timestamp values by to match the ring
    * timestamp.
    */
   int oa_timestamp_shift;

   /* Mask of bits valid from the OA report (for instance you might have the
    * lower 31 bits [30:0] of timestamp value). This is useful if you want to
    * recombine a full timestamp value captured from the CPU with OA
    * timestamps captured on the device but that only include 31bits of data.
    */
   uint64_t oa_timestamp_mask;

   /* Powergating configuration for running the query. */
   struct drm_i915_gem_context_param_sseu sseu;

   struct intel_perf_query_info *queries;
   int n_queries;

   struct intel_perf_query_counter_info *counter_infos;
   int n_counters;

   struct intel_perf_query_field_layout query_layout;

   /* Variables referenced in the XML meta data for OA performance
    * counters, e.g in the normalization equations.
    *
    * All uint64_t for consistent operand types in generated code
    */
   struct {
      uint64_t n_eus;               /** $EuCoresTotalCount */
      uint64_t n_eu_slices;         /** $EuSlicesTotalCount */
      uint64_t n_eu_sub_slices;     /** $EuSubslicesTotalCount */
      uint64_t n_eu_slice0123;      /** $EuDualSubslicesSlice0123Count */
      uint64_t slice_mask;          /** $SliceMask */
      uint64_t subslice_mask;       /** $SubsliceMask */
      uint64_t gt_min_freq;         /** $GpuMinFrequency */
      uint64_t gt_max_freq;         /** $GpuMaxFrequency */
      bool     query_mode;          /** $QueryMode */
   } sys_vars;

   struct intel_device_info devinfo;

   /* OA metric sets, indexed by GUID, as known by Mesa at build time, to
    * cross-reference with the GUIDs of configs advertised by the kernel at
    * runtime
    */
   struct hash_table *oa_metrics_table;

   /* When MDAPI hasn't configured the metric we need to use by the time the
    * query begins, this OA metric is used as a fallback.
    */
   uint64_t fallback_raw_oa_metric;

   /* Whether we have support for this platform. If true && n_queries == 0,
    * this means we will not be able to use i915-perf because it is in
    * paranoid mode.
    */
   bool platform_supported;

   /* Location of the device's sysfs entry. */
   char sysfs_dev_dir[256];

   /* Callbacks for buffer object and batch operations. All objects are
    * passed as opaque void pointers.
    * NOTE(review): presumably filled in by the driver using this library -
    * confirm.
    */
   struct {
      void *(*bo_alloc)(void *bufmgr, const char *name, uint64_t size);
      void (*bo_unreference)(void *bo);
      void *(*bo_map)(void *ctx, void *bo, unsigned flags);
      void (*bo_unmap)(void *bo);
      bool (*batch_references)(void *batch, void *bo);
      void (*bo_wait_rendering)(void *bo);
      int (*bo_busy)(void *bo);
      void (*emit_stall_at_pixel_scoreboard)(void *ctx);
      void (*emit_mi_report_perf_count)(void *ctx,
                                        void *bo,
                                        uint32_t offset_in_bytes,
                                        uint32_t report_id);
      void (*batchbuffer_flush)(void *ctx,
                                const char *file, int line);
      void (*store_register_mem)(void *ctx, void *bo, uint32_t reg, uint32_t reg_size, uint32_t offset);

   } vtbl;
};
410bf215546Sopenharmony_ci
/* Association of a counter with a query and a pass number, filled by
 * intel_perf_get_counters_passes().
 * NOTE(review): presumably `pass` is the index of the pass in which `query`
 * has to be run to capture `counter` - confirm against the implementation.
 */
struct intel_perf_counter_pass {
   struct intel_perf_query_info *query;
   struct intel_perf_query_counter *counter;
   uint32_t pass;
};
416bf215546Sopenharmony_ci
/** Initialize the intel_perf_config object for a given device.
 *
 *    perf_cfg : The object to initialize (see intel_perf_new()).
 *
 *    devinfo : Description of the device.
 *
 *    drm_fd : NOTE(review): presumably the DRM file descriptor of the
 *             device - confirm against the implementation.
 *
 *    include_pipeline_statistics : Whether to add a pipeline statistic query
 *                                  intel_perf_query_info object
 *
 *    use_register_snapshots : Whether the queries should include counters
 *                             that rely on register snapshots using command
 *                             streamer instructions (not possible when using
 *                             only the OA buffer data).
 */
void intel_perf_init_metrics(struct intel_perf_config *perf_cfg,
                             const struct intel_device_info *devinfo,
                             int drm_fd,
                             bool include_pipeline_statistics,
                             bool use_register_snapshots);
432bf215546Sopenharmony_ci
/** Query i915 for a metric id using guid.
 *
 * The id is written through metric_id.
 * NOTE(review): presumably returns true on success and leaves *metric_id
 * untouched otherwise - confirm against the implementation.
 */
bool intel_perf_load_metric_id(struct intel_perf_config *perf_cfg,
                               const char *guid,
                               uint64_t *metric_id);
438bf215546Sopenharmony_ci
/** Load a configuration's content from i915 using a guid.
 *
 * NOTE(review): ownership of the returned intel_perf_registers and the value
 * returned on failure are not visible from this header - confirm against the
 * implementation.
 */
struct intel_perf_registers *intel_perf_load_configuration(struct intel_perf_config *perf_cfg,
                                                           int fd, const char *guid);
443bf215546Sopenharmony_ci
/** Store a configuration into i915 using guid and return a new metric id.
 *
 * If guid is NULL, then a generated one will be provided by hashing the
 * content of the configuration.
 *
 * NOTE(review): fd is presumably the DRM file descriptor of the device -
 * confirm against the implementation.
 */
uint64_t intel_perf_store_configuration(struct intel_perf_config *perf_cfg, int fd,
                                        const struct intel_perf_registers *config,
                                        const char *guid);
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_cistatic inline unsigned
454bf215546Sopenharmony_ciintel_perf_query_counter_info_first_query(const struct intel_perf_query_counter_info *counter_info)
455bf215546Sopenharmony_ci{
456bf215546Sopenharmony_ci   return ffsll(counter_info->query_mask);
457bf215546Sopenharmony_ci}
458bf215546Sopenharmony_ci
/** Read the slice/unslice frequency from 2 OA reports and store them into
 *  result (see intel_perf_query_result::slice_frequency and
 *  intel_perf_query_result::unslice_frequency).
 *
 *  start and end point to the OA reports captured at the beginning and at
 *  the end of the query.
 */
void intel_perf_query_result_read_frequencies(struct intel_perf_query_result *result,
                                              const struct intel_device_info *devinfo,
                                              const uint32_t *start,
                                              const uint32_t *end);
466bf215546Sopenharmony_ci
/** Store the GT frequency as reported by the RPSTAT register.
 *
 * start and end are register values, passed by value.
 * NOTE(review): presumably they are the RPSTAT values captured at the
 * beginning and end of the query, stored into
 * intel_perf_query_result::gt_frequency - confirm against the implementation.
 */
void intel_perf_query_result_read_gt_frequency(struct intel_perf_query_result *result,
                                               const struct intel_device_info *devinfo,
                                               const uint32_t start,
                                               const uint32_t end);
473bf215546Sopenharmony_ci
/** Store PERFCNT registers values.
 *
 * NOTE(review): presumably start and end each point to the PERFCNT values
 * captured at the beginning and end of the query, and the result lands in
 * accumulator[] at query->perfcnt_offset - confirm against the
 * implementation.
 */
void intel_perf_query_result_read_perfcnts(struct intel_perf_query_result *result,
                                           const struct intel_perf_query_info *query,
                                           const uint64_t *start,
                                           const uint64_t *end);
480bf215546Sopenharmony_ci
/** Accumulate the delta between 2 OA reports into result for a given query.
 *
 * start and end point to the 2 OA reports.
 */
void intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
                                        const struct intel_perf_query_info *query,
                                        const uint32_t *start,
                                        const uint32_t *end);
487bf215546Sopenharmony_ci
/** Read the timestamp value in a report.
 *
 * NOTE(review): whether the returned value has
 * intel_perf_config::oa_timestamp_shift applied is not visible from this
 * header - confirm against the implementation.
 */
uint64_t intel_perf_report_timestamp(const struct intel_perf_query_info *query,
                                     const uint32_t *report);
492bf215546Sopenharmony_ci
/** Accumulate the delta between 2 snapshots of OA perf registers (layout
 * should match description specified through intel_perf_query_field_layout).
 *
 * NOTE(review): presumably no_oa_accumulate skips the accumulation of the OA
 * report part of the snapshots - confirm against the implementation.
 */
void intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result,
                                               const struct intel_perf_query_info *query,
                                               const void *start,
                                               const void *end,
                                               bool no_oa_accumulate);
501bf215546Sopenharmony_ci
/** Clear a result.
 *
 * NOTE(review): the exact values the fields are reset to are not visible
 * from this header - confirm against the implementation.
 */
void intel_perf_query_result_clear(struct intel_perf_query_result *result);
503bf215546Sopenharmony_ci
/** Debug helper printing out query data.
 *
 * NOTE(review): presumably data is a snapshot laid out according to
 * intel_perf_query_field_layout - confirm against the implementation.
 */
void intel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
                                          const void *data);
508bf215546Sopenharmony_ci
509bf215546Sopenharmony_cistatic inline size_t
510bf215546Sopenharmony_ciintel_perf_query_counter_get_size(const struct intel_perf_query_counter *counter)
511bf215546Sopenharmony_ci{
512bf215546Sopenharmony_ci   switch (counter->data_type) {
513bf215546Sopenharmony_ci   case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
514bf215546Sopenharmony_ci      return sizeof(uint32_t);
515bf215546Sopenharmony_ci   case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
516bf215546Sopenharmony_ci      return sizeof(uint32_t);
517bf215546Sopenharmony_ci   case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
518bf215546Sopenharmony_ci      return sizeof(uint64_t);
519bf215546Sopenharmony_ci   case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
520bf215546Sopenharmony_ci      return sizeof(float);
521bf215546Sopenharmony_ci   case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
522bf215546Sopenharmony_ci      return sizeof(double);
523bf215546Sopenharmony_ci   default:
524bf215546Sopenharmony_ci      unreachable("invalid counter data type");
525bf215546Sopenharmony_ci   }
526bf215546Sopenharmony_ci}
527bf215546Sopenharmony_ci
528bf215546Sopenharmony_cistatic inline struct intel_perf_config *
529bf215546Sopenharmony_ciintel_perf_new(void *ctx)
530bf215546Sopenharmony_ci{
531bf215546Sopenharmony_ci   struct intel_perf_config *perf = rzalloc(ctx, struct intel_perf_config);
532bf215546Sopenharmony_ci   return perf;
533bf215546Sopenharmony_ci}
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_ci/** Whether we have the ability to hold off preemption on a batch so we don't
536bf215546Sopenharmony_ci * have to look at the OA buffer to subtract unrelated workloads off the
537bf215546Sopenharmony_ci * values captured through MI_* commands.
538bf215546Sopenharmony_ci */
539bf215546Sopenharmony_cistatic inline bool
540bf215546Sopenharmony_ciintel_perf_has_hold_preemption(const struct intel_perf_config *perf)
541bf215546Sopenharmony_ci{
542bf215546Sopenharmony_ci   return perf->i915_perf_version >= 3;
543bf215546Sopenharmony_ci}
544bf215546Sopenharmony_ci
545bf215546Sopenharmony_ci/** Whether we have the ability to lock EU array power configuration for the
546bf215546Sopenharmony_ci * duration of the performance recording. This is useful on Gfx11 where the HW
547bf215546Sopenharmony_ci * architecture requires half the EU for particular workloads.
548bf215546Sopenharmony_ci */
549bf215546Sopenharmony_cistatic inline bool
550bf215546Sopenharmony_ciintel_perf_has_global_sseu(const struct intel_perf_config *perf)
551bf215546Sopenharmony_ci{
552bf215546Sopenharmony_ci   return perf->i915_perf_version >= 4;
553bf215546Sopenharmony_ci}
554bf215546Sopenharmony_ci
/** Compute a number of passes for a set of counters.
 *
 * counter_indices is an array of counter_indices_count counter indices.
 *
 * NOTE(review): presumably the indices refer to
 * intel_perf_config::counter_infos, the return value is the number of
 * queries (passes) needed to capture all of these counters, and
 * pass_queries, when not NULL, receives the query to use for each pass -
 * confirm against the implementation.
 */
uint32_t intel_perf_get_n_passes(struct intel_perf_config *perf,
                                 const uint32_t *counter_indices,
                                 uint32_t counter_indices_count,
                                 struct intel_perf_query_info **pass_queries);
/** Fill counter_pass for a set of counters.
 *
 * counter_indices is an array of counter_indices_count counter indices.
 *
 * NOTE(review): presumably counter_pass must have room for
 * counter_indices_count entries and entry i receives the query, counter and
 * pass number for counter_indices[i] - confirm against the implementation.
 */
void intel_perf_get_counters_passes(struct intel_perf_config *perf,
                                    const uint32_t *counter_indices,
                                    uint32_t counter_indices_count,
                                    struct intel_perf_counter_pass *counter_pass);
563bf215546Sopenharmony_ci
564bf215546Sopenharmony_ci#ifdef __cplusplus
565bf215546Sopenharmony_ci} // extern "C"
566bf215546Sopenharmony_ci#endif
567bf215546Sopenharmony_ci
568bf215546Sopenharmony_ci#endif /* INTEL_PERF_H */
569