1/* 2 * Copyright © 2018 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "intel_perf.h" 25#include "intel_perf_mdapi.h" 26#include "intel_perf_private.h" 27#include "intel_perf_regs.h" 28 29#include "dev/intel_device_info.h" 30 31#include <drm-uapi/i915_drm.h> 32 33 34int 35intel_perf_query_result_write_mdapi(void *data, uint32_t data_size, 36 const struct intel_device_info *devinfo, 37 const struct intel_perf_query_info *query, 38 const struct intel_perf_query_result *result) 39{ 40 switch (devinfo->ver) { 41 case 7: { 42 struct gfx7_mdapi_metrics *mdapi_data = (struct gfx7_mdapi_metrics *) data; 43 44 if (data_size < sizeof(*mdapi_data)) 45 return 0; 46 47 assert(devinfo->platform == INTEL_PLATFORM_HSW); 48 49 for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++) 50 mdapi_data->ACounters[i] = result->accumulator[1 + i]; 51 52 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) { 53 mdapi_data->NOACounters[i] = 54 result->accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i]; 55 } 56 57 mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0]; 58 mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1]; 59 60 mdapi_data->ReportsCount = result->reports_accumulated; 61 mdapi_data->TotalTime = 62 intel_device_info_timebase_scale(devinfo, result->accumulator[0]); 63 mdapi_data->CoreFrequency = result->gt_frequency[1]; 64 mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0]; 65 mdapi_data->SplitOccured = result->query_disjoint; 66 return sizeof(*mdapi_data); 67 } 68 case 8: { 69 struct gfx8_mdapi_metrics *mdapi_data = (struct gfx8_mdapi_metrics *) data; 70 71 if (data_size < sizeof(*mdapi_data)) 72 return 0; 73 74 for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++) 75 mdapi_data->OaCntr[i] = result->accumulator[2 + i]; 76 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) { 77 mdapi_data->NoaCntr[i] = 78 result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i]; 79 } 80 81 mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0]; 82 mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1]; 83 84 mdapi_data->ReportId = result->hw_id; 85 mdapi_data->ReportsCount = result->reports_accumulated; 86 mdapi_data->TotalTime = 87 intel_device_info_timebase_scale(devinfo, result->accumulator[0]); 88 mdapi_data->BeginTimestamp = 89 intel_device_info_timebase_scale(devinfo, result->begin_timestamp); 90 mdapi_data->GPUTicks = result->accumulator[1]; 91 mdapi_data->CoreFrequency = result->gt_frequency[1]; 92 mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0]; 93 mdapi_data->SliceFrequency = 94 (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL; 95 mdapi_data->UnsliceFrequency = 96 (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL; 97 mdapi_data->SplitOccured = result->query_disjoint; 98 return sizeof(*mdapi_data); 99 } 100 case 9: 101 case 11: 102 case 12:{ 103 struct gfx9_mdapi_metrics *mdapi_data = (struct gfx9_mdapi_metrics *) data; 104 105 if (data_size < sizeof(*mdapi_data)) 106 return 0; 107 108 for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++) 109 mdapi_data->OaCntr[i] = result->accumulator[2 + i]; 110 for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) { 111 mdapi_data->NoaCntr[i] = 112 result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i]; 113 } 114 115 mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0]; 116 mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1]; 117 118 mdapi_data->ReportId = result->hw_id; 119 mdapi_data->ReportsCount = result->reports_accumulated; 120 mdapi_data->TotalTime = 121 intel_device_info_timebase_scale(devinfo, result->accumulator[0]); 122 mdapi_data->BeginTimestamp = 123 intel_device_info_timebase_scale(devinfo, result->begin_timestamp); 124 mdapi_data->GPUTicks = result->accumulator[1]; 125 mdapi_data->CoreFrequency = result->gt_frequency[1]; 126 mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0]; 127 mdapi_data->SliceFrequency = 128 (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL; 129 mdapi_data->UnsliceFrequency = 130 (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL; 131 mdapi_data->SplitOccured = result->query_disjoint; 132 return sizeof(*mdapi_data); 133 } 134 default: 135 unreachable("unexpected gen"); 136 } 137} 138 139void 140intel_perf_register_mdapi_statistic_query(struct intel_perf_config *perf_cfg, 141 const struct intel_device_info *devinfo) 142{ 143 if (!(devinfo->ver >= 7 && devinfo->ver <= 12)) 144 return; 145 146 struct intel_perf_query_info *query = 147 intel_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS); 148 149 query->kind = INTEL_PERF_QUERY_TYPE_PIPELINE; 150 query->name = "Intel_Raw_Pipeline_Statistics_Query"; 151 152 /* The order has to match mdapi_pipeline_metrics. */ 153 intel_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT, 154 "N vertices submitted"); 155 intel_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, 156 "N primitives submitted"); 157 intel_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT, 158 "N vertex shader invocations"); 159 intel_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT, 160 "N geometry shader invocations"); 161 intel_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, 162 "N geometry shader primitives emitted"); 163 intel_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT, 164 "N primitives entering clipping"); 165 intel_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, 166 "N primitives leaving clipping"); 167 if (devinfo->verx10 == 75 || devinfo->ver == 8) { 168 intel_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, 169 "N fragment shader invocations", 170 "N fragment shader invocations"); 171 } else { 172 intel_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT, 173 "N fragment shader invocations"); 174 } 175 intel_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT, 176 "N TCS shader invocations"); 177 intel_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT, 178 "N TES shader invocations"); 179 if (devinfo->ver >= 7) { 180 intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT, 181 "N compute shader invocations"); 182 } 183 184 if (devinfo->ver >= 10) { 185 /* Reuse existing CS invocation register until we can expose this new 186 * one. 187 */ 188 intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT, 189 "Reserved1"); 190 } 191 192 query->data_size = sizeof(uint64_t) * query->n_counters; 193} 194 195static void 196fill_mdapi_perf_query_counter(struct intel_perf_query_info *query, 197 const char *name, 198 uint32_t data_offset, 199 uint32_t data_size, 200 enum intel_perf_counter_data_type data_type) 201{ 202 struct intel_perf_query_counter *counter = &query->counters[query->n_counters]; 203 204 assert(query->n_counters <= query->max_counters); 205 206 counter->name = name; 207 counter->desc = "Raw counter value"; 208 counter->type = INTEL_PERF_COUNTER_TYPE_RAW; 209 counter->data_type = data_type; 210 counter->offset = data_offset; 211 212 query->n_counters++; 213 214 assert(counter->offset + intel_perf_query_counter_get_size(counter) <= query->data_size); 215} 216 217#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \ 218 fill_mdapi_perf_query_counter(query, #field_name, \ 219 (uint8_t *) &struct_name.field_name - \ 220 (uint8_t *) &struct_name, \ 221 sizeof(struct_name.field_name), \ 222 INTEL_PERF_COUNTER_DATA_TYPE_##type_name) 223#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \ 224 fill_mdapi_perf_query_counter(query, \ 225 ralloc_asprintf(ctx, "%s%i", #field_name, idx), \ 226 (uint8_t *) &struct_name.field_name[idx] - \ 227 (uint8_t *) &struct_name, \ 228 sizeof(struct_name.field_name[0]), \ 229 INTEL_PERF_COUNTER_DATA_TYPE_##type_name) 230 231void 232intel_perf_register_mdapi_oa_query(struct intel_perf_config *perf, 233 const struct intel_device_info *devinfo) 234{ 235 struct intel_perf_query_info *query = NULL; 236 237 /* MDAPI requires different structures for pretty much every generation 238 * (right now we have definitions for gen 7 to 12). 239 */ 240 if (!(devinfo->ver >= 7 && devinfo->ver <= 12)) 241 return; 242 243 switch (devinfo->ver) { 244 case 7: { 245 query = intel_perf_append_query_info(perf, 1 + 45 + 16 + 7); 246 query->oa_format = I915_OA_FORMAT_A45_B8_C8; 247 248 struct gfx7_mdapi_metrics metric_data; 249 query->data_size = sizeof(metric_data); 250 251 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); 252 for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) { 253 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, 254 metric_data, ACounters, i, UINT64); 255 } 256 for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) { 257 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, 258 metric_data, NOACounters, i, UINT64); 259 } 260 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64); 261 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64); 262 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32); 263 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32); 264 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64); 265 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32); 266 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32); 267 break; 268 } 269 case 8: { 270 query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16); 271 query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; 272 273 struct gfx8_mdapi_metrics metric_data; 274 query->data_size = sizeof(metric_data); 275 276 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); 277 MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64); 278 for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) { 279 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, 280 metric_data, OaCntr, i, UINT64); 281 } 282 for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) { 283 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, 284 metric_data, NoaCntr, i, UINT64); 285 } 286 MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64); 287 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64); 288 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64); 289 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32); 290 MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32); 291 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64); 292 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64); 293 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64); 294 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64); 295 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64); 296 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64); 297 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32); 298 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32); 299 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64); 300 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32); 301 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32); 302 break; 303 } 304 case 9: 305 case 11: 306 case 12: { 307 query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2); 308 query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; 309 310 struct gfx9_mdapi_metrics metric_data; 311 query->data_size = sizeof(metric_data); 312 313 MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); 314 MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64); 315 for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) { 316 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, 317 metric_data, OaCntr, i, UINT64); 318 } 319 for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) { 320 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, 321 metric_data, NoaCntr, i, UINT64); 322 } 323 MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64); 324 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64); 325 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64); 326 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32); 327 MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32); 328 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64); 329 MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64); 330 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64); 331 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64); 332 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64); 333 MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64); 334 MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32); 335 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32); 336 MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64); 337 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32); 338 MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32); 339 for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) { 340 MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, 341 metric_data, UserCntr, i, UINT64); 342 } 343 MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32); 344 MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32); 345 break; 346 } 347 default: 348 unreachable("Unsupported gen"); 349 break; 350 } 351 352 query->kind = INTEL_PERF_QUERY_TYPE_RAW; 353 query->name = "Intel_Raw_Hardware_Counters_Set_0_Query"; 354 query->guid = INTEL_PERF_QUERY_GUID_MDAPI; 355 356 { 357 /* Accumulation buffer offsets copied from an actual query... */ 358 const struct intel_perf_query_info *copy_query = 359 &perf->queries[0]; 360 361 query->gpu_time_offset = copy_query->gpu_time_offset; 362 query->gpu_clock_offset = copy_query->gpu_clock_offset; 363 query->a_offset = copy_query->a_offset; 364 query->b_offset = copy_query->b_offset; 365 query->c_offset = copy_query->c_offset; 366 query->perfcnt_offset = copy_query->perfcnt_offset; 367 } 368} 369