/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2008-2018 Andes Technology Corporation */

#ifndef __ASM_PMU_H
#define __ASM_PMU_H

#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <asm/bitfield.h>

/* Has special meaning for perf core implementation */
#define HW_OP_UNSUPPORTED		0x0
#define C(_x)				PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED		0x0

/* Enough for both software and hardware defined events */
#define SOFTWARE_EVENT_MASK		0xFF

#define PFM_OFFSET_MAGIC_0		2	/* DO NOT START FROM 0 */
#define PFM_OFFSET_MAGIC_1		(PFM_OFFSET_MAGIC_0 + 36)
#define PFM_OFFSET_MAGIC_2		(PFM_OFFSET_MAGIC_1 + 36)

enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS };

/*
 * Per-counter PFM_CTL bit masks and selector-field offsets, indexed by
 * counter number (PFMC0..PFMC2).
 */
static const u32 PFM_CTL_OVF[3] = { PFM_CTL_mskOVF0, PFM_CTL_mskOVF1,
				    PFM_CTL_mskOVF2 };
static const u32 PFM_CTL_EN[3] = { PFM_CTL_mskEN0, PFM_CTL_mskEN1,
				   PFM_CTL_mskEN2 };
static const u32 PFM_CTL_OFFSEL[3] = { PFM_CTL_offSEL0, PFM_CTL_offSEL1,
				       PFM_CTL_offSEL2 };
static const u32 PFM_CTL_IE[3] = { PFM_CTL_mskIE0, PFM_CTL_mskIE1,
				   PFM_CTL_mskIE2 };
static const u32 PFM_CTL_KS[3] = { PFM_CTL_mskKS0, PFM_CTL_mskKS1,
				   PFM_CTL_mskKS2 };
static const u32 PFM_CTL_KU[3] = { PFM_CTL_mskKU0, PFM_CTL_mskKU1,
				   PFM_CTL_mskKU2 };
static const u32 PFM_CTL_SEL[3] = { PFM_CTL_mskSEL0, PFM_CTL_mskSEL1,
				    PFM_CTL_mskSEL2 };

/*
 * Perf Events' indices
 */
#define NDS32_IDX_CYCLE_COUNTER		0
#define NDS32_IDX_COUNTER0		1
#define NDS32_IDX_COUNTER1		2

/* The events for a given PMU register set. */
struct pmu_hw_events {
	/*
	 * The events that are active on the PMU for the given index.
	 */
	struct perf_event *events[MAX_COUNTERS];

	/*
	 * A 1 bit for an index indicates that the counter is being used for
	 * an event. A 0 means that the counter can be used.
	 */
	unsigned long used_mask[BITS_TO_LONGS(MAX_COUNTERS)];

	/*
	 * Hardware lock to serialize accesses to PMU registers. Needed for
	 * the read/modify/write sequences.
	 */
	raw_spinlock_t pmu_lock;
};

struct nds32_pmu {
	struct pmu pmu;
	cpumask_t active_irqs;
	char *name;
	irqreturn_t (*handle_irq)(int irq_num, void *dev);
	void (*enable)(struct perf_event *event);
	void (*disable)(struct perf_event *event);
	int (*get_event_idx)(struct pmu_hw_events *hw_events,
			     struct perf_event *event);
	int (*set_event_filter)(struct hw_perf_event *evt,
				struct perf_event_attr *attr);
	u32 (*read_counter)(struct perf_event *event);
	void (*write_counter)(struct perf_event *event, u32 val);
	void (*start)(struct nds32_pmu *nds32_pmu);
	void (*stop)(struct nds32_pmu *nds32_pmu);
	void (*reset)(void *data);
	int (*request_irq)(struct nds32_pmu *nds32_pmu, irq_handler_t handler);
	void (*free_irq)(struct nds32_pmu *nds32_pmu);
	int (*map_event)(struct perf_event *event);
	int num_events;
	atomic_t active_events;
	u64 max_period;
	struct platform_device *plat_device;
	struct pmu_hw_events *(*get_hw_events)(void);
};

#define to_nds32_pmu(p)	(container_of(p, struct nds32_pmu, pmu))

int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type);

u64 nds32_pmu_event_update(struct perf_event *event);

int nds32_pmu_event_set_period(struct perf_event *event);
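/*
 * A minimal usage sketch, for illustration only (not part of this
 * interface): a CPU PMU backend fills in a struct nds32_pmu with its
 * callbacks and hands it to nds32_pmu_register().  The callback names,
 * the "nds32v3-pmu" string and the type/max_period values below are
 * hypothetical placeholders; the real implementations live in the
 * platform perf code, not in this header.
 *
 *	static struct nds32_pmu cpu_pmu = {
 *		.name		= "nds32v3-pmu",
 *		.handle_irq	= cpu_pmu_handle_irq,
 *		.enable		= cpu_pmu_enable_event,
 *		.disable	= cpu_pmu_disable_event,
 *		.get_event_idx	= cpu_pmu_get_event_idx,
 *		.read_counter	= cpu_pmu_read_counter,
 *		.write_counter	= cpu_pmu_write_counter,
 *		.start		= cpu_pmu_start,
 *		.stop		= cpu_pmu_stop,
 *		.map_event	= cpu_pmu_map_event,
 *		.get_hw_events	= cpu_pmu_get_hw_events,
 *		.num_events	= MAX_COUNTERS,
 *		.max_period	= 0xFFFFFFFF,
 *	};
 *
 *	nds32_pmu_register(&cpu_pmu, PERF_TYPE_RAW);
 */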
/*
 * Common NDS32 SPAv3 event types
 *
 * Note: An implementation may not be able to count all of these events
 * but the encodings are considered to be `reserved' in the case that
 * they are not available.
 *
 * SEL_TOTAL_CYCLES carries an offset because a value of zero is defined
 * as the NOT_SUPPORTED event mapping in the generic perf code.  The event
 * writing implementation has to strip this offset again before it
 * programs the hardware selector.
 */
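/*
 * To make the encoding concrete (the numbers follow directly from the
 * PFM_OFFSET_MAGIC_* definitions above): PFM_OFFSET_MAGIC_0 is 2,
 * PFM_OFFSET_MAGIC_1 is 38 and PFM_OFFSET_MAGIC_2 is 74, so for example
 *
 *	SPAV3_0_SEL_TOTAL_CYCLES          ==  0 +  2 ==  2
 *	SPAV3_1_SEL_COMPLETED_INSTRUCTION ==  1 + 38 == 39
 *	SPAV3_2_SEL_DATA_CACHE_MISS       == 21 + 74 == 95
 *
 * The helpers get_converted_event_idx() and get_converted_evet_hw_num()
 * defined later in this header recover the counter index (0/1/2) and the
 * raw hardware selector (0/1/21) from these encoded values.  Below is a
 * minimal sketch of how an event-writing implementation might use them
 * together with the PFM_CTL_* tables; the __nds32__mfsr()/__nds32__mtsr()
 * builtins and the NDS32_SR_PFM_CTL register name are assumptions used
 * only for illustration and are not provided by this header, and no error
 * handling is shown:
 *
 *	static void pfm_write_event_selector(struct hw_perf_event *hwc)
 *	{
 *		int idx = get_converted_event_idx(hwc->config_base);
 *		u32 evt = get_converted_evet_hw_num(hwc->config_base);
 *		u32 ctl = __nds32__mfsr(NDS32_SR_PFM_CTL);
 *
 *		ctl &= ~PFM_CTL_SEL[idx];
 *		ctl |= evt << PFM_CTL_OFFSEL[idx];
 *		ctl |= PFM_CTL_EN[idx] | PFM_CTL_IE[idx];
 *		__nds32__mtsr(ctl, NDS32_SR_PFM_CTL);
 *	}
 */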
enum spav3_counter_0_perf_types {
	SPAV3_0_SEL_BASE = -1 + PFM_OFFSET_MAGIC_0,	/* counting symbol */
	SPAV3_0_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_0,
	SPAV3_0_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_0,
	SPAV3_0_SEL_LAST	/* counting symbol */
};

enum spav3_counter_1_perf_types {
	SPAV3_1_SEL_BASE = -1 + PFM_OFFSET_MAGIC_1,	/* counting symbol */
	SPAV3_1_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CONDITIONAL_BRANCH = 2 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_TAKEN_CONDITIONAL_BRANCH = 3 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_PREFETCH_INSTRUCTION = 4 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_RET_INST = 5 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_JR_INST = 6 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_JAL_JRAL_INST = 7 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_NOP_INST = 8 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_SCW_INST = 9 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_ISB_DSB_INST = 10 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CCTL_INST = 11 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_TAKEN_INTERRUPTS = 12 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LOADS_COMPLETED = 13 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_UITLB_ACCESS = 14 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_UDTLB_ACCESS = 15 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_MTLB_ACCESS = 16 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CODE_CACHE_ACCESS = 17 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_DEPENDENCY_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_MISS_STALL_CYCLES = 19 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_ACCESS = 20 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS = 22 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS = 23 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_ILM_ACCESS = 24 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LSU_BIU_CYCLES = 25 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_HPTWK_BIU_CYCLES = 26 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DMA_BIU_CYCLES = 27 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CODE_CACHE_FILL_BIU_CYCLES = 28 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LEGAL_UNALIGN_DCACHE_ACCESS = 29 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_PUSH25 = 30 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_SYSCALLS_INST = 31 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LAST	/* counting symbol */
};

enum spav3_counter_2_perf_types {
	SPAV3_2_SEL_BASE = -1 + PFM_OFFSET_MAGIC_2,	/* counting symbol */
	SPAV3_2_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_CONDITIONAL_BRANCH_MISPREDICT = 2 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_TAKEN_CONDITIONAL_BRANCH_MISPREDICT =
		3 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_PREFETCH_INSTRUCTION_CACHE_HIT = 4 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_RET_MISPREDICT = 5 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_IMMEDIATE_J_INST = 6 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_MULTIPLY_INST = 7 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_16_BIT_INST = 8 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_FAILED_SCW_INST = 9 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_LD_AFTER_ST_CONFLICT_REPLAYS = 10 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_TAKEN_EXCEPTIONS = 12 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_STORES_COMPLETED = 13 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_UITLB_MISS = 14 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_UDTLB_MISS = 15 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_MTLB_MISS = 16 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_CODE_CACHE_MISS = 17 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_EMPTY_INST_QUEUE_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DATA_WRITE_BACK = 19 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_LOAD_DATA_CACHE_MISS = 22 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_STORE_DATA_CACHE_MISS = 23 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DLM_ACCESS = 24 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_LSU_BIU_REQUEST = 25 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_HPTWK_BIU_REQUEST = 26 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DMA_BIU_REQUEST = 27 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_CODE_CACHE_FILL_BIU_REQUEST = 28 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_EXTERNAL_EVENTS = 29 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_POP25 = 30 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_LAST	/* counting symbol */
};

/* Get converted event counter index */
static inline int get_converted_event_idx(unsigned long event)
{
	int idx;

	if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) {
		idx = 0;
	} else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) {
		idx = 1;
	} else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) {
		idx = 2;
	} else {
		pr_err("GET_CONVERTED_EVENT_IDX PFM counter range error\n");
		return -EPERM;
	}

	return idx;
}

/* Get converted hardware event number */
static inline u32 get_converted_evet_hw_num(u32 event)
{
	if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST)
		event -= PFM_OFFSET_MAGIC_0;
	else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST)
		event -= PFM_OFFSET_MAGIC_1;
	else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST)
		event -= PFM_OFFSET_MAGIC_2;
	else if (event != 0)
		pr_err("GET_CONVERTED_EVENT_HW_NUM PFM counter range error\n");

	return event;
}

/*
 * NDS32 HW events mapping
 *
 * The hardware events that we support. We do support cache operations but
 * we have Harvard caches and no way to combine instruction and data
 * accesses/misses in hardware.
 */
static const unsigned int nds32_pfm_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES] = SPAV3_0_SEL_TOTAL_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS] = SPAV3_1_SEL_COMPLETED_INSTRUCTION,
	[PERF_COUNT_HW_CACHE_REFERENCES] = SPAV3_1_SEL_DATA_CACHE_ACCESS,
	[PERF_COUNT_HW_CACHE_MISSES] = SPAV3_2_SEL_DATA_CACHE_MISS,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_MISSES] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED
};

static const unsigned int nds32_pfm_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
					[PERF_COUNT_HW_CACHE_OP_MAX]
					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS,
			[C(RESULT_MISS)] = SPAV3_2_SEL_LOAD_DATA_CACHE_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS,
			[C(RESULT_MISS)] = SPAV3_2_SEL_STORE_DATA_CACHE_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = SPAV3_1_SEL_CODE_CACHE_ACCESS,
			[C(RESULT_MISS)] = SPAV3_2_SEL_CODE_CACHE_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = SPAV3_1_SEL_CODE_CACHE_ACCESS,
			[C(RESULT_MISS)] = SPAV3_2_SEL_CODE_CACHE_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	/* TODO: L2CC */
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	/*
	 * The NDS32 PMU cannot distinguish TLB read hits/misses from write
	 * hits/misses; it only counts combined accesses and misses.  Those
	 * combined counts are therefore reported under the READ operation,
	 * which is the closest mapping we can provide.
	 */
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = SPAV3_1_SEL_UDTLB_ACCESS,
			[C(RESULT_MISS)] = SPAV3_2_SEL_UDTLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = SPAV3_1_SEL_UITLB_ACCESS,
			[C(RESULT_MISS)] = SPAV3_2_SEL_UITLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	/* BPU (branch prediction unit): not supported by this PMU. */
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	/* NODE (NUMA node memory accesses): not supported by this PMU. */
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
};
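/*
 * Illustrative use of the tables above (the wrapper name is a hypothetical
 * placeholder and SOFTWARE_EVENT_MASK is only one plausible choice for the
 * raw event mask): a backend's map_event callback can simply feed these
 * tables to nds32_pmu_map_event(), declared below.
 *
 *	static int nds32_spav3_map_event(struct perf_event *event)
 *	{
 *		return nds32_pmu_map_event(event, &nds32_pfm_perf_map,
 *					   &nds32_pfm_perf_cache_map,
 *					   SOFTWARE_EVENT_MASK);
 *	}
 */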
int nds32_pmu_map_event(struct perf_event *event,
			const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
			const unsigned int (*cache_map)
					[PERF_COUNT_HW_CACHE_MAX]
					[PERF_COUNT_HW_CACHE_OP_MAX]
					[PERF_COUNT_HW_CACHE_RESULT_MAX],
			u32 raw_event_mask);

#endif /* __ASM_PMU_H */