/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2008-2018 Andes Technology Corporation */

#ifndef __ASM_PMU_H
#define __ASM_PMU_H

#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <asm/bitfield.h>

/* Has special meaning for perf core implementation */
#define HW_OP_UNSUPPORTED		0x0
#define C(_x)				PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED		0x0

/* Enough for both software and hardware defined events */
#define SOFTWARE_EVENT_MASK		0xFF

#define PFM_OFFSET_MAGIC_0		2	/* DO NOT START FROM 0 */
#define PFM_OFFSET_MAGIC_1		(PFM_OFFSET_MAGIC_0 + 36)
#define PFM_OFFSET_MAGIC_2		(PFM_OFFSET_MAGIC_1 + 36)

enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS };

u32 PFM_CTL_OVF[3] = { PFM_CTL_mskOVF0, PFM_CTL_mskOVF1,
		       PFM_CTL_mskOVF2 };
u32 PFM_CTL_EN[3] = { PFM_CTL_mskEN0, PFM_CTL_mskEN1,
		      PFM_CTL_mskEN2 };
u32 PFM_CTL_OFFSEL[3] = { PFM_CTL_offSEL0, PFM_CTL_offSEL1,
			  PFM_CTL_offSEL2 };
u32 PFM_CTL_IE[3] = { PFM_CTL_mskIE0, PFM_CTL_mskIE1, PFM_CTL_mskIE2 };
u32 PFM_CTL_KS[3] = { PFM_CTL_mskKS0, PFM_CTL_mskKS1, PFM_CTL_mskKS2 };
u32 PFM_CTL_KU[3] = { PFM_CTL_mskKU0, PFM_CTL_mskKU1, PFM_CTL_mskKU2 };
u32 PFM_CTL_SEL[3] = { PFM_CTL_mskSEL0, PFM_CTL_mskSEL1, PFM_CTL_mskSEL2 };
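/*
 * Illustrative sketch only (not part of the interface): these per-counter
 * mask/offset arrays let counter programming be written once and indexed by
 * counter number.  Assuming the PFM_CTL system register is accessed through
 * the usual __nds32__mfsr()/__nds32__mtsr() helpers and NDS32_SR_PFM_CTL,
 * enabling event selector evt_sel on counter idx with overflow interrupt
 * could look roughly like:
 *
 *	u32 val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 *	val &= ~PFM_CTL_SEL[idx];
 *	val |= (evt_sel << PFM_CTL_OFFSEL[idx]) & PFM_CTL_SEL[idx];
 *	val |= PFM_CTL_EN[idx] | PFM_CTL_IE[idx];
 *	__nds32__mtsr(val, NDS32_SR_PFM_CTL);
 */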
/*
 * Perf Events' indices
 */
#define NDS32_IDX_CYCLE_COUNTER			0
#define NDS32_IDX_COUNTER0			1
#define NDS32_IDX_COUNTER1			2

/* The events for a given PMU register set. */
struct pmu_hw_events {
	/*
	 * The events that are active on the PMU for the given index.
	 */
	struct perf_event *events[MAX_COUNTERS];

	/*
	 * A 1 bit for an index indicates that the counter is being used for
	 * an event. A 0 means that the counter can be used.
	 */
	unsigned long used_mask[BITS_TO_LONGS(MAX_COUNTERS)];

	/*
	 * Hardware lock to serialize accesses to PMU registers. Needed for the
	 * read/modify/write sequences.
	 */
	raw_spinlock_t pmu_lock;
};
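/*
 * Illustrative sketch only: used_mask is meant to be manipulated with the
 * standard bitmap helpers.  A get_event_idx() callback (pfm_get_event_idx
 * below is a hypothetical name) could claim the first free counter like so:
 *
 *	static int pfm_get_event_idx(struct pmu_hw_events *hw_events,
 *				     struct perf_event *event)
 *	{
 *		int idx;
 *
 *		for (idx = 0; idx < MAX_COUNTERS; idx++) {
 *			if (!test_and_set_bit(idx, hw_events->used_mask))
 *				return idx;
 *		}
 *		return -EAGAIN;
 *	}
 */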

struct nds32_pmu {
	struct pmu pmu;
	cpumask_t active_irqs;
	char *name;
	irqreturn_t (*handle_irq)(int irq_num, void *dev);
	void (*enable)(struct perf_event *event);
	void (*disable)(struct perf_event *event);
	int (*get_event_idx)(struct pmu_hw_events *hw_events,
			     struct perf_event *event);
	int (*set_event_filter)(struct hw_perf_event *evt,
				struct perf_event_attr *attr);
	u32 (*read_counter)(struct perf_event *event);
	void (*write_counter)(struct perf_event *event, u32 val);
	void (*start)(struct nds32_pmu *nds32_pmu);
	void (*stop)(struct nds32_pmu *nds32_pmu);
	void (*reset)(void *data);
	int (*request_irq)(struct nds32_pmu *nds32_pmu, irq_handler_t handler);
	void (*free_irq)(struct nds32_pmu *nds32_pmu);
	int (*map_event)(struct perf_event *event);
	int num_events;
	atomic_t active_events;
	u64 max_period;
	struct platform_device *plat_device;
	struct pmu_hw_events *(*get_hw_events)(void);
};

#define to_nds32_pmu(p)			(container_of(p, struct nds32_pmu, pmu))
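/*
 * Illustrative sketch only: because struct pmu is embedded in struct
 * nds32_pmu, any callback handed a struct pmu (or a perf_event, whose
 * event->pmu points at it) can recover the wrapper with to_nds32_pmu(), e.g.:
 *
 *	static void nds32_pmu_enable_event(struct perf_event *event)
 *	{
 *		struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 *
 *		nds32_pmu->enable(event);
 *	}
 *
 * nds32_pmu_enable_event above is a hypothetical helper, shown only to
 * demonstrate the container_of() relationship.
 */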

int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type);

u64 nds32_pmu_event_update(struct perf_event *event);

int nds32_pmu_event_set_period(struct perf_event *event);

/*
 * Common NDS32 SPAv3 event types
 *
 * Note: An implementation may not be able to count all of these events,
 * but the encodings are considered to be `reserved' in the case that
 * they are not available.
 *
 * Every selector carries a PFM_OFFSET_MAGIC_n offset (so SEL_TOTAL_CYCLES
 * is non-zero) because zero is treated as the NOT_SUPPORTED event by the
 * generic perf mapping code.  The offset must be subtracted again when the
 * event is written to the hardware; see get_converted_evet_hw_num() below.
 */
enum spav3_counter_0_perf_types {
	SPAV3_0_SEL_BASE = -1 + PFM_OFFSET_MAGIC_0,	/* counting symbol */
	SPAV3_0_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_0,
	SPAV3_0_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_0,
	SPAV3_0_SEL_LAST	/* counting symbol */
};

enum spav3_counter_1_perf_types {
	SPAV3_1_SEL_BASE = -1 + PFM_OFFSET_MAGIC_1,	/* counting symbol */
	SPAV3_1_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CONDITIONAL_BRANCH = 2 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_TAKEN_CONDITIONAL_BRANCH = 3 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_PREFETCH_INSTRUCTION = 4 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_RET_INST = 5 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_JR_INST = 6 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_JAL_JRAL_INST = 7 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_NOP_INST = 8 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_SCW_INST = 9 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_ISB_DSB_INST = 10 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CCTL_INST = 11 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_TAKEN_INTERRUPTS = 12 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LOADS_COMPLETED = 13 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_UITLB_ACCESS = 14 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_UDTLB_ACCESS = 15 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_MTLB_ACCESS = 16 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CODE_CACHE_ACCESS = 17 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_DEPENDENCY_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_MISS_STALL_CYCLES = 19 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_ACCESS = 20 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS = 22 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS = 23 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_ILM_ACCESS = 24 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LSU_BIU_CYCLES = 25 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_HPTWK_BIU_CYCLES = 26 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DMA_BIU_CYCLES = 27 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CODE_CACHE_FILL_BIU_CYCLES = 28 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LEGAL_UNALIGN_DCACHE_ACCESS = 29 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_PUSH25 = 30 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_SYSCALLS_INST = 31 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LAST	/* counting symbol */
};

enum spav3_counter_2_perf_types {
	SPAV3_2_SEL_BASE = -1 + PFM_OFFSET_MAGIC_2,	/* counting symbol */
	SPAV3_2_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_CONDITIONAL_BRANCH_MISPREDICT = 2 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_TAKEN_CONDITIONAL_BRANCH_MISPREDICT =
	    3 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_PREFETCH_INSTRUCTION_CACHE_HIT = 4 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_RET_MISPREDICT = 5 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_IMMEDIATE_J_INST = 6 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_MULTIPLY_INST = 7 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_16_BIT_INST = 8 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_FAILED_SCW_INST = 9 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_LD_AFTER_ST_CONFLICT_REPLAYS = 10 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_TAKEN_EXCEPTIONS = 12 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_STORES_COMPLETED = 13 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_UITLB_MISS = 14 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_UDTLB_MISS = 15 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_MTLB_MISS = 16 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_CODE_CACHE_MISS = 17 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_EMPTY_INST_QUEUE_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DATA_WRITE_BACK = 19 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_LOAD_DATA_CACHE_MISS = 22 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_STORE_DATA_CACHE_MISS = 23 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DLM_ACCESS = 24 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_LSU_BIU_REQUEST = 25 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_HPTWK_BIU_REQUEST = 26 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DMA_BIU_REQUEST = 27 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_CODE_CACHE_FILL_BIU_REQUEST = 28 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_EXTERNAL_EVENTS = 29 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_POP25 = 30 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_LAST	/* counting symbol */
};

/* Get converted event counter index */
static inline int get_converted_event_idx(unsigned long event)
{
	int idx;

	if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) {
		idx = 0;
	} else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) {
		idx = 1;
	} else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) {
		idx = 2;
	} else {
		pr_err("GET_CONVERTED_EVENT_IDX PFM counter range error\n");
		return -EPERM;
	}

	return idx;
}

/* Get converted hardware event number */
static inline u32 get_converted_evet_hw_num(u32 event)
{
	if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST)
		event -= PFM_OFFSET_MAGIC_0;
	else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST)
		event -= PFM_OFFSET_MAGIC_1;
	else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST)
		event -= PFM_OFFSET_MAGIC_2;
	else if (event != 0)
		pr_err("GET_CONVERTED_EVENT_HW_NUM PFM counter range error\n");

	return event;
}
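/*
 * Illustrative sketch only: the two helpers above are meant to be used
 * together when programming a counter.  Given an offset-encoded selector
 * from the tables below, the counter index and the raw hardware event
 * number are recovered like this (evsel and pfm_ctl_val are hypothetical
 * variables used only for illustration):
 *
 *	int idx = get_converted_event_idx(evsel);
 *	u32 hw_num = get_converted_evet_hw_num(evsel);
 *
 *	if (idx >= 0)
 *		pfm_ctl_val |= (hw_num << PFM_CTL_OFFSEL[idx]) &
 *			       PFM_CTL_SEL[idx];
 */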

/*
 * NDS32 HW events mapping
 *
 * The hardware events that we support. We do support cache operations, but
 * we have Harvard caches and no way to combine instruction and data
 * accesses/misses in hardware.
 */
static const unsigned int nds32_pfm_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES] = SPAV3_0_SEL_TOTAL_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS] = SPAV3_1_SEL_COMPLETED_INSTRUCTION,
	[PERF_COUNT_HW_CACHE_REFERENCES] = SPAV3_1_SEL_DATA_CACHE_ACCESS,
	[PERF_COUNT_HW_CACHE_MISSES] = SPAV3_2_SEL_DATA_CACHE_MISS,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_MISSES] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED
};

static const unsigned int nds32_pfm_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
	[PERF_COUNT_HW_CACHE_OP_MAX]
	[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		    [C(OP_READ)] = {
				    [C(RESULT_ACCESS)] =
				    SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS,
				    [C(RESULT_MISS)] =
				    SPAV3_2_SEL_LOAD_DATA_CACHE_MISS,
				    },
		    [C(OP_WRITE)] = {
				     [C(RESULT_ACCESS)] =
				     SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS,
				     [C(RESULT_MISS)] =
				     SPAV3_2_SEL_STORE_DATA_CACHE_MISS,
				     },
		    [C(OP_PREFETCH)] = {
					[C(RESULT_ACCESS)] =
						CACHE_OP_UNSUPPORTED,
					[C(RESULT_MISS)] =
						CACHE_OP_UNSUPPORTED,
					},
		    },
	[C(L1I)] = {
		    [C(OP_READ)] = {
				    [C(RESULT_ACCESS)] =
				    SPAV3_1_SEL_CODE_CACHE_ACCESS,
				    [C(RESULT_MISS)] =
				    SPAV3_2_SEL_CODE_CACHE_MISS,
				    },
		    [C(OP_WRITE)] = {
				     [C(RESULT_ACCESS)] =
				     SPAV3_1_SEL_CODE_CACHE_ACCESS,
				     [C(RESULT_MISS)] =
				     SPAV3_2_SEL_CODE_CACHE_MISS,
				     },
		    [C(OP_PREFETCH)] = {
					[C(RESULT_ACCESS)] =
					CACHE_OP_UNSUPPORTED,
					[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
					},
		    },
	/* TODO: L2CC */
	[C(LL)] = {
		   [C(OP_READ)] = {
				   [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
				   [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
				   },
		   [C(OP_WRITE)] = {
				    [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
				    [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
				    },
		   [C(OP_PREFETCH)] = {
				       [C(RESULT_ACCESS)] =
				       CACHE_OP_UNSUPPORTED,
				       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
				       },
		   },
	/*
	 * The NDS32 PMU cannot distinguish TLB reads from writes; it only
	 * counts combined accesses and misses.  Those combined counts are
	 * therefore reported through the READ entries only, which is the
	 * closest mapping the hardware allows.
	 */
	[C(DTLB)] = {
		     [C(OP_READ)] = {
				     [C(RESULT_ACCESS)] =
					SPAV3_1_SEL_UDTLB_ACCESS,
				     [C(RESULT_MISS)] =
					SPAV3_2_SEL_UDTLB_MISS,
				     },
		     [C(OP_WRITE)] = {
				      [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
				      [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
				      },
		     [C(OP_PREFETCH)] = {
					 [C(RESULT_ACCESS)] =
					 CACHE_OP_UNSUPPORTED,
					 [C(RESULT_MISS)] =
					 CACHE_OP_UNSUPPORTED,
					 },
		     },
	[C(ITLB)] = {
		     [C(OP_READ)] = {
				     [C(RESULT_ACCESS)] =
					SPAV3_1_SEL_UITLB_ACCESS,
				     [C(RESULT_MISS)] =
					SPAV3_2_SEL_UITLB_MISS,
				     },
		     [C(OP_WRITE)] = {
				      [C(RESULT_ACCESS)] =
					CACHE_OP_UNSUPPORTED,
				      [C(RESULT_MISS)] =
					CACHE_OP_UNSUPPORTED,
				      },
		     [C(OP_PREFETCH)] = {
					 [C(RESULT_ACCESS)] =
						CACHE_OP_UNSUPPORTED,
					 [C(RESULT_MISS)] =
						CACHE_OP_UNSUPPORTED,
					 },
		     },
	[C(BPU)] = {		/* Branch prediction unit: not counted */
		    [C(OP_READ)] = {
				    [C(RESULT_ACCESS)] =
					CACHE_OP_UNSUPPORTED,
				    [C(RESULT_MISS)] =
					CACHE_OP_UNSUPPORTED,
				    },
		    [C(OP_WRITE)] = {
				     [C(RESULT_ACCESS)] =
					CACHE_OP_UNSUPPORTED,
				     [C(RESULT_MISS)] =
					CACHE_OP_UNSUPPORTED,
				     },
		    [C(OP_PREFETCH)] = {
					[C(RESULT_ACCESS)] =
						CACHE_OP_UNSUPPORTED,
					[C(RESULT_MISS)] =
						CACHE_OP_UNSUPPORTED,
					},
		    },
	[C(NODE)] = {		/* Local-node memory accesses: not counted */
		     [C(OP_READ)] = {
				     [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
				     [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
				     },
		     [C(OP_WRITE)] = {
				      [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
				      [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
				      },
		     [C(OP_PREFETCH)] = {
					 [C(RESULT_ACCESS)] =
						CACHE_OP_UNSUPPORTED,
					 [C(RESULT_MISS)] =
						CACHE_OP_UNSUPPORTED,
					 },
		     },
};

int nds32_pmu_map_event(struct perf_event *event,
			const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
			const unsigned int (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
			[PERF_COUNT_HW_CACHE_OP_MAX]
			[PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask);
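/*
 * Illustrative sketch only: nds32_pmu_map_event() is expected to dispatch on
 * event->attr.type, roughly along these lines (map_hw_event, map_cache_event
 * and map_raw_event are hypothetical helpers shown purely to outline the
 * flow):
 *
 *	switch (event->attr.type) {
 *	case PERF_TYPE_HARDWARE:
 *		return map_hw_event(event_map, event->attr.config);
 *	case PERF_TYPE_HW_CACHE:
 *		return map_cache_event(cache_map, event->attr.config);
 *	case PERF_TYPE_RAW:
 *		return map_raw_event(raw_event_mask, event->attr.config);
 *	}
 *	return -ENOENT;
 */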

#endif /* __ASM_PMU_H */