1// SPDX-License-Identifier: GPL-2.0
2// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
3
4#include <linux/errno.h>
5#include <linux/interrupt.h>
6#include <linux/module.h>
7#include <linux/of.h>
8#include <linux/perf_event.h>
9#include <linux/platform_device.h>
10
#define CSKY_PMU_MAX_EVENTS 32		/* number of raw counter indices the PMU exposes */
#define DEFAULT_COUNT_WIDTH 48		/* counter width (bits) when DT lacks "count-width" */

/*
 * Coprocessor control-register selectors.  These are string literals of
 * the form "<cpid, regid>" pasted directly into the cprcr()/cpwcr()
 * inline-asm wrappers below.
 */
#define HPCR		"<0, 0x0>"      /* PMU Control reg */
#define HPSPR		"<0, 0x1>"      /* Start PC reg */
#define HPEPR		"<0, 0x2>"      /* End PC reg */
#define HPSIR		"<0, 0x3>"      /* Soft Counter reg */
#define HPCNTENR	"<0, 0x4>"      /* Count Enable reg */
#define HPINTENR	"<0, 0x5>"      /* Interrupt Enable reg */
#define HPOFSR		"<0, 0x6>"      /* Interrupt Status reg */
21
22/* The events for a given PMU register set. */
/*
 * The events for a given PMU register set.  One instance of this
 * structure is allocated per CPU (see csky_pmu.hw_events).
 */
struct pmu_hw_events {
	/*
	 * The events that are active on the PMU for the given index.
	 * Indexed by the raw counter id (hwc->idx).
	 */
	struct perf_event *events[CSKY_PMU_MAX_EVENTS];

	/*
	 * A 1 bit for an index indicates that the counter is being used for
	 * an event. A 0 means that the counter can be used.
	 */
	unsigned long used_mask[BITS_TO_LONGS(CSKY_PMU_MAX_EVENTS)];
};
35
/*
 * Per-counter-index accessors for the 64-bit hardware counters.  A NULL
 * slot means the corresponding raw event id is not implemented; both
 * tables are filled in by init_hw_perf_events().
 */
static uint64_t (*hw_raw_read_mapping[CSKY_PMU_MAX_EVENTS])(void);
static void (*hw_raw_write_mapping[CSKY_PMU_MAX_EVENTS])(uint64_t val);

/* The single system-wide PMU instance. */
static struct csky_pmu_t {
	struct pmu			pmu;
	struct pmu_hw_events __percpu	*hw_events;
	struct platform_device		*plat_device;
	uint32_t			count_width;	/* counter width in bits */
	uint32_t			hpcr;		/* HPCR value written on pmu_enable */
	u64				max_period;	/* (1 << count_width) - 1 */
} csky_pmu;
static int csky_pmu_irq;	/* percpu overflow interrupt number */
48
49#define to_csky_pmu(p)  (container_of(p, struct csky_pmu, pmu))
50
/*
 * Inline-asm wrappers for the C-SKY coprocessor interface.  @reg must
 * be a string literal of the form "<cpid, regid>"; it is concatenated
 * directly into the instruction text.
 *
 * cprgr()/cpwgr() read/write a coprocessor *general* register (counter
 * halves); cprcr()/cpwcr() read/write a coprocessor *control* register
 * (HPCR, HPCNTENR, HPINTENR, HPOFSR, ...).  The "memory" clobber keeps
 * these accesses ordered with respect to surrounding code.
 */
#define cprgr(reg)				\
({						\
	unsigned int tmp;			\
	asm volatile("cprgr %0, "reg"\n"	\
		     : "=r"(tmp)		\
		     :				\
		     : "memory");		\
	tmp;					\
})

#define cpwgr(reg, val)		\
({				\
	asm volatile(		\
	"cpwgr %0, "reg"\n"	\
	:			\
	: "r"(val)		\
	: "memory");		\
})

#define cprcr(reg)				\
({						\
	unsigned int tmp;			\
	asm volatile("cprcr %0, "reg"\n"	\
		     : "=r"(tmp)		\
		     :				\
		     : "memory");		\
	tmp;					\
})

#define cpwcr(reg, val)		\
({				\
	asm volatile(		\
	"cpwcr %0, "reg"\n"	\
	:			\
	: "r"(val)		\
	: "memory");		\
})
88
89/* cycle counter */
90uint64_t csky_pmu_read_cc(void)
91{
92	uint32_t lo, hi, tmp;
93	uint64_t result;
94
95	do {
96		tmp = cprgr("<0, 0x3>");
97		lo  = cprgr("<0, 0x2>");
98		hi  = cprgr("<0, 0x3>");
99	} while (hi != tmp);
100
101	result = (uint64_t) (hi) << 32;
102	result |= lo;
103
104	return result;
105}
106
107static void csky_pmu_write_cc(uint64_t val)
108{
109	cpwgr("<0, 0x2>", (uint32_t)  val);
110	cpwgr("<0, 0x3>", (uint32_t) (val >> 32));
111}
112
113/* instruction counter */
114static uint64_t csky_pmu_read_ic(void)
115{
116	uint32_t lo, hi, tmp;
117	uint64_t result;
118
119	do {
120		tmp = cprgr("<0, 0x5>");
121		lo  = cprgr("<0, 0x4>");
122		hi  = cprgr("<0, 0x5>");
123	} while (hi != tmp);
124
125	result = (uint64_t) (hi) << 32;
126	result |= lo;
127
128	return result;
129}
130
131static void csky_pmu_write_ic(uint64_t val)
132{
133	cpwgr("<0, 0x4>", (uint32_t)  val);
134	cpwgr("<0, 0x5>", (uint32_t) (val >> 32));
135}
136
137/* l1 icache access counter */
138static uint64_t csky_pmu_read_icac(void)
139{
140	uint32_t lo, hi, tmp;
141	uint64_t result;
142
143	do {
144		tmp = cprgr("<0, 0x7>");
145		lo  = cprgr("<0, 0x6>");
146		hi  = cprgr("<0, 0x7>");
147	} while (hi != tmp);
148
149	result = (uint64_t) (hi) << 32;
150	result |= lo;
151
152	return result;
153}
154
155static void csky_pmu_write_icac(uint64_t val)
156{
157	cpwgr("<0, 0x6>", (uint32_t)  val);
158	cpwgr("<0, 0x7>", (uint32_t) (val >> 32));
159}
160
161/* l1 icache miss counter */
162static uint64_t csky_pmu_read_icmc(void)
163{
164	uint32_t lo, hi, tmp;
165	uint64_t result;
166
167	do {
168		tmp = cprgr("<0, 0x9>");
169		lo  = cprgr("<0, 0x8>");
170		hi  = cprgr("<0, 0x9>");
171	} while (hi != tmp);
172
173	result = (uint64_t) (hi) << 32;
174	result |= lo;
175
176	return result;
177}
178
179static void csky_pmu_write_icmc(uint64_t val)
180{
181	cpwgr("<0, 0x8>", (uint32_t)  val);
182	cpwgr("<0, 0x9>", (uint32_t) (val >> 32));
183}
184
185/* l1 dcache access counter */
186static uint64_t csky_pmu_read_dcac(void)
187{
188	uint32_t lo, hi, tmp;
189	uint64_t result;
190
191	do {
192		tmp = cprgr("<0, 0xb>");
193		lo  = cprgr("<0, 0xa>");
194		hi  = cprgr("<0, 0xb>");
195	} while (hi != tmp);
196
197	result = (uint64_t) (hi) << 32;
198	result |= lo;
199
200	return result;
201}
202
203static void csky_pmu_write_dcac(uint64_t val)
204{
205	cpwgr("<0, 0xa>", (uint32_t)  val);
206	cpwgr("<0, 0xb>", (uint32_t) (val >> 32));
207}
208
209/* l1 dcache miss counter */
210static uint64_t csky_pmu_read_dcmc(void)
211{
212	uint32_t lo, hi, tmp;
213	uint64_t result;
214
215	do {
216		tmp = cprgr("<0, 0xd>");
217		lo  = cprgr("<0, 0xc>");
218		hi  = cprgr("<0, 0xd>");
219	} while (hi != tmp);
220
221	result = (uint64_t) (hi) << 32;
222	result |= lo;
223
224	return result;
225}
226
227static void csky_pmu_write_dcmc(uint64_t val)
228{
229	cpwgr("<0, 0xc>", (uint32_t)  val);
230	cpwgr("<0, 0xd>", (uint32_t) (val >> 32));
231}
232
233/* l2 cache access counter */
234static uint64_t csky_pmu_read_l2ac(void)
235{
236	uint32_t lo, hi, tmp;
237	uint64_t result;
238
239	do {
240		tmp = cprgr("<0, 0xf>");
241		lo  = cprgr("<0, 0xe>");
242		hi  = cprgr("<0, 0xf>");
243	} while (hi != tmp);
244
245	result = (uint64_t) (hi) << 32;
246	result |= lo;
247
248	return result;
249}
250
251static void csky_pmu_write_l2ac(uint64_t val)
252{
253	cpwgr("<0, 0xe>", (uint32_t)  val);
254	cpwgr("<0, 0xf>", (uint32_t) (val >> 32));
255}
256
257/* l2 cache miss counter */
258static uint64_t csky_pmu_read_l2mc(void)
259{
260	uint32_t lo, hi, tmp;
261	uint64_t result;
262
263	do {
264		tmp = cprgr("<0, 0x11>");
265		lo  = cprgr("<0, 0x10>");
266		hi  = cprgr("<0, 0x11>");
267	} while (hi != tmp);
268
269	result = (uint64_t) (hi) << 32;
270	result |= lo;
271
272	return result;
273}
274
275static void csky_pmu_write_l2mc(uint64_t val)
276{
277	cpwgr("<0, 0x10>", (uint32_t)  val);
278	cpwgr("<0, 0x11>", (uint32_t) (val >> 32));
279}
280
281/* I-UTLB miss counter */
282static uint64_t csky_pmu_read_iutlbmc(void)
283{
284	uint32_t lo, hi, tmp;
285	uint64_t result;
286
287	do {
288		tmp = cprgr("<0, 0x15>");
289		lo  = cprgr("<0, 0x14>");
290		hi  = cprgr("<0, 0x15>");
291	} while (hi != tmp);
292
293	result = (uint64_t) (hi) << 32;
294	result |= lo;
295
296	return result;
297}
298
299static void csky_pmu_write_iutlbmc(uint64_t val)
300{
301	cpwgr("<0, 0x14>", (uint32_t)  val);
302	cpwgr("<0, 0x15>", (uint32_t) (val >> 32));
303}
304
305/* D-UTLB miss counter */
306static uint64_t csky_pmu_read_dutlbmc(void)
307{
308	uint32_t lo, hi, tmp;
309	uint64_t result;
310
311	do {
312		tmp = cprgr("<0, 0x17>");
313		lo  = cprgr("<0, 0x16>");
314		hi  = cprgr("<0, 0x17>");
315	} while (hi != tmp);
316
317	result = (uint64_t) (hi) << 32;
318	result |= lo;
319
320	return result;
321}
322
323static void csky_pmu_write_dutlbmc(uint64_t val)
324{
325	cpwgr("<0, 0x16>", (uint32_t)  val);
326	cpwgr("<0, 0x17>", (uint32_t) (val >> 32));
327}
328
329/* JTLB miss counter */
330static uint64_t csky_pmu_read_jtlbmc(void)
331{
332	uint32_t lo, hi, tmp;
333	uint64_t result;
334
335	do {
336		tmp = cprgr("<0, 0x19>");
337		lo  = cprgr("<0, 0x18>");
338		hi  = cprgr("<0, 0x19>");
339	} while (hi != tmp);
340
341	result = (uint64_t) (hi) << 32;
342	result |= lo;
343
344	return result;
345}
346
347static void csky_pmu_write_jtlbmc(uint64_t val)
348{
349	cpwgr("<0, 0x18>", (uint32_t)  val);
350	cpwgr("<0, 0x19>", (uint32_t) (val >> 32));
351}
352
353/* software counter */
354static uint64_t csky_pmu_read_softc(void)
355{
356	uint32_t lo, hi, tmp;
357	uint64_t result;
358
359	do {
360		tmp = cprgr("<0, 0x1b>");
361		lo  = cprgr("<0, 0x1a>");
362		hi  = cprgr("<0, 0x1b>");
363	} while (hi != tmp);
364
365	result = (uint64_t) (hi) << 32;
366	result |= lo;
367
368	return result;
369}
370
371static void csky_pmu_write_softc(uint64_t val)
372{
373	cpwgr("<0, 0x1a>", (uint32_t)  val);
374	cpwgr("<0, 0x1b>", (uint32_t) (val >> 32));
375}
376
377/* conditional branch mispredict counter */
378static uint64_t csky_pmu_read_cbmc(void)
379{
380	uint32_t lo, hi, tmp;
381	uint64_t result;
382
383	do {
384		tmp = cprgr("<0, 0x1d>");
385		lo  = cprgr("<0, 0x1c>");
386		hi  = cprgr("<0, 0x1d>");
387	} while (hi != tmp);
388
389	result = (uint64_t) (hi) << 32;
390	result |= lo;
391
392	return result;
393}
394
395static void csky_pmu_write_cbmc(uint64_t val)
396{
397	cpwgr("<0, 0x1c>", (uint32_t)  val);
398	cpwgr("<0, 0x1d>", (uint32_t) (val >> 32));
399}
400
401/* conditional branch instruction counter */
402static uint64_t csky_pmu_read_cbic(void)
403{
404	uint32_t lo, hi, tmp;
405	uint64_t result;
406
407	do {
408		tmp = cprgr("<0, 0x1f>");
409		lo  = cprgr("<0, 0x1e>");
410		hi  = cprgr("<0, 0x1f>");
411	} while (hi != tmp);
412
413	result = (uint64_t) (hi) << 32;
414	result |= lo;
415
416	return result;
417}
418
419static void csky_pmu_write_cbic(uint64_t val)
420{
421	cpwgr("<0, 0x1e>", (uint32_t)  val);
422	cpwgr("<0, 0x1f>", (uint32_t) (val >> 32));
423}
424
425/* indirect branch mispredict counter */
426static uint64_t csky_pmu_read_ibmc(void)
427{
428	uint32_t lo, hi, tmp;
429	uint64_t result;
430
431	do {
432		tmp = cprgr("<0, 0x21>");
433		lo  = cprgr("<0, 0x20>");
434		hi  = cprgr("<0, 0x21>");
435	} while (hi != tmp);
436
437	result = (uint64_t) (hi) << 32;
438	result |= lo;
439
440	return result;
441}
442
443static void csky_pmu_write_ibmc(uint64_t val)
444{
445	cpwgr("<0, 0x20>", (uint32_t)  val);
446	cpwgr("<0, 0x21>", (uint32_t) (val >> 32));
447}
448
449/* indirect branch instruction counter */
450static uint64_t csky_pmu_read_ibic(void)
451{
452	uint32_t lo, hi, tmp;
453	uint64_t result;
454
455	do {
456		tmp = cprgr("<0, 0x23>");
457		lo  = cprgr("<0, 0x22>");
458		hi  = cprgr("<0, 0x23>");
459	} while (hi != tmp);
460
461	result = (uint64_t) (hi) << 32;
462	result |= lo;
463
464	return result;
465}
466
467static void csky_pmu_write_ibic(uint64_t val)
468{
469	cpwgr("<0, 0x22>", (uint32_t)  val);
470	cpwgr("<0, 0x23>", (uint32_t) (val >> 32));
471}
472
473/* LSU spec fail counter */
474static uint64_t csky_pmu_read_lsfc(void)
475{
476	uint32_t lo, hi, tmp;
477	uint64_t result;
478
479	do {
480		tmp = cprgr("<0, 0x25>");
481		lo  = cprgr("<0, 0x24>");
482		hi  = cprgr("<0, 0x25>");
483	} while (hi != tmp);
484
485	result = (uint64_t) (hi) << 32;
486	result |= lo;
487
488	return result;
489}
490
491static void csky_pmu_write_lsfc(uint64_t val)
492{
493	cpwgr("<0, 0x24>", (uint32_t)  val);
494	cpwgr("<0, 0x25>", (uint32_t) (val >> 32));
495}
496
497/* store instruction counter */
498static uint64_t csky_pmu_read_sic(void)
499{
500	uint32_t lo, hi, tmp;
501	uint64_t result;
502
503	do {
504		tmp = cprgr("<0, 0x27>");
505		lo  = cprgr("<0, 0x26>");
506		hi  = cprgr("<0, 0x27>");
507	} while (hi != tmp);
508
509	result = (uint64_t) (hi) << 32;
510	result |= lo;
511
512	return result;
513}
514
515static void csky_pmu_write_sic(uint64_t val)
516{
517	cpwgr("<0, 0x26>", (uint32_t)  val);
518	cpwgr("<0, 0x27>", (uint32_t) (val >> 32));
519}
520
521/* dcache read access counter */
522static uint64_t csky_pmu_read_dcrac(void)
523{
524	uint32_t lo, hi, tmp;
525	uint64_t result;
526
527	do {
528		tmp = cprgr("<0, 0x29>");
529		lo  = cprgr("<0, 0x28>");
530		hi  = cprgr("<0, 0x29>");
531	} while (hi != tmp);
532
533	result = (uint64_t) (hi) << 32;
534	result |= lo;
535
536	return result;
537}
538
539static void csky_pmu_write_dcrac(uint64_t val)
540{
541	cpwgr("<0, 0x28>", (uint32_t)  val);
542	cpwgr("<0, 0x29>", (uint32_t) (val >> 32));
543}
544
545/* dcache read miss counter */
546static uint64_t csky_pmu_read_dcrmc(void)
547{
548	uint32_t lo, hi, tmp;
549	uint64_t result;
550
551	do {
552		tmp = cprgr("<0, 0x2b>");
553		lo  = cprgr("<0, 0x2a>");
554		hi  = cprgr("<0, 0x2b>");
555	} while (hi != tmp);
556
557	result = (uint64_t) (hi) << 32;
558	result |= lo;
559
560	return result;
561}
562
563static void csky_pmu_write_dcrmc(uint64_t val)
564{
565	cpwgr("<0, 0x2a>", (uint32_t)  val);
566	cpwgr("<0, 0x2b>", (uint32_t) (val >> 32));
567}
568
569/* dcache write access counter */
570static uint64_t csky_pmu_read_dcwac(void)
571{
572	uint32_t lo, hi, tmp;
573	uint64_t result;
574
575	do {
576		tmp = cprgr("<0, 0x2d>");
577		lo  = cprgr("<0, 0x2c>");
578		hi  = cprgr("<0, 0x2d>");
579	} while (hi != tmp);
580
581	result = (uint64_t) (hi) << 32;
582	result |= lo;
583
584	return result;
585}
586
587static void csky_pmu_write_dcwac(uint64_t val)
588{
589	cpwgr("<0, 0x2c>", (uint32_t)  val);
590	cpwgr("<0, 0x2d>", (uint32_t) (val >> 32));
591}
592
593/* dcache write miss counter */
594static uint64_t csky_pmu_read_dcwmc(void)
595{
596	uint32_t lo, hi, tmp;
597	uint64_t result;
598
599	do {
600		tmp = cprgr("<0, 0x2f>");
601		lo  = cprgr("<0, 0x2e>");
602		hi  = cprgr("<0, 0x2f>");
603	} while (hi != tmp);
604
605	result = (uint64_t) (hi) << 32;
606	result |= lo;
607
608	return result;
609}
610
611static void csky_pmu_write_dcwmc(uint64_t val)
612{
613	cpwgr("<0, 0x2e>", (uint32_t)  val);
614	cpwgr("<0, 0x2f>", (uint32_t) (val >> 32));
615}
616
617/* l2cache read access counter */
618static uint64_t csky_pmu_read_l2rac(void)
619{
620	uint32_t lo, hi, tmp;
621	uint64_t result;
622
623	do {
624		tmp = cprgr("<0, 0x31>");
625		lo  = cprgr("<0, 0x30>");
626		hi  = cprgr("<0, 0x31>");
627	} while (hi != tmp);
628
629	result = (uint64_t) (hi) << 32;
630	result |= lo;
631
632	return result;
633}
634
635static void csky_pmu_write_l2rac(uint64_t val)
636{
637	cpwgr("<0, 0x30>", (uint32_t)  val);
638	cpwgr("<0, 0x31>", (uint32_t) (val >> 32));
639}
640
641/* l2cache read miss counter */
642static uint64_t csky_pmu_read_l2rmc(void)
643{
644	uint32_t lo, hi, tmp;
645	uint64_t result;
646
647	do {
648		tmp = cprgr("<0, 0x33>");
649		lo  = cprgr("<0, 0x32>");
650		hi  = cprgr("<0, 0x33>");
651	} while (hi != tmp);
652
653	result = (uint64_t) (hi) << 32;
654	result |= lo;
655
656	return result;
657}
658
659static void csky_pmu_write_l2rmc(uint64_t val)
660{
661	cpwgr("<0, 0x32>", (uint32_t)  val);
662	cpwgr("<0, 0x33>", (uint32_t) (val >> 32));
663}
664
665/* l2cache write access counter */
666static uint64_t csky_pmu_read_l2wac(void)
667{
668	uint32_t lo, hi, tmp;
669	uint64_t result;
670
671	do {
672		tmp = cprgr("<0, 0x35>");
673		lo  = cprgr("<0, 0x34>");
674		hi  = cprgr("<0, 0x35>");
675	} while (hi != tmp);
676
677	result = (uint64_t) (hi) << 32;
678	result |= lo;
679
680	return result;
681}
682
683static void csky_pmu_write_l2wac(uint64_t val)
684{
685	cpwgr("<0, 0x34>", (uint32_t)  val);
686	cpwgr("<0, 0x35>", (uint32_t) (val >> 32));
687}
688
689/* l2cache write miss counter */
690static uint64_t csky_pmu_read_l2wmc(void)
691{
692	uint32_t lo, hi, tmp;
693	uint64_t result;
694
695	do {
696		tmp = cprgr("<0, 0x37>");
697		lo  = cprgr("<0, 0x36>");
698		hi  = cprgr("<0, 0x37>");
699	} while (hi != tmp);
700
701	result = (uint64_t) (hi) << 32;
702	result |= lo;
703
704	return result;
705}
706
707static void csky_pmu_write_l2wmc(uint64_t val)
708{
709	cpwgr("<0, 0x36>", (uint32_t)  val);
710	cpwgr("<0, 0x37>", (uint32_t) (val >> 32));
711}
712
#define HW_OP_UNSUPPORTED	0xffff
/*
 * Generic perf hardware event -> raw counter index.  HW_OP_UNSUPPORTED
 * marks events this PMU cannot count; valid entries are indices into
 * hw_raw_read_mapping[]/hw_raw_write_mapping[].
 */
static const int csky_pmu_hw_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x1,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x2,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES]		= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0xf,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0xe,
	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_REF_CPU_CYCLES]		= HW_OP_UNSUPPORTED,
};
726
#define C(_x)			PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED	0xffff
/*
 * Generic perf cache event (type x op x result) -> raw counter index.
 * CK810 only has combined access/miss counters (modelled here as
 * OP_PREFETCH); other cores expose separate read/write counters.
 * NOTE(review): the DTLB RESULT_ACCESS entries (0x14/0x16) reuse the
 * dcache read/write access counters as a proxy - presumably intentional
 * since there is no dedicated DTLB access counter; confirm against the
 * C-SKY PMU manual.
 */
static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
#ifdef CONFIG_CPU_CK810
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= 0x5,
			[C(RESULT_MISS)]	= 0x6,
		},
#else
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= 0x14,
			[C(RESULT_MISS)]	= 0x15,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= 0x16,
			[C(RESULT_MISS)]	= 0x17,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
#endif
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= 0x3,
			[C(RESULT_MISS)]	= 0x4,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
#ifdef CONFIG_CPU_CK810
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= 0x7,
			[C(RESULT_MISS)]	= 0x8,
		},
#else
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= 0x18,
			[C(RESULT_MISS)]	= 0x19,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= 0x1a,
			[C(RESULT_MISS)]	= 0x1b,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
#endif
	},
	[C(DTLB)] = {
#ifdef CONFIG_CPU_CK810
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
#else
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= 0x14,
			[C(RESULT_MISS)]	= 0xb,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= 0x16,
			[C(RESULT_MISS)]	= 0xb,
		},
#endif
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
#ifdef CONFIG_CPU_CK810
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
#else
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= 0x3,
			[C(RESULT_MISS)]	= 0xa,
		},
#endif
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
};
877
/*
 * Program the counter so it overflows after the remaining sample
 * period: write -left (clamped to max_period) into the hardware counter
 * and record it in prev_count for later delta extraction.  Returns 1 if
 * the period was (re)started, 0 otherwise.
 */
int  csky_pmu_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	/* Far behind (more than a whole period): restart from scratch. */
	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	/* Period already elapsed: start the next one. */
	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > (s64)csky_pmu.max_period)
		left = csky_pmu.max_period;

	/*
	 * The hw event starts counting from this event offset,
	 * mark it to be able to extract future "deltas":
	 */
	local64_set(&hwc->prev_count, (u64)(-left));

	if (hw_raw_write_mapping[hwc->idx] != NULL)
		hw_raw_write_mapping[hwc->idx]((u64)(-left) &
						csky_pmu.max_period);

	/* Clear any stale overflow status for this counter. */
	cpwcr(HPOFSR, ~BIT(hwc->idx) & cprcr(HPOFSR));

	perf_event_update_userpage(event);

	return ret;
}
918
/*
 * Fold the counter's progress since the last read into event->count and
 * consume the corresponding amount of period_left.
 */
static void csky_perf_event_update(struct perf_event *event,
				   struct hw_perf_event *hwc)
{
	uint64_t prev_raw_count = local64_read(&hwc->prev_count);
	/*
	 * Sign extend count value to 64bit, otherwise delta calculation
	 * would be incorrect when overflow occurs.
	 */
	uint64_t new_raw_count = sign_extend64(
		hw_raw_read_mapping[hwc->idx](), csky_pmu.count_width - 1);
	int64_t delta = new_raw_count - prev_raw_count;

	/*
	 * We aren't afraid of hwc->prev_count changing beneath our feet
	 * because there's no way for us to re-enter this function anytime.
	 */
	local64_set(&hwc->prev_count, new_raw_count);
	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);
}
939
/*
 * Per-CPU reset callback run via on_each_cpu() at probe time.
 * NOTE(review): BIT(31)/BIT(30) presumably reset/clear the counters and
 * BIT(1) is the control bit kept set by enable/disable below - confirm
 * against the C-SKY HPCR register layout.
 */
static void csky_pmu_reset(void *info)
{
	cpwcr(HPCR, BIT(31) | BIT(30) | BIT(1));
}
944
/* pmu::read callback - sync the hardware counter into event->count. */
static void csky_pmu_read(struct perf_event *event)
{
	csky_perf_event_update(event, &event->hw);
}
949
950static int csky_pmu_cache_event(u64 config)
951{
952	unsigned int cache_type, cache_op, cache_result;
953
954	cache_type	= (config >>  0) & 0xff;
955	cache_op	= (config >>  8) & 0xff;
956	cache_result	= (config >> 16) & 0xff;
957
958	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
959		return -EINVAL;
960	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
961		return -EINVAL;
962	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
963		return -EINVAL;
964
965	return csky_pmu_cache_map[cache_type][cache_op][cache_result];
966}
967
968static int csky_pmu_event_init(struct perf_event *event)
969{
970	struct hw_perf_event *hwc = &event->hw;
971	int ret;
972
973	switch (event->attr.type) {
974	case PERF_TYPE_HARDWARE:
975		if (event->attr.config >= PERF_COUNT_HW_MAX)
976			return -ENOENT;
977		ret = csky_pmu_hw_map[event->attr.config];
978		if (ret == HW_OP_UNSUPPORTED)
979			return -ENOENT;
980		hwc->idx = ret;
981		break;
982	case PERF_TYPE_HW_CACHE:
983		ret = csky_pmu_cache_event(event->attr.config);
984		if (ret == CACHE_OP_UNSUPPORTED)
985			return -ENOENT;
986		hwc->idx = ret;
987		break;
988	case PERF_TYPE_RAW:
989		if (hw_raw_read_mapping[event->attr.config] == NULL)
990			return -ENOENT;
991		hwc->idx = event->attr.config;
992		break;
993	default:
994		return -ENOENT;
995	}
996
997	if (event->attr.exclude_user)
998		csky_pmu.hpcr = BIT(2);
999	else if (event->attr.exclude_kernel)
1000		csky_pmu.hpcr = BIT(3);
1001	else
1002		csky_pmu.hpcr = BIT(2) | BIT(3);
1003
1004	csky_pmu.hpcr |= BIT(1) | BIT(0);
1005
1006	return 0;
1007}
1008
/*
 * pmu::pmu_enable callback - starts all counters by writing the HPCR
 * value computed in csky_pmu_event_init() (mode filter + enable bits).
 */
static void csky_pmu_enable(struct pmu *pmu)
{
	cpwcr(HPCR, csky_pmu.hpcr);
}
1014
/*
 * pmu::pmu_disable callback - stops all counters.  Only BIT(1) (the
 * control bit also set by csky_pmu_reset()) is kept; the enable and
 * mode-filter bits are dropped.
 */
static void csky_pmu_disable(struct pmu *pmu)
{
	cpwcr(HPCR, BIT(1));
}
1020
/*
 * pmu::start callback - program the sample period, then enable the
 * counter's overflow interrupt and counting.  The shared HPINTENR/
 * HPCNTENR registers are read-modify-written with interrupts off so the
 * update cannot race the overflow handler on this CPU.
 */
static void csky_pmu_start(struct perf_event *event, int flags)
{
	unsigned long flg;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (WARN_ON_ONCE(idx == -1))
		return;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;

	csky_pmu_event_set_period(event);

	local_irq_save(flg);

	cpwcr(HPINTENR, BIT(idx) | cprcr(HPINTENR));
	cpwcr(HPCNTENR, BIT(idx) | cprcr(HPCNTENR));

	local_irq_restore(flg);
}
1044
/*
 * Disable one counter's interrupt and counting.  Mirror image of the
 * enable sequence in csky_pmu_start(); irqs-off for the same reason.
 */
static void csky_pmu_stop_event(struct perf_event *event)
{
	unsigned long flg;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	local_irq_save(flg);

	cpwcr(HPINTENR, ~BIT(idx) & cprcr(HPINTENR));
	cpwcr(HPCNTENR, ~BIT(idx) & cprcr(HPCNTENR));

	local_irq_restore(flg);
}
1058
/*
 * pmu::stop callback - stop the counter (once), then, if requested and
 * not already done, fold its final value into event->count.
 */
static void csky_pmu_stop(struct perf_event *event, int flags)
{
	if (!(event->hw.state & PERF_HES_STOPPED)) {
		csky_pmu_stop_event(event);
		event->hw.state |= PERF_HES_STOPPED;
	}

	if ((flags & PERF_EF_UPDATE) &&
	    !(event->hw.state & PERF_HES_UPTODATE)) {
		csky_perf_event_update(event, &event->hw);
		event->hw.state |= PERF_HES_UPTODATE;
	}
}
1072
/*
 * pmu::del callback - stop the counter with a final update and release
 * its slot in this CPU's event table.
 */
static void csky_pmu_del(struct perf_event *event, int flags)
{
	struct pmu_hw_events *hw_events = this_cpu_ptr(csky_pmu.hw_events);
	struct hw_perf_event *hwc = &event->hw;

	csky_pmu_stop(event, PERF_EF_UPDATE);

	hw_events->events[hwc->idx] = NULL;

	perf_event_update_userpage(event);
}
1084
/*
 * pmu::add callback - claim the counter slot fixed at event_init time
 * (hwc->idx) for this CPU and optionally start counting.  Always
 * succeeds: each event maps to a dedicated hardware counter, so there
 * is no allocation to fail.
 */
static int csky_pmu_add(struct perf_event *event, int flags)
{
	struct pmu_hw_events *hw_events = this_cpu_ptr(csky_pmu.hw_events);
	struct hw_perf_event *hwc = &event->hw;

	hw_events->events[hwc->idx] = event;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (flags & PERF_EF_START)
		csky_pmu_start(event, PERF_EF_RELOAD);

	perf_event_update_userpage(event);

	return 0;
}
1102
/*
 * Percpu overflow interrupt handler.  All counters share one interrupt
 * line; HPOFSR tells which counter(s) overflowed.  For each overflowed
 * counter with an active event: update the count, rearm the period, and
 * deliver the sample (stopping the counter if perf asks for throttling).
 */
static irqreturn_t csky_pmu_handle_irq(int irq_num, void *dev)
{
	struct perf_sample_data data;
	struct pmu_hw_events *cpuc = this_cpu_ptr(csky_pmu.hw_events);
	struct pt_regs *regs;
	int idx;

	/*
	 * Did an overflow occur?
	 */
	if (!cprcr(HPOFSR))
		return IRQ_NONE;

	/*
	 * Handle the counter(s) overflow(s)
	 */
	regs = get_irq_regs();

	/* Pause all counters while we process the overflow(s). */
	csky_pmu_disable(&csky_pmu.pmu);

	for (idx = 0; idx < CSKY_PMU_MAX_EVENTS; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		/* Ignore if we don't have an event. */
		if (!event)
			continue;
		/*
		 * We have a single interrupt for all counters. Check that
		 * each counter has overflowed before we process it.
		 */
		if (!(cprcr(HPOFSR) & BIT(idx)))
			continue;

		hwc = &event->hw;
		csky_perf_event_update(event, &event->hw);
		perf_sample_data_init(&data, 0, hwc->last_period);
		/* Rearm; this also clears the counter's HPOFSR bit. */
		csky_pmu_event_set_period(event);

		if (perf_event_overflow(event, &data, regs))
			csky_pmu_stop_event(event);
	}

	csky_pmu_enable(&csky_pmu.pmu);

	/*
	 * Handle the pending perf events.
	 *
	 * Note: this call *must* be run with interrupts disabled. For
	 * platforms that can have the PMU interrupts raised as an NMI, this
	 * will not work.
	 */
	irq_work_run();

	return IRQ_HANDLED;
}
1159
/*
 * Request the percpu PMU overflow interrupt from the platform device
 * and remember its number in csky_pmu_irq for the hotplug callbacks.
 *
 * NOTE(review): platform_get_irq() failures are collapsed to -ENODEV
 * instead of being propagated, and the dev_id passed to
 * request_percpu_irq() is this_cpu_ptr() of the percpu area rather than
 * the __percpu pointer itself - confirm both against the genirq percpu
 * API expectations.
 */
static int csky_pmu_request_irq(irq_handler_t handler)
{
	int err, irqs;
	struct platform_device *pmu_device = csky_pmu.plat_device;

	if (!pmu_device)
		return -ENODEV;

	irqs = min(pmu_device->num_resources, num_possible_cpus());
	if (irqs < 1) {
		pr_err("no irqs for PMUs defined\n");
		return -ENODEV;
	}

	csky_pmu_irq = platform_get_irq(pmu_device, 0);
	if (csky_pmu_irq < 0)
		return -ENODEV;
	err = request_percpu_irq(csky_pmu_irq, handler, "csky-pmu",
				 this_cpu_ptr(csky_pmu.hw_events));
	if (err) {
		pr_err("unable to request IRQ%d for CSKY PMU counters\n",
		       csky_pmu_irq);
		return err;
	}

	return 0;
}
1187
/*
 * Release the percpu overflow interrupt.  The dev_id must match the one
 * passed to request_percpu_irq() in csky_pmu_request_irq().
 */
static void csky_pmu_free_irq(void)
{
	int irq;
	struct platform_device *pmu_device = csky_pmu.plat_device;

	irq = platform_get_irq(pmu_device, 0);
	if (irq >= 0)
		free_percpu_irq(irq, this_cpu_ptr(csky_pmu.hw_events));
}
1197
1198int init_hw_perf_events(void)
1199{
1200	csky_pmu.hw_events = alloc_percpu_gfp(struct pmu_hw_events,
1201					      GFP_KERNEL);
1202	if (!csky_pmu.hw_events) {
1203		pr_info("failed to allocate per-cpu PMU data.\n");
1204		return -ENOMEM;
1205	}
1206
1207	csky_pmu.pmu = (struct pmu) {
1208		.pmu_enable	= csky_pmu_enable,
1209		.pmu_disable	= csky_pmu_disable,
1210		.event_init	= csky_pmu_event_init,
1211		.add		= csky_pmu_add,
1212		.del		= csky_pmu_del,
1213		.start		= csky_pmu_start,
1214		.stop		= csky_pmu_stop,
1215		.read		= csky_pmu_read,
1216	};
1217
1218	memset((void *)hw_raw_read_mapping, 0,
1219		sizeof(hw_raw_read_mapping[CSKY_PMU_MAX_EVENTS]));
1220
1221	hw_raw_read_mapping[0x1]  = csky_pmu_read_cc;
1222	hw_raw_read_mapping[0x2]  = csky_pmu_read_ic;
1223	hw_raw_read_mapping[0x3]  = csky_pmu_read_icac;
1224	hw_raw_read_mapping[0x4]  = csky_pmu_read_icmc;
1225	hw_raw_read_mapping[0x5]  = csky_pmu_read_dcac;
1226	hw_raw_read_mapping[0x6]  = csky_pmu_read_dcmc;
1227	hw_raw_read_mapping[0x7]  = csky_pmu_read_l2ac;
1228	hw_raw_read_mapping[0x8]  = csky_pmu_read_l2mc;
1229	hw_raw_read_mapping[0xa]  = csky_pmu_read_iutlbmc;
1230	hw_raw_read_mapping[0xb]  = csky_pmu_read_dutlbmc;
1231	hw_raw_read_mapping[0xc]  = csky_pmu_read_jtlbmc;
1232	hw_raw_read_mapping[0xd]  = csky_pmu_read_softc;
1233	hw_raw_read_mapping[0xe]  = csky_pmu_read_cbmc;
1234	hw_raw_read_mapping[0xf]  = csky_pmu_read_cbic;
1235	hw_raw_read_mapping[0x10] = csky_pmu_read_ibmc;
1236	hw_raw_read_mapping[0x11] = csky_pmu_read_ibic;
1237	hw_raw_read_mapping[0x12] = csky_pmu_read_lsfc;
1238	hw_raw_read_mapping[0x13] = csky_pmu_read_sic;
1239	hw_raw_read_mapping[0x14] = csky_pmu_read_dcrac;
1240	hw_raw_read_mapping[0x15] = csky_pmu_read_dcrmc;
1241	hw_raw_read_mapping[0x16] = csky_pmu_read_dcwac;
1242	hw_raw_read_mapping[0x17] = csky_pmu_read_dcwmc;
1243	hw_raw_read_mapping[0x18] = csky_pmu_read_l2rac;
1244	hw_raw_read_mapping[0x19] = csky_pmu_read_l2rmc;
1245	hw_raw_read_mapping[0x1a] = csky_pmu_read_l2wac;
1246	hw_raw_read_mapping[0x1b] = csky_pmu_read_l2wmc;
1247
1248	memset((void *)hw_raw_write_mapping, 0,
1249		sizeof(hw_raw_write_mapping[CSKY_PMU_MAX_EVENTS]));
1250
1251	hw_raw_write_mapping[0x1]  = csky_pmu_write_cc;
1252	hw_raw_write_mapping[0x2]  = csky_pmu_write_ic;
1253	hw_raw_write_mapping[0x3]  = csky_pmu_write_icac;
1254	hw_raw_write_mapping[0x4]  = csky_pmu_write_icmc;
1255	hw_raw_write_mapping[0x5]  = csky_pmu_write_dcac;
1256	hw_raw_write_mapping[0x6]  = csky_pmu_write_dcmc;
1257	hw_raw_write_mapping[0x7]  = csky_pmu_write_l2ac;
1258	hw_raw_write_mapping[0x8]  = csky_pmu_write_l2mc;
1259	hw_raw_write_mapping[0xa]  = csky_pmu_write_iutlbmc;
1260	hw_raw_write_mapping[0xb]  = csky_pmu_write_dutlbmc;
1261	hw_raw_write_mapping[0xc]  = csky_pmu_write_jtlbmc;
1262	hw_raw_write_mapping[0xd]  = csky_pmu_write_softc;
1263	hw_raw_write_mapping[0xe]  = csky_pmu_write_cbmc;
1264	hw_raw_write_mapping[0xf]  = csky_pmu_write_cbic;
1265	hw_raw_write_mapping[0x10] = csky_pmu_write_ibmc;
1266	hw_raw_write_mapping[0x11] = csky_pmu_write_ibic;
1267	hw_raw_write_mapping[0x12] = csky_pmu_write_lsfc;
1268	hw_raw_write_mapping[0x13] = csky_pmu_write_sic;
1269	hw_raw_write_mapping[0x14] = csky_pmu_write_dcrac;
1270	hw_raw_write_mapping[0x15] = csky_pmu_write_dcrmc;
1271	hw_raw_write_mapping[0x16] = csky_pmu_write_dcwac;
1272	hw_raw_write_mapping[0x17] = csky_pmu_write_dcwmc;
1273	hw_raw_write_mapping[0x18] = csky_pmu_write_l2rac;
1274	hw_raw_write_mapping[0x19] = csky_pmu_write_l2rmc;
1275	hw_raw_write_mapping[0x1a] = csky_pmu_write_l2wac;
1276	hw_raw_write_mapping[0x1b] = csky_pmu_write_l2wmc;
1277
1278	return 0;
1279}
1280
/* CPU hotplug "online" callback - enable the percpu PMU irq locally. */
static int csky_pmu_starting_cpu(unsigned int cpu)
{
	enable_percpu_irq(csky_pmu_irq, 0);
	return 0;
}
1286
/* CPU hotplug teardown callback - disable the percpu PMU irq locally. */
static int csky_pmu_dying_cpu(unsigned int cpu)
{
	disable_percpu_irq(csky_pmu_irq);
	return 0;
}
1292
1293int csky_pmu_device_probe(struct platform_device *pdev,
1294			  const struct of_device_id *of_table)
1295{
1296	struct device_node *node = pdev->dev.of_node;
1297	int ret;
1298
1299	ret = init_hw_perf_events();
1300	if (ret) {
1301		pr_notice("[perf] failed to probe PMU!\n");
1302		return ret;
1303	}
1304
1305	if (of_property_read_u32(node, "count-width",
1306				 &csky_pmu.count_width)) {
1307		csky_pmu.count_width = DEFAULT_COUNT_WIDTH;
1308	}
1309	csky_pmu.max_period = BIT_ULL(csky_pmu.count_width) - 1;
1310
1311	csky_pmu.plat_device = pdev;
1312
1313	/* Ensure the PMU has sane values out of reset. */
1314	on_each_cpu(csky_pmu_reset, &csky_pmu, 1);
1315
1316	ret = csky_pmu_request_irq(csky_pmu_handle_irq);
1317	if (ret) {
1318		csky_pmu.pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
1319		pr_notice("[perf] PMU request irq fail!\n");
1320	}
1321
1322	ret = cpuhp_setup_state(CPUHP_AP_PERF_CSKY_ONLINE, "AP_PERF_ONLINE",
1323				csky_pmu_starting_cpu,
1324				csky_pmu_dying_cpu);
1325	if (ret) {
1326		csky_pmu_free_irq();
1327		free_percpu(csky_pmu.hw_events);
1328		return ret;
1329	}
1330
1331	ret = perf_pmu_register(&csky_pmu.pmu, "cpu", PERF_TYPE_RAW);
1332	if (ret) {
1333		csky_pmu_free_irq();
1334		free_percpu(csky_pmu.hw_events);
1335	}
1336
1337	return ret;
1338}
1339
/* Device-tree match table; terminated by the empty sentinel entry. */
static const struct of_device_id csky_pmu_of_device_ids[] = {
	{.compatible = "csky,csky-pmu"},
	{},
};
1344
/* platform_driver probe thunk onto the common probe path. */
static int csky_pmu_dev_probe(struct platform_device *pdev)
{
	return csky_pmu_device_probe(pdev, csky_pmu_of_device_ids);
}
1349
/* Platform driver bound via the DT compatible string above. */
static struct platform_driver csky_pmu_driver = {
	.driver = {
		   .name = "csky-pmu",
		   .of_match_table = csky_pmu_of_device_ids,
		   },
	.probe = csky_pmu_dev_probe,
};
1357
1358static int __init csky_pmu_probe(void)
1359{
1360	int ret;
1361
1362	ret = platform_driver_register(&csky_pmu_driver);
1363	if (ret)
1364		pr_notice("[perf] PMU initialization failed\n");
1365	else
1366		pr_notice("[perf] PMU initialization done\n");
1367
1368	return ret;
1369}
1370
1371device_initcall(csky_pmu_probe);
1372