1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2015 Linaro Ltd.
4 * Author: Shannon Zhao <shannon.zhao@linaro.org>
5 */
6
7#include <linux/cpu.h>
8#include <linux/kvm.h>
9#include <linux/kvm_host.h>
10#include <linux/perf_event.h>
11#include <linux/perf/arm_pmu.h>
12#include <linux/uaccess.h>
13#include <asm/kvm_emulate.h>
14#include <kvm/arm_pmu.h>
15#include <kvm/arm_vgic.h>
16
17static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
18static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
19static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
20
21#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
22
23static u32 kvm_pmu_event_mask(struct kvm *kvm)
24{
25	switch (kvm->arch.pmuver) {
26	case 1:			/* ARMv8.0 */
27		return GENMASK(9, 0);
28	case 4:			/* ARMv8.1 */
29	case 5:			/* ARMv8.4 */
30	case 6:			/* ARMv8.5 */
31		return GENMASK(15, 0);
32	default:		/* Shouldn't be here, just for sanity */
33		WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
34		return 0;
35	}
36}
37
38/**
39 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
40 * @vcpu: The vcpu pointer
41 * @select_idx: The counter index
42 */
43static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
44{
45	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
46		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
47}
48
49static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
50{
51	struct kvm_pmu *pmu;
52	struct kvm_vcpu_arch *vcpu_arch;
53
54	pmc -= pmc->idx;
55	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
56	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
57	return container_of(vcpu_arch, struct kvm_vcpu, arch);
58}
59
60/**
61 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
62 * @pmc: The PMU counter pointer
63 */
64static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
65{
66	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
67
68	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
69}
70
71/**
72 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
73 * @select_idx: The counter index
74 */
75static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
76{
77	return select_idx & 0x1;
78}
79
80/**
81 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
82 * @pmc: The PMU counter pointer
83 *
84 * When a pair of PMCs are chained together we use the low counter (canonical)
85 * to hold the underlying perf event.
86 */
87static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
88{
89	if (kvm_pmu_pmc_is_chained(pmc) &&
90	    kvm_pmu_idx_is_high_counter(pmc->idx))
91		return pmc - 1;
92
93	return pmc;
94}
95static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
96{
97	if (kvm_pmu_idx_is_high_counter(pmc->idx))
98		return pmc - 1;
99	else
100		return pmc + 1;
101}
102
103/**
104 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
105 * @vcpu: The vcpu pointer
106 * @select_idx: The counter index
107 */
108static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
109{
110	u64 eventsel, reg;
111
112	select_idx |= 0x1;
113
114	if (select_idx == ARMV8_PMU_CYCLE_IDX)
115		return false;
116
117	reg = PMEVTYPER0_EL0 + select_idx;
118	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
119
120	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
121}
122
123/**
124 * kvm_pmu_get_pair_counter_value - get PMU counter value
125 * @vcpu: The vcpu pointer
126 * @pmc: The PMU counter pointer
127 */
128static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
129					  struct kvm_pmc *pmc)
130{
131	u64 counter, counter_high, reg, enabled, running;
132
133	if (kvm_pmu_pmc_is_chained(pmc)) {
134		pmc = kvm_pmu_get_canonical_pmc(pmc);
135		reg = PMEVCNTR0_EL0 + pmc->idx;
136
137		counter = __vcpu_sys_reg(vcpu, reg);
138		counter_high = __vcpu_sys_reg(vcpu, reg + 1);
139
140		counter = lower_32_bits(counter) | (counter_high << 32);
141	} else {
142		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
143		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
144		counter = __vcpu_sys_reg(vcpu, reg);
145	}
146
147	/*
148	 * The real counter value is equal to the value of counter register plus
149	 * the value perf event counts.
150	 */
151	if (pmc->perf_event)
152		counter += perf_event_read_value(pmc->perf_event, &enabled,
153						 &running);
154
155	return counter;
156}
157
158/**
159 * kvm_pmu_get_counter_value - get PMU counter value
160 * @vcpu: The vcpu pointer
161 * @select_idx: The counter index
162 */
163u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
164{
165	u64 counter;
166	struct kvm_pmu *pmu = &vcpu->arch.pmu;
167	struct kvm_pmc *pmc = &pmu->pmc[select_idx];
168
169	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
170
171	if (kvm_pmu_pmc_is_chained(pmc) &&
172	    kvm_pmu_idx_is_high_counter(select_idx))
173		counter = upper_32_bits(counter);
174	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
175		counter = lower_32_bits(counter);
176
177	return counter;
178}
179
180/**
181 * kvm_pmu_set_counter_value - set PMU counter value
182 * @vcpu: The vcpu pointer
183 * @select_idx: The counter index
184 * @val: The counter value
185 */
186void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
187{
188	u64 reg;
189
190	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
191	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
192	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
193
194	/* Recreate the perf event to reflect the updated sample_period */
195	kvm_pmu_create_perf_event(vcpu, select_idx);
196}
197
198/**
199 * kvm_pmu_release_perf_event - remove the perf event
200 * @pmc: The PMU counter pointer
201 */
202static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
203{
204	pmc = kvm_pmu_get_canonical_pmc(pmc);
205	if (pmc->perf_event) {
206		perf_event_disable(pmc->perf_event);
207		perf_event_release_kernel(pmc->perf_event);
208		pmc->perf_event = NULL;
209	}
210}
211
212/**
213 * kvm_pmu_stop_counter - stop PMU counter
214 * @pmc: The PMU counter pointer
215 *
216 * If this counter has been configured to monitor some event, release it here.
217 */
218static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
219{
220	u64 counter, reg, val;
221
222	pmc = kvm_pmu_get_canonical_pmc(pmc);
223	if (!pmc->perf_event)
224		return;
225
226	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
227
228	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
229		reg = PMCCNTR_EL0;
230		val = counter;
231	} else {
232		reg = PMEVCNTR0_EL0 + pmc->idx;
233		val = lower_32_bits(counter);
234	}
235
236	__vcpu_sys_reg(vcpu, reg) = val;
237
238	if (kvm_pmu_pmc_is_chained(pmc))
239		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
240
241	kvm_pmu_release_perf_event(pmc);
242}
243
244/**
245 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
246 * @vcpu: The vcpu pointer
247 *
248 */
249void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
250{
251	int i;
252	struct kvm_pmu *pmu = &vcpu->arch.pmu;
253
254	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
255		pmu->pmc[i].idx = i;
256}
257
258/**
259 * kvm_pmu_vcpu_reset - reset pmu state for cpu
260 * @vcpu: The vcpu pointer
261 *
262 */
263void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
264{
265	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
266	struct kvm_pmu *pmu = &vcpu->arch.pmu;
267	int i;
268
269	for_each_set_bit(i, &mask, 32)
270		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
271
272	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
273}
274
275/**
276 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
277 * @vcpu: The vcpu pointer
278 *
279 */
280void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
281{
282	int i;
283	struct kvm_pmu *pmu = &vcpu->arch.pmu;
284
285	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
286		kvm_pmu_release_perf_event(&pmu->pmc[i]);
287	irq_work_sync(&vcpu->arch.pmu.overflow_work);
288}
289
290u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
291{
292	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
293
294	val &= ARMV8_PMU_PMCR_N_MASK;
295	if (val == 0)
296		return BIT(ARMV8_PMU_CYCLE_IDX);
297	else
298		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
299}
300
301/**
302 * kvm_pmu_enable_counter_mask - enable selected PMU counters
303 * @vcpu: The vcpu pointer
304 * @val: the value guest writes to PMCNTENSET register
305 *
306 * Call perf_event_enable to start counting the perf event
307 */
308void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
309{
310	int i;
311	struct kvm_pmu *pmu = &vcpu->arch.pmu;
312	struct kvm_pmc *pmc;
313
314	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
315		return;
316
317	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
318		if (!(val & BIT(i)))
319			continue;
320
321		pmc = &pmu->pmc[i];
322
323		/* A change in the enable state may affect the chain state */
324		kvm_pmu_update_pmc_chained(vcpu, i);
325		kvm_pmu_create_perf_event(vcpu, i);
326
327		/* At this point, pmc must be the canonical */
328		if (pmc->perf_event) {
329			perf_event_enable(pmc->perf_event);
330			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
331				kvm_debug("fail to enable perf event\n");
332		}
333	}
334}
335
336/**
337 * kvm_pmu_disable_counter_mask - disable selected PMU counters
338 * @vcpu: The vcpu pointer
339 * @val: the value guest writes to PMCNTENCLR register
340 *
341 * Call perf_event_disable to stop counting the perf event
342 */
343void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
344{
345	int i;
346	struct kvm_pmu *pmu = &vcpu->arch.pmu;
347	struct kvm_pmc *pmc;
348
349	if (!val)
350		return;
351
352	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
353		if (!(val & BIT(i)))
354			continue;
355
356		pmc = &pmu->pmc[i];
357
358		/* A change in the enable state may affect the chain state */
359		kvm_pmu_update_pmc_chained(vcpu, i);
360		kvm_pmu_create_perf_event(vcpu, i);
361
362		/* At this point, pmc must be the canonical */
363		if (pmc->perf_event)
364			perf_event_disable(pmc->perf_event);
365	}
366}
367
368static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
369{
370	u64 reg = 0;
371
372	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
373		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
374		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
375		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
376		reg &= kvm_pmu_valid_counter_mask(vcpu);
377	}
378
379	return reg;
380}
381
382static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
383{
384	struct kvm_pmu *pmu = &vcpu->arch.pmu;
385	bool overflow;
386
387	if (!kvm_arm_pmu_v3_ready(vcpu))
388		return;
389
390	overflow = !!kvm_pmu_overflow_status(vcpu);
391	if (pmu->irq_level == overflow)
392		return;
393
394	pmu->irq_level = overflow;
395
396	if (likely(irqchip_in_kernel(vcpu->kvm))) {
397		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
398					      pmu->irq_num, overflow, pmu);
399		WARN_ON(ret);
400	}
401}
402
403bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
404{
405	struct kvm_pmu *pmu = &vcpu->arch.pmu;
406	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
407	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
408
409	if (likely(irqchip_in_kernel(vcpu->kvm)))
410		return false;
411
412	return pmu->irq_level != run_level;
413}
414
415/*
416 * Reflect the PMU overflow interrupt output level into the kvm_run structure
417 */
418void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
419{
420	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
421
422	/* Populate the timer bitmap for user space */
423	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
424	if (vcpu->arch.pmu.irq_level)
425		regs->device_irq_level |= KVM_ARM_DEV_PMU;
426}
427
/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Re-evaluates the PMU interrupt level, so that an overflow which happened
 * while running in the host results in an interrupt being injected.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}
439
/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Re-evaluates the PMU interrupt level, so that an overflow which happened
 * while running in the guest results in an interrupt being injected.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}
451
452/**
453 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
454 * to the event.
455 * This is why we need a callback to do it once outside of the NMI context.
456 */
457static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
458{
459	struct kvm_vcpu *vcpu;
460	struct kvm_pmu *pmu;
461
462	pmu = container_of(work, struct kvm_pmu, overflow_work);
463	vcpu = kvm_pmc_to_vcpu(pmu->pmc);
464
465	kvm_vcpu_kick(vcpu);
466}
467
468/**
469 * When the perf event overflows, set the overflow status and inform the vcpu.
470 */
471static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
472				  struct perf_sample_data *data,
473				  struct pt_regs *regs)
474{
475	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
476	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
477	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
478	int idx = pmc->idx;
479	u64 period;
480
481	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
482
483	/*
484	 * Reset the sample period to the architectural limit,
485	 * i.e. the point where the counter overflows.
486	 */
487	period = -(local64_read(&perf_event->count));
488
489	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
490		period &= GENMASK(31, 0);
491
492	local64_set(&perf_event->hw.period_left, 0);
493	perf_event->attr.sample_period = period;
494	perf_event->hw.sample_period = period;
495
496	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
497
498	if (kvm_pmu_overflow_status(vcpu)) {
499		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
500
501		if (!in_nmi())
502			kvm_vcpu_kick(vcpu);
503		else
504			irq_work_queue(&vcpu->arch.pmu.overflow_work);
505	}
506
507	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
508}
509
510/**
511 * kvm_pmu_software_increment - do software increment
512 * @vcpu: The vcpu pointer
513 * @val: the value guest writes to PMSWINC register
514 */
515void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
516{
517	struct kvm_pmu *pmu = &vcpu->arch.pmu;
518	int i;
519
520	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
521		return;
522
523	/* Weed out disabled counters */
524	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
525
526	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
527		u64 type, reg;
528
529		if (!(val & BIT(i)))
530			continue;
531
532		/* PMSWINC only applies to ... SW_INC! */
533		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
534		type &= kvm_pmu_event_mask(vcpu->kvm);
535		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
536			continue;
537
538		/* increment this even SW_INC counter */
539		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
540		reg = lower_32_bits(reg);
541		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
542
543		if (reg) /* no overflow on the low part */
544			continue;
545
546		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
547			/* increment the high counter */
548			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
549			reg = lower_32_bits(reg);
550			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
551			if (!reg) /* mark overflow on the high counter */
552				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
553		} else {
554			/* mark overflow on low counter */
555			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
556		}
557	}
558}
559
560/**
561 * kvm_pmu_handle_pmcr - handle PMCR register
562 * @vcpu: The vcpu pointer
563 * @val: the value guest writes to PMCR register
564 */
565void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
566{
567	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
568	int i;
569
570	if (val & ARMV8_PMU_PMCR_E) {
571		kvm_pmu_enable_counter_mask(vcpu,
572		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
573	} else {
574		kvm_pmu_disable_counter_mask(vcpu, mask);
575	}
576
577	if (val & ARMV8_PMU_PMCR_C)
578		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
579
580	if (val & ARMV8_PMU_PMCR_P) {
581		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
582		for_each_set_bit(i, &mask, 32)
583			kvm_pmu_set_counter_value(vcpu, i, 0);
584	}
585}
586
587static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
588{
589	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
590	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
591}
592
593/**
594 * kvm_pmu_create_perf_event - create a perf event for a counter
595 * @vcpu: The vcpu pointer
596 * @select_idx: The number of selected counter
597 */
598static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
599{
600	struct kvm_pmu *pmu = &vcpu->arch.pmu;
601	struct kvm_pmc *pmc;
602	struct perf_event *event;
603	struct perf_event_attr attr;
604	u64 eventsel, counter, reg, data;
605
606	/*
607	 * For chained counters the event type and filtering attributes are
608	 * obtained from the low/even counter. We also use this counter to
609	 * determine if the event is enabled/disabled.
610	 */
611	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
612
613	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
614	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
615	data = __vcpu_sys_reg(vcpu, reg);
616
617	kvm_pmu_stop_counter(vcpu, pmc);
618	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
619		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
620	else
621		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
622
623	/* Software increment event doesn't need to be backed by a perf event */
624	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
625		return;
626
627	/*
628	 * If we have a filter in place and that the event isn't allowed, do
629	 * not install a perf event either.
630	 */
631	if (vcpu->kvm->arch.pmu_filter &&
632	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
633		return;
634
635	memset(&attr, 0, sizeof(struct perf_event_attr));
636	attr.type = PERF_TYPE_RAW;
637	attr.size = sizeof(attr);
638	attr.pinned = 1;
639	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
640	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
641	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
642	attr.exclude_hv = 1; /* Don't count EL2 events */
643	attr.exclude_host = 1; /* Don't count host events */
644	attr.config = eventsel;
645
646	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
647
648	if (kvm_pmu_pmc_is_chained(pmc)) {
649		/**
650		 * The initial sample period (overflow count) of an event. For
651		 * chained counters we only support overflow interrupts on the
652		 * high counter.
653		 */
654		attr.sample_period = (-counter) & GENMASK(63, 0);
655		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
656
657		event = perf_event_create_kernel_counter(&attr, -1, current,
658							 kvm_pmu_perf_overflow,
659							 pmc + 1);
660	} else {
661		/* The initial sample period (overflow count) of an event. */
662		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
663			attr.sample_period = (-counter) & GENMASK(63, 0);
664		else
665			attr.sample_period = (-counter) & GENMASK(31, 0);
666
667		event = perf_event_create_kernel_counter(&attr, -1, current,
668						 kvm_pmu_perf_overflow, pmc);
669	}
670
671	if (IS_ERR(event)) {
672		pr_err_once("kvm: pmu event creation failed %ld\n",
673			    PTR_ERR(event));
674		return;
675	}
676
677	pmc->perf_event = event;
678}
679
680/**
681 * kvm_pmu_update_pmc_chained - update chained bitmap
682 * @vcpu: The vcpu pointer
683 * @select_idx: The number of selected counter
684 *
685 * Update the chained bitmap based on the event type written in the
686 * typer register and the enable state of the odd register.
687 */
688static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
689{
690	struct kvm_pmu *pmu = &vcpu->arch.pmu;
691	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
692	bool new_state, old_state;
693
694	old_state = kvm_pmu_pmc_is_chained(pmc);
695	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
696		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
697
698	if (old_state == new_state)
699		return;
700
701	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
702	kvm_pmu_stop_counter(vcpu, canonical_pmc);
703	if (new_state) {
704		/*
705		 * During promotion from !chained to chained we must ensure
706		 * the adjacent counter is stopped and its event destroyed
707		 */
708		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
709		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
710		return;
711	}
712	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
713}
714
715/**
716 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
717 * @vcpu: The vcpu pointer
718 * @data: The data guest writes to PMXEVTYPER_EL0
719 * @select_idx: The number of selected counter
720 *
721 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
722 * event with given hardware event number. Here we call perf_event API to
723 * emulate this action and create a kernel perf event for it.
724 */
725void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
726				    u64 select_idx)
727{
728	u64 reg, mask;
729
730	mask  =  ARMV8_PMU_EVTYPE_MASK;
731	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
732	mask |= kvm_pmu_event_mask(vcpu->kvm);
733
734	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
735	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
736
737	__vcpu_sys_reg(vcpu, reg) = data & mask;
738
739	kvm_pmu_update_pmc_chained(vcpu, select_idx);
740	kvm_pmu_create_perf_event(vcpu, select_idx);
741}
742
743static int kvm_pmu_probe_pmuver(void)
744{
745	struct perf_event_attr attr = { };
746	struct perf_event *event;
747	struct arm_pmu *pmu;
748	int pmuver = 0xf;
749
750	/*
751	 * Create a dummy event that only counts user cycles. As we'll never
752	 * leave this function with the event being live, it will never
753	 * count anything. But it allows us to probe some of the PMU
754	 * details. Yes, this is terrible.
755	 */
756	attr.type = PERF_TYPE_RAW;
757	attr.size = sizeof(attr);
758	attr.pinned = 1;
759	attr.disabled = 0;
760	attr.exclude_user = 0;
761	attr.exclude_kernel = 1;
762	attr.exclude_hv = 1;
763	attr.exclude_host = 1;
764	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
765	attr.sample_period = GENMASK(63, 0);
766
767	event = perf_event_create_kernel_counter(&attr, -1, current,
768						 kvm_pmu_perf_overflow, &attr);
769
770	if (IS_ERR(event)) {
771		pr_err_once("kvm: pmu event creation failed %ld\n",
772			    PTR_ERR(event));
773		return 0xf;
774	}
775
776	if (event->pmu) {
777		pmu = to_arm_pmu(event->pmu);
778		if (pmu->pmuver)
779			pmuver = pmu->pmuver;
780	}
781
782	perf_event_disable(event);
783	perf_event_release_kernel(event);
784
785	return pmuver;
786}
787
788u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
789{
790	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
791	u64 val, mask = 0;
792	int base, i, nr_events;
793
794	if (!pmceid1) {
795		val = read_sysreg(pmceid0_el0);
796		base = 0;
797	} else {
798		val = read_sysreg(pmceid1_el0);
799		base = 32;
800	}
801
802	if (!bmap)
803		return val;
804
805	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
806
807	for (i = 0; i < 32; i += 8) {
808		u64 byte;
809
810		byte = bitmap_get_value8(bmap, base + i);
811		mask |= byte << i;
812		if (nr_events >= (0x4000 + base + 32)) {
813			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
814			mask |= byte << (32 + i);
815		}
816	}
817
818	return val & mask;
819}
820
821bool kvm_arm_support_pmu_v3(void)
822{
823	/*
824	 * Check if HW_PERF_EVENTS are supported by checking the number of
825	 * hardware performance counters. This could ensure the presence of
826	 * a physical PMU and CONFIG_PERF_EVENT is selected.
827	 */
828	return (perf_num_counters() > 0);
829}
830
831int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
832{
833	if (!vcpu->arch.pmu.created)
834		return 0;
835
836	/*
837	 * A valid interrupt configuration for the PMU is either to have a
838	 * properly configured interrupt number and using an in-kernel
839	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
840	 */
841	if (irqchip_in_kernel(vcpu->kvm)) {
842		int irq = vcpu->arch.pmu.irq_num;
843		if (!kvm_arm_pmu_irq_initialized(vcpu))
844			return -EINVAL;
845
846		/*
847		 * If we are using an in-kernel vgic, at this point we know
848		 * the vgic will be initialized, so we can check the PMU irq
849		 * number against the dimensions of the vgic and make sure
850		 * it's valid.
851		 */
852		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
853			return -EINVAL;
854	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
855		   return -EINVAL;
856	}
857
858	kvm_pmu_vcpu_reset(vcpu);
859	vcpu->arch.pmu.ready = true;
860
861	return 0;
862}
863
864static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
865{
866	if (irqchip_in_kernel(vcpu->kvm)) {
867		int ret;
868
869		/*
870		 * If using the PMU with an in-kernel virtual GIC
871		 * implementation, we require the GIC to be already
872		 * initialized when initializing the PMU.
873		 */
874		if (!vgic_initialized(vcpu->kvm))
875			return -ENODEV;
876
877		if (!kvm_arm_pmu_irq_initialized(vcpu))
878			return -ENXIO;
879
880		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
881					 &vcpu->arch.pmu);
882		if (ret)
883			return ret;
884	}
885
886	init_irq_work(&vcpu->arch.pmu.overflow_work,
887		      kvm_pmu_perf_overflow_notify_vcpu);
888
889	vcpu->arch.pmu.created = true;
890	return 0;
891}
892
893/*
894 * For one VM the interrupt type must be same for each vcpu.
895 * As a PPI, the interrupt number is the same for all vcpus,
896 * while as an SPI it must be a separate number per vcpu.
897 */
898static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
899{
900	int i;
901	struct kvm_vcpu *vcpu;
902
903	kvm_for_each_vcpu(i, vcpu, kvm) {
904		if (!kvm_arm_pmu_irq_initialized(vcpu))
905			continue;
906
907		if (irq_is_ppi(irq)) {
908			if (vcpu->arch.pmu.irq_num != irq)
909				return false;
910		} else {
911			if (vcpu->arch.pmu.irq_num == irq)
912				return false;
913		}
914	}
915
916	return true;
917}
918
919int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
920{
921	if (!kvm_arm_support_pmu_v3() ||
922	    !test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
923		return -ENODEV;
924
925	if (vcpu->arch.pmu.created)
926		return -EBUSY;
927
928	if (!vcpu->kvm->arch.pmuver)
929		vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
930
931	if (vcpu->kvm->arch.pmuver == 0xf)
932		return -ENODEV;
933
934	switch (attr->attr) {
935	case KVM_ARM_VCPU_PMU_V3_IRQ: {
936		int __user *uaddr = (int __user *)(long)attr->addr;
937		int irq;
938
939		if (!irqchip_in_kernel(vcpu->kvm))
940			return -EINVAL;
941
942		if (get_user(irq, uaddr))
943			return -EFAULT;
944
945		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
946		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
947			return -EINVAL;
948
949		if (!pmu_irq_is_valid(vcpu->kvm, irq))
950			return -EINVAL;
951
952		if (kvm_arm_pmu_irq_initialized(vcpu))
953			return -EBUSY;
954
955		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
956		vcpu->arch.pmu.irq_num = irq;
957		return 0;
958	}
959	case KVM_ARM_VCPU_PMU_V3_FILTER: {
960		struct kvm_pmu_event_filter __user *uaddr;
961		struct kvm_pmu_event_filter filter;
962		int nr_events;
963
964		nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
965
966		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
967
968		if (copy_from_user(&filter, uaddr, sizeof(filter)))
969			return -EFAULT;
970
971		if (((u32)filter.base_event + filter.nevents) > nr_events ||
972		    (filter.action != KVM_PMU_EVENT_ALLOW &&
973		     filter.action != KVM_PMU_EVENT_DENY))
974			return -EINVAL;
975
976		mutex_lock(&vcpu->kvm->lock);
977
978		if (!vcpu->kvm->arch.pmu_filter) {
979			vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL);
980			if (!vcpu->kvm->arch.pmu_filter) {
981				mutex_unlock(&vcpu->kvm->lock);
982				return -ENOMEM;
983			}
984
985			/*
986			 * The default depends on the first applied filter.
987			 * If it allows events, the default is to deny.
988			 * Conversely, if the first filter denies a set of
989			 * events, the default is to allow.
990			 */
991			if (filter.action == KVM_PMU_EVENT_ALLOW)
992				bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
993			else
994				bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
995		}
996
997		if (filter.action == KVM_PMU_EVENT_ALLOW)
998			bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
999		else
1000			bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
1001
1002		mutex_unlock(&vcpu->kvm->lock);
1003
1004		return 0;
1005	}
1006	case KVM_ARM_VCPU_PMU_V3_INIT:
1007		return kvm_arm_pmu_v3_init(vcpu);
1008	}
1009
1010	return -ENXIO;
1011}
1012
1013int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1014{
1015	switch (attr->attr) {
1016	case KVM_ARM_VCPU_PMU_V3_IRQ: {
1017		int __user *uaddr = (int __user *)(long)attr->addr;
1018		int irq;
1019
1020		if (!irqchip_in_kernel(vcpu->kvm))
1021			return -EINVAL;
1022
1023		if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
1024			return -ENODEV;
1025
1026		if (!kvm_arm_pmu_irq_initialized(vcpu))
1027			return -ENXIO;
1028
1029		irq = vcpu->arch.pmu.irq_num;
1030		return put_user(irq, uaddr);
1031	}
1032	}
1033
1034	return -ENXIO;
1035}
1036
1037int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1038{
1039	switch (attr->attr) {
1040	case KVM_ARM_VCPU_PMU_V3_IRQ:
1041	case KVM_ARM_VCPU_PMU_V3_INIT:
1042	case KVM_ARM_VCPU_PMU_V3_FILTER:
1043		if (kvm_arm_support_pmu_v3() &&
1044		    test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
1045			return 0;
1046	}
1047
1048	return -ENXIO;
1049}
1050