// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);

/* Flag in perf_event_attr.config1 marking the event as a chained 64bit pair */
#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

/*
 * Return the mask covering the event number field for the PMU version
 * exposed to the guest (kvm->arch.pmuver): ARMv8.0 implements 10 event
 * bits, later revisions 16.
 */
static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	switch (kvm->arch.pmuver) {
	case 1:			/* ARMv8.0 */
		return GENMASK(9, 0);
	case 4:			/* ARMv8.1 */
	case 5:			/* ARMv8.4 */
	case 6:			/* ARMv8.5 */
		return GENMASK(15, 0);
	default:		/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
		return 0;
	}
}

/**
 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 *
 * Only the cycle counter can be 64bit, and only when the guest has set
 * PMCR_EL0.LC (Long Cycle counter).
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}

/*
 * Recover the vcpu embedding a given pmc: step back to pmc[0] of the
 * containing array, then walk up through the enclosing kvm_pmu and
 * kvm_vcpu_arch. Relies on pmc->idx matching the pmc's array position
 * (set up in kvm_pmu_vcpu_init()).
 */
static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu;
	struct kvm_vcpu_arch *vcpu_arch;

	pmc -= pmc->idx;
	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
	return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

/**
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 *
 * The chained bitmap has one bit per even/odd counter pair, hence idx >> 1.
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
 * @select_idx: The counter index
 *
 * Odd indices are the high halves of a chained pair.
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
	return select_idx & 0x1;
}

/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * When a pair of PMCs are chained together we use the low counter (canonical)
 * to hold the underlying perf event.
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;

	return pmc;
}

/* Return the other half of the even/odd counter pair containing @pmc. */
static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;
	else
		return pmc + 1;
}

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 *
 * True when the odd counter of the pair containing @select_idx is
 * programmed with the CHAIN event. The cycle counter can never chain.
 */
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 eventsel, reg;

	/* Look at the odd (high) counter of the pair */
	select_idx |= 0x1;

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		return false;

	reg = PMEVTYPER0_EL0 + select_idx;
	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);

	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

/**
 * kvm_pmu_get_pair_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 *
 * For a chained pair, return the combined 64bit value built from both
 * halves of the pair; otherwise the single counter's value.
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
					  struct kvm_pmc *pmc)
{
	u64 counter, counter_high, reg, enabled, running;

	if (kvm_pmu_pmc_is_chained(pmc)) {
		pmc = kvm_pmu_get_canonical_pmc(pmc);
		reg = PMEVCNTR0_EL0 + pmc->idx;

		counter = __vcpu_sys_reg(vcpu, reg);
		counter_high = __vcpu_sys_reg(vcpu, reg + 1);

		counter = lower_32_bits(counter) | (counter_high << 32);
	} else {
		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
		counter = __vcpu_sys_reg(vcpu, reg);
	}

	/*
	 * The real counter value is equal to the value of counter register plus
	 * the value perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 *
 * For a chained pair, return the half of the 64bit pair value matching
 * @select_idx. A non-chained event counter is truncated to 32bit; the
 * cycle counter is returned in full (truncation for !LC is the caller's
 * business).
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 counter;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx];

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(select_idx))
		counter = upper_32_bits(counter);
	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
		counter = lower_32_bits(counter);

	return counter;
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	u64 reg;

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
	/*
	 * Adjust the saved register by the delta, as the live perf event's
	 * count (folded in by kvm_pmu_get_counter_value()) is not reset here.
	 */
	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 *
 * Operates on the canonical pmc, which is the one owning the perf event
 * for a chained pair.
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 * The current counter value (including what the perf event has accumulated)
 * is written back to the shadow sysreg(s) first, so no counts are lost.
 */
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
	u64 counter, reg, val;

	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (!pmc->perf_event)
		return;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
		reg = PMCCNTR_EL0;
		val = counter;
	} else {
		reg = PMEVCNTR0_EL0 + pmc->idx;
		val = lower_32_bits(counter);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	/* For a chained pair, the high half lives in the next register */
	if (kvm_pmu_pmc_is_chained(pmc))
		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 *
 * Record each pmc's position in the array; kvm_pmc_to_vcpu() depends on it.
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 *
 * Stop all implemented counters and clear the chained-pair bitmap.
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);

	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 *
 * Release every perf event and wait for any pending overflow irq_work.
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(&pmu->pmc[i]);
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

/*
 * Bitmask of counters implemented by the guest's PMU: PMCR_EL0.N event
 * counters plus the cycle counter (always present).
 */
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	/* Nothing to do unless the PMU is globally enabled (PMCR_EL0.E) */
	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event) {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

/*
 * Mask of counters that should currently be signalling an overflow
 * interrupt: overflowed (PMOVSSET), enabled (PMCNTENSET), with the
 * interrupt enabled (PMINTENSET), implemented, and the PMU globally
 * enabled. Zero means the interrupt line should be low.
 */
static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
		reg &= kvm_pmu_valid_counter_mask(vcpu);
	}

	return reg;
}

/*
 * Recompute the PMU interrupt line level and, when it changed and an
 * in-kernel irqchip is used, propagate it to the vgic. With a userspace
 * irqchip only pmu->irq_level is updated (see kvm_pmu_should_notify_user()).
 */
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_arm_pmu_v3_ready(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

/*
 * With a userspace irqchip, report whether the PMU interrupt level has
 * diverged from what was last communicated via the kvm_run regs, so the
 * VMM can be told about the change.
 */
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the PMU bit of the device irq level for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
 * to the event.
 * This is why we need a callback to do it once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;
	struct kvm_pmu *pmu;

	pmu = container_of(work, struct kvm_pmu, overflow_work);
	vcpu = kvm_pmc_to_vcpu(pmu->pmc);

	kvm_vcpu_kick(vcpu);
}

/**
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = -(local64_read(&perf_event->count));

	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
		period &= GENMASK(31, 0);

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		/* May run in NMI context, where kicking the vcpu is unsafe */
		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
		u64 type, reg;

		if (!(val & BIT(i)))
			continue;

		/* PMSWINC only applies to ... SW_INC! */
		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
			continue;

		/* increment this even SW_INC counter */
		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
		reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;

		if (reg) /* no overflow on the low part */
			continue;

		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
			/* increment the high counter */
			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
			reg = lower_32_bits(reg);
			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
			if (!reg) /* mark overflow on the high counter */
				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
		} else {
			/* mark overflow on low counter */
			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
		}
	}
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	int i;

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
	} else {
		kvm_pmu_disable_counter_mask(vcpu, mask);
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		/* PMCR_EL0.P resets the event counters, not the cycle counter */
		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_counter_value(vcpu, i, 0);
	}
}

/* Counter is counting iff the PMU is enabled and the counter's enable bit is set */
static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 */
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, counter, reg, data;

	/*
	 * For chained counters the event type and filtering attributes are
	 * obtained from the low/even counter. We also use this counter to
	 * determine if the event is enabled/disabled.
	 */
	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);

	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
	data = __vcpu_sys_reg(vcpu, reg);

	/* Fold any previously-accumulated counts into the shadow registers */
	kvm_pmu_stop_counter(vcpu, pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/* Software increment event doesn't need to be backed by a perf event */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
		return;

	/*
	 * If we have a filter in place and that the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc)) {
		/*
		 * The initial sample period (overflow count) of an event. For
		 * chained counters we only support overflow interrupts on the
		 * high counter, hence the high pmc (pmc + 1) as the handler
		 * context.
		 */
		attr.sample_period = (-counter) & GENMASK(63, 0);
		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow,
							 pmc + 1);
	} else {
		/* The initial sample period (overflow count) of an event. */
		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
			attr.sample_period = (-counter) & GENMASK(63, 0);
		else
			attr.sample_period = (-counter) & GENMASK(31, 0);

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow, pmc);
	}

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}

/**
 * kvm_pmu_update_pmc_chained - update chained bitmap
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 *
 * Update the chained bitmap based on the event type written in the
 * typer register and the enable state of the odd register.
 */
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
	bool new_state, old_state;

	old_state = kvm_pmu_pmc_is_chained(pmc);
	/* Chained iff the odd counter has the CHAIN event AND is enabled */
	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);

	if (old_state == new_state)
		return;

	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
	kvm_pmu_stop_counter(vcpu, canonical_pmc);
	if (new_state) {
		/*
		 * During promotion from !chained to chained we must ensure
		 * the adjacent counter is stopped and its event destroyed
		 */
		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
		return;
	}
	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
 * event with given hardware event number. Here we call perf_event API to
 * emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	u64 reg, mask;

	/* Keep the filter bits, but narrow the event field to what's implemented */
	mask  =  ARMV8_PMU_EVTYPE_MASK;
	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
	mask |= kvm_pmu_event_mask(vcpu->kvm);

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;

	__vcpu_sys_reg(vcpu, reg) = data & mask;

	kvm_pmu_update_pmc_chained(vcpu, select_idx);
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/*
 * Probe the host PMU version by creating a throwaway perf event and
 * inspecting the arm_pmu it lands on. Returns 0xf (IMPLEMENTATION
 * DEFINED, i.e. unusable) on failure.
 */
static int kvm_pmu_probe_pmuver(void)
{
	struct perf_event_attr attr = { };
	struct perf_event *event;
	struct arm_pmu *pmu;
	int pmuver = 0xf;

	/*
	 * Create a dummy event that only counts user cycles. As we'll never
	 * leave this function with the event being live, it will never
	 * count anything. But it allows us to probe some of the PMU
	 * details. Yes, this is terrible.
	 */
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = 0;
	attr.exclude_user = 0;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.exclude_host = 1;
	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	attr.sample_period = GENMASK(63, 0);

	/* The handler context is a dummy; the event never overflows here */
	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, &attr);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return 0xf;
	}

	if (event->pmu) {
		pmu = to_arm_pmu(event->pmu);
		if (pmu->pmuver)
			pmuver = pmu->pmuver;
	}

	perf_event_disable(event);
	perf_event_release_kernel(event);

	return pmuver;
}

/*
 * Compute the PMCEID0_EL0/PMCEID1_EL0 value for the guest: the host's
 * common-event bitmap, with events hidden by the VM's event filter (if
 * any) masked out. Events at 0x4000+ are reported in the high 32 bits
 * of the respective register.
 */
u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	/* Build the mask 8 filter bits at a time */
	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		/* Extended (0x4000+) events only exist with 16bit event fields */
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}

bool kvm_arm_support_pmu_v3(void)
{
	/*
	 * Check if HW_PERF_EVENTS are supported by checking the number of
	 * hardware performance counters. This could ensure the presence of
	 * a physical PMU and CONFIG_PERF_EVENT is selected.
	 */
	return (perf_num_counters() > 0);
}

/*
 * Final validation of the PMU configuration when the vcpu is run for the
 * first time; marks the PMU ready on success.
 */
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!vcpu->arch.pmu.created)
		return 0;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and using an in-kernel
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -EINVAL;

		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	kvm_pmu_vcpu_reset(vcpu);
	vcpu->arch.pmu.ready = true;

	return 0;
}

/* Handler for the KVM_ARM_VCPU_PMU_V3_INIT vcpu device attribute */
static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

/* Handle writes to the KVM_ARM_VCPU_PMU_V3_CTRL vcpu device attribute group */
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	if (!kvm_arm_support_pmu_v3() ||
	    !test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	/* Probe the host PMU version once per VM, on first use */
	if (!vcpu->kvm->arch.pmuver)
		vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();

	if (vcpu->kvm->arch.pmuver == 0xf)
		return -ENODEV;

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(vcpu->kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		/* Reject out-of-range event spans and unknown actions */
		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		mutex_lock(&vcpu->kvm->lock);

		if (!vcpu->kvm->arch.pmu_filter) {
			vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL);
			if (!vcpu->kvm->arch.pmu_filter) {
				mutex_unlock(&vcpu->kvm->lock);
				return -ENOMEM;
			}

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		mutex_unlock(&vcpu->kvm->lock);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

/* Handle reads of the KVM_ARM_VCPU_PMU_V3_CTRL vcpu device attribute group */
int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

/* Report which PMU vcpu device attributes are supported */
int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
		if (kvm_arm_support_pmu_v3() &&
		    test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
			return 0;
	}

	return -ENXIO;
}