// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine -- Performance Monitoring Unit support
 *
 * Copyright 2015 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 *   Wei Huang    <wei@redhat.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <asm/perf_event.h>
#include <asm/cpu_device_id.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "pmu.h"

/* This is enough to filter the vast majority of currently defined events. */
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300

struct x86_pmu_capability __read_mostly kvm_pmu_cap;
EXPORT_SYMBOL_GPL(kvm_pmu_cap);

/* Precise Distribution of Instructions Retired (PDIR) */
static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = {
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL),
	/* Instruction-Accurate PDIR (PDIR++) */
	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, NULL),
	{}
};

/* Precise Distribution (PDist) */
static const struct x86_cpu_id vmx_pebs_pdist_cpu[] = {
	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, NULL),
	{}
};

/* NOTE:
 * - Each perf counter is defined as "struct kvm_pmc";
 * - There are two types of perf counters: general purpose (gp) and fixed.
 *   gp counters are stored in gp_counters[] and fixed counters are stored
 *   in fixed_counters[] respectively. Both of them are part of "struct
 *   kvm_pmu";
 * - pmu.c understands the difference between gp counters and fixed counters.
 *   However AMD doesn't support fixed-counters;
 * - There are three types of index to access perf counters (PMC):
 *   1. MSR (named msr): For example Intel has MSR_IA32_PERFCTRn and AMD
 *      has MSR_K7_PERFCTRn and, for families 15H and later,
 *      MSR_F15H_PERF_CTRn, where MSR_F15H_PERF_CTR[0-3] are
 *      aliased to MSR_K7_PERFCTRn.
 *   2. MSR Index (named idx): This normally is used by RDPMC instruction.
 *      For instance AMD RDPMC instruction uses 0000_0003h in ECX to access
 *      C001_0007h (MSR_K7_PERFCTR3). Intel has a similar mechanism, except
 *      that it also supports fixed counters. idx can be used as an index to
 *      gp and fixed counters.
 *   3. Global PMC Index (named pmc): pmc is an index specific to PMU
 *      code. Each pmc, stored in kvm_pmc.idx field, is unique across
 *      all perf counters (both gp and fixed). The mapping relationship
 *      between pmc and perf counters is as follows:
 *      * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters
 *               [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
 *      * AMD:   [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H
 *               and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters
 */
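
/*
 * For illustration, a worked example of the three index spaces above,
 * assuming the usual encodings (MSR_CORE_PERF_FIXED_CTR0 at 0x309, RDPMC
 * selecting fixed counters via bit 30 of ECX, and INTEL_PMC_IDX_FIXED == 32).
 * For Intel fixed counter 1 the indices line up roughly as:
 *
 *	MSR (msr)         : MSR_CORE_PERF_FIXED_CTR1 (0x30a)
 *	RDPMC index (idx) : (1u << 30) | 1
 *	global PMC (pmc)  : INTEL_PMC_IDX_FIXED + 1 == 33, i.e. kvm_pmc.idx
 *
 * This is only a sketch; the authoritative mappings live in the vendor
 * implementations behind the rdpmc_ecx_to_pmc() and pmc_idx_to_pmc() hooks.
 */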

static struct kvm_pmu_ops kvm_pmu_ops __read_mostly;

#define KVM_X86_PMU_OP(func)					     \
	DEFINE_STATIC_CALL_NULL(kvm_x86_pmu_##func,		     \
				*(((struct kvm_pmu_ops *)0)->func));
#define KVM_X86_PMU_OP_OPTIONAL KVM_X86_PMU_OP
#include <asm/kvm-x86-pmu-ops.h>

void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
{
	memcpy(&kvm_pmu_ops, pmu_ops, sizeof(kvm_pmu_ops));

#define __KVM_X86_PMU_OP(func) \
	static_call_update(kvm_x86_pmu_##func, kvm_pmu_ops.func);
#define KVM_X86_PMU_OP(func) \
	WARN_ON(!kvm_pmu_ops.func); __KVM_X86_PMU_OP(func)
#define KVM_X86_PMU_OP_OPTIONAL __KVM_X86_PMU_OP
#include <asm/kvm-x86-pmu-ops.h>
#undef __KVM_X86_PMU_OP
}

static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
	bool skip_pmi = false;

	if (pmc->perf_event && pmc->perf_event->attr.precise_ip) {
		if (!in_pmi) {
			/*
			 * TODO: KVM is currently _choosing_ to not generate records
			 * for emulated instructions, avoiding BUFFER_OVF PMI when
			 * there are no records. Strictly speaking, it should be done
			 * as well in the right context to improve sampling accuracy.
			 */
			skip_pmi = true;
		} else {
			/* Indicate PEBS overflow PMI to guest. */
			skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
						      (unsigned long *)&pmu->global_status);
		}
	} else {
		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
	}

	if (pmc->intr && !skip_pmi)
		kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}

static void kvm_perf_overflow(struct perf_event *perf_event,
			      struct perf_sample_data *data,
			      struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;

	/*
	 * Ignore overflow events for counters that are scheduled to be
	 * reprogrammed, e.g. if a PMI for the previous event races with KVM's
	 * handling of a related guest WRMSR.
	 */
	if (test_and_set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi))
		return;

	__kvm_perf_overflow(pmc, true);

	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
}

static u64 pmc_get_pebs_precise_level(struct kvm_pmc *pmc)
{
	/*
	 * For some model-specific PEBS counters with special capabilities
	 * (PDIR, PDIR++, PDIST), KVM needs to raise the event precise
	 * level to the maximum value (currently 3, backwards compatible)
	 * so that the perf subsystem assigns a hardware counter with that
	 * capability to the vPMC.
	 */
	if ((pmc->idx == 0 && x86_match_cpu(vmx_pebs_pdist_cpu)) ||
	    (pmc->idx == 32 && x86_match_cpu(vmx_pebs_pdir_cpu)))
		return 3;

	/*
	 * A non-zero precision level turns an ordinary guest event into a
	 * guest PEBS event and triggers the host PEBS PMI handler to
	 * determine whether the PEBS overflow PMI comes from the host
	 * counters or the guest.
	 */
	return 1;
}
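
/*
 * As a point of reference (not specific to this file), precise_ip follows
 * the perf_event_open() convention: 0 allows arbitrary skid and larger
 * values request less, with 3 demanding zero skid. The level chosen above
 * is consumed by pmc_reprogram_counter() below, roughly:
 *
 *	if (pebs)
 *		attr.precise_ip = pmc_get_pebs_precise_level(pmc);	// 1 or 3
 */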

static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
				 bool exclude_user, bool exclude_kernel,
				 bool intr)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
	struct perf_event *event;
	struct perf_event_attr attr = {
		.type = type,
		.size = sizeof(attr),
		.pinned = true,
		.exclude_idle = true,
		.exclude_host = 1,
		.exclude_user = exclude_user,
		.exclude_kernel = exclude_kernel,
		.config = config,
	};
	bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable);

	attr.sample_period = get_sample_period(pmc, pmc->counter);

	if ((attr.config & HSW_IN_TX_CHECKPOINTED) &&
	    guest_cpuid_is_intel(pmc->vcpu)) {
		/*
		 * HSW_IN_TX_CHECKPOINTED is not supported with nonzero
		 * period. Just clear the sample period so at least
		 * allocating the counter doesn't fail.
		 */
		attr.sample_period = 0;
	}
	if (pebs) {
		/*
		 * For most PEBS hardware events, the difference in the software
		 * precision levels of guest and host PEBS events will not affect
		 * the accuracy of the PEBS profiling result, because the "event IP"
		 * in the PEBS record is calibrated on the guest side.
		 */
		attr.precise_ip = pmc_get_pebs_precise_level(pmc);
	}

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_perf_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
				     PTR_ERR(event), pmc->idx);
		return PTR_ERR(event);
	}

	pmc->perf_event = event;
	pmc_to_pmu(pmc)->event_count++;
	pmc->is_paused = false;
	pmc->intr = intr || pebs;
	return 0;
}

static void pmc_pause_counter(struct kvm_pmc *pmc)
{
	u64 counter = pmc->counter;

	if (!pmc->perf_event || pmc->is_paused)
		return;

	/* update counter, reset event value to avoid redundant accumulation */
	counter += perf_event_pause(pmc->perf_event, true);
	pmc->counter = counter & pmc_bitmask(pmc);
	pmc->is_paused = true;
}

static bool pmc_resume_counter(struct kvm_pmc *pmc)
{
	if (!pmc->perf_event)
		return false;

	/* recalibrate sample period and check if it's accepted by perf core */
	if (is_sampling_event(pmc->perf_event) &&
	    perf_event_period(pmc->perf_event,
			      get_sample_period(pmc, pmc->counter)))
		return false;

	if (test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) !=
	    (!!pmc->perf_event->attr.precise_ip))
		return false;

	/* reuse perf_event to serve as pmc_reprogram_counter() does */
	perf_event_enable(pmc->perf_event);
	pmc->is_paused = false;

	return true;
}

static void pmc_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
		pmc->current_config = 0;
		pmc_to_pmu(pmc)->event_count--;
	}
}

static void pmc_stop_counter(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		pmc->counter = pmc_read_counter(pmc);
		pmc_release_perf_event(pmc);
	}
}
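
/*
 * A small worked example of the period calculation that pmc_pause_counter()
 * and pmc_resume_counter() rely on, assuming get_sample_period() in pmu.h
 * keeps its current form (sample_period = (-counter) & pmc_bitmask(pmc)):
 *
 *	u64 counter = 0xfffffffffff0ULL;		// 48-bit counter, near overflow
 *	u64 period  = (0 - counter) & 0xffffffffffffULL;	// == 0x10
 *
 * i.e. the perf event is asked to fire after the 16 increments that remain
 * before the guest-visible counter wraps.
 */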

static int filter_cmp(const void *pa, const void *pb, u64 mask)
{
	u64 a = *(u64 *)pa & mask;
	u64 b = *(u64 *)pb & mask;

	return (a > b) - (a < b);
}

static int filter_sort_cmp(const void *pa, const void *pb)
{
	return filter_cmp(pa, pb, (KVM_PMU_MASKED_ENTRY_EVENT_SELECT |
				   KVM_PMU_MASKED_ENTRY_EXCLUDE));
}

/*
 * For the event filter, searching is done on the 'includes' list and
 * 'excludes' list separately rather than on the 'events' list (which
 * has both). As a result the exclude bit can be ignored.
 */
static int filter_event_cmp(const void *pa, const void *pb)
{
	return filter_cmp(pa, pb, (KVM_PMU_MASKED_ENTRY_EVENT_SELECT));
}

static int find_filter_index(u64 *events, u64 nevents, u64 key)
{
	u64 *fe = bsearch(&key, events, nevents, sizeof(events[0]),
			  filter_event_cmp);

	if (!fe)
		return -1;

	return fe - events;
}

static bool is_filter_entry_match(u64 filter_event, u64 umask)
{
	u64 mask = filter_event >> (KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT - 8);
	u64 match = filter_event & KVM_PMU_MASKED_ENTRY_UMASK_MATCH;

	BUILD_BUG_ON((KVM_PMU_ENCODE_MASKED_ENTRY(0, 0xff, 0, false) >>
		      (KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT - 8)) !=
		     ARCH_PERFMON_EVENTSEL_UMASK);

	return (umask & mask) == match;
}
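
/*
 * To illustrate the mask/match test above with the same
 * KVM_PMU_ENCODE_MASKED_ENTRY() helper used in the BUILD_BUG_ON() (whose
 * arguments are, in order, event select, umask mask, umask match, exclude):
 *
 *	u64 entry = KVM_PMU_ENCODE_MASKED_ENTRY(0xd0, 0xf0, 0x80, false);
 *
 * accepts any guest unit mask whose top nibble is 0x8 (0x80 .. 0x8f), since
 * (umask & 0xf0) == 0x80 for exactly those values, and rejects e.g. 0x10.
 * The event select (0xd0 here) is matched separately via filter_event_cmp().
 */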

static bool filter_contains_match(u64 *events, u64 nevents, u64 eventsel)
{
	u64 event_select = eventsel & kvm_pmu_ops.EVENTSEL_EVENT;
	u64 umask = eventsel & ARCH_PERFMON_EVENTSEL_UMASK;
	int i, index;

	index = find_filter_index(events, nevents, event_select);
	if (index < 0)
		return false;

	/*
	 * Entries are sorted by the event select. Walk the list in both
	 * directions to process all entries with the targeted event select.
	 */
	for (i = index; i < nevents; i++) {
		if (filter_event_cmp(&events[i], &event_select))
			break;

		if (is_filter_entry_match(events[i], umask))
			return true;
	}

	for (i = index - 1; i >= 0; i--) {
		if (filter_event_cmp(&events[i], &event_select))
			break;

		if (is_filter_entry_match(events[i], umask))
			return true;
	}

	return false;
}

static bool is_gp_event_allowed(struct kvm_x86_pmu_event_filter *f,
				u64 eventsel)
{
	if (filter_contains_match(f->includes, f->nr_includes, eventsel) &&
	    !filter_contains_match(f->excludes, f->nr_excludes, eventsel))
		return f->action == KVM_PMU_EVENT_ALLOW;

	return f->action == KVM_PMU_EVENT_DENY;
}

static bool is_fixed_event_allowed(struct kvm_x86_pmu_event_filter *filter,
				   int idx)
{
	int fixed_idx = idx - INTEL_PMC_IDX_FIXED;

	if (filter->action == KVM_PMU_EVENT_DENY &&
	    test_bit(fixed_idx, (ulong *)&filter->fixed_counter_bitmap))
		return false;
	if (filter->action == KVM_PMU_EVENT_ALLOW &&
	    !test_bit(fixed_idx, (ulong *)&filter->fixed_counter_bitmap))
		return false;

	return true;
}

static bool check_pmu_event_filter(struct kvm_pmc *pmc)
{
	struct kvm_x86_pmu_event_filter *filter;
	struct kvm *kvm = pmc->vcpu->kvm;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (!filter)
		return true;

	if (pmc_is_gp(pmc))
		return is_gp_event_allowed(filter, pmc->eventsel);

	return is_fixed_event_allowed(filter, pmc->idx);
}

static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
{
	return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
	       static_call(kvm_x86_pmu_hw_event_available)(pmc) &&
	       check_pmu_event_filter(pmc);
}
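
/*
 * A sketch of the resulting semantics, assuming a filter installed with
 * action == KVM_PMU_EVENT_ALLOW and a single entry for event select 0xc0 /
 * unit mask 0x00 (retired instructions on many x86 CPUs, though the meaning
 * of the encoding is vendor specific):
 *
 *	is_gp_event_allowed(f, 0x004100c0);	// true, listed in the filter
 *	is_gp_event_allowed(f, 0x004101c0);	// false, unit mask 0x01 differs
 *
 * With KVM_PMU_EVENT_DENY the results invert. Fixed counters are judged
 * separately against fixed_counter_bitmap in is_fixed_event_allowed().
 */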

static void reprogram_counter(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
	u64 eventsel = pmc->eventsel;
	u64 new_config = eventsel;
	u8 fixed_ctr_ctrl;

	pmc_pause_counter(pmc);

	if (!pmc_event_is_allowed(pmc))
		goto reprogram_complete;

	if (pmc->counter < pmc->prev_counter)
		__kvm_perf_overflow(pmc, false);

	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
		printk_once("kvm pmu: pin control bit is ignored\n");

	if (pmc_is_fixed(pmc)) {
		fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
						  pmc->idx - INTEL_PMC_IDX_FIXED);
		if (fixed_ctr_ctrl & 0x1)
			eventsel |= ARCH_PERFMON_EVENTSEL_OS;
		if (fixed_ctr_ctrl & 0x2)
			eventsel |= ARCH_PERFMON_EVENTSEL_USR;
		if (fixed_ctr_ctrl & 0x8)
			eventsel |= ARCH_PERFMON_EVENTSEL_INT;
		new_config = (u64)fixed_ctr_ctrl;
	}

	if (pmc->current_config == new_config && pmc_resume_counter(pmc))
		goto reprogram_complete;

	pmc_release_perf_event(pmc);

	pmc->current_config = new_config;

	/*
	 * If reprogramming fails, e.g. due to contention, leave the counter's
	 * reprogram bit set, i.e. opportunistically try again on the next PMU
	 * refresh. Don't make a new request as doing so can stall the guest
	 * if reprogramming repeatedly fails.
	 */
	if (pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
				  (eventsel & pmu->raw_event_mask),
				  !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
				  !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
				  eventsel & ARCH_PERFMON_EVENTSEL_INT))
		return;

reprogram_complete:
	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
	pmc->prev_counter = 0;
}

void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	int bit;

	for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
		struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);

		if (unlikely(!pmc)) {
			clear_bit(bit, pmu->reprogram_pmi);
			continue;
		}

		reprogram_counter(pmc);
	}

	/*
	 * Unused perf_events are only released if the corresponding MSRs
	 * weren't accessed during the last vCPU time slice. kvm_arch_sched_in
	 * triggers KVM_REQ_PMU if cleanup is needed.
	 */
	if (unlikely(pmu->need_cleanup))
		kvm_pmu_cleanup(vcpu);
}

/* check if idx is a valid index to access PMU */
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
	return static_call(kvm_x86_pmu_is_valid_rdpmc_ecx)(vcpu, idx);
}

bool is_vmware_backdoor_pmc(u32 pmc_idx)
{
	switch (pmc_idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		return true;
	}
	return false;
}

static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	u64 ctr_val;

	switch (idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
		ctr_val = rdtsc();
		break;
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
		ctr_val = ktime_get_boottime_ns();
		break;
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		ctr_val = ktime_get_boottime_ns() +
			vcpu->kvm->arch.kvmclock_offset;
		break;
	default:
		return 1;
	}

	*data = ctr_val;
	return 0;
}

int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	bool fast_mode = idx & (1u << 31);
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 mask = fast_mode ? ~0u : ~0ull;

	if (!pmu->version)
		return 1;

	if (is_vmware_backdoor_pmc(idx))
		return kvm_pmu_rdpmc_vmware(vcpu, idx, data);

	pmc = static_call(kvm_x86_pmu_rdpmc_ecx_to_pmc)(vcpu, idx, &mask);
	if (!pmc)
		return 1;

	if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_PCE) &&
	    (static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
	    kvm_is_cr0_bit_set(vcpu, X86_CR0_PE))
		return 1;

	*data = pmc_read_counter(pmc) & mask;
	return 0;
}

void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
	if (lapic_in_kernel(vcpu)) {
		static_call_cond(kvm_x86_pmu_deliver_pmi)(vcpu);
		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
	}
}

bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	switch (msr) {
	case MSR_CORE_PERF_GLOBAL_STATUS:
	case MSR_CORE_PERF_GLOBAL_CTRL:
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		return kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu));
	default:
		break;
	}
	return static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr) ||
		static_call(kvm_x86_pmu_is_valid_msr)(vcpu, msr);
}

static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr);

	if (pmc)
		__set_bit(pmc->idx, pmu->pmc_in_use);
}

int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	u32 msr = msr_info->index;

	switch (msr) {
	case MSR_CORE_PERF_GLOBAL_STATUS:
	case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
		msr_info->data = pmu->global_status;
		break;
	case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
	case MSR_CORE_PERF_GLOBAL_CTRL:
		msr_info->data = pmu->global_ctrl;
		break;
	case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		msr_info->data = 0;
		break;
	default:
		return static_call(kvm_x86_pmu_get_msr)(vcpu, msr_info);
	}

	return 0;
}

int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	u32 msr = msr_info->index;
	u64 data = msr_info->data;
	u64 diff;

	/*
	 * Note, AMD ignores writes to reserved bits and read-only PMU MSRs,
	 * whereas Intel generates #GP on attempts to write reserved/RO MSRs.
	 */
	switch (msr) {
	case MSR_CORE_PERF_GLOBAL_STATUS:
		if (!msr_info->host_initiated)
			return 1; /* RO MSR */
		fallthrough;
	case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
		/* Per PPR, Read-only MSR. Writes are ignored. */
		if (!msr_info->host_initiated)
			break;

		if (data & pmu->global_status_mask)
			return 1;

		pmu->global_status = data;
		break;
	case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
		data &= ~pmu->global_ctrl_mask;
		fallthrough;
	case MSR_CORE_PERF_GLOBAL_CTRL:
		if (!kvm_valid_perf_global_ctrl(pmu, data))
			return 1;

		if (pmu->global_ctrl != data) {
			diff = pmu->global_ctrl ^ data;
			pmu->global_ctrl = data;
			reprogram_counters(pmu, diff);
		}
		break;
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		/*
		 * GLOBAL_OVF_CTRL, a.k.a. GLOBAL STATUS_RESET, clears bits in
		 * GLOBAL_STATUS, and so the set of reserved bits is the same.
		 */
		if (data & pmu->global_status_mask)
			return 1;
		fallthrough;
	case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
		if (!msr_info->host_initiated)
			pmu->global_status &= ~data;
		break;
	default:
		kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
		return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info);
	}

	return 0;
}

void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	pmu->need_cleanup = false;

	bitmap_zero(pmu->reprogram_pmi, X86_PMC_IDX_MAX);

	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
		if (!pmc)
			continue;

		pmc_stop_counter(pmc);
		pmc->counter = 0;

		if (pmc_is_gp(pmc))
			pmc->eventsel = 0;
	}

	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0;

	static_call_cond(kvm_x86_pmu_reset)(vcpu);
}

/*
 * Refresh the PMU configuration for the vCPU, e.g. if userspace changes CPUID
 * and/or PERF_CAPABILITIES.
 */
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
	if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
		return;

	/*
	 * Stop/release all existing counters/events before realizing the new
	 * vPMU model.
	 */
	kvm_pmu_reset(vcpu);

	bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
	static_call(kvm_x86_pmu_refresh)(vcpu);
}

void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	memset(pmu, 0, sizeof(*pmu));
	static_call(kvm_x86_pmu_init)(vcpu);
	pmu->event_count = 0;
	pmu->need_cleanup = false;
	kvm_pmu_refresh(vcpu);
}

/* Release perf_events for vPMCs that have been unused for a full time slice. */
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX);
	int i;

	pmu->need_cleanup = false;

	bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
		      pmu->pmc_in_use, X86_PMC_IDX_MAX);

	for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);

		if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
			pmc_stop_counter(pmc);
	}

	static_call_cond(kvm_x86_pmu_cleanup)(vcpu);

	bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
}

void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_pmu_reset(vcpu);
}

static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
{
	pmc->prev_counter = pmc->counter;
	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
	kvm_pmu_request_counter_reprogram(pmc);
}

static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
					     unsigned int perf_hw_id)
{
	return !((pmc->eventsel ^ perf_get_hw_event_config(perf_hw_id)) &
		 AMD64_RAW_EVENT_MASK_NB);
}

static inline bool cpl_is_matched(struct kvm_pmc *pmc)
{
	bool select_os, select_user;
	u64 config;

	if (pmc_is_gp(pmc)) {
		config = pmc->eventsel;
		select_os = config & ARCH_PERFMON_EVENTSEL_OS;
		select_user = config & ARCH_PERFMON_EVENTSEL_USR;
	} else {
		config = fixed_ctrl_field(pmc_to_pmu(pmc)->fixed_ctr_ctrl,
					  pmc->idx - INTEL_PMC_IDX_FIXED);
		select_os = config & 0x1;
		select_user = config & 0x2;
	}

	return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
}

void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);

		if (!pmc || !pmc_event_is_allowed(pmc))
			continue;

		/* Ignore checks for edge detect, pin control, invert and CMASK bits */
		if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
			kvm_pmu_incr_counter(pmc);
	}
}
EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);

static bool is_masked_filter_valid(const struct kvm_x86_pmu_event_filter *filter)
{
	u64 mask = kvm_pmu_ops.EVENTSEL_EVENT |
		   KVM_PMU_MASKED_ENTRY_UMASK_MASK |
		   KVM_PMU_MASKED_ENTRY_UMASK_MATCH |
		   KVM_PMU_MASKED_ENTRY_EXCLUDE;
	int i;

	for (i = 0; i < filter->nevents; i++) {
		if (filter->events[i] & ~mask)
			return false;
	}

	return true;
}

static void convert_to_masked_filter(struct kvm_x86_pmu_event_filter *filter)
{
	int i, j;

	for (i = 0, j = 0; i < filter->nevents; i++) {
		/*
		 * Skip events that are impossible to match against a guest
		 * event. When filtering, only the event select + unit mask
		 * of the guest event is used. To maintain backwards
		 * compatibility, impossible filters can't be rejected :-(
		 */
		if (filter->events[i] & ~(kvm_pmu_ops.EVENTSEL_EVENT |
					  ARCH_PERFMON_EVENTSEL_UMASK))
			continue;
		/*
		 * Convert userspace events to a common in-kernel event so
		 * only one code path is needed to support both events. For
		 * the in-kernel events use masked events because they are
		 * flexible enough to handle both cases. To convert to masked
		 * events all that's needed is to add an "all ones" umask_mask
		 * (unmasked filter events don't support EXCLUDE).
		 */
		filter->events[j++] = filter->events[i] |
				      (0xFFULL << KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT);
	}

	filter->nevents = j;
}

static int prepare_filter_lists(struct kvm_x86_pmu_event_filter *filter)
{
	int i;

	if (!(filter->flags & KVM_PMU_EVENT_FLAG_MASKED_EVENTS))
		convert_to_masked_filter(filter);
	else if (!is_masked_filter_valid(filter))
		return -EINVAL;

	/*
	 * Sort entries by event select and includes vs. excludes so that all
	 * entries for a given event select can be processed efficiently during
	 * filtering. The EXCLUDE flag uses a more significant bit than the
	 * event select, and so the sorted list is also effectively split into
	 * includes and excludes sub-lists.
	 */
	sort(&filter->events, filter->nevents, sizeof(filter->events[0]),
	     filter_sort_cmp, NULL);

	i = filter->nevents;
	/* Find the first EXCLUDE event (only supported for masked events). */
	if (filter->flags & KVM_PMU_EVENT_FLAG_MASKED_EVENTS) {
		for (i = 0; i < filter->nevents; i++) {
			if (filter->events[i] & KVM_PMU_MASKED_ENTRY_EXCLUDE)
				break;
		}
	}

	filter->nr_includes = i;
	filter->nr_excludes = filter->nevents - filter->nr_includes;
	filter->includes = filter->events;
	filter->excludes = filter->events + filter->nr_includes;

	return 0;
}

int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
	struct kvm_pmu_event_filter __user *user_filter = argp;
	struct kvm_x86_pmu_event_filter *filter;
	struct kvm_pmu_event_filter tmp;
	struct kvm_vcpu *vcpu;
	unsigned long i;
	size_t size;
	int r;

	if (copy_from_user(&tmp, user_filter, sizeof(tmp)))
		return -EFAULT;

	if (tmp.action != KVM_PMU_EVENT_ALLOW &&
	    tmp.action != KVM_PMU_EVENT_DENY)
		return -EINVAL;

	if (tmp.flags & ~KVM_PMU_EVENT_FLAGS_VALID_MASK)
		return -EINVAL;

	if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
		return -E2BIG;

	size = struct_size(filter, events, tmp.nevents);
	filter = kzalloc(size, GFP_KERNEL_ACCOUNT);
	if (!filter)
		return -ENOMEM;

	filter->action = tmp.action;
	filter->nevents = tmp.nevents;
	filter->fixed_counter_bitmap = tmp.fixed_counter_bitmap;
	filter->flags = tmp.flags;

	r = -EFAULT;
	if (copy_from_user(filter->events, user_filter->events,
			   sizeof(filter->events[0]) * filter->nevents))
		goto cleanup;

	r = prepare_filter_lists(filter);
	if (r)
		goto cleanup;

	mutex_lock(&kvm->lock);
	filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
				     mutex_is_locked(&kvm->lock));
	mutex_unlock(&kvm->lock);
	synchronize_srcu_expedited(&kvm->srcu);

	BUILD_BUG_ON(sizeof(((struct kvm_pmu *)0)->reprogram_pmi) >
		     sizeof(((struct kvm_pmu *)0)->__reprogram_pmi));

	kvm_for_each_vcpu(i, vcpu, kvm)
		atomic64_set(&vcpu_to_pmu(vcpu)->__reprogram_pmi, -1ull);

	kvm_make_all_cpus_request(kvm, KVM_REQ_PMU);

	r = 0;
cleanup:
	kfree(filter);
	return r;
}
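
/*
 * A minimal userspace sketch of how this ioctl is typically driven, assuming
 * the uapi layout of struct kvm_pmu_event_filter (action, nevents,
 * fixed_counter_bitmap, flags, padding, then nevents u64 event entries):
 *
 *	struct kvm_pmu_event_filter *f = calloc(1, sizeof(*f) + sizeof(__u64));
 *
 *	f->action    = KVM_PMU_EVENT_ALLOW;
 *	f->nevents   = 1;
 *	f->events[0] = 0xc0;	// event select 0xc0, unit mask 0x00
 *	if (ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, f))
 *		perror("KVM_SET_PMU_EVENT_FILTER");
 *
 * Nothing above is prescriptive; Documentation/virt/kvm/api.rst has the
 * authoritative description of the ABI, including the masked-events format.
 */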