18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * @file op_model_ppro.h 38c2ecf20Sopenharmony_ci * Family 6 perfmon and architectural perfmon MSR operations 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * @remark Copyright 2002 OProfile authors 68c2ecf20Sopenharmony_ci * @remark Copyright 2008 Intel Corporation 78c2ecf20Sopenharmony_ci * @remark Read the file COPYING 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * @author John Levon 108c2ecf20Sopenharmony_ci * @author Philippe Elie 118c2ecf20Sopenharmony_ci * @author Graydon Hoare 128c2ecf20Sopenharmony_ci * @author Andi Kleen 138c2ecf20Sopenharmony_ci * @author Robert Richter <robert.richter@amd.com> 148c2ecf20Sopenharmony_ci */ 158c2ecf20Sopenharmony_ci 168c2ecf20Sopenharmony_ci#include <linux/oprofile.h> 178c2ecf20Sopenharmony_ci#include <linux/slab.h> 188c2ecf20Sopenharmony_ci#include <asm/ptrace.h> 198c2ecf20Sopenharmony_ci#include <asm/msr.h> 208c2ecf20Sopenharmony_ci#include <asm/apic.h> 218c2ecf20Sopenharmony_ci#include <asm/nmi.h> 228c2ecf20Sopenharmony_ci 238c2ecf20Sopenharmony_ci#include "op_x86_model.h" 248c2ecf20Sopenharmony_ci#include "op_counter.h" 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_cistatic int num_counters = 2; 278c2ecf20Sopenharmony_cistatic int counter_width = 32; 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_ci#define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21)) 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_cistatic u64 reset_value[OP_MAX_COUNTER]; 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_cistatic void ppro_shutdown(struct op_msrs const * const msrs) 348c2ecf20Sopenharmony_ci{ 358c2ecf20Sopenharmony_ci int i; 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; ++i) { 388c2ecf20Sopenharmony_ci if (!msrs->counters[i].addr) 398c2ecf20Sopenharmony_ci continue; 408c2ecf20Sopenharmony_ci release_perfctr_nmi(MSR_P6_PERFCTR0 + i); 418c2ecf20Sopenharmony_ci release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); 428c2ecf20Sopenharmony_ci } 438c2ecf20Sopenharmony_ci} 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_cistatic int ppro_fill_in_addresses(struct op_msrs * const msrs) 468c2ecf20Sopenharmony_ci{ 478c2ecf20Sopenharmony_ci int i; 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; i++) { 508c2ecf20Sopenharmony_ci if (!reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) 518c2ecf20Sopenharmony_ci goto fail; 528c2ecf20Sopenharmony_ci if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) { 538c2ecf20Sopenharmony_ci release_perfctr_nmi(MSR_P6_PERFCTR0 + i); 548c2ecf20Sopenharmony_ci goto fail; 558c2ecf20Sopenharmony_ci } 568c2ecf20Sopenharmony_ci /* both registers must be reserved */ 578c2ecf20Sopenharmony_ci msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; 588c2ecf20Sopenharmony_ci msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; 598c2ecf20Sopenharmony_ci continue; 608c2ecf20Sopenharmony_ci fail: 618c2ecf20Sopenharmony_ci if (!counter_config[i].enabled) 628c2ecf20Sopenharmony_ci continue; 638c2ecf20Sopenharmony_ci op_x86_warn_reserved(i); 648c2ecf20Sopenharmony_ci ppro_shutdown(msrs); 658c2ecf20Sopenharmony_ci return -EBUSY; 668c2ecf20Sopenharmony_ci } 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci return 0; 698c2ecf20Sopenharmony_ci} 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_cistatic void ppro_setup_ctrs(struct op_x86_model_spec const *model, 738c2ecf20Sopenharmony_ci struct op_msrs const * const msrs) 748c2ecf20Sopenharmony_ci{ 758c2ecf20Sopenharmony_ci u64 val; 768c2ecf20Sopenharmony_ci int i; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) { 798c2ecf20Sopenharmony_ci union cpuid10_eax eax; 808c2ecf20Sopenharmony_ci eax.full = cpuid_eax(0xa); 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci /* 838c2ecf20Sopenharmony_ci * For Core2 (family 6, model 15), don't reset the 848c2ecf20Sopenharmony_ci * counter width: 858c2ecf20Sopenharmony_ci */ 868c2ecf20Sopenharmony_ci if (!(eax.split.version_id == 0 && 878c2ecf20Sopenharmony_ci __this_cpu_read(cpu_info.x86) == 6 && 888c2ecf20Sopenharmony_ci __this_cpu_read(cpu_info.x86_model) == 15)) { 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci if (counter_width < eax.split.bit_width) 918c2ecf20Sopenharmony_ci counter_width = eax.split.bit_width; 928c2ecf20Sopenharmony_ci } 938c2ecf20Sopenharmony_ci } 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci /* clear all counters */ 968c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; ++i) { 978c2ecf20Sopenharmony_ci if (!msrs->controls[i].addr) 988c2ecf20Sopenharmony_ci continue; 998c2ecf20Sopenharmony_ci rdmsrl(msrs->controls[i].addr, val); 1008c2ecf20Sopenharmony_ci if (val & ARCH_PERFMON_EVENTSEL_ENABLE) 1018c2ecf20Sopenharmony_ci op_x86_warn_in_use(i); 1028c2ecf20Sopenharmony_ci val &= model->reserved; 1038c2ecf20Sopenharmony_ci wrmsrl(msrs->controls[i].addr, val); 1048c2ecf20Sopenharmony_ci /* 1058c2ecf20Sopenharmony_ci * avoid a false detection of ctr overflows in NMI * 1068c2ecf20Sopenharmony_ci * handler 1078c2ecf20Sopenharmony_ci */ 1088c2ecf20Sopenharmony_ci wrmsrl(msrs->counters[i].addr, -1LL); 1098c2ecf20Sopenharmony_ci } 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci /* enable active counters */ 1128c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; ++i) { 1138c2ecf20Sopenharmony_ci if (counter_config[i].enabled && msrs->counters[i].addr) { 1148c2ecf20Sopenharmony_ci reset_value[i] = counter_config[i].count; 1158c2ecf20Sopenharmony_ci wrmsrl(msrs->counters[i].addr, -reset_value[i]); 1168c2ecf20Sopenharmony_ci rdmsrl(msrs->controls[i].addr, val); 1178c2ecf20Sopenharmony_ci val &= model->reserved; 1188c2ecf20Sopenharmony_ci val |= op_x86_get_ctrl(model, &counter_config[i]); 1198c2ecf20Sopenharmony_ci wrmsrl(msrs->controls[i].addr, val); 1208c2ecf20Sopenharmony_ci } else { 1218c2ecf20Sopenharmony_ci reset_value[i] = 0; 1228c2ecf20Sopenharmony_ci } 1238c2ecf20Sopenharmony_ci } 1248c2ecf20Sopenharmony_ci} 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_cistatic int ppro_check_ctrs(struct pt_regs * const regs, 1288c2ecf20Sopenharmony_ci struct op_msrs const * const msrs) 1298c2ecf20Sopenharmony_ci{ 1308c2ecf20Sopenharmony_ci u64 val; 1318c2ecf20Sopenharmony_ci int i; 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; ++i) { 1348c2ecf20Sopenharmony_ci if (!reset_value[i]) 1358c2ecf20Sopenharmony_ci continue; 1368c2ecf20Sopenharmony_ci rdmsrl(msrs->counters[i].addr, val); 1378c2ecf20Sopenharmony_ci if (val & (1ULL << (counter_width - 1))) 1388c2ecf20Sopenharmony_ci continue; 1398c2ecf20Sopenharmony_ci oprofile_add_sample(regs, i); 1408c2ecf20Sopenharmony_ci wrmsrl(msrs->counters[i].addr, -reset_value[i]); 1418c2ecf20Sopenharmony_ci } 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_ci /* Only P6 based Pentium M need to re-unmask the apic vector but it 1448c2ecf20Sopenharmony_ci * doesn't hurt other P6 variant */ 1458c2ecf20Sopenharmony_ci apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci /* We can't work out if we really handled an interrupt. We 1488c2ecf20Sopenharmony_ci * might have caught a *second* counter just after overflowing 1498c2ecf20Sopenharmony_ci * the interrupt for this counter then arrives 1508c2ecf20Sopenharmony_ci * and we don't find a counter that's overflowed, so we 1518c2ecf20Sopenharmony_ci * would return 0 and get dazed + confused. Instead we always 1528c2ecf20Sopenharmony_ci * assume we found an overflow. This sucks. 1538c2ecf20Sopenharmony_ci */ 1548c2ecf20Sopenharmony_ci return 1; 1558c2ecf20Sopenharmony_ci} 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_cistatic void ppro_start(struct op_msrs const * const msrs) 1598c2ecf20Sopenharmony_ci{ 1608c2ecf20Sopenharmony_ci u64 val; 1618c2ecf20Sopenharmony_ci int i; 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; ++i) { 1648c2ecf20Sopenharmony_ci if (reset_value[i]) { 1658c2ecf20Sopenharmony_ci rdmsrl(msrs->controls[i].addr, val); 1668c2ecf20Sopenharmony_ci val |= ARCH_PERFMON_EVENTSEL_ENABLE; 1678c2ecf20Sopenharmony_ci wrmsrl(msrs->controls[i].addr, val); 1688c2ecf20Sopenharmony_ci } 1698c2ecf20Sopenharmony_ci } 1708c2ecf20Sopenharmony_ci} 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_cistatic void ppro_stop(struct op_msrs const * const msrs) 1748c2ecf20Sopenharmony_ci{ 1758c2ecf20Sopenharmony_ci u64 val; 1768c2ecf20Sopenharmony_ci int i; 1778c2ecf20Sopenharmony_ci 1788c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; ++i) { 1798c2ecf20Sopenharmony_ci if (!reset_value[i]) 1808c2ecf20Sopenharmony_ci continue; 1818c2ecf20Sopenharmony_ci rdmsrl(msrs->controls[i].addr, val); 1828c2ecf20Sopenharmony_ci val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; 1838c2ecf20Sopenharmony_ci wrmsrl(msrs->controls[i].addr, val); 1848c2ecf20Sopenharmony_ci } 1858c2ecf20Sopenharmony_ci} 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_cistruct op_x86_model_spec op_ppro_spec = { 1888c2ecf20Sopenharmony_ci .num_counters = 2, 1898c2ecf20Sopenharmony_ci .num_controls = 2, 1908c2ecf20Sopenharmony_ci .reserved = MSR_PPRO_EVENTSEL_RESERVED, 1918c2ecf20Sopenharmony_ci .fill_in_addresses = &ppro_fill_in_addresses, 1928c2ecf20Sopenharmony_ci .setup_ctrs = &ppro_setup_ctrs, 1938c2ecf20Sopenharmony_ci .check_ctrs = &ppro_check_ctrs, 1948c2ecf20Sopenharmony_ci .start = &ppro_start, 1958c2ecf20Sopenharmony_ci .stop = &ppro_stop, 1968c2ecf20Sopenharmony_ci .shutdown = &ppro_shutdown 1978c2ecf20Sopenharmony_ci}; 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_ci/* 2008c2ecf20Sopenharmony_ci * Architectural performance monitoring. 2018c2ecf20Sopenharmony_ci * 2028c2ecf20Sopenharmony_ci * Newer Intel CPUs (Core1+) have support for architectural 2038c2ecf20Sopenharmony_ci * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details. 2048c2ecf20Sopenharmony_ci * The advantage of this is that it can be done without knowing about 2058c2ecf20Sopenharmony_ci * the specific CPU. 2068c2ecf20Sopenharmony_ci */ 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_cistatic void arch_perfmon_setup_counters(void) 2098c2ecf20Sopenharmony_ci{ 2108c2ecf20Sopenharmony_ci union cpuid10_eax eax; 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_ci eax.full = cpuid_eax(0xa); 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ 2158c2ecf20Sopenharmony_ci if (eax.split.version_id == 0 && boot_cpu_data.x86 == 6 && 2168c2ecf20Sopenharmony_ci boot_cpu_data.x86_model == 15) { 2178c2ecf20Sopenharmony_ci eax.split.version_id = 2; 2188c2ecf20Sopenharmony_ci eax.split.num_counters = 2; 2198c2ecf20Sopenharmony_ci eax.split.bit_width = 40; 2208c2ecf20Sopenharmony_ci } 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_ci num_counters = min((int)eax.split.num_counters, OP_MAX_COUNTER); 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_ci op_arch_perfmon_spec.num_counters = num_counters; 2258c2ecf20Sopenharmony_ci op_arch_perfmon_spec.num_controls = num_counters; 2268c2ecf20Sopenharmony_ci} 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_cistatic int arch_perfmon_init(struct oprofile_operations *ignore) 2298c2ecf20Sopenharmony_ci{ 2308c2ecf20Sopenharmony_ci arch_perfmon_setup_counters(); 2318c2ecf20Sopenharmony_ci return 0; 2328c2ecf20Sopenharmony_ci} 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_cistruct op_x86_model_spec op_arch_perfmon_spec = { 2358c2ecf20Sopenharmony_ci .reserved = MSR_PPRO_EVENTSEL_RESERVED, 2368c2ecf20Sopenharmony_ci .init = &arch_perfmon_init, 2378c2ecf20Sopenharmony_ci /* num_counters/num_controls filled in at runtime */ 2388c2ecf20Sopenharmony_ci .fill_in_addresses = &ppro_fill_in_addresses, 2398c2ecf20Sopenharmony_ci /* user space does the cpuid check for available events */ 2408c2ecf20Sopenharmony_ci .setup_ctrs = &ppro_setup_ctrs, 2418c2ecf20Sopenharmony_ci .check_ctrs = &ppro_check_ctrs, 2428c2ecf20Sopenharmony_ci .start = &ppro_start, 2438c2ecf20Sopenharmony_ci .stop = &ppro_stop, 2448c2ecf20Sopenharmony_ci .shutdown = &ppro_shutdown 2458c2ecf20Sopenharmony_ci}; 246