/*
 * @file op_model_amd.c
 * athlon / K7 / K8 / Family 10h model-specific MSR operations
 *
 * @remark Copyright 2002-2009 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Robert Richter <robert.richter@amd.com>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 * @author Jason Yeh <jason.yeh@amd.com>
 * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 */

#include <linux/oprofile.h>
#include <linux/device.h>
#include <linux/pci.h>
#include <linux/percpu.h>

#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/processor.h>

#include "op_x86_model.h"
#include "op_counter.h"

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
#define NUM_VIRT_COUNTERS	32
#else
#define NUM_VIRT_COUNTERS	0
#endif

#define OP_EVENT_MASK		0x0FFF
#define OP_CTR_OVERFLOW		(1ULL<<31)

#define MSR_AMD_EVENTSEL_RESERVED	((0xFFFFFCF0ULL<<32)|(1ULL<<21))

static int num_counters;
static unsigned long reset_value[OP_MAX_COUNTER];

#define IBS_FETCH_SIZE	6
#define IBS_OP_SIZE	12

static u32 ibs_caps;

struct ibs_config {
	unsigned long op_enabled;
	unsigned long fetch_enabled;
	unsigned long max_cnt_fetch;
	unsigned long max_cnt_op;
	unsigned long rand_en;
	unsigned long dispatched_ops;
	unsigned long branch_target;
};

struct ibs_state {
	u64		ibs_op_ctl;
	int		branch_target;
	unsigned long	sample_size;
};

static struct ibs_config ibs_config;
static struct ibs_state ibs_state;

/*
 * IBS randomization macros
 */
#define IBS_RANDOM_BITS			12
#define IBS_RANDOM_MASK			((1ULL << IBS_RANDOM_BITS) - 1)
#define IBS_RANDOM_MAXCNT_OFFSET	(1ULL << (IBS_RANDOM_BITS - 5))

/*
 * 16-bit Linear Feedback Shift Register (LFSR)
 *
 *                       16   14   13    11
 * Feedback polynomial = X  + X  + X  +  X  + 1
 */
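/*
 * The register shifts right, so the feedback taps at bit positions
 * 0, 2, 3 and 5 pick up the X^16, X^14, X^13 and X^11 terms of the
 * polynomial. The polynomial is maximal-length, so the sequence
 * repeats only after 2^16 - 1 steps; e.g. from the 0xF00D seed it
 * continues 0xF806, 0xFC03, 0xFE01, ...
 */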
static unsigned int lfsr_random(void)
{
	static unsigned int lfsr_value = 0xF00D;
	unsigned int bit;

	/* Compute next bit to shift in */
	bit = ((lfsr_value >> 0) ^
	       (lfsr_value >> 2) ^
	       (lfsr_value >> 3) ^
	       (lfsr_value >> 5)) & 0x0001;

	/* Advance to next register value */
	lfsr_value = (lfsr_value >> 1) | (bit << 15);

	return lfsr_value;
}

/*
 * IBS software randomization
 *
 * The IBS periodic op counter is randomized in software. The lower 12
 * bits of the 20-bit counter are randomized. IbsOpCurCnt is
 * initialized with a 12-bit random value.
 */
static inline u64 op_amd_randomize_ibs_op(u64 val)
{
	unsigned int random = lfsr_random();

	if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
		/*
		 * Work around hardware that cannot write to
		 * IbsOpCurCnt.
		 *
		 * Randomize the lower 8 bits of the 16-bit
		 * IbsOpMaxCnt [15:0] value in the range of -128 to
		 * +127 by adding/subtracting an offset to the
		 * maximum count (IbsOpMaxCnt).
		 *
		 * To avoid over- or underflows and to protect the
		 * upper bits starting at bit 16, the initial value
		 * for IbsOpMaxCnt must fit in the range from 0x0081
		 * to 0xff80.
		 */
		val += (s8)(random >> 4);
	else
		val |= (u64)(random & IBS_RANDOM_MASK) << 32;

	return val;
}

static inline void
op_amd_handle_ibs(struct pt_regs * const regs,
		  struct op_msrs const * const msrs)
{
	u64 val, ctl;
	struct op_entry entry;

	if (!ibs_caps)
		return;

	if (ibs_config.fetch_enabled) {
		rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
		if (ctl & IBS_FETCH_VAL) {
			rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
			oprofile_write_reserve(&entry, regs, val,
					       IBS_FETCH_CODE, IBS_FETCH_SIZE);
			oprofile_add_data64(&entry, val);
			oprofile_add_data64(&entry, ctl);
			rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
			oprofile_add_data64(&entry, val);
			oprofile_write_commit(&entry);

			/* reenable the IRQ */
			ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT);
			ctl |= IBS_FETCH_ENABLE;
			wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
		}
	}

	if (ibs_config.op_enabled) {
		rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
		if (ctl & IBS_OP_VAL) {
			rdmsrl(MSR_AMD64_IBSOPRIP, val);
			oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
					       ibs_state.sample_size);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA2, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA3, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSDCLINAD, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
			oprofile_add_data64(&entry, val);
			if (ibs_state.branch_target) {
				rdmsrl(MSR_AMD64_IBSBRTARGET, val);
				oprofile_add_data(&entry, (unsigned long)val);
			}
			oprofile_write_commit(&entry);

			/* reenable the IRQ */
			ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
			wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
		}
	}
}

static inline void op_amd_start_ibs(void)
{
	u64 val;

	if (!ibs_caps)
		return;

	memset(&ibs_state, 0, sizeof(ibs_state));

	/*
	 * Note: Since the max count settings may be out of range, we
	 * write back the values actually used so that userland can
	 * read them.
	 */

	if (ibs_config.fetch_enabled) {
		val = ibs_config.max_cnt_fetch >> 4;
		val = min(val, IBS_FETCH_MAX_CNT);
		ibs_config.max_cnt_fetch = val << 4;
		val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
		val |= IBS_FETCH_ENABLE;
		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
	}

	if (ibs_config.op_enabled) {
		val = ibs_config.max_cnt_op >> 4;
		if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
			/*
			 * IbsOpCurCnt not supported. See
			 * op_amd_randomize_ibs_op() for details.
			 */
			val = clamp(val, 0x0081ULL, 0xFF80ULL);
			ibs_config.max_cnt_op = val << 4;
		} else {
			/*
			 * The start value is randomized with a
			 * positive offset, so compensate with half of
			 * the randomized range. Also avoid
			 * underflows.
			 */
			val += IBS_RANDOM_MAXCNT_OFFSET;
			if (ibs_caps & IBS_CAPS_OPCNTEXT)
				val = min(val, IBS_OP_MAX_CNT_EXT);
			else
				val = min(val, IBS_OP_MAX_CNT);
			ibs_config.max_cnt_op =
				(val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
		}
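		/*
		 * IbsOpCtl[15:0] holds MaxCnt[19:4]; with OPCNTEXT the
		 * extended bits MaxCnt[26:20] sit in IbsOpCtl[26:20].
		 * val carries the count already shifted right by 4,
		 * so keep its low 16 bits in place and move the
		 * remaining high bits up by 4 into the extended field.
		 */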
		val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
		val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
		val |= IBS_OP_ENABLE;
		ibs_state.ibs_op_ctl = val;
		ibs_state.sample_size = IBS_OP_SIZE;
		if (ibs_config.branch_target) {
			ibs_state.branch_target = 1;
			ibs_state.sample_size++;
		}
		val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
		wrmsrl(MSR_AMD64_IBSOPCTL, val);
	}
}

static void op_amd_stop_ibs(void)
{
	if (!ibs_caps)
		return;

	if (ibs_config.fetch_enabled)
		/* clear max count and enable */
		wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);

	if (ibs_config.op_enabled)
		/* clear max count and enable */
		wrmsrl(MSR_AMD64_IBSOPCTL, 0);
}

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
			       struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val &= model->reserved;
		val |= op_x86_get_ctrl(model, &counter_config[virt]);
		wrmsrl(msrs->controls[i].addr, val);
	}
}

#endif

/* functions for op_amd_spec */

static void op_amd_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!msrs->counters[i].addr)
			continue;
		release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
		release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
	}
}

static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
{
	int i;

	for (i = 0; i < num_counters; i++) {
		if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
			goto fail;
		if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
			release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
			goto fail;
		}
		/* both registers must be reserved */
		if (num_counters == AMD64_NUM_COUNTERS_CORE) {
			msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
			msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
		} else {
			msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
			msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
		}
		continue;
	fail:
		if (!counter_config[i].enabled)
			continue;
		op_x86_warn_reserved(i);
		op_amd_shutdown(msrs);
		return -EBUSY;
	}

	return 0;
}

static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
			      struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/* setup reset_value */
	for (i = 0; i < OP_MAX_COUNTER; ++i) {
		if (counter_config[i].enabled
		    && msrs->counters[op_x86_virt_to_phys(i)].addr)
			reset_value[i] = counter_config[i].count;
		else
			reset_value[i] = 0;
	}

	/* clear all counters */
	for (i = 0; i < num_counters; ++i) {
		if (!msrs->controls[i].addr)
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
			op_x86_warn_in_use(i);
		val &= model->reserved;
		wrmsrl(msrs->controls[i].addr, val);
		/*
		 * avoid a false detection of ctr overflows in NMI
		 * handler
		 */
		wrmsrl(msrs->counters[i].addr, -1LL);
	}
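
	/*
	 * Note: the hardware counters count upwards and raise an NMI
	 * when they wrap, so each active counter is seeded below with
	 * the negated sample period; after reset_value events it
	 * overflows and op_amd_check_ctrs() records a sample.
	 */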
	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;

		/* setup counter registers */
		wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);

		/* setup control registers */
		rdmsrl(msrs->controls[i].addr, val);
		val &= model->reserved;
		val |= op_x86_get_ctrl(model, &counter_config[virt]);
		wrmsrl(msrs->controls[i].addr, val);
	}
}
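
/*
 * A counter programmed with -(u64)reset_value keeps bit 31 of its
 * live value set until it wraps (for the sample periods used here,
 * below 2^31), so a cleared OP_CTR_OVERFLOW bit means the counter
 * overflowed and a sample is due.
 */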
static int op_amd_check_ctrs(struct pt_regs * const regs,
			     struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;
		rdmsrl(msrs->counters[i].addr, val);
		/* bit is clear if overflowed: */
		if (val & OP_CTR_OVERFLOW)
			continue;
		oprofile_add_sample(regs, virt);
		wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
	}

	op_amd_handle_ibs(regs, msrs);

	/* See op_model_ppro.c */
	return 1;
}

static void op_amd_start(struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[op_x86_phys_to_virt(i)])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(msrs->controls[i].addr, val);
	}

	op_amd_start_ibs();
}

static void op_amd_stop(struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/*
	 * Subtle: stop all counters to avoid a race with setting our
	 * pm callback
	 */
	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[op_x86_phys_to_virt(i)])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(msrs->controls[i].addr, val);
	}

	op_amd_stop_ibs();
}

/*
 * check and reserve APIC extended interrupt LVT offset for IBS if
 * available
 */
static void init_ibs(void)
{
	ibs_caps = get_ibs_caps();

	if (!ibs_caps)
		return;

	printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
}

static int (*create_arch_files)(struct dentry *root);

static int setup_ibs_files(struct dentry *root)
{
	struct dentry *dir;
	int ret = 0;

	/* architecture specific files */
	if (create_arch_files)
		ret = create_arch_files(root);

	if (ret)
		return ret;

	if (!ibs_caps)
		return ret;

	/* model specific files */

	/* setup some reasonable defaults */
	memset(&ibs_config, 0, sizeof(ibs_config));
	ibs_config.max_cnt_fetch = 250000;
	ibs_config.max_cnt_op = 250000;

	if (ibs_caps & IBS_CAPS_FETCHSAM) {
		dir = oprofilefs_mkdir(root, "ibs_fetch");
		oprofilefs_create_ulong(dir, "enable",
					&ibs_config.fetch_enabled);
		oprofilefs_create_ulong(dir, "max_count",
					&ibs_config.max_cnt_fetch);
		oprofilefs_create_ulong(dir, "rand_enable",
					&ibs_config.rand_en);
	}

	if (ibs_caps & IBS_CAPS_OPSAM) {
		dir = oprofilefs_mkdir(root, "ibs_op");
		oprofilefs_create_ulong(dir, "enable",
					&ibs_config.op_enabled);
		oprofilefs_create_ulong(dir, "max_count",
					&ibs_config.max_cnt_op);
		if (ibs_caps & IBS_CAPS_OPCNT)
			oprofilefs_create_ulong(dir, "dispatched_ops",
						&ibs_config.dispatched_ops);
		if (ibs_caps & IBS_CAPS_BRNTRGT)
			oprofilefs_create_ulong(dir, "branch_target",
						&ibs_config.branch_target);
	}

	return 0;
}

struct op_x86_model_spec op_amd_spec;

static int op_amd_init(struct oprofile_operations *ops)
{
	init_ibs();
	create_arch_files = ops->create_files;
	ops->create_files = setup_ibs_files;

	if (boot_cpu_data.x86 == 0x15)
		num_counters = AMD64_NUM_COUNTERS_CORE;
	else
		num_counters = AMD64_NUM_COUNTERS;

	op_amd_spec.num_counters = num_counters;
	op_amd_spec.num_controls = num_counters;
	op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);

	return 0;
}

struct op_x86_model_spec op_amd_spec = {
	/* num_counters/num_controls filled in at runtime */
	.reserved		= MSR_AMD_EVENTSEL_RESERVED,
	.event_mask		= OP_EVENT_MASK,
	.init			= op_amd_init,
	.fill_in_addresses	= &op_amd_fill_in_addresses,
	.setup_ctrs		= &op_amd_setup_ctrs,
	.check_ctrs		= &op_amd_check_ctrs,
	.start			= &op_amd_start,
	.stop			= &op_amd_stop,
	.shutdown		= &op_amd_shutdown,
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
	.switch_ctrl		= &op_mux_switch_ctrl,
#endif
};